def testNgramIdToTokenSeparator(self):
    """Checks that ngram_id_to_token joins decoded ngrams with a separator.

    Two id rows are decoded against the test ngram vocabulary with
    ``ngram_separator='.'``; the expected strings contain exactly
    ``lengths[i]`` ngrams per row, joined by '.'.
    """
    vocab = test_helper.test_src_dir_path('core/ops/testdata/test_ngrams.txt')
    with self.session(use_gpu=False):
        ngram_ids = [[14, 11, 6, 24, 7, 3, 13, 82, 2, 2],
                     [57, 3, 73, 17, 22, 9, 2, 2, 2, 2]]
        lengths = [8, 6]
        scripts = py_x_ops.ngram_id_to_token(
            ngram_ids, lengths, ngram_vocab_filepath=vocab,
            ngram_separator='.')
        # Evaluated string tensors come back as bytes objects, so the
        # expectations are bytes literals (matching testNgramIdToToken).
        # Plain str literals would never compare equal under Python 3,
        # while b'...' literals still compare equal under Python 2.
        scripts_expected = [b'p.n.?.o.".{.t.we', b'gh.{.rt.l.c.r']
        self.assertEqual(scripts_expected, scripts.eval().tolist())
def testNgramIdToToken(self):
    """Checks ngram_id_to_token output when no separator is passed.

    Decodes two id rows against the test ngram vocabulary and compares the
    evaluated result with the expected byte strings.
    """
    vocab_path = test_helper.test_src_dir_path(
        'core/ops/testdata/test_ngrams.txt')
    # Each row holds more ids than its sequence length; the expected output
    # below contains only the first 8 and 6 ngrams respectively.
    id_rows = [
        [14, 11, 6, 24, 7, 3, 13, 82, 2, 2],
        [57, 3, 73, 17, 22, 9, 2, 2, 2, 2],
    ]
    row_lengths = [8, 6]
    expected = [b'pn?o"{twe', b'gh{rtlcr']
    with self.session(use_gpu=False):
        decoded = py_x_ops.ngram_id_to_token(
            id_rows, row_lengths, ngram_vocab_filepath=vocab_path)
        actual = decoded.eval().tolist()
        self.assertEqual(expected, actual)
def IdsToStrings(self, ids, lens):
    """Converts id sequences to strings via py_x_ops.ngram_id_to_token.

    The token vocabulary (with ``tokens_delimiter``) takes precedence; the
    ngram vocabulary (with ``ngram_separator``) is used only when no token
    vocabulary path is set.

    Args:
      ids: Passed to the op as ``token_ids``.
      lens: Passed to the op as ``seq_lengths``.

    Returns:
      The result of ``py_x_ops.ngram_id_to_token``.
    """
    self._CheckParams()
    params = self.params
    # NOTE(review): presumably _CheckParams guarantees that one of the two
    # vocab paths is set; if neither is, the return below fails with
    # UnboundLocalError -- confirm against _CheckParams.
    if params.token_vocab_filepath:
        vocab_kwargs = dict(
            ngram_vocab_filepath=params.token_vocab_filepath,
            ngram_separator=params.tokens_delimiter)
    elif params.ngram_vocab_filepath:
        vocab_kwargs = dict(
            ngram_vocab_filepath=params.ngram_vocab_filepath,
            ngram_separator=params.ngram_separator)
    return py_x_ops.ngram_id_to_token(
        token_ids=ids, seq_lengths=lens, **vocab_kwargs)