Exemplo n.º 1
0
def tabbed_parsing_token_generator(data_dir, tmp_dir, train, prefix,
                                   source_vocab_size, target_vocab_size):
    """Get both vocabularies and delegate to `tabbed_generator`.

    The vocabularies are always requested for "parsing_train.pairs",
    whatever the value of `train`; `train` only picks which pairs file
    is handed to `tabbed_generator`.

    Args:
        data_dir: Passed through to `get_or_generate_tabbed_vocab`.
        tmp_dir: Passed through to `get_or_generate_tabbed_vocab`; also
            the directory under which the pairs file path is built.
        train: Truthy selects "parsing_train.pairs", falsy selects
            "parsing_dev.pairs".
        prefix: Leading part of both vocabulary file names.
        source_vocab_size: Passed to the vocab helper and embedded in the
            source vocabulary file name via `%d`.
        target_vocab_size: Passed to the vocab helper and embedded in the
            target vocabulary file name via `%d`.

    Returns:
        Whatever `tabbed_generator` returns for the chosen pairs file,
        the two vocabularies and `EOS`.
    """
    train_pairs = "parsing_train.pairs"

    # Index 0 is requested for the source side and 1 for the target side
    # (presumably the tab-separated column -- confirm against the helper).
    src_vocab_name = prefix + "_source.vocab.%d" % source_vocab_size
    src_vocab = generator_utils.get_or_generate_tabbed_vocab(
        data_dir, tmp_dir, train_pairs, 0, src_vocab_name, source_vocab_size)

    tgt_vocab_name = prefix + "_target.vocab.%d" % target_vocab_size
    tgt_vocab = generator_utils.get_or_generate_tabbed_vocab(
        data_dir, tmp_dir, train_pairs, 1, tgt_vocab_name, target_vocab_size)

    if train:
        split = "train"
    else:
        split = "dev"
    pairs_path = os.path.join(tmp_dir, "parsing_%s" % split + ".pairs")
    return tabbed_generator(pairs_path, src_vocab, tgt_vocab, EOS)
Exemplo n.º 2
0
def tabbed_parsing_token_generator(data_dir, tmp_dir, train, prefix,
                                   source_vocab_size, target_vocab_size):
  """Get both vocabularies and delegate to `translate.tabbed_generator`.

  A single pairs file, chosen by `train`, is used throughout: its name is
  passed to the vocab helper for both vocabularies and its path under
  `tmp_dir` is handed to `translate.tabbed_generator`.

  Args:
    data_dir: Passed through to `get_or_generate_tabbed_vocab`.
    tmp_dir: Passed through to `get_or_generate_tabbed_vocab`; also the
      directory under which the pairs file path is built.
    train: Truthy selects "parsing_train.pairs", falsy selects
      "parsing_dev.pairs".
    prefix: Leading part of both vocabulary file names.
    source_vocab_size: Passed to the vocab helper and embedded in the
      source vocabulary file name via `%d`.
    target_vocab_size: Passed to the vocab helper and embedded in the
      target vocabulary file name via `%d`.

  Returns:
    Whatever `translate.tabbed_generator` returns for the chosen pairs
    file, the two vocabularies and `EOS`.
  """
  if train:
    split = "train"
  else:
    split = "dev"
  pairs_name = "parsing_{0}.pairs".format(split)

  # Index 0 is requested for the source side and 1 for the target side
  # (presumably the tab-separated column -- confirm against the helper).
  src_vocab_name = prefix + "_source.tokens.vocab.%d" % source_vocab_size
  src_vocab = generator_utils.get_or_generate_tabbed_vocab(
      data_dir, tmp_dir, pairs_name, 0, src_vocab_name, source_vocab_size)

  tgt_vocab_name = prefix + "_target.tokens.vocab.%d" % target_vocab_size
  tgt_vocab = generator_utils.get_or_generate_tabbed_vocab(
      data_dir, tmp_dir, pairs_name, 1, tgt_vocab_name, target_vocab_size)

  pairs_path = os.path.join(tmp_dir, pairs_name)
  return translate.tabbed_generator(pairs_path, src_vocab, tgt_vocab, EOS)