Exemplo n.º 1
0
def main(argv):
  """Builds the pretraining data pipeline from flag values and runs it.

  Args:
    argv: Command-line argument list; more than one element is rejected.

  Raises:
    app.UsageError: If `argv` has more than one element.
  """
  if len(argv) > 1:
    raise app.UsageError("Too many command-line arguments.")

  # Collect the conversion settings first; every value except
  # `strip_column_names` comes straight from a flag of the same name.
  conversion_kwargs = dict(
      vocab_file=FLAGS.vocab_file,
      max_seq_length=FLAGS.max_seq_length,
      max_predictions_per_seq=FLAGS.max_predictions_per_seq,
      random_seed=FLAGS.random_seed,
      masked_lm_prob=FLAGS.masked_lm_prob,
      max_column_id=FLAGS.max_column_id,
      max_row_id=FLAGS.max_row_id,
      min_question_length=FLAGS.min_question_length,
      max_question_length=FLAGS.max_question_length,
      always_continue_cells=FLAGS.always_continue_cells,
      strip_column_names=False,
  )
  conversion_config = tf_example_utils.PretrainConversionConfig(
      **conversion_kwargs)

  pretrain_pipeline = pretrain_utils.build_pretrain_data_pipeline(
      input_file=FLAGS.input_file,
      output_dir=FLAGS.output_dir,
      config=conversion_config,
      dupe_factor=FLAGS.dupe_factor,
      min_num_rows=FLAGS.min_num_rows,
      min_num_columns=FLAGS.min_num_columns,
  )
  beam_runner.run(pretrain_pipeline)
def run(inputs, outputs, input_format):
    """Builds the retrieval pipeline, runs it and waits on the result.

    Args:
        inputs: Forwarded to the pipeline builder as `input_files`.
        outputs: Forwarded to the pipeline builder as `output_files`.
        input_format: Forwarded to the pipeline builder as `input_format`.
    """
    # Conversion settings are read from flags; column names are never stripped.
    retrieval_config = tf_example_utils.RetrievalConversionConfig(
        vocab_file=FLAGS.vocab_file,
        max_seq_length=FLAGS.max_seq_length,
        max_column_id=FLAGS.max_column_id,
        max_row_id=FLAGS.max_row_id,
        strip_column_names=False,
        cell_trim_length=FLAGS.cell_trim_length,
        use_document_title=FLAGS.use_document_title,
    )
    retrieval_pipeline = create_data.build_retrieval_pipeline(
        input_files=inputs,
        input_format=input_format,
        output_files=outputs,
        config=retrieval_config,
        converter_impl=FLAGS.converter_impl,
    )
    run_result = beam_runner.run(retrieval_pipeline)
    run_result.wait_until_finish()
Exemplo n.º 3
0
def main(unused_argv):
  """Reads nearest neighbors and adds them to the interactions.

  Args:
    unused_argv: Ignored.
  """
  del unused_argv

  # Resolve the input/output file groups from the three directory flags.
  file_sets = _get_inputs_outputs(
      FLAGS.interaction_dir,
      FLAGS.json_dir,
      FLAGS.output_dir,
  )
  negatives_pipeline = add_negative_tables_to_interactions(
      max_num_negatives=FLAGS.max_num_negatives,
      input_interactions_files=file_sets.input_interaction_files,
      input_tables_file=FLAGS.input_tables_file,
      input_json_files=file_sets.input_json_files,
      output_files=file_sets.output_interaction_files,
  )
  beam_runner.run(negatives_pipeline)
Exemplo n.º 4
0
def main(unused_argv):
    """Builds the intermediate pretraining pipeline from flags and runs it.

    A classifier conversion config is only created when
    `FLAGS.convert_to_examples` is set; otherwise `None` is passed to the
    pipeline builder.

    Args:
        unused_argv: Ignored.
    """
    del unused_argv

    synth_config = synthesize_entablement.SynthesizationConfig(
        prob_count_aggregation=FLAGS.prob_count_aggregation)

    if FLAGS.convert_to_examples:
        # NOTE(review): max_column_id and max_row_id are both taken from
        # FLAGS.max_seq_length rather than dedicated flags -- confirm this
        # is intentional.
        example_config = tf_example_utils.ClassifierConversionConfig(
            vocab_file=FLAGS.vocab_file,
            max_seq_length=FLAGS.max_seq_length,
            max_column_id=FLAGS.max_seq_length,
            max_row_id=FLAGS.max_seq_length,
            strip_column_names=False,
        )
    else:
        example_config = None

    built_pipeline = intermediate_pretrain_utils.build_pipeline(
        mode=FLAGS.mode,
        config=synth_config,
        use_fake_table=FLAGS.use_fake_table,
        add_opposite_table=FLAGS.add_opposite_table,
        drop_without_support_rate=FLAGS.drop_without_support_rate,
        input_file=FLAGS.input_file,
        output_dir=FLAGS.output_dir,
        output_suffix=FLAGS.output_suffix,
        conversion_config=example_config,
    )
    run_result = beam_runner.run(built_pipeline)
    run_result.wait_until_finish()
def main(_):
    """Builds the pipeline from the input/output flags, runs it and waits.

    The single positional argument is ignored.
    """
    flag_pipeline = build_pipeline(
        inputs=FLAGS.inputs,
        output_dir=FLAGS.output_dir,
    )
    run_result = beam_runner.run(flag_pipeline)
    run_result.wait_until_finish()