def main(argv):
  """Script entry point: configure logging and run the file-processing pipeline.

  Builds the pipeline from the flag-supplied input/output paths and blocks
  until the Flume run completes.
  """
  del argv  # Unused.
  stdlogging.getLogger().setLevel(stdlogging.INFO)
  # program_started() must precede pipeline construction.
  runner.program_started()
  file_pipeline = process_files(FLAGS.input_path, FLAGS.output_path)
  run_result = runner.FlumeRunner().run(file_pipeline)
  run_result.wait_until_finish()
def main(argv):
  """Script entry point: validate flags, configure logging, run the pipeline.

  Raises:
    ValueError: if --num_output_files is less than 1.
  """
  del argv  # Unused.
  # Guard clause: refuse to run without a positive shard count.
  if FLAGS.num_output_files < 1:
    raise ValueError("Number of output shards must be defined.")
  stdlogging.getLogger().setLevel(stdlogging.INFO)
  # program_started() must precede pipeline construction.
  runner.program_started()
  file_pipeline = process_files(FLAGS.input_path, FLAGS.output_path)
  run_result = runner.FlumeRunner().run(file_pipeline)
  run_result.wait_until_finish()
def main(argv=()):
  """Script entry point: run batched inference and save the top results.

  Creates one element per batch, runs inference over each batch, keeps the
  top --num_to_save scored items, and writes them to --output_name.
  """
  del argv  # Unused.

  def pipeline(root):
    # pylint: disable=expression-not-assigned
    # One pipeline element per batch, each carrying its batch size.
    batches = root | 'Range' >> beam.Create(
        [FLAGS.batch_size for _ in range(FLAGS.num_batches)])
    scored = batches | RunInferenceForBatch(
        FLAGS.sequence_length, FLAGS.target_name, FLAGS.model_dir,
        FLAGS.checkpoint_path, FLAGS.affinity_target_map)
    # Rank by (value, key) and retain only the best num_to_save entries.
    top = scored | 'TopByValue' >> beam.transforms.combiners.Top.Of(
        FLAGS.num_to_save,
        compare=lambda a, b: (a[1], a[0]) < (b[1], b[0]))
    top | 'save' >> beam.io.WriteToText(FLAGS.output_name)
    # pylint: enable=expression-not-assigned

  runner.FlumeRunner().run(pipeline)