def main(argv):
  beam_utils.BeamInit()

  assert FLAGS.input_file_pattern
  assert FLAGS.output_filebase

  # Construct pipeline options from argv.
  options = beam.options.pipeline_options.PipelineOptions(argv[1:])

  reader = beam_utils.GetReader(
      'tfrecord',
      FLAGS.input_file_pattern,
      value_coder=beam.coders.ProtoCoder(dataset_pb2.Frame))
  writer = beam_utils.GetWriter(
      'tfrecord',
      file_pattern=FLAGS.output_filebase,
      value_coder=beam.coders.ProtoCoder(tf.train.Example))
  emitter_fn = beam_utils.GetEmitterFn('tfrecord')

  with beam_utils.GetPipelineRoot(options=options) as root:
    _ = (
        root
        | 'Read' >> reader
        | 'ConvertToTFExample' >> beam.ParDo(
            waymo_proto_to_tfe.WaymoOpenDatasetConverter(emitter_fn))
        | 'Write' >> writer)
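
# A minimal sketch of the absl flag definitions the converter pipeline above
# assumes. Only the flag names come from the code; the help strings and
# module placement are assumptions.
from absl import flags

FLAGS = flags.FLAGS

flags.DEFINE_string('input_file_pattern', None,
                    'Glob of TFRecord files holding dataset_pb2.Frame protos.')
flags.DEFINE_string('output_filebase', None,
                    'Base path for the output TFRecord files of '
                    'tf.train.Example protos.')
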
def main(argv):
  beam_utils.BeamInit()

  # Construct pipeline options from argv.
  options = beam.options.pipeline_options.PipelineOptions(argv[1:])

  reader = beam_utils.GetReader(
      FLAGS.record_format,
      FLAGS.input_file_pattern,
      value_coder=beam.coders.BytesCoder())

  with beam_utils.GetPipelineRoot(options=options) as root:
    _ = (
        root
        | 'Read' >> reader  # Read each record.
        | 'EmitOne' >> beam.Map(lambda _: 1)  # Emit a 1 for each record.
        | 'Count' >> beam.CombineGlobally(sum)  # Sum counts.
        | 'WriteToText' >> beam.io.WriteToText(FLAGS.output_count_file))
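
# A hypothetical invocation of the record-counting pipeline above. The
# script name is an assumption; the flag names come from the code, and
# --runner is a standard Beam pipeline option:
#
#   python count_records.py \
#     --record_format=tfrecord \
#     --input_file_pattern=/path/to/data-*.tfrecord \
#     --output_count_file=/tmp/record_count \
#     --runner=DirectRunner
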
def main(_):
  beam_utils.BeamInit()

  if not FLAGS.output_file_pattern:
    raise ValueError('Must provide an output_file_pattern')

  reader = beam.io.ReadFromTFRecord(
      FLAGS.input_file_pattern,
      coder=beam.coders.ProtoCoder(tf.train.Example))

  model_name = FLAGS.model_name
  split = FLAGS.split
  run_preprocessors = FLAGS.run_preprocessors

  with beam_utils.GetPipelineRoot() as root:
    _ = (
        root
        | 'Read' >> reader
        | 'ToTFExample' >> beam.ParDo(
            _ProcessShard(model_name, split, run_preprocessors))
        | 'Reshuffle' >> beam.Reshuffle()
        | 'Write' >> beam.io.WriteToTFRecord(
            FLAGS.output_file_pattern,
            coder=beam.coders.ProtoCoder(tf.train.Example)))
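
# The pipeline above depends on a _ProcessShard DoFn that is not shown.
# This is only a sketch of the interface it must satisfy (consume
# tf.train.Example protos, yield processed tf.train.Example protos); the
# body is hypothetical, not the actual implementation.
import apache_beam as beam
import tensorflow as tf


class _ProcessShard(beam.DoFn):
  """Re-emits input examples, optionally after running preprocessors."""

  def __init__(self, model_name, split, run_preprocessors):
    self._model_name = model_name
    self._split = split
    self._run_preprocessors = run_preprocessors

  def process(self, example):
    # Hypothetical body: look up the model/split, optionally apply its
    # preprocessors to `example`, and emit the resulting proto(s).
    yield example
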
def testGetPipelineRoot(self):
  with beam_utils.GetPipelineRoot() as root:
    _ = root | beam.Create([1, 2, 3]) | beam.Map(lambda x: x)
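
# A hedged extension of the test above: Beam ships assert_that/equal_to in
# apache_beam.testing.util for checking a PCollection's contents once the
# pipeline runs at context exit. The test name and the doubling Map are
# assumptions for illustration.
from apache_beam.testing.util import assert_that, equal_to


def testGetPipelineRootAssertsOutput(self):
  with beam_utils.GetPipelineRoot() as root:
    out = root | beam.Create([1, 2, 3]) | beam.Map(lambda x: x * 2)
    assert_that(out, equal_to([2, 4, 6]))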