Example #1
def make_text_sink(output_path, input, coder=coders.ToStringCoder()):
    # Wrap a NativeTextFileSink in a WorkerWrite operation; each element is
    # encoded with `coder` and written with a trailing newline.
    return maptask.WorkerWrite(
        fileio.NativeTextFileSink(
            file_path_prefix=output_path,
            append_trailing_newlines=True,
            coder=coder),
        input=input,
        output_coders=(coder,))
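A quick usage note: the tests further down pass `input` as a pair of indices (presumably the producing operation and its output) and a `gs://` path as the file prefix, so a minimal, hypothetical call to this helper could look like the sketch below. The path is made up for illustration, and the worker modules (maptask, fileio, coders) are assumed to be importable.

write_op = make_text_sink('gs://my-bucket/output', input=(0, 0))
# write_op is a maptask.WorkerWrite wrapping a NativeTextFileSink that
# appends a trailing newline to every element encoded by ToStringCoder.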
Example #2
def _parse_text_sink(specs, codec_specs, unused_context):
  # Build a NativeTextFileSink from a 'TextSink' cloud operation spec.
  if specs['@type'] == 'TextSink':
    coder = get_coder_from_spec(codec_specs)
    return fileio.NativeTextFileSink(
        file_path_prefix=specs['filename']['value'],
        append_trailing_newlines=specs['append_trailing_newlines']['value'],
        coder=coder)
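The exact cloud spec format is not shown in this listing, but the keys the parser reads suggest its rough shape. The dict below is only that inference, and `codec_specs` is left opaque because the parser just forwards it to get_coder_from_spec.

specs = {
    '@type': 'TextSink',
    'filename': {'value': 'gs://somefile'},
    'append_trailing_newlines': {'value': True},
}
# codec_specs is whatever coder spec get_coder_from_spec understands; the
# context argument is unused by this parser.
sink = _parse_text_sink(specs, codec_specs, unused_context=None)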
Example #3
def _parse_avro_sink(specs, unused_codec_specs, unused_context):
    # Note that the worker does not really implement Avro yet. It takes
    # advantage of the fact that both reading and writing go through the
    # worker to substitute a supported format (text files with one pickled
    # object per line).
    if specs['@type'] == 'AvroSink':
        return fileio.NativeTextFileSink(specs['filename']['value'],
                                         append_trailing_newlines=True,
                                         coder=coders.Base64PickleCoder())
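Why this works as an Avro stand-in: Base64PickleCoder presumably pickles each element and base64-encodes the result, so every element becomes a single ASCII line with no embedded newlines, and append_trailing_newlines=True keeps one element per line. A rough round-trip sketch, assuming the coder exposes the usual encode/decode pair:

coder = coders.Base64PickleCoder()
line = coder.encode({'key': [1, 2, 3]})   # pickled, then base64: one text line
assert coder.decode(line) == {'key': [1, 2, 3]}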
Example #4
def test_write_entire_file(self):
    # Write a few lines through the sink's writer and check that the file
    # ends up with exactly those lines.
    lines = ['First', 'Second', 'Third']
    file_path = self.create_temp_file()
    sink = fileio.NativeTextFileSink(file_path)
    with sink.writer() as writer:
        for line in lines:
            writer.Write(line)
    with open(file_path, 'r') as f:
        self.assertEqual(f.read().splitlines(), lines)

def test_ungrouped_shuffle_source_to_text_sink(self):
    # An ungrouped-shuffle-to-text-sink work item should expand into a
    # shuffle read operation followed by a text sink write operation.
    work = workitem.get_work_items(
        get_shuffle_source_to_text_sink_message(
            UNGROUPED_SHUFFLE_SOURCE_SPEC))
    self.assertEqual(
        (work.proto.id, work.map_task.operations),
        (1234, [
            maptask.WorkerUngroupedShuffleRead(
                start_shuffle_position='opaque',
                end_shuffle_position='opaque',
                shuffle_reader_config='opaque',
                coder=CODER,
                output_coders=[CODER]),
            maptask.WorkerWrite(
                fileio.NativeTextFileSink(
                    file_path_prefix='gs://somefile',
                    append_trailing_newlines=True,
                    coder=CODER),
                input=(0, 0),
                output_coders=(CODER,))
        ]))

def test_in_memory_source_to_text_sink(self):
    # An in-memory-source-to-text-sink work item should expand into a read
    # of the decoded in-memory elements followed by a text sink write.
    work = workitem.get_work_items(
        get_in_memory_source_to_text_sink_message())
    self.assertEqual(
        (work.proto.id, work.map_task.operations),
        (1234, [
            maptask.WorkerRead(
                inmemory.InMemorySource(
                    start_index=1,
                    end_index=3,
                    elements=[
                        base64.b64decode(v['value'])
                        for v in IN_MEMORY_ELEMENTS
                    ],
                    coder=CODER),
                output_coders=[CODER]),
            maptask.WorkerWrite(
                fileio.NativeTextFileSink(
                    file_path_prefix='gs://somefile',
                    append_trailing_newlines=True,
                    coder=CODER),
                input=(0, 0),
                output_coders=(CODER,))
        ]))