def start(self):
    """Start the operation and open a writer on the shuffle sink.

    Chooses the (key, value) coder pair from the first output coder of the
    spec, stores the matching windowed write coder on ``self._write_coder``,
    creates a ``ShuffleSink`` if none has been set yet, and enters the
    sink's writer context.
    """
    super(ShuffleWriteOperation, self).start()
    spec = self.spec
    self.is_ungrouped = spec.shuffle_kind == 'ungrouped'
    element_coder = spec.output_coders[0]
    # Ungrouped shuffles pair a bytes coder with the element coder itself;
    # every other kind splits the element coder into its key/value coders.
    key_value_coders = (
        (BytesCoder(), element_coder)
        if self.is_ungrouped
        else (element_coder.key_coder(), element_coder.value_coder())
    )
    self._write_coder = WindowedValueCoder(TupleCoder(key_value_coders))
    # Keep an already-assigned sink; only build one when none is present.
    if self.shuffle_sink is None:
        self.shuffle_sink = shuffle.ShuffleSink(
            spec.shuffle_writer_config, coder=key_value_coders)
    # The writer context is entered by hand because the writer is kept on
    # the instance beyond this call.
    # NOTE(review): the matching __exit__ is presumably issued elsewhere in
    # the class - confirm.
    sink_writer = self.shuffle_sink.writer()
    self.writer = sink_writer
    sink_writer.__enter__()
def start(self):
    """Start the operation and forward every shuffle record downstream.

    Creates an ``UngroupedShuffleSource`` from the spec if no source has
    been set yet, then reads all records from the source's reader, wraps
    each one with ``GlobalWindows.WindowedValue`` and passes it to
    ``self.output``.
    """
    super(UngroupedShuffleReadOperation, self).start()
    spec = self.spec
    # Stays None when a shuffle source was already assigned: the coder pair
    # is only built together with a freshly created source.
    output_coder = None
    if self.shuffle_source is None:
        key_value_coders = (BytesCoder(), spec.coder)
        output_coder = WindowedValueCoder(TupleCoder(key_value_coders))
        self.shuffle_source = shuffle.UngroupedShuffleSource(
            spec.shuffle_reader_config,
            coder=key_value_coders,
            start_position=spec.start_shuffle_position,
            end_position=spec.end_shuffle_position)
    with self.shuffle_source.reader() as shuffle_reader:
        for record in shuffle_reader:
            # Assigned inside the loop, so self._reader is only set once
            # at least one record has been read.
            self._reader = shuffle_reader
            self.output(GlobalWindows.WindowedValue(record),
                        coder=output_coder)