def process(
        self,
        element,
        restriction_tracker=DoFn.RestrictionParam(ReadFilesProvider()),
        *args,
        **kwargs):
    """Yield the stripped lines of the file named by ``element``.

    The file is opened in binary mode, so every yielded line is ``bytes``.
    Reading begins at the ``start`` offset of the tracker's current
    restriction, and each line is read only after its starting byte offset
    has been claimed with ``restriction_tracker.try_claim``.

    Args:
      element: Path of the file to read; passed directly to ``open``.
      restriction_tracker: Tracker providing ``current_restriction()``,
        ``try_claim(position)`` and ``defer_remainder()``.
      *args: Unused.
      **kwargs: Unused.

    Yields:
      Each line read, with leading and trailing whitespace stripped.
      Iteration stops at the first line that is empty after stripping
      (this includes end of file), when a claim is refused, or once
      ``self._resume_count`` lines have been produced (if that attribute
      is truthy), in which case ``defer_remainder()`` is called first.
    """
    with open(element, 'rb') as file:
        # Read the start offset once; it is needed several times below.
        start = restriction_tracker.current_restriction().start
        pos = start
        if start > 0:
            # Step back one byte and consume up to and including the next
            # newline. If the previous byte is itself a newline this reads
            # exactly one byte and pos stays at start; otherwise the rest of
            # the line that straddles the boundary is skipped and pos lands
            # on the first byte of the following line.
            file.seek(start - 1)
            skipped = file.readline()
            pos = start - 1 + len(skipped)

        output_count = 0
        while restriction_tracker.try_claim(pos):
            raw_line = file.readline()
            line = raw_line.strip()
            if not line:
                # Covers both end of file (b'') and whitespace-only lines.
                # readline() never returns None, so no separate check is
                # needed.
                break
            yield line
            output_count += 1

            if self._resume_count and output_count == self._resume_count:
                # pos is intentionally not advanced before deferring.
                restriction_tracker.defer_remainder()
                break

            # Advance by the unstripped length so pos stays a byte offset.
            pos += len(raw_line)
def process(
        self,
        element,
        side1,
        side2,
        side3,
        window=beam.DoFn.WindowParam,
        restriction_tracker=DoFn.RestrictionParam(ExpandStringsProvider()),
        *args,
        **kwargs):
    """Emit ``element`` once per claimed position, combined with side values.

    The three side inputs are concatenated, in order, into a single list.
    For every integer position between the ``start`` and ``stop`` of the
    tracker's current restriction, the position is claimed with
    ``try_claim``; the first refused claim ends the iteration.

    Args:
      element: The value to emit. When ``self._record_window`` is truthy it
        is indexed as ``element[0]`` and ``element[1]``.
      side1: First iterable of side values.
      side2: Second iterable of side values.
      side3: Third iterable of side values.
      window: Object whose ``start`` attribute is converted with ``int``
        when ``self._record_window`` is truthy.
      restriction_tracker: Tracker providing ``current_restriction()`` and
        ``try_claim(position)``.
      *args: Unused.
      **kwargs: Unused.

    Yields:
      For each claimed position: if there are no side values, the output
      label alone; otherwise ``label + ':' + value`` for every side value.
      The label is ``element[0]:element[1]:<int(window.start)>`` when
      ``self._record_window`` is truthy, and ``element`` itself otherwise.
    """
    def output_label():
        # Built on demand so that nothing is evaluated unless a position
        # has actually been claimed.
        if self._record_window:
            return (
                element[0] + ':' + str(element[1]) + ':' +
                str(int(window.start)))
        return element

    side_values = list(side1) + list(side2) + list(side3)

    first_position = restriction_tracker.current_restriction().start
    end_position = restriction_tracker.current_restriction().stop

    for position in range(first_position, end_position):
        if not restriction_tracker.try_claim(position):
            break

        if side_values:
            for value in side_values:
                yield output_label() + ':' + value
        else:
            yield output_label()