Exemplo n.º 1
0
    def process(self,
                element,
                restriction_tracker=DoFn.RestrictionParam(ReadFilesProvider()),
                *args,
                **kwargs):
        """Yield the stripped lines of a file that the restriction covers.

        Positions passed to ``restriction_tracker.try_claim`` are byte
        offsets into the file (it is opened in binary mode), each one the
        offset at which the next line to be read begins.

        Args:
            element: Name of the file to read; passed directly to ``open``.
            restriction_tracker: Tracker whose current restriction supplies
                the starting offset and which arbitrates every claim.
            *args: Unused.
            **kwargs: Unused.

        Yields:
            Each claimed line as ``bytes``, with leading and trailing
            whitespace stripped.
        """
        with open(element, 'rb') as file:
            start = restriction_tracker.current_restriction().start
            pos = start
            if start > 0:
                # Step back one byte and read through the end of that line.
                # If ``start`` sits exactly on a line boundary this consumes
                # only the preceding newline and ``pos`` stays at ``start``;
                # otherwise the partial line is skipped and ``pos`` moves to
                # the start of the next full line.
                # NOTE(review): presumably the skipped partial line is
                # emitted by whoever processes the preceding restriction.
                file.seek(start - 1)
                pos = start - 1 + len(file.readline())

            output_count = 0
            while restriction_tracker.try_claim(pos):
                raw_line = file.readline()
                line = raw_line.strip()
                if not line:
                    # ``readline`` returns b'' at end of file, and a
                    # whitespace-only line strips down to b'' as well;
                    # either way reading stops here.
                    break

                yield line
                output_count += 1

                # After ``_resume_count`` outputs, hand the rest of the
                # restriction back to the tracker and stop.
                if self._resume_count and output_count == self._resume_count:
                    restriction_tracker.defer_remainder()
                    break

                # Advance by the unstripped length so ``pos`` remains a
                # true byte offset.
                pos += len(raw_line)
Exemplo n.º 2
0
 def process(self,
             element,
             side1,
             side2,
             side3,
             window=beam.DoFn.WindowParam,
             restriction_tracker=DoFn.RestrictionParam(
                 ExpandStringsProvider()),
             *args,
             **kwargs):
     """Emit the element once per claimed position, per side-input value.

     For every position of the current restriction that is successfully
     claimed, the element (or a window-tagged label built from it) is
     yielded once when the side inputs are all empty, or once per side
     value with ``':' + value`` appended otherwise.

     Args:
         element: The input element. When ``self._record_window`` is
             truthy it is indexed as ``element[0]`` and ``element[1]``;
             otherwise it is used as is.
         side1: First iterable of side values.
         side2: Second iterable of side values.
         side3: Third iterable of side values.
         window: Window of the element; only ``window.start`` is read, and
             only when ``self._record_window`` is truthy.
         restriction_tracker: Tracker whose current restriction supplies
             the ``start``/``stop`` bounds and which arbitrates each claim.
         *args: Unused.
         **kwargs: Unused.

     Yields:
         The label alone when there are no side values, otherwise
         ``label + ':' + val`` for each side value, in side1, side2, side3
         order.
     """
     side = list(side1)
     side.extend(side2)
     side.extend(side3)

     start = restriction_tracker.current_restriction().start
     stop = restriction_tracker.current_restriction().stop
     for i in range(start, stop):
         if not restriction_tracker.try_claim(i):
             # A refused claim ends processing of this restriction.
             break

         # The label does not depend on the side value, so build it once
         # per claimed position rather than once per side value.
         if self._record_window:
             label = (element[0] + ':' + str(element[1]) + ':' +
                      str(int(window.start)))
         else:
             label = element

         if not side:
             yield label
         else:
             for val in side:
                 yield label + ':' + val