Exemplo n.º 1
0
 def expand(self, pvalue):
   """Read from ``self._source``, optionally converting to ``PubsubMessage``.

   The output of the read is tagged with element type ``bytes``. When
   ``self.with_attributes`` is truthy, every element is additionally mapped
   through ``PubsubMessage._from_proto_str`` and the mapped collection is
   tagged with element type ``PubsubMessage``.

   Args:
     pvalue: Input value; only its ``pipeline`` attribute is used.

   Returns:
     The bytes collection, or the ``PubsubMessage`` collection when
     attributes were requested.
   """
   raw_payloads = pvalue.pipeline | Read(self._source)
   raw_payloads.element_type = bytes
   # Without attributes the raw payloads are the final output.
   if not self.with_attributes:
     return raw_payloads
   messages = raw_payloads | Map(PubsubMessage._from_proto_str)
   messages.element_type = PubsubMessage
   return messages
Exemplo n.º 2
0
 def expand(self, pvalue):
     """Read from ``self._source``, partition, and combine one partition per key.

     The read output is split into two partitions by ``splitBadFiles``; only
     the partition at index 1 is processed further. Its elements are passed
     through an identity-style ``FlatMap`` and then combined per key with
     ``combineTZ()``.

     Args:
       pvalue: Input value; only its ``pipeline`` attribute is used.

     Returns:
       The per-key combined collection.
     """
     source_output = pvalue.pipeline | Read(self._source)
     partitions = source_output | beam.Partition(splitBadFiles, 2)
     selected = partitions[1]
     # Each element is wrapped in a one-item list, so FlatMap emits it once,
     # unchanged.
     reemitted = selected | beam.FlatMap(lambda item: [item])
     return reemitted | beam.CombinePerKey(combineTZ())
Exemplo n.º 3
0
 def expand(self, pvalue):
     """Read from ``self._source`` and build per-key combined chunks.

     Steps, in order:
       1. Read the source and run ``TagFrames`` with tagged outputs.
       2. Combine the ``offsets`` output globally with ``CombineOffsets``.
       3. Run ``FramesToChunks`` over the ``frames`` output, passing the
          combined offsets as a singleton side input.
       4. Combine the ``chunks`` output per key with ``combineTZ()``.

     Args:
       pvalue: Input value; only its ``pipeline`` attribute is used.

     Returns:
       The per-key combined chunks collection.
     """
     source_output = pvalue.pipeline | Read(self._source)
     tagged = source_output | beam.ParDo(TagFrames()).with_outputs()

     # A single globally combined value built from the "offsets" output.
     combined_offsets = tagged.offsets | beam.CombineGlobally(CombineOffsets())

     frame_elements = tagged.frames
     chunker = FramesToChunks(chunkShape=self.chunkShape,
                              Overlap=self.Overlap,
                              downSample=self.downSample)
     offsets_side_input = beam.pvalue.AsSingleton(combined_offsets)
     chunk_outputs = frame_elements | beam.ParDo(
         chunker, offsets_side_input).with_outputs()

     return chunk_outputs.chunks | beam.CombinePerKey(combineTZ())
Exemplo n.º 4
0
 def expand(self, pvalue):
   """Apply a ``Read`` of ``self._source`` to the pipeline of ``pvalue``.

   Args:
     pvalue: Input value; only its ``pipeline`` attribute is used.

   Returns:
     The result of applying the read to the pipeline.
   """
   pipeline = pvalue.pipeline
   read_output = pipeline | Read(self._source)
   return read_output
Exemplo n.º 5
0
 def expand(self, pvalue):
   """Read bytes from ``self._source`` and decode each element as UTF-8.

   The read output is tagged with element type ``bytes``; the decoded
   output (step label ``'DecodeString'``) is tagged with ``unicode``.

   Args:
     pvalue: Input value; only its ``pipeline`` attribute is used.

   Returns:
     The collection of decoded strings.
   """
   raw_payloads = pvalue.pipeline | Read(self._source)
   raw_payloads.element_type = bytes
   decode_step = 'DecodeString' >> Map(lambda payload: payload.decode('utf-8'))
   decoded = raw_payloads | decode_step
   # NOTE(review): `unicode` must be resolvable here (Python 2 builtin or a
   # compatibility import elsewhere in the module) -- confirm.
   decoded.element_type = unicode
   return decoded
Exemplo n.º 6
0
 def expand(self, pvalue):
   """Read from ``self._source`` and apply ``_ArrowTableToRowDictionaries``.

   The read is applied to ``pvalue`` itself, and its output is then
   processed by a ``ParDo`` over ``_ArrowTableToRowDictionaries()``.

   Args:
     pvalue: Value the read is applied to.

   Returns:
     The output of the ``ParDo`` step.
   """
   read_output = pvalue | Read(self._source)
   return read_output | ParDo(_ArrowTableToRowDictionaries())
Exemplo n.º 7
0
 def expand(self, pvalue):
     """Read from a ``_TFRecordSource`` built from ``self._args``.

     Args:
       pvalue: Input value; only its ``pipeline`` attribute is used.

     Returns:
       The result of applying the read to the pipeline.
     """
     pipeline = pvalue.pipeline
     # The stored positional arguments are forwarded to the source unchanged.
     record_source = _TFRecordSource(*self._args)
     return pipeline | Read(record_source)
Exemplo n.º 8
0
 def expand(self, pvalue):
     """Read from ``self._source`` with the source's own output type hint.

     The read transform is annotated via ``with_output_types`` using the
     value returned by ``self._source.output_type_hint()``.

     Args:
       pvalue: Input value; only its ``pipeline`` attribute is used.

     Returns:
       The result of applying the typed read to the pipeline.
     """
     pipeline = pvalue.pipeline
     read_transform = Read(self._source)
     typed_read = read_transform.with_output_types(
         self._source.output_type_hint())
     return pipeline | typed_read
Exemplo n.º 9
0
 def expand(self, pcoll):
     """
     Log the start of the scrape, then apply a read of ``self._source``.

     Implements method `apache_beam.transforms.ptransform.PTransform.expand`.

     Args:
       pcoll: Value the read is applied to.

     Returns:
       The result of applying the read to ``pcoll``.
     """
     self.logger.info('Starting Billboard.com scrape.')
     scrape_read = Read(self._source)
     return pcoll | scrape_read
Exemplo n.º 10
0
 def expand(self, pvalue):
     """Read bytes from ``self._source`` and run ``_decodeUtf8String`` over them.

     The read output is tagged with element type ``bytes``; the output of the
     ``'decode string'`` ``ParDo`` step is tagged with ``unicode``.

     Args:
       pvalue: Input value; only its ``pipeline`` attribute is used.

     Returns:
       The output collection of the decode step.
     """
     raw_payloads = pvalue.pipeline | Read(self._source)
     raw_payloads.element_type = bytes
     decode_step = 'decode string' >> ParDo(_decodeUtf8String)
     decoded = raw_payloads | decode_step
     # NOTE(review): `unicode` must be resolvable here (Python 2 builtin or a
     # compatibility import elsewhere in the module) -- confirm.
     decoded.element_type = unicode
     return decoded