示例#1
0
 def expand(self, pvalue):
   """Read from ``self._source``, optionally mapping elements to ``PubsubMessage``.

   The read is applied to ``pvalue.pipeline`` and its output's
   ``element_type`` is set to ``bytes``. When ``self.with_attributes`` is
   truthy, every element is additionally passed through
   ``PubsubMessage._from_proto_str`` and the resulting collection's
   ``element_type`` is set to ``PubsubMessage``.

   Returns:
     The raw collection, or the mapped collection when
     ``self.with_attributes`` is truthy.
   """
   raw = pvalue.pipeline | Read(self._source)
   raw.element_type = bytes
   # Without attributes the raw payloads are the final output.
   if not self.with_attributes:
     return raw
   messages = raw | Map(PubsubMessage._from_proto_str)
   messages.element_type = PubsubMessage
   return messages
示例#2
0
 def expand(self, pvalue):
     """Read ``self._source``, partition it, and combine partition 1 per key.

     The read output is split into two partitions by ``splitBadFiles``. Only
     the partition at index 1 is used further: it goes through a ``FlatMap``
     that re-emits each element unchanged and is then combined per key with
     ``combineTZ()``.

     Returns:
       The result of the per-key combine.
     """
     source_records = pvalue.pipeline | Read(self._source)
     partitions = source_records | beam.Partition(splitBadFiles, 2)
     # Wrapping each element in a one-item list makes FlatMap emit it as-is.
     passthrough = partitions[1] | beam.FlatMap(lambda e: [e])
     return passthrough | beam.CombinePerKey(combineTZ())
示例#3
0
 def expand(self, pvalue):
     """Read ``self._source``, tag the records, chunk the frames, combine per key.

     Steps, in order:
       1. Apply ``Read(self._source)`` to ``pvalue.pipeline`` and run
          ``TagFrames`` with multiple outputs.
       2. Combine the ``offsets`` output globally with ``CombineOffsets()``.
       3. Run ``FramesToChunks`` (configured from ``self.chunkShape``,
          ``self.Overlap`` and ``self.downSample``) over the ``frames``
          output, passing the combined offsets as a singleton side input.
       4. Combine the ``chunks`` output per key with ``combineTZ()``.

     Returns:
       The result of the final per-key combine.
     """
     source_records = pvalue.pipeline | Read(self._source)
     tagged = source_records | beam.ParDo(TagFrames()).with_outputs()

     offset_map = tagged.offsets | beam.CombineGlobally(CombineOffsets())

     frame_records = tagged.frames
     chunker = FramesToChunks(chunkShape=self.chunkShape,
                              Overlap=self.Overlap,
                              downSample=self.downSample)
     # The globally combined offsets reach the DoFn as a single side value.
     offsets_side_input = beam.pvalue.AsSingleton(offset_map)
     chunked = frame_records | beam.ParDo(
         chunker, offsets_side_input).with_outputs()

     return chunked.chunks | beam.CombinePerKey(combineTZ())
示例#4
0
 def expand(self, pvalue):
   """Apply a ``Read`` of ``self._source`` to ``pvalue.pipeline`` and return it."""
   pipeline = pvalue.pipeline
   source_read = Read(self._source)
   return pipeline | source_read
示例#5
0
 def expand(self, pvalue):
   """Read from ``self._source`` and decode every element as UTF-8.

   The read output's ``element_type`` is set to ``bytes``. A ``Map`` step
   labelled ``'DecodeString'`` then calls ``.decode('utf-8')`` on each
   element, and the decoded collection's ``element_type`` is set to
   ``unicode``.

   Returns:
     The decoded collection.
   """
   raw_bytes = pvalue.pipeline | Read(self._source)
   raw_bytes.element_type = bytes
   decode_step = 'DecodeString' >> Map(lambda b: b.decode('utf-8'))
   decoded = raw_bytes | decode_step
   decoded.element_type = unicode
   return decoded
示例#6
0
 def expand(self, pvalue):
   """Read ``self._source`` and run ``_ArrowTableToRowDictionaries`` over it.

   ``Read`` is applied to ``pvalue`` itself, and its output is then processed
   by a ``ParDo`` of ``_ArrowTableToRowDictionaries()``.
   """
   tables = pvalue | Read(self._source)
   to_row_dicts = ParDo(_ArrowTableToRowDictionaries())
   return tables | to_row_dicts
示例#7
0
 def expand(self, pvalue):
     """Read from a ``_TFRecordSource`` built from ``self._args``.

     The source is constructed by unpacking ``self._args`` positionally, and
     the ``Read`` is applied to ``pvalue.pipeline``.
     """
     pipeline = pvalue.pipeline
     record_source = _TFRecordSource(*self._args)
     return pipeline | Read(record_source)
示例#8
0
 def expand(self, pvalue):
     """Read from ``self._source`` with the source's own output type hint.

     The ``Read`` transform is given the value returned by
     ``self._source.output_type_hint()`` via ``with_output_types`` before
     being applied to ``pvalue.pipeline``.
     """
     pipeline = pvalue.pipeline
     source_read = Read(self._source)
     type_hint = self._source.output_type_hint()
     return pipeline | source_read.with_output_types(type_hint)
示例#9
0
 def expand(self, pcoll):
     """
     Implementation of `apache_beam.transforms.ptransform.PTransform.expand`.

     Emits an info-level message through ``self.logger`` and then applies a
     ``Read`` of ``self._source`` to ``pcoll``. The message is logged when
     this method is called, before the read is applied.
     """
     self.logger.info('Starting Billboard.com scrape.')
     source_read = Read(self._source)
     return pcoll | source_read
示例#10
0
 def expand(self, pvalue):
     """Read from ``self._source`` and pass elements through ``_decodeUtf8String``.

     The read output's ``element_type`` is set to ``bytes``. A ``ParDo`` step
     labelled ``'decode string'`` applies ``_decodeUtf8String``, and the
     resulting collection's ``element_type`` is set to ``unicode``.

     Returns:
       The collection produced by the ``'decode string'`` step.
     """
     raw_bytes = pvalue.pipeline | Read(self._source)
     raw_bytes.element_type = bytes
     decode_step = 'decode string' >> ParDo(_decodeUtf8String)
     decoded = raw_bytes | decode_step
     decoded.element_type = unicode
     return decoded