示例#1
0
 def expand(self, pvalue):
   """Builds the read stage for the configured input handle.

   A handle ending in '.csv' is read as text and each record is passed
   through the CSV coder's `decode`; any other handle is used to build a
   standard-SQL query that is read from BigQuery.

   Args:
     pvalue: The input value; only its `pipeline` attribute is used.

   Returns:
     The result of applying the read transform(s) to `pvalue.pipeline`.
   """
   is_csv_input = self._handle.endswith('.csv')

   if not is_csv_input:
     # Anything that is not a CSV path is treated as BigQuery table name(s).
     sql = reddit.make_standard_sql(self._handle, mode=self._mode)
     root = pvalue.pipeline
     bq_source = beam.io.BigQuerySource(query=sql, use_standard_sql=True)
     return root | 'ReadFromBigQuery' >> beam.io.Read(bq_source)

   # CSV file(s): read the raw records first, then decode each one.
   input_schema = reddit.make_input_schema(mode=self._mode)
   decoder = reddit.make_csv_coder(input_schema, mode=self._mode)
   root = pvalue.pipeline
   raw_records = root | 'ReadFromText' >> beam.io.ReadFromText(
       self._handle,
       # TODO(b/35653662): Obviate the need for setting this.
       coder=beam.coders.BytesCoder())
   return raw_records | 'ParseCSV' >> beam.Map(decoder.decode)
示例#2
0
 def expand(self, pvalue):
   """Builds the read stage for the configured input handle.

   A handle ending in '.csv' is read as text and each record is passed
   through the CSV coder's `decode`; any other handle is used to build a
   standard-SQL query that is read from BigQuery.

   Args:
     pvalue: The input value; only its `pipeline` attribute is used.

   Returns:
     The result of applying the read transform(s) to `pvalue.pipeline`.
   """
   is_csv_input = self._handle.endswith('.csv')

   if not is_csv_input:
     # Anything that is not a CSV path is treated as BigQuery table name(s).
     sql = reddit.make_standard_sql(self._handle, mode=self._mode)
     root = pvalue.pipeline
     bq_source = beam.io.BigQuerySource(query=sql, use_standard_sql=True)
     return root | 'ReadFromBigQuery' >> beam.io.Read(bq_source)

   # CSV file(s): read the raw records first, then decode each one.
   input_schema = reddit.make_input_schema(mode=self._mode)
   decoder = reddit.make_csv_coder(input_schema, mode=self._mode)
   root = pvalue.pipeline
   raw_records = root | 'ReadFromText' >> beam.io.ReadFromText(
       self._handle,
       # TODO(b/35653662): Obviate the need for setting this.
       coder=beam.coders.BytesCoder())
   return raw_records | 'ParseCSV' >> beam.Map(decoder.decode)