def expand(self, pvalue):
    """Attach the read step that matches ``self._handle`` to the pipeline.

    A handle ending in ``.csv`` is read as text (with a bytes coder) and
    every element is decoded by the CSV coder that
    ``reddit.make_csv_coder`` builds from the input schema. Any other
    handle is handed to ``reddit.make_standard_sql`` and the resulting
    query is read from BigQuery with standard SQL enabled.

    Args:
        pvalue: Object whose ``pipeline`` attribute the read transforms
            are applied to.

    Returns:
        The result of applying the labelled read transform(s) to
        ``pvalue.pipeline``.
    """
    if not self._handle.endswith('.csv'):
        # The input is BigQuery table name(s).
        sql = reddit.make_standard_sql(self._handle, mode=self._mode)
        root = pvalue.pipeline
        bigquery_source = beam.io.BigQuerySource(
            query=sql, use_standard_sql=True)
        return root | 'ReadFromBigQuery' >> beam.io.Read(bigquery_source)

    # The input is CSV file(s).
    input_schema = reddit.make_input_schema(mode=self._mode)
    decoder = reddit.make_csv_coder(input_schema, mode=self._mode)
    root = pvalue.pipeline
    raw_lines = root | 'ReadFromText' >> beam.io.ReadFromText(
        self._handle,
        # TODO(b/35653662): Obviate the need for setting this.
        coder=beam.coders.BytesCoder())
    return raw_lines | 'ParseCSV' >> beam.Map(decoder.decode)