def read_file(p, label, file_pattern, pk=None):
    """Read CSV records matching ``file_pattern`` and optionally key them.

    A step named ``'Read: <label>'`` is applied to ``p``; it wraps a
    ``CsvFileSource`` built with ``add_source=False`` and
    ``dictionary_output=True`` in ``beam.io.Read``.

    Args:
        p: Object the read step is piped onto (presumably a Beam pipeline;
            confirm against callers).
        label: Value interpolated into the step names.
        file_pattern: Passed through to ``CsvFileSource`` unchanged.
        pk: Optional key used to index each record. When truthy, a second
            step named ``'Key: <label>'`` maps every record ``x`` to
            ``(x[pk], x)``. Falsy values (including the default ``None``)
            skip the keying step.

    Returns:
        The result of the read step, or of the keying step when ``pk`` is
        truthy.
    """
    read_step = 'Read: %s' % label
    reader = beam.io.Read(
        CsvFileSource(file_pattern, add_source=False, dictionary_output=True)
    )
    records = p | read_step >> reader

    # Without a key there is nothing more to do: hand back the raw records.
    if not pk:
        return records

    key_step = 'Key: %s' % label
    return records | key_step >> beam.Map(lambda record: (record[pk], record))
def read_index_file(label, file_pattern, pk, bk, add_source=False, dictionary_output=True):
    """Read CSV records matching ``file_pattern`` as ``(x[pk], x[bk])`` pairs.

    Two steps are chained: ``'Read: <label>'`` wraps a ``CsvFileSource`` in
    ``beam.io.Read``, and ``'Tuple: <label>'`` maps every record ``x`` to the
    pair ``(x[pk], x[bk])``.

    Args:
        label: Value interpolated into both step names.
        file_pattern: Passed through to ``CsvFileSource`` unchanged.
        pk: Index/key of the first element of each output pair.
        bk: Index/key of the second element of each output pair.
        add_source: Forwarded to ``CsvFileSource`` as ``add_source``.
        dictionary_output: Forwarded to ``CsvFileSource`` as
            ``dictionary_output``.

    Returns:
        The result of the ``'Tuple: <label>'`` step.
    """
    # NOTE(review): ``p`` is not a parameter of this function; it is resolved
    # from an enclosing/module scope that is not visible here. Confirm that a
    # suitable ``p`` is defined wherever this function is called.
    pipeline = p

    read_step = 'Read: %s' % label
    reader = beam.io.Read(
        CsvFileSource(
            file_pattern,
            add_source=add_source,
            dictionary_output=dictionary_output,
        )
    )
    records = pipeline | read_step >> reader

    tuple_step = 'Tuple: %s' % label
    return records | tuple_step >> beam.Map(lambda record: (record[pk], record[bk]))