def make_source_dataset(self, index, num_hosts):
    """Build the Bigtable-backed source dataset. See base class.

    Args:
      index: unused by this implementation.
      num_hosts: unused by this implementation.

    Returns:
      The dataset produced by a parallel prefix scan of the selected table,
      mapped down to the data component only, and repeated when
      `self.is_training` is truthy.
    """
    selection = self.selection
    bigtable_client = contrib_cloud.BigtableClient(
        selection.project, selection.instance)
    scanned = bigtable_client.table(selection.table).parallel_scan_prefix(
        selection.prefix,
        columns=[(selection.column_family, selection.column_qualifier)])

    # Scan elements have the shape (row_key, data); keep only the data part.
    values = scanned.map(lambda row_key, cell: cell)
    return values.repeat() if self.is_training else values
def __init__(self, project_name, instance_name, table_name):
    """Create the table handles for one Cloud Bigtable table.

    Args:
      project_name: string name of GCP project having table.
      instance_name: string name of CBT instance in project.
      table_name: string name of CBT table in instance.
    """
    spec = BigtableSpec(project_name, instance_name, table_name)
    self.btspec = spec

    # Table handle obtained through an admin-enabled `bigtable.Client`.
    admin_client = bigtable.Client(spec.project, admin=True)
    self.bt_table = admin_client.instance(spec.instance).table(spec.table)

    # Table handle obtained through `contrib_cloud.BigtableClient`.
    tf_client = contrib_cloud.BigtableClient(spec.project, spec.instance)
    self.tf_table = tf_client.table(spec.table)