def test_bigtable_create_family_gc_nested():
    """Create column family ``cf5`` with a nested GC rule and check its protobuf.

    The rule is ``Union(MaxVersions(10), Intersection(MaxAge(5 days),
    MaxVersions(2)))``. The column family is deleted in a ``finally`` clause
    so a failed assertion does not leave ``cf5`` behind.
    """
    # [START bigtable_create_family_gc_nested]
    from google.cloud.bigtable import Client
    from google.cloud.bigtable import column_family

    client = Client(admin=True)
    instance = client.instance(INSTANCE_ID)
    table = instance.table(TABLE_ID)

    # Create a column family with nested GC policies.
    # Create a nested GC rule:
    # Drop cells that are either older than the 10 recent versions
    # OR
    # Drop cells that are older than 5 days AND older than the
    # 2 recent versions
    rule1 = column_family.MaxVersionsGCRule(10)
    rule2 = column_family.GCRuleIntersection(
        [
            column_family.MaxAgeGCRule(datetime.timedelta(days=5)),
            column_family.MaxVersionsGCRule(2),
        ]
    )

    nested_rule = column_family.GCRuleUnion([rule1, rule2])

    column_family_obj = table.column_family("cf5", nested_rule)
    column_family_obj.create()
    # [END bigtable_create_family_gc_nested]

    try:
        rule = str(column_family_obj.to_pb())
        assert "intersection" in rule
        assert "max_num_versions: 2" in rule
        assert "max_age" in rule
        # 5 days expressed in seconds.
        assert "seconds: 432000" in rule
    finally:
        # Always clean up, even when one of the assertions above fails.
        column_family_obj.delete()
def create_column_family(self, column_family_name, table_name, max_age=None, nr_max_versions=None, gc_rule_union=None):
    """Create a column family and add it to a table.

    Garbage collection rules can be included to the column family.

    Args:
        column_family_name (str): name of the column family to create
        table_name (str): name of the existing table that receives the family
        max_age (int): the time to live in days
        nr_max_versions (int): the number of versions that should be kept
        gc_rule_union (bool or None): if both max_age and nr_max_versions
            are specified, then this parameter should be a bool. If True,
            then the max age and the max versions rules are unified, if
            False, then the intersection of the rules is used.

    Returns:
        google.cloud.bigtable.column_family.ColumnFamily: the column family
        object on which ``create()`` was called.

    Raises:
        Conflict: if both max_age and nr_max_versions are given but
            gc_rule_union is None.
        NotFound: if the table does not exist.

    Note:
        max_age and nr_max_versions are tested for truthiness, so ``0`` is
        treated the same as "not specified".
    """
    if max_age and nr_max_versions:
        # Both rules are specified, this also means a merge method must be
        # specified (union or intersection)
        time_to_live = dt.timedelta(days=max_age)
        max_age_rule = bt_column_family.MaxAgeGCRule(time_to_live)
        max_versions_rule = bt_column_family.MaxVersionsGCRule(nr_max_versions)
        if gc_rule_union is None:
            raise Conflict(
                "If max_age and nr_max_versions are both specified, then gc_rule_union cannot be None."
            )
        elif gc_rule_union:
            gc_rule = bt_column_family.GCRuleUnion(
                rules=[max_age_rule, max_versions_rule])
        else:
            gc_rule = bt_column_family.GCRuleIntersection(
                rules=[max_age_rule, max_versions_rule])
    elif max_age:
        # only max age is specified
        time_to_live = dt.timedelta(days=max_age)
        gc_rule = bt_column_family.MaxAgeGCRule(time_to_live)
    elif nr_max_versions:
        # only max number of versions is specified
        gc_rule = bt_column_family.MaxVersionsGCRule(nr_max_versions)
    else:
        # no rule is specified
        gc_rule = None

    table = self.instance.table(table_name)
    if not table.exists():
        raise NotFound(
            "Table name '{}' does not exist.".format(table_name))

    logging.info("Creating column family '%s' in table '%s'.",
                 column_family_name, table_name)
    column_family = bt_column_family.ColumnFamily(column_family_name, table, gc_rule)
    column_family.create()
    # The docstring has always promised the created family; return it so
    # callers can actually use it (previously the function returned None).
    return column_family
def insert_to_bt(request):
    """Store the values of ``request`` as one new Bigtable row.

    The table is created with a single ``cf1`` family (keep 2 versions) when
    it does not exist yet. One cell is written per name in ``COLUMNS``, the
    value being ``str(request.get(name))``.

    Returns:
        str: the freshly generated UUID4 string used as the row key.
    """
    bt_client = bigtable.Client(project=BT_PROJECT, admin=True)
    bt_table = bt_client.instance(BT_INSTANCE).table(BT_TABLE)

    family_id = 'cf1'
    families = {family_id: column_family.MaxVersionsGCRule(2)}
    if not bt_table.exists():
        bt_table.create(column_families=families)

    new_key = str(uuid.uuid4())
    bt_row = bt_table.row(new_key.encode('utf-8'))
    for name in COLUMNS:
        bt_row.set_cell(family_id, name.encode(), str(request.get(name)))

    # A single-row batch is sent through the bulk mutation call.
    bt_table.mutate_rows([bt_row])
    return new_key
def test_bigtable_create_table():
    """Create ``table_my`` with a retry on DeadlineExceeded and verify it exists.

    The table is removed again in the ``finally`` clause.
    """
    # [START bigtable_create_table]
    from google.api_core import exceptions
    from google.api_core import retry
    from google.cloud.bigtable import Client
    from google.cloud.bigtable import column_family

    table = Client(admin=True).instance(INSTANCE_ID).table("table_my")

    # GC policy: keep only the 2 most recent versions of each cell.
    keep_two_versions = column_family.MaxVersionsGCRule(2)

    # Other retriable exception types could be added to the predicate,
    # and a deadline etc. could be configured on the Retry object.
    retry_on_deadline = retry.Retry(
        retry.if_exception_type(exceptions.DeadlineExceeded)
    )
    create_with_retry = retry_on_deadline(table.create)
    create_with_retry(column_families={"cf1": keep_two_versions})
    # [END bigtable_create_table]

    try:
        assert table.exists()
    finally:
        retry_429_503(table.delete)()
def test_bigtable_create_family_gc_intersection():
    """Create column family ``cf4`` with an intersection GC rule and check it.

    The rule is ``Intersection(MaxVersions(2), MaxAge(5 days))``. The column
    family is deleted in a ``finally`` clause so a failed assertion does not
    leave ``cf4`` behind.
    """
    # [START bigtable_create_family_gc_intersection]
    from google.cloud.bigtable import Client
    from google.cloud.bigtable import column_family

    client = Client(admin=True)
    instance = client.instance(INSTANCE_ID)
    table = instance.table(TABLE_ID)

    max_versions_rule = column_family.MaxVersionsGCRule(2)
    max_age_rule = column_family.MaxAgeGCRule(datetime.timedelta(days=5))

    intersection_rule = column_family.GCRuleIntersection(
        [max_versions_rule, max_age_rule]
    )

    column_family_obj = table.column_family("cf4", intersection_rule)
    column_family_obj.create()
    # [END bigtable_create_family_gc_intersection]

    try:
        rule = str(column_family_obj.to_pb())
        assert "intersection" in rule
        assert "max_num_versions: 2" in rule
        assert "max_age" in rule
        # 5 days expressed in seconds.
        assert "seconds: 432000" in rule
    finally:
        # Always clean up, even when one of the assertions above fails.
        column_family_obj.delete()
def create_table_in_bigtable(self):
    """Create the Bigtable table with a ``profile`` column family if missing.

    The family's GC rule is the union of "older than 90 days" and "beyond
    the most recent version". Nothing is created when the table already
    exists.
    """
    from google.cloud import bigtable
    from google.cloud.bigtable import column_family
    from datetime import timedelta

    # NOTE(review): the message reports BIGQUERY_TABLE_ID while the table
    # below is looked up with BIGTABLE_TABLE_ID - confirm which is intended.
    print("Checking if we need to create the {} table.".format(
        self.BIGQUERY_TABLE_ID))

    client = bigtable.Client(project=self.GCP_PROJECT, admin=True)
    instance = client.instance(self.BIGTABLE_INSTANCE_ID)
    table = instance.table(self.BIGTABLE_TABLE_ID)

    print("Creating column family `profile`")

    # GC policy: union of a 90-day max age rule and a max-versions rule of 1.
    max_age_rule = column_family.MaxAgeGCRule(timedelta(days=90))
    max_versions_rule = column_family.MaxVersionsGCRule(1)
    gc_rule = column_family.GCRuleUnion(
        rules=[max_age_rule, max_versions_rule])

    # Note that this ties out to the configuration in
    # taar.profile_fetcher::BigTableProfileController
    column_family_id = "profile"
    column_families = {column_family_id: gc_rule}
    if not table.exists():
        table.create(column_families=column_families)
        print(f"Created {column_family_id}")
def setUp(self):
    """Prepare the Bigtable client, instance and table used by the test.

    The instance and the table (single ``cf1`` family, keep 2 versions) are
    created only when they do not exist yet.
    """
    try:
        from google.cloud.bigtable import enums
        self.STORAGE_TYPE = enums.StorageType.HDD
        self.INSTANCE_TYPE = enums.Instance.Type.DEVELOPMENT
    except ImportError:
        # Fallback when the enums module is unavailable; presumably the raw
        # numeric values of the same enum members - TODO confirm.
        self.STORAGE_TYPE = 2
        self.INSTANCE_TYPE = 2

    self.project = 'grass-clump-479'
    self.client = Client(project=self.project, admin=True)

    self.instance = self.client.instance(self.instance_id,
                                         instance_type=self.INSTANCE_TYPE,
                                         labels=LABELS)

    if not self.instance.exists():
        cluster = self.instance.cluster(
            self.cluster_id,
            self.LOCATION_ID,
            default_storage_type=self.STORAGE_TYPE)
        # Instance creation is a long-running operation: wait for it to
        # finish before touching tables on the new instance.
        operation = self.instance.create(clusters=[cluster])
        operation.result(timeout=100)

    self.table = self.instance.table(self.table_id)

    if not self.table.exists():
        max_versions_rule = column_family.MaxVersionsGCRule(2)
        column_family_id = 'cf1'
        column_families = {column_family_id: max_versions_rule}
        self.table.create(column_families=column_families)
def setUp(self):
    """Prepare the pipeline, Bigtable client, instance and table for the test.

    The project id is taken from the test pipeline options. Old instances
    are removed via ``_delete_old_instances`` first; the instance and the
    table (single ``cf1`` family, keep 2 versions) are created only when
    they do not exist yet.
    """
    try:
        from google.cloud.bigtable import enums
        self.STORAGE_TYPE = enums.StorageType.HDD
        self.INSTANCE_TYPE = enums.Instance.Type.DEVELOPMENT
    except ImportError:
        # Fallback when the enums module is unavailable; presumably the raw
        # numeric values of the same enum members - TODO confirm.
        self.STORAGE_TYPE = 2
        self.INSTANCE_TYPE = 2

    self.test_pipeline = TestPipeline(is_integration_test=True)
    self.runner_name = type(self.test_pipeline.runner).__name__
    self.project = self.test_pipeline.get_option('project')
    self.client = Client(project=self.project, admin=True)

    self._delete_old_instances()

    self.instance = self.client.instance(self.instance_id,
                                         instance_type=self.INSTANCE_TYPE,
                                         labels=LABELS)

    if not self.instance.exists():
        cluster = self.instance.cluster(
            self.cluster_id,
            self.LOCATION_ID,
            default_storage_type=self.STORAGE_TYPE)
        # Instance creation is a long-running operation: wait for it to
        # finish before touching tables on the new instance.
        operation = self.instance.create(clusters=[cluster])
        operation.result(timeout=100)

    self.table = self.instance.table(self.table_id)

    if not self.table.exists():
        max_versions_rule = column_family.MaxVersionsGCRule(2)
        column_family_id = 'cf1'
        column_families = {column_family_id: max_versions_rule}
        self.table.create(column_families=column_families)
def create_table(self):
    """Create ``self.table`` with one max-versions(2) column family if absent.

    Prints a notice instead when the table already exists.
    """
    print('Creating column family cf1 with Max Version GC rule...')
    # GC policy: keep only the 2 most recent versions of each cell.
    families = {self.column_family_id: column_family.MaxVersionsGCRule(2)}
    if self.table.exists():
        print("Table {} already exists.".format(self.table_id))
    else:
        self.table.create(column_families=families)
def test_bigtable_create_update_delete_column_family():
    """Exercise the create, update and delete column-family samples in order.

    Each region between ``[START ...]`` and ``[END ...]`` is a self-contained
    sample (hence the repeated imports and client setup). After each sample
    the table's column families are listed to verify the effect.
    """
    # [START bigtable_create_column_family]
    from google.cloud.bigtable import Client
    from google.cloud.bigtable import column_family

    client = Client(admin=True)
    instance = client.instance(INSTANCE_ID)
    table = instance.table(TABLE_ID)

    column_family_id = "column_family_id1"
    # GC rule: keep at most 2 versions of each cell.
    gc_rule = column_family.MaxVersionsGCRule(2)
    column_family_obj = table.column_family(column_family_id, gc_rule=gc_rule)
    column_family_obj.create()

    # [END bigtable_create_column_family]
    # The new family must be listed with the rule it was created with.
    column_families = table.list_column_families()
    assert column_families[column_family_id].gc_rule == gc_rule

    # [START bigtable_update_column_family]
    from google.cloud.bigtable import Client
    from google.cloud.bigtable import column_family

    client = Client(admin=True)
    instance = client.instance(INSTANCE_ID)
    table = instance.table(TABLE_ID)

    # Already existing column family id
    column_family_id = "column_family_id1"
    # Define the GC rule to retain data with max age of 5 days
    max_age_rule = column_family.MaxAgeGCRule(datetime.timedelta(days=5))
    column_family_obj = table.column_family(column_family_id, gc_rule=max_age_rule)
    column_family_obj.update()
    # [END bigtable_update_column_family]

    # The listed rule must now be the max-age rule set by the update.
    updated_families = table.list_column_families()
    assert updated_families[column_family_id].gc_rule == max_age_rule

    # [START bigtable_delete_column_family]
    from google.cloud.bigtable import Client
    from google.cloud.bigtable import column_family

    client = Client(admin=True)
    instance = client.instance(INSTANCE_ID)
    table = instance.table(TABLE_ID)

    column_family_id = "column_family_id1"
    column_family_obj = table.column_family(column_family_id)
    column_family_obj.delete()
    # [END bigtable_delete_column_family]
    # After deletion the family must no longer be listed.
    column_families = table.list_column_families()
    assert column_family_id not in column_families
def test_bigtable_create_table():
    """Create ``table_my`` with a ``cf1`` family and verify that it exists."""
    # [START bigtable_create_table]
    from google.cloud.bigtable import Client
    from google.cloud.bigtable import column_family

    table = Client(admin=True).instance(INSTANCE_ID).table("table_my")

    # GC policy: keep only the 2 most recent versions of each cell.
    keep_two_versions = column_family.MaxVersionsGCRule(2)
    table.create(column_families={"cf1": keep_two_versions})
    # [END bigtable_create_table]

    assert table.exists()
def setup_module():
    """Create the instance, table and two column families used by the module.

    The client, instance and table are stored on ``Config``. Instance
    creation is awaited (up to 100 seconds) before the table is created.
    """
    client = Client(admin=True)
    Config.CLIENT = client

    Config.INSTANCE = client.instance(
        INSTANCE_ID, instance_type=PRODUCTION, labels=LABELS
    )
    new_cluster = Config.INSTANCE.cluster(
        CLUSTER_ID,
        location_id=LOCATION_ID,
        serve_nodes=SERVER_NODES,
        default_storage_type=STORAGE_TYPE,
    )

    # We want to make sure the operation completes.
    create_op = Config.INSTANCE.create(clusters=[new_cluster])
    create_op.result(timeout=100)

    Config.TABLE = Config.INSTANCE.table(TABLE_ID)
    Config.TABLE.create()

    # First family keeps 2 versions, second family keeps 4.
    first_family = Config.TABLE.column_family(
        COLUMN_FAMILY_ID, gc_rule=column_family.MaxVersionsGCRule(2)
    )
    first_family.create()

    second_family = Config.TABLE.column_family(
        COLUMN_FAMILY_ID2, gc_rule=column_family.MaxVersionsGCRule(4)
    )
    second_family.create()
def create_table(self):
    """Return the table handle, creating the table first when it is missing.

    A new table gets one column family (``self.column_family_id``) whose GC
    rule keeps ``self.row_filter_count`` versions.
    """
    print("Creating the {} table.".format(self.table_id))
    handle = self.instance.table(self.table_id)

    print(
        "Creating column family cf1 with Max Version GC rule: most recent {} versions"
        .format(self.row_filter_count))
    families = {
        self.column_family_id: column_family.MaxVersionsGCRule(
            self.row_filter_count),
    }

    if handle.exists():
        print("Table {} already exists.".format(self.table_id))
    else:
        handle.create(column_families=families)

    return handle
def create(self):
    """Create the table underlying the queue.

    The 'metadata' and 'tfexample' column families are created together
    with the table, both keeping a single version per cell. When the table
    is already present only a notice is printed.
    """
    if not self.bt_table.exists():
        keep_latest = column_family.MaxVersionsGCRule(1)
        families = {METADATA: keep_latest, TFEXAMPLE: keep_latest}
        self.bt_table.create(column_families=families)
    else:
        print('Table already exists')
def createTable(project_id, instance_id, table_id, column_family_id):
    """Return a handle to ``table_id``, creating the table if it is missing.

    Args:
        project_id: project passed to the Bigtable client.
        instance_id: id of the Bigtable instance holding the table.
        table_id: id of the table to look up or create.
        column_family_id: id of the single column family a new table gets;
            its GC rule keeps the 2 most recent versions.

    Returns:
        The table object obtained from ``instance.table(table_id)``.
    """
    client = bigtable.Client(project=project_id, admin=True)
    instance = client.instance(instance_id)
    table = instance.table(table_id)

    # Report the family that is actually configured instead of a fixed "cf1".
    print('Creating column family {} with Max Version GC rule...'.format(
        column_family_id))
    # Create a column family with GC policy : most recent N versions
    # Define the GC policy to retain only the most recent 2 versions
    max_versions_rule = column_family.MaxVersionsGCRule(2)
    column_families = {column_family_id: max_versions_rule}
    if not table.exists():
        table.create(column_families=column_families)
    else:
        print("Table {} already exists.".format(table_id))
    return table
def read_from_bt(request):
    """Read the row keyed by ``request['receipt_id']`` from the Bigtable table.

    The table is created (one family, keep 2 versions) when it does not
    exist. Only one cell per column is requested via a column limit filter.

    Returns:
        Whatever ``table.read_row`` returns for the UTF-8 encoded key.
    """
    client = bigtable.Client(project=BT_PROJECT, admin=True)
    bt_table = client.instance(BT_INSTANCE).table(BT_TABLE)

    families = {COLUMN_FAMILY_ID: column_family.MaxVersionsGCRule(2)}
    if not bt_table.exists():
        bt_table.create(column_families=families)

    one_cell_per_column = row_filters.CellsColumnLimitFilter(1)
    encoded_key = request['receipt_id'].encode('utf-8')
    return bt_table.read_row(encoded_key, one_cell_per_column)
def main(project_id, instance_id, table_id):
    """Run the hello-world flow: create table, write, read, scan, delete.

    Three greeting rows are written to family ``cf1``, the first one is read
    back by key, all rows are scanned, and finally the table is deleted.
    """
    bt_client = bigtable.Client(project=project_id, admin=True)
    bt_instance = bt_client.instance(instance_id)

    print('Creating the {} table.'.format(table_id))
    bt_table = bt_instance.table(table_id)

    print('Creating column family cf1 with Max Version GC rule...')
    gc_policy = column_family.MaxVersionsGCRule(2)
    family = 'cf1'
    if bt_table.exists():
        print("Table {} already exists.".format(table_id))
    else:
        bt_table.create(column_families={family: gc_policy})

    print('Writing some greetings to the table.')
    qualifier = b'greeting'
    pending = []
    messages = ('Hello World!', 'Hello Cloud Bigtable!', 'Hello Python!')
    for index, text in enumerate(messages):
        entry = bt_table.row('greeting{}'.format(index).encode())
        entry.set_cell(family,
                       qualifier,
                       text,
                       timestamp=datetime.datetime.utcnow())
        pending.append(entry)
    bt_table.mutate_rows(pending)

    # Limit every column to a single cell when reading.
    latest_only = row_filters.CellsColumnLimitFilter(1)

    print('Getting a single greeting by row key.')
    fetched = bt_table.read_row(b'greeting0', latest_only)
    print(fetched.cells[family][qualifier][0].value.decode('utf-8'))

    print('Scanning for all greetings:')
    for scanned in bt_table.read_rows(filter_=latest_only):
        print(scanned.cells[family][qualifier][0].value.decode('utf-8'))

    print('Deleting the {} table.'.format(table_id))
    bt_table.delete()
def start_bundle(self):
    """Connect to Bigtable and make sure the target table exists.

    Stores the client, instance and table on ``self``. A missing table is
    created with a single ``cf1`` family that keeps 2 versions.

    Raises:
        Exception: any error raised while connecting or creating the table
            is logged with its traceback and re-raised unchanged.
    """
    from google.cloud import bigtable
    from google.cloud.bigtable import column_family
    try:
        self.client = bigtable.Client(project=self.project_id, admin=True)
        self.instance = self.client.instance(self.instance_id)
        self.table = self.instance.table(self.table_id)
        max_versions_rule = column_family.MaxVersionsGCRule(2)
        column_family_id = 'cf1'
        column_families = {column_family_id: max_versions_rule}
        if not self.table.exists():
            self.table.create(column_families=column_families)
        else:
            logging.info("Table %s already exists.", self.table_id)
    except Exception:
        # Narrowed from a bare ``except:`` so KeyboardInterrupt/SystemExit
        # pass through untouched; logging.exception keeps the traceback.
        logging.exception("Failed to start bundle")
        raise
def __init__(self, ctx, project_id, instance_id, table_id):
    """Store identifiers, build the GC rule and open a non-admin client.

    The GC rule is the union of a 90-day max-age rule and a max-versions
    rule of 1. Data is addressed through family ``profile`` and column
    ``payload``.
    """
    self._ctx = ctx
    self._project_id = project_id
    self._instance_id = instance_id
    self._table_id = table_id
    self._column_family_id = "profile"
    self._column_name = b"payload"

    # GC policy: union of a 90-day max age rule and a max-versions rule of 1.
    self._gc_rule = column_family.GCRuleUnion(
        rules=[
            column_family.MaxAgeGCRule(datetime.timedelta(days=90)),
            column_family.MaxVersionsGCRule(1),
        ]
    )

    self._client = bigtable.Client(project=project_id, admin=False)
    self._instance = self._client.instance(self._instance_id)
def create_table(self):
    """Make sure the instance and the table exist, creating them if needed.

    A new table gets a single ``cf1`` column family that keeps 2 versions.
    """
    instance = self.client.instance(self.instance_id,
                                    instance_type=self.INSTANCE_TYPE,
                                    labels=LABELS)

    if not instance.exists():
        cluster = instance.cluster(self.cluster_id,
                                   self.LOCATION_ID,
                                   default_storage_type=self.STORAGE_TYPE)
        # Instance creation is a long-running operation: wait for it to
        # finish before touching tables on the new instance.
        operation = instance.create(clusters=[cluster])
        operation.result(timeout=100)

    table = instance.table(self.table_id)

    if not table.exists():
        max_versions_rule = column_family.MaxVersionsGCRule(2)
        column_family_id = 'cf1'
        column_families = {column_family_id: max_versions_rule}
        table.create(column_families=column_families)
def materialize(self, sa_key_path=None):
    """Connect to Bigtable and create the table when it does not exist.

    Args:
        sa_key_path: path to a service-account JSON key. When it is not a
            ``str`` the client is built without an explicit key file.

    Every name in ``self.column_families`` becomes a column family that
    keeps a single version per cell.
    """
    use_key_file = isinstance(sa_key_path, str)
    self.client = (
        bigtable.Client.from_service_account_json(sa_key_path, admin=True)
        if use_key_file
        else bigtable.Client(admin=True)
    )
    self.instance = self.client.instance(self.instance_name)
    self.table = self.instance.table(self.table_name)

    if not self.table.exists():
        keep_latest = cbt_lib_column_family.MaxVersionsGCRule(1)
        families = dict.fromkeys(self.column_families, keep_latest)
        self.table.create(column_families=families)
def test_bigtable_create_family_gc_max_versions():
    """Create column family ``cf2`` with a max-versions rule and check it.

    The column family is deleted in a ``finally`` clause so a failed
    assertion does not leave ``cf2`` behind.
    """
    # [START bigtable_create_family_gc_max_versions]
    from google.cloud.bigtable import Client
    from google.cloud.bigtable import column_family

    client = Client(admin=True)
    instance = client.instance(INSTANCE_ID)
    table = instance.table(TABLE_ID)

    # Define the GC policy to retain only the most recent 2 versions
    max_versions_rule = column_family.MaxVersionsGCRule(2)

    column_family_obj = table.column_family("cf2", max_versions_rule)
    column_family_obj.create()

    # [END bigtable_create_family_gc_max_versions]
    try:
        rule = str(column_family_obj.to_pb())
        assert "max_num_versions: 2" in rule
    finally:
        # Always clean up, even when the assertion above fails.
        column_family_obj.delete()
def test_bigtable_list_tables():
    """Create table ``to_list`` and verify the list-tables sample reports it.

    The table is removed again in the ``finally`` clause.
    """
    from google.cloud.bigtable import Client
    from google.cloud.bigtable import column_family

    # Fixture: a table that the sample below is expected to list.
    table = Client(admin=True).instance(INSTANCE_ID).table("to_list")
    table.create(column_families={"cf1": column_family.MaxVersionsGCRule(2)})

    # [START bigtable_list_tables]
    from google.cloud.bigtable import Client

    client = Client(admin=True)
    instance = client.instance(INSTANCE_ID)
    tables_list = instance.list_tables()

    # [END bigtable_list_tables]

    listed_names = [listed.name for listed in tables_list]
    try:
        assert table.name in listed_names
    finally:
        retry_429(table.delete)()
def test_bigtable_create_table():
    """Create one table without and one with column families; verify both.

    Both tables are deleted in ``finally`` clauses so a failed assertion (or
    a failure deleting the first table) does not leave tables behind.
    """
    # [START bigtable_api_create_table]
    from google.cloud.bigtable import Client
    from google.cloud.bigtable import column_family

    client = Client(admin=True)
    instance = client.instance(INSTANCE_ID)

    # Create table without Column families.
    table1 = instance.table("table_id1")
    table1.create()

    # Create table with Column families.
    table2 = instance.table("table_id2")
    # Define the GC policy to retain only the most recent 2 versions.
    max_versions_rule = column_family.MaxVersionsGCRule(2)
    table2.create(column_families={"cf1": max_versions_rule})

    # [END bigtable_api_create_table]
    try:
        assert table1.exists()
        assert table2.exists()
    finally:
        # Nested so that table2 is still removed if deleting table1 raises.
        try:
            table1.delete()
        finally:
            table2.delete()
def run_table_operations(project_id, instance_id, table_id):
    """Create a Bigtable table and perform basic operations on it

    The steps are: list tables, create column families cf1..cf5 with
    max-age, max-versions, union, intersection and nested GC rules, list
    all column families, update the GC rule of cf1, and delete cf2.

    :type project_id: str
    :param project_id: Project id of the client.

    :type instance_id: str
    :param instance_id: Instance of the client.

    :type table_id: str
    :param table_id: Table id to create table.
    """
    client, instance, table = create_table(project_id, instance_id, table_id)

    # [START bigtable_list_tables]
    tables = instance.list_tables()
    print("Listing tables in current project...")
    if tables:
        for tbl in tables:
            print(tbl.table_id)
    else:
        print("No table exists in current project...")
    # [END bigtable_list_tables]

    # [START bigtable_create_family_gc_max_age]
    print("Creating column family cf1 with with MaxAge GC Rule...")
    # Create a column family with GC policy : maximum age
    # where age = current time minus cell timestamp

    # Define the GC rule to retain data with max age of 5 days
    max_age_rule = column_family.MaxAgeGCRule(datetime.timedelta(days=5))

    column_family1 = table.column_family("cf1", max_age_rule)
    column_family1.create()
    print("Created column family cf1 with MaxAge GC Rule.")
    # [END bigtable_create_family_gc_max_age]

    # [START bigtable_create_family_gc_max_versions]
    print("Creating column family cf2 with max versions GC rule...")
    # Create a column family with GC policy : most recent N versions
    # where 1 = most recent version

    # Define the GC policy to retain only the most recent 2 versions
    max_versions_rule = column_family.MaxVersionsGCRule(2)

    column_family2 = table.column_family("cf2", max_versions_rule)
    column_family2.create()
    print("Created column family cf2 with Max Versions GC Rule.")
    # [END bigtable_create_family_gc_max_versions]

    # [START bigtable_create_family_gc_union]
    print("Creating column family cf3 with union GC rule...")
    # Create a column family with GC policy to drop data that matches
    # at least one condition.
    # Define a GC rule to drop cells older than 5 days or beyond the
    # 2 most recent versions
    union_rule = column_family.GCRuleUnion(
        [
            column_family.MaxAgeGCRule(datetime.timedelta(days=5)),
            column_family.MaxVersionsGCRule(2),
        ]
    )

    column_family3 = table.column_family("cf3", union_rule)
    column_family3.create()
    print("Created column family cf3 with Union GC rule")
    # [END bigtable_create_family_gc_union]

    # [START bigtable_create_family_gc_intersection]
    print("Creating column family cf4 with Intersection GC rule...")
    # Create a column family with GC policy to drop data that matches
    # all conditions
    # GC rule: Drop cells older than 5 days AND older than the most
    # recent 2 versions
    intersection_rule = column_family.GCRuleIntersection(
        [
            column_family.MaxAgeGCRule(datetime.timedelta(days=5)),
            column_family.MaxVersionsGCRule(2),
        ]
    )

    column_family4 = table.column_family("cf4", intersection_rule)
    column_family4.create()
    print("Created column family cf4 with Intersection GC rule.")
    # [END bigtable_create_family_gc_intersection]

    # [START bigtable_create_family_gc_nested]
    print("Creating column family cf5 with a Nested GC rule...")
    # Create a column family with nested GC policies.
    # Create a nested GC rule:
    # Drop cells that are either older than the 10 recent versions
    # OR
    # Drop cells that are older than a month AND older than the
    # 2 recent versions
    rule1 = column_family.MaxVersionsGCRule(10)
    rule2 = column_family.GCRuleIntersection(
        [
            column_family.MaxAgeGCRule(datetime.timedelta(days=30)),
            column_family.MaxVersionsGCRule(2),
        ]
    )

    nested_rule = column_family.GCRuleUnion([rule1, rule2])

    column_family5 = table.column_family("cf5", nested_rule)
    column_family5.create()
    print("Created column family cf5 with a Nested GC rule.")
    # [END bigtable_create_family_gc_nested]

    # [START bigtable_list_column_families]
    print("Printing Column Family and GC Rule for all column families...")
    column_families = table.list_column_families()
    for column_family_name, gc_rule in sorted(column_families.items()):
        print("Column Family:", column_family_name)
        print("GC Rule:")
        print(gc_rule.to_pb())
        # Sample output:
        #         Column Family: cf4
        #         GC Rule:
        #         gc_rule {
        #           intersection {
        #             rules {
        #               max_age {
        #                 seconds: 432000
        #               }
        #             }
        #             rules {
        #               max_num_versions: 2
        #             }
        #           }
        #         }
    # [END bigtable_list_column_families]

    print("Print column family cf1 GC rule before update...")
    print("Column Family: cf1")
    print(column_family1.to_pb())

    # [START bigtable_update_gc_rule]
    print("Updating column family cf1 GC rule...")
    # Update the column family cf1 to update the GC rule
    column_family1 = table.column_family("cf1", column_family.MaxVersionsGCRule(1))
    column_family1.update()
    print("Updated column family cf1 GC rule\n")
    # [END bigtable_update_gc_rule]

    print("Print column family cf1 GC rule after update...")
    print("Column Family: cf1")
    print(column_family1.to_pb())

    # [START bigtable_delete_family]
    print("Delete a column family cf2...")
    # Delete a column family
    column_family2.delete()
    print("Column family cf2 deleted successfully.")
    # [END bigtable_delete_family]

    # Adjacent literals are concatenated at compile time; unlike a backslash
    # continuation inside the string, this does not leak source indentation
    # into the printed message.
    print(
        'execute command "python tableadmin.py delete [project_id] '
        '[instance_id] --table [tableName]" to delete the table.'
    )
def bigtable_func(project_id, instance_id, table_id):
    """Run the hello-world flow: create table, write, read, scan, delete.

    Sets ``BIGTABLE_EMULATOR_HOST`` to ``localhost:8086`` and builds the
    client with anonymous credentials (presumably so the client talks to a
    local emulator rather than the real service - confirm against the
    client library). Three greeting rows are written to family ``cf1``, the
    first one is read back by key, all rows are scanned, and the table is
    deleted at the end.
    """
    from google.cloud import bigtable
    from google.cloud.bigtable import column_family
    from google.cloud.bigtable import row_filters
    from google.auth.credentials import AnonymousCredentials

    # Note: this mutates the process-wide environment.
    os.environ["BIGTABLE_EMULATOR_HOST"] = "localhost:8086"

    # [START bigtable_hw_connect]
    # The client must be created with admin=True because it will create a
    # table.
    client = bigtable.Client(project=project_id, admin=True, credentials=AnonymousCredentials())
    instance = client.instance(instance_id)
    # [END bigtable_hw_connect]

    # [START bigtable_hw_create_table]
    print(f"Creating the {table_id} table.")
    table = instance.table(table_id)

    print("Creating column family cf1 with Max Version GC rule...")
    # Create a column family with GC policy : most recent N versions
    # Define the GC policy to retain only the most recent 2 versions
    max_versions_rule = column_family.MaxVersionsGCRule(2)
    column_family_id = "cf1"
    column_families = {column_family_id: max_versions_rule}
    if not table.exists():
        table.create(column_families=column_families)
    else:
        print(f"Table {table_id} already exists.")
    # [END bigtable_hw_create_table]

    # [START bigtable_hw_write_rows]
    print("Writing some greetings to the table.")
    greetings = ["Hello World!", "Hello Cloud Bigtable!", "Hello Python!"]
    rows = []
    column = b"greeting"
    for i, value in enumerate(greetings):
        # Note: This example uses sequential numeric IDs for simplicity,
        # but this can result in poor performance in a production
        # application.  Since rows are stored in sorted order by key,
        # sequential keys can result in poor distribution of operations
        # across nodes.
        #
        # For more information about how to design a Bigtable schema for
        # the best performance, see the documentation:
        #
        #     https://cloud.google.com/bigtable/docs/schema-design
        row_key = f"greeting{i}".encode()
        row = table.direct_row(row_key)
        row.set_cell(column_family_id, column, value, timestamp=datetime.datetime.utcnow())
        rows.append(row)
    # All rows are sent in one bulk mutation call.
    table.mutate_rows(rows)
    # [END bigtable_hw_write_rows]

    # [START bigtable_hw_create_filter]
    # Create a filter to only retrieve the most recent version of the cell
    # for each column across entire row.
    row_filter = row_filters.CellsColumnLimitFilter(1)
    # [END bigtable_hw_create_filter]

    # [START bigtable_hw_get_with_filter]
    print("Getting a single greeting by row key.")
    key = b"greeting0"

    row = table.read_row(key, row_filter)
    # cells is indexed by family id, then column, then cell position.
    cell = row.cells[column_family_id][column][0]
    print(cell.value.decode("utf-8"))
    # [END bigtable_hw_get_with_filter]

    # [START bigtable_hw_scan_with_filter]
    print("Scanning for all greetings:")
    partial_rows = table.read_rows(filter_=row_filter)

    for row in partial_rows:
        cell = row.cells[column_family_id][column][0]
        print(cell.value.decode("utf-8"))
    # [END bigtable_hw_scan_with_filter]

    # [START bigtable_hw_delete_table]
    print(f"Deleting the {table_id} table.")
    table.delete()
    # [END bigtable_hw_delete_table]
def run_table_operations(project_id, instance_id, table_id): ''' Create a Bigtable table and perform basic table operations :type project_id: str :param project_id: Project id of the client. :type instance_id: str :param instance_id: Instance of the client. :type table_id: str :param table_id: Table id to create table. ''' client = bigtable.Client(project=project_id, admin=True) instance = client.instance(instance_id) table = instance.table(table_id) # Check whether table exists in an instance. # Create table if it does not exists. print 'Checking if table {} exists...'.format(table_id) if table.exists(): print 'Table {} already exists.'.format(table_id) else: print 'Creating the {} table.'.format(table_id) table.create() print 'Created table {}.'.format(table_id) # [START bigtable_list_tables] tables = instance.list_tables() print 'Listing tables in current project...' if tables != []: for tbl in tables: print tbl.table_id else: print 'No table exists in current project...' # [END bigtable_list_tables] # [START bigtable_create_family_gc_max_age] print 'Creating column family cf1 with with MaxAge GC Rule...' # Create a column family with GC policy : maximum age # where age = current time minus cell timestamp # Define the GC rule to retain data with max age of 5 days max_age_rule = column_family.MaxAgeGCRule(datetime.timedelta(days=5)) column_family1 = table.column_family('cf1', max_age_rule) column_family1.create() print 'Created column family cf1 with MaxAge GC Rule.' # [END bigtable_create_family_gc_max_age] # [START bigtable_create_family_gc_max_versions] print 'Creating column family cf2 with max versions GC rule...' 
# Create a column family with GC policy : most recent N versions # where 1 = most recent version # Define the GC policy to retain only the most recent 2 versions max_versions_rule = column_family.MaxVersionsGCRule(2) column_family2 = table.column_family('cf2', max_versions_rule) column_family2.create() print 'Created column family cf2 with Max Versions GC Rule.' # [END bigtable_create_family_gc_max_versions] # [START bigtable_create_family_gc_union] print 'Creating column family cf3 with union GC rule...' # Create a column family with GC policy to drop data that matches # at least one condition. # Define a GC rule to drop cells older than 5 days or not the # most recent version union_rule = column_family.GCRuleUnion([ column_family.MaxAgeGCRule(datetime.timedelta(days=5)), column_family.MaxVersionsGCRule(2) ]) column_family3 = table.column_family('cf3', union_rule) column_family3.create() print 'Created column family cf3 with Union GC rule' # [END bigtable_create_family_gc_union] # [START bigtable_create_family_gc_intersection] print 'Creating column family cf4 with Intersection GC rule...' # Create a column family with GC policy to drop data that matches # all conditions # GC rule: Drop cells older than 5 days AND older than the most # recent 2 versions intersection_rule = column_family.GCRuleIntersection([ column_family.MaxAgeGCRule(datetime.timedelta(days=5)), column_family.MaxVersionsGCRule(2) ]) column_family4 = table.column_family('cf4', intersection_rule) column_family4.create() print 'Created column family cf4 with Intersection GC rule.' # [END bigtable_create_family_gc_intersection] # [START bigtable_create_family_gc_nested] print 'Creating column family cf5 with a Nested GC rule...' # Create a column family with nested GC policies. 
# Create a nested GC rule: # Drop cells that are either older than the 10 recent versions # OR # Drop cells that are older than a month AND older than the # 2 recent versions rule1 = column_family.MaxVersionsGCRule(10) rule2 = column_family.GCRuleIntersection([ column_family.MaxAgeGCRule(datetime.timedelta(days=30)), column_family.MaxVersionsGCRule(2) ]) nested_rule = column_family.GCRuleUnion([rule1, rule2]) column_family5 = table.column_family('cf5', nested_rule) column_family5.create() print 'Created column family cf5 with a Nested GC rule.' # [END bigtable_create_family_gc_nested] # [START bigtable_list_column_families] print 'Printing Column Family and GC Rule for all column families...' column_families = table.list_column_families() for column_family_name, gc_rule in sorted(column_families.items()): print 'Column Family:', column_family_name print 'GC Rule:' print gc_rule.to_pb() # Sample output: # Column Family: cf4 # GC Rule: # gc_rule { # intersection { # rules { # max_age { # seconds: 432000 # } # } # rules { # max_num_versions: 2 # } # } # } # [END bigtable_list_column_families] print 'Print column family cf1 GC rule before update...' print 'Column Family: cf1' print column_family1.to_pb() # [START bigtable_update_gc_rule] print 'Updating column family cf1 GC rule...' # Update the column family cf1 to update the GC rule column_family1 = table.column_family('cf1', column_family.MaxVersionsGCRule(1)) column_family1.update() print 'Updated column family cf1 GC rule\n' # [END bigtable_update_gc_rule] print 'Print column family cf1 GC rule after update...' print 'Column Family: cf1' print column_family1.to_pb() # [START bigtable_delete_family] print 'Delete a column family cf2...' # Delete a column family column_family2.delete() print 'Column family cf2 deleted successfully.' # [END bigtable_delete_family] print 'execute command "python tableadmin.py delete [project_id] \
# Stand-alone script: make sure a Bigtable table exists with a single column
# family that is governed by a max-versions garbage-collection rule.
from google.cloud import bigtable
from google.cloud.bigtable import column_family

# Hard-coded target of this script.
project_id = 'thisisasimplenameasap'
instance_id = 'fortest'
table_id = 'table00'
column_family_id = 'cf'

# admin=True because the script performs a table-admin operation (create).
client = bigtable.Client(project=project_id, admin=True)
instance = client.instance(instance_id)

print('Creating the {} table.'.format(table_id))
table = instance.table(table_id)

# Report the family that is actually created; the message used to hard-code
# "cf1" even though column_family_id is 'cf'.
print('Creating column family {} with Max Version GC rule...'.format(
    column_family_id))
# GC policy handed to the column family: MaxVersionsGCRule(2).
max_versions_rule = column_family.MaxVersionsGCRule(2)
column_families = {column_family_id: max_versions_rule}

# Only create the table when it is missing so the script can be re-run.
if not table.exists():
    table.create(column_families=column_families)
else:
    print("Table {} already exists.".format(table_id))
def main(project_id, instance_id, table_id):
    """Walk through a basic Cloud Bigtable workflow.

    Connects with an admin client, creates the table with a single ``cf1``
    column family when it does not exist yet, writes three greeting rows,
    reads them back (one by key, then by scanning) through a filter limited
    to one cell per column, and finally deletes the table.

    Args:
        project_id: Project passed to ``bigtable.Client``.
        instance_id: Instance looked up on the client.
        table_id: Table that is created, used and then deleted.
    """
    # [START bigtable_hw_connect]
    # Creating a table is an admin operation, hence admin=True.
    client = bigtable.Client(project=project_id, admin=True)
    instance = client.instance(instance_id)
    # [END bigtable_hw_connect]

    # [START bigtable_hw_create_table]
    print('Creating the {} table.'.format(table_id))
    table = instance.table(table_id)

    print('Creating column family cf1 with Max Version GC rule...')
    # GC policy for the family: keep only the 2 most recent versions.
    column_family_id = 'cf1'
    gc_rule_by_family = {
        column_family_id: column_family.MaxVersionsGCRule(2),
    }
    if table.exists():
        print("Table {} already exists.".format(table_id))
    else:
        table.create(column_families=gc_rule_by_family)
    # [END bigtable_hw_create_table]

    # [START bigtable_hw_write_rows]
    print('Writing some greetings to the table.')
    greetings = ['Hello World!', 'Hello Cloud Bigtable!', 'Hello Python!']
    column = 'greeting'.encode()
    pending_rows = []
    for index, greeting in enumerate(greetings):
        # Note: Sequential numeric IDs keep this example simple, but they
        # can perform poorly in a production application. Rows are stored
        # sorted by key, so sequential keys can distribute operations
        # unevenly across nodes.
        #
        # Schema-design guidance for best performance is documented at:
        #
        #     https://cloud.google.com/bigtable/docs/schema-design
        direct_row = table.direct_row('greeting{}'.format(index).encode())
        direct_row.set_cell(column_family_id,
                            column,
                            greeting,
                            timestamp=datetime.datetime.utcnow())
        pending_rows.append(direct_row)
    # Send every queued row in a single call.
    table.mutate_rows(pending_rows)
    # [END bigtable_hw_write_rows]

    # [START bigtable_hw_create_filter]
    # Filter that returns only the most recent version of the cell for each
    # column across the entire row.
    row_filter = row_filters.CellsColumnLimitFilter(1)
    # [END bigtable_hw_create_filter]

    # [START bigtable_hw_get_with_filter]
    print('Getting a single greeting by row key.')
    single_row = table.read_row('greeting0'.encode(), row_filter)
    first_cell = single_row.cells[column_family_id][column][0]
    print(first_cell.value.decode('utf-8'))
    # [END bigtable_hw_get_with_filter]

    # [START bigtable_hw_scan_with_filter]
    print('Scanning for all greetings:')
    for scanned_row in table.read_rows(filter_=row_filter):
        latest_cell = scanned_row.cells[column_family_id][column][0]
        print(latest_cell.value.decode('utf-8'))
    # [END bigtable_hw_scan_with_filter]

    # [START bigtable_hw_delete_table]
    print('Deleting the {} table.'.format(table_id))
    table.delete()