def create_table(self, dataset, table, schema):
    """Create a new table in the dataset.

    Args:
        dataset: the dataset to create the table in.
        table: the name of table to create.
        schema: table schema dict (list of field resource dicts).

    Returns:
        bool indicating if the table was successfully created or not.
    """
    body = {
        'schema': {'fields': schema},
        'tableReference': {
            'tableId': table,
            'projectId': self.project_id,
            'datasetId': dataset
        }
    }

    try:
        self.bigquery.tables().insert(
            projectId=self.project_id,
            datasetId=dataset,
            body=body
        ).execute()
        return True
    # Catch Exception (not a bare except, which would also swallow
    # KeyboardInterrupt/SystemExit) and log the actual error detail,
    # matching the style of the dataset methods.
    except Exception as e:
        logger.error('Cannot create table %s.%s: %s' % (dataset, table, e))
        return False
def create_dataset(self, dataset_id, friendly_name=None, description=None,
                   access=None):
    """Create a new BigQuery dataset.

    Args:
        dataset_id: required unique string identifying the dataset with the
                    project (the referenceId of the dataset, not the integer
                    id of the dataset)
        friendly_name: optional string providing a human readable name
        description: optional longer string providing a description
        access: optional object indicating access permissions (see
                https://developers.google.com/bigquery/docs/reference/v2/
                datasets#resource)

    Returns:
        bool indicating if dataset was created or not
    """
    try:
        datasets = self.bigquery.datasets()
        # Build the dataset resource body via the shared helper so all
        # dataset methods construct it consistently.
        dataset_data = self.dataset_resource(dataset_id,
                                             friendly_name=friendly_name,
                                             description=description,
                                             access=access)

        datasets.insert(projectId=self.project_id,
                        body=dataset_data).execute()
        return True
    # 'except Exception as e' instead of the Python-2-only
    # 'except Exception, e' — compatible with Python 2.6+ and Python 3.
    except Exception as e:
        logger.error('Cannot create dataset %s, %s' % (dataset_id, e))
        return False
def patch_dataset(self, dataset_id, friendly_name=None, description=None,
                  access=None):
    """Updates information in an existing dataset.

    The update method replaces the entire dataset resource, whereas the
    patch method only replaces fields that are provided in the submitted
    dataset resource.

    Args:
        dataset_id: required unique string identifying the dataset with the
                    project (the referenceId of the dataset).
        friendly_name: an optional descriptive name for the dataset.
        description: an optional description of the dataset.
        access: an optional object indicating access permissions.

    Returns:
        bool indicating if the patch was successful or not.
    """
    try:
        datasets = self.bigquery.datasets()
        body = self.dataset_resource(dataset_id, friendly_name,
                                     description, access)
        request = datasets.patch(projectId=self.project_id,
                                 datasetId=dataset_id, body=body)
        request.execute()
        return True
    # 'except Exception as e' instead of the Python-2-only
    # 'except Exception, e' — compatible with Python 2.6+ and Python 3.
    except Exception as e:
        logger.error('Cannot patch dataset %s: %s' % (dataset_id, e))
        return False
def create_table(self, dataset, table, schema):
    """Create a new table in the dataset.

    Args:
        dataset: the dataset to create the table in.
        table: the name of table to create.
        schema: table schema dict (list of field resource dicts).

    Returns:
        bool indicating if the table was successfully created or not.
    """
    body = {
        'schema': {
            'fields': schema
        },
        'tableReference': {
            'tableId': table,
            'projectId': self.project_id,
            'datasetId': dataset
        }
    }

    try:
        self.bigquery.tables().insert(projectId=self.project_id,
                                      datasetId=dataset,
                                      body=body).execute()
        return True
    # Catch Exception (not a bare except, which would also swallow
    # KeyboardInterrupt/SystemExit) and include the error in the log.
    except Exception as e:
        logger.error('Cannot create table %s.%s: %s' % (dataset, table, e))
        return False
def get_datasets(self):
    """List all datasets in the project.

    Returns:
        a list of dataset resources (empty list if the project has no
        datasets), or None if the request failed.
    """
    try:
        datasets = self.bigquery.datasets()
        request = datasets.list(projectId=self.project_id)
        result = request.execute()
        # The 'datasets' key is absent from the response when the
        # project contains no datasets.
        return result.get('datasets', [])
    # 'except Exception as e' instead of the Python-2-only
    # 'except Exception, e' — compatible with Python 2.6+ and Python 3.
    except Exception as e:
        logger.error("Cannot list datasets: %s" % e)
        return None
def push_rows(self, dataset, table, rows, insert_id_key=None):
    """Upload rows to BigQuery table.

    Args:
        dataset: the dataset to upload to.
        table: the name of the table to insert rows into.
        rows: list of rows (dicts) to add to table
        insert_id_key: key for insertId in row (used by BigQuery for
                       best-effort de-duplication)

    Returns:
        bool indicating if insert succeeded or not.
    """
    table_data = self.bigquery.tabledata()

    rows_data = []
    for row in rows:
        each_row = {}
        each_row["json"] = row
        # Only look up an insertId when a key was actually supplied;
        # an unguarded 'in' test would also match a literal None key
        # in the row when insert_id_key is left at its default.
        if insert_id_key is not None and insert_id_key in row:
            each_row["insertId"] = row[insert_id_key]
        rows_data.append(each_row)

    data = {
        "kind": "bigquery#tableDataInsertAllRequest",
        "rows": rows_data
    }

    try:
        response = table_data.insertAll(
            projectId=self.project_id,
            datasetId=dataset,
            tableId=table,
            body=data
        ).execute()

        # insertAll returns 200 even on per-row failures; they are
        # reported in the 'insertErrors' field of the response.
        if response.get('insertErrors'):
            logger.error('BigQuery insert errors: %s' % response)
            return False

        return True
    # Catch Exception (not a bare except) and surface the actual
    # error instead of silently discarding it.
    except Exception as e:
        logger.error('Problem with BigQuery insertAll: %s' % e)
        return False
def push_rows(self, dataset, table, rows, insert_id_key=None):
    """Upload rows to BigQuery table.

    Args:
        dataset: the dataset to upload to.
        table: the name of the table to insert rows into.
        rows: list of rows (dicts) to add to table
        insert_id_key: key for insertId in row (used by BigQuery for
                       best-effort de-duplication)

    Returns:
        bool indicating if insert succeeded or not.
    """
    table_data = self.bigquery.tabledata()

    rows_data = []
    for row in rows:
        each_row = {}
        each_row["json"] = row
        # Only look up an insertId when a key was actually supplied;
        # an unguarded 'in' test would also match a literal None key
        # in the row when insert_id_key is left at its default.
        if insert_id_key is not None and insert_id_key in row:
            each_row["insertId"] = row[insert_id_key]
        rows_data.append(each_row)

    data = {
        "kind": "bigquery#tableDataInsertAllRequest",
        "rows": rows_data
    }

    try:
        response = table_data.insertAll(projectId=self.project_id,
                                        datasetId=dataset,
                                        tableId=table,
                                        body=data).execute()

        # insertAll returns 200 even on per-row failures; they are
        # reported in the 'insertErrors' field of the response.
        if response.get('insertErrors'):
            logger.error('BigQuery insert errors: %s' % response)
            return False

        return True
    # Catch Exception (not a bare except) and surface the actual
    # error instead of silently discarding it.
    except Exception as e:
        logger.error('Problem with BigQuery insertAll: %s' % e)
        return False
def delete_table(self, dataset, table):
    """Delete a table from the dataset.

    Args:
        dataset: the dataset to delete the table from.
        table: the name of the table to delete.

    Returns:
        bool indicating if the table was successfully deleted or not.
    """
    try:
        self.bigquery.tables().delete(projectId=self.project_id,
                                      datasetId=dataset,
                                      tableId=table).execute()
        return True
    # Catch Exception (not a bare except, which would also swallow
    # KeyboardInterrupt/SystemExit) and include the error in the log.
    except Exception as e:
        logger.error('Cannot delete table %s.%s: %s' % (dataset, table, e))
        return False
def delete_dataset(self, dataset_id):
    """Delete a BigQuery dataset.

    Args:
        dataset_id: required unique string identifying the dataset with the
                    project (the referenceId of the dataset)

    Returns:
        bool indicating if the delete was successful or not. A 404 for a
        nonexistent dataset is caught, logged, and reported as False like
        any other failure (the except below catches every Exception).
    """
    try:
        datasets = self.bigquery.datasets()
        request = datasets.delete(projectId=self.project_id,
                                  datasetId=dataset_id)
        request.execute()
        return True
    # 'except Exception as e' instead of the Python-2-only
    # 'except Exception, e' — compatible with Python 2.6+ and Python 3.
    except Exception as e:
        logger.error('Cannot delete dataset %s: %s' % (dataset_id, e))
        # Return False (not None) so the result is always a bool, as the
        # docstring promises and as the sibling methods do.
        return False
def delete_table(self, dataset, table):
    """Delete a table from the dataset.

    Args:
        dataset: the dataset to delete the table from.
        table: the name of the table to delete.

    Returns:
        bool indicating if the table was successfully deleted or not.
    """
    try:
        self.bigquery.tables().delete(
            projectId=self.project_id,
            datasetId=dataset,
            tableId=table
        ).execute()
        return True
    # Catch Exception (not a bare except, which would also swallow
    # KeyboardInterrupt/SystemExit) and include the error in the log.
    except Exception as e:
        logger.error('Cannot delete table %s.%s: %s' % (dataset, table, e))
        return False
def delete_dataset(self, dataset_id, delete_contents=False):
    """Delete a BigQuery dataset.

    Args:
        dataset_id: required unique string identifying the dataset with the
                    project (the referenceId of the dataset)
        delete_contents: forces deletion of the dataset even when the
                         dataset contains data (maps to the API's
                         deleteContents flag)

    Returns:
        bool indicating if the delete was successful or not. A 404 for a
        nonexistent dataset is caught, logged, and reported as False like
        any other failure (the except below catches every Exception).
    """
    try:
        datasets = self.bigquery.datasets()
        request = datasets.delete(projectId=self.project_id,
                                  datasetId=dataset_id,
                                  deleteContents=delete_contents)
        request.execute()
        return True
    # 'except Exception as e' instead of the Python-2-only
    # 'except Exception, e' — compatible with Python 2.6+ and Python 3.
    except Exception as e:
        logger.error('Cannot delete dataset %s: %s' % (dataset_id, e))
        return False