def build_partitioned_table_resource(self, bq_table, schema):
    """Build the BigQuery table-resource dict for a day-partitioned table.

    Internal helper for :meth:`create_tbl_ddl`. Appends a NULLABLE
    ``partition_date`` DATE field to *schema* and configures daily
    time-partitioning on that field.

    :param bq_table: Table id the resource will describe.
    :param schema: List of BigQuery field dicts (name/type/mode).
    :returns: Dict suitable as the ``body`` of a ``tables().insert`` call.
    """
    return {
        'tableReference': {'tableId': bq_table},
        'schema': {
            'fields': schema + [{
                "name": "partition_date",
                "type": "DATE",
                "mode": "NULLABLE"
            }]
        },
        'timePartitioning': {
            "type": "DAY",
            "field": "partition_date"
        },
    }

def create_tbl_ddl(self, bq_table, schema):
    """Create a day-partitioned table in BigQuery.

    Only run if the table does not exist at the time of the first run.
    NOTE(review): despite the historical name/docstring, this issues a
    BigQuery ``tables().insert`` API call, not a DDL statement against
    Redshift.

    :param bq_table: Table id to create in ``self.bq_dataset``.
    :param schema: List of BigQuery field dicts (name/type/mode).
    :raises AirflowException: if the BigQuery API call fails.
    """
    bq_service = BigQueryHook(
        bigquery_conn_id=self.bq_conn_id).get_service()
    table_resource = self.build_partitioned_table_resource(bq_table, schema)
    try:
        bq_service.tables().insert(
            projectId=self.bq_project,
            datasetId=self.bq_dataset,
            body=table_resource).execute(num_retries=5)
        self.log.info('Table created successfully: %s:%s.%s',
                      self.bq_project, self.bq_dataset, bq_table)
    except HttpError as err:
        raise AirflowException('BigQuery job failed. Error was: {}'.format(
            err.content))
def sf_cols_to_bq_fields(self, sf_cols, include_partition=False):
    """Convert Salesforce column descriptors to BigQuery field dicts.

    Internal helper for :meth:`patch_bq_cols`. Field names are
    lower-cased and every field is marked NULLABLE. When
    *include_partition* is True, a ``partition_date`` DATE field is
    appended (used when patching an existing partitioned table).

    :param sf_cols: List of dicts with at least ``name`` and ``type``.
    :param include_partition: Append the partition column when True.
    :returns: List of BigQuery field dicts.
    """
    fields = [{
        "type": col['type'],
        "name": col["name"].lower(),
        "mode": "NULLABLE"
    } for col in sf_cols]
    if include_partition:
        fields.append({
            "name": "partition_date",
            "type": "DATE",
            "mode": "NULLABLE"
        })
    return fields

def patch_bq_cols(self, bq_table, sf_cols):
    """Ensure *bq_table* contains every column present in *sf_cols*.

    Used to decide whether we need to run an ALTER (``tables().patch``)
    or CREATE (:meth:`create_tbl_ddl`) command: if the current schema
    cannot be fetched the table is assumed not to exist and is created;
    otherwise any Salesforce columns missing from the BigQuery schema
    trigger a full-schema patch.

    :param bq_table: Target table id in ``self.bq_dataset``.
    :param sf_cols: List of dicts with at least ``name`` and ``type``.
    :returns: The list of BigQuery field dicts describing the table.
    :raises AirflowException: if the patch API call fails.
    """
    bq_service = BigQueryHook(
        bigquery_conn_id=self.bq_conn_id).get_service()
    bq_conn = BigQueryBaseCursor(bq_service, self.bq_project)
    missing_cols = []
    try:
        bq_schema = bq_conn.get_schema(self.bq_dataset, bq_table)
        self.log.info('Current schema: %s', bq_schema)
        bq_cols = bq_schema['fields']
        # BUGFIX: the schema holds field *dicts*, so membership must be
        # tested against the field names (lower-cased, matching how
        # columns are created), not against the dicts themselves —
        # otherwise every column always looked "missing".
        existing_names = {field['name'].lower() for field in bq_cols}
        missing_cols = [x for x in sf_cols
                        if x['name'].lower() not in existing_names]
    except Exception:
        # Schema fetch failed — assume the table does not exist yet and
        # create it (best-effort, mirrors the original broad except).
        bq_cols = self.sf_cols_to_bq_fields(sf_cols)
        self.create_tbl_ddl(bq_table, bq_cols)
    if missing_cols:
        bq_cols = self.sf_cols_to_bq_fields(sf_cols, include_partition=True)
        self.log.info('New schema is %s', bq_cols)
        table_resource = {'schema': {'fields': bq_cols}}
        try:
            bq_service.tables().patch(
                projectId=self.bq_project,
                datasetId=self.bq_dataset,
                tableId=bq_table,
                body=table_resource).execute()
            self.log.info('Table patched successfully')
        except HttpError as err:
            raise AirflowException(
                'BigQuery job failed. Error was: {}'.format(err.content))
    return bq_cols