def bq_get_last_modified(self):
    logging.info("Connecting to Big Query")
    bq_hook = BigQueryHook(bigquery_conn_id=self.bigquery_conn_id,
                           delegate_to=self.delegate_to)

    # Pull the service-account key out of the Airflow connection's extras so it
    # can be handed to read_gbq directly.
    bq_conn = bq_hook.get_connection(self.bigquery_conn_id)
    bq_conn_extra_json = bq_conn.extra
    bq_conn_extra = json.loads(bq_conn_extra_json)
    service_dict = bq_conn_extra['extra__google_cloud_platform__keyfile_dict']

    # Ask the dataset's __TABLES__ metadata view for this table's last_modified_time.
    sql = """
    #standardSQL
    SELECT last_modified_time AS TS
    FROM `{0}.{1}.__TABLES__`
    WHERE table_id = '{2}'
    """.format(self.project_id, self.dataset, self.table_name)

    logging.info("Getting table last_modified_time from BQ with SQL:\n{0}".format(sql))
    df = read_gbq(sql, dialect='standard', project_id=self.project_id,
                  private_key=service_dict)
    logging.info("Got table!")

    ts = str(df['TS'][0])
    return ts
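# Note: __TABLES__.last_modified_time is expressed in milliseconds since the Unix
# epoch, so a caller of bq_get_last_modified (an assumption about downstream use,
# not shown in the original code) could convert the returned string like this:
#
#   from datetime import datetime
#   last_modified = datetime.utcfromtimestamp(float(ts) / 1000.0)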
def execute(self, context):
    dest = self.destination_file
    sql = self.sql

    logging.info("Connecting to Big Query")
    bq_hook = BigQueryHook(bigquery_conn_id=self.bigquery_conn_id,
                           delegate_to=self.delegate_to)

    # Same credential plumbing as bq_get_last_modified: read the service-account
    # key out of the connection's extras.
    bq_conn = bq_hook.get_connection(self.bigquery_conn_id)
    bq_conn_extra_json = bq_conn.extra
    bq_conn_extra = json.loads(bq_conn_extra_json)
    service_dict = bq_conn_extra['extra__google_cloud_platform__keyfile_dict']

    logging.info("Getting table from BQ with SQL:\n{0}".format(sql))
    # Pass the project explicitly, as in bq_get_last_modified, rather than relying
    # on it being inferred from the key.
    df = read_gbq(sql, dialect='standard', project_id=self.project_id,
                  private_key=service_dict)
    logging.info("Got table!")

    # logging.info('\tSaving to... {}'.format(save_dir))
    # if not os.path.isdir(save_dir):
    #     os.mkdir(save_dir)

    logging.info("Writing table to disk in feather format")
    feather.write_dataframe(df, dest)
    logging.info("Table written to {0}".format(dest))

    # DataFrame.info() prints a summary to stdout and returns None.
    return df.info()
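# A minimal sketch of the imports and operator class these two methods appear to
# belong to. The class name, constructor signature, and default connection id are
# assumptions; only the attributes the methods reference (sql, destination_file,
# project_id, dataset, table_name, bigquery_conn_id, delegate_to) come from the
# code above. Assumes Airflow 1.x import paths and an older pandas / pandas-gbq
# where read_gbq still accepts a private_key argument.
#
# import json
# import logging
#
# import feather  # feather-format package
# from pandas import read_gbq
# from airflow.models import BaseOperator
# from airflow.contrib.hooks.bigquery_hook import BigQueryHook
# from airflow.utils.decorators import apply_defaults
#
#
# class BigQueryToFeatherOperator(BaseOperator):  # hypothetical class name
#     @apply_defaults
#     def __init__(self, sql, destination_file, project_id, dataset, table_name,
#                  bigquery_conn_id='bigquery_default', delegate_to=None,
#                  *args, **kwargs):
#         super(BigQueryToFeatherOperator, self).__init__(*args, **kwargs)
#         self.sql = sql
#         self.destination_file = destination_file
#         self.project_id = project_id
#         self.dataset = dataset
#         self.table_name = table_name
#         self.bigquery_conn_id = bigquery_conn_id
#         self.delegate_to = delegate_to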