Example #1
    def bq_get_last_modified(self):
        """Return the `last_modified_time` of this operator's BigQuery table.

        Reads the service-account keyfile dict from the Airflow connection
        extras, queries the dataset's ``__TABLES__`` metadata view via
        pandas-gbq, and returns the timestamp as a string.

        Returns:
            str: the raw ``last_modified_time`` value for ``self.table_name``
            (epoch milliseconds as stored by BigQuery — TODO confirm with caller).
        """
        logging.info("Connecting to Big Query")
        bq_hook = BigQueryHook(bigquery_conn_id=self.bigquery_conn_id, delegate_to=self.delegate_to)
        bq_conn = bq_hook.get_connection(self.bigquery_conn_id)
        # Connection extras hold the GCP service-account key as a JSON string.
        bq_conn_extra = json.loads(bq_conn.extra)
        service_dict = bq_conn_extra['extra__google_cloud_platform__keyfile_dict']

        # NOTE(review): identifiers are interpolated directly; safe only as long
        # as project/dataset/table come from trusted operator config.
        sql = """
            #standardSQL
            SELECT last_modified_time AS TS
            FROM `{0}.{1}.__TABLES__`
            WHERE table_id = '{2}'
            """.format(self.project_id, self.dataset, self.table_name)

        # BUG FIX: original logged "/n" (literal slash-n) instead of a newline.
        # Lazy %-style args avoid formatting when the log level is disabled.
        logging.info("Getting table last_modified_time from BQ with SQL:\n%s", sql)
        df = read_gbq(sql, dialect='standard', project_id=self.project_id, private_key=service_dict)
        logging.info("Got table!")

        return str(df['TS'][0])
Example #2
    def execute(self, context):
        """Run ``self.sql`` against BigQuery and save the result to disk.

        Fetches the service-account keyfile dict from the Airflow connection
        extras, executes the configured SQL via pandas-gbq, and writes the
        resulting DataFrame to ``self.destination_file`` in Feather format.

        Args:
            context: Airflow task execution context (unused here).

        Returns:
            None — ``DataFrame.info()`` prints a summary and returns None,
            so callers receive None (kept as-is for backward compatibility).
        """
        dest = self.destination_file
        sql = self.sql
        logging.info("Connecting to Big Query")
        bq_hook = BigQueryHook(bigquery_conn_id=self.bigquery_conn_id, delegate_to=self.delegate_to)
        bq_conn = bq_hook.get_connection(self.bigquery_conn_id)
        # Connection extras hold the GCP service-account key as a JSON string.
        bq_conn_extra = json.loads(bq_conn.extra)
        service_dict = bq_conn_extra['extra__google_cloud_platform__keyfile_dict']

        # BUG FIX: original logged "/n" (literal slash-n) instead of a newline.
        # Lazy %-style args avoid formatting when the log level is disabled.
        logging.info("Getting table from BQ with SQL:\n%s", sql)
        df = read_gbq(sql, dialect='standard', private_key=service_dict)
        logging.info("Got table!")

        logging.info("Writing table to disk in feather format")
        feather.write_dataframe(df, dest)

        logging.info("Table written to %s", dest)
        return df.info()