def _bq_to_feast(conn_id: str, project: str, sql: str, feature_set: str):
    feast_hook = FeastHook(conn_id)
    client = feast_hook.get_client(project)
    bq = BigQueryHook(use_legacy_sql=False, location='US')
    features_df = bq.get_pandas_df(sql)
    # ingest features into feast (a partitioned BQ table)
    client.ingest(feature_set, features_df)
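# A minimal sketch of how _bq_to_feast might be wired into a DAG with a
# PythonOperator; the connection ID, Feast project, SQL, and feature set
# name below are placeholders, not values from the original snippet.
from airflow.operators.python_operator import PythonOperator

ingest_features = PythonOperator(
    task_id='ingest_features',
    python_callable=_bq_to_feast,
    op_kwargs={
        'conn_id': 'feast_default',          # assumed Feast connection ID
        'project': 'my_feast_project',       # placeholder Feast project
        'sql': 'SELECT * FROM `my-project.analytics.driver_features`',  # placeholder query
        'feature_set': 'driver_stats',       # placeholder feature set name
    },
    dag=dag,
)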
def bq_to_pubsub_query_executor(**kwargs):
    """Executes a custom detector query in BigQuery and passes the results to the next task"""
    query = kwargs['templates_dict']['query']
    logging.info(query)
    bigquery_hook = BigQueryHook(use_legacy_sql=False)
    df = bigquery_hook.get_pandas_df(sql=query)
    # base64-encode each row's JSON as a Pub/Sub message payload
    messages = [{'data': b64e(row.to_json().encode()).decode()}
                for index, row in df.iterrows()]
    # split the messages into chunks of 1000 (Pub/Sub publish limit)
    messages_chunks = chunks(messages, 1000)
    pubsub_hook = PubSubHook()
    for chunk in messages_chunks:
        pubsub_hook.publish(project=gcp_project, topic=pubsub_topic, messages=chunk)
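# The chunks helper used above is not shown in the snippet; a minimal sketch,
# assuming it simply yields successive fixed-size slices of a list:
def chunks(items, size):
    # yield consecutive slices of at most `size` elements
    for i in range(0, len(items), size):
        yield items[i:i + size]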
class BigQueryToFeastFeatureSetOperator(BaseOperator):
    def __init__(self, conn_id: str, project: str, feature_set_name: str,
                 entity_name: str, sql: str, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.feast_client = FeastHook(conn_id).get_client(project)
        self.project = project
        self.feature_set_name = feature_set_name
        self.entity_name = entity_name
        self.sql = sql
        self.bq = BigQueryHook(use_legacy_sql=False, location='US')

    def execute(self, context):
        features_df = self.bq.get_pandas_df(self.sql)
        fs = FeatureSet(
            self.feature_set_name,
            max_age=Duration(seconds=86400),
            entities=[Entity(name=self.entity_name, dtype=ValueType.INT64)])
        fs.infer_fields_from_df(features_df, replace_existing_features=True)
        self.feast_client.apply(fs)
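# A minimal sketch of how this operator might be used in a DAG; the DAG name,
# connection ID, Feast project, entity, and SQL below are placeholders.
from datetime import datetime
from airflow import DAG

with DAG('feast_feature_sets', start_date=datetime(2021, 1, 1),
         schedule_interval='@daily') as dag:
    register_feature_set = BigQueryToFeastFeatureSetOperator(
        task_id='register_feature_set',
        conn_id='feast_default',           # assumed Feast connection ID
        project='my_feast_project',        # placeholder Feast project
        feature_set_name='driver_stats',   # placeholder feature set name
        entity_name='driver_id',           # placeholder entity column (INT64)
        sql='SELECT * FROM `my-project.analytics.driver_stats`',  # placeholder query
    )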
def execute(self, context):
    if self.bq_cursor is None:
        self.log.info('Executing: %s', self.sql)
        hook = BigQueryHook(
            bigquery_conn_id=self.bigquery_conn_id,
            use_legacy_sql=self.use_legacy_sql,
            delegate_to=self.delegate_to,
            location=self.location,
        )
        conn = hook.get_conn()
        self.bq_cursor = conn.cursor()
    job_id = self.bq_cursor.run_query(
        sql=self.sql,
        destination_dataset_table=self.destination_dataset_table,
        write_disposition=self.write_disposition,
        allow_large_results=self.allow_large_results,
        flatten_results=self.flatten_results,
        udf_config=self.udf_config,
        maximum_billing_tier=self.maximum_billing_tier,
        maximum_bytes_billed=self.maximum_bytes_billed,
        create_disposition=self.create_disposition,
        query_params=self.query_params,
        labels=self.labels,
        schema_update_options=self.schema_update_options,
        priority=self.priority,
        time_partitioning=self.time_partitioning,
        api_resource_configs=self.api_resource_configs,
        cluster_fields=self.cluster_fields,
    )
    context['task_instance'].xcom_push(key='job_id', value=job_id)
    # fetch the query results as a DataFrame and return them (pushed to XCom)
    df = hook.get_pandas_df(self.sql)
    if self.sort_by is not None:
        # sort_values returns a new DataFrame; reassign instead of discarding it
        df = df.sort_values(self.sort_by)
    list_to_return = df.astype(str).to_dict('index')
    self.log.info(list_to_return)
    return list_to_return
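# A downstream task could read what execute() pushed to XCom; a minimal
# sketch, assuming the task built from this operator has the hypothetical
# task_id 'run_bq_query':
def consume_query_results(**context):
    ti = context['ti']
    rows = ti.xcom_pull(task_ids='run_bq_query')                  # return value of execute()
    job_id = ti.xcom_pull(task_ids='run_bq_query', key='job_id')  # BigQuery job id
    logging.info('Job %s returned %d rows', job_id, len(rows))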
def get_df_and_types(**context):
    '''Set the columns and types of a csv to create a table with SQL.
    Save a CSV to import later'''

    def traducir(x):
        '''Translate pandas dtypes to SQL types'''
        tipoSQL = []
        for i in x:
            if i == 'float64':
                tr = 'float'
            elif i == 'int64':
                tr = 'int'
            else:
                tr = 'text'
            tipoSQL.append(str(tr))
        return tipoSQL

    bq = BigQueryHook(bigquery_conn_id=cfg.bigquery_conn_id, use_legacy_sql=False)
    df = bq.get_pandas_df(QUERY)
    # save the raw rows as a CSV to import later
    df.to_csv('{}/cloudSQLexport_temp.csv'.format(DIR_TMP), index=None, header=False)
    # build a column-name / SQL-type mapping from the DataFrame dtypes
    df = pd.DataFrame(df.dtypes)
    df.columns = ['tipo']
    df['nombre'] = df.index
    df['tipoSQL'] = traducir(df['tipo'])
    df['col_sql'] = df.apply(
        lambda x: "{} {} NULL".format(x['nombre'], x['tipoSQL']), axis=1)
    sqlcolumnas = ", ".join(df['col_sql'])
    # push the CREATE TABLE statement for a downstream task to execute
    context['ti'].xcom_push(key='SQL',
                            value="CREATE TABLE {} ({})".format(TABLE, sqlcolumnas))
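# A minimal sketch of a downstream callable that pulls the generated CREATE
# TABLE statement from XCom; the upstream task_id 'get_df_and_types' and how
# the statement is executed are assumptions, not part of the original snippet.
def create_table(**context):
    create_sql = context['ti'].xcom_pull(task_ids='get_df_and_types', key='SQL')
    logging.info('Would run: %s', create_sql)  # e.g. hand off to a Cloud SQL / MySQL hook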
def run(self):
    from airflow.contrib.hooks.bigquery_hook import BigQueryHook

    hook = BigQueryHook(bigquery_conn_id="bigquery_default", use_legacy_sql=False)
    self.output = hook.get_pandas_df(self.query)