# Example #1
# 0
def test_ingest_into_bq(
    feast_client: Client,
    customer_entity: Entity,
    driver_entity: Entity,
    bq_dataframe: pd.DataFrame,
    bq_dataset: str,
    pytestconfig,
):
    """Ingest a dataframe via Feast and verify it round-trips through BigQuery.

    Applies both entities and a feature table whose batch source is a freshly
    named BigQuery staging table, ingests ``bq_dataframe``, polls until the
    table can be fetched, then compares ``SELECT *`` on that table against
    the input frame.
    """
    bq_project = pytestconfig.getoption("bq_project")
    # Use %S (zero-padded seconds). Lowercase %s is a platform-specific
    # strftime extension and is not portable across operating systems.
    bq_table_id = f"bq_staging_{datetime.now():%Y%m%d%H%M%S}"
    ft = FeatureTable(
        name="basic_featuretable",
        entities=["driver_id", "customer_id"],
        features=[
            Feature(name="dev_feature_float", dtype=ValueType.FLOAT),
            Feature(name="dev_feature_string", dtype=ValueType.STRING),
        ],
        max_age=Duration(seconds=3600),
        batch_source=BigQuerySource(
            table_ref=f"{bq_project}:{bq_dataset}.{bq_table_id}",
            event_timestamp_column="datetime",
            created_timestamp_column="timestamp",
        ),
    )

    # ApplyEntity
    feast_client.apply(customer_entity)
    feast_client.apply(driver_entity)

    # ApplyFeatureTable
    feast_client.apply(ft)
    feast_client.ingest(ft, bq_dataframe, timeout=120)

    bq_client = bigquery.Client(project=bq_project)
    table_ref = bigquery.TableReference(
        bigquery.DatasetReference(bq_project, bq_dataset), bq_table_id
    )

    def try_get_table():
        """Return ``(table, True)`` once the table exists, else ``(None, False)``."""
        try:
            return bq_client.get_table(table_ref), True
        except NotFound:
            return None, False

    # Poll BQ for table until the table has been created
    wait_retry_backoff(
        retry_fn=try_get_table,
        timeout_secs=30,
        timeout_msg="Timed out trying to get bigquery table",
    )

    query_string = f"SELECT * FROM `{bq_project}.{bq_dataset}.{bq_table_id}`"

    job = bq_client.query(query_string)
    query_df = job.to_dataframe()

    assert_frame_equal(query_df, bq_dataframe)
# Example #2
# 0
def test_ingest(
    client: Client,
    customer_entity: Entity,
    driver_entity: Entity,
    bq_featuretable: FeatureTable,
    bq_dataset: pd.DataFrame,
    bq_table_id: str,
):
    """Ingest a dataframe via Feast and verify it round-trips through BigQuery.

    ``bq_table_id`` arrives as ``"<project>:<rest>"``; the project part is
    used to build the BigQuery client and the colon is replaced with a dot
    for use in API calls and the query. The table is deleted at the end even
    when polling or the comparison fails, so failed runs do not leave it
    behind.
    """
    gcp_project, _ = bq_table_id.split(":")
    bq_table_id = bq_table_id.replace(":", ".")
    # Last dotted component; compared against the fetched table's table_id.
    expected_table_name = bq_table_id.split(".")[-1]

    # ApplyEntity
    client.apply_entity(customer_entity)
    client.apply_entity(driver_entity)

    # ApplyFeatureTable
    client.apply_feature_table(bq_featuretable)
    client.ingest(bq_featuretable, bq_dataset, timeout=120)

    from google.api_core.exceptions import NotFound
    from google.cloud import bigquery

    bq_client = bigquery.Client(project=gcp_project)

    def try_get_table():
        """Return ``(table, found)``; ``found`` is False while the table is absent."""
        try:
            table_resp = bq_client.get_table(bq_table_id)
        except NotFound:
            return None, False
        table_exist = (
            bool(table_resp) and table_resp.table_id == expected_table_name
        )
        return table_resp, table_exist

    try:
        # Poll BQ for table until the table has been created
        wait_retry_backoff(
            retry_fn=try_get_table,
            timeout_secs=30,
            timeout_msg="Timed out trying to get bigquery table",
        )

        query_string = f"SELECT * FROM `{bq_table_id}`"

        job = bq_client.query(query_string)
        query_df = job.to_dataframe()

        assert_frame_equal(query_df, bq_dataset)
    finally:
        # Always clean up; not_found_ok covers the case where the table
        # never appeared (e.g. the poll above timed out).
        bq_client.delete_table(bq_table_id, not_found_ok=True)