Example #1
import os
from typing import Optional, Tuple

import pytest
from pytest_redis.executor import RedisExecutor

from feast import Client


# Note: the imports and @pytest.fixture decorator above are inferred from
# usage (pytestconfig and fixture-style arguments); RedisExecutor comes
# from the pytest-redis plugin.
@pytest.fixture
def feast_client(
    pytestconfig,
    ingestion_job_jar,
    redis_server: RedisExecutor,
    feast_core: Tuple[str, int],
    feast_serving: Tuple[str, int],
    local_staging_path,
    feast_jobservice: Optional[Tuple[str, int]],
    enable_auth,
):
    if feast_jobservice is None:
        job_service_env = dict()
    else:
        job_service_env = dict(
            job_service_url=f"{feast_jobservice[0]}:{feast_jobservice[1]}")

    if pytestconfig.getoption("env") == "local":
        import pyspark

        c = Client(
            core_url=f"{feast_core[0]}:{feast_core[1]}",
            serving_url=f"{feast_serving[0]}:{feast_serving[1]}",
            spark_launcher="standalone",
            spark_standalone_master="local",
            spark_home=os.getenv("SPARK_HOME")
            or os.path.dirname(pyspark.__file__),
            spark_ingestion_jar=ingestion_job_jar,
            redis_host=redis_server.host,
            redis_port=redis_server.port,
            spark_staging_location=os.path.join(local_staging_path, "spark"),
            historical_feature_output_location=os.path.join(
                local_staging_path, "historical_output"),
            ingestion_drop_invalid_rows=True,
            **job_service_env,
        )

    elif pytestconfig.getoption("env") == "gcloud":
        c = Client(
            core_url=f"{feast_core[0]}:{feast_core[1]}",
            serving_url=f"{feast_serving[0]}:{feast_serving[1]}",
            spark_launcher="dataproc",
            dataproc_cluster_name=pytestconfig.getoption(
                "dataproc_cluster_name"),
            dataproc_project=pytestconfig.getoption("dataproc_project"),
            dataproc_region=pytestconfig.getoption("dataproc_region"),
            spark_staging_location=os.path.join(local_staging_path,
                                                "dataproc"),
            spark_ingestion_jar=ingestion_job_jar,
            redis_host=pytestconfig.getoption("redis_url").split(":")[0],
            redis_port=pytestconfig.getoption("redis_url").split(":")[1],
            historical_feature_output_location=os.path.join(
                local_staging_path, "historical_output"),
            ingestion_drop_invalid_rows=True,
            **job_service_env,
        )
    elif pytestconfig.getoption("env") == "aws":
        c = Client(
            core_url=f"{feast_core[0]}:{feast_core[1]}",
            serving_url=f"{feast_serving[0]}:{feast_serving[1]}",
            spark_launcher="emr",
            emr_cluster_id=pytestconfig.getoption("emr_cluster_id"),
            emr_region=pytestconfig.getoption("emr_region"),
            spark_staging_location=os.path.join(local_staging_path, "emr"),
            emr_log_location=os.path.join(local_staging_path, "emr_logs"),
            spark_ingestion_jar=ingestion_job_jar,
            redis_host=pytestconfig.getoption("redis_url").split(":")[0],
            redis_port=pytestconfig.getoption("redis_url").split(":")[1],
            historical_feature_output_location=os.path.join(
                local_staging_path, "historical_output"),
            ingestion_drop_invalid_rows=True,
        )
    elif pytestconfig.getoption("env") == "k8s":
        c = Client(
            core_url=f"{feast_core[0]}:{feast_core[1]}",
            serving_url=f"{feast_serving[0]}:{feast_serving[1]}",
            spark_launcher="k8s",
            spark_staging_location=os.path.join(local_staging_path, "k8s"),
            spark_ingestion_jar=ingestion_job_jar,
            redis_host=pytestconfig.getoption("redis_url").split(":")[0],
            redis_port=pytestconfig.getoption("redis_url").split(":")[1],
            historical_feature_output_location=os.path.join(
                local_staging_path, "historical_output"),
        )
    else:
        raise KeyError(f"Unknown environment {pytestconfig.getoption('env')}")

    c.set_project(pytestconfig.getoption("feast_project"))
    return c
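
This fixture is driven entirely by pytest command-line options. The snippet does not include the hook that registers them; a minimal sketch of a matching pytest_addoption hook in conftest.py, using the option names read via getoption() above (the defaults here are illustrative assumptions, not the project's real values), could look like:

def pytest_addoption(parser):
    # Option names mirror the getoption() calls in the fixture; every
    # default below is a placeholder.
    parser.addoption("--env", default="local",
                     help="Target environment: local, gcloud, aws, or k8s")
    parser.addoption("--feast_project", default="default")
    parser.addoption("--redis_url", default="localhost:6379")
    parser.addoption("--dataproc_cluster_name", default="")
    parser.addoption("--dataproc_project", default="")
    parser.addoption("--dataproc_region", default="")
    parser.addoption("--emr_cluster_id", default="")
    parser.addoption("--emr_region", default="")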
Example #2
# Uses the same imports as Example #1.
@pytest.fixture
def tfrecord_feast_client(
    pytestconfig,
    feast_core: Tuple[str, int],
    local_staging_path,
    feast_jobservice: Optional[Tuple[str, int]],
    enable_auth,
):
    if feast_jobservice is None:
        job_service_env = dict()
    else:
        job_service_env = dict(
            job_service_url=f"{feast_jobservice[0]}:{feast_jobservice[1]}")

    if pytestconfig.getoption("env") == "local":
        import pyspark

        c = Client(
            core_url=f"{feast_core[0]}:{feast_core[1]}",
            spark_launcher="standalone",
            spark_standalone_master="local",
            spark_home=os.getenv("SPARK_HOME")
            or os.path.dirname(pyspark.__file__),
            spark_staging_location=os.path.join(local_staging_path, "spark"),
            historical_feature_output_format="tfrecord",
            historical_feature_output_location=os.path.join(
                local_staging_path, "historical_output"),
            **job_service_env,
        )

    elif pytestconfig.getoption("env") == "gcloud":
        c = Client(
            core_url=f"{feast_core[0]}:{feast_core[1]}",
            spark_launcher="dataproc",
            dataproc_cluster_name=pytestconfig.getoption(
                "dataproc_cluster_name"),
            dataproc_project=pytestconfig.getoption("dataproc_project"),
            dataproc_region=pytestconfig.getoption("dataproc_region"),
            spark_staging_location=os.path.join(local_staging_path,
                                                "dataproc"),
            historical_feature_output_format="tfrecord",
            historical_feature_output_location=os.path.join(
                local_staging_path, "historical_output"),
            ingestion_drop_invalid_rows=True,
            **job_service_env,
        )
    elif pytestconfig.getoption("env") == "aws":
        c = Client(
            core_url=f"{feast_core[0]}:{feast_core[1]}",
            spark_launcher="emr",
            emr_cluster_id=pytestconfig.getoption("emr_cluster_id"),
            emr_region=pytestconfig.getoption("emr_region"),
            spark_staging_location=os.path.join(local_staging_path, "emr"),
            emr_log_location=os.path.join(local_staging_path, "emr_logs"),
            historical_feature_output_format="tfrecord",
            historical_feature_output_location=os.path.join(
                local_staging_path, "historical_output"),
        )
    elif pytestconfig.getoption("env") == "k8s":
        c = Client(
            core_url=f"{feast_core[0]}:{feast_core[1]}",
            spark_launcher="k8s",
            spark_staging_location=os.path.join(local_staging_path, "k8s"),
            historical_feature_output_format="tfrecord",
            historical_feature_output_location=os.path.join(
                local_staging_path, "historical_output"),
            **job_service_env,
        )
    elif pytestconfig.getoption("env") == "synapse":
        c = Client(
            core_url=f"{feast_core[0]}:{feast_core[1]}",
            spark_launcher="synapse",
            azure_synapse_dev_url=pytestconfig.getoption(
                "azure_synapse_dev_url"),
            azure_synapse_pool_name=pytestconfig.getoption(
                "azure_synapse_pool_name"),
            azure_synapse_datalake_dir=pytestconfig.getoption(
                "azure_synapse_datalake_dir"),
            spark_staging_location=os.path.join(local_staging_path, "synapse"),
            azure_blob_account_name=pytestconfig.getoption(
                "azure_blob_account_name"),
            azure_blob_account_access_key=pytestconfig.getoption(
                "azure_blob_account_access_key"),
            historical_feature_output_format="tfrecord",
            historical_feature_output_location=os.path.join(
                local_staging_path, "historical_output"),
        )
    else:
        raise KeyError(f"Unknown environment {pytestconfig.getoption('env')}")

    c.set_project(pytestconfig.getoption("feast_project"))
    return c
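
Tests consume a fixture like this simply by naming it as an argument. A minimal sketch of such a test (the assertion assumes the Feast 0.9-era list_feature_tables() method; adapt it to the client version in use):

def test_client_connects(tfrecord_feast_client):
    # Illustrative smoke test only: listing feature tables exercises the
    # core_url connection configured by the fixture.
    tables = tfrecord_feast_client.list_feature_tables()
    assert isinstance(tables, list)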
Example #3
@pytest.fixture
def client(server):
    # `free_port` is defined elsewhere in the original module; the `server`
    # fixture presumably starts a Feast Core instance on that port before
    # the client connects.
    return Client(core_url=f"localhost:{free_port}")
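
free_port itself is not defined in this fragment. A plausible reconstruction, consistent with the import socket in Example #4, is to ask the OS for an unused port at module import time:

import socket


def _find_free_port() -> int:
    # Hypothetical helper: binding to port 0 lets the OS pick a free port,
    # which is then released for the server fixture to reuse.
    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
        s.bind(("localhost", 0))
        return s.getsockname()[1]


free_port = _find_free_port()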
Example #4
import socket

from feast import Client
from feast.data_format import ParquetFormat
from feast.data_source import FileSource
from feast.entity import Entity
from feast.feature import Feature
from feast.feature_table import FeatureTable
from feast.value_type import ValueType

if __name__ == "__main__":

    test_client = Client(core_url="testfeast:6565")

    # create dummy entity since Feast demands it
    entity_1 = Entity(
        name="dummy_entity_1",
        description="Dummy entity 1",
        value_type=ValueType.STRING,
        labels={"key": "val"},
    )

    # create dummy entity since Feast demands it
    entity_2 = Entity(
        name="dummy_entity_2",
        description="Dummy entity 2",
        value_type=ValueType.INT32,
        labels={"key": "val"},
    )

    # commit entities
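
The snippet ends here. Assuming the Feast 0.9 registration API, where Client.apply() accepts entities and feature tables, the elided commit step would plausibly be:

    # Hypothetical continuation: register both entities with Feast Core.
    test_client.apply(entity_1)
    test_client.apply(entity_2)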