Пример #1
0
    def test_from_csv_staging_location_not_specified(self):
        """Check that ``Importer.from_csv`` rejects a bad staging location.

        Two calls are made for the same local CSV file: one that omits
        ``staging_location`` and one that passes the local path ``"/home"``.
        Each call is expected to raise ``ValueError`` with its own message.
        """
        feature_columns = ["avg_distance_completed",
                           "avg_customer_distance_completed"]
        csv_path = "tests/data/driver_features.csv"
        missing_location_error = (
            "Specify staging_location for importing local file/dataframe")
        invalid_location_error = "Staging location must be in GCS"

        # Only the call under test lives inside each ``pytest.raises`` body,
        # so the expected error can only come from ``Importer.from_csv``.
        with pytest.raises(ValueError, match=missing_location_error):
            Importer.from_csv(path=csv_path,
                              entity="driver",
                              granularity=Granularity.DAY,
                              owner="*****@*****.**",
                              feature_columns=feature_columns,
                              timestamp_column="ts")

        with pytest.raises(ValueError, match=invalid_location_error):
            Importer.from_csv(path=csv_path,
                              entity="driver",
                              granularity=Granularity.DAY,
                              owner="*****@*****.**",
                              staging_location="/home",
                              feature_columns=feature_columns,
                              timestamp_column="ts")
Пример #2
0
 def test_from_csv_id_column_not_specified(self):
     """Expect ``ValueError`` when ``from_csv`` is called without ``id_column``.

     The expected message reports that a column named ``driver`` (the
     entity name passed to the call) is not found.
     """
     expected_error = "Column with name driver is not found"
     import_args = dict(
         path="tests/data/driver_features.csv",
         entity="driver",
         granularity=Granularity.DAY,
         owner="*****@*****.**",
         staging_location="gs://test-bucket",
         feature_columns=["avg_distance_completed",
                          "avg_customer_distance_completed"],
         timestamp_column="ts",
     )

     with pytest.raises(ValueError, match=expected_error):
         Importer.from_csv(**import_args)
Пример #3
0
 def test_from_csv_staging_location_not_valid(self):
     """Expect ``ValueError`` when the staging location is the local path ``"/home"``.

     The expected message states that the staging location must be in GCS.
     """
     feature_columns = [
         "avg_distance_completed", "avg_customer_distance_completed"
     ]
     csv_path = "tests/data/driver_features.csv"

     # Keep the ``pytest.raises`` body down to the call under test; the
     # exception info object was never inspected, so it is not bound.
     with pytest.raises(ValueError,
                        match="Staging location must be in GCS"):
         Importer.from_csv(path=csv_path,
                           entity="driver",
                           owner="*****@*****.**",
                           staging_location="/home",
                           feature_columns=feature_columns,
                           timestamp_column="ts")
Пример #4
0
    def test_from_csv_timestamp_column_not_specified(self):
        """Create a CSV importer without ``timestamp_column`` and validate it.

        The importer is built from the driver features CSV with an explicit
        id column and feature columns, then handed to
        ``self._validate_csv_importer`` together with the same arguments.
        """
        source_csv = "tests/data/driver_features.csv"
        entity = "driver"
        day = Granularity.DAY
        owner_email = "*****@*****.**"
        # Keyword arguments shared verbatim by the import call and the check.
        shared_kwargs = dict(
            staging_location="gs://test-bucket",
            id_column="driver_id",
            feature_columns=[
                "avg_distance_completed",
                "avg_customer_distance_completed",
                "avg_distance_cancelled",
            ],
        )

        importer = Importer.from_csv(path=source_csv,
                                     entity=entity,
                                     granularity=day,
                                     owner=owner_email,
                                     **shared_kwargs)

        self._validate_csv_importer(importer, source_csv, entity, day,
                                    owner_email, **shared_kwargs)
Пример #5
0
 def test_stage_df_without_timestamp(self, mocker):
     """Stage an importer that was created without a timestamp column.

     ``feast.sdk.importer.df_to_gcs`` is patched to return ``True`` before
     the importer is built, and ``stage`` is then called with ``None``.
     The test makes no assertions beyond the calls completing.
     """
     mocker.patch("feast.sdk.importer.df_to_gcs", return_value=True)

     import_args = dict(
         path="tests/data/driver_features.csv",
         entity="driver",
         owner="*****@*****.**",
         staging_location="gs://test-bucket",
         id_column="driver_id",
         feature_columns=[
             "avg_distance_completed",
             "avg_customer_distance_completed",
             "avg_distance_cancelled",
         ],
     )
     csv_importer = Importer.from_csv(**import_args)
     csv_importer.stage(None)
Пример #6
0
    def test_from_csv_feature_columns_not_specified(self):
        """Create a CSV importer without ``feature_columns`` and validate it.

        The importer is built with an explicit id column and timestamp
        column, then handed to ``self._validate_csv_importer`` together with
        the same arguments.
        """
        source_csv = "tests/data/driver_features.csv"
        entity = "driver"
        day = Granularity.DAY
        owner_email = "*****@*****.**"
        # Keyword arguments shared verbatim by the import call and the check.
        shared_kwargs = dict(
            staging_location="gs://test-bucket",
            id_column="driver_id",
            timestamp_column="ts",
        )

        importer = Importer.from_csv(path=source_csv,
                                     entity=entity,
                                     granularity=day,
                                     owner=owner_email,
                                     **shared_kwargs)

        self._validate_csv_importer(importer, source_csv, entity, day,
                                    owner_email, **shared_kwargs)
Пример #7
0
    def test_from_csv(self):
        """Create a CSV importer with every column argument given and validate it.

        The id column, feature columns and timestamp column are all passed
        explicitly; the resulting importer is then checked by
        ``self._validate_csv_importer``.
        """
        source_csv = "tests/data/driver_features.csv"
        entity = "driver"
        owner_email = "*****@*****.**"
        bucket = "gs://test-bucket"
        id_col = "driver_id"
        ts_col = "ts"
        features = [
            "avg_distance_completed", "avg_customer_distance_completed"
        ]

        importer = Importer.from_csv(
            path=source_csv,
            entity=entity,
            owner=owner_email,
            staging_location=bucket,
            id_column=id_col,
            feature_columns=features,
            timestamp_column=ts_col,
        )

        # Arguments stay positional, in the same order as before.
        self._validate_csv_importer(
            importer, source_csv, entity, owner_email,
            bucket, id_col, features, ts_col,
        )