示例#1
0
    def test_get_schema(self):
        """Check the schema reported for a feature set with windowed functions.

        One feature with two functions (avg, stddev_pop) over two fixed
        windows is expected to produce four feature columns, listed after the
        key column and the timestamp column.
        """
        windowed_transform = SparkFunctionTransform(
            functions=[
                Function(F.avg, DataType.FLOAT),
                Function(F.stddev_pop, DataType.DOUBLE),
            ]
        ).with_window(
            partition_by="id",
            order_by=TIMESTAMP_COLUMN,
            mode="fixed_windows",
            window_definition=["2 minutes", "15 minutes"],
        )
        id_key = KeyFeature(
            name="id",
            description="The user's Main ID or device ID",
            dtype=DataType.BIGINT,
        )
        feature_set = FeatureSet(
            name="feature_set",
            entity="entity",
            description="description",
            features=[
                Feature(
                    name="feature1",
                    description="test",
                    transformation=windowed_transform,
                ),
            ],
            keys=[id_key],
            timestamp=TimestampFeature(),
        )

        def column(name, spark_type, is_key=False):
            # One schema row, in the dict layout compared against get_schema().
            return {
                "column_name": name,
                "type": spark_type,
                "primary_key": is_key,
            }

        expected_schema = [
            column("id", LongType(), is_key=True),
            column("timestamp", TimestampType()),
            column("feature1__avg_over_2_minutes_fixed_windows", FloatType()),
            column("feature1__avg_over_15_minutes_fixed_windows", FloatType()),
            column(
                "feature1__stddev_pop_over_2_minutes_fixed_windows",
                DoubleType(),
            ),
            column(
                "feature1__stddev_pop_over_15_minutes_fixed_windows",
                DoubleType(),
            ),
        ]

        schema = feature_set.get_schema()

        assert schema == expected_schema
示例#2
0
    def apply_migration(self, feature_set: FeatureSet, writer: Writer,
                        debug_mode: bool) -> None:
        """Apply the migration in the respective database.

        The target table is named after the entity when the writer writes to
        the entity, otherwise after the feature set itself. The feature set
        schema (translated by the writer's db config) and the schema obtained
        for that table are handed to ``create_query`` to build the queries.

        Args:
            feature_set: the feature set.
            writer: the writer being used to load the feature set.
            debug_mode: if active, the generated queries are only printed and
                none of them is executed.
        """
        logger.info(f"Migrating feature set: {feature_set.name}")

        table_name = (feature_set.entity
                      if writer.write_to_entity else feature_set.name)

        fs_schema = writer.db_config.translate(feature_set.get_schema())
        db_schema = self._get_schema(table_name, writer.database)

        queries = self.create_query(fs_schema, table_name, db_schema,
                                    writer.write_to_entity)

        if debug_mode:
            # Dry run: show what would be executed and stop here.
            print("#### DEBUG MODE ###\n"
                  f"Feature set: {feature_set.name}\n"
                  "Queries:\n"
                  f"{queries}")
            return

        for query in queries:
            logger.info(f"Applying this query: {query} ...")
            self._client.sql(query)

        # Plain literal: the message has no placeholders, so no f-prefix.
        logger.info("Feature Set migration finished successfully.")

        # inform in drone console which feature set was migrated
        print(f"The {feature_set.name} feature set was migrated.")
示例#3
0
    def get_db_schema(self, feature_set: FeatureSet) -> List[Dict[Any, Any]]:
        """Build the schema the database is expected to have.

        Args:
            feature_set: feature set whose schema is going to be translated.

        Returns:
            The feature set schema after being translated by
            ``self.db_config``.

        """
        # Resolve the translator before asking the feature set for its schema.
        translate = self.db_config.translate
        return translate(feature_set.get_schema())