def test_get_schema(self):
    """Check the schema reported for a feature set with windowed aggregations.

    The feature set has one key (``id``), the timestamp feature and a single
    feature aggregated with ``avg`` and ``stddev_pop`` over two fixed windows,
    so the expected schema holds one column per (function, window) pair.
    """

    def column(name, dtype, primary_key=False):
        # One schema entry in the shape asserted against get_schema().
        return {"column_name": name, "type": dtype, "primary_key": primary_key}

    expected_schema = [
        column("id", LongType(), primary_key=True),
        column("timestamp", TimestampType()),
        column("feature1__avg_over_2_minutes_fixed_windows", FloatType()),
        column("feature1__avg_over_15_minutes_fixed_windows", FloatType()),
        column("feature1__stddev_pop_over_2_minutes_fixed_windows", DoubleType()),
        column("feature1__stddev_pop_over_15_minutes_fixed_windows", DoubleType()),
    ]

    windowed_transformation = SparkFunctionTransform(
        functions=[
            Function(F.avg, DataType.FLOAT),
            Function(F.stddev_pop, DataType.DOUBLE),
        ]
    ).with_window(
        partition_by="id",
        order_by=TIMESTAMP_COLUMN,
        mode="fixed_windows",
        window_definition=["2 minutes", "15 minutes"],
    )
    aggregated_feature = Feature(
        name="feature1",
        description="test",
        transformation=windowed_transformation,
    )
    id_key = KeyFeature(
        name="id",
        description="The user's Main ID or device ID",
        dtype=DataType.BIGINT,
    )

    feature_set = FeatureSet(
        name="feature_set",
        entity="entity",
        description="description",
        features=[aggregated_feature],
        keys=[id_key],
        timestamp=TimestampFeature(),
    )

    assert feature_set.get_schema() == expected_schema
def apply_migration(
    self, feature_set: FeatureSet, writer: Writer, debug_mode: bool
) -> None:
    """Generate the migration queries for a feature set and apply them.

    The target table is the feature set's entity when the writer writes to
    the entity, otherwise the feature set's own name. The feature set schema
    is translated with the writer's ``db_config`` and handed to
    ``create_query`` together with the schema fetched by ``_get_schema``.

    Args:
        feature_set: the feature set to migrate.
        writer: the writer used to load the feature set.
        debug_mode: when true, the generated queries are printed and
            nothing is executed.
    """
    logger.info(f"Migrating feature set: {feature_set.name}")

    if writer.write_to_entity:
        target_table = feature_set.entity
    else:
        target_table = feature_set.name

    desired_schema = writer.db_config.translate(feature_set.get_schema())
    current_schema = self._get_schema(target_table, writer.database)
    statements = self.create_query(
        desired_schema, target_table, current_schema, writer.write_to_entity
    )

    if debug_mode:
        # Dry run: show what would be executed and stop here.
        print(
            "#### DEBUG MODE ###\n"
            f"Feature set: {feature_set.name}\n"
            "Queries:\n"
            f"{statements}"
        )
        return

    for statement in statements:
        logger.info(f"Applying this query: {statement} ...")
        self._client.sql(statement)

    logger.info(f"Feature Set migration finished successfully.")

    # inform in drone console which feature set was migrated
    print(f"The {feature_set.name} feature set was migrated.")
def get_db_schema(self, feature_set: FeatureSet) -> List[Dict[Any, Any]]:
    """Return the feature set schema translated by this object's db config.

    Args:
        feature_set: object whose ``get_schema()`` output is translated.

    Returns:
        Whatever ``self.db_config.translate`` produces for that schema.
    """
    translate = self.db_config.translate
    return translate(feature_set.get_schema())