class DatabaseDummy:
    """Minimal stand-in for the iotfunctions Database object used by these tests.

    Provides only the attributes the anomaly/regressor functions actually read:
    a tenant id, a db type, and a file-based model store for persisting model
    artifacts. No real database connection is made.
    """

    tenant_id = '###_IBM_###'
    db_type = 'db2'
    model_store = FileModelStore('./data')

    # Fixed: was misspelled `_init`, which is never called by Python;
    # the intended constructor hook is `__init__`.
    def __init__(self):
        # Nothing to initialize beyond the class attributes.
        return
def test_anomaly_scores():
    """Run the V2 anomaly-score functions (saliency, FFT, k-means, spectral)
    on the Azure anomaly sample data and compare against reference output.

    Covers three passes: scoring with scalings generated under sklearn 0.21.3,
    regenerating scalings with a recent sklearn, and scoring on a frame with
    an extra aggregation index level.
    """
    numba_logger = logging.getLogger('numba')
    numba_logger.setLevel(logging.ERROR)

    ####
    print('Create dummy database')
    db = DatabaseDummy()
    print(db.model_store)
    #####

    jobsettings = {'db': db, '_db_schema': 'public'}
    EngineLogging.configure_console_logging(logging.DEBUG)

    def run_function(func, df):
        # Shared boilerplate for every scorer: build an entity type for the
        # function, attach it, then execute the function on the frame.
        et = func._build_entity_type(columns=[Column(Temperature, Float())], **jobsettings)
        func._entity_type = et
        return func.execute(df=df)

    # Run on the good pump first
    # Get stuff in
    print('Read Anomaly Sample data in')
    df_i = pd.read_csv('./data/AzureAnomalysample.csv', index_col=False, parse_dates=['timestamp'])
    df_i['entity'] = 'MyRoom'
    df_i[Temperature] = df_i['value'] + 20
    df_i = df_i.drop(columns=['value'])

    # and sort it by timestamp
    df_i = df_i.sort_values(by='timestamp')
    df_i = df_i.set_index(['entity', 'timestamp']).dropna()

    for i in range(0, df_i.index.nlevels):
        print(str(df_i.index.get_level_values(i)))

    #####
    print('Use scaling model generated with sklearn 0.21.3')

    print('Compute Saliency Anomaly Score')
    df_i = run_function(SaliencybasedGeneralizedAnomalyScoreV2(Temperature, 12, True, sal), df_i)

    print('Compute FFT Anomaly Score')
    df_i = run_function(FFTbasedGeneralizedAnomalyScoreV2(Temperature, 12, True, fft), df_i)

    print('Compute K-Means Anomaly Score')
    # NOTE: k-means result goes to df_comp; df_i is intentionally left as-is.
    df_comp = run_function(KMeansAnomalyScoreV2(Temperature, 12, True, kmeans), df_i)

    print("Executed Anomaly functions on sklearn 0.21.3")
    print("Now generate new scalings with recent sklearn")

    # Switch the model store so freshly generated scalings do not overwrite
    # the checked-in ./data artifacts.
    db.model_store = FileModelStore('/tmp')

    print('Compute Spectral Anomaly Score')
    df_i = run_function(SpectralAnomalyScoreExt(Temperature, 12, spectral, spectralinv), df_i)

    print('Compute Saliency Anomaly Score')
    df_i = run_function(SaliencybasedGeneralizedAnomalyScoreV2(Temperature, 12, True, sal), df_i)

    print('Compute FFT Anomaly Score')
    df_i = run_function(FFTbasedGeneralizedAnomalyScoreV2(Temperature, 12, True, fft), df_i)

    print('Compute K-Means Anomaly Score')
    df_comp = run_function(KMeansAnomalyScoreV2(Temperature, 12, True, kmeans), df_i)

    print("Executed Anomaly functions")

    # df_comp.to_csv('./data/AzureAnomalysampleOutputV2.csv')
    df_o = pd.read_csv('./data/AzureAnomalysampleOutputV2.csv')

    # print('Compare Scores - Linf')
    print('Compare Scores R2-score')
    comp2 = {spectral: r2_score(df_o[spectralinv].values, df_comp[spectralinv].values),
             fft: r2_score(df_o[fft].values, df_comp[fft].values),
             sal: r2_score(df_o[sal].values, df_comp[sal].values),
             kmeans: r2_score(df_o[kmeans].values, df_comp[kmeans].values)}
    print(comp2)

    # assert_true(comp2[spectral] > 0.9)
    assert_true(comp2[fft] > 0.9)
    assert_true(comp2[sal] > 0.9)
    # assert_true(comp2[kmeans] > 0.9)

    df_agg = df_i.copy()
    # add frequency to time
    df_agg = df_agg.reset_index().set_index(['timestamp']).asfreq(freq='T')
    df_agg['site'] = 'Munich'
    df_agg = df_agg.reset_index().set_index(['entity', 'timestamp', 'site']).dropna()

    print('Compute Spectral Anomaly Score - aggr')
    df_agg = run_function(SpectralAnomalyScoreExt(Temperature, 12, spectral, spectralinv), df_agg)

    print('Compute K-Means Anomaly Score - aggr')
    df_agg = run_function(KMeansAnomalyScoreV2(Temperature, 12, True, kmeans), df_agg)

    print('Compute Saliency Anomaly Score - aggr')
    df_agg = run_function(SaliencybasedGeneralizedAnomalyScoreV2(Temperature, 12, True, sal), df_agg)

    print('Compute FFT Anomaly Score - aggr')
    df_agg = run_function(FFTbasedGeneralizedAnomalyScoreV2(Temperature, 12, True, fft), df_agg)

    print(df_agg.describe())

    comp3 = {spectral: r2_score(df_o[spectralinv].values, df_agg[spectralinv].values),
             fft: r2_score(df_o[fft].values, df_agg[fft].values),
             sal: r2_score(df_o[sal].values, df_agg[sal].values),
             kmeans: r2_score(df_o[kmeans].values, df_agg[kmeans].values)}
    print(comp3)

    print("Executed Anomaly functions on aggregation data")
def test_light_gbm():
    """Train and score GBMRegressor / GBMForecaster on the regression sample.

    Covers: training with scalings from sklearn 0.21.3, first-time training
    with recent sklearn/lightgbm, inference on the stored model, forced
    retraining (by raising the auto-improve threshold above the achieved r2),
    and a forecaster with lag features. Each stage asserts r2 > 0.4.
    """
    numba_logger = logging.getLogger('numba')
    numba_logger.setLevel(logging.ERROR)

    # Run on the good pump first
    # Get stuff in
    print('Read Regressor Sample data in')
    df_i = pd.read_csv('./data/RegressionTestData.csv', index_col=False, parse_dates=['DATETIME'])
    df_i = df_i.rename(columns={'DATETIME': 'timestamp'})
    df_i['entity'] = 'MyShop'
    df_i[Temperature] = pd.to_numeric(df_i[Temperature], errors='coerce')
    df_i[Humidity] = pd.to_numeric(df_i[Humidity], errors='coerce')

    # and sort it by timestamp
    df_i = df_i.sort_values(by='timestamp')
    df_i = df_i.set_index(['entity', 'timestamp']).dropna()

    for i in range(0, df_i.index.nlevels):
        print(str(df_i.index.get_level_values(i)))

    EngineLogging.configure_console_logging(logging.DEBUG)

    #####
    print('Create dummy database')
    db = DatabaseDummy()
    print(db.model_store)
    #####

    jobsettings = {'db': db, '_db_schema': 'public'}  # , 'save_trace_to_file' : True}

    def run_model(model, df):
        # Shared boilerplate for every estimator: clear the active-model cache,
        # build an entity type, attach it, then execute on the frame.
        model.active_models = dict()
        et = model._build_entity_type(columns=[Column(Temperature, Float())], **jobsettings)
        model._entity_type = et
        return model.execute(df=df)

    print('lightGBM regressor - testing training pipeline with sklearn 0.21.3')
    # Use a throwaway model store so new artifacts do not pollute ./data.
    db.model_store = FileModelStore('/tmp')

    brgi = GBMRegressor(features=[Temperature, Humidity], targets=[KW], predictions=['KW_pred'],
                        n_estimators=500, num_leaves=40, learning_rate=0.2, max_depth=-1)
    brgi.stop_auto_improve_at = 0.4
    df_i = run_model(brgi, df_i)

    print('lightGBM regressor - testing training pipeline with recent sklearn and lightgbm')
    print('lightGBM regressor - first time training')
    brgi = GBMRegressor(features=[Temperature, Humidity], targets=[KW], predictions=['KW_pred'],
                        n_estimators=500, num_leaves=40, learning_rate=0.2, max_depth=-1)
    brgi.stop_auto_improve_at = 0.4
    df_i = run_model(brgi, df_i)
    print('lightGBM regressor done')

    mtrc = brgi.active_models['model.TEST_ENTITY_FOR_GBMREGRESSOR.GBMRegressor.KW.MyShop'][0].eval_metric_test
    print('Trained model r2 ', mtrc)
    assert_true(mtrc > 0.4)
    print('lightGBM regressor - testing training pipeline done ')

    #####
    print('lightGBM regressor - inference')
    print('lightGBM regressor - first time training')
    brgi = GBMRegressor(features=[Temperature, Humidity], targets=[KW], predictions=['KW_pred'])
    brgi.stop_auto_improve_at = 0.4
    df_i = run_model(brgi, df_i)
    print('lightGBM regressor done')

    mtrc = brgi.active_models['model.TEST_ENTITY_FOR_GBMREGRESSOR.GBMRegressor.KW.MyShop'][0].eval_metric_test
    print('Trained model r2 ', mtrc)
    assert_true(mtrc > 0.4)
    print('lightGBM regressor - inference done')

    #####
    print('lightGBM regressor - enforce retraining')
    print('lightGBM regressor - first time training')
    brgi = GBMRegressor(features=[Temperature, Humidity], targets=[KW], predictions=['KW_pred'])
    # force retrain as r2 metric is considered bad now
    brgi.stop_auto_improve_at = mtrc + 2
    df_i = run_model(brgi, df_i)
    print('lightGBM regressor done')

    mtrc = brgi.active_models['model.TEST_ENTITY_FOR_GBMREGRESSOR.GBMRegressor.KW.MyShop'][0].eval_metric_test
    print('Trained model r2 ', mtrc)
    assert_true(mtrc > 0.4)
    print('lightGBM regressor - enforce retraining done')

    #####
    print('lightGBM forecaster - first time training')
    brgei = GBMForecaster(features=[Temperature, Humidity], targets=[KW], predictions=['KW_pred'],
                          lags=[1, 3, 7])
    # force retrain as r2 metric is considered bad now
    brgei.stop_auto_improve_at = mtrc + 2
    df_i = run_model(brgei, df_i)
    print('lightGBM forecaster done')

    mtrc = brgei.active_models['model.TEST_ENTITY_FOR_GBMFORECASTER.GBMForecaster.KW.MyShop'][0].eval_metric_test
    print('Trained model r2 ', mtrc)
    assert_true(mtrc > 0.4)
    print('lightGBM forecaster - training done')
#!/usr/bin/python3
# Import packages and libraries
import datetime as dt
import json
import pandas as pd
import numpy as np
from sqlalchemy import Column, Integer, String, Float, DateTime, Boolean, func
import iotfunctions.bif as bif
from iotfunctions.metadata import EntityType, LocalEntityType
from iotfunctions.db import Database
from iotfunctions.dbtables import FileModelStore

# Connect to the service using the locally stored monitor-demo credentials.
with open('credentials_as_monitor_demo.json', encoding='utf-8') as fh:
    credentials = json.load(fh)
db_schema = None
db = Database(credentials=credentials)


# Write the function: scale the distance column down by 10%.
def f(df, parameters=None):
    return df['distance'] * 0.9


# Save the function to a local model store under the name 'adjusted_distance'.
model_store = FileModelStore()
model_store.store_model('adjusted_distance', f)