def benchmarks(house_id):
    """Benchmark CO and FHMM disaggregation on one REDD house.

    Trains each classifier on the top N_DEV submeters of the window before
    TRAIN_END and predicts on the window after TRAIN_END.

    Parameters
    ----------
    house_id : int
        Building id within the REDD dataset.

    Returns
    -------
    (truth, predictions) : tuple of dict
        Both keyed by classifier name ('CO', 'FHMM'); `truth` additionally
        carries the test-period mains chunk under 'Main'.
    """
    redd_train = DataSet(REDD_FILE)
    redd_test = DataSet(REDD_FILE)

    # Set up training and test sets: split the recording at TRAIN_END.
    redd_train.set_window(end=TRAIN_END)
    redd_test.set_window(start=TRAIN_END)

    # Get top N_DEV devices by energy for training.
    house = redd_train.buildings[house_id]
    test_elec = redd_test.buildings[house_id].elec
    top_apps = house.elec.submeters().select_top_k(k=N_DEV)

    # Store the first chunk of test-period mains for later comparison.
    test_mains = next(test_elec.mains().load())

    truth = {}
    predictions = {}
    timezone = redd_train.metadata['timezone']

    # The train/predict/time cycle was duplicated inline for CO and FHMM;
    # run the identical procedure for each classifier in one loop instead.
    classifiers = [
        ('CO', 'Combinatorial Optimisation: ', CombinatorialOptimisation()),
        ('FHMM', 'Factorial Hidden Markov Model: ', FHMM()),
    ]
    for key, banner, clf in classifiers:
        start = time.time()
        print("*" * 20)
        print(banner)
        print("*" * 20)
        clf.train(top_apps, sample_period=SAMPLE_PERIOD)
        truth[key], predictions[key] = predict(
            clf, test_elec, SAMPLE_PERIOD, timezone)
        print("Runtime: ", time.time() - start)

    # Add mains to truth so downstream metrics can compare against it.
    truth['CO']['Main'] = test_mains
    truth['FHMM']['Main'] = test_mains
    return truth, predictions
def test_fhmm_correctness(self):
    """FHMM round-trip check: disaggregated output must match stored meters.

    Trains an FHMM on building 1, disaggregates its mains into a temporary
    'output.h5' store, then asserts meters 2-3 of the output are
    element-for-element identical to the source dataset.
    """
    elec = self.dataset.buildings[1].elec
    fhmm = FHMM()
    fhmm.train(elec)
    mains = elec.mains()
    output = HDFDataStore('output.h5', 'w')
    try:
        fhmm.disaggregate(mains, output, sample_period=1)
        for meter in range(2, 4):
            df1 = output.store.get('/building1/elec/meter{}'.format(meter))
            df2 = self.dataset.store.store.get(
                '/building1/elec/meter{}'.format(meter))
            # Every element equal, and both frames the same length.
            self.assertEqual((df1 == df2).sum().values[0], len(df1.index))
            self.assertEqual(len(df1.index), len(df2.index))
    finally:
        # Close and delete the temp store even when an assertion fails;
        # previously a failing test leaked 'output.h5' and could poison
        # subsequent runs that try to recreate it.
        output.close()
        remove("output.h5")
def fhmm(dataset_path, train_building, train_start, train_end,
         val_building, val_start, val_end,
         test_building, test_start, test_end,
         meter_key, sample_period):
    """Train, validate, and test an FHMM disaggregator for one appliance.

    Opens three windows of the same dataset (train/val/test), trains an FHMM
    on the appliance meter plus mains, disaggregates the validation and test
    mains into temporary HDF5 stores, and scores both splits.

    Returns
    -------
    dict with keys 'val_metrics', 'test_metrics', 'time_taken' (seconds,
    formatted to 2 decimals), and 'epochs' (always None for FHMM).
    """
    # Start tracking time.
    start = time.time()

    # One DataSet handle per split, each restricted to its own window.
    # (Removed the original's no-op self-assignments such as
    # `dataset_path = dataset_path`.)
    train = DataSet(dataset_path)
    train.set_window(start=train_start, end=train_end)
    val = DataSet(dataset_path)
    val.set_window(start=val_start, end=val_end)
    test = DataSet(dataset_path)
    test.set_window(start=test_start, end=test_end)

    train_elec = train.buildings[train_building].elec
    val_elec = val.buildings[val_building].elec
    test_elec = test.buildings[test_building].elec

    # Train on the selected appliance meter plus the mains channel.
    selected = MeterGroup([train_elec[meter_key], train_elec.mains()])

    # Renamed from `fhmm` — the original shadowed this function's own name.
    model = FHMM()
    model.train(selected, sample_period=sample_period)

    # Disaggregate validation mains into a temporary store.
    val_disag_filename = 'disag-out-val.h5'
    output = HDFDataStore(val_disag_filename, 'w')
    model.disaggregate(val_elec.mains(), output_datastore=output)
    output.close()

    # Disaggregate test mains into a second temporary store.
    test_disag_filename = 'disag-out-test.h5'
    output = HDFDataStore(test_disag_filename, 'w')
    model.disaggregate(test_elec.mains(), output_datastore=output)
    output.close()

    # Score both splits with the shared helper (the original computed the
    # same nine metrics twice inline).
    result_val = DataSet(val_disag_filename)
    res_elec_val = result_val.buildings[val_building].elec
    val_metrics_results_dict = _fhmm_metrics(
        res_elec_val[meter_key], val_elec[meter_key])

    result = DataSet(test_disag_filename)
    res_elec = result.buildings[test_building].elec
    test_metrics_results_dict = _fhmm_metrics(
        res_elec[meter_key], test_elec[meter_key])

    # End tracking time.
    time_taken = time.time() - start  # in seconds

    model_result_data = {
        'val_metrics': val_metrics_results_dict,
        'test_metrics': test_metrics_results_dict,
        'time_taken': format(time_taken, '.2f'),
        'epochs': None,
    }

    # Close disag output files, then the three dataset handles.
    result.store.close()
    result_val.store.close()
    train.store.close()
    val.store.close()
    test.store.close()

    return model_result_data


def _fhmm_metrics(pred_meter, gt_meter):
    """Compute the standard metric dict for one (prediction, ground-truth) pair."""
    rpaf = metrics.recall_precision_accuracy_f1(pred_meter, gt_meter)
    return {
        'recall_score': rpaf[0],
        'precision_score': rpaf[1],
        'accuracy_score': rpaf[2],
        'f1_score': rpaf[3],
        'mean_absolute_error':
            metrics.mean_absolute_error(pred_meter, gt_meter),
        'mean_squared_error':
            metrics.mean_square_error(pred_meter, gt_meter),
        'relative_error_in_total_energy':
            metrics.relative_error_total_energy(pred_meter, gt_meter),
        'nad': metrics.nad(pred_meter, gt_meter),
        'disaggregation_accuracy':
            metrics.disaggregation_accuracy(pred_meter, gt_meter),
    }
# NOTE(review): this chunk is Python 2 (`print b_id` statement below) while
# neighbouring chunks use Python 3 print calls — confirm target interpreter.
# NOTE(review): the `try:` block is truncated in this view; its except/finally
# clause lies beyond the visible chunk.
out = {}
for b_id, building in building_chunk_items[home_group]:
    try:
        # Skip homes whose result files already exist on disk.
        if b_id in existing_files_names:
            print("Skipping", b_id)
            continue
        print b_id
        out[b_id] = {}
        start = time.time()
        #cls_dict = {"Hart":Hart85()}
        # One disaggregator instance per algorithm under comparison.
        cls_dict = {
            "CO": CombinatorialOptimisation(),
            "FHMM": FHMM(),
            "Hart": Hart85()
        }
        elec = building.elec
        mains = elec.mains()
        # Separate DataSet handles so train/test can carry different windows.
        train = DataSet(ds_path)
        test = DataSet(ds_path)
        # Train on data before 2013-07-16; the matching test-window
        # restriction is commented out, so test covers the full recording.
        split_point = datetime.date(2013, 7, 16)
        train.set_window(end=split_point)
        #test.set_window(start=split_point)
        train_elec = train.buildings[b_id].elec
        test_elec = test.buildings[b_id].elec
        test_mains = test_elec.mains()
        # AC elec
#Intersection of index gt_index_utc = gt_overall.index.tz_convert("UTC") pred_index_utc = pred_overall.index.tz_convert("UTC") common_index_utc = gt_index_utc.intersection(pred_index_utc) common_index_local = common_index_utc.tz_convert(timezone) gt_overall = gt_overall.ix[common_index_local] pred_overall = pred_overall.ix[common_index_local] appliance_labels = [m.label() for m in gt_overall.columns.values] gt_overall.columns = appliance_labels pred_overall.columns = appliance_labels return gt_overall, pred_overall #Run classifiers CO and FHMM classifiers = {'CO': CombinatorialOptimisation(), 'FHMM': FHMM()} predictions = {} sample_period = 6 for clf_name, clf in classifiers.iteritems(): print("*" * 20) print(clf_name) print("*" * 20) clf.train(top_5_train_elec, sample_period=sample_period) gt, predictions[clf_name] = predict(clf, test_elec, 6, train.metadata['timezone']) #Evaluate algorithms by rmse metric def compute_rmse(gt, pred): from sklearn.metrics import mean_squared_error rms_error = {}
# NOTE(review): `train`, `test`, and `np` must be bound by earlier code not
# visible in this chunk.
# Split the recording at 2011-04-30: before is training, after is test.
train.set_window(end="2011-04-30")
test.set_window(start="2011-04-30")
train_elec = train.buildings[1].elec
test_elec = test.buildings[1].elec
# Train only on the 5 submeters with the highest energy consumption.
top_5_train_elec = train_elec.submeters().select_top_k(k=5)
np.random.seed(42)  # reproducible model initialisation
params = {}
#classifiers = {'CO':CombinatorialOptimisation(), 'FHMM':FHMM()}
predictions = {}
sample_period = 120
co = CombinatorialOptimisation()
fhmm = FHMM()
## Train models
co.train(top_5_train_elec, sample_period=sample_period)
fhmm.train(top_5_train_elec, sample_period=sample_period)
## Export models
co.export_model(filename='co.h5')
fhmm.export_model(filename='fhmm.h5')
# Models are immediately re-imported — presumably to exercise the
# export/import round-trip; confirm intent with the author.
co.import_model(filename='co.h5')
fhmm.import_model(filename='fhmm.h5')
    # Tail of a `predict`-style helper: the enclosing def is outside this view.
    # Align both frames on the timestamps they share, in local time.
    common_index_local = common_index_utc.tz_convert(timezone)
    gt_overall = gt_overall.loc[common_index_local]
    pred_overall = pred_overall.loc[common_index_local]
    # Identity copy of the column labels (no .label() mapping in this variant).
    appliance_labels = [m for m in gt_overall.columns.values]
    gt_overall.columns = appliance_labels
    pred_overall.columns = appliance_labels
    return gt_overall, pred_overall

# Train CO and FHMM on the top-5 submeters, then predict on the test window.
# NOTE(review): `top_5_train_elec`, `test_elec`, `train`, `predict`, and
# `nilmtk` must be bound by earlier code not visible in this chunk.
np.random.seed(42)
params = {}
co = CombinatorialOptimisation()
fhmm = FHMM()
#predictions = {}
sample_period = 120
print("*"*20)
print('CO')
print("*" *20)
co.train(top_5_train_elec, sample_period=sample_period)
# The literal 120 duplicates sample_period above.
gt_1, predictions_co = predict(co, test_elec, 120, train.metadata['timezone'])
print("*"*20)
print('FHMM')
print("*" *20)
fhmm.train(top_5_train_elec, sample_period=sample_period)
gt_2, predictions_fhmm = predict(fhmm, test_elec, 120, train.metadata['timezone'])
rmse = {}
rmse['CO'] = nilmtk.utils.compute_rmse(gt_1, predictions_co, pretty=True)
    # Tail of a `predict`-style helper: the enclosing def is outside this view.
    # Keep only the timestamps present in both ground truth and prediction.
    common_index_utc = gt_index_utc.intersection(pred_index_utc)
    common_index_local = common_index_utc.tz_convert(timezone)
    gt_overall = gt_overall.loc[common_index_local]
    pred_overall = pred_overall.loc[common_index_local]
    # Identity copy of the column labels (no .label() mapping in this variant).
    appliance_labels = [m for m in gt_overall.columns.values]
    gt_overall.columns = appliance_labels
    pred_overall.columns = appliance_labels
    return gt_overall, pred_overall

# Reload previously exported models and score them on the test window.
# NOTE(review): `test_elec`, `train`, `predict`, and `nilmtk` must be bound by
# earlier code not visible in this chunk.
np.random.seed(42)
co = CombinatorialOptimisation()
fhmm = FHMM()
co.import_model(filename='co.h5')
fhmm.import_model(filename='fhmm.h5')
cot, pred_co = predict(co, test_elec, 120, train.metadata['timezone'])
fhmmt, pred_fhmm = predict(fhmm, test_elec, 120, train.metadata['timezone'])
rmse = {}
rmse["CO"] = nilmtk.utils.compute_rmse(cot, pred_co, pretty=True)
rmse["FHMM"] = nilmtk.utils.compute_rmse(fhmmt, pred_fhmm, pretty=True)
# NOTE(review): `classifiers`, `gt`, and `predictions` are never bound in this
# chunk — this loop looks like a stale copy from another cell; verify those
# names exist where this runs, otherwise it raises NameError.
for clf_name in classifiers.keys():
    rmse[clf_name] = nilmtk.utils.compute_rmse(gt, predictions[clf_name], pretty=True)
def run_fhmm(meters):
    """Train an FHMM disaggregator on *meters*, then run NILMTK disaggregation.

    Logs progress before each phase; results are handled by run_nilmtk_disag
    under the 'fhmm' label.
    """
    model = FHMM()
    # TRAIN FHMM
    logger.info("Training FHMM...")
    model.train(meters)
    # Hand the trained model off to the shared disaggregation driver.
    logger.info("Disag FHMM...")
    run_nilmtk_disag(model, 'fhmm')