def disaggregate_original_co(h5_input, h5_output,
                             dataset_start_date_disag, dataset_end_date_disag,
                             centroids=None):
    """Train the stock NILMTK CO model on building 1 and disaggregate a window.

    The model is trained on building 1 of ``h5_input`` before any window is
    set on the dataset. The dataset window is then set to the disaggregation
    period and the mains of building 1 are disaggregated into ``h5_output``.

    Parameters
    ----------
    h5_input : path of the HDF5 dataset to read.
    h5_output : path of the HDF5 file the disaggregation is written to
        (opened in ``'w'`` mode).
    dataset_start_date_disag, dataset_end_date_disag : window passed to
        ``DataSet.set_window`` before disaggregating.
    centroids : optional mapping indexed by meter instance. When given, the
        ``'states'`` entry of every trained model is replaced by
        ``centroids[instance]``.

    Returns
    -------
    tuple
        ``(plain_co, vampire_power_used_in_original)``: the trained
        disaggregator and the vampire power of the mains measured before
        training.
    """
    import nilmtk.disaggregate as original_nilmtk

    ds = DataSet(h5_input)
    elec = ds.buildings[1].elec
    vampire_power_used_in_original = elec.mains().vampire_power()

    # Train
    plain_co = original_nilmtk.CombinatorialOptimisation()
    plain_co.train(elec)

    # Modify centroids manually
    if centroids is not None:
        for model in plain_co.model:
            instance = model['training_metadata'].instance()
            model['states'] = centroids[instance]

    # Disaggregate
    ds.set_window(start=dataset_start_date_disag, end=dataset_end_date_disag)
    elec = ds.buildings[1].elec
    output_plain_co = HDFDataStore(h5_output, 'w')
    try:
        plain_co.disaggregate(elec.mains(), output_plain_co)
    finally:
        # Always release the output file, even when disaggregation fails.
        output_plain_co.close()

    return plain_co, vampire_power_used_in_original
def hart85(start_train, end_train, start_test, end_test, train_elec):
    """Train a Hart85 disaggregator, disaggregate building 1 and plot results.

    ``data`` is not defined here; it is read from the enclosing (module)
    scope and its window is changed by this function.

    Parameters
    ----------
    start_train, end_train : window set on ``data`` before training.
    start_test, end_test : window set on ``data`` before disaggregating.
    train_elec : meter group used for training and as the reference for the
        F1 score. Its number of meters is embedded in the output file name.
    """
    # Start training
    data.set_window(start_train, end_train)
    elec = data.buildings[1].elec
    hart = hart_85.Hart85()
    hart.train(train_elec, sample_period=1)

    # Start disaggregating
    data.set_window(start_test, end_test)
    disag_filename = './build/disagg_sum_hart85_{}_k.h5'.format(
        len(train_elec.meters))
    output = HDFDataStore(disag_filename, 'w')
    try:
        hart.disaggregate(elec.mains(), output)
    finally:
        # Release the output file even if disaggregation fails.
        output.close()

    disag = DataSet(disag_filename)
    disag_elec = disag.buildings[1].elec
    disag_elec.plot()
    # The title has to be set before show(); afterwards it would be applied
    # to a fresh, empty figure instead of the disaggregation plot.
    plt.title("HART85")
    plt.show()

    # Calculate F1-Score
    f1 = f1_score(disag_elec, train_elec)
    f1.index = disag_elec.get_labels(f1.index)
    f1.plot(kind='barh')
    plt.ylabel('appliance')
    plt.xlabel('f-score')
    plt.title("Hart85")
    plt.show()
def __init__(self, in_filepath, out_filepath):
    """Open the input store, load it into a DataSet and open the output store.

    Parameters
    ----------
    in_filepath : path handed to ``HDFDataStore`` for the input data.
    out_filepath : path handed to ``HDFDataStore`` in ``'w'`` mode for the
        disaggregation output.
    """
    print("Loading DataStore and Generating Dataset...")

    # Input side: open the store and load it into a fresh DataSet.
    source_store = HDFDataStore(in_filepath)
    loaded = DataSet()
    loaded.load(source_store)

    # Output side and the (still untrained) disaggregator.
    sink_store = HDFDataStore(out_filepath, 'w')
    disaggregator = CombinatorialOptimisation()

    self.km = {}
    self.dataStore = source_store
    self.dataSet = loaded
    self.outDataStore = sink_store
    self.co = disaggregator
    self.train_group = {}

    print("Data Properly Loaded!")
def __init__(self, in_filepath, out_filepath):
    """Set up the input/output data stores and an untrained CO model.

    Parameters
    ----------
    in_filepath : path of the input HDF5 store.
    out_filepath : path of the output HDF5 store (opened with mode ``'w'``).
    """
    print("Loading DataStore and Generating Dataset...")

    # Placeholder until a key map is created elsewhere.
    self.km = dict()

    # Input store and the dataset loaded from it.
    self.dataStore = HDFDataStore(in_filepath)
    dataset = DataSet()
    dataset.load(self.dataStore)
    self.dataSet = dataset

    # Destination store plus the disaggregation model.
    self.outDataStore = HDFDataStore(out_filepath, 'w')
    self.co = CombinatorialOptimisation()

    # Placeholder until a training meter group is selected.
    self.train_group = dict()

    print("Data Properly Loaded!")
def disaggregate_building_to_file(self, building_idx, filename, model=None):
    """Disaggregate the mains of one building into an HDF5 file.

    Parameters
    ----------
    building_idx : building identifier forwarded to ``fit_a_model`` and
        ``get_elec_meter_data_of_a_building``.
    filename : path of the output store; it is opened in ``'w'`` mode.
    model : disaggregator exposing ``disaggregate(mains, output)``. When
        ``None`` a model is obtained from ``self.fit_a_model(building_idx)``.
    """
    # Identity check: a model object with a custom __eq__ must not be
    # mistaken for "no model given".
    if model is None:
        model = self.fit_a_model(building_idx)

    elec = self.get_elec_meter_data_of_a_building(building_idx)
    output = HDFDataStore(filename, 'w')
    try:
        model.disaggregate(elec.mains(), output)
    finally:
        # Release the output file even if disaggregation fails.
        output.close()
def co(start_train, end_train, start_test, end_test, train_elec):
    """Train a Combinatorial Optimisation model, disaggregate and plot results.

    ``data`` is not defined here; it is read from the enclosing (module)
    scope and its window is changed by this function. Besides the HDF5
    output, a ``<output>.json`` file recording the train/test windows is
    written next to it.

    Parameters
    ----------
    start_train, end_train : window set on ``data`` before training.
    start_test, end_test : window set on ``data`` before disaggregating.
    train_elec : meter group used for training and as the reference for the
        F1 score. Its number of meters is embedded in the output file name.
    """
    # Start training
    data.set_window(start_train, end_train)
    elec = data.buildings[1].elec
    # Named co_model so the local does not shadow this function's own name.
    co_model = CombinatorialOptimisation()
    co_model.train(train_elec, ac_type='active', physical_quantity='power',
                   sample_period=1)

    # Start disaggregating
    data.set_window(start_test, end_test)
    disag_filename = './build/disagg_sum_co_{}_k.h5'.format(
        len(train_elec.meters))
    output = HDFDataStore(disag_filename, 'w')
    try:
        co_model.disaggregate(elec.mains(), output, ac_type='active',
                              physical_quantity='power', sample_period=1)
    finally:
        # Release the output file even if disaggregation fails.
        output.close()

    dates_dict = {
        "start_train": start_train,
        "end_train": end_train,
        "start_test": start_test,
        "end_test": end_test
    }
    # write test and train timeframe into json file
    with open(disag_filename + ".json", 'w') as dates_file:
        json.dump(dates_dict, dates_file)

    # Calculate F1-Score
    disag = DataSet(disag_filename)
    disag_elec = disag.buildings[1].elec
    disag_elec.plot()
    plt.title("CO")
    plt.show()

    f1 = f1_score(disag_elec, train_elec)
    f1.index = disag_elec.get_labels(f1.index)
    f1.plot(kind='barh')
    plt.ylabel('appliance')
    plt.xlabel('f-score')
    plt.title("CO")
    plt.show()
def mle(start_train, end_train, start_test, end_test, train_elec):
    """Train an MLE disaggregator, disaggregate building 1 and plot results.

    ``data`` is not defined here; it is read from the enclosing (module)
    scope and its window is changed by this function. Besides the HDF5
    output, a ``<output>.json`` file recording the train/test windows is
    written next to it.

    Parameters
    ----------
    start_train, end_train : window set on ``data`` before training.
    start_test, end_test : window set on ``data`` before disaggregating.
    train_elec : meter group used for training and as the reference for the
        F1 score. Its number of meters is embedded in the output file name.
    """
    # Start training
    data.set_window(start_train, end_train)
    elec = data.buildings[1].elec
    # Named mle_model so the local does not shadow this function's own name.
    mle_model = maximum_likelihood_estimation.MLE()
    mle_model.sample_period = "1s"
    mle_model.train(train_elec)

    # Start disaggregating
    data.set_window(start_test, end_test)
    disag_filename = './build/disagg_sum_mle_{}_k.h5'.format(
        len(train_elec.meters))
    output = HDFDataStore(disag_filename, 'w')
    try:
        mle_model.disaggregate(elec.mains(), output)
    finally:
        # Release the output file even if disaggregation fails.
        output.close()

    dates_dict = {
        "start_train": start_train,
        "end_train": end_train,
        "start_test": start_test,
        "end_test": end_test
    }
    # write test and train timeframe into json file
    with open(disag_filename + ".json", 'w') as dates_file:
        json.dump(dates_dict, dates_file)

    disag = DataSet(disag_filename)
    disag_elec = disag.buildings[1].elec
    disag_elec.plot()
    # Title must precede show(); it previously came after and read "FHMM",
    # a leftover from another algorithm's plotting code.
    plt.title("MLE")
    plt.show()

    # Calculate F1-Score
    f1 = f1_score(disag_elec, train_elec)
    f1.index = disag_elec.get_labels(f1.index)
    f1.plot(kind='barh')
    plt.ylabel('appliance')
    plt.xlabel('f-score')
    plt.title("MLE")
    plt.show()
class REDD_Data(object):
    '''
    REDD_Data Class is an object designed to abstract the lower level
    commands of the NILMTK software package, with focus on the use of the
    REDD DataSet. It is designed to allow rapid experimentation and
    disaggregation compared to attempting to set the package up from scratch.

    This class requires the following for proper usage:
    - NILMTK package: https://github.com/nilmtk
    - REDD Dataset (converted to .h5): redd.csail.mit.edu
    - Various dependencies (that NILMTK also requires), most can be
      downloaded through Anaconda: continuum.io/downloads

    Parameters
    -----------
    in_filepath: Filepath of converted REDD dataset (in .h5 format)
    out_filepath: filepath to place output disaggregation dataset (in .h5
        format)

    Attributes
    -----------
    km: Key_Map Object
        maps a meter's appliance name to its specific .H5 key. Starts as an
        empty dict and is replaced by a Key_Map in the plotting helpers.
    dataStore: NILMTK HDFDataStore Object
        the HDFDataStore that contains the converted REDD DataSet.
    dataSet: NILMTK DataSet Object
        the DataSet object that is generated from the REDD DataStore
        (self.dataStore)
    outDataStore: NILMTK HDFDataStore Object
        the HDFDataStore that will contain the disaggregated dataset.
    co: NILMTK CombinatorialOptimisation object
        the disaggregation model object that will be trained and will
        disaggregate the working dataset
    train_group: NILMTK MeterGroup object
        the MeterGroup object that is used to train the disaggregation
        model (self.co). Starts as an empty dict until a model is trained.
    '''

    def __init__(self, in_filepath, out_filepath):
        print("Loading DataStore and Generating Dataset...")
        self.km = {}
        self.dataStore = HDFDataStore(in_filepath)
        self.dataSet = DataSet()
        self.dataSet.load(self.dataStore)
        self.outDataStore = HDFDataStore(out_filepath, 'w')
        self.co = CombinatorialOptimisation()
        self.train_group = {}
        print("Data Properly Loaded!")

    def train_disag_model(self, building_inst, use_topk=False, k=5):
        '''
        Function trains the disaggregation model using a selected MeterGroup.

        Nothing is trained (only an error message is printed) when
        building_inst is outside 1-6.

        Parameters
        -----------
        building_inst: the instance # of the building that you wish to grab
            the training group from.
        use_topk: true if you wish to only grab the top k most energy
            intensive appliances to train the model, false if you wish to
            use all appliances.
        k: the # of appliances you wish to use (if use_topk = True)
        '''
        print("Training CO Disaggregation Model using given metergroup...")

        # Chained comparison instead of bitwise `&` on two booleans.
        if not 0 < building_inst <= 6:
            print("Error: Please select a building_inst of 1-6.")
            print("Model unsucessfully trained.")
            return

        # Select appropriate meter group to train with
        elec = self.dataSet.buildings[building_inst].elec
        if use_topk:
            self.train_group = elec.select_top_k(k)
        else:
            self.train_group = elec

        self.co.train(self.train_group)
        print("CO Disaggreation Model Sucessfully Trained!")

    def load_disag_model(self, filepath):
        '''
        Function loads the disaggregation model from a file.

        Parameters
        -----------
        filepath: exact filepath of the model file.
        '''
        print("Loading CO Disaggreation Model...")
        self.co.import_model(filepath)
        print("Model Sucessfully Loaded!")

    def save_disag_model(self, filepath):
        '''
        Function saves the disaggregation model to a file.

        Parameters
        -----------
        filepath: exact filepath of the model file.
        '''
        print("Saving CO Disaggregation Model...")
        self.co.export_model(filepath)
        print("Model Sucessfully Saved!")

    def disaggregate(self, building_inst):
        '''
        Function will disaggregate the mains MeterGroup of the passed
        building instance, and save this to the self.outDataStore object.

        Parameters
        -----------
        building_inst: instance # of the building mains you wish to
            disaggregate.
        '''
        print("Disaggregating Building Mains...")
        mains = self.dataSet.buildings[building_inst].elec.mains()
        self.co.disaggregate(mains, self.outDataStore)
        print("Mains sucessfully disaggregated!")

    def close(self):
        '''
        Function closes all open DataStores being used by the program.
        '''
        print("Closing DataStores...")
        self.dataStore.close()
        self.outDataStore.close()
        print("Output DataStores Sucessfully Closed")

    # ------------------------------------------------------------------
    # All plot functions below are a WORK IN PROGRESS.
    # Fuller documentation will be provided upon completion.
    # ------------------------------------------------------------------

    def plot_disag_apl(self, inst, appliance, t1="", t2=""):
        '''
        Plot the disaggregated series of one appliance from the output store.

        Parameters
        -----------
        inst: value passed to Key_Map to build the key map.
        appliance: appliance name looked up through the key map.
        t1, t2: bounds of the slice taken from the stored series.
        '''
        self.km = Key_Map(inst)
        plot_series(self.outDataStore.store.get(self.km.get_key(appliance))[t1: t2])
        plt.title("Disaggregated " + appliance.capitalize() + " Energy")
        plt.show()

    def show_plots(self):
        '''Show all pending matplotlib figures.'''
        plt.show()

    def building_plot_all(self, building_inst, t1, t2):
        '''
        Plot every meter of a building; the figure is not shown here
        (call show_plots afterwards).

        Parameters
        -----------
        building_inst: instance # of the building to plot.
        t1, t2: positional arguments forwarded to the MeterGroup plot call.
        '''
        self.dataSet.buildings[building_inst].elec.plot(t1, t2)
        plt.title("Building " + str(building_inst) + " Energy per Appliance")
        plt.ylabel('Power [W]')
        plt.xlabel('Hour')

    def plot_redd_mains_data(self, inst=1, t1="", t2=""):
        '''
        Plot the sum of the 'mains1' and 'mains2' series of the input store.

        Parameters
        -----------
        inst: value passed to Key_Map to build the key map.
        t1, t2: bounds of the slice taken from both mains series.
        '''
        self.km = Key_Map(inst)
        series1 = self.dataStore.store.get(self.km.get_key('mains1'))[t1:t2]
        series2 = self.dataStore.store.get(self.km.get_key('mains2'))[t1:t2]
        plot_series(series1 + series2)
        plt.title("Building " + str(inst) + " Mains Energy")
        plt.show()
def setUpClass(cls):
    """Open the ``energy.h5`` fixture store and load its meter devices.

    The opened store is kept on the class as ``cls.datastore``.
    """
    fixture_path = join(data_dir(), 'energy.h5')
    store = HDFDataStore(fixture_path)
    cls.datastore = store
    ElecMeter.load_meter_devices(store)
plt.clf()

# Define the buildings to be used for training and disaggregation
train_building = 1
disag_building = 1

# ## Dummy training and disaggregation
# ### Training
dum = DummyDisaggregator()
print('\n== dum.train(dataset.buildings[%d].elec)' % (train_building))
dum.train(dataset.buildings[train_building].elec)

# ### Disaggregation
# NOTE(review): the output file is named "...-da-co.h5" although the model
# is the dummy disaggregator -- confirm the name is intended.
dum_outfile = dataset_directory / ('%s-da-co.h5' % (dataset_name.lower()))
output = HDFDataStore(str(dum_outfile), 'w')
print('\n== dum.disaggregate(dataset.buildings[%d].mains(), output)' % (disag_building))
try:
    dum.disaggregate(dataset.buildings[disag_building].elec.mains(), output)
finally:
    # Release the output file even if disaggregation fails.
    output.close()

# ### Results
print('\n== Plotting Dummy disaggregation results...')
da_data = DataSet(str(dum_outfile))
da_elec = da_data.buildings[disag_building].elec
ax = da_elec.plot()
ax.set_title("B%d Dummy disaggregation results" % (disag_building))
plt.savefig('results/%s__b%d__elec__dummy.png' % (dataset_name, disag_building))
plt.clf()

f1 = f1_score(da_elec, dataset.buildings[disag_building].elec)
def generate_vertices():
    """
    Predicts the power demand of the target appliance using the intermediate
    models which are exported during training. Generates a polygon from
    those predictions.

    For every exported epoch (10, 20, ..., 340) the model is imported, the
    test mains are disaggregated into a temporary HDF5 file, and the
    predicted kettle series is turned into a list of (x, y) vertices. The
    ground-truth series is appended as a last polygon with z = 350. Three
    pickle files (``vertices.pkl``, ``zs.pkl``, ``ys.pkl``) are written to
    the results directory; ``ys.pkl`` holds the ground-truth power values.
    """
    train = DataSet('../data/ukdale.h5')
    train.clear_cache()
    train.set_window(start="13-4-2013", end="31-7-2013")

    test = DataSet('../data/ukdale.h5')
    test.clear_cache()
    test.set_window(start='7-2-2014 08:00:00', end='7-3-2014')

    train_building = 1
    test_building = 5
    sample_period = 6
    meter_key = 'kettle'
    learning_rate = 1e-5

    train_elec = train.buildings[train_building].elec
    test_elec = test.buildings[test_building].elec
    train_meter = train_elec.submeters()[meter_key]
    test_mains = test_elec.mains()

    results_dir = '../results/UKDALE-ACROSS-BUILDINGS-RNN-lr=1e-05-2018-02-03-11-48-12'
    train_logfile = os.path.join(results_dir, 'training.log')
    val_logfile = os.path.join(results_dir, 'validation.log')

    rnn = RNNDisaggregator(train_logfile, val_logfile, learning_rate, init=False)

    verts = []
    zs = []  # epochs
    for z in np.arange(10, 341, 10):
        # disaggregate model
        model = 'UKDALE-RNN-kettle-{}epochs.h5'.format(z)
        rnn.import_model(os.path.join(results_dir, model))
        disag_path = os.path.join(results_dir, 'disag-out-{}epochs.h5'.format(z))
        results_file = os.path.join(results_dir, 'results-{}epochs.txt'.format(z))
        output = HDFDataStore(disag_path, 'w')
        try:
            rnn.disaggregate(test_mains, output, results_file, train_meter,
                             sample_period=sample_period)
        finally:
            # Release the output file even if disaggregation fails.
            output.close()
        os.remove(results_file)

        # get predicted curve for epoch=z
        result = DataSet(disag_path)
        try:
            res_elec = result.buildings[test_building].elec
            predicted = res_elec[meter_key]
            predicted = predicted.power_series(sample_period=sample_period)
            predicted = next(predicted)
        finally:
            # Close the store before deleting its file: removing a file
            # that is still open fails on some platforms and would leave
            # one open handle per epoch.
            result.store.close()
        os.remove(disag_path)

        predicted.fillna(0, inplace=True)
        ys = np.array(predicted)  # power
        xs = np.arange(ys.shape[0])  # timestamps
        verts.append(list(zip(xs, ys)))  # add list of x-y-coordinates
        zs.append(z)

    ground_truth = test_elec[meter_key]
    ground_truth = ground_truth.power_series(sample_period=sample_period)
    ground_truth = next(ground_truth)
    ground_truth.fillna(0, inplace=True)
    ys = np.array(ground_truth)  # power
    xs = np.arange(ys.shape[0])  # timestamps
    verts.append(list(zip(xs, ys)))  # add list of x-y-coordinates
    zs.append(350)

    zs = np.asarray(zs)

    # Pad each polygon with a zero-power vertex at both ends. The closing
    # vertex uses the list length *after* the leading vertex was inserted.
    for polygon in verts:
        polygon.insert(0, [0, np.array([0])])
        polygon.append([len(polygon), np.array([0])])

    # Context managers make sure the pickle files are flushed and closed.
    with open(os.path.join(results_dir, 'vertices.pkl'), 'wb') as fh:
        pickle.dump(verts, fh)
    with open(os.path.join(results_dir, 'zs.pkl'), 'wb') as fh:
        pickle.dump(zs, fh)
    with open(os.path.join(results_dir, 'ys.pkl'), 'wb') as fh:
        pickle.dump(ys, fh)
def get_disaggregation(device, total_aggregate):
    """Disaggregate the mains of the bundled iAWE dataset for one device.

    Parameters
    ----------
    device : one of ``"fridge"``, ``"air conditioner"`` or
        ``"washing machine"``. Any other value makes the function return
        ``None`` without touching any file.
    total_aggregate : number convertible to ``float``; it is only used to
        print a per-sample average.

    Returns
    -------
    The ``elec`` meter group of building 1 read back from the
    disaggregation output ``disag-out.h5``, or ``None`` for an unsupported
    device.
    """
    devices = ["fridge", "air conditioner", "washing machine"]
    if device not in devices:
        return None

    here = os.path.dirname(os.path.abspath(__file__))
    dataset_file = os.path.join(here, "dataset/iawe2.h5")

    # NOTE(review): 30 * 24 * 60 is the number of *minutes* in 30 days,
    # despite the variable name -- confirm the intended unit.
    total_seconds = 30 * 24 * 60
    val_per_second = float(total_aggregate) / (total_seconds)
    print(val_per_second)

    # The file is only read here, so open it read-only. `dataset[()]`
    # replaces the `.value` accessor that was removed in h5py 3.
    with h5py.File(dataset_file, "r") as f1:
        table = f1["building1/elec/meter1/table"][()]
        start = int(str(table[0][0])[:10])
        end = start + total_seconds
        print(end - start, total_seconds)
        # (A disabled experiment used to overwrite the table with synthetic
        # per-sample values derived from val_per_second at this point.)

    end = datetime.fromtimestamp(end)
    end = end.isoformat(' ', 'seconds')

    test = DataSet(dataset_file)
    # Only the end of the window is restricted; the start is left open.
    test.set_window(end=end)
    test_elec = test.buildings[1].elec
    test_mains = test_elec.mains()[1]
    df = next(test_mains.load())
    print(df)
    # Unused below, but the lookup is kept so that a device missing from
    # the dataset still fails here, as before.
    test_meter = test_elec.submeters()[device]

    disag_filename = 'disag-out.h5'  # The filename of the resulting datastore
    output = HDFDataStore(disag_filename, 'w')
    try:
        disaggregator = ShortSeq2PointDisaggregator()
        model_file = os.path.join(
            here, "disag1/IAWE-RNN-h{}-{}-{}epochs.h5".format(1, device, 10))
        disaggregator.import_model(model_file)

        # test_mains: The aggregated signal meter
        # output: The output datastore
        # third argument: meter whose metadata is copied into the datastore
        #   (the mains meter itself is passed here)
        disaggregator.disaggregate(test_mains, output, test_mains,
                                   sample_period=1)
    finally:
        # Release the output file even if loading or disaggregation fails.
        output.close()

    result = DataSet(disag_filename)
    res_elec = result.buildings[1].elec
    # The whole meter group is returned rather than a single device's
    # prediction (res_elec[device]).
    prediction = res_elec
    return prediction
def _summarise_metrics(predicted, truth):
    """Return the metric dict reported by ``dae`` for one predicted/truth meter pair."""
    rpaf = metrics.recall_precision_accuracy_f1(predicted, truth)
    return {
        'recall_score': rpaf[0],
        'precision_score': rpaf[1],
        'accuracy_score': rpaf[2],
        'f1_score': rpaf[3],
        'mean_absolute_error': metrics.mean_absolute_error(predicted, truth),
        'mean_squared_error': metrics.mean_square_error(predicted, truth),
        'relative_error_in_total_energy': metrics.relative_error_total_energy(predicted, truth),
        'nad': metrics.nad(predicted, truth),
        'disaggregation_accuracy': metrics.disaggregation_accuracy(predicted, truth),
    }


def dae(dataset_path, train_building, train_start, train_end, test_building,
        test_start, test_end, val_building, val_start, val_end, meter_key,
        sample_period, num_epochs, patience, sequence_length, optimizer,
        learning_rate, loss):
    """Train a DAE disaggregator and evaluate it on validation and test windows.

    The dataset at ``dataset_path`` is opened three times, once per window
    (train / validation / test). The model is trained on the training mains
    and the ``meter_key`` submeter, then used to disaggregate the validation
    and test mains into ``disag-out-val.h5`` and ``disag-out-test.h5`` in
    the current working directory.

    Parameters
    ----------
    dataset_path : path of the HDF5 dataset.
    train_building, val_building, test_building : building keys.
    train_start, train_end, val_start, val_end, test_start, test_end :
        window bounds passed to ``DataSet.set_window``.
    meter_key : key of the target appliance meter.
    sample_period : sample period forwarded to training and disaggregation.
    num_epochs : maximum number of training epochs.
    patience, sequence_length, optimizer, learning_rate, loss : forwarded
        to ``DAEDisaggregator``.

    Returns
    -------
    dict
        ``'val_metrics'`` and ``'test_metrics'`` (metric dicts),
        ``'time_taken'`` (seconds, formatted with two decimals) and
        ``'epochs'`` (the early-stop epoch when it is non-zero, otherwise
        ``num_epochs``).
    """
    # Start tracking time
    start = time.time()

    # Prepare dataset and options
    train = DataSet(dataset_path)
    train.set_window(start=train_start, end=train_end)
    val = DataSet(dataset_path)
    val.set_window(start=val_start, end=val_end)
    test = DataSet(dataset_path)
    test.set_window(start=test_start, end=test_end)

    try:
        train_elec = train.buildings[train_building].elec
        val_elec = val.buildings[val_building].elec
        test_elec = test.buildings[test_building].elec

        train_meter = train_elec.submeters()[meter_key]
        try:
            train_mains = train_elec.mains().all_meters()[0]
            val_mains = val_elec.mains().all_meters()[0]
            test_mains = test_elec.mains().all_meters()[0]
        except AttributeError:
            # mains() did not return a group exposing all_meters(): use the
            # returned object directly. val_mains must be set here as well,
            # otherwise validation disaggregation fails with a NameError.
            train_mains = train_elec.mains()
            val_mains = val_elec.mains()
            test_mains = test_elec.mains()

        # Named model so the local does not shadow this function's name.
        model = DAEDisaggregator(sequence_length, patience, optimizer,
                                 learning_rate, loss)

        # Train
        model.train(train_mains, train_meter, epochs=num_epochs,
                    sample_period=sample_period)

        # Get number of earlystop epochs
        num_epochs = model.stopped_epoch if model.stopped_epoch != 0 else num_epochs

        # Disaggregate: validation
        val_disag_filename = 'disag-out-val.h5'
        output = HDFDataStore(val_disag_filename, 'w')
        try:
            model.disaggregate(val_mains, output, train_meter,
                               sample_period=sample_period)
        finally:
            output.close()

        # Disaggregate: test
        test_disag_filename = 'disag-out-test.h5'
        output = HDFDataStore(test_disag_filename, 'w')
        try:
            model.disaggregate(test_mains, output, train_meter,
                               sample_period=sample_period)
        finally:
            output.close()

        # Results: validation
        result_val = DataSet(val_disag_filename)
        try:
            res_elec_val = result_val.buildings[val_building].elec
            val_metrics_results_dict = _summarise_metrics(
                res_elec_val[meter_key], val_elec[meter_key])
        finally:
            result_val.store.close()

        # Results: test
        result = DataSet(test_disag_filename)
        try:
            res_elec = result.buildings[test_building].elec
            test_metrics_results_dict = _summarise_metrics(
                res_elec[meter_key], test_elec[meter_key])
        finally:
            result.store.close()

        # end tracking time
        end = time.time()
        time_taken = end - start  # in seconds

        # A richer result record (dataset path, buildings, windows and
        # hyper-parameters) was sketched here earlier; only the fields
        # below are reported.
        return {
            'val_metrics': val_metrics_results_dict,
            'test_metrics': test_metrics_results_dict,
            'time_taken': format(time_taken, '.2f'),
            'epochs': num_epochs,
        }
    finally:
        # Close Dataset files, also when training or evaluation failed.
        train.store.close()
        val.store.close()
        test.store.close()
def nilmtkECOfunc(dataset_loc, train_start, train_end, test_start, test_end,
                  output_period):
    """Train and run Combinatorial Optimisation on building 2 of a dataset.

    Parameters
    ----------
    dataset_loc : path of the HDF5 dataset; it is opened once for the
        training window and once for the test window.
    train_start, train_end : training window.
    test_start, test_end : test (disaggregation) window.
    output_period : sample period used for training, disaggregation and
        for resampling the training data frame.

    Returns
    -------
    tuple
        ``(disag_co_elec_submeter_df, gt_df_sub, co,
        train_elec_df_aligned_drop)``: the disaggregated submeter frame
        (first column dropped), the ground-truth frame aligned to the
        disaggregation index (first three columns dropped), the trained
        model, and the resampled training frame (first three columns
        dropped).
    """
    from nilmtk.disaggregate import CombinatorialOptimisation

    #### configuration ####
    period_s = output_period
    building = 2

    #### load ####
    train = DataSet(dataset_loc)
    test = DataSet(dataset_loc)
    train.set_window(start=train_start, end=train_end)
    test.set_window(start=test_start, end=test_end)
    print(train_start)
    print(train_end)
    print(test_start)
    print(test_end)

    #### electrical metergroup ####
    train_elec = train.buildings[building].elec
    test_elec = test.buildings[building].elec

    #### training process ####
    start = time.time()
    co = CombinatorialOptimisation()
    co.train(train_elec, sample_period=period_s)
    end = time.time()
    print("Runtime =", end - start, "seconds.")

    #### disaggregation process ####
    start = time.time()
    disag_filename = '../dataset/ecob-b2-kall-co-1w:11-1m.h5'
    output = HDFDataStore(disag_filename, 'w')
    try:
        co.disaggregate(test_elec.mains(), output, sample_period=period_s)
        end = time.time()
        print("Runtime =", end - start, "seconds.")
    finally:
        # Release the output file even if disaggregation fails.
        output.close()

    disag_co = DataSet(disag_filename)
    disag_co_elec = disag_co.buildings[building].elec

    #### creating dataframe from both disaggregated and ground truth metergroups
    disag_co_elec_df = disag_co_elec.dataframe_of_meters()
    gt_full_df = test_elec.dataframe_of_meters()
    # (The NaN-dropped copies that fed the disabled FTE / total-error /
    # Jaccard metrics were never used for the returned values and are no
    # longer computed.)

    #### output ####
    # drop aggregated power
    disag_co_elec_submeter_df = disag_co_elec_df.drop(
        disag_co_elec_df.columns[[0]], axis=1)
    # drop the unwanted timestamps: keep only the rows at disaggregation
    # timestamps. `.ix` was removed in pandas 1.0; reindex() keeps its
    # behaviour of yielding NaN rows for timestamps missing from the
    # ground truth.
    gt_df_aligned = gt_full_df.reindex(disag_co_elec_submeter_df.index)
    # drop aggregated power (first three columns)
    gt_df_sub = gt_df_aligned.drop(gt_df_aligned.columns[[0, 1, 2]], axis=1)

    # train
    train_elec_df = train_elec.dataframe_of_meters()
    train_elec_df_aligned = train_elec_df.resample(str(period_s) + 'S').asfreq()
    train_elec_df_aligned_drop = train_elec_df_aligned.drop(
        train_elec_df_aligned.columns[[0, 1, 2]], axis=1)

    return disag_co_elec_submeter_df, gt_df_sub, co, train_elec_df_aligned_drop
meter_key, epochs)) end = time.time() print("Train =", end - start, "seconds.") file = open('stackTrainSetsInfo_' + meter_key, 'r') for line in file: toks = line.split(',') StackTrain = DataSet(toks[0]) print(toks[2], '-', toks[3]) StackTrain.set_window(start=toks[2], end=toks[3]) test_elec = StackTrain.buildings[int(toks[1])].elec test_mains = test_elec.mains() print("========== DISAGGREGATE (stackTrain)============") disag_filename = "StackTrain-h" + toks[1] + ".h5" output = HDFDataStore(disag_filename, 'w') disaggregator.disaggregate(test_mains, output, test_elec[meter_key], sample_period=sample_period) output.close() for i in test_building_list: test_elec = test.buildings[i].elec test_mains = test_elec.mains() print("========== DISAGGREGATE ============") disag_filename = "StackTest-" + str(i) + ".h5" output = HDFDataStore(disag_filename, 'w') disaggregator.disaggregate(test_mains, output,
def nilmtkDREDfunc(dataset_loc, train_start, train_end, test_start, test_end,
                   output_period):
    """Train and run Combinatorial Optimisation on building 1 of a dataset.

    Parameters
    ----------
    dataset_loc : path of the HDF5 dataset; it is opened once for the
        training window and once for the test window. The disaggregation
        output is written to ``dataset_loc + 'DREDapp.h5'``.
    train_start, train_end : training window.
    test_start, test_end : test (disaggregation) window.
    output_period : sample period used for training, disaggregation and
        for resampling the training data frame.

    Returns
    -------
    tuple
        ``(disag_co_elec_submeter_df, gt_df_sub, co,
        train_elec_df_aligned_drop)``: the disaggregated submeter frame,
        the ground-truth frame aligned to the disaggregation index, the
        trained model, and the resampled training frame. The first column
        is dropped from each of the three frames.
    """
    from nilmtk.disaggregate import CombinatorialOptimisation

    #### configuration ####
    period_s = output_period
    building = 1

    #### load ####
    train = DataSet(dataset_loc)
    test = DataSet(dataset_loc)
    train.set_window(start=train_start, end=train_end)
    test.set_window(start=test_start, end=test_end)
    print(train_start)
    print(train_end)
    print(test_start)
    print(test_end)

    #### electrical metergroup ####
    train_elec = train.buildings[building].elec
    test_elec = test.buildings[building].elec

    #### training process ####
    start = time.time()
    co = CombinatorialOptimisation()
    co.train(train_elec, sample_period=period_s)
    end = time.time()
    print("Runtime =", end - start, "seconds.")

    #### disaggregation process ####
    start = time.time()
    disag_filename = dataset_loc + 'DREDapp.h5'
    output = HDFDataStore(disag_filename, 'w')
    try:
        co.disaggregate(test_elec.mains(), output, sample_period=period_s)
        end = time.time()
        print("Runtime =", end - start, "seconds.")
    finally:
        # Release the output file even if disaggregation fails.
        output.close()

    disag_co = DataSet(disag_filename)
    disag_co_elec = disag_co.buildings[building].elec

    #### creating dataframe from both disaggregated and ground truth metergroups
    disag_co_elec_df = disag_co_elec.dataframe_of_meters()
    gt_full_df = test_elec.dataframe_of_meters()
    # (The NaN-dropped copies originally prepared for a Jaccard metric were
    # never used for the returned values and are no longer computed.)

    #### output ####
    # drop aggregated power from output
    disag_co_elec_submeter_df = disag_co_elec_df.drop(
        disag_co_elec_df.columns[[0]], axis=1)
    # drop the unwanted timestamps on ground truth (take the sampled
    # timestamps). `.ix` was removed in pandas 1.0; reindex() keeps its
    # behaviour of yielding NaN rows for timestamps missing from the
    # ground truth.
    gt_df_aligned = gt_full_df.reindex(disag_co_elec_submeter_df.index)
    # drop aggregated power from ground truth
    gt_df_sub = gt_df_aligned.drop(gt_df_aligned.columns[[0]], axis=1)

    # train data frame, resampled to the disaggregation period, with the
    # main power dropped
    train_elec_df = train_elec.dataframe_of_meters()
    train_elec_df_aligned = train_elec_df.resample(str(period_s) + 'S').asfreq()
    train_elec_df_aligned_drop = train_elec_df_aligned.drop(
        train_elec_df_aligned.columns[[0]], axis=1)

    return disag_co_elec_submeter_df, gt_df_sub, co, train_elec_df_aligned_drop
print(
    "Calculating ground truth===============================================")
loc.dataset.set_window(start=dataset_start_date_disag, end=dataset_end_date_disag)
gt = GroundTruth(loc, co, baseline=vampire_power_in_original)
gt.generate()

time_start_disag = time.time()
print("\nTotal elapsed: %s seconds ---" % (time_start_disag - start_time))
print("Section Ground truth : %s seconds ---\n" % (time_start_disag - time_start_gt))

#DISAGREGGATION================================================================
print(
    "Disaggregating=========================================================")
output = HDFDataStore(h5_disag, 'w')
# The window is set again right before disaggregating, with the same bounds
# as for the ground-truth step.
loc.dataset.set_window(start=dataset_start_date_disag, end=dataset_end_date_disag)
try:
    co.disaggregate(loc.elec.mains(), output, location_data=loc, baseline=vampire_power_in_original, resample_seconds=60)
finally:
    # Release the output file even if disaggregation fails.
    output.close()

time_start_metrics = time.time()
print("\nTotal elapsed: %s seconds ---" % (time_start_metrics - start_time))
print("Section Disaggregation: %s seconds ---\n" % (time_start_metrics - time_start_disag))

#METRICS=======================================================================
def test_all(path_to_directory):
    '''
    Run a smoke test of MeterGroup statistics, plotting and CO
    disaggregation over every dataset in a directory.

    path_to_directory: Contains the h5 files on which the tests are
        supposed to be run. Only regular files whose name contains '.h5'
        and not '.swp' are processed, in sorted order.

    An AttributeError raised anywhere in the processing loop stops the run
    and is reported on stdout instead of being propagated.
    '''
    check_directory_exists(path_to_directory)
    files = [f for f in listdir(path_to_directory)
             if isfile(join(path_to_directory, f)) and '.h5' in f and '.swp' not in f]
    files.sort()
    print("Datasets collected and sorted. Processing...")

    try:
        for i, file in enumerate(files):
            current_file = DataSet(join(path_to_directory, file))
            print("Printing metadata for current file...done.")
            print_dict(current_file.metadata)
            print(" Loading file # ", i, " : ", file, ". Please wait.")

            for building_number in range(1, len(current_file.buildings) + 1):
                # Examine metadata for a single house
                elec = current_file.buildings[building_number].elec
                print("The dataset being processed is : ", elec.dataset())
                print("Metadata for current file: ")
                print_dict(current_file.buildings[building_number].metadata)
                print("Appliance label information: ", elec.appliance_label())
                print("Appliances:- ")
                # Distinct name so the file index `i` is not overwritten.
                for appliance in elec.appliances:
                    print(appliance)

                print("Examining sub-metered appliances...")
                print("Collecting stats on meters...Done.")
                # NOTE(review): this prints the attribute itself and does
                # not call it -- confirm whether a call was intended.
                print(elec._collect_stats_on_all_meters)
                print("Timeframe: ", elec.get_timeframe())
                print("Available power AC types: ", elec.available_power_ac_types())
                print("Clearing cache...done.")
                elec.clear_cache()
                print("Testing if there are meters from multiple buildings. Result returned by method: ",
                      elec.contains_meters_from_multiple_buildings())

                # TODO: Find a better way to test the correlation function
                # print ("Testing the correlation function. ", elec.correlation(elec))
                print("List of disabled meters: ", elec.disabled_meters)
                print("Trying to determine the dominant appliance: ")
                try:
                    elec.dominant_appliance()
                except RuntimeError:
                    print("More than one dominant appliance in MeterGroup! "
                          "(The dominant appliance per meter should be manually "
                          "specified in the metadata. If it isn't and if there "
                          "are multiple appliances for a meter then NILMTK "
                          "assumes all appliances on that meter are dominant. "
                          "NILMTK can't automatically distinguish between "
                          "multiple appliances on the same meter (at least, "
                          "not without using NILM!))")

                print("Dropout rate: ", elec.dropout_rate())
                try:
                    print("Calculating energy per meter:")
                    print(elec.energy_per_meter())
                    print("Calculating total entropy")
                    print(elec.entropy())
                    print("Calculating entropy per meter: ")
                    print(elec.entropy_per_meter())
                except ValueError:
                    print("ValueError: Total size of array must remain unchanged.")

                print("Calculating fraction per meter.")
                print(elec.fraction_per_meter())
                #print ("Average energy per period: ", elec.average_energy_per_period())

                print("Executing functions...")
                # Disabled experiment: call every attribute of `elec` and
                # time it. Kept for reference only.
                # lis=[]
                # func=""
                # for function in dir(elec):
                #     try:
                #         start=time.time()
                #         if ("__" not in function or "dataframe_of_meters" not in function):
                #             func=getattr(elec, function)
                #             print ("Currently executing ", function, ". Please wait...")
                #             print (func())
                #             # print ("cProfile stats - printed")
                #             # cProfile.run("func")
                #             end=time.time()
                #             print ("Time taken for the entire process : ", (end - start))
                #     except AttributeError:
                #         print ("Attribute error occured. ")
                #     except TypeError:
                #         lis.append(function)
                #         print ("Warning: TypeError")

                print("Plotting wiring hierarchy of meters....")
                elec.draw_wiring_graph()

                ## DISAGGREGATION STARTS HERE
                appliance_type = "unknown"
                #TODO : appliance_type should cycle through all appliances and check for each of them. For this, use a list.
                selected_appliance = nilmtk.global_meter_group.select_using_appliances(type=appliance_type)
                appliance_restricted = MeterGroup(selected_appliance.meters)
                # NOTE(review): the extent of this `if` body was
                # reconstructed from flattened source -- confirm.
                if appliance_restricted.proportion_of_upstream_total_per_meter() is not None:
                    proportion_per_appliance = appliance_restricted.proportion_of_upstream_total_per_meter()
                    proportion_per_appliance.plot(kind='bar')
                    plt.title('Appliance energy as proportion of total building energy')
                    plt.ylabel('Proportion')
                    plt.xlabel('Appliance (<appliance instance>, <building instance>, <dataset name>)')

                selected_appliance.select(building=building_number).total_energy()
                # NOTE(review): building 1 is hard-coded here while the
                # line above uses building_number -- confirm.
                selected_appliance.select(building=1).plot()

                appliance_restricted = MeterGroup(selected_appliance.meters)
                daily_energy = pd.DataFrame(
                    [meter.average_energy_per_period(offset_alias='D')
                     for meter in appliance_restricted.meters])
                daily_energy.plot(kind='hist')
                plt.title('Histogram of daily energy')
                plt.xlabel('energy (kWh)')
                plt.ylabel('Occurences')
                plt.legend().set_visible(False)

                current_file.store.window = TimeFrame(start='2012-04-01 00:00:00-05:00',
                                                      end='2012-04-02 00:00:00-05:00')
                #elec.plot();

                fraction = elec.submeters().fraction_per_meter().dropna()
                labels = elec.get_appliance_labels(fraction.index)
                plt.figure(figsize=(8, 8))
                fraction.plot(kind='pie', labels=labels)

                elec.select_using_appliances(category='heating')
                elec.select_using_appliances(category='single-phase induction motor')

                co = CombinatorialOptimisation()
                co.train(elec)

                for model in co.model:
                    print_dict(model)

                # NOTE(review): `data_dir` is passed to join() without
                # being called -- confirm it is a path and not a function.
                disag_filename = join(data_dir, 'ampds-disag.h5')
                output = HDFDataStore(disag_filename, 'w')
                try:
                    co.disaggregate(elec.mains(), output)
                finally:
                    # Release the output file even if disaggregation fails.
                    output.close()

                disag = DataSet(disag_filename)
                try:
                    disag_elec = disag.buildings[building_number].elec

                    f1 = f1_score(disag_elec, elec)
                    f1.index = disag_elec.get_appliance_labels(f1.index)
                    f1.plot(kind='bar')
                    plt.xlabel('appliance')
                    plt.ylabel('f-score')
                    disag_elec.plot()
                finally:
                    # Close the result store even if scoring/plotting fails.
                    disag.store.close()
    except AttributeError as err:
        print("AttributeError occured while executing. This means that the value returned by proportion_per_appliance = appliance_restricted.proportion_of_upstream_total_per_meter() is None")
        # The message above names only one possible cause; report the
        # actual error as well so other failures can be diagnosed.
        print("Underlying error: ", err)
def runExperiment(experiment: experimentInfo, metricsResFileName, clearMetricsFile):
    """Run the stacked model on one building/appliance and record its metrics.

    Every dataset in ``experiment.dsList`` is opened, the meter
    ``experiment.meter_key`` of building ``experiment.building`` is aligned
    against the same meter of the first dataset, and the aligned series are
    stacked column-wise, scaled and fed to the classifier chunk by chunk.
    Predictions are written to ``experiment.outName`` and then compared with
    the same meter of ``experiment.pathOrigDS``.

    NOTE(review): relies on ``scaler``, ``clf``, ``sample_period`` and
    ``align_two_meters`` being defined at module level - confirm.

    Parameters
    ----------
    experiment : experimentInfo
        Supplies ``dsList``, ``outName``, ``building``, ``meter_key``,
        ``pathOrigDS`` and ``meterTH``.
    metricsResFileName
        Forwarded to ``metrics.writeResultsToCSV``.
    clearMetricsFile
        Forwarded to ``metrics.writeResultsToCSV``.
    """
    dsPathsList_Test = experiment.dsList
    outFileName = experiment.outName
    test_building = experiment.building
    meter_key = experiment.meter_key
    pathOrigDS = experiment.pathOrigDS
    meterTH = experiment.meterTH
    print('House ', test_building)

    # Load a "complete" dataset to have the test's timerange.
    test = DataSet(dsPathsList_Test[0])
    test_elec = test.buildings[test_building].elec
    # Reference meter: every other meter is aligned against this one.
    testRef_meter = test_elec.submeters()[meter_key]

    # Align every test meter with testRef_meter as master.
    test_series_list = []
    for path in dsPathsList_Test:
        test = DataSet(path)
        test_elec = test.buildings[test_building].elec
        test_meter = test_elec.submeters()[meter_key]
        test_series_list.append(align_two_meters(testRef_meter, test_meter))

    # Init vars for the output.
    MIN_CHUNK_LENGTH = 300  # Depends on the basemodels of the ensemble
    timeframes = []
    building_path = '/building{}'.format(test_meter.building())
    mains_data_location = building_path + '/elec/meter1'
    data_is_available = False
    disag_filename = outFileName
    output_datastore = HDFDataStore(disag_filename, 'w')

    # Loop-invariant output layout.
    measurement = ('power', 'active')
    cols = pd.MultiIndex.from_tuples([measurement])

    # Holds the chunk whose index/metadata is used when saving results.
    # (If chunks ever differ in size, the shortest one would have to be used
    # instead; that is not the case in the current implementation.)
    chunkDataForOutput = None

    while True:
        try:
            testX = None
            # Get the next chunk of each series.
            for columnInd, testXGen in enumerate(test_series_list):
                # 'slave' is the meter needed ('master' is only for aligning).
                chunk = next(testXGen)['slave']
                chunk.fillna(0, inplace=True)
                if columnInd == 0:
                    # The first chunk supplies the metadata and the row count
                    # of the matrix holding all series as columns. (Comparing
                    # the array against ``[]`` to detect "not initialised"
                    # fails on recent NumPy versions.)
                    chunkDataForOutput = chunk
                    testX = np.zeros([len(chunk), len(test_series_list)])
                testX[:, columnInd] = chunk[:]
            testX = scaler.transform(testX)
        except StopIteration:
            # At least one series is exhausted: normal end of the data.
            break
        except Exception as err:
            # Keep the original best-effort behaviour (stop and evaluate what
            # was written so far) but do not hide the reason.
            print('Stopping disaggregation after error: {!r}'.format(err))
            break

        if len(chunkDataForOutput) < MIN_CHUNK_LENGTH:
            continue

        startTime = chunkDataForOutput.index[0]
        endTime = chunkDataForOutput.index[-1]
        # Info needed for the output to be usable with NILMTK.
        timeframes.append(TimeFrame(startTime, endTime))

        pred = clf.predict(testX)
        column = pd.Series(pred, index=chunkDataForOutput.index, name=0)
        appliance_power = pd.DataFrame({0: column})
        appliance_power[appliance_power < 0] = 0

        # Append prediction to output.
        data_is_available = True
        meter_instance = test_meter.instance()
        df = pd.DataFrame(appliance_power.values,
                          index=appliance_power.index,
                          columns=cols,
                          dtype="float32")
        key = '{}/elec/meter{}'.format(building_path, meter_instance)
        output_datastore.append(key, df)

        # Append aggregate data to output.
        # Note (for later): not 100% right, this should be the mains. It is
        # not used anywhere, so it does not matter in this case.
        mains_df = pd.DataFrame(chunkDataForOutput, columns=cols, dtype="float32")
        output_datastore.append(key=mains_data_location, value=mains_df)

    # Save metadata to output.
    if data_is_available:
        disagr = Disaggregator()
        disagr.MODEL_NAME = 'Stacked model'
        disagr._save_metadata_for_disaggregation(
            output_datastore=output_datastore,
            sample_period=sample_period,
            measurement=measurement,
            timeframes=timeframes,
            building=test_meter.building(),
            meters=[test_meter])

    # Flush and release the output file before it is re-opened for metrics.
    output_datastore.close()

    #======================== Calculate Metrics =====================================
    testYDS = DataSet(pathOrigDS)
    testYDS.set_window(start=test_meter.get_timeframe().start.date(),
                       end=test_meter.get_timeframe().end.date())
    testY_elec = testYDS.buildings[test_building].elec
    testY_meter = testY_elec.submeters()[meter_key]
    test_mains = testY_elec.mains()  # only needed by the disabled TECA metric

    result = DataSet(disag_filename)
    res_elec = result.buildings[test_building].elec
    rpaf = metrics.recall_precision_accuracy_f1(res_elec[meter_key],
                                                testY_meter, meterTH, meterTH)
    relError = metrics.relative_error_total_energy(res_elec[meter_key],
                                                   testY_meter)
    MAE = metrics.mean_absolute_error(res_elec[meter_key], testY_meter)
    RMSE = metrics.RMSE(res_elec[meter_key], testY_meter)

    print("============ Recall: {}".format(rpaf[0]))
    print("============ Precision: {}".format(rpaf[1]))
    print("============ Accuracy: {}".format(rpaf[2]))
    print("============ F1 Score: {}".format(rpaf[3]))
    print("============ Relative error in total energy: {}".format(relError))
    print("============ Mean absolute error(in Watts): {}".format(MAE))
    print("=== For docs: {:.4}\t{:.4}\t{:.4}\t{:.4}\t{:.4}\t{:.4}".format(
        rpaf[0], rpaf[1], rpaf[2], rpaf[3], relError, MAE))
    # print("============ RMSE: {}".format(RMSE))
    # print("============ TECA: {}".format(metrics.TECA([res_elec[meter_key]],[testY_meter],test_mains)))

    resDict = {
        'model': 'TEST',
        'building': test_building,
        'Appliance': meter_key,
        'Appliance_Type': 2,
        'Recall': rpaf[0],
        'Precision': rpaf[1],
        'Accuracy': rpaf[2],
        'F1': rpaf[3],
        'relError': relError,
        'MAE': MAE,
        'RMSE': RMSE
    }
    metrics.writeResultsToCSV(resDict, metricsResFileName, clearMetricsFile)
# Train the last three models, reporting progress after each one.
co7.train(training_set7)
print("set 7 trained")
co8.train(training_set8)
print("set 8 trained")
co9.train(training_set9)
print("set 9 trained")
print("Algorithms trained!")

# Create 9 output files to hold disaggregated data.
print("Creating output files...")
outData1 = HDFDataStore("C:/NILM/Data/Model_Train/output1.h5", 'w')
outData2 = HDFDataStore("C:/NILM/Data/Model_Train/output2.h5", 'w')
outData3 = HDFDataStore("C:/NILM/Data/Model_Train/output3.h5", 'w')
outData4 = HDFDataStore("C:/NILM/Data/Model_Train/output4.h5", 'w')
outData5 = HDFDataStore("C:/NILM/Data/Model_Train/output5.h5", 'w')
outData6 = HDFDataStore("C:/NILM/Data/Model_Train/output6.h5", 'w')
outData7 = HDFDataStore("C:/NILM/Data/Model_Train/output7.h5", 'w')
outData8 = HDFDataStore("C:/NILM/Data/Model_Train/output8.h5", 'w')
outData9 = HDFDataStore("C:/NILM/Data/Model_Train/output9.h5", 'w')
print("output files created!")

# Disaggregate building 1 data using each training set.
print("Disaggregating building 1 mains using each trained model...")
def plot_zoomed_new_predicted_energy_consumption():
    """
    Predicts a new short window (of the given test set).

    The exported model for ``best_epoch`` is imported, the mains of the test
    window are disaggregated into a temporary HDF5 file, and the predicted
    and ground-truth kettle power are plotted and saved as
    ``zoomed_new_predicted_vs_ground_truth.png`` in the results directory.
    """
    train = DataSet('../data/ukdale.h5')
    train.clear_cache()
    train.set_window(start="13-4-2013", end="31-7-2013")
    test = DataSet('../data/ukdale.h5')
    test.clear_cache()
    test.set_window(start='16-9-2013 17:00:00', end='16-9-2013 18:00:00')

    train_building = 1
    test_building = 1
    sample_period = 6
    meter_key = 'kettle'
    learning_rate = 1e-5
    best_epoch = 140

    train_elec = train.buildings[train_building].elec
    test_elec = test.buildings[test_building].elec
    train_meter = train_elec.submeters()[meter_key]
    test_mains = test_elec.mains()

    results_dir = '../results/UKDALE-RNN-lr=1e-05-2018-02-16-18-52-34'
    train_logfile = os.path.join(results_dir, 'training.log')
    val_logfile = os.path.join(results_dir, 'validation.log')
    rnn = RNNDisaggregator(train_logfile,
                           val_logfile,
                           learning_rate,
                           init=False)

    model = 'UKDALE-RNN-kettle-{}epochs.h5'.format(best_epoch)
    rnn.import_model(os.path.join(results_dir, model))

    disag_filename = 'disag-out-{}epochs.h5'.format(best_epoch)
    disag_path = os.path.join(results_dir, disag_filename)
    output = HDFDataStore(disag_path, 'w')
    results_file = os.path.join(results_dir,
                                'results-{}epochs.txt'.format(best_epoch))
    rnn.disaggregate(test_mains,
                     output,
                     results_file,
                     train_meter,
                     sample_period=sample_period)
    os.remove(results_file)
    output.close()

    # Get the predicted curve for the best epoch. The series is read while
    # the temporary file is still open; only afterwards is the store closed
    # and the file deleted (deleting an open file fails on some platforms).
    result = DataSet(disag_path)
    try:
        res_elec = result.buildings[test_building].elec
        predicted = res_elec[meter_key]
        predicted = predicted.power_series(sample_period=sample_period)
        predicted = next(predicted)
    finally:
        result.store.close()
        os.remove(disag_path)
    predicted.fillna(0, inplace=True)
    y1 = np.array(predicted)  # power
    x1 = np.arange(y1.shape[0])  # timestamps

    ground_truth = test_elec[meter_key]
    ground_truth = ground_truth.power_series(sample_period=sample_period)
    ground_truth = next(ground_truth)
    ground_truth.fillna(0, inplace=True)
    y2 = np.array(ground_truth)  # power
    x2 = np.arange(y2.shape[0])  # timestamps

    # All data is in memory now, so the source datasets can be released.
    train.store.close()
    test.store.close()

    fig, (ax1, ax2, ax3) = plt.subplots(3, sharex=True, sharey=True)
    ax1.plot(x1, y1, color='r', label='predicted')
    ax1.plot(x2, y2, color='b', label='ground truth')
    ax2.plot(x1, y1, color='r')
    ax3.plot(x2, y2, color='b')
    ax1.set_title('Appliance: {}'.format(meter_key))
    fig.legend()
    fig.savefig(
        os.path.join(results_dir, 'zoomed_new_predicted_vs_ground_truth.png'))
class REDD_Data(object):
    '''
    REDD_Data is a thin wrapper around the lower level commands of the NILMTK
    software package, with focus on the REDD DataSet. It is designed to allow
    rapid experimentation and disaggregation compared to setting the package
    up from scratch.

    This class requires the following for proper usage:
    - NILMTK package: https://github.com/nilmtk
    - REDD Dataset (converted to .h5): redd.csail.mit.edu
    - Various dependencies (that NILMTK also requires), most can be
      downloaded through Anaconda: continuum.io/downloads

    Parameters
    -----------
    in_filepath: Filepath of converted REDD dataset (in .h5 format)
    out_filepath: filepath to place output disaggregation dataset (in .h5 format)

    Attributes
    -----------
    km: Key_Map Object
        starts as an empty dict and is replaced by a Key_Map in the plot
        functions; maps a meter's appliance name to its specific .H5 key.

    dataStore: NILMTK HDFDataStore Object
        the HDFDataStore that contains the converted REDD DataSet.

    dataSet: NILMTK DataSet Object
        the DataSet object that is loaded from the REDD DataStore
        (self.dataStore)

    outDataStore: NILMTK HDFDataStore Object
        the HDFDataStore (opened for writing) that will contain the
        disaggregated dataset.

    co: NILMTK CombinatorialOptimisation object
        the disaggregation model object that will be trained and will
        disaggregate the working dataset

    train_group: NILMTK MeterGroup object
        the MeterGroup object that is used to train the disaggregation model
        (self.co); an empty dict until a model has been trained.
    '''

    def __init__(self, in_filepath, out_filepath):
        print("Loading DataStore and Generating Dataset...")
        self.km = {}
        self.dataStore = HDFDataStore(in_filepath)
        self.dataSet = DataSet()
        self.dataSet.load(self.dataStore)
        self.outDataStore = HDFDataStore(out_filepath, 'w')
        self.co = CombinatorialOptimisation()
        self.train_group = {}
        print("Data Properly Loaded!")

    def train_disag_model(self, building_inst, use_topk=False, k=5):
        '''
        Function trains the disaggregation model using a selected MeterGroup.

        Nothing is trained (and only an error is printed) when building_inst
        is outside 1-6.

        Parameters
        -----------
        building_inst: the instance # of the building that you wish to grab
                       the training group from (1-6).

        use_topk: true if you wish to only grab the top k most energy
                  intensive appliances to train the model, false if you wish
                  to use all appliances.

        k: the # of appliances you wish to use (if use_topk = True)
        '''
        print("Training CO Disaggregation Model using given metergroup...")

        if not 0 < building_inst <= 6:
            print("Error: Please select a building_inst of 1-6.")
            print("Model unsucessfully trained.")
            return

        # Select the appropriate meter group to train with.
        elec = self.dataSet.buildings[building_inst].elec
        self.train_group = elec.select_top_k(k) if use_topk else elec

        self.co.train(self.train_group)
        print("CO Disaggreation Model Sucessfully Trained!")

    def load_disag_model(self, filepath):
        '''
        Function loads the disaggregation model from a file.

        Parameters
        -----------
        filepath: exact filepath of the model file.
        '''
        print("Loading CO Disaggreation Model...")
        self.co.import_model(filepath)
        print("Model Sucessfully Loaded!")

    def save_disag_model(self, filepath):
        '''
        Function saves the disaggregation model to a file.

        Parameters
        -----------
        filepath: exact filepath of the model file.
        '''
        print("Saving CO Disaggregation Model...")
        self.co.export_model(filepath)
        print("Model Sucessfully Saved!")

    def disaggregate(self, building_inst):
        '''
        Function will disaggregate the mains MeterGroup of the passed
        building instance, and save this to the self.outDataStore object.

        Parameters
        -----------
        building_inst: instance # of the building mains you wish to
                       disaggregate.
        '''
        print("Disaggregating Building Mains...")
        self.co.disaggregate(
            self.dataSet.buildings[building_inst].elec.mains(),
            self.outDataStore)
        print("Mains sucessfully disaggregated!")

    def close(self):
        '''
        Function closes all open DataStores being used by the program.
        '''
        print("Closing DataStores...")
        self.dataStore.close()
        self.outDataStore.close()
        print("Output DataStores Sucessfully Closed")

    # All plot functions below are a WORK IN PROGRESS!
    # Documentation will be provided upon completion.

    def plot_disag_apl(self, inst, appliance, t1="", t2=""):
        # Plots the disaggregated series of `appliance` between t1 and t2.
        self.km = Key_Map(inst)
        plot_series(
            self.outDataStore.store.get(self.km.get_key(appliance))[t1:t2])
        plt.title("Disaggregated " + appliance.capitalize() + " Energy")
        plt.show()

    def show_plots(self):
        plt.show()

    def building_plot_all(self, building_inst, t1, t2):
        # Draws every meter of the building; does not call plt.show() itself.
        self.dataSet.buildings[building_inst].elec.plot(t1, t2)
        plt.title("Building " + str(building_inst) + " Energy per Appliance")
        plt.ylabel('Power [W]')
        plt.xlabel('Hour')

    def plot_redd_mains_data(self, inst=1, t1="", t2=""):
        # Plots the sum of the two mains series between t1 and t2.
        self.km = Key_Map(inst)
        series1 = self.dataStore.store.get(self.km.get_key('mains1'))[t1:t2]
        series2 = self.dataStore.store.get(self.km.get_key('mains2'))[t1:t2]
        plot_series(series1 + series2)
        plt.title("Building " + str(inst) + " Mains Energy")
        plt.show()
# Same day, but a one-hour window for more refined data plots.
t1 = "2011-05-1 6:00"
t2 = "2011-05-1 7:00"

# Key map for building 1, and the appliance whose disaggregated series
# will be plotted.
kmap = Key_map()
disag_apl = 'fridge'
disag_key = kmap.get_key(disag_apl)

# Load the converted REDD dataset: open the HDF datastore first, then load
# it into an (initially empty) DataSet object.
print ("Loading DataSet.....")
r_datastore = HDFDataStore("C:/NILM/Data_Sets/redd_data.h5")
r_dataset = DataSet()
r_dataset.load(r_datastore)
print("DataSet Sucessfully Loaded!")

# Next step: eliminate all sections with no samples, starting from the
# metergroup of building one (house1 in REDD).
print("Conditioning Data... \n")
r_elec = r_dataset.buildings[1].elec
https://github.com/nilmtk/nilmtk/issues/376 """ data_dir = '/data/REDD' building_number = 3 disag_filename = join(data_dir, 'disag-fhmm' + str(building_number) + '.h5') data = DataSet(join(data_dir, 'redd.h5')) print("Loading building " + str(building_number)) elec = data.buildings[building_number].elec top_train_elec = elec.submeters().select_top_k(k=5) fhmm = fhmm_exact.FHMM() fhmm.train(top_train_elec) output = HDFDataStore(disag_filename, 'w') fhmm.disaggregate(elec.mains(), output) output.close() ### f1score fhmm disag = DataSet(disag_filename) disag_elec = disag.buildings[building_number].elec f1 = f1_score(disag_elec, elec) f1.index = disag_elec.get_labels(f1.index) f1.plot(kind='barh') plt.ylabel('appliance'); plt.xlabel('f-score'); plt.title("FHMM"); plt.savefig(join(data_dir, 'f1-fhmm' + str(building_number) + '.png')) disag.store.close()
# Abort early when the requested appliance is not known to the key map.
if not km.is_in_map(disag_appliance):
    sys.exit(
        "An incorrect appliance name has been entered. Please ensure the entered name is exactly correct."
    )

redd_data = DataSet("C:/NILM/Data/REDD/redd.h5")
try:
    # load mains of the building
    building_mains = redd_data.buildings[redd_building].elec.mains()

    # train disaggregation set
    co = CombinatorialOptimisation()
    training_set = redd_data.buildings[redd_building].elec
    co.train(training_set)

    # set output datastore
    outputData = HDFDataStore("C:/NILM/Data/Output/output.h5", 'w')
    try:
        # disaggregate
        co.disaggregate(building_mains, outputData)

        # to add:
        # 1) get the meter instance # of the appliance selected
        # 2) export the meter instance series of the output datastore to
        #    database using SQL, within t1-t2 parameters*
        #
        # *Cannot be implemented until database is setup in environment
    finally:
        # Close the output store even when disaggregation fails.
        outputData.store.close()
finally:
    # Close the input store even when training or disaggregation fails.
    redd_data.store.close()
print(middleTimeStr)

# Split the data at middleTimeStr: train on everything before it, test on
# everything after it.
train.set_window(end=middleTimeStr)
test.set_window(start=middleTimeStr)
train_elec = train.buildings[building_number].elec
test_elec = test.buildings[building_number].elec

# Train an FHMM on the five top submeters selected by select_top_k.
top_train_elec = train_elec.submeters().select_top_k(k=5)
fhmm = fhmm_exact.FHMM()
# mk: change this later to default
fhmm.train(top_train_elec, sample_period=60, resample=True)

# Disaggregate the test mains into a fresh HDF5 file.
outputAddress = "/nilmtk/data/iawe_449_3.h5"
output = HDFDataStore(outputAddress, 'w')
fhmm.disaggregate(test_elec.mains(),
                  output,
                  sample_period=60,
                  resample=True)
output.close()

# Load the FHMM prediction back and score it against the test meters.
disag = DataSet(outputAddress)
disag_elec = disag.buildings[building_number].elec
# disag_elec.plot()  # plot all disaggregated data
f1 = f1_score(disag_elec, test_elec)
f1.index = disag_elec.get_labels(f1.index)
f1.plot(kind='barh')

# Restrict the store to a fixed window, then plot all disaggregated data.
disag.store.window = TimeFrame(start='2013-07-10 18:00:00-05:00',
                               end='2013-07-17 04:00:00-05:00')
disag.buildings[building_number].elec.plot()
# verify a real appliance has been entered
#if km.is_in_map(disag_appliance) == False:
#    sys.exit("An incorrect appliance name has been entered. Please ensure the entered name is exactly correct.")

redd_data = DataSet(redd_fp)

# Mains of building 1 are what gets disaggregated.
building_mains = redd_data.buildings[1].elec.mains()

# Train the CO model on the 15 meters selected by select_top_k.
co = CombinatorialOptimisation()
training_set = redd_data.buildings[1].elec.select_top_k(15)
co.train(training_set)

# Disaggregate into a fresh output datastore and list what was written.
outputData = HDFDataStore(output_fp, 'w')
co.disaggregate(building_mains, outputData)
print(outputData.store.keys())

# Series used for the CSV outputs: the selected appliance from the output
# store and both mains channels from the input store.
output_csv_store = outputData.store[km.get_key(disag_appliance)]
mains1 = redd_data.store[km.get_key("mains1")]
mains2 = redd_data.store[km.get_key("mains2")]

# Fill gaps with 0, then resample mains1 to one-minute bins.
mains1 = mains1.fillna(value=0).resample("1min")
sys.exit( "An incorrect appliance name has been entered. Please ensure the entered name is exactly correct." ) redd_data = DataSet("/home/mike/workspace/data/redd_data.h5") # load mains of the building building_mains = redd_data.buildings[1].elec.mains() #train disaggregation set co = CombinatorialOptimisation() training_set = redd_data.buildings[1].elec co.train(training_set) #set output datastore outputData = HDFDataStore("/home/mike/workspace/data/redd_output.h5", 'w') #disaggregate co.disaggregate(building_mains, outputData) #set sub-datastore for CSV output output_csv_store = outputData.store.__getitem__(km.get_key(disag_appliance)) #set date parameters output_csv_store = output_csv_store[t1:t2] #fill NA values with 0 for graphing output_csv_store = output_csv_store.fillna(value=0) # #metrics processing ---------------------------------------------------------- # #create dict to hold energy metrics
def _build_surface_trace(power, epoch, color_index):
    """Return a plotly 'surface' trace drawing ``power`` as a ribbon at ``epoch``.

    Each sample becomes a two-point strip spanning ``epoch`` to ``epoch + 5``
    on the y axis; the whole ribbon is a single colour derived from
    ``color_index``.
    """
    colour = 'rgb(%d,%d,255)' % (color_index, color_index)
    return dict(
        z=[[value, value] for value in power],  # power
        x=[[step, step] for step in range(len(power))],  # timestamps
        y=[[epoch, epoch + 5] for _ in power],  # epochs
        colorscale=[[stop, colour] for stop in np.arange(0, 1.1, 0.1)],
        showscale=False,
        type='surface',
    )


def plot_prediction_over_epochs_ploty():
    """
    Predicts the power demand of the target appliance using the intermediate
    models which are exported during training.
    Plots the prediction curves using plotly.

    One ribbon is drawn for every exported model (epochs 10 to 400 in steps
    of 10) and a final one for the ground truth; the figure is written with
    ``plotly.offline.plot``.
    """
    train = DataSet('../data/ukdale.h5')
    train.clear_cache()
    train.set_window(start="13-4-2013", end="31-7-2013")
    test = DataSet('../data/ukdale.h5')
    test.clear_cache()
    test.set_window(start="23-7-2014 10:00:00", end="23-7-2014 11:00:00")

    train_building = 1
    test_building = 5
    sample_period = 6
    meter_key = 'kettle'
    learning_rate = 1e-5

    train_elec = train.buildings[train_building].elec
    test_elec = test.buildings[test_building].elec
    train_meter = train_elec.submeters()[meter_key]
    test_mains = test_elec.mains()

    results_dir = '../results/UKDALE-ACROSS-BUILDINGS-RNN-lr=1e-05-2018-02-03-11-48-12'
    train_logfile = os.path.join(results_dir, 'training.log')
    val_logfile = os.path.join(results_dir, 'validation.log')
    rnn = RNNDisaggregator(train_logfile,
                           val_logfile,
                           learning_rate,
                           init=False)

    data = []
    for i in range(10, 401, 10):
        # Disaggregate with the model exported after epoch i.
        model = 'UKDALE-RNN-kettle-{}epochs.h5'.format(i)
        rnn.import_model(os.path.join(results_dir, model))
        disag_filename = 'disag-out-{}epochs.h5'.format(i)
        disag_path = os.path.join(results_dir, disag_filename)
        output = HDFDataStore(disag_path, 'w')
        results_file = os.path.join(results_dir,
                                    'results-{}epochs.txt'.format(i))
        rnn.disaggregate(test_mains,
                         output,
                         results_file,
                         train_meter,
                         sample_period=sample_period)
        os.remove(results_file)
        output.close()

        # Read the predicted curve for epoch=i while the temporary file is
        # open, then close the store before deleting the file (deleting an
        # open file fails on some platforms and leaks one handle per epoch).
        result = DataSet(disag_path)
        try:
            res_elec = result.buildings[test_building].elec
            predicted = res_elec[meter_key]
            predicted = predicted.power_series(sample_period=sample_period)
            predicted = next(predicted)
        finally:
            result.store.close()
            os.remove(disag_path)
        predicted.fillna(0, inplace=True)

        ci = int(255 / 420 * i)  # ci = "color index"
        data.append(_build_surface_trace(predicted.tolist(), i, ci))

    # Plot the ground truth curve as the last curve.
    ground_truth = test_elec[meter_key]
    ground_truth = ground_truth.power_series(sample_period=sample_period)
    ground_truth = next(ground_truth)
    ground_truth.fillna(0, inplace=True)
    i = 410
    ci = int(255 / 410 * i)  # ci = "color index"
    data.append(_build_surface_trace(ground_truth.tolist(), i, ci))

    layout = dict(title='prediction over epochs',
                  showlegend=False,
                  scene=dict(xaxis=dict(title='timestamps'),
                             yaxis=dict(title='epochs'),
                             zaxis=dict(title='power'),
                             camera=dict(eye=dict(x=-1.7, y=-1.7, z=0.5))))

    fig = dict(data=data, layout=layout)
    plotly.offline.plot(fig, filename='filled-3d-lines')
def _fhmm_disaggregate_to_file(model, mains, filename):
    """Disaggregate ``mains`` with ``model`` into a new HDF5 file ``filename``."""
    output = HDFDataStore(filename, 'w')
    try:
        model.disaggregate(mains, output_datastore=output)
    finally:
        output.close()


def _fhmm_collect_metrics(predicted_meter, truth_meter):
    """Return the dict of evaluation metrics for one predicted/ground-truth pair."""
    rpaf = metrics.recall_precision_accuracy_f1(predicted_meter, truth_meter)
    return {
        'recall_score': rpaf[0],
        'precision_score': rpaf[1],
        'accuracy_score': rpaf[2],
        'f1_score': rpaf[3],
        'mean_absolute_error':
        metrics.mean_absolute_error(predicted_meter, truth_meter),
        'mean_squared_error':
        metrics.mean_square_error(predicted_meter, truth_meter),
        'relative_error_in_total_energy':
        metrics.relative_error_total_energy(predicted_meter, truth_meter),
        'nad': metrics.nad(predicted_meter, truth_meter),
        'disaggregation_accuracy':
        metrics.disaggregation_accuracy(predicted_meter, truth_meter)
    }


def fhmm(dataset_path, train_building, train_start, train_end, val_building,
         val_start, val_end, test_building, test_start, test_end, meter_key,
         sample_period):
    """Train an FHMM on one appliance and evaluate it on validation and test windows.

    The model is trained on the ``meter_key`` meter plus the mains of
    ``train_building`` inside the training window, then used to disaggregate
    the mains of the validation and test windows into ``disag-out-val.h5``
    and ``disag-out-test.h5`` in the current working directory.

    Parameters
    ----------
    dataset_path
        Path of the dataset opened (three times) with ``DataSet``.
    train_building, train_start, train_end
        Building and window used for training.
    val_building, val_start, val_end
        Building and window used for validation.
    test_building, test_start, test_end
        Building and window used for testing.
    meter_key
        Key of the appliance meter to train on and evaluate.
    sample_period
        Passed to ``FHMM.train``.

    Returns
    -------
    dict
        ``'val_metrics'`` and ``'test_metrics'`` (metric dictionaries),
        ``'time_taken'`` (seconds, formatted with two decimals) and
        ``'epochs'`` (always ``None``).
    """
    # Start tracking time
    start = time.time()

    # Every DataSet opened below is registered here so that all stores are
    # closed even when training, disaggregation or scoring fails.
    opened = []
    try:
        # Prepare dataset and options
        train = DataSet(dataset_path)
        opened.append(train)
        train.set_window(start=train_start, end=train_end)
        val = DataSet(dataset_path)
        opened.append(val)
        val.set_window(start=val_start, end=val_end)
        test = DataSet(dataset_path)
        opened.append(test)
        test.set_window(start=test_start, end=test_end)

        train_elec = train.buildings[train_building].elec
        val_elec = val.buildings[val_building].elec
        test_elec = test.buildings[test_building].elec

        # Train on the target appliance together with the mains.
        selected = MeterGroup([train_elec[meter_key], train_elec.mains()])
        model = FHMM()
        model.train(selected, sample_period=sample_period)

        # Disaggregate the validation and test mains.
        val_disag_filename = 'disag-out-val.h5'
        _fhmm_disaggregate_to_file(model, val_elec.mains(), val_disag_filename)
        test_disag_filename = 'disag-out-test.h5'
        _fhmm_disaggregate_to_file(model, test_elec.mains(),
                                   test_disag_filename)

        # Validation results
        result_val = DataSet(val_disag_filename)
        opened.append(result_val)
        res_elec_val = result_val.buildings[val_building].elec
        val_metrics_results_dict = _fhmm_collect_metrics(
            res_elec_val[meter_key], val_elec[meter_key])

        # Test results
        result = DataSet(test_disag_filename)
        opened.append(result)
        res_elec = result.buildings[test_building].elec
        test_metrics_results_dict = _fhmm_collect_metrics(
            res_elec[meter_key], test_elec[meter_key])

        # End tracking time
        time_taken = time.time() - start  # in seconds

        model_result_data = {
            'val_metrics': val_metrics_results_dict,
            'test_metrics': test_metrics_results_dict,
            'time_taken': format(time_taken, '.2f'),
            'epochs': None,
        }
    finally:
        # Close the disaggregation outputs first, then the dataset files.
        for dataset in reversed(opened):
            dataset.store.close()

    return model_result_data
validation = pd.read_csv(val_logfile) epochs = np.array(validation.as_matrix()[:, 0], dtype='int') loss = np.array(validation.as_matrix()[:, 1], dtype='float32') argmin = np.argmin(loss) best_epoch = epochs[argmin] + 1 rnn.import_model( os.path.join(results_dir, "UKDALE-RNN-{}-{}epochs.h5".format(meter_key, best_epoch))) test_loss = rnn.evaluate(test_mains, test_meter, sample_period=sample_period) line = 'Test loss: {}'.format(test_loss) with open(results_file, "a") as text_file: text_file.write(line + '\n') print(line) disag_filename = 'disag-out.h5' output = HDFDataStore(os.path.join(results_dir, disag_filename), 'w') rnn.disaggregate(test_mains, output, results_file, train_meterlist[0], sample_period=sample_period) output.close() print("========== PLOTS ============") # plot train, validation and test loss plot_loss(train_logfile, val_logfile, results_dir, best_epoch, test_loss) # plot predicted energy consumption result = DataSet(os.path.join(results_dir, disag_filename)) res_elec = result.buildings[test_building].elec predicted = res_elec[meter_key]