def train(self, metergroup, cols=[('power', 'active')],
          buffer_size=20, noise_level=70, state_threshold=15,
          min_tolerance=100, percent_tolerance=0.035,
          large_transition=1000, **kwargs):
    """
    Train using Hart85. Places the learnt model in the `model` attribute.

    Parameters
    ----------
    metergroup : a nilmtk.MeterGroup object
    cols : nilmtk.Measurement, should be one of the following
        [('power', 'active')]
        [('power', 'apparent')]
        [('power', 'reactive')]
        [('power', 'active'), ('power', 'reactive')]
    buffer_size : int, optional
        size of the buffer to use for finding edges
    noise_level : int, optional
        transitions smaller than this (in watts) are treated as noise
    state_threshold : int, optional
        maximum deviation (in watts) tolerated within a single steady state
    min_tolerance : int, optional
        variance in power draw allowed for pairing a match
    percent_tolerance : float, optional
        if a transition is greater than large_transition, use a
        percentage-based tolerance instead of min_tolerance
    large_transition : float, optional
        power draw of a large transition
    """
    self.cols = cols
    self.state_threshold = state_threshold
    self.noise_level = noise_level
    [self.steady_states, self.transients] = find_steady_states_transients(
        metergroup, cols, noise_level, state_threshold, **kwargs)
    self.pair_df = self.pair(
        buffer_size, min_tolerance, percent_tolerance, large_transition)
    self.centroids = hart85_means_shift_cluster(self.pair_df, cols)
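# --- Illustrative usage sketch (not part of the original code). It assumes
# this train() method belongs to nilmtk's Hart85 disaggregator (the import
# path may differ between nilmtk versions); the dataset file name and
# building number are hypothetical placeholders. Commented out so it does
# not interfere with the class body.
#
# from nilmtk import DataSet
# from nilmtk.disaggregate import Hart85   # path may differ by nilmtk version
#
# redd = DataSet('redd.h5')                # hypothetical HDF5 dataset
# elec = redd.buildings[1].elec
#
# h = Hart85()
# h.train(elec.mains(), cols=[('power', 'active')])
# print(h.centroids)                       # learnt appliance state centroids
# print(h.steady_states.head())            # detected steady states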
def disaggregate(self, mains, output_datastore, **load_kwargs):
    """Disaggregate mains according to the model learnt previously.

    Parameters
    ----------
    mains : nilmtk.ElecMeter or nilmtk.MeterGroup
    output_datastore : instance of nilmtk.DataStore subclass
        For storing power predictions from disaggregation algorithm.
    sample_period : number, optional
        The desired sample period in seconds.
    **load_kwargs : key word arguments
        Passed to `mains.power_series(**kwargs)`
    """
    load_kwargs = self._pre_disaggregation_checks(load_kwargs)

    load_kwargs.setdefault('sample_period', 60)
    load_kwargs.setdefault('sections', mains.good_sections())

    timeframes = []
    building_path = '/building{}'.format(mains.building())
    mains_data_location = building_path + '/elec/meter1'
    data_is_available = False

    [_, transients] = find_steady_states_transients(
        mains, columns=self.columns,
        state_threshold=self.state_threshold,
        noise_level=self.noise_level, **load_kwargs)

    # For now ignoring the first transient
    # transients = transients[1:]

    # Initially all appliances/meters are in unknown state (denoted by -1)
    prev = OrderedDict()
    learnt_meters = self.centroids.index.values
    for meter in learnt_meters:
        prev[meter] = -1

    timeframes = []
    if len(self.columns) == 1:
        ac_type = self.columns[0][1]
    else:
        ac_type = ['active', 'reactive']

    # Now iterating over mains data and disaggregating chunk by chunk
    for chunk in mains.power_series(**load_kwargs):
        # Record metadata
        timeframes.append(chunk.timeframe)
        measurement = chunk.name
        power_df, dimen = self.disaggregate_chunk(chunk, prev, transients)
        if dimen == 2:
            columns = pd.MultiIndex.from_tuples([chunk.name])
        else:
            tuples = list(self.columns)
            columns = pd.MultiIndex.from_tuples(tuples)

        for meter in learnt_meters:
            data_is_available = True
            df = power_df[[meter]]
            df.columns = columns
            df.columns.names = ['physical_quantity', 'type']
            key = '{}/elec/meter{:d}'.format(building_path, meter + 2)
            val = df.apply(pd.to_numeric).astype('float32')
            output_datastore.append(key, value=val)
        print('Next chunk..')

    print('Appending mains data to datastore')
    for chunk_mains in mains.load(ac_type=ac_type):
        chunk_df = chunk_mains.apply(pd.to_numeric).astype('float32')
        output_datastore.append(key=mains_data_location, value=chunk_df)
    print('Done')

    # Save metadata
    if data_is_available:
        self._save_metadata_for_disaggregation(
            output_datastore=output_datastore,
            sample_period=load_kwargs['sample_period'],
            measurement=measurement,
            timeframes=timeframes,
            building=mains.building(),
            supervised=False,
            num_meters=len(self.centroids))

    return power_df
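# --- Illustrative sketch (not part of the original code): writing the
# disaggregation output to an HDF store and reading it back as a nilmtk
# DataSet. File names, the building number and the variable `h` (a trained
# Hart85 instance) are hypothetical placeholders.
#
# from nilmtk import DataSet, HDFDataStore
#
# output = HDFDataStore('hart_output.h5', 'w')
# h.disaggregate(elec.mains(), output, sample_period=60)
# output.close()
#
# disag = DataSet('hart_output.h5')
# disag_elec = disag.buildings[1].elec     # meter2, meter3, ... hold predictions
# print(disag_elec.meters)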
def disaggregate_chunk(self, test_mains):
    """
    Parameters
    ----------
    test_mains : list of pd.DataFrame
        mains power chunks to disaggregate

    Returns
    -------
    test_predictions_list : list of pd.DataFrame
        one prediction frame per chunk, indexed like the chunk, with one
        column per trained appliance
    """
    test_predictions_list = []
    print('...............................Disaggregator starts...............................')

    for chunk in test_mains:
        # Find the steady states and transients of the current chunk
        # (the original code passed test_mains[0] here; using `chunk`
        # so every chunk is processed with its own transients)
        [_, transients] = find_steady_states_transients(
            chunk, columns=self.columns,
            state_threshold=self.state_threshold,
            noise_level=self.noise_level)

        # For now ignoring the first transient
        # transients = transients[1:]

        # Initially all appliances/meters are in unknown state (denoted by -1)
        prev = OrderedDict()
        learnt_meters = self.centroids.index.values
        for meter in learnt_meters:
            prev[meter] = -1

        states = pd.DataFrame(
            -1, index=chunk.index, columns=self.centroids.index.values)

        for transient_tuple in transients.itertuples():
            if transient_tuple[0] < chunk.index[0]:
                # Transient occurs before chunk has started; do nothing
                pass
            elif transient_tuple[0] > chunk.index[-1]:
                # Transient occurs after chunk has ended; do nothing
                pass
            else:
                # Absolute value of transient
                abs_value = np.abs(transient_tuple[1:])
                positive = transient_tuple[1] > 0
                abs_value_transient_minus_centroid = pd.DataFrame(
                    (self.centroids - abs_value).abs())
                if len(transient_tuple) == 2:
                    # 1d data
                    index_least_delta = (
                        abs_value_transient_minus_centroid.idxmin().values[0])
                else:
                    # 2d data.
                    # Need to find absolute value before computing minimum
                    columns = abs_value_transient_minus_centroid.columns
                    abs_value_transient_minus_centroid["multidim"] = (
                        abs_value_transient_minus_centroid[columns[0]] ** 2 +
                        abs_value_transient_minus_centroid[columns[1]] ** 2)
                    index_least_delta = (
                        abs_value_transient_minus_centroid["multidim"].idxmin())
                if positive:
                    # Turned on
                    states.loc[transient_tuple[0], index_least_delta] = 1
                else:
                    # Turned off
                    states.loc[transient_tuple[0], index_least_delta] = 0

        prev = states.iloc[-1].to_dict()
        power_chunk_dict = self.assign_power_from_states(states, prev)
        self.power_dict = power_chunk_dict
        self.chunk_index = chunk.index

        # Check whether 1d or 2d data and convert the dict to a DataFrame
        if len(transient_tuple) == 2:
            temp_df = pd.DataFrame(power_chunk_dict, index=chunk.index)
        else:
            tuples = []
            for i in range(len(self.centroids.index.values)):
                for j in range(0, 2):
                    tuples.append([i, j])
            columns = pd.MultiIndex.from_tuples(tuples)
            temp_df = pd.DataFrame(
                power_chunk_dict, index=chunk.index, columns=columns)
            for i in range(len(chunk.index)):
                for j in range(len(self.centroids.index.values)):
                    for k in range(0, 2):
                        # Columns are ordered (0,0), (0,1), (1,0), ... so the
                        # flat position of column (j, k) is j * 2 + k
                        temp_df.iloc[i, j * 2 + k] = power_chunk_dict[j, i, k]

        temp_df = temp_df.fillna(0)

        # Keep only the centroid columns matched to trained appliances
        temp = pd.DataFrame()
        for appliance in self.appliances:
            matched_col = self.best_matches[appliance]
            temp[appliance] = temp_df[matched_col]
        test_predictions_list.append(temp)

    print('............................Disaggregator ends...........................')
    return test_predictions_list
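# --- Minimal sketch (not part of the original code) of the 1-d
# nearest-centroid matching performed above: each transient is assigned to
# the learnt centroid closest to its absolute value, and the sign of the
# transient decides on/off. The centroid and transient values are made up.
#
# import numpy as np
# import pandas as pd
#
# centroids = pd.DataFrame({('power', 'active'): [120.0, 850.0, 2100.0]})
# transient = -845.0                          # falling edge of ~845 W
# abs_value = np.abs(transient)
# index_least_delta = (centroids - abs_value).abs().idxmin().values[0]
# state = 1 if transient > 0 else 0           # 1 = turned on, 0 = turned off
# print(index_least_delta, state)             # -> 1 0 (second centroid, off)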
def partial_fit(self, train_main, train_appliances, buffer_size=20,
                noise_level=70, state_threshold=15, min_tolerance=100,
                percent_tolerance=0.035, large_transition=1000, **kwargs):
    """
    Train using Hart85. Places the learnt model in the `model` attribute.

    Parameters
    ----------
    train_main : list of pd.DataFrame
        mains power (only the first frame is used)
    train_appliances : list of tuples
        [('appliance name', [appliance_df]), ...]
    buffer_size : int, optional
        size of the buffer to use for finding edges
    noise_level : int, optional
        transitions smaller than this (in watts) are treated as noise
    state_threshold : int, optional
        maximum deviation (in watts) tolerated within a single steady state
    min_tolerance : int, optional
        variance in power draw allowed for pairing a match
    percent_tolerance : float, optional
        if a transition is greater than large_transition, use a
        percentage-based tolerance instead of min_tolerance
    large_transition : float, optional
        power draw of a large transition
    """
    self.appliances = []
    for row in train_appliances:
        self.appliances.append(row[0])

    print("...........................Hart_85 Partial Fit Running...............")

    train_main = train_main[0]
    columns = [train_main.columns[0]]
    self.columns = columns
    self.state_threshold = state_threshold
    self.noise_level = noise_level
    [self.steady_states, self.transients] = find_steady_states_transients(
        train_main, columns, noise_level, state_threshold)
    self.pair_df = self.pair(
        buffer_size, min_tolerance, percent_tolerance, large_transition)
    self.centroids = hart85_means_shift_cluster(self.pair_df, columns)

    print('..............................Predicting Centroid Matching..........................')

    chunk = train_main
    transients = self.transients
    temp_df = pd.DataFrame()

    # For now ignoring the first transient
    # transients = transients[1:]

    # Initially all appliances/meters are in unknown state (denoted by -1)
    prev = OrderedDict()
    learnt_meters = self.centroids.index.values
    for meter in learnt_meters:
        prev[meter] = -1

    states = pd.DataFrame(
        -1, index=chunk.index, columns=self.centroids.index.values)

    for transient_tuple in transients.itertuples():
        if transient_tuple[0] < chunk.index[0]:
            # Transient occurs before chunk has started; do nothing
            pass
        elif transient_tuple[0] > chunk.index[-1]:
            # Transient occurs after chunk has ended; do nothing
            pass
        else:
            # Absolute value of transient
            abs_value = np.abs(transient_tuple[1:])
            positive = transient_tuple[1] > 0
            abs_value_transient_minus_centroid = pd.DataFrame(
                (self.centroids - abs_value).abs())
            if len(transient_tuple) == 2:
                # 1d data
                index_least_delta = (
                    abs_value_transient_minus_centroid.idxmin().values[0])
            else:
                # 2d data.
                # Need to find absolute value before computing minimum
                columns = abs_value_transient_minus_centroid.columns
                abs_value_transient_minus_centroid["multidim"] = (
                    abs_value_transient_minus_centroid[columns[0]] ** 2 +
                    abs_value_transient_minus_centroid[columns[1]] ** 2)
                index_least_delta = (
                    abs_value_transient_minus_centroid["multidim"].idxmin())
            if positive:
                # Turned on
                states.loc[transient_tuple[0], index_least_delta] = 1
            else:
                # Turned off
                states.loc[transient_tuple[0], index_least_delta] = 0

    prev = states.iloc[-1].to_dict()
    power_chunk_dict = self.assign_power_from_states(states, prev)
    self.power_dict = power_chunk_dict
    self.chunk_index = chunk.index

    # Check whether 1d or 2d data and convert the dict to a DataFrame
    if len(transient_tuple) == 2:
        temp_df = pd.DataFrame(power_chunk_dict, index=chunk.index)
    else:
        tuples = []
        for i in range(len(self.centroids.index.values)):
            for j in range(0, 2):
                tuples.append([i, j])
        columns = pd.MultiIndex.from_tuples(tuples)
        temp_df = pd.DataFrame(
            power_chunk_dict, index=chunk.index, columns=columns)
        for i in range(len(chunk.index)):
            for j in range(len(self.centroids.index.values)):
                for k in range(0, 2):
                    # Columns are ordered (0,0), (0,1), (1,0), ... so the
                    # flat position of column (j, k) is j * 2 + k
                    temp_df.iloc[i, j * 2 + k] = power_chunk_dict[j, i, k]

    temp_df = temp_df.fillna(0)

    # Match each trained appliance to the centroid column with minimum RMSE
    best_matches = {}
    for row in train_appliances:
        appliance = row[0]
        appliance_df = row[1][0]
        matched_col = self.min_rmse_column(temp_df, appliance_df['power'])
        best_matches[appliance] = matched_col
    self.best_matches = best_matches

    print('...................................End Centroid Matching............................')

    self.model = dict(
        best_matches=best_matches,
        columns=columns,
        state_threshold=state_threshold,
        noise_level=noise_level,
        steady_states=self.steady_states,
        transients=self.transients,
        # pair_df=self.pair_df,
        centroids=self.centroids)
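# --- Illustrative sketch (not part of the original code) of the input format
# partial_fit() expects: train_main is a list of mains DataFrames and
# train_appliances is a list of ('name', [DataFrame]) tuples whose frames
# have a 'power' column. All values and names below are made up.
#
# import pandas as pd
#
# idx = pd.date_range('2014-01-01', periods=1000, freq='6S')
# mains_df = pd.DataFrame({('power', 'active'): 100.0}, index=idx)
# fridge_df = pd.DataFrame({'power': 80.0}, index=idx)
#
# h = Hart85()
# h.partial_fit([mains_df], [('fridge', [fridge_df])])
# print(h.best_matches)                       # centroid column matched per appliance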
def disaggregate(self, mains, output_datastore, **load_kwargs):
    """Disaggregate mains according to the model learnt previously.

    Parameters
    ----------
    mains : nilmtk.ElecMeter or nilmtk.MeterGroup
    output_datastore : instance of nilmtk.DataStore subclass
        For storing power predictions from disaggregation algorithm.
    sample_period : number, optional
        The desired sample period in seconds.
    **load_kwargs : key word arguments
        Passed to `mains.power_series(**kwargs)`
    """
    load_kwargs = self._pre_disaggregation_checks(load_kwargs)

    load_kwargs.setdefault('sample_period', 60)
    load_kwargs.setdefault('sections', mains.good_sections())

    timeframes = []
    building_path = '/building{}'.format(mains.building())
    mains_data_location = building_path + '/elec/meter1'
    data_is_available = False

    [_, transients] = find_steady_states_transients(
        mains, cols=self.cols, state_threshold=self.state_threshold,
        noise_level=self.noise_level, **load_kwargs)

    # For now ignoring the first transient
    # transients = transients[1:]

    # Initially all appliances/meters are in unknown state (denoted by -1)
    prev = OrderedDict()
    learnt_meters = self.centroids.index.values
    for meter in learnt_meters:
        prev[meter] = -1

    timeframes = []
    # Now iterating over mains data and disaggregating chunk by chunk
    for chunk in mains.power_series(**load_kwargs):
        # Record metadata
        timeframes.append(chunk.timeframe)
        measurement = chunk.name
        power_df = self.disaggregate_chunk(chunk, prev, transients)

        cols = pd.MultiIndex.from_tuples([chunk.name])

        for meter in learnt_meters:
            data_is_available = True
            df = power_df[[meter]]
            df.columns = cols
            key = '{}/elec/meter{:d}'.format(building_path, meter + 2)
            output_datastore.append(key, df)

        output_datastore.append(key=mains_data_location,
                                value=pd.DataFrame(chunk, columns=cols))

    if data_is_available:
        self._save_metadata_for_disaggregation(
            output_datastore=output_datastore,
            sample_period=load_kwargs['sample_period'],
            measurement=measurement,
            timeframes=timeframes,
            building=mains.building(),
            supervised=False,
            num_meters=len(self.centroids))
def disaggregate(self, mains, output_datastore=None,
                 exact_nilm_datastore=None, **load_kwargs):
    """Disaggregate mains according to the model learnt previously.

    Parameters
    ----------
    mains : nilmtk.ElecMeter or nilmtk.MeterGroup
    output_datastore : instance of nilmtk.DataStore subclass
        For storing power predictions from disaggregation algorithm.
    exact_nilm_datastore : instance of nilmtk.DataStore subclass, optional
        For storing the exact (event-level) appliance powerflows.
    sample_period : number, optional
        The desired sample period in seconds.
    **load_kwargs : key word arguments
        Passed to `mains.power_series(**kwargs)`
    """
    mains = mains.sitemeters()  # Only the site meters are of interest
    load_kwargs = self._pre_disaggregation_checks(mains, load_kwargs)

    load_kwargs.setdefault('sample_period', 2)
    load_kwargs.setdefault('sections', mains.good_sections())

    timeframes = []
    building_path = '/building{}'.format(mains.building() * 10)
    mains_data_location = building_path + '/elec/meter1'
    data_is_available = False

    [_, transients] = find_steady_states_transients(
        mains, columns=self.columns, state_threshold=self.state_threshold,
        noise_level=self.noise_level, **load_kwargs)

    # For now ignoring the first transient
    # transients = transients[1:]

    # Initially all appliances/meters are in unknown state (denoted by -1)
    prev = OrderedDict()
    learnt_meters = self.centroids.index.values
    for meter in learnt_meters:
        prev[meter] = -1

    timeframes = []
    disaggregation_overall = None
    # Now iterating over mains data and disaggregating chunk by chunk
    for chunk in mains.power_series(**load_kwargs):
        # Record metadata
        timeframes.append(chunk.timeframe)
        measurement = chunk.name
        power_df = self.disaggregate_chunk(chunk, prev, transients)

        columns = pd.MultiIndex.from_tuples([chunk.name])

        if False:  # output_datastore is not None:
            for meter in learnt_meters:
                data_is_available = True
                df = power_df[[meter]]
                df.columns = columns
                # meter0 does not exist and meter1 holds the undisaggregated
                # mains, so disaggregated meters start at meter2
                key = '{}/elec/meter{:d}'.format(building_path, meter + 2)
                output_datastore.append(key, df)
            # The mains data is stored as meter1
            output_datastore.append(
                key=mains_data_location,
                value=pd.DataFrame(chunk, columns=columns))
        else:
            if disaggregation_overall is None:
                disaggregation_overall = power_df
            else:
                # DataFrame.append is deprecated; pd.concat is equivalent here
                disaggregation_overall = pd.concat(
                    [disaggregation_overall, power_df])

    for column in disaggregation_overall:
        # meter0 does not exist and meter1 is the rest (undisaggregated) power
        key = '{}/elec/meter{:d}'.format(building_path, column + 2)
        tmp = disaggregation_overall[[column]]
        tmp.columns = pd.MultiIndex.from_tuples(
            [('power', 'active')], names=['physical_quantity', 'type'])
        output_datastore.append(key, tmp)
        if exact_nilm_datastore is not None:
            exact_nilm_datastore.append(
                key, self.model.appliances_detailed[[column]])
    # output_datastore.append('{}/elec/meter{:d}'.format(building_path, 1),
    #                         self.model.overall_powerflow[phase])

    num_meters = [len(disaggregation_overall.columns)]
    stores = ([(output_datastore, 300, True)]
              if exact_nilm_datastore is None else
              [(output_datastore, 300, True),
               (exact_nilm_datastore, 0, False)])
    for store, res, rest_included in stores:
        self._save_metadata_for_disaggregation(
            output_datastore=store,
            sample_period=res,
            measurement=pd.MultiIndex.from_tuples(
                [('power', 'active')], names=['physical_quantity', 'type']),
            timeframes=TimeFrameGroup([
                TimeFrame(start=disaggregation_overall[0].index[0],
                          end=disaggregation_overall[0].index[-1])]),
            building=mains.building(),
            supervised=False,
            num_meters=num_meters,
            original_building_meta=mains.meters[0].building_metadata,
            rest_powerflow_included=False)
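# --- Illustrative usage sketch (not part of the original code) for this
# two-datastore variant: the resampled disaggregation goes to
# output_datastore and, optionally, the exact event-level powerflow to
# exact_nilm_datastore. File names and `h` (a trained instance) are
# hypothetical placeholders.
#
# from nilmtk import DataSet, HDFDataStore
#
# ds = DataSet('dataset.h5')
# mains = ds.buildings[1].elec                # .sitemeters() is taken inside
# out = HDFDataStore('disag.h5', 'w')
# exact = HDFDataStore('disag_exact.h5', 'w')
# h.disaggregate(mains, output_datastore=out, exact_nilm_datastore=exact,
#                sample_period=2)
# out.close()
# exact.close()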
def train(self, metergroup, **load_kwargs):
    """
    Takes a site meter and trains the model on it. Goes chunk-wise through
    the dataset and extracts the events; at the end the events are clustered
    to identify appliances.

    For the signature description see the base class: for unsupervised
    learning this should receive a site meter.

    Parameters
    ----------
    metergroup : a nilmtk.MeterGroup object
        For custom Baranski (unsupervised), this is a single site meter.
    """
    # Go through all parts and extract events
    events = []

    # 1. Get events (this already exists) -> should be extended into
    #    GetSignatures, separating the different possible signature types.
    #    --> one Separator base class, then several separators for the
    #        individual effect types:
    #        - Rising spike
    #        - Falling spike
    #        - Pulse
    #        - Fluctuation
    #        - Quick vibrate
    #        - Gradual falling
    #        - Flat
    #    --> work with masking: an event is masked when it is part of a
    #        rising spike, falling spike or pulse
    #    --> each signature has its own specific properties
    #    --> some should contain a wildcard
    # The goal here is to build a 3d pandas structure.

    # events = self._load_if_available()
    # if events is not None:
    #     self.events = events
    #     return

    events = pd.DataFrame()
    for i, elec in enumerate(metergroup.all_meters()):
        print("Find events for " + str(elec.metadata))
        transitions = find_steady_states_transients(
            elec, cols=self.cols, state_threshold=self.state_threshold,
            noise_level=self.noise_level, **load_kwargs)[1]
        # Mark as on- or off-event
        transitions['type'] = transitions >= 0
        transitions['meter'] = elec
        # DataFrame.append is deprecated; pd.concat is equivalent here
        events = pd.concat([events, transitions])

    events.index.rename('time', inplace=True)
    events.set_index(['type', 'meter'], append=True, inplace=True)
    events = events.reorder_levels([2, 1, 0])
    events.sort_index(inplace=True)
    # Possibly also find the combinations of events here
    self.events = events
    # self._save(events)

    # 2. Cluster the events using different cluster methodologies
    #    (assignment to clusters happens automatically).
    #    Note: there is a predict step after all, namely assigning
    #    elements to clusters.
    clusters = None  # self._load_if_available(what='cluster')
    if clusters is None:
        for curGroup, groupEvents in events.groupby(['meter', 'type']):
            centroids, assignments = self._cluster_events(
                groupEvents, max_num_clusters=self.max_num_clusters,
                method='kmeans')
            events.loc[curGroup, 'cluster'] = assignments
        # self._save(events, 'cluster')
    else:
        pass  # events = clusters

    self.model = events
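# --- Minimal sketch (not part of the original code) of what the k-means
# clustering step above does conceptually: grouping transition magnitudes
# into a small number of clusters. _cluster_events itself is not shown in
# this file; this uses scikit-learn's KMeans directly on made-up transition
# values.
#
# import numpy as np
# from sklearn.cluster import KMeans
#
# transitions = np.array([[82.0], [79.0], [845.0], [860.0], [2100.0], [2090.0]])
# km = KMeans(n_clusters=3, n_init=10, random_state=0).fit(transitions)
# centroids = km.cluster_centers_.ravel()     # cluster centres (watts)
# assignments = km.labels_                    # cluster id per event
# print(centroids, assignments)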