def train(self, metergroup, cols=[('power', 'active')],
          buffer_size=20, noise_level=70, state_threshold=15,
          min_tolerance=100, percent_tolerance=0.035,
          large_transition=1000, **kwargs):
    """
    Train using Hart85. Places the learnt model in the `model` attribute.

    Parameters
    ----------
    metergroup : a nilmtk.MeterGroup object
    cols : nilmtk.Measurement, should be one of the following
        [('power', 'active')]
        [('power', 'apparent')]
        [('power', 'reactive')]
        [('power', 'active'), ('power', 'reactive')]
    buffer_size : int, optional
        size of the buffer to use for finding edges
    noise_level : int, optional
        transitions below this level (in watts) are ignored as noise
    state_threshold : int, optional
        maximum difference between highest and lowest value in a steady state
    min_tolerance : int, optional
        variance in power draw allowed for pairing a match
    percent_tolerance : float, optional
        if a transition is greater than large_transition,
        then use this percentage of large_transition
    large_transition : float, optional
        power draw of a large transition
    """
    self.cols = cols
    self.state_threshold = state_threshold
    self.noise_level = noise_level
    # Step 1: detect steady states and the transitions (edges) between them.
    [self.steady_states, self.transients] = find_steady_states_transients(
        metergroup, cols, noise_level, state_threshold, **kwargs)
    # Step 2: pair positive (on) and negative (off) transitions of similar size.
    self.pair_df = self.pair(
        buffer_size, min_tolerance, percent_tolerance, large_transition)
    # Step 3: cluster the paired transitions into appliance centroids.
    self.centroids = hart85_means_shift_cluster(self.pair_df, cols)
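# A minimal usage sketch for `train` (assumptions: the enclosing class is the
# Hart85 disaggregator exposed by this module, and the dataset path and
# building number below are illustrative placeholders, not part of this repo):
#
#     from nilmtk import DataSet
#
#     ds = DataSet('/path/to/dataset.h5')   # hypothetical HDF5 dataset
#     elec = ds.buildings[1].elec           # MeterGroup for building 1
#     h = Hart85()
#     h.train(elec.mains(), cols=[('power', 'active')],
#             buffer_size=20, noise_level=70)
#     # After training: h.centroids holds the clustered transition sizes,
#     # h.steady_states / h.transients hold the detected events.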
def partial_fit(self, train_main, train_appliances, buffer_size=20,
                noise_level=70, state_threshold=15, min_tolerance=100,
                percent_tolerance=0.035, large_transition=1000, **kwargs):
    """
    Train using Hart85. Places the learnt model in the `model` attribute.

    Parameters
    ----------
    train_main : list of pandas.DataFrame objects containing the mains readings
    train_appliances : list of tuples of the form
        (appliance_name, [appliance_df, ...])
    buffer_size : int, optional
        size of the buffer to use for finding edges
    noise_level : int, optional
        transitions below this level (in watts) are ignored as noise
    state_threshold : int, optional
        maximum difference between highest and lowest value in a steady state
    min_tolerance : int, optional
        variance in power draw allowed for pairing a match
    percent_tolerance : float, optional
        if a transition is greater than large_transition,
        then use this percentage of large_transition
    large_transition : float, optional
        power draw of a large transition
    """
    # train_appliances: list of tuples [('appliance', [df, ...]), ...]
    self.appliances = []
    for row in train_appliances:
        self.appliances.append(row[0])

    print("...........................Hart_85 Partial Fit Running...............")

    train_main = train_main[0]
    # Disaggregate using only the first mains measurement column.
    columns = [train_main.columns[0]]
    self.columns = columns
    self.state_threshold = state_threshold
    self.noise_level = noise_level
    # Detect steady states and the transitions (edges) between them.
    [self.steady_states, self.transients] = find_steady_states_transients(
        train_main, columns, noise_level, state_threshold)
    # Pair positive (on) and negative (off) transitions of similar size.
    self.pair_df = self.pair(
        buffer_size, min_tolerance, percent_tolerance, large_transition)
    # Cluster the paired transitions into appliance centroids.
    self.centroids = hart85_means_shift_cluster(self.pair_df, columns)

    print('..............................Predicting Centroid Matching..........................')

    chunk = train_main
    transients = self.transients
    temp_df = pd.DataFrame()
    # For now ignoring the first transient
    # transients = transients[1:]

    # Initially all appliances/meters are in an unknown state (denoted by -1).
    prev = OrderedDict()
    learnt_meters = self.centroids.index.values
    for meter in learnt_meters:
        prev[meter] = -1

    states = pd.DataFrame(
        -1, index=chunk.index, columns=self.centroids.index.values)
    for transient_tuple in transients.itertuples():
        if transient_tuple[0] < chunk.index[0]:
            # Transient occurs before the chunk has started; do nothing.
            pass
        elif transient_tuple[0] > chunk.index[-1]:
            # Transient occurs after the chunk has ended; do nothing.
            pass
        else:
            # Absolute value of the transient.
            abs_value = np.abs(transient_tuple[1:])
            positive = transient_tuple[1] > 0
            abs_value_transient_minus_centroid = pd.DataFrame(
                (self.centroids - abs_value).abs())
            if len(transient_tuple) == 2:
                # 1d data
                index_least_delta = (
                    abs_value_transient_minus_centroid.idxmin().values[0])
            else:
                # 2d data.
                # Need to combine both measurement columns (squared Euclidean
                # distance) before taking the minimum.
                columns = abs_value_transient_minus_centroid.columns
                abs_value_transient_minus_centroid["multidim"] = (
                    abs_value_transient_minus_centroid[columns[0]] ** 2 +
                    abs_value_transient_minus_centroid[columns[1]] ** 2)
                index_least_delta = (
                    abs_value_transient_minus_centroid["multidim"].idxmin())
            if positive:
                # Turned on
                states.loc[transient_tuple[0], index_least_delta] = 1
            else:
                # Turned off
                states.loc[transient_tuple[0], index_least_delta] = 0

    prev = states.iloc[-1].to_dict()
    power_chunk_dict = self.assign_power_from_states(states, prev)
    self.power_dict = power_chunk_dict
    self.chunk_index = chunk.index

    # Check whether the data is 1d or 2d (using the width of the last
    # transient tuple seen above) and convert the dict to a DataFrame.
    if len(transient_tuple) == 2:
        temp_df = pd.DataFrame(power_chunk_dict, index=chunk.index)
    else:
        tuples = []
        for i in range(len(self.centroids.index.values)):
            for j in range(0, 2):
                tuples.append([i, j])
        columns = pd.MultiIndex.from_tuples(tuples)
        temp_df = pd.DataFrame(
            power_chunk_dict, index=chunk.index, columns=columns)
        for i in range(len(chunk.index)):
            for j in range(len(self.centroids.index.values)):
                for k in range(0, 2):
                    # Column (j, k) holds measurement k of centroid j.
                    temp_df.loc[chunk.index[i], (j, k)] = (
                        power_chunk_dict[j, i, k])

    temp_df = temp_df.fillna(0)

    # Match each training appliance to the centroid column whose
    # reconstructed power series has the lowest RMSE against it.
    best_matches = {}
    for row in train_appliances:
        appliance = row[0]
        appliance_df = row[1][0]
        matched_col = self.min_rmse_column(temp_df, appliance_df['power'])
        best_matches[appliance] = matched_col
    self.best_matches = best_matches

    print('...................................End Centroid Matching............................')

    self.model = dict(
        best_matches=best_matches,
        columns=columns,
        state_threshold=state_threshold,
        noise_level=noise_level,
        steady_states=self.steady_states,
        transients=self.transients,
        # pair_df=self.pair_df,
        centroids=self.centroids)
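# A minimal usage sketch for `partial_fit` (assumptions: the argument shapes
# are inferred from the code above -- `train_main` is a list of mains
# DataFrames and `train_appliances` is a list of
# (appliance_name, [appliance_df, ...]) tuples; the DataFrames and names below
# are illustrative, not part of this module):
#
#     h = Hart85()
#     h.partial_fit(train_main=[mains_df],
#                   train_appliances=[('fridge', [fridge_df]),
#                                     ('kettle', [kettle_df])])
#     # h.best_matches then maps each appliance name to the centroid column
#     # whose reconstructed power series gave the lowest RMSE against it,
#     # and h.model bundles the learnt state for later disaggregation.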