示例#1
0
    def train(self, metergroup, cols=None,
              buffer_size=20, noise_level=70, state_threshold=15,
              min_tolerance=100, percent_tolerance=0.035,
              large_transition=1000, **kwargs):
        """
        Train using Hart85.

        Stores the results on the instance: `steady_states`, `transients`,
        `pair_df` and `centroids` (plus the `cols`, `state_threshold` and
        `noise_level` settings that were used).

        Parameters
        ----------
        metergroup : a nilmtk.MeterGroup object
        cols: nilmtk.Measurement, optional
            Defaults to [('power','active')]. Should be one of the following
            [('power','active')]
            [('power','apparent')]
            [('power','reactive')]
            [('power','active'), ('power', 'reactive')]
        buffer_size: int, optional
            size of the buffer to use for finding edges
        noise_level: int, optional
            forwarded to `find_steady_states_transients`
        state_threshold: int, optional
            forwarded to `find_steady_states_transients`
        min_tolerance: int, optional
            variance in power draw allowed for pairing a match
        percent_tolerance: float, optional
            if transition is greater than large_transition,
            then use percent of large_transition
        large_transition: float, optional
            power draw of a Large transition
        **kwargs
            forwarded unchanged to `find_steady_states_transients`
        """
        # A fresh list per call: a list literal in the signature would be one
        # shared object that every instance stores in `self.cols`.
        if cols is None:
            cols = [('power', 'active')]

        self.cols = cols
        self.state_threshold = state_threshold
        self.noise_level = noise_level
        [self.steady_states, self.transients] = find_steady_states_transients(
            metergroup, cols, noise_level, state_threshold, **kwargs)
        # `pair` is called after the steady states / transients are stored.
        self.pair_df = self.pair(
            buffer_size, min_tolerance, percent_tolerance, large_transition)
        self.centroids = hart85_means_shift_cluster(self.pair_df, cols)
示例#2
0
    def partial_fit(self,
                    train_main,
                    train_appliances,
                    buffer_size=20,
                    noise_level=70,
                    state_threshold=15,
                    min_tolerance=100,
                    percent_tolerance=0.035,
                    large_transition=1000,
                    **kwargs):
        """
        Train using Hart85. Places the learnt model in `model` attribute.

        The mains signal is segmented into steady states and transients,
        transients are paired and clustered into centroids, every transient
        is assigned to its nearest centroid to build an on/off state table,
        and finally each training appliance is matched to one of the
        resulting power columns via `min_rmse_column`.

        Parameters
        ----------
        train_main : sequence of pandas.DataFrame
            Only the first element is used; its first column is the
            measurement the model is trained on.
        train_appliances : list of tuples
            [('appliance', [df, ...]), ('appliance', [df, ...])]. Only the
            first frame of each entry is used and it is read through its
            'power' column.
        buffer_size: int, optional
            size of the buffer to use for finding edges
        noise_level: int, optional
            forwarded to `find_steady_states_transients`
        state_threshold: int, optional
            forwarded to `find_steady_states_transients`
        min_tolerance: int, optional
            variance in power draw allowed for pairing a match
        percent_tolerance: float, optional
            if transition is greater than large_transition,
            then use percent of large_transition
        large_transition: float, optional
            power draw of a Large transition
        **kwargs
            accepted for interface compatibility; not used by this method
        """
        self.appliances = [row[0] for row in train_appliances]
        print(
            "...........................Hart_85 Partial Fit Running..............."
        )

        train_main = train_main[0]
        columns = [train_main.columns[0]]
        self.columns = columns
        self.state_threshold = state_threshold
        self.noise_level = noise_level
        [self.steady_states, self.transients
         ] = find_steady_states_transients(train_main, columns, noise_level,
                                           state_threshold)
        self.pair_df = self.pair(buffer_size, min_tolerance, percent_tolerance,
                                 large_transition)
        self.centroids = hart85_means_shift_cluster(self.pair_df, columns)

        print(
            '..............................Predicting Centroid Matching..........................'
        )
        chunk = train_main
        transients = self.transients

        # itertuples() yields (index, *columns), so a single column is the
        # "1d data" case. Taking this from the frame rather than from the loop
        # variable keeps it defined even when there are no transients at all.
        is_1d = len(transients.columns) == 1

        # Initially all appliances/meters are in unknown state (denoted by -1)
        states = pd.DataFrame(-1,
                              index=chunk.index,
                              columns=self.centroids.index.values)

        for transient_tuple in transients.itertuples():
            timestamp = transient_tuple[0]
            if timestamp < chunk.index[0] or timestamp > chunk.index[-1]:
                # Transient occurs outside the chunk; do nothing
                continue

            # Absolute value of transient
            abs_value = np.abs(transient_tuple[1:])
            positive = transient_tuple[1] > 0
            abs_value_transient_minus_centroid = pd.DataFrame(
                (self.centroids - abs_value).abs())
            if is_1d:
                index_least_delta = (
                    abs_value_transient_minus_centroid.idxmin().values[0])
            else:
                # 2d data: pick the centroid with the smallest squared
                # distance. A separate name is used so the measurement
                # `columns` stored in the model is not overwritten.
                delta_columns = abs_value_transient_minus_centroid.columns
                abs_value_transient_minus_centroid["multidim"] = (
                    abs_value_transient_minus_centroid[delta_columns[0]]**2 +
                    abs_value_transient_minus_centroid[delta_columns[1]]**2)
                index_least_delta = (
                    abs_value_transient_minus_centroid["multidim"].idxmin())

            # Single .loc call with (row, column): the chained form
            # `states.loc[row][col] = v` may assign into a temporary object
            # and leave `states` unchanged.
            # 1 -> turned on, 0 -> turned off
            states.loc[timestamp, index_least_delta] = 1 if positive else 0

        prev = states.iloc[-1].to_dict()
        power_chunk_dict = self.assign_power_from_states(states, prev)
        self.power_dict = power_chunk_dict
        self.chunk_index = chunk.index

        # Check whether 1d data or 2d data and converting dict to dataframe
        if is_1d:
            temp_df = pd.DataFrame(power_chunk_dict, index=chunk.index)
        else:
            n_centroids = len(self.centroids.index.values)
            tuples = [[i, j] for i in range(n_centroids) for j in range(0, 2)]
            multi_columns = pd.MultiIndex.from_tuples(tuples)

            temp_df = pd.DataFrame(power_chunk_dict,
                                   index=chunk.index,
                                   columns=multi_columns)

            # NOTE(review): `.iloc` on a DataFrame accepts at most one indexer
            # per axis, so the three-indexer assignment below is expected to
            # raise. The layout returned by `assign_power_from_states` is not
            # visible from here, so the intended fill is left untouched.
            # This branch looks unreachable while `columns` above always holds
            # a single measurement -- TODO confirm and fix or remove.
            for i in range(len(chunk.index)):
                for j in range(n_centroids):
                    for k in range(0, 2):
                        temp_df.iloc[i, j, k] = power_chunk_dict[j, i, k]

        # Reset before matching; `min_rmse_column` is defined elsewhere and
        # may consult this attribute, so the original ordering is kept.
        self.best_matches = {}
        temp_df = temp_df.fillna(0)
        best_matches = {}

        for row in train_appliances:
            appliance = row[0]
            appliance_df = row[1][0]
            matched_col = self.min_rmse_column(temp_df, appliance_df['power'])
            best_matches[appliance] = matched_col
        self.best_matches = best_matches

        print(
            '...................................End Centroid Matching............................'
        )

        self.model = dict(
            best_matches=best_matches,
            columns=columns,
            state_threshold=state_threshold,
            noise_level=noise_level,
            steady_states=self.steady_states,
            transients=self.transients,
            centroids=self.centroids)