示例#1
0
def TECA(pred_list, ground_list, mains):
    """Compute the TECA score over a set of appliances.

    The value returned is ``1 - 0.5 * (total_diff_sum / total_aggr_sum)``
    where ``total_diff_sum`` accumulates ``|column 0 - column 1|`` over every
    chunk obtained by aligning each predicted meter with its ground-truth
    meter, and ``total_aggr_sum`` is the sum of column 1 of the chunks
    obtained by aligning the last predicted meter with ``mains``.

    Parameters
    ----------
    pred_list, ground_list : sequences of meters
        One meter per appliance, paired by position.
    mains : meter
        Aggregate meter, aligned against the last entry of ``pred_list``.

    Returns
    -------
    float or None
        ``None`` when ``pred_list`` is empty or no aligned sample was found.
    """
    # Without any appliance there is no meter to align ``mains`` against.
    if len(pred_list) == 0:
        return None

    total_diff_sum = 0.0
    total_aggr_sum = 0.0
    sum_samples = 0.0
    for i, pred in enumerate(pred_list):
        ground = ground_list[i]
        for chunk in align_two_meters(pred, ground):
            chunk.fillna(0, inplace=True)
            sum_samples += len(chunk)
            total_diff_sum += sum(abs(chunk.iloc[:, 0] - chunk.iloc[:, 1]))

    # Aggregate sum: the last predicted meter only provides the time base.
    for chunk_mains in align_two_meters(pred_list[-1], mains):
        chunk_mains.fillna(0, inplace=True)
        total_aggr_sum += sum(chunk_mains.iloc[:, 1])

    if sum_samples == 0:
        return None
    # 0.5 rather than (1/2): the latter is 0 under Python 2 integer division.
    return 1 - 0.5 * (total_diff_sum / total_aggr_sum)
示例#2
0
def recall_precision_accuracy_f1(pred, ground):
    """Return (recall, precision, accuracy, f1) for two aligned meters.

    Both columns of every aligned chunk are binarised with the threshold
    returned by ``ground.on_power_threshold()``; column 0 is treated as the
    prediction and column 1 as the ground truth.  Confusion counts are
    summed over all chunks before the scores are derived.

    Returns ``None`` when the aligned meters yield no samples.
    """
    chunks = align_two_meters(pred, ground)
    on_threshold = ground.on_power_threshold()

    def _to_states(values):
        # 0 only when strictly below the threshold, otherwise 1.
        return np.array([0 if v < on_threshold else 1 for v in values])

    counts_per_chunk = []
    n_samples = 0.0
    for aligned in chunks:
        n_samples += len(aligned)
        pred_states = _to_states(aligned.iloc[:, 0])
        true_states = _to_states(aligned.iloc[:, 1])

        tp, tn, fp, fn = tp_tn_fp_fn(pred_states, true_states)
        positives = sum(pred_states)
        negatives = len(pred_states) - positives
        counts_per_chunk.append([tp, tn, fp, fn, positives, negatives])

    if n_samples == 0:
        return None

    tp, tn, fp, fn, positives, negatives = np.sum(counts_per_chunk, axis=0)
    rec = recall(tp, fn)
    prec = precision(tp, fp)
    f1_value = f1(prec, rec)
    acc = accuracy(tp, tn, positives, negatives)
    return (rec, prec, acc, f1_value)
示例#3
0
def recall_precision_accuracy_f1(pred, ground, pr_threshold = None, gr_threshold = None):
    """Return (recall, precision, accuracy, f1) for two aligned meters.

    Parameters
    ----------
    pred, ground : meters
        Passed to ``align_two_meters``; column 0 of each chunk is treated as
        the prediction and column 1 as the ground truth.
    pr_threshold, gr_threshold : number, optional
        On/off thresholds for the prediction and the ground truth.  When
        omitted, each falls back to ``ground.on_power_threshold()``.

    Returns
    -------
    tuple or None
        ``(recall, precision, accuracy, f1)``, or ``None`` when the aligned
        meters yield no samples.
    """
    aligned_meters = align_two_meters(pred, ground)
    # Identity test: ``== None`` would dispatch to the argument's __eq__.
    if pr_threshold is None:
        pr_threshold = ground.on_power_threshold()
    if gr_threshold is None:
        gr_threshold = ground.on_power_threshold()
    print('True threshold: ',gr_threshold)
    print('Pred threshold: ', pr_threshold)

    chunk_results = []
    sum_samples = 0.0
    for chunk in aligned_meters:
        sum_samples += len(chunk)
        pred_power = chunk.iloc[:, 0].fillna(0)
        ground_power = chunk.iloc[:, 1].fillna(0)
        # Vectorised thresholding: 0 when strictly below threshold, else 1.
        pr = np.where(pred_power.values < pr_threshold, 0, 1)
        gr = np.where(ground_power.values < gr_threshold, 0, 1)

        tp, tn, fp, fn = tp_tn_fp_fn(pr, gr)
        p = pr.sum()
        n = len(pr) - p

        chunk_results.append([tp, tn, fp, fn, p, n])

    if sum_samples == 0:
        return None

    tp, tn, fp, fn, p, n = np.sum(chunk_results, axis=0)

    res_recall = recall(tp, fn)
    res_precision = precision(tp, fp)
    res_f1 = f1(res_precision, res_recall)
    res_accuracy = accuracy(tp, tn, p, n)

    return (res_recall, res_precision, res_accuracy, res_f1)
示例#4
0
def rms_error_power(predictions, ground_truth):
    '''Compute RMS error in assigned power
    
    .. math::
            error^{(n)} = \\sqrt{ \\frac{1}{T} \\sum_t{ \\left ( y_t - \\hat{y}_t \\right )^2 } }

    Parameters
    ----------
    predictions, ground_truth : nilmtk.MeterGroup

    Returns
    -------
    error : pd.Series
        Each index is an meter instance int (or tuple for MeterGroups).
        Each value is the RMS error in predicted power for that appliance,
        or NaN when the pair of meters shares no valid sample.
    '''

    error = {}

    both_sets_of_meters = iterate_through_submeters_of_two_metergroups(
        predictions, ground_truth)
    for pred_meter, ground_truth_meter in both_sets_of_meters:
        sum_of_squared_diff = 0.0
        n_samples = 0
        for aligned_meters_chunk in align_two_meters(pred_meter,
                                                     ground_truth_meter):
            # iloc replaces DataFrame.icol, which pandas no longer provides.
            diff = (aligned_meters_chunk.iloc[:, 0]
                    - aligned_meters_chunk.iloc[:, 1]).dropna()
            sum_of_squared_diff += (diff ** 2).sum()
            n_samples += len(diff)

        if n_samples == 0:
            # No overlapping samples: report NaN instead of dividing by zero.
            error[pred_meter.instance()] = float('nan')
        else:
            error[pred_meter.instance()] = math.sqrt(
                sum_of_squared_diff / n_samples)

    return pd.Series(error)
示例#5
0
def mean_normalized_error_power(predictions, ground_truth):
    '''Compute mean normalized error in assigned power
        
    .. math::
        error^{(n)} = 
        \\frac
        { \\sum_t {\\left | y_t^{(n)} - \\hat{y}_t^{(n)} \\right |} }
        { \\sum_t y_t^{(n)} }

    Parameters
    ----------
    predictions, ground_truth : nilmtk.MeterGroup

    Returns
    -------
    mne : pd.Series
        Each index is an meter instance int (or tuple for MeterGroups).
        Each value is the MNE for that appliance.
    '''

    mne = {}
    both_sets_of_meters = iterate_through_submeters_of_two_metergroups(
        predictions, ground_truth)
    for pred_meter, ground_truth_meter in both_sets_of_meters:
        total_abs_diff = 0.0
        sum_of_ground_truth_power = 0.0
        for aligned_meters_chunk in align_two_meters(pred_meter,
                                                     ground_truth_meter):
            # iloc replaces DataFrame.icol, which pandas no longer provides.
            pred_power = aligned_meters_chunk.iloc[:, 0]
            ground_power = aligned_meters_chunk.iloc[:, 1]
            # Samples missing on either side are excluded from the numerator.
            total_abs_diff += (pred_power - ground_power).dropna().abs().sum()
            sum_of_ground_truth_power += ground_power.sum()

        mne[pred_meter.instance()] = total_abs_diff / sum_of_ground_truth_power

    return pd.Series(mne)
示例#6
0
def FTE_func(predictions, ground_truth):
    """Sum, over appliances, the smaller of the predicted and true energy share.

    For every pair of meters the column sums of the aligned chunks are
    accumulated (column 0 for the prediction, column 1 for the ground
    truth).  Each appliance total is divided by the overall total of its
    side, and the element-wise minimum of the two fractions is summed.

    Parameters
    ----------
    predictions, ground_truth : nilmtk.MeterGroup

    Returns
    -------
    numpy float
        Sum of ``min(fraction_pred[i], fraction_gt[i])`` over all appliances.
    """
    both_sets_of_meters = iterate_through_submeters_of_two_metergroups(
        predictions, ground_truth)
    total_pred_list = []
    total_gt_list = []
    total_pred = 0.0
    total_gt = 0.0
    for pred_meter, ground_truth_meter in both_sets_of_meters:
        total_app_pred = 0.0
        total_app_gt = 0.0
        for aligned_meters_chunk in align_two_meters(pred_meter,
                                                     ground_truth_meter):
            # iloc replaces DataFrame.icol, which pandas no longer provides.
            # Each column is summed once and reused for both accumulators.
            chunk_pred = aligned_meters_chunk.iloc[:, 0].sum()
            chunk_gt = aligned_meters_chunk.iloc[:, 1].sum()
            total_pred += chunk_pred
            total_gt += chunk_gt
            total_app_pred += chunk_pred
            total_app_gt += chunk_gt
        total_pred_list.append(total_app_pred)
        total_gt_list.append(total_app_gt)

    fraction_gt = np.array(total_gt_list) / total_gt
    fraction_pred = np.array(total_pred_list) / total_pred
    # Built-in min keeps the original pairwise comparison semantics.
    fraction_min = [min(p, g) for p, g in zip(fraction_pred, fraction_gt)]
    return np.array(fraction_min).sum()
示例#7
0
def getStackTrainGenerators(b, train_meterRef):
    """Build one aligned-chunk generator per training dataset of building ``b``.

    Every path in the module-level ``dsPathsList[b]`` is opened as a
    ``DataSet``; its submeter ``meter_key`` (module-level) of building ``b``
    is aligned against ``train_meterRef``.  The objects returned by
    ``align_two_meters`` are collected in path order.
    """
    def _aligned_with_reference(ds_path):
        # Aligning with the reference meter also lets the caller read the
        # dataset's meter chunk by chunk.
        dataset = DataSet(ds_path)
        building_elec = dataset.buildings[b].elec
        candidate_meter = building_elec.submeters()[meter_key]
        return align_two_meters(train_meterRef, candidate_meter)

    return [_aligned_with_reference(ds_path) for ds_path in dsPathsList[b]]
示例#8
0
def getMeterTargetGenerator(b, train_meterRef):
    """Return the aligned-chunk generator for the target (Y) meter.

    Opens the module-level ``dsPathY`` dataset, prints the date range of
    ``train_meterRef`` and aligns the submeter ``meter_key`` (module-level)
    of building ``b`` against ``train_meterRef``.
    """
    target_dataset = DataSet(dsPathY)
    first_day = train_meterRef.get_timeframe().start.date()
    last_day = train_meterRef.get_timeframe().end.date()
    print('Stack train: ', first_day, " - ", last_day)
    target_meter = target_dataset.buildings[b].elec.submeters()[meter_key]
    return align_two_meters(train_meterRef, target_meter)
def root_mean_squared_error(pred, ground):
    """Return the root mean squared difference between two aligned meters.

    Missing values in each chunk are replaced by 0 (in place) before the
    difference of column 0 and column 1 is squared and accumulated.
    Returns ``None`` when the aligned meters yield no samples.
    """
    squared_error_total = 0.0
    n_samples = 0.0
    for aligned in align_two_meters(pred, ground):
        aligned.fillna(0, inplace=True)
        n_samples += len(aligned)
        residual = aligned.iloc[:, 0] - aligned.iloc[:, 1]
        squared_error_total += sum(residual ** 2)
    if n_samples == 0:
        return None
    return (squared_error_total / n_samples) ** 0.5
示例#10
0
def mean_absolute_error(pred, ground):
    """Return the mean absolute difference between two aligned meters.

    Missing values in each chunk are replaced by 0 (in place) before the
    absolute difference of column 0 and column 1 is accumulated.
    Returns ``None`` when the aligned meters yield no samples.
    """
    abs_error_total = 0.0
    n_samples = 0.0
    for aligned in align_two_meters(pred, ground):
        aligned.fillna(0, inplace=True)
        n_samples += len(aligned)
        residual = aligned.iloc[:, 0] - aligned.iloc[:, 1]
        abs_error_total += sum(abs(residual))
    if n_samples == 0:
        return None
    return abs_error_total / n_samples
示例#11
0
def RMSE(pred, ground):
    """Return the root mean squared error between two aligned meters.

    Missing values in each chunk are replaced by 0 (in place); the squared
    difference of column 0 and column 1 is accumulated over all chunks and
    divided by the number of samples before taking the square root.
    Returns ``None`` when the aligned meters yield no samples.
    """
    squared_error_total = 0.0
    n_samples = 0.0
    for aligned in align_two_meters(pred, ground):
        aligned.fillna(0, inplace=True)
        n_samples += len(aligned)
        residual = aligned.iloc[:, 0] - aligned.iloc[:, 1]
        squared_error_total += sum(np.power(residual, 2))
    if n_samples == 0:
        return None
    return math.sqrt(squared_error_total / n_samples)
示例#12
0
def f1_score(predictions, ground_truth):
    '''Compute F1 scores.
    
    .. math::
        F_{score}^{(n)} = \\frac
            {2 * Precision * Recall}
            {Precision + Recall}

    Parameters
    ----------
    predictions, ground_truth : nilmtk.MeterGroup

    Returns
    -------
    f1_scores : pd.Series
        Each index is an meter instance int (or tuple for MeterGroups).
        Each value is the F1 score for that appliance.  If there are multiple
        chunks then the value is the weighted mean of the F1 score for 
        each chunk.
    '''
    # If we import sklearn at top of file then sphinx breaks.
    from sklearn.metrics import f1_score as sklearn_f1_score

    # sklearn produces lots of DepreciationWarnings with PyTables
    import warnings
    warnings.filterwarnings("ignore", category=DeprecationWarning)
    # NOTE(review): the original carried the remark "align_two_meters does
    # not work!!" here without further detail -- confirm whether it still holds.
    f1_scores = {}
    both_sets_of_meters = iterate_through_submeters_of_two_metergroups(
        predictions, ground_truth)
    for pred_meter, ground_truth_meter in both_sets_of_meters:
        # Collect one record per chunk and build the frame once;
        # DataFrame.append was removed in pandas 2.0.
        chunk_records = []
        aligned_states_chunks = align_two_meters(pred_meter, ground_truth_meter, 'when_on')
        for aligned_states_chunk in aligned_states_chunks:
            aligned_states_chunk.dropna(inplace=True)
            aligned_states_chunk = aligned_states_chunk.astype(int)
            # iloc replaces DataFrame.icol, which pandas no longer provides.
            score = sklearn_f1_score(aligned_states_chunk.iloc[:, 0],
                                     aligned_states_chunk.iloc[:, 1])
            chunk_records.append(
                {'score': score, 'n_samples': len(aligned_states_chunk)})
        scores_for_meter = pd.DataFrame(chunk_records,
                                        columns=['score', 'n_samples'])

        # Calculate weighted mean
        tot_samples = scores_for_meter['n_samples'].sum()
        scores_for_meter['proportion'] = (scores_for_meter['n_samples'] /
                                          tot_samples)
        avg_score = (scores_for_meter['score'] *
                     scores_for_meter['proportion']).sum()
        f1_scores[pred_meter.instance()] = avg_score

    return pd.Series(data=f1_scores.values(), index=f1_scores.keys(), dtype=np.float32)
def acc_score(predictions, ground_truth):
    '''Compute accuracy scores.

    For each pair of meters the on/off states are aligned chunk by chunk and
    scored with ``sklearn.metrics.accuracy_score``, i.e. the fraction of
    samples whose two states agree.

    Parameters
    ----------
    predictions, ground_truth : nilmtk.MeterGroup

    Returns
    -------
    acc_scores : pd.Series
        Each index is an meter instance int (or tuple for MeterGroups).
        Each value is the accuracy for that appliance.  If there are multiple
        chunks then the value is the weighted mean of the accuracy for
        each chunk, weighted by the number of samples in the chunk.
    '''
    # If we import sklearn at top of file then sphinx breaks.
    from sklearn.metrics import accuracy_score as sklearn_acc_score

    # sklearn produces lots of DepreciationWarnings with PyTables
    import warnings
    warnings.filterwarnings("ignore", category=DeprecationWarning)

    acc_scores = {}
    both_sets_of_meters = iterate_through_submeters_of_two_metergroups(
        predictions, ground_truth)
    for pred_meter, ground_truth_meter in both_sets_of_meters:
        # Collect one record per chunk and build the frame once;
        # DataFrame.append was removed in pandas 2.0.
        chunk_records = []
        for aligned_states_chunk in align_two_meters(pred_meter,
                                                     ground_truth_meter,
                                                     'when_on'):
            aligned_states_chunk.dropna(inplace=True)
            aligned_states_chunk = aligned_states_chunk.astype(int)
            # iloc replaces DataFrame.icol, which pandas no longer provides.
            score = sklearn_acc_score(aligned_states_chunk.iloc[:, 0],
                                      aligned_states_chunk.iloc[:, 1])
            chunk_records.append(
                {'score': score, 'n_samples': len(aligned_states_chunk)})
        scores_for_meter = pd.DataFrame(chunk_records,
                                        columns=['score', 'n_samples'])

        # Calculate weighted mean
        tot_samples = scores_for_meter['n_samples'].sum()
        scores_for_meter['proportion'] = (scores_for_meter['n_samples'] /
                                          tot_samples)
        avg_score = (scores_for_meter['score'] *
                     scores_for_meter['proportion']).sum()
        acc_scores[pred_meter.instance()] = avg_score

    return pd.Series(acc_scores)
示例#14
0
def nad(pred, ground):
    """Return sqrt(sum|col0 - col1| / sum|col1|) over two aligned meters.

    Missing values in each chunk are replaced by 0 (in place) before the
    sums are accumulated.  Returns ``None`` when the aligned meters yield
    no samples.
    """
    abs_diff_total = 0.0
    abs_reference_total = 0.0
    n_samples = 0.0
    for aligned in align_two_meters(pred, ground):
        aligned.fillna(0, inplace=True)
        n_samples += len(aligned)

        first, second = aligned.iloc[:, 0], aligned.iloc[:, 1]
        abs_diff_total += sum(abs(first - second))
        abs_reference_total += sum(abs(second))
    if n_samples == 0:
        return None
    return np.sqrt(abs_diff_total / abs_reference_total)
示例#15
0
def relative_error_total_energy(pred, ground):
    """Return |E_pred - E_ground| / max(E_pred, E_ground) for two meters.

    ``E_pred`` and ``E_ground`` are the sums of column 0 and column 1 over
    all aligned chunks, with missing values replaced by 0 (in place).
    Returns ``None`` when the aligned meters yield no samples.
    """
    per_chunk_totals = []
    n_samples = 0.0
    for aligned in align_two_meters(pred, ground):
        aligned.fillna(0, inplace=True)
        n_samples += len(aligned)
        per_chunk_totals.append(
            [sum(aligned.iloc[:, 0]), sum(aligned.iloc[:, 1])])
    if n_samples == 0:
        return None
    energy_pred, energy_ground = np.sum(per_chunk_totals, axis=0)
    larger_energy = float(max(energy_pred, energy_ground))
    return abs(energy_pred - energy_ground) / larger_energy
示例#16
0
def total_disag_err(predictions, ground_truth):
    """Return the total absolute error divided by the total ground-truth sum.

    For every pair of meters and every aligned chunk, ``|column 0 - column 1|``
    (rows missing on either side excluded) and the sum of column 1 are
    accumulated; the ratio of the two totals is returned.

    Parameters
    ----------
    predictions, ground_truth : nilmtk.MeterGroup

    Returns
    -------
    float

    Raises
    ------
    ZeroDivisionError
        When the accumulated ground-truth sum is zero.
    """
    # NOTE(review): the original comment says only the instances present in
    # the group with fewer meters are iterated; that depends on the helper
    # below, which is not visible here -- confirm.
    both_sets_of_meters = iterate_through_submeters_of_two_metergroups(
        predictions, ground_truth)
    total_diff = 0.0
    total_gt = 0.0
    for pred_meter, ground_truth_meter in both_sets_of_meters:
        for aligned_meters_chunk in align_two_meters(pred_meter,
                                                     ground_truth_meter):
            # iloc replaces DataFrame.icol, which pandas no longer provides.
            pred_power = aligned_meters_chunk.iloc[:, 0]
            ground_power = aligned_meters_chunk.iloc[:, 1]
            total_diff += (pred_power - ground_power).dropna().abs().sum()
            total_gt += ground_power.sum()
    return float(total_diff) / float(total_gt)
示例#17
0
def disaggregation_accuracy(pred, ground):
    """Return 1 - ||col0 - col1||_1 / (2 * ||col0||_1) over two aligned meters.

    Missing values in each chunk are replaced by 0 (in place) before the
    L1 norms are accumulated.  Returns ``None`` when the aligned meters
    yield no samples.
    """
    # NOTE(review): the denominator is built from column 0, the same column
    # the other metrics in this file treat as the prediction -- confirm this
    # is the intended normaliser.
    l1_error_total = 0.0
    l1_reference_total = 0.0
    n_samples = 0.0
    for aligned in align_two_meters(pred, ground):
        aligned.fillna(0, inplace=True)
        n_samples += len(aligned)

        first, second = aligned.iloc[:, 0], aligned.iloc[:, 1]
        l1_error_total += np.linalg.norm(first - second, ord=1)
        l1_reference_total += np.linalg.norm(first, ord=1)
    if n_samples == 0:
        return None
    return 1 - (float(l1_error_total) / (2 * l1_reference_total))
示例#18
0
def create_trainset(meter, mains, train_size, window_size,
                    min_chunk_length=3000):
    """Assemble fixed-size training arrays from aligned meter chunks.

    Chunks produced by ``align_two_meters(meter, mains)`` are forward-filled
    and handed to ``gen_batch``; the batches it returns are copied into the
    output arrays until ``train_size`` rows have been written or the chunks
    are exhausted.

    Parameters
    ----------
    meter, mains : meters
        Passed to ``align_two_meters``.  Column 1 of each chunk is given to
        ``gen_batch`` as its first argument and column 0 as its second.
    train_size : int
        Number of rows in the returned arrays.
    window_size : int
        Length of the second axis of the input array; also forwarded to
        ``gen_batch``.
    min_chunk_length : int, optional
        Chunks with fewer rows than this are skipped (default 3000).

    Returns
    -------
    all_x_train : np.ndarray, shape (train_size, window_size, 1)
    all_y_train : np.ndarray, shape (train_size,)
        Rows that could not be filled because the data ran out stay zero.
    """
    # Zeros rather than np.empty: unfilled rows must not expose
    # uninitialised memory when fewer than train_size samples exist.
    all_x_train = np.zeros((train_size, window_size, 1))
    all_y_train = np.zeros((train_size,))
    low_index = 0

    for chunk in align_two_meters(meter, mains):
        if chunk.shape[0] < min_chunk_length:
            continue
        # fillna(method='ffill') is deprecated in recent pandas.
        chunk.ffill(inplace=True)
        X_batch, Y_batch = gen_batch(chunk.iloc[:, 1], chunk.iloc[:, 0],
                                     chunk.shape[0] - window_size, 0,
                                     window_size)
        # Never write past the end of the preallocated arrays.
        high_index = min(len(X_batch), train_size - low_index)
        all_x_train[low_index:high_index + low_index] = X_batch[:high_index]
        all_y_train[low_index:high_index + low_index] = Y_batch[:high_index]
        low_index = high_index + low_index
        if low_index == train_size:
            break

    return all_x_train, all_y_train
示例#19
0
def root_mean_squared_error(pred, ground):
    """Return the root mean squared difference between two aligned meters.

    Missing values in each chunk are replaced by 0 (in place) before the
    difference of column 0 and column 1 is squared and accumulated.
    Returns ``None`` when the aligned meters yield no samples.
    """
    aligned_meters = align_two_meters(pred, ground)
    total_sum = 0.0
    sum_samples = 0.0
    for chunk in aligned_meters:
        chunk.fillna(0, inplace=True)
        sum_samples += len(chunk)
        total_sum += sum(pow((chunk.iloc[:, 0] - chunk.iloc[:, 1]), 2))
    if sum_samples == 0:
        return None
    return pow(total_sum / sum_samples, 0.5)

def recall(tp,fn):
    """Return tp / (tp + fn): true positives over all actual positives."""
    actual_positives = float(tp + fn)
    return tp / actual_positives

def precision(tp,fp):
    """Return tp / (tp + fp): true positives over all predicted positives."""
    predicted_positives = float(tp + fp)
    return tp / predicted_positives

def f1(prec,rec):
    """Return the harmonic mean of precision and recall."""
    doubled_product = 2 * (prec * rec)
    return doubled_product / float(prec + rec)

def accuracy(tp, tn, p, n):
    """Return (tp + tn) / (p + n): correct decisions over all samples."""
    correct = tp + tn
    total = float(p + n)
    return correct / total
示例#20
0
def runExperiment(experiment: experimentInfo, metricsResFileName,
                  clearMetricsFile):
    """Run one stacked-model test experiment and record its metrics.

    Steps performed:

    1. Every dataset in ``experiment.dsList`` is opened and its submeter
       ``experiment.meter_key`` of building ``experiment.building`` is
       aligned against the same meter of the first dataset.
    2. Chunk by chunk, the aligned series are stacked as columns, scaled
       with the module-level ``scaler`` and passed to the module-level
       ``clf``; predictions are appended to an HDF store at
       ``experiment.outName``.
    3. Disaggregation metadata is saved when at least one chunk was written.
    4. The output is re-opened, compared with the meter loaded from
       ``experiment.pathOrigDS``, and the metrics are printed and handed to
       ``metrics.writeResultsToCSV``.

    Parameters
    ----------
    experiment : experimentInfo
        Provides ``dsList``, ``outName``, ``building``, ``meter_key``,
        ``pathOrigDS`` and ``meterTH``.
    metricsResFileName, clearMetricsFile
        Forwarded unchanged to ``metrics.writeResultsToCSV``.
    """
    dsPathsList_Test = experiment.dsList
    outFileName = experiment.outName
    test_building = experiment.building
    meter_key = experiment.meter_key
    pathOrigDS = experiment.pathOrigDS
    meterTH = experiment.meterTH
    print('House ', test_building)

    # Load a "complete" dataset to have the test's timerange
    test = DataSet(dsPathsList_Test[0])
    test_elec = test.buildings[test_building].elec
    # Used as the reference against which every meter is aligned.
    testRef_meter = test_elec.submeters()[meter_key]

    # Align every test meter with testRef_meter as master
    test_series_list = []
    for path in dsPathsList_Test:
        test = DataSet(path)
        test_elec = test.buildings[test_building].elec
        test_meter = test_elec.submeters()[meter_key]
        aligned_meters = align_two_meters(testRef_meter, test_meter)
        test_series_list.append(aligned_meters)

    # Init vars for the output
    MIN_CHUNK_LENGTH = 300  # Depends on the basemodels of the ensemble
    timeframes = []
    building_path = '/building{}'.format(test_meter.building())
    mains_data_location = building_path + '/elec/meter1'
    data_is_available = False
    disag_filename = outFileName
    output_datastore = HDFDataStore(disag_filename, 'w')

    # Loop invariants, hoisted out of the chunk loop.
    measurement = ('power', 'active')
    cols = pd.MultiIndex.from_tuples([measurement])

    # Holds the chunk whose index/metadata is used when saving results
    # through NILMTK (e.g. for the timeframes).
    chunkDataForOutput = None

    while True:
        try:
            testX = None
            # Get the next chunk of each series
            for columnInd, testXGen in enumerate(test_series_list):
                chunkALL = next(testXGen)
                # slave is the meter needed (master is only for aligning)
                chunk = chunkALL['slave']
                chunk.fillna(0, inplace=True)
                if columnInd == 0:
                    # Use the first chunk for its metadata and to size the
                    # array holding all series as columns.  The former
                    # ``testX == []`` test compared an ndarray with a list
                    # from the second series on.
                    chunkDataForOutput = chunk
                    testX = np.zeros([len(chunk), len(test_series_list)])
                testX[:, columnInd] = chunk[:]
            testX = scaler.transform(testX)
        except StopIteration:
            # A generator is exhausted: normal end of the data.
            break
        except Exception as err:
            # Best effort as before (stop processing), but no longer silent
            # and no longer swallowing KeyboardInterrupt/SystemExit.
            print('Stopping chunk processing after unexpected error: {!r}'
                  .format(err))
            break

        if len(chunkDataForOutput) < MIN_CHUNK_LENGTH:
            continue

        startTime = chunkDataForOutput.index[0]
        endTime = chunkDataForOutput.index[-1]
        # Info needed for the output to be usable with NILMTK
        timeframes.append(TimeFrame(startTime, endTime))

        pred = clf.predict(testX)
        column = pd.Series(pred, index=chunkDataForOutput.index, name=0)
        appliance_powers_dict = {}
        appliance_powers_dict[0] = column
        appliance_power = pd.DataFrame(appliance_powers_dict)
        # Negative predictions are clipped to zero.
        appliance_power[appliance_power < 0] = 0

        # Append prediction to output
        data_is_available = True
        meter_instance = test_meter.instance()
        df = pd.DataFrame(appliance_power.values,
                          index=appliance_power.index,
                          columns=cols,
                          dtype="float32")
        key = '{}/elec/meter{}'.format(building_path, meter_instance)
        output_datastore.append(key, df)

        # Append aggregate data to output
        mains_df = pd.DataFrame(chunkDataForOutput,
                                columns=cols,
                                dtype="float32")
        # Note (For later): not 100% right. Should be mains. But it won't be
        # used anywhere, so it doesn't matter in this case
        output_datastore.append(key=mains_data_location, value=mains_df)

    # Save metadata to output
    if data_is_available:

        disagr = Disaggregator()
        disagr.MODEL_NAME = 'Stacked model'

        disagr._save_metadata_for_disaggregation(
            output_datastore=output_datastore,
            sample_period=sample_period,
            measurement=measurement,
            timeframes=timeframes,
            building=test_meter.building(),
            meters=[test_meter])

    # NOTE(review): output_datastore is not closed before the file is
    # re-opened below -- confirm the store is flushed at this point.

    #======================== Calculate Metrics =====================================
    testYDS = DataSet(pathOrigDS)
    testYDS.set_window(start=test_meter.get_timeframe().start.date(),
                       end=test_meter.get_timeframe().end.date())
    testY_elec = testYDS.buildings[test_building].elec
    testY_meter = testY_elec.submeters()[meter_key]
    test_mains = testY_elec.mains()

    result = DataSet(disag_filename)
    res_elec = result.buildings[test_building].elec
    rpaf = metrics.recall_precision_accuracy_f1(res_elec[meter_key],
                                                testY_meter, meterTH, meterTH)
    relError = metrics.relative_error_total_energy(res_elec[meter_key],
                                                   testY_meter)
    MAE = metrics.mean_absolute_error(res_elec[meter_key], testY_meter)
    RMSE = metrics.RMSE(res_elec[meter_key], testY_meter)
    print("============ Recall: {}".format(rpaf[0]))
    print("============ Precision: {}".format(rpaf[1]))
    print("============ Accuracy: {}".format(rpaf[2]))
    print("============ F1 Score: {}".format(rpaf[3]))
    print("============ Relative error in total energy: {}".format(relError))
    print("============ Mean absolute error(in Watts): {}".format(MAE))
    print("=== For docs: {:.4}\t{:.4}\t{:.4}\t{:.4}\t{:.4}\t{:.4}".format(
        rpaf[0], rpaf[1], rpaf[2], rpaf[3], relError, MAE))

    resDict = {
        'model': 'TEST',
        'building': test_building,
        'Appliance': meter_key,
        'Appliance_Type': 2,
        'Recall': rpaf[0],
        'Precision': rpaf[1],
        'Accuracy': rpaf[2],
        'F1': rpaf[3],
        'relError': relError,
        'MAE': MAE,
        'RMSE': RMSE
    }
    metrics.writeResultsToCSV(resDict, metricsResFileName, clearMetricsFile)