Пример #1
0
def get_df_per_day(path_h5: str) -> pd.DataFrame:
    '''
    Given the path of an AMPds HDF5 file, return a dataframe with the data
    split per day and per meter.

    Parameters
    ----------
    path_h5 : str
        Path to the NILMTK-format HDF5 file.

    Returns
    -------
    pd.DataFrame
        Columns are a two-level MultiIndex: outer level 'day', inner level
        'name' (meter label) after the swaplevel/sort below.  Rows are the
        samples within each day (assumes each meter array from
        get_array_and_label is shaped (samples_per_day, n_days) — TODO
        confirm against get_array_and_label).

    '''
    
    # NOTE(review): building 1 is hard-coded here.
    ds = nilmtk.DataSet(path_h5)
    data_meters = list(map(get_array_and_label, 
                           ds.buildings[1].elec.all_meters()))
    
    # The following builds the desired multi-index dataframe.
    n_days = data_meters[0][1].shape[1] # number of days (2nd axis of the first meter's array)
    days = pd.RangeIndex(0, n_days, name='sample') # one of the column indices (renamed to 'day' below)
    names = list(map(lambda x:x[0], data_meters)) # the other index, the meter name
    iterables = [names, days]
    # Build the MultiIndex (name, day) for the columns.
    columns = pd.MultiIndex.from_product(iterables, names=['name', 'day'])
    data = list(map(lambda x:x[1], data_meters))
    data = np.hstack(data)
    df = pd.DataFrame(data, columns=columns)
    # Swap the levels so 'day' becomes the outer level, then sort columns.
    df = df.swaplevel(axis=1).sort_index(axis=1)
    return df
    def __init__(self, activations, target_appliance, appliances,
                 seq_length, filename, windows, sample_period,
                 target_inclusion_prob=0.5,
                 uniform_prob_of_selecting_each_building=True,
                 allow_incomplete_target=True,
                 include_incomplete_target_in_output=True,
                 allow_multiple_target_activations_in_aggregate=False,
                 include_multiple_targets_in_output=False,
                 rng_seed=None):
        """Configure the source and eagerly pre-compute everything sampling
        will need (mains data, no-target sections, gap probabilities)."""
        # Private copy so later mutation by callers cannot affect us.
        self.activations = deepcopy(activations)
        self.target_appliance = target_appliance
        self.appliances = appliances
        self.seq_length = seq_length
        self.filename = filename
        self.dataset = nilmtk.DataSet(self.filename)
        # Fail fast on a malformed fold/window specification.
        check_windows(windows)
        self.windows = windows
        self.sample_period = sample_period
        self.target_inclusion_prob = target_inclusion_prob
        self.uniform_prob_of_selecting_each_building = uniform_prob_of_selecting_each_building
        self.allow_incomplete_target = allow_incomplete_target
        self.include_incomplete_target_in_output = include_incomplete_target_in_output
        self.allow_multiple_target_activations_in_aggregate = allow_multiple_target_activations_in_aggregate
        self.include_multiple_targets_in_output = include_multiple_targets_in_output
        super(RealAggregateSource, self).__init__(rng_seed=rng_seed)

        # Pre-load and pre-compute; done once up front so sampling is cheap.
        self._load_mains_into_memory()
        self._remove_activations_with_no_mains()
        self._find_sections_with_no_target()
        self._compute_gap_probabilities()
Пример #3
0
    def load_csvdata(self, data_path, numApp, typeLoad=0, num_sequences_per_batch=0, target_inclusion_prob=0.5):#appliances, filename, self.sample_period, windows
        '''
        Parameters:
            data_path: directory containing redd.h5
            numApp to indicate wether all the appliances should be read or just one of them
                (-1 means all appliances in self.listAppliances)
            typeLoad: 0 usual in time split training|val|test
                      1 Kelly's load
                      2 combination with our own min ON
            num_sequences_per_batch: required (non-zero) when typeLoad==1
            target_inclusion_prob: passed through to kgd.getNILMbatches for
                Kelly-style sampling

        Returns:
            totalX, totalY two dictionaries with the split of the X and Y in training, validation and testing
        '''

        nilmkt_fileName = os.path.join(data_path, "redd.h5")

        if(typeLoad==1 and num_sequences_per_batch==0):
            print("Need to provide number of sequences per batch with kelly sampling")
            return

        if(typeLoad==1):
            numBatches = 1 #keep at 1 for now (1 batch with as many sequences as possible)  # NOTE(review): currently unused
            # Every fold must cover the same set of buildings.
            assert self.windows['train'].keys() == self.windows['test'].keys()
            assert self.windows['val'].keys() == self.windows['test'].keys()
            for building_i, window in self.windows['train'].items(): #self.windows.items()
                #Reformat windows to work with kellys code
                #WINDOWS = { 'train': { 1: ("2013-02-01", "2013-11-30")}  }
                #kellyWindow = {'train':{building_i:(window[0],window[1])}}

                # NOTE(review): building_i/window are not used in this call —
                # self.windows is passed whole, so every iteration repeats the
                # same work and only the last result survives.  Looks like it
                # should run once outside the loop; confirm before changing.
                totalX, totalY, stdIn, stdTar = kgd.getNILMbatches(self.sample_period, nilmkt_fileName, target_inclusion_prob, self.windows, self.listAppliances, self.pTrain, self.pVal, self.pTest, num_sequences_per_batch, self.time_steps, numApp)
            print(totalX['train'].shape, totalX['val'].shape, totalX['test'].shape, totalY['train'].shape, totalY['val'].shape, totalY['test'].shape)
            return totalX,totalY
        else:# 0 or 2
            print("\tTypeLoad is 0 or 2")
            lenApps = len(self.listAppliances)
            shapeY = [0,self.time_steps,lenApps] # (batch, seqLen, apps)
            dataset = nilmtk.DataSet(nilmkt_fileName)
            if (numApp!=-1):
                lenApps = 1
                shapeY = [0,self.time_steps] # single-appliance target is 2-D
            totalX = {'train':np.empty([0,self.time_steps]), 
                        'val':np.empty([0,self.time_steps]),
                        'test':np.empty([0,self.time_steps])}

            totalY = {'train':np.empty(shapeY), 
                        'val':np.empty(shapeY),
                        'test':np.empty(shapeY)}
                    
            # Accumulate every building's train/val/test splits into the
            # shared arrays.
            for building_i, window in self.windows.items():
                dataBuild = self.all_building_data(dataset,building_i, window)
                allSetsBuild = self.prepare_data(dataBuild, numApp, building_i, typeLoad)
                totalX['train'] = np.concatenate((totalX['train'], allSetsBuild[0]),axis=0)
                totalX['val'] = np.concatenate((totalX['val'], allSetsBuild[1]),axis=0)
                totalX['test'] = np.concatenate((totalX['test'], allSetsBuild[2]),axis=0)
                totalY['train'] = np.concatenate((totalY['train'], allSetsBuild[3]),axis=0)
                totalY['val'] = np.concatenate((totalY['val'], allSetsBuild[4]),axis=0)
                totalY['test'] = np.concatenate((totalY['test'], allSetsBuild[5]),axis=0)
            print(totalX['train'].shape, totalX['val'].shape, totalX['test'].shape, totalY['train'].shape, totalY['val'].shape, totalY['test'].shape)
            return totalX, totalY
Пример #4
0
def get_nilmtk_meters(filename='/data/mine/vadeec/merged/ukdale.h5',
                      window=("2013-04-12", "2013-05-12"),
                      building=1,
                      appliances=None):
    """Return a MeterGroup of selected appliance meters from a NILMTK dataset.

    Parameters (all optional; defaults reproduce the original hard-coded
    behaviour, so zero-argument callers are unaffected):
        filename: path to the NILMTK HDF5 file.
        window: (start, end) passed to DataSet.set_window.
        building: building index to read from.
        appliances: appliance labels to select; defaults to the UK-DALE
            house-1 set used originally.

    Returns:
        nilmtk.MeterGroup containing one meter per requested appliance.
    """
    if appliances is None:
        appliances = [
            'fridge freezer', 'washer dryer', 'kettle', 'dish washer',
            'microwave'
        ]
    ukdale = nilmtk.DataSet(filename)
    ukdale.set_window(*window)
    elec = ukdale.buildings[building].elec
    # elec[label] raises KeyError if the appliance is absent — same as before.
    meters = [elec[appliance] for appliance in appliances]
    return nilmtk.MeterGroup(meters)
    def _load_data_into_memory(self):
        """Load aligned mains + target-appliance power into ``self.data``.

        For every fold/building window in ``self.windows``, read the mains
        and target-appliance power series (resampled to
        ``self.sample_period``), join them on their timestamps and store any
        non-empty result under ``self.data[fold][building_name]``.
        """
        logger.info("Loading NILMTK data...")

        # Load dataset
        dataset = nilmtk.DataSet(self.filename)

        # .items(): .iteritems() is Python-2-only and raises AttributeError
        # on Python 3 dicts.
        for fold, buildings_and_windows in self.windows.items():
            for building_i, window in buildings_and_windows.items():
                dataset.set_window(*window)
                elec = dataset.buildings[building_i].elec
                building_name = (dataset.metadata['name'] +
                                 '_building_{}'.format(building_i))

                # Mains
                logger.info("Loading data for {}...".format(building_name))

                mains_meter = elec.mains()
                mains_good_sections = mains_meter.good_sections()

                appliance_meter = elec[self.target_appliance]
                # Kept for the (currently disabled) sections= filter below.
                good_sections = appliance_meter.good_sections(
                    sections=mains_good_sections)

                def load_data(meter):
                    return meter.power_series_all_data(
                        sample_period=self.sample_period).astype(
                            np.float32).dropna()  #,
                    #sections=good_sections).astype(np.float32).dropna()

                mains_data = load_data(mains_meter)
                appliance_data = load_data(appliance_meter)
                df = pd.DataFrame(
                    {
                        'mains': mains_data,
                        'target': appliance_data
                    },
                    dtype=np.float32).dropna()
                del mains_data
                del appliance_data

                if not df.empty:
                    self.data.setdefault(fold, {})[building_name] = df
                    # Log inside the guard: df.index[0] on an empty frame
                    # raises IndexError.
                    logger.info("Loaded data from building {} for fold {}"
                                " from {} to {}.".format(building_name, fold,
                                                         df.index[0],
                                                         df.index[-1]))
                else:
                    logger.info("No data for building {} in fold {}.".format(
                        building_name, fold))

        dataset.store.close()
        # Message fixed: this method loads mains *and* appliance data.
        logger.info("Done loading NILMTK data.")
Пример #6
0
    def _load_mains_into_memory(self):
        """Load mains power series and their good sections into memory.

        Fills ``self.mains[fold][building_name]`` with the resampled mains
        series and ``self.mains_good_sections[fold][building_name]`` with the
        matching good sections, for every fold/building window.
        """
        logger.info("Loading NILMTK mains...")

        # Load dataset
        dataset = nilmtk.DataSet(self.filename)

        self.mains = {}
        self.mains_good_sections = {}
        # .items(): .iteritems() is Python-2-only and raises AttributeError
        # on Python 3 dicts.
        for fold, buildings_and_windows in self.windows.items():
            for building_i, window in buildings_and_windows.items():
                dataset.set_window(*window)
                elec = dataset.buildings[building_i].elec
                building_name = (
                    dataset.metadata['name'] +
                    '_building_{}'.format(building_i))

                logger.info(
                    "Loading mains for {}...".format(building_name))

                mains_meter = elec.mains()
                good_sections = mains_meter.good_sections()
                mains_data = mains_meter.power_series_all_data(
                    sample_period=self.sample_period,
                    sections=good_sections).dropna()

                def set_mains_data(dictionary, data):
                    dictionary.setdefault(fold, {})[building_name] = data

                if not mains_data.empty:
                    set_mains_data(self.mains, mains_data)
                    set_mains_data(self.mains_good_sections, good_sections)
                    # Log inside the guard: .index[0] on an empty series
                    # raises IndexError.
                    logger.info(
                        "Loaded mains data from building {} for fold {}"
                        " from {} to {}."
                        .format(building_name, fold,
                                mains_data.index[0], mains_data.index[-1]))
                else:
                    logger.info("No mains data for building {} in fold {}."
                                .format(building_name, fold))

        dataset.store.close()
        logger.info("Done loading NILMTK mains data.")
Пример #7
0
    def _load_mains_into_memory(self):
        """Load mains and per-appliance power series for ``self.building_id``.

        For every fold in ``self.fold``, fills ``self.mains[fold]`` with the
        mains series and ``self.target[fold]`` with a dict of
        {appliance label: power series}.
        """
        logger.info("Loading NILMTK mains...")
        # Load dataset
        dataset = nilmtk.DataSet(self.filename, self.format)
        self.mains = {}
        self.mains_good_sections = {}
        self.target = {}
        for fold in self.fold:
            window = self.windows[fold][self.building_id]
            dataset.set_window(*window)
            elec = dataset.buildings[self.building_id].elec
            self.building_name = (dataset.metadata['name'] +
                                  '_building_{}'.format(self.building_id))
            logger.info("Loading mains for {}...".format(self.building_name))

            mains_meter = elec.mains()
            mains_data = mains_meter.power_series_all_data(
                sample_period=self.sample_period)
            # Plain dict: the original defaultdict(lambda: np.array()) had a
            # broken default factory — np.array() with no arguments raises
            # TypeError — and every key is assigned explicitly below anyway.
            target_data = {}

            for label in self.appliances:
                target_data[label] = elec[label].power_series_all_data(
                    sample_period=self.sample_period)

            def set_mains_data(dictionary, data):
                dictionary.setdefault(fold, {})[self.building_name] = data

            if not mains_data.empty and len(target_data.keys()):
                set_mains_data(self.mains, mains_data)
                set_mains_data(self.target, target_data)
                # Log inside the guard: .index[0] on an empty series raises
                # IndexError.
                logger.info(
                    "Loaded mains data from building {} for fold {} from {} to {}."
                    .format(self.building_name, fold, mains_data.index[0],
                            mains_data.index[-1]))
            else:
                print('no available data')

        dataset.store.close()
        logger.info("Done loading NILMTK mains data.")
Пример #8
0
def load_data_from_nilmtk_datasets(windows, dataset_paths, appliances, target_appliance_name, sample_period):
    """Load aligned mains/target data from several NILMTK datasets.

    Parameters:
        windows: {dataset_name: {fold: {building_i: [window, ...]}}}
        dataset_paths: {dataset_name: path to HDF5 file}
        appliances: {dataset_name: {target_appliance_name: [alias, ...]}}
        target_appliance_name: canonical appliance name to extract.
        sample_period: resample period in seconds.

    Returns:
        (data, data_good_sections) where data[fold][building_name] is a
        DataFrame with 'mains' and 'target' columns restricted to the mains
        good sections, and data_good_sections mirrors that structure with
        the good-section lists.
    """
    data = {}
    data_good_sections = {}

    logger.info("Loading NILMTK data...")

    for dataset_name, folds in windows.items():
        # Load dataset
        dataset = nilmtk.DataSet(dataset_paths[dataset_name])

        for fold, buildings_and_windows in folds.items():
            for building_i, windows_for_building in buildings_and_windows.items():
                dataset.set_window(None, None)
                elec = dataset.buildings[building_i].elec

                building_name = (
                    dataset.metadata['name'] +
                    '_building_{}'.format(building_i))
                logger.info(
                    "Loading data for {}...".format(building_name))
                mains_meter = elec.mains()
                good_sections = get_effective_good_sections(mains_meter)

                # Collect every submeter whose appliance type matches one of
                # the dataset-specific aliases for the target appliance.
                appliance_aliases = appliances[dataset_name][target_appliance_name]
                appliance_meters = []
                for meter in elec.meters:
                    if meter.is_site_meter():
                        continue

                    if len(meter.appliances) == 1:
                        appliancetype = meter.appliances[0].type['type']
                        if appliancetype in appliance_aliases:
                            appliance_meters.append(meter)
                    else:
                        append_meter = False
                        for a in meter.appliances:
                            if a.type['type'] in appliance_aliases:
                                append_meter = True
                        if append_meter:
                            appliance_meters.append(meter)
                            print(meter.appliances)

                if not appliance_meters:
                    logger.info(
                        "No {} found in {}".format(target_appliance_name, building_name))
                    continue

                if len(appliance_meters) > 1:
                    appliance_metergroup = nilmtk.MeterGroup(meters=appliance_meters)
                else:
                    appliance_metergroup = appliance_meters[0]
                data_good_sections.setdefault(fold, {})[building_name] = good_sections

                def load_data(meter):
                    # power_series_all_data may return None when the meter has
                    # no samples in the current window.
                    df = meter.power_series_all_data(
                        sample_period=sample_period
                        )
                    if df is not None:
                        return df.astype(np.float32).dropna()
                    else:
                        return None

                dfs = []
                for window in windows_for_building:
                    if dataset_name == "ECO":
                        # ECO needs a tz-aware TimeFrame set directly on the
                        # store; set_window does not work for it (see below).
                        dataset.store.window = TimeFrame(start=window[0], end=window[1], tz='GMT')
                    else:
                        if window is None:
                            # Was ipdb.set_trace(): fail loudly instead of
                            # dropping into an interactive debugger.
                            raise ValueError(
                                "None window for {} in fold {}".format(
                                    building_name, fold))
                        dataset.set_window(*window) # does not work for ECO
                    mains_data = load_data(mains_meter)
                    appliance_data = load_data(appliance_metergroup)
                    if (mains_data is None) or (appliance_data is None):
                        continue
                    df = pd.DataFrame(
                        {'mains': mains_data, 'target': appliance_data},
                        dtype=np.float32).dropna()
                    del mains_data
                    del appliance_data
                    if not df.empty:
                        dfs.append(df)

                if not dfs:
                    # pd.concat raises ValueError on an empty list.
                    logger.info("No data for {} in fold {}.".format(
                        building_name, fold))
                    continue
                df = pd.concat(dfs, axis=0)

                # Keep only the rows that fall inside mains good sections.
                dfs = []
                for gs in good_sections:
                    dfslice = gs.slice(df)
                    if not dfslice.empty:
                        dfs.append(dfslice)
                if not dfs:
                    logger.info("No good sections with data for {} in fold {}."
                                .format(building_name, fold))
                    continue
                df = pd.concat(dfs, axis=0)

                if not df.empty:
                    data.setdefault(fold, {})[building_name] = df

                logger.info(
                    "Loaded data from building {} for fold {}"
                    " from {} to {}."
                    .format(building_name, fold, df.index[0], df.index[-1]))

        dataset.store.close()

    logger.info("Done loading NILMTK data.")
    return data, data_good_sections
Пример #9
0
from pylab import rcParams
import matplotlib.pyplot as plt

import nilmtk as ntk
import nilmtk.disaggregate as ntkd
import nilmtk.metrics as ntkm

# Script: train Combinatorial Optimisation on the DRED dataset and
# disaggregate its mains channel.
rcParams['figure.figsize'] = (14, 6)
plt.style.use('ggplot')

# CONSTANTS=====================================================================
h5_path = '/home/t7/Dropbox/Documents/TUDelft/Thesis/Datasets/DRED/DRED.h5'
# NOTE(review): this second assignment immediately overrides the Linux path
# above — keep whichever matches the current machine.
h5_path = r'C:\Users\davwang\Desktop\nilmtk\nilmtk\dataset_converters\dred\DRED.h5'

# Load Data=====================================================================
dred = ntk.DataSet(h5_path)
# dred.set_window(start=None, end='2015-07-10 00:00:00')

elec = dred.buildings[1].elec
mains = elec.mains()

# Train==========================================================================
co = ntk.disaggregate.CombinatorialOptimisation()
co.train(elec)

# Disaggregate====================================================================
# NOTE(review): h5_path + 'outputDRED.h5' appends to the *input file* path,
# producing e.g. ".../DRED.h5outputDRED.h5" — presumably a directory join was
# intended; confirm before changing.
output = ntk.HDFDataStore(h5_path + 'outputDRED.h5', 'w')
co.disaggregate(mains, output)
output.close()

# Metrics==========================================================================
Пример #10
0
def load_nilmtk_activations(appliances, filename, sample_period, windows):
    """Extract per-appliance activations from a NILMTK dataset.

    Parameters
    ----------
    appliances : list of strings
    filename : string
        Path to the NILMTK HDF5 file.
    sample_period : int
        Resample period in seconds.
    windows : dict
        Structure example:
        {
            'train': {<building_i>: <window>},
            'unseen_activations_of_seen_appliances': {<building_i>: <window>},
            'unseen_appliances': {<building_i>: <window>}
        }

    Returns
    -------
    all_activations : dict
        {<fold>: {<appliance>: {<building_name>: [<activations>]}}} where
        every activation is a float32 pd.Series with DatetimeIndex and the
        metadata attributes: building, appliance, fold.
    """
    logger.info("Loading NILMTK activations...")

    # Sanity check the fold/window specification before touching the data.
    check_windows(windows)

    dataset = nilmtk.DataSet(filename)

    all_activations = {}
    for fold, buildings in list(windows.items()):
        fold_activations = {}
        for building_i, window in list(buildings.items()):
            dataset.set_window(*window)
            elec = dataset.buildings[building_i].elec
            building_name = (dataset.metadata['name'] +
                             '_building_{}'.format(building_i))
            for appliance in appliances:
                logger.info("Loading {} for {}...".format(
                    appliance, building_name))

                # Not every building has every appliance.
                try:
                    meter = elec[appliance]
                except KeyError as exception:
                    logger.info(building_name + " has no " + appliance +
                                ". Full exception: {}".format(exception))
                    continue

                # Extract and down-cast the activations in one pass.
                activations = [
                    act.astype(np.float32)
                    for act in meter.get_activations(
                        sample_period=sample_period)
                ]

                # Tag each activation with its origin.  _metadata must be
                # copied before extending, otherwise the new attribute names
                # would leak onto the shared class-level list.
                for act in activations:
                    act._metadata = copy(act._metadata)
                    act._metadata.extend(
                        ["building", "appliance", "fold"])
                    act.building = building_name
                    act.appliance = appliance
                    act.fold = fold

                if activations:
                    fold_activations.setdefault(
                        appliance, {})[building_name] = activations
                logger.info("Loaded {} {} activations from {}.".format(
                    len(activations), appliance, building_name))
        all_activations[fold] = fold_activations

    dataset.store.close()
    logger.info("Done loading NILMTK activations.")
    return all_activations
Пример #11
0
    def _load_data_into_memory(self):
        """Load aligned mains + all-appliance power into ``self.data``.

        For every fold/building window, reads the mains series and the series
        of every appliance in ``self.appliances`` over the shared good
        sections, intersects their time indexes so all columns align, and
        stores non-empty DataFrames under ``self.data[fold][building_name]``.
        Buildings with missing/misaligned data are skipped.
        """
        logger.info("Loading NILMTK data...")

        # Load dataset
        dataset = nilmtk.DataSet(self.filename)

        # .items(): .iteritems() is Python-2-only and raises AttributeError
        # on Python 3 dicts.
        for fold, buildings_and_windows in self.windows.items():
            for building_i, window in buildings_and_windows.items():
                dataset.set_window(*window)
                elec = dataset.buildings[building_i].elec

                building_name = (dataset.metadata['name'] +
                                 '_building_{}'.format(building_i))

                # Mains
                logger.info("Loading data for {}...".format(building_name))

                mains_meter = elec.mains()
                # Good sections = mains good sections restricted further to
                # the target appliance's good sections.
                good_sections = mains_meter.good_sections()
                good_sections = elec[self.target_appliance].good_sections(
                    sections=good_sections)

                if len(good_sections) < 1:
                    continue

                def load_data(meter):
                    return meter.power_series_all_data(
                        sample_period=self.sample_period,
                        sections=good_sections)

                # Plain dict: the original defaultdict(lambda: np.array())
                # had a broken factory (np.array() with no args raises
                # TypeError) and every key is assigned explicitly anyway.
                power_series_data = {}
                power_series_data['mains'] = load_data(mains_meter)
                main_index = power_series_data['mains'].index
                is_valid = True

                for appliance_name in self.appliances:
                    appliance_meter = elec[appliance_name]
                    power_series_data[appliance_name] = load_data(
                        appliance_meter)

                    if power_series_data[appliance_name] is None:
                        is_valid = False
                        break

                    # Keep only timestamps common to mains and every
                    # appliance seen so far.
                    power_series_data[appliance_name] = power_series_data[
                        appliance_name].loc[main_index]
                    appliance_index = power_series_data[appliance_name].index
                    main_index = main_index.intersection(appliance_index)

                if not is_valid:
                    continue

                for meter in power_series_data.keys():
                    power_series_data[meter] = power_series_data[meter].astype(
                        np.float32).loc[main_index].values

                # All columns must end up the same length as mains.
                for meter in power_series_data.keys():
                    if power_series_data[meter].shape != power_series_data[
                            'mains'].shape:
                        is_valid = False
                        break

                if not is_valid:
                    continue

                df = pd.DataFrame(power_series_data, dtype=np.float32).dropna()

                if not df.empty:
                    self.data.setdefault(fold, {})[building_name] = df
                    # Log inside the guard: df.index[0] on an empty frame
                    # raises IndexError.
                    logger.info("Loaded data from building {} for fold {}"
                                " from {} to {}.".format(building_name, fold,
                                                         df.index[0],
                                                         df.index[-1]))

        dataset.store.close()
        # Message fixed: this method loads mains *and* appliance data.
        logger.info("Done loading NILMTK data.")
Пример #12
0
    def load_csvdata(self,
                     data_path,
                     numApp,
                     typeLoad=0,
                     num_sequences_per_batch=0,
                     target_inclusion_prob=0.5
                     ):  #appliances, filename, self.sample_period, windows
        '''
        Parameters:
            data_path: directory containing ukdale.h5 and a "pickles" cache
                directory
            numApp to indicate wether all the appliances should be read or just one of them
                (-1 means all appliances in self.listAppliances)
            typeLoad: 0 usual in time split training|val|test
                      1 Kelly's load
                      2 combination with our own min ON
            num_sequences_per_batch: required (non-zero) when typeLoad==1
            target_inclusion_prob: passed through to kgd.getNILMbatches for
                Kelly-style sampling

        Returns:
            totalX, totalY two dictionaries with the split of the X and Y in training, validation and testing
        '''

        nilmkt_fileName = os.path.join(data_path, "ukdale.h5")

        if (typeLoad == 1 and num_sequences_per_batch == 0):
            print(
                "Need to provide number of sequences per batch with kelly sampling"
            )
            return

        if (typeLoad == 1):
            numBatches = 1  #keep at 1 for now (1 batch with as many sequences as possible)
            # Every fold must cover the same set of buildings.
            assert self.windows['train'].keys() == self.windows['test'].keys()
            assert self.windows['val'].keys() == self.windows['test'].keys()
            for building_i, window in self.windows['train'].items(
            ):  #self.windows.items()
                #Reformat windows to work with kellys code
                #WINDOWS = { 'train': { 1: ("2013-02-01", "2013-11-30")}  }
                #kellyWindow = {'train':{building_i:(window[0],window[1])}}

                # Cache file name encodes building, appliance, sample period,
                # batch size and window so different configs don't collide.
                if (numApp != -1):
                    truFileName = data_path + "/pickles/" + str(
                        building_i
                    ) + '_' + self.listAppliances[numApp] + '_' + str(
                        self.sample_period) + '_' + str(
                            num_sequences_per_batch
                        ) + '_' + window[0] + '_' + window[1]  #fileName[pos:]
                else:
                    truFileName = data_path + "/pickles/" + str(
                        building_i) + '_' + 'all' + '_' + str(
                            self.sample_period) + '_' + str(
                                num_sequences_per_batch
                            ) + '_' + window[0] + '_' + window[1]

                # NOTE(review): pickle.load on cached files — only safe while
                # the pickles directory is trusted/local.
                try:
                    total = pickle.load(
                        open(truFileName + "_building_k.pickle", "rb"))
                    totalX = total[0]
                    totalY = total[1]
                except (OSError, IOError) as e:
                    # Cache miss: build the batches and write them back.
                    totalX, totalY, stdIn, stdTar = kgd.getNILMbatches(
                        self.sample_period, nilmkt_fileName,
                        target_inclusion_prob, self.windows,
                        self.listAppliances, self.pTrain, self.pVal,
                        self.pTest, num_sequences_per_batch, self.time_steps,
                        numApp)
                    #this assumes you have a "pickles" directory at the same level as this file
                    with open(truFileName + "_building_k.pickle", 'wb') as fX:
                        pickle.dump([totalX, totalY], fX)
            print(totalX['train'].shape, totalX['val'].shape,
                  totalX['test'].shape, totalY['train'].shape,
                  totalY['val'].shape, totalY['test'].shape)
            return totalX, totalY
        else:  # 0 or 2
            lenApps = len(self.listAppliances)
            shapeY = [0, self.time_steps, lenApps]  # (batch, seqLen, apps)
            dataset = nilmtk.DataSet(nilmkt_fileName)
            if (numApp != -1):
                lenApps = 1
                shapeY = [0, self.time_steps]  # single-appliance target is 2-D
            totalX = {
                'train': np.empty([0, self.time_steps]),
                'val': np.empty([0, self.time_steps]),
                'test': np.empty([0, self.time_steps])
            }

            totalY = {
                'train': np.empty(shapeY),
                'val': np.empty(shapeY),
                'test': np.empty(shapeY)
            }

            # Accumulate every building's splits into the shared arrays,
            # using the per-building pickle cache when available.
            for building_i, window in self.windows.items():
                if (numApp != -1):
                    truFileName = data_path + "/pickles/" + str(
                        building_i
                    ) + '_' + self.listAppliances[numApp] + '_' + str(
                        self.stride_input) + '_' + window[0] + '_' + window[
                            1]  #fileName[pos:]
                else:
                    truFileName = data_path + "/pickles/" + str(
                        building_i) + '_' + 'all' + '_' + str(
                            self.stride_input
                        ) + '_' + window[0] + '_' + window[1]

                try:
                    dataBuild = pickle.load(
                        open(truFileName + "_building.pickle", "rb"))
                except (OSError, IOError) as e:
                    dataBuild = self.all_building_data(dataset, building_i,
                                                       window)
                    #this assumes you have a "pickles" directory at the same level as this file
                    with open(truFileName + "_building.pickle", 'wb') as fX:
                        pickle.dump(dataBuild, fX)
                allSetsBuild = self.prepare_data(dataBuild, numApp, building_i,
                                                 typeLoad)
                totalX['train'] = np.concatenate(
                    (totalX['train'], allSetsBuild[0]), axis=0)
                totalX['val'] = np.concatenate(
                    (totalX['val'], allSetsBuild[1]), axis=0)
                totalX['test'] = np.concatenate(
                    (totalX['test'], allSetsBuild[2]), axis=0)
                totalY['train'] = np.concatenate(
                    (totalY['train'], allSetsBuild[3]), axis=0)
                totalY['val'] = np.concatenate(
                    (totalY['val'], allSetsBuild[4]), axis=0)
                totalY['test'] = np.concatenate(
                    (totalY['test'], allSetsBuild[5]), axis=0)

            print(totalX['train'].shape, totalX['val'].shape,
                  totalX['test'].shape, totalY['train'].shape,
                  totalY['val'].shape, totalY['test'].shape)
            return totalX, totalY
Пример #13
0
from __future__ import print_function, division
import numpy as np
import pandas as pd
import nilmtk

# Script: pick one washer-dryer activation from UK-DALE and build a
# piecewise-constant ("smoothed") version of it.
dataset = nilmtk.DataSet('/data/mine/vadeec/merged/ukdale.h5')
dataset.set_window("2014-01-01", "2014-06-01")
elec = dataset.buildings[1].elec
washer = elec['washer dryer']
washer_activations = washer.get_activations()

# Take the 5th activation, clip it to [0, 2068] W and re-index to seconds
# (6 s sample period assumed — TODO confirm against the dataset metadata).
activation = washer_activations[4]
activation = activation.clip(lower=0, upper=2068)
activation.name = 'watts'
PERIOD = 6
activation.index = np.arange(0, len(activation) * PERIOD, PERIOD)

# Segment boundaries: indices where power jumps by more than 1200 W;
# append len(activation) so the final segment is closed.
segment_indicies = np.where(np.abs(activation.diff()) > 1200)[0]
segment_indicies = np.concatenate((segment_indicies, [len(activation)]))

# Replace each segment by its mean to get the piecewise-constant series.
smoothed = pd.Series(0, index=activation.index, name='watts')
prev_i = 0
for i in segment_indicies:
    smoothed.iloc[prev_i:i] = activation.iloc[prev_i:i].mean()
    prev_i = i


def to_int(data):
    """Round *data* to the nearest whole number and cast it to an int dtype."""
    rounded = data.round()
    return rounded.astype(int)

Пример #14
0
    def __init__(self, **config):
        """Load a NILMTK dataset and pre-compute per-appliance activations.

        Keyword Args:
            filename (str): Path to the NILMTK HDF5 file. Default "ukdale.h5".
            startTime, endTime (str): Dataset window; both must be supplied,
                otherwise the default window "2012-11-01".."2015-01-31" is used.
            trainBuildings (list): Building ids for training. Default [1, 3, 4, 5].
            testBuildings (list): Building ids for testing. Default [2].
            applications (list): Appliance names to load. Required.
            targetapplication (str): Appliance to disaggregate. Required.
            randSeed (int): Seed for the internal RandomState. Default 0.

        Raises:
            KeyError: If 'applications' or 'targetapplication' is missing.
        """
        if 'filename' not in config:
            self.dataSet = nilmtk.DataSet("ukdale.h5")
        else:
            # Bug fix: previously read config['fileName'] (wrong key case),
            # which raised KeyError whenever a custom 'filename' was supplied.
            self.dataSet = nilmtk.DataSet(config['filename'])

        if 'startTime' not in config or 'endTime' not in config:
            self.dataSet.set_window("2012-11-01", "2015-01-31")
        else:
            self.dataSet.set_window(config['startTime'], config['endTime'])

        if 'trainBuildings' not in config:
            self.trainBuildings = [1, 3, 4, 5]
        else:
            self.trainBuildings = config['trainBuildings']
        if 'testBuildings' not in config:
            self.testBuildings = [2]
        else:
            self.testBuildings = config['testBuildings']

        if 'applications' not in config:
            raise KeyError("please input applications")
        self.applications = config['applications']

        if 'targetapplication' not in config:
            raise KeyError("please input targetapplication")
        self.targetApplication = config['targetapplication']

        if 'randSeed' not in config:
            randSeed = 0
        else:
            randSeed = config['randSeed']

        # Every loaded appliance except the disaggregation target.
        self.otherApplications = [
            i for i in self.applications if i not in [self.targetApplication]
        ]
        self.allBuildings = set(self.trainBuildings + self.testBuildings)
        # Sequence length (in samples) used downstream.
        self.window = 599
        self.inputSeqs = []
        self.targetSeqs = []
        self.rng = np.random.RandomState(randSeed)
        # Per-appliance parameters forwarded to nilmtk's get_activations().
        activationConfig = {
            'fridge': {
                'min_off_duration': 18,  # 12 in paper here
                'min_on_duration': 60,
                'on_power_threshold': 50,
                'sample_period': 6,
            },
            'kettle': {
                'min_off_duration': 18,  # 0 in paper here
                'min_on_duration': 12,
                'on_power_threshold': 2000,
                'sample_period': 6,
            },
            'washing machine': {
                'min_off_duration': 160,
                'min_on_duration': 1800,
                'on_power_threshold': 20,
                'sample_period': 6,
            },
            'microwave': {
                'min_off_duration': 30,
                'min_on_duration': 12,
                'on_power_threshold': 200,
                'sample_period': 6,
            },
            'dish washer': {
                'min_off_duration': 1800,
                'min_on_duration': 1800,
                'on_power_threshold': 10,
                'sample_period': 6,
            }
        }

        # Mains power series per building, restricted to good sections.
        self.elecMains = {}
        self.goodSections = {}
        for building in self.allBuildings:
            self.goodSections[building] = self.dataSet.buildings[
                building].elec.mains().good_sections()
            self.elecMains[building] = self.dataSet.buildings[
                building].elec.mains().power_series_all_data(
                    sample_period=6,
                    sections=self.goodSections[building]).dropna()

        # Per-appliance power series, activations (float32) and the time
        # frames those activations span.
        self.numApp = {}
        self.elecApp = {}
        self.activationsApp = {}
        self.activationAppSections = {}
        for app in self.applications:
            self.elecApp[app] = {}
            self.activationsApp[app] = {}
            self.numApp[app] = 0
            self.activationAppSections[app] = {}
            for building in self.allBuildings:
                try:
                    self.elecApp[app][building] = self.dataSet.buildings[
                        building].elec[app].power_series_all_data(
                            sample_period=6).dropna()

                    self.activationsApp[app][
                        building] = self.dataSet.buildings[building].elec[
                            app].get_activations(**activationConfig[app])
                    self.activationsApp[app][building] = [
                        activation.astype(np.float32)
                        for activation in self.activationsApp[app][building]
                    ]
                    self.numApp[app] += len(self.activationsApp[app][building])
                    self.activationAppSections[app][building] = TimeFrameGroup(
                    )
                    for activation in self.activationsApp[app][building]:
                        self.activationAppSections[app][building].append(
                            TimeFrame(activation.index[0],
                                      activation.index[-1]))
                except KeyError as exception:
                    # This building simply has no such appliance; skip it.
                    logger.info(
                        str(building) + " has no " + app +
                        ". Full exception: {}".format(exception))
                    continue
        logger.info("Done loading NILMTK data.")

        # Discard target activations whose padded span (one activation
        # duration on each side) is not fully covered by mains readings.
        for building in self.allBuildings:
            activationsToRemove = []
            try:
                activations = self.activationsApp[
                    self.targetApplication][building]
                mains = self.elecMains[building]
                for i, activation in enumerate(activations):
                    activationDuration = (activation.index[-1] -
                                          activation.index[0])
                    start = (activation.index[0] - activationDuration)
                    end = (activation.index[-1] + activationDuration)
                    if start < mains.index[0] or end > mains.index[-1]:
                        activationsToRemove.append(i)
                    else:
                        mainsForAct = mains[start:end]
                        if not self._hasSufficientSamples(
                                start, end, mainsForAct):
                            activationsToRemove.append(i)
                # Pop from the back so earlier indices remain valid.
                activationsToRemove.reverse()
                for i in activationsToRemove:
                    activations.pop(i)
                self.activationsApp[
                    self.targetApplication][building] = activations
            except KeyError:
                # No target-appliance activations for this building.
                continue

        # Sections of mains long enough for one window that contain no
        # target-appliance activation.
        self.sectionsWithNoTarget = {}
        for building in self.allBuildings:
            try:
                activationsTarget = self.activationsApp[
                    self.targetApplication][building]
                mainGoodSections = self.goodSections[building]
                mains = self.elecMains[building]
                gapsBetweenActivations = TimeFrameGroup()
                prev = mains.index[0]
                for activation in activationsTarget:
                    try:
                        p2 = prev
                        gapsBetweenActivations.append(
                            TimeFrame(prev, activation.index[0]))
                        prev = activation.index[-1]
                        p1 = activation.index[0]
                    except ValueError:
                        # TimeFrame construction failed — presumably an
                        # out-of-order/overlapping activation; log bounds
                        # and carry on. TODO confirm against TimeFrame docs.
                        logger.debug("----------------------")
                        logger.debug(p1)
                        logger.debug(p2)
                        logger.debug(activation.index[0])
                        logger.debug(activation.index[-1])

                gapsBetweenActivations.append(TimeFrame(prev, mains.index[-1]))

                intersection = gapsBetweenActivations.intersection(
                    mainGoodSections)
                intersection = intersection.remove_shorter_than(6 *
                                                                self.window)
                self.sectionsWithNoTarget[building] = intersection
            except KeyError:
                continue
 def __setstate__(self, state):
     """Restore pickled state, then re-open the NILMTK dataset.

     The parameter was renamed from ``dict`` (which shadowed the builtin) to
     ``state``; pickle invokes ``__setstate__`` positionally, so callers are
     unaffected.
     """
     self.__dict__ = state
     # File handles are not picklable, so the DataSet is reopened from the
     # stored filename on unpickling.
     self.dataset = nilmtk.DataSet(state['filename'])
Пример #16
0
def load_nilmtk_activations(dataset_paths,
                            target_appliance_name,
                            appliance_names,
                            on_power_threshold,
                            min_on_duration,
                            min_off_duration,
                            sample_period,
                            windows,
                            sanity_check=1):
    """Extract target-appliance activations from one or more NILMTK datasets.

    Parameters
    ----------
    dataset_paths : dict
        {<dataset_name>: <path to the .h5 file>}
    target_appliance_name : str
        Canonical appliance name, used as key into `appliance_names`.
    appliance_names : dict
        {<dataset_name>: {<target_appliance_name>: [<alias>, ...]}} — the
        appliance-type aliases the target goes by in each dataset.
    on_power_threshold, min_on_duration, min_off_duration, sample_period
        Forwarded to nilmtk's ``get_activations()``.
    windows : dict
        Structure example:
        {
            'UKDALE': {
                'train': {<building_i>: [<window>, ...]},
                'unseen_activations_of_seen_appliances': {<building_i>: ...},
                'unseen_appliances': {<building_i>: ...}
            }
        }
    sanity_check : int
        When truthy, validate each dataset's windows with ``check_windows``.

    Returns
    -------
    all_activations : dict
        {<fold>: {<target_appliance_name>: {<building_name>: [<activations>]}}}
        Each activation is a pd.Series with a DatetimeIndex.
    """
    logger.info("Loading NILMTK activations...")

    if sanity_check:
        for dataset in windows:
            check_windows(windows[dataset])

    all_activations = {}
    for dataset_name, folds in windows.items():
        # Open each dataset once and reuse it across folds/buildings.
        dataset = nilmtk.DataSet(dataset_paths[dataset_name])
        appliance_aliases = appliance_names[dataset_name][target_appliance_name]

        for fold, buildings_and_windows in folds.items():
            logger.info(
                "Loading activations for fold {}.....".format(fold))
            for building_i, windows_for_building in buildings_and_windows.items():
                elec = dataset.buildings[building_i].elec
                building_name = (
                    dataset.metadata['name'] + '_building_{}'.format(building_i))

                # Collect every non-mains meter whose appliance type matches
                # one of the dataset-specific aliases.
                appliance_meters = []
                for meter in elec.meters:
                    if meter.is_site_meter():
                        continue
                    if any(a.type['type'] in appliance_aliases
                           for a in meter.appliances):
                        appliance_meters.append(meter)
                        # Fix: stray debug print() replaced with logging.
                        logger.debug(meter.appliances)

                if not appliance_meters:
                    logger.info(
                        "No {} found in {}".format(target_appliance_name, building_name))
                    continue

                # Several matching meters are combined into one MeterGroup.
                if len(appliance_meters) > 1:
                    meter = nilmtk.MeterGroup(meters=appliance_meters)
                else:
                    meter = appliance_meters[0]
                logger.info(
                    "Loading {} for {}...".format(target_appliance_name, building_name))

                meter_activations = []
                for window in windows_for_building:
                    if dataset_name == "ECO":
                        # set_window() does not work for ECO, so set the
                        # store's window directly.
                        dataset.store.window = TimeFrame(start=window[0], end=window[1], tz='GMT')
                    else:
                        dataset.set_window(*window)
                    # Extract activations for this window and accumulate.
                    meter_activations_for_building = meter.get_activations(
                        sample_period=sample_period,
                        min_off_duration=min_off_duration,
                        min_on_duration=min_on_duration,
                        on_power_threshold=on_power_threshold,
                        resample_kwargs={'fill_method': 'ffill', 'how': 'mean', 'limit': 20})
                    meter_activations.extend(meter_activations_for_building)

                # Save only non-empty results.
                if meter_activations:
                    all_activations.setdefault(
                        fold, {}).setdefault(
                        target_appliance_name, {})[building_name] = meter_activations
                logger.info(
                    "Loaded {} {} activations from {}."
                    .format(len(meter_activations), target_appliance_name, building_name))

        dataset.store.close()

    logger.info("Done loading NILMTK activations.")
    return all_activations
Пример #17
0
def _per_appliance_param(values, appliances, name):
    """Expand/validate one optional per-appliance parameter list.

    Returns a list of Nones (same length as `appliances`) when `values` is
    None; otherwise validates the length and returns `values` unchanged.

    Raises
    ------
    ValueError
        If `values` is given but its length differs from `appliances`.
    """
    if values is None:
        return [None] * len(appliances)
    if len(values) != len(appliances):
        raise ValueError(
            "`{}` must have the same size as `appliances` ".format(name))
    return values


def load_nilmtk_activations(appliances, filename, sample_period, windows,
                            on_power_thresholds=None,
                            min_on_durations=None,
                            min_off_durations=None,
                            sanity_check=1):
    """Extract appliance activations from a single NILMTK dataset.

    Parameters
    ----------
    appliances : list of strings
    filename : string
        Path to the NILMTK .h5 file.
    sample_period : int
    windows : dict
        Structure example:
        {
            'train': {<building_i>: <window>},
            'unseen_activations_of_seen_appliances': {<building_i>: <window>},
            'unseen_appliances': {<building_i>: <window>}
        }
    on_power_thresholds, min_on_durations, min_off_durations : list or None
        Optional per-appliance lists; each must match `appliances` in length.
    sanity_check : int
        When truthy, validate `windows` with ``check_windows``.

    Returns
    -------
    all_activations : dict
        Structure example:
        {<train | unseen_appliances | unseen_activations_of_seen_appliances>: {
             <appliance>: {
                 <building_name>: [<activations>]
        }}}
        Each activation is a pd.Series with DatetimeIndex and the following
        metadata attributes: building, appliance, fold.
    """
    logger.info("Loading NILMTK activations...")

    # Normalise the optional per-appliance parameter lists (replaces three
    # copies of identical validate-or-default boilerplate).
    on_power_thresholds = _per_appliance_param(
        on_power_thresholds, appliances, "on_power_thresholds")
    min_on_durations = _per_appliance_param(
        min_on_durations, appliances, "min_on_durations")
    min_off_durations = _per_appliance_param(
        min_off_durations, appliances, "min_off_durations")

    if sanity_check:
        check_windows(windows)

    # Load dataset
    dataset = nilmtk.DataSet(filename)

    all_activations = {}
    for fold, buildings_and_windows in windows.items():
        logger.info(
            "Loading activations for fold {}.....".format(fold))
        activations_for_fold = {}
        for building_i, window in buildings_and_windows.items():
            dataset.set_window(*window)
            elec = dataset.buildings[building_i].elec
            building_name = (
                dataset.metadata['name'] + '_building_{}'.format(building_i))
            for i, appliance in enumerate(appliances):
                logger.info(
                    "Loading {} for {}...".format(appliance, building_name))

                # Get meter for appliance; buildings without it raise KeyError.
                try:
                    meter = elec[appliance]
                except KeyError as exception:
                    logger.info(building_name + " has no " + appliance +
                                ". Full exception: {}".format(exception))
                    continue

                # Extract activations and down-cast to float32.
                meter_activations = meter.get_activations(
                    sample_period=sample_period,
                    min_off_duration=min_off_durations[i],
                    min_on_duration=min_on_durations[i],
                    on_power_threshold=on_power_thresholds[i])
                meter_activations = [activation.astype(np.float32)
                                     for activation in meter_activations]

                # Attach provenance metadata to each activation Series.
                for activation in meter_activations:
                    activation._metadata = copy(activation._metadata)
                    activation._metadata.extend(
                        ["building", "appliance", "fold"])
                    activation.building = building_name
                    activation.appliance = appliance
                    activation.fold = fold

                # Save only non-empty results.
                if meter_activations:
                    activations_for_fold.setdefault(
                        appliance, {})[building_name] = meter_activations
                logger.info(
                    "Loaded {} {} activations from {}."
                    .format(len(meter_activations), appliance, building_name))
        all_activations[fold] = activations_for_fold

    dataset.store.close()
    logger.info("Done loading NILMTK activations.")
    return all_activations
Пример #18
0
# Weather data store
WEATHER_DATA_STORE = os.path.expanduser("~/git/nilm-actionable/data/hvac/weather_2013.h5")
# Pre-saved 2013 weather table, read out of the HDF5 store.
weather_data_df = pd.HDFStore(WEATHER_DATA_STORE)["/weather"]
# Survey responses; keep only the thermostat/temperature columns plus dataid.
df = pd.read_csv(os.path.join(script_path, "../../data/total/survey_2013.csv"))
cols = ['programmable_thermostat_currently_programmed',
        'temp_summer_weekday_workday', 'temp_summer_weekday_morning',
        'temp_summer_weekday_evening', 'temp_summer_sleeping_hours_hours']
from copy import deepcopy

cols_plus_data_id = deepcopy(cols)
cols_plus_data_id.insert(0, "dataid")
df = df[cols_plus_data_id].dropna()
survey_homes = df.dataid.values

# Map NILMTK building numbers <-> original 'dataid' identifiers stored in
# each building's metadata.
# NOTE(review): iteritems() and the bare print statement below are Python 2
# syntax — this script will not run under Python 3 as-is.
ds = nilmtk.DataSet(os.path.expanduser("~/wikienergy-2013.h5"))
nilmtk_to_dataid = {num: building.metadata["original_name"]
                     for num, building in ds.buildings.iteritems()}
dataid_to_nilmtk={v:k for k, v in nilmtk_to_dataid.iteritems()}

# Dispatch table: fitting mode -> residual function (presumably lmfit-style
# objectives — confirm against their definitions elsewhere in the project).
function_map = {"binary": fcn2min_time_fixed_binary,
                "minutes": fcn2min_time_fixed}

results = {}

for folder in to_consider[:]:
    results[folder]={}
    output = {"binary": {}, "minutes": {}}

    # Folder names end with the algorithm name ("<...>_<algo>").
    algo = folder.split("_")[-1]
    print algo, folder
Пример #19
0
from __future__ import print_function
import sip
import nilmtk
import matplotlib.pyplot as plt
import pandas as pd

dataset = nilmtk.DataSet('ukdale.h5')

#dataset.set_window(start="6-4-2013")
#dataset.set_window(end="30-1-2013")
#dataset.set_window(start="6-11-2014",end="13-11-2014")

BUILDING = 2

elec = dataset.buildings[BUILDING].elec

# Build per-chunk ground-truth tables from the submeters of the appliance
# types listed below.
# NOTE(review): .next(), iteritems() and .values()[0] are Python 2 idioms —
# this script will not run under Python 3 as-is.
gt = {}
sample_period = 6  # seconds
for i, chunk in enumerate(elec.mains().load(sample_period=sample_period)):
    chunk_drop_na = chunk.dropna()  # NOTE(review): unused in this excerpt
    gt[i] = {}

    for meter in elec.submeters().select_using_appliances(type=[
            'kettle', 'fridge', 'microwave', 'dish washer', 'washing machine'
    ]).meters:
        # Only use the meters that we trained on (this saves time!)
        gt[i][meter] = meter.load(sample_period=sample_period).next()

    # One DataFrame per chunk: a column per meter, aligned on the first
    # meter's index; rows with any missing meter reading are dropped.
    gt[i] = pd.DataFrame({k: v.squeeze()
                          for k, v in gt[i].iteritems()},
                         index=gt[i].values()[0].index).dropna()