Пример #1
    def load_building(self, root_directory, building_name):
        """Load the mains channels of one building and register the building.

        Channel labels are read from ``<root_directory>/<building_name>``.
        Every channel labelled ``'mains'`` is loaded and stored, one column
        per channel, in ``building.electric.mains``.  The building is then
        stored in ``self.buildings`` under ``building_name``.

        Parameters
        ----------
        root_directory : str
            Directory containing one sub-directory per building.
        building_name : str
            Name of the building's sub-directory; also the key used in
            ``self.buildings``.
        """
        # Construct new Building and set known attributes
        building = Building()
        # MIT's coordinates
        building.geographic_coordinates = (42.360091, -71.09416)

        # Load labels
        building_dir = os.path.join(root_directory, building_name)
        labels = load_labels(building_dir)

        # Load every channel labelled 'mains', keyed by its column name.
        # `.items()` is used rather than `.iteritems()` because the latter
        # does not exist on Python 3; `.items()` works on both 2 and 3.
        mains_chan_dict = {
            'mains_{:d}_meter_1_active'.format(chan):
                load_chan(building_dir, chan)
            for chan, label in labels.items()
            if label == 'mains'
        }

        # Make a DataFrame containing all mains channels
        df = pd.DataFrame(mains_chan_dict)
        # The index is declared to be UTC and then converted to local time.
        df = df.tz_localize('UTC')
        df = df.tz_convert('US/Eastern')  # MIT is on the east coast!
        building.electric.mains = df

        # Load sub metered channels
        # TODO
        # Convert from REDD channel names to standardised names
        # Set up wiring

        self.buildings[building_name] = building
Пример #2
    def load_building(self, root_directory, building_name):
        """Load one week of 1-minute electrical data for a building.

        Each building has a week's worth of data, one spreadsheet per day,
        stored in ``<root_directory>/1_min/<building_name>/``.  Files are
        named ``<building_name>_1min_2012-09<DD>.xlsx`` with ``DD`` running
        from ``03`` to ``09``, e.g. ``Home 01_1min_2012-0903.xlsx`` to
        ``Home 01_1min_2012-0909.xlsx``.

        The daily sheets are concatenated, standardised with
        ``self.standardize`` and split into mains and appliances.  The
        resulting building is stored in ``self.buildings`` under
        ``building_name`` with spaces replaced by underscores.

        Parameters
        ----------
        root_directory : str
            Root directory of the dataset.
        building_name : str
            Name of the building, as used in the folder and file names.
        """
        building_folder = os.path.join(root_directory, '1_min', building_name)

        # Collect the daily frames and concatenate once, instead of growing
        # a DataFrame with repeated `append` calls (each call copies all rows).
        daily_frames = []
        for day in ["03", "04", "05", "06", "07", "08", "09"]:
            # NOTE(review): the join with `building_folder` was missing from
            # the source (unbalanced parenthesis, unused variable) -- confirm.
            spreadsheet = pd.ExcelFile(
                os.path.join(building_folder,
                             "%s_1min_2012-09%s.xlsx" % (building_name, day)))
            # NOTE(review): the keyword arguments were truncated in the
            # source; `index_col=0, date_parser=True` are assumed -- confirm.
            temp_df = spreadsheet.parse('Sheet1', index_col=0,
                                        date_parser=True)
            daily_frames.append(temp_df)
        df = pd.concat(daily_frames)
        df = self.standardize(df)

        # Create a new building
        building = Building()

        # Add mains
        building = self.add_mains(building, df)

        # Add appliances
        building = self.add_appliances(building, df)

        # Adding this building to dict of buildings
        building_name = building_name.replace(" ", "_")
        self.buildings[building_name] = building
Пример #3
    def load(self, data_dir, max_chunks=None):
        """Load appliance data for every house listed in `data_dir`.

        A ``Building`` is registered in ``self.buildings`` for every house
        id returned by ``load_list_of_house_ids``.  Each file in
        ``FILENAMES`` is then read chunk by chunk and every house present in
        a chunk is handed to ``self._process_house_in_chunk``.

        Parameters
        ----------
        data_dir : str
            Directory containing the files named in ``FILENAMES``.
        max_chunks : int, optional
            Maximum number of chunks to process per file.  ``None`` (the
            default) processes every chunk.
        """
        # load list of houses
        house_ids = load_list_of_house_ids(data_dir)
        for house_id in house_ids:
            building = Building()
            building.metadata['original_name'] = house_id
            self.buildings[house_id] = building

        houses_loaded = set()

        for filename in FILENAMES:
            # Load appliance energy data chunk-by-chunk
            full_filename = join(data_dir, filename)
            print('loading', full_filename)
            try:
                reader = pd.read_csv(full_filename, names=COL_NAMES,
                                     index_col=False, chunksize=CHUNKSIZE)
            except IOError as e:
                print(e, file=stderr)
                # Skip this file: without a reader there is nothing to
                # iterate over (and `reader` could be stale from the
                # previous file).
                continue

            # Process each chunk
            for chunk_i, chunk in enumerate(reader):
                if max_chunks is not None and chunk_i >= max_chunks:
                    break

                print('processing chunk', chunk_i, 'of', filename)
                # Convert date and time columns to np.datetime64 objects
                dt = chunk['date'] + ' ' + chunk['time']
                del chunk['date']
                del chunk['time']
                chunk['datetime'] = dt.apply(datetime_converter)

                # Data is either tenths of a Wh or tenths of a degree
                # NOTE(review): the values are multiplied (not divided) by
                # 10 although the comment says the raw unit is tenths --
                # confirm the intended target unit.
                chunk['data'] *= 10
                chunk['data'] = chunk['data'].astype(np.float32)

                # Process each house in chunk
                # TODO: use groupby?!?
                houses_in_chunk = chunk['house id'].unique()
                houses_loaded.update(houses_in_chunk)
                for house_id in houses_in_chunk:
                    self._process_house_in_chunk(house_id, chunk)

        print('houses with some data loaded:', houses_loaded)
Пример #4
 def __init__(self):
     """Initialise the dataset: URLs, citation, single building, voltage."""
     super(AMPDS, self).__init__()
     self.urls = ['http://ampds.org/']
     # Adjacent string literals are concatenated without a separator, so
     # every fragment except the last carries its own trailing space.
     self.citations = [
         'Stephen Makonin, Fred Popowich, Lyn Bartram, '
         'Bob Gill, and Ivan V. Bajic, '
         'AMPds: A Public Dataset for Load Disaggregation and '
         'Eco-Feedback Research, in Electrical Power and Energy '
         'Conference (EPEC), 2013 IEEE, pp. 1-6, 2013.'
     ]
     # The same Building object is reachable both as `self.building` and
     # through `self.buildings['Home_01']`.
     self.building = Building()
     self.buildings['Home_01'] = self.building
     self.nominal_voltage = 230
Пример #5
    def load_building(self, root_directory, building_name):
        """Load 15-minute electrical data for one building.

        All buildings share one workbook; the sheet named `building_name`
        is parsed, standardised with ``self.standardize`` and split into
        mains and appliances.  The resulting building is stored in
        ``self.buildings`` under ``building_name`` with spaces replaced by
        underscores.

        Parameters
        ----------
        root_directory : str
            Root directory of the dataset.
        building_name : str
            Name of the sheet to load from the workbook.
        """
        # NOTE(review): the join with `root_directory` was missing from the
        # source (unbalanced parenthesis, otherwise-unused parameter) --
        # confirm.
        spreadsheet = pd.ExcelFile(
            os.path.join(root_directory,
                         "15_min/Homes 01-10_15min_2012-0819-0825 .xlsx"))
        df = spreadsheet.parse(building_name, index_col=0, date_parser=True)
        df = self.standardize(df)

        # Create a new building
        building = Building()

        # Add mains
        building = self.add_mains(building, df)

        # Add appliances
        building = self.add_appliances(building, df)

        # Adding this building to dict of buildings
        building_name = building_name.replace(" ", "_")
        self.buildings[building_name] = building
Пример #6
    def load_building(self, root_directory, building_name):
        """Load the mains and appliance channels of one building.

        Channel labels are read from ``<root_directory>/<building_name>``,
        dud channels listed in ``DUD_CHANNELS`` are dropped, labels found in
        ``APPLIANCE_NAME_MAP`` are renamed, and every remaining channel is
        loaded either as a mains channel (label ``'mains'``) or as an
        appliance channel.  The building is stored in ``self.buildings``
        under its building number.

        Parameters
        ----------
        root_directory : str
            Directory containing one sub-directory per building.
        building_name : str
            Name of the building's sub-directory.  Its last character must
            be a digit; that digit is used as the building number.
        """
        # Construct new Building and set known attributes
        building = Building()
        building.metadata['original_name'] = building_name

        # Load labels
        building_number = int(building_name[-1])
        building_dir = os.path.join(root_directory, building_name)
        labels = load_labels(building_dir)

        # Remove dud channels
        try:
            dud_channels_for_building = DUD_CHANNELS[building_number]
        except KeyError:
            # DUD_CHANNELS doesn't specify dud channels for all buildings
            pass
        else:
            for dud_chan in dud_channels_for_building:
                # NOTE(review): loop body was missing from the source;
                # dropping the channel from `labels` is assumed -- confirm.
                labels.pop(dud_chan)

        # Convert appliance names from REDD to nilmtk standard names
        appliance_metadata = {}
        # Iterate over a snapshot because `labels` is updated in the loop.
        for chan, label in list(labels.items()):
            nilmtk_appliance = APPLIANCE_NAME_MAP.get(label)
            if nilmtk_appliance is not None:
                labels[chan] = nilmtk_appliance.name
                if nilmtk_appliance.metadata:
                    appliance_metadata[nilmtk_appliance.name] = \
                        nilmtk_appliance.metadata

        # Split channels into mains and appliances
        mains_chans = []
        appliance_chans = []
        for chan, label in labels.items():
            if label == 'mains':
                mains_chans.append(chan)
            else:
                # NOTE(review): `else` branch was missing from the source;
                # reconstructed from the two lists declared above -- confirm.
                appliance_chans.append(chan)

        # Load mains chans
        for mains_chan in mains_chans:
            mainsname = MainsName(split=mains_chan, meter=1)
            df = load_chan(building_dir, mains_chan,
                           colnames=[Measurement('power', 'apparent')])
            df = self._pre_process_dataframe(df)
            building.utility.electric.mains[mainsname] = df

        # Load sub metered channels
        appliances = building.utility.electric.appliances
        instances = {}
        # instances is a dict which maps:
        # {<'appliance name'>:
        #  (<index of next appliance instance>, <i of next supply>)}
        measurement = Measurement('power', 'active')
        for appliance_chan in appliance_chans:
            # Get appliance label and instance
            label = labels[appliance_chan]
            instance, supply = instances.get(label, (1, 1))
            appliancename = ApplianceName(name=label, instance=instance)
            metadata = appliance_metadata.get(label)
            is_dualsupply = metadata and metadata.get('dualsupply')
            if is_dualsupply:
                colname = DualSupply(measurement, supply)
                df = load_chan(building_dir, appliance_chan,
                               colnames=[colname])
                df = self._pre_process_dataframe(df)
                df[colname].name = appliancename
                if supply == 1:
                    # First supply: start a new frame for this instance and
                    # remember that the next channel is its second supply.
                    appliances[appliancename] = df
                    instances[label] = (instance, supply + 1)
                else:
                    # NOTE(review): the right-hand side of this assignment
                    # was missing from the source; joining the second
                    # supply onto the first is assumed -- confirm.
                    appliances[appliancename] = \
                        appliances[appliancename].join(df)
                    instances[label] = (instance + 1, 1)
            else:
                # This is not a DualSupply appliance
                instances[label] = (instance + 1, 1)
                colname = measurement
                df = load_chan(building_dir, appliance_chan,
                               colnames=[colname])
                df = self._pre_process_dataframe(df)
                df[colname].name = appliancename
                appliances[appliancename] = df

        # Now go through all DualSupply appliances to make sure there are
        # two chans
        for appliance_name, appliance_df in appliances.items():
            dual_supply_columns = get_dual_supply_columns(appliance_df)
            n_dual_supply_columns = len(dual_supply_columns)
            if n_dual_supply_columns == 1:
                col = dual_supply_columns[0]
                print("converting", appliance_name,
                      "in building", building_number)
                # NOTE(review): only the message is visible in the source;
                # the conversion that presumably uses `col` is not shown.

        # TODO
        # Store appliance_metadata for each appliance instance in
        # electric.metadata['appliances']
        # Set up wiring

        self.buildings[building_number] = building
Пример #7
    def load_building(self, root_directory, building_name,
                      periods_to_load=None,
                      one_sec_mains_params_to_load=None,
                      downsample_one_sec_mains_rule=None):
        """Load the mains and appliance channels of one building.

        NOTE(review): the end of the signature and the docstring delimiters
        were missing from the source; the three keyword parameters and
        their ``None`` defaults are reconstructed from the parameter
        documentation and the ``is None`` checks in the body -- confirm.

        Parameters
        ----------
        root_directory : str
            Directory containing one sub-directory per building.
        building_name : str
            Name of the building's sub-directory.  Its last character must
            be a digit; that digit is used as the building number.
        periods_to_load : dict of tuples, optional
            Key of dict is the building number (int).
            Values are (<start date>, <end date>)
            e.g. ("2013-04-01", None) or ("2013-04-01", "2013-08-01")
            Defaults to ``DEFAULT_PERIODS_TO_LOAD``, which the original
            documentation gives as ``{1: ("2013-04-01", None)}``.
        one_sec_mains_params_to_load : list of strings, optional
            some combination of {'active', 'apparent', 'voltage'}
            Defaults to ['active', 'voltage']
        downsample_one_sec_mains_rule : string, optional
            How to downsample the 1-second mains data, if available.
            e.g. '6S'
            if None then no downsampling will be done on the 1-sec mains
            data.
        """
        if one_sec_mains_params_to_load is None:
            one_sec_mains_params_to_load = ['active', 'voltage']

        # Construct new Building and set known attributes
        building = Building()
        building.metadata['original_name'] = building_name
        electric = building.utility.electric

        # Load labels
        building_number = int(building_name[-1])
        building_dir = os.path.join(root_directory, building_name)
        labels = load_labels(building_dir)

        print("Loading building {:d}, orig name={}, path={}"
              .format(building_number, building_name, building_dir))

        # Process periods to load
        if periods_to_load is None:
            periods_to_load = DEFAULT_PERIODS_TO_LOAD

        start, end = periods_to_load.get(building_number, (None, None))
        if start or end:
            print("Will crop all channels for this building to start={}, end={}"
                  .format(start, end))

        # Remove dud channels
        try:
            dud_channels_for_building = DUD_CHANNELS[building_number]
        except KeyError:
            # DUD_CHANNELS doesn't specify dud channels for all buildings
            pass
        else:
            for dud_chan in dud_channels_for_building:
                # NOTE(review): loop body was missing from the source;
                # dropping the channel from `labels` is assumed -- confirm.
                labels.pop(dud_chan)

        # Convert appliance names from REDD to nilmtk standard names
        appliance_metadata = {}
        # Iterate over a snapshot because `labels` is updated in the loop.
        for chan, label in list(labels.items()):
            nilmtk_appliance = APPLIANCE_NAME_MAP.get(label)
            if nilmtk_appliance is not None:
                labels[chan] = nilmtk_appliance.name
                if nilmtk_appliance.metadata:
                    appliance_metadata[
                        nilmtk_appliance.name] = nilmtk_appliance.metadata

        def _pre_process_dataframe(df):
            # Convert to the dataset's timezone, then crop to the period
            # requested for this building.
            df = df.tz_convert(self.metadata['timezone'])
            return df[start:end]

        # Load 1-second mains, if available
        usecols = []
        # columns in mains.dat are: index, active, apparent, voltage
        # usecols counts the index column as col 0
        # NOTE(review): the three `append` calls were missing from the
        # source; the column numbers follow the comment above -- confirm.
        if 'active' in one_sec_mains_params_to_load:
            usecols.append(1)
        if 'apparent' in one_sec_mains_params_to_load:
            usecols.append(2)
        if 'voltage' in one_sec_mains_params_to_load:
            usecols.append(3)
        try:
            df = load_chan(building_dir, filename='mains.dat', usecols=usecols,
                           colnames=[Measurement('power', 'active'),
                                     Measurement('power', 'apparent'),
                                     Measurement('voltage', '')])
        except IOError:
            # some houses don't have 1-second mains
            pass
        else:
            df = _pre_process_dataframe(df)
            if downsample_one_sec_mains_rule:
                df = df.resample(rule=downsample_one_sec_mains_rule, how='mean')
            if len(df) > MIN_SAMPLES_TO_LOAD:
                electric.mains[MainsName(split=1, meter=1)] = df

        # Split channels into mains and appliances
        mains_chan = None
        appliance_chans = []
        for chan, label in labels.items():
            if label == 'aggregate':
                mains_chan = chan
            else:
                # NOTE(review): `else` branch was missing from the source;
                # without it `appliance_chans` is never filled -- confirm.
                appliance_chans.append(chan)

        # Load Current Cost mains chans (only if we haven't loaded 1sec mains)
        if mains_chan and electric.mains.get(MainsName(1, 1)) is None:
            mainsname = MainsName(split=1, meter=1)
            df = load_chan(building_dir, mains_chan,
                           colnames=[Measurement('power', 'apparent')])
            df = _pre_process_dataframe(df)
            electric.mains[mainsname] = df

        # Load sub metered channels
        instances = {}
        # instances is a dict which maps:
        # {<'appliance name'>: <index of next appliance instance>}
        measurement = Measurement('power', 'active')
        for appliance_chan in appliance_chans:
            # Get appliance label and instance
            label = labels[appliance_chan]
            instance = instances.get(label, 1)
            appliancename = ApplianceName(name=label, instance=instance)
            instances[label] = instance + 1
            df = load_chan(building_dir, appliance_chan, colnames=[measurement])
            df = _pre_process_dataframe(df)
            df[measurement].name = appliancename
            # Channels with too few samples are skipped.
            if len(df) > MIN_SAMPLES_TO_LOAD:
                electric.appliances[appliancename] = df

        self.buildings[building_number] = building