def load_building(self, root_directory, building_name):
    """Load the mains channels of one building and register the building.

    Only mains channels are loaded; sub-metered channels, name
    standardisation and wiring are still TODO (see the end of the body).

    Parameters
    ----------
    root_directory : str
        Directory containing one sub-directory per building.
    building_name : str
        Name of the building's sub-directory.  Also the key under which the
        new Building is stored in ``self.buildings``.
    """
    # Construct new Building and set known attributes
    building = Building()
    # MIT's coordinates
    building.geographic_coordinates = (42.360091, -71.09416)

    # Load labels (iterated below as channel -> label pairs)
    building_dir = os.path.join(root_directory, building_name)
    labels = load_labels(building_dir)

    # Load every channel labelled 'mains', keyed by its DataFrame column name.
    # ``items()`` is used instead of ``iteritems()`` because the latter only
    # exists on Python 2 dicts; ``items()`` works on both Python 2 and 3.
    mains_chan_dict = {
        'mains_{:d}_meter_1_active'.format(chan): load_chan(building_dir, chan)
        for chan, label in labels.items()
        if label == 'mains'
    }

    # Make a DataFrame containing all mains channels
    df = pd.DataFrame(mains_chan_dict)
    df = df.tz_localize('UTC')
    df = df.tz_convert('US/Eastern')  # MIT is on the east coast!
    building.electric.mains = df

    # Load sub metered channels
    # TODO

    # Convert from REDD channel names to standardised names

    # Set up wiring

    self.buildings[building_name] = building
def load_building(self, root_directory, building_name):
    """Load one week of 1-minute electrical data for the specified building.

    Each building has a week's worth of data, one workbook per day, stored in
    ``<root_directory>/1_min/<building_name>/`` and named
    ``<building_name>_1min_2012-09<DD>.xlsx`` for DD in '03'..'09', e.g.
    ``Home 01_1min_2012-0903.xlsx`` to ``Home 01_1min_2012-0909.xlsx``.

    Parameters
    ----------
    root_directory : str
        Dataset root directory.
    building_name : str
        Building name as used in the folder and file names.  Spaces are
        replaced with underscores to form the key in ``self.buildings``.
    """
    building_folder = os.path.join(root_directory, '1_min', building_name)

    # Collect the daily frames and concatenate once.  Growing a DataFrame
    # with ``df.append`` inside the loop re-copies all accumulated rows on
    # every iteration, and ``DataFrame.append`` is gone in pandas 2.x.
    daily_frames = []
    for day in ["03", "04", "05", "06", "07", "08", "09"]:
        filename = "%s_1min_2012-09%s.xlsx" % (building_name, day)
        spreadsheet = pd.ExcelFile(os.path.join(building_folder, filename))
        # NOTE(review): ``date_parser`` normally takes a callable;
        # ``parse_dates=True`` may have been intended -- confirm before
        # changing, it is kept exactly as it was.
        daily_frames.append(
            spreadsheet.parse('Sheet1', index_col=0, date_parser=True))
    df = pd.concat(daily_frames)

    df = self.standardize(df)

    # Create a new building
    building = Building()

    # Add mains
    building = self.add_mains(building, df)

    # Add appliances
    building = self.add_appliances(building, df)

    # Adding this building to dict of buildings
    building_name = building_name.replace(" ", "_")
    self.buildings[building_name] = building
def load(self, data_dir, max_chunks=None):
    """Create a Building per house, then load every data file chunk-by-chunk.

    Parameters
    ----------
    data_dir : str
        Directory holding the house list and the files named in FILENAMES.
    max_chunks : int, optional
        Maximum number of chunks to process from each file.  ``None`` (the
        default) processes every chunk.
    """
    # Register an (initially empty) Building for every listed house
    for listed_id in load_list_of_house_ids(data_dir):
        new_building = Building()
        new_building.metadata['original_name'] = listed_id
        self.buildings[listed_id] = new_building

    houses_loaded = set()
    for filename in FILENAMES:
        # Open the file as an iterator of chunks; skip files we cannot open
        full_filename = join(data_dir, filename)
        print('loading', full_filename)
        try:
            reader = pd.read_csv(full_filename, names=COL_NAMES,
                                 index_col=False, chunksize=CHUNKSIZE)
        except IOError as e:
            print(e, file=stderr)
            continue

        for chunk_i, chunk in enumerate(reader):
            if max_chunks is not None and chunk_i >= max_chunks:
                break
            print('processing chunk', chunk_i, 'of', filename)

            # Replace the separate 'date' and 'time' text columns with one
            # 'datetime' column produced by datetime_converter
            date_text = chunk.pop('date')
            time_text = chunk.pop('time')
            chunk['datetime'] = (date_text + ' ' + time_text).apply(
                datetime_converter)

            # Data is either tenths of a Wh or tenths of a degree
            # NOTE(review): values described as "tenths" are multiplied by
            # 10 here -- confirm this is the intended scaling.
            chunk['data'] *= 10
            chunk['data'] = chunk['data'].astype(np.float32)

            # Hand the chunk to the per-house processor, once per house
            # that appears in it
            houses_in_chunk = chunk['house id'].unique()
            houses_loaded.update(houses_in_chunk)
            for house_id in houses_in_chunk:
                self._process_house_in_chunk(house_id, chunk)

    print('houses with some data loaded:', houses_loaded)
def __init__(self):
    """Set dataset-level metadata and create the single 'Home_01' building."""
    super(AMPDS, self).__init__()
    self.urls = ['http://ampds.org/']
    # Adjacent string literals are joined with no separator, so every
    # fragment except the last must end with an explicit space; without it
    # the citation read "Bajic,AMPds", "andEco-Feedback", "EnergyConference".
    self.citations = [
        'Stephen Makonin, Fred Popowich, Lyn Bartram, '
        'Bob Gill, and Ivan V. Bajic, '
        'AMPds: A Public Dataset for Load Disaggregation and '
        'Eco-Feedback Research, in Electrical Power and Energy '
        'Conference (EPEC), 2013 IEEE, pp. 1-6, 2013.'
    ]
    # The same Building object is reachable both as ``self.building`` and
    # as ``self.buildings['Home_01']``.
    self.building = Building()
    self.buildings['Home_01'] = self.building
    # NOTE(review): confirm 230 V matches this dataset's mains supply.
    self.nominal_voltage = 230
def load_building(self, root_directory, building_name):
    """Load one building's 15-minute data from the shared workbook.

    All buildings live in a single workbook under ``root_directory``; the
    sheet named ``building_name`` is parsed, standardised, and turned into a
    Building with mains and appliances.

    Parameters
    ----------
    root_directory : str
        Dataset root directory.
    building_name : str
        Sheet name of the building.  Spaces are replaced with underscores to
        form the key in ``self.buildings``.
    """
    workbook_path = os.path.join(
        root_directory, "15_min/Homes 01-10_15min_2012-0819-0825 .xlsx")
    workbook = pd.ExcelFile(workbook_path)
    raw = workbook.parse(building_name, index_col=0, date_parser=True)
    standardized = self.standardize(raw)

    # Build the Building: mains first, then appliances
    building = self.add_mains(Building(), standardized)
    building = self.add_appliances(building, standardized)

    # Register under a space-free key
    self.buildings[building_name.replace(" ", "_")] = building
def load_building(self, root_directory, building_name):
    """Load mains and sub-metered channels for one building.

    Steps: read the channel labels, drop channels listed in DUD_CHANNELS,
    map labels through APPLIANCE_NAME_MAP, load each mains channel as
    apparent power and each appliance channel as active power, then
    downgrade any "dual supply" appliance that ended up with only a single
    supply column to a plain measurement column.

    Parameters
    ----------
    root_directory : str
        Directory containing one sub-directory per building.
    building_name : str
        Name of the building's sub-directory.  Its LAST CHARACTER is parsed
        as the integer building number, which is the key used in
        ``self.buildings`` (so only single-digit numbers are handled).

    Raises
    ------
    ValueError
        If the last character of ``building_name`` is not a digit.
    KeyError
        If a channel listed in DUD_CHANNELS is missing from the labels.
    """
    # Construct new Building and set known attributes
    building = Building()
    building.metadata['original_name'] = building_name

    # Load labels
    building_number = int(building_name[-1])
    building_dir = os.path.join(root_directory, building_name)
    labels = load_labels(building_dir)

    # Remove dud channels
    try:
        dud_channels_for_building = DUD_CHANNELS[building_number]
    except KeyError:
        # DUD_CHANNELS doesn't specify dud channels for all buildings
        pass
    else:
        for dud_chan in dud_channels_for_building:
            labels.pop(dud_chan)

    # Convert appliance names from REDD to nilmtk standard names.
    # ``items()`` replaces the Python-2-only ``iteritems()``; the list()
    # snapshot keeps the loop safe while label values are rewritten.
    appliance_metadata = {}
    for chan, label in list(labels.items()):
        nilmtk_appliance = APPLIANCE_NAME_MAP.get(label)
        if nilmtk_appliance is not None:
            labels[chan] = nilmtk_appliance.name
            if nilmtk_appliance.metadata:
                appliance_metadata[nilmtk_appliance.name] = nilmtk_appliance.metadata

    # Split channels into mains and appliances
    mains_chans = []
    appliance_chans = []
    for chan, label in labels.items():
        if label == 'mains':
            mains_chans.append(chan)
        else:
            appliance_chans.append(chan)

    # Load mains chans
    for mains_chan in mains_chans:
        mainsname = MainsName(split=mains_chan, meter=1)
        df = load_chan(building_dir, mains_chan,
                       colnames=[Measurement('power', 'apparent')])
        df = self._pre_process_dataframe(df)
        building.utility.electric.mains[mainsname] = df

    # Load sub metered channels
    instances = {}
    # instances is a dict which maps:
    # {<'appliance name'>:
    #  (<index of next appliance instance>, <i of next supply>)}
    measurement = Measurement('power', 'active')
    for appliance_chan in appliance_chans:
        # Get appliance label and instance
        label = labels[appliance_chan]
        instance, supply = instances.get(label, (1, 1))
        appliancename = ApplianceName(name=label, instance=instance)
        metadata = appliance_metadata.get(label)
        is_dualsupply = metadata and metadata.get('dualsupply')
        if is_dualsupply:
            colname = DualSupply(measurement, supply)
            df = load_chan(building_dir, appliance_chan, colnames=[colname])
            df = self._pre_process_dataframe(df)
            df[colname].name = appliancename
            if supply == 1:
                # First supply: store it and wait for the second channel
                # with the same label before advancing the instance number.
                building.utility.electric.appliances[appliancename] = df
                instances[label] = (instance, supply + 1)
            else:
                # Second supply: join onto the frame stored for supply 1,
                # then move on to the next instance.
                building.utility.electric.appliances[appliancename] = \
                    building.utility.electric.appliances[appliancename].join(df)
                instances[label] = (instance + 1, 1)
        else:
            # This is not a DualSupply appliance
            instances[label] = (instance + 1, 1)
            colname = measurement
            df = load_chan(building_dir, appliance_chan, colnames=[colname])
            df = self._pre_process_dataframe(df)
            df[colname].name = appliancename
            building.utility.electric.appliances[appliancename] = df

    # Now go through all DualSupply appliances to make sure there are two chans
    appliances = building.utility.electric.appliances
    for appliance_name, appliance_df in appliances.items():
        dual_supply_columns = get_dual_supply_columns(appliance_df)
        n_dual_supply_columns = len(dual_supply_columns)
        if n_dual_supply_columns == 1:
            # Only one supply was found: rename its column to the plain
            # measurement so it looks like a single-supply appliance.
            col = dual_supply_columns[0]
            print("converting", appliance_name, "in building", building_number)
            appliances[appliance_name].rename(columns={col: col.measurement},
                                              inplace=True)

    # TODO
    # Store appliance_metadata for each appliance instance in electric.metadata['appliances']

    # Set up wiring

    self.buildings[building_number] = building
def load_building(self, root_directory, building_name,
                  periods_to_load=None,
                  one_sec_mains_params_to_load=None,
                  downsample_one_sec_mains_rule=None):
    """Load mains and appliance channels for one building.

    1-second mains data (``mains.dat``) is loaded when the file exists;
    otherwise the channel labelled 'aggregate' is used as mains.  Every
    loaded channel is converted to ``self.metadata['timezone']`` and cropped
    to the building's (start, end) period.

    Parameters
    ----------
    root_directory : str
        Directory containing one sub-directory per building.
    building_name : str
        Name of the building's sub-directory.  Its LAST CHARACTER is parsed
        as the integer building number, which is the key used in
        ``self.buildings`` (so only single-digit numbers are handled).
    periods_to_load : dict of tuples, optional
        Key of dict is the building number (int).
        Values are (<start date>, <end date>)
        e.g. ("2013-04-01", None) or ("2013-04-01", "2013-08-01")
        Defaults to DEFAULT_PERIODS_TO_LOAD.  Buildings missing from the
        dict are not cropped.
    one_sec_mains_params_to_load : list of strings, optional
        some combination of {'active', 'apparent', 'voltage'}
        Defaults to ['active', 'voltage']
    downsample_one_sec_mains_rule : string, optional
        How to downsample the 1-second mains data, if available.
        e.g. '6S'
        if None then no downsampling will be done on the 1-sec mains data.

    Raises
    ------
    ValueError
        If the last character of ``building_name`` is not a digit.
    KeyError
        If a channel listed in DUD_CHANNELS is missing from the labels.
    """
    if one_sec_mains_params_to_load is None:
        one_sec_mains_params_to_load = ['active', 'voltage']

    # Construct new Building and set known attributes
    building = Building()
    building.metadata['original_name'] = building_name
    electric = building.utility.electric

    # Load labels
    building_number = int(building_name[-1])
    building_dir = os.path.join(root_directory, building_name)
    labels = load_labels(building_dir)
    print("Loading building {:d}, orig name={}, path={}"
          .format(building_number, building_name, building_dir))

    # Process periods to load
    if periods_to_load is None:
        periods_to_load = DEFAULT_PERIODS_TO_LOAD
    start, end = periods_to_load.get(building_number, (None, None))
    if start or end:
        print("Will crop all channels for this building to start={}, end={}"
              .format(start, end))

    # Remove dud channels
    try:
        dud_channels_for_building = DUD_CHANNELS[building_number]
    except KeyError:
        # DUD_CHANNELS doesn't specify dud channels for all buildings
        pass
    else:
        for dud_chan in dud_channels_for_building:
            labels.pop(dud_chan)

    # Convert dataset appliance names to nilmtk standard names.
    # ``items()`` replaces the Python-2-only ``iteritems()``; the list()
    # snapshot keeps the loop safe while label values are rewritten.
    appliance_metadata = {}
    for chan, label in list(labels.items()):
        nilmtk_appliance = APPLIANCE_NAME_MAP.get(label)
        if nilmtk_appliance is not None:
            labels[chan] = nilmtk_appliance.name
            if nilmtk_appliance.metadata:
                appliance_metadata[
                    nilmtk_appliance.name] = nilmtk_appliance.metadata

    def _pre_process_dataframe(df):
        # Convert to the dataset timezone, then crop to [start:end]
        # (closes over ``start`` / ``end`` computed above).
        df = df.tz_convert(self.metadata['timezone'])
        return df[start:end]

    # Load 1-second mains, if available
    usecols = []
    # columns in mains.dat are: index, active, apparent, voltage
    # usecols counts the index column as col 0
    if 'active' in one_sec_mains_params_to_load:
        usecols.append(1)
    if 'apparent' in one_sec_mains_params_to_load:
        usecols.append(2)
    if 'voltage' in one_sec_mains_params_to_load:
        usecols.append(3)
    try:
        # NOTE(review): all three column names are always passed, even when
        # ``usecols`` selects a subset -- presumably load_chan reconciles
        # the two; confirm.
        df = load_chan(building_dir, filename='mains.dat',
                       usecols=usecols,
                       colnames=[Measurement('power', 'active'),
                                 Measurement('power', 'apparent'),
                                 Measurement('voltage', '')])
    except IOError:
        # some houses don't have 1-second mains
        pass
    else:
        df = _pre_process_dataframe(df)
        if downsample_one_sec_mains_rule:
            df = df.resample(rule=downsample_one_sec_mains_rule, how='mean')
        if len(df) > MIN_SAMPLES_TO_LOAD:
            electric.mains[MainsName(split=1, meter=1)] = df

    # Split channels into mains and appliances
    mains_chan = None
    appliance_chans = []
    for chan, label in labels.items():
        if label == 'aggregate':
            mains_chan = chan
        else:
            appliance_chans.append(chan)

    # Load Current Cost mains chans (only if we haven't loaded 1sec mains)
    if mains_chan and electric.mains.get(MainsName(1, 1)) is None:
        mainsname = MainsName(split=1, meter=1)
        df = load_chan(building_dir, mains_chan,
                       colnames=[Measurement('power', 'apparent')])
        df = _pre_process_dataframe(df)
        electric.mains[mainsname] = df

    # Load sub metered channels
    instances = {}
    # instances is a dict which maps:
    # {<'appliance name'>: <index of next appliance instance>}
    measurement = Measurement('power', 'active')
    for appliance_chan in appliance_chans:
        # Get appliance label and instance
        label = labels[appliance_chan]
        instance = instances.get(label, 1)
        appliancename = ApplianceName(name=label, instance=instance)
        # The instance counter advances even if the channel is later
        # discarded for having too few samples.
        instances[label] = instance + 1

        df = load_chan(building_dir, appliance_chan,
                       colnames=[measurement])
        df = _pre_process_dataframe(df)
        df[measurement].name = appliancename
        if len(df) > MIN_SAMPLES_TO_LOAD:
            electric.appliances[appliancename] = df

    self.buildings[building_number] = building