def energy_per_dataframe(electricity, sample_period_multiplier=20, unit="kwh"):
    """Pre-process `electricity` and return the total energy per channel.

    Gaps found in mains are masked out of every appliance channel, NaNs are
    dropped and zeros are inserted before the energy totals are computed.

    Parameters
    ----------
    electricity : Electricity object
        NOTE: `electricity.appliances` is reassigned on the object passed
        in (the 'unmetered' and 'subpanels' channels are removed), so the
        caller's object is modified.
    sample_period_multiplier : int, optional
        Forwarded to `mask_appliances_with_mains` and `insert_zeros`.
        Default = 20.
    unit : str, optional
        Forwarded to the energy helpers.  Default = "kwh".

    Returns
    -------
    total_mains_energy, totals_per_appliance

    total_mains_energy : float
    totals_per_appliance : pd.Series
        each key is an ApplianceName
        each value is total energy
    """
    # TODO: this might be an ugly hack to resolve circular dependencies.
    from nilmtk.preprocessing.electricity.building import (
        mask_appliances_with_mains)
    from nilmtk.preprocessing.electricity.single import insert_zeros

    # remove 'unmetered' and 'subpanels' from appliances
    electricity.appliances = electricity.remove_channels_from_appliances()

    # Sum split mains and DualSupply appliances
    electricity = electricity.sum_split_supplies()

    # TODO: Select common measurements.  Maybe use
    # electricity.select_common_measurements?
    # MEASUREMENT_PREFERENCES = [Measurement('power', 'active'),
    #                            Measurement('power', 'apparent')]
    # # Check if all channels share at least one Measurement
    # # (e.g. ('power', 'active'))
    # common_measurements = find_common_measurements(electricity)
    # common_measurement = None
    # for measurement_preference in MEASUREMENT_PREFERENCES:
    #     if measurement_preference in common_measurements:
    #         common_measurement = measurement_preference
    #         print("Using common_measurement:", common_measurement)
    #         break
    # if common_measurements is None and require_common_measurements:
    #     raise NoCommonMeasurementError

    # Find large gaps in mains data and ignore those gaps for all
    # appliance channels
    electricity = mask_appliances_with_mains(electricity,
                                             sample_period_multiplier)

    # Drop NaNs on all channels
    electricity = apply_func_to_values_of_dicts(
        electricity, lambda df: df.dropna(), ["appliances", "mains"])

    # Insert zeros on both appliance and mains data.
    print("Inserting zeros... may take a little while...", end="")
    sys.stdout.flush()

    def single_insert_zeros(df):
        return insert_zeros(
            df, sample_period_multiplier=sample_period_multiplier)

    electricity = apply_func_to_values_of_dicts(
        electricity, single_insert_zeros, ["appliances", "mains"])
    print("done inserting zeros")

    # Total energy used for mains
    total_mains_energy = get_total_energy_per_dict(electricity, "mains", unit)

    # `.items()` (rather than the Python-2-only `.iteritems()`) keeps this
    # working on both Python 2 and Python 3.
    totals_per_appliance = {
        name: single.energy(df, unit=unit)
        for name, df in electricity.appliances.items()
    }

    return total_mains_energy, pd.Series(totals_per_appliance)
def fill_appliance_gaps(building, sample_period_multiplier=4):
    """Fill the gaps in every appliance dataframe of `building`.

    Two passes are made over the appliance dataframes:

    1. `single.insert_zeros` book-ends every large gap with a zero at each
       end.
    2. Any remaining NaNs are forward filled.

    The net effect is that small gaps are forward-filled with the recorded
    value which precedes the gap, while large gaps are forward-filled with
    zeros.

    NOTE: This function assumes that any gap in the appliance data is the
    result of the appliance monitor and the appliance being off.  Do not
    use this function if gaps in appliance data are the result of the IAM
    being broken (and hence the state of the appliance is unknown).

    Parameters
    ----------
    building : nilmtk.Building

    sample_period_multiplier : float or int, optional
        The permissible maximum sample period expressed as a multiple of
        each dataframe's sample period.  Any gap longer than the max
        sample period is assumed to imply that the IAM and appliance are
        off.  Passed straight through to `single.insert_zeros`;
        presumably None makes that function use 4 x the sample period of
        each dataframe -- confirm against `insert_zeros`.

    Returns
    -------
    building_copy : nilmtk.Building

    See Also
    --------
    single.insert_zeros
    """
    # TODO: should probably remove any periods where all appliances
    # are not recording (which indicates that things are broken)

    appliance_dicts = ['utility.electric.appliances']

    def bookend_gaps(df):
        # "book-end" each gap with a zero at each end
        return single.insert_zeros(
            df, sample_period_multiplier=sample_period_multiplier)

    def forward_fill(df):
        return pd.DataFrame.fillna(df, method='ffill')

    bookended = apply_func_to_values_of_dicts(
        building, bookend_gaps, appliance_dicts)

    # Now fill forward
    return apply_func_to_values_of_dicts(
        bookended, forward_fill, appliance_dicts)
def downsample(building, rule='1T', how='mean', dropna=False):
    """Downsample all electrical data.

    Parameters
    ----------
    building : nilmtk.Building

    rule : string
        refer to pandas.resample docs for rules; default '1T' or 1 minute

    how : string
        refer to pandas.resample docs for how; default 'mean'

    dropna : boolean, optional
        default = False.  Whether to drop NaNs after resampling.

    Returns
    --------
    building_copy: nilmtk.Building
    """

    def resample(df):
        try:
            # Legacy pandas: the aggregation is chosen with the `how`
            # keyword and a dataframe is returned directly.
            resampled = df.resample(rule=rule, how=how)
        except TypeError:
            # Newer pandas no longer accepts `how`; resample() returns a
            # Resampler object which has to be aggregated explicitly.
            resampled = df.resample(rule=rule).agg(how)
        return resampled.dropna() if dropna else resampled

    return apply_func_to_values_of_dicts(building, resample,
                                         BUILDING_ELECTRICITY_DICTS)
def prepend_append_zeros(building, start_datetime, end_datetime, freq,
                         timezone):
    """Reindex every appliance dataframe onto a regular index running from
    `start_datetime` to `end_datetime` at `freq`, and fill the NaNs.

    After reindexing, NaNs in power columns are filled with zeros, so each
    appliance reads zero from `start_datetime` up to its first sample and
    from its last sample up to `end_datetime`.  NaNs in every other column
    are filled with the median of that column's original values.

    Parameters
    ----------
    building : nilmtk.Building

    start_datetime, end_datetime
        Bounds of the new index.  The index is built timezone-naive,
        localized to 'GMT' and then converted to `timezone`.

    freq
        Frequency of the new index (passed to `pd.date_range`).

    timezone
        Target timezone (passed to `tz_convert`).

    Returns
    -------
    new_building
        The result of `apply_func_to_values_of_dicts`.
    """
    # TODO: can this function be merged with or make use of
    # preprocessing.building.single.reframe_index ?

    APPLIANCES = ['utility.electric.appliances']

    # pd.date_range is the supported spelling of the legacy
    # pd.DatetimeIndex(start=..., end=..., freq=...) constructor.
    idx = pd.date_range(start=start_datetime, end=end_datetime, freq=freq)
    idx = idx.tz_localize('GMT').tz_convert(timezone)

    def reindex_fill_na(df):
        # reindex() returns a new dataframe, so `df` is left untouched and
        # no explicit copy is required.
        reindexed = df.reindex(idx)
        for column in df.columns:
            if column.physical_quantity in ['power']:
                fill_value = 0
            else:
                fill_value = df[column].median()
            # Assign the filled column back rather than filling in place
            # on a column selection, which is not guaranteed to write
            # through to the parent dataframe.
            reindexed[column] = reindexed[column].fillna(fill_value)
        return reindexed

    new_building = apply_func_to_values_of_dicts(building, reindex_fill_na,
                                                 APPLIANCES)
    return new_building
def mask_appliances_with_mains(electricity, sample_period_multiplier=4):
    """Finds gaps in first mains channel and then removes these gaps from
    all appliance data.

    The assumption is that if the mains channel is dead for any timeslice
    then we should ignore this timeslice for all appliance channels too.

    Parameters
    ----------
    electricity : Electricity object

    sample_period_multiplier : int, optional
        Default = 4
        max_sample_period = sample_period x sample_period_multiplier
        max_sample_period defines a 'gap'.

    Returns
    -------
    copy of electricity

    .. warning:: currently only uses gaps from first mains dataframe and
                 ignores all other mains dataframes.
    """
    # TODO: handle multiple mains channels and take intersection of gaps

    print("Masking appliances with mains... may take a little while...",
          end='')
    sys.stdout.flush()

    # list(...) so that this also works where dict.values() returns a view
    # (Python 3) instead of a list (Python 2).
    mains = list(electricity.mains.values())[0]
    sample_period = get_sample_period(mains)
    max_sample_period = sample_period * sample_period_multiplier
    print("Mains sample period = {:.1f}, max_sample_period = {:.1f}"
          .format(sample_period, max_sample_period))

    print("Getting gap starts and ends...")
    gap_starts, gap_ends = get_gap_starts_and_gap_ends(mains,
                                                       max_sample_period)
    print("Found {:d} gap starts and {:d} gap ends."
          .format(len(gap_starts), len(gap_ends)))

    def mask_appliances(appliance_df):
        """For each appliance dataframe, insert NaNs for any reading
        inside mains gaps.
        """
        print(".", end='')
        sys.stdout.flush()

        # The index is not changed by the masking below (nor by astype),
        # so it only needs to be looked up once.
        index = appliance_df.index
        for gap_start, gap_end in zip(gap_starts, gap_ends):
            in_gap = (index >= gap_start) & (index <= gap_end)
            try:
                appliance_df[in_gap] = np.nan
            except ValueError:
                # some DFs are int32, which can't accept NaNs, so convert
                # to float32:
                # TODO: remove this once #105 is fixed
                appliance_df = appliance_df.astype(np.float32)
                appliance_df[in_gap] = np.nan
        return appliance_df

    masked = apply_func_to_values_of_dicts(electricity, mask_appliances,
                                           ['appliances'])
    print("done")
    return masked
def make_common_index(building):
    """Restrict every electricity dataframe in `building` to the timestamps
    shared by the first mains dataframe and the first appliance dataframe.

    Only the first mains dataframe and the first appliance dataframe are
    used to work out the common index; the frequency of the first mains
    index is attached to the resulting index.

    Parameters
    ----------
    building : nilmtk.Building

    Returns
    -------
    The result of `apply_func_to_values_of_dicts`, with each dataframe in
    BUILDING_ELECTRICITY_DICTS selected on the common index.
    """
    electric = building.utility.electric

    # list(...) so that this also works where dict.values() returns a view
    # (Python 3) instead of a list (Python 2).
    appliances_index = list(electric.appliances.values())[0].index
    mains_index = list(electric.mains.values())[0].index
    freq = mains_index.freq

    # TODO: can the line below be replace with
    # common_index = mains_index & appliances_index
    # This might be a lot faster and as far as I can tell gives the same
    # answer.
    common_index = pd.DatetimeIndex(
        np.sort(list(set(mains_index).intersection(set(appliances_index)))),
        freq=freq)

    def take_common_index(df):
        # Label-based selection; `.loc` replaces the legacy `.ix` indexer.
        return df.loc[common_index]

    return apply_func_to_values_of_dicts(building, take_common_index,
                                         BUILDING_ELECTRICITY_DICTS)
def drop_missing_mains(building):
    """Call `dropna()` on every mains dataframe of `building`.

    Parameters
    ----------
    building : nilmtk.Building

    Returns
    -------
    The result of `apply_func_to_values_of_dicts` applied to
    'utility.electric.mains'.
    """

    def without_nans(df):
        return df.dropna()

    mains_dicts = ['utility.electric.mains']
    return apply_func_to_values_of_dicts(building, without_nans, mains_dicts)