def _get_sequence_which_includes_target(self, fold):
    """Return a sequence whose window starts where a target activation is placed.

    A building and one of its target-appliance activations are chosen with
    ``_select_building`` / ``_select_activation``; ``_select_start_time``
    supplies the window start.  Up to ``seq_length`` rows of the building's
    'mains' and 'target' columns are taken from that position.  If the
    building has an entry in ``vampire_power_per_building`` it is subtracted
    from the input, which is then clipped at zero.
    """
    seq = Sequence(self.seq_length)

    house = self._select_building(fold, self.target_appliance)
    candidates = self.activations[fold][self.target_appliance][house]
    chosen = candidates[self._select_activation(candidates)]
    window_start = self._select_start_time(chosen, is_target_appliance=True)

    frame = self.data[fold][house]
    # Integer row position corresponding to the window start label.
    first_row, _ = frame.index.slice_locs(start=window_start)
    last_row = first_row + self.seq_length

    seq.input = frame['mains'].values[first_row:last_row]
    seq.target = frame['target'].values[first_row:last_row]

    if house in self.vampire_power_per_building:
        baseline = self.vampire_power_per_building[house]
        seq.input = np.clip(seq.input - baseline, 0, None)

    return seq
def _get_sequence_which_includes_target(self, fold, allAps=False):
    """Build a real-aggregate sequence that contains a target activation.

    Parameters
    ----------
    fold : key into ``self.activations`` and ``self.mains``.
    allAps : bool, optional
        When true, the type of the selected mains slice is printed.
        It has no other effect in this method.

    Returns
    -------
    Sequence or None
        ``None`` when a neighbouring target activation falls inside the
        mains window and multiple target activations are not allowed
        (the caller is expected to retry).
    """
    seq = Sequence(self.seq_length)
    building_name = self._select_building(fold, self.target_appliance)
    activations = (
        self.activations[fold][self.target_appliance][building_name])
    activation_i = self._select_activation(activations)
    activation = activations[activation_i]
    positioned_activation, is_complete = self._position_activation(
        activation, is_target_appliance=True)
    if is_complete or self.include_incomplete_target_in_output:
        seq.target = positioned_activation
    else:
        seq.target = pd.Series(0, index=positioned_activation.index)

    # The mains window spans exactly the positioned activation's index.
    mains_start = positioned_activation.index[0]
    mains_end = positioned_activation.index[-1]

    def neighbours_ok(neighbour_indicies):
        """Scan neighbouring activations; return False if a retry is needed."""
        for i in neighbour_indicies:
            neighbour = activations[i]
            neighbour_start = neighbour.index[0]
            neighbour_duration = neighbour.index[-1] - neighbour_start
            inside_mains_window = (
                neighbour_start > (mains_start - neighbour_duration)
                and neighbour_start < mains_end)
            if not inside_mains_window:
                return True  # everything checks out OK so far
            if not self.allow_multiple_target_activations_in_aggregate:
                return False  # need to retry
            if self.include_multiple_targets_in_output:
                sum_target = seq.target.add(neighbour, fill_value=0)
                # `==` on two indexes is element-wise (and raises when the
                # lengths differ); `equals` yields the single bool needed.
                neighbour_is_complete = sum_target.index.equals(
                    seq.target.index)
                if self.allow_incomplete_target or neighbour_is_complete:
                    seq.target = sum_target[seq.target.index]
        return True

    # Check forwards
    if not neighbours_ok(range(activation_i + 1, len(activations))):
        return None
    # Check backwards
    if not neighbours_ok(range(activation_i - 1, -1, -1)):
        return None

    # Get mains
    mains_for_building = self.mains[fold][activation.building]
    # Load some additional data to make sure we have enough samples.
    mains_end_extended = mains_end + timedelta(
        seconds=self.sample_period * 2)
    mains = mains_for_building[mains_start:mains_end_extended]
    if allAps:
        print("Type mains", type(mains))
    seq.input = mains.values[:self.seq_length]
    return seq
def _get_sequence_without_target(self, fold):
    """Return a sequence drawn from a gap, with one target row per appliance.

    A row of ``self.all_gaps[fold]`` is drawn using its 'p' column as
    probabilities; a start time inside that gap is then chosen and mains
    plus every appliance in ``self.appliances`` are sliced from it, after
    dropping duplicated index entries.  Returns ``None`` if building the
    targets raises ``KeyError``.
    """
    gaps = self.all_gaps[fold]
    probabilities = gaps['p']
    n_gaps = len(probabilities.values)
    picked = self.rng.choice(
        n_gaps, p=probabilities.astype('float32').values)
    record = gaps.iloc[picked]
    building = record['building']
    gap = record['gap']

    # Latest start that still leaves seq_length samples before the gap ends.
    window = timedelta(seconds=self.seq_length * self.sample_period)
    slack_seconds = ((gap.end - window) - gap.start).total_seconds()
    offset = 0 if slack_seconds <= 0 else self.rng.randint(slack_seconds)

    start_time = gap.start + timedelta(seconds=offset)
    end_time = start_time + timedelta(
        seconds=(self.seq_length + 1) * self.sample_period)

    mains = self.mains[fold][building][start_time:end_time]
    seq = Sequence(self.seq_length, [len(self.appliances), self.seq_length])
    seq.input = mains[~mains.index.duplicated()].values[:self.seq_length]

    targets = self.target[fold][building]
    try:
        rows = []
        for label in self.appliances:
            deduplicated = targets[label][~targets[label].index.duplicated()]
            rows.append(
                deduplicated[start_time:end_time].values[:self.seq_length])
        seq.target = np.array(rows)
    except KeyError:
        return None
    return seq
def _get_sequence(self, fold='train', enable_all_appliances=False):
    """Return a sequence holding (at most) one positioned target activation.

    With probability ``target_inclusion_prob`` an activation of the target
    appliance is selected and positioned with
    ``_position_incomplete_activation``; it is added to the target when it
    is complete or incomplete targets are allowed in the output.  Input and
    target are both the target data with a trailing axis added.  When
    ``enable_all_appliances`` is true the positioned activation is also
    stored in ``seq.all_appliances`` as a DataFrame.
    """
    seq = Sequence(self.seq_length)
    recorded = {}

    include_target = self.rng.binomial(n=1, p=self.target_inclusion_prob)
    if include_target:
        # The building index returned alongside the name is not used here.
        building_name, _ = self._select_building_with_index(
            fold, self.target_appliance)
        pool = self.activations[fold][self.target_appliance][building_name]
        chosen = pool[self._select_activation(pool)]
        positioned, complete, _ = self._position_incomplete_activation(
            chosen, is_target_appliance=True)
        if enable_all_appliances:
            recorded[self.target_appliance] = positioned
        if complete or self.include_incomplete_target_in_output:
            seq.target += positioned

    # Input is derived from the target before the target itself is reshaped.
    seq.input = seq.target[:, np.newaxis]
    seq.target = seq.target[:, np.newaxis]
    assert len(seq.target) == self.seq_length

    if enable_all_appliances:
        seq.all_appliances = pd.DataFrame(recorded)
    return seq
def get_sequence(self, fold='train', enable_all_appliances=False):
    """Yield every strided sequence of `fold` once, in random order.

    ``self._num_seqs[fold]`` holds the number of sequences per building.
    A permutation of all global sequence indices is drawn from ``self.rng``;
    each index is mapped back to its building and to a start row
    (``index_within_building * stride``).  Windows shorter than
    ``seq_length`` are zero padded and the padded rows get weight 0.

    Parameters
    ----------
    fold : key into ``self._num_seqs`` and ``self.data``.
    enable_all_appliances : bool
        Not supported; a true value raises.

    Yields
    ------
    Sequence
        With ``input``, ``target``, ``weights`` (each ``(seq_length, 1)``)
        and a ``metadata`` dict.

    Raises
    ------
    ValueError
        If ``enable_all_appliances`` is true (raised on first iteration).
    """
    if enable_all_appliances:
        raise ValueError("`enable_all_appliances` is not implemented yet"
                         " for RandomStrideSource!")

    seqs_per_building = self._num_seqs[fold]
    total_seq_for_fold = seqs_per_building.sum()
    building_names = seqs_per_building.index.values
    # Global index of each building's first sequence: the cumulative counts
    # shifted right by one, starting at 0.  Built as a fresh array instead of
    # shifting `Series.values` in place.
    first_seq_i = np.concatenate(
        ([0], np.cumsum(seqs_per_building.values)[:-1]))

    def zero_padded_column(window, col_i):
        padded = np.zeros((self.seq_length, 1))
        padded[:len(window), 0] = window[:, col_i]
        return padded

    for seq_i in self.rng.permutation(total_seq_for_fold):
        # Last building whose first sequence index is <= seq_i.  This is the
        # forward-fill lookup formerly done via `Index.get_loc(method=...)`,
        # which newer pandas no longer provides.
        building_row_i = np.searchsorted(
            first_seq_i, seq_i, side='right') - 1
        building_name = building_names[building_row_i]
        seq_i_for_building = seq_i - first_seq_i[building_row_i]
        start_i = seq_i_for_building * self.stride
        end_i = start_i + self.seq_length

        dataframe = self.data[fold][building_name]
        columns = dataframe.columns
        data_for_seq = dataframe.values[start_i:end_i]

        seq = Sequence(self.seq_length)
        seq.input = zero_padded_column(
            data_for_seq, columns.get_loc('mains'))
        seq.target = zero_padded_column(
            data_for_seq, columns.get_loc('target'))
        assert len(seq.input) == self.seq_length
        assert len(seq.target) == self.seq_length

        # Set mask: zero weight on the rows that were padded.
        seq.weights = np.ones((self.seq_length, 1), dtype=np.float32)
        n_zeros_to_pad = self.seq_length - len(data_for_seq)
        if n_zeros_to_pad > 0:
            seq.weights[-n_zeros_to_pad:, 0] = 0

        # Set metadata
        seq.metadata = {
            'seq_i': seq_i,
            'building_name': building_name,
            'total_num_sequences': total_seq_for_fold,
            # this takes a lot of time:
            'start_date': dataframe.index[start_i],
            'end_date': dataframe.index[start_i + len(data_for_seq) - 1]
        }
        yield seq
def _get_sequence(self, fold='train', enable_all_appliances=False): if enable_all_appliances: raise ValueError("`enable_all_appliances` is not implemented yet" " for BalancedActivityRealAggregateSource!") building_names = list(self.data[fold].keys()) num_buildings = len(building_names) building_i = self.rng.randint(low=0, high=num_buildings) building_name = building_names[building_i] seq_i_for_building = self.rng.randint( low=0, high=self._num_seqs[(fold, building_name)]) start_i = seq_i_for_building * self.stride end_i = start_i + self.seq_length dataframe = self.data[fold][building_name] columns = dataframe.columns data_for_seq = dataframe.values[start_i:end_i] def get_data(col): col_i = columns.get_loc(col) data = data_for_seq[:,col_i] len_data = len(data) zero_padded_data = np.zeros((self.seq_length, 1)) zero_padded_data[:len_data,0] = data return zero_padded_data seq = Sequence(self.seq_length) seq.input = get_data('mains') seq.target = get_data('target') if building_name in self.vampire_power_per_building: seq.input = np.clip( seq.input-self.vampire_power_per_building[building_name], 0, None) assert len(seq.input) == self.seq_length assert len(seq.target) == self.seq_length # Set mask seq.weights = np.ones((self.seq_length, 1), dtype=np.float32) n_zeros_to_pad = self.seq_length - len(data_for_seq) if n_zeros_to_pad > 0: seq.weights[-n_zeros_to_pad:, 0] = 0 # Set metadata seq.metadata = { 'seq_i': seq_i_for_building, 'building_name': building_name, # this takes a lot of time: 'start_date': dataframe.index[start_i], 'end_date': dataframe.index[start_i+len(data_for_seq)-1] } return seq
def get_sequence(self, fold='train', enable_all_appliances=False):
    """Yield every strided sequence of `fold` in order, building by building.

    ``self._num_seqs[fold]`` holds the number of sequences per building; its
    cumulative sum gives the global sequence index at which each building
    ends.  Windows shorter than ``seq_length`` are zero padded and the
    padded rows get weight 0.

    Parameters
    ----------
    fold : key into ``self._num_seqs`` and ``self.data``.
    enable_all_appliances : bool
        Not supported; a true value raises.

    Yields
    ------
    Sequence
        With ``input``, ``target``, ``weights`` (each ``(seq_length, 1)``)
        and a ``metadata`` dict.

    Raises
    ------
    ValueError
        If ``enable_all_appliances`` is true (raised on first iteration).
    """
    if enable_all_appliances:
        raise ValueError("`enable_all_appliances` is not implemented yet"
                         " for StrideSource!")

    # select building
    building_divisions = self._num_seqs[fold].cumsum()
    total_seq_for_fold = self._num_seqs[fold].sum()
    building_row_i = 0
    building_name = building_divisions.index[0]
    prev_division = 0

    def padded_column(window, col):
        data = window[col].values
        data = np.pad(
            data, pad_width=(0, self.seq_length - len(data)),
            mode='constant')
        return data[:, np.newaxis]

    for seq_i in range(total_seq_for_fold):
        # `while`, not `if`: a building with zero sequences repeats the
        # previous division value and must be skipped in the same step,
        # otherwise all later sequences are attributed to the wrong building.
        # The last division equals the total, so this cannot run off the end.
        while seq_i == building_divisions.iloc[building_row_i]:
            prev_division = seq_i
            building_row_i += 1
            building_name = building_divisions.index[building_row_i]

        seq_i_for_building = seq_i - prev_division
        start_i = seq_i_for_building * self.stride
        end_i = start_i + self.seq_length
        data_for_seq = self.data[fold][building_name].iloc[start_i:end_i]

        seq = Sequence(self.seq_length)
        seq.input = padded_column(data_for_seq, 'mains')
        seq.target = padded_column(data_for_seq, 'target')
        assert len(seq.input) == self.seq_length
        assert len(seq.target) == self.seq_length

        # Set mask: zero weight on the rows that were padded.
        seq.weights = np.ones((self.seq_length, 1), dtype=np.float32)
        n_zeros_to_pad = self.seq_length - len(data_for_seq)
        if n_zeros_to_pad > 0:
            seq.weights[-n_zeros_to_pad:, 0] = 0

        # Set metadata
        seq.metadata = {
            'seq_i': seq_i,
            'building_name': building_name,
            'total_num_sequences': total_seq_for_fold,
            'start_date': data_for_seq.index[0],
            'end_date': data_for_seq.index[-1]
        }
        yield seq
def _get_sequence(self, fold='train', enable_all_appliances=False):
    """Synthesise an aggregate sequence with one target row per appliance.

    For every appliance in ``self.appliances`` an activation is included
    with probability ``target_inclusion_prob``; it is added to the input
    and to that appliance's row of the target.  Every other appliance that
    has activations in `fold` is a distractor, included with probability
    ``distractor_inclusion_prob`` and added to the input only.

    Parameters
    ----------
    fold : key into ``self.activations``.
    enable_all_appliances : bool
        When true, each positioned activation is also recorded and attached
        to the result as ``seq.all_appliances`` (a DataFrame keyed by
        appliance).

    Returns
    -------
    Sequence
    """
    seq = Sequence(self.seq_length, [len(self.appliances), self.seq_length])
    # Kept separate from the *set* of appliance names used below; assigning
    # by key into that set raised TypeError when this flag was enabled.
    positioned_by_appliance = {}

    for idx, appliance in enumerate(self.appliances):
        # Target appliance
        if self.rng.binomial(n=1, p=self.target_inclusion_prob):
            building_name = self._select_building(fold, appliance)
            activations = self.activations[fold][appliance][building_name]
            activation_i = self._select_activation(activations)
            activation = activations[activation_i]
            positioned_activation, is_complete = self._position_activation(
                activation, is_target_appliance=True)
            positioned_activation = positioned_activation.values
            seq.input += positioned_activation
            seq.target[idx] += positioned_activation
            if enable_all_appliances:
                positioned_by_appliance[appliance] = positioned_activation

    # Distractor appliances
    candidate_distractors = (
        set(self.activations[fold].keys()) - set(self.appliances))
    distractor_appliances = [
        appliance for appliance in candidate_distractors
        if self.rng.binomial(n=1, p=self.distractor_inclusion_prob)]

    for appliance in distractor_appliances:
        building_name = self._select_building(fold, appliance)
        activations = self.activations[fold][appliance][building_name]
        if len(activations) == 0:
            continue
        activation_i = self._select_activation(activations)
        activation = activations[activation_i]
        positioned_activation, is_complete = self._position_activation(
            activation, is_target_appliance=False)
        positioned_activation = positioned_activation.values
        seq.input += positioned_activation
        if enable_all_appliances:
            positioned_by_appliance[appliance] = positioned_activation

    assert len(seq.input) == self.seq_length
    for idx, _ in enumerate(self.appliances):
        assert len(seq.target[idx]) == self.seq_length

    if enable_all_appliances:
        # Local import: this block does not otherwise rely on pandas.
        import pandas as pd
        seq.all_appliances = pd.DataFrame(positioned_by_appliance)
    return seq
def _get_sequence(self, fold='train', enable_all_appliances=False):
    """Synthesise an aggregate sequence from positioned activations.

    With probability ``target_inclusion_prob`` a target-appliance activation
    is added to the input (and to the target when it is complete or
    incomplete targets are allowed).  Each appliance returned by
    ``_distractor_appliances(fold)`` is added to the input with probability
    ``distractor_inclusion_prob``.  Input and target get a trailing axis.
    When ``enable_all_appliances`` is true, every positioned activation is
    collected into ``seq.all_appliances`` as a DataFrame.
    """
    seq = Sequence(self.seq_length)
    recorded = {}

    def positioned_values(appliance, is_target):
        """Select a building and activation, return (values, is_complete)."""
        building = self._select_building(fold, appliance)
        pool = self.activations[fold][appliance][building]
        chosen = pool[self._select_activation(pool)]
        positioned, complete = self._position_activation(
            chosen, is_target_appliance=is_target)
        return positioned.values, complete

    # Target appliance
    if self.rng.binomial(n=1, p=self.target_inclusion_prob):
        values, complete = positioned_values(self.target_appliance, True)
        seq.input += values
        if enable_all_appliances:
            recorded[self.target_appliance] = values
        if complete or self.include_incomplete_target_in_output:
            seq.target += values

    # Distractor appliances: all inclusion draws happen before any is placed.
    chosen_distractors = []
    for name in self._distractor_appliances(fold):
        if self.rng.binomial(n=1, p=self.distractor_inclusion_prob):
            chosen_distractors.append(name)

    for name in chosen_distractors:
        values, _ = positioned_values(name, False)
        seq.input += values
        if enable_all_appliances:
            recorded[name] = values

    seq.input = seq.input[:, np.newaxis]
    seq.target = seq.target[:, np.newaxis]
    assert len(seq.input) == self.seq_length
    assert len(seq.target) == self.seq_length

    if enable_all_appliances:
        seq.all_appliances = pd.DataFrame(recorded)
    return seq
def _get_sequence_which_includes_target(self, fold, valid_date):
    """Return a sequence of mains and per-appliance targets from `valid_date`.

    Data for ``self.building_name`` is sliced from `valid_date` to two days
    later, NaNs are dropped, and the first ``seq_length`` values are kept,
    once for mains and once for each label in ``self.appliances``.
    """
    seq = Sequence(self.seq_length, [len(self.appliances), self.seq_length])

    window_start = valid_date
    # Slice generously (two days) so enough samples remain after dropna().
    window_end = window_start + timedelta(days=2)

    building_mains = self.mains[fold][self.building_name]
    mains_window = building_mains[window_start:window_end].dropna()
    seq.input = mains_window.values[:self.seq_length]

    building_targets = self.target[fold][self.building_name]
    rows = []
    for label in self.appliances:
        target_window = building_targets[label][window_start:window_end]
        rows.append(target_window.dropna().values[:self.seq_length])
    seq.target = np.array(rows)
    return seq
def _get_sequence_without_target(self, fold):
    """Return a sequence drawn from a gap, with start/fold/building metadata.

    A row of ``self.all_gaps[fold]`` is drawn using its 'p' column as
    probabilities and a start time inside that gap is chosen.  Up to
    ``seq_length`` rows of the building's 'mains' and 'target' columns are
    taken from there.  A building's entry in ``vampire_power_per_building``
    (if any) is subtracted from the input, clipped at zero.
    """
    gaps = self.all_gaps[fold]
    n_gaps = len(gaps)
    assert(n_gaps != 0)
    picked = self.rng.choice(n_gaps, p=gaps['p'])
    record = gaps.iloc[picked]
    building = record['building']
    gap = record['gap']

    # Latest start that still leaves seq_length samples before the gap ends.
    window = timedelta(seconds=self.seq_length * self.sample_period)
    slack_seconds = ((gap.end - window) - gap.start).total_seconds()
    offset = 0 if slack_seconds <= 0 else self.rng.randint(slack_seconds)
    start_time = gap.start + timedelta(seconds=offset)

    frame = self.data[fold][building]
    # Integer row position corresponding to the start time label.
    first_row, _ = frame.index.slice_locs(start=start_time)
    last_row = first_row + self.seq_length

    seq = Sequence(self.seq_length)
    seq.input = frame['mains'].values[first_row:last_row]
    seq.target = frame['target'].values[first_row:last_row]

    if building in self.vampire_power_per_building:
        baseline = self.vampire_power_per_building[building]
        seq.input = np.clip(seq.input - baseline, 0, None)

    seq.metadata['start'] = start_time
    seq.metadata['fold'] = fold
    seq.metadata['building'] = building
    return seq
def _get_sequence_without_target(self, fold):
    """Return a sequence whose input is mains data drawn from a gap.

    A row of ``self.all_gaps[fold]`` is drawn using its 'p' column as
    probabilities, a start time inside that gap is chosen, and the first
    ``seq_length`` mains values from that start are used as input.  The
    target is left as created by ``Sequence``.
    """
    gaps = self.all_gaps[fold]
    picked = self.rng.choice(len(gaps), p=gaps['p'])
    record = gaps.iloc[picked]
    building = record['building']
    gap = record['gap']

    # Latest start that still leaves seq_length samples before the gap ends.
    window = timedelta(seconds=self.seq_length * self.sample_period)
    slack_seconds = ((gap.end - window) - gap.start).total_seconds()
    offset = 0 if slack_seconds <= 0 else self.rng.randint(slack_seconds)

    start_time = gap.start + timedelta(seconds=offset)
    # One extra sample period is sliced before truncating to seq_length.
    end_time = start_time + timedelta(
        seconds=(self.seq_length + 1) * self.sample_period)

    seq = Sequence(self.seq_length)
    mains_window = self.mains[fold][building][start_time:end_time]
    seq.input = mains_window.values[:self.seq_length]
    return seq
def _get_sequence_which_includes_target(self, fold):
    """Build a real-aggregate sequence with one target row per appliance.

    A target-appliance activation is selected and positioned; its index
    defines the mains window.  Neighbouring activations of the target
    appliance are checked against that window, then mains and every
    appliance in ``self.appliances`` are sliced from it after dropping
    duplicated index entries.

    Returns
    -------
    Sequence or None
        ``None`` when a neighbouring activation falls inside the window and
        multiple target activations are not allowed (caller should retry),
        or when building the targets raises ``KeyError``.
    """
    seq = Sequence(self.seq_length, [len(self.appliances), self.seq_length])
    building_name = self._select_building(fold, self.target_appliance)
    activations = (
        self.activations[fold][self.target_appliance][building_name])
    activation_i = self._select_activation(activations)
    activation = activations[activation_i]
    positioned_activation, is_complete = self._position_activation(
        activation, is_target_appliance=True)

    # The mains window spans exactly the positioned activation's index.
    mains_start = positioned_activation.index[0]
    mains_end = positioned_activation.index[-1]

    def neighbours_ok(neighbour_indicies):
        """Scan neighbouring activations; return False if a retry is needed."""
        for i in neighbour_indicies:
            neighbour = activations[i]
            neighbour_start = neighbour.index[0]
            neighbour_duration = neighbour.index[-1] - neighbour_start
            inside_mains_window = (
                neighbour_start > (mains_start - neighbour_duration)
                and neighbour_start < mains_end)
            if not inside_mains_window:
                return True  # everything checks out OK so far
            if not self.allow_multiple_target_activations_in_aggregate:
                return False  # need to retry
            if self.include_multiple_targets_in_output:
                # NOTE(review): this branch calls `.add` / `.index` on
                # seq.target, so it presumably expects a pandas Series there;
                # confirm what Sequence initialises the target to.
                sum_target = seq.target.add(neighbour, fill_value=0)
                # `==` on two indexes is element-wise (and raises when the
                # lengths differ); `equals` yields the single bool needed.
                neighbour_is_complete = sum_target.index.equals(
                    seq.target.index)
                if self.allow_incomplete_target or neighbour_is_complete:
                    seq.target = sum_target[seq.target.index]
        return True

    # Check forwards
    if not neighbours_ok(range(activation_i + 1, len(activations))):
        return None
    # Check backwards
    if not neighbours_ok(range(activation_i - 1, -1, -1)):
        return None

    # Get mains
    mains_for_building = self.mains[fold][building_name]
    # Load some additional data to make sure we have enough samples.
    mains_end_extended = mains_end + timedelta(
        seconds=self.sample_period * 2)
    mains = mains_for_building[mains_start:mains_end_extended]
    seq.input = mains[~mains.index.duplicated()].values[:self.seq_length]

    # Get targets
    targets_for_building = self.target[fold][building_name]
    try:
        seq.target = np.array([
            targets_for_building[label]
            [~targets_for_building[label].index.duplicated()]
            [mains_start:mains_end_extended].values[:self.seq_length]
            for label in self.appliances])
    except KeyError:
        return None
    return seq
class Sample(ActivationsSource):
    """Source that samples aligned mains / fridge windows from a NILMTK file.

    On construction the mains, 'fridge' and 'air conditioner' power series
    of every building listed in `windows` are loaded into memory.

    Parameters
    ----------
    activations : copied (via ``copy``) and stored on the instance.
    target_appliance : stored on the instance.
    seq_length : int
        Number of samples per sequence.
    filename : passed to ``nilmtk.DataSet``.
    windows : mapping of fold -> {building index -> window}
        Each window is unpacked into ``dataset.set_window(*window)``.
    sample_period : number
        Passed to ``power_series_all_data``; also used, in seconds, to
        compute window end times.
    uniform_prob_of_selecting_each_building : bool
    allow_incomplete_target : bool
    rng_seed : forwarded to the base class initialiser.
    """

    def __init__(self, activations, target_appliance,
                 seq_length, filename, windows, sample_period,
                 uniform_prob_of_selecting_each_building=True,
                 allow_incomplete_target=True,
                 rng_seed=None):
        self.activations = copy(activations)
        self.target_appliance = target_appliance
        self.seq_length = seq_length
        self.filename = filename
        check_windows(windows)
        self.windows = windows
        self.sample_period = sample_period
        self.uniform_prob_of_selecting_each_building = (
            uniform_prob_of_selecting_each_building)
        self.allow_incomplete_target = allow_incomplete_target
        super(Sample, self).__init__(rng_seed=rng_seed)

        self._load_mains_into_memory()
        self.target_inclusion_prob = 0.5

    def _load_mains_into_memory(self):
        """Load mains, fridge and air-conditioner series for every window.

        Fills ``self.mains``, ``self.fridge`` and ``self.AC`` as
        ``{fold: {building_name: series}}``.  Buildings whose mains series
        is empty after ``dropna()`` are not stored.
        """
        logger.info("Loading NILMTK mains...")

        # Load dataset
        dataset = nilmtk.DataSet(self.filename)
        self.dataset = dataset
        self.mains = {}
        self.fridge = {}
        self.AC = {}

        try:
            # `.items()` works on both Python 2 and 3; `.iteritems()` does
            # not exist on Python 3 dicts.
            for fold, buildings_and_windows in self.windows.items():
                for building_i, window in buildings_and_windows.items():
                    dataset.set_window(*window)
                    elec = dataset.buildings[building_i].elec
                    building_name = (
                        dataset.metadata['name'] +
                        '_building_{}'.format(building_i))

                    logger.info(
                        "Loading mains for {}...".format(building_name))

                    mains_data = elec.mains().power_series_all_data(
                        sample_period=self.sample_period).dropna()
                    fridge_data = elec['fridge'].power_series_all_data(
                        sample_period=self.sample_period).dropna()
                    AC_data = elec['air conditioner'].power_series_all_data(
                        sample_period=self.sample_period).dropna()

                    if mains_data.empty:
                        # Indexing an empty series for the log line below
                        # would raise, so report and move on.
                        logger.info(
                            "No mains data for building {} in fold {}."
                            .format(building_name, fold))
                        continue

                    self.mains.setdefault(fold, {})[building_name] = (
                        mains_data)
                    self.fridge.setdefault(fold, {})[building_name] = (
                        fridge_data)
                    self.AC.setdefault(fold, {})[building_name] = AC_data

                    logger.info(
                        "Loaded mains data from building {} for fold {}"
                        " from {} to {}."
                        .format(building_name, fold,
                                mains_data.index[0], mains_data.index[-1]))
        finally:
            # Release the underlying store even if loading fails part-way.
            dataset.store.close()

        logger.info("Done loading NILMTK mains data.")

    def get_main_fridge(self, fold='train'):
        """Return a sequence of aligned mains (input) and fridge (target).

        A building is picked at random, then start times are sampled until
        both series have exactly ``seq_length`` samples in the window and
        the fridge window sums to more than 50.

        NOTE(review): the building and start time are drawn from the global
        ``random`` module and pandas' default sampler; neither call here
        references ``rng_seed``.  Confirm whether that is intended.  The
        loop has no retry limit, so it does not terminate if no window
        qualifies.
        """
        build_names = sorted(self.mains[fold].keys())
        build_name = build_names[random.randint(1, len(build_names)) - 1]
        main_data = self.mains[fold][build_name]
        fridge_data = self.fridge[fold][build_name]

        # Label slicing includes both end points, so seq_length samples span
        # (seq_length - 1) sample periods.  The former
        # seq_length * (sample_period - 1) only yielded seq_length samples
        # when seq_length <= sample_period.
        window_span = timedelta(
            seconds=(self.seq_length - 1) * self.sample_period)
        # The start can be any point in mains except the last seq_length ones.
        start_candidates = main_data[:-self.seq_length]

        while True:
            start = start_candidates.sample(n=1).index[0]
            end = start + window_span
            main_window = main_data[start:end]
            fridge_window = fridge_data[start:end]
            if (len(main_window) == self.seq_length
                    and len(fridge_window) == self.seq_length
                    and fridge_window.sum() > 50):
                break

        seq = Sequence(self.seq_length)
        # Both windows have exactly seq_length samples here, so no padding is
        # needed; copy so the sequence does not alias the stored series.
        seq.input = main_window.values.copy()
        seq.target = fridge_window.values.copy()
        return seq
def _get_sequence_which_includes_target(self, fold):
    """Build a real-aggregate sequence that contains a target activation.

    ``_position_activation`` returns the positioned target values, a
    completeness flag and the window start time.  Neighbouring activations
    of the target appliance that overlap the window are either added into
    the target array (when permitted) or cause ``None`` to be returned so
    the caller can retry.  The input is the first ``seq_length`` mains
    values from the window start.
    """
    seq = Sequence(self.seq_length)
    building_name = self._select_building(fold, self.target_appliance)
    activations = (
        self.activations[fold][self.target_appliance][building_name])
    activation_i = self._select_activation(activations)
    activation = activations[activation_i]
    positioned_values, is_complete, window_start = (
        self._position_activation(activation, is_target_appliance=True))

    if is_complete or self.include_incomplete_target_in_output:
        seq.target = positioned_values
    else:
        seq.target = np.zeros(self.seq_length)

    window_end = window_start + timedelta(
        seconds=self.sample_period * (self.seq_length - 1))
    sample_step = np.timedelta64(self.sample_period, 's')

    def neighbours_ok(neighbour_indicies):
        """Scan neighbouring activations; return False if a retry is needed."""
        for i in neighbour_indicies:
            neighbour = activations[i]
            first_stamp = neighbour.index[0]
            last_stamp = neighbour.index[-1]
            span = last_stamp - first_stamp
            overlaps_window = (
                first_stamp > (window_start - span)
                and first_stamp < window_end)
            values = neighbour.values

            if not overlaps_window:
                return True  # everything checks out OK so far
            if not self.allow_multiple_target_activations_in_aggregate:
                return False  # need to retry
            if not self.include_multiple_targets_in_output:
                continue

            fits_entirely = (
                (first_stamp >= window_start) and (last_stamp <= window_end))
            if not (self.allow_incomplete_target or fits_entirely):
                continue

            # Offset of the neighbour's first sample, in whole sample periods.
            offset = (first_stamp - window_start) // sample_step
            if offset < 0:
                # Neighbour starts before the window: skip its leading part.
                n = min(self.seq_length, len(values) + offset)
                seq.target[0:n] += values[-offset:(n - offset)]
            else:
                # Neighbour starts inside the window: truncate its tail.
                n = min(self.seq_length - offset, len(values))
                seq.target[offset:(offset + n)] += values[0:n]
        return True

    forwards = range(activation_i + 1, len(activations))
    backwards = range(activation_i - 1, -1, -1)
    if not (neighbours_ok(forwards) and neighbours_ok(backwards)):
        return None

    # Slice two extra sample periods so enough samples are available.
    mains_for_building = self.mains[fold][activation.building]
    extended_end = window_end + timedelta(seconds=self.sample_period * 2)
    mains = mains_for_building[window_start:extended_end]
    seq.input = mains.values[:self.seq_length]
    return seq