def map_transform(self, data: DataEntry, is_train: bool) -> DataEntry:
    """Add a "periods since last positive target" feature to ``data``.

    For every time step of the target, the feature holds the number of
    periods elapsed between the previous step and the most recent step (up
    to and including that previous step) whose target value was ``> 0``,
    plus one.  If no positive value has been seen yet, the first date of
    the frame is used as the reference.  The first step is always ``1``.

    Parameters
    ----------
    data
        Entry providing ``self.start_field`` and ``self.target_field``.
        The target is compared with ``> 0`` element-wise and used as a
        boolean mask over the dates, so it is presumably a 1-D numpy
        array -- TODO confirm against callers.
    is_train
        Forwarded to ``target_transformation_length`` to determine how many
        dates have to be available in the cache.

    Returns
    -------
    DataEntry
        The same ``data`` object.  ``self.output_field`` is set to the new
        feature, or the feature is stacked (``np.vstack``) below the
        existing value when the field is already present.
    """
    start = data[self.start_field]
    target = data[self.target_field]
    length = target_transformation_length(
        target, self.pred_length, is_train=is_train
    )
    self._update_cache(start, length)
    i0 = self._date_index[start]
    date_idx = self._date_index.iloc[i0:i0 + length].index
    # When is_train is false, date_idx has len of target_len + prediction_len
    # which is useful in time feature generation, but we only need target
    # length.
    date_idx = date_idx[:len(target)]

    # Date of each step where demand occurred, NaT elsewhere.  Building the
    # series from the dates themselves keeps a datetime dtype throughout,
    # instead of writing timestamps into a float NaN series.
    has_demand = target > 0
    last_demand = pd.Series(date_idx, index=date_idx).where(has_demand)

    # Assumption: if the frame starts with a zero demand, the earliest date
    # in the frame is taken as the reference point.
    if len(last_demand) > 0 and pd.isnull(last_demand.iloc[0]):
        last_demand.iloc[0] = date_idx[0]
    last_demand = last_demand.ffill()

    # Express both the current date and the last-demand date as integer
    # period ordinals so that their difference counts whole periods.
    freqstr = date_idx.freqstr
    current_ordinal = date_idx.to_period(freqstr).astype("int64")
    demand_ordinal = (
        pd.DatetimeIndex(last_demand).to_period(freqstr).astype("int64")
    )
    elapsed = pd.Series(current_ordinal - demand_ordinal, index=date_idx)

    # Each step looks at the state of the previous step (shift by one) and
    # adds the period that has passed since then.
    since_last = elapsed.shift(1).round() + 1
    # The first step has no predecessor; it takes the value of the second.
    since_last = since_last.bfill()
    # With a single step there is nothing to back-fill from.  The first
    # value of any longer frame is always 1, so use the same value here
    # rather than leaking a NaN into the feature.
    since_last = since_last.fillna(1.0)
    feature = since_last.to_numpy()

    if self.output_field in data:
        data[self.output_field] = np.vstack(
            [data[self.output_field], feature]
        )
    else:
        data[self.output_field] = feature
    return data
def map_transform(self, data: DataEntry, is_train: bool) -> DataEntry:
    """Broadcast ``data[self.field]`` along its last axis.

    The new size of the last axis is whatever
    ``target_transformation_length`` returns for the target field,
    ``self.ext_length`` and ``is_train``; all leading axes keep their
    size.  The entry is modified in place and returned.

    Note that ``np.broadcast_to`` yields a read-only view rather than a
    copy of the original array.
    """
    n_steps = target_transformation_length(
        data[self.target_field], self.ext_length, is_train
    )
    values = data[self.field]
    leading_axes = values.shape[:-1]
    stretched_shape = leading_axes + (n_steps,)
    data[self.field] = np.broadcast_to(values, stretched_shape)
    return data