def add_is_off_column(df, metadata):
    """Add an ``is_day_off`` column to ``df`` in place.

    Does nothing when the column is already present. Otherwise each row's
    ``series_id`` and ``weekday`` are passed, together with ``metadata``,
    to the module-level ``_is_day_off`` helper and the results are stored
    as the new column.

    Args:
        df: dataframe with ``series_id`` and ``weekday`` columns; modified
            in place.
        metadata: forwarded unchanged to ``_is_day_off``.

    Returns:
        None.
    """
    if 'is_day_off' in df.columns:
        return
    # tqdm only wraps the row iterator to show progress.
    rows = tqdm(df.iterrows(), total=len(df), desc='Adding day off')
    df['is_day_off'] = [
        _is_day_off(row['series_id'], row['weekday'], metadata)
        for _, row in rows
    ]
def _prepare_is_day_off(window, df, metadata, series_id):
    """Build the day-off feature covering the history plus the forecast days.

    The history part is every 24th value of ``df.is_holiday`` (presumably
    one value per day of hourly data -- confirm against the caller). It is
    extended with one flag for each of the ``WINDOW_TO_PRED_DAYS[window]``
    days following the last row of ``df``; a future day is flagged when
    ``_is_day_off`` says so for its weekday or ``_is_holiday`` says so for
    its date.

    Args:
        window: key into ``WINDOW_TO_PRED_DAYS``.
        df: dataframe with ``is_holiday``, ``timestamp`` and ``weekday``
            columns.
        metadata: forwarded unchanged to ``_is_day_off``.
        series_id: forwarded unchanged to ``_is_day_off``.

    Returns:
        float32 array with a leading axis of length 1 in which every zero
        has been replaced by -1.
    """
    flags = df.is_holiday.values[::24].tolist()
    date = df.timestamp.values[-1]
    weekday = df.weekday.values[-1]
    for _ in range(WINDOW_TO_PRED_DAYS[window]):
        date = _get_next_date(date)
        weekday = _get_next_weekday(weekday)
        # The holiday lookup is only consulted when the weekday rule
        # does not already mark the day as off.
        flags.append(
            _is_day_off(series_id, weekday, metadata) or _is_holiday(date))

    encoded = np.array(flags, dtype=np.float32)
    # Working days are encoded as -1 instead of 0.
    encoded[encoded == 0] = -1
    return encoded[np.newaxis, ...]
def _prepare_future_day_off(window, df, metadata, series_id):
    """Build the day-off feature for the days to be predicted.

    Starting from the last row of ``df``, walks forward
    ``WINDOW_TO_PRED_DAYS[window]`` days and flags each one that is off
    according to ``_is_day_off`` (weekday rule) or ``_is_holiday`` (date).

    Args:
        window: key into ``WINDOW_TO_PRED_DAYS``; ``'hourly'`` and
            ``'weekly'`` additionally select the output layout.
        df: dataframe with ``timestamp`` and ``weekday`` columns.
        metadata: forwarded unchanged to ``_is_day_off``.
        series_id: forwarded unchanged to ``_is_day_off``.

    Returns:
        float32 array with a leading axis of length 1. With ``n`` the number
        of predicted days, the shape is ``(1, 24 * n, 1)`` for ``'hourly'``
        (each flag repeated 24 times), ``(1, 2, n / 2)`` for ``'weekly'``
        and ``(1, n, 1)`` for any other window.
    """
    date = df.timestamp.values[-1]
    weekday = df.weekday.values[-1]
    flags = []
    for _ in range(WINDOW_TO_PRED_DAYS[window]):
        date = _get_next_date(date)
        weekday = _get_next_weekday(weekday)
        # The holiday lookup is only consulted when the weekday rule
        # does not already mark the day as off.
        flags.append(
            _is_day_off(series_id, weekday, metadata) or _is_holiday(date))

    future = np.array(flags, dtype=np.float32)
    if window == 'weekly':
        # Two rows, the predicted days split evenly between them.
        future = np.reshape(future, (2, -1))
    else:
        if window == 'hourly':
            # One value per hour: repeat every daily flag 24 times.
            future = np.repeat(future, 24, axis=0)
        future = np.expand_dims(future, axis=1)
    return np.expand_dims(future, axis=0)
def visualize_idx(idx, train, train_arrange, preds, metadata):
    """Plot one arranged sample: real consumption plus its prediction.

    Row ``idx`` of ``train_arrange`` selects the series and the
    ``[train_start_idx, val_end_idx)`` slice of its rows in ``train``. The
    real consumption is drawn one day at a time, orange when
    ``_is_day_off`` flags the day's weekday and blue otherwise, with a
    marker on the first point of each day. ``preds[idx]`` is drawn in green
    against the last timestamps of the slice. For the ``'daily'`` window
    the data is first reduced to one point per 24 rows (presumably hourly
    rows -- confirm against the caller).

    Args:
        idx: label of the row in ``train_arrange`` and key into ``preds``.
        train: dataframe with ``series_id``, ``consumption``, ``timestamp``
            and ``weekday`` columns.
        train_arrange: dataframe with ``series_id``, ``window``,
            ``train_start_idx``, ``val_end_idx`` and ``nmae`` columns.
        preds: container indexed by ``idx`` holding the predicted values.
        metadata: forwarded unchanged to ``_is_day_off``.

    Raises:
        ValueError: if the row's window is neither ``'hourly'`` nor
            ``'daily'``.
    """
    row = train_arrange.loc[idx]
    window = row['window']
    # Fail before drawing anything: other windows used to leave
    # ``batch_size`` unbound and crash half-way through the plot.
    if window not in ('hourly', 'daily'):
        raise ValueError(
            "Unsupported window %r: expected 'hourly' or 'daily'" % (window,))

    series = train[train.series_id == row['series_id']]
    span = slice(row['train_start_idx'], row['val_end_idx'])
    consumption = series.consumption.values[span]
    dates = series.timestamp.values[span]
    weekdays = series.weekday.values[span]

    if window == 'hourly':
        batch_size = 24
    else:
        # Daily window: collapse every 24 rows into a single point.
        batch_size = 1
        weekdays = weekdays[::24]
        dates = dates[::24]
        consumption = group_sum(consumption, 24)

    pred = preds[idx]
    pred_dates = dates[-len(pred):]
    plt.plot(pred_dates, pred, color='green', lw=3)
    plt.plot(pred_dates[::batch_size], pred[::batch_size], 'o',
             color='green', lw=3)

    for day in range(len(dates) // batch_size):
        start = day * batch_size
        # One extra point so consecutive day segments join up.
        stop = start + batch_size + 1
        if _is_day_off(row['series_id'], weekdays[start], metadata):
            color = 'orange'
        else:
            color = 'blue'
        plt.plot(dates[start:stop], consumption[start:stop], color=color)
        # Marker on the first point of the day only.
        plt.plot(dates[start:start + 1], consumption[start:start + 1], 'o',
                 color=color)
    plt.title('%i Nmae: %.3f' % (idx, row['nmae']))
def _is_day_off(self, weekday, series_id, date):
    """Return 1 if the given day is a day off for the series, else 0.

    Delegates to the module-level ``_is_day_off`` with ``self._metadata``.
    When ``self._use_holidays`` is set and the weekday rule does not
    already flag the day, ``_is_holiday(date)`` decides instead.

    Args:
        weekday: weekday passed to the module-level ``_is_day_off``.
        series_id: series passed to the module-level ``_is_day_off``.
        date: date checked with ``_is_holiday``; only used when
            ``self._use_holidays`` is truthy.

    Returns:
        The combined flag converted with ``int``.
    """
    day_off = _is_day_off(series_id, weekday, self._metadata)
    if self._use_holidays and not day_off:
        day_off = _is_holiday(date)
    return int(day_off)