def rise_set_smoothed(rough_dict, sunrise_tau=0.05, sunset_tau=0.95):
    """Smooth rough sunrise and sunset measurements with quantile regression.

    Each series is handed to ``local_quantile_regression_with_seasonal``
    together with a boolean mask selecting its non-NaN entries, using the
    MOSEK solver.

    :param rough_dict: mapping with ``'sunrises'`` and ``'sunsets'`` entries;
        NaN entries are excluded from the fit via the mask
    :param sunrise_tau: value passed as ``tau`` for the sunrise fit
    :param sunset_tau: value passed as ``tau`` for the sunset fit
    :return: dict with keys ``'sunrises'`` and ``'sunsets'`` holding the
        corresponding regression outputs
    """
    # Look up both series before fitting anything.
    rough_sr = rough_dict['sunrises']
    rough_ss = rough_dict['sunsets']
    fit_inputs = ((rough_sr, sunrise_tau), (rough_ss, sunset_tau))
    fitted_sr, fitted_ss = [
        local_quantile_regression_with_seasonal(
            series, ~np.isnan(series), tau=tau, solver='MOSEK')
        for series, tau in fit_inputs
    ]
    return dict(sunrises=fitted_sr, sunsets=fitted_ss)
def run(self, data, random_seed=None):
    """Select the sun-detection threshold by hold-out validation and fit.

    For each of 31 log-spaced thresholds between 1e-5 and 1e-1, the rough
    sunrise/sunset measurements are split 80/20 into train and test
    entries, a quantile-regression fit is computed on the training entries,
    and the RMS residual on the held-out entries is recorded. The threshold
    with the smallest hold-out error is then used for a final fit on all
    available measurements.

    Sets ``sunrise_estimates``, ``sunset_estimates``,
    ``sunrise_measurements``, ``sunset_measurements``, ``sunup_mask`` and
    ``threshold`` on the instance.

    :param data: input forwarded unchanged to ``detect_sun``
    :param random_seed: seed passed to ``np.random.seed`` before every
        train/test shuffle (``None`` leaves the split unseeded)
    :return: None
    """
    def _holdout_masks(values, valid_ixs):
        # Shuffle the usable indices in place and turn an 80-20 split of
        # them into boolean train/test masks shaped like `values`.
        np.random.shuffle(valid_ixs)
        split_at = int(len(valid_ixs) * .8)
        # Builtin bool: the np.bool alias was removed in NumPy 1.24.
        train_msk = np.zeros_like(values, dtype=bool)
        test_msk = np.zeros_like(values, dtype=bool)
        train_msk[valid_ixs[:split_at]] = True
        test_msk[valid_ixs[split_at:]] = True
        return train_msk, test_msk

    ths = np.logspace(-5, -1, 31)
    ho_error = []
    for th in ths:
        measured = rise_set_rough(detect_sun(data, th))
        sunrises = measured['sunrises']
        sunsets = measured['sunsets']
        # Re-seeded for every threshold so that, for a fixed seed, each
        # threshold is scored with the same shuffling sequence.
        np.random.seed(random_seed)
        use_set_sr = np.flatnonzero(~np.isnan(sunrises))
        use_set_ss = np.flatnonzero(~np.isnan(sunsets))
        enough_data = (len(use_set_sr) / len(sunrises) > 0.6
                       and len(use_set_ss) / len(sunsets) > 0.6)
        if not enough_data:
            # More than 40% of the entries are NaN: penalize the threshold.
            ho_error.append(1e6)
            continue
        # Order matters for reproducibility: sunrise shuffle, then sunset.
        train_msk_sr, test_msk_sr = _holdout_masks(sunrises, use_set_sr)
        train_msk_ss, test_msk_ss = _holdout_masks(sunsets, use_set_ss)
        sr_smoothed = local_quantile_regression_with_seasonal(
            sunrises, train_msk_sr, tau=0.05, solver='MOSEK')
        ss_smoothed = local_quantile_regression_with_seasonal(
            sunsets, train_msk_ss, tau=0.95, solver='MOSEK')
        ho_resid = np.r_[(sunrises - sr_smoothed)[test_msk_sr],
                         (sunsets - ss_smoothed)[test_msk_ss]]
        ho_error.append(np.sqrt(np.mean(ho_resid**2)))
    selected_th = ths[np.argmin(ho_error)]
    # Final pass: refit at the selected threshold using every measurement.
    bool_msk = detect_sun(data, selected_th)
    measured = rise_set_rough(bool_msk)
    smoothed = rise_set_smoothed(measured, sunrise_tau=.05, sunset_tau=.95)
    self.sunrise_estimates = smoothed['sunrises']
    self.sunset_estimates = smoothed['sunsets']
    self.sunrise_measurements = measured['sunrises']
    self.sunset_measurements = measured['sunsets']
    self.sunup_mask = bool_msk
    self.threshold = selected_th
def daily_missing_data_advanced(data_matrix, threshold=0.2,
                                return_density_signal=False,
                                return_fit=False):
    """Score each column of ``data_matrix`` by its density of usable data.

    The density of a column is the fraction of its rows whose value exceeds
    2% of the 95th percentile of all non-NaN entries (NaN counts as zero).
    A quantile-regression fit (``tau=0.9``, MOSEK solver) of that density
    signal is computed using only the columns whose density exceeds
    ``threshold``; the score is the density divided by the fit.

    :param data_matrix: 2-D numeric array, possibly containing NaN
        (presumably one column per day, judging by the function name)
    :param threshold: minimum density for a column to be used in the fit
    :param return_density_signal: also return the raw density signal
    :param return_fit: also return the fitted signal
    :return: the scores alone, or a tuple ``(scores[, density][, fit])``
        when any of the optional outputs is requested
    """
    missing = np.isnan(data_matrix)
    capacity = np.quantile(data_matrix[~missing], 0.95)
    filled = np.copy(data_matrix)
    filled[missing] = 0.
    above_floor = filled > 0.02 * capacity
    n_rows = data_matrix.shape[0]
    density = np.sum(above_floor, axis=0) / n_rows
    fit = local_quantile_regression_with_seasonal(
        density,
        use_ixs=density > threshold,
        tau=0.9,
        solver='MOSEK'
    )
    scores = density / fit
    extras = []
    if return_density_signal:
        extras.append(density)
    if return_fit:
        extras.append(fit)
    if not extras:
        return scores
    return (scores, *extras)
def run_optimizer(self, data, random_seed=None, search_pts=51, plot=False,
                  figsize=(8, 6), groundtruth=None):
    """Tune the sun-detection threshold by hold-out validation and fit.

    For each of ``search_pts`` log-spaced thresholds between 1e-5 and 1e-1,
    the rough sunrise/sunset measurements are split 80/20, a
    quantile-regression fit is made on the training entries, and the mean
    absolute residual on the held-out entries is recorded. Among all
    thresholds whose hold-out error is within 10% of the minimum, the
    smallest threshold is selected and used for a final fit on all
    available measurements.

    Sets ``sunrise_estimates``, ``sunset_estimates``,
    ``sunrise_measurements``, ``sunset_measurements``,
    ``sunup_mask_measured``, ``sunup_mask_estimated``, ``threshold`` and
    ``total_rmse`` on the instance.

    :param data: 2-D array; ``data.shape[0]`` is treated as the number of
        samples spanning 24 hours and ``data.shape[1]`` as the number of days
    :param random_seed: seed passed to ``np.random.seed`` before the
        train/test shuffles of every threshold
    :param search_pts: number of candidate thresholds to evaluate
    :param plot: if True, plot the error curves and return the figure
    :param figsize: figure size used when ``plot`` is True
    :param groundtruth: optional pair ``(true_sunrises, true_sunsets)``;
        when given, the error of the full fit against it is tracked for
        each threshold and ``total_rmse`` is computed for the final fit
    :return: the matplotlib figure when ``plot`` is True, otherwise None
    """
    def _holdout_masks(values, valid_ixs):
        # Shuffle the usable indices in place and turn an 80-20 split of
        # them into boolean train/test masks shaped like `values`.
        np.random.shuffle(valid_ixs)
        split_at = int(len(valid_ixs) * .8)
        # Builtin bool: the np.bool alias was removed in NumPy 1.24.
        train_msk = np.zeros_like(values, dtype=bool)
        test_msk = np.zeros_like(values, dtype=bool)
        train_msk[valid_ixs[:split_at]] = True
        test_msk[valid_ixs[split_at:]] = True
        return train_msk, test_msk

    if groundtruth is not None:
        sr_true = groundtruth[0]
        ss_true = groundtruth[1]
    else:
        sr_true = None
        ss_true = None
    ths = np.logspace(-5, -1, search_pts)
    ho_error = []
    full_error = []
    num_trials = 1  # if > 1, average over multiple random selections
    for th in ths:
        bool_msk = detect_sun(data, th)
        measured = rise_set_rough(bool_msk)
        sunrises = measured['sunrises']
        sunsets = measured['sunsets']
        # Re-seeded for every threshold so that, for a fixed seed, each
        # threshold is scored with the same shuffling sequence.
        np.random.seed(random_seed)
        use_set_sr = np.flatnonzero(~np.isnan(sunrises))
        use_set_ss = np.flatnonzero(~np.isnan(sunsets))
        enough_data = (len(use_set_sr) / len(sunrises) > 0.6
                       and len(use_set_ss) / len(sunsets) > 0.6)
        if not enough_data:
            # More than 40% of the entries are NaN: penalize the threshold.
            ho_error.append(1e2)
            full_error.append(1e2)
            continue
        run_ho_errors = []
        for _ in range(num_trials):
            # Order matters for reproducibility: sunrise shuffle first.
            train_msk_sr, test_msk_sr = _holdout_masks(sunrises, use_set_sr)
            train_msk_ss, test_msk_ss = _holdout_masks(sunsets, use_set_ss)
            sr_smoothed = local_quantile_regression_with_seasonal(
                sunrises, train_msk_sr, tau=0.05, solver='MOSEK')
            ss_smoothed = local_quantile_regression_with_seasonal(
                sunsets, train_msk_ss, tau=0.95, solver='MOSEK')
            ho_resid = np.r_[(sunrises - sr_smoothed)[test_msk_sr],
                             (sunsets - ss_smoothed)[test_msk_ss]]
            # 7/30/20: Some sites can have a "consistent" fit (low holdout
            # error) that is not the correct estimate. We impose the
            # restriction that the range of sunrise times and of sunset
            # times must each be greater than 15 minutes (0.25 hours). Any
            # solution with less variation than that must be non-physical.
            # (See: PVO ID# 30121)
            cond1 = np.max(sr_smoothed) - np.min(sr_smoothed) > 0.25
            cond2 = np.max(ss_smoothed) - np.min(ss_smoothed) > 0.25
            if cond1 and cond2:
                # L1 loss instead of L2: it is a better proxy for goodness
                # of fit when the model is trained with a quantile loss.
                run_ho_errors.append(np.mean(np.abs(ho_resid)))
            else:
                run_ho_errors.append(1e2)
        ho_error.append(np.average(run_ho_errors))
        if groundtruth is not None:
            full_fit = rise_set_smoothed(measured, sunrise_tau=0.05,
                                         sunset_tau=0.95)
            e_both = np.r_[sr_true - full_fit['sunrises'],
                           ss_true - full_fit['sunsets']]
            full_error.append(np.sqrt(np.mean(e_both**2)))
    ho_error = np.array(ho_error)
    min_val = np.min(ho_error)
    slct_vals = ho_error < 1.1 * min_val  # everything within 10% of min val
    selected_th = np.min(ths[slct_vals])
    # Final pass: refit at the selected threshold using every measurement.
    bool_msk = detect_sun(data, selected_th)
    measured = rise_set_rough(bool_msk)
    smoothed = rise_set_smoothed(measured, sunrise_tau=.05, sunset_tau=.95)
    self.sunrise_estimates = smoothed['sunrises']
    self.sunset_estimates = smoothed['sunsets']
    self.sunrise_measurements = measured['sunrises']
    self.sunset_measurements = measured['sunsets']
    self.sunup_mask_measured = bool_msk
    # Hour-of-day of each row. Built from an integer index so the grid
    # always has exactly data.shape[0] entries; truncating the sampling
    # period to whole minutes and using it as a float arange step breaks
    # for sub-minute data (zero step) and for row counts that do not
    # divide 1440 (length mismatch).
    samples_per_day = data.shape[0]
    num_days = data.shape[1]
    hours = np.arange(samples_per_day) * (24 / samples_per_day)
    mat = np.tile(hours, (num_days, 1)).T
    sr_broadcast = np.tile(self.sunrise_estimates, (samples_per_day, 1))
    ss_broadcast = np.tile(self.sunset_estimates, (samples_per_day, 1))
    self.sunup_mask_estimated = np.logical_and(mat >= sr_broadcast,
                                               mat < ss_broadcast)
    self.threshold = selected_th
    if groundtruth is not None:
        sr_residual = sr_true - self.sunrise_estimates
        ss_residual = ss_true - self.sunset_estimates
        total_rmse = np.sqrt(np.mean(np.r_[sr_residual, ss_residual]**2))
        self.total_rmse = total_rmse
    else:
        self.total_rmse = None
    if not plot:
        return
    fig = plt.figure(figsize=figsize)
    plt.plot(ths, ho_error, marker='.', color='blue', label='HO error')
    plt.yscale('log')
    plt.xscale('log')
    # Highlight the candidates within 10% of the minimum hold-out error.
    plt.plot(ths[slct_vals], ho_error[slct_vals], marker='.', ls='none',
             color='red')
    plt.axvline(selected_th, color='blue', ls='--',
                label='optimized parameter')
    if groundtruth is not None:
        best_th = ths[np.argmin(full_error)]
        plt.plot(ths, full_error, marker='.', color='orange',
                 label='true error')
        plt.axvline(best_th, color='orange', ls='--',
                    label='best parameter')
    plt.legend()
    return fig