def rise_set_smoothed(rough_dict, sunrise_tau=0.05, sunset_tau=0.95):
    """Fit smoothed sunrise and sunset signals from rough measurements.

    Each series is passed to ``local_quantile_regression_with_seasonal``
    together with a mask selecting its non-NaN entries, using the MOSEK
    solver.

    Parameters
    ----------
    rough_dict : mapping
        Must provide the keys ``'sunrises'`` and ``'sunsets'``; the values
        must be accepted by ``np.isnan`` (presumably 1-D float arrays with
        NaN marking missing entries -- confirm against the caller).
    sunrise_tau : float, optional
        Quantile level forwarded as ``tau`` for the sunrise fit.
    sunset_tau : float, optional
        Quantile level forwarded as ``tau`` for the sunset fit.

    Returns
    -------
    dict
        ``{'sunrises': <sunrise fit>, 'sunsets': <sunset fit>}``.
    """
    # Look up both series first so a missing key fails before any solve.
    raw_rise = rough_dict['sunrises']
    raw_set = rough_dict['sunsets']
    jobs = (
        ('sunrises', raw_rise, sunrise_tau),
        ('sunsets', raw_set, sunset_tau),
    )
    fitted = {}
    for key, series, tau in jobs:
        fitted[key] = local_quantile_regression_with_seasonal(
            series, ~np.isnan(series), tau=tau, solver='MOSEK')
    return fitted
示例#2
0
 def run(self, data, random_seed=None):
     """Pick a sun-detection threshold by holdout validation, then fit.

     Scans 31 log-spaced thresholds between 1e-5 and 1e-1. For each one,
     rough sunrise/sunset values are obtained from
     ``rise_set_rough(detect_sun(data, th))``. When more than 60% of both
     series are non-NaN, the usable entries are shuffled and split 80/20;
     the quantile fits (tau 0.05 for sunrises, 0.95 for sunsets) are
     trained on the 80% and scored by RMSE of the residuals on the
     held-out 20%. Thresholds that do not meet the 60% requirement get a
     sentinel error of 1e6. The threshold with the smallest error is used
     for the final fit on all data.

     Parameters
     ----------
     data :
         Forwarded unchanged to ``detect_sun`` (presumably a 2-D power
         matrix -- confirm against ``detect_sun``).
     random_seed : int or None, optional
         Passed to ``np.random.seed`` before every shuffle. Note this
         reseeds NumPy's global random state.

     Side effects
     ------------
     Sets ``sunrise_estimates``, ``sunset_estimates``,
     ``sunrise_measurements``, ``sunset_measurements``, ``sunup_mask`` and
     ``threshold`` on ``self``.
     """
     ths = np.logspace(-5, -1, 31)
     ho_error = []
     for th in ths:
         bool_msk = detect_sun(data, th)
         measured = rise_set_rough(bool_msk)
         sunrises = measured['sunrises']
         sunsets = measured['sunsets']
         np.random.seed(random_seed)
         # Indices of entries with a usable (non-NaN) measurement.
         use_set_sr = np.arange(len(sunrises))[~np.isnan(sunrises)]
         use_set_ss = np.arange(len(sunsets))[~np.isnan(sunsets)]
         if len(use_set_sr) / len(sunrises) > 0.6 and len(use_set_ss) / len(
                 sunsets) > 0.6:
             np.random.shuffle(use_set_sr)
             np.random.shuffle(use_set_ss)
             split_at_sr = int(len(use_set_sr) *
                               .8)  # 80-20 train test split
             split_at_ss = int(len(use_set_ss) * .8)
             train_sr = use_set_sr[:split_at_sr]
             train_ss = use_set_ss[:split_at_ss]
             test_sr = use_set_sr[split_at_sr:]
             test_ss = use_set_ss[split_at_ss:]
             # Builtin ``bool`` instead of ``np.bool``: the alias was
             # deprecated in NumPy 1.20 and removed in 1.24.
             train_msk_sr = np.zeros_like(sunrises, dtype=bool)
             train_msk_ss = np.zeros_like(sunsets, dtype=bool)
             train_msk_sr[train_sr] = True
             train_msk_ss[train_ss] = True
             test_msk_sr = np.zeros_like(sunrises, dtype=bool)
             test_msk_ss = np.zeros_like(sunsets, dtype=bool)
             test_msk_sr[test_sr] = True
             test_msk_ss[test_ss] = True
             sr_smoothed = local_quantile_regression_with_seasonal(
                 sunrises, train_msk_sr, tau=0.05, solver='MOSEK')
             ss_smoothed = local_quantile_regression_with_seasonal(
                 sunsets, train_msk_ss, tau=0.95, solver='MOSEK')
             # Residuals on the held-out entries only.
             r1 = (sunrises - sr_smoothed)[test_msk_sr]
             r2 = (sunsets - ss_smoothed)[test_msk_ss]
             ho_resid = np.r_[r1, r2]
             ho_error.append(np.sqrt(np.mean(ho_resid**2)))
         else:
             # Too few usable measurements: large sentinel so this
             # threshold is never preferred over a scored one.
             ho_error.append(1e6)
     selected_th = ths[np.argmin(ho_error)]
     # Refit on all available measurements with the selected threshold.
     bool_msk = detect_sun(data, selected_th)
     measured = rise_set_rough(bool_msk)
     smoothed = rise_set_smoothed(measured, sunrise_tau=.05, sunset_tau=.95)
     self.sunrise_estimates = smoothed['sunrises']
     self.sunset_estimates = smoothed['sunsets']
     self.sunrise_measurements = measured['sunrises']
     self.sunset_measurements = measured['sunsets']
     self.sunup_mask = bool_msk
     self.threshold = selected_th
示例#3
0
def daily_missing_data_advanced(data_matrix, threshold=0.2,
                                return_density_signal=False,
                                return_fit=False):
    """Score each column of ``data_matrix`` by its data density.

    The density of a column is the fraction of its rows whose value exceeds
    2% of a capacity estimate (the 0.95 quantile of all non-NaN entries);
    NaN entries are treated as 0. A 0.9-quantile seasonal fit of the density
    is computed using only columns whose density exceeds ``threshold``, and
    the score is the density divided by that fit.

    Parameters
    ----------
    data_matrix : numpy.ndarray
        2-D array, possibly containing NaN (presumably one column per day,
        judging by the function name -- confirm against the caller).
    threshold : float, optional
        Minimum density for a column to be used in the fit.
    return_density_signal : bool, optional
        Also return the per-column density.
    return_fit : bool, optional
        Also return the fitted density signal.

    Returns
    -------
    scores, or a tuple ``(scores[, density_signal][, fit_signal])`` when any
    of the optional outputs is requested.
    """
    nan_mask = np.isnan(data_matrix)
    capacity_est = np.quantile(data_matrix[~nan_mask], 0.95)

    # Work on a copy so the caller's matrix is left untouched.
    filled = np.copy(data_matrix)
    filled[nan_mask] = 0.
    is_active = filled > 0.02 * capacity_est

    n_rows = data_matrix.shape[0]
    density_signal = np.sum(is_active, axis=0) / n_rows

    fit_signal = local_quantile_regression_with_seasonal(
        density_signal,
        use_ixs=density_signal > threshold,
        tau=0.9,
        solver='MOSEK'
    )
    scores = density_signal / fit_signal

    extras = []
    if return_density_signal:
        extras.append(density_signal)
    if return_fit:
        extras.append(fit_signal)
    if not extras:
        return scores
    return (scores, *extras)
示例#4
0
    def run_optimizer(self,
                      data,
                      random_seed=None,
                      search_pts=51,
                      plot=False,
                      figsize=(8, 6),
                      groundtruth=None):
        """Select the sun-detection threshold by holdout error and fit.

        Scans ``search_pts`` log-spaced thresholds between 1e-5 and 1e-1.
        For each one, rough sunrise/sunset values come from
        ``rise_set_rough(detect_sun(data, th))``. When more than 60% of both
        series are non-NaN, the usable entries are shuffled and split 80/20,
        quantile fits (tau 0.05 / 0.95) are trained on the 80%, and the mean
        absolute residual on the held-out 20% is recorded. Fits whose
        smoothed sunrise or sunset range is not greater than 0.25 (15
        minutes), and thresholds that fail the 60% requirement, receive a
        sentinel error of 1e2. The smallest threshold whose error is within
        10% of the minimum is selected and used for the final fit.

        Parameters
        ----------
        data :
            Forwarded to ``detect_sun``; ``data.shape[0]`` is used as the
            number of samples per day and ``data.shape[1]`` as the number
            of days.
        random_seed : int or None, optional
            Passed to ``np.random.seed`` before the shuffles of every
            threshold. Note this reseeds NumPy's global random state.
        search_pts : int, optional
            Number of thresholds to evaluate.
        plot : bool, optional
            If true, plot the error curves and return the figure.
        figsize : tuple, optional
            Figure size forwarded to ``plt.figure``.
        groundtruth : sequence or None, optional
            ``(true_sunrises, true_sunsets)``. When given, the RMSE of the
            full fit against it is tracked per threshold and the final RMSE
            is stored in ``self.total_rmse``.

        Returns
        -------
        The matplotlib figure when ``plot`` is true, otherwise ``None``.

        Side effects
        ------------
        Sets ``sunrise_estimates``, ``sunset_estimates``,
        ``sunrise_measurements``, ``sunset_measurements``,
        ``sunup_mask_measured``, ``sunup_mask_estimated``, ``threshold``
        and ``total_rmse`` on ``self``.
        """
        if groundtruth is not None:
            sr_true = groundtruth[0]
            ss_true = groundtruth[1]
        else:
            sr_true = None
            ss_true = None
        ths = np.logspace(-5, -1, search_pts)
        ho_error = []
        full_error = []
        for th in ths:
            bool_msk = detect_sun(data, th)
            measured = rise_set_rough(bool_msk)
            sunrises = measured['sunrises']
            sunsets = measured['sunsets']
            np.random.seed(random_seed)
            # Indices of entries with a usable (non-NaN) measurement.
            use_set_sr = np.arange(len(sunrises))[~np.isnan(sunrises)]
            use_set_ss = np.arange(len(sunsets))[~np.isnan(sunsets)]
            if len(use_set_sr) / len(sunrises) > 0.6 and len(use_set_ss) / len(
                    sunsets) > 0.6:
                run_ho_errors = []
                num_trials = 1  # if > 1, average over multiple random selections
                for _ in range(num_trials):
                    np.random.shuffle(use_set_sr)
                    np.random.shuffle(use_set_ss)
                    split_at_sr = int(len(use_set_sr) *
                                      .8)  # 80-20 train test split
                    split_at_ss = int(len(use_set_ss) * .8)
                    train_sr = use_set_sr[:split_at_sr]
                    train_ss = use_set_ss[:split_at_ss]
                    test_sr = use_set_sr[split_at_sr:]
                    test_ss = use_set_ss[split_at_ss:]
                    # Builtin ``bool`` instead of ``np.bool``: the alias was
                    # deprecated in NumPy 1.20 and removed in 1.24.
                    train_msk_sr = np.zeros_like(sunrises, dtype=bool)
                    train_msk_ss = np.zeros_like(sunsets, dtype=bool)
                    train_msk_sr[train_sr] = True
                    train_msk_ss[train_ss] = True
                    test_msk_sr = np.zeros_like(sunrises, dtype=bool)
                    test_msk_ss = np.zeros_like(sunsets, dtype=bool)
                    test_msk_sr[test_sr] = True
                    test_msk_ss[test_ss] = True
                    sr_smoothed = local_quantile_regression_with_seasonal(
                        sunrises, train_msk_sr, tau=0.05, solver='MOSEK')
                    ss_smoothed = local_quantile_regression_with_seasonal(
                        sunsets, train_msk_ss, tau=0.95, solver='MOSEK')
                    # Residuals on the held-out entries only.
                    r1 = (sunrises - sr_smoothed)[test_msk_sr]
                    r2 = (sunsets - ss_smoothed)[test_msk_ss]
                    ho_resid = np.r_[r1, r2]

                    ### 7/30/20:
                    # Some sites can have "consistent" fit (low holdout error)
                    # that is not the correct estimate. We impose the restriction
                    # that the range of sunrise times and sunset times must be
                    # greater than 15 minutes. Any solution that is less than
                    # that must be non-physical. (See: PVO ID# 30121)
                    cond1 = np.max(sr_smoothed) - np.min(sr_smoothed) > 0.25
                    cond2 = np.max(ss_smoothed) - np.min(ss_smoothed) > 0.25
                    if cond1 and cond2:
                        ### L1-loss instead of L2
                        # L1-loss is better proxy for goodness of fit when using
                        # quantile loss function
                        ###
                        run_ho_errors.append(np.mean(np.abs(ho_resid)))
                    else:
                        run_ho_errors.append(1e2)
                ho_error.append(np.average(run_ho_errors))
                if groundtruth is not None:
                    # Error of the fit on all data against the ground truth.
                    full_fit = rise_set_smoothed(measured,
                                                 sunrise_tau=0.05,
                                                 sunset_tau=0.95)
                    sr_full = full_fit['sunrises']
                    ss_full = full_fit['sunsets']
                    e1 = (sr_true - sr_full)
                    e2 = (ss_true - ss_full)
                    e_both = np.r_[e1, e2]
                    full_error.append(np.sqrt(np.mean(e_both**2)))
            else:
                # Too few usable measurements: sentinel errors.
                ho_error.append(1e2)
                full_error.append(1e2)
        ho_error = np.array(ho_error)
        min_val = np.min(ho_error)
        slct_vals = ho_error < 1.1 * min_val  # everything within 10% of min val
        if not np.any(slct_vals):
            # A minimum of exactly 0 makes the strict comparison select
            # nothing, and np.min would fail on the empty selection; fall
            # back to the minimisers themselves.
            slct_vals = ho_error == min_val
        selected_th = np.min(ths[slct_vals])
        # Refit on all available measurements with the selected threshold.
        bool_msk = detect_sun(data, selected_th)
        measured = rise_set_rough(bool_msk)
        smoothed = rise_set_smoothed(measured, sunrise_tau=.05, sunset_tau=.95)
        self.sunrise_estimates = smoothed['sunrises']
        self.sunset_estimates = smoothed['sunsets']
        self.sunrise_measurements = measured['sunrises']
        self.sunset_measurements = measured['sunsets']
        self.sunup_mask_measured = bool_msk
        # Build a (samples per day, days) grid of hour-of-day values and mark
        # the entries that fall between the estimated sunrise and sunset.
        data_sampling = int(24 * 60 / data.shape[0])  # minutes per sample
        num_days = data.shape[1]
        mat = np.tile(np.arange(0, 24, data_sampling / 60), (num_days, 1)).T
        sr_broadcast = np.tile(self.sunrise_estimates, (data.shape[0], 1))
        ss_broadcast = np.tile(self.sunset_estimates, (data.shape[0], 1))
        self.sunup_mask_estimated = np.logical_and(mat >= sr_broadcast,
                                                   mat < ss_broadcast)
        self.threshold = selected_th
        if groundtruth is not None:
            sr_residual = sr_true - self.sunrise_estimates
            ss_residual = ss_true - self.sunset_estimates
            total_rmse = np.sqrt(np.mean(np.r_[sr_residual, ss_residual]**2))
            self.total_rmse = total_rmse
        else:
            self.total_rmse = None

        if plot:
            fig = plt.figure(figsize=figsize)
            plt.plot(ths, ho_error, marker='.', color='blue', label='HO error')
            plt.yscale('log')
            plt.xscale('log')

            # Highlight the candidates within 10% of the minimum error.
            plt.plot(ths[slct_vals],
                     ho_error[slct_vals],
                     marker='.',
                     ls='none',
                     color='red')
            plt.axvline(selected_th,
                        color='blue',
                        ls='--',
                        label='optimized parameter')
            if groundtruth is not None:
                best_th = ths[np.argmin(full_error)]
                plt.plot(ths,
                         full_error,
                         marker='.',
                         color='orange',
                         label='true error')
                plt.axvline(best_th,
                            color='orange',
                            ls='--',
                            label='best parameter')
            plt.legend()
            return fig
        else:
            return