Пример #1
0
def remove_outliers(array):
    """Drop the values of ``array`` that fall outside the Tukey fences.

    The fences are ``Q1 - 1.5 * IQR`` and ``Q3 + 1.5 * IQR``, where ``Q1`` and
    ``Q3`` are the 0.25 and 0.75 quantiles taken over all elements and
    ``IQR = Q3 - Q1``. Values equal to a fence are kept.

    Args:
        array (np.ndarray): values to filter.

    Returns:
        np.ndarray: a new 1-D array holding the elements inside the fences
        (boolean-mask indexing flattens multi-dimensional input).

    Raises:
        TypeError: if ``array`` is not a ``numpy.ndarray``.
    """
    if not isinstance(array, np.ndarray):
        # TypeError is still an Exception, so existing ``except Exception``
        # handlers keep working.
        raise TypeError(
            'input type should be numpy ndarray, instead of {}'.format(type(array))
        )
    if array.size == 0:
        # Quantiles of an empty array are undefined; there is nothing to drop.
        return array.flatten()

    q1 = np.quantile(array, 0.25)
    q3 = np.quantile(array, 0.75)
    iqr = q3 - q1
    lower_fence = q1 - 1.5 * iqr
    upper_fence = q3 + 1.5 * iqr
    # Both fences come from the unfiltered data, so a single combined mask
    # selects the same elements as filtering in two passes.
    inside = (array >= lower_fence) & (array <= upper_fence)
    return array[inside]
Пример #2
0
 def _calc_wts(self, x_i):
     """Return one Gaussian-kernel weight per stored row of ``self.X_``.

     Each weight is ``exp(-d**2 / self.sigma)`` where ``d`` is the Euclidean
     distance between a row of ``self.X_`` and ``x_i``. When ``self.span`` is
     truthy, rows whose distance exceeds the ``self.span`` quantile of all
     distances get weight 0.

     Args:
         x_i: query point; must broadcast against one row of ``self.X_``.

     Returns:
         np.ndarray: weights, one entry per row of ``self.X_``.
     """
     # Assumes self.X_ is 2-D (n_samples, n_features) -- TODO confirm.
     # One vectorised call replaces the per-row Python loop.
     distances = np.linalg.norm(np.asarray(self.X_) - x_i, axis=1)
     weights = np.exp(-(distances ** 2) / self.sigma)
     if self.span:
         cutoff = np.quantile(distances, q=self.span)
         # Boolean mask zeroes the weight of every row beyond the cutoff.
         weights = weights * (distances <= cutoff)
     return weights
Пример #3
0
    def get_anchor_points(self, actions):
        """Build and store the anchor action set for the direct estimator.

        In ``'quantile'`` mode the ``self.K + 1`` evenly spaced quantiles of
        ``actions`` are stored in ``self.quantiles`` and, padded with
        ``self.eps`` in front and ``np.inf`` behind, in ``self.action_set``.
        In ``'grid'`` mode ``self.action_set`` is a grid from ``self.eps`` in
        steps of ``self.stride`` and ``self.K`` is reset from the largest
        action. Any other mode leaves ``self.action_set`` untouched.

        Args:
            actions (np.array): actions drawn from the logging policy

        Returns:
            The current ``self.action_set``.
        """
        mode = self.mode
        if mode == 'grid':
            largest = np.max(actions)
            self.action_set = np.arange(self.eps, largest + self.stride, self.stride)
            self.K = int(largest / self.stride)
        elif mode == 'quantile':
            levels = np.linspace(0, 1, self.K + 1)
            self.quantiles = np.quantile(actions, levels)
            self.action_set = np.pad(
                self.quantiles,
                1,
                'constant',
                constant_values=(self.eps, np.inf),
            )
        self.initialized = True
        return self.action_set
Пример #4
0
def T_val(x, gamma):
    """Return the ``gamma`` quantile of ``log q - log p`` minus ``log 2``.

    ``x[0]`` is used as a location and ``exp(x[1])`` as a scale. For a column
    of the module-level array ``Z`` the samples ``loc + scale * Z[:, col]``
    are scored under ``p`` (equal-weight mixture of four normal densities
    with means 2, 0, -2, -4 and standard deviation 0.8) and under ``q`` (a
    ``stats.t`` log-density with 10 degrees of freedom, the same location and
    scale). Entries with ``p <= 0``, ``log q == -inf`` or a NaN log-ratio are
    discarded before the quantile is taken.

    ``K``, ``Z`` and ``stats`` are module-level names defined outside this
    function (``stats`` is presumably ``scipy.stats``).

    Args:
        x: indexable with at least two entries (location, log-scale).
        gamma: quantile level passed to ``np.quantile``.

    Returns:
        The "lower"-interpolated quantile minus ``np.log(2)``.
    """
    loc, scale = x[0], np.exp(x[1])
    mixture_means = (2, 0, -2, -4)
    # NOTE(review): each pass overwrites T_value, so only the value computed
    # from the last column (index K - 1) is returned -- confirm intended.
    for col in range(K):
        samples = loc + scale * Z[:, col]
        target_pdf = sum(stats.norm.pdf(samples, m, 0.8) for m in mixture_means) / 4
        proposal_logpdf = stats.t.logpdf(samples, 10, loc, scale)

        # Keep only points where both log-densities are finite from below.
        usable = (target_pdf > 0) & (proposal_logpdf > -np.inf)
        log_ratio = np.log(target_pdf[usable]) - proposal_logpdf[usable]
        not_nan = ~np.isnan(log_ratio)
        T_value = np.quantile(
            -log_ratio[not_nan], gamma, interpolation="lower") - np.log(2)

    return T_value
    def _setup(self):
        """Set up the experiment: simulate logged data, split it and scale features.

        Steps, in order:
          1. Load features and labels via ``self.get_X_y_by_name()`` and derive
             potentials from the labels.
          2. Draw one log-normal action per sample from ``self.rng`` and compute
             the corresponding rewards.
          3. Compute the logging propensities ``pi_logging`` (binned when
             ``self.discrete`` is truthy, otherwise via
             ``Dataset.logging_policy``).
          4. Split into train/test and then train/valid, storing every piece as
             an attribute on ``self``.
          5. Min-max scale the features with a scaler fitted on the training
             part only.
          6. Store the mean validation and test rewards as baselines.

        Returns nothing; all results are stored on ``self``.
        """
        # Actions
        features, y = self.get_X_y_by_name()
        potentials = self._get_potentials(y)
        # One action per sample, drawn from LogNormal(start_mu, start_sigma).
        actions = self.rng.lognormal(mean=self.start_mu,
                                     sigma=self.start_sigma,
                                     size=potentials.shape[0])
        rewards = self.get_rewards_from_actions(potentials, actions)
        if self.discrete:
            # self.discrete doubles as the number of quantile bins.
            from scipy.stats import lognorm
            # Frozen log-normal with the same parameters as the sampler above.
            rv = lognorm(s=self.start_sigma, scale=np.exp(self.start_mu))
            # Bin edges: empirical quantiles of the drawn actions, padded with
            # 1e-7 in front and +inf behind.
            quantiles = np.quantile(actions,
                                    np.linspace(0, 1, self.discrete + 1))
            action_anchors = np.pad(quantiles,
                                    1,
                                    'constant',
                                    constant_values=(1e-7, np.inf))
            # Drop the trailing +inf edge before binning.
            bins = action_anchors[:-1]
            # right=True: index i satisfies bins[i-1] < action <= bins[i].
            inds = np.digitize(actions, bins, right=True)
            inds_1 = inds - 1
            # Clamp the "previous edge" index at 0; for actions in bin 0 both
            # edges are then bins[0] and the propensity below evaluates to 0.
            inds_1[inds_1 == -1] = 0
            # Propensity = log-normal CDF mass between the two edges around each action.
            pi_logging = rv.cdf(bins[inds]) - rv.cdf(bins[inds_1])
        else:
            pi_logging = Dataset.logging_policy(actions, self.start_mu,
                                                self.start_sigma)

        # Test train split
        # train_test_split is presumably scikit-learn's; outputs come back as
        # (train, test) pairs in the order the arrays are passed.
        self.actions_train, self.actions_test, self.features_train, self.features_test, self.reward_train, \
        self.reward_test, self.pi_0_train, self.pi_0_test, self.potentials_train, self.potentials_test, \
        self.l_train, self.l_test = train_test_split(actions, features, rewards, pi_logging, potentials, y,
                                                     train_size=self.train_size, random_state=42)

        # Second split carves the validation set out of the training part.
        # NOTE(review): train_size=self.val_size sizes the *_train outputs, not
        # the *_valid ones -- confirm this is the intended meaning of val_size.
        self.actions_train, self.actions_valid, self.features_train, self.features_valid, self.reward_train, \
        self.reward_valid, self.pi_0_train, self.pi_0_valid, self.potentials_train, self.potentials_valid, \
        self.l_train, self.l_valid = train_test_split(self.actions_train, self.features_train, self.reward_train,
                                                      self.pi_0_train, self.potentials_train, self.l_train,
                                                     train_size=self.val_size, random_state=42)

        # Fit the scaler on the training features only, then reuse it for the
        # validation and test features.
        min_max_scaler = MinMaxScaler(feature_range=(0, 1))
        self.features_train = min_max_scaler.fit_transform(self.features_train)
        self.features_valid = min_max_scaler.transform(self.features_valid)
        self.features_test = min_max_scaler.transform(self.features_test)

        # Mean logged reward on each held-out split.
        self.baseline_reward_valid = np.mean(self.reward_valid)
        self.baseline_reward_test = np.mean(self.reward_test)
Пример #6
0
def results_plot(best_arg):
    """Plot the predicted-probability band and the ROC curve, and print accuracy.

    ``grad_func`` is mapped over ``best_arg`` in a ``ProcessPool`` of
    ``num_cores`` workers (all three are module-level names defined outside
    this function). Entry ``[-2]`` of every worker result is treated as that
    sample's predicted probabilities; their mean across samples is the point
    prediction and their 2.5 % / 97.5 % quantiles form the shaded band.

    Args:
        best_arg: sequence of argument sets for ``grad_func``;
            ``best_arg[-1][-1]`` is used as the test labels.

    Returns:
        tuple: ``(pred_prob, roc_auc)`` -- the mean predicted probabilities
        and the area under the ROC curve computed from them.
    """
    y_test = best_arg[-1][-1]

    # CI plot
    pool = ProcessPool(num_cores)
    result = pool.map_async(grad_func, best_arg)
    result.wait()
    pool.close()
    pool.join()

    grad_total = np.array(result.get())
    grad_mu_total = np.mean(grad_total, 0)
    pred_prob = grad_mu_total[-2]

    # Per-sample probabilities, flattened so quantiles are taken element-wise.
    std_sample = [j[-2].ravel() for j in grad_total]
    prob_ub = np.quantile(std_sample, 0.975, axis=0)
    prob_lb = np.quantile(std_sample, 0.025, axis=0)
    x_axis = range(len(pred_prob))
    plt.plot(x_axis, pred_prob)
    plt.title('Probability Confidence Interval')
    plt.fill_between(x_axis,
                     prob_ub,
                     prob_lb,
                     color='b',
                     alpha=.1)
    plt.show()

    # AUC plot
    false_positive_rate, true_positive_rate, thresholds = roc_curve(
        y_test, pred_prob)
    roc_auc = auc(false_positive_rate, true_positive_rate)
    plt.title('Receiver Operating Characteristic')
    plt.plot(false_positive_rate,
             true_positive_rate,
             'b',
             label='AUC = %0.2f' % roc_auc)
    plt.legend(loc='lower right')
    plt.plot([0, 1], [0, 1], 'r--')
    plt.xlim([-0.1, 1.2])
    plt.ylim([-0.1, 1.2])
    plt.ylabel('True Positive Rate')
    plt.xlabel('False Positive Rate')
    plt.show()

    # Ground truth first, predictions second (presumably sklearn's
    # accuracy_score(y_true, y_pred)); the score is symmetric, so the value
    # is unchanged.
    accuracy = accuracy_score(y_test, pred_prob > 0.5)
    print("accuracy is " + str(accuracy))
    return pred_prob, roc_auc
 def set_list_quantiles(self, features):
     """Compute per-column quantile cut points and store them on ``self``.

     The ``self.number_quantiles - 1`` interior quantiles of ``features``
     (taken along axis 0) are stacked, padded by one element on every axis
     with ``-inf`` in front and ``+inf`` behind, transposed, and the first and
     last entries of the resulting list are dropped. For 2-D ``features``
     this leaves one array per column of the form
     ``[-inf, q_1, ..., q_{n-1}, +inf]`` in ``self.list_quantiles``.

     Also sets ``self.initialized`` to ``True``.

     Args:
         features: array-like accepted by ``np.quantile`` with ``axis=0``.
     """
     n_bins = self.number_quantiles
     interior_cuts = np.array(
         [np.quantile(features, (k + 1) / n_bins, axis=0) for k in range(n_bins - 1)]
     )
     padded = np.pad(
         interior_cuts,
         1,
         'constant',
         constant_values=(-np.inf, np.inf),
     )
     # After the transpose, the outermost entries are the padding added along
     # the other axis; slice them away.
     self.list_quantiles = list(padded.T)[1:-1]
     self.initialized = True