Exemplo n.º 1
0
 def compare_to(self, comp, frac_comp: bool = True) -> None:
     """Print how each entry of ``self.results`` differs from ``comp.results``.

     Entries are visited in reverse-sorted key order; keys missing from
     ``comp.results`` are skipped.  An entry is either a ``(value,
     uncertainty)`` pair or a dict of such pairs; for a dict, sub-keys
     missing from the matching ``comp`` entry are skipped as well.

     Args:
         comp: Object exposing a ``results`` mapping laid out like
             ``self.results``; it is used as the reference.
         frac_comp: If True, report the difference as a percentage of the
             reference value (suffix ``' %'``); otherwise report the
             absolute difference.
     """

     def _delta(own, ref) -> str:
         # Format (own - ref), optionally as a percentage of ref, as "value±unc".
         abs_diff = own[0] - ref[0]
         # Quadrature sum of the two uncertainties.
         abs_unc = np.sqrt(np.square(ref[1]) + np.square(own[1]))
         if not frac_comp:
             val = uncert_round(abs_diff, abs_unc)
             return f'{val[0]}±{val[1]}'

         one_percent = 0.01 * ref[0]
         pct = abs_diff / one_percent
         # Propagate through the division by the reference value.  This equals
         # |pct| * sqrt((abs_unc / abs_diff)^2 + (ref_unc / ref)^2), but is
         # written without dividing by the difference: the relative term must
         # use the *absolute* difference (not the percentage), and the result
         # has to stay finite when both values are equal.
         pct_unc = np.sqrt(
             np.square(abs_unc / one_percent) +
             np.square(pct * ref[1] / ref[0]))
         val = uncert_round(pct, pct_unc)
         return f'{val[0]}±{val[1]} %'

     for r in sorted(self.results, reverse=True):
         if r not in comp.results:
             continue
         own_entry, ref_entry = self.results[r], comp.results[r]
         res = f'{r}:'
         if isinstance(own_entry, dict):
             for d in sorted(own_entry):
                 if d not in ref_entry:
                     continue
                 res += f'\n\t{d}\t' + _delta(own_entry[d], ref_entry[d])
         else:
             res += '\t' + _delta(own_entry, ref_entry)
         print(res)
Exemplo n.º 2
0
def score_test_data_per_fold(test_fy,
                             cut,
                             pred_name='pred',
                             zero_preds=None,
                             one_preds=None):
    """Compute the public and private AMS on every test fold and average them.

    For each fold the per-event weights of the public and the private rows
    are rescaled by ``total rows of that kind / rows of that kind in the
    fold``.  Events with ``pred >= cut`` are accepted, and ``calc_ams`` is
    called with the summed accepted signal (``gen_target == 1``) and
    background (``gen_target == 0``) weights, separately for public and
    private rows.  The mean and standard error over folds are printed.

    Args:
        test_fy: Object providing ``n_folds`` and ``get_column``.
        cut: Threshold applied to the ``pred`` column.
        pred_name: Name of the prediction column to read from ``test_fy``.
        zero_preds: Column names forwarded to ``to_binary_class`` when the
            predictions have more than one dimension.  Defaults to
            ``['pred_0', 'pred_1', 'pred_2']``.
        one_preds: Column names forwarded to ``to_binary_class`` alongside
            ``zero_preds``.  Defaults to ``['pred_3']``.

    Returns:
        ``((public_mean, public_std), (private_mean, private_std))`` as
        unrounded values, where each ``*_std`` is the standard error of the
        mean over folds.
    """
    # None sentinels avoid sharing one mutable default list between calls.
    if zero_preds is None:
        zero_preds = ['pred_0', 'pred_1', 'pred_2']
    if one_preds is None:
        one_preds = ['pred_3']

    private_flags = test_fy.get_column('private')
    n_tot_pub = len(private_flags[private_flags == 0])
    n_tot_pri = len(private_flags[private_flags == 1])
    public_ams, private_ams = [], []

    for fold in range(test_fy.n_folds):
        data = pandas.DataFrame()
        # NOTE(review): the second positional argument (1) is passed through
        # unchanged; its meaning is defined by test_fy.get_column.
        pred = test_fy.get_column(pred_name, 1, fold)
        if len(pred.shape) > 1:
            # One column per prediction output; to_binary_class is expected
            # to provide the 'pred' column used below.
            for p in range(pred.shape[-1]):
                data[f'pred_{p}'] = pred[:, p]
            to_binary_class(data, zero_preds, one_preds)
        else:
            data['pred'] = pred
        data['gen_weight'] = test_fy.get_column('weights', 1, fold)
        data['gen_target'] = test_fy.get_column('targets', 1, fold)
        data['private'] = test_fy.get_column('private', 1, fold)

        is_public = (data.private == 0)
        is_private = (data.private == 1)

        # Rescale weights by (total rows of this kind) / (rows in this fold).
        data.loc[is_private,
                 'gen_weight'] *= n_tot_pri / len(data[is_private])
        data.loc[is_public,
                 'gen_weight'] *= n_tot_pub / len(data[is_public])

        accept = (data.pred >= cut)
        signal = (data.gen_target == 1)
        bkg = (data.gen_target == 0)

        public_ams.append(
            calc_ams(
                np.sum(data.loc[accept & is_public & signal, 'gen_weight']),
                np.sum(data.loc[accept & is_public & bkg, 'gen_weight'])))

        private_ams.append(
            calc_ams(
                np.sum(data.loc[accept & is_private & signal, 'gen_weight']),
                np.sum(data.loc[accept & is_private & bkg, 'gen_weight'])))

    # Standard error of the mean over folds (sample std / sqrt(n_folds)).
    root_n = np.sqrt(test_fy.n_folds)
    public_mean = np.mean(public_ams)
    public_std = np.std(public_ams, ddof=1) / root_n
    private_mean = np.mean(private_ams)
    private_std = np.std(private_ams, ddof=1) / root_n

    public_rnd = uncert_round(public_mean, public_std)
    private_rnd = uncert_round(private_mean, private_std)

    print(
        f"Mean Public:Private AMS: {public_rnd[0]}±{public_rnd[1]} : {private_rnd[0]}±{private_rnd[1]}"
    )
    return (public_mean, public_std), (private_mean, private_std)
Exemplo n.º 3
0
 def print_results(self) -> None:
     """Print every entry of ``self.results`` as ``value±uncertainty``.

     Keys are visited in reverse-sorted order.  A plain entry is indexed as
     ``(value, uncertainty)`` and printed on the key's own line; a dict entry
     is printed as the key followed by one tab-indented line per sub-key, in
     sorted order.  Rounding is delegated to ``uncert_round``.
     """
     for name in sorted(self.results, reverse=True):
         entry = self.results[name]
         pieces = [f'{name}:']
         if not isinstance(entry, dict):
             rounded = uncert_round(entry[0], entry[1])
             pieces.append(f'\t{rounded[0]}±{rounded[1]}')
         else:
             for sub in sorted(entry):
                 rounded = uncert_round(entry[sub][0], entry[sub][1])
                 pieces.append(f'\n\t{sub}\t{rounded[0]}±{rounded[1]}')
         print(''.join(pieces))
Exemplo n.º 4
0
    def print_table_row(self, base_result) -> None:
        r"""Print one LaTeX-style table row summarising ``self.results``.

        The row has five ``$value\pm uncertainty$`` cells separated by
        `` & `` and ends with ``\\``.  The first three cells are the
        ``val_ams_max``, ``val_ams_smooth`` and ``test_public_ams_mean``
        entries.  The last two are, for ``train_time`` and ``test_time``,
        the mean of ``(time - base) / base`` over the entry's sub-keys and
        its standard error.

        Args:
            base_result: Object whose ``results`` mapping supplies the
                baseline timings, indexed by the same sub-keys.
        """

        def as_cell(value, unc) -> str:
            # Round the pair and wrap it as a math-mode cell plus separator.
            val = uncert_round(value, unc)
            return fr'${val[0]}\pm{val[1]}$ & '

        cells = [
            as_cell(self.results[key][0], self.results[key][1])
            for key in ('val_ams_max', 'val_ams_smooth',
                        'test_public_ams_mean')
        ]

        for key in ('train_time', 'test_time'):
            labels = sorted(self.results[key])
            times = np.array([self.results[key][lbl][0] for lbl in labels])
            bases = np.array(
                [base_result.results[key][lbl][0] for lbl in labels])
            deltas = (times - bases) / bases
            mean_delta = np.mean(deltas)
            std_err = np.std(deltas, ddof=1) / np.sqrt(len(deltas))
            cells.append(as_cell(mean_delta, std_err))

        # Drop the trailing ' & ' before closing the row.
        print(''.join(cells)[:-3] + r'\\')
Exemplo n.º 5
0
def bootstrap_score_test_data(test_fy,
                              cut,
                              n,
                              pred_name='pred',
                              zero_preds=None,
                              one_preds=None):
    """Bootstrap the public and private AMS on the test data and print them.

    The test columns are gathered into one DataFrame, split into public
    (``private == 0``) and private (``private == 1``) rows, and each subset
    is handed to ``bs_ams`` through ``mp_run`` together with ``n`` and
    ``cut``.  The mean and sample standard deviation of the values returned
    under ``'public_ams'`` and ``'private_ams'`` are rounded with
    ``uncert_round`` and printed.

    Args:
        test_fy: Object providing ``get_column``.
        cut: Value forwarded to ``bs_ams`` as ``'cut'``.
        n: Value forwarded to ``bs_ams`` as ``'n'``.
        pred_name: Name of the prediction column to read from ``test_fy``.
        zero_preds: Column names forwarded to ``to_binary_class`` when the
            predictions have more than one dimension.  Defaults to
            ``['pred_0', 'pred_1', 'pred_2']``.
        one_preds: Column names forwarded to ``to_binary_class`` alongside
            ``zero_preds``.  Defaults to ``['pred_3']``.

    Returns:
        ``(public, private)``: the two ``uncert_round`` results (already
        rounded, unlike the raw values used to compute them).
    """
    # None sentinels avoid sharing one mutable default list between calls.
    if zero_preds is None:
        zero_preds = ['pred_0', 'pred_1', 'pred_2']
    if one_preds is None:
        one_preds = ['pred_3']

    data = pandas.DataFrame()
    pred = test_fy.get_column(pred_name)
    if len(pred.shape) > 1:
        # One column per prediction output; to_binary_class is expected to
        # provide the 'pred' column consumed downstream.
        for p in range(pred.shape[-1]):
            data[f'pred_{p}'] = pred[:, p]
        to_binary_class(data, zero_preds, one_preds)
    else:
        data['pred'] = pred
    data['gen_weight'] = test_fy.get_column('weights')
    data['gen_target'] = test_fy.get_column('targets')
    data['private'] = test_fy.get_column('private')

    jobs = [{
        'n': n,
        'df': data[(data.private == 0)],
        'cut': cut,
        'name': 'public'
    }, {
        'n': n,
        'df': data[(data.private == 1)],
        'cut': cut,
        'name': 'private'
    }]
    amss = mp_run(jobs, bs_ams)

    public = uncert_round(np.mean(amss['public_ams']),
                          np.std(amss['public_ams'], ddof=1))
    private = uncert_round(np.mean(amss['private_ams']),
                           np.std(amss['private_ams'], ddof=1))

    print(
        f"Public:Private AMS: {public[0]}±{public[1]} : {private[0]}±{private[1]}"
    )
    return public, private