Example 1
import lightgbm as lgb
import numpy as np


def train(x_train: np.ndarray, y_train: np.ndarray, x_val: np.ndarray, y_val: np.ndarray, save_path=None):
    # Wrap the arrays in LightGBM datasets; the validation set is evaluated
    # alongside the training set during boosting.
    train_data = lgb.Dataset(x_train, label=y_train)
    val_data = lgb.Dataset(x_val, label=y_val)
    # PARAM, NUM_ROUND and accuracy() are module-level names defined elsewhere in the file.
    gbm = lgb.train(PARAM, train_data, NUM_ROUND, valid_sets=[train_data, val_data], verbose_eval=True)
    pred = gbm.predict(x_val)

    accuracy(y_val, pred)
    return pred
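The function above relies on module-level names (PARAM, NUM_ROUND, accuracy) defined elsewhere in the source file. A minimal sketch of what the missing configuration might look like, assuming a standard LightGBM regression setup; the parameter values below are illustrative, not taken from the original project (a possible accuracy() helper is sketched after Example 4):

# Illustrative LightGBM configuration; the original project's values are not shown here.
PARAM = {
    'objective': 'regression',
    'metric': 'l2',
    'learning_rate': 0.05,
    'num_leaves': 31,
    'verbose': -1,
}
NUM_ROUND = 500

# With this in place the snippet could be exercised as:
# pred = train(x_train, y_train, x_val, y_val)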
Example 2
    def plot(self):
        # Nothing selected to aggregate yet: draw an empty canvas instead.
        if self.values is None or (self.cols is None and self.rows is None):
            self.draw_empty()
        else:
            value_type = self.frame.value_type.GetItemLabel(
                self.frame.value_type.GetSelection())
            if value_type == 'metric':
                agg = stats.aggregate(self.df,
                                      subplots=self.subplots,
                                      rows=self.rows,
                                      cols=self.cols,
                                      yerr=self.yerr,
                                      values=self.values)
            elif value_type == 'accuracy':
                correct = list(
                    self.frame.panel_corr.check_correct.GetCheckedStrings())
                incorrect = list(
                    self.frame.panel_corr.check_incorrect.GetCheckedStrings())
                agg = stats.accuracy(self.df,
                                     subplots=self.subplots,
                                     rows=self.rows,
                                     cols=self.cols,
                                     yerr=self.yerr,
                                     values=self.values,
                                     correct=correct,
                                     incorrect=incorrect)
            self.redraw(agg)
            # Clear the wx list control (rows and columns) before repopulating
            # it with the stacked aggregate.
            self.frame.list_agg.DeleteAllItems()
            for i in range(self.frame.list_agg.GetColumnCount()):
                self.frame.list_agg.DeleteColumn(0)

            aggr = self.frame.list_agg.stack(agg)
            self.frame.list_agg.set_data(aggr)
            self.frame.aggr = aggr
Example 3
def plot_accuracy_t_sweep(M, T, T_hat, output_dir):
    print('Plotting t sweep over accuracy...')
    # Normalize the raw predictions, then sweep the decision threshold t from
    # 0 to 1 in steps of 0.1, recording test-set and zero-set accuracy at each value.
    T_hat_norm = normalize_predictions(T_hat)
    t_values = np.linspace(0, 1, 11)
    test_set_accs, zero_set_accs = [], []
    for t in t_values:
        test_set_acc, zero_set_acc = accuracy(M, T, T_hat_norm, t)
        test_set_accs.append(test_set_acc)
        zero_set_accs.append(zero_set_acc)

    plt.plot(t_values, test_set_accs, label='Test Set Accuracy')
    plt.plot(t_values, zero_set_accs, label='Zero Set Accuracy')
    plt.yticks([0, 0.2, 0.4, 0.6, 0.8, 1])

    plt.title('t Sweep of Model Accuracy')
    plt.xlabel('Value of t')
    plt.ylabel('Accuracy')
    plt.legend()
    plt.savefig(output_dir + '/accuracy.png')
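normalize_predictions() and accuracy() are helpers defined elsewhere in the source; from the call site, accuracy() apparently takes the test mask M, the targets T, the normalized predictions and a threshold t, and returns (test_set_acc, zero_set_acc). A hedged usage sketch, assuming those helpers exist with that behaviour and using made-up shapes:

import os
import numpy as np

# Hypothetical inputs: M marks held-out entries, T holds ground truth, T_hat raw scores.
M = np.random.rand(100, 50) < 0.1
T = np.random.randint(0, 2, size=(100, 50))
T_hat = np.random.rand(100, 50)

os.makedirs('plots', exist_ok=True)
plot_accuracy_t_sweep(M, T, T_hat, output_dir='plots')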
Example 4
                                output_fname=False)

        x, y = zip(*val_iterator)
        x = np.concatenate(x)
        y = np.concatenate(y).flatten()

        model_class = MODEL2CLASS[model_name]
        # Reset the Keras session before loading this fold's weights so models
        # do not accumulate across folds.
        K.clear_session()
        model = model_class(weights=weights_path)
        y_pred = model.predict(x).flatten()

        DATA_DF.loc[test_df.index, 'pred'] = y_pred * 10
        np.testing.assert_array_almost_equal(
            DATA_DF.loc[test_df.index, 'rank'], y * 10)

        accuracy(y * 10, y_pred * 10, f'\nFold {test_fold} accuracy:')

    df_preds = DATA_DF.dropna()
    df_preds.to_csv(PREDS_DF, index=False)
    accuracy(df_preds['rank'], df_preds['pred'],
             f'\nTotal preds {len(df_preds)}:')

    # accuracy() is assumed to return a tuple whose first element is the
    # Spearman correlation; collect one value per (datasetId, baseSf) group.
    spearman = []
    for _, group in df_preds.groupby(['datasetId', 'baseSf']):
        spearman.append(accuracy(group['rank'], group['pred'], verbose=False))
    print(f'\nMean spearman {np.array([s[0] for s in spearman]).mean()}')

    plt.figure()
    plt.hist(np.array([s[0] for s in spearman]))
    plt.show()
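This fold loop (and the driver in the next example) calls an accuracy() helper defined elsewhere. Judging from how the result is used (the first element of each return value is averaged as a Spearman score, and the third positional argument is a header string), it plausibly wraps SciPy's Spearman correlation; a minimal sketch under that assumption, not the project's actual implementation:

from scipy.stats import spearmanr

def accuracy(y_true, y_pred, header='', verbose=True):
    # Hypothetical reconstruction: rank-correlate targets and predictions.
    corr, pval = spearmanr(y_true, y_pred)
    if verbose:
        if header:
            print(header)
        print(f'Spearman correlation: {corr:.3f} (p={pval:.3g})')
    return corr, pval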
Example 5
    # (Continuation of the train() function shown in Example 1.)
    val_data = lgb.Dataset(x_val, label=y_val)
    gbm = lgb.train(PARAM, train_data, NUM_ROUND, valid_sets=[train_data, val_data], verbose_eval=True)
    pred = gbm.predict(x_val)

    accuracy(y_val, pred)
    return pred


if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('-features', default=None, required=False, help='path to unsupervised features')
    args = parser.parse_args()
    MODEL_TYPE = 'gbt'

    MODEL_DIR = Path(CURRENT_DIR / 'models/unsupervised_model')
    FEATURE_DICT_PATH = Path(args.features) if args.features else MODEL_DIR / 'features.ncomp20.naugs40.pkl'
    PREDS_DF_PATH = PREDS_DIR / 'preds_{}.csv'.format(MODEL_TYPE)

    PREDS_DF = DATA_DF.copy()
    for fold in FOLDS:
        print(f'Fold {fold}/{len(FOLDS)}')
        # load_data() (defined elsewhere) returns this fold's train/test feature
        # arrays and targets, plus the dataframe index of the held-out rows.
        (train_features, train_y), (test_features, test_y), df_index = load_data(FEATURE_DICT_PATH, DATA_DF,
                                                                                 fold, N_FOLDS)
        assert len(train_features) + len(test_features) == len(DATA_DF)

        pred = train(train_features, train_y, test_features, test_y)
        PREDS_DF.loc[df_index, 'pred'] = pred * 10

    PREDS_DF.to_csv(PREDS_DF_PATH, index=False)
    accuracy(DATA_DF['rank'], PREDS_DF['pred'])
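Based on the argparse definition, the driver can be run with the bundled default feature dictionary or pointed at a different one; the script filename below is hypothetical:

    python train_gbt.py
    python train_gbt.py -features /path/to/features.pkl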