def train_test(config):
    """Train a meta-GP with the given hyper-parameter config and evaluate it.

    Args:
        config: dict of hyper-parameters forwarded to GPRegressionMetaLearned.

    Returns:
        A copy of ``config`` augmented with the evaluation metrics
        ``ll``, ``rmse`` and ``calib_err`` (NaN if any step failed).
    """
    # Copy so the caller's config dict is not mutated when metrics are added.
    results_dict = dict(config)

    try:
        sys.path.append(BASE_DIR)

        # 1) load / generate data
        from experiments.data_sim import provide_data
        data_train, _, data_test = provide_data(dataset=args.dataset)

        # 2) Fit model
        from meta_learn.GPR_meta_mll import GPRegressionMetaLearned
        torch.set_num_threads(N_THREADS_PER_RUN)
        model = GPRegressionMetaLearned(data_train, **config)
        # NOTE(review): the test tasks are passed as the first positional
        # argument of meta_fit (validation tuples elsewhere in this file) —
        # confirm evaluating against the test split during fitting is intended.
        model.meta_fit(data_test, log_period=5000)

        # 3) evaluate on test set
        ll, rmse, calib_err = model.eval_datasets(data_test)

        results_dict.update(ll=ll, rmse=rmse, calib_err=calib_err)

    except Exception as e:
        # Best-effort: report NaN metrics instead of aborting the whole sweep.
        print(e)
        results_dict.update(ll=np.nan, rmse=np.nan, calib_err=np.nan)

    return results_dict
    def train_reg(config, reporter):
        """Trainable for hyper-parameter search: trains a meta-GP in chunks
        and periodically reports validation metrics.

        Args:
            config: hyper-parameters forwarded to GPRegressionMetaLearned;
                    must contain the key ``num_iter_fit``.
            reporter: callback receiving timesteps_total, loss and the
                      validation metrics after each evaluation period.
        """
        sys.path.append(BASE_DIR)

        # 1) load / generate data
        from experiments.data_sim import provide_data
        data_train, data_valid, _ = provide_data(dataset=args.dataset,
                                                 seed=SEED)

        # 2) setup model
        from meta_learn.GPR_meta_mll import GPRegressionMetaLearned
        torch.set_num_threads(N_THREADS_PER_RUN)

        model = GPRegressionMetaLearned(data_train, **config)

        # 3) train and evaluate model, eval_period iterations at a time
        eval_period = 2000
        train_iter = 0
        for _ in range(config["num_iter_fit"] // eval_period):
            # Consistency fix: log_period previously hard-coded a second 2000;
            # tie it to eval_period so both always change together.
            loss = model.meta_fit(verbose=False,
                                  log_period=eval_period,
                                  n_iter=eval_period)
            train_iter += eval_period
            ll, rmse, calib_err = model.eval_datasets(data_valid)
            reporter(timesteps_total=train_iter,
                     loss=loss,
                     test_rmse=rmse,
                     test_ll=ll,
                     calib_err=calib_err)
def main(argv):
    """Train a meta-learned GP on the configured dataset and save test metrics."""
    # set up experiment logging / output directory
    logger, exp_dir = setup_exp_doc(FLAGS.exp_name)

    if FLAGS.dataset == 'swissfel':
        data_train, _, data_test = provide_data(dataset='swissfel')
    else:
        # dispatch table from dataset flag to simulator class; each simulator
        # gets its own seeded random state
        simulator_classes = {
            'sin-nonstat': SinusoidNonstationaryDataset,
            'sin': SinusoidDataset,
            'cauchy': CauchyDataset,
            'mnist': MNISTRegressionDataset,
            'physionet': PhysionetDataset,
            'gp-funcs': GPFunctionsDataset,
        }
        if FLAGS.dataset not in simulator_classes:
            raise NotImplementedError('Does not recognize dataset flag')
        dataset = simulator_classes[FLAGS.dataset](
            random_state=np.random.RandomState(FLAGS.seed + 1))

        data_train = dataset.generate_meta_train_data(
            n_tasks=FLAGS.n_train_tasks, n_samples=FLAGS.n_train_samples)
        data_test = dataset.generate_meta_test_data(
            n_tasks=FLAGS.n_test_tasks,
            n_samples_context=FLAGS.n_context_samples,
            n_samples_test=FLAGS.n_test_samples)

    # identical hidden-layer sizes for both the mean and the kernel network
    nn_layers = (FLAGS.layer_size,) * FLAGS.num_layers

    torch.set_num_threads(FLAGS.n_threads)

    gp_meta = GPRegressionMetaLearned(data_train,
                                      learning_mode=FLAGS.learning_mode,
                                      num_iter_fit=FLAGS.n_iter_fit,
                                      covar_module=FLAGS.covar_module,
                                      mean_module=FLAGS.mean_module,
                                      kernel_nn_layers=nn_layers,
                                      mean_nn_layers=nn_layers,
                                      weight_decay=FLAGS.weight_decay,
                                      lr_params=FLAGS.lr,
                                      lr_decay=FLAGS.lr_decay,
                                      random_seed=FLAGS.seed,
                                      task_batch_size=FLAGS.batch_size,
                                      optimizer=FLAGS.optimizer,
                                      normalize_data=FLAGS.normalize_data)

    # fit, monitoring progress on the first 100 test tasks
    gp_meta.meta_fit(valid_tuples=data_test[:100], log_period=2000)

    test_ll, rmse, calib_err = gp_meta.eval_datasets(data_test)

    # collect and persist the evaluation metrics
    results_dict = dict(test_ll=test_ll, test_rmse=rmse, calib_err=calib_err)
    print(results_dict)
    save_results(results_dict, exp_dir, log=True)
# --- Exemplo n.º 4 (score: 0) — scraped-snippet separator, not code ---
    def test_normal_vs_meta(self):
        """A meta-learned GP should beat independently fitted per-task GPs."""
        torch.manual_seed(60)
        num_iter_fit = 1000

        # --- meta-learning across all training tasks ---
        gp_meta = GPRegressionMetaLearned(self.train_data_tuples,
                                          learning_mode='both',
                                          mean_nn_layers=(64, 64),
                                          covar_module='SE',
                                          mean_module='NN',
                                          weight_decay=0.0,
                                          num_iter_fit=num_iter_fit)
        gp_meta.meta_fit(valid_tuples=self.test_data_tuples)

        test_ll_meta, test_rmse_meta, _ = gp_meta.eval_datasets(
            self.test_data_tuples)
        print('Test log-likelihood meta:', test_ll_meta)

        # --- baseline: a fresh single-task GP fitted on each context set ---
        def fit_eval_gpr(x_context, t_context, x_test, t_test):
            # returns the test log-likelihood of one per-task GP
            gpr = GPRegressionLearned(x_context, t_context,
                                      learning_mode='both',
                                      mean_nn_layers=(64, 64),
                                      covar_module='SE',
                                      mean_module='NN',
                                      weight_decay=0.0,
                                      num_iter_fit=num_iter_fit)
            gpr.fit(valid_x=x_test, valid_t=t_test)
            return gpr.eval(x_test, t_test)[0]

        per_task_lls = [fit_eval_gpr(*task) for task in self.test_data_tuples]
        test_ll_normal = np.mean(per_task_lls)

        print('Test log-likelihood normal:', test_ll_normal)

        self.assertGreater(test_ll_meta, test_ll_normal)
# --- Exemplo n.º 5 (score: 0) — scraped-snippet separator, not code ---
    def test_random_seed_consistency(self):
        """Two models built with the same random seed must predict identically."""
        # both models share the exact same construction arguments, seed included
        shared_kwargs = dict(learning_mode='both',
                             num_iter_fit=5,
                             covar_module='NN',
                             mean_module='NN',
                             random_seed=22)
        model_a = GPRegressionMetaLearned(self.train_data_tuples[:2],
                                          **shared_kwargs)
        model_b = GPRegressionMetaLearned(self.train_data_tuples[:2],
                                          **shared_kwargs)

        model_a.meta_fit(valid_tuples=self.test_data_tuples)
        model_b.meta_fit(valid_tuples=self.test_data_tuples)

        # predictions must agree exactly on a few held-out tasks
        for x_context, t_context, x_test, _ in self.test_data_tuples[:3]:
            pred_a = model_a.predict(x_context, t_context, x_test)
            pred_b = model_b.predict(x_context, t_context, x_test)

            self.assertTrue(np.array_equal(pred_a, pred_b))
# --- Exemplo n.º 6 (score: 0) — scraped-snippet separator, not code ---
    def test_mean_learning_more_datasets(self):
        """More meta-training tasks should yield better test performance."""
        torch.manual_seed(40)

        def _fit_and_eval(train_tuples):
            # train a fresh meta-GP on the given tasks, score it on the test set
            model = GPRegressionMetaLearned(train_tuples,
                                            learning_mode='both',
                                            mean_nn_layers=(16, 16),
                                            kernel_nn_layers=(16, 16),
                                            num_iter_fit=3000,
                                            covar_module='SE',
                                            mean_module='NN',
                                            weight_decay=0.0)
            model.meta_fit(valid_tuples=self.test_data_tuples)
            ll, rmse, _ = model.eval_datasets(self.test_data_tuples)
            return ll, rmse

        # meta-learning with 2 datasets
        test_ll_meta_2, test_rmse_meta_2 = _fit_and_eval(
            self.train_data_tuples[:2])
        print('Test log-likelihood meta (2 datasets):', test_ll_meta_2)

        # meta-learning with 10 datasets
        test_ll_meta_10, test_rmse_meta_10 = _fit_and_eval(
            self.train_data_tuples)
        print('Test log-likelihood meta (10 datasets):', test_ll_meta_10)

        self.assertGreater(test_ll_meta_10, test_ll_meta_2)
        self.assertLess(test_rmse_meta_10, test_rmse_meta_2)
# --- Exemplo n.º 7 (score: 0) — scraped-snippet separator, not code ---
    def test_serializable(self):
        """A state_dict round-trip must exactly reproduce a model's predictions.

        Exercised for every combination of mean / covariance module types.
        """
        torch.manual_seed(40)
        np.random.seed(22)
        import itertools
        for mean_module, covar_module in itertools.product(['constant', 'NN'],
                                                           ['SE', 'NN']):

            # BUG FIX: both constructors previously hard-coded
            # covar_module='NN', so the loop's covar_module variable was
            # ignored and the 'SE' covariance case was never tested.
            gpr_model = GPRegressionMetaLearned(self.train_data_tuples[:3],
                                                learning_mode='both',
                                                num_iter_fit=5,
                                                mean_module=mean_module,
                                                covar_module=covar_module,
                                                random_seed=22)
            gpr_model.meta_fit()
            pred_1 = gpr_model.predict(*self.test_data_tuples[0][:3])

            # second model trained with a different seed -> different params
            gpr_model2 = GPRegressionMetaLearned(self.train_data_tuples[:3],
                                                 learning_mode='both',
                                                 num_iter_fit=5,
                                                 mean_module=mean_module,
                                                 covar_module=covar_module,
                                                 random_seed=25)
            gpr_model2.meta_fit()
            pred_2 = gpr_model2.predict(*self.test_data_tuples[0][:3])

            # round-trip model 1's parameters into model 2
            file = '/tmp/test_torch_serialization.pkl'
            torch.save(gpr_model.state_dict(), file)
            gpr_model2.load_state_dict(torch.load(file))
            pred_3 = gpr_model2.predict(*self.test_data_tuples[0][:3])
            # different seeds must differ; after loading, model 2 must
            # reproduce model 1 exactly
            assert not np.array_equal(pred_1, pred_2)
            assert np.array_equal(pred_1, pred_3)

            # continued training from identical parameters and identical
            # torch/numpy random states must stay perfectly in sync
            torch.manual_seed(25)
            gpr_model.rds_numpy = np.random.RandomState(55)
            gpr_model.meta_fit()
            torch.manual_seed(25)
            gpr_model2.rds_numpy = np.random.RandomState(55)
            gpr_model2.meta_fit()
            pred_1 = gpr_model.predict(*self.test_data_tuples[0][:3])
            pred_2 = gpr_model2.predict(*self.test_data_tuples[0][:3])
            assert np.array_equal(pred_1, pred_2)
def main(argv):
    """Train a meta-GP using only meta-test-style context sets as training
    data, evaluate on both meta-train and meta-test tasks, and save results.
    """
    # setup logging

    logger, exp_dir = setup_exp_doc(FLAGS.exp_name)

    if FLAGS.dataset == 'swissfel':
        raise NotImplementedError
    else:
        # instantiate the simulator matching the dataset flag; seeded with
        # FLAGS.seed + 1 so the data seed differs from the model's random_seed
        if FLAGS.dataset == 'sin-nonstat':
            dataset = SinusoidNonstationaryDataset(
                random_state=np.random.RandomState(FLAGS.seed + 1))
        elif FLAGS.dataset == 'sin':
            dataset = SinusoidDataset(
                random_state=np.random.RandomState(FLAGS.seed + 1))
        elif FLAGS.dataset == 'cauchy':
            dataset = CauchyDataset(
                random_state=np.random.RandomState(FLAGS.seed + 1))
        elif FLAGS.dataset == 'mnist':
            dataset = MNISTRegressionDataset(
                random_state=np.random.RandomState(FLAGS.seed + 1))
        elif FLAGS.dataset == 'physionet':
            dataset = PhysionetDataset(
                random_state=np.random.RandomState(FLAGS.seed + 1))
        elif FLAGS.dataset == 'gp-funcs':
            dataset = GPFunctionsDataset(
                random_state=np.random.RandomState(FLAGS.seed + 1))
        else:
            raise NotImplementedError('Does not recognize dataset flag')

        # NOTE(review): training data is deliberately drawn via
        # generate_meta_test_data (context/test splits) rather than
        # generate_meta_train_data — only the context halves are kept below.
        meta_train_data = dataset.generate_meta_test_data(
            n_tasks=1024,
            n_samples_context=FLAGS.n_context_samples,
            n_samples_test=FLAGS.n_test_samples)
        meta_test_data = dataset.generate_meta_test_data(
            n_tasks=FLAGS.n_test_tasks,
            n_samples_context=FLAGS.n_context_samples,
            n_samples_test=FLAGS.n_test_samples)

    # identical hidden-layer sizes for the mean and kernel networks
    nn_layers = tuple([FLAGS.layer_size for _ in range(FLAGS.num_layers)])
    torch.set_num_threads(FLAGS.n_threads)

    # only take meta-train context for training
    # (requires FLAGS.n_train_tasks <= the 1024 tasks generated above)
    meta_train_data = meta_train_data[:FLAGS.n_train_tasks]

    # keep only (context_x, context_y) from each 4-tuple task
    data_train = [(context_x, context_y)
                  for context_x, context_y, _, _ in meta_train_data]
    assert len(data_train) == FLAGS.n_train_tasks

    gp_meta = GPRegressionMetaLearned(data_train,
                                      learning_mode=FLAGS.learning_mode,
                                      num_iter_fit=FLAGS.n_iter_fit,
                                      covar_module=FLAGS.covar_module,
                                      mean_module=FLAGS.mean_module,
                                      kernel_nn_layers=nn_layers,
                                      mean_nn_layers=nn_layers,
                                      weight_decay=FLAGS.weight_decay,
                                      lr_params=FLAGS.lr,
                                      lr_decay=FLAGS.lr_decay,
                                      random_seed=FLAGS.seed,
                                      task_batch_size=FLAGS.batch_size,
                                      optimizer=FLAGS.optimizer,
                                      normalize_data=FLAGS.normalize_data)

    gp_meta.meta_fit(log_period=1000)

    # evaluate on the (truncated) meta-train tasks and on held-out test tasks
    test_ll_meta_train, test_rmse_meta_train, calib_err_meta_train = gp_meta.eval_datasets(
        meta_train_data)
    test_ll_meta_test, test_rmse_meta_test, calib_err_test = gp_meta.eval_datasets(
        meta_test_data)

    # save results
    results_dict = {
        'test_ll_meta_train': test_ll_meta_train,
        'test_ll_meta_test': test_ll_meta_test,
        'test_rmse_meta_train': test_rmse_meta_train,
        'test_rmse_meta_test': test_rmse_meta_test,
        'calib_err_meta_train': calib_err_meta_train,
        'calib_err_test': calib_err_test
    }

    pprint(results_dict)

    save_results(results_dict, exp_dir, log=True)
# Per-dataset result containers keyed by dataset name.
# NOTE(review): DATASETS is defined elsewhere in this file — confirm contents.
compute_times_meta_train = dict([(ds, {}) for ds in DATASETS])
compute_times_meta_test = dict([(ds, {}) for ds in DATASETS])

# Meta-learning methods to be compared.
METHODS = ['pacoh_map', 'pacoh_svgd', 'pacoh_vi', 'mlap']

# Hidden-layer sizes shared by the mean and kernel neural networks.
NN_LAYERS = [32, 32, 32, 32]
for dataset in DATASETS:
    meta_train_data, _, meta_test_data = provide_data('sin_20')

    from meta_learn.GPR_meta_mll import GPRegressionMetaLearned

    model_map = GPRegressionMetaLearned(meta_train_data,
                                        num_iter_fit=1000,
                                        covar_module='NN',
                                        mean_module='NN',
                                        mean_nn_layers=NN_LAYERS,
                                        kernel_nn_layers=NN_LAYERS,
                                        task_batch_size=len(meta_train_data))

    from meta_learn.GPR_meta_svgd import GPRegressionMetaLearnedSVGD

    model_svgd = GPRegressionMetaLearnedSVGD(meta_train_data,
                                             num_iter_fit=1000,
                                             num_particles=5,
                                             covar_module='NN',
                                             mean_module='NN',
                                             mean_nn_layers=NN_LAYERS,
                                             kernel_nn_layers=NN_LAYERS,
                                             bandwidth=0.5)