Example #1
File: elbo_pqn.py Project: mlds-lab/mogp
def load_model(self, pickle_name):
    w, beta, g_gp_b, h_gp_a, h_gp_b = pickle_load(pickle_name)
    self.shared.w = w
    self.shared.beta = beta
    self.shared.g_gp_b = g_gp_b
    self.shared.h_gp_a = h_gp_a
    self.shared.h_gp_b = h_gp_b
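
Note: pickle_load (and pickle_save, used further below) are helper functions from the mogp codebase, not the standard library. A minimal sketch of what they plausibly look like, assuming multiple objects are stored together as one tuple (the actual mogp implementation may differ, and the final example below likely uses a variant that returns a single object directly):

import pickle

def pickle_save(filename, *objects):
    # Store any number of objects as a single tuple.
    with open(filename, 'wb') as f:
        pickle.dump(objects, f, protocol=pickle.HIGHEST_PROTOCOL)

def pickle_load(filename):
    # Return the stored tuple; a single stored object comes back as a
    # 1-tuple, matching `self.gp_parms, = pickle_load(...)` below.
    with open(filename, 'rb') as f:
        return pickle.load(f)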
Example #2
    def __init__(
            self,
            n_classes,
            inducing_pts,
            t_test,
            update_gp=True,
            init_gp_params=None,  # kernel parameters & noise parameter
            #n_inducing_pts=50,
            #t_min=0,
            #t_max=1,
            n_lanczos_basis=10,
            net_arch='logreg',
            stochastic_train=True,
            stochastic_predict=False,
            n_samples=10,
            n_epochs=100,
            regularize_weight=0,
            optimizer=adadelta,
            optimizer_kwargs={},
            load_params=None,
            random_seed=123):
        '''
        n_samples: number of Monte Carlo samples to estimate the expectation
        n_inducing_pts: number of inducing points
        '''
        lasagne.random.set_rng(np.random.RandomState(seed=random_seed))
        self.rng = RandomStreams(seed=random_seed)

        if load_params:
            (model_params, network_params,
             init_gp_params) = pickle_load(load_params)
            (self.net_arch, self.n_classes, self.inducing_pts, self.idx_test,
             self.w_test, self.gp_output_len) = model_params
        else:
            self.net_arch = net_arch
            self.n_classes = n_classes
            self.inducing_pts = inducing_pts
            self.idx_test, self.w_test = sparse_w(inducing_pts, t_test)
            self.gp_output_len = len(t_test)

        self.n_lanczos_basis = n_lanczos_basis
        self.n_epochs = n_epochs
        self.n_samples = n_samples
        self.post_gp = PosteriorGP(inducing_pts,
                                   t_test,
                                   kernel,
                                   symbolic_kernel,
                                   init_params=init_gp_params)
        self.update_gp = update_gp
        self.regularize_weight = regularize_weight
        self.optimizer = optimizer
        self.optimizer_kwargs = optimizer_kwargs

        # Save stochastic train/predict flags for storing parameters
        self.stochastic_train = stochastic_train
        self.stochastic_predict = stochastic_predict
        self.compile_train_predict(stochastic_train, stochastic_predict)

        if load_params:
            self.load_params(network_params)
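
The load_params branch above fixes the on-disk layout: a 3-tuple of model parameters, network parameters, and initial GP parameters. A hypothetical matching save_params (a sketch only; self.network is an assumed attribute, though lasagne.layers.get_all_param_values is the standard Lasagne call and self.post_gp.params appears in Example #9):

def save_params(self, pickle_name):
    # Mirror the tuple layout that __init__'s load_params branch unpacks.
    model_params = (self.net_arch, self.n_classes, self.inducing_pts,
                    self.idx_test, self.w_test, self.gp_output_len)
    network_params = lasagne.layers.get_all_param_values(self.network)  # assumed attribute
    gp_params = [p.get_value() for p in self.post_gp.params]
    pickle_save(pickle_name, model_params, network_params, gp_params)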
Example #3
def read_data(data='ECG200/11_1000_60_dat.pkl'):
    #data = 'ECG200/11_1000_60_dat.pkl'
    gp_parms, ts_train, ts_test, l_train, l_test = pickle_load(data)[:5]
    x_train = np.array([each_ts[0] for each_ts in ts_train])
    y_train = np.array([each_ts[1] for each_ts in ts_train])
    eg_id = 0
    x = x_train[eg_id]
    y = y_train[eg_id]
    return x, y
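
A minimal usage sketch, assuming the ECG200 pickle is present at the default path and matplotlib is available:

import matplotlib.pyplot as pl

x, y = read_data()   # first training series of the ECG200 set
pl.plot(x, y, 'ko')  # irregularly sampled observations
pl.xlabel('t')
pl.show()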
Example #4
    def __init__(self,
                 n_classes,
                 inducing_pts,
                 t_test,
                 update_gp=True,
                 init_gp_params=None,   # kernel parameters & noise parameter
                 #n_inducing_pts=50,
                 #t_min=0,
                 #t_max=1,
                 n_lanczos_basis=10,
                 n_samples=1000,
                 n_epochs=100,
                 gamma=5,
                 regularize_weight=0,
                 optimizer=adadelta,
                 optimizer_kwargs={},
                 load_params=None,
                 random_seed=123):
        '''
        n_samples: number of Monte Carlo samples to estimate the expectation
        n_inducing_pts: number of inducing points
        '''
        lasagne.random.set_rng(np.random.RandomState(seed=random_seed))
        W = np.random.normal(0, 1 / gamma**2, size=(n_samples, len(t_test)))
        b = np.random.uniform(0, 2 * np.pi, size=n_samples)
        self.random_weight = theano.shared(W)
        self.random_offset = theano.shared(b)

        if load_params:
            (model_params,
             network_params,
             init_gp_params) = pickle_load(load_params)
            (self.n_classes,
             self.inducing_pts,
             self.idx_test, self.w_test,
             self.gp_output_len) = model_params
        else:
            self.n_classes = n_classes
            self.inducing_pts = inducing_pts
            self.idx_test, self.w_test = sparse_w(inducing_pts, t_test)
            self.gp_output_len = len(t_test)

        self.n_lanczos_basis = n_lanczos_basis
        self.n_epochs = n_epochs
        self.n_samples = n_samples
        self.post_gp = PosteriorGP(inducing_pts, t_test,
                                   kernel, symbolic_kernel,
                                   init_params=init_gp_params)
        self.update_gp = update_gp
        self.regularize_weight = regularize_weight
        self.optimizer = optimizer
        self.optimizer_kwargs = optimizer_kwargs

        self.compile_train_predict()

        if load_params:
            self.load_params(network_params)
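
The W and b draws above have exactly the random Fourier feature form (Rahimi and Recht): Gaussian spectral samples plus uniform phase offsets, so cosine features approximate a stationary kernel. A self-contained one-dimensional check of that identity (a sketch only; the class applies the construction to GP sample vectors inside Theano, and its scale parameterization via gamma may differ from the sigma used here):

import numpy as np

np.random.seed(0)
n_feat, sigma = 2000, 0.5                            # feature count, RBF lengthscale
w = np.random.normal(0, 1.0 / sigma, size=n_feat)    # spectral samples
b = np.random.uniform(0, 2 * np.pi, size=n_feat)     # phase offsets

def rff(t):
    # Random Fourier feature map for a scalar input.
    return np.sqrt(2.0 / n_feat) * np.cos(w * t + b)

x, y = 0.3, 0.7
approx = rff(x).dot(rff(y))                          # Monte Carlo kernel estimate
exact = np.exp(-(x - y) ** 2 / (2 * sigma ** 2))     # ~0.73; approx agrees to a few 1e-2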
Example #5
File: demo.py Project: zphilip/mogp
    def __init__(self, n_ts):
        self.min_x, self.max_x = 0, 1
        self.n_ts = n_ts

        self.fig, self.ax = pl.subplots(n_ts, 1, figsize=(9, 9))
        self.fig.canvas.mpl_connect('button_press_event', self.onmousepress)
        self.fig.canvas.mpl_connect('key_press_event', self.onpress)
        self.ax_idx = {}
        for idx, each_ax in enumerate(self.ax):
            self.ax_idx[each_ax] = idx
            self.clear_ax(each_ax)

        try:
            self.x, self.y = pickle_load('input.pkl')
            for idx, each_ax in enumerate(self.ax):
                self.ax[idx].plot(self.x[idx], self.y[idx], 'ko')
        except Exception:  # start fresh if input.pkl is missing or unreadable
            self.reset_data()

        pl.show()
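
The input.pkl the demo tries to load is assumed to hold an (x, y) pair of per-axis point lists, matching the unpacking above. A sketch that writes a compatible file with hypothetical data (using pickle_save as sketched under Example #1):

import numpy as np

n_ts = 3
x = [np.sort(np.random.uniform(0, 1, size=8)) for _ in range(n_ts)]
y = [np.random.normal(size=8) for _ in range(n_ts)]
pickle_save('input.pkl', x, y)   # pickle_load('input.pkl') then returns (x, y)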
Example #6
File: elbo_pqn.py Project: mlds-lab/mogp
def load_model(self, pickle_name):
    self.gp_parms, = pickle_load(pickle_name)
Example #7
File: elbo_pqn.py Project: mlds-lab/mogp
def main():
    np.random.seed(0)

    dat_id = 1
    ts_all, l_all = pickle_load('../chla-data/chla_ts_min0_%d.pkl' % dat_id)

    # randomly shuffle training examples
    idx = np.arange(len(ts_all))
    np.random.shuffle(idx)
    ts_all = ts_all[idx]
    #l_all = l_all[idx]

    parser = argparse.ArgumentParser()
    parser.add_argument('-q', dest='n_latent_gp', type=int, default=5)
    parser.add_argument('-g', dest='w_reg_group', default='none')
    parser.add_argument('-r', dest='w_reg', type=float, default=1)
    args = parser.parse_args()

    Q = args.n_latent_gp
    w_reg_group = args.w_reg_group
    w_reg = args.w_reg

    P = len(ts_all[0])
    #Q = 10
    M = 20

    #mogp = MultiOutputGP(P, Q, M)
    #mogp = MultiOutputGP(P, Q, M, w_reg_group='individual', w_reg=.5)
    #mogp = MultiOutputGP(P, Q, M, w_reg_group='row', w_reg=2)
    #mogp = MultiOutputGP(P, Q, M, w_reg_group='column', w_reg=1)
    #mogp = MultiOutputGP(P, Q, M, w_reg_group='column', w_reg=1.5)
    mogp = MultiOutputGP(P, Q, M, w_reg_group=w_reg_group, w_reg=w_reg)

    n_train = int(len(ts_all) * 0.5)
    #n_train = 8
    print 'n_train', n_train
    train_raw = ts_all[:n_train]
    train_ts = mogp.gen_collection(train_raw)
    mogp.train(train_ts, maxiter=50)

    w_reg_group_name = {
            'none': 'non',
            'row': 'row',
            'column': 'col',
            'individual': 'ind',
            }

    mogp_str = 'model-pqn-%s-%g-%d-%d' % (
            w_reg_group_name[mogp.w_reg_group], mogp.w_reg, Q, n_train)
    print mogp_str

    mogp_pickle = 'model/%s.pkl' % mogp_str
    mogp.load_model(mogp_pickle)
    test_raw = ts_all[n_train:]

    indep_gp = IndependentMultiOutputGP(P)
    indep_gp.train(train_raw)
    gp_parms = indep_gp.gp_parms

    for channel in xrange(P):
        loglike = []
        loglike_baseline = []
        for each_test in test_raw:
            x = [xy[0] for xy in each_test]
            y = [xy[1] for xy in each_test]
            channel_len = len(x[channel])
            if channel_len < 3:
                continue
            one_third = channel_len // 3
            x_held_out = x[channel][one_third:-one_third]
            y_held_out = y[channel][one_third:-one_third]
            x_remain = [each_x if i == channel else
                        np.concatenate((each_x[:one_third],
                                        each_x[-one_third:]))
                        for i, each_x in enumerate(x)]
            y_remain = [each_y if i == channel else
                        np.concatenate((each_y[:one_third],
                                        each_y[-one_third:]))
                        for i, each_y in enumerate(y)]
            ts = TimeSeries(x_remain, y_remain, mogp.shared)
            mu, cov = mogp.predictive_gaussian(ts, x_held_out)

            mean, var = gp.pointwise_posterior_mean_var(
                    x_remain[channel], y_remain[channel], x_held_out,
                    gp_parms[channel])

            for i, each_y in enumerate(y_held_out):
                loglike.append(norm.logpdf(each_y, mu[channel, i],
                                                   np.sqrt(cov[channel, i, i])))

                loglike_baseline.append(norm.logpdf(each_y, mean[i],
                                                    np.sqrt(var[i])))

        print '%2d %10.2f %10.2f %10.2f %10.2f %6d' % (
                channel, np.mean(loglike),
                np.std(loglike) / np.sqrt(len(loglike)),
                np.min(loglike), np.max(loglike), len(loglike))

        print '%2d %10.2f %10.2f %10.2f %10.2f %6d' % (
                channel, np.mean(loglike_baseline),
                np.std(loglike_baseline) / np.sqrt(len(loglike_baseline)),
                np.min(loglike_baseline), np.max(loglike_baseline),
                len(loglike_baseline))
        print '-' * 50
        pickle_save('loglike-cmp/%02d.pkl' % channel, loglike, loglike_baseline)
        pl.figure()
        pl.axis('equal')
        pl.scatter(loglike, loglike_baseline, alpha=.5)
        pl.savefig('loglike-cmp/loglike-%02d.pdf' % channel)
        pl.close()
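
The evaluation loop holds out the middle third of the target channel and conditions on the two outer thirds (plus the other channels in full). On a toy array the slicing works out as:

import numpy as np

x = np.arange(10)
one_third = len(x) // 3                                     # 3
x_held_out = x[one_third:-one_third]                        # [3 4 5 6]
x_remain = np.concatenate((x[:one_third], x[-one_third:]))  # [0 1 2 7 8 9]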
Example #8
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '-f',
        dest='data',
        #default='data/UWaveGestureLibraryAll-10.pkl',
        default='data/B-UWaveGestureLibraryAll-10.pkl',
        help='data file')
    parser.add_argument('-e',
                        dest='epochs',
                        type=int,
                        default=50,
                        help='number of epochs')
    parser.add_argument('-d',
                        dest='det_train',
                        action='store_true',
                        default=False,
                        help='deterministic train')
    parser.add_argument('-o',
                        dest='optimizer',
                        default='nesterov_momentum',
                        help='optimizer: nesterov_momentum, adagrad, adadelta')
    parser.add_argument('-n',
                        dest='net_arch',
                        default='logreg',
                        help='network architecture: ' + ', '.join(cls_network))
    parser.add_argument('-r',
                        dest='lrate',
                        type=float,
                        default=0.0008,
                        help='learning rate')
    parser.add_argument('-s',
                        dest='subset',
                        type=float,
                        default=None,
                        help='portion or size of subset of data')
    parser.add_argument('-v',
                        dest='validate',
                        type=float,
                        default=.3,
                        help='portion or size of validation set (overrides -s)')
    parser.add_argument('-p',
                        dest='gp_params',
                        default=None,
                        help='file of GP parameters')
    parser.add_argument('-k',
                        dest='ind_pts',
                        type=int,
                        default=256,
                        help='number of inducing points')
    parser.add_argument('-i',
                        dest='net_ins',
                        type=int,
                        default=0,
                        help='number of network inputs. '
                        '0: use inducing points')
    parser.add_argument('-b',
                        dest='lanczos_basis',
                        type=int,
                        default=5,
                        help='number of Lanczos bases')
    parser.add_argument('-m',
                        dest='samples',
                        type=int,
                        default=10,
                        help='number of Monte Carlo samples')
    parser.add_argument('-g',
                        dest='reg',
                        type=float,
                        default=0,
                        help='regularization weight')
    parser.add_argument('-u',
                        dest='fix_gp',
                        action='store_true',
                        default=False,
                        help='fix GP parameters')
    parser.add_argument('-l',
                        dest='log',
                        action='store_true',
                        default=False,
                        help='log stdout')
    parser.add_argument('-x',
                        dest='swap',
                        action='store_true',
                        default=False,
                        help='swap training/test set')
    parser.add_argument('--saveall',
                        dest='saveall',
                        default=None,
                        help='save parameters at each epoch')
    parser.add_argument('--save',
                        dest='save',
                        default=None,
                        help='save parameters')
    parser.add_argument('--load',
                        dest='load',
                        default=None,
                        help='load parameters (overrides -n and -p)')
    args = parser.parse_args()

    if args.log:
        sys.stdout = Logger()

    init_gp_params = None
    if args.gp_params:
        try:
            init_gp_params = pickle_load(args.gp_params)
        except Exception:
            pass  # fall back to default GP parameters if the file can't be read

    task_name = args.data.rsplit('/', 1)[-1][:-4]
    print task_name

    optimizer = nesterov_momentum
    if args.optimizer == 'adagrad':
        optimizer = adagrad
    elif args.optimizer == 'adadelta':
        optimizer = adadelta

    run_gpnet(args.data,
              task_name,
              n_lanczos_basis=args.lanczos_basis,
              n_samples=args.samples,
              n_inducing_pts=args.ind_pts,
              n_network_inputs=args.net_ins,
              update_gp=(not args.fix_gp),
              init_gp_params=init_gp_params,
              subset_train=args.subset,
              validation_set=args.validate,
              n_epochs=args.epochs,
              stochastic_train=(not args.det_train),
              net_arch=args.net_arch,
              regularize_weight=args.reg,
              optimizer=optimizer,
              optimizer_kwargs={'learning_rate': args.lrate},
              swap=args.swap,
              save_params_epochs=args.saveall,
              save_params=args.save,
              load_params=args.load)
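
For reference, a hypothetical way to drive this argparse-based main programmatically (the script name is a placeholder; adjust paths to your data):

import sys

sys.argv = ['gpnet_classify.py',  # placeholder script name
            '-f', 'data/B-UWaveGestureLibraryAll-10.pkl',
            '-e', '100', '-o', 'adadelta', '-k', '256',
            '--save', 'params.pkl']
main()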
Example #9
def run_gpnet(
        data,
        task_name,  # for logging
        n_inducing_pts=256,
        n_network_inputs=1000,
        update_gp=True,
        init_gp_params=None,  # kernel parameters & noise parameter
        net_arch='logreg',
        regularize_weight=0,
        stochastic_train=True,
        stochastic_predict=False,
        n_lanczos_basis=5,
        n_samples=5,
        n_epochs=500,
        optimizer=adadelta,
        optimizer_kwargs={},
        subset_train=None,
        validation_set=.3,
        swap=False,
        save_params_epochs=None,
        save_params=None,
        load_params=None):
    np.random.seed(1)
    x_train, y_train, x_test, y_test, l_train, l_test = pickle_load(data)
    if swap:
        x_train, x_test = x_test, x_train
        y_train, y_test = y_test, y_train
        l_train, l_test = l_test, l_train

    if subset_train:
        if 0 < subset_train <= 1:
            # subset_train given as a fraction of the training set
            n_train = int(len(l_train) * subset_train)
        elif subset_train > 1:
            n_train = int(subset_train)
        x_train = x_train[:n_train]
        y_train = y_train[:n_train]
        l_train = l_train[:n_train]

    x_valid, y_valid, l_valid = None, None, None
    if validation_set:
        total_train = len(l_train)
        if 0 < validation_set <= 1:
            n_valid = int(total_train * validation_set)
        elif validation_set > 1:
            n_valid = int(validation_set)

        n_train = total_train - n_valid
        x_train, x_valid = x_train[:n_train], x_train[n_train:]
        y_train, y_valid = y_train[:n_train], y_train[n_train:]
        l_train, l_valid = l_train[:n_train], l_train[n_train:]

    n_classes = len(set(l_train) | set(l_test))

    t_min, t_max = 0, 1
    extra_u = 2
    margin = (t_max - t_min) / (n_inducing_pts - extra_u * 2) * 2
    inducing_pts = np.linspace(t_min - margin, t_max + margin, n_inducing_pts)
    if n_network_inputs <= 0:
        t_test = inducing_pts[1:-1]
    else:
        t_test = np.linspace(t_min, t_max, n_network_inputs)

    gpnet = GPNet(n_classes,
                  inducing_pts=inducing_pts,
                  t_test=t_test,
                  update_gp=update_gp,
                  init_gp_params=init_gp_params,
                  net_arch=net_arch,
                  regularize_weight=regularize_weight,
                  stochastic_train=stochastic_train,
                  stochastic_predict=stochastic_predict,
                  n_lanczos_basis=n_lanczos_basis,
                  n_samples=n_samples,
                  n_epochs=n_epochs,
                  optimizer=optimizer,
                  optimizer_kwargs=optimizer_kwargs,
                  load_params=load_params)

    def print_parameters():
        print 'data:', data
        print 'n_train:', len(x_train)
        print 'n_valid:', len(x_valid) if x_valid is not None else 0
        print 'n_test:', len(x_test)
        print 'n_classes:', n_classes
        print 'n_inducing_pts:', n_inducing_pts
        print 'n_net_inputs:', len(t_test)
        print 'stochastic_train:', stochastic_train
        print 'stochastic_predict:', stochastic_predict
        print 'n_lanczos_basis:', n_lanczos_basis
        print 'n_samples:', n_samples
        print 'n_epochs:', n_epochs
        print 'network:', gpnet.net_arch
        print 'optimizer:', optimizer.__name__
        print 'optimizer_kwargs:', optimizer_kwargs
        print 'regularize_weight:', regularize_weight
        print 'init_gp_params:', init_gp_params
        print 'update_gp:', update_gp
        print 'load:', load_params
        print 'save:', save_params
        print 'save_epochs:', save_params_epochs

    print_parameters()

    for v in gpnet.post_gp.params:
        print v.get_value()

    t1 = time.time()
    gpnet.inspect_train(x_train, y_train, l_train, x_valid, y_valid, l_valid,
                        x_test, y_test, l_test, save_params_epochs)
    t2 = time.time()

    if save_params:
        gpnet.save_params(save_params)

    print_parameters()
    print 'time:', t2 - t1
    print task_name

    return  # stop here; the plain train/predict path below is left disabled

    gpnet.train(x_train, y_train, l_train)

    predict_train = gpnet.predict(x_train, y_train)
    print np.mean(l_train == predict_train)

    predict_test = gpnet.predict(x_test, y_test)
    print np.mean(l_test == predict_test)
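
One detail worth noting: the inducing-point grid is widened by a small margin so its interior points still cover [t_min, t_max]. With the defaults the numbers work out as follows (floats used so the division is exact under Python 2 as well):

import numpy as np

n_inducing_pts, extra_u = 256, 2
t_min, t_max = 0.0, 1.0
margin = (t_max - t_min) / (n_inducing_pts - extra_u * 2) * 2   # ~0.00794
inducing_pts = np.linspace(t_min - margin, t_max + margin, n_inducing_pts)
# 256 evenly spaced points on [-0.00794, 1.00794]; inducing_pts[1:-1]
# is what the network sees when n_network_inputs <= 0.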
Example #10
def train_gp(data):
    np.random.seed(123)
    lasagne.random.set_rng(np.random.RandomState(seed=123))
    x_train, y_train, x_test, y_test, l_train, l_test = pickle_load(data)
    gp_params, indep_noise = marginal_likelihood(x_train, y_train)
    return gp_params, indep_noise
Example #11
def __init__(self, model_path, vectorizer_path):
    self.model = pickle_load(model_path)
    self.vectorizer = pickle_load(vectorizer_path)
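
This last example is the standard text-classification pattern: a fitted vectorizer and a fitted model, pickled separately. A hypothetical usage, assuming the class is named Classifier and the pickled objects follow the scikit-learn transform/predict API (neither name nor type is given by the snippet):

clf = Classifier('model.pkl', 'vectorizer.pkl')        # hypothetical class and paths
features = clf.vectorizer.transform(['an example document'])
predicted = clf.model.predict(features)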