def generate_batch_multi(num_samples, xobjs=['circle'], yobjs=[0], img_scale=1.0):
     obj_imgs = []
     obj_coords = []
     for obj in xobjs:
         imgs, coords = generate_batch(num_samples, obj_type=obj)
         obj_imgs.append(imgs)
         obj_coords.append(coords)
     seq_len = obj_coords[0].shape[0]
     batch_size = obj_coords[0].shape[1]
     x_imgs = np.zeros(obj_imgs[0].shape)
     y_imgs = np.zeros(obj_imgs[0].shape)
     y_coords = np.zeros(obj_coords[0].shape)
     for o_num in range(len(xobjs)):
         x_imgs = x_imgs + obj_imgs[o_num]
         if o_num in yobjs:
             y_imgs = y_imgs + obj_imgs[o_num]
         mask = npr.rand(seq_len, batch_size) < (1. / (o_num+1))
         mask = mask[:,:,np.newaxis]
         y_coords = (mask * obj_coords[o_num]) + ((1.-mask) * y_coords)
     # rescale coordinates as desired
     y_coords = img_scale * y_coords
     # add noise to image sequences
     pix_mask = npr.rand(*x_imgs.shape) < 0.05
     pix_noise = npr.rand(*x_imgs.shape)
     x_imgs = x_imgs + (pix_mask * pix_noise)
     # clip pixel values into [0.001, 0.999]
     x_imgs = np.maximum(x_imgs, 0.001)
     x_imgs = np.minimum(x_imgs, 0.999)
     y_imgs = np.maximum(y_imgs, 0.001)
     y_imgs = np.minimum(y_imgs, 0.999)
     return [to_fX(x_imgs), to_fX(y_imgs), to_fX(y_coords)]
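The to_fX helper itself does not appear on this page. A minimal sketch of what it presumably does, assuming it simply casts arrays to Theano's configured float type (this matches the explicit .astype(theano.config.floatX) calls in the sample_func examples further down):

import numpy as np
import theano

def to_fX(np_ary):
    # assumed behavior: cast to theano.config.floatX (typically float32)
    return np_ary.astype(theano.config.floatX)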
Example No. 2
 def generate_batch_multi(num_samples, xobjs=['circle'], yobjs=[0], img_scale=1.0):
     obj_imgs = []
     obj_coords = []
     for obj in xobjs:
         imgs, coords = generate_batch(num_samples, obj_type=obj)
         obj_imgs.append(imgs)
         obj_coords.append(coords)
     seq_len = obj_coords[0].shape[0]
     batch_size = obj_coords[0].shape[1]
     x_imgs = np.zeros(obj_imgs[0].shape)
     y_imgs = np.zeros(obj_imgs[0].shape)
     y_coords = np.zeros(obj_coords[0].shape)
     for o_num in range(len(xobjs)):
         x_imgs = x_imgs + obj_imgs[o_num]
         if o_num in yobjs:
             y_imgs = y_imgs + obj_imgs[o_num]
         mask = npr.rand(seq_len, batch_size) < (1. / (o_num+1))
         mask = mask[:,:,np.newaxis]
         y_coords = (mask * obj_coords[o_num]) + ((1.-mask) * y_coords)
     # rescale coordinates as desired
     y_coords = img_scale * y_coords
     # add noise to image sequences
     pix_mask = npr.rand(*x_imgs.shape) < 0.05
     pix_noise = npr.rand(*x_imgs.shape)
     x_imgs = x_imgs + (pix_mask * pix_noise)
     # clip pixel values into [0.001, 0.999]
     x_imgs = np.maximum(x_imgs, 0.001)
     x_imgs = np.minimum(x_imgs, 0.999)
     y_imgs = np.maximum(y_imgs, 0.001)
     y_imgs = np.minimum(y_imgs, 0.999)
     return [to_fX(x_imgs), to_fX(y_imgs), to_fX(y_coords)]
 def generate_batch(num_samples):
     for n in range(len(objects)):
         # generate a minibatch of trajectories
         traj_pos, traj_vel = TRAJ.generate_trajectories(num_samples, traj_len)
         traj_x = traj_pos[:,:,0]
         traj_y = traj_pos[:,:,1]
         # draw the trajectories
         center_x = to_fX( traj_x.T.ravel() )
         center_y = to_fX( traj_y.T.ravel() )
         delta = to_fX( np.ones(center_x.shape) )
         sigma = to_fX( np.ones(center_x.shape) )
         if n == 0:
             W  = write_funcs[n](center_y, center_x, delta, 0.05*sigma)
         else:
             W += write_funcs[n](center_y, center_x, delta, 0.05*sigma)
     W = utils.scale_to_unit_interval(W)
     # shape trajectories into a batch for passing to the model
     batch_imgs = np.zeros((num_samples, traj_len, obs_dim))
     for i in range(num_samples):
         start_idx = i * traj_len
         end_idx = start_idx + traj_len
         img_set = W[start_idx:end_idx,:]
         batch_imgs[i,:,:] = img_set
     batch_imgs = np.swapaxes(batch_imgs, 0, 1)
     batch_imgs = to_fX( batch_imgs )
     return batch_imgs
 def generate_batch_multi(num_samples, xobjs=['circle'], yobjs=[0], img_scale=1.0):
     obj_imgs = []
     obj_coords = []
     for obj in xobjs:
         imgs, coords = generate_batch(num_samples+1, obj_type=obj)
         obj_imgs.append(imgs)
         obj_coords.append(coords)
     seq_len = obj_imgs[0].shape[0] - 1
     batch_size = obj_imgs[0].shape[1]
     obs_dim = obj_imgs[0].shape[2]
     x_imgs = np.zeros((seq_len, batch_size, obs_dim))
     y_imgs = np.zeros((seq_len, batch_size, obs_dim))
     for o_num in range(len(xobjs)):
         x_imgs = x_imgs + obj_imgs[o_num][:-1,:,:]
         if o_num in yobjs:
             y_imgs = y_imgs + obj_imgs[o_num][1:,:,:]
     # # add noise to image sequences
     # pix_mask = npr.rand(*x_imgs.shape) < 0.05
     # pix_noise = npr.rand(*x_imgs.shape)
     # x_imgs = x_imgs + (pix_mask * pix_noise)
     # clip pixel values into [0.001, 0.999]
     x_imgs = np.maximum(x_imgs, 0.001)
     x_imgs = np.minimum(x_imgs, 0.999)
     y_imgs = np.maximum(y_imgs, 0.001)
     y_imgs = np.minimum(y_imgs, 0.999)
     return [to_fX(x_imgs), to_fX(y_imgs)]
Example No. 5
 def generate_batch_multi(num_samples,
                          xobjs=['circle'],
                          yobjs=[0],
                          img_scale=1.0):
     obj_imgs = []
     obj_coords = []
     for obj in xobjs:
         imgs, coords = generate_batch(num_samples + 1, obj_type=obj)
         obj_imgs.append(imgs)
         obj_coords.append(coords)
     seq_len = obj_imgs[0].shape[0] - 1
     batch_size = obj_imgs[0].shape[1]
     obs_dim = obj_imgs[0].shape[2]
     x_imgs = np.zeros((seq_len, batch_size, obs_dim))
     y_imgs = np.zeros((seq_len, batch_size, obs_dim))
     for o_num in range(len(xobjs)):
         x_imgs = x_imgs + obj_imgs[o_num][:-1, :, :]
         if o_num in yobjs:
             y_imgs = y_imgs + obj_imgs[o_num][1:, :, :]
     # # add noise to image sequences
     # pix_mask = npr.rand(*x_imgs.shape) < 0.05
     # pix_noise = npr.rand(*x_imgs.shape)
     # x_imgs = x_imgs + (pix_mask * pix_noise)
     # clip pixel values into [0.001, 0.999]
     x_imgs = np.maximum(x_imgs, 0.001)
     x_imgs = np.minimum(x_imgs, 0.999)
     y_imgs = np.maximum(y_imgs, 0.001)
     y_imgs = np.minimum(y_imgs, 0.999)
     return [to_fX(x_imgs), to_fX(y_imgs)]
 def set_lam_kld(self, lam_kld_q2p=1.0, lam_kld_p2q=1.0):
     """
     Set the relative weight of various KL-divergences.
     """
     zero_ary = np.zeros((1,))
     new_lam = zero_ary + lam_kld_q2p
     self.lam_kld_q2p.set_value(to_fX(new_lam))
     new_lam = zero_ary + lam_kld_p2q
     self.lam_kld_p2q.set_value(to_fX(new_lam))
     return
Example No. 7
 def prior_sampler(samp_count):
     x_samps = to_fX( np.zeros((samp_count, self.obs_dim)) )
     old_switch = self.train_switch.get_value(borrow=False)
     # set model to generation mode
     self.set_train_switch(switch_val=0.0)
     z_samps = to_fX( npr.randn(samp_count, self.z_dim) )
     model_samps = sample_func(z_samps, x_samps)
     # set model back to either training or generation mode
     self.set_train_switch(switch_val=old_switch)
     return model_samps
 def set_lam_kld(self, lam_kld_q2p=1.0, lam_kld_p2q=1.0):
     """
     Set the relative weight of various KL-divergences.
     """
     zero_ary = np.zeros((1,))
     new_lam = zero_ary + lam_kld_q2p
     self.lam_kld_q2p.set_value(to_fX(new_lam))
     new_lam = zero_ary + lam_kld_p2q
     self.lam_kld_p2q.set_value(to_fX(new_lam))
     return
 def set_lam_kld(self, lam_kld_p=1.0, lam_kld_q=1.0):
     """
     Set the relative weight of prior KL-divergence vs. data likelihood.
     """
     zero_ary = np.zeros((1,))
     new_lam = zero_ary + lam_kld_p
     self.lam_kld_p.set_value(to_fX(new_lam))
     new_lam = zero_ary + lam_kld_q
     self.lam_kld_q.set_value(to_fX(new_lam))
     return
Example No. 10
 def raw_cost_computer(XI, XO, XM):
     _all_costs = cost_func(to_fX(XI), to_fX(XO), to_fX(XM))
     _kld_q2p = np.sum(np.mean(_all_costs[1], axis=1, keepdims=True), axis=0)
     _kld_p2q = np.sum(np.mean(_all_costs[2], axis=1, keepdims=True), axis=0)
     _step_klds = np.mean(np.sum(_all_costs[1], axis=2, keepdims=True), axis=1)
     _step_klds = to_fX( np.asarray([k for k in _step_klds]) )
     _step_nlls = np.mean(_all_costs[0], axis=1)
     _step_nlls = to_fX( np.asarray([k for k in _step_nlls]) )
     results = [_step_nlls, _step_klds, _kld_q2p, _kld_p2q]
     return results
Example No. 11
 def set_lam_kld(self, lam_kld_p=0.0, lam_kld_q=1.0, lam_kld_g=0.0):
     """
     Set the relative weight of prior KL-divergence vs. data likelihood.
     """
     zero_ary = np.zeros((1, ))
     new_lam = zero_ary + lam_kld_p
     self.lam_kld_p.set_value(to_fX(new_lam))
     new_lam = zero_ary + lam_kld_q
     self.lam_kld_q.set_value(to_fX(new_lam))
     new_lam = zero_ary + lam_kld_g
     self.lam_kld_g.set_value(to_fX(new_lam))
     return
Example No. 12
def test_tfd_nll(occ_dim=15, drop_prob=0.0):
    RESULT_PATH = "IMP_TFD_TM/"
    #########################################
    # Format the result tag more thoroughly #
    #########################################
    dp_int = int(100.0 * drop_prob)
    result_tag = RESULT_PATH + "TM_OD{}_DP{}".format(occ_dim, dp_int)

    ##########################
    # Get some training data #
    ##########################
    rng = np.random.RandomState(1234)
    data_file = 'data/tfd_data_48x48.pkl'
    dataset = load_tfd(tfd_pkl_name=data_file,
                       which_set='unlabeled',
                       fold='all')
    Xtr_unlabeled = dataset[0]
    dataset = load_tfd(tfd_pkl_name=data_file, which_set='train', fold='all')
    Xtr_train = dataset[0]
    Xtr = np.vstack([Xtr_unlabeled, Xtr_train])
    dataset = load_tfd(tfd_pkl_name=data_file, which_set='valid', fold='all')
    Xva = dataset[0]
    Xtr = to_fX(shift_and_scale_into_01(Xtr))
    Xva = to_fX(shift_and_scale_into_01(Xva))
    tr_samples = Xtr.shape[0]
    va_samples = Xva.shape[0]
    batch_size = 250
    batch_reps = 1
    all_pix_mean = np.mean(np.mean(Xtr, axis=1))
    data_mean = to_fX(all_pix_mean * np.ones((Xtr.shape[1], )))

    TM = TemplateMatchImputer(x_train=Xtr, x_type='bernoulli')

    log_name = "{}_RESULTS.txt".format(result_tag)
    out_file = open(log_name, 'wb')

    Xva = row_shuffle(Xva)
    # record an estimate of performance on the test set
    xi, xo, xm = construct_masked_data(Xva, drop_prob=drop_prob, \
                                       occ_dim=occ_dim, data_mean=data_mean)
    result = TM.best_match_nll(xo, xm)
    match_on_known = np.mean(result[0])
    match_on_unknown = np.mean(result[1])
    str0 = "Test 1:"
    str1 = "    match on known   : {}".format(match_on_known)
    str2 = "    match on unknown : {}".format(match_on_unknown)
    joint_str = "\n".join([str0, str1, str2])
    print(joint_str)
    out_file.write(joint_str + "\n")
    out_file.flush()
    out_file.close()
    return
 def set_sgd_params(self, lr=0.01, mom_1=0.9, mom_2=0.999):
     """
     Set learning rate and momentum parameter for all updates.
     """
     zero_ary = np.zeros((1,))
     # set learning rate
     new_lr = zero_ary + lr
     self.lr.set_value(to_fX(new_lr))
     # set momentums (use first and second order "momentum")
     new_mom_1 = zero_ary + mom_1
     self.mom_1.set_value(to_fX(new_mom_1))
     new_mom_2 = zero_ary + mom_2
     self.mom_2.set_value(to_fX(new_mom_2))
     return
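All of the set_* methods above share one idiom: broadcast the new scalar against np.zeros((1,)), cast with to_fX, and push the result into a Theano shared variable via set_value. A self-contained sketch of that idiom (the lr variable and set_lr function here are illustrative, not part of the source repo):

import numpy as np
import theano

lr = theano.shared(np.zeros((1,), dtype=theano.config.floatX), name='lr')

def set_lr(new_lr=0.01):
    # same pattern as set_sgd_params / set_lam_* above
    zero_ary = np.zeros((1,))
    lr.set_value((zero_ary + new_lr).astype(theano.config.floatX))

set_lr(0.001)
print(lr.get_value())  # [0.001]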
Example No. 14
def test_tfd_nll(occ_dim=15, drop_prob=0.0):
    RESULT_PATH = "IMP_TFD_TM/"
    #########################################
    # Format the result tag more thoroughly #
    #########################################
    dp_int = int(100.0 * drop_prob)
    result_tag = RESULT_PATH + "TM_OD{}_DP{}".format(occ_dim, dp_int)

    ##########################
    # Get some training data #
    ##########################
    rng = np.random.RandomState(1234)
    data_file = 'data/tfd_data_48x48.pkl'
    dataset = load_tfd(tfd_pkl_name=data_file, which_set='unlabeled', fold='all')
    Xtr_unlabeled = dataset[0]
    dataset = load_tfd(tfd_pkl_name=data_file, which_set='train', fold='all')
    Xtr_train = dataset[0]
    Xtr = np.vstack([Xtr_unlabeled, Xtr_train])
    dataset = load_tfd(tfd_pkl_name=data_file, which_set='valid', fold='all')
    Xva = dataset[0]
    Xtr = to_fX(shift_and_scale_into_01(Xtr))
    Xva = to_fX(shift_and_scale_into_01(Xva))
    tr_samples = Xtr.shape[0]
    va_samples = Xva.shape[0]
    batch_size = 250
    batch_reps = 1
    all_pix_mean = np.mean(np.mean(Xtr, axis=1))
    data_mean = to_fX( all_pix_mean * np.ones((Xtr.shape[1],)) )

    TM = TemplateMatchImputer(x_train=Xtr, x_type='bernoulli')

    log_name = "{}_RESULTS.txt".format(result_tag)
    out_file = open(log_name, 'wb')

    Xva = row_shuffle(Xva)
    # record an estimate of performance on the test set
    xi, xo, xm = construct_masked_data(Xva, drop_prob=drop_prob, \
                                       occ_dim=occ_dim, data_mean=data_mean)
    result = TM.best_match_nll(xo, xm)
    match_on_known = np.mean(result[0])
    match_on_unknown = np.mean(result[1])
    str0 = "Test 1:"
    str1 = "    match on known   : {}".format(match_on_known)
    str2 = "    match on unknown : {}".format(match_on_unknown)
    joint_str = "\n".join([str0, str1, str2])
    print(joint_str)
    out_file.write(joint_str+"\n")
    out_file.flush()
    out_file.close()
    return
Example No. 15
 def set_sgd_params(self, lr=0.01, mom_1=0.9, mom_2=0.999):
     """
     Set learning rate and momentum parameter for all updates.
     """
     zero_ary = np.zeros((1, ))
     # set learning rate
     new_lr = zero_ary + lr
     self.lr.set_value(to_fX(new_lr))
     # set momentums (use first and second order "momentum")
     new_mom_1 = zero_ary + mom_1
     self.mom_1.set_value(to_fX(new_mom_1))
     new_mom_2 = zero_ary + mom_2
     self.mom_2.set_value(to_fX(new_mom_2))
     return
Example No. 16
 def raw_cost_computer(XO):
     _all_costs = cost_func(to_fX(XO))
     _kld_q2p = np.sum(np.mean(_all_costs[1], axis=1, keepdims=True),
                       axis=0)
     _kld_p2q = np.sum(np.mean(_all_costs[2], axis=1, keepdims=True),
                       axis=0)
     _kld_p2g = np.sum(np.mean(_all_costs[3], axis=1, keepdims=True),
                       axis=0)
     _step_klds = np.mean(np.sum(_all_costs[1], axis=2, keepdims=True),
                          axis=1)
     _step_klds = to_fX(np.asarray([k for k in _step_klds]))
     _step_nlls = np.mean(_all_costs[0], axis=1)
     _step_nlls = to_fX(np.asarray([k for k in _step_nlls]))
     results = [_step_nlls, _step_klds, _kld_q2p, _kld_p2q, _kld_p2g]
     return results
Example No. 17
def test_mnist_img(occ_dim=15, drop_prob=0.0):
    #########################################
    # Format the result tag more thoroughly #
    #########################################
    dp_int = int(100.0 * drop_prob)
    result_tag = RESULT_PATH + "TM_OD{}_DP{}".format(occ_dim, dp_int)

    ##########################
    # Get some training data #
    ##########################
    rng = np.random.RandomState(1234)
    dataset = 'data/mnist.pkl.gz'
    datasets = load_udm(dataset, as_shared=False, zero_mean=False)
    Xtr = datasets[0][0]
    Xva = datasets[1][0]
    Xtr = to_fX(shift_and_scale_into_01(Xtr))
    Xva = to_fX(shift_and_scale_into_01(Xva))
    tr_samples = Xtr.shape[0]
    va_samples = Xva.shape[0]
    batch_size = 200
    batch_reps = 1
    all_pix_mean = np.mean(np.mean(Xtr, axis=1))
    data_mean = to_fX(all_pix_mean * np.ones((Xtr.shape[1],)))

    TM = TemplateMatchImputer(x_train=Xtr, x_type='bernoulli')

    Xva = row_shuffle(Xva)
    # record an estimate of performance on the test set
    xi, xo, xm = construct_masked_data(Xva[:500], drop_prob=drop_prob, \
                                       occ_dim=occ_dim, data_mean=data_mean)
    img_match_on_known, img_match_on_unknown = TM.best_match_img(xo, xm)

    display_count = 100
    # visualize matches on known elements
    Xs = np.zeros((2*display_count, Xva.shape[1]))
    for idx in range(display_count):
        Xs[2*idx] = xi[idx]
        Xs[(2*idx)+1] = img_match_on_known[idx]
    file_name = "{0:s}_SAMPLES_MOK.png".format(result_tag)
    utils.visualize_samples(Xs, file_name, num_rows=20)
    # visualize matches on unknown elements
    Xs = np.zeros((2*display_count, Xva.shape[1]))
    for idx in range(display_count):
        Xs[2*idx] = xi[idx]
        Xs[(2*idx)+1] = img_match_on_unknown[idx]
    file_name = "{0:s}_SAMPLES_MOU.png".format(result_tag)
    utils.visualize_samples(Xs, file_name, num_rows=20)
    return
Example No. 18
def test_mnist_img(occ_dim=15, drop_prob=0.0):
    #########################################
    # Format the result tag more thoroughly #
    #########################################
    dp_int = int(100.0 * drop_prob)
    result_tag = RESULT_PATH + "TM_OD{}_DP{}".format(occ_dim, dp_int)

    ##########################
    # Get some training data #
    ##########################
    rng = np.random.RandomState(1234)
    dataset = 'data/mnist.pkl.gz'
    datasets = load_udm(dataset, as_shared=False, zero_mean=False)
    Xtr = datasets[0][0]
    Xva = datasets[1][0]
    Xtr = to_fX(shift_and_scale_into_01(Xtr))
    Xva = to_fX(shift_and_scale_into_01(Xva))
    tr_samples = Xtr.shape[0]
    va_samples = Xva.shape[0]
    batch_size = 200
    batch_reps = 1
    all_pix_mean = np.mean(np.mean(Xtr, axis=1))
    data_mean = to_fX(all_pix_mean * np.ones((Xtr.shape[1], )))

    TM = TemplateMatchImputer(x_train=Xtr, x_type='bernoulli')

    Xva = row_shuffle(Xva)
    # record an estimate of performance on the test set
    xi, xo, xm = construct_masked_data(Xva[:500], drop_prob=drop_prob, \
                                       occ_dim=occ_dim, data_mean=data_mean)
    img_match_on_known, img_match_on_unknown = TM.best_match_img(xo, xm)

    display_count = 100
    # visualize matches on known elements
    Xs = np.zeros((2 * display_count, Xva.shape[1]))
    for idx in range(display_count):
        Xs[2 * idx] = xi[idx]
        Xs[(2 * idx) + 1] = img_match_on_known[idx]
    file_name = "{0:s}_SAMPLES_MOK.png".format(result_tag)
    utils.visualize_samples(Xs, file_name, num_rows=20)
    # visualize matches on unknown elements
    Xs = np.zeros((2 * display_count, Xva.shape[1]))
    for idx in range(display_count):
        Xs[2 * idx] = xi[idx]
        Xs[(2 * idx) + 1] = img_match_on_unknown[idx]
    file_name = "{0:s}_SAMPLES_MOU.png".format(result_tag)
    utils.visualize_samples(Xs, file_name, num_rows=20)
    return
Example No. 19
def load_hydranet_from_dict(model_dict, rng=None, Xd=None, \
                            new_params=None):
    """
    Load a clone of some previously trained model.
    """
    # load basic parameters
    self_dot_params = model_dict['params']
    if not (new_params is None):
        for k in new_params:
            self_dot_params[k] = new_params[k]
    # load numpy arrays that will be converted to Theano shared arrays
    self_dot_numpy_param_dicts = model_dict['numpy_param_dicts']
    self_dot_shared_param_dicts = {'shared': [], 'output': []}
    for layer_group in ['shared', 'output']:
        # go over the list of parameter dicts in this layer group
        for numpy_dict in self_dot_numpy_param_dicts[layer_group]:
            shared_dict = {}
            for key in numpy_dict:
                # convert each numpy array to a Theano shared array
                val = to_fX(numpy_dict[key])
                shared_dict[key] = theano.shared(val)
            self_dot_shared_param_dicts[layer_group].append(shared_dict)
    # now, create a HydraNet with the configuration we just unpacked
    clone_net = HydraNet(rng=rng, Xd=Xd, params=self_dot_params, \
                         shared_param_dicts=self_dot_shared_param_dicts)
    # helpful output
    print("==================================================")
    print("LOADED HydraNet WITH PARAMS:")
    for k in self_dot_params:
        print("    {0:s}: {1:s}".format(str(k), str(self_dot_params[k])))
    print("==================================================")
    return clone_net
 def sample_func(XO, use_guide_policy=False):
     # set model to desired generation mode
     old_switch = self.train_switch.get_value(borrow=False)
     if use_guide_policy:
         # take samples from the guide policy
         self.set_train_switch(switch_val=1.0)
     else:
         # take samples from the primary policy
         self.set_train_switch(switch_val=0.0)
     # get belief states and masks generated by the scan loop
     scan_vals = func(to_fX(XO))
     step_count = self.total_steps + 1
     seq_shape = (step_count, XO.shape[0], XO.shape[1])
     xm_seq = np.zeros(seq_shape).astype(theano.config.floatX)
     xi_seq = np.zeros(seq_shape).astype(theano.config.floatX)
     mi_seq = np.zeros(seq_shape).astype(theano.config.floatX)
     for i in range(step_count):
         _xi = scan_vals[i]
         _mi = scan_vals[i + step_count]
         _xm = (_mi * XO) + ((1.0 - _mi) * _xi)
         xm_seq[i,:,:] = _xm
         xi_seq[i,:,:] = _xi
         mi_seq[i,:,:] = _mi
     # set model back to either training or generation mode
     self.set_train_switch(switch_val=old_switch)
     return [xm_seq, xi_seq, mi_seq]
 def prior_sampler(samp_count):
     z_samps = npr.randn(samp_count, self.z_dim)
     z_samps = (np.exp(0.5 * self.prior_logvar) * z_samps) + \
               self.prior_mean
     z_samps = to_fX(z_samps)
     model_samps = sample_func(z_samps)
     return model_samps
 def prior_sampler(samp_count):
     z_samps = npr.randn(samp_count, self.z_dim)
     z_samps = (np.exp(0.5 * self.prior_logvar) * z_samps) + \
               self.prior_mean
      z_samps = to_fX(z_samps)
     model_samps = sample_func(z_samps)
     return model_samps
Example No. 23
def load_infnet_from_dict(model_dict, rng=None, Xd=None, \
                          new_params=None):
    """
    Load a clone of some previously trained model.
    """
    self_dot_params = model_dict['params']
    if not (new_params is None):
        for k in new_params:
            self_dot_params[k] = new_params[k]
    self_dot_numpy_param_dicts = model_dict['numpy_param_dicts']
    self_dot_shared_param_dicts = {'shared': [], 'mu': [], 'sigma': []}
    for layer_group in ['shared', 'mu', 'sigma']:
        for numpy_dict in self_dot_numpy_param_dicts[layer_group]:
            shared_dict = {}
            for key in numpy_dict:
                val = to_fX(numpy_dict[key])
                shared_dict[key] = theano.shared(val)
            self_dot_shared_param_dicts[layer_group].append(shared_dict)
    # now, create an InfNet with the configuration we just unpacked
    clone_net = InfNet(rng=rng, Xd=Xd, params=self_dot_params, \
            shared_param_dicts=self_dot_shared_param_dicts)
    # helpful output
    print("==================================================")
    print("LOADED InfNet WITH PARAMS:")
    for k in self_dot_params:
        print("    {0:s}: {1:s}".format(str(k), str(self_dot_params[k])))
    print("==================================================")
    return clone_net
Example No. 24
 def sample_func(XO, use_guide_policy=False):
     # set model to desired generation mode
     old_switch = self.train_switch.get_value(borrow=False)
     if use_guide_policy:
         # take samples from the guide policy
         self.set_train_switch(switch_val=1.0)
     else:
         # take samples from the primary policy
         self.set_train_switch(switch_val=0.0)
     # get belief states and masks generated by the scan loop
     scan_vals = func(to_fX(XO))
     step_count = self.total_steps + 1
     seq_shape = (step_count, XO.shape[0], XO.shape[1])
     xm_seq = np.zeros(seq_shape).astype(theano.config.floatX)
     xi_seq = np.zeros(seq_shape).astype(theano.config.floatX)
     mi_seq = np.zeros(seq_shape).astype(theano.config.floatX)
     for i in range(step_count):
         _xi = scan_vals[i]
         _mi = scan_vals[i + step_count]
         _xm = (_mi * XO) + ((1.0 - _mi) * _xi)
         xm_seq[i, :, :] = _xm
         xi_seq[i, :, :] = _xi
         mi_seq[i, :, :] = _mi
     # set model back to either training or generation mode
     self.set_train_switch(switch_val=old_switch)
     return [xm_seq, xi_seq, mi_seq]
Example No. 25
def load_hydranet_from_dict(model_dict, rng=None, Xd=None, \
                            new_params=None):
    """
    Load a clone of some previously trained model.
    """
    # load basic parameters
    self_dot_params = model_dict['params']
    if not (new_params is None):
        for k in new_params:
            self_dot_params[k] = new_params[k]
    # load numpy arrays that will be converted to Theano shared arrays
    self_dot_numpy_param_dicts = model_dict['numpy_param_dicts']
    self_dot_shared_param_dicts = {'shared': [], 'output': []}
    for layer_group in ['shared', 'output']:
        # go over the list of parameter dicts in this layer group
        for numpy_dict in self_dot_numpy_param_dicts[layer_group]:
            shared_dict = {}
            for key in numpy_dict:
                # convert each numpy array to a Theano shared array
                val = to_fX(numpy_dict[key])
                shared_dict[key] = theano.shared(val)
            self_dot_shared_param_dicts[layer_group].append(shared_dict)
    # now, create a HydraNet with the configuration we just unpacked
    clone_net = HydraNet(rng=rng, Xd=Xd, params=self_dot_params, \
                         shared_param_dicts=self_dot_shared_param_dicts)
    # helpful output
    print("==================================================")
    print("LOADED HydraNet WITH PARAMS:")
    for k in self_dot_params:
        print("    {0:s}: {1:s}".format(str(k), str(self_dot_params[k])))
    print("==================================================")
    return clone_net
 def set_lam_nll(self, lam_nll=1.0):
     """
     Set weight for controlling the influence of the data likelihood.
     """
     zero_ary = np.zeros((1, ))
     new_lam = zero_ary + lam_nll
     self.lam_nll.set_value(to_fX(new_lam))
     return
Example No. 27
def test_svhn_nll(occ_dim=15, drop_prob=0.0):
    RESULT_PATH = "IMP_SVHN_TM/"
    #########################################
    # Format the result tag more thoroughly #
    #########################################
    dp_int = int(100.0 * drop_prob)
    result_tag = RESULT_PATH + "TM_OD{}_DP{}".format(occ_dim, dp_int)

    ##########################
    # Get some training data #
    ##########################
    rng = np.random.RandomState(1234)
    tr_file = 'data/svhn_train_gray.pkl'
    te_file = 'data/svhn_test_gray.pkl'
    ex_file = 'data/svhn_extra_gray.pkl'
    data = load_svhn_gray(tr_file, te_file, ex_file=ex_file, ex_count=200000)
    Xtr = to_fX( shift_and_scale_into_01(np.vstack([data['Xtr'], data['Xex']])) )
    Xva = to_fX( shift_and_scale_into_01(data['Xte']) )
    tr_samples = Xtr.shape[0]
    va_samples = Xva.shape[0]
    batch_size = 250
    batch_reps = 1
    all_pix_mean = np.mean(np.mean(Xtr, axis=1))
    data_mean = to_fX( all_pix_mean * np.ones((Xtr.shape[1],)) )

    TM = TemplateMatchImputer(x_train=Xtr, x_type='bernoulli')

    log_name = "{}_RESULTS.txt".format(result_tag)
    out_file = open(log_name, 'wb')

    Xva = row_shuffle(Xva)
    # record an estimate of performance on the test set
    xi, xo, xm = construct_masked_data(Xva, drop_prob=drop_prob, \
                                       occ_dim=occ_dim, data_mean=data_mean)
    result = TM.best_match_nll(xo, xm)
    match_on_known = np.mean(result[0])
    match_on_unknown = np.mean(result[1])
    str0 = "Test 1:"
    str1 = "    match on known   : {}".format(match_on_known)
    str2 = "    match on unknown : {}".format(match_on_unknown)
    joint_str = "\n".join([str0, str1, str2])
    print(joint_str)
    out_file.write(joint_str+"\n")
    out_file.flush()
    out_file.close()
    return
Example No. 28
 def set_sgd_params(self, lr_1=0.01, lr_2=0.01, \
             mom_1=0.9, mom_2=0.999):
     """
     Set learning rate and momentum parameter for all updates.
     """
     zero_ary = np.zeros((1,))
     # set learning rates
     new_lr_1 = zero_ary + lr_1
     self.lr_1.set_value(to_fX(new_lr_1))
     new_lr_2 = zero_ary + lr_2
     self.lr_2.set_value(to_fX(new_lr_2))
     # set momentums
     new_mom_1 = zero_ary + mom_1
     self.mom_1.set_value(to_fX(new_mom_1))
     new_mom_2 = zero_ary + mom_2
     self.mom_2.set_value(to_fX(new_mom_2))
     return
Example No. 29
 def set_lam_l2w(self, lam_l2w=1e-3):
     """
     Set the relative strength of l2 regularization on network params.
     """
     zero_ary = np.zeros((1, ))
     new_lam = zero_ary + lam_l2w
     self.lam_l2w.set_value(to_fX(new_lam))
     return
Example No. 30
 def set_lam_nll(self, lam_nll=1.0):
     """
     Set weight for controlling the influence of the data likelihood.
     """
     zero_ary = np.zeros((1,))
     new_lam = zero_ary + lam_nll
     self.lam_nll.set_value(to_fX(new_lam))
     return
Example No. 31
 def set_lam_l2w(self, lam_l2w=1e-3):
     """
     Set the relative strength of l2 regularization on network params.
     """
     zero_ary = np.zeros((1,))
     new_lam = zero_ary + lam_l2w
     self.lam_l2w.set_value(to_fX(new_lam))
     return
Example No. 32
 def set_lam_kld_l1l2(self, lam_kld_l1l2=1.0):
     """
     Set the weight for shaping penalty on conditional priors over zt.
     """
     zero_ary = np.zeros((1,))
     new_val = zero_ary + lam_kld_l1l2
     self.lam_kld_l1l2.set_value(to_fX(new_val))
     return
Example No. 33
 def set_drop_rate(self, drop_rate=0.0):
     """
      Set the drop rate applied by this model.
     """
     zero_ary = np.zeros((1,))
     new_val = zero_ary + drop_rate
     self.drop_rate.set_value(to_fX(new_val))
     return
Example No. 34
 def raw_kld_computer(XI, XO):
     hi_zmuv = to_fX( npr.randn(self.ir_steps, XI.shape[0], self.h_dim) )
     _all_costs = cost_func(XI, XO, hi_zmuv)
     _init_klds = _all_costs[0]
     _kld_q2p = np.sum(np.mean(_all_costs[1], axis=1, keepdims=True), axis=0)
     _kld_p2q = np.sum(np.mean(_all_costs[2], axis=1, keepdims=True), axis=0)
     results = [_init_klds, _kld_q2p, _kld_p2q]
     return results
Example No. 35
 def set_sigma_scale(self, sigma_scale=1.0):
     """
     Set the posterior sigma rescaling shared parameter to some value.
     """
     zero_ary = np.zeros((1,))
     new_scale = zero_ary + sigma_scale
     self.sigma_scale.set_value(to_fX(new_scale))
     return
Example No. 36
 def init_biases(self, b_init=0.0, b_std=1e-2):
     """
     Initialize the biases in all hidden layers to some constant.
     """
     for layer in self.shared_layers:
         b_vec = (0.0 * layer.b.get_value(borrow=False)) + b_init
         b_vec = b_vec + (b_std * npr.randn(*b_vec.shape))
         layer.b.set_value(to_fX(b_vec))
     for layer in self.mu_layers[:-1]:
         b_vec = (0.0 * layer.b.get_value(borrow=False)) + b_init
         b_vec = b_vec + (b_std * npr.randn(*b_vec.shape))
         layer.b.set_value(to_fX(b_vec))
     for layer in self.sigma_layers[:-1]:
         b_vec = (0.0 * layer.b.get_value(borrow=False)) + b_init
         b_vec = b_vec + (b_std * npr.randn(*b_vec.shape))
         layer.b.set_value(to_fX(b_vec))
     return
Example No. 37
def test_svhn_nll(occ_dim=15, drop_prob=0.0):
    RESULT_PATH = "IMP_SVHN_TM/"
    #########################################
    # Format the result tag more thoroughly #
    #########################################
    dp_int = int(100.0 * drop_prob)
    result_tag = RESULT_PATH + "TM_OD{}_DP{}".format(occ_dim, dp_int)

    ##########################
    # Get some training data #
    ##########################
    rng = np.random.RandomState(1234)
    tr_file = 'data/svhn_train_gray.pkl'
    te_file = 'data/svhn_test_gray.pkl'
    ex_file = 'data/svhn_extra_gray.pkl'
    data = load_svhn_gray(tr_file, te_file, ex_file=ex_file, ex_count=200000)
    Xtr = to_fX(shift_and_scale_into_01(np.vstack([data['Xtr'], data['Xex']])))
    Xva = to_fX(shift_and_scale_into_01(data['Xte']))
    tr_samples = Xtr.shape[0]
    va_samples = Xva.shape[0]
    batch_size = 250
    batch_reps = 1
    all_pix_mean = np.mean(np.mean(Xtr, axis=1))
    data_mean = to_fX(all_pix_mean * np.ones((Xtr.shape[1], )))

    TM = TemplateMatchImputer(x_train=Xtr, x_type='bernoulli')

    log_name = "{}_RESULTS.txt".format(result_tag)
    out_file = open(log_name, 'wb')

    Xva = row_shuffle(Xva)
    # record an estimate of performance on the test set
    xi, xo, xm = construct_masked_data(Xva, drop_prob=drop_prob, \
                                       occ_dim=occ_dim, data_mean=data_mean)
    result = TM.best_match_nll(xo, xm)
    match_on_known = np.mean(result[0])
    match_on_unknown = np.mean(result[1])
    str0 = "Test 1:"
    str1 = "    match on known   : {}".format(match_on_known)
    str2 = "    match on unknown : {}".format(match_on_unknown)
    joint_str = "\n".join([str0, str1, str2])
    print(joint_str)
    out_file.write(joint_str + "\n")
    out_file.flush()
    out_file.close()
    return
Example No. 38
def test_mnist_nll(occ_dim=15, drop_prob=0.0):
    #########################################
    # Format the result tag more thoroughly #
    #########################################
    dp_int = int(100.0 * drop_prob)
    result_tag = RESULT_PATH + "TM_OD{}_DP{}".format(occ_dim, dp_int)

    ##########################
    # Get some training data #
    ##########################
    rng = np.random.RandomState(1234)
    dataset = 'data/mnist.pkl.gz'
    datasets = load_udm(dataset, as_shared=False, zero_mean=False)
    Xtr = datasets[0][0]
    Xva = datasets[1][0]
    Xtr = to_fX(shift_and_scale_into_01(Xtr))
    Xva = to_fX(shift_and_scale_into_01(Xva))
    tr_samples = Xtr.shape[0]
    va_samples = Xva.shape[0]
    batch_size = 200
    batch_reps = 1
    all_pix_mean = np.mean(np.mean(Xtr, axis=1))
    data_mean = to_fX(all_pix_mean * np.ones((Xtr.shape[1],)))

    TM = TemplateMatchImputer(x_train=Xtr, x_type='bernoulli')

    log_name = "{}_RESULTS.txt".format(result_tag)
    out_file = open(log_name, 'wb')

    Xva = row_shuffle(Xva)
    # record an estimate of performance on the test set
    xi, xo, xm = construct_masked_data(Xva, drop_prob=drop_prob, \
                                       occ_dim=occ_dim, data_mean=data_mean)
    result = TM.best_match_nll(xo, xm)
    match_on_known = np.mean(result[0])
    match_on_unknown = np.mean(result[1])
    str0 = "Test 1:"
    str1 = "    match on known   : {}".format(match_on_known)
    str2 = "    match on unknown : {}".format(match_on_unknown)
    joint_str = "\n".join([str0, str1, str2])
    print(joint_str)
    out_file.write(joint_str+"\n")
    out_file.flush()
    out_file.close()
    return
Example No. 39
 def conditional_sampler(XI, XO=None, guided_decoding=False):
     XI = to_fX( XI )
     if XO is None:
         XO = XI
     XO = to_fX( XO )
     # set model to desired generation mode
     old_switch = self.train_switch.get_value(borrow=False)
     if guided_decoding:
         # take samples from guide policies (i.e. variational q)
         self.set_train_switch(switch_val=1.0)
     else:
         # take samples from model's generative policy
         self.set_train_switch(switch_val=0.0)
     # draw guided/unguided conditional samples
     model_samps = sample_func(XI, XO)
     # set model back to either training or generation mode
     self.set_train_switch(switch_val=old_switch)
     return model_samps
Example No. 40
 def init_biases(self, b_init=0.0, b_std=1e-2):
     """
      Initialize the biases in all shared layers to some constant.
     """
     for layer in self.shared_layers:
         b_vec = (0.0 * layer.b.get_value(borrow=False)) + b_init
         b_vec = b_vec + (b_std * npr.randn(*b_vec.shape))
         layer.b.set_value(to_fX(b_vec))
     return
 def prior_sampler(samp_count):
     x_samps = to_fX( np.zeros((samp_count, self.x_dim)) )
     old_switch = self.train_switch.get_value(borrow=False)
     # set model to generation mode
     self.set_train_switch(switch_val=0.0)
     # generate samples from model
     model_samps = sample_func(x_samps)
     # set model back to previous mode
     self.set_train_switch(switch_val=old_switch)
     return model_samps
 def set_kld_z_mean(self, x):
     """
     Compute mean of KL(q(z|x) || p(z)) for the observations in x, and
     then use it to reset self.kld_z_mean.
     """
     nll, kld = self.compute_fe_terms(x, 10)
     old_mean = self.kld_z_mean.get_value(borrow=False)
     new_mean = (0.0 * old_mean) + np.mean(kld)
     self.kld_z_mean.set_value(to_fX(new_mean))
     return
 def prior_sampler(samp_count):
     x_samps = to_fX( np.zeros((samp_count, self.x_dim)) )
     old_switch = self.train_switch.get_value(borrow=False)
     # set model to generation mode
     self.set_train_switch(switch_val=0.0)
     # generate samples from model
     model_samps = sample_func(x_samps)
     # set model back to previous mode
     self.set_train_switch(switch_val=old_switch)
     return model_samps
def load_gpsimputer_from_file(f_name=None, rng=None):
    """
    Load a clone of some previously trained model.
    """
    from InfNet import load_infnet_from_dict
    from HydraNet import load_hydranet_from_dict
    assert(not (f_name is None))
    pickle_file = open(f_name)
    # reload the basic python parameters
    self_dot_params = cPickle.load(pickle_file)
    # reload the theano shared parameters
    self_dot_numpy_param_dicts = cPickle.load(pickle_file)
    self_dot_shared_param_dicts = {}
    for key in self_dot_numpy_param_dicts:
        val = to_fX(self_dot_numpy_param_dicts[key])
        self_dot_shared_param_dicts[key] = theano.shared(val)
    # reload the child models
    child_model_dicts = cPickle.load(pickle_file)
    xd = T.matrix()
    p_h_given_x = load_infnet_from_dict( \
            child_model_dicts['p_h_given_x'], rng=rng, Xd=xd)
    p_s0_given_h = load_hydranet_from_dict( \
            child_model_dicts['p_s0_given_h'], rng=rng, Xd=xd)
    p_zi_given_xi = load_infnet_from_dict( \
            child_model_dicts['p_zi_given_xi'], rng=rng, Xd=xd)
    p_sip1_given_zi = load_hydranet_from_dict( \
            child_model_dicts['p_sip1_given_zi'], rng=rng, Xd=xd)
    p_x_given_si = load_hydranet_from_dict( \
            child_model_dicts['p_x_given_si'], rng=rng, Xd=xd)
    q_h_given_x = load_infnet_from_dict( \
            child_model_dicts['q_h_given_x'], rng=rng, Xd=xd)
    q_zi_given_xi = load_infnet_from_dict( \
            child_model_dicts['q_zi_given_xi'], rng=rng, Xd=xd)
    # now, create a new GPSImputerWI based on the loaded data
    xi = T.matrix()
    xm = T.matrix()
    xo = T.matrix()
    clone_net = GPSImputerWI(rng=rng, \
                             x_in=xi, x_mask=xm, x_out=xo, \
                             p_h_given_x=p_h_given_x, \
                             p_s0_given_h=p_s0_given_h, \
                             p_zi_given_xi=p_zi_given_xi, \
                             p_sip1_given_zi=p_sip1_given_zi, \
                             p_x_given_si=p_x_given_si, \
                             q_h_given_x=q_h_given_x, \
                             q_zi_given_xi=q_zi_given_xi, \
                             params=self_dot_params, \
                             shared_param_dicts=self_dot_shared_param_dicts)
    # helpful output
    print("==================================================")
    print("LOADED GPSImputerWI WITH PARAMS:")
    for k in self_dot_params:
        print("    {0:s}: {1:s}".format(str(k), str(self_dot_params[k])))
    print("==================================================")
    return clone_net
 def fe_term_estimator(X, sample_count):
     X = to_fX(X)
     ll_sum = np.zeros((X.shape[0], ))
     kld_sum = np.zeros((X.shape[0], ))
     for i in range(sample_count):
         result = fe_term_sample(X)
         ll_sum = ll_sum + result[0].ravel()
         kld_sum = kld_sum + result[1].ravel()
     mean_nll = -ll_sum / float(sample_count)
     mean_kld = kld_sum / float(sample_count)
     return [mean_nll, mean_kld]
 def fe_term_estimator(X, sample_count):
     X = to_fX(X)
     ll_sum = np.zeros((X.shape[0],))
     kld_sum = np.zeros((X.shape[0],))
     for i in range(sample_count):
         result = fe_term_sample(X)
         ll_sum = ll_sum + result[0].ravel()
         kld_sum = kld_sum + result[1].ravel()
     mean_nll = -ll_sum / float(sample_count)
     mean_kld = kld_sum / float(sample_count)
     return [mean_nll, mean_kld]
Example No. 47
 def set_bias_noise(self, bias_noise=0.0):
     """
     Set the bias noise in all hidden layers to the given value.
     """
     new_ary = np.zeros((1,)) + bias_noise
     new_bn = to_fX( new_ary )
     for layer in self.shared_layers:
         layer.bias_noise.set_value(new_bn)
     for layer in self.output_layers:
         layer.bias_noise.set_value(new_bn)
     return
Example No. 48
 def set_train_switch(self, switch_val=0.0):
     """
     Set the switch for changing between training and sampling behavior.
     """
     if (switch_val < 0.5):
         switch_val = 0.0
     else:
         switch_val = 1.0
     zero_ary = np.zeros((1, ))
     new_val = zero_ary + switch_val
     self.train_switch.set_value(to_fX(new_val))
     return
def img_split(imgs, im_dim=None, split_col=None, transposed=False):
    """
    Split the flattened images in the rows of imgs vertically, taking the first
    split_col columns from the left and the remaining im_dim[1]-split_col
    columns from the right.
    """
    if transposed:
        assert (im_dim[0] == im_dim[1]), "transpose only works for square imgs"
    img_count = imgs.shape[0]
    row_count = im_dim[0]
    col_count = im_dim[1]
    l_obs_dim = split_col * row_count
    r_obs_dim = (col_count - split_col) * row_count
    left_cols = np.zeros((img_count, l_obs_dim))
    right_cols = np.zeros((img_count, r_obs_dim))
    for i in range(img_count):
        im = imgs[i, :].reshape(im_dim)
        if transposed:
            im = im.transpose()
        left_cols[i, :] = im[:, :split_col].flatten()
        right_cols[i, :] = im[:, split_col:].flatten()
    return to_fX(left_cols), to_fX(right_cols)
Example No. 50
 def generate_batch(num_samples, obj_type='circle'):
     # generate a minibatch of trajectories
     traj_pos, traj_vel = TRAJ.generate_trajectories(
         num_samples, (traj_len + 1))
     traj_x = traj_pos[:, :, 0]
     traj_y = traj_pos[:, :, 1]
     # draw the trajectories
     center_x = to_fX(traj_x.T.ravel())
     center_y = to_fX(traj_y.T.ravel())
     delta = to_fX(np.ones(center_x.shape))
     sigma = to_fX(np.ones(center_x.shape))
     paint_obj = OPTRS[obj_type]
     W = paint_obj(center_y, center_x, delta, 0.05 * sigma)
     # shape trajectories into a batch for passing to the model
     batch_imgs = np.zeros((num_samples, (traj_len + 1), obs_dim))
     batch_coords = np.zeros((num_samples, (traj_len + 1), 2))
     for i in range(num_samples):
         start_idx = i * (traj_len + 1)
         end_idx = start_idx + (traj_len + 1)
         img_set = W[start_idx:end_idx, :]
         batch_imgs[i, :, :] = img_set
         batch_coords[i, :, 0] = center_x[start_idx:end_idx]
         batch_coords[i, :, 1] = center_y[start_idx:end_idx]
     batch_imgs = np.swapaxes(batch_imgs, 0, 1)
     batch_coords = np.swapaxes(batch_coords, 0, 1)
     return [to_fX(batch_imgs), to_fX(batch_coords)]
Example No. 51
 def imputer_sampler(XI, XO, XM, use_guide_policy=False):
     XI = to_fX(XI)
     XO = to_fX(XO)
     XM = to_fX(XM)
     # set model to desired generation mode
     old_switch = self.train_switch.get_value(borrow=False)
     if use_guide_policy:
         # take samples from guide policies (i.e. variational q)
         self.set_train_switch(switch_val=1.0)
     else:
         # take samples from model's imputation policy
         self.set_train_switch(switch_val=0.0)
     # draw guided/unguided conditional samples
     model_samps = sample_func(XI, XO, XM)
     # set model back to either training or generation mode
     self.set_train_switch(switch_val=old_switch)
     # reverse engineer the "masked" samples...
     masked_samps = []
     for xs in model_samps:
         xsm = (XM * XI) + ((1.0 - XM) * xs)
         masked_samps.append(xsm)
     return model_samps, masked_samps
Example No. 52
def load_WalkoutModel_from_file(f_name=None, rng=None):
    """
    Load a clone of some previously trained model.
    """
    from InfNet import load_infnet_from_dict
    from HydraNet import load_hydranet_from_dict
    assert (not (f_name is None))
    pickle_file = open(f_name)
    # reload the basic python parameters
    self_dot_params = cPickle.load(pickle_file)
    # reload the theano shared parameters
    self_dot_numpy_param_dicts = cPickle.load(pickle_file)
    self_dot_shared_param_dicts = {}
    for key in self_dot_numpy_param_dicts:
        val = to_fX(self_dot_numpy_param_dicts[key])
        self_dot_shared_param_dicts[key] = theano.shared(val)
    # reload the child models
    child_model_dicts = cPickle.load(pickle_file)
    xd = T.matrix()
    p_zi_given_xi = load_infnet_from_dict( \
            child_model_dicts['p_zi_given_xi'], rng=rng, Xd=xd)
    p_sip1_given_zi = load_hydranet_from_dict( \
            child_model_dicts['p_sip1_given_zi'], rng=rng, Xd=xd)
    p_x_given_si = load_hydranet_from_dict( \
            child_model_dicts['p_x_given_si'], rng=rng, Xd=xd)
    q_zi_given_xi = load_infnet_from_dict( \
            child_model_dicts['q_zi_given_xi'], rng=rng, Xd=xd)
    # now, create a new WalkoutModel based on the loaded data
    xo = T.matrix()
    clone_net = WalkoutModel(rng=rng, \
                         x_out=xo, \
                         p_zi_given_xi=p_zi_given_xi, \
                         p_sip1_given_zi=p_sip1_given_zi, \
                         p_x_given_si=p_x_given_si, \
                         q_zi_given_xi=q_zi_given_xi, \
                         params=self_dot_params, \
                         shared_param_dicts=self_dot_shared_param_dicts)
    # helpful output
    print("==================================================")
    print("LOADED WalkoutModel WITH PARAMS:")
    for k in self_dot_params:
        print("    {0:s}: {1:s}".format(str(k), str(self_dot_params[k])))
    print("==================================================")
    return clone_net
def img_join(left_cols, right_cols, im_dim=None, transposed=False):
    """
    Join flattened images vertically.
    """
    if transposed:
        assert (im_dim[0] == im_dim[1]), "transpose only works for square imgs"
    img_count = left_cols.shape[0]
    row_count = im_dim[0]
    col_count = im_dim[1]
    left_col_count = left_cols.shape[1] // row_count  # integer division so the reshape below gets an int
    right_col_count = col_count - left_col_count
    imgs = np.zeros((img_count, row_count * col_count))
    im_sq = np.zeros((row_count, col_count))
    for i in range(img_count):
        left_chunk = left_cols[i, :].reshape((row_count, left_col_count))
        right_chunk = right_cols[i, :].reshape((row_count, right_col_count))
        im_sq[:, :left_col_count] = left_chunk[:, :]
        im_sq[:, left_col_count:] = right_chunk[:, :]
        if transposed:
            im_sq = im_sq.transpose()
        imgs[i, :] = im_sq.flatten()
    return to_fX(imgs)
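A hypothetical round-trip check for img_split and img_join (not from the source repo), assuming to_fX as sketched earlier and flattened 28x28 images like the MNIST data used elsewhere on this page:

import numpy as np
import numpy.random as npr

imgs = to_fX(npr.rand(10, 28 * 28))
left, right = img_split(imgs, im_dim=(28, 28), split_col=14)
rebuilt = img_join(left, right, im_dim=(28, 28))
# splitting and re-joining should reproduce the original images
assert np.allclose(imgs, rebuilt)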
def test_seq_cond_gen_copy(step_type='add', res_tag="AAA"):
    ##############################
    # File tag, for output stuff #
    ##############################
    result_tag = "{}TEST_{}".format(RESULT_PATH, res_tag)

    ##########################
    # Get some training data #
    ##########################
    rng = np.random.RandomState(1234)
    dataset = 'data/mnist.pkl.gz'
    datasets = load_udm(dataset, as_shared=False, zero_mean=False)
    Xtr = datasets[0][0]
    Xva = datasets[1][0]
    Xte = datasets[2][0]
    # merge validation set and training set, and test on test set.
    #Xtr = np.concatenate((Xtr, Xva), axis=0)
    #Xva = Xte
    Xtr = to_fX(shift_and_scale_into_01(Xtr))
    Xva = to_fX(shift_and_scale_into_01(Xva))

    # basic params
    batch_size = 128
    traj_len = 20
    im_dim = 28
    obs_dim = im_dim * im_dim

    def sample_batch(np_ary, bs=100):
        row_count = np_ary.shape[0]
        samp_idx = npr.randint(low=0, high=row_count, size=(bs, ))
        xb = np_ary.take(samp_idx, axis=0)
        return xb

    ############################################################
    # Setup some parameters for the Iterative Refinement Model #
    ############################################################
    total_steps = traj_len
    init_steps = 5
    exit_rate = 0.1
    nll_weight = 0.0
    x_dim = obs_dim
    y_dim = obs_dim
    z_dim = 128
    att_spec_dim = 5
    rnn_dim = 512
    mlp_dim = 512

    def visualize_attention(result, pre_tag="AAA", post_tag="AAA"):
        seq_len = result[0].shape[0]
        samp_count = result[0].shape[1]
        # get generated predictions
        x_samps = np.zeros((seq_len * samp_count, obs_dim))
        idx = 0
        for s1 in range(samp_count):
            for s2 in range(seq_len):
                x_samps[idx] = result[0][s2, s1, :]
                idx += 1
        file_name = "{0:s}_traj_xs_{1:s}.png".format(pre_tag, post_tag)
        utils.visualize_samples(x_samps, file_name, num_rows=samp_count)
        # get sequential attention maps
        seq_samps = np.zeros((seq_len * samp_count, obs_dim))
        idx = 0
        for s1 in range(samp_count):
            for s2 in range(seq_len):
                seq_samps[idx] = result[1][s2, s1, :]
                idx += 1
        file_name = "{0:s}_traj_att_maps_{1:s}.png".format(pre_tag, post_tag)
        utils.visualize_samples(seq_samps, file_name, num_rows=samp_count)
        # get sequential attention maps (read out values)
        seq_samps = np.zeros((seq_len * samp_count, obs_dim))
        idx = 0
        for s1 in range(samp_count):
            for s2 in range(seq_len):
                seq_samps[idx] = result[2][s2, s1, :]
                idx += 1
        file_name = "{0:s}_traj_read_outs_{1:s}.png".format(pre_tag, post_tag)
        utils.visualize_samples(seq_samps, file_name, num_rows=samp_count)
        # get original input sequences
        seq_samps = np.zeros((seq_len * samp_count, obs_dim))
        idx = 0
        for s1 in range(samp_count):
            for s2 in range(seq_len):
                seq_samps[idx] = result[3][s2, s1, :]
                idx += 1
        file_name = "{0:s}_traj_xs_in_{1:s}.png".format(pre_tag, post_tag)
        utils.visualize_samples(seq_samps, file_name, num_rows=samp_count)
        return

    rnninits = {
        'weights_init': IsotropicGaussian(0.01),
        'biases_init': Constant(0.),
    }
    inits = {
        'weights_init': IsotropicGaussian(0.01),
        'biases_init': Constant(0.),
    }

    # module for doing local 2d read defined by an attention specification
    img_scale = 1.0  # image coords will range over [-img_scale...img_scale]
    read_N = 2  # use NxN grid for reader
    reader_mlp = FovAttentionReader2d(x_dim=obs_dim,
                                      width=im_dim,
                                      height=im_dim,
                                      N=read_N,
                                      img_scale=img_scale,
                                      att_scale=0.5,
                                      **inits)
    read_dim = reader_mlp.read_dim  # total number of "pixels" read by reader

    # MLP for updating belief state based on con_rnn
    writer_mlp = MLP([None, None], [rnn_dim, mlp_dim, obs_dim], \
                     name="writer_mlp", **inits)

    # mlps for processing inputs to LSTMs
    con_mlp_in = MLP([Identity()], \
                     [                       z_dim, 4*rnn_dim], \
                     name="con_mlp_in", **inits)
    var_mlp_in = MLP([Identity()], \
                     [(read_dim + read_dim + att_spec_dim + rnn_dim), 4*rnn_dim], \
                     name="var_mlp_in", **inits)
    gen_mlp_in = MLP([Identity()], \
                     [        (read_dim + att_spec_dim + rnn_dim), 4*rnn_dim], \
                     name="gen_mlp_in", **inits)

    # mlps for turning LSTM outputs into conditionals over z_gen
    con_mlp_out = CondNet([], [rnn_dim, att_spec_dim], \
                          name="con_mlp_out", **inits)
    gen_mlp_out = CondNet([], [rnn_dim, z_dim], name="gen_mlp_out", **inits)
    var_mlp_out = CondNet([], [rnn_dim, z_dim], name="var_mlp_out", **inits)

    # LSTMs for the actual LSTMs (obviously, perhaps)
    con_rnn = BiasedLSTM(dim=rnn_dim, ig_bias=2.0, fg_bias=2.0, \
                         name="con_rnn", **rnninits)
    gen_rnn = BiasedLSTM(dim=rnn_dim, ig_bias=2.0, fg_bias=2.0, \
                         name="gen_rnn", **rnninits)
    var_rnn = BiasedLSTM(dim=rnn_dim, ig_bias=2.0, fg_bias=2.0, \
                         name="var_rnn", **rnninits)

    SCG = SeqCondGenRAM(x_and_y_are_seqs=False,
                        total_steps=total_steps,
                        init_steps=init_steps,
                        exit_rate=exit_rate,
                        nll_weight=nll_weight,
                        step_type=step_type,
                        x_dim=obs_dim,
                        y_dim=obs_dim,
                        reader_mlp=reader_mlp,
                        writer_mlp=writer_mlp,
                        con_mlp_in=con_mlp_in,
                        con_mlp_out=con_mlp_out,
                        con_rnn=con_rnn,
                        gen_mlp_in=gen_mlp_in,
                        gen_mlp_out=gen_mlp_out,
                        gen_rnn=gen_rnn,
                        var_mlp_in=var_mlp_in,
                        var_mlp_out=var_mlp_out,
                        var_rnn=var_rnn)
    SCG.initialize()

    compile_start_time = time.time()

    # build the attention trajectory sampler
    SCG.build_attention_funcs()

    # quick test of attention trajectory sampler
    Xb = sample_batch(Xtr, bs=32)
    result = SCG.sample_attention(Xb, Xb)
    visualize_attention(result, pre_tag=result_tag, post_tag="b0")

    # build the main model functions (i.e. training and cost functions)
    SCG.build_model_funcs()

    compile_end_time = time.time()
    compile_minutes = (compile_end_time - compile_start_time) / 60.0
    print("THEANO COMPILE TIME (MIN): {}".format(compile_minutes))

    # TEST SAVE/LOAD FUNCTIONALITY
    param_save_file = "{}_params.pkl".format(result_tag)
    SCG.save_model_params(param_save_file)
    SCG.load_model_params(param_save_file)

    ################################################################
    # Apply some updates, to check that they aren't totally broken #
    ################################################################
    print("Beginning to train the model...")
    out_file = open("{}_results.txt".format(result_tag), 'wb')
    out_file.flush()
    costs = [0. for i in range(10)]
    learn_rate = 0.0001
    momentum = 0.95
    for i in range(250000):
        lr_scale = min(1.0, ((i + 1) / 5000.0))
        mom_scale = min(1.0, ((i + 1) / 10000.0))
        if (((i + 1) % 10000) == 0):
            learn_rate = learn_rate * 0.95
        # set sgd and objective function hyperparams for this update
        SCG.set_sgd_params(lr=lr_scale * learn_rate,
                           mom_1=mom_scale * momentum,
                           mom_2=0.99)
        SCG.set_lam_kld(lam_kld_q2p=0.95, lam_kld_p2q=0.05, \
                        lam_kld_amu=0.0, lam_kld_alv=0.1)
        # perform a minibatch update and record the cost for this batch
        Xb = sample_batch(Xtr, bs=batch_size)
        result = SCG.train_joint(Xb, Xb)
        costs = [(costs[j] + result[j]) for j in range(len(result))]
        # output diagnostic information and checkpoint parameters, etc.
        if ((i % 250) == 0):
            costs = [(v / 250.0) for v in costs]
            str1 = "-- batch {0:d} --".format(i)
            str2 = "    total_cost: {0:.4f}".format(costs[0])
            str3 = "    nll_term  : {0:.4f}".format(costs[1])
            str4 = "    kld_q2p   : {0:.4f}".format(costs[2])
            str5 = "    kld_p2q   : {0:.4f}".format(costs[3])
            str6 = "    kld_amu   : {0:.4f}".format(costs[4])
            str7 = "    kld_alv   : {0:.4f}".format(costs[5])
            str8 = "    reg_term  : {0:.4f}".format(costs[6])
            joint_str = "\n".join(
                [str1, str2, str3, str4, str5, str6, str7, str8])
            print(joint_str)
            out_file.write(joint_str + "\n")
            out_file.flush()
            costs = [0.0 for v in costs]
        if ((i % 500) == 0):
            SCG.save_model_params("{}_params.pkl".format(result_tag))
            #############################################
            # check model performance on validation set #
            #############################################
            Xb = sample_batch(Xva, bs=500)
            result = SCG.compute_nll_bound(Xb, Xb)
            str2 = "    va_total_cost: {0:.4f}".format(float(result[0]))
            str3 = "    va_nll_term  : {0:.4f}".format(float(result[1]))
            str4 = "    va_kld_q2p   : {0:.4f}".format(float(result[2]))
            str5 = "    va_kld_p2q   : {0:.4f}".format(float(result[3]))
            str6 = "    va_kld_amu   : {0:.4f}".format(float(result[4]))
            str7 = "    va_kld_alv   : {0:.4f}".format(float(result[5]))
            str8 = "    va_reg_term  : {0:.4f}".format(float(result[6]))
            joint_str = "\n".join([str2, str3, str4, str5, str6, str7, str8])
            print(joint_str)
            out_file.write(joint_str + "\n")
            out_file.flush()
            ###########################################
            # sample and draw attention trajectories. #
            ###########################################
            Xb = sample_batch(Xva, bs=32)
            result = SCG.sample_attention(Xb, Xb)
            post_tag = "b{0:d}".format(i)
            visualize_attention(result, pre_tag=result_tag, post_tag=post_tag)
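
# Note: sample_batch() is used above but not defined in this listing. A minimal
# sketch of the assumed behaviour -- draw a random minibatch of rows from a data
# matrix -- is given below; the actual helper in the source code may differ.
def sample_batch(X, bs=250):
    """Hypothetical helper: draw a random minibatch of bs rows from X."""
    idx = npr.randint(low=0, high=X.shape[0], size=(bs,))
    return to_fX(X.take(idx, axis=0))
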
def test_imocld_mnist(step_type='add', attention=False):
    ##########################
    # Get some training data #
    ##########################
    rng = np.random.RandomState(1234)
    dataset = 'data/mnist.pkl.gz'
    datasets = load_udm(dataset, as_shared=False, zero_mean=False)
    Xtr = datasets[0][0]
    Xva = datasets[1][0]
    Xtr = to_fX(shift_and_scale_into_01(Xtr))
    Xva = to_fX(shift_and_scale_into_01(Xva))
    tr_samples = Xtr.shape[0]
    va_samples = Xva.shape[0]
    batch_size = 250

    ############################################################
    # Setup some parameters for the Iterative Refinement Model #
    ############################################################
    x_dim = Xtr.shape[1]
    write_dim = 300
    enc_dim = 300
    dec_dim = 300
    mix_dim = 20
    z_dim = 100
    n_iter = 16
    
    rnninits = {
        'weights_init': IsotropicGaussian(0.01),
        'biases_init': Constant(0.),
    }
    inits = {
        'weights_init': IsotropicGaussian(0.01),
        'biases_init': Constant(0.),
    }

    att_tag = "NA" # attention not implemented yet

    # setup the reader and writer (shared by primary and guide policies)
    read_dim = 2*x_dim # dimension of output from reader_mlp
    reader_mlp = Reader(x_dim=x_dim, dec_dim=dec_dim, **inits)
    writer_mlp = MLP([None, None], [dec_dim, write_dim, x_dim], \
                     name="writer_mlp", **inits)
    
    # mlps for setting conditionals over z_mix
    mix_var_mlp = CondNet([Tanh()], [x_dim, 250, mix_dim], \
                          name="mix_var_mlp", **inits)
    mix_enc_mlp = CondNet([Tanh()], [x_dim, 250, mix_dim], \
                          name="mix_enc_mlp", **inits)
    # mlp for decoding z_mix into a distribution over initial LSTM states
    mix_dec_mlp = MLP([Tanh(), Tanh()], \
                      [mix_dim, 250, (2*enc_dim + 2*dec_dim + 2*enc_dim + mix_dim)], \
                      name="mix_dec_mlp", **inits)
    # mlps for processing inputs to LSTMs
    var_mlp_in = MLP([Identity()], [(read_dim + dec_dim + mix_dim), 4*enc_dim], \
                     name="var_mlp_in", **inits)
    enc_mlp_in = MLP([Identity()], [(read_dim + dec_dim + mix_dim), 4*enc_dim], \
                     name="enc_mlp_in", **inits)
    dec_mlp_in = MLP([Identity()], [                         z_dim, 4*dec_dim], \
                     name="dec_mlp_in", **inits)
    # mlps for turning LSTM outputs into conditionals over z_gen
    var_mlp_out = CondNet([], [enc_dim, z_dim], name="var_mlp_out", **inits)
    enc_mlp_out = CondNet([], [enc_dim, z_dim], name="enc_mlp_out", **inits)
    # the LSTMs themselves (variational, encoder, and decoder recurrences)
    var_rnn = BiasedLSTM(dim=enc_dim, ig_bias=2.0, fg_bias=2.0, \
                         name="var_rnn", **rnninits)
    enc_rnn = BiasedLSTM(dim=enc_dim, ig_bias=2.0, fg_bias=2.0, \
                         name="enc_rnn", **rnninits)
    dec_rnn = BiasedLSTM(dim=dec_dim, ig_bias=2.0, fg_bias=2.0, \
                         name="dec_rnn", **rnninits)

    draw = IMoCLDrawModels(
                n_iter,
                step_type=step_type, # step_type can be 'add' or 'jump'
                reader_mlp=reader_mlp,
                writer_mlp=writer_mlp,
                mix_enc_mlp=mix_enc_mlp,
                mix_dec_mlp=mix_dec_mlp,
                mix_var_mlp=mix_var_mlp,
                enc_mlp_in=enc_mlp_in,
                enc_mlp_out=enc_mlp_out,
                enc_rnn=enc_rnn,
                dec_mlp_in=dec_mlp_in,
                dec_rnn=dec_rnn,
                var_mlp_in=var_mlp_in,
                var_mlp_out=var_mlp_out,
                var_rnn=var_rnn)
    draw.initialize()
    # build the cost gradients, training function, samplers, etc.
    draw.build_model_funcs()

    # load saved parameters and draw samples under a couple of masking conditions
    conditions = [{'occ_dim': 0, 'drop_prob': 0.8}, \
                  {'occ_dim': 16, 'drop_prob': 0.0}]
    for cond_dict in conditions:
        occ_dim = cond_dict['occ_dim']
        drop_prob = cond_dict['drop_prob']
        dp_int = int(100.0 * drop_prob)

        draw.load_model_params(f_name="TBCLM_IMP_MNIST_PARAMS_OD{}_DP{}_{}_{}.pkl".format(occ_dim, dp_int, step_type, att_tag))

        # draw imputation samples from the model for this masking condition
        Xva = row_shuffle(Xva)
        Xb = to_fX(Xva[:128])
        _, Xb, Mb = construct_masked_data(Xb, drop_prob=drop_prob, \
                                occ_dim=occ_dim, data_mean=None)
        Xb = np.repeat(Xb, 2, axis=0)
        Mb = np.repeat(Mb, 2, axis=0)
        samples, _ = draw.do_sample(Xb, Mb)

        # save the samples to a pkl file, in their numpy array form
        sample_pkl_name = "IMP-MNIST-OD{0:d}-DP{1:d}-{2:s}.pkl".format(occ_dim, dp_int, step_type)
        f_handle = open(sample_pkl_name, 'wb')
        cPickle.dump(samples, f_handle, protocol=-1)
        f_handle.close()
        print("Saved some samples in: {}".format(sample_pkl_name))
    return
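
# Note: construct_masked_data() is used above but not defined in this listing.
# The sketch below assumes it returns (clean data, masked data, visibility mask),
# dropping random pixels with probability drop_prob and occluding one square of
# side occ_dim; the real helper in the source code may differ in details.
def construct_masked_data(X, drop_prob=0.0, occ_dim=0, data_mean=None):
    """Hypothetical masking helper for the imputation experiments."""
    img_dim = int(np.sqrt(X.shape[1]))  # assumes square images (e.g. 28x28 MNIST)
    M = (npr.rand(*X.shape) > drop_prob).astype(X.dtype)  # 1 marks a visible pixel
    if occ_dim > 0:
        M_sq = M.reshape((X.shape[0], img_dim, img_dim))
        for k in range(X.shape[0]):
            r = npr.randint(0, img_dim - occ_dim + 1)
            c = npr.randint(0, img_dim - occ_dim + 1)
            M_sq[k, r:(r+occ_dim), c:(c+occ_dim)] = 0.0  # occlude a square patch
        M = M_sq.reshape(X.shape)
    fill = 0.0 if (data_mean is None) else data_mean
    X_masked = (M * X) + ((1.0 - M) * fill)  # hide the dropped/occluded pixels
    return to_fX(X), to_fX(X_masked), to_fX(M)
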
def test_sgm_mnist(step_type='add', occ_dim=14, drop_prob=0.0, attention=False):
    ##########################
    # Get some training data #
    ##########################
    rng = np.random.RandomState(1234)
    Xtr, Xva, Xte = load_binarized_mnist(data_path='./data/')
    Xtr = np.vstack((Xtr, Xva))
    Xva = Xte
    #del Xte
    tr_samples = Xtr.shape[0]
    va_samples = Xva.shape[0]
    batch_size = 200

    ############################################################
    # Setup some parameters for the Iterative Refinement Model #
    ############################################################
    x_dim = Xtr.shape[1]
    writer_dim = 250
    reader_dim = 250
    dyn_dim = 250
    primary_dim = 500
    guide_dim = 500
    z_dim = 100
    n_iter = 20
    dp_int = int(100.0 * drop_prob)
    
    rnninits = {
        'weights_init': IsotropicGaussian(0.01),
        'biases_init': Constant(0.),
    }
    inits = {
        'weights_init': IsotropicGaussian(0.01),
        'biases_init': Constant(0.),
    }

    att_tag = "NA" # attention not implemented yet

    # reader MLP provides input to the dynamics LSTM update
    reader_mlp = MLP([Rectifier(), Rectifier(), None], \
                     [(x_dim + z_dim), reader_dim, reader_dim, 4*dyn_dim], \
                     name="reader_mlp", **inits)
    # writer MLP applies changes to the generation workspace
    writer_mlp = MLP([Rectifier(), Rectifier(), None], \
                     [(dyn_dim + z_dim), writer_dim, writer_dim, x_dim], \
                     name="writer_mlp", **inits)

    # MLPs for computing conditionals over z
    primary_policy = CondNet([Rectifier(), Rectifier()], \
                             [(dyn_dim + x_dim), primary_dim, primary_dim, z_dim], \
                             name="primary_policy", **inits)
    guide_policy = CondNet([Rectifier(), Rectifier()], \
                           [(dyn_dim + 2*x_dim), guide_dim, guide_dim, z_dim], \
                           name="guide_policy", **inits)
    # LSTM that provides the shared recurrent dynamics
    shared_dynamics = BiasedLSTM(dim=dyn_dim, ig_bias=2.0, fg_bias=2.0, \
                                 name="shared_dynamics", **rnninits)

    model = SeqGenModel(
                n_iter,
                step_type=step_type, # step_type can be 'add' or 'jump'
                reader_mlp=reader_mlp,
                writer_mlp=writer_mlp,
                primary_policy=primary_policy,
                guide_policy=guide_policy,
                shared_dynamics=shared_dynamics)
    model.initialize()

    # build the cost gradients, training function, samplers, etc.
    model.build_model_funcs()

    #model.load_model_params(f_name="TBSGM_IMP_MNIST_PARAMS_OD{}_DP{}_{}_{}.pkl".format(occ_dim, dp_int, step_type, att_tag))

    ################################################################
    # Apply some updates, to check that they aren't totally broken #
    ################################################################
    print("Beginning to train the model...")
    out_file = open("TBSGM_IMP_MNIST_RESULTS_OD{}_DP{}_{}_{}.txt".format(occ_dim, dp_int, step_type, att_tag), 'wb')
    out_file.flush()
    costs = [0. for i in range(10)]
    learn_rate = 0.0002
    momentum = 0.5
    batch_idx = np.arange(batch_size) + tr_samples
    for i in range(250000):
        scale = min(1.0, ((i+1) / 1000.0))
        if (((i + 1) % 10000) == 0):
            learn_rate = learn_rate * 0.95
        if (i > 10000):
            momentum = 0.90
        else:
            momentum = 0.50
        # get the indices of training samples for this batch update
        batch_idx += batch_size
        if (np.max(batch_idx) >= tr_samples):
            # we finished an "epoch", so we reshuffle the training set
            Xtr = row_shuffle(Xtr)
            batch_idx = np.arange(batch_size)
        # set sgd and objective function hyperparams for this update
        zero_ary = np.zeros((1,))
        model.lr.set_value(to_fX(zero_ary + learn_rate))
        model.mom_1.set_value(to_fX(zero_ary + momentum))
        model.mom_2.set_value(to_fX(zero_ary + 0.99))

        # perform a minibatch update and record the cost for this batch
        Xb = to_fX(Xtr.take(batch_idx, axis=0))
        _, Xb, Mb = construct_masked_data(Xb, drop_prob=drop_prob, \
                                    occ_dim=occ_dim, data_mean=None)
        result = model.train_joint(Xb, Mb)

        costs = [(costs[j] + result[j]) for j in range(len(result))]
        if ((i % 200) == 0):
            costs = [(v / 200.0) for v in costs]
            str1 = "-- batch {0:d} --".format(i)
            str2 = "    total_cost: {0:.4f}".format(costs[0])
            str3 = "    nll_bound : {0:.4f}".format(costs[1])
            str4 = "    nll_term  : {0:.4f}".format(costs[2])
            str5 = "    kld_q2p   : {0:.4f}".format(costs[3])
            str6 = "    kld_p2q   : {0:.4f}".format(costs[4])
            str7 = "    reg_term  : {0:.4f}".format(costs[5])
            joint_str = "\n".join([str1, str2, str3, str4, str5, str6, str7])
            print(joint_str)
            out_file.write(joint_str+"\n")
            out_file.flush()
            costs = [0.0 for v in costs]
        if ((i % 1000) == 0):
            model.save_model_params("TBSGM_IMP_MNIST_PARAMS_OD{}_DP{}_{}_{}.pkl".format(occ_dim, dp_int, step_type, att_tag))
            # compute a small-sample estimate of NLL bound on validation set
            Xva = row_shuffle(Xva)
            Xb = to_fX(Xva[:5000])
            _, Xb, Mb = construct_masked_data(Xb, drop_prob=drop_prob, \
                                    occ_dim=occ_dim, data_mean=None)
            va_costs = model.compute_nll_bound(Xb, Mb)
            str1 = "    va_nll_bound : {}".format(va_costs[1])
            str2 = "    va_nll_term  : {}".format(va_costs[2])
            str3 = "    va_kld_q2p   : {}".format(va_costs[3])
            joint_str = "\n".join([str1, str2, str3])
            print(joint_str)
            out_file.write(joint_str+"\n")
            out_file.flush()
            # draw imputation samples from the model on some masked validation data
            Xb = to_fX(Xva[:100])
            _, Xb, Mb = construct_masked_data(Xb, drop_prob=drop_prob, \
                                    occ_dim=occ_dim, data_mean=None)
            samples, _ = model.do_sample(Xb, Mb)
            n_iter, N, D = samples.shape
            samples = samples.reshape( (n_iter, N, 28, 28) )
            for j in xrange(n_iter):
                img = img_grid(samples[j,:,:,:])
                img.save("TBSGM-IMP-MNIST-OD{0:d}-DP{1:d}-{2:s}-samples-{3:03d}.png".format(occ_dim, dp_int, step_type, j))
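
# Note: img_grid() is used above to tile a batch of sample images into a single
# image for saving. A minimal sketch, assuming grayscale images in [0, 1] and
# that PIL is available; the layout used by the real helper may differ.
def img_grid(imgs, pad=2):
    """Hypothetical helper: tile (N, H, W) grayscale images into one PIL image."""
    from PIL import Image
    N, H, W = imgs.shape
    rows = int(np.ceil(np.sqrt(N)))
    cols = int(np.ceil(float(N) / rows))
    grid = np.zeros((rows * (H + pad) + pad, cols * (W + pad) + pad))
    for n in range(N):
        r, c = n // cols, n % cols
        top, left = (pad + r * (H + pad)), (pad + c * (W + pad))
        grid[top:(top + H), left:(left + W)] = imgs[n]
    return Image.fromarray((255.0 * np.clip(grid, 0.0, 1.0)).astype(np.uint8))
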
Exemplo n.º 57
class WalkoutModel(object):
    def __init__(self, rng=None,
            x_out=None, \
            p_z_given_x=None, \
            p_x_given_z=None, \
            params=None, \
            shared_param_dicts=None):
        # setup a rng for this WalkoutModel
        self.rng = RandStream(rng.randint(100000))

        # grab the user-provided parameters
        self.params = params
        self.x_dim = self.params['x_dim']
        self.z_dim = self.params['z_dim']
        self.walkout_steps = self.params['walkout_steps']
        self.x_type = self.params['x_type']
        self.step_type = self.params['step_type']  # assumed present in params ('add' or 'jump'); checked by the assert below
        self.shared_param_dicts = shared_param_dicts
        if 'x_transform' in self.params:
            assert((self.params['x_transform'] == 'sigmoid') or \
                    (self.params['x_transform'] == 'none'))
            if self.params['x_transform'] == 'sigmoid':
                self.x_transform = lambda x: T.nnet.sigmoid(x)
            else:
                self.x_transform = lambda x: x
        else:
            self.x_transform = lambda x: T.nnet.sigmoid(x)
        if self.x_type == 'bernoulli':
            self.x_transform = lambda x: T.nnet.sigmoid(x)
        assert ((self.x_type == 'bernoulli') or (self.x_type == 'gaussian'))
        assert ((self.step_type == 'add') or (self.step_type == 'jump'))

        # grab handles to the relevant networks
        self.p_z_given_x = p_z_given_x
        self.p_x_given_z = p_x_given_z

        # record the symbolic variables that will provide inputs to the
        # computation graph created for this WalkoutModel
        self.x_out = x_out  # target output for generation
        self.xi_zmuv = T.tensor3()  # ZMUV gauss noise for the x half of each walk-out step
        self.zi_zmuv = T.tensor3()  # ZMUV gauss noise for the z half of each walk-out step

        if self.shared_param_dicts is None:
            # initialize the parameters "owned" by this model
            zero_ary = to_fX(np.zeros((1, )))
            self.obs_logvar = theano.shared(value=zero_ary, name='obs_logvar')
            self.bounded_logvar = 8.0 * T.tanh(
                (1.0 / 8.0) * self.obs_logvar[0])
            self.shared_param_dicts = {}
            self.shared_param_dicts['obs_logvar'] = self.obs_logvar
        else:
            # grab the parameters required by this model from a given dict
            self.obs_logvar = self.shared_param_dicts['obs_logvar']
            self.bounded_logvar = 8.0 * T.tanh(
                (1.0 / 8.0) * self.obs_logvar[0])

        ###############################################################
        # Setup the forwards (i.e. training) walk-out loop using scan #
        ###############################################################
        def forwards_loop(xi_zmuv, zi_zmuv, xi_fw, zi_fw):
            # get samples of next zi, according to the forwards model
            zi_fw_mean, zi_fw_logvar = self.p_z_given_x.apply(xi_fw, \
                                       do_samples=False)
            zi_fw = zi_fw_mean + (T.exp(0.5 * zi_fw_logvar) * zi_zmuv)

            # check reverse direction probability p(xi_fw | zi_fw)
            xi_bw_mean, xi_bw_logvar = self.p_x_given_z.apply(zi_fw, \
                                       do_samples=False)
            xi_bw_mean = self.x_transform(xi_bw_mean)
            nll_xi_bw = log_prob_gaussian2(xi_fw, xi_bw_mean, \
                        log_vars=xi_bw_logvar, mask=None)
            nll_xi_bw = nll_xi_bw.flatten()

            # get samples of next xi, according to the forwards model
            xi_fw_mean, xi_fw_logvar = self.p_x_given_z.apply(zi_fw, \
                                       do_samples=False)
            xi_fw_mean = self.x_transform(xi_fw_mean)
            xi_fw = xi_fw_mean + (T.exp(0.5 * xi_fw_logvar) * xi_zmuv)

            # check reverse direction probability p(zi_fw | xi_fw)
            zi_bw_mean, zi_bw_logvar = self.p_z_given_x.apply(xi_fw, \
                                       do_samples=False)
            nll_zi_bw = log_prob_gaussian2(zi_fw, zi_bw_mean, \
                        log_vars=zi_bw_logvar, mask=None)
            nll_zi_bw = nll_zi_bw.flatten()

            # each loop iteration produces the following values:
            #   xi_fw: xi generated fom zi by forwards walk
            #   zi_fw: zi generated fom xi by forwards walk
            #   xi_fw_mean: ----
            #   xi_fw_logvar: ----
            #   zi_fw_mean: ----
            #   zi_fw_logvar: ----
            #   nll_xi_bw: NLL for reverse step zi_fw -> xi_fw
            #   nll_zi_bw: NLL for reverse step xi_fw -> zi_fw
            return xi_fw, zi_fw, xi_fw_mean, xi_fw_logvar, zi_fw_mean, zi_fw_logvar, nll_xi_bw, nll_zi_bw

        # initialize states for x/z
        self.x0 = self.x_out
        self.z0 = T.alloc(0.0, self.x0.shape[0], self.z_dim)
        # setup initial values to pass to scan op
        outputs_init = [self.x0, self.z0, None, None, None, None, None, None]
        sequences_init = [self.xi_zmuv, self.zi_zmuv]
        # apply scan op for the sequential imputation loop
        self.scan_results, self.scan_updates = theano.scan(forwards_loop, \
                    outputs_info=outputs_init, \
                    sequences=sequences_init)

        # grab results of the scan op. all values are computed for each step
        self.xi = self.scan_results[0]
        self.zi = self.scan_results[1]
        self.xi_fw_mean = self.scan_results[2]
        self.xi_fw_logvar = self.scan_results[3]
        self.zi_fw_mean = self.scan_results[4]
        self.zi_fw_logvar = self.scan_results[5]
        self.nll_xi_bw = self.scan_results[6]
        self.nll_zi_bw = self.scan_results[7]

        ######################################################################
        # ALL SYMBOLIC VARS NEEDED FOR THE OBJECTIVE SHOULD NOW BE AVAILABLE #
        ######################################################################

        # shared var learning rate for generator and inferencer
        zero_ary = to_fX(np.zeros((1, )))
        self.lr = theano.shared(value=zero_ary, name='srr_lr')
        # shared var momentum parameters for ADAM optimization
        self.mom_1 = theano.shared(value=zero_ary, name='srr_mom_1')
        self.mom_2 = theano.shared(value=zero_ary, name='srr_mom_2')
        # init parameters for controlling learning dynamics
        self.set_sgd_params()
        # init shared vars for weighting prior kld against reconstruction
        self.lam_kld_p = theano.shared(value=zero_ary, name='srr_lam_kld_p')
        self.lam_kld_q = theano.shared(value=zero_ary, name='srr_lam_kld_q')
        self.lam_kld_g = theano.shared(value=zero_ary, name='srr_lam_kld_g')
        self.lam_kld_s = theano.shared(value=zero_ary, name='srr_lam_kld_s')
        self.set_lam_kld(lam_kld_p=0.0,
                         lam_kld_q=1.0,
                         lam_kld_g=0.0,
                         lam_kld_s=0.0)
        # init shared var for controlling l2 regularization on params
        self.lam_l2w = theano.shared(value=zero_ary, name='srr_lam_l2w')
        self.set_lam_l2w(1e-5)

        # grab all of the "optimizable" parameters from the base networks
        self.joint_params = [self.obs_logvar]
        self.joint_params.extend(self.p_z_given_x.mlp_params)
        self.joint_params.extend(self.p_x_given_z.mlp_params)

        #################################
        # CONSTRUCT THE KLD-BASED COSTS #
        #################################
        self.kld_p, self.kld_q, self.kld_g, self.kld_s = self._construct_kld_costs(
            p=1.0)
        self.kld_costs = (self.lam_kld_p[0] * self.kld_p) + \
                         (self.lam_kld_q[0] * self.kld_q) + \
                         (self.lam_kld_g[0] * self.kld_g) + \
                         (self.lam_kld_s[0] * self.kld_s)
        self.kld_cost = T.mean(self.kld_costs)
        #################################
        # CONSTRUCT THE NLL-BASED COSTS #
        #################################
        self.nll_costs = T.sum(self.nll_xi_bw + self.nll_zi_bw, axis=0)  # sum the per-step reverse-direction NLLs
        self.nll_cost = T.mean(self.nll_costs)
        self.nll_bounds = self.nll_costs.ravel() + self.kld_q.ravel()
        self.nll_bound = T.mean(self.nll_bounds)
        ########################################
        # CONSTRUCT THE REST OF THE JOINT COST #
        ########################################
        param_reg_cost = self._construct_reg_costs()
        self.reg_cost = self.lam_l2w[0] * param_reg_cost
        self.joint_cost = self.nll_cost + self.kld_cost + self.reg_cost
        ##############################
        # CONSTRUCT A PER-TRIAL COST #
        ##############################
        self.obs_costs = self.nll_costs + self.kld_costs

        # Get the gradient of the joint cost for all optimizable parameters
        print("Computing gradients of self.joint_cost...")
        self.joint_grads = OrderedDict()
        grad_list = T.grad(self.joint_cost, self.joint_params)
        for i, p in enumerate(self.joint_params):
            self.joint_grads[p] = grad_list[i]

        # Construct the updates for the generator and inferencer networks
        self.joint_updates = get_adam_updates(params=self.joint_params, \
                grads=self.joint_grads, alpha=self.lr, \
                beta1=self.mom_1, beta2=self.mom_2, \
                mom2_init=1e-3, smoothing=1e-5, max_grad_norm=10.0)
        for k, v in self.scan_updates.items():
            self.joint_updates[k] = v

        # Construct theano functions for training and diagnostic computations
        print("Compiling cost computer...")
        self.compute_raw_costs = self._construct_raw_costs()
        print("Compiling training function...")
        self.train_joint = self._construct_train_joint()
        print("Compiling free-energy sampler...")
        self.compute_fe_terms = self._construct_compute_fe_terms()
        print("Compiling sequence sampler...")
        self.sequence_sampler = self._construct_sequence_sampler()
        # make easy access points for some interesting parameters
        #self.gen_inf_weights = self.p_zi_given_xi.shared_layers[0].W
        return
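
# Note: log_prob_gaussian2() is used in the scan loop above but not defined in
# this listing. Below is a sketch of a diagonal-Gaussian log-density with a
# matching call signature; the helper in the source code may reduce over a
# different axis or treat the mask differently.
def log_prob_gaussian2(x, mu, log_vars=0.0, mask=None):
    """Hypothetical helper: per-example log N(x; mu, exp(log_vars)), summed over dims."""
    ind_lls = -0.5 * (np.log(2.0 * np.pi) + log_vars +
                      ((x - mu)**2.0 / T.exp(log_vars)))
    if mask is not None:
        ind_lls = mask * ind_lls
    return T.sum(ind_lls, axis=1)
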
Exemplo n.º 58
def test_imocld_generation(step_type='add', attention=False):
    ##########################
    # Get some training data #
    ##########################
    rng = np.random.RandomState(1234)
    Xtr, Xva, Xte = load_binarized_mnist(data_path='./data/')
    Xtr = np.vstack((Xtr, Xva))
    Xva = Xte
    #del Xte
    tr_samples = Xtr.shape[0]
    va_samples = Xva.shape[0]
    batch_size = 250

    ############################################################
    # Setup some parameters for the Iterative Refinement Model #
    ############################################################
    x_dim = Xtr.shape[1]
    write_dim = 200
    enc_dim = 250
    dec_dim = 250
    mix_dim = 20
    z_dim = 100
    n_iter = 16

    rnninits = {
        'weights_init': IsotropicGaussian(0.01),
        'biases_init': Constant(0.),
    }
    inits = {
        'weights_init': IsotropicGaussian(0.01),
        'biases_init': Constant(0.),
    }

    att_tag = "NA"  # attention not tested yet

    # setup the reader and writer (shared by primary and guide policies)
    read_dim = 2 * x_dim  # dimension of output from reader_mlp
    reader_mlp = Reader(x_dim=x_dim, dec_dim=dec_dim, **inits)
    writer_mlp = MLP([None, None], [dec_dim, write_dim, x_dim], \
                     name="writer_mlp", **inits)

    # mlps for setting conditionals over z_mix
    mix_var_mlp = CondNet([Tanh()], [x_dim, 250, mix_dim], \
                          name="mix_var_mlp", **inits)
    mix_enc_mlp = CondNet([Tanh()], [x_dim, 250, mix_dim], \
                          name="mix_enc_mlp", **inits)
    # mlp for decoding z_mix into a distribution over initial LSTM states
    mix_dec_mlp = MLP([Tanh(), Tanh()], \
                      [mix_dim, 250, (2*enc_dim + 2*dec_dim + 2*enc_dim)], \
                      name="mix_dec_mlp", **inits)
    # mlps for processing inputs to LSTMs
    var_mlp_in = MLP([Identity()], [(read_dim + dec_dim), 4*enc_dim], \
                     name="var_mlp_in", **inits)
    enc_mlp_in = MLP([Identity()], [(read_dim + dec_dim), 4*enc_dim], \
                     name="enc_mlp_in", **inits)
    dec_mlp_in = MLP([Identity()], [               z_dim, 4*dec_dim], \
                     name="dec_mlp_in", **inits)
    # mlps for turning LSTM outputs into conditionals over z_gen
    var_mlp_out = CondNet([], [enc_dim, z_dim], name="var_mlp_out", **inits)
    enc_mlp_out = CondNet([], [enc_dim, z_dim], name="enc_mlp_out", **inits)
    # the LSTMs themselves (variational, encoder, and decoder recurrences)
    var_rnn = BiasedLSTM(dim=enc_dim, ig_bias=2.0, fg_bias=2.0, \
                         name="var_rnn", **rnninits)
    enc_rnn = BiasedLSTM(dim=enc_dim, ig_bias=2.0, fg_bias=2.0, \
                         name="enc_rnn", **rnninits)
    dec_rnn = BiasedLSTM(dim=dec_dim, ig_bias=2.0, fg_bias=2.0, \
                         name="dec_rnn", **rnninits)

    draw = IMoCLDrawModels(
        n_iter,
        step_type='add',  # step_type can be 'add' or 'jump'
        reader_mlp=reader_mlp,
        writer_mlp=writer_mlp,
        mix_enc_mlp=mix_enc_mlp,
        mix_dec_mlp=mix_dec_mlp,
        mix_var_mlp=mix_var_mlp,
        enc_mlp_in=enc_mlp_in,
        enc_mlp_out=enc_mlp_out,
        enc_rnn=enc_rnn,
        dec_mlp_in=dec_mlp_in,
        dec_rnn=dec_rnn,
        var_mlp_in=var_mlp_in,
        var_mlp_out=var_mlp_out,
        var_rnn=var_rnn)
    draw.initialize()

    # build the cost gradients, training function, samplers, etc.
    draw.build_model_funcs()

    ################################################################
    # Apply some updates, to check that they aren't totally broken #
    ################################################################
    print("Beginning to train the model...")
    out_file = open("TBCLM_GEN_RESULTS_{}_{}.txt".format(step_type, att_tag),
                    'wb')
    costs = [0. for i in range(10)]
    learn_rate = 0.0002
    momentum = 0.5
    batch_idx = np.arange(batch_size) + tr_samples
    for i in range(250000):
        scale = min(1.0, ((i + 1) / 1000.0))
        if (((i + 1) % 10000) == 0):
            learn_rate = learn_rate * 0.95
        if (i > 10000):
            momentum = 0.90
        else:
            momentum = 0.50
        # get the indices of training samples for this batch update
        batch_idx += batch_size
        if (np.max(batch_idx) >= tr_samples):
            # we finished an "epoch", so we reshuffle the training set
            Xtr = row_shuffle(Xtr)
            batch_idx = np.arange(batch_size)
        # set sgd and objective function hyperparams for this update
        zero_ary = np.zeros((1, ))
        draw.lr.set_value(to_fX(zero_ary + learn_rate))
        draw.mom_1.set_value(to_fX(zero_ary + momentum))
        draw.mom_2.set_value(to_fX(zero_ary + 0.99))

        # perform a minibatch update and record the cost for this batch
        Xb = to_fX(Xtr.take(batch_idx, axis=0))
        Mb = 0.0 * Xb
        result = draw.train_joint(Xb, Mb)

        costs = [(costs[j] + result[j]) for j in range(len(result))]
        if ((i % 200) == 0):
            costs = [(v / 200.0) for v in costs]
            str1 = "-- batch {0:d} --".format(i)
            str2 = "    total_cost: {0:.4f}".format(costs[0])
            str3 = "    nll_bound : {0:.4f}".format(costs[1])
            str4 = "    nll_term  : {0:.4f}".format(costs[2])
            str5 = "    kld_q2p   : {0:.4f}".format(costs[3])
            str6 = "    kld_p2q   : {0:.4f}".format(costs[4])
            str7 = "    reg_term  : {0:.4f}".format(costs[5])
            joint_str = "\n".join([str1, str2, str3, str4, str5, str6, str7])
            print(joint_str)
            out_file.write(joint_str + "\n")
            out_file.flush()
            costs = [0.0 for v in costs]
        if ((i % 1000) == 0):
            draw.save_model_params("TBCLM_GEN_PARAMS_{}_{}.pkl".format(
                step_type, att_tag))
            # compute a small-sample estimate of NLL bound on validation set
            Xva = row_shuffle(Xva)
            Xb = to_fX(Xva[:5000])
            Mb = 0.0 * Xb
            va_costs = draw.compute_nll_bound(Xb, Mb)
            str1 = "    va_nll_bound : {}".format(va_costs[1])
            str2 = "    va_nll_term  : {}".format(va_costs[2])
            str3 = "    va_kld_q2p   : {}".format(va_costs[3])
            joint_str = "\n".join([str1, str2, str3])
            print(joint_str)
            out_file.write(joint_str + "\n")
            out_file.flush()
            # draw some independent samples from the model
            Xb = to_fX(Xva[:256])
            Mb = 0.0 * Xb
            samples, _ = draw.do_sample(Xb, Mb)
            n_iter, N, D = samples.shape
            samples = samples.reshape((n_iter, N, 28, 28))
            for j in xrange(n_iter):
                img = img_grid(samples[j, :, :, :])
                img.save("TBCLM-gen-samples-%03d.png" % (j, ))
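
# Note: row_shuffle() is used above to reshuffle the data between epochs. A
# one-line sketch of the assumed behaviour; the real helper may also shuffle
# paired arrays in lockstep.
def row_shuffle(X):
    """Hypothetical helper: return X with its rows in a random order."""
    return X.take(npr.permutation(X.shape[0]), axis=0)
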
Exemplo n.º 59
def test_imoold_generation(step_type='add', attention=False):
    ##########################
    # Get some training data #
    ##########################
    rng = np.random.RandomState(1234)
    Xtr, Xva, Xte = load_binarized_mnist(data_path='./data/')
    Xtr = np.vstack((Xtr, Xva))
    Xva = Xte
    #del Xte
    tr_samples = Xtr.shape[0]
    va_samples = Xva.shape[0]
    batch_size = 200

    ############################################################
    # Setup some parameters for the Iterative Refinement Model #
    ############################################################
    x_dim = Xtr.shape[1]
    write_dim = 250
    enc_dim = 250
    dec_dim = 250
    mix_dim = 25
    z_dim = 100
    if attention:
        n_iter = 64
    else:
        n_iter = 32

    rnninits = {
        'weights_init': IsotropicGaussian(0.01),
        'biases_init': Constant(0.),
    }
    inits = {
        'weights_init': IsotropicGaussian(0.01),
        'biases_init': Constant(0.),
    }

    # setup the reader and writer
    if attention:
        read_N, write_N = (2, 5) # resolution of reader and writer
        read_dim = 2*read_N**2   # total number of "pixels" read by reader
        reader_mlp = AttentionReader2d(x_dim=x_dim, dec_dim=dec_dim,
                                 width=28, height=28,
                                 N=read_N, **inits)
        writer_mlp = AttentionWriter(input_dim=dec_dim, output_dim=x_dim,
                                 width=28, height=28,
                                 N=write_N, **inits)
        att_tag = "YA"
    else:
        read_dim = 2*x_dim
        reader_mlp = Reader(x_dim=x_dim, dec_dim=dec_dim, **inits)
        writer_mlp = MLP([None, None], [dec_dim, write_dim, x_dim], \
                         name="writer_mlp", **inits)
        att_tag = "NA"

    # setup the infinite mixture initialization model
    mix_enc_mlp = CondNet([Tanh()], [x_dim, 250, mix_dim], \
                          name="mix_enc_mlp", **inits)
    mix_dec_mlp = MLP([Tanh(), Tanh()], \
                      [mix_dim, 250, (2*enc_dim + 2*dec_dim)], \
                      name="mix_dec_mlp", **inits)
    # setup the components of the sequential generative model
    enc_mlp_in = MLP([Identity()], [(read_dim + dec_dim), 4*enc_dim], \
                     name="enc_mlp_in", **inits)
    dec_mlp_in = MLP([Identity()], [               z_dim, 4*dec_dim], \
                     name="dec_mlp_in", **inits)
    enc_mlp_out = CondNet([], [enc_dim, z_dim], name="enc_mlp_out", **inits)
    dec_mlp_out = CondNet([], [dec_dim, z_dim], name="dec_mlp_out", **inits)
    enc_rnn = BiasedLSTM(dim=enc_dim, ig_bias=2.0, fg_bias=2.0, \
                         name="enc_rnn", **rnninits)
    dec_rnn = BiasedLSTM(dim=dec_dim, ig_bias=2.0, fg_bias=2.0, \
                         name="dec_rnn", **rnninits)

    draw = IMoOLDrawModels(
                n_iter,
                step_type=step_type, # step_type can be 'add' or 'jump'
                mix_enc_mlp=mix_enc_mlp,
                mix_dec_mlp=mix_dec_mlp,
                reader_mlp=reader_mlp,
                enc_mlp_in=enc_mlp_in,
                enc_mlp_out=enc_mlp_out,
                enc_rnn=enc_rnn,
                dec_mlp_in=dec_mlp_in,
                dec_mlp_out=dec_mlp_out,
                dec_rnn=dec_rnn,
                writer_mlp=writer_mlp)
    draw.initialize()

    compile_start_time = time.time()

    # build the cost gradients, training function, samplers, etc.
    draw.build_model_funcs()

    compile_end_time = time.time()
    compile_minutes = (compile_end_time - compile_start_time) / 60.0
    print("THEANO COMPILE TIME (MIN): {}".format(compile_minutes))

    ################################################################
    # Apply some updates, to check that they aren't totally broken #
    ################################################################
    print("Beginning to train the model...")
    out_file = open("TBOLM_GEN_RESULTS_{}_{}.txt".format(step_type, att_tag), 'wb')
    costs = [0. for i in range(10)]
    learn_rate = 0.00015
    momentum = 0.9
    batch_idx = np.arange(batch_size) + tr_samples
    for i in range(250000):
        scale = min(1.0, ((i+1) / 5000.0))
        if (((i + 1) % 10000) == 0):
            learn_rate = learn_rate * 0.95
        # get the indices of training samples for this batch update
        batch_idx += batch_size
        if (np.max(batch_idx) >= tr_samples):
            # we finished an "epoch", so we reshuffle the training set
            Xtr = row_shuffle(Xtr)
            batch_idx = np.arange(batch_size)

        # set sgd and objective function hyperparams for this update
        zero_ary = np.zeros((1,))
        draw.lr.set_value(to_fX(zero_ary + scale*learn_rate))
        draw.mom_1.set_value(to_fX(zero_ary + scale*momentum))
        draw.mom_2.set_value(to_fX(zero_ary + 0.98))

        # perform a minibatch update and record the cost for this batch
        Xb = to_fX(Xtr.take(batch_idx, axis=0))
        draw.set_rnn_noise(rnn_noise=0.02)
        result = draw.train_joint(Xb, Xb)
        costs = [(costs[j] + result[j]) for j in range(len(result))]

        # diagnostics
        if ((i % 200) == 0):
            costs = [(v / 200.0) for v in costs]
            str1 = "-- batch {0:d} --".format(i)
            str2 = "    total_cost: {0:.4f}".format(costs[0])
            str3 = "    nll_bound : {0:.4f}".format(costs[1])
            str4 = "    nll_term  : {0:.4f}".format(costs[2])
            str5 = "    kld_q2p   : {0:.4f}".format(costs[3])
            str6 = "    kld_p2q   : {0:.4f}".format(costs[4])
            str7 = "    reg_term  : {0:.4f}".format(costs[5])
            str8 = "    step_klds : {0:s}".format(np.array_str(costs[6], precision=2))
            joint_str = "\n".join([str1, str2, str3, str4, str5, str6, str7, str8])
            print(joint_str)
            out_file.write(joint_str+"\n")
            out_file.flush()
            costs = [0.0 for v in costs]
        if ((i % 1000) == 0):
            draw.save_model_params("TBOLM_GEN_PARAMS_{}_{}.pkl".format(step_type, att_tag))
            # compute a small-sample estimate of NLL bound on validation set
            Xva = row_shuffle(Xva)
            Xb = to_fX(Xva[:5000])
            draw.set_rnn_noise(rnn_noise=0.0)
            va_costs = draw.compute_nll_bound(Xb, Xb)
            str1 = "    va_nll_bound : {}".format(va_costs[1])
            str2 = "    va_nll_term  : {}".format(va_costs[2])
            str3 = "    va_kld_q2p   : {}".format(va_costs[3])
            joint_str = "\n".join([str1, str2, str3])
            print(joint_str)
            out_file.write(joint_str+"\n")
            out_file.flush()
            # draw some independent samples from the model
            samples, x_logodds = draw.do_sample(16*16)
            utils.plot_kde_histogram(x_logodds[-1,:,:], "TBOLM-log_odds_hist.png", bins=30)
            n_iter, N, D = samples.shape
            samples = samples.reshape( (n_iter, N, 28, 28) )
            for j in xrange(n_iter):
                img = img_grid(samples[j,:,:,:])
                img.save("TBOLM-gen-samples-%03d.png" % (j,))
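
# Note: to_fX() is used throughout these examples to cast arrays to Theano's
# configured float type. A one-line sketch of the assumed behaviour; the real
# helper may also handle lists or already-symbolic values.
def to_fX(np_ary):
    """Hypothetical helper: cast a numpy array to theano.config.floatX."""
    return np_ary.astype(theano.config.floatX)
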
Exemplo n.º 60
    # configure a trajectory generator
    num_samples = 100
    traj_len = 64
    x_range = [-0.8, 0.8]
    y_range = [-0.8, 0.8]
    max_speed = 0.15
    TRAJ = TrajectoryGenerator(x_range=x_range, y_range=y_range, \
                               max_speed=max_speed)

    # test the writer function
    start_time = time.time()
    batch_count = 50
    for i in range(batch_count):
        # generate a minibatch of trajectories
        traj_pos, traj_vel = TRAJ.generate_trajectories(num_samples, traj_len)
        traj_x = traj_pos[:, :, 0]
        traj_y = traj_pos[:, :, 1]
        # draw the trajectories
        center_x = to_fX(traj_x.T.ravel())
        center_y = to_fX(traj_y.T.ravel())
        delta = to_fX(np.ones(center_x.shape))
        sigma = to_fX(np.ones(center_x.shape))
        W = write_func(center_y, center_x, delta, 0.2 * sigma)
    end_time = time.time()
    render_time = end_time - start_time
    render_bps = batch_count / render_time
    print("RENDER BATCH/SECOND: {0:.2f}".format(render_bps))

    W = W[:20 * traj_len]
    utils.visualize_samples(W, "AAAAA.png", num_rows=20)
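
# Note: TrajectoryGenerator is configured above but not defined in this listing.
# The sketch below shows one way generate_trajectories() could work, assuming
# constant-velocity motion with reflection off the walls of the bounding box;
# the generator in the source code may use different dynamics.
def generate_trajectories_sketch(num_samples, traj_len,
                                 x_range=(-0.8, 0.8), y_range=(-0.8, 0.8),
                                 max_speed=0.15):
    """Hypothetical sampler: (traj_len, num_samples, 2) positions and velocities."""
    lo = np.array([x_range[0], y_range[0]])
    hi = np.array([x_range[1], y_range[1]])
    pos = lo + (hi - lo) * npr.rand(num_samples, 2)           # random start positions
    vel = max_speed * (2.0 * npr.rand(num_samples, 2) - 1.0)  # random start velocities
    traj_pos = np.zeros((traj_len, num_samples, 2))
    traj_vel = np.zeros((traj_len, num_samples, 2))
    for t in range(traj_len):
        pos = pos + vel
        for d in range(2):
            # reflect positions that stepped outside the box and flip velocities
            over, under = (pos[:, d] > hi[d]), (pos[:, d] < lo[d])
            pos[over, d] = 2.0 * hi[d] - pos[over, d]
            pos[under, d] = 2.0 * lo[d] - pos[under, d]
            vel[over | under, d] = -vel[over | under, d]
        traj_pos[t] = pos
        traj_vel[t] = vel
    return traj_pos, traj_vel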