def visualize_attention(result, pre_tag="AAA", post_tag="AAA"):
     seq_len = result[0].shape[0]
     samp_count = result[0].shape[1]
     # get generated predictions
     x_samps = np.zeros((seq_len*samp_count, obs_dim))
     y_samps = np.zeros((seq_len*samp_count, label_dim))
     idx = 0
     for s1 in range(samp_count):
         for s2 in range(seq_len):
             x_samps[idx] = result[0][s2,s1,:obs_dim]
             y_samps[idx] = result[0][s2,s1,obs_dim:]
             idx += 1
     file_name = "{0:s}_traj_xs_{1:s}.png".format(pre_tag, post_tag)
     utils.visualize_samples(x_samps, file_name, num_rows=20)
     file_name = "{0:s}_traj_ys_{1:s}.png".format(pre_tag, post_tag)
     utils.visualize_samples(y_samps, file_name, num_rows=20)
     # get sequential attention maps
     seq_samps = np.zeros((seq_len*samp_count, x_dim))
     idx = 0
     for s1 in range(samp_count):
         for s2 in range(seq_len):
             seq_samps[idx] = result[1][s2,s1,:x_dim] + result[1][s2,s1,x_dim:]
             idx += 1
     file_name = "{0:s}_traj_att_maps_{1:s}.png".format(pre_tag, post_tag)
     utils.visualize_samples(seq_samps, file_name, num_rows=20)
     # get sequential attention maps (read out values)
     seq_samps = np.zeros((seq_len*samp_count, x_dim))
     idx = 0
     for s1 in range(samp_count):
         for s2 in range(seq_len):
             seq_samps[idx] = result[2][s2,s1,:x_dim] + result[2][s2,s1,x_dim:]
             idx += 1
     file_name = "{0:s}_traj_read_outs_{1:s}.png".format(pre_tag, post_tag)
     utils.visualize_samples(seq_samps, file_name, num_rows=20)
     return
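A minimal usage sketch for the helper above, assuming the module-level globals it reads (obs_dim, label_dim, x_dim) and a utils.visualize_samples helper are importable; the dimensions and the layout of result below are illustrative assumptions, not part of the original snippet.

import numpy as np

obs_dim, label_dim, x_dim = 784, 10, 784   # hypothetical dimensions
seq_len, samp_count = 16, 20
result = [
    np.random.rand(seq_len, samp_count, obs_dim + label_dim),  # x/y predictions per step
    np.random.rand(seq_len, samp_count, 2 * x_dim),            # attention maps per step
    np.random.rand(seq_len, samp_count, 2 * x_dim),            # attention read-out values per step
]
visualize_attention(result, pre_tag="demo", post_tag="b0")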
Example #2
def process_samples(step_type='add', data_name='MNIST'):
    # occlusion/drop conditions for which samples were previously saved, per dataset
    if data_name == 'MNIST':
        conditions = [{'occ_dim': 0, 'drop_prob': 0.8}, \
                      {'occ_dim': 16, 'drop_prob': 0.0}]
    if data_name == 'SVHN':
        conditions = [{'occ_dim': 0, 'drop_prob': 0.8}, \
                      {'occ_dim': 17, 'drop_prob': 0.0}]
    if data_name == 'TFD':
        conditions = [{'occ_dim': 0, 'drop_prob': 0.8}, \
                      {'occ_dim': 25, 'drop_prob': 0.0}]
    for cond_dict in conditions:
        occ_dim = cond_dict['occ_dim']
        drop_prob = cond_dict['drop_prob']
        dp_int = int(100.0 * drop_prob)

        # load the samples from a pkl file, where they were saved in numpy array form
        sample_pkl_name = "IMP-{}-OD{}-DP{}-{}.pkl".format(
            data_name, occ_dim, dp_int, step_type)
        pickle_file = open(sample_pkl_name, 'rb')
        samples = cPickle.load(pickle_file)
        pickle_file.close()
        print("Loaded some samples from: {}".format(sample_pkl_name))

        sample_list = []
        for i in range(samples.shape[0]):
            sample_list.append(samples[i, :, :])
        # downsample the sequence....
        #keep_idx = range(len(sample_list))
        keep_idx = [0, 2, 4, 6, 9, 12, 15]
        sample_list = [sample_list[i] for i in keep_idx]

        seq_len = len(sample_list)
        samp_count = sample_list[0].shape[0]
        obs_dim = sample_list[0].shape[1]

        seq_samps = np.zeros((seq_len * samp_count, obs_dim))
        idx = 0
        for s1 in range(samp_count):
            for s2 in range(seq_len):
                seq_samps[idx] = sample_list[s2][s1, :].ravel()
                idx += 1
        sample_img_name = "IMP-{}-OD{}-DP{}-{}.png".format(
            data_name, occ_dim, dp_int, step_type)

        row_count = int(samp_count / 16)
        print("row_count: {}".format(row_count))
        utils.visualize_samples(seq_samps, sample_img_name, num_rows=row_count)
    return
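A hypothetical invocation of process_samples; it assumes pickle files like "IMP-MNIST-OD0-DP80-add.pkl" and "IMP-MNIST-OD16-DP0-add.pkl" already sit in the working directory, each holding an array of shape (seq_len, samp_count, obs_dim) with at least 16 steps (keep_idx indexes up to 15) and samp_count a multiple of 16.

process_samples(step_type='add', data_name='MNIST')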
Example #3
def test_mnist_img(occ_dim=15, drop_prob=0.0):
    #########################################
    # Format the result tag more thoroughly #
    #########################################
    dp_int = int(100.0 * drop_prob)
    result_tag = RESULT_PATH + "TM_OD{}_DP{}".format(occ_dim, dp_int)

    ##########################
    # Get some training data #
    ##########################
    rng = np.random.RandomState(1234)
    dataset = 'data/mnist.pkl.gz'
    datasets = load_udm(dataset, as_shared=False, zero_mean=False)
    Xtr = datasets[0][0]
    Xva = datasets[1][0]
    Xtr = to_fX(shift_and_scale_into_01(Xtr))
    Xva = to_fX(shift_and_scale_into_01(Xva))
    tr_samples = Xtr.shape[0]
    va_samples = Xva.shape[0]
    batch_size = 200
    batch_reps = 1
    all_pix_mean = np.mean(np.mean(Xtr, axis=1))
    data_mean = to_fX(all_pix_mean * np.ones((Xtr.shape[1], )))

    TM = TemplateMatchImputer(x_train=Xtr, x_type='bernoulli')

    Xva = row_shuffle(Xva)
    # record an estimate of performance on the test set
    xi, xo, xm = construct_masked_data(Xva[:500], drop_prob=drop_prob, \
                                       occ_dim=occ_dim, data_mean=data_mean)
    img_match_on_known, img_match_on_unknown = TM.best_match_img(xo, xm)

    display_count = 100
    # visualize matches on known elements
    Xs = np.zeros((2 * display_count, Xva.shape[1]))
    for idx in range(display_count):
        Xs[2 * idx] = xi[idx]
        Xs[(2 * idx) + 1] = img_match_on_known[idx]
    file_name = "{0:s}_SAMPLES_MOK.png".format(result_tag)
    utils.visualize_samples(Xs, file_name, num_rows=20)
    # visualize matches on unknown elements
    Xs = np.zeros((2 * display_count, Xva.shape[1]))
    for idx in range(display_count):
        Xs[2 * idx] = xi[idx]
        Xs[(2 * idx) + 1] = img_match_on_unknown[idx]
    file_name = "{0:s}_SAMPLES_MOU.png".format(result_tag)
    utils.visualize_samples(Xs, file_name, num_rows=20)
    return
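A hypothetical sweep over the same occlusion/drop conditions used by process_samples above; it assumes RESULT_PATH is defined and 'data/mnist.pkl.gz' is available.

for occ, dp in [(0, 0.8), (16, 0.0)]:
    test_mnist_img(occ_dim=occ, drop_prob=dp)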
Example #6
def visualize_attention_joint(result, pre_tag="AAA", post_tag="AAA"):
     seq_len = result[0].shape[0]
     samp_count = result[0].shape[1]
     # get generated predictions
     seq_samps = np.zeros((3*seq_len*samp_count, obs_dim))
     idx = 0
     for s1 in range(samp_count):
         for s2 in range(seq_len):
             seq_samps[idx] = result[3][s2,s1,:]
             idx += 1
         for s2 in range(seq_len):
             seq_samps[idx] = result[0][s2,s1,:]
             idx += 1
         for s2 in range(seq_len):
             seq_samps[idx] = result[1][s2,s1,:]
             idx += 1
     file_name = "{0:s}_traj_joint_{1:s}.png".format(pre_tag, post_tag)
     utils.visualize_samples(seq_samps, file_name, num_rows=(3*samp_count))
     return
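Hypothetical inputs for visualize_attention_joint: it reads the global obs_dim and expects result[0], result[1], and result[3] to be (seq_len, samp_count, obs_dim) arrays (predictions, attention maps, and original inputs); index 2 is unused by this helper. A sketch, assuming utils.visualize_samples is importable:

import numpy as np

obs_dim = 784
seq_len, samp_count = 16, 5
result = [np.random.rand(seq_len, samp_count, obs_dim) for _ in range(4)]
visualize_attention_joint(result, pre_tag="demo", post_tag="b0")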
Example #8
def visualize_attention(result, pre_tag="AAA", post_tag="AAA"):
     seq_len = result[0].shape[0]
     samp_count = result[0].shape[1]
     # get generated predictions
     x_samps = np.zeros((seq_len*samp_count, obs_dim))
     y_samps = np.zeros((seq_len*samp_count, label_dim))
     idx = 0
     for s1 in range(samp_count):
         for s2 in range(seq_len):
             x_samps[idx] = result[0][s2,s1,:obs_dim]
             y_samps[idx] = result[0][s2,s1,obs_dim:]
             # add ticks at the corners of label predictions, to make them
             # easier to parse visually.
             max_val = np.mean(result[0][s2,s1,obs_dim:])
             y_samps[idx][0] = max_val
             y_samps[idx][9] = max_val
             y_samps[idx][-1] = max_val
             y_samps[idx][-10] = max_val
             idx += 1
     file_name = "{0:s}_traj_xs_{1:s}.png".format(pre_tag, post_tag)
     utils.visualize_samples(x_samps, file_name, num_rows=20)
     file_name = "{0:s}_traj_ys_{1:s}.png".format(pre_tag, post_tag)
     utils.visualize_samples(y_samps, file_name, num_rows=20)
     # get sequential attention maps
     seq_samps = np.zeros((seq_len*samp_count, x_dim))
     idx = 0
     for s1 in range(samp_count):
         for s2 in range(seq_len):
             seq_samps[idx] = result[1][s2,s1,:x_dim] + result[1][s2,s1,x_dim:]
             idx += 1
     file_name = "{0:s}_traj_att_maps_{1:s}.png".format(pre_tag, post_tag)
     utils.visualize_samples(seq_samps, file_name, num_rows=20)
     # get sequential attention maps (read out values)
     seq_samps = np.zeros((seq_len*samp_count, x_dim))
     idx = 0
     for s1 in range(samp_count):
         for s2 in range(seq_len):
             seq_samps[idx] = result[2][s2,s1,:x_dim] + result[2][s2,s1,x_dim:]
             idx += 1
     file_name = "{0:s}_traj_read_outs_{1:s}.png".format(pre_tag, post_tag)
     utils.visualize_samples(seq_samps, file_name, num_rows=20)
     return
def visualize_attention(result, pre_tag="AAA", post_tag="AAA"):
     seq_len = result[0].shape[0]
     samp_count = result[0].shape[1]
     # get generated predictions
     x_samps = np.zeros((seq_len * samp_count, obs_dim))
     idx = 0
     for s1 in range(samp_count):
         for s2 in range(seq_len):
             x_samps[idx] = result[0][s2, s1, :]
             idx += 1
     file_name = "{0:s}_traj_xs_{1:s}.png".format(pre_tag, post_tag)
     utils.visualize_samples(x_samps, file_name, num_rows=samp_count)
     # get sequential attention maps
     seq_samps = np.zeros((seq_len * samp_count, obs_dim))
     idx = 0
     for s1 in range(samp_count):
         for s2 in range(seq_len):
             seq_samps[idx] = result[1][s2, s1, :]
             idx += 1
     file_name = "{0:s}_traj_att_maps_{1:s}.png".format(pre_tag, post_tag)
     utils.visualize_samples(seq_samps, file_name, num_rows=samp_count)
     # get sequential attention maps (read out values)
     seq_samps = np.zeros((seq_len * samp_count, obs_dim))
     idx = 0
     for s1 in range(samp_count):
         for s2 in range(seq_len):
             seq_samps[idx] = result[2][s2, s1, :]
             idx += 1
     file_name = "{0:s}_traj_read_outs_{1:s}.png".format(pre_tag, post_tag)
     utils.visualize_samples(seq_samps, file_name, num_rows=samp_count)
     # get original input sequences
     seq_samps = np.zeros((seq_len * samp_count, obs_dim))
     idx = 0
     for s1 in range(samp_count):
         for s2 in range(seq_len):
             seq_samps[idx] = result[3][s2, s1, :]
             idx += 1
     file_name = "{0:s}_traj_xs_in_{1:s}.png".format(pre_tag, post_tag)
     utils.visualize_samples(seq_samps, file_name, num_rows=samp_count)
     return
Example #10
def visualize_attention(sampler_result, pre_tag="AAA", post_tag="AAA"):
     # get generated predictions
     seq_len = sampler_result[0].shape[0]
     samp_count = sampler_result[0].shape[1]
     x_dim = sampler_result[0].shape[2]
     seq_samps = np.zeros((samp_count, 28*28))
     for samp in range(samp_count):
         step = 0
         samp_vals = np.zeros((28,28))
         for col in range(28):
             col_vals = np.zeros((28,))
             for rep in range(step_reps):
                 if (rep == (step_reps-1)):
                     col_vals = sampler_result[0][step,samp,:]
                 step += 1
             samp_vals[:,col] = col_vals
         seq_samps[samp,:] = samp_vals.ravel()
     file_name = "{0:s}_traj_xs_{1:s}.png".format(pre_tag, post_tag)
     utils.visualize_samples(seq_samps, file_name, num_rows=10)
     # get sequential attention maps
     seq_samps = np.zeros((samp_count, 28*28))
     for samp in range(samp_count):
         step = 0
         samp_vals = np.zeros((28,28))
         for col in range(28):
             col_vals = np.zeros((28,))
             for rep in range(step_reps):
                 col_vals = col_vals + sampler_result[1][step,samp,:x_dim]
                 col_vals = col_vals + sampler_result[1][step,samp,x_dim:]
                 step += 1
             samp_vals[:,col] = col_vals / (2.0*step_reps)
         seq_samps[samp,:] = samp_vals.ravel()
     file_name = "{0:s}_traj_att_maps_{1:s}.png".format(pre_tag, post_tag)
     utils.visualize_samples(seq_samps, file_name, num_rows=10)
     # get sequential attention maps (read out values)
     seq_samps = np.zeros((samp_count, 28*28))
     for samp in range(samp_count):
         step = 0
         samp_vals = np.zeros((28,28))
         for col in range(28):
             col_vals = np.zeros((28,))
             for rep in range(step_reps):
                 col_vals = col_vals + sampler_result[2][step,samp,:x_dim]
                 col_vals = col_vals + sampler_result[2][step,samp,x_dim:]
                 step += 1
             samp_vals[:,col] = col_vals / (2.0*step_reps)
         seq_samps[samp,:] = samp_vals.ravel()
     file_name = "{0:s}_traj_read_outs_{1:s}.png".format(pre_tag, post_tag)
     utils.visualize_samples(seq_samps, file_name, num_rows=10)
     return
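A hypothetical setup for the column-wise variant above: it reads the global step_reps, treats each 28x28 image as 28 columns drawn over step_reps sub-steps, and expects sampler_result[k] to have shape (28 * step_reps, samp_count, feature_dim). The shapes below are illustrative assumptions, and utils.visualize_samples is assumed importable.

import numpy as np

step_reps = 3                      # assumed global read by the function
samp_count = 10
steps = 28 * step_reps
sampler_result = [
    np.random.rand(steps, samp_count, 28),      # per-column pixel values
    np.random.rand(steps, samp_count, 2 * 28),  # attention maps
    np.random.rand(steps, samp_count, 2 * 28),  # attention read-out values
]
visualize_attention(sampler_result, pre_tag="demo", post_tag="b0")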
Example #11
def train_walk_from_pretrained_osm(lam_kld=0.0):
    # Simple test code, to check that everything is basically functional.
    print("TESTING...")

    # Initialize a source of randomness
    rng = np.random.RandomState(1234)

    # Load some data to train/validate/test with
    data_file = 'data/tfd_data_48x48.pkl'
    dataset = load_tfd(tfd_pkl_name=data_file, which_set='unlabeled', fold='all')
    Xtr_unlabeled = dataset[0]
    dataset = load_tfd(tfd_pkl_name=data_file, which_set='train', fold='all')
    Xtr_train = dataset[0]
    Xtr = np.vstack([Xtr_unlabeled, Xtr_train])
    dataset = load_tfd(tfd_pkl_name=data_file, which_set='valid', fold='all')
    Xva = dataset[0]
    print("Xtr.shape: {0:s}, Xva.shape: {1:s}".format(str(Xtr.shape),str(Xva.shape)))

    # get and set some basic dataset information
    tr_samples = Xtr.shape[0]
    va_samples = Xva.shape[0]
    data_dim = Xtr.shape[1]
    batch_size = 400
    batch_reps = 5
    prior_sigma = 1.0
    Xtr_mean = np.mean(Xtr, axis=0, keepdims=True)
    Xtr_mean = (0.0 * Xtr_mean) + np.mean(np.mean(Xtr,axis=1))
    Xc_mean = np.repeat(Xtr_mean, batch_size, axis=0)

    # Symbolic inputs
    Xd = T.matrix(name='Xd')
    Xc = T.matrix(name='Xc')
    Xm = T.matrix(name='Xm')
    Xt = T.matrix(name='Xt')

    ###############################
    # Setup discriminator network #
    ###############################
    # Set some reasonable mlp parameters
    dn_params = {}
    # Set up some proto-networks
    pc0 = [data_dim, (300, 4), (300, 4), 10]
    dn_params['proto_configs'] = [pc0]
    # Set up some spawn networks
    sc0 = {'proto_key': 0, 'input_noise': 0.1, 'bias_noise': 0.1, 'do_dropout': True}
    #sc1 = {'proto_key': 0, 'input_noise': 0.1, 'bias_noise': 0.1, 'do_dropout': True}
    dn_params['spawn_configs'] = [sc0]
    dn_params['spawn_weights'] = [1.0]
    # Set remaining params
    dn_params['init_scale'] = 1.0
    dn_params['lam_l2a'] = 1e-2
    dn_params['vis_drop'] = 0.2
    dn_params['hid_drop'] = 0.5
    # Initialize a network object to use as the discriminator
    DN = PeaNet(rng=rng, Xd=Xd, params=dn_params)
    DN.init_biases(0.0)

    #######################################################
    # Load inferencer and generator from saved parameters #
    #######################################################
    gn_fname = RESULT_PATH+"pt_osm_params_b100000_GN.pkl"
    in_fname = RESULT_PATH+"pt_osm_params_b100000_IN.pkl"
    IN = load_infnet_from_file(f_name=in_fname, rng=rng, Xd=Xd)
    GN = load_infnet_from_file(f_name=gn_fname, rng=rng, Xd=Xd)

    ########################################################
    # Define parameters for the VCGLoop, and initialize it #
    ########################################################
    print("Building the VCGLoop...")
    vcgl_params = {}
    vcgl_params['x_type'] = 'gaussian'
    vcgl_params['xt_transform'] = 'sigmoid'
    vcgl_params['logvar_bound'] = LOGVAR_BOUND
    vcgl_params['cost_decay'] = 0.1
    vcgl_params['chain_type'] = 'walkout'
    vcgl_params['lam_l2d'] = 5e-2
    VCGL = VCGLoop(rng=rng, Xd=Xd, Xc=Xc, Xm=Xm, Xt=Xt, \
                 i_net=IN, g_net=GN, d_net=DN, chain_len=5, \
                 data_dim=data_dim, prior_dim=PRIOR_DIM, params=vcgl_params)

    out_file = open(RESULT_PATH+"pt_walk_results.txt", 'wb')
    ####################################################
    # Train the VCGLoop by unrolling and applying BPTT #
    ####################################################
    learn_rate = 0.0005
    cost_1 = [0. for i in range(10)]
    for i in range(100000):
        scale = float(min((i+1), 5000)) / 5000.0
        if (((i+1) % 25000) == 0):
            learn_rate = learn_rate * 0.8
        ########################################
        # TRAIN THE CHAIN IN FREE-RUNNING MODE #
        ########################################
        VCGL.set_all_sgd_params(learn_rate=(scale*learn_rate), \
                mom_1=0.9, mom_2=0.99)
        VCGL.set_disc_weights(dweight_gn=25.0, dweight_dn=25.0)
        VCGL.set_lam_chain_nll(1.0)
        VCGL.set_lam_chain_kld(lam_kld)
        # get some data to train with
        tr_idx = npr.randint(low=0,high=tr_samples,size=(batch_size,))
        Xd_batch = Xtr.take(tr_idx, axis=0)
        Xc_batch = 0.0 * Xd_batch
        Xm_batch = 0.0 * Xd_batch
        # examples from the target distribution, to train discriminator
        tr_idx = npr.randint(low=0,high=tr_samples,size=(2*batch_size,))
        Xt_batch = Xtr.take(tr_idx, axis=0)
        # do a minibatch update of the model, and compute some costs
        outputs = VCGL.train_joint(Xd_batch, Xc_batch, Xm_batch, Xt_batch, batch_reps)
        cost_1 = [(cost_1[k] + 1.*outputs[k]) for k in range(len(outputs))]
        if ((i % 500) == 0):
            cost_1 = [(v / 500.0) for v in cost_1]
            o_str_1 = "batch: {0:d}, joint_cost: {1:.4f}, chain_nll_cost: {2:.4f}, chain_kld_cost: {3:.4f}, disc_cost_gn: {4:.4f}, disc_cost_dn: {5:.4f}".format( \
                    i, cost_1[0], cost_1[1], cost_1[2], cost_1[5], cost_1[6])
            print(o_str_1)
            cost_1 = [0. for v in cost_1]
        if ((i % 1000) == 0):
            tr_idx = npr.randint(low=0,high=Xtr.shape[0],size=(5,))
            va_idx = npr.randint(low=0,high=Xva.shape[0],size=(5,))
            Xd_batch = np.vstack([Xtr.take(tr_idx, axis=0), Xva.take(va_idx, axis=0)])
            # draw some chains of samples from the VAE loop
            file_name = RESULT_PATH+"pt_walk_chain_samples_b{0:d}.png".format(i)
            Xd_samps = np.repeat(Xd_batch, 3, axis=0)
            sample_lists = VCGL.OSM.sample_from_chain(Xd_samps, loop_iters=20)
            Xs = np.vstack(sample_lists["data samples"])
            utils.visualize_samples(Xs, file_name, num_rows=20)
            # draw some masked chains of samples from the VAE loop
            file_name = RESULT_PATH+"pt_walk_mask_samples_b{0:d}.png".format(i)
            Xd_samps = np.repeat(Xc_mean[0:Xd_batch.shape[0],:], 3, axis=0)
            Xc_samps = np.repeat(Xd_batch, 3, axis=0)
            Xm_rand = sample_masks(Xc_samps, drop_prob=0.0)
            Xm_patch = sample_patch_masks(Xc_samps, (48,48), (25,25))
            Xm_samps = Xm_rand * Xm_patch
            sample_lists = VCGL.OSM.sample_from_chain(Xd_samps, \
                    X_c=Xc_samps, X_m=Xm_samps, loop_iters=20)
            Xs = np.vstack(sample_lists["data samples"])
            utils.visualize_samples(Xs, file_name, num_rows=20)
            # draw some samples independently from the GenNet's prior
            file_name = RESULT_PATH+"pt_walk_prior_samples_b{0:d}.png".format(i)
            Xs = VCGL.sample_from_prior(20*20)
            utils.visualize_samples(Xs, file_name, num_rows=20)
        # DUMP PARAMETERS FROM TIME-TO-TIME
        if (i % 5000 == 0):
            DN.save_to_file(f_name=RESULT_PATH+"pt_walk_params_b{0:d}_DN.pkl".format(i))
            IN.save_to_file(f_name=RESULT_PATH+"pt_walk_params_b{0:d}_IN.pkl".format(i))
            GN.save_to_file(f_name=RESULT_PATH+"pt_walk_params_b{0:d}_GN.pkl".format(i))
    return
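The training loop above warms the learning rate up linearly over the first 5000 batches and multiplies it by 0.8 every 25000 batches; a minimal standalone sketch of that effective schedule, with illustrative names:

def effective_lr(i, base_lr=0.0005):
    # linear warm-up over the first 5000 batches, then a 0.8x decay every 25000 batches
    scale = min(i + 1, 5000) / 5000.0
    decays = (i + 1) // 25000
    return scale * base_lr * (0.8 ** decays)

for i in (0, 4999, 25000, 100000):
    print("batch {0:d}: lr {1:.6f}".format(i, effective_lr(i)))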
Example #12
def evaluate_lenet5(learning_rate=0.05,
                    n_epochs=500,
                    dataset='./data/mnist.pkl.gz',
                    nkerns=[48, 64],
                    batch_size=256):
    """ Demonstrates lenet on MNIST dataset

    :type learning_rate: float
    :param learning_rate: learning rate used (factor for the stochastic
                          gradient)

    :type n_epochs: int
    :param n_epochs: maximal number of epochs to run the optimizer

    :type dataset: string
    :param dataset: path to the dataset used for training /testing (MNIST here)

    :type nkerns: list of ints
    :param nkerns: number of kernels on each layer
    """

    rng = numpy.random.RandomState(23455)

    datasets = load_data(dataset)

    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]

    # compute number of minibatches for training, validation and testing
    n_train_batches = train_set_x.get_value(borrow=True).shape[0]
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0]
    n_test_batches = test_set_x.get_value(borrow=True).shape[0]
    n_train_batches /= batch_size
    n_valid_batches /= batch_size
    n_test_batches /= batch_size

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch
    x = T.matrix('x')  # the data is presented as rasterized images
    y = T.ivector('y')  # the labels are presented as 1D vector of
    # [int] labels

    ishape = (28, 28)  # this is the size of MNIST images

    start_rate = numpy.asarray([0.05]).astype(theano.config.floatX)
    learning_rate = theano.shared(value=start_rate, name='learning_rate')

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print '... building the model'

    tanh = lambda vals: T.tanh(vals)
    relu = lambda vals: relu_actfun(vals)

    # Reshape matrix of rasterized images of shape (batch_size,28*28)
    # to a 4D tensor, compatible with our LeNetConvPoolLayer
    layer0_prep = Reshape2D4DLayer(input=x, out_shape=(1, 28, 28))

    # Construct the first convolutional pooling layer:
    # filtering reduces the image size to (28-7+1,28-7+1)=(22,22)
    # maxpooling reduces this further to (22/2,22/2) = (11,11)
    # 4D output tensor is thus of shape (batch_size,nkerns[0],11,11)
    layer0 = ConvPoolLayer(rng, input=layer0_prep.output, \
            filt_def=(nkerns[0], 1, 7, 7), pool_def=(2, 2), \
            activation=relu, drop_rate=0.0, input_noise=0.1, bias_noise=0.05, \
            W=None, b=None, name="layer0", W_scale=2.0)

    # Construct the second convolutional pooling layer
    # filtering reduces the image size to (11-4+1,11-4+1)=(8,8)
    # maxpooling reduces this further to (8/2,8/2) = (4,4)
    # 4D output tensor is thus of shape (batch_size,nkerns[1],4,4)
    layer1 = ConvPoolLayer(rng, input=layer0.output, \
            filt_def=(nkerns[1], nkerns[0], 4, 4), pool_def=(2, 2), \
            activation=relu, drop_rate=0.0, input_noise=0.0, bias_noise=0.05, \
            W=None, b=None, name="layer1", W_scale=2.0)

    # the HiddenLayer being fully-connected, it operates on 2D matrices of
    # shape (batch_size,num_pixels) (i.e matrix of rasterized images).
    # This will generate a matrix of shape (batch_size,nkerns[1]*4*4) = (256,1024)
    layer2_prep = Reshape4D2DLayer(layer1.output)

    # construct a fully-connected relu layer
    layer2 = HiddenLayer(rng, layer2_prep.output, nkerns[1]*4*4, 512, \
                 activation=relu, pool_size=0, \
                 drop_rate=0.0, input_noise=0.0, bias_noise=0.05, \
                 W=None, b=None, name="layer2", W_scale=2.0)

    # construct an output layer to predict classes
    layer3 = HiddenLayer(rng, layer2.output, 512, 10, \
             activation=relu, pool_size=0, \
             drop_rate=0.5, input_noise=0.0, bias_noise=0.0, \
             W=None, b=None, name="layer3", W_scale=2.0)

    # get a loss function to apply to the output layer
    loss_func = LogisticRegression(layer3)

    # the cost we minimize during training is the NLL of the model
    cost = loss_func.loss_func(y)

    # create a function to compute the mistakes that are made by the model
    test_model = theano.function(
        [index],
        loss_func.errors(y),
        givens={
            x: test_set_x[index * batch_size:(index + 1) * batch_size],
            y: test_set_y[index * batch_size:(index + 1) * batch_size]
        })

    validate_model = theano.function(
        [index],
        loss_func.errors(y),
        givens={
            x: valid_set_x[index * batch_size:(index + 1) * batch_size],
            y: valid_set_y[index * batch_size:(index + 1) * batch_size]
        })

    # create a list of all model parameters to be fit by gradient descent
    params = layer3.params + layer2.params + layer1.params + layer0.params
    moms = OrderedDict()
    for p in params:
        moms[p] = theano.shared(value=numpy.zeros( \
            p.get_value(borrow=True).shape).astype(theano.config.floatX))

    # create a list of gradients for all model parameters
    grads = OrderedDict()
    for p in params:
        grads[p] = T.grad(cost, p)

    # train_model is a function that updates the model parameters by
    # SGD Since this model has many parameters, it would be tedious to
    # manually create an update rule for each model parameter. We thus
    # create the updates list by automatically looping over all
    # (params[i],grads[i]) pairs.
    updates = []
    for p in params:
        mom_update = (moms[p], (0.8 * moms[p]) + (0.2 * grads[p]))
        param_update = (p, p - learning_rate[0] * moms[p])
        updates.append(mom_update)
        updates.append(param_update)

    train_model = theano.function(
        [index],
        cost,
        updates=updates,
        givens={
            x: train_set_x[index * batch_size:(index + 1) * batch_size],
            y: train_set_y[index * batch_size:(index + 1) * batch_size]
        })

    ###############
    # TRAIN MODEL #
    ###############
    print '... training'
    # early-stopping parameters
    patience = 10000  # look as this many examples regardless
    patience_increase = 2  # wait this much longer when a new best is
    # found
    improvement_threshold = 0.995  # a relative improvement of this much is
    # considered significant
    validation_frequency = min(n_train_batches, patience / 2)
    # go through this many
    # minibatche before checking the network
    # on the validation set; in this case we
    # check every epoch

    best_params = None
    best_validation_loss = numpy.inf
    best_iter = 0
    test_score = 0.
    start_time = time.clock()

    epoch = 0
    done_looping = False

    while (epoch < n_epochs) and (not done_looping):
        epoch = epoch + 1
        for minibatch_index in xrange(n_train_batches):

            iter = (epoch - 1) * n_train_batches + minibatch_index

            if iter % 100 == 0:
                print 'training @ iter = ', iter
            cost_ij = train_model(minibatch_index)

            if (iter + 1) % validation_frequency == 0:

                # compute zero-one loss on validation set
                validation_losses = [
                    validate_model(i) for i in xrange(n_valid_batches)
                ]
                this_validation_loss = numpy.mean(
                    validation_losses) / batch_size
                print('epoch %i, minibatch %i/%i, validation error %f %%' % \
                      (epoch, minibatch_index + 1, n_train_batches, \
                       this_validation_loss * 100.))

                # if we got the best validation score until now
                if this_validation_loss < best_validation_loss:

                    #improve patience if loss improvement is good enough
                    if this_validation_loss < best_validation_loss *  \
                       improvement_threshold:
                        patience = max(patience, iter * patience_increase)

                    # save best validation score and iteration number
                    best_validation_loss = this_validation_loss
                    best_iter = iter

                    # test it on the test set
                    test_losses = [
                        test_model(i) for i in xrange(n_test_batches)
                    ]
                    test_score = numpy.mean(test_losses) / batch_size
                    print(
                        ('     epoch %i, minibatch %i/%i, test error of best '
                         'model %f %%') % (epoch, minibatch_index + 1,
                                           n_train_batches, test_score * 100.))

        if (((epoch + 1) % 30) == 0):
            new_rate = 0.5 * learning_rate.get_value(borrow=True)
            learning_rate.set_value(new_rate)

        if ((epoch % 10) == 0):
            W_l0 = layer0.W.get_value(borrow=False)
            W_l0 = W_l0.reshape((W_l0.shape[0], numpy.prod(W_l0.shape[1:])))
            visualize_samples(W_l0, 'A1_CONV_FILTS.png')

    end_time = time.clock()
    print('Optimization complete.')
    print('Best validation score of %f %% obtained at iteration %i,'\
          'with test performance %f %%' %
          (best_validation_loss * 100., best_iter + 1, test_score * 100.))
    print >> sys.stderr, ('The code for file ' + os.path.split(__file__)[1] +
                          ' ran for %.2fm' % ((end_time - start_time) / 60.))
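The update list built in evaluate_lenet5 keeps, for each parameter, a running average of its gradient (80% previous average, 20% new gradient) and steps the parameter along that average; a minimal NumPy sketch of the same rule, with illustrative names:

import numpy as np

def momentum_step(param, grad, mom, lr=0.05):
    # mom is an exponential moving average of recent gradients
    mom = 0.8 * mom + 0.2 * grad
    return param - lr * mom, mom

p, m = np.ones(3), np.zeros(3)
g = np.array([0.5, -0.2, 0.1])
p, m = momentum_step(p, g, m)
print("param: {0}, momentum: {1}".format(p, m))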
def test_lstm_structpred(step_type='add', use_pol=True, use_binary=False):
    ###########################################
    # Make a tag for identifying result files #
    ###########################################
    pol_tag = "P1" if use_pol else "P0"
    bin_tag = "B1" if use_binary else "B0"
    res_tag = "STRUCT_PRED_RESULTS/SP_LSTM_{}_{}_{}".format(
        step_type, pol_tag, bin_tag)

    if use_binary:
        ############################
        # Get binary training data #
        ############################
        rng = np.random.RandomState(1234)
        Xtr, Xva, Xte = load_binarized_mnist(data_path='./data/')
        #Xtr = np.vstack((Xtr, Xva))
        #Xva = Xte
    else:
        ################################
        # Get continuous training data #
        ################################
        rng = np.random.RandomState(1234)
        dataset = 'data/mnist.pkl.gz'
        datasets = load_udm(dataset, as_shared=False, zero_mean=False)
        Xtr = datasets[0][0]
        Xva = datasets[1][0]
        Xte = datasets[2][0]
        #Xtr = np.concatenate((Xtr, Xva), axis=0)
        #Xva = Xte
        Xtr = to_fX(shift_and_scale_into_01(Xtr))
        Xva = to_fX(shift_and_scale_into_01(Xva))
    tr_samples = Xtr.shape[0]
    va_samples = Xva.shape[0]
    batch_size = 200

    ########################################################
    # Split data into "observation" and "prediction" parts #
    ########################################################
    obs_cols = 14  # number of columns to observe
    pred_cols = 28 - obs_cols  # number of columns to predict
    x_dim = obs_cols * 28  # dimensionality of observations
    y_dim = pred_cols * 28  # dimensionality of predictions
    Xtr, Ytr = img_split(Xtr,
                         im_dim=(28, 28),
                         split_col=obs_cols,
                         transposed=True)
    Xva, Yva = img_split(Xva,
                         im_dim=(28, 28),
                         split_col=obs_cols,
                         transposed=True)

    ############################################################
    # Setup some parameters for the Iterative Refinement Model #
    ############################################################
    read_dim = 128
    write_dim = 128
    mlp_dim = 128
    rnn_dim = 128
    z_dim = 64
    n_iter = 15

    rnninits = {
        'weights_init': IsotropicGaussian(0.01),
        'biases_init': Constant(0.),
    }
    inits = {
        'weights_init': IsotropicGaussian(0.01),
        'biases_init': Constant(0.),
    }

    # setup reader/writer models
    reader_mlp = MLP([Rectifier(), Tanh()], [x_dim, mlp_dim, read_dim],
                     name="reader_mlp",
                     **inits)
    writer_mlp = MLP([Rectifier(), None], [rnn_dim, mlp_dim, y_dim],
                     name="writer_mlp",
                     **inits)

    # setup submodels for processing LSTM inputs
    pol_inp_dim = y_dim + read_dim + rnn_dim
    var_inp_dim = y_dim + y_dim + read_dim + rnn_dim
    pol_mlp_in = MLP([Identity()], [pol_inp_dim, 4 * rnn_dim],
                     name="pol_mlp_in",
                     **inits)
    var_mlp_in = MLP([Identity()], [var_inp_dim, 4 * rnn_dim],
                     name="var_mlp_in",
                     **inits)
    dec_mlp_in = MLP([Identity()], [z_dim, 4 * rnn_dim],
                     name="dec_mlp_in",
                     **inits)

    # setup submodels for turning LSTM states into conditionals over z
    pol_mlp_out = CondNet([], [rnn_dim, z_dim], name="pol_mlp_out", **inits)
    var_mlp_out = CondNet([], [rnn_dim, z_dim], name="var_mlp_out", **inits)
    dec_mlp_out = CondNet([], [rnn_dim, z_dim], name="dec_mlp_out", **inits)

    # setup the LSTMs for primary policy, guide policy, and shared dynamics
    pol_rnn = BiasedLSTM(dim=rnn_dim, ig_bias=2.0, fg_bias=2.0, \
                         name="pol_rnn", **rnninits)
    var_rnn = BiasedLSTM(dim=rnn_dim, ig_bias=2.0, fg_bias=2.0, \
                         name="var_rnn", **rnninits)
    dec_rnn = BiasedLSTM(dim=rnn_dim, ig_bias=2.0, fg_bias=2.0, \
                         name="dec_rnn", **rnninits)

    model = IRStructPredModel(n_iter,
                              step_type=step_type,
                              use_pol=use_pol,
                              reader_mlp=reader_mlp,
                              writer_mlp=writer_mlp,
                              pol_mlp_in=pol_mlp_in,
                              pol_mlp_out=pol_mlp_out,
                              pol_rnn=pol_rnn,
                              var_mlp_in=var_mlp_in,
                              var_mlp_out=var_mlp_out,
                              var_rnn=var_rnn,
                              dec_mlp_in=dec_mlp_in,
                              dec_mlp_out=dec_mlp_out,
                              dec_rnn=dec_rnn)
    model.initialize()

    compile_start_time = time.time()

    # build the cost gradients, training function, samplers, etc.
    model.build_sampling_funcs()
    print("Testing model sampler...")
    # draw some independent samples from the model
    samp_count = 10
    samp_reps = 3
    x_in = Xtr[:10, :].repeat(samp_reps, axis=0)
    y_in = Ytr[:10, :].repeat(samp_reps, axis=0)
    x_samps, y_samps = model.sample_model(x_in, y_in, sample_source='p')
    # TODO: visualize sample prediction trajectories
    img_seq = seq_img_join(x_samps, y_samps, im_dim=(28, 28), transposed=True)
    seq_len = len(img_seq)
    samp_count = img_seq[0].shape[0]
    seq_samps = np.zeros((seq_len * samp_count, img_seq[0].shape[1]))
    idx = 0
    for s1 in range(samp_count):
        for s2 in range(seq_len):
            seq_samps[idx] = img_seq[s2][s1]
            idx += 1
    file_name = "{0:s}_samples_b{1:d}.png".format(res_tag, 0)
    utils.visualize_samples(seq_samps, file_name, num_rows=samp_count)

    model.build_model_funcs()

    compile_end_time = time.time()
    compile_minutes = (compile_end_time - compile_start_time) / 60.0
    print("THEANO COMPILE TIME (MIN): {}".format(compile_minutes))

    ################################################################
    # Apply some updates, to check that they aren't totally broken #
    ################################################################
    print("Beginning to train the model...")
    out_file = open("{}_results.txt".format(res_tag), 'wb')
    costs = [0. for i in range(10)]
    learn_rate = 0.0002
    momentum = 0.9
    batch_idx = np.arange(batch_size) + tr_samples
    for i in range(300000):
        scale = min(1.0, ((i + 1) / 5000.0))
        if (((i + 1) % 10000) == 0):
            learn_rate = learn_rate * 0.95
        # get the indices of training samples for this batch update
        batch_idx += batch_size
        if (np.max(batch_idx) >= tr_samples):
            # we finished an "epoch", so we rejumble the training set
            Xtr, Ytr = row_shuffle(Xtr, Ytr)
            batch_idx = np.arange(batch_size)
        # set sgd and objective function hyperparams for this update
        model.set_sgd_params(lr=scale * learn_rate,
                             mom_1=scale * momentum,
                             mom_2=0.98)
        model.set_lam_kld(lam_kld_q2p=1.0, lam_kld_p2q=0.1)
        model.set_grad_noise(grad_noise=0.02)
        # perform a minibatch update and record the cost for this batch
        Xb = to_fX(Xtr.take(batch_idx, axis=0))
        Yb = to_fX(Ytr.take(batch_idx, axis=0))
        result = model.train_joint(Xb, Yb)
        costs = [(costs[j] + result[j]) for j in range(len(result))]

        # diagnostics
        if ((i % 250) == 0):
            costs = [(v / 250.0) for v in costs]
            str1 = "-- batch {0:d} --".format(i)
            str2 = "    total_cost: {0:.4f}".format(costs[0])
            str3 = "    nll_bound : {0:.4f}".format(costs[1])
            str4 = "    nll_term  : {0:.4f}".format(costs[2])
            str5 = "    kld_q2p   : {0:.4f}".format(costs[3])
            str6 = "    kld_p2q   : {0:.4f}".format(costs[4])
            str7 = "    reg_term  : {0:.4f}".format(costs[5])
            joint_str = "\n".join([str1, str2, str3, str4, str5, str6, str7])
            print(joint_str)
            out_file.write(joint_str + "\n")
            out_file.flush()
            costs = [0.0 for v in costs]
        if ((i % 1000) == 0):
            model.save_model_params("{}_params.pkl".format(res_tag))
            # compute a small-sample estimate of NLL bound on validation set
            Xva, Yva = row_shuffle(Xva, Yva)
            Xb = to_fX(Xva[:5000])
            Yb = to_fX(Yva[:5000])
            va_costs = model.compute_nll_bound(Xb, Yb)
            str1 = "    va_nll_bound : {}".format(va_costs[1])
            str2 = "    va_nll_term  : {}".format(va_costs[2])
            str3 = "    va_kld_q2p   : {}".format(va_costs[3])
            joint_str = "\n".join([str1, str2, str3])
            print(joint_str)
            out_file.write(joint_str + "\n")
            out_file.flush()
            # draw some independent samples from the model
            samp_count = 10
            samp_reps = 3
            x_in = Xva[:samp_count, :].repeat(samp_reps, axis=0)
            y_in = Yva[:samp_count, :].repeat(samp_reps, axis=0)
            x_samps, y_samps = model.sample_model(x_in,
                                                  y_in,
                                                  sample_source='p')
            # visualize sample prediction trajectories
            img_seq = seq_img_join(x_samps,
                                   y_samps,
                                   im_dim=(28, 28),
                                   transposed=True)
            seq_len = len(img_seq)
            samp_count = img_seq[0].shape[0]
            seq_samps = np.zeros((seq_len * samp_count, img_seq[0].shape[1]))
            idx = 0
            for s1 in range(samp_count):
                for s2 in range(seq_len):
                    if use_binary:
                        seq_samps[idx] = binarize_data(img_seq[s2][s1])
                    else:
                        seq_samps[idx] = img_seq[s2][s1]
                    idx += 1
            file_name = "{0:s}_samples_b{1:d}.png".format(res_tag, i)
            utils.visualize_samples(seq_samps, file_name, num_rows=samp_count)
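A standalone sketch of the batch-index bookkeeping used in the loop above (and again in the imputation loop further down): the index window starts past the end of the data, which forces a shuffle and a reset to [0, batch_size) on the very first iteration, and every later wrap-around reshuffles the data.

import numpy as np

tr_samples, batch_size = 10, 4
data = np.arange(tr_samples)
batch_idx = np.arange(batch_size) + tr_samples
for step in range(6):
    batch_idx += batch_size
    if np.max(batch_idx) >= tr_samples:
        np.random.shuffle(data)          # stand-in for row_shuffle(Xtr, Ytr)
        batch_idx = np.arange(batch_size)
    print("step {0:d}: rows {1}".format(step, data.take(batch_idx, axis=0)))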
Example #14
def main(args):
    cfg = cfg_dict[args.cfg_name]
    writer = SummaryWriter(os.path.join("runs", args.cfg_name))
    train_loader = get_data_loader(cfg, cfg["train_dir"])

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = EDSR(cfg).to(device)
    criterion = torch.nn.L1Loss()
    optimizer = torch.optim.Adam(model.parameters(), lr=cfg["init_lr"],
                                 betas=(0.9, 0.999), eps=1e-8)

    global_batches = 0
    if args.train:
        for epoch in range(cfg["n_epoch"]):
            model.train()
            running_loss = 0.0
            for i, batch in enumerate(train_loader):
                lr, hr = batch[0].to(device), batch[1].to(device)
                optimizer.zero_grad()
                sr = model(lr)
                loss = model.loss(sr, hr)
                # loss = criterion(model(lr), hr)
                running_loss += loss.item()
                loss.backward()
                optimizer.step()
                global_batches += 1
                if global_batches % cfg["lr_decay_every"] == 0:
                    for param_group in optimizer.param_groups:
                        print(f"decay lr to {param_group['lr'] / 10}")
                        param_group["lr"] /= 10

            if epoch % args.log_every == 0:
                model.eval()
                with torch.no_grad():
                    batch_samples = {"lr": batch[0], "hr": batch[1], 
                                     "sr": sr.cpu()}
                    writer.add_scalar("training-loss", 
                                      running_loss / len(train_loader),
                                      global_step=global_batches)
                    writer.add_scalar("PSNR", compute_psnr(batch_samples), 
                                      global_step=global_batches)
                    samples = {k: v[:3] for k, v in batch_samples.items()}
                    fig = visualize_samples(samples, f"epoch-{epoch}")
                    writer.add_figure("sample-visualization", fig, 
                                      global_step=global_batches)

            if epoch % args.save_every == 0:
                state = {"net": model.state_dict(), 
                         "optim": optimizer.state_dict()}
                checkpoint_dir = args.checkpoint_dir
                if not os.path.exists(checkpoint_dir):
                    os.makedirs(checkpoint_dir)
                path = os.path.join(checkpoint_dir, args.cfg_name)
                torch.save(state, path)
    
    # eval
    if args.eval:
        assert args.model_path and args.lr_img_path
        print(f"evaluating {args.lr_img_path}")
        state = torch.load(args.model_path, map_location=device)
        model.load_state_dict(state["net"])
        optimizer.load_state_dict(state["optim"])

        with torch.no_grad():
            lr = img2tensor(args.lr_img_path)
            sr = model(lr.clone().to(device)).cpu()
            samples = {"lr": lr, "sr": sr}
            if args.hr_img_path:
                samples["hr"] = img2tensor(args.hr_img_path)
                print(f"PSNR: {compute_psnr(samples)}")
            directory = os.path.dirname(args.lr_img_path)
            name = f"eval-{args.cfg_name}-{args.lr_img_path.split('/')[-1]}"
            visualize_samples(samples, name, save=True, 
                              directory=directory, size=6)
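A hypothetical command-line wrapper for main() above; the flag names mirror the attributes the function reads (cfg_name, train, eval, log_every, save_every, checkpoint_dir, model_path, lr_img_path, hr_img_path) but are assumptions about the original script, which must also provide cfg_dict, EDSR, and the other helpers it imports.

import argparse

if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--cfg_name", required=True)
    parser.add_argument("--train", action="store_true")
    parser.add_argument("--eval", action="store_true")
    parser.add_argument("--log_every", type=int, default=1)
    parser.add_argument("--save_every", type=int, default=1)
    parser.add_argument("--checkpoint_dir", default="checkpoints")
    parser.add_argument("--model_path", default="")
    parser.add_argument("--lr_img_path", default="")
    parser.add_argument("--hr_img_path", default="")
    main(parser.parse_args())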
Example #15
def test_gip_sigma_scale_mnist():
    from LogPDFs import cross_validate_sigma
    # Simple test code, to check that everything is basically functional.
    print("TESTING...")

    # Initialize a source of randomness
    rng = np.random.RandomState(12345)

    # Load some data to train/validate/test with
    dataset = 'data/mnist.pkl.gz'
    datasets = load_udm(dataset, zero_mean=False)
    Xtr = datasets[0][0]
    Xtr = Xtr.get_value(borrow=False)
    Xva = datasets[2][0]
    Xva = Xva.get_value(borrow=False)
    print("Xtr.shape: {0:s}, Xva.shape: {1:s}".format(str(Xtr.shape),str(Xva.shape)))

    # get and set some basic dataset information
    tr_samples = Xtr.shape[0]
    batch_size = 100
    Xtr_mean = np.mean(Xtr, axis=0, keepdims=True)
    Xtr_mean = (0.0 * Xtr_mean) + np.mean(Xtr)
    Xc_mean = np.repeat(Xtr_mean, batch_size, axis=0).astype(theano.config.floatX)

    # Symbolic inputs
    Xd = T.matrix(name='Xd')
    Xc = T.matrix(name='Xc')
    Xm = T.matrix(name='Xm')
    Xt = T.matrix(name='Xt')

    # Load inferencer and generator from saved parameters
    gn_fname = "MNIST_WALKOUT_TEST_MAX_KLD/pt_walk_params_b70000_GN.pkl"
    in_fname = "MNIST_WALKOUT_TEST_MAX_KLD/pt_walk_params_b70000_IN.pkl"
    IN = load_infnet_from_file(f_name=in_fname, rng=rng, Xd=Xd)
    GN = load_infnet_from_file(f_name=gn_fname, rng=rng, Xd=Xd)
    x_dim = IN.shared_layers[0].in_dim
    z_dim = IN.mu_layers[-1].out_dim
    # construct a GIPair with the loaded InfNet and GenNet
    osm_params = {}
    osm_params['x_type'] = 'gaussian'
    osm_params['xt_transform'] = 'sigmoid'
    osm_params['logvar_bound'] = LOGVAR_BOUND
    OSM = OneStageModel(rng=rng, Xd=Xd, Xc=Xc, Xm=Xm, \
            p_x_given_z=GN, q_z_given_x=IN, \
            x_dim=x_dim, z_dim=z_dim, params=osm_params)
    # compute variational likelihood bound and its sub-components
    Xva = row_shuffle(Xva)
    Xb = Xva[0:5000]
    file_name = "A_MNIST_POST_KLDS.png"
    post_klds = OSM.compute_post_klds(Xb)
    post_dim_klds = np.mean(post_klds, axis=0)
    utils.plot_stem(np.arange(post_dim_klds.shape[0]), post_dim_klds, \
            file_name)
    # compute information about free-energy on validation set
    file_name = "A_MNIST_FREE_ENERGY.png"
    fe_terms = OSM.compute_fe_terms(Xb, 20)
    utils.plot_scatter(fe_terms[1], fe_terms[0], file_name, \
            x_label='Posterior KLd', y_label='Negative Log-likelihood')

    # bound_results = OSM.compute_ll_bound(Xva)
    # ll_bounds = bound_results[0]
    # post_klds = bound_results[1]
    # log_likelihoods = bound_results[2]
    # max_lls = bound_results[3]
    # print("mean ll bound: {0:.4f}".format(np.mean(ll_bounds)))
    # print("mean posterior KLd: {0:.4f}".format(np.mean(post_klds)))
    # print("mean log-likelihood: {0:.4f}".format(np.mean(log_likelihoods)))
    # print("mean max log-likelihood: {0:.4f}".format(np.mean(max_lls)))
    # print("min ll bound: {0:.4f}".format(np.min(ll_bounds)))
    # print("max posterior KLd: {0:.4f}".format(np.max(post_klds)))
    # print("min log-likelihood: {0:.4f}".format(np.min(log_likelihoods)))
    # print("min max log-likelihood: {0:.4f}".format(np.min(max_lls)))
    # # compute some information about the approximate posteriors
    # post_stats = OSM.compute_post_stats(Xva, 0.0*Xva, 0.0*Xva)
    # all_post_klds = np.sort(post_stats[0].ravel()) # post KLds for each obs and dim
    # obs_post_klds = np.sort(post_stats[1]) # summed post KLds for each obs
    # post_dim_klds = post_stats[2] # average post KLds for each post dim
    # post_dim_vars = post_stats[3] # average squared mean for each post dim
    # utils.plot_line(np.arange(all_post_klds.shape[0]), all_post_klds, "AAA_ALL_POST_KLDS.png")
    # utils.plot_line(np.arange(obs_post_klds.shape[0]), obs_post_klds, "AAA_OBS_POST_KLDS.png")
    # utils.plot_stem(np.arange(post_dim_klds.shape[0]), post_dim_klds, "AAA_POST_DIM_KLDS.png")
    # utils.plot_stem(np.arange(post_dim_vars.shape[0]), post_dim_vars, "AAA_POST_DIM_VARS.png")

    # draw many samples from the GIP
    for i in range(5):
        tr_idx = npr.randint(low=0,high=tr_samples,size=(100,))
        Xd_batch = Xtr.take(tr_idx, axis=0)
        Xs = []
        for row in range(3):
            Xs.append([])
            for col in range(3):
                sample_lists = OSM.sample_from_chain(Xd_batch[0:10,:], loop_iters=100, \
                        sigma_scale=1.0)
                Xs[row].append(group_chains(sample_lists['data samples']))
        Xs, block_im_dim = block_video(Xs, (28,28), (3,3))
        to_video(Xs, block_im_dim, "A_MNIST_KLD_CHAIN_VIDEO_{0:d}.avi".format(i), frame_rate=10)
        #sample_lists = GIP.sample_from_chain(Xd_batch[0,:].reshape((1,data_dim)), loop_iters=300, \
        #        sigma_scale=1.0)
        #Xs = np.vstack(sample_lists["data samples"])
        #file_name = "TFD_TEST_{0:d}.png".format(i)
        #utils.visualize_samples(Xs, file_name, num_rows=15)
    file_name = "A_MNIST_KLD_PRIOR_SAMPLE.png"
    Xs = OSM.sample_from_prior(20*20)
    utils.visualize_samples(Xs, file_name, num_rows=20)
    # # test Parzen density estimator built from prior samples
    # Xs = OSM.sample_from_prior(10000)
    # [best_sigma, best_ll, best_lls] = \
    #         cross_validate_sigma(Xs, Xva, [0.12, 0.14, 0.15, 0.16, 0.18], 20)
    # sort_idx = np.argsort(best_lls)
    # sort_idx = sort_idx[0:400]
    # utils.plot_line(np.arange(sort_idx.shape[0]), best_lls[sort_idx], "A_MNIST_BEST_LLS_1.png")
    # utils.visualize_samples(Xva[sort_idx], "A_MNIST_BAD_DIGITS_1.png", num_rows=20)
    # ##########
    # # AGAIN! #
    # ##########
    # Xs = OSM.sample_from_prior(10000)
    # tr_idx = npr.randint(low=0,high=tr_samples,size=(5000,))
    # Xva = Xtr.take(tr_idx, axis=0)
    # [best_sigma, best_ll, best_lls] = \
    #         cross_validate_sigma(Xs, Xva, [0.12, 0.14, 0.15, 0.16, 0.18], 20)
    # sort_idx = np.argsort(best_lls)
    # sort_idx = sort_idx[0:400]
    # utils.plot_line(np.arange(sort_idx.shape[0]), best_lls[sort_idx], "A_MNIST_BEST_LLS_2.png")
    # utils.visualize_samples(Xva[sort_idx], "A_MNIST_BAD_DIGITS_2.png", num_rows=20)
    return
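A hypothetical call; it expects the pretrained parameter pickles under "MNIST_WALKOUT_TEST_MAX_KLD/", the file 'data/mnist.pkl.gz', and the project's OneStageModel/InfNet utilities to be importable, and it writes its plots, prior samples, and chain videos into the working directory.

test_gip_sigma_scale_mnist()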
Example #16
def test_tfd(step_type='add',
               occ_dim=15,
               drop_prob=0.0):
    #########################################
    # Format the result tag more thoroughly #
    #########################################
    dp_int = int(100.0 * drop_prob)
    result_tag = "{}GPSI_OD{}_DP{}_{}_NA".format(RESULT_PATH, occ_dim, dp_int, step_type)

    ##########################
    # Get some training data #
    ##########################
    rng = np.random.RandomState(1234)  # assumed seed; `rng` is used below but not defined in this snippet
    data_file = 'data/tfd_data_48x48.pkl'
    dataset = load_tfd(tfd_pkl_name=data_file, which_set='unlabeled', fold='all')
    Xtr_unlabeled = dataset[0]
    dataset = load_tfd(tfd_pkl_name=data_file, which_set='train', fold='all')
    Xtr_train = dataset[0]
    Xtr = np.vstack([Xtr_unlabeled, Xtr_train])
    dataset = load_tfd(tfd_pkl_name=data_file, which_set='valid', fold='all')
    Xva = dataset[0]
    Xtr = to_fX(shift_and_scale_into_01(Xtr))
    Xva = to_fX(shift_and_scale_into_01(Xva))
    tr_samples = Xtr.shape[0]
    va_samples = Xva.shape[0]
    batch_size = 250
    batch_reps = 1  # assumed value; passed to GPSI.train_joint below but not defined in this snippet
    all_pix_mean = np.mean(np.mean(Xtr, axis=1))
    data_mean = to_fX( all_pix_mean * np.ones((Xtr.shape[1],)) )

    ############################################################
    # Setup some parameters for the Iterative Refinement Model #
    ############################################################
    obs_dim = Xtr.shape[1]
    z_dim = 200
    imp_steps = 6
    init_scale = 1.0

    x_in_sym = T.matrix('x_in_sym')
    x_out_sym = T.matrix('x_out_sym')
    x_mask_sym = T.matrix('x_mask_sym')

    #################
    # p_zi_given_xi #
    #################
    params = {}
    shared_config = [obs_dim, 1500, 1500]
    top_config = [shared_config[-1], z_dim]
    params['shared_config'] = shared_config
    params['mu_config'] = top_config
    params['sigma_config'] = top_config
    params['activation'] = relu_actfun
    params['init_scale'] = init_scale
    params['vis_drop'] = 0.0
    params['hid_drop'] = 0.0
    params['bias_noise'] = 0.0
    params['input_noise'] = 0.0
    params['build_theano_funcs'] = False
    p_zi_given_xi = InfNet(rng=rng, Xd=x_in_sym, \
            params=params, shared_param_dicts=None)
    p_zi_given_xi.init_biases(0.2)
    ###################
    # p_xip1_given_zi #
    ###################
    params = {}
    shared_config = [z_dim, 1500, 1500]
    output_config = [obs_dim, obs_dim]
    params['shared_config'] = shared_config
    params['output_config'] = output_config
    params['activation'] = relu_actfun
    params['init_scale'] = init_scale
    params['vis_drop'] = 0.0
    params['hid_drop'] = 0.0
    params['bias_noise'] = 0.0
    params['input_noise'] = 0.0
    params['build_theano_funcs'] = False
    p_xip1_given_zi = HydraNet(rng=rng, Xd=x_in_sym, \
            params=params, shared_param_dicts=None)
    p_xip1_given_zi.init_biases(0.2)
    ###################
    # q_zi_given_x_xi #
    ###################
    params = {}
    shared_config = [(obs_dim + obs_dim), 1500, 1500]
    top_config = [shared_config[-1], z_dim]
    params['shared_config'] = shared_config
    params['mu_config'] = top_config
    params['sigma_config'] = top_config
    params['activation'] = relu_actfun
    params['init_scale'] = init_scale
    params['vis_drop'] = 0.0
    params['hid_drop'] = 0.0
    params['bias_noise'] = 0.0
    params['input_noise'] = 0.0
    params['build_theano_funcs'] = False
    q_zi_given_x_xi = InfNet(rng=rng, Xd=x_in_sym, \
            params=params, shared_param_dicts=None)
    q_zi_given_x_xi.init_biases(0.2)


    ###########################################################
    # Define parameters for the GPSImputer, and initialize it #
    ###########################################################
    print("Building the GPSImputer...")
    gpsi_params = {}
    gpsi_params['obs_dim'] = obs_dim
    gpsi_params['z_dim'] = z_dim
    gpsi_params['imp_steps'] = imp_steps
    gpsi_params['step_type'] = step_type
    gpsi_params['x_type'] = 'bernoulli'
    gpsi_params['obs_transform'] = 'sigmoid'
    GPSI = GPSImputer(rng=rng, 
            x_in=x_in_sym, x_out=x_out_sym, x_mask=x_mask_sym, \
            p_zi_given_xi=p_zi_given_xi, \
            p_xip1_given_zi=p_xip1_given_zi, \
            q_zi_given_x_xi=q_zi_given_x_xi, \
            params=gpsi_params, \
            shared_param_dicts=None)

    # # test model saving
    # print("Testing model save to file...")
    # GPSI.save_to_file("AAA_GPSI_SAVE_TEST.pkl")
    # # test model loading
    # print("Testing model load from file...")
    # GPSI = load_gpsimputer_from_file(f_name="AAA_GPSI_SAVE_TEST.pkl", rng=rng)

    ################################################################
    # Apply some updates, to check that they aren't totally broken #
    ################################################################
    log_name = "{}_RESULTS.txt".format(result_tag)
    out_file = open(log_name, 'wb')
    costs = [0. for i in range(10)]
    learn_rate = 0.0002
    momentum = 0.5
    batch_idx = np.arange(batch_size) + tr_samples
    for i in range(200005):
        scale = min(1.0, ((i+1) / 5000.0))
        if (((i + 1) % 15000) == 0):
            learn_rate = learn_rate * 0.92
        if (i > 10000):
            momentum = 0.90
        else:
            momentum = 0.50
        # get the indices of training samples for this batch update
        batch_idx += batch_size
        if (np.max(batch_idx) >= tr_samples):
            # we finished an "epoch", so we rejumble the training set
            Xtr = row_shuffle(Xtr)
            batch_idx = np.arange(batch_size)
        # set sgd and objective function hyperparams for this update
        GPSI.set_sgd_params(lr=scale*learn_rate, \
                            mom_1=scale*momentum, mom_2=0.98)
        GPSI.set_train_switch(1.0)
        GPSI.set_lam_nll(lam_nll=1.0)
        GPSI.set_lam_kld(lam_kld_p=0.1, lam_kld_q=0.9)
        GPSI.set_lam_l2w(1e-4)
        # perform a minibatch update and record the cost for this batch
        xb = to_fX( Xtr.take(batch_idx, axis=0) )
        xi, xo, xm = construct_masked_data(xb, drop_prob=drop_prob, \
                                        occ_dim=occ_dim, data_mean=data_mean)
        result = GPSI.train_joint(xi, xo, xm, batch_reps)
        # do diagnostics and general training tracking
        costs = [(costs[j] + result[j]) for j in range(len(result)-1)]
        if ((i % 250) == 0):
            costs = [(v / 250.0) for v in costs]
            str1 = "-- batch {0:d} --".format(i)
            str2 = "    joint_cost: {0:.4f}".format(costs[0])
            str3 = "    nll_bound : {0:.4f}".format(costs[1])
            str4 = "    nll_cost  : {0:.4f}".format(costs[2])
            str5 = "    kld_cost  : {0:.4f}".format(costs[3])
            str6 = "    reg_cost  : {0:.4f}".format(costs[4])
            joint_str = "\n".join([str1, str2, str3, str4, str5, str6])
            print(joint_str)
            out_file.write(joint_str+"\n")
            out_file.flush()
            costs = [0.0 for v in costs]
        if ((i % 1000) == 0):
            Xva = row_shuffle(Xva)
            # record an estimate of performance on the test set
            xi, xo, xm = construct_masked_data(Xva[0:5000], drop_prob=drop_prob, \
                                               occ_dim=occ_dim, data_mean=data_mean)
            nll, kld = GPSI.compute_fe_terms(xi, xo, xm, sample_count=10)
            vfe = np.mean(nll) + np.mean(kld)
            str1 = "    va_nll_bound : {}".format(vfe)
            str2 = "    va_nll_term  : {}".format(np.mean(nll))
            str3 = "    va_kld_q2p   : {}".format(np.mean(kld))
            joint_str = "\n".join([str1, str2, str3])
            print(joint_str)
            out_file.write(joint_str+"\n")
            out_file.flush()
            GPSI.save_to_file("{}_PARAMS.pkl".format(result_tag))
        if ((i % 20000) == 0):
            # Get some validation samples for evaluating model performance
            xb = to_fX( Xva[0:100] )
            xi, xo, xm = construct_masked_data(xb, drop_prob=drop_prob, \
                                    occ_dim=occ_dim, data_mean=data_mean)
            xi = np.repeat(xi, 2, axis=0)
            xo = np.repeat(xo, 2, axis=0)
            xm = np.repeat(xm, 2, axis=0)
            # draw some sample imputations from the model
            samp_count = xi.shape[0]
            _, model_samps = GPSI.sample_imputer(xi, xo, xm, use_guide_policy=False)
            seq_len = len(model_samps)
            seq_samps = np.zeros((seq_len*samp_count, model_samps[0].shape[1]))
            idx = 0
            for s1 in range(samp_count):
                for s2 in range(seq_len):
                    seq_samps[idx] = model_samps[s2][s1]
                    idx += 1
            file_name = "{0:s}_samples_ng_b{1:d}.png".format(result_tag, i)
            utils.visualize_samples(seq_samps, file_name, num_rows=20)
            # get visualizations of policy parameters
            file_name = "{0:s}_gen_gen_weights_b{1:d}.png".format(result_tag, i)
            W = GPSI.gen_gen_weights.get_value(borrow=False)
            utils.visualize_samples(W[:,:obs_dim], file_name, num_rows=20)
            file_name = "{0:s}_gen_inf_weights_b{1:d}.png".format(result_tag, i)
            W = GPSI.gen_inf_weights.get_value(borrow=False).T
            utils.visualize_samples(W[:,:obs_dim], file_name, num_rows=20)
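
The masked training batches above come from construct_masked_data, which lives elsewhere in the repository. As a rough, hypothetical sketch (not the repository's implementation) of what a helper with these drop_prob / occ_dim / data_mean arguments typically does: hide pixels either by random dropout or by a square occlusion, fill the hidden pixels with the data mean, and return the masked input, the clean target, and the binary mask.

# Hypothetical sketch of a construct_masked_data-style helper; im_shape is
# an added parameter (28x28 for MNIST, 48x48 for TFD in these examples).
import numpy as np

def sketch_masked_data(xb, drop_prob=0.0, occ_dim=0, data_mean=None, im_shape=(28, 28)):
    xb = xb.astype(np.float32)
    mask = np.ones_like(xb)                        # 1 -> observed, 0 -> missing
    if drop_prob > 0.0:
        mask *= (np.random.rand(*xb.shape) > drop_prob)
    if occ_dim > 0:
        rows, cols = im_shape
        for r in range(xb.shape[0]):
            top = np.random.randint(0, rows - occ_dim + 1)
            left = np.random.randint(0, cols - occ_dim + 1)
            m = mask[r].reshape(im_shape)
            m[top:top+occ_dim, left:left+occ_dim] = 0.0
            mask[r] = m.ravel()
    fill = 0.0 if data_mean is None else data_mean  # fill hidden pixels with the mean
    x_in = (mask * xb) + ((1.0 - mask) * fill)
    return x_in, xb, mask
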
def test_lstm_structpred(step_type='add', use_pol=True, use_binary=False):
    ###########################################
    # Make a tag for identifying result files #
    ###########################################
    pol_tag = "P1" if use_pol else "P0"
    bin_tag = "B1" if use_binary else "B0"
    res_tag = "STRUCT_PRED_RESULTS/SP_LSTM_{}_{}_{}".format(step_type, pol_tag, bin_tag)

    if use_binary:
        ############################
        # Get binary training data #
        ############################
        rng = np.random.RandomState(1234)
        Xtr, Xva, Xte = load_binarized_mnist(data_path='./data/')
        #Xtr = np.vstack((Xtr, Xva))
        #Xva = Xte
    else:
        ################################
        # Get continuous training data #
        ################################
        rng = np.random.RandomState(1234)
        dataset = 'data/mnist.pkl.gz'
        datasets = load_udm(dataset, as_shared=False, zero_mean=False)
        Xtr = datasets[0][0]
        Xva = datasets[1][0]
        Xte = datasets[2][0]
        #Xtr = np.concatenate((Xtr, Xva), axis=0)
        #Xva = Xte
        Xtr = to_fX(shift_and_scale_into_01(Xtr))
        Xva = to_fX(shift_and_scale_into_01(Xva))
    tr_samples = Xtr.shape[0]
    va_samples = Xva.shape[0]
    batch_size = 200


    ########################################################
    # Split data into "observation" and "prediction" parts #
    ########################################################
    obs_cols = 14             # number of columns to observe
    pred_cols = 28 - obs_cols # number of columns to predict
    x_dim = obs_cols * 28     # dimensionality of observations
    y_dim = pred_cols * 28    # dimensionality of predictions
    Xtr, Ytr = img_split(Xtr, im_dim=(28, 28), split_col=obs_cols, transposed=True)
    Xva, Yva = img_split(Xva, im_dim=(28, 28), split_col=obs_cols, transposed=True)

    ############################################################
    # Setup some parameters for the Iterative Refinement Model #
    ############################################################
    read_dim = 128
    write_dim = 128
    mlp_dim = 128
    rnn_dim = 128
    z_dim = 64
    n_iter = 15

    rnninits = {
        'weights_init': IsotropicGaussian(0.01),
        'biases_init': Constant(0.),
    }
    inits = {
        'weights_init': IsotropicGaussian(0.01),
        'biases_init': Constant(0.),
    }

    # setup reader/writer models
    reader_mlp = MLP([Rectifier(), Tanh()], [x_dim, mlp_dim, read_dim],
                     name="reader_mlp", **inits)
    writer_mlp = MLP([Rectifier(), None], [rnn_dim, mlp_dim, y_dim],
                     name="writer_mlp", **inits)

    # setup submodels for processing LSTM inputs
    pol_inp_dim = y_dim + read_dim + rnn_dim
    var_inp_dim = y_dim + y_dim + read_dim + rnn_dim
    pol_mlp_in = MLP([Identity()], [pol_inp_dim, 4*rnn_dim],
                     name="pol_mlp_in", **inits)
    var_mlp_in = MLP([Identity()], [var_inp_dim, 4*rnn_dim],
                     name="var_mlp_in", **inits)
    dec_mlp_in = MLP([Identity()], [z_dim, 4*rnn_dim],
                     name="dec_mlp_in", **inits)

    # setup submodels for turning LSTM states into conditionals over z
    pol_mlp_out = CondNet([], [rnn_dim, z_dim], name="pol_mlp_out", **inits)
    var_mlp_out = CondNet([], [rnn_dim, z_dim], name="var_mlp_out", **inits)
    dec_mlp_out = CondNet([], [rnn_dim, z_dim], name="dec_mlp_out", **inits)

    # setup the LSTMs for primary policy, guide policy, and shared dynamics
    pol_rnn = BiasedLSTM(dim=rnn_dim, ig_bias=2.0, fg_bias=2.0, \
                         name="pol_rnn", **rnninits)
    var_rnn = BiasedLSTM(dim=rnn_dim, ig_bias=2.0, fg_bias=2.0, \
                         name="var_rnn", **rnninits)
    dec_rnn = BiasedLSTM(dim=rnn_dim, ig_bias=2.0, fg_bias=2.0, \
                         name="dec_rnn", **rnninits)

    model = IRStructPredModel(
                n_iter,
                step_type=step_type,
                use_pol=use_pol,
                reader_mlp=reader_mlp,
                writer_mlp=writer_mlp,
                pol_mlp_in=pol_mlp_in,
                pol_mlp_out=pol_mlp_out,
                pol_rnn=pol_rnn,
                var_mlp_in=var_mlp_in,
                var_mlp_out=var_mlp_out,
                var_rnn=var_rnn,
                dec_mlp_in=dec_mlp_in,
                dec_mlp_out=dec_mlp_out,
                dec_rnn=dec_rnn)
    model.initialize()

    compile_start_time = time.time()

    # build the cost gradients, training function, samplers, etc.
    model.build_sampling_funcs()
    print("Testing model sampler...")
    # draw some independent samples from the model
    samp_count = 10
    samp_reps = 3
    x_in = Xtr[:10,:].repeat(samp_reps, axis=0)
    y_in = Ytr[:10,:].repeat(samp_reps, axis=0)
    x_samps, y_samps = model.sample_model(x_in, y_in, sample_source='p')
    # TODO: visualize sample prediction trajectories
    img_seq = seq_img_join(x_samps, y_samps, im_dim=(28,28), transposed=True)
    seq_len = len(img_seq)
    samp_count = img_seq[0].shape[0]
    seq_samps = np.zeros((seq_len*samp_count, img_seq[0].shape[1]))
    idx = 0
    for s1 in range(samp_count):
        for s2 in range(seq_len):
            seq_samps[idx] = img_seq[s2][s1]
            idx += 1
    file_name = "{0:s}_samples_b{1:d}.png".format(res_tag, 0)
    utils.visualize_samples(seq_samps, file_name, num_rows=samp_count)

    model.build_model_funcs()

    compile_end_time = time.time()
    compile_minutes = (compile_end_time - compile_start_time) / 60.0
    print("THEANO COMPILE TIME (MIN): {}".format(compile_minutes))

    ################################################################
    # Apply some updates, to check that they aren't totally broken #
    ################################################################
    print("Beginning to train the model...")
    out_file = open("{}_results.txt".format(res_tag), 'wb')
    costs = [0. for i in range(10)]
    learn_rate = 0.0002
    momentum = 0.9
    batch_idx = np.arange(batch_size) + tr_samples
    for i in range(300000):
        scale = min(1.0, ((i+1) / 5000.0))
        if (((i + 1) % 10000) == 0):
            learn_rate = learn_rate * 0.95
        # get the indices of training samples for this batch update
        batch_idx += batch_size
        if (np.max(batch_idx) >= tr_samples):
            # we finished an "epoch", so we rejumble the training set
            Xtr, Ytr = row_shuffle(Xtr, Ytr)
            batch_idx = np.arange(batch_size)
        # set sgd and objective function hyperparams for this update
        model.set_sgd_params(lr=scale*learn_rate, mom_1=scale*momentum, mom_2=0.98)
        model.set_lam_kld(lam_kld_q2p=1.0, lam_kld_p2q=0.1)
        model.set_grad_noise(grad_noise=0.02)
        # perform a minibatch update and record the cost for this batch
        Xb = to_fX(Xtr.take(batch_idx, axis=0))
        Yb = to_fX(Ytr.take(batch_idx, axis=0))
        result = model.train_joint(Xb, Yb)
        costs = [(costs[j] + result[j]) for j in range(len(result))]

        # diagnostics
        if ((i % 250) == 0):
            costs = [(v / 250.0) for v in costs]
            str1 = "-- batch {0:d} --".format(i)
            str2 = "    total_cost: {0:.4f}".format(costs[0])
            str3 = "    nll_bound : {0:.4f}".format(costs[1])
            str4 = "    nll_term  : {0:.4f}".format(costs[2])
            str5 = "    kld_q2p   : {0:.4f}".format(costs[3])
            str6 = "    kld_p2q   : {0:.4f}".format(costs[4])
            str7 = "    reg_term  : {0:.4f}".format(costs[5])
            joint_str = "\n".join([str1, str2, str3, str4, str5, str6, str7])
            print(joint_str)
            out_file.write(joint_str+"\n")
            out_file.flush()
            costs = [0.0 for v in costs]
        if ((i % 1000) == 0):
            model.save_model_params("{}_params.pkl".format(res_tag))
            # compute a small-sample estimate of NLL bound on validation set
            Xva, Yva = row_shuffle(Xva, Yva)
            Xb = to_fX(Xva[:5000])
            Yb = to_fX(Yva[:5000])
            va_costs = model.compute_nll_bound(Xb, Yb)
            str1 = "    va_nll_bound : {}".format(va_costs[1])
            str2 = "    va_nll_term  : {}".format(va_costs[2])
            str3 = "    va_kld_q2p   : {}".format(va_costs[3])
            joint_str = "\n".join([str1, str2, str3])
            print(joint_str)
            out_file.write(joint_str+"\n")
            out_file.flush()
            # draw some independent samples from the model
            samp_count = 10
            samp_reps = 3
            x_in = Xva[:samp_count,:].repeat(samp_reps, axis=0)
            y_in = Yva[:samp_count,:].repeat(samp_reps, axis=0)
            x_samps, y_samps = model.sample_model(x_in, y_in, sample_source='p')
            # visualize sample prediction trajectories
            img_seq = seq_img_join(x_samps, y_samps, im_dim=(28,28), transposed=True)
            seq_len = len(img_seq)
            samp_count = img_seq[0].shape[0]
            seq_samps = np.zeros((seq_len*samp_count, img_seq[0].shape[1]))
            idx = 0
            for s1 in range(samp_count):
                for s2 in range(seq_len):
                    if use_binary:
                        seq_samps[idx] = binarize_data(img_seq[s2][s1])
                    else:
                        seq_samps[idx] = img_seq[s2][s1]
                    idx += 1
            file_name = "{0:s}_samples_b{1:d}.png".format(res_tag, i)
            utils.visualize_samples(seq_samps, file_name, num_rows=samp_count)
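
img_split and seq_img_join are utility functions from the repository. A minimal, hypothetical sketch of the column split used above (observed left columns vs. predicted right columns), assuming row-major 28x28 images; the real helper's transposed flag controls whether the split runs over rows or columns.

# Hypothetical sketch of an img_split-style column split (not the repository's
# implementation): keep the first `split_col` columns as the observation and
# the remaining columns as the prediction target.
import numpy as np

def sketch_img_split(X, im_dim=(28, 28), split_col=14):
    rows, cols = im_dim
    imgs = X.reshape((-1, rows, cols))
    X_obs = imgs[:, :, :split_col].reshape((X.shape[0], -1))   # rows * split_col dims
    X_pred = imgs[:, :, split_col:].reshape((X.shape[0], -1))  # rows * (cols - split_col) dims
    return X_obs, X_pred

With obs_cols = 14 this yields x_dim = 14*28 = 392 and y_dim = 392, matching the dimensionalities computed in test_lstm_structpred above.
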
Example #18
def test_one_stage_model():
    ##########################
    # Get some training data #
    ##########################
    rng = np.random.RandomState(1234)
    Xtr, Xva, Xte = load_binarized_mnist(data_path='./data/')
    Xtr = np.vstack((Xtr, Xva))
    Xva = Xte
    #del Xte
    tr_samples = Xtr.shape[0]
    va_samples = Xva.shape[0]
    batch_size = 128
    batch_reps = 1

    ###############################################
    # Setup some parameters for the OneStageModel #
    ###############################################
    x_dim = Xtr.shape[1]
    z_dim = 64
    x_type = 'bernoulli'
    xin_sym = T.matrix('xin_sym')

    ###############
    # p_x_given_z #
    ###############
    params = {}
    shared_config = \
    [ {'layer_type': 'fc',
       'in_chans': z_dim,
       'out_chans': 256,
       'activation': relu_actfun,
       'apply_bn': True}, \
      {'layer_type': 'fc',
       'in_chans': 256,
       'out_chans': 7*7*128,
       'activation': relu_actfun,
       'apply_bn': True,
       'shape_func_out': lambda x: T.reshape(x, (-1, 128, 7, 7))}, \
      {'layer_type': 'conv',
       'in_chans': 128, # in shape:  (batch, 128, 7, 7)
       'out_chans': 64, # out shape: (batch, 64, 14, 14)
       'activation': relu_actfun,
       'filt_dim': 5,
       'conv_stride': 'half',
       'apply_bn': True} ]
    output_config = \
    [ {'layer_type': 'conv',
       'in_chans': 64, # in shape:  (batch, 64, 14, 14)
       'out_chans': 1, # out shape: (batch, 1, 28, 28)
       'activation': relu_actfun,
       'filt_dim': 5,
       'conv_stride': 'half',
       'apply_bn': False,
       'shape_func_out': lambda x: T.flatten(x, 2)}, \
      {'layer_type': 'conv',
       'in_chans': 64,
       'out_chans': 1,
       'activation': relu_actfun,
       'filt_dim': 5,
       'conv_stride': 'half',
       'apply_bn': False,
       'shape_func_out': lambda x: T.flatten(x, 2)} ]
    params['shared_config'] = shared_config
    params['output_config'] = output_config
    params['init_scale'] = 1.0
    params['build_theano_funcs'] = False
    p_x_given_z = HydraNet(rng=rng, Xd=xin_sym, \
            params=params, shared_param_dicts=None)
    p_x_given_z.init_biases(0.0)
    ###############
    # q_z_given_x #
    ###############
    params = {}
    shared_config = \
    [ {'layer_type': 'conv',
       'in_chans': 1,   # in shape:  (batch, 784)
       'out_chans': 64, # out shape: (batch, 64, 14, 14)
       'activation': relu_actfun,
       'filt_dim': 5,
       'conv_stride': 'double',
       'apply_bn': True,
       'shape_func_in': lambda x: T.reshape(x, (-1, 1, 28, 28))}, \
      {'layer_type': 'conv',
       'in_chans': 64,   # in shape:  (batch, 64, 14, 14)
       'out_chans': 128, # out shape: (batch, 128, 7, 7)
       'activation': relu_actfun,
       'filt_dim': 5,
       'conv_stride': 'double',
       'apply_bn': True,
       'shape_func_out': lambda x: T.flatten(x, 2)}, \
      {'layer_type': 'fc',
       'in_chans': 128*7*7,
       'out_chans': 256,
       'activation': relu_actfun,
       'apply_bn': True} ]
    output_config = \
    [ {'layer_type': 'fc',
       'in_chans': 256,
       'out_chans': z_dim,
       'activation': relu_actfun,
       'apply_bn': False}, \
      {'layer_type': 'fc',
       'in_chans': 256,
       'out_chans': z_dim,
       'activation': relu_actfun,
       'apply_bn': False} ]
    params['shared_config'] = shared_config
    params['output_config'] = output_config
    params['init_scale'] = 1.0
    params['build_theano_funcs'] = False
    q_z_given_x = HydraNet(rng=rng, Xd=xin_sym, \
            params=params, shared_param_dicts=None)
    q_z_given_x.init_biases(0.0)

    ##############################################################
    # Define parameters for the OneStageModel, and initialize it #
    ##############################################################
    print("Building the OneStageModel...")
    osm_params = {}
    osm_params['x_type'] = x_type
    osm_params['obs_transform'] = 'sigmoid'
    OSM = OneStageModel(rng=rng,
                        x_in=xin_sym,
                        x_dim=x_dim,
                        z_dim=z_dim,
                        p_x_given_z=p_x_given_z,
                        q_z_given_x=q_z_given_x,
                        params=osm_params)

    ################################################################
    # Apply some updates, to check that they aren't totally broken #
    ################################################################
    log_name = "{}_RESULTS.txt".format("OSM_TEST")
    out_file = open(log_name, 'wb')
    costs = [0. for i in range(10)]
    learn_rate = 0.0005
    momentum = 0.9
    batch_idx = np.arange(batch_size) + tr_samples
    for i in range(500000):
        scale = min(0.5, ((i + 1) / 5000.0))
        if (((i + 1) % 10000) == 0):
            learn_rate = learn_rate * 0.95
        # get the indices of training samples for this batch update
        batch_idx += batch_size
        if (np.max(batch_idx) >= tr_samples):
            # we finished an "epoch", so we rejumble the training set
            Xtr = row_shuffle(Xtr)
            batch_idx = np.arange(batch_size)
        Xb = to_fX(Xtr.take(batch_idx, axis=0))
        #Xb = binarize_data(Xtr.take(batch_idx, axis=0))
        # set sgd and objective function hyperparams for this update
        OSM.set_sgd_params(lr=scale*learn_rate, \
                           mom_1=(scale*momentum), mom_2=0.98)
        OSM.set_lam_nll(lam_nll=1.0)
        OSM.set_lam_kld(lam_kld=1.0)
        OSM.set_lam_l2w(1e-5)
        # perform a minibatch update and record the cost for this batch
        result = OSM.train_joint(Xb, batch_reps)
        costs = [(costs[j] + result[j]) for j in range(len(result))]
        if ((i % 500) == 0):
            costs = [(v / 500.0) for v in costs]
            str1 = "-- batch {0:d} --".format(i)
            str2 = "    joint_cost: {0:.4f}".format(costs[0])
            str3 = "    nll_cost  : {0:.4f}".format(costs[1])
            str4 = "    kld_cost  : {0:.4f}".format(costs[2])
            str5 = "    reg_cost  : {0:.4f}".format(costs[3])
            joint_str = "\n".join([str1, str2, str3, str4, str5])
            print(joint_str)
            out_file.write(joint_str + "\n")
            out_file.flush()
            costs = [0.0 for v in costs]
        if (((i % 5000) == 0) or ((i < 10000) and ((i % 1000) == 0))):
            # draw some independent random samples from the model
            samp_count = 300
            model_samps = OSM.sample_from_prior(samp_count)
            file_name = "OSM_SAMPLES_b{0:d}.png".format(i)
            utils.visualize_samples(model_samps, file_name, num_rows=15)
            # compute free energy estimate for validation samples
            Xva = row_shuffle(Xva)
            fe_terms = OSM.compute_fe_terms(Xva[0:5000], 20)
            fe_mean = np.mean(fe_terms[0]) + np.mean(fe_terms[1])
            out_str = "    nll_bound : {0:.4f}".format(fe_mean)
            print(out_str)
            out_file.write(out_str + "\n")
            out_file.flush()
    return
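
The validation score logged above is a Monte Carlo estimate of the variational free energy (negative ELBO): a per-example reconstruction NLL plus a posterior KL term, each averaged over several posterior samples. A minimal sketch of how such terms combine, under the assumption that the model exposes per-sample nll and kld arrays (the exact return format of compute_fe_terms is not shown in this snippet).

# Hypothetical sketch (not the repository's compute_fe_terms): combine
# per-example NLL and KL terms, each estimated from `sample_count` posterior
# samples, into a single free-energy (negative ELBO) estimate.
import numpy as np

def sketch_free_energy(nll_samples, kld_samples):
    # nll_samples, kld_samples: arrays of shape (num_examples, sample_count)
    nll = np.mean(nll_samples, axis=1)   # average reconstruction term per example
    kld = np.mean(kld_samples, axis=1)   # average KL term per example
    return np.mean(nll) + np.mean(kld)   # mirrors fe_mean in the loop above
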
Example #19
    for i in range(MCS.chain_len):
        Xtr_chains.append(0.0*Xtr)

    print("Testing chain sampler....")
    loop_times = []
    # TESTING SAMPLING SPEED!
    for i in range(batch_count):
        start_time = time.clock()
        batch_start = i * batch_size
        batch_end = min(tr_samples, (batch_start + batch_size))
        Xd_batch = Xtr[batch_start:batch_end]
        Xd_chain = MCS.sample_from_chain(Xd_batch)
        Xs = [Xd_batch[0:50]]
        Xs.extend([xd[0:50] for xd in Xd_chain])
        file_name = "MCS_TEST_{0:d}.png".format(i)
        utils.visualize_samples(np.vstack(Xs), file_name, num_rows=10)
        loop_times.append((time.clock() - start_time))
    total_time = sum(loop_times)
    mean_time = total_time / batch_count
    time_std = (sum([(t - mean_time)**2.0 for t in loop_times]) / batch_count)**0.5
    print("total_time: {0:.4f}".format(total_time))
    print("mean_time: {0:.4f}, time_std: {1:.4f}".format(mean_time, time_std))
    start_time = time.clock()
    Xtr_chains = resample_chain_steps(MCS, Xtr_chains)
    total_time = time.clock() - start_time
    print("total_time: {0:.4f}".format(total_time))




Example #20
def test_two_stage_model2():
    ##########################
    # Get some training data #
    ##########################
    rng = np.random.RandomState(1234)
    Xtr, Xva, Xte = load_binarized_mnist(data_path='./data/')
    Xtr = np.vstack((Xtr, Xva))
    Xva = Xte
    #del Xte
    tr_samples = Xtr.shape[0]
    va_samples = Xva.shape[0]
    batch_size = 128
    batch_reps = 1

    ###############################################
    # Setup some parameters for the TwoStageModel #
    ###############################################
    x_dim = Xtr.shape[1]
    z_dim = 50
    h_dim = 100
    x_type = 'bernoulli'

    # some InfNet instances to build the TwoStageModel from
    xin_sym = T.matrix('xin_sym')
    xout_sym = T.matrix('xout_sym')

    ###############
    # p_h_given_z #
    ###############
    params = {}
    shared_config = \
    [ {'layer_type': 'fc',
       'in_chans': z_dim,
       'out_chans': 100,
       'activation': tanh_actfun,
       'apply_bn': True}, \
      {'layer_type': 'fc',
       'in_chans': 100,
       'out_chans': 100,
       'activation': tanh_actfun,
       'apply_bn': True} ]
    output_config = \
    [ {'layer_type': 'fc',
       'in_chans': 100,
       'out_chans': h_dim,
       'activation': tanh_actfun,
       'apply_bn': False}, \
      {'layer_type': 'fc',
       'in_chans': 100,
       'out_chans': h_dim,
       'activation': tanh_actfun,
       'apply_bn': False} ]
    params['shared_config'] = shared_config
    params['output_config'] = output_config
    params['init_scale'] = 1.0
    params['build_theano_funcs'] = False
    p_h_given_z = HydraNet(rng=rng,
                           Xd=xin_sym,
                           params=params,
                           shared_param_dicts=None)
    p_h_given_z.init_biases(0.0)
    ###############
    # p_x_given_h #
    ###############
    params = {}
    shared_config = \
    [ {'layer_type': 'fc',
       'in_chans': h_dim,
       'out_chans': 200,
       'activation': tanh_actfun,
       'apply_bn': True}, \
      {'layer_type': 'fc',
       'in_chans': 200,
       'out_chans': 200,
       'activation': tanh_actfun,
       'apply_bn': True} ]
    output_config = \
    [ {'layer_type': 'fc',
       'in_chans': 200,
       'out_chans': x_dim,
       'activation': tanh_actfun,
       'apply_bn': False}, \
      {'layer_type': 'fc',
       'in_chans': 200,
       'out_chans': x_dim,
       'activation': tanh_actfun,
       'apply_bn': False} ]
    params['shared_config'] = shared_config
    params['output_config'] = output_config
    params['init_scale'] = 1.0
    params['build_theano_funcs'] = False
    p_x_given_h = HydraNet(rng=rng,
                           Xd=xin_sym,
                           params=params,
                           shared_param_dicts=None)
    p_x_given_h.init_biases(0.0)
    ###############
    # q_h_given_x #
    ###############
    params = {}
    shared_config = \
    [ {'layer_type': 'fc',
       'in_chans': x_dim,
       'out_chans': 200,
       'activation': tanh_actfun,
       'apply_bn': True}, \
      {'layer_type': 'fc',
       'in_chans': 200,
       'out_chans': 200,
       'activation': tanh_actfun,
       'apply_bn': True} ]
    output_config = \
    [ {'layer_type': 'fc',
       'in_chans': 200,
       'out_chans': h_dim,
       'activation': tanh_actfun,
       'apply_bn': False}, \
      {'layer_type': 'fc',
       'in_chans': 200,
       'out_chans': h_dim,
       'activation': tanh_actfun,
       'apply_bn': False} ]
    params['shared_config'] = shared_config
    params['output_config'] = output_config
    params['init_scale'] = 1.0
    params['build_theano_funcs'] = False
    q_h_given_x = HydraNet(rng=rng,
                           Xd=xin_sym,
                           params=params,
                           shared_param_dicts=None)
    q_h_given_x.init_biases(0.0)
    ###############
    # q_z_given_h #
    ###############
    params = {}
    shared_config = \
    [ {'layer_type': 'fc',
       'in_chans': h_dim,
       'out_chans': 100,
       'activation': tanh_actfun,
       'apply_bn': True}, \
      {'layer_type': 'fc',
       'in_chans': 100,
       'out_chans': 100,
       'activation': tanh_actfun,
       'apply_bn': True} ]
    output_config = \
    [ {'layer_type': 'fc',
       'in_chans': 100,
       'out_chans': z_dim,
       'activation': tanh_actfun,
       'apply_bn': False}, \
      {'layer_type': 'fc',
       'in_chans': 100,
       'out_chans': z_dim,
       'activation': tanh_actfun,
       'apply_bn': False} ]
    params['shared_config'] = shared_config
    params['output_config'] = output_config
    params['init_scale'] = 1.0
    params['build_theano_funcs'] = False
    q_z_given_h = HydraNet(rng=rng,
                           Xd=xin_sym,
                           params=params,
                           shared_param_dicts=None)
    q_z_given_h.init_biases(0.0)

    ##############################################################
    # Define parameters for the TwoStageModel, and initialize it #
    ##############################################################
    print("Building the TwoStageModel...")
    tsm_params = {}
    tsm_params['x_type'] = x_type
    tsm_params['obs_transform'] = 'sigmoid'
    TSM = TwoStageModel2(rng=rng,
                         x_in=xin_sym,
                         x_out=xout_sym,
                         x_dim=x_dim,
                         z_dim=z_dim,
                         h_dim=h_dim,
                         q_h_given_x=q_h_given_x,
                         q_z_given_h=q_z_given_h,
                         p_h_given_z=p_h_given_z,
                         p_x_given_h=p_x_given_h,
                         params=tsm_params)

    ################################################################
    # Apply some updates, to check that they aren't totally broken #
    ################################################################
    log_name = "{}_RESULTS.txt".format("TSM2A_TEST")
    out_file = open(log_name, 'wb')
    costs = [0. for i in range(10)]
    learn_rate = 0.001
    momentum = 0.9
    batch_idx = np.arange(batch_size) + tr_samples
    for i in range(500000):
        scale = min(1.0, ((i + 1) / 5000.0))
        if (((i + 1) % 10000) == 0):
            learn_rate = learn_rate * 0.95
        # get the indices of training samples for this batch update
        batch_idx += batch_size
        if (np.max(batch_idx) >= tr_samples):
            # we finished an "epoch", so we rejumble the training set
            Xtr = row_shuffle(Xtr)
            batch_idx = np.arange(batch_size)
        Xb = to_fX(Xtr.take(batch_idx, axis=0))
        #Xb = binarize_data(Xtr.take(batch_idx, axis=0))
        # set sgd and objective function hyperparams for this update
        TSM.set_sgd_params(lr=scale * learn_rate,
                           mom_1=(scale * momentum),
                           mom_2=0.98)
        TSM.set_train_switch(1.0)
        TSM.set_lam_nll(lam_nll=1.0)
        TSM.set_lam_kld(lam_kld_q2p=1.0, lam_kld_p2q=0.0)
        TSM.set_lam_l2w(1e-5)
        # perform a minibatch update and record the cost for this batch
        result = TSM.train_joint(Xb, Xb, batch_reps)
        costs = [(costs[j] + result[j]) for j in range(len(result))]
        if ((i % 500) == 0):
            costs = [(v / 500.0) for v in costs]
            str1 = "-- batch {0:d} --".format(i)
            str2 = "    joint_cost: {0:.4f}".format(costs[0])
            str3 = "    nll_cost  : {0:.4f}".format(costs[1])
            str4 = "    kld_cost  : {0:.4f}".format(costs[2])
            str5 = "    reg_cost  : {0:.4f}".format(costs[3])
            str6 = "    nll       : {0:.4f}".format(np.mean(costs[4]))
            str7 = "    kld_z     : {0:.4f}".format(np.mean(costs[5]))
            str8 = "    kld_h     : {0:.4f}".format(np.mean(costs[6]))
            joint_str = "\n".join(
                [str1, str2, str3, str4, str5, str6, str7, str8])
            print(joint_str)
            out_file.write(joint_str + "\n")
            out_file.flush()
            costs = [0.0 for v in costs]
        if (((i % 5000) == 0) or ((i < 10000) and ((i % 1000) == 0))):
            # draw some independent random samples from the model
            samp_count = 300
            model_samps = TSM.sample_from_prior(samp_count)
            file_name = "TSM2A_SAMPLES_b{0:d}.png".format(i)
            utils.visualize_samples(model_samps, file_name, num_rows=15)
            # compute free energy estimate for validation samples
            Xva = row_shuffle(Xva)
            fe_terms = TSM.compute_fe_terms(Xva[0:5000], Xva[0:5000], 20)
            fe_mean = np.mean(fe_terms[0]) + np.mean(fe_terms[1])
            out_str = "    nll_bound : {0:.4f}".format(fe_mean)
            print(out_str)
            out_file.write(out_str + "\n")
            out_file.flush()
    return
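
All of the training loops in these examples share the same minibatch bookkeeping: batch_idx starts past the end of the data so the very first iteration triggers a shuffle, and thereafter it slides forward by batch_size until it runs off the end again, at which point the data are reshuffled. A small self-contained illustration of that pattern:

# Self-contained illustration of the batch_idx cycling used in these loops:
# starting the indices past the end forces a shuffle on the first iteration.
import numpy as np

def iterate_minibatches(X, batch_size, num_updates, rng):
    tr_samples = X.shape[0]
    batch_idx = np.arange(batch_size) + tr_samples   # deliberately out of range
    for _ in range(num_updates):
        batch_idx += batch_size
        if np.max(batch_idx) >= tr_samples:
            X = X[rng.permutation(tr_samples)]       # "rejumble" the training set
            batch_idx = np.arange(batch_size)
        yield X.take(batch_idx, axis=0)

# usage:
# rng = np.random.RandomState(1234)
# for Xb in iterate_minibatches(Xtr, 128, 1000, rng): ...
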
Example #21
def test_mnist(step_type='add', imp_steps=6, occ_dim=15, drop_prob=0.0):
    #########################################
    # Format the result tag more thoroughly #
    #########################################
    dp_int = int(100.0 * drop_prob)
    result_tag = "{}GPSI_conv_bn_OD{}_DP{}_IS{}_{}_NA".format(
        RESULT_PATH, occ_dim, dp_int, imp_steps, step_type)

    ##########################
    # Get some training data #
    ##########################
    rng = np.random.RandomState(1234)
    dataset = 'data/mnist.pkl.gz'
    datasets = load_udm(dataset, as_shared=False, zero_mean=False)
    Xtr = datasets[0][0]
    Xva = datasets[1][0]
    Xte = datasets[2][0]
    # Merge validation set and training set, and test on test set.
    Xtr = np.concatenate((Xtr, Xva), axis=0)
    Xva = Xte
    Xtr = to_fX(shift_and_scale_into_01(Xtr))
    Xva = to_fX(shift_and_scale_into_01(Xva))
    tr_samples = Xtr.shape[0]
    va_samples = Xva.shape[0]
    batch_size = 200
    batch_reps = 1
    all_pix_mean = np.mean(np.mean(Xtr, axis=1))
    data_mean = to_fX(all_pix_mean * np.ones((Xtr.shape[1], )))

    ############################################################
    # Setup some parameters for the Iterative Refinement Model #
    ############################################################
    x_dim = Xtr.shape[1]
    z_dim = 100
    init_scale = 1.0
    use_bn = True

    x_in_sym = T.matrix('x_in_sym')
    x_out_sym = T.matrix('x_out_sym')
    x_mask_sym = T.matrix('x_mask_sym')

    #################
    # p_zi_given_xi #
    #################
    params = {}
    shared_config = \
    [ {'layer_type': 'conv',
       'in_chans': 1,   # in shape:  (batch, 784)
       'out_chans': 64, # out shape: (batch, 64, 14, 14)
       'activation': relu_actfun,
       'filt_dim': 5,
       'conv_stride': 'double',
       'apply_bn': use_bn,
       'shape_func_in': lambda x: T.reshape(x, (-1, 1, 28, 28))}, \
      {'layer_type': 'conv',
       'in_chans': 64,   # in shape:  (batch, 64, 14, 14)
       'out_chans': 128, # out shape: (batch, 128, 7, 7)
       'activation': relu_actfun,
       'filt_dim': 5,
       'conv_stride': 'double',
       'apply_bn': use_bn,
       'shape_func_out': lambda x: T.flatten(x, 2)}, \
      {'layer_type': 'fc',
       'in_chans': 128*7*7,
       'out_chans': 256,
       'activation': relu_actfun,
       'apply_bn': use_bn} ]
    output_config = \
    [ {'layer_type': 'fc',
       'in_chans': 256,
       'out_chans': z_dim,
       'activation': relu_actfun,
       'apply_bn': False}, \
      {'layer_type': 'fc',
       'in_chans': 256,
       'out_chans': z_dim,
       'activation': relu_actfun,
       'apply_bn': False} ]
    params['shared_config'] = shared_config
    params['output_config'] = output_config
    params['init_scale'] = init_scale
    params['build_theano_funcs'] = False
    p_zi_given_xi = HydraNet(rng=rng, Xd=x_in_sym, \
            params=params, shared_param_dicts=None)
    p_zi_given_xi.init_biases(0.0)
    ###################
    # p_sip1_given_zi #
    ###################
    params = {}
    shared_config = \
    [ {'layer_type': 'fc',
       'in_chans': z_dim,
       'out_chans': 256,
       'activation': relu_actfun,
       'apply_bn': use_bn}, \
      {'layer_type': 'fc',
       'in_chans': 256,
       'out_chans': 7*7*128,
       'activation': relu_actfun,
       'apply_bn': use_bn,
       'shape_func_out': lambda x: T.reshape(x, (-1, 128, 7, 7))}, \
      {'layer_type': 'conv',
       'in_chans': 128, # in shape:  (batch, 128, 7, 7)
       'out_chans': 64, # out shape: (batch, 64, 14, 14)
       'activation': relu_actfun,
       'filt_dim': 5,
       'conv_stride': 'half',
       'apply_bn': use_bn} ]
    output_config = \
    [ {'layer_type': 'conv',
       'in_chans': 64, # in shape:  (batch, 64, 14, 14)
       'out_chans': 1, # out shape: (batch, 1, 28, 28)
       'activation': relu_actfun,
       'filt_dim': 5,
       'conv_stride': 'half',
       'apply_bn': False,
       'shape_func_out': lambda x: T.flatten(x, 2)}, \
      {'layer_type': 'conv',
       'in_chans': 64,
       'out_chans': 1,
       'activation': relu_actfun,
       'filt_dim': 5,
       'conv_stride': 'half',
       'apply_bn': False,
       'shape_func_out': lambda x: T.flatten(x, 2)}, \
      {'layer_type': 'conv',
       'in_chans': 64,
       'out_chans': 1,
       'activation': relu_actfun,
       'filt_dim': 5,
       'conv_stride': 'half',
       'apply_bn': False,
       'shape_func_out': lambda x: T.flatten(x, 2)} ]
    params['shared_config'] = shared_config
    params['output_config'] = output_config
    params['init_scale'] = init_scale
    params['build_theano_funcs'] = False
    p_sip1_given_zi = HydraNet(rng=rng, Xd=x_in_sym, \
            params=params, shared_param_dicts=None)
    p_sip1_given_zi.init_biases(0.0)

    #################
    # q_zi_given_xi #
    #################
    params = {}
    shared_config = \
    [ {'layer_type': 'conv',
       'in_chans': 2,   # in shape:  (batch, 784+784)
       'out_chans': 64, # out shape: (batch, 64, 14, 14)
       'activation': relu_actfun,
       'filt_dim': 5,
       'conv_stride': 'double',
       'apply_bn': use_bn,
       'shape_func_in': lambda x: T.reshape(x, (-1, 2, 28, 28))}, \
      {'layer_type': 'conv',
       'in_chans': 64,   # in shape:  (batch, 64, 14, 14)
       'out_chans': 128, # out shape: (batch, 128, 7, 7)
       'activation': relu_actfun,
       'filt_dim': 5,
       'conv_stride': 'double',
       'apply_bn': use_bn,
       'shape_func_out': lambda x: T.flatten(x, 2)}, \
      {'layer_type': 'fc',
       'in_chans': 128*7*7,
       'out_chans': 256,
       'activation': relu_actfun,
       'apply_bn': use_bn} ]
    output_config = \
    [ {'layer_type': 'fc',
       'in_chans': 256,
       'out_chans': z_dim,
       'activation': relu_actfun,
       'apply_bn': False}, \
      {'layer_type': 'fc',
       'in_chans': 256,
       'out_chans': z_dim,
       'activation': relu_actfun,
       'apply_bn': False} ]
    params['shared_config'] = shared_config
    params['output_config'] = output_config
    params['init_scale'] = init_scale
    params['build_theano_funcs'] = False
    q_zi_given_xi = HydraNet(rng=rng, Xd=x_in_sym, \
            params=params, shared_param_dicts=None)
    q_zi_given_xi.init_biases(0.0)

    ###########################################################
    # Define parameters for the GPSImputer, and initialize it #
    ###########################################################
    print("Building the GPSImputer...")
    gpsi_params = {}
    gpsi_params['x_dim'] = x_dim
    gpsi_params['z_dim'] = z_dim
    # switch between direct construction and construction via p_x_given_si
    gpsi_params['imp_steps'] = imp_steps
    gpsi_params['step_type'] = step_type
    gpsi_params['x_type'] = 'bernoulli'
    gpsi_params['obs_transform'] = 'sigmoid'
    GPSI = GPSImputer(rng=rng,
                      x_in=x_in_sym,
                      x_out=x_out_sym,
                      x_mask=x_mask_sym,
                      p_zi_given_xi=p_zi_given_xi,
                      p_sip1_given_zi=p_sip1_given_zi,
                      q_zi_given_xi=q_zi_given_xi,
                      params=gpsi_params,
                      shared_param_dicts=None)

    ################################################################
    # Apply some updates, to check that they aren't totally broken #
    ################################################################
    log_name = "{}_RESULTS.txt".format(result_tag)
    out_file = open(log_name, 'wb')
    costs = [0. for i in range(10)]
    learn_rate = 0.0001
    momentum = 0.90
    batch_idx = np.arange(batch_size) + tr_samples
    for i in range(200000):
        scale = min(1.0, ((i + 1) / 5000.0))
        if (((i + 1) % 15000) == 0):
            learn_rate = learn_rate * 0.95
        # get the indices of training samples for this batch update
        batch_idx += batch_size
        if (np.max(batch_idx) >= tr_samples):
            # we finished an "epoch", so we rejumble the training set
            Xtr = row_shuffle(Xtr)
            batch_idx = np.arange(batch_size)
        # set sgd and objective function hyperparams for this update
        GPSI.set_sgd_params(lr=scale*learn_rate, \
                            mom_1=scale*momentum, mom_2=0.98)
        GPSI.set_train_switch(1.0)
        GPSI.set_lam_nll(lam_nll=1.0)
        GPSI.set_lam_kld(lam_kld_q=1.0, lam_kld_p=0.1, lam_kld_g=0.0)
        GPSI.set_lam_l2w(1e-5)
        # perform a minibatch update and record the cost for this batch
        xb = to_fX(Xtr.take(batch_idx, axis=0))
        xi, xo, xm = construct_masked_data(xb, drop_prob=drop_prob, \
                                        occ_dim=occ_dim, data_mean=data_mean)
        result = GPSI.train_joint(xi, xo, xm, batch_reps)
        # do diagnostics and general training tracking
        costs = [(costs[j] + result[j]) for j in range(len(result) - 1)]
        if ((i % 500) == 0):
            costs = [(v / 500.0) for v in costs]
            str1 = "-- batch {0:d} --".format(i)
            str2 = "    joint_cost: {0:.4f}".format(costs[0])
            str3 = "    nll_bound : {0:.4f}".format(costs[1])
            str4 = "    nll_cost  : {0:.4f}".format(costs[2])
            str5 = "    kld_cost  : {0:.4f}".format(costs[3])
            str6 = "    reg_cost  : {0:.4f}".format(costs[4])
            joint_str = "\n".join([str1, str2, str3, str4, str5, str6])
            print(joint_str)
            out_file.write(joint_str + "\n")
            out_file.flush()
            costs = [0.0 for v in costs]
        if ((i % 1000) == 0):
            Xva = row_shuffle(Xva)
            # record an estimate of performance on the test set
            xi, xo, xm = construct_masked_data(Xva[0:5000], drop_prob=drop_prob, \
                                               occ_dim=occ_dim, data_mean=data_mean)
            nll, kld = GPSI.compute_fe_terms(xi, xo, xm, sample_count=10)
            vfe = np.mean(nll) + np.mean(kld)
            str1 = "    va_nll_bound : {}".format(vfe)
            str2 = "    va_nll_term  : {}".format(np.mean(nll))
            str3 = "    va_kld_q2p   : {}".format(np.mean(kld))
            joint_str = "\n".join([str1, str2, str3])
            print(joint_str)
            out_file.write(joint_str + "\n")
            out_file.flush()
        if ((i % 2000) == 0):
            #GPSI.save_to_file("{}_PARAMS.pkl".format(result_tag))
            # Get some validation samples for evaluating model performance
            xb = to_fX(Xva[0:100])
            xi, xo, xm = construct_masked_data(xb, drop_prob=drop_prob, \
                                    occ_dim=occ_dim, data_mean=data_mean)
            xi = np.repeat(xi, 2, axis=0)
            xo = np.repeat(xo, 2, axis=0)
            xm = np.repeat(xm, 2, axis=0)
            # draw some sample imputations from the model
            samp_count = xi.shape[0]
            _, model_samps = GPSI.sample_imputer(xi,
                                                 xo,
                                                 xm,
                                                 use_guide_policy=False)
            seq_len = len(model_samps)
            seq_samps = np.zeros(
                (seq_len * samp_count, model_samps[0].shape[1]))
            idx = 0
            for s1 in range(samp_count):
                for s2 in range(seq_len):
                    seq_samps[idx] = model_samps[s2][s1]
                    idx += 1
            file_name = "{0:s}_samples_ng_b{1:d}.png".format(result_tag, i)
            utils.visualize_samples(seq_samps, file_name, num_rows=20)
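
The nested s1/s2 loops above interleave the imputation trajectories so that visualize_samples lays out each example's refinement steps side by side. The same interleaving can be done in one vectorized step; a short sketch assuming model_samps is a list of (samp_count, obs_dim) arrays, as produced by sample_imputer above:

# Vectorized version of the interleaving loops above: stack the list of
# (samp_count, obs_dim) step arrays along a new axis, then flatten so that
# all steps for example 0 come first, then all steps for example 1, etc.
import numpy as np

def interleave_trajectories(model_samps):
    steps = np.stack(model_samps, axis=1)          # (samp_count, seq_len, obs_dim)
    return steps.reshape((-1, steps.shape[-1]))    # (samp_count * seq_len, obs_dim)
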
Example #22
def pretrain_gip(extra_lam_kld=0.0, kld2_scale=0.0):
    # Initialize a source of randomness
    rng = np.random.RandomState(1234)

    # Load some data to train/validate/test with
    tr_file = 'data/svhn_train_gray.pkl'
    te_file = 'data/svhn_test_gray.pkl'
    ex_file = 'data/svhn_extra_gray.pkl'
    data = load_svhn_gray(tr_file, te_file, ex_file=ex_file, ex_count=200000)
    #all_file = 'data/svhn_all_gray_zca.pkl'
    #data = load_svhn_all_gray_zca(all_file)
    Xtr = np.vstack([data['Xtr'], data['Xex']])
    Xtr = Xtr - np.mean(Xtr, axis=1, keepdims=True)
    Xtr = Xtr / np.std(Xtr, axis=1, keepdims=True)
    Xtr = shift_and_scale_into_01(Xtr)
    Xtr, Xva = train_valid_split(Xtr, valid_count=5000)

    tr_samples = Xtr.shape[0]
    va_samples = Xva.shape[0]
    batch_size = 100
    batch_reps = 5

    # setup some symbolic variables and stuff
    Xp = T.matrix('Xp_base')
    Xd = T.matrix('Xd_base')
    Xc = T.matrix('Xc_base')
    Xm = T.matrix('Xm_base')
    data_dim = Xtr.shape[1]
    prior_sigma = 1.0

    ##########################
    # NETWORK CONFIGURATIONS #
    ##########################
    gn_params = {}
    gn_config = [PRIOR_DIM, 2400, 2400, data_dim]
    gn_params['mlp_config'] = gn_config
    gn_params['activation'] = relu_actfun
    gn_params['out_type'] = 'gaussian'
    gn_params['mean_transform'] = 'sigmoid'
    gn_params['logvar_type'] = 'single_shared'
    gn_params['init_scale'] = 1.2
    gn_params['lam_l2a'] = 1e-2
    gn_params['vis_drop'] = 0.0
    gn_params['hid_drop'] = 0.0
    gn_params['bias_noise'] = 0.1
    # choose some parameters for the continuous inferencer
    in_params = {}
    shared_config = [data_dim, 2400, 2400]
    top_config = [shared_config[-1], PRIOR_DIM]
    in_params['shared_config'] = shared_config
    in_params['mu_config'] = top_config
    in_params['sigma_config'] = top_config
    in_params['activation'] = relu_actfun
    in_params['init_scale'] = 1.2
    in_params['lam_l2a'] = 1e-2
    in_params['vis_drop'] = 0.2
    in_params['hid_drop'] = 0.0
    in_params['bias_noise'] = 0.1
    in_params['input_noise'] = 0.0
    in_params['kld2_scale'] = kld2_scale
    # Initialize the base networks for this GIPair
    IN = InfNet(rng=rng, Xd=Xd, Xc=Xc, Xm=Xm, prior_sigma=prior_sigma, \
            params=in_params, shared_param_dicts=None)
    GN = GenNet(rng=rng, Xp=Xp, prior_sigma=prior_sigma, \
            params=gn_params, shared_param_dicts=None)
    # Initialize biases in IN and GN
    IN.init_biases(0.1)
    GN.init_biases(0.1)

    ######################################
    # LOAD AND RESTART FROM SAVED PARAMS #
    ######################################
    # new_in_params = {'kld2_scale': kld2_scale, 'bias_noise': 0.2}
    # new_gn_params = {'bias_noise': 0.2}
    # # Load inferencer and generator from saved parameters
    # gn_fname = "TMS_RESULTS_DROPLESS/pt_params_b50000_GN.pkl"
    # in_fname = "TMS_RESULTS_DROPLESS/pt_params_b50000_IN.pkl"
    # IN = INet.load_infnet_from_file(f_name=in_fname, rng=rng, Xd=Xd, \
    #         Xc=Xc, Xm=Xm, new_params=new_in_params)
    # GN = GNet.load_gennet_from_file(f_name=gn_fname, rng=rng, Xp=Xp, \
    #         new_params=new_gn_params)
    # in_params = IN.params
    # gn_params = GN.params

    #########################
    # INITIALIZE THE GIPAIR #
    #########################
    GIP = GIPair(rng=rng, Xd=Xd, Xc=Xc, Xm=Xm, g_net=GN, i_net=IN, \
            data_dim=data_dim, prior_dim=PRIOR_DIM, params=None)
    GIP.set_lam_l2w(1e-4)

    ####################
    # RICA PRETRAINING #
    ####################
    IN.W_rica.set_value(0.05 * IN.W_rica.get_value(borrow=False))
    GN.W_rica.set_value(0.05 * GN.W_rica.get_value(borrow=False))
    for i in range(6000):
        scale = min(1.0, (float(i+1) / 6000.0))
        l_rate = 0.0001 * scale
        lam_l1 = 0.025
        tr_idx = npr.randint(low=0,high=tr_samples,size=(1000,))
        Xd_batch = Xtr.take(tr_idx, axis=0)
        inr_out = IN.train_rica(Xd_batch, l_rate, lam_l1)
        gnr_out = GN.train_rica(Xd_batch, l_rate, lam_l1)
        #inr_out = [v for v in gnr_out]
        if ((i % 1000) == 0):
            print("rica batch {0:d}: in_recon={1:.4f}, in_spars={2:.4f}, gn_recon={3:.4f}, gn_spars={4:.4f}".format( \
                    i, 1.*inr_out[1], 1.*inr_out[2], 1.*gnr_out[1], 1.*gnr_out[2]))
    # draw inference net first layer weights
    file_name = RESULT_PATH + "pt_rica_inf_weights.png"
    utils.visualize_samples(IN.W_rica.get_value(borrow=False).T, file_name, num_rows=20)
    # draw generator net final layer weights
    file_name = RESULT_PATH + "pt_rica_gen_weights.png"
    if ('gaussian' in gn_params['out_type']):
        lay_num = -2
    else:
        lay_num = -1
    utils.visualize_samples(GN.W_rica.get_value(borrow=False), file_name, num_rows=20)

    ######################
    # BASIC VAE TRAINING #
    ######################
    out_file = open(RESULT_PATH+"pt_gip_results.txt", 'wb')
    # Set initial learning rate and basic SGD hyper parameters
    cost_1 = [0. for i in range(10)]
    learn_rate = 0.0002
    for i in range(300000):
        scale = min(1.0, float(i) / 40000.0)
        if ((i + 1) % 100000 == 0):
            learn_rate = learn_rate * 0.8
        # do a minibatch update of the model, and compute some costs
        tr_idx = npr.randint(low=0,high=tr_samples,size=(batch_size,))
        Xd_batch = Xtr.take(tr_idx, axis=0)
        Xd_batch = np.repeat(Xd_batch, batch_reps, axis=0)
        Xc_batch = 0.0 * Xd_batch
        Xm_batch = 0.0 * Xd_batch
        # do a minibatch update of the model, and compute some costs
        GIP.set_all_sgd_params(lr_gn=(scale*learn_rate), \
                lr_in=(scale*learn_rate), mom_1=0.9, mom_2=0.999)
        GIP.set_lam_nll(1.0)
        GIP.set_lam_kld(1.0 + extra_lam_kld*scale)
        outputs = GIP.train_joint(Xd_batch, Xc_batch, Xm_batch)
        cost_1 = [(cost_1[k] + 1.*outputs[k]) for k in range(len(outputs))]
        if ((i % 1000) == 0):
            cost_1 = [(v / 1000.) for v in cost_1]
            o_str = "batch: {0:d}, joint_cost: {1:.4f}, data_nll_cost: {2:.4f}, post_kld_cost: {3:.4f}, other_reg_cost: {4:.4f}".format( \
                    i, cost_1[0], cost_1[1], cost_1[2], cost_1[3])
            print(o_str)
            out_file.write(o_str+"\n")
            out_file.flush()
            cost_1 = [0. for v in cost_1]
        if ((i % 5000) == 0):
            cost_2 = GIP.compute_costs(Xva, 0.*Xva, 0.*Xva)
            o_str = "--val: {0:d}, joint_cost: {1:.4f}, data_nll_cost: {2:.4f}, post_kld_cost: {3:.4f}, other_reg_cost: {4:.4f}".format( \
                    i, 1.*cost_2[0], 1.*cost_2[1], 1.*cost_2[2], 1.*cost_2[3])
            print(o_str)
            out_file.write(o_str+"\n")
            out_file.flush()
        if ((i % 5000) == 0):
            tr_idx = npr.randint(low=0,high=va_samples,size=(100,))
            Xd_batch = Xva.take(tr_idx, axis=0)
            file_name = RESULT_PATH+"pt_gip_chain_samples_b{0:d}.png".format(i)
            Xd_samps = np.repeat(Xd_batch[0:10,:], 3, axis=0)
            sample_lists = GIP.sample_from_chain(Xd_samps, loop_iters=20)
            Xs = np.vstack(sample_lists["data samples"])
            utils.visualize_samples(Xs, file_name, num_rows=20)
            # draw samples freely from the generative model's prior
            file_name = RESULT_PATH+"pt_gip_prior_samples_b{0:d}.png".format(i)
            Xs = GIP.sample_from_prior(20*20)
            utils.visualize_samples(Xs, file_name, num_rows=20)
            # draw inference net first layer weights
            file_name = RESULT_PATH+"pt_gip_inf_weights_b{0:d}.png".format(i)
            utils.visualize_net_layer(GIP.IN.shared_layers[0], file_name)
            # draw generator net final layer weights
            file_name = RESULT_PATH+"pt_gip_gen_weights_b{0:d}.png".format(i)
            if (gn_params['out_type'] == 'gaussian'):
                lay_num = -2
            else:
                lay_num = -1
            utils.visualize_net_layer(GIP.GN.mlp_layers[lay_num], file_name, \
                    colorImg=False, use_transpose=True)
            #########################
            # Check posterior KLds. #
            #########################
            post_klds = posterior_klds(IN, Xtr, 5000, 5)
            file_name = RESULT_PATH+"pt_gip_post_klds_b{0:d}.png".format(i)
            utils.plot_kde_histogram2( \
                    np.asarray(post_klds), np.asarray(post_klds), file_name, bins=30)
        if ((i % 10000) == 0):
            IN.save_to_file(f_name=RESULT_PATH+"pt_gip_params_b{0:d}_IN.pkl".format(i))
            GN.save_to_file(f_name=RESULT_PATH+"pt_gip_params_b{0:d}_GN.pkl".format(i))
    IN.save_to_file(f_name=RESULT_PATH+"pt_gip_params_IN.pkl")
    GN.save_to_file(f_name=RESULT_PATH+"pt_gip_params_GN.pkl")
    return
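
Every dataset in these examples passes through shift_and_scale_into_01 after optional per-row standardization. A rough, hypothetical sketch of what a helper with that name typically does (not the repository's exact implementation): shift by the minimum and divide by the range so all values land in [0, 1]. The per_row flag below is an added illustrative parameter.

# Hypothetical sketch of a shift_and_scale_into_01-style helper (not the
# repository's implementation): map values into [0, 1] by a min/range rescale.
import numpy as np

def sketch_shift_and_scale_into_01(X, per_row=False, eps=1e-8):
    axis = 1 if per_row else None
    x_min = X.min(axis=axis, keepdims=per_row)
    x_max = X.max(axis=axis, keepdims=per_row)
    return (X - x_min) / (x_max - x_min + eps)
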
Example #23
def train_walk_from_pretrained_gip(extra_lam_kld=0.0):
    # Simple test code, to check that everything is basically functional.
    print("TESTING...")

    # Initialize a source of randomness
    rng = np.random.RandomState(1234)

    # Load some data to train/validate/test with
    tr_file = 'data/svhn_train_gray.pkl'
    te_file = 'data/svhn_test_gray.pkl'
    ex_file = 'data/svhn_extra_gray.pkl'
    data = load_svhn_gray(tr_file, te_file, ex_file=ex_file, ex_count=200000)
    #all_file = 'data/svhn_all_gray_zca.pkl'
    #data = load_svhn_all_gray_zca(all_file)
    Xtr = np.vstack([data['Xtr'], data['Xex']])
    Xtr = Xtr - np.mean(Xtr, axis=1, keepdims=True)
    Xtr = Xtr / np.std(Xtr, axis=1, keepdims=True)
    Xtr = shift_and_scale_into_01(Xtr)
    Xtr, Xva = train_valid_split(Xtr, valid_count=5000)
    print("Xtr.shape: {0:s}, Xva.shape: {1:s}".format(str(Xtr.shape),str(Xva.shape)))

    # get and set some basic dataset information
    tr_samples = Xtr.shape[0]
    va_samples = Xva.shape[0]
    data_dim = Xtr.shape[1]
    batch_size = 100
    batch_reps = 5
    prior_sigma = 1.0
    Xtr_mean = np.mean(Xtr, axis=0, keepdims=True)
    Xtr_mean = (0.0 * Xtr_mean) + np.mean(np.mean(Xtr,axis=1))
    Xc_mean = np.repeat(Xtr_mean, batch_size, axis=0)

    # Symbolic inputs
    Xd = T.matrix(name='Xd')
    Xc = T.matrix(name='Xc')
    Xm = T.matrix(name='Xm')
    Xt = T.matrix(name='Xt')
    Xp = T.matrix(name='Xp')

    START_FRESH = True
    if START_FRESH:
        ###############################
        # Setup discriminator network #
        ###############################
        # Set some reasonable mlp parameters
        dn_params = {}
        # Set up some proto-networks
        pc0 = [data_dim, (300, 4), (300, 4), 10]
        dn_params['proto_configs'] = [pc0]
        # Set up some spawn networks
        sc0 = {'proto_key': 0, 'input_noise': 0.1, 'bias_noise': 0.1, 'do_dropout': True}
        #sc1 = {'proto_key': 0, 'input_noise': 0.1, 'bias_noise': 0.1, 'do_dropout': True}
        dn_params['spawn_configs'] = [sc0]
        dn_params['spawn_weights'] = [1.0]
        # Set remaining params
        dn_params['init_scale'] = 0.25
        dn_params['lam_l2a'] = 1e-2
        dn_params['vis_drop'] = 0.2
        dn_params['hid_drop'] = 0.5
        # Initialize a network object to use as the discriminator
        DN = PeaNet(rng=rng, Xd=Xd, params=dn_params)
        DN.init_biases(0.0)

        #######################################################
        # Load inferencer and generator from saved parameters #
        #######################################################
        gn_fname = RESULT_PATH+"pt_gip_params_b200000_GN.pkl"
        in_fname = RESULT_PATH+"pt_gip_params_b200000_IN.pkl"
        IN = INet.load_infnet_from_file(f_name=in_fname, rng=rng, Xd=Xd, Xc=Xc, Xm=Xm)
        GN = GNet.load_gennet_from_file(f_name=gn_fname, rng=rng, Xp=Xp)
    else:
        ###########################################################
        # Load all networks from partially-trained VCGLoop params #
        ###########################################################
        gn_fname = RESULT_PATH+"pt_walk_params_GN.pkl"
        in_fname = RESULT_PATH+"pt_walk_params_IN.pkl"
        dn_fname = RESULT_PATH+"pt_walk_params_DN.pkl"
        IN = INet.load_infnet_from_file(f_name=in_fname, rng=rng, Xd=Xd, Xc=Xc, Xm=Xm)
        GN = GNet.load_gennet_from_file(f_name=gn_fname, rng=rng, Xp=Xp)
        DN = PNet.load_peanet_from_file(f_name=dn_fname, rng=rng, Xd=Xd)

    ###############################
    # Initialize the main VCGLoop #
    ###############################
    vcgl_params = {}
    vcgl_params['lam_l2d'] = 5e-2
    VCGL = VCGLoop(rng=rng, Xd=Xd, Xc=Xc, Xm=Xm, Xt=Xt, i_net=IN, \
                 g_net=GN, d_net=DN, chain_len=6, data_dim=data_dim, \
                 prior_dim=PRIOR_DIM, params=vcgl_params)
    VCGL.set_lam_l2w(1e-4)
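    # chain_len=6 above controls how many steps of the encode/decode chain get
    # unrolled per update; the loop below backpropagates through the full
    # unrolled chain (BPTT), as noted in the section header that follows.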

    out_file = open(RESULT_PATH+"pt_walk_results.txt", 'wb')
    ####################################################
    # Train the VCGLoop by unrolling and applying BPTT #
    ####################################################
    learn_rate = 0.0002
    cost_1 = [0. for i in range(10)]
    for i in range(1000000):
        scale = float(min((i+1), 25000)) / 25000.0
        if (((i+1) % 50000) == 0):
            learn_rate = learn_rate * 0.8
        ########################################
        # TRAIN THE CHAIN IN FREE-RUNNING MODE #
        ########################################
        VCGL.set_all_sgd_params(learn_rate=(scale*learn_rate), \
                mom_1=0.9, mom_2=0.999)
        VCGL.set_disc_weights(dweight_gn=20.0, dweight_dn=4.0)
        VCGL.set_lam_chain_nll(1.0)
        VCGL.set_lam_chain_kld(1.0 + extra_lam_kld)
        VCGL.set_lam_chain_vel(0.0)
        VCGL.set_lam_mask_nll(0.0)
        VCGL.set_lam_mask_kld(0.0)
        # get some data to train with
        tr_idx = npr.randint(low=0,high=tr_samples,size=(batch_size,))
        Xd_batch = Xtr.take(tr_idx, axis=0)
        Xc_batch = 0.0 * Xd_batch
        Xm_batch = 0.0 * Xd_batch
        # do 5 repetitions of the batch
        Xd_batch = np.repeat(Xd_batch, batch_reps, axis=0)
        Xc_batch = np.repeat(Xc_batch, batch_reps, axis=0)
        Xm_batch = np.repeat(Xm_batch, batch_reps, axis=0)
        # examples from the target distribution, to train discriminator
        tr_idx = npr.randint(low=0,high=tr_samples,size=(batch_reps*batch_size,))
        Xt_batch = Xtr.take(tr_idx, axis=0)
        # do a minibatch update of the model, and compute some costs
        outputs = VCGL.train_joint(Xd_batch, Xc_batch, Xm_batch, Xt_batch)
        cost_1 = [(cost_1[k] + 1.*outputs[k]) for k in range(len(outputs))]
        if ((i % 1000) == 0):
            cost_1 = [(v / 1000.0) for v in cost_1]
            o_str_1 = "batch: {0:d}, joint_cost: {1:.4f}, chain_nll_cost: {2:.4f}, chain_kld_cost: {3:.4f}, disc_cost_gn: {4:.4f}, disc_cost_dn: {5:.4f}".format( \
                    i, cost_1[0], cost_1[1], cost_1[2], cost_1[6], cost_1[7])
            print(o_str_1)
            out_file.write(o_str_1+"\n")
            out_file.flush()
            cost_1 = [0. for v in cost_1]
        if ((i % 5000) == 0):
            tr_idx = npr.randint(low=0,high=Xtr.shape[0],size=(5,))
            va_idx = npr.randint(low=0,high=Xva.shape[0],size=(5,))
            Xd_batch = np.vstack([Xtr.take(tr_idx, axis=0), Xva.take(va_idx, axis=0)])
            # draw some chains of samples from the VAE loop
            file_name = RESULT_PATH+"pt_walk_chain_samples_b{0:d}.png".format(i)
            Xd_samps = np.repeat(Xd_batch, 3, axis=0)
            sample_lists = VCGL.GIP.sample_from_chain(Xd_samps, loop_iters=20)
            Xs = np.vstack(sample_lists["data samples"])
            utils.visualize_samples(Xs, file_name, num_rows=20)
            # draw some masked chains of samples from the VAE loop
            file_name = RESULT_PATH+"pt_walk_mask_samples_b{0:d}.png".format(i)
            Xd_samps = np.repeat(Xc_mean[0:Xd_batch.shape[0],:], 3, axis=0)
            Xc_samps = np.repeat(Xd_batch, 3, axis=0)
            Xm_rand = sample_masks(Xc_samps, drop_prob=0.2)
            Xm_patch = sample_patch_masks(Xc_samps, (32,32), (16,16))
            Xm_samps = Xm_rand * Xm_patch
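            # Xm_rand and Xm_patch are presumably binary masks (i.i.d. pixel
            # dropout and a random 16x16 patch of each 32x32 image, respectively);
            # their elementwise product applies both kinds of occlusion at once.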
            sample_lists = VCGL.GIP.sample_from_chain(Xd_samps, \
                    X_c=Xc_samps, X_m=Xm_samps, loop_iters=20)
            Xs = np.vstack(sample_lists["data samples"])
            utils.visualize_samples(Xs, file_name, num_rows=20)
            # draw some samples independently from the GenNet's prior
            file_name = RESULT_PATH+"pt_walk_prior_samples_b{0:d}.png".format(i)
            Xs = VCGL.sample_from_prior(20*20)
            utils.visualize_samples(Xs, file_name, num_rows=20)
            # draw discriminator network's weights
            file_name = RESULT_PATH+"pt_walk_dis_weights_b{0:d}.png".format(i)
            utils.visualize_net_layer(VCGL.DN.proto_nets[0][0], file_name)
            # draw inference net first layer weights
            file_name = RESULT_PATH+"pt_walk_inf_weights_b{0:d}.png".format(i)
            utils.visualize_net_layer(VCGL.IN.shared_layers[0], file_name)
            # draw generator net final layer weights
            file_name = RESULT_PATH+"pt_walk_gen_weights_b{0:d}.png".format(i)
            if GN.out_type == 'sigmoid':
                utils.visualize_net_layer(VCGL.GN.mlp_layers[-1], file_name, use_transpose=True)
            else:
                utils.visualize_net_layer(VCGL.GN.mlp_layers[-2], file_name, use_transpose=True)
            #########################
            # Check posterior KLds. #
            #########################
            post_klds = posterior_klds(IN, Xtr, 5000, 5)
            file_name = RESULT_PATH+"pt_walk_post_klds_b{0:d}.png".format(i)
            utils.plot_kde_histogram2( \
                    np.asarray(post_klds), np.asarray(post_klds), file_name, bins=30)
        # DUMP PARAMETERS FROM TIME-TO-TIME
        if (i % 10000 == 0):
            DN.save_to_file(f_name=RESULT_PATH+"pt_walk_params_b{0:d}_DN.pkl".format(i))
            IN.save_to_file(f_name=RESULT_PATH+"pt_walk_params_b{0:d}_IN.pkl".format(i))
            GN.save_to_file(f_name=RESULT_PATH+"pt_walk_params_b{0:d}_GN.pkl".format(i))
    return
Example #24
0
File: train.py Project: zxyjfj/SRGAN_AE
def main():
    # load batches of high-resolution and low-resolution images
    lr_batch, hr_batch = batch_queue_for_training(TRAIN_DATA_PATH)

    coord = tf.train.Coordinator()

    # ========================================
    #           Create Network
    # ========================================
    lr_holders = tf.placeholder(
        dtype=tf.float32,
        shape=[BATCH_SIZE, INPUT_SIZE, INPUT_SIZE, NUM_CHENNELS])
    hr_holders = tf.placeholder(
        dtype=tf.float32,
        shape=[BATCH_SIZE, PATCH_SIZE, PATCH_SIZE, NUM_CHENNELS])

    real_data = hr_holders
    # ----------------------------------------
    #               Generator

    with tf.variable_scope('generator', reuse=tf.AUTO_REUSE):
        fake_data = generator(lr_holders)

    real_hsv_data = tf.image.rgb_to_hsv(real_data)
    fake_hsv_data = tf.image.rgb_to_hsv(fake_data)

    interpolation = tf.image.resize_bicubic(lr_holders,
                                            [PATCH_SIZE, PATCH_SIZE])

    # ----------------------------------------
    #               Discriminator

    with tf.variable_scope('discriminator', reuse=tf.AUTO_REUSE):
        y_fake = discriminator(fake_data)

    with tf.variable_scope('discriminator', reuse=tf.AUTO_REUSE):
        y_real = discriminator(real_data)

    # ----------------------------------------
    #               Encoder

    with tf.variable_scope('encoder', reuse=tf.AUTO_REUSE):
        real_code = encoder(real_data)

    with tf.variable_scope('encoder', reuse=tf.AUTO_REUSE):
        fake_code = encoder(fake_data)

    # ----------------------------------------
    #           Code Discriminator

    with tf.variable_scope('code_discriminator', reuse=tf.AUTO_REUSE):
        c_fake = code_discriminator(fake_code)

    with tf.variable_scope('code_discriminator', reuse=tf.AUTO_REUSE):
        c_real = code_discriminator(real_code)

    # ========================================
    #            Create Optimizer
    # ========================================

    variables = tf.trainable_variables()
    encoder_vars = [var for var in variables if 'encoder/' in var.name]
    generator_vars = [var for var in variables if 'generator/' in var.name]
    discriminator_vars = [
        var for var in variables if 'discriminator/' in var.name
    ]
    code_discriminator_vars = [
        var for var in variables if 'code_discriminator/' in var.name
    ]
    # ========================================
    #       Define Loss (the WGAN/DCGAN variants here have issues)
    # ========================================

    generator_loss = tf.reduce_mean(y_fake)
    discriminator_loss = tf.reduce_mean(y_real) - tf.reduce_mean(y_fake)
    code_generator_loss = tf.reduce_mean(c_fake)
    code_discriminator_loss = tf.reduce_mean(c_real) - tf.reduce_mean(c_fake)
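    # These are WGAN-style linear critic/generator losses for both the image
    # discriminator and the code discriminator; note the signs are flipped
    # relative to the usual WGAN convention. The commented-out lines below are
    # the DCGAN-style log-loss alternatives mentioned in the header above.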
    # generator_loss = tf.reduce_mean(-tf.log(y_fake + EPS))
    # discriminator_loss = tf.reduce_mean(
    #     -tf.log(y_real + EPS)) - tf.reduce_mean(-tf.log(1 - y_fake + EPS))
    # code_generator_loss = tf.reduce_mean(-tf.log(c_fake + EPS))
    # code_discriminator_loss = tf.reduce_mean(
    #     -tf.log(c_real + EPS)) - tf.reduce_mean(-tf.log(1 - c_fake + EPS))
    reconstruction_loss = tf.reduce_mean(
        tf.squared_difference(real_data, fake_data))
    # keep the image colors unchanged (hue/saturation consistency)
    s_loss = tf.reduce_mean(
        tf.abs(real_hsv_data[:, :, :, 1] - fake_hsv_data[:, :, :, 1]))
    h_loss = tf.reduce_mean(
        tf.abs(real_hsv_data[:, :, :, 0] - fake_hsv_data[:, :, :, 0]))

    generator_encoder_loss = generator_loss + code_generator_loss \
    + reconstruction_loss_weight * reconstruction_loss + s_loss + h_loss
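    # The generator and encoder are trained jointly on a combined objective:
    # the image-space adversarial term, the code-space adversarial term, a
    # weighted pixel-wise MSE reconstruction term, and the hue/saturation
    # terms that discourage color shifts in the super-resolved output.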

    # ----------------------------------------
    #               Generator

    generator_encoder_opt = tf.train.RMSPropOptimizer(LEARN_RATE).minimize(
        generator_encoder_loss, var_list=generator_vars + encoder_vars)

    # ----------------------------------------
    #               Discriminator

    discriminator_opt = tf.train.RMSPropOptimizer(LEARN_RATE).minimize(
        discriminator_loss, var_list=discriminator_vars)

    # ----------------------------------------
    #               Code Discriminator

    code_discriminator_opt = tf.train.RMSPropOptimizer(LEARN_RATE).minimize(
        code_discriminator_loss, var_list=code_discriminator_vars)

    # ========================================
    #        Critic weight clipping (important)
    # ========================================
    # c=0.01
    clip_bounds = [-0.01, 0.01]
    d_clip = []
    for var in discriminator_vars:
        d_clip.append(
            tf.assign(var, tf.clip_by_value(var, clip_bounds[0],
                                            clip_bounds[1])))
    clip_disc_weight = tf.group(*d_clip)

    c_clip = []
    for var in code_discriminator_vars:
        c_clip.append(
            tf.assign(var, tf.clip_by_value(var, clip_bounds[0],
                                            clip_bounds[1])))
    clip_code_disc_weight = tf.group(*c_clip)
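    # Hard weight clipping to [-0.01, 0.01] follows the original WGAN recipe
    # (Arjovsky et al., 2017) for roughly enforcing a Lipschitz constraint on
    # both critics; the clip ops are run after every critic update in the
    # training loop below.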

    # for summaries
    with tf.name_scope('Summary'):
        tf.summary.image('inputs', lr_holders, max_outputs=4)
        tf.summary.image('generator', fake_data, max_outputs=4)
        tf.summary.image('interpolation', interpolation, max_outputs=4)
        tf.summary.image('targets', hr_holders, max_outputs=4)
        tf.summary.scalar('generator_loss', generator_loss)
        tf.summary.scalar('s_loss', s_loss)
        tf.summary.scalar('h_loss', h_loss)
        tf.summary.scalar('discriminator_loss', discriminator_loss)
        tf.summary.scalar('code_generator_loss', code_generator_loss)
        tf.summary.scalar('code_discriminator_loss', code_discriminator_loss)
        tf.summary.scalar('reconstruction_loss', reconstruction_loss)
        tf.summary.scalar('generator_encoder_loss', generator_encoder_loss)

    # initialize TensorFlow
    sess = tf.Session()
    init = [
        tf.local_variables_initializer(),
        tf.global_variables_initializer()
    ]
    sess.run(init)

    # the saver will restore all model's variables during training
    saver = tf.train.Saver(tf.global_variables(), max_to_keep=MAX_CKPT_TO_KEEP)
    try:
        saved_global_step = load(saver, sess, CHECKPOINTS_PATH)
        if saved_global_step is None:
            saved_global_step = 0
    except Exception:
        raise ValueError(
            "The model has changed; please delete the old checkpoints!")

    # Start the queue runners (make batches).
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)
    # Merge all the summaries and write them out to TRAIN_SUMMARY_PATH
    merged_summary = tf.summary.merge_all()
    summary_writer = tf.summary.FileWriter(TRAIN_SUMMARY_PATH, sess.graph)

    num_item_per_epoch = len(os.listdir(TRAIN_DATA_PATH)) // BATCH_SIZE
    time_i = time.time()
    step = saved_global_step

    for epoch in range(NUM_EPOCH):
        for item in range(num_item_per_epoch):
            lr_images, hr_images = sess.run([lr_batch, hr_batch])

            feed_dict = {lr_holders: lr_images, hr_holders: hr_images}

            # ------------------train G twice-------------------
            _, gene_loss = sess.run(
                [generator_encoder_opt, generator_encoder_loss],
                feed_dict=feed_dict)
            _, gene_loss = sess.run(
                [generator_encoder_opt, generator_encoder_loss],
                feed_dict=feed_dict)
            # ------------------train D ------------------------
            _, d_loss = sess.run(
                [discriminator_opt, discriminator_loss], feed_dict=feed_dict)
            # ------------------train code_D -------------------
            _, c_loss = sess.run(
                [code_discriminator_opt, code_discriminator_loss],
                feed_dict=feed_dict)
            sess.run(
                [clip_disc_weight, clip_code_disc_weight], feed_dict=feed_dict)

            sr_img = sess.run(fake_data, feed_dict=feed_dict)

            interpolation_img = sess.run(interpolation, feed_dict=feed_dict)

            summary = sess.run(merged_summary, feed_dict=feed_dict)
            summary_writer.add_summary(summary, global_step=step)

            message = 'Epoch [{:3d}/{:3d}]'.format(epoch + 1, NUM_EPOCH) \
                + '[{:4d}/{:4d}]'.format(item + 1, num_item_per_epoch) \
                + 'gene_loss={:6.8f}, '.format(gene_loss) \
                + 'd_loss={:6.8f}, '.format(d_loss) \
                + 'c_loss={:6.8f}, '.format(c_loss) \
                + 'Time={:.2f}.'.format(time.time() - time_i)
            print(message)
            step += 1

        visualize_samples(
            sess,
            hr_images,
            sr_img,
            interpolation_img,
            filename=os.path.join(INFERENCES_SAVE_PATH,
                                  'train-epoch-{:03d}.png'.format(epoch + 1)))

        save(saver, sess, CHECKPOINTS_PATH, step)

    coord.request_stop()
    coord.join(threads=threads)
Example #25
0
def test_with_model_init():
    ##########################
    # Get some training data #
    ##########################
    rng = np.random.RandomState(1234)
    dataset = 'data/mnist.pkl.gz'
    datasets = load_udm(dataset, zero_mean=False)
    Xtr_shared = datasets[0][0]
    Xva_shared = datasets[1][0]
    Xtr = Xtr_shared.get_value(borrow=False).astype(theano.config.floatX)
    Xva = Xva_shared.get_value(borrow=False).astype(theano.config.floatX)
    tr_samples = Xtr.shape[0]
    batch_size = 200
    batch_reps = 1

    ############################################################
    # Setup some parameters for the Iterative Refinement Model #
    ############################################################
    obs_dim = Xtr.shape[1]
    z_dim = 20
    h_dim = 100
    x_type = 'bernoulli'

    # some InfNet instances to build the TwoStageModel from
    X_sym = T.matrix('X_sym')

    ########################
    # p_s0_obs_given_z_obs #
    ########################
    params = {}
    shared_config = [z_dim, 250, 250]
    top_config = [shared_config[-1], obs_dim]
    params['shared_config'] = shared_config
    params['mu_config'] = top_config
    params['sigma_config'] = top_config
    params['activation'] = relu_actfun
    params['init_scale'] = 1.2
    params['lam_l2a'] = 1e-3
    params['vis_drop'] = 0.0
    params['hid_drop'] = 0.0
    params['bias_noise'] = 0.0
    params['input_noise'] = 0.0
    params['build_theano_funcs'] = False
    p_s0_obs_given_z_obs = InfNet(rng=rng, Xd=X_sym, \
            params=params, shared_param_dicts=None)
    p_s0_obs_given_z_obs.init_biases(0.2)
    #################
    # p_hi_given_si #
    #################
    params = {}
    shared_config = [obs_dim, 250, 250]
    top_config = [shared_config[-1], h_dim]
    params['shared_config'] = shared_config
    params['mu_config'] = top_config
    params['sigma_config'] = top_config
    params['activation'] = relu_actfun
    params['init_scale'] = 1.2
    params['lam_l2a'] = 0.0
    params['vis_drop'] = 0.0
    params['hid_drop'] = 0.0
    params['bias_noise'] = 0.0
    params['input_noise'] = 0.0
    params['build_theano_funcs'] = False
    p_hi_given_si = InfNet(rng=rng, Xd=X_sym, \
            params=params, shared_param_dicts=None)
    p_hi_given_si.init_biases(0.2)
    ######################
    # p_sip1_given_si_hi #
    ######################
    params = {}
    shared_config = [h_dim, 250, 250]
    top_config = [shared_config[-1], obs_dim]
    params['shared_config'] = shared_config
    params['mu_config'] = top_config
    params['sigma_config'] = top_config
    params['activation'] = relu_actfun
    params['init_scale'] = 1.2
    params['lam_l2a'] = 0.0
    params['vis_drop'] = 0.0
    params['hid_drop'] = 0.0
    params['bias_noise'] = 0.0
    params['input_noise'] = 0.0
    params['build_theano_funcs'] = False
    p_sip1_given_si_hi = InfNet(rng=rng, Xd=X_sym, \
            params=params, shared_param_dicts=None)
    p_sip1_given_si_hi.init_biases(0.2)
    ###############
    # q_z_given_x #
    ###############
    params = {}
    shared_config = [obs_dim, 250, 250]
    top_config = [shared_config[-1], z_dim]
    params['shared_config'] = shared_config
    params['mu_config'] = top_config
    params['sigma_config'] = top_config
    params['activation'] = relu_actfun
    params['init_scale'] = 1.2
    params['lam_l2a'] = 0.0
    params['vis_drop'] = 0.0
    params['hid_drop'] = 0.0
    params['bias_noise'] = 0.0
    params['input_noise'] = 0.0
    params['build_theano_funcs'] = False
    q_z_given_x = InfNet(rng=rng, Xd=X_sym, \
            params=params, shared_param_dicts=None)
    q_z_given_x.init_biases(0.2)
    ###################
    # q_hi_given_x_si #
    ###################
    params = {}
    shared_config = [(obs_dim + obs_dim), 500, 500]
    top_config = [shared_config[-1], h_dim]
    params['shared_config'] = shared_config
    params['mu_config'] = top_config
    params['sigma_config'] = top_config
    params['activation'] = relu_actfun
    params['init_scale'] = 1.2
    params['lam_l2a'] = 0.0
    params['vis_drop'] = 0.0
    params['hid_drop'] = 0.0
    params['bias_noise'] = 0.0
    params['input_noise'] = 0.0
    params['build_theano_funcs'] = False
    q_hi_given_x_si = InfNet(rng=rng, Xd=X_sym, \
            params=params, shared_param_dicts=None)
    q_hi_given_x_si.init_biases(0.2)


    ################################################################
    # Define parameters for the MultiStageModel, and initialize it #
    ################################################################
    print("Building the MultiStageModel...")
    msm_params = {}
    msm_params['x_type'] = x_type
    msm_params['obs_transform'] = 'sigmoid'
    MSM = MultiStageModel(rng=rng, x_in=X_sym, \
            p_s0_obs_given_z_obs=p_s0_obs_given_z_obs, \
            p_hi_given_si=p_hi_given_si, \
            p_sip1_given_si_hi=p_sip1_given_si_hi, \
            q_z_given_x=q_z_given_x, \
            q_hi_given_x_si=q_hi_given_x_si, \
            obs_dim=obs_dim, z_dim=z_dim, h_dim=h_dim, \
            model_init_obs=True, ir_steps=2, \
            params=msm_params)
    obs_mean = (0.9 * np.mean(Xtr, axis=0)) + 0.05
    obs_mean_logit = np.log(obs_mean / (1.0 - obs_mean))
    MSM.set_input_bias(-obs_mean)
    MSM.set_obs_bias(0.1*obs_mean_logit)
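    # obs_mean is squashed into [0.05, 0.95] so the logit stays finite;
    # set_input_bias(-obs_mean) presumably centers the inputs, and the scaled
    # logit initializes the observation bias so the sigmoid outputs start near
    # the mean training image.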

    ################################################################
    # Apply some updates, to check that they aren't totally broken #
    ################################################################
    costs = [0. for i in range(10)]
    learn_rate = 0.0003
    momentum = 0.8
    for i in range(300000):
        scale = min(1.0, ((i+1) / 10000.0))
        extra_kl = max(0.0, ((50000.0 - i) / 50000.0))
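        # 'scale' warms up the learning rate and momentum over the first 10k
        # updates, while 'extra_kl' anneals the KL weight from 2.0 down to 1.0
        # over the first 50k updates (see set_lam_kld below).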
        if (((i + 1) % 10000) == 0):
            learn_rate = learn_rate * 0.95
        # randomly sample a minibatch
        tr_idx = npr.randint(low=0,high=tr_samples,size=(batch_size,))
        Xb = binarize_data(Xtr.take(tr_idx, axis=0))
        Xb = Xb.astype(theano.config.floatX)
        # set sgd and objective function hyperparams for this update
        MSM.set_sgd_params(lr_1=scale*learn_rate, lr_2=scale*learn_rate, \
                           mom_1=(scale*momentum), mom_2=0.98)
        MSM.set_train_switch(1.0)
        MSM.set_l1l2_weight(1.0)
        MSM.set_lam_nll(lam_nll=1.0)
        MSM.set_lam_kld(lam_kld_1=(1.0+extra_kl), lam_kld_2=(1.0+extra_kl))
        MSM.set_lam_l2w(1e-6)
        MSM.set_kzg_weight(0.01)
        # perform a minibatch update and record the cost for this batch
        result = MSM.train_joint(Xb, batch_reps)
        costs = [(costs[j] + result[j]) for j in range(len(result))]
        if ((i % 500) == 0):
            costs = [(v / 500.0) for v in costs]
            print("-- batch {0:d} --".format(i))
            print("    joint_cost: {0:.4f}".format(costs[0]))
            print("    nll_cost  : {0:.4f}".format(costs[1]))
            print("    kld_cost  : {0:.4f}".format(costs[2]))
            print("    reg_cost  : {0:.4f}".format(costs[3]))
            costs = [0.0 for v in costs]
        if (((i % 2000) == 0) or ((i < 10000) and ((i % 1000) == 0))):
            Xva = row_shuffle(Xva)
            # draw some independent random samples from the model
            samp_count = 200
            model_samps = MSM.sample_from_prior(samp_count)
            seq_len = len(model_samps)
            seq_samps = np.zeros((seq_len*samp_count, model_samps[0].shape[1]))
            idx = 0
            for s1 in range(samp_count): 
                for s2 in range(seq_len):
                    seq_samps[idx] = model_samps[s2][s1]
                    idx += 1
            file_name = "MX_SAMPLES_b{0:d}.png".format(i)
            utils.visualize_samples(seq_samps, file_name, num_rows=20)
            # visualize some important weights in the model
            file_name = "MX_INF_1_WEIGHTS_b{0:d}.png".format(i)
            W = MSM.inf_1_weights.get_value(borrow=False).T
            utils.visualize_samples(W[:,:obs_dim], file_name, num_rows=20)
            file_name = "MX_INF_2_WEIGHTS_b{0:d}.png".format(i)
            W = MSM.inf_2_weights.get_value(borrow=False).T
            utils.visualize_samples(W[:,:obs_dim], file_name, num_rows=20)
            file_name = "MX_GEN_1_WEIGHTS_b{0:d}.png".format(i)
            W = MSM.gen_1_weights.get_value(borrow=False)
            utils.visualize_samples(W[:,:obs_dim], file_name, num_rows=20)
            file_name = "MX_GEN_2_WEIGHTS_b{0:d}.png".format(i)
            W = MSM.gen_2_weights.get_value(borrow=False)
            utils.visualize_samples(W[:,:obs_dim], file_name, num_rows=20)
            file_name = "MX_GEN_INF_WEIGHTS_b{0:d}.png".format(i)
            W = MSM.gen_inf_weights.get_value(borrow=False).T
            utils.visualize_samples(W[:,:obs_dim], file_name, num_rows=20)
            # compute information about posterior KLds on validation set
            post_klds = MSM.compute_post_klds(Xva[0:5000])
            file_name = "MX_H0_KLDS_b{0:d}.png".format(i)
            utils.plot_stem(np.arange(post_klds[0].shape[1]), \
                    np.mean(post_klds[0], axis=0), file_name)
            file_name = "MX_HI_COND_KLDS_b{0:d}.png".format(i)
            utils.plot_stem(np.arange(post_klds[1].shape[1]), \
                    np.mean(post_klds[1], axis=0), file_name)
            file_name = "MX_HI_GLOB_KLDS_b{0:d}.png".format(i)
            utils.plot_stem(np.arange(post_klds[2].shape[1]), \
                    np.mean(post_klds[2], axis=0), file_name)
            # compute information about free-energy on validation set
            file_name = "MX_FREE_ENERGY_b{0:d}.png".format(i)
            fe_terms = MSM.compute_fe_terms(binarize_data(Xva[0:5000]), 20)
            fe_mean = np.mean(fe_terms[0]) + np.mean(fe_terms[1])
            print("    nll_bound : {0:.4f}".format(fe_mean))
            utils.plot_scatter(fe_terms[1], fe_terms[0], file_name, \
                    x_label='Posterior KLd', y_label='Negative Log-likelihood')
    return
Example #26
0
def test_with_model_init():
    ##########################
    # Get some training data #
    ##########################
    rng = np.random.RandomState(1234)
    dataset = 'data/mnist.pkl.gz'
    datasets = load_udm(dataset, as_shared=False, zero_mean=False)
    Xtr = to_fX(datasets[0][0])
    Xva = to_fX(datasets[1][0])
    Ytr = datasets[0][1]
    Yva = datasets[1][1]
    Xtr_class_groups = make_class_groups(Xtr, Ytr)

    tr_samples = Xtr.shape[0]
    va_samples = Xva.shape[0]
    batch_size = 300

    BD = lambda ary: binarize_data(ary)
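    # BD re-binarizes each minibatch on the fly; assuming binarize_data samples
    # Bernoulli pixel values, this gives dynamically binarized MNIST rather
    # than a single fixed binarization.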

    ############################################################
    # Setup some parameters for the Iterative Refinement Model #
    ############################################################
    obs_dim = Xtr.shape[1]
    z_dim = 32
    h_dim = 100
    ir_steps = 2
    init_scale = 1.0
    
    x_type = 'bernoulli'

    # some InfNet instances to build the TwoStageModel from
    x_in = T.matrix('x_in')
    x_pos = T.matrix('x_pos')
    y_in = T.lvector('y_in')

    #################
    # p_hi_given_si #
    #################
    params = {}
    shared_config = [obs_dim, 500, 500]
    top_config = [shared_config[-1], h_dim]
    params['shared_config'] = shared_config
    params['mu_config'] = top_config
    params['sigma_config'] = top_config
    params['activation'] = relu_actfun
    params['init_scale'] = init_scale
    params['lam_l2a'] = 0.0
    params['vis_drop'] = 0.0
    params['hid_drop'] = 0.0
    params['bias_noise'] = 0.0
    params['input_noise'] = 0.0
    params['build_theano_funcs'] = False
    p_hi_given_si = InfNet(rng=rng, Xd=x_in, \
            params=params, shared_param_dicts=None)
    p_hi_given_si.init_biases(0.2)
    ######################
    # p_sip1_given_si_hi #
    ######################
    params = {}
    shared_config = [(h_dim + obs_dim), 500, 500]
    top_config = [shared_config[-1], obs_dim]
    params['shared_config'] = shared_config
    params['mu_config'] = top_config
    params['sigma_config'] = top_config
    params['activation'] = relu_actfun
    params['init_scale'] = init_scale
    params['lam_l2a'] = 0.0
    params['vis_drop'] = 0.0
    params['hid_drop'] = 0.0
    params['bias_noise'] = 0.0
    params['input_noise'] = 0.0
    params['build_theano_funcs'] = False
    p_sip1_given_si_hi = InfNet(rng=rng, Xd=x_in, \
            params=params, shared_param_dicts=None)
    p_sip1_given_si_hi.init_biases(0.2)
    ################
    # p_s0_given_z #
    ################
    params = {}
    shared_config = [z_dim, 500, 500]
    top_config = [shared_config[-1], obs_dim]
    params['shared_config'] = shared_config
    params['mu_config'] = top_config
    params['sigma_config'] = top_config
    params['activation'] = relu_actfun
    params['init_scale'] = init_scale
    params['lam_l2a'] = 0.0
    params['vis_drop'] = 0.0
    params['hid_drop'] = 0.0
    params['bias_noise'] = 0.0
    params['input_noise'] = 0.0
    params['build_theano_funcs'] = False
    p_s0_given_z = InfNet(rng=rng, Xd=x_in, \
            params=params, shared_param_dicts=None)
    p_s0_given_z.init_biases(0.2)
    ###############
    # q_z_given_x #
    ###############
    params = {}
    shared_config = [obs_dim, (500, 4), (500, 4)]
    top_config = [shared_config[-1], z_dim]
    params['shared_config'] = shared_config
    params['mu_config'] = top_config
    params['sigma_config'] = top_config
    params['activation'] = relu_actfun
    params['init_scale'] = init_scale
    params['lam_l2a'] = 0.0
    params['vis_drop'] = 0.2
    params['hid_drop'] = 0.5
    params['bias_noise'] = 0.0
    params['input_noise'] = 0.0
    params['build_theano_funcs'] = False
    q_z_given_x = InfNet(rng=rng, Xd=x_in, \
            params=params, shared_param_dicts=None)
    q_z_given_x.init_biases(0.0)
    ###################
    # q_hi_given_x_si #
    ###################
    params = {}
    shared_config = [(obs_dim + obs_dim), 800, 800]
    top_config = [shared_config[-1], h_dim]
    params['shared_config'] = shared_config
    params['mu_config'] = top_config
    params['sigma_config'] = top_config
    params['activation'] = relu_actfun
    params['init_scale'] = init_scale
    params['lam_l2a'] = 0.0
    params['vis_drop'] = 0.0
    params['hid_drop'] = 0.0
    params['bias_noise'] = 0.0
    params['input_noise'] = 0.0
    params['build_theano_funcs'] = False
    q_hi_given_x_si = InfNet(rng=rng, Xd=x_in, \
            params=params, shared_param_dicts=None)
    q_hi_given_x_si.init_biases(0.2)


    ################################################################
    # Define parameters for the MultiStageModel, and initialize it #
    ################################################################
    print("Building the MultiStageModel...")
    msm_params = {}
    msm_params['x_type'] = x_type
    msm_params['obs_transform'] = 'sigmoid'
    MSM = MultiStageModelSS(rng=rng, \
            x_in=x_in, x_pos=x_pos, y_in=y_in, \
            p_s0_given_z=p_s0_given_z, \
            p_hi_given_si=p_hi_given_si, \
            p_sip1_given_si_hi=p_sip1_given_si_hi, \
            q_z_given_x=q_z_given_x, \
            q_hi_given_x_si=q_hi_given_x_si, \
            class_count=10, \
            obs_dim=obs_dim, z_dim=z_dim, h_dim=h_dim, \
            ir_steps=ir_steps, params=msm_params)
    MSM.set_lam_class(lam_class=20.0)
    MSM.set_lam_nll(lam_nll=1.0)
    MSM.set_lam_kld(lam_kld_z=1.0, lam_kld_q2p=0.9, \
                    lam_kld_p2q=0.1)
    MSM.set_lam_l2w(1e-4)
    MSM.set_drop_rate(0.0)
    MSM.q_hi_given_x_si.set_bias_noise(0.0)
    MSM.p_hi_given_si.set_bias_noise(0.0)
    MSM.p_sip1_given_si_hi.set_bias_noise(0.0)

    ################################################################
    # Apply some updates, to check that they aren't totally broken #
    ################################################################
    out_file = open("MSS_A_RESULTS.txt", 'wb')
    costs = [0. for i in range(10)]
    learn_rate = 0.0002
    momentum = 0.5
    batch_idx = np.arange(batch_size) + tr_samples
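    # batch_idx starts past the end of the training set, so the first pass
    # through the loop immediately triggers a shuffle and resets it to the
    # first batch (the same pattern is used in the other training loops here).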
    for i in range(250000):
        scale = min(1.0, ((i+1) / 2000.0))
        if (((i + 1) % 10000) == 0):
            learn_rate = learn_rate * 0.95
        if (i > 20000):
            momentum = 0.90
        else:
            momentum = 0.50
        # get the indices of training samples for this batch update
        batch_idx += batch_size
        if (np.max(batch_idx) >= tr_samples):
            # we finished an "epoch", so we rejumble the training set
            Xtr, Ytr = row_shuffle(Xtr, Ytr)
            batch_idx = np.arange(batch_size)
        # set sgd and objective function hyperparams for this update
        MSM.set_sgd_params(lr_1=scale*learn_rate, lr_2=scale*learn_rate, \
                           mom_1=scale*momentum, mom_2=0.99)
        MSM.set_train_switch(1.0)
        # perform a minibatch update and record the cost for this batch
        Xi_tr = Xtr.take(batch_idx, axis=0)
        Yi_tr = Ytr.take(batch_idx, axis=0)
        Xp_tr, Xn_tr = sample_class_groups(Yi_tr, Xtr_class_groups)
        result = MSM.train_joint(BD(Xi_tr), BD(Xp_tr), Yi_tr)
        costs = [(costs[j] + result[j]) for j in range(len(result)-1)]
        # output useful information about training progress
        if ((i % 500) == 0):
            costs = [(v / 500.0) for v in costs]
            str1 = "-- batch {0:d} --".format(i)
            str2 = "    joint_cost  : {0:.4f}".format(costs[0])
            str3 = "    class_cost  : {0:.4f}".format(costs[1])
            str4 = "    nll_cost    : {0:.4f}".format(costs[2])
            str5 = "    kld_cost    : {0:.4f}".format(costs[3])
            str6 = "    reg_cost    : {0:.4f}".format(costs[4])
            joint_str = "\n".join([str1, str2, str3, str4, str5, str6])
            print(joint_str)
            out_file.write(joint_str+"\n")
            out_file.flush()
            costs = [0.0 for v in costs]
        if (((i % 2000) == 0) or ((i < 10000) and ((i % 1000) == 0))):
            # Get some validation samples for computing diagnostics
            Xva, Yva = row_shuffle(Xva, Yva)
            Xb_va = Xva[0:2500]
            Yb_va = Yva[0:2500]
            # draw some independent random samples from the model
            samp_count = 200
            model_samps = MSM.sample_from_prior(samp_count)
            seq_len = len(model_samps)
            seq_samps = np.zeros((seq_len*samp_count, model_samps[0].shape[1]))
            idx = 0
            for s1 in range(samp_count):
                for s2 in range(seq_len):
                    seq_samps[idx] = model_samps[s2][s1]
                    idx += 1
            file_name = "MSS_A_SAMPLES_IND_b{0:d}.png".format(i)
            utils.visualize_samples(seq_samps, file_name, num_rows=20)
            # draw some conditional random samples from the model
            Xs = Xb_va[0:50] # only use validation set samples
            Xs = np.repeat(Xs, 4, axis=0)
            samp_count = Xs.shape[0]
            model_samps = MSM.sample_from_input(BD(Xs), guided_decoding=False)
            model_samps.append(Xs)
            seq_len = len(model_samps)
            seq_samps = np.zeros((seq_len*samp_count, model_samps[0].shape[1]))
            idx = 0
            for s1 in range(samp_count): 
                for s2 in range(seq_len):
                    seq_samps[idx] = model_samps[s2][s1]
                    idx += 1
            file_name = "MSS_A_SAMPLES_CND_UD_b{0:d}.png".format(i)
            utils.visualize_samples(seq_samps, file_name, num_rows=20)
            # compute information about posterior KLds on validation set
            raw_costs = MSM.compute_raw_costs(BD(Xb_va), BD(Xb_va))
            init_nll, init_kld, q2p_kld, p2q_kld, step_nll, step_kld = raw_costs
            file_name = "MSS_A_H0_KLDS_b{0:d}.png".format(i)
            utils.plot_stem(np.arange(init_kld.shape[1]), \
                    np.mean(init_kld, axis=0), file_name)
            file_name = "MSS_A_HI_Q2P_KLDS_b{0:d}.png".format(i)
            utils.plot_stem(np.arange(q2p_kld.shape[1]), \
                    np.mean(q2p_kld, axis=0), file_name)
            file_name = "MSS_A_HI_P2Q_KLDS_b{0:d}.png".format(i)
            utils.plot_stem(np.arange(p2q_kld.shape[1]), \
                    np.mean(p2q_kld, axis=0), file_name)
            # draw weights for the initial encoder/classifier
            file_name = "MSS_A_QZX_WEIGHTS_b{0:d}.png".format(i)
            W = q_z_given_x.shared_layers[0].W.get_value(borrow=False).T
            utils.visualize_samples(W, file_name, num_rows=20)
            # compute free-energy terms on training samples
            fe_terms = MSM.compute_fe_terms(BD(Xtr[0:2500]), BD(Xtr[0:2500]), 30)
            fe_nll = np.mean(fe_terms[0])
            fe_kld = np.mean(fe_terms[1])
            fe_joint = fe_nll + fe_kld
            joint_str = "    vfe-tr: {0:.4f}, nll: ({1:.4f}, {2:.4f}, {3:.4f}), kld: ({4:.4f}, {5:.4f}, {6:.4f})".format( \
                    fe_joint, fe_nll, np.min(fe_terms[0]), np.max(fe_terms[0]), fe_kld, np.min(fe_terms[1]), np.max(fe_terms[1]))
            print(joint_str)
            out_file.write(joint_str+"\n")
            out_file.flush()
            # compute free-energy terms on validation samples
            fe_terms = MSM.compute_fe_terms(BD(Xb_va), BD(Xb_va), 30)
            fe_nll = np.mean(fe_terms[0])
            fe_kld = np.mean(fe_terms[1])
            fe_joint = fe_nll + fe_kld
            joint_str = "    vfe-va: {0:.4f}, nll: ({1:.4f}, {2:.4f}, {3:.4f}), kld: ({4:.4f}, {5:.4f}, {6:.4f})".format( \
                    fe_joint, fe_nll, np.min(fe_terms[0]), np.max(fe_terms[0]), fe_kld, np.min(fe_terms[1]), np.max(fe_terms[1]))
            print(joint_str)
            out_file.write(joint_str+"\n")
            out_file.flush()
            # compute multi-sample estimate of classification error
            err_rate, err_idx, y_preds = MSM.class_error(Xb_va, Yb_va, \
                    samples=30, prep_func=BD)
            joint_str = "    va-class-error: {0:.4f}".format(err_rate)
            print(joint_str)
            out_file.write(joint_str+"\n")
            out_file.flush()
            # draw some conditional random samples from the model
            Xs = Xb_va[err_idx] # use validation samples with class errors
            if (Xs.shape[0] > 50):
                Xs = Xs[:50]
            Xs = np.repeat(Xs, 4, axis=0)
            if ((Xs.shape[0] % 20) != 0):
                # round-off the number of error examples, for nice display
                remainder = Xs.shape[0] % 20
                Xs = Xs[:-remainder]
            samp_count = Xs.shape[0]
            # draw some conditional random samples from the model
            model_samps = MSM.sample_from_input(BD(Xs), guided_decoding=False)
            model_samps.append(Xs)
            seq_len = len(model_samps)
            seq_samps = np.zeros((seq_len*samp_count, model_samps[0].shape[1]))
            idx = 0
            for s1 in range(samp_count): 
                for s2 in range(seq_len):
                    seq_samps[idx] = model_samps[s2][s1]
                    idx += 1
            file_name = "MSS_A_SAMPLES_CND_ERR_b{0:d}.png".format(i)
            utils.visualize_samples(seq_samps, file_name, num_rows=20)
Example #27
0
def train_walk_from_pretrained_osm(lam_kld=0.0):
    # Simple test code, to check that everything is basically functional.
    print("TESTING...")

    # Initialize a source of randomness
    rng = np.random.RandomState(1234)

    # Load some data to train/validate/test with
    data_file = 'data/tfd_data_48x48.pkl'
    dataset = load_tfd(tfd_pkl_name=data_file,
                       which_set='unlabeled',
                       fold='all')
    Xtr_unlabeled = dataset[0]
    dataset = load_tfd(tfd_pkl_name=data_file, which_set='train', fold='all')
    Xtr_train = dataset[0]
    Xtr = np.vstack([Xtr_unlabeled, Xtr_train])
    dataset = load_tfd(tfd_pkl_name=data_file, which_set='valid', fold='all')
    Xva = dataset[0]
    print("Xtr.shape: {0:s}, Xva.shape: {1:s}".format(str(Xtr.shape),
                                                      str(Xva.shape)))

    # get and set some basic dataset information
    tr_samples = Xtr.shape[0]
    va_samples = Xva.shape[0]
    data_dim = Xtr.shape[1]
    batch_size = 400
    batch_reps = 5
    prior_sigma = 1.0
    Xtr_mean = np.mean(Xtr, axis=0, keepdims=True)
    Xtr_mean = (0.0 * Xtr_mean) + np.mean(np.mean(Xtr, axis=1))
    Xc_mean = np.repeat(Xtr_mean, batch_size, axis=0)

    # Symbolic inputs
    Xd = T.matrix(name='Xd')
    Xc = T.matrix(name='Xc')
    Xm = T.matrix(name='Xm')
    Xt = T.matrix(name='Xt')

    ###############################
    # Setup discriminator network #
    ###############################
    # Set some reasonable mlp parameters
    dn_params = {}
    # Set up some proto-networks
    pc0 = [data_dim, (300, 4), (300, 4), 10]
    dn_params['proto_configs'] = [pc0]
    # Set up some spawn networks
    sc0 = {
        'proto_key': 0,
        'input_noise': 0.1,
        'bias_noise': 0.1,
        'do_dropout': True
    }
    #sc1 = {'proto_key': 0, 'input_noise': 0.1, 'bias_noise': 0.1, 'do_dropout': True}
    dn_params['spawn_configs'] = [sc0]
    dn_params['spawn_weights'] = [1.0]
    # Set remaining params
    dn_params['init_scale'] = 1.0
    dn_params['lam_l2a'] = 1e-2
    dn_params['vis_drop'] = 0.2
    dn_params['hid_drop'] = 0.5
    # Initialize a network object to use as the discriminator
    DN = PeaNet(rng=rng, Xd=Xd, params=dn_params)
    DN.init_biases(0.0)

    #######################################################
    # Load inferencer and generator from saved parameters #
    #######################################################
    gn_fname = RESULT_PATH + "pt_osm_params_b100000_GN.pkl"
    in_fname = RESULT_PATH + "pt_osm_params_b100000_IN.pkl"
    IN = load_infnet_from_file(f_name=in_fname, rng=rng, Xd=Xd)
    GN = load_infnet_from_file(f_name=gn_fname, rng=rng, Xd=Xd)

    ########################################################
    # Define parameters for the VCGLoop, and initialize it #
    ########################################################
    print("Building the VCGLoop...")
    vcgl_params = {}
    vcgl_params['x_type'] = 'gaussian'
    vcgl_params['xt_transform'] = 'sigmoid'
    vcgl_params['logvar_bound'] = LOGVAR_BOUND
    vcgl_params['cost_decay'] = 0.1
    vcgl_params['chain_type'] = 'walkout'
    vcgl_params['lam_l2d'] = 5e-2
    VCGL = VCGLoop(rng=rng, Xd=Xd, Xc=Xc, Xm=Xm, Xt=Xt, \
                 i_net=IN, g_net=GN, d_net=DN, chain_len=5, \
                 data_dim=data_dim, prior_dim=PRIOR_DIM, params=vcgl_params)

    out_file = open(RESULT_PATH + "pt_walk_results.txt", 'wb')
    ####################################################
    # Train the VCGLoop by unrolling and applying BPTT #
    ####################################################
    learn_rate = 0.0005
    cost_1 = [0. for i in range(10)]
    for i in range(100000):
        scale = float(min((i + 1), 5000)) / 5000.0
        if (((i + 1) % 25000) == 0):
            learn_rate = learn_rate * 0.8
        ########################################
        # TRAIN THE CHAIN IN FREE-RUNNING MODE #
        ########################################
        VCGL.set_all_sgd_params(learn_rate=(scale*learn_rate), \
                mom_1=0.9, mom_2=0.99)
        VCGL.set_disc_weights(dweight_gn=25.0, dweight_dn=25.0)
        VCGL.set_lam_chain_nll(1.0)
        VCGL.set_lam_chain_kld(lam_kld)
        # get some data to train with
        tr_idx = npr.randint(low=0, high=tr_samples, size=(batch_size, ))
        Xd_batch = Xtr.take(tr_idx, axis=0)
        Xc_batch = 0.0 * Xd_batch
        Xm_batch = 0.0 * Xd_batch
        # examples from the target distribution, to train discriminator
        tr_idx = npr.randint(low=0, high=tr_samples, size=(2 * batch_size, ))
        Xt_batch = Xtr.take(tr_idx, axis=0)
        # do a minibatch update of the model, and compute some costs
        outputs = VCGL.train_joint(Xd_batch, Xc_batch, Xm_batch, Xt_batch,
                                   batch_reps)
        cost_1 = [(cost_1[k] + 1. * outputs[k]) for k in range(len(outputs))]
        if ((i % 500) == 0):
            cost_1 = [(v / 500.0) for v in cost_1]
            o_str_1 = "batch: {0:d}, joint_cost: {1:.4f}, chain_nll_cost: {2:.4f}, chain_kld_cost: {3:.4f}, disc_cost_gn: {4:.4f}, disc_cost_dn: {5:.4f}".format( \
                    i, cost_1[0], cost_1[1], cost_1[2], cost_1[5], cost_1[6])
            print(o_str_1)
            cost_1 = [0. for v in cost_1]
        if ((i % 1000) == 0):
            tr_idx = npr.randint(low=0, high=Xtr.shape[0], size=(5, ))
            va_idx = npr.randint(low=0, high=Xva.shape[0], size=(5, ))
            Xd_batch = np.vstack(
                [Xtr.take(tr_idx, axis=0),
                 Xva.take(va_idx, axis=0)])
            # draw some chains of samples from the VAE loop
            file_name = RESULT_PATH + "pt_walk_chain_samples_b{0:d}.png".format(
                i)
            Xd_samps = np.repeat(Xd_batch, 3, axis=0)
            sample_lists = VCGL.OSM.sample_from_chain(Xd_samps, loop_iters=20)
            Xs = np.vstack(sample_lists["data samples"])
            utils.visualize_samples(Xs, file_name, num_rows=20)
            # draw some masked chains of samples from the VAE loop
            file_name = RESULT_PATH + "pt_walk_mask_samples_b{0:d}.png".format(
                i)
            Xd_samps = np.repeat(Xc_mean[0:Xd_batch.shape[0], :], 3, axis=0)
            Xc_samps = np.repeat(Xd_batch, 3, axis=0)
            Xm_rand = sample_masks(Xc_samps, drop_prob=0.0)
            Xm_patch = sample_patch_masks(Xc_samps, (48, 48), (25, 25))
            Xm_samps = Xm_rand * Xm_patch
            sample_lists = VCGL.OSM.sample_from_chain(Xd_samps, \
                    X_c=Xc_samps, X_m=Xm_samps, loop_iters=20)
            Xs = np.vstack(sample_lists["data samples"])
            utils.visualize_samples(Xs, file_name, num_rows=20)
            # draw some samples independently from the GenNet's prior
            file_name = RESULT_PATH + "pt_walk_prior_samples_b{0:d}.png".format(
                i)
            Xs = VCGL.sample_from_prior(20 * 20)
            utils.visualize_samples(Xs, file_name, num_rows=20)
        # DUMP PARAMETERS FROM TIME-TO-TIME
        if (i % 5000 == 0):
            DN.save_to_file(f_name=RESULT_PATH +
                            "pt_walk_params_b{0:d}_DN.pkl".format(i))
            IN.save_to_file(f_name=RESULT_PATH +
                            "pt_walk_params_b{0:d}_IN.pkl".format(i))
            GN.save_to_file(f_name=RESULT_PATH +
                            "pt_walk_params_b{0:d}_GN.pkl".format(i))
    return
    # configure a trajectory generator
    num_samples = 100
    traj_len = 64
    x_range = [-0.8,0.8]
    y_range = [-0.8,0.8]
    max_speed = 0.15
    TRAJ = TrajectoryGenerator(x_range=x_range, y_range=y_range, \
                               max_speed=max_speed)

    # test the writer function
    start_time = time.time()
    batch_count = 50
    for i in range(batch_count):
        # generate a minibatch of trajectories
        traj_pos, traj_vel = TRAJ.generate_trajectories(num_samples, traj_len)
        traj_x = traj_pos[:,:,0]
        traj_y = traj_pos[:,:,1]
        # draw the trajectories
        center_x = to_fX( traj_x.T.ravel() )
        center_y = to_fX( traj_y.T.ravel() )
        delta = to_fX( np.ones(center_x.shape) )
        sigma = to_fX( np.ones(center_x.shape) )
        W = write_func(center_y, center_x, delta, 0.2*sigma)
    end_time = time.time()
    render_time = end_time - start_time
    render_bps = batch_count / render_time
    print("RENDER BATCH/SECOND: {0:.2f}".format(render_bps))

    W = W[:20*traj_len]
    utils.visualize_samples(W, "AAAAA.png", num_rows=20)
def test_two_stage_model2():
    ##########################
    # Get some training data #
    ##########################
    rng = np.random.RandomState(1234)
    Xtr, Xva, Xte = load_binarized_mnist(data_path='./data/')
    Xtr = np.vstack((Xtr, Xva))
    Xva = Xte
    #del Xte
    tr_samples = Xtr.shape[0]
    va_samples = Xva.shape[0]
    batch_size = 128
    batch_reps = 1

    ###############################################
    # Setup some parameters for the TwoStageModel #
    ###############################################
    x_dim = Xtr.shape[1]
    z_dim = 50
    h_dim = 100
    x_type = 'bernoulli'

    # some InfNet instances to build the TwoStageModel from
    xin_sym = T.matrix('xin_sym')
    xout_sym = T.matrix('xout_sym')

    ###############
    # p_h_given_z #
    ###############
    params = {}
    shared_config = \
    [ {'layer_type': 'fc',
       'in_chans': z_dim,
       'out_chans': 100,
       'activation': tanh_actfun,
       'apply_bn': True}, \
      {'layer_type': 'fc',
       'in_chans': 100,
       'out_chans': 100,
       'activation': tanh_actfun,
       'apply_bn': True} ]
    output_config = \
    [ {'layer_type': 'fc',
       'in_chans': 100,
       'out_chans': h_dim,
       'activation': tanh_actfun,
       'apply_bn': False}, \
      {'layer_type': 'fc',
       'in_chans': 100,
       'out_chans': h_dim,
       'activation': tanh_actfun,
       'apply_bn': False} ]
    params['shared_config'] = shared_config
    params['output_config'] = output_config
    params['init_scale'] = 1.0
    params['build_theano_funcs'] = False
    p_h_given_z = HydraNet(rng=rng, Xd=xin_sym,
            params=params, shared_param_dicts=None)
    p_h_given_z.init_biases(0.0)
    ###############
    # p_x_given_h #
    ###############
    params = {}
    shared_config = \
    [ {'layer_type': 'fc',
       'in_chans': h_dim,
       'out_chans': 200,
       'activation': tanh_actfun,
       'apply_bn': True}, \
      {'layer_type': 'fc',
       'in_chans': 200,
       'out_chans': 200,
       'activation': tanh_actfun,
       'apply_bn': True} ]
    output_config = \
    [ {'layer_type': 'fc',
       'in_chans': 200,
       'out_chans': x_dim,
       'activation': tanh_actfun,
       'apply_bn': False}, \
      {'layer_type': 'fc',
       'in_chans': 200,
       'out_chans': x_dim,
       'activation': tanh_actfun,
       'apply_bn': False} ]
    params['shared_config'] = shared_config
    params['output_config'] = output_config
    params['init_scale'] = 1.0
    params['build_theano_funcs'] = False
    p_x_given_h = HydraNet(rng=rng, Xd=xin_sym,
            params=params, shared_param_dicts=None)
    p_x_given_h.init_biases(0.0)
    ###############
    # q_h_given_x #
    ###############
    params = {}
    shared_config = \
    [ {'layer_type': 'fc',
       'in_chans': x_dim,
       'out_chans': 200,
       'activation': tanh_actfun,
       'apply_bn': True}, \
      {'layer_type': 'fc',
       'in_chans': 200,
       'out_chans': 200,
       'activation': tanh_actfun,
       'apply_bn': True} ]
    output_config = \
    [ {'layer_type': 'fc',
       'in_chans': 200,
       'out_chans': h_dim,
       'activation': tanh_actfun,
       'apply_bn': False}, \
      {'layer_type': 'fc',
       'in_chans': 200,
       'out_chans': h_dim,
       'activation': tanh_actfun,
       'apply_bn': False} ]
    params['shared_config'] = shared_config
    params['output_config'] = output_config
    params['init_scale'] = 1.0
    params['build_theano_funcs'] = False
    q_h_given_x = HydraNet(rng=rng, Xd=xin_sym,
            params=params, shared_param_dicts=None)
    q_h_given_x.init_biases(0.0)
    ###############
    # q_z_given_h #
    ###############
    params = {}
    shared_config = \
    [ {'layer_type': 'fc',
       'in_chans': h_dim,
       'out_chans': 100,
       'activation': tanh_actfun,
       'apply_bn': True}, \
      {'layer_type': 'fc',
       'in_chans': 100,
       'out_chans': 100,
       'activation': tanh_actfun,
       'apply_bn': True} ]
    output_config = \
    [ {'layer_type': 'fc',
       'in_chans': 100,
       'out_chans': z_dim,
       'activation': tanh_actfun,
       'apply_bn': False}, \
      {'layer_type': 'fc',
       'in_chans': 100,
       'out_chans': z_dim,
       'activation': tanh_actfun,
       'apply_bn': False} ]
    params['shared_config'] = shared_config
    params['output_config'] = output_config
    params['init_scale'] = 1.0
    params['build_theano_funcs'] = False
    q_z_given_h = HydraNet(rng=rng, Xd=xin_sym,
            params=params, shared_param_dicts=None)
    q_z_given_h.init_biases(0.0)
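    # Each HydraNet above exposes two identically-shaped output heads; these
    # presumably parameterize the mean and log-variance of a diagonal Gaussian
    # over the corresponding latent (or observation) variables.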

    ##############################################################
    # Define parameters for the TwoStageModel, and initialize it #
    ##############################################################
    print("Building the TwoStageModel...")
    tsm_params = {}
    tsm_params['x_type'] = x_type
    tsm_params['obs_transform'] = 'sigmoid'
    TSM = TwoStageModel2(rng=rng, x_in=xin_sym, x_out=xout_sym,
            x_dim=x_dim, z_dim=z_dim, h_dim=h_dim,
            q_h_given_x=q_h_given_x,
            q_z_given_h=q_z_given_h,
            p_h_given_z=p_h_given_z,
            p_x_given_h=p_x_given_h,
            params=tsm_params)

    ################################################################
    # Apply some updates, to check that they aren't totally broken #
    ################################################################
    log_name = "{}_RESULTS.txt".format("TSM2A_TEST")
    out_file = open(log_name, 'wb')
    costs = [0. for i in range(10)]
    learn_rate = 0.001
    momentum = 0.9
    batch_idx = np.arange(batch_size) + tr_samples
    for i in range(500000):
        scale = min(1.0, ((i+1) / 5000.0))
        if (((i + 1) % 10000) == 0):
            learn_rate = learn_rate * 0.95
        # get the indices of training samples for this batch update
        batch_idx += batch_size
        if (np.max(batch_idx) >= tr_samples):
            # we finished an "epoch", so we rejumble the training set
            Xtr = row_shuffle(Xtr)
            batch_idx = np.arange(batch_size)
        Xb = to_fX( Xtr.take(batch_idx, axis=0) )
        #Xb = binarize_data(Xtr.take(batch_idx, axis=0))
        # set sgd and objective function hyperparams for this update
        TSM.set_sgd_params(lr=scale*learn_rate,
                           mom_1=(scale*momentum), mom_2=0.98)
        TSM.set_train_switch(1.0)
        TSM.set_lam_nll(lam_nll=1.0)
        TSM.set_lam_kld(lam_kld_q2p=1.0, lam_kld_p2q=0.0)
        TSM.set_lam_l2w(1e-5)
        # perform a minibatch update and record the cost for this batch
        result = TSM.train_joint(Xb, Xb, batch_reps)
        costs = [(costs[j] + result[j]) for j in range(len(result))]
        if ((i % 500) == 0):
            costs = [(v / 500.0) for v in costs]
            str1 = "-- batch {0:d} --".format(i)
            str2 = "    joint_cost: {0:.4f}".format(costs[0])
            str3 = "    nll_cost  : {0:.4f}".format(costs[1])
            str4 = "    kld_cost  : {0:.4f}".format(costs[2])
            str5 = "    reg_cost  : {0:.4f}".format(costs[3])
            str6 = "    nll       : {0:.4f}".format(np.mean(costs[4]))
            str7 = "    kld_z     : {0:.4f}".format(np.mean(costs[5]))
            str8 = "    kld_h     : {0:.4f}".format(np.mean(costs[6]))
            joint_str = "\n".join([str1, str2, str3, str4, str5, str6, str7, str8])
            print(joint_str)
            out_file.write(joint_str+"\n")
            out_file.flush()
            costs = [0.0 for v in costs]
        if (((i % 5000) == 0) or ((i < 10000) and ((i % 1000) == 0))):
            # draw some independent random samples from the model
            samp_count = 300
            model_samps = TSM.sample_from_prior(samp_count)
            file_name = "TSM2A_SAMPLES_b{0:d}.png".format(i)
            utils.visualize_samples(model_samps, file_name, num_rows=15)
            # compute free energy estimate for validation samples
            Xva = row_shuffle(Xva)
            fe_terms = TSM.compute_fe_terms(Xva[0:5000], Xva[0:5000], 20)
            fe_mean = np.mean(fe_terms[0]) + np.mean(fe_terms[1])
            out_str = "    nll_bound : {0:.4f}".format(fe_mean)
            print(out_str)
            out_file.write(out_str+"\n")
            out_file.flush()
    return
Example #30
0
            out_file.write("{}\n".format(o_str_su))
            if ((i % 2000) == 0):
                # check classification error on training and validation set
                train_err = GIS.classification_error(Xtr_su, Ytr_su)
                va_err = GIS.classification_error(Xva, Yva)
                o_str = "    tr_err: {0:.4f}, va_err: {1:.4f}".format(train_err, va_err)
                print(o_str)
                out_file.write("{}\n".format(o_str))
            out_file.flush()
        if ((i % 5000) == 0):
            file_name = "GIS_SAMPLES_b{0:d}.png".format(i)
            tr_idx = npr.randint(low=0,high=un_samples,size=(5,))
            va_idx = npr.randint(low=0,high=va_samples,size=(5,))
            Xd_samps = np.vstack([Xtr_un[tr_idx,:], Xva[va_idx,:]])
            Xd_samps = np.repeat(Xd_samps, 3, axis=0)
            sample_lists = GIS.sample_gis_from_data(Xd_samps, loop_iters=20)
            Xs = np.vstack(sample_lists["data samples"])
            Ys = GIS.class_probs(Xs)
            Xs = mnist_prob_embed(Xs, Ys)
            utils.visualize_samples(Xs, file_name, num_rows=20)
    out_file.close()
    print("TESTING COMPLETE!")




##############
# EYE BUFFER #
##############

Example #31
0
            outputs = GIP.train_joint(Xd_batch, Xc_batch, Xm_batch)
            nll_data = 1.0 * outputs[1]
            kld_data = 1.0 * outputs[2]
            if ((i + 1) % 5000 == 0):
                print("     nll_data: {0:.4f}, kld_data: {1:.4f}".format( \
                    nll_data, kld_data))
        if ((i+1) % 100000 == 0):
            learn_rate = learn_rate * 0.8
        if (i % 1000 == 0):
            print("batch: {0:d}, mom_match_cost: {1:.4f}, disc_dn: {2:.6f}, disc_gn: {3:.6f}, nll: {4:.4f}, kld: {5:.4f}".format( \
                    i, mom_match_cost, disc_dn, disc_gn, nll_cost, kld_cost))
        if (i % 5000 == 0):
            # draw independent samples from generative model's prior
            file_name = "VCG_SAMPLES_b{0:d}.png".format(i)
            Xs = VCG.sample_from_gn(200)
            utils.visualize_samples(Xs, file_name)
            file_name = "VCG_WEIGHTS_b{0:d}.png".format(i)
            utils.visualize(VCG.DN, 0, 0, file_name)
            # draw "markov chain" samples initiated from training data
            file_name = "VCG_SAMPLES_a{0:d}.png".format(i)
            Xd_samps = np.repeat(Xd_batch[0:10,:], 3, axis=0)
            sample_lists = GIP.sample_gil_from_data(Xd_samps, loop_iters=10)
            Xs = np.vstack(sample_lists["data samples"])
            utils.visualize_samples(Xs, file_name)
    print("TESTING COMPLETE!")




##############
# EYE BUFFER #
##############
Example #32
def test_gip_sigma_scale_tfd():
    from LogPDFs import cross_validate_sigma

    # Simple test code, to check that everything is basically functional.
    print("TESTING...")

    # Initialize a source of randomness
    rng = np.random.RandomState(12345)

    # Load some data to train/validate/test with
    data_file = "data/tfd_data_48x48.pkl"
    dataset = load_tfd(tfd_pkl_name=data_file, which_set="unlabeled", fold="all")
    Xtr_unlabeled = dataset[0]
    dataset = load_tfd(tfd_pkl_name=data_file, which_set="train", fold="all")
    Xtr_train = dataset[0]
    Xtr = np.vstack([Xtr_unlabeled, Xtr_train])
    dataset = load_tfd(tfd_pkl_name=data_file, which_set="test", fold="all")
    Xva = dataset[0]
    tr_samples = Xtr.shape[0]
    va_samples = Xva.shape[0]
    print("Xtr.shape: {0:s}, Xva.shape: {1:s}".format(str(Xtr.shape), str(Xva.shape)))

    # get and set some basic dataset information
    tr_samples = Xtr.shape[0]
    data_dim = Xtr.shape[1]
    batch_size = 100

    # Symbolic inputs
    Xd = T.matrix(name="Xd")
    Xc = T.matrix(name="Xc")
    Xm = T.matrix(name="Xm")
    Xt = T.matrix(name="Xt")

    # Load inferencer and generator from saved parameters
    gn_fname = "TFD_WALKOUT_TEST_KLD/pt_walk_params_b25000_GN.pkl"
    in_fname = "TFD_WALKOUT_TEST_KLD/pt_walk_params_b25000_IN.pkl"
    IN = load_infnet_from_file(f_name=in_fname, rng=rng, Xd=Xd)
    GN = load_infnet_from_file(f_name=gn_fname, rng=rng, Xd=Xd)
    x_dim = IN.shared_layers[0].in_dim
    z_dim = IN.mu_layers[-1].out_dim
    # construct a GIPair with the loaded InfNet and GenNet
    osm_params = {}
    osm_params["x_type"] = "gaussian"
    osm_params["xt_transform"] = "sigmoid"
    osm_params["logvar_bound"] = LOGVAR_BOUND
    OSM = OneStageModel(
        rng=rng, Xd=Xd, Xc=Xc, Xm=Xm, p_x_given_z=GN, q_z_given_x=IN, x_dim=x_dim, z_dim=z_dim, params=osm_params
    )

    # # compute variational likelihood bound and its sub-components
    Xva = row_shuffle(Xva)
    Xb = Xva[0:5000]
    # file_name = "A_TFD_POST_KLDS.png"
    # post_klds = OSM.compute_post_klds(Xb)
    # post_dim_klds = np.mean(post_klds, axis=0)
    # utils.plot_stem(np.arange(post_dim_klds.shape[0]), post_dim_klds, \
    #         file_name)
    # compute information about free-energy on validation set
    file_name = "A_TFD_KLD_FREE_ENERGY.png"
    fe_terms = OSM.compute_fe_terms(Xb, 20)
    utils.plot_scatter(fe_terms[1], fe_terms[0], file_name, x_label="Posterior KLd", y_label="Negative Log-likelihood")

    # bound_results = OSM.compute_ll_bound(Xva)
    # ll_bounds = bound_results[0]
    # post_klds = bound_results[1]
    # log_likelihoods = bound_results[2]
    # max_lls = bound_results[3]
    # print("mean ll bound: {0:.4f}".format(np.mean(ll_bounds)))
    # print("mean posterior KLd: {0:.4f}".format(np.mean(post_klds)))
    # print("mean log-likelihood: {0:.4f}".format(np.mean(log_likelihoods)))
    # print("mean max log-likelihood: {0:.4f}".format(np.mean(max_lls)))
    # print("min ll bound: {0:.4f}".format(np.min(ll_bounds)))
    # print("max posterior KLd: {0:.4f}".format(np.max(post_klds)))
    # print("min log-likelihood: {0:.4f}".format(np.min(log_likelihoods)))
    # print("min max log-likelihood: {0:.4f}".format(np.min(max_lls)))
    # # compute some information about the approximate posteriors
    # post_stats = OSM.compute_post_stats(Xva, 0.0*Xva, 0.0*Xva)
    # all_post_klds = np.sort(post_stats[0].ravel()) # post KLds for each obs and dim
    # obs_post_klds = np.sort(post_stats[1]) # summed post KLds for each obs
    # post_dim_klds = post_stats[2] # average post KLds for each post dim
    # post_dim_vars = post_stats[3] # average squared mean for each post dim
    # utils.plot_line(np.arange(all_post_klds.shape[0]), all_post_klds, "AAA_ALL_POST_KLDS.png")
    # utils.plot_line(np.arange(obs_post_klds.shape[0]), obs_post_klds, "AAA_OBS_POST_KLDS.png")
    # utils.plot_stem(np.arange(post_dim_klds.shape[0]), post_dim_klds, "AAA_POST_DIM_KLDS.png")
    # utils.plot_stem(np.arange(post_dim_vars.shape[0]), post_dim_vars, "AAA_POST_DIM_VARS.png")

    # draw many samples from the GIP
    for i in range(5):
        tr_idx = npr.randint(low=0, high=tr_samples, size=(100,))
        Xd_batch = Xtr.take(tr_idx, axis=0)
        Xs = []
        for row in range(3):
            Xs.append([])
            for col in range(3):
                sample_lists = OSM.sample_from_chain(Xd_batch[0:10, :], loop_iters=100, sigma_scale=1.0)
                Xs[row].append(group_chains(sample_lists["data samples"]))
        Xs, block_im_dim = block_video(Xs, (48, 48), (3, 3))
        to_video(Xs, block_im_dim, "A_TFD_KLD_CHAIN_VIDEO_{0:d}.avi".format(i), frame_rate=10)
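        # block_video tiles a 3x3 grid of sampling chains into frames; to_video writes them out at 10 fps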
        # sample_lists = GIP.sample_from_chain(Xd_batch[0,:].reshape((1,data_dim)), loop_iters=300, \
        #        sigma_scale=1.0)
        # Xs = np.vstack(sample_lists["data samples"])
        # file_name = "TFD_TEST_{0:d}.png".format(i)
        # utils.visualize_samples(Xs, file_name, num_rows=15)
    file_name = "A_TFD_KLD_PRIOR_SAMPLE.png"
    Xs = OSM.sample_from_prior(20 * 20)
    utils.visualize_samples(Xs, file_name, num_rows=20)

    # test Parzen density estimator built from prior samples
    # Xs = OSM.sample_from_prior(10000)
    # [best_sigma, best_ll, best_lls] = \
    #         cross_validate_sigma(Xs, Xva, [0.09, 0.095, 0.1, 0.105, 0.11], 10)
    # sort_idx = np.argsort(best_lls)
    # sort_idx = sort_idx[0:400]
    # utils.plot_line(np.arange(sort_idx.shape[0]), best_lls[sort_idx], "A_TFD_BEST_LLS_1.png")
    # utils.visualize_samples(Xva[sort_idx], "A_TFD_BAD_FACES_1.png", num_rows=20)
    return
Example #33
def test_svhn(occ_dim=15, drop_prob=0.0):
    RESULT_PATH = "IMP_SVHN_VAE/"
    #########################################
    # Format the result tag more thoroughly #
    #########################################
    dp_int = int(100.0 * drop_prob)
    result_tag = "{}VAE_OD{}_DP{}".format(RESULT_PATH, occ_dim, dp_int)

    ##########################
    # Get some training data #
    ##########################
    tr_file = 'data/svhn_train_gray.pkl'
    te_file = 'data/svhn_test_gray.pkl'
    ex_file = 'data/svhn_extra_gray.pkl'
    data = load_svhn_gray(tr_file, te_file, ex_file=ex_file, ex_count=200000)
    Xtr = to_fX( shift_and_scale_into_01(np.vstack([data['Xtr'], data['Xex']])) )
    Xva = to_fX( shift_and_scale_into_01(data['Xte']) )
    tr_samples = Xtr.shape[0]
    va_samples = Xva.shape[0]
    batch_size = 250
    all_pix_mean = np.mean(np.mean(Xtr, axis=1))
    data_mean = to_fX( all_pix_mean * np.ones((Xtr.shape[1],)) )
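    # data_mean gets passed to construct_masked_data below, presumably to fill in masked-out pixels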

    ############################################################
    # Setup some parameters for the Iterative Refinement Model #
    ############################################################
    obs_dim = Xtr.shape[1]
    z_dim = 100
    imp_steps = 15 # we'll check for the best step count (found oracularly)
    init_scale = 1.0

    x_in_sym = T.matrix('x_in_sym')
    x_out_sym = T.matrix('x_out_sym')
    x_mask_sym = T.matrix('x_mask_sym')

    #################
    # p_zi_given_xi #
    #################
    params = {}
    shared_config = [obs_dim, 1000, 1000]
    top_config = [shared_config[-1], z_dim]
    params['shared_config'] = shared_config
    params['mu_config'] = top_config
    params['sigma_config'] = top_config
    params['activation'] = relu_actfun
    params['init_scale'] = init_scale
    params['lam_l2a'] = 0.0
    params['vis_drop'] = 0.0
    params['hid_drop'] = 0.0
    params['bias_noise'] = 0.0
    params['input_noise'] = 0.0
    params['build_theano_funcs'] = False
    p_zi_given_xi = InfNet(rng=rng, Xd=x_in_sym, \
            params=params, shared_param_dicts=None)
    p_zi_given_xi.init_biases(0.2)
    ###################
    # p_xip1_given_zi #
    ###################
    params = {}
    shared_config = [z_dim, 1000, 1000]
    output_config = [obs_dim, obs_dim]
    params['shared_config'] = shared_config
    params['output_config'] = output_config
    params['activation'] = relu_actfun
    params['init_scale'] = init_scale
    params['lam_l2a'] = 0.0
    params['vis_drop'] = 0.0
    params['hid_drop'] = 0.0
    params['bias_noise'] = 0.0
    params['input_noise'] = 0.0
    params['build_theano_funcs'] = False
    p_xip1_given_zi = HydraNet(rng=rng, Xd=x_in_sym, \
            params=params, shared_param_dicts=None)
    p_xip1_given_zi.init_biases(0.2)
    ###################
    # q_zi_given_x_xi #
    ###################
    params = {}
    shared_config = [(obs_dim + obs_dim), 1000, 1000]
    top_config = [shared_config[-1], z_dim]
    params['shared_config'] = shared_config
    params['mu_config'] = top_config
    params['sigma_config'] = top_config
    params['activation'] = relu_actfun
    params['init_scale'] = init_scale
    params['lam_l2a'] = 0.0
    params['vis_drop'] = 0.0
    params['hid_drop'] = 0.0
    params['bias_noise'] = 0.0
    params['input_noise'] = 0.0
    params['build_theano_funcs'] = False
    q_zi_given_x_xi = InfNet(rng=rng, Xd=x_in_sym, \
            params=params, shared_param_dicts=None)
    q_zi_given_x_xi.init_biases(0.2)


    ###########################################################
    # Define parameters for the GPSImputer, and initialize it #
    ###########################################################
    print("Building the GPSImputer...")
    gpsi_params = {}
    gpsi_params['obs_dim'] = obs_dim
    gpsi_params['z_dim'] = z_dim
    gpsi_params['imp_steps'] = imp_steps
    gpsi_params['step_type'] = 'jump'
    gpsi_params['x_type'] = 'bernoulli'
    gpsi_params['obs_transform'] = 'sigmoid'
    gpsi_params['use_osm_mode'] = True
    GPSI = GPSImputer(rng=rng, 
            x_in=x_in_sym, x_out=x_out_sym, x_mask=x_mask_sym, \
            p_zi_given_xi=p_zi_given_xi, \
            p_xip1_given_zi=p_xip1_given_zi, \
            q_zi_given_x_xi=q_zi_given_x_xi, \
            params=gpsi_params, \
            shared_param_dicts=None)
    #########################################################################
    # Define parameters for the underlying OneStageModel, and initialize it #
    #########################################################################
    print("Building the OneStageModel...")
    osm_params = {}
    osm_params['x_type'] = 'bernoulli'
    osm_params['xt_transform'] = 'sigmoid'
    OSM = OneStageModel(rng=rng, \
            x_in=x_in_sym, \
            p_x_given_z=p_xip1_given_zi, \
            q_z_given_x=p_zi_given_xi, \
            x_dim=obs_dim, z_dim=z_dim, \
            params=osm_params)
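    # note: GPSI and OSM share p_zi_given_xi and p_xip1_given_zi, so the OSM updates
    # below also train the networks that GPSI uses for imputation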

    ################################################################
    # Apply some updates, to check that they aren't totally broken #
    ################################################################
    log_name = "{}_RESULTS.txt".format(result_tag)
    out_file = open(log_name, 'wb')
    costs = [0. for i in range(10)]
    learn_rate = 0.0002
    momentum = 0.5
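    # start the batch indices past the end of the data, so the first loop iteration triggers a shuffle and reset below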
    batch_idx = np.arange(batch_size) + tr_samples
    for i in range(200005):
        scale = min(1.0, ((i+1) / 5000.0))
        if (((i + 1) % 15000) == 0):
            learn_rate = learn_rate * 0.92
        if (i > 10000):
            momentum = 0.90
        else:
            momentum = 0.50
        # get the indices of training samples for this batch update
        batch_idx += batch_size
        if (np.max(batch_idx) >= tr_samples):
            # we finished an "epoch", so we rejumble the training set
            Xtr = row_shuffle(Xtr)
            batch_idx = np.arange(batch_size)
        # set sgd and objective function hyperparams for this update
        OSM.set_sgd_params(lr=scale*learn_rate, \
                           mom_1=scale*momentum, mom_2=0.99)
        OSM.set_lam_nll(lam_nll=1.0)
        OSM.set_lam_kld(lam_kld_1=1.0, lam_kld_2=0.0)
        OSM.set_lam_l2w(1e-4)
        # perform a minibatch update and record the cost for this batch
        xb = to_fX( Xtr.take(batch_idx, axis=0) )
        result = OSM.train_joint(xb, batch_reps)
        costs = [(costs[j] + result[j]) for j in range(len(result)-1)]
        if ((i % 250) == 0):
            costs = [(v / 250.0) for v in costs]
            str1 = "-- batch {0:d} --".format(i)
            str2 = "    joint_cost: {0:.4f}".format(costs[0])
            str3 = "    nll_cost  : {0:.4f}".format(costs[1])
            str4 = "    kld_cost  : {0:.4f}".format(costs[2])
            str5 = "    reg_cost  : {0:.4f}".format(costs[3])
            joint_str = "\n".join([str1, str2, str3, str4, str5])
            print(joint_str)
            out_file.write(joint_str+"\n")
            out_file.flush()
            costs = [0.0 for v in costs]
        if ((i % 1000) == 0):
            Xva = row_shuffle(Xva)
            # record an estimate of performance on the test set
            xi, xo, xm = construct_masked_data(Xva[0:5000], drop_prob=drop_prob, \
                                               occ_dim=occ_dim, data_mean=data_mean)
            step_nll, step_kld = GPSI.compute_per_step_cost(xi, xo, xm, sample_count=10)
            min_nll = np.min(step_nll)
            str1 = "    va_nll_bound : {}".format(min_nll)
            str2 = "    va_nll_min  : {}".format(min_nll)
            str3 = "    va_nll_final : {}".format(step_nll[-1])
            joint_str = "\n".join([str1, str2, str3])
            print(joint_str)
            out_file.write(joint_str+"\n")
            out_file.flush()
        if ((i % 10000) == 0):
            # Get some validation samples for evaluating model performance
            xb = to_fX( Xva[0:100] )
            xi, xo, xm = construct_masked_data(xb, drop_prob=drop_prob, \
                                    occ_dim=occ_dim, data_mean=data_mean)
            xi = np.repeat(xi, 2, axis=0)
            xo = np.repeat(xo, 2, axis=0)
            xm = np.repeat(xm, 2, axis=0)
            # draw some sample imputations from the model
            samp_count = xi.shape[0]
            _, model_samps = GPSI.sample_imputer(xi, xo, xm, use_guide_policy=False)
            seq_len = len(model_samps)
            seq_samps = np.zeros((seq_len*samp_count, model_samps[0].shape[1]))
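            # interleave steps and samples so each row of the saved grid traces one imputation sequence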
            idx = 0
            for s1 in range(samp_count):
                for s2 in range(seq_len):
                    seq_samps[idx] = model_samps[s2][s1]
                    idx += 1
            file_name = "{}_samples_ng_b{}.png".format(result_tag, i)
            utils.visualize_samples(seq_samps, file_name, num_rows=20)
            # get visualizations of policy parameters
            file_name = "{}_gen_gen_weights_b{}.png".format(result_tag, i)
            W = GPSI.gen_gen_weights.get_value(borrow=False)
            utils.visualize_samples(W[:,:obs_dim], file_name, num_rows=20)
            file_name = "{}_gen_inf_weights_b{}.png".format(result_tag, i)
            W = GPSI.gen_inf_weights.get_value(borrow=False).T
            utils.visualize_samples(W[:,:obs_dim], file_name, num_rows=20)
Example #34
def pretrain_osm(lam_kld=0.0):
    # Initialize a source of randomness
    rng = np.random.RandomState(1234)

    # Load some data to train/validate/test with
    dataset = 'data/mnist.pkl.gz'
    datasets = load_udm(dataset, zero_mean=False)
    Xtr = datasets[0][0]
    Xtr = Xtr.get_value(borrow=False)
    Xva = datasets[2][0]
    Xva = Xva.get_value(borrow=False)
    print("Xtr.shape: {0:s}, Xva.shape: {1:s}".format(str(Xtr.shape),
                                                      str(Xva.shape)))

    # get and set some basic dataset information
    Xtr_mean = np.mean(Xtr, axis=0)
    tr_samples = Xtr.shape[0]
    va_samples = Xva.shape[0]
    batch_size = 100
    batch_reps = 5

    # setup some symbolic variables and stuff
    Xd = T.matrix('Xd_base')
    Xc = T.matrix('Xc_base')
    Xm = T.matrix('Xm_base')
    data_dim = Xtr.shape[1]
    prior_sigma = 1.0

    ##########################
    # NETWORK CONFIGURATIONS #
    ##########################
    gn_params = {}
    shared_config = [PRIOR_DIM, 1000, 1000]
    top_config = [shared_config[-1], data_dim]
    gn_params['shared_config'] = shared_config
    gn_params['mu_config'] = top_config
    gn_params['sigma_config'] = top_config
    gn_params['activation'] = relu_actfun
    gn_params['init_scale'] = 1.4
    gn_params['lam_l2a'] = 0.0
    gn_params['vis_drop'] = 0.0
    gn_params['hid_drop'] = 0.0
    gn_params['bias_noise'] = 0.0
    gn_params['input_noise'] = 0.0
    # choose some parameters for the continuous inferencer
    in_params = {}
    shared_config = [data_dim, 1000, 1000]
    top_config = [shared_config[-1], PRIOR_DIM]
    in_params['shared_config'] = shared_config
    in_params['mu_config'] = top_config
    in_params['sigma_config'] = top_config
    in_params['activation'] = relu_actfun
    in_params['init_scale'] = 1.4
    in_params['lam_l2a'] = 0.0
    in_params['vis_drop'] = 0.0
    in_params['hid_drop'] = 0.0
    in_params['bias_noise'] = 0.0
    in_params['input_noise'] = 0.0
    # Initialize the base networks for this OneStageModel
    IN = InfNet(rng=rng, Xd=Xd, prior_sigma=prior_sigma, \
            params=in_params, shared_param_dicts=None)
    GN = InfNet(rng=rng, Xd=Xd, prior_sigma=prior_sigma, \
            params=gn_params, shared_param_dicts=None)
    # Initialize biases in IN and GN
    IN.init_biases(0.2)
    GN.init_biases(0.2)

    #########################
    # INITIALIZE THE GIPAIR #
    #########################
    osm_params = {}
    osm_params['x_type'] = 'bernoulli'
    osm_params['xt_transform'] = 'sigmoid'
    osm_params['logvar_bound'] = LOGVAR_BOUND
    OSM = OneStageModel(rng=rng, Xd=Xd, Xc=Xc, Xm=Xm, \
            p_x_given_z=GN, q_z_given_x=IN, \
            x_dim=data_dim, z_dim=PRIOR_DIM, params=osm_params)
    OSM.set_lam_l2w(1e-5)
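    # initialize the input/output biases from the training-set pixel means, so early
    # reconstructions start near the data mean instead of at arbitrary values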
    safe_mean = (0.9 * Xtr_mean) + 0.05
    safe_mean_logit = np.log(safe_mean / (1.0 - safe_mean))
    OSM.set_output_bias(safe_mean_logit)
    OSM.set_input_bias(-Xtr_mean)

    ######################
    # BASIC VAE TRAINING #
    ######################
    out_file = open(RESULT_PATH + "pt_osm_results.txt", 'wb')
    # Set initial learning rate and basic SGD hyper parameters
    obs_costs = np.zeros((batch_size, ))
    costs = [0. for i in range(10)]
    learn_rate = 0.0005
    for i in range(150000):
        scale = min(1.0, float(i) / 10000.0)
        if ((i > 1) and ((i % 20000) == 0)):
            learn_rate = learn_rate * 0.9
        # grab a random minibatch of training examples
        tr_idx = npr.randint(low=0, high=tr_samples, size=(batch_size, ))
        Xd_batch = Xtr.take(tr_idx, axis=0)
        Xc_batch = 0.0 * Xd_batch
        Xm_batch = 0.0 * Xd_batch
        # do a minibatch update of the model, and compute some costs
        OSM.set_sgd_params(lr_1=(scale * learn_rate), mom_1=0.5, mom_2=0.98)
        OSM.set_lam_nll(1.0)
        OSM.set_lam_kld(lam_kld_1=(1.0 + (scale * (lam_kld - 1.0))),
                        lam_kld_2=0.0)
        result = OSM.train_joint(Xd_batch, Xc_batch, Xm_batch, batch_reps)
        costs = [(costs[j] + result[j]) for j in range(len(result))]
        if ((i % 1000) == 0):
            # record and then reset the cost trackers
            costs = [(v / 1000.0) for v in costs]
            str_1 = "-- batch {0:d} --".format(i)
            str_2 = "    joint_cost: {0:.4f}".format(costs[0])
            str_3 = "    nll_cost  : {0:.4f}".format(costs[1])
            str_4 = "    kld_cost  : {0:.4f}".format(costs[2])
            str_5 = "    reg_cost  : {0:.4f}".format(costs[3])
            costs = [0.0 for v in costs]
            # print out some diagnostic information
            joint_str = "\n".join([str_1, str_2, str_3, str_4, str_5])
            print(joint_str)
            out_file.write(joint_str + "\n")
            out_file.flush()
        if ((i % 2000) == 0):
            Xva = row_shuffle(Xva)
            model_samps = OSM.sample_from_prior(500)
            file_name = RESULT_PATH + "pt_osm_samples_b{0:d}_XG.png".format(i)
            utils.visualize_samples(model_samps, file_name, num_rows=20)
            # compute information about free-energy on validation set
            file_name = RESULT_PATH + "pt_osm_free_energy_b{0:d}.png".format(i)
            fe_terms = OSM.compute_fe_terms(Xva[0:2500], 20)
            fe_mean = np.mean(fe_terms[0]) + np.mean(fe_terms[1])
            fe_str = "    nll_bound : {0:.4f}".format(fe_mean)
            print(fe_str)
            out_file.write(fe_str + "\n")
            utils.plot_scatter(fe_terms[1], fe_terms[0], file_name, \
                    x_label='Posterior KLd', y_label='Negative Log-likelihood')
            # compute information about posterior KLds on validation set
            file_name = RESULT_PATH + "pt_osm_post_klds_b{0:d}.png".format(i)
            post_klds = OSM.compute_post_klds(Xva[0:2500])
            post_dim_klds = np.mean(post_klds, axis=0)
            utils.plot_stem(np.arange(post_dim_klds.shape[0]), post_dim_klds, \
                    file_name)
        if ((i % 5000) == 0):
            IN.save_to_file(f_name=RESULT_PATH +
                            "pt_osm_params_b{0:d}_IN.pkl".format(i))
            GN.save_to_file(f_name=RESULT_PATH +
                            "pt_osm_params_b{0:d}_GN.pkl".format(i))
    IN.save_to_file(f_name=RESULT_PATH + "pt_osm_params_IN.pkl")
    GN.save_to_file(f_name=RESULT_PATH + "pt_osm_params_GN.pkl")
    return
def test_mnist(step_type='add',
               imp_steps=6,
               occ_dim=15,
               drop_prob=0.0):
    #########################################
    # Format the result tag more thoroughly #
    #########################################
    dp_int = int(100.0 * drop_prob)
    result_tag = "{}GPSI_OD{}_DP{}_IS{}_{}_NA".format(RESULT_PATH, occ_dim, dp_int, imp_steps, step_type)

    ##########################
    # Get some training data #
    ##########################
    rng = np.random.RandomState(1234)
    Xtr, Xva, Xte = load_binarized_mnist(data_path='./data/')
    Xtr = np.vstack((Xtr, Xva))
    Xva = Xte
    #del Xte
    tr_samples = Xtr.shape[0]
    va_samples = Xva.shape[0]

    ##########################
    # Get some training data #
    ##########################
    # rng = np.random.RandomState(1234)
    # dataset = 'data/mnist.pkl.gz'
    # datasets = load_udm(dataset, as_shared=False, zero_mean=False)
    # Xtr = datasets[0][0]
    # Xva = datasets[1][0]
    # Xte = datasets[2][0]
    # # Merge validation set and training set, and test on test set.
    # #Xtr = np.concatenate((Xtr, Xva), axis=0)
    # #Xva = Xte
    # Xtr = to_fX(shift_and_scale_into_01(Xtr))
    # Xva = to_fX(shift_and_scale_into_01(Xva))
    # tr_samples = Xtr.shape[0]
    # va_samples = Xva.shape[0]
    batch_size = 200
    batch_reps = 1
    all_pix_mean = np.mean(np.mean(Xtr, axis=1))
    data_mean = to_fX( all_pix_mean * np.ones((Xtr.shape[1],)) )

    ############################################################
    # Setup some parameters for the Iterative Refinement Model #
    ############################################################
    x_dim = Xtr.shape[1]
    s_dim = x_dim
    h_dim = 50
    z_dim = 100
    init_scale = 0.6

    x_in_sym = T.matrix('x_in_sym')
    x_out_sym = T.matrix('x_out_sym')
    x_mask_sym = T.matrix('x_mask_sym')

    ###############
    # p_h_given_x #
    ###############
    params = {}
    shared_config = [x_dim, 250]
    top_config = [shared_config[-1], h_dim]
    params['shared_config'] = shared_config
    params['mu_config'] = top_config
    params['sigma_config'] = top_config
    params['activation'] = tanh_actfun #relu_actfun
    params['init_scale'] = 'xg' #init_scale
    params['vis_drop'] = 0.0
    params['hid_drop'] = 0.0
    params['bias_noise'] = 0.0
    params['input_noise'] = 0.0
    params['build_theano_funcs'] = False
    p_h_given_x = InfNet(rng=rng, Xd=x_in_sym, \
            params=params, shared_param_dicts=None)
    p_h_given_x.init_biases(0.0)
    ################
    # p_s0_given_h #
    ################
    params = {}
    shared_config = [h_dim, 250]
    output_config = [s_dim, s_dim, s_dim]
    params['shared_config'] = shared_config
    params['output_config'] = output_config
    params['activation'] = tanh_actfun #relu_actfun
    params['init_scale'] = 'xg' #init_scale
    params['vis_drop'] = 0.0
    params['hid_drop'] = 0.0
    params['bias_noise'] = 0.0
    params['input_noise'] = 0.0
    params['build_theano_funcs'] = False
    p_s0_given_h = HydraNet(rng=rng, Xd=x_in_sym, \
            params=params, shared_param_dicts=None)
    p_s0_given_h.init_biases(0.0)
    #################
    # p_zi_given_xi #
    #################
    params = {}
    shared_config = [(x_dim + x_dim), 500, 500]
    top_config = [shared_config[-1], z_dim]
    params['shared_config'] = shared_config
    params['mu_config'] = top_config
    params['sigma_config'] = top_config
    params['activation'] = tanh_actfun #relu_actfun
    params['init_scale'] = init_scale
    params['vis_drop'] = 0.0
    params['hid_drop'] = 0.0
    params['bias_noise'] = 0.0
    params['input_noise'] = 0.0
    params['build_theano_funcs'] = False
    p_zi_given_xi = InfNet(rng=rng, Xd=x_in_sym, \
            params=params, shared_param_dicts=None)
    p_zi_given_xi.init_biases(0.0)
    ###################
    # p_sip1_given_zi #
    ###################
    params = {}
    shared_config = [z_dim, 500, 500]
    output_config = [s_dim, s_dim, s_dim]
    params['shared_config'] = shared_config
    params['output_config'] = output_config
    params['activation'] = tanh_actfun #relu_actfun
    params['init_scale'] = init_scale
    params['vis_drop'] = 0.0
    params['hid_drop'] = 0.0
    params['bias_noise'] = 0.0
    params['input_noise'] = 0.0
    params['build_theano_funcs'] = False
    p_sip1_given_zi = HydraNet(rng=rng, Xd=x_in_sym, \
            params=params, shared_param_dicts=None)
    p_sip1_given_zi.init_biases(0.0)
    ################
    # p_x_given_si #
    ################
    params = {}
    shared_config = [s_dim]
    output_config = [x_dim, x_dim]
    params['shared_config'] = shared_config
    params['output_config'] = output_config
    params['activation'] = tanh_actfun #relu_actfun
    params['init_scale'] = init_scale
    params['vis_drop'] = 0.0
    params['hid_drop'] = 0.0
    params['bias_noise'] = 0.0
    params['input_noise'] = 0.0
    params['build_theano_funcs'] = False
    p_x_given_si = HydraNet(rng=rng, Xd=x_in_sym, \
            params=params, shared_param_dicts=None)
    p_x_given_si.init_biases(0.0)
    ###############
    # q_h_given_x #
    ###############
    params = {}
    shared_config = [x_dim, 250]
    top_config = [shared_config[-1], h_dim]
    params['shared_config'] = shared_config
    params['mu_config'] = top_config
    params['sigma_config'] = top_config
    params['activation'] = tanh_actfun #relu_actfun
    params['init_scale'] = 'xg' #init_scale
    params['vis_drop'] = 0.0
    params['hid_drop'] = 0.0
    params['bias_noise'] = 0.0
    params['input_noise'] = 0.0
    params['build_theano_funcs'] = False
    q_h_given_x = InfNet(rng=rng, Xd=x_in_sym, \
            params=params, shared_param_dicts=None)
    q_h_given_x.init_biases(0.0)
    #################
    # q_zi_given_xi #
    #################
    params = {}
    shared_config = [(x_dim + x_dim), 500, 500]
    top_config = [shared_config[-1], z_dim]
    params['shared_config'] = shared_config
    params['mu_config'] = top_config
    params['sigma_config'] = top_config
    params['activation'] = tanh_actfun #relu_actfun
    params['init_scale'] = init_scale
    params['vis_drop'] = 0.0
    params['hid_drop'] = 0.0
    params['bias_noise'] = 0.0
    params['input_noise'] = 0.0
    params['build_theano_funcs'] = False
    q_zi_given_xi = InfNet(rng=rng, Xd=x_in_sym, \
            params=params, shared_param_dicts=None)
    q_zi_given_xi.init_biases(0.0)

    ###########################################################
    # Define parameters for the GPSImputer, and initialize it #
    ###########################################################
    print("Building the GPSImputer...")
    gpsi_params = {}
    gpsi_params['x_dim'] = x_dim
    gpsi_params['h_dim'] = h_dim
    gpsi_params['z_dim'] = z_dim
    gpsi_params['s_dim'] = s_dim
    # switch between direct construction and construction via p_x_given_si
    gpsi_params['use_p_x_given_si'] = False
    gpsi_params['imp_steps'] = imp_steps
    gpsi_params['step_type'] = step_type
    gpsi_params['x_type'] = 'bernoulli'
    gpsi_params['obs_transform'] = 'sigmoid'
    GPSI = GPSImputerWI(rng=rng,
            x_in=x_in_sym, x_out=x_out_sym, x_mask=x_mask_sym, \
            p_h_given_x=p_h_given_x, \
            p_s0_given_h=p_s0_given_h, \
            p_zi_given_xi=p_zi_given_xi, \
            p_sip1_given_zi=p_sip1_given_zi, \
            p_x_given_si=p_x_given_si, \
            q_h_given_x=q_h_given_x, \
            q_zi_given_xi=q_zi_given_xi, \
            params=gpsi_params, \
            shared_param_dicts=None)

    ################################################################
    # Apply some updates, to check that they aren't totally broken #
    ################################################################
    log_name = "{}_RESULTS.txt".format(result_tag)
    out_file = open(log_name, 'wb')
    costs = [0. for i in range(10)]
    learn_rate = 0.0002
    momentum = 0.5
    batch_idx = np.arange(batch_size) + tr_samples
    for i in range(250000):
        scale = min(1.0, ((i+1) / 5000.0))
        lam_scale = 1.0 - min(1.0, ((i+1) / 100000.0)) # decays from 1.0->0.0
        if (((i + 1) % 15000) == 0):
            learn_rate = learn_rate * 0.93
        if (i > 10000):
            momentum = 0.90
        else:
            momentum = 0.75
        # get the indices of training samples for this batch update
        batch_idx += batch_size
        if (np.max(batch_idx) >= tr_samples):
            # we finished an "epoch", so we rejumble the training set
            Xtr = row_shuffle(Xtr)
            batch_idx = np.arange(batch_size)
        # set sgd and objective function hyperparams for this update
        GPSI.set_sgd_params(lr=scale*learn_rate, \
                            mom_1=scale*momentum, mom_2=0.98)
        GPSI.set_train_switch(1.0)
        GPSI.set_lam_nll(lam_nll=1.0)
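        # the lam_kld_g/lam_kld_s penalties decay from 0.1 toward 0.0 over the first 100k batches (via lam_scale)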
        GPSI.set_lam_kld(lam_kld_p=0.05, lam_kld_q=0.95, \
                         lam_kld_g=(0.1 * lam_scale), lam_kld_s=(0.1 * lam_scale))
        GPSI.set_lam_l2w(1e-5)
        # perform a minibatch update and record the cost for this batch
        xb = to_fX( Xtr.take(batch_idx, axis=0) )
        xi, xo, xm = construct_masked_data(xb, drop_prob=drop_prob, \
                                        occ_dim=occ_dim, data_mean=data_mean)
        result = GPSI.train_joint(xi, xo, xm, batch_reps)
        # do diagnostics and general training tracking
        costs = [(costs[j] + result[j]) for j in range(len(result)-1)]
        if ((i % 250) == 0):
            costs = [(v / 250.0) for v in costs]
            str1 = "-- batch {0:d} --".format(i)
            str2 = "    joint_cost: {0:.4f}".format(costs[0])
            str3 = "    nll_bound : {0:.4f}".format(costs[1])
            str4 = "    nll_cost  : {0:.4f}".format(costs[2])
            str5 = "    kld_cost  : {0:.4f}".format(costs[3])
            str6 = "    reg_cost  : {0:.4f}".format(costs[4])
            joint_str = "\n".join([str1, str2, str3, str4, str5, str6])
            print(joint_str)
            out_file.write(joint_str+"\n")
            out_file.flush()
            costs = [0.0 for v in costs]
        if ((i % 1000) == 0):
            Xva = row_shuffle(Xva)
            # record an estimate of performance on the test set
            xi, xo, xm = construct_masked_data(Xva[0:5000], drop_prob=drop_prob, \
                                               occ_dim=occ_dim, data_mean=data_mean)
            nll, kld = GPSI.compute_fe_terms(xi, xo, xm, sample_count=10)
            vfe = np.mean(nll) + np.mean(kld)
            str1 = "    va_nll_bound : {}".format(vfe)
            str2 = "    va_nll_term  : {}".format(np.mean(nll))
            str3 = "    va_kld_q2p   : {}".format(np.mean(kld))
            joint_str = "\n".join([str1, str2, str3])
            print(joint_str)
            out_file.write(joint_str+"\n")
            out_file.flush()
        if ((i % 2000) == 0):
            GPSI.save_to_file("{}_PARAMS.pkl".format(result_tag))
            # Get some validation samples for evaluating model performance
            xb = to_fX( Xva[0:100] )
            xi, xo, xm = construct_masked_data(xb, drop_prob=drop_prob, \
                                    occ_dim=occ_dim, data_mean=data_mean)
            xi = np.repeat(xi, 2, axis=0)
            xo = np.repeat(xo, 2, axis=0)
            xm = np.repeat(xm, 2, axis=0)
            # draw some sample imputations from the model
            samp_count = xi.shape[0]
            _, model_samps = GPSI.sample_imputer(xi, xo, xm, use_guide_policy=False)
            seq_len = len(model_samps)
            seq_samps = np.zeros((seq_len*samp_count, model_samps[0].shape[1]))
            idx = 0
            for s1 in range(samp_count):
                for s2 in range(seq_len):
                    seq_samps[idx] = model_samps[s2][s1]
                    idx += 1
            file_name = "{0:s}_samples_ng_b{1:d}.png".format(result_tag, i)
            utils.visualize_samples(seq_samps, file_name, num_rows=20)
            # show KLds and NLLs on a step-by-step basis
            xb = to_fX( Xva[0:1000] )
            xi, xo, xm = construct_masked_data(xb, drop_prob=drop_prob, \
                                    occ_dim=occ_dim, data_mean=data_mean)
            step_costs = GPSI.compute_per_step_cost(xi, xo, xm)
            step_nlls = step_costs[0]
            step_klds = step_costs[1]
            step_nums = np.arange(step_nlls.shape[0])
            file_name = "{0:s}_NLL_b{1:d}.png".format(result_tag, i)
            utils.plot_stem(step_nums, step_nlls, file_name)
            file_name = "{0:s}_KLD_b{1:d}.png".format(result_tag, i)
            utils.plot_stem(step_nums, step_klds, file_name)
Example #36
def pretrain_osm(lam_kld=0.0):
    # Initialize a source of randomness
    rng = np.random.RandomState(1234)

    # Load some data to train/validate/test with
    data_file = 'data/tfd_data_48x48.pkl'
    dataset = load_tfd(tfd_pkl_name=data_file,
                       which_set='unlabeled',
                       fold='all')
    Xtr_unlabeled = dataset[0]
    dataset = load_tfd(tfd_pkl_name=data_file, which_set='train', fold='all')
    Xtr_train = dataset[0]
    Xtr = np.vstack([Xtr_unlabeled, Xtr_train])
    dataset = load_tfd(tfd_pkl_name=data_file, which_set='valid', fold='all')
    Xva = dataset[0]
    tr_samples = Xtr.shape[0]
    va_samples = Xva.shape[0]
    batch_size = 400
    batch_reps = 6
    carry_frac = 0.25
    carry_size = int(batch_size * carry_frac)
    reset_prob = 0.04
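    # carry_frac/carry_size/reset_prob set up a batch scheme that re-uses the worst-scoring
    # quarter of each minibatch in the next one, with occasional fully random resets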

    # setup some symbolic variables and stuff
    Xd = T.matrix('Xd_base')
    Xc = T.matrix('Xc_base')
    Xm = T.matrix('Xm_base')
    data_dim = Xtr.shape[1]
    prior_sigma = 1.0
    Xtr_mean = np.mean(Xtr, axis=0)

    ##########################
    # NETWORK CONFIGURATIONS #
    ##########################
    gn_params = {}
    shared_config = [PRIOR_DIM, 1500, 1500]
    top_config = [shared_config[-1], data_dim]
    gn_params['shared_config'] = shared_config
    gn_params['mu_config'] = top_config
    gn_params['sigma_config'] = top_config
    gn_params['activation'] = relu_actfun
    gn_params['init_scale'] = 1.4
    gn_params['lam_l2a'] = 0.0
    gn_params['vis_drop'] = 0.0
    gn_params['hid_drop'] = 0.0
    gn_params['bias_noise'] = 0.0
    gn_params['input_noise'] = 0.0
    # choose some parameters for the continuous inferencer
    in_params = {}
    shared_config = [data_dim, 1500, 1500]
    top_config = [shared_config[-1], PRIOR_DIM]
    in_params['shared_config'] = shared_config
    in_params['mu_config'] = top_config
    in_params['sigma_config'] = top_config
    in_params['activation'] = relu_actfun
    in_params['init_scale'] = 1.4
    in_params['lam_l2a'] = 0.0
    in_params['vis_drop'] = 0.0
    in_params['hid_drop'] = 0.0
    in_params['bias_noise'] = 0.0
    in_params['input_noise'] = 0.0
    # Initialize the base networks for this OneStageModel
    IN = InfNet(rng=rng, Xd=Xd, prior_sigma=prior_sigma, \
            params=in_params, shared_param_dicts=None)
    GN = InfNet(rng=rng, Xd=Xd, prior_sigma=prior_sigma, \
            params=gn_params, shared_param_dicts=None)
    # Initialize biases in IN and GN
    IN.init_biases(0.2)
    GN.init_biases(0.2)

    ######################################
    # LOAD AND RESTART FROM SAVED PARAMS #
    ######################################
    # gn_fname = RESULT_PATH+"pt_osm_params_b110000_GN.pkl"
    # in_fname = RESULT_PATH+"pt_osm_params_b110000_IN.pkl"
    # IN = load_infnet_from_file(f_name=in_fname, rng=rng, Xd=Xd, \
    #         new_params=None)
    # GN = load_infnet_from_file(f_name=gn_fname, rng=rng, Xd=Xd, \
    #         new_params=None)
    # in_params = IN.params
    # gn_params = GN.params

    #########################
    # INITIALIZE THE GIPAIR #
    #########################
    osm_params = {}
    osm_params['x_type'] = 'bernoulli'
    osm_params['xt_transform'] = 'sigmoid'
    osm_params['logvar_bound'] = LOGVAR_BOUND
    OSM = OneStageModel(rng=rng, Xd=Xd, Xc=Xc, Xm=Xm, \
            p_x_given_z=GN, q_z_given_x=IN, \
            x_dim=data_dim, z_dim=PRIOR_DIM, params=osm_params)
    OSM.set_lam_l2w(1e-5)
    safe_mean = (0.9 * Xtr_mean) + 0.05
    safe_mean_logit = np.log(safe_mean / (1.0 - safe_mean))
    OSM.set_output_bias(safe_mean_logit)
    OSM.set_input_bias(-Xtr_mean)

    ######################
    # BASIC VAE TRAINING #
    ######################
    out_file = open(RESULT_PATH + "pt_osm_results.txt", 'wb')
    # Set initial learning rate and basic SGD hyper parameters
    obs_costs = np.zeros((batch_size, ))
    costs = [0. for i in range(10)]
    learn_rate = 0.002
    for i in range(200000):
        scale = min(1.0, float(i) / 5000.0)
        if ((i > 1) and ((i % 20000) == 0)):
            learn_rate = learn_rate * 0.8
        if (i < 10000):
            momentum = 0.5
        elif (i < 50000):
            momentum = 0.7
        else:
            momentum = 0.9
        if ((i == 0) or (npr.rand() < reset_prob)):
            # sample a fully random batch
            batch_idx = npr.randint(low=0,
                                    high=tr_samples,
                                    size=(batch_size, ))
        else:
            # sample a partially random batch, which retains some portion of
            # the worst scoring examples from the previous batch
            fresh_idx = npr.randint(low=0,
                                    high=tr_samples,
                                    size=(batch_size - carry_size, ))
            batch_idx = np.concatenate((fresh_idx.ravel(), carry_idx.ravel()))
        # select this batch's examples using the (partially carried) batch_idx
        Xd_batch = Xtr.take(batch_idx, axis=0)
        Xc_batch = 0.0 * Xd_batch
        Xm_batch = 0.0 * Xd_batch
        # do a minibatch update of the model, and compute some costs
        OSM.set_sgd_params(lr_1=(scale*learn_rate), \
                mom_1=(scale*momentum), mom_2=0.98)
        OSM.set_lam_nll(1.0)
        OSM.set_lam_kld(lam_kld_1=scale * lam_kld,
                        lam_kld_2=0.0,
                        lam_kld_c=50.0)
        result = OSM.train_joint(Xd_batch, Xc_batch, Xm_batch, batch_reps)
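        # rank per-example costs so the worst-scoring examples can be carried into the next batch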
        batch_costs = result[4] + result[5]
        obs_costs = collect_obs_costs(batch_costs, batch_reps)
        carry_idx = batch_idx[np.argsort(-obs_costs)[0:carry_size]]
        costs = [(costs[j] + result[j]) for j in range(len(result))]
        if ((i % 1000) == 0):
            # record and then reset the cost trackers
            costs = [(v / 1000.0) for v in costs]
            str_1 = "-- batch {0:d} --".format(i)
            str_2 = "    joint_cost: {0:.4f}".format(costs[0])
            str_3 = "    nll_cost  : {0:.4f}".format(costs[1])
            str_4 = "    kld_cost  : {0:.4f}".format(costs[2])
            str_5 = "    reg_cost  : {0:.4f}".format(costs[3])
            costs = [0.0 for v in costs]
            # print out some diagnostic information
            joint_str = "\n".join([str_1, str_2, str_3, str_4, str_5])
            print(joint_str)
            out_file.write(joint_str + "\n")
            out_file.flush()
        if ((i % 2000) == 0):
            Xva = row_shuffle(Xva)
            model_samps = OSM.sample_from_prior(500)
            file_name = RESULT_PATH + "pt_osm_samples_b{0:d}_XG.png".format(i)
            utils.visualize_samples(model_samps, file_name, num_rows=20)
            file_name = RESULT_PATH + "pt_osm_inf_weights_b{0:d}.png".format(i)
            utils.visualize_samples(OSM.inf_weights.get_value(borrow=False).T, \
                    file_name, num_rows=30)
            file_name = RESULT_PATH + "pt_osm_gen_weights_b{0:d}.png".format(i)
            utils.visualize_samples(OSM.gen_weights.get_value(borrow=False), \
                    file_name, num_rows=30)
            # compute information about free-energy on validation set
            file_name = RESULT_PATH + "pt_osm_free_energy_b{0:d}.png".format(i)
            fe_terms = OSM.compute_fe_terms(Xva[0:2500], 20)
            fe_mean = np.mean(fe_terms[0]) + np.mean(fe_terms[1])
            fe_str = "    nll_bound : {0:.4f}".format(fe_mean)
            print(fe_str)
            out_file.write(fe_str + "\n")
            utils.plot_scatter(fe_terms[1], fe_terms[0], file_name, \
                    x_label='Posterior KLd', y_label='Negative Log-likelihood')
            # compute information about posterior KLds on validation set
            file_name = RESULT_PATH + "pt_osm_post_klds_b{0:d}.png".format(i)
            post_klds = OSM.compute_post_klds(Xva[0:2500])
            post_dim_klds = np.mean(post_klds, axis=0)
            utils.plot_stem(np.arange(post_dim_klds.shape[0]), post_dim_klds, \
                    file_name)
        if ((i % 5000) == 0):
            IN.save_to_file(f_name=RESULT_PATH +
                            "pt_osm_params_b{0:d}_IN.pkl".format(i))
            GN.save_to_file(f_name=RESULT_PATH +
                            "pt_osm_params_b{0:d}_GN.pkl".format(i))
    IN.save_to_file(f_name=RESULT_PATH + "pt_osm_params_IN.pkl")
    GN.save_to_file(f_name=RESULT_PATH + "pt_osm_params_GN.pkl")
    return
Example #37
def check_tfd_walkout():
    # DERPA DERPA DOO
    KLD_PATH = "TFD_WALKOUT_TEST_KLD/"
    VAE_PATH = "TFD_WALKOUT_TEST_VAE/"
    RESULT_PATH = "TFD_WALKOUT_RESULTS/"

    # Initialize a source of randomness
    rng = np.random.RandomState(1234)

    # Load some data to train/validate/test with
    data_file = 'data/tfd_data_48x48.pkl'
    dataset = load_tfd(tfd_pkl_name=data_file, which_set='unlabeled', fold='all')
    Xtr_unlabeled = dataset[0]
    dataset = load_tfd(tfd_pkl_name=data_file, which_set='train', fold='all')
    Xtr_train = dataset[0]
    Xtr = np.vstack([Xtr_unlabeled, Xtr_train])
    dataset = load_tfd(tfd_pkl_name=data_file, which_set='valid', fold='all')
    Xva = dataset[0]
    tr_samples = Xtr.shape[0]
    va_samples = Xva.shape[0]
    batch_size = 100
    batch_reps = 5

    # Construct a GenNet and an InfNet, then test constructor for GIPair.
    # Do basic testing, to make sure classes aren't completely broken.
    Xp = T.matrix('Xp_base')
    Xd = T.matrix('Xd_base')
    Xc = T.matrix('Xc_base')
    Xm = T.matrix('Xm_base')
    data_dim = Xtr.shape[1]
    prior_sigma = 1.0

    p_vals_kld, v_vals_kld, p_vals_vae, v_vals_vae = [], [], [], []
    kl_vals_kld, ll_vals_kld, kl_vals_vae, ll_vals_vae = [], [], [], []

    ########################################################
    # CHECK MODEL BEHAVIOR AT DIFFERENT STAGES OF TRAINING #
    ########################################################
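    # checkpoints are inspected every 10k batches; those up to batch 150k come from the
    # 'gip' pretraining phase, later ones from the 'walk' phase (reindexed from 0)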
    for i in range(10000,200000):
        if ((i % 10000) == 0):
            if (i <= 150000):
                net_type = 'gip'
                b = i
            else:
                net_type = 'walk'
                b = i - 150000
            #############################################################
            # Process the GIPair trained with strong KLd regularization #
            #############################################################
            gn_fname = KLD_PATH + "pt_{0:s}_params_b{1:d}_GN.pkl".format(net_type, b)
            in_fname = KLD_PATH + "pt_{0:s}_params_b{1:d}_IN.pkl".format(net_type, b)
            IN = INet.load_infnet_from_file(f_name=in_fname, rng=rng, Xd=Xd)
            GN = GNet.load_gennet_from_file(f_name=gn_fname, rng=rng, Xp=Xp)
            IN.set_sigma_scale(1.0)
            prior_dim = GN.latent_dim
            post_klds_kld = posterior_klds(IN, Xtr, 5000, 5)
            # Initialize the GIPair
            GIP_KLD = GIPair(rng=rng, Xd=Xd, Xc=Xc, Xm=Xm, g_net=GN, i_net=IN, \
                    data_dim=data_dim, prior_dim=prior_dim, params=None)
            GIP_KLD.set_lam_l2w(1e-4)
            GIP_KLD.set_lam_nll(1.0)
            GIP_KLD.set_lam_kld(1.0)
            # draw samples freely from the generative model's prior
            Xs = GIP_KLD.sample_from_prior(20*20)
            file_name = RESULT_PATH + "prior_samples_b{0:d}_kld.png".format(i)
            utils.visualize_samples(Xs, file_name, num_rows=20)
            # test Parzen density estimator built from prior samples
            Xs = GIP_KLD.sample_from_prior(10000, sigma=1.0)
            parzen_vals_kld = cross_validate_sigma(Xs, Xva, [0.08, 0.09, 0.1, 0.11, 0.12, 0.15, 0.2], 20)
            # get variational bound info
            var_vals_kld = GIP_KLD.compute_ll_bound(Xva)
            # record info about variational and parzen bounds
            p_vals_kld.append(parzen_vals_kld[1])
            v_vals_kld.append(np.mean(var_vals_kld[0]))
            ################################################################
            # Process the GIPair trained with basic VAE KLd regularization #
            ################################################################
            gn_fname = VAE_PATH + "pt_{0:s}_params_b{1:d}_GN.pkl".format(net_type, b)
            in_fname = VAE_PATH + "pt_{0:s}_params_b{1:d}_IN.pkl".format(net_type, b)
            IN = INet.load_infnet_from_file(f_name=in_fname, rng=rng, Xd=Xd)
            GN = GNet.load_gennet_from_file(f_name=gn_fname, rng=rng, Xp=Xp)
            IN.set_sigma_scale(1.0)
            prior_dim = GN.latent_dim
            post_klds_vae = posterior_klds(IN, Xtr, 5000, 5)
            # Initialize the GIPair
            GIP_VAE = GIPair(rng=rng, Xd=Xd, Xc=Xc, Xm=Xm, g_net=GN, i_net=IN, \
                    data_dim=data_dim, prior_dim=prior_dim, params=None)
            GIP_VAE.set_lam_l2w(1e-4)
            GIP_VAE.set_lam_nll(1.0)
            GIP_VAE.set_lam_kld(1.0)
            # draw samples freely from the generative model's prior
            Xs = GIP_VAE.sample_from_prior(20*20)
            file_name = RESULT_PATH + "prior_samples_b{0:d}_vae.png".format(i)
            utils.visualize_samples(Xs, file_name, num_rows=20)
            # test Parzen density estimator built from prior samples
            Xs = GIP_VAE.sample_from_prior(10000, sigma=1.0)
            parzen_vals_vae = cross_validate_sigma(Xs, Xva, [0.08, 0.09, 0.1, 0.11, 0.12, 0.15, 0.2], 20)
            # get variational bound info
            var_vals_vae = GIP_VAE.compute_ll_bound(Xva)
            # record info about variational and parzen bounds
            p_vals_vae.append(parzen_vals_vae[1])
            v_vals_vae.append(np.mean(var_vals_vae[0]))
            ########################
            # Plot posterior KLds. #
            ########################
            file_name = RESULT_PATH + "post_klds_b{0:d}.pdf".format(i)
            draw_posterior_kld_hist( \
                    np.asarray(post_klds_kld), np.asarray(post_klds_vae), file_name, bins=30)
            if i in [20000, 50000, 80000, 110000, 150000, 190000]:
                # select some random indices into the validation set
                va_idx = npr.randint(0,high=va_samples,size=(150,))
                # record information about their current variational bounds
                kl_vals_kld.extend([v for v in var_vals_kld[1][va_idx]])
                ll_vals_kld.extend([v for v in var_vals_kld[2][va_idx]])
                kl_vals_vae.extend([v for v in var_vals_vae[1][va_idx]])
                ll_vals_vae.extend([v for v in var_vals_vae[2][va_idx]])
                # do some plotting
                s1_name = RESULT_PATH + "parzen_vs_variational.pdf"
                s2_name = RESULT_PATH + "kld_vs_likelihood.pdf"
                draw_parzen_vs_variational_scatter(p_vals_kld, v_vals_kld, \
                        p_vals_vae, v_vals_vae, f_name=s1_name)
                draw_kld_vs_likelihood_scatter(kl_vals_kld, ll_vals_kld, \
                        kl_vals_vae, ll_vals_vae, f_name=s2_name)
    return
Example #38
def test_with_model_init():
    ##########################
    # Get some training data #
    ##########################
    rng = np.random.RandomState(1234)
    Xtr, Xva, Xte = load_binarized_mnist(data_path='./data/')
    del Xte
    tr_samples = Xtr.shape[0]
    va_samples = Xva.shape[0]
    batch_size = 200
    batch_reps = 1

    ############################################################
    # Setup some parameters for the Iterative Refinement Model #
    ############################################################
    obs_dim = Xtr.shape[1]
    z_dim = 20
    h_dim = 200
    ir_steps = 6
    init_scale = 1.0
    
    x_type = 'bernoulli'

    # some InfNet instances to build the TwoStageModel from
    x_in_sym = T.matrix('x_in_sym')
    x_out_sym = T.matrix('x_out_sym')

    #################
    # p_hi_given_si #
    #################
    params = {}
    shared_config = [obs_dim, 300, 300]
    top_config = [shared_config[-1], h_dim]
    params['shared_config'] = shared_config
    params['mu_config'] = top_config
    params['sigma_config'] = top_config
    params['activation'] = relu_actfun
    params['init_scale'] = init_scale
    params['lam_l2a'] = 0.0
    params['vis_drop'] = 0.0
    params['hid_drop'] = 0.0
    params['bias_noise'] = 0.0
    params['input_noise'] = 0.0
    params['build_theano_funcs'] = False
    p_hi_given_si = InfNet(rng=rng, Xd=x_in_sym, \
            params=params, shared_param_dicts=None)
    p_hi_given_si.init_biases(0.2)
    ######################
    # p_sip1_given_si_hi #
    ######################
    params = {}
    shared_config = [h_dim, 300, 300]
    output_config = [obs_dim, obs_dim, obs_dim]
    params['shared_config'] = shared_config
    params['output_config'] = output_config
    params['activation'] = relu_actfun
    params['init_scale'] = init_scale
    params['lam_l2a'] = 0.0
    params['vis_drop'] = 0.0
    params['hid_drop'] = 0.0
    params['bias_noise'] = 0.0
    params['input_noise'] = 0.0
    params['build_theano_funcs'] = False
    p_sip1_given_si_hi = HydraNet(rng=rng, Xd=x_in_sym, \
            params=params, shared_param_dicts=None)
    p_sip1_given_si_hi.init_biases(0.2)
    ################
    # p_s0_given_z #
    ################
    params = {}
    shared_config = [z_dim, 250, 250]
    top_config = [shared_config[-1], obs_dim]
    params['shared_config'] = shared_config
    params['mu_config'] = top_config
    params['sigma_config'] = top_config
    params['activation'] = relu_actfun
    params['init_scale'] = init_scale
    params['lam_l2a'] = 0.0
    params['vis_drop'] = 0.0
    params['hid_drop'] = 0.0
    params['bias_noise'] = 0.0
    params['input_noise'] = 0.0
    params['build_theano_funcs'] = False
    p_s0_given_z = InfNet(rng=rng, Xd=x_in_sym, \
            params=params, shared_param_dicts=None)
    p_s0_given_z.init_biases(0.2)
    ###############
    # q_z_given_x #
    ###############
    params = {}
    shared_config = [obs_dim, 250, 250]
    top_config = [shared_config[-1], z_dim]
    params['shared_config'] = shared_config
    params['mu_config'] = top_config
    params['sigma_config'] = top_config
    params['activation'] = relu_actfun
    params['init_scale'] = init_scale
    params['lam_l2a'] = 0.0
    params['vis_drop'] = 0.0
    params['hid_drop'] = 0.0
    params['bias_noise'] = 0.0
    params['input_noise'] = 0.0
    params['build_theano_funcs'] = False
    q_z_given_x = InfNet(rng=rng, Xd=x_in_sym, \
            params=params, shared_param_dicts=None)
    q_z_given_x.init_biases(0.2)
    ###################
    # q_hi_given_x_si #
    ###################
    params = {}
    shared_config = [(obs_dim + obs_dim), 500, 500]
    top_config = [shared_config[-1], h_dim]
    params['shared_config'] = shared_config
    params['mu_config'] = top_config
    params['sigma_config'] = top_config
    params['activation'] = relu_actfun
    params['init_scale'] = init_scale
    params['lam_l2a'] = 0.0
    params['vis_drop'] = 0.0
    params['hid_drop'] = 0.0
    params['bias_noise'] = 0.0
    params['input_noise'] = 0.0
    params['build_theano_funcs'] = False
    q_hi_given_x_si = InfNet(rng=rng, Xd=x_in_sym, \
            params=params, shared_param_dicts=None)
    q_hi_given_x_si.init_biases(0.2)


    ################################################################
    # Define parameters for the MultiStageModel, and initialize it #
    ################################################################
    print("Building the MultiStageModel...")
    msm_params = {}
    msm_params['x_type'] = x_type
    msm_params['obs_transform'] = 'sigmoid'
    MSM = MultiStageModel(rng=rng, x_in=x_in_sym, x_out=x_out_sym, \
            p_s0_given_z=p_s0_given_z, \
            p_hi_given_si=p_hi_given_si, \
            p_sip1_given_si_hi=p_sip1_given_si_hi, \
            q_z_given_x=q_z_given_x, \
            q_hi_given_x_si=q_hi_given_x_si, \
            obs_dim=obs_dim, z_dim=z_dim, h_dim=h_dim, \
            ir_steps=ir_steps, params=msm_params)

    ################################################################
    # Apply some updates, to check that they aren't totally broken #
    ################################################################
    out_file = open("MSM_A_RESULTS.txt", 'wb')
    costs = [0. for i in range(10)]
    learn_rate = 0.0003
    momentum = 0.9
    batch_idx = np.arange(batch_size) + tr_samples
    for i in range(250000):
        scale = min(1.0, ((i+1) / 3000.0))
        if (((i + 1) % 10000) == 0):
            learn_rate = learn_rate * 0.95
        # get the indices of training samples for this batch update
        batch_idx += batch_size
        if (np.max(batch_idx) >= tr_samples):
            # we finished an "epoch", so we rejumble the training set
            Xtr = row_shuffle(Xtr)
            batch_idx = np.arange(batch_size)
        # set sgd and objective function hyperparams for this update
        MSM.set_sgd_params(lr_1=scale*learn_rate, lr_2=scale*learn_rate, \
                mom_1=scale*momentum, mom_2=0.99)
        MSM.set_train_switch(1.0)
        MSM.set_lam_nll(lam_nll=1.0)
        MSM.set_lam_kld(lam_kld_z=1.0, lam_kld_q2p=0.8, lam_kld_p2q=0.2)
        MSM.set_lam_kld_l1l2(lam_kld_l1l2=1.0)
        MSM.set_lam_l2w(1e-4)
        MSM.set_drop_rate(0.0)
        MSM.q_hi_given_x_si.set_bias_noise(0.0)
        MSM.p_hi_given_si.set_bias_noise(0.0)
        MSM.p_sip1_given_si_hi.set_bias_noise(0.0)
        # perform a minibatch update and record the cost for this batch
        Xb_tr = to_fX( Xtr.take(batch_idx, axis=0) )
        result = MSM.train_joint(Xb_tr, Xb_tr, batch_reps)
        costs = [(costs[j] + result[j]) for j in range(len(result)-1)]
        if ((i % 500) == 0):
            costs = [(v / 500.0) for v in costs]
            str1 = "-- batch {0:d} --".format(i)
            str2 = "    joint_cost: {0:.4f}".format(costs[0])
            str3 = "    nll_cost  : {0:.4f}".format(costs[1])
            str4 = "    kld_cost  : {0:.4f}".format(costs[2])
            str5 = "    reg_cost  : {0:.4f}".format(costs[3])
            joint_str = "\n".join([str1, str2, str3, str4, str5])
            print(joint_str)
            out_file.write(joint_str+"\n")
            out_file.flush()
            costs = [0.0 for v in costs]
        if (((i % 2000) == 0) or ((i < 10000) and ((i % 1000) == 0))):
            MSM.set_drop_rate(0.0)
            MSM.q_hi_given_x_si.set_bias_noise(0.0)
            MSM.p_hi_given_si.set_bias_noise(0.0)
            MSM.p_sip1_given_si_hi.set_bias_noise(0.0)
            # Get some validation samples for computing diagnostics
            Xva = row_shuffle(Xva)
            Xb_va = to_fX( Xva[0:2000] )
            # draw some independent random samples from the model
            samp_count = 200
            model_samps = MSM.sample_from_prior(samp_count)
            seq_len = len(model_samps)
            seq_samps = np.zeros((seq_len*samp_count, model_samps[0].shape[1]))
            idx = 0
            for s1 in range(samp_count):
                for s2 in range(seq_len):
                    seq_samps[idx] = model_samps[s2][s1]
                    idx += 1
            file_name = "MSM_A_SAMPLES_IND_b{0:d}.png".format(i)
            utils.visualize_samples(seq_samps, file_name, num_rows=20)
            # draw some conditional random samples from the model
            samp_count = 200
            Xs = np.vstack((Xb_tr[0:(samp_count/4)], Xb_va[0:(samp_count/4)]))
            Xs = np.repeat(Xs, 2, axis=0)
            # draw some conditional random samples from the model
            model_samps = MSM.sample_from_input(Xs, guided_decoding=False)
            model_samps.append(Xs)
            seq_len = len(model_samps)
            seq_samps = np.zeros((seq_len*samp_count, model_samps[0].shape[1]))
            idx = 0
            for s1 in range(samp_count): 
                for s2 in range(seq_len):
                    seq_samps[idx] = model_samps[s2][s1]
                    idx += 1
            file_name = "MSM_A_SAMPLES_CND_b{0:d}.png".format(i)
            utils.visualize_samples(seq_samps, file_name, num_rows=20)
            # compute information about posterior KLds on validation set
            raw_klds = MSM.compute_raw_klds(Xb_va, Xb_va)
            init_kld, q2p_kld, p2q_kld = raw_klds
            file_name = "MSM_A_H0_KLDS_b{0:d}.png".format(i)
            utils.plot_stem(np.arange(init_kld.shape[1]), \
                    np.mean(init_kld, axis=0), file_name)
            file_name = "MSM_A_HI_Q2P_KLDS_b{0:d}.png".format(i)
            utils.plot_stem(np.arange(q2p_kld.shape[1]), \
                    np.mean(q2p_kld, axis=0), file_name)
            file_name = "MSM_A_HI_P2Q_KLDS_b{0:d}.png".format(i)
            utils.plot_stem(np.arange(p2q_kld.shape[1]), \
                    np.mean(p2q_kld, axis=0), file_name)
            Xb_tr = to_fX( Xtr[0:2000] )
            fe_terms = MSM.compute_fe_terms(Xb_tr, Xb_tr, 30)
            fe_nll = np.mean(fe_terms[0])
            fe_kld = np.mean(fe_terms[1])
            fe_joint = fe_nll + fe_kld
            joint_str = "    vfe-tr: {0:.4f}, nll: ({1:.4f}, {2:.4f}, {3:.4f}), kld: ({4:.4f}, {5:.4f}, {6:.4f})".format( \
                    fe_joint, fe_nll, np.min(fe_terms[0]), np.max(fe_terms[0]), fe_kld, np.min(fe_terms[1]), np.max(fe_terms[1]))
            print(joint_str)
            out_file.write(joint_str+"\n")
            out_file.flush()
            fe_terms = MSM.compute_fe_terms(Xb_va, Xb_va, 30)
            fe_nll = np.mean(fe_terms[0])
            fe_kld = np.mean(fe_terms[1])
            fe_joint = fe_nll + fe_kld
            joint_str = "    vfe-va: {0:.4f}, nll: ({1:.4f}, {2:.4f}, {3:.4f}), kld: ({4:.4f}, {5:.4f}, {6:.4f})".format( \
                    fe_joint, fe_nll, np.min(fe_terms[0]), np.max(fe_terms[0]), fe_kld, np.min(fe_terms[1]), np.max(fe_terms[1]))
            print(joint_str)
            out_file.write(joint_str+"\n")
            out_file.flush()
Example #39
def check_tfd_recon():
    # paths for the saved models and for writing the comparison figures
    KLD_PATH = "TFD_WALKOUT_TEST_KLD/"
    VAE_PATH = "TFD_WALKOUT_TEST_VAE/"
    RESULT_PATH = "TFD_WALKOUT_RESULTS/"

    # Initialize a source of randomness
    rng = np.random.RandomState(1234)

    # Load some data to train/validate/test with
    data_file = 'data/tfd_data_48x48.pkl'
    dataset = load_tfd(tfd_pkl_name=data_file, which_set='unlabeled', fold='all')
    Xtr_unlabeled = dataset[0]
    dataset = load_tfd(tfd_pkl_name=data_file, which_set='train', fold='all')
    Xtr_train = dataset[0]
    Xtr = np.vstack([Xtr_unlabeled, Xtr_train])
    dataset = load_tfd(tfd_pkl_name=data_file, which_set='valid', fold='all')
    Xva = dataset[0]
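    # replace the per-pixel training mean with its global average, i.e. a
    # constant "mean image"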
    Xtr_mean = np.mean(Xtr, axis=0, keepdims=True)
    Xtr_mean = (0.0 * Xtr_mean) + np.mean(Xtr_mean)
    tr_samples = Xtr.shape[0]
    va_samples = Xva.shape[0]
    batch_size = 100
    batch_reps = 5

    # Construct a GenNet and an InfNet, then test constructor for GIPair.
    # Do basic testing, to make sure classes aren't completely broken.
    Xp = T.matrix('Xp_base')
    Xd = T.matrix('Xd_base')
    Xc = T.matrix('Xc_base')
    Xm = T.matrix('Xm_base')
    data_dim = Xtr.shape[1]
    prior_sigma = 1.0

    #############################################################
    # Process the GIPair trained with strong KLd regularization #
    #############################################################
    gn_fname = KLD_PATH + "pt_recon_params_b180000_GN.pkl"
    in_fname = KLD_PATH + "pt_recon_params_b180000_IN.pkl"
    IN = INet.load_infnet_from_file(f_name=in_fname, rng=rng, Xd=Xd)
    GN = GNet.load_gennet_from_file(f_name=gn_fname, rng=rng, Xp=Xp)
    IN.set_sigma_scale(1.0)
    prior_dim = GN.latent_dim
    # Initialize the GIPair
    GIP_KLD = GIPair(rng=rng, Xd=Xd, Xc=Xc, Xm=Xm, g_net=GN, i_net=IN, \
            data_dim=data_dim, prior_dim=prior_dim, params=None)

    ################################################################
    # Process the GIPair trained with basic VAE KLd regularization #
    ################################################################
    gn_fname = VAE_PATH + "pt_walk_params_b50000_GN.pkl"
    in_fname = VAE_PATH + "pt_walk_params_b50000_IN.pkl"
    IN = INet.load_infnet_from_file(f_name=in_fname, rng=rng, Xd=Xd)
    GN = GNet.load_gennet_from_file(f_name=gn_fname, rng=rng, Xp=Xp)
    IN.set_sigma_scale(1.0)
    prior_dim = GN.latent_dim
    # Initialize the GIPair
    GIP_VAE = GIPair(rng=rng, Xd=Xd, Xc=Xc, Xm=Xm, g_net=GN, i_net=IN, \
            data_dim=data_dim, prior_dim=prior_dim, params=None)

    for trial in range(15):
        #########################################################
        # DRAW THE SAMPLE OBSERVATIONS AND MASKS FOR THIS TRIAL #
        #########################################################
        va_idx = npr.randint(low=0, high=Xva.shape[0], size=(15,))
        Xc_batch = Xva.take(va_idx, axis=0)
        Xd_batch = np.repeat(Xtr_mean, Xc_batch.shape[0], axis=0).astype(theano.config.floatX)
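        # build a mask combining sparse random pixel dropout with a 25x25 occlusion patch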
        Xm_rand = sample_masks(Xc_batch, drop_prob=0.001)
        Xm_patch = sample_patch_masks(Xc_batch, (48,48), (25,25))
        Xm_batch = Xm_rand * Xm_patch

        #####################################
        # COMPARE SAMPLES IN A NORMAL CHAIN #
        #####################################
        # draw some chains of samples from the VAE loop
        result_kld = GIP_KLD.sample_from_chain(Xc_batch, loop_iters=19)
        result_vae = GIP_VAE.sample_from_chain(Xc_batch, loop_iters=19)
        chain_samples_kld = []
        chain_samples_vae = []
        for i in range(len(result_kld['data samples'])):
            if (((i % 3) == 0) or (i == 1)):
                chain_samples_kld.append(result_kld['data samples'][i])
                chain_samples_vae.append(result_vae['data samples'][i])
        # interleave the chain samples for beauteous display
        chain_samples_both = []
        for i in range(len(chain_samples_kld)):
            Xs_kld = chain_samples_kld[i]
            Xs_vae = chain_samples_vae[i]
            joint_samples = np.zeros((2*Xs_kld.shape[0], Xs_kld.shape[1]))
            for j in range(Xs_kld.shape[0]):
                joint_samples[2*j] = Xs_kld[j]
                joint_samples[2*j + 1] = Xs_vae[j]
            chain_samples_both.append(joint_samples)
        chain_len = len(chain_samples_both)
        Xs = np.vstack(chain_samples_both)
        file_name = RESULT_PATH + "FIG_CHAIN_{0:d}.png".format(trial)
        utils.visualize_samples(Xs, file_name, num_rows=chain_len)

        #############################################
        # COMPARE SAMPLES IN A RECONSTRUCTION CHAIN #
        #############################################
        # draw some chains of samples from the VAE loop
        result_kld = GIP_KLD.sample_from_chain(Xd_batch, X_c=Xc_batch, \
                X_m=Xm_batch, loop_iters=10)
        result_vae = GIP_VAE.sample_from_chain(Xd_batch, X_c=Xc_batch, \
                X_m=Xm_batch, loop_iters=10)
        recon_samples_kld = []
        recon_samples_vae = []
        for i in range(len(result_kld['data samples'])):
            if (((i % 2) == 0) or (i == 1)):
                recon_samples_kld.append(result_kld['data samples'][i])
                recon_samples_vae.append(result_vae['data samples'][i])
        # interleave the recon samples for beauteous display
        recon_samples_both = []
        for i in range(len(recon_samples_kld)):
            Xs_kld = recon_samples_kld[i]
            Xs_vae = recon_samples_vae[i]
            joint_samples = np.zeros((2*Xs_kld.shape[0], Xs_kld.shape[1]))
            for j in range(Xs_kld.shape[0]):
                joint_samples[2*j] = Xs_kld[j]
                joint_samples[2*j + 1] = Xs_vae[j]
            recon_samples_both.append(joint_samples)
        recon_len = len(recon_samples_both)
        Xs = np.vstack(recon_samples_both)
        file_name = RESULT_PATH + "FIG_RECON_{0:d}.png".format(trial)
        utils.visualize_samples(Xs, file_name, num_rows=recon_len)
    return
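# A hypothetical invocation sketch (assumes the pickled model files referenced
# above exist under KLD_PATH and VAE_PATH, and that RESULT_PATH is writable):
#
#     check_tfd_recon()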
Example #40
def pretrain_osm(lam_kld=0.0):
    # Initialize a source of randomness
    rng = np.random.RandomState(1234)

    # Load some data to train/validate/test with
    dataset = 'data/mnist.pkl.gz'
    datasets = load_udm(dataset, zero_mean=False)
    Xtr = datasets[0][0]
    Xtr = Xtr.get_value(borrow=False)
    Xva = datasets[2][0]
    Xva = Xva.get_value(borrow=False)
    print("Xtr.shape: {0:s}, Xva.shape: {1:s}".format(str(Xtr.shape),str(Xva.shape)))

    # get and set some basic dataset information
    Xtr_mean = np.mean(Xtr, axis=0)
    tr_samples = Xtr.shape[0]
    va_samples = Xva.shape[0]
    batch_size = 100
    batch_reps = 5

    # setup some symbolic variables and stuff
    Xd = T.matrix('Xd_base')
    Xc = T.matrix('Xc_base')
    Xm = T.matrix('Xm_base')
    data_dim = Xtr.shape[1]
    prior_sigma = 1.0

    ##########################
    # NETWORK CONFIGURATIONS #
    ##########################
    gn_params = {}
    shared_config = [PRIOR_DIM, 1000, 1000]
    top_config = [shared_config[-1], data_dim]
    gn_params['shared_config'] = shared_config
    gn_params['mu_config'] = top_config
    gn_params['sigma_config'] = top_config
    gn_params['activation'] = relu_actfun
    gn_params['init_scale'] = 1.4
    gn_params['lam_l2a'] = 0.0
    gn_params['vis_drop'] = 0.0
    gn_params['hid_drop'] = 0.0
    gn_params['bias_noise'] = 0.0
    gn_params['input_noise'] = 0.0
    # choose some parameters for the continuous inferencer
    in_params = {}
    shared_config = [data_dim, 1000, 1000]
    top_config = [shared_config[-1], PRIOR_DIM]
    in_params['shared_config'] = shared_config
    in_params['mu_config'] = top_config
    in_params['sigma_config'] = top_config
    in_params['activation'] = relu_actfun
    in_params['init_scale'] = 1.4
    in_params['lam_l2a'] = 0.0
    in_params['vis_drop'] = 0.0
    in_params['hid_drop'] = 0.0
    in_params['bias_noise'] = 0.0
    in_params['input_noise'] = 0.0
    # Initialize the base networks for this OneStageModel
    IN = InfNet(rng=rng, Xd=Xd, prior_sigma=prior_sigma, \
            params=in_params, shared_param_dicts=None)
    GN = InfNet(rng=rng, Xd=Xd, prior_sigma=prior_sigma, \
            params=gn_params, shared_param_dicts=None)
    # Initialize biases in IN and GN
    IN.init_biases(0.2)
    GN.init_biases(0.2)

    #########################
    # INITIALIZE THE GIPAIR #
    #########################
    osm_params = {}
    osm_params['x_type'] = 'bernoulli'
    osm_params['xt_transform'] = 'sigmoid'
    osm_params['logvar_bound'] = LOGVAR_BOUND
    OSM = OneStageModel(rng=rng, Xd=Xd, Xc=Xc, Xm=Xm, \
            p_x_given_z=GN, q_z_given_x=IN, \
            x_dim=data_dim, z_dim=PRIOR_DIM, params=osm_params)
    OSM.set_lam_l2w(1e-5)
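    # set the output bias to the logit of a lightly smoothed mean image,
    # keeping initial sigmoid outputs away from 0 and 1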
    safe_mean = (0.9 * Xtr_mean) + 0.05
    safe_mean_logit = np.log(safe_mean / (1.0 - safe_mean))
    OSM.set_output_bias(safe_mean_logit)
    OSM.set_input_bias(-Xtr_mean)

    ######################
    # BASIC VAE TRAINING #
    ######################
    out_file = open(RESULT_PATH+"pt_osm_results.txt", 'wb')
    # Set initial learning rate and basic SGD hyper parameters
    obs_costs = np.zeros((batch_size,))
    costs = [0. for i in range(10)]
    learn_rate = 0.0005
    for i in range(150000):
        scale = min(1.0, float(i) / 10000.0)
        if ((i > 1) and ((i % 20000) == 0)):
            learn_rate = learn_rate * 0.9
        # draw a random minibatch for this update
        tr_idx = npr.randint(low=0,high=tr_samples,size=(batch_size,))
        Xd_batch = Xtr.take(tr_idx, axis=0)
        Xc_batch = 0.0 * Xd_batch
        Xm_batch = 0.0 * Xd_batch
        # do a minibatch update of the model, and compute some costs
        OSM.set_sgd_params(lr_1=(scale*learn_rate), mom_1=0.5, mom_2=0.98)
        OSM.set_lam_nll(1.0)
        OSM.set_lam_kld(lam_kld_1=(1.0 + (scale*(lam_kld-1.0))), lam_kld_2=0.0)
        result = OSM.train_joint(Xd_batch, Xc_batch, Xm_batch, batch_reps)
        costs = [(costs[j] + result[j]) for j in range(len(result))]
        if ((i % 1000) == 0):
            # record and then reset the cost trackers
            costs = [(v / 1000.0) for v in costs]
            str_1 = "-- batch {0:d} --".format(i)
            str_2 = "    joint_cost: {0:.4f}".format(costs[0])
            str_3 = "    nll_cost  : {0:.4f}".format(costs[1])
            str_4 = "    kld_cost  : {0:.4f}".format(costs[2])
            str_5 = "    reg_cost  : {0:.4f}".format(costs[3])
            costs = [0.0 for v in costs]
            # print out some diagnostic information
            joint_str = "\n".join([str_1, str_2, str_3, str_4, str_5])
            print(joint_str)
            out_file.write(joint_str+"\n")
            out_file.flush()
        if ((i % 2000) == 0):
            Xva = row_shuffle(Xva)
            model_samps = OSM.sample_from_prior(500)
            file_name = RESULT_PATH+"pt_osm_samples_b{0:d}_XG.png".format(i)
            utils.visualize_samples(model_samps, file_name, num_rows=20)
            # compute information about free-energy on validation set
            file_name = RESULT_PATH+"pt_osm_free_energy_b{0:d}.png".format(i)
            fe_terms = OSM.compute_fe_terms(Xva[0:2500], 20)
            fe_mean = np.mean(fe_terms[0]) + np.mean(fe_terms[1])
            fe_str = "    nll_bound : {0:.4f}".format(fe_mean)
            print(fe_str)
            out_file.write(fe_str+"\n")
            utils.plot_scatter(fe_terms[1], fe_terms[0], file_name, \
                    x_label='Posterior KLd', y_label='Negative Log-likelihood')
            # compute information about posterior KLds on validation set
            file_name = RESULT_PATH+"pt_osm_post_klds_b{0:d}.png".format(i)
            post_klds = OSM.compute_post_klds(Xva[0:2500])
            post_dim_klds = np.mean(post_klds, axis=0)
            utils.plot_stem(np.arange(post_dim_klds.shape[0]), post_dim_klds, \
                    file_name)
        if ((i % 5000) == 0):
            IN.save_to_file(f_name=RESULT_PATH+"pt_osm_params_b{0:d}_IN.pkl".format(i))
            GN.save_to_file(f_name=RESULT_PATH+"pt_osm_params_b{0:d}_GN.pkl".format(i))
    IN.save_to_file(f_name=RESULT_PATH+"pt_osm_params_IN.pkl")
    GN.save_to_file(f_name=RESULT_PATH+"pt_osm_params_GN.pkl")
    return
Example #41
def test_gip_sigma_scale_tfd():
    from LogPDFs import cross_validate_sigma
    # Simple test code, to check that everything is basically functional.
    print("TESTING...")

    # Initialize a source of randomness
    rng = np.random.RandomState(12345)

    # Load some data to train/validate/test with
    data_file = 'data/tfd_data_48x48.pkl'
    dataset = load_tfd(tfd_pkl_name=data_file,
                       which_set='unlabeled',
                       fold='all')
    Xtr_unlabeled = dataset[0]
    dataset = load_tfd(tfd_pkl_name=data_file, which_set='train', fold='all')
    Xtr_train = dataset[0]
    Xtr = np.vstack([Xtr_unlabeled, Xtr_train])
    dataset = load_tfd(tfd_pkl_name=data_file, which_set='test', fold='all')
    Xva = dataset[0]
    tr_samples = Xtr.shape[0]
    va_samples = Xva.shape[0]
    print("Xtr.shape: {0:s}, Xva.shape: {1:s}".format(str(Xtr.shape),
                                                      str(Xva.shape)))

    # get and set some basic dataset information
    tr_samples = Xtr.shape[0]
    data_dim = Xtr.shape[1]
    batch_size = 100

    # Symbolic inputs
    Xd = T.matrix(name='Xd')
    Xc = T.matrix(name='Xc')
    Xm = T.matrix(name='Xm')
    Xt = T.matrix(name='Xt')

    # Load inferencer and generator from saved parameters
    gn_fname = "TFD_WALKOUT_TEST_KLD/pt_walk_params_b25000_GN.pkl"
    in_fname = "TFD_WALKOUT_TEST_KLD/pt_walk_params_b25000_IN.pkl"
    IN = load_infnet_from_file(f_name=in_fname, rng=rng, Xd=Xd)
    GN = load_infnet_from_file(f_name=gn_fname, rng=rng, Xd=Xd)
    x_dim = IN.shared_layers[0].in_dim
    z_dim = IN.mu_layers[-1].out_dim
    # construct a GIPair with the loaded InfNet and GenNet
    osm_params = {}
    osm_params['x_type'] = 'gaussian'
    osm_params['xt_transform'] = 'sigmoid'
    osm_params['logvar_bound'] = LOGVAR_BOUND
    OSM = OneStageModel(rng=rng, Xd=Xd, Xc=Xc, Xm=Xm, \
            p_x_given_z=GN, q_z_given_x=IN, \
            x_dim=x_dim, z_dim=z_dim, params=osm_params)

    # compute variational likelihood bound and its sub-components
    Xva = row_shuffle(Xva)
    Xb = Xva[0:5000]
    # file_name = "A_TFD_POST_KLDS.png"
    # post_klds = OSM.compute_post_klds(Xb)
    # post_dim_klds = np.mean(post_klds, axis=0)
    # utils.plot_stem(np.arange(post_dim_klds.shape[0]), post_dim_klds, \
    #         file_name)
    # compute information about free-energy on validation set
    file_name = "A_TFD_KLD_FREE_ENERGY.png"
    fe_terms = OSM.compute_fe_terms(Xb, 20)
    utils.plot_scatter(fe_terms[1], fe_terms[0], file_name, \
            x_label='Posterior KLd', y_label='Negative Log-likelihood')

    # bound_results = OSM.compute_ll_bound(Xva)
    # ll_bounds = bound_results[0]
    # post_klds = bound_results[1]
    # log_likelihoods = bound_results[2]
    # max_lls = bound_results[3]
    # print("mean ll bound: {0:.4f}".format(np.mean(ll_bounds)))
    # print("mean posterior KLd: {0:.4f}".format(np.mean(post_klds)))
    # print("mean log-likelihood: {0:.4f}".format(np.mean(log_likelihoods)))
    # print("mean max log-likelihood: {0:.4f}".format(np.mean(max_lls)))
    # print("min ll bound: {0:.4f}".format(np.min(ll_bounds)))
    # print("max posterior KLd: {0:.4f}".format(np.max(post_klds)))
    # print("min log-likelihood: {0:.4f}".format(np.min(log_likelihoods)))
    # print("min max log-likelihood: {0:.4f}".format(np.min(max_lls)))
    # # compute some information about the approximate posteriors
    # post_stats = OSM.compute_post_stats(Xva, 0.0*Xva, 0.0*Xva)
    # all_post_klds = np.sort(post_stats[0].ravel()) # post KLds for each obs and dim
    # obs_post_klds = np.sort(post_stats[1]) # summed post KLds for each obs
    # post_dim_klds = post_stats[2] # average post KLds for each post dim
    # post_dim_vars = post_stats[3] # average squared mean for each post dim
    # utils.plot_line(np.arange(all_post_klds.shape[0]), all_post_klds, "AAA_ALL_POST_KLDS.png")
    # utils.plot_line(np.arange(obs_post_klds.shape[0]), obs_post_klds, "AAA_OBS_POST_KLDS.png")
    # utils.plot_stem(np.arange(post_dim_klds.shape[0]), post_dim_klds, "AAA_POST_DIM_KLDS.png")
    # utils.plot_stem(np.arange(post_dim_vars.shape[0]), post_dim_vars, "AAA_POST_DIM_VARS.png")

    # draw many samples from the GIP
    for i in range(5):
        tr_idx = npr.randint(low=0, high=tr_samples, size=(100, ))
        Xd_batch = Xtr.take(tr_idx, axis=0)
        Xs = []
        for row in range(3):
            Xs.append([])
            for col in range(3):
                sample_lists = OSM.sample_from_chain(Xd_batch[0:10,:], loop_iters=100, \
                        sigma_scale=1.0)
                Xs[row].append(group_chains(sample_lists['data samples']))
        Xs, block_im_dim = block_video(Xs, (48, 48), (3, 3))
        to_video(Xs,
                 block_im_dim,
                 "A_TFD_KLD_CHAIN_VIDEO_{0:d}.avi".format(i),
                 frame_rate=10)
        #sample_lists = GIP.sample_from_chain(Xd_batch[0,:].reshape((1,data_dim)), loop_iters=300, \
        #        sigma_scale=1.0)
        #Xs = np.vstack(sample_lists["data samples"])
        #file_name = "TFD_TEST_{0:d}.png".format(i)
        #utils.visualize_samples(Xs, file_name, num_rows=15)
    file_name = "A_TFD_KLD_PRIOR_SAMPLE.png"
    Xs = OSM.sample_from_prior(20 * 20)
    utils.visualize_samples(Xs, file_name, num_rows=20)

    # test Parzen density estimator built from prior samples
    # Xs = OSM.sample_from_prior(10000)
    # [best_sigma, best_ll, best_lls] = \
    #         cross_validate_sigma(Xs, Xva, [0.09, 0.095, 0.1, 0.105, 0.11], 10)
    # sort_idx = np.argsort(best_lls)
    # sort_idx = sort_idx[0:400]
    # utils.plot_line(np.arange(sort_idx.shape[0]), best_lls[sort_idx], "A_TFD_BEST_LLS_1.png")
    # utils.visualize_samples(Xva[sort_idx], "A_TFD_BAD_FACES_1.png", num_rows=20)
    return
def test_mnist(step_type='add',
               imp_steps=6,
               occ_dim=15,
               drop_prob=0.0):
    #########################################
    # Format the result tag more thoroughly #
    #########################################
    dp_int = int(100.0 * drop_prob)
    result_tag = "{}GPSI_conv_bn_OD{}_DP{}_IS{}_{}_NA".format(RESULT_PATH, occ_dim, dp_int, imp_steps, step_type)

    ##########################
    # Get some training data #
    ##########################
    rng = np.random.RandomState(1234)
    dataset = 'data/mnist.pkl.gz'
    datasets = load_udm(dataset, as_shared=False, zero_mean=False)
    Xtr = datasets[0][0]
    Xva = datasets[1][0]
    Xte = datasets[2][0]
    # Merge validation set and training set, and test on test set.
    Xtr = np.concatenate((Xtr, Xva), axis=0)
    Xva = Xte
    Xtr = to_fX(shift_and_scale_into_01(Xtr))
    Xva = to_fX(shift_and_scale_into_01(Xva))
    tr_samples = Xtr.shape[0]
    va_samples = Xva.shape[0]
    batch_size = 200
    batch_reps = 1
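    # a constant mean-pixel image, passed to construct_masked_data below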
    all_pix_mean = np.mean(np.mean(Xtr, axis=1))
    data_mean = to_fX( all_pix_mean * np.ones((Xtr.shape[1],)) )

    ############################################################
    # Setup some parameters for the Iterative Refinement Model #
    ############################################################
    x_dim = Xtr.shape[1]
    z_dim = 100
    init_scale = 1.0
    use_bn = True

    x_in_sym = T.matrix('x_in_sym')
    x_out_sym = T.matrix('x_out_sym')
    x_mask_sym = T.matrix('x_mask_sym')

    #################
    # p_zi_given_xi #
    #################
    params = {}
    shared_config = \
    [ {'layer_type': 'conv',
       'in_chans': 1,   # in shape:  (batch, 784)
       'out_chans': 64, # out shape: (batch, 64, 14, 14)
       'activation': relu_actfun,
       'filt_dim': 5,
       'conv_stride': 'double',
       'apply_bn': use_bn,
       'shape_func_in': lambda x: T.reshape(x, (-1, 1, 28, 28))}, \
      {'layer_type': 'conv',
       'in_chans': 64,   # in shape:  (batch, 64, 14, 14)
       'out_chans': 128, # out shape: (batch, 128, 7, 7)
       'activation': relu_actfun,
       'filt_dim': 5,
       'conv_stride': 'double',
       'apply_bn': use_bn,
       'shape_func_out': lambda x: T.flatten(x, 2)}, \
      {'layer_type': 'fc',
       'in_chans': 128*7*7,
       'out_chans': 256,
       'activation': relu_actfun,
       'apply_bn': use_bn} ]
    output_config = \
    [ {'layer_type': 'fc',
       'in_chans': 256,
       'out_chans': z_dim,
       'activation': relu_actfun,
       'apply_bn': False}, \
      {'layer_type': 'fc',
       'in_chans': 256,
       'out_chans': z_dim,
       'activation': relu_actfun,
       'apply_bn': False} ]
    params['shared_config'] = shared_config
    params['output_config'] = output_config
    params['init_scale'] = init_scale
    params['build_theano_funcs'] = False
    p_zi_given_xi = HydraNet(rng=rng, Xd=x_in_sym, \
            params=params, shared_param_dicts=None)
    p_zi_given_xi.init_biases(0.0)
    ###################
    # p_sip1_given_zi #
    ###################
    params = {}
    shared_config = \
    [ {'layer_type': 'fc',
       'in_chans': z_dim,
       'out_chans': 256,
       'activation': relu_actfun,
       'apply_bn': use_bn}, \
      {'layer_type': 'fc',
       'in_chans': 256,
       'out_chans': 7*7*128,
       'activation': relu_actfun,
       'apply_bn': use_bn,
       'shape_func_out': lambda x: T.reshape(x, (-1, 128, 7, 7))}, \
      {'layer_type': 'conv',
       'in_chans': 128, # in shape:  (batch, 128, 7, 7)
       'out_chans': 64, # out shape: (batch, 64, 14, 14)
       'activation': relu_actfun,
       'filt_dim': 5,
       'conv_stride': 'half',
       'apply_bn': use_bn} ]
    output_config = \
    [ {'layer_type': 'conv',
       'in_chans': 64, # in shape:  (batch, 64, 14, 14)
       'out_chans': 1, # out shape: (batch, 1, 28, 28)
       'activation': relu_actfun,
       'filt_dim': 5,
       'conv_stride': 'half',
       'apply_bn': False,
       'shape_func_out': lambda x: T.flatten(x, 2)}, \
      {'layer_type': 'conv',
       'in_chans': 64,
       'out_chans': 1,
       'activation': relu_actfun,
       'filt_dim': 5,
       'conv_stride': 'half',
       'apply_bn': False,
       'shape_func_out': lambda x: T.flatten(x, 2)}, \
      {'layer_type': 'conv',
       'in_chans': 64,
       'out_chans': 1,
       'activation': relu_actfun,
       'filt_dim': 5,
       'conv_stride': 'half',
       'apply_bn': False,
       'shape_func_out': lambda x: T.flatten(x, 2)} ]
    params['shared_config'] = shared_config
    params['output_config'] = output_config
    params['init_scale'] = init_scale
    params['build_theano_funcs'] = False
    p_sip1_given_zi = HydraNet(rng=rng, Xd=x_in_sym, \
            params=params, shared_param_dicts=None)
    p_sip1_given_zi.init_biases(0.0)

    #################
    # q_zi_given_xi #
    #################
    params = {}
    shared_config = \
    [ {'layer_type': 'conv',
       'in_chans': 2,   # in shape:  (batch, 784+784)
       'out_chans': 64, # out shape: (batch, 64, 14, 14)
       'activation': relu_actfun,
       'filt_dim': 5,
       'conv_stride': 'double',
       'apply_bn': use_bn,
       'shape_func_in': lambda x: T.reshape(x, (-1, 2, 28, 28))}, \
      {'layer_type': 'conv',
       'in_chans': 64,   # in shape:  (batch, 64, 14, 14)
       'out_chans': 128, # out shape: (batch, 128, 7, 7)
       'activation': relu_actfun,
       'filt_dim': 5,
       'conv_stride': 'double',
       'apply_bn': use_bn,
       'shape_func_out': lambda x: T.flatten(x, 2)}, \
      {'layer_type': 'fc',
       'in_chans': 128*7*7,
       'out_chans': 256,
       'activation': relu_actfun,
       'apply_bn': use_bn} ]
    output_config = \
    [ {'layer_type': 'fc',
       'in_chans': 256,
       'out_chans': z_dim,
       'activation': relu_actfun,
       'apply_bn': False}, \
      {'layer_type': 'fc',
       'in_chans': 256,
       'out_chans': z_dim,
       'activation': relu_actfun,
       'apply_bn': False} ]
    params['shared_config'] = shared_config
    params['output_config'] = output_config
    params['init_scale'] = init_scale
    params['build_theano_funcs'] = False
    q_zi_given_xi = HydraNet(rng=rng, Xd=x_in_sym, \
            params=params, shared_param_dicts=None)
    q_zi_given_xi.init_biases(0.0)

    ###########################################################
    # Define parameters for the GPSImputer, and initialize it #
    ###########################################################
    print("Building the GPSImputer...")
    gpsi_params = {}
    gpsi_params['x_dim'] = x_dim
    gpsi_params['z_dim'] = z_dim
    # switch between direct construction and construction via p_x_given_si
    gpsi_params['imp_steps'] = imp_steps
    gpsi_params['step_type'] = step_type
    gpsi_params['x_type'] = 'bernoulli'
    gpsi_params['obs_transform'] = 'sigmoid'
    GPSI = GPSImputer(rng=rng,
            x_in=x_in_sym, x_out=x_out_sym, x_mask=x_mask_sym,
            p_zi_given_xi=p_zi_given_xi,
            p_sip1_given_zi=p_sip1_given_zi,
            q_zi_given_xi=q_zi_given_xi,
            params=gpsi_params,
            shared_param_dicts=None)

    ################################################################
    # Apply some updates, to check that they aren't totally broken #
    ################################################################
    log_name = "{}_RESULTS.txt".format(result_tag)
    out_file = open(log_name, 'wb')
    costs = [0. for i in range(10)]
    learn_rate = 0.0001
    momentum = 0.90
    batch_idx = np.arange(batch_size) + tr_samples
    for i in range(200000):
        scale = min(1.0, ((i+1) / 5000.0))
        if (((i + 1) % 15000) == 0):
            learn_rate = learn_rate * 0.95
        # get the indices of training samples for this batch update
        batch_idx += batch_size
        if (np.max(batch_idx) >= tr_samples):
            # we finished an "epoch", so we rejumble the training set
            Xtr = row_shuffle(Xtr)
            batch_idx = np.arange(batch_size)
        # set sgd and objective function hyperparams for this update
        GPSI.set_sgd_params(lr=scale*learn_rate, \
                            mom_1=scale*momentum, mom_2=0.98)
        GPSI.set_train_switch(1.0)
        GPSI.set_lam_nll(lam_nll=1.0)
        GPSI.set_lam_kld(lam_kld_q=1.0, lam_kld_p=0.1, lam_kld_g=0.0)
        GPSI.set_lam_l2w(1e-5)
        # perform a minibatch update and record the cost for this batch
        xb = to_fX( Xtr.take(batch_idx, axis=0) )
        xi, xo, xm = construct_masked_data(xb, drop_prob=drop_prob, \
                                        occ_dim=occ_dim, data_mean=data_mean)
        result = GPSI.train_joint(xi, xo, xm, batch_reps)
        # do diagnostics and general training tracking
        costs = [(costs[j] + result[j]) for j in range(len(result)-1)]
        if ((i % 500) == 0):
            costs = [(v / 500.0) for v in costs]
            str1 = "-- batch {0:d} --".format(i)
            str2 = "    joint_cost: {0:.4f}".format(costs[0])
            str3 = "    nll_bound : {0:.4f}".format(costs[1])
            str4 = "    nll_cost  : {0:.4f}".format(costs[2])
            str5 = "    kld_cost  : {0:.4f}".format(costs[3])
            str6 = "    reg_cost  : {0:.4f}".format(costs[4])
            joint_str = "\n".join([str1, str2, str3, str4, str5, str6])
            print(joint_str)
            out_file.write(joint_str+"\n")
            out_file.flush()
            costs = [0.0 for v in costs]
        if ((i % 1000) == 0):
            Xva = row_shuffle(Xva)
            # record an estimate of performance on the test set
            xi, xo, xm = construct_masked_data(Xva[0:5000], drop_prob=drop_prob, \
                                               occ_dim=occ_dim, data_mean=data_mean)
            nll, kld = GPSI.compute_fe_terms(xi, xo, xm, sample_count=10)
            vfe = np.mean(nll) + np.mean(kld)
            str1 = "    va_nll_bound : {}".format(vfe)
            str2 = "    va_nll_term  : {}".format(np.mean(nll))
            str3 = "    va_kld_q2p   : {}".format(np.mean(kld))
            joint_str = "\n".join([str1, str2, str3])
            print(joint_str)
            out_file.write(joint_str+"\n")
            out_file.flush()
        if ((i % 2000) == 0):
            #GPSI.save_to_file("{}_PARAMS.pkl".format(result_tag))
            # Get some validation samples for evaluating model performance
            xb = to_fX( Xva[0:100] )
            xi, xo, xm = construct_masked_data(xb, drop_prob=drop_prob, \
                                    occ_dim=occ_dim, data_mean=data_mean)
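            # repeat each validation case, so two independent imputations are drawn per example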
            xi = np.repeat(xi, 2, axis=0)
            xo = np.repeat(xo, 2, axis=0)
            xm = np.repeat(xm, 2, axis=0)
            # draw some sample imputations from the model
            samp_count = xi.shape[0]
            _, model_samps = GPSI.sample_imputer(xi, xo, xm, use_guide_policy=False)
            seq_len = len(model_samps)
            seq_samps = np.zeros((seq_len*samp_count, model_samps[0].shape[1]))
            idx = 0
            for s1 in range(samp_count):
                for s2 in range(seq_len):
                    seq_samps[idx] = model_samps[s2][s1]
                    idx += 1
            file_name = "{0:s}_samples_ng_b{1:d}.png".format(result_tag, i)
            utils.visualize_samples(seq_samps, file_name, num_rows=20)
def test_svhn(step_type='add', occ_dim=15, drop_prob=0.0):
    #########################################
    # Format the result tag more thoroughly #
    #########################################
    dp_int = int(100.0 * drop_prob)
    result_tag = "{}GPSI_OD{}_DP{}_{}_NA".format(RESULT_PATH, occ_dim, dp_int,
                                                 step_type)

    ##########################
    # Get some training data #
    ##########################
    rng = np.random.RandomState(1234)
    tr_file = 'data/svhn_train_gray.pkl'
    te_file = 'data/svhn_test_gray.pkl'
    ex_file = 'data/svhn_extra_gray.pkl'
    data = load_svhn_gray(tr_file, te_file, ex_file=ex_file, ex_count=200000)
    Xtr = to_fX(shift_and_scale_into_01(np.vstack([data['Xtr'], data['Xex']])))
    Xva = to_fX(shift_and_scale_into_01(data['Xte']))
    tr_samples = Xtr.shape[0]
    va_samples = Xva.shape[0]
    batch_size = 250
    batch_reps = 1
    all_pix_mean = np.mean(np.mean(Xtr, axis=1))
    data_mean = to_fX(all_pix_mean * np.ones((Xtr.shape[1], )))

    ############################################################
    # Setup some parameters for the Iterative Refinement Model #
    ############################################################
    x_dim = Xtr.shape[1]
    z_dim = 200
    imp_steps = 6
    init_scale = 1.0

    x_in_sym = T.matrix('x_in_sym')
    x_out_sym = T.matrix('x_out_sym')
    x_mask_sym = T.matrix('x_mask_sym')

    #################
    # p_zi_given_xi #
    #################
    params = {}
    shared_config = [x_dim, 1500, 1500]
    top_config = [shared_config[-1], z_dim]
    params['shared_config'] = shared_config
    params['mu_config'] = top_config
    params['sigma_config'] = top_config
    params['activation'] = relu_actfun
    params['init_scale'] = init_scale
    params['vis_drop'] = 0.0
    params['hid_drop'] = 0.0
    params['bias_noise'] = 0.0
    params['input_noise'] = 0.0
    params['build_theano_funcs'] = False
    p_zi_given_xi = InfNet(rng=rng, Xd=x_in_sym, \
            params=params, shared_param_dicts=None)
    p_zi_given_xi.init_biases(0.2)
    ###################
    # p_xip1_given_zi #
    ###################
    params = {}
    shared_config = [z_dim, 1500, 1500]
    output_config = [x_dim, x_dim]
    params['shared_config'] = shared_config
    params['output_config'] = output_config
    params['activation'] = relu_actfun
    params['init_scale'] = init_scale
    params['vis_drop'] = 0.0
    params['hid_drop'] = 0.0
    params['bias_noise'] = 0.0
    params['input_noise'] = 0.0
    params['build_theano_funcs'] = False
    p_xip1_given_zi = HydraNet(rng=rng, Xd=x_in_sym, \
            params=params, shared_param_dicts=None)
    p_xip1_given_zi.init_biases(0.2)
    ###################
    # q_zi_given_xi #
    ###################
    params = {}
    shared_config = [(x_dim + x_dim), 1500, 1500]
    top_config = [shared_config[-1], z_dim]
    params['shared_config'] = shared_config
    params['mu_config'] = top_config
    params['sigma_config'] = top_config
    params['activation'] = relu_actfun
    params['init_scale'] = init_scale
    params['vis_drop'] = 0.0
    params['hid_drop'] = 0.0
    params['bias_noise'] = 0.0
    params['input_noise'] = 0.0
    params['build_theano_funcs'] = False
    q_zi_given_xi = InfNet(rng=rng, Xd=x_in_sym, \
            params=params, shared_param_dicts=None)
    q_zi_given_xi.init_biases(0.2)

    ###########################################################
    # Define parameters for the GPSImputer, and initialize it #
    ###########################################################
    print("Building the GPSImputer...")
    gpsi_params = {}
    gpsi_params['x_dim'] = x_dim
    gpsi_params['z_dim'] = z_dim
    gpsi_params['imp_steps'] = imp_steps
    gpsi_params['step_type'] = step_type
    gpsi_params['x_type'] = 'bernoulli'
    gpsi_params['obs_transform'] = 'sigmoid'
    GPSI = GPSImputer(rng=rng,
            x_in=x_in_sym, x_out=x_out_sym, x_mask=x_mask_sym, \
            p_zi_given_xi=p_zi_given_xi, \
            p_xip1_given_zi=p_xip1_given_zi, \
            q_zi_given_xi=q_zi_given_xi, \
            params=gpsi_params, \
            shared_param_dicts=None)

    ################################################################
    # Apply some updates, to check that they aren't totally broken #
    ################################################################
    log_name = "{}_RESULTS.txt".format(result_tag)
    out_file = open(log_name, 'wb')
    costs = [0. for i in range(10)]
    learn_rate = 0.0002
    momentum = 0.5
    batch_idx = np.arange(batch_size) + tr_samples
    for i in range(200005):
        scale = min(1.0, ((i + 1) / 5000.0))
        if (((i + 1) % 15000) == 0):
            learn_rate = learn_rate * 0.92
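        # keep momentum low for the first 10000 updates, then raise it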
        if (i > 10000):
            momentum = 0.90
        else:
            momentum = 0.50
        # get the indices of training samples for this batch update
        batch_idx += batch_size
        if (np.max(batch_idx) >= tr_samples):
            # we finished an "epoch", so we rejumble the training set
            Xtr = row_shuffle(Xtr)
            batch_idx = np.arange(batch_size)
        # set sgd and objective function hyperparams for this update
        GPSI.set_sgd_params(lr=scale*learn_rate, \
                            mom_1=scale*momentum, mom_2=0.98)
        GPSI.set_train_switch(1.0)
        GPSI.set_lam_nll(lam_nll=1.0)
        GPSI.set_lam_kld(lam_kld_p=0.1, lam_kld_q=0.9)
        GPSI.set_lam_l2w(1e-4)
        # perform a minibatch update and record the cost for this batch
        xb = to_fX(Xtr.take(batch_idx, axis=0))
        xi, xo, xm = construct_masked_data(xb, drop_prob=drop_prob, \
                                        occ_dim=occ_dim, data_mean=data_mean)
        result = GPSI.train_joint(xi, xo, xm, batch_reps)
        # do diagnostics and general training tracking
        costs = [(costs[j] + result[j]) for j in range(len(result) - 1)]
        if ((i % 250) == 0):
            costs = [(v / 250.0) for v in costs]
            str1 = "-- batch {0:d} --".format(i)
            str2 = "    joint_cost: {0:.4f}".format(costs[0])
            str3 = "    nll_bound : {0:.4f}".format(costs[1])
            str4 = "    nll_cost  : {0:.4f}".format(costs[2])
            str5 = "    kld_cost  : {0:.4f}".format(costs[3])
            str6 = "    reg_cost  : {0:.4f}".format(costs[4])
            joint_str = "\n".join([str1, str2, str3, str4, str5, str6])
            print(joint_str)
            out_file.write(joint_str + "\n")
            out_file.flush()
            costs = [0.0 for v in costs]
        if ((i % 1000) == 0):
            Xva = row_shuffle(Xva)
            # record an estimate of performance on the test set
            xi, xo, xm = construct_masked_data(Xva[0:5000], drop_prob=drop_prob, \
                                               occ_dim=occ_dim, data_mean=data_mean)
            nll, kld = GPSI.compute_fe_terms(xi, xo, xm, sample_count=10)
            vfe = np.mean(nll) + np.mean(kld)
            str1 = "    va_nll_bound : {}".format(vfe)
            str2 = "    va_nll_term  : {}".format(np.mean(nll))
            str3 = "    va_kld_q2p   : {}".format(np.mean(kld))
            joint_str = "\n".join([str1, str2, str3])
            print(joint_str)
            out_file.write(joint_str + "\n")
            out_file.flush()
            GPSI.save_to_file("{}_PARAMS.pkl".format(result_tag))
        if ((i % 20000) == 0):
            # Get some validation samples for evaluating model performance
            xb = to_fX(Xva[0:100])
            xi, xo, xm = construct_masked_data(xb, drop_prob=drop_prob, \
                                    occ_dim=occ_dim, data_mean=data_mean)
            xi = np.repeat(xi, 2, axis=0)
            xo = np.repeat(xo, 2, axis=0)
            xm = np.repeat(xm, 2, axis=0)
            # draw some sample imputations from the model
            samp_count = xi.shape[0]
            _, model_samps = GPSI.sample_imputer(xi,
                                                 xo,
                                                 xm,
                                                 use_guide_policy=False)
            seq_len = len(model_samps)
            seq_samps = np.zeros(
                (seq_len * samp_count, model_samps[0].shape[1]))
            idx = 0
            for s1 in range(samp_count):
                for s2 in range(seq_len):
                    seq_samps[idx] = model_samps[s2][s1]
                    idx += 1
            file_name = "{0:s}_samples_ng_b{1:d}.png".format(result_tag, i)
            utils.visualize_samples(seq_samps, file_name, num_rows=20)
Example #44
def pretrain_osm(lam_kld=0.0):
    # Initialize a source of randomness
    rng = np.random.RandomState(1234)

    # Load some data to train/validate/test with
    data_file = 'data/tfd_data_48x48.pkl'
    dataset = load_tfd(tfd_pkl_name=data_file, which_set='unlabeled', fold='all')
    Xtr_unlabeled = dataset[0]
    dataset = load_tfd(tfd_pkl_name=data_file, which_set='train', fold='all')
    Xtr_train = dataset[0]
    Xtr = np.vstack([Xtr_unlabeled, Xtr_train])
    dataset = load_tfd(tfd_pkl_name=data_file, which_set='valid', fold='all')
    Xva = dataset[0]
    tr_samples = Xtr.shape[0]
    va_samples = Xva.shape[0]
    batch_size = 400
    batch_reps = 6
    carry_frac = 0.25
    carry_size = int(batch_size * carry_frac)
    reset_prob = 0.04
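    # each minibatch keeps a "carry" set of the worst-scoring examples from the
    # previous batch; with probability reset_prob the batch is fully resampled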

    # setup some symbolic variables and stuff
    Xd = T.matrix('Xd_base')
    Xc = T.matrix('Xc_base')
    Xm = T.matrix('Xm_base')
    data_dim = Xtr.shape[1]
    prior_sigma = 1.0
    Xtr_mean = np.mean(Xtr, axis=0)

    ##########################
    # NETWORK CONFIGURATIONS #
    ##########################
    gn_params = {}
    shared_config = [PRIOR_DIM, 1500, 1500]
    top_config = [shared_config[-1], data_dim]
    gn_params['shared_config'] = shared_config
    gn_params['mu_config'] = top_config
    gn_params['sigma_config'] = top_config
    gn_params['activation'] = relu_actfun
    gn_params['init_scale'] = 1.4
    gn_params['lam_l2a'] = 0.0
    gn_params['vis_drop'] = 0.0
    gn_params['hid_drop'] = 0.0
    gn_params['bias_noise'] = 0.0
    gn_params['input_noise'] = 0.0
    # choose some parameters for the continuous inferencer
    in_params = {}
    shared_config = [data_dim, 1500, 1500]
    top_config = [shared_config[-1], PRIOR_DIM]
    in_params['shared_config'] = shared_config
    in_params['mu_config'] = top_config
    in_params['sigma_config'] = top_config
    in_params['activation'] = relu_actfun
    in_params['init_scale'] = 1.4
    in_params['lam_l2a'] = 0.0
    in_params['vis_drop'] = 0.0
    in_params['hid_drop'] = 0.0
    in_params['bias_noise'] = 0.0
    in_params['input_noise'] = 0.0
    # Initialize the base networks for this OneStageModel
    IN = InfNet(rng=rng, Xd=Xd, prior_sigma=prior_sigma, \
            params=in_params, shared_param_dicts=None)
    GN = InfNet(rng=rng, Xd=Xd, prior_sigma=prior_sigma, \
            params=gn_params, shared_param_dicts=None)
    # Initialize biases in IN and GN
    IN.init_biases(0.2)
    GN.init_biases(0.2)

    ######################################
    # LOAD AND RESTART FROM SAVED PARAMS #
    ######################################
    # gn_fname = RESULT_PATH+"pt_osm_params_b110000_GN.pkl"
    # in_fname = RESULT_PATH+"pt_osm_params_b110000_IN.pkl"
    # IN = load_infnet_from_file(f_name=in_fname, rng=rng, Xd=Xd, \
    #         new_params=None)
    # GN = load_infnet_from_file(f_name=gn_fname, rng=rng, Xd=Xd, \
    #         new_params=None)
    # in_params = IN.params
    # gn_params = GN.params

    #########################
    # INITIALIZE THE GIPAIR #
    #########################
    osm_params = {}
    osm_params['x_type'] = 'bernoulli'
    osm_params['xt_transform'] = 'sigmoid'
    osm_params['logvar_bound'] = LOGVAR_BOUND
    OSM = OneStageModel(rng=rng, Xd=Xd, Xc=Xc, Xm=Xm, \
            p_x_given_z=GN, q_z_given_x=IN, \
            x_dim=data_dim, z_dim=PRIOR_DIM, params=osm_params)
    OSM.set_lam_l2w(1e-5)
    safe_mean = (0.9 * Xtr_mean) + 0.05
    safe_mean_logit = np.log(safe_mean / (1.0 - safe_mean))
    OSM.set_output_bias(safe_mean_logit)
    OSM.set_input_bias(-Xtr_mean)

    ######################
    # BASIC VAE TRAINING #
    ######################
    out_file = open(RESULT_PATH+"pt_osm_results.txt", 'wb')
    # Set initial learning rate and basic SGD hyper parameters
    obs_costs = np.zeros((batch_size,))
    costs = [0. for i in range(10)]
    learn_rate = 0.002
    for i in range(200000):
        scale = min(1.0, float(i) / 5000.0)
        if ((i > 1) and ((i % 20000) == 0)):
            learn_rate = learn_rate * 0.8
        # raise momentum as training progresses
        if (i < 10000):
            momentum = 0.5
        elif (i < 50000):
            momentum = 0.7
        else:
            momentum = 0.9
        if ((i == 0) or (npr.rand() < reset_prob)):
            # sample a fully random batch
            batch_idx = npr.randint(low=0,high=tr_samples,size=(batch_size,))
        else:
            # sample a partially random batch, which retains some portion of
            # the worst scoring examples from the previous batch
            fresh_idx = npr.randint(low=0,high=tr_samples,size=(batch_size-carry_size,))
            batch_idx = np.concatenate((fresh_idx.ravel(), carry_idx.ravel()))
        # assemble the minibatch for this update from batch_idx, so the
        # carried examples are actually the ones that get trained on
        Xd_batch = Xtr.take(batch_idx, axis=0)
        Xc_batch = 0.0 * Xd_batch
        Xm_batch = 0.0 * Xd_batch
        # do a minibatch update of the model, and compute some costs
        OSM.set_sgd_params(lr_1=(scale*learn_rate), \
                mom_1=(scale*momentum), mom_2=0.98)
        OSM.set_lam_nll(1.0)
        OSM.set_lam_kld(lam_kld_1=scale*lam_kld, lam_kld_2=0.0, lam_kld_c=50.0)
        result = OSM.train_joint(Xd_batch, Xc_batch, Xm_batch, batch_reps)
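        # estimate per-example costs and keep the worst-scoring examples as the
        # carry set for the next batch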
        batch_costs = result[4] + result[5]
        obs_costs = collect_obs_costs(batch_costs, batch_reps)
        carry_idx = batch_idx[np.argsort(-obs_costs)[0:carry_size]]
        costs = [(costs[j] + result[j]) for j in range(len(result))]
        if ((i % 1000) == 0):
            # record and then reset the cost trackers
            costs = [(v / 1000.0) for v in costs]
            str_1 = "-- batch {0:d} --".format(i)
            str_2 = "    joint_cost: {0:.4f}".format(costs[0])
            str_3 = "    nll_cost  : {0:.4f}".format(costs[1])
            str_4 = "    kld_cost  : {0:.4f}".format(costs[2])
            str_5 = "    reg_cost  : {0:.4f}".format(costs[3])
            costs = [0.0 for v in costs]
            # print out some diagnostic information
            joint_str = "\n".join([str_1, str_2, str_3, str_4, str_5])
            print(joint_str)
            out_file.write(joint_str+"\n")
            out_file.flush()
        if ((i % 2000) == 0):
            Xva = row_shuffle(Xva)
            model_samps = OSM.sample_from_prior(500)
            file_name = RESULT_PATH+"pt_osm_samples_b{0:d}_XG.png".format(i)
            utils.visualize_samples(model_samps, file_name, num_rows=20)
            file_name = RESULT_PATH+"pt_osm_inf_weights_b{0:d}.png".format(i)
            utils.visualize_samples(OSM.inf_weights.get_value(borrow=False).T, \
                    file_name, num_rows=30)
            file_name = RESULT_PATH+"pt_osm_gen_weights_b{0:d}.png".format(i)
            utils.visualize_samples(OSM.gen_weights.get_value(borrow=False), \
                    file_name, num_rows=30)
            # compute information about free-energy on validation set
            file_name = RESULT_PATH+"pt_osm_free_energy_b{0:d}.png".format(i)
            fe_terms = OSM.compute_fe_terms(Xva[0:2500], 20)
            fe_mean = np.mean(fe_terms[0]) + np.mean(fe_terms[1])
            fe_str = "    nll_bound : {0:.4f}".format(fe_mean)
            print(fe_str)
            out_file.write(fe_str+"\n")
            utils.plot_scatter(fe_terms[1], fe_terms[0], file_name, \
                    x_label='Posterior KLd', y_label='Negative Log-likelihood')
            # compute information about posterior KLds on validation set
            file_name = RESULT_PATH+"pt_osm_post_klds_b{0:d}.png".format(i)
            post_klds = OSM.compute_post_klds(Xva[0:2500])
            post_dim_klds = np.mean(post_klds, axis=0)
            utils.plot_stem(np.arange(post_dim_klds.shape[0]), post_dim_klds, \
                    file_name)
        if ((i % 5000) == 0):
            IN.save_to_file(f_name=RESULT_PATH+"pt_osm_params_b{0:d}_IN.pkl".format(i))
            GN.save_to_file(f_name=RESULT_PATH+"pt_osm_params_b{0:d}_GN.pkl".format(i))
    IN.save_to_file(f_name=RESULT_PATH+"pt_osm_params_IN.pkl")
    GN.save_to_file(f_name=RESULT_PATH+"pt_osm_params_GN.pkl")
    return
def test_one_stage_model():
    ##########################
    # Get some training data #
    ##########################
    rng = np.random.RandomState(1234)
    Xtr, Xva, Xte = load_binarized_mnist(data_path='./data/')
    Xtr = np.vstack((Xtr, Xva))
    Xva = Xte
    #del Xte
    tr_samples = Xtr.shape[0]
    va_samples = Xva.shape[0]
    batch_size = 128
    batch_reps = 1

    ###############################################
    # Setup some parameters for the OneStageModel #
    ###############################################
    x_dim = Xtr.shape[1]
    z_dim = 64
    x_type = 'bernoulli'
    xin_sym = T.matrix('xin_sym')

    ###############
    # p_x_given_z #
    ###############
    params = {}
    shared_config = \
    [ {'layer_type': 'fc',
       'in_chans': z_dim,
       'out_chans': 256,
       'activation': relu_actfun,
       'apply_bn': True}, \
      {'layer_type': 'fc',
       'in_chans': 256,
       'out_chans': 7*7*128,
       'activation': relu_actfun,
       'apply_bn': True,
       'shape_func_out': lambda x: T.reshape(x, (-1, 128, 7, 7))}, \
      {'layer_type': 'conv',
       'in_chans': 128, # in shape:  (batch, 128, 7, 7)
       'out_chans': 64, # out shape: (batch, 64, 14, 14)
       'activation': relu_actfun,
       'filt_dim': 5,
       'conv_stride': 'half',
       'apply_bn': True} ]
    output_config = \
    [ {'layer_type': 'conv',
       'in_chans': 64, # in shape:  (batch, 64, 14, 14)
       'out_chans': 1, # out shape: (batch, 1, 28, 28)
       'activation': relu_actfun,
       'filt_dim': 5,
       'conv_stride': 'half',
       'apply_bn': False,
       'shape_func_out': lambda x: T.flatten(x, 2)}, \
      {'layer_type': 'conv',
       'in_chans': 64,
       'out_chans': 1,
       'activation': relu_actfun,
       'filt_dim': 5,
       'conv_stride': 'half',
       'apply_bn': False,
       'shape_func_out': lambda x: T.flatten(x, 2)} ]
    params['shared_config'] = shared_config
    params['output_config'] = output_config
    params['init_scale'] = 1.0
    params['build_theano_funcs'] = False
    p_x_given_z = HydraNet(rng=rng, Xd=xin_sym, \
            params=params, shared_param_dicts=None)
    p_x_given_z.init_biases(0.0)
    ###############
    # q_z_given_x #
    ###############
    params = {}
    shared_config = \
    [ {'layer_type': 'conv',
       'in_chans': 1,   # in shape:  (batch, 784)
       'out_chans': 64, # out shape: (batch, 64, 14, 14)
       'activation': relu_actfun,
       'filt_dim': 5,
       'conv_stride': 'double',
       'apply_bn': True,
       'shape_func_in': lambda x: T.reshape(x, (-1, 1, 28, 28))}, \
      {'layer_type': 'conv',
       'in_chans': 64,   # in shape:  (batch, 64, 14, 14)
       'out_chans': 128, # out shape: (batch, 128, 7, 7)
       'activation': relu_actfun,
       'filt_dim': 5,
       'conv_stride': 'double',
       'apply_bn': True,
       'shape_func_out': lambda x: T.flatten(x, 2)}, \
      {'layer_type': 'fc',
       'in_chans': 128*7*7,
       'out_chans': 256,
       'activation': relu_actfun,
       'apply_bn': True} ]
    output_config = \
    [ {'layer_type': 'fc',
       'in_chans': 256,
       'out_chans': z_dim,
       'activation': relu_actfun,
       'apply_bn': False}, \
      {'layer_type': 'fc',
       'in_chans': 256,
       'out_chans': z_dim,
       'activation': relu_actfun,
       'apply_bn': False} ]
    params['shared_config'] = shared_config
    params['output_config'] = output_config
    params['init_scale'] = 1.0
    params['build_theano_funcs'] = False
    q_z_given_x = HydraNet(rng=rng, Xd=xin_sym, \
            params=params, shared_param_dicts=None)
    q_z_given_x.init_biases(0.0)


    ##############################################################
    # Define parameters for the TwoStageModel, and initialize it #
    ##############################################################
    print("Building the OneStageModel...")
    osm_params = {}
    osm_params['x_type'] = x_type
    osm_params['obs_transform'] = 'sigmoid'
    OSM = OneStageModel(rng=rng, x_in=xin_sym,
            x_dim=x_dim, z_dim=z_dim,
            p_x_given_z=p_x_given_z,
            q_z_given_x=q_z_given_x,
            params=osm_params)

    ################################################################
    # Apply some updates, to check that they aren't totally broken #
    ################################################################
    log_name = "{}_RESULTS.txt".format("OSM_TEST")
    out_file = open(log_name, 'wb')
    costs = [0. for i in range(10)]
    learn_rate = 0.0005
    momentum = 0.9
    batch_idx = np.arange(batch_size) + tr_samples
    for i in range(500000):
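        # ramp the SGD scale up over the first 2500 updates, capping it at 0.5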
        scale = min(0.5, ((i+1) / 5000.0))
        if (((i + 1) % 10000) == 0):
            learn_rate = learn_rate * 0.95
        # get the indices of training samples for this batch update
        batch_idx += batch_size
        if (np.max(batch_idx) >= tr_samples):
            # we finished an "epoch", so we rejumble the training set
            Xtr = row_shuffle(Xtr)
            batch_idx = np.arange(batch_size)
        Xb = to_fX( Xtr.take(batch_idx, axis=0) )
        #Xb = binarize_data(Xtr.take(batch_idx, axis=0))
        # set sgd and objective function hyperparams for this update
        OSM.set_sgd_params(lr=scale*learn_rate, \
                           mom_1=(scale*momentum), mom_2=0.98)
        OSM.set_lam_nll(lam_nll=1.0)
        OSM.set_lam_kld(lam_kld=1.0)
        OSM.set_lam_l2w(1e-5)
        # perform a minibatch update and record the cost for this batch
        result = OSM.train_joint(Xb, batch_reps)
        costs = [(costs[j] + result[j]) for j in range(len(result))]
        if ((i % 500) == 0):
            costs = [(v / 500.0) for v in costs]
            str1 = "-- batch {0:d} --".format(i)
            str2 = "    joint_cost: {0:.4f}".format(costs[0])
            str3 = "    nll_cost  : {0:.4f}".format(costs[1])
            str4 = "    kld_cost  : {0:.4f}".format(costs[2])
            str5 = "    reg_cost  : {0:.4f}".format(costs[3])
            joint_str = "\n".join([str1, str2, str3, str4, str5])
            print(joint_str)
            out_file.write(joint_str+"\n")
            out_file.flush()
            costs = [0.0 for v in costs]
        if (((i % 5000) == 0) or ((i < 10000) and ((i % 1000) == 0))):
            # draw some independent random samples from the model
            samp_count = 300
            model_samps = OSM.sample_from_prior(samp_count)
            file_name = "OSM_SAMPLES_b{0:d}.png".format(i)
            utils.visualize_samples(model_samps, file_name, num_rows=15)
            # compute free energy estimate for validation samples
            Xva = row_shuffle(Xva)
            fe_terms = OSM.compute_fe_terms(Xva[0:5000], 20)
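            # fe_terms presumably holds per-example (NLL, KLd) estimates from 20
            # posterior samples; the mean of their sum estimates the variational
            # free energy, i.e. an upper bound on the validation NLL.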
            fe_mean = np.mean(fe_terms[0]) + np.mean(fe_terms[1])
            out_str = "    nll_bound : {0:.4f}".format(fe_mean)
            print(out_str)
            out_file.write(out_str+"\n")
            out_file.flush()
    return
예제 #46
0
def test_with_model_init():
    ##########################
    # Get some training data #
    ##########################
    rng = np.random.RandomState(1234)
    #dataset = 'data/mnist.pkl.gz'
    #datasets = load_udm(dataset, as_shared=False, zero_mean=False)
    #Xtr = datasets[0][0]
    #Xva = datasets[1][0]
    Xtr, Xva, Xte = load_binarized_mnist(data_path='./data/')
    del Xte
    tr_samples = Xtr.shape[0]
    va_samples = Xva.shape[0]
    batch_size = 200
    batch_reps = 1

    ############################################################
    # Setup some parameters for the Iterative Refinement Model #
    ############################################################
    x_dim = Xtr.shape[1]
    z_dim = 20
    h_dim = 50
    s_dim = 50
    init_scale = 1.0
    
    x_type = 'bernoulli'

    # some InfNet instances to build the TwoStageModel from
    x_in_sym = T.matrix('x_in_sym')
    x_out_sym = T.matrix('x_out_sym')

    ###############
    # p_h_given_s #
    ###############
    params = {}
    shared_config = [s_dim, 250, 250]
    top_config = [shared_config[-1], h_dim]
    params['shared_config'] = shared_config
    params['mu_config'] = top_config
    params['sigma_config'] = top_config
    params['activation'] = softplus_actfun
    params['init_scale'] = init_scale
    params['lam_l2a'] = 0.0
    params['vis_drop'] = 0.0
    params['hid_drop'] = 0.0
    params['bias_noise'] = 0.0
    params['input_noise'] = 0.0
    params['build_theano_funcs'] = False
    p_h_given_s = InfNet(rng=rng, Xd=x_in_sym, \
            params=params, shared_param_dicts=None)
    p_h_given_s.init_biases(0.2)
    #################
    # p_x_given_s_h #
    #################
    params = {}
    shared_config = [(s_dim + h_dim), 250, 250]
    top_config = [shared_config[-1], x_dim]
    params['shared_config'] = shared_config
    params['mu_config'] = top_config
    params['sigma_config'] = top_config
    params['activation'] = softplus_actfun
    params['init_scale'] = init_scale
    params['lam_l2a'] = 0.0
    params['vis_drop'] = 0.0
    params['hid_drop'] = 0.0
    params['bias_noise'] = 0.0
    params['input_noise'] = 0.0
    params['build_theano_funcs'] = False
    p_x_given_s_h = InfNet(rng=rng, Xd=x_in_sym, \
            params=params, shared_param_dicts=None)
    p_x_given_s_h.init_biases(0.2)
    ###############
    # p_s_given_z #
    ###############
    params = {}
    shared_config = [z_dim, 250]
    top_config = [shared_config[-1], s_dim]
    params['shared_config'] = shared_config
    params['mu_config'] = top_config
    params['sigma_config'] = top_config
    params['activation'] = softplus_actfun
    params['init_scale'] = init_scale
    params['lam_l2a'] = 0.0
    params['vis_drop'] = 0.0
    params['hid_drop'] = 0.0
    params['bias_noise'] = 0.0
    params['input_noise'] = 0.0
    params['build_theano_funcs'] = False
    p_s_given_z = InfNet(rng=rng, Xd=x_in_sym, \
            params=params, shared_param_dicts=None)
    p_s_given_z.init_biases(0.2)
    ###############
    # q_z_given_x #
    ###############
    params = {}
    shared_config = [x_dim, 250, 250]
    top_config = [shared_config[-1], z_dim]
    params['shared_config'] = shared_config
    params['mu_config'] = top_config
    params['sigma_config'] = top_config
    params['activation'] = softplus_actfun
    params['init_scale'] = init_scale
    params['lam_l2a'] = 0.0
    params['vis_drop'] = 0.0
    params['hid_drop'] = 0.0
    params['bias_noise'] = 0.0
    params['input_noise'] = 0.0
    params['build_theano_funcs'] = False
    q_z_given_x = InfNet(rng=rng, Xd=x_in_sym, \
            params=params, shared_param_dicts=None)
    q_z_given_x.init_biases(0.2)
    #################
    # q_h_given_x_s #
    #################
    params = {}
    shared_config = [(x_dim + s_dim), 500, 500]
    top_config = [shared_config[-1], h_dim]
    params['shared_config'] = shared_config
    params['mu_config'] = top_config
    params['sigma_config'] = top_config
    params['activation'] = softplus_actfun
    params['init_scale'] = init_scale
    params['lam_l2a'] = 0.0
    params['vis_drop'] = 0.0
    params['hid_drop'] = 0.0
    params['bias_noise'] = 0.0
    params['input_noise'] = 0.0
    params['build_theano_funcs'] = False
    q_h_given_x_s = InfNet(rng=rng, Xd=x_in_sym, \
            params=params, shared_param_dicts=None)
    q_h_given_x_s.init_biases(0.2)


    ##############################################################
    # Define parameters for the TwoStageModel, and initialize it #
    ##############################################################
    print("Building the TwoStageModel...")
    msm_params = {}
    msm_params['x_type'] = x_type
    msm_params['obs_transform'] = 'sigmoid'
    TSM = TwoStageModel(rng=rng, \
            x_in=x_in_sym, x_out=x_out_sym, \
            p_s_given_z=p_s_given_z, \
            p_h_given_s=p_h_given_s, \
            p_x_given_s_h=p_x_given_s_h, \
            q_z_given_x=q_z_given_x, \
            q_h_given_x_s=q_h_given_x_s, \
            x_dim=x_dim, \
            z_dim=z_dim, s_dim=s_dim, h_dim=h_dim, \
            params=msm_params)

    ################################################################
    # Apply some updates, to check that they aren't totally broken #
    ################################################################
    out_file = open("TSM_A_RESULTS.txt", 'wb')
    costs = [0. for i in range(10)]
    learn_rate = 0.0003
    momentum = 0.5
    batch_idx = np.arange(batch_size) + tr_samples
    for i in range(250000):
        scale = min(1.0, ((i+1) / 3000.0))
        if (((i + 1) % 10000) == 0):
            learn_rate = learn_rate * 0.95
        if (i > 50000):
            momentum = 0.90
        else:
            momentum = 0.50
        # get the indices of training samples for this batch update
        batch_idx += batch_size
        if (np.max(batch_idx) >= tr_samples):
            # we finished an "epoch", so we rejumble the training set
            Xtr = row_shuffle(Xtr)
            batch_idx = np.arange(batch_size)
        # train on the training set
        lam_kld = 1.0
        # set sgd and objective function hyperparams for this update
        TSM.set_sgd_params(lr_1=scale*learn_rate, lr_2=scale*learn_rate, \
                mom_1=scale*momentum, mom_2=0.99)
        TSM.set_train_switch(1.0)
        TSM.set_lam_nll(lam_nll=1.0)
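        # the q2p/p2q weights below presumably split the KL regularizer into a
        # KL(q||p) part and a KL(p||q) part, weighted 0.8 / 0.2 respectively.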
        TSM.set_lam_kld(lam_kld_z=1.0, lam_kld_q2p=0.8, lam_kld_p2q=0.2)
        TSM.set_lam_kld_l1l2(lam_kld_l1l2=scale)
        TSM.set_lam_l2w(1e-4)
        TSM.set_drop_rate(0.0)
        TSM.q_h_given_x_s.set_bias_noise(0.0)
        TSM.p_h_given_s.set_bias_noise(0.0)
        TSM.p_x_given_s_h.set_bias_noise(0.0)
        # perform a minibatch update and record the cost for this batch
        Xb_tr = to_fX( Xtr.take(batch_idx, axis=0) )
        result = TSM.train_joint(Xb_tr, Xb_tr, batch_reps)
        costs = [(costs[j] + result[j]) for j in range(len(result)-1)]
        if ((i % 500) == 0):
            costs = [(v / 500.0) for v in costs]
            str1 = "-- batch {0:d} --".format(i)
            str2 = "    joint_cost: {0:.4f}".format(costs[0])
            str3 = "    nll_cost  : {0:.4f}".format(costs[1])
            str4 = "    kld_cost  : {0:.4f}".format(costs[2])
            str5 = "    reg_cost  : {0:.4f}".format(costs[3])
            joint_str = "\n".join([str1, str2, str3, str4, str5])
            print(joint_str)
            out_file.write(joint_str+"\n")
            out_file.flush()
            costs = [0.0 for v in costs]
        if (((i % 2000) == 0) or ((i < 10000) and ((i % 1000) == 0))):
            TSM.set_drop_rate(0.0)
            TSM.q_h_given_x_s.set_bias_noise(0.0)
            TSM.p_h_given_s.set_bias_noise(0.0)
            TSM.p_x_given_s_h.set_bias_noise(0.0)
            # Get some validation samples for computing diagnostics
            Xva = row_shuffle(Xva)
            Xb_va = to_fX( Xva[0:2000] )
            # draw some independent random samples from the model
            samp_count = 500
            model_samps = TSM.sample_from_prior(samp_count)
            file_name = "TSM_A_SAMPLES_IND_b{0:d}.png".format(i)
            utils.visualize_samples(model_samps, file_name, num_rows=20)
            Xb_tr = to_fX( Xtr[0:2000] )
            fe_terms = TSM.compute_fe_terms(Xb_tr, Xb_tr, 30)
            fe_nll = np.mean(fe_terms[0])
            fe_kld = np.mean(fe_terms[1])
            fe_joint = fe_nll + fe_kld
            joint_str = "    vfe-tr: {0:.4f}, nll: ({1:.4f}, {2:.4f}, {3:.4f}), kld: ({4:.4f}, {5:.4f}, {6:.4f})".format( \
                    fe_joint, fe_nll, np.min(fe_terms[0]), np.max(fe_terms[0]), fe_kld, np.min(fe_terms[1]), np.max(fe_terms[1]))
            print(joint_str)
            out_file.write(joint_str+"\n")
            out_file.flush()
            fe_terms = TSM.compute_fe_terms(Xb_va, Xb_va, 30)
            fe_nll = np.mean(fe_terms[0])
            fe_kld = np.mean(fe_terms[1])
            fe_joint = fe_nll + fe_kld
            joint_str = "    vfe-va: {0:.4f}, nll: ({1:.4f}, {2:.4f}, {3:.4f}), kld: ({4:.4f}, {5:.4f}, {6:.4f})".format( \
                    fe_joint, fe_nll, np.min(fe_terms[0]), np.max(fe_terms[0]), fe_kld, np.min(fe_terms[1]), np.max(fe_terms[1]))
            print(joint_str)
            out_file.write(joint_str+"\n")
            out_file.flush()
    out_file.close()
    return
예제 #47
0
def test_with_model_init():
    ##########################
    # Get some training data #
    ##########################
    rng = np.random.RandomState(1234)
    dataset = 'data/mnist.pkl.gz'
    datasets = load_udm(dataset, zero_mean=False)
    Xtr_shared = datasets[0][0]
    Xva_shared = datasets[1][0]
    Xtr = Xtr_shared.get_value(borrow=False).astype(theano.config.floatX)
    Xva = Xva_shared.get_value(borrow=False).astype(theano.config.floatX)
    tr_samples = Xtr.shape[0]
    batch_size = 500
    batch_reps = 1

    ############################################################
    # Setup some parameters for the Iterative Refinement Model #
    ############################################################
    obs_dim = Xtr.shape[1]
    z_rnn_dim = 25
    z_obs_dim = 5
    jnt_dim = obs_dim + z_rnn_dim
    h_dim = 100
    x_type = 'bernoulli'
    prior_sigma = 1.0

    # some InfNet instances to build the MultiStageModel from
    X_sym = T.matrix('X_sym')

    ########################
    # p_s0_obs_given_z_obs #
    ########################
    params = {}
    shared_config = [z_obs_dim, 250, 250]
    top_config = [shared_config[-1], obs_dim]
    params['shared_config'] = shared_config
    params['mu_config'] = top_config
    params['sigma_config'] = top_config
    params['activation'] = softplus_actfun
    params['init_scale'] = 1.2
    params['lam_l2a'] = 1e-3
    params['vis_drop'] = 0.0
    params['hid_drop'] = 0.0
    params['bias_noise'] = 0.0
    params['input_noise'] = 0.0
    params['build_theano_funcs'] = False
    p_s0_obs_given_z_obs = InfNet(rng=rng, Xd=X_sym, prior_sigma=prior_sigma, \
            params=params, shared_param_dicts=None)
    p_s0_obs_given_z_obs.init_biases(0.2)
    #################
    # p_hi_given_si #
    #################
    params = {}
    shared_config = [jnt_dim, 500, 500]
    top_config = [shared_config[-1], h_dim]
    params['shared_config'] = shared_config
    params['mu_config'] = top_config
    params['sigma_config'] = top_config
    params['activation'] = softplus_actfun
    params['init_scale'] = 1.2
    params['lam_l2a'] = 0.0
    params['vis_drop'] = 0.0
    params['hid_drop'] = 0.0
    params['bias_noise'] = 0.0
    params['input_noise'] = 0.0
    params['build_theano_funcs'] = False
    p_hi_given_si = InfNet(rng=rng, Xd=X_sym, prior_sigma=prior_sigma, \
            params=params, shared_param_dicts=None)
    p_hi_given_si.init_biases(0.2)
    ######################
    # p_sip1_given_si_hi #
    ######################
    params = {}
    shared_config = [(h_dim + z_rnn_dim), 500, 500]
    top_config = [shared_config[-1], obs_dim]
    params['shared_config'] = shared_config
    params['mu_config'] = top_config
    params['sigma_config'] = top_config
    params['activation'] = softplus_actfun
    params['init_scale'] = 1.2
    params['lam_l2a'] = 0.0
    params['vis_drop'] = 0.0
    params['hid_drop'] = 0.0
    params['bias_noise'] = 0.0
    params['input_noise'] = 0.0
    params['build_theano_funcs'] = False
    p_sip1_given_si_hi = InfNet(rng=rng, Xd=X_sym, prior_sigma=prior_sigma, \
            params=params, shared_param_dicts=None)
    p_sip1_given_si_hi.init_biases(0.2)
    ###############
    # q_z_given_x #
    ###############
    params = {}
    shared_config = [obs_dim, 250, 250]
    top_config = [shared_config[-1], (z_rnn_dim + z_obs_dim)]
    params['shared_config'] = shared_config
    params['mu_config'] = top_config
    params['sigma_config'] = top_config
    params['activation'] = softplus_actfun
    params['init_scale'] = 1.2
    params['lam_l2a'] = 0.0
    params['vis_drop'] = 0.0
    params['hid_drop'] = 0.0
    params['bias_noise'] = 0.0
    params['input_noise'] = 0.0
    params['build_theano_funcs'] = False
    q_z_given_x = InfNet(rng=rng, Xd=X_sym, prior_sigma=prior_sigma, \
            params=params, shared_param_dicts=None)
    q_z_given_x.init_biases(0.2)
    ###################
    # q_hi_given_x_si #
    ###################
    params = {}
    shared_config = [(obs_dim + jnt_dim), 500, 500]
    top_config = [shared_config[-1], h_dim]
    params['shared_config'] = shared_config
    params['mu_config'] = top_config
    params['sigma_config'] = top_config
    params['activation'] = softplus_actfun
    params['init_scale'] = 1.2
    params['lam_l2a'] = 0.0
    params['vis_drop'] = 0.0
    params['hid_drop'] = 0.0
    params['bias_noise'] = 0.0
    params['input_noise'] = 0.0
    params['build_theano_funcs'] = False
    q_hi_given_x_si = InfNet(rng=rng, Xd=X_sym, prior_sigma=prior_sigma, \
            params=params, shared_param_dicts=None)
    q_hi_given_x_si.init_biases(0.2)


    ################################################################
    # Define parameters for the MultiStageModel, and initialize it #
    ################################################################
    print("Building the MultiStageModel...")
    msm_params = {}
    msm_params['x_type'] = x_type
    msm_params['obs_transform'] = 'sigmoid'
    MSM = MultiStageModel(rng=rng, x_in=X_sym, \
            p_s0_obs_given_z_obs=p_s0_obs_given_z_obs, \
            p_hi_given_si=p_hi_given_si, \
            p_sip1_given_si_hi=p_sip1_given_si_hi, \
            q_z_given_x=q_z_given_x, \
            q_hi_given_x_si=q_hi_given_x_si, \
            obs_dim=obs_dim, z_rnn_dim=z_rnn_dim, z_obs_dim=z_obs_dim, \
            h_dim=h_dim, model_init_obs=False, model_init_rnn=True, \
            ir_steps=3, params=msm_params)
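    # squash the empirical pixel means into [0.05, 0.95] before taking the
    # logit, so obs_mean_logit stays finite; the resulting biases presumably
    # make the model's initial reconstructions resemble the mean training image.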
    obs_mean = (0.9 * np.mean(Xtr, axis=0)) + 0.05
    obs_mean_logit = np.log(obs_mean / (1.0 - obs_mean))
    MSM.set_input_bias(-obs_mean)
    MSM.set_obs_bias(0.1*obs_mean_logit)

    ################################################################
    # Apply some updates, to check that they aren't totally broken #
    ################################################################
    costs = [0. for i in range(10)]
    learn_rate = 0.003
    momentum = 0.5
    for i in range(300000):
        scale = min(1.0, ((i+1) / 5000.0))
        l1l2_weight = 1.0 #min(1.0, ((i+1) / 2500.0))
        if (((i + 1) % 10000) == 0):
            learn_rate = learn_rate * 0.92
        # use if/elif so the 0.80 setting is not overwritten once i > 100000
        if (i > 100000):
            momentum = 0.80
        elif (i > 50000):
            momentum = 0.65
        else:
            momentum = 0.50
        # randomly sample a minibatch
        tr_idx = npr.randint(low=0,high=tr_samples,size=(batch_size,))
        Xb = binarize_data(Xtr.take(tr_idx, axis=0))
        Xb = Xb.astype(theano.config.floatX)
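        # re-binarizing each minibatch on the fly presumably gives a fresh
        # Bernoulli sample of the gray-scale MNIST pixels every update
        # (i.e. dynamic binarization), rather than one fixed binary dataset.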
        # set sgd and objective function hyperparams for this update
        MSM.set_sgd_params(lr_1=scale*learn_rate, lr_2=scale*learn_rate, \
                mom_1=(scale*momentum), mom_2=0.99)
        MSM.set_train_switch(1.0)
        MSM.set_l1l2_weight(l1l2_weight)
        MSM.set_lam_nll(lam_nll=1.0)
        MSM.set_lam_kld(lam_kld_1=1.0, lam_kld_2=1.0)
        MSM.set_lam_l2w(1e-5)
        MSM.set_kzg_weight(0.01)
        # perform a minibatch update and record the cost for this batch
        result = MSM.train_joint(Xb, batch_reps)
        costs = [(costs[j] + result[j]) for j in range(len(result))]
        if ((i % 500) == 0):
            costs = [(v / 500.0) for v in costs]
            print("-- batch {0:d} --".format(i))
            print("    joint_cost: {0:.4f}".format(costs[0]))
            print("    nll_cost  : {0:.4f}".format(costs[1]))
            print("    kld_cost  : {0:.4f}".format(costs[2]))
            print("    reg_cost  : {0:.4f}".format(costs[3]))
            costs = [0.0 for v in costs]
        if (((i % 2000) == 0) or ((i < 10000) and ((i % 1000) == 0))):
            Xva = row_shuffle(Xva)
            # draw some independent random samples from the model
            samp_count = 200
            model_samps = MSM.sample_from_prior(samp_count)
            seq_len = len(model_samps)
            seq_samps = np.zeros((seq_len*samp_count, model_samps[0].shape[1]))
            idx = 0
            for s1 in range(samp_count): 
                for s2 in range(seq_len):
                    seq_samps[idx] = model_samps[s2][s1]
                    idx += 1
            file_name = "MZ_SAMPLES_b{0:d}.png".format(i)
            utils.visualize_samples(seq_samps, file_name, num_rows=20)
            # visualize some important weights in the model
            file_name = "MZ_INF_1_WEIGHTS_b{0:d}.png".format(i)
            W = MSM.inf_1_weights.get_value(borrow=False).T
            utils.visualize_samples(W[:,:obs_dim], file_name, num_rows=20)
            file_name = "MZ_INF_2_WEIGHTS_b{0:d}.png".format(i)
            W = MSM.inf_2_weights.get_value(borrow=False).T
            utils.visualize_samples(W[:,:obs_dim], file_name, num_rows=20)
            file_name = "MZ_GEN_1_WEIGHTS_b{0:d}.png".format(i)
            W = MSM.gen_1_weights.get_value(borrow=False)
            utils.visualize_samples(W[:,:obs_dim], file_name, num_rows=20)
            file_name = "MZ_GEN_2_WEIGHTS_b{0:d}.png".format(i)
            W = MSM.gen_2_weights.get_value(borrow=False)
            utils.visualize_samples(W[:,:obs_dim], file_name, num_rows=20)
            file_name = "MZ_GEN_INF_WEIGHTS_b{0:d}.png".format(i)
            W = MSM.gen_inf_weights.get_value(borrow=False).T
            utils.visualize_samples(W[:,:obs_dim], file_name, num_rows=20)
            # compute information about posterior KLds on validation set
            post_klds = MSM.compute_post_klds(Xva[0:5000])
            file_name = "MZ_H0_KLDS_b{0:d}.png".format(i)
            utils.plot_stem(np.arange(post_klds[0].shape[1]), \
                    np.mean(post_klds[0], axis=0), file_name)
            file_name = "MZ_HI_COND_KLDS_b{0:d}.png".format(i)
            utils.plot_stem(np.arange(post_klds[1].shape[1]), \
                    np.mean(post_klds[1], axis=0), file_name)
            file_name = "MZ_HI_GLOB_KLDS_b{0:d}.png".format(i)
            utils.plot_stem(np.arange(post_klds[2].shape[1]), \
                    np.mean(post_klds[2], axis=0), file_name)
            # compute information about free-energy on validation set
            file_name = "MZ_FREE_ENERGY_b{0:d}.png".format(i)
            fe_terms = MSM.compute_fe_terms(binarize_data(Xva[0:5000]), 20)
            fe_mean = np.mean(fe_terms[0]) + np.mean(fe_terms[1])
            print("    nll_bound : {0:.4f}".format(fe_mean))
            utils.plot_scatter(fe_terms[1], fe_terms[0], file_name, \
                    x_label='Posterior KLd', y_label='Negative Log-likelihood')
    return
예제 #48
0
    # configure a trajectory generator
    num_samples = 100
    traj_len = 64
    x_range = [-0.8, 0.8]
    y_range = [-0.8, 0.8]
    max_speed = 0.15
    TRAJ = TrajectoryGenerator(x_range=x_range, y_range=y_range, \
                               max_speed=max_speed)

    # test the writer function
    start_time = time.time()
    batch_count = 50
    for i in range(batch_count):
        # generate a minibatch of trajectories
        traj_pos, traj_vel = TRAJ.generate_trajectories(num_samples, traj_len)
        traj_x = traj_pos[:, :, 0]
        traj_y = traj_pos[:, :, 1]
        # draw the trajectories
        center_x = to_fX(traj_x.T.ravel())
        center_y = to_fX(traj_y.T.ravel())
        delta = to_fX(np.ones(center_x.shape))
        sigma = to_fX(np.ones(center_x.shape))
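        # write_func presumably implements a DRAW-style Gaussian-grid writer;
        # here the grid stride (delta) and blur (sigma) are held fixed, so only
        # the trajectory's (x, y) centers vary across the rendered sequence.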
        W = write_func(center_y, center_x, delta, 0.2 * sigma)
    end_time = time.time()
    render_time = end_time - start_time
    render_bps = batch_count / render_time
    print("RENDER BATCH/SECOND: {0:.2f}".format(render_bps))

    W = W[:20 * traj_len]
    utils.visualize_samples(W, "AAAAA.png", num_rows=20)
예제 #49
0
def evaluate_lenet5(learning_rate=0.05, n_epochs=500,
                    dataset='./data/mnist.pkl.gz',
                    nkerns=[48, 64], batch_size=256):
    """ Demonstrates lenet on MNIST dataset

    :type learning_rate: float
    :param learning_rate: learning rate used (factor for the stochastic
                          gradient)

    :type n_epochs: int
    :param n_epochs: maximal number of epochs to run the optimizer

    :type dataset: string
    :param dataset: path to the dataset used for training /testing (MNIST here)

    :type nkerns: list of ints
    :param nkerns: number of kernels on each layer
    """

    rng = numpy.random.RandomState(23455)

    datasets = load_data(dataset)

    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]

    # compute number of minibatches for training, validation and testing
    n_train_batches = train_set_x.get_value(borrow=True).shape[0]
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0]
    n_test_batches = test_set_x.get_value(borrow=True).shape[0]
    n_train_batches /= batch_size
    n_valid_batches /= batch_size
    n_test_batches /= batch_size

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch
    x = T.matrix('x')   # the data is presented as rasterized images
    y = T.ivector('y')  # the labels are presented as 1D vector of
                        # [int] labels

    ishape = (28, 28)  # this is the size of MNIST images

    start_rate = numpy.asarray([0.05]).astype(theano.config.floatX)
    learning_rate = theano.shared(value=start_rate, name='learning_rate')

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print '... building the model'

    tanh = lambda vals: T.tanh(vals)
    relu = lambda vals: relu_actfun(vals)

    # Reshape matrix of rasterized images of shape (batch_size,28*28)
    # to a 4D tensor, compatible with our LeNetConvPoolLayer
    layer0_prep = Reshape2D4DLayer(input=x, out_shape=(1, 28, 28))
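    # e.g. a minibatch of shape (batch_size, 784) becomes (batch_size, 1, 28, 28)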

    # Construct the first convolutional pooling layer:
    # filtering reduces the image size to (28-7+1,28-7+1)=(22,22)
    # maxpooling reduces this further to (22/2,22/2) = (11,11)
    # 4D output tensor is thus of shape (batch_size,nkerns[0],11,11)
    layer0 = ConvPoolLayer(rng, input=layer0_prep.output, \
            filt_def=(nkerns[0], 1, 7, 7), pool_def=(2, 2), \
            activation=relu, drop_rate=0.0, input_noise=0.1, bias_noise=0.05, \
            W=None, b=None, name="layer0", W_scale=2.0)

    # Construct the second convolutional pooling layer
    # filtering reduces the image size to (11-4+1,11-4+1)=(8,8)
    # maxpooling reduces this further to (8/2,8/2) = (4,4)
    # 4D output tensor is thus of shape (batch_size,nkerns[1],4,4)
    layer1 = ConvPoolLayer(rng, input=layer0.output, \
            filt_def=(nkerns[1], nkerns[0], 4, 4), pool_def=(2, 2), \
            activation=relu, drop_rate=0.0, input_noise=0.0, bias_noise=0.05, \
            W=None, b=None, name="layer1", W_scale=2.0)

    # the HiddenLayer being fully-connected, it operates on 2D matrices of
    # shape (batch_size,num_pixels) (i.e matrix of rasterized images).
    # This will generate a matrix of shape (batch_size,nkerns[1]*4*4) = (256,1024)
    layer2_prep = Reshape4D2DLayer(layer1.output)

    # construct a fully-connected relu layer
    layer2 = HiddenLayer(rng, layer2_prep.output, nkerns[1]*4*4, 512, \
                 activation=relu, pool_size=0, \
                 drop_rate=0.0, input_noise=0.0, bias_noise=0.05, \
                 W=None, b=None, name="layer2", W_scale=2.0)

    # construct an output layer to predict classes
    layer3 = HiddenLayer(rng, layer2.output, 512, 10, \
             activation=relu, pool_size=0, \
             drop_rate=0.5, input_noise=0.0, bias_noise=0.0, \
             W=None, b=None, name="layer3", W_scale=2.0)

    # get a loss function to apply to the output layer
    loss_func = LogisticRegression(layer3)

    # the cost we minimize during training is the NLL of the model
    cost = loss_func.loss_func(y)

    # create a function to compute the mistakes that are made by the model
    test_model = theano.function([index], loss_func.errors(y),
             givens={
                x: test_set_x[index * batch_size: (index + 1) * batch_size],
                y: test_set_y[index * batch_size: (index + 1) * batch_size]})

    validate_model = theano.function([index], loss_func.errors(y),
            givens={
                x: valid_set_x[index * batch_size: (index + 1) * batch_size],
                y: valid_set_y[index * batch_size: (index + 1) * batch_size]})

    # create a list of all model parameters to be fit by gradient descent
    params = layer3.params + layer2.params + layer1.params + layer0.params
    moms = OrderedDict()
    for p in params:
        moms[p] = theano.shared(value=numpy.zeros( \
            p.get_value(borrow=True).shape).astype(theano.config.floatX))

    # create a list of gradients for all model parameters
    grads = OrderedDict()
    for p in params:
        grads[p] = T.grad(cost, p)

    # train_model is a function that updates the model parameters by
    # SGD Since this model has many parameters, it would be tedious to
    # manually create an update rule for each model parameter. We thus
    # create the updates list by automatically looping over all
    # (params[i],grads[i]) pairs.
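    # The effective update rule is plain momentum SGD on an exponential moving
    # average of gradients: m <- 0.8*m + 0.2*g, then p <- p - learning_rate*m.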
    updates = []
    for p in params:
        mom_update = (moms[p], (0.8 * moms[p]) + (0.2 * grads[p]))
        param_update = (p, p - learning_rate[0] * moms[p])
        updates.append(mom_update)
        updates.append(param_update)

    train_model = theano.function([index], cost, updates=updates,
          givens={
            x: train_set_x[index * batch_size: (index + 1) * batch_size],
            y: train_set_y[index * batch_size: (index + 1) * batch_size]})

    ###############
    # TRAIN MODEL #
    ###############
    print '... training'
    # early-stopping parameters
    patience = 10000  # look as this many examples regardless
    patience_increase = 2  # wait this much longer when a new best is
                           # found
    improvement_threshold = 0.995  # a relative improvement of this much is
                                   # considered significant
    validation_frequency = min(n_train_batches, patience / 2)
                                  # go through this many
                                  # minibatche before checking the network
                                  # on the validation set; in this case we
                                  # check every epoch
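    # e.g. with the standard 50000-image MNIST training split and batch_size=256,
    # n_train_batches = 195, so validation runs roughly once per epoch; each
    # sufficiently-improved validation score at iteration t then extends
    # patience to max(patience, 2 * t).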

    best_params = None
    best_validation_loss = numpy.inf
    best_iter = 0
    test_score = 0.
    start_time = time.clock()

    epoch = 0
    done_looping = False

    while (epoch < n_epochs) and (not done_looping):
        epoch = epoch + 1
        for minibatch_index in xrange(n_train_batches):

            iter = (epoch - 1) * n_train_batches + minibatch_index

            if iter % 100 == 0:
                print 'training @ iter = ', iter
            cost_ij = train_model(minibatch_index)

            if (iter + 1) % validation_frequency == 0:

                # compute zero-one loss on validation set
                validation_losses = [validate_model(i) for i
                                     in xrange(n_valid_batches)]
                this_validation_loss = numpy.mean(validation_losses) / batch_size
                print('epoch %i, minibatch %i/%i, validation error %f %%' % \
                      (epoch, minibatch_index + 1, n_train_batches, \
                       this_validation_loss * 100.))

                # if we got the best validation score until now
                if this_validation_loss < best_validation_loss:

                    #improve patience if loss improvement is good enough
                    if this_validation_loss < best_validation_loss *  \
                       improvement_threshold:
                        patience = max(patience, iter * patience_increase)

                    # save best validation score and iteration number
                    best_validation_loss = this_validation_loss
                    best_iter = iter

                    # test it on the test set
                    test_losses = [test_model(i) for i in xrange(n_test_batches)]
                    test_score = numpy.mean(test_losses) / batch_size
                    print(('     epoch %i, minibatch %i/%i, test error of best '
                           'model %f %%') %
                          (epoch, minibatch_index + 1, n_train_batches,
                           test_score * 100.))

        if (((epoch + 1) % 30) == 0):
            # halve the learning rate every 30 epochs
            new_rate = 0.5 * learning_rate.get_value(borrow=True)
            learning_rate.set_value(new_rate)

        if ((epoch % 10) == 0):
            W_l0 = layer0.W.get_value(borrow=False)
            W_l0 = W_l0.reshape((W_l0.shape[0], numpy.prod(W_l0.shape[1:])))
            visualize_samples(W_l0, 'A1_CONV_FILTS.png')

    end_time = time.clock()
    print('Optimization complete.')
    print('Best validation score of %f %% obtained at iteration %i, '\
          'with test performance %f %%' %
          (best_validation_loss * 100., best_iter + 1, test_score * 100.))
    print >> sys.stderr, ('The code for file ' +
                          os.path.split(__file__)[1] +
                          ' ran for %.2fm' % ((end_time - start_time) / 60.))