def test_svhn_nll(occ_dim=15, drop_prob=0.0):
    RESULT_PATH = "IMP_SVHN_TM/"
    #########################################
    # Format the result tag more thoroughly #
    #########################################
    dp_int = int(100.0 * drop_prob)
    result_tag = RESULT_PATH + "TM_OD{}_DP{}".format(occ_dim, dp_int)

    ##########################
    # Get some training data #
    ##########################
    rng = np.random.RandomState(1234)
    tr_file = 'data/svhn_train_gray.pkl'
    te_file = 'data/svhn_test_gray.pkl'
    ex_file = 'data/svhn_extra_gray.pkl'
    data = load_svhn_gray(tr_file, te_file, ex_file=ex_file, ex_count=200000)
    Xtr = to_fX(shift_and_scale_into_01(np.vstack([data['Xtr'], data['Xex']])))
    Xva = to_fX(shift_and_scale_into_01(data['Xte']))
    tr_samples = Xtr.shape[0]
    va_samples = Xva.shape[0]
    batch_size = 250
    batch_reps = 1
    all_pix_mean = np.mean(np.mean(Xtr, axis=1))
    data_mean = to_fX(all_pix_mean * np.ones((Xtr.shape[1],)))

    TM = TemplateMatchImputer(x_train=Xtr, x_type='bernoulli')

    log_name = "{}_RESULTS.txt".format(result_tag)
    out_file = open(log_name, 'wb')

    Xva = row_shuffle(Xva)
    # record an estimate of performance on the test set
    xi, xo, xm = construct_masked_data(Xva, drop_prob=drop_prob, \
                                       occ_dim=occ_dim, data_mean=data_mean)
    result = TM.best_match_nll(xo, xm)
    match_on_known = np.mean(result[0])
    match_on_unknown = np.mean(result[1])
    str0 = "Test 1:"
    str1 = "    match on known   : {}".format(match_on_known)
    str2 = "    match on unknown : {}".format(match_on_unknown)
    joint_str = "\n".join([str0, str1, str2])
    print(joint_str)
    out_file.write(joint_str + "\n")
    out_file.flush()
    out_file.close()
    return
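###############################################################################
# NOTE: all of these tests build (input, target, mask) triples via the
# module's construct_masked_data helper. The function below is NOT that
# helper -- it is a minimal sketch of the convention assumed throughout this
# file: mask entries are 1 for known pixels and 0 for pixels to impute,
# pixels are hidden either i.i.d. with probability drop_prob or by a single
# occ_dim x occ_dim square, and unknown inputs are filled with data_mean
# (or 0 when data_mean is None). Casting to the module's float type (to_fX)
# is left to the caller.
###############################################################################
def construct_masked_data_sketch(X, drop_prob=0.0, occ_dim=0,
                                 data_mean=None, im_dim=32):
    N, D = X.shape
    if data_mean is None:
        data_mean = np.zeros((D,))
    xm = np.ones((N, D))
    if drop_prob > 0.0:
        # hide pixels independently at random
        xm = xm * (np.random.rand(N, D) > drop_prob)
    if occ_dim > 0:
        # hide one random occ_dim x occ_dim square in each image
        for n in range(N):
            r = np.random.randint(0, im_dim - occ_dim + 1)
            c = np.random.randint(0, im_dim - occ_dim + 1)
            sq = np.zeros((im_dim, im_dim))
            sq[r:(r + occ_dim), c:(c + occ_dim)] = 1.0
            xm[n] = xm[n] * (1.0 - sq.ravel())
    # fill unknown pixels of the input with the mean pixel value
    xi = (xm * X) + ((1.0 - xm) * data_mean.reshape((1, D)))
    return xi, X, xm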
def test_imocld_imp_svhn(step_type='add', occ_dim=14, drop_prob=0.0, attention=False):
    ##########################
    # Get some training data #
    ##########################
    tr_file = 'data/svhn_train_gray.pkl'
    te_file = 'data/svhn_test_gray.pkl'
    ex_file = 'data/svhn_extra_gray.pkl'
    data = load_svhn_gray(tr_file, te_file, ex_file=ex_file, ex_count=200000)
    Xtr = to_fX(shift_and_scale_into_01(np.vstack([data['Xtr'], data['Xex']])))
    Xva = to_fX(shift_and_scale_into_01(data['Xte']))
    tr_samples = Xtr.shape[0]
    va_samples = Xva.shape[0]
    batch_size = 250

    all_pix_mean = np.mean(np.mean(Xtr, axis=1))
    data_mean = to_fX(all_pix_mean * np.ones((Xtr.shape[1],)))

    ############################################################
    # Setup some parameters for the Iterative Refinement Model #
    ############################################################
    x_dim = Xtr.shape[1]
    write_dim = 600
    enc_dim = 600
    dec_dim = 600
    mix_dim = 20
    z_dim = 200
    n_iter = 25
    dp_int = int(100.0 * drop_prob)

    rnninits = {
        'weights_init': IsotropicGaussian(0.01),
        'biases_init': Constant(0.),
    }
    inits = {
        'weights_init': IsotropicGaussian(0.01),
        'biases_init': Constant(0.),
    }

    att_tag = "NA"  # attention not implemented yet

    # setup the reader and writer (shared by primary and guide policies)
    read_dim = 2*x_dim  # dimension of output from reader_mlp
    reader_mlp = Reader(x_dim=x_dim, dec_dim=dec_dim, **inits)
    writer_mlp = MLP([None, None], [dec_dim, write_dim, x_dim], \
                     name="writer_mlp", **inits)

    # mlps for setting conditionals over z_mix
    mix_var_mlp = CondNet([Tanh()], [x_dim, 250, mix_dim], \
                          name="mix_var_mlp", **inits)
    mix_enc_mlp = CondNet([Tanh()], [x_dim, 250, mix_dim], \
                          name="mix_enc_mlp", **inits)
    # mlp for decoding z_mix into a distribution over initial LSTM states
    mix_dec_mlp = MLP([Tanh(), Tanh()], \
                      [mix_dim, 250, (2*enc_dim + 2*dec_dim + 2*enc_dim)], \
                      name="mix_dec_mlp", **inits)
    # mlps for processing inputs to LSTMs
    var_mlp_in = MLP([Identity()], [(read_dim + dec_dim), 4*enc_dim], \
                     name="var_mlp_in", **inits)
    enc_mlp_in = MLP([Identity()], [(read_dim + dec_dim), 4*enc_dim], \
                     name="enc_mlp_in", **inits)
    dec_mlp_in = MLP([Identity()], [z_dim, 4*dec_dim], \
                     name="dec_mlp_in", **inits)
    #dec_mlp_in = MLP([Identity()], [(enc_dim + z_dim), 4*dec_dim], \
    #                 name="dec_mlp_in", **inits)
    # mlps for turning LSTM outputs into conditionals over z_gen
    var_mlp_out = CondNet([], [enc_dim, z_dim], name="var_mlp_out", **inits)
    enc_mlp_out = CondNet([], [enc_dim, z_dim], name="enc_mlp_out", **inits)
    # LSTMs for the actual LSTMs (obviously, perhaps)
    var_rnn = BiasedLSTM(dim=enc_dim, ig_bias=2.0, fg_bias=2.0, \
                         name="var_rnn", **rnninits)
    enc_rnn = BiasedLSTM(dim=enc_dim, ig_bias=2.0, fg_bias=2.0, \
                         name="enc_rnn", **rnninits)
    dec_rnn = BiasedLSTM(dim=dec_dim, ig_bias=2.0, fg_bias=2.0, \
                         name="dec_rnn", **rnninits)

    draw = IMoCLDrawModels(
                n_iter,
                step_type=step_type,  # step_type can be 'add' or 'jump'
                reader_mlp=reader_mlp,
                writer_mlp=writer_mlp,
                mix_enc_mlp=mix_enc_mlp,
                mix_dec_mlp=mix_dec_mlp,
                mix_var_mlp=mix_var_mlp,
                enc_mlp_in=enc_mlp_in,
                enc_mlp_out=enc_mlp_out,
                enc_rnn=enc_rnn,
                dec_mlp_in=dec_mlp_in,
                dec_rnn=dec_rnn,
                var_mlp_in=var_mlp_in,
                var_mlp_out=var_mlp_out,
                var_rnn=var_rnn)
    draw.initialize()

    # build the cost gradients, training function, samplers, etc.
    draw.build_model_funcs()

    draw.load_model_params(f_name="TBCLM_IMP_SVHN_PARAMS_OD{}_DP{}_{}_{}.pkl".format(occ_dim, dp_int, step_type, att_tag))

    ################################################################
    # Apply some updates, to check that they aren't totally broken #
    ################################################################
    print("Beginning to train the model...")
    out_file = open("TBCLM_IMP_SVHN_RESULTS_OD{}_DP{}_{}_{}.txt".format(occ_dim, dp_int, step_type, att_tag), 'wb')
    out_file.flush()
    costs = [0. for i in range(10)]
    learn_rate = 0.0002
    momentum = 0.5
    # start past the end of the data, so the first update triggers a shuffle
    batch_idx = np.arange(batch_size) + tr_samples
    for i in range(250000):
        scale = min(1.0, ((i+1) / 1000.0))
        if (((i + 1) % 10000) == 0):
            learn_rate = learn_rate * 0.95
        if (i > 10000):
            momentum = 0.90
        else:
            momentum = 0.50
        # get the indices of training samples for this batch update
        batch_idx += batch_size
        if (np.max(batch_idx) >= tr_samples):
            # we finished an "epoch", so we rejumble the training set
            Xtr = row_shuffle(Xtr)
            batch_idx = np.arange(batch_size)
        # set sgd and objective function hyperparams for this update
        zero_ary = np.zeros((1,))
        draw.lr.set_value(to_fX(zero_ary + learn_rate))
        draw.mom_1.set_value(to_fX(zero_ary + momentum))
        draw.mom_2.set_value(to_fX(zero_ary + 0.99))
        # perform a minibatch update and record the cost for this batch
        Xb = to_fX(Xtr.take(batch_idx, axis=0))
        _, Xb, Mb = construct_masked_data(Xb, drop_prob=drop_prob, \
                                          occ_dim=occ_dim, data_mean=None)
        result = draw.train_joint(Xb, Mb)
        costs = [(costs[j] + result[j]) for j in range(len(result))]
        if ((i % 200) == 0):
            costs = [(v / 200.0) for v in costs]
            str1 = "-- batch {0:d} --".format(i)
            str2 = "    total_cost: {0:.4f}".format(costs[0])
            str3 = "    nll_bound : {0:.4f}".format(costs[1])
            str4 = "    nll_term  : {0:.4f}".format(costs[2])
            str5 = "    kld_q2p   : {0:.4f}".format(costs[3])
            str6 = "    kld_p2q   : {0:.4f}".format(costs[4])
            str7 = "    reg_term  : {0:.4f}".format(costs[5])
            joint_str = "\n".join([str1, str2, str3, str4, str5, str6, str7])
            print(joint_str)
            out_file.write(joint_str + "\n")
            out_file.flush()
            costs = [0.0 for v in costs]
        if ((i % 1000) == 0):
            #draw.save_model_params("TBCLM_IMP_SVHN_PARAMS_OD{}_DP{}_{}_{}.pkl".format(occ_dim, dp_int, step_type, att_tag))
            # compute a small-sample estimate of NLL bound on validation set
            Xva = row_shuffle(Xva)
            Xb = to_fX(Xva[:5000])
            _, Xb, Mb = construct_masked_data(Xb, drop_prob=drop_prob, \
                                              occ_dim=occ_dim, data_mean=None)
            va_costs = draw.compute_nll_bound(Xb, Mb)
            str1 = "    va_nll_bound : {}".format(va_costs[1])
            str2 = "    va_nll_term  : {}".format(va_costs[2])
            str3 = "    va_kld_q2p   : {}".format(va_costs[3])
            joint_str = "\n".join([str1, str2, str3])
            print(joint_str)
            out_file.write(joint_str + "\n")
            out_file.flush()
            # draw some independent samples from the model
            Xb = to_fX(Xva[:100])
            _, Xb, Mb = construct_masked_data(Xb, drop_prob=drop_prob, \
                                              occ_dim=occ_dim, data_mean=None)
            samples = draw.do_sample(Xb, Mb)
            n_iter, N, D = samples.shape
            samples = samples.reshape((n_iter, N, 32, 32))
            for j in xrange(n_iter):
                img = img_grid(samples[j, :, :, :])
                img.save("TBCLM-IMP-SVHN-OD{0:d}-DP{1:d}-{2:s}-samples-{3:03d}.png".format(occ_dim, dp_int, step_type, j))
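###############################################################################
# NOTE: img_grid above returns an object with a .save method. The original
# helper isn't shown in this file; the function below is a minimal sketch of
# the assumed behavior, using PIL (an extra import): tile N HxW arrays with
# values in [0, 1] into a single grayscale image.
###############################################################################
from PIL import Image

def img_grid_sketch(batch, rows=10, pad=2):
    N, H, W = batch.shape
    cols = int(np.ceil(float(N) / rows))
    grid = np.zeros((rows * (H + pad) - pad, cols * (W + pad) - pad))
    for n in range(N):
        r, c = n % rows, n // rows
        grid[r*(H + pad):(r*(H + pad) + H), c*(W + pad):(c*(W + pad) + W)] = batch[n]
    return Image.fromarray((255.0 * np.clip(grid, 0.0, 1.0)).astype(np.uint8))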
def test_imocld_svhn(step_type="add", attention=False): ########################## # Get some training data # ########################## rng = np.random.RandomState(1234) tr_file = "data/svhn_train_gray.pkl" te_file = "data/svhn_test_gray.pkl" ex_file = "data/svhn_extra_gray.pkl" data = load_svhn_gray(tr_file, te_file, ex_file=ex_file, ex_count=200000) Xtr = to_fX(shift_and_scale_into_01(np.vstack([data["Xtr"], data["Xex"]]))) Xva = to_fX(shift_and_scale_into_01(data["Xte"])) tr_samples = Xtr.shape[0] va_samples = Xva.shape[0] batch_size = 250 all_pix_mean = np.mean(np.mean(Xtr, axis=1)) data_mean = to_fX(all_pix_mean * np.ones((Xtr.shape[1],))) ############################################################ # Setup some parameters for the Iterative Refinement Model # ############################################################ x_dim = Xtr.shape[1] write_dim = 600 enc_dim = 600 dec_dim = 600 mix_dim = 20 z_dim = 200 n_iter = 16 rnninits = {"weights_init": IsotropicGaussian(0.01), "biases_init": Constant(0.0)} inits = {"weights_init": IsotropicGaussian(0.01), "biases_init": Constant(0.0)} att_tag = "NA" # attention not implemented yet # setup the reader and writer (shared by primary and guide policies) read_dim = 2 * x_dim # dimension of output from reader_mlp reader_mlp = Reader(x_dim=x_dim, dec_dim=dec_dim, **inits) writer_mlp = MLP([None, None], [dec_dim, write_dim, x_dim], name="writer_mlp", **inits) # mlps for setting conditionals over z_mix mix_var_mlp = CondNet([Tanh()], [x_dim, 250, mix_dim], name="mix_var_mlp", **inits) mix_enc_mlp = CondNet([Tanh()], [x_dim, 250, mix_dim], name="mix_enc_mlp", **inits) # mlp for decoding z_mix into a distribution over initial LSTM states mix_dec_mlp = MLP( [Tanh(), Tanh()], [mix_dim, 250, (2 * enc_dim + 2 * dec_dim + 2 * enc_dim)], name="mix_dec_mlp", **inits ) # mlps for processing inputs to LSTMs var_mlp_in = MLP([Identity()], [(read_dim + dec_dim), 4 * enc_dim], name="var_mlp_in", **inits) enc_mlp_in = MLP([Identity()], [(read_dim + dec_dim), 4 * enc_dim], name="enc_mlp_in", **inits) dec_mlp_in = MLP([Identity()], [z_dim, 4 * dec_dim], name="dec_mlp_in", **inits) # mlps for turning LSTM outputs into conditionals over z_gen var_mlp_out = CondNet([], [enc_dim, z_dim], name="var_mlp_out", **inits) enc_mlp_out = CondNet([], [enc_dim, z_dim], name="enc_mlp_out", **inits) # LSTMs for the actual LSTMs (obviously, perhaps) var_rnn = BiasedLSTM(dim=enc_dim, ig_bias=2.0, fg_bias=2.0, name="var_rnn", **rnninits) enc_rnn = BiasedLSTM(dim=enc_dim, ig_bias=2.0, fg_bias=2.0, name="enc_rnn", **rnninits) dec_rnn = BiasedLSTM(dim=dec_dim, ig_bias=2.0, fg_bias=2.0, name="dec_rnn", **rnninits) draw = IMoCLDrawModels( n_iter, step_type=step_type, # step_type can be 'add' or 'jump' reader_mlp=reader_mlp, writer_mlp=writer_mlp, mix_enc_mlp=mix_enc_mlp, mix_dec_mlp=mix_dec_mlp, mix_var_mlp=mix_var_mlp, enc_mlp_in=enc_mlp_in, enc_mlp_out=enc_mlp_out, enc_rnn=enc_rnn, dec_mlp_in=dec_mlp_in, dec_rnn=dec_rnn, var_mlp_in=var_mlp_in, var_mlp_out=var_mlp_out, var_rnn=var_rnn, ) draw.initialize() # build the cost gradients, training function, samplers, etc. 
draw.build_model_funcs() # sample several interchangeable versions of the model conditions = [{"occ_dim": 0, "drop_prob": 0.8}, {"occ_dim": 17, "drop_prob": 0.0}] for cond_dict in conditions: occ_dim = cond_dict["occ_dim"] drop_prob = cond_dict["drop_prob"] dp_int = int(100.0 * drop_prob) draw.load_model_params( f_name="TBCLM_IMP_SVHN_PARAMS_OD{}_DP{}_{}_{}.pkl".format(occ_dim, dp_int, step_type, att_tag) ) # draw some independent samples from the model Xva = row_shuffle(Xva) Xb = to_fX(Xva[:128]) _, Xb, Mb = construct_masked_data(Xb, drop_prob=drop_prob, occ_dim=occ_dim, data_mean=None) Xb = np.repeat(Xb, 2, axis=0) Mb = np.repeat(Mb, 2, axis=0) samples = draw.do_sample(Xb, Mb) # save the samples to a pkl file, in their numpy array form sample_pkl_name = "IMP-SVHN-OD{0:d}-DP{1:d}-{2:s}.pkl".format(occ_dim, dp_int, step_type) f_handle = file(sample_pkl_name, "wb") cPickle.dump(samples, f_handle, protocol=-1) f_handle.close() print("Saved some samples in: {}".format(sample_pkl_name)) return
def test_svhn(step_type='add', occ_dim=15, drop_prob=0.0):
    #########################################
    # Format the result tag more thoroughly #
    #########################################
    dp_int = int(100.0 * drop_prob)
    result_tag = "{}GPSI_OD{}_DP{}_{}_NA".format(RESULT_PATH, occ_dim, dp_int, step_type)

    ##########################
    # Get some training data #
    ##########################
    rng = np.random.RandomState(1234)
    tr_file = 'data/svhn_train_gray.pkl'
    te_file = 'data/svhn_test_gray.pkl'
    ex_file = 'data/svhn_extra_gray.pkl'
    data = load_svhn_gray(tr_file, te_file, ex_file=ex_file, ex_count=200000)
    Xtr = to_fX(shift_and_scale_into_01(np.vstack([data['Xtr'], data['Xex']])))
    Xva = to_fX(shift_and_scale_into_01(data['Xte']))
    tr_samples = Xtr.shape[0]
    va_samples = Xva.shape[0]
    batch_size = 250
    batch_reps = 1
    all_pix_mean = np.mean(np.mean(Xtr, axis=1))
    data_mean = to_fX(all_pix_mean * np.ones((Xtr.shape[1],)))

    ############################################################
    # Setup some parameters for the Iterative Refinement Model #
    ############################################################
    x_dim = Xtr.shape[1]
    z_dim = 200
    imp_steps = 6
    init_scale = 1.0

    x_in_sym = T.matrix('x_in_sym')
    x_out_sym = T.matrix('x_out_sym')
    x_mask_sym = T.matrix('x_mask_sym')

    #################
    # p_zi_given_xi #
    #################
    params = {}
    shared_config = [x_dim, 1500, 1500]
    top_config = [shared_config[-1], z_dim]
    params['shared_config'] = shared_config
    params['mu_config'] = top_config
    params['sigma_config'] = top_config
    params['activation'] = relu_actfun
    params['init_scale'] = init_scale
    params['vis_drop'] = 0.0
    params['hid_drop'] = 0.0
    params['bias_noise'] = 0.0
    params['input_noise'] = 0.0
    params['build_theano_funcs'] = False
    p_zi_given_xi = InfNet(rng=rng, Xd=x_in_sym, \
                           params=params, shared_param_dicts=None)
    p_zi_given_xi.init_biases(0.2)
    ###################
    # p_xip1_given_zi #
    ###################
    params = {}
    shared_config = [z_dim, 1500, 1500]
    output_config = [x_dim, x_dim]
    params['shared_config'] = shared_config
    params['output_config'] = output_config
    params['activation'] = relu_actfun
    params['init_scale'] = init_scale
    params['vis_drop'] = 0.0
    params['hid_drop'] = 0.0
    params['bias_noise'] = 0.0
    params['input_noise'] = 0.0
    params['build_theano_funcs'] = False
    p_xip1_given_zi = HydraNet(rng=rng, Xd=x_in_sym, \
                               params=params, shared_param_dicts=None)
    p_xip1_given_zi.init_biases(0.2)
    #################
    # q_zi_given_xi #
    #################
    params = {}
    shared_config = [(x_dim + x_dim), 1500, 1500]
    top_config = [shared_config[-1], z_dim]
    params['shared_config'] = shared_config
    params['mu_config'] = top_config
    params['sigma_config'] = top_config
    params['activation'] = relu_actfun
    params['init_scale'] = init_scale
    params['vis_drop'] = 0.0
    params['hid_drop'] = 0.0
    params['bias_noise'] = 0.0
    params['input_noise'] = 0.0
    params['build_theano_funcs'] = False
    q_zi_given_xi = InfNet(rng=rng, Xd=x_in_sym, \
                           params=params, shared_param_dicts=None)
    q_zi_given_xi.init_biases(0.2)

    ###########################################################
    # Define parameters for the GPSImputer, and initialize it #
    ###########################################################
    print("Building the GPSImputer...")
    gpsi_params = {}
    gpsi_params['x_dim'] = x_dim
    gpsi_params['z_dim'] = z_dim
    gpsi_params['imp_steps'] = imp_steps
    gpsi_params['step_type'] = step_type
    gpsi_params['x_type'] = 'bernoulli'
    gpsi_params['obs_transform'] = 'sigmoid'
    GPSI = GPSImputer(rng=rng, \
                      x_in=x_in_sym, x_out=x_out_sym, x_mask=x_mask_sym, \
                      p_zi_given_xi=p_zi_given_xi, \
                      p_xip1_given_zi=p_xip1_given_zi, \
                      q_zi_given_xi=q_zi_given_xi, \
                      params=gpsi_params, \
                      shared_param_dicts=None)

    ################################################################
    # Apply some updates, to check that they aren't totally broken #
    ################################################################
    log_name = "{}_RESULTS.txt".format(result_tag)
    out_file = open(log_name, 'wb')
    costs = [0. for i in range(10)]
    learn_rate = 0.0002
    momentum = 0.5
    # start past the end of the data, so the first update triggers a shuffle
    batch_idx = np.arange(batch_size) + tr_samples
    for i in range(200005):
        scale = min(1.0, ((i + 1) / 5000.0))
        if (((i + 1) % 15000) == 0):
            learn_rate = learn_rate * 0.92
        if (i > 10000):
            momentum = 0.90
        else:
            momentum = 0.50
        # get the indices of training samples for this batch update
        batch_idx += batch_size
        if (np.max(batch_idx) >= tr_samples):
            # we finished an "epoch", so we rejumble the training set
            Xtr = row_shuffle(Xtr)
            batch_idx = np.arange(batch_size)
        # set sgd and objective function hyperparams for this update
        GPSI.set_sgd_params(lr=scale*learn_rate, \
                            mom_1=scale*momentum, mom_2=0.98)
        GPSI.set_train_switch(1.0)
        GPSI.set_lam_nll(lam_nll=1.0)
        GPSI.set_lam_kld(lam_kld_p=0.1, lam_kld_q=0.9)
        GPSI.set_lam_l2w(1e-4)
        # perform a minibatch update and record the cost for this batch
        xb = to_fX(Xtr.take(batch_idx, axis=0))
        xi, xo, xm = construct_masked_data(xb, drop_prob=drop_prob, \
                                           occ_dim=occ_dim, data_mean=data_mean)
        result = GPSI.train_joint(xi, xo, xm, batch_reps)
        # do diagnostics and general training tracking
        costs = [(costs[j] + result[j]) for j in range(len(result) - 1)]
        if ((i % 250) == 0):
            costs = [(v / 250.0) for v in costs]
            str1 = "-- batch {0:d} --".format(i)
            str2 = "    joint_cost: {0:.4f}".format(costs[0])
            str3 = "    nll_bound : {0:.4f}".format(costs[1])
            str4 = "    nll_cost  : {0:.4f}".format(costs[2])
            str5 = "    kld_cost  : {0:.4f}".format(costs[3])
            str6 = "    reg_cost  : {0:.4f}".format(costs[4])
            joint_str = "\n".join([str1, str2, str3, str4, str5, str6])
            print(joint_str)
            out_file.write(joint_str + "\n")
            out_file.flush()
            costs = [0.0 for v in costs]
        if ((i % 1000) == 0):
            Xva = row_shuffle(Xva)
            # record an estimate of performance on the test set
            xi, xo, xm = construct_masked_data(Xva[0:5000], drop_prob=drop_prob, \
                                               occ_dim=occ_dim, data_mean=data_mean)
            nll, kld = GPSI.compute_fe_terms(xi, xo, xm, sample_count=10)
            vfe = np.mean(nll) + np.mean(kld)
            str1 = "    va_nll_bound : {}".format(vfe)
            str2 = "    va_nll_term  : {}".format(np.mean(nll))
            str3 = "    va_kld_q2p   : {}".format(np.mean(kld))
            joint_str = "\n".join([str1, str2, str3])
            print(joint_str)
            out_file.write(joint_str + "\n")
            out_file.flush()
            GPSI.save_to_file("{}_PARAMS.pkl".format(result_tag))
        if ((i % 20000) == 0):
            # Get some validation samples for evaluating model performance
            xb = to_fX(Xva[0:100])
            xi, xo, xm = construct_masked_data(xb, drop_prob=drop_prob, \
                                               occ_dim=occ_dim, data_mean=data_mean)
            xi = np.repeat(xi, 2, axis=0)
            xo = np.repeat(xo, 2, axis=0)
            xm = np.repeat(xm, 2, axis=0)
            # draw some sample imputations from the model
            samp_count = xi.shape[0]
            _, model_samps = GPSI.sample_imputer(xi, xo, xm, use_guide_policy=False)
            seq_len = len(model_samps)
            seq_samps = np.zeros((seq_len * samp_count, model_samps[0].shape[1]))
            idx = 0
            for s1 in range(samp_count):
                for s2 in range(seq_len):
                    seq_samps[idx] = model_samps[s2][s1]
                    idx += 1
            file_name = "{0:s}_samples_ng_b{1:d}.png".format(result_tag, i)
            utils.visualize_samples(seq_samps, file_name, num_rows=20)
def test_svhn_results(step_type='add', occ_dim=15, drop_prob=0.0):
    #########################################
    # Format the result tag more thoroughly #
    #########################################
    dp_int = int(100.0 * drop_prob)
    result_tag = "{}GPSI_OD{}_DP{}_{}_NA".format(RESULT_PATH, occ_dim, dp_int, step_type)

    ##########################
    # Get some training data #
    ##########################
    rng = np.random.RandomState(1234)
    tr_file = 'data/svhn_train_gray.pkl'
    te_file = 'data/svhn_test_gray.pkl'
    ex_file = 'data/svhn_extra_gray.pkl'
    data = load_svhn_gray(tr_file, te_file, ex_file=ex_file, ex_count=200000)
    Xtr = to_fX(shift_and_scale_into_01(np.vstack([data['Xtr'], data['Xex']])))
    Xva = to_fX(shift_and_scale_into_01(data['Xte']))
    tr_samples = Xtr.shape[0]
    va_samples = Xva.shape[0]
    batch_size = 250
    batch_reps = 1
    all_pix_mean = np.mean(np.mean(Xtr, axis=1))
    data_mean = to_fX(all_pix_mean * np.ones((Xtr.shape[1],)))

    ############################################################
    # Setup some parameters for the Iterative Refinement Model #
    ############################################################
    x_dim = Xtr.shape[1]
    z_dim = 200
    imp_steps = 6
    init_scale = 1.0

    x_in_sym = T.matrix('x_in_sym')
    x_out_sym = T.matrix('x_out_sym')
    x_mask_sym = T.matrix('x_mask_sym')

    # Load parameters from a previously trained model
    print("Testing model load from file...")
    GPSI = load_gpsimputer_from_file(f_name="{}_PARAMS.pkl".format(result_tag), \
                                     rng=rng)

    ################################################################
    # Apply some updates, to check that they aren't totally broken #
    ################################################################
    log_name = "{}_FINAL_RESULTS.txt".format(result_tag)
    out_file = open(log_name, 'wb')

    Xva = row_shuffle(Xva)
    # record an estimate of performance on the test set
    str0 = "GUIDED SAMPLE BOUND:"
    print(str0)
    xi, xo, xm = construct_masked_data(Xva[:5000], drop_prob=drop_prob, \
                                       occ_dim=occ_dim, data_mean=data_mean)
    nll_0, kld_0 = GPSI.compute_fe_terms(xi, xo, xm, sample_count=10, \
                                         use_guide_policy=True)
    xi, xo, xm = construct_masked_data(Xva[5000:], drop_prob=drop_prob, \
                                       occ_dim=occ_dim, data_mean=data_mean)
    nll_1, kld_1 = GPSI.compute_fe_terms(xi, xo, xm, sample_count=10, \
                                         use_guide_policy=True)
    nll = np.concatenate((nll_0, nll_1))
    kld = np.concatenate((kld_0, kld_1))
    vfe = np.mean(nll) + np.mean(kld)
    str1 = "    va_nll_bound : {}".format(vfe)
    str2 = "    va_nll_term  : {}".format(np.mean(nll))
    str3 = "    va_kld_q2p   : {}".format(np.mean(kld))
    joint_str = "\n".join([str0, str1, str2, str3])
    print(joint_str)
    out_file.write(joint_str + "\n")
    out_file.flush()
    # record an estimate of performance on the test set
    str0 = "UNGUIDED SAMPLE BOUND:"
    print(str0)
    xi, xo, xm = construct_masked_data(Xva[:5000], drop_prob=drop_prob, \
                                       occ_dim=occ_dim, data_mean=data_mean)
    nll_0, kld_0 = GPSI.compute_fe_terms(xi, xo, xm, sample_count=10, \
                                         use_guide_policy=False)
    xi, xo, xm = construct_masked_data(Xva[5000:], drop_prob=drop_prob, \
                                       occ_dim=occ_dim, data_mean=data_mean)
    nll_1, kld_1 = GPSI.compute_fe_terms(xi, xo, xm, sample_count=10, \
                                         use_guide_policy=False)
    nll = np.concatenate((nll_0, nll_1))
    kld = np.concatenate((kld_0, kld_1))
    # for unguided samples, only the nll term enters the reported bound
    str1 = "    va_nll_bound : {}".format(np.mean(nll))
    str2 = "    va_nll_term  : {}".format(np.mean(nll))
    str3 = "    va_kld_q2p   : {}".format(np.mean(kld))
    joint_str = "\n".join([str0, str1, str2, str3])
    print(joint_str)
    out_file.write(joint_str + "\n")
    out_file.flush()
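###############################################################################
# NOTE: the two evaluation blocks above repeat the same chunk-and-average
# pattern. A helper like the sketch below (not part of the original code)
# computes the bound in one call; it assumes GPSI.compute_fe_terms returns
# per-example (nll, kld) arrays, exactly as it is used above.
###############################################################################
def eval_fe_bound(GPSI, Xva, drop_prob, occ_dim, data_mean,
                  use_guide_policy=True, chunk=5000, sample_count=10):
    nlls, klds = [], []
    for start in range(0, Xva.shape[0], chunk):
        xi, xo, xm = construct_masked_data(Xva[start:(start + chunk)],
                                           drop_prob=drop_prob,
                                           occ_dim=occ_dim,
                                           data_mean=data_mean)
        nll, kld = GPSI.compute_fe_terms(xi, xo, xm,
                                         sample_count=sample_count,
                                         use_guide_policy=use_guide_policy)
        nlls.append(nll)
        klds.append(kld)
    nll, kld = np.concatenate(nlls), np.concatenate(klds)
    # variational free-energy bound: E[nll] + E[kld]
    return np.mean(nll) + np.mean(kld), np.mean(nll), np.mean(kld)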
def pretrain_gip(extra_lam_kld=0.0, kld2_scale=0.0):
    # Initialize a source of randomness
    rng = np.random.RandomState(1234)

    # Load some data to train/validate/test with
    tr_file = 'data/svhn_train_gray.pkl'
    te_file = 'data/svhn_test_gray.pkl'
    ex_file = 'data/svhn_extra_gray.pkl'
    data = load_svhn_gray(tr_file, te_file, ex_file=ex_file, ex_count=200000)
    #all_file = 'data/svhn_all_gray_zca.pkl'
    #data = load_svhn_all_gray_zca(all_file)
    Xtr = np.vstack([data['Xtr'], data['Xex']])
    Xtr = Xtr - np.mean(Xtr, axis=1, keepdims=True)
    Xtr = Xtr / np.std(Xtr, axis=1, keepdims=True)
    Xtr = shift_and_scale_into_01(Xtr)
    Xtr, Xva = train_valid_split(Xtr, valid_count=5000)
    tr_samples = Xtr.shape[0]
    va_samples = Xva.shape[0]
    batch_size = 100
    batch_reps = 5

    # setup some symbolic variables and stuff
    Xp = T.matrix('Xp_base')
    Xd = T.matrix('Xd_base')
    Xc = T.matrix('Xc_base')
    Xm = T.matrix('Xm_base')
    data_dim = Xtr.shape[1]
    prior_sigma = 1.0

    ##########################
    # NETWORK CONFIGURATIONS #
    ##########################
    gn_params = {}
    gn_config = [PRIOR_DIM, 2400, 2400, data_dim]
    gn_params['mlp_config'] = gn_config
    gn_params['activation'] = relu_actfun
    gn_params['out_type'] = 'gaussian'
    gn_params['mean_transform'] = 'sigmoid'
    gn_params['logvar_type'] = 'single_shared'
    gn_params['init_scale'] = 1.2
    gn_params['lam_l2a'] = 1e-2
    gn_params['vis_drop'] = 0.0
    gn_params['hid_drop'] = 0.0
    gn_params['bias_noise'] = 0.1
    # choose some parameters for the continuous inferencer
    in_params = {}
    shared_config = [data_dim, 2400, 2400]
    top_config = [shared_config[-1], PRIOR_DIM]
    in_params['shared_config'] = shared_config
    in_params['mu_config'] = top_config
    in_params['sigma_config'] = top_config
    in_params['activation'] = relu_actfun
    in_params['init_scale'] = 1.2
    in_params['lam_l2a'] = 1e-2
    in_params['vis_drop'] = 0.2
    in_params['hid_drop'] = 0.0
    in_params['bias_noise'] = 0.1
    in_params['input_noise'] = 0.0
    in_params['kld2_scale'] = kld2_scale
    # Initialize the base networks for this GIPair
    IN = InfNet(rng=rng, Xd=Xd, Xc=Xc, Xm=Xm, prior_sigma=prior_sigma, \
                params=in_params, shared_param_dicts=None)
    GN = GenNet(rng=rng, Xp=Xp, prior_sigma=prior_sigma, \
                params=gn_params, shared_param_dicts=None)
    # Initialize biases in IN and GN
    IN.init_biases(0.1)
    GN.init_biases(0.1)

    ######################################
    # LOAD AND RESTART FROM SAVED PARAMS #
    ######################################
    # new_in_params = {'kld2_scale': kld2_scale, 'bias_noise': 0.2}
    # new_gn_params = {'bias_noise': 0.2}
    # # Load inferencer and generator from saved parameters
    # gn_fname = "TMS_RESULTS_DROPLESS/pt_params_b50000_GN.pkl"
    # in_fname = "TMS_RESULTS_DROPLESS/pt_params_b50000_IN.pkl"
    # IN = INet.load_infnet_from_file(f_name=in_fname, rng=rng, Xd=Xd, \
    #         Xc=Xc, Xm=Xm, new_params=new_in_params)
    # GN = GNet.load_gennet_from_file(f_name=gn_fname, rng=rng, Xp=Xp, \
    #         new_params=new_gn_params)
    # in_params = IN.params
    # gn_params = GN.params

    #########################
    # INITIALIZE THE GIPAIR #
    #########################
    GIP = GIPair(rng=rng, Xd=Xd, Xc=Xc, Xm=Xm, g_net=GN, i_net=IN, \
                 data_dim=data_dim, prior_dim=PRIOR_DIM, params=None)
    GIP.set_lam_l2w(1e-4)

    ####################
    # RICA PRETRAINING #
    ####################
    IN.W_rica.set_value(0.05 * IN.W_rica.get_value(borrow=False))
    GN.W_rica.set_value(0.05 * GN.W_rica.get_value(borrow=False))
    for i in range(6000):
        scale = min(1.0, (float(i+1) / 6000.0))
        l_rate = 0.0001 * scale
        lam_l1 = 0.025
        tr_idx = npr.randint(low=0, high=tr_samples, size=(1000,))
        Xd_batch = Xtr.take(tr_idx, axis=0)
        inr_out = IN.train_rica(Xd_batch, l_rate, lam_l1)
        gnr_out = GN.train_rica(Xd_batch, l_rate, lam_l1)
        if ((i % 1000) == 0):
            print("rica batch {0:d}: in_recon={1:.4f}, in_spars={2:.4f}, gn_recon={3:.4f}, gn_spars={4:.4f}".format( \
                    i, 1.*inr_out[1], 1.*inr_out[2], 1.*gnr_out[1], 1.*gnr_out[2]))
            # draw inference net first layer weights
            file_name = RESULT_PATH + "pt_rica_inf_weights.png"
            utils.visualize_samples(IN.W_rica.get_value(borrow=False).T, file_name, num_rows=20)
            # draw generator net final layer weights
            file_name = RESULT_PATH + "pt_rica_gen_weights.png"
            utils.visualize_samples(GN.W_rica.get_value(borrow=False), file_name, num_rows=20)

    ######################
    # BASIC VAE TRAINING #
    ######################
    out_file = open(RESULT_PATH + "pt_gip_results.txt", 'wb')
    # Set initial learning rate and basic SGD hyper parameters
    cost_1 = [0. for i in range(10)]
    learn_rate = 0.0002
    for i in range(300000):
        scale = min(1.0, float(i) / 40000.0)
        if ((i + 1) % 100000 == 0):
            learn_rate = learn_rate * 0.8
        # get a minibatch of training data, with empty control/mask inputs
        tr_idx = npr.randint(low=0, high=tr_samples, size=(batch_size,))
        Xd_batch = Xtr.take(tr_idx, axis=0)
        Xd_batch = np.repeat(Xd_batch, batch_reps, axis=0)
        Xc_batch = 0.0 * Xd_batch
        Xm_batch = 0.0 * Xd_batch
        # do a minibatch update of the model, and compute some costs
        GIP.set_all_sgd_params(lr_gn=(scale*learn_rate), \
                               lr_in=(scale*learn_rate), mom_1=0.9, mom_2=0.999)
        GIP.set_lam_nll(1.0)
        GIP.set_lam_kld(1.0 + extra_lam_kld*scale)
        outputs = GIP.train_joint(Xd_batch, Xc_batch, Xm_batch)
        cost_1 = [(cost_1[k] + 1.*outputs[k]) for k in range(len(outputs))]
        if ((i % 1000) == 0):
            cost_1 = [(v / 1000.) for v in cost_1]
            o_str = "batch: {0:d}, joint_cost: {1:.4f}, data_nll_cost: {2:.4f}, post_kld_cost: {3:.4f}, other_reg_cost: {4:.4f}".format( \
                    i, cost_1[0], cost_1[1], cost_1[2], cost_1[3])
            print(o_str)
            out_file.write(o_str + "\n")
            out_file.flush()
            cost_1 = [0. for v in cost_1]
        if ((i % 5000) == 0):
            cost_2 = GIP.compute_costs(Xva, 0.*Xva, 0.*Xva)
            o_str = "--val: {0:d}, joint_cost: {1:.4f}, data_nll_cost: {2:.4f}, post_kld_cost: {3:.4f}, other_reg_cost: {4:.4f}".format( \
                    i, 1.*cost_2[0], 1.*cost_2[1], 1.*cost_2[2], 1.*cost_2[3])
            print(o_str)
            out_file.write(o_str + "\n")
            out_file.flush()
        if ((i % 5000) == 0):
            tr_idx = npr.randint(low=0, high=va_samples, size=(100,))
            Xd_batch = Xva.take(tr_idx, axis=0)
            file_name = RESULT_PATH + "pt_gip_chain_samples_b{0:d}.png".format(i)
            Xd_samps = np.repeat(Xd_batch[0:10, :], 3, axis=0)
            sample_lists = GIP.sample_from_chain(Xd_samps, loop_iters=20)
            Xs = np.vstack(sample_lists["data samples"])
            utils.visualize_samples(Xs, file_name, num_rows=20)
            # draw samples freely from the generative model's prior
            file_name = RESULT_PATH + "pt_gip_prior_samples_b{0:d}.png".format(i)
            Xs = GIP.sample_from_prior(20*20)
            utils.visualize_samples(Xs, file_name, num_rows=20)
            # draw inference net first layer weights
            file_name = RESULT_PATH + "pt_gip_inf_weights_b{0:d}.png".format(i)
            utils.visualize_net_layer(GIP.IN.shared_layers[0], file_name)
            # draw generator net final layer weights
            file_name = RESULT_PATH + "pt_gip_gen_weights_b{0:d}.png".format(i)
            if (gn_params['out_type'] == 'gaussian'):
                lay_num = -2
            else:
                lay_num = -1
            utils.visualize_net_layer(GIP.GN.mlp_layers[lay_num], file_name, \
                                      colorImg=False, use_transpose=True)
            #########################
            # Check posterior KLds. #
            #########################
            post_klds = posterior_klds(IN, Xtr, 5000, 5)
            file_name = RESULT_PATH + "pt_gip_post_klds_b{0:d}.png".format(i)
            utils.plot_kde_histogram2( \
                    np.asarray(post_klds), np.asarray(post_klds), file_name, bins=30)
        if ((i % 10000) == 0):
            IN.save_to_file(f_name=RESULT_PATH + "pt_gip_params_b{0:d}_IN.pkl".format(i))
            GN.save_to_file(f_name=RESULT_PATH + "pt_gip_params_b{0:d}_GN.pkl".format(i))
    IN.save_to_file(f_name=RESULT_PATH + "pt_gip_params_IN.pkl")
    GN.save_to_file(f_name=RESULT_PATH + "pt_gip_params_GN.pkl")
    return
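###############################################################################
# NOTE: posterior_klds is used above (and in train_walk_from_pretrained_gip
# below) to eyeball the spread of per-sample posterior KL divergences. Its
# implementation isn't shown in this file; the sketch below gives the assumed
# computation, via the closed-form KL between a diagonal Gaussian posterior
# and an isotropic unit Gaussian prior. `post_mean_and_logvar` is a
# hypothetical stand-in for however IN exposes its posterior parameters.
###############################################################################
def posterior_klds_sketch(post_mean_and_logvar, X, batch_size, batch_count):
    klds = []
    for _ in range(batch_count):
        idx = np.random.randint(low=0, high=X.shape[0], size=(batch_size,))
        mu, logvar = post_mean_and_logvar(X.take(idx, axis=0))
        # KL(N(mu, diag(exp(logvar))) || N(0, I)), summed over latent dims
        kld = 0.5 * np.sum(np.exp(logvar) + mu**2 - 1.0 - logvar, axis=1)
        klds.extend(kld.tolist())
    return klds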
def train_walk_from_pretrained_gip(extra_lam_kld=0.0):
    # Simple test code, to check that everything is basically functional.
    print("TESTING...")

    # Initialize a source of randomness
    rng = np.random.RandomState(1234)

    # Load some data to train/validate/test with
    tr_file = 'data/svhn_train_gray.pkl'
    te_file = 'data/svhn_test_gray.pkl'
    ex_file = 'data/svhn_extra_gray.pkl'
    data = load_svhn_gray(tr_file, te_file, ex_file=ex_file, ex_count=200000)
    #all_file = 'data/svhn_all_gray_zca.pkl'
    #data = load_svhn_all_gray_zca(all_file)
    Xtr = np.vstack([data['Xtr'], data['Xex']])
    Xtr = Xtr - np.mean(Xtr, axis=1, keepdims=True)
    Xtr = Xtr / np.std(Xtr, axis=1, keepdims=True)
    Xtr = shift_and_scale_into_01(Xtr)
    Xtr, Xva = train_valid_split(Xtr, valid_count=5000)
    print("Xtr.shape: {0:s}, Xva.shape: {1:s}".format(str(Xtr.shape), str(Xva.shape)))

    # get and set some basic dataset information
    tr_samples = Xtr.shape[0]
    va_samples = Xva.shape[0]
    data_dim = Xtr.shape[1]
    batch_size = 100
    batch_reps = 5
    prior_sigma = 1.0
    Xtr_mean = np.mean(Xtr, axis=0, keepdims=True)
    Xtr_mean = (0.0 * Xtr_mean) + np.mean(np.mean(Xtr, axis=1))
    Xc_mean = np.repeat(Xtr_mean, batch_size, axis=0)

    # Symbolic inputs
    Xd = T.matrix(name='Xd')
    Xc = T.matrix(name='Xc')
    Xm = T.matrix(name='Xm')
    Xt = T.matrix(name='Xt')
    Xp = T.matrix(name='Xp')

    START_FRESH = True
    if START_FRESH:
        ###############################
        # Setup discriminator network #
        ###############################
        # Set some reasonable mlp parameters
        dn_params = {}
        # Set up some proto-networks
        pc0 = [data_dim, (300, 4), (300, 4), 10]
        dn_params['proto_configs'] = [pc0]
        # Set up some spawn networks
        sc0 = {'proto_key': 0, 'input_noise': 0.1, 'bias_noise': 0.1, 'do_dropout': True}
        #sc1 = {'proto_key': 0, 'input_noise': 0.1, 'bias_noise': 0.1, 'do_dropout': True}
        dn_params['spawn_configs'] = [sc0]
        dn_params['spawn_weights'] = [1.0]
        # Set remaining params
        dn_params['init_scale'] = 0.25
        dn_params['lam_l2a'] = 1e-2
        dn_params['vis_drop'] = 0.2
        dn_params['hid_drop'] = 0.5
        # Initialize a network object to use as the discriminator
        DN = PeaNet(rng=rng, Xd=Xd, params=dn_params)
        DN.init_biases(0.0)

        #######################################################
        # Load inferencer and generator from saved parameters #
        #######################################################
        gn_fname = RESULT_PATH + "pt_gip_params_b200000_GN.pkl"
        in_fname = RESULT_PATH + "pt_gip_params_b200000_IN.pkl"
        IN = INet.load_infnet_from_file(f_name=in_fname, rng=rng, Xd=Xd, Xc=Xc, Xm=Xm)
        GN = GNet.load_gennet_from_file(f_name=gn_fname, rng=rng, Xp=Xp)
    else:
        ###########################################################
        # Load all networks from partially-trained VCGLoop params #
        ###########################################################
        gn_fname = RESULT_PATH + "pt_walk_params_GN.pkl"
        in_fname = RESULT_PATH + "pt_walk_params_IN.pkl"
        dn_fname = RESULT_PATH + "pt_walk_params_DN.pkl"
        IN = INet.load_infnet_from_file(f_name=in_fname, rng=rng, Xd=Xd, Xc=Xc, Xm=Xm)
        GN = GNet.load_gennet_from_file(f_name=gn_fname, rng=rng, Xp=Xp)
        DN = PNet.load_peanet_from_file(f_name=dn_fname, rng=rng, Xd=Xd)

    ###############################
    # Initialize the main VCGLoop #
    ###############################
    vcgl_params = {}
    vcgl_params['lam_l2d'] = 5e-2
    VCGL = VCGLoop(rng=rng, Xd=Xd, Xc=Xc, Xm=Xm, Xt=Xt, i_net=IN, \
                   g_net=GN, d_net=DN, chain_len=6, data_dim=data_dim, \
                   prior_dim=PRIOR_DIM, params=vcgl_params)
    VCGL.set_lam_l2w(1e-4)

    out_file = open(RESULT_PATH + "pt_walk_results.txt", 'wb')

    ####################################################
    # Train the VCGLoop by unrolling and applying BPTT #
    ####################################################
    learn_rate = 0.0002
    cost_1 = [0. for i in range(10)]
    for i in range(1000000):
        scale = float(min((i+1), 25000)) / 25000.0
        if (((i+1) % 50000) == 0):
            learn_rate = learn_rate * 0.8
        ########################################
        # TRAIN THE CHAIN IN FREE-RUNNING MODE #
        ########################################
        VCGL.set_all_sgd_params(learn_rate=(scale*learn_rate), \
                                mom_1=0.9, mom_2=0.999)
        VCGL.set_disc_weights(dweight_gn=20.0, dweight_dn=4.0)
        VCGL.set_lam_chain_nll(1.0)
        VCGL.set_lam_chain_kld(1.0 + extra_lam_kld)
        VCGL.set_lam_chain_vel(0.0)
        VCGL.set_lam_mask_nll(0.0)
        VCGL.set_lam_mask_kld(0.0)
        # get some data to train with
        tr_idx = npr.randint(low=0, high=tr_samples, size=(batch_size,))
        Xd_batch = Xtr.take(tr_idx, axis=0)
        Xc_batch = 0.0 * Xd_batch
        Xm_batch = 0.0 * Xd_batch
        # do 5 repetitions of the batch
        Xd_batch = np.repeat(Xd_batch, batch_reps, axis=0)
        Xc_batch = np.repeat(Xc_batch, batch_reps, axis=0)
        Xm_batch = np.repeat(Xm_batch, batch_reps, axis=0)
        # examples from the target distribution, to train discriminator
        tr_idx = npr.randint(low=0, high=tr_samples, size=(batch_reps*batch_size,))
        Xt_batch = Xtr.take(tr_idx, axis=0)
        # do a minibatch update of the model, and compute some costs
        outputs = VCGL.train_joint(Xd_batch, Xc_batch, Xm_batch, Xt_batch)
        cost_1 = [(cost_1[k] + 1.*outputs[k]) for k in range(len(outputs))]
        if ((i % 1000) == 0):
            cost_1 = [(v / 1000.0) for v in cost_1]
            o_str_1 = "batch: {0:d}, joint_cost: {1:.4f}, chain_nll_cost: {2:.4f}, chain_kld_cost: {3:.4f}, disc_cost_gn: {4:.4f}, disc_cost_dn: {5:.4f}".format( \
                    i, cost_1[0], cost_1[1], cost_1[2], cost_1[6], cost_1[7])
            print(o_str_1)
            out_file.write(o_str_1 + "\n")
            out_file.flush()
            cost_1 = [0. for v in cost_1]
        if ((i % 5000) == 0):
            tr_idx = npr.randint(low=0, high=Xtr.shape[0], size=(5,))
            va_idx = npr.randint(low=0, high=Xva.shape[0], size=(5,))
            Xd_batch = np.vstack([Xtr.take(tr_idx, axis=0), Xva.take(va_idx, axis=0)])
            # draw some chains of samples from the VAE loop
            file_name = RESULT_PATH + "pt_walk_chain_samples_b{0:d}.png".format(i)
            Xd_samps = np.repeat(Xd_batch, 3, axis=0)
            sample_lists = VCGL.GIP.sample_from_chain(Xd_samps, loop_iters=20)
            Xs = np.vstack(sample_lists["data samples"])
            utils.visualize_samples(Xs, file_name, num_rows=20)
            # draw some masked chains of samples from the VAE loop
            file_name = RESULT_PATH + "pt_walk_mask_samples_b{0:d}.png".format(i)
            Xd_samps = np.repeat(Xc_mean[0:Xd_batch.shape[0], :], 3, axis=0)
            Xc_samps = np.repeat(Xd_batch, 3, axis=0)
            Xm_rand = sample_masks(Xc_samps, drop_prob=0.2)
            Xm_patch = sample_patch_masks(Xc_samps, (32, 32), (16, 16))
            Xm_samps = Xm_rand * Xm_patch
            sample_lists = VCGL.GIP.sample_from_chain(Xd_samps, \
                    X_c=Xc_samps, X_m=Xm_samps, loop_iters=20)
            Xs = np.vstack(sample_lists["data samples"])
            utils.visualize_samples(Xs, file_name, num_rows=20)
            # draw some samples independently from the GenNet's prior
            file_name = RESULT_PATH + "pt_walk_prior_samples_b{0:d}.png".format(i)
            Xs = VCGL.sample_from_prior(20*20)
            utils.visualize_samples(Xs, file_name, num_rows=20)
            # draw discriminator network's weights
            file_name = RESULT_PATH + "pt_walk_dis_weights_b{0:d}.png".format(i)
            utils.visualize_net_layer(VCGL.DN.proto_nets[0][0], file_name)
            # draw inference net first layer weights
            file_name = RESULT_PATH + "pt_walk_inf_weights_b{0:d}.png".format(i)
            utils.visualize_net_layer(VCGL.IN.shared_layers[0], file_name)
            # draw generator net final layer weights
            file_name = RESULT_PATH + "pt_walk_gen_weights_b{0:d}.png".format(i)
            if GN.out_type == 'sigmoid':
                utils.visualize_net_layer(VCGL.GN.mlp_layers[-1], file_name, use_transpose=True)
            else:
                utils.visualize_net_layer(VCGL.GN.mlp_layers[-2], file_name, use_transpose=True)
            #########################
            # Check posterior KLds. #
            #########################
            post_klds = posterior_klds(IN, Xtr, 5000, 5)
            file_name = RESULT_PATH + "pt_walk_post_klds_b{0:d}.png".format(i)
            utils.plot_kde_histogram2( \
                    np.asarray(post_klds), np.asarray(post_klds), file_name, bins=30)
        # DUMP PARAMETERS FROM TIME-TO-TIME
        if (i % 10000 == 0):
            DN.save_to_file(f_name=RESULT_PATH + "pt_walk_params_b{0:d}_DN.pkl".format(i))
            IN.save_to_file(f_name=RESULT_PATH + "pt_walk_params_b{0:d}_IN.pkl".format(i))
            GN.save_to_file(f_name=RESULT_PATH + "pt_walk_params_b{0:d}_GN.pkl".format(i))
    return
def test_imocld_imp_svhn(step_type='add', occ_dim=14, drop_prob=0.0, attention=False):
    ##########################
    # Get some training data #
    ##########################
    tr_file = 'data/svhn_train_gray.pkl'
    te_file = 'data/svhn_test_gray.pkl'
    ex_file = 'data/svhn_extra_gray.pkl'
    data = load_svhn_gray(tr_file, te_file, ex_file=ex_file, ex_count=200000)
    Xtr = to_fX(shift_and_scale_into_01(np.vstack([data['Xtr'], data['Xex']])))
    Xva = to_fX(shift_and_scale_into_01(data['Xte']))
    tr_samples = Xtr.shape[0]
    va_samples = Xva.shape[0]
    batch_size = 250

    all_pix_mean = np.mean(np.mean(Xtr, axis=1))
    data_mean = to_fX(all_pix_mean * np.ones((Xtr.shape[1],)))

    ############################################################
    # Setup some parameters for the Iterative Refinement Model #
    ############################################################
    x_dim = Xtr.shape[1]
    write_dim = 600
    enc_dim = 600
    dec_dim = 600
    mix_dim = 20
    z_dim = 200
    n_iter = 16
    dp_int = int(100.0 * drop_prob)

    rnninits = {
        'weights_init': IsotropicGaussian(0.01),
        'biases_init': Constant(0.),
    }
    inits = {
        'weights_init': IsotropicGaussian(0.01),
        'biases_init': Constant(0.),
    }

    att_tag = "NA"  # attention not implemented yet

    # setup the reader and writer (shared by primary and guide policies)
    read_dim = 2*x_dim  # dimension of output from reader_mlp
    reader_mlp = Reader(x_dim=x_dim, dec_dim=dec_dim, **inits)
    writer_mlp = MLP([None, None], [dec_dim, write_dim, x_dim], \
                     name="writer_mlp", **inits)

    # mlps for setting conditionals over z_mix
    mix_var_mlp = CondNet([Tanh()], [x_dim, 250, mix_dim], \
                          name="mix_var_mlp", **inits)
    mix_enc_mlp = CondNet([Tanh()], [x_dim, 250, mix_dim], \
                          name="mix_enc_mlp", **inits)
    # mlp for decoding z_mix into a distribution over initial LSTM states
    mix_dec_mlp = MLP([Tanh(), Tanh()], \
                      [mix_dim, 250, (2*enc_dim + 2*dec_dim + 2*enc_dim)], \
                      name="mix_dec_mlp", **inits)
    # mlps for processing inputs to LSTMs
    var_mlp_in = MLP([Identity()], [(read_dim + dec_dim), 4*enc_dim], \
                     name="var_mlp_in", **inits)
    enc_mlp_in = MLP([Identity()], [(read_dim + dec_dim), 4*enc_dim], \
                     name="enc_mlp_in", **inits)
    dec_mlp_in = MLP([Identity()], [z_dim, 4*dec_dim], \
                     name="dec_mlp_in", **inits)
    #dec_mlp_in = MLP([Identity()], [(enc_dim + z_dim), 4*dec_dim], \
    #                 name="dec_mlp_in", **inits)
    # mlps for turning LSTM outputs into conditionals over z_gen
    var_mlp_out = CondNet([], [enc_dim, z_dim], name="var_mlp_out", **inits)
    enc_mlp_out = CondNet([], [enc_dim, z_dim], name="enc_mlp_out", **inits)
    # LSTMs for the actual LSTMs (obviously, perhaps)
    var_rnn = BiasedLSTM(dim=enc_dim, ig_bias=2.0, fg_bias=2.0, \
                         name="var_rnn", **rnninits)
    enc_rnn = BiasedLSTM(dim=enc_dim, ig_bias=2.0, fg_bias=2.0, \
                         name="enc_rnn", **rnninits)
    dec_rnn = BiasedLSTM(dim=dec_dim, ig_bias=2.0, fg_bias=2.0, \
                         name="dec_rnn", **rnninits)

    draw = IMoCLDrawModels(
                n_iter,
                step_type=step_type,  # step_type can be 'add' or 'jump'
                reader_mlp=reader_mlp,
                writer_mlp=writer_mlp,
                mix_enc_mlp=mix_enc_mlp,
                mix_dec_mlp=mix_dec_mlp,
                mix_var_mlp=mix_var_mlp,
                enc_mlp_in=enc_mlp_in,
                enc_mlp_out=enc_mlp_out,
                enc_rnn=enc_rnn,
                dec_mlp_in=dec_mlp_in,
                dec_rnn=dec_rnn,
                var_mlp_in=var_mlp_in,
                var_mlp_out=var_mlp_out,
                var_rnn=var_rnn)
    draw.initialize()

    # build the cost gradients, training function, samplers, etc.
    draw.build_model_funcs()

    #draw.load_model_params(f_name="TBCLM_IMP_SVHN_PARAMS_OD{}_DP{}_{}_{}.pkl".format(occ_dim, dp_int, step_type, att_tag))

    ################################################################
    # Apply some updates, to check that they aren't totally broken #
    ################################################################
    print("Beginning to train the model...")
    out_file = open("TBCLM_IMP_SVHN_RESULTS_OD{}_DP{}_{}_{}.txt".format(occ_dim, dp_int, step_type, att_tag), 'wb')
    out_file.flush()
    costs = [0. for i in range(10)]
    learn_rate = 0.0002
    momentum = 0.5
    batch_idx = np.arange(batch_size) + tr_samples
    for i in [0]:  # just one update here; use range(250000) for a full run
        scale = min(1.0, ((i + 1) / 1000.0))
        if (((i + 1) % 10000) == 0):
            learn_rate = learn_rate * 0.95
        if (i > 10000):
            momentum = 0.90
        else:
            momentum = 0.50
        # get the indices of training samples for this batch update
        batch_idx += batch_size
        if (np.max(batch_idx) >= tr_samples):
            # we finished an "epoch", so we rejumble the training set
            Xtr = row_shuffle(Xtr)
            batch_idx = np.arange(batch_size)
        # set sgd and objective function hyperparams for this update
        zero_ary = np.zeros((1,))
        draw.lr.set_value(to_fX(zero_ary + learn_rate))
        draw.mom_1.set_value(to_fX(zero_ary + momentum))
        draw.mom_2.set_value(to_fX(zero_ary + 0.99))
        # perform a minibatch update and record the cost for this batch
        Xb = to_fX(Xtr.take(batch_idx, axis=0))
        _, Xb, Mb = construct_masked_data(Xb, drop_prob=drop_prob, \
                                          occ_dim=occ_dim, data_mean=None)
        result = draw.train_joint(Xb, Mb)
        costs = [(costs[j] + result[j]) for j in range(len(result))]
        if ((i % 200) == 0):
            costs = [(v / 200.0) for v in costs]
            str1 = "-- batch {0:d} --".format(i)
            str2 = "    total_cost: {0:.4f}".format(costs[0])
            str3 = "    nll_bound : {0:.4f}".format(costs[1])
            str4 = "    nll_term  : {0:.4f}".format(costs[2])
            str5 = "    kld_q2p   : {0:.4f}".format(costs[3])
            str6 = "    kld_p2q   : {0:.4f}".format(costs[4])
            str7 = "    reg_term  : {0:.4f}".format(costs[5])
            joint_str = "\n".join([str1, str2, str3, str4, str5, str6, str7])
            print(joint_str)
            out_file.write(joint_str + "\n")
            out_file.flush()
            costs = [0.0 for v in costs]
        if ((i % 1000) == 0):
            draw.save_model_params("TBCLM_IMP_SVHN_PARAMS_OD{}_DP{}_{}_{}.pkl".format(occ_dim, dp_int, step_type, att_tag))
            # compute a small-sample estimate of NLL bound on validation set
            Xva = row_shuffle(Xva)
            Xb = to_fX(Xva[:5000])
            _, Xb, Mb = construct_masked_data(Xb, drop_prob=drop_prob, \
                                              occ_dim=occ_dim, data_mean=None)
            va_costs = draw.compute_nll_bound(Xb, Mb)
            str1 = "    va_nll_bound : {}".format(va_costs[1])
            str2 = "    va_nll_term  : {}".format(va_costs[2])
            str3 = "    va_kld_q2p   : {}".format(va_costs[3])
            joint_str = "\n".join([str1, str2, str3])
            print(joint_str)
            out_file.write(joint_str + "\n")
            out_file.flush()
            # draw some independent samples from the model
            Xb = to_fX(Xva[:100])
            _, Xb, Mb = construct_masked_data(Xb, drop_prob=drop_prob, \
                                              occ_dim=occ_dim, data_mean=None)
            samples, _ = draw.do_sample(Xb, Mb)
            n_iter, N, D = samples.shape
            samples = samples.reshape((n_iter, N, 32, 32))
            for j in xrange(n_iter):
                img = img_grid(samples[j, :, :, :])
                img.save("TBCLM-IMP-SVHN-OD{0:d}-DP{1:d}-{2:s}-samples-{3:03d}.png".format(occ_dim, dp_int, step_type, j))
def test_svhn(occ_dim=15, drop_prob=0.0):
    RESULT_PATH = "IMP_SVHN_VAE/"
    #########################################
    # Format the result tag more thoroughly #
    #########################################
    dp_int = int(100.0 * drop_prob)
    result_tag = "{}VAE_OD{}_DP{}".format(RESULT_PATH, occ_dim, dp_int)

    ##########################
    # Get some training data #
    ##########################
    rng = np.random.RandomState(1234) # added: rng was used below but never defined
    tr_file = 'data/svhn_train_gray.pkl'
    te_file = 'data/svhn_test_gray.pkl'
    ex_file = 'data/svhn_extra_gray.pkl'
    data = load_svhn_gray(tr_file, te_file, ex_file=ex_file, ex_count=200000)
    Xtr = to_fX( shift_and_scale_into_01(np.vstack([data['Xtr'], data['Xex']])) )
    Xva = to_fX( shift_and_scale_into_01(data['Xte']) )
    tr_samples = Xtr.shape[0]
    va_samples = Xva.shape[0]
    batch_size = 250
    batch_reps = 1 # added: batch_reps was used in the training loop but never defined
    all_pix_mean = np.mean(np.mean(Xtr, axis=1))
    data_mean = to_fX( all_pix_mean * np.ones((Xtr.shape[1],)) )

    ############################################################
    # Setup some parameters for the Iterative Refinement Model #
    ############################################################
    obs_dim = Xtr.shape[1]
    z_dim = 100
    imp_steps = 15 # we'll check for the best step count (found oracularly)
    init_scale = 1.0

    x_in_sym = T.matrix('x_in_sym')
    x_out_sym = T.matrix('x_out_sym')
    x_mask_sym = T.matrix('x_mask_sym')

    #################
    # p_zi_given_xi #
    #################
    params = {}
    shared_config = [obs_dim, 1000, 1000]
    top_config = [shared_config[-1], z_dim]
    params['shared_config'] = shared_config
    params['mu_config'] = top_config
    params['sigma_config'] = top_config
    params['activation'] = relu_actfun
    params['init_scale'] = init_scale
    params['lam_l2a'] = 0.0
    params['vis_drop'] = 0.0
    params['hid_drop'] = 0.0
    params['bias_noise'] = 0.0
    params['input_noise'] = 0.0
    params['build_theano_funcs'] = False
    p_zi_given_xi = InfNet(rng=rng, Xd=x_in_sym, \
            params=params, shared_param_dicts=None)
    p_zi_given_xi.init_biases(0.2)
    ###################
    # p_xip1_given_zi #
    ###################
    params = {}
    shared_config = [z_dim, 1000, 1000]
    output_config = [obs_dim, obs_dim]
    params['shared_config'] = shared_config
    params['output_config'] = output_config
    params['activation'] = relu_actfun
    params['init_scale'] = init_scale
    params['lam_l2a'] = 0.0
    params['vis_drop'] = 0.0
    params['hid_drop'] = 0.0
    params['bias_noise'] = 0.0
    params['input_noise'] = 0.0
    params['build_theano_funcs'] = False
    p_xip1_given_zi = HydraNet(rng=rng, Xd=x_in_sym, \
            params=params, shared_param_dicts=None)
    p_xip1_given_zi.init_biases(0.2)
    ###################
    # q_zi_given_x_xi #
    ###################
    params = {}
    shared_config = [(obs_dim + obs_dim), 1000, 1000]
    top_config = [shared_config[-1], z_dim]
    params['shared_config'] = shared_config
    params['mu_config'] = top_config
    params['sigma_config'] = top_config
    params['activation'] = relu_actfun
    params['init_scale'] = init_scale
    params['lam_l2a'] = 0.0
    params['vis_drop'] = 0.0
    params['hid_drop'] = 0.0
    params['bias_noise'] = 0.0
    params['input_noise'] = 0.0
    params['build_theano_funcs'] = False
    q_zi_given_x_xi = InfNet(rng=rng, Xd=x_in_sym, \
            params=params, shared_param_dicts=None)
    q_zi_given_x_xi.init_biases(0.2)

    ###########################################################
    # Define parameters for the GPSImputer, and initialize it #
    ###########################################################
    print("Building the GPSImputer...")
    gpsi_params = {}
    gpsi_params['obs_dim'] = obs_dim
    gpsi_params['z_dim'] = z_dim
    gpsi_params['imp_steps'] = imp_steps
    gpsi_params['step_type'] = 'jump'
    gpsi_params['x_type'] = 'bernoulli'
    gpsi_params['obs_transform'] = 'sigmoid'
    gpsi_params['use_osm_mode'] = True
    GPSI = GPSImputer(rng=rng,
            x_in=x_in_sym, x_out=x_out_sym, x_mask=x_mask_sym, \
            p_zi_given_xi=p_zi_given_xi, \
            p_xip1_given_zi=p_xip1_given_zi, \
            q_zi_given_x_xi=q_zi_given_x_xi, \
            params=gpsi_params, \
            shared_param_dicts=None)
    #########################################################################
    # Define parameters for the underlying OneStageModel, and initialize it #
    #########################################################################
    print("Building the OneStageModel...")
    osm_params = {}
    osm_params['x_type'] = 'bernoulli'
    osm_params['xt_transform'] = 'sigmoid'
    OSM = OneStageModel(rng=rng, \
            x_in=x_in_sym, \
            p_x_given_z=p_xip1_given_zi, \
            q_z_given_x=p_zi_given_xi, \
            x_dim=obs_dim, z_dim=z_dim, \
            params=osm_params)

    ################################################################
    # Apply some updates, to check that they aren't totally broken #
    ################################################################
    log_name = "{}_RESULTS.txt".format(result_tag)
    out_file = open(log_name, 'wb')
    costs = [0. for i in range(10)]
    learn_rate = 0.0002
    momentum = 0.5
    batch_idx = np.arange(batch_size) + tr_samples
    for i in range(200005):
        scale = min(1.0, ((i+1) / 5000.0))
        if (((i + 1) % 15000) == 0):
            learn_rate = learn_rate * 0.92
        if (i > 10000):
            momentum = 0.90
        else:
            momentum = 0.50
        # get the indices of training samples for this batch update
        batch_idx += batch_size
        if (np.max(batch_idx) >= tr_samples):
            # we finished an "epoch", so we rejumble the training set
            Xtr = row_shuffle(Xtr)
            batch_idx = np.arange(batch_size)
        # set sgd and objective function hyperparams for this update
        OSM.set_sgd_params(lr=scale*learn_rate, \
                mom_1=scale*momentum, mom_2=0.99)
        OSM.set_lam_nll(lam_nll=1.0)
        OSM.set_lam_kld(lam_kld_1=1.0, lam_kld_2=0.0)
        OSM.set_lam_l2w(1e-4)
        # perform a minibatch update and record the cost for this batch
        xb = to_fX( Xtr.take(batch_idx, axis=0) )
        result = OSM.train_joint(xb, batch_reps)
        costs = [(costs[j] + result[j]) for j in range(len(result)-1)]
        if ((i % 250) == 0):
            costs = [(v / 250.0) for v in costs]
            str1 = "-- batch {0:d} --".format(i)
            str2 = "    joint_cost: {0:.4f}".format(costs[0])
            str3 = "    nll_cost  : {0:.4f}".format(costs[1])
            str4 = "    kld_cost  : {0:.4f}".format(costs[2])
            str5 = "    reg_cost  : {0:.4f}".format(costs[3])
            joint_str = "\n".join([str1, str2, str3, str4, str5])
            print(joint_str)
            out_file.write(joint_str+"\n")
            out_file.flush()
            costs = [0.0 for v in costs]
        if ((i % 1000) == 0):
            Xva = row_shuffle(Xva)
            # record an estimate of performance on the test set
            xi, xo, xm = construct_masked_data(Xva[0:5000], drop_prob=drop_prob, \
                    occ_dim=occ_dim, data_mean=data_mean)
            step_nll, step_kld = GPSI.compute_per_step_cost(xi, xo, xm, sample_count=10)
            min_nll = np.min(step_nll)
            str1 = "    va_nll_bound : {}".format(min_nll)
            str2 = "    va_nll_min   : {}".format(min_nll)
            str3 = "    va_nll_final : {}".format(step_nll[-1])
            joint_str = "\n".join([str1, str2, str3])
            print(joint_str)
            out_file.write(joint_str+"\n")
            out_file.flush()
        if ((i % 10000) == 0):
            # Get some validation samples for evaluating model performance
            xb = to_fX( Xva[0:100] )
            xi, xo, xm = construct_masked_data(xb, drop_prob=drop_prob, \
                    occ_dim=occ_dim, data_mean=data_mean)
            xi = np.repeat(xi, 2, axis=0)
            xo = np.repeat(xo, 2, axis=0)
            xm = np.repeat(xm, 2, axis=0)
            # draw some sample imputations from the model
            samp_count = xi.shape[0]
            _, model_samps = GPSI.sample_imputer(xi, xo, xm, use_guide_policy=False)
            seq_len = len(model_samps)
            seq_samps = np.zeros((seq_len*samp_count, model_samps[0].shape[1]))
            idx = 0
            for s1 in range(samp_count):
                for s2 in range(seq_len):
                    seq_samps[idx] = model_samps[s2][s1]
                    idx += 1
            file_name = "{}_samples_ng_b{}.png".format(result_tag, i)
            utils.visualize_samples(seq_samps, file_name, num_rows=20)
            # get visualizations of policy parameters
            file_name = "{}_gen_gen_weights_b{}.png".format(result_tag, i)
            W = GPSI.gen_gen_weights.get_value(borrow=False)
            utils.visualize_samples(W[:,:obs_dim], file_name, num_rows=20)
            file_name = "{}_gen_inf_weights_b{}.png".format(result_tag, i)
            W = GPSI.gen_inf_weights.get_value(borrow=False).T
            utils.visualize_samples(W[:,:obs_dim], file_name, num_rows=20)
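# For reference, the masking protocol these tests rely on: given a batch X,
# construct_masked_data returns (xi, xo, xm), where xo is the clean target,
# xm marks which entries remain visible (1 = known, 0 = missing), and xi is X
# with missing entries replaced by the mean pixel value. The helper below is
# only a minimal sketch of that protocol -- assuming 32x32 gray images, a
# random occ_dim x occ_dim occlusion square, and i.i.d. pixel dropout -- and
# is not the original implementation; details may differ.
def _sketch_construct_masked_data(X, drop_prob, occ_dim, data_mean,
                                  im_dim=32, rng=None):
    rng = np.random.RandomState(1234) if (rng is None) else rng
    xo = X.copy()
    xm = np.ones(X.shape, dtype=X.dtype)
    if drop_prob > 0.0:
        # drop pixels independently with probability drop_prob
        xm *= (rng.rand(*X.shape) > drop_prob)
    if (occ_dim is not None) and (occ_dim > 0):
        # occlude a random occ_dim x occ_dim square in each image
        for r in range(X.shape[0]):
            row = rng.randint(0, im_dim - occ_dim + 1)
            col = rng.randint(0, im_dim - occ_dim + 1)
            mask_img = xm[r].reshape(im_dim, im_dim)
            mask_img[row:(row + occ_dim), col:(col + occ_dim)] = 0.0
    # fill the missing entries with the mean pixel value
    xi = (xm * xo) + ((1.0 - xm) * data_mean.reshape(1, -1))
    return xi, xo, xm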
def test_svhn_results(step_type='add', occ_dim=15, drop_prob=0.0):
    #########################################
    # Format the result tag more thoroughly #
    #########################################
    dp_int = int(100.0 * drop_prob)
    # NOTE: RESULT_PATH is assumed to be defined at module scope.
    result_tag = "{}GPSI_OD{}_DP{}_{}_NA".format(RESULT_PATH, occ_dim, dp_int,
                                                 step_type)

    ##########################
    # Get some training data #
    ##########################
    rng = np.random.RandomState(1234) # added: rng was used below but never defined
    tr_file = 'data/svhn_train_gray.pkl'
    te_file = 'data/svhn_test_gray.pkl'
    ex_file = 'data/svhn_extra_gray.pkl'
    data = load_svhn_gray(tr_file, te_file, ex_file=ex_file, ex_count=200000)
    #all_file = 'data/svhn_all_gray_zca.pkl'
    #data = load_svhn_all_gray_zca(all_file)
    Xtr = to_fX( shift_and_scale_into_01(np.vstack([data['Xtr'], data['Xex']])) )
    Xva = to_fX( shift_and_scale_into_01(data['Xte']) )
    tr_samples = Xtr.shape[0]
    va_samples = Xva.shape[0]
    batch_size = 250
    all_pix_mean = np.mean(np.mean(Xtr, axis=1))
    data_mean = to_fX( all_pix_mean * np.ones((Xtr.shape[1],)) )

    ############################################################
    # Setup some parameters for the Iterative Refinement Model #
    ############################################################
    obs_dim = Xtr.shape[1]
    z_dim = 200
    imp_steps = 6
    init_scale = 1.0

    x_in_sym = T.matrix('x_in_sym')
    x_out_sym = T.matrix('x_out_sym')
    x_mask_sym = T.matrix('x_mask_sym')

    # Load parameters from a previously trained model
    print("Testing model load from file...")
    GPSI = load_gpsimputer_from_file(f_name="{}_PARAMS.pkl".format(result_tag), \
                                     rng=rng)

    ################################################################
    # Apply some updates, to check that they aren't totally broken #
    ################################################################
    log_name = "{}_FINAL_RESULTS.txt".format(result_tag)
    out_file = open(log_name, 'wb')
    Xva = row_shuffle(Xva)
    # record an estimate of performance on the test set
    str0 = "GUIDED SAMPLE BOUND:"
    print(str0)
    xi, xo, xm = construct_masked_data(Xva[:5000], drop_prob=drop_prob, \
                                       occ_dim=occ_dim, data_mean=data_mean)
    nll_0, kld_0 = GPSI.compute_fe_terms(xi, xo, xm, sample_count=10, \
                                         use_guide_policy=True)
    xi, xo, xm = construct_masked_data(Xva[5000:], drop_prob=drop_prob, \
                                       occ_dim=occ_dim, data_mean=data_mean)
    nll_1, kld_1 = GPSI.compute_fe_terms(xi, xo, xm, sample_count=10, \
                                         use_guide_policy=True)
    nll = np.concatenate((nll_0, nll_1))
    kld = np.concatenate((kld_0, kld_1))
    vfe = np.mean(nll) + np.mean(kld)
    str1 = "    va_nll_bound : {}".format(vfe)
    str2 = "    va_nll_term  : {}".format(np.mean(nll))
    str3 = "    va_kld_q2p   : {}".format(np.mean(kld))
    joint_str = "\n".join([str0, str1, str2, str3])
    print(joint_str)
    out_file.write(joint_str+"\n")
    out_file.flush()
    # record an estimate of performance on the test set
    str0 = "UNGUIDED SAMPLE BOUND:"
    print(str0)
    xi, xo, xm = construct_masked_data(Xva[:5000], drop_prob=drop_prob, \
                                       occ_dim=occ_dim, data_mean=data_mean)
    nll_0, kld_0 = GPSI.compute_fe_terms(xi, xo, xm, sample_count=10, \
                                         use_guide_policy=False)
    xi, xo, xm = construct_masked_data(Xva[5000:], drop_prob=drop_prob, \
                                       occ_dim=occ_dim, data_mean=data_mean)
    nll_1, kld_1 = GPSI.compute_fe_terms(xi, xo, xm, sample_count=10, \
                                         use_guide_policy=False)
    nll = np.concatenate((nll_0, nll_1))
    kld = np.concatenate((kld_0, kld_1))
    # with unguided samples, the bound is just the NLL term
    str1 = "    va_nll_bound : {}".format(np.mean(nll))
    str2 = "    va_nll_term  : {}".format(np.mean(nll))
    str3 = "    va_kld_q2p   : {}".format(np.mean(kld))
    joint_str = "\n".join([str0, str1, str2, str3])
    print(joint_str)
    out_file.write(joint_str+"\n")
    out_file.flush()
    out_file.close() # added: the log file was never closed
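# Assumed workflow (a sketch, not part of the original file): train a model
# with test_svhn(...) so that "{result_tag}_PARAMS.pkl" exists on disk, then
# score the saved parameters with test_svhn_results(...), e.g.:
#
#   test_svhn(step_type='jump', occ_dim=15, drop_prob=0.0)
#   test_svhn_results(step_type='jump', occ_dim=15, drop_prob=0.0)
#
# The step_type/occ_dim/drop_prob values must match between the two calls,
# since they determine the result_tag used to locate the saved parameters.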
def test_svhn(step_type='add', occ_dim=15, drop_prob=0.0):
    #########################################
    # Format the result tag more thoroughly #
    #########################################
    dp_int = int(100.0 * drop_prob)
    # NOTE: RESULT_PATH is assumed to be defined at module scope.
    result_tag = "{}GPSI_OD{}_DP{}_{}_NA".format(RESULT_PATH, occ_dim, dp_int,
                                                 step_type)

    ##########################
    # Get some training data #
    ##########################
    rng = np.random.RandomState(1234)
    tr_file = 'data/svhn_train_gray.pkl'
    te_file = 'data/svhn_test_gray.pkl'
    ex_file = 'data/svhn_extra_gray.pkl'
    data = load_svhn_gray(tr_file, te_file, ex_file=ex_file, ex_count=200000)
    Xtr = to_fX( shift_and_scale_into_01(np.vstack([data['Xtr'], data['Xex']])) )
    Xva = to_fX( shift_and_scale_into_01(data['Xte']) )
    tr_samples = Xtr.shape[0]
    va_samples = Xva.shape[0]
    batch_size = 250
    batch_reps = 1
    all_pix_mean = np.mean(np.mean(Xtr, axis=1))
    data_mean = to_fX( all_pix_mean * np.ones((Xtr.shape[1],)) )

    ############################################################
    # Setup some parameters for the Iterative Refinement Model #
    ############################################################
    x_dim = Xtr.shape[1]
    z_dim = 200
    imp_steps = 6
    init_scale = 1.0

    x_in_sym = T.matrix('x_in_sym')
    x_out_sym = T.matrix('x_out_sym')
    x_mask_sym = T.matrix('x_mask_sym')

    #################
    # p_zi_given_xi #
    #################
    params = {}
    shared_config = [x_dim, 1500, 1500]
    top_config = [shared_config[-1], z_dim]
    params['shared_config'] = shared_config
    params['mu_config'] = top_config
    params['sigma_config'] = top_config
    params['activation'] = relu_actfun
    params['init_scale'] = init_scale
    params['vis_drop'] = 0.0
    params['hid_drop'] = 0.0
    params['bias_noise'] = 0.0
    params['input_noise'] = 0.0
    params['build_theano_funcs'] = False
    p_zi_given_xi = InfNet(rng=rng, Xd=x_in_sym, \
            params=params, shared_param_dicts=None)
    p_zi_given_xi.init_biases(0.2)
    ###################
    # p_xip1_given_zi #
    ###################
    params = {}
    shared_config = [z_dim, 1500, 1500]
    output_config = [x_dim, x_dim]
    params['shared_config'] = shared_config
    params['output_config'] = output_config
    params['activation'] = relu_actfun
    params['init_scale'] = init_scale
    params['vis_drop'] = 0.0
    params['hid_drop'] = 0.0
    params['bias_noise'] = 0.0
    params['input_noise'] = 0.0
    params['build_theano_funcs'] = False
    p_xip1_given_zi = HydraNet(rng=rng, Xd=x_in_sym, \
            params=params, shared_param_dicts=None)
    p_xip1_given_zi.init_biases(0.2)
    #################
    # q_zi_given_xi #
    #################
    params = {}
    shared_config = [(x_dim + x_dim), 1500, 1500]
    top_config = [shared_config[-1], z_dim]
    params['shared_config'] = shared_config
    params['mu_config'] = top_config
    params['sigma_config'] = top_config
    params['activation'] = relu_actfun
    params['init_scale'] = init_scale
    params['vis_drop'] = 0.0
    params['hid_drop'] = 0.0
    params['bias_noise'] = 0.0
    params['input_noise'] = 0.0
    params['build_theano_funcs'] = False
    q_zi_given_xi = InfNet(rng=rng, Xd=x_in_sym, \
            params=params, shared_param_dicts=None)
    q_zi_given_xi.init_biases(0.2)

    ###########################################################
    # Define parameters for the GPSImputer, and initialize it #
    ###########################################################
    print("Building the GPSImputer...")
    gpsi_params = {}
    gpsi_params['x_dim'] = x_dim
    gpsi_params['z_dim'] = z_dim
    gpsi_params['imp_steps'] = imp_steps
    gpsi_params['step_type'] = step_type
    gpsi_params['x_type'] = 'bernoulli'
    gpsi_params['obs_transform'] = 'sigmoid'
    GPSI = GPSImputer(rng=rng,
            x_in=x_in_sym, x_out=x_out_sym, x_mask=x_mask_sym, \
            p_zi_given_xi=p_zi_given_xi, \
            p_xip1_given_zi=p_xip1_given_zi, \
            q_zi_given_xi=q_zi_given_xi, \
            params=gpsi_params, \
            shared_param_dicts=None)

    ################################################################
    # Apply some updates, to check that they aren't totally broken #
    ################################################################
    log_name = "{}_RESULTS.txt".format(result_tag)
    out_file = open(log_name, 'wb')
    costs = [0. for i in range(10)]
    learn_rate = 0.0002
    momentum = 0.5
    batch_idx = np.arange(batch_size) + tr_samples
    for i in range(200005):
        scale = min(1.0, ((i+1) / 5000.0))
        if (((i + 1) % 15000) == 0):
            learn_rate = learn_rate * 0.92
        if (i > 10000):
            momentum = 0.90
        else:
            momentum = 0.50
        # get the indices of training samples for this batch update
        batch_idx += batch_size
        if (np.max(batch_idx) >= tr_samples):
            # we finished an "epoch", so we rejumble the training set
            Xtr = row_shuffle(Xtr)
            batch_idx = np.arange(batch_size)
        # set sgd and objective function hyperparams for this update
        GPSI.set_sgd_params(lr=scale*learn_rate, \
                mom_1=scale*momentum, mom_2=0.98)
        GPSI.set_train_switch(1.0)
        GPSI.set_lam_nll(lam_nll=1.0)
        GPSI.set_lam_kld(lam_kld_p=0.1, lam_kld_q=0.9)
        GPSI.set_lam_l2w(1e-4)
        # perform a minibatch update and record the cost for this batch
        xb = to_fX( Xtr.take(batch_idx, axis=0) )
        xi, xo, xm = construct_masked_data(xb, drop_prob=drop_prob, \
                occ_dim=occ_dim, data_mean=data_mean)
        result = GPSI.train_joint(xi, xo, xm, batch_reps)
        # do diagnostics and general training tracking
        costs = [(costs[j] + result[j]) for j in range(len(result)-1)]
        if ((i % 250) == 0):
            costs = [(v / 250.0) for v in costs]
            str1 = "-- batch {0:d} --".format(i)
            str2 = "    joint_cost: {0:.4f}".format(costs[0])
            str3 = "    nll_bound : {0:.4f}".format(costs[1])
            str4 = "    nll_cost  : {0:.4f}".format(costs[2])
            str5 = "    kld_cost  : {0:.4f}".format(costs[3])
            str6 = "    reg_cost  : {0:.4f}".format(costs[4])
            joint_str = "\n".join([str1, str2, str3, str4, str5, str6])
            print(joint_str)
            out_file.write(joint_str+"\n")
            out_file.flush()
            costs = [0.0 for v in costs]
        if ((i % 1000) == 0):
            Xva = row_shuffle(Xva)
            # record an estimate of performance on the test set
            xi, xo, xm = construct_masked_data(Xva[0:5000], drop_prob=drop_prob, \
                    occ_dim=occ_dim, data_mean=data_mean)
            nll, kld = GPSI.compute_fe_terms(xi, xo, xm, sample_count=10)
            vfe = np.mean(nll) + np.mean(kld)
            str1 = "    va_nll_bound : {}".format(vfe)
            str2 = "    va_nll_term  : {}".format(np.mean(nll))
            str3 = "    va_kld_q2p   : {}".format(np.mean(kld))
            joint_str = "\n".join([str1, str2, str3])
            print(joint_str)
            out_file.write(joint_str+"\n")
            out_file.flush()
            GPSI.save_to_file("{}_PARAMS.pkl".format(result_tag))
        if ((i % 20000) == 0):
            # Get some validation samples for evaluating model performance
            xb = to_fX( Xva[0:100] )
            xi, xo, xm = construct_masked_data(xb, drop_prob=drop_prob, \
                    occ_dim=occ_dim, data_mean=data_mean)
            xi = np.repeat(xi, 2, axis=0)
            xo = np.repeat(xo, 2, axis=0)
            xm = np.repeat(xm, 2, axis=0)
            # draw some sample imputations from the model
            samp_count = xi.shape[0]
            _, model_samps = GPSI.sample_imputer(xi, xo, xm, use_guide_policy=False)
            seq_len = len(model_samps)
            seq_samps = np.zeros((seq_len*samp_count, model_samps[0].shape[1]))
            idx = 0
            for s1 in range(samp_count):
                for s2 in range(seq_len):
                    seq_samps[idx] = model_samps[s2][s1]
                    idx += 1
            file_name = "{0:s}_samples_ng_b{1:d}.png".format(result_tag, i)
            utils.visualize_samples(seq_samps, file_name, num_rows=20)
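# A minimal command-line entry point (an assumption; the original script may
# have selected its configurations differently). The defaults mirror the
# keyword defaults used above.
if __name__ == "__main__":
    test_svhn(step_type='add', occ_dim=15, drop_prob=0.0)
    test_svhn_results(step_type='add', occ_dim=15, drop_prob=0.0)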