# Module-level imports needed by main() below. Note: 'pp' (the preprocessing
# module providing PCA_fromfile) and 'optim_vae_ss_adam' (the semi-supervised
# Adam optimizer) are assumed to be imported/defined elsewhere in this file.
import os
import sys

import numpy as np
import anglepy.ndict as ndict


def main(n_passes, n_labeled, n_z, n_hidden, dataset, seed, alpha, n_minibatches, comment):
    '''
    Learn a variational auto-encoder with generative model
    p(x,y,z) = p(y) p(z) p(x|y,z),
    where 'x' is always observed and 'y' is only sometimes observed
    (hence semi-supervised). We use q(y|x) as the classification model.
    '''
    import time

    logdir = 'results/learn_yz_x_ss_' + dataset + '_' + str(n_z) + '-' + str(n_hidden) \
        + '_nlabeled' + str(n_labeled) + '_alpha' + str(alpha) + '_seed' + str(seed) \
        + '_' + comment + '-' + str(int(time.time())) + '/'
    if not os.path.exists(logdir):
        os.makedirs(logdir)
    print('logdir:', logdir)
    print(sys.argv[0], n_labeled, n_z, n_hidden, dataset, seed, comment)

    np.random.seed(seed)

    # Init data
    if dataset == 'mnist_2layer':
        size = 28
        dim_input = (size, size)

        # Load the first-layer model used for feature extraction
        path = 'models/mnist_z_x_50-500-500_longrun/'  # 'models/mnist_z_x_50-600-600/'
        l1_v = ndict.loadz(path + 'v.ndict.tar.gz')
        l1_w = ndict.loadz(path + 'w.ndict.tar.gz')
        n_h = (500, 500)
        from anglepy.models.VAE_Z_X import VAE_Z_X
        l1_model = VAE_Z_X(n_x=28 * 28, n_hidden_q=n_h, n_z=50, n_hidden_p=n_h,
                           nonlinear_q='softplus', nonlinear_p='softplus',
                           type_px='bernoulli', type_qz='gaussianmarg',
                           type_pz='gaussianmarg', prior_sd=1)

        # Load dataset: train, validation and test sets
        import anglepy.data.mnist as mnist
        train_x, train_y, valid_x, valid_y, test_x, test_y = mnist.load_numpy_split(size, binarize_y=True)

        # Create labeled/unlabeled split in the training set
        x_l, y_l, x_u, y_u = mnist.create_semisupervised(train_x, train_y, n_labeled)

        # Extract features
        # 1. Determine which latent dimensions to keep (those whose posterior mean actually varies)
        def transform(v, _x):
            return l1_model.dist_qz['z'](*([_x] + list(v.values()) + [np.ones((1, _x.shape[1]))]))

        q_mean, _ = transform(l1_v, x_u[0:1000])
        idx_keep = np.std(q_mean, axis=1) > 0.1

        # 2. Select those dimensions in the first-layer parameters
        for key in ['mean_b', 'mean_w', 'logvar_b', 'logvar_w']:
            l1_v[key] = l1_v[key][idx_keep, :]
        l1_w['w0'] = l1_w['w0'][:, idx_keep]

        # 3. Extract features
        x_mean_u, x_logvar_u = transform(l1_v, x_u)
        x_mean_l, x_logvar_l = transform(l1_v, x_l)
        x_unlabeled = {'mean': x_mean_u, 'logvar': x_logvar_u, 'y': y_u}
        x_labeled = {'mean': x_mean_l, 'logvar': x_logvar_l, 'y': y_l}
        valid_x, _ = transform(l1_v, valid_x)
        test_x, _ = transform(l1_v, test_x)

        n_x = np.sum(idx_keep)
        n_y = 10

        type_pz = 'gaussianmarg'
        type_px = 'gaussian'
        nonlinear = 'softplus'
        colorImg = False

    if dataset == 'svhn_2layer':
        size = 32
        dim_input = (size, size)

        # Load the first-layer model used for feature extraction
        path = 'models/tmp/svhn_z_x_300-500-500/'
        l1_v = ndict.loadz(path + 'v.ndict.tar.gz')
        l1_w = ndict.loadz(path + 'w.ndict.tar.gz')
        f_enc, f_dec = pp.PCA_fromfile(path + 'pca_params.ndict.tar.gz', True)
        from anglepy.models.VAE_Z_X import VAE_Z_X
        n_x = l1_v['w0'].shape[1]  # = 600
        l1_model = VAE_Z_X(n_x=n_x, n_hidden_q=(600, 600), n_z=300, n_hidden_p=(600, 600),
                           nonlinear_q='softplus', nonlinear_p='softplus',
                           type_px='gaussian', type_qz='gaussianmarg',
                           type_pz='gaussianmarg', prior_sd=1)

        # SVHN dataset
        import anglepy.data.svhn as svhn
        size = 32
        train_x, train_y, valid_x, valid_y, test_x, test_y = svhn.load_numpy_split(False, binarize_y=True, extra=False)  # norb.load_resized(size, binarize_y=True)
        #train_x = np.hstack((_train_x, extra_x))
        #train_y = np.hstack((_train_y, extra_y))[:,:604000]

        # Create labeled/unlabeled split in the training set
        import anglepy.data.mnist as mnist
        x_l, y_l, x_u, y_u = mnist.create_semisupervised(train_x, train_y, n_labeled)

        # Extract features
        # 1. Determine which dimensions to keep
        def transform(v, _x):
            return l1_model.dist_qz['z'](*([f_enc(_x)] + list(v.values()) + [np.ones((1, _x.shape[1]))]))

        # 2. We're keeping all latent dimensions

        # 3. Extract features
        x_mean_u, x_logvar_u = transform(l1_v, x_u)
        x_mean_l, x_logvar_l = transform(l1_v, x_l)
        x_unlabeled = {'mean': x_mean_u, 'logvar': x_logvar_u, 'y': y_u}
        x_labeled = {'mean': x_mean_l, 'logvar': x_logvar_l, 'y': y_l}
        valid_x, _ = transform(l1_v, valid_x)
        test_x, _ = transform(l1_v, test_x)

        n_x = l1_w['w0'].shape[1]
        n_y = 10

        type_pz = 'gaussianmarg'
        type_px = 'gaussian'
        nonlinear = 'softplus'

    # Init generative model p(x,y,z)
    from anglepy.models.VAE_YZ_X import VAE_YZ_X
    uniform_y = True
    model = VAE_YZ_X(n_x, n_y, n_hidden, n_z, n_hidden, nonlinear, nonlinear, type_px,
                     type_qz="gaussianmarg", type_pz=type_pz, prior_sd=1, uniform_y=uniform_y)
    v, w = model.init_w(1e-3)

    # Init classification model q(y|x)
    from anglepy.models.MLP_Categorical import MLP_Categorical
    n_units = [n_x] + list(n_hidden) + [n_y]
    model_qy = MLP_Categorical(n_units=n_units, prior_sd=1, nonlinearity=nonlinear)
    u = model_qy.init_w(1e-3)

    # Just test: optionally warm-start from previously saved parameters (disabled)
    if False:
        u = ndict.loadz('u.ndict.tar.gz')
        v = ndict.loadz('v.ndict.tar.gz')
        w = ndict.loadz('w.ndict.tar.gz')

    # Progress hook
    t0 = time.time()

    def hook(t, u, v, w, ll):
        # Get classification error on the validation and test sets
        def error(dataset_x, dataset_y):
            _, _, _z = model_qy.gen_xz(u, {'x': dataset_x}, {})
            return np.sum(np.argmax(_z['py'], axis=0) != np.argmax(dataset_y, axis=0)) / (0.0 + dataset_y.shape[1])

        valid_error = error(valid_x, valid_y)
        test_error = error(test_x, test_y)

        # Log parameters and progress
        ndict.savez(u, logdir + 'u')
        ndict.savez(v, logdir + 'v')
        ndict.savez(w, logdir + 'w')

        dt = time.time() - t0
        print(dt, t, ll, valid_error, test_error)
        with open(logdir + 'hook.txt', 'a') as f:
            print(dt, t, ll, valid_error, test_error, file=f)

        return valid_error

    # Optimize
    result = optim_vae_ss_adam(alpha, model_qy, model, x_labeled, x_unlabeled, n_y, u, v, w,
                               n_minibatches=n_minibatches, n_passes=n_passes, hook=hook)
    return result
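# ---------------------------------------------------------------------------
# Background sketch (illustrative only; not used by the training code above).
# optim_vae_ss_adam presumably optimizes the semi-supervised objective of
# Kingma et al. (2014): labeled examples contribute the bound L(x, y) plus a
# classification term weighted by 'alpha', while unlabeled examples
# marginalize the unknown label:
#     L_u(x) = sum_y q(y|x) * (L(x, y) - log q(y|x)).
# The helper below only demonstrates that marginalization with NumPy arrays;
# its name and the assumed (n_y, n_batch) shapes are illustrative, not part
# of the anglepy API.
def _marginal_unlabeled_bound_example(log_bound_per_class, log_qy):
    # log_bound_per_class: (n_y, n_batch) per-class lower bounds L(x, y).
    # log_qy: (n_y, n_batch) classifier log-probabilities log q(y|x).
    # Returns a (n_batch,) lower bound on log p(x) with y summed out.
    qy = np.exp(log_qy)
    return np.sum(qy * (log_bound_per_class - log_qy), axis=0)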
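# ---------------------------------------------------------------------------
# Example invocation: a minimal sketch. The hyperparameter values below are
# illustrative assumptions (they do not reproduce any reported result), and
# running it requires the pretrained first-layer model files referenced in
# main().
if __name__ == '__main__':
    main(n_passes=3000, n_labeled=100, n_z=50, n_hidden=(500,),
         dataset='mnist_2layer', seed=0, alpha=0.1, n_minibatches=100,
         comment='example')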