Example no. 1
# Module-level imports assumed by this snippet; the surrounding script is
# expected to provide them (import paths follow the anglepy package layout;
# `pp` (used for pp.PCA_fromfile) and `optim_vae_ss_adam` are assumed to be
# defined or imported elsewhere in the same script):
import os
import sys
import numpy as np
import anglepy.ndict as ndict


def main(n_passes, n_labeled, n_z, n_hidden, dataset, seed, alpha,
         n_minibatches, comment):
    '''
    Learn a variational auto-encoder with generative model p(x,y,z)=p(y)p(z)p(x|y,z)
    And where 'x' is always observed and 'y' is _sometimes_ observed (hence semi-supervised).
    We're going to use q(y|x) as a classification model.
    '''
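    # Objective sketch (cf. Kingma et al., 2014, "Semi-Supervised Learning
    # with Deep Generative Models"): labeled pairs (x, y) are trained on the
    # variational bound plus an alpha-weighted classification term log q(y|x);
    # for unlabeled x, the label y is marginalized out under q(y|x).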

    import time
    logdir = 'results/learn_yz_x_ss_' + dataset + '_' + str(n_z) + '-' + str(
        n_hidden) + '_nlabeled' + str(n_labeled) + '_alpha' + str(
            alpha) + '_seed' + str(seed) + '_' + comment + '-' + str(
                int(time.time())) + '/'
    if not os.path.exists(logdir): os.makedirs(logdir)
    print('logdir:', logdir)

    print(sys.argv[0], n_labeled, n_z, n_hidden, dataset, seed, comment)

    np.random.seed(seed)

    # Init data
    if dataset == 'mnist_2layer':

        size = 28
        dim_input = (size, size)

        # Load model for feature extraction
        path = 'models/mnist_z_x_50-500-500_longrun/'  #'models/mnist_z_x_50-600-600/'
        l1_v = ndict.loadz(path + 'v.ndict.tar.gz')
        l1_w = ndict.loadz(path + 'w.ndict.tar.gz')
        n_h = (500, 500)
        from anglepy.models.VAE_Z_X import VAE_Z_X
        l1_model = VAE_Z_X(n_x=28 * 28,
                           n_hidden_q=n_h,
                           n_z=50,
                           n_hidden_p=n_h,
                           nonlinear_q='softplus',
                           nonlinear_p='softplus',
                           type_px='bernoulli',
                           type_qz='gaussianmarg',
                           type_pz='gaussianmarg',
                           prior_sd=1)
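
        # The first-layer VAE's recognition model q(z|x) is used purely as a
        # fixed feature extractor: the second-layer model below is trained on
        # its latent codes instead of raw pixels.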

        # Load dataset
        import anglepy.data.mnist as mnist
        # load train and test sets
        train_x, train_y, valid_x, valid_y, test_x, test_y = mnist.load_numpy_split(
            size, binarize_y=True)

        # create labeled/unlabeled split in training set
        x_l, y_l, x_u, y_u = mnist.create_semisupervised(
            train_x, train_y, n_labeled)

        # Extract features

        # 1. Determine which dimensions to keep
        def transform(v, _x):
            return l1_model.dist_qz['z'](*([_x] + list(v.values()) +
                                           [np.ones((1, _x.shape[1]))]))

        q_mean, _ = transform(l1_v, x_u[0:1000])
        idx_keep = np.std(q_mean, axis=1) > 0.1
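        # Rationale: latent dimensions whose posterior mean barely varies
        # across examples are effectively unused by the first-layer model,
        # so only the informative dimensions are kept.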

        # 2. Select dimensions
        for key in ['mean_b', 'mean_w', 'logvar_b', 'logvar_w']:
            l1_v[key] = l1_v[key][idx_keep, :]
        l1_w['w0'] = l1_w['w0'][:, idx_keep]

        # 3. Extract features
        x_mean_u, x_logvar_u = transform(l1_v, x_u)
        x_mean_l, x_logvar_l = transform(l1_v, x_l)
        x_unlabeled = {'mean': x_mean_u, 'logvar': x_logvar_u, 'y': y_u}
        x_labeled = {'mean': x_mean_l, 'logvar': x_logvar_l, 'y': y_l}
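
        # Note: features are kept as full Gaussian posteriors (mean and
        # log-variance), presumably so the optimizer can resample the
        # first-layer codes rather than rely on point estimates.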

        valid_x, _ = transform(l1_v, valid_x)
        test_x, _ = transform(l1_v, test_x)

        n_x = np.sum(idx_keep)
        n_y = 10

        type_pz = 'gaussianmarg'
        type_px = 'gaussian'
        nonlinear = 'softplus'

        colorImg = False

    if dataset == 'svhn_2layer':

        size = 32
        dim_input = (size, size)

        # Load model for feature extraction
        path = 'models/tmp/svhn_z_x_300-500-500/'
        l1_v = ndict.loadz(path + 'v.ndict.tar.gz')
        l1_w = ndict.loadz(path + 'w.ndict.tar.gz')
        f_enc, f_dec = pp.PCA_fromfile(path + 'pca_params.ndict.tar.gz', True)
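        # f_enc/f_dec apply the PCA preprocessing saved alongside the
        # first-layer model; inputs are projected by f_enc before entering
        # q(z|x) in transform() below.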
        from anglepy.models.VAE_Z_X import VAE_Z_X
        n_x = l1_v['w0'].shape[1]  #=600
        l1_model = VAE_Z_X(n_x=n_x,
                           n_hidden_q=(600, 600),
                           n_z=300,
                           n_hidden_p=(600, 600),
                           nonlinear_q='softplus',
                           nonlinear_p='softplus',
                           type_px='gaussian',
                           type_qz='gaussianmarg',
                           type_pz='gaussianmarg',
                           prior_sd=1)

        # SVHN dataset
        import anglepy.data.svhn as svhn
        size = 32
        train_x, train_y, valid_x, valid_y, test_x, test_y = svhn.load_numpy_split(
            False, binarize_y=True,
            extra=False)  #norb.load_resized(size, binarize_y=True)

        #train_x = np.hstack((_train_x, extra_x))
        #train_y = np.hstack((_train_y, extra_y))[:,:604000]

        # create labeled/unlabeled split in training set
        import anglepy.data.mnist as mnist
        x_l, y_l, x_u, y_u = mnist.create_semisupervised(
            train_x, train_y, n_labeled)

        # Extract features

        # 1. Determine which dimensions to keep
        def transform(v, _x):
            return l1_model.dist_qz['z'](*([f_enc(_x)] + list(v.values()) +
                                           [np.ones((1, _x.shape[1]))]))

        # 2. We're keeping all latent dimensions

        # 3. Extract features
        x_mean_u, x_logvar_u = transform(l1_v, x_u)
        x_mean_l, x_logvar_l = transform(l1_v, x_l)
        x_unlabeled = {'mean': x_mean_u, 'logvar': x_logvar_u, 'y': y_u}
        x_labeled = {'mean': x_mean_l, 'logvar': x_logvar_l, 'y': y_l}

        valid_x, _ = transform(l1_v, valid_x)
        test_x, _ = transform(l1_v, test_x)

        n_x = l1_w['w0'].shape[1]
        n_y = 10

        type_pz = 'gaussianmarg'
        type_px = 'gaussian'
        nonlinear = 'softplus'

    # Init VAE model p(x,y,z)
    from anglepy.models.VAE_YZ_X import VAE_YZ_X
    uniform_y = True
    model = VAE_YZ_X(n_x,
                     n_y,
                     n_hidden,
                     n_z,
                     n_hidden,
                     nonlinear,
                     nonlinear,
                     type_px,
                     type_qz="gaussianmarg",
                     type_pz=type_pz,
                     prior_sd=1,
                     uniform_y=uniform_y)
    v, w = model.init_w(1e-3)

    # Init q(y|x) model
    from anglepy.models.MLP_Categorical import MLP_Categorical
    n_units = [n_x] + list(n_hidden) + [n_y]
    model_qy = MLP_Categorical(n_units=n_units,
                               prior_sd=1,
                               nonlinearity=nonlinear)
    u = model_qy.init_w(1e-3)
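
    # Three parameter sets are optimized jointly: u for the classifier
    # q(y|x), v for the recognition model q(z|x,y), and w for the generative
    # model p(x|y,z).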

    # Optional: warm-start from previously saved parameters (disabled)
    if False:
        u = ndict.loadz('u.ndict.tar.gz')
        v = ndict.loadz('v.ndict.tar.gz')
        w = ndict.loadz('w.ndict.tar.gz')

    # Progress hook
    t0 = time.time()

    def hook(t, u, v, w, ll):

        # Get classification error of validation and test sets
        def error(dataset_x, dataset_y):
            _, _, _z = model_qy.gen_xz(u, {'x': dataset_x}, {})
            # Fraction of misclassified examples (columns are examples)
            return np.mean(
                np.argmax(_z['py'], axis=0) != np.argmax(dataset_y, axis=0))

        valid_error = error(valid_x, valid_y)
        test_error = error(test_x, test_y)

        # Log
        ndict.savez(u, logdir + 'u')
        ndict.savez(v, logdir + 'v')
        ndict.savez(w, logdir + 'w')

        dt = time.time() - t0

        print(dt, t, ll, valid_error, test_error)
        with open(logdir + 'hook.txt', 'a') as f:
            print(dt, t, ll, valid_error, test_error, file=f)

        return valid_error

    # Optimize
    result = optim_vae_ss_adam(alpha,
                               model_qy,
                               model,
                               x_labeled,
                               x_unlabeled,
                               n_y,
                               u,
                               v,
                               w,
                               n_minibatches=n_minibatches,
                               n_passes=n_passes,
                               hook=hook)

    return result
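
A minimal invocation sketch for the function above. The argument values here
are illustrative only, not taken from the source; the original script
presumably reads them from the command line:

main(n_passes=1000, n_labeled=100, n_z=50, n_hidden=(500,),
     dataset='mnist_2layer', seed=0, alpha=0.1, n_minibatches=100,
     comment='run1')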
Example no. 2
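# Fragment: assumes `numpy as np` and `anglepy.data.mnist as mnist` are
# imported, and that `size`, `train_x`, and `train_y` were defined earlier
# in the script.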
print("train_x:")
print("Max:", train_x.max())
print("Min:", train_x.min())
print("Std:", train_x.std())
print("Random example:", train_x[np.random.randint(low=0,
                                                   high=train_x.shape[0] - 1)])
print(" ")
print("train_y:")
print("Max:", train_y.max())
print("Min:", train_y.min())
print("Random example:", train_y[np.random.randint(low=0,
                                                   high=train_y.shape[0] - 1)])

# Now for semi-supervised learning:

train_x, train_y, valid_x, valid_y, test_x, test_y = mnist.load_numpy_split(
    size, binarize_y=True)
# create labeled/unlabeled split in training set
x_l, y_l, x_u, y_u = mnist.create_semisupervised(train_x, train_y, 100)
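# create_semisupervised draws the labeled subset from the training set
# (typically class-balanced, i.e. n_labeled/10 examples per digit) and
# returns the remainder as unlabeled data.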

print("--------------------")
print("Shape of the datasets.")
print(" ")
print("Train x labeled:", x_l.shape)
print("Train y labeled:", y_l.shape)
print("Train x unlabeled:", x_u.shape)
print("Train y unlabeled:", y_u.shape)
print(" ")
print("Descriptions:")
print(" ")
print("X labeled:")
print("Max:", x_l.max())