Exemplo n.º 1
0
def main():
    """Run minibatch variational inference for a Gaussian mixture and export results.

    Reads the module-level ``args`` (``dataset``, ``randomInit``, ``maxIter``
    and the ``export*`` flags) together with ``K``, ``BATCH_SIZE``,
    ``THRESHOLD`` and ``VERBOSE``. The dataset must be a pickle file whose
    ``'xn'`` entry is a two-dimensional array of shape ``(N, D)``.

    Every step draws ``BATCH_SIZE`` row indices (with replacement), updates the
    variational parameters on that minibatch and evaluates ``elbo2`` on it.
    The per-step bounds are averaged once ``N // BATCH_SIZE`` of them have been
    collected; each such average counts as one iteration in ``n_iters``.

    Depending on the flags, files are written under ``generated/``: plots
    (only when ``VERBOSE``), ``scavi_assignments.csv``,
    ``scavi_variational_parameters.pkl`` and ``scavi_elbos.pkl``.

    Error handling: an ``IOError`` is reported and swallowed, the two known
    validation failures are reported with a message, and any other exception
    is reported and re-raised.
    """
    try:
        if not ('.pkl' in args.dataset): raise Exception('input_format')

        # Get data
        # NOTE: unpickling executes arbitrary code; only load trusted files.
        # Pickle streams are binary, so the file is opened in 'rb' mode.
        with open(args.dataset, 'rb') as inputfile:
            data = pkl.load(inputfile)
            xn = data['xn']
        N, D = xn.shape

        # Always start the clock: the ELBO export below needs the elapsed
        # time even when VERBOSE is off.
        init_time = time()

        # Priors
        alpha_o = np.array([1.0] * K)
        nu_o = np.array([float(D)])
        if nu_o[0] < D: raise Exception('degrees_of_freedom')
        w_o = generate_random_positive_matrix(D)
        m_o = np.array([0.0] * D)
        beta_o = np.array([0.7])

        # Variational parameters initialization
        lambda_phi = np.random.dirichlet(alpha_o, N) \
            if args.randomInit else init_kmeans(xn, N, K)
        lambda_pi = np.zeros(shape=K)
        lambda_beta = np.zeros(shape=K)
        lambda_nu = np.zeros(shape=K)
        lambda_m = np.random.rand(K, D)
        lambda_w = np.array([np.copy(w_o) for _ in range(K)])

        # Plot configs
        if VERBOSE and D == 2:
            plt.ion()
            plt.style.use('seaborn-darkgrid')
            fig = plt.figure(figsize=(10, 10))
            ax_spatial = fig.add_subplot(1, 1, 1)
            circs = []
            sctZ = None

        # Inference
        lbs = []
        aux_lbs = []
        n_iters = 0
        # Explicit floor division: range() and the epoch-length comparison
        # below both need an integer number of minibatches per epoch.
        batches_per_epoch = N // BATCH_SIZE
        for i in range(args.maxIter * batches_per_epoch):

            # Sample xn
            idx = np.random.randint(N, size=BATCH_SIZE)
            x_batch = xn[idx, :]

            # Variational parameter updates
            lambda_pi = update_lambda_pi(lambda_pi, lambda_phi[idx, :],
                                         alpha_o)
            Nks = np.sum(lambda_phi[idx, :], axis=0)
            lambda_beta = update_lambda_beta(lambda_beta, beta_o, Nks)
            lambda_nu = update_lambda_nu(lambda_nu, nu_o, Nks)
            lambda_m = update_lambda_m(lambda_m, lambda_phi[idx, :],
                                       lambda_beta, m_o, beta_o, x_batch,
                                       BATCH_SIZE, D)
            lambda_w = update_lambda_w(lambda_w, lambda_phi[idx, :],
                                       lambda_beta, lambda_m, w_o, beta_o, m_o,
                                       x_batch, K, BATCH_SIZE, D)
            lambda_phi = update_lambda_phi(lambda_phi, lambda_pi, lambda_m,
                                           lambda_nu, lambda_w, lambda_beta,
                                           xn, K, D, idx)

            # ELBO computation, rescaled by the number of minibatches per epoch
            lb = elbo2(x_batch, alpha_o, lambda_pi, lambda_phi[idx, :], m_o,
                       lambda_m, beta_o, lambda_beta, nu_o, lambda_nu, w_o,
                       inv(lambda_w), BATCH_SIZE, K)
            lb = lb * batches_per_epoch
            aux_lbs.append(lb)
            if len(aux_lbs) == batches_per_epoch:
                # One epoch's worth of bounds collected: record their mean.
                lbs.append(np.mean(aux_lbs))
                n_iters += 1
                aux_lbs = []

            if VERBOSE:
                print('\n******* ITERATION {} *******'.format(n_iters))
                print('lambda_pi: {}'.format(lambda_pi))
                print('lambda_beta: {}'.format(lambda_beta))
                print('lambda_nu: {}'.format(lambda_nu))
                print('lambda_m: {}'.format(lambda_m))
                print('lambda_w: {}'.format(lambda_w))
                print('lambda_phi: {}'.format(lambda_phi[0:9, :]))
                print('ELBO: {}'.format(lb))
                if D == 2:
                    covs = [
                        lambda_w[k, :, :] / (lambda_nu[k] - D - 1)
                        for k in range(K)
                    ]
                    ax_spatial, circs, sctZ = plot_iteration(
                        ax_spatial, circs, sctZ, lambda_m, covs, xn, i, K)

            # Break condition: compare this step's bound with the last
            # recorded epoch mean (only possible once one epoch is recorded).
            improve = lb - lbs[n_iters - 1] if n_iters > 0 else lb
            if VERBOSE: print('Improve: {}'.format(improve))
            if n_iters > 0 and 0 <= improve < THRESHOLD: break

        # Hard assignment: the most probable component for every point.
        zn = np.argmax(lambda_phi, axis=1)

        # Measured unconditionally so the ELBO export works without VERBOSE.
        exec_time = time() - init_time

        if VERBOSE:
            print('\n******* RESULTS *******')
            for k in range(K):
                print('Mu k{}: {}'.format(k, lambda_m[k, :]))
            print('Time: {} seconds'.format(exec_time))
            print('Iterations: {}'.format(n_iters))
            print('ELBOs: {}'.format(lbs[-10:]))
            if D == 2: plt.savefig('generated/scavi_plot.png')
            if D == 3:
                fig = plt.figure()
                ax = fig.add_subplot(111, projection='3d')
                ax.scatter(xn[:, 0],
                           xn[:, 1],
                           xn[:, 2],
                           c=zn,
                           cmap=cm.gist_rainbow,
                           s=5)
                ax.set_xlabel('X')
                ax.set_ylabel('Y')
                ax.set_zlabel('Z')
                plt.show()
            plt.gcf().clear()
            plt.plot(np.arange(len(lbs)), lbs)
            plt.ylabel('ELBO')
            plt.xlabel('Iterations')
            plt.savefig('generated/scavi_elbos.png')

        if args.exportAssignments:
            # NOTE(review): 'wb' is the Python 2 csv convention this file
            # follows; on Python 3 csv needs open(..., 'w', newline='').
            with open('generated/scavi_assignments.csv', 'wb') as output:
                writer = csv.writer(output,
                                    delimiter=';',
                                    quotechar='',
                                    escapechar='\\',
                                    quoting=csv.QUOTE_NONE)
                writer.writerow(['zn'])
                for assignment in zn:
                    writer.writerow([assignment])

        if args.exportVariationalParameters:
            with open('generated/scavi_variational_parameters.pkl',
                      'wb') as output:
                pkl.dump(
                    {
                        'lambda_pi': lambda_pi,
                        'lambda_m': lambda_m,
                        'lambda_beta': lambda_beta,
                        'lambda_nu': lambda_nu,
                        'lambda_w': lambda_w,
                        'K': K,
                        'D': D
                    }, output)

        if args.exportELBOs:
            # No completed epoch means there is no per-iteration time to
            # report; store NaN instead of dividing by zero.
            iter_time = exec_time / n_iters if n_iters else float('nan')
            with open('generated/scavi_elbos.pkl', 'wb') as output:
                pkl.dump({
                    'elbos': lbs,
                    'iter_time': iter_time
                }, output)

    except IOError:
        print('File not found!')
    except Exception as e:
        # Exceptions raised without arguments have an empty ``args`` tuple;
        # indexing it blindly would mask the original error with IndexError.
        reason = e.args[0] if e.args else None
        if reason == 'input_format': print('Input must be a pkl file')
        elif reason == 'degrees_of_freedom':
            print('Degrees of freedom can not be smaller than D!')
        else:
            print('Unexpected error: {}'.format(sys.exc_info()[0]))
            raise
Exemplo n.º 2
0
    data = pkl.load(inputfile)
    xn = data['xn']
# Number of observations (rows) and dimensions (columns) of the data matrix.
N, D = xn.shape

# Start the wall-clock timer only when verbose reporting is enabled.
if VERBOSE: init_time = time()

# Priors
alpha_o = np.array([1.0] * K)
nu_o = np.array([float(D)])
w_o = generate_random_positive_matrix(D)
m_o = np.array([0.0] * D)
beta_o = np.array([0.7])

# Variational parameters initialization
# (NumPy starting values; most ``*_var`` names are rebound to TensorFlow
# variables further down.)
lambda_phi_var = np.random.dirichlet(alpha_o, N) \
    if args.randomInit else init_kmeans(xn, N, K)
lambda_pi_var = np.zeros(shape=K)
lambda_beta_var = np.zeros(shape=K)
# Every component starts with the value D.
lambda_nu_var = np.zeros(shape=K) + D
# NOTE(review): the sampling bounds come from the first column of xn only,
# yet they are applied to all D columns -- confirm this is intended.
lambda_m_var = np.random.uniform(np.min(xn[:, 0]), np.max(xn[:, 0]), (K, D))
lambda_w_var = np.array([np.copy(w_o) for _ in range(K)])

# Wrap the starting values in float64 TensorFlow variables. ``lambda_phi`` is
# the only one created with trainable=False. The ``*_var`` names on the left
# now refer to tf.Variable objects instead of the NumPy arrays above.
lambda_phi = tf.Variable(lambda_phi_var, trainable=False, dtype=tf.float64)
lambda_pi_var = tf.Variable(lambda_pi_var, dtype=tf.float64)
lambda_beta_var = tf.Variable(lambda_beta_var, dtype=tf.float64)
lambda_nu_var = tf.Variable(lambda_nu_var, dtype=tf.float64)
lambda_m = tf.Variable(lambda_m_var, dtype=tf.float64)
lambda_w_var = tf.Variable(lambda_w_var, dtype=tf.float64)

# Maintain numerical stability
# (lambda_pi is the softplus of the raw variable rather than the variable
# itself.)
lambda_pi = tf.nn.softplus(lambda_pi_var)
Exemplo n.º 3
0
def main():
    """Run full-batch coordinate-ascent variational inference on ``args.dataset``.

    Reads the module-level ``args`` (``dataset``, ``randomInit``, ``maxIter``)
    together with ``K``, ``THRESHOLD`` and ``VERBOSE``. The dataset must be a
    pickle file whose ``'xn'`` entry is a two-dimensional array of shape
    ``(N, D)``.

    Each iteration updates ``lambda_pi``, ``lambda_phi``, ``lambda_beta`` and
    ``lambda_m`` in that order, then evaluates ``elbo`` and appends it to
    ``lbs``. The loop stops after ``args.maxIter`` iterations or as soon as
    the bound improves by a positive amount smaller than ``THRESHOLD``.
    With ``VERBOSE`` set, parameters are printed and plotted every iteration
    and a summary is printed at the end.
    """

    # Get data
    # NOTE: unpickling executes arbitrary code; only load trusted files.
    # Pickle streams are binary, so the file is opened in 'rb' mode.
    with open('{}'.format(args.dataset), 'rb') as inputfile:
        data = pkl.load(inputfile)
        xn = data['xn']
    N, D = xn.shape

    if VERBOSE: init_time = time()

    # Priors
    alpha_o = [1.0] * K
    # Sized from the data instead of a hard-coded length of 2, so the
    # broadcast against the (K, D) sums below is valid for any D.
    m_o = np.zeros(D)
    beta_o = 0.01
    # D x D integer identity matrix.
    delta_o = np.eye(D, dtype=np.int64)

    # Variational parameters initialization
    lambda_phi = np.random.dirichlet(alpha_o, N) \
        if args.randomInit else init_kmeans(xn, N, K)
    lambda_beta = beta_o + np.sum(lambda_phi, axis=0)
    # Repeat 1 / lambda_beta across the D columns to get a (K, D) scale.
    lambda_m = np.tile(1. / lambda_beta, (D, 1)).T * \
               (beta_o * m_o + np.dot(lambda_phi.T, xn))

    # Plot configs
    if VERBOSE:
        plt.ion()
        fig = plt.figure(figsize=(10, 10))
        ax_spatial = fig.add_subplot(1, 1, 1)
        circs = []
        sctZ = None

    # Inference
    n_iters = 0
    lbs = []
    for _ in range(args.maxIter):

        # Variational parameter updates
        lambda_pi = update_lambda_pi(lambda_phi, alpha_o)
        lambda_phi = update_lambda_phi(lambda_pi, lambda_m, lambda_beta,
                                       lambda_phi, delta_o, xn, N, D)
        lambda_beta = update_lambda_beta(lambda_phi, beta_o)
        lambda_m = update_lambda_m(lambda_beta, lambda_phi, m_o, beta_o, xn, D)

        # ELBO computation
        lb = elbo(xn, D, K, alpha_o, m_o, beta_o, delta_o, lambda_pi, lambda_m,
                  lambda_beta, lambda_phi)
        lbs.append(lb)

        if VERBOSE:
            print('\n******* ITERATION {} *******'.format(n_iters))
            print('lambda_pi: {}'.format(lambda_pi))
            print('lambda_beta: {}'.format(lambda_beta))
            print('lambda_m: {}'.format(lambda_m))
            print('lambda_phi: {}'.format(lambda_phi[0:9, :]))
            print('ELBO: {}'.format(lb))
            ax_spatial, circs, sctZ = plot_iteration(ax_spatial, circs, sctZ,
                                                     lambda_m, delta_o, xn,
                                                     n_iters, K)

        # Break condition
        # On the first pass index -1 refers to the bound just appended, so
        # ``improve`` is 0 there; the ``n_iters > 0`` guard ignores that case.
        improve = lb - lbs[n_iters - 1]
        if VERBOSE: print('Improve: {}'.format(improve))
        if (n_iters == (args.maxIter - 1)) \
                or (n_iters > 0 and 0 < improve < THRESHOLD):
            if VERBOSE and D == 2: plt.savefig('generated/plot.png')
            break

        n_iters += 1

    if VERBOSE:
        print('\n******* RESULTS *******')
        for k in range(K):
            print('Mu k{}: {}'.format(k, lambda_m[k, :]))
        final_time = time()
        exec_time = final_time - init_time
        print('Time: {} seconds'.format(exec_time))
        print('Iterations: {}'.format(n_iters))
        print('ELBOs: {}'.format(lbs))