num_inducing_points=num_inducing_points,
                              num_latent_dims=num_latent_dimensions,
                              truncation_level=truncation_level,
                              mask_size=1)  # Treat each observed dimension as independent.

            model_training_objective = model.objective
            predict_lower_bound_1, x_mean_test_1, x_covar_test_1, test_log_likelihood_1 = \
                model.predict_new_latent_variables(y_test=y_test_1)
            model_test_objective_1 = tf.negative(predict_lower_bound_1)
            predict_lower_bound_2, x_mean_test_2, x_covar_test_2, test_log_likelihood_2 = \
                model.predict_new_latent_variables(y_test=y_test_2)
            model_test_objective_2 = tf.negative(predict_lower_bound_2)

            # Optimisation.
            training_var_list = get_training_variables()
            test_var_list = get_prediction_variables()

            model_opt_train = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(
                loss=model_training_objective, var_list=training_var_list)
            model_opt_test_1 = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(
                loss=model_test_objective_1, var_list=test_var_list)
            model_opt_test_2 = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(
                loss=model_test_objective_2, var_list=test_var_list)

            with tf.Session() as s:
                # Initialise variables.
                s.run(tf.variables_initializer(var_list=training_var_list))  # Initialise training variables first.
                s.run(tf.variables_initializer(var_list=test_var_list))  # Then initialise prediction variables.
                s.run(tf.global_variables_initializer())  # Finally initialise any remaining global variables.

                # Training optimisation loop.
def run_dp_gp_lvm(y_train,
                  y_test_observed,
                  y_test_unobserved,
                  num_latent_dimensions,
                  num_inducing_points,
                  truncation_level,
                  dp_mask_size,
                  train_iter,
                  predict_iter,
                  learning_rate,
                  save_file,
                  seed_val=1):
    """
    Train a DP-GP-LVM and use it to predict the unobserved dimensions of a test set.

    Builds a DP-GP-LVM on y_train, optimises its training objective with Adam,
    then (with the trained model variables left untouched) optimises the
    prediction lower bound for test points given their observed dimensions,
    and finally scores the predicted posteriors against y_test_unobserved.
    All converged values and timings are written to save_file via np.savez,
    and a short summary is printed.

    :param y_train: Training data; presumably shape (num_samples, num_dims) — TODO confirm.
    :param y_test_observed: Observed dimensions of the test data, fed to
        predict_missing_data; assumed to align with a subset of y_train's
        dimensions — verify against caller.
    :param y_test_unobserved: Ground truth for the unobserved test dimensions;
        its column count determines how many per-dimension log-likelihood
        terms are evaluated.
    :param num_latent_dimensions: Dimensionality of the latent space.
    :param num_inducing_points: Number of inducing points for the sparse GP.
    :param truncation_level: Truncation level of the Dirichlet-process approximation.
    :param dp_mask_size: Mask size forwarded to dp_gp_lvm; groups output
        dimensions for DP assignment — exact semantics defined by the model class.
    :param train_iter: Number of Adam steps in the training phase.
    :param predict_iter: Number of Adam steps in the prediction phase.
    :param learning_rate: Adam learning rate (shared by both optimisers).
    :param save_file: Path handed to np.savez for the results archive.
    :param seed_val: NumPy RNG seed (note: no TensorFlow graph seed is set here).
    :return: None. Results are saved to save_file and printed.
    """

    # Seed NumPy's RNG for reproducible initialisation.
    np.random.seed(seed=seed_val)

    # Build the DP-GP-LVM graph (construction only; nothing runs until the session).
    gpdp = dp_gp_lvm(y_train=y_train,
                     num_latent_dims=num_latent_dimensions,
                     num_inducing_points=num_inducing_points,
                     truncation_level=truncation_level,
                     mask_size=dp_mask_size)

    num_unobserved_dimensions = np.shape(y_test_unobserved)[1]

    # Define objectives.
    training_objective = gpdp.objective
    predict_lower_bound, x_mean_test, x_covar_test, \
        predicted_mean, predicted_covar = gpdp.predict_missing_data(y_test=y_test_observed)
    # Maximise the prediction lower bound by minimising its negation.
    predict_objective = tf.negative(predict_lower_bound)

    # Optimisation. NOTE(review): these getters presumably collect variables from
    # graph collections populated during construction above, so the construction
    # order matters — confirm against their definitions.
    training_var_list = get_training_variables()
    predict_var_list = get_prediction_variables()

    opt_train = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(
        loss=training_objective, var_list=training_var_list)
    opt_predict = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(
        loss=predict_objective, var_list=predict_var_list)

    with tf.Session() as s:

        # Initialise variables.
        s.run(tf.variables_initializer(var_list=training_var_list)
              )  # Initialise training variables first.
        s.run(tf.variables_initializer(var_list=predict_var_list)
              )  # Then initialise prediction variables.
        s.run(
            tf.global_variables_initializer()
        )  # Finally initialise any remaining global variables such as opt ones.

        # Training optimisation loop.
        start_time = time()
        print('\nTraining DP-GP-LVM..')
        for c in range(train_iter):
            s.run(opt_train)
            if (c % 100) == 0:
                # Progress report every 100 iters; the extra s.run re-evaluates
                # the objective (cheap relative to the optimiser step).
                print('  DP-GP-LVM opt iter {:5}: {}'.format(
                    c, s.run(training_objective)))
        end_time = time()
        train_opt_time = end_time - start_time
        # NOTE(review): 'c' is the last loop index; this raises NameError if
        # train_iter == 0.
        print('Final iter {:5}:'.format(c))
        print('  DP-GP-LVM: {}'.format(s.run(training_objective)))
        print('Time to optimise: {} s'.format(train_opt_time))

        # Get converged values as numpy arrays.
        # NOTE(review): 'assignments' is fetched here but never saved or used below.
        ard_weights, noise_precision, signal_variance, inducing_input, assignments = \
            s.run((gpdp.ard_weights, gpdp.noise_precision, gpdp.signal_variance, gpdp.inducing_input, gpdp.assignments))
        x_mean, x_covar = s.run(gpdp.q_x)
        gamma_atoms, alpha_atoms, beta_atoms = s.run(gpdp.dp_atoms)

        # Re-initialise prediction variables so the prediction phase starts fresh
        # (training variables keep their converged values).
        s.run(tf.variables_initializer(var_list=predict_var_list))

        # Prediction optimisation loop.
        start_time = time()
        print('\nOptimising Predictions..')
        for c in range(predict_iter):
            s.run(opt_predict)
            if (c % 100) == 0:
                print('  DP-GP-LVM opt iter {:5}: {}'.format(
                    c, s.run(predict_objective)))
        end_time = time()
        predict_opt_time = end_time - start_time
        print('Final iter {:5}:'.format(c))
        print('  DP-GP-LVM: {}'.format(s.run(predict_objective)))
        print('Time to optimise: {} s'.format(predict_opt_time))

        # Get converged values as numpy arrays.
        x_mean_test_np, x_covar_test_np, predicted_mean_np, predicted_covar_np = s.run(
            (x_mean_test, x_covar_test, predicted_mean, predicted_covar))

        # Log-likelihood of the ground truth under the predicted posterior, one
        # term per unobserved dimension: column du of the ground truth is scored
        # against the du-th column of the predicted mean and the du-th slice of
        # the predicted covariance (stacked along axis 0).
        gt_log_likelihoods = [
            mvn_log_pdf(x=tf.transpose(
                tf.slice(y_test_unobserved, begin=[0, du], size=[-1, 1])),
                        mean=tf.transpose(
                            tf.slice(predicted_mean,
                                     begin=[0, du],
                                     size=[-1, 1])),
                        covariance=tf.squeeze(tf.slice(predicted_covar,
                                                       begin=[du, 0, 0],
                                                       size=[1, -1, -1]),
                                              axis=0))
            for du in range(num_unobserved_dimensions)
        ]
        gt_log_likelihoods_np = np.array(s.run(gt_log_likelihoods))
        # Sum over dimensions (treats the per-dimension terms as independent).
        gt_log_likelihood = np.sum(gt_log_likelihoods_np)

    # Save results.
    np.savez(save_file,
             y_train=y_train,
             y_test_observed=y_test_observed,
             y_test_unobserved=y_test_unobserved,
             ard_weights=ard_weights,
             noise_precision=noise_precision,
             signal_variance=signal_variance,
             x_u=inducing_input,
             x_mean=x_mean,
             x_covar=x_covar,
             gamma_atoms=gamma_atoms,
             alpha_atoms=alpha_atoms,
             beta_atoms=beta_atoms,
             train_opt_time=train_opt_time,
             x_mean_test=x_mean_test_np,
             x_covar_test=x_covar_test_np,
             predicted_mean=predicted_mean_np,
             predicted_covar=predicted_covar_np,
             predict_opt_time=predict_opt_time,
             gt_log_likelihoods=gt_log_likelihoods_np,
             gt_log_likelihood=gt_log_likelihood)

    # Print results.
    print('\nDP-GP-LVM:')
    print('  Ground Truth Predicted Posterior Log-Likelihood: {}'.format(
        gt_log_likelihood))
    print('  Noise Precisions: {}'.format(np.squeeze(noise_precision)))
# Example #3
def run_mrd(y_train,
            y_test_observed,
            y_test_unobserved,
            num_latent_dimensions,
            num_inducing_points,
            view_mask,
            train_iter,
            predict_iter,
            learning_rate,
            save_file,
            seed_val=1):
    """
    Train an MRD model and use it to predict the unobserved dimensions of a test set.

    Splits y_train column-wise into views of width view_mask, trains a
    manifold-relevance-determination model on those views with Adam, then
    optimises the prediction lower bound for the test points given their
    observed view, and scores the predicted posteriors against
    y_test_unobserved. Converged values and timings are written to save_file
    via np.savez.

    :param y_train: Training data; presumably shape (num_samples, num_dims) — TODO confirm.
    :param y_test_observed: Observed test dimensions; wrapped as a single view
        for predict_missing_data.
    :param y_test_unobserved: Ground truth for the unobserved test dimensions;
        its column count determines how many log-likelihood terms are evaluated.
    :param num_latent_dimensions: Dimensionality of the shared latent space.
    :param num_inducing_points: Number of inducing points for the sparse GP.
    :param view_mask: Column width of each training view; the final view is
        smaller when the number of output dimensions is not divisible by it.
    :param train_iter: Number of Adam steps in the training phase.
    :param predict_iter: Number of Adam steps in the prediction phase.
    :param learning_rate: Adam learning rate (shared by both optimisers).
    :param save_file: Path handed to np.savez for the results archive.
    :param seed_val: NumPy RNG seed (note: no TensorFlow graph seed is set here).
    :return: None. Results are saved to save_file.
    """

    # Seed NumPy's RNG for reproducible initialisation.
    np.random.seed(seed=seed_val)

    # Segment training data into views of size view_mask (final view may be narrower).
    num_output_dimensions = np.shape(y_train)[1]
    views_train = [
        y_train[:, i:i + view_mask]
        for i in range(0, num_output_dimensions, view_mask)
    ]

    # Build the MRD graph (construction only; nothing runs until the session).
    mrd = manifold_relevance_determination(
        views_train=views_train,
        num_latent_dims=num_latent_dimensions,
        num_inducing_points=num_inducing_points)

    # The test data is NOT segmented like the training data: each is a single view.
    num_unobserved_dimensions = np.shape(y_test_unobserved)[1]
    views_test_observed = [y_test_observed]
    views_test_unobserved = [y_test_unobserved]

    # Alias for the unobserved test data used in the likelihood evaluation below.
    ground_truth = y_test_unobserved

    # Define objectives.
    training_objective = mrd.objective
    predict_lower_bound, x_mean_test, x_covar_test, \
        predicted_means, predicted_covars = mrd.predict_missing_data(views_test=views_test_observed)
    # Maximise the prediction lower bound by minimising its negation.
    predict_objective = tf.negative(predict_lower_bound)

    # Optimisation. NOTE(review): these getters presumably collect variables from
    # graph collections populated during construction above, so the construction
    # order matters — confirm against their definitions.
    training_var_list = get_training_variables()
    predict_var_list = get_prediction_variables()

    opt_train = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(
        loss=training_objective, var_list=training_var_list)
    opt_predict = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(
        loss=predict_objective, var_list=predict_var_list)

    with tf.Session() as s:

        # Initialise variables.
        s.run(tf.variables_initializer(var_list=training_var_list)
              )  # Initialise training variables first.
        s.run(tf.variables_initializer(var_list=predict_var_list)
              )  # Then initialise prediction variables.
        s.run(
            tf.global_variables_initializer()
        )  # Finally initialise any remaining global variables such as opt ones.

        # Training optimisation loop.
        start_time = time()
        print('\nTraining MRD..')
        for c in range(train_iter):
            s.run(opt_train)
            if (c % 100) == 0:
                # Progress report every 100 iters; the extra s.run re-evaluates
                # the objective (cheap relative to the optimiser step).
                print('  MRD opt iter {:5}: {}'.format(
                    c, s.run(training_objective)))
        end_time = time()
        train_opt_time = end_time - start_time
        # NOTE(review): 'c' is the last loop index; this raises NameError if
        # train_iter == 0.
        print('Final iter {:5}:'.format(c))
        print('  MRD: {}'.format(s.run(training_objective)))
        print('Time to optimise: {} s'.format(train_opt_time))

        # Get converged values as numpy arrays (per-view structures for MRD).
        ard_weights, noise_precisions, signal_variances, inducing_inputs = s.run(
            (mrd.ard_weights, mrd.noise_precision, mrd.signal_variance,
             mrd.inducing_input))
        x_mean, x_covar = s.run(mrd.q_x)

        # Re-initialise prediction variables so the prediction phase starts fresh
        # (training variables keep their converged values).
        s.run(tf.variables_initializer(var_list=predict_var_list))

        # Prediction optimisation loop.
        start_time = time()
        print('\nOptimising Predictions..')
        for c in range(predict_iter):
            s.run(opt_predict)
            if (c % 100) == 0:
                print('  MRD opt iter {:5}: {}'.format(
                    c, s.run(predict_objective)))
        end_time = time()
        predict_opt_time = end_time - start_time
        print('Final iter {:5}:'.format(c))
        print('  MRD: {}'.format(s.run(predict_objective)))
        print('Time to optimise: {} s'.format(predict_opt_time))

        # Get converged values as numpy arrays.
        x_mean_test_np, x_covar_test_np, list_predicted_means, list_predicted_covars = s.run(
            (x_mean_test, x_covar_test, predicted_means, predicted_covars))

        # Flatten the per-view prediction lists: means stacked column-wise,
        # covariances concatenated along the leading (dimension) axis.
        predicted_means_np = np.hstack(list_predicted_means)
        predicted_covars_np = np.concatenate(list_predicted_covars, axis=0)

        # Log-likelihood of the ground truth under the predicted posterior, one
        # term per unobserved dimension. The NumPy arrays fed to tf.slice are
        # converted to constant tensors by TensorFlow.
        gt_log_likelihoods = [
            mvn_log_pdf(x=tf.transpose(
                tf.slice(ground_truth, begin=[0, du], size=[-1, 1])),
                        mean=tf.transpose(
                            tf.slice(predicted_means_np,
                                     begin=[0, du],
                                     size=[-1, 1])),
                        covariance=tf.squeeze(tf.slice(predicted_covars_np,
                                                       begin=[du, 0, 0],
                                                       size=[1, -1, -1]),
                                              axis=0))
            for du in range(num_unobserved_dimensions)
        ]
        gt_log_likelihoods_np = np.array(s.run(gt_log_likelihoods))
        # Sum over dimensions (treats the per-dimension terms as independent).
        gt_log_likelihood = np.sum(gt_log_likelihoods_np)

    # Save results. Converting lists to numpy arrays.
    np.savez(save_file,
             y_train=y_train,
             y_test_observed=y_test_observed,
             y_test_unobserved=y_test_unobserved,
             views_train=views_train,
             views_test_observed=views_test_observed,
             views_test_unobserved=views_test_unobserved,
             ard_weights=np.array(ard_weights),
             noise_precision=np.array(noise_precisions),
             signal_variance=np.array(signal_variances),
             x_u=np.array(inducing_inputs),
             x_mean=x_mean,
             x_covar=x_covar,
             train_opt_time=train_opt_time,
             x_mean_test=x_mean_test_np,
             x_covar_test=x_covar_test_np,
             predicted_mean=predicted_means_np,
             predicted_covar=predicted_covars_np,
             predict_opt_time=predict_opt_time,
             gt_log_likelihoods=gt_log_likelihoods_np,
             gt_log_likelihood=gt_log_likelihood)