Example #1
    def local_discriminant(self, node, alpha, beta, b, x):
        X_file = str(datas_path) + "/data_" + str(node) + ".csv"
        X = pd.read_csv(X_file).values
        chi_file = str(datas_path) + "/chi.csv"
        chi = pd.read_csv(chi_file).values

        g = b
        n_node_data = X.shape[0]
        n_chi_data = chi.shape[0]
        for i in range(n_node_data):
            g += alpha[i] * rbf(x, X[i], self.gamma)
        for i in range(n_chi_data):
            g += beta[i] * rbf(x, chi[i], self.gamma)

        return np.sign(g)
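This example assumes a module-level rbf(x, y, gamma) helper that returns a Gaussian kernel value for two vectors (the same scalar-valued call pattern reappears in Examples #10-#12). A minimal sketch of such a helper, stated here only as an assumption about its interface:

import numpy as np

def rbf(x, y, gamma):
    # Gaussian (RBF) kernel between two 1-D vectors:
    # k(x, y) = exp(-gamma * ||x - y||^2)
    diff = np.asarray(x) - np.asarray(y)
    return np.exp(-gamma * np.dot(diff, diff))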
Example #2
def main(dataset='magic'):
    """
    Run experiments for determining the number of principal components to
    retain in kernel PCA through cross-validation.

    After each plot is shown, the program halts; close the plot to continue.

    Parameters
    ----------
    dataset : str
        Either 'magic', 'yeast', 'cardiotocography' or 'segmentation'

    """

    if dataset not in ('magic', 'yeast', 'cardiotocography', 'segmentation'):
        raise ValueError("Unknown dataset.")

    X = getattr(data, "get_" + dataset + "_data")()

    for datasize, n_iter in zip((10, 50, 100), (10, 50, 90)):

        X_i = X[:datasize]

        sigma = median_distance(X_i)
        kernel = lambda x, y: rbf(x, y, sigma)
        kpca_cv_experiment(X_i, kernel, dataset, n_iter, "rbf")

        kpca_cv_experiment(X_i, poly, dataset, n_iter, "polynomial")
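The bandwidth comes from a median_distance helper that is not shown here. A common heuristic, and the behaviour assumed in the sketch below, is to take the median pairwise Euclidean distance of the data as sigma:

import numpy as np
from scipy.spatial.distance import pdist

def median_distance(X):
    # Assumed behaviour: median of all pairwise Euclidean distances,
    # a standard bandwidth heuristic for the RBF kernel.
    return np.median(pdist(X))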
Example #3
 def fit(self, x_np, y_multiclass_np, kernel=rbf(1), C=0.001):
     x = torch.from_numpy(x_np) if not torch.is_tensor(x_np) else x_np
     x = x.float().to(self.device)
     y_multiclass = torch.from_numpy(y_multiclass_np) if not torch.is_tensor(y_multiclass_np) else y_multiclass_np
     y_multiclass = y_multiclass.to("cpu").view(-1)
     self.x = x
     self.y_multiclass = y_multiclass
     self.kernel = kernel
     self.C = C
     self.y_matrix = torch.stack([self.cast(y_multiclass, k) for k in range(self.n_svm)],0)
     for k in range(self.n_svm):
         print("training ",k,"th SVM in ",self.n_svm)
         y = self.y_matrix[k, :].view(-1,1)
         yx = y.to(self.device)*x
         G = kernel(yx, yx).to("cpu") # Gram matrix
         G = G + torch.eye(G.shape[0])*1e-5 # to make sure G is positive definite
         objective = cp.Maximize(cp.sum(self.a[k])-(1/2)*cp.quad_form(self.a[k], G))
         if not objective.is_dcp():
             print("Not solvable!")
             assert objective.is_dcp()
         constraints = [self.a[k] <= C, cp.sum(cp.multiply(self.a[k],y)) == 0] # box constraint
         prob = cp.Problem(objective, constraints)
         result = prob.solve()
         x_pos = x[y[:,0]==1,:]
         x_neg = x[y[:,0]==-1,:]
         b_min = -torch.min(self.wTx(k,x_pos)) if x_pos.shape[0]!=0 else torch.tensor(0,device=self.device)
         b_max = -torch.max(self.wTx(k,x_neg)) if x_neg.shape[0]!=0 else torch.tensor(0,device=self.device)
         self.b[k,0] = (1/2)*(b_min + b_max)
     self.a_matrix = torch.stack([torch.from_numpy(i.value).float().view(-1) for i in self.a],0).to(self.device)
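Note that rbf(1) is used here as a kernel factory: it returns a callable mapping batches x [n, d] and y [m, d] to an n x m Gram matrix (see the comment in Example #15's __init__). A minimal PyTorch sketch of such a factory, assuming a Gaussian kernel with bandwidth sigma:

import torch

def rbf(sigma=1.0):
    # Assumed factory interface: returns kernel(x, y) -> [n, m] Gram matrix.
    def kernel(x, y):
        dist2 = torch.cdist(x, y) ** 2  # pairwise squared Euclidean distances
        return torch.exp(-dist2 / (2 * sigma ** 2))
    return kernel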
Example #4
    def __init__(self,
                 Z,
                 sf2,
                 alpha,
                 beta,
                 M,
                 Q,
                 N,
                 D,
                 update_global_statistics=True):
        '''
        Init the calculation of partial terms.

        Args:
            Z: inducing inputs
            sf2: kernel signal variance (sf**2)
            alpha: ARD precisions (lengthscales are alpha**-0.5)
            beta: noise precision
            M, Q, N, D: number of inducing points, latent dimensionality,
                number of data points and output dimensionality
        '''
        # TODO: Take or assert M, Q, N, D from Z
        self.Z = Z

        self.M = M
        self.Q = Q
        self.N = N
        self.D = D

        self.beta = beta
        self.hyp = kernels.ArdHypers(self.Q, sf=sf2**0.5, ard=alpha**-0.5)
        self.kernel = kernels.rbf(self.Q, sf=self.hyp.sf, ard=self.hyp.ard)

        if update_global_statistics:
            self.update_global_statistics()
Example #5
def test_case_generator():
    sys.path.append('solutions/hw2')
    sys.path.append('suppl/hw2')
    from assignment_two_adaboost import weak_learner as wl
    sys.path.pop()
    sys.path.pop()
    from kernels import rbf

    seed(1)
    instances = normal(size=(50, 5))
    labels = binomial(1, 0.5, 50)
    dist = dirichlet(uniform(size=50))
    ker = rbf(1)
    mat = uniform(size=(5, 5))
    mat = (mat / np.sum(mat, axis=1)).T
    test_cases = {'assignment_two_adaboost': {
        'compute_error':
        [lambda x: x[3] < 0.2, instances, labels, dist],
        'run_adaboost':
        [instances, labels, wl],
        'update_dist':
        [lambda x: x[2] > -0.2, instances,
         labels, dist, normal()],
        'weak_learner': [instances, labels, dist]},
        'assignment_two_pagerank': {'compute_pageranks': [mat],
                                    'main': []},
        'assignment_two_svm': {
        'evaluate_classifier':
        [lambda x: norm(x) > 5, instances, labels],
        'svm_train': [instances, labels, ker]}}

    return test_cases
Example #6
 def update_global_statistics(self):
     '''
     Update statistics for when Z changes
     '''
     self.kernel = kernels.rbf(self.Q, sf=self.hyp.sf, ard=self.hyp.ard)
     self.Kmm = self.kernel.K(self.Z)
     self.Kmm_inv = linalg.inv(self.Kmm)
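Explicitly inverting Kmm can be numerically fragile. A common alternative, sketched here only as a suggestion and not as this project's API, is to solve against a jittered Cholesky factor instead of forming Kmm_inv:

import numpy as np
from scipy import linalg

def solve_Kmm(Kmm, B, jitter=1e-6):
    # Solve Kmm @ X = B via a jittered Cholesky factorisation rather than
    # computing an explicit inverse.
    L = linalg.cholesky(Kmm + jitter * np.eye(Kmm.shape[0]), lower=True)
    return linalg.cho_solve((L, True), B)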
Example #7
def test_case_generator():
    sys.path.append('solutions/hw2')
    sys.path.append('suppl/hw2')
    from assignment_two_adaboost import weak_learner as wl
    sys.path.pop()
    sys.path.pop()
    from kernels import rbf

    seed(1)
    instances = normal(size=(50, 5))
    labels = binomial(1, 0.5, 50)
    dist = dirichlet(uniform(size=50))
    ker = rbf(1)
    mat = uniform(size=(5, 5))
    mat = (mat / np.sum(mat, axis=1)).T
    test_cases = {
        'assignment_two_adaboost': {
            'compute_error': [lambda x: x[3] < 0.2, instances, labels, dist],
            'run_adaboost': [instances, labels, wl],
            'update_dist':
            [lambda x: x[2] > -0.2, instances, labels, dist,
             normal()],
            'weak_learner': [instances, labels, dist]
        },
        'assignment_two_pagerank': {
            'compute_pageranks': [mat],
            'main': []
        },
        'assignment_two_svm': {
            'evaluate_classifier': [lambda x: norm(x) > 5, instances, labels],
            'svm_train': [instances, labels, ker]
        }
    }

    return test_cases
Example #8
 def fit(self, x, y_multiclass, kernel=rbf(1), C=0.001):
     y_multiclass = y_multiclass.reshape(-1)
     self.x = x
     self.y_multiclass = y_multiclass
     self.kernel = kernel
     self.C = C
     self.y_matrix = np.stack(
         [self.cast(y_multiclass, k) for k in range(self.n_svm)], 0)
     for k in range(self.n_svm):
         print("training ", k, "th SVM in ", self.n_svm)
         y = self.y_matrix[k, :].reshape((-1, 1))
         yx = y * x
         G = kernel(yx, yx)  # Gram matrix
         objective = cp.Maximize(
             cp.sum(self.a[k]) - (1 / 2) * cp.quad_form(self.a[k], G))
         if not objective.is_dcp():
             print("Not solvable!")
             assert objective.is_dcp()
         constraints = [
             self.a[k] <= C,
             cp.sum(cp.multiply(self.a[k], y)) == 0
         ]  # box constraint
         prob = cp.Problem(objective, constraints)
         result = prob.solve()
         x_pos = x[y[:, 0] == 1, :]
         x_neg = x[y[:, 0] == -1, :]
         b_min = -np.min(self.wTx(k, x_pos)) if x_pos.shape[0] != 0 else 0
         b_max = -np.max(self.wTx(k, x_neg)) if x_neg.shape[0] != 0 else 0
         self.b[k, 0] = (1 / 2) * (b_min + b_max)
     self.a_matrix = np.stack([i.value.reshape(-1) for i in self.a], 0)
Example #9
    def setUp(self):
        ###################################################################
        # Setup parameters and values to evaluate gradients at.
        self.D = 7  # Output dimension
        self.Q = 2
        self.N = 5
        self.hyp = kernels.ArdHypers(self.Q,
                                     sf=0.5 + np.exp(rnd.randn(1)),
                                     ard=np.exp(rnd.randn(self.Q)))
        #self.hyp = kernels.ArdHypers(self.Q, sf=0.5 + np.exp(rnd.randn(1)), ard=1 + 0 * np.exp(rnd.randn(self.Q)))
        #self.hyp = kernels.ArdHypers(self.Q, sf=2.0, ard=1 + 0 * np.exp(rnd.randn(self.Q)))
        self.sn = rnd.uniform(0.01, 0.1)
        self.beta = self.sn**-2
        self.kernel = kernels.rbf(self.Q, sf=self.hyp.sf, ard=self.hyp.ard)

        # Inducing points
        self.M = 10

        self.genPriorData()
        self.Kmm = self.kernel.K(self.Z)
        self.Kmm_inv = linalg.inv(self.Kmm)
        self.partial_terms = partial_terms.partial_terms(
            self.Z, self.hyp.sf**2, self.hyp.ard**-2, self.beta, self.M,
            self.Q, self.N, self.D)
        self.partial_terms.set_global_statistics(self.Kmm, self.Kmm_inv)
        self.partial_terms.set_data(self.Y,
                                    self.X_mu,
                                    self.X_S,
                                    is_set_statistics=True)
Example #10
def K(X, Y, gamma):
    K = []
    X_n_data = X.shape[0]
    Y_n_data = Y.shape[0]

    for i in range(X_n_data):
        line = []
        for j in range(Y_n_data):
            line.append(rbf(X[i], Y[j], gamma))
        K.append(line)
    return np.array(K)
Example #11
def K(X, Y, gamma):
    K = []
    X_n_data = X.shape[0]
    Y_n_data = Y.shape[0]

    for i in range(X_n_data):
        line = []
        for j in range(Y_n_data):
            line.append(rbf(X[i], Y[j], gamma))
        K.append(line)
    return np.array(K)
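The nested loops above fill the Gram matrix one entry at a time. Assuming the Gaussian form rbf(x, y, gamma) = exp(-gamma * ||x - y||^2), the same matrix can be computed in vectorized form, for example:

import numpy as np
from scipy.spatial.distance import cdist

def K_vectorized(X, Y, gamma):
    # Equivalent to K(X, Y, gamma) above under the assumed Gaussian rbf.
    return np.exp(-gamma * cdist(X, Y, 'sqeuclidean'))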
Example #12
def KRR(X, y, lam, sigma, Xtest=None):
    """
    Train and predict Kernel Ridge Regression

    Syntax: ytrain, ytest = KRR(X, y, lam, sigma, Xtest)

    Inputs:
        :param X:     An N x D matrix of training data
        :param y:     A length-N vector of training labels
        :param lam:   The ridge regression tuning parameter
        :param sigma: The RBF kernel parameter
        :param Xtest: Optional matrix of test data
    
    Outputs:
        ytrain is the set of predicted labels for the training data X
        ytest is the set of predicted labels for the test data Xtest
    """
    N, D = X.shape
    K = np.zeros((N, N))

    # Train KRR
    for ii in range(N):
        for jj in range(N):
            K[ii, jj] = rbf(X[ii, :], X[jj, :], sigma)

    ytrain = y.T @ lstsq(K + lam * np.eye(N), K)[0]

    ## Task 3
    if Xtest is not None:
        Ntest, _ = Xtest.shape
        Ktest = np.zeros((N, Ntest))
        for ii in range(N):
            for jj in range(Ntest):
                Ktest[ii, jj] = rbf(X[ii, :], Xtest[jj, :], sigma)

        # standard KRR prediction: k(x*, X) @ (K + lam*I)^{-1} y
        ytest = y.T @ lstsq(K + lam * np.eye(N), Ktest)[0]
    else:
        ytest = []

    return ytrain, ytest
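A hypothetical usage sketch for KRR on synthetic 1-D data (all names and values below are illustrative, not part of the original code):

import numpy as np

rng = np.random.default_rng(0)
X = rng.uniform(-3, 3, size=(50, 1))
y = np.sin(X[:, 0]) + 0.1 * rng.standard_normal(50)
Xtest = np.linspace(-3, 3, 100).reshape(-1, 1)
ytrain, ytest = KRR(X, y, lam=0.1, sigma=1.0, Xtest=Xtest)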
Example #13
def cache(options, global_statistics):
    '''
    TODO: support the Q=1 case, where the squeezed alpha is a scalar
    '''
    # We can calculate the global statistics once for all nodes
    kernel = kernels.rbf(options['Q'],
                         sf=float(global_statistics['sf2']**0.5),
                         ard=numpy.squeeze(global_statistics['alpha'])**-0.5)
    Kmm = kernel.K(global_statistics['Z'])
    file_name = options['statistics'] + '/cache_Kmm_' + str(
        options['i']) + '.npy'
    save(file_name, Kmm)
    Kmm_inv = linalg.inv(Kmm)
    file_name = options['statistics'] + '/cache_Kmm_inv_' + str(
        options['i']) + '.npy'
    save(file_name, Kmm_inv)
Example #14
 def update_local_statistics(self):
     '''
     Update statistics for when X_mu or X_S have changed
     '''
     self.kernel = kernels.rbf(self.Q, sf=self.hyp.sf, ard=self.hyp.ard)
     self.sum_exp_K_mi_K_im = self.exp_K_mi_K_im.sum(0)
     self.exp_K_miY = kernel_exp.calc_expect_K_mi_Y(self.Z, self.hyp,
                                                    self.X_mu, self.X_S,
                                                    self.Y)
     self.sum_exp_K_ii = self.hyp.sf**2 * self.local_N
     self.Kmm_plus_op_inv = linalg.inv(self.Kmm +
                                       self.beta * self.sum_exp_K_mi_K_im)
     if not np.all(self.X_S == 0):
         mu_ip = np.array([x.dot(x) for x in self.X_mu])
         self.KL = 0.5 * np.sum(
             np.sum(self.X_S - np.log(self.X_S), 1) + mu_ip - self.Q)
     else:  # We have fixed embeddings
         self.KL = 0
Example #15
    def __init__(self, m, n_class):
        self.n_svm = n_class * (n_class - 1) // 2
        self.m = m  # number of samples
        self.n_class = n_class

        # multiplier
        self.a = [
            cp.Variable(shape=(m, 1), pos=True) for i in range(self.n_svm)
        ]
        # bias
        self.b = np.zeros((self.n_svm, 1))

        # kernel function  should input x [n,d] y [m,d] output [n,m]
        # Example of kernels: rbf(1.0), poly(3)
        self.kernel = rbf(1)

        # Binary setting for every SVM,
        # Mij says the SVMj should give
        # Mij label to sample with class i
        self.lookup_matrix = np.zeros((self.n_class, self.n_svm))

        # The two classes SVMi concerns,
        # lookup_class[i]=[pos, neg]
        self.lookup_class = np.zeros((self.n_svm, 2))

        k = 0
        for i in range(n_class - 1):
            for j in range(i + 1, n_class):
                self.lookup_class[k, 0] = i
                self.lookup_class[k, 1] = j
                k += 1

        for i in range(n_class):
            for j in range(self.n_svm):
                if self.lookup_class[j, 0] == i:
                    self.lookup_matrix[i, j] = 1.0
                elif self.lookup_class[j, 1] == i:
                    self.lookup_matrix[i, j] = -1.0
Example #16
def main():

    data_file = 'ionosphere.data'

    data = np.genfromtxt(data_file, delimiter=',', dtype='|S10')
    instances = np.array(data[:, :-1], dtype='float')
    labels = np.array(data[:, -1] == 'g', dtype='int')

    n, d = instances.shape
    nlabels = labels.size

    if n != nlabels:
        raise Exception('Expected same no. of feature vectors as no. of labels')

    train_data = instances[:200]  # first 200 examples
    train_labels = labels[:200]  # first 200 labels

    test_data = instances[200:]  # example 201 onwards
    test_labels = labels[200:]  # label 201 onwards

    # parameters for the kernels we'll use
    gamma = 1.0 / d
    intercept = 0

    kernel_dict = {
        'linear': ker.linear,
        'polynomial': ker.poly(degree=3, gamma=gamma),
        'rbf/gaussian': ker.rbf(gamma=gamma),
        'sigmoid/arctan': ker.sigmoid(gamma=gamma)
    }

    for kernel_name in sorted(kernel_dict.keys()):
        print 'Training an SVM using the %s kernel...' % kernel_name
        svm_classifier = svm_train(train_data, train_labels,
                                   kernel_dict[kernel_name])
        confusion_mat = evaluate_classifier(svm_classifier, test_data,
                                            test_labels)
        print_evaluation_summary(confusion_mat)
        print
Example #17
def main(dataset='magic', datasize=1000):
    """
    Run experiments for the incremental kernel PCA algorithm and the
    incremental Nyström approximation.

    After each plot is shown, the program halts; close the plot to continue.

    Parameters
    ----------
    dataset : str
        Either 'magic' or 'yeast'
    datasize : int or None
        Size of dataset for Nyström comparison

    """

    if dataset not in ('magic', 'yeast'):
        raise ValueError("Unknown dataset.")

    X = getattr(data, "get_" + dataset + "_data")()

    if datasize:
        Xcut = X[:datasize]

    sigma = median_distance(X)

    kernel = lambda x, y: rbf(x, y, sigma)

    mmax = 100

    m0 = 20

    incremental_experiment(X, m0, mmax, kernel, dataset)

    incremental_experiment(X, m0, mmax, kernel, dataset, adjust=True)

    nystrom_experiment(Xcut, m0, mmax, kernel, dataset)
Example #18
def main():

    data_file = 'ionosphere.data'

    data = np.genfromtxt(data_file, delimiter=',', dtype='|S10')
    instances = np.array(data[:, :-1], dtype='float')
    labels = np.array(data[:, -1] == 'g', dtype='int')

    n, d = instances.shape
    nlabels = labels.size

    if n != nlabels:
        raise Exception('Expected same no. of feature vectors as no. of labels')

    train_data = instances[:200]  # first 200 examples
    train_labels = labels[:200]  # first 200 labels

    test_data = instances[200:]  # example 201 onwards
    test_labels = labels[200:]  # label 201 onwards

    # parameters for the kernels we'll use
    gamma = 1.0/d
    intercept = 0

    kernel_dict = {'linear': ker.linear,
                   'polynomial': ker.poly(degree=3, gamma=gamma),
                   'rbf/gaussian': ker.rbf(gamma=gamma),
                   'sigmoid/arctan': ker.sigmoid(gamma=gamma)}

    for kernel_name in sorted(kernel_dict.keys()):
        print 'Training an SVM using the %s kernel...' % kernel_name
        svm_classifier = svm_train(train_data, train_labels,
                                   kernel_dict[kernel_name])
        confusion_mat = evaluate_classifier(svm_classifier, test_data,
                                            test_labels)
        print_evaluation_summary(confusion_mat)
        print
Example #19
args = parser.parse_args()

if args.dataset == "himoon":
    x, y, _, _, xtest, ytest = data_gen.himoon(n_samples=args.n_samples,
                                               n_dims=args.n_dims)

elif args.dataset == "mmgauss":
    x, y, _, _, xtest, ytest = data_gen.mmgauss(n_samples=args.n_samples,
                                                n_dims=args.n_dims)
else:
    raise ValueError("Unknown dataset")

kernels = {
    "rbf": k.rbf(),
    "linear": k.linear(),
    "poly": k.poly(),
    "sigmoid": k.sigmoid(),
}
# dict.get never raises KeyError, so fall back explicitly when the
# requested kernel is unknown.
kernel = kernels.get(args.kernel, k.linear())

df = pd.DataFrame()

print(f"{datetime.now().strftime('%Y-%m-%d %H:%M:%S')}: Running " +
      f"{args.dataset} with {args.dimension} dimensions and " +
      f"epsilon={args.epsilon} with {args.kernel} kernel for " +
      f"{args.repetitions} repetitions.")

for run in range(args.repetitions):
Example #20
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--batch_size', '-bs', type=int, default=64)
    parser.add_argument('--nb_epoch', '-e', type=int, default=300)
    parser.add_argument('--latent_dim', '-ld', type=int, default=2)
    parser.add_argument('--lambda',
                        '-l',
                        type=float,
                        default=0.1,
                        dest='lambda_')
    parser.add_argument('--save_steps', '-ss', type=int, default=10)
    parser.add_argument('--visualize_steps', '-vs', type=int, default=1)
    parser.add_argument('--model_dir', '-md', type=str, default="./params")
    parser.add_argument('--result_dir', '-rd', type=str, default="./result")
    parser.add_argument('--noise_mode', '-nm', type=str, default="normal")
    parser.add_argument('--autoencoder',
                        '-ae',
                        type=str,
                        default="fc",
                        choices=['fc', 'conv'])
    parser.add_argument('--base',
                        '-b',
                        type=str,
                        default="gan",
                        choices=['gan', 'mmd'])

    args = parser.parse_args()

    os.makedirs(args.result_dir, exist_ok=True)
    os.makedirs(args.model_dir, exist_ok=True)

    dump_config(os.path.join(args.result_dir, 'config.csv'), args)

    noise_sampler = NoiseSampler(args.noise_mode)

    if args.autoencoder == 'fc':
        autoencoder = FCAutoEncoder((784, ),
                                    latent_dim=args.latent_dim,
                                    last_activation='tanh',
                                    is_training=True)
        image_sampler = ImageSampler(args.batch_size,
                                     shuffle=True,
                                     is_training=True,
                                     is_vectorize=True)

    elif args.autoencoder == 'conv':
        autoencoder = ConvAutoEncoder((28, 28, 1),
                                      latent_dim=args.latent_dim,
                                      last_activation='tanh',
                                      is_training=True)
        image_sampler = ImageSampler(args.batch_size,
                                     shuffle=True,
                                     is_training=True,
                                     is_vectorize=False)
    else:
        raise NotImplementedError

    discriminator = Discriminator(is_training=True)

    if args.base == 'gan':
        wae = WAEGAN(autoencoder,
                     discriminator,
                     lambda_=args.lambda_,
                     is_training=True)
    elif args.base == 'mmd':
        wae = MMDWAE(autoencoder,
                     rbf(),
                     lambda_=args.lambda_,
                     is_training=True)
    else:
        raise NotImplementedError

    wae.fit_generator(image_sampler,
                      noise_sampler,
                      nb_epoch=args.nb_epoch,
                      save_steps=args.save_steps,
                      visualize_steps=args.visualize_steps,
                      result_dir=args.result_dir,
                      model_dir=args.model_dir)
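For context, the MMD-based variant presumably penalises the mismatch between encoded data and prior samples with a kernel maximum mean discrepancy. A sketch of the biased squared-MMD estimate that such a penalty is typically built on, assuming a Gram-matrix-valued kernel callable like rbf() above:

import numpy as np

def mmd2(X, Y, kernel):
    # Biased estimate of MMD^2 between samples X [n, d] and Y [m, d].
    Kxx = kernel(X, X)
    Kyy = kernel(Y, Y)
    Kxy = kernel(X, Y)
    return Kxx.mean() + Kyy.mean() - 2 * Kxy.mean()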
Example #21
def likelihood_and_gradient(flat_array, iteration=0, step_size=0):
    global Kmm, Kmm_inv, accumulated_statistics, N, Y, flat_global_statistics_bounds, fix_beta, global_statistics_names
    # Transform the parameters that have to be positive to be positive
    flat_array_transformed = numpy.array([
        transform(b, x)
        for b, x in zip(flat_global_statistics_bounds, flat_array)
    ])
    global_statistics = rebuild_global_statistics(global_statistics_names,
                                                  flat_array_transformed)

    #print 'global_statistics'
    #print global_statistics

    Z = global_statistics['Z']
    sf2 = float(global_statistics['sf2'])
    beta = float(global_statistics['beta'])
    alpha = numpy.squeeze(global_statistics['alpha'])
    X_mu = global_statistics['X_mu']
    X_S = global_statistics['X_S']

    # We can calculate the global statistics once
    kernel = kernels.rbf(Q, sf=sf2**0.5, ard=alpha**-0.5)
    Kmm = kernel.K(Z)
    Kmm_inv = numpy.linalg.inv(Kmm)

    # Calculate partial statistics...
    partial_terms = pt.partial_terms(Z,
                                     sf2,
                                     alpha,
                                     beta,
                                     M,
                                     Q,
                                     N,
                                     D,
                                     update_global_statistics=True)
    partial_terms.set_data(Y, X_mu, X_S, is_set_statistics=True)
    terms = partial_terms.get_local_statistics()
    accumulated_statistics = {
        'sum_YYT': terms['sum_YYT'],
        'sum_exp_K_ii': terms['sum_exp_K_ii'],
        'sum_exp_K_mi_K_im': terms['sum_exp_K_mi_K_im'],
        'sum_exp_K_miY': terms['exp_K_miY'],
        'sum_KL': terms['KL'],
        'sum_d_Kmm_d_Z': partial_terms.dKmm_dZ(),
        'sum_d_exp_K_miY_d_Z': partial_terms.dexp_K_miY_dZ(),
        'sum_d_exp_K_mi_K_im_d_Z': partial_terms.dexp_K_mi_K_im_dZ(),
        'sum_d_Kmm_d_alpha': partial_terms.dKmm_dalpha(),
        'sum_d_exp_K_miY_d_alpha': partial_terms.dexp_K_miY_dalpha(),
        'sum_d_exp_K_mi_K_im_d_alpha': partial_terms.dexp_K_mi_K_im_dalpha(),
        'sum_d_Kmm_d_sf2': partial_terms.dKmm_dsf2(),
        'sum_d_exp_K_ii_d_sf2': partial_terms.dexp_K_ii_dsf2(),
        'sum_d_exp_K_miY_d_sf2': partial_terms.dexp_K_miY_dsf2(),
        'sum_d_exp_K_mi_K_im_d_sf2': partial_terms.dexp_K_mi_K_im_dsf2()
    }
    '''
    Calculates global statistics such as F and partial derivatives of F
    
    In the parallel code we create a new partial_terms object and 
    load the data into it. Here we use the previous one for debugging.
    '''
    partial_derivatives = {
        'F': partial_terms.logmarglik(),
        'dF_dsum_exp_K_ii': partial_terms.dF_dexp_K_ii(),
        'dF_dsum_exp_K_miY': partial_terms.dF_dexp_K_miY(),
        'dF_dsum_exp_K_mi_K_im': partial_terms.dF_dexp_K_mi_K_im(),
        'dF_dKmm': partial_terms.dF_dKmm()
    }
    '''
    Evaluate the gradient for 'Z', 'sf2', 'alpha', and 'beta'
    '''
    grad_Z = partial_terms.grad_Z(
        partial_derivatives['dF_dKmm'],
        accumulated_statistics['sum_d_Kmm_d_Z'],
        partial_derivatives['dF_dsum_exp_K_miY'],
        accumulated_statistics['sum_d_exp_K_miY_d_Z'],
        partial_derivatives['dF_dsum_exp_K_mi_K_im'],
        accumulated_statistics['sum_d_exp_K_mi_K_im_d_Z'])
    grad_alpha = partial_terms.grad_alpha(
        partial_derivatives['dF_dKmm'],
        accumulated_statistics['sum_d_Kmm_d_alpha'],
        partial_derivatives['dF_dsum_exp_K_miY'],
        accumulated_statistics['sum_d_exp_K_miY_d_alpha'],
        partial_derivatives['dF_dsum_exp_K_mi_K_im'],
        accumulated_statistics['sum_d_exp_K_mi_K_im_d_alpha'])
    grad_sf2 = partial_terms.grad_sf2(
        partial_derivatives['dF_dKmm'],
        accumulated_statistics['sum_d_Kmm_d_sf2'],
        partial_derivatives['dF_dsum_exp_K_ii'],
        accumulated_statistics['sum_d_exp_K_ii_d_sf2'],
        partial_derivatives['dF_dsum_exp_K_miY'],
        accumulated_statistics['sum_d_exp_K_miY_d_sf2'],
        partial_derivatives['dF_dsum_exp_K_mi_K_im'],
        accumulated_statistics['sum_d_exp_K_mi_K_im_d_sf2'])
    if fix_beta:
        grad_beta = numpy.zeros(1)
    else:
        grad_beta = partial_terms.grad_beta()
    grad_X_mu = partial_terms.grad_X_mu()
    grad_X_S = partial_terms.grad_X_S()

    ####################################################################################################################
    # Debug comparison to GPy
    ####################################################################################################################
    '''
    #sys.path.append('../GPy-master_20140118')
    import GPy
    gkern = GPy.kern.rbf(Q, global_statistics['sf2'].squeeze(), global_statistics['alpha'].squeeze()**-0.5, True)

    gpy = GPy.models.BayesianGPLVM(GPy.likelihoods.Gaussian(Y, global_statistics['beta']**-1), Q, X_mu, X_S, num_inducing=M, Z=global_statistics['Z'], kernel=gkern)
    GPy_lml = gpy.log_likelihood()
    GPy_grad = gpy._log_likelihood_gradients()
    dF_dmu = GPy_grad[0:(N * Q)].reshape(N, Q)
    dF_ds = GPy_grad[(N * Q):2*(N * Q)].reshape(N, Q)
    dF_dZ = GPy_grad[2*(N * Q):2*(N * Q)+(M*Q)].reshape(M, Q)
    dF_dsigma2 = GPy_grad[2*(N * Q)+(M*Q)]
    dF_dalpha = GPy_grad[2*(N * Q)+(M*Q)+1:2*(N * Q)+(M*Q)+3]
    dF_dbeta = GPy_grad[2*(N * Q)+(M*Q)+3:]

    dF_dmu2 = grad_X_mu
    dF_ds2 = grad_X_S
    dF_dZ2 = grad_Z
    dF_dalpha2 = grad_alpha * -2 * global_statistics['alpha']**1.5
    dF_dsigma22 = grad_sf2
    dF_dbeta2 = grad_beta * -1 * global_statistics['beta']**2

    if not numpy.sum(numpy.abs(dF_dmu - dF_dmu2)) < 10**-6:
        print '1'
    if not numpy.sum(numpy.abs(dF_dZ - dF_dZ2)) < 10**-6:
        print '2'
    if not numpy.sum(numpy.abs(dF_ds - dF_ds2)) < 10**-6:
        print '3'
    if not numpy.sum(numpy.abs(dF_dalpha - dF_dalpha2)) < 10**-6:
        print '4'
    if not numpy.sum(numpy.abs(dF_dsigma2 - dF_dsigma22))  < 10**-6:
        print '5'
    if not numpy.sum(numpy.abs(dF_dbeta - dF_dbeta2))  < 10**-6:
        print '6'
    if not numpy.abs(GPy_lml - partial_derivatives['F']) < 10**-6:
        print '7'

    #print 'gradient'
    #print gradient

    #gradient = {'Z' : dF_dZ,
    #    'sf2' : dF_dsigma2,
    #    'alpha' : dF_dalpha * -0.5 * global_statistics['alpha']**-1.5,
    #    'beta' : dF_dbeta * -1 * global_statistics['beta']**-2,
    #    'X_mu' : dF_dmu,
    #    'X_S' : dF_ds}
    #gradient = flatten_global_statistics(gradient)
    #likelihood = GPy_lml
    '''

    gradient = {
        'Z': grad_Z,
        'sf2': grad_sf2,
        'alpha': grad_alpha,
        'beta': grad_beta,
        'X_mu': grad_X_mu,
        'X_S': grad_X_S
    }
    gradient = flatten_global_statistics(gradient)
    likelihood = partial_derivatives['F']
    # Transform the gradient parameters that have to be positive by multiplying
    # them by the gradient of the transform f:  g(f(x))' = g'(f(x))f'(x)
    gradient = numpy.array([
        g * transform_grad(b, x)
        for b, x, g in zip(flat_global_statistics_bounds, flat_array, gradient)
    ])
    return -1 * likelihood, -1 * gradient
                                         n_features=2,
                                         n_informative=2,
                                         n_redundant=0,
                                         n_clusters_per_class=1,
                                         n_classes=4,
                                         class_sep=2)
    fig = plt.figure()
    fig = plt.scatter(data_x[:, 0],
                      data_x[:, 1],
                      c=data_y,
                      cmap=ListedColormap(colors),
                      marker='o')
    m = len(data_x)
    c = len(np.unique(data_y))
    svm = svm_model_cvxpy(m, c)
    svm.fit(data_x, data_y, rbf(1), 1e-3)
    from mlxtend.plotting import plot_decision_regions
    x = np.linspace(-4, 4, 100)
    test_x = np.array(np.meshgrid(x, x)).T.reshape(-1, 2)
    test_y = svm.predict(test_x).reshape(-1)
    scatter_kwargs = {'alpha': 0.0}
    fig = plot_decision_regions(test_x,
                                test_y,
                                clf=svm,
                                scatter_kwargs=scatter_kwargs)
    xx = np.linspace(-4, 4, 10)
    for i in range(svm.n_svm):

        ak = svm.a[i].value.reshape(-1)
        mask = (svm.C * 0.0001 < ak) & (ak < svm.C * (1 - 0.0001))
        fig.scatter(data_x[mask, 0] + i / 8, data_x[mask, 1], marker=4)