import time

import numpy as np
from joblib import Parallel, delayed
from scipy.optimize import fmin_l_bfgs_b

# decompose_graph, decompose_grid_graph, inference_gco, optimize_chain,
# optimize_kappa, binary_general_graph, latent and f are project-internal
# helpers imported from the surrounding package.


def trw(node_weights, edges, edge_weights, y,
        max_iter=100, verbose=0, tol=1e-3, relaxed=False):
    result = decompose_grid_graph([(node_weights, edges, edge_weights)])
    contains_node, chains, edge_index = result[0][0], result[1][0], result[2][0]

    n_nodes, n_states = node_weights.shape

    # Chain duals are initialized here but never updated below: this
    # variant only uses the gco/kappa decomposition.
    y_hat = []
    lambdas = []
    multiplier = []
    for p in xrange(n_nodes):
        multiplier.append(1.0 / len(contains_node[p]))
    for chain in chains:
        lambdas.append(np.zeros((len(chain), n_states)))
        y_hat.append(np.zeros(len(chain)))
    multiplier = np.array(multiplier).reshape(n_nodes, 1)

    mu = np.zeros((n_nodes, n_states))
    learning_rate = 0.1
    energy_history = []

    for iteration in xrange(max_iter):
        E = 0
        dmu = np.zeros((n_nodes, n_states))

        # Slave 1: graph-cut MAP inference with the dual variable folded
        # into the unaries.
        unaries = node_weights - mu
        y_hat_gco, energy = inference_gco(unaries, edge_weights, edges,
                                          n_iter=5, return_energy=True)
        E -= energy

        # Slave 2: the kappa oracle over the weak labels.
        y_hat_kappa, energy = optimize_kappa(y, mu, 1, n_nodes, n_states)
        E += energy

        # Subgradient step on mu: push the two slave labelings together.
        rows = np.arange(n_nodes)
        dmu[rows, y_hat_gco] -= 1
        dmu[rows, y_hat_kappa] += 1
        mu -= learning_rate * dmu

        energy_history.append(E)
        if iteration:
            learning_rate = 1. / np.sqrt(iteration)
        if verbose:
            print 'Iteration {}: energy {}'.format(iteration, E)
        if iteration and np.abs(E - energy_history[-2]) < tol:
            if verbose:
                print 'Converged'
            break

    return y_hat_gco, y_hat_kappa, energy_history, iteration
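# --- Usage sketch (not part of the original code) --------------------------
# trw only touches the label argument `y` through the interface consumed by
# optimize_kappa; the WeakLabel container below is a hypothetical stand-in
# for whatever label class the surrounding project uses, and the 3x3 grid
# data is made up purely for illustration.

class WeakLabel(object):
    """Hypothetical minimal label container; the real class may differ."""
    def __init__(self, full, weak, weights, full_labeled):
        self.full = np.asarray(full)        # per-node labels (possibly latent)
        self.weak = set(weak)               # image-level label set
        self.weights = np.asarray(weights)  # per-node loss weights
        self.full_labeled = full_labeled    # True if `full` is ground truth


def _demo_trw():
    n_nodes, n_states = 9, 4  # 3x3 grid, 4 labels
    node_weights = np.random.randn(n_nodes, n_states)
    # Horizontal and vertical edges of the 3x3 lattice, row-major order.
    edges = np.array([[i, i + 1] for i in range(8) if i % 3 != 2] +
                     [[i, i + 3] for i in range(6)])
    # One (n_states, n_states) weight matrix per edge (Potts-like).
    edge_weights = np.tile(np.eye(n_states), (edges.shape[0], 1, 1))
    y = WeakLabel(full=np.zeros(n_nodes, dtype=np.int32), weak=[0, 2],
                  weights=np.ones(n_nodes), full_labeled=False)
    return trw(node_weights, edges, edge_weights, y, max_iter=50, verbose=1)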
def loss_augmented_inference(self, x, y, w):
    # Variant for fully labeled examples: at each node, every label
    # except the ground-truth one gains that node's Hamming weight.
    unary_potentials = self._get_unary_potentials(x, w)
    pairwise_potentials = self._get_pairwise_potentials(x, w)
    edges = self._get_edges(x)

    for label in xrange(self.n_states):
        mask = y.full != label
        unary_potentials[mask, label] += y.weights[mask]

    return inference_gco(unary_potentials, pairwise_potentials, edges,
                         n_iter=5, return_energy=True)
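# The augmentation loop above is margin rescaling with a weighted Hamming
# loss: at every node, each label except the ground-truth one gains that
# node's loss weight. A tiny standalone check of just that loop, with
# illustrative values only:

def _check_hamming_augmentation():
    unary = np.zeros((2, 3))            # 2 nodes, 3 states
    full = np.array([2, 0])             # ground-truth labels
    weights = np.array([1.0, 0.5])      # per-node loss weights
    for label in range(3):
        mask = full != label
        unary[mask, label] += weights[mask]
    # node 0: true label 2 untouched -> [1., 1., 0.]
    # node 1: true label 0 untouched -> [0., .5, .5]
    assert np.allclose(unary, [[1.0, 1.0, 0.0], [0.0, 0.5, 0.5]])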
def loss_augmented_inference(self, x, y, w):
    # Variant for weakly labeled examples: labels outside the weak set
    # are penalized at every node, and each weak label carries a label
    # cost so that leaving it out of the solution is penalized too.
    unary_potentials = self._get_unary_potentials(x, w)
    pairwise_potentials = self._get_pairwise_potentials(x, w)
    edges = self._get_edges(x)

    label_costs = np.zeros(self.n_states)
    c = np.sum(y.weights) / float(self.n_states)
    for label in y.weak:
        label_costs[label] = c

    for label in xrange(self.n_states):
        if label not in y.weak:
            unary_potentials[:, label] += y.weights

    return inference_gco(unary_potentials, pairwise_potentials, edges,
                         label_costs, n_iter=5, return_energy=True)
def fit(self, X, Y, train_scorer, test_scorer, decompose='general',
        use_latent_first_iter=500, undergenerating_weak=True, smd=False):
    self.logger.info('Initialization')

    if decompose == 'general':
        contains_node, chains, edge_index = decompose_graph(X)
    elif decompose == 'grid':
        contains_node, chains, edge_index = decompose_grid_graph(X)
    else:
        raise ValueError("decompose must be 'general' or 'grid'")

    # Per-example dual variables: chain duals (lambdas), their current
    # argmins (y_hat), node multipliers, L-BFGS smoothing variables (xx)
    # and the weak-label duals (mu, only for weakly labeled examples).
    y_hat = []
    lambdas = []
    multiplier = []
    xx = []
    mu = {}
    for k in xrange(len(X)):
        x, y = X[k], Y[k]
        n_nodes = x[0].shape[0]
        xx.append(np.zeros(n_nodes))

        _lambdas = []
        _y_hat = []
        _multiplier = []
        for p in xrange(n_nodes):
            _multiplier.append(1.0 / len(contains_node[k][p]))
        for chain in chains[k]:
            _lambdas.append(np.zeros((len(chain), self.n_states)))
            _y_hat.append(np.zeros(len(chain), dtype=np.int32))
        lambdas.append(_lambdas)
        y_hat.append(_y_hat)
        multiplier.append(np.array(_multiplier).reshape(n_nodes, 1))

        if not y.full_labeled:
            mu[k] = np.zeros((n_nodes, self.n_states))

    w = np.zeros(self.size_w)
    self.w = w.copy()

    self.start_time = time.time()
    self.timestamps = [0]
    self.objective_curve = []
    self.train_score = []
    self.test_score = []
    self.w_history = []

    learning_rate1 = 0.1
    learning_rate2 = 0.1

    for iteration in xrange(self.max_iter):
        self.logger.info('Iteration %d', iteration)
        self.logger.info('Optimize slave MRF and update w')

        objective = 0
        dw = np.zeros(w.shape)

        for k in xrange(len(X)):
            x, y = X[k], Y[k]
            n_nodes = x[0].shape[0]

            if y.full_labeled:
                # Fully labeled example: loss-augmented unaries, solved
                # chain by chain under the dual decomposition.
                unaries = self._loss_augment_unaries(
                    self._get_unary_potentials(x, w), y.full, y.weights)
                unaries *= multiplier[k]
                pairwise = self._get_pairwise_potentials(x, w)

                jf = self._joint_features_full(x, y.full)
                objective -= np.dot(w, jf)
                dw -= jf

                for i in xrange(len(chains[k])):
                    y_hat[k][i], energy = optimize_chain(
                        chains[k][i],
                        lambdas[k][i] + unaries[chains[k][i], :],
                        pairwise, edge_index[k])
                    dw += self._joint_features(chains[k][i], x, y_hat[k][i],
                                               edge_index[k], multiplier[k])
                    objective += energy
            elif iteration > use_latent_first_iter:
                if undergenerating_weak:
                    # Use gco for full K oracle:
                    # y_hat_, energy = self.loss_augmented_inference(x, y, w)
                    # jf_gt = self._joint_features_full(x, y.full)
                    # objective -= np.dot(w, jf_gt)
                    # objective += energy
                    # dw -= jf_gt
                    # dw += self._joint_features_full(x, y_hat_)

                    # Use gco for the first summand in DD.
                    for mm in xrange(10):
                        dmu = np.zeros((n_nodes, self.n_states))
                        unaries = self._get_unary_potentials(x, w) - mu[k]
                        pairwise = self._get_pairwise_potentials(x, w)

                        y_hat_gco, energy = inference_gco(
                            unaries, pairwise, self._get_edges(x),
                            n_iter=5, return_energy=True)
                        objective -= energy
                        rows = np.arange(n_nodes)
                        dmu[rows, y_hat_gco] -= 1
                        dw += self._joint_features_full(x, y_hat_gco)

                        jf = self._joint_features_full(x, y.full)
                        objective -= np.dot(w, jf)
                        dw -= jf

                        y_hat_kappa, energy = optimize_kappa(
                            y, mu[k], self.alpha, n_nodes, self.n_states)
                        objective += energy
                        dmu[rows, y_hat_kappa] += 1

                        mu[k] -= learning_rate2 * dmu
                elif not smd:
                    dmu = np.zeros((n_nodes, self.n_states))
                    unaries = (self._get_unary_potentials(x, w)
                               - mu[k]) * multiplier[k]
                    pairwise = self._get_pairwise_potentials(x, w)

                    jf = self._joint_features_full(x, y.full)
                    objective -= np.dot(w, jf)
                    dw -= jf

                    # Begin inner loop (remove to restore previous state).
                    E = 0
                    Eprev = -100
                    for j in xrange(self.update_mu):
                        E = 0
                        for i in xrange(len(chains[k])):
                            y_hat[k][i], energy = optimize_chain(
                                chains[k][i],
                                lambdas[k][i] + unaries[chains[k][i], :],
                                pairwise, edge_index[k])
                            E += energy
                        lambda_sum = np.zeros((n_nodes, self.n_states),
                                              dtype=np.float64)
                        for p in xrange(n_nodes):
                            for i in contains_node[k][p]:
                                pos = np.where(chains[k][i] == p)[0][0]
                                lambda_sum[p, y_hat[k][i][pos]] += multiplier[k][p]
                        for i in xrange(len(chains[k])):
                            N = lambdas[k][i].shape[0]
                            lambdas[k][i][np.arange(N), y_hat[k][i]] -= learning_rate2
                            lambdas[k][i] += learning_rate2 * lambda_sum[chains[k][i], :]
                        if np.abs(E - Eprev) < 0.1:
                            break
                        Eprev = E
                    # End inner loop.

                    # One last pass with the final lambdas.
                    for i in xrange(len(chains[k])):
                        y_hat[k][i], energy = optimize_chain(
                            chains[k][i],
                            lambdas[k][i] + unaries[chains[k][i], :],
                            pairwise, edge_index[k])
                        dw += self._joint_features(chains[k][i], x, y_hat[k][i],
                                                   edge_index[k], multiplier[k])
                        objective += energy
                        dmu[chains[k][i], y_hat[k][i]] -= \
                            multiplier[k][chains[k][i]].flatten()

                    y_hat_kappa, energy = optimize_kappa(
                        y, mu[k], self.alpha, n_nodes, self.n_states)
                    objective += energy
                    dmu[np.arange(n_nodes), y_hat_kappa] += 1

                    mu[k] -= learning_rate2 * dmu
                elif smd:
                    # Smoothed dual: one binary subproblem per state; the
                    # smoothing variables xx[k] are fitted with L-BFGS
                    # (f is the objective passed to the optimizer, defined
                    # elsewhere in the package).
                    mMu = 10 if iteration > 1500 else 1
                    for mm in xrange(mMu):
                        dmu = np.zeros((n_nodes, self.n_states))
                        jf = self._joint_features_full(x, y.full)
                        objective -= np.dot(w, jf)
                        dw -= jf

                        unaries = -self._get_unary_potentials(x, w) + mu[k]
                        edge_weights = -self._get_pairwise_potentials(x, w)
                        edges = self._get_edges(x)
                        n_edges = edges.shape[0]

                        y_hat2 = []
                        pairwise = []
                        for j in xrange(self.n_states):
                            y_hat2.append(np.zeros(n_nodes))  # overwritten below
                            _pairwise = np.zeros((n_edges, 2, 2))
                            for i in xrange(n_edges):
                                _pairwise[i, 1, 0] = _pairwise[i, 0, 1] = \
                                    -0.5 * edge_weights[i, j, j]
                            pairwise.append(_pairwise)
                        for i in xrange(n_edges):
                            e1, e2 = edges[i]
                            unaries[e1, :] += 0.5 * np.diag(edge_weights[i, :, :])
                            unaries[e2, :] += 0.5 * np.diag(edge_weights[i, :, :])

                        xx[k], f_val, d = fmin_l_bfgs_b(
                            f, xx[k], args=(unaries, pairwise, edges),
                            maxiter=50, maxfun=50, pgtol=1e-2)

                        E = np.sum(xx[k])
                        for j in xrange(self.n_states):
                            new_unaries = np.zeros((n_nodes, 2))
                            new_unaries[:, 1] = unaries[:, j] + xx[k]
                            y_hat2[j], energy = binary_general_graph(
                                edges, new_unaries, pairwise[j])
                            E -= 0.5 * energy
                            dmu[:, j] -= y_hat2[j]
                            dw += self._joint_features_full(x, y_hat2[j] * j)

                        y_hat_kappa, energy = optimize_kappa(
                            y, mu[k], 1, n_nodes, self.n_states)
                        E += energy
                        dmu[np.arange(n_nodes), y_hat_kappa] += 1

                        objective += E
                        mu[k] -= learning_rate2 * dmu

        dw += w / self.C
        if iteration < 100 or iteration % self.update_w_every == 0:
            w -= learning_rate1 * dw
        objective = self.C * objective + np.sum(w ** 2) / 2

        self.logger.info('Update lambda')
        for k in xrange(len(X)):
            if (undergenerating_weak or smd) and not Y[k].full_labeled:
                continue
            n_nodes = X[k][0].shape[0]
            lambda_sum = np.zeros((n_nodes, self.n_states), dtype=np.float64)
            for p in xrange(n_nodes):
                for i in contains_node[k][p]:
                    pos = np.where(chains[k][i] == p)[0][0]
                    lambda_sum[p, y_hat[k][i][pos]] += multiplier[k][p]
            for i in xrange(len(chains[k])):
                N = lambdas[k][i].shape[0]
                lambdas[k][i][np.arange(N), y_hat[k][i]] -= learning_rate2
                lambdas[k][i] += learning_rate2 * lambda_sum[chains[k][i], :]

        if iteration % self.complete_every == 0 or iteration in [51, 80, 101, 130]:
            self.logger.info('Complete latent variables')
            Y_new = Parallel(n_jobs=self.n_jobs, verbose=0, max_nbytes=1e8)(
                delayed(latent)(self.model, x, y, w) for x, y in zip(X, Y))
            changes = np.sum([np.any(y_new.full != y.full)
                              for y_new, y in zip(Y_new, Y)])
            self.logger.info('changes in latent variables: %d', changes)
            Y = Y_new

        if iteration and (iteration % self.check_every == 0):
            self.logger.info('Compute train and test scores')
            self.train_score.append(train_scorer(w))
            self.logger.info('Train SCORE: %f', self.train_score[-1])
            self.test_score.append(test_scorer(w))
            self.logger.info('Test SCORE: %f', self.test_score[-1])
            self.logger.info('diff: %f', np.sum((w - self.w) ** 2))

        if iteration:
            learning_rate1 = 1.0 / iteration
            learning_rate2 = 1.0 / iteration

        self.timestamps.append(time.time() - self.start_time)
        self.objective_curve.append(objective)
        self.logger.info('Objective: %f', objective)

        self.w = w.copy()
        self.w_history.append(self.w)

    self.w = w
    self.timestamps = np.array(self.timestamps)
    self.objective_curve = np.array(self.objective_curve)
    self.train_score = np.array(self.train_score)
    self.test_score = np.array(self.test_score)
    self.w_history = np.vstack(self.w_history)
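# --- optimize_chain sketch (assumption, not the original implementation) ---
# optimize_chain is called throughout fit() but is not shown in this section.
# In the usual dual-decomposition setup it returns an exact MAP labeling of
# one chain together with its energy. A minimal Viterbi sketch under that
# assumption, with `pairwise` simplified to a single (n_states, n_states)
# matrix shared by all consecutive pairs (the real code looks up per-edge
# potentials through edge_index); flip argmax/max to minimization if the
# surrounding convention treats energies as costs.

def optimize_chain_viterbi(chain, unaries, pairwise):
    """chain    : (m,) node ids along the chain
    unaries  : (m, n_states) scores per chain position
    pairwise : (n_states, n_states) score for consecutive label pairs
    Returns (labels, best_score)."""
    m, n_states = unaries.shape
    score = unaries[0].copy()
    backp = np.zeros((m, n_states), dtype=np.int32)
    for t in range(1, m):
        cand = score[:, None] + pairwise           # cand[p, c]: prev p -> cur c
        backp[t] = np.argmax(cand, axis=0)         # best predecessor per state
        score = cand[backp[t], np.arange(n_states)] + unaries[t]
    labels = np.zeros(m, dtype=np.int32)
    labels[-1] = np.argmax(score)
    for t in range(m - 1, 0, -1):                  # backtrack
        labels[t - 1] = backp[t, labels[t]]
    return labels, score.max()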