import time

import numpy as np
from scipy.optimize import fmin_l_bfgs_b
from joblib import Parallel, delayed

# Project-local helpers assumed to be importable from this package:
# decompose_graph, decompose_grid_graph, optimize_chain, optimize_kappa,
# inference_gco, binary_general_graph, compute_energy, get_labelling, latent,
# and the dual objectives f / f2 passed to fmin_l_bfgs_b below.


def trw(node_weights, edges, edge_weights, y, max_iter=100, verbose=0,
        tol=1e-3, relaxed=False):
    # Dual decomposition with two slaves: the full MRF solved by
    # alpha-expansion (gco) and the weak-label kappa oracle; mu is the dual
    # variable tying them together. The chain decomposition is set up but
    # unused in this variant.
    result = decompose_grid_graph([(node_weights, edges, edge_weights)])
    contains_node, chains, edge_index = result[0][0], result[1][0], result[2][0]

    n_nodes, n_states = node_weights.shape

    y_hat = []
    lambdas = []
    multiplier = []

    for p in xrange(n_nodes):
        multiplier.append(1.0 / len(contains_node[p]))

    for chain in chains:
        lambdas.append(np.zeros((len(chain), n_states)))
        y_hat.append(np.zeros(len(chain)))

    multiplier = np.array(multiplier)
    multiplier.shape = (n_nodes, 1)

    mu = np.zeros((n_nodes, n_states))

    learning_rate = 0.1
    energy_history = []
    for iteration in xrange(max_iter):
        E = 0
        dmu = np.zeros((n_nodes, n_states))

        unaries = node_weights - mu
        y_hat_gco, energy = inference_gco(unaries, edge_weights, edges,
                                          n_iter=5, return_energy=True)
        E -= energy

        y_hat_kappa, energy = optimize_kappa(y, mu, 1, n_nodes, n_states)
        E += energy

        # Subgradient in mu: -1 at the gco labelling, +1 at the kappa
        # labelling; it vanishes where the two slaves agree.
        dmu[np.ogrid[:dmu.shape[0]], y_hat_gco] -= 1
        dmu[np.ogrid[:dmu.shape[0]], y_hat_kappa] += 1
        mu -= learning_rate * dmu

        energy_history.append(E)

        if iteration:
            learning_rate = 1. / np.sqrt(iteration)

        if verbose:
            print 'Iteration {}: energy {}'.format(iteration, E)

        if iteration and np.abs(E - energy_history[-2]) < tol:
            if verbose:
                print 'Converged'
            break

    return y_hat_gco, y_hat_kappa, energy_history, iteration
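
# A minimal, self-contained sketch (not part of the original module) of the
# fancy-indexing pattern used above to build dmu. np.ogrid[:n] with a single
# slice is equivalent to np.arange(n), so each row gets -1 at the label chosen
# by one slave and +1 at the label chosen by the other.
def _demo_dmu_update():
    n_nodes, n_states = 4, 3
    y_hat_gco = np.array([0, 2, 1, 1])    # toy labelling from slave 1
    y_hat_kappa = np.array([0, 1, 1, 2])  # toy labelling from slave 2
    dmu = np.zeros((n_nodes, n_states))
    dmu[np.arange(n_nodes), y_hat_gco] -= 1
    dmu[np.arange(n_nodes), y_hat_kappa] += 1
    # Rows where both slaves picked the same label stay all-zero.
    print dmu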

def trw_lbfgs(node_weights, edges, edge_weights, max_iter=100, verbose=1,
              tol=1e-3):
    # Dual decomposition where the dual over lambdas is optimized with
    # L-BFGS-B (through the objective f) instead of subgradient steps.
    result = decompose_grid_graph([(node_weights, edges, edge_weights)],
                                  get_sign=True)
    contains_node, chains, edge_index, sign = (result[0][0], result[1][0],
                                               result[2][0], result[3][0])

    n_nodes, n_states = node_weights.shape

    y_hat = []
    lambdas = np.zeros((n_nodes, n_states))
    multiplier = []

    for p in xrange(n_nodes):
        multiplier.append(1.0 / len(contains_node[p]))
        assert len(contains_node[p]) == 2

    for chain in chains:
        y_hat.append(np.zeros(len(chain)))

    multiplier = np.array(multiplier)
    multiplier.shape = (n_nodes, 1)

    history = []
    x, f_val, d = fmin_l_bfgs_b(f, np.zeros((n_nodes, n_states)),
                                args=(node_weights, multiplier, chains,
                                      edge_weights, edge_index, sign, y_hat,
                                      contains_node, history),
                                maxiter=max_iter, disp=verbose, pgtol=tol)

    # Reshape the flat solution back to (n_nodes, n_states); the original
    # hard-coded (400, 10) here, which only worked for one problem size.
    lambdas = x.reshape((n_nodes, n_states))

    unaries = node_weights * multiplier
    for i, chain in enumerate(chains):
        y_hat[i], e = optimize_chain(
            chain, sign[i] * lambdas[chain, :] + unaries[chain, :],
            edge_weights, edge_index)

    lambda_sum = np.zeros((n_nodes, n_states), dtype=np.float64)
    for p in xrange(n_nodes):
        for i in contains_node[p]:
            pos = np.where(chains[i] == p)[0][0]
            lambda_sum[p, y_hat[i][pos]] += multiplier[p]

    info = {'x': x, 'f': f_val, 'd': d, 'history': history}

    return lambda_sum, info
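
# The dual objective f above is defined elsewhere in the project. As a sketch
# of the call pattern only: fmin_l_bfgs_b expects a callable returning
# (value, gradient) for the flattened variables and returns the solution, the
# final value, and an info dict. The toy quadratic below stands in for f.
def _demo_lbfgs_call():
    def toy_objective(x_flat, target):
        diff = x_flat - target.ravel()
        return 0.5 * np.dot(diff, diff), diff  # (value, gradient)

    target = np.arange(6, dtype=np.float64).reshape(2, 3)
    x, f_val, d = fmin_l_bfgs_b(toy_objective, np.zeros(6), args=(target,),
                                maxiter=50, pgtol=1e-6)
    print x.reshape(2, 3), f_val, d['warnflag']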

def trw(node_weights, edges, edge_weights, y, max_iter=100, verbose=0,
        tol=1e-3):
    # Chain decomposition plus a kappa slave for weakly labeled objects;
    # unaries are loss-augmented on every label outside the weak set y.weak.
    result = decompose_grid_graph([(node_weights, edges, edge_weights)])
    contains_node, chains, edge_index = result[0][0], result[1][0], result[2][0]

    n_nodes, n_states = node_weights.shape

    y_hat = []
    lambdas = []
    multiplier = []

    # Each node is shared by its chains and by the kappa slave, hence the +1.
    for p in xrange(n_nodes):
        multiplier.append(1.0 / (len(contains_node[p]) + 1))

    for chain in chains:
        lambdas.append(np.zeros((len(chain), n_states)))
        y_hat.append(np.zeros(len(chain)))

    multiplier = np.array(multiplier)
    multiplier.shape = (n_nodes, 1)

    mu = np.zeros((n_nodes, n_states))

    learning_rate = 0.1
    energy_history = []
    for iteration in xrange(max_iter):
        E = 0

        unaries = node_weights.copy()
        for label in xrange(n_states):
            if label not in y.weak:
                unaries[:, label] += y.weights
        unaries *= multiplier

        for i, chain in enumerate(chains):
            y_hat[i], energy = optimize_chain(
                chain, lambdas[i] + unaries[chain, :],
                edge_weights, edge_index)
            E += energy

        y_hat_kappa, energy = optimize_kappa(y, mu + unaries, 1, n_nodes,
                                             n_states, augment=False)
        E += energy

        lambda_sum = np.zeros((n_nodes, n_states), dtype=np.float64)
        for p in xrange(n_nodes):
            assert len(contains_node[p]) == 2
            for i in contains_node[p]:
                pos = np.where(chains[i] == p)[0][0]
                lambda_sum[p, y_hat[i][pos]] += multiplier[p]
        lambda_sum[np.ogrid[:n_nodes], y_hat_kappa] += multiplier.flatten()

        for i in xrange(len(chains)):
            N = lambdas[i].shape[0]
            lambdas[i][np.ogrid[:N], y_hat[i]] -= learning_rate
            lambdas[i] += learning_rate * lambda_sum[chains[i], :]

        mu[np.ogrid[:n_nodes], y_hat_kappa] -= learning_rate
        mu += learning_rate * lambda_sum

        # Sanity check: the dual variables of all slaves must sum to zero
        # at every node.
        test_l = np.zeros((n_nodes, n_states))
        for p in xrange(n_nodes):
            for i in contains_node[p]:
                pos = np.where(chains[i] == p)[0][0]
                test_l[p, :] += lambdas[i][pos, :]
        test_l += mu
        assert np.sum(test_l) < 1e-10

        energy_history.append(E)

        if iteration:
            learning_rate = 1. / np.sqrt(iteration)

        if verbose:
            print 'Iteration {}: energy {}'.format(iteration, E)

        if iteration > 300 and np.abs(E - energy_history[-2]) < tol:
            if verbose:
                print 'Converged'
            break

    return lambda_sum, y_hat_kappa, energy_history, iteration
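
# Self-contained sketch (assumed semantics) of the loss augmentation above:
# every label not in the weak label set pays a per-node penalty y.weights, so
# the slaves are biased against labellings inconsistent with the weak
# annotation. `weak` and `weights` below are toy stand-ins for the fields of y.
def _demo_loss_augment_weak():
    n_nodes, n_states = 5, 4
    node_weights = np.zeros((n_nodes, n_states))
    weak = set([1, 3])           # labels allowed by the weak annotation
    weights = np.ones(n_nodes)   # per-node loss weights
    unaries = node_weights.copy()
    for label in xrange(n_states):
        if label not in weak:
            unaries[:, label] += weights
    print unaries                # columns 0 and 2 carry the penalty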

def trw(node_weights, edges, edge_weights, y, max_iter=100, verbose=0,
        tol=1e-3, update_mu=50, get_energy=None):
    # Two-level scheme: an inner loop runs up to update_mu subgradient steps
    # on the chain duals (lambdas) before each outer step on mu.
    result = decompose_grid_graph([(node_weights, edges, edge_weights)])
    contains_node, chains, edge_index = result[0][0], result[1][0], result[2][0]

    n_nodes, n_states = node_weights.shape

    y_hat = []
    lambdas = []
    multiplier = []

    for p in xrange(n_nodes):
        multiplier.append(1.0 / len(contains_node[p]))

    for chain in chains:
        lambdas.append(np.zeros((len(chain), n_states)))
        y_hat.append(np.zeros(len(chain)))

    multiplier = np.array(multiplier)
    multiplier.shape = (n_nodes, 1)

    mu = np.zeros((n_nodes, n_states))

    learning_rate = 0.1
    energy_history = []
    primal_history = []
    for iteration in xrange(max_iter):
        dmu = np.zeros((n_nodes, n_states))
        unaries = (node_weights - mu) * multiplier

        inner_energy = []
        for inner in xrange(update_mu):
            E = 0
            for i, chain in enumerate(chains):
                y_hat[i], energy = optimize_chain(
                    chain, lambdas[i] + unaries[chain, :],
                    edge_weights, edge_index)
                E += energy
            inner_energy.append(E)

            lambda_sum = np.zeros((n_nodes, n_states), dtype=np.float64)
            for p in xrange(n_nodes):
                for i in contains_node[p]:
                    pos = np.where(chains[i] == p)[0][0]
                    lambda_sum[p, y_hat[i][pos]] += multiplier[p]

            for i in xrange(len(chains)):
                N = lambdas[i].shape[0]
                lambdas[i][np.ogrid[:N], y_hat[i]] -= learning_rate
                lambdas[i] += learning_rate * lambda_sum[chains[i], :]

            if inner > 0 and np.abs(inner_energy[-2] - E) < 1e-2:
                break

        E = inner_energy[-1]

        y_hat_kappa, energy = optimize_kappa(y, mu, 1, n_nodes, n_states)
        E += energy

        for i in xrange(len(chains)):
            dmu[chains[i], y_hat[i]] -= multiplier[chains[i]].flatten()
        dmu[np.ogrid[:dmu.shape[0]], y_hat_kappa] += 1
        mu -= learning_rate * dmu

        energy_history.append(E)
        if get_energy is not None:
            primal = get_energy(get_labelling(lambda_sum))
            primal_history.append(primal)

        if iteration:
            learning_rate = 1. / np.sqrt(iteration)

        if verbose:
            print 'Iteration {}: inner={} energy={}'.format(iteration, inner, E)

        if iteration > 0 and np.abs(E - energy_history[-2]) < tol:
            if verbose:
                print 'Converged'
            break

    info = {'primal': primal_history,
            'dual': energy_history,
            'iteration': iteration}
    return lambda_sum, y_hat_kappa, info
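
# optimize_chain is defined elsewhere in the project. As a rough, self-contained
# stand-in, a min-sum Viterbi pass over a chain with a shared (n_states x
# n_states) pairwise table might look like the sketch below; the simplified
# signature, and the min (rather than max) convention, are assumptions.
def _demo_optimize_chain(unaries, pairwise):
    # unaries: (T, S); pairwise: (S, S) shared along the chain.
    T, S = unaries.shape
    cost = unaries[0].copy()
    back = np.zeros((T, S), dtype=np.int32)
    for t in xrange(1, T):
        # cost[j] at step t = min_i (cost[i] + pairwise[i, j]) + unaries[t, j]
        total = cost[:, np.newaxis] + pairwise
        back[t] = np.argmin(total, axis=0)
        cost = total[back[t], np.arange(S)] + unaries[t]
    # Backtrack the optimal labelling.
    labels = np.zeros(T, dtype=np.int32)
    labels[-1] = np.argmin(cost)
    for t in xrange(T - 1, 0, -1):
        labels[t - 1] = back[t, labels[t]]
    return labels, cost[labels[-1]]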

def fit(self, X, Y, train_scorer, test_scorer, decompose='general',
        use_latent_first_iter=500, undergenerating_weak=True, smd=False):
    # Joint learning of w with three alternative oracles for weakly labeled
    # objects: gco-based inference (undergenerating_weak), an inner chain-DD
    # loop (not smd), or a per-label binary decomposition (smd).
    self.logger.info('Initialization')
    if decompose == 'general':
        contains_node, chains, edge_index = decompose_graph(X)
    elif decompose == 'grid':
        contains_node, chains, edge_index = decompose_grid_graph(X)
    else:
        raise ValueError

    y_hat = []
    lambdas = []
    multiplier = []
    xx = []
    mu = {}

    for k in xrange(len(X)):
        x, y = X[k], Y[k]
        n_nodes = x[0].shape[0]

        xx.append(np.zeros(n_nodes))

        _lambdas = []
        _y_hat = []
        _multiplier = []

        for p in xrange(n_nodes):
            _multiplier.append(1.0 / len(contains_node[k][p]))

        for chain in chains[k]:
            _lambdas.append(np.zeros((len(chain), self.n_states)))
            _y_hat.append(np.zeros(len(chain), dtype=np.int32))

        lambdas.append(_lambdas)
        y_hat.append(_y_hat)

        _multiplier = np.array(_multiplier)
        _multiplier.shape = (n_nodes, 1)
        multiplier.append(_multiplier)

        if not y.full_labeled:
            mu[k] = np.zeros((n_nodes, self.n_states))

    w = np.zeros(self.size_w)
    self.w = w.copy()

    self.start_time = time.time()
    self.timestamps = [0]
    self.objective_curve = []
    self.train_score = []
    self.test_score = []
    self.w_history = []

    learning_rate1 = 0.1
    learning_rate2 = 0.1

    for iteration in xrange(self.max_iter):
        self.logger.info('Iteration %d', iteration)
        self.logger.info('Optimize slave MRF and update w')

        objective = 0
        dw = np.zeros(w.shape)

        for k in xrange(len(X)):
            x, y = X[k], Y[k]
            n_nodes = x[0].shape[0]
            # self.logger.info('object %d', k)

            if y.full_labeled:
                unaries = self._loss_augment_unaries(
                    self._get_unary_potentials(x, w), y.full, y.weights)
                unaries *= multiplier[k]
                pairwise = self._get_pairwise_potentials(x, w)

                jf = self._joint_features_full(x, y.full)
                objective -= np.dot(w, jf)
                dw -= jf

                for i in xrange(len(chains[k])):
                    y_hat[k][i], energy = optimize_chain(
                        chains[k][i],
                        lambdas[k][i] + unaries[chains[k][i], :],
                        pairwise, edge_index[k])

                    dw += self._joint_features(chains[k][i], x, y_hat[k][i],
                                               edge_index[k], multiplier[k])
                    objective += energy
            elif iteration > use_latent_first_iter:
                if undergenerating_weak:
                    # Use gco for full K oracle
                    # y_hat_, energy = self.loss_augmented_inference(x, y, w)
                    # jf_gt = self._joint_features_full(x, y.full)
                    # objective -= np.dot(w, jf_gt)
                    # objective += energy
                    # dw -= jf_gt
                    # dw += self._joint_features_full(x, y_hat_)

                    # use gco for first summand in DD
                    for mm in xrange(10):
                        dmu = np.zeros((n_nodes, self.n_states))

                        unaries = self._get_unary_potentials(x, w) - mu[k]
                        pairwise = self._get_pairwise_potentials(x, w)

                        y_hat_gco, energy = inference_gco(
                            unaries, pairwise, self._get_edges(x),
                            n_iter=5, return_energy=True)
                        objective -= energy
                        dmu[np.ogrid[:dmu.shape[0]], y_hat_gco] -= 1
                        dw += self._joint_features_full(x, y_hat_gco)

                        jf = self._joint_features_full(x, y.full)
                        objective -= np.dot(w, jf)
                        dw -= jf

                        y_hat_kappa, energy = optimize_kappa(
                            y, mu[k], self.alpha, n_nodes, self.n_states)
                        objective += energy
                        dmu[np.ogrid[:dmu.shape[0]], y_hat_kappa] += 1

                        mu[k] -= learning_rate2 * dmu
                elif not smd:
                    dmu = np.zeros((n_nodes, self.n_states))

                    unaries = (self._get_unary_potentials(x, w) - mu[k]) \
                        * multiplier[k]
                    pairwise = self._get_pairwise_potentials(x, w)

                    jf = self._joint_features_full(x, y.full)
                    objective -= np.dot(w, jf)
                    dw -= jf

                    # begin inner (can remove this to restore to previous state)
                    E = 0
                    Eprev = -100
                    for j in xrange(self.update_mu):
                        E = 0
                        for i in xrange(len(chains[k])):
                            y_hat[k][i], energy = optimize_chain(
                                chains[k][i],
                                lambdas[k][i] + unaries[chains[k][i], :],
                                pairwise, edge_index[k])
                            E += energy

                        lambda_sum = np.zeros((n_nodes, self.n_states),
                                              dtype=np.float64)
                        for p in xrange(n_nodes):
                            for i in contains_node[k][p]:
                                pos = np.where(chains[k][i] == p)[0][0]
                                lambda_sum[p, y_hat[k][i][pos]] += \
                                    multiplier[k][p]

                        for i in xrange(len(chains[k])):
                            N = lambdas[k][i].shape[0]
                            lambdas[k][i][np.ogrid[:N], y_hat[k][i]] -= \
                                learning_rate2
                            lambdas[k][i] += learning_rate2 * \
                                lambda_sum[chains[k][i], :]

                        if np.abs(E - Eprev) < 0.1:
                            break
                        Eprev = E
                    # end inner

                    # last one
                    for i in xrange(len(chains[k])):
                        y_hat[k][i], energy = optimize_chain(
                            chains[k][i],
                            lambdas[k][i] + unaries[chains[k][i], :],
                            pairwise, edge_index[k])
                        dw += self._joint_features(chains[k][i], x,
                                                   y_hat[k][i], edge_index[k],
                                                   multiplier[k])
                        objective += energy
                        dmu[chains[k][i], y_hat[k][i]] -= \
                            multiplier[k][chains[k][i]].flatten()

                    y_hat_kappa, energy = optimize_kappa(
                        y, mu[k], self.alpha, n_nodes, self.n_states)
                    objective += energy
                    dmu[np.ogrid[:dmu.shape[0]], y_hat_kappa] += 1

                    mu[k] -= learning_rate2 * dmu
                elif smd:
                    if iteration > 1500:
                        mMu = 10
                    else:
                        mMu = 1
                    for mm in xrange(mMu):
                        dmu = np.zeros((n_nodes, self.n_states))

                        jf = self._joint_features_full(x, y.full)
                        objective -= np.dot(w, jf)
                        dw -= jf

                        unaries = -self._get_unary_potentials(x, w) + mu[k]
                        edge_weights = -self._get_pairwise_potentials(x, w)
                        edges = self._get_edges(x)
                        n_edges = edges.shape[0]

                        # One binary subproblem per label: move the diagonal
                        # of each edge potential into the unaries and keep a
                        # 2x2 off-diagonal pairwise term.
                        y_hat2 = []
                        pairwise = []
                        for j in xrange(self.n_states):
                            y_hat2.append(np.zeros(self.n_states))
                            _pairwise = np.zeros((n_edges, 2, 2))
                            for i in xrange(n_edges):
                                _pairwise[i, 1, 0] = _pairwise[i, 0, 1] = \
                                    -0.5 * edge_weights[i, j, j]
                            pairwise.append(_pairwise)
                        for i in xrange(n_edges):
                            e1, e2 = edges[i]
                            unaries[e1, :] += 0.5 * np.diag(edge_weights[i, :, :])
                            unaries[e2, :] += 0.5 * np.diag(edge_weights[i, :, :])

                        xx[k], f_val, d = fmin_l_bfgs_b(
                            f, xx[k], args=(unaries, pairwise, edges),
                            maxiter=50, maxfun=50, pgtol=1e-2)

                        E = np.sum(xx[k])
                        for j in xrange(self.n_states):
                            new_unaries = np.zeros((n_nodes, 2))
                            new_unaries[:, 1] = unaries[:, j] + xx[k]
                            y_hat2[j], energy = binary_general_graph(
                                edges, new_unaries, pairwise[j])
                            E -= 0.5 * energy
                            dmu[:, j] -= y_hat2[j]
                            dw += self._joint_features_full(x, y_hat2[j] * j)

                        y_hat_kappa, energy = optimize_kappa(
                            y, mu[k], 1, n_nodes, self.n_states)
                        E += energy
                        dmu[np.ogrid[:dmu.shape[0]], y_hat_kappa] += 1

                        objective += E
                        mu[k] -= learning_rate2 * dmu

        dw += w / self.C
        if iteration < 100 or iteration % self.update_w_every == 0:
            w -= learning_rate1 * dw

        objective = self.C * objective + np.sum(w ** 2) / 2

        self.logger.info('Update lambda')
        for k in xrange(len(X)):
            if undergenerating_weak and not Y[k].full_labeled:
                continue
            if smd and not Y[k].full_labeled:
                continue

            n_nodes = X[k][0].shape[0]

            lambda_sum = np.zeros((n_nodes, self.n_states), dtype=np.float64)
            for p in xrange(n_nodes):
                for i in contains_node[k][p]:
                    pos = np.where(chains[k][i] == p)[0][0]
                    lambda_sum[p, y_hat[k][i][pos]] += multiplier[k][p]

            for i in xrange(len(chains[k])):
                N = lambdas[k][i].shape[0]
                lambdas[k][i][np.ogrid[:N], y_hat[k][i]] -= learning_rate2
                lambdas[k][i] += learning_rate2 * lambda_sum[chains[k][i], :]

        if iteration % self.complete_every == 0 \
                or iteration in [51, 80, 101, 130]:
            self.logger.info('Complete latent variables')
            Y_new = Parallel(n_jobs=self.n_jobs, verbose=0, max_nbytes=1e8)(
                delayed(latent)(self.model, x, y, w) for x, y in zip(X, Y))
            changes = np.sum([np.any(y_new.full != y.full)
                              for y_new, y in zip(Y_new, Y)])
            self.logger.info('changes in latent variables: %d', changes)
            Y = Y_new

        if iteration and (iteration % self.check_every == 0):
            self.logger.info('Compute train and test scores')
            self.train_score.append(train_scorer(w))
            self.logger.info('Train SCORE: %f', self.train_score[-1])
            self.test_score.append(test_scorer(w))
            self.logger.info('Test SCORE: %f', self.test_score[-1])

        self.logger.info('diff: %f', np.sum((w - self.w) ** 2))

        if iteration:
            learning_rate1 = 1.0 / iteration
            learning_rate2 = 1.0 / iteration

        self.timestamps.append(time.time() - self.start_time)
        self.objective_curve.append(objective)
        self.logger.info('Objective: %f', objective)

        self.w = w.copy()
        self.w_history.append(self.w)

    self.w = w
    self.timestamps = np.array(self.timestamps)
    self.objective_curve = np.array(self.objective_curve)
    self.train_score = np.array(self.train_score)
    self.test_score = np.array(self.test_score)
    self.w_history = np.vstack(self.w_history)

def trw(node_weights, edges, edge_weights, max_iter=100, verbose=0, tol=1e-3,
        strategy='sqrt', r0=1.5, r1=0.5, gamma=0.1):
    # Subgradient TRW with a choice of step-size rules: diminishing 1/sqrt(t)
    # and 1/t schedules, plus two Polyak-style adaptive rules that estimate
    # the optimal value from the best dual ('best-dual') or best primal
    # ('best-primal') seen so far.
    assert strategy in ['best-dual', 'best-primal', 'sqrt', 'linear']

    result = decompose_grid_graph([(node_weights, edges, edge_weights)])
    contains_node, chains, edge_index = result[0][0], result[1][0], result[2][0]

    n_nodes, n_states = node_weights.shape

    y_hat = []
    lambdas = []
    multiplier = []

    for p in xrange(n_nodes):
        multiplier.append(1.0 / len(contains_node[p]))

    for chain in chains:
        lambdas.append(np.zeros((len(chain), n_states)))
        y_hat.append(np.zeros(len(chain)))

    multiplier = np.array(multiplier)
    multiplier.shape = (n_nodes, 1)

    delta = 1.
    learning_rate = 0.1
    dual_history = []
    primal_history = []
    best_dual = np.inf
    best_primal = -np.inf
    for iteration in xrange(max_iter):
        dual = 0.0
        unaries = node_weights * multiplier

        for i, chain in enumerate(chains):
            y_hat[i], e = optimize_chain(
                chain, lambdas[i] + unaries[chain, :],
                edge_weights, edge_index)
            dual += e

        lambda_sum = np.zeros((n_nodes, n_states), dtype=np.float64)
        for p in xrange(n_nodes):
            for i in contains_node[p]:
                pos = np.where(chains[i] == p)[0][0]
                lambda_sum[p, y_hat[i][pos]] += multiplier[p]

        # Subgradient step; p_norm accumulates the squared subgradient norm
        # needed by the Polyak-style rules.
        p_norm = 0.0
        for i in xrange(len(chains)):
            N = lambdas[i].shape[0]
            dlambda = lambda_sum[chains[i], :].copy()
            dlambda[np.ogrid[:N], y_hat[i]] -= 1
            p_norm += np.sum(dlambda ** 2)
            lambdas[i] += learning_rate * dlambda

        primal = compute_energy(get_labelling(lambda_sum), unaries,
                                edge_weights, edges)
        primal_history.append(primal)
        dual_history.append(dual)

        if iteration and (np.abs(dual - dual_history[-2]) < tol
                          or p_norm < tol):
            if verbose:
                print 'Converged'
            break

        if iteration:
            if strategy == 'sqrt':
                learning_rate = 1. / np.sqrt(iteration)
            elif strategy == 'linear':
                learning_rate = 1. / iteration
            elif strategy == 'best-dual':
                best_dual = min(best_dual, dual)
                approx = best_dual - delta
                # Widen the gap estimate while the dual improves,
                # shrink it otherwise.
                if dual <= dual_history[-2]:
                    delta *= r0
                else:
                    delta = max(r1 * delta, 1e-4)
                learning_rate = gamma * (dual - approx) / p_norm
            elif strategy == 'best-primal':
                best_primal = max(best_primal, primal)
                learning_rate = gamma * (dual - best_primal) / p_norm

        if verbose:
            print 'iteration {}: dual energy = {}'.format(iteration, dual)

    info = {'dual_energy': dual_history, 'primal_energy': primal_history}
    return lambda_sum, info
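
# Toy, self-contained comparison (not from the original code) of the step-size
# rules above on minimizing f(x) = sum(|x|). Its optimum f* = 0 lets a Polyak
# step (f(x) - f*) / ||g||^2 be computed exactly; the 1/sqrt(t) schedule is the
# default 'sqrt' strategy.
def _demo_step_rules():
    for rule in ['sqrt', 'polyak']:
        x = np.array([3.0, -2.0])
        for t in xrange(1, 200):
            g = np.sign(x)  # a subgradient of sum(|x|)
            if rule == 'sqrt':
                lr = 1. / np.sqrt(t)
            else:
                lr = np.sum(np.abs(x)) / max(np.sum(g ** 2), 1e-12)
            x -= lr * g
        print rule, np.sum(np.abs(x))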

def fit(self, X, Y, train_scorer, test_scorer, decompose='grid', w0=None):
    print('over unconstr begin')
    if decompose == 'general':
        contains_node, chains, edge_index = decompose_graph(X)
    elif decompose == 'grid':
        contains_node, chains, edge_index, sign = decompose_grid_graph(
            X, get_sign=True)
    else:
        raise ValueError

    y_hat = []
    lambdas = []
    multiplier = []

    for k in xrange(len(X)):
        n_nodes = X[k][0].shape[0]

        _y_hat = []
        _multiplier = []

        for p in xrange(n_nodes):
            _multiplier.append(1.0 / len(contains_node[k][p]))

        for chain in chains[k]:
            _y_hat.append(np.zeros(len(chain)))

        lambdas.append(np.zeros((n_nodes, self.n_states)))
        y_hat.append(_y_hat)

        _multiplier = np.array(_multiplier)
        _multiplier.shape = (n_nodes, 1)
        multiplier.append(_multiplier)

    w = np.zeros(self.size_w)
    self.w = w.copy()

    self.start_time = time.time()
    self.timestamps = [0]
    self.objective_curve = []

    history = {'train_scores': [],
               'test_scores': [],
               'objective': [],
               'iteration': 0,
               'w': []}

    self.train_scorer = train_scorer
    self.test_scorer = test_scorer

    #x0 = np.zeros(self.size_w + 4000 * len(X))
    x0 = np.zeros(self.size_w)
    if w0 is not None:
        x0 = w0

    # Plain subgradient descent on f2, kept for reference:
    # l = 0.1
    # for iteration in xrange(100):
    #     fval, grad = f2(w, self, X, Y, history)
    #     w -= l * grad
    #     if iteration:
    #         l = 0.01 / iteration

    x, f_val, d = fmin_l_bfgs_b(f2, x0, args=(self, X, Y, history),
                                maxiter=self.max_iter, disp=0, pgtol=1e-8)

    # Return the L-BFGS solution; the original returned the untouched w
    # (all zeros), discarding the optimization result.
    return x, history

def fit(self, X, Y, train_scorer, test_scorer, decompose='general'):
    self.logger.info('Initialization')
    if decompose == 'general':
        contains_node, chains, edge_index = decompose_graph(X)
    elif decompose == 'grid':
        contains_node, chains, edge_index = decompose_grid_graph(X)
    else:
        raise ValueError

    y_hat = []
    lambdas = []
    multiplier = []

    for k in xrange(len(X)):
        n_nodes = X[k][0].shape[0]

        _lambdas = []
        _y_hat = []
        _multiplier = []

        for p in xrange(n_nodes):
            _multiplier.append(1.0 / len(contains_node[k][p]))

        for chain in chains[k]:
            _lambdas.append(np.zeros((len(chain), self.n_states)))
            _y_hat.append(np.zeros(len(chain)))

        lambdas.append(_lambdas)
        y_hat.append(_y_hat)

        _multiplier = np.array(_multiplier)
        _multiplier.shape = (n_nodes, 1)
        multiplier.append(_multiplier)

    w = np.zeros(self.size_w)
    self.w = w.copy()

    self.start_time = time.time()
    self.timestamps = [0]
    self.objective_curve = []
    self.train_score = []
    self.test_score = []
    self.w_history = []

    learning_rate = 0.1

    for iteration in xrange(self.max_iter):
        self.logger.info('Iteration %d', iteration)
        self.logger.info('Optimize slave MRF and update w')

        objective = 0
        dw = np.zeros(w.shape)

        for k in xrange(len(X)):
            self.logger.info('object %d', k)
            x, y = X[k], Y[k]
            n_nodes = x[0].shape[0]

            unaries = self._loss_augment_unaries(
                self._get_unary_potentials(x, w), y.full, y.weights)
            unaries *= multiplier[k]
            pairwise = self._get_pairwise_potentials(x, w)

            objective += np.dot(w, self._joint_features_full(x, y.full))
            dw -= self._joint_features_full(x, y.full)

            for i in xrange(len(chains[k])):
                y_hat[k][i], energy = optimize_chain(
                    chains[k][i],
                    lambdas[k][i] + unaries[chains[k][i], :],
                    pairwise, edge_index[k])

                dw += self._joint_features(chains[k][i], x, y_hat[k][i],
                                           edge_index[k], multiplier[k])
                objective -= energy

        dw -= w / self.C
        w += learning_rate * dw

        objective = self.C * objective + np.sum(w ** 2) / 2

        if iteration and (iteration % self.check_every == 0):
            self.logger.info('Compute train and test scores')
            self.train_score.append(train_scorer(w))
            self.logger.info('Train SCORE: %f', self.train_score[-1])
            self.test_score.append(test_scorer(w))
            self.logger.info('Test SCORE: %f', self.test_score[-1])

        self.logger.info('Update lambda')
        for k in xrange(len(X)):
            n_nodes = X[k][0].shape[0]

            lambda_sum = np.zeros((n_nodes, self.n_states), dtype=np.float64)
            for p in xrange(n_nodes):
                for i in contains_node[k][p]:
                    pos = np.where(chains[k][i] == p)[0][0]
                    lambda_sum[p, y_hat[k][i][pos]] += multiplier[k][p]

            for i in xrange(len(chains[k])):
                N = lambdas[k][i].shape[0]
                lambdas[k][i][np.ogrid[:N], y_hat[k][i]] += learning_rate
                lambdas[k][i] -= learning_rate * lambda_sum[chains[k][i], :]

        self.logger.info('diff: %f', np.sum((w - self.w) ** 2))

        if iteration:
            learning_rate = 1.0 / iteration

        self.timestamps.append(time.time() - self.start_time)
        self.objective_curve.append(objective)
        self.logger.info('Objective: %f', objective)

        self.w = w.copy()
        self.w_history.append(self.w)

    self.w = w
    self.timestamps = np.array(self.timestamps)
    self.objective_curve = np.array(self.objective_curve)
    self.train_score = np.array(self.train_score)
    self.test_score = np.array(self.test_score)
    self.w_history = np.vstack(self.w_history)

def fit(self, X, Y, train_scorer, test_scorer, decompose='grid'):
    # Variant with a single signed lambda table per object instead of one
    # table per chain; see the sign-convention sketch after this function.
    print('over unconstr begin')
    if decompose == 'general':
        contains_node, chains, edge_index = decompose_graph(X)
    elif decompose == 'grid':
        contains_node, chains, edge_index, sign = decompose_grid_graph(
            X, get_sign=True)
    else:
        raise ValueError

    y_hat = []
    lambdas = []
    multiplier = []

    for k in xrange(len(X)):
        n_nodes = X[k][0].shape[0]

        _y_hat = []
        _multiplier = []

        for p in xrange(n_nodes):
            _multiplier.append(1.0 / len(contains_node[k][p]))

        for chain in chains[k]:
            _y_hat.append(np.zeros(len(chain)))

        lambdas.append(np.zeros((n_nodes, self.n_states)))
        y_hat.append(_y_hat)

        _multiplier = np.array(_multiplier)
        _multiplier.shape = (n_nodes, 1)
        multiplier.append(_multiplier)

    w = np.zeros(self.size_w)
    self.w = w.copy()

    self.start_time = time.time()
    self.timestamps = [0]
    self.objective_curve = []
    self.train_score = []
    self.test_score = []
    self.w_history = []

    learning_rate = 0.1

    for iteration in xrange(self.max_iter):
        print('Iteration %d' % iteration)

        objective = 0
        dw = np.zeros(w.shape)

        for k in xrange(len(X)):
            x, y = X[k], Y[k]
            n_nodes = x[0].shape[0]

            unaries = self._loss_augment_unaries(
                self._get_unary_potentials(x, w), y.full, y.weights)
            unaries *= multiplier[k]
            pairwise = self._get_pairwise_potentials(x, w)

            objective -= np.dot(w, self._joint_features_full(x, y.full))
            dw -= self._joint_features_full(x, y.full)

            for i in xrange(len(chains[k])):
                y_hat[k][i], energy = optimize_chain(
                    chains[k][i],
                    sign[k][i] * lambdas[k][chains[k][i], :]
                    + unaries[chains[k][i], :],
                    pairwise, edge_index[k])

                dw += self._joint_features(chains[k][i], x, y_hat[k][i],
                                           edge_index[k], multiplier[k])
                objective += energy

        dw += w / self.C
        w -= learning_rate * dw

        objective = self.C * objective + np.sum(w ** 2) / 2

        if iteration and (iteration % self.check_every == 0):
            print('Compute train and test scores')
            self.train_score.append(train_scorer(w))
            print('Train SCORE: %f' % self.train_score[-1])
            self.test_score.append(test_scorer(w))
            print('Test SCORE: %f' % self.test_score[-1])

        for k in xrange(len(X)):
            n_nodes = X[k][0].shape[0]
            for p in xrange(n_nodes):
                dlambda = np.zeros(self.n_states)
                for i in contains_node[k][p]:
                    pos = np.where(chains[k][i] == p)[0][0]
                    dlambda[y_hat[k][i][pos]] += sign[k][i]
                lambdas[k][p] -= learning_rate * dlambda

        if iteration:
            learning_rate = 1.0 / iteration

        self.timestamps.append(time.time() - self.start_time)
        self.objective_curve.append(objective)

        self.w = w.copy()
        self.w_history.append(self.w)

    self.w = w
    self.timestamps = np.array(self.timestamps)
    self.objective_curve = np.array(self.objective_curve)
    self.train_score = np.array(self.train_score)
    self.test_score = np.array(self.test_score)
    self.w_history = np.vstack(self.w_history)
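
# Self-contained sketch of the sign convention above (an assumption about
# decompose_grid_graph(get_sign=True)): each grid node is covered by exactly
# two chains, one with sign +1 and one with sign -1, so a single lambda table
# per object suffices and the per-node update cancels when the chains agree.
def _demo_sign_update():
    n_states = 3
    sign = [+1, -1]   # the two chains covering one node
    labels = [2, 0]   # labels those chains picked at this node
    dlambda = np.zeros(n_states)
    for s, l in zip(sign, labels):
        dlambda[l] += s
    print dlambda     # all zeros iff both chains chose the same label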