def partial_EM(self, data, cond_muh_ijk, indices, weights=None, eps=1e-4, maxiter=10, verbose=0):
    (i, j, k) = indices
    converged = False
    previous_L = utilities.average(
        self.likelihood(data), weights=weights) / self.N
    mini_epochs = 0
    if verbose:
        print('Partial EM %s, L = %.3f' % (mini_epochs, previous_L))
    while not converged:
        if self.nature in ['Bernoulli', 'Spin']:
            f = np.dot(data, self.weights[[i, j, k], :].T)
        elif self.nature == 'Potts':
            f = cy_utilities.compute_output_C(data, self.weights[[i, j, k], :, :],
                                              np.zeros([data.shape[0], 3], dtype=curr_float))
        tmp = f - self.logZ[np.newaxis, [i, j, k]]
        tmp -= tmp.max(-1)[:, np.newaxis]
        cond_muh = np.exp(tmp) * self.muh[np.newaxis, [i, j, k]]
        cond_muh /= cond_muh.sum(-1)[:, np.newaxis]
        cond_muh *= cond_muh_ijk[:, np.newaxis]
        self.muh[[i, j, k]] = utilities.average(cond_muh, weights=weights)
        self.cum_muh = np.cumsum(self.muh)
        self.gh[[i, j, k]] = np.log(self.muh[[i, j, k]])
        self.gh -= self.gh.mean()
        if self.nature == 'Bernoulli':
            self.cond_muv[[i, j, k]] = utilities.average_product(
                cond_muh, data, mean1=True, weights=weights) / self.muh[[i, j, k], np.newaxis]
            self.weights[[i, j, k]] = np.log(
                (self.cond_muv[[i, j, k]] + eps) / (1 - self.cond_muv[[i, j, k]] + eps))
            self.logZ[[i, j, k]] = np.logaddexp(
                0, self.weights[[i, j, k]]).sum(-1)
        elif self.nature == 'Spin':
            self.cond_muv[[i, j, k]] = utilities.average_product(
                cond_muh, data, mean1=True, weights=weights) / self.muh[[i, j, k], np.newaxis]
            self.weights[[i, j, k]] = 0.5 * np.log(
                (1 + self.cond_muv[[i, j, k]] + eps) / (1 - self.cond_muv[[i, j, k]] + eps))
            self.logZ[[i, j, k]] = np.logaddexp(
                self.weights[[i, j, k]], -self.weights[[i, j, k]]).sum(-1)
        elif self.nature == 'Potts':
            self.cond_muv[[i, j, k]] = utilities.average_product(
                cond_muh, data, c2=self.n_c, mean1=True, weights=weights) / self.muh[[i, j, k], np.newaxis, np.newaxis]
            self.cum_cond_muv[[i, j, k]] = np.cumsum(
                self.cond_muv[[i, j, k]], axis=-1)
            self.weights[[i, j, k]] = np.log(self.cond_muv[[i, j, k]] + eps)
            self.weights[[i, j, k]] -= self.weights[[i, j, k]].mean(-1)[:, :, np.newaxis]
            self.logZ[[i, j, k]] = utilities.logsumexp(
                self.weights[[i, j, k]], axis=-1).sum(-1)
        current_L = utilities.average(
            self.likelihood(data), weights=weights) / self.N
        mini_epochs += 1
        converged = (mini_epochs >= maxiter) | (np.abs(current_L - previous_L) < eps)
        previous_L = current_L.copy()
        if verbose:
            print('Partial EM %s, L = %.3f' % (mini_epochs, current_L))
    return current_L
def split_merge_criterion(self, data, Cmax=5, weights=None):
    likelihood, cond_muh = self.likelihood_and_expectation(data)
    norm = np.sqrt(utilities.average(cond_muh**2, weights=weights))
    J_merge = utilities.average_product(
        cond_muh, cond_muh, weights=weights) / (1e-10 + norm[np.newaxis, :] * norm[:, np.newaxis])
    J_merge = np.triu(J_merge, 1)
    proposed_merge = np.argsort(J_merge.flatten())[::-1][:Cmax]
    proposed_merge = [(merge % self.M, merge // self.M)
                      for merge in proposed_merge]
    tmp = cond_muh / self.muh[np.newaxis, :]
    if weights is None:
        J_split = np.array(
            [utilities.average(likelihood, weights=tmp[:, m]) for m in range(self.M)])
    else:
        J_split = np.array([utilities.average(
            likelihood, weights=tmp[:, m] * weights) for m in range(self.M)])
    proposed_split = np.argsort(J_split)[:3]
    proposed_merge_split = []
    for merge1, merge2 in proposed_merge:
        if proposed_split[0] in [merge1, merge2]:
            if proposed_split[1] in [merge1, merge2]:
                proposed_merge_split.append((merge1, merge2, proposed_split[2]))
            else:
                proposed_merge_split.append((merge1, merge2, proposed_split[1]))
        else:
            proposed_merge_split.append((merge1, merge2, proposed_split[0]))
    return proposed_merge_split
def get_cross_derivatives_Gaussian(V_pos, psi_pos, hlayer, n_cv, weights=None):
    db_dw = average(V_pos, c=n_cv, weights=weights)
    da_db = np.zeros(hlayer.N)
    WChat = covariance(psi_pos, V_pos, weights=weights, c1=1, c2=n_cv)
    var_e = average(psi_pos**2, weights=weights) - average(psi_pos, weights=weights)**2
    if n_cv > 1:
        da_dw = 2 / np.sqrt(1 + 4 * var_e)[:, np.newaxis, np.newaxis] * WChat
    else:
        da_dw = 2 / np.sqrt(1 + 4 * var_e)[:, np.newaxis] * WChat
    return db_dw, da_db, da_dw
def likelihood(model, data, data_test, weights=None, weights_test=None, n_betas_AIS=20000, M_AIS=10):
    model.AIS(n_betas=n_betas_AIS, M=M_AIS, beta_type='linear')
    l = utilities.average(model.likelihood(data), weights=weights)
    l_test = utilities.average(model.likelihood(data_test), weights=weights_test)
    return [l, l_test]
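# --- Usage sketch (added for illustration, not part of the original source). ---
# Assumes a trained model exposing AIS() and likelihood() (e.g. an RBM/MoI from this
# package) and numpy arrays `train_data` / `test_data` of samples.
def _demo_ais_likelihood(model, train_data, test_data):
    # Estimate the partition function once with AIS, then score both splits.
    train_lik, test_lik = likelihood(model, train_data, test_data,
                                     n_betas_AIS=5000, M_AIS=10)
    print('Likelihood per sample: train %.3f, test %.3f' % (train_lik, test_lik))
    return train_lik, test_lik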
def auto_correl(data, nmax=None, nature='Bernoulli', n_c=1):
    B = data.shape[0]
    L = data.shape[1]
    N = data.shape[2]
    if nmax is None:
        nmax = L // 2  # integer division: nmax is used as a slice index below.
    if n_c == 1:
        data_hat = np.fft.fft(np.real(data), axis=1)
        C = np.real(np.fft.ifft(np.abs(data_hat)**2, axis=1)).mean(0).mean(-1) / float(L)
        mu = data.mean(0).mean(0)
        if nature == 'Bernoulli':
            C_hat = 1 + 2 * C - 2 * mu.mean() - (mu**2 + (1 - mu)**2).mean()
        elif nature == 'Spin':
            C_hat = (1 + C) / 2 - (((1 + mu) / 2)**2 + ((1 - mu) / 2)**2).mean()
        return C_hat[:nmax] / C_hat[0]
    else:
        C = np.zeros(L)
        mu = utilities.average(data.reshape([B * L, N]), c=n_c)
        for c in range(n_c):
            data_ = (data == c)
            data_hat = np.fft.fft(np.real(data_), axis=1)
            C += np.real(np.fft.ifft(np.abs(data_hat)**2, axis=1)).mean(0).mean(-1) / float(L)
        C_hat = C - (mu**2).mean() * n_c
        return C_hat[:nmax] / C_hat[0]
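# --- Usage sketch (illustrative, not in the original source). ---
# auto_correl expects Monte Carlo trajectories shaped (n_chains, chain_length, N);
# the normalized autocorrelation it returns can serve as a decorrelation-time estimate.
def _demo_autocorrelation_time(chains, nature='Potts', n_c=21):
    C = auto_correl(chains, nature=nature, n_c=n_c)
    # First lag at which the autocorrelation drops below 1/e (rough mixing-time proxy).
    below = np.nonzero(C < np.exp(-1))[0]
    tau = below[0] if len(below) > 0 else len(C)
    return C, tau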
def symKL(self, PGM, data, weights=None):
    n_samples = data.shape[0]
    data_moi, _ = self.gen_data(n_samples)
    D = -utilities.average(self.likelihood(data) + PGM.free_energy(data), weights=weights) + (
        self.likelihood(data_moi) + PGM.free_energy(data_moi)).mean()
    D /= self.N
    return D
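# --- Usage sketch (illustrative, not in the original source). ---
# symKL estimates a symmetrized KL divergence between this mixture model and a PGM
# exposing free_energy (e.g. an RBM), using data for one direction and samples drawn
# from the mixture for the other.
def _demo_symmetric_kl(moi_model, rbm_model, data, weights=None):
    return moi_model.symKL(rbm_model, data, weights=weights)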
def weights_to_couplings_approx(RBM, data, weights=None):
    psi = RBM.vlayer.compute_output(data, RBM.weights)
    var = RBM.hlayer.var_from_inputs(psi)
    mean_var = utilities.average(var, weights=weights)
    J_eff = np.tensordot(RBM.weights, RBM.weights * mean_var[:, np.newaxis, np.newaxis], axes=[0, 0])
    J_eff = np.swapaxes(J_eff, 1, 2)
    return J_eff
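# --- Usage sketch (illustrative, not in the original source). ---
# weights_to_couplings_approx maps the RBM weights to an approximate pairwise coupling
# tensor J_eff of shape (n_v, n_v, n_cv, n_cv) by contracting over hidden units,
# weighted by the average conditional variance of each hidden unit on the data.
def _demo_effective_couplings(RBM, data):
    J_eff = weights_to_couplings_approx(RBM, data)
    # A common contact-map style score: Frobenius norm over the color indices.
    F = np.sqrt((J_eff**2).sum(-1).sum(-1))
    return J_eff, F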
def print_wait_times(wait_times, string):
    print('Number of ' + string + ' in queue: ' + str(len(wait_times)) + ' players.')
    print('Average current ' + string + ' wait time: ' + str(average(wait_times)) + ' minutes')
    print('Median current ' + string + ' wait time: ' + str(median(wait_times)) + ' minutes')
    print('Modal current ' + string + ' wait time: ' + str(mode(wait_times)) + ' minutes')
    print('Max current ' + string + ' wait time: ' + str(max(wait_times)) + ' minutes')
def get_inception_score(data_gen, dataset, weights=None, path_data='data/', path_classifiers='classifiers/', M=20, eps=1e-10):
    try:
        classifier = pickle.load(
            open(path_classifiers + '%s_Classifier.data' % dataset, 'rb'))['classifier']
    except:
        print('Learning a classifier first....')
        train_env = {}
        exec('dataset_utils.load_%s(train_env,path=path_data)' % dataset)
        if 'train_labels' in train_env.keys():
            classifier = LogisticRegressionCV(n_jobs=5, multi_class='multinomial')
            classifier.fit(train_env['train_data'], train_env['train_labels'])
        else:
            nature, N, n_c = dataset_utils.infer_type_data(train_env['train_data'])
            classifier = moi.MoI(nature=nature, N=N, M=M, n_c=n_c)
            classifier.fit(train_env['train_data'], verbose=0, weights=train_env['train_weights'])
        pickle.dump({'classifier': classifier},
                    open(path_classifiers + '%s_Classifier.data' % dataset, 'wb'))
    if hasattr(classifier, 'predict_proba'):
        probas = classifier.predict_proba(data_gen)
    elif hasattr(classifier, 'expectation'):
        probas = classifier.expectation(data_gen)
    else:
        print('No expectation or predict_proba from classifier')
        return
    proba_av = utilities.average(probas, weights=weights)
    scores = (probas * np.log((probas + eps) / (proba_av + eps))).sum(-1)
    inception_score = np.exp(utilities.average(scores, weights=weights))
    return inception_score
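# --- Usage sketch (illustrative, not in the original source). ---
# get_inception_score scores generated samples with a classifier trained on the original
# dataset (a cached one is loaded from `path_classifiers` when available). The dataset
# name 'MNIST' below is only an example value.
def _demo_inception_score(generated_samples):
    return get_inception_score(generated_samples, 'MNIST', M=20)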
def get_hidden_input(data, RBM, normed=False, offset=True):
    if normed:
        mu = utilities.average(data, c=21)
        norm_null = np.sqrt(((RBM.weights**2 * mu).sum(-1) -
                             (RBM.weights * mu).sum(-1)**2).sum(-1))
        return (RBM.vlayer.compute_output(data, RBM.weights) -
                RBM.hlayer.b[np.newaxis, :]) / norm_null[np.newaxis, :]
    else:
        if offset:
            return (RBM.vlayer.compute_output(data, RBM.weights) -
                    RBM.hlayer.b[np.newaxis, :])
        else:
            return RBM.vlayer.compute_output(data, RBM.weights)
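# --- Usage sketch (illustrative, not in the original source). ---
# get_hidden_input returns the input I_mu(v) received by each hidden unit; with
# normed=True it is rescaled by a per-unit weight norm (the c=21 average assumes
# 21-state protein sequence data).
def _demo_hidden_inputs(RBM, data):
    I = get_hidden_input(data, RBM, normed=False, offset=True)
    I_normed = get_hidden_input(data, RBM, normed=True)
    return I, I_normed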
def minibatch_fit(self, data, weights=None, eps=1e-5, update=True):
    h = self.expectation(data)
    self.muh = self.learning_rate * utilities.average(h, weights=weights) + \
        (1 - self.learning_rate) * self.muh
    self.cum_muh = np.cumsum(self.muh)
    if update:
        self.gh = np.log(self.muh + eps)
        self.gh -= self.gh.mean()
    if self.nature == 'Bernoulli':
        self.muvh = self.learning_rate * utilities.average_product(
            h, data, weights=weights) + (1 - self.learning_rate) * self.muvh
        if update:
            self.cond_muv = self.muvh / self.muh[:, np.newaxis]
            self.weights = np.log(
                (self.cond_muv + eps) / (1 - self.cond_muv + eps))
    elif self.nature == 'Spin':
        # average_product (not average), mirroring the Bernoulli branch above.
        self.muvh = self.learning_rate * utilities.average_product(
            h, data, weights=weights) + (1 - self.learning_rate) * self.muvh
        if update:
            self.cond_muv = self.muvh / self.muh[:, np.newaxis]
            self.weights = 0.5 * \
                np.log((1 + self.cond_muv + eps) / (1 - self.cond_muv + eps))
    else:
        self.muvh = self.learning_rate * utilities.average_product(
            h, data, c2=self.n_c, weights=weights) + (1 - self.learning_rate) * self.muvh
        if update:
            self.cond_muv = self.muvh / self.muh[:, np.newaxis, np.newaxis]
            self.weights = np.log(self.cond_muv + eps)
            self.weights -= self.weights.mean(-1)[:, :, np.newaxis]
    if update:
        self.logpartition()
def minibatch_fit_symKL(self, data_PGM, PGM=None, data_MOI=None, F_PGM_dPGM=None, F_PGM_dMOI=None, F_MOI_dPGM=None, F_MOI_dMOI=None, cond_muh_dPGM=None, cond_muh_dMOI=None, weights=None): if data_MOI is None: data_MOI, _ = self.gen_data(data_PGM.shape[0]) if F_PGM_dPGM is None: F_PGM_dPGM = PGM.free_energy(data_PGM) if F_PGM_dMOI is None: F_PGM_dMOI = PGM.free_energy(data_MOI) if (F_MOI_dPGM is None) | (cond_muh_dPGM is None): F_MOI_dPGM, cond_muh_dPGM = self.likelihood_and_expectation( data_PGM) F_MOI_dPGM *= -1 if (F_MOI_dMOI is None) | (cond_muh_dMOI is None): F_MOI_dMOI, cond_muh_dMOI = self.likelihood_and_expectation( data_MOI) F_MOI_dMOI *= -1 delta_lik = -F_PGM_dMOI + F_MOI_dMOI delta_lik -= delta_lik.mean() self.gradient = {} self.gradient['gh'] = utilities.average( cond_muh_dPGM, weights=weights) - self.muh + (delta_lik[:, np.newaxis] * cond_muh_dMOI).mean(0) if self.nature in ['Bernoulli', 'Spin']: self.gradient['weights'] = utilities.average_product( cond_muh_dPGM, data_PGM, mean1=True, weights=weights) + utilities.average_product(cond_muh_dMOI * delta_lik[:, np.newaxis], data_MOI, mean1=True) self.gradient['weights'] -= self.muh[:, np.newaxis] * self.cond_muv elif self.nature == 'Potts': self.gradient['weights'] = utilities.average_product(cond_muh_dPGM, data_PGM, mean1=True, c2=self.n_c, weights=weights) + utilities.average_product( cond_muh_dMOI * delta_lik[:, np.newaxis], data_MOI, mean1=True, c2=self.n_c) self.gradient['weights'] -= self.muh[:, np.newaxis, np.newaxis] * self.cond_muv self.gh += self.learning_rate * self.gradient['gh'] self.weights += self.learning_rate * self.gradient['weights'] self.muh = np.exp(self.gh) self.muh /= self.muh.sum() self.cum_muh = np.cumsum(self.muh) if self.nature == 'Bernoulli': self.cond_muv = utilities.logistic(self.weights) elif self.nature == 'Spin': self.cond_muv = np.tanh(self.weights) elif self.nature == 'Potts': self.weights -= self.weights.mean(-1)[:, :, np.newaxis] self.cond_muv = np.exp(self.weights) self.cond_muv /= self.cond_muv.sum(-1)[:, :, np.newaxis] self.cum_cond_muv = np.cumsum(self.cond_muv, axis=-1) self.logpartition()
def maximization(self, data, cond_muh, weights=None, eps=1e-6):
    self.muh = utilities.average(cond_muh, weights=weights)
    self.cum_muh = np.cumsum(self.muh)
    self.gh = np.log(self.muh)
    self.gh -= self.gh.mean()
    if self.nature == 'Bernoulli':
        self.cond_muv = utilities.average_product(
            cond_muh, data, mean1=True, weights=weights) / self.muh[:, np.newaxis]
        self.weights = np.log((self.cond_muv + eps) / (1 - self.cond_muv + eps))
    elif self.nature == 'Spin':
        self.cond_muv = utilities.average_product(
            cond_muh, data, mean1=True, weights=weights) / self.muh[:, np.newaxis]
        self.weights = 0.5 * \
            np.log((1 + self.cond_muv + eps) / (1 - self.cond_muv + eps))
    elif self.nature == 'Potts':
        self.cond_muv = utilities.average_product(
            cond_muh, data, c2=self.n_c, mean1=True, weights=weights) / self.muh[:, np.newaxis, np.newaxis]
        self.cum_cond_muv = np.cumsum(self.cond_muv, axis=-1)
        self.weights = np.log(self.cond_muv + eps)
        self.weights -= self.weights.mean(-1)[:, :, np.newaxis]
    self.logpartition()
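# --- Usage sketch (illustrative, not in the original source). ---
# maximization is the M-step of EM for this mixture model; together with expectation
# (the E-step) it gives the classical batch EM loop, roughly as used in fit below.
def _demo_em_loop(model, data, weights=None, n_iter=20):
    for _ in range(n_iter):
        cond_muh = model.expectation(data)                    # E-step: responsibilities
        model.maximization(data, cond_muh, weights=weights)   # M-step: update parameters
    return utilities.average(model.likelihood(data), weights=weights) / model.N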
def _Ker_weights_to_couplings_exact(x, RBM, data, weights=None, nbins=10):
    N = RBM.n_v
    M = RBM.n_h
    c = RBM.n_cv
    Jij = np.zeros([c, c])
    i = x // N  # integer division: x encodes the site pair (i, j).
    j = x % N
    L = layer.Layer(N=1, nature=RBM.hidden)
    tmpW = RBM.weights.copy()
    subsetW = tmpW[:, [i, j], :].copy()
    tmpW[:, [i, j], :] *= 0
    psi_restr = RBM.vlayer.compute_output(data, tmpW)
    for m in range(M):
        count, hist = np.histogram(psi_restr[:, m], bins=nbins, weights=weights)
        hist = (hist[:-1] + hist[1:]) / 2  # bin edges -> bin centers
        hist_mod = (hist[:, np.newaxis, np.newaxis] +
                    subsetW[m, 0][np.newaxis, :, np.newaxis] +
                    subsetW[m, 1][np.newaxis, np.newaxis, :]).reshape([nbins * c**2, 1])
        if RBM.hidden == 'Gaussian':
            L.a[0] = RBM.hlayer.a[m]
            L.b[0] = RBM.hlayer.b[m]
        elif RBM.hidden == 'dReLU':
            L.a_plus[0] = RBM.hlayer.a_plus[m]
            L.a_minus[0] = RBM.hlayer.a_minus[m]
            L.theta_plus[0] = RBM.hlayer.theta_plus[m]
            L.theta_minus[0] = RBM.hlayer.theta_minus[m]
        Phi = utilities.average(L.logpartition(hist_mod).reshape([nbins, c, c]), weights=count)
        Jij += (Phi[:, :, np.newaxis, np.newaxis] + Phi[np.newaxis, np.newaxis, :, :] -
                Phi[np.newaxis, :, :, np.newaxis].T - Phi[:, np.newaxis, np.newaxis, :]).sum(-1).sum(-1) / c**2
    return Jij
def fit(self, data, batch_size=100, nchains=100, learning_rate=None, extra_params=None, init='independent', optimizer='SGD', N_PT=1, N_MC=1, n_iter=10, lr_decay=True, lr_final=None, decay_after=0.5, l1=0, l1b=0, l1c=0, l2=0, l2_fields=0, no_fields=False, batch_norm=False, update_betas=None, record_acceptance=None, epsilon=1e-6, verbose=1, record=[], record_interval=100, p=[1, 0, 0], pseudo_count=0, weights=None): self.nchains = nchains self.optimizer = optimizer self.record_swaps = False self.batch_norm = batch_norm self.layer.batch_norm = batch_norm self.n_iter = n_iter if learning_rate is None: if self.nature in ['Bernoulli', 'Spin', 'Potts']: learning_rate = 0.1 else: learning_rate = 0.01 if self.optimizer == 'ADAM': learning_rate *= 0.1 self.learning_rate = learning_rate self.lr_decay = lr_decay if self.lr_decay: self.decay_after = decay_after self.start_decay = self.n_iter * self.decay_after if lr_final is None: self.lr_final = 1e-2 * self.learning_rate else: self.lr_final = lr_final self.decay_gamma = (float(self.lr_final) / float(self.learning_rate))**( 1 / float(self.n_iter * (1 - self.decay_after))) self.gradient = self.initialize_gradient_dictionary() if self.optimizer == 'momentum': if extra_params is None: extra_params = 0.9 self.momentum = extra_params self.previous_update = self.initialize_gradient_dictionary() elif self.optimizer == 'ADAM': if extra_params is None: extra_params = [0.9, 0.999, 1e-8] self.beta1 = extra_params[0] self.beta2 = extra_params[1] self.epsilon = extra_params[2] self.gradient_moment1 = self.initialize_gradient_dictionary() self.gradient_moment2 = self.initialize_gradient_dictionary() if weights is not None: weights = np.asarray(weights, dtype=float) mean = utilities.average(data, c=self.n_c, weights=weights) covariance = utilities.average_product(data, data, c1=self.n_c, c2=self.n_c, weights=weights) if pseudo_count > 0: p = data.shape[0] / float(data.shape[0] + pseudo_count) covariance = p**2 * covariance + p * \ (1 - p) * (mean[np.newaxis, :, np.newaxis, :] * mean[:, np.newaxis, :, np.newaxis]) / self.n_c + (1 - p)**2 / self.n_c**2 mean = p * mean + (1 - p) / self.n_c iter_per_epoch = data.shape[0] // batch_size if init != 'previous': norm_init = 0 self.init_couplings(norm_init) if init == 'independent': self.layer.init_params_from_data(data, eps=epsilon, value='data') self.N_PT = N_PT self.N_MC = N_MC self.l1 = l1 self.l1b = l1b self.l1c = l1c self.l2 = l2 self.tmp_l2_fields = l2_fields self.no_fields = no_fields if self.N_PT > 1: if record_acceptance == None: record_acceptance = True self.record_acceptance = record_acceptance if update_betas == None: update_betas = True self._update_betas = update_betas if self.record_acceptance: self.mavar_gamma = 0.95 self.acceptance_rates = np.zeros(N_PT - 1) self.mav_acceptance_rates = np.zeros(N_PT - 1) self.count_swaps = 0 if self._update_betas: record_acceptance = True self.update_betas_lr = 0.1 self.update_betas_lr_decay = 1 if self._update_betas | (not hasattr(self, 'betas')): self.betas = np.arange(N_PT) / float(N_PT - 1) self.betas = self.betas[::-1] if (len(self.betas) != N_PT): self.betas = np.arange(N_PT) / float(N_PT - 1) self.betas = self.betas[::-1] if self.nature == 'Potts': (self.fantasy_x, self.fantasy_fields_eff) = self.layer.sample_from_inputs(np.zeros( [self.N_PT * self.nchains, self.N, self.n_c]), beta=0) else: (self.fantasy_x, self.fantasy_fields_eff) = self.layer.sample_from_inputs(np.zeros( [self.N_PT * self.nchains, self.N]), beta=0) if self.N_PT > 1: self.fantasy_x = 
self.fantasy_x.reshape( [self.N_PT, self.nchains, self.N]) if self.nature == 'Potts': self.fantasy_fields_eff = self.fantasy_fields_eff.reshape( [self.N_PT, self.nchains, self.N, self.n_c]) else: self.fantasy_fields_eff = self.fantasy_fields_eff.reshape( [self.N_PT, self.nchains, self.N]) self.fantasy_E = np.zeros([self.N_PT, self.nchains]) self.count_updates = 0 if verbose: if weights is not None: lik = (self.pseudo_likelihood(data) * weights).sum() / weights.sum() else: lik = self.pseudo_likelihood(data).mean() print('Iteration number 0, pseudo-likelihood: %.2f' % lik) result = {} if 'J' in record: result['J'] = [] if 'F' in record: result['F'] = [] count = 0 for epoch in range(1, n_iter + 1): if verbose: begin = time.time() if self.lr_decay: if (epoch > self.start_decay): self.learning_rate *= self.decay_gamma print('Starting epoch %s' % (epoch)) for _ in range(iter_per_epoch): self.minibatch_fit(mean, covariance) if (count % record_interval == 0): if 'J' in record: result['J'].append(self.layer.couplings.copy()) if 'F' in record: result['F'].append(self.layer.fields.copy()) count += 1 if verbose: end = time.time() if weights is not None: lik = (self.pseudo_likelihood(data) * weights).sum() / weights.sum() else: lik = self.pseudo_likelihood(data).mean() print("[%s] Iteration %d, pseudo-likelihood = %.2f," " time = %.2fs" % (type(self).__name__, epoch, lik, end - begin)) return result
def fit(self, data, weights=None, init_bias=0.1, verbose=1, eps=1e-5, maxiter=100, split_merge=True): # B = data.shape[0] initial_centroids = KMPP_choose_centroids( data, self.M, verbose=verbose) # initial_centroids = np.argsort(np.random.rand(B))[:self.M] if self.nature == 'Bernoulli': self.weights += init_bias / self.N * \ (data[initial_centroids] - 0.5) elif self.nature == 'Spin': self.weights += 0.25 * init_bias / self.N * data[initial_centroids] elif self.nature == 'Potts': self.weights += init_bias / self.N * \ binarize(data[initial_centroids], self.n_c) - \ init_bias / (self.n_c * self.N) n_epoch = 0 converged = (n_epoch >= maxiter) # if nothing... previous_L = utilities.average( self.likelihood(data), weights=weights) / self.N current_L = previous_L.copy() if self.M < 3: split_merge = False if verbose: print('Iteration 0, L = %.3f' % current_L) while not converged: cond_muh = self.expectation(data) self.maximization(data, cond_muh, weights=weights) previous_L = current_L.copy() current_L = utilities.average( self.likelihood(data), weights=weights) / self.N n_epoch += 1 converged = (n_epoch >= maxiter) | ( np.abs(current_L - previous_L) < eps) if verbose: print('Iteration %s, L = %.3f' % (n_epoch, current_L)) if split_merge: converged2 = False while not converged2: current_weights = self.weights.copy() current_cond_muv = self.cond_muv.copy() current_gh = self.gh.copy() current_muh = self.muh.copy() # current_cum_muh = self.cum_muh.copy() current_logZ = self.logZ.copy() if self.nature == 'Potts': current_cum_cond_muv = self.cum_cond_muv.copy() previous_L = current_L.copy() current_cond_muh = self.expectation(data) proposed_merge_splits = self.split_merge_criterion( data, Cmax=5, weights=weights) for proposed_merge_split in proposed_merge_splits: self.merge_split(proposed_merge_split) proposed_L = self.partial_EM(data, current_cond_muh[:, proposed_merge_split].sum( -1), proposed_merge_split, weights=weights, eps=eps, maxiter=10, verbose=verbose) converged3 = False while not converged3: cond_muh = self.expectation(data) self.maximization(data, cond_muh, weights=weights) previous_proposed_L = proposed_L.copy() proposed_L = utilities.average( self.likelihood(data), weights=weights) / self.N n_epoch += 1 converged3 = (n_epoch >= maxiter) | ( np.abs(proposed_L - previous_proposed_L) < eps) if proposed_L - current_L > eps: current_L = proposed_L.copy() if verbose: print('Iteration %s, Split-Merge (%s,%s,%s) accepted, L = %.3f' % ( n_epoch, proposed_merge_split[0], proposed_merge_split[1], proposed_merge_split[2], current_L)) break else: self.weights = current_weights.copy() self.cond_muv = current_cond_muv.copy() self.gh = current_gh.copy() self.muh = current_muh.copy() self.cum_muh = self.cum_muh.copy() self.logZ = current_logZ.copy() if self.nature == 'Potts': self.cum_cond_muv = current_cum_cond_muv.copy() if verbose: print('Iteration %s, Split-Merge (%s,%s,%s) denied, Proposed L = %.3f' % ( n_epoch, proposed_merge_split[0], proposed_merge_split[1], proposed_merge_split[2], proposed_L)) converged2 = (np.abs(current_L - previous_L) < eps) | (n_epoch >= 2 * maxiter) return current_L
def fit_online(self, data, weights=None, batch_size=100, learning_rate=0.01, lr_final=None, n_iter=10, lr_decay=True, decay_after=0.5, verbose=1, shuffle_data=True, print_every=5, init_bias=0.001, init=None): n_samples = data.shape[0] n_batches = int(np.ceil(float(n_samples) / batch_size)) batch_slices = list(utilities.gen_even_slices(n_batches * batch_size, n_batches, n_samples)) # learning_rate_init = copy.copy(learning_rate) self.learning_rate = learning_rate if lr_decay: start_decay = n_iter * decay_after if lr_final is None: lr_final = 1e-2 * learning_rate decay_gamma = (float(lr_final) / float(learning_rate) )**(1 / float(n_iter * (1 - decay_after))) B = data.shape[0] if not init == 'previous': # initial_centroids = KMPP_choose_centroids(data,self.M) initial_centroids = np.argsort(np.random.rand(B))[:self.M] if self.nature == 'Bernoulli': self.weights += init_bias * (data[initial_centroids] - 0.5) elif self.nature == 'Spin': self.weights += 0.25 * init_bias * data[initial_centroids] elif self.nature == 'Potts': self.weights += init_bias * \ binarize(data[initial_centroids], self.n_c) - \ init_bias / self.n_c if self.nature == 'Bernoulli': self.muvh = np.ones( [self.M, self.N], dtype=curr_float) / (2.0 * self.M) elif self.nature == 'Spin': self.muvh = np.zeros([self.M, self.N], dtype=curr_float) else: self.muvh = np.ones( [self.M, self.N, self.n_c], dtype=curr_float) / (self.n_c * self.M) else: if not hasattr(self, 'muvh'): if self.nature == 'Potts': self.muvh = self.cond_muv * \ self.muh[:, np.newaxis, np.newaxis] else: self.muvh = self.cond_muv * self.muh[:, np.newaxis] if shuffle_data: if weights is not None: permute = np.arange(data.shape[0]) self.random_state.shuffle(permute) weights = weights[permute] data = data[permute, :] else: self.random_state.shuffle(data) if verbose: print('Epoch 0: Lik = %.4f' % (utilities.average( self.likelihood(data), weights=weights) / self.N)) for epoch in range(0, n_iter + 1): if verbose: begin = time.time() print('Starting epoch %s' % epoch) if epoch == 0: update = False else: update = True if lr_decay: if (epoch > start_decay): self.learning_rate *= decay_gamma for batch_slice in batch_slices: if weights is None: data_mini = data[batch_slice] weights_mini = None else: data_mini = data[batch_slice] weights_mini = weights[batch_slice] self.minibatch_fit( data_mini, weights=weights_mini, update=update) if verbose: t = time.time() - begin if epoch % print_every == 0: print('Finished epoch %s: time =%.2f s, Lik = %.4f' % ( epoch, t, utilities.average(self.likelihood(data), weights=weights) / self.N)) if shuffle_data: if weights is not None: permute = np.arange(data.shape[0]) self.random_state.shuffle(permute) weights = weights[permute] data = data[permute, :] else: self.random_state.shuffle(data)
def print_status(self): print('Queue has been running for ' + str(self.time) + ' minutes') print('Successfully placed ' + str(self.successes * 12) + ' players.') print('Failed to place ' + str(len(self.waiting_room)) + ' players.') print('Skipped ' + str( len([ player for player in self.waiting_room if player.tested == False ])) + ' players') def print_wait_times(wait_times, string): print('Number of ' + string + ' in queue: ' + str(len(wait_times)) + ' player.') print('Average current ' + string + ' wait time: ' + str(average(wait_times)) + ' minutes') print('Median current ' + string + ' wait time: ' + str(median(wait_times)) + ' minutes') print('Modal current ' + string + ' wait time: ' + str(mode(wait_times)) + ' minutes') print('Max current ' + string + ' wait time: ' + str(max(wait_times)) + ' minutes') def print_ranks(player_list): bronze = [ player for player in player_list if player.get_rank() == 'BRONZE' ] silver = [ player for player in player_list if player.get_rank() == 'SILVER' ] gold = [ player for player in player_list if player.get_rank() == 'GOLD' ] platinum = [ player for player in player_list if player.get_rank() == 'PLATINUM' ] diamond = [ player for player in player_list if player.get_rank() == 'DIAMOND' ] master = [ player for player in player_list if player.get_rank() == 'MASTER' ] GM = [ player for player in player_list if player.get_rank() == 'GM' ] print('Bronze: ' + str(len(bronze))) print('Silver: ' + str(len(silver))) print('Gold: ' + str(len(gold))) print('Platinum: ' + str(len(platinum))) print('Diamond: ' + str(len(diamond))) print('Master: ' + str(len(master))) print('GM: ' + str(len(GM))) all_wait_times = [ player.current_wait_time for player in self.waiting_room ] print_wait_times(all_wait_times, 'player') dps_queue = [ player for player in self.waiting_room if 'DPS' in player.active_roles ] dps_wait_times = [player.current_wait_time for player in dps_queue] print_wait_times(dps_wait_times, 'DPS') print_ranks(dps_queue) tank_queue = [ player for player in self.waiting_room if 'TANK' in player.active_roles ] tank_wait_times = [player.current_wait_time for player in tank_queue] print_wait_times(tank_wait_times, 'Tank') print_ranks(tank_queue) support_queue = [ player for player in self.waiting_room if 'SUPPORT' in player.active_roles ] support_wait_times = [ player.current_wait_time for player in support_queue ] print_wait_times(support_wait_times, 'Support') print_ranks(support_queue) bronze_wait_times = [ player.current_wait_time for player in self.waiting_room if player.get_rank() == 'BRONZE' ] print_wait_times(bronze_wait_times, 'Bronze') silver_wait_times = [ player.current_wait_time for player in self.waiting_room if player.get_rank() == 'SILVER' ] print_wait_times(silver_wait_times, 'Silver') gold_wait_times = [ player.current_wait_time for player in self.waiting_room if player.get_rank() == 'GOLD' ] print_wait_times(gold_wait_times, 'Gold') platinum_wait_times = [ player.current_wait_time for player in self.waiting_room if player.get_rank() == 'PLATINUM' ] print_wait_times(platinum_wait_times, 'Platinum') diamond_wait_times = [ player.current_wait_time for player in self.waiting_room if player.get_rank() == 'DIAMOND' ] print_wait_times(diamond_wait_times, 'Diamond') master_wait_times = [ player.current_wait_time for player in self.waiting_room if player.get_rank() == 'MASTER' ] print_wait_times(master_wait_times, 'Master') gm_wait_times = [ player.current_wait_time for player in self.waiting_room if player.get_rank() == 'GM' ] 
    print_wait_times(gm_wait_times, 'GM')
    game_SR_ranges = [game.SR_range for game in self.active_games]
    average_SR_range = average(game_SR_ranges)
    print('Average active game SR range: ' + str(average_SR_range) + ' SR')
    # Sort before taking the middle element, otherwise this is not the median.
    median_SR_range = sorted(game_SR_ranges)[len(game_SR_ranges) // 2]
    print('Median active game SR range: ' + str(median_SR_range) + ' SR')
    max_SR_range = max(game_SR_ranges)
    print('Max active game SR range: ' + str(max_SR_range) + ' SR')
def get_cross_derivatives_ReLU(V_pos, psi_pos, hlayer, n_cv, weights=None): db_dw = average(V_pos, c=n_cv, weights=weights) a = hlayer.gamma[np.newaxis, :] theta = hlayer.delta[np.newaxis, :] b = hlayer.theta[np.newaxis, :] psi = psi_pos psi_plus = (-(psi - b) + theta) / np.sqrt(a) psi_minus = ((psi - b) + theta) / np.sqrt(a) Phi_plus = erf_times_gauss(psi_plus) Phi_minus = erf_times_gauss(psi_minus) p_plus = 1 / (1 + Phi_minus / Phi_plus) p_minus = 1 - p_plus e = (psi - b) - theta * (p_plus - p_minus) v = p_plus * p_minus * (2 * theta / np.sqrt(a)) * \ (2 * theta / np.sqrt(a) - 1 / Phi_plus - 1 / Phi_minus) dpsi_plus_dpsi = -1 / np.sqrt(a) dpsi_minus_dpsi = 1 / np.sqrt(a) dpsi_plus_dtheta = 1 / np.sqrt(a) dpsi_minus_dtheta = 1 / np.sqrt(a) dpsi_plus_da = -1.0 / (2 * a) * psi_plus dpsi_minus_da = -1.0 / (2 * a) * psi_minus d2psi_plus_dadpsi = 0.5 / np.sqrt(a**3) d2psi_plus_dthetadpsi = 0 d2psi_minus_dadpsi = -0.5 / np.sqrt(a**3) d2psi_minus_dthetadpsi = 0 dp_plus_dpsi = p_plus * p_minus * \ ((psi_plus - 1 / Phi_plus) * dpsi_plus_dpsi - (psi_minus - 1 / Phi_minus) * dpsi_minus_dpsi) dp_plus_dtheta = p_plus * p_minus * \ ((psi_plus - 1 / Phi_plus) * dpsi_plus_dtheta - (psi_minus - 1 / Phi_minus) * dpsi_minus_dtheta) dp_plus_da = p_plus * p_minus * \ ((psi_plus - 1 / Phi_plus) * dpsi_plus_da - (psi_minus - 1 / Phi_minus) * dpsi_minus_da) d2p_plus_dpsi2 = -(p_plus - p_minus) * p_plus * p_minus * ((psi_plus - 1 / Phi_plus) * dpsi_plus_dpsi - (psi_minus - 1 / Phi_minus) * dpsi_minus_dpsi)**2 \ + p_plus * p_minus * ((dpsi_plus_dpsi)**2 * (1 + (psi_plus - 1 / Phi_plus) / Phi_plus) - ( dpsi_minus_dpsi)**2 * (1 + (psi_minus - 1 / Phi_minus) / Phi_minus)) d2p_plus_dadpsi = -(p_plus - p_minus) * ((psi_plus - 1 / Phi_plus) * dpsi_plus_dpsi - (psi_minus - 1 / Phi_minus) * dpsi_minus_dpsi) * (dp_plus_da)\ + p_plus * p_minus * ((dpsi_plus_dpsi * dpsi_plus_da) * (1 + (psi_plus - 1 / Phi_plus) / Phi_plus) - (dpsi_minus_dpsi * dpsi_minus_da) * (1 + (psi_minus - 1 / Phi_minus) / Phi_minus) + (d2psi_plus_dadpsi) * (psi_plus - 1 / Phi_plus) - (d2psi_minus_dadpsi) * (psi_minus - 1 / Phi_minus)) d2p_plus_dthetadpsi = -(p_plus - p_minus) * ((psi_plus - 1 / Phi_plus) * dpsi_plus_dpsi - (psi_minus - 1 / Phi_minus) * dpsi_minus_dpsi) * (dp_plus_dtheta)\ + p_plus * p_minus * ((dpsi_plus_dpsi * dpsi_plus_dtheta) * (1 + (psi_plus - 1 / Phi_plus) / Phi_plus) - (dpsi_minus_dpsi * dpsi_minus_dtheta) * (1 + (psi_minus - 1 / Phi_minus) / Phi_minus) + (d2psi_plus_dthetadpsi) * (psi_plus - 1 / Phi_plus) - (d2psi_minus_dthetadpsi) * (psi_minus - 1 / Phi_minus)) # dlogZ_dpsi = (p_plus * (psi_plus - 1 / Phi_plus) * dpsi_plus_dpsi + # p_minus * (psi_minus - 1 / Phi_minus) * dpsi_minus_dpsi) # dlogZ_dtheta = (p_plus * (psi_plus - 1 / Phi_plus) * dpsi_plus_dtheta + # p_minus * (psi_minus - 1 / Phi_minus) * dpsi_minus_dtheta) # dlogZ_da = (p_plus * (psi_plus - 1 / Phi_plus) * dpsi_plus_da + # p_minus * (psi_minus - 1 / Phi_minus) * dpsi_minus_da) de_dpsi = (1 + v) de_db = -de_dpsi de_da = 2 * (-theta) * dp_plus_da de_dtheta = -(p_plus - p_minus) + 2 * (-theta) * dp_plus_dtheta dv_dpsi = 2 * (-theta) * d2p_plus_dpsi2 dv_db = -dv_dpsi dv_da = +2 * (-theta) * d2p_plus_dadpsi dv_dtheta = - 2 * dp_plus_dpsi \ + 2 * (- theta) * d2p_plus_dthetadpsi var_e = average(e**2, weights=weights) - average(e, weights=weights)**2 mean_v = average(v, weights=weights) dmean_v_da = average(dv_da, weights=weights) dmean_v_db = average(dv_db, weights=weights) dmean_v_dtheta = average(dv_dtheta, weights=weights) dvar_e_da = 2 * ( average(e * de_da, 
weights=weights) - average(e, weights=weights) * average(de_da, weights=weights)) dvar_e_db = 2 * ( average(e * de_db, weights=weights) - average(e, weights=weights) * average(de_db, weights=weights)) dvar_e_dtheta = 2 * ( average(e * de_dtheta, weights=weights) - average(e, weights=weights) * average(de_dtheta, weights=weights)) tmp = np.sqrt((1 + mean_v)**2 + 4 * var_e) da_db = (dvar_e_db + 0.5 * dmean_v_db * (1 + mean_v + tmp)) / \ (tmp - dvar_e_da - 0.5 * dmean_v_da * (1 + mean_v + tmp)) da_dtheta = (dvar_e_dtheta + 0.5 * dmean_v_dtheta * (1 + mean_v + tmp)) / (tmp - dvar_e_da - 0.5 * dmean_v_da * (1 + mean_v + tmp)) dmean_v_dw = average_product(dv_dpsi, V_pos, c1=1, c2=n_cv, weights=weights) if n_cv > 1: dvar_e_dw = 2 * ( average_product(e * de_dpsi, V_pos, c1=1, c2=n_cv, weights=weights) - average(e, weights=weights)[:, np.newaxis, np.newaxis] * average_product(de_dpsi, V_pos, c1=1, c2=n_cv, weights=weights)) da_dw = (dvar_e_dw + 0.5 * dmean_v_dw * (1 + mean_v + tmp)[:, np.newaxis, np.newaxis]) / ( tmp - dvar_e_da - 0.5 * dmean_v_da * (1 + mean_v + tmp))[:, np.newaxis, np.newaxis] else: dvar_e_dw = 2 * ( average_product(e * de_dpsi, V_pos, c1=1, c2=1, weights=weights) - average(e, weights=weights)[:, np.newaxis] * average_product(de_dpsi, V_pos, c1=1, c2=1, weights=weights)) da_dw = (dvar_e_dw + 0.5 * dmean_v_dw * (1 + mean_v + tmp)[:, np.newaxis]) / ( tmp - dvar_e_da - 0.5 * dmean_v_da * (1 + mean_v + tmp))[:, np.newaxis] return db_dw, da_db, da_dtheta, da_dw
def get_cross_derivatives_dReLU(V_pos, psi_pos, hlayer, n_cv, weights=None): # a = 2.0/(1.0/hlayer.a_plus + 1.0/hlayer.a_minus) # eta = 0.5* (a/hlayer.a_plus - a/hlayer.a_minus) # theta = (1.-eta**2)/2. * (hlayer.theta_plus+hlayer.theta_minus) # b = (1.+eta)/2. * hlayer.theta_plus - (1.-eta)/2. * hlayer.theta_minus db_dw = average(V_pos, c=n_cv, weights=weights) a = hlayer.a[np.newaxis, :] eta = hlayer.eta[np.newaxis, :] theta = hlayer.theta[np.newaxis, :] b = hlayer.b[np.newaxis, :] psi = psi_pos psi_plus = (-np.sqrt(1 + eta) * (psi - b) + theta / np.sqrt(1 + eta)) / np.sqrt(a) psi_minus = (np.sqrt(1 - eta) * (psi - b) + theta / np.sqrt(1 - eta)) / np.sqrt(a) Phi_plus = erf_times_gauss(psi_plus) Phi_minus = erf_times_gauss(psi_minus) Z = Phi_plus * np.sqrt(1 + eta) + Phi_minus * np.sqrt(1 - eta) p_plus = 1 / (1 + (Phi_minus * np.sqrt(1 - eta)) / (Phi_plus * np.sqrt(1 + eta))) nans = np.isnan(p_plus) p_plus[nans] = 1.0 * (np.abs(psi_plus[nans]) > np.abs(psi_minus[nans])) p_minus = 1 - p_plus e = (psi - b) * (1 + eta * (p_plus - p_minus)) - theta * ( p_plus - p_minus) + 2 * eta * np.sqrt(a) / Z v = eta * (p_plus - p_minus) + p_plus * p_minus * ( 2 * theta / np.sqrt(a) - 2 * eta * (psi - b) / np.sqrt(a)) * ( 2 * theta / np.sqrt(a) - 2 * eta * (psi - b) / np.sqrt(a) - np.sqrt(1 + eta) / Phi_plus - np.sqrt(1 - eta) / Phi_minus) - 2 * eta * e / (np.sqrt(a) * Z) dpsi_plus_dpsi = -np.sqrt((1 + eta) / a) dpsi_minus_dpsi = np.sqrt((1 - eta) / a) dpsi_plus_dtheta = 1 / np.sqrt(a * (1 + eta)) dpsi_minus_dtheta = 1 / np.sqrt(a * (1 - eta)) # dpsi_plus_da = -1.0/(2*a) * psi_plus # dpsi_minus_da = -1.0/(2*a) * psi_minus dpsi_plus_deta = -1.0 / (2 * np.sqrt(a * (1 + eta))) * ((psi - b) + theta / (1 + eta)) dpsi_minus_deta = -1.0 / (2 * np.sqrt(a * (1 - eta))) * ((psi - b) - theta / (1 - eta)) # d2psi_plus_dadpsi = 0.5 * np.sqrt((1+eta)/a**3 ) d2psi_plus_dthetadpsi = 0 d2psi_plus_detadpsi = -0.5 / np.sqrt((1 + eta) * a) # d2psi_minus_dadpsi = -0.5 * np.sqrt((1-eta)/a**3 ) d2psi_minus_dthetadpsi = 0 d2psi_minus_detadpsi = -0.5 / np.sqrt((1 - eta) * a) dp_plus_dpsi = p_plus * p_minus * ( (psi_plus - 1 / Phi_plus) * dpsi_plus_dpsi - (psi_minus - 1 / Phi_minus) * dpsi_minus_dpsi) dp_plus_dtheta = p_plus * p_minus * ( (psi_plus - 1 / Phi_plus) * dpsi_plus_dtheta - (psi_minus - 1 / Phi_minus) * dpsi_minus_dtheta) # dp_plus_da = p_plus * p_minus * ( (psi_plus-1/Phi_plus) * dpsi_plus_da - (psi_minus-1/Phi_minus) * dpsi_minus_da ) dp_plus_deta = p_plus * p_minus * ( (psi_plus - 1 / Phi_plus) * dpsi_plus_deta - (psi_minus - 1 / Phi_minus) * dpsi_minus_deta + 1 / (1 - eta**2)) d2p_plus_dpsi2 = -(p_plus-p_minus) * p_plus * p_minus * ( (psi_plus-1/Phi_plus) * dpsi_plus_dpsi - (psi_minus-1/Phi_minus) * dpsi_minus_dpsi )**2 \ + p_plus * p_minus * ( (dpsi_plus_dpsi)**2 * (1+ (psi_plus-1/Phi_plus)/Phi_plus) - (dpsi_minus_dpsi)**2 * (1+ (psi_minus-1/Phi_minus)/Phi_minus) ) # d2p_plus_dadpsi = -(p_plus-p_minus) * ( (psi_plus-1/Phi_plus) * dpsi_plus_dpsi - (psi_minus-1/Phi_minus) * dpsi_minus_dpsi ) * (dp_plus_da)\ # + p_plus * p_minus * ( (dpsi_plus_dpsi* dpsi_plus_da) * (1+ (psi_plus-1/Phi_plus)/Phi_plus) - (dpsi_minus_dpsi *dpsi_minus_da) * (1+ (psi_minus-1/Phi_minus)/Phi_minus) \ # + (d2psi_plus_dadpsi) * (psi_plus-1/Phi_plus) - (d2psi_minus_dadpsi) * (psi_minus-1/Phi_minus) ) d2p_plus_dthetadpsi = -(p_plus-p_minus) * ( (psi_plus-1/Phi_plus) * dpsi_plus_dpsi - (psi_minus-1/Phi_minus) * dpsi_minus_dpsi ) * (dp_plus_dtheta)\ + p_plus * p_minus * ( (dpsi_plus_dpsi* dpsi_plus_dtheta) * (1+ (psi_plus-1/Phi_plus)/Phi_plus) - 
(dpsi_minus_dpsi *dpsi_minus_dtheta) * (1+ (psi_minus-1/Phi_minus)/Phi_minus) \ + (d2psi_plus_dthetadpsi) * (psi_plus-1/Phi_plus) - (d2psi_minus_dthetadpsi) * (psi_minus-1/Phi_minus) ) d2p_plus_detadpsi = -(p_plus-p_minus) * ( (psi_plus-1/Phi_plus) * dpsi_plus_dpsi - (psi_minus-1/Phi_minus) * dpsi_minus_dpsi ) * (dp_plus_deta)\ + p_plus * p_minus * ( (dpsi_plus_dpsi* dpsi_plus_deta) * (1+ (psi_plus-1/Phi_plus)/Phi_plus) - (dpsi_minus_dpsi *dpsi_minus_deta) * (1+ (psi_minus-1/Phi_minus)/Phi_minus) \ + (d2psi_plus_detadpsi) * (psi_plus-1/Phi_plus) - (d2psi_minus_detadpsi) * (psi_minus-1/Phi_minus) ) dlogZ_dpsi = (p_plus * (psi_plus - 1 / Phi_plus) * dpsi_plus_dpsi + p_minus * (psi_minus - 1 / Phi_minus) * dpsi_minus_dpsi) dlogZ_dtheta = (p_plus * (psi_plus - 1 / Phi_plus) * dpsi_plus_dtheta + p_minus * (psi_minus - 1 / Phi_minus) * dpsi_minus_dtheta) # dlogZ_da = (p_plus * (psi_plus-1/Phi_plus)* dpsi_plus_da + p_minus * (psi_minus-1/Phi_minus) * dpsi_minus_da ) dlogZ_deta = (p_plus * (psi_plus - 1 / Phi_plus) * dpsi_plus_deta + p_minus * (psi_minus - 1 / Phi_minus) * dpsi_minus_deta + 0.5 * (p_plus / (1 + eta) - p_minus / (1 - eta))) de_dpsi = (1 + v) de_db = -de_dpsi # de_da = 2*((psi-b) * eta - theta) * dp_plus_da + eta/(Z*np.sqrt(a)) - 2*eta*np.sqrt(a)/Z * dlogZ_da de_dtheta = -(p_plus - p_minus) + 2 * ( (psi - b) * eta - theta) * dp_plus_dtheta - 2 * eta * np.sqrt(a) / Z * dlogZ_dtheta de_deta = (psi - b) * (p_plus - p_minus) + 2 * ( (psi - b) * eta - theta) * dp_plus_deta + 2 * np.sqrt( a) / Z - 2 * eta * np.sqrt(a) / Z * dlogZ_deta dv_dpsi = 4 * eta * dp_plus_dpsi\ + 2*( (psi-b)*eta-theta) * d2p_plus_dpsi2 \ - 2* eta/(np.sqrt(a)*Z) * ( de_dpsi - e*dlogZ_dpsi ) dv_db = -dv_dpsi # dv_da = eta * 2 * dp_plus_da \ # + 2 * ((psi-b)*eta - theta) * d2p_plus_dadpsi \ # -2 * eta/(Z * np.sqrt(a)) * ( -e/(2*a) - e*dlogZ_da + de_da ) dv_dtheta = 2 * eta * dp_plus_dtheta \ - 2 * dp_plus_dpsi \ + 2 * ((psi-b)*eta - theta) * d2p_plus_dthetadpsi \ -2 * eta/(Z * np.sqrt(a)) * ( - e*dlogZ_dtheta + de_dtheta ) dv_deta = (p_plus-p_minus) \ + 2 * eta * dp_plus_deta \ + 2 * (psi-b) * dp_plus_dpsi \ + 2 * ((psi-b)*eta - theta) * d2p_plus_detadpsi \ -2 * 1/(Z * np.sqrt(a)) * (e - e*eta*dlogZ_deta + eta*de_deta ) var_e = average(e**2, weights=weights) - average(e, weights=weights)**2 mean_v = average(v, weights=weights) # dmean_v_da = average(dv_da,weights=weights) dmean_v_db = average(dv_db, weights=weights) dmean_v_dtheta = average(dv_dtheta, weights=weights) dmean_v_deta = average(dv_deta, weights=weights) # dvar_e_da = 2* (average(e*de_da,weights=weights) -average(e,weights=weights) * average(de_da,weights=weights) ) dvar_e_db = 2 * ( average(e * de_db, weights=weights) - average(e, weights=weights) * average(de_db, weights=weights)) dvar_e_dtheta = 2 * ( average(e * de_dtheta, weights=weights) - average(e, weights=weights) * average(de_dtheta, weights=weights)) dvar_e_deta = 2 * ( average(e * de_deta, weights=weights) - average(e, weights=weights) * average(de_deta, weights=weights)) tmp = np.sqrt((1 + mean_v)**2 + 4 * var_e) denominator = tmp # denominator = (tmp - dvar_e_da- 0.5 * dmean_v_da * (1+mean_v+tmp)) # denominator = np.maximum( denominator, 0.5) # For numerical stability. 
da_db = (dvar_e_db + 0.5 * dmean_v_db * (1 + mean_v + tmp)) / denominator da_dtheta = (dvar_e_dtheta + 0.5 * dmean_v_dtheta * (1 + mean_v + tmp)) / denominator da_deta = (dvar_e_deta + 0.5 * dmean_v_deta * (1 + mean_v + tmp)) / denominator dmean_v_dw = average_product(dv_dpsi, V_pos, c1=1, c2=n_cv, weights=weights) if n_cv > 1: dvar_e_dw = 2 * ( average_product(e * de_dpsi, V_pos, c1=1, c2=n_cv, weights=weights) - average(e, weights=weights)[:, np.newaxis, np.newaxis] * average_product(de_dpsi, V_pos, c1=1, c2=n_cv, weights=weights)) da_dw = (dvar_e_dw + 0.5 * dmean_v_dw * (1 + mean_v + tmp)[:, np.newaxis, np.newaxis] ) / denominator[:, np.newaxis, np.newaxis] else: dvar_e_dw = 2 * ( average_product(e * de_dpsi, V_pos, c1=1, c2=1, weights=weights) - average(e, weights=weights)[:, np.newaxis] * average_product(de_dpsi, V_pos, c1=1, c2=1, weights=weights)) da_dw = (dvar_e_dw + 0.5 * dmean_v_dw * (1 + mean_v + tmp)[:, np.newaxis]) / denominator[:, np.newaxis] return db_dw, da_db, da_dtheta, da_deta, da_dw
def get_cross_derivatives_ReLU_plus(V_pos, psi_pos, hlayer, n_cv, weights=None):
    db_dw = average(V_pos, c=n_cv, weights=weights)
    a = hlayer.gamma[np.newaxis, :]
    b = hlayer.theta[np.newaxis, :]
    psi = psi_pos
    psi_plus = -(psi - b) / np.sqrt(a)
    Phi_plus = erf_times_gauss(psi_plus)
    e = (psi - b) + np.sqrt(a) / Phi_plus
    v = (psi_plus - 1 / Phi_plus) / Phi_plus
    dpsi_plus_dpsi = -1 / np.sqrt(a)
    dpsi_plus_da = -1.0 / (2 * a) * psi_plus
    de_dpsi = 1 + v
    de_db = -de_dpsi
    de_da = np.sqrt(a) * (1.0 / (2 * a * Phi_plus) -
                          (psi_plus - 1 / Phi_plus) / Phi_plus * dpsi_plus_da)
    dv_dpsi = dpsi_plus_dpsi * \
        (1 + psi_plus / Phi_plus - 1 / Phi_plus**2 - (psi_plus - 1 / Phi_plus)**2) / Phi_plus
    dv_db = -dv_dpsi
    dv_da = dpsi_plus_da * \
        (1 + psi_plus / Phi_plus - 1 / Phi_plus**2 - (psi_plus - 1 / Phi_plus)**2) / Phi_plus
    var_e = average(e**2, weights=weights) - average(e, weights=weights)**2
    mean_v = average(v, weights=weights)
    dmean_v_da = average(dv_da, weights=weights)
    dmean_v_db = average(dv_db, weights=weights)
    dvar_e_da = 2 * (average(e * de_da, weights=weights) -
                     average(e, weights=weights) * average(de_da, weights=weights))
    dvar_e_db = 2 * (average(e * de_db, weights=weights) -
                     average(e, weights=weights) * average(de_db, weights=weights))
    tmp = np.sqrt((1 + mean_v)**2 + 4 * var_e)
    denominator = (tmp - dvar_e_da - 0.5 * dmean_v_da * (1 + mean_v + tmp))
    denominator = np.maximum(denominator, 0.5)  # For numerical stability.
    da_db = (dvar_e_db + 0.5 * dmean_v_db * (1 + mean_v + tmp)) / denominator
    dmean_v_dw = average_product(dv_dpsi, V_pos, c1=1, c2=n_cv, weights=weights)
    if n_cv > 1:
        dvar_e_dw = 2 * (average_product(e * de_dpsi, V_pos, c1=1, c2=n_cv, weights=weights) -
                         average(e, weights=weights)[:, np.newaxis, np.newaxis] *
                         average_product(de_dpsi, V_pos, c1=1, c2=n_cv, weights=weights))
        da_dw = (dvar_e_dw + 0.5 * dmean_v_dw * (1 + mean_v + tmp)[:, np.newaxis, np.newaxis]) / \
            denominator[:, np.newaxis, np.newaxis]
    else:
        dvar_e_dw = 2 * (average_product(e * de_dpsi, V_pos, c1=1, c2=1, weights=weights) -
                         average(e, weights=weights)[:, np.newaxis] *
                         average_product(de_dpsi, V_pos, c1=1, c2=1, weights=weights))
        da_dw = (dvar_e_dw + 0.5 * dmean_v_dw * (1 + mean_v + tmp)[:, np.newaxis]) / \
            denominator[:, np.newaxis]
    return db_dw, da_db, da_dw
def assess_moment_matching(RBM, data, data_gen, datah_gen=None, weights=None, weights_neg=None, with_reg=True, show=True): h_data = RBM.mean_hiddens(data) if datah_gen is not None: h_gen = datah_gen else: h_gen = RBM.mean_hiddens(data_gen) mu = utilities.average(data, c=RBM.n_cv, weights=weights) if datah_gen is not None: condmu_gen = RBM.mean_visibles(datah_gen) mu_gen = utilities.average(condmu_gen, weights=weights_neg) else: mu_gen = utilities.average(data_gen, c=RBM.n_cv, weights=weights_neg) mu_h = utilities.average(h_data, weights=weights) mu_h_gen = utilities.average(h_gen, weights=weights_neg) if RBM.n_cv > 1: cov_vh = utilities.average_product( h_data, data, c2=RBM.n_cv, weights=weights ) - mu[np.newaxis, :, :] * mu_h[:, np.newaxis, np.newaxis] else: cov_vh = utilities.average_product( h_data, data, c2=RBM.n_cv, weights=weights) - mu[np.newaxis, :] * mu_h[:, np.newaxis] if datah_gen is not None: if RBM.n_cv > 1: cov_vh_gen = utilities.average_product( datah_gen, condmu_gen, mean2=True, c2=RBM.n_cv, weights=weights_neg ) - mu_gen[np.newaxis, :, :] * mu_h_gen[:, np.newaxis, np.newaxis] else: cov_vh_gen = utilities.average_product( datah_gen, condmu_gen, mean2=True, c2=RBM.n_cv, weights=weights_neg ) - mu_gen[np.newaxis, :] * mu_h_gen[:, np.newaxis] else: if RBM.n_cv > 1: cov_vh_gen = utilities.average_product( h_gen, data_gen, c2=RBM.n_cv, weights=weights_neg ) - mu_gen[np.newaxis, :, :] * mu_h_gen[:, np.newaxis, np.newaxis] else: cov_vh_gen = utilities.average_product( h_gen, data_gen, c2=RBM.n_cv, weights=weights_neg ) - mu_gen[np.newaxis, :] * mu_h_gen[:, np.newaxis] if RBM.hidden == 'dReLU': I_data = RBM.vlayer.compute_output(data, RBM.weights) I_gen = RBM.vlayer.compute_output(data_gen, RBM.weights) mu_p_pos, mu_n_pos, mu2_p_pos, mu2_n_pos = RBM.hlayer.mean12_pm_from_inputs( I_data) mu_p_pos = utilities.average(mu_p_pos, weights=weights) mu_n_pos = utilities.average(mu_n_pos, weights=weights) mu2_p_pos = utilities.average(mu2_p_pos, weights=weights) mu2_n_pos = utilities.average(mu2_n_pos, weights=weights) mu_p_neg, mu_n_neg, mu2_p_neg, mu2_n_neg = RBM.hlayer.mean12_pm_from_inputs( I_gen) mu_p_neg = utilities.average(mu_p_neg, weights=weights_neg) mu_n_neg = utilities.average(mu_n_neg, weights=weights_neg) mu2_p_neg = utilities.average(mu2_p_neg, weights=weights_neg) mu2_n_neg = utilities.average(mu2_n_neg, weights=weights_neg) a = RBM.hlayer.gamma eta = RBM.hlayer.eta theta = RBM.hlayer.delta moment_theta = -mu_p_pos / np.sqrt(1 + eta) + mu_n_pos / np.sqrt(1 - eta) moment_theta_gen = -mu_p_neg / np.sqrt(1 + eta) + mu_n_neg / np.sqrt( 1 - eta) moment_eta = 0.5 * a / (1 + eta)**2 * mu2_p_pos - 0.5 * a / ( 1 - eta)**2 * mu2_n_pos + theta / ( 2 * np.sqrt(1 + eta)**3) * mu_p_pos - theta / ( 2 * np.sqrt(1 - eta)**3) * mu_n_pos moment_eta_gen = 0.5 * a / (1 + eta)**2 * mu2_p_neg - 0.5 * a / ( 1 - eta)**2 * mu2_n_neg + theta / ( 2 * np.sqrt(1 + eta)**3) * mu_p_neg - theta / ( 2 * np.sqrt(1 - eta)**3) * mu_n_neg moment_theta *= -1 moment_theta_gen *= -1 moment_eta *= -1 moment_eta_gen *= -1 W = RBM.weights if with_reg: l2 = RBM.l2 l1 = RBM.l1 l1b = RBM.l1b l1c = RBM.l1c l1_custom = RBM.l1_custom l1b_custom = RBM.l1b_custom n_c2 = RBM.n_cv if l2 > 0: cov_vh_gen += l2 * W if l1 > 0: cov_vh_gen += l1 * np.sign(W) if l1b > 0: # NOT SUPPORTED FOR POTTS if n_c2 > 1: # Potts RBM. 
cov_vh_gen += l1b * np.sign(W) * np.abs(W).mean(-1).mean( -1)[:, np.newaxis, np.newaxis] else: cov_vh_gen += l1b * np.sign(W) * (np.abs(W).sum(1))[:, np.newaxis] if l1c > 0: # NOT SUPPORTED FOR POTTS cov_vh_gen += l1c * np.sign(W) * ( (np.abs(W).sum(1))**2)[:, np.newaxis] if any([l1 > 0, l1b > 0, l1c > 0]): mask_cov = np.abs(W) > 1e-3 else: mask_cov = np.ones(W.shape, dtype='bool') else: mask_cov = np.ones(W.shape, dtype='bool') if RBM.n_cv > 1: if RBM.n_cv == 21: list_aa = Proteins_utils.aa else: list_aa = Proteins_utils.aa[:-1] colors_template = np.array([ matplotlib.colors.to_rgba(aa_color_scatter(letter)) for letter in list_aa ]) color = np.repeat(colors_template[np.newaxis, :, :], data.shape[1], axis=0).reshape([data.shape[1] * RBM.n_cv, 4]) else: color = 'C0' s2 = 14 if RBM.hidden == 'dReLU': fig, ax = plt.subplots(3, 2) fig.set_figheight(3 * 5) fig.set_figwidth(2 * 5) else: fig, ax = plt.subplots(2, 2) fig.set_figheight(2 * 5) fig.set_figwidth(2 * 5) clean_ax(ax[1, 1]) ax_ = ax[0, 0] ax_.scatter(mu.flatten(), mu_gen.flatten(), c=color) ax_.plot([mu.min(), mu.max()], [mu.min(), mu.max()]) ax_.set_xlabel(r'$<v_i>_d$', fontsize=s2) ax_.set_ylabel(r'$<v_i>_m$', fontsize=s2) r2_mu = np.corrcoef(mu.flatten(), mu_gen.flatten())[0, 1]**2 error_mu = np.sqrt(((mu - mu_gen)**2 / (mu * (1 - mu) + 1e-4)).mean()) mini = mu.min() maxi = mu.max() ax_.text(0.6 * maxi + 0.4 * mini, 0.25 * maxi + 0.75 * mini, r'$R^2 = %.2f$' % r2_mu, fontsize=s2) ax_.text(0.6 * maxi + 0.4 * mini, 0.35 * maxi + 0.65 * mini, r'$Err = %.2e$' % error_mu, fontsize=s2) ax_.set_title('Mean visibles', fontsize=s2) ax_ = ax[0, 1] ax_.scatter(mu_h, mu_h_gen) ax_.plot([mu_h.min(), mu_h.max()], [mu_h.min(), mu_h.max()]) ax_.set_xlabel(r'$<h_\mu>_d$', fontsize=s2) ax_.set_ylabel(r'$<h_\mu>_m$', fontsize=s2) r2_muh = np.corrcoef(mu_h, mu_h_gen)[0, 1]**2 error_muh = np.sqrt(((mu_h - mu_h_gen)**2).mean()) mini = mu_h.min() maxi = mu_h.max() ax_.text(0.6 * maxi + 0.4 * mini, 0.25 * maxi + 0.75 * mini, r'$R^2 = %.2f$' % r2_muh, fontsize=s2) ax_.text(0.6 * maxi + 0.4 * mini, 0.35 * maxi + 0.65 * mini, r'$Err = %.2e$' % error_muh, fontsize=s2) ax_.set_title('Mean hiddens', fontsize=s2) ax_ = ax[1, 0] if RBM.n_cv > 1: color = np.repeat(np.repeat(colors_template[np.newaxis, np.newaxis, :, :], RBM.n_h, axis=0), data.shape[1], axis=1).reshape([RBM.n_v * RBM.n_h * RBM.n_cv, 4]) color = color[mask_cov.flatten()] else: color = 'C0' cov_vh = cov_vh[mask_cov].flatten() cov_vh_gen = cov_vh_gen[mask_cov].flatten() ax_.scatter(cov_vh, cov_vh_gen, c=color) ax_.plot([cov_vh.min(), cov_vh.max()], [cov_vh.min(), cov_vh.max()]) ax_.set_xlabel(r'Cov$(v_i \;, h_\mu)_d$', fontsize=s2) ax_.set_ylabel(r'Cov$(v_i \;, h_\mu)_m + \nabla_{w_{\mu i}} \mathcal{R}$', fontsize=s2) r2_vh = np.corrcoef(cov_vh, cov_vh_gen)[0, 1]**2 error_vh = np.sqrt(((cov_vh - cov_vh_gen)**2).mean()) mini = cov_vh.min() maxi = cov_vh.max() ax_.text(0.6 * maxi + 0.4 * mini, 0.25 * maxi + 0.75 * mini, r'$R^2 = %.2f$' % r2_vh, fontsize=s2) ax_.text(0.6 * maxi + 0.4 * mini, 0.35 * maxi + 0.65 * mini, r'$Err = %.2e$' % error_vh, fontsize=s2) ax_.set_title('Hiddens-Visibles correlations', fontsize=s2) if RBM.hidden == 'dReLU': ax_ = ax[2, 0] ax_.scatter(moment_theta, moment_theta_gen, c=theta) ax_.plot([moment_theta.min(), moment_theta.max()], [moment_theta.min(), moment_theta.max()]) ax_.set_xlabel(r'$<-\frac{\partial E}{\partial \theta}>_d$', fontsize=s2) ax_.set_ylabel(r'$<-\frac{\partial E}{\partial \theta}>_m$', fontsize=s2) r2_theta = np.corrcoef(moment_theta, moment_theta_gen)[0, 
1]**2 error_theta = np.sqrt(((moment_theta - moment_theta_gen)**2).mean()) mini = moment_theta.min() maxi = moment_theta.max() ax_.text(0.6 * maxi + 0.4 * mini, 0.25 * maxi + 0.75 * mini, r'$R^2 = %.2f$' % r2_theta, fontsize=s2) ax_.text(0.6 * maxi + 0.4 * mini, 0.35 * maxi + 0.65 * mini, r'$Err = %.2e$' % error_theta, fontsize=s2) ax_.set_title('Moment theta', fontsize=s2) ax_ = ax[2, 1] ax_.scatter(moment_eta, moment_eta_gen, c=np.abs(eta)) ax_.plot([moment_eta.min(), moment_eta.max()], [moment_eta.min(), moment_eta.max()]) ax_.set_xlabel(r'$<-\frac{\partial E}{\partial \eta}>_d$', fontsize=s2) ax_.set_ylabel(r'$<-\frac{\partial E}{\partial \eta}>_m$', fontsize=s2) r2_eta = np.corrcoef(moment_eta, moment_eta_gen)[0, 1]**2 error_eta = np.sqrt(((moment_eta - moment_eta_gen)**2).mean()) mini = moment_eta.min() maxi = moment_eta.max() ax_.text(0.6 * maxi + 0.4 * mini, 0.25 * maxi + 0.75 * mini, r'$R^2 = %.2f$' % r2_eta, fontsize=s2) ax_.text(0.6 * maxi + 0.4 * mini, 0.35 * maxi + 0.65 * mini, r'$Err = %.2e$' % error_eta, fontsize=s2) ax_.set_title('Moment eta', fontsize=s2) plt.tight_layout() if show: fig.show() if RBM.hidden == 'dReLU': errors = [error_mu, error_muh, error_vh, error_theta, error_eta] r2s = [r2_mu, r2_muh, r2_vh, r2_theta, r2_eta] else: errors = [error_mu, error_muh, error_vh] r2s = [r2_mu, r2_muh, r2_vh] return fig, errors, r2s
def fit(self, data, weights=None, pseudo_count=1e-4, verbose=1, zero_diag=True): fi = utilities.average(data, c=self.n_c, weights=weights) fij = utilities.average_product(data, data, c1=self.n_c, c2=self.n_c, weights=weights) for i in range(self.N): fij[i, i] = np.diag(fi[i]) fi_PC = (1 - pseudo_count) * fi + pseudo_count / float(self.n_c) fij_PC = (1 - pseudo_count) * fij + pseudo_count / float(self.n_c)**2 for i in range(self.N): fij_PC[i, i] = np.diag(fi_PC[i]) Cij = fij_PC - fi_PC[ np.newaxis, :, np.newaxis, :] * fi_PC[:, np.newaxis, :, np.newaxis] D = np.zeros([self.N, self.n_c - 1, self.n_c - 1]) invD = np.zeros([self.N, self.n_c - 1, self.n_c - 1]) for n in range(self.N): D[n] = scipy.linalg.sqrtm(Cij[n, n, :-1, :-1]) invD[n] = np.linalg.inv(D[n]) Gamma = np.zeros([self.N, self.n_c - 1, self.N, self.n_c - 1]) for n1 in range(self.N): for n2 in range(self.N): Gamma[n1, :, n2, :] = np.dot(invD[n1], np.dot(Cij[n1, n2, :-1, :-1], invD[n2])) Gamma_bin = Gamma.reshape( [self.N * (self.n_c - 1), self.N * (self.n_c - 1)]) Gamma_bin = (Gamma_bin + Gamma_bin.T) / 2 lam, v = np.linalg.eigh(Gamma_bin) order = np.argsort(lam)[::-1] v_ordered = np.rollaxis( v.reshape([self.N, self.n_c - 1, self.N * (self.n_c - 1)]), 2, 0)[order, :, :] lam_ordered = lam[order] DeltaL = 0.5 * (lam_ordered - 1 - np.log(lam_ordered)) xi = np.zeros(v_ordered.shape) for n in range(self.N): xi[:, n, :] = np.dot(v_ordered[:, n, :], invD[n]) xi = np.sqrt(np.abs(1 - 1 / lam_ordered))[:, np.newaxis, np.newaxis] * xi xi = np.concatenate( (xi, np.zeros([self.N * (self.n_c - 1), self.N, 1])), axis=2) # Write in zero-sum gauge. xi -= xi.mean(-1)[:, :, np.newaxis] top_M_contrib = np.argsort(DeltaL)[::-1][:self.M] self.xi = xi[top_M_contrib] self.lam = lam_ordered[top_M_contrib] self.DeltaL = DeltaL[top_M_contrib] couplings = np.tensordot( self.xi[self.lam > 1], self.xi[self.lam > 1], axes=[ (0), (0) ]) - np.tensordot( self.xi[self.lam < 1], self.xi[self.lam < 1], axes=[(0), (0)]) couplings = np.asarray(np.swapaxes(couplings, 1, 2), order='c') if zero_diag: # With zero diag is much better; I just check things... for n in range(self.N): couplings[n, n, :, :] *= 0 fields = np.log(fi_PC) - np.tensordot( couplings, fi_PC, axes=[(1, 3), (0, 1)]) fields -= fields.mean(-1)[:, np.newaxis] self.layer.couplings = couplings self.layer.fields = fields if verbose: fig, ax = plt.subplots() ax2 = ax.twinx() ax.plot(self.DeltaL) ax2.semilogy(self.lam, c='red') ax.set_ylabel(r'$\Delta L$', color='blue') ax2.set_ylabel('Mode variance', color='red') for tl in ax.get_yticklabels(): tl.set_color('blue') for tl in ax2.get_yticklabels(): tl.set_color('red')
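# --- Usage sketch (illustrative, not in the original source). ---
# This fit builds couplings from the top-M eigenmodes of the rescaled correlation matrix
# (a Hopfield-Potts-like construction). A typical call on a categorical alignment `data`
# (integer entries in [0, n_c)) would be:
def _demo_spectral_fit(model, data, weights=None):
    model.fit(data, weights=weights, pseudo_count=1e-4, verbose=0)
    # model.xi holds the retained patterns, model.DeltaL their likelihood contributions.
    return model.xi, model.DeltaL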
...and reuse them in any of our programs. Let us add our average function to the utilities module (utilities.py):
"""
def average(values):
    return sum(values) / len(values)
"""
We have to import our custom module manually:
"""
import utilities

values = [4, 5, 6, 7]
print(utilities.average(values))  # Output: 5.5
"""
You may also choose to import the average function specifically:
"""
from utilities import average

print(average(values))  # Output: 5.5
"""
With this syntax, you don't need to use dot notation when you call the function. Because we've explicitly imported average() in the import statement, we can call it by name. You can also import names under an alias (import ... as ...) to avoid collisions or to create short forms.
def learn_mapping_to_alignment(alignment, sequence, hmmer_path=hmmer_path, n_iter=3, verbose=1): if not type(alignment) == str: # data alignment. name_alignment = 'tmp.fasta' sequences_alignment = alignment Proteins_utils.write_FASTA('tmp.fasta', alignment) else: name_alignment = alignment sequences_alignment = Proteins_utils.load_FASTA(alignment, drop_duplicates=False) sequences_alignment_original = sequences_alignment.copy() consensus_sequence = np.argmax(utilities.average( sequences_alignment_original, c=21)[:, :-1], axis=1)[np.newaxis, :] if type(sequence) == str: sequence_num = Proteins_utils.seq2num(sequence) else: sequence_num = sequence if sequence_num.ndim == 1: sequence_num = sequence_num[np.newaxis] Proteins_utils.write_FASTA('tmp_target.fasta', sequence_num) for iteration in range(1, n_iter + 1): hmm_alignment = 'tmp.hmm' if iteration > 1: cmd = hmmer_path + 'src/hmmbuild --symfrac 0 --wnone %s %s' % ( hmm_alignment, name_alignment) else: cmd = hmmer_path + 'src/hmmbuild --symfrac 0 %s %s' % ( hmm_alignment, name_alignment) os.system(cmd) cmd = hmmer_path + 'src/hmmalign -o tmp_aligned.txt %s %s' % ( hmm_alignment, 'tmp_target.fasta') os.system(cmd) cmd = hmmer_path + 'easel/miniapps/esl-reformat --informat stockholm afa tmp_aligned.txt > tmp_aligned.fasta' os.system(cmd) sequence_aligned = ''.join( open('tmp_aligned.fasta', 'r').read().split('\n')[1:]) if verbose: print('Iteration %s: %s,' % (iteration, sequence_aligned)) mapping_alignment_to_struct = [] sequence_ref_aligned = [] index_sequence = 0 index_alignment = 0 for k, s in enumerate(sequence_aligned): if s == '-': mapping_alignment_to_struct.append(-1) index_alignment += 1 sequence_ref_aligned.append('-') elif s == s.upper(): mapping_alignment_to_struct.append(index_sequence) index_sequence += 1 index_alignment += 1 sequence_ref_aligned.append(s) elif s == s.lower(): index_sequence += 1 mapping_alignment_to_struct = np.array(mapping_alignment_to_struct, dtype='int') print(len(sequence_ref_aligned)) sequence_ref_aligned = Proteins_utils.seq2num( ''.join(sequence_ref_aligned)) if verbose: fraction_of_sites = (mapping_alignment_to_struct != -1).mean() print( 'Iteration %s, fraction of sites mapped on the structure: %.2f' % (iteration, fraction_of_sites)) top_closest = np.minimum(50, sequences_alignment_original.shape[0] // 5) closest = np.argsort( (sequences_alignment_original == sequence_ref_aligned ).mean(1))[::-1][:top_closest] name_alignment = 'tmp.fasta' reduced_alignment = np.concatenate( (np.repeat(sequences_alignment_original[closest], 10, axis=0), consensus_sequence), axis=0 ) # Need to add the consensus sequence. Otherwise, hmmalign can remove a column if it has only gaps in the reduced alignment. compensate by increasing the weights of the other sequences and removing the reweighting. Proteins_utils.write_FASTA('tmp.fasta', reduced_alignment) os.system( 'rm tmp_target.fasta tmp_aligned.txt tmp_aligned.fasta tmp.hmm tmp.fasta' ) return mapping_alignment_to_struct, sequence_ref_aligned
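# --- Usage sketch (illustrative, not in the original source). ---
# learn_mapping_to_alignment aligns a single target sequence to an existing MSA with
# hmmbuild/hmmalign and returns, for each alignment column, the index of the matching
# position in the target sequence (-1 for unmatched columns). It requires a working
# HMMER installation at `hmmer_path`.
def _demo_map_structure_sequence(msa_file, pdb_sequence):
    mapping, aligned_seq = learn_mapping_to_alignment(msa_file, pdb_sequence, n_iter=3, verbose=0)
    return mapping, aligned_seq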
def calculate_error(RBM, data_tr, N_sequences=800000, Nstep=10, background=None):
    N = RBM.n_v
    q = RBM.n_cv
    # Check how well the data moments are reproduced by the model.
    # Means.
    mudata = RBM.mu_data  # empirical averages
    # datav, datah = RBM.gen_data(Nchains=int(100), Lchains=int(N_sequences / 100), Nthermalize=int(500), background=background)
    datav, datah = RBM.gen_data(Nchains=int(100), Lchains=int(N_sequences / 100), Nthermalize=int(500))
    mugen = utilities.average(datav, c=q, weights=None)
    # Correlations (connected, i.e. covariances).
    covgen = utilities.average_product(datav, datav, c1=q, c2=q) \
        - mugen[:, np.newaxis, :, np.newaxis] * mugen[np.newaxis, :, np.newaxis, :]
    covdata = utilities.average_product(data_tr, data_tr, c1=q, c2=q) \
        - mudata[:, np.newaxis, :, np.newaxis] * mudata[np.newaxis, :, np.newaxis, :]
    fdata = utilities.average_product(data_tr, data_tr, c1=q, c2=q)
    # Set the diagonal blocks of the covariances to zero.
    for i in range(N):
        covdata[i, i, :, :] = np.zeros((q, q))
        fdata[i, i, :, :] = np.zeros((q, q))
        covgen[i, i, :, :] = np.zeros((q, q))

    M = len(data_tr)
    maxp = float(1) / float(M)  # frequency resolution set by the number of sequences
    ps = 0.00001  # pseudocount for fully conserved sites

    # Error on the frequencies.
    errm = 0
    neffm = 0
    for i in range(N):
        for a in range(q):
            neffm += 1
            if mudata[i, a] < maxp:
                errm += np.power((mugen[i, a] - mudata[i, a]), 2) / (float(1 - maxp) * float(maxp))
            else:
                if mudata[i, a] != 1.0:
                    errm += np.power((mugen[i, a] - mudata[i, a]), 2) / (float(1 - mudata[i, a]) * float(mudata[i, a]))
                else:
                    # The pseudocount keeps the denominator positive for fully conserved sites.
                    errm += np.power((mugen[i, a] - mudata[i, a]), 2) / (float(1 - mudata[i, a] + ps) * float(mudata[i, a]))
    errmt = np.sqrt(float(1) / (float(neffm) * float(maxp)) * float(errm))
    # Rigorously, the regularization term should also enter the difference in errm.

    # Error on the correlations.
    errc = 0
    neffc = 0
    for i in range(N):
        for j in range(i + 1, N):
            for a in range(q):
                for b in range(a + 1, q):
                    neffc += 1
                    if covdata[i, j, a, b] < maxp:
                        den = np.power(
                            np.sqrt(float(1 - maxp) * float(maxp))
                            + mudata[i, a] * np.sqrt(mudata[j, b] * (1 - mudata[j, b]))
                            + mudata[j, b] * np.sqrt(mudata[i, a] * (1 - mudata[i, a])), 2)
                    else:
                        den = np.power(
                            np.sqrt(float(1 - fdata[i, j, a, b]) * float(fdata[i, j, a, b]))
                            + mudata[i, a] * np.sqrt(mudata[j, b] * (1 - mudata[j, b]))
                            + mudata[j, b] * np.sqrt(mudata[i, a] * (1 - mudata[i, a])), 2)
                    errc += np.power((covgen[i, j, a, b] - covdata[i, j, a, b]), 2) / float(den)
    errct = np.sqrt(float(1) / (float(neffc) * float(maxp)) * float(errc))
    return (errmt, errct)
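# A rough, self-contained illustration of the normalization used above (all numbers are made up):
# a frequency estimated from M sequences fluctuates with variance about p*(1-p)/M, so dividing the
# squared model-data gap by p*(1-p)/M gives ~1 when the model matches the data up to sampling noise,
# which is the scale that errmt (and, analogously, errct) reports.
import numpy as np

M = 5000                  # number of sequences (made up)
p_data = 0.3              # empirical frequency of one amino acid at one site
p_model = p_data + np.sqrt(p_data * (1 - p_data) / M)  # off by one standard error
z2 = M * (p_model - p_data) ** 2 / (p_data * (1 - p_data))
print(z2)                 # ~1.0: deviation comparable to sampling noise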
def fit(self, X, Y, weights=None, batch_size=100, learning_rate=None, lr_final=None, lr_decay=True,
        decay_after=0.5, extra_params=None, optimizer='ADAM', n_iter=10, verbose=1, regularizers=[]):
    self.batch_size = batch_size
    self.optimizer = optimizer
    self.n_iter = n_iter
    if self.n_iter <= 1:
        lr_decay = False
    if learning_rate is None:
        if self.optimizer == 'SGD':
            learning_rate = 0.01
        elif self.optimizer == 'ADAM':
            learning_rate = 5e-4
        else:
            print('Need to specify learning rate for optimizer.')
    if self.optimizer == 'ADAM':
        if extra_params is None:
            extra_params = [0.9, 0.99, 1e-3]
        self.beta1 = extra_params[0]
        self.beta2 = extra_params[1]
        self.epsilon = extra_params[2]

    if self.n_cout > 1:
        out0 = np.zeros([1, self.Nout, self.n_cout], dtype=curr_float)
    else:
        out0 = np.zeros([1, self.Nout], dtype=curr_float)
    grad = {
        'weights': np.zeros_like(self.weights),
        'output_layer': self.output_layer.internal_gradients(out0, out0, value='input', value_neg='input')
    }
    for key in grad['output_layer'].keys():
        grad['output_layer'][key] *= 0
    self.gradient_moment1 = copy.deepcopy(grad)
    self.gradient_moment2 = copy.deepcopy(grad)

    self.learning_rate_init = copy.copy(learning_rate)
    self.learning_rate = learning_rate
    self.lr_decay = lr_decay
    if self.lr_decay:
        self.decay_after = decay_after
        self.start_decay = int(self.n_iter * self.decay_after)
        if lr_final is None:
            self.lr_final = 1e-2 * self.learning_rate
        else:
            self.lr_final = lr_final
        self.decay_gamma = (float(self.lr_final) / float(self.learning_rate))**(
            1 / float(self.n_iter * (1 - self.decay_after)))
    else:
        self.decay_gamma = 1

    self.regularizers = regularizers
    n_samples = X.shape[0]
    n_batches = int(np.ceil(float(n_samples) / self.batch_size))
    batch_slices = list(utilities.gen_even_slices(n_batches * self.batch_size, n_batches, n_samples))
    X = np.asarray(X, dtype=self.input_layer.type, order='c')
    Y = np.asarray(Y, dtype=self.output_layer.type, order='c')
    if weights is not None:
        weights = weights.astype(curr_float)
    self.moments_Y = self.output_layer.get_moments(Y, weights=weights, value='data')
    self.moments_XY = utilities.average_product(X, Y, c1=self.n_cin, c2=self.n_cout,
                                                 mean1=False, mean2=False, weights=weights)
    self.count_updates = 0

    for epoch in range(1, n_iter + 1):
        if verbose:
            begin = time.time()
        if self.lr_decay:
            if (epoch > self.start_decay):
                self.learning_rate *= self.decay_gamma
        permutation = np.argsort(np.random.randn(n_samples))
        X = X[permutation, :]
        Y = Y[permutation, :]
        if weights is not None:
            weights = weights[permutation]
        if verbose:
            print('Starting epoch %s' % (epoch))
        for batch_slice in batch_slices:
            if weights is not None:
                self.minibatch_fit(X[batch_slice], Y[batch_slice], weights=weights[batch_slice])
            else:
                self.minibatch_fit(X[batch_slice], Y[batch_slice], weights=None)
        if verbose:
            end = time.time()
            lik = utilities.average(self.likelihood(X, Y), weights=weights)
            regularization = 0
            for regtype, regtarget, regvalue in self.regularizers:
                if regtarget == 'weights':
                    target = self.weights
                else:
                    target = self.output_layer.__dict__[regtarget]
                if regtype == 'l1':
                    regularization += (regvalue * np.abs(target)).sum()
                elif regtype == 'l2':
                    regularization += 0.5 * (regvalue * target**2).sum()
                else:
                    print(regtype, 'not supported')
            regularization /= self.Nout
            message = "Iteration %d, time = %.2fs, likelihood = %.2f, regularization = %.2e, loss = %.2f" % (
                epoch, end - begin, lik, regularization, -lik + regularization)
            print(message)
    return 'done'
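# A small, self-contained sketch of the learning-rate schedule implemented in fit() above
# (all values here are made up): the rate is held constant for the first decay_after fraction
# of the epochs, then decays geometrically so that it reaches lr_final on the last epoch.
n_iter, decay_after = 20, 0.5
lr, lr_final = 5e-4, 5e-6
start_decay = int(n_iter * decay_after)
decay_gamma = (lr_final / lr) ** (1 / (n_iter * (1 - decay_after)))
for epoch in range(1, n_iter + 1):
    if epoch > start_decay:
        lr *= decay_gamma
print('final learning rate: %.2e' % lr)  # ~5.00e-06
# Note on the regularizers argument of fit(): as read off the verbose block above, it expects
# (type, target attribute, strength) tuples, e.g. regularizers=[('l2', 'weights', 0.01)].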