def aux_gamma_beta(self, i, gamma, j, beta):
    # logging.info("(%d,%d): g %.4f; b %.4f", i, j, gamma, beta)
    stv_kls = np.zeros(self.n_tests)
    sntv_liars_kls = np.zeros(self.n_tests)
    sntv_kls = np.zeros(self.n_tests)
    for test_nr in range(self.n_tests):
        true_preferences = PreferenceCreator(self.n, self.m, self.political_spectrum).create_preferences()
        poll_results = get_poll(true_preferences, gamma)
        man_ballot = manipulate(true_preferences, np.nonzero(poll_results < self.electoral_threshold)[0], beta)
        first_distribution = get_first_choice_dist(true_preferences)

        stv_scores, *_ = voting.STV_scores(true_preferences, self.electoral_threshold, percentage=True)
        stv_outcome = apportion.largest_remainder(stv_scores, self.seats)
        stv_kls[test_nr] = utils.kl_divergence(stv_outcome / self.seats, first_distribution)

        # SNTV outcome truthful
        sntv_scores, *_ = voting.SNTV_scores(true_preferences, self.electoral_threshold, percentage=True)
        sntv_outcome = apportion.largest_remainder(sntv_scores, self.seats)
        sntv_kls[test_nr] = utils.kl_divergence(sntv_outcome / self.seats, first_distribution)
        del sntv_scores, sntv_outcome

        # SNTV outcome liars
        sntv_scores, *_ = voting.SNTV_scores(man_ballot, self.electoral_threshold, percentage=True)
        sntv_outcome = apportion.largest_remainder(sntv_scores, self.seats)
        sntv_liars_kls[test_nr] = utils.kl_divergence(sntv_outcome / self.seats, first_distribution)

    logging.info("(%d,%d): g %.4f; b %.4f; kl-sntv_liars: %.4f", i, j, gamma, beta, sntv_liars_kls.mean())
    return i, j, (stv_kls.mean(), sntv_kls.mean(), sntv_liars_kls.mean())
def lalala(electoral_threshold, m, n, n_tests, political_spectrum, seats, beta, poll_covid):
    stv_propor = []
    sntv_propor = []
    for test_nr in range(n_tests):
        true_preferences = PreferenceCreator(n, m, political_spectrum).create_preferences()
        poll_results = get_poll(true_preferences, poll_covid)
        man_ballot = manipulate(true_preferences, np.nonzero(poll_results < electoral_threshold)[0], beta)

        # STV outcome
        # TODO reestablish the natural order
        stv_scores, *_ = voting.STV_scores(true_preferences, electoral_threshold, percentage=True)
        stv_outcome = apportion.largest_remainder(stv_scores, seats)

        # SNTV outcome
        sntv_scores, *_ = voting.SNTV_scores(man_ballot, electoral_threshold, percentage=True)
        sntv_outcome = apportion.largest_remainder(sntv_scores, seats)

        first_distribution = get_first_choice_dist(true_preferences)

        # Proportionality
        stv_propor.append(utils.kl_divergence(stv_outcome / seats, first_distribution))
        sntv_propor.append(utils.kl_divergence(sntv_outcome / seats, first_distribution))

    return np.array(stv_propor).mean(), np.array(sntv_propor).mean()
def test_gamma_beta(electoral_threshold, m, n, n_tests, political_spectrum, seats):
    steps = 3
    gammass = np.geomspace(0.0001, 1, num=steps)
    betass = np.linspace(0, 1, num=steps)
    results = {'stv': np.zeros((steps, steps)), 'sntv': np.zeros((steps, steps))}
    for i, gamma in enumerate(gammass):
        for j, beta in enumerate(betass):
            logging.info("g %.4f; b %.4f", gamma, beta)
            stv_kls = np.zeros(n_tests)
            sntv_kls = np.zeros(n_tests)
            for test_nr in range(n_tests):
                true_preferences = PreferenceCreator(n, m, political_spectrum).create_preferences()
                poll_results = get_poll(true_preferences, gamma)
                man_ballot = manipulate(true_preferences, np.nonzero(poll_results < electoral_threshold)[0], beta)
                first_distribution = get_first_choice_dist(true_preferences)

                stv_scores, *_ = voting.STV_scores(true_preferences, electoral_threshold, percentage=True)
                stv_outcome = apportion.largest_remainder(stv_scores, seats)
                stv_kls[test_nr] = utils.kl_divergence(stv_outcome / seats, first_distribution)

                # SNTV outcome
                sntv_scores, *_ = voting.SNTV_scores(man_ballot, electoral_threshold, percentage=True)
                sntv_outcome = apportion.largest_remainder(sntv_scores, seats)
                sntv_kls[test_nr] = utils.kl_divergence(sntv_outcome / seats, first_distribution)

            results['stv'][i, j] = stv_kls.mean()
            results['sntv'][i, j] = sntv_kls.mean()
    return np.meshgrid(gammass, betass), results
def get_metrics(a, b, metric_type, operator_func):
    if metric_type == "kl_divergence":
        s1 = np.array([kl_divergence(a, b)])
        s2 = np.array([kl_divergence(b, a)])
    elif metric_type == "js_divergence":
        s1 = np.array([js_divergence(a, b)])
        s2 = np.array([js_divergence(b, a)])
    elif metric_type == "entropy":
        s1 = np.array([entropy(a)])
        s2 = np.array([entropy(b)])
    else:
        raise ValueError("metric_type undefined")
    return operator_func(operator, s1, s2)
def elbo(model, x, K=1):
    """Computes E_{p(x)}[ELBO] """
    qz_x, px_z, _ = model(x)
    lpx_z = px_z.log_prob(x).view(*px_z.batch_shape[:2], -1) * model.llik_scaling
    kld = kl_divergence(qz_x, model.pz(*model.pz_params))
    return (lpx_z.sum(-1) - kld.sum(-1)).mean(0).sum()
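# The kl_divergence helper used by elbo() above is not shown in this section.
# A minimal stand-in, assuming qz_x and the prior are torch.distributions
# objects (the real helper may also accept a `samples` argument, as seen in
# other objectives below), could look like this hypothetical sketch:
import torch
from torch import distributions as dist


def kl_divergence_sketch(q, p):
    # Analytic KL(q || p) when torch has a registered closed form; otherwise
    # fall back to a single-sample Monte Carlo estimate.
    try:
        return dist.kl_divergence(q, p)
    except NotImplementedError:
        z = q.rsample()
        return q.log_prob(z) - p.log_prob(z)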
def times_objective(self, mf, K=1, beta=1., alpha=1.):
    """Optimizer objective.

    Args:
        mf: multifactor inputs
        K: number of latent samples
        beta: weight on the overlap (KL) term
        alpha: weight on the sparsity regularizer
    """
    qz_x, px_z, style_score = self.model(mf, K=K)
    # Reconstruction
    lpx_z = px_z.log_prob(mf).sum(-1)
    pz = self.model.pz(*self.model.pz_params)
    # Overlap
    kld = kl_divergence(qz_x, pz, samples=style_score).sum(-1)
    # Sparsity
    reg = (self.regs(pz.sample(torch.Size([mf.size(0)])).view(-1, style_score.size(-1)), style_score)
           if self.regs.samples else self.regs(pz, qz_x))
    obj = lpx_z - (beta * kld) - (alpha * reg)
    recon = lpx_z.sum()
    overlap = kld.sum()
    sparsity = reg.sum()
    return obj.sum(), recon, overlap, sparsity
def get_metrics(a, b, metric_type, operator_func):
    if metric_type == "kl_divergence":
        s1 = np.array([kl_divergence(a, b)])
        s2 = np.array([kl_divergence(b, a)])
    elif metric_type == "js_divergence":
        s1 = np.array([js_divergence(a, b)])
        s2 = np.array([js_divergence(b, a)])
    elif metric_type == "entropy":
        s1 = np.array([entropy(a)])
        s2 = np.array([entropy(b)])
    else:
        raise ValueError("metric_type undefined")
    return operator_func(operator, s1, s2)
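# The divergence helpers called by get_metrics() are defined elsewhere; the
# sketch below shows plausible NumPy implementations under the assumption
# that a and b are 1-D arrays that already sum to 1. Names and smoothing
# constants are illustrative only.
import numpy as np


def kl_divergence(p, q, eps=1e-12):
    # KL(p || q) for discrete distributions, with a small floor to avoid log(0).
    p = np.asarray(p, dtype=float)
    q = np.asarray(q, dtype=float)
    return float(np.sum(p * np.log((p + eps) / (q + eps))))


def entropy(p, eps=1e-12):
    p = np.asarray(p, dtype=float)
    return float(-np.sum(p * np.log(p + eps)))


def js_divergence(p, q):
    # Symmetric Jensen-Shannon divergence via the mixture distribution.
    m = 0.5 * (np.asarray(p, dtype=float) + np.asarray(q, dtype=float))
    return 0.5 * kl_divergence(p, m) + 0.5 * kl_divergence(q, m)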
def cost(self, Xi, kl_reg):
    f_KM, a_KM = self.f_KM, self.a_KM
    Xi_f = Xi[:self.lib_f.shape[1]]
    Xi_s = Xi[self.lib_f.shape[1]:]
    f_vals = self.lib_f @ Xi_f
    s_vals = self.lib_s @ Xi_s
    a_vals = 0.5 * s_vals**2
    f_vals += a_vals / self.centers1d  # Diffusion-induced drift from polar change of variables

    # Solve adjoint Fokker-Planck equation
    self.afp.precompute_operator(f_vals, a_vals)
    f_tau, a_tau = self.afp.solve(self.tau, d=0)

    mask = np.nonzero(np.isfinite(f_KM))[0]
    V = np.sum(self.W[0, mask] * abs(f_tau[mask] - f_KM[mask])**2) \
        + np.sum(self.W[1, mask] * abs(a_tau[mask] - a_KM[mask])**2)

    if kl_reg > 0:
        p_est = self.fp.solve(f_vals, a_vals)
        kl = utils.kl_divergence(self.p_hist, p_est, dx=self.fp.dx, tol=1e-6)
        kl = max(0, kl)
        V += kl_reg * kl

    return V
def test_alpha(electoral_threshold, m, n, n_tests, political_spectrum, seats, poll_covid):
    props = []
    # progressbar = tqdm(total=50)
    alphass = np.linspace(0.01, 1, num=10)
    for i, alpha in enumerate(alphass):
        print(i)
        # progressbar.update()
        astv_propor = []
        astv_l_propor = []
        for test_nr in range(n_tests):
            true_preferences = PreferenceCreator(n, m, political_spectrum).create_preferences()
            poll_results = get_poll(true_preferences, poll_covid)
            man_ballot = manipulate(true_preferences, np.nonzero(poll_results < electoral_threshold)[0], 1.)
            first_distribution = get_first_choice_dist(true_preferences)

            # a-STV outcome
            astv_scores, *_ = voting.alpha_STV_scores(true_preferences, alpha, electoral_threshold, percentage=True)
            astv_outcome = apportion.largest_remainder(astv_scores, seats)

            # a-STV liars outcome
            astv_l_scores, *_ = voting.alpha_STV_scores(man_ballot, alpha, electoral_threshold, percentage=True)
            astv_l_outcome = apportion.largest_remainder(astv_l_scores, seats)

            astv_propor.append(utils.kl_divergence(astv_outcome / seats, first_distribution))
            astv_l_propor.append(utils.kl_divergence(astv_l_outcome / seats, first_distribution))

        a = np.array(astv_propor).mean()
        al = np.array(astv_l_propor).mean()
        # a = lalala(electoral_threshold, m, n, n_tests, political_spectrum, seats, beta, 0.01)
        props.append((a, al))
    # progressbar.close()

    mean_astv_prop, mean_astv_l_prop = list(zip(*props))
    a = np.array(mean_astv_prop)
    b = np.array(mean_astv_l_prop)
    # print(a, b)
    plt.plot(alphass, a, label='a-stv')
    plt.plot(alphass, b, label='a-stv-liars')
    plt.legend()
    plt.show()
def train(self, epoch):
    self.optimizer.lr = self.lr_schedule(epoch)
    train_loss = 0
    train_acc = 0
    for i, batch in enumerate(self.train_iter):
        x_array, t_array = chainer.dataset.concat_examples(batch)
        x = chainer.Variable(cuda.to_gpu(x_array))
        t = chainer.Variable(cuda.to_gpu(t_array))
        # self.optimizer.use_cleargrads(use=False)
        # self.optimizer.use_cleargrads()
        ## self.optimizer.reallocate_cleared_grads()
        # x.cleargrad()
        # t.cleargrad()
        # self.optimizer.zero_grads()
        # self.optimizer.setup(model)
        ### with chainer.no_backprop_mode(): This is not the origin
        y = self.model(x)
        self.model.cleargrads()
        if self.opt.BC:
            loss = utils.kl_divergence(y, t)
            acc = F.accuracy(y, F.argmax(t, axis=1))
        else:
            loss = F.softmax_cross_entropy(y, t)
            acc = F.accuracy(y, t)
        # self.optimizer.check_nan_in_grads()
        self.optimizer.use_cleargrads(use=True)
        loss.backward()
        self.optimizer.update()

        train_loss += float(loss.data) * len(t.data)
        train_acc += float(acc.data) * len(t.data)

        elapsed_time = time.time() - self.start_time
        progress = (self.n_batches * (epoch - 1) + i + 1) * 1.0 / (self.n_batches * self.opt.nEpochs)
        eta = elapsed_time / progress - elapsed_time

        line = '* Epoch: {}/{} ({}/{}) | Train: LR {} | Time: {} (ETA: {})'.format(
            epoch, self.opt.nEpochs, i + 1, self.n_batches,
            self.optimizer.lr, utils.to_hms(elapsed_time), utils.to_hms(eta))
        sys.stderr.write('\r\033[K' + line)
        sys.stderr.flush()

    self.train_iter.reset()
    train_loss /= len(self.train_iter.dataset)
    train_top1 = 100 * (1 - train_acc / len(self.train_iter.dataset))

    return train_loss, train_top1
def m_elbo_naive(model, x, K=1):
    """Computes E_{p(x)}[ELBO] for multi-modal vae --- NOT EXPOSED"""
    qz_xs, px_zs, zss = model(x)
    lpx_zs, klds = [], []
    for r, qz_x in enumerate(qz_xs):
        kld = kl_divergence(qz_x, model.pz(*model.pz_params))
        klds.append(kld.sum(-1))
        for d, px_z in enumerate(px_zs[r]):
            lpx_z = px_z.log_prob(x[d]) * model.vaes[d].llik_scaling
            lpx_zs.append(lpx_z.view(*px_z.batch_shape[:2], -1).sum(-1))
    obj = (1 / len(model.vaes)) * (torch.stack(lpx_zs).sum(0) - torch.stack(klds).sum(0))
    return obj.mean(0).sum()
def analyse(self, data, K):
    self.eval()
    with torch.no_grad():
        qz_xs, _, zss = self.forward(data, K=K)
        pz = self.pz(*self.pz_params)
        zss = [pz.sample(torch.Size([K, data[0].size(0)])).view(-1, pz.batch_shape[-1]),
               *[zs.view(-1, zs.size(-1)) for zs in zss]]
        zsl = [torch.zeros(zs.size(0)).fill_(i) for i, zs in enumerate(zss)]
        kls_df = tensors_to_df(
            [*[kl_divergence(qz_x, pz).cpu().numpy() for qz_x in qz_xs],
             *[0.5 * (kl_divergence(p, q) + kl_divergence(q, p)).cpu().numpy()
               for p, q in combinations(qz_xs, 2)]],
            head='KL',
            keys=[*[r'KL$(q(z|x_{})\,||\,p(z))$'.format(i) for i in range(len(qz_xs))],
                  *[r'J$(q(z|x_{})\,||\,q(z|x_{}))$'.format(i, j)
                    for i, j in combinations(range(len(qz_xs)), 2)]],
            ax_names=['Dimensions', r'KL$(q\,||\,p)$'])
    return embed_umap(torch.cat(zss, 0).cpu().numpy()), \
        torch.cat(zsl, 0).cpu().numpy(), \
        kls_df
def train(self, epoch):
    """ run one train epoch """
    train_loss = 0
    train_acc = 0
    for i, (x_array, t_array) in enumerate(self.train_iter):
        device = torch.device("cuda" if cuda.is_available() else "cpu")
        self.optimizer.zero_grad()
        x = x_array.to(device)
        t = t_array.to(device)
        y = self.model(x)
        if self.opt.BC:
            t = t.to(device, dtype=torch.float32)
            y = y.to(device, dtype=torch.float32)
            loss = utils.kl_divergence(y, t)
            t_indices = torch.argmax(t, dim=1)
            acc = accuracy(y.data, t_indices)
        else:
            # F.cross_entropy already combines log_softmax and NLLLoss
            t = t.to(device, dtype=torch.int64)
            loss = F.cross_entropy(y, t)
            acc = accuracy(y.data, t)
        loss.backward()
        self.optimizer.step()

        train_loss += float(loss.item()) * len(t.data)
        train_acc += float(acc.item()) * len(t.data)

        elapsed_time = time.time() - self.start_time
        progress = (self.n_batches * (epoch - 1) + i + 1) * 1.0 / (self.n_batches * self.opt.nEpochs)
        eta = elapsed_time / progress - elapsed_time

        line = '* Epoch: {}/{} ({}/{}) | Train: LR {} | Time: {} (ETA: {})'.format(
            epoch, self.opt.nEpochs, i + 1, self.n_batches,
            self.scheduler.get_last_lr(), utils.to_hms(elapsed_time), utils.to_hms(eta))
        sys.stderr.write('\r\033[K' + line)
        sys.stderr.flush()

    train_loss /= len(self.train_iter.dataset)
    train_top1 = 100 * (1 - train_acc / len(self.train_iter.dataset))

    return train_loss, train_top1
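# In the BC branch above, utils.kl_divergence(y, t) acts as a soft-label loss.
# Its definition is not included here; a plausible PyTorch sketch, assuming y
# holds raw logits and t holds mixed (soft) label distributions, is:
import torch
import torch.nn.functional as F


def kl_divergence_bc(y, t, eps=1e-7):
    # KL(t || softmax(y)), averaged over the batch; eps guards log(0) for
    # zero entries of the soft target.
    log_q = F.log_softmax(y, dim=1)
    return torch.mean(torch.sum(t * (torch.log(t + eps) - log_q), dim=1))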
def decomp_objective(model, x, K=1, beta=1.0, alpha=0.0, regs=None, components=False):
    r"""Computes E_{p(x)}[ELBO_{\alpha,\beta}] """
    qz_x, px_z, zs = model(x, K)
    lpx_z = px_z.log_prob(x).view(*px_z.batch_shape[:2], -1).sum(-1)
    pz = model.pz(*model.pz_params)
    kld = kl_divergence(qz_x, pz, samples=zs).sum(-1)
    reg = ((regs(pz.sample(torch.Size([x.size(0)])).view(-1, zs.size(-1)), zs.squeeze(0))
            if regs.samples else regs(pz, qz_x))
           if regs else torch.tensor(0))
    obj = lpx_z - (beta * kld) - (alpha * reg)
    return obj.sum() if not components else (obj.sum(), lpx_z.sum(), kld.sum(), reg.sum())
def cost(self, Xi, kl_reg):
    r"""Least-squares cost function for optimization"""
    f_KM, a_KM = self.f_KM[0].flatten(), self.a_KM[0].flatten()
    Xi_f = Xi[:self.lib_f.shape[1]]
    Xi_s = Xi[self.lib_f.shape[1]:]
    f_vals = self.lib_f @ Xi_f
    s_vals = self.lib_s @ Xi_s
    a_vals = 0.5 * (np.real(s_vals)**2 + 1j * (np.imag(s_vals))**2)

    # Solve adjoint Fokker-Planck equation
    self.afp.precompute_operator(
        [np.real(f_vals), np.imag(f_vals)],
        [np.real(a_vals), np.imag(a_vals)])
    f_tau, a_tau = self.afp.solve(self.tau, d=0)  # Assumes real/imag symmetry

    mask = np.nonzero(np.isfinite(f_KM))[0]
    V = np.sum(self.W[0, mask] * abs(f_tau[mask] - f_KM[mask])**2) \
        + np.sum(self.W[1, mask] * abs(a_tau[mask] - a_KM[mask])**2)

    if kl_reg > 0:
        p_est = self.fp.solve(
            [np.reshape(np.real(f_vals), self.fp.N),
             np.reshape(np.imag(f_vals), self.fp.N)],
            [np.reshape(np.real(a_vals), self.fp.N),
             np.reshape(np.imag(a_vals), self.fp.N)])
        kl = utils.kl_divergence(self.p_hist, p_est, dx=self.fp.dx, tol=1e-6)
        kl = max(0, kl)
        V += kl_reg * kl

    if not np.isfinite(V):
        print('Error in cost function')
        print(Xi)
        print(f_vals)
        print(a_vals)
        return None

    return V
def rank_phrase(case_file):
    ph_dist_map = {}
    smoothing_factor = 0.0
    phrase_map, cell_map, cell_cnt = read_caseolap_result(case_file)
    unif = [1.0 / cell_cnt] * cell_cnt
    for ph in phrase_map:
        ph_vec = [x[1] for x in phrase_map[ph].items()]
        # Modified by MILI
        if len(ph_vec) < cell_cnt:
            ph_vec += [0] * (cell_cnt - len(ph_vec))
        # smoothing
        ph_vec = [x + smoothing_factor for x in ph_vec]
        ph_vec = utils.l1_normalize(ph_vec)
        ph_dist_map[ph] = utils.kl_divergence(ph_vec, unif)
    ranked_list = sorted(ph_dist_map.items(), key=operator.itemgetter(1), reverse=True)
    return ranked_list
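# rank_phrase() scores each phrase by how far its cell distribution is from
# uniform. The utils helpers it relies on are defined elsewhere; hypothetical
# list-based stand-ins (assumptions, not the project's actual code) would be:
import math


def l1_normalize(vec):
    # Scale a list of non-negative counts so it sums to 1.
    total = float(sum(vec))
    return [x / total for x in vec] if total > 0 else vec


def kl_divergence(p, q, eps=1e-12):
    # KL(p || q) for two equal-length discrete distributions given as lists.
    return sum(pi * math.log((pi + eps) / (qi + eps)) for pi, qi in zip(p, q))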
def analyse(self, data, K):
    self.eval()
    with torch.no_grad():
        qz_x, _, zs = self.forward(data, K=K)
        pz = self.pz(*self.pz_params)
        zss = [pz.sample(torch.Size([K, data.size(0)])).view(-1, pz.batch_shape[-1]),
               zs.view(-1, zs.size(-1))]
        zsl = [torch.zeros(zs.size(0)).fill_(i) for i, zs in enumerate(zss)]
        kls_df = tensors_to_df([kl_divergence(qz_x, pz).cpu().numpy()],
                               head='KL',
                               keys=[r'KL$(q(z|x)\,||\,p(z))$'],
                               ax_names=['Dimensions', r'KL$(q\,||\,p)$'])
    return embed_umap(torch.cat(zss, 0).cpu().numpy()), \
        torch.cat(zsl, 0).cpu().numpy(), \
        kls_df
def train(self, epoch):
    self.optimizer.lr = self.lr_schedule(epoch)
    train_loss = 0
    train_acc = 0
    for i, batch in enumerate(self.train_iter):
        x_array, t_array = chainer.dataset.concat_examples(batch)
        x_array = np.reshape(x_array, (self.opt.batchSize * 2, -1)).astype('float32')
        t_array = np.reshape(t_array, (self.opt.batchSize * 2, -1)).astype('float32')
        x = chainer.Variable(cuda.to_gpu(x_array[:, None, None, :]))
        t = chainer.Variable(cuda.to_gpu(t_array))
        self.model.cleargrads()
        y, t = self.model(x, t, self.opt.mixup_type, self.opt.eligible, self.opt.batchSize)
        if self.opt.BC:
            loss = utils.kl_divergence(y, t)
            acc = F.accuracy(y, F.argmax(t, axis=1))
        else:
            loss = F.softmax_cross_entropy(y, t)
            acc = F.accuracy(y, t)
        loss.backward()
        self.optimizer.update()

        train_loss += float(loss.data) * len(t.data)
        train_acc += float(acc.data) * len(t.data)

        elapsed_time = time.time() - self.start_time
        progress = (self.n_batches * (epoch - 1) + i + 1) * 1.0 / (self.n_batches * self.opt.nEpochs)
        if progress != 0:
            eta = elapsed_time / progress - elapsed_time
        else:
            eta = 0

        line = '* Epoch: {}/{} ({}/{}) | Train: LR {} | Time: {} (ETA: {})'.format(
            epoch, self.opt.nEpochs, i + 1, self.n_batches,
            self.optimizer.lr, utils.to_hms(elapsed_time), utils.to_hms(eta))
        sys.stderr.write('\r\033[K' + line)
        sys.stderr.flush()

    self.train_iter.reset()
    train_loss /= len(self.train_iter.dataset) * 2
    train_top1 = 100 * (1 - train_acc / (len(self.train_iter.dataset) * 2))

    return train_loss, train_top1
def m_elbo(model, x, K=1):
    """Computes importance-sampled m_elbo (in notes3) for multi-modal vae """
    qz_xs, px_zs, zss = model(x)
    lpx_zs, klds = [], []
    for r, qz_x in enumerate(qz_xs):
        kld = kl_divergence(qz_x, model.pz(*model.pz_params))
        klds.append(kld.sum(-1))
        for d in range(len(px_zs)):
            lpx_z = px_zs[d][d].log_prob(x[d]).view(*px_zs[d][d].batch_shape[:2], -1)
            lpx_z = (lpx_z * model.vaes[d].llik_scaling).sum(-1)
            if d == r:
                lwt = torch.tensor(0.0)
            else:
                zs = zss[d].detach()
                lwt = (qz_x.log_prob(zs) - qz_xs[d].log_prob(zs).detach()).sum(-1)
            lpx_zs.append(lwt.exp() * lpx_z)
    obj = (1 / len(model.vaes)) * (torch.stack(lpx_zs).sum(0) - torch.stack(klds).sum(0))
    return obj.mean(0).sum()
def reweight(embs, dp_file, lp_file):
    source_type = 'd'
    target_type = 'l'
    mid_type = 'p'
    ori_embs = embs
    agg_embs = copy.copy(embs)

    # Step 0: check original embedding's performance
    print '*********** Direct Embedding'
    evaluate(ori_embs, true_file, target_dim)

    pd_map = load_dp(dp_file, reverse=True)
    dp_map = load_edge_map(dp_file)
    lp_map = load_edge_map(lp_file)
    dist_map = {x: 1 for x in embs[mid_type]}
    vec_size = 0
    for d in ori_embs[mid_type]:
        vec_size = len(ori_embs[mid_type][d])
        break
    # print '============= dp, pd maps loaded'

    # Step 1: check with D weighted avg, what's the performance
    agg_embs[source_type] = weighted_avg_embedding(dp_map, agg_embs[mid_type], dist_map, vec_size)
    # optional L - embedding also aggregated from P
    normal = False
    if not normal:
        agg_embs[target_type] = weighted_avg_embedding(lp_map, agg_embs[mid_type], dist_map, vec_size)
    # print '============= doc embedding aggregated.'
    print '*********** Aggregate iter 0'
    evaluate(agg_embs, true_file, target_dim)

    for i in range(2):
        if i > 0:
            normal = True
        print '============= iter ' + str(i+1) + ' of dist started.'
        pred_label, doc_score = doc_assignment(agg_embs, source_type, target_type)
        top_labels = [w.path for w in hier.get_nodes_at_level(1)]
        # print '============= docs assigned to labels'

        # # print meta stats
        # top_label_cnts = {}
        # for label in top_labels:
        #     top_label_cnts[label] = 0
        # for doc_pair in filtered_docs:
        #     l = pred_label[doc_pair[0]]
        #     top_label_cnts[l] += 1
        # print top_label_cnts
        # print 'top level labels: ' + str(top_labels)

        label_to_idx = {}
        for idx, label in enumerate(top_labels):
            label_to_idx[label] = idx
        uniform_vec = [1.0 / len(top_labels)] * len(top_labels)
        # print uniform_vec

        label_to_doc = {}
        for label in top_labels:
            label_to_doc[label] = set()
        docs_used = {}
        if normal:
            print 'used docs in reweighting: ' + str(len(pred_label))
            for doc, score in doc_score.iteritems():
                label_to_doc[pred_label[doc]].add(doc)
        else:
            for label in top_labels:
                p = label.lower()
                # idx = label_to_idx[label]
                for doc in pd_map[p]:
                    label_to_doc[label].add(doc)
                    if doc not in docs_used:
                        docs_used[doc] = set()
                    docs_used[doc].add(label)
            print 'docs used: %d' % len(docs_used)

        cnt_vec = [0.0] * len(top_labels)
        for label in label_to_doc:
            cnt_vec[label_to_idx[label]] = len(label_to_doc[label])
        comp_vec = utils.l1_normalize(cnt_vec)
        print cnt_vec
        # print comp_vec

        distinct_map = {}
        if normal:
            for phrase in embs[mid_type]:
                p_vec = [0.0] * len(top_labels)
                # if len(pd_map[phrase]) < 100:
                #     continue
                for doc in pd_map[phrase]:
                    idx = label_to_idx[pred_label[doc]]
                    p_vec[idx] += 1.0
                if sum(p_vec) == 0:
                    print 'ERROR!!!!!!!!!!'
                    continue
                p_vec = utils.l1_normalize(p_vec)
                # kl = 0.1 + 0.9 * utils.kl_divergence(p_vec, uniform_vec)
                kl = utils.kl_divergence(p_vec, uniform_vec)
                # kl = utils.kl_divergence(p_vec, comp_vec)
                distinct_map[phrase] = kl
        else:
            for phrase in embs[mid_type]:
                p_vec = [0.0] * len(top_labels)
                # if len(pd_map[phrase]) < 100:
                #     continue
                for doc in pd_map[phrase]:
                    if doc in docs_used:
                        for label in docs_used[doc]:
                            idx = label_to_idx[label]
                            p_vec[idx] += 1.0
                # print p_vec
                if sum(p_vec) == 0:
                    distinct_map[phrase] = 0
                    # print 'ERROR!!!!!!!!!!'
                    continue
                # p_vec = [x / cnt_vec[i] for i, x in enumerate(p_vec)]
                p_vec = utils.l1_normalize(p_vec)
                # kl = 0.1 + 0.9 * utils.kl_divergence(p_vec, uniform_vec)
                # kl = utils.kl_divergence(p_vec, uniform_vec)
                kl = utils.kl_divergence(p_vec, comp_vec)
                distinct_map[phrase] = kl

        dist_map = distinct_map
        # with open('focal_comp.txt', 'w+') as g:
        #     for (ph, score) in sorted(dist_map.items(), key=operator.itemgetter(1), reverse=True):
        #         g.write('%s,%f\t' % (ph, score))
        # print '============= phrase distinctness computed.'

        agg_embs[source_type] = weighted_avg_embedding(dp_map, agg_embs[mid_type], dist_map, vec_size)
        # print '============= doc embedding aggregated.'
        print '*********** Aggregate with distinct at iter ' + str(i + 1)
        evaluate(agg_embs, true_file, target_dim)
def expan_round(embs, seeds_map, all_seeds, limit, cate_lim, mode='EMB', pd_map=None):
    target_type = 'p'
    multiplier = 5
    thre_softmax = 0.5
    extended_seeds = set()
    candidates = {}
    if mode == 'EMB':
        for phrase in embs[target_type]:
            if phrase in all_seeds:
                continue
            t_emb = embs[target_type][phrase]
            rel_values = {}
            # flat comparison
            for label in seeds_map:
                max_sim = 0
                for seed in seeds_map[label]:
                    sim = multiplier * utils.cossim(t_emb, embs[target_type][seed])
                    if sim > max_sim:
                        max_sim = sim
                rel_values[label] = max_sim
            utils.softmax_for_map(rel_values)
            best_label = sorted(rel_values.items(), key=operator.itemgetter(1), reverse=True)[0][0]
            candidates[best_label + '@' + phrase] = rel_values[best_label]
    elif mode == 'DIS':
        pred_label, doc_score = doc_assignment(embs, 'd', 'l', mode='FLAT')
        top_labels = [w.path for w in hier.get_all_nodes()]
        print 'Doc Assignment done...'
        label_to_idx = {}
        for idx, label in enumerate(top_labels):
            label_to_idx[label] = idx
        # print uniform_vec
        label_to_doc = {}
        for label in top_labels:
            label_to_doc[label] = set()
        for doc, score in doc_score.iteritems():
            label_to_doc[pred_label[doc]].add(doc)
        cnt_vec = [0.0] * len(top_labels)
        for label in label_to_doc:
            cnt_vec[label_to_idx[label]] = len(label_to_doc[label])
        comp_vec = utils.l1_normalize(cnt_vec)
        uniform_vec = [1.0 / len(top_labels)] * len(top_labels)
        # print cnt_vec
        # print comp_vec
        for phrase in embs['p']:
            if phrase in all_seeds:
                continue
            p_vec = [0.0] * len(top_labels)
            for doc in pd_map[phrase]:
                idx = label_to_idx[pred_label[doc]]
                p_vec[idx] += 1.0
            max_label_value = 0
            best_label = ''
            best_cnt = 0
            for label in top_labels:
                idx = label_to_idx[label]
                if p_vec[idx] > 0:
                    norm_value = p_vec[idx] / cnt_vec[idx]
                    if norm_value > max_label_value:
                        max_label_value = norm_value
                        best_label = label
                        best_cnt = p_vec[idx]
            if sum(p_vec) == 0:
                print 'ERROR!!!!!!!!!!'
                continue
            p_vec = utils.l1_normalize(p_vec)
            # kl = 0.1 + 0.9 * utils.kl_divergence(p_vec, uniform_vec)
            # kl = utils.kl_divergence(p_vec, comp_vec)
            kl = utils.kl_divergence(p_vec, uniform_vec)
            # best_label = sorted(rel_values.items(), key=operator.itemgetter(1), reverse=True)[0][0]
            pop = max_label_value  # * (1 + math.log(1 + max_label_value))
            candidates[best_label + '@' + phrase] = kl * max_label_value

    candidates = sorted(candidates.items(), key=operator.itemgetter(1), reverse=True)
    # cands_by_label = {}
    # for cand in candidates:
    #     label, phrase = cand.split('@')
    #     if label not in cands_by_label:
    #         cands_by_label[label] = {}
    #     cands_by_label[label][phrase] = candidates[cand]
    # for label in cands_by_label:
    #     print '\n' + label
    #     cand_cate = cands_by_label[label]
    #     best_exps = sorted(cand_cate.items(), key=operator.itemgetter(1), reverse=True)[:10]
    #     # best_exps = sorted(candidates.items(), key=operator.itemgetter(1), reverse=True)[:30]
    #     print best_exps
    # exit(1)

    added = 0
    added_cates = {}
    for (cand, score) in candidates:
        label, phrase = cand.split('@')
        if label not in added_cates:
            added_cates[label] = 0
        if added_cates[label] >= cate_lim:
            continue
        if len(seeds_map[label]) >= 3:
            continue
        extended_seeds.add(cand)
        added_cates[label] += 1
        added += 1
        if added > limit:
            break
    print 'extended: ' + str(extended_seeds)
    return extended_seeds
def model_eval(eps, sigma, N, kl_reg):
    """
    Construct and evaluate all models of the double-well
      1. Analytic normal form model
      2. PDF fitting without Kramers-Moyal average
      3. Full Langevin regression
    """
    ### Generate data
    x_eq = np.sqrt(eps)  # Equilibrium value
    edges = np.linspace(-2 * x_eq, 2 * x_eq, N + 1)
    centers = 0.5 * (edges[:-1] + edges[1:])
    dx = centers[1] - centers[0]

    dt = 1e-2
    tmax = int(1e5)
    t, X = jl.run_sim(eps, sigma, dt, tmax)
    X, V = X[0, :], X[1, :]

    # PDF of states
    p_hist = np.histogram(X, edges, density=True)[0]

    # Dwell-time slope
    b, b_err = dwell_stats(X, x_eq, dt)
    print("\tData: ", b, b_err)

    ### 1. Normal form
    lamb1 = -1 + np.sqrt(1 + eps)
    lamb2 = -1 - np.sqrt(1 + eps)
    h = -lamb1 / lamb2
    mu = -(1 + h)**2 * lamb1 / eps
    _, phi1 = jl.run_nf(lamb1, mu, sigma / (2 * np.sqrt(1 + eps)), dt, tmax)
    X_nf = (1 + h) * phi1[0, :]

    # Statistics
    p_nf = np.histogram(X_nf, edges, density=True)[0]
    b_nf, b_nf_err = dwell_stats(X_nf, x_eq, dt)
    print("\tNormal form: ", b_nf, b_nf_err)

    ### 2. PDF fit
    Xi = fit_pdf(X, edges, p_hist, dt, p0=[1, lamb1 / sigma**2, mu / sigma**2])
    # print(Xi)

    # Monte Carlo evaluation
    _, X_pdf = jl.run_nf(Xi[0], Xi[1], Xi[2], dt, tmax)
    X_pdf = X_pdf[0, :]

    # Statistics
    p_pdf = np.histogram(X_pdf, edges, density=True)[0]
    b_pdf, b_pdf_err = dwell_stats(X_pdf, x_eq, dt)
    print("\tPDF fit: ", b_pdf, b_pdf_err)

    ### 3. Langevin regression
    Xi = langevin_regression(X, edges, p_hist, dt, stride=200, kl_reg=kl_reg)
    # print(Xi)

    # Monte Carlo evaluation
    _, X_lr = jl.run_nf(Xi[0], Xi[1], Xi[2], dt, tmax)
    X_lr = X_lr[0, :]

    # Statistics
    p_lr = np.histogram(X_lr, edges, density=True)[0]
    b_lr, b_lr_err = dwell_stats(X_lr, x_eq, dt)
    print("\tLangevin regression: ", b_lr, b_lr_err)

    ### KL-divergence of all models against true data
    KL_nf = utils.kl_divergence(p_hist, p_nf, dx=dx, tol=1e-6)
    KL_pdf = utils.kl_divergence(p_hist, p_pdf, dx=dx, tol=1e-6)
    KL_lr = utils.kl_divergence(p_hist, p_lr, dx=dx, tol=1e-6)
    print("\tKL div: ", KL_nf, KL_pdf, KL_lr)

    return [b, b_nf, b_pdf, b_lr], [KL_nf, KL_pdf, KL_lr]
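# model_eval() (and the cost functions above) call utils.kl_divergence on
# histogram density estimates with a bin width dx and a tolerance tol. That
# helper is not reproduced here; a plausible sketch, assuming dx is the bin
# width and tol floors both densities before taking the log, is:
import numpy as np


def kl_divergence_hist(p, q, dx=1.0, tol=1e-6):
    # Discretized KL(p || q) for binned probability densities:
    # sum_i p_i * log(p_i / q_i) * dx, with densities clipped from below by tol.
    p = np.clip(np.asarray(p, dtype=float), tol, None)
    q = np.clip(np.asarray(q, dtype=float), tol, None)
    return float(np.sum(p * np.log(p / q)) * dx)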
def reweight_test(embs, dp_file):
    source_type = 'd'
    target_type = 'l'
    target_embs = embs[target_type]
    pred_label = {}
    doc_score = {}
    ratio = 1
    for doc in embs[source_type]:
        doc_emb = embs[source_type][doc]
        sim_map = classify_doc(doc_emb, target_embs)
        pred_label[doc] = hier.get_node(sim_map[0][0]).get_ascendant(1).path
        doc_score[doc] = sim_map[0][1]
    doc_score = sorted(doc_score.items(), key=operator.itemgetter(1), reverse=True)
    filtered_docs = doc_score[:int(len(doc_score) * ratio)]
    top_labels = [w.path for w in hier.get_nodes_at_level(1)]

    # print meta stats
    top_label_cnts = {}
    for label in top_labels:
        top_label_cnts[label] = 0
    for doc_pair in filtered_docs:
        l = pred_label[doc_pair[0]]
        top_label_cnts[l] += 1
    print top_label_cnts
    print 'top level labels: ' + str(top_labels)
    # return

    label_to_idx = {}
    for idx, label in enumerate(top_labels):
        label_to_idx[label] = idx
    uniform_vec = [1.0 / len(top_labels)] * len(top_labels)
    print uniform_vec
    label_to_doc = {}
    # new_filter = []
    # new_pred_ls = {}
    # for (doc, score) in filtered_docs:
    #     if pred_label[doc] not in top_labels:
    #         continue
    #     new_filter.append((doc, score))
    #     new_pred_ls[doc] = pred_label[doc]
    # filtered_docs = new_filter
    # pred_label = new_pred_ls
    pd_map = load_dp(dp_file, reverse=True)
    for label in top_labels:
        label_to_doc[label] = set()
    print 'used docs in reweighting: ' + str(len(filtered_docs))
    for (doc, score) in filtered_docs:
        label_to_doc[pred_label[doc]].add(doc)

    distinct_map = {}
    cnt = 0
    for phrase in embs['p']:
        p_vec = [0.0] * len(top_labels)
        if len(pd_map[phrase]) < 100:
            continue
        for doc in pd_map[phrase]:
            if doc not in pred_label:
                continue
            idx = label_to_idx[pred_label[doc]]
            p_vec[idx] += 1.0
        if sum(p_vec) == 0:
            continue
        p_vec = utils.l1_normalize(p_vec)
        kl = utils.kl_divergence(p_vec, uniform_vec)
        distinct_map[phrase] = kl
    distinct_map = sorted(distinct_map.items(), key=operator.itemgetter(1), reverse=False)
    print distinct_map[:100]
    print
    print distinct_map[:-100]
def objective(vae_model, classification_model, device, x, mask, label, beta, nu,
              K=10, kappa=1.0, components=False):
    r"""Computes E_{p(x)}[ELBO_{\alpha,\beta}] """
    types = vae_model.disease_types
    qz_x, px_z, zs = vae_model(x, K)

    # compute supervised loss
    bce = torch.nn.BCELoss()
    pred = classification_model(zs.squeeze(0))
    supervised_loss = bce(pred, label)

    # compute vae loss
    lpx_z = px_z.log_prob(x).view(*px_z.batch_shape[:2], -1).sum(-1)
    pz = vae_model.pz(*vae_model.pz_params)
    kld = kl_divergence(qz_x, pz, samples=zs).sum(-1)

    # compute kl(p(z), q(z))
    B, D = qz_x.loc.shape
    _zs = pz.rsample(torch.Size([B]))
    lpz = pz.log_prob(_zs).sum(-1).squeeze(-1)
    _zs_expand = _zs.expand(B, B, D)
    lqz = qz_x.log_prob(_zs_expand).sum(-1)  # B*B
    qz = []
    _max = torch.max(lqz)
    for i in range(types):
        ds = lqz[:, (mask == i)] - _max
        qz_j = torch.exp(ds)  # B*k
        qz.append((qz_j * beta[i]))
    qz = torch.cat(qz, dim=1).to(device)
    lqz = torch.sum(qz, dim=1)
    lqz = _max + torch.log(lqz) - math.log(qz.size(1))
    inc_kld = lpz - lqz
    inc_kld = inc_kld.mean(0, keepdim=True).expand(1, B)
    inc_kld = inc_kld.mean(0).sum() / B

    # compute kl(q(z), N(z))
    _zs = qz_x.rsample(torch.Size([B]))  # B*B*D
    lqz = qz_x.log_prob(_zs)  # B*B*D
    kld2 = []
    for i in range(types):
        tmp_zs = _zs[:, mask == i, :]  # B*k*D
        lnj = dist.Normal(vae_model.pz_params[0][i], vae_model.pz_params[1][i]).log_prob(tmp_zs)
        _kl = (tmp_zs - lnj).mean(0)  # k*D
        kld2.append((_kl * nu[i]).mean(0))
    kld2 = torch.stack(kld2).to(device)
    kld2 = torch.sum(kld2)

    obj = supervised_loss - lpx_z + kld + kappa * inc_kld + kld2
    return obj.sum() if not components else (obj.sum(), supervised_loss.sum(), lpx_z.sum(),
                                             kld.sum(), inc_kld.sum(), kld2.sum())
def expan(embs, l_prel_file, dp_file, lp_file, mode='EMB'):
    # the part to verify iterative expansion
    # Mode = EMB: meaning that the similarity is learned from embedding
    # Mode = DIS: meaning that the similarity is from L-P assignment
    target_type = 'p'
    source_type = 'l'
    multiplier = 5
    thre_softmax = 0.5
    ori_embs = embs
    agg_embs = copy.copy(embs)
    pd_map = load_dp(dp_file, reverse=True)
    dp_map = load_edge_map(dp_file)
    lp_map = load_edge_map(lp_file)
    dist_map = {x: 1 for x in embs[target_type]}
    vec_size = 0
    for d in ori_embs[target_type]:
        vec_size = len(ori_embs[target_type][d])
        break

    seeds_map = {}  # label : seed set
    all_seeds = set()
    with open(l_prel_file, 'r') as f:
        for line in f:
            segs = line.strip('\r\n').split('\t')
            if segs[1] == '*':
                continue
            seeds_map[segs[1]] = set()
            seeds_map[segs[1]].add(segs[2].lower())
            all_seeds.add(segs[2].lower())

    print '*********** Direct Embedding'
    evaluate(ori_embs, true_file, target_dim)

    agg_embs[source_type] = weighted_avg_embedding(lp_map, agg_embs[target_type], dist_map, vec_size)
    agg_embs['d'] = weighted_avg_embedding(dp_map, agg_embs[target_type], dist_map, vec_size)
    print '*********** Aggregate without expansion'
    evaluate(agg_embs, true_file, target_dim)

    for i in range(2):
        print '======== iter ' + str(i) + ' of expansion.'
        extended_seeds = expan_round(agg_embs, seeds_map, all_seeds, 3, 1, mode=mode, pd_map=pd_map)
        print '============= seeds expanded'
        for seed in extended_seeds:
            label, phrase = seed.split('@')
            if label not in lp_map or phrase in lp_map[label]:
                print 'ERRRROR!!! ' + seed
            all_seeds.add(phrase.lower())
            seeds_map[label].add(phrase.lower())
            lp_map[label][phrase] = 1
        agg_embs[source_type] = weighted_avg_embedding(lp_map, agg_embs[target_type], dist_map, vec_size)
        print '*********** Aggregate with expansion at iter ' + str(i)
        evaluate(agg_embs, true_file, target_dim)

    normal = False
    source_type = 'd'
    target_type = 'l'
    mid_type = 'p'
    for i in range(2):
        if i > 0:
            normal = True
        print '============= iter ' + str(i) + ' of dist started.'
        pred_label, doc_score = doc_assignment(agg_embs, 'd', 'l')
        top_labels = [w.path for w in hier.get_nodes_at_level(1)]
        print '============= docs assigned to labels'
        # # print meta stats
        # top_label_cnts = {}
        # for label in top_labels:
        #     top_label_cnts[label] = 0
        # for doc_pair in filtered_docs:
        #     l = pred_label[doc_pair[0]]
        #     top_label_cnts[l] += 1
        # print top_label_cnts
        # print 'top level labels: ' + str(top_labels)
        label_to_idx = {}
        for idx, label in enumerate(top_labels):
            label_to_idx[label] = idx
        uniform_vec = [1.0 / len(top_labels)] * len(top_labels)
        # print uniform_vec
        label_to_doc = {}
        for label in top_labels:
            label_to_doc[label] = set()
        docs_used = {}
        if normal:
            print 'used docs in reweighting: ' + str(len(pred_label))
            for doc, score in doc_score.iteritems():
                label_to_doc[pred_label[doc]].add(doc)
        else:
            for label in top_labels:
                p = label.lower()
                # idx = label_to_idx[label]
                for doc in pd_map[p]:
                    label_to_doc[label].add(doc)
                    if doc not in docs_used:
                        docs_used[doc] = set()
                    docs_used[doc].add(label)
            print 'docs used: %d' % len(docs_used)
        cnt_vec = [0.0] * len(top_labels)
        for label in label_to_doc:
            cnt_vec[label_to_idx[label]] = len(label_to_doc[label])
        comp_vec = utils.l1_normalize(cnt_vec)
        print cnt_vec
        # print comp_vec
        distinct_map = {}
        if normal:
            for phrase in embs[mid_type]:
                p_vec = [0.0] * len(top_labels)
                # if len(pd_map[phrase]) < 100:
                #     continue
                for doc in pd_map[phrase]:
                    idx = label_to_idx[pred_label[doc]]
                    p_vec[idx] += 1.0
                if sum(p_vec) == 0:
                    print 'ERROR!!!!!!!!!!'
                    continue
                p_vec = utils.l1_normalize(p_vec)
                # kl = 0.1 + 0.9 * utils.kl_divergence(p_vec, uniform_vec)
                kl = utils.kl_divergence(p_vec, uniform_vec)
                # kl = utils.kl_divergence(p_vec, comp_vec)
                distinct_map[phrase] = kl
        else:
            for phrase in embs[mid_type]:
                p_vec = [0.0] * len(top_labels)
                # if len(pd_map[phrase]) < 100:
                #     continue
                for doc in pd_map[phrase]:
                    if doc in docs_used:
                        for label in docs_used[doc]:
                            idx = label_to_idx[label]
                            p_vec[idx] += 1.0
                # print p_vec
                if sum(p_vec) == 0:
                    distinct_map[phrase] = 0
                    # print 'ERROR!!!!!!!!!!'
                    continue
                # p_vec = [x / cnt_vec[i] for i, x in enumerate(p_vec)]
                p_vec = utils.l1_normalize(p_vec)
                # kl = 0.1 + 0.9 * utils.kl_divergence(p_vec, uniform_vec)
                # kl = utils.kl_divergence(p_vec, uniform_vec)
                kl = utils.kl_divergence(p_vec, comp_vec)
                distinct_map[phrase] = kl

        dist_map = distinct_map
        with open('focal_comp.txt', 'w+') as g:
            for (ph, score) in sorted(dist_map.items(), key=operator.itemgetter(1), reverse=True):
                g.write('%s,%f\t' % (ph, score))
        print '============= phrase distinctness computed.'
        agg_embs[source_type] = weighted_avg_embedding(dp_map, agg_embs[mid_type], dist_map, vec_size)
        print '============= doc embedding aggregated.'
        print '*********** Aggregate with distinct at iter ' + str(i)
        evaluate(agg_embs, true_file, target_dim)
    return
def objective(vae_model, c_model, device, x, mask, types, label, beta, nu,
              K=10, kappa=1.0, components=False):
    r"""Computes E_{p(x)}[ELBO_{\alpha,\beta}] """
    types = vae_model.disease_types
    qz_x, px_z, zs = vae_model(x, K)

    # compute supervised loss
    pred = c_model(zs.squeeze(0), device)
    bce = torch.nn.BCELoss(reduction='none')
    onehot_label = torch.zeros((x.size(0), types), dtype=torch.float32)
    for i in range(len(x)):
        for j in range(len(mask[i])):
            onehot_label[i][mask[i][j]] = 1
    supervised_loss = bce(pred, onehot_label)

    # compute vae loss
    lpx_z = px_z.log_prob(x).view(*px_z.batch_shape[:2], -1).sum(-1)
    pz = vae_model.pz(*vae_model.pz_params)
    kld = kl_divergence(qz_x, pz, samples=zs).sum(-1)

    # compute kl(p(z), q(z))
    B, D = qz_x.loc.shape
    _zs = pz.rsample(torch.Size([B]))
    lpz = pz.log_prob(_zs).sum(-1).squeeze(-1)
    _zs_expand = _zs.expand(B, B, D)
    lqz = qz_x.log_prob(_zs_expand).sum(-1)  # B*B
    # qz = []
    # _max = torch.max(lqz)
    # for i in range(types):
    #     tmp_mask = torch.sum(mask == i, dim=-1).bool()
    #     ds = lqz[:, tmp_mask] - _max
    #     qz_j = torch.exp(ds)  # B*k
    #     qz.append((qz_j * beta[i]))
    # qz = torch.cat(qz, dim=1).to(device)
    # lqz = torch.sum(qz, dim=1)
    # lqz = _max + torch.log(lqz) - math.log(qz.size(1))
    lqz = log_mean_exp(lqz, dim=1)
    inc_kld = lpz - lqz
    inc_kld = inc_kld.mean(0, keepdim=True).expand(1, B)
    inc_kld = inc_kld.mean(0).sum() / B

    # compute kl(q(z), N(z))
    _zs = qz_x.rsample(torch.Size([B]))  # B*B*D
    lqz = qz_x.log_prob(_zs)  # B*B*D
    kld2 = []
    for i in range(types):
        tmp_mask = torch.sum(mask == i, dim=-1).bool()
        if torch.sum(tmp_mask) == 0:
            continue
        tmp_zs = _zs[:, tmp_mask, :]  # B*k*D
        lnj = dist.Normal(vae_model.pz_params[0][i], vae_model.pz_params[1][i]).log_prob(tmp_zs)
        _kl = (tmp_zs - lnj).mean(0)  # k*D
        kld2.append((_kl * nu[i]).mean(0))
    kld2 = torch.stack(kld2).to(device)
    kld2 = torch.sum(kld2)

    obj = supervised_loss.sum(dim=-1) - lpx_z + kld.mean() + kappa * inc_kld + kld2
    return obj.mean() if not components else (
        obj.mean(), supervised_loss.mean(dim=0), lpx_z.mean(), kld.mean(), inc_kld.mean(),
        kld2.mean(), pred.cpu().detach().numpy(), onehot_label.cpu().detach().numpy())