def test_different_signature_dims_flow():
    first_signature = np.array([0.0, 1.0])
    second_signature = np.array([5.0, 3.0, 3.0])
    distance_matrix = np.array([[0.0, 0.5, 0.0],
                                [0.5, 0.0, 0.0],
                                [0.5, 0.0, 0.0]])
    with pytest.raises(ValueError):
        emd_with_flow(first_signature, second_signature, distance_matrix)

def test_emd_with_flow_validate_square_distance_matrix():
    first_signature = np.array([0.0, 1.0])
    second_signature = np.array([5.0, 3.0])
    distance_matrix = np.array([[0.0, 0.5, 3.0],
                                [0.5, 0.0]])
    with pytest.raises(ValueError):
        emd_with_flow(first_signature, second_signature, distance_matrix)

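# NOTE: several of the tests in this collection call an `emd_flow_assert`
# helper that is not shown here. A minimal sketch of such a helper, assuming
# rounding constants like the EMD_PRECISION / FLOW_PRECISION used further
# below (values here are illustrative):
import numpy as np

EMD_PRECISION = 5
FLOW_PRECISION = 4


def emd_flow_assert(got, expected):
    # Compare an (emd_value, flow) pair against expected values up to rounding.
    got_value, got_flow = got
    expected_value, expected_flow = expected
    assert round(got_value, EMD_PRECISION) == expected_value
    assert np.array_equal(np.round(got_flow, FLOW_PRECISION), expected_flow)
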
def test_emd_with_flow_1():
    first_signature = np.array([0.0, 1.0])
    second_signature = np.array([5.0, 3.0])
    distance_matrix = np.array([[0.0, 0.5],
                                [0.5, 0.0]])
    emd_flow_assert(
        emd_with_flow(first_signature, second_signature, distance_matrix),
        (3.5, [[0.0, 0.0],
               [0.0, 1.0]]))

def test_case_2_flow():
    first_signature = np.array([1.0, 1.0])
    second_signature = np.array([1.0, 1.0])
    distance_matrix = np.array([[0.0, 1.0],
                                [1.0, 0.0]])
    assert (emd_with_flow(first_signature, second_signature, distance_matrix) ==
            (0.0, [[1.0, 0.0], [0.0, 1.0]]))

def word_mover_score(refs, hyps, idf_dict_ref, idf_dict_hyp, stop_words=[],
                     n_gram=1, remove_subwords=True, batch_size=256):
    preds = []
    for batch_start in range(0, len(refs), batch_size):
        batch_refs = refs[batch_start:batch_start + batch_size]
        batch_hyps = hyps[batch_start:batch_start + batch_size]
        ref_embedding, ref_lens, ref_masks, ref_idf, ref_tokens = get_bert_embedding(
            batch_refs, model, tokenizer, idf_dict_ref)
        hyp_embedding, hyp_lens, hyp_masks, hyp_idf, hyp_tokens = get_bert_embedding(
            batch_hyps, model, tokenizer, idf_dict_hyp)
        ref_embedding = ref_embedding[-1]
        hyp_embedding = hyp_embedding[-1]
        batch_size = len(ref_tokens)
        for i in range(batch_size):
            # Zero out stop words, subword continuations, and punctuation.
            ref_ids = [k for k, w in enumerate(ref_tokens[i])
                       if w in stop_words or '##' in w or w in set(string.punctuation)]
            hyp_ids = [k for k, w in enumerate(hyp_tokens[i])
                       if w in stop_words or '##' in w or w in set(string.punctuation)]
            ref_embedding[i, ref_ids, :] = 0
            hyp_embedding[i, hyp_ids, :] = 0
            ref_idf[i, ref_ids] = 0
            hyp_idf[i, hyp_ids] = 0
        raw = torch.cat([ref_embedding, hyp_embedding], 1)
        raw.div_(torch.norm(raw, dim=-1).unsqueeze(-1) + 1e-30)
        distance_matrix = batched_cdist_l2(raw, raw).double().cpu().numpy()
        for i in range(batch_size):
            c1 = np.zeros(raw.shape[1], dtype=float)  # np.float is removed in recent NumPy
            c2 = np.zeros(raw.shape[1], dtype=float)
            c1[:len(ref_idf[i])] = ref_idf[i]
            c2[len(ref_idf[i]):] = hyp_idf[i]
            c1 = _safe_divide(c1, np.sum(c1))
            c2 = _safe_divide(c2, np.sum(c2))
            dst = distance_matrix[i]
            _, flow = emd_with_flow(c1, c2, dst)
            flow = np.array(flow, dtype=np.float32)
            score = 1 - np.sum(flow * dst)
            preds.append(score)
    return preds

def test_emd_with_flow_5():
    first_signature = np.array([3.0, 5.0])
    second_signature = np.array([6.0, 2.0])
    distance_matrix = np.array([[0.0, 0.0],
                                [0.0, 0.0]])
    emd_flow_assert(
        emd_with_flow(first_signature, second_signature, distance_matrix),
        (0.0, [[3.0, 0.0],
               [3.0, 2.0]]))

def emd_rep_loss(student_reps, teacher_reps, student_layer_weight,
                 teacher_layer_weight, stu_layer_num, tea_layer_num,
                 loss_func, device):
    student_layer_weight = np.concatenate(
        (student_layer_weight, np.zeros(tea_layer_num)))
    teacher_layer_weight = np.concatenate(
        (np.zeros(stu_layer_num), teacher_layer_weight))
    total_num = stu_layer_num + tea_layer_num
    distance_matrix = torch.zeros([total_num, total_num]).to(device)
    for i in range(stu_layer_num):
        student_rep = student_reps[i]
        for j in range(tea_layer_num):
            teacher_rep = teacher_reps[j]
            tmp_loss = loss_func(student_rep, teacher_rep)
            distance_matrix[i][j + stu_layer_num] = distance_matrix[
                j + stu_layer_num][i] = tmp_loss
    _, trans_matrix = emd_with_flow(
        student_layer_weight, teacher_layer_weight,
        distance_matrix.detach().cpu().numpy().astype('float64'))
    rep_loss = torch.sum(
        torch.tensor(trans_matrix).to(device) * distance_matrix)
    # tmp = distance_matrix.detach().cpu().numpy()
    return rep_loss, trans_matrix, distance_matrix

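# Hypothetical toy invocation of emd_rep_loss above. The layer counts, feature
# shapes, and uniform layer weights are illustrative assumptions, not part of
# the original code:
import numpy as np
import torch

stu_layer_num, tea_layer_num = 2, 4
student_reps = [torch.randn(8, 16) for _ in range(stu_layer_num)]  # per-layer student features
teacher_reps = [torch.randn(8, 16) for _ in range(tea_layer_num)]  # per-layer teacher features
student_layer_weight = np.ones(stu_layer_num) / stu_layer_num      # uniform layer weights
teacher_layer_weight = np.ones(tea_layer_num) / tea_layer_num

rep_loss, trans_matrix, dist = emd_rep_loss(
    student_reps, teacher_reps, student_layer_weight, teacher_layer_weight,
    stu_layer_num, tea_layer_num, torch.nn.MSELoss(), torch.device('cpu'))
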
def attn_loss(self, student_atts, teacher_atts, return_distance=False):
    # Average over attention heads when the student and teacher shapes differ.
    if len(teacher_atts[0].shape) == 4 and len(student_atts[0].shape) == 3:
        teacher_atts = [torch.mean(x, dim=1) for x in teacher_atts]
    elif len(student_atts[0].shape) == 4 and len(teacher_atts[0].shape) == 4:
        if student_atts[0].shape[1] != teacher_atts[0].shape[1]:
            teacher_atts = [torch.mean(x, dim=1) for x in teacher_atts]
            student_atts = [torch.mean(x, dim=1) for x in student_atts]
    elif len(student_atts[0].shape) == 4 and len(teacher_atts[0].shape) == 3:
        student_atts = [torch.mean(x, dim=1) for x in student_atts]
    _s_weight = np.concatenate((self.s_weight, np.zeros_like(self.t_weight)))
    _t_weight = np.concatenate((np.zeros_like(self.s_weight), self.t_weight))
    total_num = self.s_layer_num + self.t_layer_num
    distance_matrix = torch.zeros([total_num, total_num]).to(self.device)
    for i in range(self.s_layer_num):
        student_att = student_atts[i]
        for j in range(self.t_layer_num):
            teacher_att = teacher_atts[j]
            tmp_loss = self.d_method(student_att, teacher_att)
            distance_matrix[i][j + self.s_layer_num] = distance_matrix[
                j + self.s_layer_num][i] = tmp_loss
    _, trans_matrix = emd_with_flow(
        _s_weight, _t_weight,
        distance_matrix.detach().cpu().numpy().astype('float64'))
    d = torch.sum(torch.tensor(trans_matrix).to(self.device) * distance_matrix)
    if return_distance:
        return d, trans_matrix, distance_matrix
    return d

def get_opinion_distance(model, noun_freq_polar1_model, noun_freq_polar1_terms,
                         noun_freq_polar2_model, noun_freq_polar2_terms,
                         filename1, filename2):
    dictionary = Dictionary(
        documents=[noun_freq_polar1_terms, noun_freq_polar2_terms])
    # Compute the Euclidean distance between word vectors
    semantic_distance_matrix = compute_semantic_distance_matrix(
        model, noun_freq_polar1_terms, noun_freq_polar2_terms, dictionary,
        filename1, filename2)
    if semantic_distance_matrix is None:
        print("Semantic distance is none")
        return np.nan, np.nan
    # Get the normalized frequencies and their polarities
    normalized_freq1, pol1 = get_norm_freq_polarity(noun_freq_polar1_model, dictionary)
    normalized_freq2, pol2 = get_norm_freq_polarity(noun_freq_polar2_model, dictionary)
    # Change output to d1_terms, d2_terms, d_matrix[len(d1_terms), len(d2_terms)]
    emd_distance, matching_matrix = emd_with_flow(normalized_freq1,
                                                  normalized_freq2,
                                                  semantic_distance_matrix)
    polarity_distance = compute_polarity_distance(normalized_freq1,
                                                  normalized_freq2, dictionary,
                                                  matching_matrix,
                                                  semantic_distance_matrix,
                                                  pol1, pol2)
    return polarity_distance, emd_distance

def get_one_grad(self, id1, id2):
    vn = len(self.all_words)
    n = len(self.tr_X)
    # Build bag-of-words vectors over the full vocabulary for both documents.
    vec1 = np.zeros(vn)
    for k, w in enumerate(self.tr_words[id1][0]):
        vec1[self.word_id_map[w]] = self.tr_BOW_X[id1][0][k]
    vec2 = np.zeros(vn)
    for k, w in enumerate(self.tr_words[id2][0]):
        vec2[self.word_id_map[w]] = self.tr_BOW_X[id2][0][k]
    part_grad_A = np.zeros_like(self.A)
    T = emd_with_flow(np.array(vec1), np.array(vec2), self.cw_dist_mat)
    # Iterate over the flow matrix; wi/wj index the vocabulary, not the documents.
    for wi, row in enumerate(T[1]):
        for wj, t in enumerate(row):
            if t > 0.:
                x = np.array(self.all_X[wi]) - np.array(self.all_X[wj])
                const = (t / (2 * np.sqrt(np.sum(self.A.dot(x) ** 2)))
                         if np.sum(x ** 2) != 0 else 0)
                for i in range(self.vector_dim):
                    Ax = self.A[i, :].dot(x)
                    for j in range(self.vector_dim):
                        part_grad_A[i, j] += const * 2 * Ax * x[j]
    return part_grad_A

def word_mover_score(mapping, projection, bias, model, tokenizer, src, hyps,
                     n_gram=2, layer=8, dropout_rate=0.3, batch_size=256,
                     device='cuda:0'):
    idf_dict_src = defaultdict(lambda: 1.)
    idf_dict_hyp = defaultdict(lambda: 1.)
    preds = []
    for batch_start in range(0, len(src), batch_size):
        batch_src = src[batch_start:batch_start + batch_size]
        batch_hyps = hyps[batch_start:batch_start + batch_size]
        src_embedding, src_lens, src_masks, src_idf, src_tokens = get_bert_embedding(
            batch_src, model, tokenizer, idf_dict_src, device=device)
        hyp_embedding, hyp_lens, hyp_masks, hyp_idf, hyp_tokens = get_bert_embedding(
            batch_hyps, model, tokenizer, idf_dict_hyp, device=device)
        src_embedding = src_embedding[layer]
        hyp_embedding = hyp_embedding[layer]
        src_embedding = cross_lingual_mapping(mapping, src_embedding,
                                              projection, bias[0])
        batch_size = src_embedding.shape[0]
        for i in range(batch_size):
            src_embedding_i = get_ngram_embs(src_embedding[i, :src_lens[i], :],
                                             ngram=n_gram)
            hyp_embedding_i = get_ngram_embs(hyp_embedding[i, :hyp_lens[i], :],
                                             ngram=n_gram)
            src_idf_i = [1] * (src_lens[i] - n_gram + 1)
            hyp_idf_i = [1] * (hyp_lens[i] - n_gram + 1)
            W = torch.cat([src_embedding_i, hyp_embedding_i], 0)
            W.div_(torch.norm(W, dim=-1).unsqueeze(-1))
            c1 = list(src_idf_i) + [0] * len(hyp_idf_i)
            c2 = [0] * len(src_idf_i) + list(hyp_idf_i)
            c1 = c1 / np.sum(c1) + 1e-9
            c2 = c2 / np.sum(c2) + 1e-9
            dist = torch.cdist(W, W, p=2).double().cpu().numpy()
            flow = np.stack(emd_with_flow(c1, c2, dist)[1])
            flow = torch.from_numpy(flow[:len(src_idf_i), len(src_idf_i):])
            dist = torch.from_numpy(dist[:len(src_idf_i), len(src_idf_i):])
            # Remove noisy elements in the flow.
            flow_flatten = flow.reshape(-1)
            idx = torch.nonzero(flow_flatten)
            threshold = flow_flatten[idx].topk(
                k=max(int(len(idx) * dropout_rate), 1), dim=0,
                largest=False)[0][-1]
            flow[flow < threshold] = 0
            score = (flow * dist).sum()
            preds.append(1 - score)
    return preds

def test_emd_with_flow_6():
    first_signature = np.array([1.0, 2.0, 1.0, 2.0])
    second_signature = np.array([2.0, 1.0, 2.0, 1.0])
    distance_matrix = np.array([[0.0, 1.0, 1.0, 2.0],
                                [1.0, 0.0, 2.0, 1.0],
                                [1.0, 2.0, 0.0, 1.0],
                                [2.0, 1.0, 1.0, 0.0]])
    emd_flow_assert(
        emd_with_flow(first_signature, second_signature, distance_matrix),
        (2.0, [[1.0, 0.0, 0.0, 0.0],
               [1.0, 1.0, 0.0, 0.0],
               [0.0, 0.0, 1.0, 0.0],
               [0.0, 0.0, 1.0, 1.0]]))

def get_wmd(s1, s2, dists, w2i, get_flow=False):
    """Get WMD for two input sentences."""
    s1, s2 = s1.split(), s2.split()
    h1, h2, words = get_wmd_histograms(s1, s2, w2i)
    D = dists[np.ix_(words, words)]
    if get_flow:
        return pyemd.emd_with_flow(h1, h2, D)
    return pyemd.emd(h1, h2, D)

def test_case_6_flow():
    first_signature = np.array([1.0, 2.0, 1.0, 2.0])
    second_signature = np.array([2.0, 1.0, 2.0, 1.0])
    distance_matrix = np.array([[0.0, 1.0, 1.0, 2.0],
                                [1.0, 0.0, 2.0, 1.0],
                                [1.0, 2.0, 0.0, 1.0],
                                [2.0, 1.0, 1.0, 0.0]])
    emd_value, flow = emd_with_flow(first_signature, second_signature,
                                    distance_matrix)
    emd_value = round(emd_value, EMD_PRECISION)
    assert emd_value == 2.0
    flow = np.round(flow, FLOW_PRECISION)
    assert np.array_equal(flow, [[1.0, 0.0, 0.0, 0.0],
                                 [1.0, 1.0, 0.0, 0.0],
                                 [0.0, 0.0, 1.0, 0.0],
                                 [0.0, 0.0, 1.0, 1.0]])

def emd_rep_loss(student_reps, teacher_reps, student_layer_weight,
                 teacher_layer_weight, stu_layer_num, tea_layer_num,
                 loss_mse, args):
    student_layer_weight = np.concatenate(
        (student_layer_weight, np.zeros(tea_layer_num)))
    teacher_layer_weight = np.concatenate(
        (np.zeros(stu_layer_num), teacher_layer_weight))
    total_num = stu_layer_num + tea_layer_num
    distance_matrix = torch.zeros([total_num, total_num]).to(args.device)
    for i in range(stu_layer_num):
        student_rep = student_reps[i]
        for j in range(tea_layer_num):
            teacher_rep = teacher_reps[j]
            if args.emd_type == "v6":
                tmp_loss = 1 - torch.sum(student_rep * teacher_rep, dim=2)
                tmp_loss = tmp_loss.mean()
            elif args.emd_type == "v8":
                # NOTE: v8_weight must be defined in the enclosing scope, e.g.
                # weight = torch.norm(teacher_rep, dim=2, keepdim=True)
                # weight = weight / torch.sum(weight, dim=1, keepdim=True)
                tmp_loss = torch.sum(v8_weight * (student_rep - teacher_rep) ** 2,
                                     dim=1).mean()
            else:
                tmp_loss = loss_mse(student_rep, teacher_rep)
            distance_matrix[i][j + stu_layer_num] = distance_matrix[
                j + stu_layer_num][i] = tmp_loss
    if args.emd_type == "v10":
        # Derive the layer weights from the distance matrix itself.
        tmp = distance_matrix.detach().cpu().numpy()
        student_layer_weight = tmp.mean(axis=1)[:stu_layer_num]
        teacher_layer_weight = tmp.mean(axis=0)[stu_layer_num:]
        student_layer_weight = sum(student_layer_weight) / student_layer_weight
        teacher_layer_weight = sum(teacher_layer_weight) / teacher_layer_weight
        student_layer_weight = utils.softmax(student_layer_weight / 20)
        teacher_layer_weight = utils.softmax(teacher_layer_weight / 20)
        student_layer_weight = np.concatenate(
            (student_layer_weight, np.zeros(tea_layer_num)))
        teacher_layer_weight = np.concatenate(
            (np.zeros(stu_layer_num), teacher_layer_weight))
    _, trans_matrix = emd_with_flow(
        student_layer_weight, teacher_layer_weight,
        distance_matrix.detach().cpu().numpy().astype('float64'))
    rep_loss = torch.sum(
        torch.tensor(trans_matrix).to(args.device) * distance_matrix)
    tmp = distance_matrix.detach().cpu().numpy()
    return rep_loss, trans_matrix, distance_matrix

def test_emd_with_flow_extra_mass_penalty():
    first_signature = np.array([0.0, 2.0, 1.0, 2.0])
    second_signature = np.array([2.0, 1.0, 2.0, 1.0])
    distance_matrix = np.array([[0.0, 1.0, 1.0, 2.0],
                                [1.0, 0.0, 2.0, 1.0],
                                [1.0, 2.0, 0.0, 1.0],
                                [2.0, 1.0, 1.0, 0.0]])
    emd_flow_assert(
        emd_with_flow(first_signature, second_signature, distance_matrix,
                      extra_mass_penalty=2.5),
        (4.5, [[0.0, 0.0, 0.0, 0.0],
               [1.0, 1.0, 0.0, 0.0],
               [0.0, 0.0, 1.0, 0.0],
               [0.0, 0.0, 1.0, 1.0]]))

def emd_attn_loss(student_attns, teacher_attns, student_layer_weight,
                  teacher_layer_weight, stu_layer_num, tea_layer_num,
                  loss_mse, args):
    student_layer_weight = np.concatenate(
        (student_layer_weight, np.zeros(tea_layer_num)))
    teacher_layer_weight = np.concatenate(
        (np.zeros(stu_layer_num), teacher_layer_weight))
    total_num = stu_layer_num + tea_layer_num
    distance_matrix = torch.zeros([total_num, total_num]).to(args.device)
    if args.emd_type == "v3":
        t_dis_matrix = torch.zeros([total_num, total_num]).to(args.device)
    for i in range(stu_layer_num):
        student_attn = student_attns[i]
        for j in range(tea_layer_num):
            teacher_attn = teacher_attns[j]
            if args.emd_type == "v6":
                tmp_loss = 1 - torch.sum(teacher_attn * student_attn, dim=2)
                tmp_loss = tmp_loss.mean()
            elif args.emd_type == "v8":
                # NOTE: v8_weight must be defined in the enclosing scope, e.g.
                # weight = torch.norm(teacher_attn, dim=2, keepdim=True)
                # weight = weight / torch.sum(weight, dim=1, keepdim=True)
                tmp_loss = torch.sum(v8_weight * (student_attn - teacher_attn) ** 2,
                                     dim=1).mean()
            else:
                tmp_loss = loss_mse(student_attn, teacher_attn)
            if args.emd_type == "v3":
                t_dis_matrix[i][j + stu_layer_num] = t_dis_matrix[
                    j + stu_layer_num][i] = tmp_loss
                tmp_loss *= (1 + abs(i / stu_layer_num - j / tea_layer_num) / 5)
            distance_matrix[i][j + stu_layer_num] = distance_matrix[
                j + stu_layer_num][i] = tmp_loss
    _, trans_matrix = emd_with_flow(
        student_layer_weight, teacher_layer_weight,
        distance_matrix.detach().cpu().numpy().astype('float64'))
    if args.emd_type == "v3":
        attn_loss = torch.sum(
            torch.tensor(trans_matrix).to(args.device) * t_dis_matrix)
    else:
        attn_loss = torch.sum(
            torch.tensor(trans_matrix).to(args.device) * distance_matrix)
    tmp = distance_matrix.detach().cpu().numpy()
    return attn_loss, trans_matrix, distance_matrix

def test_extra_mass_penalty_flow():
    first_signature = np.array([0.0, 2.0, 1.0, 2.0])
    second_signature = np.array([2.0, 1.0, 2.0, 1.0])
    distance_matrix = np.array([[0.0, 1.0, 1.0, 2.0],
                                [1.0, 0.0, 2.0, 1.0],
                                [1.0, 2.0, 0.0, 1.0],
                                [2.0, 1.0, 1.0, 0.0]])
    emd_value, flow = emd_with_flow(first_signature, second_signature,
                                    distance_matrix, extra_mass_penalty=2.5)
    emd_value = round(emd_value, EMD_PRECISION)
    assert emd_value == 4.5
    flow = np.round(flow, FLOW_PRECISION)
    print(flow)
    assert np.array_equal(flow, [[0.0, 0.0, 0.0, 0.0],
                                 [1.0, 1.0, 0.0, 0.0],
                                 [0.0, 0.0, 1.0, 0.0],
                                 [0.0, 0.0, 1.0, 1.0]])

def loss(self, student_reps, teacher_reps, return_distance=False):
    _s_weight = np.concatenate((self.s_weight, np.zeros_like(self.t_weight)))
    _t_weight = np.concatenate((np.zeros_like(self.s_weight), self.t_weight))
    total_num = self.s_layer_num + self.t_layer_num
    distance_matrix = torch.zeros([total_num, total_num]).to(self.device)
    for i in range(self.s_layer_num):
        student_rep = student_reps[i]
        for j in range(self.t_layer_num):
            teacher_rep = teacher_reps[j + 1]
            tmp_loss = self.d_method(student_rep, teacher_rep)
            distance_matrix[i][j + self.s_layer_num] = distance_matrix[
                j + self.s_layer_num][i] = tmp_loss
    _, trans_matrix = emd_with_flow(
        _s_weight, _t_weight,
        distance_matrix.detach().cpu().numpy().astype('float64'))
    d = torch.sum(torch.tensor(trans_matrix).to(self.device) * distance_matrix)
    if return_distance:
        return d, trans_matrix, distance_matrix
    return d

import numpy as np
import pandas as pd
from pyemd import emd_with_flow

# Load species seasonal abundance distributions (estimated from eBird data)
abundance_BR = pd.read_table('results/output/seasonalAbundance_BR.csv', sep=",")
abundance_NB = pd.read_table('results/output/seasonalAbundance_NB.csv', sep=",")
abundance_BR = abundance_BR.apply(lambda x: x / sum(x))
abundance_NB = abundance_NB.apply(lambda x: x / sum(x))

# Load matrix of pairwise distances between every pair of hexagons on the grid
distanceMatrix = np.loadtxt('results/output/distanceMatrix.csv', delimiter=";")

# Compute the optimal redistribution using the Earth Mover's Distance algorithm
for s in abundance_BR.columns:
    EMD_results = emd_with_flow(np.array(abundance_BR[s]),
                                np.array(abundance_NB[s]), distanceMatrix)
    EMD_results = np.array(EMD_results[1])[
        (np.array(abundance_BR[s]) > 0), :][:, (np.array(abundance_NB[s]) > 0)]
    # Save simulated migratory connectivity
    np.savetxt("results/output/ORSIM_results_" + s + ".csv", EMD_results,
               delimiter=',')

import numpy as np
from pyemd import emd_with_flow

first_histogram = np.array([0, 1.0])
second_histogram = np.array([5.0, 3.0])
distance_matrix = np.array([[0, 2.0],
                            [2.5, 0]])

w_dis, F = emd_with_flow(first_histogram, second_histogram, distance_matrix)
print(w_dis, F)

# Normalize the distance by the total transported mass.
sumF = np.sum(F)
norm_dis = w_dis / sumF
print(norm_dis)

def emd_distill_loss(self, layer_wise_hidden, teacher_layer_wise_hidden):
    # 1. Compute EMD loss
    # 2. Update layer weights
    student_weight = self.student_weights.copy()  # [|S|]
    teacher_weight = self.teacher_weights.copy()  # [|T|]
    student_weight_hist = np.concatenate(
        (student_weight, np.zeros(self.teacher_layers)))  # [|S|+|T|]
    teacher_weight_hist = np.concatenate(
        (np.zeros(self.student_layers), teacher_weight))  # [|S|+|T|]
    total = self.teacher_layers + self.student_layers
    distance_matrix = torch.zeros([total, total], device=self.dummy.device)
    # Compute the distance matrix, shape=(|S|+|T|, |S|+|T|)
    for i in range(self.student_layers):
        student_hidden = layer_wise_hidden[i]  # [B, L, C]
        for j in range(self.teacher_layers):
            teacher_hidden = teacher_layer_wise_hidden[j]  # [B, L, C]
            # KL divergence (asymmetric, so both directions are filled in)
            distance_matrix[i][j + self.student_layers] = self.layer_distill_loss(
                student_hidden, teacher_hidden)
            distance_matrix[j + self.student_layers][i] = self.layer_distill_loss(
                teacher_hidden, student_hidden)
            # MSE (symmetric) alternative:
            # distance = self.mse(student_hidden, teacher_hidden)
            # distance_matrix[i][j + self.student_layers] = distance
            # distance_matrix[j + self.student_layers][i] = distance
    d_np = distance_matrix.detach().cpu().numpy().astype("float64")  # [|S|+|T|, |S|+|T|]
    _, transfer_matrix = emd_with_flow(student_weight_hist, teacher_weight_hist, d_np)
    transfer_matrix = np.array(transfer_matrix, dtype=np.float64)  # [|S|+|T|, |S|+|T|]
    transfer_matrix_torch = torch.tensor(transfer_matrix, device=self.dummy.device)
    kd_loss = torch.sum(transfer_matrix_torch * distance_matrix)

    # Update layer weights
    def update_weight(weight, t_mat, num_layers, bias=0):
        # t_mat: [|S|+|T|, |S|+|T|]
        transfer_weight = np.sum(t_mat * d_np, -1)  # [|S|+|T|]
        for idx in range(num_layers):
            weight[idx] = transfer_weight[idx + bias] / weight[idx]
        weight_sum = np.sum(weight)
        for idx in range(num_layers):
            if weight[idx] != 0:
                weight[idx] = weight_sum / weight[idx]
        weight = np_softmax(weight / self.emd_temperature)
        return weight

    self.student_weights = update_weight(student_weight, transfer_matrix,
                                         self.student_layers)
    self.teacher_weights = update_weight(teacher_weight,
                                         np.transpose(transfer_matrix),
                                         self.teacher_layers,
                                         bias=self.student_layers)
    return kd_loss

def plot_example(is_flow, reference, translation, device='cuda:0'):
    idf_dict_ref = defaultdict(lambda: 1.)
    idf_dict_hyp = defaultdict(lambda: 1.)
    ref_embedding, ref_lens, ref_masks, ref_idf, ref_tokens = get_bert_embedding(
        [reference], model, tokenizer, idf_dict_ref, device=device)
    hyp_embedding, hyp_lens, hyp_masks, hyp_idf, hyp_tokens = get_bert_embedding(
        [translation], model, tokenizer, idf_dict_hyp, device=device)
    ref_embedding = ref_embedding[-1]
    hyp_embedding = hyp_embedding[-1]
    raw = torch.cat([ref_embedding, hyp_embedding], 1)
    raw.div_(torch.norm(raw, dim=-1).unsqueeze(-1) + 1e-30)
    distance_matrix = batched_cdist_l2(raw, raw)
    masks = torch.cat([ref_masks, hyp_masks], 1)
    masks = torch.einsum('bi,bj->bij', (masks, masks))
    distance_matrix = masks * distance_matrix
    i = 0
    c1 = np.zeros(raw.shape[1], dtype=float)  # np.float is removed in recent NumPy
    c2 = np.zeros(raw.shape[1], dtype=float)
    c1[:len(ref_idf[i])] = ref_idf[i]
    c2[len(ref_idf[i]):] = hyp_idf[i]
    c1 = _safe_divide(c1, np.sum(c1))
    c2 = _safe_divide(c2, np.sum(c2))
    dst = distance_matrix[i].double().cpu().numpy()
    if is_flow:
        _, flow = emd_with_flow(c1, c2, dst)
        new_flow = np.array(flow, dtype=np.float32)
        res = new_flow[:len(ref_tokens[i]),
                       len(ref_idf[i]):(len(ref_idf[i]) + len(hyp_tokens[i]))]
    else:
        res = 1 - dst[:len(ref_tokens[i]),
                      len(ref_idf[i]):(len(ref_idf[i]) + len(hyp_tokens[i]))]
    r_tokens = ref_tokens[i]
    h_tokens = hyp_tokens[i]
    fig, ax = plt.subplots(figsize=(len(r_tokens) * 0.8, len(h_tokens) * 0.8))
    im = ax.imshow(res, cmap='Blues')
    ax.set_xticks(np.arange(len(h_tokens)))
    ax.set_yticks(np.arange(len(r_tokens)))
    ax.set_xticklabels(h_tokens, fontsize=10)
    ax.set_yticklabels(r_tokens, fontsize=10)
    plt.xlabel("System Translation", fontsize=14)
    plt.ylabel("Human Reference", fontsize=14)
    plt.title("Flow Matrix", fontsize=14)
    plt.setp(ax.get_xticklabels(), rotation=45, ha="right", rotation_mode="anchor")
    # Optionally annotate each cell with its value:
    # for i in range(len(r_tokens)):
    #     for j in range(len(h_tokens)):
    #         ax.text(j, i, '{:.2f}'.format(res[i, j].item()), ha="center",
    #                 va="center", color="k" if res[i, j].item() < 0.6 else "w")
    fig.tight_layout()
    plt.show()

def wmdo(wvvecs, ref, cand, ref_lang='en', cand_lang='en', delta=0.18, alpha=0.1):
    '''
    wvvecs: word vectors -- retrieved from load_wv method
    ref: reference translation
    cand: candidate translation
    ref_lang, cand_lang: languages of the reference and the candidate
    delta: weight of fragmentation penalty
    alpha: weight of missing word penalty
    '''
    ref_list = get_input_words(ref)
    cand_list = get_input_words(cand)
    ref = ' '.join(ref_list)
    cand = ' '.join(cand_list)
    common_vectorizer = CountVectorizer().fit(ref_list + cand_list)
    ref_count_vector, cand_count_vector = common_vectorizer.transform([ref, cand])
    ref_count_vector = ref_count_vector.toarray().ravel()
    cand_count_vector = cand_count_vector.toarray().ravel()
    dim = wvvecs[ref_lang].vector_size
    wvoc, missing = create_vocabulary(common_vectorizer, wvvecs, dim, ref_list,
                                      cand_list, ref_lang, cand_lang)
    distance_matrix = cosine_distances(wvoc)
    vocab_words = common_vectorizer.get_feature_names()
    for cand_word_idx, count in enumerate(cand_count_vector):
        if count > 0:
            most_similar_ref_indexes = np.argsort(distance_matrix[cand_word_idx])
            for ref_word_index in most_similar_ref_indexes[1:]:
                if ref_count_vector[ref_word_index] > 0:
                    print('{}: {}'.format(vocab_words[cand_word_idx],
                                          vocab_words[ref_word_index]))
                    break
    if np.sum(distance_matrix) == 0.0:
        return 0., {}  # previously: return float('inf')
    ref_count_vector = ref_count_vector.astype(np.double)
    cand_count_vector = cand_count_vector.astype(np.double)
    ref_count_vector /= ref_count_vector.sum()
    cand_count_vector /= cand_count_vector.sum()
    distance_matrix = distance_matrix.astype(np.double)
    (wmd, flow) = emd_with_flow(ref_count_vector, cand_count_vector, distance_matrix)
    # NOTE: this early return leaves the penalty terms below unreachable.
    return wmd, {}

    # Fragmentation penalty
    ratio = fragmentation(ref_list, cand_list, common_vectorizer, flow)
    if ratio > 1:
        ratio = 1
    penalty = delta * ratio
    # Missing-word penalty
    missingwords = 0
    for w in cand_list:
        if w not in wvvecs:
            missingwords += 1
    missingratio = missingwords / len(cand_list)
    missing_penalty = alpha * missingratio
    penalty += missing_penalty
    wmd += penalty
    return wmd, missing

def compute_loss(self, results_S, results_T):
    losses_dict = dict()
    total_loss = 0
    if 'logits' in results_T and 'logits' in results_S:
        logits_list_T = results_T['logits']  # list of tensors
        logits_list_S = results_S['logits']  # list of tensors
        total_kd_loss = 0
        if 'logits_mask' in results_S:
            masks_list_S = results_S['logits_mask']
            logits_list_S = select_logits_with_mask(
                logits_list_S, masks_list_S)  # (mask_sum, num_of_class)
        if 'logits_mask' in results_T:
            masks_list_T = results_T['logits_mask']
            logits_list_T = select_logits_with_mask(
                logits_list_T, masks_list_T)  # (mask_sum, num_of_class)
        for l_T, l_S in zip(logits_list_T, logits_list_S):
            if self.d_config.temperature_scheduler is not None:
                temperature = self.d_config.temperature_scheduler(
                    l_S, l_T, self.d_config.temperature)
            else:
                temperature = self.d_config.temperature
            total_kd_loss += self.kd_loss(l_S, l_T, temperature)
        total_loss += total_kd_loss * self.d_config.kd_loss_weight
        losses_dict['unweighted_kd_loss'] = total_kd_loss

    inters_T = {feature: results_T.get(feature, []) for feature in FEATURES}
    inters_S = {feature: results_S.get(feature, []) for feature in FEATURES}
    inputs_mask_T = results_T.get('inputs_mask', None)
    inputs_mask_S = results_S.get('inputs_mask', None)

    # Hidden states and embeddings
    feature = self.emd_feature
    emd_loss_weight = self.emd_loss_weight
    loss_type = self.emd_loss_type
    match_loss = MATCH_LOSS_MAP[loss_type]

    feature_maps_S = inters_S[feature][1:]  # list of features
    feature_maps_T = inters_T[feature][1:]  # list of features
    embeddings_S = inters_S[feature][0]
    embeddings_T = inters_T[feature][0]

    assert isinstance(feature_maps_S, (tuple, list))
    assert isinstance(feature_maps_T, (tuple, list))
    assert isinstance(feature_maps_S[0], torch.Tensor)
    assert isinstance(feature_maps_T[0], torch.Tensor)
    assert len(feature_maps_S) == self.layer_num_S - 1
    assert len(feature_maps_T) == self.layer_num_T - 1

    if len(self.projs) > 0:
        assert len(self.projs) == self.layer_num_S
        embeddings_S = self.projs[0](embeddings_S)
        feature_maps_S = [proj(s) for proj, s in zip(self.projs[1:], feature_maps_S)]

    feature_num_S = len(feature_maps_S)
    feature_num_T = len(feature_maps_T)
    feature_num_A = feature_num_S + feature_num_T

    distance_matrix = torch.zeros([feature_num_A, feature_num_A]).to(feature_maps_S[0])
    for s in range(feature_num_S):
        f_S = feature_maps_S[s]
        for t in range(feature_num_T):
            f_T = feature_maps_T[t]
            distance_matrix[s][t + feature_num_S] = distance_matrix[
                t + feature_num_S][s] = match_loss(f_S, f_T, mask=inputs_mask_S)

    feature_weight_S = np.concatenate([self.feature_weight_S, np.zeros(feature_num_T)])
    feature_weight_T = np.concatenate([np.zeros(feature_num_S), self.feature_weight_T])

    _, trans_matrix = emd_with_flow(
        feature_weight_S, feature_weight_T,
        distance_matrix.detach().cpu().numpy().astype('float64'))
    trans_matrix = torch.tensor(trans_matrix).to(distance_matrix)
    emd_loss = torch.sum(trans_matrix * distance_matrix)
    total_loss += emd_loss * emd_loss_weight
    losses_dict[f'unweighted_{feature}_{loss_type}_emd'] = emd_loss

    if (self.feature_weight_S <= 0).any() or (self.feature_weight_T <= 0).any():
        logger.info(f"{self.feature_weight_S}")
        logger.info(f"{self.feature_weight_T}")
    if np.isnan(self.feature_weight_S).any() or np.isnan(self.feature_weight_T).any():
        import sys
        logger.info(f"{self.feature_weight_S}")
        logger.info(f"{self.feature_weight_T}")
        sys.exit()
    # feature_weight_S = np.copy(self.feature_weight_S)
    # feature_weight_T = np.copy(self.feature_weight_T)
    # self.feature_weight_S, self.feature_weight_T = get_new_feature_weight(
    #     trans_matrix, distance_matrix.detach(), feature_weight_S,
    #     feature_weight_T, self.d_config.temperature)

    # Embedding matching
    embedding_loss = match_loss(embeddings_S, embeddings_T, mask=inputs_mask_S)
    total_loss += embedding_loss * emd_loss_weight  # shares the same weight
    losses_dict[f'unweighted_embedding_{loss_type}'] = embedding_loss

    if 'losses' in results_S:
        total_hl_loss = 0
        for loss in results_S['losses']:
            # in case of multi-GPU
            total_hl_loss += loss.mean()
        total_loss += total_hl_loss * self.d_config.hard_label_weight
        losses_dict['unweighted_hard_label_loss'] = total_hl_loss
    return total_loss, losses_dict

import numpy as np
from pyemd import emd, emd_with_flow, emd_samples

s1 = 8
s2 = 8
np.random.seed(10)
a = np.random.rand(s1)
b = np.random.rand(s2)
d = np.random.rand(s1, s2)

result1 = emd(a, b, d)
result2 = emd_with_flow(a, b, d)
result3 = emd_samples(a, b)

print(result1)
print("\n", result2)
print(result3)

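# Hedged sanity check (not part of the script above): for histograms with
# equal total mass, the value returned by emd_with_flow matches both the
# transport cost implied by the returned flow and the plain emd() value.
# The signatures below reuse the 4-bin test case from earlier in this section.
import numpy as np
from pyemd import emd, emd_with_flow

a = np.array([1.0, 2.0, 1.0, 2.0])
b = np.array([2.0, 1.0, 2.0, 1.0])
d = np.array([[0.0, 1.0, 1.0, 2.0],
              [1.0, 0.0, 2.0, 1.0],
              [1.0, 2.0, 0.0, 1.0],
              [2.0, 1.0, 1.0, 0.0]])

value, flow = emd_with_flow(a, b, d)
assert np.isclose(value, np.sum(np.array(flow) * d))  # cost of the optimal flow
assert np.isclose(value, emd(a, b, d))                # matches emd()
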
import os
import numpy as np
from pyemd import emd_with_flow

os.chdir('/Volumes/Marius_SSD/American-Flyway/Connectivity_NAbirds/Redistribution-model')

# Load species seasonal abundance distributions (estimated from eBird data)
abundance_BR = np.loadtxt('Data/STEMs/seasonalAbundance_wlswar_BR.csv', delimiter=';')
abundance_NB = np.loadtxt('Data/STEMs/seasonalAbundance_wlswar_NB.csv', delimiter=';')
abundance_BR = abundance_BR / sum(abundance_BR)
abundance_NB = abundance_NB / sum(abundance_NB)

# Load matrix of pairwise distances between every pair of hexagons on the grid
distanceMatrix = np.loadtxt('ideal-optimal-redistribution/distanceMatrix.csv', delimiter=';')

# Compute the optimal redistribution using the Earth Mover's Distance algorithm
EMD_results = emd_with_flow(abundance_BR, abundance_NB, distanceMatrix)

# Normalize the EMD by the total transported mass
flow = 0
for i in range(len(distanceMatrix)):
    flow = flow + sum(EMD_results[1][i])
EMD_results2 = EMD_results[0] / flow
print(EMD_results2)

# Save simulated migratory connectivity
np.savetxt("ORSIM-outputs/ORSIMresults_wlswar.csv", EMD_results[1], delimiter=',')

def wmdsimilarity(doc1, doc2, lang1, lang2, vecs, with_flow=False):
    tok1 = list(processing.tokenize(lang1, doc1, include_stopwords=True))
    tok2 = list(processing.tokenize(lang2, doc2, include_stopwords=True))
    print(tok1, tok2)
    dictionary = Dictionary(documents=[tok1, tok2])
    vocab_len = len(dictionary)
    if vocab_len == 1:
        # Both documents consist of a single unique token.
        return 0.0
    # Sets for faster look-up.
    docset1 = set(tok1)
    docset2 = set(tok2)
    print(dictionary, docset1, docset2)
    # Compute distance matrix.
    distance_matrix = np.zeros((vocab_len, vocab_len), dtype=np.double)
    for i, t1 in dictionary.items():
        for j, t2 in dictionary.items():
            if t1 not in docset1 or t2 not in docset2:
                continue
            # Compute Euclidean distance between word vectors.
            distance_matrix[i, j] = np.sqrt(
                np.sum((vecs[lang1][t1] - vecs[lang2][t2]) ** 2))
    if np.sum(distance_matrix) == 0.0:
        # `emd` gets stuck if the distance matrix contains only zeros.
        print('The distance matrix is all zeros. Aborting (returning inf).')
        return float('inf')

    def nbow(document):
        d = np.zeros(vocab_len, dtype=np.double)
        nbow = dictionary.doc2bow(document)  # Word frequencies.
        doc_len = len(document)
        for idx, freq in nbow:
            d[idx] = freq / float(doc_len)  # Normalized word frequencies.
        return d

    # Compute nBOW representation of documents.
    d1 = nbow(tok1)
    d2 = nbow(tok2)
    # Compute WMD.
    if with_flow:
        wmd, flow = emd_with_flow(d1, d2, distance_matrix)
        return {
            'tokens': list(dictionary.values()),
            'pdf1': list(d1),
            'pdf2': list(d2),
            'wmd': wmd,
            'flow': flow,
            'dist_matrix': distance_matrix.tolist()
        }
    return {
        'tokens': list(dictionary.values()),  # fixed: was `dictionary.values` without the call
        'pdf1': list(d1),
        'pdf2': list(d2),
        'wmd': emd(d1, d2, distance_matrix),
        'dist_matrix': distance_matrix.tolist()
    }

def wmdo(wvvecs, ref, cand, missing, dim, delta, alpha):
    '''
    wvvecs: word vectors -- retrieved from load_wv method
    ref: reference translation
    cand: candidate translation
    missing: missing word dictionary -- initialise as {}
    dim: word vector dimension
    delta: weight of fragmentation penalty
    alpha: weight of missing word penalty
    '''
    ref_list = [w.lower() for w in word_tokenize(ref)]
    cand_list = [w.lower() for w in word_tokenize(cand)]
    vc = CountVectorizer().fit(ref_list + cand_list)
    v_obj, v_cap = vc.transform([ref, cand])
    v_obj = v_obj.toarray().ravel()
    v_cap = v_cap.toarray().ravel()
    # Deal with missing words.
    wvoc = []
    for w in vc.get_feature_names():
        if w in wvvecs:
            wvoc.append(wvvecs[w])
        else:
            if w not in missing:
                missing[w] = np.zeros(dim)
            wvoc.append(missing[w])
    distance_matrix = cosine_distances(wvoc)
    if np.sum(distance_matrix) == 0.0:
        return float('inf')
    v_obj = v_obj.astype(np.double)
    v_cap = v_cap.astype(np.double)
    v_obj /= v_obj.sum()
    v_cap /= v_cap.sum()
    distance_matrix = distance_matrix.astype(np.double)
    (wmd, flow) = emd_with_flow(v_obj, v_cap, distance_matrix)
    # Fragmentation penalty
    penalty = 0
    ratio = fragmentation(ref_list, cand_list, vc, flow)
    if ratio > 1:
        ratio = 1
    penalty = delta * ratio
    # Missing-word penalty (renamed from `missing` to avoid clobbering the dict)
    missingwords = 0
    for w in cand_list:
        if w not in wvvecs:
            missingwords += 1
    missingratio = missingwords / len(cand_list)
    missing_penalty = alpha * missingratio
    penalty += missing_penalty
    wmd += penalty
    return wmd

def test_larger_signatures_1():
    first_signature = np.array([0.0, 1.0, 2.0])
    second_signature = np.array([5.0, 3.0])
    distance_matrix = np.array([[0.0, 0.5],
                                [0.5, 0.0]])
    with pytest.raises(ValueError):
        emd_with_flow(first_signature, second_signature, distance_matrix)