def majority_vote(target): X_test = load_obj("%s/X_test"%target) y_test = load_obj("%s/y_test"%target) domains = [] if "mlp" in target: domains = ["mlp/books","mlp/dvd","mlp/electronics","mlp/kitchen"] else: if "large" not in target: domains = ["books","dvd","electronics","kitchen"] if target not in domains: return else: domains =["large/baby","large/cell_phone","large/imdb","large/yelp2014"] models = [] for source in domains: if target == source: continue else: print source clf_func = load_obj("%s/self_clf"%source) models.append(clf_func) eclf = EnsembleVoteClassifier(clfs=models,refit=False)#weights=[1,1,1], eclf.fit(X_test,y_test) # this line is not doing work tmp_name = target.upper()[0] if "large" not in target else "large/"+target.upper()[6] tmp_name = target.upper()[0] if "mlp" not in target else "mlp/"+target.upper()[4] save_obj(eclf, '%s_eclf'%(tmp_name)) pred = eclf.predict(X_test) acc = accuracy_score(y_test,pred) if "large" not in target else f1_score(y_test,pred,average='macro') print 'self-train',acc pass
def compute_psi(target,k=None):
    """Build the psi (pseudo-label-to-source attention) matrices for `target`.

    `star_matrix` stacks the positive and negative pseudo-labelled target
    vectors; psi is a column-softmax over the (normalized) dot products
    between star_matrix and each source split.

    k is None -> save the full per-split psi under <target>/psi_matrix.
    k given   -> keep only the top-k source rows per split and save the
                 filtered psi / X / y / cost / index pickles under
                 <target>/<k>/.

    Returns the psi matrix as a numpy array.
    """
    pos_star = load_obj('%s/pos_star'%target)
    neg_star = load_obj('%s/neg_star'%target)
    star_matrix = concatenate(pos_star,neg_star)
    X_joint = load_obj('%s/X_joint'%target)
    y_joint = load_obj('%s/y_joint'%target)
    src_cost = load_obj("%s/src_cost"%target)
    if k == None:
        psi_matrix = []
        for X_split in X_joint:
            # column-wise softmax over normalized similarity scores
            temp = softmax(normalize(np.dot(star_matrix,np.array(X_split).T).T),axis=0)
            psi_matrix.append(temp)
        save_obj(np.array(psi_matrix),"%s/psi_matrix"%(target))
    else:
        psi_matrix = []
        X_psi = []
        y_psi = []
        cost_psi = []
        X_index = []
        for (X_split,y_split,cost_split) in zip(X_joint,y_joint,src_cost):
            # NOTE: softmax is applied AFTER top-k filtering here, unlike the
            # k==None path where it is applied to the full score matrix.
            temp = normalize(np.dot(star_matrix,np.array(X_split).T).T)
            filtered,index = top_k(temp,k)
            psi_matrix.append(softmax(filtered,axis=0))
            # keep only the top-k rows of the data/labels/costs as well
            X_temp = np.array(X_split)[index]
            X_psi.append(X_temp)
            y_temp = np.array(y_split)[index]
            y_psi.append(y_temp)
            cost_temp = np.array(cost_split)[index]
            cost_psi.append(cost_temp)
            X_index.append(index)
        psi_matrix = np.array(psi_matrix)
        X_psi = np.array(X_psi)
        y_psi = np.array(y_psi)
        cost_psi = np.array(cost_psi)
        save_obj(psi_matrix,"%s/%s/psi_matrix"%(target,k))
        save_obj(X_psi,"%s/%s/X_psi"%(target,k))
        save_obj(y_psi,"%s/%s/y_psi"%(target,k))
        save_obj(cost_psi,"%s/%s/src_cost_psi"%(target,k))
        save_obj(X_index,"%s/%s/X_index"%(target,k))
        # diagnostic: number of positive labels surviving the top-k filter
        # NOTE(review): indentation reconstructed from collapsed source; this
        # print only makes sense inside the k-given branch (y_psi is local
        # to it) -- confirm.
        print sum([y for domain in y_psi for y in domain if y==1])
    return np.array(psi_matrix)
def find_best_self(domain): X_train = load_obj("%s/X_train"%domain) y_train = load_obj("%s/y_train"%domain) X_test = load_obj("%s/X_test"%domain) y_test = load_obj("%s/y_test"%domain) X_un = load_obj("%s/X_un"%domain) thetas = [0.5,0.6,0.7,0.8,0.9] best_acc = 0.0 best_clf ="" best_theta = 0.0 resFile = open("../work/params/%s_in_theta.csv"%domain,"w") resFile.write("theta, acc\n") for theta in thetas: print "##############################" print "start with theta=%s"%theta print "##############################" acc,clf_func = self_train(domain,X_train,y_train,X_test,y_test,X_un,theta=theta) if best_acc<acc: best_acc = acc best_clf = clf_func best_theta = theta resFile.write("%f, %f\n"%(theta,acc)) resFile.flush() resFile.close() print "##############################" print "best_theta:",best_theta,"best_acc:",best_acc save_obj(best_clf,"%s/self_clf"%domain) pass
def majority_vote_mlp(target): X_test = load_obj("%s/X_test"%target) y_test = load_obj("%s/y_test"%target) # domains = ["mlp/books","mlp/dvd","mlp/electronics","mlp/kitchen"] data_name = ["books", "dvd", "electronics", "kitchen"] X_joint = load_obj("%s/X_joint"%target) y_joint = load_obj("%s/y_joint"%target) temp_un = load_obj("%s/X_un"%target) meta_sources = [] for i in range(len(data_name)): if 'mlp/'+data_name[i] != target: meta_sources.append(data_name[i]) # print meta_sources models = [] for j in range(len(meta_sources)): temp_X = X_joint[j] temp_y = y_joint[j] thetas = [0.5,0.6,0.7,0.8,0.9] best_acc = 0.0 best_clf ="" best_theta = 0.0 resFile = open("../work/params/%s_theta_self-%s.csv"%(target,meta_sources[j].upper()[0]),"w") resFile.write("theta, acc\n") for theta in thetas: print "##############################" print "start with theta=%s"%theta print "##############################" acc,clf_func = self_train(target,temp_X,temp_y,X_test,y_test,temp_un,theta=theta) if best_acc<acc: best_acc = acc best_clf = clf_func best_theta = theta resFile.write("%f, %f\n"%(theta,acc)) resFile.flush() resFile.close() print "##############################" print "best_theta:",best_theta,"best_acc:",best_acc models.append(best_clf) eclf = EnsembleVoteClassifier(clfs=models,refit=False)#weights=[1,1,1], eclf.fit(X_test,y_test) # this line is not doing work # tmp_name = target.upper()[0] if "large" not in target else "large/"+target.upper()[6] # tmp_name = 'mlp/'+target.upper()[4] save_obj(eclf, "%s/self_clf"%target) pred = eclf.predict(X_test) # print pred acc = accuracy_score(y_test,pred) print 'self-train',acc pass
def src_cost(target):
    """Score each pooled source sentence by cosine similarity to the
    target's unlabeled-data centroid; save as a 3-way per-source split
    under <target>/src_cost.
    """
    pooled = get_sents(load_obj("%s/X_joint" % target))
    centroid = compute_centriod(load_obj("%s/X_un" % target))
    scores = []
    for vec in pooled:
        scores.append(cos_sim(vec, centroid))
    save_obj(list(split_list(scores, 3)), "%s/src_cost" % target)
    pass
def test(target, pos_star, neg_star, clf='lr'):
    """Train `clf` on the pseudo-labelled pos/neg instances and return the
    test score * 100 (accuracy, or macro-F1 for "large/*" targets).

    NOTE(review): a later zero-extra-arg `def test(target)` in this module
    shadows this function at module level -- confirm which one callers get.
    """
    train_X = concatenate(pos_star, neg_star)
    train_y = concatenate(np.ones(len(pos_star)), np.zeros(len(neg_star)))
    test_X = load_obj("%s/X_test" % target)
    test_y = load_obj("%s/y_test" % target)
    if len(train_X) == 0:
        return 0.0  # nothing to train on
    model = get_clf_func(clf)
    model.fit(train_X, train_y)
    predictions = model.predict(test_X)
    if "large" in target:
        score = f1_score(test_y, predictions, average='macro')
    else:
        score = accuracy_score(test_y, predictions)
    return score * 100
def test(target): X_test = [] y_test = [] if "large" not in target: X_test, y_test = prepare_evaluate(target) eclf = load_obj('%s_eclf'%(target.upper()[0])) else: X_test = load_obj("%s/X_test"%target) y_test = load_obj("%s/y_test"%target) tmp_name = "large/"+target.upper()[6] eclf = load_obj('%s_eclf'%tmp_name) pred = eclf.predict(X_test) acc = accuracy_score(y_test,pred) if "large" not in target else f1_score(y_test,pred,average='macro') print acc pass
def unlabel_sim(target):
    """Save each unlabeled target vector's cosine similarity to the
    unlabeled-data centroid under <target>/tgt_sim."""
    unlabeled = load_obj("%s/X_un" % target)
    centroid = compute_centriod(unlabeled)
    sims = []
    for vec in unlabeled:
        sims.append(cos_sim(vec, centroid))
    save_obj(sims, "%s/tgt_sim" % target)
    pass
def predict_tops(target,k=2000,theta=0.5): # source = "d1" tgt_un =np.array(load_obj("%s/X_un"%target)) print "loaded data %s." %target eclf = "" # if "large" not in target: # eclf = load_obj('%s_eclf'%(target.upper()[0])) # else: # tmp_name = "large/"+target.upper()[6] # eclf = load_obj('%s_eclf'%tmp_name) eclf = load_obj("%s/joint_clf"%target) # eclf = load_obj("%s/self_clf"%target) # eclf = load_obj("%s/tri_clf"%target) print "loaded trained classifier" tgt_sim = load_obj("%s/tgt_sim"%target) print "loaded target similarity" labels_proba = eclf.predict_proba(tgt_un) best_acc = 0.0 best_sorting = "" best_pos_star = "" best_neg_star = "" best_pos_start = 0.0 best_pos_end = 0.0 best_neg_start = 0.0 best_neg_end = 0.0 pos_star,neg_star,pos_start,pos_end,neg_start,neg_end,acc,method,pos_proba,neg_proba\ = find_best_method(target,tgt_un,labels_proba,tgt_sim,k,theta,'asc') if best_acc< acc: best_pos_star,best_neg_star,best_pos_start,best_pos_end,best_neg_start,best_neg_end,\ best_acc,best_method,best_pos_proba,best_neg_proba = pos_star,neg_star,pos_start,pos_end,neg_start,neg_end,\ acc,method,pos_proba,neg_proba best_sorting = "asc" pos_star,neg_star,pos_start,pos_end,neg_start,neg_end,acc,method,pos_proba,neg_proba\ = find_best_method(target,tgt_un,labels_proba,tgt_sim,k,theta,'dsc') if best_acc< acc: best_pos_star,best_neg_star,best_pos_start,best_pos_end,best_neg_start,best_neg_end,\ best_acc,best_method,best_pos_proba,best_neg_proba = pos_star,neg_star,pos_start,pos_end,neg_start,neg_end,\ acc,method,pos_proba,neg_proba best_sorting = "dsc" print "got pseudo labels" print "pos:",len(best_pos_star),best_pos_start,best_pos_end print "neg:",len(best_neg_star),best_neg_start,best_neg_end # save_obj(pos_star,"%s/pos_star"%target) # save_obj(neg_star,"%s/neg_star"%target) return best_pos_star,best_neg_star,best_acc,best_method,best_sorting,best_pos_proba,best_neg_proba
def test_embedding(target, k):
    """Write the k-filtered source sentences kept for `target` to
    ../work/example_<target>.txt, one "source,label" header per sentence."""
    domains = ["books", "dvd", "electronics", "kitchen"]
    kept_index = load_obj("%s/%s/X_index" % (target, k))
    rows = index_to_source_sentence(kept_index, target, domains)
    out = open("../work/example_%s.txt" % target, "w")
    for sent, src, lab in rows:
        out.write('%s,%d\n' % (src, lab))
        out.write("%s\n\n" % sent)
    out.close()
    pass
def test_confidence(target,pos_star,neg_star,option=0,clf='lr',theta=0.5,k=2000): X_train = [] y_train = [] if option == 0: # T_L* # pos_star = load_obj('%s/pos_star'%(target))[:k] # neg_star = load_obj('%s/neg_star'%(target))[:k] # pos_star,neg_star = predict_tops(target,k=k,theta=0.5,sorting="dsc") X_train = concatenate(pos_star,neg_star) y_train = concatenate(np.ones(len(pos_star)),np.zeros(len(neg_star))) elif option == 1: # S_L X_joint = load_obj("%s/X_joint"%target) y_joint = load_obj("%s/y_joint"%target) X_train = get_sents(X_joint) y_train = get_sents(y_joint) else: X_joint = load_obj("%s/X_joint"%target) y_joint = load_obj("%s/y_joint"%target) X_train1 = get_sents(X_joint) y_train1 = get_sents(y_joint) # pos_star = load_obj('%s/pos_star'%(target))[:k] # neg_star = load_obj('%s/neg_star'%(target))[:k] X_train2 = concatenate(pos_star,neg_star) y_train2 = concatenate(np.ones(len(pos_star)),np.zeros(len(neg_star))) X_train = concatenate(X_train1,X_train2) y_train = concatenate(y_train1,y_train2) X_test = load_obj("%s/X_test"%target) y_test = load_obj("%s/y_test"%target) clf_func = get_clf_func(clf) clf_func.fit(X_train,y_train) pred = clf_func.predict(X_test) acc = accuracy_score(y_test,pred) if "large" not in target else f1_score(y_test,pred,average='macro') print acc*100 return acc*100
def joint_train(target): X_test = load_obj("%s/X_test"%target) y_test = load_obj("%s/y_test"%target) domains = [] temp_X = [] temp_y = [] temp_un = [] if "mlp" in target: domains = ["mlp/books","mlp/dvd","mlp/electronics","mlp/kitchen"] temp_X = get_sents(load_obj("%s/X_joint"%target)) temp_y = get_sents(load_obj("%s/y_joint"%target)) for source in domains: if target == source: continue else: X_un = load_obj("%s/X_un"%source) temp_un = concatenate(temp_un,X_un) else: if "large" not in target: domains = ["books","dvd","electronics","kitchen"] if target not in domains: return else: domains =["large/baby","large/cell_phone","large/imdb","large/yelp2014"] for source in domains: if target == source: continue else: print source X_train = load_obj("%s/X_train"%source) y_train = load_obj("%s/y_train"%source) X_un = load_obj("%s/X_un"%source) temp_X = concatenate(temp_X,X_train) temp_y = concatenate(temp_y,y_train) temp_un = concatenate(temp_un,X_un) thetas = [0.5,0.6,0.7,0.8,0.9] best_acc = 0.0 best_clf ="" best_theta = 0.0 resFile = open("../work/params/%s_theta_joint.csv"%domain,"w") resFile.write("theta, acc\n") for theta in thetas: print "##############################" print "start with theta=%s"%theta print "##############################" acc,clf_func = self_train(target,temp_X,temp_y,X_test,y_test,temp_un,theta=theta) if best_acc<acc: best_acc = acc best_clf = clf_func best_theta = theta resFile.write("%f, %f\n"%(theta,acc)) resFile.flush() resFile.close() print "##############################" print "best_theta:",best_theta,"best_acc:",best_acc save_obj(best_clf,"%s/joint_clf"%target) return acc,clf_func
def exam_tops(target, k=100, theta=0.5, sort_prob="dsc", sim="target"): tgt_un = load_obj("%s/X_un" % target) print "loaded data %s." % target tmp_name = target.upper( )[0] if "large" not in target else "large/" + target.upper()[6] eclf = load_obj('%s_eclf' % tmp_name) print "loaded trained classifier" tgt_sim = "" if sim == "target": tgt_sim = load_obj("%s/tgt_sim" % target) print "loaded target similarity" else: tgt_sim = load_obj("%s/src_sim" % target) print "loaded source similarity" labels_proba = eclf.predict_proba(tgt_un) resFile = "" if sim == "target": if "large" not in target: resFile = open( "../work/params/%s/exam_stars_%s_n_%s.csv" % (sort_prob, target, k), 'w') else: resFile = open( "../work/params/%s/exam_stars_%s_n_%s.csv" % (sort_prob, target.replace("large/", ""), k), 'w') else: if "large" not in target: resFile = open( "../work/params/src/%s/exam_stars_%s_n_%s.csv" % (sort_prob, target, k), 'w') else: resFile = open( "../work/params/src/%s/exam_stars_%s_n_%s.csv" % (sort_prob, target.replace("large/", ""), k), 'w') resFile.write("method,num_pos,num_neg,acc\n") pos_star, neg_star, pos_len, neg_len = just_prob(tgt_un, labels_proba, tgt_sim, k, theta, sort_prob) acc = test(target, pos_star, neg_star) resFile.write("just_prob,%d,%d,%f\n" % (pos_len, neg_len, acc)) resFile.flush() pos_star, neg_star, pos_len, neg_len = just_sim(tgt_un, labels_proba, tgt_sim, k, theta) acc = test(target, pos_star, neg_star) resFile.write("just_sim,%d,%d,%f\n" % (pos_len, neg_len, acc)) resFile.flush() pos_star, neg_star, pos_len, neg_len = prob_sim(tgt_un, labels_proba, tgt_sim, k, theta, sort_prob) acc = test(target, pos_star, neg_star) resFile.write("prob_sim,%d,%d,%f\n" % (pos_len, neg_len, acc)) resFile.flush() pos_star, neg_star, pos_len, neg_len = sim_prob(tgt_un, labels_proba, tgt_sim, k, theta, sort_prob) acc = test(target, pos_star, neg_star) resFile.write("sim_prob,%d,%d,%f\n" % (pos_len, neg_len, acc)) resFile.flush() pos_star, neg_star, pos_len, neg_len = 
add_prob_sim( tgt_un, labels_proba, tgt_sim, k, theta, sort_prob) acc = test(target, pos_star, neg_star) resFile.write("prob+sim,%d,%d,%f\n" % (pos_len, neg_len, acc)) resFile.flush() pos_star, neg_star, pos_len, neg_len = multi_prob_sim( tgt_un, labels_proba, tgt_sim, k, theta, sort_prob) acc = test(target, pos_star, neg_star) resFile.write("prob*sim,%d,%d,%f\n" % (pos_len, neg_len, acc)) resFile.flush() resFile.close() pass
def train(target, EPOCH=5000, k=0, src_train=0, rescale=0):
    """Train the DomainAttention model on pseudo-labelled target data.

    Args:
        target: dataset key used to locate the cached pickles.
        EPOCH: maximum number of training epochs.
        k: 0 -> use the full psi matrix / joint source data;
           otherwise use the top-k filtered versions under <target>/<k>/.
        src_train: >0 mixes the source training data in, scaled by src_train
            (the pseudo-labelled target side is scaled by 1 - src_train).
        rescale: forwarded to train_epoch.

    Side effects: saves the best checkpoint under ../work/<target>[/<k>]/,
    deletes older *.model files on each improvement, and calls exit() after
    20 epochs without improvement once past epoch 100.
    """
    SOURCE_SIZE = 3  # number of source domains feeding the attention
    best_test_acc = 0.0
    psi_matrix = []
    X_joint = []
    y_joint = []
    if k == 0:
        psi_matrix = load_obj("%s/psi_matrix" % target)
        X_joint = load_obj("%s/X_joint" % target)
        y_joint = load_obj("%s/y_joint" % target)
    else:
        psi_matrix = load_obj("%s/%s/psi_matrix" % (target, k))
        X_joint = load_obj("%s/%s/X_psi" % (target, k))
        y_joint = load_obj("%s/%s/y_psi" % (target, k))
    psi_matrix = get_all(psi_matrix).T
    y_train_np = get_all(y_joint)
    y_train = label_to_tensor(y_train_np)
    X_test_np = load_obj("%s/X_test" % target)
    y_test_np = load_obj("%s/y_test" % target)
    psi_test = compute_psi_for_test(X_joint, X_test_np)
    X_test = to_tensor(X_test_np)
    y_test = to_tensor(y_test_np).view(len(y_test_np), -1)
    psi_test = sent_to_tensor(psi_test)
    EMBEDDING_DIM = X_test.size(1)  # infer embedding size from the test data
    # pseudo-labelled target instances form the main training set
    pos_star = load_obj('%s/pos_star' % target)
    neg_star = load_obj('%s/neg_star' % target)
    X_star = concatenate(pos_star, neg_star)
    y_star = concatenate(np.ones(len(pos_star)), np.zeros(len(neg_star)))
    pos_proba = load_obj('%s/pos_proba' % target)
    neg_proba = load_obj('%s/neg_proba' % target)
    if len(pos_proba) == 1:
        # remove duplicate brackets left over from how the probas were saved
        pos_proba = np.array(pos_proba[0])[0]
        neg_proba = np.array(neg_proba[0])[0]
    proba = concatenate(pos_proba, neg_proba)
    print "#train:", len(y_star)
    if src_train > 0:
        print "source training enabled"
        src_data = get_all(X_joint)
        # scale the source vectors by src_train unless the weight is exactly 1
        src_data = [src_train * np.array(x) for x in src_data] if src_train != 1 else src_data
        src_labels = get_all(y_joint)
        src_cost = load_obj("%s/src_cost" % target) if k == 0 else load_obj(
            "%s/%s/src_cost_psi" % (target, k))
        src_cost = get_all(src_cost)
        s = sum(src_cost)
        src_cost = [x / s for x in src_cost]  # normalize costs to sum to 1
        psi_src = compute_psi_for_test(X_joint, src_data)
        tgt_train = 1.0 - src_train  # complementary weight for the target side
        X_star = [tgt_train * np.array(x) for x in X_star]
        X_star = concatenate(X_star, src_data)
        y_star = concatenate(y_star, src_labels)
        proba = concatenate(proba, src_cost)
        psi_matrix = concatenate(psi_matrix, psi_src)
        print "UPDATED #train:", len(y_star)
    model = DomainAttention(embedding_dim=EMBEDDING_DIM,
                            source_size=SOURCE_SIZE,
                            y=y_train)
    LR = 1e-3
    optimizer = optim.Adam(model.parameters(), lr=LR)
    # per-sample losses (reduction='none') so they can be cost-weighted later
    loss_function = nn.BCELoss(reduction='none')
    no_up = 0  # epochs since the last improvement (early-stopping counter)
    for i in range(EPOCH):
        print 'epoch: %d start!' % i
        # fixed random_state keeps the shuffle identical across epochs
        X, y, psi, cost = shuffle(X_star, y_star, psi_matrix, proba, random_state=0)
        # X = to_tensor(X)
        y = to_tensor(y).view(len(y), -1)
        psi = to_tensor(psi)
        cost = to_tensor(cost)
        train_epoch(model, X, y, psi, cost, loss_function, optimizer, i, rescale=rescale)
        test_acc = evaluate_epoch(model, X_test, y_test, psi_test, loss_function)
        if test_acc > best_test_acc:
            best_test_acc = test_acc
            if k == 0:
                # drop older checkpoints so only the newest best remains
                os.system('rm ../work/%s/*.model' % target)
                print 'New Best Test!!!'
                filename = '../work/%s/best_model' % target
                if not os.path.exists(os.path.dirname(filename)):
                    try:
                        os.makedirs(os.path.dirname(filename))
                    except OSError as exc:
                        # tolerate a concurrent mkdir; re-raise anything else
                        if exc.errno != errno.EEXIST:
                            raise
                torch.save(
                    model.state_dict(),
                    '../work/%s/%s_' % (target, i) + str(int(test_acc * 10000)) + '.model')
            else:
                os.system('rm ../work/%s/%s/*.model' % (target, k))
                print 'New Best Test!!!'
                filename = '../work/%s/%s/best_model' % (target, k)
                if not os.path.exists(os.path.dirname(filename)):
                    try:
                        os.makedirs(os.path.dirname(filename))
                    except OSError as exc:
                        if exc.errno != errno.EEXIST:
                            raise
                torch.save(
                    model.state_dict(),
                    '../work/%s/%s/%s_' % (target, k, i) + str(int(test_acc * 10000)) + '.model')
            no_up = 0
        else:
            no_up += 1
            if no_up >= 20 and i > 100:
                exit()  # early stop: 20 epochs without improvement past epoch 100
        # NOTE(review): separator placement reconstructed from collapsed
        # source; assumed to print once per epoch -- confirm.
        print "######################################################"
    pass
def compute_sim(train_data, target):
    """Cosine similarity of each training vector to the centroid of the
    target's unlabeled data."""
    centroid = compute_centriod(load_obj("%s/X_un" % target))
    similarities = []
    for vec in train_data:
        similarities.append(cos_sim(vec, centroid))
    return similarities
def compute_single_evidence(target, test_index, tops=20):
    """Explain one test prediction by listing its most influential source
    sentences.

    Picks the best saved checkpoint (by the accuracy encoded in its
    filename), recomputes psi * theta for the chosen test instance, and
    writes the top contributing source sentences to
    ../work/examples/test_<target>_<index>_k_<k>.txt.

    Returns `evidence`: element [0] is [sentence, target, label] for the
    test instance; elements [1:] are
    [sentence, source, label, psi, theta, psi*theta] for each contributor.
    """
    domains = ["books", "dvd", "electronics", "kitchen"]
    evidence = []  # [0]: test instance, [1:]: contributing source instances
    x = load_obj("%s/X_test" % target)[test_index]
    test_sent = prepare_test(target, label=True, sent=True)[test_index]
    # checkpoints are saved as ../work/<target>/<k>/<epoch>_<acc*10000>.model
    filename = "../work/%s/*/*_*.model" % (target)
    model_paths = glob.glob(filename)
    model_path = ""
    best_acc = 0.0
    best_k = 0.0
    for path in model_paths:
        # directory name encodes k; filename encodes the test accuracy
        k = int(os.path.basename(os.path.dirname(path)))
        acc = float(
            os.path.basename(path).replace('.model', '').split('_')
            [1]) / 100.0 if path is not None else 0.0
        if best_acc < acc:
            best_acc = acc
            best_k = k
            model_path = path
    print model_path
    model = torch.load(model_path, map_location=device)  # raw state_dict
    k = best_k if "mlp" not in target else 0
    print k
    example = open(
        "../work/examples/test_%s_%s_k_%s.txt" % (target, test_index, k), 'w')
    print "###test instance###"
    print target
    org_label = test_sent[1]
    org_sent = ' '.join(test_sent[0])
    print org_sent, org_label
    evidence.append([org_sent, target, org_label])
    # NOTE(review): k is set to best_k or 0 above, so `k == None` here looks
    # unreachable; the companion branch below tests `k == 0` -- confirm which
    # sentinel is intended.
    if k == None or k == 1600:
        X_joint = load_obj("%s/X_joint" % target)
        y_joint = load_obj("%s/y_joint" % target)
    else:
        X_joint = load_obj("%s/%s/X_psi" % (target, k))
        y_joint = load_obj("%s/%s/y_psi" % (target, k))
        X_index = load_obj("%s/%s/X_index" % (target, k))
    # recompute the psi attention of this single test instance over each
    # source split
    psi_matrix = []
    for X_split in X_joint:
        temp = softmax(np.dot(x, X_split.T).T)
        psi_matrix.append(temp)
    psi_matrix = label_to_tensor(get_all(psi_matrix)).to(device)
    psi_splits = torch.chunk(psi_matrix, len(domains) - 1, dim=0)
    x = sent_to_tensor(x)
    y = label_to_tensor(get_all(y_joint))
    y_splits = torch.chunk(y, len(domains) - 1, dim=0)
    # per-source domain weights theta = softmax over exp(x . phi_src)
    theta_splits = []
    sum_src = 0.0
    for i in range(len(domains) - 1):
        phi_src = model['phi_srcs.%s' % i].to(device)
        temp = torch.exp(torch.mm(x, phi_src))
        theta_splits.append(temp)
        sum_src += temp
    sum_matrix = 0    # psi * theta per source sentence (stacked)
    sum_matrix2 = 0   # running sum of y * psi * theta (the prediction logit)
    psi_values = 0
    theta_values = 0
    count = 0
    for psi_split, theta_split, y_split in zip(psi_splits, theta_splits,
                                               y_splits):
        theta_split = theta_split / sum_src  # normalize theta across sources
        temp = psi_split * theta_split
        temp2 = y_split * psi_split * theta_split
        theta_temp = theta_split.expand_as(psi_split)
        if count == 0:
            sum_matrix = temp
            psi_values = psi_split
            theta_values = theta_temp
            count += 1
        else:
            sum_matrix = torch.cat((sum_matrix, temp), dim=0)
            psi_values = torch.cat((psi_values, psi_split), dim=0)
            theta_values = torch.cat((theta_values, theta_temp), dim=0)
        sum_matrix2 += torch.sum(temp2)
    sum_matrix2 = sum_matrix2 + model['bias'].to(device)
    sigmoid = torch.nn.Sigmoid()
    y_hat = sigmoid(sum_matrix2)
    predicted_label = np.round(y_hat.data.item())
    print predicted_label, org_label == predicted_label
    example.write("###test instance###\n%s %s %s %s\n%s\n\n" %
                  (target, org_label, predicted_label,
                   org_label == predicted_label, org_sent))
    example.write("########################\n")
    # cap `tops` at the number of available source rows (k per source, 3 sources)
    tops = tops if k * 3 > tops else k * 3
    temp, index = torch.topk(sum_matrix, tops, dim=0)
    psi_list = psi_values.data[index]
    theta_list = theta_values.data[index]
    for value, sent_index, psi, theta in zip(temp.data, index.data, psi_list,
                                             theta_list):
        source, label, sent = 0, 0, 0
        if k == 0 or k == 1600:
            source, label, sent = index_to_source_sentence(
                sent_index, target, domains)
        else:
            # filtered data needs X_index to map back to original sentences
            source, label, sent = index_to_source_sentence(
                sent_index, target, domains, X_index)
        # '+' marks contributors whose label agrees with the gold label
        if label == org_label:
            example.write('+')
        else:
            example.write('-')
        evidence.append([
            sent, source, label,
            psi.data.item(),
            theta.data.item(),
            value.data.item()
        ])
        example.write("%s %s %s\n" % (source, label, value.data.item()))
        example.write("%s\n\n" % sent)
        # NOTE(review): flush/close placement reconstructed from collapsed
        # source; flush assumed per-iteration, close after the loop -- confirm.
        example.flush()
    example.close()
    return evidence