def clipMatrix(matrix, minPerRow=1, minPerCol=1):
    """
    Zeros out elements of a matrix so that only the highest values in each
    row/column are left.

    Guarantees at least minPerRow (or minPerCol) survivors, but in the case of
    ties it may include more (it uses rank.rank in order to handle ties
    correctly). Also, a row may keep many elements if this is required to keep
    the columns happy (and vice versa).

    :param matrix:
    :param minPerRow:
    :param minPerCol:
    :return:
    """
    m = N.zeros(matrix.shape)
    if minPerRow:
        for j, r in enumerate(matrix):  # each row
            a = rank.rank(r, ties="max")
            lastIdx = len(r) - minPerRow
            for i in range(len(r)):
                if a[i] >= lastIdx:
                    m[j, i] = 1
    if minPerCol:
        for j, c in enumerate(matrix.transpose()):  # each column
            a = rank.rank(c, ties="max")
            lastIdx = len(c) - minPerCol
            for i in range(len(c)):
                if a[i] >= lastIdx:
                    m[i, j] = 1
    m = m * matrix
    return m
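# A minimal self-contained sketch of the same row/column clipping idea, using
# scipy.stats.rankdata as a stand-in for the project's rank.rank helper (an
# assumption -- the real helper may index or break ties differently).
import numpy as np
from scipy.stats import rankdata

def clip_matrix_sketch(matrix, min_per_row=1, min_per_col=1):
    keep = np.zeros(matrix.shape)
    if min_per_row:
        for j, row in enumerate(matrix):
            r = rankdata(row, method="max")          # 1-based ranks, ties share the max rank
            keep[j, r > len(row) - min_per_row] = 1  # keep the top min_per_row values (plus ties)
    if min_per_col:
        for j, col in enumerate(matrix.T):
            r = rankdata(col, method="max")
            keep[r > len(col) - min_per_col, j] = 1
    return keep * matrix

# Example: each row keeps its largest entry (ties included), and so does each column.
print(clip_matrix_sketch(np.array([[3.0, 1.0, 2.0], [5.0, 5.0, 0.0]])))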
def DFS_bound2(start, goal, depth, cycle_detection=True, verbose=False):
    bases, _, patterns, columns = rank.get_information(start)  # parameters for unrank
    frontier = []
    start_int = rank.rank(start)  # convert start matrix to its rank integer
    frontier.append([start_int])
    while frontier:
        path = frontier.pop()
        last_vertex = path[-1]
        # convert to matrix for goal and neighbors
        matrix_last = unrank.unrank(last_vertex, bases, patterns, columns)
        if is_goal(matrix_last, goal):
            if verbose:  # if asked by user, print path
                print(path)
                print("length:", len(path))
            return path
        if len(path) == depth:
            continue
        for next_vertex in neighbors(matrix_last):
            int_next = rank.rank(next_vertex)  # convert neighbor matrix to integer
            if cycle_detection:
                if int_next in path:
                    continue
            new_path = path + [int_next]
            frontier.append(new_path)
    return None
def DFS_prune2(start, goal, verbose=False):
    bases, _, patterns, columns = rank.get_information(start)  # parameters for unrank
    frontier = []
    start_int = rank.rank(start)  # convert start matrix to its rank integer
    frontier.append([start_int])
    # figure out size of visited array (by multiplying bases) and initialize it
    prod = 1
    for i in bases:
        prod *= i
    visited = [False for u in range(prod)]
    visited[start_int] = True
    while frontier:
        path = frontier.pop()
        last_vertex = path[-1]
        # convert to matrix for goal and neighbors
        matrix_last = unrank.unrank(last_vertex, bases, patterns, columns)
        if is_goal(matrix_last, goal):
            if verbose:  # if asked by user, print path
                print(path)
                print("length:", len(path))
            return path
        for next_vertex in neighbors(matrix_last):
            int_next = rank.rank(next_vertex)  # convert neighbor matrix to integer
            if visited[int_next]:
                continue
            new_path = path + [int_next]
            visited[int_next] = True
            frontier.append(new_path)
    return None
def BFS3(start, goal, cycle_detect=False, verbose=False, ranking=False):
    frontier = []
    if ranking:
        bases, _, patterns, columns = rank.get_information(start)
        start_int = rank.rank(start)
        frontier.append([start_int])
        # figure out size of visited array (by multiplying bases) and initialize it
        prod = 1
        for i in bases:
            prod *= i
        visited = [False for u in range(prod)]
        visited[start_int] = True
    else:
        frontier.append([start])
    while frontier:
        path = frontier.pop(0)  # select and remove first path from frontier
        last_vertex = path[-1]
        if ranking:
            # convert from integer to matrix to check if goal and/or find neighbors
            last_vertex = unrank.unrank(last_vertex, bases, patterns, columns)  ####!!!!!####
        if is_goal(last_vertex, goal):  # check if last vertex in path is goal
            if verbose:  # if asked by user, print each vertex (matrix) in path
                if ranking:
                    print(path)
                else:
                    for matrices in path:
                        for line in matrices:
                            print(line)
                        print()
                print(len(path))  # print length of solution for convenience
            return path
        # enter this branch if the user asked to use ranking/unranking
        if ranking:
            for next_vertex in neighbors(last_vertex):  ###!!!###
                int_next = rank.rank(next_vertex)  # convert neighbor matrix to integer
                if cycle_detect:
                    if visited[int_next]:
                        continue
                new_path = path + [int_next]
                visited[int_next] = True
                frontier.append(new_path)
        # enter this branch if the user did not want to use ranking/unranking
        else:
            for next_vertex in neighbors(last_vertex):
                if cycle_detect:  # include cycle detection if asked for
                    if next_vertex in path:
                        continue
                new_path = path + [next_vertex]
                frontier.append(new_path)
    return None
def graph_draw(graph):
    rank.rank(graph)
    print "done rank"
    tmp_graph = ordering.ordering(graph)
    print "done ordering"
    position.position(tmp_graph)
    print "done position"
    return tmp_graph
def crossvalidate(input, fold, outputdir, distance, ntrees, reuse, trainingdir,
                  randomforestdir, predictiondir):
    cases = []
    with open(input) as inputfile:
        reader = csv.reader(inputfile, delimiter=',', quotechar='"')
        for row in reader:
            cases.append((row[0], row[1:]))
    iteration = 1
    kfold = KFold(len(cases), n_folds=fold)
    for train_indices, test_indices in kfold:
        generateTrainingInput = {}
        for train_index in train_indices:
            generateTrainingInput[cases[train_index][0]] = cases[train_index][1]
        trainingout = None
        if trainingdir:
            trainingout = os.path.join(trainingdir, 'cv-fold%d-training.csv' % iteration)
        if reuse and trainingout and os.path.isfile(trainingout):
            print("Reusing training set CSV file %s..." % trainingout)
            trainingset = pd.read_csv(trainingout, quoting=csv.QUOTE_NONNUMERIC)
        else:
            trainingset = generateTrainingSet(generateTrainingInput, distance,
                                              output=trainingout)
        rfoutput = None
        if randomforestdir:
            rfoutput = os.path.join(randomforestdir, 'cv-fold%d-rf.joblib' % iteration)
        if reuse and rfoutput and os.path.isfile(rfoutput):
            print("Reusing RandomForest file %s..." % rfoutput)
            rf = joblib.load(rfoutput)
        else:
            rf = train(trainingset, ntrees, rfoutput)
        for test_index in test_indices:
            predictionList = predict(rf, cases[test_index][1],
                                     saveOutput=predictiondir,
                                     outputdir=predictiondir,
                                     templateFile=cases[test_index][0])
            for j, model in enumerate(predictionList):
                model.label = os.path.basename(cases[test_index][1][j])
            rank(predictionList,
                 os.path.join(outputdir, '%s-ranking.csv' %
                              os.path.splitext(os.path.basename(cases[test_index][0]))[0]))
        iteration += 1
def dominanceMain(population, functionObject):
    # assign the new population to the function object
    functionObject.population = population
    # compute the objective values of the new population and build the funScore matrix
    funScore = np.vstack((functionObject.objFun_1(), functionObject.objFun_2()))
    funScore = np.transpose(funScore)
    N = population.shape[0]
    nN = N / 2
    # from the objective-value matrix, build the layer (front) and crowding-distance dictionaries
    r_dict = dominance(funScore)
    layerDict = rank(r_dict)
    s = 0
    indicate = []
    for i in xrange(1, len(layerDict) + 1):
        s += len(layerDict[i])
        if s < nN:
            indicate.extend(layerDict[i])
            continue
        elif s == nN:
            indicate.extend(layerDict[i])
            break
        else:
            s -= len(layerDict[i])
            temp = crowddist(funScore, layerDict[i])
            indicate.extend(temp[:nN - s])
            break
    # return the new population
    return population[indicate]
def create_nw(data, replace_nans):
    nw = np.corrcoef(data)
    np.fill_diagonal(nw, 1)
    nw = rank(nw)
    if replace_nans:
        nw[np.isnan(nw)] = bottleneck.nanmean(nw)
    return nw
def nw_aggregation(nw_paths, genes, file_key='nw'):
    """Function for aggregating co-expression networks

    Takes a list of paths to HDF5 files, reads in the networks, averages them
    and then re-ranks. Each HDF5 file needs to be in the PyTables fixed format
    with the network stored under the key listed in the keyword argument
    file_key.

    Arguments:
        nw_paths {list} -- list of strings or paths to HDF5 files
        genes {np.array} -- numpy array of genes for network

    Keyword Arguments:
        file_key {str} -- key in HDF5 network is stored under (default: {'nw'})

    Returns:
        pd.DataFrame -- Aggregate Network
    """
    agg_nw = np.zeros([genes.shape[0], genes.shape[0]])
    for nw_path in nw_paths:
        nw = pd.read_hdf(nw_path, file_key)
        fill = bottleneck.nanmean(nw.values, axis=None)
        agg_nw += nw.loc[genes, genes].fillna(fill).values
        del nw
        gc.collect()
    return pd.DataFrame(rank(agg_nw), index=genes, columns=genes)
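# Hypothetical usage of nw_aggregation (the paths and gene list below are
# placeholders, not files from the original project):
#
#   genes = np.array(['GeneA', 'GeneB', 'GeneC'])
#   paths = ['dataset1_nw.hdf5', 'dataset2_nw.hdf5']
#   agg = nw_aggregation(paths, genes)   # genes x genes DataFrame
#
# The individual networks are summed and the sum is re-ranked once at the end;
# this is equivalent to ranking the average, since ranking depends only on the
# ordering of the values.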
def create_nw(data, replace_nans=True):
    array = data.values.T
    nw = np.corrcoef(array)
    np.fill_diagonal(nw, 1)  # ensure the self-correlation diagonal is exactly 1
    nw = rank(nw)
    if replace_nans:
        nw[np.isnan(nw)] = bottleneck.nanmean(nw)
    return nw
def LCFS2(start, goal, pruning=True, verbose=False):
    bases, _, patterns, columns = rank.get_information(start)  # parameters for unrank
    start_int = rank.rank(start)  # convert start matrix to its rank integer
    # frontier will look like [(cost, [path]), (cost, [path]), (cost, [path]), ...]
    frontier = []  # this frontier will be used as a heap
    heapq.heappush(frontier, (0, [start_int]))  # heap prioritized by first element of tuple
    # figure out size of visited array (by multiplying bases) and initialize it
    prod = 1
    for i in bases:
        prod *= i
    visited = [False for u in range(prod)]
    visited[start_int] = True
    while frontier:
        # path_tuple = (cost, [path])
        path_tuple = heapq.heappop(frontier)  # select and remove first path tuple from frontier
        # last_vertex = path[-1] from path_tuple
        last_vertex = path_tuple[1][-1]
        # convert from integer to matrix to check if goal and/or find neighbors
        matrix_last = unrank.unrank(last_vertex, bases, patterns, columns)
        if is_goal(matrix_last, goal):  # check if last vertex in path is goal
            if verbose:  # if asked by user, print each vertex (matrix) in path
                print(path_tuple[1])
                print("cost\t:", path_tuple[0])  # print cost of solution for convenience
                print("length\t:", len(path_tuple[1]))  # print length of solution for convenience
            return path_tuple
        for next_tuple in neighbors(matrix_last, with_cost=True):
            int_next = rank.rank(next_tuple[1])
            if pruning:  # include cycle detection if asked for
                if visited[int_next]:
                    continue
            new_path = (path_tuple[0] + next_tuple[0], path_tuple[1] + [int_next])
            visited[int_next] = True
            heapq.heappush(frontier, new_path)
    return None
def create_bootstrap_nw(datasets_selected):
    agg_nw = np.zeros([genes.shape[0], genes.shape[0]])
    for dataset in datasets_selected:
        logging.info(dataset)
        nw = pd.read_hdf(
            f'/home/bharris/biccn_paper/data/bulk_rna/networks/{dataset}_pearson_nw.hdf5',
            'nw')
        agg_nw += nw.values
        del nw
        gc.collect()
    return pd.DataFrame(rank(agg_nw), index=genes, columns=genes)
def __init__(self, p_suit, p_rank):
    r = rank()
    valid_rank = r.validate(p_rank)
    s = suit()
    valid_suit = s.validate(p_suit)
    if (valid_rank and valid_suit):
        self.suit = p_suit
        self.rank = p_rank
    else:
        raise ValueError("Invalid Rank or Suit")
def __init__(self, new_deck=False, cards=None):
    if (cards is None):
        cards = []
    self.cards = cards
    if (new_deck):
        ranks = rank()
        suits = suit()
        for s in suits.values:
            for r in ranks.values:
                c = card(s, r)
                self.cards.append(c)
def find_keyword(word_list, lentext=3):
    '''
    Keyword finder.
    Approach: remove the stopwords, then count how often each remaining word occurs.
    find_keyword(word_list, lentext=3)
    word_list is the list of words.
    lentext is the minimum number of occurrences a word must have in the list
    to be reported as a keyword; the default is 3.
    '''
    filtered_words = [word for word in word_list
                      if word not in set(stopwords.words('thai'))]
    word_list = rank(filtered_words)
    return {k: v for k, v in word_list.items() if v >= lentext}
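# Hedged usage sketch: assuming rank() here returns a word -> count mapping of
# the filtered tokens (a pythainlp-style frequency counter), find_keyword keeps
# only the non-stopword tokens that occur at least lentext times, e.g.
#
#   find_keyword(tokens, lentext=2)   # -> {word: count} for words seen twice or more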
def run_ranking(self):
    file = open(self.queries, "r")
    query = file.readline().rstrip()
    with open(self.op_file, "w+") as output:
        while query:
            rank_sys = rank.rank(self.indexer)
            list_top_k = rank_sys.get_top_k(self.k, query)
            self.record(rank_sys, query, list_top_k, output)
            print("Done with query: ", query)
            query = file.readline().rstrip()
            output.write("\n")
            output.write("\n")
        output.close()
    file.close()
def main(start, end, apiKey, mode):
    path, places, dist = route(start, end, apiKey, mode)
    finalDestinations = rank(calcDistance(places, path), dist)
    happyTrail = []
    nameAddress = []
    for poi in finalDestinations:
        if poi is not None:
            happyTrail.append(poi[0])
            nameAddress.append((poi[1]['name'], poi[1]['formatted_address']))
    return happyTrail, nameAddress
def estimate(population, functionObject):
    # assign the new population to the function object
    functionObject.population = population
    # compute the objective values of the new population and build the funScore matrix
    funScore = np.vstack((functionObject.objFun_1(), functionObject.objFun_2()))
    funScore = np.transpose(funScore)
    # from the objective-value matrix, build the layer and crowding-distance dictionaries
    r_dict = dominance(funScore)
    layerDict = rank(r_dict)
    print funScore[layerDict[1]]
def __init__(self, parameter_suit, parameter_rank):
    r = rank()
    valid_rank = r.validate(parameter_rank)
    s = suit()
    valid_suit = s.validate(parameter_suit)
    if (valid_rank and valid_suit):
        self.suit = parameter_suit
        self.rank = parameter_rank
    else:
        raise ValueError("Invalid Rank or Suit")
def rankInRows(matrix, reverse=True, frank=False):
    """
    Perform a rank transform on each ROW of the matrix, so each row ranks its
    columns.

    :param matrix:
    :param reverse: if True, the largest value in each row gets rank 0
    :param frank: floating point ranking tolerance for ties
    :return: an integer matrix
    """
    rmat = N.zeros(matrix.shape, dtype='int32')
    for i in range(matrix.shape[0]):
        r = matrix[i]
        n = rank.rank(r, ties="max", frank=frank)
        rmat[i] = N.max(n) - n if reverse else n
    return rmat
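# Hedged illustration of rankInRows (assumes rank.rank(row, ties="max") behaves
# like a max-tie rank transform): for the row [3, 1, 2] with reverse=True the
# output row is [0, 2, 1] -- the largest value maps to 0 and smaller values to
# larger integers, whether the underlying ranks are 0- or 1-based.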
def OnClickRun(self, evt):
    self.beginTime = int(time.mktime(datetime.datetime.now().timetuple()))
    runtime = 0
    # If both feature toggles ("only boost index" and "get ranking") are enabled, report an error
    runType = self.runTypeBtn.GetValue()
    getRank = self.getRankBtn.GetValue()
    if runType and getRank:
        self.errInfo(u'功能选择中, "只刷指数"和"获取排名"只能选择其中之一. ')
        return
    func = (1 if runType else 2 if getRank else 0)
    # If the target URL title keywords are not filled in, report an error
    self.urlkw = self.target_kw.GetValue().strip()
    if (not self.urlkw and not self.runType):
        self.errInfo(u"请填写目标页面标题关键字, 并以半角英文分隔!")
        return
    else:
        self.urlkw = self.urlkw.split(",")
    # If no keywords configuration file is selected, report an error
    if not self.keyworks:
        self.errInfo(u"请选择关键词配置文件!")
        return
    self.proxyConfig = self.proxyText.GetValue().strip()
    # If a fixed run count is selected but the value is empty or invalid, report an error
    if self.runTime.GetValue():
        runtime = self.runText.GetValue().strip()
        if (not runtime) or (not runtime.isdigit()) or (not int(runtime)):
            self.errInfo(u"运行次数配置有误!")
            return
    if func == 2:
        # If the "get ranking" feature is selected, fix the run count at 1
        runtime = 1
    # If the proxy configuration is empty, report an error
    if self.proxyConfig == "" or self.proxyConfig == u"点击右侧按钮选择文件...":
        self.errInfo(u"代理设置不能为空!")
        return
    self.multiText.SetValue("")
    self.buttonRun.SetLabel(u"运行中")
    self.buttonStop.SetLabel(u"停止")
    evt.GetEventObject().Disable()
    self.DisableOnRun()
    self.OnStart()
    searcher = self.EvtRadioBox_SPF(evt)
    drvierType = self.EvtRadioBox_PF(evt)
    isPhantomjs = self.getIsPhantomjs(evt)
    self.rankObj = rank(searcher, drvierType, isPhantomjs, self.proxyType,
                        self.proxyConfig, self.keyworks, self.urlkw, func,
                        int(runtime))
def dominanceMain(population, func_object):
    # assign the new population to the function object
    func_object.population = population
    # compute the objective values of the new population and build the func_score matrix
    func_score = vstack((func_object.objFun_1(), func_object.objFun_2()))
    set_printoptions(suppress=True)  # no scientific notation
    func_score = transpose(func_score)  # transpose
    N = population.shape[0]  # number of rows in the matrix
    nN = N // 2  # floor division
    #nN = N
    # from the objective-value matrix, build the layer (front) and crowding-distance dictionaries
    r_dict = dominance(func_score)  # relation dict {individual: [count dominating it, list it dominates]}
    # print(r_dict)
    layer_dict = rank(r_dict)  # layer dict, e.g. {1: [3, 1, 4], 2: [2, 0]}
    s = 0
    indicate = []
    for i in range(1, len(layer_dict) + 1):
        s += len(layer_dict[i])
        if s < nN:
            indicate.extend(layer_dict[i])
            continue
        elif s == nN:
            indicate.extend(layer_dict[i])
            break
        else:
            s -= len(layer_dict[i])
            temp = crowddist(func_score, layer_dict[i])
            indicate.extend(temp[:nN - s])
            break
    for i in range(len(layer_dict)):  # i starts at zero, so add 1 when indexing
        #print(layerDict[i+1])
        ceng_population = array(population[layer_dict[i + 1]])
        N = ceng_population.shape[0]
        n = len(array(list(set([tuple(t) for t in ceng_population]))))
        print("layer", i, "duplication rate:", n / N,
              "-- individuals in this layer:", N, "-- unique individuals:", n)
    print(len(indicate))
    # return the new population
    return population[indicate]
def estimate(population, functionObject):
    # assign the new population to the function object
    functionObject.population = population
    # compute the objective values of the new population and build the funScore matrix
    funScore = vstack((functionObject.objFun_1(), functionObject.objFun_2()))
    set_printoptions(suppress=True)
    funScore = transpose(funScore)  # transpose the 2-D array
    # print("funScore", funScore)
    # from the objective-value matrix, build the layer and crowding-distance dictionaries
    r_dict = dominance(funScore)
    layerDict = rank(r_dict)
    #ls = np.append(layerDict[1], layerDict[2])
    # print(ls)
    print(funScore[layerDict[1]])
def submit_selected_urls(self, positive, negative):
    # Perform ranking and diversifying on all urls with regard to the positive urls
    #
    # Args:
    #   labeled_urls: a list of pairs <url, label>. Label 1 means positive and 0 means negative.
    # Returns:
    #   urls: list of urls with ranking scores

    # Test new positive and negative examples with existing classifier
    # If accuracy above threshold classify pages
    # Ranking
    # Diversification
    documents = {}
    other = []
    all_docs = get_bag_of_words(list(self.urls_set))
    for url in positive:
        if url in all_docs:
            self.positive_urls_set.add(url)
            self.negative_urls_set.discard(url)
    for url in negative:
        if url in all_docs:
            self.negative_urls_set.add(url)
            self.positive_urls_set.discard(url)
    for url in all_docs.keys():
        content = all_docs[url]
        if (len(self.negative_urls_set) == 0) or (url not in self.negative_urls_set):
            documents[url] = content
            if url not in self.positive_urls_set:
                other.append(url)
    self.tfidf = tfidf.tfidf(documents)
    chdir(self.memex_home + '/seed_crawler/ranking')
    ranker = rank.rank()
    [ranked_urls, scores] = ranker.results(self.tfidf, self.positive_urls_set, other)
    return [ranked_urls, scores]  # classified, ranked, diversified
def create_nw(data, replace_nans):
    """Compute co-expression network from the data

    Core network building function. We always run with replace_nans = True.
    Slicing single cell data will regularly produce genes with no counts,
    and any correlation with a vector of all 0s is NaN.

    Arguments:
        data {np.array} -- Array of float values in shape of genes x cells
        replace_nans {bool} -- Flag for whether to replace NaNs in network

    Returns:
        np.array -- ranked co-expression matrix of genes x genes
    """
    nw = np.corrcoef(data)
    np.fill_diagonal(nw, 1)
    nw = rank(nw)
    if replace_nans:
        nw[np.isnan(nw)] = bottleneck.nanmean(nw)
    return nw
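# One plausible definition of the rank() helper used by create_nw and the
# aggregation functions above (an assumption -- the project may normalize
# differently): rank-transform the whole correlation matrix and rescale so the
# values fall in (0, 1].
import numpy as np
from scipy.stats import rankdata

def rank(nw):
    ranked = rankdata(nw, method="average").reshape(nw.shape)  # flatten, rank, restore shape
    return ranked / ranked.max()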
def vs_query(argv):
    # get index directory
    path = argv[0]
    N = len(os.listdir(path)) - 1
    numDocs = int(argv[1])
    termString = ' '.join(argv[3:])
    # tokenize terms before scoring
    temp = termString.split(' "')
    phrases = []
    regular = []
    for i in temp:
        if '"' in i:
            phrases.append(i)
        else:
            regular += i.split(' ')
    ALL = []
    for i in phrases:
        ALL += [' '.join(tknize(i))]
    for i in regular:
        ALL += tknize(i)
    # calling the doc grabber
    grabber = gd.DocGrabber(ALL, path)
    grabbed, grabbed4Jacy = grabber.grab_relavent()
    # getting the weighted query
    score = scoreQuery(ALL, N, path, grabbed4Jacy)
    # ranking the docs
    stuff = rank.rank(score, grabbed, numDocs)
    for thing in stuff:
        doc, score = thing
        if sys.argv[3].lower() == "y":
            print(doc + "\t" + str(score))
        else:
            print(doc)
del andata2
gc.collect()
expression = andata.to_df()
agg_nw = np.zeros([genes.shape[0], genes.shape[0]])
metacell_values = metacell_assignment['x'].unique()
logging.info(np.max(metacell_values))
for metacell in metacell_values:
    logging.info(metacell)
    # Generate mask for slicing
    mask = metacell_assignment['x'] == metacell
    if mask.sum() < 20:
        logging.info(f'{metacell} too small')
        del mask
        gc.collect()
        continue
    # Expression is a DataFrame of cells x genes
    data = expression[mask].values.T
    nw = create_nw(data, True)
    agg_nw += nw
    del nw, mask, data
    gc.collect()
dataset_nw = pd.DataFrame(rank(agg_nw), index=genes, columns=genes)
fn = f'/home/bharris/biccn_paper/data/networks/metacells/metacell_agg_nw_{dataset}_{args.outname}.hdf5'
logging.info(f'writing to : {fn}')
dataset_nw.to_hdf(fn, 'nw')
def evaluate(args, outdir, split):
    # Load Model
    tokenizer = BertTokenizer.from_pretrained(args.bert_model,
                                              do_lower_case=args.do_lower_case)
    output_model_file = os.path.join(outdir, WEIGHTS_NAME)
    output_config_file = os.path.join(outdir, CONFIG_NAME)
    config = BertConfig(output_config_file)
    model = BertScoring(config)
    model.load_state_dict(torch.load(output_model_file))
    model.to(args.device)
    n_gpu = torch.cuda.device_count()
    logger.info("device: {} n_gpu: {}".format(args.device, n_gpu))
    if n_gpu > 1:
        model = torch.nn.DataParallel(model)

    # Data
    evalDataObject = RetrievalDataset(args, split=split, tokenizer=tokenizer,
                                      istrain=False)
    eval_dataloader = torch.utils.data.DataLoader(evalDataObject,
                                                  batch_size=args.batch_size,
                                                  shuffle=False,
                                                  pin_memory=True,
                                                  num_workers=args.workers)

    # Run prediction
    logger.info("***** Running evaluation *****")
    logger.info("  Num examples = %d", eval_dataloader.__len__())
    logger.info("  Batch size = %d", args.eval_batch_size)
    model.eval()
    scores1 = np.zeros((evalDataObject.num_samples, 1))
    scores2 = np.zeros((evalDataObject.num_samples, 1))
    idxq = np.zeros((evalDataObject.num_samples, 1))
    labels = np.zeros((evalDataObject.num_samples, 1))
    firstidx = 0
    for _, batch in enumerate(tqdm(eval_dataloader, desc="Iteration")):
        input_ids, input_mask, segment_ids, id_q, truelabel = batch
        input_ids = input_ids.to(args.device)
        input_mask = input_mask.to(args.device)
        segment_ids = segment_ids.to(args.device)
        with torch.no_grad():
            scores = model(input_ids, segment_ids, input_mask)
        scores = scores.detach().cpu().numpy()
        numSamplesBatch = truelabel.numpy().shape[0]
        scores1[firstidx:firstidx + numSamplesBatch, 0] = scores[:, 0]
        scores2[firstidx:firstidx + numSamplesBatch, 0] = scores[:, 1]
        idxq[firstidx:firstidx + numSamplesBatch, 0] = id_q.numpy()
        labels[firstidx:firstidx + numSamplesBatch, 0] = truelabel.numpy()
        firstidx = firstidx + numSamplesBatch

    utils.save_obj(scores1, os.path.join(args.data_dir, 'retieval_scores_%s.pckl' % split))
    utils.save_obj(idxq, os.path.join(args.data_dir, 'retrieval_idxq_%s.pckl' % split))
    utils.save_obj(labels, os.path.join(args.data_dir, 'retrieval_labels_%s.pckl' % split))

    medR1, recall1, medR2, recall2 = rank(scores1, scores2, idxq, labels)
    logger.info('Accuracy medR {medR:.2f}\t Recall {recall}'.format(medR=medR1,
                                                                    recall=recall1))
import numpy as np
from get_params import get_params
from build_database import build_database
from get_features import get_features
from rank import rank
from eval_rankings import eval_rankings
import warnings
warnings.filterwarnings("ignore")

# Extract the parameters
params = get_params()

# Build the database
params['split'] = 'train'
build_database(params)
params['split'] = 'val'
build_database(params)

# Extract the features
get_features(params)

# Compute the ranking
rank(params)

# Evaluate the ranking
ap_list = eval_rankings(params)
print "-List of Average Precision: "
print ap_list
print "\n"
print "-Mean Average Precision: "
print np.mean(ap_list)
ruta1 = os.path.dirname(os.path.abspath(__file__)) + '\\TerrassaBuildings900\\val\\images'
ruta2 = os.path.dirname(os.path.abspath(__file__)) + '\\TerrassaBuildings900\\train\\images'
savepath1 = os.path.dirname(os.path.abspath(__file__)) + '\\TerrassaBuildings900\\val'
savepath2 = os.path.dirname(os.path.abspath(__file__)) + '\\TerrassaBuildings900\\train'

build_database(ruta1, savepath1)
build_database(ruta2, savepath2)

get_features(ruta1, savepath1, savepath1)
get_features(ruta2, savepath2, savepath2)

savepath_principal = os.path.dirname(os.path.abspath(__file__))
features_val = os.path.dirname(os.path.abspath(__file__)) + '\\TerrassaBuildings900\\val'
features_train = os.path.dirname(os.path.abspath(__file__)) + '\\TerrassaBuildings900\\train'
rank(features_val, features_train, savepath_principal)

feat = os.path.dirname(os.path.abspath(__file__)) + '\\TerrassaBuildings900\\val\\Features.txt'
path_out = os.path.dirname(os.path.abspath(__file__))
labels = os.path.dirname(os.path.abspath(__file__)) + '\\labels.txt'
classify(feat, path_out, labels)

path = os.path.dirname(os.path.abspath(__file__))
Gt_val_test = os.path.dirname(os.path.abspath(__file__)) + '\\TerrassaBuildings900\\val\\annotation.txt'
evaluate_ranking(path, Gt_val_test)

automatic_annot = os.path.dirname(os.path.abspath(__file__)) + '\\classify.txt'
annotation = os.path.dirname(os.path.abspath(__file__)) + '\\TerrassaBuildings900\\val\\annotation.txt'
evaluate_classification(automatic_annot, annotation)
def test_rank((m, n)=(8, 4), alpha=0.5):
    """ Test rank finding routine """
    A = generate_A((m, n))
    #A = generate_A( (m,n), 'random' )
    #err = generate_A( (1,m), 'random' )
    #A[:,n-1] = A[:,0] + 0.1 * A[:,1]
    #A[:,n-1] = A[:,0] + 0.000001 * err
    print "A = \n", A
    therank, R = rank.rank(A)
    print "R = \n", R
    print "rank = ", therank


if __name__ == '__main__':
    arg = sys.argv[1:]
    if (len(arg) > 0) and (arg[0] == 'ls'):
        print "doing leastsquares test"
        test_ls()
    elif (len(arg) > 0) and (arg[0] == 'rank'):
        print "doing rank test"
        test_rank()
diffused_keys = {}

def diffuse(k):
    k *= M
    return k & ((1 << bits) - 1)

for k in keys:
    d = diffuse(k)
    assert d not in diffused_keys
    diffused_keys[diffuse(k)] = k

for k, v in diffused_keys.iteritems():
    r = k / side
    assert square[k % side][r] == NOT_A_VALUE
    square[k % side][r] = rank(v)

length = 0
for i in xrange(0, len(offset)):
    for j in xrange(0, len(ranks)):
        collision = False
        for k in xrange(0, side):
            s = square[k][i]
            h = ranks[j + k]
            collision = (s != NOT_A_VALUE and h != NOT_A_VALUE and s != h)
            if collision:
                break
        if not collision:
            offset[i] = j
            for k in xrange(0, side):
                s = square[k][i]
def get_texts(json_object):
    """ Parsing logic for getting texts """
    texts = list()
    texts.append(json_object.get(FIELD_NAMES_CITIES['text_field']))
    return texts


def get_annotated_list(json_object):
    """ Parsing logic for getting annotated field """
    return json_object.get(FIELD_NAMES_CITIES['annotated_field'])


embeddings_dict = read_embedding_file(EMBEDDINGS_FILE)
classifier = train_ranker.train_ranker(embeddings_dict, TRAINING_FILE_CITIES,
                                       FIELD_NAMES_CITIES)
with codecs.open(ACTUAL_FILE_CITIES, 'r', 'utf-8') as f:
    for line in f:
        obj = json.loads(line)
        list_of_texts = get_texts(obj)
        annotated_list = get_annotated_list(obj)
        print "Annotated tokens:",
        print annotated_list
        ranked_list = rank.rank(embeddings_dict, list_of_texts, annotated_list,
                                classifier)
        print "Ranked List:",
        print ranked_list
    # rather than a list. In this case, convert it to a list with one element
    searchresults = processresults.processResults(resultsdict, searchresults)
    resultcount += nr_of_results

#-----------------------------------
# PART 4: Rank results
#-----------------------------------
# determine relevance
searchresults = rank.calculateRelevance(searchresults, recommendation, majorterms)
# calculate scientific strength
searchresults = rank.calculatestrength(searchresults)
# create sorted list of article evidence
outstring, c, goals_found, top25 = rank.rank(searchresults, evidence, goals)
out.write(outstring)

print "Total number of search results: " + str(resultcount)
print "Percentage of goal articles found:"
print str((goals_found / len(goals)) * 100) + "%"
print "Percentage of found goal articles in top 25: "
if goals_found == 0:
    print "NA"
else:
    print str((top25 / goals_found * 100)) + "%"

out.write("</body></html>")
out.close()
    if len(ps) > 0:
        cur_P = ps[0]
        for j in xrange(1, len(ps)):
            for n in ps[j]:
                cro = [1 for m in cur_P if n < m]
                crossings += sum(cro)
    return crossings


if __name__ == "__main__":
    import graph
    import rank
    length = 8
    edges = [[0, 1], [0, 2], [1, 4], [1, 5], [2, 3], [4, 6], [3, 7], [5, 6]]
    g = graph.Graph(edges, length)
    rank.rank(g)
    order = ordering(g)
    print crossing(order)
    for o in order:
        s = "Rank: "
        for v in o:
            s += " " + str(v.index) + " "
        print s
    #print(distVector, indicate)
    a = argsort(distVector)  # indices of the individuals sorted by crowding distance
    dist_indicate = indicate[argsort(distVector)].tolist()
    #print(dist_indicate)
    return dist_indicate[::-1]


if __name__ == "__main__":
    funScore = array([[1, 2], [2, 3], [2, 2], [2, 2], [2, 2], [2, 2], [2, 2],
                      [2, 2], [2, 2], [2, 2], [3, 2], [4, 3], [2, 1], [3, 1],
                      [3, 2], [3, 3], [3, 4]])
    layerDict = {1: [4, 9], 2: [8], 3: [1, 3, 7], 4: [2, 6], 5: [0, 5]}
    d = dominance(funScore)
    print(d)
    r = rank(d)
    print(r)
    #print(crowddist(funScore, layerDict[3]))
    print(crowddist(funScore, r[2]))
    '''
    p = population(100, 4)
    #print(p)
    f = fitness_pop(p)
    #print(f)
    d = dominance(f)
    print("dominance relation dict d:", d)
    r = rank(d)
    print("layer dict r:", r)
    c = crowddist(f, r[5])
    print(c)
    '''