def get_k(queryAddress, documentAddress, e):
    """Print every ranked document id that also occurs in the result file.

    Builds the index with ``make_invidx(documentAddress, e)``, builds the
    queries with ``calculate_query(get_query(queryAddress))``, ranks them with
    ``cosine_distance`` and prints each ranked ``docid`` for which
    ``docid in readFile(resultAddress)`` holds.

    Args:
        queryAddress: Passed to ``get_query`` to load the queries.
        documentAddress: Passed to ``make_invidx`` to build the index.
        e: Second argument of ``make_invidx``; its meaning is defined there.

    Returns:
        None. Matching document ids are printed to stdout, one per line.
    """
    # make_invidx returns a pair; only the first element is needed here.
    document1, _ = make_invidx(documentAddress, e)
    query1 = calculate_query(get_query(queryAddress))
    # 100 is presumably the number of top-ranked documents kept per query
    # -- TODO confirm against cosine_distance.
    simi1 = cosine_distance(document1, query1, 100)
    # NOTE(review): resultAddress is neither a parameter nor a local, so it
    # must exist at module level when this function runs -- confirm.
    result = readFile(resultAddress)
    for num in simi1:
        # Each entry unpacks as (weight, docid); the weight is not used.
        for _weight, docid in simi1[num]:
            if docid in result:
                print(docid)
def get_topK(queryAddress, documentAddress, extend):
    """Print the documents on which the different rankings agree.

    For every value ``e`` in ``extend`` and every query, three rankings are
    computed (``cosine_distance``, ``sum_keyword`` and ``sum_score``). Each
    document of the cosine ranking that also appears in one of the other two
    rankings is printed with that ranking's weight; afterwards the documents
    shared by the ``sum_keyword`` and ``sum_score`` rankings are printed.

    NOTE(review): the loop nesting was reconstructed from statement order
    because the indentation of this function was lost -- confirm that the
    comparison loops are meant to run once per query.

    Args:
        queryAddress: Passed to ``get_query`` to load the queries.
        documentAddress: Passed to the index-building helpers.
        extend: Iterable of values forwarded, one at a time, as the second
            argument of the index-building helpers.

    Returns:
        None. All results are printed to stdout.
    """
    for e in extend:
        # make_invidx returns a pair; only the first element is needed here.
        document1, _ = make_invidx(documentAddress, e)
        document2 = make_invidx_by_OkapiBM25(documentAddress, e)
        query1 = calculate_query(get_query(queryAddress))

        for num in query1:
            document3 = make_index_AIG(documentAddress, e, query1[num])
            simi3 = sum_score(document3, 10)
            simi2 = sum_keyword(document2, query1[num], 10)
            # NOTE(review): this call does not depend on the current query;
            # it could be hoisted out of the loop if cosine_distance has no
            # side effects -- confirm before moving it.
            simi1 = cosine_distance(document1, query1, 10)

            # A separate loop name avoids rebinding the outer ``num``.
            for query_key in simi1:
                for _weight1, docid in simi1[query_key]:
                    for weight2, docid2 in simi2:
                        if docid == docid2:
                            print(docid2, " : ", weight2)
                    for weight3, docid3 in simi3:
                        if docid == docid3:
                            print(docid3, ":", weight3)

            # Documents present in both the keyword and the score ranking.
            for _weight2, docid2 in simi2:
                for weight3, docid3 in simi3:
                    if docid2 == docid3:
                        print(docid3, ":", weight3)