# NOTE(review): the two statements below are the tail of a function whose
# `def` line lies above this chunk; their true indentation level is not
# visible here — confirm placement against the full file.
        query = split[0].strip()
        if query not in tagged:
            print query


if __name__ == '__main__':
    argv = sys.argv
    #getEntities(argv[1])
    #toTag(argv[1],argv[2]);
    # NOTE(review): 'tagURL' / 'catURL' are literal placeholder strings here,
    # not real endpoints (other code in this file builds real localhost URLs)
    # — confirm Dexter ignores them in this code path.
    dexter = Dexter('tagURL', 'catURL', argv[6])
    #load the Category co-occurrence bit
    catCoMan = CoOcManager(argv[4], CoOccurrence(), ' ')
    #category vector
    catVect = loadCategoryVector(argv[2])
    # two category managers: flat categories vs subclustered categories
    catManage1 = CategoryManager(catVect, argv[3], Category)
    catManage2 = CategoryManager(catVect, argv[5], CategorySubcluster)
    #ranker
    ranker = Ranker()
    # expansion backends built on each category manager
    entExp1 = CatThesExpansion(dexter, catManage1, ranker, catCoMan, None)
    entExp2 = CatThesExpansion(dexter, catManage2, ranker, catCoMan, None)
    # output files named after the input file with its extension replaced
    oFile1 = open(argv[1][:argv[1].rfind('.')] + '_ent.txt', 'w')
    oFile2 = open(argv[1][:argv[1].rfind('.')] + '_entSub.txt', 'w')
    i = 0
    porter = stem.porter.PorterStemmer()
    totalVocab = loadFileInList(argv[7])
def getStatsPerQuery(argv):
    """Collect entity/category match statistics over query sessions.

    argv[1]: session file, read with getSessionWithNL
    argv[2]: directory of per-category phrase files (also fed to getCats)
    argv[3]: category-vector file

    For each session, tags queries with entities, keeps only the dominant
    entity (the one spotted most often in the session), and checks whether
    the entity's category phrase lists overlap the remaining query terms.
    Writes one row per (query, entity, category) to match_session_dom.txt
    and prints summary counters at the end.
    """
    tagURL = 'http://localhost:8080/rest/annotate'
    catURL = 'http://localhost:8080/rest/graph/get-entity-categories'
    catVector = loadCategoryVector(argv[3])
    f1Dict = getCats(argv[2])
    sFound = 0.0   # sessions where some category/term match was found
    sTotal = 0.0   # total sessions seen
    eTotal = set()           # all spotted entities
    eRemov = set()           # entities dropped as non-dominant
    catFoundNoTerm = set()   # category known but no query-term overlap
    catNotFound = set()      # category absent from f1Dict
    catTermFound = set()     # category with query-term overlap
    catEntity = set()        # all (entity, category) pairs
    outfile = open('match_session_dom.txt', 'w')
    #categoryVectors = {}
    for session in getSessionWithNL(argv[1]):
        catCount = {}
        entCount = {}
        querySpotList = {}
        for query in session:
            #find the entities in query
            try:
                # NOTE(review): the real tagger call is commented out, so
                # spotDict is always None and the loop below always raises
                # AttributeError into the broad except — confirm this
                # disabling is intentional.
                spotDict = None  #tagQueryWithDexter(query, tagURL,catURL)
                querySpotList[query] = spotDict
                for text in spotDict.keys():
                    for entry in spotDict[text]['cat'].split():
                        catCount[entry] = catCount.setdefault(entry, 1) + 1
                    entCount[text] = entCount.setdefault(text, 1) + 1
            except Exception as err:
                print err
        #print 'SESSION', session, 'CATCOUNT', catCount, 'ENTCOUNT',entCount
        found = False
        if len(catCount) > 0:
            #find the dominant entity
            maxEnt = max(entCount.values())
            #sessionQueryMapping = {}
            for query, spotList in querySpotList.iteritems():
                matchl = spotList.keys()
                for entry in matchl:
                    eTotal.add(entry)
                    if entCount[entry] < maxEnt:
                        # non-dominant entity: drop it from this query's spots
                        spotList.pop(entry, None)
                        print 'Removing spot', query, entry
                        eRemov.add(entry)
                    else:
                        #get the categories
                        #catTermMatch = {}
                        # query terms left after removing the entity surface form
                        rquery = query.replace(entry, '')
                        queryTerms = set(rquery.split())
                        for cat in spotList[entry]['cat'].lower().split():
                            catEntity.add(entry + '_' + cat)
                            if cat in f1Dict:
                                phrase1 = loadPhrasesWithScore(argv[2] + '/' + f1Dict[cat])
                                pVector = catVector[cat]
                                queryDict = getDictFromSet(queryTerms)
                                pTotal = sum(phrase1.values())
                                pset = set(phrase1.keys())
                                # overlap between category phrases and query terms
                                sint = pset & queryTerms
                                score = 0.0
                                cscore = get_cosine(queryDict, pVector)
                                for iphrase in sint:
                                    score += phrase1[iphrase] / pTotal
                                if len(queryTerms) > 0:
                                    # scale by fraction of query terms matched
                                    score *= (1.0 * len(sint)) / len(queryTerms)
                                if sint:
                                    outfile.write(query + '\t' + entry + '\t' + cat + '\t' + str(cscore) + '\t' + ', '.join(sint) + '\n')
                                    found = True
                                    catTermFound.add(entry + '_' + cat)
                                else:
                                    outfile.write(query + '\t' + entry + '\t' + cat + '\t0\t0\n')
                                    catFoundNoTerm.add(cat + '_' + entry)
                            else:
                                outfile.write(
                                    query + '\t' + entry + '\t' + cat + '\t0\tNOT FOUND\n')
                                catNotFound.add(cat + '_' + entry)
        #load the terms for category
        #check if these terms match
        if found:
            sFound += 1
        sTotal += 1
        outfile.write('\n')
    print 'Total Sessions ', sTotal
    print 'Sessions with dominant entity in AOL', sFound
    print '# Unique Entities', len(eTotal)
    print '# Removed Entities (non dominant)', len(eRemov)
    print '# no of entity types', len(catEntity)
    print '# no of entity types with terms match ', len(catTermFound)
    print '# no of entity types with no term match', len(catFoundNoTerm)
    print '# no of entity types with no match in AOL', len(catNotFound)
def main(argv):
    """Retrieval experiment driver: run several query-expansion methods and
    write TREC-style ranked runs, one *.RL1 file per method and term count.

    argv[1]: session XML file          argv[2]: search index directory
    argv[3]: category-vector file      argv[4]: category cluster file
    argv[5]: category subcluster file  argv[6]: co-occurrence file
    argv[7]: Dexter argument           argv[8]: relevance judgements
    argv[9]: output folder for the .RL1 run files

    NOTE(review): a second `main` defined later in this file shadows this
    one at import time — confirm which entry point is intended.
    """
    #open the index
    searcher = SearchIndex(argv[2])
    searcher.initializeAnalyzer()
    ipaddress = 'localhost'
    #dexter object
    tagURL = 'http://' + ipaddress + ':8080/rest/annotate'
    catURL = 'http://' + ipaddress + ':8080/rest/graph/get-entity-categories'
    dexter = Dexter(tagURL, catURL, argv[7])
    #category vector
    catVect = loadCategoryVector(argv[3])
    catManage1 = CategoryManager(catVect, argv[4], Category)
    catManage2 = CategoryManager(catVect, argv[5], CategorySubcluster)
    #load the Category co-occurrence bit
    catCoMan = CoOcManager(argv[6], CoOccurrence(), ' ')
    #ranker
    ranker = Ranker()
    #task extraction
    htcTask = TaskExpansion('Indexes/htcIndex', ranker, 3000)
    qccTask = TaskExpansion('Indexes/qccIndex', ranker, 3000)
    #taskK = argv[5][argv[5].rfind('/')+1:]
    #totalVocab = loadFileInList(argv[6]);
    #expansion
    entExp1 = CatThesExpansion(dexter, catManage1, ranker, catCoMan)
    entExp2 = CatThesExpansion(dexter, catManage2, ranker, catCoMan)
    #term expansion
    coOccExp = CoOccurExpansion(catCoMan, None, ranker)
    rel, noRel = loadRelJudgements(argv[8])
    outFolder = argv[9]
    #randomWalk
    #randWalk = RandomWalk(argv[3],argv[4],ranker)
    #randWalk = RandomWalk(catManage,catCoMan,entTermVect, catTermVect,ranker)
    #result String
    #query key terms
    #queryList = loadQueryList(argv[4]);
    #plotMap = {'baseline':{},'ent':{}, 'entSub':{}, 'qccTask':{}, 'htcTask':{},'co':{}};
    #plotNDCG = {'baseline':{},'ent':{}, 'entSub':{}, 'qccTask':{}, 'htcTask':{},'co':{}};
    #viewedFileFolder = argv[5]
    #i=0
    ##qMap = [];
    ##qNdcg = [];
    #meth = 'baseline'
    #oFile = open(outFolder+'/baseline.RL1','w');
    #covered = {};
    #porter = stem.porter.PorterStemmer();
    #for session, viewDocs, clickDocs, cTitle, cSummary in getSessionWithXML(argv[1]):
    #    i+=1
    #    query = session[0].strip();
    #    if i in rel and query not in covered:
    #        covered[query] = 1.0;
    #        docList = searcher.getTopDocuments(query,1000,'content','id');
    #        k = 1
    #        for dtuple in docList:
    #            oFile.write(str(i)+' Q0 '+dtuple[0]+' '+str(k)+' '+str(round(dtuple[1],2))+' baseline\n');
    #            k +=1
    #        '''qmap = findAvgPrec(docList,rel[i],noRel[i]);
    #        dcg10, idcg10 = findDCG(docList[:10],rel[i]);
    #        ##print dcg10, idcg10, rel[i].values();
    #        ndcg10 = 0.0;
    #        if idcg10 > 0:
    #            ndcg10 = dcg10/idcg10;
    #        qMap.append(qmap);
    #        qNdcg.append(ndcg10);
    #        oFile.write('ndcg10 '+str(i)+' '+str(ndcg10)+'\n');
    #        oFile.write('map '+str(i)+' '+str(qmap)+'\n');
    #        '''
    #    else:
    #        print 'No rel ', i, session[0];
    #oFile.close();
    #'''
    #fmap = sum(qMap)/len(qMap);
    #fnd = sum(qNdcg)/len(qNdcg);
    #oFile.write('all map ' +str(fmap)+'\n');
    #oFile.write('all ndcg10 '+str(fnd)+'\n');
    #for val in range(0,55,5):
    #    plotMap[meth][val] = fmap;
    #    plotNDCG[meth][val] = fnd;
    #oFile.close();
    #'''

    # ---- method 'co': co-occurrence based expansion ----
    i = 0
    #qMap = {};
    #qNdcg = {};
    oFile = {}   # term-count -> open run file
    meth = 'co'
    covered = {}  # dedupe: each first-query string processed once
    for session, viewDocs, clickDocs, cTitle, cSummary in getSessionWithXML(
            argv[1]):
        i += 1
        query = session[0].strip()
        if i in rel and query not in covered:
            covered[query] = 1.0
            coExpTerms = coOccExp.expandTextWithStep(query, 50, 55, 5)
            for noTerms, terms in coExpTerms.items():
                #if noTerms not in qMap:
                #    qMap[noTerms] = [];
                #    qNdcg[noTerms] = [];
                if noTerms not in oFile:
                    oFile[noTerms] = open(
                        outFolder + '/' + meth + '_' + str(noTerms) + '.RL1',
                        'w')
                docList = searcher.getTopDocumentsWithExpansion(query, terms,
                                                                1000,
                                                                'content',
                                                                'id')
                k = 1
                # TREC run format: qid Q0 docid rank score tag
                for dtuple in docList:
                    oFile[noTerms].write(str(i) + ' Q0 ' + dtuple[0] + ' ' +
                                         str(k) + ' ' +
                                         str(round(dtuple[1], 2)) +
                                         ' baseline\n')
                    k += 1
                '''qmap = findAvgPrec(docList,rel[i],noRel[i]);
                dcg10, idcg10 = findDCG(docList[:10],rel[i]);
                ndcg10 = 0.0;
                if idcg10 > 0:
                    ndcg10 = dcg10/idcg10;
                qMap[noTerms].append(qmap);
                qNdcg[noTerms].append(ndcg10);
                oFile[noTerms].write('ndcg10 '+str(i)+' '+str(ndcg10)+' '+str(dcg10)+' '+str(idcg10)+'\n');
                oFile[noTerms].write('map '+str(i)+' '+str(qmap)+'\n');
                '''
    '''
    for entry, vlist in qMap.items():
        i = len(vlist);
        fmap = sum(vlist)/i;
        fnd = sum(qNdcg[entry])/i;
        print sum(vlist), len(vlist);
        oFile[entry].write('all map ' +str(fmap)+'\n');
        oFile[entry].write('all ndcg10 '+str(fnd)+'\n');
        plotMap[meth][entry] = fmap;
        plotNDCG[meth][entry] = fnd;
        oFile[entry].close();
    '''
    for entry in oFile.keys():
        oFile[entry].close()

    # ---- method 'ent': entity/category thesaurus expansion ----
    i = 0
    #qMap = {};
    #qNdcg = {};
    oFile = {}
    meth = 'ent'
    covered = {}
    for session, viewDocs, clickDocs, cTitle, cSummary in getSessionWithXML(
            argv[1]):
        i += 1
        query = session[0].strip()
        # NOTE(review): `porter` is only defined in commented-out code above;
        # this relies on a module-level `porter` existing — verify.
        cText = normalize(' '.join(cTitle[0]), porter)
        if i in rel and query not in covered:
            covered[query] = 1.0
            entStatus1, entExpTerms1 = entExp1.expandTextWithStep(query,
                                                                  cText, 1,
                                                                  50, 55, 5)
            for noTerms, terms in entExpTerms1.items():
                #if noTerms not in qMap:
                #    qMap[noTerms] = [];
                #    qNdcg[noTerms] = [];
                if noTerms not in oFile:
                    oFile[noTerms] = open(
                        outFolder + '/' + meth + '_' + str(noTerms) + '.RL1',
                        'w')
                docList = searcher.getTopDocumentsWithExpansion(session[0],
                                                                terms, 1000,
                                                                'content',
                                                                'id')
                k = 1
                for dtuple in docList:
                    oFile[noTerms].write(str(i) + ' Q0 ' + dtuple[0] + ' ' +
                                         str(k) + ' ' +
                                         str(round(dtuple[1], 2)) +
                                         ' baseline\n')
                    k += 1
                ''' qmap = findAvgPrec(docList,rel[i],noRel[i]);
                dcg10, idcg10 = findDCG(docList[:10],rel[i]);
                ndcg10 = 0.0;
                if idcg10 > 0:
                    ndcg10 = dcg10/idcg10;
                qMap[noTerms].append(qmap);
                qNdcg[noTerms].append(ndcg10);
                oFile[noTerms].write('ndcg10 '+str(i)+' '+str(ndcg10)+' '+str(dcg10)+' '+str(idcg10)+'\n');
                oFile[noTerms].write('map '+str(i)+' '+str(qmap)+'\n');
                for entry, vlist in qMap.items():
                    i = len(vlist);
                    fmap = sum(qMap[entry])/i;
                    fnd = sum(qNdcg[entry])/i;
                    oFile[entry].write('all map ' +str(fmap)+'\n');
                    oFile[entry].write('all ndcg10 '+str(fnd)+'\n');
                    plotMap[meth][entry] = fmap;
                    plotNDCG[meth][entry] = fnd;
                    oFile[entry].close();
                '''
    for entry in oFile.keys():
        oFile[entry].close()

    # ---- method 'entSub': subclustered entity/category expansion ----
    i = 0
    #qMap = {};
    #qNdcg = {};
    oFile = {}
    meth = 'entSub'
    covered = {}
    for session, viewDocs, clickDocs, cTitle, cSummary in getSessionWithXML(
            argv[1]):
        i += 1
        query = session[0].strip()
        cText = normalize(' '.join(cTitle[0]), porter)
        if i in rel and query not in covered:
            covered[query] = 1.0
            entStatus2, entExpTerms2 = entExp2.expandTextWithStepAndSubcluster(
                query, cText, 1, 50, 55, 5)
            for noTerms, terms in entExpTerms2.items():
                #if noTerms not in qMap:
                #    qMap[noTerms] = [];
                #    qNdcg[noTerms] = [];
                if noTerms not in oFile:
                    oFile[noTerms] = open(
                        outFolder + '/' + meth + '_' + str(noTerms) + '.RL1',
                        'w')
                docList = searcher.getTopDocumentsWithExpansion(session[0],
                                                                terms, 1000,
                                                                'content',
                                                                'id')
                k = 1
                for dtuple in docList:
                    oFile[noTerms].write(str(i) + ' Q0 ' + dtuple[0] + ' ' +
                                         str(k) + ' ' +
                                         str(round(dtuple[1], 2)) +
                                         ' baseline\n')
                    k += 1
                '''qmap = findAvgPrec(docList,rel[i],noRel[i]);
                dcg10, idcg10 = findDCG(docList[:10],rel[i]);
                ndcg10 = 0.0;
                if idcg10 > 0:
                    ndcg10 = dcg10/idcg10;
                qMap[noTerms].append(qmap);
                qNdcg[noTerms].append(ndcg10);
                oFile[noTerms].write('ndcg10 '+str(i)+' '+str(ndcg10)+' '+str(dcg10)+' '+str(idcg10)+'\n');
                oFile[noTerms].write('map '+str(i)+' '+str(qmap)+'\n');
                for entry, vlist in qMap.items():
                    i = len(vlist);
                    fmap = sum(qMap[entry])/i;
                    fnd = sum(qNdcg[entry])/i;
                    oFile[entry].write('all map ' +str(fmap)+'\n');
                    oFile[entry].write('all ndcg10 '+str(fnd)+'\n');
                    plotMap[meth][entry] = fmap;
                    plotNDCG[meth][entry] = fnd;
                    oFile[entry].close();
                '''
    for entry in oFile.keys():
        oFile[entry].close()

    # ---- method 'qccTask': task-based expansion (qcc index) ----
    i = 0
    #qMap = {};
    #qNdcg = {};
    oFile = {}
    meth = 'qccTask'
    covered = {}
    for session, viewDocs, clickDocs, cTitle, cSummary in getSessionWithXML(
            argv[1]):
        i += 1
        query = session[0].strip()
        if i in rel and query not in covered:
            covered[query] = 1.0
            qccTaskTerms = qccTask.expandTextWithStep(query, 50, 55, 5)
            for noTerms, terms in qccTaskTerms.items():
                #if noTerms not in qMap:
                #    qMap[noTerms] = [];
                #    qNdcg[noTerms] = [];
                if noTerms not in oFile:
                    oFile[noTerms] = open(
                        outFolder + '/' + meth + '_' + str(noTerms) + '.RL1',
                        'w')
                docList = searcher.getTopDocumentsWithExpansion(session[0],
                                                                terms, 1000,
                                                                'content',
                                                                'id')
                k = 1
                for dtuple in docList:
                    oFile[noTerms].write(str(i) + ' Q0 ' + dtuple[0] + ' ' +
                                         str(k) + ' ' +
                                         str(round(dtuple[1], 2)) +
                                         ' baseline\n')
                    k += 1
                #qmap = findAvgPrec(docList,rel[i],noRel[i]);
                #dcg10, idcg10 = findDCG(docList[:10],rel[i]);
                #ndcg10 = 0.0;
                #if idcg10 > 0:
                #    ndcg10 = dcg10/idcg10;
                #
                #qMap[noTerms].append(qmap);
                #qNdcg[noTerms].append(ndcg10);
                #oFile[noTerms].write('ndcg10 '+str(i)+' '+str(ndcg10)+' '+str(dcg10)+' '+str(idcg10)+'\n');
                #oFile[noTerms].write('map '+str(i)+' '+str(qmap)+'\n');
    #
    #for entry, vlist in qMap.items():
    #    i = len(vlist);
    #    fmap = sum(qMap[entry])/i;
    #    fnd = sum(qNdcg[entry])/i;
    #    oFile[entry].write('all map ' +str(fmap)+'\n');
    #    oFile[entry].write('all ndcg10 '+str(fnd)+'\n');
    #    plotMap[meth][entry] = fmap;
    #    plotNDCG[meth][entry] = fnd;
    #    oFile[entry].close();
    #
    for entry in oFile.keys():
        oFile[entry].close()

    # ---- method 'htcTask': task-based expansion (htc index) ----
    i = 0
    #qMap = {};
    #qNdcg = {};
    oFile = {}
    meth = 'htcTask'
    covered = {}
    for session, viewDocs, clickDocs, cTitle, cSummary in getSessionWithXML(
            argv[1]):
        i += 1
        query = session[0].strip()
        if i in rel and query not in covered:
            covered[query] = 1.0
            htcTaskTerms = htcTask.expandTextWithStep(query, 50, 55, 5)
            for noTerms, terms in htcTaskTerms.items():
                #if noTerms not in qMap:
                #    qMap[noTerms] = [];
                #    qNdcg[noTerms] = [];
                if noTerms not in oFile:
                    oFile[noTerms] = open(
                        outFolder + '/' + meth + '_' + str(noTerms) + '.RL1',
                        'w')
                docList = searcher.getTopDocumentsWithExpansion(session[0],
                                                                terms, 1000,
                                                                'content',
                                                                'id')
                k = 1
                for dtuple in docList:
                    oFile[noTerms].write(str(i) + ' Q0 ' + dtuple[0] + ' ' +
                                         str(k) + ' ' +
                                         str(round(dtuple[1], 2)) +
                                         ' baseline\n')
                    k += 1
                #qmap = findAvgPrec(docList,rel[i],noRel[i]);
                #dcg10, idcg10 = findDCG(docList[:10],rel[i]);
                #ndcg10 = 0.0;
                #if idcg10 > 0:
                #    ndcg10 = dcg10/idcg10;
                #qMap[noTerms].append(qmap);
                #qNdcg[noTerms].append(ndcg10);
                #oFile[noTerms].write('ndcg10 '+str(i)+' '+str(ndcg10)+' '+str(dcg10)+' '+str(idcg10)+'\n');
                #oFile[noTerms].write('map '+str(i)+' '+str(qmap)+'\n');
    #
    #for entry, vlist in qMap.items():
    #    i = len(vlist);
    #    fmap = sum(qMap[entry])/i;
    #    fnd = sum(qNdcg[entry])/i;
    #    oFile[entry].write('all map ' +str(fmap)+'\n');
    #    oFile[entry].write('all ndcg10 '+str(fnd)+'\n');
    #    plotMap[meth][entry] = fmap;
    #    plotNDCG[meth][entry] = fnd;
    #    oFile[entry].close();
    for entry in oFile.keys():
        oFile[entry].close()
    #plotMultipleSys(plotMap,'No of Terms', 'MAP',outFolder+'/map.png','Retrieval MAP Plot');
    #plotMultipleSys(plotNDCG,'No of Terms', 'NDCG@10',outFolder+'/ndcg10.png','Retrieval NDCG Plot');
    searcher.close()
def main(argv):
    """Term-prediction experiment driver: compare expansion methods against
    the terms users actually added later in each session, reporting
    precision and MRR per method and expansion size.

    argv[1]: session XML file           argv[2]: category-vector file
    argv[3]: category cluster file      argv[4]: co-occurrence file
    argv[5]: Dexter argument            argv[6]: vocabulary file
    argv[7]: category subcluster file   argv[9]: plot output folder
    argv[10]: ground-truth source: "query", "title", or anything else
              (title + summary combined)

    Prints per-query metrics, summary tables via printMetric, and writes
    precision/MRR plots via plotMultipleSys.
    NOTE(review): this `main` shadows the earlier `main` in this file at
    import time — confirm which entry point is intended.
    """
    ipaddress = "localhost"
    # dexter object
    tagURL = "http://" + ipaddress + ":8080/rest/annotate"
    catURL = "http://" + ipaddress + ":8080/rest/graph/get-entity-categories"
    dexter = Dexter(tagURL, catURL, argv[5])
    # load the Category co-occurrence bit
    catCoMan = CoOcManager(argv[4], CoOccurrence(), " ")
    # category vector
    catVect = loadCategoryVector(argv[2])
    catManage1 = CategoryManager(catVect, argv[3], Category)
    catManage2 = CategoryManager(catVect, argv[7], CategorySubcluster)
    # ranker
    ranker = Ranker()
    totalVocab = loadFileInList(argv[6])
    # task extraction
    # htcTask = TaskExpansion('Indexes/htcIndex',ranker,3000);
    qccTask = TaskExpansion("Indexes/qccIndex", ranker, 3000, totalVocab)
    # taskK = argv[5][argv[5].rfind('/')+1:]
    wordFeatMan = None  # WordManager(argv[8],False);
    # expansion
    # entExp1 = CatThesExpansion(dexter, catManage1, ranker,catCoMan,wordFeatMan);
    entExp2 = CatThesExpansion(dexter, catManage2, ranker, catCoMan, wordFeatMan)
    # term expansion
    coOccExp = CoOccurExpansion(catCoMan, None, ranker)
    # randomWalk
    # randWalk = RandomWalk(argv[2],argv[3],ranker)
    # metric accumulators: method -> {noTerms -> stats}
    prec = {"ent": {}, "qccTask": {}, "htcTask": {}, "co": {}, "entSub": {}}
    mrr = {"ent": {}, "qccTask": {}, "htcTask": {}, "co": {}, "entSub": {}}
    # same, restricted to queries where entity expansion succeeded
    ent_prec = {"ent": {}, "qccTask": {}, "htcTask": {}, "co": {}, "entSub": {}}
    ent_mrr = {"ent": {}, "qccTask": {}, "htcTask": {}, "co": {}, "entSub": {}}
    """
    sess_prec = {};
    sess_mrr = {};
    """
    covered = {}
    i = 0
    porter = stem.porter.PorterStemmer()
    ttype = argv[10]
    for session, doc, click, cTitle, cSummary in getSessionWithXML(argv[1]):
        query = session[0]
        qSet = getQueryTerms(query)
        # print 'Title, Summary clicked ',cTitle[0], cSummary[0];
        aTerms = None  # ground-truth "added" terms for this session
        # cText = normalize(' '.join(cTitle[0]),porter);
        if ttype == "query":
            aTerms, rTerms = addedAndRemovedTerms(query, session[1:], totalVocab)
        elif ttype == "title":
            aTerms = getTerms(cTitle, qSet, totalVocab, porter,
                              range(1, len(session) - 1))
        else:
            # default: union of clicked-title and clicked-summary terms
            aTerms = getTerms(cTitle, qSet, totalVocab, porter,
                              range(1, len(session) - 1))
            bTerms = getTerms(cSummary, qSet, totalVocab, porter,
                              range(1, len(session) - 1))
            aTerms = aTerms | bTerms
        print i, "Query", query, aTerms, len(aTerms)
        if len(aTerms) > 0:  # and query not in covered:
            covered[query] = 1
            coExpTerms = coOccExp.expandTextWithStep(query, 0, 55, 5)
            # entStatus1, entExpTerms1 = entExp1.expandTextWithStep(query,'',1,0,55,5);
            entStatus1, entExpTerms2 = entExp2.expandTextWithStepAndSubcluster(query, "", 1, 0, 55, 5)
            qccTaskTerms = qccTask.expandTextWithStep(query, 0, 55, 5)
            # htcTaskTerms = htcTask.expandTextWithStep(query,0,55,5)
            # randExpTerms = randWalk.expandTextWithStep(query,55,105,5)
            if not entStatus1:
                print i, "Ent False", query
            # addLen = getBand(len(aTerms));
            # if addLen not in sess_prec:
            #     sess_prec[addLen] = {'ent':{}};#, 'qccTask':{}, 'htcTask':{}, 'co':{} };
            #     sess_mrr[addLen] = {'ent':{}};#, 'qccTask':{}, 'htcTask':{}, 'co':{} };
            # for noTerms in entExpTerms1.keys():
            #     print 'ETerms\t',i,'\t',query,'\t',entExpTerms1[noTerms],'\t',noTerms;
            #     prec1 , mrr1 = getPrecRecall(entExpTerms1[noTerms],aTerms);
            #     prec = updateStats(noTerms, 'ent',prec1, prec);
            #     mrr = updateStats(noTerms, 'ent',mrr1, mrr);
            #     if entStatus1:
            #         ent_prec = updateStats(noTerms, 'ent',prec1, ent_prec)
            #         ent_mrr = updateStats(noTerms, 'ent',mrr1, ent_mrr);
            ##sess_prec[addLen] = updateStats(noTerms, 'ent',prec1, sess_prec[addLen])
            ##sess_mrr[addLen] = updateStats(noTerms, 'ent',mrr1, sess_mrr[addLen]);
            #     print 'EMetrics ',i,'\t',noTerms,'\t', len(aTerms), '\t', aTerms, '\t',prec1, '\t',mrr1;
            #
            for noTerms in entExpTerms2.keys():
                print "ESubTerms\t", i, "\t", query, "\t", entExpTerms2[noTerms], "\t", noTerms
                prec1, mrr1 = getPrecRecall(entExpTerms2[noTerms], aTerms)
                prec = updateStats(noTerms, "entSub", prec1, prec)
                mrr = updateStats(noTerms, "entSub", mrr1, mrr)
                if entStatus1:
                    ent_prec = updateStats(noTerms, "entSub", prec1, ent_prec)
                    ent_mrr = updateStats(noTerms, "entSub", mrr1, ent_mrr)
                # sess_prec[addLen] = updateStats(noTerms, 'ent',prec1, sess_prec[addLen])
                # sess_mrr[addLen] = updateStats(noTerms, 'ent',mrr1, sess_mrr[addLen]);
                print "ESubMetrics ", i, "\t", noTerms, "\t", len(aTerms), "\t", aTerms, "\t", prec1, "\t", mrr1
            for noTerms in qccTaskTerms.keys():
                print "qccTaskTerms\t", i, "\t", query, "\t", qccTaskTerms[noTerms], "\t", noTerms
                prec1, mrr1 = getPrecRecall(qccTaskTerms[noTerms], aTerms)
                prec = updateStats(noTerms, "qccTask", prec1, prec)
                mrr = updateStats(noTerms, "qccTask", mrr1, mrr)
                if entStatus1:
                    ent_prec = updateStats(noTerms, "qccTask", prec1, ent_prec)
                    ent_mrr = updateStats(noTerms, "qccTask", mrr1, ent_mrr)
                """
                sess_prec[addLen] = updateStats(noTerms, 'qccTask',prec1, sess_prec[addLen])
                sess_mrr[addLen] = updateStats(noTerms, 'qccTask',mrr1, sess_mrr[addLen]);
                """
                print "qccTaskMetrics ", i, "\t", noTerms, "\t", len(aTerms), "\t", aTerms, "\t", prec1, "\t", mrr1
            # for noTerms in htcTaskTerms.keys():
            #     print 'htcTaskTerms\t',i,'\t',query,'\t',htcTaskTerms[noTerms],'\t',noTerms
            #     prec1 , mrr1 = getPrecRecall(htcTaskTerms[noTerms],aTerms)
            #     prec = updateStats(noTerms, 'htcTask',prec1, prec)
            #     mrr = updateStats(noTerms, 'htcTask',mrr1, mrr);
            #     if entStatus1:
            #         ent_prec = updateStats(noTerms, 'htcTask',prec1, ent_prec)
            #         ent_mrr = updateStats(noTerms, 'htcTask',mrr1, ent_mrr);
            ##sess_prec[addLen] = updateStats(noTerms, 'htcTask',prec1, sess_prec[addLen])
            ##sess_mrr[addLen] = updateStats(noTerms, 'htcTask',mrr1, sess_mrr[addLen]);
            #
            #     print 'htcTaskMetrics ',i,'\t',noTerms,'\t', len(aTerms), '\t', aTerms, '\t',prec1, '\t',mrr1
            for noTerms in coExpTerms.keys():
                print "CoTerms\t", i, "\t", query, "\t", coExpTerms[noTerms], "\t", noTerms
                prec1, mrr1 = getPrecRecall(coExpTerms[noTerms], aTerms)
                prec = updateStats(noTerms, "co", prec1, prec)
                mrr = updateStats(noTerms, "co", mrr1, mrr)
                if entStatus1:
                    ent_prec = updateStats(noTerms, "co", prec1, ent_prec)
                    ent_mrr = updateStats(noTerms, "co", mrr1, ent_mrr)
                """
                sess_prec[addLen] = updateStats(noTerms, 'co',prec1, sess_prec[addLen])
                sess_mrr[addLen] = updateStats(noTerms, 'co' ,mrr1, sess_mrr[addLen]);
                """
                print "CoMetrics ", i, "\t", noTerms, "\t", len(aTerms), "\t", aTerms, "\t", prec1, "\t", mrr1
        else:
            pass
            # print 'NO ADDED TERMS in', i;
        i += 1
    printMetric(prec, "entSub", "Prec")
    printMetric(mrr, "entSub", "Mrr")
    printMetric(prec, "ent", "Prec")
    printMetric(mrr, "ent", "Mrr")
    printMetric(prec, "htcTask", "Prec")
    printMetric(mrr, "htcTask", "Mrr")
    printMetric(prec, "qccTask", "Prec")
    printMetric(mrr, "qccTask", "Mrr")
    printMetric(prec, "co", "Prec")
    printMetric(mrr, "co", "Mrr")
    printMetric(ent_prec, "entSub", "EntPrec")
    printMetric(ent_mrr, "entSub", "EntMrr")
    printMetric(ent_prec, "ent", "EntPrec")
    printMetric(ent_mrr, "ent", "EntMrr")
    printMetric(ent_prec, "htcTask", "EntPrec")
    printMetric(ent_mrr, "htcTask", "EntMrr")
    printMetric(ent_prec, "qccTask", "EntPrec")
    printMetric(ent_mrr, "qccTask", "EntMrr")
    printMetric(ent_prec, "co", "EntPrec")
    printMetric(ent_mrr, "co", "EntMrr")
    plotMultipleSys(
        prec,
        "No of Terms",
        "Prec",
        argv[9] + "/" + argv[1][argv[1].rfind("/") + 1 : -4] + "_" + "prec.png",
        "Term Prediction Prec Plot",
    )
    plotMultipleSys(
        mrr,
        "No of Terms",
        "MRR",
        argv[9] + "/" + argv[1][argv[1].rfind("/") + 1 : -4] + "_" + "mrr.png",
        "Term Prediction MRR Plot",
    )
    plotMultipleSys(
        ent_prec,
        "No of Terms",
        "Prec",
        argv[9] + "/" + argv[1][argv[1].rfind("/") + 1 : -4] + "_" + "_ent_prec.png",
        "Term Prediction Prec Plot (Ent queries)",
    )
    plotMultipleSys(
        ent_mrr,
        "No of Terms",
        "MRR",
        argv[9] + "/" + argv[1][argv[1].rfind("/") + 1 : -4] + "_" + "_ent_mrr.png",
        "Term Prediction MRR Plot (Ent queries)",
    )
    # htcTask.closeIndex();
    qccTask.closeIndex()


# NOTE(review): this opening triple-quote starts a string that is not closed
# within the visible chunk — presumably it comments out code that continues
# below; verify against the full file before reformatting.
"""