def getResultsTimeSpan(topNResults, tweetsEpoch, queriesEpoch):
    """Collect query-to-tweet time spans for each query's top-N results.

    For every query id in topNResults, computes getTimeSpan(queryEpoch,
    tweetEpoch) for each retrieved tweet whose epoch is at or before the
    query's epoch (tweets "from the future" are skipped).

    Returns a dict mapping qid -> list of time spans.
    """
    resultsTimeSpan = {}
    for qid, docDict in topNResults.items():
        queryEpoch = queriesEpoch[qid]
        spans = []
        for docId in docDict:
            tweetEpoch = tweetsEpoch[docId]
            # only tweets posted at or before the query time contribute
            if tweetEpoch <= queryEpoch:
                spans.append(getTimeSpan(queryEpoch, tweetEpoch))
        resultsTimeSpan[qid] = spans
    return resultsTimeSpan
def getQwordTimeSpan(qid, qword, topNResults, wordsIndex, tweetsEpoch, queriesEpoch):
    """Collect time spans between a query and its top-N tweets containing qword.

    Walks the top-N documents retrieved for qid and, for each one that also
    appears in the inverted index entry for qword and was posted at or before
    the query epoch, records getTimeSpan(queryEpoch, tweetEpoch).

    Returns a list of time spans (possibly empty).
    """
    qwordTimeSpans = list()
    queryEpoch = queriesEpoch[qid]
    qidTopNDocs = topNResults[qid].keys()
    qwordAllDocs = wordsIndex[qword]
    for tweetId in qidTopNDocs:
        # Fixed: dict.has_key() was removed in Python 3; the `in` operator is
        # equivalent and works in Python 2 as well.
        if tweetId in qwordAllDocs:
            tweetEpoch = tweetsEpoch[tweetId]
            if tweetEpoch <= queryEpoch:
                timeSpan = getTimeSpan(queryEpoch, tweetEpoch)
                # Removed unused local `key = qid + '_' + qword` (dead code).
                qwordTimeSpans.append(timeSpan)
    return qwordTimeSpans
def dtfWeightingT(queriesEpoch, tweetsEpoch, kdeQwordDict, qid, qword, docId, wordsIndex, docsLength, avgDocsLength, k1, b):
    """Compute a temporally-weighted BM25-style term-frequency score.

    Scales the raw term count of qword in docId by the KDE-estimated
    probability density of the query-to-tweet time span, then plugs the
    scaled count into the BM25 saturation formula with parameters k1 and b.

    Returns 0 when the tweet was posted after the query epoch.
    """
    queryEpoch = queriesEpoch[qid]
    tweetEpoch = tweetsEpoch[docId]
    # tweets newer than the query get no weight
    if queryEpoch < tweetEpoch:
        return 0
    kde = kdeQwordDict[qid + '_' + qword]
    timeSpan = getTimeSpan(queryEpoch, tweetEpoch)
    probDen = prediction(kde, [timeSpan])[0]
    weightedCount = wordsIndex[qword][docId] * probDen
    # BM25 document-length normalization
    lengthNorm = 1 - b + b * docsLength[docId] / avgDocsLength
    return 1.0 * (k1 + 1) * weightedCount / (k1 * lengthNorm + weightedCount)
def predictResultsProbDens(retrievalResults, queriesEpoch, tweetsEpoch, kdeDict):
    """Predict a KDE probability density for every retrieved document.

    For each query id, evaluates the per-query KDE at the time span between
    the query epoch and each result's tweet epoch. Documents posted after
    the query epoch receive density 0.

    Returns a dict mapping qid -> {docId: probability density}.
    """
    probDens = {}
    for qid in retrievalResults.keys():
        probDens.setdefault(qid, {})
        kde = kdeDict[qid]
        queryEpoch = queriesEpoch[qid]
        resultsList = retrievalResults[qid]
        for result in resultsList:
            docId = result.docId
            tweetEpoch = tweetsEpoch[docId]
            if queryEpoch >= tweetEpoch:
                timeSpan = getTimeSpan(queryEpoch, tweetEpoch)
                probDen = prediction(kde, [timeSpan])
                probDens[qid][docId] = probDen[0]
            else:
                # tweet is newer than the query: zero density
                probDens[qid][docId] = 0
        # Fixed: `print qid` is a Python-2-only statement; the function-call
        # form prints identically for a single argument and is Python 3 safe.
        print(qid)
    return probDens