Пример #1
0
def getResultsTimeSpan(topNResults, tweetsEpoch, queriesEpoch):
    """Collect, per query, the time spans of its retrieved documents.

    For every query id in ``topNResults`` the documents listed under it are
    scanned; each document whose epoch does not exceed the query's epoch
    contributes ``getTimeSpan(queryEpoch, tweetEpoch)`` to that query's list.
    Documents whose epoch is greater than the query's epoch are skipped.

    Args:
        topNResults: mapping of query id -> mapping keyed by document id.
        tweetsEpoch: mapping of document id -> epoch value.
        queriesEpoch: mapping of query id -> epoch value.

    Returns:
        dict mapping every query id found in ``topNResults`` to a list of
        time spans (the list is empty when no document qualifies).

    Raises:
        KeyError: if a query id or document id has no epoch entry.
    """
    spansByQuery = {}
    for qid, docs in topNResults.items():
        cutoff = queriesEpoch[qid]
        # Register the query up front so it appears in the result even when
        # none of its documents pass the epoch filter.
        spans = spansByQuery.setdefault(qid, [])
        for docId in docs:
            docEpoch = tweetsEpoch[docId]
            if docEpoch <= cutoff:
                spans.append(getTimeSpan(cutoff, docEpoch))

    return spansByQuery
Пример #2
0
def getQwordTimeSpan(qid, qword, topNResults, wordsIndex, tweetsEpoch, queriesEpoch):
    """Return the time spans of a query's top documents that contain a word.

    Only documents that are both listed under ``topNResults[qid]`` and present
    in ``wordsIndex[qword]`` are considered, and of those only the ones whose
    epoch does not exceed the query's epoch.

    Args:
        qid: query id; key into ``topNResults`` and ``queriesEpoch``.
        qword: query word; key into ``wordsIndex``.
        topNResults: mapping of query id -> mapping keyed by document id.
        wordsIndex: mapping of word -> container of the document ids that
            hold the word (membership is tested with ``in``).
        tweetsEpoch: mapping of document id -> epoch value.
        queriesEpoch: mapping of query id -> epoch value.

    Returns:
        list of ``getTimeSpan(queryEpoch, tweetEpoch)`` values, one per
        qualifying document; empty when none qualifies.

    Raises:
        KeyError: if ``qid``, ``qword`` or a matching document id is missing
            from the corresponding mapping.
    """
    qwordTimeSpans = []
    queryEpoch = queriesEpoch[qid]
    qidTopNDocs = topNResults[qid]
    qwordAllDocs = wordsIndex[qword]
    for tweetId in qidTopNDocs:
        # `in` replaces dict.has_key(), which no longer exists on Python 3.
        if tweetId not in qwordAllDocs:
            continue
        tweetEpoch = tweetsEpoch[tweetId]
        if tweetEpoch <= queryEpoch:
            qwordTimeSpans.append(getTimeSpan(queryEpoch, tweetEpoch))
    return qwordTimeSpans
Пример #3
0
def dtfWeightingT(queriesEpoch, tweetsEpoch, kdeQwordDict, qid, qword, docId, wordsIndex, docsLength, avgDocsLength, k1, b):
    """Compute a density-weighted, length-normalised term weight.

    The term count of ``qword`` in ``docId`` is multiplied by the first value
    returned by ``prediction(kde, [timeSpan])`` and then passed through a
    saturation formula parameterised by ``k1`` and ``b``:

        (k1 + 1) * count * p / (k1 * (1 - b + b * len / avgLen) + count * p)

    Args:
        queriesEpoch: mapping of query id -> epoch value.
        tweetsEpoch: mapping of document id -> epoch value.
        kdeQwordDict: mapping keyed by ``qid + '_' + qword`` whose values are
            handed to ``prediction`` as its first argument.
        qid: query id.
        qword: query word.
        docId: document id.
        wordsIndex: mapping of word -> mapping of document id -> count.
        docsLength: mapping of document id -> document length.
        avgDocsLength: average document length used for normalisation.
        k1: saturation parameter of the formula above.
        b: length-normalisation parameter of the formula above.

    Returns:
        The weight computed by the formula, or ``0`` when the document's
        epoch is greater than the query's epoch.
    """
    # The model lookup happens first, so a missing key fails before anything else.
    model = kdeQwordDict[qid + '_' + qword]
    queryEpoch = queriesEpoch[qid]
    tweetEpoch = tweetsEpoch[docId]

    # Documents with an epoch beyond the query's epoch get no weight.
    if not (queryEpoch >= tweetEpoch):
        return 0

    span = getTimeSpan(queryEpoch, tweetEpoch)
    probDen = prediction(model, [span])[0]
    count = wordsIndex[qword][docId]
    docLength = docsLength[docId]

    lengthNorm = 1 - b + b * docLength / avgDocsLength
    numerator = (k1 + 1) * count * probDen
    denominator = k1 * lengthNorm + count * probDen
    return 1.0 * numerator / denominator
Пример #4
0
def predictResultsProbDens(retrievalResults, queriesEpoch, tweetsEpoch, kdeDict):
    """Compute a density value for every retrieved document of every query.

    For each result whose document epoch does not exceed the query's epoch,
    the stored value is the first element of
    ``prediction(kde, [getTimeSpan(queryEpoch, tweetEpoch)])``; results with
    an epoch greater than the query's epoch are assigned ``0``.

    Args:
        retrievalResults: mapping of query id -> iterable of result objects,
            each exposing a ``docId`` attribute.
        queriesEpoch: mapping of query id -> epoch value.
        tweetsEpoch: mapping of document id -> epoch value.
        kdeDict: mapping of query id -> object passed to ``prediction`` as
            its first argument.

    Returns:
        dict mapping query id -> dict mapping document id -> density value.

    Raises:
        KeyError: if a query id or document id is missing from one of the
            lookup mappings.

    Side effects:
        Prints each query id to standard output after it has been processed.
    """
    probDens = {}
    for qid, resultsList in retrievalResults.items():
        queryProbDens = probDens.setdefault(qid, {})
        kde = kdeDict[qid]
        queryEpoch = queriesEpoch[qid]

        for result in resultsList:
            docId = result.docId
            tweetEpoch = tweetsEpoch[docId]
            if queryEpoch >= tweetEpoch:
                timeSpan = getTimeSpan(queryEpoch, tweetEpoch)
                queryProbDens[docId] = prediction(kde, [timeSpan])[0]
            else:
                queryProbDens[docId] = 0
        # Progress output; the parenthesised form is valid on Python 2 and 3
        # and prints the same text for a single argument.
        print(qid)
    return probDens