示例#1
0
def generate_token_vectors(people):
    """Build a name/tokens record for every person in *people*.

    Args:
        people: Mapping of person ID to a record that provides at least
            the ``'data'`` and ``'name'`` keys.

    Returns:
        A dict mapping each ID to ``{'name': ..., 'tokens': ...}``, where
        ``'tokens'`` is whatever ``generate`` returns for the record's
        ``'data'`` value.

    Raises:
        KeyError: If a record lacks the ``'data'`` or ``'name'`` key.
    """
    vectors = {}
    for person_id in people:
        record = people[person_id]
        # 'data' must be a string key; the bare name `data` is not defined
        # in this function and would raise NameError.
        vector = generate(record['data'])
        # Create the inner dict explicitly: assigning into vectors[ID][...]
        # on a plain dict fails with KeyError for an ID not yet present.
        vectors[person_id] = {'name': record['name'], 'tokens': vector}
    return vectors
def generate_token_vectors(people):
    """Build a name/token-list record for every person in *people*.

    Args:
        people: Mapping of person ID to a record that provides at least
            the ``'data'`` and ``'name'`` keys.

    Returns:
        A ``defaultdict(dict)`` mapping each ID to
        ``{'name': ..., 'tokens': [...]}``, where ``'tokens'`` is the
        whitespace-split result of ``generate`` for the record's
        ``'data'`` value. Records for which tokenizing raises
        ``AttributeError`` are skipped entirely.

    Raises:
        KeyError: If a record lacks the ``'data'`` or ``'name'`` key.
    """
    vectors = defaultdict(dict)
    for person_id in people:
        record = people[person_id]
        try:
            # Tokenize before touching `vectors`, so a failure cannot leave
            # behind an entry that has a name but no tokens.
            tokens = generate(record['data']).split()  # Need a string
        except AttributeError:
            # Best-effort: skip the record when no token string could be
            # produced (the original note mentions one NoneType blob).
            continue
        vectors[person_id] = {'name': record['name'], 'tokens': tokens}
    return vectors
示例#3
0
def queryVector(rawQuery):
    """Compute a TF-IDF weight for each distinct term of a query.

    The query is passed through ``generate``, split on single spaces, and
    every distinct non-empty token is scored as its count within the query
    multiplied by its entry in the module-level ``idfIndex``.

    Args:
        rawQuery: The raw query; handed unchanged to ``generate``, whose
            result is assumed to be a string (it is split on spaces).

    Returns:
        A dict mapping each distinct query token to ``idf * tf``. Tokens
        missing from ``idfIndex`` get a score of 0. A query that yields no
        tokens produces an empty dict.
    """
    # Function-scope import so this block does not depend on the file header.
    from collections import Counter

    cleanQuery = generate(rawQuery)
    # Skip the empty strings that split(' ') emits for an empty query or
    # for leading, trailing, or repeated spaces.
    term_counts = Counter(tok for tok in cleanQuery.split(' ') if tok)

    # Look up each term under its own key. Reusing the loop variable left
    # over from the counting pass would give every term the last token's
    # score.
    return {tok: idfIndex.get(tok, 0) * tf for tok, tf in term_counts.items()}