Exemplo n.º 1
0
def sumMeasure(M_lil, M_csc, queryString):

    """
    Score every document that matches at least one query term by summing
    its matrix entries over all of the query terms.

    M_lil       -- matrix read as M_lil[pmidHash, termHash] to obtain the
                   per-term values (presumably a scipy.sparse LIL matrix;
                   confirm against the caller).
    M_csc       -- matrix handed to SearchTermDoc.extractRowIndices to
                   locate the matching rows (presumably the CSC form of the
                   same data; confirm against the caller).
    queryString -- the query, passed through unchanged to
                   SearchTermDoc.extractRowIndices.

    Returns a list of (score, pmidHash) tuples, one per matching row, in
    no particular order.  The list is empty when extractRowIndices reports
    no rows at all.
    """

    # NOTE: normalising each score by the length of its row vector (via a
    # precomputed RL-hash from SearchTermDoc.createRLHash) is currently
    # disabled; the scores returned here are plain, unnormalised sums.

    # Extract the relevant indices of the row-vectors (pmid-hashes)
    searchIndices, hashedSearchTerms = SearchTermDoc.extractRowIndices(M_csc, queryString)

    # Union the index arrays so each row is scored only once.  Starting
    # from an empty set keeps this safe when no index arrays come back
    # (a bare reduce() without an initial value raises TypeError there).
    uniqueRows = set().union(*searchIndices)

    return [
        (sum(M_lil[pmidHash, termHash] for termHash in hashedSearchTerms), pmidHash)
        for pmidHash in uniqueRows
    ]
Exemplo n.º 2
0
def cosineMeasure(M_lil, M_csc, queryString):

    """
    Score every document containing one or more of the query terms in the
    query string (hence the implicit 'or' between the query terms).

    For each matching row the score is the sum of that row's M_lil entries
    over all hashed query terms.

    NOTE(review): despite the function name, no cosine normalisation or
    square root is applied in this function; the result is only a cosine
    score if M_lil already holds suitably normalised values -- confirm
    against the code that builds the matrix.

    M_lil       -- matrix read as M_lil[pmidHash, termHash].
    M_csc       -- matrix handed to SearchTermDoc.extractRowIndices to
                   locate the matching rows.
    queryString -- the query, passed through unchanged to
                   SearchTermDoc.extractRowIndices.

    Returns a list of (score, pmidHash) tuples, one per matching row, in
    no particular order.  The list is empty when extractRowIndices reports
    no rows at all.
    """

    # Extract the relevant indices of the row-vectors (pmid-hashes)
    searchIndices, hashedSearchTerms = SearchTermDoc.extractRowIndices(M_csc, queryString)

    # Union the index arrays so each row is scored only once.  Starting
    # from an empty set keeps this safe when no index arrays come back
    # (a bare reduce() without an initial value raises TypeError there).
    uniqueRows = set().union(*searchIndices)

    scored = []
    for pmidHash in uniqueRows:
        rowScore = sum(M_lil[pmidHash, termHash] for termHash in hashedSearchTerms)
        scored.append((rowScore, pmidHash))

    return scored