示例#1
0
def score_pods(query, query_dist, query_freqs):
    """Score the registered pods for a query and choose which to search.

    A pod's score is the cosine similarity between its DS vector and
    ``query_dist``, plus the term score, plus twice the coverage, where the
    last two come from ``term_cosine.run``. A NaN total is recorded as 0.
    The score table is printed before the selection is made.

    Returns a list of pod names: all scored pods when the scores sum to
    less than 1, otherwise only the highest-scoring pod.
    """
    scores_by_name = {}
    total = 0.0
    registered_pods = db.session.query(Pods).filter_by(registered=True).all()
    for pod in registered_pods:
        pod_vector = convert_to_array(pod.DS_vector)
        ds_component = cosine_similarity(pod_vector, query_dist)
        term_component, coverage = term_cosine.run(
            query, query_freqs, pod.word_vector)
        pod_score = ds_component + term_component + 2 * coverage
        # A NaN total counts as zero so it cannot poison the running sum.
        pod_score = 0 if math.isnan(pod_score) else pod_score
        scores_by_name[pod.name] = pod_score
        total += pod_score
    print(scores_by_name)

    # If all scores are rubbish, fall back to the entire pod collection.
    if total < 1:
        return list(scores_by_name)

    # Otherwise keep only the top-ranked pod.
    ranked = sorted(scores_by_name, key=scores_by_name.get, reverse=True)
    return ranked[:1]
示例#2
0
def score(query, query_dist, query_freqs):
    """Compute distributional and term scores per pod for a query.

    Returns a pair of dicts keyed by pod URL, ``(DS_scores, term_scores)``:
    the first holds the cosine similarity between each pod's DS vector and
    ``query_dist``, the second the term score from ``term_cosine.run``.
    """
    ds_by_url = {}
    term_by_url = {}
    # NOTE(review): this selects pods with registered=False -- confirm that
    # unregistered pods are the intended set.
    selected_pods = db.session.query(Pods).filter_by(registered=False).all()
    for pod in selected_pods:
        pod_vector = convert_to_array(pod.DS_vector)
        ds_by_url[pod.url] = cosine_similarity(pod_vector, query_dist)
        # term_cosine.run also yields a coverage value; it is not part of
        # this function's return value, so it is discarded here.
        term_by_url[pod.url], _ = term_cosine.run(
            query, query_freqs, pod.word_vector)
    return ds_by_url, term_by_url
示例#3
0
def score(query, query_dist, query_freqs, pod):
    """Score every URL stored for ``pod`` against a query.

    Returns five dicts keyed by URL, in this order: distributional
    (cosine) scores, URL-overlap scores, title-overlap scores, term
    scores and coverages. The last two both come from ``term_cosine.run``.
    """
    ds_by_url = {}
    overlap_by_url = {}
    title_by_url = {}
    term_by_url = {}
    coverage_by_url = {}
    # TODO: scoring one row at a time is slow; a vector-to-matrix cosine
    # (cosine_to_matrix) was sketched as a replacement but is unfinished.
    rows = db.session.query(Urls).filter_by(pod=pod).all()
    for row in rows:
        address = row.url
        row_vector = convert_to_array(row.vector)
        ds_by_url[address] = cosine_similarity(row_vector, query_dist)
        overlap_by_url[address] = score_url_overlap(query, address)
        title_by_url[address] = generic_overlap(query, row.title)
        term_value, coverage_value = term_cosine.run(
            query, query_freqs, row.freqs)
        term_by_url[address] = term_value
        coverage_by_url[address] = coverage_value
    return ds_by_url, overlap_by_url, title_by_url, term_by_url, coverage_by_url