# Assumes db, the Pods and Urls models, cosine_similarity, convert_to_array,
# term_cosine, score_url_overlap and generic_overlap are imported at module level.
import math


def score_pods(query, query_dist, query_freqs):
    '''Score pods for a query.'''
    pod_scores = {}
    score_sum = 0.0
    pods = db.session.query(Pods).filter_by(registered=True).all()
    for p in pods:
        # Distributional score: cosine between the pod's DS vector and the query vector.
        DS_score = cosine_similarity(convert_to_array(p.DS_vector), query_dist)
        # Term score and coverage of the query vocabulary by the pod's word vector.
        term_score, coverage = term_cosine.run(query, query_freqs, p.word_vector)
        score = DS_score + term_score + 2 * coverage
        if math.isnan(score):
            score = 0
        pod_scores[p.name] = score
        score_sum += score
    print(pod_scores)
    # If all scores are rubbish, search the entire pod collection (we're desperate!).
    if score_sum < 1:
        return list(pod_scores.keys())
    else:
        best_pods = []
        for k in sorted(pod_scores, key=pod_scores.get, reverse=True):
            if len(best_pods) < 1:
                best_pods.append(k)
            else:
                break
        return best_pods
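
# The selection loop above only ever keeps the single highest-scoring pod. For
# reference, a minimal sketch of the same selection generalised to the top k pods;
# best_k_pods and its default k=3 are illustrative additions, not original code.
def best_k_pods(pod_scores, k=3):
    '''Return the names of the k highest-scoring pods.'''
    return sorted(pod_scores, key=pod_scores.get, reverse=True)[:k]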

def score(query, query_dist, query_freqs):
    """ Get distributional and term scores for all unregistered pods """
    DS_scores = {}
    term_scores = {}
    coverages = {}
    for p in db.session.query(Pods).filter_by(registered=False).all():
        # Cosine between the pod's DS vector and the query's distributional vector.
        DS_scores[p.url] = cosine_similarity(convert_to_array(p.DS_vector), query_dist)
        # term_cosine.run also returns a coverage value, kept locally but not returned.
        term_scores[p.url], coverages[p.url] = term_cosine.run(query, query_freqs, p.word_vector)
    return DS_scores, term_scores
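
# The function above hands back two dictionaries keyed by pod URL. A caller would
# typically fold them into a single score per pod before ranking. Minimal sketch,
# assuming the same additive combination used in score_pods (coverage is omitted
# because this variant does not return it); rank_pods is a hypothetical helper.
def rank_pods(DS_scores, term_scores):
    '''Rank pod URLs by combined distributional + term score.'''
    combined = {url: DS_scores[url] + term_scores.get(url, 0.0) for url in DS_scores}
    return sorted(combined, key=combined.get, reverse=True)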

def score(query, query_dist, query_freqs, pod):
    """ Get various scores
        -- This is slow, slow, slow. Add code for vec to matrix calculations.
    """
    DS_scores = {}
    URL_scores = {}
    title_scores = {}
    term_scores = {}
    coverages = {}
    #cosines = cosine_to_matrix(query_dist, DS_M)  # Code for vec to matrix cosine calculation -- work in progress
    for u in db.session.query(Urls).filter_by(pod=pod).all():
        DS_scores[u.url] = cosine_similarity(convert_to_array(u.vector), query_dist)
        #DS_scores[u.url] = cosines[url_to_mat[u.url]]
        URL_scores[u.url] = score_url_overlap(query, u.url)
        title_scores[u.url] = generic_overlap(query, u.title)
        term_scores[u.url], coverages[u.url] = term_cosine.run(query, query_freqs, u.freqs)
    return DS_scores, URL_scores, title_scores, term_scores, coverages
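
# The docstring above flags the per-URL loop as slow and asks for vec-to-matrix
# calculations. A minimal numpy sketch of the cosine_to_matrix call referenced in
# the commented-out lines: DS_M is assumed to be a matrix with one document vector
# per row, and url_to_mat a dict mapping each URL to its row index. Both names are
# placeholders taken from the comments, not existing structures in this codebase.
import numpy as np

def cosine_to_matrix(query_dist, DS_M):
    """Return the cosine between a query vector and every row of matrix DS_M."""
    q = np.asarray(query_dist, dtype=float).ravel()
    M = np.asarray(DS_M, dtype=float)
    norms = np.linalg.norm(M, axis=1) * np.linalg.norm(q)
    norms[norms == 0] = 1e-9  # guard against division by zero for empty vectors
    return M @ q / norms

# With such a matrix in place, the loop above could replace one cosine call per URL
# with a single lookup, e.g. DS_scores[u.url] = cosines[url_to_mat[u.url]].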