def compute_vectors(target_url, keyword):
    """Store a new ``Urls`` row for ``target_url`` unless one already exists.

    When no row with this URL is found, ``extract_from_url`` is called and,
    provided it returned a non-empty title, a ``Urls`` row is populated
    (title, vector, freqs, keyword, pod, snippet and optionally cc), added
    to the session and committed.

    Args:
        target_url: URL to look up and, if needed, index.
        keyword: Keyword saved with the row; the empty string is replaced
            by "generic".

    Returns:
        True if the URL was already stored or has just been stored,
        False if the extracted title was empty (nothing is saved).
    """
    print("Computing vectors for", target_url)

    # Already indexed: nothing to do.
    if db.session.query(Urls).filter_by(url=target_url).all():
        return True

    entry = Urls(url=target_url)
    title, body_str, snippet, cc = extract_from_url(target_url)

    # An empty title is treated as a failed extraction.
    if title == "":
        return False

    text = clean_text(title + " " + body_str)
    dist_vector = compute_dist_vector(text, dm_dict_en)
    freq_vector = compute_freq_vector(text)

    entry.title = str(title)
    entry.vector = convert_to_string(dist_vector)
    entry.freqs = convert_dict_to_string(freq_vector)
    entry.keyword = "generic" if keyword == "" else keyword
    entry.pod = "Me"
    # Fall back on the title when no snippet was extracted.
    entry.snippet = entry.title if snippet == "" else str(snippet)
    if cc:
        # Only ever set to True here; a falsy cc leaves the column untouched.
        entry.cc = True

    db.session.add(entry)
    db.session.commit()
    return True
def compute_pod_summary(name):
    """Build the summary vector and word list for the pod called ``name``.

    This function is very similar to 'self' in PeARS-pod.

    Every ``Urls`` row whose ``pod`` equals ``name`` contributes its vector
    to a running 400-dimensional sum and its word frequencies to a merged
    count table.

    Args:
        name: Pod name used to select the ``Urls`` rows.

    Returns:
        A ``(DS_vector, word_vector)`` tuple. ``DS_vector`` is the summed
        vector after ``normalise`` and ``convert_to_string``.
        ``word_vector`` is a string of ``word:count`` items, each followed
        by a space, for at most the 300 highest-count words in descending
        order of count.
    """
    vector_sum = np.zeros(400)
    totals = {}
    for entry in db.session.query(Urls).filter_by(pod=name).all():
        vector_sum += convert_to_array(entry.vector)
        for word, count in convert_string_to_dict(entry.freqs).items():
            totals[word] = totals.get(word, 0) + int(count)

    summary_vector = convert_to_string(normalise(vector_sum))

    # Keep only the 300 most frequent words.
    ranked = sorted(totals, key=totals.get, reverse=True)
    word_vector = "".join(
        word + ':' + str(totals[word]) + ' ' for word in ranked[:300]
    )
    return summary_vector, word_vector
def pod_from_scratch(name, url, language, description):
    """Create the ``Pods`` row for ``url`` if needed, then (re)fill it.

    A row is inserted and committed when none exists for ``url``. The row
    is then reloaded and its name, description, language, ``DS_vector`` and
    ``word_vector`` are overwritten, and the change is committed.

    Args:
        name: Pod name; also used, lowercased, as vector input.
        url: Pod URL, used as the lookup key.
        language: Value stored in the row's ``language`` field.
        description: Pod description; also used, lowercased, as vector
            input.

    Returns:
        None.
    """
    if not db.session.query(Pods).filter_by(url=url).all():
        db.session.add(Pods(url=url))
        db.session.commit()

    pod = Pods.query.filter(Pods.url == url).first()
    pod.name = name
    pod.description = description
    pod.language = language

    # Hack: compute_query_vectors is reused to get vectors out of the pod's
    # name and description.
    pod_text = name.lower() + ' ' + description.lower()
    vector, freqs = compute_query_vectors(pod_text, dm_dict_en)
    pod.DS_vector = convert_to_string(normalise(vector))

    # Keep only the 300 most frequent words, highest count first.
    ranked = sorted(freqs, key=freqs.get, reverse=True)
    pod.word_vector = "".join(
        word + ':' + str(freqs[word]) + ' ' for word in ranked[:300]
    )

    # Any falsy value of `registered` is written back as an explicit False.
    if not pod.registered:
        pod.registered = False
    db.session.commit()