def keywords_work(self, title, s=True):
    """Extract the keywords of *title*, dropping any in ``self.removed_keywords``.

    Keywords come from ``get_keywords(title)`` and keep the order in which
    that helper yields them.

    Args:
        title: Text passed to ``get_keywords``.
        s: When truthy, return the surviving keywords joined by single
            spaces; otherwise return them as a list.

    Returns:
        A space-joined string if *s* is truthy, else a list of keywords.
    """
    kept = [
        word
        for word in get_keywords(title)
        if word not in self.removed_keywords
    ]
    return ' '.join(kept) if s else kept
def add_link(self, link, page=1):
    """Record a link's title keywords in the per-subreddit word statistics.

    The whole operation runs while holding ``self.lock``. The lock is
    released in a ``finally`` clause so that an exception raised anywhere in
    the body (for example by ``get_keywords`` or ``self.identify_work``)
    cannot leave the lock held and block every later caller.

    Args:
        link: Mapping read with ``.get`` for the keys ``'name'``,
            ``'title'`` and ``'subreddit'``; each defaults to ``''``.
        page: When equal to ``0``, the title's keywords are also added to
            the ``keyword_counts_newest`` / ``keyword_titles_newest``
            tallies. This happens before the duplicate check, so a link
            that was already seen still contributes to those tallies.

    Returns:
        ``False`` if ``link``'s name is already in ``self.link_names``
        (nothing beyond the page-0 tallies is updated), ``True`` otherwise.
    """
    self.lock.acquire()
    try:
        name = link.get('name', '')
        title = link.get('title', '')
        sub = link.get('subreddit', '')
        title_keywords = get_keywords(title)

        if page == 0:
            for w in title_keywords:
                self.keyword_counts_newest[w] = (
                    self.keyword_counts_newest.get(w, 0) + 1
                )
                # Build a new list rather than appending in place, so any
                # previously handed-out list object is left untouched.
                self.keyword_titles_newest[w] = (
                    self.keyword_titles_newest.get(w, []) + [title]
                )

        if name in self.link_names:
            return False

        self.titles.append(title)
        self.link_names[name] = None

        # Score the classifier on this title before its words are learned.
        guess = self.identify_work(title)
        if guess == sub:
            self.correct += 1

        self.subs[sub] = self.subs.get(sub, 0) + len(title_keywords)

        for w in title_keywords:
            if w in self.words:
                counts = self.words[w]
                # A None entry is skipped without counting
                # (presumably marks an excluded word -- TODO confirm).
                if counts is None:
                    continue
                counts[sub] = counts.get(sub, 0) + 1
                counts[u'all'] = counts.get(u'all', 0) + 1
            else:
                self.words[w] = {sub: 1, u'all': 1}

        return True
    finally:
        self.lock.release()