示例#1
0
    def keywords_work(self, title, s=True):
        """Return the keywords of ``title`` that have not been removed.

        Args:
            title: Text handed to ``get_keywords`` for keyword extraction.
            s: When truthy, return the surviving keywords joined by single
                spaces; otherwise return them as a list.

        Returns:
            A space-joined string or a list holding every keyword that is
            not in ``self.removed_keywords``, in the order produced by
            ``get_keywords``.
        """
        kept = [word for word in get_keywords(title)
                if word not in self.removed_keywords]
        return ' '.join(kept) if s else kept
示例#2
0
    def add_link(self, link, page=1):
        """Record a link's title keywords in the per-subreddit statistics.

        The whole update runs while holding ``self.lock``. The lock is taken
        with a ``with`` block so it is released on every exit path, including
        when an exception is raised part-way through.

        Args:
            link: Mapping read with ``.get`` for the keys ``'name'``,
                ``'title'`` and ``'subreddit'`` (each defaults to ``''``).
            page: When equal to 0, the title's keywords are also added to
                the ``keyword_counts_newest`` / ``keyword_titles_newest``
                tables. Note this happens before the duplicate check, so
                it applies to already-seen links as well.

        Returns:
            False if ``link``'s name is already in ``self.link_names``
            (nothing else is updated), True once the link has been recorded.
        """
        with self.lock:
            name = link.get('name', '')
            title = link.get('title', '')
            sub = link.get('subreddit', '')
            title_keywords = get_keywords(title)

            if page == 0:
                for w in title_keywords:
                    self.keyword_counts_newest[w] = self.keyword_counts_newest.get(w, 0)+1
                    # Build a new list rather than appending in place, so any
                    # previously handed-out list object is left untouched.
                    self.keyword_titles_newest[w] = self.keyword_titles_newest.get(w, [])+[title]

            if name in self.link_names:
                return False

            self.titles.append(title)
            # link_names is only used for membership tests; the value is unused.
            self.link_names[name] = None

            # Score the classifier on this title before its keywords are
            # added to self.words below.
            guess = self.identify_work(title)
            if guess == sub:
                self.correct += 1

            self.subs[sub] = self.subs.get(sub, 0) + len(title_keywords)

            for w in title_keywords:
                if w in self.words:
                    # A None entry means the word is not counted
                    # (presumably an excluded keyword -- confirm with the
                    # code that populates self.words).
                    if self.words[w] is None:
                        continue

                    self.words[w][sub] = self.words[w].get(sub, 0)+1
                    self.words[w][u'all'] = self.words[w].get(u'all', 0)+1
                else:
                    self.words[w] = {sub: 1, u'all': 1}

            return True