def compare_documents(feed):
    """
    Compare the articles of a feed pair by pair and collect the duplicates.

    Two articles are reported as duplicates when their titles are equal once
    passed through ``clear_string`` and their ``date`` values lie less than
    one day apart, regardless of which of the two is the older one.

    Args:
        feed: object exposing an ``articles`` sequence. Every article must
            provide ``title``, ``date`` and ``retrieved_date`` attributes.

    Returns:
        A list of 2-tuples of articles. In each tuple the article whose
        ``retrieved_date`` is strictly earlier comes first; when it is not
        strictly earlier, the second article of the pair is placed first.
    """
    one_day = timedelta(days=1)
    duplicates = []
    for first, second in itertools.combinations(feed.articles, 2):
        # Title check first: it short-circuits the date arithmetic for the
        # (common) case of unrelated articles.
        if clear_string(first.title) != clear_string(second.title):
            continue
        # abs() is required: when `first` is older than `second` the raw
        # difference is negative, and a negative timedelta is always below
        # one day, which would flag articles published far apart.
        if abs(first.date - second.date) >= one_day:
            continue
        if first.retrieved_date < second.retrieved_date:
            duplicates.append((first, second))
        else:
            duplicates.append((second, first))
    return duplicates
def top_words(articles, n=10, size=5):
    """
    Return the n most frequent words found in the content of the articles.

    Only words made of at least `size` word characters are counted. Words
    are lower-cased before counting, and those present in the collection
    returned by ``load_stop_words()`` are skipped. Each article's content is
    passed through ``clear_string`` before the words are extracted.

    Returns a list of ``(word, count)`` tuples as produced by
    ``Counter.most_common(n)``.
    """
    ignored = load_stop_words()
    frequencies = Counter()
    pattern = re.compile(r'\b\w{%s,}\b' % size, re.I)

    for item in articles:
        for token in pattern.findall(clear_string(item.content)):
            lowered = token.lower()
            if lowered in ignored:
                continue
            frequencies[lowered] += 1

    return frequencies.most_common(n)
def article(article_id=None):
    """
    Render the 'article.html' template for the requested article.

    The article is fetched through an ``ArticleController`` built with the
    id of the current user (``g.user.id``). The template also receives a
    previous and a next article: when the article reports no previous
    article, the first article of its source is used; when it reports no
    next article, the last article of its source is used.
    """
    current = ArticleController(g.user.id).get(id=article_id)

    before = current.previous_article()
    if before is None:
        # No predecessor reported: fall back to the first article of the source.
        before = current.source.articles[0]

    after = current.next_article()
    if after is None:
        # No successor reported: fall back to the last article of the source.
        after = current.source.articles[-1]

    context = {
        'head_titles': [clear_string(current.title)],
        'article': current,
        'previous_article': before,
        'next_article': after,
    }
    return render_template('article.html', **context)