Пример #1
0
 def get_duplicates(self, feed_id):
     """
     Find pairs of likely-duplicate articles within one feed.

     Two articles are considered duplicates when their titles are equal
     after ``clear_string`` and their dates are less than one day apart,
     regardless of which of the two is the more recent.

     :param feed_id: id handed to ``self.get`` to load the feed.
     :return: ``(feed, duplicates)``; ``duplicates`` is a list of article
         pairs in which the article with the earlier "retrieved date"
         comes first.
     """
     feed = self.get(id=feed_id)
     # Normalise each title once instead of once per compared pair.
     candidates = [(art, clear_string(art.title)) for art in feed.articles]
     one_day = timedelta(days=1)
     duplicates = []
     for (first, first_title), (second, second_title) in \
             itertools.combinations(candidates, 2):
         if first_title != second_title:
             continue
         # abs(): a plain difference is negative whenever ``first`` is the
         # older article, and a negative timedelta always passes a
         # "< one day" test however far apart the two dates really are.
         if abs(first.date - second.date) >= one_day:
             continue
         if first.retrieved_date < second.retrieved_date:
             duplicates.append((first, second))
         else:
             duplicates.append((second, first))
     return feed, duplicates
Пример #2
0
Файл: feed.py Проект: JARR/JARR
 def get_duplicates(self, feed_id):
     """
     Find pairs of likely-duplicate articles within one feed.

     Only the first 1000 entries of ``feed.articles`` are compared
     (presumably to bound the quadratic number of pairs -- confirm).
     Two articles are considered duplicates when their titles are equal
     after ``clear_string`` and their dates are less than one day apart,
     regardless of which of the two is the more recent.

     :param feed_id: id handed to ``self.get`` to load the feed.
     :return: ``(feed, duplicates)``; ``duplicates`` is a list of article
         pairs in which the article with the earlier "retrieved date"
         comes first.
     """
     feed = self.get(id=feed_id)
     # Normalise each title once instead of once per compared pair.
     candidates = [(art, clear_string(art.title))
                   for art in feed.articles[:1000]]
     one_day = timedelta(days=1)
     duplicates = []
     for (first, first_title), (second, second_title) in \
             itertools.combinations(candidates, 2):
         if first_title != second_title:
             continue
         # abs(): a plain difference is negative whenever ``first`` is the
         # older article, and a negative timedelta always passes a
         # "< one day" test however far apart the two dates really are.
         if abs(first.date - second.date) >= one_day:
             continue
         if first.retrieved_date < second.retrieved_date:
             duplicates.append((first, second))
         else:
             duplicates.append((second, first))
     return feed, duplicates
Пример #3
0
def feed_view(feed_id=None, user_id=None):
    """
    Render the page of one feed: its articles, a tag cloud and statistics.

    Articles are loaded newest first (ordered by descending "date"), so the
    first element is the most recent post and the last one the oldest.

    :param feed_id: id of the feed to display.
    :param user_id: id given to the feed, category and article controllers.
    :return: the rendered 'feed.html' template.
    """
    feed = FeedController(user_id).get(id=feed_id)
    word_size = 6  # minimum word length taken into account for the cloud
    category = None
    if feed.category_id:
        category = CategoryController(user_id).get(id=feed.category_id)
    articles = ArticleController(user_id) \
            .read(feed_id=feed_id) \
            .order_by(desc("date")).all()
    top_words = misc_utils.top_words(articles, n=50, size=word_size)
    tag_cloud = misc_utils.tag_cloud(top_words)

    today = datetime.now()
    try:
        last_article = articles[0].date
        first_article = articles[-1].date
        delta = last_article - first_article
    except (IndexError, TypeError):
        # No article at all, or dates that cannot be subtracted: fall back
        # to the epoch so the template still receives datetime objects.
        last_article = datetime.fromtimestamp(0)
        first_article = datetime.fromtimestamp(0)
        delta = last_article - first_article
    # A feed whose posts all fall within the same day spans zero days.
    # Guard the division explicitly so that case keeps its real dates
    # instead of being reset to the epoch by a swallowed ZeroDivisionError.
    span_days = abs(delta.days)
    if span_days:
        average = round(float(len(articles)) / span_days, 2)
    else:
        average = 0
    elapsed = today - last_article

    return render_template('feed.html',
                           head_titles=[utils.clear_string(feed.title)],
                           feed=feed, articles=articles,
                           tag_cloud=tag_cloud,
                           first_post_date=first_article,
                           end_post_date=last_article, category=category,
                           average=average, delta=delta, elapsed=elapsed)
Пример #4
0
def article(article_id=None):
    """
    Render the page of a single article.

    The article is fetched through a controller built with the id of the
    current user, and its cleaned title is used as the page head title.
    """
    controller = ArticleController(current_user.id)
    entry = controller.get(id=article_id)
    titles = [clear_string(entry.title)]
    return render_template('article.html', head_titles=titles, article=entry)
Пример #5
0
def article(article_id=None):
    """
    Render the page of a single article.

    The article is fetched through a controller built with the id of the
    current user, and its cleaned title is used as the page head title.
    """
    controller = ArticleController(current_user.id)
    entry = controller.get(id=article_id)
    titles = [clear_string(entry.title)]
    return render_template('article.html', head_titles=titles, article=entry)
Пример #6
0
def article_pub(article_id=None):
    """
    Render an article publicly.

    The article is shown only when its source is not private and the
    owner of that source has a public profile; otherwise the 404 error
    page is returned with a 404 status code.
    """
    entry = ArticleController().get(id=article_id)
    source = entry.source
    publicly_visible = not source.private and source.user.is_public_profile
    if not publicly_visible:
        return render_template('errors/404.html'), 404
    titles = [clear_string(entry.title)]
    return render_template('article_pub.html',
                           head_titles=titles,
                           article=entry)
Пример #7
0
def article_pub(article_id=None):
    """
    Render an article publicly.

    The article is shown only when its source is not private and the
    owner of that source has a public profile; otherwise the 404 error
    page is returned with a 404 status code.
    """
    entry = ArticleController().get(id=article_id)
    source = entry.source
    publicly_visible = not source.private and source.user.is_public_profile
    if not publicly_visible:
        return render_template('errors/404.html'), 404
    titles = [clear_string(entry.title)]
    return render_template('article_pub.html',
                           head_titles=titles,
                           article=entry)
Пример #8
0
def article(article_id=None):
    """
    Render the page of a single article and flag it as read.

    When the fetched article is not yet flagged as "readed", an update
    setting that flag to True is issued before the template is rendered.
    """
    controller = ArticleController(current_user.id)
    entry = controller.get(id=article_id)
    already_read = entry.readed
    if not already_read:
        controller.update({"id": entry.id}, {"readed": True})
    titles = [clear_string(entry.title)]
    return render_template("article.html", head_titles=titles, article=entry)
Пример #9
0
def top_words(articles, n=10, size=5):
    """
    Return the n most frequent words found in the content of the articles.

    Only words of at least ``size`` word characters are counted. Words are
    lower-cased before counting and those present in the loaded stop words
    are skipped. The result is a list of ``(word, count)`` tuples.
    """
    ignored = load_stop_words()
    pattern = re.compile(r'\b\w{%s,}\b' % size, re.I)
    counts = Counter()
    for item in articles:
        for match in pattern.findall(clear_string(item.content)):
            lowered = match.lower()
            if lowered in ignored:
                continue
            counts[lowered] += 1
    return counts.most_common(n)
Пример #10
0
def top_words(articles, n=10, size=5):
    """
    Return the n most frequent words found in the content of the articles.

    Only words of at least ``size`` word characters are counted. Words are
    lower-cased before counting and those present in the loaded stop words
    are skipped. The result is a list of ``(word, count)`` tuples.
    """
    ignored = load_stop_words()
    pattern = re.compile(r'\b\w{%s,}\b' % size, re.I)
    counts = Counter()
    for item in articles:
        for match in pattern.findall(clear_string(item.content)):
            lowered = match.lower()
            if lowered in ignored:
                continue
            counts[lowered] += 1
    return counts.most_common(n)
Пример #11
0
def feed_view(feed_id=None, user_id=None):
    """
    Render one page of a feed's articles together with feed statistics.

    :param feed_id: id of the feed to display.
    :param user_id: id given to the feed, category and article controllers.
    :return: the rendered "feed.html" template.
    """
    feed = FeedController(user_id).get(id=feed_id)
    category = None
    if feed.category_id:
        category = CategoryController(user_id).get(id=feed.category_id)
    articles = ArticleController(user_id).read_light(feed_id=feed_id)

    # Server-side pagination
    page, per_page, offset = get_page_args(per_page_parameter="per_page")
    # Count once and reuse the value for both pagination and the average.
    total = articles.count()
    pagination = Pagination(
        page=page,
        total=total,
        css_framework="bootstrap3",
        search=False,
        record_name="articles",
        per_page=per_page,
    )

    today = datetime.now()
    try:
        # NOTE(review): assumes read_light() yields the newest article
        # first -- confirm. Negative indexing is also rejected with an
        # IndexError by recent SQLAlchemy query objects -- verify against
        # the SQLAlchemy version in use.
        last_article = articles[0].date
        first_article = articles[-1].date
        delta = last_article - first_article
    except (IndexError, TypeError):
        # No usable first/last article: fall back to the epoch so the
        # template still receives datetime objects.
        last_article = datetime.fromtimestamp(0)
        first_article = datetime.fromtimestamp(0)
        delta = last_article - first_article
    # A feed whose posts all fall within the same day spans zero days.
    # Guard the division explicitly so that case keeps its real dates
    # instead of being reset to the epoch by a swallowed ZeroDivisionError.
    span_days = abs(delta.days)
    if span_days:
        average = round(float(total) / span_days, 2)
    else:
        average = 0
    elapsed = today - last_article

    return render_template(
        "feed.html",
        head_titles=[utils.clear_string(feed.title)],
        feed=feed,
        category=category,
        articles=articles.offset(offset).limit(per_page),
        pagination=pagination,
        first_post_date=first_article,
        end_post_date=last_article,
        average=average,
        delta=delta,
        elapsed=elapsed,
    )
Пример #12
0
def feed_view(feed_id=None, user_id=None):
    """
    Render the page of one feed: its articles, a tag cloud and statistics.

    Articles are loaded newest first (ordered by descending "date"), so the
    first element is the most recent post and the last one the oldest.

    :param feed_id: id of the feed to display.
    :param user_id: id given to the feed, category and article controllers.
    :return: the rendered 'feed.html' template.
    """
    feed = FeedController(user_id).get(id=feed_id)
    word_size = 6  # minimum word length taken into account for the cloud
    category = None
    if feed.category_id:
        category = CategoryController(user_id).get(id=feed.category_id)
    articles = ArticleController(user_id) \
            .read(feed_id=feed_id) \
            .order_by(desc("date")).all()
    top_words = misc_utils.top_words(articles, n=50, size=word_size)
    tag_cloud = misc_utils.tag_cloud(top_words)

    today = datetime.now()
    try:
        last_article = articles[0].date
        first_article = articles[-1].date
        delta = last_article - first_article
    except (IndexError, TypeError):
        # No article at all, or dates that cannot be subtracted: fall back
        # to the epoch so the template still receives datetime objects.
        last_article = datetime.fromtimestamp(0)
        first_article = datetime.fromtimestamp(0)
        delta = last_article - first_article
    # A feed whose posts all fall within the same day spans zero days.
    # Guard the division explicitly so that case keeps its real dates
    # instead of being reset to the epoch by a swallowed ZeroDivisionError.
    span_days = abs(delta.days)
    if span_days:
        average = round(float(len(articles)) / span_days, 2)
    else:
        average = 0
    elapsed = today - last_article

    return render_template('feed.html',
                           head_titles=[utils.clear_string(feed.title)],
                           feed=feed,
                           articles=articles,
                           tag_cloud=tag_cloud,
                           first_post_date=first_article,
                           end_post_date=last_article,
                           category=category,
                           average=average,
                           delta=delta,
                           elapsed=elapsed)