Пример #1
0
def digest_feeds_entries(feed_url, keywords, posted_titles, words_scores):
    """Score every not-yet-posted entry of a feed.

    Args:
        feed_url: Forwarded to ``feedworker.process_feed``.
        keywords: Forwarded, together with each entry's text, to
            ``textworker.count_word_frequency``.
        posted_titles: Container of titles to skip; an entry is dropped when
            its ``"title"`` is found in it.
        words_scores: Forwarded to ``calcworker.get_document_score``.

    Returns:
        ``None`` when no entry is left after filtering, otherwise a list with
        one dict per remaining entry holding the keys ``"title"``, ``"link"``,
        ``"score"``, ``"match_count"`` and ``"matched_words"``.
    """
    all_feed_entries = feedworker.process_feed(feed_url)
    # exclude published titles
    feed_entries = [
        entry for entry in all_feed_entries if entry["title"] not in posted_titles
    ]
    if not feed_entries:
        return None

    # purify_texts takes the contents as one list
    # NOTE(review): the meaning of the two True flags is defined by
    # textworker.purify_texts and is not visible here - confirm.
    entries_content = [entry["content"] for entry in feed_entries]
    pure_entries = textworker.purify_texts(entries_content, True, True)
    # each purified entry is an iterable of strings; turn it into one string
    entry_texts = [" ".join(tokens) for tokens in pure_entries]

    feed_records = []
    # Pair every entry with its purified text. Like the positional lookup this
    # replaces, it relies on purify_texts keeping the order of its input.
    for entry, text in zip(feed_entries, entry_texts):
        # match_count and matched_words are necessary only for debugging
        entry_score, match_count, matched_words = calcworker.get_document_score(
            textworker.count_word_frequency(text, keywords), words_scores
        )
        feed_records.append(
            {
                "title": entry["title"],
                "link": entry["link"],
                "score": entry_score,
                "match_count": match_count,
                "matched_words": matched_words,
            }
        )
    return feed_records
Пример #2
0
def process_user_statuses(screen_name, rank, keywords):
    """Compute local word scores from a user's statuses.

    The statuses returned by ``get_user_statuses`` are UTF-8 encoded, passed
    through ``textworker.purify_texts`` and ``textworker.concat_texts``, and
    the keyword frequencies of the result are handed, with ``rank``, to
    ``calcworker.get_words_local_scores``.

    Args:
        screen_name: Forwarded to ``get_user_statuses``.
        rank: Forwarded to ``calcworker.get_words_local_scores``.
        keywords: Forwarded to ``textworker.count_word_frequency``.

    Returns:
        Whatever ``calcworker.get_words_local_scores`` returns.
    """
    # NOTE(review): the True / 100 arguments are interpreted by
    # get_user_statuses; their meaning is not visible here - confirm.
    encoded_texts = []
    for status in get_user_statuses(screen_name, True, 100):
        encoded_texts.append(status.text.encode("utf-8"))

    cleaned = textworker.purify_texts(encoded_texts)
    merged = textworker.concat_texts(cleaned)
    frequency = textworker.count_word_frequency(merged, keywords)
    return calcworker.get_words_local_scores(frequency, rank)
Пример #3
0
def process_user_statuses(screen_name, rank, keywords):
    """Turn a user's statuses into local word scores.

    Pipeline: fetch statuses, UTF-8 encode each status text, purify the
    texts, concatenate them, count keyword frequencies, then score them.

    Args:
        screen_name: Passed to ``get_user_statuses``.
        rank: Passed to ``calcworker.get_words_local_scores``.
        keywords: Passed to ``textworker.count_word_frequency``.

    Returns:
        The result of ``calcworker.get_words_local_scores``.
    """
    # NOTE(review): what True and 100 select is decided by get_user_statuses,
    # which is not visible here - confirm.
    fetched = get_user_statuses(screen_name, True, 100)
    raw_texts = [item.text.encode("utf-8") for item in fetched]
    joined_text = textworker.concat_texts(textworker.purify_texts(raw_texts))
    return calcworker.get_words_local_scores(
        textworker.count_word_frequency(joined_text, keywords),
        rank,
    )
Пример #4
0
def digest_feeds_entries(feed_url, keywords, posted_titles, words_scores):
    """Build a scored record for each feed entry that has not been posted yet.

    Args:
        feed_url: Passed to ``feedworker.process_feed``.
        keywords: Passed, with each entry's text, to
            ``textworker.count_word_frequency``.
        posted_titles: Container of already published titles; entries whose
            ``"title"`` is in it are skipped.
        words_scores: Passed to ``calcworker.get_document_score``.

    Returns:
        ``None`` if nothing remains after filtering; otherwise a list of
        dicts, one per remaining entry, with the keys ``"title"``, ``"link"``,
        ``"score"``, ``"match_count"`` and ``"matched_words"``.
    """
    all_feed_entries = feedworker.process_feed(feed_url)
    # exclude published titles
    feed_entries = [
        entry for entry in all_feed_entries if entry["title"] not in posted_titles
    ]
    if not feed_entries:
        return None

    # collect the contents into a list that purify_texts can take
    # NOTE(review): the two True flags are interpreted by
    # textworker.purify_texts; their meaning is not visible here - confirm.
    entries_content = [entry["content"] for entry in feed_entries]
    pure_entries = textworker.purify_texts(entries_content, True, True)
    # each purified entry is an iterable of strings; join it into one string
    pure_entries = [" ".join(entry) for entry in pure_entries]

    feed_records = []
    # Walk entries and purified texts together. Like the index-based lookup it
    # replaces, this relies on purify_texts preserving the input order.
    for feed_entry, pure_entry in zip(feed_entries, pure_entries):
        word_frequency = textworker.count_word_frequency(pure_entry, keywords)
        # match_count and matched_words are necessary only for debugging
        entry_score, match_count, matched_words = calcworker.get_document_score(
            word_frequency, words_scores
        )
        feed_records.append(
            {
                "title": feed_entry["title"],
                "link": feed_entry["link"],
                "score": entry_score,
                "match_count": match_count,
                "matched_words": matched_words,
            }
        )
    return feed_records