Example #1
def find_article_to_parse(create_post=True):
    """Finds a list article in CollegeHumor's latest article archive and posts the list article to Reddit."""

    website = ArticleType.CollegeHumor
    website_name = convert_enum_to_string(website)

    print(f"Searching {website_name}'s archive.")
    soup = lvm.soup_session(archive_link)

    for article in soup.find_all('h3', attrs={'class': 'title'}):

        article_link = 'http://www.collegehumor.com' + article.find(
            'a')['href']

        if not lvm.article_title_meets_posting_requirements(
                website, article.text):
            continue

        if article_published_today(article_link):
            article_list_text = get_article_list_text(
                article_link, lvm.get_article_list_count(article.text))
            if article_list_text and not lvm.post_previously_made(
                    article_link):
                print(f"{website_name} list article found: {article.text}")
                if create_post:
                    post_to_reddit(article.text, article_list_text,
                                   article_link, website)
                return True

    print(f"No {website_name} list articles were found to parse at this time.")
    return False
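
Every example fetches the site's archive page through lvm.soup_session(archive_link); both the lvm helper module and the module-level archive_link constant are defined outside these snippets. A minimal sketch of such a helper, assuming it simply downloads the page with requests and parses it with BeautifulSoup (the real module may configure its session, headers, and parser differently):

import requests
from bs4 import BeautifulSoup


def soup_session(link):
    """Hypothetical helper: fetch a page and return it as a BeautifulSoup tree."""
    # The User-Agent and timeout below are illustrative assumptions, not the
    # original module's configuration.
    response = requests.get(link, headers={'User-Agent': 'list-article-bot'}, timeout=10)
    response.raise_for_status()
    return BeautifulSoup(response.text, 'html.parser')
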
Example #2
def find_article_to_parse(create_post=True):
    """Finds a list article in Polygon's latest article archive and posts the list article to Reddit."""

    website = ArticleType.Polygon
    website_name = convert_enum_to_string(website)

    print(f"Searching {website_name}'s archive.")
    soup = lvm.soup_session(archive_link)

    for link in soup.find_all('h2',
                              attrs={'class': 'c-entry-box--compact__title'},
                              limit=max_articles_to_search):

        article_header = link.find('a', href=True)
        article_link = article_header['href']
        print(f"Parsing article: {article_link}")
        time.sleep(1)

        if not lvm.article_title_meets_posting_requirements(
                website, article_header.text):
            continue

        article_list_text = get_article_list_text(
            article_link, lvm.get_article_list_count(article_header.text))
        if article_list_text and not lvm.post_previously_made(article_link):
            print(f"{website_name} list article found: {article_header.text}")
            if create_post:
                post_to_reddit(article_header.text, article_list_text,
                               article_link, website)
            return True

    print(f"No {website_name} list articles were found to parse at this time.")
    return False
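
Each variant also derives the expected number of list items from the headline via lvm.get_article_list_count, whose implementation is not shown. One plausible sketch, assuming headlines follow the usual "14 Things ..." pattern, is to pull the first integer out of the title (an illustration, not the module's actual code):

import re


def get_article_list_count(article_title):
    """Hypothetical helper: return the leading count in a list-style headline, or 0."""
    match = re.search(r'\d+', article_title)
    return int(match.group()) if match else 0
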
Example #3
def find_article_to_parse(create_post=True):
    """Finds a list article in Business Insider's latest article archive and posts the list article to Reddit."""

    website = ArticleType.Business_Insider
    website_name = convert_enum_to_string(website)

    print(f"Searching {website_name}'s archive.")
    soup = lvm.soup_session(archive_link)

    for link in soup.find_all('h2', attrs={'class': 'tout-title default-tout'}):

        article_title = link.find('a', href=True)
        article_link = (article_title['href']
                        if article_title['href'].startswith("http")
                        else "http://www.businessinsider.com" + article_title['href'])

        print(f"Parsing article: {article_link}")
        time.sleep(1)

        if not lvm.article_title_meets_posting_requirements(website, article_title.text):
            continue

        article_list_text = get_article_list_text(article_link, lvm.get_article_list_count(article_title.text))
        if article_list_text and not lvm.post_previously_made(article_link):
            print(f"{website_name} list article found: {article_title.text}")
            if create_post:
                post_to_reddit(article_title.text, article_list_text, article_link, website)
            return True

    print(f"No {website_name} list articles were found to parse at this time.")
    return False
Example #4
def find_article_to_parse(create_post=True):
    """Finds a list article in BuzzFeed's latest article archive and posts the list article to Reddit."""

    website = ArticleType.BuzzFeed
    website_name = convert_enum_to_string(website)

    print(f"Searching {website_name}'s archive.")
    soup = lvm.soup_session(archive_link)

    for link in soup.find_all('article', attrs={'data-buzzblock': 'story-card'}, limit=max_articles_to_search):

        article_title = link.find('a', href=True)
        article_link = article_title['href']
        print(f"Parsing article: {article_link}")
        time.sleep(1)

        if not lvm.article_title_meets_posting_requirements(website, article_title.text):
            continue

        no_of_elements = lvm.get_article_list_count(article_title.text)

        article_list_text = get_article_list_text(article_link, no_of_elements)
        if not article_list_text:
            # Fall back to paragraph-based extraction when the article body
            # is not structured as a numbered list of headers.
            article_list_text = paragraph_article_text(article_link, no_of_elements)

        if article_list_text and not lvm.post_previously_made(article_link):
            print(f"{website_name} list article found: {article_title.text}")
            if create_post:
                post_to_reddit(article_title.text, article_list_text, article_link, website)
            return True

    print(f"No {website_name} list articles were found to parse at this time.")
    return False
Example #5
def find_article_to_parse(create_post=True):
    """Finds a list article in Screen Rant's latest article archive and posts the list article to Reddit."""

    website = ArticleType.Screen_Rant
    website_name = convert_enum_to_string(website)

    print(f"Searching {website_name}'s archive.")
    soup = lvm.soup_session(archive_link)

    for article in soup.find_all("h3",
                                 attrs={"class": "bc-title"},
                                 limit=max_articles_to_search):
        # The anchor inside the header carries both the title attribute and the link.
        article = article.find("a", href=True)

        if article:

            article_title = article['title']
            article_link = article['href'] if article['href'].startswith(
                "http") else "http://www.screenrant.com" + article['href']

            print(f"Parsing article: {article_link}")
            time.sleep(1)

            if not lvm.article_title_meets_posting_requirements(
                    website, article_title):
                continue

            article_list_text = get_article_list_text(
                article_link, lvm.get_article_list_count(article_title))
            if article_list_text and not lvm.post_previously_made(
                    article_link):
                print(f"{website_name} list article found: {article_title}")
                if create_post:
                    post_to_reddit(article_title, article_list_text,
                                   article_link, website)
                return True

    print(f"No {website_name} list articles were found to parse at this time.")
    return False
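
All five variants return True as soon as one article is posted, so a caller can simply try one site after another. A minimal driver sketch, assuming each function lives in a per-site module (the module names below are hypothetical, not the repository's actual layout):

import time

import buzzfeed            # hypothetical per-site modules, each exposing
import businessinsider     # the find_article_to_parse shown above
import collegehumor
import polygon
import screenrant

SITES = [collegehumor, polygon, businessinsider, buzzfeed, screenrant]


def run_once(create_post=True):
    """Try each site's archive in turn; stop after the first successful post."""
    for site in SITES:
        if site.find_article_to_parse(create_post=create_post):
            return True
        time.sleep(5)  # brief pause between sites to stay polite to the servers
    return False


if __name__ == "__main__":
    run_once(create_post=False)  # dry run: search the archives without posting
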