Exemplo n.º 1
0
def get_search_result(soup):
    """Extract news articles from a parsed search-results page.

    Looks up the ``<ol class="search-results">`` element in *soup* and
    converts each ``<li>`` below it into a ``NewsArticle``.

    Args:
        soup: A parsed document exposing the BeautifulSoup-style
            ``find`` / ``find_all`` API.

    Returns:
        A list of ``NewsArticle`` objects with ``title``, ``url``, ``date``
        and ``source`` set. The list is empty when the page has no results
        list. ``<li>`` entries without a headline link are skipped, and
        ``date`` is ``None`` when an entry carries no date element.
    """
    search_result_class_tag = "search-results"

    results_list = soup.find("ol", {"class": search_result_class_tag})
    if results_list is None:
        # No results container on the page: report "no articles" instead
        # of failing with AttributeError on None.
        return []

    articles = []
    # find_all() is recursive, so <li> elements nested inside a result are
    # visited too; _parse_search_item() returns None for those.
    for item in results_list.find_all("li"):
        article = _parse_search_item(item)
        if article is not None:
            articles.append(article)

    return articles


def _parse_search_item(item):
    """Build a ``NewsArticle`` from one result ``<li>``, or return None.

    None is returned when the entry lacks the wrapping ``<div>``, the
    headline ``<h1>``, or a link with an ``href`` inside the headline.
    """
    headlines_class_tag = "headline"
    footer_date_tag = "flags btm"
    date_class_tag = "display-date"

    body = item.find("div")
    if body is None:
        return None

    headline = body.find("h1", {"itemprop": headlines_class_tag})
    link = headline.find("a") if headline is not None else None
    if link is None:
        return None

    url = link.get("href")
    if url is None:
        return None

    # date under tags: footer -> dl -> dd -> time
    date_path = (
        ("footer", {}),
        ("dl", {"class": footer_date_tag}),
        ("dd", {}),
        ("time", {"class": date_class_tag}),
    )
    date_node = body
    for tag_name, attrs in date_path:
        date_node = date_node.find(tag_name, attrs)
        if date_node is None:
            break

    news_article = NewsArticle()
    # get_text() is used instead of .string because .string is None as soon
    # as the tag contains more than one child node (e.g. nested markup).
    news_article.title = link.get_text().strip()
    news_article.url = url
    # TODO: put date in correct format
    news_article.date = (
        date_node.get_text().strip() if date_node is not None else None
    )
    news_article.source = "BBC"

    return news_article