Example #1
import csv
from urllib.request import urlopen
from bs4 import BeautifulSoup


def crawler_FRB():
    html = urlopen(url_frb_2016)
    bsObj = BeautifulSoup(html, "html.parser")
    events_list_obj = bsObj.find("div", {
        "class": "row eventlist"
    }).find("div", {"class": "col-xs-12 col-sm-8 col-md-8"})
    event_rows_obj = events_list_obj.find_all("div", {"class": "row"})

    # newline="" avoids blank rows on Windows; note that in "a" mode
    # the header row below is appended again on every call
    with open(base_dir + "csv_frb.csv", "a", newline="", encoding="utf-8") as fw:
        csvwriter = csv.writer(fw)
        csvwriter.writerow(["title", "href", "date", "type", "content"])
        for event_row_obj in event_rows_obj:
            try:
                news = News()
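                # The date sits in the left column of each event row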
                date_obj = event_row_obj.find(
                    "div", {"class": "col-xs-3 col-md-2 eventlist__time"})
                news.date = date_obj.find("time").text
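                # Title, link and press type sit in the right column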
                event_obj = event_row_obj.find(
                    "div", {"class": "col-xs-9 col-md-10 eventlist__event"})
                news.href = url_domain_frb + event_obj.find("a").attrs['href']
                news.title = event_obj.find("p").find("a").find("em").text
                news.type = event_obj.find("p", {
                    "class": "eventlist__press"
                }).find("em").find("strong").text
                news.content = get_content(news.href)
                csvwriter.writerow(
                    [news.title, news.href, news.date, news.type, news.content])
            except Exception as e:
                # Surface the reason instead of a silent "except.."
                print("skipping row:", e)