# 예제 #1 (Example 1)
def call_cons(category):
    """Scrape the given news category and render it as simple HTML.

    Builds a NewsScraper for *category*, fetches the live page,
    parses it with BeautifulSoup, and writes out a simplified HTML view.
    """
    scraper = wscrap.NewsScraper(category, wlog)
    scraper.retrieve_webpage()
    scraper.convert_data_to_bs4()
    scraper.parse_soup_to_simple_html()
# 예제 #2 (Example 2)
# --------------------------
#      Web Scraping
# --------------------------

## News title, link scraping

from webscrap import wlog
from webscrap import wscrap

# Configure the log file the scraper writes to.
wlog.set_custom_log_info('html/scratch.log')

# Scraper bound to the url_aj news source defined in wscrap.
news_scrap = wscrap.NewsScraper(wscrap.url_aj, wlog)

## THE FOLLOWING 2 LINES FETCH THE SITE'S LATEST DATA AND CACHE IT AS HTML ON DISK.
## COMMENT THEM OUT TO PARSE THE PREVIOUSLY RETRIEVED DISK FILE INSTEAD.

news_scrap.retrieve_webpage()
news_scrap.write_webpage_as_html()

# Load the cached HTML back from disk, parse it, and emit simplified HTML.
news_scrap.read_webpage_from_html()
news_scrap.convert_data_to_bs4()
#news_scrap.print_beautiful_soup()
news_scrap.parse_soup_to_simple_html()
# 예제 #3 (Example 3)
from webscrap import wlog
from webscrap import wscrap

# Configure the log file the scraper writes to.
wlog.set_custom_log_info('html/error.log')

# Scraper bound to my_url defined in wscrap.
news_scrap = wscrap.NewsScraper(wscrap.my_url, wlog)
# Fetch the live page and cache it as HTML on disk.
# FIX: was `retrive_webpage()` (typo) — the other examples call
# `retrieve_webpage()`, so the misspelled name would raise AttributeError.
news_scrap.retrieve_webpage()
news_scrap.write_webpage_as_html()

# Load the cached HTML back from disk, parse it, and emit simplified HTML.
news_scrap.read_webpage_from_html()
news_scrap.convert_data_to_bs4()
news_scrap.parse_soup_to_simple_html()