try: ### Get data from file curr_time, curr_time_utc = parserfunctions.get_curr_time( homepage, pub_tz) document_data = parserfunctions.open_data_file(homepage) except: message = "Failed to open document" seriousness = 1 parserfunctions.error_log_entry(cur, conn, mysql_log_name, curr_time, pubshort, homepage, seriousness, message) #continue ### Create a souped object try: document_soup = parserfunctions.soupify(document_data) except: message = "Failed to soupify document" seriousness = 1 parserfunctions.error_log_entry(cur, conn, mysql_log_name, curr_time, pubshort, homepage, seriousness, message) #continue ##### Get Most Viewed Items try: mostviewed_1to5 = [ link.find("a", href=link_pattern) for link in document_soup.find( "div", id="river-container").find_all("li") ] mostviewed_linklist = []
### Reset key variables curr_time, curr_time_utc, document_data, document_soup, document_soup_on_page, insert_statements, is_pop, is_pro, layout, link, message, mostviewed_linklist, on_page, on_page_link_list, on_page_link_list_tmp, pop_rank, pop_top_5_links, pro_rank, pro_top_5_links, prominence_linklist, seriousness = [None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None] try: ### Get data from file curr_time, curr_time_utc = parserfunctions.get_curr_time(homepage, pub_tz) document_data = parserfunctions.open_data_file(homepage) except: message = "Failed to open document" seriousness = 1 parserfunctions.error_log_entry(cur, conn, mysql_log_name, curr_time, pubshort, homepage, seriousness, message) #continue ### Create a souped object try: document_soup = parserfunctions.soupify(document_data) except: message = "Failed to soupify document" seriousness = 1 parserfunctions.error_log_entry(cur, conn, mysql_log_name, curr_time, pubshort, homepage, seriousness, message) #continue ### Check layout layout = None try: if document_soup.find("div", class_=re.compile("co_3colvisual|co_2colvisual|co_2_5colvisual")) is not None: layout = 1 except: pass if layout is None: try: