try: link = link.get("href") mostviewed_linklist = parserfunctions.linklist_actions( link, mostviewed_linklist) except: pass except: message = "Failed at getting MV link list" seriousness = 2 parserfunctions.error_log_entry(cur, conn, mysql_log_name, curr_time, pubshort, homepage, seriousness, message) #continue try: pop_top_5_links = parserfunctions.get_top_5_list_links( mostviewed_linklist) except: pop_top_5_links = [] message = "Failed to get a POP link" seriousness = 2 parserfunctions.error_log_entry(cur, conn, mysql_log_name, curr_time, pubshort, homepage, seriousness, message) #continue ##### Get Most Prominent Items ### LAYOUT #1 if layout == 1: try: # Carousel Stories -- although carousel is randomized in browser, it is static in the code, so we prioritize the code prominence_1to3 = [ link.find("a", href=link_pattern)
try: link = link.get("href") mostviewed_linklist = parserfunctions.linklist_actions( link, mostviewed_linklist) except: pass except: message = "Failed at getting MV link list" seriousness = 2 parserfunctions.error_log_entry(cur, conn, mysql_log_name, curr_time, pubshort, homepage, seriousness, message) #continue try: pop_top_5_links = parserfunctions.get_top_5_list_links( mostviewed_linklist) except: pop_top_5_links = [] message = "Failed to get a POP link" seriousness = 2 parserfunctions.error_log_entry(cur, conn, mysql_log_name, curr_time, pubshort, homepage, seriousness, message) #continue ###### Get list of links on the page try: document_soup_on_page = parserfunctions.soupify(document_data) except: message = "Failed to resoupify the document" seriousness = 1
mostviewed_1to5 = [link.find("a", href=re.compile(pattern)) for link in document_soup.find("div", id="mostreademailed").find_all("li")] # ID is most e-mailed, but page states most read mostviewed_linklist = [] for link in mostviewed_1to5: try: link = link.get("href") mostviewed_linklist = parserfunctions.linklist_actions(link, mostviewed_linklist) except: pass except: message = "Failed at getting MV link list" seriousness = 2 parserfunctions.error_log_entry(cur, conn, mysql_log_name, curr_time, pubshort, homepage, seriousness, message) #continue try: pop_top_5_links = parserfunctions.get_top_5_list_links(mostviewed_linklist) except: pop_top_5_links = [] message = "Failed to get a POP link" seriousness = 2 parserfunctions.error_log_entry(cur, conn, mysql_log_name, curr_time, pubshort, homepage, seriousness, message) #continue ##### Get Most Prominent Items ### LAYOUT #1 if layout == 1: try: # Lead story prominence_1 = [link.find("a", href=re.compile(pattern)) for link in document_soup.find("div", class_=re.compile("co_3colvisual|co_2colvisual|co_2_5colvisual")).find_all(re.compile("h1|h2|h3|h4"))] prominence_linklist = [] for link in prominence_1: try: