# STEP 5 Fetch # print('Fetching start!...') # ...Then I realized that it needs to be in a kind of order. # Or when you figure out a broken post page, it doesn't restart from that. # It will hurt your feelings. # So...sh*t important_info_key_list = [key for key in important_info] important_info_key_list.sort() for key in important_info_key_list: name = key href = important_info[name] print('Fetching %s...' % name) print('URL %s' % href) title = p9b.get_title_from_url(href) print('TITLE %s' % title) if title.find(name) == -1: print('WARN The title does not contain %s.' % name) print('Something may be wrong.') post = p9c.Post(href) post.match() post.migration() floors = post.get_real_floors() if not floors: print('ERROR Can not get info from url.') break important_info_list = list() for real_floor in floors: if real_floor['comments'] is None:
# Script start-up: announce, load the project scraper modules, then define
# the configuration constants used by the rest of the script.
# NOTE(review): `printl` is defined outside this chunk; presumably it prints
# without a trailing newline so that the following "[OK]" lands on the same
# line -- confirm against its definition.
printl("Initializing...")
from P0009 import btbb as p9b
from P0009 import btbbc as p9c

# API #
ROOT_URL = "http://tieba.baidu.com/p/4260990232"
IGNORE_FLOOR = []
STORE_FILE_NAME = "/home/wan/N_E_I_H version1.0/C0011/233.txt"
print("[OK]")

# FETCH DATA #
# Look up the page title of the root thread and keep it under data["title"].
printl("Fetching title of url: '%s'..." % ROOT_URL)
data = {"title": p9b.get_title_from_url(ROOT_URL)}
print("[OK]")
print("TITLE %s" % data["title"])

# Build the Post object for the root thread.
printl("Fetching data of url: '%s'..." % ROOT_URL)
root_post = p9c.Post(ROOT_URL)
print("[OK]")

# Step 1: run the post's match() and migration() passes, in that order.
printl("Process step #1...")
root_post.match()
root_post.migration()
print("[OK]")

# Step 2: collect the floors of the root post; child_list starts out empty
# and is expected to be filled by code that follows this chunk.
print("Process step #2...")
root_info = root_post.get_real_floors()
child_list = []