def get_all_page_program_detail(resolve, reject):
    """Fetch one page of the program list and schedule detail fetches for it.

    Promise executor: `resolve`/`reject` are the promise callbacks.  Relies on
    enclosing-scope names (`counter`, `program_list_page_token`,
    `first_page_content`, `for_my_program_url`, `error_log`,
    `get_program_detail_content_promise`) — presumably bound by the factory
    that creates this executor; confirm against the caller.
    """
    print("Getting Program Lists " + str(counter))
    if counter != 1:
        # Page 1 was already fetched by the caller; later pages are POSTed for.
        selected_program_list_data = urllib.parse.urlencode({
            'action': program_list_page_token,
            'orderBy': '',
            'oldOrderBy': '',
            'sortDirection': 'Forward',
            'keyword': '',
            'searchBy': 'jobViewCountCurrentTerm',
            'searchType': '',
            'initialSearchAction': 'displayViewedJobs',
            'postings': 'infoForPostings',
            'page': str(counter),
            'currentPage': str(counter - 1),
            'rand': '1'}).encode()
        try:
            # Stagger the requests so concurrent page fetches don't all hit
            # the server at once.
            time.sleep(counter)
            program_list_content = urllib.request.urlopen(
                for_my_program_url, selected_program_list_data).read()
            # print(re.findall(r'\=\"\?action=.+?\"\>[\\rnt]+(.+?)\<\/a\>', str(program_list_content)))
        except Exception as e:
            error_log("Error Found In Acquiring Page " + str(counter))
            # BUG FIX: the original fell through here and then used
            # `program_list_content` while it was unbound (NameError).
            # Reject the promise instead so the caller sees the failure.
            reject(e)
            return
    else:
        program_list_content = first_page_content

    # Relative links ("?action=...") to each posting's detail page.
    project_detail_link = re.findall(r'\=\"(\?action=.+?)\"\>',
                                     str(program_list_content))
    print(str(len(project_detail_link)) + " for Page: " + str(counter) + " " + str(
        re.findall(r'\=\"\?action=.+?\"\>[\\rnt]+(.+?)\<\/a\>',
                   str(program_list_content))))

    # Throttle: wait while too many worker threads are already alive.
    while threading.active_count() > 50:
        time.sleep(2)
        # error_log("Page " + str(counter) + " Waiting")

    def page_complete_call_back(res):
        # Fires once every posting on this page has been fetched.
        print("Finished Getting All From Page: " + str(counter))
        resolve(res)

    # Keep retrying until the per-posting detail promises are scheduled.
    attempt_success = False
    while not attempt_success:
        try:
            promises_list = [
                get_program_detail_content_promise(counter, i, link)
                for i, link in enumerate(project_detail_link)
            ]
            Promise.all(promises_list).then(page_complete_call_back)
            attempt_success = True
        except Exception as e:
            error_log(str(e) + " Page " + str(counter) + " Failed")
            time.sleep(2)
def get_all_page_program_detail(resolve, reject):
    """Resolve with detail-content promises for every posting on this page.

    Promise executor.  `counter`, `program_list_page_token`,
    `first_page_content` and `for_my_program_url` come from the enclosing
    scope (bound by the factory that creates this executor).
    """
    print("Getting Program Lists " + str(counter))
    if counter == 1:
        # The first page was already downloaded by the caller.
        program_list_content = first_page_content
    else:
        form_fields = {
            'action': program_list_page_token,
            'orderBy': '',
            'oldOrderBy': '',
            'sortDirection': 'Forward',
            'keyword': '',
            'searchBy': 'jobViewCountCurrentTerm',
            'searchType': '',
            'initialSearchAction': 'displayViewedJobs',
            'postings': 'infoForPostings',
            'page': str(counter),
            'currentPage': str(counter - 1),
            'rand': '1',
        }
        encoded_form = urllib.parse.urlencode(form_fields).encode()
        # go to the list want to select
        response = urllib.request.urlopen(for_my_program_url, encoded_form)
        program_list_content = response.read()

    # Relative links ("?action=...") to each posting's detail page.
    detail_links = re.findall(r'\=\"(\?action=.+?)\"\>',
                              str(program_list_content))
    pending = [
        get_program_detail_content_promise(counter, index, link)
        for index, link in enumerate(detail_links)
    ]
    Promise.all(pending).then(lambda res: resolve(res))
# NOTE(review): cut fragment — the `for`/`try` that open this body appear at
# the END of the chunk (the extraction wrapped around the loop), so this is
# not syntactically valid Python as it stands.  Logically this is the body of
# `for a in address['features'][...]:` / `try:` shown at the bottom.
        # Look up the geometry record keyed by the address's lot/plan id —
        # presumably gdataDict maps LOTPLAN -> geometry feature; TODO confirm.
        g = gdataDict[a['properties']['LOTPLAN']]
    except KeyError:
        # No geometry for this address; report and move on.
        print('NO MATCH:', a['properties']['ADDRESS'])
        continue
    except KeyboardInterrupt:
        print(KeyboardInterrupt)
        break
    geojsonAddress = a
    geojsonGeometry = g
    print(f'({i}): ', a['properties']['ADDRESS'])
    # Build the geometry and properties payloads concurrently, then combine
    # them into one geojson record and print the response body.
    promiseAll = Promise.all([
        Promise(lambda resolve, reject: createGeojsonGeometry(
            resolve, reject, geojsonGeometry)),
        Promise(lambda resolve, reject: createGeojsonProperties(
            resolve, reject, geojsonAddress)),
    ]).then(lambda results: Promise(lambda resolve, reject: createGeojson(
        resolve, reject, results, geojsonAddress))).then(
        lambda res: print(res.text))
    # Small pause between records — presumably rate limiting; TODO confirm.
    time.sleep(0.1)

# NOTE(review): `false` is not defined in Python (should be `False`); as
# written this raises NameError at runtime rather than skipping the block.
if false:
    #######
    len(gdat['features'])  # 575226
    len(address['features'])  # 602593
    #### only 2 missing matches
    for a in address['features'][:50]:
        try:
def log_in():
    """Log in to WaterlooWorks via CAS and kick off scraping of every page of
    the "Viewed" program list.

    NOTE(review): the credential/login region below was redacted by a secrets
    scrubber ("******" replaced the original statements); this function is
    not valid Python as extracted and must be restored before running.
    """
    # The action/ target from the form
    log_in_url = 'https://cas.uwaterloo.ca/cas/login?service=https://waterlooworks.uwaterloo.ca/waterloo.htm'
    # NOTE(review): redacted region — originally this presumably read the
    # username/password, POSTed them to `log_in_url`, started the timer
    # (`start_time` is used below but never assigned here), and printed the
    # "Log in Into Waterloo Website" status line.
    username = input("UserName: "******"Password: "******"Log in Into Waterloo Website")
    # go in to the page of "For My Program"
    for_my_program_page = urllib.request.urlopen(for_my_program_url)
    for_my_program_page_content = for_my_program_page.read()
    token = ""
    token_soup = BeautifulSoup(for_my_program_page_content, "html.parser")
    # Find the <a> whose label matches the wanted section; the surrounding
    # markup carries the form-action token needed to open that list.
    for link in token_soup.findAll('a'):
        # if link.string == "For My Program ":
        if link.string == "\r\n\r\n\t\t\t\t\t\t\t\t\t\t\t\t\t\t\tViewed\r\n\t\t\t\t\t\t\t\t\t\t\t\t\t":
            # if link.string == "Application Deadlines in the next 10 Days\r\n\t\t\t\t\t\t\t\t\t\t\t\t\t":
            # if link.string == "Application Deadlines Today\r\n\t\t\t\t\t\t\t\t\t\t\t\t\t":
            token = re.search("action':'(.+?)'", str(link)).group(1)
    print("Getting Program Lists")
    # try post to "For My Program"
    program_data = urllib.parse.urlencode({
        'action': token,
        'rand': '1'
    }).encode()
    # get the default first page
    first_page = urllib.request.urlopen(for_my_program_url, program_data)
    first_page_content = first_page.read()
    # Token that loadPostingTable() uses when switching between list pages.
    program_list_page_token = re.search(
        r"loadPostingTable\(orderBy, oldOrderBy, sortDirection, page.+?action.+?'(.+?)\\'",
        str(first_page_content), re.DOTALL).group(1)
    # Highest page number mentioned in the pager markup.
    page_count = max(
        map(int, re.findall(r'null\W+?(\d+)\W+?', str(first_page_content))))
    # NOTE(review): range(1, page_count) skips the final page — the sibling
    # version of log_in() uses page_count + 1; confirm which is intended.
    all_pages_programs_promise_list = [
        get_all_page_program_detail_content_promise(program_list_page_token,
                                                    i, first_page_content)
        for i in range(1, page_count)
    ]

    def final_call_back(data):
        # Fires once every page's detail promises have resolved.
        print("Organizing data...")
        for words in data:
            add_word(words)
        create_dictionary()
        print("Done! ")
        print("---- %s seconds ----" % (time.time() - start_time))

    Promise.all(all_pages_programs_promise_list).then(
        lambda res: final_call_back(res))
def log_in():
    """Log in to WaterlooWorks via CAS, open the target section's program
    list, then fetch every page of postings concurrently.

    Side effects: network I/O, progress printing, and on success every
    scraped program is fed to organize_program_info_to_database().  Calls
    exit() when the section token or the page-switch token cannot be found.

    NOTE(review): the credential region below was redacted by a secrets
    scrubber ("******" — `username` is left unassigned); restore it before
    running.
    """
    # The action/ target from the form
    log_in_url = 'https://cas.uwaterloo.ca/cas/login?service=https://waterlooworks.uwaterloo.ca/waterloo.htm'
    # request password
    # username = input("UserName: "******"Password: "******"l78zhu"
    password = "******"
    data = urllib.parse.urlencode(
        {'username': username, 'password': password,
         '_eventId': 'submit', 'submit': 'LOGIN', 'lt': 'e1s1'}).encode()
    start_time = time.time()
    print("Log in Into Waterloo Website")
    # First GET establishes the CAS session; the second request POSTs the
    # login form.
    urllib.request.urlopen(log_in_url)
    urllib.request.urlopen(log_in_url, data)
    # go in to the page of "For My Program"
    for_my_program_page = urllib.request.urlopen(for_my_program_url)
    for_my_program_page_content = for_my_program_page.read()
    token = ""
    token_soup = BeautifulSoup(for_my_program_page_content, "html.parser")
    # The target section's <a> carries the form-action token in its markup.
    for link in token_soup.findAll('a'):
        if link.string.strip() == target_section:
            token = re.search("action':'(.+?)'", str(link)).group(1)
    # BUG FIX: was `token is ""` — identity comparison against a literal is
    # unreliable (and a SyntaxWarning on Python 3.8+); use equality.
    if token == "":
        error_log("Error: " + target_section + " Token Unfound")
        exit()
    print("Getting Target Section Program List")
    # try post to target section
    program_data = urllib.parse.urlencode(
        {'action': token, 'rand': '1'}).encode()
    # get the default first page
    first_page = urllib.request.urlopen(for_my_program_url, program_data)
    first_page_content = first_page.read()
    try:
        # Token that loadPostingTable() uses when switching list pages.
        program_list_page_token = re.search(
            r"loadPostingTable\(orderBy\, oldOrderBy\, sortDirection\, page[\w\W]+?action[\w\W]+?\'([\w\W]+?)\\\'",
            str(first_page_content)).group(1)
    except Exception:
        error_log("Program Page Switch Token Unfound")
        exit()
    # get the number of pages (highest page number in the pager markup)
    page_count = max(map(int, re.findall(r'null\W+?(\d+)\W+?',
                                         str(first_page_content))))
    print("Attempting to Get All Pages")
    all_pages_programs_promise_list = [
        get_all_page_program_detail_content_promise(program_list_page_token,
                                                    i, first_page_content)
        for i in range(1, page_count + 1)
    ]

    def final_call_back(data):
        # Fires once every page's postings have been fetched.
        print("Organizing data...")
        # BUG FIX: the extracted source contained a raw newline inside this
        # string literal (a syntax error); restored as an escaped newline.
        print("Done! \n")
        for each_page in data:
            for each_program in each_page:
                organize_program_info_to_database(each_program)
        print("---- %s seconds ----" % (time.time() - start_time))

    Promise.all(all_pages_programs_promise_list).then(
        lambda res: final_call_back(res))
from async_promises import Promise
#from promise import Promise
from time import sleep


def thing(resolve, reject):
    """Promise executor: wait ten seconds, then resolve with "CHEESE!"."""
    sleep(10)
    return resolve("CHEESE!")


# Spin up 100 promises all running the same slow executor, then print the
# combined results once every one of them has resolved.
pending = [Promise(thing) for _ in range(100)]
Promise.all(pending).then(print)