Пример #1
0
    def get_all_page_program_detail(resolve, reject):
        """Load listing page ``counter`` and resolve with the details of its programs.

        Page 1 reuses ``first_page_content``; every other page is requested
        with a POST to ``for_my_program_url``. One detail promise is created
        per ``?action=`` link found in the page, and ``resolve`` receives the
        combined result of all of them.

        ``counter``, ``program_list_page_token``, ``first_page_content`` and
        ``for_my_program_url`` are free variables defined outside this function.

        Args:
            resolve: Promise callback, called with the result of
                ``Promise.all`` over the detail promises.
            reject: Promise callback, called with the exception when the
                listing page cannot be downloaded.
        """
        print("Getting Program Lists " + str(counter))
        if counter != 1:
            selected_program_list_data = urllib.parse.urlencode(
                {
                    'action': program_list_page_token,
                    'orderBy': '',
                    'oldOrderBy': '',
                    'sortDirection': 'Forward',
                    'keyword': '',
                    'searchBy': 'jobViewCountCurrentTerm',
                    'searchType': '',
                    'initialSearchAction': 'displayViewedJobs',
                    'postings': 'infoForPostings',
                    'page': str(counter),
                    'currentPage': str(counter - 1),
                    'rand': '1'}).encode()

            try:
                # The delay is proportional to the page number (presumably to
                # stagger the requests of pages that are fetched concurrently).
                time.sleep(counter)
                with urllib.request.urlopen(for_my_program_url,
                                            selected_program_list_data) as response:
                    program_list_content = response.read()
            except Exception as e:
                error_log("Error Found In Acquiring Page " + str(counter))
                # There is no page content to parse. Previously execution fell
                # through and failed on the unbound ``program_list_content``;
                # now the promise is rejected with the real cause.
                reject(e)
                return
        else:
            program_list_content = first_page_content

        page_text = str(program_list_content)
        project_detail_link = re.findall(r'\=\"(\?action=.+?)\"\>', page_text)
        print(str(len(project_detail_link)) + " for Page: " + str(counter) + " " + str(
            re.findall(r'\=\"\?action=.+?\"\>[\\rnt]+(.+?)\<\/a\>', page_text)))

        # Wait while more than 50 threads are alive before creating more work.
        while threading.active_count() > 50:
            time.sleep(2)

        def page_complete_call_back(res):
            print("Finished Getting All From Page: " + str(counter))
            resolve(res)

        # Retry until the detail promises have been created and chained.
        while True:
            try:
                promises_list = [
                    get_program_detail_content_promise(counter, i, link)
                    for i, link in enumerate(project_detail_link)
                ]
                Promise.all(promises_list).then(page_complete_call_back)
                break
            except Exception as e:
                error_log(str(e) + " Page " + str(counter) + " Failed")
                time.sleep(2)
Пример #2
0
    def get_all_page_program_detail(resolve, reject):
        """Load listing page ``counter`` and resolve with its program details.

        The first page is taken from ``first_page_content``; any other page is
        requested with a POST to ``for_my_program_url``. A detail promise is
        created for every ``?action=`` link in the page and ``resolve`` is
        called with the combined result. ``reject`` is not used.
        """
        print("Getting Program Lists " + str(counter))

        if counter == 1:
            program_list_content = first_page_content
        else:
            # Form fields that select the requested page of the listing.
            form_fields = {
                'action': program_list_page_token,
                'orderBy': '',
                'oldOrderBy': '',
                'sortDirection': 'Forward',
                'keyword': '',
                'searchBy': 'jobViewCountCurrentTerm',
                'searchType': '',
                'initialSearchAction': 'displayViewedJobs',
                'postings': 'infoForPostings',
                'page': str(counter),
                'currentPage': str(counter - 1),
                'rand': '1',
            }
            post_body = urllib.parse.urlencode(form_fields).encode()
            program_list_content = urllib.request.urlopen(
                for_my_program_url, post_body).read()

        page_text = str(program_list_content)
        project_detail_link = re.findall(r'\=\"(\?action=.+?)\"\>', page_text)

        # One detail promise per link, numbered by position on the page.
        promises_list = []
        for index, link in enumerate(project_detail_link):
            promises_list.append(
                get_program_detail_content_promise(counter, index, link))

        def forward_to_resolve(res):
            return resolve(res)

        Promise.all(promises_list).then(forward_to_resolve)
Пример #3
0
        g = gdataDict[a['properties']['LOTPLAN']]
    except KeyError:
        print('NO MATCH:', a['properties']['ADDRESS'])
        continue
    except KeyboardInterrupt:
        print(KeyboardInterrupt)
        break

    # Give the current address feature and its matched geometry dedicated
    # names; the promise callbacks below refer to these names.
    geojsonAddress = a
    geojsonGeometry = g
    print(f'({i}):    ', a['properties']['ADDRESS'])

    # Wrap the geometry builder and the properties builder in one promise
    # each, wait for both, pass their results (plus the address feature) to
    # createGeojson inside a further promise, and finally print the `.text`
    # attribute of whatever that promise resolves with.
    # NOTE(review): the lambdas look up geojsonAddress / geojsonGeometry when
    # they are called, not when they are defined. If the Promise library runs
    # any of them after these names have been rebound, they would see the
    # newer values - confirm against the library's scheduling.
    promiseAll = Promise.all([
        Promise(lambda resolve, reject: createGeojsonGeometry(
            resolve, reject, geojsonGeometry)),
        Promise(lambda resolve, reject: createGeojsonProperties(
            resolve, reject, geojsonAddress)),
    ]).then(lambda results: Promise(lambda resolve, reject: createGeojson(
        resolve, reject, results, geojsonAddress))).then(
            lambda res: print(res.text))
    # Short pause after dispatching the chain (presumably to throttle the
    # work started per feature - TODO confirm).
    time.sleep(0.1)

if false:
    #######
    len(gdat['features'])
    # 575226
    len(address['features'])
    # 602593

    #### only 2 missing matches
    for a in address['features'][:50]:
        try:
Пример #4
0
def log_in():
    """Log in, locate the "Viewed" postings section and process all its pages.

    Steps:
      1. Prompt for credentials and post them to the login URL.
      2. Open ``for_my_program_url`` and take the action token from the link
         whose text is the "Viewed" label.
      3. Post that token to obtain the first listing page, then read the
         paging token and the highest page number from it.
      4. Create one promise per page; once all have resolved, pass every
         result to ``add_word`` and call ``create_dictionary``.

    Raises:
        AttributeError: If one of the expected tokens is not found in the
            downloaded HTML (``re.search`` returns ``None``).
    """
    import getpass  # local import: only needed for the password prompt

    # The action/ target from the form
    log_in_url = 'https://cas.uwaterloo.ca/cas/login?service=https://waterlooworks.uwaterloo.ca/waterloo.htm'

    # NOTE(review): in this copy of the file the credential and login
    # statements had been fused into a single unparsable line by credential
    # masking. Only the three prompt/log strings survived. The statements
    # below are a reconstruction: the form fields and the two requests follow
    # another version of this function and must be confirmed against the
    # real login form. ``start_time`` is required by final_call_back below.
    username = input("UserName: ")
    password = getpass.getpass("Password: ")
    data = urllib.parse.urlencode({
        'username': username,
        'password': password,
        '_eventId': 'submit',
        'submit': 'LOGIN',
        'lt': 'e1s1',
    }).encode()

    start_time = time.time()

    print("Log in Into Waterloo Website")
    urllib.request.urlopen(log_in_url)
    urllib.request.urlopen(log_in_url, data)

    # go in to the page of "For My Program"
    for_my_program_page = urllib.request.urlopen(for_my_program_url)
    for_my_program_page_content = for_my_program_page.read()

    token = ""
    token_soup = BeautifulSoup(for_my_program_page_content, "html.parser")
    for link in token_soup.findAll('a'):
        # The comparison is exact, including the surrounding whitespace of
        # the link text as it appears in the page.
        if link.string == "\r\n\r\n\t\t\t\t\t\t\t\t\t\t\t\t\t\t\tViewed\r\n\t\t\t\t\t\t\t\t\t\t\t\t\t":
            token = re.search("action':'(.+?)'", str(link)).group(1)

    print("Getting Program Lists")
    # try post to "For My Program"
    program_data = urllib.parse.urlencode({
        'action': token,
        'rand': '1'
    }).encode()
    # get the default first page
    first_page = urllib.request.urlopen(for_my_program_url, program_data)
    first_page_content = first_page.read()
    first_page_text = str(first_page_content)
    program_list_page_token = re.search(
        r"loadPostingTable\(orderBy, oldOrderBy, sortDirection, page.+?action.+?'(.+?)\\'",
        first_page_text, re.DOTALL).group(1)
    # Highest page number mentioned in the first page.
    page_count = max(map(int, re.findall(r'null\W+?(\d+)\W+?', first_page_text)))

    # Pages are numbered from 1; the end of range() is exclusive, so "+ 1" is
    # needed to include the last page (it was skipped before).
    all_pages_programs_promise_list = [
        get_all_page_program_detail_content_promise(program_list_page_token, i,
                                                    first_page_content)
        for i in range(1, page_count + 1)
    ]

    def final_call_back(data):
        print("Organizing data...")
        for words in data:
            add_word(words)
        create_dictionary()
        print("Done! ")
        print("---- %s seconds ----" % (time.time() - start_time))

    Promise.all(all_pages_programs_promise_list).then(final_call_back)
Пример #5
0
def log_in():
    """Log in, locate the target section and store every program found in it.

    Steps:
      1. Post the credentials to the login URL.
      2. Open ``for_my_program_url`` and take the action token from the link
         whose stripped text equals ``target_section``.
      3. Post that token to obtain the first listing page, then read the
         paging token and the highest page number from it.
      4. Create one promise per page (1..page_count); once all have resolved,
         pass every program of every page to
         ``organize_program_info_to_database``.

    If either token cannot be found, the problem is logged with
    ``error_log`` and the process exits.
    """
    # The action/ target from the form
    log_in_url = 'https://cas.uwaterloo.ca/cas/login?service=https://waterlooworks.uwaterloo.ca/waterloo.htm'

    # NOTE(review): in this copy the ``username`` assignment had been merged
    # into a commented-out prompt line by credential masking, which left
    # ``username`` undefined. It is restored here. The credentials are
    # hard-coded (the password value is masked) - consider prompting for them
    # or reading them from the environment instead.
    username = "l78zhu"
    password = "******"
    data = urllib.parse.urlencode(
        {'username': username, 'password': password, '_eventId': 'submit', 'submit': 'LOGIN',
         'lt': 'e1s1'}).encode()

    start_time = time.time()

    print("Log in Into Waterloo Website")
    urllib.request.urlopen(log_in_url)
    urllib.request.urlopen(log_in_url, data)

    # go in to the page of "For My Program"
    for_my_program_page = urllib.request.urlopen(for_my_program_url)
    for_my_program_page_content = for_my_program_page.read()

    token = ""
    token_soup = BeautifulSoup(for_my_program_page_content, "html.parser")
    for link in token_soup.findAll('a'):
        label = link.string
        # ``.string`` is None when the anchor has no single text child, so
        # guard before calling strip().
        if label is None or label.strip() != target_section:
            continue
        token_match = re.search("action':'(.+?)'", str(link))
        if token_match:
            token = token_match.group(1)

    # Equality/truthiness check; ``is ""`` compared object identity.
    if not token:
        error_log("Error: " + target_section + " Token Unfound")
        exit()

    print("Getting Target Section Program List")
    # try post to target section
    program_data = urllib.parse.urlencode(
        {'action': token, 'rand': '1'}).encode()
    # get the default first page
    first_page = urllib.request.urlopen(for_my_program_url, program_data)
    first_page_content = first_page.read()
    first_page_text = str(first_page_content)

    page_token_match = re.search(
        r"loadPostingTable\(orderBy\, oldOrderBy\, sortDirection\, page[\w\W]+?action[\w\W]+?\'([\w\W]+?)\\\'",
        first_page_text)
    if page_token_match is None:
        error_log("Program Page Switch Token Unfound")
        exit()
    program_list_page_token = page_token_match.group(1)

    # get the number of pages
    page_count = max(map(int, re.findall(r'null\W+?(\d+)\W+?', first_page_text)))

    print("Attempting to Get All Pages")

    all_pages_programs_promise_list = [
        get_all_page_program_detail_content_promise(program_list_page_token, i, first_page_content)
        for i in range(1, page_count + 1)
    ]

    def final_call_back(data):
        print("Organizing data...")

        for each_page in data:
            for each_program in each_page:
                organize_program_info_to_database(each_program)

        # Report completion only after the work has actually been done.
        print("Done! ")
        print("---- %s seconds ----" % (time.time() - start_time))

    Promise.all(all_pages_programs_promise_list).then(final_call_back)
Пример #6
0
from async_promises import Promise
#from promise import Promise
from time import sleep

def thing(resolve, reject):
    """Sleep for ten seconds, then resolve with "CHEESE!".

    Returns whatever ``resolve`` returns; ``reject`` is not used.
    """
    sleep(10)
    outcome = resolve("CHEESE!")
    return outcome

# Create 100 promises that each run `thing`, then print their combined results
# once all of them have resolved.
promices = [Promise(thing) for _ in range(100)]

Promise.all(promices).then(print)