Пример #1
0
def _report_header_columns(report_data):
    """Return the sanitised column names found on the first line of a report.

    Column names are lower-cased and stripped of every character that is not
    an ASCII letter or digit. An empty list is returned when ``report_data``
    holds no usable header row.
    """
    # run_report_return_data() can hand back an empty list or this placeholder
    # sentence instead of report text; neither of them is a header row.
    if not report_data or report_data == 'You did not have any data for this date':
        return []
    if not hasattr(report_data, 'splitlines'):
        return []

    lines = report_data.splitlines()
    if not lines or not lines[0].strip():
        return []

    # re.split() never returns an empty list, so emptiness has to be decided
    # on the line itself (above) rather than on the split result.
    return [
        re.sub('[^A-Za-z0-9]', '', column.lower())
        for column in re.split(r'\t+', lines[0])
    ]


def _create_table_sql(tablename, headers):
    """Build the ``create table`` statement for ``tablename`` from ``headers``."""
    column_defs = ['[sellerid] varchar(1000)']

    if 'startdate' not in headers:
        column_defs.append('[startdate] varchar(1000)')
        # NOTE(review): 'enddateate' looks like a typo for 'enddate'. It is
        # kept unchanged because code outside this function may already refer
        # to the column by this name - confirm before renaming.
        column_defs.append('[enddateate] varchar(1000)')

    column_defs.extend(
        '[{column}] varchar(4000)'.format(column=column) for column in headers)

    # Joining (instead of appending ',\n' and stripping) guarantees that the
    # column list never ends with a dangling comma.
    return 'create table [{tbl}] ({columns});'.format(
        tbl=tablename, columns=',\n'.join(column_defs))


def create_table_if_not_exists(report_retriever, tablename):
    """Create the table ``tablename`` when it does not exist yet.

    The column list is derived from the header row of a report pulled through
    ``run_report_return_data``. Today's report is tried first; if it yields no
    header row, each of the ten previous days is tried in turn.

    Args:
        report_retriever: Object passed through unchanged to
            ``run_report_return_data``.
        tablename: Name of the table; it is also passed to
            ``run_report_return_data`` as the report type.

    Raises:
        RuntimeError: If none of the attempted dates produced a header row.
    """
    with db(db_to_load) as db_con:
        print('Checking if table "{}" exists'.format(tablename))
        res = db_con.execute("""
        select count(*) from INFORMATION_SCHEMA.tables as t
            where t.table_name = '{}';
        """.format(tablename))

        table_count = res.fetchall()[0][0]

    if table_count != 0:
        print("Table Existed")
        return

    print("Table did not exist, attempting to create.")
    headers = []
    today = datetime.datetime.today()

    for days_back in range(0, 11):
        # Step back exactly one day per attempt.
        dt_pull_fmt = (
            today - datetime.timedelta(days=days_back)).strftime('%Y-%m-%d')
        report_data = run_report_return_data(report_retriever, tablename,
                                             dt_pull_fmt)

        headers = _report_header_columns(report_data)
        if headers:
            print('Headers received for create table statement.Formatting.')
            print('Headers formatted.')
            break

    if not headers:
        raise RuntimeError(
            'No report headers could be retrieved for table "{}"; '
            'table was not created.'.format(tablename))

    sql = _create_table_sql(tablename, headers)

    # NOTE(review): existence is checked through db(db_to_load) but the table
    # is created through db('azure'); confirm both refer to the same database.
    with db('azure') as db_con:
        print("Creating table")
        db_con.execute(sql)

    print("Table Created")
Пример #2
0
            wantedtext = loclink.text
    parsedLoc = re.search(
        r"Lat: (?P<lat>[\d\.\-]*), Long: (?P<long>[\d\.\-]*)", wantedtext)
    lat = parsedLoc.group('lat')
    long = parsedLoc.group('long')
    print("Latitude : " + str(lat))
    fo.write(str(lat) + "#")
    print("Longitude : " + str(long))
    fo.write(str(long) + "\n")


if __name__ == '__main__':
    # Scrape the 26 listing pages of London traffic cameras and write one
    # '#'-separated record per camera to list_London_UK.txt.
    origin = "http://www.trafficdelays.co.uk/london-traffic-cameras/"

    # The file is managed by ``with`` so it is flushed and closed even when a
    # request or parse step raises. ``fo`` is still bound at module level, so
    # functions that write through the global ``fo`` (the function body above
    # does, presumably scraping()) keep working.
    with open("list_London_UK.txt", "w") as fo:
        fo.write("country#city#snapshot_url#latitude#longitude\n")

        for page_num in range(1, 27):  # pages 1 to 26
            print("\n" + "P." + str(page_num) + "\n")
            add_page_num = "?lcp_page0=" + str(page_num) + "#lcp_instance_0"
            r = requests.get(origin + add_page_num)
            soup = BeautifulSoup(r.content, "html.parser")

            # Each <h4 class="lcp_title"> holds the link to one camera page.
            for link in soup.find_all("h4", {"class": "lcp_title"}):
                print("Country : United Kingdom")
                fo.write("GB#")
                print("City : London")
                fo.write("London#")

                # Look the anchor up once and reuse it for all three reads.
                anchor = link.find_all("a")[0]
                scraping(anchor.get("href"))
                print("Note : " + anchor.get("title").replace(
                    '| London Jam Cams', ''))
                print("Reference : " + anchor.get("href") + "\n")
Пример #3
0
def _is_throttle_error(error):
    """Return True when ``error`` carries the '503 Server Error' text."""
    # ``.message`` only exists on Python 2 exceptions; fall back to str().
    message = getattr(error, 'message', None)
    if message is None:
        message = str(error)
    return '503 Server Error' in message


def run_report_return_data(report_retriever, report_type, date):
    """Request a report, wait until it is finished and return its contents.

    Args:
        report_retriever: Object providing ``account_id``,
            ``request_report``, ``get_report_request_list`` and
            ``get_report``.
        report_type: Report type handed to ``request_report``.
        date: Date handed to ``request_report``.

    Returns:
        The ``.parsed`` payload of the generated report. When the report was
        cancelled or finished without data, a placeholder row is inserted and
        the string 'You did not have any data for this date' is returned.
        When the download is throttled on every attempt, ``[]`` is returned.

    Raises:
        MWSError: Any API error that is not a '503 Server Error' throttle
            notice is re-raised instead of being retried.
    """
    # NOTE(review): the handlers below spell the exception both as
    # ``mws.MWSError`` and ``mws.mws.MWSError``; each is kept as originally
    # written because the import is not visible here.
    seller_id = report_retriever.account_id

    resp = report_retriever.request_report(report_type, date).parsed
    report_request_id = resp.get("ReportRequestInfo").get(
        "ReportRequestId").get("value")

    print("Requested report id: {}".format(report_request_id))

    ########################################################################
    # Loop until report ready, cancelled or finished without data
    ########################################################################
    report_ready = False
    report_stat = {"ReportProcessingStatus": {"value": "_DEFAULT_"}}
    while not report_ready:
        try:
            report_stat = report_retriever.get_report_request_list(
                [report_request_id]).parsed.get("ReportRequestInfo")

        except mws.MWSError as error:
            if not _is_throttle_error(error):
                raise
            print("Encountered throttle notice. Sleeping 1 minute.")
            sleep(60)

        status = report_stat.get("ReportProcessingStatus").get("value")

        if status == "_DONE_":
            print("Report ready. Retrieving generated report ID")

            for _ in range(0, 11):
                try:
                    generated_report_id = report_retriever. \
                        get_report_request_list([report_request_id]).parsed. \
                        get("ReportRequestInfo").get("GeneratedReportId").get("value")
                    report_ready = True
                    break
                except mws.mws.MWSError as error:
                    if not _is_throttle_error(error):
                        raise
                    print(
                        "Received throttling notice when Trying to get GeneratedReportId. Sleeping 1 minute before next attempt"
                    )
                    sleep(60)  # sleep for a minute and then try again

        elif status in ("_CANCELLED_", "_DONE_NO_DATA_"):
            # Both are terminal states without a downloadable report; treating
            # _DONE_NO_DATA_ as "not ready" would make this loop run forever.
            if status == "_CANCELLED_":
                print(
                    "Could not get report for date: {}. Report was cancelled by API!"
                    .format(date))
            else:
                print(
                    "Could not get report for date: {}. Report finished without data."
                    .format(date))
            insert_placeholder_row(report_type, seller_id, date)
            report_data = 'You did not have any data for this date'
            break
        else:
            sleep_time = 30  # seconds
            # Logged as a one-element list to keep the log format unchanged.
            print("Report status was: {}".format([status]))
            print("Report not ready, sleeping {} seconds.".format(sleep_time))
            sleep(sleep_time)
    # END LOOP #############################################################

    if report_ready:
        print(
            "Retrieving report for report id: {}".format(generated_report_id))

        for _ in range(0, 11):
            try:
                report_data = report_retriever.get_report(
                    generated_report_id).parsed
                break  # Report done
            except mws.mws.MWSError as error:
                if not _is_throttle_error(error):
                    raise
                print(
                    "Received throttling notice when calling getReport. Sleeping 1 minute before next attempt"
                )
                sleep(60)  # sleep for a minute and then try again
        else:
            # Every attempt was throttled.
            print(
                "Unable to retrieve report after 10 tries. Aborting. Attempt will be made the next time this report runs."
            )
            report_data = []

    return report_data