Пример #1
0
def LEASE_NTC_INIT():
    """Collect the rows of every lease listing page and pass them to ``save``.

    The first page is fetched from ``LEASE_URL`` and ``get_total_rows`` reads
    the overall row count from it.  Each following page is requested by
    appending ``&firstRow=<offset>&totalRows=<total>`` to ``LEASE_URL``, with
    the offset advancing by ``pageRow`` per page.  ``progress`` is called once
    per processed page, and a completion message is printed at the end.
    """
    # Original author's note: the fetched page is a dict of dict of list of dict.
    current_page = get_web_page(LEASE_URL, urlJumpIp)
    total_rows = get_total_rows(current_page)

    first_row = 0
    row_data = []

    # NOTE(review): the inclusive bound is kept from the original.  If
    # ``firstRow`` is a zero-based offset, this processes one extra page when
    # total_rows is an exact multiple of pageRow -- confirm against the site.
    while first_row <= total_rows:
        row_data.extend(get_info(current_page))
        first_row += pageRow

        # Only request the next page when another iteration will consume it;
        # the original always fetched here, wasting one request at the end.
        if first_row <= total_rows:
            next_url = (LEASE_URL + "&firstRow=" + str(first_row) +
                        "&totalRows=" + str(total_rows))
            current_page = get_web_page(next_url, urlJumpIp)

        progress(first_row, total_rows, __file__)

    save(row_data)
    print(str(__file__) + " complete")
Пример #2
0
def INFO_BOX_TPE_INIT():
    """Build the info-box data set for the TPE "sells" listings.

    Reads the listing rows from the TPE total-rows workbook, fetches the
    detail page of each row (``DETAIL_URL`` + the row's ``"url"``), and
    accumulates whatever ``get_info_box`` returns for that page and the
    row's ``"post_id"``.  The accumulated list is passed to ``save`` and a
    completion message is printed.
    """
    source_workbook = "sells/data/TPE/info/total_rows_TPE.xlsx"
    output_target = "sells/data/TPE/info/info_box_TPE"

    listings = read_excel(source_workbook)  # rows loaded from the Excel file

    collected = []
    for listing in listings:
        detail_page = get_web_page(DETAIL_URL + listing["url"], urlJumpIp)
        collected.extend(get_info_box(detail_page, listing["post_id"]))
        # One progress tick per listing processed.
        showProgess(__file__)

    save(collected, output_target)
    print(str(__file__) + " complete")
Пример #3
0
def HOUSE_BOX_TPE_INIT():
    """Build the house-box data set for the TPE "lease" listings.

    Reads the listing rows from the TPE total-rows workbook, fetches the
    detail page of each row (``DETAIL_URL`` + the row's ``"url"``), and
    accumulates whatever ``get_house_box`` returns for that page and the
    row's ``"post_id"``.  The accumulated list is passed to ``save`` and a
    completion message is printed.
    """
    source_workbook = "lease/data/TPE/info/total_rows_TPE.xlsx"
    output_target = "lease/data/TPE/info/house_box_TPE"

    listings = read_excel(source_workbook)  # rows loaded from the Excel file

    collected = []
    for listing in listings:
        detail_page = get_web_page(DETAIL_URL + listing["url"], urlJumpIp)
        collected.extend(get_house_box(detail_page, listing["post_id"]))
        # One progress tick per listing processed.
        showProgess(__file__)

    save(collected, output_target)
    print(str(__file__) + " complete")
Пример #4
0
def IMG_TPE_INIT():
    """Gather the images of every TPE "sells" listing.

    Reads the listing rows from the TPE total-rows workbook, removes the
    existing images directory if present, then for each row fetches its
    detail page (``DETAIL_URL`` + the row's ``"url"``) and passes the result
    of ``get_images`` to ``save`` together with the row's ``"post_id"`` and
    the images directory.  A completion message is printed at the end.
    """
    listings = read_excel("sells/data/TPE/info/total_rows_TPE.xlsx")

    image_root = "D:/Python/database/sells/images/TPE/"
    # Delete the previous images folder first so each run starts clean.
    if os.path.exists(image_root):
        shutil.rmtree(image_root, ignore_errors=True)

    for listing in listings:
        detail_page = get_web_page(DETAIL_URL + listing["url"], urlJumpIp)
        found_urls = get_images(detail_page)
        save(found_urls, listing["post_id"], image_root)
        # One progress tick per listing processed.
        showProgess(__file__)

    print(str(__file__) + " complete")