def LEASE_NTC_INIT():
    """Crawl the paginated lease listing and save every collected row.

    The first page is fetched from ``LEASE_URL``; ``get_total_rows`` reads
    the overall row count from it.  Each processed page contributes the
    result of ``get_info`` to the accumulated list, and the offset advances
    by ``pageRow`` until it exceeds the total.  The accumulated rows are
    handed to ``save`` and a completion message is printed.
    """
    # Original note on this call: "return a dict of dict of list of dict".
    current_page = get_web_page(LEASE_URL, urlJumpIp)
    total_rows = get_total_rows(current_page)
    page_count = 0
    row_data = []
    # NOTE(review): with `<=`, when total_rows is an exact multiple of
    # pageRow the page starting at firstRow == total_rows is also processed.
    # Kept as-is to preserve the saved output; confirm whether `<` is meant.
    while page_count <= total_rows:
        row_data += get_info(current_page)
        page_count += pageRow
        # Only request the next page when the loop is going to consume it.
        # Previously a page was fetched after the final iteration as well
        # and then thrown away, costing one wasted request per run.
        if page_count <= total_rows:
            next_url = (LEASE_URL + "&firstRow=" + str(page_count)
                        + "&totalRows=" + str(total_rows))
            current_page = get_web_page(next_url, urlJumpIp)
        progress(page_count, total_rows, __file__)
    save(row_data)
    print(str(__file__) + " complete")
def INFO_BOX_TPE_INIT():
    """Collect the info box of every listed sells post and save the result.

    Rows are read from the sells ``total_rows_TPE.xlsx`` workbook.  For each
    row the detail page at ``DETAIL_URL + row["url"]`` is fetched and passed,
    together with the row's ``post_id``, to ``get_info_box``; whatever that
    returns is appended to one combined list.  Progress is reported after
    each row, then the combined list is saved and a completion message is
    printed.
    """
    # Load the rows previously written to the Excel sheet.
    listing_rows = read_excel("sells/data/TPE/info/total_rows_TPE.xlsx")

    collected = []
    for entry in listing_rows:
        detail_page = get_web_page(DETAIL_URL + entry["url"], urlJumpIp)
        collected.extend(get_info_box(detail_page, entry["post_id"]))
        showProgess(__file__)

    save(collected, "sells/data/TPE/info/info_box_TPE")
    print(str(__file__) + " complete")
def HOUSE_BOX_TPE_INIT():
    """Collect the house box of every listed lease post and save the result.

    Rows are read from the lease ``total_rows_TPE.xlsx`` workbook.  Each
    row's detail page is fetched and handed, with the row's ``post_id``, to
    ``get_house_box``; the returned items are gathered into a single list.
    Progress is reported once per row.  Finally the list is saved and a
    completion message is printed.
    """
    excel_path = "lease/data/TPE/info/total_rows_TPE.xlsx"
    output_path = "lease/data/TPE/info/house_box_TPE"

    gathered = []
    # Read the rows from the Excel sheet and visit each post's detail page.
    for record in read_excel(excel_path):
        detail_url = DETAIL_URL + record["url"]
        fetched_page = get_web_page(detail_url, urlJumpIp)
        boxes = get_house_box(fetched_page, record["post_id"])
        gathered.extend(boxes)
        showProgess(__file__)

    save(gathered, output_path)
    print(str(__file__) + " complete")
def IMG_TPE_INIT():
    """Re-download the images of every listed sells post.

    Rows are read from the sells ``total_rows_TPE.xlsx`` workbook.  If the
    image directory already exists it is removed first (removal errors are
    ignored).  For each row the detail page is fetched, ``get_images`` is
    applied to it, and the result is passed to ``save`` along with the row's
    ``post_id`` and the image directory.  Progress is reported after each
    row and a completion message is printed at the end.
    """
    # Load the rows previously written to the Excel sheet.
    listing_rows = read_excel(
        "sells/data/TPE/info/total_rows_TPE.xlsx")
    image_root = "D:/Python/database/sells/images/TPE/"

    # Delete the existing images folder first.
    if os.path.exists(image_root):
        shutil.rmtree(image_root, ignore_errors=True)

    for entry in listing_rows:
        detail_page = get_web_page(DETAIL_URL + entry["url"], urlJumpIp)
        image_links = get_images(detail_page)
        save(image_links, entry["post_id"], image_root)
        showProgess(__file__)

    print(str(__file__) + " complete")