# Пример #1 (Example #1, score: 0)
def main(url, checkRange=30):
    """Run the crawl job for *url*: parse the first-layer title list, then
    the second-layer details, write both CSV layers, and emit the
    zip/info/OK bookkeeping files.

    Args:
        url: Landing page to crawl.
        checkRange: Number of recent entries compared against the last
            run's stored result (default 30).
    """
    header.processBegin(url=url)
    header.clearFolder()  # [2019.02.11]

    try:
        soup = request2soup(url)

        df_1 = parsingTitle(soup, checkRange)
        if len(df_1) != 0:
            header.outputCsv(df_1, "第一層結果")
            df_2 = parsingDetail(df_1)
            header.outputCsv(df_2, "第二層結果")
            header.RESULT_COUNT = len(df_2)
        header.zipFile()
        header.createInfoFile()
        header.createOKFile()
        # [2019.02.11] persist lastResult so the next run can diff against it
        header.outputLastResult(df_1, header.lastResult, checkRange)
    except Exception:
        # Narrowed from a bare `except:` so SystemExit/KeyboardInterrupt
        # still propagate; flag failure via EXIT_CODE like the sibling jobs.
        logging.error("執行爬網作業失敗")
        header.EXIT_CODE = -1
        traceback.print_exc()
        header.createInfoFile()

    header.processEnd()
def main(url, tabNumber, checkRange=15):
    """Crawl tab *tabNumber* of *url* with a Selenium Chrome driver.

    Args:
        url: Base URL; the tab number is appended to it.
        tabNumber: Integer tab index; must lie in [19, 22].
        checkRange: Number of recent entries compared against the last
            run's stored result (default 15).

    Raises:
        ValueError: If tabNumber is not an int in [19, 22] (caught by the
            handler below, which flags EXIT_CODE).
    """
    header.processBegin()
    header.clearFolder()

    download_tool = SeleniumUtil.ChromeDownload()
    download_tool.setDownLoadTempPath(header.TEMP_PATH)
    download_tool.setDownLoadFinalPath(FinalPath)
    chrome_options = download_tool.getChromeOptions()
    driver = webdriver.Chrome(
        chrome_options=chrome_options)  # open chrome browser with Options
    try:
        # Check the type BEFORE the range comparison: comparing a non-int
        # against 19 could raise TypeError instead of the intended ValueError.
        if not (isinstance(tabNumber, int) and 19 <= tabNumber <= 22):
            raise ValueError("tabNumber 必須為 19 到 22 的整數")
        url = url + str(tabNumber)

        driver.get(url)
        df_1 = parsingTitle(url, driver, checkRange)
        if len(df_1) != 0:
            header.outputCsv(df_1, "第一層結果", FinalPath)

            df_2 = parsingDetail(df_1, tabNumber, FinalPath)
            header.outputCsv(df_2, "第二層結果", FinalPath)
            header.RESULT_COUNT = len(df_1)
            header.zipFile()
            header.createInfoFile()
            header.createOKFile()
            # 2019-02-01 persist lastResult so the next run can diff against it
            header.outputLastResult(df_1, header.lastResult, checkRange)
    except Exception:
        logging.error("執行爬網作業失敗")
        header.EXIT_CODE = -1
        traceback.print_exc()
    finally:
        # BUGFIX: the browser was never closed, leaking a
        # Chrome/chromedriver process on every run (success or failure).
        driver.quit()

    header.processEnd()
def main(url, checkRange=30):
    """Run the crawl for *url*: first-layer titles, second-layer details,
    CSV output under FinalPath, plus zip/info/OK bookkeeping files.

    Args:
        url: Page to crawl.
        checkRange: Number of recent entries compared against the last
            run's stored result (default 30).
    """
    header.processBegin()
    header.clearFolder()
    try:
        df_1 = parsingTitle(url, checkRange)
        if len(df_1) == 0:
            return  # nothing new; `finally` still closes the job below
        header.outputCsv(df_1, "第一層結果", FinalPath)
        df_2 = parsingDetail(df_1, FinalPath)
        header.outputCsv(df_2, "第二層結果", FinalPath)
        header.RESULT_COUNT = len(df_1)
        header.zipFile()
        header.createInfoFile()
        header.createOKFile()
        # 2019-02-01 persist lastResult so the next run can diff against it
        header.outputLastResult(df_1, header.lastResult, checkRange)
    except Exception:
        logging.error("執行爬網作業失敗")
        header.EXIT_CODE = -1
        traceback.print_exc()
    finally:
        # BUGFIX: the early `return` on an empty first layer used to skip
        # processEnd(), leaving processBegin() unmatched; `finally`
        # guarantees the begin/end bookkeeping is balanced on every path.
        header.processEnd()