def download(self, pdf_url):
    """Download the PDF named by a ``downloadLinkClick(...)`` click handler.

    ``pdf_url`` is the handler text taken from the page, for example
    ``downloadLinkClick('E-HTQ_e.pdf',true,false,false);return false``.
    The quoted file name and the three flags that follow it are used to
    build the download URL and the form passed to ``post_download``; the
    file is then fetched via ``HtmlAnalyse.download`` using the current
    proxy, rotating to the next proxy from ``self.proxy_pool`` on failure.

    Args:
        pdf_url: Click-handler string containing the file name and three
            flag values, comma separated.

    Returns:
        The path (``self.path`` + random number + ``.pdf``) handed to the
        ``HtmlAnalyse.download`` call that completed without raising.

    Raises:
        ValueError: If ``pdf_url`` is not a ``downloadLinkClick`` handler
            with at least four comma-separated arguments.
    """
    # Parse the argument itself; the previous code read a module-level
    # name ``a`` that only exists when the file is run as a script.
    matched = re.match(r'downloadLinkClick\((.*?)\);return false', pdf_url)
    fields = matched.group(1).split(",") if matched else []
    if len(fields) < 4:
        raise ValueError(
            "expected downloadLinkClick(name, flag, flag, flag) handler, "
            "got %r" % (pdf_url,))

    # The file name is single-quoted inside the handler; the flags are not.
    server_name = fields[0].replace("'", "")
    url = "http://ds.yuden.co.jp/TYCOMPAS/cs/detail.do?mode=download&fileName=" + server_name
    download_form = {
        "action": "detail.do",
        "classificationID": "AE",
        "fileName": server_name,
        "isSeriesData": fields[1],
        "isProductsData": fields[2],
        "isProductsDataGraph": fields[3],
    }

    # NOTE(review): this call targets a fixed developer-machine path and
    # does not use the proxy; it looks like leftover experiment code.
    # Kept so existing behaviour is preserved - confirm whether it should
    # replace the proxied download below. It now runs once, not per retry.
    HtmlAnalyse(url).post_download(
        data=download_form,
        path=r"I:\PythonPrj\StandardSpider\DataAnalyse\NewRules\a.pdf")

    # Retry in a loop rather than by recursion so that a long run of
    # failing proxies cannot exhaust the call stack, and so the name that
    # is returned is always the one used by the successful attempt.
    while True:
        filename = self.path + str(random.random()) + '.pdf'
        try:
            HtmlAnalyse(url, proxy=self.proxy_ip).download(filename)
        except Exception as e:
            # Any failure is treated as a bad proxy: drop it and take
            # the next one from the pool before trying again.
            print(e)
            self.proxy_pool.remove(self.proxy_ip)
            self.proxy_ip = self.proxy_pool.get()
        else:
            print("下载完成。。。")
            return filename
# NOTE(review): the two statements below appear to be the tail of a
# definition whose header is not visible in this span (``thread`` and
# ``pdf_urls`` are not defined here). Their original indentation could not
# be recovered; re-indent them under their owning definition when merging.
threading_pool = ThreadingPool()
threading_pool.multi_thread(thread, pdf_urls)


if __name__ == "__main__":
    # Full pipeline entry point, currently disabled:
    # pdfdownload = PdfDownload(task_code="CCT2016120900000001")
    #
    # pdfdownload.go()

    # Manual experiment: parse one sample click handler and hand the
    # resulting form to ``post_download``.
    a = "downloadLinkClick('E-HTQ_e.pdf',true,false,false);return false"
    content_list = re.match(
        r'downloadLinkClick\((.*?)\);return false', a).group(1).split(",")

    # First argument is the single-quoted file name; the next three are
    # the flag values forwarded unchanged in the form.
    filename = content_list[0].replace("'", "")
    url = "http://ds.yuden.co.jp/TYCOMPAS/cs/detail.do?mode=download&fileName=" + filename
    isSeriesData = content_list[1]
    isProductsData = content_list[2]
    isProductsDataGraph = content_list[3]

    DownloadForm = {
        "action": "detail.do",
        "classificationID": "AE",
        "fileName": filename,
        "isSeriesData": isSeriesData,
        "isProductsData": isProductsData,
        "isProductsDataGraph": isProductsDataGraph
    }

    html_analyse = HtmlAnalyse(url)
    # Raw string: same value as before, without the invalid ``\P``, ``\S``
    # and ``\D`` escape sequences that newer Python versions warn about.
    html_analyse.post_download(
        data=DownloadForm,
        path=r"I:\PythonPrj\StandardSpider\DataAnalyse\NewRules\a.pdf")