Exemplo n.º 1
0
    def download(self, pdf_url):
        """Download a PDF from ds.yuden.co.jp and return the local file path.

        A ``downloadLinkClick(...)`` snippet is parsed to obtain the file
        name and three flag values. These are used to build the download
        URL and a form that is posted via ``HtmlAnalyse.post_download``.
        The same URL is then handed to ``HtmlAnalyse(..., proxy=...)`` and
        saved under ``self.path`` with a random file name.

        If the proxied download raises, the current proxy is removed from
        ``self.proxy_pool``, a new one is taken from the pool and the whole
        method is retried.

        Args:
            pdf_url: Forwarded unchanged to the retry call; the method body
                does not otherwise read it (see the review note below).

        Returns:
            The path passed to the download attempt that did not raise.
        """
        # NOTE(review): `a` is not defined inside this method, so it resolves
        # to a module-level name (the `__main__` block assigns one).
        # Presumably `pdf_url` was meant to be parsed here -- confirm with
        # the caller before changing it.
        content_list = re.match(r'downloadLinkClick\((.*?)\);return false',
                                a).group(1).split(",")
        filename = content_list[0].replace("'", "")

        url = "http://ds.yuden.co.jp/TYCOMPAS/cs/detail.do?mode=download&fileName=" + filename

        is_series_data = content_list[1]
        is_products_data = content_list[2]
        is_products_data_graph = content_list[3]
        download_form = {
            "action": "detail.do",
            "classificationID": "AE",
            "fileName": filename,
            "isSeriesData": is_series_data,
            "isProductsData": is_products_data,
            "isProductsDataGraph": is_products_data_graph,
        }
        html_analyse = HtmlAnalyse(url)
        # Raw string: same runtime value as before, but without the invalid
        # escape sequences (\P, \S, \D) that newer Python versions warn on.
        html_analyse.post_download(
            data=download_form,
            path=r"I:\PythonPrj\StandardSpider\DataAnalyse\NewRules\a.pdf")

        filename = self.path + str(random.random()) + '.pdf'
        try:
            html_analyse = HtmlAnalyse(url, proxy=self.proxy_ip)
            html_analyse.download(filename)
            print("下载完成。。。")
        except Exception as e:
            print(e)
            # Drop the proxy that just failed and switch to another one.
            self.proxy_pool.remove(self.proxy_ip)
            self.proxy_ip = self.proxy_pool.get()
            # The retry writes to its own random file name, so its result
            # must be returned; returning the local `filename` would hand
            # the caller a path whose download failed.
            return self.download(pdf_url)

        return filename
Exemplo n.º 2
0
        threading_pool = ThreadingPool()
        threading_pool.multi_thread(thread, pdf_urls)


if __name__ == "__main__":
    # pdfdownload = PdfDownload(task_code="CCT2016120900000001")
    #
    # pdfdownload.go()

    # Manual run against one sample `downloadLinkClick(...)` snippet: parse
    # out the file name and the three flags, then post the download form.
    a = "downloadLinkClick('E-HTQ_e.pdf',true,false,false);return false"
    content_list = re.match(r'downloadLinkClick\((.*?)\);return false',
                            a).group(1).split(",")
    # The first argument is quoted in the snippet; strip the quotes.
    filename = content_list[0].replace("'", "")

    url = "http://ds.yuden.co.jp/TYCOMPAS/cs/detail.do?mode=download&fileName=" + filename

    # The remaining arguments are forwarded to the form as the literal
    # strings found in the snippet (e.g. "true" / "false").
    isSeriesData = content_list[1]
    isProductsData = content_list[2]
    isProductsDataGraph = content_list[3]
    DownloadForm = {
        "action": "detail.do",
        "classificationID": "AE",
        "fileName": filename,
        "isSeriesData": isSeriesData,
        "isProductsData": isProductsData,
        "isProductsDataGraph": isProductsDataGraph
    }
    html_analyse = HtmlAnalyse(url)
    # Raw string: identical runtime value to the previous literal, but it
    # no longer relies on invalid escape sequences (\P, \S, \D), which
    # newer Python versions report as a SyntaxWarning.
    html_analyse.post_download(
        data=DownloadForm,
        path=r"I:\PythonPrj\StandardSpider\DataAnalyse\NewRules\a.pdf")