示例#1
0
class FilePipeline(object):
    """Item pipeline that derives a local storage directory for each item.

    ``process_item`` records the per-product target directory on
    ``self.base_path``; ``download`` saves a remote file into that directory
    through the shared ``BankHttpService`` instance.
    """

    # One HTTP helper shared by every instance of the pipeline.
    bank_http_service = BankHttpService()

    # Directory (with trailing "/") chosen by the latest item that carried a
    # 'proCode'; stays None until such an item has been processed.
    base_path = None

    def process_item(self, item, spider):
        """Compute the local storage directory for ``item`` and pass it on.

        Args:
            item: Mapping-like scraped item. When it contains ``proCode`` it
                must also provide ``bankCode`` and ``channel``.
            spider: Running spider; ``spider.settings`` is read for
                ``SAVE_PATH``.

        Returns:
            The item, unchanged.
        """
        # Local storage root: <SAVE_PATH>/<YYYYMMDD>. strftime() without a
        # time tuple formats the current local time.
        dated_root = spider.settings.get("SAVE_PATH") + '/' + time.strftime(
            '%Y%m%d')

        # NOTE(review): an item without 'proCode' leaves base_path at the
        # value set by an earlier item (or at None) -- confirm this is
        # intended before re-enabling the downloads below.
        if 'proCode' in item:
            # The trailing slash matters: download() appends the file name
            # directly to base_path.
            self.base_path = "/".join([
                dated_root, item['bankCode'], item['channel'],
                item['proCode']
            ]) + "/"

        # Downloading is currently disabled. The original logic was:
        #
        #   # product instruction sheet, if present
        #   if 'instructionUrl' in item.keys() and item['instructionUrl']:
        #       self.download(item['instructionUrl'], item['bankCode'])
        #   # risk disclosure statement, if present
        #   if 'riskDisclosureUrl' in item.keys() and item['riskDisclosureUrl']:
        #       self.download(item['riskDisclosureUrl'], item['bankCode'])
        return item

    def download(self, downloadUrl, bucket_name):
        """Download ``downloadUrl`` into the directory set by ``process_item``.

        Args:
            downloadUrl: URL of the file to fetch. The text after its last
                ``/`` or ``=`` is used as the local file name.
            bucket_name: Not used by this method; kept so existing callers
                keep working.

        Raises:
            AttributeError: If no item with a ``proCode`` has been processed
                yet, so there is no target directory.
            OSError: If the target directory cannot be created.
        """
        target_dir = self.base_path
        if target_dir is None:
            raise AttributeError(
                "base_path is not set; process_item() must handle an item "
                "with 'proCode' before download() is called")

        self._ensure_dir(target_dir)

        # Handles both path-style (.../name.pdf) and query-style (...?id=name)
        # URLs: the file name is whatever follows the last '/' or '='.
        file_name = re.split('/|=', downloadUrl)[-1]
        self.bank_http_service.downloadFile(downloadUrl,
                                            target_dir + file_name)

    @staticmethod
    def _ensure_dir(path):
        """Create ``path`` (including parents) unless it is already a directory."""
        if os.path.isdir(path):
            return
        try:
            os.makedirs(path)
        except OSError:
            # The directory may have been created by someone else between the
            # check above and makedirs(); only fail when it is really missing.
            if not os.path.isdir(path):
                raise
示例#2
0
 def close(self, reason):
     """Upload the result record for bank code ``ceb``.

     Args:
         reason: Value passed in by the caller; not used here.
     """
     # A fresh service instance is created for this single upload call.
     BankHttpService().uploadResult({'bankCode': 'ceb'})