Пример #1
0
    def parse_content(self, response):
        """Parse a strategy JSON response and yield one DesignStrategyItem.

        The response body is expected to be a JSON object with a
        ``strategyModel`` entry providing ``categoryName``, ``title``,
        ``description`` and ``context``.

        :param response: response object exposing ``text``, ``url`` and
            ``meta``.
        :return: generator yielding at most one ``DesignStrategyItem``;
            nothing is yielded when the body is not valid JSON or the item
            cannot be built.
        """
        try:
            data = json.loads(response.text)
        except ValueError:
            # json.JSONDecodeError subclasses ValueError; a bare ``except``
            # here would also swallow KeyboardInterrupt/SystemExit.
            print("-----------------------获取到json:" + response.text + "------------------------------")
            return

        try:
            model = data['strategyModel']
            design_strategy_item = DesignStrategyItem()  # type: DesignStrategyItem
            design_strategy_item['category'] = model['categoryName']
            design_strategy_item['title'] = model['title']
            design_strategy_item['description'] = model['description']
            design_strategy_item['content'] = model['context']
            design_strategy_item['html_url'] = response.url
        except Exception as e:
            # Unexpected payload shape: dump it, log it, and count the
            # failure against the proxy that served it.
            print("-----------------------获取到json:" + response.text + "------------------------------")
            log.warn("%s ( refer: %s )" % (e, response.url))
            if config.USE_PROXY:
                # The request may have been sent without a proxy; a missing
                # key must not raise from inside the error handler.
                proxy = response.meta.get('proxy')
                if proxy:
                    proxy_pool.add_failed_time(proxy.replace('http://', ''))
            return

        # Yield outside the try block so only item construction is guarded.
        yield design_strategy_item
Пример #2
0
 def process_exception(self, request, exception, spider):
     """Count a failure against the request's proxy after a download error.

     The proxy address is read from ``request.meta['proxy']`` with the
     ``http://`` prefix removed. Any error raised while reporting (for
     example a missing ``proxy`` entry) is passed to ``log.error`` and
     otherwise ignored. No value is returned explicitly.
     """
     try:
         report_failure = proxy_pool.add_failed_time
         report_failure(request.meta['proxy'].replace('http://', ''))
     except Exception as err:
         log.error(err)
 def process_response(self, request, response, spider):
     """Count a failure against the proxy for out-of-range status codes.

     A status below 200 or at/above 400 causes the address stored in
     ``request.meta['proxy']`` (``http://`` prefix removed) to be passed
     to ``proxy_pool.add_failed_time``; a ``KeyError`` raised while doing
     so is ignored. The response is always returned unchanged.
     """
     status_ok = not (response.status < 200 or response.status >= 400)
     if status_ok:
         return response

     try:
         report_failure = proxy_pool.add_failed_time
         report_failure(request.meta['proxy'].replace('http://', ''))
     except KeyError:
         # No proxy recorded on this request: nothing to penalise.
         pass
     return response
Пример #4
0
 def process_response(self, request, response, spider):
     """Print the response status and penalise the proxy on bad statuses.

     Every response is printed as ``CatchException:<url> <status>``. When
     the status is below 200 or at/above 400, the proxy address is taken
     from ``request.meta['splash']['args']['proxy']`` if a ``splash``
     entry exists in ``request.meta``, otherwise from
     ``request.meta['proxy']``, stripped of ``http://`` and passed to
     ``proxy_pool.add_failed_time``. ``KeyError`` is ignored; any other
     exception is passed to ``log.error``. The response is always returned.
     """
     print("CatchException:" + request.url + " " + str(response.status))
     if not (response.status < 200 or response.status >= 400):
         return response

     try:
         via_splash = 'splash' in request.meta
         report_failure = proxy_pool.add_failed_time
         if via_splash:
             proxy_url = request.meta['splash']['args']['proxy']
         else:
             proxy_url = request.meta['proxy']
         report_failure(proxy_url.replace('http://', ''))
     except KeyError:
         # No proxy information attached to this request.
         pass
     except Exception as err:
         log.error(err)
     return response
Пример #5
0
 def download_img(img_url, file_path):
     """Download ``img_url`` into ``file_path``, best effort.

     When ``config.USE_PROXY`` is set, a proxy picked by
     ``proxy_pool.random_choice_proxy()`` is used for ``http`` requests and
     is reported through ``proxy_pool.add_failed_time`` if the download
     does not succeed (non-200 status or an error while fetching/writing).

     :param img_url: URL of the image to fetch.
     :param file_path: destination path; opened in binary write mode.
     :return: None. Download errors are not raised to the caller.
     """
     proxies = None
     proxy = ''
     if config.USE_PROXY:
         proxy = proxy_pool.random_choice_proxy()
         proxies = {
             'http': "http://%s" % proxy,
         }

     downloaded = False
     try:
         response = requests.get(img_url, stream=True, proxies=proxies)
         try:
             if response.status_code == 200:
                 with open(file_path, 'wb') as f:
                     for chunk in response.iter_content(1024):
                         f.write(chunk)
                 downloaded = True
         finally:
             # With stream=True the connection stays checked out until the
             # body is fully consumed or the response is closed explicitly.
             response.close()
     except Exception:
         # Best-effort download: swallow network/file errors, but let
         # KeyboardInterrupt/SystemExit propagate (a bare ``except`` would
         # have swallowed those as well).
         downloaded = False

     # Report the proxy exactly once, outside the try block, so an error in
     # the reporting call itself cannot trigger a second report.
     if not downloaded and config.USE_PROXY:
         proxy_pool.add_failed_time(proxy)
Пример #6
0
 def parse_content(self, response):
     """Parse a picture-album JSON response and yield DesignPictureItems.

     The body is expected to be a JSON object whose ``dataImg`` list holds
     entries with an ``album`` list; each album entry carries the image
     record under ``l``. One item is yielded per image record. Tags are
     collected by translating the record's ``zid``, ``sid``, ``a``,
     ``coid``, ``hxid`` and ``pid`` values through the corresponding
     lookup tables; missing keys and blank labels are skipped.

     :param response: response object exposing ``text``, ``url`` and a
         ``meta`` mapping containing ``cid`` and ``title``.
     :return: generator of ``DesignPictureItem``; nothing is yielded when
         the body is not valid JSON.
     """
     fid = utils.get_uuid()
     cid = response.meta['cid']  # read but not used below
     title = response.meta['title']
     try:
         data = json.loads(response.text)
     except ValueError:
         # json.JSONDecodeError subclasses ValueError; a bare ``except``
         # would also swallow KeyboardInterrupt/SystemExit.
         print("-----------------------获取到json:" + response.text +
               "------------------------------")
         return

     # (lookup table, key in the image record) pairs, in tag order.
     tag_sources = (
         (ZONE_TYPE, 'zid'),
         (STYLE_ID, 'sid'),
         (AREA, 'a'),
         (COLOR_ID, 'coid'),
         (HX_ID, 'hxid'),
         (PART_ID, 'pid'),
     )

     for _data_img in data['dataImg']:
         for data_album in _data_img['album']:
             data_img = data_album['l']
             # http://pic.to8to.com/case/1605/05/20160505_f0af86a239d0b02e9635a47ih5l1riuq_sp.jpg
             img_url = 'http://pic.to8to.com/case/{short_name}'.format(
                 short_name=data_img['s'])
             if self.design_picture_service.is_duplicate_url(img_url):
                 # NOTE(review): this abandons the rest of the album on the
                 # first duplicate; confirm ``continue`` was not intended.
                 break
             sub_title = data_img['t']
             original_width = data_img['w']
             original_height = data_img['h']

             tags = []
             for table, key in tag_sources:
                 try:
                     label = table[data_img[key]]
                 except KeyError:
                     # Key absent from the record or value not in the table.
                     continue
                 # Was ``is not None or not ...strip() == ''``: that accepted
                 # blank labels and raised AttributeError on None.
                 if label is not None and label.strip() != '':
                     tags.append(label)

             try:
                 design_picture_item = DesignPictureItem(
                 )  # type: DesignPictureItem
                 design_picture_item['fid'] = fid
                 design_picture_item['html_url'] = response.url
                 design_picture_item['img_url'] = img_url
                 design_picture_item['tags'] = tags
                 design_picture_item['title'] = title
                 design_picture_item['sub_title'] = sub_title
                 design_picture_item['img_width'] = str(original_width)
                 design_picture_item['img_height'] = str(original_height)
                 # The description mirrors the title.
                 design_picture_item['description'] = design_picture_item[
                     'title']
                 yield design_picture_item
             except Exception as e:
                 print("-----------------------获取到json:" + response.text +
                       "------------------------------")
                 log.warn("%s ( refer: %s )" % (e, response.url))
                 if config.USE_PROXY:
                     # The request may have been sent without a proxy; a
                     # missing key must not raise from the error handler.
                     proxy = response.meta.get('proxy')
                     if proxy:
                         proxy_pool.add_failed_time(
                             proxy.replace('http://', ''))