Пример #1
0
 def item_parse(self, _configs: list, response, response1=None):
     '''
     Generator: extract the configured fields and yield the built items.

     @params _configs  -> list of field-scraping settings. Each entry is
                          read as configs['list'] (its 'v' key selects the
                          extraction branch) and configs['data'] (an
                          iterable of field configs that carry an 'En' key
                          used as the result key).
     @params response  -> Response, or already extracted content, to parse
     @params response1 -> extra argument forwarded to S.select_content; it
                          is replaced by ``response`` whenever ``response``
                          has a ``url`` attribute
     @output --> yields every truthy value returned by
                 self.item_db_parse(configs, result), where ``result`` is
                 the field -> value dict built for one node

     NOTE: this function contains ``yield``, so calling it returns a
     generator rather than a dict; nothing runs until it is iterated.
     '''
     if hasattr(response, 'url'):
         response1 = response
     for configs in _configs:
         converted = self.change_response_f_type(configs, response)
         list_config = configs['list']
         if list_config['v']:
             # A list rule is configured: let the selector produce the nodes.
             nodes = S.select_content(converted, list_config, response1) or []
         elif isinstance(converted, list):
             nodes = converted
         else:
             # Wrap a single node so the loop below handles both shapes.
             nodes = [converted]
         for node in nodes:
             if not node:
                 # NOTE(review): a falsy node ends the WHOLE generator, so
                 # the remaining nodes and remaining configs are skipped.
                 # Kept as in the original; confirm this is intended.
                 return
             result = {}
             for config in configs['data']:
                 result[config['En']] = S.replace_all(
                     S.select_content(node, config, response1))
             item = self.item_db_parse(configs, result)
             if item:
                 # Keep a running count of emitted items in self.state.
                 self.state['items_count'] = self.state.get(
                     'items_count', 0) + 1
                 yield item
Пример #2
0
 def infoParse(self, response):
     '''
     Generator: parse one page into a single CyzoneItem.

     @params response -> Response being parsed; its ``url`` selects the
                         field config through self.configChance
     @output --> yields either the truthy value returned by
                 checkTimeError(response) (presumably a retry request;
                 verify against that helper) and then stops, or the
                 populated CyzoneItem when the configured check field
                 holds a truthy value
     '''
     retry_request = checkTimeError(response)
     if retry_request:
         yield retry_request
         return
     configs = self.configChance(response.url)
     list_config = configs['list']
     result = {}
     for config in configs['data']:
         result[config['En']] = S.replace_all(
             S.select_content(response, config, response))
     # Build the item once, only after the early-exit path is ruled out.
     item = CyzoneItem()
     item['result'] = result
     item['keys'] = list_config['keys']
     item['db'] = list_config['db']
     # Only emit the item when the field named by 'check' was extracted.
     if result[list_config['check']]:
         yield item