def item_parse(self, _configs: list, response, response1=None) -> dict:
    """Generate items by applying every extraction config to ``response``.

    NOTE(review): this function is a generator. The ``dict`` return
    annotation is kept unchanged from the original signature, but callers
    receive an iterator and must iterate over it.

    Args:
        _configs: list of field-extraction settings. Each entry is read as
            ``entry['list']`` (its ``'v'`` value decides whether a node
            list is selected first) and ``entry['data']`` (per-field
            settings; each one's ``'En'`` value is used as the field name).
        response: the response, or already extracted data, to parse.
        response1: passed as the third argument to ``S.select_content``.
            It is replaced by ``response`` whenever ``response`` has a
            ``url`` attribute.

    Yields:
        Every truthy value returned by ``self.item_db_parse(configs,
        result)``, where ``result`` maps field names to cleaned values.
    """
    context = response if hasattr(response, 'url') else response1

    for configs in _configs:
        converted = self.change_response_f_type(configs, response)
        list_rule = configs['list']

        # Work out which nodes the per-field settings are applied to.
        if list_rule['v']:
            nodes = S.select_content(converted, list_rule, context) or []
        elif isinstance(converted, list):
            nodes = converted
        else:
            nodes = [converted]

        for node in nodes:
            if not node:
                # An empty node ends the whole generator, not only the
                # current config; remaining nodes and configs are skipped.
                return

            result = {}
            for field in configs['data']:
                name = field['En']
                result[name] = S.replace_all(
                    S.select_content(node, field, context))

            item = self.item_db_parse(configs, result)
            if not item:
                continue

            # Keep a running count of produced items in self.state
            # (the original note describes this as a persisted record).
            self.state['items_count'] = self.state.get('items_count', 0) + 1
            yield item
def infoParse(self, response):
    """Parse ``response`` into at most one ``CyzoneItem``.

    This is a generator. If ``checkTimeError(response)`` returns a truthy
    value, that value is yielded and nothing else is produced.
    NOTE(review): presumably that value is a replacement request for a
    response that failed a timing check - confirm against checkTimeError.

    Otherwise the extraction config for ``response.url`` is obtained from
    ``self.configChance``, every field in ``configs['data']`` is selected
    and cleaned, and the item is yielded only when the field named by
    ``configs['list']['check']`` has a truthy value.

    Args:
        response: response object exposing a ``url`` attribute.

    Yields:
        Either the truthy result of ``checkTimeError(response)``, or a
        ``CyzoneItem`` with ``'result'``, ``'keys'`` and ``'db'`` set.

    Raises:
        KeyError: if the config lacks one of the keys read here, or if the
            ``check`` field name is not among the extracted fields.
    """
    request = checkTimeError(response)
    if request:
        yield request
        return

    configs = self.configChance(response.url)

    result = {}
    for config in configs['data']:
        # The response doubles as the third argument of select_content.
        result[config['En']] = S.replace_all(
            S.select_content(response, config, response))

    # Build the item once, only after the early-exit check above.
    list_config = configs['list']
    item = CyzoneItem()
    item['result'] = result
    item['keys'] = list_config['keys']
    item['db'] = list_config['db']

    # Drop the item when the designated check field came back empty.
    if result[list_config['check']]:
        yield item