def process(count):
    """Crawl name-to-ID mappings for one numbered enterprise-list shard.

    Reads enterprise names from ``./enterprise_list/nametoid<count>.txt`` and
    feeds each name to a ``NameToIDCrawler`` that restores its JSON state to
    ``./enterprise_crawler/nametoid/nametoid<count>.json``.

    :param count: integer shard index used to build both file names.
    """
    # Local import mirrors the original: logging is configured lazily so this
    # function can be used as a multiprocessing worker entry point.
    import run
    run.config_logging()
    # Build the shard-specific file names once; count may be an int, so it is
    # converted explicitly (the names themselves are already strings — the
    # original's extra str() wrappers were redundant and are dropped).
    nametoid_to_json_name = 'nametoid' + str(count) + '.json'
    crawler = NameToIDCrawler('./enterprise_crawler/nametoid/' + nametoid_to_json_name)
    nametoid_name = 'nametoid' + str(count) + '.txt'
    enterprise_list = get_enterprise_list('./enterprise_list/' + nametoid_name)
    print(len(enterprise_list))
    for ent_name in enterprise_list:
        # Lines read from the list file keep their trailing newline; strip it.
        ent_name = str(ent_name).rstrip('\n')
        print(
            '############ Start to crawl nametoid %d with name %s ################\n'
            % (count, ent_name))
        crawler.run(ent_name=ent_name)
        # Tail of the HunanCrawler URL table (the dict literal opens above this
        # chunk); this entry is intentionally left blank for Hunan.
        'open_detail_info_entry': ''
    }

    def __init__(self, json_restore_path):
        """Initialize the Hunan crawler on top of the generic Zongju crawler.

        :param json_restore_path: path of the JSON file used to persist /
            restore crawl state.
        """
        ZongjuCrawler.__init__(self, json_restore_path)
        self.json_restore_path = json_restore_path
        # Hunan uses its own parser subclass; it holds a back-reference to
        # this crawler instance.
        self.parser = HunanParser(self)


class HunanParser(ZongjuParser):
    """Parser for Hunan pages; reuses ZongjuParser behavior unchanged."""

    def __init__(self, crawler):
        # Keep a back-reference so parsing code can reach crawler state.
        # NOTE(review): ZongjuParser.__init__ is not called here — presumably
        # it only sets self.crawler too; confirm against the base class.
        self.crawler = crawler


if __name__ == '__main__':
    # Script entry point: crawl every enterprise id listed in hunan.txt.
    from CaptchaRecognition import CaptchaRecognition
    import run
    run.config_logging()
    # The captcha solver is shared class-wide, trained on Hunan captchas.
    HunanCrawler.code_cracker = CaptchaRecognition('hunan')
    crawler = HunanCrawler('./enterprise_crawler/hunan.json')
    enterprise_list = CrawlerUtils.get_enterprise_list(
        './enterprise_list/hunan.txt')
    # enterprise_list = ['430000000011972']
    for ent_number in enterprise_list:
        # Entries keep their trailing newline from the list file; strip it.
        ent_number = ent_number.rstrip('\n')
        settings.logger.info(
            '################### Start to crawl enterprise with id %s ###################\n' % ent_number)
        crawler.run(ent_number=ent_number)