def start():
    """Crawl company records for every configured search keyword.

    Reads the module-level ``keywords`` global. For each keyword it calls
    ``qcc_client.search`` and, for every returned hit, assembles a
    ``Company`` from the hit itself and from the detail record fetched with
    ``qcc_client.search_detail`` (looked up by the hit's ``KeyNo``), then
    logs the assembled company.

    A missing or empty ``keywords`` global is treated as "nothing to do".
    """
    # globals().get() yields None when ``keywords`` was never defined; fall
    # back to an empty tuple so the loop is skipped instead of raising
    # TypeError on iteration.
    keywords = globals().get('keywords') or ()

    for keyword in keywords:
        raw_companies = qcc_client.search(keyword)
        log.info('正在处理爬取[%s]' % keyword)

        for raw_company in raw_companies:
            # One fresh Company per hit, so attributes assembled for one hit
            # cannot carry over into the next one.
            company = Company()
            company.keyword = keyword

            # Assemble the company from the search hit.
            manager.assembly(company, raw_company)

            # Supplement it with the detail record.
            raw_company_detail = qcc_client.search_detail(
                raw_company.get('KeyNo'))
            manager.assembly_detail(company, raw_company_detail)

            log.info(company)

    log.info('completed')
def __post_process__(self):
    """Build ``Company`` objects from the raw search hits in ``self.src``.

    For each hit the detail endpoint (``TycEntApi`` formatted with the hit's
    ``id``) is requested. When the decoded response reports
    ``state == 'ok'``, a ``Company`` is created, tagged with
    ``self.keyword``, filled from the hit and from the response's ``data``
    section, and appended to ``self.companies``.

    Hits whose detail response is empty, is not valid JSON, is not a JSON
    object, or is not in the 'ok' state are skipped. Returns ``None``.
    """
    # Local import: only this block needs it, and the file's import section
    # is not visible from here.
    import logging

    if not self.src:
        return

    for hit in self.src:
        raw_detail = Request(TycEntApi.format(eid=hit.get("id")),
                             proxy=True, headers=REQUEST_HEADERS).data
        if not raw_detail:
            continue

        try:
            detail = json.loads(raw_detail)
        except ValueError:
            # json.JSONDecodeError is a ValueError. One undecodable response
            # should cost one record, not the whole batch.
            logging.warning('skipping detail response that is not valid JSON')
            continue

        if not isinstance(detail, dict) or detail.get("state") != 'ok':
            continue

        # ``get("data", {})`` would still yield None for an explicit JSON
        # null, so normalise with ``or``.
        extras = detail.get("data") or {}

        company = Company()
        company.keyword = self.keyword
        # Copy the main entity information from the search hit.
        TycClient.TycEntHelper.__copy_props__(hit, company)
        # Copy organization code and registered capital from the detail data.
        TycClient.TycEntHelper.__copy_extras__(extras, company)
        self.companies.append(company)
def __post_process__(self):
    """Turn the raw search hits in ``self.src`` into ``Company`` entities.

    For every hit this method:

    1. creates a ``Company`` tagged with ``self.keyword`` and copies the
       basic information from the hit;
    2. looks up the entry in ``self.brand_and_agencies`` whose ``graphId``
       equals the hit's ``id`` and applies it (best effort: any failure,
       including "no matching entry", is logged as a warning);
    3. enriches the entity from the portrait, shareholder and company-staff
       endpoints, each applied only when its response decodes to a JSON
       object with ``state == 'ok'``;
    4. appends the entity to ``self.companies`` regardless of which
       enrichment steps succeeded.

    Returns ``None``; does nothing when ``self.src`` is empty.
    """
    if not self.src:
        return

    def ok_payload(raw):
        """Decode *raw*; return the payload dict if its state is 'ok', else None."""
        if not raw:
            return None
        try:
            payload = json.loads(raw)
        except ValueError:
            # json.JSONDecodeError is a ValueError. An undecodable response
            # skips this enrichment step only.
            logging.warning('skipping response that is not valid JSON')
            return None
        if isinstance(payload, dict) and payload.get("state") == 'ok':
            return payload
        return None

    def section_body(graph_id, section):
        """Build the POST body asking for the first page of *section*."""
        return {
            "graphId": graph_id,
            "hkVersion": 1,
            "typeList": {
                section: {
                    "pageNum": 1,
                    "pageSize": 20,
                    "required": "true",
                },
            },
        }

    def section_data(payload, section):
        """Return ``payload['data'][section]``, tolerating a null ``data``."""
        return (payload.get("data") or {}).get(section, {})

    for company in self.src:
        company_entity = Company()
        # Keyword that was used to search for this company.
        company_entity.keyword = self.keyword
        # Basic information of the company entity.
        self.EntityHelper.__basic_info__(company, company_entity)

        graph_id = company.get("id")

        # Financing stage and competitor information (best effort).
        try:
            brand_and_agency = next(
                entry for entry in self.brand_and_agencies
                if entry.get('graphId') == graph_id
            )
            self.EntityHelper.__another_info__(brand_and_agency,
                                               company_entity)
        except Exception:
            # StopIteration (no matching entry) lands here as well.
            # KeyboardInterrupt/SystemExit are no longer swallowed.
            logging.warning('竟品信息获取失败!')

        # Company detail (portrait).
        detail_resp = Request(TycPortraitApi.format(eid=graph_id),
                              proxy=True, headers=REQUEST_HEADERS).data
        company_portrait = ok_payload(detail_resp)
        if company_portrait is not None:
            self.EntityHelper.__additional__(
                company_portrait.get("data", {}), company_entity)

        # Shareholder information.
        shareholder_resp = Request(
            TycShareholderPostApi, method='post',
            json=section_body(graph_id, "shareHolder"),
            proxy=True, headers=REQUEST_HEADERS).data
        company_shareholder = ok_payload(shareholder_resp)
        if company_shareholder is not None:
            self.EntityHelper.__shareholder__(
                section_data(company_shareholder, "shareHolder"),
                company_entity)

        # Executive (company staff) information.
        manager_resp = Request(
            TycEnterpriseManagerPostApi, method='post',
            json=section_body(graph_id, "companyStaff"),
            proxy=True, headers=REQUEST_HEADERS).data
        company_manager = ok_payload(manager_resp)
        if company_manager is not None:
            self.EntityHelper.__company_manager__(
                section_data(company_manager, "companyStaff"),
                company_entity)

        self.companies.append(company_entity)