示例#1
0
def start():
    """Crawl company data for every keyword in the module-level ``keywords``.

    For each keyword the function queries ``qcc_client.search`` and, for every
    raw hit returned, builds a fresh ``Company``, fills it through
    ``manager.assembly`` and ``manager.assembly_detail`` (the latter using the
    result of ``qcc_client.search_detail`` for the hit's ``KeyNo``), and logs
    the resulting object.

    If the module defines no ``keywords`` global (or it is empty), nothing is
    crawled and only the final ``completed`` message is logged.
    """
    # ``keywords`` is looked up dynamically; treat a missing/None value as
    # "nothing to do" instead of crashing on iteration.
    keywords = globals().get('keywords') or ()
    for keyword in keywords:
        raw_companies = qcc_client.search(keyword)
        log.info('正在处理爬取[%s]' % keyword)
        for raw_company in raw_companies:
            # One Company per hit: reusing a single instance would make every
            # hit share the same object and leak fields between companies.
            company = Company()
            company.keyword = keyword
            # Fill in the basic company information.
            manager.assembly(company, raw_company)
            raw_company_detail = qcc_client.search_detail(
                raw_company.get('KeyNo'))
            # Fill in the detailed company information.
            manager.assembly_detail(company, raw_company_detail)
            log.info(company)
    log.info('completed')
示例#2
0
    def __post_process__(self):
        """Build a ``Company`` for each entry of ``self.src`` whose detail lookup succeeds.

        For every entry the detail endpoint (``TycEntApi`` formatted with the
        entry's ``id``) is requested. Entries with an empty response, or whose
        decoded response does not report ``state == 'ok'``, are skipped. For
        the rest a ``Company`` tagged with ``self.keyword`` is populated from
        the entry and from the response's ``data`` field, then appended to
        ``self.companies``.

        Does nothing when ``self.src`` is empty.
        """
        if not self.src:
            return

        for entry in self.src:
            detail_url = TycEntApi.format(eid=entry.get("id"))
            raw_detail = Request(detail_url,
                                 proxy=True,
                                 headers=REQUEST_HEADERS).data
            if not raw_detail:
                continue

            payload = json.loads(raw_detail)
            if payload.get("state") != 'ok':
                continue

            extras = payload.get("data", {})
            result = Company()
            result.keyword = self.keyword
            # Copy the main entity information from the search entry.
            TycClient.TycEntHelper.__copy_props__(entry, result)
            # Copy the organisation code and registered capital from the detail data.
            TycClient.TycEntHelper.__copy_extras__(extras, result)
            self.companies.append(result)
示例#3
0
    def __post_process__(self):
        """Turn every raw search hit in ``self.src`` into a ``Company`` entity.

        For each hit this method:

        1. creates a ``Company`` tagged with ``self.keyword`` and fills it via
           ``EntityHelper.__basic_info__``;
        2. looks for the entry of ``self.brand_and_agencies`` whose
           ``graphId`` equals the hit's ``id`` and, if found, passes it to
           ``EntityHelper.__another_info__`` (best effort: a failure here only
           logs a warning);
        3. requests the portrait, shareholder and company-staff endpoints and,
           whenever a response reports ``state == 'ok'``, forwards its data to
           the matching ``EntityHelper`` method;
        4. appends the entity to ``self.companies``.

        Does nothing when ``self.src`` is empty.
        """
        if not self.src:
            return

        for company in self.src:
            company_entity = Company()
            # Keyword that was used to search for this company.
            company_entity.keyword = self.keyword
            # Basic information of the company entity.
            self.EntityHelper.__basic_info__(company, company_entity)

            company_id = company.get("id")

            # Financing stage / competitor information. This step is
            # best-effort: any ordinary failure is logged and processing
            # continues. Only ``Exception`` is caught so that
            # KeyboardInterrupt / SystemExit still propagate.
            try:
                brand_and_agency = next(
                    (candidate for candidate in self.brand_and_agencies
                     if company_id == candidate.get('graphId')),
                    None)
                if brand_and_agency is None:
                    logging.warning('竟品信息获取失败!')
                else:
                    self.EntityHelper.__another_info__(brand_and_agency,
                                                       company_entity)
            except Exception:
                logging.warning('竟品信息获取失败!', exc_info=True)

            # Company detail (portrait).
            detail_resp = Request(TycPortraitApi.format(eid=company_id),
                                  proxy=True,
                                  headers=REQUEST_HEADERS).data
            if detail_resp:
                company_portrait = json.loads(detail_resp)
                # Supplementary company detail information.
                if company_portrait.get("state") == 'ok':
                    self.EntityHelper.__additional__(
                        company_portrait.get("data", {}), company_entity)

            # Shareholder information.
            shareholder_request_body = {
                "graphId": company_id,
                "hkVersion": 1,
                "typeList": {
                    "shareHolder": {
                        "pageNum": 1,
                        "pageSize": 20,
                        "required": "true",
                    },
                },
            }
            shareholder_resp = Request(TycShareholderPostApi,
                                       method='post',
                                       json=shareholder_request_body,
                                       proxy=True,
                                       headers=REQUEST_HEADERS).data
            if shareholder_resp:
                company_shareholder = json.loads(shareholder_resp)
                if company_shareholder.get("state") == 'ok':
                    # ``data`` may be present but null; fall back to an empty
                    # dict so the chained lookup cannot raise AttributeError.
                    shareholder_data = company_shareholder.get("data") or {}
                    self.EntityHelper.__shareholder__(
                        shareholder_data.get("shareHolder", {}),
                        company_entity)

            # Executive (company staff) information.
            manager_request_body = {
                "graphId": company_id,
                "hkVersion": 1,
                "typeList": {
                    "companyStaff": {
                        "pageNum": 1,
                        "pageSize": 20,
                        "required": "true",
                    },
                },
            }
            manager_resp = Request(TycEnterpriseManagerPostApi,
                                   method='post',
                                   json=manager_request_body,
                                   proxy=True,
                                   headers=REQUEST_HEADERS).data
            if manager_resp:
                company_manager = json.loads(manager_resp)
                if company_manager.get("state") == 'ok':
                    # Same null-``data`` guard as for the shareholder response.
                    manager_data = company_manager.get("data") or {}
                    self.EntityHelper.__company_manager__(
                        manager_data.get("companyStaff", {}),
                        company_entity)

            self.companies.append(company_entity)