Exemplo n.º 1
0
 def assembly_detail(cls, company: Company, raw_company_detail: dict):
     """Copy detail-level fields from a raw detail record onto ``company``.

     Every attribute listed below is looked up in ``raw_company_detail``
     under its source key; when the key is absent, the listed placeholder
     is stored instead. ``cls`` is not used by this body.

     Args:
         company: Object that receives the attributes (mutated in place).
         raw_company_detail: Mapping holding the raw detail fields.
     """
     # (attribute on company, key in raw_company_detail, placeholder)
     field_map = (
         ('homepage', 'websiteList', '-'),
         ('register_code', 'regNumber', '-'),
         ('organization_code', 'orgNumber', '-'),
         ('english_name', 'property3', '-'),
         ('authorization', 'regInstitute', '-'),
         ('actual_capital', 'actualCapital', '缺省'),
         ('industry', 'industry', '-'),
         ('used_name', 'historyNames', '-'),
     )
     for attr, source_key, placeholder in field_map:
         setattr(company, attr,
                 raw_company_detail.get(source_key, placeholder))
Exemplo n.º 2
0
 def assembly_detail(cls, company: Company, raw_company_detail: dict):
     """Copy detail-level fields from a raw detail record onto ``company``.

     Missing keys fall back to a placeholder (``'-'``, or ``'缺省'`` for
     ``actual_capital``). ``homepage`` is truncated to its first 30
     characters and ``industry`` is read from the nested ``'Industry'``
     mapping. ``cls`` is not used by this body.

     An explicit ``None`` under ``'WebSite'`` or ``'Industry'`` (and a
     non-mapping ``'Industry'`` value) is treated like a missing key, so
     the slice and the nested lookup no longer raise on such records.

     Args:
         company: Object that receives the attributes (mutated in place).
         raw_company_detail: Mapping holding the raw detail fields.
     """
     # dict.get only applies its default when the key is absent; a key that
     # is present with a None value must be handled before slicing.
     website = raw_company_detail.get('WebSite')
     company.homepage = '-' if website is None else website[0:30]

     company.register_code = raw_company_detail.get('No', '-')
     company.organization_code = raw_company_detail.get('OrgNo', '-')
     company.english_name = raw_company_detail.get('EnglishName', '-')
     company.authorization = raw_company_detail.get('BelongOrg', '-')
     company.actual_capital = raw_company_detail.get('RealCapi', '缺省')

     # The industry name sits one level down, under the same key name.
     industry_info = raw_company_detail.get('Industry')
     if isinstance(industry_info, dict):
         company.industry = industry_info.get('Industry', '-')
     else:
         company.industry = '-'

     company.used_name = raw_company_detail.get('OriginalName', '-')
Exemplo n.º 3
0
def start():
    """Entry point: search every configured keyword and log each company.

    Keywords are read from the module-level ``keywords`` global (an empty
    list is used when it is not defined). For each keyword the search
    results are walked one by one: the basic record and then the detail
    record are assembled onto a single reused ``Company`` object, which is
    logged and cleared before the next result.
    """
    for keyword in globals().get('keywords', list()):
        hits = tyc_client.search(keyword)
        # Logged estimate: two seconds per search hit.
        estimated_seconds = 2 * len(hits)
        log.info('正在处理爬取[%s],大概需要%s秒' % (keyword, estimated_seconds))

        # One Company instance is reused for every hit of this keyword.
        company = Company()
        for hit in hits:
            company.keyword = keyword
            manager.assembly(company, hit)

            # Fetch the detail record by the hit's id and merge it in.
            detail = tyc_client.search_detail(hit.get('id'))
            manager.assembly_detail(company, detail)

            log.info(company)
            # NOTE: persisting via save(company.__dict__) is currently disabled.
            company.clear()

    log.info("completed")
Exemplo n.º 4
0
def start():
    """Entry point: search every configured keyword and log each company.

    Keywords are read from the module-level ``keywords`` global. When that
    global is not defined (or is empty/None) nothing is searched and only
    the final ``'completed'`` message is logged, instead of failing with a
    ``TypeError`` while iterating over ``None``.

    For each keyword the search results are walked one by one: the basic
    record and then the detail record are assembled onto a single reused
    ``Company`` object, which is logged and cleared before the next result.
    """
    # globals().get() yields None when ``keywords`` is missing; fall back to
    # an empty list so the loop below is simply skipped.
    keywords = globals().get('keywords') or []
    for keyword in keywords:
        raw_companies = qcc_client.search(keyword)
        log.info('正在处理爬取[%s]' % keyword)
        # One Company instance is reused for every result of this keyword.
        company = Company()
        for raw_company in raw_companies:
            company.keyword = keyword
            # Populate the basic company fields.
            manager.assembly(company, raw_company)
            raw_company_detail = qcc_client.search_detail(raw_company.get('KeyNo'))
            # Add the detail-level fields.
            manager.assembly_detail(company, raw_company_detail)
            # NOTE: persisting via save(company.__dict__) is currently disabled.
            log.info(company)
            company.clear()
    log.info('completed')
Exemplo n.º 5
0
def start():
    """Entry point: search every configured keyword and save each company.

    Keywords are read from the module-level ``keywords`` global; when it is
    missing or empty no search is performed. For each keyword the search
    results are walked one by one: the basic record and then the detail
    record are assembled onto a single reused ``Company`` object, whose
    ``__dict__`` is passed to ``save`` before the object is cleared.
    ``'completed'`` is logged at the end in every case.
    """
    for keyword in globals().get('keywords') or ():
        hits = qcc_client.search(keyword)
        # Logged estimate: two seconds per search hit plus four.
        estimated_seconds = 2 * len(hits) + 4
        log.info('正在处理爬取[%s],大概需要%s秒' % (keyword, estimated_seconds))

        # One Company instance is reused for every hit of this keyword.
        company = Company()
        for hit in hits:
            company.keyword = keyword
            # Populate the basic company fields.
            manager.assembly(company, hit)
            # Fetch the detail record by KeyNo and add its fields.
            detail = qcc_client.search_detail(hit.get('KeyNo'))
            manager.assembly_detail(company, detail)
            # Persist, then reset the shared object for the next hit.
            save(company.__dict__)
            company.clear()

    log.info('completed')
Exemplo n.º 6
0
 def assembly(cls, company: Company, raw_company: dict):
     """Copy the basic fields of a raw search record onto ``company``.

     Each attribute is looked up in ``raw_company`` under its source key,
     with ``'-'`` stored when the key is absent. ``lat_long`` is stored as
     the string form of a ``{'lat': ..., 'long': ...}`` dict built from the
     ``'X'`` and ``'Y'`` values. ``cls`` is not used by this body.

     Args:
         company: Object that receives the attributes (mutated in place).
         raw_company: Mapping holding the raw search-result fields.
     """
     # (attribute on company, key in raw_company)
     simple_fields = (
         ('name', 'Name'),
         ('representative', 'OperName'),
         ('address', 'Address'),
         # TODO: region, city and district all receive the raw 'AreaCode'.
         ('region', 'AreaCode'),
         ('city', 'AreaCode'),
         ('district', 'AreaCode'),
         ('biz_status', 'Status'),
         ('credit_code', 'CreditCode'),
         ('email', 'Email'),
         ('phone', 'ContactNumber'),
         ('biz_scope', 'Scope'),
         ('company_type', 'EconKind'),
         ('taxpayer_code', 'CreditCode'),
         ('registered_capital', 'RegistCapi'),
     )
     for attr, source_key in simple_fields:
         setattr(company, attr, raw_company.get(source_key, '-'))

     lat_value = raw_company.get('X', '-')
     long_value = raw_company.get('Y', '-')
     company.lat_long = str({'lat': lat_value, 'long': long_value})

     company.setup_time = raw_company.get('StartDate', '-')
Exemplo n.º 7
0
 def assembly(cls, company: Company, raw_company: dict):
     """Copy the basic fields of a raw search record onto ``company``.

     Missing keys fall back to ``'-'``. A few values are cleaned up on the
     way: ``<em>``/``</em>`` markers are stripped from the name, only the
     first ``;``-separated e-mail entry is kept, tab characters are removed
     from the e-mail and the company type, and the establishment time is
     cut to its first 10 characters. ``lat_long`` is stored as the string
     form of a ``{'lat': ..., 'long': ...}`` dict. ``cls`` is not used by
     this body.

     For the fields that are post-processed as strings, an explicit
     ``None`` value is treated like a missing key so the record does not
     abort assembly.

     Args:
         company: Object that receives the attributes (mutated in place).
         raw_company: Mapping holding the raw search-result fields.
     """
     def _text(key):
         # Missing key and explicit None both yield the '-' placeholder, so
         # the string methods applied by the caller always have a str.
         value = raw_company.get(key)
         return '-' if value is None else value

     company.name = _text('name').replace('<em>', '').replace('</em>', '')
     company.representative = raw_company.get('legalPersonName', '-')
     company.address = raw_company.get('regLocation', '-')
     company.region = raw_company.get('base', '-')
     company.city = raw_company.get('city', '-')
     company.district = raw_company.get('district', '-')
     company.biz_status = raw_company.get('regStatus', '-')
     company.credit_code = raw_company.get('creditCode', '-')
     # The fallback must be a string: the previous list default (['-'])
     # has no .split() and raised AttributeError when 'emails' was absent.
     company.email = _text('emails').split(';')[0].replace('\t', '')
     company.phone = raw_company.get('phoneNum', '-')
     company.biz_scope = raw_company.get('businessScope', '-')
     company.company_type = _text('companyOrgType').replace('\t', '')
     company.taxpayer_code = raw_company.get('creditCode', '-')
     company.registered_capital = raw_company.get('regCapital', '-')
     company.lat_long = str({
         'lat': raw_company.get('latitude', '-'),
         'long': raw_company.get('longitude', '-')
     })
     # 'estiblishTime' is the key spelling used by the raw record.
     company.setup_time = _text('estiblishTime')[0:10]