Example #1
def getCompany(request):
    content = request.POST.get("content")
    if not content:
        logger.info("数据不能为空")
        return HttpResponse(AjaxJson.getUnSuccessData("数据不能为空"))
    result = qichachastart.getCompanyInfoByName(content)
    jsonInfo = AjaxJson.getDumpsAjaxJsonByData(result)
    return HttpResponse(jsonInfo)
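These are plain Django function views. As a rough sketch of how such a view might be exposed, assuming it lives in an app's views.py (the route path below is illustrative, not taken from the original project):

# hypothetical urls.py wiring for the views shown in these examples
from django.urls import path
from . import views

urlpatterns = [
    path('getCompany/', views.getCompany),
]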
Example #2
def findDownLoadUrl(request):
    content = request.POST.get("content")
    if not content:
        logger.info("数据不能为空")
        return HttpResponse(AjaxJson.getUnSuccessData("数据不能为空"))
    # NOTE: the parsed payload is currently unused and an empty result is
    # returned, so the download-URL lookup appears to be a stub here
    data_json = simplejson.loads(content)
    result = {}
    jsonInfo = AjaxJson.getDumpsAjaxJsonByData(result)
    return HttpResponse(jsonInfo)
Example #3
def participle(request):
    content = request.POST.get("content")
    if not content:
        return HttpResponse(AjaxJson.getUnSuccessData("文章不能为空"))
    filePath = file_path.stopwords_file_path
    keyWords = NLP_API.fenci_get(content, filePath)
    logger.info('获取文章分词+participle+完成')
    jsonInfo = AjaxJson.getDumpsAjaxJsonByData(keyWords)
    return HttpResponse(jsonInfo)
Example #4
def autoSelect(request):
    if not request.body:
        logger.info("数据不能为空")
        return HttpResponse(AjaxJson.getUnSuccessData("数据不能为空"))
    body = str(request.body, encoding="utf-8")
    data_json = AjaxJson.loadsJsonByStrData(body)
    html = data_json["html"]
    result = autoselect.Auto_select(html)
    jsonInfo = AjaxJson.getDumpsAjaxJsonByData(result)
    return HttpResponse(jsonInfo)
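Unlike the form-encoded views above, this view reads a raw JSON body. A minimal client-side sketch, assuming the view is mounted at /autoSelect/ on a local dev server (host and path are hypothetical):

# post a raw JSON body, matching the request.body handling in autoSelect
import json
import requests

payload = {"html": "<html><body>...</body></html>"}
resp = requests.post("http://localhost:8000/autoSelect/",
                     data=json.dumps(payload).encode("utf-8"),
                     headers={"Content-Type": "application/json"})
print(resp.text)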
Example #5
def correctArea(request):
    if not request.body:
        logger.info("数据不能为空")
        return HttpResponse(AjaxJson.getUnSuccessData("数据不能为空"))
    body = str(request.body, encoding="utf-8")
    data_json = AjaxJson.loadsJsonByStrData(body)
    point = data_json["point"]
    url = data_json["url"]
    result = Correct.return_area(url, point)
    jsonInfo = AjaxJson.getDumpsAjaxJsonByData(result)
    return HttpResponse(jsonInfo)
Example #6
def similar(request):
    content = request.POST.get("content")
    if not content:
        logger.info("数据不能为空")
        return HttpResponse(AjaxJson.getUnSuccessData("数据不能为空"))
    data_list = simplejson.loads(content)
    if len(data_list) < 1:
        return HttpResponse(AjaxJson.getUnSuccessData("数据不能为空"))
    result = main.similar(data_list=data_list)
    logger.info('数据similar加工完成')
    jsonInfo = AjaxJson.getDumpsAjaxJsonByData(result)
    return HttpResponse(jsonInfo)
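Here the JSON payload travels inside the form field "content" rather than the raw body. A sketch of a matching client call (host and path are hypothetical):

# send a JSON-encoded list as the "content" form field, as similar() expects
import json
import requests

data_list = ["第一段文本", "第二段文本"]
resp = requests.post("http://localhost:8000/similar/",
                     data={"content": json.dumps(data_list, ensure_ascii=False)})
print(resp.text)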
Example #7
def polarityEn(request):
    if not request.body:
        logger.info("数据不能为空")
        return HttpResponse(AjaxJson.getUnSuccessData("数据不能为空"))
    body = str(request.body, encoding="utf-8")
    data_json = AjaxJson.loadsJsonByStrData(body)
    st = data_json["st"]
    if language_identification.language_identification(st) == 'en':
        result = polarity_en.polarity_en(st)
        data = {'polarity_en': result}
        jsonInfo = AjaxJson.getDumpsAjaxJsonByData(data)
        return HttpResponse(jsonInfo)
    # non-English input: return an error payload rather than falling
    # through and implicitly returning None
    return HttpResponse(AjaxJson.getUnSuccessData("仅支持英文文本"))
Example #8
def download(request):
    if not request.body:
        logger.info("数据不能为空")
        return HttpResponse(AjaxJson.getUnSuccessData("数据不能为空"))
    body = str(request.body, encoding="utf-8")
    data_json = AjaxJson.loadsJsonByStrData(body)
    down_load_path = file_path.down_load_file
    result = downloadUtil.geturllist(contents=data_json["content"],
                                     url_in=data_json["url"],
                                     suffix_path=down_load_path)
    jsonInfo = AjaxJson.getDumpsAjaxJsonByData(result)
    return HttpResponse(jsonInfo)
Example #9
def findLongest(request):
    if not request.body:
        logger.info("数据不能为空")
        return HttpResponse(AjaxJson.getUnSuccessData("数据不能为空"))
    body = str(request.body, encoding="utf-8")
    data_json = AjaxJson.loadsJsonByStrData(body)
    string1 = data_json["string1"]
    string2 = data_json["string2"]
    if not string1 or not string2:
        logger.info("string1或者string2不能为空")
        return HttpResponse(AjaxJson.getUnSuccessData("string1或者string2不能为空"))
    result = longest_repeating_strings.find_longest_repeating_strings(
        string1, string2)
    jsonInfo = AjaxJson.getDumpsAjaxJsonByData(result)
    return HttpResponse(jsonInfo)
Example #10
def getLatAndLng(address):
    address = quote(address)
    url = "https://apis.map.qq.com/ws/geocoder/v1/?key=" + QQ_MAP_KEY + "&address=" + address
    response = requests.get(url, timeout=3)
    data = {}
    text = response.text
    # convert the response text into a JSON object
    response = AjaxJson.loadsJsonByStrData(text)
    # a non-zero status means the geocoding request failed
    if response["status"] != 0:
        data['lat'] = None
        data['lng'] = None
        return data
    data['lat'] = response['result']['location']['lat']
    data['lng'] = response['result']['location']['lng']
    return data
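A short caller sketch; since the helper returns None coordinates on failure, callers should check before using them (the sample address is arbitrary):

# hypothetical caller of getLatAndLng
loc = getLatAndLng("北京市海淀区")
if loc['lat'] is None:
    print('geocoding failed')
else:
    print(loc['lat'], loc['lng'])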
Example #11
def company_infoget(url, cookie):
    global count
    global article_num
    print("已爬取数据:", count)

    headers = {
        'User-Agent': proxymiddlewares.get_random_user_agent(),
        'cookie': cookie,
        'referer': 'http://www.qichacha.com/',
        'accept-language': 'zh-CN,zh;q=0.9,en;q=0.8',
        'accept-encoding': 'json, br',
        'accept': '*/*',
        'Connection': 'keep-alive',
        'x-requested-with': 'XMLHttpRequest',
        'Cache-Control': 'max-age=0',
        'Host': 'www.qichacha.com'
    }
    dataJson = {}
    try:
        count = count + 1
        url_company = url
        sys_time.sleep(get_random_time())
        headers['referer'] = url_company
        req = request.Request(
            url=url_company,
            headers=headers,
            origin_req_host=proxymiddlewares.get_that_random_ip())
        htmls = request.urlopen(req, timeout=30)
        soup = BeautifulSoup(htmls.read(), 'html.parser')
        unique = url.split('_')[1].replace('.html', '')

        gongsimingcheng = soup.select(".content .row  h1")
        if len(gongsimingcheng) == 0:
            print('企业名称:无')
            # keep a value so the Baidu Baike lookup below cannot hit an
            # undefined qiye_name
            qiye_name = '无'
            dataJson["enterpriseName"] = "无"
        else:
            qiye_name = gongsimingcheng[0].get_text()
            print('企业名称:', qiye_name)
            dataJson["enterpriseName"] = qiye_name

        # collect the section titles so each field can be located by its label
        title_list = []
        cdes_list = soup.select('.content .cdes')
        for item in cdes_list:
            title_list.append(item.get_text())
        cvlu = soup.select(".row .cvlu")

        # use a membership test: list.index() raises ValueError when the
        # label is missing, so it can never return a negative index
        if '电话:' not in title_list:
            dianhua = []
        else:
            index = title_list.index('电话:')
            dianhua = cvlu[index].select('span')
        if len(dianhua) == 0:
            print('企业电话:无')
            dataJson["enterprisePhone"] = "无"
        else:
            data1 = dianhua[0].get_text().strip()
            if not data1:
                data1 = '无'
            print('企业电话:', data1)
            # the phone value could be used to detect a lapsed login:
            # if is_login(data1):
            #     count = count - 1
            #     article_num = article_num - 1
            #     login()
            #     raise Exception("登陆失效,重新登陆")
            dataJson["enterprisePhone"] = data1

        if '官网:' not in title_list:
            guanwang = []
        else:
            index = title_list.index('官网:')
            guanwang = cvlu[index].select('a')
        if len(guanwang) == 0:
            print('企业官网:无')
            dataJson["enterpriseOfficialNetwork"] = "无"
        else:
            data1 = guanwang[0].get('href')
            if data1 is None:
                data1 = '无'
            print('企业官网:', data1)
            dataJson["enterpriseOfficialNetwork"] = data1

        if '邮箱:' not in title_list:
            youxiang = []
        else:
            index = title_list.index('邮箱:')
            youxiang = cvlu[index].select('a')
        if len(youxiang) == 0:
            print('企业邮箱:无')
            dataJson["enterpriseEmail"] = '无'
        else:
            data = youxiang[0].get('href')
            if data is None:
                data = '无'
            print('企业邮箱:', data)
            dataJson["enterpriseEmail"] = data

        if '地址:' not in title_list:
            dizhi = []
        else:
            index = title_list.index('地址:')
            dizhi = cvlu[index].select('a')
        if len(dizhi) == 0:
            print('企业地址:无')
            dataJson["enterpriseAddress"] = '无'
        else:
            data1 = dizhi[0].get_text().strip()
            if not data1:
                data1 = '无'
            print('企业地址:', data1)
            dataJson["enterpriseAddress"] = data1
            latLng = QQMapHelp.getLatAndLng(data1)
            dataJson['lat'] = latLng['lat']
            dataJson['lng'] = latLng['lng']

        jianjie = soup.select('#textShowMore')
        if len(jianjie) == 0:
            print('企业简介:无')
            dataJson["enterpriseIntroduce"] = "无"
        else:
            data1 = jianjie[0].get_text()
            print('企业简介:', data1)
            dataJson["enterpriseIntroduce"] = data1

        # fetch the company's Baidu Baike introduction
        url_baidubaike = '%s%s' % ('https://baike.baidu.com/item/',
                                   quote(qiye_name))
        req_baidubaike = request.Request(url_baidubaike)
        html_data = request.urlopen(req_baidubaike, timeout=30)
        html_baidubaike = BeautifulSoup(html_data.read(), 'html.parser')
        article_baibubaike = html_baidubaike.select(
            'div.lemma-summary > div.para')
        qiyearticle_baibubaike = []
        if len(article_baibubaike) == 0:
            print('企业百度百科简介:无')
            dataJson["enterpriseBaiDuIntroduce"] = "无"
        else:
            # strip whitespace runs inside each paragraph, then join them
            delimiter = ''
            for i in article_baibubaike:
                data1 = i.get_text().strip('\n\t\r').strip().split()
                qiyearticle_baibubaike.append(delimiter.join(data1))
            qiyearticle_baibubaike = delimiter.join(qiyearticle_baibubaike)
            print('企业百度百科简介:', qiyearticle_baibubaike)
            dataJson["enterpriseBaiDuIntroduce"] = qiyearticle_baibubaike

        gsxx = soup.select('#Cominfo table')[1].select('td')
        if len(gsxx) == 0:
            print('企业工商信息:无')
            dataJson["enterpriseBusinessInformation"] = "无"
        else:
            # the business-info table lays out label/value cells in pairs,
            # so every value sits at a fixed odd td offset
            enterpriseBusinessInformation = {}
            fields = [
                ('注册资本:', 'registeredCapital', 1),
                ('实缴资本:', 'paidCapital', 3),
                ('经营状态:', 'operationState', 5),
                ('成立日期:', 'createTime', 7),
                ('统一社会信用代码:', 'unifiedSocialCreditCode', 9),
                ('纳税人识别号:', 'taxpayerIdentificationNumber', 11),
                ('注册号:', 'registrationNumber', 13),
                ('组织机构代码:', 'organizationCode', 15),
                ('公司类型:', 'companyType', 17),
                ('所属行业:', 'industry', 19),
                ('核准日期:', 'dateOfApproval', 21),
                ('登记机关:', 'registrationAuthority', 23),
                ('所属地区:', 'affiliatedArea', 25),
                ('英文名称:', 'englishName', 27),
                ('曾用名:', 'nameUsedBefore', 29),
                ('参保人数:', 'insuredNumber', 31),
                ('人员规模:', 'personnelScale', 33),
                ('营业期限:', 'timeLimitForBusiness', 35),
            ]
            for label, key, idx in fields:
                value = gsxx[idx].get_text().strip()
                print(label, value)
                enterpriseBusinessInformation[key] = value

            # the address cell carries extra lines; keep only the first
            address = gsxx[37].get_text().strip().split('\n')[0]
            print('企业地址:', address)
            enterpriseBusinessInformation["enterpriseAddress"] = address

            scope = gsxx[39].get_text().strip()
            print('经营范围:', scope)
            enterpriseBusinessInformation["scopeOfOperation"] = scope

            dataJson[
                'enterpriseBusinessInformation'] = enterpriseBusinessInformation

        # executive information
        ggxx = soup.select('#Mainmember table tr')
        gg = []
        if len(ggxx) == 0:
            print('企业高管信息:无')
            dataJson["enterpriseExecutivesInformation"] = "无"
        else:
            for i in range(len(ggxx)):
                if i == 0:
                    continue  # skip the header row
                data = ggxx[i].select('td')
                # td[1] holds the position link, td[2] the executive's name
                name = data[2].get_text().strip()
                zhiwei = data[1].select('a')[0].get_text().strip()
                xx = {'position': zhiwei, 'executiveName': name}
                gg.append(xx)
            print('企业高管信息:', gg)
            dataJson["enterpriseExecutivesInformation"] = gg

        # shareholder information
        gdxx = soup.select('#Sockinfo table tr')
        gd = []
        if len(gdxx) == 0:
            print('企业股东信息:无')
            dataJson["enterpriseShareholderInformation"] = "无"
        else:
            for i in range(len(gdxx)):
                if i == 0:
                    continue  # skip the header row
                data = gdxx[i].select('td')
                name = data[1].select('a')[0].get_text().strip().split('\n')[0]
                bili = data[2].get_text().strip()
                jine = data[3].get_text().strip()
                time = data[4].get_text().strip()
                xx = {
                    'shareholder': name,
                    'proportionOfCapital': bili,
                    'subscribeContribution': jine,
                    'investmentTime': time
                }
                gd.append(xx)
            print('企业股东信息:', gd)
            dataJson["enterpriseShareholderInformation"] = gd

        # investment information
        tzxx = soup.select('#touzilist > table tr')
        tz = []
        if len(tzxx) == 0:
            print('企业投资信息:无')
            dataJson["enterpriseInvestmentInformation"] = '无'
        else:
            for i in range(len(tzxx)):
                if i == 0:
                    continue  # skip the header row
                data = tzxx[i].select('td')
                gsname = data[0].get_text().strip()
                frname = data[1].get_text().strip().split('\n')[0]
                ziben = data[2].get_text().strip()
                zhanbi = data[3].get_text().strip()
                time = data[4].get_text().strip()
                zhuangtai = data[5].get_text().strip()
                xx = {
                    'nameOfInvestedCompany': gsname,
                    'legalRepresentativeInvested': frname,
                    'registeredCapital': ziben,
                    'investmentRatio': zhanbi,
                    'registrationTime': time,
                    'status': zhuangtai
                }
                tz.append(xx)
            print('企业投资信息:', tz)
            dataJson["enterpriseInvestmentInformation"] = tz

        # litigation information
        nav_heads = soup.select('.company-nav-tab .company-nav-head')
        head_names = []
        for nav_head in nav_heads:
            head_names.append(nav_head.select("h2")[0].get_text())
        # membership test instead of a raw index(): list.index() raises
        # ValueError when the heading is absent, and 0 is a valid position
        if "法律诉讼" in head_names:
            susongSoup = nav_heads[head_names.index("法律诉讼")].select("span")
        else:
            susongSoup = []
        if len(susongSoup) > 0:
            # strip non-digits so counts like "999+" still parse
            sifaNum = re.sub(r"\D", "", susongSoup[0].get_text())
        else:
            sifaNum = '0'
        if int(sifaNum) > 0:
            sys_time.sleep(get_random_time())
            # e.g. https://www.qichacha.com/company_getinfos?unique=9cce0780ab7644008b73bc2120479d31&companyname=%E5%B0%8F%E7%B1%B3%E7%A7%91%E6%8A%80%E6%9C%89%E9%99%90%E8%B4%A3%E4%BB%BB%E5%85%AC%E5%8F%B8&tab=susong
            url_sfx = 'https://www.qichacha.com/company_getinfos?unique=%s&companyname=%s&tab=susong' % (
                unique, qiye_name)
            url_sfx = quote(url_sfx, safe=";/?:@&=+$,", encoding="utf-8")
            headers['referer'] = url_sfx
            req = request.Request(
                url=url_sfx,
                headers=headers,
                origin_req_host=proxymiddlewares.get_that_random_ip())
            htmls = request.urlopen(req, timeout=30).read()
            htmls = str(htmls, encoding="utf-8")
            soup = BeautifulSoup(htmls, 'html.parser')
            # judicial documents live under #wenshulist > table
            sffx = soup.select('#wenshulist > table > tr')
        else:
            sffx = []
        sf = []
        if len(sffx) == 0:
            print('企业司法信息:无')
            dataJson["enterpriseJudicialInformation"] = '无'
        else:
            for i in range(len(sffx)):
                if i == 0:
                    continue  # skip the header row
                data = sffx[i].select('td')
                time = data[2].get_text().strip()
                caseName = data[1].select('a')[0].get_text().strip()
                caseNum = data[3].get_text().strip()
                caseIdentity = data[4].get_text().strip()
                executePlace = data[5].get_text().strip()
                xx = {
                    'createDate': time,
                    'caseName': caseName,
                    'caseNumber': caseNum,
                    'caseIdentity': caseIdentity,
                    'courtOfExecution': executePlace
                }
                sf.append(xx)
            print('企业司法信息:', sf)
            dataJson["enterpriseJudicialInformation"] = sf

        # product / operating-status information
        if "经营状况" in head_names:
            produceSoup = nav_heads[head_names.index("经营状况")].select("span")
        else:
            produceSoup = []
        if len(produceSoup) > 0:
            # strip non-digits so counts like "999+" still parse
            jinpinNum = re.sub(r"\D", "", produceSoup[0].get_text())
        else:
            jinpinNum = '0'
        if int(jinpinNum) > 0:
            sys_time.sleep(get_random_time())
            url_sfx = 'https://www.qichacha.com/company_getinfos?unique=%s&companyname=%s&tab=run' % (
                unique, qiye_name)
            url_sfx = quote(url_sfx, safe=";/?:@&=+$,", encoding="utf-8")
            headers['referer'] = url_sfx
            req = request.Request(
                url=url_sfx,
                headers=headers,
                origin_req_host=proxymiddlewares.get_that_random_ip())
            htmls = request.urlopen(req, timeout=30).read()
            htmls = str(htmls, encoding="utf-8")
            soup = BeautifulSoup(htmls, 'html.parser')
            # competing-product information
            jpxx = soup.select('#productlist  table  tr')
        else:
            jpxx = []
        jp = []
        if len(jpxx) == 0:
            print('企业竞品信息:无')
            dataJson["enterprisesCompetitiveInformation"] = '无'
        else:
            for i in range(len(jpxx)):
                if i == 0:
                    continue
                data = jpxx[i].select('td')
                name = data[2].get_text().strip()
                area = data[5].get_text().strip()
                ronzi = data[3].get_text().strip()
                time = data[4].get_text().strip()
                introduce = data[6].get_text().strip()
                xx = {
                    'product': name,
                    'area': area,
                    'financingInformation': ronzi,
                    'createTime': time,
                    'productIntroduce': introduce
                }
                jp.append(xx)
            print('企业竞品信息:', jp)
            dataJson["enterprisesCompetitiveInformation"] = jp

    except IndexError as e:
        print("无法提取信息的企业有:", url)
        with open(error_url_file, 'a', encoding='utf-8') as file_error:
            file_error.write(url + '\n')
        # pass the exception as a separate print argument; concatenating an
        # Exception to a str with '+' would itself raise a TypeError
        print('找不到企业信息,请查看关键字是否有误!', e)
    except Exception as e:
        print("无法提取信息的企业有:", url)
        with open(error_url_file, 'a', encoding='utf-8') as file_error:
            file_error.write(url + '\n')
        print('标签信息有误!', e)
    # serialize the collected data and return it (it could also be written to a file)
    jsonStr = AjaxJson.toJsonStrData(dataJson)
    return jsonStr
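Since the function serializes its result with AjaxJson.toJsonStrData, a caller can presumably parse the returned string back into a dict with the standard json module (the URL and cookie below are placeholders):

# hypothetical caller: parse the crawler's JSON string result
import json

json_str = company_infoget('https://www.qichacha.com/firm_abc123.html', cookie='...')
info = json.loads(json_str)
print(info.get('enterpriseName'))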