def parse(self, response):
    """Collect the shareholder records of a JSON response into one item.

    The body is parsed as JSON and ``data.result`` is read from it. For every
    entry the id, the name, the comma-joined tag names and the
    ``amomon``/``percent`` values of its capital entries are kept. A single
    ``QiyemuluItem`` is yielded with ``companyId`` taken from
    ``response.meta['id']`` and ``holderInfo`` set to ``str()`` of the record
    list. Nothing is yielded when ``data`` or ``result`` is empty. Any
    exception is printed and swallowed.
    """
    # print(response.text)
    item = QiyemuluItem()
    try:
        payload = json.loads(response.text).get("data", "")
        # Shareholder information
        holders = payload.get("result", "") if payload else None
        if holders:
            records = []
            for holder in holders:
                record = {
                    'id': holder.get("id", ''),
                    'name': holder.get("name", ''),
                }
                # NOTE(review): 'tags' is written for every holder (empty
                # string when there is no tagList) -- confirm this matches
                # the intended nesting.
                record['tags'] = ','.join(
                    [tag.get("name", "0")
                     for tag in holder.get("tagList", '') or ()]
                )
                # Each capital entry overwrites the previous one, so the
                # values of the last entry are the ones that remain.
                for stake in holder.get("capital", '') or ():
                    record['amomon'] = stake.get("amomon", "")
                    record['percent'] = stake.get("percent", "")
                records.append(record)
            item['companyId'] = response.meta['id']
            item["holderInfo"] = str(records)
            yield item
    except Exception as e:
        print(e.args)
def content_parse(self, response):
    """Add the page's container text to the carried record and yield it.

    The record dict arrives in ``response.meta['infoDict']``. All text nodes
    under ``div.container`` are concatenated, spaces are removed, and the
    result is stored under ``'container'``. One ``QiyemuluItem`` is yielded
    with ``companyId`` from ``response.meta['id']`` and ``allInfo`` set to
    ``str()`` of the record.
    """
    item = QiyemuluItem()
    record = response.meta['infoDict']
    text_nodes = response.css("div.container ::text").extract()
    joined_text = ''.join(text_nodes)
    record['container'] = joined_text.replace(" ", "")
    item['companyId'] = response.meta['id']
    item['allInfo'] = str(record)
    yield item
def parse(self, response):
    """Collect the court-case records of a JSON response into one item.

    ``data.resultList`` is read from the JSON body. For every case the names
    of its plaintiffs and defendants are comma-joined (each key is only
    written when the corresponding list is non-empty) and the remaining case
    fields are copied as-is. A single ``QiyemuluItem`` is yielded with
    ``companyId`` from ``response.meta['id']`` and ``allInfo`` set to
    ``str()`` of the record list. Nothing is yielded when ``data`` or
    ``resultList`` is empty. Any exception is printed and swallowed.
    """
    # print(response.text)
    item = QiyemuluItem()
    try:
        payload = json.loads(response.text).get("data", "")
        cases = payload.get("resultList", "") if payload else None
        if cases:
            copied_fields = (
                'caseReason', 'caseNo', 'startDate', 'litigant',
                'contractors', 'judge', 'court', 'courtroom',
            )
            records = []
            for case in cases:
                record = {}
                for role in ("plaintiff", "defendant"):
                    parties = case.get(role, "")
                    if parties:
                        record[role] = ','.join(
                            [party.get('name', '') for party in parties]
                        )
                for field in copied_fields:
                    record[field] = case.get(field, '')
                records.append(record)
            item['companyId'] = response.meta['id']
            item["allInfo"] = str(records)
            yield item
    except Exception as e:
        print(e.args)
def parse(self, response):
    """Collect the executive records of a JSON response into one item.

    ``data.result`` is read from the JSON body. For every entry the id, the
    name and the comma-joined ``typeJoin`` values are kept. A single
    ``QiyemuluItem`` is yielded with ``companyId`` from
    ``response.meta['id']`` and ``allInfo`` set to ``str()`` of the record
    list. Nothing is yielded (and no item is built) when ``data`` or
    ``result`` is empty. Any exception, malformed JSON included, is printed
    and swallowed.
    """
    # print(response.text)
    try:
        payload = json.loads(response.text).get("data", "")
        # Executive information
        staff = payload.get("result", "") if payload else None
        if not staff:
            return
        records = [
            {
                'id': person.get("id", ''),
                'name': person.get("name", ''),
                # ``or []`` keeps a JSON null from raising in join(), which
                # would otherwise discard every record of this response.
                'typeJoin': ','.join(person.get("typeJoin") or []),
            }
            for person in staff
        ]
        item = QiyemuluItem()
        item['companyId'] = response.meta['id']
        item["allInfo"] = str(records)
        yield item
    except Exception as e:
        print(e.args)
def parse(self, response):
    """Collect the change records of a JSON response into one item.

    ``data.result`` is read from the JSON body. For every entry the change
    item and the change/creation times are copied, and ``contentBefore`` /
    ``contentAfter`` are passed through ``remove_tags``. A single
    ``QiyemuluItem`` is yielded with ``companyId`` from
    ``response.meta['id']`` and ``allInfo`` set to ``str()`` of the record
    list. Nothing is yielded (and no item is built) when ``data`` or
    ``result`` is empty. Any exception, malformed JSON included, is printed
    and swallowed.
    """
    # print(response.text)
    try:
        payload = json.loads(response.text).get("data", "")
        changes = payload.get("result", "") if payload else None
        if not changes:
            return
        records = []
        for change in changes:
            records.append({
                'changeItem': change.get("changeItem", ''),
                'changeTime': change.get("changeTime", ''),
                'createTime': change.get("createTime", ''),
                # ``or ''`` covers a JSON null, so that a single empty field
                # cannot raise and discard every record of this response.
                # NOTE(review): presumably remove_tags is w3lib's, which
                # rejects None -- confirm against the file's imports.
                'contentBefore': remove_tags(change.get("contentBefore") or ''),
                'contentAfter': remove_tags(change.get("contentAfter") or ''),
            })
        item = QiyemuluItem()
        item['companyId'] = response.meta['id']
        item["allInfo"] = str(records)
        yield item
    except Exception as e:
        print(e.args)
def parse(self, response):
    """Turn a company-search JSON response into one item per company.

    Nothing is produced unless ``response.status`` is 200. The body must be
    JSON with ``state == "ok"`` and a non-empty ``data`` list; otherwise a
    "no data" notice naming ``response.meta['name']`` is printed. Every entry
    of ``data`` except the last one is yielded as its own ``QiyemuluItem``.

    Any exception, malformed JSON included, is printed and swallowed.
    """
    if response.status != 200:
        return

    # (item field, key in the search result) pairs copied as-is; the two
    # groups keep the original assignment order around ``orgNumber``.
    fields_before_org = (
        ('alias', 'alias'),
        ('legalPersonName', 'legalPersonName'),
        ('phoneList', 'phone'),
        ('emailList', 'emails'),
        ('websiteList', 'websites'),
        ('estiblishTime', 'estiblishTime'),
        ('regCapital', 'regCapital'),
        ('creditCode', 'creditCode'),
        ('taxNumber', 'creditCode'),
    )
    fields_after_org = (
        ('regStatus', 'regStatus'),
        ('companyOrgType', 'companyOrgType'),
        ('industry', 'categoryStr'),
        ('regLocation', 'regLocation'),
        ('logo', 'logo'),
        ('businessScope', 'businessScope'),
        ('base', 'base'),
        ('district', 'district'),
        ('historyNames', 'historyNames'),
        # Data supplemented later
        ('intro', 'intro'),
        ('regNumber', 'regNumber'),
        ('actualCapital', 'actualCapital'),
        ('taxQualification', 'taxQualification'),
        ('englishName', 'property3'),
        ('staffNumRange', 'staffNumRange'),
        ('socialStaffNum', 'socialStaffNum'),
        ('operatingPeriod', 'operatingPeriod'),
        ('approvedTime', 'approvedTime'),
        ('regInstitute', 'regInstitute'),
        ('updatetime', 'updatetime'),
    )

    try:
        # Parsed inside the try so that a non-JSON body is reported like any
        # other failure instead of escaping the callback.
        json_data = json.loads(response.text)
        data = None
        if json_data.get("state", '') == "ok":
            data = json_data.get('data', '')
        if not data:
            # NOTE(review): the notice is printed both for a non-"ok" state
            # and for an empty data list.
            print(response.meta['name'], "没有数据")
            return

        # NOTE(review): the final entry of ``data`` is skipped on purpose to
        # keep the existing behaviour; the reason is not visible here --
        # confirm.
        for info in data[:-1]:
            # A fresh item per result, so that an item already handed on is
            # not mutated by the following iterations.
            item = QiyemuluItem()
            item['cname'] = response.meta['name']
            item['companyId'] = info.get('id', "")
            # ``or ""`` covers a JSON null for the name.
            item['companyName'] = remove_tags(info.get('name') or "")
            for field, key in fields_before_org:
                item[field] = info.get(key, "")
            # The organisation code is characters 9-17 (1-based) of the
            # 18-character unified social credit code; character 18 is the
            # check digit. ``or ""`` covers a JSON null.
            credit_code = info.get('creditCode') or ""
            item['orgNumber'] = credit_code[8:17]
            for field, key in fields_after_org:
                item[field] = info.get(key, "")
            yield item
            # break
    except Exception as e:
        print(e.args)
def parse(self, response):
    """Build one item with a company's base information from a JSON response.

    The ``data`` object of the JSON body is read; when it is empty nothing is
    yielded. Otherwise its values are copied into a single ``QiyemuluItem``:
    ``phoneList``, ``emailList`` and ``district`` are set to empty strings,
    ``websiteList`` is stored as ``str()`` of the source value, and
    ``operatingPeriod`` is ``fromTime`` and ``toTime`` joined by ``/``. Any
    exception is printed and swallowed.
    """
    # print(response.text)
    try:
        company = json.loads(response.text).get("data", "")
        if not company:
            return

        def value_of(key):
            return company.get(key, "")

        item = QiyemuluItem()
        from_time = value_of("fromTime")
        to_time = value_of("toTime")
        # (item field, value) pairs, assigned in this order.
        assignments = [
            ('companyId', value_of("id")),  # company id; was response.meta['id']
            ('companyName', value_of("name")),  # company name; was response.meta['name']
            ('alias', value_of("alias")),  # alias
            ('historyNames', value_of("historyNames")),  # former names
            ('legalPersonName', value_of("legalPersonName")),  # legal representative
            ('phoneList', ""),  # contact list
            ('emailList', ""),  # e-mail list
            ('websiteList', str(value_of("websiteList"))),  # websites
            ('estiblishTime', value_of("estiblishTime")),  # establishment date
            ('regNumber', value_of("regNumber")),  # business registration number
            ('regCapital', value_of("regCapital")),  # registered capital
            ('actualCapital', value_of("actualCapital")),  # paid-in capital
            ('creditCode', value_of("creditCode")),  # unified credit code
            ('taxNumber', value_of("taxNumber")),  # taxpayer identification number
            ('orgNumber', value_of("orgNumber")),  # organisation code
            ('taxQualification', value_of("taxQualification")),  # taxpayer qualification
            ('englishName', value_of("property3")),  # English name
            ('regStatus', value_of("regStatus")),  # operating status
            ('staffNumRange', value_of("staffNumRange")),  # staff size
            ('socialStaffNum', value_of("socialStaffNum")),  # number of insured staff
            ('companyOrgType', value_of("companyOrgType")),  # company type
            ('industry', value_of("industry")),  # industry
            # operating period
            ('operatingPeriod', "/".join((str(from_time), str(to_time)))),
            ('regLocation', value_of("regLocation")),  # registered address
            ('approvedTime', value_of("approvedTime")),  # approval date
            ('regInstitute', value_of("regInstitute")),  # registration authority
            ('logo', value_of("logo")),  # company logo address
            # equity structure chart
            # item['equityUrl'] = baseInfo.get("equityUrl","")
            ('updatetime', value_of("updatetime")),  # latest update time
            ('businessScope', value_of("businessScope")),  # business scope
            ('intro', value_of("baseInfo")),  # introduction
            ('base', value_of("base")),  # short name of the region
            ('district', ""),  # short name of the district
        ]
        for field, value in assignments:
            item[field] = value
        yield item
        # print(item)
    except Exception as e:
        print(e.args)
def parse(self,response): print(response.text) item = QiyemuluItem()