def basic_project(self, response):
    """Parse a project detail page and post its basic information.

    Reads the project fields from the ``project_baseinfo``,
    ``project_approvalinfo`` and ``project_moneyincome`` blocks of the page,
    wraps them in ``templates.Project`` and yields one POST request to the
    ``addCompanyProject`` endpoint.

    :param response: detail-page response; ``response.meta['company_name']``
        must hold the name of the company the project belongs to.
    :return: generator yielding a single ``scrapy.Request``.
    """

    def strip_unit(text, pattern):
        # Return the value in front of the unit suffix matched by *pattern*.
        # A missing cell (None) or text without the expected unit is passed
        # through unchanged instead of raising, and the result is a plain
        # string (group 1) rather than the (value, unit) tuple that
        # ``re.findall(...)[0]`` produces for a two-group pattern.
        if text is None:
            return None
        found = re.search(pattern, text)
        return found.group(1) if found else text

    page = Selector(response=response)
    dd = page.xpath('//div[@id="project_baseinfo"]/dl/dd')

    def dd_text(index):
        # Text of the index-th <dd> of the base-info block.
        return dd[index].xpath('text()').extract_first()

    name = page.xpath('//span[@class="user-name"]/text()').extract_first()
    code = dd_text(0)
    docu_code = page.xpath(
        '//div[@id="project_approvalinfo"]/dl/dd[1]/text()').extract_first()
    level = page.xpath(
        '//div[@id="project_approvalinfo"]/dl/dd[2]/text()').extract_first()
    money = strip_unit(
        page.xpath('//div[@id="project_moneyincome"]/dl/dd[1]/text()').extract_first(),
        '(.*) (万元)',
    )
    acreage = strip_unit(dd_text(13), '(.*)(平方米/米)')

    basic = templates.Project(
        companyName=response.meta['company_name'],
        code=code,
        name=name,
        provinceCode=code,  # the page exposes a single number; reused for both
        unit=dd_text(2),
        unitLicenseNum=dd_text(3),
        catalog=dd_text(5),
        area=dd_text(6),
        purpose=dd_text(8),
        trait=dd_text(10),
        docuCode=docu_code,
        level=level,
        acreage=acreage,
        money=money,
    )
    basic_data = basic.data()
    yield scrapy.Request(
        url='https://api.maotouin.com/rest/companyInfo/addCompanyProject.htm',
        headers={'Content-Type': 'application/json'},
        callback=self.project_zz,
        body=json.dumps(basic_data),
        method='POST',
        meta={'type': '基本信息', 'company_name': basic_data['companyName']},
    )
def basic(self, response):
    """Parse a JSONP project payload and post the project's basic information.

    The response body is expected to look like ``jQuery<digits>_<digits>(<json>)``.
    The first entry of ``data.ProjectInfoList`` is mapped onto
    ``templates.Project`` and, when it carries a project name or number,
    posted to the ``addCompanyProject`` endpoint.

    :param response: JSONP response; ``response.meta['companyName']`` must
        hold the name of the company the project belongs to.
    :return: generator yielding at most one ``scrapy.Request``.
    """
    # Raw string: ``\d`` and ``\(`` are regex escapes, not string escapes.
    payload = re.split(r'jQuery\d+_\d+\(', response.text)[1]
    # Remove only the wrapper's closing parenthesis (and an optional ';').
    # Deleting every ')' would also strip parentheses that occur inside JSON
    # string values such as project names.
    payload = payload.rstrip()
    if payload.endswith(';'):
        payload = payload[:-1].rstrip()
    if payload.endswith(')'):
        payload = payload[:-1]
    data = json.loads(payload)['data']['ProjectInfoList'][0]

    basic = templates.Project(
        companyName=response.meta['companyName'],
        name=data['ProjectName'],
        code=data['ProjectNum'],
        provinceCode=data['ProjectNum'],
        area=data['AreaName'],
        unit=data['BuildCorpName'],
        unitLicenseNum=data['BuildCorpCode'],
        catalog=data['ProjectType'],
        acreage=data['AreaCode'],  # NOTE(review): 'AreaCode' used as acreage - confirm
        level=data['PrjApprovalLevelNum'],
        money=data['AllInvest'],
        trait=data['PrjPropertyNum'],
        docuCode=data['PrjApprovalNum'],
        purpose=data['ProjectType'],
    )
    basic_data = basic.data()

    # The template is built with ``name``/``code`` keywords, so the guard tests
    # the source fields directly instead of indexing the template output with
    # the payload's key names.
    if not (data['ProjectName'] or data['ProjectNum']):
        return

    print(basic_data, '基本信息')
    yield scrapy.Request(
        url='https://api.maotouin.com/rest/companyInfo/addCompanyProject.htm',
        body=json.dumps(basic_data),
        callback=self.project_zz,
        headers={'Content-Type': 'application/json'},
        method='POST',
        meta={'type': '基本信息', 'company_name': basic_data['companyName']},
    )
def project(self, response):
    """Parse a project detail page: post basic info and follow the sub-records.

    Yields, in order:

    * one POST with the project's basic information;
    * one GET per bidding row (``classContent t1``) to ``self.bid_info``;
    * one POST per drawing-review row (``classContent t2``);
    * one GET per contract row (``classContent t3``) to ``self.contract_info``;
    * one GET per construction-permit link (``classContent t4``) to
      ``self.construction_info``;
    * one GET per completion link (``classContent t5``) to ``self.finish_info``.

    Rows that carry no link are skipped so that one incomplete row does not
    abort the remaining sections.

    :param response: detail-page response; ``response.meta['companyName']``
        must hold the name of the company the project belongs to.
    :return: generator of ``scrapy.Request`` objects.
    """
    host = 'http://115.29.2.37:8080/'
    company_name = response.meta['companyName']

    # (field name, table row, table column) of every basic-info cell.
    cell = '//div[@class="detail_list"]/table/tr[%d]/td[%d]/text()'
    field_cells = (
        ('name', 1, 2),
        ('code', 2, 2),
        ('provinceCode', 2, 2),
        ('unit', 3, 2),
        ('catalog', 3, 4),
        ('unitLicenseNum', 4, 2),
        ('area', 4, 4),
        ('docuCode', 6, 2),
        ('level', 6, 4),
        ('money', 7, 2),
        ('acreage', 7, 4),
        ('trait', 8, 2),
        ('purpose', 8, 4),
    )
    attrs = [
        {'that': '', 'attr': cell % (row, col), 'name': field}
        for field, row, col in field_cells
    ]

    basic_data = templates.Projects('Project').html_analysis(response, attrs)
    basic_data['companyName'] = company_name
    b_data = templates.Project(**basic_data).data()
    print(b_data, '基本信息', b_data['companyName'])
    yield scrapy.Request(
        url='https://api.maotouin.com/rest/companyInfo/addCompanyProject.htm',
        body=json.dumps(b_data),
        callback=self.project_zz,
        headers={'Content-Type': 'application/json'},
        method='POST',
        meta={'type': '基本信息', 'company_name': b_data['companyName']},
    )

    page = Selector(response=response)

    # Bidding records: the first row is the table header.
    for row in page.xpath('//div[@class="classContent t1"]/table/tr')[1:]:
        href = row.xpath('./td[7]/a/@href').extract_first()
        if href is None:
            continue
        yield scrapy.Request(
            url=host + href,
            callback=self.bid_info,
            headers=heads,
            meta={'companyName': company_name},
        )

    # Drawing-review records are complete on this page and are posted directly.
    drawing_rows = page.xpath('//div[@class="classContent t2"]/table/tr')[1:]
    print(len(drawing_rows), '施工图纸审查----bbbbbbbbbbbbbbbbbbbbbbbbbbbbb', response.url)
    for row in drawing_rows:
        drawing = templates.MakeDrawing(
            censorNum=row.xpath('./td[2]/text()').extract_first(),
            surveyCorpName=row.xpath('./td[3]/text()').extract_first(),
            designCorpName=row.xpath('./td[4]/text()').extract_first(),
            censorCorpName=row.xpath('./td[5]/text()').extract_first(),
            censorEDate=row.xpath('./td[6]/text()').extract_first(),
            # Tie the record to its project and company, as the other
            # MakeDrawing call sites in this code base do.
            code=b_data['code'],
            companyName=company_name,
        )
        drawing_data = drawing.data()
        print(drawing_data, '施工图纸审查')
        yield scrapy.Request(
            url='https://api.maotouin.com/rest/companyInfo/addCompanyProjectCensor.htm',
            body=json.dumps(drawing_data),
            callback=self.project_zz,
            headers={'Content-Type': 'application/json'},
            method='POST',
            meta={'type': '施工图纸审查'},
        )

    # Contract filings: follow the detail link of every row.
    for row in page.xpath('//div[@class="classContent t3"]/table/tr')[1:]:
        href = row.xpath('./td[6]/a/@href').extract_first()
        print(href, '合同备案url')
        if href is None:
            continue
        yield scrapy.Request(
            url=host + href,
            callback=self.contract_info,
            meta={'companyName': company_name},
        )

    # Construction permits.
    for href in page.xpath('//div[@class="classContent t4"]/table/tr/td/a/@href').extract():
        yield scrapy.Request(
            url=host + href,
            callback=self.construction_info,
            meta={'companyName': company_name},
        )

    # Completion records.
    for href in page.xpath('//div[@class="classContent t5"]/table/tr/td/a/@href').extract():
        yield scrapy.Request(
            url=host + href,
            callback=self.finish_info,
            meta={'companyName': company_name},
        )
def project(self, response):
    """Parse a project detail page and post every record found on it.

    Posts the project's basic information, then one record per table row of
    the bidding (``project_step1``), drawing-review (``project_step2``),
    contract (``project_step3``), construction-permit (``project_step4``) and
    completion (``project_step5``) sections.

    :param response: detail-page response; ``response.meta['companyName']``
        must hold the name of the company the project belongs to.
    :return: generator of POST ``scrapy.Request`` objects.
    """
    api = 'https://api.maotouin.com/rest/companyInfo/'
    company_name = response.meta['companyName']
    page = Selector(response=response)

    def post(endpoint, payload, meta):
        # Build the JSON POST request for one record.
        return scrapy.Request(
            url=api + endpoint,
            body=json.dumps(payload),
            callback=self.project_zz,
            headers={'Content-Type': 'application/json'},
            method='POST',
            meta=meta,
        )

    def text(row, path):
        # First text node matched by *path* below *row*, or None.
        return row.xpath(path).extract_first()

    def rows(section_id):
        # Body rows of the table inside the section with the given id.
        return page.xpath('//div[@id="%s"]/table/tbody/tr' % section_id)

    # (field name, id of the <td> that holds it)
    field_ids = (
        ('name', 'lblPrjName'),
        ('code', 'lblPrjNum'),
        ('provinceCode', 'lblPrjNum'),
        ('catalog', 'lblPrjTypeNum'),
        ('unit', 'lblBuildCorpName'),
        ('unitLicenseNum', 'lblBuildCorpCode'),
        ('area', 'lblCountyNum'),
        ('docuCode', 'lblPrjApprovalNum'),
        ('level', 'lblPrjApprovalLevelNum'),
        ('money', 'lblAllInvest'),
        ('acreage', 'lblAllArea'),
        ('trait', 'lblPrjPropertyNum'),
        ('purpose', 'lblPrjFunctionNum'),
    )
    attrs = [
        {'that': '', 'attr': '//td[@id="%s"]/text()' % td_id, 'name': field}
        for field, td_id in field_ids
    ]
    basic_text = templates.Projects('Project').html_analysis(response=response, attrs=attrs)
    basic_text['companyName'] = company_name
    # The page prints '暂无' ("none yet") as a placeholder for a missing level.
    if basic_text['level'] == '暂无':
        basic_text['level'] = ''
    basic_data = templates.Project(**basic_text).data()
    print('基本信息', '*******************************', basic_data)
    yield post(
        'addCompanyProject.htm',
        basic_data,
        {'type': '基本信息', 'company_name': basic_data['companyName']},
    )

    project_code = basic_data['code']

    # Bidding records.
    for row in rows('project_step1'):
        tender_num = text(row, './td[7]/a/text()')
        bid_data = templates.Mark(
            tenderClass=text(row, './td[2]/text()'),
            tenderType=text(row, './td[3]/text()'),
            tenderCorpName=text(row, './td[4]/a/text()'),
            tenderResultDate=text(row, './td[5]/text()'),
            tenderMoney=text(row, './td[6]/text()'),
            tenderNum=tender_num,
            provinceTenderNum=tender_num,
            code=project_code,
            companyName=company_name,
        ).data()
        yield post('addCompanyProjectTender.htm', bid_data, {'type': '招标信息'})

    # Drawing-review records.
    for row in rows('project_step2'):
        drawing_data = templates.MakeDrawing(
            censorNum=text(row, './td[2]/text()'),
            provinceCensorNum=text(row, './td[3]/text()'),
            surveyCorpName=text(row, './td[4]/a/text()'),
            designCorpName=text(row, './td[5]/a/text()'),
            censorCorpName=text(row, './td[6]/a/text()'),
            censorEDate=text(row, './td[7]/a/text()'),
            code=project_code,
            companyName=company_name,
        ).data()
        yield post('addCompanyProjectCensor.htm', drawing_data, {'type': '施工图纸审查'})
        print('施工图纸审查', '*******************************', drawing_data)

    # Contract filings.
    for row in rows('project_step3'):
        contract_data = templates.Contract(
            contractType=text(row, './td[2]/text()'),
            recordNum=text(row, './td[3]/text()'),
            provinceRecordNum=text(row, './td[4]/text()'),
            contractMoney=text(row, './td[5]/text()'),
            contractDate=text(row, './td[6]/text()'),
            code=project_code,
            companyName=company_name,
        ).data()
        yield post('addCompanyProjectContract.htm', contract_data, {'type': '合同备案'})
        print('合同备案', '*******************************', contract_data)
    print(response.url, 'XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX')

    # Construction permits.
    for row in rows('project_step4'):
        licence_num = text(row, './td[2]/text()')
        construction_data = templates.ConstructionPermit(
            builderLicenceNum=licence_num,
            provinceBuilderLicenceNum=licence_num,
            consCorpName=text(row, './td[3]/a/text()'),
            contractMoney=text(row, './td[4]/text()'),
            area=text(row, './td[5]/text()'),
            code=project_code,
            createDate=text(row, './td[6]/text()'),
            companyName=company_name,
        ).data()
        yield post('addCompanyProjectBuilderLicence.htm', construction_data, {'type': '施工许可'})
        print('施工许可', '*******************************', construction_data)

    # Completion records.  This section used to re-read 'project_step4' (the
    # construction-permit table) and took both dates from the same cell.
    # NOTE(review): 'project_step5' and td[7] follow the step1..step4 /
    # td[2]..td[6] numbering used above - confirm against the live page.
    for row in rows('project_step5'):
        completion_data = templates.Completion(
            prjFinishNum=text(row, './td[2]/text()'),
            provincePrjFinishNum=text(row, './td[3]/a/text()'),
            factCost=text(row, './td[4]/text()'),
            factArea=text(row, './td[5]/text()'),
            factBeginDate=text(row, './td[6]/text()'),
            code=project_code,
            factEndDate=text(row, './td[7]/text()'),
            companyName=company_name,
        ).data()
        yield post('addCompanyProjectFinish.htm', completion_data, {'type': '竣工验收'})
        print('竣工验收', '*******************************', completion_data)
def info(self, response):
    """Parse a project detail page: post basic info and process sub-records.

    Yields, in order:

    * one POST with the project's basic information;
    * one GET per winning-bid row (first ``GridView1`` table) to ``self.win_bid``;
    * one POST per drawing-review row (second ``GridView1`` table);
    * one GET per contract row (third ``GridView1`` table) to
      ``self.contract_info``;
    * one POST per construction-permit row (fourth ``GridView1`` table);
    * one GET per completion row (``GV_Cert`` table) to ``self.completed_info``.

    Every API POST goes to the HTTPS endpoint.  Rows without a detail link
    are skipped, and a missing or unparseable review date no longer aborts
    the sections that follow it.

    :param response: detail-page response; ``response.meta['company_name']``
        must hold the name of the company the project belongs to.
    :return: generator of ``scrapy.Request`` objects.
    """
    site = 'http://hngcjs.hnjs.gov.cn'
    api = 'https://api.maotouin.com/rest/companyInfo/'
    company_name = response.meta['company_name']
    page = Selector(response=response)

    def post(endpoint, payload, meta):
        # Build the JSON POST request for one record.
        return scrapy.Request(
            url=api + endpoint,
            headers={'Content-Type': 'application/json'},
            callback=self.project_zz,
            body=json.dumps(payload),
            method='POST',
            meta=meta,
        )

    # Basic information; the first table row is skipped.
    info_rows = page.xpath('//div[@class="news_con"]/table/tr')[1:]

    def span_text(row, col):
        # Text of the <span> in column *col* of basic-info row *row*.
        return info_rows[row].xpath('./td[%d]/span/text()' % col).extract_first()

    code = span_text(0, 4)
    basic_zz = templates.Project(
        companyName=company_name,
        name=span_text(0, 2),
        code=code,
        unit=span_text(1, 2),
        area=span_text(1, 4),
        provinceCode=code,
        docuCode=span_text(3, 2),
        level=span_text(3, 4),
        money=span_text(4, 2),
        acreage=span_text(4, 4),
        trait=span_text(5, 2),
        purpose=span_text(5, 4),
    )
    b_data = basic_zz.data()
    print(b_data, '基本信息-------------------')
    yield post(
        'addCompanyProject.htm',
        b_data,
        {'type': '基本信息', 'company_name': company_name},
    )

    # The page contains several tables that share the id "GridView1"; they
    # are addressed by position.  The first row of each is its header.
    grids = page.xpath('//table[@id="GridView1"]')

    # Winning bids.
    for row in grids[0].xpath('./tr')[1:]:
        href = row.xpath('./td[3]/a/@href').extract_first()
        if href is None:
            continue
        yield scrapy.Request(
            url=site + href,
            callback=self.win_bid,
            meta={'company_name': company_name, 'code': code},
        )

    # Drawing reviews.
    for row in grids[1].xpath('./tr')[1:]:
        censor_num = row.xpath('./td[2]/text()').extract_first()
        drawing = templates.MakeDrawing(
            companyName=company_name,
            code=code,
            censorNum=censor_num,
            provinceCensorNum=censor_num,
            censorCorpName=row.xpath('./td[3]/text()').extract_first(),
        )
        e_data = drawing.data()
        raw_date = row.xpath('./td[4]/text()').extract_first()
        try:
            # The page shows "YYYY/MM/DD HH:MM:SS"; only the date is posted.
            e_data['censorEDate'] = time.strftime(
                "%Y-%m-%d", time.strptime(raw_date, "%Y/%m/%d %H:%M:%S"))
        except (TypeError, ValueError):
            # Missing (None) or differently formatted date: post the record
            # with the template's own censorEDate value instead of aborting
            # the whole callback.
            pass
        yield post('addCompanyProjectCensor.htm', e_data, {'type': '图纸审查'})
        print(drawing.data(), 'ttttttttttttttttttttttttttttttt', '图纸审查')

    # Contract filings.
    for row in grids[2].xpath('./tr')[1:]:
        href = row.xpath('./td[2]/a/@href').extract_first()
        if href is None:
            continue
        yield scrapy.Request(
            url=site + href,
            callback=self.contract_info,
            meta={
                'company_name': company_name,
                'recordNum': row.xpath('./td[2]/a/text()').extract_first(),
                'code': code,
            },
        )

    # Construction permits.
    for row in grids[3].xpath('./tr')[1:]:
        licence_num = row.xpath('./td[2]/text()').extract_first()
        permit = templates.ConstructionPermit(
            companyName=company_name,
            code=code,
            builderLicenceNum=licence_num,
            provinceBuilderLicenceNum=licence_num,
            consCorpName=row.xpath('./td[3]/text()').extract_first(),
        )
        yield post('addCompanyProjectBuilderLicence.htm', permit.data(), {'type': '施工许可'})
        print(permit.data(), 'ttttttttttttttttttttttttttttttt', '施工许可')

    # Completion records.
    for row in page.xpath('//table[@id="GV_Cert"]/tr')[1:]:
        href = row.xpath('./td[2]/a/@href').extract_first()
        if href is None:
            continue
        yield scrapy.Request(
            url=site + href,
            callback=self.completed_info,
            meta={'company_name': company_name, 'code': code},
        )
def company_project(self, response):
    """Parse a project page built from ``detailTable`` tables and post its records.

    The tables are addressed by position: 0 basic information, 1 bidding,
    2 contracts, 3 drawing review, 5 construction permit, 8 completion.
    Each section is converted with the matching ``templates`` class and
    posted to the API.  Drawing, permit and completion records are only
    posted when their primary number is present.

    Only the cells that are actually sent are read, so a table that lacks
    rows holding unused values no longer aborts the callback.

    :param response: project-page response; ``response.meta['company_name']``
        must hold the name of the company the project belongs to.
    :return: generator of POST ``scrapy.Request`` objects.
    """
    api = 'https://api.maotouin.com/rest/companyInfo/'
    company_name = response.meta['company_name']
    tables = Selector(response=response).xpath('//table[@class="detailTable"]')

    def cell(rows, row, col):
        # Text of cell (*row*, *col*) of a table's row list; both 0-based.
        return rows[row].xpath('./td')[col].xpath('text()').extract_first()

    def post(endpoint, payload, meta):
        # Build the JSON POST request for one record.
        return scrapy.Request(
            url=api + endpoint,
            headers={'Content-Type': 'application/json'},
            method='POST',
            body=json.dumps(payload),
            callback=self.project_zz,
            meta=meta,
        )

    # ---- basic information -------------------------------------------------
    basic_tr = tables[0].xpath('./tr')
    project_name = cell(basic_tr, 1, 3)
    project_number = cell(basic_tr, 1, 1)
    area = cell(basic_tr, 3, 3)
    # Collapse all whitespace inside the area text.
    area_data = ''.join(area.split()) if area is not None else ''
    basic_data = templates.Project(
        name=project_name,
        companyName=company_name,
        area=area_data,
        provinceCode=project_number,
        unit=cell(basic_tr, 2, 3),
        unitLicenseNum=cell(basic_tr, 3, 1),
        catalog=cell(basic_tr, 2, 1),
        trait=cell(basic_tr, 7, 3),
        purpose=cell(basic_tr, 8, 1),
        money=cell(basic_tr, 6, 1),
        acreage=cell(basic_tr, 6, 3),
        level=cell(basic_tr, 4, 3),
        docuCode=cell(basic_tr, 4, 1),
        code=project_number,
    ).data()
    print('基本信息', basic_data)
    yield post(
        'addCompanyProject.htm',
        basic_data,
        {'type': '基本信息', 'company_name': basic_data['companyName']},
    )

    # ---- bidding -----------------------------------------------------------
    # After the header row, every second row holds a record (rows 2, 4, ...).
    mark_list = tables[1].xpath('./tr')[2::2]
    print(len(mark_list), 'mmmmmmmmmmmmmmmmmmmm')
    for row in mark_list:
        td = row.xpath('./td')
        if len(td) == 1:
            # A single cell means the table has no bidding record.
            continue
        project_code = td[0].xpath('./a/text()').extract_first()
        build_size = td[1].xpath('text()').extract_first()
        mark_name = td[2].xpath('text()').extract_first()
        have_project = td[3].xpath('text()').extract_first()
        make_zz_data = templates.Mark(
            companyName=company_name,
            tenderNum=project_code,
            prjSize=build_size,
            provinceTenderNum=project_code,
            agencyCorpName=mark_name,
            tenderCorpName=have_project,
            code=project_number,
        ).data()
        print(project_code, build_size, mark_name, have_project, '招标信息')
        yield post('addCompanyProjectTender.htm', make_zz_data, {'type': '招标信息'})

    # ---- contracts ---------------------------------------------------------
    for row in tables[2].xpath('./tr')[2::2]:
        td = row.xpath('./td')
        if len(td) == 1:
            print(len(td), '没有合同信息的', project_name)
            continue
        record_num = td[0].xpath('text()').extract_first()
        contract_data = templates.Contract(
            companyName=company_name,
            code=project_number,
            recordNum=record_num,
            contractNum=td[1].xpath('text()').extract_first(),
            proprietorCorpName=td[2].xpath('text()').extract_first(),
            contractorCorpName=td[3].xpath('text()').extract_first(),
            unionCorpName=td[4].xpath('text()').extract_first(),
            provinceRecordNum=record_num,
        ).data()
        print('合同信息', contract_data)
        yield post('addCompanyProjectContract.htm', contract_data, {'type': '合同信息'})

    # ---- drawing review ----------------------------------------------------
    drawing_tr = tables[3].xpath('./tr')
    censor_num = cell(drawing_tr, 1, 1)
    drawing_data = templates.MakeDrawing(
        censorNum=censor_num,
        censorCorpName=cell(drawing_tr, 1, 3),
        censorCorpCode=cell(drawing_tr, 2, 1),
        censorEDate=cell(drawing_tr, 2, 3),
        surveyCorpName=cell(drawing_tr, 3, 1),
        surveyCorpCode=cell(drawing_tr, 3, 3),
        designCorpName=cell(drawing_tr, 4, 1),
        designCorpCode=cell(drawing_tr, 4, 3),
        companyName=company_name,
        code=project_number,
        prjSize=cell(drawing_tr, 5, 1),
        engineers=[],
        provinceCensorNum=censor_num,
    ).data()
    if drawing_data['censorNum']:
        print(drawing_data, '施工图纸信息')
        yield post('addCompanyProjectCensor.htm', drawing_data, {'type': '施工图纸信息'})

    # ---- construction permit -----------------------------------------------
    permit_tr = tables[5].xpath('./tr')
    permit_num = cell(permit_tr, 1, 1)  # permit filing number
    construction_make_data = templates.ConstructionPermit(
        builderLicenceNum=permit_num,
        censorNum=cell(permit_tr, 2, 3),           # drawing-review certificate number
        contractMoney=cell(permit_tr, 3, 1),       # contract amount
        area=cell(permit_tr, 3, 3),                # floor area
        econCorpName=cell(permit_tr, 5, 1),        # survey company name
        econCorpCode=cell(permit_tr, 5, 3),        # survey company organisation code
        designCorpName=cell(permit_tr, 6, 1),      # design company name
        designCorpCode=cell(permit_tr, 6, 3),      # design company organisation code
        consCorpName=cell(permit_tr, 7, 1),        # construction company name
        consCorpCode=cell(permit_tr, 7, 3),        # construction company organisation code
        superCorpName=cell(permit_tr, 8, 3),       # supervision company name
        superCorpCode=cell(permit_tr, 9, 1),       # supervision company organisation code
        constructorName=cell(permit_tr, 9, 3),     # project manager name
        constructorIDCard=cell(permit_tr, 10, 3),  # project manager ID number
        supervisionName=cell(permit_tr, 11, 1),    # chief supervising engineer name
        supervisionIDCard=cell(permit_tr, 12, 1),  # chief supervising engineer ID number
        companyName=company_name,
        code=project_number,
        provinceBuilderLicenceNum=permit_num,
    ).data()
    print('施工许可信息', construction_make_data)
    if construction_make_data['builderLicenceNum']:
        yield post(
            'addCompanyProjectBuilderLicence.htm',
            construction_make_data,
            {'type': '施工许可信息'},
        )

    # ---- completion --------------------------------------------------------
    completion_tr = tables[8].xpath('./tr')
    completion_num = cell(completion_tr, 1, 1)  # completion filing number
    completion_zz = templates.Completion(
        companyName=company_name,
        code=project_number,
        prjFinishNum=completion_num,
        factCost=cell(completion_tr, 3, 1),          # actual cost
        factArea=cell(completion_tr, 3, 3),          # actual area
        factSize=cell(completion_tr, 4, 1),          # actual construction scale
        prjStructureType=cell(completion_tr, 4, 3),  # structure system
        mark=cell(completion_tr, 5, 1),              # remarks
        provincePrjFinishNum=completion_num,
    ).data()
    if completion_zz['prjFinishNum']:
        print('当前公司----%s---%s--竣工数据' % (project_name, completion_zz))
        yield post('addCompanyProjectFinish.htm', completion_zz, {'type': '竣工验收'})
def project_basic(self, response):
    """Post a project's basic information and request its four record lists.

    The basic fields are read from the ``col_01_value`` / ``col_02_value``
    cells of the page and posted to the ``addCompanyProject`` endpoint.  The
    project id is then taken from the ``PrjId=`` part of ``response.url`` and
    used to request the bidding, drawing-review, contract and
    construction-permit lists.

    :param response: project-page response; ``response.meta['companyName']``
        must hold the name of the company the project belongs to.
    :return: generator of ``scrapy.Request`` objects.
    """
    page = Selector(response=response)
    left_cells = page.xpath('//td[@class="col_01_value"]')
    right_cells = page.xpath('//td[@class="col_02_value"]')

    def text_at(cells, index):
        # Text of the index-th cell of a value column.
        return cells[index].xpath('text()').extract_first()

    code = page.xpath(
        '//td[@class="name_level3 col_01_value"]/text()').extract_first()
    project = templates.Project(
        name=page.xpath('//td[@colspan="3"]/text()').extract_first(),
        companyName=response.meta['companyName'],
        code=code,
        provinceCode=text_at(right_cells, 0),
        unit=text_at(left_cells, 1),
        catalog=text_at(right_cells, 1),
        unitLicenseNum=text_at(left_cells, 2),
        area=text_at(left_cells, 3),
        docuCode=text_at(left_cells, 4),
        level=text_at(right_cells, 4),
        money=text_at(left_cells, 5),
        acreage=text_at(right_cells, 5),
        trait=text_at(left_cells, 6),
        purpose=text_at(right_cells, 6),
    )
    basic_data = project.data()
    print(basic_data, '基本信息')
    yield scrapy.Request(
        url='https://api.maotouin.com/rest/companyInfo/addCompanyProject.htm',
        body=json.dumps(basic_data),
        callback=self.project_zz,
        headers={'Content-Type': 'application/json'},
        method='POST',
        meta={
            'type': '基本信息',
            'company_name': basic_data['companyName'],
        },
    )

    def follow(url_template, callback):
        # Fill the project id from the current URL into *url_template* and
        # build the follow-up request for *callback*.
        prj_id = re.findall('PrjId=(.*)', response.url)[0]
        return scrapy.Request(
            url=url_template % prj_id,
            callback=callback,
            meta={
                'companyName': response.meta['companyName'],
                'code': code,
            },
        )

    # Bidding list.
    yield follow(
        'http://cx.jlsjsxxw.com/handle/ProjectHandler.ashx?method=ztb&PRJID=%s&_=1556177544518',
        self.project_bid_list,
    )
    # Drawing-review list.
    yield follow(
        'http://cx.jlsjsxxw.com/handle/ProjectHandler.ashx?method=sgtsc&PRJID=%s&_=1558342067012',
        self.project_drawing_list,
    )
    # Contract-filing list.
    yield follow(
        'http://cx.jlsjsxxw.com/handle/ProjectHandler.ashx?method=htba&PRJID=%s&_=1558342067013',
        self.project_contract_list,
    )
    # Construction-permit list.
    yield follow(
        'http://cx.jlsjsxxw.com/handle/ProjectHandler.ashx?method=sgxk&PRJID=%s&_=1558342067014',
        self.project_construction_list,
    )
def project_basic(self, response):
    """Parse a project detail page, upload its basic info, and queue its sub-lists.

    Field extraction is delegated to ``templates.Projects.html_analysis``,
    driven by a list of XPath descriptors.  The resulting fields plus the
    company name from ``response.meta`` are wrapped in a
    ``templates.Project`` and POSTed as JSON to the ``addCompanyProject``
    endpoint.  Then the bid, drawing, contract and construction list
    endpoints are requested for the ``PRJNUM`` found in the response URL.

    Args:
        response: Response for the project detail page.  Its URL must
            contain ``PRJNUM=<value>`` and ``response.meta`` must carry
            ``companyName``.

    Yields:
        scrapy.Request: the upload request followed by the four list
        requests.
    """
    analyser = templates.Projects('Project')

    left_column = '//td[@class="col_01_value"]'
    right_column = '//td[@class="col_02_value"]'
    # (index into the matched cells, cell XPath, target field name)
    positional_fields = (
        (0, right_column, 'provinceCode'),
        (1, left_column, 'unit'),
        (1, right_column, 'catalog'),
        (2, left_column, 'unitLicenseNum'),
        (2, right_column, 'area'),
        (4, left_column, 'docuCode'),
        (4, right_column, 'level'),
        (5, left_column, 'money'),
        (5, right_column, 'acreage'),
        (6, left_column, 'trait'),
        (6, right_column, 'purpose'),
    )
    # The first two descriptors address the text node directly and carry
    # no 'then' step; the rest select a cell by index and then its text.
    attrs = [
        {
            'that': '',
            'attr': '//td[@colspan="3"]/text()',
            'name': 'name',
        },
        {
            'that': '',
            'attr': '//td[@class="name_level3 col_01_value"]/text()',
            'name': 'code',
        },
    ]
    attrs.extend(
        {'that': position, 'attr': cell_xpath, 'name': field, 'then': 'text()'}
        for position, cell_xpath, field in positional_fields
    )

    page = Selector(response=response)
    raw_code = page.xpath(
        '//td[@class="name_level3 col_01_value"]/text()').extract_first()
    name = page.xpath('//td[@colspan="3"]/text()').extract_first()
    # Keep only the first whitespace-separated token of the code cell.
    code = raw_code.split()[0]

    fields = analyser.html_analysis(response, attrs)
    company_name = response.meta['companyName']
    fields['companyName'] = company_name
    basic_data = templates.Project(**fields).data()
    print(basic_data, '基本信息')
    upload_meta = {
        'type': '基本信息',
        'company_name': basic_data['companyName'],
    }
    yield scrapy.Request(
        url='https://api.maotouin.com/rest/companyInfo/addCompanyProject.htm',
        body=json.dumps(basic_data),
        callback=self.project_zz,
        headers={'Content-Type': 'application/json'},
        method='POST',
        meta=upload_meta,
    )

    # Everything after "PRJNUM=" in the page URL identifies the project.
    project_num = re.findall('PRJNUM=(.*)', response.url)[0]

    bid_url = (
        'http://218.60.144.163/LNJGPublisher/handle/ProjectHandler.ashx'
        '?method=ztb&PRJNUM=%s&_=1558598717869' % project_num
    )
    yield scrapy.Request(
        url=bid_url,
        callback=self.project_bid_list,
        meta={'companyName': company_name, 'code': code},
    )

    drawing_url = (
        'http://218.60.144.163/LNJGPublisher/handle/ProjectHandler.ashx'
        '?method=sgtsc&PRJNUM=%s&_=1558598717869' % project_num
    )
    print(response.url, '施工图纸审查')
    yield scrapy.Request(
        url=drawing_url,
        callback=self.project_drawing_list,
        meta={'companyName': company_name, 'code': code},
    )

    contract_url = (
        'http://218.60.144.163/LNJGPublisher/handle/ProjectHandler.ashx'
        '?method=htba&PRJNUM=%s&_=1558598717869' % project_num
    )
    print(contract_url, '合同备案全部')
    yield scrapy.Request(
        url=contract_url,
        callback=self.project_contract_list,
        meta={'companyName': company_name, 'code': code},
    )

    construction_url = (
        'http://218.60.144.163/LNJGPublisher/handle/ProjectHandler.ashx'
        '?method=sgxk&PRJNUM=%s&_=1558598717869' % project_num
    )
    print(construction_url, '施工许可详list')
    # Unlike the other list requests, this one carries the project name
    # rather than its code.
    yield scrapy.Request(
        url=construction_url,
        callback=self.project_construction_list,
        meta={'companyName': company_name, 'name': name},
    )