def basic_project(self, response):
    """Parse a project base-info page and POST the basic project record.

    Fields are pulled from the ``project_baseinfo`` definition list by
    fixed position, the money/acreage strings are stripped of their unit
    suffixes, and the assembled record is uploaded as JSON to the
    aggregation API, chaining into ``project_zz``.
    """
    sel = Selector(response=response)
    dd = sel.xpath('//div[@id="project_baseinfo"]/dl/dd')
    name = sel.xpath('//span[@class="user-name"]/text()').extract_first()
    # Positional <dd> extraction -- assumes the page layout is fixed.
    code = dd[0].xpath('text()').extract_first()
    unit = dd[2].xpath('text()').extract_first()
    unitLicenseNum = dd[3].xpath('text()').extract_first()
    catalog = dd[5].xpath('text()').extract_first()
    area = dd[6].xpath('text()').extract_first()
    purpose = dd[8].xpath('text()').extract_first()
    trait = dd[10].xpath('text()').extract_first()
    acreage = dd[13].xpath('text()').extract_first()
    docuCode = sel.xpath(
        '//div[@id="project_approvalinfo"]/dl/dd[1]/text()').extract_first()
    level = sel.xpath('//div[@id="project_approvalinfo"]/dl/dd[2]/text()').extract_first()
    money = sel.xpath('//div[@id="project_moneyincome"]/dl/dd[1]/text()').extract_first()
    # BUG FIX: the original used re.findall('(.*) (万元)', money)[0], which
    # returns a *tuple* of both capture groups (e.g. ('123', '万元')) rather
    # than the amount, and raised IndexError/TypeError when the text was
    # missing or did not match.  Same for the acreage pattern.  Keep only
    # the numeric part, defensively.
    if money:
        match = re.search(r'(.*?)\s*万元', money)
        if match:
            money = match.group(1)
    if acreage:
        match = re.search(r'(.*?)\s*平方米/米', acreage)
        if match:
            acreage = match.group(1)
    basic = templates.Project(companyName=response.meta['company_name'], code=code, name=name,
                              provinceCode=code, unit=unit, unitLicenseNum=unitLicenseNum,
                              catalog=catalog, area=area, purpose=purpose, trait=trait,
                              docuCode=docuCode, level=level, acreage=acreage, money=money
                              )
    basic_data = basic.data()
    yield scrapy.Request(url='https://api.maotouin.com/rest/companyInfo/addCompanyProject.htm',
                         headers={'Content-Type': 'application/json'},
                         callback=self.project_zz,
                         body=json.dumps(basic_data),
                         method='POST',
                         meta={'type': '基本信息', 'company_name': basic_data['companyName']}
                         )
 def basic(self, response):
     data_line = response.text
     data_dict = re.split('jQuery\d+_\d+\(', data_line)[1]
     data_dict = data_dict.replace(')', '')
     json_data = json.loads(data_dict)
     data = json_data['data']['ProjectInfoList'][0]
     basic = templates.Project(companyName=response.meta['companyName'],
                               name=data['ProjectName'], code=data['ProjectNum'], provinceCode=data['ProjectNum'],
                               area=data['AreaName'], unit=data['BuildCorpName'],
                               unitLicenseNum=data['BuildCorpCode'],
                               catalog=data['ProjectType'], acreage=data['AreaCode'],
                               level=data['PrjApprovalLevelNum'],
                               money=data['AllInvest'], trait=data['PrjPropertyNum'],
                               docuCode=data['PrjApprovalNum'],
                               purpose=data['ProjectType']
                               )
     basic_data = basic.data()
     if basic_data['ProjectName'] or basic_data['ProjectNum']:
         print(basic_data, '基本信息')
         yield scrapy.Request(url='https://api.maotouin.com/rest/companyInfo/addCompanyProject.htm',
                              body=json.dumps(basic_data), callback=self.project_zz,
                              headers={'Content-Type': 'application/json'}, method='POST',
                              meta={'type': '基本信息', 'company_name': basic_data['companyName']}
                              )
    def project(self, response):
        """Parse a project detail page (115.29.2.37 site) and upload each section.

        Uploads the basic project record built from fixed table positions,
        then handles five tabbed sections: tender links (t1), inline
        drawing-censor rows (t2), contract links (t3), construction-permit
        links (t4) and completion links (t5).
        """
        basic_info = templates.Projects('Project')
        # XPath -> output-field mapping consumed by html_analysis(); fixed
        # row/column positions, so a page-layout change silently breaks fields.
        attrs = [
            {'that': '', 'attr': '//div[@class="detail_list"]/table/tr[1]/td[2]/text()', 'name': 'name'},
            {'that': '', 'attr': '//div[@class="detail_list"]/table/tr[2]/td[2]/text()', 'name': 'code'},
            {'that': '', 'attr': '//div[@class="detail_list"]/table/tr[2]/td[2]/text()', 'name': 'provinceCode'},
            {'that': '', 'attr': '//div[@class="detail_list"]/table/tr[3]/td[2]/text()', 'name': 'unit'},
            {'that': '', 'attr': '//div[@class="detail_list"]/table/tr[3]/td[4]/text()', 'name': 'catalog'},
            {'that': '', 'attr': '//div[@class="detail_list"]/table/tr[4]/td[2]/text()', 'name': 'unitLicenseNum'},
            {'that': '', 'attr': '//div[@class="detail_list"]/table/tr[4]/td[4]/text()', 'name': 'area'},
            {'that': '', 'attr': '//div[@class="detail_list"]/table/tr[6]/td[2]/text()', 'name': 'docuCode'},
            {'that': '', 'attr': '//div[@class="detail_list"]/table/tr[6]/td[4]/text()', 'name': 'level'},
            {'that': '', 'attr': '//div[@class="detail_list"]/table/tr[7]/td[2]/text()', 'name': 'money'},
            {'that': '', 'attr': '//div[@class="detail_list"]/table/tr[7]/td[4]/text()', 'name': 'acreage'},
            {'that': '', 'attr': '//div[@class="detail_list"]/table/tr[8]/td[2]/text()', 'name': 'trait'},
            {'that': '', 'attr': '//div[@class="detail_list"]/table/tr[8]/td[4]/text()', 'name': 'purpose'},

        ]
        basic_data = basic_info.html_analysis(response, attrs)
        basic_data['companyName'] = response.meta['companyName']
        basic = templates.Project(**basic_data)
        b_data = basic.data()
        print(b_data, '基本信息', b_data['companyName'])
        yield scrapy.Request(
            # url='https://api.maotouin.com/rest/companyInfo/addCompanyProject.htm',
            url='https://api.maotouin.com/rest/companyInfo/addCompanyProject.htm',
            body=json.dumps(b_data),
            callback=self.project_zz,
            headers={'Content-Type': 'application/json'}, method='POST',
            meta={'type': '基本信息', 'company_name': b_data['companyName']}
        )

        # Tender rows (tab t1); the first <tr> is the header, hence [1:].
        bid_url = Selector(response=response).xpath('//div[@class="classContent t1"]/table/tr')
        bid_url = bid_url[1:]
        for b in bid_url:
            a = b.xpath('./td[7]/a/@href').extract_first()
            # NOTE(review): extract_first() can return None here, which would
            # raise TypeError on this concatenation -- confirm every row links.
            a = 'http://115.29.2.37:8080/' + a
            yield scrapy.Request(url=a, callback=self.bid_info, headers=heads,
                                 meta={'companyName': response.meta['companyName']}
                                 )

        # Drawing-censor rows (tab t2) are parsed and uploaded inline.
        drawing_info = Selector(response=response).xpath('//div[@class="classContent t2"]/table/tr')
        drawing_info = drawing_info[1:]
        print(len(drawing_info), '施工图纸审查----bbbbbbbbbbbbbbbbbbbbbbbbbbbbb', response.url)
        for d in drawing_info:
            censorNum = d.xpath('./td[2]/text()').extract_first()
            surveyCorpName = d.xpath('./td[3]/text()').extract_first()
            designCorpName = d.xpath('./td[4]/text()').extract_first()
            censorCorpName = d.xpath('./td[5]/text()').extract_first()
            censorEDate = d.xpath('./td[6]/text()').extract_first()
            # NOTE(review): unlike the sibling implementation, this record
            # carries no companyName/code -- confirm the API accepts that.
            drawing_data = templates.MakeDrawing(censorNum=censorNum, surveyCorpName=surveyCorpName,
                                                 designCorpName=designCorpName, censorCorpName=censorCorpName,
                                                 censorEDate=censorEDate
                                                 )
            drawing_data = drawing_data.data()
            print(drawing_data, '施工图纸审查')
            yield scrapy.Request(url='https://api.maotouin.com/rest/companyInfo/addCompanyProjectCensor.htm',
                                 body=json.dumps(drawing_data),
                                 callback=self.project_zz,
                                 headers={'Content-Type': 'application/json'}, method='POST',
                                 meta={'type': '施工图纸审查'}
                                 )
        # Contract-record rows (tab t3): follow each record link.
        contract_list = Selector(response=response).xpath('//div[@class="classContent t3"]/table/tr')
        contract_list = contract_list[1:]
        for c in contract_list:
            print(c.xpath('./td[6]/a/@href').extract_first(), '合同备案url')
            u = 'http://115.29.2.37:8080/' + c.xpath('./td[6]/a/@href').extract_first()
            yield scrapy.Request(url=u, callback=self.contract_info,
                                 meta={'companyName': response.meta['companyName']})

        # Construction-permit detail links (tab t4).
        construction_list = Selector(response=response).xpath('//div[@class="classContent t4"]/table/tr/td/a/@href')
        for c in construction_list:
            u = 'http://115.29.2.37:8080/' + c.extract()
            yield scrapy.Request(url=u, callback=self.construction_info,
                                 meta={'companyName': response.meta['companyName']})

        # Completion-acceptance detail links (tab t5).
        finish_list = Selector(response=response).xpath('//div[@class="classContent t5"]/table/tr/td/a/@href')
        for f in finish_list:
            u = 'http://115.29.2.37:8080/' + f.extract()
            yield scrapy.Request(url=u, callback=self.finish_info,
                                 meta={'companyName': response.meta['companyName']})
# ---- Example #4 (separator left over from the scraped source) ----
    def project(self, response):
        """Parse a project detail page and upload every info section.

        Extracts the basic record from labelled <td> cells, then walks the
        step tables -- tender (step1), drawing censorship (step2), contract
        record (step3), construction permit (step4) and completion
        acceptance (step5) -- POSTing one JSON record per row to the
        aggregation API, chaining into ``project_zz``.
        """
        basic = templates.Projects('Project')
        # XPath -> output-field mapping consumed by html_analysis().
        attrs = [
            {'that': '', 'attr': '//td[@id="lblPrjName"]/text()', 'name': 'name'},
            {'that': '', 'attr': '//td[@id="lblPrjNum"]/text()', 'name': 'code'},
            {'that': '', 'attr': '//td[@id="lblPrjNum"]/text()', 'name': 'provinceCode'},
            {'that': '', 'attr': '//td[@id="lblPrjTypeNum"]/text()', 'name': 'catalog'},
            {'that': '', 'attr': '//td[@id="lblBuildCorpName"]/text()', 'name': 'unit'},
            {'that': '', 'attr': '//td[@id="lblBuildCorpCode"]/text()', 'name': 'unitLicenseNum'},
            {'that': '', 'attr': '//td[@id="lblCountyNum"]/text()', 'name': 'area'},
            {'that': '', 'attr': '//td[@id="lblPrjApprovalNum"]/text()', 'name': 'docuCode'},
            {'that': '', 'attr': '//td[@id="lblPrjApprovalLevelNum"]/text()', 'name': 'level'},
            {'that': '', 'attr': '//td[@id="lblAllInvest"]/text()', 'name': 'money'},
            {'that': '', 'attr': '//td[@id="lblAllArea"]/text()', 'name': 'acreage'},
            {'that': '', 'attr': '//td[@id="lblPrjPropertyNum"]/text()', 'name': 'trait'},
            {'that': '', 'attr': '//td[@id="lblPrjFunctionNum"]/text()', 'name': 'purpose'},
        ]
        basic_text = basic.html_analysis(response=response, attrs=attrs)
        basic_text['companyName'] = response.meta['companyName']
        # '暂无' ("none yet") is a page placeholder, not a real approval level.
        if basic_text['level'] == '暂无':
            basic_text['level'] = ''
        basic_data = templates.Project(**basic_text)
        basic_data = basic_data.data()
        print('基本信息', '*******************************', basic_data)
        yield scrapy.Request(url='https://api.maotouin.com/rest/companyInfo/addCompanyProject.htm',
                             body=json.dumps(basic_data), callback=self.project_zz,
                             headers={'Content-Type': 'application/json'}, method='POST',
                             meta={'type': '基本信息', 'company_name': basic_data['companyName']}
                             )
        #
        # Tender rows (step1), uploaded inline.
        bid_list = Selector(response=response).xpath('//div[@id="project_step1"]/table/tbody/tr')
        for b in bid_list:
            tenderClass = b.xpath('./td[2]/text()').extract_first()
            tenderType = b.xpath('./td[3]/text()').extract_first()
            tenderCorpName = b.xpath('./td[4]/a/text()').extract_first()
            tenderResultDate = b.xpath('./td[5]/text()').extract_first()
            tenderMoney = b.xpath('./td[6]/text()').extract_first()
            tenderNum = b.xpath('./td[7]/a/text()').extract_first()
            bid_data = templates.Mark(tenderClass=tenderClass, tenderType=tenderType, tenderCorpName=tenderCorpName,
                                      tenderResultDate=tenderResultDate, tenderMoney=tenderMoney, tenderNum=tenderNum,
                                      provinceTenderNum=tenderNum, code=basic_data['code'],
                                      companyName=response.meta['companyName']
                                      )
            bid_data = bid_data.data()
            yield scrapy.Request(url='https://api.maotouin.com/rest/companyInfo/addCompanyProjectTender.htm',
                                 body=json.dumps(bid_data), callback=self.project_zz,
                                 headers={'Content-Type': 'application/json'}, method='POST',
                                 meta={'type': '招标信息',}
                                 )
            # print('招标信息', '*******************************', bid_data)

        # Drawing-censor rows (step2), uploaded inline.
        drawing_list = Selector(response=response).xpath('//div[@id="project_step2"]/table/tbody/tr')
        for d in drawing_list:
            censorNum = d.xpath('./td[2]/text()').extract_first()
            provinceCensorNum = d.xpath('./td[3]/text()').extract_first()
            surveyCorpName = d.xpath('./td[4]/a/text()').extract_first()
            designCorpName = d.xpath('./td[5]/a/text()').extract_first()
            censorCorpName = d.xpath('./td[6]/a/text()').extract_first()
            censorEDate = d.xpath('./td[7]/a/text()').extract_first()
            drawing_data = templates.MakeDrawing(censorNum=censorNum, provinceCensorNum=provinceCensorNum,
                                                 surveyCorpName=surveyCorpName,
                                                 designCorpName=designCorpName, censorCorpName=censorCorpName,
                                                 censorEDate=censorEDate, code=basic_data['code'],
                                                 companyName=response.meta['companyName']
                                                 )
            drawing_data = drawing_data.data()
            yield scrapy.Request(url='https://api.maotouin.com/rest/companyInfo/addCompanyProjectCensor.htm',
                                 body=json.dumps(drawing_data), callback=self.project_zz,
                                 headers={'Content-Type': 'application/json'}, method='POST',
                                 meta={'type': '施工图纸审查', }
                                 )
            print('施工图纸审查', '*******************************', drawing_data)

        # Contract-record rows (step3), uploaded inline.
        contract_list = Selector(response=response).xpath('//div[@id="project_step3"]/table/tbody/tr')
        for d in contract_list:
            contractType = d.xpath('./td[2]/text()').extract_first()
            recordNum = d.xpath('./td[3]/text()').extract_first()
            provinceRecordNum = d.xpath('./td[4]/text()').extract_first()
            contractMoney = d.xpath('./td[5]/text()').extract_first()
            contractDate = d.xpath('./td[6]/text()').extract_first()
            contract_data = templates.Contract(contractType=contractType, recordNum=recordNum,
                                               provinceRecordNum=provinceRecordNum,
                                               contractMoney=contractMoney,
                                               contractDate=contractDate, code=basic_data['code'],
                                               companyName=response.meta['companyName']
                                               )
            contract_data = contract_data.data()
            yield scrapy.Request(url='https://api.maotouin.com/rest/companyInfo/addCompanyProjectContract.htm',
                                 body=json.dumps(contract_data), callback=self.project_zz,
                                 headers={'Content-Type': 'application/json'}, method='POST',
                                 meta={'type': '合同备案', }

                                 )
            print('合同备案', '*******************************', contract_data)
        print(response.url, 'XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX')

        # Construction-permit rows (step4), uploaded inline.
        construction_list = Selector(response=response).xpath('//div[@id="project_step4"]/table/tbody/tr')
        for d in construction_list:
            builderLicenceNum = d.xpath('./td[2]/text()').extract_first()
            consCorpName = d.xpath('./td[3]/a/text()').extract_first()
            contractMoney = d.xpath('./td[4]/text()').extract_first()
            area = d.xpath('./td[5]/text()').extract_first()
            createDate = d.xpath('./td[6]/text()').extract_first()
            construction_data = templates.ConstructionPermit(builderLicenceNum=builderLicenceNum, provinceBuilderLicenceNum=builderLicenceNum,
                                                             consCorpName=consCorpName,
                                                             contractMoney=contractMoney,
                                                             area=area, code=basic_data['code'],
                                                             createDate=createDate,
                                                             companyName=response.meta['companyName']
                                                             )
            construction_data = construction_data.data()
            yield scrapy.Request(url='https://api.maotouin.com/rest/companyInfo/addCompanyProjectBuilderLicence.htm',
                                 body=json.dumps(construction_data), callback=self.project_zz,
                                 headers={'Content-Type': 'application/json'}, method='POST',
                                 meta={'type': '施工许可', }
                                 )
            print('施工许可', '*******************************', construction_data)

        # Completion-acceptance rows.  BUG FIX: the original re-scraped
        # project_step4 here, duplicating the construction-permit rows and
        # uploading them as completion records; the completion table is
        # project_step5.  TODO(review): confirm against the live page markup.
        completion_list = Selector(response=response).xpath('//div[@id="project_step5"]/table/tbody/tr')
        for c in completion_list:
            prjFinishNum = c.xpath('./td[2]/text()').extract_first()
            provincePrjFinishNum = c.xpath('./td[3]/a/text()').extract_first()
            factCost = c.xpath('./td[4]/text()').extract_first()
            factArea = c.xpath('./td[5]/text()').extract_first()
            factBeginDate = c.xpath('./td[6]/text()').extract_first()
            # BUG FIX: the original read td[6] for factEndDate too, so the end
            # date always equalled the begin date; the end date is the next
            # column.  TODO(review): confirm the column index.
            factEndDate = c.xpath('./td[7]/text()').extract_first()
            completion_data = templates.Completion(prjFinishNum=prjFinishNum,
                                                   provincePrjFinishNum=provincePrjFinishNum,
                                                   factCost=factCost,
                                                   factArea=factArea,
                                                   factBeginDate=factBeginDate,
                                                   code=basic_data['code'],
                                                   factEndDate=factEndDate,
                                                   companyName=response.meta['companyName']
                                                   )
            completion_data = completion_data.data()
            yield scrapy.Request(
                url='https://api.maotouin.com/rest/companyInfo/addCompanyProjectFinish.htm',
                body=json.dumps(completion_data), callback=self.project_zz,
                headers={'Content-Type': 'application/json'}, method='POST',
                meta={'type': '竣工验收', }
            )
            print('竣工验收', '*******************************', completion_data)
# ---- Example #5 (separator left over from the scraped source) ----
    def info(self, response):
        """Parse a hngcjs.hnjs.gov.cn project page and upload every section.

        Scrapes the basic-info table and uploads it, then walks the four
        GridView1 tables (winning bids, drawing examination, contracts,
        construction permits) and the GV_Cert completion table, uploading
        records inline or following detail links to dedicated callbacks.
        """
        # Basic-info rows; the first <tr> is the table header, hence [1:].
        basic_data = Selector(
            response=response).xpath('//div[@class="news_con"]/table/tr')
        basic_data = basic_data[1:]
        # Fixed row/column positions -- assumes this page layout is stable.
        name = basic_data[0].xpath('./td[2]/span/text()').extract_first()
        code = basic_data[0].xpath('./td[4]/span/text()').extract_first()
        unit = basic_data[1].xpath('./td[2]/span/text()').extract_first()
        area = basic_data[1].xpath('./td[4]/span/text()').extract_first()
        docuCode = basic_data[3].xpath('./td[2]/span/text()').extract_first()
        level = basic_data[3].xpath('./td[4]/span/text()').extract_first()
        money = basic_data[4].xpath('./td[2]/span/text()').extract_first()
        acreage = basic_data[4].xpath('./td[4]/span/text()').extract_first()
        trait = basic_data[5].xpath('./td[2]/span/text()').extract_first()
        purpose = basic_data[5].xpath('./td[4]/span/text()').extract_first()
        basic_zz = templates.Project(companyName=response.meta['company_name'],
                                     name=name,
                                     code=code,
                                     unit=unit,
                                     area=area,
                                     provinceCode=code,
                                     docuCode=docuCode,
                                     level=level,
                                     money=money,
                                     acreage=acreage,
                                     trait=trait,
                                     purpose=purpose)
        b_data = basic_zz.data()
        print(b_data, '基本信息-------------------')
        yield scrapy.Request(
            # url='http://api.maotouin.com/rest/companyInfo/addCompanyProject.htm',
            url=
            'https://api.maotouin.com/rest/companyInfo/addCompanyProject.htm',
            headers={'Content-Type': 'application/json'},
            callback=self.project_zz,
            body=json.dumps(b_data),
            method='POST',
            meta={
                'type': '基本信息',
                'company_name': response.meta['company_name']
            })
        #
        # Winning-bid rows: first GridView1 table; follow each detail link.
        wining_bib_list = Selector(response=response).xpath(
            '//table[@id="GridView1"]')[0].xpath('./tr')
        wining_bib_list = wining_bib_list[1:]
        for w in wining_bib_list:
            bid_url = w.xpath('./td[3]/a/@href').extract_first()
            # NOTE(review): extract_first() may return None here, raising
            # TypeError on this concatenation -- confirm every row links.
            bid_url = 'http://hngcjs.hnjs.gov.cn' + bid_url
            yield scrapy.Request(url=bid_url,
                                 callback=self.win_bid,
                                 meta={
                                     'company_name':
                                     response.meta['company_name'],
                                     'code': code
                                 })

        # Drawing-examination rows: second GridView1 table, uploaded inline.
        examination = Selector(response=response).xpath(
            '//table[@id="GridView1"]')[1].xpath('./tr')
        examination = examination[1:]
        for e in examination:
            censorNum = e.xpath('./td[2]/text()').extract_first()
            censorCorpName = e.xpath('./td[3]/text()').extract_first()
            data = templates.MakeDrawing(
                companyName=response.meta['company_name'],
                code=code,
                censorNum=censorNum,
                provinceCensorNum=censorNum,
                censorCorpName=censorCorpName,
                # censorEDate=censorEDate
            )
            e_data = data.data()
            # Re-format the cell date from "Y/m/d H:M:S" to "Y-m-d".
            # NOTE(review): strptime raises TypeError/ValueError when the
            # cell is empty or formatted differently, which aborts the whole
            # generator (remaining sections are never scraped) -- confirm.
            censorEDate = e.xpath('./td[4]/text()').extract_first()
            time_tuple = (time.strptime(censorEDate, "%Y/%m/%d %H:%M:%S"))
            time1 = time.strftime("%Y-%m-%d", time_tuple)
            e_data['censorEDate'] = time1
            yield scrapy.Request(
                url=
                'http://api.maotouin.com/rest/companyInfo/addCompanyProjectCensor.htm',
                headers={'Content-Type': 'application/json'},
                callback=self.project_zz,
                body=json.dumps(e_data),
                method='POST',
                meta={'type': '图纸审查'})
            print(data.data(), 'ttttttttttttttttttttttttttttttt', '图纸审查')
        #
        # Contract rows: third GridView1 table; follow each record link.
        contract = Selector(response=response).xpath(
            '//table[@id="GridView1"]')[2].xpath('./tr')
        contract = contract[1:]
        for c in contract:
            contract_u = c.xpath('./td[2]/a/@href').extract_first()
            recordNum = c.xpath('./td[2]/a/text()').extract_first()
            bid_url = 'http://hngcjs.hnjs.gov.cn' + contract_u
            yield scrapy.Request(url=bid_url,
                                 callback=self.contract_info,
                                 meta={
                                     'company_name':
                                     response.meta['company_name'],
                                     'recordNum': recordNum,
                                     'code': code
                                 })

        # Construction-permit rows: fourth GridView1 table, uploaded inline.
        permit = Selector(response=response).xpath(
            '//table[@id="GridView1"]')[3].xpath('./tr')
        permit = permit[1:]
        for p in permit:
            builderLicenceNum = p.xpath('./td[2]/text()').extract_first()
            consCorpName = p.xpath('./td[3]/text()').extract_first()
            # censorEDate = p.xpath('./td[4]').extract_first()
            p_data = templates.ConstructionPermit(
                companyName=response.meta['company_name'],
                code=code,
                builderLicenceNum=builderLicenceNum,
                provinceBuilderLicenceNum=builderLicenceNum,
                consCorpName=consCorpName,
            )
            per_data = p_data.data()
            yield scrapy.Request(
                url=
                'http://api.maotouin.com/rest/companyInfo/addCompanyProjectBuilderLicence.htm',
                headers={'Content-Type': 'application/json'},
                callback=self.project_zz,
                body=json.dumps(per_data),
                method='POST',
                meta={'type': '施工许可'})
            print(p_data.data(), 'ttttttttttttttttttttttttttttttt', '施工许可')

        # Completion-certificate rows: follow each GV_Cert detail link.
        completed = Selector(
            response=response).xpath('//table[@id="GV_Cert"]/tr')
        completed = completed[1:]
        for c in completed:
            completed_u = c.xpath('./td[2]/a/@href').extract_first()
            bid_url = 'http://hngcjs.hnjs.gov.cn' + completed_u
            yield scrapy.Request(url=bid_url,
                                 callback=self.completed_info,
                                 meta={
                                     'company_name':
                                     response.meta['company_name'],
                                     'code': code
                                 })
# ---- Example #6 (separator left over from the scraped source) ----
    def company_project(self, response):
        basic = Selector(response=response).xpath('//table[@class="detailTable"]')[0]
        basic_tr = basic.xpath('./tr')
        project_name = basic_tr[1].xpath('./td')[3].xpath('text()').extract_first()
        project_number = basic_tr[1].xpath('./td')[1].xpath('text()').extract_first()
        area = basic_tr[3].xpath('./td')[3].xpath('text()').extract_first()
        if area is not None:
            area_data = ''
            for a in area.split():
                area_data += a
        else:
            area_data = ''
        unit = basic_tr[2].xpath('./td')[3].xpath('text()').extract_first()

        unitLicenseNum = basic_tr[3].xpath('./td')[1].xpath('text()').extract_first()

        catalog = basic_tr[2].xpath('./td')[1].xpath('text()').extract_first()

        traits = basic_tr[7].xpath('./td')[3].xpath('text()').extract_first()

        purpose = basic_tr[8].xpath('./td')[1].xpath('text()').extract_first()

        money = basic_tr[6].xpath('./td')[1].xpath('text()').extract_first()

        acreage = basic_tr[6].xpath('./td')[3].xpath('text()').extract_first()

        level = basic_tr[4].xpath('./td')[3].xpath('text()').extract_first()

        docuCode = basic_tr[4].xpath('./td')[1].xpath('text()').extract_first()

        ccc = templates.Project(name=project_name, companyName=response.meta['company_name'],
                               area=area_data, provinceCode=project_number, unit=unit, unitLicenseNum=unitLicenseNum,
                               catalog=catalog, trait=traits, purpose=purpose, money=money, acreage=acreage,
                               level=level,
                               docuCode=docuCode, code=project_number
                               )
        basic_data = ccc.data()
        print('基本信息', basic_data)
        yield scrapy.Request(url='https://api.maotouin.com/rest/companyInfo/addCompanyProject.htm',
                             headers={'Content-Type': 'application/json'},
                             method='POST',
                             body=json.dumps(basic_data),
                             callback=self.project_zz,
                             meta={'type': '基本信息', 'company_name': basic_data['companyName']}
                             )
        # print(data)

        mark = Selector(response=response).xpath('//table[@class="detailTable"]')[1]
        mark_tr = mark.xpath('./tr')
        del mark_tr[0]
        mark_list = [k for index, k in enumerate(mark_tr) if (index % 2 != 0)]
        print(len(mark_list), 'mmmmmmmmmmmmmmmmmmmm')
        for m in mark_list:
            td = m.xpath('./td')
            if len(td) == 1:
                pass
                # print(len(td), '没有招标信息的', project_name)
            else:
                project_code = td[0].xpath('./a/text()').extract_first()

                build_size = td[1].xpath('text()').extract_first()

                mark_name = td[2].xpath('text()').extract_first()

                have_project = td[3].xpath('text()').extract_first()

                mark_data = templates.Mark(companyName=response.meta['company_name'],
                                          tenderNum=project_code,
                                          prjSize=build_size,
                                          provinceTenderNum=project_code,
                                          agencyCorpName=mark_name,
                                          tenderCorpName=have_project,
                                          code=project_number)
                make_zz_data = mark_data.data()
                print(project_code, build_size, mark_name, have_project, '招标信息')
                yield scrapy.Request(url='https://api.maotouin.com/rest/companyInfo/addCompanyProjectTender.htm',
                                     method='POST',
                                     headers={'Content-Type': 'application/json'},
                                     body=json.dumps(make_zz_data),
                                     callback=self.project_zz,
                                     meta={'type': '招标信息'}
                                     )

        contract = Selector(response=response).xpath('//table[@class="detailTable"]')[2]
        contract = contract.xpath('./tr')
        del contract[0]
        contract_list = [k for index, k in enumerate(contract) if (index % 2 != 0)]
        for m in contract_list:
            td = m.xpath('./td')
            if len(td) == 1:
                print(len(td), '没有合同信息的', project_name)
            else:
                contract_mark_number = td[0].xpath('text()').extract_first()

                contract_number = td[1].xpath('text()').extract_first()

                send_company = td[2].xpath('text()').extract_first()

                make_company = td[3].xpath('text()').extract_first()

                union_company = td[4].xpath('text()').extract_first()

                contract_object = templates.Contract(companyName=response.meta['company_name'],
                                                    code=project_number,
                                                    recordNum=contract_mark_number, contractNum=contract_number,
                                                    proprietorCorpName=send_company, contractorCorpName=make_company,
                                                    unionCorpName=union_company, provinceRecordNum=contract_mark_number
                                                    )
                contract_data = contract_object.data()
                print('合同信息', contract_data)
                yield scrapy.Request(url='https://api.maotouin.com/rest/companyInfo/addCompanyProjectContract'
                                         '.htm',
                                     method='POST',
                                     headers={'Content-Type': 'application/json'},
                                     body=json.dumps(contract_data),
                                     callback=self.project_zz,
                                     meta={'type': '合同信息'}
                                     )

        make_drawing = Selector(response=response).xpath('//table[@class="detailTable"]')[3]
        make_drawing = make_drawing.xpath('./tr')

        make_see_number = make_drawing[1].xpath('./td')[1].xpath('text()').extract_first()

        make_see_name = make_drawing[1].xpath('./td')[3].xpath('text()').extract_first()

        drawing_num = make_drawing[2].xpath('./td')[1].xpath('text()').extract_first()

        see_date = make_drawing[2].xpath('./td')[3].xpath('text()').extract_first()

        see_name = make_drawing[3].xpath('./td')[1].xpath('text()').extract_first()

        see_num = make_drawing[3].xpath('./td')[3].xpath('text()').extract_first()

        desgin_name = make_drawing[4].xpath('./td')[1].xpath('text()').extract_first()

        desgin_num = make_drawing[4].xpath('./td')[3].xpath('text()').extract_first()

        make_size = make_drawing[5].xpath('./td')[1].xpath('text()').extract_first()

        ok_pass = make_drawing[5].xpath('./td')[3].xpath('text()').extract_first()

        see_error = make_drawing[6].xpath('./td')[1].xpath('text()').extract_first()

        see_number = make_drawing[6].xpath('./td')[3].xpath('text()').extract_first()

        drawing = templates.MakeDrawing(
            censorNum=make_see_number,
            censorCorpName=make_see_name,
            censorCorpCode=drawing_num,
            censorEDate=see_date,
            surveyCorpName=see_name,
            surveyCorpCode=see_num,
            designCorpName=desgin_name,
            designCorpCode=desgin_num,
            companyName=response.meta['company_name'],
            code=project_number,
            prjSize=make_size,
            engineers=[],
            provinceCensorNum=make_see_number
        )

        drawing_data = drawing.data()
        if drawing_data['censorNum']:
            print(drawing_data, '施工图纸信息')
            yield scrapy.Request(url='https://api.maotouin.com/rest/companyInfo/addCompanyProjectCensor.htm',
                                 headers={'Content-Type': 'application/json'},
                                 method='POST',
                                 body=json.dumps(drawing_data),
                                 callback=self.project_zz,
                                 meta={'type': '施工图纸信息'}
                                 )

        construction = Selector(response=response).xpath('//table[@class="detailTable"]')[5]
        construction_tr = construction.xpath('./tr')

        # 施工许可证系统备案编号
        construction_num = construction_tr[1].xpath('./td')[1].xpath('text()').extract_first()

        # 暂时用不到
        usr_plan_land_num = construction_tr[1].xpath('./td')[3].xpath('text()').extract_first()

        # 暂时用不到
        usr_make_land_num = construction_tr[2].xpath('./td')[1].xpath('text()').extract_first()

        # 施工图审查合格书编号
        qualified_num = construction_tr[2].xpath('./td')[3].xpath('text()').extract_first()

        # 合同金额(万元)
        money_capital = construction_tr[3].xpath('./td')[1].xpath('text()').extract_first()

        # 面积(平方米)
        construction_area = construction_tr[3].xpath('./td')[3].xpath('text()').extract_first()

        # 建设规模暂时用不到
        construction_size = construction_tr[4].xpath('./td')[1].xpath('text()').extract_first()

        # 发证日期
        construction_date = construction_tr[4].xpath('./td')[3].xpath('text()').extract_first()

        # 勘察单位名称
        c_survey_name = construction_tr[5].xpath('./td')[1].xpath('text()').extract_first()

        # 勘察单位组织机构代码
        c_survey_num = construction_tr[5].xpath('./td')[3].xpath('text()').extract_first()

        # 设计单位名称
        c_degsin_name = construction_tr[6].xpath('./td')[1].xpath('text()').extract_first()

        # 设计单位组织机构代码
        c_degsin_num = construction_tr[6].xpath('./td')[3].xpath('text()').extract_first()

        # 施工单位名称
        c_make_name = construction_tr[7].xpath('./td')[1].xpath('text()').extract_first()

        # 施工单位组织机构代码
        c_make_num = construction_tr[7].xpath('./td')[3].xpath('text()').extract_first()

        # 施工单位安全生产许可证编号
        c_make_safe_num = construction_tr[8].xpath('./td')[1].xpath('text()').extract_first()

        # 监理单位名称
        c_supervisor_name = construction_tr[8].xpath('./td')[3].xpath('text()').extract_first()

        # 监理单位组织机构代码
        c_supervisor_num = construction_tr[9].xpath('./td')[1].xpath('text()').extract_first()

        # 项目经理姓名
        c_project_person_name = construction_tr[9].xpath('./td')[3].xpath('text()').extract_first()

        # 施工图审查人员证件类型
        c_name_person_idctype = construction_tr[10].xpath('./td')[1].xpath('text()').extract_first()

        # 项目经理身份证
        c_name_person_idcard = construction_tr[10].xpath('./td')[3].xpath('text()').extract_first()

        # 总监理工程师姓名
        c_chief_name = construction_tr[11].xpath('./td')[1].xpath('text()').extract_first()

        # 总监理工程师证件类型
        c_chief_idtype = construction_tr[11].xpath('./td')[3].xpath('text()').extract_first()

        # 总监理工程师证件号码
        c_chief_idcard = construction_tr[12].xpath('./td')[1].xpath('text()').extract_first()

        # 安全生产管理人员姓名
        c_safe_manager = construction_tr[12].xpath('./td')[3].xpath('text()').extract_first()

        # 安全生产管理证件类型
        c_safe_idtype = construction_tr[13].xpath('./td')[1].xpath('text()').extract_first()

        # 安全生产管理人员姓名
        c_safe_idcard = construction_tr[13].xpath('./td')[3].xpath('text()').extract_first()

        # 安全生产考核合格证书编号
        c_safe_assessenment_num = construction_tr[14].xpath('./td')[1].xpath('text()').extract_first()

        # 安全生产管理人员类型
        c_safe_assessenment_type = construction_tr[14].xpath('./td')[3].xpath('text()').extract_first()

        construction_model = templates.ConstructionPermit(builderLicenceNum=construction_num, censorNum=qualified_num,
                                                         contractMoney=money_capital, area=construction_area,
                                                         econCorpName=c_survey_name, econCorpCode=c_survey_num,
                                                         designCorpName=c_degsin_name, designCorpCode=c_degsin_num,
                                                         consCorpName=c_make_name, consCorpCode=c_make_num,
                                                         superCorpName=c_supervisor_name,
                                                         superCorpCode=c_supervisor_num,
                                                         constructorName=c_project_person_name,
                                                         constructorIDCard=c_name_person_idcard,
                                                         supervisionName=c_chief_name,
                                                         supervisionIDCard=c_chief_idcard,
                                                         companyName=response.meta['company_name'],
                                                         code=project_number,
                                                         provinceBuilderLicenceNum=construction_num
                                                         )
        construction_make_data = construction_model.data()
        print('施工许可信息', construction_make_data)
        if construction_make_data['builderLicenceNum']:
            yield scrapy.Request(url='https://api.maotouin.com/rest/companyInfo'
                                     '/addCompanyProjectBuilderLicence.htm',
                                 headers={'Content-Type': 'application/json'},
                                 method='POST',
                                 body=json.dumps(construction_make_data),
                                 callback=self.project_zz,
                                 meta={'type': '施工许可信息'}
                                 )

        completion_info = Selector(response=response).xpath('//table[@class="detailTable"]')[8]
        completion_tr = completion_info.xpath('./tr')
        # 竣工备案编号
        completion_num = completion_tr[1].xpath('./td')[1].xpath('text()').extract_first()

        # 施工许可证编号
        completion_make_numer = completion_tr[1].xpath('./td')[3].xpath('text()').extract_first()

        # 质量检测机构名称
        test_name = completion_tr[2].xpath('./td')[1].xpath('text()').extract_first()

        # 质量检测机构组织机构代码
        test_number = completion_tr[2].xpath('./td')[3].xpath('text()').extract_first()

        # 实际造价(万元)
        actual_capital = completion_tr[3].xpath('./td')[1].xpath('text()').extract_first()

        # 实际面积(平方米)
        actual_area = completion_tr[3].xpath('./td')[3].xpath('text()').extract_first()

        # 实际建设规模
        actual_size = completion_tr[4].xpath('./td')[1].xpath('text()').extract_first()

        # 结构体系
        c_body = completion_tr[4].xpath('./td')[3].xpath('text()').extract_first()

        # 备注
        remarks = completion_tr[5].xpath('./td')[1].xpath('text()').extract_first()

        Completion_data = templates.Completion(
            companyName=response.meta['company_name'], code=project_number, prjFinishNum=completion_num,
            factCost=actual_capital, factArea=actual_area, factSize=actual_size, prjStructureType=c_body,
            mark=remarks, provincePrjFinishNum=completion_num
        )
        Completion_zz = Completion_data.data()
        if Completion_zz['prjFinishNum']:
            print('当前公司----%s---%s--竣工数据' % (project_name, Completion_zz))
            yield scrapy.Request(url='https://api.maotouin.com/rest/companyInfo/addCompanyProjectFinish.htm',
                                 headers={'Content-Type': 'application/json'},
                                 method='POST',
                                 body=json.dumps(Completion_zz),
                                 callback=self.project_zz,
                                 meta={'type': '竣工验收'}
                                 )
# ---- Example #7 (示例#7) ----
    def project_basic(self, response):
        """Parse a cx.jlsjsxxw.com project detail page.

        Posts the project's basic info to the aggregation API, then follows
        the bid (ztb) / drawing review (sgtsc) / contract record (htba) /
        construction permit (sgxk) sub-pages for the same project.

        Expects ``response.meta['companyName']`` to be set by the caller.
        Yields scrapy.Request objects (this is a generator callback).
        """
        # Build the selector once instead of re-parsing the response body
        # for every field.
        sel = Selector(response=response)

        name = sel.xpath('//td[@colspan="3"]/text()').extract_first()
        code = sel.xpath(
            '//td[@class="name_level3 col_01_value"]/text()').extract_first()

        # The detail table alternates between two cell classes; grab each
        # node list once and index into it.
        col01 = sel.xpath('//td[@class="col_01_value"]')
        col02 = sel.xpath('//td[@class="col_02_value"]')

        provinceCode = col02[0].xpath('text()').extract_first()
        unit = col01[1].xpath('text()').extract_first()
        catalog = col02[1].xpath('text()').extract_first()
        unitLicenseNum = col01[2].xpath('text()').extract_first()
        area = col01[3].xpath('text()').extract_first()
        docuCode = col01[4].xpath('text()').extract_first()
        level = col02[4].xpath('text()').extract_first()
        money = col01[5].xpath('text()').extract_first()
        acreage = col02[5].xpath('text()').extract_first()
        trait = col01[6].xpath('text()').extract_first()
        purpose = col02[6].xpath('text()').extract_first()

        basic = templates.Project(name=name,
                                  companyName=response.meta['companyName'],
                                  code=code,
                                  provinceCode=provinceCode,
                                  unit=unit,
                                  catalog=catalog,
                                  unitLicenseNum=unitLicenseNum,
                                  area=area,
                                  docuCode=docuCode,
                                  level=level,
                                  money=money,
                                  acreage=acreage,
                                  trait=trait,
                                  purpose=purpose)
        basic_data = basic.data()
        print(basic_data, '基本信息')
        yield scrapy.Request(
            url=
            'https://api.maotouin.com/rest/companyInfo/addCompanyProject.htm',
            body=json.dumps(basic_data),
            callback=self.project_zz,
            headers={'Content-Type': 'application/json'},
            method='POST',
            meta={
                'type': '基本信息',
                'company_name': basic_data['companyName']
            },
        )

        # Extract the site-local project id from the current URL once,
        # instead of re-running the regex for each follow-up request.
        # NOTE(review): still raises IndexError if 'PrjId=' is absent,
        # exactly as the previous code did — confirm upstream guarantees it.
        prj_id = re.findall('PrjId=(.*)', response.url)[0]
        company_name = response.meta['companyName']

        # The four sub-pages share one handler URL; only the method name,
        # the cache-buster timestamp and the callback differ.
        follow_ups = (
            ('ztb', '1556177544518', self.project_bid_list),
            ('sgtsc', '1558342067012', self.project_drawing_list),
            ('htba', '1558342067013', self.project_contract_list),
            ('sgxk', '1558342067014', self.project_construction_list),
        )
        for method, ts, callback in follow_ups:
            url = ('http://cx.jlsjsxxw.com/handle/ProjectHandler.ashx'
                   '?method=%s&PRJID=%s&_=%s' % (method, prj_id, ts))
            yield scrapy.Request(url=url,
                                 callback=callback,
                                 # fresh dict per request so meta is never
                                 # shared between in-flight requests
                                 meta={
                                     'companyName': company_name,
                                     'code': code
                                 })
# ---- Example #8 (示例#8) ----
    def project_basic(self, response):
        """Parse a 218.60.144.163 (LNJGPublisher) project detail page.

        Posts the project's basic info to the aggregation API via
        ``templates.Projects.html_analysis``, then follows the bid (ztb) /
        drawing review (sgtsc) / contract record (htba) / construction
        permit (sgxk) sub-pages for the same project.

        Expects ``response.meta['companyName']``.  Yields scrapy.Request
        objects (this is a generator callback).
        """
        basic = templates.Projects('Project')
        # Declarative extraction spec consumed by html_analysis():
        # 'attr' is the xpath, 'that' an optional index into the matched
        # node list ('' means extract directly), 'then' a relative xpath
        # applied to the indexed node.
        attrs = [
            {
                'that': '',
                'attr': '//td[@colspan="3"]/text()',
                'name': 'name'
            },
            {
                'that': '',
                'attr': '//td[@class="name_level3 col_01_value"]/text()',
                'name': 'code'
            },
            {
                'that': 0,
                'attr': '//td[@class="col_02_value"]',
                'name': 'provinceCode',
                'then': 'text()'
            },
            {
                'that': 1,
                'attr': '//td[@class="col_01_value"]',
                'name': 'unit',
                'then': 'text()'
            },
            {
                'that': 1,
                'attr': '//td[@class="col_02_value"]',
                'name': 'catalog',
                'then': 'text()'
            },
            {
                'that': 2,
                'attr': '//td[@class="col_01_value"]',
                'name': 'unitLicenseNum',
                'then': 'text()'
            },
            {
                'that': 2,
                'attr': '//td[@class="col_02_value"]',
                'name': 'area',
                'then': 'text()'
            },
            {
                'that': 4,
                'attr': '//td[@class="col_01_value"]',
                'name': 'docuCode',
                'then': 'text()'
            },
            {
                'that': 4,
                'attr': '//td[@class="col_02_value"]',
                'name': 'level',
                'then': 'text()'
            },
            {
                'that': 5,
                'attr': '//td[@class="col_01_value"]',
                'name': 'money',
                'then': 'text()'
            },
            {
                'that': 5,
                'attr': '//td[@class="col_02_value"]',
                'name': 'acreage',
                'then': 'text()'
            },
            {
                'that': 6,
                'attr': '//td[@class="col_01_value"]',
                'name': 'trait',
                'then': 'text()'
            },
            {
                'that': 6,
                'attr': '//td[@class="col_02_value"]',
                'name': 'purpose',
                'then': 'text()'
            },
        ]
        sel = Selector(response=response)
        code = sel.xpath(
            '//td[@class="name_level3 col_01_value"]/text()').extract_first()
        name = sel.xpath('//td[@colspan="3"]/text()').extract_first()
        # The cell text carries trailing tokens after the code; keep only
        # the first whitespace-separated token.
        # NOTE(review): raises AttributeError if the cell is missing, same
        # as before — confirm every detail page has this cell.
        code = code.split()[0]

        basic_d = basic.html_analysis(response, attrs)
        basic_d['companyName'] = response.meta['companyName']

        basic = templates.Project(**basic_d)
        basic_data = basic.data()
        print(basic_data, '基本信息')
        yield scrapy.Request(
            url=
            'https://api.maotouin.com/rest/companyInfo/addCompanyProject.htm',
            body=json.dumps(basic_data),
            callback=self.project_zz,
            headers={'Content-Type': 'application/json'},
            method='POST',
            meta={
                'type': '基本信息',
                'company_name': basic_data['companyName']
            },
        )

        # Extract the project number from the current URL once, instead of
        # re-running the regex for every follow-up request.
        prj_num = re.findall('PRJNUM=(.*)', response.url)[0]
        company = response.meta['companyName']
        # All four sub-pages share one handler URL and the same
        # cache-buster timestamp; only the method name varies.
        base = ('http://218.60.144.163/LNJGPublisher/handle/ProjectHandler'
                '.ashx?method=%s&PRJNUM=%s&_=1558598717869')

        bid_url = base % ('ztb', prj_num)
        yield scrapy.Request(url=bid_url,
                             callback=self.project_bid_list,
                             meta={
                                 'companyName': company,
                                 'code': code
                             })

        drawing_url = base % ('sgtsc', prj_num)
        # was printing response.url; log the request URL like the siblings
        print(drawing_url, '施工图纸审查')
        yield scrapy.Request(url=drawing_url,
                             callback=self.project_drawing_list,
                             meta={
                                 'companyName': company,
                                 'code': code
                             })

        contract_url = base % ('htba', prj_num)
        print(contract_url, '合同备案全部')
        yield scrapy.Request(url=contract_url,
                             callback=self.project_contract_list,
                             meta={
                                 'companyName': company,
                                 'code': code
                             })

        construction_url = base % ('sgxk', prj_num)
        print(construction_url, '施工许可详list')
        # NOTE(review): this meta passes 'name' where every sibling passes
        # 'code' — looks inconsistent; confirm project_construction_list
        # really expects the project name here before changing it.
        yield scrapy.Request(url=construction_url,
                             callback=self.project_construction_list,
                             meta={
                                 'companyName': company,
                                 'name': name
                             })