Exemplo n.º 1
0
    def comm(self, id):
        """Fetch one building and its rooms from the API and store them.

        Three endpoints are queried for ``id``: the home-page building info,
        the detailed building info and the house list. A ``Building`` record
        is filled from the first two and inserted. Then, for every room in
        the house list, the per-room endpoint is queried and a ``House``
        record is inserted.

        Errors while fetching or parsing the building-level data propagate
        to the caller. Errors for an individual room are printed and that
        room is skipped.

        Args:
            id: Building identifier. It is sent as ``blockNumber`` /
                ``pannelNumber`` and stored as ``bu_id`` on every record.
                (The name shadows the builtin but is kept for callers.)
        """
        api_root = self.start_url + "/api/buildInfos/"
        block_id = str(id)

        house_url = api_root + "getHouseInfosByPannelNumber?pannelNumber=" + block_id
        comm_url = api_root + "getHomePageBuildingInfo?blockNumber=" + block_id
        comm_detail_url = api_root + "getDetailsBuildingInfo?blockNumber=" + block_id

        # All three requests are issued before any response is parsed.
        comm_res = requests.get(comm_url)
        comm_detail_res = requests.get(comm_detail_url)
        house_res = requests.get(house_url)
        comm_dict = json.loads(comm_res.text)
        comm_detail_dict = json.loads(comm_detail_res.text)
        house_dict = json.loads(house_res.text)

        comm_data = comm_dict["data"]
        comm_detail_data = comm_detail_dict["data"]

        bu = Building(co_index)
        bu.bu_id = id
        bu.bu_num = comm_data["nameBuildings"]
        bu.area = comm_detail_data["houseingArea"]
        bu.bu_address = comm_data["houseaddress"]
        bu.bu_pre_sale = comm_detail_data["yszh"]
        bu.bu_type = comm_data["propertycategory"]
        bu.bo_develops = comm_data["companyName"]
        bu.insert_db()

        # The house list is a list of groups, each carrying its rooms under
        # its own "data" key.
        for group in house_dict["data"]:
            rooms = group["data"]
            if not rooms:
                # Nothing to store for an empty (or missing) group.
                continue
            for room in rooms:
                # A fresh record per room so no state is shared between
                # inserts.
                ho = House(co_index)
                try:
                    room_id = room["houseNumber"]
                    room_url = (api_root
                                + "getHouseInfoByHouseNumber?houseNumber="
                                + str(room_id))
                    res = requests.get(room_url, headers=self.headers)
                    room_data = json.loads(res.text)["data"]
                    ho.bu_id = id
                    # ho.ho_num is left unset; room_id is only used to build
                    # the detail URL.
                    ho.ho_name = room_data["houseNo"]
                    ho.ho_build_size = room_data["buildArea"]
                    ho.ho_true_size = room_data["jacketArea"]
                    ho.ho_share_size = room_data["apportionedArea"]
                    ho.ho_floor = room_data["nominalLevel"]
                    ho.insert_db()
                except Exception as e:
                    # Best effort: one bad room must not stop the others.
                    print(e)
Exemplo n.º 2
0
 def get_build_detail(self, all_building_url_list):
     """Store the buildings found on each project page and collect room URLs.

     Each URL is fetched and parsed. Project-level values are read from the
     second row of the ``houseTable_1`` table and copied onto every
     ``Building`` extracted from that page. Each building is inserted and
     its room-table URL is collected.

     Failures are printed and skipped, both per page and per building.

     Args:
         all_building_url_list: Iterable of project page URLs.

     Returns:
         List of absolute room-table URLs. URLs from later pages are placed
         before those from earlier pages.
     """
     flags = re.S | re.M
     house_url_list = []
     for i in all_building_url_list:
         try:
             response = requests.get(i, headers=self.headers)
             html = response.text
             tree = etree.HTML(html)
             # Developer.
             bo_develops = tree.xpath('//*[@id="content_1"]/div[3]/text()[2]')[0]
             # Sales area; stays an empty list when the cell is missing.
             bu_build_size = tree.xpath('//*[@id="houseTable_1"]/tr[2]/td[6]/a/text()')
             if bu_build_size:
                 bu_build_size = bu_build_size[0]
             # Pre-sale certificate; stays an empty list when missing.
             bu_pre_sale = tree.xpath('//*[@id="houseTable_1"]/tr[2]/td[1]/a/text()')
             if bu_pre_sale:
                 bu_pre_sale = bu_pre_sale[0]
             # Total floors.
             bu_floor = tree.xpath('//*[@id="houseTable_1"]/tr[2]/td[3]/a/text()')[0]
             # Total number of units.
             bu_all_house = tree.xpath('//*[@id="houseTable_1"]/tr[2]/td[4]/a/text()')[0]
             # Usage of the houses.
             bu_type = tree.xpath('//*[@id="houseTable_1"]/tr[2]/td[5]/a/text()')[0]
             build_html = re.search('houseTable_1.*?当前共有', html, flags).group()
             build_detail_html = re.findall(
                 'class.*?</a></td>.*?</a></td>.*?</a></td>', build_html, flags)
             bu_num = re.findall('项目名称:</b>(.*?)</div>', html, flags)[0].strip()
             url_list = []
             for bu in build_detail_html:
                 try:
                     build = Building(co_index)
                     # Raw strings: "\?" is an invalid escape sequence in a
                     # normal string literal.
                     build.bu_id = re.search(
                         r"href='roomTable.aspx\?id=(.*?)&", bu, flags).group(1)
                     build.bu_address = re.search(
                         "_blank.*?_blank'>(.*?)</a></td><td>", bu, flags
                     ).group(1).strip()
                     build.bo_develops = bo_develops
                     build.bu_build_size = bu_build_size
                     build.bu_pre_sale = bu_pre_sale
                     build.bu_num = bu_num
                     build.bu_floor = bu_floor
                     build.bu_all_house = bu_all_house
                     build.bu_type = bu_type
                     # The last area name contained in the address wins.
                     # NOTE(review): the original ended this branch with a
                     # no-op `continue`; if the first match was intended,
                     # this should `break` instead - confirm.
                     for k in self.area_list:
                         if k in build.bu_address:
                             build.area = k
                     build.insert_db()
                     house_url = re.search(
                         r"(roomTable.aspx\?id=.*?&vc=.*?)'", bu, flags).group(1)
                     url_list.append('http://dgfc.dg.gov.cn/dgwebsite_v2/Vendition/' + house_url)
                 except Exception as e:
                     print('楼栋错误,co_index={},url={}'.format(co_index, i), e)
             # Prepend, so the newest page's URLs come first.
             house_url_list = url_list + house_url_list
         except Exception as e:
             print('楼栋错误,co_index={},url={}'.format(co_index, i), e)
     return house_url_list
Exemplo n.º 3
0
 def get_build_info(self, build_url_list):
     """Fetch each building page, store the building, then crawl its houses.

     For every entry, the building page for ``entry[0]`` is requested, the
     fields are extracted from the HTML with regular expressions, the
     ``Building`` record is inserted and ``self.get_house_info`` is called
     with the building id.

     Any failure for one entry is printed and the loop moves on.

     Args:
         build_url_list: Iterable of sequences whose first element is the
             building id, used as the ``buildID`` query value.
     """
     for i in build_url_list:
         # Reset per item: the except handler below formats this value, and
         # it must not be unbound (or left over from the previous item) when
         # the failure happens before the URL is built.
         build_url = None
         try:
             build = Building(co_index)
             build_url = 'http://222.223.160.199:8088/website/buildquery/selectBuild.jsp?buildID=' + i[0]
             response = requests.get(build_url, headers=self.headers)
             html = response.text

             def field(pattern):
                 # First capture group of `pattern` in the page; raises
                 # AttributeError when the label is not present.
                 return re.search(pattern, html, re.S | re.M).group(1)

             build.bu_id = i[0]
             build.co_build_structural = field('结构类型.*?<td.*?>(.*?)<')  # structure type
             build.bo_build_end_time = field('建成年份.*?<td.*?>(.*?)<')  # year built
             build.bu_build_size = field('总建筑面积.*?<td.*?>(.*?)<')  # total building area
             build.bu_num = field('幢号.*?<td.*?>(.*?)<')  # building number
             build.size = field('占地面积.*?<td>(.*?)<')  # land area
             build.bu_floor = field('房屋层数.*?<td>(.*?)<')  # number of floors
             build.bu_all_house = field('房屋套数.*?<td>(.*?)<')  # number of units
             build.area = field('坐落区.*?<td>(.*?)<')  # district
             build.insert_db()
             self.get_house_info(build.bu_id)
         except Exception as e:
             print('请求错误,url={}'.format(build_url),e)