def get_build_info(self, build_url_list):
    """Fetch each building's detail page, store it, then crawl its houses.

    For every entry in ``build_url_list`` the first element (``entry[0]``)
    is used as the building id. The ``selectBuild.jsp`` page for that id is
    downloaded with ``self.headers``, the fields listed in
    ``field_patterns`` are extracted by regex and stored on a new
    ``Building``, which is then persisted with ``insert_db()``. Finally
    ``self.get_house_info`` is called with the building id.

    Any exception raised while handling one entry is printed together with
    the URL being processed and the loop moves on to the next entry.

    Args:
        build_url_list: iterable of indexable entries whose element 0 is
            the building id as a string (it is concatenated to the URL).

    Returns:
        None.
    """
    # (Building attribute, regex whose group(1) holds the value).
    # The patterns are reproduced exactly from the original code, including
    # the `<td.*?>` / `<td>` difference between the first and last four.
    field_patterns = (
        ('co_build_structural', '结构类型.*?<td.*?>(.*?)<'),  # structure type
        ('bo_build_end_time', '建成年份.*?<td.*?>(.*?)<'),  # year built
        ('bu_build_size', '总建筑面积.*?<td.*?>(.*?)<'),  # total floor area
        ('bu_num', '幢号.*?<td.*?>(.*?)<'),  # building number
        ('size', '占地面积.*?<td>(.*?)<'),  # land area
        ('bu_floor', '房屋层数.*?<td>(.*?)<'),  # number of floors
        ('bu_all_house', '房屋套数.*?<td>(.*?)<'),  # number of units
        ('area', '坐落区.*?<td>(.*?)<'),  # district
    )

    for entry in build_url_list:
        # Reset per iteration: the handler below must never hit an unbound
        # name or report the URL left over from a previous entry.
        build_url = None
        try:
            build = Building(co_index)
            build_url = 'http://222.223.160.199:8088/website/buildquery/selectBuild.jsp?buildID=' + entry[0]
            response = requests.get(build_url, headers=self.headers)
            html = response.text
            build.bu_id = entry[0]
            for attr_name, pattern in field_patterns:
                match = re.search(pattern, html, re.S | re.M)
                if match is None:
                    # Name the missing field instead of failing with an
                    # opaque AttributeError on None.group().
                    raise ValueError('field pattern not found: ' + pattern)
                setattr(build, attr_name, match.group(1))
            build.insert_db()
            self.get_house_info(build.bu_id)
        except Exception as e:
            print('请求错误,url={}'.format(build_url), e)
def comm(self, id):
    """Crawl one building and all of its rooms from the buildInfos JSON API.

    Three endpoints under ``self.start_url`` are queried for ``id``:
    ``getHomePageBuildingInfo``, ``getDetailsBuildingInfo`` and
    ``getHouseInfosByPannelNumber``. Selected fields of the first two
    responses are stored on a ``Building`` and persisted with
    ``insert_db()``. The third response's ``"data"`` is a list of groups,
    each carrying its own ``"data"`` list of rooms; for every room the
    ``getHouseInfoByHouseNumber`` endpoint is queried and a ``House`` is
    persisted.

    A failure while processing one room is printed and does not stop the
    remaining rooms. Failures while fetching or storing the building itself
    propagate to the caller.

    Args:
        id: building/block number; converted with ``str()`` for the URLs
            and stored as ``bu_id`` on both record types. (The name shadows
            the builtin but is kept because it is part of the interface.)

    Returns:
        None.
    """
    bu = Building(co_index)
    house_url = self.start_url + "/api/buildInfos/getHouseInfosByPannelNumber?pannelNumber=" + str(id)
    comm_url = self.start_url + "/api/buildInfos/getHomePageBuildingInfo?blockNumber=" + str(id)
    comm_detail_url = self.start_url + "/api/buildInfos/getDetailsBuildingInfo?blockNumber=" + str(id)

    comm_res = requests.get(comm_url)
    comm_detail_res = requests.get(comm_detail_url)
    house_res = requests.get(house_url)

    comm_data = json.loads(comm_res.text)["data"]
    comm_detail_data = json.loads(comm_detail_res.text)["data"]
    house_groups = json.loads(house_res.text)["data"]

    bu.bu_id = id
    bu.bu_num = comm_data["nameBuildings"]
    bu.area = comm_detail_data['houseingArea']
    bu.bu_address = comm_data["houseaddress"]
    bu.bu_pre_sale = comm_detail_data["yszh"]
    bu.bu_type = comm_data["propertycategory"]
    bu.bo_develops = comm_data["companyName"]
    bu.insert_db()

    for group in house_groups:
        rooms = group["data"]
        if not rooms:
            # Nothing to store for an empty (or null) group.
            continue
        for room in rooms:
            # A fresh record per room, so no state is shared between the
            # objects handed to insert_db().
            ho = House(co_index)
            try:
                room_id = room["houseNumber"]
                room_url = self.start_url + "/api/buildInfos/getHouseInfoByHouseNumber?houseNumber=" + str(room_id)
                res = requests.get(room_url, headers=self.headers)
                room_data = json.loads(res.text)["data"]
                ho.bu_id = id
                # NOTE: ho.ho_num is not populated; an assignment from
                # room_id was commented out in the original code.
                ho.ho_name = room_data["houseNo"]
                ho.ho_build_size = room_data["buildArea"]
                ho.ho_true_size = room_data["jacketArea"]
                ho.ho_share_size = room_data["apportionedArea"]
                ho.ho_floor = room_data["nominalLevel"]
                ho.insert_db()
            except Exception as e:
                print(e)
def get_build_detail(self, all_building_url_list):
    """Store the buildings listed on each project page and collect house URLs.

    Each URL in ``all_building_url_list`` is downloaded with
    ``self.headers``. Project-level values (developer, sales area, presale
    certificate, floors, unit count, usage, project name) are read once per
    page and copied onto every ``Building`` found in the ``houseTable_1``
    section. Each building is persisted with ``insert_db()`` and its
    ``roomTable.aspx`` link is turned into an absolute URL.

    Errors are printed and skipped at two levels: a failure in one building
    row does not affect the other rows, and a failure while loading or
    parsing a page skips that page only.

    Args:
        all_building_url_list: iterable of project page URLs.

    Returns:
        list of absolute house-table URLs. URLs from later pages are placed
        before those of earlier pages; within a page, row order is kept.
    """
    house_url_list = []
    for page_url in all_building_url_list:
        try:
            response = requests.get(page_url, headers=self.headers)
            html = response.text
            tree = etree.HTML(html)

            # Developer
            bo_develops = tree.xpath('//*[@id="content_1"]/div[3]/text()[2]')[0]
            # Sales area; stays as the empty xpath result when the cell is missing
            bu_build_size = tree.xpath('//*[@id="houseTable_1"]/tr[2]/td[6]/a/text()')
            if bu_build_size:
                bu_build_size = bu_build_size[0]
            # Presale certificate; same fallback as above
            bu_pre_sale = tree.xpath('//*[@id="houseTable_1"]/tr[2]/td[1]/a/text()')
            if bu_pre_sale:
                bu_pre_sale = bu_pre_sale[0]
            # Total number of floors
            bu_floor = tree.xpath('//*[@id="houseTable_1"]/tr[2]/td[3]/a/text()')[0]
            # Total number of units
            bu_all_house = tree.xpath('//*[@id="houseTable_1"]/tr[2]/td[4]/a/text()')[0]
            # House usage
            bu_type = tree.xpath('//*[@id="houseTable_1"]/tr[2]/td[5]/a/text()')[0]

            build_html = re.search('houseTable_1.*?当前共有', html, re.S | re.M).group()
            build_detail_html = re.findall(
                'class.*?</a></td>.*?</a></td>.*?</a></td>', build_html, re.S | re.M)
            bu_num = re.findall('项目名称:</b>(.*?)</div>', html, re.S | re.M)[0].strip()

            url_list = []
            for row_html in build_detail_html:
                try:
                    build = Building(co_index)
                    # Raw strings: `\?` is not a valid escape in a normal
                    # string literal and triggers a warning on modern Python.
                    build.bu_id = re.search(
                        r"href='roomTable.aspx\?id=(.*?)&", row_html, re.S | re.M).group(1)
                    build.bu_address = re.search(
                        "_blank.*?_blank'>(.*?)</a></td><td>", row_html, re.S | re.M).group(1).strip()
                    build.bo_develops = bo_develops
                    build.bu_build_size = bu_build_size
                    build.bu_pre_sale = bu_pre_sale
                    build.bu_num = bu_num
                    build.bu_floor = bu_floor
                    build.bu_all_house = bu_all_house
                    build.bu_type = bu_type
                    # Every known area name is checked; when several occur in
                    # the address the last one in self.area_list wins.
                    for area_name in self.area_list:
                        if area_name in build.bu_address:
                            build.area = area_name
                    build.insert_db()
                    house_url = re.search(
                        r"(roomTable.aspx\?id=.*?&vc=.*?)'", row_html, re.S | re.M).group(1)
                    url_list.append('http://dgfc.dg.gov.cn/dgwebsite_v2/Vendition/' + house_url)
                except Exception as e:
                    print('楼栋错误,co_index={},url={}'.format(co_index, page_url), e)
            # Prepend this page's URLs to those gathered from earlier pages.
            house_url_list = url_list + house_url_list
        except Exception as e:
            print('楼栋错误,co_index={},url={}'.format(co_index, page_url), e)
    return house_url_list