def house_parse(self, bu_id, co_id):
    """Parse the unit list of one building and store every unit found.

    Posts ``bu_id`` and ``co_id`` to the NBView endpoint, extracts the
    per-unit fields from the returned HTML with regular expressions and
    calls ``insert_db()`` on one ``House`` record per unit.

    Args:
        bu_id: Building identifier; sent as the ``nid`` form field and
            stored on every record.
        co_id: Project identifier; sent as the ``projectid`` form field
            and stored on every record.

    Returns:
        None. If the request fails, the error is printed and nothing is
        inserted.
    """
    house_url = "http://ys.tyfdc.gov.cn/Firsthand/tyfc/publish/probld/NBView.do?"
    formdata = {"nid": bu_id, "projectid": co_id}
    try:
        res = requests.post(house_url, data=formdata, headers=self.headers)
    except Exception as e:
        print("co_index={},房屋详情页无法访问".format(co_index), e)
        # Without a response there is nothing to parse; falling through
        # would hit an unbound ``res``.
        return
    con = res.text

    flags = re.S | re.M
    # Each findall yields one column of the unit table; row i of every
    # column is expected to describe the same unit.
    ho_name = re.findall('\'\\);">(.*?) ', con, flags)
    ho_build_size = re.findall('<span.*?建筑面积:(.*?)㎡', con, flags)
    ho_true_size = re.findall('<span.*?套内面积:(.*?)分', con, flags)
    ho_share_size = re.findall('<span.*?分摊面积:(.*?)㎡', con, flags)
    ho_type = re.findall('<span.*?用途:(.*?)房', con, flags)
    ho_price = re.findall('<span.*?单价:(.*?)"', con, flags)
    ho_id = re.findall(r"getHouseBaseInfo\('(.*?)'\)", con, flags)

    columns = (ho_id, ho_name, ho_build_size, ho_type,
               ho_share_size, ho_price, ho_true_size)
    sizes = [len(column) for column in columns]
    if len(set(sizes)) > 1:
        # zip() stops at the shortest column, so the surplus entries are
        # dropped instead of raising IndexError halfway through the loop.
        print("co_index={}, house field counts differ, extra entries skipped".format(co_index), sizes)

    for num, name, build_size, usage, share_size, price, true_size in zip(*columns):
        # A fresh record per unit so no state is carried between rows.
        ho = House(co_index)
        ho.co_id = co_id
        ho.bu_id = bu_id
        ho.ho_name = name
        ho.ho_build_size = build_size
        ho.ho_type = usage
        ho.ho_share_size = share_size
        ho.ho_price = price
        ho.ho_true_size = true_size
        ho.ho_num = num
        ho.insert_db()
def get_house_info(self, ho_con=None, headers=None, bu_id=None, url=None):
    """Follow every unit link on a listing page and store each unit.

    The listing HTML is taken from ``ho_con`` when given; otherwise it is
    downloaded from ``url`` and decoded as GBK. Each link found in a
    ``<td width='120'>`` cell is fetched, the unit fields are extracted
    with regular expressions and a ``House`` record is inserted.

    Args:
        ho_con: Listing page HTML, or None to download it from ``url``.
        headers: HTTP headers passed to every request.
        bu_id: Building identifier stored on every record.
        url: Listing page URL; only used when ``ho_con`` is None.

    Returns:
        None. A unit whose page cannot be fetched or parsed is reported
        with ``print`` and skipped; the remaining units are still
        processed.
    """
    if ho_con is None:
        res = requests.get(url, headers=headers)
        html = etree.HTML(res.content.decode('gbk'))
    else:
        html = etree.HTML(ho_con)

    flags = re.S | re.M
    for ho_url in html.xpath("//td[@width='120']/a/@href"):
        ho_detail = 'http://www.qyfgj.cn/newys/' + ho_url
        try:
            res = requests.get(ho_detail, headers=headers)
            con = res.content.decode('gbk')
            ho = House(co_index)
            ho.bu_id = bu_id
            # re.search returns None when a field is missing, which makes
            # .group(1) raise; the handler below turns that into a skip.
            ho.ho_num = re.search('房屋号.*?">(.*?)</td', con, flags).group(1)
            ho.ho_build_size = re.search('建筑面积.*?">(.*?)m', con, flags).group(1)
            ho.ho_true_size = re.search('套内面积.*?">(.*?)m', con, flags).group(1)
            ho.ho_type = re.search('房屋用途.*?">(.*?)</td', con, flags).group(1)
            ho.insert_db()
        except Exception as e:
            # One broken detail page must not abort the rest of the list.
            print('co_index={}, 房号错误,url={}'.format(co_index, ho_detail), e)
def house_info(self, house_list, bu_id, co_id):
    """Fetch each unit detail page and store the parsed unit.

    Every entry of ``house_list`` is appended to the ``ris.szpl.gov.cn``
    base URL, fetched with ``self.headers`` and parsed with regular
    expressions into a ``House`` record, which is then inserted.

    Args:
        house_list: Iterable of relative detail-page URLs.
        bu_id: Building identifier stored on every record.
        co_id: Project identifier stored on every record.

    Returns:
        None. A unit whose page cannot be fetched or parsed is reported
        with ``print`` and skipped; the remaining units are still
        processed.
    """
    for house_url in house_list:
        url = "http://ris.szpl.gov.cn/bol/" + house_url
        try:
            res = requests.get(url, headers=self.headers)
            con = res.text
            # A fresh record per unit so no state is carried between rows.
            ho = House(co_index)
            # NOTE(review): this id taken from the URL is overwritten by
            # the 房号 field a few lines below — confirm which one is
            # meant to end up in ho_num.
            ho.ho_num = re.search(r'id=(\d+)', house_url).group(1)
            ho.bu_num = re.search('情况.*?">(.*?)&', con).group(1)
            ho.bu_id = bu_id
            ho.co_id = co_id
            ho.ho_floor = re.search(r'楼层.*?">(\d+)&', con).group(1)
            ho.ho_num = re.search(r'房号.*?">(\d+)&', con).group(1)
            ho.ho_type = re.search(r'用途.*?">(\d+)&', con).group(1)
            ho.ho_room_type = re.search(r'户型.*?">(\d+)&', con).group(1)
            # NOTE(review): the '.' between the digit groups is unescaped
            # and matches any character; left as-is because escaping it
            # would stop matching values the current pattern accepts.
            ho.ho_build_size = re.search(r'建筑面积<.*?">(\d+.\d+)平方米', con).group(1)
            ho.ho_true_size = re.search(r'户内面积<.*?">(\d+.\d+)平方米', con).group(1)
            ho.insert_db()
        except Exception as e:
            # re.search returns None for a missing field, so .group(1)
            # raises; skip this unit instead of aborting the whole list.
            print('co_index={}, 房号错误,url={}'.format(co_index, url), e)
def get_build_info(self, comm_url_list):
    """Store the building and unit records for every project link.

    For each entry, the first two ``+digits+`` groups are read as ``sid``
    and ``pid``. The building page is fetched and parsed into a
    ``Building`` record, then the unit list is fetched and every
    ``<Result>`` element is parsed into a ``House`` record.

    Args:
        comm_url_list: Iterable of link strings containing the
            ``+sid+`` and ``+pid+`` groups.

    Returns:
        None. Failures are reported with ``print``; a failing building or
        unit does not stop the remaining entries from being processed.
    """
    flags = re.S | re.M
    for i in comm_url_list:
        ids = re.findall(r'\+(\d+)\+', i)
        if len(ids) < 2:
            # Neither URL can be built without both ids, so skip the
            # entry rather than reuse values from a previous iteration.
            print('co_index={}, 楼栋错误,url={}'.format(co_index, i), 'sid/pid not found')
            continue
        sid, pid = ids[0], ids[1]

        build_url = ('http://www.jjzzfdc.com.cn/WebClient/ClientService/bldg_query.aspx?pid='
                     + pid + '&sid=' + sid)
        # Created before the request so the unit records below always
        # refer to this building, even when its page fails to parse.
        build = Building(co_index)
        build.bu_id = pid
        try:
            html = requests.get(build_url).text
            # NOTE(review): bu_num and bu_address are both filled from
            # the 楼栋座落 cell — confirm that this is intended.
            location = re.search('楼栋座落.*?<td.*?>(.*?)<', html, flags).group(1)
            build.bu_num = location
            build.bu_address = location
            build.bu_pre_sale = re.search('预售证号.*?">(.*?) ', html, flags).group(1)
            build.bu_pre_sale_date = re.search('时间.*?">(.*?) ', html, flags).group(1)
            build.bu_all_house = re.search('dM.*?">(.*?) ', html, flags).group(1)
            build.insert_db()
        except Exception as e:
            print('co_index={}, 楼栋错误,url={}'.format(co_index, build_url), e)

        # The query string previously contained a literal pilcrow
        # ("¶ms="), which looks like "&params=" after HTML-entity
        # decoding; the separator and parameter name are restored here.
        house_url = ('http://www.jjzzfdc.com.cn/WebClient/ClientService/proxp.aspx'
                     '?key=WWW_LPB_001&params=' + sid)
        try:
            html_ = requests.get(house_url).text
        except Exception as e:
            print('co_index={}, 房号错误,url={}'.format(co_index, house_url), e)
            continue

        # bu_num is absent when the building page could not be parsed.
        bu_num = getattr(build, 'bu_num', None)
        for house_info in re.findall('<Result.*?</Result>', html_, flags):
            try:
                house = House(co_index)
                house.bu_id = build.bu_id
                house.bu_num = bu_num
                house.ho_name = re.search('<ONAME>(.*?)</ONAME>', house_info, flags).group(1)
                house.ho_num = re.search('<OSEQ>(.*?)</OSEQ>', house_info, flags).group(1)
                house.ho_build_size = re.search('<BAREA>(.*?)</BAREA>', house_info, flags).group(1)
                house.ho_floor = re.search('<FORC>(.*?)</FORC>', house_info, flags).group(1)
                house.ho_true_size = re.search('<PAREA>(.*?)</PAREA>', house_info, flags).group(1)
                house.insert_db()
            except Exception as e:
                print('co_index={}, 房号错误'.format(co_index), e)