Пример #1
0
    def house_parse(self, bu_id, co_id):
        """Download one building's unit table and store every unit found in it.

        Posts ``bu_id`` and ``co_id`` to the NBView endpoint, pulls the unit
        fields out of the returned markup with regular expressions and calls
        ``insert_db()`` once per unit id found.

        Args:
            bu_id: Building identifier, sent as the ``nid`` form field.
            co_id: Project identifier, sent as the ``projectid`` form field.

        Returns:
            None. When the request fails, the error is printed and nothing is
            parsed or stored.

        Raises:
            IndexError: If any field list is shorter than the list of unit ids,
                i.e. the page did not have the expected layout.
        """
        ho = House(co_index)
        house_url = "http://ys.tyfdc.gov.cn/Firsthand/tyfc/publish/probld/NBView.do?"
        formdata = {"nid": bu_id, "projectid": co_id}
        try:
            res = requests.post(house_url, data=formdata, headers=self.headers)
        except Exception as e:
            print("co_index={},房屋详情页无法访问".format(co_index), e)
            # No response exists on this path; falling through would fail on
            # the unbound ``res`` instead of reporting the real problem.
            return
        con = res.text

        # Raw strings keep the regex escapes (``\)``, ``\(``) from being
        # treated as invalid string escape sequences.
        flags = re.S | re.M
        ho_name = re.findall(r"'\);\">(.*?) ", con, flags)
        ho_build_size = re.findall(r'<span.*?建筑面积:(.*?)㎡', con, flags)
        ho_true_size = re.findall(r'<span.*?套内面积:(.*?)分', con, flags)
        ho_share_size = re.findall(r'<span.*?分摊面积:(.*?)㎡', con, flags)
        ho_type = re.findall(r'<span.*?用途:(.*?)房', con, flags)
        ho_price = re.findall(r'<span.*?单价:(.*?)"', con, flags)
        ho_id = re.findall(r"getHouseBaseInfo\('(.*?)'\)", con, flags)

        # The unit ids drive the loop; the other lists are read by position.
        # The same House object is refilled for every unit: each field set
        # here is overwritten on every pass before insert_db() is called.
        for index, house_id in enumerate(ho_id):
            ho.co_id = co_id
            ho.bu_id = bu_id
            ho.ho_name = ho_name[index]
            ho.ho_build_size = ho_build_size[index]
            ho.ho_type = ho_type[index]
            ho.ho_share_size = ho_share_size[index]
            ho.ho_price = ho_price[index]
            ho.ho_true_size = ho_true_size[index]
            ho.ho_num = house_id
            ho.insert_db()
Пример #2
0
    def get_house_info(self, ho_con=None, headers=None, bu_id=None, url=None):
        """Store every unit linked from a building's unit-listing page.

        Each link found in the listing is fetched, the unit fields are pulled
        out of the detail page with regular expressions, and the resulting
        ``House`` is written with ``insert_db()``.

        Args:
            ho_con: Markup of the listing page. When ``None``, the page is
                downloaded from ``url`` and decoded as GBK instead.
            headers: HTTP headers passed to every request.
            bu_id: Building identifier copied onto each stored unit.
            url: Address of the listing page; only used when ``ho_con`` is
                ``None``.

        Raises:
            AttributeError: If a detail page lacks one of the expected fields
                (the regular-expression search finds no match).
        """
        if ho_con is None:
            # No pre-fetched markup supplied: download the listing page.
            res = requests.get(url, headers=headers)
            ho_con = res.content.decode('gbk')
        html = etree.HTML(ho_con)

        flags = re.S | re.M
        for ho_url in html.xpath("//td[@width='120']/a/@href"):
            # The hrefs are joined onto the site's fixed base path.
            ho_detail = 'http://www.qyfgj.cn/newys/' + ho_url
            res = requests.get(ho_detail, headers=headers)
            con = res.content.decode('gbk')

            ho = House(co_index)
            ho.bu_id = bu_id
            ho.ho_num = re.search('房屋号.*?">(.*?)</td', con, flags).group(1)
            ho.ho_build_size = re.search('建筑面积.*?">(.*?)m', con,
                                         flags).group(1)
            ho.ho_true_size = re.search('套内面积.*?">(.*?)m', con,
                                        flags).group(1)
            ho.ho_type = re.search('房屋用途.*?">(.*?)</td', con,
                                   flags).group(1)
            ho.insert_db()
Пример #3
0
 def house_info(self, house_list, bu_id, co_id):
     """Fetch each unit detail page in ``house_list`` and store the unit.

     Args:
         house_list: Relative detail-page URLs; each must contain
             ``id=<digits>``.
         bu_id: Building identifier copied onto every stored unit.
         co_id: Project identifier copied onto every stored unit.

     Raises:
         AttributeError: If a URL or page lacks one of the expected fields
             (the regular-expression search finds no match).
     """
     # One House object is refilled for every page; all fields set below are
     # overwritten on each pass before insert_db() is called.
     ho = House(co_index)
     for house_url in house_list:
         url = "http://ris.szpl.gov.cn/bol/" + house_url
         res = requests.get(url, headers=self.headers)
         # NOTE(review): this URL id is overwritten by the 房号 value a few
         # lines down and never reaches insert_db(); kept so a URL without
         # an id still fails here as before. Confirm which value is wanted.
         ho.ho_num = re.search(r'id=(\d+)', house_url).group(1)
         con = res.text
         ho.bu_num = re.search(r'情况.*?">(.*?)&', con).group(1)
         ho.bu_id = bu_id
         ho.co_id = co_id
         ho.ho_floor = re.search(r'楼层.*?">(\d+)&', con).group(1)
         ho.ho_num = re.search(r'房号.*?">(\d+)&', con).group(1)
         # NOTE(review): 用途 and 户型 are captured as digits only; verify
         # against a real page that these cells are numeric.
         ho.ho_type = re.search(r'用途.*?">(\d+)&', con).group(1)
         ho.ho_room_type = re.search(r'户型.*?">(\d+)&', con).group(1)
         # The decimal point is escaped and the fraction is optional, so
         # whole-number areas such as "85平方米" are accepted too.
         ho.ho_build_size = re.search(
             r'建筑面积<.*?">(\d+(?:\.\d+)?)平方米', con).group(1)
         ho.ho_true_size = re.search(
             r'户内面积<.*?">(\d+(?:\.\d+)?)平方米', con).group(1)
         ho.insert_db()
Пример #4
0
    def get_build_info(self, comm_url_list):
        """Store the building and its units for every entry in ``comm_url_list``.

        For each entry, two numeric ids are extracted, the building page is
        fetched and stored, then the unit feed is fetched and every
        ``<Result>`` element in it is stored as a ``House``. Failures are
        printed and processing continues with the next unit or entry.

        Args:
            comm_url_list: Strings that each contain two ``+<digits>+`` groups;
                the first is used as ``sid`` and the second as ``pid``.
        """
        flags = re.S | re.M
        for i in comm_url_list:
            # Reset per-entry state so a failure below can never reuse the
            # ids or the Building object left over from the previous entry.
            sid = build = build_url = None
            try:
                ids = re.findall(r'\+(\d+)\+', i)
                sid = ids[0]
                pid = ids[1]
                build_url = 'http://www.jjzzfdc.com.cn/WebClient/ClientService/bldg_query.aspx?pid=' + pid + '&sid=' + sid
                response = requests.get(build_url)
                html = response.text
                build = Building(co_index)
                build.bu_id = pid
                # NOTE(review): bu_num and bu_address are both read from the
                # same 楼栋座落 cell; confirm that bu_num has no cell of its own.
                location = re.search('楼栋座落.*?<td.*?>(.*?)<', html,
                                     flags).group(1)
                build.bu_num = location
                build.bu_address = location
                build.bu_pre_sale = re.search('预售证号.*?">(.*?)&nbsp', html,
                                              flags).group(1)
                build.bu_pre_sale_date = re.search('时间.*?">(.*?)&nbsp', html,
                                                   flags).group(1)
                build.bu_all_house = re.search('dM.*?">(.*?)&nbsp', html,
                                               flags).group(1)
                build.insert_db()
            except Exception as e:
                print('co_index={}, 楼栋错误,url={}'.format(co_index, build_url),
                      e)

            if build is None:
                # The ids or the building page could not be obtained, so the
                # units cannot be attributed to a building; skip this entry.
                continue

            house_url = 'http://www.jjzzfdc.com.cn/WebClient/ClientService/proxp.aspx?key=WWW_LPB_001&params=' + sid
            try:
                result = requests.get(house_url)
            except Exception as e:
                # Treat a failed feed request like every other failure here:
                # report it and move on to the next entry.
                print('co_index={}, 房号错误,url={}'.format(co_index, house_url),
                      e)
                continue
            html_ = result.text

            for house_info in re.findall('<Result.*?</Result>', html_, flags):
                try:
                    house = House(co_index)
                    house.bu_id = build.bu_id
                    house.bu_num = build.bu_num
                    house.ho_name = re.search('<ONAME>(.*?)</ONAME>',
                                              house_info, flags).group(1)
                    house.ho_num = re.search('<OSEQ>(.*?)</OSEQ>', house_info,
                                             flags).group(1)
                    house.ho_build_size = re.search('<BAREA>(.*?)</BAREA>',
                                                    house_info,
                                                    flags).group(1)
                    house.ho_floor = re.search('<FORC>(.*?)</FORC>',
                                               house_info, flags).group(1)
                    house.ho_true_size = re.search('<PAREA>(.*?)</PAREA>',
                                                   house_info,
                                                   flags).group(1)
                    house.insert_db()
                except Exception as e:
                    print('co_index={}, 房号错误'.format(co_index), e)