Exemplo n.º 1
0
    def house_crawler(self, house_url, bu_num, co_id, bu_id):
        """Scrape a room-list page and insert the rooms it lists.

        The page at ``self.url + house_url`` is fetched once and scanned with
        regexes for room name, property type, layout and building area, plus
        every floor number.  For each floor number found, every room on the
        page is inserted through the single shared ``House`` object.

        Args:
            house_url: Path appended to ``self.url``.
            bu_num: Building number passed to ``House``.
            co_id: Community id passed to ``House``.
            bu_id: Building id passed to ``House``.
        """
        ho = House(co_index, bu_num=bu_num, co_id=co_id, bu_id=bu_id)

        url = self.url + house_url
        con = requests.get(url, headers=self.headers)
        tr = con.text
        # Room label, e.g. "3单元403" (unit 3, room 403).
        ho_name = re.findall('室号:(.*?)户', tr, re.S | re.M)
        # Floor numbers.
        ho_floor = re.findall(r'(\d+)层', tr)
        # Property type, e.g. ordinary residence / garage-storage.
        ho_type = re.findall('房屋属性:(.*?)"', tr, re.S | re.M)
        # Room layout.
        ho_room_type = re.findall('户型:(.*?)所', tr, re.S | re.M)
        # Building area.
        ho_build_size = re.findall('建筑面积:(.*?)房', tr, re.S | re.M)

        for floor in ho_floor:
            try:
                ho.ho_floor = floor
                # Lists are 0-based.  The previous range(1, len + 1) skipped
                # the first room and always ran one past the end, which the
                # bare ``except`` then hid.
                for index in range(len(ho_name)):
                    ho.ho_name = ho_name[index]
                    ho.ho_type = ho_type[index]
                    ho.ho_room_type = ho_room_type[index]
                    ho.ho_build_size = ho_build_size[index]

                    ho.insert_db()
            except Exception as e:
                # Best effort: report the failure and move on to the next
                # floor instead of silently discarding the error.
                print('房号错误,co_index={},url={}'.format(co_index, url), e)
Exemplo n.º 2
0
 def get_house_info(self, house_url_list):
     """Queue a regex-driven scrape for every room link in the list.

     ``dongid`` and ``roomid`` are pulled out of each link (both must be
     followed by ``&``) and used to rebuild the room-info URL.  The ``House``
     attributes are filled with regex *patterns*, not values; the resulting
     rule dict is handed to ``ProducerListUrl`` with
     ``analyzer_type='regex'``, which presumably applies them to the fetched
     page.

     Args:
         house_url_list: Iterable of room link strings.
     """
     for i in house_url_list:
         # Start from the raw link so the error message below always has a
         # URL for *this* item.  Previously ``house_url`` was unbound (first
         # item) or stale (later items) when the id extraction failed.
         house_url = i
         try:
             dongid = re.search('dongid=(.*?)&', i).group(1)
             roomid = re.search('roomid=(.*?)&', i).group(1)
             house_url = 'http://zjjg.0557fdc.com:9555/xiaoqu/roominfo.aspx?dongid=' + dongid + '&roomid=' + roomid
             house = House(co_index)
             house.co_name = 'Labelxqmc">(.*?)<'
             house.area = 'Labelxzq">(.*?)<'
             house.bu_num = 'Labeldongmc">(.*?)<'
             house.ho_type = 'Labelyxyongtu">(.*?)<'
             house.ho_name = '<span id="Labelroommc".*?>(.*?)</span>'
             house.ho_build_size = 'Labeljzmianji">(.*?)<'
             house.ho_true_size = 'Labeltaonei">(.*?)<'
             house.ho_share_size = 'Labelgongtan">(.*?)<'
             house.ho_room_type = 'Labelhuxing">(.*?)<'
             house.bu_id = 'dongid=(.*?)&'
             p = ProducerListUrl(page_url=house_url,
                                 request_type='get',
                                 encode='utf-8',
                                 analyzer_rules_dict=house.to_dict(),
                                 analyzer_type='regex',
                                 headers=self.headers)
             p.get_details()
         except Exception as e:
             print('房号错误,co_index={},url={}'.format(co_index, house_url), e)
Exemplo n.º 3
0
 def get_house_info(self, bu_id, co_id):
     """Request a building's room table and insert one ``House`` per room.

     An XML payload naming the building is POSTed to the ExeFunCommon
     endpoint.  Each ``title='...'`` attribute attached to a ``clickRoom``
     handler in the response is parsed for room number, total area, usage
     and layout.

     Args:
         bu_id: Building id; embedded in the payload and stored on each row.
         co_id: Community id stored on each row.
     """
     house_url = "http://www.xyfdc.gov.cn/wsba/Common/Agents/ExeFunCommon.aspx"
     xml_head = ('<?xml version="1.0" encoding="utf-8" standalone="yes"?>\r\n'
                 '<param funname="SouthDigital.Wsba.CBuildTableEx.GetBuildHTMLEx">\r\n'
                 '<item>')
     xml_tail = ('</item>\r\n<item>1</item>\r\n<item>1</item>\r\n'
                 '<item>80</item>\r\n<item>840</item>\r\n'
                 '<item>g_oBuildTable</item>\r\n<item> 1=1</item>\r\n'
                 '<item>1</item>\r\n<item>false</item>\r\n</param>\r\n')
     payload = xml_head + bu_id + xml_tail
     headers = {'Content-Type': "text/xml"}
     html = requests.request("POST", house_url, data=payload,
                             headers=headers).text

     flags = re.S | re.M
     # (House attribute, pattern) pairs, applied in this order to each title.
     field_patterns = (
         ('ho_name', '房号:(.*?)单元:'),
         ('ho_build_size', '总面积:(.*?)平方米'),
         ('ho_type', '用途:(.*?)户型'),
         ('ho_room_type', '户型:(.*?)状态'),
     )
     titles = re.findall(
         "onclick=.g_oBuildTable.clickRoom.*? title='(.*?)'", html, flags)
     for title in titles:
         try:
             record = House(co_index)
             for attr, pattern in field_patterns:
                 setattr(record, attr,
                         re.search(pattern, title, flags).group(1))
             record.info = title
             record.bu_id = bu_id
             record.co_id = co_id
             record.insert_db()
         except Exception as e:
             message = '房号错误,co_index={},url={},data={}'.format(
                 co_index, house_url, payload)
             print(message, e)
Exemplo n.º 4
0
 def get_house_info(self, house_url_list, co_name, bu_num):
     """Fetch each room page and insert the parsed room as a ``House``.

     Args:
         house_url_list: Relative page paths appended to the pubinfo base URL.
         co_name: Community name stored on every row.
         bu_num: Building number stored on every row.

     A failure on one room is printed and the loop moves on to the next.
     """
     flags = re.S | re.M
     # (House attribute, pattern) pairs; the first match of each is stored.
     label_patterns = (
         ('ho_floor', 'HouseInfo1_lblFwlc">(.*?)<'),
         ('ho_name', 'HouseInfo1_lblFwfh">(.*?)<'),
         ('ho_type', 'HouseInfo1_lblFwlx">(.*?)<'),
         ('ho_room_type', 'HouseInfo1_lblFwhx">(.*?)<'),
         ('ho_build_size', 'HouseInfo1_lblycfwjzmj">(.*?)<'),
         ('ho_true_size', 'HouseInfo1_lblycfwtnmj">(.*?)<'),
         ('ho_share_size', 'HouseInfo1_lblycfwftmj">(.*?)<'),
         ('orientation', 'HouseInfo1_lblCx">(.*?)<'),
     )
     for path in house_url_list:
         try:
             record = House(co_index)
             record.co_name = co_name
             record.bu_num = bu_num
             house_url = 'http://www.sxczfdc.com/pubinfo/' + path
             page = requests.get(house_url, headers=self.headers).text
             for attr, pattern in label_patterns:
                 setattr(record, attr, re.findall(pattern, page, flags)[0])
             record.insert_db()
         except Exception as e:
             print(e)
Exemplo n.º 5
0
 def get_house_detail(self, house_url_list, bu_id, co_id):
     """Fetch each room-detail page and insert the parsed room.

     Args:
         house_url_list: Room codes appended to the RoomInfo URL.
         bu_id: Building id stored on every row.
         co_id: Community id stored on every row.

     A failure on one room is printed with its URL and the loop continues.
     """
     for i in house_url_list:
         # Built before the ``try`` so the error message always names the
         # room that actually failed.  It used to be assigned inside the
         # ``try`` and could be unbound or left over from the previous room.
         house_detail_url = 'http://www.lhfdc.gov.cn/templets/lh/aspx/hpms/RoomInfo.aspx?code={}'.format(i)
         try:
             house = House(co_index)
             response = requests.get(house_detail_url, headers=self.headers)
             html = response.text
             house.ho_name = re.search('id="ROOM_ROOMNO">(.*?)<', html,
                                       re.S | re.M).group(1)
             house.ho_room_type = re.search('id="ROOM_FWHX">(.*?)<', html,
                                            re.S | re.M).group(1)
             house.ho_type = re.search('id="ROOM_GHYT">(.*?)<', html,
                                       re.S | re.M).group(1)
             house.ho_build_size = re.search('id="ROOM_YCJZMJ">(.*?)<',
                                             html, re.S | re.M).group(1)
             house.ho_true_size = re.search('id="ROOM_YCTNMJ">(.*?)<', html,
                                            re.S | re.M).group(1)
             house.ho_share_size = re.search('id="ROOM_YCFTMJ">(.*?)<',
                                             html, re.S | re.M).group(1)
             house.bu_id = bu_id
             house.co_id = co_id
             house.insert_db()
         except Exception as e:
             print(
                 '房号错误,co_index={},url={}'.format(co_index,
                                                  house_detail_url), e)
Exemplo n.º 6
0
    def ho_parse(self, bid, co_id):
        """Request a building's room table and insert one ``House`` per room.

        A URL-quoted XML payload naming the building is POSTed to the
        ExeFunCommon endpoint.  Every ``title='...'>`` attribute in the
        response is parsed for room number, usage, layout, total area and
        (optionally) price.

        Args:
            bid: Building id; embedded in the payload and stored as ``bu_id``.
            co_id: Community id stored on every row.

        Returns:
            None.  If the request itself fails, the failure is logged and
            nothing is parsed.

        Raises:
            AttributeError: if a title lacks one of the mandatory fields
                (the per-room parsing is not guarded).
        """
        payload = '<?xml version="1.0" encoding="utf-8" standalone="yes"?><param funname="SouthDigital.CMS.CBuildTableEx.GetBuildHTMLEx"><item>'\
              +bid+'</item><item>1</item><item>1</item><item>100</item><item>1000</item><item>g_oBuildTable</item><item> 1=1</item><item>1</item></param>'
        payload = parse.quote(payload)
        try:
            res = requests.post(
                'http://www.hbsfdc.com/Common/Agents/ExeFunCommon.aspx',
                data=payload,
                headers=self.headers)
        except Exception:
            log.error("{}楼栋请求失败".format(bid))
            # No response means nothing to parse.  The code used to fall
            # through here and crash with NameError on ``res``.
            return
        con = res.content.decode()
        ho_list = re.findall("title='(.*?)'>", con, re.S | re.M)
        for ho in ho_list:
            house = House(co_index)
            house.co_id = co_id
            house.bu_id = bid
            house.ho_name = re.search('房号:(.*)', ho).group(1)
            house.ho_type = re.search('用途:(.*)', ho).group(1)
            house.ho_room_type = re.search('户型:(.*)', ho).group(1)
            house.ho_build_size = re.search('总面积:(.*)', ho).group(1)
            # The price is optional; search once and reuse the match.
            price_match = re.search('售价:(.*)', ho)
            house.ho_price = price_match.group(1) if price_match else None
            house.insert_db()
Exemplo n.º 7
0
    def house_parse(self, house_url, co_id, bu_id):
        """Scrape a room-list page, then each room's detail page for its floor.

        Room name, layout, building area, price and detail-page link are
        collected from the list page as parallel lists.  For every room the
        detail page is fetched (decoded as gb2312) to read the floor, and the
        row is inserted through one shared ``House`` object.

        Args:
            house_url: Path appended to the site base URL.
            co_id: Community id stored on every row.
            bu_id: Building id stored on every row.
        """
        ho = House(co_index)
        url = "http://spf.tlfdc.cn/" + house_url
        res = requests.get(url, headers=self.headers)
        con = res.text

        ho_name = re.findall('室号:(.*?)套', con, re.S | re.M)
        ho_room_type = re.findall('套型:(.*?)建', con, re.S | re.M)
        ho_build_size = re.findall('建筑面积:(.*?)参', con, re.S | re.M)
        ho_price = re.findall('价格:(.*?)元', con, re.S | re.M)
        ho_detail = re.findall(r'href="(show.*?\?id=\d+&id2=\d+&prjid=\d+)"',
                               con, re.S | re.M)
        for index in range(len(ho_name)):
            try:
                ho.co_id = co_id
                ho.bu_id = bu_id
                ho.ho_name = ho_name[index]
                ho.ho_room_type = ho_room_type[index]
                ho.ho_build_size = ho_build_size[index]
                ho.ho_price = ho_price[index]
                ho_detail_url = "http://spf.tlfdc.cn/" + ho_detail[index]
                detail_res = requests.get(ho_detail_url, headers=self.headers)
                detail_html = detail_res.content.decode('gb2312')
                ho.ho_floor = re.findall('楼层.*?">(.*?)</td>', detail_html,
                                         re.S | re.M)[0].strip()

                ho.insert_db()
            except Exception as e:
                # The handler used to be a bare ``except:`` that referenced
                # an undefined ``e``, so any failure became a NameError.
                print('房号错误,co_index={},url={}'.format(co_index, url), e)
Exemplo n.º 8
0
 def get_house_detail(self, house_detail_url_list, co_id, bu_id):
     """Fetch each room-detail page via ``self.s`` and insert the parsed room.

     Args:
         house_detail_url_list: Relative paths appended to the site base URL.
         co_id: Community id stored on every row.
         bu_id: Building id stored on every row.

     Each request is preceded by a three-second sleep.  A failure on one
     room is printed with its URL and the loop continues.
     """
     flags = re.S | re.M
     # (House attribute, pattern) pairs, applied in this order to each page.
     label_patterns = (
         ('co_name', 'lblxmmc.*?>(.*?)<'),
         ('bu_num', 'lbldh.*?>(.*?)<'),
         ('ho_name', 'lblfh.*?>(.*?)<'),
         ('ho_build_size', 'lbljzmj.*?>(.*?)<'),
         ('ho_true_size', 'lbltnmj.*?>(.*?)<'),
         ('ho_share_size', 'lblftmj.*?>(.*?)<'),
         ('ho_type', 'lblfwxz.*?>(.*?)<'),
         ('ho_room_type', 'lblhuxin.*?>(.*?)<'),
     )
     for path in house_detail_url_list:
         detail_url = 'http://www.yzfdc.cn/' + path
         try:
             record = House(co_index)
             time.sleep(3)
             page = self.s.get(detail_url, headers=self.headers).text
             for attr, pattern in label_patterns:
                 setattr(record, attr,
                         re.search(pattern, page, flags).group(1))
             record.bu_id = bu_id
             record.co_id = co_id
             record.insert_db()
         except Exception as e:
             message = '房号错误,co_index={},url={}'.format(co_index, detail_url)
             print(message, e)
Exemplo n.º 9
0
    def ho_info(self, url, co_id, bu_id):
        """Fetch a building page through a random proxy and insert its rooms.

        The request is retried, with a freshly chosen proxy each time, until
        it succeeds.  Each ``<td nowrap>`` cell that contains a link is then
        parsed: the room name comes from the link text and the areas, layout
        and price from the cell's ``title`` attribute.

        Args:
            url: Path appended to the site base URL.
            co_id: Community id stored on every row.
            bu_id: Building id stored on every row.
        """
        ho_url = 'http://www.aqhouse.net/' + url
        # NOTE(review): this retries forever with no back-off; consider a cap.
        while True:
            try:
                # Pick from the whole pool rather than a hard-coded 0..9,
                # which failed for smaller pools and ignored larger ones.
                proxy = self.proxies[random.randint(0, len(self.proxies) - 1)]
                ho_res = requests.get(ho_url,
                                      headers=self.headers,
                                      proxies=proxy)
                break
            except Exception as e:
                print(e)
        ho_html = etree.HTML(ho_res.text)
        room_list = ho_html.xpath("//td[@nowrap]/a/..")
        for room in room_list:
            try:
                room_info = room.xpath("./@title")[0]
                ho = House(co_index)
                ho.co_id = co_id
                ho.bu_id = bu_id
                ho.ho_name = room.xpath("./a/text()")[0]
                ho.ho_build_size = re.search('建筑面积:(.*?)平方米',
                                             room_info).group(1)
                ho.ho_true_size = re.search('套内面积:(.*?)平方米',
                                            room_info).group(1)
                ho.ho_share_size = re.search('分摊面积:(.*?)平方米',
                                             room_info).group(1)
                ho.ho_room_type = re.search('套型:(.*)', room_info).group(1)
                ho.ho_price = re.search('价格.*?:(.*?)元/平方米', room_info).group(1)

                ho.insert_db()
            except Exception as e:
                # Was a bare ``except:`` that dropped the cause; keep the
                # best-effort skip but show what went wrong.
                print('房屋解析失败', e)
Exemplo n.º 10
0
 def get_house_info(self, house_id_list, bu_id, co_id):
     """Fetch each room page by code and insert the parsed room.

     Args:
         house_id_list: Room codes appended to the RoomInfo URL.
         bu_id: Building id stored on every row.
         co_id: Community id stored on every row.

     A failure on one room is printed with its URL and the loop continues.
     """
     flags = re.S | re.M
     # (House attribute, pattern) pairs, applied in this order to each page.
     id_patterns = (
         ('ho_name', 'id="ROOM_HH">(.*?)<'),
         ('ho_floor', 'id="ROOM_MYC">(.*?)<'),
         ('ho_type', 'id="ROOM_FWYT">(.*?)<'),
         ('ho_room_type', 'id="ROOM_HX">(.*?)<'),
         ('ho_build_size', 'id="ROOM_YCJZMJ">(.*?)<'),
         ('ho_true_size', 'id="ROOM_YCTNJZMJ">(.*?)<'),
         ('ho_share_size', 'id="ROOM_YCFTJZMJ">(.*?)<'),
     )
     for code in house_id_list:
         house_url = 'http://www.hbczfdc.com:4993/HPMS/RoomInfo.aspx?code=' + code
         try:
             record = House(co_index)
             page = requests.get(house_url, headers=self.headers).text
             record.bu_id = bu_id
             record.co_id = co_id
             for attr, pattern in id_patterns:
                 setattr(record, attr,
                         re.search(pattern, page, flags).group(1))
             record.insert_db()
         except Exception as e:
             message = '房号错误,co_index={},url={}'.format(co_index, house_url)
             print(message, e)
Exemplo n.º 11
0
    def room_crawler(self, room):  # rooms
        """Scrape a building's room table and insert one ``House`` per room.

        The table rows with ``bgcolor='#fbf3e6'`` (minus the first and last)
        each describe one floor.  A room whose link ``href`` is a single
        character carries its data inline in the link's ``wf`` attribute;
        any other room is read from its own detail page.

        Args:
            room: Path appended to the site base URL.  The community id and
                building id are taken from its ``<digits>&<name>=<digits>``
                part.
        """
        house_url = "http://www.hzszjj.gov.cn" + room

        res = requests.get(house_url, )
        con = etree.HTML(res.text)

        # Same for every room, so search once.  ``.group`` is still called
        # per room so a non-matching URL fails at the same point as before.
        id_num = re.search(r"(\d+)&\w+=(\d+)", room)

        ho_table = con.xpath("//tr[@bgcolor='#fbf3e6']")
        for ho_list in ho_table[1:-1]:
            ho_floor = ho_list.xpath("./td[@align='center']/text()")[0]
            honum_list = ho_list.xpath(".//tr/td[@height='40']")
            for house in honum_list:
                # A fresh record per room.  One shared object used to carry
                # fields (e.g. share/inside area from a detail page) over
                # into later rooms that never set them.
                ho = House(co_index)
                ho.ho_floor = ho_floor  # floor
                ho.co_id = id_num.group(1)  # community id
                ho.bu_id = id_num.group(2)  # building id
                ho_url = house.xpath("./a/@href")[0]
                if len(ho_url) == 1:
                    ho_info = house.xpath("./a/@wf")[0]

                    ho.ho_name = house.xpath("./a/text()")[0]
                    info = re.search(
                        r":(.*?)<br>.*?:(.*?)<br>(.*?)<br><hr>.*?:(.*?)m.*?<br>.*?:(.*?)<br>.*?:(.*?)m",
                        ho_info)
                    ho.ho_type = info.group(5)
                    ho.ho_build_size = info.group(4)
                    ho.ho_room_type = info.group(2)

                else:
                    detail_url = "http://www.hzszjj.gov.cn/ts_web_dremis/web_house_dir/" + ho_url
                    detail_res = requests.get(detail_url)
                    detail_page = etree.HTML(detail_res.text)
                    ho.ho_name = detail_page.xpath(
                        "//span[@id='ctl00_ContentPlaceHolder2_lb_house_name']/text()"
                    )[0]
                    ho.ho_type = detail_page.xpath(
                        "//span[@id='ctl00_ContentPlaceHolder2_lb_house_type']/text()"
                    )[0]
                    ho.ho_build_size = detail_page.xpath(
                        "//span[@id='ctl00_ContentPlaceHolder2_lb_house_build_area']/text()"
                    )[0]
                    ho.ho_share_size = detail_page.xpath(
                        "//span[@id='ctl00_ContentPlaceHolder2_lb_house_share_area']/text()"
                    )[0]
                    ho.ho_true_size = detail_page.xpath(
                        "//span[@id='ctl00_ContentPlaceHolder2_lb_house_inside_area']/text()"
                    )[0]

                ho.insert_db()
Exemplo n.º 12
0
    def get_house_info(self, bu_url, bu_id):
        """Fetch a building's room table, then each room's detail page.

        The ``qrykey`` query parameter of ``bu_url`` selects the table.  Every
        ``select_room('<id>'`` handler in it yields a room id whose detail
        page is fetched and parsed into a ``House`` row.

        Args:
            bu_url: Building URL containing ``qrykey=<value>&``.
            bu_id: Building id stored on every row.

        Raises:
            AttributeError: if ``bu_url`` has no ``qrykey`` parameter.
        """
        qrykey = re.search('qrykey=(.*?)&', bu_url).group(1)
        house_url = 'http://old.newhouse.cnnbfdc.com/GetHouseTable.aspx?qrykey=' + qrykey
        response = requests.get(house_url, headers=self.headers)
        html = response.text
        # Raw string: ``\(`` is an invalid escape in a normal literal.
        house_code_list = re.findall(r"onclick=select_room\('(.*?)'", html,
                                     re.S | re.M)
        for i in house_code_list:
            house_detail_url = 'http://old.newhouse.cnnbfdc.com/openRoomData.aspx?roomId=' + str(
                i)
            try:
                res = requests.get(
                    house_detail_url,
                    headers=self.headers,
                )
            except Exception as e:
                print("{}城市无法访问房屋页面{}".format(city, house_detail_url), e)
                continue
            # Pause between detail requests; the error text below suggests
            # the site rejects rapid requests.
            time.sleep(2)
            content = res.text
            ho = House(co_index)
            ho.bu_id = bu_id
            try:
                ho.ho_name = re.search('室号.*?">(.*?)</td>', content,
                                       re.S | re.M).group(1)
                ho.ho_floor = re.search('楼层.*?">(.*?)</td>', content,
                                        re.S | re.M).group(1)
                ho.ho_room_type = re.search('户型.*?">(.*?)</td>', content,
                                            re.S | re.M).group(1)
                ho.ho_type = re.search('用途.*?">(.*?)</td>', content,
                                       re.S | re.M).group(1)
                ho.ho_build_size = re.search('预测建筑面积.*?">(.*?)</td>', content,
                                             re.S | re.M).group(1)
                ho.ho_true_size = re.search('预测套内面积.*?">(.*?)</td>', content,
                                            re.S | re.M).group(1)
                ho.ho_share_size = re.search('预测分摊面积.*?">(.*?)</td>', content,
                                             re.S | re.M).group(1)

                ho.insert_db()
            except Exception as e:
                print("{}房号错误,请求频繁,当前页面{}未提取".format(city, house_detail_url),
                      e)
Exemplo n.º 13
0
 def get_house_info(self, con, co_id, build_id):
     """Parse the embedded room table in ``con`` and insert one row per room.

     Only the span from ``houseTableData`` to ``特别申明`` is examined; each
     ``<div style ...</div>`` inside it is treated as one room.

     Args:
         con: Page text containing the room table.
         co_id: Community id stored on every row.
         build_id: Building id stored as ``bu_id`` on every row.

     Raises:
         AttributeError: if the table span is not present in ``con``.
     """
     flags = re.S | re.M
     table_html = re.search('houseTableData.*?特别申明', con, flags).group()
     # (House attribute, pattern) pairs, applied in this order to each cell.
     key_patterns = (
         ('ho_name', "'HC_HOUSENUMB':'(.*?)',"),
         ('ho_room_type', "'HC_HOUSETYPE':'(.*?)',"),
         ('ho_build_size', "'HC_STCTAREA':'(.*?)',"),
     )
     cells = re.findall('<div style.*?</div>', table_html, flags)
     for cell in cells:
         try:
             record = House(co_index)
             for attr, pattern in key_patterns:
                 setattr(record, attr,
                         re.search(pattern, cell, flags).group(1))
             record.bu_id = build_id
             record.co_id = co_id
             record.insert_db()
         except Exception:
             print('house error, co_index={}'.format(co_index))
Exemplo n.º 14
0
 def get_house_info(self, house_url, bu_id, co_id):
     """Scrape a building's room table, following detail links when present.

     The page (decoded as gbk) is narrowed to the table that starts at
     ``室号 <``.  Each ``title...</td>`` cell gives a measured area and a
     room name.  When the name contains a link, the linked detail page is
     fetched and its values replace the name and area and add floor, type,
     layout, inside area and shared area.

     Args:
         house_url: Path appended to the site base URL.
         bu_id: Building id stored on every row.
         co_id: Community id stored on every row.
     """
     flags = re.S | re.M
     base = 'http://www.fangdi.com.cn/'
     ho_url = base + house_url
     listing = requests.get(ho_url, headers=self.headers).content.decode('gbk')
     table_html = re.search('室号 <.*?</table>.*?</table>', listing,
                            flags).group()
     # (House attribute, pattern) pairs read from a room's detail page.
     detail_patterns = (
         ('ho_floor', '实际层.*?<TD.*?>(.*?)<'),
         ('ho_name', '室号.*?<TD.*?>(.*?)<'),
         ('ho_type', '房屋类型.*?<TD.*?>(.*?)<'),
         ('ho_room_type', '房型.*?<TD.*?>(.*?)<'),
         ('ho_build_size', '实测建筑面积.*?<TD.*?>(.*?)<'),
         ('ho_true_size', '实测套内面积.*?<TD.*?>(.*?)<'),
         ('ho_share_size', '实测分摊面积.*?<TD.*?>(.*?)<'),
     )
     for cell in re.findall('title.*?</td>', table_html, flags):
         try:
             record = House(co_index)
             record.ho_build_size = re.search('实测面积:(.*?)>', cell,
                                              flags).group(1)
             raw_name = re.search('实测面积.*?>(.*?)<br>', cell,
                                  flags).group(1)
             record.ho_name = raw_name.strip()
             record.bu_id = bu_id
             record.co_id = co_id
             if '<a' in record.ho_name:
                 # The name cell is a link: read the full record from the
                 # detail page it points to.
                 link = re.search('href="(.*?)"', record.ho_name,
                                  flags).group(1)
                 detail_response = requests.get(base + link,
                                                headers=self.headers)
                 detail_html = detail_response.content.decode('gbk')
                 for attr, pattern in detail_patterns:
                     setattr(record, attr,
                             re.search(pattern, detail_html, flags).group(1))
             record.insert_db()
         except Exception as e:
             message = '房号错误,co_index={},url={}'.format(co_index, ho_url)
             print(message, e)
Exemplo n.º 15
0
 def house_info(self, house_list, bu_id, co_id):
     """Fetch each room page and insert it through one shared ``House``.

     Args:
         house_list: Relative room URLs, each containing ``id=<digits>``.
         bu_id: Building id stored on every row.
         co_id: Community id stored on every row.

     Raises:
         AttributeError: if any expected field is missing from a URL or
             page (nothing here is guarded, so one bad room stops the run).
     """
     ho = House(co_index)
     for house_url in house_list:
         url = "http://ris.szpl.gov.cn/bol/" + house_url
         res = requests.get(url, headers=self.headers)
         # NOTE(review): this value is overwritten by the page's room number
         # a few lines below; confirm which of the two ``ho_num`` should hold.
         ho.ho_num = re.search(r'id=(\d+)', house_url).group(1)
         con = res.text
         ho.bu_num = re.search('情况.*?">(.*?)&', con).group(1)
         ho.bu_id = bu_id
         ho.co_id = co_id
         ho.ho_floor = re.search(r'楼层.*?">(\d+)&', con).group(1)
         ho.ho_num = re.search(r'房号.*?">(\d+)&', con).group(1)
         ho.ho_type = re.search(r'用途.*?">(\d+)&', con).group(1)
         ho.ho_room_type = re.search(r'户型.*?">(\d+)&', con).group(1)
         # NOTE(review): the ``.`` between the digit groups is unescaped and
         # matches any character; left as-is to keep the accepted inputs.
         ho.ho_build_size = re.search(r'建筑面积<.*?">(\d+.\d+)平方米',
                                      con).group(1)
         ho.ho_true_size = re.search(r'户内面积<.*?">(\d+.\d+)平方米', con).group(1)
         ho.insert_db()
Exemplo n.º 16
0
 def get_house_detail(self, house_url_list, bu_id, co_id):
     """Fetch each room-detail page by id and insert the parsed room.

     Args:
         house_url_list: Room ids appended to the housedetail URL.
         bu_id: Building id stored on every row.
         co_id: Community id stored on every row.

     A failure on one room is printed with its URL and the loop continues.
     """
     for i in house_url_list:
         # Built before the ``try`` so the error message always names the
         # room that actually failed.  It used to be assigned inside the
         # ``try`` and could be unbound or left over from the previous room.
         house_detail_url = 'http://222.184.103.50:7700/WW/housedetail.aspx?houseID={}'.format(i)
         try:
             house = House(co_index)
             response = requests.get(house_detail_url, headers=self.headers)
             html = response.text
             house.ho_name = re.search('id="Label1">(.*?)<', html,
                                       re.S | re.M).group(1)
             house.ho_room_type = re.search('id="Label2">(.*?)<', html,
                                            re.S | re.M).group(1)
             house.ho_build_size = re.search('id="Label3">(.*?)<', html,
                                             re.S | re.M).group(1)
             house.co_id = co_id
             house.bu_id = bu_id
             house.insert_db()
         except Exception as e:
             print(
                 '请求错误,co_index={},url={}'.format(co_index,
                                                  house_detail_url), e)
Exemplo n.º 17
0
 def get_house_info(self, bu_id, co_id):
     """Fetch a building's room list through the proxy helper and insert it.

     The page (decoded as gbk) is narrowed to the span between
     ``rpt_ewlpblc_fjlistdiv_0`` and ``erp_con_2``.  Each ``<span>`` in it is
     one room: its ``title`` holds "layout,area" and its text, or the text
     of a nested link, holds the room name.

     Args:
         bu_id: Building id; sent as ``lzbh`` and stored on every row.
         co_id: Community id; sent as ``buildingid`` and stored on every row.
     """
     flags = re.S | re.M
     house_url = ('http://b.fang99.com/buildinglistselect.aspx?buildingid='
                  + co_id + '&xmbh=&lzbh=' + bu_id)
     page = self.request_proxy(house_url,
                               headers=self.headers).content.decode('gbk')
     listing = re.search('rpt_ewlpblc_fjlistdiv_0.*?erp_con_2', page,
                         flags).group()
     for span in re.findall('<span.*?</span>', listing, flags):
         try:
             record = House(co_index)
             record.ho_room_type = re.search('title="(.*?),', span,
                                             flags).group(1)
             record.ho_build_size = re.search('title=".*?,(.*?)"', span,
                                              flags).group(1)
             # The name sits in a nested link when there is one, otherwise
             # directly in the span.
             name_pattern = '<a.*?>(.*?)<' if '<a' in span else '<span.*?>(.*?)<'
             record.ho_name = re.search(name_pattern, span, flags).group(1)
             record.bu_id = bu_id
             record.co_id = co_id
             record.insert_db()
         except Exception as e:
             message = '房号错误,co_index={},url={}'.format(co_index, house_url)
             print(message, e)
Exemplo n.º 18
0
    def ho_info(self, bu_url_list, co_id):
        """Fetch each building page and insert every room listed on it.

        Rooms are the ``<li class='tjCor4'>`` elements; their ``title``
        attribute carries room number, layout, building area, unit price and
        usage.  The building id is the ``dbh=<digits>`` part of the URL.

        Args:
            bu_url_list: Iterable of absolute building URLs.
            co_id: Community id stored on every row.

        An error anywhere in a building is logged and the rest of that
        building is skipped.
        """
        for bu_url in bu_url_list:
            try:
                res = requests.get(bu_url, headers=self.headers)
                html = etree.HTML(res.text)
                house_info_list = html.xpath("//li[@class='tjCor4']")
                for house_info in house_info_list:
                    house = house_info.xpath("./@title")[0]
                    ho = House(co_index)
                    ho.co_id = co_id
                    ho.bu_id = re.search(r'dbh=(\d+)', bu_url).group(1)
                    ho.ho_name = re.search('房号:(.*?)<br', house).group(1)
                    ho.ho_room_type = re.search('户型:(.*?)<br', house).group(1)
                    ho.ho_build_size = re.search('建筑面积:(.*?)平方米',
                                                 house).group(1)
                    ho.ho_price = re.search('单价:(.*?)元', house).group(1)
                    ho.ho_type = re.search('用途:(.*?)<br', house).group(1)

                    ho.insert_db()
            except Exception as e:
                # Put the exception in the message itself.  Passing it as an
                # extra positional argument leaves a logging-style ``error``
                # call with an argument but no placeholder (assumes ``log``
                # follows the stdlib logging convention - TODO confirm).
                log.error('房号信息错误: {}'.format(e))
Exemplo n.º 19
0
    def get_house_info(self, house_url_list):
        """Fetch each building page and insert one ``House`` per table row.

        Rows are the ``<tr align='center'>`` elements; values are read from
        the row's ``<td>`` text nodes by position.  The building id is the
        ``ID=<digits>`` part of the URL.

        Args:
            house_url_list: Iterable of absolute building URLs.

        A failure on one row is printed with the URL and the loop continues.
        """
        for url in house_url_list:
            response = requests.get(url)

            html = etree.HTML(response.text)
            con = html.xpath("//tr[@align='center']")
            for i in con:
                try:
                    house = House(co_index)
                    # Evaluate the XPath once per row instead of once per
                    # field; the result is the same list each time.
                    cells = i.xpath("./td/text()")
                    house.ho_name = cells[1]
                    house.ho_floor = cells[0]
                    house.ho_build_size = cells[3]
                    house.ho_true_size = cells[4]
                    house.ho_share_size = cells[5]
                    house.ho_room_type = cells[2]
                    house.ho_price = cells[-1]
                    house.orientation = cells[-2]
                    house.bu_id = re.search(r'ID=(\d+)', url).group(1)
                    house.insert_db()
                except Exception as e:
                    print('房号错误,co_index={},url={}'.format(co_index, url), e)
Exemplo n.º 20
0
 def get_house_info(self, code, co_name):
     """Request a building's room table and insert one ``House`` per room.

     An XML payload naming the building is POSTed to the ExeFunCommon
     endpoint.  Every ``title='...'`` attribute in the response is parsed
     for room number, usage, layout and total area, each terminated by a
     CR LF pair.

     Args:
         code: Indexable pair; ``code[0]`` goes into the payload and
             ``code[1]`` is stored as the building number.
         co_name: Community name stored on every row.
     """
     house_url = 'http://house.bffdc.gov.cn/Common/Agents/ExeFunCommon.aspx?'
     xml_head = ('<?xml version="1.0" encoding="utf-8" standalone="yes"?>\r\n'
                 '<param funname="SouthDigital.Wsba.CBuildTableEx.GetBuildHTMLEx">\r\n'
                 '<item>')
     xml_tail = ('</item>\r\n<item>1</item>\r\n<item>1</item>\r\n'
                 '<item>55</item>\r\n<item>840</item>\r\n'
                 '<item>g_oBuildTable</item>\r\n<item>false</item>\r\n'
                 '<item> 1=1</item>\r\n</param>\r\n')
     payload = xml_head + code[0] + xml_tail
     headers = {'Content-Type': "text/xml"}
     html = requests.post(url=house_url, data=payload, headers=headers).text

     # (House attribute, pattern) pairs, applied in this order to each title.
     field_patterns = (
         ('ho_name', '房号:(.*?)\r\n'),
         ('ho_type', '用途:(.*?)\r\n'),
         ('ho_room_type', '户型:(.*?)\r\n'),
         ('ho_build_size', '总面积:(.*?)\r\n'),
     )
     for title in re.findall("title='(.*?)'", html, re.S | re.M):
         try:
             record = House(co_index)
             record.bu_num = code[1]
             for attr, pattern in field_patterns:
                 setattr(record, attr, re.search(pattern, title).group(1))
             record.co_name = co_name
             record.insert_db()
         except Exception as e:
             print(e)
Exemplo n.º 21
0
 def get_house_info(self, house_url_list):
     """Queue a regex-driven scrape for every room page in the list.

     The ``House`` attributes are filled with regex *patterns*, not values;
     the resulting rule dict is handed to ``ProducerListUrl`` with
     ``analyzer_type='regex'``, which presumably applies them to the fetched
     page.

     Args:
         house_url_list: Relative paths appended to the House base URL.
     """
     for i in house_url_list:
         # Built before the ``try`` so the error message always names the
         # page that actually failed.  It used to be assigned inside the
         # ``try`` and could be unbound or left over from the previous page.
         house_url = 'http://www.ndjsj.gov.cn/House/{}'.format(i)
         try:
             house = House(co_index)
             house.bu_num = '幢  号:.*?<td.*?>(.*?)<'
             house.ho_name = '房  号:.*?<td.*?>(.*?)<'
             house.co_name = '项目名称:.*?<td.*?>(.*?)<'
             house.ho_build_size = '建筑面积:.*?<td.*?>(.*?)<'
             house.ho_true_size = '套内面积:.*?<td.*?>(.*?)<'
             house.ho_share_size = '分摊面积:.*?<td.*?>(.*?)<'
             house.ho_type = '房屋用途:.*?<td.*?>(.*?)<'
             house.ho_floor = '所 在 层:.*?<td.*?>(.*?)<'
             house.ho_room_type = '房屋户型:.*?<td.*?>(.*?)<'
             p = ProducerListUrl(page_url=house_url,
                                 request_type='get',
                                 encode='utf-8',
                                 analyzer_rules_dict=house.to_dict(),
                                 analyzer_type='regex',
                                 headers=self.headers)
             p.get_details()
         except Exception as e:
             print('宁德房号错误,url={}'.format(house_url), e)
Exemplo n.º 22
0
    def get_house_info(self, co_id, bu_id, house_detail_list):
        """Fetch each room-detail page and insert the parsed room.

        Args:
            co_id: Community id stored on every row.
            bu_id: Building id stored on every row.
            house_detail_list: Relative paths appended to ``self.url``.

        Pages that cannot be fetched, or that do not answer with HTTP 200,
        are reported and skipped.

        Raises:
            AttributeError: if a fetched page lacks one of the expected
                fields (the parsing itself is not guarded).
        """
        for house_detail in house_detail_list:
            house_url = self.url + house_detail
            try:
                house_res = requests.get(house_url, headers=self.headers)
            except Exception as e:
                print("co_index={},房屋信息错误".format(co_index), e)
                continue
            # The original ``house_res.status_code == 200`` was a bare
            # comparison with no effect; actually skip non-200 responses.
            if house_res.status_code != 200:
                print("co_index={},房屋信息错误".format(co_index),
                      house_res.status_code)
                continue
            house_con = house_res.text

            ho = House(co_index)
            ho.co_id = co_id
            ho.bu_id = bu_id
            ho.ho_name = re.search('房号.*?fh">(.*?)</span', house_con, re.S | re.M).group(1)
            ho.orientation = re.search('朝向.*?Cx">(.*?)</span', house_con, re.S | re.M).group(1)
            ho.ho_floor = re.search('层.*?lc">(.*?)</span', house_con, re.S | re.M).group(1)
            ho.ho_room_type = re.search('房型.*?hx">(.*?)</span', house_con, re.S | re.M).group(1)
            ho.ho_build_size = re.search('建筑面积.*?jzmj">(.*?)</span', house_con, re.S | re.M).group(1)
            ho.ho_share_size = re.search('分摊面积.*?ftmj">(.*?)</span', house_con, re.S | re.M).group(1)
            ho.ho_true_size = re.search('套内面积.*?tnmj">(.*?)</span', house_con, re.S | re.M).group(1)
            ho.ho_type = re.search('用途.*?lx">(.*?)</span', house_con, re.S | re.M).group(1)

            ho.insert_db()