Exemplo n.º 1
0
    def ho_info(self, url, co_id, bu_id):
        """Fetch a building page and store one House record per listed room.

        The page at ``'http://www.aqhouse.net/' + url`` is requested through a
        randomly chosen entry of ``self.proxies``; the request is repeated
        until one attempt succeeds.  Every ``<td nowrap>`` cell that contains
        a link is treated as a room: its ``title`` attribute supplies the
        areas, layout and price, and the link text supplies the room name.
        Rooms that fail to parse are reported and skipped.

        :param url: path appended to the site root.
        :param co_id: stored on every record as ``co_id``.
        :param bu_id: stored on every record as ``bu_id``.
        """
        ho_url = 'http://www.aqhouse.net/' + url
        # NOTE(review): retries are unbounded and the request has no timeout,
        # so a permanently failing URL keeps this loop running forever.
        while True:
            try:
                # Choose from the whole pool instead of assuming that it
                # holds exactly ten entries.
                proxy = random.choice(self.proxies)
                ho_res = requests.get(ho_url,
                                      headers=self.headers,
                                      proxies=proxy)
                break
            except Exception as e:
                print(e)
        ho_html = etree.HTML(ho_res.text)
        for room in ho_html.xpath("//td[@nowrap]/a/.."):
            try:
                room_info = room.xpath("./@title")[0]
                ho = House(co_index)
                ho.co_id = co_id
                ho.bu_id = bu_id
                ho.ho_name = room.xpath("./a/text()")[0]
                ho.ho_build_size = re.search('建筑面积:(.*?)平方米',
                                             room_info).group(1)
                ho.ho_true_size = re.search('套内面积:(.*?)平方米',
                                            room_info).group(1)
                ho.ho_share_size = re.search('分摊面积:(.*?)平方米',
                                             room_info).group(1)
                ho.ho_room_type = re.search('套型:(.*)', room_info).group(1)
                ho.ho_price = re.search('价格.*?:(.*?)元/平方米',
                                        room_info).group(1)

                ho.insert_db()
            except Exception as e:
                # Report the cause; KeyboardInterrupt/SystemExit now propagate.
                print('房屋解析失败', e)
Exemplo n.º 2
0
    def ho_parse(self, bid, co_id):
        """Request the house table of building ``bid`` and store each house.

        An XML payload naming ``bid`` is URL-quoted and POSTed to the
        ``ExeFunCommon.aspx`` endpoint.  Every ``title='...'>`` attribute in
        the reply is parsed into one House record.

        :param bid: building id; embedded in the payload and stored as
            ``bu_id`` on every record.
        :param co_id: stored on every record as ``co_id``.
        """
        payload = (
            '<?xml version="1.0" encoding="utf-8" standalone="yes"?>'
            '<param funname="SouthDigital.CMS.CBuildTableEx.GetBuildHTMLEx"><item>'
            + bid +
            '</item><item>1</item><item>1</item><item>100</item><item>1000</item>'
            '<item>g_oBuildTable</item><item> 1=1</item><item>1</item></param>'
        )
        payload = parse.quote(payload)
        try:
            res = requests.post(
                'http://www.hbsfdc.com/Common/Agents/ExeFunCommon.aspx',
                data=payload,
                headers=self.headers)
        except Exception:
            log.error("{}楼栋请求失败".format(bid))
            # Without a response there is nothing to parse; falling through
            # would raise NameError on the unbound ``res``.
            return
        con = res.content.decode()
        ho_list = re.findall("title='(.*?)'>", con, re.S | re.M)
        for ho in ho_list:
            house = House(co_index)
            house.co_id = co_id
            house.bu_id = bid
            house.ho_name = re.search('房号:(.*)', ho).group(1)
            house.ho_type = re.search('用途:(.*)', ho).group(1)
            house.ho_room_type = re.search('户型:(.*)', ho).group(1)
            house.ho_build_size = re.search('总面积:(.*)', ho).group(1)
            # The price line is optional; search once and reuse the match.
            price = re.search('售价:(.*)', ho)
            house.ho_price = price.group(1) if price else None
            house.insert_db()
Exemplo n.º 3
0
 def get_house_detail(self, house_url_list, bu_id, co_id):
     """Download each room page and store its fields as a House record.

     Each entry of ``house_url_list`` is appended to the ``RoomInfo.aspx``
     URL as the ``code`` query value.  A page that cannot be fetched or
     parsed is reported and skipped.

     :param house_url_list: iterable of room codes (strings).
     :param bu_id: stored on every record as ``bu_id``.
     :param co_id: stored on every record as ``co_id``.
     """
     # (House attribute, regex capturing the text of the matching element)
     fields = (
         ('ho_name', 'id="ROOM_ROOMNO">(.*?)<'),
         ('ho_room_type', 'id="ROOM_FWHX">(.*?)<'),
         ('ho_type', 'id="ROOM_GHYT">(.*?)<'),
         ('ho_build_size', 'id="ROOM_YCJZMJ">(.*?)<'),
         ('ho_true_size', 'id="ROOM_YCTNMJ">(.*?)<'),
         ('ho_share_size', 'id="ROOM_YCFTMJ">(.*?)<'),
     )
     for i in house_url_list:
         # Built before the ``try`` so the handler below always has the URL
         # of the current item (never an unbound or stale name).
         house_detail_url = 'http://www.lhfdc.gov.cn/templets/lh/aspx/hpms/RoomInfo.aspx?code=' + i
         try:
             house = House(co_index)
             response = requests.get(house_detail_url, headers=self.headers)
             html = response.text
             for attr, pattern in fields:
                 setattr(house, attr,
                         re.search(pattern, html, re.S | re.M).group(1))
             house.bu_id = bu_id
             house.co_id = co_id
             house.insert_db()
         except Exception as e:
             print(
                 '房号错误,co_index={},url={}'.format(co_index,
                                                  house_detail_url), e)
Exemplo n.º 4
0
    def get_house_info(self, co_id, bu_id, id):
        """Collect the house pages linked from a listing and store each one.

        The listing ``fdc_xxdxx.asp?id=<id>`` is decoded as GBK and scanned
        for ``onClick=...open('<path>',`` links.  Each linked page is
        downloaded, decoded as GBK and parsed into a House record.  A house
        page that cannot be fetched or parsed is reported and skipped.

        :param co_id: stored on every record as ``co_id``.
        :param bu_id: stored on every record as ``bu_id``.
        :param id: listing id placed in the query string (converted with
            ``str``).
        """
        house_list_url = "http://xx.yyfdcw.com/hetong/fdc_xxdxx.asp?id=" + str(
            id)
        res = requests.get(house_list_url, headers=self.headers)
        con = res.content.decode('gbk')
        # Raw string: ``\(`` in a plain literal is an invalid escape sequence.
        house_list = re.findall(r"onClick=.*?open\('(.*?)',", con,
                                re.S | re.M)
        # (House attribute, regex capturing the table cell after the label)
        fields = (
            ('ho_name', '室号.*?fafa>(.*?)</TD'),
            ('ho_floor', '实际层.*?fafa>(.*?)</TD'),
            ('ho_build_size', '建筑面积.*?fafa>(.*?)</TD'),
            ('ho_true_size', '套内面积.*?fafa>(.*?)</TD'),
            ('ho_share_size', '分摊面积.*?fafa>(.*?)</TD'),
            ('ho_price', '价格.*?fafa>(.*?)</TD'),
            ('ho_type', '用途.*?fafa>(.*?)</TD'),
        )
        for house_ in house_list:
            # The guard covers the request and the parsing, which are the
            # steps that can actually fail for a single house.
            try:
                house_url = "http://xx.yyfdcw.com/hetong/" + house_
                ho_res = requests.get(house_url, headers=self.headers)
                ho_con = ho_res.content.decode('gbk')

                ho = House(co_index)
                ho.co_id = co_id
                ho.bu_id = bu_id
                for attr, pattern in fields:
                    setattr(ho, attr,
                            re.search(pattern, ho_con,
                                      re.S | re.M).group(1))
            except Exception as e:
                print("co_index={},房屋信息错误".format(co_index), e)
                continue

            ho.insert_db()
0
 def get_house_info(self, bu_id, co_id):
     """Store one House record per ``T_HOUSE`` element of a building.

     Two XML documents are fetched from ``GetData.aspx``: the first, keyed
     by ``Building_ID``, yields a ``LOGICBUILDING_ID``; the second, keyed by
     that id, contains the ``T_HOUSE`` elements.  A house that fails to
     parse is reported and skipped; any other failure is reported and ends
     the call.

     :param bu_id: building id (string); used in the first URL and stored
         on every record as ``bu_id``.
     :param co_id: stored on every record as ``co_id``.
     """
     # (House attribute, XPath relative to a T_HOUSE element)
     fields = (
         ('ho_name', 'ROOM_NUMBER/text()'),
         ('ho_build_size', 'BUILD_AREA/text()'),
         ('ho_true_size', 'BUILD_AREA_INSIDE/text()'),
         ('ho_share_size', 'BUILD_AREA_SHARE/text()'),
         ('ho_floor', 'FLOOR_REALRIGHT/text()'),
         ('ho_type', 'USE_FACT/text()'),
     )
     url = 'http://www.fzfgj.cn/website/presale/home/HouseTableControl/GetData.aspx?Building_ID=' + bu_id
     try:
         response = requests.get(url=url, headers=self.headers)
         tree = etree.XML(response.text)
         logo = tree.xpath('//LOGICBUILDING_ID/text()')[0]
         url_2 = 'http://www.fzfgj.cn/website/presale/home/HouseTableControl/GetData.aspx?LogicBuilding_ID=' + logo
         result = requests.get(url_2, headers=self.headers)
         tree_2 = etree.XML(result.text)
         for node in tree_2.xpath('T_HOUSE'):
             try:
                 # NOTE(review): the record is created with the literal 11
                 # while the messages report ``co_index`` -- confirm that
                 # the two are meant to be the same value.
                 house = House(11)
                 house.co_id = co_id
                 house.bu_id = bu_id
                 for attr, path in fields:
                     setattr(house, attr, node.xpath(path)[0])
                 house.insert_db()
             except Exception as e:
                 print('房号错误,co_index={},url={}'.format(co_index, url_2), e)
     # ``Exception`` rather than ``BaseException`` so that KeyboardInterrupt
     # and SystemExit are not swallowed.
     except Exception as e:
         print('房号错误,co_index={},url={}'.format(co_index, url), e)
Exemplo n.º 6
0
 def get_house_detail(self, house_detail_url_list, co_id, bu_id):
     """Fetch each detail page through ``self.s`` and store a House record.

     Every entry of ``house_detail_url_list`` is appended to
     ``http://www.yzfdc.cn/``.  The method waits three seconds before each
     request.  A page that fails to download or parse is reported and
     skipped.

     :param house_detail_url_list: iterable of relative page paths.
     :param co_id: stored on every record as ``co_id``.
     :param bu_id: stored on every record as ``bu_id``.
     """
     flags = re.S | re.M
     # (House attribute, regex capturing the text after the matching label id)
     label_patterns = (
         ('co_name', 'lblxmmc.*?>(.*?)<'),
         ('bu_num', 'lbldh.*?>(.*?)<'),
         ('ho_name', 'lblfh.*?>(.*?)<'),
         ('ho_build_size', 'lbljzmj.*?>(.*?)<'),
         ('ho_true_size', 'lbltnmj.*?>(.*?)<'),
         ('ho_share_size', 'lblftmj.*?>(.*?)<'),
         ('ho_type', 'lblfwxz.*?>(.*?)<'),
         ('ho_room_type', 'lblhuxin.*?>(.*?)<'),
     )
     for relative_path in house_detail_url_list:
         detail_url = 'http://www.yzfdc.cn/' + relative_path
         try:
             house = House(co_index)
             time.sleep(3)
             page = self.s.get(detail_url, headers=self.headers).text
             for attr, pattern in label_patterns:
                 setattr(house, attr,
                         re.search(pattern, page, flags).group(1))
             house.bu_id = bu_id
             house.co_id = co_id
             house.insert_db()
         except Exception as e:
             print('房号错误,co_index={},url={}'.format(co_index, detail_url),
                   e)
Exemplo n.º 7
0
 def get_house_detail(self, house_detail_url, co_id, bu_id):
     """Fetch one house page and store it as a House record.

     Nothing is stored when the page contains the text ``找不到记录``.
     Any failure while fetching or parsing is reported and swallowed.

     :param house_detail_url: path appended to the ``HousePresell/`` root.
     :param co_id: stored on the record as ``co_id``.
     :param bu_id: stored on the record as ``bu_id``.
     """
     # Built before the ``try`` so the handler below can always report it.
     house_detail_url_ = 'http://www.yfci.gov.cn:8080/HousePresell/' + house_detail_url
     try:
         house = House(co_index)
         response = requests.get(house_detail_url_, headers=self.headers)
         html = response.text
         if '找不到记录' in html:
             return
         house.ho_name = re.search('id="HouseNO".*?>(.*?)<', html,
                                   re.S | re.M).group(1)
         house.ho_true_size = re.search('id="HouseArea".*?>(.*?)<', html,
                                        re.S | re.M).group(1)
         house.ho_build_size = re.search('id="SumBuildArea1".*?>(.*?)<',
                                         html, re.S | re.M).group(1)
         house.ho_type = re.search('id="HouseUse".*?>(.*?)<', html,
                                   re.S | re.M).group(1)
         # ``CHX`` feeds ``orientation`` only; it must not also overwrite
         # ``ho_type``, which holds the ``HouseUse`` value read above.
         house.orientation = re.search('id="CHX".*?>(.*?)<', html,
                                       re.S | re.M).group(1)
         house.co_id = co_id
         house.bu_id = bu_id
         house.insert_db()
     except Exception as e:
         print(
             '房号错误,co_index={},url={}'.format(co_index, house_detail_url_),
             e)
Exemplo n.º 8
0
 def house_parse(self, bu_id, co_id, sid, propertyid):
     """Walk every page of a building's price table and store its houses.

     A first POST reads the page count from the ``页数.../<n>`` text of the
     reply; pages ``1..n`` are then requested in turn.  Each
     ``<tr onmouseout=...>`` row becomes one House record.

     :param bu_id: sent as ``buildingid`` and stored as ``bu_id``.
     :param co_id: stored on every record as ``co_id``.
     :param sid: sent as ``sid``.
     :param propertyid: sent as ``propertyid``.
     :raises AttributeError: if the first reply contains no page count.
     """
     search_url = 'http://tmsf.qzfdcgl.com/newhouse/property_pricesearch.htm'
     data = {
         'propertyid': propertyid,
         'sid': sid,
         'buildingid': bu_id,
         'tid': 'price',
         'page': 1,
     }
     res = requests.post(search_url, data=data, headers=self.headers)
     # Raw string: ``\d`` in a plain literal is an invalid escape sequence.
     page = re.search(r'页数.*?/(\d+)', res.text).group(1)
     for i in range(1, int(page) + 1):
         data['page'] = i
         ho_res = requests.post(search_url, data=data, headers=self.headers)
         ho_html = etree.HTML(ho_res.text)
         for house in ho_html.xpath("//tr[@onmouseout]"):
             ho = House(co_index)
             ho.co_id = co_id
             ho.bu_id = bu_id
             ho.ho_name = house.xpath("./td[3]/a/div/text()")[0]
             ho.unit = house.xpath("./td[2]/a/div/text()")[0]
             # Sizes and price are passed on as lists of span ``class``
             # values and converted by ``self.number_replace``.
             buildsize = house.xpath("./td[4]/a/div/span/@class")
             truesize = house.xpath("./td[5]/a/div/span/@class")
             price = house.xpath("./td[9]/a/div/span/@class")
             ho.ho_build_size = self.number_replace(buildsize)
             ho.ho_true_size = self.number_replace(truesize)
             ho.ho_price = self.number_replace(price)
             ho.insert_db()
Exemplo n.º 9
0
    def house_info(self, co_id, bu_id, house_url_list):
        """Fetch every house page via ``Proxy_contact`` and store a record.

        Each entry of ``house_url_list`` is appended to the ``2016/spf/``
        root.  The reply is decoded as GBK and parsed with regular
        expressions.  A page that fails to download or parse is logged and
        skipped; the record is inserted only after a clean parse.

        :param co_id: stored on every record as ``co_id``.
        :param bu_id: stored on every record as ``bu_id``.
        :param house_url_list: iterable of relative page paths.
        """
        flags = re.S | re.M
        # (House attribute, regex capturing the value that follows the label)
        label_patterns = (
            ('ho_name', '房号.*?;">(.*?)</td'),
            ('ho_price', '价格.*?<td>(.*?)元'),
            ('ho_floor', '楼层.*?;">(.*?)</td'),
            ('ho_build_size', '建筑面积.*?<td>(.*?)m'),
            ('ho_true_size', '套内面积.*?<td>(.*?)m'),
            ('ho_share_size', '分摊面积.*?<td>(.*?)m'),
            ('ho_type', '房屋类型.*?<td>(.*?)</td'),
        )
        for relative_path in house_url_list:
            house_url = "http://www.njhouse.com.cn/2016/spf/" + relative_path
            try:
                fetcher = Proxy_contact(app_name="nanjing",
                                        method='get',
                                        url=house_url,
                                        headers=self.headers)
                page = fetcher.contact().decode('gbk')

                ho = House(co_index)
                ho.co_id = co_id
                ho.bu_id = bu_id
                for attr, pattern in label_patterns:
                    setattr(ho, attr,
                            re.search(pattern, page, flags).group(1))
            except Exception as e:
                log.error("房屋详情页错误{}".format(e))
            else:
                # Outside the guarded section: insert errors propagate.
                ho.insert_db()
Exemplo n.º 10
0
    def get_build_info(self, url, co_id):
        """Store a Building record and the houses listed on its page.

        The building fields are read by element id from the page at ``url``;
        the building id is the run of digits after the final ``?`` of
        ``url``.  The markup after ``<tr id="row3">`` is then split into
        ``<td>`` cells; in every cell that contains ``<br>`` each captured
        name becomes one House record.  A house that fails is printed and
        skipped; any other failure is printed and ends the call.

        :param url: absolute URL of the building page.
        :param co_id: stored on the building and on every house as ``co_id``.
        """
        flags = re.S | re.M
        try:
            building = Building(co_index)
            response = requests.get(url)
            html = response.text
            tree = etree.HTML(html)
            co_name = tree.xpath('//*[@id="PageB_Location"]/text()')[0]  # community name
            print(co_name)
            bu_name = tree.xpath('//*[@id="ItemName"]/text()')[0]  # building name
            bu_num = tree.xpath('//*[@id="PageB_HouseNo"]/text()')[0]  # building number
            bu_all_house = tree.xpath(
                '//*[@id="lb_countbulidtaoshu"]/text()')[0]  # total units
            bu_floor = self.is_none(
                tree.xpath('//*[@id="cell3-1"]/text()'))  # floors
            bu_build_size = tree.xpath(
                '//*[@id="lb_countbulidarea"]/text()')[0]  # building area
            bu_live_size = tree.xpath(
                '//*[@id="lb_buildarea"]/text()')[0]  # residential area
            bu_price = self.is_none(
                tree.xpath('//*[@id="lb_buildavg"]/text()'))  # residential price
            # Raw string: ``\?`` and ``\d`` are invalid escapes otherwise.
            bu_id = re.search(r'\?(\d+)$', url).group(1)  # building id
            building.co_id = co_id
            building.bu_name = bu_name
            building.bu_num = bu_num
            building.bu_all_house = bu_all_house
            building.bu_floor = bu_floor
            building.bu_build_size = bu_build_size
            building.bu_live_size = bu_live_size
            building.bu_price = bu_price
            building.bu_id = bu_id
            building.insert_db()
            house_info_html = re.findall('<tr id="row3">(.*)$', html,
                                         flags)[0]
            for cell in re.findall('(<td.*?>.*?</td>)', house_info_html,
                                   flags):
                if '<br>' not in cell:
                    continue
                ho_name_list = re.findall('<td.*?>(.*?)<br>', cell, flags)
                ho_true_size_list = re.findall('<td.*?>.*?<br>(.*?)<br>',
                                               cell, flags)
                ho_type = re.findall('<td.*?>.*?<br>.*?<br>(.*?)<br>', cell,
                                     flags)[0]
                # A separate index name keeps the cell variable intact.
                for idx, raw_name in enumerate(ho_name_list):
                    try:
                        # Strips an opening <font ...> tag; a name without
                        # one is returned unchanged.
                        ho_name = re.sub('<font.*?>', '', raw_name)
                        # NOTE(review): literal 8 here versus ``co_index``
                        # for the building -- confirm they are the same.
                        house = House(8)
                        house.ho_name = ho_name
                        house.ho_true_size = ho_true_size_list[idx]
                        house.co_id = co_id
                        house.bu_id = bu_id
                        house.ho_type = ho_type
                        house.insert_db()

                    except Exception as e:
                        print(e)
        # ``Exception`` rather than ``BaseException`` so KeyboardInterrupt
        # and SystemExit are not swallowed.
        except Exception as e:
            print(e)
Exemplo n.º 11
0
 def house_parse(self, ho_url, co_id, bu_id):
     """Follow the detail links of a listing page and store each house.

     The listing at ``http://61.143.241.154/`` + ``ho_url`` is decoded as
     GBK; every ``href`` under ``td[@height='80']/a`` is fetched, decoded
     as GBK and parsed into a House record.  A detail page that fails is
     logged and skipped.

     :param ho_url: path of the listing page.
     :param co_id: stored on every record as ``co_id``.
     :param bu_id: stored on every record as ``bu_id``.
     """
     flags = re.S | re.M
     # (House attribute, regex capturing the cell that follows the label)
     label_patterns = (
         ('ho_name', '房屋号.*?">(.*?)</td'),
         ('ho_true_size', '套内面积.*?">(.*?)</td'),
         ('ho_build_size', '建筑面积.*?">(.*?)</td'),
         ('orientation', '房屋朝向.*?">(.*?)</td'),
         ('ho_type', '用途.*?">(.*?)</td'),
         ('ho_price', '申报总价.*?">(.*?)</td'),
     )
     house_url = "http://61.143.241.154/" + ho_url
     listing = requests.get(house_url, headers=headers)
     listing_tree = etree.HTML(listing.content.decode('gbk'))
     for detail in listing_tree.xpath("//td[@height='80']/a/@href"):
         try:
             detail_url = 'http://61.143.241.154/' + detail
             detail_page = requests.get(detail_url, headers=headers)
             con = detail_page.content.decode('gbk')
             ho = House(co_index)
             ho.co_id = co_id
             ho.bu_id = bu_id
             for attr, pattern in label_patterns:
                 setattr(ho, attr, re.search(pattern, con, flags).group(1))
             ho.insert_db()
         except Exception as e:
             log.error("{}房屋请求解析失败{}".format(detail, e))
Exemplo n.º 12
0
    def house_parse(self, bu_id, co_id):  # parse house information
        """POST for a building's house table and store one record per house.

        The reply is scanned with seven regular expressions, each returning
        a list; entries at the same position are combined into one House
        record.  The number of records equals the number of
        ``getHouseBaseInfo('...')`` ids found.

        :param bu_id: sent as ``nid`` and stored as ``bu_id``.
        :param co_id: sent as ``projectid`` and stored as ``co_id``.
        :raises IndexError: if any of the other lists is shorter than the
            id list.
        """
        house_url = "http://ys.tyfdc.gov.cn/Firsthand/tyfc/publish/probld/NBView.do?"
        formdata = {"nid": bu_id, "projectid": co_id}
        try:
            res = requests.post(house_url, data=formdata, headers=self.headers)
        except Exception as e:
            print("co_index={},房屋详情页无法访问".format(co_index), e)
            # No response to parse; continuing would hit the unbound ``res``.
            return
        con = res.text

        flags = re.S | re.M
        # Raw strings: ``\)`` and ``\(`` are invalid escapes otherwise.
        ho_name = re.findall(r"""'\);">(.*?)&nbsp;""", con, flags)
        ho_build_size = re.findall('<span.*?建筑面积:(.*?)㎡', con, flags)
        ho_true_size = re.findall('<span.*?套内面积:(.*?)分', con, flags)
        ho_share_size = re.findall('<span.*?分摊面积:(.*?)㎡', con, flags)
        ho_type = re.findall('<span.*?用途:(.*?)房', con, flags)
        ho_price = re.findall('<span.*?单价:(.*?)"', con, flags)
        ho_id = re.findall(r"getHouseBaseInfo\('(.*?)'\)", con, flags)
        for index, house_id in enumerate(ho_id):
            # A fresh record per row, so rows never share one object.
            ho = House(co_index)
            ho.co_id = co_id
            ho.bu_id = bu_id
            ho.ho_name = ho_name[index]
            ho.ho_build_size = ho_build_size[index]
            ho.ho_type = ho_type[index]
            ho.ho_share_size = ho_share_size[index]
            ho.ho_price = ho_price[index]
            ho.ho_true_size = ho_true_size[index]
            ho.ho_num = house_id
            ho.insert_db()
Exemplo n.º 13
0
 def get_house_info(self, house_url_list, bu_id, co_id):
     """Download each house URL and store the parsed fields as a record.

     Entries of ``house_url_list`` are requested as given.  A page that
     fails to download or parse is reported and skipped.

     :param house_url_list: iterable of absolute page URLs.
     :param bu_id: stored on every record as ``bu_id``.
     :param co_id: stored on every record as ``co_id``.
     """
     flags = re.S | re.M
     # (House attribute, regex capturing the cell that follows the label)
     label_patterns = (
         ('ho_name', '门牌号:.*?<td.*?>(.*?)<'),
         ('ho_floor', '所在层:.*?<td.*?>(.*?)<'),
         ('ho_type', '房屋性质:.*?<td.*?>(.*?)<'),
         ('ho_build_size', '预测建筑面积:.*?<td.*?>(.*?)<'),
         ('ho_true_size', '预测套内面积:.*?<td.*?>(.*?)<'),
         ('ho_share_size', '预测分摊面积:.*?<td.*?>(.*?)<'),
         ('co_address', '房屋坐落:.*?<td.*?>(.*?)<'),
     )
     for i in house_url_list:
         try:
             house = House(co_index)
             page = requests.get(i, headers=self.headers).text
             for attr, pattern in label_patterns:
                 setattr(house, attr,
                         re.search(pattern, page, flags).group(1))
             house.bu_id = bu_id
             house.co_id = co_id
             house.insert_db()
         except Exception as e:
             print('房号错误,co_index={},url={}'.format(co_index, i), e)
Exemplo n.º 14
0
    def get_house_info(self, house_url_list, bu_id, co_id):
        """Download each house page (GBK) and store the parsed record.

        Every entry of ``house_url_list`` is appended to the ``House/``
        root.  A page that fails to download or parse is reported and
        skipped.

        :param house_url_list: iterable of relative page paths.
        :param bu_id: stored on every record as ``bu_id``.
        :param co_id: stored on every record as ``co_id``.
        """
        flags = re.S | re.M
        # (House attribute, regex capturing the cell that follows the label)
        label_patterns = (
            ('bu_num', '幢  号:.*?<td>(.*?)<'),
            ('ho_name', '房  号:.*?<td>(.*?)<'),
            ('co_name', '项目名称:.*?<td>(.*?)<'),
            ('ho_build_size', '建筑面积:.*?<td>(.*?)<'),
            ('ho_true_size', '套内面积:.*?<td>(.*?)<'),
            ('ho_share_size', '分摊面积:.*?<td>(.*?)<'),
            ('ho_floor', '所 在 层:.*?<td>(.*?)<'),
        )
        for i in house_url_list:
            try:
                house = House(co_index)
                house_url = 'http://www.fjnpfdc.com/House/' + i
                reply = requests.get(house_url, headers=self.headers)
                house_con = reply.content.decode('gbk')

                house.bu_id = bu_id
                house.co_id = co_id
                for attr, pattern in label_patterns:
                    setattr(house, attr,
                            re.search(pattern, house_con, flags).group(1))

                house.insert_db()
            except Exception as e:
                print("co_index={},房屋{}错误".format(co_index, i), e)
Exemplo n.º 15
0
 def get_house_info(self, bu_id, co_id):
     """POST an XML query for a building and store one record per room.

     The reply is scanned for the ``title='...'`` text of each
     ``g_oBuildTable.clickRoom`` handler; each title is parsed into a House
     record, and the raw title is kept on the record as ``info``.  A title
     that fails to parse is reported and skipped.

     :param bu_id: building id (string); embedded in the payload and stored
         as ``bu_id``.
     :param co_id: stored on every record as ``co_id``.
     """
     flags = re.S | re.M
     # (House attribute, regex capturing the text between two labels)
     label_patterns = (
         ('ho_name', '房号:(.*?)单元:'),
         ('ho_build_size', '总面积:(.*?)平方米'),
         ('ho_type', '用途:(.*?)户型'),
         ('ho_room_type', '户型:(.*?)状态'),
     )
     house_url = "http://www.xyfdc.gov.cn/wsba/Common/Agents/ExeFunCommon.aspx"
     payload_head = "<?xml version=\"1.0\" encoding=\"utf-8\" standalone=\"yes\"?>\r\n<param funname=\"SouthDigital.Wsba.CBuildTableEx.GetBuildHTMLEx\">\r\n<item>"
     payload_tail = "</item>\r\n<item>1</item>\r\n<item>1</item>\r\n<item>80</item>\r\n<item>840</item>\r\n<item>g_oBuildTable</item>\r\n<item> 1=1</item>\r\n<item>1</item>\r\n<item>false</item>\r\n</param>\r\n"
     payload = payload_head + bu_id + payload_tail
     request_headers = {'Content-Type': "text/xml"}
     reply = requests.request("POST",
                              house_url,
                              data=payload,
                              headers=request_headers)
     room_titles = re.findall(
         "onclick=.g_oBuildTable.clickRoom.*? title='(.*?)'", reply.text,
         flags)
     for title in room_titles:
         try:
             house = House(co_index)
             for attr, pattern in label_patterns:
                 setattr(house, attr,
                         re.search(pattern, title, flags).group(1))
             house.info = title
             house.bu_id = bu_id
             house.co_id = co_id
             house.insert_db()
         except Exception as e:
             print(
                 '房号错误,co_index={},url={},data={}'.format(
                     co_index, house_url, payload), e)
Exemplo n.º 16
0
 def get_house_info(self, co_id, bu_id):
     """POST for a building's house table and store one record per house.

     The reply is split into ``HouseID...}`` fragments and each fragment is
     parsed into a House record.  Any failure, in the request or in
     parsing, is reported together with its cause and ends the call, so
     fragments after a bad one are not processed.

     :param co_id: stored on every record as ``co_id``.
     :param bu_id: building id (string); sent as ``BuidID`` and stored as
         ``bu_id``.
     """
     flags = re.S | re.M
     # (House attribute, regex capturing the quoted value of the key)
     fields = (
         ('ho_name', '"YCHouseNo":"(.*?)"'),
         ('ho_floor', '"ActFLoor":"(.*?)"'),
         ('ho_build_size', '"YCJZArea":"(.*?)"'),
         ('ho_true_size', '"YCTNJZArea":"(.*?)"'),
         ('ho_share_size', '"YCFTJZArea":"(.*?)"'),
     )
     house_url = "http://202.103.219.149:7000/LeadingEstate/buildingtable/ShowNewBuildingTable.aspx"
     payload = "IsShowHouse=1&BuidID=" + bu_id
     headers = {'Content-Type': "application/x-www-form-urlencoded"}
     try:
         response = requests.request("POST",
                                     house_url,
                                     data=payload,
                                     headers=headers)
         html = response.text
         # Raw string: ``\}`` in a plain literal is an invalid escape.
         house_info_list = re.findall(r'HouseID.*?\}', html, flags)
         for i in house_info_list:
             house = House(co_index)
             house.bu_id = bu_id
             house.co_id = co_id
             for attr, pattern in fields:
                 setattr(house, attr, re.search(pattern, i, flags).group(1))
             house.insert_db()
     except Exception as e:
         # Include the exception so parse errors can be told apart from
         # request errors.
         print('请求错误,url={},data={}'.format(house_url, payload), e)
Exemplo n.º 17
0
 def get_house_info(self, house_id_list, bu_id, co_id):
     """Download the ``RoomInfo.aspx`` page of each id and store a record.

     Every entry of ``house_id_list`` is appended to the URL as the
     ``code`` query value.  A page that fails to download or parse is
     reported and skipped.

     :param house_id_list: iterable of room codes (strings).
     :param bu_id: stored on every record as ``bu_id``.
     :param co_id: stored on every record as ``co_id``.
     """
     flags = re.S | re.M
     # (House attribute, regex capturing the text of the matching element)
     element_patterns = (
         ('ho_name', 'id="ROOM_HH">(.*?)<'),
         ('ho_floor', 'id="ROOM_MYC">(.*?)<'),
         ('ho_type', 'id="ROOM_FWYT">(.*?)<'),
         ('ho_room_type', 'id="ROOM_HX">(.*?)<'),
         ('ho_build_size', 'id="ROOM_YCJZMJ">(.*?)<'),
         ('ho_true_size', 'id="ROOM_YCTNJZMJ">(.*?)<'),
         ('ho_share_size', 'id="ROOM_YCFTJZMJ">(.*?)<'),
     )
     for code in house_id_list:
         house_url = 'http://www.hbczfdc.com:4993/HPMS/RoomInfo.aspx?code=' + code
         try:
             house = House(co_index)
             page = requests.get(house_url, headers=self.headers).text
             house.bu_id = bu_id
             house.co_id = co_id
             for attr, pattern in element_patterns:
                 setattr(house, attr,
                         re.search(pattern, page, flags).group(1))
             house.insert_db()
         except Exception as e:
             print('房号错误,co_index={},url={}'.format(co_index, house_url), e)
Exemplo n.º 18
0
    def get_build_info(self, url, response, co_id, bu_id):
        """Store one House record per entry of a JSON room list.

        ``response.text`` is parsed as JSON and iterated; each entry is
        read through the keys ``roomno``, ``ghyt``, ``tnmj``,
        ``floorindex``, ``jzmj`` and ``fwcode``.  The shared area and price
        come from the room's ``roomview.jhtml`` page; when that page cannot
        be fetched or parsed the room is reported and not stored.

        :param url: not used by this method.
        :param response: object whose ``text`` holds the JSON document.
        :param co_id: stored on every record as ``co_id``.
        :param bu_id: stored on every record as ``bu_id``.
        :raises KeyError: if an entry lacks one of the keys listed above.
        """
        json_html = json.loads(response.text)
        for i in json_html:
            # A fresh record per room, so rooms never share one object.
            house = House(co_index)
            house.co_id = co_id
            house.bu_id = bu_id
            house.ho_name = i['roomno']  # room number
            house.ho_type = i['ghyt']  # usage
            house.ho_true_size = i['tnmj']  # predicted inner area
            house.ho_floor = i['floorindex']  # floor
            house.ho_build_size = i['jzmj']  # building area
            house_code = i["fwcode"]

            house_detail_url = "http://fsfc.fsjw.gov.cn/hpms_project/roomview.jhtml?id=" + str(house_code)
            try:
                res = requests.get(house_detail_url, headers=self.headers)
                house.ho_share_size = re.search('实测分摊面积.*?<td>(.*?)</td>', res.text, re.S | re.M).group(1)
                house.ho_price = re.search('总价.*?<td>(.*?)</td>', res.text, re.S | re.M).group(1)
            except Exception as e:
                print("co_index={},房屋详情页{}请求失败!".format(co_index, house_detail_url))
                print(e)
                continue

            house.insert_db()
Exemplo n.º 19
0
 def house_info(self, ho_url, co_id, bu_id):
     """Follow the house links of a listing page and store each house.

     The listing at ``http://222.77.178.63:7002/`` + ``ho_url`` is read as
     GBK; every ``href`` of a ``td/a`` element that has a ``target``
     attribute is fetched, read as GBK and parsed into a House record.  A
     detail page that fails is printed and skipped.

     :param ho_url: path of the listing page.
     :param co_id: stored on every record as ``co_id``.
     :param bu_id: stored on every record as ``bu_id``.
     """
     flags = re.S | re.M
     # (House attribute, regex capturing the text that follows the label)
     fields = (
         ('ho_name', '室号.*?">(.*?)<'),
         ('ho_floor', '实际层.*?">(.*?)<'),
         ('ho_type', '房屋类型.*?">(.*?)<'),
         ('ho_build_size', '预测建筑面积.*?">(.*?)<'),
         ('ho_true_size', '预测套内面积.*?">(.*?)<'),
         ('ho_share_size', '预测分摊面积.*?">(.*?)<'),
         ('ho_price', '总价.*?">(.*?)<'),
     )
     site_root = "http://222.77.178.63:7002/"
     url = site_root + ho_url
     # NOTE(review): a trailing '=' is left on the URL on purpose. An
     # earlier ``url.rstrip('=')`` here discarded its result, so requests
     # have always used the unstripped URL -- confirm whether stripping is
     # actually wanted before assigning it.
     res = requests.get(url, headers=self.headers)
     res.encoding = 'gbk'
     html = etree.HTML(res.text)
     for house_detail in html.xpath("//td/a[@target]/@href"):
         try:
             detail_url = site_root + house_detail
             detail_res = requests.get(detail_url, headers=self.headers)
             detail_res.encoding = 'gbk'
             con = detail_res.text
             ho = House(co_index)
             ho.co_id = co_id
             ho.bu_id = bu_id
             for attr, pattern in fields:
                 setattr(ho, attr, re.search(pattern, con, flags).group(1))
             ho.insert_db()
         except Exception as e:
             print('房屋信息错误{}'.format(e))
Exemplo n.º 20
0
    def house_parse(self, house_url, co_id, bu_id):
        """Parse a building page and store one House record per room.

        The page at ``http://spf.tlfdc.cn/`` + ``house_url`` is scanned
        with five regular expressions, each returning a list; entries at
        the same position describe one room.  The floor is read from the
        room's own detail page, decoded as gb2312.  A room that fails is
        reported and skipped.

        :param house_url: path of the building page.
        :param co_id: stored on every record as ``co_id``.
        :param bu_id: stored on every record as ``bu_id``.
        """
        url = "http://spf.tlfdc.cn/" + house_url
        res = requests.get(url, headers=self.headers)
        con = res.text

        flags = re.S | re.M
        ho_name = re.findall('室号:(.*?)套', con, flags)
        ho_room_type = re.findall('套型:(.*?)建', con, flags)
        ho_build_size = re.findall('建筑面积:(.*?)参', con, flags)
        ho_price = re.findall('价格:(.*?)元', con, flags)
        # Raw string: ``\?`` and ``\d`` are invalid escapes otherwise.
        ho_detail = re.findall(
            r'href="(show.*?\?id=\d+&id2=\d+&prjid=\d+)"', con, flags)
        for index, name in enumerate(ho_name):
            # A fresh record per room, so rooms never share one object.
            ho = House(co_index)
            try:
                ho.co_id = co_id
                ho.bu_id = bu_id
                ho.ho_name = name
                ho.ho_room_type = ho_room_type[index]
                ho.ho_build_size = ho_build_size[index]
                ho.ho_price = ho_price[index]
                ho_detail_url = "http://spf.tlfdc.cn/" + ho_detail[index]
                detail_res = requests.get(ho_detail_url,
                                          headers=self.headers)
                detail_con = detail_res.content.decode('gb2312')
                ho.ho_floor = re.findall('楼层.*?">(.*?)</td>', detail_con,
                                         flags)[0].strip()

                ho.insert_db()
            # Bind the exception: the handler prints it, and an unbound
            # ``e`` would raise NameError inside the handler.
            except Exception as e:
                print('房号错误,co_index={},url={}'.format(co_index, url), e)
                continue
Exemplo n.º 21
0
    def house_parse(self, co_id, bu_id, bu_con):
        """Store one House record per house link found in ``bu_con``.

        Every ``<a style...)>name</a`` occurrence in ``bu_con`` yields a
        record holding only the name and the two ids.

        :param co_id: stored on every record as ``co_id``.
        :param bu_id: stored on every record as ``bu_id``.
        :param bu_con: text that is searched for the links.
        """
        # Raw string: ``\)`` in a plain literal is an invalid escape.
        name_list = re.findall(r'<a style.*?\)>(.*?)</a', bu_con)
        for name in name_list:
            ho = House(co_index)
            ho.co_id = co_id
            ho.bu_id = bu_id
            ho.ho_name = name
            ho.insert_db()
Exemplo n.º 22
0
 def get_house_info(self, house_url, ho_name, bu_id, co_id):
     """Build and store the record of a single room on www.bjjs.gov.cn.

     When the absolute URL contains no ``#`` the record is first passed
     through ``self.get_house_detail`` and replaced by its return value.
     The room name and the building / community ids are then set and the
     record is inserted.

     Args:
         house_url: Path of the room page, appended to the site root.
         ho_name: Room name to store.
         bu_id: Building id to store.
         co_id: Community id to store.
     """
     record = House(co_index)
     detail_url = 'http://www.bjjs.gov.cn' + house_url
     has_detail_page = '#' not in detail_url
     if has_detail_page:
         record = self.get_house_detail(detail_url, record)
     record.ho_name, record.bu_id, record.co_id = ho_name, bu_id, co_id
     record.insert_db()
Exemplo n.º 23
0
    def house(self, house_url, bu_id, co_id):
        """Crawl every room of one building on www.syfc.com.cn and store it.

        The building page embeds the room table in an iframe. Each
        ``td[@width='70']`` cell of that table is treated as one room:

        * a cell with a link is named after the link text, and its detail
          page is scanned for building area, shared area, inner area and
          type;
        * a cell without a link is named after its own text.

        Error handling is best-effort. If the building page or its iframe
        cannot be loaded, the problem is printed and nothing is stored. If a
        room's detail page cannot be downloaded, or lacks some of the fields,
        the room is still stored with whatever was gathered. Any other
        per-room failure is printed and the loop moves on to the next cell.

        Args:
            house_url: Path of the building page, appended to the site root.
            bu_id: Building id copied onto every record.
            co_id: Community id copied onto every record.
        """
        site = "http://www.syfc.com.cn"
        flags = re.S | re.M
        detail_fields = (
            ('ho_build_size', '建筑面积.*?">(.*?)<'),
            ('ho_share_size', '分摊面积.*?">(.*?)<'),
            ('ho_true_size', '套内面积.*?">(.*?)<'),
            ('ho_type', '类型.*?">(.*?)<'),
        )

        try:
            res = requests.get(site + house_url, headers=self.headers)
            con = etree.HTML(res.text)
            ho_detail_url = con.xpath("//iframe/@src")[0]
            response = requests.get(ho_detail_url, headers=self.headers)
        except Exception as e:
            print("co_index={},楼栋详情页无法访问".format(co_index), e)
            # Without the iframe page there is no room table to walk;
            # falling through would hit an unbound ``response``.
            return

        html = etree.HTML(response.text)
        for td in html.xpath("//td[@width='70']"):
            try:
                ho = House(co_index)
                ho.bu_id = bu_id
                ho.co_id = co_id

                hrefs = td.xpath("./a/@href")
                names = td.xpath("./a/text()")
                if hrefs and names:
                    ho.ho_name = names[0]
                    try:
                        page = requests.get(site + hrefs[0],
                                            headers=self.headers).text
                    except Exception as e:
                        print("co_idnex={},房屋详情页无法访问".format(co_index), e)
                        # Never parse a stale page left over from an earlier
                        # room; store the name-only record instead.
                        page = None
                    if page is not None:
                        for attr, pattern in detail_fields:
                            found = re.search(pattern, page, flags)
                            # A missing field must not discard the others.
                            if found is not None:
                                setattr(ho, attr, found.group(1))
                else:
                    texts = td.xpath("./text()")
                    if not texts:
                        # Neither a link nor text: nothing to name the room by.
                        continue
                    ho.ho_name = texts[0]

                ho.insert_db()
            except Exception as e:
                print("co_index={},房屋信息错误".format(co_index), e)
Exemplo n.º 24
0
 def ho_info(self, house_url, co_id, bu_id):
     """Store one record per ``td[@unitname]`` cell of the page at ``house_url``.

     The page is downloaded once; every ``<td>`` with a ``unitname``
     attribute inside a ``<tbody>`` yields a record whose name is the
     cell's first text node. A failure on one cell is logged and does not
     stop the remaining cells.

     Args:
         house_url: Absolute URL of the room table page.
         co_id: Community id written to each record.
         bu_id: Building id written to each record.
     """
     page = etree.HTML(requests.get(house_url, headers=self.headers).text)
     for cell in page.xpath("//tbody//td[@unitname]"):
         try:
             record = House(co_index)
             record.co_id, record.bu_id = co_id, bu_id
             record.ho_name = cell.xpath("./text()")[0]
             record.insert_db()
         except Exception as e:
             log.error("小区房屋信息提取失败", e)
Exemplo n.º 25
0
    def room_crawler(self, room):  # rooms
        """Crawl the room table of one building on www.hzszjj.gov.cn.

        ``room`` is the building page path. The two numbers matched by
        ``(\\d+)&\\w+=(\\d+)`` in it are stored as the community id and the
        building id of every record. Each ``tr[@bgcolor='#fbf3e6']`` row
        except the first and last match is treated as one floor, and every
        ``td[@height='40']`` cell inside it as one room.

        A room whose link href is a single character is described by the
        link's ``wf`` attribute; any other href is followed to a detail page
        whose labelled ``<span>`` elements supply the fields.

        Every room gets its own ``House`` instance, so fields filled by one
        branch cannot leak into a later record written by the other branch.

        Args:
            room: Path of the building page, appended to the site root.
        """
        house_url = "http://www.hzszjj.gov.cn" + room
        res = requests.get(house_url)
        con = etree.HTML(res.text)

        # The ids come from the page path and are identical for every room,
        # so search once. Groups are read inside the loop: a non-matching
        # path still only fails once a room is actually processed.
        id_num = re.search(r"(\d+)&\w+=(\d+)", room)

        ho_table = con.xpath("//tr[@bgcolor='#fbf3e6']")
        # NOTE(review): first and last matches are skipped, presumably
        # header/footer rows; confirm against the live page.
        for ho_list in ho_table[1:-1]:
            ho_floor = ho_list.xpath("./td[@align='center']/text()")[0]
            for cell in ho_list.xpath(".//tr/td[@height='40']"):
                ho = House(co_index)
                ho.ho_floor = ho_floor  # floor
                ho.co_id = id_num.group(1)  # community id
                ho.bu_id = id_num.group(2)  # building id

                ho_url = cell.xpath("./a/@href")[0]
                if len(ho_url) == 1:
                    # NOTE(review): a one-character href looks like a
                    # placeholder link with no detail page; confirm.
                    ho_info = cell.xpath("./a/@wf")[0]

                    ho.ho_name = cell.xpath("./a/text()")[0]
                    info = re.search(
                        r":(.*?)<br>.*?:(.*?)<br>(.*?)<br><hr>.*?:(.*?)m.*?<br>.*?:(.*?)<br>.*?:(.*?)m",
                        ho_info)
                    ho.ho_type = info.group(5)
                    ho.ho_build_size = info.group(4)
                    ho.ho_room_type = info.group(2)
                else:
                    detail_url = "http://www.hzszjj.gov.cn/ts_web_dremis/web_house_dir/" + ho_url
                    detail = etree.HTML(requests.get(detail_url).text)
                    ho.ho_name = detail.xpath(
                        "//span[@id='ctl00_ContentPlaceHolder2_lb_house_name']/text()"
                    )[0]
                    ho.ho_type = detail.xpath(
                        "//span[@id='ctl00_ContentPlaceHolder2_lb_house_type']/text()"
                    )[0]
                    ho.ho_build_size = detail.xpath(
                        "//span[@id='ctl00_ContentPlaceHolder2_lb_house_build_area']/text()"
                    )[0]
                    ho.ho_share_size = detail.xpath(
                        "//span[@id='ctl00_ContentPlaceHolder2_lb_house_share_area']/text()"
                    )[0]
                    ho.ho_true_size = detail.xpath(
                        "//span[@id='ctl00_ContentPlaceHolder2_lb_house_inside_area']/text()"
                    )[0]

                ho.insert_db()
Exemplo n.º 26
0
 def ho_info(self, ho_list, co_id, bu_id):
     """Store one record for each element of ``ho_list``.

     The element's first text node is the room name. Its ``title``
     attribute is searched for the building area, shared area and inner
     area. If anything fails for an element, the error is printed and the
     next element is processed.

     Args:
         ho_list: Elements (supporting ``.xpath``) that describe rooms.
         co_id: Community id written to each record.
         bu_id: Building id written to each record.
     """
     size_patterns = (
         ('ho_build_size', '建筑面积:(.*?)\n'),
         ('ho_share_size', '分摊面积:(.*)'),
         ('ho_true_size', '套内面积:(.*?)\n'),
     )
     for cell in ho_list:
         try:
             record = House(co_index)
             record.co_id = co_id
             record.bu_id = bu_id
             record.ho_name = cell.xpath("./text()")[0]
             tooltip = cell.xpath("./@title")[0]
             for field, pattern in size_patterns:
                 setattr(record, field, re.search(pattern, tooltip).group(1))
             record.insert_db()
         except Exception as e:
             print("房屋信息错误{}".format(e))
Exemplo n.º 27
0
 def get_house_info(self, house_url, bu_id, co_id):
     """Fetch one room detail page and store the room it describes.

     The page is requested with POST and decoded as GBK. Floor, room
     number, predicted total area, predicted inner area and predicted
     shared area are each taken from the ``<td>`` following their label.
     Any failure is printed together with the URL and nothing is stored.

     Args:
         house_url: Absolute URL of the room detail page.
         bu_id: Building id written to the record.
         co_id: Community id written to the record.
     """
     field_patterns = (
         ('ho_floor', '所在楼层:.*?<td>(.*?)<'),
         ('ho_name', '房号:.*?<td>(.*?)<'),
         ('ho_build_size', '预测总面积:.*?<td>(.*?)<'),
         ('ho_true_size', '预测套内面积.*?<td>(.*?)<'),
         ('ho_share_size', '预测公摊面积.*?<td>(.*?)<'),
     )
     try:
         record = House(co_index)
         record.bu_id = bu_id
         record.co_id = co_id
         reply = requests.post(house_url, headers=self.headers)
         page = reply.content.decode('gbk')
         for attr, pattern in field_patterns:
             setattr(record, attr,
                     re.search(pattern, page, re.M | re.S).group(1))
         record.insert_db()
     except Exception as e:
         print('房号错误,co_index={},url={}'.format(co_index, house_url), e)
Exemplo n.º 28
0
 def get_house_info(self, zu_house_url, bu_num, co_id):
     """Collect the postback targets of a page and hand them to the detail step.

     The page is fetched through the session ``self.s``. The text after
     the ``ItemName`` marker is stored, stripped, in ``info``. The two
     arguments of every ``__doPostBack(...)`` click handler are gathered
     into parallel lists and passed, with the partially filled record, to
     ``self.get_house_detail``. Any exception is printed and swallowed.

     Args:
         zu_house_url: Absolute URL of the page to scan.
         bu_num: Building number written to the record.
         co_id: Community id written to the record.
     """
     try:
         record = House(co_index)
         record.bu_num = bu_num
         record.co_id = co_id
         page = self.s.get(zu_house_url, headers=self.headers).text
         item_name = re.search('ItemName.*?>(.*?)<', page)
         record.info = item_name.group(1).strip()
         multiline = re.S | re.M
         ho_code_list = re.findall("OnClick=.__doPostBack\(.*?,'(.*?)'\)",
                                   page, multiline)
         ho_msg_list = re.findall("OnClick=.__doPostBack\('(.*?)'", page,
                                  multiline)
         self.get_house_detail(zu_house_url, ho_msg_list, ho_code_list,
                               record)
     except Exception as e:
         print(e)
Exemplo n.º 29
0
    def house_info(self, co_id, bu_id, bu_url):
        """Store one record per ``<a wf=...>`` link on a www.bdfdc.net page.

        The page is downloaded, then the method sleeps five seconds before
        parsing. For each link carrying a ``wf`` attribute, the link text is
        the room name and the ``wf`` value is searched for the building area
        and the usage. Exceptions are not handled here and propagate to the
        caller.

        Args:
            co_id: Community id written to each record.
            bu_id: Building id written to each record.
            bu_url: Path of the building page, appended to the site root.
        """
        response = requests.get('http://www.bdfdc.net' + bu_url,
                                headers=self.headers)
        time.sleep(5)
        tree = etree.HTML(response.text)
        for anchor in tree.xpath("//a[@wf]"):
            record = House(co_index)
            wf_text = anchor.xpath("./@wf")[0]
            record.ho_name = anchor.xpath("./text()")[0]
            record.bu_id = bu_id
            record.co_id = co_id
            area = re.search('建筑面积:(.*?)m', wf_text)
            record.ho_build_size = area.group(1)
            usage = re.search('用途:(.*?)<br', wf_text)
            record.ho_type = usage.group(1)
            record.insert_db()
Exemplo n.º 30
0
 def get_house_info(self, con, co_id, build_id):
     """Store one record per ``<div style ...>`` entry of the house table.

     The part of ``con`` from ``houseTableData`` up to the first following
     ``特别申明`` is isolated; if it is absent, the resulting
     ``AttributeError`` propagates. Each ``<div style ...</div>`` inside it
     is searched for the ``HC_HOUSENUMB``, ``HC_HOUSETYPE`` and
     ``HC_STCTAREA`` values. An entry that fails is reported with ``print``
     and skipped.

     Args:
         con: Page text containing the house table.
         co_id: Community id written to each record.
         build_id: Building id written to each record.
     """
     flags = re.S | re.M
     table_section = re.search('houseTableData.*?特别申明', con, flags).group()
     field_patterns = (
         ('ho_name', "'HC_HOUSENUMB':'(.*?)',"),
         ('ho_room_type', "'HC_HOUSETYPE':'(.*?)',"),
         ('ho_build_size', "'HC_STCTAREA':'(.*?)',"),
     )
     for entry in re.findall('<div style.*?</div>', table_section, flags):
         try:
             record = House(co_index)
             for attr, pattern in field_patterns:
                 setattr(record, attr,
                         re.search(pattern, entry, flags).group(1))
             record.bu_id = build_id
             record.co_id = co_id
             record.insert_db()
         except Exception:
             print('house error, co_index={}'.format(co_index))