Exemplo n.º 1
0
 def get_comm_detail(self, comm_list):
     """Download each project page and record its community and buildings.

     For every entry of ``comm_list`` the page below
     ``http://www.lpsfdc.cn/Templets/LPS/aspx/`` is fetched, the project
     fields are extracted with regular expressions and ``insert_db()`` is
     called on a ``Comm`` object.  The building radio buttons found on the
     same page are turned into ``Building`` objects, and the list of
     building ids is passed to ``self.get_build_info``.

     Any error while handling one page is printed and the next page is tried.

     :param comm_list: iterable of page paths relative to the aspx directory.
     """
     flags = re.S | re.M
     for page_path in comm_list:
         try:
             comm = Comm(co_index)
             page = requests.get('http://www.lpsfdc.cn/Templets/LPS/aspx/' + page_path)
             html = page.text
             names = re.findall('项目名称:.*?>(.*?)<', html, flags)
             project_ids = re.findall('hdProjectCode" value="(.*?)"', html, flags)
             developers = re.findall('开发企业:.*?>(.*?)<', html, flags)
             build_sizes = re.findall('TJ_ZMJ">(.*?)<', html, flags)
             addresses = re.findall('Pro_XMDZ">(.*?)<', html, flags)
             owners = re.findall('Pro_ZZZSBH">(.*?)<', html, flags)
             pre_sales = re.findall('Pro_XKZH">(.*?)<', html, flags)
             house_totals = re.findall('TJ_HZYSTS">(.*?)<', html, flags)
             for row in range(len(names)):
                 try:
                     # The same Comm object is refilled for every project row.
                     comm.co_name = names[row]
                     comm.co_id = project_ids[row]
                     comm.co_develops = developers[row]
                     comm.co_build_size = build_sizes[row]
                     comm.co_address = addresses[row]
                     comm.co_owner = owners[row]
                     comm.co_pre_sale = pre_sales[row]
                     comm.co_all_house = house_totals[row]
                     comm.insert_db()
                 except Exception as e:
                     print('co_index={}, commiunty error'.format(co_index,), e)
                 # The building section is parsed once per project row.
                 building_ids = re.findall("radiobuild' id='build(.*?)'", html, flags)
                 building_names = re.findall("radiobuild.*?<span.*?>(.*?)<", html, flags)
                 for pos, building_id in enumerate(building_ids):
                     build = Building(co_index)
                     build.bu_id = building_id
                     build.bu_num = building_names[pos]
                     # Every building is attached to the first project id on the page.
                     build.co_id = project_ids[0]
                     build.insert_db()
                 self.get_build_info(building_ids)
         except Exception as e:
             print(e)
Exemplo n.º 2
0
 def get_build_info(self, co_id):
     """Fetch the building list of one project and record every building.

     Each ``<tr bgcolor="#f5f5f5">`` row of the list page yields one
     ``Building`` (number, house count, id) on which ``insert_db()`` is
     called; afterwards ``self.get_house_url`` is invoked for that building.

     Any error is printed together with the requested URL; rows after the
     failing one are not processed.

     :param co_id: project id; it is placed in the query string and copied
         onto every building.
     """
     # Built before the try block: the handler below reports this URL, so it
     # must exist whatever goes wrong.  str() keeps non-string ids from
     # breaking the concatenation.
     build_url = ('http://222.184.103.50:7700/WW/ZHList.aspx?projectID='
                  + str(co_id) + '&projectname=')
     flags = re.S | re.M
     try:
         response = requests.get(build_url, headers=self.headers)
         html = response.text
         build_info_list = re.findall('<tr bgcolor="#f5f5f5">.*?</tr>',
                                      html, flags)
         for row in build_info_list:
             build = Building(co_index)
             build.bu_num = re.search('<a id="LH".*?>(.*?)<', row,
                                      flags).group(1).strip()
             build.bu_all_house = re.search('<td.*?<td.*?>(.*?)<', row,
                                            flags).group(1).strip()
             build.bu_id = re.search('ZNo=(.*?)"', row,
                                     flags).group(1).strip()
             build.co_id = co_id
             build.insert_db()
             self.get_house_url(build.bu_id, co_id)
     except Exception as e:
         print('请求错误,co_index={},url={}'.format(co_index, build_url), e)
Exemplo n.º 3
0
 def get_build_info(self, build_url):
     """Parse one building-table page and record a row per building.

     The page is requested with ``self.headers``.  The pre-sale number found
     in the hidden ``hdl1_hfYszh`` field becomes ``co_id`` and is also passed
     to ``self.get_build_num`` to obtain ``bu_num``.  ``insert_db()`` is then
     called once per table row, and the collected house links are handed to
     ``self.get_house_info``.

     Any error is printed together with ``build_url``.

     :param build_url: absolute URL of the building table page.
     """
     flags = re.S | re.M
     try:
         build = Building(co_index)
         response = requests.get(build_url, headers=self.headers)
         # Example of a URL handled here:
         # http://www.gzbjfc.com/House/Table.aspx?xmmc=%E5%85%B0%E6%A1%A5%E5%9C%A3%E8%8F%B2&yszh=bj1740&qu=%E6%AF%95%E8%8A%82&zhlx=xs&dongID=30012124
         html = response.text
         bu_id_list = re.findall('cph_hb1_dg1.*?center.*?center.*?<td>(.*?)<', html, flags)
         build.co_id = re.findall('hdl1_hfYszh" value="(.*?)"', html, flags)[0]
         build.bu_num = self.get_build_num(build.co_id)
         bu_all_house_list = re.findall('cph_hb1_dg1.*?center.*?center.*?<td>.*?<td>.*?<td>(.*?)<', html,
                                        flags)
         house_url_list = re.findall('cph_hb1_dg1.*?<a.*?href="(.*?)"', html,
                                     flags)
         # One Building object is reused; only its id and house count change
         # between insert_db() calls.
         for row, bu_id in enumerate(bu_id_list):
             build.bu_id = bu_id
             build.bu_all_house = bu_all_house_list[row]
             build.insert_db()
         self.get_house_info(house_url_list)
     except Exception as e:
         print('楼栋错误,co_index={},url={}'.format(co_index, build_url), e)
Exemplo n.º 4
0
 def get_build_info(self, build_url_list):
     """Fetch the detail page of every building and record its attributes.

     For each entry the page ``selectBuild.jsp?buildID=<entry[0]>`` is
     requested, the labelled table cells are extracted with regular
     expressions, ``insert_db()`` is called on the ``Building`` and
     ``self.get_house_info`` is invoked with the building id.

     A failure on one building is printed with its URL and does not stop
     the remaining ones.

     :param build_url_list: iterable whose items carry the building id at
         index 0.
     """
     flags = re.S | re.M
     for i in build_url_list:
         # Reset per iteration so the handler never reports a missing or
         # stale URL when the failure happens before the URL is built.
         build_url = None
         try:
             build_url = 'http://222.223.160.199:8088/website/buildquery/selectBuild.jsp?buildID=' + i[0]
             build = Building(co_index)
             response = requests.get(build_url, headers=self.headers)
             html = response.text
             build.bu_id = i[0]
             build.co_build_structural = re.search('结构类型.*?<td.*?>(.*?)<', html, flags).group(1)
             build.bo_build_end_time = re.search('建成年份.*?<td.*?>(.*?)<', html, flags).group(1)
             build.bu_build_size = re.search('总建筑面积.*?<td.*?>(.*?)<', html, flags).group(1)
             build.bu_num = re.search('幢号.*?<td.*?>(.*?)<', html, flags).group(1)
             build.size = re.search('占地面积.*?<td>(.*?)<', html, flags).group(1)
             build.bu_floor = re.search('房屋层数.*?<td>(.*?)<', html, flags).group(1)
             build.bu_all_house = re.search('房屋套数.*?<td>(.*?)<', html, flags).group(1)
             build.area = re.search('坐落区.*?<td>(.*?)<', html, flags).group(1)
             build.insert_db()
             self.get_house_info(build.bu_id)
         except Exception as e:
             print('请求错误,url={}'.format(build_url), e)
Exemplo n.º 5
0
 def get_build_info(self, co_id):
     """Request the building rows of one activity and record each of them.

     A form-encoded POST is sent to ``http://www.yanjifc.com/jdi``; the JSON
     answer is expected to hold the rows under ``['ROWS']['ROW']``.  Every
     row is copied field by field (through ``self.dict_get``) onto a
     ``Building``, ``insert_db()`` is called, and ``self.get_house_info`` is
     invoked for the building.

     :param co_id: activity id sent as ``activityId`` and copied to each row.
     """
     # (Building attribute, JSON key) pairs, in assignment order.
     field_map = (
         ('bu_all_size', 'BUILDING_AREA'),
         ('bu_address', 'LOCATION'),
         ('bu_num', 'LOCATION'),
         ('bu_floor', 'TOTAL_FLOORS'),
         ('bu_all_house', 'TOTAL_SET'),
         ('co_build_structural', 'STRUCTURE'),
         ('bu_id', 'RESOURCE_GUID'),
     )
     build_url = 'http://www.yanjifc.com/jdi'
     payload = "activityId=" + str(co_id) + "&module=jtsActBuildingInfo"
     result = requests.post(url=build_url, data=payload, headers=self.headers)
     rows = result.json()['ROWS']['ROW']
     for row in rows:
         build = Building(co_index)
         for attribute, key in field_map:
             setattr(build, attribute, self.dict_get(row, key))
         build.co_id = co_id
         build.insert_db()
         self.get_house_info(co_id, build.bu_id)
Exemplo n.º 6
0
 def get_comm_info(self, comm_url_list):
     """Scrape every project page and record the project and one building.

     Each relative link has its ``../`` parts removed and is requested under
     ``http://www.hbczfdc.com:4993/``.  The project fields are read from the
     page by element id, ``insert_db()`` is called on the ``Comm``, then the
     first ``radiobuild`` entry is recorded as a ``Building`` and passed to
     ``self.get_build_info``.

     A failure on one page is printed with its URL; the loop continues.

     :param comm_url_list: iterable of relative project links.
     """

     def first_group(pattern, text):
         # First capture group of the first match; raises AttributeError
         # when the pattern does not match at all.
         return re.search(pattern, text, re.S | re.M).group(1)

     for relative_url in comm_url_list:
         comm_url = 'http://www.hbczfdc.com:4993/' + relative_url.replace('../', '')
         try:
             comm = Comm(co_index)
             html = requests.get(comm_url, headers=self.headers).text
             comm.co_name = first_group('id="Project_XMMC">(.*?)<', html)
             comm.co_address = first_group('id="Project_XMDZ">(.*?)<', html)
             comm.co_develops = first_group('id="Project_COMPANYNAME">(.*?)<', html)
             comm.area = first_group('id="Project_AREA_NAME">(.*?)<', html)
             comm.co_build_size = first_group('id="Project_GHZJZMJ">(.*?)<', html)
             comm.co_volumetric = first_group('id="Project_RJL">(.*?)<', html)
             comm.co_pre_sale = first_group('id="presellInfo".*?,,(.*?)"', html)
             comm.co_land_use = first_group('id="tdzInfo".*?,,(.*?)"', html)
             comm.co_work_pro = first_group('id="sgxkzInfo".*?,,(.*?)"', html)
             comm.co_plan_pro = first_group('id="ghxkzInfo".*?,,(.*?)"', html)
             # The project id is the trailing "code=" value of the page URL.
             comm.co_id = first_group('code=(.*?)$', comm_url)
             comm.insert_db()

             build = Building(co_index)
             build.bu_id = first_group("name='radiobuild'.*? bid=(.*?) ", html)
             build.bu_num = first_group("name='radiobuild'.*?<span.*?>(.*?)<", html)
             build.co_id = comm.co_id
             build.insert_db()
             self.get_build_info(build.bu_id, comm.co_id)
         except Exception as e:
             print('小区页面错误,co_index={},url={}'.format(co_index, comm_url),
                   e)
Exemplo n.º 7
0
    def bu_parse(self, co_id, bulist):
        """Fetch every building page, record the building and parse its houses.

        Each link is requested under ``http://110.89.45.7:8082``.  The
        building id comes from the ``buildingInfoID`` query parameter of the
        link; the remaining fields are read from the page.  After
        ``insert_db()`` the anchors inside styled table cells are handed to
        ``self.ho_parse``.

        :param co_id: id of the project the buildings belong to.
        :param bulist: iterable of site-relative building links.
        """

        def cell_after(pattern, page):
            # First capture group of the first multi-line match on the page.
            return re.search(pattern, page, re.S | re.M).group(1)

        for link in bulist:
            response = requests.get("http://110.89.45.7:8082" + link,
                                    headers=self.headers)
            page = response.text

            building = Building(co_index)
            building.co_id = co_id
            building.bu_id = re.search('buildingInfoID=(.*?)&', link).group(1)
            building.bu_num = cell_after('幢号.*?">(.*?)</', page)
            building.bu_floor = cell_after('总 层 数.*?">(.*?)</', page)
            building.bu_live_size = cell_after('批准销售.*?">.*?</td.*?">(.*?)</td', page)
            building.bu_all_size = cell_after('总面积.*?">(.*?)</', page)
            building.bu_type = cell_after('设计用途.*?">(.*?)</', page)
            building.insert_db()

            house_anchors = etree.HTML(page).xpath("//td[@style]/a")
            self.ho_parse(co_id, building.bu_id, house_anchors)
Exemplo n.º 8
0
 def get_build_url(self, build_url_list, co_id):
     """Fetch each building page, record the building and follow its house list.

     Pages are requested through the session ``self.s`` under
     ``http://www.nhfg.cn/webhouseinfo/ItemList/``.  The building name and
     total house count are extracted, ``insert_db()`` is called, and the
     first ``RoomLoad.aspx`` link on the page is passed to
     ``self.get_house_info``.

     A failure on one building is printed; the loop continues.

     :param build_url_list: iterable of links relative to ``ItemList/``.
     :param co_id: id of the project the buildings belong to.
     """
     flags = re.S | re.M
     for i in build_url_list:
         try:
             build = Building(co_index)
             build.co_id = co_id
             bu_url = 'http://www.nhfg.cn/webhouseinfo/ItemList/' + i
             response = self.s.get(bu_url)
             html = response.text
             build.bu_num = re.findall(
                 '<TD style="WIDTH: 471px" colSpan="11"><FONT style="COLOR: white" face="宋体">(.*?)<',
                 html, flags)[0].strip()
             build.bu_all_house = re.findall(
                 '商业</FONT></TD>.*?center">(.*?)<', html, flags)[0].strip()
             build.insert_db()
             # Raw string: "\." and "\?" are regex escapes, not Python string
             # escapes, and non-raw use triggers an invalid-escape warning.
             house_url = re.findall(r'(RoomLoad\.aspx\?.*?)"', html, flags)[0]
             zu_house_url = 'http://www.nhfg.cn/webhouseinfo/ItemList/HouseList/' + house_url
             self.get_house_info(zu_house_url, build.bu_num, co_id)
         except Exception as e:
             print(e)
Exemplo n.º 9
0
 def get_build_detail(self, build_url, co_id):
     """Fetch one building page, total its per-row sizes and record it.

     The page is requested through the session ``self.s``.  Inside the
     table that starts at the "已售已备案" header, the text of the sixth
     ``<div>`` of every row is added up (empty cells are skipped) and stored
     as ``bu_build_size``.  The building id is the ``GCZHId`` value at the
     end of the URL.  After ``insert_db()`` the houses are fetched through
     ``self.get_house_info``.

     No exception is handled here: a missing pattern raises AttributeError.

     :param build_url: link relative to ``http://www.yzfdc.cn/``.
     :param co_id: id of the project the building belongs to.
     """
     flags = re.S | re.M
     bu_url = 'http://www.yzfdc.cn/' + build_url
     html = self.s.get(bu_url, headers=self.headers).text

     build = Building(co_index)
     build.bu_num = re.search('查询幢号:.*?<span.*?<span.*?>(.*?)<', html,
                              flags).group(1)

     table_html = re.search('<div align="center">已售已备案.*?</table>', html,
                            flags).group()
     row_values = (
         re.search('<div.*?<div.*?<div.*?<div.*?<div.*?<div.*?>(.*?)<', row,
                   flags).group(1)
         for row in re.findall('<tr.*?</tr>', table_html, flags)
     )
     # Stays the integer 0 when no row carries a value, exactly like a
     # running total that starts at 0.
     build.bu_build_size = sum(float(value) for value in row_values if value)

     build.co_id = co_id
     build.bu_id = re.search('GCZHId=(.*?)$', bu_url).group(1)
     build.insert_db()
     self.get_house_info(co_id, build.bu_id)
Exemplo n.º 10
0
    def analyzer_comm_url(self, comm_url_list):
        """Scrape project pages, record projects/buildings, collect house URLs.

        Every URL is requested and decoded as GBK.  The labelled project
        fields are extracted and ``insert_db()`` is called on a ``Comm``.
        Each ``onmouseover`` table row is recorded as a ``Building``, and
        every ``href`` found in such a row is turned into an absolute URL
        below ``http://www.stfcj.gov.cn/stsite/ProjectList/``.

        Failures are printed (per building row or per project page) and do
        not stop the remaining work.

        :param comm_url_list: iterable of absolute project page URLs.
        :return: list of absolute house-list URLs gathered from all pages.
        """
        flags = re.S | re.M

        def labelled_span(label, page):
            # Text of the first <span> that follows the given label.
            return re.search(label + '.*?">.*?<span.*?>(.*?)</span>', page,
                             flags).group(1)

        all_url = []
        for i in comm_url_list:
            try:
                res = requests.get(i)
                html = res.content.decode('gbk')
                c = Comm(self.co_index)
                c.co_name = labelled_span('项目名称:', html)  # project name
                c.co_address = labelled_span('项目地址:', html)  # project address
                c.co_develops = labelled_span('开发商:', html)  # developer
                c.co_build_size = labelled_span('总建筑面积:', html)  # total floor area
                c.co_land_type = labelled_span('用地依据:', html)  # land-use basis
                c.co_all_house = labelled_span('>总套数:', html)  # total number of units
                c.area = labelled_span('所在区域:', html)  # district
                c.co_work_pro = labelled_span('施工许可证:', html)  # construction permit
                c.co_plan_pro = labelled_span('建设工程规划许可证:', html)  # planning permit
                c.insert_db()

                buildlist = re.findall('onmouseover.*?</TR>', html, flags)
                url_list = []
                for k in buildlist:
                    try:
                        b = Building(self.co_index)
                        build_list = re.findall('<TD.*?>(.*?)</TD>', k, flags)
                        b.co_name = build_list[1]
                        b.bu_num = build_list[2]
                        b.bu_type = build_list[4]
                        b.insert_db()
                        for j in re.findall('href="(.*?)"', k, flags):
                            url_list.append('http://www.stfcj.gov.cn/stsite/ProjectList/' + j)
                    except Exception as e:
                        # Report the same index the records are created with,
                        # instead of depending on a separate global name.
                        print('楼栋错误,co_index={},url={}'.format(self.co_index, i), e)
                all_url.extend(url_list)
            except Exception as e:
                print('小区错误,co_index={},url={}'.format(self.co_index, i), e)
        return all_url
Exemplo n.º 11
0
    def build_info(self, bu_list, co_id):
        """Follow each pre-sale row to its page and record the listed buildings.

        The link in the fourth cell of every row is requested under
        ``self.start_url`` and decoded as GBK.  The pre-sale licence number
        and its validity date are read once per page and copied onto every
        building found in the ``donglist`` table.  After ``insert_db()`` the
        houses are fetched through ``self.house_info``.

        No exception is handled here.

        :param bu_list: iterable of lxml row elements.
        :param co_id: id of the project the buildings belong to.
        """
        flags = re.S | re.M
        for row in bu_list:
            page_url = self.start_url + '/' + row.xpath("./td[4]/a/@href")[0]
            response = requests.get(page_url, headers=self.headers)
            response.encoding = 'gbk'
            page = response.text
            licence_no = re.search('预售许可证编号.*?blank">(.*?)</a', page, flags).group(1)
            licence_date = re.search('预售证有效日期.*?">(.*?)</td', page, flags).group(1)

            for dong in etree.HTML(page).xpath("//table[@id='donglist']/tr"):
                dong_url = dong.xpath("./td/a/@href")[0]
                building = Building(co_index)
                building.co_id = co_id
                building.bu_id = re.search('ID={(.*?)}', dong_url).group(1)
                building.bu_num = dong.xpath("./td[3]/text()")[0]
                building.bu_floor = dong.xpath("./td[4]/text()")[0]
                building.bu_pre_sale = licence_no
                building.bu_pre_sale_date = licence_date
                building.insert_db()
                self.house_info(co_id, building.bu_id, dong_url)
Exemplo n.º 12
0
    def build_info(self, co_id, bu_id):
        """Fetch one building page and record the building and its houses.

        The page ``ZNInfo.aspx?YSZID=<bu_id>&YSXMID=<co_id>`` is requested.
        The building number, total unit count and total area are extracted
        and ``insert_db()`` is called on the ``Building``.  Every
        ``<span class='syt-span'>`` then yields one ``House`` with its name
        and size.

        No exception is handled here: a missing pattern raises
        AttributeError and a missing ``<p>`` raises IndexError.

        :param co_id: project id (string; concatenated into the URL).
        :param bu_id: building id (string; concatenated into the URL).
        """
        bu_url = 'http://www.lsjs.gov.cn/WebLSZFGB/ZNInfo.aspx?YSZID=' + bu_id + "&YSXMID=" + co_id
        bu_res = requests.get(bu_url, headers=self.headers)
        con = bu_res.text
        bu = Building(co_index)
        bu.co_id = co_id
        bu.bu_id = bu_id
        bu.bu_num = re.search('znxx">(.*?)</span', con).group(1)
        # The label contains parentheses around "销".  Written bare in a
        # pattern they form capture group 1, so group(1) would be "销" rather
        # than the value.  They are matched literally here (ASCII or
        # full-width, or absent) so group(1) is the cell content.
        bu.bu_all_house = re.search(r'纳入网上预[((]?销[))]?售总套数.*?">(.*?)</', con,
                                    re.S | re.M).group(1)
        bu.bu_build_size = re.search(r'纳入网上预[((]?销[))]?售总面积.*?">(.*?)</', con,
                                     re.S | re.M).group(1)
        bu.insert_db()

        html = etree.HTML(con)
        house_list = html.xpath("//span[@class='syt-span']")
        for tag in house_list:
            ho = House(co_index)
            ho.bu_id = bu_id
            ho.co_id = co_id
            ho.ho_name = tag.xpath(".//p[@class='ewb-num']/text()")[0]
            ho.ho_build_size = tag.xpath(".//p[@class='ewb-con']/text()")[0]
            ho.insert_db()
Exemplo n.º 13
0
 def comm_list(self, html):
     """Visit every project linked from the list table and record it.

     Project links are taken from the third column of ``data_table_2``.
     Each page is requested under ``http://www.hbsfdc.com``; when the
     request fails the error is logged and the project is skipped.  The
     project fields are extracted and ``insert_db()`` is called on a
     ``Comm``; every ``radiobuild`` input on the page is recorded as a
     ``Building`` and passed to ``self.ho_parse``.

     Parsing errors are not handled here and propagate to the caller.

     :param html: parsed list page exposing an ``xpath`` method.
     """
     flags = re.S | re.M
     com_list = html.xpath("//table[@id='data_table_2']//tr/td[3]/a/@href")
     for com_temp in com_list:
         com_url = 'http://www.hbsfdc.com' + com_temp.replace(
             "../../..", '')
         try:
             com_res = requests.get(com_url, headers=self.headers)
         except Exception as e:
             # Keep the failure reason in the log instead of dropping it.
             log.error("{}小区访问失败: {}".format(com_url, e))
             continue
         com_con = com_res.content.decode()
         co = Comm(co_index)
         # Raw strings below: "\d" is a regex escape, not a string escape.
         co.co_id = re.search(r'lcode=(\d+)', com_temp).group(1)
         co.co_name = re.search('项目名称.*?XMMC">(.*?)</span', com_con,
                                flags).group(1)
         co.co_develops = re.search('开发公司.*?NAME">(.*?)</span', com_con,
                                    flags).group(1)
         co.co_address = re.search('项目地址.*?XMDZ">(.*?)</span', com_con,
                                   flags).group(1)
         co.area = re.search('所在区域.*?SZQY">(.*?)</span', com_con,
                             flags).group(1)
         co.co_volumetric = re.search('容积率.*?RJL">(.*?)</span', com_con,
                                      flags).group(1)
         co.co_pre_sale = re.search('预售证号.*?ZH">(.*?)</span', com_con,
                                    flags).group(1)
         co.co_build_size = re.search('总建筑面积.*?JZMJ">(.*?)</span', com_con,
                                      flags).group(1)
         co.insert_db()
         bu_list = re.findall("input name='radiobuild'.*?</td>", com_con)
         for bu in bu_list:
             bid = re.search(r'bid=(\d+)', bu).group(1)
             bo = Building(co_index)
             bo.co_id = co.co_id
             bo.bu_id = bid
             bo.bu_num = re.search('/>(.*?)</td>', bu).group(1)
             bo.insert_db()
             self.ho_parse(bid, co.co_id)
Exemplo n.º 14
0
 def get_build_info(self, build_url_list, co_id):
     """Fetch each building page, record the building and follow its houses.

     Pages are requested under ``http://www.fjlyfdc.com.cn/``.  The building
     id is the ``buildingInfoID`` query parameter of the URL; the other
     fields are the table cells that follow their labels.  After
     ``insert_db()`` all ``/House/HouseInfo?HouseCenterID=`` links of the
     page are passed to ``self.get_house_info``.

     A failure on one building is printed with its URL; the loop continues.

     :param build_url_list: iterable of site-relative building links.
     :param co_id: id of the project the buildings belong to.
     """
     flags = re.S | re.M

     def cell_after(label, page):
         # Text of the first table cell following the given label.
         return re.search(label + '.*?<td.*?>(.*?)<', page, flags).group(1)

     for i in build_url_list:
         build_url = 'http://www.fjlyfdc.com.cn/' + i
         try:
             build = Building(co_index)
             response = requests.get(build_url, headers=self.headers)
             html = response.text
             build.bu_id = re.search('buildingInfoID=(.*?)&',
                                     build_url).group(1)
             build.co_id = co_id
             build.bo_develops = cell_after('开发商:', html)
             build.co_name = cell_after('项目名称:', html)
             build.bu_address = cell_after('坐落位置:', html)
             build.bu_num = cell_after('幢号:', html)
             build.co_build_structural = cell_after('建筑结构:', html)
             build.bu_type = cell_after('设计用途:', html)
             build.bu_floor = cell_after('总 层 数:', html)
             build.co_all_size = cell_after('总面积:', html)
             build.bo_build_start_time = cell_after('开工日期:', html)
             build.insert_db()
             # Raw string: "\?" is a regex escape, not a Python string escape.
             house_url_list = re.findall(
                 r'href="(/House/HouseInfo\?HouseCenterID=.*?)"', html, flags)
             self.get_house_info(house_url_list, build.bu_id, co_id)
         except Exception as e:
             print('楼栋错误,co_index={},url={}'.format(co_index, build_url), e)
Exemplo n.º 15
0
    def get_build_info(self, build_url_list):
        """Fetch each building page, record the building and follow its houses.

        Pages are requested under ``http://www.fjnpfdc.com/House/`` and
        decoded as GBK.  The labelled table cells are extracted, the project
        and building ids are read from the link's ``ProjectId`` and
        ``BuildingId`` parameters, ``insert_db()`` is called, and the
        ``HouseInfo`` links found on the page go to ``self.get_house_info``.

        A failure on one building is printed with its link; the loop
        continues.

        :param build_url_list: iterable of links relative to ``/House/``.
        """

        def cell_after(pattern, page):
            # First capture group of the first multi-line match on the page.
            return re.search(pattern, page, re.S | re.M).group(1)

        for i in build_url_list:
            try:
                build = Building(co_index)
                response = requests.get('http://www.fjnpfdc.com/House/' + i,
                                        headers=self.headers)
                page = response.content.decode('gbk')

                build.co_name = cell_after("项目名称:.*?<td.*?>(.*?)<", page)
                build.bu_num = cell_after("幢  号:.*?<td.*?>(.*?)<", page)
                build.co_use = cell_after("设计用途:.*?<td.*?>(.*?)<", page)
                build.co_build_structural = cell_after("建筑结构:.*?<td.*?>(.*?)<", page)
                build.bu_floor = cell_after("总 层 数:.*?<td.*?>(.*?)<", page)
                build.bu_build_size = cell_after("总 面 积:.*?<td.*?>(.*?)<", page)
                build.co_build_end_time = cell_after("竣工日期:.*?<td.*?>(.*?)<", page)

                # House links are collected straight from the page text.
                house_links = re.findall('<a href="(HouseInfo.*?)"', page)

                build.co_id = re.search('ProjectId=(.*?)&', i).group(1)
                build.bu_id = re.search('BuildingId=(.*?)&P', i).group(1)
                build.insert_db()
                self.get_house_info(house_links, build.bu_id, build.co_id)
            except Exception as e:
                print("co_index={},楼栋{}错误".format(co_index, i), e)
Exemplo n.º 16
0
 def get_build_info(self, build_all_url):
     """Walk the pre-sale pages of one project and record its buildings.

     The first entry of ``build_all_url`` is requested under
     ``http://www.tmsf.com/``.  Every ``doPresell('<id>')`` found there is
     requested as ``?presellid=<id>`` (10 s timeout); each ``doBuilding``
     anchor in that answer becomes a ``Building`` whose ``co_id`` is
     ``<sid>_<propertyid>``, and ``self.get_house_info`` is called for it.

     Request failures are printed and skipped.  Parsing failures (missing
     hidden fields or building section) are not handled and propagate.

     :param build_all_url: sequence whose first item is the site-relative
         project link.
     """
     flags = re.S | re.M
     build_url = 'http://www.tmsf.com/' + build_all_url[0]
     try:
         response = requests.get(build_url, headers=self.headers)
     except Exception as e:
         print('楼栋错误,co_index={},url={}'.format(co_index, build_url), e)
         return
     html = response.text
     # Raw strings: "\(" and "\)" are regex escapes, not string escapes.
     build_code_list = re.findall(r"javascript:doPresell\('(.*?)'\)", html)
     sid = re.findall('id="sid" value="(.*?)"', html)[0]
     propertyid = re.findall('id="propertyid" value="(.*?)"', html)[0]
     co_id = sid + '_' + propertyid
     for presellid in build_code_list:
         build_detail_url = build_url + '?presellid=' + presellid
         try:
             result = requests.get(build_detail_url,
                                   headers=self.headers,
                                   timeout=10).text
         except Exception as e:
             print(
                 "楼栋错误,co_index={},url={}".format(co_index,
                                                  build_detail_url), e)
             continue
         build_num_html = re.search("幢  号.*?面  积:", result,
                                    flags).group()
         build_num_list = re.findall('<a.*?</a>', build_num_html, flags)
         for i in build_num_list:
             build = Building(co_index)
             build_num = re.search(r"doBuilding\('(.*?)'\)", i,
                                   flags).group(1)
             build.bu_num = re.search("doBuilding.*?>(.*?)<", i,
                                      flags).group(1)
             build.bu_id = build_num
             build.co_id = co_id
             build.insert_db()
             self.get_house_info(build_num, sid)
Exemplo n.º 17
0
    def get_build_info(self, bu_pre_sale, bo_develops, bu_co_name, bu_con):
        """Build one ``Building`` from an HTML fragment and call ``insert_db()``.

        The labelled values are extracted from ``bu_con`` with regular
        expressions; the three remaining arguments are copied unchanged.

        No exception is handled here: a label that does not match raises
        AttributeError.

        :param bu_pre_sale: value stored as ``bu_pre_sale``.
        :param bo_develops: value stored as ``bo_develops``.
        :param bu_co_name: value stored as ``co_name``.
        :param bu_con: HTML text describing the building.
        """
        flags = re.S | re.M

        def grab(pattern):
            # First capture group of the first multi-line match in bu_con.
            return re.search(pattern, bu_con, flags).group(1)

        build = Building(co_index)

        # Raw strings: "\d" is a regex escape, not a Python string escape.
        build.bu_id = grab(r'编号.*?>(\d+)<')
        build.bu_num = grab(r'幢号.*?>(\d+)<')
        build.bu_floor = grab(r'总层数.*?>(\d+)<')
        # NOTE(review): the "." between the digit runs is unescaped, so it
        # accepts any character, not only a decimal point - confirm intent.
        build.bu_build_size = grab(r'预售建筑面积.*?>(\d+.\d+)<')
        build.bu_address = grab('楼房坐落.*?;">(.*?)</span')
        build.bu_live_size = grab(r'住宅建筑面积.*?>(\d+.\d+)<')
        build.bu_not_live_size = grab('非住宅建筑面积.*?;">(.*?)</span')
        build.bo_build_start_time = grab('开工日期.*?;">(.*?)</span')
        build.bu_all_house = grab(r'总套数.*?>(\d+)<')
        build.bu_pre_sale = bu_pre_sale
        build.bo_develops = bo_develops
        build.co_name = bu_co_name
        build.insert_db()
Exemplo n.º 18
0
    def bu_info(self, bu_list, co_id):
        """Fetch each building page, record the building and parse its houses.

        Pages are requested under ``http://www.fxfdcw.com/`` and decoded as
        GBK.  The building id is the ``bdid`` parameter of the link; the
        other fields are read from the page.  After ``insert_db()`` all
        ``<span title=...>`` elements are handed to ``self.ho_info``.

        A failure while fetching or parsing one building is printed and that
        building is skipped; errors raised by ``self.ho_info`` propagate.

        :param bu_list: iterable of site-relative building links.
        :param co_id: id of the project the buildings belong to.
        """
        flags = re.S | re.M
        for bu in bu_list:
            try:
                bu_url = 'http://www.fxfdcw.com/' + bu
                res = requests.get(bu_url, headers=self.headers)
                con = res.content.decode('gbk')
                html = etree.HTML(con)
                build = Building(co_index)
                build.co_id = co_id
                # Raw string: "\d" is a regex escape, not a string escape.
                build.bu_id = re.search(r'bdid=(\d+)', bu).group(1)
                build.bu_num = re.search('楼号.*?">(.*?)</', con, flags).group(1)
                build.bu_address = re.search('坐落.*?">(.*?)</', con, flags).group(1)
                build.bu_floor = re.search('地上层数.*?">(.*?)</', con, flags).group(1)
                build.bu_build_size = re.search('建筑面积.*?wrap">(.*?)</', con, flags).group(1)
                build.bu_all_house = re.search('套 数.*?">(.*?)</', con, flags).group(1)
                build.bu_type = re.search('用  途.*?wrap">(.*?)</', con, flags).group(1)
                build.insert_db()

                ho_list = html.xpath("//span[@title]")
            except Exception as e:
                print("楼栋信息错误{}".format(e))
                continue
            self.ho_info(ho_list, co_id, build.bu_id)
Exemplo n.º 19
0
 def build_info(self, build_detail, co_id):
     """Record every building of a project page together with its houses.

     The project page is requested under ``http://as.gzfcxx.cn``.  Each
     building link found in the ``box`` div is fetched, the building is
     recorded via ``insert_db()`` (id from ``dongID``, number from the
     selected ``<option>``), and the matching ``FloorView.aspx`` page is
     parsed into ``House`` objects.

     Failures are logged per building or per house and then skipped.  The
     initial project request is not protected.

     :param build_detail: site-relative link of the project page.
     :param co_id: id of the project.
     """
     build_detail_url = 'http://as.gzfcxx.cn' + build_detail
     res = requests.get(build_detail_url, headers=self.headers)
     html = etree.HTML(res.text)
     build_info_list = html.xpath("//div[@class='box']//font/a/@href")
     for build_url in build_info_list:
         try:
             url = 'http://as.gzfcxx.cn' + build_url
             ho_res = requests.get(url, headers=self.headers)
             ho_html = etree.HTML(ho_res.text)
             bu = Building(co_index)
             bu.co_id = co_id
             # Raw strings: "\d" and "\?" are regex escapes, not string escapes.
             bu.bu_id = re.search(r'dongID=(\d+)', build_url).group(1)
             bu.bu_num = ho_html.xpath(
                 "//option[@selected='selected']/text()")[0]
             bu.insert_db()
             temp = re.search(r"\?(.*?dongID=\d+)", build_url).group(1)
             real_url = 'http://as.gzfcxx.cn/Controls/HouseControls/FloorView.aspx?' + temp
             house_res = requests.get(real_url, headers=self.headers)
             ho_html = etree.HTML(house_res.text)
             info = ho_html.xpath("//table[@class='C1 T0 F0']/..")
         except Exception as e:
             # The exception is formatted into the message: passing it as an
             # extra argument without a placeholder would not render it.
             log.error('楼栋信息错误: {}'.format(e))
             continue
         for i in info:
             try:
                 ho = House(co_index)
                 ho_info = i.xpath("./@title")[0]
                 # NOTE(review): group(1) is only the digits before the
                 # separator, so any fractional part is dropped - confirm
                 # this is intended.
                 ho.ho_build_size = re.search(r'(\d+).(\d+)', ho_info,
                                              re.S | re.M).group(1)
                 ho.ho_name = i.xpath(".//span/text()")[0]
                 ho.bu_id = bu.bu_id
                 ho.co_id = co_id
                 ho.insert_db()
             except Exception as e:
                 log.error('房间信息错误: {}'.format(e))
Exemplo n.º 20
0
    def get_comm_detail(self, href, comm):
        """Fill ``comm`` from its detail page and record its buildings.

        The page ``self.URL_FRONT + href`` is requested and decoded as GBK.
        The project id is the integer after the first ``=`` of the final
        response URL.  Labelled fields are extracted through
        ``self.regex_common`` and copied onto ``comm``.  For every building
        link on the page ``self.get_build_detail`` supplies further data, a
        ``Building`` is recorded via ``insert_db()``, and finally
        ``comm.insert_db()`` is called.

        When the page lists no building links the method returns early and
        ``comm.insert_db()`` is NOT called.
        NOTE(review): confirm that skipping such projects is intended.

        A failure on one building is printed with its URL and the error;
        the remaining buildings are still processed.

        :param href: link relative to ``self.URL_FRONT``.
        :param comm: community object to fill in and insert.
        """
        flags = re.M | re.S
        comm_detail_url = self.URL_FRONT + href
        response = requests.get(url=comm_detail_url, headers=self.headers)
        co_id = int(response.url.split('=')[1])  # project id
        html = response.content.decode('gbk')

        # (attribute, pattern) pairs, evaluated in this order.
        field_patterns = (
            ('co_name', r'项目名称.*?<td.*?>(.*?)</td>'),  # project name
            ('co_owner', r'房屋所有权证号.*?<td.*?>(.*?)</td>'),
            ('co_use', r'用  途.*?<td.*?>(.*?)</td>'),
            ('co_develops', r'开 发 商.*?<td.*?>(.*?)</td>'),
            ('co_address', r'项目位置.*?<td.*?>(.*?)</td>'),
            ('co_pre_sale', r'预售证号.*?<td.*?>(.*?)</td>'),
            ('co_land_use', r'土地使用权证.*?<td.*?>(.*?)</td>'),
            ('co_land_type', r'土地权证类型.*?<td.*?>(.*?)</td>'),
            ('co_handed_time', r'终止日期.*?<td.*?>(.*?)</td>'),
            ('co_plan_pro', r'规划许可证.*?<td.*?>(.*?)</td>'),
            ('co_work_pro', r'施工许可证.*?<td.*?>(.*?)</td>'),
            ('co_type', r'项目类型.*?<td.*?>(.*?)</td>'),  # project type
            ('co_size', r'批准面积.*?<td.*?>(.*?)</td>'),  # approved area
        )
        # Extract everything first so comm is only touched once all
        # patterns have been evaluated.
        extracted = [(attribute, self.regex_common(pattern, html))
                     for attribute, pattern in field_patterns]
        comm.co_id = co_id
        for attribute, value in extracted:
            setattr(comm, attribute, value)

        # Collect the building links of the page.
        build_url_list = re.findall(r"<td><a href='(.*?)'", html, flags)
        if not build_url_list:
            return

        for build_url in build_url_list:
            try:
                building = Building(self.CO_INDEX)
                # NOTE(review): these three values are searched in the whole
                # page, so every building of the page gets the same id,
                # unit count and price - verify against the page layout.
                build_id = re.search(r'<td>(\d{2,6})</td>', html,
                                     flags).group(1)  # building id
                bu_all_house = re.search(r'<td>(\d{1,3})</td>', html,
                                         flags).group(1)  # total units
                # Raw strings: "\." and "\d" are regex escapes.
                bu_price_demo = re.findall(r'<td>[\.\d]+</td>', html,
                                           flags)[4]
                bu_price = re.search(r'\d+', bu_price_demo).group()
                data_dict = self.get_build_detail(build_url)
                bu_num = data_dict['bu_num']  # building number
                bu_build_size = data_dict['bu_build_size']  # floor area
                co_address = data_dict['co_address']  # community address
                co_build_end_time = data_dict['co_build_end_time']  # completion date
                co_build_type = data_dict['co_build_type']  # building type
                if not co_build_end_time:
                    building.co_is_build = '1'
                # Values from the building page overwrite the community's.
                comm.co_address = co_address
                comm.co_build_end_time = co_build_end_time
                comm.bu_build_size = bu_build_size
                comm.co_build_type = co_build_type
                # Building record
                building.bu_num = bu_num
                building.bu_build_size = bu_build_size
                building.bu_all_house = bu_all_house
                building.bu_id = build_id
                building.co_id = co_id
                building.bu_price = bu_price
                building.insert_db()
            except Exception as e:
                build_detail_url = self.URL_FRONT + build_url
                # Include the error itself; the URL alone hides the cause.
                print('楼栋错误:', build_detail_url, e)
        comm.insert_db()
Exemplo n.º 21
0
    def get_comm_info(self, comm_info):
        """Store one community and crawl the buildings and houses under it.

        The community name, address, area and detail link are parsed out of
        ``comm_info``. The detail page is then downloaded to fill in the
        developer and totals before the community record is inserted. Every
        building row containing an "进入" link is followed: a link carrying
        ``ID=<digits>`` is handled as an existing-sale building, anything else
        as a pre-sale page whose table rows each yield one building record and
        one house page.

        :param comm_info: HTML fragment describing a single community row.
        :return: None. Returns early, storing nothing, when the community
            detail page cannot be requested.
        :raises AttributeError: if a mandatory pattern (name, area, detail
            link, building link, pre-sale fields) is missing from the HTML.
        """
        site_root = "http://www.qyfgj.cn/newys/"
        # Headers shared by all follow-up requests; only the Referer differs.
        base_headers = {
            'User-Agent':
            'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.119Safari/537.36',
            'Cookie': 'ASP.NET_SessionId=irv0qjamqztp1pb0shoqrx2j',
        }

        co = Comm(co_index)
        co.co_name = re.search(r'_blank">(.*?)</a', comm_info).group(1)
        try:
            # The address is the second 'px">' cell; it may be absent.
            co.co_address = re.findall(r'px">(.*?)</td', comm_info)[1]
        except IndexError:
            co.co_address = None
        co.area = re.search(r'center">(.*?)</td>', comm_info).group(1)
        co_detail_url = re.search(r"href='(.*?)'", comm_info).group(1)
        co_url = site_root + co_detail_url
        try:
            res = requests.get(co_url, headers=self.headers)
        except Exception as e:
            print("co_index={}小区未请求到".format(co_index), e)
            # Without the detail page there is nothing left to parse.
            return
        con = res.content.decode('gbk')
        try:
            # A missing pattern makes re.search return None, hence the
            # AttributeError on .group(); later fields are then left unset.
            co.co_develops = re.search(r'开发商名称.*?px;">(.*?)</a', con,
                                       re.S | re.M).group(1)
            co.co_all_house = re.search(r'总套数.*?">(\d+)&nbsp', con,
                                        re.S | re.M).group(1)
            co.co_all_size = re.search(r'总面积.*?">(\d+.\d+)&nbsp;m', con,
                                       re.S | re.M).group(1)
        except AttributeError:
            print("小区无开发商等信息")
        co.insert_db()

        # re.findall never raises on "no match"; it returns an empty list.
        build = re.findall(r'<tr bgcolor="white">(.*?)</tr>', con,
                           re.S | re.M)
        if not build:
            print("小区没有楼栋信息")
            return
        build_headers = dict(base_headers, Referer=co_url)

        for build_info in build:
            if "进入" not in build_info:
                print("楼栋无链接地址")
                continue

            build_url = site_root + re.search(r'href="(.*?)"><font',
                                              build_info).group(1)
            ho_headers = dict(base_headers, Referer=build_url)
            build_res = requests.get(build_url, headers=build_headers)
            build_con = build_res.content.decode('gbk')

            bu = Building(co_index)
            bu.co_name = co.co_name

            id_match = re.search(r'ID=(\d+)', build_url)
            if id_match:  # existing sale
                bu_id = id_match.group(1)
                bu.bu_id = bu_id
                bu.insert_db()
                self.get_house_info(headers=ho_headers,
                                    bu_id=bu_id,
                                    url=build_url)
                continue

            # Pre-sale: the page describes a permit with one table row per
            # building; the shared permit fields are read once.
            bu.bu_type = re.search(r'用途.*?">(.*?)</td>', build_con,
                                   re.S | re.M).group(1)
            bu.bu_pre_sale = re.search(r'许可证编号.*?_blank">(.*?)</a>',
                                       build_con, re.S | re.M).group(1)
            bu.bu_pre_sale_date = re.search(r'有效日期.*?">(.*?)</td>',
                                            build_con,
                                            re.S | re.M).group(1)
            bu.bu_address = re.search(r'项目座落.*?">(.*?)</td>', build_con,
                                      re.S | re.M).group(1)
            rows = re.findall(r'<tr onmouseover(.*?)</tr', build_con,
                              re.S | re.M)
            for row in rows:
                house_url = site_root + re.search(r'href="(.*?)"',
                                                  row).group(1)
                # The same Building object is re-filled and inserted per row.
                bu.bu_id = re.search(r'dbh=(.*?)&', row).group(1)
                bu.bu_num = re.search(r'<td width="89.*?">(.*?)</',
                                      row).group(1)
                bu.bu_floor = re.search(r'<td width="84.*?">(\d+)</td',
                                        row).group(1)
                bu.insert_db()

                ho_res = requests.get(house_url, headers=ho_headers)
                ho_con = ho_res.content.decode('gbk')
                new_headers = dict(base_headers, Referer=house_url)
                self.get_house_info(ho_con=ho_con,
                                    headers=new_headers,
                                    bu_id=bu.bu_id)
Exemplo n.º 22
0
    def get_build_url_list(self, url_list):
        """Crawl listing pages and store each community, building and house.

        For every listing URL the page is downloaded and split into community
        blocks. Each community is inserted, the module-level ``count`` is
        incremented and printed, and the community's detail page is fetched to
        obtain its building table. Each table row is then handed to
        ``_crawl_building``.

        A failure at any level is printed and only the affected page,
        community or building is skipped; the crawl continues.

        :param url_list: iterable of listing page URLs.
        :return: None.
        """
        global count  # module-level running total of stored communities
        flags = re.S | re.M
        for list_url in url_list:
            try:
                list_html = requests.get(list_url).content.decode('gbk')
            except Exception as e:
                print('co_index={}, listing page {} failed'.format(
                    self.co_index, list_url), e)
                continue

            for comm_block in re.findall(r'项目名称.*?</dl>', list_html, flags):
                try:
                    co_name = re.search(r'html">(.*?)</a>', comm_block,
                                        flags).group(1)
                    c = Comm(self.co_index)
                    c.co_name = co_name
                    c.co_address = re.search(r'class="address"(.*?)</dd>',
                                             comm_block, flags).group(1)
                    c.area = re.search(r'"city">(.*?)</dd>', comm_block,
                                       flags).group(1)
                    c.co_develops = re.search(r'"average">(.*?)</dd>',
                                              comm_block, flags).group(1)
                    c.insert_db()
                    count += 1
                    print(count)

                    detail_path = re.search(r'a href="(.*?)">', comm_block,
                                            flags).group(1)
                    detail_html = requests.get(
                        self.url_source + detail_path).content.decode('gbk')
                    build_info_str = re.search(r'楼盘表</td>(.*?)合  计',
                                               detail_html, flags).group(1)
                except Exception as e:
                    print('co_index={}, community error'.format(
                        self.co_index), e)
                    continue

                for row_html in re.findall(r'<tr.*?</tr>', build_info_str,
                                           flags):
                    try:
                        self._crawl_building(row_html, co_name)
                    except Exception as e:
                        print('co_index={}, building error'.format(
                            self.co_index), e)
                        continue

    def _crawl_building(self, row_html, co_name):
        """Store the building of one table row and every house listed for it.

        :param row_html: HTML of a single ``<tr>`` from the building table.
        :param co_name: name of the community the building belongs to.
        :raises Exception: any parse or request failure before the house loop
            propagates to the caller; per-house failures are printed and
            skipped.
        """
        flags = re.S | re.M
        bu_num = re.search(r'="absmiddle"  />(.*?)</a></strong></', row_html,
                           flags).group(1)
        b = Building(self.co_index)
        b.co_name = co_name
        # NOTE(review): this pattern captures the same anchor text as bu_num,
        # so bu_all_house likely holds the building label — confirm.
        b.bu_all_house = re.search(r'absmiddle"  />(.*?)</a>', row_html,
                                   flags).group(1)
        b.bu_num = bu_num
        b.bu_build_size = re.search(
            r'td class="t_c">.*?td class="t_c">(.*?㎡)</td>', row_html,
            flags).group(1)
        b.insert_db()

        build_path = re.search(r'a href="(.*?)"', row_html, flags).group(1)
        build_html = requests.get(
            self.url_source + build_path).content.decode('gbk')
        # The house table lives in an iframe; its "Default" page has a
        # "GetData" counterpart that returns the logic-building id.
        house_url = self.url_source + re.search(r'<iframe.*?"(.*?)"',
                                                build_html, flags).group(1)
        logic_house_url = house_url.replace('Default', 'GetData')
        logic_house_html = requests.get(url=logic_house_url).content.decode()
        logic_id = re.search(r'<LOGICBUILDING_ID>(.*?)<', logic_house_html,
                             flags).group(1)
        final_url = 'http://www.yingtanfdc.com/website/presale/home/HouseTableControl/GetData.aspx?LogicBuilding_ID=' + logic_id
        final_html = requests.get(url=final_url).content.decode('gbk')

        for room_number in re.findall(r'<ROOM_NUMBER>(.*?)</ROOM_NUMBER>',
                                      final_html, flags):
            try:
                h = House(self.co_index)
                h.info = final_html
                h.ho_name = room_number
                h.co_name = co_name
                h.bu_num = bu_num
                h.insert_db()
            except Exception as e:
                print('co_index={}, house error'.format(self.co_index), e)
                continue
Exemplo n.º 23
0
    def get_comm_info(self, url, response, comm):
        """Fill and store a community record plus one record per building.

        Community fields are read from fixed XPath positions in the detail
        page. The building links are then extracted from the whitespace-
        stripped HTML; for each building the floor-count endpoint and the room
        page are requested, the room page is handed to ``self.get_build_info``
        and the building is inserted. The community itself is inserted last.

        :param url: community detail URL; the text after ``id=`` is the
            community id.
        :param response: response object of the community detail page (its
            ``.text`` is parsed).
        :param comm: community record object to populate and insert.
        :return: None.
        :raises IndexError: if one of the expected XPath nodes is missing.
        :raises AttributeError: if the id or the building-link paragraph
            cannot be found.
        """
        html = response.text
        tree = etree.HTML(html)
        info_table = '//*[@id="content"]/div[2]/div[1]/div[2]/table'

        # Area
        co_area = tree.xpath(info_table + '/tr[3]/td[2]/text()')[0]
        # Community name
        co_name = tree.xpath(info_table + '/tr[1]/td/strong/span/text()')[0]
        # Community address
        co_address = tree.xpath(info_table + '/tr[2]/td/span/text()')[0]
        # Value stored as the developer.
        # NOTE(review): the original comment labelled this lookup as the
        # property-management company although it feeds co_develops — confirm.
        co_develops = tree.xpath('//div[@class="wzjs-box"]//tr[3]//span/text()')[0]
        # Plot ratio
        co_volumetric = tree.xpath(info_table + '/tr[5]/td[2]/span/text()')[0]
        # Pre-sale certificate
        co_pre_sale = tree.xpath(info_table + '/tr[6]/td[1]/text()')[0]
        # Building area
        co_build_size = tree.xpath(info_table + '/tr[5]/td[1]')[0].text
        # Community id
        co_id = re.search(r'id=(.*?)$', url).group(1)

        # Strip all whitespace so the patterns below need not allow for it.
        html_ = html.replace('\t', '').replace('\r', '').replace('\n', '').replace(' ', '')
        bu_url_info = re.search(r'<pclass="bot-a">(.*?)</p>', html_).group(1)
        building_url_list = re.findall(r'<td><aid="(.*?)"(.*?)>(.*?)</a>', bu_url_info)

        for value, _attrs, bu_name in building_url_list:
            build = Building(co_index)
            house_url = 'http://fsfc.fsjw.gov.cn/hpms_project/room.jhtml?id=' + value
            floor_url = "http://fsfc.fsjw.gov.cn/hpms_project/roomtj.jhtml?id=" + value

            try:
                res = requests.get(floor_url, headers=self.headers)
            except Exception as e:
                print("co_index={},楼栋详情页{}访问失败".format(co_index,floor_url))
                print(e)
                continue

            try:
                build.bu_floor = json.loads(res.text)["zcs"]
            except (ValueError, KeyError, TypeError):
                # Not JSON, or JSON without a usable "zcs" entry.
                build.bu_floor = None

            try:
                house_response = requests.get(house_url, headers=self.headers)
            except Exception as e:
                print("co_index={},房屋详情页{}请求失败".format(co_index,house_url))
                print(e)
            else:
                # Only parse rooms when this building's own page was fetched;
                # a stale response must never be parsed under this id.
                self.get_build_info(house_url, house_response, co_id, value)

            build.co_id = co_id
            build.bu_id = value
            build.bu_name = bu_name
            build.insert_db()

        comm.co_name = co_name
        comm.co_id = co_id
        comm.co_address = co_address
        comm.co_develops = co_develops
        comm.co_volumetric = co_volumetric
        comm.co_pre_sale = co_pre_sale
        comm.co_build_size = co_build_size
        comm.area = co_area
        comm.insert_db()
Exemplo n.º 24
0
    def comm_crawler(self, comm_url, co_develops, co_pre_sale, co_name,
                     co_pre_sale_date):
        """Crawl one community page: store its buildings and their houses.

        The community page supplies the property id and site id. The community
        fields are forwarded to ``self.comm_info``; then, for every building
        link, a building record is inserted and each page of that building's
        price list is walked. Every listed house page is downloaded, parsed
        and inserted.

        :param comm_url: URL of the community page.
        :param co_develops: passed through to ``self.comm_info``.
        :param co_pre_sale: passed through to ``self.comm_info``.
        :param co_name: passed through to ``self.comm_info``.
        :param co_pre_sale_date: passed through to ``self.comm_info``.
        :return: None.
        :raises IndexError: if the property id, site id or a building link's
            text/id is missing.
        :raises AttributeError: if a building id has no digits or the page
            counter is missing from a price list.
        """
        comm_res = requests.get(comm_url, headers=self.headers)
        comm_html = etree.HTML(comm_res.text)
        value = comm_html.xpath("//input[@id='propertyid']/@value")[0]
        sid = comm_html.xpath("//input[@id='sid']/@value")[0]

        # The first anchor is skipped (presumably an "all buildings" entry —
        # confirm against the page).
        building_links = comm_html.xpath("//div[@id='building_dd']//a")[1:]
        self.comm_info(co_develops, co_pre_sale, co_name, co_pre_sale_date,
                       value)

        for link in building_links:
            bu = Building(co_index)
            bu.bu_num = link.xpath("./text()")[0]
            bu_id = link.xpath("./@id")[0]
            bu.bu_id = re.search(r'\d+', bu_id).group(0)
            bu.co_id = value
            bu.insert_db()

            building_url = "http://hu.tmsf.com/newhouse/property_" + str(
                sid) + "_" + str(value) + "_price.htm?buildingid=" + str(
                    bu.bu_id)
            page_html = requests.get(building_url, headers=self.headers)
            page = re.search(r'页数 \d+/(\d+)', page_html.text).group(1)

            for i in range(1, int(page) + 1):
                # Built from the base URL each time so page suffixes do not
                # pile up; '&' because the URL already has a query string.
                page_url = building_url + "&page=" + str(i)
                detail_res = requests.get(page_url, headers=self.headers)
                listing = etree.HTML(detail_res.text)
                house_url_list = listing.xpath("//td[@width='100']/a/@href")
                house_bu_num = listing.xpath("//td[@width='100']/a/text()")
                house_name = listing.xpath(
                    "//td[@width='101'][1]/a/div/text()")

                # The three lists are parallel: one entry per listed house.
                for house_path, bu_num, ho_name in zip(
                        house_url_list, house_bu_num, house_name):
                    house_url = "http://hu.tmsf.com" + house_path
                    try:
                        ho = House(co_index)
                        ho.bu_num = bu_num  # building number
                        house_res = requests.get(house_url,
                                                 headers=self.headers)
                        house_text = house_res.text
                        ho.bu_id = bu.bu_id
                        ho.co_id = re.search(r'楼盘主页.*?_\d+_(\d+)_info',
                                             house_text).group(1)  # community id
                        ho.ho_name = ho_name  # room label, e.g. unit 3 room 403

                        # House usage type
                        ho.ho_type = re.search(r'房屋用途:.*?>(.*?)<',
                                               house_text).group(1)
                        ho.ho_floor = re.search(r'第(.*?)层',
                                                house_text).group(1)

                        # Numeric values are rendered through CSS classes;
                        # self.number turns the class list into the value
                        # (helper not visible here — confirm).
                        build_text = re.search(r'建筑面积:(.*?)平方米',
                                               house_text).group(1)
                        build_num = re.findall(r'class="(.*?)"', build_text)
                        ho.ho_build_size = self.number(build_num)  # building area

                        size_text = re.search(r'套内面积:(.*?)平方米',
                                              house_text).group(1)
                        size_num = re.findall(r'class="(.*?)"', size_text)
                        ho.ho_true_size = self.number(size_num)  # interior area

                        price_text = re.search(r'总  价:(.*?)万元',
                                               house_text).group(1)  # total price
                        price_num = re.findall(r'class="(.*?)"', price_text)
                        ho.ho_price = self.number(price_num)

                        ho.insert_db()
                    except Exception as e:
                        # Best effort: report the house and move on.
                        print('co_index={}, house {} failed'.format(
                            co_index, house_url), e)
                        continue
Exemplo n.º 25
0
 def start_crawler(self):
     """Walk the pre-sale list pages and store each building with its houses.

     Pages 1 to 20 of the list endpoint are requested. Every record found
     yields one building, whose detail page is fetched and parsed, followed
     by one request to the house-table endpoint whose entries are each
     stored as a house. A failure is logged and only the affected page,
     building or house is skipped.
     """
     list_api = 'http://zzx.zzfc.com/ajaxpro/xy_ysxk_more,App_Web_mjeeodb-.ashx'
     house_api = 'http://zzx.zzfc.com/ajaxpro/xy_housetag,App_Web_xg4ulr9n.ashx'
     house_headers = {
         'User-Agent':
         'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.119Safari/537.36',
         'X-AjaxPro-Method': 'GETLPBDS'
     }
     # Building attribute -> label that precedes its value on the detail page.
     building_fields = (
         ('bu_address', '楼栋座落'),
         ('bu_pre_sale', '预售证号'),
         ('bu_pre_sale_date', '预售日期'),
         ('bu_all_house', '套数'),
     )

     for page_no in range(1, 21):
         body = '{"pageNo":%d,"pageSize":30,"rowcount":589}' % page_no
         try:
             list_res = requests.post(list_api,
                                      data=body,
                                      headers=self.headers)
             list_text = list_res.content.decode()
         except Exception as err:
             log.error('楼栋请求失败{}'.format(err))
             continue

         for record in re.findall(r'\[\d+,.*?\d+\]', list_text):
             try:
                 sid = re.search(r'\[(\d+),', record).group(1)
                 pid = re.search(r'",(\d+),', record).group(1)
                 detail_url = 'http://zzx.zzfc.com/xy_bldg.aspx?pid={}&sid={}'.format(
                     pid, sid)
                 detail_text = requests.get(
                     detail_url, headers=self.headers).content.decode()
                 building = Building(co_index)
                 building.bu_id = sid
                 for attr, label in building_fields:
                     found = re.search(label + '.*?">(.*?)&nbsp', detail_text,
                                       re.S | re.M)
                     setattr(building, attr, found.group(1))
                 building.insert_db()
             except Exception as err:
                 log.error("{}楼栋解析失败{}".format(record, err))
                 continue

             query = '{"m_key":"WWW_LPB_001","m_param":"%s"}' % sid
             try:
                 house_res = requests.post(house_api,
                                           data=query,
                                           headers=house_headers)
                 house_text = house_res.content.decode()
             except Exception as err:
                 log.error("房屋请求失败{}".format(err))
                 continue

             for entry in re.findall(r'\["\d+.*?\d+\]', house_text):
                 try:
                     room = House(co_index)
                     room.bu_id = sid
                     parts = entry.split(",")
                     room.ho_name = parts[4]
                     room.ho_floor = re.search(r'(\d+)层', entry).group(1)
                     room.ho_build_size = parts[-3]
                     room.ho_true_size = parts[-2]
                     room.insert_db()
                 except Exception as err:
                     log.error("{}房屋解析错误{}".format(entry, err))
                     continue