Пример #1
0
    def get_build_info(self, build_lis, co_id):
        for build_ in build_lis:
            build_url = "http://xx.yyfdcw.com" + build_
            try:
                build_res = requests.get(build_url, headers=self.headers)
            except Exception as e:
                print("co_index={},楼栋信息错误".format(co_index), e)
                continue
            con = build_res.text
            bu = Building(co_index)
            bu.co_id = co_id
            bu.bu_id = re.search('Bid=(\d+)', build_).group(1)
            bu.bu_num = re.search('名称.*?">(.*?)</spa', con).group(1)
            bu.bu_pre_sale = re.search("编.*?red'>(.*?)</a", con).group(1)
            bu.bu_pre_sale_date = re.search('颁发日期.*?Date">(.*?)</span',
                                            con).group(1)
            bu.bo_build_start_time = re.search('开工日期.*?">(.*?)</span',
                                               con).group(1)
            bu.bo_build_end_time = re.search('竣工日期.*?">(.*?)</span',
                                             con).group(1)
            bu.bo_develops = re.search('单位.*?">(.*?)</span', con).group(1)
            bu.bu_floor = re.search('层数.*?">(.*?)</span', con).group(1)
            bu.bu_live_size = re.search('住宅面积.*?">(.*?)</span', con).group(1)
            bu.size = re.search('总面积.*?">(.*?)</span', con).group(1)

            bu.insert_db()

            id = re.search('测量号.*?">(.*?)</span', con).group(1)
            self.get_house_info(co_id, bu.bu_id, id)
Пример #2
0
    def get_build_info(self, build_id_list, co_id):
        bu = Building(co_index)
        for build_id in build_id_list:
            formdata = {}
            formdata["action"] = "qeurySingleBuilding"
            formdata['pk'] = str(build_id)
            header = {
                'User-Agent':
                'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.186 Safari/537.36',
                'Referer':
                'http://hkrealestate.haikou.gov.cn/wp_myself/housequery/projectBuildingList.php'
            }
            try:
                build_info = self.s.post(
                    'http://hkrealestate.haikou.gov.cn/wp_myself/housequery/projectBuildHouseAction.php',
                    data=formdata,
                    headers=header)
            except Exception as e:
                print("co_idnex={},楼栋错误".format(co_index), e)

            build_con = build_info.text
            bu.bu_id = build_id
            bu.co_id = co_id
            bu.bu_num = re.search('幢名称.*?<td>(.*?)<', build_con,
                                  re.S | re.M).group(1)
            bu.bu_floor = re.search('总层数.*?<td>(.*?)<', build_con,
                                    re.S | re.M).group(1)
            bu.bu_build_size = re.search('>建筑面积.*?<td>(.*?)<', build_con,
                                         re.S | re.M).group(1)
            bu.bo_develops = re.search('房地产企业.*?">(.*?)</td', build_con,
                                       re.S | re.M).group(1)

            bu.insert_db()

            self.get_house_info(build_con, co_id, build_id)
Пример #3
0
    def parse(self, res):
        html = etree.HTML(res.content.decode('gbk'))
        bu_list = html.xpath("//div[@class='listCon']")
        for i in bu_list:
            temp = i.xpath("./a[@class='listCon2']/@href")[0]
            name = i.xpath("./a[@class='listCon1']/@title")[0]
            url = "http://www.hyfc365.com" + temp
            try:
                bu_res = requests.get(url, headers=self.headers)
                content = bu_res.content.decode('gbk')
                bu = Building(co_index)
                bu.bu_num = name
                project_id = re.search('ID=(.*)', temp).group(1)
                bu.bu_pre_sale = re.search('预售证名称.*?NAME">(.*?)</span',
                                           content, re.S | re.M).group(1)
                bu.bu_pre_sale_date = re.search('申领时间.*?">(.*?)</span',
                                                content, re.S | re.M).group(1)
                bu.bo_develops = re.search('申领单位.*?">(.*?)</span', content,
                                           re.S | re.M).group(1)
                bu.bu_build_size = re.search('"SALE_HOUSE_AREA">(.*?)<',
                                             content, re.S | re.M).group(1)
                bu.bu_all_house = re.search('"SALE_HOUSE_COUNT">(.*?)<',
                                            content, re.S | re.M).group(1)

                detail_url = 'http://www.hyfc365.com/RealEstate/Project/BuildingList.aspx?ID=' + project_id
                detail_res = requests.get(detail_url)
                bu_id = re.search("BUILDING_ID=(.*?)'",
                                  detail_res.text).group(1)
                bu.bu_id = bu_id
                bu.insert_db()
            except Exception as e:
                log.error("{}楼栋页面解析失败{}".format(url, e))
                continue
            self.house_parse(bu_id)
Пример #4
0
    def comm(self, id):
        bu = Building(co_index)

        house_url = self.start_url + "/api/buildInfos/getHouseInfosByPannelNumber?pannelNumber=" + str(
            id)
        comm_url = self.start_url + "/api/buildInfos/getHomePageBuildingInfo?blockNumber=" + str(
            id)
        comm_detail_url = self.start_url + "/api/buildInfos/getDetailsBuildingInfo?blockNumber=" + str(
            id)

        comm_res = requests.get(comm_url)
        comm_detail_res = requests.get(comm_detail_url)
        house_res = requests.get(house_url)
        comm_dict = json.loads(comm_res.text)
        comm_detail_dict = json.loads(comm_detail_res.text)
        house_dict = json.loads(house_res.text)

        bu.bu_id = id
        bu.bu_num = comm_dict["data"]["nameBuildings"]
        bu.area = comm_detail_dict['data']['houseingArea']
        bu.bu_address = comm_dict["data"]["houseaddress"]
        bu.bu_pre_sale = comm_detail_dict["data"]["yszh"]
        bu.bu_type = comm_dict["data"]["propertycategory"]
        bu.bo_develops = comm_dict["data"]["companyName"]

        bu.insert_db()

        house_num = house_dict["data"]
        for hu in house_num:
            ho = House(co_index)
            h = hu["data"]
            if len(h) > 0:
                for i in h:
                    try:
                        room_id = i["houseNumber"]
                        room_url = self.start_url + "/api/buildInfos/getHouseInfoByHouseNumber?houseNumber=" + str(
                            room_id)
                        res = requests.get(room_url, headers=self.headers)
                        dict = json.loads(res.text)
                        ho.bu_id = id
                        # ho.ho_num = room_id
                        ho.ho_name = dict["data"]["houseNo"]
                        ho.ho_build_size = dict["data"]["buildArea"]
                        ho.ho_true_size = dict["data"]["jacketArea"]
                        ho.ho_share_size = dict["data"]["apportionedArea"]
                        ho.ho_floor = dict["data"]["nominalLevel"]
                        ho.insert_db()
                    except Exception as e:
                        print(e)
            else:
                continue
Пример #5
0
 def get_build_info(self, build_url_list, co_id):
     for i in build_url_list:
         build_url = 'http://www.fjlyfdc.com.cn/' + i
         try:
             build = Building(co_index)
             response = requests.get(build_url, headers=self.headers)
             html = response.text
             build.bu_id = re.search('buildingInfoID=(.*?)&', build_url).group(1)
             build.co_id = co_id
             build.bo_develops = re.search('开发商:.*?<td.*?>(.*?)<', html, re.S | re.M).group(1)
             build.co_name = re.search('项目名称:.*?<td.*?>(.*?)<', html, re.S | re.M).group(1)
             build.bu_address = re.search('坐落位置:.*?<td.*?>(.*?)<', html, re.S | re.M).group(1)
             build.bu_num = re.search('幢号:.*?<td.*?>(.*?)<', html, re.S | re.M).group(1)
             build.co_build_structural = re.search('建筑结构:.*?<td.*?>(.*?)<', html, re.S | re.M).group(1)
             build.bu_type = re.search('设计用途:.*?<td.*?>(.*?)<', html, re.S | re.M).group(1)
             build.bu_floor = re.search('总 层 数:.*?<td.*?>(.*?)<', html, re.S | re.M).group(1)
             build.co_all_size = re.search('总面积:.*?<td.*?>(.*?)<', html, re.S | re.M).group(1)
             build.bo_build_start_time = re.search('开工日期:.*?<td.*?>(.*?)<', html, re.S | re.M).group(1)
             build.insert_db()
             house_url_list = re.findall('href="(/House/HouseInfo\?HouseCenterID=.*?)"', html, re.S | re.M)
             self.get_house_info(house_url_list, build.bu_id, co_id)
         except Exception as e:
             print('楼栋错误,co_index={},url={}'.format(co_index, build_url), e)
Пример #6
0
    def get_build_info(self, bu_pre_sale, bo_develops, bu_co_name, bu_con):

        build = Building(co_index)

        build.bu_id = re.search('编号.*?>(\d+)<', bu_con, re.S | re.M).group(1)
        build.bu_num = re.search('幢号.*?>(\d+)<', bu_con, re.S | re.M).group(1)
        build.bu_floor = re.search('总层数.*?>(\d+)<', bu_con,
                                   re.S | re.M).group(1)
        build.bu_build_size = re.search('预售建筑面积.*?>(\d+.\d+)<', bu_con,
                                        re.S | re.M).group(1)
        build.bu_address = re.search('楼房坐落.*?;">(.*?)</span', bu_con,
                                     re.S | re.M).group(1)
        build.bu_live_size = re.search('住宅建筑面积.*?>(\d+.\d+)<', bu_con,
                                       re.S | re.M).group(1)
        build.bu_not_live_size = re.search('非住宅建筑面积.*?;">(.*?)</span', bu_con,
                                           re.S | re.M).group(1)
        build.bo_build_start_time = re.search('开工日期.*?;">(.*?)</span', bu_con,
                                              re.S | re.M).group(1)
        build.bu_all_house = re.search('总套数.*?>(\d+)<', bu_con,
                                       re.S | re.M).group(1)
        build.bu_pre_sale = bu_pre_sale
        build.bo_develops = bo_develops
        build.co_name = bu_co_name
        build.insert_db()
Пример #7
0
 def get_build_detail(self, all_building_url_list):
     house_url_list = []
     for i in all_building_url_list:
         try:
             response = requests.get(i, headers=self.headers)
             html = response.text
             tree = etree.HTML(html)
             bo_develops = tree.xpath(
                 '//*[@id="content_1"]/div[3]/text()[2]')[0]  # 开发商
             bu_build_size = tree.xpath(
                 '//*[@id="houseTable_1"]/tr[2]/td[6]/a/text()')  # 销售面积
             if bu_build_size:
                 bu_build_size = bu_build_size[0]
             bu_pre_sale = tree.xpath(
                 '//*[@id="houseTable_1"]/tr[2]/td[1]/a/text()')  # 预售证书
             if bu_pre_sale:
                 bu_pre_sale = bu_pre_sale[0]
             bu_floor = tree.xpath(
                 '//*[@id="houseTable_1"]/tr[2]/td[3]/a/text()')[0]  # 总层数
             bu_all_house = tree.xpath(
                 '//*[@id="houseTable_1"]/tr[2]/td[4]/a/text()')[0]  # 总套数
             bu_type = tree.xpath(
                 '//*[@id="houseTable_1"]/tr[2]/td[5]/a/text()')[0]  # 房屋用途
             build_html = re.search('houseTable_1.*?当前共有', html,
                                    re.S | re.M).group()
             build_detail_html = re.findall(
                 'class.*?</a></td>.*?</a></td>.*?</a></td>', build_html,
                 re.S | re.M)
             bu_num = re.findall('项目名称:</b>(.*?)</div>', html,
                                 re.S | re.M)[0].strip()
             url_list = []
             for bu in build_detail_html:
                 try:
                     build = Building(co_index)
                     build.bu_id = re.search(
                         "href='roomTable.aspx\?id=(.*?)&", bu,
                         re.S | re.M).group(1)
                     build.bu_address = re.search(
                         "_blank.*?_blank'>(.*?)</a></td><td>", bu,
                         re.S | re.M).group(1).strip()
                     build.bo_develops = bo_develops
                     build.bu_build_size = bu_build_size
                     build.bu_pre_sale = bu_pre_sale
                     build.bu_num = bu_num
                     build.bu_floor = bu_floor
                     build.bu_all_house = bu_all_house
                     build.bu_type = bu_type
                     for k in self.area_list:
                         if k in build.bu_address:
                             build.area = k
                             continue
                     build.insert_db()
                     house_url = re.search(
                         "(roomTable.aspx\?id=.*?&vc=.*?)'", bu,
                         re.S | re.M).group(1)
                     url_list.append(
                         'http://dgfc.dg.gov.cn/dgwebsite_v2/Vendition/' +
                         house_url)
                 except Exception as e:
                     print('楼栋错误,co_index={},url={}'.format(co_index, i), e)
             house_url_list = url_list + house_url_list
         except Exception as e:
             print('楼栋错误,co_index={},url={}'.format(co_index, i), e)
     return house_url_list