Пример #1
0
 def get_build_info(self, presell_url_list, co_id, timeout=30):
     """Scrape and store every building linked from the given presale pages.

     For each path in ``presell_url_list`` the page at ``self.url + path``
     is downloaded and the building-detail links are extracted from it.
     Each detail page is parsed into a ``Building`` record that is saved
     with ``insert_db()``; the arguments of the ``getMoreHouseInfo('...')``
     calls found on the same page are then handed to
     ``self.get_house_info`` together with ``co_id`` and the building id.

     A building whose detail page cannot be fetched, parsed or stored is
     reported with ``print`` and skipped. An error while fetching a
     presale page itself is not caught and propagates to the caller.

     Args:
         presell_url_list: Iterable of URL paths appended to ``self.url``.
         co_id: Value stored on each building as ``co_id`` and forwarded
             to ``self.get_house_info``.
         timeout: Seconds passed to ``requests.get`` for every HTTP
             request, so a stalled server cannot block the crawl forever.
     """
     flags = re.S | re.M

     def grab(pattern, text):
         # re.search returns None when the field is absent, so .group()
         # raises AttributeError; the except clause below turns that into
         # a logged skip of the current building.
         return re.search(pattern, text, flags).group(1)

     for presell_url in presell_url_list:
         res = requests.get(self.url + presell_url, headers=self.headers,
                            timeout=timeout)
         build_url_list = re.findall(r'【<a href="(.*?)" target="_self"',
                                     res.text, flags)
         for build_url in build_url_list:
             try:
                 build_res = requests.get(self.url + build_url,
                                          headers=self.headers,
                                          timeout=timeout)
                 con = build_res.text
                 bu = Building(co_index)
                 bu.co_id = co_id
                 # The building id comes from the link, not the page body.
                 bu.bu_id = re.search(r'ID=(\d+)', build_url).group(1)
                 bu.bu_num = grab(
                     r'栋.*?号.*?BuildingName">(.*?)</span', con)
                 bu.bu_floor = grab(r'总 层 数.*?(\d+)</span', con)
                 bu.bu_build_size = grab(
                     r'建筑面积.*?Jzmj">(.*?)</span', con)
                 bu.bu_live_size = grab(
                     r'住宅面积.*?Zzmj">(.*?)</span', con)
                 bu.bu_not_live_size = grab(
                     r'非住宅面积.*?Fzzmj">(.*?)</span', con)
                 bu.bu_pre_sale = grab(
                     r'预售许可证.*?xkzh">(.*?)</span', con)
                 bu.bu_pre_sale_date = grab(
                     r'发证日期.*?fzrq">(.*?)</span', con)
                 bu.bu_type = grab(r'项目类型.*?Type">(.*?)</span', con)
                 bu.insert_db()
             except Exception as e:
                 print("co_index={},楼栋信息错误".format(co_index), e)
                 continue
             # Triple-quoted raw string so the pattern can contain both
             # quote characters without any extra escaping.
             house_detail_list = re.findall(
                 r'''getMoreHouseInfo\('(.*?)'\)"''', con, flags)
             self.get_house_info(co_id, bu.bu_id, house_detail_list)
Пример #2
0
    def get_build_info(self, bu_pre_sale, bo_develops, bu_co_name, bu_con):
        """Parse one building page into a ``Building`` record and store it.

        The fields are pulled out of ``bu_con`` with regular expressions,
        the three caller-supplied values are copied onto the record, and
        the record is saved with ``insert_db()``.

        Args:
            bu_pre_sale: Value stored as ``bu_pre_sale``.
            bo_develops: Value stored as ``bo_develops``.
            bu_co_name: Value stored as ``co_name``.
            bu_con: Text of the building page that is searched.

        Raises:
            AttributeError: If any expected field is not found in
                ``bu_con`` (``re.search`` returns ``None``).
        """
        flags = re.S | re.M
        # Integer or decimal number. The dot is escaped so that it only
        # matches a literal decimal point, and the fraction is optional so
        # whole-number areas are accepted as well.
        number = r'(\d+(?:\.\d+)?)'

        def grab(pattern):
            return re.search(pattern, bu_con, flags).group(1)

        build = Building(co_index)

        build.bu_id = grab(r'编号.*?>(\d+)<')
        build.bu_num = grab(r'幢号.*?>(\d+)<')
        build.bu_floor = grab(r'总层数.*?>(\d+)<')
        build.bu_build_size = grab(r'预售建筑面积.*?>' + number + '<')
        build.bu_address = grab(r'楼房坐落.*?;">(.*?)</span')
        # NOTE(review): this label is also a substring of the
        # non-residential label used below, so this relies on the
        # residential field appearing first in the page - confirm.
        build.bu_live_size = grab(r'住宅建筑面积.*?>' + number + '<')
        build.bu_not_live_size = grab(r'非住宅建筑面积.*?;">(.*?)</span')
        build.bo_build_start_time = grab(r'开工日期.*?;">(.*?)</span')
        build.bu_all_house = grab(r'总套数.*?>(\d+)<')
        build.bu_pre_sale = bu_pre_sale
        build.bo_develops = bo_develops
        build.co_name = bu_co_name
        build.insert_db()