Exemplo n.º 1
0
 def get_build_info(self,presell_url_list,co_id):
     """Scrape the buildings linked from each pre-sale page and store them.

     For every URL suffix in ``presell_url_list`` the page at
     ``self.url + suffix`` is fetched and the building links on it are
     extracted. Each building page is parsed into a ``Building`` record,
     saved with ``insert_db()``, and the house-detail tokens found on that
     page are passed to ``self.get_house_info``.

     Args:
         presell_url_list: Iterable of URL suffixes appended to ``self.url``.
         co_id: Value stored on each ``Building`` as ``co_id`` and forwarded
             to ``self.get_house_info``.

     Any exception raised while fetching, parsing or inserting a single
     building is printed and that building is skipped. A failure while
     fetching a pre-sale page itself is not caught here.
     """
     flags = re.S | re.M

     def first_group(pattern, text):
         # re.search returns None when a field is absent, so .group(1) raises
         # AttributeError; the caller's except clause turns that into a skip.
         return re.search(pattern, text, flags).group(1)

     for presell_url in presell_url_list:
         res = requests.get(self.url + presell_url, headers=self.headers)
         build_url_list = re.findall(r'【<a href="(.*?)" target="_self"', res.text, flags)
         for build_url in build_url_list:
             build_info_url = self.url + build_url
             try:
                 build_res = requests.get(build_info_url, headers=self.headers)
                 con = build_res.text
                 bu = Building(co_index)
                 bu.co_id = co_id
                 # Raw strings keep the regex escapes (\d, \(, \)) out of
                 # Python's own string-escape processing.
                 bu.bu_id = re.search(r'ID=(\d+)', build_url).group(1)
                 bu.bu_num = first_group(r'栋.*?号.*?BuildingName">(.*?)</span', con)
                 bu.bu_floor = first_group(r'总 层 数.*?(\d+)</span', con)
                 bu.bu_build_size = first_group(r'建筑面积.*?Jzmj">(.*?)</span', con)
                 bu.bu_live_size = first_group(r'住宅面积.*?Zzmj">(.*?)</span', con)
                 bu.bu_not_live_size = first_group(r'非住宅面积.*?Fzzmj">(.*?)</span', con)
                 bu.bu_pre_sale = first_group(r'预售许可证.*?xkzh">(.*?)</span', con)
                 bu.bu_pre_sale_date = first_group(r'发证日期.*?fzrq">(.*?)</span', con)
                 bu.bu_type = first_group(r'项目类型.*?Type">(.*?)</span', con)
                 bu.insert_db()
             except Exception as e:
                 print("co_index={},楼栋信息错误".format(co_index), e)
             else:
                 # Only reached when the building was parsed and stored, so
                 # ``con`` and ``bu`` are guaranteed to be bound here.
                 house_detail_list = re.findall(r'''getMoreHouseInfo\('(.*?)'\)"''', con, flags)
                 self.get_house_info(co_id, bu.bu_id, house_detail_list)
Exemplo n.º 2
0
    def get_build_info(self, bu_pre_sale, bo_develops, bu_co_name, bu_con):
        """Parse one building page and insert it as a ``Building`` record.

        Args:
            bu_pre_sale: Stored unchanged as ``bu_pre_sale`` on the record.
            bo_develops: Stored unchanged as ``bo_develops`` on the record.
            bu_co_name: Stored as ``co_name`` on the record.
            bu_con: Page text that the building fields are extracted from.

        Raises:
            AttributeError: If any field pattern is not found in ``bu_con``
                (``re.search`` returns ``None`` and ``.group`` fails). Nothing
                is inserted in that case.
        """

        def grab(pattern):
            # First capture group of the first match, searching across lines.
            return re.search(pattern, bu_con, re.S | re.M).group(1)

        build = Building(co_index)

        build.bu_id = grab(r'编号.*?>(\d+)<')
        build.bu_num = grab(r'幢号.*?>(\d+)<')
        build.bu_floor = grab(r'总层数.*?>(\d+)<')
        # The decimal point is escaped and the fraction optional: an unescaped
        # "." matches any character and cannot match values shorter than
        # three characters, which lets the lazy ".*?" run on to a later number.
        build.bu_build_size = grab(r'预售建筑面积.*?>(\d+(?:\.\d+)?)<')
        build.bu_address = grab(r'楼房坐落.*?;">(.*?)</span')
        # NOTE(review): '住宅建筑面积' is also a substring of '非住宅建筑面积',
        # so this can anchor on the non-residential label if that label comes
        # first in bu_con — confirm against the page layout.
        build.bu_live_size = grab(r'住宅建筑面积.*?>(\d+(?:\.\d+)?)<')
        build.bu_not_live_size = grab(r'非住宅建筑面积.*?;">(.*?)</span')
        build.bo_build_start_time = grab(r'开工日期.*?;">(.*?)</span')
        build.bu_all_house = grab(r'总套数.*?>(\d+)<')
        build.bu_pre_sale = bu_pre_sale
        build.bo_develops = bo_develops
        build.co_name = bu_co_name
        build.insert_db()