Exemplo n.º 1
0
 def get_comm_info(self, comm_url,comm):
     try:
         response = requests.get(comm_url, headers=self.headers)
         html = response.text
         comm.co_id = re.search('jectcode=(.*?)"', html, re.S | re.M).group(1)
         comm.co_name = re.search("项目名称:.*?<td.*?>(.*?)<", html, re.S | re.M).group(1)
         comm.co_address = re.search('项目地址:.*?<td.*?>(.*?)<', html, re.S | re.M).group(1)
         comm.co_develops = re.search('开发企业:.*?<td.*?>(.*?)<', html, re.S | re.M).group(1)
         comm.co_owner = re.search('国土证书:.*?<td.*?>(.*?)<', html, re.S | re.M).group(1)
         comm.area = re.search('行政区划:</th>.*?<td.*?>(.*?)<', html, re.S | re.M).group(1)
         comm.insert_db()
         build_html = re.search('套房信息.*?</table>', html, re.S | re.M).group()
         build_info_list = re.findall('<tr.*?>.*?</tr>', build_html, re.S | re.M)
         for i in build_info_list:
             try:
                 build = Building(co_index)
                 build.co_id = comm.co_id
                 build.bu_num = re.search('<td.*?>(.*?)</td', i, re.S | re.M).group(1)
                 build.bu_id = re.search('buildingcode=(.*?)&', i, re.S | re.M).group(1)
                 build.co_build_structural = re.search('<td.*?<td.*?<td.*?>(.*?)<', i, re.S | re.M).group(1)
                 build.bu_all_house = re.search('<td.*?<td.*?<td.*?<td.*?>(.*?)<', i, re.S | re.M).group(1)
                 build.bu_floor = re.search('<td.*?<td.*?<td.*?<td.*?<td.*?>(.*?)<', i, re.S | re.M).group(1)
                 build.insert_db()
                 house_url = re.search('href="(.*?)"', i, re.S | re.M).group(1)
                 self.get_build_info(house_url, build.bu_id, comm.co_id)
             except Exception as e:
                 print('楼栋错误,co_index={},url={}'.format(co_index, comm_url), e)
     except Exception as e:
         print('小区错误,co_index={},url={}'.format(co_index, comm_url), e)
Exemplo n.º 2
0
 def get_build_info(self, build_url_list):
     for i in build_url_list:
         try:
             build = Building(co_index)
             build_code = re.search('xqbm=(.*?)$', i).group(1)
             build_url = 'http://zjjg.0557fdc.com:9555/xiaoqu/donginfo.aspx?xqbm=' + build_code
             build.bu_num = 'Labeldongmc">(.*?)<'
             build.bu_pre_sale = 'Labelyszheng">(.*?)<'
             build.bu_floor = 'Labelsceng">(.*?)<'
             build.bu_address = 'Label1zuoluo">(.*?)<'
             build.bo_build_start_time = 'Label1kaigong">(.*?)<'
             build.co_build_structural = 'Labeljiegou">(.*?)<'
             build.co_id = 'donginfo.aspx\?xqbm=(.*?)"'
             build.bu_id = 'id="DropDownList1".*?value="(.*?)"'
             p = ProducerListUrl(page_url=build_url,
                                 request_type='get',
                                 encode='utf-8',
                                 analyzer_rules_dict=build.to_dict(),
                                 current_url_rule='location\.href=(.*?)"',
                                 analyzer_type='regex',
                                 headers=self.headers)
             house_url_list = p.get_details()
             self.get_house_info(house_url_list)
         except Exception as e:
             print('楼栋错误,co_index={},url={}'.format(co_index, build_url), e)
Exemplo n.º 3
0
 def get_build_info(self, build_url_list):
     for i in build_url_list:
         try:
             build = Building(co_index)
             build_url = 'http://www.ndjsj.gov.cn/House/' + i
             build.co_name = '项目名称:.*?<td.*?>(.*?)<'
             build.bu_num = '幢  号:.*?<td.*?>(.*?)<'
             build.bu_address = '坐落位置:.*?<td.*?>(.*?)<'
             build.co_build_structural = '建筑结构:.*?<td.*?>(.*?)<'
             build.bu_floor = '总 层 数:.*?<td.*?>(.*?)<'
             build.bu_build_size = '总 面 积:.*?<td.*?>(.*?)<'
             # build.bu_type = '设计用途:.*?<td.*?>(.*?)<'
             build.bu_all_house = '批准销售:.*?<td.*?>(.*?)<'
             p = ProducerListUrl(
                 page_url=build_url,
                 request_type='get',
                 encode='utf-8',
                 analyzer_rules_dict=build.to_dict(),
                 current_url_rule='javascript:ShowTitle.*?href="(.*?)"',
                 analyzer_type='regex',
                 headers=self.headers)
             house_url_list = p.get_details()
             self.get_house_info(house_url_list)
         except Exception as e:
             print('宁德楼栋错误,url={}'.format(build_url), e)
Exemplo n.º 4
0
 def get_build_info(self, build_url_list):
     for i in build_url_list:
         try:
             build = Building(co_index)
             build_url = 'http://222.223.160.199:8088/website/buildquery/selectBuild.jsp?buildID=' + i[0]
             response = requests.get(build_url, headers=self.headers)
             html = response.text
             build.bu_id = i[0]
             build.co_build_structural = re.search('结构类型.*?<td.*?>(.*?)<', html, re.S | re.M).group(1)
             build.bo_build_end_time = re.search('建成年份.*?<td.*?>(.*?)<', html, re.S | re.M).group(1)
             build.bu_build_size = re.search('总建筑面积.*?<td.*?>(.*?)<', html, re.S | re.M).group(1)
             build.bu_num = re.search('幢号.*?<td.*?>(.*?)<', html, re.S | re.M).group(1)
             build.size = re.search('占地面积.*?<td>(.*?)<', html, re.S | re.M).group(1)
             build.bu_floor = re.search('房屋层数.*?<td>(.*?)<', html, re.S | re.M).group(1)
             build.bu_all_house = re.search('房屋套数.*?<td>(.*?)<', html, re.S | re.M).group(1)
             build.area = re.search('坐落区.*?<td>(.*?)<', html, re.S | re.M).group(1)
             build.insert_db()
             self.get_house_info(build.bu_id)
         except Exception as e:
             print('请求错误,url={}'.format(build_url),e)
Exemplo n.º 5
0
 def get_build_info(self, co_id):
     build_url = 'http://www.yanjifc.com/jdi'
     payload = "activityId=" + str(co_id) + "&module=jtsActBuildingInfo"
     result = requests.post(url=build_url,
                            data=payload,
                            headers=self.headers)
     data = result.json()
     build_list = data['ROWS']['ROW']
     for i in build_list:
         build = Building(co_index)
         build.bu_all_size = self.dict_get(i, 'BUILDING_AREA')
         build.bu_address = self.dict_get(i, 'LOCATION')
         build.bu_num = self.dict_get(i, 'LOCATION')
         build.bu_floor = self.dict_get(i, 'TOTAL_FLOORS')
         build.bu_all_house = self.dict_get(i, 'TOTAL_SET')
         build.co_build_structural = self.dict_get(i, 'STRUCTURE')
         build.bu_id = self.dict_get(i, 'RESOURCE_GUID')
         build.co_id = co_id
         build.insert_db()
         self.get_house_info(co_id, build.bu_id)
Exemplo n.º 6
0
    def get_build_info(self, build_url_list):
        for i in build_url_list:
            try:
                build = Building(co_index)
                build_url = 'http://www.fjnpfdc.com/House/' + i
                res = requests.get(build_url, headers=self.headers)
                con = res.content.decode('gbk')
                build.co_name = re.search("项目名称:.*?<td.*?>(.*?)<", con,
                                          re.S | re.M).group(1)
                build.bu_num = re.search("幢  号:.*?<td.*?>(.*?)<", con,
                                         re.S | re.M).group(1)
                build.co_use = re.search("设计用途:.*?<td.*?>(.*?)<", con,
                                         re.S | re.M).group(1)
                build.co_build_structural = re.search("建筑结构:.*?<td.*?>(.*?)<",
                                                      con,
                                                      re.S | re.M).group(1)
                build.bu_floor = re.search("总 层 数:.*?<td.*?>(.*?)<", con,
                                           re.S | re.M).group(1)
                build.bu_build_size = re.search("总 面 积:.*?<td.*?>(.*?)<", con,
                                                re.S | re.M).group(1)
                build.co_build_end_time = re.search("竣工日期:.*?<td.*?>(.*?)<",
                                                    con, re.S | re.M).group(1)

                house_url_list = re.findall('<a href="(HouseInfo.*?)"', con)
                # p = ProducerListUrl(page_url=build_url,
                #                     request_type='get', encode='gbk',
                #                     analyzer_rules_dict=build.to_dict(),
                #                     current_url_rule='<a href="(HouseInfo.*?)"',
                #                     analyzer_type='regex',
                #                     headers=self.headers)
                build.co_id = re.search('ProjectId=(.*?)&', i).group(1)
                build.bu_id = re.search('BuildingId=(.*?)&P', i).group(1)
                build.insert_db()
                # house_url_list = p.get_details()
                self.get_house_info(house_url_list, build.bu_id, build.co_id)
            except Exception as e:
                print("co_index={},楼栋{}错误".format(co_index, i), e)
Exemplo n.º 7
0
 def get_build_info(self, build_url_list, co_id):
     for i in build_url_list:
         build_url = 'http://www.fjlyfdc.com.cn/' + i
         try:
             build = Building(co_index)
             response = requests.get(build_url, headers=self.headers)
             html = response.text
             build.bu_id = re.search('buildingInfoID=(.*?)&',
                                     build_url).group(1)
             build.co_id = co_id
             build.bo_develops = re.search('开发商:.*?<td.*?>(.*?)<', html,
                                           re.S | re.M).group(1)
             build.co_name = re.search('项目名称:.*?<td.*?>(.*?)<', html,
                                       re.S | re.M).group(1)
             build.bu_address = re.search('坐落位置:.*?<td.*?>(.*?)<', html,
                                          re.S | re.M).group(1)
             build.bu_num = re.search('幢号:.*?<td.*?>(.*?)<', html,
                                      re.S | re.M).group(1)
             build.co_build_structural = re.search('建筑结构:.*?<td.*?>(.*?)<',
                                                   html,
                                                   re.S | re.M).group(1)
             build.bu_type = re.search('设计用途:.*?<td.*?>(.*?)<', html,
                                       re.S | re.M).group(1)
             build.bu_floor = re.search('总 层 数:.*?<td.*?>(.*?)<', html,
                                        re.S | re.M).group(1)
             build.co_all_size = re.search('总面积:.*?<td.*?>(.*?)<', html,
                                           re.S | re.M).group(1)
             build.bo_build_start_time = re.search('开工日期:.*?<td.*?>(.*?)<',
                                                   html,
                                                   re.S | re.M).group(1)
             build.insert_db()
             house_url_list = re.findall(
                 'href="(/House/HouseInfo\?HouseCenterID=.*?)"', html,
                 re.S | re.M)
             self.get_house_info(house_url_list, build.bu_id, co_id)
         except Exception as e:
             print('楼栋错误,co_index={},url={}'.format(co_index, build_url), e)