def get_comm_info(self, comm_url, comm):
    """Scrape one community page, store it, then store each building row.

    Fetches ``comm_url`` with ``self.headers``, fills the fields of ``comm``
    from the page with regular expressions and calls ``comm.insert_db()``.
    Each ``<tr>`` of the table following the "套房信息" marker is then parsed
    into a ``Building``, which is inserted and passed on to
    ``self.get_build_info``.

    Any exception is printed rather than raised: a failure inside one row
    only skips that row, a failure elsewhere abandons the whole page.

    :param comm_url: URL of the community detail page.
    :param comm: community record object; mutated in place.
    """

    def first_group(pattern, text):
        # A missing match raises AttributeError, which the surrounding
        # handlers report.
        return re.search(pattern, text, re.S | re.M).group(1)

    # (attribute, pattern) pairs, applied in this order.
    comm_rules = (
        ('co_id', 'jectcode=(.*?)"'),
        ('co_name', "项目名称:.*?<td.*?>(.*?)<"),
        ('co_address', '项目地址:.*?<td.*?>(.*?)<'),
        ('co_develops', '开发企业:.*?<td.*?>(.*?)<'),
        ('co_owner', '国土证书:.*?<td.*?>(.*?)<'),
        ('area', '行政区划:</th>.*?<td.*?>(.*?)<'),
    )
    # Each extra '<td.*?' prefix skips one more cell of the row.
    row_rules = (
        ('bu_num', '<td.*?>(.*?)</td'),
        ('bu_id', 'buildingcode=(.*?)&'),
        ('co_build_structural', '<td.*?<td.*?<td.*?>(.*?)<'),
        ('bu_all_house', '<td.*?<td.*?<td.*?<td.*?>(.*?)<'),
        ('bu_floor', '<td.*?<td.*?<td.*?<td.*?<td.*?>(.*?)<'),
    )

    try:
        html = requests.get(comm_url, headers=self.headers).text
        for attr, pattern in comm_rules:
            setattr(comm, attr, first_group(pattern, html))
        comm.insert_db()

        build_html = re.search('套房信息.*?</table>', html, re.S | re.M).group()
        rows = re.findall('<tr.*?>.*?</tr>', build_html, re.S | re.M)
        for row in rows:
            try:
                build = Building(co_index)
                build.co_id = comm.co_id
                for attr, pattern in row_rules:
                    setattr(build, attr, first_group(pattern, row))
                build.insert_db()
                house_url = first_group('href="(.*?)"', row)
                self.get_build_info(house_url, build.bu_id, comm.co_id)
            except Exception as e:
                print('楼栋错误,co_index={},url={}'.format(co_index, comm_url), e)
    except Exception as e:
        print('小区错误,co_index={},url={}'.format(co_index, comm_url), e)
def get_build_info(self, build_url_list):
    """Process each building link and forward the resulting house URLs.

    For every entry the ``xqbm`` code is taken from the end of the link and
    used to build the ``donginfo.aspx`` URL. The ``Building`` attributes are
    set to regex *rules* (not values); ``build.to_dict()`` is handed to
    ``ProducerListUrl`` as ``analyzer_rules_dict``. The list returned by
    ``get_details()`` is passed to ``self.get_house_info``.

    Any exception for one entry is printed and the loop continues.

    :param build_url_list: iterable of link strings containing ``xqbm=<code>``.
    """
    for i in build_url_list:
        # Bind the name before the try block: the handler below formats
        # build_url, and extracting the code can fail before the real URL is
        # built. Without this the handler would raise UnboundLocalError on
        # the first item or report the previous item's URL on later ones.
        build_url = i
        try:
            build = Building(co_index)
            build_code = re.search('xqbm=(.*?)$', i).group(1)
            build_url = 'http://zjjg.0557fdc.com:9555/xiaoqu/donginfo.aspx?xqbm=' + build_code
            build.bu_num = 'Labeldongmc">(.*?)<'
            build.bu_pre_sale = 'Labelyszheng">(.*?)<'
            build.bu_floor = 'Labelsceng">(.*?)<'
            build.bu_address = 'Label1zuoluo">(.*?)<'
            build.bo_build_start_time = 'Label1kaigong">(.*?)<'
            build.co_build_structural = 'Labeljiegou">(.*?)<'
            # Raw strings: '\?' and '\.' are invalid escapes in normal literals.
            build.co_id = r'donginfo.aspx\?xqbm=(.*?)"'
            build.bu_id = 'id="DropDownList1".*?value="(.*?)"'
            p = ProducerListUrl(page_url=build_url,
                                request_type='get', encode='utf-8',
                                analyzer_rules_dict=build.to_dict(),
                                current_url_rule=r'location\.href=(.*?)"',
                                analyzer_type='regex',
                                headers=self.headers)
            house_url_list = p.get_details()
            self.get_house_info(house_url_list)
        except Exception as e:
            print('楼栋错误,co_index={},url={}'.format(co_index, build_url), e)
def get_build_info(self, build_url_list):
    """Process each building link under ``/House/`` and forward house URLs.

    The ``Building`` attributes are set to regex rules (not values);
    ``build.to_dict()`` is handed to ``ProducerListUrl`` as
    ``analyzer_rules_dict``. The list returned by ``get_details()`` is passed
    to ``self.get_house_info``. Any exception for one entry is printed and
    the loop continues.

    :param build_url_list: iterable of path strings appended to the site's
        ``/House/`` prefix.
    """
    # (attribute, rule) pairs, assigned in this order.
    # A rule for bu_type ('设计用途') was disabled in the original code.
    rules = (
        ('co_name', '项目名称:.*?<td.*?>(.*?)<'),
        ('bu_num', '幢 号:.*?<td.*?>(.*?)<'),
        ('bu_address', '坐落位置:.*?<td.*?>(.*?)<'),
        ('co_build_structural', '建筑结构:.*?<td.*?>(.*?)<'),
        ('bu_floor', '总 层 数:.*?<td.*?>(.*?)<'),
        ('bu_build_size', '总 面 积:.*?<td.*?>(.*?)<'),
        ('bu_all_house', '批准销售:.*?<td.*?>(.*?)<'),
    )
    for i in build_url_list:
        try:
            build = Building(co_index)
            build_url = 'http://www.ndjsj.gov.cn/House/' + i
            for attr, rule in rules:
                setattr(build, attr, rule)
            producer = ProducerListUrl(
                page_url=build_url,
                request_type='get',
                encode='utf-8',
                analyzer_rules_dict=build.to_dict(),
                current_url_rule='javascript:ShowTitle.*?href="(.*?)"',
                analyzer_type='regex',
                headers=self.headers,
            )
            found_house_urls = producer.get_details()
            self.get_house_info(found_house_urls)
        except Exception as e:
            print('宁德楼栋错误,url={}'.format(build_url), e)
def get_build_info(self, build_url_list):
    """Fetch each building page, store the building, then crawl its houses.

    The first element of every entry is used as the ``buildID`` query value
    and as ``bu_id``. The remaining fields are read from the page with
    regular expressions; after ``build.insert_db()`` the building id is
    passed to ``self.get_house_info``. Any exception for one entry is
    printed and the loop continues.

    :param build_url_list: iterable of indexable entries whose element ``0``
        is the building id string.
    """
    # (attribute, pattern) pairs, applied in this order.
    field_patterns = (
        ('co_build_structural', '结构类型.*?<td.*?>(.*?)<'),
        ('bo_build_end_time', '建成年份.*?<td.*?>(.*?)<'),
        ('bu_build_size', '总建筑面积.*?<td.*?>(.*?)<'),
        ('bu_num', '幢号.*?<td.*?>(.*?)<'),
        ('size', '占地面积.*?<td>(.*?)<'),
        ('bu_floor', '房屋层数.*?<td>(.*?)<'),
        ('bu_all_house', '房屋套数.*?<td>(.*?)<'),
        ('area', '坐落区.*?<td>(.*?)<'),
    )
    for i in build_url_list:
        try:
            build = Building(co_index)
            build_url = 'http://222.223.160.199:8088/website/buildquery/selectBuild.jsp?buildID=' + i[0]
            html = requests.get(build_url, headers=self.headers).text
            build.bu_id = i[0]
            for attr, pattern in field_patterns:
                matched = re.search(pattern, html, re.S | re.M)
                setattr(build, attr, matched.group(1))
            build.insert_db()
            self.get_house_info(build.bu_id)
        except Exception as e:
            print('请求错误,url={}'.format(build_url),e)
def get_build_info(self, co_id):
    """Request the building list of one project and store every building.

    POSTs ``activityId=<co_id>&module=jtsActBuildingInfo`` to the ``/jdi``
    endpoint, reads ``ROWS -> ROW`` from the JSON reply and, for each entry,
    fills a ``Building`` through ``self.dict_get``, inserts it and calls
    ``self.get_house_info(co_id, bu_id)``.

    No exception is handled here; request, JSON and key errors propagate.

    :param co_id: project identifier; converted with ``str`` for the payload.
    """
    # (Building attribute, JSON key) pairs, applied in this order.
    # LOCATION feeds both bu_address and bu_num.
    field_keys = (
        ('bu_all_size', 'BUILDING_AREA'),
        ('bu_address', 'LOCATION'),
        ('bu_num', 'LOCATION'),
        ('bu_floor', 'TOTAL_FLOORS'),
        ('bu_all_house', 'TOTAL_SET'),
        ('co_build_structural', 'STRUCTURE'),
        ('bu_id', 'RESOURCE_GUID'),
    )
    build_url = 'http://www.yanjifc.com/jdi'
    payload = "".join(("activityId=", str(co_id), "&module=jtsActBuildingInfo"))
    reply = requests.post(url=build_url, data=payload, headers=self.headers)
    rows = reply.json()['ROWS']['ROW']
    for row in rows:
        build = Building(co_index)
        for attr, key in field_keys:
            setattr(build, attr, self.dict_get(row, key))
        build.co_id = co_id
        build.insert_db()
        self.get_house_info(co_id, build.bu_id)
def get_build_info(self, build_url_list):
    """Fetch each building page, store the building, then crawl its houses.

    The page body is decoded as GBK. Building fields are read from the page
    with regular expressions, while ``co_id`` and ``bu_id`` come from the
    ``ProjectId`` / ``BuildingId`` query parameters of the link itself.
    After ``build.insert_db()`` the ``HouseInfo`` links found on the page
    are passed to ``self.get_house_info``. Any exception for one entry is
    printed and the loop continues.

    :param build_url_list: iterable of path strings appended to the site's
        ``/House/`` prefix.
    """
    # (attribute, pattern) pairs, applied in this order.
    page_rules = (
        ('co_name', "项目名称:.*?<td.*?>(.*?)<"),
        ('bu_num', "幢 号:.*?<td.*?>(.*?)<"),
        ('co_use', "设计用途:.*?<td.*?>(.*?)<"),
        ('co_build_structural', "建筑结构:.*?<td.*?>(.*?)<"),
        ('bu_floor', "总 层 数:.*?<td.*?>(.*?)<"),
        ('bu_build_size', "总 面 积:.*?<td.*?>(.*?)<"),
        ('co_build_end_time', "竣工日期:.*?<td.*?>(.*?)<"),
    )
    for i in build_url_list:
        try:
            build = Building(co_index)
            build_url = 'http://www.fjnpfdc.com/House/' + i
            raw = requests.get(build_url, headers=self.headers)
            con = raw.content.decode('gbk')
            for attr, pattern in page_rules:
                setattr(build, attr, re.search(pattern, con, re.S | re.M).group(1))
            # House links are taken directly from the downloaded page.
            house_url_list = re.findall('<a href="(HouseInfo.*?)"', con)
            build.co_id = re.search('ProjectId=(.*?)&', i).group(1)
            build.bu_id = re.search('BuildingId=(.*?)&P', i).group(1)
            build.insert_db()
            self.get_house_info(house_url_list, build.bu_id, build.co_id)
        except Exception as e:
            print("co_index={},楼栋{}错误".format(co_index, i), e)
def get_build_info(self, build_url_list, co_id):
    """Fetch each building page, store the building, then crawl its houses.

    ``bu_id`` is taken from the ``buildingInfoID`` query parameter of the
    URL; the remaining fields are read from the page with regular
    expressions. After ``build.insert_db()`` the ``HouseInfo`` links found
    on the page are passed to ``self.get_house_info`` together with the
    building and community ids. Any exception for one entry is printed and
    the loop continues.

    :param build_url_list: iterable of path strings appended to the site root.
    :param co_id: community id stored on every building and forwarded on.
    """
    # (attribute, pattern) pairs, applied in this order.
    page_rules = (
        ('bo_develops', '开发商:.*?<td.*?>(.*?)<'),
        ('co_name', '项目名称:.*?<td.*?>(.*?)<'),
        ('bu_address', '坐落位置:.*?<td.*?>(.*?)<'),
        ('bu_num', '幢号:.*?<td.*?>(.*?)<'),
        ('co_build_structural', '建筑结构:.*?<td.*?>(.*?)<'),
        ('bu_type', '设计用途:.*?<td.*?>(.*?)<'),
        ('bu_floor', '总 层 数:.*?<td.*?>(.*?)<'),
        ('co_all_size', '总面积:.*?<td.*?>(.*?)<'),
        ('bo_build_start_time', '开工日期:.*?<td.*?>(.*?)<'),
    )
    for i in build_url_list:
        # Built outside the try so the handler can always report it.
        build_url = 'http://www.fjlyfdc.com.cn/' + i
        try:
            build = Building(co_index)
            response = requests.get(build_url, headers=self.headers)
            html = response.text
            build.bu_id = re.search('buildingInfoID=(.*?)&', build_url).group(1)
            build.co_id = co_id
            for attr, pattern in page_rules:
                setattr(build, attr, re.search(pattern, html, re.S | re.M).group(1))
            build.insert_db()
            # Raw string: '\?' is an invalid escape in a normal literal.
            house_url_list = re.findall(
                r'href="(/House/HouseInfo\?HouseCenterID=.*?)"', html, re.S | re.M)
            self.get_house_info(house_url_list, build.bu_id, co_id)
        except Exception as e:
            print('楼栋错误,co_index={},url={}'.format(co_index, build_url), e)