def get_build_info(self, build_url_list):
    """Fetch every building page in ``build_url_list`` and forward its house links.

    Each entry is appended to ``http://www.ndjsj.gov.cn/House/`` to form the
    page URL.  A ``Building`` is populated with regex extraction rules, the
    page is processed through ``ProducerListUrl`` and the value returned by
    ``get_details()`` (used here as the list of house URLs) is passed to
    ``self.get_house_info``.

    A failure on one entry is printed and does not stop the remaining
    entries from being processed.

    :param build_url_list: iterable of URL fragments relative to ``/House/``.
    :return: ``None``.
    """
    for i in build_url_list:
        # Bind the name before entering the ``try`` so the error report below
        # never reads an unbound variable (first iteration) or the URL left
        # over from the previous iteration.
        build_url = i
        try:
            build_url = 'http://www.ndjsj.gov.cn/House/' + i
            build = Building(co_index)
            # Each attribute holds the regex used to extract that field.
            build.co_name = '项目名称:.*?<td.*?>(.*?)<'
            build.bu_num = '幢 号:.*?<td.*?>(.*?)<'
            build.bu_address = '坐落位置:.*?<td.*?>(.*?)<'
            build.co_build_structural = '建筑结构:.*?<td.*?>(.*?)<'
            build.bu_floor = '总 层 数:.*?<td.*?>(.*?)<'
            build.bu_build_size = '总 面 积:.*?<td.*?>(.*?)<'
            # Disabled rule kept for reference:
            # build.bu_type = '设计用途:.*?<td.*?>(.*?)<'
            build.bu_all_house = '批准销售:.*?<td.*?>(.*?)<'
            p = ProducerListUrl(
                page_url=build_url,
                request_type='get',
                encode='utf-8',
                analyzer_rules_dict=build.to_dict(),
                current_url_rule='javascript:ShowTitle.*?href="(.*?)"',
                analyzer_type='regex',
                headers=self.headers,
            )
            house_url_list = p.get_details()
            self.get_house_info(house_url_list)
        except Exception as e:
            # Best-effort crawl: report the failing URL and move on.
            print('宁德楼栋错误,url={}'.format(build_url), e)
def get_build_info(self, build_url_list):
    """Fetch every building page in ``build_url_list`` and forward its house links.

    The ``xqbm`` query value is extracted from each entry and used to build
    the ``donginfo.aspx`` page URL.  A ``Building`` is populated with regex
    extraction rules, the page is processed through ``ProducerListUrl`` and
    the value returned by ``get_details()`` (used here as the list of house
    URLs) is passed to ``self.get_house_info``.

    A failure on one entry (including an entry without ``xqbm=``) is printed
    and does not stop the remaining entries from being processed.

    :param build_url_list: iterable of URL strings containing ``xqbm=<code>``.
    :return: ``None``.
    """
    for i in build_url_list:
        # Bind the name before entering the ``try``: if the ``xqbm`` lookup
        # fails, the handler would otherwise read an unbound variable (first
        # iteration) or report the previous iteration's URL.
        build_url = i
        try:
            build = Building(co_index)
            # ``re.search`` returns None when the entry has no ``xqbm=``; the
            # resulting AttributeError is reported by the handler below.
            build_code = re.search(r'xqbm=(.*?)$', i).group(1)
            build_url = 'http://zjjg.0557fdc.com:9555/xiaoqu/donginfo.aspx?xqbm=' + build_code
            # Each attribute holds the regex used to extract that field.
            build.bu_num = 'Labeldongmc">(.*?)<'
            build.bu_pre_sale = 'Labelyszheng">(.*?)<'
            build.bu_floor = 'Labelsceng">(.*?)<'
            build.bu_address = 'Label1zuoluo">(.*?)<'
            # NOTE(review): 'bo_' prefix differs from the 'bu_' fields around
            # it - confirm the attribute name against Building.
            build.bo_build_start_time = 'Label1kaigong">(.*?)<'
            build.co_build_structural = 'Labeljiegou">(.*?)<'
            # Raw strings: '\?' and '\.' are invalid escapes in normal literals.
            build.co_id = r'donginfo.aspx\?xqbm=(.*?)"'
            build.bu_id = 'id="DropDownList1".*?value="(.*?)"'
            p = ProducerListUrl(
                page_url=build_url,
                request_type='get',
                encode='utf-8',
                analyzer_rules_dict=build.to_dict(),
                current_url_rule=r'location\.href=(.*?)"',
                analyzer_type='regex',
                headers=self.headers,
            )
            house_url_list = p.get_details()
            self.get_house_info(house_url_list)
        except Exception as e:
            # Best-effort crawl: report the failing URL and move on.
            print('楼栋错误,co_index={},url={}'.format(co_index, build_url), e)
def get_build_info(self, all_build_url_list):
    """Run the building extraction rules against ``all_build_url_list``.

    A ``Building`` is populated with regex extraction rules keyed on the
    page's ``onclick=GetData('<a>','<b>')`` handlers, and the rules are given
    to ``ProducerListUrl`` together with ``all_build_url_list`` as the
    ``page_url`` argument.

    :param all_build_url_list: value passed unchanged as ``page_url``.
    :return: whatever ``ProducerListUrl.get_details()`` returns (named
        ``house_url_list`` by this method).
    """
    b = Building(co_index)
    # Raw strings: '\(' and '\)' are invalid escapes in normal string
    # literals; the runtime pattern text is unchanged.
    b.co_id = r"onclick=GetData\('(.*?)',"
    b.bu_id = r"onclick=GetData\('.*?','(.*?)'"
    b.bu_num = "font12yellow-leftA'>.*?</span>套</td><td>.*?</td><td>(.*?)<"
    b.bu_all_house = "font12yellow-leftA'>(.*?)<"
    data_list = b.to_dict()
    p = ProducerListUrl(
        page_url=all_build_url_list,
        request_type='get',
        encode='utf-8',
        analyzer_rules_dict=data_list,
        current_url_rule=r"onclick=GetData\('(.*?)','(.*?)'\)",
        analyzer_type='regex',
        headers=self.headers,
    )
    house_url_list = p.get_details()
    return house_url_list
def get_build_info(self, more_build_url):
    """Process every building page in ``more_build_url`` and forward its house links.

    Each entry is appended to ``http://www.jmfc.com.cn/`` to form the page
    URL.  A ``Building`` receives the regex extraction rules below, the page
    is processed through ``ProducerListUrl`` (GBK encoding) and the value
    returned by ``get_details()`` is passed to ``self.get_house_info``.

    Any exception raised for one entry is printed and the loop continues.

    :param more_build_url: iterable of URL fragments relative to the site root.
    :return: ``None``.
    """
    # (attribute name, extraction regex) pairs, assigned in this order.
    field_rules = (
        ('bu_num', '<tr bgcolor="#FFFFFF">.*?<td.*?>(.*?)<'),
        ('co_id', '楼盘首页.*?aid-(.*?)/'),
        ('bu_id', '&addno=12&action=loupantable&lzbm=(.*?)&ql_xh='),
        ('bu_pre_sale', '<tr bgcolor="#FFFFFF">.*?<td.*?>.*?<.*?<td.*?>(.*?)<'),
        ('bu_floor', '<tr bgcolor="#FFFFFF">.*?<td.*?>.*?<.*?<td.*?>.*?<.*?<td.*?>(.*?)<'),
        ('bu_all_house', '<tr bgcolor="#FFFFFF">.*?<td.*?>.*?<.*?<td.*?>.*?<.*?<td.*?>.*?<.*?<td.*?>(.*?)<'),
    )
    for relative_url in more_build_url:
        try:
            building = Building(co_index)
            page_url = 'http://www.jmfc.com.cn/' + relative_url
            for attr_name, pattern in field_rules:
                setattr(building, attr_name, pattern)
            producer = ProducerListUrl(
                page_url=page_url,
                request_type='get',
                encode='gbk',
                analyzer_rules_dict=building.to_dict(),
                current_url_rule='<tr bgcolor="#FFFFFF">.*?align="left".*?href="(.*?)"',
                analyzer_type='regex',
                headers=self.headers,
            )
            self.get_house_info(producer.get_details())
        except Exception as err:
            # Per-entry failures are only reported; the crawl carries on.
            print(err)