Exemplo n.º 1
0
 def build_info(self,co_id,temp_url_list):
     """Store every building listed on each page and crawl its houses.

     For each path in ``temp_url_list`` the page at
     ``http://222.77.178.63:7002/<path>`` is fetched, decoded as GBK and
     every ``<tr class="indextabletxt">`` row is turned into a
     ``Building`` that is written with ``insert_db()``. Afterwards
     ``self.house_info(ho_url, co_id, bu_id)`` is called once for every
     building that was stored from that page.

     Args:
         co_id: Value copied to ``Building.co_id`` and forwarded to
             ``house_info``.
         temp_url_list: Iterable of URL paths appended to the host.

     Any exception while fetching or parsing a page is printed and the
     remaining rows of that page are skipped; buildings already stored
     from the page still get their houses crawled. Exceptions raised by
     ``house_info`` itself are not caught here.
     """
     for temp_url in temp_url_list:
         # (ho_url, bu_id) of every building stored from this page.
         stored = []
         try:
             build_url = "http://222.77.178.63:7002/" + temp_url
             res = requests.get(build_url, headers=self.headers)
             html = etree.HTML(res.content.decode('gbk'))
             for row in html.xpath("//tr[@class='indextabletxt']"):
                 bu = Building(co_index)
                 ho_url = row.xpath("./td/a/@href")[0]
                 bu.co_id = co_id
                 bu.bu_id = re.search('Param=(.*)', ho_url).group(1)
                 bu.bu_num = row.xpath("./td/a/text()")[0]
                 bu.bu_all_house = row.xpath("./td[2]/text()")[0]
                 # The size columns are optional: a missing cell becomes None
                 # instead of aborting the row.
                 all_size = row.xpath("./td[3]/text()")
                 bu.bu_all_size = all_size[0] if all_size else None
                 live_size = row.xpath("./td[5]/text()")
                 bu.bu_live_size = live_size[0] if live_size else None
                 bu.insert_db()
                 stored.append((ho_url, bu.bu_id))
         except Exception as e:
             # log.error('楼栋信息错误{}'.format(e))
             print('楼栋信息错误{}'.format(e))
         # Crawl houses for each stored building, not just the last row, and
         # outside the try so house_info errors are not reported as
         # building-parse errors.
         for ho_url, bu_id in stored:
             self.house_info(ho_url, co_id, bu_id)
Exemplo n.º 2
0
 def get_build_info(self, build_info_list, co_id):
     """Parse building rows given as HTML strings and store each one.

     For every string in ``build_info_list`` the first three ``<td>``
     cells are read as building number, house count and total size, and
     the ``?id=`` query value as the building id. The ``Building`` is
     written with ``insert_db()`` and the row's first ``href`` is then
     handed to ``self.get_house_info(house_url, co_id, bu_id)``.

     Args:
         build_info_list: Iterable of HTML fragments, one per building.
         co_id: Value copied to ``Building.co_id`` and forwarded to
             ``get_house_info``.

     Any exception raised while handling a row (including one from
     ``get_house_info``) is printed and the loop moves on to the next row.
     """
     flags = re.S | re.M
     for row_html in build_info_list:
         try:
             build = Building(co_index)
             # Patterns are raw strings so that "\?" is a regex escape and
             # not an invalid Python string escape.
             build.bu_num = re.search(r'<td>(.*?)</td>', row_html, flags).group(1)
             build.bu_all_house = re.search(r'<td>.*?<td>(.*?)</td>', row_html, flags).group(1)
             build.bu_all_size = re.search(r'<td>.*?<td>.*?<td>(.*?)</td>', row_html, flags).group(1)
             build.bu_id = re.search(r'\?id=(.*?)"', row_html, flags).group(1)
             build.co_id = co_id
             build.insert_db()
             house_url = re.search(r'href="(.*?)"', row_html, flags).group(1)
             self.get_house_info(house_url, co_id, build.bu_id)
         except Exception as e:
             print('楼栋错误,co_index={},str={}'.format(co_index, row_html), e)
Exemplo n.º 3
0
 def bu_parse(self, detail_url, co_id):
     """Walk a project's presale pages and store every building found.

     ``detail_url`` has ``'lp'`` replaced by ``'presell'`` to obtain the
     presale listing. Each ``//dt/strong/a`` link on it is fetched through
     a proxy; each ``//tr//strong/a`` link on that page is a building
     page, which is fetched, parsed with regular expressions into a
     ``Building``, written with ``insert_db()`` and then passed to
     ``self.ho_parse(co_id, bu_id, response)``.

     Args:
         detail_url: URL of the project detail page.
         co_id: Value copied to ``Building.co_id`` and forwarded to
             ``ho_parse``.

     Raises:
         IndexError / AttributeError: when an expected link, text node or
             regex match is missing on a page (not caught here).
     """
     pre_url = detail_url.replace('lp', 'presell')
     pre_res = requests.get(pre_url, headers=self.headers)
     pre_html = etree.HTML(pre_res.text)
     for bu_pre in pre_html.xpath("//dt/strong/a"):
         bu_pre_url = bu_pre.xpath("./@href")[0]
         bu_pre_sale = bu_pre.xpath("./text()")[0]
         bu_res = self._get_with_random_proxy('http://www.zstmsf.com' + bu_pre_url)
         bu_html = etree.HTML(bu_res.text)
         for bo_url in bu_html.xpath("//tr//strong/a/@href"):
             ho_url = "http://www.zstmsf.com" + bo_url
             ho_res = self._get_with_random_proxy(ho_url)
             build = Building(co_index)
             build.co_id = co_id
             # Raw string: "\d" must reach the regex engine unchanged.
             build.bu_id = re.search(r'zid=.*?(\d+)', ho_url).group(1)
             build.bu_num = re.search('幢名称:<strong>(.*?)<',
                                      ho_res.text).group(1)
             build.bu_all_house = re.search("幢总套数.*?'>(.*?)</",
                                            ho_res.text).group(1)
             build.bu_all_size = re.findall("面积.*?'>(.*?)</",
                                            ho_res.text)[0]
             build.bu_pre_sale = bu_pre_sale
             build.insert_db()
             self.ho_parse(co_id, build.bu_id, ho_res)

 def _get_with_random_proxy(self, url):
     """GET ``url`` through a randomly picked proxy, retrying until success.

     A proxy is taken from ``self.proxies`` at a random index in 0..9 on
     every attempt and the request uses a 10 second timeout. There is no
     retry limit.
     """
     while True:
         try:
             proxy = self.proxies[random.randint(0, 9)]
             return requests.get(url,
                                 headers=self.headers,
                                 proxies=proxy,
                                 timeout=10)
         except Exception:
             # Not a bare except: KeyboardInterrupt and SystemExit must be
             # able to break out of this otherwise endless retry loop.
             continue
Exemplo n.º 4
0
 def get_build_info(self, co_id):
     """Fetch the building list of one project and store every building.

     POSTs ``activityId=<co_id>&module=jtsActBuildingInfo`` to
     ``http://www.yanjifc.com/jdi`` and iterates ``['ROWS']['ROW']`` of the
     JSON reply. Each entry is copied into a ``Building`` through
     ``self.dict_get``, written with ``insert_db()`` and followed by a call
     to ``self.get_house_info(co_id, bu_id)``.

     Args:
         co_id: Project id sent as ``activityId`` and copied to
             ``Building.co_id``.
     """
     # (Building attribute, key in the JSON row), in assignment order.
     # 'LOCATION' deliberately appears twice, as in the upstream mapping.
     field_map = (
         ('bu_all_size', 'BUILDING_AREA'),
         ('bu_address', 'LOCATION'),
         ('bu_num', 'LOCATION'),
         ('bu_floor', 'TOTAL_FLOORS'),
         ('bu_all_house', 'TOTAL_SET'),
         ('co_build_structural', 'STRUCTURE'),
         ('bu_id', 'RESOURCE_GUID'),
     )
     body = "".join(("activityId=", str(co_id), "&module=jtsActBuildingInfo"))
     response = requests.post(url='http://www.yanjifc.com/jdi',
                              data=body,
                              headers=self.headers)
     rows = response.json()['ROWS']['ROW']
     for row in rows:
         record = Building(co_index)
         for attr_name, row_key in field_map:
             setattr(record, attr_name, self.dict_get(row, row_key))
         record.co_id = co_id
         record.insert_db()
         self.get_house_info(co_id, record.bu_id)
Exemplo n.º 5
0
    def bu_parse(self, co_id, bulist):
        """Fetch each building page, store the building and parse its houses.

        Every entry of ``bulist`` is appended to ``http://110.89.45.7:8082``
        and fetched. The building id comes from the ``buildingInfoID``
        query value of the entry; the other fields are pulled out of the
        page text with regular expressions. After ``insert_db()`` the
        ``//td[@style]/a`` elements of the page are handed to
        ``self.ho_parse(co_id, bu_id, elements)``.

        Args:
            co_id: Value copied to ``Building.co_id`` and forwarded to
                ``ho_parse``.
            bulist: Iterable of URL paths of building pages.

        Raises:
            AttributeError: when one of the expected patterns is not found
                (``re.search`` returns ``None``).
        """
        page_flags = re.S | re.M
        for rel_url in bulist:
            response = requests.get("http://110.89.45.7:8082" + rel_url,
                                    headers=self.headers)
            page = response.text

            def field(pattern):
                # First capture group of ``pattern`` in the current page.
                return re.search(pattern, page, page_flags).group(1)

            building = Building(co_index)
            building.co_id = co_id
            building.bu_id = re.search('buildingInfoID=(.*?)&', rel_url).group(1)
            building.bu_num = field('幢号.*?">(.*?)</')
            building.bu_floor = field('总 层 数.*?">(.*?)</')
            building.bu_live_size = field('批准销售.*?">.*?</td.*?">(.*?)</td')
            building.bu_all_size = field('总面积.*?">(.*?)</')
            building.bu_type = field('设计用途.*?">(.*?)</')
            building.insert_db()

            house_links = etree.HTML(page).xpath("//td[@style]/a")
            self.ho_parse(co_id, building.bu_id, house_links)