Пример #1
0
    def get_house_info(self, house_url_list, bu_id, co_id):
        """Fetch every house detail page and persist one ``House`` per page.

        Each entry of ``house_url_list`` is appended to the
        ``http://www.fjnpfdc.com/House/`` prefix, downloaded with
        ``self.headers`` and decoded as GBK. The individual fields are pulled
        out of the page with regular expressions, while ``bu_id`` and
        ``co_id`` are copied onto the record unchanged.

        Any exception raised while handling one entry (failed request,
        missing field, failing ``insert_db``) is printed and the loop moves
        on to the next entry.
        """
        flags = re.S | re.M
        # (House attribute, pattern whose first group holds the value),
        # listed in the order the attributes are assigned.
        field_patterns = (
            ('bu_num', '幢  号:.*?<td>(.*?)<'),
            ('ho_name', '房  号:.*?<td>(.*?)<'),
            ('co_name', '项目名称:.*?<td>(.*?)<'),
            ('ho_build_size', '建筑面积:.*?<td>(.*?)<'),
            ('ho_true_size', '套内面积:.*?<td>(.*?)<'),
            ('ho_share_size', '分摊面积:.*?<td>(.*?)<'),
            ('ho_floor', '所 在 层:.*?<td>(.*?)<'),
        )
        for rel_url in house_url_list:
            try:
                record = House(co_index)
                page_url = 'http://www.fjnpfdc.com/House/' + rel_url
                reply = requests.get(page_url, headers=self.headers)
                page = reply.content.decode('gbk')

                record.bu_id = bu_id
                record.co_id = co_id
                for attr, pattern in field_patterns:
                    # A missing field makes re.search return None, so the
                    # .group call raises and the whole record is skipped.
                    found = re.search(pattern, page, flags)
                    setattr(record, attr, found.group(1))

                record.insert_db()
            except Exception as err:
                print("co_index={},房屋{}错误".format(co_index, rel_url), err)
Пример #2
0
    def get_house_detail(self, house_url_list):
        """Parse each building page and store one ``House`` per room on it.

        For every URL in ``house_url_list`` the page is downloaded and decoded
        as GBK. The building number and project name are read once per page;
        the page is then scanned for ``<Room>`` entries and each entry becomes
        a ``House`` record that is written with ``insert_db``.

        A failure on a single room is printed and that room is skipped.
        Failures while downloading or reading the page header (request error,
        missing building/project name) are not caught here and propagate to
        the caller.
        """
        flags = re.S | re.M
        for page_url in house_url_list:
            res = requests.get(page_url)
            html = res.content.decode('gbk')
            bu_name = re.search('楼号:.*?HouseNum">(.*?)</span>', html, flags).group(1)
            co_name = re.search('项目名称.*?PrjName">(.*?)</span>', html, flags).group(1)
            # Map room id -> displayed room number. findall yields
            # (room_id, room_number) pairs, so dict() builds the lookup
            # directly; a repeated id keeps its last occurrence.
            ho_id_dict = dict(
                re.findall(r"aspx\?Room=(.*?)'.*?<b>(.*?)</b>", html, flags))

            house_info = re.findall(
                "<Room><Cell RoomID='(.*?)'.*?BArea='(.*?)'.*?HouseUse='(.*?)'.*?</Room>",
                html,
                flags)
            for room_id, true_size, ho_type in house_info:
                try:
                    h = House(self.co_index)
                    # Raises KeyError (caught below) when the room has no
                    # matching link on the page.
                    h.ho_name = ho_id_dict[room_id]
                    h.ho_true_size = true_size
                    h.ho_type = ho_type
                    h.co_name = co_name
                    h.bu_num = bu_name
                    h.insert_db()
                except Exception as e:
                    # Use self.co_index, the same value the record was built
                    # with, so the handler does not depend on a separate
                    # module-level name.
                    print('房屋错误,co_index={},url={}'.format(self.co_index, page_url), e)
Пример #3
0
 def get_house_info(self, house_url_list, co_name, bu_num):
     """Download each house page and save its details as a ``House`` record.

     Every entry of ``house_url_list`` is appended to the
     ``http://www.sxczfdc.com/pubinfo/`` prefix and fetched with
     ``self.headers``. ``co_name`` and ``bu_num`` are copied onto the record
     as given; the remaining fields are taken from the first match of each
     label pattern in the response text.

     Any exception raised for one entry is printed and the loop continues
     with the next entry.
     """
     flags = re.S | re.M
     # (House attribute, pattern), in assignment order.
     label_patterns = (
         ('ho_floor', 'HouseInfo1_lblFwlc">(.*?)<'),
         ('ho_name', 'HouseInfo1_lblFwfh">(.*?)<'),
         ('ho_type', 'HouseInfo1_lblFwlx">(.*?)<'),
         ('ho_room_type', 'HouseInfo1_lblFwhx">(.*?)<'),
         ('ho_build_size', 'HouseInfo1_lblycfwjzmj">(.*?)<'),
         ('ho_true_size', 'HouseInfo1_lblycfwtnmj">(.*?)<'),
         ('ho_share_size', 'HouseInfo1_lblycfwftmj">(.*?)<'),
         ('orientation', 'HouseInfo1_lblCx">(.*?)<'),
     )
     for rel_url in house_url_list:
         try:
             record = House(co_index)
             record.co_name = co_name
             record.bu_num = bu_num
             page_url = 'http://www.sxczfdc.com/pubinfo/' + rel_url
             page = requests.get(page_url, headers=self.headers).text
             for attr, pattern in label_patterns:
                 # Indexing [0] raises IndexError when the label is absent,
                 # which skips the record via the handler below.
                 matches = re.findall(pattern, page, flags)
                 setattr(record, attr, matches[0])
             record.insert_db()
         except Exception as err:
             print(err)
Пример #4
0
 def get_house_info(self, house_url_list):
     """Hand every room page in ``house_url_list`` to ``ProducerListUrl``.

     For each entry the ``dongid`` and ``roomid`` query values are extracted
     and used to build the room-info URL. A ``House`` object is filled with
     regular-expression *rules* (not values) and passed, via ``to_dict()``,
     as ``analyzer_rules_dict`` with ``analyzer_type='regex'``.
     NOTE(review): presumably ``ProducerListUrl.get_details`` fetches the
     page and applies those rules -- confirm against its implementation.

     Any exception raised for one entry is printed together with the URL
     being processed, and the loop continues with the next entry.
     """
     for i in house_url_list:
         # Start from the raw entry so the error log below always refers to
         # *this* iteration. Previously, a failure while parsing the ids left
         # ``house_url`` unbound (first iteration) or stale (later ones).
         house_url = i
         try:
             dongid = re.search('dongid=(.*?)&', i).group(1)
             roomid = re.search('roomid=(.*?)&', i).group(1)
             house_url = 'http://zjjg.0557fdc.com:9555/xiaoqu/roominfo.aspx?dongid=' + dongid + '&roomid=' + roomid
             house = House(co_index)
             # Each attribute holds the pattern used to locate that field.
             house.co_name = 'Labelxqmc">(.*?)<'
             house.area = 'Labelxzq">(.*?)<'
             house.bu_num = 'Labeldongmc">(.*?)<'
             house.ho_type = 'Labelyxyongtu">(.*?)<'
             house.ho_name = '<span id="Labelroommc".*?>(.*?)</span>'
             house.ho_build_size = 'Labeljzmianji">(.*?)<'
             house.ho_true_size = 'Labeltaonei">(.*?)<'
             house.ho_share_size = 'Labelgongtan">(.*?)<'
             house.ho_room_type = 'Labelhuxing">(.*?)<'
             house.bu_id = 'dongid=(.*?)&'
             p = ProducerListUrl(page_url=house_url,
                                 request_type='get',
                                 encode='utf-8',
                                 analyzer_rules_dict=house.to_dict(),
                                 analyzer_type='regex',
                                 headers=self.headers)
             p.get_details()
         except Exception as e:
             print('房号错误,co_index={},url={}'.format(co_index, house_url), e)
Пример #5
0
 def get_house_detail(self, house_detail_url_list, co_id, bu_id):
     """Scrape each house detail page and store it as a ``House`` record.

     Every entry of ``house_detail_url_list`` is appended to the
     ``http://www.yzfdc.cn/`` prefix. Before each request the method sleeps
     for three seconds, then fetches the page through the session
     ``self.s`` with ``self.headers``. Fields are extracted from the
     response text by label pattern; ``bu_id`` and ``co_id`` are copied onto
     the record unchanged.

     Any exception raised for one entry is printed with its URL and the loop
     continues with the next entry.
     """
     flags = re.S | re.M
     # (House attribute, pattern), in assignment order.
     label_patterns = (
         ('co_name', 'lblxmmc.*?>(.*?)<'),
         ('bu_num', 'lbldh.*?>(.*?)<'),
         ('ho_name', 'lblfh.*?>(.*?)<'),
         ('ho_build_size', 'lbljzmj.*?>(.*?)<'),
         ('ho_true_size', 'lbltnmj.*?>(.*?)<'),
         ('ho_share_size', 'lblftmj.*?>(.*?)<'),
         ('ho_type', 'lblfwxz.*?>(.*?)<'),
         ('ho_room_type', 'lblhuxin.*?>(.*?)<'),
     )
     for rel_url in house_detail_url_list:
         detail_url = 'http://www.yzfdc.cn/' + rel_url
         try:
             record = House(co_index)
             time.sleep(3)
             page = self.s.get(detail_url, headers=self.headers).text
             for attr, pattern in label_patterns:
                 # A missing label makes re.search return None; the
                 # resulting AttributeError skips this record.
                 found = re.search(pattern, page, flags)
                 setattr(record, attr, found.group(1))
             record.bu_id = bu_id
             record.co_id = co_id
             record.insert_db()
         except Exception as err:
             print('房号错误,co_index={},url={}'.format(co_index, detail_url),
                   err)
Пример #6
0
    def get_build_info(self, build_url_list, bu_pre_sale_list, co_name, co_id):
        """Store every building in ``build_url_list`` and the houses it lists.

        ``build_url_list`` and ``bu_pre_sale_list`` are read in parallel: the
        pre-sale value at position *n* belongs to the building URL at
        position *n*. For each building the page is downloaded (GBK), a
        ``Building`` record is saved, and every house link found on the page
        is fetched and saved as a ``House`` record that inherits the
        building's ``bu_id`` / ``bu_num`` and the given ``co_name`` /
        ``co_id``.

        A failure on one house is printed and only that house is skipped; a
        failure on the building itself is printed and the whole building
        (including its houses) is skipped.
        """
        base_url = 'http://221.2.144.162:8090/'
        flags = re.S | re.M
        for build_idx, build_path in enumerate(build_url_list):
            try:
                build = Building(co_index)
                build.co_id = co_id

                build.co_name = co_name
                # Indexed inside the try so a short pre-sale list is reported
                # per building instead of aborting the loop.
                build.bu_pre_sale = bu_pre_sale_list[build_idx]
                build.bu_id = re.search(r'lh=(\d+)', build_path).group(1)
                build_url = base_url + build_path
                response = requests.get(build_url, headers=self.headers)
                html = response.content.decode('gbk')
                build.bu_num = re.findall('<font color=white.*?><b>(.*?)<',
                                          html, flags)[0]
                build.bu_address = re.findall('坐落位置:</b>(.*?)<', html,
                                              flags)[0]
                build.insert_db()
                ho_url_list = re.findall('background-.*?href=(.*?) ', html,
                                         flags)
                ho_name_list = re.findall('background-color.*?<a.*?>(.*?)<',
                                          html, flags)
                # Separate index name: the original reused the outer loop
                # variable here, shadowing the building index.
                for house_idx, house_path in enumerate(ho_url_list):
                    try:
                        house = House(co_index)
                        house_url = base_url + house_path
                        result = requests.get(
                            house_url,
                            headers=self.headers).content.decode('gbk')
                        house.bu_id = build.bu_id
                        house.co_id = co_id
                        house.ho_type = re.findall(
                            '用&nbsp;&nbsp;&nbsp;途:.*?<td.*?>(.*?)<', result,
                            flags)[0]
                        house.ho_build_size = re.findall(
                            '建筑面积:.*?<td>(.*?)<', result, flags)[0]
                        house.bu_num = build.bu_num
                        house.co_name = co_name
                        # The name list comes from a different pattern than
                        # the URL list, so it may be shorter; an IndexError
                        # here is handled per house.
                        house.ho_name = ho_name_list[house_idx]
                        house.insert_db()
                    except Exception as e:
                        print("co_index={},房屋信息错误".format(co_index), e)
            except Exception as e:
                print("co_index={},楼栋信息错误".format(co_index), e)
Пример #7
0
 def get_house_info(self, house_url_list):
     """Hand every house page in ``house_url_list`` to ``ProducerListUrl``.

     Each entry is appended to the ``http://www.ndjsj.gov.cn/House/`` prefix.
     A ``House`` object is filled with regular-expression *rules* (not
     values) and passed, via ``to_dict()``, as ``analyzer_rules_dict`` with
     ``analyzer_type='regex'``.
     NOTE(review): presumably ``ProducerListUrl.get_details`` fetches the
     page and applies those rules -- confirm against its implementation.

     Any exception raised for one entry is printed together with the URL
     being processed, and the loop continues with the next entry.
     """
     for i in house_url_list:
         # Start from the raw entry so the error log below always refers to
         # *this* iteration. Previously, a failure before the URL was built
         # left ``house_url`` unbound (first iteration) or stale (later ones).
         house_url = i
         try:
             house = House(co_index)
             house_url = 'http://www.ndjsj.gov.cn/House/' + i
             # Each attribute holds the pattern used to locate that field.
             house.bu_num = '幢  号:.*?<td.*?>(.*?)<'
             house.ho_name = '房  号:.*?<td.*?>(.*?)<'
             house.co_name = '项目名称:.*?<td.*?>(.*?)<'
             house.ho_build_size = '建筑面积:.*?<td.*?>(.*?)<'
             house.ho_true_size = '套内面积:.*?<td.*?>(.*?)<'
             house.ho_share_size = '分摊面积:.*?<td.*?>(.*?)<'
             house.ho_type = '房屋用途:.*?<td.*?>(.*?)<'
             house.ho_floor = '所 在 层:.*?<td.*?>(.*?)<'
             house.ho_room_type = '房屋户型:.*?<td.*?>(.*?)<'
             p = ProducerListUrl(page_url=house_url,
                                 request_type='get',
                                 encode='utf-8',
                                 analyzer_rules_dict=house.to_dict(),
                                 analyzer_type='regex',
                                 headers=self.headers)
             p.get_details()
         except Exception as e:
             print('宁德房号错误,url={}'.format(house_url), e)
Пример #8
0
 def get_house_info(self, code, co_name):
     """Request a building's house table and save one ``House`` per entry.

     ``code[0]`` is inserted as the first ``<item>`` of the XML request body
     posted to the ``ExeFunCommon.aspx`` endpoint; ``code[1]`` is stored as
     the building number of every record. Each ``title='...'`` attribute in
     the response is parsed for room number, usage, layout and total area,
     and saved together with ``co_name``.

     Any exception raised for one entry is printed and the loop continues
     with the next entry. Errors while sending the request are not caught.
     """
     xml_head = ('<?xml version="1.0" encoding="utf-8" standalone="yes"?>\r\n'
                 '<param funname="SouthDigital.Wsba.CBuildTableEx.GetBuildHTMLEx">\r\n'
                 '<item>')
     xml_tail = ('</item>\r\n'
                 '<item>1</item>\r\n'
                 '<item>1</item>\r\n'
                 '<item>55</item>\r\n'
                 '<item>840</item>\r\n'
                 '<item>g_oBuildTable</item>\r\n'
                 '<item>false</item>\r\n'
                 '<item> 1=1</item>\r\n'
                 '</param>\r\n')
     request_body = xml_head + code[0] + xml_tail
     reply = requests.post(
         url='http://house.bffdc.gov.cn/Common/Agents/ExeFunCommon.aspx?',
         data=request_body,
         headers={'Content-Type': "text/xml"})
     tooltips = re.findall("title='(.*?)'", reply.text, re.S | re.M)
     # (House attribute, pattern); each value ends at the next CR LF.
     line_patterns = (
         ('ho_name', '房号:(.*?)\r\n'),
         ('ho_type', '用途:(.*?)\r\n'),
         ('ho_room_type', '户型:(.*?)\r\n'),
         ('ho_build_size', '总面积:(.*?)\r\n'),
     )
     for tooltip in tooltips:
         try:
             record = House(co_index)
             record.bu_num = code[1]
             for attr, pattern in line_patterns:
                 found = re.search(pattern, tooltip)
                 setattr(record, attr, found.group(1))
             record.co_name = co_name
             record.insert_db()
         except Exception as err:
             print(err)
Пример #9
0
    def get_build_url_list(self, url_list):
        """Crawl listing pages down to individual houses and store everything.

        For every listing page in ``url_list`` (decoded as GBK) the method
        walks three levels:

        1. each project block on the listing page becomes a ``Comm`` record;
           the module-level ``count`` is incremented and printed after each
           one is saved;
        2. each row of the project's building table becomes a ``Building``
           record;
        3. each ``<ROOM_NUMBER>`` in the building's house-table data becomes
           a ``House`` record.

        Every level is best-effort: an exception is printed and only the
        item that failed (page, project, building or house) is skipped.
        """
        global count
        flags = re.S | re.M
        for list_url in url_list:
            try:
                list_html = requests.get(list_url).content.decode('gbk')
                for comm_block in re.findall('项目名称.*?</dl>', list_html, flags):
                    try:
                        c = Comm(self.co_index)
                        # Extracted once and reused for the building and
                        # house records below.
                        co_name = re.search('html">(.*?)</a>', comm_block,
                                            flags).group(1)
                        c.co_name = co_name
                        c.co_address = re.search('class="address"(.*?)</dd>',
                                                 comm_block, flags).group(1)
                        c.area = re.search('"city">(.*?)</dd>', comm_block,
                                           flags).group(1)
                        # NOTE(review): the "average" element is stored as
                        # co_develops -- confirm this is the intended field.
                        c.co_develops = re.search('"average">(.*?)</dd>',
                                                  comm_block, flags).group(1)
                        c.insert_db()
                        count += 1
                        print(count)

                        comm_path = re.search('a href="(.*?)">', comm_block,
                                              flags).group(1)
                        comm_url = self.url_source + comm_path
                        comm_html = requests.get(comm_url).content.decode('gbk')
                        build_info_str = re.search('楼盘表</td>(.*?)合  计',
                                                   comm_html, flags).group(1)
                        for build_row in re.findall('<tr.*?</tr>',
                                                    build_info_str, flags):
                            try:
                                b = Building(self.co_index)
                                b.co_name = co_name
                                b.bu_all_house = re.search(
                                    'absmiddle"  />(.*?)</a>', build_row,
                                    flags).group(1)
                                # Extracted once and reused for every house
                                # of this building.
                                bu_num = re.search(
                                    '="absmiddle"  />(.*?)</a></strong></',
                                    build_row, flags).group(1)
                                b.bu_num = bu_num
                                b.bu_build_size = re.search(
                                    'td class="t_c">.*?td class="t_c">(.*?㎡)</td>',
                                    build_row, flags).group(1)
                                b.insert_db()

                                build_path = re.search('a href="(.*?)"',
                                                       build_row,
                                                       flags).group(1)
                                build_url = self.url_source + build_path
                                build_html = requests.get(
                                    build_url).content.decode('gbk')
                                # The house table lives in an iframe; its
                                # "GetData" variant returns the logical
                                # building id needed for the final request.
                                house_url = self.url_source + re.search(
                                    '<iframe.*?"(.*?)"', build_html,
                                    flags).group(1)
                                logic_house_url = house_url.replace(
                                    'Default', 'GetData')
                                logic_house_html = requests.get(
                                    url=logic_house_url).content.decode()
                                logic_id = re.search(
                                    '<LOGICBUILDING_ID>(.*?)<',
                                    logic_house_html, flags).group(1)
                                final_url = 'http://www.yingtanfdc.com/website/presale/home/HouseTableControl/GetData.aspx?LogicBuilding_ID=' + logic_id
                                final_html = requests.get(
                                    url=final_url).content.decode('gbk')
                                for room_number in re.findall(
                                        '<ROOM_NUMBER>(.*?)</ROOM_NUMBER>',
                                        final_html, flags):
                                    try:
                                        h = House(self.co_index)
                                        h.info = final_html
                                        h.ho_name = room_number
                                        h.co_name = co_name
                                        h.bu_num = bu_num
                                        h.insert_db()
                                    except Exception as e:
                                        # Report instead of silently dropping
                                        # the house.
                                        print("co_index={},房屋信息错误".format(
                                            self.co_index), e)
                            except Exception as e:
                                print("co_index={},楼栋信息错误".format(
                                    self.co_index), e)
                    except Exception as e:
                        print("co_index={},小区信息错误".format(
                            self.co_index), e)
            except Exception as e:
                print("co_index={},列表页错误,url={}".format(
                    self.co_index, list_url), e)