示例#1
0
    def processHouse(self, houseStr):
        """
		对每个学区房记录进行处理
		"""
        houseInfo = houseStr.find("div", attrs={"class": "inventory_list_r_tit_list"}).find_all("a")
        houseId = houseInfo[0]["href"][len(PrefixURL) :][:-1]
        address = houseStr.find("div", attrs={"class": "inventory_list_r_name_ad"}).text
        detailInfo = houseStr.find("div", attrs={"class": "inventory_list_r_details_r"})
        details = detailInfo.find_all("span")
        price = details[2].text
        area = details[1].text
        # 如果houseId没被处理过
        if not houseId in self.success_houseIds:
            print houseId, address, price, area

            house = House()
            house.houseId = houseId
            house.address = address
            house.price = price
            house.area = area
            house.flage = 0
            self.houses.append(house)
            self.success_houseIds[houseId] = house

        else:
            print "id:%s  exist" % houseId
            print houseId, address, price, area
            house = self.success_houseIds[houseId]
            if houseId == house.houseId and address == house.address and price == house.price and area == house.area:
                house.flage = 1
            else:
                house.flage = 2
                print "The same houseId have different data"
            self.houses.append(house)
示例#2
0
	def getHouseList(self):
		"""
		Load house records from the spreadsheet and process them in batches.

		Opens ``houseInfo_address.xls``, reads the sheet named ``houseinfo``
		starting at row 1 (row 0 is skipped, presumably a header row), and
		turns every row into a House using columns 0-5 as houseId, address,
		area, price, longitude and latitude.

		Houses accumulate in ``self.houses``. Whenever the row index is a
		multiple of ``PageNumber``, and once more at the end if anything is
		still pending, the batch is handed to ``self.processHouses()`` and
		``self.saveToFile()``, ``self.excelIndex`` is advanced by the batch
		size, and ``self.houses`` is reset to an empty list.
		"""
		def flushBatch():
			# Process and persist the pending batch, then move the running
			# index past it so every flush (including the final partial one)
			# leaves excelIndex consistent with what has been saved.
			self.processHouses()
			self.saveToFile()
			self.excelIndex += len(self.houses)
			self.houses = []

		data = xlrd.open_workbook('houseInfo_address.xls')
		table = data.sheet_by_name(u'houseinfo')
		for index in range(1, table.nrows):
			houseId = table.cell(index, 0).value
			address = table.cell(index, 1).value
			area = table.cell(index, 2).value
			price = table.cell(index, 3).value
			longitude = table.cell(index, 4).value
			latitude = table.cell(index, 5).value

			house = House()
			house.houseId = houseId
			house.address = address
			house.price = price
			house.area = area
			house.longitude = longitude
			house.latitude = latitude
			self.houses.append(house)

			# A full page of rows has been read: flush it.
			if index % PageNumber == 0:
				flushBatch()

		# Flush whatever is left over after the last full page.
		if self.houses:
			flushBatch()
示例#3
0
	def processHouse(self,houseStr):
		"""
		对每个学区房记录进行处理
		"""
		houseInfo = houseStr.find_all('a')
		houseId = houseInfo[0]['href'][len(self.prefixText):][:-5]
		addressUrl = self.prefixUrl + houseInfo[1]['href']
		address = self.processAddress(addressUrl)
		price = (houseStr.find('div',attrs={"class": "price-pre"})).text
		area = (houseStr.find('div',attrs={"class": "where"}).find_all('span'))[3].text
		#如果houseId没被处理过
		if not houseId in self.success_houseIds:
			print houseId,address,price,area
			house = House()
			house.houseId = houseId
			house.address = address
			house.price = price
			house.area = area
			house.flage = 0
			self.houses.append(house)
			self.success_houseIds[houseId] = house
		else:
			print "id:%s  exist" % houseId
			print houseId,address,price,area
			house = self.success_houseIds[houseId]
			if(houseId == house.houseId and address == house.address and price == house.price and area == house.area):
				house.flage = 1
			else:
				house.flage = 2
				print "The same houseId have different data"
			self.houses.append(house)