Exemplo n.º 1
0
	def parse(self, response):
		"""Walk the paginated shop list and yield one geocoded item per shop.

		Loads ``response.url`` in ``self.driver``, then for at most 31 pages
		reads table rows 2..11 (name in ``td[1]``, address in ``td[2]``),
		geocodes the address through ``baiduAddressGeo.getGeoForAddress`` and
		yields a ``KfcItem`` carrying ``shopName``, ``lat`` and ``lng``.

		A shop whose address cannot be geocoded is reported and skipped, so
		it is never emitted with missing or stale coordinates. Iteration
		stops early when the "next page" link cannot be clicked.
		"""
		self.driver.get(response.url)

		for _ in range(31):
			# fetch geo info for every shop row on the current page
			for i in range(1, 11):
				# shop info starts in the 2nd tr on each page, hence i + 1
				row_index = str(i + 1)
				# NOTE(review): an XPath that starts with '//' is resolved
				# against the whole document even when evaluated on the
				# '#listhtml' element; './/tr[...]' would scope it to the
				# container -- confirm against the page before changing.
				shopName_xpath = '//tr[' + row_index + ']/td[1]'
				shopAddress_xpath = '//tr[' + row_index + ']/td[2]'

				listing = self.driver.find_element_by_css_selector('#listhtml')
				shopName = listing.find_element_by_xpath(shopName_xpath).text
				shopAddress = listing.find_element_by_xpath(shopAddress_xpath).text

				# encode the address to UTF-8 before prepending the city
				# prefix (presumably .text is unicode -- verify)
				shop_address_encode = shopAddress.encode('utf-8')
				full_address = '上海市' + shop_address_encode

				try:
					# shopGeo will be fetched from baidu as [lat, lng]
					shopGeo = baiduAddressGeo.getGeoForAddress(full_address)
				except Exception:
					print('can not get geo for this address')
					# Without this skip, shopGeo would be unbound (first
					# failure) or still hold the previous shop's coordinates.
					continue

				item = KfcItem()
				item['shopName'] = shopName
				item['lat'] = shopGeo[0]
				item['lng'] = shopGeo[1]
				yield item

			# turn to next page for shops
			try:
				self.driver.find_element_by_css_selector('a[style*="text-decoration:underline"] + *').click()
			except Exception:
				print('finish! no more pages!')
				return
Exemplo n.º 2
0
geoData = open('./geo_accident.csv', 'a')
try:
	geoWriter = csv.writer(geoData)
	geoWriter.writerow(('district', 'hurt', 'time', 'place','lat', 'lng'))
finally:
	geoData.close()

#处理原始文件每一行,从百度接口获取坐标
for row in addrAll:
	# print row[1].decode('utf-8')
	district = row[0]
	hurt = row[1]
	time = row[2]
	place = row[3]
	fullAddr = '上海市' + row[3]
	addressEncode = fullAddr.decode('utf-8').encode('utf-8')
	addrGeo = baiduAddressGeo.getGeoForAddress(addressEncode)
	lat = addrGeo[0]
	lng = addrGeo[1]

	print district, hurt, time, place, lat, lng

	#将百度返回的坐标写入目标文件
	geoData = open('./geo_accident.csv', 'a')
	try:
		geoWriter = csv.writer(geoData)
		geoWriter.writerow((district, hurt, time, place, lat, lng))
	finally:
		geoData.close()
	
addrData.close()