示例#1
0
	def fetch_deal_in(self, city_id):
		try:
			total_page_num = -1
			total_deal_num = -1
			start_page_num = 1
			print "lashou_crawl.py fetch_deal_in", self.mapping[city_id]
			while True:
				content  = tuan_http.http_fetch(self.url_deal + city_id + "/p/" + str(start_page_num) + "/r/500", self.user_agent)
				filepath = None
				if  start_page_num == 1:
					filepath = os.path.join(self.data_store, city_id)
				else:
					filepath = os.path.join(self.data_store, city_id + "_" + str(start_page_num))
				fhandler = open(filepath, "w")
				fhandler.write(content)
				fhandler.close()

				if  total_deal_num == -1:
					root = ET.fromstring(content)
					total_deal_num = int(root.attrib["count"])
					total_page_num = total_deal_num / 500 + 1
				if  start_page_num == total_page_num:
					break
				else:
					start_page_num = start_page_num + 1
		except Exception, e:
			print "Error: lashou_crawl.py fetch_deal_in", self.mapping[city_id], e
			raise
示例#2
0
	def fetch_deal_in(self, city):
		try:
			print "dida_crawl.py fetch_deal_in", city
			content  = tuan_http.http_fetch(self.url_deal + city, self.user_agent)
			filepath = os.path.join(self.data_store, city)
			fhandler = open(filepath, "w")
			fhandler.write(content)
			fhandler.close()
		except Exception, e:
			print "Error: dida_crawl.py fetch_deal_in", city, e
			raise
示例#3
0
	def fetch_deal_in(self, city):
		try:
			start_page_num = 1
			count_per_page = 10000
			print "dianping_crawl.py fetch_deal_in", city
			content  = tuan_http.http_fetch(self.url_deal + self.mapping[city] + "&page=" + str(start_page_num) + "&count=" + str(count_per_page), self.user_agent)
			filepath = os.path.join(self.data_store, city)
			fhandler = open(filepath, "w")
			fhandler.write(content)
			fhandler.close()
		except Exception, e:
			print "Error: dianping_crawl.py fetch_deal_in", city, e
			raise
示例#4
0
	def fetch_city_list(self):
		try:
			content = tuan_http.http_fetch(self.url_city_list, self.user_agent)
			try:
				city_list = []
				root = ET.fromstring(content)
				for city in root.iter('city'):
					city_list.append(city.find('id').text)
				return city_list
			except Exception, e:
				print "Error: dida_crawl.py fetch_city_list", e
				raise
		except Exception, e:
			print "Error: dida_crawl.py fetch_city_list", e
			raise
示例#5
0
    def fetch_city_list(self):
        try:
            print "wuba_crawl.py fetch_city_list"
            content = tuan_http.http_fetch(self.url_city_list, self.user_agent)
            filepath = os.path.join(self.data_store, "city_list")
            fhandler = open(filepath, "w")
            fhandler.write(content)
            fhandler.close()

            try:
                city_list = []
                root = ET.fromstring(content)
                for city in root.iter("city"):
                    city_list.append(city.find("enname").text)
                return city_list
            except Exception, e:
                raise
        except Exception, e:
            print "Error: wuba_crawl.py fetch_city_list", e
            raise
示例#6
0
	def _fetch(self, url):
		"""Fetch ``url`` via ``tuan_http.http_fetch`` using this object's user agent.

		Returns whatever ``tuan_http.http_fetch`` returns; errors propagate.
		"""
		fetcher = tuan_http.http_fetch
		return fetcher(url, self.user_agent)