def segement_first(self,response):
		"""Collect detail URLs from a JSON first-level segment response.

		Configuration is read from ``response.meta``. When the ``First`` entry
		contains a ``'json'`` key, the response body is parsed as JSON, every
		key in ``segement['index']`` except the last is followed downward, and
		the last key is read from each item of the list found there. The
		collected values are handed to ``R_2_A`` together with ``Index_Url``,
		the spider name, ``level`` (1) and ``is_sege`` (1).

		A missing ``First`` entry or a missing key on the index path is treated
		as "no entries" instead of raising ``AttributeError``/``TypeError``.

		The visible part of this callback neither returns nor yields
		``detail_url``.
		"""
		# Every meta value is fetched up front; several of them are not used
		# in the visible part of this callback.
		Some_Info = response.meta.get('Some_Info',None)
		Index_Url = response.meta.get('Index_Url',None)
		segement = response.meta.get('segement',None)
		First = response.meta.get('First',None)
		Second = response.meta.get('Second',None)
		Third = response.meta.get('Third',None)
		Final_Xpath = response.meta.get('Final_Xpath',None)
		detail_url = []
		level = 1
		is_sege = 1
		# `in` replaces dict.has_key(), which no longer exists on Python 3;
		# the None check covers a request that carried no 'First' entry.
		if First is not None and 'json' in First:
			res_json = json.loads(response.body_as_unicode())
			index_path = segement['index']
			# Follow every key but the last so that res_json ends up being the
			# innermost list; the last key is read from each item of that list.
			for key in index_path[:-1]:
				if not isinstance(res_json, dict):
					# The path left the nested objects: nothing to collect.
					res_json = None
					break
				res_json = res_json.get(key)
			detail_url = [entry.get(index_path[-1]) for entry in res_json or []]
			try:
				# R_2_A is defined outside this block; NOTE(review): presumably
				# it normalises the collected URLs against Index_Url - confirm.
				detail_url = R_2_A(Index_Url,detail_url,self.name,level,is_sege)
			except Exception as e:
				# Same output as the former `print Exception,":",e`, written
				# so that it is valid on both Python 2 and Python 3.
				print("%s : %s" % (Exception, e))
Exemplo n.º 2
0
	def segement_second(self,response):
		"""Collect detail URLs from a JSON segment response.

		Configuration is read from ``response.meta``. When the ``segement``
		entry contains a ``'json'`` key, the response body is parsed as JSON,
		every key in ``segement['index']`` except the last is followed
		downward, and the last key is read from each item of the list found
		there. The collected values are handed to ``R_2_A`` together with
		``Index_Url``, the spider name, ``level`` (0) and ``is_sege`` (1).

		A missing ``segement`` entry or a missing key on the index path is
		treated as "no entries" instead of raising
		``AttributeError``/``TypeError``.

		The visible part of this callback neither returns nor yields
		``detail_url``.
		"""
		# Original author's note: every meta value is accepted whether or not
		# it is present and is checked at the point of use; a missing value
		# means the flow should go straight to final_parse.
		Some_Info = response.meta.get('Some_Info',None)
		Index_Url = response.meta.get('Index_Url',None)
		segement = response.meta.get('segement',None)
		detail_url = []
		level = 0
		is_sege = 1

		# `in` replaces dict.has_key(), which no longer exists on Python 3;
		# the None check covers a request that carried no 'segement' entry.
		if segement is not None and 'json' in segement:
			res_json = json.loads(response.body_as_unicode())
			index_path = segement['index']
			# Follow every key but the last so that res_json ends up being the
			# innermost list; the last key is read from each item of that list.
			for key in index_path[:-1]:
				if not isinstance(res_json, dict):
					# The path left the nested objects: nothing to collect.
					res_json = None
					break
				res_json = res_json.get(key)
			detail_url = [entry.get(index_path[-1]) for entry in res_json or []]
			try:
				# R_2_A is defined outside this block; NOTE(review): presumably
				# it normalises the collected URLs against Index_Url - confirm.
				detail_url = R_2_A(Index_Url,detail_url,self.name,level,is_sege)
			except Exception as e:
				# Same output as the former `print Exception,":",e`, written
				# so that it is valid on both Python 2 and Python 3.
				print("%s : %s" % (Exception, e))
Exemplo n.º 3
0
							i = T_T_P(i,self.name,level)
							url = urls.format(page=str(i))
							if C_U_V(url):
								request = Request(url,callback = self.parse_first,dont_filter=True)
								request.meta['Index_Url'] = url
								yield request
							else:
								continue
						


		else:
			detail_url = []
			if not Zero.has_key('json'):
				for xpath in Zero['xpath']:
					for url in R_2_A(Index_Url,response.xpath(xpath).extract(),self.name,level,is_sege):
						detail_url.append(url)
			else:
				res_json = json.loads(response.body_as_unicode())
				# Descend through the nested keys to the innermost list: follow every key except the last, then loop over that list and read the last key from each item to get detail_url.
				depth = 0
				length = len(Zero['index'])
				while depth < length - 1:
					res_json = res_json.get(Zero['index'][depth])
					depth += 1
					#print "now the res_json is %s"%res_json
				for i in res_json:
					detail_url.append(i.get(Zero['index'][length-1]))
				try:
					detail_url = R_2_A(Index_Url,detail_url,self.name,level,is_sege)
				except Exception,e: