Exemplo n.º 1
0
	def __init__(self, url, bVerbose, namespace, containerName):
		"""Set up the reducer, the grid helpers and the bound ZMQ PULL socket.

		url           -- endpoint without scheme; the socket is bound to "tcp://" + url
		bVerbose      -- forwarded to Reducer
		namespace     -- kept as self.ns; forwarded to Reducer and ListenerGridAccess
		containerName -- kept as self.containerName; forwarded to Reducer
		"""
		self.ns = namespace
		self.containerName = containerName

		# Collaborators. The reducer is created first because the action
		# list logged right after it is built through self.r.
		self.r = Reducer(bVerbose, namespace, containerName)
		self.logInfo(self.dumpListActionName())
		self.grid = ListenerGridAccess(namespace)
		self.tool = ListenerTools()
		self.context = zmq.Context()

		# ZMQ: PULL socket bound on the requested endpoint.
		self.sock = self.context.socket(zmq.PULL)
		# LST_MSGIN_TIMEOUT is divided by 1000 before being handed to the
		# reducer (presumably milliseconds -> seconds; confirm with Reducer).
		self.r.setTimeout(None, LST_MSGIN_TIMEOUT / 1000)
		self.sock.bind("tcp://" + url)
Exemplo n.º 2
0
	def grid_content_CONCATandRemove(self, namespace, container, listcontentSrc, contentDest):
		"""Concatenate several contents into one content, then remove the sources.

		Every name in listcontentSrc is read from namespace/container; the
		non-empty ones are joined (each followed by a newline) and written to
		namespace/container/contentDest. The source contents that were read
		are removed only once the destination has been written.

		namespace      -- namespace used for the grid access and as path prefix
		container      -- container holding both the sources and the destination
		listcontentSrc -- iterable of source content names
		contentDest    -- name of the content to create
		"""
		grid = ListenerGridAccess(namespace)
		tool = ListenerTools()
		prefix = namespace + "/" + container + "/"

		pathsToRemove = []
		chunks = []
		for content in listcontentSrc:
			path = prefix + content
			data = grid.get_content(path)
			# an empty string is skipped: nothing to concatenate, nothing to remove
			if data != "":
				self.printmsg("Read content " + content)
				chunks.append(data + "\n")
				pathsToRemove.append(path)

		self.printmsg("Create content " + contentDest)
		stream = io.BytesIO("".join(chunks).encode("utf-8"))
		size = tool.getSizeStream(stream)
		# put_content is used elsewhere in this file as returning an error
		# string ("" on success). A non-empty value means the destination was
		# not written, so the sources must be kept to avoid losing data.
		error = grid.put_content(prefix + contentDest, stream, size)
		if error:
			self.printmsg("Create content " + contentDest + " failed: " + str(error)
							+ " - source contents are kept")
			return

		for path in pathsToRemove:
			self.printmsg("Remove temp content " + path)
			grid.remove_content(path)
Exemplo n.º 3
0
class Listener:
	"""Receive JSON messages on a ZMQ PULL socket and feed them to a Reducer.

	Messages carry a HEAD section (source name, source pid, status, message
	id, crawler id). A non-empty STATUS is a command ("setcrawlerid" or
	"stopact"); an empty STATUS marks a data message whose DATAH/DATAR
	sections are handed to the reducer. When a reduce action ends, its
	result is written as a content through ListenerGridAccess.
	"""

	# JSON format/constant
	# MANDATORY key NAME, format JSON
	JSON_KEYNAME_HEAD          = "HEAD"
	JSON_KEYNAME_HEAD_NAME     = "SRC_NAME"
	JSON_KEYNAME_HEAD_PID      = "SRC_ID"
	JSON_KEYNAME_HEAD_STATUS   = "STATUS"
	JSON_KEYNAME_HEAD_MSGID    = "MSG_ID"
	JSON_KEYNAME_HEAD_CRAWLERID= "CRAWL_ID"
	JSON_KEYNAME_DATAH         = "DATAH"
	JSON_KEYNAME_DATAR         = "DATAR"

	# values of the STATUS header field
	JSON_KEYNAME_HEAD_STATUS_setcrawlerid = "setcrawlerid"
	JSON_KEYNAME_HEAD_STATUS_stopact      = "stopact"
	JSON_KEYNAME_HEAD_STATUS_data         = ""


	def __init__(self, url, bVerbose, namespace, containerName):
		"""Create the reducer and grid helpers, then bind the PULL socket.

		url           -- endpoint without scheme; the socket is bound to "tcp://" + url
		bVerbose      -- forwarded to Reducer
		namespace     -- kept as self.ns; forwarded to Reducer and ListenerGridAccess
		containerName -- kept as self.containerName; forwarded to Reducer
		"""
		self.ns      = namespace
		self.containerName = containerName

		# init all sub-objects
		self.r       = Reducer(bVerbose, namespace, containerName)
		self.logInfo(self.dumpListActionName())
		self.grid    = ListenerGridAccess(namespace)
		self.tool    = ListenerTools()
		self.context = zmq.Context()

		# init ZMQ
		self.sock    = self.context.socket(zmq.PULL)
		self.r.setTimeout(None, LST_MSGIN_TIMEOUT / 1000)
		self.sock.bind("tcp://" + url)


	def __del__(self):
		"""Close the socket, then terminate the ZMQ context."""
		# __init__ may have failed before these attributes were assigned,
		# hence the getattr guards.
		sock = getattr(self, "sock", None)
		if sock is not None:
			# the context cannot terminate while one of its sockets is open
			sock.close()
		context = getattr(self, "context", None)
		if context is not None:
			context.term()


	def Initialize(self):
		"""Reset the reducer and create the result container.

		Returns False (after logging the error) when the container creation
		reports an error, True otherwise.
		"""
		self.r.clearAll()
		error = self.grid.create_container(self.containerName)
		if error != "":
			self.syslogErr(error)
			return False
		self.syslogInfo("Container [" + self.containerName  + "] for storage Listener&reduce result is created or already exists")
		return True


	def dumpListActionName(self):
		"""Return a printable list of the reducer action names, one per line."""
		names = self.r.getListActionName()
		return "Reducer actions used: " + "".join("\n\t" + name for name in names)

	def logdebug(self, msg):
		"""Log msg at DEBUG level, prefixed with the service name."""
		logging.debug("%s: %s", LST_NAMESVC, msg)

	def logInfo(self, msg):
		"""Log msg at INFO level, prefixed with the service name."""
		logging.info("%s: %s", LST_NAMESVC, msg)

	def syslogInfo(self, msg):
		"""Log msg at INFO level, both through logging and syslog."""
		self.logInfo(msg)
		syslog.syslog(syslog.LOG_INFO, LST_NAMESVC + ": " + msg)

	def syslogErr(self, msg):
		"""Log msg at ERROR level, both through logging and syslog."""
		logging.error("%s: %s", LST_NAMESVC, msg)
		syslog.syslog(syslog.LOG_ERR, LST_NAMESVC + ": " + msg)


	def buildContent(self, sdatetime, result):
		"""Return the text stored for a reduce result: a header line, a blank line, the result."""
		header = "Reduce end date/time: " + sdatetime + " on namespace " + self.ns + "\n"
		return header + "\n" + result


	def endprocess(self, action_name):
		"""Finalize the reduce action action_name and store its result.

		The result is printed on stdout, then written as a content named
		"<YYYYmmddHHMMSS>-<action_name>-Reduce"; when that write fails the
		result is written to a temporary file instead. The reducer data of
		the action is cleared afterwards.

		Returns False when action_name is empty, when the reducer raises or
		when the result is empty. Returns True otherwise, including when the
		result could not be stored (the failure is only logged).
		"""
		if action_name == "":
			return False
		try:
			# finalize the reduce process, then get the result string
			self.r.finalize(action_name)
			result = self.r.dumps(action_name)
			# single-argument print(...) behaves the same on Python 2 and 3
			print("-------------------------------------------------------------")
			print(result)
		except Exception as e:
			# KeyboardInterrupt/SystemExit are deliberately not caught here
			self.syslogErr("Reduce error :" + str(e))
			return False

		if result == "":
			return False

		# current date/time
		datetimecrt = datetime.datetime.now()

		# human readable form, written inside the content.
		# Only portable strftime directives are used; they are already zero-padded.
		sdatetime = datetimecrt.strftime("%d/%m/%Y %H:%M:%S")
		content   = self.buildContent(sdatetime, result)
		stream    = io.BytesIO(content.encode("utf-8"))
		size      = self.tool.getSizeStream(stream)

		# build the name of the new content
		sdatetime   = datetimecrt.strftime("%Y%m%d%H%M%S")
		contentName = sdatetime + "-" + action_name + "-Reduce"
		# NOTE(review): Initialize() creates self.containerName while the result
		# goes to LISTENER_RESULT_CONTAINER_NAME - confirm both name the same container.
		path        = self.ns + "/" + LISTENER_RESULT_CONTAINER_NAME + "/" + str(contentName)

		# write the new content; fall back to a temporary file on error
		error = self.grid.put_content(path, stream, size)
		if error != "":
			fileName = LISTENER_RESULT_DIRTMP_NAME + "/" + contentName
			errorF = self.tool.write_file(fileName, path, result)
			if errorF != "":
				self.syslogErr("Reduce result: " + error + " - " + errorF)
			else:
				self.syslogErr("Reduce result: " + error + " - Result write on temporarily file [" + fileName + "]")
		else:
			self.syslogInfo("Reduce Result write on content [" + path  + "] with success")

		# clear all data and statistics of this action
		self.r.clear(action_name)
		return True


	def ManageProcess(self, result):
		"""Analyse one received message and execute it.

		result -- decoded JSON message; expected to be a dict with a HEAD section

		Returns True when a "stopact" command ended the last source of an
		action (the reduce is then finalized), False in every other case,
		including malformed messages and data from an unauthorized crawler id.
		"""
		# Messages come from the network: a missing key or a wrong type must
		# not stop the listener loop.
		try:
			m_head           = result[self.JSON_KEYNAME_HEAD]
			m_head_name      = m_head[self.JSON_KEYNAME_HEAD_NAME]
			m_head_pid       = m_head[self.JSON_KEYNAME_HEAD_PID]
			m_head_status    = m_head[self.JSON_KEYNAME_HEAD_STATUS]
			m_head_crawlerid = m_head[self.JSON_KEYNAME_HEAD_CRAWLERID]
			# MSG_ID is mandatory but not used here: only its presence is checked
			m_head[self.JSON_KEYNAME_HEAD_MSGID]
		except (KeyError, TypeError) as e:
			self.syslogErr("Malformed message, bad or missing header field: " + str(e))
			return False

		bResult = False

		if m_head_status == self.JSON_KEYNAME_HEAD_STATUS_setcrawlerid:
			# command: save the new id used to authorize data messages
			self.logInfo("Message received: command %s id=%s"
							% (self.JSON_KEYNAME_HEAD_STATUS_setcrawlerid, m_head_crawlerid))
			self.r.ids_add(m_head_name, m_head_crawlerid, None)

		elif m_head_status == self.JSON_KEYNAME_HEAD_STATUS_stopact:
			# command: a source process that generated data has ended
			self.logInfo("end src process pid = " + str(m_head_pid)  + " was received")
			self.r.ids_rm(m_head_name, m_head_crawlerid, m_head_pid)

			# when no crawler id and no source id remain, the action is over.
			# Explicit comparisons are kept: Reducer's return types are not visible here.
			if self.r.idcrawl_isEmpty(m_head_name) == True:
				if self.r.idsrc_isEmpty(m_head_name) == True:
					self.logInfo("All src process was received: end reduce")
					self.endprocess(m_head_name)
					bResult = True

		elif m_head_status == self.JSON_KEYNAME_HEAD_STATUS_data:
			# data message: only accepted from an authorized crawler id
			if self.r.idcrawl_isExist(m_head_name, m_head_crawlerid) == False:
				return False
			self._reduceData(result, m_head_name, m_head_pid)

		# timeout management for the other reducers
		self.ManageTimeout(m_head_name)

		return bResult


	def _reduceData(self, result, action_name, src_pid):
		"""Run the reduce action action_name on the DATAH/DATAR sections of result."""
		try:
			m_datah = result[self.JSON_KEYNAME_DATAH]
			m_datar = result[self.JSON_KEYNAME_DATAR]
		except (KeyError, TypeError) as e:
			self.syslogErr("Malformed message, bad or missing data field: " + str(e))
			return

		self.logdebug('listener: recv : [' + str(result) + ']')
		sys.stdout.flush()
		try:
			if self.r.run(action_name, m_datah, m_datar) == True:
				# remember the source of the message to detect the end of the action
				self.r.ids_add(action_name, None, src_pid)
			else:
				self.syslogErr("Reduce error : bad [HEAD]/[NAME]=\""+ action_name +"\"")
		except Exception as e:
			self.syslogErr("Reduce error :" + str(e))


	def ManageTimeout(self, action_name_excepted):
		"""End every reduce action that has timed out.

		action_name_excepted -- action to skip; None checks all actions.
		Only actions whose crawler id list is not empty are tested.
		"""
		for action_name in self.r.getListActionName():
			if action_name == action_name_excepted:
				continue
			if self.r.idcrawl_isEmpty(action_name) == False:
				if self.r.isTimeout(action_name) == True:
					self.logInfo("Listener: timeout action [" + action_name + "]")
					self.endprocess(action_name)


	def go(self, timeout):
		"""Listen forever: process incoming messages, check timeouts when idle.

		timeout -- forwarded unchanged to zmq.Poller.poll()

		The loop has no normal exit; it only ends when an exception
		(e.g. KeyboardInterrupt) propagates, which is logged at DEBUG level.
		"""
		poller = zmq.Poller()
		poller.register(self.sock, zmq.POLLIN)

		self.logInfo("Waiting for incomming message to reduce...")

		try:
			while True:
				events = dict(poller.poll(timeout))
				if events.get(self.sock) != zmq.POLLIN:
					# nothing received: only look for timed-out actions
					self.ManageTimeout(None)
					continue

				# get the JSON format message
				try:
					result = self.sock.recv_json()
				except Exception as e:
					self.syslogErr("error :" + str(e))
					continue

				self.ManageProcess(result)
		finally:
			self.logdebug("End listening!")