Exemplo n.º 1
0
	def check_scheduled_but_unprocessed_items(self):
		"""Re-queue log entries whose status is proc_schedule, quit or stop.

		The number of matching entries is reported through ``self.inform``.
		When there is at least one, every matching entry is marked
		``proc_schedule`` with a new ``proc_timeout`` (whatever
		``five_mins_later()`` returns) and then handed to
		``self.schedule_job`` together with its ``_id`` and ``file`` values.
		"""
		pending_filter = {"status": {"$in": ["proc_schedule", "quit", "stop"]}}
		pending_total = self.__logs_collection__.find(pending_filter).count()
		self.inform("Found %d scheduled but Unprocessed items" % pending_total)

		if pending_total > 0:
			for entry in self.__logs_collection__.find(pending_filter):
				file_name = entry.get("file")
				selector = {"_id": entry.get("_id")}
				changes = {"$set": {"status": "proc_schedule", "proc_timeout": five_mins_later()}}
				# Persist the new state before the job is handed over.
				self.__logs_collection__.update(selector, changes, safe=True)
				self.schedule_job(entry.get("_id"), file_name)
Exemplo n.º 2
0
	def check_new_files_to_be_processed(self):
		"""Schedule every log entry whose status is ``ready``.

		Each matching entry is marked ``proc_schedule`` with a new
		``proc_timeout`` (whatever ``five_mins_later()`` returns) and passed
		to ``self.schedule_job`` with its ``_id`` and ``file`` values. A
		banner is reported through ``self.inform`` before the scan and a
		summary (the number of entries, or "NO") after it.
		"""
		banner = "=" * 10
		self.inform("{0}Checking New Files to Process{0}".format(banner))

		scheduled = 0
		# enumerate(..., 1) leaves `scheduled` at the number of entries seen;
		# it stays 0 when the cursor yields nothing.
		for scheduled, entry in enumerate(self.__logs_collection__.find({"status": "ready"}), 1):
			selector = {"_id": entry.get("_id")}
			changes = {"$set": {"status": "proc_schedule", "proc_timeout": five_mins_later()}}
			self.__logs_collection__.update(selector, changes, safe=True)
			self.schedule_job(entry.get("_id"), entry.get("file"))

		if scheduled > 0:
			found = str(scheduled)
		else:
			found = "NO"
		self.inform("Found %s files to process " % found)
Exemplo n.º 3
0
	def check_scheduled_but_left_behind_jobs(self):
		"""Reschedule jobs that look stalled.

		Scans log entries whose status is ``proc_schedule`` or ``working`` and
		reschedules an entry when:

		* it is ``working`` and its ``hbeat`` is missing/``None`` or more than
		  one minute older than the current UTC time, or
		* it is ``proc_schedule`` and its ``proc_timeout`` is not later than
		  the current UTC time. A missing or ``None`` ``proc_timeout`` is
		  replaced by a timestamp one hour in the past, so such an entry
		  always counts as expired.

		A rescheduled entry is marked ``proc_schedule`` with a new
		``proc_timeout`` (whatever ``five_mins_later()`` returns) and passed
		to ``self.schedule_job`` with its ``_id`` and ``file`` values.
		Progress is reported through ``self.inform``.
		"""
		self.inform("{0}Checking Left Behind Tasks to Process{0}".format("="*10))

		query = {"status": {"$in": ["proc_schedule", "working"]}}
		count = self.__logs_collection__.find(query).count()

		if count <= 0:
			self.inform("Nothing seems to be active or left behind")
		else:
			self.inform("Possibly %d working. Let's check their status" % count)

		logs_cursor = self.__logs_collection__.find(query)
		# Fallback deadline for entries without a usable proc_timeout.
		def_timeout = dt.utcnow() - timedelta(hours=1)

		for log in logs_cursor:
			should_reschedule = False
			file_name = log.get("file")
			status = log.get("status")

			if status == "working":
				hbeat = log.get("hbeat")
				if hbeat is None:
					should_reschedule = True
				elif hbeat < dt.utcnow() - timedelta(minutes=1):
					should_reschedule = True
			elif status == "proc_schedule":
				timeout = log.get("proc_timeout")
				# dict.get's default only covers a missing key; a stored None
				# would make the datetime comparison below raise TypeError.
				if timeout is None:
					timeout = def_timeout
				if timeout <= dt.utcnow():
					should_reschedule = True

			if should_reschedule:
				self.inform("Rescheduling %s - was %s" % (file_name, status))
				self.__logs_collection__.update({"_id":log.get("_id")},{"$set":{"status":"proc_schedule","proc_timeout":five_mins_later()}},safe=True)
				self.schedule_job(log.get("_id"), file_name)