示例#1
0
    def executeInfrastructureTasks(self):
        """Create and submit the infrastructure tasks, optionally waiting for them.

        The site waiting time is read *before* the tasks are created.  Per the
        original author's note this is an admitted hack: whether a complete
        profiling is needed is decided by looking at the infrastructure (no
        hosts means it is needed), and creating the tasks also creates the
        hosts, so the check has to come first.

        Every created task is submitted and appended to
        ``self.infrastructureTasks``.  When the waiting time is positive the
        method blocks in ``ExecutionManager.waitForTermination`` and then, for
        each task, updates the infrastructure with it, sets its status to
        "CLEAR" and adds it to the database session.
        """
        print("")
        print("Executing Infrastructure tasks")
        print("	...Executing site tasks")

        # Must be read before createInfrastructureTasks() (see docstring).
        profilingTimeout = self.myInfrastructure.getSiteWaitingtime()
        newTasks = self.myInfrastructure.createInfrastructureTasks(self.infrastructureTasks)

        for newTask in newTasks:
            ExecutionManager.submit(newTask)
            self.infrastructureTasks.append(newTask)

        # Nothing to wait for: the tasks are left running in the background.
        if not profilingTimeout > 0:
            return

        print("Waiting for profiling tasks to execute")
        print("this will take {0} seconds to execute".format(str(profilingTimeout)))

        ExecutionManager.waitForTermination(newTasks, waitingTime=profilingTimeout)
        print("profiling tasks executed")

        for profiledTask in newTasks:
            self.myInfrastructure.updateInfoAfterProfiling(profiledTask)
            profiledTask.status = "CLEAR"
            base.Session.add(profiledTask)
示例#2
0
    def profileApplication(self):
        """Profile the application on every good host and store the results.

        Two profiling tasks are created for each host returned by
        ``self.myInfrastructure.getGoodHosts()``.  Each task is submitted and
        added to the database session.  The method then waits up to
        ``10 * self.myApplication.maxProfilingTime`` seconds for the tasks to
        terminate and passes every task to
        ``self.myApplication.updateInfoAfterProfiling``.

        Database errors are handled on a best-effort basis and never
        propagate from the commits: a failed first commit is rolled back; a
        failed final commit triggers one recovery attempt with a brand new
        session, and only if that attempt also fails is the session rolled
        back and the data reported as lost.
        """
        goodHosts = self.myInfrastructure.getGoodHosts()
        appProfilingTasks = [
            ExecutionManager.createProfilingTask(host, self.myApplication) for host in goodHosts for _ in range(2)
        ]

        # submit the tasks to the Grid
        for profilingTask in appProfilingTasks:
            ExecutionManager.submit(profilingTask)
            base.Session.add(profilingTask)

        # `except Exception` (not a bare except) so Ctrl-C / SystemExit are
        # not silently reported as a database problem.
        try:
            base.Session.commit()
        except Exception:
            base.Session.rollback()
            print("Lost connection with database, not storing anything!")

        profilingTime = 10 * self.myApplication.maxProfilingTime
        print("Waiting for the app profiles to finish, waiting for " + str(profilingTime) + " seconds")
        ExecutionManager.waitForTermination(appProfilingTasks, waitingTime=profilingTime)

        for profilingTask in appProfilingTasks:
            self.myApplication.updateInfoAfterProfiling(profilingTask)

        try:
            base.Session.commit()
        except Exception:
            print("Lost connection with database, trying to recover it")
            try:
                # Replace the broken session with a fresh one and store the
                # tasks again, committing after each so partial progress is kept.
                base.Session = scoped_session(sessionmaker(bind=base.engine))
                for profilingTask in appProfilingTasks:
                    base.Session.add(profilingTask)
                    base.Session.commit()
                print("Worked, yeah")
            except Exception:
                print("didn't work, not storing anything! Will probably crash soon LOL")
                # Only a failed recovery discards the data; previously this
                # rollback and message also ran after a successful recovery.
                base.Session.rollback()
                print("Lost connection with database, not storing anything!")
示例#3
0
def analyzeProfile(fileLocation=None):
	"""Refresh the tasks of the most recent application and absorb finished profiles.

	Creates the database tables, opens a GridWay session, loads the
	application with the highest id, updates the status of all of its grid
	tasks and, for every task that is "DONE" and of type
	"applicationProfiling", calls ``updateInfoAfterProfiling`` on the
	application.  The GridWay session is always closed before returning,
	even when an error occurs.

	Side effect: ``base.tmpExecutionFolder`` gets "/<application id>/"
	appended.

	:param fileLocation: unused; kept for backward compatibility.
	"""
	metadata = MetaData()
	myDBDesign = DBDesign()

	# The returned table objects are not needed here; the calls are kept
	# because they are what (presumably) declares the tables on `metadata`
	# before create_all() -- TODO confirm against DBDesign.
	myDBDesign.HostDBDesign(metadata)
	myDBDesign.ApplicationDesign(metadata)
	myDBDesign.AppProfileDBDesign(metadata)
	myDBDesign.GridTaskDBDesign(metadata)

	metadata.create_all(base.engine)

	print("Starting connection with GridWay metascheduler")
	ExecutionManager.initGridSession()
	print("Connection stablished")

	try:
		# load application
		myApp = base.Session.query(Application).order_by(Application.id.desc()).first()
		if myApp is None:
			# first() returns None on an empty table; nothing to analyze.
			print("No application found in the database, nothing to analyze")
			return

		base.tmpExecutionFolder = base.tmpExecutionFolder + "/" + str(myApp.id) + "/"

		myTasks = base.Session.query(GridTask).filter(GridTask.applicationID == myApp.id)

		for task in myTasks:
			ExecutionManager.updateGridTaskStatus(task)

		finishedTasks = [gridTask for gridTask in myTasks if gridTask.status == "DONE"]
		for gridTask in finishedTasks:
			if gridTask.type == "applicationProfiling":
				myApp.updateInfoAfterProfiling(gridTask)
	finally:
		# Always release the GridWay session opened above.
		print ("")
		print("Closing conection with GridWay metascheduler")
		ExecutionManager.exitGridSession()
		print("Connection closed")
示例#4
0
    def execute(self):
        """Run the main scheduling loop until the application is finished.

        Each iteration:

        1. Stops the loop if no valid grid certificate is found.
        2. Refreshes the status of the infrastructure tasks; every "DONE"
           task is removed from GridWay and from ``self.infrastructureTasks``
           (host profiling results are first fed to the infrastructure).
        3. Creates and submits new infrastructure tasks.
        4. Refreshes the status of the application tasks; every "DONE" task
           updates the application (profiling or execution) and is removed
           from GridWay and from ``self.applicationTasks``.
        5. If ``self.myApplication.remainingSamples <= 0``: marks the
           application as finished, stores the hosts' slot counts, removes
           the remaining application tasks from GridWay, commits and leaves
           the loop.
        6. Otherwise asks the scheduling algorithm for new execution tasks,
           submits and stores them, commits, and sleeps 15 seconds.

        Database commit failures are rolled back and reported, never raised.
        """
        while True:
            # update status of tasks being executed
            print("---")
            print("date:" + str(datetime.now()))

            print("")
            print("CHECKING FOR GRID CERTIFICATE")
            if not InformationManager.checkForValidCertificates():
                print("Could not find a valid certificate")
                print("Finishing execution now :(")
                break

            print("")
            print("UPDATING INFRASTRUCTURE STATUS")
            for gridTask in self.infrastructureTasks:
                ExecutionManager.updateGridTaskStatus(gridTask)

            # Snapshot the finished tasks first so the list can be safely
            # modified while they are processed.
            finishedTasks = [gridTask for gridTask in self.infrastructureTasks if gridTask.status == "DONE"]
            for gridTask in finishedTasks:
                if gridTask.type == "hostProfiling":
                    self.myInfrastructure.updateInfoAfterProfiling(gridTask)
                # and update gridTask status
                ExecutionManager.removeTaskFromGW(gridTask)
                self.infrastructureTasks.remove(gridTask)

            self.myInfrastructure.updateStatus(self.infrastructureTasks)

            print("")
            print("CREATING INFRASTRUCTURE TASKS")
            self.executeInfrastructureTasks()

            print("UPDATE APPLICATION STATUS")
            for gridTask in self.applicationTasks:
                ExecutionManager.updateGridTaskStatus(gridTask)

            # HACK (acknowledged by the original author): the infrastructure
            # status is recomputed from both task lists together.
            totalTasks = self.infrastructureTasks + self.applicationTasks

            self.myInfrastructure.updateStatus(totalTasks)

            # process recently finishedTasks
            finishedTasks = [gridTask for gridTask in self.applicationTasks if gridTask.status == "DONE"]
            for gridTask in finishedTasks:
                if gridTask.type == "applicationProfiling":
                    self.myApplication.updateInfoAfterProfiling(gridTask)
                elif gridTask.type == "applicationExecution":
                    self.myApplication.updateInfoAfterExecution(gridTask)

                # and update gridTask status
                ExecutionManager.removeTaskFromGW(gridTask)
                self.applicationTasks.remove(gridTask)

            # check for execution finish
            if self.myApplication.remainingSamples <= 0:
                print("Starting the exit of  execution loop")

                self.myApplication.finished = 1
                base.Session.add(self.myApplication)

                print("Application marked as finished")

                print("")
                print("Storing information about available hosts on remote sites")
                # The stored maximum is the larger of the historical maximum
                # and the maximum seen during this run.
                for host in self.myInfrastructure.hosts:
                    host.maxSlotCount = max(host.maxSlotCount, host.maxSlotCountThisTime)
                    base.Session.add(host)

                print("")
                print("Removing finished tasks from gridWay")

                # Debatable design decision (original author's note): a
                # profiling task that reaches the end of the execution without
                # completing is treated as failed.  This penalizes sites with
                # very long response times; the alternative would leave an
                # unresponsive site pending profiling forever.
                # NOTE(review): this iterates applicationTasks but checks for
                # "hostProfiling" tasks, which elsewhere in this method live in
                # infrastructureTasks -- confirm which list is intended.
                for gridTask in self.applicationTasks:
                    ExecutionManager.removeTaskFromGW(gridTask)
                    if gridTask.type == "hostProfiling":
                        self.myInfrastructure.updateInfoAfterProfiling(gridTask)
                # `except Exception` so Ctrl-C / SystemExit are not swallowed
                # and misreported as a database failure.
                try:
                    base.Session.commit()
                except Exception:
                    base.Session.rollback()
                    print("Lost connection with database, not storing anything!")

                print("Exiting execution loop")
                break

            print("")
            print("CREATING NEW EXECUTION TASKS")
            applicationExecutionTasks = self.mySchedulingAlgorithm.createApplicationTasks(
                self.myInfrastructure, self.myApplication, self.applicationTasks
            )
            for gridTask in applicationExecutionTasks:
                ExecutionManager.submit(gridTask)
                self.applicationTasks.append(gridTask)
                base.Session.add(gridTask)

            try:
                base.Session.commit()
            except Exception:
                base.Session.rollback()
                print("Lost connection with database, not storing anything!")

            print("...")
            sleep(15)
示例#5
0
	gridTaskDesignDB = myDBDesign.GridTaskDBDesign(metadata)
	PilotResourcesDB = myDBDesign.PilotResourceDBDesign(metadata)
	
	
	#===========================================================================
	# parameterDesignDB = myDBDesign.parameterDBDesign(metadata)
	#===========================================================================

	metadata.create_all(base.engine)
	
	print("Database is correct")
	print("")


	print("Starting connection with GridWay metascheduler")
	ExecutionManager.initGridSession()
	print("Connection stablished")



	print ("Cleaning working directory from past executions")
	ExecutionManager.cleanTmpDirectory()
	print ("... done")

	print ("Selected LRMS")
	print (base.lrms)
	print ("... done")

	print ("Recovering infrastructure from past executions and current state")
	if (base.infrastructureType == None) or (base.infrastructureType == "standard"):
		print ("infrastructur: standard")
示例#6
0
	def createInfrastructureTasks(self, infrastructureTasks):
		"""Build the profiling tasks needed for the hosts currently published by GridWay.

		For every resource returned by ``obtainGWResources()``:

		* pilots ("jobmanager-pilot") without a free node are ignored;
		* when ``self.lrms`` is set, hosts with a different LRMS are ignored;
		* unknown hosts are created, stored and scheduled for profiling;
		* known hosts whose arch or CPU MHz changed are updated, stored and
		  scheduled for profiling;
		* known hosts for which ``shouldBeProfiled()`` is true are scheduled
		  for profiling.

		One profiling task is created per scheduled host; tasks whose hostname
		already appears in ``infrastructureTasks`` are dropped.  When
		``self.lrms`` is "jobmanager-pilot", wake-up tasks on a ``FakeHost``
		are appended so that ``base.maxRunningTasks`` minus the number of
		existing non-pending fake tasks are created.

		:param infrastructureTasks: tasks already submitted; each must expose
			``host.hostname`` and ``status``.
		:return: list of new tasks to submit.
		"""
		def readTag(hostInfo, tagName, default, convert=None):
			# Text of the first <tagName> element, optionally converted;
			# `default` when the tag is missing, empty or not convertible.
			try:
				text = hostInfo.getElementsByTagName(tagName)[0].firstChild.data
				if convert is None:
					return text
				return convert(text)
			except Exception:
				return default

		print ("---------------------")
		print ("---------------------")
		print ("---------------------")

		print ("CREATE INFRASTRUCTURE TASKS")

		hostsToProfile = []

		hostList = obtainGWResources()
		for hostInfo in hostList:
			hostName = hostInfo.getElementsByTagName("HOSTNAME")[0].firstChild.data #TODO: remove "unicode" from TEXT

			foundArch = readTag(hostInfo, "ARCH", "")
			foundCpuMHz = readTag(hostInfo, "CPU_MHZ", 0, int)
			foundLrms = readTag(hostInfo, "LRMS_NAME", None)
			freeNodeCount = readTag(hostInfo, "FREENODECOUNT", 0, int)

			# only pilots with at least one available slot are considered
			if foundLrms == "jobmanager-pilot" and not freeNodeCount > 0:
				continue

			# if a certain LRMS is desired, remove the hosts with a different one
			if self.lrms is not None and foundLrms != self.lrms:
				continue

			# if host is unknown, create a profiling task
			currentHost = self.getHost(hostName)
			if currentHost is None:
				newHost = Host(hostName, arch=foundArch, cpuMHz = foundCpuMHz, lrms=foundLrms)
				self.hosts.append(newHost)
				hostsToProfile.append(newHost)
				# store new host on database (failure resistance)
				Session.add(newHost)
			# if information has changed, update host information
			elif (currentHost.arch != foundArch) or (currentHost.cpuMHz != foundCpuMHz):
				# TODO: decide what to do here: delete the old host or just
				# overwrite its information?  If the old one is deleted, what
				# happens to the tasks executed on it?  Not trivial.
				currentHost.arch = foundArch
				currentHost.cpuMHz = foundCpuMHz
				if currentHost.lrms is None:
					currentHost.lrms = foundLrms
				hostsToProfile.append(currentHost)
				Session.add(currentHost)

			elif currentHost.shouldBeProfiled():
				if currentHost.lrms is None:
					currentHost.lrms = foundLrms
				hostsToProfile.append(currentHost)

		# Host profiling: one task per host.
		hostProfilingTasks = [ExecutionManager.createHostProfilingTask(host)
							for host in hostsToProfile]

		# Drop tasks for hosts that already have a task in infrastructureTasks.
		siteTasks = [task for task in hostProfilingTasks
					if not any(gridTask.host.hostname == task.host.hostname
							for gridTask in infrastructureTasks)]

		# This is for the first montera + gwpilot experiment: we want pilots
		# up and running, so they are started with these wake-up tasks.
		if self.lrms == "jobmanager-pilot":
			print ("creating fake profiling tasks")

			existingFakeTasks = len([task for task in infrastructureTasks if task.host.hostname=="" and task.status != "PENDING"])
			existingGoodPilots = len (self.getGoodHosts())
			fakeTasksToCreate = base.maxRunningTasks - existingFakeTasks

			print ("	Desired tasks: " + str(base.maxRunningTasks))
			print ("	Existing fake tasks: " + str(existingFakeTasks))
			print ("	Existing good pilots: " + str(existingGoodPilots))
			print ("	created: " + str(fakeTasksToCreate))

			emptyHost = FakeHost()
			# NOTE(review): "createWakeUpask" looks like a typo for
			# "createWakeUpTask"; the name is defined outside this block, so
			# it is left untouched -- confirm against ExecutionManager.
			fakeHostProfilingTasks = [ExecutionManager.createWakeUpask(emptyHost)
						for _ in range(fakeTasksToCreate)]

			siteTasks += fakeHostProfilingTasks

		return siteTasks
	def createInfrastructureTasks(self, infrastructureTasks):
		"""Register the hosts/pilots published by GridWay and build the tasks to submit.

		For every resource returned by ``obtainGWResources()``:

		* resources without an LRMS name are skipped;
		* pilots ("jobmanager-pilot") without a free node are skipped; for the
		  others the real worker node, the real site and the whetstone value
		  published by the pilot are read (whetstones above 65534 count as 0);
		* unknown pilots are created (together with their ``PilotResource``
		  when it does not exist yet) and unknown hosts are created; both are
		  added to the session;
		* known hosts whose whetstone value changed are updated;
		* otherwise a known host without LRMS gets the one just found.

		No host is currently scheduled for real profiling (pilots are assumed
		to publish their own performance), so the returned list holds fake
		host profiling tasks only: ``base.maxRunningTasks`` minus the number
		of existing non-pending fake tasks.

		:param infrastructureTasks: tasks already submitted; each must expose
			``host.hostname`` and ``status``.
		:return: list of new tasks to submit.
		"""
		print ("-------------------")
		print ("-------------------")

		print ("createInfrastructureTasks- NewPilotInfrastructure")

		hostList = obtainGWResources()

		hostsToProfile = []

		# Loop invariant: the user name is part of the pilot variable name.
		username = os.getenv("USER")

		print ("Analyzing resources ")
		for hostInfo in hostList:
			hostName = hostInfo.getElementsByTagName("HOSTNAME")[0].firstChild.data.strip().lower() #TODO: remove "unicode" from TEXT
			whetstones = 0
			# Reset for every host so a pilot that does not publish these
			# values can never inherit them from the previous pilot.
			site = None
			workerNode = None

			try:
				foundArch = hostInfo.getElementsByTagName("ARCH")[0].firstChild.data
			except Exception:
				foundArch = ""

			try:
				foundCpuMHz = int(hostInfo.getElementsByTagName("CPU_MHZ")[0].firstChild.data)
			except Exception:
				foundCpuMHz = 0

			try:
				foundLrms = hostInfo.getElementsByTagName("LRMS_NAME")[0].firstChild.data.strip().lower()
			except Exception:
				print ("Could not find LRMS for host " + hostName + ", skipping it")
				continue

			try:
				freeNodeCount = int(hostInfo.getElementsByTagName("FREENODECOUNT")[0].firstChild.data)
			except Exception:
				freeNodeCount = 0

			if foundLrms == "jobmanager-pilot":
				# only pilots with at least one available slot are considered
				if not freeNodeCount > 0:
					continue

				genericStringArgs = hostInfo.getElementsByTagName("GENERIC_VAR_STR")
				for node in genericStringArgs:
					if node.attributes['NAME'].value == "PILOT_REAL_HOSTNAME":
						workerNode = node.attributes['VALUE'].value.strip().lower()
					if node.attributes['NAME'].value == "PILOT_REAL_RESOURCE":
						site = node.attributes['VALUE'].value.strip().lower()

				genericIntArgs = hostInfo.getElementsByTagName("GENERIC_VAR_INT")
				for node in genericIntArgs:
					if node.attributes['NAME'].value == "PILOT_" + username + "_VAR_5":
						whetstones = int(node.attributes['VALUE'].value.strip().lower())
						# values above 65534 are treated as "not initialized"
						if whetstones > 65534:
							whetstones = 0

			# if host is unknown, register it
			currentHost = self.getHost(hostName)
			if currentHost is None:
				print ("Host/Pilot  not found. hostname: " + hostName + ", LRMS: " + foundLrms)
				if foundLrms == "jobmanager-pilot":
					if site is None or workerNode is None:
						# Without both values the pilot cannot be tied to a
						# PilotResource; skip it instead of crashing or using
						# another pilot's data.
						print ("	Pilot " + hostName + " does not publish its real site and worker node, skipping it")
						continue

					# A pilot was found: first look for its resource and create
					# it if it does not exist, then create a pilot using it.
					pilotResource = base.Session.query(PilotResource).filter(PilotResource.site == site, PilotResource.workerNode == workerNode).first()
					if pilotResource is None:
						print ("	PilotResource was not found, creating a new one")
						pilotResource = PilotResource(site, workerNode)
					print ("	Creating a new Pilot in NewPilotInfrastructure.createInfrastructureTasks")
					newHost = Pilot(hostName, arch=foundArch, cpuMHz = foundCpuMHz, pilotResource = pilotResource, whetstones = whetstones)
					self.pilots.append(newHost)
					Session.add(newHost)

				else:
					print ("	Creating a new Host in NewPilotInfrastructure.createInfrastructureTasks")
					newHost = Host(hostName, arch=foundArch, cpuMHz = foundCpuMHz, lrms=foundLrms)
					self.hosts.append(newHost)
					Session.add(newHost)

				# Profiling of pilots that have not published their whetstones
				# is currently disabled, so new hosts are not added to
				# hostsToProfile.

			# if information has changed, update host information
			elif (currentHost.getWhetstones() != whetstones):
				# a setter is used because the update is a more complex
				# operation that is encapsulated in that method
				currentHost.setWhetstones(whetstones)
				Session.add(currentHost)
				print ("Host: " + hostName + " UPDATED, new whetstones=" + str(whetstones))

			elif currentHost.lrms is None:
				currentHost.lrms = foundLrms

		# profiling of new sites
		hostProfilingTasks = [ExecutionManager.createHostProfilingTask(host)
							for host in hostsToProfile
							for _ in range(base.profilingTasksPerHost)]

		# All pilots are assumed to publish a variable with their performance,
		# so nothing has to be profiled.  In this approach some benchmark
		# tasks are still wanted in order to start pilots.
		print ("creating fake profiling tasks")

		existingFakeTasks = len([task for task in infrastructureTasks if task.host.hostname=="" and task.status != "PENDING"])
		existingGoodPilots = len (self.getGoodHosts())
		fakeTasksToCreate = base.maxRunningTasks - existingFakeTasks

		print ("	Desired tasks: " + str(base.maxRunningTasks))
		print ("	Existing fake tasks: " + str(existingFakeTasks))
		print ("	Existing good pilots: " + str(existingGoodPilots))
		print ("	created: " + str(fakeTasksToCreate))

		emptyHost = FakeHost()
		fakeHostProfilingTasks = [ExecutionManager.createFakeHostProfilingTask(emptyHost)
					for _ in range(fakeTasksToCreate)]

		hostProfilingTasks += fakeHostProfilingTasks

		return hostProfilingTasks