def createInfrastructureTasks(self, infrastructureTasks):
    """Create the profiling tasks required by the currently published hosts.

    The resources returned by ``obtainGWResources()`` are inspected one by
    one (tags HOSTNAME, ARCH, CPU_MHZ, LRMS_NAME and FREENODECOUNT):

    * hosts whose LRMS is "jobmanager-pilot" and that report no free node
      are ignored;
    * when ``self.lrms`` is set, hosts with a different LRMS are ignored;
    * unknown hosts are created, appended to ``self.hosts``, added to
      ``Session`` and queued for profiling;
    * known hosts whose architecture or CPU speed changed are updated,
      added to ``Session`` and queued for profiling;
    * known hosts for which ``shouldBeProfiled()`` is true are queued for
      profiling.

    One profiling task is created per queued host, and it is kept only if
    ``infrastructureTasks`` does not already hold a task for the same
    hostname.  When ``self.lrms`` is "jobmanager-pilot", wake-up tasks bound
    to a ``FakeHost`` are appended as well.

    Args:
        infrastructureTasks: tasks already known; each one is read through
            ``task.host.hostname`` and ``task.status``.

    Returns:
        The list of newly created tasks.
    """

    def firstTagText(element, tagName):
        # Text of the first <tagName> element found under `element`, or None
        # when the tag is missing or has no text child.
        try:
            return element.getElementsByTagName(tagName)[0].firstChild.data
        except (IndexError, AttributeError):
            return None

    def firstTagInt(element, tagName):
        # Integer held by the first <tagName> element, or 0 when the tag is
        # missing or its text is not an integer.
        try:
            return int(firstTagText(element, tagName))
        except (TypeError, ValueError):
            return 0

    print ("---------------------")
    print ("---------------------")
    print ("---------------------")
    print ("CREATE INFRASTRUCTURE TASKS")

    hostsToProfile = []
    hostList = obtainGWResources()

    for hostInfo in hostList:
        # HOSTNAME is mandatory: a resource without it raises here.
        # TODO: remove "unicode" from TEXT
        hostName = hostInfo.getElementsByTagName("HOSTNAME")[0].firstChild.data

        foundArch = firstTagText(hostInfo, "ARCH")
        if foundArch is None:
            foundArch = ""
        foundCpuMHz = firstTagInt(hostInfo, "CPU_MHZ")
        foundLrms = firstTagText(hostInfo, "LRMS_NAME")
        freeNodeCount = firstTagInt(hostInfo, "FREENODECOUNT")

        # Only consider pilots with at least one available slot.
        if foundLrms == "jobmanager-pilot" and freeNodeCount <= 0:
            continue

        # If a certain LRMS is desired, drop the hosts with a different one.
        if self.lrms is not None and foundLrms != self.lrms:
            continue

        currentHost = self.getHost(hostName)
        if currentHost is None:
            # Unknown host: register it and schedule a profiling task.
            newHost = Host(hostName, arch=foundArch, cpuMHz=foundCpuMHz, lrms=foundLrms)
            self.hosts.append(newHost)
            hostsToProfile.append(newHost)
            # Store the new host in the database (failure resistance).
            Session.add(newHost)
        elif currentHost.arch != foundArch or currentHost.cpuMHz != foundCpuMHz:
            # The published information changed: update the stored host.
            # TODO: decide what to do here: should the old host be deleted,
            # or only its information overwritten?  If it is deleted, what
            # happens to the tasks that already ran there?  Not trivial.
            currentHost.arch = foundArch
            currentHost.cpuMHz = foundCpuMHz
            if currentHost.lrms is None:
                currentHost.lrms = foundLrms
            hostsToProfile.append(currentHost)
            Session.add(currentHost)
        elif currentHost.shouldBeProfiled():
            if currentHost.lrms is None:
                currentHost.lrms = foundLrms
            hostsToProfile.append(currentHost)

    # Host profiling: one task is submitted per host.
    hostProfilingTasks = [ExecutionManager.createHostProfilingTask(host)
                          for host in hostsToProfile]

    # Keep only the tasks whose host has no task in infrastructureTasks yet.
    siteTasks = []
    for task in hostProfilingTasks:
        alreadyKnown = any(existing.host.hostname == task.host.hostname
                           for existing in infrastructureTasks)
        if not alreadyKnown:
            siteTasks.append(task)

    # This is for the first montera + gwpilot experiment: we want pilots up
    # and running, so they are started with these extra tasks.
    if self.lrms == "jobmanager-pilot":
        print ("creating fake profiling tasks")
        existingFakeTasks = sum(1 for task in infrastructureTasks
                                if task.host.hostname == "" and task.status != "PENDING")
        existingGoodPilots = len(self.getGoodHosts())
        # NOTE: an earlier formula also subtracted the good pilots and the
        # profiling tasks; only the existing fake tasks are discounted now.
        # Clamped at zero so the log never reports a negative amount.
        fakeTasksToCreate = max(0, base.maxRunningTasks - existingFakeTasks)
        print (" Desired tasks: " + str(base.maxRunningTasks))
        print (" Existing fake tasks: " + str(existingFakeTasks))
        print (" Existing good pilots: " + str(existingGoodPilots))
        print (" created: " + str(fakeTasksToCreate))

        emptyHost = FakeHost()
        # NOTE(review): "createWakeUpask" looks like a typo of
        # "createWakeUpTask"; it is defined outside this block, so the name
        # is kept unchanged -- confirm against ExecutionManager.
        fakeHostProfilingTasks = [ExecutionManager.createWakeUpask(emptyHost)
                                  for _ in range(fakeTasksToCreate)]
        siteTasks += fakeHostProfilingTasks

    return siteTasks
def createInfrastructureTasks(self, infrastructureTasks):
    """Register newly published hosts/pilots and create the start-up tasks.

    The resources returned by ``obtainGWResources()`` are inspected one by
    one (tags HOSTNAME, ARCH, CPU_MHZ, LRMS_NAME and FREENODECOUNT):

    * resources without an LRMS name are skipped;
    * resources whose LRMS is "jobmanager-pilot" are pilots: they are
      skipped when they report no free node, otherwise their
      PILOT_REAL_HOSTNAME / PILOT_REAL_RESOURCE string variables and their
      ``PILOT_<USER>_VAR_5`` integer variable (whetstones) are read;
    * unknown pilots are created as ``Pilot`` objects bound to a
      ``PilotResource`` (looked up by site and worker node, created when
      missing) and appended to ``self.pilots``; other unknown resources are
      created as ``Host`` objects and appended to ``self.hosts``; both are
      added to ``Session``;
    * known hosts whose whetstones differ from the published value are
      updated; otherwise a missing ``lrms`` is filled in.

    Finally, fake host-profiling tasks bound to a ``FakeHost`` are created
    so that the number of non-PENDING fake tasks reaches
    ``base.maxRunningTasks``.

    Args:
        infrastructureTasks: tasks already known; each one is read through
            ``task.host.hostname`` and ``task.status``.

    Returns:
        The list of newly created tasks.
    """

    def firstTagText(element, tagName):
        # Text of the first <tagName> element found under `element`, or None
        # when the tag is missing or has no text child.
        try:
            return element.getElementsByTagName(tagName)[0].firstChild.data
        except (IndexError, AttributeError):
            return None

    def firstTagInt(element, tagName):
        # Integer held by the first <tagName> element, or 0 when the tag is
        # missing or its text is not an integer.
        try:
            return int(firstTagText(element, tagName))
        except (TypeError, ValueError):
            return 0

    print ("-------------------")
    print ("-------------------")
    print ("createInfrastructureTasks- NewPilotInfrastructure")
    # self.showHosts()
    hostList = obtainGWResources()
    hostsToProfile = []
    print ("Analyzing resources ")

    # Name of the integer variable in which pilots publish their whetstones.
    # When USER is not set no variable can match, so whetstones stay at 0.
    username = os.getenv("USER")
    if username is None:
        whetstoneVarName = None
    else:
        whetstoneVarName = "PILOT_" + username + "_VAR_5"

    for hostInfo in hostList:
        # HOSTNAME is mandatory: a resource without it raises here.
        # TODO: remove "unicode" from TEXT
        hostName = hostInfo.getElementsByTagName("HOSTNAME")[0].firstChild.data.strip().lower()

        # Per-host values; reset on every iteration so that nothing leaks
        # from the previously analysed resource.
        whetstones = 0
        site = None
        workerNode = None

        foundArch = firstTagText(hostInfo, "ARCH")
        if foundArch is None:
            foundArch = ""
        foundCpuMHz = firstTagInt(hostInfo, "CPU_MHZ")

        foundLrms = firstTagText(hostInfo, "LRMS_NAME")
        if foundLrms is None:
            print ("Could not find LRMS for host " + hostName + ", skipping it")
            continue
        foundLrms = foundLrms.strip().lower()

        freeNodeCount = firstTagInt(hostInfo, "FREENODECOUNT")

        if foundLrms == "jobmanager-pilot":
            # Only consider pilots with at least one available slot.
            if freeNodeCount <= 0:
                continue

            for node in hostInfo.getElementsByTagName("GENERIC_VAR_STR"):
                varName = node.attributes['NAME'].value
                if varName == "PILOT_REAL_HOSTNAME":
                    workerNode = node.attributes['VALUE'].value.strip().lower()
                elif varName == "PILOT_REAL_RESOURCE":
                    site = node.attributes['VALUE'].value.strip().lower()

            for node in hostInfo.getElementsByTagName("GENERIC_VAR_INT"):
                if node.attributes['NAME'].value == whetstoneVarName:
                    whetstones = int(node.attributes['VALUE'].value.strip())
                    # Values above 65534 are treated as "not initialised".
                    if whetstones > 65534:
                        whetstones = 0

        currentHost = self.getHost(hostName)
        if currentHost is None:
            print ("Host/Pilot not found. hostname: " + hostName + ", LRMS: " + foundLrms)
            if foundLrms == "jobmanager-pilot":
                # A pilot without its real resource/hostname cannot be bound
                # to a PilotResource, so it is skipped instead of being
                # attached to whatever the previous resource published.
                if site is None or workerNode is None:
                    print (" Could not find PILOT_REAL_RESOURCE/PILOT_REAL_HOSTNAME for pilot "
                           + hostName + ", skipping it")
                    continue

                # A pilot was found: first look up its resource, creating it
                # when it does not exist, then create a pilot that uses it.
                pilotResource = base.Session.query(PilotResource).filter(
                    PilotResource.site == site,
                    PilotResource.workerNode == workerNode).first()
                if pilotResource is None:
                    print (" PilotResource was not found, creating a new one")
                    pilotResource = PilotResource(site, workerNode)

                print (" Creating a new Pilot in NewPilotInfrastructure.createInfrastructureTasks")
                newHost = Pilot(hostName, arch=foundArch, cpuMHz=foundCpuMHz,
                                pilotResource=pilotResource, whetstones=whetstones)
                self.pilots.append(newHost)
                Session.add(newHost)
            else:
                print (" Creating a new Host in NewPilotInfrastructure.createInfrastructureTasks")
                newHost = Host(hostName, arch=foundArch, cpuMHz=foundCpuMHz, lrms=foundLrms)
                self.hosts.append(newHost)
                Session.add(newHost)
            # NOTE: profiling of pilots that did not publish their
            # whetstones (value 0 or above 65534) used to be scheduled here
            # by appending newHost to hostsToProfile; it is disabled, so
            # hostsToProfile is never filled in this method.
        elif currentHost.getWhetstones() != whetstones:
            # The published information changed: update the stored host.
            # A setter is used because the update is a more involved
            # operation that is encapsulated in that method.
            currentHost.setWhetstones(whetstones)
            Session.add(currentHost)
            print ("Host: " + hostName + " UPDATED, new whetstones=" + str(whetstones))
        elif currentHost.lrms is None:
            currentHost.lrms = foundLrms

    # Profiling of new sites.
    hostProfilingTasks = [ExecutionManager.createHostProfilingTask(host)
                          for host in hostsToProfile
                          for _ in range(base.profilingTasksPerHost)]

    # We assume that every pilot publishes the variable holding its
    # performance, so nothing has to be profiled.  In this new approach we
    # still want a few benchmarks around to keep starting pilots.
    print ("creating fake profiling tasks")
    existingFakeTasks = sum(1 for task in infrastructureTasks
                            if task.host.hostname == "" and task.status != "PENDING")
    existingGoodPilots = len(self.getGoodHosts())
    # NOTE: an earlier formula also subtracted the good pilots and the
    # profiling tasks; only the existing fake tasks are discounted now.
    # Clamped at zero so the log never reports a negative amount.
    fakeTasksToCreate = max(0, base.maxRunningTasks - existingFakeTasks)
    print (" Desired tasks: " + str(base.maxRunningTasks))
    print (" Existing fake tasks: " + str(existingFakeTasks))
    print (" Existing good pilots: " + str(existingGoodPilots))
    print (" created: " + str(fakeTasksToCreate))

    emptyHost = FakeHost()
    fakeHostProfilingTasks = [ExecutionManager.createFakeHostProfilingTask(emptyHost)
                              for _ in range(fakeTasksToCreate)]
    hostProfilingTasks += fakeHostProfilingTasks

    return hostProfilingTasks