# Module-level imports reconstructed from the usage in this excerpt. The
# cpc-internal names used below (log, Node, Nodes, ServerMessage,
# WorkerStatus, json_serializer, CommandWorkerMatcher) are assumed to be
# imported elsewhere in the original module and are not reproduced here.
import os
import json
import time
import tarfile
import tempfile

import cpc.util.log
import cpc.command.platform_exec_reader


def checkWorkerRequirements(self, cmd):
    # Check if worker is project dedicated
    if "project" in self.workerReqDict:
        name = cmd.getTask().getProject().getName()
        reqName = self.workerReqDict["project"]
        log.debug("Worker is dedicated to proj. %s, command belongs to %s" %
                  (reqName, name))
        if name != reqName:
            return False
    return True
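# Illustrative example (not from the original source): a worker started with
#     workerReqDict = {'project': 'protein-folding'}
# only matches commands whose task belongs to the project named
# 'protein-folding'; a worker whose requirement dict lacks the 'project' key
# accepts commands from any project.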
def matchCommandWorker(matcher, command):
    """Function to use in queue.getUntil() to get a number of commands from
       the queue.
       TODO: this is where performance tuning results should be used."""
    cont = True  # whether to continue getting commands from the queue
    # whether to use this command: make sure we only have a single type
    use = False
    execID = matcher.getExecID(command)
    log.log(cpc.util.log.TRACE, "exec id is %s" % execID)
    log.debug("Type: %s" % command.getTask().getFunctionName())
    if execID is not None:
        use = (matcher.checkType(command.getTask().getFunctionName()) and
               matcher.checkWorkerRequirements(command))
    log.debug("Should use: %s " % use)
    if use:
        if matcher.checkAddResources(command):
            use = True
        else:
            use = False
            cont = False
    return (cont, use)
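# Sketch of how matchCommandWorker is wired up; the exact getUntil()
# signature is an assumption based on the docstring above, with
# CommandWorkerMatcher.getWork() (see run() below) as the likely caller:
#
#     cmds = cmdQueue.getUntil(matchCommandWorker, matcher)
#
# The queue repeatedly calls matchCommandWorker(matcher, command), collects
# every command for which 'use' is True, and stops as soon as 'cont' comes
# back False (i.e. when the matcher's resources are depleted).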
def checkAddResources(self, cmd):
    """Check whether a command falls within the current resource allocation
       and add its requirements to the used resources if it does.
       cmd = the command to check
       returns: True if the command fits within the capabilities and is
                added, False if the command doesn't fit."""
    for rsrc in self.used.itervalues():
        platformMax = self.usePlatform.getMaxResource(rsrc.name)
        cmdMinRsrc = cmd.getMinRequired(rsrc.name)
        rsrcLeft = platformMax - rsrc.value
        if cmdMinRsrc is not None:
            # check whether there's any left
            if rsrcLeft < cmdMinRsrc:
                log.debug("Left: %d, max=%d, minimum resources: %d" %
                          (rsrcLeft, platformMax, cmdMinRsrc))
                self.depleted = True
                return False
    # now reserve the resources
    cmd.resetReserved()
    for rsrc in self.used.itervalues():
        platformMax = self.usePlatform.getMaxResource(rsrc.name)
        platformPref = self.usePlatform.getPrefResource(rsrc.name)
        cmdMinRsrc = cmd.getMinRequired(rsrc.name)
        cmdMaxRsrc = cmd.getMaxAllowed(rsrc.name)
        if cmdMinRsrc is not None:
            # the total amount of resources left on the current platform:
            rsrcLeft = platformMax - rsrc.value
            if platformPref is not None and rsrcLeft > platformPref:
                value = platformPref
            elif cmdMaxRsrc is not None and rsrcLeft > cmdMaxRsrc:
                value = cmdMaxRsrc
            else:
                # the command takes everything that is left, so mark this
                # matcher's resources as depleted
                value = rsrcLeft
                self.depleted = True
            # now we know how many
            log.debug("Reserving %d cores" % value)
            cmd.setReserved(rsrc.name, value)
            rsrc.value += value
    return True
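# Worked example of the reservation policy above (numbers hypothetical):
# with platformMax=16 cores of which rsrc.value=4 are already reserved,
# rsrcLeft is 12. With platformPref=8 the command is clamped to 8 cores;
# without a platform preference it gets min(cmdMaxRsrc, rsrcLeft), and only
# when it takes all 12 remaining cores is the matcher marked as depleted.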
def run(self, serverState, request, response):
    # first read platform capabilities and executables
    rdr = cpc.command.platform_exec_reader.PlatformExecutableReader()
    workerData = request.getParam('worker')
    if request.hasParam('worker-id'):
        workerID = request.getParam('worker-id')
    else:
        workerID = '(none)'
    log.debug("Worker platform + executables: %s" % workerData)
    rdr.readString(workerData, "Worker-reported platform + executables")
    # match queued commands to executables.
    cwm = CommandWorkerMatcher(rdr.getPlatforms(),
                               rdr.getExecutableList(),
                               rdr.getWorkerRequirements())
    cmds = cwm.getWork(serverState.getCmdQueue())
    if not cwm.isDepleted():
        # now sleep for 5 seconds to give the dataflow time to react to any
        # new state.
        time.sleep(5)
        cmds.extend(cwm.getWork(serverState.getCmdQueue()))
    # now check the forwarded variables
    conf = serverState.conf
    originatingServer = None
    heartbeatInterval = None
    try:
        # check whether there is an originating server. If not, we're it
        if self.forwarded:
            if 'originating-server-id' in request.headers:
                originatingServer = request.headers['originating-server-id']
            # check the expected heartbeat time.
            log.debug("Forwarded message")
            if request.hasParam('heartbeat-interval'):
                heartbeatInterval = int(request.getParam('heartbeat-interval'))
                log.debug("Forwarded heartbeat interval is %d" %
                          heartbeatInterval)
    except NameError:
        # self.forwarded does not exist. Treat it as if self.forwarded == False
        pass
    if originatingServer is None:
        # If the originating server property has not been set, the request
        # hasn't been forwarded; therefore we are the originating server.
        selfNode = Node.getSelfNode(conf)
        originatingServer = selfNode.getId()
        # we only store worker state in the server the worker connects to
        serverState.setWorkerState(WorkerStatus.WORKER_STATUS_CONNECTED,
                                   workerID,
                                   request.headers['originating-client'])
    if heartbeatInterval is None:
        heartbeatInterval = conf.getHeartbeatTime()
    log.debug("worker identified %s" % request.headers['originating-client'])

    if len(cmds) > 0:
        # first add them to the running list so they never get lost
        runningCmdList = serverState.getRunningCmdList()
        runningCmdList.add(cmds, originatingServer, heartbeatInterval)
        # construct the tar file with the workloads.
        tff = tempfile.TemporaryFile()
        tf = tarfile.open(fileobj=tff, mode="w:gz")
        # make the commands ready
        for cmd in cmds:
            log.debug("Adding command id %s to tar file." % cmd.id)
            # write the command description to the command's directory
            task = cmd.getTask()
            #log.debug(cmd)
            project = task.getProject()
            taskDir = "task_%s" % task.getID()
            cmddir = cmd.getDir()
            if not os.path.exists(cmddir):
                log.debug("cmddir %s did not exist. Created directory." %
                          cmddir)
                os.mkdir(cmddir)
            arcdir = "%s" % (cmd.id)
            log.debug("cmddir=%s" % cmddir)
            outf = open(os.path.join(cmddir, "command.xml"), "w")
            cmd.writeWorkerXML(outf)
            outf.close()
            tf.add(cmddir, arcname=arcdir, recursive=True)
            # set the state of the command.
        tf.close()
        del tf
        tff.seek(0)
        # now send it back
        response.setFile(tff, 'application/x-tar')
        #project.writeTasks()
        # the file is closed after the response is sent.
        log.info("Did direct worker-ready")
    else:
        nodes = conf.getNodes().getNodesByPriority()
        topology = Nodes()
        if request.hasParam('topology'):
            topology = json.loads(request.getParam('topology'),
                                  object_hook=json_serializer.fromJson)
        thisNode = Node.getSelfNode(conf)
        thisNode.nodes = conf.getNodes()
        topology.addNode(thisNode)
        hasJob = False  # temporary flag that should be removed
        for node in nodes:
            if not topology.exists(node.getId()):
                clnt = ServerMessage(node.getId())
                clientResponse = clnt.workerReadyForwardedRequest(
                    workerID, workerData, topology, originatingServer,
                    heartbeatInterval, request.headers['originating-client'])
                if clientResponse.getType() == 'application/x-tar':
                    log.log(cpc.util.log.TRACE, 'got work from %s' %
                            (clientResponse.headers['originating-server-id']))
                    hasJob = True
                    # we need to rewrap the message
                    # TODO: stupid intermediary step because the mmap from
                    # clientresponse is prematurely closed
                    tmp = tempfile.TemporaryFile('w+b')
                    message = clientResponse.getRawData()
                    tmp.write(message.read(len(message)))
                    tmp.seek(0)
                    #for key in clientResponse.headers:
                    #    print "%s:%s" % (key, clientResponse.headers[key])
                    response.setFile(tmp, 'application/x-tar')
                    response.headers['originating-server-id'] = \
                        clientResponse.headers['originating-server-id']
                    # OPTIMIZE: leads to a lot of folding and unfolding of
                    # packages
        if not hasJob:
            response.add("No command")
        log.info("Did delegated worker-ready")