class HeckleSystem(Component):
    """
    Cobalt System component for handling / interacting with the Heckle
    resource manager.

    External Methods:
        add_process_groups -- allocates nodes
        get_process_groups -- get process groups based on specs
        signal_process_groups -- signal a process group
        wait_process_groups -- remove process groups based on specs
        get_resources -- list the free nodes matching a set of specs

    Internal Methods:
        __init__, _start_pg, _check_builds_done, _wait, _release_resources

    Queue Manager Methods:
        validate_job, verify_locations, find_job_location,
        find_queue_equivalence_classes, get_partitions
    """

    name = "system"
    implementation = "HeckleBreadboard"
    # NOTE: class-level dict, so it is shared by every instance of the class.
    queue_assignments = {}

    def __init__(self, *args, **kwargs):
        """
        Set up the process-group container and the resource table, and
        assign every known resource to the "default" queue.
        """
        logger.debug(
            "heckle: System: init ... %s ... &&&&&&&&&&&&&&&&&&&&&&&&&&&&& I am here as well &&&&&&&&&&&&&&&&&&&&&&&&&"
            % threading.current_thread().getName())
        Component.__init__(self, *args, **kwargs)
        self.process_groups = ProcessGroupDict()
        self.process_groups.item_cls = HeckleProcessGroup
        self.resources = ResourceDict()
        self.queue_assignments["default"] = self.resources.keys()
        print("\n\n\n\n")
        print("Queue assignments are: %s" % self.queue_assignments)

    def __repr__(self):
        """
        Printout representation of the class: every instance attribute as
        "name::value, ", followed by every process group in the same form.
        """
        pieces = ["Heckle System Object: Values"]
        for attr_name, attr_value in self.__dict__.items():
            pieces.append(str(attr_name) + "::" + str(attr_value) + ", ")
        pieces.append(" Process Groups:")
        for pg_key in self.process_groups:
            pieces.append(
                str(pg_key) + "::" + str(self.process_groups[pg_key]) + ", ")
        return "".join(pieces)

    #####################
    # Main set of methods
    #####################

    def add_process_groups(self, specs):
        """
        Allocate nodes and add the list of those allocated to the PGDict.

        specs is a list of dictionaries; only the first one is used to make
        the Heckle reservation.  Each dictionary contains the specifications
        for all the nodes in the process group.

        Returns whatever ProcessGroupDict.q_add returns for specs.
        """
        # Debug aid: a 'fakebuild' flag may be passed in through the job
        # environment; move it up into the spec itself when it is present.
        try:
            specs[0]['fakebuild'] = specs[0]['env']['fakebuild']
            del specs[0]['env']['fakebuild']
        except (KeyError, IndexError, TypeError):
            # No specs, no 'env', or no 'fakebuild' in it: nothing to move.
            pass
        print("Heckle System: add_process_groups: <<<<<<<<<<<<<<<<<< OK< Debug< This< : %s"
              % specs)
        HICCUP = HeckleConnector()
        # TODO: decide how reservation failures should be handled here
        # (kill and resubmit the job, put the job / node in a fail state,
        # or simply fail).  For now any exception propagates to the caller.
        reservation = HICCUP.make_reservation(**(specs[0]))
        heckle_res_id = reservation.id
        uid = specs[0]['user']
        logger.debug("Heckle System: heckle_res_id = %i" % heckle_res_id)
        specs[0]['heckle_res_id'] = heckle_res_id
        return self.process_groups.q_add(
            specs,
            lambda x, _: self._start_pg(
                x, heckle_res_id=heckle_res_id, uid=uid))
    add_process_groups = exposed(query(add_process_groups))

    def get_process_groups(self, specs):
        """Get a list of existing allocations matching specs."""
        self._wait()
        return self.process_groups.q_get(specs)
    get_process_groups = exposed(query(get_process_groups))

    def signal_process_groups(self, specs, sig):
        """Send signal sig to every process group matching specs."""
        logger.debug(
            "Heckle System: signal_process_groups: Specs are %s, sig is %s"
            % (specs, sig))
        return self.process_groups.q_get(specs, lambda x, y: x.signal(y), sig)
    signal_process_groups = exposed(query(signal_process_groups))

    def wait_process_groups(self, specs):
        """Remove the process groups matching specs and release their nodes."""
        logger.debug("Heckle System: wait_process_groups; specs are %s" % specs)
        return self.process_groups.q_del(
            specs, lambda x, _: self._release_resources(x))
    wait_process_groups = exposed(query(wait_process_groups))

    #########################################
    # Methods for dealing with Process Groups
    #########################################

    def _start_pg(self, pgp, heckle_res_id, uid):
        """
        Populate the process group with its resources.

        For every node in pgp.location: normalise the MAC address separator
        to ":", record the reservation id, and copy the node attributes
        into pgp.resource_attributes.  Then record the uid and queue the
        nodes for build polling.
        """
        logger.debug("Heckle System: start_pg: PGP is %s" % pgp)
        nodelist = pgp.location
        for node in nodelist:
            node_attributes = self.resources[node]
            node_attributes['mac'] = node_attributes['mac'].replace("-", ":")
            node_attributes['heckle_res_id'] = heckle_res_id
            pgp.resource_attributes[node] = node_attributes._get_dict()
        pgp.uid = uid
        # NOTE(review): this appends the whole node list as ONE entry, while
        # _check_builds_done treats each entry as a node name.  Looks like
        # extend() was intended -- confirm against ResourceDict indexing.
        pgp.pinging_nodes.append(nodelist)

    def _check_builds_done(self):
        """
        Check to see if the nodes are done building.

        Starts each process group whose pinging list has become empty.
        Returns True when no polled process group is still waiting.

        Raises Exception when a node reports UNALLOCATED, CRITFAIL or READY.
        """
        retval = True
        pg_list = [
            x for x in self.process_groups.itervalues()
            if len(x.pinging_nodes) > 0
        ]
        self.resources.update()
        for pgp in pg_list:
            # Walk a snapshot: entries are removed from the live list below,
            # and removing while iterating it would skip entries.
            for nodename in list(pgp.pinging_nodes):
                teststr = self.resources[nodename]['bootstate']
                if teststr == "COMPLETED":
                    logger.debug(
                        "heckle: System: Check Build Done: Removing node %s...%i pinging nodes left"
                        % (nodename, len(pgp.pinging_nodes) - 1))
                    pgp.pinging_nodes.remove(nodename)
                elif teststr in ("BOOTING", ""):
                    logger.debug(
                        "Heckle System: Check Build Done: Node %s not done yet."
                        % nodename)
                elif teststr == "UNALLOCATED":
                    raise Exception(
                        "HIC_SO: _check_builds_done: Node says, 'UNALLOCATED'.  Possible build error, or system timed out."
                    )
                elif teststr == "CRITFAIL":
                    raise Exception(
                        "HIC_SO: _check_builds_done: Node says, 'CRITFAIL'.  It timed out while building."
                    )
                # TODO: find a better way to fail gracefully on this one.
                elif teststr == "READY":
                    raise Exception(
                        "HIC_SO: _check_builds_done: Node says, 'READY'.  The Heckle Reservation is already ready already, skipping pinging."
                    )
            if len(pgp.pinging_nodes) == 0:
                logger.debug(
                    "Heckle System: Check Build Done: No Pinging Nodes left, Start PG Running."
                )
                pgp.start()
            else:
                retval = False
        return retval
    _check_builds_done = automatic(_check_builds_done)

    def _wait(self):
        """Call wait() on every process group in the container."""
        for pgp in self.process_groups.itervalues():
            pgp.wait()
    _wait = automatic(_wait)

    def _release_resources(self, pgp):
        """
        Release all the Heckle nodes of a process group: free the Heckle
        reservation, then mark the nodes free in the resource table.
        """
        logger.debug("Heckle System: Release %s" % pgp.location)
        HICCUP = HeckleConnector()
        HICCUP.free_reserved_node(uid=pgp.uid, node_list=pgp.location)
        self.resources.free(nodes=pgp.location)

    def get_resources(self, specs=None):
        """
        Return the FREE resources (nodes) which match the given specs.

        specs -- optional dict of attributes to match.  It is copied, so
                 the caller's dict is not modified.
        """
        # Copy: avoids a shared mutable default and leaves the caller's
        # dict untouched when the two filter keys are added below.
        specs = dict(specs) if specs else {}
        logger.debug("Heckle System: get Resources, specs are %s" % specs)
        # TODO: revisit how "free" is expressed to the resource table.
        specs['current reservation'] = 9999999
        specs['allocatable'] = 'True'
        res_list = self.resources >= specs
        logger.debug("Heckle System: get Resources, resources are %s" % res_list)
        return res_list
    get_resources = exposed(query(get_resources))

    ##########################################################
    # Methods for interacting with scheduler and queue-manager
    ##########################################################

    def validate_job(self, spec):
        """
        Validate a job for submission -- will the job ever run under the
        current Heckle configuration?

        Steps:
            1) The system must hold at least as many nodes as requested.
            2) Every requested attribute must exist in the glossary with
               the requested value.
            3) Enough nodes must match the requested attributes.

        Returns spec when the job is valid; raises Exception otherwise.
        """
        logger.debug("Heckle System: Validate Job: Specs are %s" % spec)
        try:
            checklist = spec['attrs']
        except (KeyError, TypeError):
            checklist = {}
        try:
            nodecount = spec['nodecount']
        except (KeyError, TypeError):
            nodecount = 1
        glossary = self.resources.glossary
        dnelist = []  # attributes which do not exist in the glossary
        badlist = []  # attributes in the glossary without the wanted value

        # TODO: consider refreshing the resource information here.

        # 1st step: are there enough nodes at all?
        total_nodes = self.resources.node_count()
        if nodecount > total_nodes:
            raise Exception(
                "Validate Job: Not enough nodes; Requested %s, only have %s in the system."
                % (nodecount, total_nodes))

        for att in checklist:
            val = checklist[att]
            try:
                if val not in glossary[att]:
                    badlist.append("%s:%s" % (att, val))  # bad value
            except Exception:
                # The glossary's lookup-failure type is not visible from
                # here, so any failure is treated as "no such attribute".
                dnelist.append(att)

        checklist['current reservation'] = 9999999
        checklist['allocatable'] = 'True'
        retlist = self.resources >= checklist
        retcount = len(retlist)

        if retcount >= nodecount and not badlist and not dnelist:
            return spec  # Good job!

        retstr = "Validate Job: "
        if badlist or dnelist:
            if badlist:
                retstr += "No value for attribute: %s" % badlist
            if dnelist:
                retstr += "Attributes Do Not Exist: %s" % dnelist
        else:
            retstr += "Need %s nodes, only have %s nodes: %s" % (
                nodecount, retcount, retlist)
        raise Exception(retstr)
    validate_job = exposed(validate_job)

    def verify_locations(self, location_list):
        """
        Check a location list against the resource glossary.

        location list is a list of fully qualified strings of node names
        ex:  nodename.mcs.anl.gov

        Returns the result of the membership test against the glossary.
        """
        logger.debug("heckle: System: Validate Job: Verify Locations")
        return location_list in self.resources.glossary
    verify_locations = exposed(verify_locations)

    def find_job_location(self, job_location_args, end_times):
        """
        Find a group of not-busy nodes in which to run each job.

        Arguments:
            job_location_args -- A list of dictionaries with info about the
                                 job (sorted in place by utility_score):
                jobid -- string identifier
                nodes -- int number of nodes
                queue -- string queue name
                required -- ??
                utility_score -- ??
                threshold -- ??
                walltime -- ??
                attrs -- dictionary of attributes to match against
            end_times -- supposed time the job will end (unused here)

        Returns:
            Dictionary with list of nodes a job can run on, keyed by jobid

        Raises Exception when a job cannot be given enough matching nodes.
        """
        logger.debug("heckle: System: find_job_location")
        locations = {}

        def jobsort(job):
            """Used to sort job list by utility score"""
            return job["utility_score"]

        job_location_args.sort(key=jobsort)

        # Try to match jobs to nodes which can run them
        for job in job_location_args:
            if "attrs" not in job or job["attrs"] is None:
                attrs = {}
            else:
                attrs = job['attrs']
            attrs['current reservation'] = 9999999
            attrs['allocatable'] = 'True'
            nodecount = int(job['nodes'])
            print("Heckle System: Find Job Location: Job is %s" % job)
            # TODO: possible point of change -- pick nodes from, and remove
            # them from, an explicit list of unreserved nodes.
            print("Heckle System: Find Job Location: Free Nodes is %s"
                  % self.resources.getfreenodes())
            nodelist = (self.resources >= attrs)  # get matching nodes
            print("Nodelist at this stage is %s" % nodelist)
            if len(nodelist) < nodecount:
                raise Exception(
                    "Heckle System: find_job_locations: Not Enough matching Nodes Available"
                )
            print("Nodecount = %s" % nodecount)
            retlist = nodelist[:nodecount]
            self.resources.allocate(retlist)
            print("Heckle System: Find Job Location: Remaining nodelist is %s"
                  % retlist)
            locations[job["jobid"]] = retlist
            print("Locations is now: %s" % locations)
        logger.info("heckle: find_job_location: locations are %s" % locations)
        return locations
    find_job_location = exposed(find_job_location)

    def find_queue_equivalence_classes(self, reservation_dict,
                                       active_queue_names):
        """
        Find equivalent queues.

        Active queues whose node assignments overlap are merged into one
        class; each class also lists the reservations that name one of its
        nodes (reservation values are ":"-separated host names).

        Returns: equiv= [{'reservations': [], 'queues': ['default']}]
        """
        logger.debug("Heckle System: find queue equivalence classes")
        equiv = []
        for queue in self.queue_assignments:
            # skip queues that aren't running
            if queue not in active_queue_names:
                continue
            found_a_match = False
            print("Heckle Queue is %s" % queue)
            for equ in equiv:
                print("Heckle Equ is %s" % equ)
                if equ['data'].intersection(self.queue_assignments[queue]):
                    equ['queues'].add(queue)
                    equ['data'].update(self.queue_assignments[queue])
                    found_a_match = True
                    break
            if not found_a_match:
                equiv.append({
                    'queues': set([queue]),
                    'data': set(self.queue_assignments[queue]),
                    'reservations': set()
                })

        # Second pass: merge classes that ended up sharing a queue.
        real_equiv = []
        for eq_class in equiv:
            found_a_match = False
            for equ in real_equiv:
                if equ['queues'].intersection(eq_class['queues']):
                    equ['queues'].update(eq_class['queues'])
                    equ['data'].update(eq_class['data'])
                    found_a_match = True
                    break
            if not found_a_match:
                real_equiv.append(eq_class)
        equiv = real_equiv

        for eq_class in equiv:
            for res_name in reservation_dict:
                for host_name in reservation_dict[res_name].split(":"):
                    if host_name in eq_class['data']:
                        eq_class['reservations'].add(res_name)
            # Convert the sets to lists, then drop the working data.
            for key in eq_class:
                eq_class[key] = list(eq_class[key])
            del eq_class['data']
        return equiv
    find_queue_equivalence_classes = exposed(find_queue_equivalence_classes)

    def get_partitions(self, locations):
        """
        Work-around to get the cqadm to run a single job on this system.

        PRE:  locations is a list of dict of strings of possible node names
        POST: if good, return locations
              if not good, raise Exception naming the rejected locations
        """
        # NOTE(review): every other method reads self.resources.glossary
        # (lower case); confirm that Glossary.nodelist exists.
        nodelist = self.resources.Glossary.nodelist
        logger.debug("Heckle System: get_partitions: raw is are: %s" % locations)
        if locations in nodelist:
            return locations
        raise Exception(
            "heckle: System: get_partition: Bad Locations: %s " % (locations,))
    get_partitions = exposed(get_partitions)
class HeckleSystem(Component):
    """
    Cobalt System component for handling / interacting with the Heckle
    resource manager.

    External Methods:
        add_process_groups -- allocates nodes
        get_process_groups -- get process groups based on specs
        signal_process_groups -- signal a process group
        wait_process_groups -- remove process groups based on specs
        get_resources -- list nodes, optionally filtered by specs

    Internal Methods:
        __init__, _check_builds_done, _wait, _release_resources,
        _unforbid_nodes

    Queue Manager Methods:
        validate_job, verify_locations, find_job_location,
        find_queue_equivalence_classes, get_partitions
    """

    name = "system"
    implementation = "HeckleBreadboard"
    # NOTE: class-level dict, so it is shared by every instance of the class.
    queue_assignments = {}

    def __init__(self, *args, **kwargs):
        """
        Set up the process-group container, assign the full node list to
        the "default" queue, and start with an empty forbidden-node list.
        """
        Component.__init__(self, *args, **kwargs)
        self.process_groups = ProcessGroupDict()
        self.process_groups.item_cls = HeckleProcessGroup
        self.queue_assignments["default"] = self.get_resources()
        # Temporary fix for the forbidden nodes issue: nodes handed out by
        # find_job_location, withheld from later placement requests.
        self.hacky_forbidden_nodes = []

    def __repr__(self):
        """
        Printout representation of the class: every instance attribute as
        "name::value, ", followed by every process group in the same form.
        """
        pieces = ["Heckle System Object: Values"]
        for attr_name, attr_value in self.__dict__.items():
            pieces.append(str(attr_name) + "::" + str(attr_value) + ", ")
        pieces.append(" Process Groups:")
        for pg_key in self.process_groups:
            pieces.append(
                str(pg_key) + "::" + str(self.process_groups[pg_key]) + ", ")
        return "".join(pieces)

    #####################
    # Main set of methods
    #####################

    def add_process_groups(self, specs):
        """
        Take the specs (a list of jobs) and initiate each job as a process
        group.

        The process group abstracts the actual job into an object, providing
        a single point of control and interaction for all the nodes within
        that job.

        Each job is described by a dict.  Each dict contains:
            size:
            kernel: a String, the name of the kernel image to load.
            executable: A string, the name of the command to execute upon
                the head node; this could be considered the actual job's
                file.
            stdin, stdout, stderr: Three separate strings, each containing
                the file to use for standard communication with the job as
                it is running.  May be specified, or False.
            kerneloptions: A string containing various options for the
                kernel, or False.
            args: A list
            umask: An integer
            jobid: An integer
            cobalt_log_file: A string containing the log file to use in the
                initiation and running of the job itself.
            location: List of strings of node / resource names
            env: A dict of key:value strings, specifying the environment in
                which the job is to run on the node
            id: A number
            mode:
            nodect:
            cwd: A string, specifying the current working directory in
                which to run the job on the node
            walltime: Integer; the time, in minutes, allocated for the job
                to run on the node.
            user: A string, the name of the user under which this job is to
                run.
        """
        logstr = "System:add_process_groups:"
        LOGGER.debug(logstr + "Specs are %s" % specs)
        return self.process_groups.q_add(specs)
    add_process_groups = exposed(query(add_process_groups))

    def get_process_groups(self, specs):
        """Get a list of existing allocations matching specs."""
        LOGGER.debug("System:get_process_groups: specs are %s" % specs)
        self._wait()
        return self.process_groups.q_get(specs)
    get_process_groups = exposed(query(get_process_groups))

    def signal_process_groups(self, specs, sig):
        """Send signal sig to every process group matching specs."""
        LOGGER.debug(
            "System:signal_process_groups: Specs are %s, sig is %s"
            % (specs, sig))
        return self.process_groups.q_get(specs, lambda x, y: x.signal(y), sig)
    signal_process_groups = exposed(query(signal_process_groups))

    def wait_process_groups(self, specs):
        """Remove the process groups matching specs and release their nodes."""
        LOGGER.debug("System:wait_process_groups; specs are %s" % specs)
        return self.process_groups.q_del(
            specs, lambda x, _: self._release_resources(x))
    wait_process_groups = exposed(query(wait_process_groups))

    #########################################
    # Methods for dealing with Process Groups
    #########################################

    def _check_builds_done(self):
        """
        Check to see if the nodes are done building.

        Starts each process group whose pinging list has become empty.
        Returns True when no polled process group is still waiting.

        Raises Exception when a node reports UNALLOCATED or CRITFAIL.
        """
        exstr = "System:check_build_done:"
        retval = True
        pg_list = [x for x in self.process_groups.itervalues()
                   if len(x.pinging_nodes) > 0]
        hiccup = HeckleConnector()
        for pgp in pg_list:
            # Walk a snapshot: nodes are removed from the live list below,
            # and removing while iterating it would skip nodes.
            for nodename in list(pgp.pinging_nodes):
                teststr = hiccup.get_node_bootstate(nodename)
                if teststr == "READY":
                    # READY only counts as "built" for fake builds.
                    if 'fakebuild' in pgp.__dict__ and pgp.fakebuild:
                        pgp.pinging_nodes.remove(nodename)
                        # The format must be applied to the joined string:
                        # '%' binds tighter than '+'.
                        LOGGER.debug(
                            (exstr + "Node %s done building; "
                             + "%s pinging nodes left")
                            % (nodename, len(pgp.pinging_nodes)))
                    else:
                        LOGGER.debug(exstr + "Node %s not done yet" % nodename)
                elif teststr == "COMPLETED":
                    pgp.pinging_nodes.remove(nodename)
                    LOGGER.debug(
                        exstr + "Removing node %s...%i pinging nodes left"
                        % (nodename, len(pgp.pinging_nodes)))
                elif teststr in ("BOOTING", ""):
                    LOGGER.debug(exstr + "Node %s not done yet." % nodename)
                elif teststr == "UNALLOCATED":
                    raise Exception(
                        exstr +
                        "Node 'UNALLOCATED'; Possible build error, or system timed out."
                    )
                elif teststr == "CRITFAIL":
                    raise Exception(
                        exstr +
                        "Node says, 'CRITFAIL'.  It timed out while building.")
            # TODO: find a better way to fail gracefully.
            if len(pgp.pinging_nodes) == 0:
                LOGGER.debug(
                    "System:Check Build Done: No Pinging Nodes left, Start PG %s Running."
                    % pgp.jobid)
                pgp.start()
            else:
                retval = False
        return retval
    _check_builds_done = automatic(_check_builds_done)

    def _wait(self):
        """Call wait() on every process group in the container."""
        waitlen = len(self.process_groups.keys())
        LOGGER.debug("System:_wait:%s process groups." % waitlen)
        for pgp in self.process_groups.itervalues():
            pgp.wait()
            # NOTE(review): a `del self.hacky_forbidden_nodes[pgp.location]`
            # used to follow here.  Indexing a list by a list always raised
            # and was swallowed, so it never had any effect.  Forbidden
            # nodes are released in _release_resources; confirm whether
            # they should also be released while the group is still alive.
    _wait = automatic(_wait)

    def _unforbid_nodes(self, nodes):
        """Drop every node in nodes from the forbidden list (best effort)."""
        if not nodes:
            return
        if isinstance(nodes, str):
            nodes = [nodes]
        # Mutate in place so other references to the list stay valid.
        self.hacky_forbidden_nodes[:] = [
            node for node in self.hacky_forbidden_nodes if node not in nodes
        ]

    def _release_resources(self, pgp):
        """
        Release all the Heckle nodes of a process group, unreserving them,
        and make them eligible for placement again.
        """
        LOGGER.debug("System:release")
        LOGGER.debug("System:Locations are: %s" % pgp.location)
        hiccup = HeckleConnector()
        hiccup.free_reserved_node(uid=pgp.uid, node_list=pgp.location)
        self._unforbid_nodes(pgp.location)

    def get_resources(self, specs=None):
        """
        Return the resources (nodes) which match the given specs.

        specs -- a dict which describes a job.  When empty or None the
                 connector's full node list is returned; otherwise the
                 nodes Heckle lists as available for those specs.
        """
        LOGGER.debug("System:get Resources")
        hiccup = HeckleConnector()
        if not specs:
            return hiccup.node_list
        return hiccup.list_available_nodes(**specs)
    get_resources = exposed(query(get_resources))

    ##########################################################
    # Methods for interacting with scheduler and queue-manager
    ##########################################################

    def validate_job(self, spec):
        """
        Validate a job for submission -- will the job ever run under the
        current Heckle configuration?

        Steps:
            1) Validate Kernel (falls back to 'default')
            2) Validate HW

        Returns spec (possibly with 'kernel' replaced by 'default').
        Raises Exception when the hardware attributes are rejected.
        """
        LOGGER.debug("System:Validate Job: Specs are %s" % spec)
        hiccup = HeckleConnector()

        # NOTE(review): a missing, invalid or uncheckable kernel is silently
        # replaced by 'default' (this is what the code has always done);
        # confirm an invalid kernel should not reject the job instead.
        try:
            kernel_ok = hiccup.validkernel(spec['kernel'])
        except Exception:
            kernel_ok = False
        if not kernel_ok:
            spec['kernel'] = 'default'

        # A job without attributes places no hardware constraints, the
        # same way find_job_location treats it.
        attrs = spec.get('attrs') or {}
        try:
            valid_hw = hiccup.validhw(**attrs)
            if not valid_hw:
                raise Exception(
                    "System:Validate Job: Bad Hardware Specs: %s" % spec)
        except Exception as strec:
            raise Exception("System:Validate Job: Validate Job: %s" % strec)
        # TODO: also validate the job against the overall node count.
        return spec
    validate_job = exposed(validate_job)

    def verify_locations(self, location_list):
        """
        Make sure a location list is valid.

        location list is a list of fully qualified strings of node names
        ex:  nodename.mcs.anl.gov

        Returns location_list when every name is a Heckle node; raises
        Exception listing the unknown names otherwise.
        """
        LOGGER.debug("System:validate Job: Verify Locations")
        hiccup = HeckleConnector()
        heckle_set = set(hiccup.list_all_nodes())
        unknown = set(location_list).difference(heckle_set)
        if not unknown:
            return location_list
        raise Exception(
            "System:VerifyLocations: Invalid location names: %s"
            % list(unknown))
    verify_locations = exposed(verify_locations)

    def find_job_location(self, job_location_args, end_times):
        """
        Find a group of not-busy nodes in which to run each job.

        Arguments:
            job_location_args -- A list of dictionaries with info about the
                                 job (sorted in place by utility_score):
                jobid -- string identifier
                nodes -- int number of nodes
                queue -- string queue name
                required -- ??
                utility_score -- ??
                threshold -- ??
                walltime -- ??
                attrs -- dictionary of attributes to match against
            end_times -- supposed time the job will end (unused here)

        Returns:
            Dictionary with list of nodes a job can run on, keyed by jobid.
            Jobs for which Heckle finds nothing, or reports an error, are
            left out.
        """
        LOGGER.debug("System:find_job_location")
        locations = {}

        def jobsort(job):
            """Used to sort job list by utility score"""
            return job["utility_score"]

        job_location_args.sort(key=jobsort)

        # Try to match jobs to nodes which can run them
        hiccup = HeckleConnector()
        for job in job_location_args:
            if "attrs" not in job or job["attrs"] is None:
                job["attrs"] = {}
            print("Job is %s" % job)
            # Send a copy carrying the locally forbidden nodes; build a new
            # list so the caller's own 'forbidden' entry is not modified.
            tempjob = job.copy()
            if self.hacky_forbidden_nodes:
                tempjob['forbidden'] = (
                    list(tempjob.get('forbidden') or [])
                    + list(self.hacky_forbidden_nodes))
            # TODO: possible point of change -- pick nodes from, and remove
            # them from, an explicit list of unreserved nodes.
            try:
                resources = hiccup.find_job_location(**tempjob)
            except Exception as err:
                LOGGER.info("System:find_job_location: Error %s" % err)
                continue
            if not resources:
                continue
            node_list = list(resources)
            # Withhold these nodes from the jobs placed after this one.
            self.hacky_forbidden_nodes.extend(node_list)
            locations[job["jobid"]] = node_list
        LOGGER.info("System:find_job_location: locations are %s" % locations)
        return locations
    find_job_location = exposed(find_job_location)

    def find_queue_equivalence_classes(self, reservation_dict,
                                       active_queue_names):
        """
        Find equivalent queues.

        Active queues whose node assignments overlap are merged into one
        class; each class also lists the reservations that name one of its
        nodes (reservation values are ":"-separated host names).

        Returns: equiv= [{'reservations': [], 'queues': ['default']}]
        """
        equiv = []
        for queue in self.queue_assignments:
            # skip queues that aren't running
            if queue not in active_queue_names:
                continue
            found_a_match = False
            for equ in equiv:
                print("Heckle Equ is %s" % equ)
                if equ['data'].intersection(self.queue_assignments[queue]):
                    equ['queues'].add(queue)
                    equ['data'].update(self.queue_assignments[queue])
                    found_a_match = True
                    break
            if not found_a_match:
                equiv.append({
                    'queues': set([queue]),
                    'data': set(self.queue_assignments[queue]),
                    'reservations': set()
                })

        # Second pass: merge classes that ended up sharing a queue.
        real_equiv = []
        for eq_class in equiv:
            found_a_match = False
            for equ in real_equiv:
                if equ['queues'].intersection(eq_class['queues']):
                    equ['queues'].update(eq_class['queues'])
                    equ['data'].update(eq_class['data'])
                    found_a_match = True
                    break
            if not found_a_match:
                real_equiv.append(eq_class)
        equiv = real_equiv

        for eq_class in equiv:
            for res_name in reservation_dict:
                for host_name in reservation_dict[res_name].split(":"):
                    if host_name in eq_class['data']:
                        eq_class['reservations'].add(res_name)
            # Convert the sets to lists, then drop the working data.
            for key in eq_class:
                eq_class[key] = list(eq_class[key])
            del eq_class['data']
        return equiv
    find_queue_equivalence_classes = exposed(find_queue_equivalence_classes)

    def get_partitions(self, locations):
        """
        Work-around to get the cqadm to run a single job on this system.

        PRE:  locations is a list of dict of strings of possible node
              names; only locations[0]['name'] is checked, and it may be a
              single node name or a list of node names.
        POST: if good, return locations
              if not good, raise Exception and list the bad nodes
        """
        logstr = "System:get_partition: "
        hiccup = HeckleConnector()
        heckle_node_set = set(hiccup.list_all_nodes())
        locs = locations[0]['name']
        LOGGER.debug(logstr + "raw is are: %s" % locations)
        LOGGER.debug(logstr + "vals are: %s" % locs)
        if isinstance(locs, list):
            badlocations = set(locs).difference(heckle_node_set)
            if badlocations:
                raise Exception(
                    logstr + "Bad Locations: %s " % list(badlocations))
        elif isinstance(locs, str):
            # A single name is checked against the Heckle nodes, exactly
            # like each name of a list.
            if locs not in heckle_node_set:
                raise Exception(logstr + "Bad Locations: %s" % locs)
        else:
            raise Exception(
                logstr +
                "location needs to be string or list of strings, you provided %s : %s"
                % (type(locs), locs))
        return locations
    get_partitions = exposed(get_partitions)
class HeckleSystem(Component):
    """
    Cobalt System component for handling / interacting with the
    Heckle resource manager.

    Every call that needs Heckle creates a fresh HeckleConnector.

    External Methods:
        add_process_groups -- add process groups for the given job specs
        get_process_groups -- get process groups based on specs
        signal_process_groups -- signal a process group
        wait_process_groups -- remove process groups based on specs
        get_resources -- list nodes, optionally filtered by specs
        get_partitions -- validate the node names given by cqadm

    Internal Methods:
        _check_builds_done -- start process groups whose nodes are built
        _wait -- call wait() on every process group
        _release_resources -- unreserve the nodes of a process group
        _unforbid_nodes -- drop nodes from the forbidden-node list

    Queue Manager Methods:
        validate_job
        verify_locations
        find_job_location
        find_queue_equivalence_classes
    """

    name = "system"
    implementation = "HeckleBreadboard"
    # NOTE: class-level dict, therefore shared by all instances.
    queue_assignments = {}

    def __init__(self, *args, **kwargs):
        Component.__init__(self, *args, **kwargs)
        self.process_groups = ProcessGroupDict()
        self.process_groups.item_cls = HeckleProcessGroup
        self.queue_assignments["default"] = self.get_resources()
        # This is a temporary fix for the forbidden nodes issue: nodes
        # handed out by find_job_location are remembered here.
        self.hacky_forbidden_nodes = []

    def __repr__(self):
        """
        printout representation of the class
        """
        indict = self.__dict__
        parts = ["Heckle System Object: Values"]
        for element in indict:
            value = indict[element]
            if value is None:
                parts.append(str(element) + "::" + "None, ")
            else:
                parts.append(str(element) + "::" + str(value) + ", ")
        parts.append("   Process Groups:")
        for element in self.process_groups:
            parts.append(str(element) + "::"
                         + str(self.process_groups[element]) + ", ")
        return "".join(parts)

    #####################
    # Main set of methods
    #####################

    def add_process_groups(self, specs):
        """
        This function takes the specs (a list of jobs) and initiates each
        job as a process group.
        The process group abstracts the actual job into an object,
        providing a single point of control and interaction for all the
        nodes within that job.

        Each job is described by a dict.  Each dict contains:
            size:
            kernel: a String, the name of the kernel image to load.
            executable: A string, the name of the command to execute upon
                the head node; this could be considered the actual job's
                file.
            stdin, stdout, stderr: Three separate strings, each containing
                the file to use for standard communication with the job
                as it is running.  May be specified, or False.
            kerneloptions: A string containing various options for the
                kernel, or False.
            args: A list
            umask: An integer
            jobid: An integer
            cobalt_log_file: A string containing the log file to use in
                the initiation and running of the job itself.
            location: List of strings of node / resource names
            env: A dict of key:value strings, specifying the environment
                in which the job is to run on the node
            id: A number
            mode:
            nodect:
            cwd: A string, specifying the current working directory in
                which to run the job on the node
            walltime: Integer; the time, in minutes, allocated for the
                job to run on the node.
            user: A string, the name of the user under which this job is
                to run.

        Returns whatever self.process_groups.q_add(specs) returns.
        """
        logstr = "System:add_process_groups:"
        LOGGER.debug(logstr + "Specs are %s" % specs)
        return self.process_groups.q_add(specs)
    add_process_groups = exposed(query(add_process_groups))

    def get_process_groups(self, specs):
        """get a list of existing allocations"""
        LOGGER.debug("System:get_process_groups: specs are %s" % specs)
        self._wait()
        return self.process_groups.q_get(specs)
    get_process_groups = exposed(query(get_process_groups))

    def signal_process_groups(self, specs, sig):
        """Send signal sig to every process group matching specs"""
        LOGGER.debug("System:signal_process_groups: Specs are %s, sig is %s"
                     % (specs, sig))
        return self.process_groups.q_get(specs, lambda x, y: x.signal(y), sig)
    signal_process_groups = exposed(query(signal_process_groups))

    def wait_process_groups(self, specs):
        """Remove terminated process groups and release their nodes"""
        LOGGER.debug("System:wait_process_groups; specs are %s" % specs)
        return self.process_groups.q_del(
            specs, lambda x, _: self._release_resources(x))
    wait_process_groups = exposed(query(wait_process_groups))

    #########################################
    # Methods for dealing with Process Groups
    #########################################

    def _check_builds_done(self):
        """
        Check to see if the nodes are done building
        Starts the process group if all nodes in them are done building

        Returns True when no examined process group still has pinging
        nodes, False otherwise.
        Raises Exception when a node reports UNALLOCATED or CRITFAIL.
        """
        exstr = "System:check_build_done:"
        retval = True
        pg_list = [x for x in self.process_groups.itervalues()
                   if len(x.pinging_nodes) > 0]
        hiccup = HeckleConnector()
        for pgp in pg_list:
            # Iterate over a snapshot: nodes are removed from the real
            # list inside the loop, and removing from a list while
            # iterating over it silently skips the following element.
            for nodename in list(pgp.pinging_nodes):
                teststr = hiccup.get_node_bootstate(nodename)
                if teststr == "READY":
                    # READY only counts as "built" for fake builds.
                    if 'fakebuild' in pgp.__dict__ and pgp.fakebuild:
                        pgp.pinging_nodes.remove(nodename)
                        LOGGER.debug(
                            exstr
                            + "Node %s done building; %s pinging nodes left"
                            % (nodename, len(pgp.pinging_nodes)))
                    else:
                        LOGGER.debug(exstr + "Node %s not done yet"
                                     % nodename)
                elif teststr == "COMPLETED":
                    LOGGER.debug(
                        exstr + "Removing node %s...%i pinging nodes left"
                        % (nodename, len(pgp.pinging_nodes) - 1))
                    pgp.pinging_nodes.remove(nodename)
                elif teststr in ["BOOTING", "", ""]:
                    LOGGER.debug(exstr + "Node %s not done yet." % nodename)
                elif teststr == "UNALLOCATED":
                    raise Exception(
                        exstr + "Node 'UNALLOCATED'; Possible build error, or system timed out.")
                elif teststr == "CRITFAIL":
                    raise Exception(
                        exstr + "Node says, 'CRITFAIL'.  It timed out while building.")
                    #####################
                    ####      Need to figure a better way to fail gracefully
                    #####################
            if len(pgp.pinging_nodes) == 0:
                LOGGER.debug(
                    "System:Check Build Done: No Pinging Nodes left, Start PG %s Running."
                    % pgp.jobid)
                pgp.start()
            else:
                retval = False
        return retval
    _check_builds_done = automatic(_check_builds_done)

    def _unforbid_nodes(self, nodes):
        """
        Remove every node named in nodes from hacky_forbidden_nodes.

        nodes may be a list of node names, a single node name, or None.
        Names which are not in the forbidden list are ignored.
        """
        if not nodes:
            return
        if isinstance(nodes, str):
            nodes = [nodes]
        released = set(nodes)
        # Slice assignment keeps the same list object alive; it may be
        # referenced from a job dict built in find_job_location.
        self.hacky_forbidden_nodes[:] = [
            node for node in self.hacky_forbidden_nodes
            if node not in released]

    def _wait(self):
        """
        Calls the process group container's wait() method, then drops
        that group's nodes from the forbidden-node list.
        """
        waitlen = len(self.process_groups.keys())
        LOGGER.debug("System:_wait:%s process groups." % waitlen)
        for pgp in self.process_groups.itervalues():
            pgp.wait()
            # The previous "del list[list]" always raised TypeError,
            # which a bare except hid, so nothing was ever removed.
            self._unforbid_nodes(getattr(pgp, 'location', None))
    _wait = automatic(_wait)

    def _release_resources(self, pgp):
        """
        Releases all the Heckle nodes, unreserving them
        """
        LOGGER.debug("System:release")
        LOGGER.debug("System:Locations are: %s" % pgp.location)
        hiccup = HeckleConnector()
        hiccup.free_reserved_node(uid=pgp.uid, node_list=pgp.location)
        self._unforbid_nodes(pgp.location)

    def get_resources(self, specs=None):
        """
        Returns the connector's node_list when no specs are given,
        otherwise the available nodes which match the given specs.
        Specs is a dict which describes a job
        """
        LOGGER.debug("System:get Resources")
        ##################################
        ###  Look at this as a future change
        ##################################
        hiccup = HeckleConnector()
        if not specs:
            return hiccup.node_list
        return hiccup.list_available_nodes(**specs)
    get_resources = exposed(query(get_resources))

    ##########################################################
    # Methods for interacting with scheduler and queue-manager
    ##########################################################

    def validate_job(self, spec):
        """
        Validates a job for submission -- will the job ever run
        under the current Heckle configuration?
        Steps:
            1)  Validate Kernel
            2)  Validate HW
            3)  Validate Job versus overall

        A missing or invalid kernel is replaced by 'default' in spec.
        Raises Exception when the hardware attributes are rejected or
        cannot be checked.  Returns the (possibly modified) spec.
        """
        LOGGER.debug("System:Validate Job: Specs are %s" % spec)
        hiccup = HeckleConnector()
        try:
            kernel = spec['kernel']
            valid_kernel = hiccup.validkernel(kernel)
            if not valid_kernel:
                raise Exception("System:Validate Job: Bad Kernel")
        except Exception:
            # Deliberate best effort: fall back to the default kernel.
            spec['kernel'] = 'default'
        try:
            valid_hw = hiccup.validhw(**spec['attrs'])
            if not valid_hw:
                raise Exception(
                    "System:Validate Job: Bad Hardware Specs: %s" % spec)
        except Exception as strec:
            raise Exception("System:Validate Job:  Validate Job: %s" % strec)
        #try:
            #valid_job = hiccup.validjob( **spec )
            #if not valid_job:
                #raise Exception(
                #"System: validate Job:  Never enough nodes")
        #except:
            #raise Exception("System: validate Job: Never enough nodes")
        return spec
    validate_job = exposed(validate_job)

    def verify_locations(self, location_list):
        """
        Makes sure a location list is valid
        location list is a list of fully qualified strings of node names
        ex:  nodename.mcs.anl.gov

        Returns location_list when every name is known to Heckle,
        otherwise raises Exception naming the unknown ones.
        """
        LOGGER.debug("System:validate Job: Verify Locations")
        hiccup = HeckleConnector()
        heckle_set = set(hiccup.list_all_nodes())
        location_set = set(location_list)
        if heckle_set >= location_set:
            return location_list
        not_valid_list = list(location_set.difference(heckle_set))
        raise Exception(
            "System:VerifyLocations: Invalid location names: %s"
            % not_valid_list)
    verify_locations = exposed(verify_locations)

    def find_job_location(self, job_location_args, end_times):
        """
        Finds a group of not-busy nodes in which to run the job

        Arguments:
            job_location_args -- A list of dictionaries with info about
                                 the job
                jobid -- string identifier
                nodes -- int number of nodes
                queue -- string queue name
                required -- ??
                utility_score -- ??
                threshold -- ??
                walltime -- ??
                attrs -- dictionary of attributes to match against
            end_times -- supposed time the job will end (not used here)

        Returns: Dictionary with list of nodes a job can run on, keyed
        by jobid.  Jobs for which the connector finds nothing, or raises,
        are left out.  job_location_args is sorted in place.
        """
        LOGGER.debug("System:find_job_location")
        locations = {}

        def jobsort(job):
            """Used to sort job list by utility score"""
            return job["utility_score"]

        job_location_args.sort(key=jobsort)

        # Try to match jobs to nodes which can run them
        hiccup = HeckleConnector()
        for job in job_location_args:
            if "attrs" not in job or job["attrs"] is None:
                job["attrs"] = {}
            print("Job is %s" % job)
            tempjob = job.copy()
            if self.hacky_forbidden_nodes:
                if 'forbidden' not in tempjob:
                    tempjob['forbidden'] = self.hacky_forbidden_nodes
                else:
                    tempjob['forbidden'].extend(self.hacky_forbidden_nodes)
            #############################
            ###  Look at this as point of change
            ###  Think:  For node in unreserved nodes
            ###            Choose node from list
            ###            Remove node from unreserved nodes
            #############################
            # NOTE(review): tempjob (carrying 'forbidden') is built above
            # but the connector is called with job, as before -- confirm
            # whether tempjob was meant to be passed here.
            try:
                resources = hiccup.find_job_location(**job)
                if not resources:
                    continue
            except Exception as err:
                LOGGER.info("System:find_job_location: Error %s" % err)
                continue
            # Build a list of appropriate nodes, and remember each of
            # them as forbidden for later requests.
            node_list = []
            for node in resources:
                node_list.append(node)
                self.hacky_forbidden_nodes.append(node)
            locations[job["jobid"]] = node_list
        LOGGER.info("System:find_job_location: locations are %s" % locations)
        return locations
    find_job_location = exposed(find_job_location)

    def find_queue_equivalence_classes(self, reservation_dict,
                                       active_queue_names):
        """
        Finds equivalent queues
        An equivalent queue is a queue which can run upon the same
        partition(s)
        For now, with one partition (everything!) this is irrelevant.

        reservation_dict maps a reservation name to a ":"-separated
        string of host names.

        Returns: equiv= [{'reservations': [], 'queues': ['default']}]
        """
        equiv = []
        # First pass: group active queues whose node sets overlap.
        for queue in self.queue_assignments:
            # skip queues that aren't running
            if queue not in active_queue_names:
                continue
            found_a_match = False
            for equ in equiv:
                print("Heckle Equ is %s" % equ)
                if equ['data'].intersection(self.queue_assignments[queue]):
                    equ['queues'].add(queue)
                    equ['data'].update(self.queue_assignments[queue])
                    found_a_match = True
                    break
            if not found_a_match:
                equiv.append({'queues': set([queue]),
                              'data': set(self.queue_assignments[queue]),
                              'reservations': set()})

        # Second pass: merge classes which share a queue.
        real_equiv = []
        for eq_class in equiv:
            found_a_match = False
            for equ in real_equiv:
                if equ['queues'].intersection(eq_class['queues']):
                    equ['queues'].update(eq_class['queues'])
                    equ['data'].update(eq_class['data'])
                    found_a_match = True
                    break
            if not found_a_match:
                real_equiv.append(eq_class)
        equiv = real_equiv

        # Attach reservations, then turn the sets into plain lists and
        # drop the helper 'data' entry.
        for eq_class in equiv:
            for res_name in reservation_dict:
                for host_name in reservation_dict[res_name].split(":"):
                    if host_name in eq_class['data']:
                        eq_class['reservations'].add(res_name)
            for key in eq_class:
                eq_class[key] = list(eq_class[key])
            del eq_class['data']
        return equiv
    find_queue_equivalence_classes = exposed(find_queue_equivalence_classes)

    def get_partitions(self, locations):
        """
        Work-around to get the cqadm to run a single job on this system
        PRE:  locations is a list of dict of strings of possible node
              names; only locations[0]['name'] is examined
        POST:  if good, return locations
                if not good, raise exception and list bad nodes
        """
        logstr = "System:get_partition: "
        hiccup = HeckleConnector()
        heckle_node_set = set(hiccup.list_all_nodes())
        locs = locations[0]['name']
        LOGGER.debug(logstr + "raw is are: %s" % locations)
        LOGGER.debug(logstr + "vals are: %s" % locs)
        if isinstance(locs, list):
            badlocations = set(locs).difference(heckle_node_set)
            if badlocations:
                raise Exception(
                    logstr + "Bad Locations: %s " % list(badlocations))
        elif isinstance(locs, str):
            # Compare against the known Heckle nodes.  The name used to
            # be looked up in locations (a list of dicts), which can
            # never contain a string, so every string was rejected.
            if locs not in heckle_node_set:
                raise Exception(logstr + "Bad Locations: %s" % locs)
        else:
            raise Exception(
                logstr + "location needs to be string or list of strings, you provided %s : %s"
                % (type(locs), locs))
        return locations
    get_partitions = exposed(get_partitions)
class HeckleSystem(Component):
    """
    Cobalt System component for handling / interacting with the
    Heckle resource manager.

    Node state is read from self.resources (a ResourceDict); the
    reservations themselves are made and freed through HeckleConnector.

    External Methods:
        add_process_groups -- allocates nodes
        get_process_groups -- get process groups based on specs
        signal_process_groups -- signal a process group
        wait_process_groups -- removed process groups based on specs

    Internal Methods:
        __init__:
        _start_pg:
        _check_builds_done:
        _wait:
        _release_resources:
        get_resources:

    Queue Manager Methods:
        validate_job:
        verify_locations:
        find_job_locations:
        find_queue_equivalence_classes:
    """

    name = "system"
    implementation = "HeckleBreadboard"
    # NOTE: class-level dict, therefore shared by all instances.
    queue_assignments = {}

    def __init__(self, *args, **kwargs):
        logger.debug(
            "heckle: System: init ... %s ... &&&&&&&&&&&&&&&&&&&&&&&&&&&&&  I am here as well &&&&&&&&&&&&&&&&&&&&&&&&&"
            % threading.current_thread().getName()
        )
        Component.__init__(self, *args, **kwargs)
        self.process_groups = ProcessGroupDict()
        self.process_groups.item_cls = HeckleProcessGroup
        self.resources = ResourceDict()
        self.queue_assignments["default"] = self.resources.keys()
        print("\n\n\n\n")
        print("Queue assignments are: %s" % self.queue_assignments)

    def __repr__(self):
        """
        printout representation of the class
        """
        indict = self.__dict__
        parts = ["Heckle System Object: Values"]
        for element in indict:
            value = indict[element]
            if value is None:
                parts.append(str(element) + "::" + "None, ")
            else:
                parts.append(str(element) + "::" + str(value) + ", ")
        parts.append("   Process Groups:")
        for element in self.process_groups:
            parts.append(str(element) + "::"
                         + str(self.process_groups[element]) + ", ")
        return "".join(parts)

    #####################
    # Main set of methods
    #####################

    def add_process_groups(self, specs):
        """
        Allocate nodes and add the list of those allocated to the PGDict
        specs is a list of dictionaries
        Each dictionary contains the specifications for all the nodes in
        the process group

        Only specs[0] is used to make the Heckle reservation; the
        reservation id is stored in specs[0]['heckle_res_id'].
        """
        # Debug - Take out to really rebuild
        ####  Need to check the environment variable for fakebuild
        try:
            specs[0]["fakebuild"] = specs[0]["env"]["fakebuild"]
            del specs[0]["env"]["fakebuild"]
        except (KeyError, IndexError, TypeError):
            # No fakebuild request in the environment: nothing to move.
            pass
        print("Heckle System: add_process_groups: <<<<<<<<<<<<<<<<<<         OK< Debug< This< : %s" % specs)
        HICCUP = HeckleConnector()
        reservation = HICCUP.make_reservation(**(specs[0]))
        heckle_res_id = reservation.id
        uid = specs[0]["user"]
        logger.debug("Heckle System: heckle_res_id = %i" % heckle_res_id)
        specs[0]["heckle_res_id"] = heckle_res_id
        return self.process_groups.q_add(
            specs,
            lambda x, _: self._start_pg(x, heckle_res_id=heckle_res_id,
                                        uid=uid))
        ## Ideas for handling a failed reservation:
        ##   1)  Kill job, then resubmit job w/o node name(s)
        ##         Would require access to cqadm via api
        ##   2)  Put job / node in fail state
        ##   3)  Simply fail
    add_process_groups = exposed(query(add_process_groups))

    def get_process_groups(self, specs):
        """get a list of existing allocations"""
        self._wait()
        return self.process_groups.q_get(specs)
    get_process_groups = exposed(query(get_process_groups))

    def signal_process_groups(self, specs, sig):
        """Send signal sig to every process group matching specs"""
        logger.debug(
            "Heckle System: signal_process_groups: Specs are %s, sig is %s"
            % (specs, sig))
        return self.process_groups.q_get(specs, lambda x, y: x.signal(y), sig)
    signal_process_groups = exposed(query(signal_process_groups))

    def wait_process_groups(self, specs):
        """Remove terminated process groups and release their nodes"""
        logger.debug("Heckle System: wait_process_groups; specs are %s"
                     % specs)
        return self.process_groups.q_del(
            specs, lambda x, _: self._release_resources(x))
    wait_process_groups = exposed(query(wait_process_groups))

    #########################################
    # Methods for dealing with Process Groups
    #########################################

    def _start_pg(self, pgp, heckle_res_id, uid):
        """
        Populates the process group with its resources
            gets node information for nodes in process group
            Updates those attributes
            Places nodes in the pinging nodes list, to see if they're built
        """
        logger.debug("Heckle System: start_pg: PGP is %s" % pgp)
        nodelist = pgp.location
        for node in nodelist:
            node_attributes = self.resources[node]
            node_attributes["mac"] = node_attributes["mac"].replace("-", ":")
            node_attributes["heckle_res_id"] = heckle_res_id
            pgp.resource_attributes[node] = node_attributes._get_dict()
        pgp.uid = uid
        # extend, not append: pinging_nodes must hold node names, because
        # _check_builds_done uses each entry as a key into self.resources.
        pgp.pinging_nodes.extend(nodelist)

    def _check_builds_done(self):
        """
        Check to see if the nodes are done building
        Starts the process group if all nodes in them are done building

        Returns True when no examined process group still has pinging
        nodes, False otherwise.
        Raises Exception when a node reports UNALLOCATED, CRITFAIL or
        READY.
        """
        retval = True
        pg_list = [x for x in self.process_groups.itervalues()
                   if len(x.pinging_nodes) > 0]
        self.resources.update()
        for pgp in pg_list:
            # Iterate over a snapshot: nodes are removed from the real
            # list inside the loop, and removing from a list while
            # iterating over it silently skips the following element.
            for nodename in list(pgp.pinging_nodes):
                teststr = self.resources[nodename]["bootstate"]
                if teststr == "COMPLETED":
                    logger.debug(
                        "heckle: System: Check Build Done: Removing node %s...%i pinging nodes left"
                        % (nodename, len(pgp.pinging_nodes) - 1)
                    )
                    pgp.pinging_nodes.remove(nodename)
                elif teststr in ["BOOTING", "", ""]:
                    logger.debug(
                        "Heckle System: Check Build Done: Node %s not done yet."
                        % nodename)
                elif teststr == "UNALLOCATED":
                    raise Exception(
                        "HIC_SO: _check_builds_done: Node says, 'UNALLOCATED'.  Possible build error, or system timed out."
                    )
                elif teststr == "CRITFAIL":
                    raise Exception(
                        "HIC_SO: _check_builds_done: Node says, 'CRITFAIL'.  It timed out while building.")
                #####################
                ####      Need to figure a better way to fail gracefully on this one...
                #####################
                elif teststr == "READY":
                    raise Exception(
                        "HIC_SO: _check_builds_done: Node says, 'READY'.  The Heckle Reservation is already ready already, skipping pinging."
                    )
            if len(pgp.pinging_nodes) == 0:
                logger.debug(
                    "Heckle System: Check Build Done: No Pinging Nodes left, Start PG Running.")
                pgp.start()
            else:
                retval = False
        return retval
    _check_builds_done = automatic(_check_builds_done)

    def _wait(self):
        """
        Calls the process group container's wait() method
        """
        for pgp in self.process_groups.itervalues():
            pgp.wait()
    _wait = automatic(_wait)

    def _release_resources(self, pgp):
        """
        Releases all the Heckle nodes, unreserving them
        """
        logger.debug("Heckle System: Release %s" % pgp.location)
        HICCUP = HeckleConnector()
        HICCUP.free_reserved_node(uid=pgp.uid, node_list=pgp.location)
        self.resources.free(nodes=pgp.location)

    def get_resources(self, specs=None):
        """
        Returns a list of names for all the FREE resources (nodes) which
        match the given specs.

        specs is an optional dict of attributes; it is not modified.
        """
        logger.debug("Heckle System: get Resources, specs are %s" % specs)
        ##################################
        ###  Look at this as a future change
        ##################################
        # Work on a private copy: the old mutable default ({}) was shared
        # between calls, and a caller's dict was changed behind its back.
        query_specs = dict(specs) if specs else {}
        query_specs["current reservation"] = 9999999
        query_specs["allocatable"] = "True"
        res_list = self.resources >= query_specs
        logger.debug("Heckle System: get Resources, resources are %s"
                     % res_list)
        return res_list
    get_resources = exposed(query(get_resources))

    ##########################################################
    # Methods for interacting with scheduler and queue-manager
    ##########################################################

    def validate_job(self, spec):
        """
        Validates a job for submission -- will the job ever run
        under the current Heckle configuration?
        Steps:
            1)  Are there enough nodes in the system at all?
            2)  Does every requested attribute / value exist in the
                glossary?
            3)  Do enough free nodes match the requested attributes?

        Returns spec unchanged when the job is valid, otherwise raises
        Exception with a message describing what is wrong.
        """
        logger.debug("Heckle System: Validate Job: Specs are %s" % spec)
        # Copy the attributes so the filter keys added below do not leak
        # into the job spec handed back to the caller.
        checklist = dict(spec.get("attrs") or {})
        nodecount = spec.get("nodecount", 1)
        glossary = self.resources.glossary
        dnelist = []  # for attributes which do not exist in glossary
        badlist = []  # for attribute values which the glossary lacks
        ##################################
        ###  Look at this as a future change
        ###  Think:  Refresh Resources Info
        ##################################
        # 1st step: Are there enough nodes at all?
        total_nodes = self.resources.node_count()
        if nodecount > total_nodes:
            raise Exception(
                "Validate Job: Not enough nodes; Requested %s, only have %s in the system."
                % (nodecount, total_nodes)
            )
        for att in checklist:
            val = checklist[att]
            try:
                if val not in glossary[att]:
                    badlist.append("%s:%s" % (att, val))  # Bad attribute
            except Exception:
                dnelist.append(att)  # Attribute does not exist
        checklist["current reservation"] = 9999999
        checklist["allocatable"] = "True"
        retlist = self.resources >= checklist
        retcount = len(retlist)
        if retcount >= nodecount and not badlist and not dnelist:
            return spec  # Good Job!
        retstr = "Validate Job: "
        if badlist or dnelist:
            if badlist:
                retstr += "No value for attribute: %s" % badlist
            if dnelist:
                retstr += "Attributes Do Not Exist: %s" % dnelist
        else:
            retstr += "Need %s nodes, only have %s nodes:  %s" % (
                nodecount, retcount, retlist)
        raise Exception(retstr)
    validate_job = exposed(validate_job)

    def verify_locations(self, location_list):
        """
        Makes sure a location list is valid
        location list is a list of fully qualified strings of node names
        ex:  nodename.mcs.anl.gov
        """
        logger.debug("heckle: System: Validate Job: Verify Locations")
        # NOTE(review): this tests the whole list for membership in the
        # glossary and returns that result rather than the location
        # list -- confirm against the glossary type and the callers.
        return location_list in self.resources.glossary
    verify_locations = exposed(verify_locations)

    def find_job_location(self, job_location_args, end_times):
        """
        Finds a group of not-busy nodes in which to run the job

        Arguments:
            job_location_args -- A list of dictionaries with info about
                                 the job
                jobid -- string identifier
                nodes -- int number of nodes
                queue -- string queue name
                required -- ??
                utility_score -- ??
                threshold -- ??
                walltime -- ??
                attrs -- dictionary of attributes to match against
            end_times -- supposed time the job will end (not used here)

        Returns: Dictionary with list of nodes a job can run on, keyed
        by jobid.  job_location_args is sorted in place.
        Raises Exception as soon as one job cannot be given enough nodes.
        """
        logger.debug("heckle: System: find_job_location")
        locations = {}

        def jobsort(job):
            """Used to sort job list by utility score"""
            return job["utility_score"]

        job_location_args.sort(key=jobsort)

        # Try to match jobs to nodes which can run them
        for job in job_location_args:
            if "attrs" not in job or job["attrs"] is None:
                attrs = {}
            else:
                attrs = job["attrs"]
            attrs["current reservation"] = 9999999
            attrs["allocatable"] = "True"
            nodecount = int(job["nodes"])
            print("Heckle System: Find Job Location: Job is %s" % job)
            #############################
            ###  Look at this as point of change
            ###  Think:  For node in unreserved nodes
            ###            Choose node from list
            ###            Remove node from unreserved nodes
            #############################
            print("Heckle System: Find Job Location: Free Nodes is %s"
                  % self.resources.getfreenodes())
            nodelist = self.resources >= attrs  # get Matching Node
            print("Nodelist at this stage is %s" % nodelist)
            if len(nodelist) >= nodecount:
                print("Nodecount = %s" % nodecount)
                retlist = nodelist[:nodecount]
                self.resources.allocate(retlist)
                print("Heckle System: Find Job Location: Remaining nodelist is %s" % retlist)
            else:
                # NOTE(review): raising here abandons the nodes already
                # allocated for earlier jobs in this call -- confirm
                # whether skipping the job was intended instead.
                raise Exception(
                    "Heckle System: find_job_locations: Not Enough matching Nodes Available")
            locations[job["jobid"]] = retlist
            print("Locations is now: %s" % locations)
        logger.info("heckle: find_job_location: locations are %s" % locations)
        return locations
    find_job_location = exposed(find_job_location)

    def find_queue_equivalence_classes(self, reservation_dict,
                                       active_queue_names):
        """
        Finds equivalent queues
        An equivalent queue is a queue which can run upon the same
        partition(s)
        For now, with one partition (everything!) this is irrelevant.

        reservation_dict maps a reservation name to a ":"-separated
        string of host names.

        Returns: equiv= [{'reservations': [], 'queues': ['default']}]
        """
        logger.debug("Heckle System: find queue equivalence classes")
        equiv = []
        # First pass: group active queues whose node sets overlap.
        for queue in self.queue_assignments:
            # skip queues that aren't running
            if queue not in active_queue_names:
                continue
            found_a_match = False
            print("Heckle Queue is %s" % queue)
            for equ in equiv:
                print("Heckle Equ is %s" % equ)
                if equ["data"].intersection(self.queue_assignments[queue]):
                    equ["queues"].add(queue)
                    equ["data"].update(self.queue_assignments[queue])
                    found_a_match = True
                    break
            if not found_a_match:
                equiv.append(
                    {"queues": set([queue]),
                     "data": set(self.queue_assignments[queue]),
                     "reservations": set()}
                )

        # Second pass: merge classes which share a queue.
        real_equiv = []
        for eq_class in equiv:
            found_a_match = False
            for equ in real_equiv:
                if equ["queues"].intersection(eq_class["queues"]):
                    equ["queues"].update(eq_class["queues"])
                    equ["data"].update(eq_class["data"])
                    found_a_match = True
                    break
            if not found_a_match:
                real_equiv.append(eq_class)
        equiv = real_equiv

        # Attach reservations, then turn the sets into plain lists and
        # drop the helper 'data' entry.
        for eq_class in equiv:
            for res_name in reservation_dict:
                for host_name in reservation_dict[res_name].split(":"):
                    if host_name in eq_class["data"]:
                        eq_class["reservations"].add(res_name)
            for key in eq_class:
                eq_class[key] = list(eq_class[key])
            del eq_class["data"]
        return equiv
    find_queue_equivalence_classes = exposed(find_queue_equivalence_classes)

    def get_partitions(self, locations):
        """
        Work-around to get the cqadm to run a single job on this system
        PRE:  locations is a list of dict of strings of possible node
              names
        POST:  if good, return locations
                if not good, raise exception and list bad nodes
        """
        # NOTE(review): the rest of the class spells this attribute
        # "glossary" (lower case) -- confirm which one ResourceDict has.
        nodelist = self.resources.Glossary.nodelist
        logger.debug("Heckle System: get_partitions: raw is are: %s"
                     % locations)
        # The undefined names "locs" and "badlocations" used to be
        # referenced here, so every call died with a NameError; the
        # messages now report the value that was actually checked.
        logger.debug("Heckle System: get_partitions: vals are: %s"
                     % locations)
        if locations in nodelist:
            return locations
        raise Exception(
            "heckle: System: get_partition: Bad Locations: %s " % locations)
    get_partitions = exposed(get_partitions)