def __setstate__(self, state):
    """Rebuild a simulator instance from a pickled ``state`` mapping.

    Only ``managed_partitions``, ``config_file`` and (optionally)
    ``partition_flags`` are read from ``state``; every other attribute is
    recreated empty.  When a configuration file was saved, the partition
    table is regenerated from it before the saved per-partition flags are
    applied.

    Arguments:
    state -- mapping produced when the object was pickled
    """
    # values carried over from the pickle
    self._managed_partitions = state['managed_partitions']
    self.config_file = state['config_file']

    # runtime-only containers always start out empty
    self._partitions = PartitionDict()
    self.process_groups = BGProcessGroupDict()
    self.process_groups.item_cls = BGSimProcessGroup
    self.node_card_cache = dict()
    self._partitions_lock = thread.allocate_lock()
    self.failed_components = sets.Set()
    self.pending_diags = dict()
    self.failed_diags = list()
    self.bridge_in_error = False
    self.cached_partitions = None
    self.offline_partitions = []

    # repopulate the partition table from the saved configuration file
    if self.config_file is not None:
        self.configure(self.config_file)

    # each saved flags entry is indexed as (scheduled, functional, queue)
    if 'partition_flags' in state:
        saved_flags = state['partition_flags']
    else:
        saved_flags = {}
    for name, flags in saved_flags.items():
        if name not in self._partitions:
            logger.info("Partition %s is no longer defined" % name)
            continue
        partition = self._partitions[name]
        partition.scheduled = flags[0]
        partition.functional = flags[1]
        partition.queue = flags[2]

    self.update_relatives()
    self.lock = threading.Lock()
    self.statistics = Statistics()
def __setstate__(self, state):
    """Rebuild a bridge-backed system instance from a pickled ``state``.

    Only ``managed_partitions`` and (optionally) ``partition_flags`` are
    read from ``state``; every other attribute is recreated empty and the
    partition table is reloaded through ``self.configure()``.

    The background ``update_partition_state`` thread is started as the very
    last step, so it can never run against an object that is still missing
    ``lock`` or ``statistics``.

    Arguments:
    state -- mapping produced when the object was pickled
    """
    sys.setrecursionlimit(5000)
    self._managed_partitions = state['managed_partitions']

    # runtime-only containers always start out empty
    self._partitions = PartitionDict()
    self.process_groups = BGProcessGroupDict()
    self.process_groups.item_cls = BGProcessGroup
    self.node_card_cache = dict()
    self._partitions_lock = thread.allocate_lock()
    self.pending_diags = dict()
    self.failed_diags = list()
    self.diag_pids = dict()
    self.pending_script_waits = sets.Set()
    self.bridge_in_error = False
    self.cached_partitions = None
    self.offline_partitions = []

    # reload the partition table before re-applying the saved flags
    self.configure()

    # each saved flags entry is indexed as (scheduled, functional, queue)
    if 'partition_flags' in state:
        for pname, flags in state['partition_flags'].items():
            if pname in self._partitions:
                partition = self._partitions[pname]
                partition.scheduled = flags[0]
                partition.functional = flags[1]
                partition.queue = flags[2]
            else:
                logger.info("Partition %s is no longer defined" % pname)

    self.update_relatives()

    # finish initialising the instance before any thread can see it
    self.lock = threading.Lock()
    self.statistics = Statistics()

    # start the state-polling thread only once the object is complete
    thread.start_new_thread(self.update_partition_state, tuple())
def configure(self):
    """Load all partition definitions from the bridge and rebuild state.

    The bridge's partition list is converted into a fresh partition
    dictionary that replaces the contents of ``self._partitions``.  Wiring
    dependencies are then detected for every partition, and each partition
    that is not busy is marked as blocked when one of its node cards is in
    use or when a partition it has a wiring conflict with is busy.

    Prints a message and exits the process with status 1 if the bridge
    raises ``BridgeException`` while listing partitions.
    """
    self.logger.info("configure()")
    try:
        system_def = Cobalt.bridge.PartitionList.by_filter()
    except BridgeException:
        print("Error communicating with the bridge during initial config. Terminating.")
        sys.exit(1)

    # caches shared across the per-partition helper calls
    bp_cache = {}
    wiring_cache = {}

    # build the new partition dict in one pass over the bridge data
    new_partitions = PartitionDict()
    new_partitions.q_add([
        self._new_partition_dict(partition_def, bp_cache)
        for partition_def in system_def
    ])

    # swap the freshly built partitions into the live table
    self._partitions.clear()
    self._partitions.update(new_partitions)

    # find the wiring deps, timing how long the search takes
    started = time.time()
    for partition in self._partitions.values():
        self._detect_wiring_deps(partition, wiring_cache)
    finished = time.time()
    self.logger.info("took %f seconds to find wiring deps" % (finished - started))

    # derive blocked states; busy partitions are left untouched
    for partition in self._partitions.values():
        if partition.state == "busy":
            continue

        # the first node card that is in use blocks the partition
        for node_card in partition.node_cards:
            if node_card.used_by:
                partition.state = "blocked (%s)" % node_card.used_by
                break

        # a busy wiring conflict takes precedence over the node card state
        for conflict_name in partition._wiring_conflicts:
            if self._partitions[conflict_name].state == "busy":
                partition.state = "blocked-wiring (%s)" % conflict_name
                break
def configure(self, config_file):
    """
    Configure simulated partitions.

    Parses the machine description, empties ``self._partitions`` and adds
    one partition record per ``Partition`` element (with its node cards,
    switches and wires).  Wiring dependencies are then detected for every
    partition and the partition relationship lists are refreshed.

    Arguments:
    config_file -- xml configuration file; its root element must be ``BG``

    Logs the reason and exits the process with status 1 when the file
    cannot be opened, cannot be parsed, or has an unexpected root element.
    """
    self.logger.log(1, "configure: opening machine configuration file")

    def _get_node_card(name):
        # reuse one NodeCard object per id across all partitions
        if name not in self.node_card_cache:
            self.node_card_cache[name] = NodeCard(name)
        return self.node_card_cache[name]

    try:
        system_doc = ElementTree.parse(config_file)
    except IOError:
        self.logger.error("unable to open file: %r" % config_file)
        self.logger.error("exiting...")
        sys.exit(1)
    except Exception:
        # Exception (not a bare except) so that KeyboardInterrupt and
        # SystemExit are not misreported as a problem with the file
        self.logger.error("problem loading data from file: %r" % config_file, exc_info=True)
        self.logger.error("exiting...")
        sys.exit(1)

    system_def = system_doc.getroot()
    if system_def.tag != "BG":
        self.logger.error("unexpected root element in %r: %r" % (config_file, system_def.tag))
        self.logger.error("exiting...")
        sys.exit(1)

    # that 32 is not really constant -- it needs to either be read from
    # cobalt.conf or from the bridge API
    NODES_PER_NODECARD = 32

    self.logger.log(1, "configure: acquiring machine information and creating partition objects")
    self._partitions.clear()
    for partition_def in system_def.getiterator("Partition"):
        node_list = [_get_node_card(nc.get("id"))
                     for nc in partition_def.getiterator("NodeCard")]
        switch_list = [s.get("id") for s in partition_def.getiterator("Switch")]
        wire_list = [w.get("id") for w in partition_def.getiterator("Wire")]

        self._partitions.q_add([
            dict(
                name=partition_def.get("name"),
                queue=partition_def.get("queue", "default"),
                size=NODES_PER_NODECARD * len(node_list),
                node_cards=node_list,
                switches=switch_list,
                wires=wire_list,
                state="idle",
            )
        ])

    # find the wiring deps
    self.logger.log(1, "configure: looking for wiring dependencies")
    for p in self._partitions.values():
        self._detect_wiring_deps(p)

    # update partition relationship lists
    self.logger.log(1, "configure: updating partition relationship lists")
    self.update_relatives()
def configure(self, config_file):
    """Configure simulated partitions.

    Parses the machine description and builds one partition record per
    ``Partition`` element.  Partitions with the same number of node cards
    that share at least one switch are recorded as wiring conflicts of
    each other.  The result then replaces the contents of
    ``self._partitions``.

    Arguments:
    config_file -- xml configuration file; its root element must be ``BG``

    Logs the reason and exits the process with status 1 when the file
    cannot be opened, cannot be parsed, or has an unexpected root element.
    """

    def _get_node_card(name):
        # reuse one NodeCard object per id across all partitions
        if name not in self.node_card_cache:
            self.node_card_cache[name] = NodeCard(name)
        return self.node_card_cache[name]

    self.logger.info("configure()")
    try:
        system_doc = ElementTree.parse(config_file)
    except IOError:
        self.logger.error("unable to open file: %r" % config_file)
        self.logger.error("exiting...")
        sys.exit(1)
    except Exception:
        # Exception (not a bare except) so that KeyboardInterrupt and
        # SystemExit are not misreported as a problem with the file;
        # exc_info keeps the parser's traceback in the log
        self.logger.error("problem loading data from file: %r" % config_file, exc_info=True)
        self.logger.error("exiting...")
        sys.exit(1)

    system_def = system_doc.getroot()
    if system_def.tag != "BG":
        self.logger.error("unexpected root element in %r: %r" % (config_file, system_def.tag))
        self.logger.error("exiting...")
        sys.exit(1)

    # that 32 is not really constant -- it needs to either be read from
    # cobalt.conf or from the bridge API
    NODES_PER_NODECARD = 32

    # initialize a new partition dict with all partitions
    partitions = PartitionDict()
    partition_specs = []
    # node card count -> names of the partitions with that many node cards
    wiring_cache = {}

    for partition_def in system_def.getiterator("Partition"):
        node_list = [_get_node_card(nc.get("id"))
                     for nc in partition_def.getiterator("NodeCard")]
        nc_count = len(node_list)
        wiring_cache.setdefault(nc_count, []).append(partition_def.get("name"))
        switch_list = [s.get("id") for s in partition_def.getiterator("Switch")]

        partition_specs.append(
            dict(
                name=partition_def.get("name"),
                queue=partition_def.get("queue", "default"),
                size=NODES_PER_NODECARD * nc_count,
                node_cards=node_list,
                switches=switch_list,
                state="idle",
            ))

    partitions.q_add(partition_specs)

    # find the wiring deps: only partitions of equal size are compared
    for size in wiring_cache:
        # build each partition's switch set once per group, not once per pair
        group = [(partitions[name], set(partitions[name].switches))
                 for name in wiring_cache[size]]
        for p, p_switches in group:
            for other, other_switches in group:
                if p.name == other.name:
                    continue
                if p_switches.intersection(other_switches):
                    self.logger.info(
                        "found a wiring dep between %s and %s", p.name, other.name)
                    partitions[p.name]._wiring_conflicts.add(other.name)

    # update object state
    self._partitions.clear()
    self._partitions.update(partitions)