# Module-level imports assumed by the excerpts below (filterBlacklist,
# resourcealloc, ParallelRun and Parallel are defined elsewhere in the
# package):
import collections
import operator
import threading

def getNodes(api, num, **constraints):
    # Build the requested number of node descriptions, each requiring
    # at least one external interface
    import node as Node
    nodes = []
    for i in xrange(num):
        node = Node.Node(api)
        node.min_num_external_interface = 1
        nodes.append(node)

    # All nodes share the same constraints, so one candidate query suffices
    node = nodes[0]
    candidates = filterBlacklist(node.find_candidates())
    reqs = [candidates] * num

    def pickbest(fullset, nreq, node=nodes[0]):
        # When there are more candidates than needed, keep only the
        # nreq best-rated ones
        if len(fullset) > nreq:
            fullset = zip(node.rate_nodes(fullset), fullset)
            fullset.sort(reverse=True)
            del fullset[nreq:]
            return set(map(operator.itemgetter(1), fullset))
        else:
            return fullset

    # Now do the backtracking search for a suitable solution
    solution = resourcealloc.alloc(reqs, sample=pickbest)

    # Do assign nodes
    runner = ParallelRun(maxthreads=4)
    for node, node_id in zip(nodes, solution):
        runner.put(node.assign_node_id, node_id)
    runner.join()

    return nodes
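# pickbest above is the interesting bit of getNodes: when the candidate pool
# is larger than the request, it ranks candidates and keeps only the nreq
# best before handing them to the backtracking allocator. A minimal
# self-contained sketch of that rate-and-trim step, with a stub rating
# function standing in for node.rate_nodes (stub names are illustrative,
# not part of the NEPI API):

import operator

def _rate_stub(hosts):
    # Hypothetical rating: prefer shorter hostnames (stands in for whatever
    # load/reliability metric rate_nodes actually computes)
    return [-len(h) for h in hosts]

def pick_best(fullset, nreq, rate=_rate_stub):
    if len(fullset) <= nreq:
        return fullset
    # Pair each candidate with its rating, best-rated first
    ranked = sorted(zip(rate(fullset), fullset), reverse=True)
    return set(map(operator.itemgetter(1), ranked[:nreq]))

# e.g. pick_best(['a.pl.edu', 'bb.pl.edu', 'ccc.pl.edu'], 2)
#      -> set(['a.pl.edu', 'bb.pl.edu'])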
def do_wait_nodes(self):
    for guid, node in self._elements.iteritems():
        if isinstance(node, self._node.Node):
            # Just inject configuration stuff
            node.home_path = "nepi-node-%s" % (guid,)
            node.ident_path = self.sliceSSHKey
            node.slicename = self.slicename

            # Show the magic
            self._logger.info("PlanetLab Node %s configured at %s", guid, node.hostname)

    try:
        runner = ParallelRun(maxthreads=64, maxqueue=1)
        abort = []
        def waitforit(guid, node):
            try:
                node.wait_provisioning(
                    (20*60 if node._node_id in self._just_provisioned else 60)
                )

                self._logger.info("READY Node %s at %s", guid, node.hostname)

                # Prepare dependency installer now
                node.prepare_dependencies()
            except:
                abort.append(None)
                raise

        for guid, node in self._elements.iteritems():
            if abort:
                break
            if isinstance(node, self._node.Node):
                self._logger.info("Waiting for Node %s configured at %s", guid, node.hostname)
                runner.put(waitforit, guid, node)
        runner.join()

    except self._node.UnresponsiveNodeError:
        # Uh...
        self._logger.warn("UNRESPONSIVE Nodes")

        # Mark all dead nodes (which are unresponsive) on the blacklist
        # and re-raise
        for guid, node in self._elements.iteritems():
            if isinstance(node, self._node.Node):
                if not node.is_alive():
                    self._logger.warn("Blacklisting %s for unresponsiveness", node.hostname)
                    self._blacklist.add(node.hostname)
                    node.unassign_node()

        try:
            self._save_blacklist()
        except:
            # not important...
            import traceback
            traceback.print_exc()

        raise
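# do_wait_nodes fans the provisioning waits out over worker threads and uses
# a plain shared list (`abort`) as its failure flag: list.append is atomic
# under the GIL, and a non-empty list reads as true, so the scheduling loop
# can cheaply stop queueing work once any wait has failed. A rough
# stdlib-only sketch of the same pattern (names are illustrative, not the
# NEPI API):

import threading

def run_until_first_failure(tasks):
    abort = []  # any worker that fails appends; truthiness means "stop"

    def guarded(task):
        try:
            task()
        except Exception:
            abort.append(None)  # flag the failure for the scheduler
            raise  # re-raise, mirroring waitforit above

    threads = []
    for task in tasks:
        if abort:
            break  # an earlier task failed; don't schedule the rest
        t = threading.Thread(target=guarded, args=(task,))
        t.start()
        threads.append(t)
    for t in threads:
        t.join()
    return not abort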
def _do_in_factory_order(self, action, order, postaction=None, poststep=None):
    logger = self._logger

    guids = collections.defaultdict(list)
    # order guids (elements) according to factory_id
    for guid, factory_id in self._create.iteritems():
        guids[factory_id].append(guid)

    # configure elements following the factory_id order
    for factory_id in order:
        # Create a parallel runner if we're given a Parallel() wrapper
        runner = None
        if isinstance(factory_id, Parallel):
            runner = ParallelRun(factory_id.maxthreads)
            factory_id = factory_id.factory

        # omit the factories that have no element to create
        if factory_id not in guids:
            continue

        # configure action
        factory = self._factories[factory_id]
        if isinstance(action, basestring) and not getattr(factory, action):
            continue
        def perform_action(guid):
            if isinstance(action, basestring):
                getattr(factory, action)(self, guid)
            else:
                action(self, guid)
            if postaction:
                postaction(self, guid)

        # perform the action on all elements, in parallel if so requested
        if runner:
            logger.debug("TestbedController: Starting parallel %s", action)
            runner.start()

        for guid in guids[factory_id]:
            if runner:
                logger.debug("TestbedController: Scheduling %s on %s", action, guid)
                runner.put(perform_action, guid)
            else:
                logger.debug("TestbedController: Performing %s on %s", action, guid)
                perform_action(guid)

        # sync
        if runner:
            runner.sync()

        # post hook
        if poststep:
            for guid in guids[factory_id]:
                if runner:
                    logger.debug("TestbedController: Scheduling post-%s on %s", action, guid)
                    runner.put(poststep, self, guid)
                else:
                    logger.debug("TestbedController: Performing post-%s on %s", action, guid)
                    poststep(self, guid)

        # sync (join regardless of poststep, so worker threads are released)
        if runner:
            runner.join()
            logger.debug("TestbedController: Finished parallel %s", action)
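# The `order` argument of _do_in_factory_order mixes plain factory ids with
# Parallel(factory_id, maxthreads) wrappers, and the wrapper is unwrapped on
# the fly to decide serial vs. threaded execution per factory. A
# stripped-down, stdlib-only sketch of that dispatch (this Parallel is a
# local stand-in for the NEPI wrapper, and run_in_order for the method
# above):

import threading

class Parallel(object):
    def __init__(self, factory, maxthreads=8):
        self.factory = factory
        self.maxthreads = maxthreads

def run_in_order(order, elements, action):
    # elements maps factory_id -> list of guids, like `guids` above
    for factory_id in order:
        threaded = isinstance(factory_id, Parallel)
        if threaded:
            factory_id = factory_id.factory  # unwrap, as the method does
        if factory_id not in elements:
            continue
        if threaded:
            threads = [threading.Thread(target=action, args=(guid,))
                       for guid in elements[factory_id]]
            for t in threads:
                t.start()
            for t in threads:
                t.join()  # sync before moving on to the next factory
        else:
            for guid in elements[factory_id]:
                action(guid)

# e.g. run_in_order(["Internet", Parallel("Node"), "Application"],
#                   {"Internet": [1], "Node": [2, 3, 4]},
#                   lambda guid: None)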
def do_resource_discovery(self, recover = False):
    to_provision = self._to_provision = set()

    # Hosts that are blacklisted or already assigned are off-limits
    reserved = set(self._blacklist)
    for guid, node in self._elements.iteritems():
        if isinstance(node, self._node.Node) and node._node_id is not None:
            reserved.add(node.hostname)

    # Initial algo:
    #   look for perfectly defined nodes
    #   (ie: those with only one candidate)
    reserve_lock = threading.RLock()
    def assignifunique(guid, node):
        # Try existing slice nodes first
        # If we have only one candidate, simply use it
        candidates = node.find_candidates(
            filter_slice_id = self.slice_id)

        node_id = None
        candidate_hosts = set(candidates.keys() if candidates else [])

        reserve_lock.acquire()
        try:
            candidate_hosts -= reserved
            if len(candidate_hosts) == 1:
                hostname = iter(candidate_hosts).next()
                node_id = candidates[hostname]
                reserved.add(hostname)
            elif not candidate_hosts:
                # Try again including unassigned nodes
                # (release the lock while hitting the PLC API)
                reserve_lock.release()
                try:
                    candidates = node.find_candidates()
                finally:
                    reserve_lock.acquire()

                candidate_hosts = set(candidates.keys() if candidates else [])
                candidate_hosts -= reserved

                if len(candidate_hosts) > 1:
                    return
                if len(candidate_hosts) == 1:
                    hostname = iter(candidate_hosts).next()
                    node_id = candidates[hostname]
                    to_provision.add(node_id)
                    reserved.add(hostname)
                elif not candidates:
                    raise RuntimeError("Cannot assign resources for node %s, "
                        "no candidates with %s" % (guid,
                        node.make_filter_description()))
        finally:
            reserve_lock.release()

        if node_id is not None:
            node.assign_node_id(node_id)

    # don't overload the PLC API, just 4 threads to hide latencies
    runner = ParallelRun(maxthreads=4)
    runner.start()
    for guid, node in self._elements.iteritems():
        if isinstance(node, self._node.Node) and node._node_id is None:
            runner.put(assignifunique, guid, node)
    runner.sync()

    # Now do the backtracking search for a suitable solution
    # First with existing slice nodes
    reqs = []
    nodes = []
    genreqs_lock = threading.Lock()
    def genreqs(node, filter_slice_id=None):
        # Collect this node's candidates, excluding reserved hosts
        candidates = node.find_candidates(
            filter_slice_id = filter_slice_id)
        for r in reserved:
            if r in candidates:
                del candidates[r]
        # Append both lists under a lock so reqs[i] stays paired
        # with nodes[i] across worker threads
        genreqs_lock.acquire()
        try:
            reqs.append(candidates.values())
            nodes.append(node)
        finally:
            genreqs_lock.release()

    for guid, node in self._elements.iteritems():
        if isinstance(node, self._node.Node) and node._node_id is None:
            runner.put(genreqs, node, self.slice_id)
    runner.sync()

    if nodes and reqs:
        if recover:
            raise RuntimeError("Impossible to recover: unassigned host "
                "for Nodes %r" % (nodes,))

        def pickbest(fullset, nreq, node=nodes[0]):
            # Trim oversized candidate sets down to the nreq best-rated hosts
            if len(fullset) > nreq:
                fullset = zip(node.rate_nodes(fullset), fullset)
                fullset.sort(reverse=True)
                del fullset[nreq:]
                return set(map(operator.itemgetter(1), fullset))
            else:
                return fullset

        try:
            solution = resourcealloc.alloc(reqs, sample=pickbest)
        except resourcealloc.ResourceAllocationError:
            # Failed, try again with all nodes, not just slice nodes.
            # Snapshot and reset first: genreqs appends to both lists.
            retry_nodes = list(nodes)
            del reqs[:]
            del nodes[:]
            for node in retry_nodes:
                runner.put(genreqs, node)
            runner.sync()
            solution = resourcealloc.alloc(reqs, sample=pickbest)

        to_provision.update(solution)

        # Do assign nodes
        for node, node_id in zip(nodes, solution):
            runner.put(node.assign_node_id, node_id)
        runner.join()
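# do_resource_discovery treats resourcealloc.alloc as a black box: given one
# candidate list per unassigned node, it must return pairwise-distinct node
# ids, raising ResourceAllocationError when no such assignment exists (which
# triggers the all-nodes retry above). A toy backtracking allocator showing
# that contract, for intuition only (the real module also honors the
# `sample` trimming hook used above):

def toy_alloc(reqs, picked=None):
    # reqs: one list of candidate ids per requested node
    picked = picked or []
    if len(picked) == len(reqs):
        return picked
    for cand in reqs[len(picked)]:
        if cand not in picked:  # each node id may be used only once
            result = toy_alloc(reqs, picked + [cand])
            if result is not None:
                return result
    return None  # no assignment possible; the real alloc raises instead

# e.g. toy_alloc([[1, 2], [1], [2, 3]]) -> [2, 1, 3]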