def test_hostnames(self):
    """A satisfiable resource request yields the local host first;
    an unknown resource key yields None."""
    logging.debug('')
    logging.debug('test_hostnames')
    names = ResourceAllocationManager.get_hostnames({'n_cpus': 1})
    self.assertEqual(names[0], platform.node())
    # No allocator can satisfy an unrecognized resource key.
    names = ResourceAllocationManager.get_hostnames({'no_such_resource': 1})
    self.assertEqual(names, None)
def test_hostnames(self):
    """Check hostname lookup for both a satisfiable and an
    unsatisfiable resource description."""
    logging.debug('')
    logging.debug('test_hostnames')
    get_hostnames = ResourceAllocationManager.get_hostnames
    result = get_hostnames({'n_cpus': 1})
    self.assertEqual(result[0], platform.node())
    # An unknown resource key cannot be satisfied by any allocator.
    self.assertEqual(get_hostnames({'no_such_resource': 1}), None)
def test_hostnames(self):
    """The local allocator satisfies a minimal CPU request; asking the
    LocalHost allocator for a non-local host returns None."""
    logging.debug('')
    logging.debug('test_hostnames')
    matched = RAM.get_hostnames({'min_cpus': 1})
    self.assertEqual(matched[0], platform.node())
    # LocalHost can never satisfy 'localhost': False.
    matched = RAM.get_hostnames({'allocator': 'LocalHost',
                                 'localhost': False})
    self.assertEqual(matched, None)
def test_hostnames(self):
    """Hostname query returns this node for a basic CPU request and
    None when the LocalHost allocator is asked for a remote host."""
    logging.debug('')
    logging.debug('test_hostnames')
    lookup = RAM.get_hostnames
    hosts = lookup({'min_cpus': 1})
    self.assertEqual(hosts[0], platform.node())
    # Contradictory request: LocalHost only ever offers the local machine.
    hosts = lookup({'allocator': 'LocalHost', 'localhost': False})
    self.assertEqual(hosts, None)
def main(): """ Configure a cluster and use it. """ enable_console(logging.DEBUG) logging.getLogger().setLevel(0) print 'Client PID', os.getpid() # Configure cluster. cluster_name = 'EC2Cluster' machines = [] if USE_EC2: # The identity file used to access EC2 via ssh. identity_filename = os.path.expanduser('~/.ssh/lovejoykey') identity_filename += '.ppk' if sys.platform == 'win32' else '.pem' machines.append(ClusterHost( hostname='*****@*****.**', python='setowns1_2013-05-06_09.17.04.529682' \ '/OpenMDAO-OpenMDAO-Framework-testbranch/devenv/bin/python', tunnel_incoming=True, tunnel_outgoing=True, identity_filename=identity_filename)) machines.append(ClusterHost( hostname='*****@*****.**', python='setowns1_2013-05-06_09.17.03.113077' \ '/OpenMDAO-OpenMDAO-Framework-testbranch/devenv/bin/python', tunnel_incoming=True, tunnel_outgoing=True, identity_filename=identity_filename)) machines.append(ClusterHost( hostname='*****@*****.**', python='setowns1_2013-05-06_09.17.05.434412' \ '/OpenMDAO-OpenMDAO-Framework-testbranch/devenv/bin/python', tunnel_incoming=True, tunnel_outgoing=True, identity_filename=identity_filename)) machines.append(ClusterHost( hostname='*****@*****.**', python='setowns1_2013-05-06_09.20.17.379627' \ '/OpenMDAO-OpenMDAO-Framework-testbranch/devenv/Scripts/python', tunnel_incoming=True, tunnel_outgoing=True, identity_filename=identity_filename)) machines.append(ClusterHost( hostname='*****@*****.**', python='setowns1_2013-05-06_09.19.49.348885' \ '/OpenMDAO-OpenMDAO-Framework-testbranch/devenv/Scripts/python', tunnel_incoming=True, tunnel_outgoing=True, identity_filename=identity_filename)) # machines.append(ClusterHost( # hostname='viper.grc.nasa.gov', # python='OpenMDAO-Framework/devenv/bin/python', # tunnel_incoming=True, tunnel_outgoing=True, # identity_filename=None)) else: # Trivial local 'cluster' for debugging without remote host issues. 
machines.append( ClusterHost(hostname=socket.getfqdn(), python=sys.executable)) # machines.append(ClusterHost( # hostname='viper.grc.nasa.gov', # python='OpenMDAO-Framework/devenv/bin/python', # tunnel_incoming=True, tunnel_outgoing=True, # identity_filename=None)) # Start it. cluster = ClusterAllocator(cluster_name, machines, allow_shell=True, method='load-average') # method='greedy') # method='round-robin') print 'Cluster initialized' RAM.insert_allocator(0, cluster) n_servers = RAM.max_servers(dict(allocator=cluster_name)) print n_servers, 'Servers:' for name in RAM.get_hostnames( dict(allocator=cluster_name, min_cpus=n_servers)): print ' ', name # Create model. top = GPOptimization() # Configure DOE. top.driver.sequential = False # Run concurrently across cluster. top.driver.reload_model = False # Force use of only cluster hosts by adding this requirement. top.driver.extra_resources = dict(allocator=cluster_name) # This is necessary more often than it should be. top.driver.ignore_egg_requirements = True # Perform the optimization. top.run()
def main():
    """ Configure a cluster and use it.

    Builds the machine list (real EC2 hosts or a trivial local 'cluster'),
    registers a ClusterAllocator as the highest-priority allocator, prints
    the available servers, then runs a GPOptimization model concurrently
    across the cluster.
    """
    enable_console(logging.DEBUG)
    logging.getLogger().setLevel(0)
    print 'Client PID', os.getpid()

    # Configure cluster.
    cluster_name = 'EC2Cluster'
    machines = []
    if USE_EC2:
        # The identity file used to access EC2 via ssh.
        # .ppk for PuTTY on Windows, .pem for OpenSSH elsewhere.
        identity_filename = os.path.expanduser('~/.ssh/lovejoykey')
        identity_filename += '.ppk' if sys.platform == 'win32' else '.pem'
        # Each host points at a timestamped OpenMDAO install; 'bin' paths
        # are Linux instances, 'Scripts' paths are Windows instances.
        machines.append(ClusterHost(
            hostname='*****@*****.**',
            python='setowns1_2013-05-06_09.17.04.529682' \
                   '/OpenMDAO-OpenMDAO-Framework-testbranch/devenv/bin/python',
            tunnel_incoming=True, tunnel_outgoing=True,
            identity_filename=identity_filename))
        machines.append(ClusterHost(
            hostname='*****@*****.**',
            python='setowns1_2013-05-06_09.17.03.113077' \
                   '/OpenMDAO-OpenMDAO-Framework-testbranch/devenv/bin/python',
            tunnel_incoming=True, tunnel_outgoing=True,
            identity_filename=identity_filename))
        machines.append(ClusterHost(
            hostname='*****@*****.**',
            python='setowns1_2013-05-06_09.17.05.434412' \
                   '/OpenMDAO-OpenMDAO-Framework-testbranch/devenv/bin/python',
            tunnel_incoming=True, tunnel_outgoing=True,
            identity_filename=identity_filename))
        machines.append(ClusterHost(
            hostname='*****@*****.**',
            python='setowns1_2013-05-06_09.20.17.379627' \
                   '/OpenMDAO-OpenMDAO-Framework-testbranch/devenv/Scripts/python',
            tunnel_incoming=True, tunnel_outgoing=True,
            identity_filename=identity_filename))
        machines.append(ClusterHost(
            hostname='*****@*****.**',
            python='setowns1_2013-05-06_09.19.49.348885' \
                   '/OpenMDAO-OpenMDAO-Framework-testbranch/devenv/Scripts/python',
            tunnel_incoming=True, tunnel_outgoing=True,
            identity_filename=identity_filename))
#        machines.append(ClusterHost(
#            hostname='viper.grc.nasa.gov',
#            python='OpenMDAO-Framework/devenv/bin/python',
#            tunnel_incoming=True, tunnel_outgoing=True,
#            identity_filename=None))
    else:
        # Trivial local 'cluster' for debugging without remote host issues.
        machines.append(ClusterHost(hostname=socket.getfqdn(),
                                    python=sys.executable))
#        machines.append(ClusterHost(
#            hostname='viper.grc.nasa.gov',
#            python='OpenMDAO-Framework/devenv/bin/python',
#            tunnel_incoming=True, tunnel_outgoing=True,
#            identity_filename=None))

    # Start it.
    cluster = ClusterAllocator(cluster_name, machines, allow_shell=True,
                               method='load-average')
#                               method='greedy')
#                               method='round-robin')
    print 'Cluster initialized'
    # Insert at index 0 so this cluster is consulted before any
    # previously-registered allocators.
    RAM.insert_allocator(0, cluster)

    n_servers = RAM.max_servers(dict(allocator=cluster_name))
    print n_servers, 'Servers:'
    for name in RAM.get_hostnames(dict(allocator=cluster_name,
                                       min_cpus=n_servers)):
        print ' ', name

    # Create model.
    top = GPOptimization()

    # Configure DOE.
    top.driver.sequential = False  # Run concurrently across cluster.
    top.driver.reload_model = False
    # Force use of only cluster hosts by adding this requirement.
    top.driver.extra_resources = dict(allocator=cluster_name)
    # This is necessary more often than it should be.
    top.driver.ignore_egg_requirements = True

    # Perform the optimization.
    top.run()
def _run_parallel(self, busy_hosts):
    """ Run parallel version of ADPAC.
    Gets hostnames from resource allocators and uses MPI for distribution.
    `busy_hosts` is a list of hosts to exclude from consideration, and is
    updated with the hosts we attempt to use here.  This provides a
    mechanism to skip those hosts a previous attempt failed with.
    """
    # Prefer an explicitly-requested process count; otherwise size the run
    # by the number of blocks in the input.
    if self.mpi_procs:
        n_cpus = self.mpi_procs
    else:
        n_cpus = len(self.input.nbld)
    #TODO: get correct number of blocks (nbld isn't necessarily correct)
    hostnames = RAM.get_hostnames(dict(n_cpus=n_cpus, exclude=busy_hosts))
    if not hostnames:
        self.raise_exception('No hosts!', RuntimeError)
    # Record the hosts we are about to try so a retry can avoid them.
    busy_hosts.extend(hostnames)

    # Write the MPI machinefile, one hostname per line.
    machinefile = 'machines'
    with open(machinefile, 'w') as out:
        for name in hostnames:
            out.write('%s\n' % name)

    # Assemble the mpirun command line.
    self.command = [self.mpi_path]
    self.command.extend(['-np', str(n_cpus)])
    self.command.extend(['-machinefile', machinefile])
    if os.path.sep in self.mpi_adpac:
        # Already a path; use as-is.
        self.command.append(self.mpi_adpac)
    else:
        # Some mpirun commands want a real path.
        for prefix in os.environ['PATH'].split(os.path.pathsep):
            path = os.path.join(prefix, self.mpi_adpac)
            if os.path.exists(path):
                self.command.append(path)
                break
        else:
            self.raise_exception("Can't find %r on PATH" % self.mpi_adpac,
                                 RuntimeError)
    # Translate component flags into ADPAC command-line switches.
    # NOTE(review): '-Z' is grouped under the stats condition here —
    # confirm against the original (unambiguous) layout.
    if self.stats:
        self.command.extend(['-s', 'all'])
        self.command.append('-Z')
    if self.iasync:
        self.command.append('-a')
    if self.ibalance:
        self.command.append('-b')
    if not self.icheck:
        self.command.append('-c')
    if not self.idissf:
        self.command.append('-d')
    if self.irevs:
        self.command.append('-r')
    self.command.extend(['-i', self.input.casename+'.input'])
    self.command.extend(['-o', self.input.casename+'.output'])

    # Merge stderr into the log; clear resources since MPI (not the RAM)
    # handles distribution for this execution.
    self.stdout = self.input.casename+'.log'
    self.stderr = ExternalCode.STDOUT
    self.resources = {}  # MPI will do distribution.
    super(ADPAC, self).execute()
    # On some systems (like GX with a shared filesystem between
    # front-end and compute nodes) it can take a bit before the
    # output files 'materialize'.  Poll up to 30 seconds.
    for retry in range(30):
        if os.path.exists(self.input.casename+'.log') and \
           os.path.exists(self.input.casename+'.output'):
            break
        else:
            time.sleep(1)
    else:
        self.raise_exception('timeout waiting for output files',
                             RuntimeError)