def main(argv):
    """Launch the server/client test experiment on the tmux backend.

    Parses ``argv[1:]``, creates the experiment, places the two programs
    according to ``args.mode`` ('localhost', 'ssh' or 'slurm'), launches the
    experiment and then blocks until interrupted with Ctrl-C, at which point
    the experiment is deleted from the cluster.

    Args:
        argv: Full argument vector; element 0 is skipped before parsing.

    Raises:
        Exception: If ``args.mode`` is not a known mode, or if a remote mode
            is selected and the node configuration does not resolve to
            exactly one node.
    """
    args = parser.parse_args(argv[1:])
    # Kept as an assert (not a raise) so the exception type seen by callers
    # is unchanged.
    assert args.command == 'mode'

    cluster = Cluster.new('tmux')
    exp = cluster.new_experiment(EXP_NAME)
    exp.set_preamble_cmds(PREAMBLE_CMDS)
    serv, cli = create_program(exp)

    if args.mode == 'localhost':
        node = localhost_setup()
        localhost_placement(serv, cli, node)
    elif args.mode in ('ssh', 'slurm'):
        # Both remote modes share the same flow: load and set up exactly one
        # remote node, set up the local node, then apply the mode's placement.
        remote_node = _load_single_node(args, args.mode)
        local_node = localhost_setup()
        place = ssh_placement if args.mode == 'ssh' else slurm_placement
        place(serv, cli, local_node, remote_node)
    else:
        raise Exception('Unknown mode %s' % args.mode)

    try:
        cluster.launch(exp)
        # Keep the launcher alive until the user interrupts it.
        while True:
            time.sleep(100000)
    except KeyboardInterrupt:
        cluster.delete(experiment_name=EXP_NAME)


def _load_single_node(args, kind):
    """Load the single remote node described by the CLI config and set it up.

    Args:
        args: Parsed arguments providing ``cluster_config`` and
            ``filter_regex``.
        kind: Node kind used in the error message ('ssh' or 'slurm').

    Returns:
        The only node produced by the node loader, after ``setup`` has been
        called on it with ``RES_FILES``.

    Raises:
        Exception: If the loader does not yield exactly one node.
    """
    nodeloader = NodeLoader(
        ConfigDict(argon.to_nested_dicts(args.cluster_config)),
        args.filter_regex)
    nodes = nodeloader.nodes
    if len(nodes) != 1:
        raise Exception(
            'For this test condition, please specify just a single %s node.'
            % kind)
    node = nodes[0]
    node.setup(res_files=RES_FILES)
    return node
def create_cluster(self):
    """Step 1: build and return a cluster that uses the 'kube' backend."""
    kube_cluster = Cluster.new('kube')
    return kube_cluster
def create_cluster(self, server_name):
    """Create a 'tmux' backend cluster and store it on ``self.cluster``.

    Args:
        server_name: Forwarded to ``Cluster.new`` as ``server_name``.
    """
    tmux_cluster = Cluster.new('tmux', server_name=server_name)
    self.cluster = tmux_cluster
def create_cluster(self):
    """Step 1: build and return a Cluster with the Docker Compose backend."""
    docker_cluster = Cluster.new('docker')
    return docker_cluster
def action_create(self, args):
    """
    Spin up a multi-node distributed Surreal experiment.
    Put any command line args that pass to the config script after "--"
    """
    # Reads from ``args``: experiment_name, remainder, num_agents, num_evals,
    # env, algorithm, gpu and dry_run. Creates one process per component on
    # the 'subproc' backend, wires them together with setup_network, assigns
    # GPUs, and launches the experiment (honouring ``args.dry_run``).
    cluster = Cluster.new('subproc',
                          stdout_mode='print',
                          stderr_mode='print',
                          log_dir=None)
    experiment_name = args.experiment_name
    exp = cluster.new_experiment(experiment_name)

    experiment_folder = os.path.join(self.folder, experiment_name)
    print('Writing experiment output to {}'.format(experiment_folder))

    # Build a fresh list: extending ``args.remainder`` in place would mutate
    # the caller's parsed arguments as a side effect.
    algorithm_args = list(args.remainder) + [
        "--num-agents", str(args.num_agents),
        "--experiment-folder", experiment_folder,
        "--env", args.env,
    ]

    executable = self._find_executable(args.algorithm)
    cmd_gen = CommandGenerator(num_agents=args.num_agents,
                               num_evals=args.num_evals,
                               executable=executable,
                               config_commands=algorithm_args)

    learner = exp.new_process('learner', cmd=cmd_gen.get_command('learner'))
    # learner.set_env('DISABLE_MUJOCO_RENDERING', "1")
    replay = exp.new_process('replay', cmd=cmd_gen.get_command('replay'))
    ps = exp.new_process('ps', cmd=cmd_gen.get_command('ps'))
    tensorboard = exp.new_process('tensorboard',
                                  cmd=cmd_gen.get_command('tensorboard'))
    tensorplex = exp.new_process('tensorplex',
                                 cmd=cmd_gen.get_command('tensorplex'))
    loggerplex = exp.new_process('loggerplex',
                                 cmd=cmd_gen.get_command('loggerplex'))

    # Agents are created before evaluators, in index order, as before.
    agent_names = ['agent-{}'.format(i) for i in range(args.num_agents)]
    agents = [exp.new_process(name, cmd=cmd_gen.get_command(name))
              for name in agent_names]

    eval_names = ['eval-{}'.format(i) for i in range(args.num_evals)]
    evals = [exp.new_process(name, cmd=cmd_gen.get_command(name))
             for name in eval_names]

    setup_network(agents=agents,
                  evals=evals,
                  learner=learner,
                  replay=replay,
                  ps=ps,
                  tensorboard=tensorboard,
                  tensorplex=tensorplex,
                  loggerplex=loggerplex)
    self._setup_gpu(agents=agents, evals=evals, learner=learner,
                    gpus=args.gpu)
    cluster.launch(exp, dry_run=args.dry_run)
# Hello-world example: one server process and one client process launched
# through the 'subproc' backend.
import symphony
from symphony.engine import Cluster

# stdout/stderr modes are set to 'print' and no log directory is configured.
cluster = Cluster.new('subproc', stdout_mode='print', stderr_mode='print', log_dir=None)
exp = cluster.new_experiment('hello-world')

# Declare the two processes and the commands they run.
server = exp.new_process('server', cmd='python simple_server.py')
client = exp.new_process('client', cmd='python simple_client.py')

# The server binds the 'example' service and the client connects to it.
server.binds('example')
client.connects('example')

# NOTE(review): this message is printed before cluster.launch is called.
print('Server and client are running in subprocess mode')
cluster.launch(exp, dry_run=False)