def main(args):
    # capture CLI values before `args` is replaced by the saved training args
    epoch_dir = os.path.split(args.network_file)[0]
    initial_count = int(os.path.split(epoch_dir)[-1])
    network_file = args.network_file
    optimizer_file = args.optimizer_file
    args_file_path = args.args_file
    mts = args.max_train_steps
    with open(args.args_file, 'r') as args_file:
        args = dotdict(json.load(args_file))

    print_ascii_logo()
    log_id = make_log_id(args.tag, args.mode_name, args.agent,
                         args.vision_network + args.network_body)
    log_id_dir = os.path.join(args.log_dir, args.env_id, log_id)

    os.makedirs(log_id_dir)
    logger = make_logger('Local', os.path.join(log_id_dir, 'train_log.txt'))
    summary_writer = SummaryWriter(log_id_dir)
    saver = ModelSaver(args.nb_top_model, log_id_dir)
    log_args(logger, args)
    write_args_file(log_id_dir, args)
    logger.info('Resuming training from {} epoch {}'.format(
        args_file_path, initial_count))

    # construct env
    env = make_env(args, args.seed)

    # construct network
    torch.manual_seed(args.seed)
    network_head_shapes = get_head_shapes(env.action_space, env.engine, args.agent)
    network = make_network(env.observation_space, network_head_shapes, args)
    network.load_state_dict(torch.load(network_file))

    # construct agent
    os.environ['CUDA_VISIBLE_DEVICES'] = str(args.gpu_id)
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    torch.backends.cudnn.benchmark = True
    agent = make_agent(network, device, env.engine, env.gpu_preprocessor, args)

    # Construct the Container
    def make_optimizer(params):
        opt = torch.optim.RMSprop(params, lr=args.learning_rate,
                                  eps=1e-5, alpha=0.99)
        # optimizer_file was captured from the CLI before `args` was overwritten
        if optimizer_file is not None:
            opt.load_state_dict(torch.load(optimizer_file))
        return opt

    container = Local(
        agent, env, make_optimizer, args.epoch_len, args.nb_env, logger,
        summary_writer, args.summary_frequency, saver
    )
    try:
        container.run(mts + initial_count, initial_count)
    finally:
        env.close()
def main(args):
    print_ascii_logo()
    print('Saving replays... Press Ctrl+C to stop.')

    with open(args.args_file, 'r') as args_file:
        train_args = dotdict(json.load(args_file))
    train_args.nb_env = 1

    # construct env
    replay_dir = os.path.split(args.network_file)[0]

    def env_fn(seed):
        # seed each generated env with the value passed in
        return DummyVecEnv(
            [make_sc2_env(train_args.env_id, seed,
                          replay_dir=replay_dir, render=args.render)],
            Engines.SC2
        )

    # throwaway env, used only to read the observation/action spaces
    env = env_fn(args.seed)
    env.close()

    # construct network
    network_head_shapes = get_head_shapes(env.action_space, env.engine,
                                          train_args.agent)
    network = make_network(env.observation_space, network_head_shapes, train_args)
    network.load_state_dict(torch.load(args.network_file))

    # create an agent (add act_eval method)
    os.environ['CUDA_VISIBLE_DEVICES'] = str(args.gpu_id)
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    torch.backends.cudnn.benchmark = True
    agent = make_agent(network, device, env.engine, env.gpu_preprocessor, train_args)

    # create a rendering container
    # TODO: could terminate after a configurable number of replays
    # instead of running indefinitely
    renderer = ReplayGenerator(agent, env_fn, device, args.seed)
    try:
        renderer.run()
    finally:
        env.close()
def main(args):
    # construct logging objects
    print_ascii_logo()
    print('Rendering... Press Ctrl+C to stop.')

    with open(args.args_file, 'r') as args_file:
        train_args = dotdict(json.load(args_file))
    train_args.nb_env = 1

    # construct env
    def env_fn(seed):
        return atari_from_args(train_args, seed, subprocess=False)

    env = env_fn(args.seed)
    env.close()

    # construct network
    network_head_shapes = get_head_shapes(env.action_space, env.engine,
                                          train_args.agent)
    network = make_network(env.observation_space, network_head_shapes, train_args)
    network.load_state_dict(
        torch.load(args.network_file, map_location=lambda storage, loc: storage))

    # create an agent (add act_eval method)
    os.environ['CUDA_VISIBLE_DEVICES'] = str(args.gpu_id)
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    agent = make_agent(network, device, env.engine, env.gpu_preprocessor, train_args)

    # create a rendering container
    renderer = Renderer(agent, env_fn, device, args.seed)
    try:
        renderer.run()
    finally:
        env.close()
def main(args):
    # host needs to broadcast timestamp so all procs create the same log dir
    if rank == 0:
        timestamp = datetime.now().strftime('%Y-%m-%d_%H-%M-%S')
        log_id = make_log_id_from_timestamp(
            args.tag, args.mode_name, args.agent,
            args.vision_network + args.network_body, timestamp)
        log_id_dir = os.path.join(args.log_dir, args.env_id, log_id)

        os.makedirs(log_id_dir)
        saver = SimpleModelSaver(log_id_dir)
        print_ascii_logo()
    else:
        timestamp = None
    timestamp = comm.bcast(timestamp, root=0)

    if rank != 0:
        log_id = make_log_id_from_timestamp(
            args.tag, args.mode_name, args.agent,
            args.vision_network + args.network_body, timestamp)
        log_id_dir = os.path.join(args.log_dir, args.env_id, log_id)
    comm.Barrier()

    # construct env
    seed = args.seed if rank == 0 else args.seed + (
        args.nb_env * (rank - 1))  # unique seed per process
    env = make_env(args, seed)

    # construct network
    torch.manual_seed(args.seed)
    network_head_shapes = get_head_shapes(env.action_space, env.engine, args.agent)
    network = make_network(env.observation_space, network_head_shapes, args)

    # sync network params
    if rank == 0:
        for v in network.parameters():
            comm.Bcast(v.detach().cpu().numpy(), root=0)
        print('Root variables synced')
    else:
        # can just use the numpy buffers
        variables = [v.detach().cpu().numpy() for v in network.parameters()]
        for v in variables:
            comm.Bcast(v, root=0)
        for shared_v, model_v in zip(variables, network.parameters()):
            model_v.data.copy_(torch.from_numpy(shared_v), non_blocking=True)
        print('{} variables synced'.format(rank))

    # construct agent
    # host is always the first gpu, workers are distributed evenly across the rest
    # (e.g. with --gpu-id 0 1 2: host -> 0, worker rank 1 -> 1, rank 2 -> 2, rank 3 -> 1, ...)
    if len(args.gpu_id) > 1:  # nargs is always a list
        if rank == 0:
            gpu_id = args.gpu_id[0]
        else:
            gpu_id = args.gpu_id[1:][(rank - 1) % len(args.gpu_id[1:])]
    else:
        gpu_id = args.gpu_id[-1]
    os.environ['CUDA_VISIBLE_DEVICES'] = str(gpu_id)
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    cudnn = True
    # disable cudnn for dynamic batches
    if rank == 0 and args.max_dynamic_batch > 0:
        cudnn = False
    torch.backends.cudnn.benchmark = cudnn
    agent = make_agent(network, device, env.engine, env.gpu_preprocessor, args)

    # workers
    if rank != 0:
        logger = make_logger(
            'ImpalaWorker{}'.format(rank),
            os.path.join(log_id_dir, 'train_log{}.txt'.format(rank)))
        summary_writer = SummaryWriter(os.path.join(log_id_dir, str(rank)))

        container = ImpalaWorker(agent, env, args.nb_env, logger,
                                 summary_writer,
                                 use_local_buffers=args.use_local_buffers)

        # Run the container
        if args.profile:
            try:
                from pyinstrument import Profiler
            except ImportError:
                raise ImportError('You must install pyinstrument to use profiling.')
            profiler = Profiler()
            profiler.start()
            container.run()
            profiler.stop()
            print(profiler.output_text(unicode=True, color=True))
        else:
            container.run()
        env.close()
    # host
    else:
        logger = make_logger(
            'ImpalaHost',
            os.path.join(log_id_dir, 'train_log{}.txt'.format(rank)))
        summary_writer = SummaryWriter(os.path.join(log_id_dir, str(rank)))
        log_args(logger, args)
        write_args_file(log_id_dir, args)
        logger.info('Network Parameter Count: {}'.format(count_parameters(network)))

        # no need for the env anymore
        env.close()

        # Construct the optimizer
        def make_optimizer(params):
            opt = torch.optim.RMSprop(params, lr=args.learning_rate,
                                      eps=1e-5, alpha=0.99)
            return opt

        container = ImpalaHost(agent, comm, make_optimizer, summary_writer,
                               args.summary_frequency, saver, args.epoch_len,
                               args.host_training_info_interval,
                               use_local_buffers=args.use_local_buffers)

        # Run the container
        if args.profile:
            try:
                from pyinstrument import Profiler
            except ImportError:
                raise ImportError('You must install pyinstrument to use profiling.')
            profiler = Profiler()
            profiler.start()
            if args.max_dynamic_batch > 0:
                container.run(args.max_dynamic_batch, args.max_queue_length,
                              args.max_train_steps, dynamic=True,
                              min_dynamic_batch=args.min_dynamic_batch)
            else:
                container.run(args.num_rollouts_in_batch, args.max_queue_length,
                              args.max_train_steps)
            profiler.stop()
            print(profiler.output_text(unicode=True, color=True))
        else:
            if args.max_dynamic_batch > 0:
                container.run(args.max_dynamic_batch, args.max_queue_length,
                              args.max_train_steps, dynamic=True,
                              min_dynamic_batch=args.min_dynamic_batch)
            else:
                container.run(args.num_rollouts_in_batch, args.max_queue_length,
                              args.max_train_steps)
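# ---------------------------------------------------------------------------
# Both the IMPALA and Towered scripts sync the freshly initialized network
# from rank 0 to every worker with mpi4py's buffer-based Bcast. The helper
# below is a minimal standalone sketch of that same pattern; it is not part
# of these scripts and the name `sync_parameters` is illustrative only.
# ---------------------------------------------------------------------------
from mpi4py import MPI  # assumed available, as in the distributed scripts
import torch


def sync_parameters(network, comm=MPI.COMM_WORLD):
    """Broadcast rank 0's parameters to every other rank, in place."""
    rank = comm.Get_rank()
    for param in network.parameters():
        # broadcast the raw numpy buffer to avoid pickling each tensor
        buf = param.detach().cpu().numpy()
        comm.Bcast(buf, root=0)  # rank 0 sends, all other ranks receive into buf
        if rank != 0:
            param.data.copy_(torch.from_numpy(buf))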
def main(args):
    # construct logging objects
    print_ascii_logo()
    log_id = make_log_id(args.tag, args.mode_name, args.agent,
                         args.vision_network + args.network_body)
    log_id_dir = os.path.join(args.log_dir, args.env_id, log_id)

    os.makedirs(log_id_dir)
    logger = make_logger('Local', os.path.join(log_id_dir, 'train_log.txt'))
    summary_writer = SummaryWriter(log_id_dir)
    saver = SimpleModelSaver(log_id_dir)
    log_args(logger, args)
    write_args_file(log_id_dir, args)

    # construct env
    env = make_env(args, args.seed)

    # construct network
    torch.manual_seed(args.seed)
    network_head_shapes = get_head_shapes(env.action_space, env.engine, args.agent)
    network = make_network(env.observation_space, network_head_shapes, args)
    logger.info('Network Parameter Count: {}'.format(count_parameters(network)))

    # construct agent
    os.environ['CUDA_VISIBLE_DEVICES'] = str(args.gpu_id)
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    torch.backends.cudnn.benchmark = True
    agent = make_agent(network, device, env.engine, env.gpu_preprocessor, args)

    # Construct the Container
    def make_optimizer(params):
        opt = torch.optim.RMSprop(params, lr=args.learning_rate,
                                  eps=1e-5, alpha=0.99)
        return opt

    container = Local(agent, env, make_optimizer, args.epoch_len, args.nb_env,
                      logger, summary_writer, args.summary_frequency, saver)

    # if running an eval thread create eval env, agent, & logger
    if args.nb_eval_env > 0:
        # replace args num envs & seed
        eval_args = deepcopy(args)
        eval_args.seed = args.seed + args.nb_env

        # env and agent
        eval_args.nb_env = args.nb_eval_env
        eval_env = make_env(eval_args, eval_args.seed)
        eval_net = make_network(eval_env.observation_space,
                                network_head_shapes, eval_args)
        eval_agent = make_agent(eval_net, device, eval_env.engine,
                                eval_env.gpu_preprocessor, eval_args)
        eval_net.load_state_dict(network.state_dict())

        # logger
        eval_logger = make_logger('LocalEval',
                                  os.path.join(log_id_dir, 'eval_log.txt'))

        evaluation_container = EvaluationThread(
            network, eval_agent, eval_env, args.nb_eval_env, eval_logger,
            summary_writer, args.eval_step_rate,
            # wire the local container's step count into the eval thread
            override_step_count_fn=lambda: container.local_step_count
        )
        evaluation_container.start()

    # Run the container
    if args.profile:
        try:
            from pyinstrument import Profiler
        except ImportError:
            raise ImportError('You must install pyinstrument to use profiling.')
        profiler = Profiler()
        profiler.start()
        container.run(10e3)
        profiler.stop()
        print(profiler.output_text(unicode=True, color=True))
    else:
        container.run(args.max_train_steps)
    env.close()

    if args.nb_eval_env > 0:
        evaluation_container.stop()
        eval_env.close()
def main(args):
    # host needs to broadcast timestamp so all procs create the same log dir
    if rank == 0:
        timestamp = datetime.now().strftime('%Y-%m-%d_%H-%M-%S')
        log_id = make_log_id_from_timestamp(
            args.tag, args.mode_name, args.agent,
            args.vision_network + args.network_body, timestamp)
        log_id_dir = os.path.join(args.log_dir, args.env_id, log_id)

        os.makedirs(log_id_dir)
        saver = SimpleModelSaver(log_id_dir)
        print_ascii_logo()
    else:
        timestamp = None
    timestamp = comm.bcast(timestamp, root=0)

    if rank != 0:
        log_id = make_log_id_from_timestamp(
            args.tag, args.mode_name, args.agent,
            args.vision_network + args.network_body, timestamp)
        log_id_dir = os.path.join(args.log_dir, args.env_id, log_id)
    comm.Barrier()

    # construct env
    seed = args.seed if rank == 0 else args.seed + (
        args.nb_env * (rank - 1))  # unique seed per process
    # don't make a ton of envs if host
    if rank == 0:
        env_args = deepcopy(args)
        env_args.nb_env = 1
        env = make_env(env_args, seed)
    else:
        env = make_env(args, seed)

    # construct network
    torch.manual_seed(args.seed)
    network_head_shapes = get_head_shapes(env.action_space, env.engine, args.agent)
    network = make_network(env.observation_space, network_head_shapes, args)

    # sync network params
    if rank == 0:
        for v in network.parameters():
            comm.Bcast(v.detach().cpu().numpy(), root=0)
        print('Root variables synced')
    else:
        # can just use the numpy buffers
        variables = [v.detach().cpu().numpy() for v in network.parameters()]
        for v in variables:
            comm.Bcast(v, root=0)
        for shared_v, model_v in zip(variables, network.parameters()):
            model_v.data.copy_(torch.from_numpy(shared_v), non_blocking=True)
        print('{} variables synced'.format(rank))

    # workers (host is rank 0)
    if rank != 0:
        # construct logger
        logger = make_logger(
            'ToweredWorker{}'.format(rank),
            os.path.join(log_id_dir, 'train_log_rank{}.txt'.format(rank)))
        summary_writer = SummaryWriter(
            os.path.join(log_id_dir, 'rank{}'.format(rank)))

        # construct agent
        # distribute evenly across gpus
        if isinstance(args.gpu_id, list):
            gpu_id = args.gpu_id[(rank - 1) % len(args.gpu_id)]
        else:
            gpu_id = args.gpu_id
        os.environ['CUDA_VISIBLE_DEVICES'] = str(gpu_id)
        device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
        torch.backends.cudnn.benchmark = True
        agent = make_agent(network, device, env.engine, env.gpu_preprocessor, args)

        # construct container
        container = ToweredWorker(agent, env, args.nb_env, logger,
                                  summary_writer, args.summary_frequency)

        # Run the container
        try:
            container.run()
        finally:
            env.close()
    # host
    else:
        logger = make_logger(
            'ToweredHost',
            os.path.join(log_id_dir, 'train_log_rank{}.txt'.format(rank)))
        log_args(logger, args)
        write_args_file(log_id_dir, args)
        logger.info('Network Parameter Count: {}'.format(count_parameters(network)))

        # no need for the env anymore
        env.close()

        # Construct the optimizer
        def make_optimizer(params):
            opt = torch.optim.RMSprop(params, lr=args.learning_rate,
                                      eps=1e-5, alpha=0.99)
            return opt

        container = ToweredHost(comm, args.num_grads_to_drop, network,
                                make_optimizer, saver, args.epoch_len, logger)

        # Run the container
        if args.profile:
            try:
                from pyinstrument import Profiler
            except ImportError:
                raise ImportError('You must install pyinstrument to use profiling.')
            profiler = Profiler()
            profiler.start()
            container.run(10e3)
            profiler.stop()
            print(profiler.output_text(unicode=True, color=True))
        else:
            container.run(args.max_train_steps)
def main(args):
    print_ascii_logo()
    logger = make_logger('Eval',
                         os.path.join(args.log_id_dir, 'evaluation_log.txt'))
    log_args(logger, args)

    # epoch directories are named by step count; skip per-rank log dirs
    epoch_ids = sorted([
        int(dir_name) for dir_name in os.listdir(args.log_id_dir)
        if os.path.isdir(os.path.join(args.log_id_dir, dir_name))
        and ('rank' not in dir_name)
    ])

    with open(os.path.join(args.log_id_dir, 'args.json'), 'r') as args_file:
        train_args = dotdict(json.load(args_file))
    train_args.nb_env = 1

    # construct env
    def env_fn(seed):
        return make_env(train_args, seed, subprocess=False, render=args.render)

    env = env_fn(args.seed)
    env.close()

    os.environ['CUDA_VISIBLE_DEVICES'] = str(args.gpu_id)
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    network_head_shapes = get_head_shapes(env.action_space, env.engine,
                                          train_args.agent)
    network = make_network(env.observation_space, network_head_shapes, train_args)

    results = []
    selected_models = []
    for epoch_id in epoch_ids:
        network_path = os.path.join(args.log_id_dir, str(epoch_id), 'model*.pth')
        network_files = glob(network_path)

        best_mean = -float('inf')
        best_std_dev = 0.
        selected_model = None
        for network_file in network_files:
            # load new network
            network.load_state_dict(
                torch.load(network_file,
                           map_location=lambda storage, loc: storage))

            # construct agent
            agent = make_agent(network, device, env.engine,
                               env.gpu_preprocessor, train_args)

            # container
            container = Evaluation(agent, env_fn, device, args.seed, args.render)

            # Run the container
            mean_reward, std_dev = container.run(args.nb_episode)

            if mean_reward >= best_mean:
                best_mean = mean_reward
                best_std_dev = std_dev
                selected_model = os.path.split(network_file)[-1]

        result = Result(epoch_id, best_mean, best_std_dev)
        selected_model = SelectedModel(epoch_id, selected_model)
        logger.info(str(result) + ' ' + str(selected_model))
        results.append(np.asarray(result))
        selected_models.append(selected_model)

    # save results
    results = np.stack(results)
    np.savetxt(os.path.join(args.log_id_dir, 'eval.csv'), results,
               delimiter=',', fmt=['%d', '%.3f', '%.3f'])

    # save selected models
    with open(os.path.join(args.log_id_dir, 'selected_models.txt'), 'w') as f:
        for sm in selected_models:
            f.write(str(sm) + '\n')

    env.close()
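# ---------------------------------------------------------------------------
# Result and SelectedModel are not defined in this file. Given how they are
# used above (np.asarray(result) yields a 3-column row saved with
# fmt=['%d', '%.3f', '%.3f'], and str(selected_model) is written to
# selected_models.txt), they are plausibly simple namedtuples. The field
# names below are assumptions for illustration only.
# ---------------------------------------------------------------------------
from collections import namedtuple

Result = namedtuple('Result', ['epoch_id', 'mean_reward', 'std_dev'])
SelectedModel = namedtuple('SelectedModel', ['epoch_id', 'model_filename'])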