def main(args):
    """
    Build an EvalContainer from the given arguments and run it.

    The container is always closed, even if the run raises.

    :param args: Dict[str, Any]
    :return:
    """
    opts = DotDict(args)

    # banner + logger, then record the arguments used for this run
    Init.print_ascii_logo()
    log = Init.setup_logger(opts.logdir, 'eval')
    Init.log_args(log, opts)

    # external classes are loaded from the log directory before the
    # container is constructed
    R.load_extern_classes(opts.logdir)

    runner = EvalContainer(
        opts.actor,
        opts.epoch,
        log,
        opts.logdir,
        opts.gpu_id,
        opts.nb_episode,
        opts.start,
        opts.end,
        opts.seed,
        opts.manager,
    )
    try:
        runner.run()
    finally:
        runner.close()
def main(args):
    """
    Build a RenderContainer from the given arguments and run it.

    The container is always closed, even if the run raises.

    :param args: Dict[str, Any]
    :return:
    """
    opts = DotDict(args)

    # construct logging objects
    Init.print_ascii_logo()
    log = Init.setup_logger(opts.logdir, "eval")
    Init.log_args(log, opts)

    # external classes are loaded from the log directory before the
    # container is constructed
    R.load_extern_classes(opts.logdir)

    renderer = RenderContainer(
        opts.actor,
        opts.epoch,
        opts.start,
        opts.end,
        log,
        opts.logdir,
        opts.gpu_id,
        opts.seed,
        opts.manager,
    )
    try:
        renderer.run()
    finally:
        renderer.close()
def __init__(self, args, log_id_dir, initial_step_count, rank):
    """
    Set up a worker: environment manager, network, actor and experience.

    :param args: parsed arguments (attribute access is used, e.g.
        ``args.seed``, ``args.nb_env``)
    :param log_id_dir: directory the saved registry classes are loaded from
    :param initial_step_count: starting value for the step counters
    :param rank: worker rank; used to derive the environment seed
    """
    # rank 0 keeps the base seed, every other rank is offset by nb_env * rank
    if rank == 0:
        seed = args.seed
    else:
        seed = args.seed + args.nb_env * rank
    print('Worker {} using seed {}'.format(rank, seed))

    # load saved registry classes
    REGISTRY.load_extern_classes(log_id_dir)

    # ENV
    engine = REGISTRY.lookup_engine(args.env)
    env_cls = REGISTRY.lookup_env(args.env)
    manager_cls = REGISTRY.lookup_manager(args.manager)
    env_mgr = manager_cls.from_args(args, engine, env_cls, seed=seed)

    # NETWORK
    # NOTE(review): torch is seeded with the base seed, not the per-rank
    # seed computed above -- presumably intentional; confirm.
    torch.manual_seed(args.seed)
    device_name = "cuda" if torch.cuda.is_available() else "cpu"
    device = torch.device(device_name)
    output_space = REGISTRY.lookup_output_space(
        args.actor_worker, env_mgr.action_space
    )
    net_cls = (
        REGISTRY.lookup_network(args.custom_network)
        if args.custom_network
        else ModularNetwork
    )
    net = net_cls.from_args(
        args,
        env_mgr.observation_space,
        output_space,
        env_mgr.gpu_preprocessor,
        REGISTRY,
    )

    # ACTOR / EXP
    actor_cls = REGISTRY.lookup_actor(args.actor_worker)
    actor = actor_cls.from_args(args, env_mgr.action_space)
    spec_builder = actor_cls.exp_spec_builder(
        env_mgr.observation_space,
        env_mgr.action_space,
        net.internal_space(),
        env_mgr.nb_env,
    )
    exp_cls = REGISTRY.lookup_exp(args.exp)
    exp = exp_cls.from_args(args, spec_builder)

    self.actor = actor
    self.exp = exp.to(device)
    self.nb_step = args.nb_step
    self.env_mgr = env_mgr
    self.nb_env = args.nb_env
    self.network = net.to(device)
    self.device = device
    self.initial_step_count = initial_step_count

    # TODO: this should be set to eval after some number of training steps
    self.network.train()

    # SETUP state variables for run
    self.step_count = self.initial_step_count
    self.global_step_count = self.initial_step_count
    self.ep_rewards = torch.zeros(self.nb_env)
    self.rank = rank
    self.obs = dtensor_to_dev(self.env_mgr.reset(), self.device)
    # one fresh set of internals per environment, regrouped by listd_to_dlist
    per_env_internals = [
        self.network.new_internals(self.device)
        for _ in range(self.nb_env)
    ]
    self.internals = listd_to_dlist(per_env_internals)
    self.start_time = time()
    self._weights_synced = False
def main(local_args):
    """
    Run distributed training.

    The saved arguments are read from the log directory. When
    ``local_args.resume`` is truthy, the values in ``local_args`` override
    them.

    :param local_args: arguments for this process; read via attribute
        access and ``vars()``
    :return:
    """
    log_id_dir = local_args.log_id_dir
    initial_step_count = local_args.initial_step_count

    R.load_extern_classes(log_id_dir)
    logger = Init.setup_logger(log_id_dir, "train{}".format(GLOBAL_RANK))

    # read the arguments stored in the log directory
    args_path = LogDirHelper(log_id_dir).args_file_path()
    with open(args_path, "r") as args_file:
        args = DotDict(json.load(args_file))

    if local_args.resume:
        # local values take precedence over the stored ones
        overrides = vars(local_args)
        args = DotDict({**args, **overrides})

    # NOTE(review): the process group is joined with rank=LOCAL_RANK while
    # world_size=WORLD_SIZE -- confirm this is intended for multi-node runs.
    dist.init_process_group(
        backend="nccl",
        init_method=args.init_method,
        world_size=WORLD_SIZE,
        rank=LOCAL_RANK,
    )
    logger.info("Rank {} initialized.".format(GLOBAL_RANK))

    # local rank 0 acts as the host, every other rank as a worker;
    # both take the same constructor arguments
    container_cls = DistribHost if LOCAL_RANK == 0 else DistribWorker
    container = container_cls(
        args,
        logger,
        log_id_dir,
        initial_step_count,
        LOCAL_RANK,
        GLOBAL_RANK,
        WORLD_SIZE,
    )

    try:
        container.run()
    finally:
        container.close()
def __init__(
    self,
    args,
    log_id_dir,
    initial_step_count,
    rank=0,
):
    """
    Set up a learner: network, optimizer, actor, learner and experience.

    A temporary environment is created only to read its action space,
    observation space and GPU preprocessor, then closed straight away.

    :param args: parsed arguments (attribute access is used, e.g.
        ``args.nb_learners``, ``args.lr``)
    :param log_id_dir: directory the saved registry classes are loaded
        from; also handed to the SummaryWriter and model saver on rank 0
    :param initial_step_count: stored as ``self.initial_step_count``
    :param rank: learner rank; only rank 0 reloads the network/optimizer
        and creates the SummaryWriter and saver
    """
    # ARGS TO STATE VARS
    self._args = args
    self.nb_learners = args.nb_learners
    self.nb_workers = args.nb_workers
    self.rank = rank
    self.nb_step = args.nb_step
    self.nb_env = args.nb_env
    self.initial_step_count = initial_step_count
    self.epoch_len = args.epoch_len
    self.summary_freq = args.summary_freq
    self.nb_learn_batch = args.nb_learn_batch
    self.rollout_queue_size = args.rollout_queue_size
    # can be none if rank != 0
    self.log_id_dir = log_id_dir

    # load saved registry classes
    REGISTRY.load_extern_classes(log_id_dir)

    # ENV (temporary)
    # Capture everything needed from the env before closing it; nothing
    # below touches `env` again.
    env_cls = REGISTRY.lookup_env(args.env)
    env = env_cls.from_args(args, 0)
    env_action_space = env.action_space
    env_observation_space = env.observation_space
    env_gpu_preprocessor = env.gpu_preprocessor
    env.close()

    # NETWORK
    torch.manual_seed(args.seed)
    device = torch.device("cuda")  # ray handles gpus
    torch.backends.cudnn.benchmark = True

    output_space = REGISTRY.lookup_output_space(
        args.actor_worker, env_action_space)
    if args.custom_network:
        net_cls = REGISTRY.lookup_network(args.custom_network)
    else:
        net_cls = ModularNetwork
    net = net_cls.from_args(
        args,
        env_observation_space,
        output_space,
        env_gpu_preprocessor,
        REGISTRY
    )
    self.network = net.to(device)
    # TODO: this is a hack, remove once queuer puts rollouts on the correct device
    self.network.device = device
    self.device = device
    self.network.train()

    # OPTIMIZER
    def optim_fn(x):
        return torch.optim.RMSprop(x, lr=args.lr, eps=1e-5, alpha=0.99)

    if args.nb_learners > 1:
        self.optimizer = NCCLOptimizer(
            optim_fn, self.network, self.nb_learners
        )
    else:
        self.optimizer = optim_fn(self.network.parameters())

    # LEARNER / EXP
    rwd_norm = REGISTRY.lookup_reward_normalizer(
        args.rwd_norm).from_args(args)
    actor_cls = REGISTRY.lookup_actor(args.actor_host)
    # Use the spaces captured before env.close() instead of reading
    # attributes off the closed environment.
    builder = actor_cls.exp_spec_builder(
        env_observation_space,
        env_action_space,
        net.internal_space(),
        args.nb_env * args.nb_learn_batch
    )
    actor = actor_cls.from_args(args, env_action_space)
    learner = REGISTRY.lookup_learner(args.learner).from_args(args, rwd_norm)

    # Look the experience class up once and build exactly one instance.
    exp_cls = REGISTRY.lookup_exp(args.exp)

    self.actor = actor
    self.learner = learner
    self.exp = exp_cls.from_args(args, builder).to(device)

    # Rank 0 setup, load network/optimizer and create SummaryWriter/Saver
    if rank == 0:
        if args.load_network:
            self.network = self.load_network(
                self.network, args.load_network
            )
            print('Reloaded network from {}'.format(args.load_network))
        if args.load_optim:
            self.optimizer = self.load_optim(
                self.optimizer, args.load_optim
            )
            print('Reloaded optimizer from {}'.format(args.load_optim))

        print('Network parameters: ' + str(self.count_parameters(net)))
        self.summary_writer = SummaryWriter(log_id_dir)
        self.saver = SimpleModelSaver(log_id_dir)