def __init__(self, save_dir, observation_space, argmax=False):
    """Set up the agent from a saved model directory.

    Builds the observation preprocessor and loads the model stored under
    ``save_dir``. When the loaded model reports itself as recurrent, its
    memory is prepared through ``self._initialize_memory()``.

    Args:
        save_dir: Location handed to both ``utils.ObssPreprocessor`` and
            ``utils.load_model``.
        observation_space: Observation space passed to the preprocessor.
        argmax: Stored as ``self.argmax`` (presumably selects greedy
            actions instead of sampling -- confirm against the
            action-selection code).
    """
    self.preprocess_obss = utils.ObssPreprocessor(save_dir, observation_space)
    self.model = utils.load_model(save_dir)
    self.argmax = argmax

    # Only recurrent models need a memory; nothing else to do otherwise.
    if not self.model.recurrent:
        return
    self._initialize_memory()
def __init__(self, model_name, observation_space, deterministic=False):
    """Create the agent around the model saved as ``model_name``.

    Args:
        model_name: Identifier given to both ``utils.ObssPreprocessor`` and
            ``utils.load_model``.
        observation_space: Observation space passed to the preprocessor.
        deterministic: Stored as ``self.deterministic`` (presumably turns
            off action sampling -- confirm against the code that reads it).
    """
    # The preprocessor is built before the model is loaded.
    self.obss_preprocessor = utils.ObssPreprocessor(
        model_name,
        observation_space,
    )
    self.model = utils.load_model(model_name)
    self.deterministic = deterministic

    # A recurrent model needs its memory set up before first use.
    needs_memory = self.model.recurrent
    if needs_memory:
        self._initialize_memory()
def __init__(self, save_dir, observation_space, argmax=False, num_envs=1):
    """Set up an agent that can act in ``num_envs`` environments at once.

    Args:
        save_dir: Location handed to both ``utils.ObssPreprocessor`` and
            ``utils.load_model``.
        observation_space: Observation space passed to the preprocessor.
        argmax: Stored as ``self.argmax`` (presumably selects greedy
            actions instead of sampling -- confirm against the
            action-selection code).
        num_envs: Number of rows allocated in ``self.memories`` when the
            model is recurrent.
    """
    self.preprocess_obss = utils.ObssPreprocessor(save_dir, observation_space)
    self.model = utils.load_model(save_dir)
    self.argmax = argmax
    self.num_envs = num_envs

    # Prefer the GPU whenever torch can see one.
    device_name = "cuda" if torch.cuda.is_available() else "cpu"
    self.device = torch.device(device_name)

    if self.model.recurrent:
        # One zero-filled memory row per environment.
        # NOTE(review): the tensor is created on torch's default device,
        # not on self.device -- confirm this matches where the model lives.
        rows = self.num_envs
        width = self.model.memory_size
        self.memories = torch.zeros(rows, width)
def introspect_i2a(environment, model, seed=0, argmax=False, pause=0.1):
    """Run a saved agent in ``environment`` and display what it imagines.

    Every step renders the environment, queries the model with
    ``introspect=True`` and forwards the predicted observations, actions and
    rewards to ``renderer.window.update_imagination_display``. The loop ends
    once ``renderer.window`` is ``None``.

    Args:
        environment: Environment to act in; it is seeded, reset, rendered
            and stepped here.
        model: Value passed to ``utils.get_model_dir`` to locate the saved
            model.
        seed: Seed given to ``utils.seed`` and ``environment.seed``.
        argmax: When true, take the highest-probability action instead of
            sampling from the action distribution.
        pause: Seconds to sleep before each render.
    """
    utils.seed(seed)

    # Generate environment
    environment.seed(seed)

    # Define agent
    model_dir = utils.get_model_dir(model)
    to_batch = utils.ObssPreprocessor(model_dir, environment.observation_space)
    agent_model = utils.load_model(model_dir)

    # Run the agent; starting "done" forces a reset on the first iteration.
    episode_over = True
    while True:
        if episode_over:
            obs = environment.reset()
            print("Instr:", obs["mission"])

        time.sleep(pause)
        renderer = environment.render("human")

        batch = to_batch([obs])
        with torch.no_grad():
            outputs = agent_model(batch, introspect=True)
        dist, _, pred_actions, pred_observations, pred_rewards = outputs

        renderer.window.update_imagination_display(pred_observations, pred_actions, pred_rewards)

        # Index of the most probable action, or a sample from the distribution.
        actions = dist.probs.max(1, keepdim=True)[1] if argmax else dist.sample()

        if torch.cuda.is_available():
            actions = actions.cpu().numpy()

        obs, _reward, episode_over, _ = environment.step(actions.item())

        if renderer.window is None:
            break
# Set seed for all randomness sources utils.seed(args.seed) # Generate environments envs = [] for i in range(args.procs): env = gym.make(args.env) env.seed(args.seed + 10000 * i) envs.append(env) # Define obss preprocessor preprocess_obss = utils.ObssPreprocessor(save_dir, envs[0].observation_space) # Define actor-critic model if utils.model_exists(save_dir): acmodel = utils.load_model(save_dir) status = utils.load_status(save_dir) logger.info("Model successfully loaded\n") else: acmodel = ACModel(preprocess_obss.obs_space, envs[0].action_space, not args.no_instr, not args.no_mem) status = {"num_frames": 0, "update": 0} logger.info("Model successfully created\n") logger.info("{}\n".format(acmodel)) if torch.cuda.is_available():
# Set seed for all randomness sources utils.seed(args.seed) # Generate environments envs = [] for i in range(args.procs): env = gym.make(args.env) env.seed(args.seed + 10000 * i) envs.append(env) # Define obss preprocessor obss_preprocessor = utils.ObssPreprocessor(run_dir, envs[0].observation_space) # Define actor-critic model if utils.model_exists(run_dir): acmodel = utils.load_model(run_dir) status = utils.load_status(run_dir) logger.info("Model successfully loaded\n") else: acmodel = ACModel(obss_preprocessor.obs_space, envs[0].action_space, not args.no_instr, not args.no_mem) status = {"num_frames": 0, "i": 0} logger.info("Model successfully created\n") logger.info("{}\n".format(acmodel)) if torch.cuda.is_available():
# Seed every source of randomness before anything else is created.
utils.seed(args.seed)

# Build one environment per process; environment i is seeded with
# args.seed + i.
envs = []
for i in range(args.procs):
    env = gym.make(args.env)
    env.seed(args.seed + i)
    envs.append(env)

# Observation preprocessor, built from the first environment's space.
obss_preprocessor = utils.ObssPreprocessor(
    model_name,
    envs[0].observation_space,
)

# Actor-critic model: use what load_model returns, or create a fresh one
# when it returns None.
acmodel = utils.load_model(model_name, raise_not_found=False)
if acmodel is None:
    acmodel = ACModel(
        obss_preprocessor.obs_space,
        envs[0].action_space,
        not args.no_instr,
        not args.no_mem,
    )
    logger.info("Model successfully created\n")
logger.info("{}\n".format(acmodel))

# Move the model to the GPU when one is available, and log the outcome.
cuda_available = torch.cuda.is_available()
if cuda_available:
    acmodel.cuda()
logger.info("CUDA available: {}\n".format(cuda_available))

# Define actor-critic algo