def select_agent(config, agent_name):
    """Instantiate the RL agent matching agent_name, using a dummy real env for shapes/bounds."""
    env_factory = EnvFactory(config)
    dummy_env = env_factory.generate_real_env(print_str='Select Agent: ')
    agent_name = agent_name.lower()

    if agent_name == "td3":
        max_action = dummy_env.get_max_action()
        return TD3(env=dummy_env, max_action=max_action, config=config)
    elif agent_name == "td3_icm":
        max_action = dummy_env.get_max_action()
        return TD3(env=dummy_env, max_action=max_action, config=config, icm=True)
    elif agent_name == "td3_vary":
        max_action = dummy_env.get_max_action()
        return TD3_vary(env=dummy_env, max_action=max_action, config=config)
    elif agent_name == "td3_icm_vary":
        max_action = dummy_env.get_max_action()
        return TD3_vary(env=dummy_env, max_action=max_action, config=config, icm=True)
    elif agent_name == "ppo":
        return PPO(env=dummy_env, config=config)
    elif agent_name == "ppo_icm":
        return PPO(env=dummy_env, config=config, icm=True)
    elif agent_name == "ddqn":
        return DDQN(env=dummy_env, config=config)
    elif agent_name == "ddqn_icm":
        return DDQN(env=dummy_env, config=config, icm=True)
    elif agent_name == "ddqn_vary":
        return DDQN_vary(env=dummy_env, config=config)
    elif agent_name == "ddqn_icm_vary":
        return DDQN_vary(env=dummy_env, config=config, icm=True)
    elif agent_name == "duelingddqn":
        return DuelingDDQN(env=dummy_env, config=config)
    elif agent_name == "duelingddqn_icm":
        return DuelingDDQN(env=dummy_env, config=config, icm=True)
    elif agent_name == "duelingddqn_vary":
        return DuelingDDQN_vary(env=dummy_env, config=config)
    elif agent_name == "duelingddqn_icm_vary":
        return DuelingDDQN_vary(env=dummy_env, config=config, icm=True)
    elif agent_name == "td3_discrete_vary":
        max_action = dummy_env.get_max_action()
        min_action = dummy_env.get_min_action()
        return TD3_discrete_vary(env=dummy_env, config=config,
                                 min_action=min_action, max_action=max_action)
    elif agent_name == "ql":
        return QL(env=dummy_env, config=config)
    elif agent_name == "ql_cb":
        return QL(env=dummy_env, config=config, count_based=True)
    elif agent_name == "sarsa":
        return SARSA(env=dummy_env, config=config)
    elif agent_name == "sarsa_cb":
        return SARSA(env=dummy_env, config=config, count_based=True)
    else:
        raise NotImplementedError("Unknown RL agent")

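# Usage sketch (assumption): select_agent is driven by the same YAML config that
# EnvFactory consumes; the config file name below is borrowed from the BOHB worker
# further down and may not match the actual project layout.
import yaml

with open("default_config_cartpole.yaml", "r") as stream:
    config = yaml.safe_load(stream)

agent = select_agent(config, agent_name="DDQN_ICM")  # lookup is case-insensitive
print(type(agent).__name__)
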
def ddqn(env_type, experiment_id, config_file):
    """Train a Double Deep Q-learning agent with the settings read from config_file."""
    params = read_yaml(config_file)
    params['model_type'] = 'DDQN'
    params['env_type'] = env_type
    params['experiment_id'] = experiment_id
    save_config(params, experiment_id)

    env = make_env(env_type, params)
    env.make_world(wall_prob=params.wall_prob, food_prob=0)

    q_net = create_nn(params)
    agent = DDQN(params, env, q_net, nn.MSELoss(), optim.RMSprop)
    agent.train(params.episodes, params.episode_step, params.random_step,
                params.min_greedy, params.max_greedy, params.greedy_step,
                params.update_period)

def ddqn(params, env_type, experiment_id, test_id):
    '''
    Double Deep Q-learning (evaluation)

    Args:
        params: Dictionary of settings
        env_type: Environment type
        experiment_id: Id for the experiment
        test_id: Id for the test
    '''
    params['experiment_id'] = experiment_id
    params['test_id'] = test_id

    env = make_env(env_type, params)
    env.make_world(wall_prob=params.wall_prob, food_prob=0)

    # Load the trained Q-network; taking the checkpoint path from params.model_file
    # is an assumption here.
    q_net = torch.load(params.model_file).cuda()
    agent = DDQN(params, env, q_net, nn.MSELoss(), optim.RMSprop)
    agent.test()

def calc_reference_deviation(virtual_env, real_env, config):
    """Train several DDQN agents on the virtual env and measure the spread of the
    (state, reward) samples they collect."""
    state_reward_concat = None

    for i in range(10):
        agent = DDQN(env=real_env, config=config)
        _, _, replay_buffer_train = agent.train(env=virtual_env)
        states, _, _, rewards, _ = replay_buffer_train.get_all()
        state_reward = torch.cat((states, rewards), 1)

        if state_reward_concat is None:
            state_reward_concat = state_reward
        else:
            state_reward_concat = torch.cat((state_reward_concat, state_reward), 0)

    print(state_reward_concat.shape)
    print(torch.std(state_reward_concat, dim=0))

    # std over dim=0 gives one value per (state, reward) dimension; reduce to a
    # scalar so .item() is valid (averaging over dimensions is an assumption here)
    return torch.std(state_reward_concat, dim=0).mean().item()

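# Usage sketch (assumption): the real and virtual envs come from the same EnvFactory;
# the generate_virtual_env() method name and the print_str value are hypothetical.
env_factory = EnvFactory(config)
real_env = env_factory.generate_real_env(print_str='Deviation: ')
virtual_env = env_factory.generate_virtual_env()
reference_std = calc_reference_deviation(virtual_env, real_env, config)
print('reference deviation:', reference_std)
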
def compute(self, working_dir, bohb_id, config_id, cso, budget, *args, **kwargs):
    with open("default_config_cartpole.yaml", 'r') as stream:
        default_config = yaml.safe_load(stream)

    config = self.get_specific_config(cso, default_config, budget)

    print('----------------------------')
    print("START BOHB ITERATION")
    print('CONFIG: ' + str(config))
    print('CSO: ' + str(cso))
    print('BUDGET: ' + str(budget))
    print('----------------------------')

    info = {}

    # generate environment
    env_fac = EnvFactory(config)
    env = env_fac.generate_real_env()

    ddqn = DDQN(env=env, config=config, icm=True)

    score_list = []
    for _ in range(5):
        rewards, _, _ = ddqn.train(env)
        score_i = len(rewards)
        score_list.append(score_i)
    score = np.mean(score_list)

    info['config'] = str(config)

    print('----------------------------')
    print('FINAL SCORE: ' + str(score))
    print("END BOHB ITERATION")
    print('----------------------------')

    return {
        "loss": score,
        "info": info
    }

def ddqn(env_type, experiment_id, config_file):
    '''
    Double Deep Q-learning

    Args:
        env_type: Environment type
        experiment_id: Id for the experiment
        config_file: Path of the config file
    '''
    params = read_yaml(config_file)
    params['model_type'] = 'DDQN'
    params['env_type'] = env_type
    params['experiment_id'] = experiment_id
    save_config(params, experiment_id)

    env = make_env(env_type, params)
    env.make_world(wall_prob=params.wall_prob, wall_seed=20, food_prob=0)

    q_net = create_nn(params)
    agent = DDQN(params, env, q_net, nn.MSELoss(), optim.RMSprop)
    agent.train(params.episodes, params.episode_step, params.random_step,
                params.min_greedy, params.max_greedy, params.greedy_step,
                params.update_period)

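# Usage sketch (assumption): the env_type string, experiment id, and config path
# below are hypothetical placeholders for whatever make_env() and read_yaml() expect.
ddqn(env_type="gridworld", experiment_id="exp_001",
     config_file="configs/ddqn.yaml")
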
def main(config_file):
    # Check TF version
    logging.info("Tensorflow version: {}".format(tf.version.VERSION))

    # Load main config file
    with open(config_file, "r") as f:
        config = yaml.safe_load(f)

    result_path = config["result_dir"]
    agent_type = config["agent"]
    agent_config_file = os.path.join(config["agent_config_dir"], str(agent_type) + ".yml")
    mode = config["mode"]
    environment = config["environment"]
    environment_seed = config["environment_seed"]

    # Load config file for agent
    with open(agent_config_file, "r") as f:
        agent_config = yaml.safe_load(f)

    # Create output directory
    time_str = time.strftime("%Y%m%d_%H%M%S")
    result_path = os.path.join(result_path, agent_type, time_str)
    if not os.path.exists(result_path):
        os.makedirs(result_path)

    agent_config["render_environment"] = config["render_environment"]
    agent_config["max_episode"] = config["max_episode"]
    agent_config["max_step"] = config["max_step"]
    agent_config["slide_window"] = config["slide_window"]
    agent_config["result_path"] = result_path

    # Save both config files to the output directory
    copyfile(config_file, os.path.join(result_path, os.path.basename(config_file)))
    copyfile(agent_config_file, os.path.join(result_path, os.path.basename(agent_config_file)))

    logging.info(mode + " with {} algorithm in environment {}".format(agent_type, environment))
    logging.info("Results will be saved at {}".format(result_path))

    # Initialize environment
    env = gym.make('CartPole-v1')
    env.seed(environment_seed)
    env = env.unwrapped

    # Build agent
    if agent_type == "DQN":
        agent = DQN(agent_config, env)
    elif agent_type == "DDQN":
        agent = DDQN(agent_config, env)
    elif agent_type == "DDQN_PER_Prop":
        agent = DDQN_PER_Prop(agent_config, env)
    elif agent_type == "A2C":
        agent = A2C(agent_config, env)
    elif agent_type == "REINFORCE":
        agent = REINFORCE(agent_config, env)
    else:
        raise KeyError("Agent type does not exist")

    # Train or play
    if mode == "train":
        agent.train()
    elif mode == "play":
        agent.play()

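# Usage sketch (assumption): a minimal command-line entry point; the --config_file
# flag name and its default path are hypothetical, not taken from the original script.
if __name__ == "__main__":
    import argparse

    logging.basicConfig(level=logging.INFO)
    parser = argparse.ArgumentParser()
    parser.add_argument("--config_file", type=str, default="config.yml",
                        help="Path to the main YAML config file")
    args = parser.parse_args()
    main(args.config_file)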