Example #1
def select_agent(config, agent_name):
    env_factory = EnvFactory(config)
    dummy_env = env_factory.generate_real_env(print_str='Select Agent: ')

    agent_name = agent_name.lower()

    if agent_name == "td3":
        max_action = dummy_env.get_max_action()
        return TD3(env=dummy_env, max_action=max_action, config=config)
    if agent_name == "td3_icm":
        max_action = dummy_env.get_max_action()
        return TD3(env=dummy_env, max_action=max_action, config=config, icm=True)
    elif agent_name == "td3_vary":
        max_action = dummy_env.get_max_action()
        return TD3_vary(env=dummy_env, max_action=max_action, config=config)
    elif agent_name == "td3_icm_vary":
        max_action = dummy_env.get_max_action()
        return TD3_vary(env=dummy_env, max_action=max_action, config=config, icm=True)
    elif agent_name == "ppo":
        return PPO(env=dummy_env, config=config)
    elif agent_name == "ppo_icm":
        return PPO(env=dummy_env, config=config, icm=True)
    elif agent_name == "ddqn":
        return DDQN(env=dummy_env, config=config)
    elif agent_name == "ddqn_icm":
        return DDQN(env=dummy_env, config=config, icm=True)
    elif agent_name == "ddqn_vary":
        return DDQN_vary(env=dummy_env, config=config)
    elif agent_name == "ddqn_icm_vary":
        return DDQN_vary(env=dummy_env, config=config, icm=True)
    elif agent_name == "duelingddqn":
        return DuelingDDQN(env=dummy_env, config=config)
    elif agent_name == "duelingddqn_icm":
        return DuelingDDQN(env=dummy_env, config=config, icm=True)
    elif agent_name == "duelingddqn_vary":
        return DuelingDDQN_vary(env=dummy_env, config=config)
    elif agent_name == "duelingddqn_icm_vary":
        return DuelingDDQN_vary(env=dummy_env, config=config, icm=True)
    elif agent_name == "td3_discrete_vary":
        max_action = dummy_env.get_max_action()
        min_action = dummy_env.get_min_action()
        return TD3_discrete_vary(env=dummy_env, config=config, min_action=min_action, max_action=max_action)
    elif agent_name == "ql":
        return QL(env=dummy_env, config=config)
    elif agent_name == "ql_cb":
        return QL(env=dummy_env, config=config, count_based=True)
    elif agent_name == "sarsa":
        return SARSA(env=dummy_env, config=config)
    elif agent_name == "sarsa_cb":
        return SARSA(env=dummy_env, config=config, count_based=True)
    else:
        raise NotImplementedError("Unknownn RL agent")
Example #2
def ddqn(env_type, experiment_id, config_file):
    '''
    Double Deep Q-learning

    Args:
        env_type: Environment Type
        experiment_id: Id for the experiment
        config_file: Path of the config file
    '''
    params = read_yaml(config_file)
    params['model_type'] = 'DDQN'
    params['env_type'] = env_type
    params['experiment_id'] = experiment_id

    save_config(params, experiment_id)
    env = make_env(env_type, params)
    env.make_world(wall_prob=params.wall_prob, food_prob=0)
    q_net = create_nn(params)
    agent = DDQN(params, env, q_net, nn.MSELoss(), optim.RMSprop)
    agent.train(params.episodes, params.episode_step, params.random_step,
                params.min_greedy, params.max_greedy, params.greedy_step,
                params.update_period)
Example #3
def ddqn(params, env_type, experiment_id, test_id):
    '''
    Double Deep Q-learning

    Args:
        params: Dictionary of settings
        env_type: Environment Type
        experiment_id: Id for the experiment
        test_id: Id for the test
    '''

    params['experiment_id'] = experiment_id
    params['test_id'] = test_id
    env = make_env(env_type, params)
    env.make_world(wall_prob=params.wall_prob, food_prob=0)
    # NOTE: `args` is not defined in this snippet; it is assumed to be an
    # argparse namespace (providing a model_file path) from the enclosing scope.
    q_net = torch.load(args.model_file).cuda()
    agent = DDQN(params, env, q_net, nn.MSELoss(), optim.RMSprop)
    agent.test()
Example #4
def calc_reference_deviation(virtual_env, real_env, config):
    # Train several DDQN agents on the virtual environment and measure the
    # per-dimension standard deviation of the visited (state, reward) pairs.
    state_reward_concat = None

    for i in range(10):
        agent = DDQN(env=real_env, config=config)
        _, _, replay_buffer_train = agent.train(env=virtual_env)

        states, _, _, rewards, _ = replay_buffer_train.get_all()
        state_reward = torch.cat((states, rewards), 1)

        if state_reward_concat is None:
            state_reward_concat = state_reward
        else:
            state_reward_concat = torch.cat((state_reward_concat, state_reward), 0)

        print(state_reward_concat.shape)
        print(torch.std(state_reward_concat, dim=0))

    # NOTE: .item() requires a single-element tensor; with multi-dimensional
    # states the per-dimension std would have to be reduced (e.g. averaged) first.
    return torch.std(state_reward_concat, dim=0).item()
Example #5
    def compute(self, working_dir, bohb_id, config_id, cso, budget, *args, **kwargs):
        with open("default_config_cartpole.yaml", 'r') as stream:
            default_config = yaml.safe_load(stream)

        config = self.get_specific_config(cso, default_config, budget)
        print('----------------------------')
        print("START BOHB ITERATION")
        print('CONFIG: ' + str(config))
        print('CSO:    ' + str(cso))
        print('BUDGET: ' + str(budget))
        print('----------------------------')

        info = {}

        # generate environment
        env_fac = EnvFactory(config)
        env = env_fac.generate_real_env()

        ddqn = DDQN(env=env,
                    config=config,
                    icm=True)

        score_list = []
        for _ in range(5):
            rewards, _, _ = ddqn.train(env)
            score_i = len(rewards)
            score_list.append(score_i)

        score = np.mean(score_list)

        info['config'] = str(config)

        print('----------------------------')
        print('FINAL SCORE: ' + str(score))
        print("END BOHB ITERATION")
        print('----------------------------')

        return {
                "loss": score,
                "info": info
                }
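The returned dictionary follows the usual BOHB worker convention: the optimizer treats "loss" as the quantity to minimize. A rough smoke test of compute() outside a full BOHB run might look like this (hypothetical: MyWorker stands in for the surrounding worker class, and the cso contents depend on its ConfigSpace):

# Hypothetical smoke test; MyWorker and the cso contents are placeholders.
worker = MyWorker()
result = worker.compute(working_dir=".", bohb_id=0, config_id=(0, 0, 0),
                        cso={"lr": 1e-3}, budget=1)
print(result["loss"], result["info"])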
Example #6
def ddqn(env_type, experiment_id, config_file):
    '''
    Double Deep Q-learning

    Args:
        env_type: Environment Type
        experiment_id: Id for the experiment
        config_file: Path of the config file
    '''

    params = read_yaml(config_file)
    params['model_type'] = 'DDQN'
    params['env_type'] = env_type
    params['experiment_id'] = experiment_id

    save_config(params, experiment_id)
    env = make_env(env_type, params)
    env.make_world(wall_prob=params.wall_prob, wall_seed=20, food_prob=0)
    q_net = create_nn(params)
    agent = DDQN(params, env, q_net, nn.MSELoss(), optim.RMSprop)
    agent.train(params.episodes, params.episode_step, params.random_step,
                params.min_greedy, params.max_greedy, params.greedy_step,
                params.update_period)
Example #7
def main(config_file):
    # Check TF version
    logging.info("Tensorflow version: {}".format(tf.version.VERSION))

    # Load main config file
    with open(config_file, "r") as f:
        config = yaml.safe_load(f)

    result_path = config["result_dir"]
    agent_type = config["agent"]
    agent_config_file = os.path.join(config["agent_config_dir"],
                                     str(agent_type) + ".yml")
    mode = config["mode"]
    environment = config["environment"]
    environment_seed = config["environment_seed"]

    # Load config file for agent
    with open(agent_config_file, "r") as f:
        agent_config = yaml.safe_load(f)

    # Create output directory
    time_str = time.strftime("%Y%m%d_%H%M%S")
    result_path = os.path.join(result_path, agent_type, time_str)
    if not os.path.exists(result_path):
        os.makedirs(result_path)

    agent_config["render_environment"] = config["render_environment"]
    agent_config["max_episode"] = config["max_episode"]
    agent_config["max_step"] = config["max_step"]
    agent_config["slide_window"] = config["slide_window"]
    agent_config["result_path"] = result_path

    # Save config files to output directory
    copyfile(config_file,
             os.path.join(result_path, os.path.basename(config_file)))
    copyfile(agent_config_file,
             os.path.join(result_path, os.path.basename(agent_config_file)))

    logging.info(
        mode +
        " with {} algorithm in environment {}".format(agent_type, environment))
    logging.info("Results will be saved at {}".format(result_path))

    # Initialize environment
    # NOTE: the environment is hard-coded to CartPole-v1 here; the `environment`
    # value from the config is only used for logging.
    env = gym.make('CartPole-v1')
    env.seed(environment_seed)
    env = env.unwrapped

    # Build agent (training or playing is dispatched below based on `mode`)
    if agent_type == "DQN":
        agent = DQN(agent_config, env)
    elif agent_type == "DDQN":
        agent = DDQN(agent_config, env)
    elif agent_type == "DDQN_PER_Prop":
        agent = DDQN_PER_Prop(agent_config, env)
    elif agent_type == "A2C":
        agent = A2C(agent_config, env)
    elif agent_type == "REINFORCE":
        agent = REINFORCE(agent_config, env)
    else:
        raise KeyError("Agent type does not exist")

    # Train or play
    if mode == "train":
        agent.train()
    elif mode == "play":
        agent.play()
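For reference, a minimal top-level config covering exactly the keys that main() reads; the values are illustrative only, and the agent-specific file under agent_config_dir is loaded separately:

# Hypothetical example of the top-level config read by main(); values are placeholders.
example_config = {
    "result_dir": "results",
    "agent": "DDQN",                   # also selects <agent_config_dir>/DDQN.yml
    "agent_config_dir": "configs/agents",
    "mode": "train",                   # "train" or "play"
    "environment": "CartPole-v1",
    "environment_seed": 0,
    "render_environment": False,
    "max_episode": 500,
    "max_step": 200,
    "slide_window": 100,
}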