Пример #1
0
            action = action_to_one_hot_bool(agent.select_epsilon_greedy_action(state,epsilon))
            for i in range(frame_repeat):
                sleep(sleep_time)
                reward = game.make_action(action, 1)
            done = game.is_episode_finished()
            state = next_state
            reward_sum += reward
            if done:
                print('episode:', episode, 'sum_of_rewards_for_episode:', reward_sum)
                total_reward.append(reward_sum)
                break
    return agent, total_reward

if __name__ == "__main__":
    # Script entry point: build a ViZDoom game from a scenario config, create
    # the agent variant selected by `version` from a saved checkpoint, and
    # run it via run_agent().
    #
    # NOTE: these names are deliberately left as module-level globals with
    # their original spelling; functions defined elsewhere in this file may
    # read some of them (e.g. `episodes`) as globals — confirm before renaming.
    version = 'DDQN'  # which agent implementation to load: 'DQN' or 'DDQN'
    config_file = 'deadly_corridor'
    checkpoint_file = './results/dqn/agent1/cp-0001.ckpt'
    frame_repeat = 12
    episodes = 5
    config_path = 'D:/Joe/Anaconda3/envs/tensorflow_env/Lib/site-packages/vizdoom/scenarios/'+config_file+'.cfg'
    game = initialize_vizdoom(config_path)
    try:
        action_size = game.get_available_buttons_size()
        print('action_size',action_size)
        # The agent is sized from the shape of the current screen buffer.
        state_size = np.array(game.get_state().screen_buffer.shape)
        if version == 'DQN':
            agent = DQN.DQN_Agent(state_size,action_size,checkpoint_file=checkpoint_file)
        elif version == 'DDQN':
            agent = Double_DQN.DQN_Agent(state_size,action_size,checkpoint_file=checkpoint_file)
        else:
            # Fail with a clear message instead of a NameError on `agent`.
            raise ValueError(
                "Unknown agent version %r; expected 'DQN' or 'DDQN'" % (version,)
            )
        _, total_reward = run_agent(agent, game, frame_repeat)
    finally:
        # Always release the game instance, even if setup or the run fails.
        game.close()
Пример #2
0
    # Training hyper-parameters and run settings.
    # NOTE(review): `folder`, `m` and `episodes` are not used in the statements
    # visible below; presumably they are read elsewhere in the file — confirm
    # before removing. `versions`, `config_files` and `frame_repeat` are
    # defined outside this excerpt.
    batch_size=6
    learning_rate=0.001
    folder=str(learning_rate)
    replay_buffer_size=100000
    m = 256
    episodes = 50
    n_agents = 1
    # Train `n_agents` independent agents for every (version, scenario) pair.
    for version in versions:
        for config_file in config_files:
            config_path = 'D:/Joe/Anaconda3/envs/tensorflow_env/Lib/site-packages/vizdoom/scenarios/'+config_file+'.cfg'
            # One list of per-episode rewards per trained agent.
            all_total_rewards = []
            for n in range(n_agents):
                # A fresh game instance is created for each agent and closed
                # once that agent's training call returns.
                game = initialize_vizdoom(config_path)
                action_size = game.get_available_buttons_size()
                print('action_size',action_size)
                # The agent is sized from the shape of the current screen buffer.
                state_size = np.array(game.get_state().screen_buffer.shape)
                # NOTE(review): two independent `if`s — for any other value of
                # `version`, `agent` is either unbound or left over from a
                # previous iteration.
                if version=='DQN':
                    agent = DQN.DQN_Agent(state_size,action_size, batch_size=batch_size, learning_rate=learning_rate, replay_buffer_size=replay_buffer_size, checkpoint_file='./agent'+str(n)+'/cp-9999.ckpt')
                if version=='DDQN':
                    agent = Double_DQN.DQN_Agent(state_size,action_size, batch_size=batch_size, learning_rate=learning_rate, replay_buffer_size=replay_buffer_size, checkpoint_file='./agent'+str(n)+'/cp-9999.ckpt')
                _, total_reward = train_agent(agent, game, frame_repeat)
                all_total_rewards.append(total_reward)
                # NOTE(review): not reached if train_agent raises, so the game
                # would stay open in that case.
                game.close()

                # The rewards file is rewritten after every agent, so it always
                # holds the results of all agents trained so far for this pair.
                all_total_rewards_to_save = np.array(all_total_rewards)
                # Both branches write to the same path pattern; the version
                # name is already part of the file name.
                if version=='DQN':
                    np.save('./all_total_rewards_'+config_file+'_'+version+'.npy', all_total_rewards_to_save)
                if version=='DDQN':
                    np.save('./all_total_rewards_'+config_file+'_'+version+'.npy', all_total_rewards_to_save)