def evaluate_saved_model():
    args = parse_a2c_args()
    # TODO: figure out how to use the GPU
    device = torch.device("cpu")  #"cuda" if torch.cuda.is_available() else
    env = DoomEnvironment(args, is_train=True)
    print(env.num_actions)
    obs_shape = (3, args.screen_height, args.screen_width)

    policy = CNNPolicy(obs_shape, args).to(device)
    args.scenario_dir = "scenarios_transfer_learning/big_combs_test/"
    checkpoint = torch.load("0.pth.tar",
                            map_location=lambda storage, loc: storage)
    policy.load_state_dict(checkpoint['model'])
    policy.eval()

    assert args.model_checkpoint, 'No model checkpoint found'
    assert os.path.isfile(
        args.model_checkpoint), 'The model could not be loaded'
    # The map_location lambda above is required; otherwise torch.load tries to restore tensors on the GPU

    for i in range(1, 64):  #for i in range(args.num_mazes_test):
        env = DoomEnvironment(args, idx=i, is_train=False)
        movie_name = 'videos/3 - 12-04-21 - base/big_combs_test{:0004}.mp4'.format(
            i)
        print('Creating movie {}'.format(movie_name))
        make_movie(policy, env, movie_name, args)
def evaluate_saved_model():
    args = parse_a2c_args()
    # TODO: figure out how to use the GPU
    device = torch.device("cpu")  #"cuda" if torch.cuda.is_available() else
    env = DoomEnvironment(args, is_train=True)
    print(env.num_actions)
    obs_shape = (3, args.screen_height, args.screen_width)

    policy = CNNPolicy(obs_shape, args).to(device)

    name = "Final on enclave"
    args.scenario_dir = "scenarios_transfer_learning/scenes/"
    checkpoint = torch.load("final.pth.tar",
                            map_location=lambda storage, loc: storage)
    policy.load_state_dict(checkpoint['model'])
    policy.eval()

    assert args.model_checkpoint, 'No model checkpoint found'
    assert os.path.isfile(
        args.model_checkpoint), 'The model could not be loaded'
    # The map_location lambda above is required; otherwise torch.load tries to restore tensors on the GPU

    args.scenario = "custom_scenario000.cfg"
    env = DoomEnvironment(args, is_train=False)
    movie_name = '/home/adam/Bureau/Visuels/0 - Rollout faits main/{}.mp4'.format(
        name)
    print('Creating movie {}'.format(movie_name))
    make_movie(policy, env, movie_name, args)
Example #3
def evaluate_saved_model():

    args = parse_a2c_args()
    # TODO: figure out how to use the GPU
    device = torch.device("cpu")  # "cuda" if torch.cuda.is_available() else
    env = DoomEnvironment(args, is_train=False)
    print(env.num_actions)
    obs_shape = (3, args.screen_height, args.screen_width)

    policy = CNNPolicy(obs_shape, args).to(device)
    results = []

    for model in range(0, 2):
        checkpoint = torch.load(str(model) + ".pth.tar",
                                map_location=lambda storage, loc: storage)
        policy.load_state_dict(checkpoint['model'])
        policy.eval()

        assert args.model_checkpoint, 'No model checkpoint found'
        assert os.path.isfile(
            args.model_checkpoint), 'The model could not be loaded'
        # The map_location lambda above is required; otherwise torch.load tries to restore tensors on the GPU

        results.append([])

        for i in range(args.num_mazes_test):
            env = DoomEnvironment(args, idx=i, is_train=False)
            results[model].append(get_results(policy, env, args))
            print(i)

        success_rate = 0
        average_reward = 0
        average_time = 0

        for res in results[model]:
            # res = (reward, time); a time below 525 is counted as a success
            if res[1] < 525:
                success_rate += 1
                average_time += res[1]
            average_reward += res[0]

        if success_rate != 0:
            average_time /= success_rate
        success_rate /= args.num_mazes_test
        average_reward /= args.num_mazes_test

        print(success_rate, average_reward, average_time)

    # Compare completion times of model 1 vs model 0 on the levels that model 1 finished
    time_diff = 0
    finished_levels = 0

    for i in range(args.num_mazes_test):
        if results[1][i][1] < 525:
            finished_levels += 1
            time_diff += results[1][i][1] - results[0][i][1]

    print(time_diff / finished_levels)
def pipe_worker(pipe, params, is_train, idx=0):
    # Runs a single DoomEnvironment in a worker process. Protocol: 'reset' resets the
    # environment, None shuts the worker down, anything else is treated as an action.
    env = DoomEnvironment(params, idx=idx, is_train=is_train, use_shaping=params.use_shaping)
    while True:
        action = pipe.recv()
        if action is None:
            break
        elif action == 'reset':
            pipe.send(env.reset())
        else:
            obs, reward, done, info = env.step(action)
            pipe.send((obs, reward, done, info))
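
A minimal sketch of how a parent process could drive pipe_worker over a multiprocessing.Pipe, assuming parse_a2c_args() and pipe_worker are importable from the surrounding project:

import multiprocessing as mp

if __name__ == '__main__':
    params = parse_a2c_args()
    parent_conn, child_conn = mp.Pipe()
    worker = mp.Process(target=pipe_worker, args=(child_conn, params, True))
    worker.start()

    parent_conn.send('reset')                     # reset the remote environment
    obs = parent_conn.recv()

    parent_conn.send(0)                           # send an action index
    obs, reward, done, info = parent_conn.recv()

    parent_conn.send(None)                        # None is the shutdown sentinel
    worker.join()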
def evaluate_saved_model():  
    args = parse_a2c_args()
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    env = DoomEnvironment(args, is_train=True)
    print(env.num_actions)
    obs_shape = (3, args.screen_height, args.screen_width)

    policy = CNNPolicy(obs_shape, args).to(device)
    assert args.model_checkpoint, 'No model checkpoint found'
    assert os.path.isfile(args.model_checkpoint), 'The model could not be loaded'
    # The map_location lambda is required; otherwise torch.load tries to restore tensors on the GPU
    checkpoint = torch.load(args.model_checkpoint, map_location=lambda storage, loc: storage)
    policy.load_state_dict(checkpoint['model'])
    policy.eval()
 
    for i in range(args.num_mazes_test):
        env = DoomEnvironment(args, idx=i, is_train=True)
        movie_name = 'videos/rollout_{:0004}.mp4'.format(i)
        print('Creating movie {}'.format(movie_name))
        make_movie(policy, env, movie_name, args)
def pipe_worker2(pipe, params, is_train, idx_range=[0]):
    # Like pipe_worker, but cycles over a fixed set of scenarios: when an episode ends,
    # the finished environment goes to the back of the deque and the next one becomes active.
    envs_queue = deque()
    for idx in idx_range:
        env = DoomEnvironment(params, idx=idx, is_train=is_train, use_shaping=params.use_shaping, fixed_scenario=True)        
        obs = env.reset()
        envs_queue.append((obs, env))
        
    obs, cur_env = envs_queue.pop()
    
    
    while True:
        action = pipe.recv()
        if action is None:
            break
        elif action == 'reset':
            pipe.send(cur_env.reset())  # reset the currently active environment
        else:
            obs, reward, done, info = cur_env.step(action)
            
            if done:
                envs_queue.append((obs, cur_env))
                obs, cur_env = envs_queue.popleft()
                
            pipe.send((obs, reward, done, info))
        self._dqn = DQN('deathmatch', n_actions,
                        epsilon)  # TODO: shouldn't be 2 ** n_actions
        self._drqn = DRQN('deathmatch', n_actions, epsilon)

    def forward(self, x_screens, hidden):
        # TODO: add _detection_ layer to the DRQN model
        q_values, hidden = self._drqn(x_screens, hidden)
        detection = np.random.rand()
        # TODO: the Arnold paper proposes using the DQN during evaluation
        # TODO: when there are no detected enemies
        # TODO: or the agent does not have any ammo left
        if detection > 0.0:  # without sigmoid
            q_values = self._dqn(x_screens)
        return detection, hidden, q_values

    def sample_actions(self):
        # pretty much the same as in DRQN
        # maybe I can even call .sample_actions()
        pass


def agent():
    return 0


if __name__ == '__main__':
    cfg = 'scenarios/deathmatch_shotgun.cfg'
    doom = DoomEnvironment(cfg, True, 4)
    while True:
        doom.step(agent())
Example #8
from torch import load
from doom_environment import DoomEnvironment
from utils import watch_agent
from models import agent
from time import sleep
# from hyperparameters import hp_basic_test as hp
from hyperparameters import hp_d_cor_test as hp
# from hyperparameters import hp_def_c_test as hp
# from hyperparameters import hp_h_gth_test as hp

if __name__ == '__main__':
    print('---------------------------- vizDoom watching script ---------------------------')

    test_env = DoomEnvironment('scenarios/' + hp.scenario + '.cfg', False, hp.test_skiprate)
    test_env.make_visible()
    policy_net = agent[hp.agent](hp.scenario, 2 ** test_env.get_n_buttons(), hp.epsilon)
    policy_net.load_state_dict(load(
        'logs/' + hp.scenario + '/' + hp.agent + '/model.pth',
        map_location=lambda storage, loc: storage)['policy_net_state'])
    # policy_net.eval()
    policy_net.train()
    print('scenario: {}, agent: {}'.format(hp.scenario, hp.agent))
    print('loaded model: {}'.format('logs/' + hp.scenario + '/' + hp.agent + '/model.pth'))
    print('agent\'s epsilon: {}'.format(hp.epsilon))

    print('------------------------------- watch the model --------------------------------')
    print('n_episodes: {}'.format(hp.n_episodes))
    for _ in range(hp.n_episodes):
        reward, shaped = watch_agent(hp.scenario, policy_net, test_env)
        print('Episode {} done, reward: {}, shaped: {}'.format(_, reward, shaped))
        sleep(1.0)
Example #9
def evaluate_saved_model(models, models_dir):

    args = parse_a2c_args()
    # TODO: figure out how to use the GPU
    device = torch.device("cpu")  # "cuda" if torch.cuda.is_available() else

    # environment creation

    # create the little-combs test environments

    little_comb_env = []
    for i in range(50):
        args.scenario_dir = "scenarios_transfer_learning/little_combs_test/"
        args.scenario = "custom_scenario_test{:003}.cfg"
        little_comb_env.append(DoomEnvironment(args, idx=i, is_train=False))

    # create the big-combs test environments

    big_comb_env = []
    for i in range(50):
        args.scenario_dir = "scenarios_transfer_learning/big_combs_test/"
        args.scenario = "custom_scenario_test{:003}.cfg"
        big_comb_env.append(DoomEnvironment(args, idx=i, is_train=False))

    # create the classic test environments

    classic_env = []
    for i in range(50):
        args.scenario_dir = "scenarios_transfer_learning/mazes_classic_test/"
        args.scenario = "custom_scenario_test{:003}.cfg"
        classic_env.append(DoomEnvironment(args, idx=i, is_train=False))

    # create the other test environments (medium combs)

    medium_comb_env = []
    for i in range(16):
        args.scenario_dir = "scenarios_transfer_learning/medium_combs_test/"
        args.scenario = "custom_scenario_test{:003}.cfg"
        medium_comb_env.append(DoomEnvironment(args, idx=i, is_train=False))

    obs_shape = (3, args.screen_height, args.screen_width)

    policy = CNNPolicy(obs_shape, args).to(device)

    resultat = []

    for model in models:
        checkpoint = torch.load(models_dir + "/checkpoint_" + str(model) +
                                ".pth.tar",
                                map_location=lambda storage, loc: storage)
        policy.load_state_dict(checkpoint['model'])
        policy.eval()

        resultat.append(model)

        assert args.model_checkpoint, 'No model checkpoint found'
        assert os.path.isfile(
            args.model_checkpoint), 'The model could not be loaded'
        # The map_location lambda above is required; otherwise torch.load tries to restore tensors on the GPU

        # evaluation on the classic levels

        results = []

        for i in range(50):
            env = classic_env[i]
            results.append(get_results(policy, env, args))

        print("Classic levels evaluation is done")

        success_rate = 0
        average_reward = 0

        for res in results:
            if res[1] < 525:
                success_rate += 1
            average_reward += res[0]

        success_rate /= args.num_mazes_test
        average_reward /= args.num_mazes_test

        resultat.append([success_rate, average_reward])

        # evaluation on the little combs

        results = []

        for i in range(50):
            env = little_comb_env[i]
            results.append(get_results(policy, env, args))

        print("Little combs evaluation is done")

        success_rate = 0
        average_reward = 0

        for res in results:
            if res[1] < 525:
                success_rate += 1
            average_reward += res[0]

        success_rate /= args.num_mazes_test
        average_reward /= args.num_mazes_test

        resultat.append([success_rate, average_reward])

        # evaluation on the big combs

        results = []

        for i in range(50):
            env = big_comb_env[i]
            results.append(get_results(policy, env, args))
        print("Big combs evaluation is done")

        success_rate = 0
        average_reward = 0

        for res in results:
            if res[1] < 525:
                success_rate += 1
            average_reward += res[0]

        success_rate /= args.num_mazes_test
        average_reward /= args.num_mazes_test

        resultat.append([success_rate, average_reward])

        # evaluation on the other levels

        results = []

        for i in range(16):
            env = medium_comb_env[i]
            results.append(get_results(policy, env, args))
        print("Other levels evaluation is done")

        success_rate = 0
        average_reward = 0

        for res in results:
            if res[1] < 525:
                success_rate += 1
            average_reward += res[0]

        success_rate /= args.num_mazes_test
        average_reward /= args.num_mazes_test

        resultat.append([success_rate, average_reward])

        print("Checkpoint " + str(model) + " has been evaluated")

    print(resultat)
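
A hypothetical invocation of this evaluate_saved_model(models, models_dir); the checkpoint indices and directory name below are placeholders, chosen to match the "checkpoint_<n>.pth.tar" naming used by torch.load above:

if __name__ == '__main__':
    # Evaluate checkpoints checkpoint_0.pth.tar and checkpoint_10.pth.tar from ./checkpoints
    evaluate_saved_model([0, 10], "checkpoints")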
Example #10
import torch

from doom_environment import DoomEnvironment
from models import agent
from hyperparameters import hp_h_gth_train as hp
# ReplayMemory and Trainer come from the project's own modules (imports not shown in this snippet)

if __name__ == '__main__':
    print(
        '---------------------------- vizDoom training script ---------------------------'
    )
    print('scenario: {}, agent: {}'.format(hp.scenario, hp.agent))

    print('\ntraining parameters:')
    print('n_epoch: {}, steps_per_epoch: {}, play_steps: {}'.format(
        hp.n_epoch, hp.steps_per_epoch, hp.play_steps))
    print('batch_size: {}, time_size: {}, not_update: {}'.format(
        hp.batch_size, hp.time_size, hp.not_update))
    print('tests_per_epoch: {}'.format(hp.tests_per_epoch))

    train_env = DoomEnvironment('scenarios/' + hp.scenario + '.cfg', False,
                                hp.train_skiprate)
    test_env = DoomEnvironment('scenarios/' + hp.scenario + '.cfg', False,
                               hp.test_skiprate)
    er = ReplayMemory(hp.replay_size, hp.screen_size)

    policy_net = agent[hp.agent](hp.scenario, 2**train_env.get_n_buttons())
    target_net = agent[hp.agent](hp.scenario, 2**train_env.get_n_buttons())
    optimizer = torch.optim.RMSprop(policy_net.parameters(), hp.learning_rate)

    trainer = Trainer(scenario=hp.scenario,
                      cuda=hp.cuda,
                      environment=train_env,
                      test_environment=test_env,
                      experience_replay=er,
                      policy_net=policy_net,
                      target_net=target_net,