Example #1
def test_label_buffer():
    import matplotlib.pyplot as plt
    import random
    import cv2
    from doom_rdqn.arguments import parse_game_args
    # DoomEnvironment and resize come from the surrounding module in the original file.
    params = parse_game_args()
    params.decimate = False
    env = DoomEnvironment(params)
    for i in range(10):
        env.make_action(random.choice(list(range(8))))

    state = env.game.get_state()
    labels_buffer = state.labels_buffer
    label = state.labels

    plt.subplot(1, 2, 1)
    plt.imshow(env.get_observation().transpose(1, 2, 0))
    plt.subplot(1, 2, 2)
    plt.imshow(labels_buffer)
    plt.figure()
    plt.imshow(resize(labels_buffer, (56, 32), cv2.INTER_AREA))

    plt.figure()
    plt.imshow(
        resize(env.get_observation().transpose(1, 2, 0), (112, 64),
               cv2.INTER_AREA))

    data = env.get_observation()

    def resize_test(image):
        return resize(image.transpose(1, 2, 0), (112, 64)).transpose(2, 0, 1)
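If the `resize` used here is OpenCV's `cv2.resize` (an assumption; the import sits outside this excerpt), note that its third positional parameter is `dst`, so the interpolation flag is better passed by keyword. A minimal sketch of the channel-first resize helper under that assumption, with a synthetic frame:

import cv2
import numpy as np

def resize_chw(image, size=(112, 64)):
    # image is channel-first (C, H, W); cv2.resize takes an (H, W, C) array and a (width, height) size
    hwc = image.transpose(1, 2, 0)
    resized = cv2.resize(hwc, size, interpolation=cv2.INTER_AREA)
    return resized.transpose(2, 0, 1)

frame = np.random.rand(3, 120, 160).astype(np.float32)  # stand-in observation
print(resize_chw(frame).shape)  # (3, 64, 112)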
Example #2
def evaluate_saved_model():
    params = parse_game_args()

    env = DoomEnvironment(params, is_train=True)
    print(env.num_actions)
    obs_shape = (3, params.screen_height, params.screen_width)

    actor_critic = CNNPolicy(obs_shape[0], obs_shape, params)

    assert params.model_checkpoint, 'No model checkpoint found'
    assert os.path.isfile(
        params.model_checkpoint), 'The model could not be loaded'
    # map_location is required here, otherwise torch.load tries to restore the tensors onto the GPU
    checkpoint = torch.load(params.model_checkpoint,
                            map_location=lambda storage, loc: storage)
    actor_critic.load_state_dict(checkpoint['model'])

    base_filename = params.model_checkpoint.split('.')[0].split('/')[1]

    agent = BaseAgent(actor_critic, params)

    for i in range(params.num_mazes_test):
        env = DoomEnvironment(params, idx=i, is_train=True)
        movie_name = 'videos/{}_rollout_{:0004}.mp4'.format(base_filename, i)
        print('Creating movie {}'.format(movie_name))
        make_movie(agent, env, movie_name, params)
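The map_location lambda above is the standard way to restore a GPU-trained checkpoint onto CPU storage; map_location='cpu' does the same thing. A self-contained sketch of the pattern (the model and file name are placeholders, not from this repo):

import torch
import torch.nn as nn

model = nn.Linear(4, 2)  # stand-in for CNNPolicy
torch.save({'model': model.state_dict()}, 'checkpoint.pth.tar')

# keep every tensor on CPU storage, even if the checkpoint was written from CUDA memory
checkpoint = torch.load('checkpoint.pth.tar',
                        map_location=lambda storage, loc: storage)
model.load_state_dict(checkpoint['model'])
model.eval()  # evaluation mode before rolling out the agent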
Example #3
    logger.write('Step: {:0004}, Game rewards: {}, Game times: {}'.format(
        step, reward_list, time_list))


def write_movie(params, logger, observations, step, score):
    observations = [o.transpose(1, 2, 0) * 255.0 for o in observations]
    clip = ImageSequenceClip(observations, fps=int(30 / params.frame_skip))
    output_dir = logger.get_eval_output()
    clip.write_videofile('{}eval{:0004}_{:00005.0f}.mp4'.format(
        output_dir, step, score * 100))


if __name__ == '__main__':
    # Test to improve the movie with action probs, values, etc.

    params = parse_game_args()
    params.norm_obs = False
    params.recurrent_policy = True
    envs = MultiEnvs(params.simulator, 1, 1, params)
    obs_shape = envs.obs_shape
    obs_shape = (obs_shape[0] * params.num_stack, *obs_shape[1:])
    model = CNNPolicy(obs_shape[0], envs.num_actions, params.recurrent_policy,
                      obs_shape)
    env = DoomEnvironment(params)
    agent = BaseAgent(model, params)

    env.reset()
    agent.reset()

    rewards = []
    obss = []
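write_movie above leans on moviepy's ImageSequenceClip; a minimal sketch of that frame-to-video step, assuming moviepy 1.x and channel-first float frames in [0, 1] as produced by the rollout code (the frames below are synthetic):

import numpy as np
from moviepy.editor import ImageSequenceClip

frames = [np.random.rand(3, 64, 112) for _ in range(30)]  # stand-in rollout observations

# ImageSequenceClip wants a list of (H, W, 3) arrays scaled to 0-255
clip = ImageSequenceClip([f.transpose(1, 2, 0) * 255.0 for f in frames],
                         fps=int(30 / 4))  # 4 stands in for params.frame_skip
clip.write_videofile('rollout.mp4')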
Example #4
def gen_classic(selh, file, scenario=False, model="model_final"):
    params = parse_game_args()

    # Load the scenario
    if not scenario:
        params.scenario = "custom_scenario003.cfg"
    else:
        params.scenario = scenario

    env = DoomEnvironment(params)

    device = torch.device("cuda" if False else "cpu")  # hard-coded to CPU

    num_actions = env.num_actions

    # Load the base model
    network = CNNPolicy(3, num_actions, True, (3, 64, 112)).to(device)

    checkpoint = torch.load('models/' + model + '.pth.tar', map_location=lambda storage, loc: storage)

    """Remplacement des clefs du dictionnaire qui posent problème"""

    checkpoint['model']["dist.linear.weight"] = checkpoint['model']["dist_linear.weight"]
    del checkpoint['model']["dist_linear.weight"]
    checkpoint['model']["dist.linear.bias"] = checkpoint['model']["dist_linear.bias"]
    del checkpoint['model']["dist_linear.bias"]

    network.load_state_dict(checkpoint['model'])

    agent = BaseAgent(network, params)

    ERU = {'env': env, 'agent': agent}

    # Load the checkpoints
    num_checkpoints = [98, 98, 159]
    checkpoints = [1]*sum(num_checkpoints)
    networks = [1]*sum(num_checkpoints)
    agents = [1]*sum(num_checkpoints)
    ERUs = [1]*sum(num_checkpoints)

    for i in range(len(num_checkpoints)):
        for j in range(num_checkpoints[i]):
            idx = i * num_checkpoints[0] + j

            # if i == 0:
            #     checkpoint_filename = '/home/adam/Bureau/Transfer Learning/5 - 28-03-21/checkpoint_{}_{}.pth.tar'.format(str(i + 1), str(j + 88))
            # else:
            checkpoint_filename = '/home/adam/Bureau/Transfer Learning/5 - 28-03-21/checkpoint_{}_{}.pth.tar'.format(str(i + 1), str(j + 1))

            checkpoints[idx] = torch.load(checkpoint_filename, map_location=lambda storage, loc: storage)

            """Rename the dictionary keys that cause problems"""

            checkpoints[idx]['model']["dist.linear.weight"] = checkpoints[idx]['model']["dist_linear.weight"]
            del checkpoints[idx]['model']["dist_linear.weight"]
            checkpoints[idx]['model']["dist.linear.bias"] = checkpoints[idx]['model']["dist_linear.bias"]
            del checkpoints[idx]['model']["dist_linear.bias"]

            networks[idx] = CNNPolicy(3, num_actions, True, (3, 64, 112)).to(device)
            networks[idx].load_state_dict(checkpoints[idx]['model'])

            agents[idx] = BaseAgent(networks[idx], params)

            ERUs[idx] = {'env': env, 'agent': agents[idx]}

            ERUs[idx]['env'].reset()

    selhs = []
    for i in range(sum(num_checkpoints)):
        selh = tsne_1d_projection(127)
        selh = torch.from_numpy(selh).type(torch.FloatTensor)
        selh = Variable(selh, volatile=True)
        selhs.append(selh)


    scores = []
    hiddens = []
    inputs = []
    actions = []

    # Loop to collect observations from the base model

    obss = []
    actions = []

    for i in range(50):
        obs = ERU['env'].get_observation()
        action, value, action_probs, grads = ERU['agent'].get_action_value_and_probs_zeroes(obs, selh, epsilon=0.0)
        ERU['env'].make_action(int(action))
        obss.append(obs)
        actions.append(action)


    # Loop to evaluate the checkpoints on the situations seen by the base model

    for i in range(sum(num_checkpoints)):

        for obs2 in obss:
            action, value, action_probs, grads = ERUs[i]['agent'].get_action_value_and_probs_zeroes(obs2, selhs[i], epsilon=0.0)

        hidden = ERUs[i]['agent'].model.get_gru_h()
        h = ''
        for elem in hidden[0][0]:
            h += str(elem) + ","
        h = h[:-1]

        h = h.split(',')
        hiddens.append(h)

        ERU['env'].make_action(int(action))

    im = Image.new('P', (sum(num_checkpoints), 128))
    for i in range(len(hiddens)):
        for j in range(len(hiddens[i])):
            value = int((float(hiddens[i][j])+1)*255/2)
            im.putpixel((i, j), (value, value, value, 255))
    im.show()
    im.save("timeline.png")

    im = Image.new('P', (sum(num_checkpoints)-1, 128))
    for i in range(len(hiddens)-1):
        for j in range(len(hiddens[i])):
            value = int((abs(float(hiddens[i][j])-float(hiddens[i+1][j])))*255*1.5)
            if value>255:
                value=255
            im.putpixel((i, j), (value, value, value, 255))
    im.show()
    im.save("variation.png")
Example #5
def train():
    # define params
    params = parse_game_args()
    logger = Logger(params)
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    num_updates = int(
        params.num_frames) // params.num_steps // params.num_environments

    # environments

    envs = MultiEnvsMPPipes(params.simulator, params.num_environments, 1,
                            params)

    obs_shape = envs.obs_shape
    obs_shape = (obs_shape[0] * params.num_stack, *obs_shape[1:])

    evaluator = Evaluator(params)
    print('creating model')
    actor_critic = CNNPolicy(obs_shape[0], obs_shape, params).to(device)
    print('model created')
    start_j = 0

    if params.reload_model:
        checkpoint_idx = params.reload_model.split(',')[1]
        checkpoint_filename = '{}models/checkpoint_{}.pth.tar'.format(
            params.output_dir, checkpoint_idx)
        assert os.path.isfile(
            checkpoint_filename), 'The model could not be found {}'.format(
                checkpoint_filename)
        logger.write('Loading model {}'.format(checkpoint_filename))

        if device.type == 'cuda':  # The checkpoint will try to load onto GPU storage unless map_location is given
            checkpoint = torch.load(checkpoint_filename)
        else:
            checkpoint = torch.load(checkpoint_filename,
                                    map_location=lambda storage, loc: storage)
        actor_critic.load_state_dict(checkpoint['model'])

        start_j = (int(checkpoint_idx) // params.num_steps //
                   params.num_environments) + 1

    print('creating optimizer')
    optimizer = optim.RMSprop(
        [p for p in actor_critic.parameters() if p.requires_grad],
        params.learning_rate,
        eps=params.eps,
        alpha=params.alpha,
        momentum=params.momentum)

    if params.reload_model:
        optimizer.load_state_dict(checkpoint['optimizer'])

    rollouts = RolloutStorage(params.num_steps, params.num_environments,
                              obs_shape, actor_critic.state_size, params)

    current_obs = torch.zeros(params.num_environments, *obs_shape)

    # For Frame stacking
    def update_current_obs(obs):
        shape_dim0 = envs.obs_shape[0]
        obs = torch.from_numpy(obs).float()
        if params.num_stack > 1:
            current_obs[:, :-shape_dim0] = current_obs[:, shape_dim0:]
        current_obs[:, -shape_dim0:] = obs

    print('getting first obs')
    obs = envs.reset()
    print('update current obs')
    update_current_obs(obs)

    rollouts.observations[0].copy_(current_obs)

    # These variables are used to compute average rewards for all processes.
    episode_rewards = torch.zeros([params.num_environments, 1])
    final_rewards = torch.zeros([params.num_environments, 1])

    current_obs = current_obs.to(device)
    rollouts.set_device(device)

    print('Starting training loop')
    start = time.time()
    print(num_updates)

    for j in range(start_j, num_updates):
        # STARTING no grad scope
        with torch.no_grad():

            if j % params.eval_freq == 0 and not params.skip_eval:
                print('Evaluating model')
                if params.simulator == 'doom':
                    actor_critic.eval()
                    total_num_steps = (
                        j + 1) * params.num_environments * params.num_steps
                    #eval_model(actor_critic, params, logger, j, total_num_steps, params.eval_games)
                    evaluator.evaluate(actor_critic, params, logger, j,
                                       total_num_steps, params.eval_games)
                    actor_critic.train()

            # =============================================================================
            # Take steps in the environment
            # =============================================================================
            for step in range(params.num_steps):
                # Sample actions
                value, action, action_log_prob, states = actor_critic.act(
                    rollouts.observations[step], rollouts.states[step],
                    rollouts.masks[step])

                cpu_actions = action.squeeze(1).cpu().numpy()

                # Observe reward and next obs
                obs, reward, done, info = envs.step(cpu_actions)

                reward = torch.from_numpy(np.expand_dims(np.stack(reward),
                                                         1)).float()
                episode_rewards += reward

                # If done then create masks to clean the history of observations.
                masks = torch.FloatTensor([[0.0] if done_ else [1.0]
                                           for done_ in done])

                final_rewards *= masks
                final_rewards += (1 - masks) * episode_rewards
                episode_rewards *= masks

                masks = masks.to(device)

                if current_obs.dim() == 4:
                    current_obs *= masks.unsqueeze(2).unsqueeze(2)
                else:
                    current_obs *= masks

                update_current_obs(obs)

                rollouts.insert(step, current_obs, states, action,
                                action_log_prob, value, reward, masks)

            # =============================================================================
            # Compute discounted returns, re-step through the environment
            # =============================================================================
            next_value = actor_critic(rollouts.observations[-1],
                                      rollouts.states[-1],
                                      rollouts.masks[-1])[0]

            rollouts.compute_returns(next_value, params.use_gae, params.gamma,
                                     params.tau)

        # FINISHED no grad scope
        model_output = actor_critic.evaluate_actions(
            rollouts.observations[:-1].view(-1, *obs_shape),
            rollouts.states[0].view(-1, actor_critic.state_size),
            rollouts.masks[:-1].view(-1, 1), rollouts.actions.view(-1, 1))

        values, action_log_probs, dist_entropy, states = model_output

        values = values.view(params.num_steps, params.num_environments, 1)
        action_log_probs = action_log_probs.view(params.num_steps,
                                                 params.num_environments, 1)
        advantages = rollouts.returns[:-1] - values

        value_loss = advantages.pow(2).mean()
        action_loss = -(advantages.detach() * action_log_probs).mean()

        optimizer.zero_grad()

        loss = value_loss * params.value_loss_coef + action_loss - dist_entropy * params.entropy_coef
        loss.backward()
        nn.utils.clip_grad_norm_(actor_critic.parameters(),
                                 params.max_grad_norm)

        optimizer.step()
        rollouts.after_update()

        if j % params.model_save_rate == 0:
            total_num_steps = (j +
                               1) * params.num_environments * params.num_steps
            checkpoint = {
                'step': step,
                'params': params,
                'model': actor_critic.state_dict(),
                'optimizer': optimizer.state_dict()
            }

            filepath = logger.output_dir + 'models/'

            torch.save(
                checkpoint, '{}checkpoint_{:00000000010}.pth.tar'.format(
                    filepath, total_num_steps))

        if j % params.log_interval == 0:
            end = time.time()
            total_num_steps = (j +
                               1) * params.num_environments * params.num_steps
            save_num_steps = (
                start_j) * params.num_environments * params.num_steps
            logger.write(
                "Updates {}, num timesteps {}, FPS {}, mean/median reward {:.1f}/{:.1f}, min/max reward {:.1f}/{:.1f}, entropy {:.5f}, value loss {:.5f}, policy loss {:.5f}"
                .format(
                    j, total_num_steps,
                    int((total_num_steps - save_num_steps) / (end - start)),
                    final_rewards.mean(), final_rewards.median(),
                    final_rewards.min(), final_rewards.max(),
                    dist_entropy.item(), value_loss.item(),
                    action_loss.item()))

    evaluator.cancel()
    envs.cancel()
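rollouts.compute_returns is project code; for reference, the plain (non-GAE) discounted-return recursion it corresponds to looks like the sketch below. As in the loop above, a mask of 0 marks a step at which the episode ended, which cuts off the bootstrap value.

import torch

def discounted_returns(rewards, masks, next_value, gamma):
    # rewards, masks: (num_steps, num_envs, 1); next_value: (num_envs, 1) bootstrap for the state after the last step
    num_steps = rewards.size(0)
    returns = torch.zeros(num_steps + 1, *rewards.shape[1:])
    returns[-1] = next_value
    for step in reversed(range(num_steps)):
        # masks[step] == 0 where the episode ended at this step, so the bootstrap term vanishes
        returns[step] = rewards[step] + gamma * returns[step + 1] * masks[step]
    return returns[:-1]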
Example #6
def test():
    def simulate_rollout(env):
        from random import choice
        buffer = []
        env.reset()
        k = 0
        while not env.is_episode_finished():
            k += 1
            obs = env.get_observation()
            buffer.append(obs)

            # Make a random action and save the reward.
            reward = env.make_action(choice(list(range(env.num_actions))))
        print('Game finished in {} steps'.format(k))
        print('Total rewards = {}'.format(env.get_total_reward()))
        return k, buffer

    # =============================================================================
    #   Test the environment
    # =============================================================================

    from arguments import parse_game_args
    params = parse_game_args()
    env = DoomEnvironment(params)
    print(env.num_actions)
    print(env.game.get_available_buttons())
    print(len(env.action_map))
    print(env.game.get_screen_height(), env.game.get_screen_width())

    print(env.get_observation().shape)

    import matplotlib.pyplot as plt

    plt.imshow(env.get_observation().transpose(1, 2, 0))
    plt.figure()
    plt.imshow(env.get_observation().transpose(1, 2, 0))

    env.decimate = False

    def resize_obs(observation):
        observation = observation.transpose(1, 2, 0)
        observation = resize(
            observation,
            (observation.shape[0] // 2, observation.shape[1] // 2))
        observation = observation.transpose(2, 0, 1)
        return observation

    data = env.get_observation().transpose(1, 2, 0)
    from skimage.transform import rescale, resize, downscale_local_mean

    data_resized = resize(data, (data.shape[0] // 2, data.shape[1] // 2))

    plt.figure()
    plt.imshow(data_resized)

    obs = env.get_observation()
    obs_rs = resize_obs(obs)

    assert 0
    for action in env.action_map.keys():
        reward = env.make_action(action)
        print(reward, env.is_episode_finished())

    for i in range(100):
        k, b = simulate_rollout(env)

    print(env.game.get_available_game_variables())
    print(env.game.get_game_variable(GameVariable.HEALTH))
Example #7
File: reduce.py  Project: sical/drlviz
def gen_classic(selh, file):
    params = parse_game_args()
    params.scenario = "health_gathering_supreme.cfg"
    env = DoomEnvironment(params)

    device = torch.device("cuda" if False else "cpu")  # hard-coded to CPU

    num_actions = env.num_actions
    network = CNNPolicy(3, num_actions, True, (3, 64, 112)).to(device)

    checkpoint = torch.load('models/' + "health_gathering_supreme" +
                            '.pth.tar',
                            map_location=lambda storage, loc: storage)
    network.load_state_dict(checkpoint['model'])

    agent = BaseAgent(network, params)

    ERU = {'env': env, 'agent': agent}

    selh = torch.from_numpy(selh).type(torch.FloatTensor)

    selh = Variable(selh, volatile=True)

    ERU['env'].set_seed(randint(0, 999999999))
    ERU['env'].reset()

    scores = []
    hiddens = []
    inputs = []
    saliencies = []
    actions = []
    probabilities = []
    health = []
    positions = []
    orientations = []
    velocities = []
    items = []
    fov = []

    w = 0

    while not ERU['env'].is_episode_finished():
        observation = io.BytesIO()

        obs = ERU['env'].get_observation()
        temp = ERU['env'].state.screen_buffer
        Image.fromarray(temp.transpose(1, 2, 0)).save(observation,
                                                      format="JPEG")
        action, value, action_probs, grads = ERU[
            'agent'].get_action_value_and_probs_zeroes(obs, selh, epsilon=0.0)

        hidden = ERU['agent'].model.get_gru_h()
        h = ''
        for elem in hidden[0][0]:
            h += str(elem) + ","
        h = h[:-1]

        h = h.split(',')
        probs = ""
        for elem in action_probs[0]:
            probs += str(elem) + ","
        probs = probs[:-1]

        probs = probs.split(',')
        sa = io.BytesIO()

        t = Image.fromarray(grads, 'L')

        t.save(sa, format="JPEG")

        scores.append(str(round(ERU['env'].game.get_total_reward(), 2)))
        hiddens.append(h)
        inputs.append(base64.b64encode(observation.getvalue()))
        saliencies.append(base64.b64encode(sa.getvalue()))
        actions.append(str(action))
        probabilities.append(probs)
        health.append(ERU['env'].get_health())
        positions.append(ERU['env'].get_pos())
        orientations.append(ERU['env'].get_ori())
        velocities.append(ERU['env'].get_velo())
        items.append(ERU['env'].get_item())
        fov.append(ERU['env'].get_fov())

        ERU['env'].make_action(int(action))
        print('Iteration', w, '/525')
        w += 1

    result = {
        'episode0': {
            'inputs': inputs,
            'actions': actions,
            'probabilities': probabilities,
            'saliencies': saliencies,
            'scores': scores,
            'positions': positions,
            'health': health,
            'hiddens': hiddens,
            'orientations': orientations,
            'velocities': velocities,
            'items': items,
            'fov': fov
        }
    }
    with open(file, 'w') as f:
        ujson.dump(result, f, indent=4, sort_keys=True)
    return result
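The inputs and saliencies fields above carry base64-encoded JPEGs so the whole trace can be dumped as JSON; a minimal sketch of that encode step (the frame is synthetic, and decoding to str avoids serialization issues that raw bytes can cause with some JSON libraries):

import base64
import io
import json

import numpy as np
from PIL import Image

frame = (np.random.rand(64, 112, 3) * 255).astype(np.uint8)  # stand-in screen buffer

buf = io.BytesIO()
Image.fromarray(frame).save(buf, format="JPEG")
encoded = base64.b64encode(buf.getvalue()).decode("ascii")

print(json.dumps({'inputs': [encoded]})[:60], '...')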