Example #1
0
        solved_episodes = []

        for step in range(args.num_steps):
            # render
            if args.render:
                envs.render()

            # select action
            value, action, action_log_probs = agent.select_action(step)

            # take a step in the environment
            obs, reward, done, infos = envs.step(action)

            # calculate intrinsic reward
            if args.add_intrinsic_reward:
                intrinsic_reward = args.intrinsic_coef * agent.compute_intrinsic_reward(
                    step)
                if args.max_intrinsic_reward is not None:
                    intrinsic_reward = torch.clamp(
                        agent.compute_intrinsic_reward(step), 0.0,
                        args.max_intrinsic_reward)
            else:
                intrinsic_reward = torch.tensor(0).view(1, 1)
            intrinsic_rewards.extend(list(
                intrinsic_reward.numpy().reshape(-1)))

            # store experience
            agent.store_rollout(obs[1], action, action_log_probs, value,
                                reward, intrinsic_reward, done)

            # get final episode rewards
            for info in infos:
Example #2
0
        solved_episodes = []

        for step in range(args.num_steps):
            # render
            if args.render:
                envs.render()

            # select action
            value, action, action_log_probs = agent.select_action(step)

            # take a step in the environment
            obs, reward, done, infos = envs.step(action)

            # calculate intrinsic reward
            if args.add_intrinsic_reward:
                intrinsic_reward = args.intrinsic_coef * agent.compute_intrinsic_reward(step)
                if args.max_intrinsic_reward is not None:
                    intrinsic_reward = torch.clamp(agent.compute_intrinsic_reward(step), 0.0, args.max_intrinsic_reward)
            else:
                intrinsic_reward = torch.tensor(0).view(1, 1)
            intrinsic_rewards.extend(list(intrinsic_reward.numpy().reshape(-1)))

            # store experience
            agent.store_rollout(obs[1], action, action_log_probs,
                                value, reward, intrinsic_reward,
                                done)

            # get final episode rewards
            for info in infos:
                if 'episode' in info.keys():
                    extrinsic_rewards.append(info['episode']['r'])