示例#1
0
    def __init__(self,
                 env,
                 b_agent,
                 include_utterance_in_observation=False,
                 *args,
                 **kwargs):
        """
        Wrap a discrete-action environment with a conversational channel.

        Args:
            env: Wrapped environment; must expose a discrete action space.
                The learned agent may take any of its actions, or one of the
                new utterance/send actions managed by this class.
            b_agent: `Agent` instance the learned agent converses with.
            include_utterance_in_observation: If `True`, the token uttered on
                the previous step is appended to each observation.
        """
        super(SituatedConversationEnvironment, self).__init__(*args, **kwargs)

        assert isinstance(env.action_space, Discrete)
        self._env = env

        self.num_tokens = b_agent.num_tokens
        self.vocab = b_agent.vocab
        self.vocab_size = len(self.vocab)

        # An observation combines the wrapped environment's observation with
        # a bag-of-tokens representation of any message received from the
        # other agent, plus (optionally) the agent's own last utterance.
        self._received_message_space = DiscreteBinaryBag(self.vocab_size)
        obs_components = [env.observation_space, self._received_message_space]
        if include_utterance_in_observation:
            obs_components.append(Discrete(self.vocab_size))
        self._obs_space = Product(*obs_components)
        self.include_utterance_in_observation = include_utterance_in_observation

        # Action layout: the first `N` actions map onto the wrapped env's
        # actions, the next `V` utter one vocabulary token each, and the
        # final action sends the accumulated message to the other agent.
        self._action_space = Discrete(
            env.action_space.n + b_agent.vocab_size + 1)

        self.b_agent = b_agent
示例#2
0
def to_tf_space(space):
    """Translate a Theano-side space object into its TF-side equivalent.

    Recurses through `Product` components; raises `NotImplementedError`
    for any unrecognized space type.
    """
    if isinstance(space, TheanoBox):
        return Box(low=space.low, high=space.high)
    if isinstance(space, TheanoDiscrete):
        return Discrete(space.n)
    if isinstance(space, TheanoProduct):
        converted = [to_tf_space(component) for component in space.components]
        return Product(converted)
    raise NotImplementedError
示例#3
0
def to_tf_space(space):
    """Translate a Theano-side space object into its TF-side equivalent.

    Recurses through `Product` components. Unrecognized space types fall
    through to a Box conversion (an acknowledged hack — assumes the space
    exposes `low`/`high` attributes).
    """
    if isinstance(space, TheanoBox):
        return Box(low=space.low, high=space.high)
    if isinstance(space, TheanoDiscrete):
        return Discrete(space.n)
    if isinstance(space, TheanoProduct):
        converted = [to_tf_space(component) for component in space.components]
        return Product(converted)
    # Deliberate best-effort fallback; announce it loudly rather than raise.
    print("HACK IN sandbox/rocky/envs/base.py")
    return Box(low=space.low, high=space.high)
示例#4
0
    def __init__(self, env=None, b_agent=None, *args, **kwargs):
        """
        Wrap a discrete-action environment with a conversational channel.

        Args:
            env: Wrapped environment; must expose a discrete action space.
                The learned agent may take any of its actions, or one of the
                new utterance/send actions managed by this class. Defaults to
                a "walled_chain" `SlaveGridWorldEnv`.
            b_agent: `Agent` instance the learned agent converses with.
                Defaults to a `GridWorldMasterAgent` built on `env`.
        """
        super(SituatedConversationEnvironment, self).__init__(*args, **kwargs)

        # Lazily build the default grid-world pairing when none is supplied.
        if env is None:
            env = SlaveGridWorldEnv("walled_chain")
        if b_agent is None:
            b_agent = GridWorldMasterAgent(env)

        assert isinstance(env.action_space, Discrete)
        self._env = env

        self.num_tokens = b_agent.num_tokens
        self.vocab = b_agent.vocab
        self.vocab_size = len(self.vocab)

        # An observation combines the wrapped environment's observation with
        # a bag-of-tokens representation of any message received from the
        # other agent.
        self._received_message_space = DiscreteBinaryBag(self.vocab_size)
        self._obs_space = Product(env.observation_space,
                                  self._received_message_space)

        # Action layout: the first `N` actions map onto the wrapped env's
        # actions, the next `V` utter one vocabulary token each, and the
        # final action sends the accumulated message to the other agent.
        self._action_space = Discrete(
            env.action_space.n + b_agent.vocab_size + 1)

        self._b_agent = b_agent