def __init__(self, env, b_agent, include_utterance_in_observation=False, *args, **kwargs):
    """Wrap `env` as a situated conversational environment.

    Args:
        env: Environment which is being wrapped by this conversational
            environment. This environment must have a discrete action
            space. The RL agent will be able to take any of the actions
            in this environment, or select a new action `utterance`,
            managed by this class.
        b_agent: `Agent` instance with which the learned agent will
            interact.
        include_utterance_in_observation: If `True`, after making an
            utterance, include the uttered token as part of the next
            observation.
    """
    super(SituatedConversationEnvironment, self).__init__(*args, **kwargs)

    assert isinstance(env.action_space, Discrete)
    self._env = env

    self.num_tokens = b_agent.num_tokens
    self.vocab = b_agent.vocab
    self.vocab_size = len(self.vocab)

    # Observations pair the wrapped environment's observation with a bag
    # representing any message received from the other agent; optionally a
    # single-token "last utterance" component is appended as well.
    self._received_message_space = DiscreteBinaryBag(self.vocab_size)
    obs_components = [env.observation_space, self._received_message_space]
    if include_utterance_in_observation:
        obs_components.append(Discrete(self.vocab_size))
    self._obs_space = Product(*obs_components)
    self.include_utterance_in_observation = include_utterance_in_observation

    # Action layout: the first `N` actions map onto the wrapped env's
    # actions, the next `V` utter one vocabulary token each, and the final
    # action sends the accumulated message.
    self._action_space = Discrete(env.action_space.n + b_agent.vocab_size + 1)

    self.b_agent = b_agent
def to_tf_space(space):
    """Convert a Theano-side space object into its TF-side equivalent.

    Box, Discrete, and Product spaces are supported; Product components are
    converted recursively. Any other space type raises NotImplementedError.
    """
    if isinstance(space, TheanoBox):
        return Box(low=space.low, high=space.high)
    if isinstance(space, TheanoDiscrete):
        return Discrete(space.n)
    if isinstance(space, TheanoProduct):
        converted = [to_tf_space(component) for component in space.components]
        return Product(converted)
    raise NotImplementedError
def to_tf_space(space):
    """Convert a Theano-side space object into its TF-side equivalent.

    Box, Discrete, and Product spaces are supported; Product components are
    converted recursively. Unrecognized space types fall back to a Box
    conversion, which assumes the space exposes `low`/`high` attributes —
    this is a deliberate hack, and a RuntimeWarning is emitted when it fires.
    """
    if isinstance(space, TheanoBox):
        return Box(low=space.low, high=space.high)
    elif isinstance(space, TheanoDiscrete):
        return Discrete(space.n)
    elif isinstance(space, TheanoProduct):
        return Product(list(map(to_tf_space, space.components)))
    else:
        # HACK: coerce unknown space types to Box. Use warnings.warn rather
        # than print so the fallback goes to stderr with a stack location
        # and can be filtered, instead of silently polluting stdout.
        import warnings
        warnings.warn(
            "HACK IN sandbox/rocky/envs/base.py: coercing unknown space "
            "type %s to Box" % type(space).__name__,
            RuntimeWarning, stacklevel=2)
        return Box(low=space.low, high=space.high)
def __init__(self, env=None, b_agent=None, *args, **kwargs):
    """Wrap `env` as a situated conversational environment.

    Args:
        env: Environment which is being wrapped by this conversational
            environment. This environment must have a discrete action
            space. The RL agent will be able to take any of the actions
            in this environment, or select a new action `utterance`,
            managed by this class. Defaults to a "walled_chain"
            `SlaveGridWorldEnv` when omitted.
        b_agent: `Agent` instance with which the learned agent will
            interact. Defaults to a `GridWorldMasterAgent` over `env`
            when omitted.
    """
    super(SituatedConversationEnvironment, self).__init__(*args, **kwargs)

    # Lazily build the default environment/agent pair.
    if env is None:
        env = SlaveGridWorldEnv("walled_chain")
    if b_agent is None:
        b_agent = GridWorldMasterAgent(env)

    assert isinstance(env.action_space, Discrete)
    self._env = env

    self.num_tokens = b_agent.num_tokens
    self.vocab = b_agent.vocab
    self.vocab_size = len(self.vocab)

    # Observations pair the wrapped environment's observation with a bag
    # representing any message received from the other agent.
    self._received_message_space = DiscreteBinaryBag(self.vocab_size)
    self._obs_space = Product(env.observation_space,
                              self._received_message_space)

    # Action layout: the first `N` actions map onto the wrapped env's
    # actions, the next `V` utter one vocabulary token each, and the final
    # action sends the accumulated message.
    self._action_space = Discrete(env.action_space.n + b_agent.vocab_size + 1)

    self._b_agent = b_agent