Example #1
    def state_update(self, obs, action):
        '''Parse the user utterance, update the dialog state, and encode
        it for the policy, keeping the DST history in sync.'''
        # start a new history turn with the agent's last action
        if self.dst:
            self.dst.state['history'].append([str(action)])

        # NLU parsing: use the flattened history as context for the parser
        if self.nlu:
            context = sum(self.dst.state['history'], []) if self.dst else []
            input_act = self.nlu.parse(obs, context)
        else:
            input_act = obs

        # state tracking
        state = self.dst.update(input_act) if self.dst else input_act

        # append the raw user utterance to the current history turn
        if self.dst:
            self.dst.state['history'][-1].append(str(obs))

        # encode state for the policy, if an encoder is configured
        encoded_state = (self.state_encoder.encode(state)
                         if self.state_encoder else state)

        if self.nlu and self.dst:
            self.dst.state['user_action'] = input_act
        elif self.dst and not isinstance(
                self.dst, word_dst.MDBTTracker):  # for act-in act-out agent
            self.dst.state['user_action'] = obs

        logger.nl(f'User utterance: {obs}')
        logger.act(f'Inferred user action: {input_act}')
        logger.state(f'Dialog state: {state}')

        return input_act, state, encoded_state
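
state_update only assumes a small DST contract: a .state dict holding a 'history' list and a 'user_action' key, plus an update() method that consumes the parsed act. A minimal sketch of a tracker satisfying that contract (the class name and the flat belief-state layout are hypothetical, not from the original code):

class MinimalTracker:
    '''Hypothetical tracker exposing only what state_update relies on.'''

    def __init__(self):
        self.state = {'history': [], 'user_action': None, 'belief_state': {}}

    def update(self, user_act):
        # assume the NLU emits (intent, domain, slot, value) tuples
        for intent, domain, slot, value in user_act:
            self.state['belief_state'].setdefault(domain, {})[slot] = value
        return self.state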
Example #2
    def act(self, obs):
        '''Standard act method: pick an action with the algorithm, then
        decode it into a system dialog act and utterance.'''
        action = self.algorithm.act(self.body.encoded_state)
        self.body.action = action

        output_act, decoded_action = self.action_decode(action, self.body.state)

        logger.act(f'System action: {action}')
        logger.nl(f'System utterance: {decoded_action}')

        return decoded_action
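
act() delegates the mapping from a discrete action to a natural-language response to action_decode, which is not shown here. A plausible sketch under the assumption of an action vocabulary and an optional NLG module (action_vocab and nlg are hypothetical names, not confirmed by the source):

    def action_decode(self, action, state):
        '''Hypothetical sketch: look up the dialog act for a discrete
        action index, then verbalize it with NLG when available.'''
        output_act = self.action_vocab[action]
        decoded_action = self.nlg.generate(output_act) if self.nlg else output_act
        return output_act, decoded_action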
    def reset(self, train_mode, config):
        self.simulator.init_session()
        self.history = []
        user_response, user_act, session_over, reward = self.simulator.response(
            "null", self.history)
        self.last_act = user_act
        logger.act(f'User action: {user_act}')
        self.history.extend(["null", f'{user_response}'])
        self.env_info = [State(user_response, 0., session_over)]
        # register the user goal with the evaluator
        if self.evaluator:
            self.evaluator.add_goal(self.get_goal())
            logger.act(f'Goal: {self.get_goal()}')
        return self.env_info
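
Both reset() and step() lean on simulator.response(sys_utterance, history) returning a (user_response, user_act, session_over, reward) tuple, and step() additionally reads simulator.policy.max_turn. A stub honoring that contract can make the environment testable without a trained simulator; everything below is a hypothetical stand-in:

from types import SimpleNamespace

class StubSimulator:
    '''Hypothetical simulator that ends the session after a few turns.'''

    def __init__(self, max_turns=4):
        self.max_turns = max_turns
        self.turn = 0
        # step() reads simulator.policy.max_turn for its terminal reward
        self.policy = SimpleNamespace(max_turn=max_turns)

    def init_session(self):
        self.turn = 0

    def response(self, sys_utterance, history):
        self.turn += 1
        session_over = self.turn >= self.max_turns
        user_response = 'bye' if session_over else 'tell me more'
        return user_response, ['request'], session_over, 0.0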
    def step(self, action):
        user_response, user_act, session_over, reward = self.simulator.response(
            action, self.history)
        self.last_act = user_act
        self.history.extend([f'{action}', f'{user_response}'])
        logger.act(f'Inferred system action: {self.get_sys_act()}')
        # update evaluator; on session end, replace the simulator reward with
        # a task-level one: +2 * max_turn on success, -max_turn on failure
        if self.evaluator:
            self.evaluator.add_sys_da(self.get_sys_act())
            self.evaluator.add_usr_da(self.get_last_act())
            if session_over:
                max_turn = self.simulator.policy.max_turn
                reward = 2.0 * max_turn if self.evaluator.task_success() else -1.0 * max_turn
            else:
                reward = -1.0
        self.env_info = [State(user_response, reward, session_over)]
        return self.env_info
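
Together the two methods follow the usual gym-style loop. A minimal driving sketch; make_env and choose_action are hypothetical placeholders, and the State fields are assumed to be (observation, reward, done) in the order they are constructed above:

env = make_env()                       # hypothetical factory for the env above
state = env.reset(train_mode=False, config=None)[0]
done, episode_return = False, 0.0
while not done:
    sys_action = choose_action(state)  # hypothetical policy call
    state = env.step(sys_action)[0]
    episode_return += state.reward     # State(user_response, reward, session_over)
    done = state.done
print(f'episode return: {episode_return}')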