示例#1
0
    def build_sess(self, sys_agent):
        """Build and cache the dialog session used for evaluation.

        A full ``BiSession`` (system agent vs. the stored user agent) is
        created only for the 'multiwoz' dataset, the only one with an
        evaluator available here; for any other dataset ``self.sess`` is
        set to None.

        :param sys_agent: the system-side dialog agent under evaluation
        :return: the cached session (``BiSession`` or None)
        """
        if self.dataset == 'multiwoz':
            self.sess = BiSession(sys_agent=sys_agent,
                                  user_agent=self.user_agent,
                                  kb_query=None,
                                  evaluator=MultiWozEvaluator())
        else:
            self.sess = None
        return self.sess
def evaluate(dataset_name, model_name, load_path, calculate_reward=True):
    """Evaluate a trained dialog policy against the rule-based user simulator.

    Runs 100 simulated dialogs (seeds 0-99, at most 40 turns each),
    logging per-dialog metrics plus per-domain and overall task-success
    averages.  When ``calculate_reward`` is set, a second pass of 100
    dialogs estimates the average per-turn reward.

    :param dataset_name: only 'MultiWOZ' is supported
    :param model_name: one of 'PPO', 'PG', 'MLE', 'GDPL', 'GAIL'
    :param model_name: policy identifier selecting which class to load
    :param load_path: checkpoint path; when falsy, the pretrained model
        published for that policy is used instead
    :param calculate_reward: whether to run the reward-estimation pass
    :raises Exception: for an unsupported dataset
    :raises ValueError: for an unsupported model name
    """
    # Fixed seed so model construction/loading is reproducible.
    seed = 20190827
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)

    if dataset_name == 'MultiWOZ':
        dst_sys = RuleDST()

        # Policies are imported lazily so only the selected model's
        # dependencies need to be installed.
        if model_name == "PPO":
            from convlab2.policy.ppo import PPO
            if load_path:
                policy_sys = PPO(False)
                policy_sys.load(load_path)
            else:
                policy_sys = PPO.from_pretrained()
        elif model_name == "PG":
            from convlab2.policy.pg import PG
            if load_path:
                policy_sys = PG(False)
                policy_sys.load(load_path)
            else:
                policy_sys = PG.from_pretrained()
        elif model_name == "MLE":
            from convlab2.policy.mle.multiwoz import MLE
            if load_path:
                policy_sys = MLE()
                policy_sys.load(load_path)
            else:
                policy_sys = MLE.from_pretrained()
        elif model_name == "GDPL":
            from convlab2.policy.gdpl import GDPL
            if load_path:
                policy_sys = GDPL(False)
                policy_sys.load(load_path)
            else:
                policy_sys = GDPL.from_pretrained()
        elif model_name == "GAIL":
            from convlab2.policy.gail import GAIL
            if load_path:
                policy_sys = GAIL(False)
                policy_sys.load(load_path)
            else:
                policy_sys = GAIL.from_pretrained()
        else:
            # Previously an unknown model name fell through silently and
            # crashed later with a NameError on policy_sys.
            raise ValueError(f"unsupported model name: {model_name}")

        dst_usr = None

        policy_usr = RulePolicy(character='usr')
        simulator = PipelineAgent(None, None, policy_usr, None, 'user')

        env = Environment(None, simulator, None, dst_sys)

        agent_sys = PipelineAgent(None, dst_sys, policy_sys, None, 'sys')

        evaluator = MultiWozEvaluator()
        sess = BiSession(agent_sys, simulator, None, evaluator)

        task_success = {'All': []}
        for seed in range(100):
            # Re-seed per dialog so each dialog is individually reproducible.
            random.seed(seed)
            np.random.seed(seed)
            torch.manual_seed(seed)
            sess.init_session()
            sys_response = []
            logging.info('-' * 50)
            logging.info(f'seed {seed}')
            for i in range(40):
                sys_response, user_response, session_over, reward = sess.next_turn(sys_response)
                if session_over is True:
                    task_succ = sess.evaluator.task_success()
                    logging.info(f'task success: {task_succ}')
                    logging.info(f'book rate: {sess.evaluator.book_rate()}')
                    logging.info(f'inform precision/recall/f1: {sess.evaluator.inform_F1()}')
                    logging.info('-' * 50)
                    break
            else:
                # for/else: the turn budget ran out without the session
                # ending, so the dialog counts as a failure.
                task_succ = 0

            for key in sess.evaluator.goal:
                if key not in task_success:
                    task_success[key] = []
                # Append unconditionally: the old `else` branch dropped the
                # first result recorded for every domain key.
                task_success[key].append(task_succ)
            task_success['All'].append(task_succ)

        for key in task_success:
            logging.info(f'{key} {len(task_success[key])} {np.average(task_success[key]) if len(task_success[key]) > 0 else 0}')

        if calculate_reward:
            reward_tot = []
            for seed in range(100):
                s = env.reset()
                reward = []
                for t in range(40):
                    # NOTE(review): predict() is assumed to vectorize the
                    # state internally; the old standalone
                    # vector.state_vectorize(s) result was never used and
                    # has been dropped (as were the dead value/mask lists).
                    a = policy_sys.predict(s)

                    # interact with env
                    next_s, r, done = env.step(a)
                    logging.info(r)
                    reward.append(r)
                    # Advance the observed state; the old code never did,
                    # so the policy always predicted from the reset state.
                    s = next_s
                    if done:
                        break
                logging.info(f'{seed} reward: {np.mean(reward)}')
                reward_tot.append(np.mean(reward))
            logging.info(f'total avg reward: {np.mean(reward_tot)}')
    else:
        raise Exception("currently supported dataset: MultiWOZ")
示例#3
0
    # The user side tracks no dialog state of its own; the rule policy
    # consumes dialog acts directly, so no DST component is plugged in.
    user_dst = None
    # Agenda-based rule policy playing the user role.
    user_policy = RulePolicy(character='usr')
    # Alternative goal generators kept for reference (presumably restrict
    # the sampled goals to specific domains — left disabled).
    #user_policy.policy.goal_generator = GoalGenerator_7()
    #user_policy.policy.goal_generator = GoalGenerator_restaurant()
    # Template-based NLG renders the user's dialog acts into text.
    user_nlg = TemplateNLG(is_user=True)
    # Assemble the user pipeline (NLU -> DST -> policy -> NLG).
    # NOTE(review): user_nlu and sys_agent are defined outside this
    # excerpt — confirm they are in scope in the enclosing function.
    user_agent = PipelineAgent(user_nlu,
                               user_dst,
                               user_policy,
                               user_nlg,
                               name='user')

    evaluator = MultiWozEvaluator()
    sess = BiSession(sys_agent=sys_agent,
                     user_agent=user_agent,
                     kb_query=None,
                     evaluator=evaluator)

    # Fixed seed (1603813675) for a reproducible demo dialog.
    set_seed(1603813675, False)

    sys_response = ''
    sess.init_session()
    print('init goal:')
    pprint(sess.evaluator.goal)
    print('-' * 50)
    # Run and print one complete dialog, starting from an empty system turn.
    sess.demo(sys_response)
'''
示例#4
0
def evaluate(dataset_name, model_name, load_path):
    """Evaluate a trained dialog policy with the rule-based user simulator.

    Runs 100 simulated dialogs (seeds 0-99, at most 40 turns each) and
    compares two success measures: a reward-based signal (a turn reward of
    80 marks success) and the MultiWOZ evaluator's own judgement.

    :param dataset_name: only 'MultiWOZ' is supported
    :param model_name: one of 'PPO', 'DQN', 'DQfD_RE', 'DQfD_NLE', 'MLE'
    :param load_path: checkpoint path; required for the DQN/DQfD variants
    :return: (reward-based success rate, evaluator success rate)
    :raises Exception: for an unsupported dataset
    :raises ValueError: for an unsupported model or a missing load path
    """
    # Fixed seed so model construction/loading is reproducible.
    seed = 20200722
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)

    if dataset_name != 'MultiWOZ':
        # Guard clause; previously any other dataset silently returned None.
        raise Exception("currently supported dataset: MultiWOZ")

    dst_sys = RuleDST()

    # Policies are imported lazily so only the selected model's
    # dependencies need to be installed.
    if model_name == "PPO":
        from convlab2.policy.ppo import PPO
        if load_path:
            policy_sys = PPO(False)
            policy_sys.load(load_path)
        else:
            policy_sys = PPO.from_pretrained()
    elif model_name == "DQN":
        from convlab2.policy.dqn.DQN.DQN import DQN
        if not load_path:
            # Previously only printed a message and crashed later with a
            # NameError on policy_sys; fail fast with the same message.
            raise ValueError('Please add load path.')
        policy_sys = DQN(False)
        policy_sys.load(load_path)
    elif model_name == "DQfD_RE":
        from convlab2.policy.dqn.RE.DQfD import DQfD
        if not load_path:
            raise ValueError('Please add load path.')
        policy_sys = DQfD(False)
        policy_sys.load(load_path)
    elif model_name == "DQfD_NLE":
        from convlab2.policy.dqn.NLE.DQfD import DQfD
        if not load_path:
            raise ValueError('Please add load path.')
        policy_sys = DQfD(False)
        policy_sys.load(load_path)
    elif model_name == "MLE":
        from convlab2.policy.mle.multiwoz import MLE
        if load_path:
            policy_sys = MLE()
            policy_sys.load(load_path)
        else:
            policy_sys = MLE.from_pretrained()
    else:
        # Previously an unknown model name fell through silently and
        # crashed later with a NameError on policy_sys.
        raise ValueError(f"unsupported model name: {model_name}")

    policy_usr = RulePolicy(character='usr')
    simulator = PipelineAgent(None, None, policy_usr, None, 'user')

    agent_sys = PipelineAgent(None, dst_sys, policy_sys, None, 'sys')

    evaluator = MultiWozEvaluator()
    sess = BiSession(agent_sys, simulator, None, evaluator)

    task_success = 0
    evaluator_success = 0
    for seed in range(100):
        # Re-seed per dialog so each dialog is individually reproducible.
        random.seed(seed)
        np.random.seed(seed)
        torch.manual_seed(seed)
        sess.init_session()
        sys_response = []

        cur_success = 0
        for i in range(40):
            sys_response, user_response, session_over, reward = sess.next_turn(
                sys_response)
            # NOTE(review): a turn reward of 80 is assumed to be the
            # terminal success reward — confirm against the environment.
            # The cur_success flag guards against counting one dialog
            # twice should the reward appear in more than one turn.
            if reward == 80 and not cur_success:
                cur_success = 1
                task_success += 1
            if session_over is True:
                break
        evaluator_success += sess.evaluator.task_success()

    logging.debug('Task success rate: {} and evaluator result: {}.'.format(
        task_success / 100, evaluator_success / 100))
    return task_success / 100, evaluator_success / 100