Пример #1
0
def eval_simulator_performance(data, goal_type=None):
    """Score the simulator's predicted dialog acts and states against gold.

    Walks every user turn in ``data``; the first user turn is scored on the
    simulator's opening dialog act, later user turns on the act and state
    predicted from the previous user/system exchange.  Results are printed
    as F1 / joint-state / slot-state metrics.

    Args:
        data: mapping of task id -> annotated dialog (with 'messages', 'goal',
            'type' fields).
        goal_type: if given, only dialogs of this goal type are evaluated.
    """
    simulator = Simulator()
    begin_pairs = []     # predicted vs. golden opening dialog acts
    state_da_pairs = []  # predicted vs. golden mid-dialog dialog acts
    state_pairs = []     # predicted vs. golden user states
    for _, dialog in data.items():
        if goal_type and dialog['type'] != goal_type:
            continue
        turns = dialog['messages']
        for idx, turn in enumerate(turns):
            if turn['role'] != 'usr':
                continue
            if idx == 0:
                # Opening turn: score the simulator's first dialog act.
                simulator.init_session(goal=dialog['goal'])
                begin_pairs.append({
                    'predict': simulator.begin_da(),
                    'golden': turn['dialog_act']
                })
                continue
            # Later turns: reset to the previous user state, feed the last
            # user/system exchange, then predict the next act and state.
            prev_usr = turns[idx - 2]
            prev_sys = turns[idx - 1]
            simulator.init_session(goal=dialog['goal'],
                                   state=deepcopy(prev_usr['user_state']))
            simulator.state_update(prev_user_da=prev_usr['dialog_act'],
                                   prev_sys_da=prev_sys['dialog_act'])
            predicted_da = simulator.state_predict()
            state_da_pairs.append({
                'predict': predicted_da,
                'golden': turn['dialog_act']
            })
            state_pairs.append({
                'predict': deepcopy(simulator.state),
                'golden': turn['user_state']
            })

    print('begin da', calculateF1(begin_pairs))
    print('state da', calculateF1(state_da_pairs))
    print('all da', calculateF1(begin_pairs + state_da_pairs))
    print('joint state', calculateJointState(state_pairs))
    print('slot state', calculateSlotState(state_pairs))
Пример #2
0
def eval_state_predict(data):
    """Print per-turn comparisons of predicted vs. golden state updates.

    For every non-opening user turn, re-initializes the simulator from the
    previous user state, applies the last user/system dialog acts, and dumps
    the surrounding utterances, the predicted state diff, the golden state
    diff, and the predicted/golden user dialog acts for manual inspection.
    """
    def diff_state(prev_state, cur_state):
        # Entries that changed between the two states, plus the id of the
        # last entry whose final field is truthy (defaults to 1).
        changed = [cur for prev, cur in zip(prev_state, cur_state)
                   if cur != prev]
        last_done = 1
        for entry in reversed(cur_state):
            if entry[-1]:
                last_done = entry[0]
                break
        return changed, last_done

    simulator = Simulator()
    for _, dialog in data.items():
        for idx, turn in enumerate(dialog['messages']):
            if turn['role'] != 'usr' or idx <= 0:
                continue
            prev_usr = dialog['messages'][idx - 2]
            prev_sys = dialog['messages'][idx - 1]
            simulator.init_session(goal=dialog['goal'],
                                   state=deepcopy(prev_usr['user_state']))
            simulator.state_update(prev_user_da=prev_usr['dialog_act'],
                                   prev_sys_da=prev_sys['dialog_act'])
            cur_da = simulator.state_predict()
            new_state = simulator.state
            print(prev_usr['content'])
            print(prev_sys['content'])
            print(turn['content'])
            print('usr da')
            pprint(prev_usr['dialog_act'])
            print('sys da')
            pprint(prev_sys['dialog_act'])
            print('predict state update:')
            pprint(diff_state(prev_usr['user_state'], new_state))
            print('golden state:')
            pprint(diff_state(prev_usr['user_state'], turn['user_state']))
            print('predict usr da')
            pprint(cur_da)
            print('golden usr da')
            pprint(turn['dialog_act'])
            print('-' * 100)
Пример #3
0
def end2end_evaluate_simulation(policy):
    """Run end-to-end self-play (with NLU/NLG in the pipeline) between a
    rule-based user simulator and the given system policy, and print task
    success rates broken down by goal type.

    Args:
        policy: the system dialogue policy under evaluation.
    """
    nlu = BERTNLU('all', 'crosswoz_all_context.json', None)
    nlg_usr = TemplateNLG(is_user=True, mode='auto_manual')
    nlg_sys = TemplateNLG(is_user=False, mode='auto_manual')
    # nlg_usr = SCLSTM(is_user=True, use_cuda=False)
    # nlg_sys = SCLSTM(is_user=False, use_cuda=False)
    usr_policy = Simulator()
    usr_agent = PipelineAgent(nlu, None, usr_policy, nlg_usr, name='usr')
    sys_policy = policy
    sys_dst = RuleDST()
    sys_agent = PipelineAgent(nlu, sys_dst, sys_policy, nlg_sys, name='sys')
    sess = BiSession(sys_agent=sys_agent, user_agent=usr_agent)

    # Success tallies per goal type; the non-'All' keys are the CrossWOZ
    # goal-type labels (runtime dict keys — must match usr_policy.goal_type).
    task_success = {
        'All': list(),
        '单领域': list(),
        '独立多领域': list(),
        '独立多领域+交通': list(),
        '不独立多领域': list(),
        '不独立多领域+交通': list()
    }
    simulate_sess_num = 100
    repeat = 5
    random_seed = 2019
    random.seed(random_seed)
    np.random.seed(random_seed)
    torch.manual_seed(random_seed)
    # Pre-generate one seed per attempted session (heavily over-provisioned)
    # so each simulated dialog is individually reproducible.
    random_seeds = [
        random.randint(1, 2**32 - 1)
        for _ in range(simulate_sess_num * repeat * 10000)
    ]
    # Keep simulating until every goal-type bucket holds
    # simulate_sess_num * repeat sessions.
    while True:
        sys_response = ''
        # Consume the next per-session seed before (re)seeding all RNGs.
        random_seed = random_seeds[0]
        random.seed(random_seed)
        np.random.seed(random_seed)
        torch.manual_seed(random_seed)
        random_seeds.pop(0)
        sess.init_session()
        # print(usr_policy.goal_type)
        # NOTE: the seed is consumed even when the session is discarded here,
        # keeping the seed sequence aligned with attempted sessions.
        if len(task_success[
                usr_policy.goal_type]) == simulate_sess_num * repeat:
            continue
        # Cap each dialog at 15 exchanges; the for-else records a failure
        # when the session never completes within the cap.
        for i in range(15):
            sys_response, user_response, session_over, reward = sess.next_turn(
                sys_response)
            # print('user:', user_response, 'sys:', sys_response)
            # print(session_over, reward)
            # print()
            if session_over is True:
                task_success['All'].append(1)
                task_success[usr_policy.goal_type].append(1)
                break
        else:
            task_success['All'].append(0)
            task_success[usr_policy.goal_type].append(0)
        print([len(x) for x in task_success.values()])
        # print(min([len(x) for x in task_success.values()]))
        # Periodic progress report: per-repeat success counts and averages.
        if len(task_success['All']) % 100 == 0:
            for k, v in task_success.items():
                print(k)
                for i in range(repeat):
                    samples = v[i * simulate_sess_num:(i + 1) *
                                simulate_sess_num]
                    print(sum(samples), len(samples),
                          (sum(samples) / len(samples)) if len(samples) else 0)
                print('avg', (sum(v) / len(v)) if len(v) else 0)
        # Stop once the emptiest bucket is full.
        if min([len(x)
                for x in task_success.values()]) == simulate_sess_num * repeat:
            break
        # pprint(usr_policy.original_goal)
        # pprint(task_success)
    # Final report: success rate per goal type, per repeat, and overall.
    print('task_success')
    for k, v in task_success.items():
        print(k)
        for i in range(repeat):
            samples = v[i * simulate_sess_num:(i + 1) * simulate_sess_num]
            print(sum(samples), len(samples),
                  (sum(samples) / len(samples)) if len(samples) else 0)
        print('avg', (sum(v) / len(v)) if len(v) else 0)
Пример #4
0
def da_evaluate_simulation(policy):
    """Run dialog-act-level self-play (no NLU/NLG — agents exchange dialog
    acts directly, so responses are act lists not strings) between a
    rule-based user simulator and the given system policy, and print task
    success rates broken down by goal type.

    Args:
        policy: the system dialogue policy under evaluation.
    """
    usr_policy = Simulator()
    usr_agent = PipelineAgent(None, None, usr_policy, None, name='usr')
    sys_policy = policy
    sys_dst = RuleDST()
    sys_agent = PipelineAgent(None, sys_dst, sys_policy, None, name='sys')
    sess = BiSession(sys_agent=sys_agent, user_agent=usr_agent)

    # Success tallies per goal type; the non-'All' keys are the CrossWOZ
    # goal-type labels (runtime dict keys — must match usr_policy.goal_type).
    task_success = {
        'All': list(),
        '单领域': list(),
        '独立多领域': list(),
        '独立多领域+交通': list(),
        '不独立多领域': list(),
        '不独立多领域+交通': list()
    }
    simulate_sess_num = 100
    repeat = 5
    random_seed = 2019
    random.seed(random_seed)
    np.random.seed(random_seed)
    torch.manual_seed(random_seed)
    # Pre-generate one seed per attempted session (heavily over-provisioned)
    # so each simulated dialog is individually reproducible.
    random_seeds = [
        random.randint(1, 2**32 - 1)
        for _ in range(simulate_sess_num * repeat * 10000)
    ]
    # Keep simulating until every goal-type bucket holds
    # simulate_sess_num * repeat sessions.
    while True:
        # Dialog-act level: the initial system response is an empty act list.
        sys_response = []
        # Consume the next per-session seed before (re)seeding all RNGs.
        random_seed = random_seeds[0]
        random.seed(random_seed)
        np.random.seed(random_seed)
        torch.manual_seed(random_seed)
        random_seeds.pop(0)
        sess.init_session()
        # print(usr_policy.goal_type)
        # NOTE: the seed is consumed even when the session is discarded here,
        # keeping the seed sequence aligned with attempted sessions.
        if len(task_success[
                usr_policy.goal_type]) == simulate_sess_num * repeat:
            continue
        # Cap each dialog at 15 exchanges; the for-else records a failure
        # when the session never completes within the cap.
        for i in range(15):
            sys_response, user_response, session_over, reward = sess.next_turn(
                sys_response)
            # print('user:', user_response, 'sys:', sys_response)
            # print(session_over, reward)
            # print()
            if session_over is True:
                # pprint(sys_agent.tracker.state)
                task_success['All'].append(1)
                task_success[usr_policy.goal_type].append(1)
                break
        else:
            task_success['All'].append(0)
            task_success[usr_policy.goal_type].append(0)
        print([len(x) for x in task_success.values()])
        # print(min([len(x) for x in task_success.values()]))
        # Periodic progress report: per-repeat success counts and averages.
        if len(task_success['All']) % 100 == 0:
            for k, v in task_success.items():
                print(k)
                for i in range(repeat):
                    samples = v[i * simulate_sess_num:(i + 1) *
                                simulate_sess_num]
                    print(sum(samples), len(samples),
                          (sum(samples) / len(samples)) if len(samples) else 0)
                print('avg', (sum(v) / len(v)) if len(v) else 0)
        # Stop once the emptiest bucket is full.
        if min([len(x)
                for x in task_success.values()]) == simulate_sess_num * repeat:
            break
        # pprint(usr_policy.original_goal)
        # pprint(task_success)
    # Final report: success rate per goal type, per repeat, and overall.
    print('task_success')
    for k, v in task_success.items():
        print(k)
        for i in range(repeat):
            samples = v[i * simulate_sess_num:(i + 1) * simulate_sess_num]
            print(sum(samples), len(samples),
                  (sum(samples) / len(samples)) if len(samples) else 0)
        print('avg', (sum(v) / len(v)) if len(v) else 0)