Exemplo n.º 1
0
def generate_examples(description, examples_path, max_examples, remove_fail,
                      max_turns):
    global num_examples
    examples = []
    num_failed = 0
    for i in range(max_examples):
        scenario = scenario_db.scenarios_list[num_examples %
                                              len(scenario_db.scenarios_list)]
        sessions = [
            agents[0].new_session(0, scenario.kbs[0]),
            agents[1].new_session(1, scenario.kbs[1])
        ]
        controller = Controller(scenario, sessions)
        ex = controller.simulate(max_turns)
        if ex.outcome['reward'] == 0:
            num_failed += 1
            if remove_fail:
                continue
        examples.append(ex)
        num_examples += 1
        logstats.update_summary_map(summary_map, {'length': len(ex.events)})
    with open(examples_path, 'w') as out:
        print >> out, json.dumps([e.to_dict() for e in examples])
    print 'number of failed dialogues:', num_failed

    logstats.add('length', summary_map['length']['mean'])
Exemplo n.º 2
0
    def update_entity_stats(self,
                            summary_map,
                            batch_preds,
                            batch_targets,
                            prefix=''):
        """Accumulate entity-level precision/recall counts into summary_map.

        For each (preds, targets) turn pair, records the number of target
        entities ('pos_target'), predicted entities ('pos_pred'), and true
        positives ('tp'), each key prepended with `prefix`.  Turns whose
        targets are None (padding) and turns with no target entity are
        skipped entirely.
        """
        def get_entity(x):
            return [e for e in x if is_entity(e)]

        pos_target = prefix + 'pos_target'
        pos_pred = prefix + 'pos_pred'
        tp = prefix + 'tp'
        for preds, targets in izip(batch_preds, batch_targets):
            # None targets means that this is a padded turn
            if targets is None:
                # BUG FIX: previously did `recalls.append(None)` on an
                # undefined name `recalls`, raising NameError; padded
                # turns should simply be skipped.
                continue
            preds = set(get_entity(preds))
            targets = set(get_entity(targets))
            # Don't record cases where no entity is presented
            if len(targets) > 0:
                logstats.update_summary_map(summary_map, {
                    pos_target: len(targets),
                    pos_pred: len(preds)
                })
                logstats.update_summary_map(
                    summary_map,
                    {tp: sum([1 if e in preds else 0 for e in targets])})
Exemplo n.º 3
0
 def __init__(self):
     """Create summary_map with a zero entry for every tracked statistic."""
     self.summary_map = {}
     for key in ('undecided', 'fact', 'single_fact', 'joint_fact', 'coref',
                 'correct_single', 'correct_joint', 'correct_joint_ent',
                 'repeated', 'same_col'):
         logstats.update_summary_map(self.summary_map, {key: 0})
Exemplo n.º 4
0
 def eval_joint(self, kb, span):
     """Evaluate a joint fact (num, ent1, _, ent2) against the KB.

     Tallies degenerate cases (same entity repeated, two entities from the
     same column), then counts KB items containing both entities and
     records whether the claimed count matches.
     """
     logstats.update_summary_map(self.summary_map, {'joint_fact': 1})
     num, ent1, _, ent2 = span
     # Canonical forms of the two entities
     ent1, ent2 = ent1[1], ent2[1]
     if ent1 == ent2:
         logstats.update_summary_map(self.summary_map, {'repeated': 1})
         return
     # Same type, i.e. in the same column
     if ent1[1] == ent2[1]:
         logstats.update_summary_map(self.summary_map, {'same_col': 1})
         return
     num = self.str_to_num(num)
     count = 0
     for item in kb.items:
         entities = list(self.item_entities(item))
         if ent1 in entities and ent2 in entities:
             count += 1
     logstats.update_summary_map(self.summary_map, {'correct_joint_ent': 1})
     if count == num:
         logstats.update_summary_map(self.summary_map, {'correct_joint': 1})
Exemplo n.º 5
0
def generate_examples(description, examples_path, max_examples, remove_fail,
                      max_turns):
    global num_examples
    examples = []
    num_failed = 0
    for i in range(max_examples):
        scenario = scenario_db.scenarios_list[num_examples %
                                              len(scenario_db.scenarios_list)]
        sessions = [
            agents[0].new_session(0, scenario.kbs[0]),
            agents[1].new_session(1, scenario.kbs[1])
        ]
        controller = Controller.get_controller(scenario, sessions)
        ex = controller.simulate(max_turns)
        if ex.outcome['reward'] == 0:
            num_failed += 1
            if remove_fail:
                continue
        examples.append(ex)
        num_examples += 1
        logstats.update_summary_map(summary_map, {'length': len(ex.events)})
    with open(examples_path, 'w') as out:
        print >> out, json.dumps([e.to_dict() for e in examples])
    print 'number of failed dialogues:', num_failed

    logstats.add('length', summary_map['length']['mean'])
    if args.fact_check:
        if args.agents[0] == args.agents[1] and hasattr(agents[0], 'env'):
            results0 = agents[0].env.evaluator.report()
            results1 = agents[1].env.evaluator.report()
            results = {k: (results0[k] + results1[k]) / 2. for k in results0}
            logstats.add('bot_chat', results)
Exemplo n.º 6
0
def get_turns_vs_completed(all_chats):
    """Return {num_turns: number of completed chats with that many turns}."""
    by_turns = defaultdict(dict)
    for chat in all_chats:
        if chat["outcome"] is None:
            continue
        completed = 1 if chat["outcome"]["reward"] == 1 else 0
        logstats.update_summary_map(by_turns[len(chat['events'])],
                                    {'complete': completed})
    return {turns: stats['complete']['sum']
            for turns, stats in by_turns.iteritems()}
Exemplo n.º 7
0
def get_select_vs_completed(all_chats):
    """Return {num select events: number of completed chats with that count}."""
    by_selects = defaultdict(dict)
    for chat in all_chats:
        if chat["outcome"] is None:
            continue
        events = [Event.from_dict(e) for e in chat["events"]]
        num_select = sum(1 for e in events if e.action == 'select')
        completed = 1 if chat["outcome"]["reward"] == 1 else 0
        logstats.update_summary_map(by_selects[num_select],
                                    {'complete': completed})
    return {k: v['complete']['sum'] for k, v in by_selects.iteritems()}
Exemplo n.º 8
0
 def eval_single(self, kb, span):
     """Evaluate a single fact (num, ent): does ent occur num times in kb?"""
     logstats.update_summary_map(self.summary_map, {'single_fact': 1})
     num, ent = span
     ent = ent[1]  # take the canonical form
     num = self.str_to_num(num)
     count = sum(1 for item in kb.items
                 for entity in self.item_entities(item)
                 if entity == ent)
     if num == count:
         logstats.update_summary_map(self.summary_map, {'correct_single': 1})
Exemplo n.º 9
0
    def update_selection_stats(self, summary_map, scores, targets, prefix=''):
        """Accumulate node-selection tp / pos_pred / pos_target counts.

        NOTE: targets are from ground truth response and many contain new
        entities.  Ideally this would not happen as a mentioned entity is
        either from the agent's KB or from partner's mentions (which is
        added to the graph), so during decoding there shouldn't be new
        entities.  However, the lexicon may "create" an entity.
        """
        _, num_nodes = scores.shape
        # Truncate targets to the nodes actually scored
        targets = targets[:, :num_nodes]

        predicted = scores > 0
        actual = targets == 1
        logstats.update_summary_map(
            summary_map, {
                prefix + 'tp': np.sum(np.logical_and(predicted, actual)),
                prefix + 'pos_pred': np.sum(predicted),
                prefix + 'pos_target': np.sum(actual)
            })
Exemplo n.º 10
0
def get_stats(chat, agent_id, preprocessor):
    """Compute per-agent utterance/entity/speech-act stats for one chat.

    Returns a dict with 'num_select', 'num_utterance', 'num_entity' and
    'SA_*' counts (sums), mean 'utterance_len', and 'vocab_size' (number
    of distinct non-entity tokens produced by the given agent).

    Raises:
        ValueError: on an event action other than 'select' or 'message'.
    """
    ex = Example.from_dict(None, chat)
    kbs = ex.scenario.kbs
    mentioned_entities = set()
    stats = {}
    vocab = set()
    for event in ex.events:
        if agent_id != event.agent:
            continue
        if event.action == 'select':
            utterance = []
            logstats.update_summary_map(stats, {'num_select': 1})
        elif event.action == 'message':
            utterance = preprocessor.process_event(event, kbs[event.agent],
                                                   mentioned_entities)
            # Skip empty utterances
            if not utterance:
                continue
            utterance = utterance[0]
            for token in utterance:
                if is_entity(token):
                    logstats.update_summary_map(stats, {'num_entity': 1})
                    mentioned_entities.add(token[1][0])
                else:
                    vocab.add(token)
            logstats.update_summary_map(stats,
                                        {'utterance_len': len(utterance)})
        else:
            # BUG FIX: previously fell through with `utterance` unbound,
            # raising NameError below; fail fast with a clear error the
            # same way analyze_strategy does.
            raise ValueError('Unknown event action %s.' % event.action)
        speech_act = get_speech_act(defaultdict(int), event, utterance)
        if speech_act[0] in ('inform', 'ask', 'answer'):
            logstats.update_summary_map(stats, {'SA_' + speech_act[0]: 1})
        logstats.update_summary_map(stats, {'num_utterance': 1})

    # Flatten the summary maps into plain scalars.
    new_stats = {}
    for k in stats:
        if k in ('num_select', 'num_utterance', 'num_entity'):
            new_stats[k] = stats[k]['sum']
        elif k in ('utterance_len', ):
            new_stats[k] = stats[k]['mean']
        elif k.startswith('SA_'):
            new_stats[k] = stats[k]['sum']
    new_stats['vocab_size'] = len(vocab)
    return new_stats
Exemplo n.º 11
0
def get_cross_talk(all_chats):
    summary_map = {}
    is_null = lambda x: x is None or x == 'null'
    count = 0

    def is_valid(event):
        if is_null(event.start_time) or event.start_time >= event.time:
            return False
        return True

    for chat in all_chats:
        if chat["outcome"] is not None and chat["outcome"]["reward"] == 1:
            events = [Event.from_dict(e) for e in chat["events"]]
            for event1, event2 in izip(events, events[1:]):
                # start_time is not available
                if not is_valid(event2):
                    continue
                sent_time = float(event1.time)
                start_time = float(event2.start_time)
                cross_talk = 1 if start_time < sent_time else 0
                logstats.update_summary_map(summary_map,
                                            {'cross_talk': cross_talk})

                if is_valid(event1):
                    typing_time = float(event1.time) - float(event1.start_time)
                    assert typing_time > 0
                    msg_len = len(event1.data)
                    logstats.update_summary_map(
                        summary_map, {'char_per_sec': msg_len / typing_time})

    try:
        print 'Char/Sec:', summary_map['char_per_sec']['mean']
    except KeyError:
        pass
    try:
        return summary_map['cross_talk']['mean']
    # Cross talk only available for chats with start_time
    except KeyError:
        return -1
Exemplo n.º 12
0
def check_fact(summary_map, tokens, kb):
    '''
    Simple fact checker:
        each utterance is converted to a list of numbers and entities and we assume
        that the number describes the following entities, which will cause some false
        negatives.
    '''
    claims = []
    total_items = len(kb.items)
    for token in tokens:
        if is_entity(token):
            # Attach the entity (canonical form) to the most recent number
            if claims:
                claims[-1][1].append(token[1][0])
        else:
            number = to_number(token, total_items)
            if number:
                claims.append((number, []))
    for n, entities in claims:
        if entities:
            logstats.update_summary_map(
                summary_map,
                {'correct': 1 if n == count_kb_entity(kb, entities) else 0})
Exemplo n.º 13
0
def get_dialog_stats(summary_map, utterance_counts, dialog):
    '''
    Count number of entities and attributes per dialogue.
    '''
    num_entities = 0
    all_ents = set()
    for agent, act, ents, utterance in dialog:
        num_ents = len(ents)
        num_types = len(set(ents))
        num_entities += num_ents
        all_ents.update(ents)
        if num_ents > 0:
            multi = 1 if num_types > 1 else 0
            repeated = 1 if num_ents > num_types else 0
            logstats.update_summary_map(
                summary_map,
                {'multi_entity_per_entity_utterance': multi})
            logstats.update_summary_map(
                summary_map,
                {'repeated_entity_per_entity_utterance': repeated})
            if repeated:
                examples['repeated_entity_per_entity_utterance'].append(
                    utterance)

    logstats.update_summary_map(
        summary_map, {
            'num_entity_per_dialog': num_entities,
            'num_entity_type_per_dialog': len(all_ents),
            'num_attr_type_per_dialog': len(set(e[1] for e in all_ents))
        })

    # Bigram counts over entity-abstracted utterances
    dialog = abstract_entity(dialog)
    utterances = get_utterance(dialog)
    for prev_utt, next_utt in izip(utterances, utterances[1:]):
        utterance_counts[prev_utt][next_utt] += 1
Exemplo n.º 14
0
 def update_summary(self, summary_map, bleu_scores):
     """Fold per-utterance BLEU scores into summary_map."""
     for score in bleu_scores:
         # None means no entity in this utterance
         if score is None:
             continue
         logstats.update_summary_map(summary_map, {'bleu': score})
Exemplo n.º 15
0
 def inc_undecided(self):
     """Count one span whose fact type could not be decided."""
     logstats.update_summary_map(self.summary_map, {'undecided': 1})
Exemplo n.º 16
0
 def inc_fact(self):
     """Count one span recognized as a fact."""
     logstats.update_summary_map(self.summary_map, {'fact': 1})
Exemplo n.º 17
0
def analyze_strategy(all_chats, scenario_db, preprocessor, text_output, lm):
    """Aggregate strategy statistics over all completed dialogues.

    Skips chats with no outcome or reward 0.  For each remaining chat,
    tokenizes each message event, tracks mentioned entities, checks facts,
    scores utterances with the optional language model, counts n-grams and
    linguistic templates, records speech acts, and analyzes the order in
    which KB attributes are mentioned.

    Args:
        all_chats: iterable of raw chat dicts consumable by Example.from_dict.
        scenario_db: scenario lookup used to reconstruct each Example.
        preprocessor: entity-linking tokenizer (process_event).
        text_output: optional path; when set, entity-abstracted utterances
            are written there one per line.
        lm: optional language model with a score(sentence) method.

    Returns:
        A dict of aggregated statistics: 'speech_act', 'kb_strategy',
        'dialog_stats', 'lm_score', 'utterance_counts', 'ngram_counts',
        'linguistic_templates', 'speech_act_sequences', 'correct',
        'entity_mention', 'multi_speech_act', 'alpha_stats',
        'num_items_stats'.
    """
    fout = open(text_output, 'w') if text_output is not None else None
    # Accumulators; the *_summary_map dicts are filled in place by
    # logstats.update_summary_map and by the various helper functions.
    speech_act_summary_map = defaultdict(int)
    kb_strategy_summary_map = {}
    dialog_summary_map = {}
    fact_summary_map = {}
    utterance_counts = defaultdict(lambda: defaultdict(int))
    ngram_counts = defaultdict(lambda: defaultdict(int))
    template_summary_map = {'total': 0.}
    speech_act_sequence_summary_map = {'total': 0.}
    alpha_stats = []
    num_items_stats = []
    num_attrs_mentioned = 0.
    most_mentioned_attrs = 0.
    entity_mention_summary_map = {}

    total_events = 0
    total_dialogues = 0.

    lm_summary_map = {}
    for raw in all_chats:
        ex = Example.from_dict(scenario_db, raw)
        kbs = ex.scenario.kbs
        if ex.outcome is None or ex.outcome["reward"] == 0:
            continue  # skip incomplete dialogues
        total_dialogues += 1.
        dialog = []
        mentioned_entities = set()
        # Per-event pass: tokenize messages, record per-utterance stats.
        for i, event in enumerate(ex.events):
            if event.action == 'select':
                utterance = []
            elif event.action == 'message':
                utterance = preprocessor.process_event(event, kbs[event.agent],
                                                       mentioned_entities)
                # Skip empty utterances
                if not utterance:
                    continue
                else:
                    utterance = utterance[0]
                    for token in utterance:
                        if is_entity(token):
                            mentioned_entities.add(token[1][0])
                    logstats.update_summary_map(
                        dialog_summary_map,
                        {'utterance_length': len(utterance)})
                    check_fact(fact_summary_map, utterance, kbs[event.agent])
                    if lm:
                        logstats.update_summary_map(lm_summary_map, {
                            'score':
                            lm.score(' '.join(entity_to_type(utterance)))
                        })
                    update_ngram_counts(ngram_counts, utterance)
                    if fout:
                        fout.write('%s\n' %
                                   (' '.join(entity_to_type(utterance))))
            else:
                raise ValueError('Unknown event action %s.' % event.action)

            total_events += 1

            speech_act = get_speech_act(speech_act_summary_map, event,
                                        utterance)
            get_linguistic_template(template_summary_map, utterance)
            entities = [x[1] for x in utterance if is_entity(x)]
            dialog.append((event.agent, speech_act, entities, utterance))

        # Per-dialogue aggregation over the collected (agent, act, ents,
        # utterance) tuples.
        get_dialog_stats(dialog_summary_map, utterance_counts, dialog)
        get_speech_act_histograms(speech_act_sequence_summary_map, dialog)
        get_entity_mention(entity_mention_summary_map, dialog, kbs)

        orders, mentioned_attrs, most_mentioned_label = get_kb_strategy(
            kbs, dialog)
        orders = tuple(orders)
        most_mentioned_attrs += alpha_labels_to_values[most_mentioned_label]

        # Histogram of attribute-mention orders, bucketed by order length.
        if len(orders) not in kb_strategy_summary_map.keys():
            kb_strategy_summary_map[len(orders)] = {}

        if orders not in kb_strategy_summary_map[len(orders)].keys():
            kb_strategy_summary_map[len(orders)][orders] = 0.0

        kb_strategy_summary_map[len(orders)][tuple(orders)] += 1.0
        alphas = ex.scenario.alphas

        # NOTE(review): len(orders) / len(alphas) is int / int, which
        # truncates under Python 2 (likely always 0); the accumulated
        # value is never returned, so this is currently harmless —
        # confirm before relying on num_attrs_mentioned.
        num_attrs_mentioned += len(orders) / len(alphas)

        first_mentioned_label = NO_ALPHA_MENTION
        if len(orders) > 0:
            first_mentioned_label = orders[0]

        if len(mentioned_attrs) > 0:
            first_mentioned_type, first_mentioned_attr, first_agent = mentioned_attrs[
                0]
            update_item_stats(num_items_stats, first_mentioned_type,
                              first_mentioned_attr, kbs[first_agent])

            if first_mentioned_label != NO_ALPHA_MENTION:
                update_alpha_stats(alpha_stats, kbs[first_agent],
                                   first_mentioned_label)
                # print "First mentioned attribute alpha:", first_mentioned, alpha_labels_to_values[first_mentioned]

    if fout:
        fout.close()
    # Summarize stats: normalize counts and flatten summary maps into the
    # returned dictionary.
    total = float(total_events)
    kb_strategy_totals = {
        k1: sum(v2 for v2 in v1.values())
        for k1, v1 in kb_strategy_summary_map.items()
    }
    dialog_stats = {
        k: dialog_summary_map[k]['mean']
        for k in dialog_summary_map
    }
    dialog_stats['entity_type_token_ratio'] = dialog_summary_map[
        'num_entity_type_per_dialog']['sum'] / float(
            dialog_summary_map['num_entity_per_dialog']['sum'])

    unigram_counts = {k[0]: v for k, v in ngram_counts[1].iteritems()}
    dialog_stats['vocab_size'] = len(unigram_counts)
    dialog_stats['unigram_entropy'] = count_to_entropy(unigram_counts)
    multi_speech_act = sum([
        speech_act_summary_map[k] for k in speech_act_summary_map if len(k) > 1
    ]) / total

    return {
        'speech_act': {
            k: speech_act_summary_map[k] / total
            for k in speech_act_summary_map.keys()
        },
        'kb_strategy': {
            k1: {
                ", ".join(k2): v2 / kb_strategy_totals[k1]
                for k2, v2 in v1.items()
            }
            for k1, v1 in kb_strategy_summary_map.items()
        },
        'dialog_stats': dialog_stats,
        'lm_score': -1 if not lm else lm_summary_map['score']['mean'],
        'utterance_counts': utterance_counts,
        'ngram_counts': ngram_counts,
        'linguistic_templates': template_summary_map,
        'speech_act_sequences': speech_act_sequence_summary_map,
        'correct': fact_summary_map['correct']['mean'],
        'entity_mention': {
            k: np.mean(v)
            for k, v in entity_mention_summary_map['first'].iteritems()
        },
        'multi_speech_act': multi_speech_act,
        'alpha_stats': alpha_stats,
        'num_items_stats': num_items_stats
    }
Exemplo n.º 18
0
 def inc_coref(self):
     """Count one span that refers back to an earlier mention (coreference)."""
     logstats.update_summary_map(self.summary_map, {'coref': 1})