Example No. 1
def main(opt):
    """Extracts training data for the negative response classifier (NRC) from Mturk logs

    input: file of logs (in ParlaiDialog format) from Mturk task 1 with turn-by-turn
        quality ratings 1-5
    output: file of episodes (self-feeding format) w/ +1/-1 ratings indicating
        positive/negative example
    """
    examples = []
    positives = opt['positives'].split(',')
    negatives = opt['negatives'].split(',')
    assert len(set(positives).intersection(set(negatives))) == 0

    num_episodes = 0
    num_parleys = 0
    for episode in extract_parlai_episodes(opt['infile']):
        num_episodes += 1
        history = []
        for parley in episode:
            num_parleys += 1

            # Update history (not including stock control flow responses)
            if parley.context.startswith(INITIAL_PROMPT):
                # Conversation prompt, first utterance
                # Begin history
                history = [parley.response]
            elif parley.context.startswith(EXP_REQUEST):
                # Asked for y_exp, got y_exp
                # Messed up, so blast history
                example = Parley(
                    context=add_person_tokens(history[:-2], last_speaker=1),
                    response=parley.response,  # y_exp
                )
                examples.append(example)
                history = []
            elif parley.context.startswith(NEWTOPIC):
                # Asked for new topic, got a first utterance
                # Begin new history
                history = [parley.response]
            elif parley.context.startswith(RAT_REQUEST):
                # Asked for rating, got one-word rating
                # Nothing to update in history
                pass
            elif CONTINUE in parley.context:
                # if response was negative, history will get blasted in EXP_REQUEST
                # if we're here, response was neutral/positive, so continue the history
                history.append(parley.context[parley.context.rindex(':') + 1:])
                history.append(parley.response)
            else:
                # normal turn: maintain the history
                history.append(parley.context)
                history.append(parley.response)

    with open(opt['outfile'], 'w') as outfile:
        for ex in examples:
            outfile.write(json.dumps(ex.to_dict()) + '\n')

    print(f"Extracted {len(examples)} ratings out of {num_episodes} episodes "
          f"({num_parleys} parleys) and wrote them to {opt['outfile']} with "
          f"histsz == {opt['history_size']}.")
Example No. 2
def main(config):
    """
    Creates .stitched files from .suggested files.

    input: a .suggested file of logs (in ParlaiDialog format) from Mturk task 2, each of
        which starts with an initial prompt or topic request, and ends with a y
        that corresponds to the y_exp given in the previous turn
    output: a .stitched file (in self-feeding format) with the original mistake by the
        bot replaced with the MTurked y (based on y_exp)
    """
    examples = []
    episodes = [e for e in extract_parlai_episodes(config['infile'])]
    for episode in episodes:
        history = []
        num_parleys = len(episode)
        for i, parley in enumerate(episode):
            if i == 0:  # Don't include the topic request
                history.append(parley.response)
                continue
            elif i == num_parleys - 3:
                # third to last was mistake and negative feedback
                continue
            elif i == num_parleys - 2:
                # penultimate turn was suggestion request and y_exp
                continue
            elif i == num_parleys - 1:
                # ultimate turn was verbatim request and y
                example = Parley(
                    context=add_person_tokens(history, last_speaker=1),
                    response=parley.response,  # y
                )
                examples.append(example)
            else:
                # normal turn; just add to history
                history.append(parley.context)
                history.append(parley.response)

    # Write new episodes to self-feeding format
    with open(config['outfile'], 'w') as outfile:
        for ex in examples:
            outfile.write(json.dumps(ex.to_dict()) + '\n')

    print(
        f"Extracted {len(examples)} self-feeding episodes out of "
        f"{len(episodes)} parlai episodes and wrote them to {config['outfile']}."
    )
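All of these scripts serialize examples with ex.to_dict() and json.dumps. The Parley container itself belongs to the self-feeding utilities; the following hypothetical minimal equivalent is shown only to make the output format concrete. Field names are taken from the constructor calls in these examples, and the default values are guesses.

import json
from dataclasses import dataclass, field, asdict


@dataclass
class ParleySketch:
    # Hypothetical stand-in for the self-feeding Parley helper; the real class may differ.
    context: str
    response: str
    reward: int = 0
    candidates: list = field(default_factory=list)
    memories: list = field(default_factory=list)

    def to_dict(self):
        return asdict(self)


ex = ParleySketch(
    context="__p1__ i love hiking __p2__ me too, where do you go?",
    response="mostly the mountains near my house",
)
print(json.dumps(ex.to_dict()))
# one JSON object per line is what the outfile loops above produce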
Example No. 3
def main(config):
    """
    Creates .identity files from .sliced files.

    input: a .sliced file of logs (in ParlaiDialog format) from Mturk task 1, each of
        which starts with an initial prompt or topic request, and ends with a y_exp
    output: an .identity file (in self-feeding format) with y_exps used as though they
        were ys
    """
    examples = []
    episodes = [e for e in extract_parlai_episodes(config['infile'])]
    for episode in episodes:
        history = []
        num_parleys = len(episode)
        for i, parley in enumerate(episode):
            if i == 0:  # Don't include the topic request
                history.append(parley.response)
                continue
            elif i == num_parleys - 2:
                # penultimate turn was mistake and negative feedback
                continue
            elif i == num_parleys - 1:
                # ultimate turn was correction request and y_exp
                example = Parley(
                    context=add_person_tokens(history, last_speaker=1),
                    response=parley.response,  # y_exp
                )
                examples.append(example)
            else:
                # normal turn; just add to history
                history.append(parley.context)
                history.append(parley.response)

    # Write new episodes to self-feeding format
    with open(config['outfile'], 'w') as outfile:
        for ex in examples:
            outfile.write(json.dumps(ex.to_dict()) + '\n')

    print(
        f"Extracted {len(examples)} self-feeding episodes out of "
        f"{len(episodes)} parlai episodes and wrote them to {config['outfile']}."
    )
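For orientation, the index arithmetic above can be traced on a hypothetical five-parley .sliced episode; the turn descriptions below simply restate the branch comments.

num_parleys = 5  # hypothetical episode length
for i in range(num_parleys):
    if i == 0:
        role = "topic request; the response seeds the history"
    elif i == num_parleys - 2:
        role = "mistake + negative feedback; skipped"
    elif i == num_parleys - 1:
        role = "correction request + y_exp; y_exp becomes the example's response"
    else:
        role = "normal turn; context and response are appended to the history"
    print(i, role)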
Example No. 4
def main(config):
    """
    Creates .unfiltered files from .sliced files.

    input: a .sliced file of logs (in ParlaiDialog format) from Mturk task 1, each of
        which starts with an initial prompt or topic request, and ends with a y_exp
    output: an .unfiltered file (in self-feeding format) with every utterance output
        by the bot used as a label (i.e., act as though the bot were a human and we
        want to train in the normal supervised way).
    """
    examples = []
    episodes = [e for e in extract_parlai_episodes(config['infile'])]
    for episode in episodes:
        history = []
        num_parleys = len(episode)
        for i, parley in enumerate(episode):
            if i == 0:  # Don't include the topic request
                history.append(parley.response)
                continue
            elif i == num_parleys - 1:
                # ultimate turn was correction request and explanation
                continue
            else:
                example = Parley(
                    context=add_person_tokens(history, last_speaker=1),
                    response=parley.context,  # What the bot said
                )
                examples.append(example)
                history.append(parley.context)
                history.append(parley.response)

    # Write new episodes to self-feeding format
    with PathManager.open(config['outfile'], 'w') as outfile:
        for ex in examples:
            outfile.write(json.dumps(ex.to_dict()) + '\n')

    print(
        f"Extracted {len(examples)} self-feeding episodes out of "
        f"{len(episodes)} parlai episodes and wrote them to {config['outfile']}."
    )
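The .unfiltered variant emits one example per normal turn, with the bot's utterance (parley.context) as the label. A standalone trace on a made-up four-parley episode (the strings below are hypothetical, not drawn from the Mturk data) shows the effect.

episode = [
    ("NEW TOPIC REQUEST", "let's talk about dogs"),        # i == 0: seeds history
    ("i have two huskies! do you like dogs?", "wow, do they get along?"),  # normal turn -> example
    ("they are best friends", "that's adorable"),          # normal turn -> example
    ("SUGGESTION REQUEST", "you could ask their names"),   # final turn: skipped
]
history = []
for i, (context, response) in enumerate(episode):
    if i == 0:
        history.append(response)
    elif i == len(episode) - 1:
        continue
    else:
        # the example is built from the history so far, labeled with the bot's utterance
        print({"context": " ".join(history), "label": context})
        history += [context, response]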
Example No. 5
def main(opt):
    """Extracts training data for the negative response classifier (NRC) from Mturk logs

    input: file of logs (in ParlaiDialog format) from Mturk task 1 with turn-by-turn
        quality ratings 1-5
    output: file of episodes (self-feeding format) w/ +1/-1 ratings indicating
        positive/negative example
    """
    examples = []

    num_episodes = 0
    num_parleys = 0
    for episode in extract_parlai_episodes(opt['infile']):
        num_episodes += 1
        history = []
        for parley in episode:
            num_parleys += 1
            # Update history (not including stock control flow responses)
            if (parley.context.startswith(INITIAL_PROMPT) or
                    parley.context.startswith(NEWTOPIC)):
                # a prompt, first utterance
                # Begin history
                history = [parley.response]
                # NOTE: we now allow these one-utterance episodes to be examples
                # continue
            elif (parley.context.startswith(EXP_REQUEST) or
                  parley.context.startswith(RAT_REQUEST)):
                # If 'filter_accusation' is on and the last example added was a human,
                # toss the previous example, which is when the human expressed
                # dissatisfaction
                if (opt['mode'] == 'human' and
                    opt['filter_accusation'] and
                    parley.context.startswith(EXP_REQUEST) and
                        len(examples) > 0):
                    examples.pop()
                # If 'filter_mistake' is on and the last example in the queue was a bot,
                # toss it too, since that's when the bot messed up
                if (opt['mode'] == 'bot' and
                    opt['filter_mistake'] and
                    parley.context.startswith(EXP_REQUEST) and
                        len(examples) > 0):
                    examples.pop()

                # Asked for y_exp or rating, got it
                # Messed up, so blast history
                history = []
                continue
            elif CONTINUE in parley.context:
                # if response was negative, history will get blasted in EXP_REQUEST
                # if we're here, response was neutral/positive, so continue the history
                history.append(parley.context[parley.context.rindex(':') + 1:])
                history.append(parley.response)
            else:
                # normal turn: maintain the history
                history.append(parley.context)
                history.append(parley.response)

            if opt['mode'] in ['bot'] and len(history) >= 2:
                if len(history) == 2:
                    example = Parley(
                        context='__null__',
                        response=history[0],
                    )
                else:
                    example = Parley(
                        context=add_person_tokens(history[:-2], last_speaker=1),
                        response=history[-2],  # What the bot said
                    )
                examples.append(example)

            if opt['mode'] in ['human']:
                if len(history) == 1:
                    example = Parley(
                        context='__null__',
                        response=history[0],
                    )
                else:
                    example = Parley(
                        # this is not technically true:
                        # the last speaker was the bot (__p2__),
                        # not the human (__p1__), but in all our data, __p1__ is always
                        # the speaking partner of the learner
                        context=add_person_tokens(history[:-1], last_speaker=1),
                        response=history[-1],  # What the human said
                    )
                examples.append(example)

    with open(opt['outfile'], 'w') as outfile:
        for ex in examples:
            outfile.write(json.dumps(ex.to_dict()) + '\n')

    print(f"Extracted {len(examples)} examples out of {num_episodes} episodes "
          f"({num_parleys} parleys) and wrote them to {opt['outfile']} with "
          f"histsz == {opt['history_size']}.")
Example No. 6
def main(config):
    """Extracts training data for the negative response classifier (NRC) from Mturk logs

    input: file of logs (in ParlaiDialog format) from Mturk task 1 with turn-by-turn
        quality ratings 1-5
    output: file of episodes (self-feeding format) w/ +1/-1 ratings indicating
        positive/negative example
    """
    examples = []
    positives = config['positives'].split(',')
    negatives = config['negatives'].split(',')
    assert len(set(positives).intersection(set(negatives))) == 0

    num_episodes = 0
    num_parleys = 0
    for episode in extract_parlai_episodes(config['infile']):
        num_episodes += 1
        history = []
        for parley in episode:
            num_parleys += 1

            # Update history (not including stock control flow responses)
            if parley.context.startswith(INITIAL_PROMPT.lower()):
                # Conversation prompt, first utterance
                history = [parley.response]
            elif parley.context.startswith(SUGGESTION_REQUEST.lower()):
                # Asked for y_exp, got y_exp
                pass
            elif parley.context.startswith(NEW_TOPIC_REQUEST.lower()):
                # Asked for new topic, got a first utterance
                history = [parley.response]
            else:
                history.append(parley.context)
                history.append(parley.response)

            # Only create a new example if this parley's rating is relevant
            if parley.reward in (positives + negatives):
                # Concatenate history and add speaker tokens as necessary
                # history_size refers to the total number of utterances,
                # including the response that's being classified (so if
                # history_size == 1 then classify based only on the response w/o
                # any extra context; history_size == 0 means predict sentiment
                # from '__null__').
                # Note that the response being classified should always be
                # preceded by __p1__ (the human), not __p2__ (the bot).
                if config['history_size'] < 0:
                    utterances = history
                elif config['history_size'] == 0:
                    utterances = ['__null__']
                else:
                    utterances = history[-config['history_size']:]

                context = add_person_tokens(utterances, last_speaker=1)

                label = 1 if parley.reward in positives else -1

                example = Parley(context, label)
                examples.append(example)

    with open(config['outfile'], 'w') as outfile:
        for ex in examples:
            outfile.write(json.dumps(ex.to_dict()) + '\n')

    print(f"Extracted {len(examples)} ratings out of {num_episodes} episodes "
          f"({num_parleys} parleys) and wrote them to {config['outfile']} with "
          f"histsz == {config['history_size']}.")
Example No. 7
def create_supp(opt):
    """
    Evaluates a model.

    :param opt: tells the evaluation function how to run
    :return: the final result of calling report()
    """
    # Create model and assign it to the specified task
    agent = create_agent(opt, requireModelExists=True)
    world = create_task(opt, agent)

    # Extract supp examples from misses on deploy set
    num_seen = 0
    num_misses = 0
    num_supp = 0
    num_supp_correct = 0
    examples = []
    while not world.epoch_done():
        world.parley()
        # Examples are considered one at a time
        num_seen += 1
        if num_seen % 1000 == 0:
            print(f"{num_seen}/{world.num_examples()}")
        report = world.report()
        if report['accuracy'] < 1.0:
            # Example is a miss (i.e., model got it wrong)
            num_misses += 1
            if random.random() < opt['conversion_rate']:
                # Example will be converted (e.g., bot recognized mistake and asked)
                num_supp += 1
                texts = world.acts[0]['text'].split('\n')
                context = texts[-1]
                memories = texts[:-1]
                candidates = world.acts[0]['label_candidates']
                # Reward of 1 indicates positive, -1 indicates negative (for training)
                # For now, we only train with positives, and the reward field is unused
                reward = 1

                if random.random() < opt['conversion_acc']:
                    # Example will be converted correctly (e.g., good user response)
                    num_supp_correct += 1
                    response = world.acts[0]['eval_labels'][0]
                else:
                    # Example will be converted incorrectly (e.g., bad user response)
                    response = random.choice(
                        world.acts[0]['label_candidates'][: NUM_INLINE_CANDS - 1]
                    )

                example = Parley(
                    context, response, reward, candidates, memories
                )
                examples.append(example)
        world.reset_metrics()

    print("EPOCH DONE")
    print(f"Model file: {opt['model_file']}")
    print(f"Deploy file: {opt['task']}")
    print(f"Supp file: {opt['outfile']}")
    print(f"Deploy size (# examples seen): {num_seen}")
    print(f"Supp size (# examples converted): {num_supp}")

    acc = 1 - (num_misses / num_seen)
    print(f"Accuracy (% of deploy): {acc * 100:.1f}% ({num_misses} misses)")
    print(f"Conversion rate (% of misses): {num_supp/num_misses * 100:.2f}% "
          f"({num_supp}/{num_misses})")
    print(
        f"Conversion acc (% of converted): {num_supp_correct/num_supp * 100:.2f}% "
        f"({num_supp_correct}/{num_supp})")

    with open(opt['outfile'], 'w') as outfile:
        for ex in examples:
            outfile.write(json.dumps(ex.to_dict()) + '\n')
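The two nested random.random() checks above implement a simple two-stage conversion model: a miss becomes a supp example with probability conversion_rate, and a converted example gets the true label with probability conversion_acc. The self-contained simulation below (rates are arbitrary, chosen only for the demo) reproduces the bookkeeping.

import random

random.seed(0)
conversion_rate, conversion_acc = 0.5, 0.8   # hypothetical settings
num_misses = num_supp = num_supp_correct = 0
for _ in range(10_000):                      # pretend every example seen is a miss
    num_misses += 1
    if random.random() < conversion_rate:
        num_supp += 1
        if random.random() < conversion_acc:
            num_supp_correct += 1
print(f"Conversion rate: {num_supp / num_misses:.1%}")
print(f"Conversion acc:  {num_supp_correct / num_supp:.1%}")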
Example No. 8
def main(opt):
    """Extracts training data for the negative response classifier (NRC) from Mturk logs

    input: file of logs (in ParlaiDialog format) from Mturk task 1 with turn-by-turn
        quality ratings 1-5
    output: file of episodes (self-feeding format) w/ +1/-1 ratings indicating
        positive/negative example
    """
    examples = []
    positives = opt['positives'].split(',')
    negatives = opt['negatives'].split(',')
    assert len(set(positives).intersection(set(negatives))) == 0

    num_episodes = 0
    num_parleys = 0
    for episode in extract_parlai_episodes(opt['infile']):
        num_episodes += 1
        history = []
        for parley in episode:
            num_parleys += 1

            # Update history (not including stock control flow responses)
            if parley.context.startswith(INITIAL_PROMPT):
                # Conversation prompt, first utterance
                # Begin history
                history = [parley.response]
            elif parley.context.startswith(EXP_REQUEST):
                # Asked for y_exp, got y_exp
                # Messed up, so blast history
                history = []
            elif parley.context.startswith(NEWTOPIC):
                # Asked for new topic, got a first utterance
                # Begin new history
                history = [parley.response]
            elif parley.context.startswith(RAT_REQUEST):
                # Concatenate history and add speaker tokens as necessary
                # history_size refers to the total number of utterances,
                # including the response that's being classified (so if
                # history_size == 1 then classify based only on the response w/o
                # any extra context; history_size == 0 means predict sentiment
                # from '__null__').
                # Note that the response being classified should always be
                # preceded by __p1__ (the human), not __p2__ (the bot).
                if opt['history_size'] < 0:
                    utterances = history
                elif opt['history_size'] == 0:
                    utterances = ['__null__']
                else:
                    utterances = history[-opt['history_size'] :]
                context = add_person_tokens(utterances, last_speaker=1)

                if parley.response in positives:
                    label = 1
                elif parley.response in negatives:
                    label = -1
                else:
                    label = 0

                if label:
                    example = Parley(context, label)
                    examples.append(example)

            elif CONTINUE in parley.context:
                # if response was negative, history will get blasted in EXP_REQUEST
                # if we're here, response was neutral/positive, so continue the history
                history.append(parley.context[parley.context.rindex(':') + 1 :])
                history.append(parley.response)
            else:
                history.append(parley.context)
                history.append(parley.response)

    with open(opt['outfile'], 'w') as outfile:
        for ex in examples:
            outfile.write(json.dumps(ex.to_dict()) + '\n')

    print(
        f"Extracted {len(examples)} ratings out of {num_episodes} episodes "
        f"({num_parleys} parleys) and wrote them to {opt['outfile']} with "
        f"histsz == {opt['history_size']}."
    )
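Finally, the positives and negatives options are just comma-separated lists of acceptable one-word ratings; anything outside both lists is discarded. A quick sketch with made-up rating strings shows the label mapping.

positives = "4,5".split(',')   # hypothetical choice of positive ratings
negatives = "1,2".split(',')   # hypothetical choice of negative ratings
assert len(set(positives).intersection(set(negatives))) == 0

for rating in ("1", "3", "5"):
    if rating in positives:
        label = 1
    elif rating in negatives:
        label = -1
    else:
        label = 0  # neutral ratings produce no example
    print(rating, "->", label if label else "discarded")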