Example #1
def write_predictions(args, model, dataset):
    """
    Writes model predictions to an output file. The official QA metrics (EM/F1)
    can be computed using `evaluation.py`. 

    Args:
        args: `argparse` object.
        model: Instance of the PyTorch model.
        dataset: Test dataset (technically, the development dataset since the
            official test datasets are blind and hosted by official servers).
    """
    # Load model checkpoint.
    model.load_state_dict(torch.load(args.model_path, map_location='cpu'))
    model.eval()

    # Set up test dataloader.
    test_dataloader = tqdm(
        dataset.get_batch(shuffle_examples=False),
        **_TQDM_OPTIONS,
    )

    # Output predictions.
    outputs = []

    with torch.no_grad():
        for (i, batch) in enumerate(test_dataloader):
            # Forward inputs.
            start_logits, end_logits = model(batch)

            # Form distributions over start and end positions.
            batch_start_probs = F.softmax(start_logits, 1)
            batch_end_probs = F.softmax(end_logits, 1)

            for j in range(start_logits.size(0)):
                # Find question index and passage.
                sample_index = args.batch_size * i + j
                qid, passage, _, _, _ = dataset.samples[sample_index]

                # Unpack start and end probabilities. Find the constrained
                # (start, end) pair that has the highest joint probability.
                start_probs = unpack(batch_start_probs[j])
                end_probs = unpack(batch_end_probs[j])
                start_index, end_index = search_span_endpoints(
                    start_probs, end_probs)

                # Grab predicted span.
                pred_span = ' '.join(passage[start_index:(end_index + 1)])

                # Add prediction to outputs.
                outputs.append({'qid': qid, 'answer': pred_span})

    # Write predictions to output file.
    with open(args.output_path, 'w+') as f:
        for elem in outputs:
            f.write(f'{json.dumps(elem)}\n')
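
The output file written above contains one JSON object per line with 'qid' and 'answer' keys. The actual evaluation.py script is not part of this excerpt; as a rough, hypothetical sketch of what a SQuAD-style exact-match check over such a file could look like (the `gold` dict is a stand-in for whatever evaluation.py loads):

# Hypothetical sketch of SQuAD-style exact match over the JSONL predictions file
# written by write_predictions(). Normalization (lowercase, drop punctuation and
# articles, collapse whitespace) follows the usual SQuAD recipe; `gold` is a
# made-up {qid: [gold answers]} mapping, not something from this repo.
import json
import re
import string

def normalize(text):
    text = text.lower()
    text = ''.join(ch for ch in text if ch not in string.punctuation)
    text = re.sub(r'\b(a|an|the)\b', ' ', text)
    return ' '.join(text.split())

def exact_match(pred_path, gold):
    correct, total = 0, 0
    with open(pred_path) as f:
        for line in f:
            pred = json.loads(line)
            total += 1
            answers = gold.get(pred['qid'], [])
            correct += any(normalize(pred['answer']) == normalize(a) for a in answers)
    return 100.0 * correct / max(total, 1)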
Example #2
def write_predictions(args, model, dataset):
    """
    Writes model predictions to an output file. The official QA metrics (EM/F1)
    can be computed using `evaluation.py`. 

    Args:
        args: `argparse` object.
        model: Instance of the PyTorch model.
        dataset: Test dataset (technically, the development dataset since the
            official test datasets are blind and hosted by official servers).
    """
    # Load model checkpoint.
    model.load_state_dict(torch.load(args.model_path, map_location='cpu'))
    model.eval()

    # Set up test dataloader.
    test_dataloader = tqdm(
        dataset.get_batch(shuffle_examples=False),
        **_TQDM_OPTIONS,
    )

    # Output predictions.
    outputs = []

    # Task-specific helpers: POS tags to keep and a spaCy lemmatizer (task 1),
    # plus a Hugging Face question-answering pipeline used by the task-2 branch.
    keep = {'PROPN', 'NUM', 'VERB', 'NOUN', 'ADJ'}
    tokenize = lambda text: [token.lemma_ for token in nlp(text)]
    question_answering = pipeline('question-answering')

    with torch.no_grad():
        for (i, batch) in enumerate(test_dataloader):
            # Forward inputs.
            start_logits, end_logits = model(batch)

            # Form distributions over start and end positions.
            batch_start_probs = F.softmax(start_logits, 1)
            batch_end_probs = F.softmax(end_logits, 1)

            for j in range(start_logits.size(0)):
                # Find question index and passage.
                sample_index = args.batch_size * i + j
                qid, context, question, ans_start, ans_end = dataset.samples[
                    sample_index]

                if args.task == 2:
                    result = question_answering(question=' '.join(question),
                                                context=' '.join(context))
                    outputs.append({'qid': qid, 'answer': result['answer']})
                else:
                    # Unpack start and end probabilities. Find the constrained
                    # (start, end) pair that has the highest joint probability.
                    start_probs = unpack(batch_start_probs[j])
                    end_probs = unpack(batch_end_probs[j])

                    start_index, end_index = search_span_endpoints(
                        start_probs, end_probs, args, context, question,
                        ans_start, ans_end)

                    # Grab predicted span.
                    pred_span = ' '.join(context[start_index:(end_index + 1)])

                    # Add prediction to outputs.
                    outputs.append({'qid': qid, 'answer': pred_span})

    # Write predictions to output file.
    with open(args.output_path, 'w+') as f:
        for elem in outputs:
            f.write(f'{json.dumps(elem)}\n')
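
The task-2 branch above delegates answer extraction entirely to the Hugging Face transformers question-answering pipeline. For reference, a minimal standalone call looks like the sketch below; it downloads a default SQuAD-fine-tuned model on first use, and the question/context strings here are made up:

# Minimal standalone use of the pipeline invoked in the task-2 branch above.
from transformers import pipeline

qa = pipeline('question-answering')  # downloads a default extractive QA model
result = qa(
    question='Where were the first modern Olympic Games held?',
    context='The first modern Olympic Games were held in Athens in 1896.',
)
# `result` is a dict with 'answer', a confidence 'score', and character
# offsets 'start'/'end' into the context string.
print(result['answer'], result['score'])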
Example #3
def write_predictions(args, model, dataset):
    """
    Writes model predictions to an output file. The official QA metrics (EM/F1)
    can be computed using `evaluation.py`. 

    Args:
        args: `argparse` object.
        model: Instance of the PyTorch model.
        dataset: Test dataset (technically, the development dataset since the
            official test datasets are blind and hosted by official servers).
    """
    # Load model checkpoint.
    model.load_state_dict(torch.load(args.model_path, map_location='cpu'))
    model.eval()

    # Set up test dataloader.
    test_dataloader = tqdm(
        dataset.get_batch(shuffle_examples=False),
        **_TQDM_OPTIONS,
    )

    # Output predictions.
    outputs = []

    # Load the spaCy NER model. The large English model must be downloaded
    # separately before running:  python -m spacy download en_core_web_lg
    ner = spacy.load("en_core_web_lg")

    # Amount added to / subtracted from a position's probability when re-weighting.
    prob_diff = 0.10

    with torch.no_grad():
        for (i, batch) in enumerate(test_dataloader):
            print("Starting loop: " + str(i))
            # Forward inputs.
            start_logits, end_logits = model(batch)

            # Form distributions over start and end positions.
            batch_start_probs = F.softmax(start_logits, 1)
            batch_end_probs = F.softmax(end_logits, 1)

            for j in range(start_logits.size(0)):
                # Find question index and passage.
                sample_index = args.batch_size * i + j
                
                # This lookup indexes the full sample list, so the re-weighting
                # loops below must not reuse `i` (the outer batch index) as
                # their loop variable.
                qid, passage, question, _, _ = dataset.samples[sample_index]

                # Unpack start and end probabilities. Find the constrained
                # (start, end) pair that has the highest joint probability.
                start_probs = unpack(batch_start_probs[j])
                end_probs = unpack(batch_end_probs[j])

                question_joined = ' '.join(question)
                passage_joined = ' '.join(passage)
                ner_passage_tokens = ner(passage_joined)

                # spaCy entity spans expose token-level boundaries via ent.start
                # (inclusive) and ent.end (exclusive); start_probs/end_probs are
                # indexed by token position, so use those rather than character
                # offsets. This assumes spaCy's tokens roughly line up with the
                # whitespace tokens of the passage.
                passage_ner_token_start_indices = [ent.start for ent in ner_passage_tokens.ents]
                passage_ner_token_end_indices = [ent.end - 1 for ent in ner_passage_tokens.ents]

                start_probs_len = len(start_probs)
                end_probs_len = len(end_probs)

                # Penalize positions that do not start a named entity. Note the
                # loop variable must not be `i`, which is the outer batch index.
                for k in range(start_probs_len):
                    if k not in passage_ner_token_start_indices:
                        start_probs[k] -= prob_diff

                # Likewise penalize positions that do not end a named entity.
                for k in range(end_probs_len):
                    if k not in passage_ner_token_end_indices:
                        end_probs[k] -= prob_diff

                # Who/whom questions: boost PERSON entity boundaries and
                # penalize everything else.
                if re.match("who", question_joined, re.IGNORECASE):
                    person_token_start_indices = [ent.start for ent in ner_passage_tokens.ents if ent.label_ == "PERSON"]
                    person_token_end_indices = [ent.end - 1 for ent in ner_passage_tokens.ents if ent.label_ == "PERSON"]

                    for k in range(start_probs_len):
                        if k in person_token_start_indices:
                            start_probs[k] += prob_diff
                        else:
                            start_probs[k] -= prob_diff

                    for k in range(end_probs_len):
                        if k in person_token_end_indices:
                            end_probs[k] += prob_diff
                        else:
                            end_probs[k] -= prob_diff
                # When questions: boost DATE and TIME entity boundaries.
                elif re.match("when", question_joined, re.IGNORECASE):
                    when_token_start_indices = [ent.start for ent in ner_passage_tokens.ents if ent.label_ in ("DATE", "TIME")]
                    when_token_end_indices = [ent.end - 1 for ent in ner_passage_tokens.ents if ent.label_ in ("DATE", "TIME")]

                    for k in range(start_probs_len):
                        if k in when_token_start_indices:
                            start_probs[k] += prob_diff
                        else:
                            start_probs[k] -= prob_diff

                    for k in range(end_probs_len):
                        if k in when_token_end_indices:
                            end_probs[k] += prob_diff
                        else:
                            end_probs[k] -= prob_diff
                # Where questions: boost LOC entity boundaries.
                elif re.match("where", question_joined, re.IGNORECASE):
                    where_token_start_indices = [ent.start for ent in ner_passage_tokens.ents if ent.label_ == "LOC"]
                    where_token_end_indices = [ent.end - 1 for ent in ner_passage_tokens.ents if ent.label_ == "LOC"]

                    for k in range(start_probs_len):
                        if k in where_token_start_indices:
                            start_probs[k] += prob_diff
                        else:
                            start_probs[k] -= prob_diff

                    for k in range(end_probs_len):
                        if k in where_token_end_indices:
                            end_probs[k] += prob_diff
                        else:
                            end_probs[k] -= prob_diff


                """
                At this point, start_probs and end_probs should have all the probabilities for indices 
                that are associated with O (Other) tokens decremented by the value of prob_diff.
                
                Also, 
                    if the question contains some form of "who", then we are probably looking for a person
                    so we should use the PERSON tokens found in the passage from the spacy library to add
                    the value of prob_diff to the corresponding index in start_probs and end_probs

                Expectation: 
                    Incrementing the probability of the right tokens should correspond to a greater chance
                    of the model predicting the right answer.
                    Decrementing the probability of O (Other) tokens should prevent the model from starting
                    or ending on a word that doesnt really mean anything 
                """
                start_index, end_index = search_span_endpoints(
                        start_probs, end_probs
                )
                
                # Grab predicted span.
                pred_span = ' '.join(passage[start_index:(end_index + 1)])

                # Add prediction to outputs.
                outputs.append({'qid': qid, 'answer': pred_span})

    # Write predictions to output file.
    with open(args.output_path, 'w+') as f:
        for elem in outputs:
            f.write(f'{json.dumps(elem)}\n')
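
To isolate the re-weighting idea from the rest of the prediction loop, here is a toy, self-contained sketch. The probabilities and entity token spans are made up (in the real code they come from the model and from spaCy's doc.ents): entity boundary positions are boosted, all other positions are penalized, and the best start <= end pair is then taken by joint probability.

# Toy illustration of the entity-based re-weighting used above. All numbers and
# entity spans are hypothetical; spaCy would supply the spans via ent.start
# (inclusive token index) and ent.end (exclusive).
PROB_DIFF = 0.10

passage = ['Barack', 'Obama', 'was', 'born', 'in', '1961', '.']
start_probs = [0.30, 0.05, 0.05, 0.05, 0.05, 0.45, 0.05]
end_probs   = [0.05, 0.35, 0.05, 0.05, 0.05, 0.40, 0.05]

# A PERSON entity spanning tokens 0-1 and a DATE entity at token 5.
entity_starts = {0, 5}
entity_ends = {1, 5}

# Boost entity boundaries, penalize every other position.
for k in range(len(passage)):
    start_probs[k] += PROB_DIFF if k in entity_starts else -PROB_DIFF
    end_probs[k] += PROB_DIFF if k in entity_ends else -PROB_DIFF

# Constrained argmax: best (start, end) pair with start <= end.
start_index, end_index = max(
    ((s, e) for s in range(len(passage)) for e in range(s, len(passage))),
    key=lambda span: start_probs[span[0]] * end_probs[span[1]],
)
print(' '.join(passage[start_index:end_index + 1]))  # -> '1961'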