Пример #1
0
#!/usr/bin/env python
import nltk
import init
import chunker


def punc_loc(question, candidate):
    """Report whether the token right after the candidate answer is punctuation.

    Both the document and the answer are passed through
    ``chunker.clean_punctuation`` and split on whitespace.  The document token
    sitting ``len(answer words)`` positions after ``index`` is then compared
    against the marks , . " : !

    Args:
        question: Not used by this function.
        candidate: A 5-item sequence ``(answer, doc_num, index, features,
            q_id)``.  ``answer`` is the candidate text, ``doc_num`` is handed
            to ``init.get_doc`` and ``index`` is used as the word offset of
            the answer inside the cleaned document.  ``features`` and
            ``q_id`` are not used here.

    Returns:
        1 if the following token is exactly one of the marks, 0 otherwise.
        0 is also returned when there is no following token (the answer ends
        the document, or the computed position is outside the document).
    """
    # Unpack in the body instead of the signature: tuple parameters were
    # removed from the language by PEP 3113.  Callers still pass the same
    # two positional arguments.
    answer, doc_num, index, _features, _q_id = candidate

    doc_words = chunker.clean_punctuation(init.get_doc(doc_num)).split()
    answer_words = chunker.clean_punctuation(answer).split()

    # Position of the token that may be the punctuation mark.
    following = index + len(answer_words)
    if not 0 <= following < len(doc_words):
        # Nothing follows the answer, so there is no punctuation to report.
        return 0

    return 1 if doc_words[following] in (',', '.', '"', ':', '!') else 0


#test case below was modified to work with specified doc instead of the actual doc to work with a known index
def test():
Пример #2
0
#!/usr/bin/env python
from align import sw_align
from question_rewrite import rewriteQuestion
import init

def question_apposition(question, candidate):
    """Score how much better the answer aligns as an appositive than plainly.

    Two strings are built from the question and the answer,
    ``"<question>, <answer>,"`` and ``"<question> <answer>"``, and each is
    aligned against the document with ``sw_align``.  The first item of each
    alignment result is taken as its score.

    Args:
        question: The question text used as the prefix of both strings.
        candidate: A 5-item sequence ``(answer, doc_num, index, features,
            q_id)``.  Only ``answer`` and ``doc_num`` are used; ``doc_num``
            is handed to ``init.get_doc``.

    Returns:
        A one-element list holding
        ``max(0, apposition score - plain score)``.
    """
    # Unpack in the body instead of the signature: tuple parameters were
    # removed from the language by PEP 3113.  Callers still pass the same
    # two positional arguments.
    answer, doc_num, _index, _features, _q_id = candidate
    doc = init.get_doc(doc_num)

    appositive = question + ", " + answer + ","
    plain = question + " " + answer

    # Only the score of each alignment is needed; the remaining four items
    # are unpacked into throwaway names.
    (app_score, _, _, _, _) = sw_align(appositive, doc)
    (plain_score, _, _, _, _) = sw_align(plain, doc)

    return [max(0, app_score - plain_score)]

def rewrite_apposition(question, candidate):
    """Run question_apposition on the rewritten form of the question.

    The question is first passed through ``rewriteQuestion``; the candidate
    is forwarded unchanged and the result of ``question_apposition`` is
    returned as is.
    """
    rewritten = rewriteQuestion(question)
    return question_apposition(rewritten, candidate)

if __name__ == "__main__":
    # Manual smoke run against a single known document.
    q_num = 209
    init.get_corpus(qNum=q_num)
    question = "Who is the inventor of the phonograph?"
    doc = "SJMN91-06010225"
    # Candidates are 5-item tuples (answer, doc_num, index, features, q_id).
    # The scoring functions unpack all five items, so q_id must be present
    # even though they do not read it; the corpus question number is reused.
    candidate = ("joe smith", doc, 700, {}, q_num)
    print(question_apposition(question, candidate))
    print(rewrite_apposition(question, candidate))
Пример #3
0
from difflib import SequenceMatcher as SequenceMatcher
import init
import chunker
import read_questions
import sys

MAX_INT = sys.maxint


def literal_question_distance(question, candidate):
    """Evaluates a candidate based on how close it is to the longest fragment of
    the question in the document

    Args:
        question: The question text handed to ``find_match``.
        candidate: A 5-item sequence ``(answer, doc_num, index, features,
            q_id)``.  Only ``doc_num`` (handed to ``init.get_doc``) and
            ``index`` (used as a word offset into the cleaned document) are
            used.

    returns (distance, length of fragment)
        distance is 0 when the candidate position lies inside the fragment,
        otherwise the gap to the nearer end of the fragment.
    """
    # Unpack in the body instead of the signature: tuple parameters were
    # removed from the language by PEP 3113.  Callers still pass the same
    # two positional arguments.
    _answer, doc_num, index, _features, _q_id = candidate

    doc = chunker.clean_punctuation(init.get_doc(doc_num))
    (start, _, length) = find_match(question, doc)
    end = start + length

    # Turn the word offset into a character offset: the length of the text up
    # to and including word `index` when the words are joined by single
    # spaces.  Kept under its own name instead of rebinding `index`.
    # NOTE(review): this offset is measured on the whitespace-normalised text
    # while find_match receives `doc` as is -- confirm the two agree.
    char_pos = len(" ".join(doc.split()[:index + 1]))

    if start <= char_pos <= end:
        # The candidate sits inside the matched fragment.
        return (0, length)
    return (min(abs(start - char_pos), abs(end - char_pos)), length)


def literal_rewrite_distance(question, candidate):
    """Evaluates a candidate based on how close it is to the longest fragment of
    the re-written question in the document

    returns (distance, length of fragment)
    """
Пример #4
0
from align import sw_align
from question_rewrite import rewriteQuestion
import init

def question_apposition(question, candidate):
    """Score how much better the answer aligns as an appositive than plainly.

    Two strings are built from the question and the answer,
    ``"<question>, <answer>,"`` and ``"<question> <answer>"``, and each is
    aligned against the document with ``sw_align``.  The first item of each
    alignment result is taken as its score.

    Args:
        question: The question text used as the prefix of both strings.
        candidate: A 5-item sequence ``(answer, doc_num, index, features,
            q_id)``.  Only ``answer`` and ``doc_num`` are used; ``doc_num``
            is handed to ``init.get_doc``.

    Returns:
        ``max(0, apposition score - plain score)``.
    """
    # Unpack in the body instead of the signature: tuple parameters were
    # removed from the language by PEP 3113.  Callers still pass the same
    # two positional arguments.
    answer, doc_num, _index, _features, _q_id = candidate
    doc = init.get_doc(doc_num)

    appositive = question + ", " + answer + ","
    plain = question + " " + answer

    # Only the score of each alignment is needed; the remaining four items
    # are unpacked into throwaway names.
    (app_score, _, _, _, _) = sw_align(appositive, doc)
    (plain_score, _, _, _, _) = sw_align(plain, doc)

    return max(0, app_score - plain_score)

def rewrite_apposition(question, candidate):
    """Run question_apposition on the rewritten form of the question.

    The question is first passed through ``rewriteQuestion``; the candidate
    is forwarded unchanged and the result of ``question_apposition`` is
    returned as is.
    """
    rewritten = rewriteQuestion(question)
    return question_apposition(rewritten, candidate)

if __name__ == "__main__":
    # Manual smoke run against a single known document.
    q_num = 209
    init.get_corpus(qNum=q_num)
    question = "Who is the inventor of the phonograph?"
    doc = "SJMN91-06010225"
    # Candidates are 5-item tuples (answer, doc_num, index, features, q_id).
    # question_apposition unpacks all five items, so q_id must be present
    # even though it is not read; the corpus question number is reused.
    candidate = ("joe smith", doc, 700, {}, q_num)
    print(question_apposition(question, candidate))
#    print(rewrite_apposition(question, candidate))