#!/usr/bin/env python
import nltk
import init
import chunker

# Words that count as punctuation directly following a candidate answer.
_FOLLOWING_PUNCTUATION = (',', '.', '"', ':', '!')


def punc_loc(question, candidate):
    """Report whether the word right after the candidate answer is punctuation.

    The document and the answer are both passed through
    chunker.clean_punctuation.  The cleaned document is split on whitespace
    and the word at position ``index + (number of words in the answer)`` is
    compared against the marks , . " : !

    Args:
        question: Not used by this function.
        candidate: 5-tuple ``(answer, doc_num, index, features, q_id)``.
            ``doc_num`` is handed to init.get_doc; ``index`` is used as a
            word offset into the whitespace-split cleaned document.
            ``features`` and ``q_id`` are not read here.

    Returns:
        1 if the word following the answer is one of the marks above,
        otherwise 0.  Also 0 when the document has no word at that position
        (for example when the answer is the last thing in the document).
    """
    # Unpacked in the body rather than in the signature: tuple parameters
    # are Python-2-only syntax, and positional callers see no difference.
    answer, doc_num, index, features, q_id = candidate

    doc = chunker.clean_punctuation(init.get_doc(doc_num))
    answer = chunker.clean_punctuation(answer)

    # Word length of the candidate answer (whitespace-delimited).
    answer_len = len(answer.split())
    # Position of the word that may be a punctuation mark.
    punc_word_index = index + answer_len

    doc_words = doc.split()
    try:
        punc_word = doc_words[punc_word_index]
    except IndexError:
        # Nothing follows the answer, so nothing can be punctuation.
        return 0

    # NOTE(review): the whole word is compared, not its last character, so
    # this presumably relies on clean_punctuation leaving punctuation marks
    # as separate whitespace-delimited tokens -- confirm against chunker.
    if punc_word in _FOLLOWING_PUNCTUATION:
        return 1
    return 0


# test case below was modified to work with specified doc instead of the
# actual doc to work with a known index
def test():
    """Test case for punc_loc using a specified doc with a known index.

    NOTE(review): the body of this function lies outside the visible part of
    the file; only the header is reproduced here.
    """
#!/usr/bin/env python
from align import sw_align
from question_rewrite import rewriteQuestion
import init


def question_apposition(question, candidate):
    """Score how much better the answer aligns as an appositive of the question.

    Two strings are aligned against the document with sw_align:
    ``question + ", " + answer + ","`` (the apposition form) and
    ``question + " " + answer`` (the plain form).  The first element of each
    sw_align result is treated as that alignment's score.

    Args:
        question: Question text placed in front of the answer.
        candidate: 5-tuple ``(answer, doc_num, index, features, q_id)``.
            Only ``answer`` and ``doc_num`` are read; ``doc_num`` is handed
            to init.get_doc to fetch the document text.

    Returns:
        A one-element list holding ``max(0, app_score - plain_score)``, i.e.
        the amount by which the apposition form outscores the plain form,
        floored at zero.
    """
    # Unpacked in the body rather than in the signature: tuple parameters
    # are Python-2-only syntax, and positional callers see no difference.
    answer, doc_num, index, features, q_id = candidate

    doc = init.get_doc(doc_num)
    apposition = question + ", " + answer + ","
    plain = question + " " + answer

    # Only the score is needed; the remaining four fields of each alignment
    # result are discarded.
    app_score, _, _, _, _ = sw_align(apposition, doc)
    plain_score, _, _, _, _ = sw_align(plain, doc)
    return [max(0, app_score - plain_score)]


def rewrite_apposition(question, candidate):
    """Apply question_apposition to the rewritten form of the question.

    The question is first passed through rewriteQuestion; the candidate is
    forwarded unchanged.
    """
    return question_apposition(rewriteQuestion(question), candidate)


if __name__ == "__main__":
    init.get_corpus(qNum=209)
    question = "Who is the inventor of the phonograph?"
    doc = "SJMN91-06010225"
    # A candidate must carry all five fields
    # (answer, doc_num, index, features, q_id).  q_id is not read by the
    # scorers; 209 simply mirrors the qNum loaded above.
    candidate = ("joe smith", doc, 700, {}, 209)
    print(question_apposition(question, candidate))
    print(rewrite_apposition(question, candidate))
from difflib import SequenceMatcher as SequenceMatcher
import init
import chunker
import read_questions
import sys

# Sentinel distance for "candidate is not inside the matched fragment".
try:
    MAX_INT = sys.maxint  # Python 2
except AttributeError:
    MAX_INT = sys.maxsize  # sys.maxint does not exist on Python 3


def literal_question_distance(question, candidate):
    """Measure how far a candidate is from the longest question fragment.

    The document is cleaned with chunker.clean_punctuation and find_match
    is asked for the match of ``question`` in it, giving ``start`` and
    ``length``.  The candidate's word index is converted into a character
    offset: the length of the first ``index + 1`` words joined by single
    spaces.

    Args:
        question: Question text to look for in the document.
        candidate: 5-tuple ``(answer, doc_num, index, features, q_id)``.
            Only ``doc_num`` (handed to init.get_doc) and ``index`` (a word
            offset into the whitespace-split cleaned document) are read.

    Returns:
        ``(distance, length)`` where ``length`` is the fragment length
        reported by find_match and ``distance`` is 0 when the candidate
        offset lies within ``[start, start + length]``, otherwise the
        smaller of its distances to the two ends of the fragment.
    """
    # Unpacked in the body rather than in the signature: tuple parameters
    # are Python-2-only syntax, and positional callers see no difference.
    answer, doc_num, index, features, q_id = candidate

    doc = chunker.clean_punctuation(init.get_doc(doc_num))
    # find_match is not defined in the visible part of this file.
    # NOTE(review): start/length are assumed to be character offsets into
    # the cleaned document -- confirm against find_match.
    (start, _, length) = find_match(question, doc)

    words = doc.split()
    # Character offset just past the word at `index`, measured in a
    # single-space-joined rendering of the document's words.
    char_index = len(" ".join(words[0:index + 1]))

    end = start + length
    inside = 0 if start <= char_index <= end else MAX_INT
    distance = min(abs(start - char_index), abs(end - char_index), inside)
    return (distance, length)


def literal_rewrite_distance(question, candidate):
    """Evaluates a candidate based on how close it is to the longest fragment
    of the re-written question in the document

    returns (distance, length of fragment)
    """
from align import sw_align
from question_rewrite import rewriteQuestion
import init


def question_apposition(question, candidate):
    """Score how much better the answer aligns as an appositive of the question.

    Two strings are aligned against the document with sw_align:
    ``question + ", " + answer + ","`` (the apposition form) and
    ``question + " " + answer`` (the plain form).  The first element of each
    sw_align result is treated as that alignment's score.

    Args:
        question: Question text placed in front of the answer.
        candidate: 5-tuple ``(answer, doc_num, index, features, q_id)``.
            Only ``answer`` and ``doc_num`` are read; ``doc_num`` is handed
            to init.get_doc to fetch the document text.

    Returns:
        ``max(0, app_score - plain_score)``: the amount by which the
        apposition form outscores the plain form, floored at zero.
    """
    # Unpacked in the body rather than in the signature: tuple parameters
    # are Python-2-only syntax, and positional callers see no difference.
    answer, doc_num, index, features, q_id = candidate

    doc = init.get_doc(doc_num)
    apposition = question + ", " + answer + ","
    plain = question + " " + answer

    # Only the score is needed; the remaining four fields of each alignment
    # result are discarded.
    app_score, _, _, _, _ = sw_align(apposition, doc)
    plain_score, _, _, _, _ = sw_align(plain, doc)
    return max(0, app_score - plain_score)


def rewrite_apposition(question, candidate):
    """Apply question_apposition to the rewritten form of the question.

    The question is first passed through rewriteQuestion; the candidate is
    forwarded unchanged.
    """
    return question_apposition(rewriteQuestion(question), candidate)


if __name__ == "__main__":
    init.get_corpus(qNum=209)
    question = "Who is the inventor of the phonograph?"
    doc = "SJMN91-06010225"
    # A candidate must carry all five fields
    # (answer, doc_num, index, features, q_id).  q_id is not read by the
    # scorers; 209 simply mirrors the qNum loaded above.
    candidate = ("joe smith", doc, 700, {}, 209)
    print(question_apposition(question, candidate))
    # print(rewrite_apposition(question, candidate))