def collinsHeadSenseExtractor(self, questions, colName, training):
    """Work out a sense for each question's head word and store it.

    For every document yielded by ``questions`` the first entry of its
    ``'head'`` list is used as the head word.  A compound head of the form
    ``"<head1>--<head2>"`` is reduced to ``<head1>`` and written back into
    that list.  The sense is the string ``'null'`` when the head word is
    ``None``, equals ``'null'``, has no WordNet synsets, or when the
    question's ``'whWord'`` is ``'whWord-how'``.  Otherwise it is whatever
    ``AdaptedLesk(6).wsd(tokenized, head, tagged)`` returns.

    The sense is saved under ``headSense`` on every document with the same
    ``qID`` in the collection ``DBStore.getDB()['raw' + colName]``.

    Args:
        questions: iterable of question documents exposing ``rewind()``
            (called once before iteration).  Each document must provide
            ``'head'`` and ``'qID'``; ``'whWord'``, ``'tokenized'`` and
            ``'tagged'`` are read when the head word is usable.
        colName: suffix appended to ``'raw'`` to name the target collection.
        training: not used by this method; kept for interface compatibility.

    Returns:
        None.  Results are written to the collection; a running count and
        the disambiguation inputs are printed to stdout as progress output.
    """
    rawQuestions = DBStore.getDB()['raw' + colName]
    adaptedLesk = AdaptedLesk(6)
    compoundHead = re.compile('(?P<head1>.+)--(?P<head2>.+)')
    questions.rewind()

    for count, question in enumerate(questions, 1):
        # Single-argument print keeps the same output under the Python 2
        # print statement while avoiding statement-only syntax.
        print(count)

        headWord = question['head']
        try:
            match = compoundHead.match(headWord[0])
            if match:
                # Keep only the part before "--".
                headWord[0] = match.group('head1')
        except TypeError:
            # The head word is not a string (e.g. None), so there is
            # nothing to split; the check below maps None to 'null'.
            pass

        head = headWord[0]
        headSense = 'null'
        # The cheap comparisons run first; the WordNet lookup only happens
        # for a real candidate word.
        if head is not None and head != 'null' and wordnet.synsets(head):
            if question['whWord'] != 'whWord-how':
                print('%s %s %s' % (question['tokenized'], head,
                                    question['tagged']))
                headSense = adaptedLesk.wsd(question['tokenized'], head,
                                            question['tagged'])

        rawQuestions.update({'qID': question['qID']},
                            {"$set": {"headSense": headSense}},
                            safe=True, multi=True)
def replaceTag(tag):
    """Map ``tag`` through ``DataRetrieval.replace``, turning 'a' into 'n'.

    Args:
        tag: value handed unchanged to ``DataRetrieval.replace``.

    Returns:
        ``'n'`` when ``DataRetrieval.replace(tag)`` equals ``'a'``;
        otherwise the value ``DataRetrieval.replace`` returned.
    """
    mapped = DataRetrieval.replace(tag)
    return 'n' if mapped == 'a' else mapped