def data(self, data):
    """Record a chunk of character data as a question title or best answer.

    The text is lower-cased and stripped first.  Nothing is recorded unless
    ``self.is_question`` is set; the ``is_*`` flags are presumably maintained
    by the parser's start/end handlers, which are not visible here.

    * When ``self.is_subject`` is set, the text is treated as a question
      title.  Its ``normalizeWithStopWordRemoval`` form is the key into
      ``self.questions``; the raw (lower-cased, stripped) text is added to
      that entry's ``QUESTION`` set.  Titles whose normalized form has a
      length of 3 or less are ignored.
    * When ``self.is_best_answer`` is set, the normalized text is added to
      the ``ANSWER`` set of the current question, provided that question
      has an entry in ``self.questions``.

    Args:
        data: Text content to process.
    """
    data = data.lower().strip()
    if not self.is_question:
        return

    # A new question.
    if self.is_subject:
        clean_question = normalizeWithStopWordRemoval(data)
        if len(clean_question) > 3:
            # Create the entry on first sight; set.add() is idempotent, so
            # no membership test is needed before recording the raw text.
            entry = self.questions.setdefault(
                clean_question, {ANSWER: set(), QUESTION: set()})
            entry[QUESTION].add(data)
            self.curr_clean_question = clean_question
            self.curr_question = data
            # NOTE(review): a subject rejected by the length filter leaves
            # the previously accepted question current, so a later best
            # answer is filed under that earlier question - confirm this is
            # intended.

    # Answer for current question.
    if self.is_best_answer:
        clean_answer = normalizeWithStopWordRemoval(data)
        if self.curr_clean_question in self.questions:
            self.questions[self.curr_clean_question][ANSWER].add(clean_answer)
def test_parse_one_question(self):
    """Parse one <document> and compare the result with the expected map.

    The fixture is written to ``parser_test.xml`` in the current working
    directory and parsed with ``self.parser``; the value returned by
    ``etree.parse`` is compared against ``expected``.
    """
    question = u'Why are yawns contagious?'.lower()
    # NOTE(review): the expected answer is spelled 'body when' while the
    # fixture's best answer is 'When your body '; this presumably relies on
    # normalizeWithStopWordRemoval dropping 'your' and being insensitive to
    # token order - confirm against that helper.
    expected = {
        normalizeWithStopWordRemoval(question): {
            ANSWER: {normalizeWithStopWordRemoval(u'body when'.lower())},
            QUESTION: {question},
        }
    }
    # The context manager closes the fixture file even if write() raises.
    # Elements are separated by a single space of inter-element whitespace.
    with open('parser_test.xml', 'w') as ofile:
        ofile.write(
            '<document><uri>432470</uri> '
            '<subject>Why are yawns contagious?</subject> '
            '<content> When people yawn, you see that other people in the room '
            'yawn, too. Why is that?</content> '
            '<bestanswer>When your body </bestanswer></document>')
    self.assertDictEqual(etree.parse('parser_test.xml', self.parser), expected)
def test_parse_multiple_questions(self):
    """Write a two-document fixture to ``parser_test.xml``.

    Builds the mapping the parser is meant to produce for two questions and
    writes the corresponding XML fixture to the current working directory.
    """
    yawn_question = u'Why are yawns contagious?'.lower()
    burger_question = (
        u'What\'s the best way to heat up a cold hamburger '.lower())
    # TODO(review): this test never parses the fixture or asserts anything,
    # so `expected` is currently unused.  The sibling single-question test
    # ends with
    #     self.assertDictEqual(
    #         etree.parse('parser_test.xml', self.parser), expected)
    # but that cannot simply be added here: the expected QUESTION text below
    # has a trailing space and omits '(In & Out)?', and the fixture XML has
    # several top-level elements, a bare '&', and </vespaadd></document>
    # closed in the wrong order.  Settle the intended parser output first.
    expected = {
        normalizeWithStopWordRemoval(yawn_question): {
            ANSWER: {normalizeWithStopWordRemoval(u'body when'.lower())},
            QUESTION: {yawn_question},
        },
        normalizeWithStopWordRemoval(burger_question): {
            ANSWER: {normalizeWithStopWordRemoval(
                u'If you must eat a heated In & Out hamburger'.lower())},
            QUESTION: {burger_question},
        },
    }
    # The context manager closes the fixture file even if write() raises.
    # Elements are separated by a single space of inter-element whitespace.
    with open('parser_test.xml', 'w') as ofile:
        ofile.write(
            '<document><uri>432470</uri>'
            '<subject>Why are yawns contagious?</subject> '
            '<content> When people yawn, you see that other people in the room '
            'yawn, too. Why is that?</content> '
            '<bestanswer>When your body </bestanswer></document>'
            '<vespaadd><document type="wisdom"><uri>800062</uri> '
            '<subject> What\'s the best way to heat up a cold hamburger '
            '(In & Out)?</subject> '
            '<content> What\'s the best way to heat up a cold hamburger '
            '(In & Out)? </content> '
            '<bestanswer> If you must eat a heated In & Out hamburger then'
            '</bestanswer></vespaadd></document>')