def _other_recognition(self, tagged_sentences, all_entities, question):
    """Return candidate nouns that are related to the question's head word.

    Words tagged ``"NN"`` are collected from ``tagged_sentences``, words
    that occur in any entity of ``all_entities`` are dropped, and the
    remainder is kept only when the WordNet Lin similarity between the
    noun's first noun synset and the head's first noun synset lies
    strictly between the configured threshold and 0.9.

    Args:
        tagged_sentences: iterable of sentences, each an iterable of
            ``(word, tag)`` pairs.
        all_entities: iterable of entity strings; each one is split on
            whitespace and every resulting word is excluded.
        question: object whose ``text`` attribute is handed to
            ``QuestionClassifier.get_features``.

    Returns:
        A list of nouns. The entity-filtered but otherwise unfiltered list
        is returned when the question has no head, or when neither the
        head nor the ``"noun"`` feature has a noun synset in WordNet.
    """

    def _setting(option, default):
        # Read an "answer_extraction" option; on a configuration error log
        # a warning and fall back to ``default``.
        try:
            return MyConfig.get("answer_extraction", option)
        except MyConfigException as e:
            logging.getLogger("qa_logger").warning(str(e))
            return default

    # Nouns retrieval
    nouns = []
    for sentence in tagged_sentences:
        nouns.extend(word for (word, tag) in sentence if tag == "NN")

    # Nouns filtering: remove every noun that is a word of an entity.
    # ``entity.split()`` is called on the object itself so that any
    # string-like entity works, not only exact ``str`` instances.
    entity_words = set(itertools.chain.from_iterable(
        entity.split() for entity in all_entities))
    nouns = [noun for noun in nouns if noun not in entity_words]

    features = QuestionClassifier.get_features(question.text, "hn")
    head = features["head"]
    if head == "":
        return nouns

    # Reference synset: first noun synset of the head, otherwise first
    # noun synset of the "noun" feature. Without either, no semantic
    # filtering is possible.
    reference_synsets = wn.synsets(head, pos=wn.NOUN)
    if not reference_synsets:
        reference_synsets = wn.synsets(features["noun"], pos=wn.NOUN)
        if not reference_synsets:
            return nouns
    head_synset = reference_synsets[0]

    # Nothing to score: skip loading the information-content corpus.
    if not nouns:
        return nouns

    # Configuration is read only once it is known to be needed, so the
    # early returns above do not pay for loading the IC file.
    threshold = float(_setting("other_threshold", 0.6))
    ic = wordnet_ic.ic(_setting("ic", "ic-bnc.dat"))

    # NOTE(review): the 0.9 ceiling presumably excludes nouns that are
    # (nearly) the head word itself -- confirm the intent.
    upper_bound = 0.9

    result = []
    for noun in nouns:
        noun_synsets = wn.synsets(noun, pos=wn.NOUN)
        # Nouns unknown to WordNet are skipped. Checking explicitly keeps
        # an IndexError raised inside lin_similarity from being hidden.
        if not noun_synsets:
            continue
        similarity = noun_synsets[0].lin_similarity(head_synset, ic)
        if threshold < similarity < upper_bound:
            result.append(noun)
    return result
def _other_recognition(self, tagged_sentences, all_entities, question):
    """Return candidate nouns that are related to the question's head word.

    Words tagged ``"NN"`` are collected from ``tagged_sentences``, words
    that occur in any entity of ``all_entities`` are dropped, and the
    remainder is kept only when the WordNet Lin similarity between the
    noun's first noun synset and the head's first noun synset lies
    strictly between the configured threshold and 0.9.

    Args:
        tagged_sentences: iterable of sentences, each an iterable of
            ``(word, tag)`` pairs.
        all_entities: iterable of entity strings; each one is split on
            whitespace and every resulting word is excluded.
        question: object whose ``text`` attribute is handed to
            ``QuestionClassifier.get_features``.

    Returns:
        A list of nouns. The entity-filtered but otherwise unfiltered list
        is returned when the question has no head, or when neither the
        head nor the ``"noun"`` feature has a noun synset in WordNet.
    """

    def _setting(option, default):
        # Read an "answer_extraction" option; on a configuration error log
        # a warning and fall back to ``default``.
        try:
            return MyConfig.get("answer_extraction", option)
        except MyConfigException as e:
            logging.getLogger("qa_logger").warning(str(e))
            return default

    # Nouns retrieval
    nouns = []
    for sentence in tagged_sentences:
        nouns.extend(word for (word, tag) in sentence if tag == "NN")

    # Nouns filtering: remove every noun that is a word of an entity.
    # ``entity.split()`` is called on the object itself so that any
    # string-like entity works, not only exact ``str`` instances.
    entity_words = set(itertools.chain.from_iterable(
        entity.split() for entity in all_entities))
    nouns = [noun for noun in nouns if noun not in entity_words]

    features = QuestionClassifier.get_features(question.text, "hn")
    head = features["head"]
    if head == "":
        return nouns

    # Reference synset: first noun synset of the head, otherwise first
    # noun synset of the "noun" feature. Without either, no semantic
    # filtering is possible.
    reference_synsets = wn.synsets(head, pos=wn.NOUN)
    if not reference_synsets:
        reference_synsets = wn.synsets(features["noun"], pos=wn.NOUN)
        if not reference_synsets:
            return nouns
    head_synset = reference_synsets[0]

    # Nothing to score: skip loading the information-content corpus.
    if not nouns:
        return nouns

    # Configuration is read only once it is known to be needed, so the
    # early returns above do not pay for loading the IC file.
    threshold = float(_setting("other_threshold", 0.6))
    ic = wordnet_ic.ic(_setting("ic", "ic-bnc.dat"))

    # NOTE(review): the 0.9 ceiling presumably excludes nouns that are
    # (nearly) the head word itself -- confirm the intent.
    upper_bound = 0.9

    result = []
    for noun in nouns:
        noun_synsets = wn.synsets(noun, pos=wn.NOUN)
        # Nouns unknown to WordNet are skipped. Checking explicitly keeps
        # an IndexError raised inside lin_similarity from being hidden.
        if not noun_synsets:
            continue
        similarity = noun_synsets[0].lin_similarity(head_synset, ic)
        if threshold < similarity < upper_bound:
            result.append(noun)
    return result
def _question_classification(self, question):
    """Classify ``question`` with the classifier selected in the config.

    The feature string and the classifier file name are read from the
    ``answer_extraction`` configuration section. Each lookup that raises
    ``MyConfigException`` is logged as a warning on ``qa_logger`` and
    replaced by a built-in default.

    Args:
        question: the question, forwarded unchanged to
            ``QuestionClassifier.classify``.

    Returns:
        Whatever ``QuestionClassifier.classify`` returns for the resolved
        classifier path, the question and the feature string.
    """
    section = "answer_extraction"

    # Feature set used by the classifier
    try:
        features = MyConfig.get(section, "question_features")
    except MyConfigException as error:
        logging.getLogger("qa_logger").warning(str(error))
        features = "fnh"

    # Location of the serialized classifier
    try:
        model_file = MyConfig.get(section, "question_classifier")
    except MyConfigException as error:
        logging.getLogger("qa_logger").warning(str(error))
        # NOTE(review): this fallback always points at the "fhn" directory,
        # regardless of ``features`` (whose own default is spelled "fnh").
        # Confirm whether the two are meant to agree.
        model_path = os.path.join("qc", "fhn", "qc_bayes.pkl")
    else:
        model_path = os.path.join("qc", features, model_file)

    # Question classification
    return QuestionClassifier.classify(model_path, question, features)