def rank_query_candidates(self, query_candidates, key=lambda x: x, utterance=""):
    """Build and rank answer candidates from search results for the utterance.

    This method doesn't look into the existing candidates, but rather
    creates new ones based on search results.

    :param query_candidates: List of EvaluationCandidate objects. This
        answerer doesn't actually use them.
    :param key: Unused; kept for interface compatibility.
    :param utterance: The question to answer (unicode or UTF-8 str).
    :return: List of EvaluationCandidate objects, best-scored first.
    """
    # Normalize to a UTF-8 byte string exactly once; everything below
    # (cache key, entity linker, searcher) works on this byte string.
    if isinstance(utterance, unicode):
        utterance = utterance.encode('utf-8')
    if utterance in self._answers_cache:
        return self._answers_cache[utterance]

    # BUG FIX: the original called utterance.encode("utf-8") here on the
    # already-encoded byte string; in Python 2 that performs an implicit
    # ASCII decode first and raises UnicodeDecodeError for any non-ASCII
    # question. Pass the normalized byte string directly.
    question_entities = set(e['name'] for e in
                            find_entity_mentions(utterance, use_tagme=True))

    res = json.loads(self._searcher.search(utterance, topn=self._topn))

    # Aggregate (phrase, score) evidence per entity mid across results.
    entities = dict()
    for r in res:
        for e in r['entities']:
            entities.setdefault(e['mid'], []).append((r['phrase'], r['score']))

    # Rank entities by total evidence score, highest first.
    ranked = sorted(entities.items(),
                    key=lambda item: sum(score for _, score in item[1]),
                    reverse=True)
    # NOTE(review): this filter compares entity *mids* against the linker's
    # entity *names* from question_entities; as written it may never match.
    # Preserved as-is -- confirm intended key against find_entity_mentions.
    named = [(KBEntity.get_entity_name(mid.replace("/", ".")), evidence)
             for mid, evidence in ranked if mid not in question_entities]
    answers = [EvaluationCandidate(None, evidence, [name, ])
               for name, evidence in named]
    self._answers_cache[utterance] = answers
    return answers
# NOTE(review): this `with` block references `vec` and `type_scorer`, which
# are not defined at module level in this view -- it appears to be the tail
# of a function defined earlier in the file. Confirm its indentation and
# enclosing scope against the full file.
with open("type-model.pickle", 'wb') as out:
    pickle.dump((vec, type_scorer), out)


if __name__ == "__main__":
    extract_npmi_ngram_type_pairs()
    # NOTE(review): exit() here makes everything below unreachable;
    # presumably a debugging/experiment toggle -- confirm before removing.
    exit()
    globals.read_configuration('config.cfg')
    parser = globals.get_parser()
    scorer_globals.init()

    datasets = ["webquestions_split_train", ]
    # datasets = ["webquestions_split_train_externalentities", "webquestions_split_dev_externalentities",]
    # datasets = ["webquestions_split_train_externalentities3", "webquestions_split_dev_externalentities3",]

    data = []
    for dataset in datasets:
        queries = load_eval_queries(dataset)
        for index, query in enumerate(queries):
            # Tokenize the question, map each gold answer to entity mids,
            # then look up each mid's notable type.
            tokens = [token.token for token in parser.parse(query.utterance).tokens]
            answer_entities = [mid for answer in query.target_result
                               for mid in KBEntity.get_entityid_by_name(answer, keep_most_triples=True)]
            notable_types = [KBEntity.get_notable_type(entity_mid) for entity_mid in answer_entities]
            data.append((tokens, notable_types))
            logger.info(tokens)
            logger.info([KBEntity.get_entity_name(notable_type) for notable_type in notable_types])
    # Persist (tokens, notable_types) pairs for later training/analysis.
    with open("question_tokens_notable_types.pickle", 'wb') as out:
        pickle.dump(data, out)