def get_description(self, query):
    """Return words from the leading sentences of the BabelNet gloss for a query.

    The first gloss of the synset returned by ``self._query_babelnet(query)``
    is split on '.', and at most ``self._NUMBER_OF_SENTENCES`` of the
    resulting pieces are run through ``StringTransformer`` and merged into a
    single word list. Status messages are printed to stdout.

    Args:
        query: Query string; passed to ``self._query_babelnet`` and echoed
            in the status messages.

    Returns:
        A list with the words of the selected sentences, or an empty list
        when no gloss text is available for the query.
    """
    additional_words = []
    synset = self._query_babelnet(query)
    if synset is None:
        # Nothing came back from the lookup; no status message is printed
        # for this case.
        return additional_words

    # .get() so that a non-empty response lacking 'glosses' is reported as
    # "not found" instead of raising KeyError.
    glosses = synset.get('glosses')
    if not glosses or 'gloss' not in glosses[0]:
        print('information not found for query: ' + query)
        return additional_words

    sentences = glosses[0]['gloss'].split('.')
    print('Description Result:')
    print(sentences)
    # str.split() always returns a list, so the former "is not None" check
    # on the sentences could never fail and has been dropped.
    print('found information for query: ' + query)

    # Clamp at zero so a non-positive limit selects nothing, exactly like the
    # range()-based loop this replaces.
    limit = max(0, min(len(sentences), self._NUMBER_OF_SENTENCES))
    for sentence in sentences[:limit]:
        # One transformer per sentence, as before; whether an instance can
        # safely be reused across sentences is not visible from here.
        words = StringTransformer().transform(sentence).get_words_list()
        additional_words.extend(words)
    return additional_words
def find(self, query):
    """Search the on-disk Lucene index and return the paths of the best hits.

    The query is transformed with ``StringTransformer``, passed through
    ``self._preprocessor``, joined into a single string and parsed against
    the "content" field with an ``EnglishAnalyzer``. The index under
    "index/" is searched with BM25 scoring for at most 10 documents.

    Args:
        query: Raw query text.

    Returns:
        A list with the UTF-8 encoded "path" field of each hit, in the
        order Lucene returned them; empty when the processed query is blank.
    """
    transformer = StringTransformer()
    processed_query = ' '.join(
        self._preprocessor(transformer.transform(query)))
    if not processed_query.strip():
        # QueryParser cannot parse an empty expression; nothing to search for.
        return []

    analyzer = EnglishAnalyzer(Version.LUCENE_CURRENT)
    parsed_query = QueryParser(
        Version.LUCENE_CURRENT, "content", analyzer).parse(processed_query)

    directory = SimpleFSDirectory(File("index/"))
    try:
        reader = IndexReader.open(directory)
        try:
            searcher = IndexSearcher(reader)
            searcher.setSimilarity(BM25Similarity())
            # IndexSearcher exposes search(Query, int) for top-N retrieval;
            # the previous call used a method name that does not exist on it.
            hits = searcher.search(parsed_query, 10)
            return [
                searcher.doc(hit.doc).get("path").encode("utf-8")
                for hit in hits.scoreDocs
            ]
        finally:
            # Release the index reader even if searching fails.
            reader.close()
    finally:
        directory.close()
def get_type(self, query):
    """Return the words of the first notable type Freebase reports for a query.

    Looks up the topic via ``self._query_freebase(query)``, reads the text of
    the first '/common/topic/notable_types' value and splits it into words
    with ``StringTransformer``. A status message is printed to stdout in
    every case.

    Args:
        query: Query string; passed to ``self._query_freebase`` and echoed
            in the status messages.

    Returns:
        A list with the words of the notable type text, or an empty list
        when the topic or its notable type is missing.
    """
    additional_words = []
    topic = self._query_freebase(query)

    type_text = None
    if topic is not None:
        # Walk the nested structure defensively: a missing 'property' entry
        # or an empty 'values' list used to raise KeyError/IndexError.
        properties = topic.get('property') or {}
        notable_types = properties.get('/common/topic/notable_types') or {}
        values = notable_types.get('values') or []
        if values:
            type_text = values[0].get('text')

    if type_text is None:
        print('parent type not found for query: ' + query)
        return additional_words

    print('found parent type for query: ' + query)
    transformer = StringTransformer()
    additional_words.extend(transformer.transform(type_text).get_words_list())
    # The collected words were previously never returned, so callers always
    # received None.
    return additional_words
def find(self, query):
    """Rank the keys of ``self._service_map`` by similarity to the query.

    The query is transformed into a word list, preprocessed with
    ``self._preprocessor`` and reduced to the words present in
    ``self._fasttext_model.vocab``. Every entry of ``self._service_map`` is
    then scored with ``self._fasttext_model.n_similarity`` against its value.

    Args:
        query: Raw query text.

    Returns:
        A list of all keys of ``self._service_map``, ordered from highest to
        lowest score; entries with equal scores keep the map's iteration order.
    """
    transformer = StringTransformer()
    query_words = self._preprocessor(
        transformer.transform(query).get_words_list())

    # Keep only the words the model knows about.
    vocabulary = self._fasttext_model.vocab
    query_words = [word for word in query_words if word in vocabulary]

    scored = []
    for key, document in self._service_map.items():
        # n_similarity needs two non-empty word lists. Score 0 when either
        # the document or the filtered query is empty (previously only the
        # empty-document case was guarded).
        if query_words and document:
            score = self._fasttext_model.n_similarity(query_words, document)
        else:
            score = 0
        scored.append((key, score))

    # Stable sort on the negated score: best match first, ties in map order.
    scored.sort(key=lambda item: -item[1])
    return [key for key, _ in scored]
def get_description(self, query):
    """Return words from the leading sentences of the Freebase article for a query.

    Looks up the topic via ``self._query_freebase(query)``, reads the first
    '/common/document/text' value of the first '/common/topic/article'
    entry, splits it on '.', and runs at most ``self._NUMBER_OF_SENTENCES``
    of the pieces through ``StringTransformer``. Status messages are printed
    to stdout.

    Args:
        query: Query string; passed to ``self._query_freebase`` and echoed
            in the status messages.

    Returns:
        A list with the words of the selected sentences, or an empty list
        when no article text is available for the query.
    """
    additional_words = []
    topic = self._query_freebase(query)
    if topic is None:
        # Nothing came back from the lookup; no status message is printed
        # for this case.
        return additional_words

    # Walk the nested structure defensively: a missing 'property' entry or an
    # empty 'values' list used to raise KeyError/IndexError.
    properties = topic.get('property') or {}
    article_values = (properties.get('/common/topic/article') or {}).get('values') or []
    article_properties = {}
    if article_values:
        article_properties = article_values[0].get('property') or {}
    text_values = (article_properties.get('/common/document/text') or {}).get('values') or []
    text = text_values[0].get('value') if text_values else None

    if text is None:
        print('information not found for query: ' + query)
        return additional_words

    # str.split() always returns a list, so the former "is not None" check
    # on the sentences could never fail and has been dropped.
    sentences = text.split('.')
    print('found information for query: ' + query)

    # Clamp at zero so a non-positive limit selects nothing, exactly like the
    # range()-based loop this replaces.
    limit = max(0, min(len(sentences), self._NUMBER_OF_SENTENCES))
    for sentence in sentences[:limit]:
        # One transformer per sentence, as before; whether an instance can
        # safely be reused across sentences is not visible from here.
        words = StringTransformer().transform(sentence).get_words_list()
        additional_words.extend(words)
    return additional_words