class FullTextSearch(object):
    """Keep per-type full-text indices of datastore nodes on an ``ES`` server.

    Every index is stored under a namespaced name built as
    ``str(type) + "-_-" + index_name``, so the indices belonging to one node
    type can be found again by prefix (see ``get_indices_of_type``).

    ``self.datastore`` is read by ``search_index`` and ``populate_index`` but
    is never assigned in this class; it has to be attached to the instance
    before either of those methods is called.
    """

    # NOTE(review): this file contains a second, later definition of
    # FullTextSearch; at import time the later one is the one that stays bound
    # to the name.

    # Separator placed between the node type and the index name.
    _NS_SEPARATOR = "-_-"

    def __init__(self, server, settings=None):
        """Open the connection and choose the index-creation settings.

        Args:
            server: Passed unchanged to ``ES(...)``.
            settings: Settings handed to ``conn.create_index``. Any falsy
                value (``None``, ``{}``) selects the built-in n-gram settings.
        """
        self.conn = ES(server)
        # Cache of the server's indices; filled by refresh_index_cache().
        self.indices = {}
        self.settings = settings if settings else self._default_settings()

    @staticmethod
    def _default_settings():
        """Return a fresh copy of the default n-gram analysis settings."""
        return {
            'index': {
                'analysis': {
                    'analyzer': {
                        # Whole value as one token, lowercased, then split
                        # into n-grams by 'filter_ngram' below.
                        'ngram_analyzer': {
                            'tokenizer': 'keyword',
                            'filter': ['lowercase', 'filter_ngram'],
                            'type': 'custom',
                        },
                    },
                    'filter': {
                        'filter_ngram': {
                            'type': 'nGram',
                            'max_gram': 30,
                            'min_gram': 1,
                        },
                    },
                },
            },
        }

    @classmethod
    def _ns_index_name(cls, node_type, index_name):
        """Build the namespaced index name for a node type and index name."""
        return str(node_type) + cls._NS_SEPARATOR + index_name

    def _delete_if_present(self, ns_index_name, node_type, key):
        """Delete one document, ignoring the case where it is not indexed."""
        try:
            self.conn.delete(ns_index_name, node_type, key)
        except exceptions.NotFoundException:
            pass

    def search_index(self, type, index_names, query_string, num_results=-1):
        """Run a wildcard query and return the matching datastore nodes.

        Args:
            type: Node type; used as the doc type and as the index namespace.
            index_names: Iterable of un-namespaced index names to search.
            query_string: Query text; lowercased before it is sent.
            num_results: Upper bound on the number of nodes returned.
                ``-1`` (the default) returns every hit in the response.

        Returns:
            A list of ``self.datastore.get_node(type, hit_id)`` results, in
            the order of the hits in the response.
        """
        ns_index_names = [
            self._ns_index_name(type, name) for name in index_names
        ]
        query = WildcardQuery('_all', query_string.lower())
        results = self.conn.search(query=query, indices=ns_index_names,
                                   doc_types=type)
        hits = results['hits']['hits']
        # Trim the hit list itself (not len(results), which counts the keys
        # of the response) and do it before loading nodes, so nodes that
        # would be discarded are never fetched from the datastore.
        if num_results != -1:
            hits = hits[0:num_results]
        return [self.datastore.get_node(type, hit['_id']) for hit in hits]

    def create_index(self, type, indexed_variables, index_name):
        """(Re)create an index for *type* and fill it with the existing nodes.

        Any existing index with the same namespaced name is deleted first.

        Args:
            type: Node type the index belongs to.
            indexed_variables: Iterable of attribute names to index.
            index_name: Un-namespaced name of the index.
        """
        ns_index_name = self._ns_index_name(type, index_name)
        self.conn.delete_index_if_exists(ns_index_name)
        self.conn.create_index(ns_index_name, self.settings)

        properties = {}
        for arg in indexed_variables:
            properties[arg] = {
                'boost': 1.0,
                'analyzer': 'ngram_analyzer',
                'type': u'string',
                'term_vector': 'with_positions_offsets',
            }
        index_settings = {
            'index_analyzer': 'ngram_analyzer',
            'search_analyzer': 'standard',
            'properties': properties,
        }
        self.conn.put_mapping(str(type), index_settings, [ns_index_name])
        self.refresh_index_cache()
        self.populate_index(type, index_name)

    def refresh_index_cache(self):
        """Reload ``self.indices`` from the server."""
        self.indices = self.conn.get_indices()

    def delete_index(self, type, index_name):
        """Delete the index for *type* / *index_name* if it exists."""
        ns_index_name = self._ns_index_name(type, index_name)
        self.conn.delete_index_if_exists(ns_index_name)
        self.refresh_index_cache()

    def populate_index(self, type, index_name):
        """Add all the currently existing nodes of *type* into the index.

        The nodes are the targets of the outgoing relationships of the
        datastore's reference node for *type*.
        """
        ns_index_name = self._ns_index_name(type, index_name)
        ref_node = self.datastore.get_reference_node(type)
        nodes = [rel.target_node for rel in ref_node.instance.outgoing]
        mapping = self.conn.get_mapping(type, ns_index_name)
        for node in nodes:
            document = self.populate_index_document(
                type, ns_index_name, node.attributes, mapping)
            self._delete_if_present(ns_index_name, type, node.key)
            try:
                self.conn.index(document, ns_index_name, type, node.key)
            except exceptions.ElasticSearchParseException:
                # Best effort: a node the server cannot parse is skipped so
                # the remaining nodes still get indexed.
                pass
        self.conn.refresh([ns_index_name])

    def on_create(self, node):
        """Index a newly created node in every index of its type."""
        for ns_index_name in self.get_indices_of_type(node.type):
            mapping = self.conn.get_mapping(node.type, ns_index_name)
            document = self.populate_index_document(
                node.type, ns_index_name, node.attributes, mapping)
            self.conn.index(document, ns_index_name, node.type, node.key)
            self.conn.refresh([ns_index_name])

    def on_delete(self, node):
        """Remove a deleted node from every index of its type."""
        for ns_index_name in self.get_indices_of_type(node.type):
            try:
                self.conn.delete(ns_index_name, node.type, node.key)
                self.conn.refresh([ns_index_name])
            except exceptions.NotFoundException:
                # Nothing to remove from this index.
                pass

    def on_modify(self, node):
        """Re-index a modified node in every index of its type."""
        for ns_index_name in self.get_indices_of_type(node.type):
            mapping = self.conn.get_mapping(node.type, ns_index_name)
            document = self.populate_index_document(
                node.type, ns_index_name, node.attributes, mapping)
            # A node that is not indexed yet must still be indexed, so a
            # missing old document does not abort the update (same handling
            # as populate_index).
            self._delete_if_present(ns_index_name, node.type, node.key)
            try:
                self.conn.index(document, ns_index_name, node.type, node.key)
                self.conn.refresh([ns_index_name])
            except exceptions.NotFoundException:
                # Kept from the previous behaviour: not-found errors raised
                # while re-indexing are not propagated to the caller.
                pass

    def get_indices_of_type(self, type):
        """Return the cached namespaced index names that belong to *type*.

        The lookup uses ``self.indices``; call ``refresh_index_cache`` first
        if the cache may be stale.
        """
        # str() matches how the namespaced names are built everywhere else
        # and keeps non-string types from raising TypeError.
        prefix = str(type) + self._NS_SEPARATOR
        return [name for name in self.indices if name.startswith(prefix)]

    def populate_index_document(self, type, ns_index_name, attributes,
                                mapping):
        """Build the document to index from a node's attributes.

        Args:
            type: Node type; key into *mapping*.
            ns_index_name: Unused; kept for interface compatibility.
            attributes: Mapping of attribute name to value for the node.
            mapping: Index mapping; ``mapping[type]['properties']`` names the
                indexed attributes.

        Returns:
            A dict with one entry per indexed attribute present on the node.
        """
        document = {}
        for arg in mapping[type]['properties']:
            try:
                document[arg] = attributes[arg]
            except KeyError:
                # If this attribute doesn't exist for this node, just skip it.
                pass
        return document
class FullTextSearch(object):
    """Keep per-type full-text indices of datastore nodes on an ``ES`` server.

    Every index is stored under a namespaced name built as
    ``str(type) + "-_-" + index_name``, so the indices belonging to one node
    type can be found again by prefix (see ``get_indices_of_type``).

    ``self.datastore`` is read by ``search_index`` and ``populate_index`` but
    is never assigned in this class; it has to be attached to the instance
    before either of those methods is called.
    """

    # NOTE(review): this file also contains an earlier definition of
    # FullTextSearch; this later definition is the one that stays bound to
    # the name at import time.

    # Separator placed between the node type and the index name.
    _NS_SEPARATOR = "-_-"

    def __init__(self, server, settings=None):
        """Open the connection and choose the index-creation settings.

        Args:
            server: Passed unchanged to ``ES(...)``.
            settings: Settings handed to ``conn.create_index``. Any falsy
                value (``None``, ``{}``) selects the built-in n-gram settings.
        """
        self.conn = ES(server)
        # Cache of the server's indices; filled by refresh_index_cache().
        self.indices = {}
        self.settings = settings if settings else self._default_settings()

    @staticmethod
    def _default_settings():
        """Return a fresh copy of the default n-gram analysis settings."""
        return {
            'index': {
                'analysis': {
                    'analyzer': {
                        # Whole value as one token, lowercased, then split
                        # into n-grams by 'filter_ngram' below.
                        'ngram_analyzer': {
                            'tokenizer': 'keyword',
                            'filter': ['lowercase', 'filter_ngram'],
                            'type': 'custom',
                        },
                    },
                    'filter': {
                        'filter_ngram': {
                            'type': 'nGram',
                            'max_gram': 30,
                            'min_gram': 1,
                        },
                    },
                },
            },
        }

    @classmethod
    def _ns_index_name(cls, node_type, index_name):
        """Build the namespaced index name for a node type and index name."""
        return str(node_type) + cls._NS_SEPARATOR + index_name

    def _delete_if_present(self, ns_index_name, node_type, key):
        """Delete one document, ignoring the case where it is not indexed."""
        try:
            self.conn.delete(ns_index_name, node_type, key)
        except exceptions.NotFoundException:
            pass

    def search_index(self, type, index_names, query_string, num_results=-1):
        """Run a wildcard query and return the matching datastore nodes.

        Args:
            type: Node type; used as the doc type and as the index namespace.
            index_names: Iterable of un-namespaced index names to search.
            query_string: Query text; lowercased before it is sent.
            num_results: Upper bound on the number of nodes returned.
                ``-1`` (the default) returns every hit in the response.

        Returns:
            A list of ``self.datastore.get_node(type, hit_id)`` results, in
            the order of the hits in the response.
        """
        ns_index_names = [
            self._ns_index_name(type, name) for name in index_names
        ]
        query = WildcardQuery('_all', query_string.lower())
        results = self.conn.search(query=query, indices=ns_index_names,
                                   doc_types=type)
        hits = results['hits']['hits']
        # Trim the hit list itself (not len(results), which counts the keys
        # of the response) and do it before loading nodes, so nodes that
        # would be discarded are never fetched from the datastore.
        if num_results != -1:
            hits = hits[0:num_results]
        return [self.datastore.get_node(type, hit['_id']) for hit in hits]

    def create_index(self, type, indexed_variables, index_name):
        """(Re)create an index for *type* and fill it with the existing nodes.

        Any existing index with the same namespaced name is deleted first.

        Args:
            type: Node type the index belongs to.
            indexed_variables: Iterable of attribute names to index.
            index_name: Un-namespaced name of the index.
        """
        ns_index_name = self._ns_index_name(type, index_name)
        self.conn.delete_index_if_exists(ns_index_name)
        self.conn.create_index(ns_index_name, self.settings)

        properties = {}
        for arg in indexed_variables:
            properties[arg] = {
                'boost': 1.0,
                'analyzer': 'ngram_analyzer',
                'type': u'string',
                'term_vector': 'with_positions_offsets',
            }
        index_settings = {
            'index_analyzer': 'ngram_analyzer',
            'search_analyzer': 'standard',
            'properties': properties,
        }
        self.conn.put_mapping(str(type), index_settings, [ns_index_name])
        self.refresh_index_cache()
        self.populate_index(type, index_name)

    def refresh_index_cache(self):
        """Reload ``self.indices`` from the server."""
        self.indices = self.conn.get_indices()

    def delete_index(self, type, index_name):
        """Delete the index for *type* / *index_name* if it exists."""
        ns_index_name = self._ns_index_name(type, index_name)
        self.conn.delete_index_if_exists(ns_index_name)
        self.refresh_index_cache()

    def populate_index(self, type, index_name):
        """Add all the currently existing nodes of *type* into the index.

        The nodes are the targets of the outgoing relationships of the
        datastore's reference node for *type*.
        """
        ns_index_name = self._ns_index_name(type, index_name)
        ref_node = self.datastore.get_reference_node(type)
        nodes = [rel.target_node for rel in ref_node.instance.outgoing]
        mapping = self.conn.get_mapping(type, ns_index_name)
        for node in nodes:
            document = self.populate_index_document(
                type, ns_index_name, node.attributes, mapping)
            self._delete_if_present(ns_index_name, type, node.key)
            try:
                self.conn.index(document, ns_index_name, type, node.key)
            except exceptions.ElasticSearchParseException:
                # Best effort: a node the server cannot parse is skipped so
                # the remaining nodes still get indexed.
                pass
        self.conn.refresh([ns_index_name])

    def on_create(self, node):
        """Index a newly created node in every index of its type."""
        for ns_index_name in self.get_indices_of_type(node.type):
            mapping = self.conn.get_mapping(node.type, ns_index_name)
            document = self.populate_index_document(
                node.type, ns_index_name, node.attributes, mapping)
            self.conn.index(document, ns_index_name, node.type, node.key)
            self.conn.refresh([ns_index_name])

    def on_delete(self, node):
        """Remove a deleted node from every index of its type."""
        for ns_index_name in self.get_indices_of_type(node.type):
            try:
                self.conn.delete(ns_index_name, node.type, node.key)
                self.conn.refresh([ns_index_name])
            except exceptions.NotFoundException:
                # Nothing to remove from this index.
                pass

    def on_modify(self, node):
        """Re-index a modified node in every index of its type."""
        for ns_index_name in self.get_indices_of_type(node.type):
            mapping = self.conn.get_mapping(node.type, ns_index_name)
            document = self.populate_index_document(
                node.type, ns_index_name, node.attributes, mapping)
            # A node that is not indexed yet must still be indexed, so a
            # missing old document does not abort the update (same handling
            # as populate_index).
            self._delete_if_present(ns_index_name, node.type, node.key)
            try:
                self.conn.index(document, ns_index_name, node.type, node.key)
                self.conn.refresh([ns_index_name])
            except exceptions.NotFoundException:
                # Kept from the previous behaviour: not-found errors raised
                # while re-indexing are not propagated to the caller.
                pass

    def get_indices_of_type(self, type):
        """Return the cached namespaced index names that belong to *type*.

        The lookup uses ``self.indices``; call ``refresh_index_cache`` first
        if the cache may be stale.
        """
        # str() matches how the namespaced names are built everywhere else
        # and keeps non-string types from raising TypeError.
        prefix = str(type) + self._NS_SEPARATOR
        return [name for name in self.indices if name.startswith(prefix)]

    def populate_index_document(self, type, ns_index_name, attributes,
                                mapping):
        """Build the document to index from a node's attributes.

        Args:
            type: Node type; key into *mapping*.
            ns_index_name: Unused; kept for interface compatibility.
            attributes: Mapping of attribute name to value for the node.
            mapping: Index mapping; ``mapping[type]['properties']`` names the
                indexed attributes.

        Returns:
            A dict with one entry per indexed attribute present on the node.
        """
        document = {}
        for arg in mapping[type]['properties']:
            try:
                document[arg] = attributes[arg]
            except KeyError:
                # If this attribute doesn't exist for this node, just skip it.
                pass
        return document