def es_deindex(self):
    """Remove this object's document from its tenant's Elasticsearch index.

    The document is addressed by the tenant slug (index), the model's
    ``Meta.document_type`` (doc type) and ``meta.id`` (document id).

    Deletion is best effort: any error raised by the delete call is logged
    and swallowed so a search-backend problem never propagates to the
    caller. Unlike a bare ``except:``, ``KeyboardInterrupt`` and
    ``SystemExit`` are no longer swallowed.
    """
    # Local import: the file's top-level import block is not visible here.
    import logging

    conn = ES(settings.ES_SERVERS, basic_auth=settings.ES_AUTH)
    try:
        # NOTE(review): `meta` is not defined inside this function. Unless it
        # is a module-level name, this raises NameError on every call and the
        # delete never happens -- confirm whether `self.meta.id` (or similar)
        # was intended.
        conn.delete(
            index=self.tenant.slug,
            doc_type=self.Meta.document_type,
            id=meta.id,
        )
    except Exception:
        logging.getLogger(__name__).warning(
            "es_deindex: could not remove document from Elasticsearch",
            exc_info=True,
        )
def es_deindex(self):
    """Delete the Elasticsearch document that mirrors this object.

    Index, doc type and id come from ``self.tenant.slug``,
    ``self.Meta.document_type`` and ``meta.id`` respectively.

    This is a best-effort operation: a failing delete is logged as a warning
    and otherwise ignored. Only ``Exception`` subclasses are caught, so
    interpreter-exit signals (``KeyboardInterrupt``, ``SystemExit``) still
    propagate.
    """
    # Local import: the file's top-level import block is not visible here.
    import logging

    conn = ES(settings.ES_SERVERS, basic_auth=settings.ES_AUTH)
    try:
        # NOTE(review): `meta` is not bound in this scope; if it is not a
        # module global this line always raises NameError (now logged below
        # instead of vanishing). Verify the intended source of the id.
        conn.delete(
            index=self.tenant.slug,
            doc_type=self.Meta.document_type,
            id=meta.id,
        )
    except Exception:
        logging.getLogger(__name__).warning(
            "es_deindex: could not remove document from Elasticsearch",
            exc_info=True,
        )
class KVStore(KVStoreBase):
    """Key-value store that keeps each value in an Elasticsearch document.

    Every key is used as the document id inside the index/type named by
    ``settings.THUMBNAIL_ELASTIC_SEARCH_INDEX`` and
    ``settings.THUMBNAIL_ELASTIC_SEARCH_DOCUMENT_TYPE``; the stored value
    lives under the ``"value"`` field of the document source.
    """

    def __init__(self, *args, **kwargs):
        """Initialise the base store and open the Elasticsearch connection."""
        super(KVStore, self).__init__(*args, **kwargs)
        self.connection = ES(settings.THUMBNAIL_ELASTIC_SEARCH_SERVERS)

    def _get_raw(self, key):
        """Return the raw value stored under *key*, or ``None`` on any failure.

        A missing document, a malformed response and a backend error are all
        reported as ``None``. Only ``Exception`` subclasses are caught, so
        ``KeyboardInterrupt``/``SystemExit`` are not swallowed.
        """
        try:
            value = self.connection.get(
                settings.THUMBNAIL_ELASTIC_SEARCH_INDEX,
                settings.THUMBNAIL_ELASTIC_SEARCH_DOCUMENT_TYPE,
                key,
            )
            return value['_source']['value']
        except Exception:
            return None

    def _set_raw(self, key, value):
        """Store *value* under *key* and return the backend's ``'ok'`` flag."""
        ret = self.connection.index(
            {"value": value},
            settings.THUMBNAIL_ELASTIC_SEARCH_INDEX,
            settings.THUMBNAIL_ELASTIC_SEARCH_DOCUMENT_TYPE,
            key,
        )
        return ret['ok']

    def _delete_raw(self, *keys):
        """Delete every key in *keys*.

        Returns a list with one entry per key, in order: the backend's
        ``'ok'`` flag when the delete call succeeded, ``False`` when it
        raised.
        """
        rets = []
        for key in keys:
            try:
                ret = self.connection.delete(
                    settings.THUMBNAIL_ELASTIC_SEARCH_INDEX,
                    settings.THUMBNAIL_ELASTIC_SEARCH_DOCUMENT_TYPE,
                    key,
                )
                rets.append(ret['ok'])
            except Exception:
                # One failing key must not prevent the remaining deletes.
                rets.append(False)
        return rets

    def _find_keys_raw(self, prefix):
        """Return the ids of stored documents whose ``_id`` starts with *prefix*.

        Only the first 1000 hits are requested (``size=1000, start=0``) and
        no document fields are fetched (``fields=[]``).
        """
        search = Search(
            query=PrefixQuery("_id", prefix),
            size=1000,
            start=0,
            fields=[],
        )
        results = self.connection.search(
            search,
            indexes=[settings.THUMBNAIL_ELASTIC_SEARCH_INDEX],
            doc_types=[settings.THUMBNAIL_ELASTIC_SEARCH_DOCUMENT_TYPE],
        )
        return [hit['_id'] for hit in results['hits']['hits']]
class DocManager():
    """Connect to the backend engine and add, remove and search documents.

    The DocManager creates a connection to Elastic and adds/removes
    documents, and in the case of rollback, searches for them.

    The reason for storing id/doc pairs as opposed to docs is so that
    multiple updates to the same doc reflect the most up to date version as
    opposed to multiple, slightly different versions of a doc.

    We are using elastic native fields for _id and ns, but we also store
    them as fields in the document, due to compatibility issues.
    """

    def __init__(self, url, auto_commit=True, unique_key='_id'):
        """Verify the Elastic URL and establish a connection.

        Raises ``SystemError`` when ``verify_url(url)`` returns ``False``.
        When *auto_commit* is true the periodic refresh loop is started
        immediately.
        """
        if verify_url(url) is False:
            raise SystemError
        self.elastic = ES(server=url)
        self.auto_commit = auto_commit
        self.doc_type = 'string'  # default type is string, change if needed
        self.unique_key = unique_key
        if auto_commit:
            self.run_auto_commit()

    def stop(self):
        """Stop the instance.

        Clears ``auto_commit`` so that ``run_auto_commit`` does not schedule
        another timer.
        """
        self.auto_commit = False

    def upsert(self, doc):
        """Update or insert a document into Elastic.

        If you'd like to have different types of document in your database,
        you can store the doc type as a field in Mongo and set doc_type to
        that field. (e.g. doc_type = doc['_type'])

        The document's unique key is converted to ``str`` in place and used
        as the Elastic id; ``doc['ns']`` names the index. A ``ValueError``
        from indexing is logged and ignored. The index is refreshed after
        every call.
        """
        doc_type = self.doc_type
        index = doc['ns']
        doc[self.unique_key] = str(doc[self.unique_key])
        doc_id = doc[self.unique_key]
        # The original issued a TextQuery search on the id here and discarded
        # the result; indexing with an explicit id already overwrites any
        # existing document, so that unused lookup was removed.
        try:
            self.elastic.index(bsjson.dumps(doc), index, doc_type, doc_id)
        except ValueError:
            logging.info("Could not update %s" % (doc,))
        self.elastic.refresh()

    def remove(self, doc):
        """Remove a document from Elastic.

        The input is a python dictionary that represents a mongo document.
        The document is deleted from the same doc type that ``upsert`` writes
        to (``self.doc_type``). Missing documents, types or indexes are
        ignored.
        """
        try:
            self.elastic.delete(
                doc['ns'], self.doc_type, str(doc[self.unique_key]))
        except (NotFoundException, TypeMissingException,
                IndexMissingException):
            pass

    def _remove(self):
        """For test purposes only.

        Removes all documents in test.test
        """
        try:
            self.elastic.delete('test.test', 'string', '')
        except (NotFoundException, TypeMissingException,
                IndexMissingException):
            pass

    def search(self, start_ts, end_ts):
        """Query Elastic for documents whose ``_ts`` lies in a time range.

        Returns whatever the connection's ``search`` returns for a range
        query on ``_ts`` from *start_ts* to *end_ts*.
        """
        ts_range = ESRange('_ts', from_value=start_ts, to_value=end_ts)
        return self.elastic.search(RangeQuery(ts_range))

    def _search(self):
        """For test purposes only.

        Performs search on Elastic with empty query. Does not have to be
        implemented.
        """
        return self.elastic.search(MatchAllQuery())

    def commit(self):
        """Force a refresh/commit, retrying via ``retry_until_ok``."""
        retry_until_ok(self.elastic.refresh)

    def run_auto_commit(self):
        """Periodically commit to the Elastic server.

        Refreshes once, then re-schedules itself with a one-second ``Timer``
        for as long as ``auto_commit`` is true.
        """
        self.elastic.refresh()
        if self.auto_commit:
            Timer(1, self.run_auto_commit).start()

    def get_last_doc(self):
        """Return the last document stored in the Elastic engine.

        "Last" means highest ``_ts``. Returns ``None`` when the search yields
        no documents.
        """
        result = self.elastic.search(MatchAllQuery(), size=1, sort='_ts:desc')
        for item in result:
            return item
        return None
# Demo script: index a few parent/child documents (some through the bulk
# buffer), update and delete some of them, then drop the index again.
es = ES()
index_name = "my_index"
type_name = "my_type"

from utils_pyes import create_and_add_mapping
create_and_add_mapping(es, index_name, type_name)

# (document, per-call keyword options) in the exact order they are indexed.
# Calls without a "bulk" entry go straight to the server; the others are
# queued until force_bulk().
_seed_documents = [
    ({"name": "Joe Tester", "parsedtext": "Joe Testere nice guy",
      "uuid": "11111", "position": 1},
     {"doc_type": type_name, "id": 1}),
    ({"name": "data1", "value": "value1"},
     {"doc_type": type_name + "2", "id": 1, "parent": 1}),
    ({"name": "Bill Baloney", "parsedtext": "Bill Testere nice guy",
      "uuid": "22222", "position": 2},
     {"doc_type": type_name, "id": 2, "bulk": True}),
    ({"name": "data2", "value": "value2"},
     {"doc_type": type_name + "2", "id": 2, "parent": 2, "bulk": True}),
    ({"name": "Bill Clinton", "parsedtext": """Bill is not nice guy""",
      "uuid": "33333", "position": 3},
     {"doc_type": type_name, "id": 3, "bulk": True}),
]
for _document, _options in _seed_documents:
    es.index(doc=_document, index=index_name, **_options)
es.force_bulk()

# Increment document 2's position twice: once immediately, once via bulk.
_bump_position = 'ctx._source.position += 1'
es.update(index=index_name, doc_type=type_name, id=2, script=_bump_position)
es.update(index=index_name, doc_type=type_name, id=2, script=_bump_position,
          bulk=True)

# Delete document 1 through the bulk buffer and document 3 directly.
es.delete(index=index_name, doc_type=type_name, id=1, bulk=True)
es.delete(index=index_name, doc_type=type_name, id=3)
es.force_bulk()

# Refresh the index, then remove it.
es.indices.refresh(index_name)
es.indices.delete_index(index_name)
class ESIndexerBase(object):
    '''Base helper that builds and queries one Elasticsearch index.

    Wraps an ES connection bound to ES_INDEX_NAME and offers index/type
    management, single-document indexing and a bulk build_index routine.
    Progress and results are reported with print statements.
    '''
    # Connection settings are taken from the module-level names of the same
    # spelling so subclasses can override them.
    ES_HOST = ES_HOST
    ES_INDEX_NAME = ES_INDEX_NAME
    # Doc type used by build_index.
    ES_INDEX_TYPE = 'gene'

    def __init__(self):
        '''Open the connection with ES_INDEX_NAME as the default index.'''
        self.conn = ES(self.ES_HOST, default_indexes=[self.ES_INDEX_NAME], timeout=10.0)
        # Not read anywhere in this class as shown.
        self.step = 10000

    def create_index(self):
        '''Open ES_INDEX_NAME, creating it if opening reports it missing.

        The result of whichever call ran is printed.
        '''
        try:
            print self.conn.open_index(self.ES_INDEX_NAME)
        except IndexMissingException:
            print self.conn.create_index(self.ES_INDEX_NAME)

    def delete_index_type(self, index_type):
        '''Delete all indexes for a given index_type.

        Prints an error and returns None when the type is not present in the
        index mapping. Otherwise asks for confirmation through ask() and
        returns the result of delete_mapping only when the answer is 'Y'.
        '''
        index_name = self.ES_INDEX_NAME
        # Check that index_type exists before prompting.
        mapping = self.conn.get_mapping(index_type, index_name)
        if index_name not in mapping or index_type not in mapping[index_name]:
            print 'Error: index type "%s" does not exist in index "%s".' % (index_type, index_name)
            return
        path = '/%s/%s' % (index_name, index_type)
        # NOTE(review): ask() is defined elsewhere; presumably it prompts the
        # user and returns the typed answer -- confirm.
        if ask('Confirm to delete all data under "%s":' % path) == 'Y':
            return self.conn.delete_mapping(index_name, index_type)

    def index(self, doc, index_type, id=None):
        '''add a doc to the index. If id is not None, the existing doc will be
           updated.

        Returns the result of the connection's index call.
        '''
        return self.conn.index(doc, self.ES_INDEX_NAME, index_type, id=id)

    def delete_index(self, index_type, id):
        '''delete a doc from the index based on passed id.

        Returns the result of the connection's delete call.
        '''
        return self.conn.delete(self.ES_INDEX_NAME, index_type, id)

    def optimize(self):
        '''Optimize ES_INDEX_NAME with wait_for_merge=True and return the result.'''
        return self.conn.optimize(self.ES_INDEX_NAME, wait_for_merge=True)

    def get_field_mapping(self):
        '''Return the field mapping supplied by the dataload module.

        dataload is imported and reloaded on every call, its sources are
        registered, and dataload.get_mapping() is returned.
        '''
        import dataload
        reload(dataload)
        dataload.register_sources()
        return dataload.get_mapping()

    def build_index(self, doc_d, update_mapping=False, bulk=True):
        '''Index every (doc_id, doc) pair of doc_d under ES_INDEX_TYPE.

        doc_d          mapping of document id to document.
        update_mapping when true, the existing mapping (if any) is deleted
                       and the one from get_field_mapping() is put in place
                       first; forced to True when fetching the current
                       mapping raises ElasticSearchException.
        bulk           passed through to each index call.

        Returns -1 when the index cannot be opened, otherwise None.
        '''
        index_name = self.ES_INDEX_NAME
        index_type = self.ES_INDEX_TYPE

        # Test if index exists. The trailing comma keeps the cursor on the
        # same output line.
        try:
            print "Opening index...", self.conn.open_index(index_name)
        except NotFoundException:
            print 'Error: index "%s" does not exist. Create it first.' % index_name
            return -1

        try:
            cur_mapping = self.conn.get_mapping(index_type, index_name)
            empty_mapping = False
        except ElasticSearchException:
            # If no existing mapping is available for index_type,
            # force update_mapping to True.
            empty_mapping = True
            update_mapping = True

        if update_mapping:
            print "Updating mapping...",
            if not empty_mapping:
                # An existing mapping has to be removed before the new one
                # is put.
                print "\n\tRemoving existing mapping...",
                print self.conn.delete_mapping(index_name, index_type)
            _mapping = self.get_field_mapping()
            print self.conn.put_mapping(index_type, _mapping, [index_name])
        print "Building index..."
        t0 = time.time()
        for doc_id, doc in doc_d.items():
            self.conn.index(doc, index_name, index_type, doc_id, bulk=bulk)
        # Flush and refresh after the loop, printing each result.
        print self.conn.flush()
        print self.conn.refresh()
        print "Done[%s]" % timesofar(t0)

    def query(self, qs, fields='symbol,name', **kwargs):
        '''Run the query string qs as a StringQuery and return the result.

        fields and any extra keyword arguments are passed to the
        connection's search call.
        '''
        _q = StringQuery(qs)
        res = self.conn.search(_q, fields=fields, **kwargs)
        return res
class ElasticSearchServer(ESDBRequests):
    """Wrapper around an Elasticsearch connection bound to one index/type.

    Use it to create the index with a mapping, insert and delete documents,
    and run boolean term queries.
    """

    def __init__(self, dburl='http://localhost:9200', indices=None,
                 types=None, usePYCurl=False, ckey=None, cert=None,
                 capath=None):
        """Set up a connection to the Elasticsearch server.

        dburl     -- server URL, validated with ``check_server_url``.
        indices   -- index name (required), validated with ``check_name``.
        types     -- document type name (required), validated with
                     ``check_name``.
        usePYCurl, capath -- accepted but currently unused (PYCurl TODO).
        ckey, cert -- stored on the instance; not yet used for the connection.

        ``indices`` and ``types`` keep their original positions. They carry
        a ``None`` default only because Python forbids non-default
        parameters after a defaulted one; omitting either raises
        ``TypeError``.
        """
        if indices is None or types is None:
            raise TypeError(
                "ElasticSearchServer requires both 'indices' and 'types'")
        check_server_url(dburl)
        # PYCurl TODO
        # Same with cert and key
        self.url = dburl
        self.ESconn = ES(dburl)
        self.ckey = ckey
        self.cert = cert
        check_name(indices)
        check_name(types)
        self.indices = indices
        self.types = types

    def listDatabases(self):
        "List all the databases the server hosts"
        # TODO: '/_all_dbs' is the CouchDB endpoint this class was adapted
        # from; it still needs an Elasticsearch equivalent.
        return self.get('/_all_dbs')

    def createDatabase(self, schema):
        """Create the index if it is missing and install the type mapping.

        schema -- dict used as the ``properties`` of the mapping for
                  ``self.types``.
        """
        self.ESconn.indices.create_index_if_missing(self.indices)
        self.ESconn.indices.put_mapping(
            self.types, {'properties': schema}, [self.indices])

    def insertDoc(self, doc, _id):
        """Index *doc* under id *_id* in the configured index and type."""
        self.ESconn.index(doc, self.indices, self.types, _id)

    def deleteDoc(self, _id):
        """Delete the document with id *_id* from the configured index/type."""
        self.ESconn.delete(self.indices, self.types, _id)

    def termBoolQuery(self, query):
        """Run a boolean query built from term clauses.

        query -- dict with any of the keys ``"must"``, ``"should"`` and
                 ``"must_not"``. Each value is an iterable of dicts whose
                 items (field -> searchable value) each become one
                 ``TermQuery``.

        Returns a dict with ``"status_code"`` (200), ``"message"``
        (``"Successful"``) and ``"content"`` (list of the search results).
        """
        clauses = {"must": [], "should": [], "must_not": []}
        for occurrence in ("must", "should", "must_not"):
            if occurrence not in query:
                continue
            for term_dict in query[occurrence]:
                for field in term_dict:
                    clauses[occurrence].append(
                        TermQuery(field, term_dict[field]))

        # Empty clause lists are passed as None, as the original did.
        bool_query = BoolQuery(
            must=clauses["must"] or None,
            should=clauses["should"] or None,
            must_not=clauses["must_not"] or None,
        )
        results = self.ESconn.search(Search(bool_query), self.indices)
        return {
            "status_code": 200,
            "message": "Successful",
            "content": [result for result in results],
        }
class Elastic(object):
    """Application-level helper around one Elasticsearch connection.

    Call ``init_app`` first; the other methods use ``self.conn``.
    """

    def init_app(self, app):
        """Open the connection to ``app.config['ELASTIC_URL']`` (2s timeout)."""
        self.conn = ES(app.config['ELASTIC_URL'], timeout=2)

    def search(self, start=0, size=20, doc_types='resource',
               indices='order_index', sort=None, **kwargs):
        """Search with a match-all query filtered by the keyword arguments.

        Every keyword argument with a non-empty name becomes a filter:

        * ``complete_time`` -- a sequence; two items give a closed range
          (from, to), any other length an open range starting at the first
          item. An empty-string value disables the filter.
        * anything else -- an exact ``TermFilter`` on that field.

        All filters are AND-ed. *sort* is an optional iterable of sort
        entries added to a ``SortFactory``. Returns the result of the
        connection's ``search`` call.
        """
        filters = []
        for key, value in kwargs.items():
            if not key:
                continue
            if key != 'complete_time':
                filters.append(TermFilter(key, value))
            elif value != '':
                if len(value) == 2:
                    time_range = ESRange('complete_time',
                                         from_value=value[0],
                                         to_value=value[1])
                else:
                    time_range = ESRange('complete_time',
                                         from_value=value[0])
                filters.append(RangeFilter(time_range))

        combined_filter = ANDFilter(filters) if filters else None
        filtered = FilteredQuery(MatchAllQuery(), combined_filter)

        if sort:
            sort_factory = SortFactory()
            for entry in sort:
                sort_factory.add(entry)
            search = Search(filtered, sort=sort_factory)
        else:
            search = Search(filtered)

        return self.conn.search(search, indices=indices, doc_types=doc_types,
                                start=start, size=size)

    def delete(self, index='order_index', doc_type='resource', id=''):
        """Delete document *id* and return the connection's response."""
        return self.conn.delete(index=index, doc_type=doc_type, id=id)

    def create(self, index='order_index', doc_type='resource', doc=None):
        """Index *doc* under ``doc['id']``; best effort.

        Returns the connection's response, or ``None`` when indexing failed
        for any reason (e.g. no connection). ``KeyboardInterrupt`` and
        ``SystemExit`` are not swallowed.
        """
        try:
            return self.conn.index(doc, index, doc_type, id=doc['id'])
        except Exception:  # e.g. no connection
            pass

    def multi_create(self, index='order_index', doc_type='resource',
                     doc=None):
        """Index *doc* locally, falling back to the remote connections.

        If the cache is synced to remote servers, celery should be used.

        Returns the local connection's response on success; otherwise the
        remote connections are tried and ``None`` is returned.
        """
        try:
            return self.conn.index(doc, index, doc_type, id=doc['id'])
        except Exception:  # e.g. no connection
            pass
        # NOTE(review): this point is reached only when the local index call
        # failed, because the success path returns above. `remote_conns` is
        # also never assigned in this class as shown (its setup in init_app
        # is commented out), so this branch currently just prints the error
        # message. Confirm the intended behaviour before changing it.
        try:
            for rconn in self.remote_conns:
                rconn.index(doc, index, doc_type, id=doc['id'])
        except Exception:
            print('--------sync cache to remote error------')
class ElasticCatalog(object):
    """Keeps an Elasticsearch index in step with the containers of a storage.

    Each container is a doc type inside the single index ``elastic_name``;
    the fields to index per container come from ``storage.meta(name)``.
    """

    # Field that is always mapped, in addition to the container's own.
    default_indexes = {
        'zelastic_doc_id': {'type': 'string', 'index': 'not_analyzed'}
    }

    def __init__(self, connection_string, elastic_name, storage, bulk=False,
                 bulk_size=400):
        self.conn = ES(connection_string, bulk_size=bulk_size)
        self.bulk_size = bulk_size
        self.name = elastic_name
        self.storage = storage
        self.bulk = bulk

    def update_mapping(self, name):
        """Create the index if needed and put the mapping for container *name*.

        Declared index types that are not recognised are left out of the
        mapping.
        """
        declared = self.storage.meta(name)['indexes']
        properties = self.default_indexes.copy()

        try:
            self.conn.create_index(self.name)
        except IndexAlreadyExistsException:
            pass

        # (accepted type names, mapping template), matched by equality.
        rules = (
            (('str',), {'type': 'string', 'index': 'not_analyzed'}),
            (('full',), {'type': 'string', 'index': 'analyzed'}),
            (('bool',), {'type': 'boolean'}),
            (('int',), {'type': 'integer'}),
            (('datetime', 'date'), {'type': 'date'}),
            (('float',), {'type': 'float'}),
        )
        for field_name, kind in declared.items():
            for accepted, template in rules:
                if kind in accepted:
                    # Fresh dict per field, as the original built.
                    properties[field_name] = dict(template)
                    break

        mapping = {'ignore_conflicts': True, 'properties': properties}
        self.conn.indices.put_mapping(
            doc_type=name, mapping=mapping, indices=[self.name])

    def id(self, container_name, key):
        """Return the document id used for *key* of *container_name*."""
        return '%s-%s' % (container_name, key)

    def index(self, container_name, doc, key):
        """Index the declared fields of *doc*, plus its key, for a container."""
        # The key is added to the index even though it is not part of the
        # persisted document.
        data = {'zelastic_doc_id': key}
        declared = self.storage.meta(container_name)['indexes']
        data.update(
            (field, doc[field]) for field in declared.keys() if field in doc)
        doc_id = self.id(container_name, key)
        self.conn.index(data, self.name, container_name, doc_id,
                        bulk=self.bulk)

    def delete(self, container_name, key):
        """Delete the indexed document for *key* of *container_name*."""
        doc_id = self.id(container_name, key)
        self.conn.delete(self.name, container_name, doc_id, bulk=self.bulk)

    def delete_all(self, container_name):
        """Drop the whole mapping of *container_name* from the index."""
        self.conn.delete_mapping(self.name, container_name)

    def search(self, container_name, query, **kwargs):
        """Run *query* against one container and return the search result."""
        return self.conn.search(
            query, indexes=[self.name], doc_types=[container_name], **kwargs)

    def getFacets(self, container_name, field, size=100):
        """Return the raw terms-facet response for *field* of a container.

        All terms are requested, ordered by term, limited to *size* entries.
        """
        terms = {
            "all_terms": True,
            "field": field,
            "size": size,
            "order": "term",
        }
        body = {"facets": {field: {"terms": terms}}}
        return self.conn.search_raw(
            body, indexes=[self.name], doc_type=container_name)
# Every call below addresses the same index and doc type.
_target = {"index": index_name, "doc_type": type_name}

# Queue the third document in the bulk buffer, then send the buffer.
_clinton = {
    "name": "Bill Clinton",
    "parsedtext": """Bill is not nice guy""",
    "uuid": "33333",
    "position": 3,
}
es.index(doc=_clinton, id=3, bulk=True, **_target)
es.force_bulk()

# Increment document 2's position twice: once directly, once via bulk.
_bump_position = 'ctx._source.position += 1'
es.update(id=2, script=_bump_position, **_target)
es.update(id=2, script=_bump_position, bulk=True, **_target)

# Delete document 1 via bulk and document 3 directly, then send the buffer.
es.delete(id=1, bulk=True, **_target)
es.delete(id=3, **_target)
es.force_bulk()

# Refresh the index, then remove it.
es.indices.refresh(index_name)
es.indices.delete_index(index_name)