def __call__(self, dquery):
    """Translate a catalog query dict into a legacy ES 'filtered' query body.

    Keys that are neither catalog indexes nor one of the ES-only text
    indexes are ignored.  Clauses from filterable indexes are ANDed as
    filters; a non-filter clause replaces the main query.
    """
    ES_ONLY = ('SearchableText', 'Title', 'Description')
    catalog = self.catalogtool._catalog
    known = catalog.indexes.keys()
    main_query = {'match_all': {}}
    filter_clauses = []
    for name, criteria in dquery.items():
        if name not in known and name not in ES_ONLY:
            continue
        index = getIndex(catalog, name)
        if index is None and name in ES_ONLY:
            # index was removed from plone for performance reasons but the
            # field is still maintained on the ES side
            index = EZCTextIndex(catalog, name)
        clause = index.get_query(name, criteria)
        if clause is None:
            continue
        if index is not None and index.filter_query:
            filter_clauses.append(clause)
        else:
            main_query = clause
    if not filter_clauses:
        return main_query
    return {
        'filtered': {
            'filter': {
                'and': filter_clauses
            },
            'query': main_query
        }
    }
def __call__(self):
    """Build the ES mapping dict, creating the versioned index/alias first.

    Starts from the configured default mapping and adds one entry per
    catalog index; names already present in the defaults are skipped so
    the default analyzers win.  Raises if a catalog index cannot be
    resolved.
    """
    properties = self._default_mapping.copy()
    for name in self.catalog.indexes.keys():
        index = getIndex(self.catalog, name)
        if index is None:
            raise Exception('Can not locate index for %s' % (name))
        # prevent clobbering default mapping analyzers
        if name not in properties:
            properties[name] = index.create_mapping(name)
    conn = self.es.connection
    index_name = self.es.index_name
    if not conn.indices.exists(index_name):
        if not self.es.index_version:
            # version counter has never been initialized
            self.es.bump_index_version()
        versioned = '%s_%i' % (index_name, self.es.index_version)
        if not conn.indices.exists(versioned):
            conn.indices.create(
                versioned, body=self.get_index_creation_body())
        if not conn.indices.exists_alias(name=index_name):
            conn.indices.put_alias(index=versioned, name=index_name)
    # else: index was created before the versioned-alias scheme; nothing
    # safe to do here besides attempting in-place updates
    for key in properties:
        if key in self._search_attributes:
            properties[key]['store'] = True
    return {'properties': properties}
def convertToElastic(self):
    """Mark the catalog as ES-backed and push the document mappings.

    Persists the converted flag on the catalog tool, builds one mapping
    entry per catalog index, and registers both the main and the
    transaction doc-type mappings on the ES index.
    """
    setattr(self.catalogtool, CONVERTED_ATTR, True)
    self.catalogtool._p_changed = True
    properties = {}
    for name in self.catalog.indexes.keys():
        index = getIndex(self.catalog, name)
        if index is None:
            raise Exception("Can not locate index for %s" % (name))
        properties[name] = index.create_mapping(name)
    # all brain metadata goes into a single json-encoded, unanalyzed
    # string field; none of the other index data is stored
    properties['_metadata'] = {
        'type': 'string',
        'index': 'not_analyzed',
        'store': True
    }
    conn = self.conn
    try:
        conn.create_index(self.catalogsid)
    except IndexAlreadyExistsException:
        # already provisioned; mapping updates below still apply
        pass
    conn.indices.put_mapping(
        doc_type=self.catalogtype,
        mapping={'properties': properties},
        indices=[self.catalogsid])
    conn.indices.put_mapping(
        doc_type=self.trns_catalogtype,
        mapping=self.trns_mapping,
        indices=[self.catalogsid])
def __call__(self, dquery):
    """Build a pyes query object from a catalog query dict.

    Filter clauses are ANDed around the main query; when SearchableText
    is queried the result is wrapped in a Search with <b>...</b>
    highlighting on that field.
    """
    filters = []
    catalog = self.catalogtool._catalog
    idxs = catalog.indexes.keys()
    query = MatchAllQuery()
    for key, value in dquery.items():
        if key not in idxs:
            continue
        index = getIndex(catalog, key)
        if index is None:
            continue
        qq = index.get_query(key, value)
        if qq is None:
            continue
        # get_query may return (clause, is_query) to flag a full query
        # rather than a filter.  isinstance instead of type()==: idiomatic
        # and tolerant of tuple subclasses (e.g. namedtuples).
        if isinstance(qq, tuple):
            qq, is_query = qq
        else:
            is_query = False
        if is_query:
            query = qq
        else:
            filters.append(qq)
    if filters:
        query = FilteredQuery(query, ANDFilter(filters))
    if 'SearchableText' in dquery:
        hl = HighLighter(pre_tags=['<b>'], post_tags=['</b>'])
        hl.add_field('SearchableText')
        return Search(query, highlight=hl)
    return query
def __call__(self):
    """Return the ES mapping, ensuring the versioned index and alias exist.

    Copies the default mapping and adds an entry for every catalog index;
    a missing index is a hard error.  Attributes listed in the search
    attributes are additionally marked as stored.
    """
    props = self._default_mapping.copy()
    for name in self.catalog.indexes.keys():
        idx = getIndex(self.catalog, name)
        if idx is not None:
            props[name] = idx.create_mapping(name)
        else:
            raise Exception('Can not locate index for %s' % (name))
    conn = self.es.connection
    alias = self.es.index_name
    if conn.indices.exists(alias):
        # legacy index created before the versioned-alias scheme; we can't
        # migrate from here, only try in-place updates later
        pass
    else:
        if not self.es.index_version:
            # first run: seed the version counter
            self.es.bump_index_version()
        real_name = '%s_%i' % (alias, self.es.index_version)
        if not conn.indices.exists(real_name):
            conn.indices.create(
                real_name, body=self.get_index_creation_body())
        if not conn.indices.exists_alias(name=alias):
            conn.indices.put_alias(index=real_name, name=alias)
    for key in props:
        if key in self._search_attributes:
            props[key]['store'] = True
    return {'properties': props}
def __call__(self, dquery):
    """Build a pyes query object from a catalog query dict.

    Clauses flagged as full queries replace the main query; everything
    else is collected as a filter and ANDed around it.
    """
    filters = []
    catalog = self.catalogtool._catalog
    idxs = catalog.indexes.keys()
    query = MatchAllQuery()
    for key, value in dquery.items():
        if key not in idxs:
            continue
        index = getIndex(catalog, key)
        if index is None:
            continue
        qq = index.get_query(key, value)
        if qq is None:
            continue
        # get_query may return (clause, is_query).  isinstance instead of
        # type()==: idiomatic and tolerant of tuple subclasses.
        if isinstance(qq, tuple):
            qq, is_query = qq
        else:
            is_query = False
        if is_query:
            query = qq
        else:
            filters.append(qq)
    if not filters:
        return query
    return FilteredQuery(query, ANDFilter(filters))
def get_index_data(obj, es):  # noqa: C901
    """Collect all values to be indexed for obj, keyed by index name.

    Regular catalog indexes are read via their index adapter; ES-only
    indexes (removed from plone for performance) fall back to an IIndexer
    adapter or a plain attribute.  Additional data providers get the last
    word.  Indexing errors are logged and recorded as None.
    """
    catalog = es.catalogtool._catalog
    wrapped_object = get_wrapped_object(obj, es)
    index_data = {}
    for index_name in catalog.indexes.keys():
        index = getIndex(catalog, index_name)
        if index is not None:
            try:
                value = index.get_value(wrapped_object)
            except Exception:
                logger.error('Error indexing value: %s: %s\n%s' %
                             ('/'.join(obj.getPhysicalPath()), index_name,
                              traceback.format_exc()))
                value = None
            if value in (None, 'None'):
                # yes, we'll index null data...
                value = None
            # Ignore errors in converting to unicode, so json.dumps
            # does not barf when we're trying to send data to ES.
            if six.PY2:
                if isinstance(value, str):
                    value = six.text_type(value, 'utf-8', 'ignore')
            else:
                if isinstance(value, bytes):
                    value = value.decode('utf-8', 'ignore')
            index_data[index_name] = value
    # in case these indexes are deleted
    # (to increase performance and improve ram usage)
    for name in getESOnlyIndexes():
        if name in index_data:
            continue
        indexer = queryMultiAdapter((obj, es.catalogtool), IIndexer,
                                    name=name)
        if indexer is not None:
            try:
                val = indexer()
                # BUG FIX: this branch previously tested and converted
                # `value` (a stale leftover from the loop above) instead of
                # `val`, so indexer results were never unicode-normalized.
                if six.PY2:
                    if isinstance(val, str):
                        val = six.text_type(val, 'utf-8', 'ignore')
                else:
                    if isinstance(val, bytes):
                        val = val.decode('utf-8', 'ignore')
                index_data[name] = val
            except Exception:
                logger.error('Error indexing value: %s: %s\n%s' % ('/'.join(
                    obj.getPhysicalPath()), name, traceback.format_exc()))
        else:
            val = getattr(obj, name, None)
            if callable(val):
                val = val()
            index_data[name] = val
    for _, adapter in getAdapters((obj, ), IAdditionalIndexDataProvider):
        index_data.update(adapter(es, index_data))
    return index_data
def catalog_object(self, obj, uid=None, idxs=None, update_metadata=1,
                   pghandler=None):
    """Index obj in ES and, depending on mode, in the patched catalog.

    In DISABLE_MODE only the patched catalog is used; in DUAL_MODE both
    are.  idxs limits which indexes are (re)computed; empty/None means
    all of them.  Metadata is serialized into the single '_metadata'
    field.
    """
    # Sentinel instead of a mutable default argument; normalize to []
    # so the patched catalog sees exactly what it used to.
    if idxs is None:
        idxs = []
    mode = self.mode
    if mode in (DISABLE_MODE, DUAL_MODE):
        result = self.patched.catalog_object(obj, uid, idxs,
                                             update_metadata, pghandler)
        if mode == DISABLE_MODE:
            return result
    if not IIndexableObject.providedBy(obj):
        # This is the CMF 2.2 compatible approach, which should be used
        # going forward
        wrapper = queryMultiAdapter((obj, self.catalogtool),
                                    IIndexableObject)
        wrapped_object = wrapper if wrapper is not None else obj
    else:
        wrapped_object = obj
    conn = self.conn
    catalog = self.catalog
    if idxs == []:
        idxs = catalog.indexes.keys()
    index_data = {}
    for index_name in idxs:
        index = getIndex(catalog, index_name)
        if index is not None:
            value = index.get_value(wrapped_object)
            if value in (None, 'None'):
                # yes, we'll index null data...
                value = None
            index_data[index_name] = value
    if update_metadata:
        metadata = {}
        for meta_name in catalog.names:
            attr = getattr(wrapped_object, meta_name, MV)
            if (attr is not MV and safe_callable(attr)):
                attr = attr()
            metadata[meta_name] = attr
        # XXX Also, always index path so we can use it with the brain
        # to make urls
        metadata['_path'] = wrapped_object.getPhysicalPath()
        index_data['_metadata'] = dumps(metadata)
    uid = getUID(obj)
    try:
        doc = conn.get(self.catalogsid, self.catalogtype, uid)
        self.registerInTransaction(uid, td.Actions.modify, doc)
    except NotFoundException:
        self.registerInTransaction(uid, td.Actions.add)
    conn.index(index_data, self.catalogsid, self.catalogtype, uid)
    if self.registry.auto_flush:
        conn.refresh()
def get_index_data(obj, es):
    """Collect all values to be indexed for obj, keyed by index name.

    Python-2-era variant: regular catalog indexes come from their index
    adapter; ES-only indexes fall back to an IIndexer adapter or a plain
    attribute.  Errors are logged and recorded as None.
    """
    catalog = es.catalogtool._catalog
    wrapped_object = get_wrapped_object(obj, es)
    index_data = {}
    for index_name in catalog.indexes.keys():
        index = getIndex(catalog, index_name)
        if index is not None:
            try:
                value = index.get_value(wrapped_object)
            except Exception:
                logger.error('Error indexing value: %s: %s\n%s' % (
                    '/'.join(obj.getPhysicalPath()), index_name,
                    traceback.format_exc()))
                value = None
            if value in (None, 'None'):
                # yes, we'll index null data...
                value = None
            # Ignore errors in converting to unicode, so json.dumps
            # does not barf when we're trying to send data to ES.
            if isinstance(value, str):
                value = unicode(value, 'utf-8', 'ignore')
            index_data[index_name] = value
    # in case these indexes are deleted
    # (to increase performance and improve ram usage)
    for name in getESOnlyIndexes():
        if name in index_data:
            continue
        indexer = queryMultiAdapter((obj, es.catalogtool), IIndexer,
                                    name=name)
        if indexer is not None:
            try:
                val = indexer()
                # BUG FIX: the guard previously checked `value` (a stale
                # leftover from the loop above) while converting `val`, so
                # the conversion fired on the wrong condition.
                if isinstance(val, str):
                    val = unicode(val, 'utf-8', 'ignore')
                index_data[name] = val
            except Exception:
                logger.error('Error indexing value: %s: %s\n%s' % (
                    '/'.join(obj.getPhysicalPath()), name,
                    traceback.format_exc()))
        else:
            val = getattr(obj, name, None)
            if callable(val):
                val = val()
            index_data[name] = val
    for _, adapter in getAdapters((obj,), IAdditionalIndexDataProvider):
        index_data.update(adapter(es, index_data))
    return index_data
def catalog_object(self, obj, uid=None, idxs=None, update_metadata=1,
                   pghandler=None):
    """Index obj in ES and, depending on mode, in the patched catalog.

    DISABLE_MODE delegates entirely to the patched catalog; DUAL_MODE
    indexes in both.  idxs limits the indexes recomputed; empty/None
    means all.  Metadata is json-encoded into the '_metadata' field.
    """
    # Sentinel instead of a mutable default argument; normalize to []
    # so the patched catalog call below behaves exactly as before.
    if idxs is None:
        idxs = []
    mode = self.mode
    if mode in (DISABLE_MODE, DUAL_MODE):
        result = self.patched.catalog_object(
            obj, uid, idxs, update_metadata, pghandler)
        if mode == DISABLE_MODE:
            return result
    if not IIndexableObject.providedBy(obj):
        # This is the CMF 2.2 compatible approach, which should be used
        # going forward
        wrapper = queryMultiAdapter((obj, self.catalogtool),
                                    IIndexableObject)
        wrapped_object = wrapper if wrapper is not None else obj
    else:
        wrapped_object = obj
    conn = self.conn
    catalog = self.catalog
    if idxs == []:
        idxs = catalog.indexes.keys()
    index_data = {}
    for index_name in idxs:
        index = getIndex(catalog, index_name)
        if index is not None:
            value = index.get_value(wrapped_object)
            if value in (None, 'None'):
                # yes, we'll index null data...
                value = None
            index_data[index_name] = value
    if update_metadata:
        metadata = {}
        for meta_name in catalog.names:
            attr = getattr(wrapped_object, meta_name, MV)
            if (attr is not MV and safe_callable(attr)):
                attr = attr()
            metadata[meta_name] = attr
        # XXX Also, always index path so we can use it with the brain
        # to make urls
        metadata['_path'] = wrapped_object.getPhysicalPath()
        index_data['_metadata'] = dumps(metadata)
    uid = getUID(obj)
    try:
        doc = conn.get(self.catalogsid, self.catalogtype, uid)
        self.registerInTransaction(uid, td.Actions.modify, doc)
    except NotFoundException:
        self.registerInTransaction(uid, td.Actions.add)
    conn.index(index_data, self.catalogsid, self.catalogtype, uid)
    if self.registry.auto_flush:
        conn.refresh()
def __call__(self, dquery):
    """Translate a catalog query dict into an ES bool query body.

    Clauses from filterable indexes land in bool/filter; the rest become
    bool/should with minimum_should_match=1.  With no usable clauses a
    match_all query is returned.
    """
    catalog = self.catalogtool._catalog
    known = catalog.indexes.keys()
    es_only = getESOnlyIndexes()
    filter_clauses = []
    should_clauses = []
    for key, value in dquery.items():
        if key not in known and key not in es_only:
            continue
        index = getIndex(catalog, key)
        if index is None and key in es_only:
            # index was removed from plone for performance but the field
            # is still maintained on the ES side
            index = EZCTextIndex(catalog, key)
        clause = index.get_query(key, value)
        if clause is None:
            continue
        if index is not None and index.filter_query:
            bucket = filter_clauses
        else:
            bucket = should_clauses
        if isinstance(clause, list):
            bucket.extend(clause)
        else:
            bucket.append(clause)
    if not filter_clauses and not should_clauses:
        return {'match_all': {}}
    body = {'bool': {}}
    if filter_clauses:
        body['bool']['filter'] = filter_clauses
    if should_clauses:
        body['bool']['should'] = should_clauses
        body['bool']['minimum_should_match'] = 1
    return body
def __call__(self, dquery):
    """Translate a catalog query dict into an ES bool query body.

    Clauses from filterable indexes go into bool/filter; the rest into
    bool/should with minimum_should_match=1.  Returns match_all when no
    clause applies.
    """
    filters = []
    matches = []
    catalog = self.catalogtool._catalog
    idxs = catalog.indexes.keys()
    query = {'match_all': {}}
    es_only_indexes = getESOnlyIndexes()
    for key, value in dquery.items():
        if key not in idxs and key not in es_only_indexes:
            continue
        index = getIndex(catalog, key)
        if index is None and key in es_only_indexes:
            # deleted index for plone performance but still need on ES
            index = EZCTextIndex(catalog, key)
        qq = index.get_query(key, value)
        if qq is None:
            continue
        if index is not None and index.filter_query:
            if isinstance(qq, list):
                filters.extend(qq)
            else:
                filters.append(qq)
        else:
            if isinstance(qq, list):
                matches.extend(qq)
            else:
                matches.append(qq)
    if len(filters) == 0 and len(matches) == 0:
        return query
    # BUG FIX: previously 'should' and 'minimum_should_match' were always
    # emitted; with only filter clauses that produced an empty 'should'
    # list with minimum_should_match=1, which matches no documents.
    # Emit each clause only when it is non-empty.
    query = {'bool': dict()}
    if len(filters) > 0:
        query['bool']['filter'] = filters
    if len(matches) > 0:
        query['bool']['should'] = matches
        query['bool']['minimum_should_match'] = 1
    return query
def convertToElastic(self):
    """Flag the catalog as converted and register both ES doc mappings."""
    setattr(self.catalogtool, CONVERTED_ATTR, True)
    self.catalogtool._p_changed = True
    properties = {}
    for name in self.catalog.indexes.keys():
        index = getIndex(self.catalog, name)
        if index is not None:
            properties[name] = index.create_mapping(name)
        else:
            raise Exception("Can not locate index for %s" % (name))
    # brain metadata lives in a single json-encoded, unanalyzed string
    # field; no other index data is stored
    properties['_metadata'] = {
        'type': 'string',
        'index': 'not_analyzed',
        'store': True
    }
    conn = self.conn
    try:
        conn.create_index(self.catalogsid)
    except IndexAlreadyExistsException:
        # index already provisioned; mappings below still apply
        pass
    mapping = {'properties': properties}
    for doc_type, doc_mapping in (
            (self.catalogtype, mapping),
            (self.trns_catalogtype, self.trns_mapping)):
        conn.indices.put_mapping(
            doc_type=doc_type,
            mapping=doc_mapping,
            indices=[self.catalogsid])
def index_batch(remove, index, positions, es=None):
    """Apply a batch of catalog changes to Elasticsearch via bulk calls.

    remove: iterable of uids to delete from the index.
    index: dict uid -> object (or a list/tuple/set of uids when called
        asynchronously; objects are then resolved from their uids).
    positions: dict of parent uid ('/' for the site root) -> list of child
        ids whose getObjPositionInParent must be refreshed.
    es: optional ElasticSearchCatalog; built from portal_catalog when
        absent (presumably the async-worker path — confirm with callers).
    """
    if es is None:
        from collective.elasticsearch.es import ElasticSearchCatalog
        es = ElasticSearchCatalog(api.portal.get_tool('portal_catalog'))
        setSite(api.portal.get())
    conn = es.connection
    bulk_size = es.get_setting('bulk_size', 50)
    if len(remove) > 0:
        bulk_data = []
        for uid in remove:
            bulk_data.append({
                'delete': {
                    '_index': es.index_name,
                    '_type': es.doc_type,
                    '_id': uid
                }
            })
        # deletions are sent in one bulk call; result is not inspected
        es.connection.bulk(index=es.index_name,
                           doc_type=es.doc_type,
                           body=bulk_data)
    if len(index) > 0:
        if type(index) in (list, tuple, set):
            # does not contain objects, must be async, convert to dict
            index = dict([(k, None) for k in index])
        bulk_data = []
        for uid, obj in index.items():
            if obj is None:
                # async path: resolve the object from its uid
                obj = uuidToObject(uid)
                if obj is None:
                    continue
            # each document contributes an action line plus a source line
            bulk_data.extend([{
                'index': {
                    '_index': es.index_name,
                    '_type': es.doc_type,
                    '_id': uid
                }
            }, get_index_data(obj, es)])
            # flush whenever the payload reaches a multiple of bulk_size
            if len(bulk_data) % bulk_size == 0:
                conn.bulk(index=es.index_name,
                          doc_type=es.doc_type,
                          body=bulk_data)
                bulk_data = []
        if len(bulk_data) > 0:
            conn.bulk(index=es.index_name,
                      doc_type=es.doc_type,
                      body=bulk_data)
    if len(positions) > 0:
        bulk_data = []
        index = getIndex(es.catalogtool._catalog, 'getObjPositionInParent')
        for uid, ids in positions.items():
            if uid == '/':
                # special marker for the site root
                parent = getSite()
            else:
                parent = uuidToObject(uid)
            if parent is None:
                logger.warn('could not find object to index positions')
                continue
            for _id in ids:
                ob = parent[_id]
                wrapped_object = get_wrapped_object(ob, es)
                try:
                    value = index.get_value(wrapped_object)
                except Exception:
                    # position could not be computed; skip this child
                    continue
                # partial update: only the position field is rewritten
                bulk_data.extend([{
                    'update': {
                        '_index': es.index_name,
                        '_type': es.doc_type,
                        '_id': IUUID(ob)
                    }
                }, {
                    'doc': {
                        'getObjPositionInParent': value
                    }
                }])
                if len(bulk_data) % bulk_size == 0:
                    conn.bulk(index=es.index_name,
                              doc_type=es.doc_type,
                              body=bulk_data)
                    bulk_data = []
        if len(bulk_data) > 0:
            conn.bulk(index=es.index_name,
                      doc_type=es.doc_type,
                      body=bulk_data)
def index_batch(remove, index, positions, es=None):
    """Apply a batch of catalog changes to Elasticsearch via bulk calls.

    remove: iterable of uids to delete; index: dict uid -> object (or a
    collection of uids on the async path); positions: parent uid ('/'
    for the site root) -> child ids whose getObjPositionInParent must be
    refreshed.  es defaults to a catalog built from portal_catalog.
    """
    if es is None:
        from collective.elasticsearch.es import ElasticSearchCatalog
        es = ElasticSearchCatalog(api.portal.get_tool('portal_catalog'))
    conn = es.connection
    bulk_size = es.get_setting('bulk_size', 50)
    if len(remove) > 0:
        bulk_data = []
        for uid in remove:
            bulk_data.append({
                'delete': {
                    '_index': es.index_name,
                    '_type': es.doc_type,
                    '_id': uid
                }
            })
        es.connection.bulk(index=es.index_name,
                           doc_type=es.doc_type,
                           body=bulk_data)
    if len(index) > 0:
        if type(index) in (list, tuple, set):
            # does not contain objects, must be async, convert to dict
            index = dict([(k, None) for k in index])
        bulk_data = []
        for uid, obj in index.items():
            if obj is None:
                obj = uuidToObject(uid)
                if obj is None:
                    continue
            bulk_data.extend([{
                'index': {
                    '_index': es.index_name,
                    '_type': es.doc_type,
                    '_id': uid
                }
            }, get_index_data(uid, obj, es)])
            # flush whenever the payload hits a multiple of bulk_size
            if len(bulk_data) % bulk_size == 0:
                conn.bulk(index=es.index_name,
                          doc_type=es.doc_type,
                          body=bulk_data)
                bulk_data = []
        if len(bulk_data) > 0:
            conn.bulk(index=es.index_name,
                      doc_type=es.doc_type,
                      body=bulk_data)
    if len(positions) > 0:
        bulk_data = []
        index = getIndex(es.catalogtool._catalog, 'getObjPositionInParent')
        for uid, ids in positions.items():
            if uid == '/':
                parent = getSite()
            else:
                parent = uuidToObject(uid)
            if parent is None:
                logger.warn('could not find object to index positions')
                continue
            for _id in ids:
                ob = parent[_id]
                wrapped_object = get_wrapped_object(ob, es)
                try:
                    value = index.get_value(wrapped_object)
                except Exception:
                    # BUG FIX: was a bare `except:`, which also swallowed
                    # KeyboardInterrupt/SystemExit; narrow to Exception.
                    continue
                # partial update: only the position field is rewritten
                bulk_data.extend([{
                    'update': {
                        '_index': es.index_name,
                        '_type': es.doc_type,
                        '_id': IUUID(ob)
                    }
                }, {
                    'doc': {
                        'getObjPositionInParent': value
                    }
                }])
                if len(bulk_data) % bulk_size == 0:
                    conn.bulk(index=es.index_name,
                              doc_type=es.doc_type,
                              body=bulk_data)
                    bulk_data = []
        if len(bulk_data) > 0:
            conn.bulk(index=es.index_name,
                      doc_type=es.doc_type,
                      body=bulk_data)
def index_batch(remove, index, positions, es=None):  # noqa: C901
    """Apply a batch of catalog changes to Elasticsearch via bulk calls.

    remove: iterable of uids to delete; index: dict uid -> object (or a
    collection of uids on the async path); positions: parent uid ('/'
    for the site root) -> child ids whose getObjPositionInParent must be
    refreshed.  Bulk responses reporting errors are logged.
    """
    if es is None:
        from collective.elasticsearch.es import ElasticSearchCatalog
        es = ElasticSearchCatalog(api.portal.get_tool('portal_catalog'))
        setSite(api.portal.get())
    conn = es.connection
    bulk_size = es.get_setting('bulk_size', 50)
    if len(remove) > 0:
        bulk_data = []
        for uid in remove:
            bulk_data.append({
                'delete': {
                    '_index': es.index_name,
                    '_type': es.doc_type,
                    '_id': uid
                }
            })
        result = es.connection.bulk(index=es.index_name,
                                    doc_type=es.doc_type,
                                    body=bulk_data)
        if "errors" in result and result["errors"] is True:
            logger.error("Error in bulk indexing removal: %s" % result)
    if len(index) > 0:
        if type(index) in (list, tuple, set):
            # does not contain objects, must be async, convert to dict
            index = dict([(k, None) for k in index])
        bulk_data = []
        for uid, obj in index.items():
            # If content has been moved (ie by a contentrule) then the object
            # passed here is the original object, not the moved one.
            # So if there is a uuid, we use this to get the correct object.
            # See https://github.com/collective/collective.elasticsearch/issues/65  # noqa
            # (single lookup replaces the previous redundant double
            # uuidToObject(uid) call — same result, one traversal)
            if uid is not None or obj is None:
                obj = uuidToObject(uid)
            if obj is None:
                continue
            bulk_data.extend([{
                'index': {
                    '_index': es.index_name,
                    '_type': es.doc_type,
                    '_id': uid
                }
            }, get_index_data(obj, es)])
            # flush whenever the payload hits a multiple of bulk_size
            if len(bulk_data) % bulk_size == 0:
                result = conn.bulk(index=es.index_name,
                                   doc_type=es.doc_type,
                                   body=bulk_data)
                if "errors" in result and result["errors"] is True:
                    logger.error("Error in bulk indexing: %s" % result)
                bulk_data = []
        if len(bulk_data) > 0:
            result = conn.bulk(index=es.index_name,
                               doc_type=es.doc_type,
                               body=bulk_data)
            if "errors" in result and result["errors"] is True:
                logger.error("Error in bulk indexing: %s" % result)
    if len(positions) > 0:
        bulk_data = []
        index = getIndex(es.catalogtool._catalog, 'getObjPositionInParent')
        for uid, ids in positions.items():
            if uid == '/':
                parent = getSite()
            else:
                parent = uuidToObject(uid)
            if parent is None:
                logger.warn('could not find object to index positions')
                continue
            for _id in ids:
                ob = parent[_id]
                wrapped_object = get_wrapped_object(ob, es)
                try:
                    value = index.get_value(wrapped_object)
                except Exception:
                    continue
                # partial update: only the position field is rewritten
                bulk_data.extend([{
                    'update': {
                        '_index': es.index_name,
                        '_type': es.doc_type,
                        '_id': IUUID(ob)
                    }
                }, {
                    'doc': {
                        'getObjPositionInParent': value
                    }
                }])
                if len(bulk_data) % bulk_size == 0:
                    # CONSISTENCY FIX: the other sections of this function
                    # log bulk errors; these flushes previously dropped the
                    # response silently.
                    result = conn.bulk(index=es.index_name,
                                       doc_type=es.doc_type,
                                       body=bulk_data)
                    if "errors" in result and result["errors"] is True:
                        logger.error("Error in bulk indexing: %s" % result)
                    bulk_data = []
        if len(bulk_data) > 0:
            result = conn.bulk(index=es.index_name,
                               doc_type=es.doc_type,
                               body=bulk_data)
            if "errors" in result and result["errors"] is True:
                logger.error("Error in bulk indexing: %s" % result)