def run(self):
    ES.setup(self.settings)
    model_names = split_strip(self.options.models)

    for model_name in model_names:
        self.log.info('Processing model `{}`'.format(model_name))
        model = engine.get_document_cls(model_name)

        params = self.options.params or ''
        params = dict([
            [k, v[0]] for k, v in urllib.parse.parse_qs(params).items()
        ])
        params.setdefault('_limit', params.get('_limit', 10000))
        chunk_size = self.options.chunk or params['_limit']

        es = ES(source=model_name, index_name=self.options.index,
                chunk_size=chunk_size)
        query_set = model.get_collection(**params)
        documents = to_dicts(query_set)

        if self.options.force:
            self.log.info('Recreating `{}` ES mapping'.format(model_name))
            es.delete_mapping()
            es.put_mapping(body=model.get_es_mapping())
            self.log.info('Indexing all `{}` documents'.format(model_name))
            es.index(documents)
        else:
            self.log.info(
                'Indexing missing `{}` documents'.format(model_name))
            es.index_missing_documents(documents)

    return 0
def run(self, quiet=False):
    from nefertari.elasticsearch import ES
    ES.setup(self.settings)
    models_paths = split_strip(self.options.models)

    for path in models_paths:
        model = resolve(path)
        model_name = path.split('.')[-1]

        params = self.options.params or ''
        params = dict([
            [k, v[0]] for k, v in urlparse.parse_qs(params).items()
        ])
        params.setdefault('_limit', params.get('_limit', 10000))
        chunk_size = self.options.chunk or params['_limit']

        es = ES(source=model_name, index_name=self.options.index)
        query_set = model.get_collection(**params)
        documents = to_dicts(query_set)

        if self.options.force:
            es.index(documents, chunk_size=chunk_size)
        else:
            es.index_missing(documents, chunk_size=chunk_size)

    return 0
def on_post_bulk_insert(sender, documents, **kw):
    if not documents:
        return
    from nefertari.elasticsearch import ES
    es = ES(source=documents[0].__class__.__name__)
    docs = to_dicts(documents)
    es.index(docs)
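# A minimal sketch of wiring the handler above to MongoEngine's
# `post_bulk_insert` signal (an assumption about wiring, not part of the
# snippet itself); `Story` is a hypothetical document class.
from mongoengine import signals

signals.post_bulk_insert.connect(on_post_bulk_insert, sender=Story)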
def reindextask(model, boxsize=5000):
    """Index a model in small chunks ("boxes") of a reasonable size."""
    global log
    mcls = engine.get_document_cls(model)

    # Proceed by chunks of `boxsize` documents.
    count = mcls.get_collection(_count=True)
    if count < 1:
        # Nothing in the DB, nothing to index.
        return

    boxes = count // boxsize
    rest = count % boxsize
    es = ES(source=model)  # quick & dirty: get a connector
    log.info('Processing model `{}` with {} documents in {} boxes'.format(
        model, count, boxes))

    # Dump box by box; add one extra box for the remainder
    # (if rest == 0 the last box is simply empty).
    for n in range(boxes + 1):
        log.info('Indexing missing `{}` documents (box: {}/{})'.format(
            model, n, boxes + 1))
        # Sorting on the primary key keeps pagination stable across boxes.
        query_set = mcls.get_collection(
            _limit=boxsize, _page=n, _sort=mcls.pk_field())
        documents = to_dicts(query_set)
        log.debug('---> {} documents from db; sending to ES'.format(
            len(documents)))
        # TODO: add a check? The last box size should be equal to `rest`.
        es.index(documents)
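# A minimal usage sketch; 'Story' is a hypothetical model name and the call
# assumes the usual setup has already run (engine configured, ES.setup()).
reindextask('Story', boxsize=1000)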
def index(self):
    q = self._params.pop('q', None)
    if not q:
        return []
    # Prefix search on `name`, capped at 5 results; only id/name are returned.
    return to_dicts(ES().get_collection(
        _raw_terms='name:%s*' % (q,),
        _limit=5,
    ), _keys=['id', 'name'])
def on_bulk_update(model_cls, objects, request):
    if not getattr(model_cls, '_index_enabled', False):
        return
    if not objects:
        return

    from nefertari.elasticsearch import ES
    es = ES(source=model_cls.__name__)
    documents = to_dicts(objects)
    es.index(documents, request=request)

    # Reindex relationships
    es.bulk_index_relations(objects, request=request, nested_only=True)
def on_bulk_update(update_context):
    request = getattr(update_context.query, '_request', None)
    model_cls = update_context.mapper.entity
    if not getattr(model_cls, '_index_enabled', False):
        return

    objects = update_context.query.all()
    if not objects:
        return

    from nefertari.elasticsearch import ES
    es = ES(source=model_cls.__name__)
    documents = to_dicts(objects)
    es.index(documents, request=request)

    # Reindex relationships
    es.bulk_index_relations(objects, request=request, nested_only=True)
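# A minimal sketch of registering a handler like the one above as a SQLAlchemy
# session event (an assumption about wiring, not part of the snippet itself);
# `Session` stands in for whatever session class or sessionmaker the app uses.
from sqlalchemy import event
from sqlalchemy.orm import Session

event.listen(Session, 'after_bulk_update', on_bulk_update)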
def index_models(self, model_names):
    self.log.info('Indexing models documents')

    params = self.options.params or ''
    params = dict([
        [k, v[0]] for k, v in urllib.parse.parse_qs(params).items()
    ])
    params.setdefault('_limit', params.get('_limit', 10000))
    chunk_size = self.options.chunk or params['_limit']

    for model_name in model_names:
        self.log.info('Processing model `{}`'.format(model_name))
        model = engine.get_document_cls(model_name)
        es = ES(source=model_name, index_name=self.options.index,
                chunk_size=chunk_size)
        query_set = model.get_collection(**params)
        documents = to_dicts(query_set)
        self.log.info('Indexing missing `{}` documents'.format(model_name))
        es.index_missing_documents(documents)
def bulk_index_relations(cls, items, request=None, **kwargs):
    """ Index objects related to :items: in bulk.

    Related items are first grouped in a map
    {model_name: {item1, item2, ...}} and then indexed.

    :param items: Sequence of DB objects whose related objects should
        be indexed.
    :param request: Pyramid Request instance.
    """
    index_map = defaultdict(set)
    for item in items:
        relations = item.get_related_documents(**kwargs)
        for model_cls, related_items in relations:
            indexable = getattr(model_cls, '_index_enabled', False)
            if indexable and related_items:
                index_map[model_cls.__name__].update(related_items)

    for model_name, instances in index_map.items():
        cls(model_name).index(to_dicts(instances), request=request)
def index_relations(cls, db_obj, request=None, **kwargs):
    for model_cls, documents in db_obj.get_related_documents(**kwargs):
        if getattr(model_cls, '_index_enabled', False) and documents:
            cls(model_cls.__name__).index(
                to_dicts(documents), request=request)
def get_reference_documents(self):
    # TODO: lazy-load documents
    models = self.__class__._meta['delete_rules'] or {}
    for model_cls, key in models:
        documents = to_dicts(model_cls.objects(**{key: self}))
        yield model_cls, documents