示例#1
0
    def run(self):
        """Reindex documents of the configured models into Elasticsearch.

        For each model name in ``self.options.models``: resolve the model
        class, fetch its documents with the query params parsed from
        ``self.options.params``, then either recreate the ES mapping and
        index everything (``--force``) or index only documents missing
        from ES.

        :returns: 0 (shell-style success exit code).
        """
        ES.setup(self.settings)
        model_names = split_strip(self.options.models)

        for model_name in model_names:
            self.log.info('Processing model `{}`'.format(model_name))
            model = engine.get_document_cls(model_name)

            params = self.options.params or ''
            # parse_qs maps each key to a list of values; keep the first.
            params = {k: v[0]
                      for k, v in urllib.parse.parse_qs(params).items()}
            # setdefault alone suffices; the original wrapped it around a
            # redundant params.get('_limit', 10000).
            params.setdefault('_limit', 10000)
            chunk_size = self.options.chunk or params['_limit']

            es = ES(source=model_name,
                    index_name=self.options.index,
                    chunk_size=chunk_size)
            query_set = model.get_collection(**params)
            documents = to_dicts(query_set)

            if self.options.force:
                self.log.info('Recreating `{}` ES mapping'.format(model_name))
                es.delete_mapping()
                es.put_mapping(body=model.get_es_mapping())
                self.log.info('Indexing all `{}` documents'.format(model_name))
                es.index(documents)
            else:
                self.log.info(
                    'Indexing missing `{}` documents'.format(model_name))
                es.index_missing_documents(documents)

        return 0
示例#2
0
文件: es.py 项目: howaryoo/nefertari
    def run(self, quiet=False):
        """Index documents of each dotted model path into Elasticsearch.

        Resolves every path in ``self.options.models``, pulls the matching
        documents with the parsed query params, and indexes either all of
        them (``--force``) or only the missing ones.

        :param quiet: Accepted for interface compatibility; unused here.
        :returns: 0 (shell-style success exit code).
        """
        from nefertari.elasticsearch import ES
        ES.setup(self.settings)
        models_paths = split_strip(self.options.models)

        for path in models_paths:
            model = resolve(path)
            model_name = path.split('.')[-1]

            params = self.options.params or ''
            # parse_qs maps each key to a list of values; keep the first.
            params = {k: v[0]
                      for k, v in urlparse.parse_qs(params).items()}
            # setdefault alone suffices; the original wrapped it around a
            # redundant params.get('_limit', 10000).
            params.setdefault('_limit', 10000)
            chunk_size = self.options.chunk or params['_limit']

            es = ES(source=model_name, index_name=self.options.index)
            query_set = model.get_collection(**params)
            documents = to_dicts(query_set)

            if self.options.force:
                es.index(documents, chunk_size=chunk_size)
            else:
                es.index_missing(documents, chunk_size=chunk_size)

        return 0
def on_post_bulk_insert(sender, documents, **kw):
    """Index freshly bulk-inserted documents into Elasticsearch.

    :param sender: Signal sender (unused).
    :param documents: Sequence of DB documents just inserted; assumed to
        all belong to the same model class (source is taken from the
        first one).
    :param kw: Extra signal keyword arguments (unused).
    """
    if not documents:
        return
    from nefertari.elasticsearch import ES
    es = ES(source=documents[0].__class__.__name__)
    docs = to_dicts(documents)
    es.index(docs)
示例#4
0
文件: es.py 项目: mbijon/nefertari
    def run(self):
        """Reindex the named models' documents into Elasticsearch.

        Walks ``self.options.models``; for each model it loads the
        document class, fetches its collection using parsed query params,
        and with ``--force`` rebuilds the ES mapping and indexes all
        documents, otherwise indexes only the missing ones.

        :returns: 0 (shell-style success exit code).
        """
        ES.setup(self.settings)
        model_names = split_strip(self.options.models)

        for model_name in model_names:
            self.log.info('Processing model `{}`'.format(model_name))
            model = engine.get_document_cls(model_name)

            params = self.options.params or ''
            # parse_qs maps each key to a list of values; keep the first.
            params = {k: v[0]
                      for k, v in urllib.parse.parse_qs(params).items()}
            # setdefault alone suffices; the original wrapped it around a
            # redundant params.get('_limit', 10000).
            params.setdefault('_limit', 10000)
            chunk_size = self.options.chunk or params['_limit']

            es = ES(source=model_name, index_name=self.options.index,
                    chunk_size=chunk_size)
            query_set = model.get_collection(**params)
            documents = to_dicts(query_set)

            if self.options.force:
                self.log.info('Recreating `{}` ES mapping'.format(model_name))
                es.delete_mapping()
                es.put_mapping(body=model.get_es_mapping())
                self.log.info('Indexing all `{}` documents'.format(
                    model_name))
                es.index(documents)
            else:
                self.log.info('Indexing missing `{}` documents'.format(
                    model_name))
                es.index_missing_documents(documents)

        return 0
示例#5
0
    def run(self, quiet=False):
        """Index documents of each dotted model path into Elasticsearch.

        For every path in ``self.options.models``: resolve the model,
        load its documents via the parsed query params, and index all of
        them (``--force``) or only the ones missing from ES.

        :param quiet: Accepted for interface compatibility; unused here.
        :returns: 0 (shell-style success exit code).
        """
        from nefertari.elasticsearch import ES
        ES.setup(self.settings)
        models_paths = split_strip(self.options.models)

        for path in models_paths:
            model = resolve(path)
            model_name = path.split('.')[-1]

            params = self.options.params or ''
            # parse_qs maps each key to a list of values; keep the first.
            params = {k: v[0]
                      for k, v in urlparse.parse_qs(params).items()}
            # setdefault alone suffices; the original wrapped it around a
            # redundant params.get('_limit', 10000).
            params.setdefault('_limit', 10000)
            chunk_size = self.options.chunk or params['_limit']

            es = ES(source=model_name, index_name=self.options.index)
            query_set = model.get_collection(**params)
            documents = to_dicts(query_set)

            if self.options.force:
                es.index(documents, chunk_size=chunk_size)
            else:
                es.index_missing(documents, chunk_size=chunk_size)

        return 0
示例#6
0
def reindextask(model, boxsize=5000):
    """Index model by small chunks (ie: a box, with a reasonable size).

    Documents are fetched page by page, sorted on the primary key so the
    paging is stable, and each page is sent to ES separately so memory
    use stays bounded regardless of collection size.

    :param model: Model name to reindex.
    :param boxsize: Number of documents fetched/indexed per box.
    """
    # NOTE: `global log` removed — reading a module-level name needs no
    # declaration; the unused `rest` computation was dropped as well.
    mcls = engine.get_document_cls(model)
    count = mcls.get_collection(_count=True)
    if count < 1:  # Hu ? nothing in DB
        return
    # One extra box catches the remainder (it is empty when count is an
    # exact multiple of boxsize, which is harmless). Using the real box
    # count here keeps the two log messages consistent.
    boxes = count // boxsize + 1
    es = ES(source=model)  # humm quick & dirty: get a connector
    log.info('Processing model `{}` with {} documents in {} boxes'.format(
        model, count, boxes))
    for n in range(boxes):
        log.info('Indexing missing `{}` documents (box: {}/{})'.format(
            model, n, boxes))
        query_set = mcls.get_collection(
            _limit=boxsize, _page=n,
            _sort=mcls.pk_field())  ## don't forget the sort
        documents = to_dicts(query_set)
        log.debug('---> from db {} documents ; send to ES'.format(
            len(documents)))
        ## TODO: add a control ? The last box size should equal the remainder
        es.index(documents)
示例#7
0
    def index(self):
        """Prefix-search ES on ``name`` using the ``q`` request param.

        Returns at most five matches, trimmed to their id and name, or
        an empty list when no query term was supplied.
        """
        term = self._params.pop('q', None)
        if not term:
            return []

        matches = ES().get_collection(
            _raw_terms='name:%s*' % (term,), _limit=5,
        )
        return to_dicts(matches, _keys=['id', 'name'])
示例#8
0
def on_bulk_update(model_cls, objects, request):
    """Push bulk-updated objects (and their nested relations) back to ES.

    Does nothing when the model is not index-enabled or no objects were
    affected.
    """
    indexable = getattr(model_cls, '_index_enabled', False)
    if not indexable:
        return
    if not objects:
        return

    from nefertari.elasticsearch import ES
    es_conn = ES(source=model_cls.__name__)
    es_conn.index(to_dicts(objects), request=request)

    # Reindex relationships
    es_conn.bulk_index_relations(objects, request=request, nested_only=True)
示例#9
0
def on_bulk_update(update_context):
    """After a SQLAlchemy bulk UPDATE, refresh the touched rows in ES.

    Skips silently when the model is not index-enabled or the update
    matched no rows; otherwise reindexes the rows and their nested
    relation documents.
    """
    query = update_context.query
    request = getattr(query, '_request', None)
    model_cls = update_context.mapper.entity
    if not getattr(model_cls, '_index_enabled', False):
        return

    touched = query.all()
    if not touched:
        return

    from nefertari.elasticsearch import ES
    indexer = ES(source=model_cls.__name__)
    indexer.index(to_dicts(touched), request=request)

    # Nested relationship documents must be refreshed too.
    indexer.bulk_index_relations(touched, request=request, nested_only=True)
示例#10
0
def on_bulk_update(update_context):
    """Reindex rows affected by a bulk UPDATE, plus their relations.

    No-op when the mapped model has indexing disabled or the query
    matched nothing.
    """
    model_cls = update_context.mapper.entity
    indexing_on = getattr(model_cls, '_index_enabled', False)
    if not indexing_on:
        return

    rows = update_context.query.all()
    if not rows:
        return

    from nefertari.elasticsearch import ES
    request = getattr(
        update_context.query, '_request', None)
    es = ES(source=model_cls.__name__)
    es.index(to_dicts(rows), request=request)

    # Reindex relationships
    es.bulk_index_relations(rows, request=request, nested_only=True)
示例#11
0
    def index_models(self, model_names):
        """Index documents missing from ES for each named model.

        :param model_names: Iterable of model names to process; each is
            resolved to a document class and queried with the params
            parsed from ``self.options.params``.
        """
        self.log.info('Indexing models documents')
        params = self.options.params or ''
        # parse_qs maps each key to a list of values; keep the first.
        params = {k: v[0]
                  for k, v in urllib.parse.parse_qs(params).items()}
        # setdefault alone suffices; the original wrapped it around a
        # redundant params.get('_limit', 10000).
        params.setdefault('_limit', 10000)
        chunk_size = self.options.chunk or params['_limit']

        for model_name in model_names:
            self.log.info('Processing model `{}`'.format(model_name))
            model = engine.get_document_cls(model_name)
            es = ES(source=model_name,
                    index_name=self.options.index,
                    chunk_size=chunk_size)
            query_set = model.get_collection(**params)
            documents = to_dicts(query_set)
            self.log.info('Indexing missing `{}` documents'.format(model_name))
            es.index_missing_documents(documents)
示例#12
0
文件: es.py 项目: mkdir404/nefertari
    def index_models(self, model_names):
        """Index documents missing from ES for each named model.

        :param model_names: Iterable of model names; each is resolved to
            its document class and queried with the params parsed from
            ``self.options.params``.
        """
        self.log.info('Indexing models documents')
        params = self.options.params or ''
        # parse_qs maps each key to a list of values; keep the first.
        params = {k: v[0]
                  for k, v in urllib.parse.parse_qs(params).items()}
        # setdefault alone suffices; the original wrapped it around a
        # redundant params.get('_limit', 10000).
        params.setdefault('_limit', 10000)
        chunk_size = self.options.chunk or params['_limit']

        for model_name in model_names:
            self.log.info('Processing model `{}`'.format(model_name))
            model = engine.get_document_cls(model_name)
            es = ES(source=model_name, index_name=self.options.index,
                    chunk_size=chunk_size)
            query_set = model.get_collection(**params)
            documents = to_dicts(query_set)
            self.log.info('Indexing missing `{}` documents'.format(
                model_name))
            es.index_missing_documents(documents)
示例#13
0
    def bulk_index_relations(cls, items, request=None, **kwargs):
        """ Index objects related to :items: in bulk.

        Related objects are first grouped per model name into a
        ``{model_name: {obj1, obj2, ...}}`` map, then each group is
        indexed with a single call per model.

        :param items: Sequence of DB objects whose related objects
            should be indexed.
        :param request: Pyramid Request instance.
        """
        grouped = defaultdict(set)
        for obj in items:
            for related_cls, related in obj.get_related_documents(**kwargs):
                indexable = getattr(related_cls, '_index_enabled', False)
                if indexable and related:
                    grouped[related_cls.__name__].update(related)

        for name, members in grouped.items():
            cls(name).index(to_dicts(members), request=request)
示例#14
0
    def bulk_index_relations(cls, items, request=None, **kwargs):
        """ Index objects related to :items: in bulk.

        Builds a ``model_name -> set of related instances`` mapping
        first, so every model's related documents are sent to ES in one
        bulk call instead of one call per item.

        :param items: Sequence of DB objects whose related objects
            should be indexed.
        :param request: Pyramid Request instance.
        """
        by_model = defaultdict(set)
        for item in items:
            for model_cls, related in item.get_related_documents(**kwargs):
                if not getattr(model_cls, '_index_enabled', False):
                    continue
                if related:
                    by_model[model_cls.__name__].update(related)

        for model_name, instances in by_model.items():
            cls(model_name).index(to_dicts(instances), request=request)
示例#15
0
 def index_relations(cls, db_obj, request=None, **kwargs):
     """Index every index-enabled related document of ``db_obj``."""
     relations = db_obj.get_related_documents(**kwargs)
     for model_cls, documents in relations:
         indexable = getattr(model_cls, '_index_enabled', False)
         if indexable and documents:
             cls(model_cls.__name__).index(to_dicts(documents),
                                           request=request)
示例#16
0
 def get_reference_documents(self):
     """Yield ``(model_cls, documents)`` for models referencing self."""
     # TODO: Make lazy load of documents
     delete_rules = self.__class__._meta['delete_rules'] or {}
     for model_cls, field in delete_rules:
         refs = model_cls.objects(**{field: self})
         yield model_cls, to_dicts(refs)
示例#17
0
 def get_reference_documents(self):
     """Generate ``(model_cls, documents)`` pairs that reference self."""
     # TODO: Make lazy load of documents
     rules = self.__class__._meta['delete_rules'] or {}
     for model_cls, key in rules:
         matching = model_cls.objects(**{key: self})
         yield model_cls, to_dicts(matching)
示例#18
0
 def index_relations(cls, db_obj, request=None, **kwargs):
     """Reindex all index-enabled documents related to ``db_obj``."""
     for model_cls, docs in db_obj.get_related_documents(**kwargs):
         if getattr(model_cls, '_index_enabled', False) and docs:
             indexer = cls(model_cls.__name__)
             indexer.index(to_dicts(docs), request=request)