示例#1
0
def index_objects(mapping, queryset, index, print_progress=False):
    """
    Index synchronously model specified mapping type with an optimized query.

    Documents are buffered and sent to Elasticsearch in bulk batches of 100
    to keep memory use and request size bounded.

    Arguments:
        mapping: mapping type whose documents are extracted and indexed.
        queryset: queryset of model instances to index.
        index: base index name, combined with the mapping via get_index_name.
        print_progress (bool): when True, report progress while iterating.
    """
    documents = []
    for instance in queryset_iterator(mapping, queryset, print_progress=print_progress):
        documents.append(mapping.extract_document(instance.id, instance))

        if len(documents) >= 100:
            mapping.bulk_index(documents, id_field='id', index=get_index_name(index, mapping), es=es)
            documents = []

    # Flush the remaining partial batch; skip the bulk call entirely when the
    # buffer is empty (the original issued an empty bulk request and then
    # pointlessly reassigned documents = []).
    if documents:
        mapping.bulk_index(documents, id_field='id', index=get_index_name(index, mapping), es=es)
示例#2
0
def update_in_index(instance, mapping):
    """
    Utility function for signal listeners index to Elasticsearch.
    Currently uses synchronous tasks. And because of that all exceptions are
    caught, so failures will not interfere with the regular model updates.
    """
    if settings.ES_DISABLED:
        return
    if hasattr(instance, 'is_deleted') and instance.is_deleted:
        # Soft-deleted instances are removed from the index instead of updated.
        remove_from_index(instance, mapping)
    else:
        logger.info(u'Updating instance %s: %s' %
                    (instance.__class__.__name__, instance.pk))

        try:
            main_index_with_type = get_index_name(main_index, mapping)
            try:
                document = mapping.extract_document(instance.id, instance)
            except Exception as exc:
                logger.exception('Unable to extract document {0}: {1}'.format(
                    instance, repr(exc)))
            else:
                # Index object direct instead of bulk_index, to prevent multiple reads from db
                mapping.index(document,
                              id_=instance.id,
                              es=es,
                              index=main_index_with_type)
                es.indices.refresh(main_index_with_type)
        except Exception:
            # 'except Exception, e' is Python-2-only syntax, and
            # traceback.format_exc() takes an optional line limit, not an
            # exception object — call it without arguments.
            logger.error(traceback.format_exc())
示例#3
0
def update_in_index(instance, mapping):
    """
    Utility function for signal listeners index to Elasticsearch.
    Currently uses synchronous tasks. And because of that all exceptions are
    caught, so failures will not interfere with the regular model updates.
    """
    if settings.ES_DISABLED:
        return
    if hasattr(instance, 'is_deleted') and instance.is_deleted:
        # Soft-deleted instances are removed from the index instead of updated.
        remove_from_index(instance, mapping)
    else:
        logger.info(u'Updating instance %s: %s' % (instance.__class__.__name__, instance.pk))

        try:
            main_index_with_type = get_index_name(main_index, mapping)
            try:
                document = mapping.extract_document(instance.id, instance)
            except Exception as exc:
                logger.exception('Unable to extract document {0}: {1}'.format(
                    instance, repr(exc)))
            else:
                # Index object direct instead of bulk_index, to prevent multiple reads from db
                mapping.index(document, id_=instance.id, es=es, index=main_index_with_type)
                es.indices.refresh(main_index_with_type)
        except Exception:
            # 'except Exception, e' is Python-2-only syntax, and
            # traceback.format_exc() takes an optional line limit, not an
            # exception object — call it without arguments.
            logger.error(traceback.format_exc())
示例#4
0
def unindex_objects(mapping, queryset, index, print_progress=False):
    """
    Remove synchronously model specified mapping type with an optimized query.
    """
    # Removal only needs primary keys, so restrict the query to 'pk'.
    queryset = queryset.only('pk')

    iterator = queryset_iterator(mapping, queryset, print_progress=print_progress)
    for obj in iterator:
        target_index = get_index_name(index, mapping)
        try:
            mapping.unindex(obj.pk, index=target_index, es=es)
        except NotFoundError:
            # The document was never indexed (or is already gone); nothing to do.
            pass
示例#5
0
def index_objects(mapping, queryset, index, print_progress=False):
    """
    Index synchronously model specified mapping type with an optimized query.

    Documents are buffered and flushed to Elasticsearch in bulk batches of
    100 to keep memory use and request size bounded.

    Arguments:
        mapping: mapping type whose documents are extracted and indexed.
        queryset: queryset of model instances to index.
        index: base index name, combined with the mapping via get_index_name.
        print_progress (bool): when True, report progress while iterating.
    """
    documents = []
    for instance in queryset_iterator(mapping,
                                      queryset,
                                      print_progress=print_progress):
        documents.append(mapping.extract_document(instance.id, instance))

        if len(documents) >= 100:
            mapping.bulk_index(documents,
                               id_field='id',
                               index=get_index_name(index, mapping),
                               es=es)
            documents = []

    # Flush the remaining partial batch; skip the bulk call entirely when the
    # buffer is empty (the original issued an empty bulk request and then
    # pointlessly reassigned documents = []).
    if documents:
        mapping.bulk_index(documents,
                           id_field='id',
                           index=get_index_name(index, mapping),
                           es=es)
示例#6
0
def remove_from_index(instance, mapping):
    """
    Utility function for signal listeners to remove from Elasticsearch.
    Currently uses synchronous tasks. And because of that all exceptions are
    caught, so failures will not interfere with the regular model updates.
    """
    if settings.ES_DISABLED:
        return
    logger.info(u'Removing instance %s: %s' % (instance.__class__.__name__, instance.pk))

    try:
        main_index_with_type = get_index_name(main_index, mapping)
        tasks.unindex_objects(mapping, [instance.id], es=es, index=main_index_with_type)
        es.indices.refresh(main_index_with_type)
    except NotFoundError:
        # 'except NotFoundError, e' is Python-2-only syntax and 'e' was
        # unused; logger.warn() is a deprecated alias of logger.warning().
        logger.warning('Not found in index instance %s: %s' % (instance.__class__.__name__, instance.pk))
示例#7
0
def unindex_objects(mapping, queryset, index, print_progress=False):
    """
    Remove synchronously model specified mapping type with an optimized query.
    """
    # Only the primary key is required to remove a document from the index.
    queryset = queryset.only('pk')

    for obj in queryset_iterator(mapping, queryset, print_progress=print_progress):
        try:
            mapping.unindex(obj.pk,
                            es=es,
                            index=get_index_name(index, mapping))
        except NotFoundError:
            # Already absent from the index; ignore and keep going.
            pass
示例#8
0
def update_in_index(instance, mapping):
    """
    Utility function for signal listeners index to Elasticsearch.
    Currently uses synchronous tasks. And because of that all exceptions are
    caught, so failures will not interfere with the regular model updates.
    """
    if settings.ES_DISABLED:
        return
    if hasattr(instance, 'is_deleted') and instance.is_deleted:
        # Soft-deleted instances are removed from the index instead of updated.
        remove_from_index(instance, mapping)
    else:
        logger.info(u'Updating instance %s: %s' % (instance.__class__.__name__, instance.pk))

        try:
            main_index_with_type = get_index_name(main_index, mapping)
            tasks.index_objects(mapping, [instance.id], es=es, index=main_index_with_type)
            es.indices.refresh(main_index_with_type)
        except Exception:
            # 'except Exception, e' is Python-2-only syntax, and
            # traceback.format_exc() takes an optional line limit, not an
            # exception object — call it without arguments.
            logger.error(traceback.format_exc())
示例#9
0
def remove_from_index(instance, mapping):
    """
    Utility function for signal listeners to remove from Elasticsearch.
    Currently uses synchronous tasks. And because of that all exceptions are
    caught, so failures will not interfere with the regular model updates.
    """
    if settings.ES_DISABLED:
        return
    logger.info(u'Removing instance %s: %s' %
                (instance.__class__.__name__, instance.pk))

    try:
        main_index_with_type = get_index_name(main_index, mapping)
        tasks.unindex_objects(mapping, [instance.id],
                              es=es,
                              index=main_index_with_type)
        es.indices.refresh(main_index_with_type)
    except NotFoundError:
        # 'except NotFoundError, e' is Python-2-only syntax and 'e' was
        # unused; logger.warn() is a deprecated alias of logger.warning().
        logger.warning('Not found in index instance %s: %s' %
                       (instance.__class__.__name__, instance.pk))
示例#10
0
    def do_search(self, return_fields=None):
        """
        Execute the search.

        Arguments:
            return_fields (list): strings of fieldnames to return from result

        Returns:
            hits (list): dicts with search results per item
            facets (list|None): facet terms when faceting was requested
            count (int): total number of results
            took (int): milliseconds Elastic search took to get the results
        """
        if settings.ES_DISABLED:
            # Keep the tuple arity consistent with every other return path
            # (hits, facets, count, took); previously this returned only
            # three values, breaking callers that unpack four.
            return [], None, 0, 0
        self.search = self.search.filter_raw({'and': self.raw_filters})

        if self.model_type:
            self.search = self.search.doctypes(self.model_type)
            # Also limit the search to just the index with the right type.
            # This is faster than asking every index, also prevents some
            # annoying "cannot find field" errors in the elasticsearch logs.
            index_name = get_index_name(main_index, self.model_type)
            self.search = self.search.indexes(index_name)

        if self.facet:
            facet_raw = {
                "terms": {
                    "field": self.facet['field'],
                    "size": self.facet['size'],
                },
            }

            if self.facet['filter']:
                # Restrict facet counts to the current tenant AND the
                # caller-supplied query-string filter.
                facet_filter_dict = {
                    'and': [
                        {
                            'term': {
                                'tenant': self.tenant_id
                            }
                        },
                        {
                            'query': {
                                'query_string': {
                                    'query': self.facet['filter']
                                }
                            }
                        }
                    ]
                }

                facet_raw['facet_filter'] = facet_filter_dict

            self.search = self.search.facet_raw(items=facet_raw)

        # Fire off search.
        try:
            hits = []
            execute = self.search.execute()
            for result in execute:
                hit = {
                    'id': result.id,
                }
                if not self.model_type:
                    # We will add type if not specifically searched on it.
                    hit['type'] = result.es_meta.type
                for field in result:
                    # Add specified fields, or all fields when not specified.
                    if return_fields:
                        if field in return_fields:
                            hit[field] = result[field]
                    else:
                        hit[field] = result[field]
                hits.append(hit)

            if execute.facets:
                return hits, execute.facets['items']['terms'], execute.count, execute.took

            return hits, None, execute.count, execute.took
        except RequestError as e:
            # This can happen when the query is malformed. For example:
            # A user entering special characters. This should normally be taken
            # care of where the request is built (usually in Javascript),
            # by escaping or omitting special characters.
            # This may be hard to get fool proof, therefore we also
            # catch the exception here to prevent server errors.
            logger.error('request error %s' % e)
            return [], None, 0, 0
示例#11
0
    def index(self):
        """
        Do the actual indexing for all specified targets.

        For each mapping in ``self.target_list``:
          1. Find the index currently behind the main alias, if any.
          2. Delete (or, without force, refuse over) leftover unaliased
             indices for this model.
          3. Create a fresh timestamped index with mapping and analyzers.
          4. Index all documents into the new index.
          5. Switch the aliases over in one update and delete the old index.
        """
        for mapping in self.target_list:
            model_name = mapping.get_mapping_type_name()
            main_index_base = settings.ES_INDEXES['default']
            main_index = get_index_name(main_index_base, mapping)

            self.stdout.write('==> %s' % model_name)

            # Check if we currently have an index for this mapping.
            old_index = None
            aliases = self.es.indices.get_aliases(name=main_index)
            for key, value in aliases.iteritems():
                if value['aliases']:
                    old_index = key
                    self.stdout.write('Current index "%s"' % key)

            # Check any indices with no alias (leftovers from failed indexing).
            # Or it could be that it is still in progress,
            aliases = self.es.indices.get_aliases()
            for key, value in aliases.iteritems():
                if not key.endswith(model_name):
                    # Not the model we are looking after.
                    continue
                if key == main_index:
                    # This is an auto created index. Will be removed at end of command.
                    continue
                if not value['aliases']:
                    if self.force:
                        self.stdout.write('Removing leftover "%s"' % key)
                        self.es.indices.delete(key)
                    else:
                        raise Exception('Found leftover %s, proceed with -f to remove.'
                                        ' Make sure indexing this model is not already running!' % key)

            # Create new index.
            index_settings = {
                'mappings': {
                    model_name: mapping.get_mapping()
                },
                'settings': {
                    'analysis': get_analyzers()['analysis'],
                    'number_of_shards': 1,
                }
            }
            # Unix timestamp makes the temporary index name unique per run.
            temp_index_base = 'index_%s' % (int(time.time()))
            temp_index = get_index_name(temp_index_base, mapping)

            self.stdout.write('Creating new index "%s"' % temp_index)
            self.es.indices.create(temp_index, body=index_settings)

            # Index documents.
            self.index_documents(mapping, temp_index_base)

            # Switch aliases.
            if old_index:
                # Remove + add in one update_aliases call so the alias flip
                # is atomic and searches never see a missing alias.
                self.es.indices.update_aliases({
                    'actions': [
                        {'remove': {'index': old_index, 'alias': main_index}},
                        {'remove': {'index': old_index, 'alias': main_index_base}},
                        {'add': {'index': temp_index, 'alias': main_index}},
                        {'add': {'index': temp_index, 'alias': main_index_base}},
                    ]
                })
                self.stdout.write('Removing previous index "%s"' % old_index)
                self.es.indices.delete(old_index)
            else:
                if self.es.indices.exists(main_index):
                    # This is a corner case. There was no alias named index_name, but
                    # an index index_name nevertheless exists, this only happens when the index
                    # was already created (because of ES auto creation features).
                    self.stdout.write('Removing previous (presumably auto created) index "%s"' % main_index)
                    self.es.indices.delete(main_index)
                self.es.indices.update_aliases({
                    'actions': [
                        {'add': {'index': temp_index, 'alias': main_index}},
                        {'add': {'index': temp_index, 'alias': main_index_base}},
                    ]
                })
            self.stdout.write('')

        self.stdout.write('Indexing finished.')
示例#12
0
    def do_search(self, return_fields=None):
        """
        Execute the search.

        Arguments:
            return_fields (list): strings of fieldnames to return from result

        Returns:
            hits (list): dicts with search results per item
            facets (list|None): facet terms when faceting was requested
            count (int): total number of results
            took (int): milliseconds Elastic search took to get the results
        """
        if settings.ES_DISABLED:
            # Keep the tuple arity consistent with every other return path
            # (hits, facets, count, took); previously this returned only
            # three values, breaking callers that unpack four.
            return [], None, 0, 0
        self.search = self.search.filter_raw({'and': self.raw_filters})

        if self.model_type:
            self.search = self.search.doctypes(self.model_type)
            # Also limit the search to just the index with the right type.
            # This is faster than asking every index, also prevents some
            # annoying "cannot find field" errors in the elasticsearch logs.
            index_name = get_index_name(main_index, self.model_type)
            self.search = self.search.indexes(index_name)

        if self.facet:
            facet_raw = {
                'terms': {
                    'field': self.facet['field'],
                    'size': self.facet['size'],
                },
            }

            # Always scope facet counts to the current tenant.
            facet_filter_dict = {
                'and': [{
                    'term': {
                        'tenant': self.tenant_id,
                    }
                }]
            }

            if self.facet['filters']:
                # AND every caller-supplied query-string filter into the
                # facet filter.
                for facet_filter in self.facet['filters']:
                    facet_filter_dict['and'].append(
                        {'query': {
                            'query_string': {
                                'query': facet_filter
                            }
                        }})

            facet_raw['facet_filter'] = facet_filter_dict

            self.search = self.search.facet_raw(items=facet_raw)

        # Fire off search.
        try:
            hits = []
            execute = self.search.execute()
            for result in execute:
                hit = {
                    'id': result.id,
                }
                if not self.model_type:
                    # We will add type if not specifically searched on it.
                    hit['type'] = result.es_meta.type
                for field in result:
                    # Add specified fields, or all fields when not specified.
                    if return_fields:
                        if field in return_fields:
                            hit[field] = result[field]
                    else:
                        hit[field] = result[field]
                hits.append(hit)

            if execute.facets:
                facets = execute.facets['items']['terms']

                if self.model_type == 'tags_tag':
                    for hit in hits:
                        # Get the object with the given name.
                        facet = next(
                            (x for x in facets
                             if x.get('term') == hit.get('name_flat')), None)

                        if facet and (not facet.get('last_used')
                                      or hit.get('last_used') >
                                      facet.get('last_used')):
                            # Set the latest usage date.
                            facet.update({'last_used': hit.get('last_used')})

                return hits, facets, execute.count, execute.took

            return hits, None, execute.count, execute.took
        except RequestError as e:
            # This can happen when the query is malformed. For example:
            # A user entering special characters. This should normally be taken
            # care of where the request is built (usually in Javascript),
            # by escaping or omitting special characters.
            # This may be hard to get fool proof, therefore we also
            # catch the exception here to prevent server errors.
            logger.error('request error %s' % e)
            return [], None, 0, 0
示例#13
0
File: index.py  Project: Fokko/hellolily
    def handle(self, *args, **options):
        """
        Rebuild Elasticsearch indices for all (or the targeted) model mappings.

        Per mapping: find the index currently behind the main alias, clear
        (or, without --force, refuse over) leftover unaliased indices, build
        a fresh timestamped index, fill it with documents, then switch the
        aliases over and delete the old index.
        """
        es = get_es_client()

        if args:
            # Only named options are supported; bail out on positional args.
            self.stdout.write('Aborting, unexpected arguments %s' % list(args))
            return

        if options['list']:
            self.stdout.write('Possible models to index:\n')
            for mapping in ModelMappings.get_model_mappings().values():
                self.stdout.write(mapping.get_mapping_type_name())
            return

        target = options['target']
        if target:
            targets = target.split(',')
        else:
            targets = []  # (meaning all)
        has_targets = targets != []

        self.stdout.write('Please remember that HelloLily needs to be in maintenance mode. \n\n')

        if has_targets:
            # Do a quick run to check if all targets are valid models.
            check_targets = list(targets)  # make a copy
            for target in check_targets:
                for mapping in ModelMappings.get_model_mappings().values():
                    if self.model_targetted(mapping, [target]):
                        check_targets.remove(target)
                        break
            # Anything left in check_targets matched no known mapping.
            if check_targets:
                self.stdout.write('Aborting, following targets not recognized: %s' % check_targets)
                return

        for mapping in ModelMappings.get_model_mappings().values():
            model_name = mapping.get_mapping_type_name()
            main_index_base = settings.ES_INDEXES['default']
            main_index = get_index_name(main_index_base, mapping)

            # Skip this model if there are specific targets and not specified.
            if has_targets and not self.model_targetted(mapping, targets):
                continue

            self.stdout.write('==> %s' % model_name)

            # Check if we currently have an index for this mapping.
            old_index = None
            aliases = es.indices.get_aliases(name=main_index)
            for key, value in aliases.iteritems():
                if value['aliases']:
                    old_index = key
                    self.stdout.write('Current index "%s"' % key)

            # Check any indices with no alias (leftovers from failed indexing).
            # Or it could be that it is still in progress,
            aliases = es.indices.get_aliases()
            for key, value in aliases.iteritems():
                if not key.endswith(model_name):
                    # Not the model we are looking after.
                    continue
                if key == main_index:
                    # This is an auto created index. Will be removed at end of command.
                    continue
                if not value['aliases']:
                    if options['force']:
                        self.stdout.write('Removing leftover "%s"' % key)
                        es.indices.delete(key)
                    else:
                        raise Exception('Found leftover %s, proceed with -f to remove.'
                                        ' Make sure indexing this model is not already running!' % key)

            # Create new index.
            index_settings = {
                'mappings': {
                    model_name: mapping.get_mapping()
                },
                'settings': {
                    'analysis': get_analyzers()['analysis'],
                    'number_of_shards': 1,
                }
            }
            # Unix timestamp makes the temporary index name unique per run.
            temp_index_base = 'index_%s' % (int(time.time()))
            temp_index = get_index_name(temp_index_base, mapping)

            self.stdout.write('Creating new index "%s"' % temp_index)
            es.indices.create(temp_index, body=index_settings)

            # Index documents.
            self.index_documents(mapping, temp_index_base)

            # Switch aliases.
            if old_index:
                # Remove + add in one update_aliases call so the alias flip
                # is atomic and searches never see a missing alias.
                es.indices.update_aliases({
                    'actions': [
                        {'remove': {'index': old_index, 'alias': main_index}},
                        {'remove': {'index': old_index, 'alias': main_index_base}},
                        {'add': {'index': temp_index, 'alias': main_index}},
                        {'add': {'index': temp_index, 'alias': main_index_base}},
                    ]
                })
                self.stdout.write('Removing previous index "%s"' % old_index)
                es.indices.delete(old_index)
            else:
                if es.indices.exists(main_index):
                    # This is a corner case. There was no alias named index_name, but
                    # an index index_name nevertheless exists, this only happens when the index
                    # was already created (because of ES auto creation features).
                    self.stdout.write('Removing previous (presumably auto created) index "%s"' % main_index)
                    es.indices.delete(main_index)
                es.indices.update_aliases({
                    'actions': [
                        {'add': {'index': temp_index, 'alias': main_index}},
                        {'add': {'index': temp_index, 'alias': main_index_base}},
                    ]
                })
            self.stdout.write('')

        self.stdout.write('Indexing finished.')

        if options['queries']:
            # Debug aid: dump every SQL query Django executed during the run.
            from django.db import connection
            for query in connection.queries:
                print query
示例#14
0
    def index(self):
        """
        Do the actual indexing for all specified targets.

        For each mapping in ``self.target_list``:
          1. Find the index currently behind the main alias, if any.
          2. Delete (or, without force, refuse over) leftover unaliased
             indices for this model.
          3. Create a fresh timestamped index with mapping and analyzers.
          4. Index all documents into the new index.
          5. Switch the aliases over in one update and delete the old index.
        """
        for mapping in self.target_list:
            model_name = mapping.get_mapping_type_name()
            main_index_base = settings.ES_INDEXES['default']
            main_index = get_index_name(main_index_base, mapping)

            self.stdout.write('==> %s' % model_name)

            # Check if we currently have an index for this mapping.
            old_index = None
            aliases = self.es.indices.get_aliases(name=main_index)
            for key, value in aliases.iteritems():
                if value['aliases']:
                    old_index = key
                    self.stdout.write('Current index "%s"' % key)

            # Check any indices with no alias (leftovers from failed indexing).
            # Or it could be that it is still in progress,
            aliases = self.es.indices.get_aliases()
            for key, value in aliases.iteritems():
                if not key.endswith(model_name):
                    # Not the model we are looking after.
                    continue
                if key == main_index:
                    # This is an auto created index. Will be removed at end of command.
                    continue
                if not value['aliases']:
                    if self.force:
                        self.stdout.write('Removing leftover "%s"' % key)
                        self.es.indices.delete(key)
                    else:
                        raise Exception('Found leftover %s, proceed with -f to remove.'
                                        ' Make sure indexing this model is not already running!' % key)

            # Create new index.
            index_settings = {
                'mappings': {
                    model_name: mapping.get_mapping()
                },
                'settings': {
                    'analysis': get_analyzers()['analysis'],
                    'number_of_shards': 1,
                }
            }
            # Unix timestamp makes the temporary index name unique per run.
            temp_index_base = 'index_%s' % (int(time.time()))
            temp_index = get_index_name(temp_index_base, mapping)

            self.stdout.write('Creating new index "%s"' % temp_index)
            self.es.indices.create(temp_index, body=index_settings)

            # Index documents.
            self.index_documents(mapping, temp_index_base)

            # Switch aliases.
            if old_index:
                # Remove + add in one update_aliases call so the alias flip
                # is atomic and searches never see a missing alias.
                self.es.indices.update_aliases({
                    'actions': [
                        {'remove': {'index': old_index, 'alias': main_index}},
                        {'remove': {'index': old_index, 'alias': main_index_base}},
                        {'add': {'index': temp_index, 'alias': main_index}},
                        {'add': {'index': temp_index, 'alias': main_index_base}},
                    ]
                })
                self.stdout.write('Removing previous index "%s"' % old_index)
                self.es.indices.delete(old_index)
            else:
                if self.es.indices.exists(main_index):
                    # This is a corner case. There was no alias named index_name, but
                    # an index index_name nevertheless exists, this only happens when the index
                    # was already created (because of ES auto creation features).
                    self.stdout.write('Removing previous (presumably auto created) index "%s"' % main_index)
                    self.es.indices.delete(main_index)
                self.es.indices.update_aliases({
                    'actions': [
                        {'add': {'index': temp_index, 'alias': main_index}},
                        {'add': {'index': temp_index, 'alias': main_index_base}},
                    ]
                })
            self.stdout.write('')

        self.stdout.write('Indexing finished.')