Example #1
def pre_index(new_index, old_index, alias, indexer, settings):
    """
    This sets up everything needed before indexing:
        * Flags the database.
        * Creates the new index.

    """
    # Flag the database to indicate that the reindexing has started.
    _print('Flagging the database to start the reindexation.', alias)
    Reindexing.flag_reindexing(new_index=new_index,
                               old_index=old_index,
                               alias=alias)
    time.sleep(5)  # Give celeryd some time to flag the DB.

    _print('Creating the mapping for index {index}.'.format(index=new_index),
           alias)

    # Update settings with mapping.
    settings = {
        'settings': settings,
        'mappings': indexer.get_mapping(),
    }

    # Create index and mapping.
    try:
        ES.indices.create(index=new_index, body=settings)
    except elasticsearch.ElasticsearchException as e:
        raise CommandError('ERROR: New index [%s] already exists? %s' %
                           (new_index, e))

    # Don't return until the health is green. By default waits for 30s.
    ES.cluster.health(index=new_index,
                      wait_for_status='green',
                      wait_for_relocating_shards=0)
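
The helpers on this page all revolve around a small `Reindexing` Django model that acts as a database-level flag while a rebuild is in flight. The model itself is not shown here; below is a minimal sketch inferred from the call sites and the tests in these examples (field names and method bodies are assumptions, not the project's actual code):

from django.db import models


class Reindexing(models.Model):
    # Sketch only: fields inferred from the flag_reindexing()/get_indices()
    # call sites in the surrounding examples.
    alias = models.CharField(max_length=255)
    old_index = models.CharField(max_length=255, null=True)
    new_index = models.CharField(max_length=255)

    @classmethod
    def flag_reindexing(cls, alias, old_index, new_index):
        # One flag per alias: flagging twice is a no-op that returns None.
        if cls.objects.filter(alias=alias).exists():
            return None
        return cls.objects.create(alias=alias, old_index=old_index,
                                  new_index=new_index)

    @classmethod
    def unflag_reindexing(cls, alias=None):
        qs = cls.objects.all()
        if alias is not None:
            qs = qs.filter(alias=alias)
        qs.delete()

    @classmethod
    def is_reindexing(cls):
        return cls.objects.exists()

    @classmethod
    def get_indices(cls, index):
        # While a rebuild is flagged for this alias, writes must hit both
        # the new and the old index; otherwise the alias stands alone.
        try:
            flag = cls.objects.get(alias=index)
            return [flag.new_index, flag.old_index]
        except cls.DoesNotExist:
            return [index]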
Example #2
def pre_index(new_index, old_index, alias, index_name, settings):
    """
    This sets up everything needed before indexing:
        * Flags the database.
        * Creates the new index.

    """
    indexer = INDEXER_MAP[index_name]

    # Flag the database to indicate that the reindexing has started.
    _print('Flagging the database to start the reindexation.', alias)
    Reindexing.flag_reindexing(new_index=new_index, old_index=old_index,
                               alias=alias)
    time.sleep(5)  # Give the celery worker some time to flag the DB.

    _print('Creating the mapping for index {index}.'.format(index=new_index),
           alias)

    # Update settings with mapping.
    settings = {
        'settings': settings,
        'mappings': indexer.get_mapping(),
    }

    # Create index and mapping.
    try:
        ES.indices.create(index=new_index, body=settings)
    except elasticsearch.ElasticsearchException as e:
        raise CommandError('ERROR: New index [%s] already exists? %s'
                           % (new_index, e))

    # Don't return until the health is green. By default waits for 30s.
    ES.cluster.health(index=new_index, wait_for_status='green',
                      wait_for_relocating_shards=0)
Example #3
    def test_get_indices(self):
        # Not reindexing.
        assert not Reindexing.objects.filter(alias='foo').exists()
        assert Reindexing.get_indices('foo') == ['foo']

        # Reindexing on 'foo'.
        Reindexing.objects.create(alias='foo', new_index='bar',
                                  old_index='baz')
        self.assertSetEqual(set(Reindexing.get_indices('foo')),
                            {'bar', 'baz'})

        # Doesn't clash on other aliases.
        self.assertSetEqual(set(Reindexing.get_indices('other')), {'other'})
Example #4
    def test_flag_reindexing(self):
        assert Reindexing.objects.count() == 0

        # Flagging for the first time.
        res = Reindexing.flag_reindexing('foo', 'bar', 'baz')
        eq_(Reindexing.objects.filter(alias='foo').count(), 1)
        eq_(res.alias, 'foo')
        eq_(res.old_index, 'bar')
        eq_(res.new_index, 'baz')

        # Flagging for the second time.
        res = Reindexing.flag_reindexing('foo', 'bar', 'baz')
        assert Reindexing.objects.filter(alias='foo').count() == 1
        assert res is None
Example #5
    def unindexer(cls, ids=None, _all=False, index=None):
        """
        Empties an index, but doesn't delete it. Useful for tearDowns.

        ids -- list of IDs to unindex.
        _all -- unindex all objects.
        """
        if _all:
            # Mostly used for test tearDowns.
            qs = cls.get_model()
            if hasattr(qs, 'with_deleted'):
                qs = qs.with_deleted
            else:
                qs = qs.objects
            ids = list(qs.order_by('id').values_list('id', flat=True))
        if not ids:
            return

        log.info('Unindexing %s %s-%s. [%s]' %
                 (cls.get_model()._meta.model_name, ids[0], ids[-1], len(ids)))

        index = index or cls.get_index()
        # Note: If reindexing is currently occurring, `get_indices` will return
        # more than one index.
        indices = Reindexing.get_indices(index)

        es = cls.get_es(urls=settings.ES_URLS)
        for id_ in ids:
            for idx in indices:
                try:
                    cls.unindex(id_=id_, es=es, index=idx)
                except elasticsearch.exceptions.NotFoundError:
                    # Ignore if it's not there.
                    log.info(u'[%s:%s] object not found in index' %
                             (cls.get_model()._meta.model_name, id_))
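
As the docstring notes, `unindexer` is mostly a test helper. A hypothetical tearDown using it (the `WebappIndexer` name is borrowed from other examples on this page):

from django.test import TestCase


class WebappSearchTest(TestCase):
    def tearDown(self):
        # Empty the index between tests without deleting it, so the
        # mapping and settings survive for the next test.
        WebappIndexer.unindexer(_all=True)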
Example #6
def post_index(new_index, old_index, alias, index_name, settings):
    """
    Perform post-indexing tasks:
        * Optimize (which also does a refresh and a flush by default).
        * Update settings to reset number of replicas.
        * Point the alias to this new index.
        * Unflag the database.
        * Remove the old index.
        * Output the current alias configuration.

    """
    _print('Optimizing, updating settings and aliases.', alias)

    # Optimize.
    ES.indices.optimize(index=new_index)

    # Update the replicas.
    ES.indices.put_settings(index=new_index, body=settings)

    # Add and remove aliases.
    actions = [
        {'add': {'index': new_index, 'alias': alias}}
    ]
    if old_index:
        actions.append(
            {'remove': {'index': old_index, 'alias': alias}}
        )
    ES.indices.update_aliases(body=dict(actions=actions))

    _print('Unflagging the database.', alias)
    Reindexing.unflag_reindexing(alias=alias)

    if old_index and ES.indices.exists(index=old_index):
        _print('Removing index {index}.'.format(index=old_index), alias)
        ES.indices.delete(index=old_index)

    alias_output = ''
    for indexer in INDEXERS:
        idx_alias = ES_INDEXES[indexer.get_mapping_type_name()]
        alias_output += unicode(
            ES.indices.get_aliases(index=idx_alias)) + '\n'

    _print('Reindexation done. Current aliases configuration: '
           '{output}\n'.format(output=alias_output), alias)
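
Note that the add and remove actions travel in a single `update_aliases` call, so the alias flips from the old index to the new one atomically and searches never see a moment with no index behind the alias. With made-up timestamped index names, the request body amounts to:

# Illustrative payload only; the index names here are invented.
{
    'actions': [
        {'add': {'index': 'webapp-20140705133001', 'alias': 'webapp'}},
        {'remove': {'index': 'webapp-20140101090000', 'alias': 'webapp'}},
    ]
}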
Example #7
def post_index(new_index, old_index, alias, index_name, settings):
    """
    Perform post-indexing tasks:
        * Optimize (which also does a refresh and a flush by default).
        * Update settings to reset number of replicas.
        * Point the alias to this new index.
        * Unflag the database.
        * Remove the old index.
        * Output the current alias configuration.

    """
    _print('Optimizing, updating settings and aliases.', alias)

    # Optimize.
    ES.indices.optimize(index=new_index)

    # Update the replicas.
    ES.indices.put_settings(index=new_index, body=settings)

    # Add and remove aliases.
    actions = [{'add': {'index': new_index, 'alias': alias}}]
    if old_index:
        actions.append({'remove': {'index': old_index, 'alias': alias}})
    ES.indices.update_aliases(body=dict(actions=actions))

    _print('Unflagging the database.', alias)
    Reindexing.unflag_reindexing(alias=alias)

    if old_index and ES.indices.exists(index=old_index):
        _print('Removing index {index}.'.format(index=old_index), alias)
        ES.indices.delete(index=old_index)

    alias_output = ''
    for indexer in INDEXERS:
        idx_alias = ES_INDEXES[indexer.get_mapping_type_name()]
        alias_output += unicode(
            ES.indices.get_aliases(index=idx_alias)) + '\n'

    _print(
        'Reindexation done. Current aliases configuration: '
        '{output}\n'.format(output=alias_output), alias)
Example #8
def index(ids, indexer, **kw):
    """
    Given a list of IDs and an indexer, index into ES.
    If a reindexation is currently occurring, index on both the old and new.
    """
    task_log.info('Indexing {0} {1}-{2}. [{3}]'.format(
        indexer.get_model()._meta.model_name, ids[0], ids[-1], len(ids)))

    # If reindexing is currently occurring, index on both old and new indexes.
    indices = Reindexing.get_indices(indexer.get_index())

    es = indexer.get_es(urls=settings.ES_URLS)
    for obj in indexer.get_indexable().filter(id__in=ids):
        doc = indexer.extract_document(obj.id, obj)
        for idx in indices:
            indexer.index(doc, id_=obj.id, es=es, index=idx)
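
This task is meant to be fanned out over small ID chunks rather than invoked once with every ID. A minimal dispatcher sketch, assuming `index` is registered as a Celery task (the `queue_indexing` helper and chunk size are assumptions, not part of the project):

from celery import group


def queue_indexing(indexer, ids, chunk_size=150):
    # Hypothetical fan-out: one `index` task per fixed-size chunk of IDs,
    # so no single task holds a huge queryset in memory.
    tasks = [index.si(ids[i:i + chunk_size], indexer)
             for i in range(0, len(ids), chunk_size)]
    group(tasks).apply_async()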
Example #9
def index_webapps(ids, **kw):
    """TODO: use search/indexers.py:index."""
    task_log.info('Indexing apps %s-%s. [%s]' % (ids[0], ids[-1], len(ids)))

    index = kw.pop('index', WebappIndexer.get_index())
    # Note: If reindexing is currently occurring, `get_indices` will return
    # more than one index.
    indices = Reindexing.get_indices(index)

    es = WebappIndexer.get_es(urls=settings.ES_URLS)
    qs = Webapp.indexing_transformer(Webapp.with_deleted.no_cache().filter(
        id__in=ids))
    for obj in qs:
        doc = WebappIndexer.extract_document(obj.id, obj)
        for idx in indices:
            WebappIndexer.index(doc, id_=obj.id, es=es, index=idx)
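
A task like this is typically kicked off from a `post_save` signal so the index tracks the database as apps change. A hypothetical hook (the signal wiring is an assumption; it is not shown in the example):

from django.db.models.signals import post_save
from django.dispatch import receiver


@receiver(post_save, sender=Webapp)
def update_webapp_index(sender, instance, **kwargs):
    # Queue the (re)index asynchronously; the task itself resolves the
    # right index (or indexes, mid-reindex) via Reindexing.get_indices.
    index_webapps.delay([instance.id])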
Example #10
def index(ids, indexer, **kw):
    """
    Given a list of IDs and an indexer, index into ES.
    If a reindexation is currently occurring, index on both the old and new.
    """
    log.info('Indexing {0} {1}-{2}. [{3}]'.format(
        indexer.get_model()._meta.model_name, ids[0], ids[-1], len(ids)))

    # If reindexing is currently occurring, index on both old and new indexes.
    indices = Reindexing.get_indices(indexer.get_index())

    es = indexer.get_es(urls=settings.ES_URLS)
    for obj in indexer.get_indexable().filter(id__in=ids):
        doc = indexer.extract_document(obj.id, obj)
        for idx in indices:
            indexer.index(doc, id_=obj.id, es=es, index=idx)
Example #11
def unindex_webapps(ids, **kw):
    if not ids:
        return

    task_log.info('Un-indexing apps %s-%s. [%s]' % (ids[0], ids[-1], len(ids)))

    index = kw.pop('index', WebappIndexer.get_index())
    # Note: If reindexing is currently occurring, `get_indices` will return
    # more than one index.
    indices = Reindexing.get_indices(index)

    es = WebappIndexer.get_es(urls=settings.ES_URLS)
    for id_ in ids:
        for idx in indices:
            try:
                WebappIndexer.unindex(id_=id_, es=es, index=idx)
            except ElasticHttpNotFoundError:
                # Ignore if it's not there.
                task_log.info(
                    u'[Webapp:%s] Unindexing app but not found in index' % id_)
Example #12
    def unindexer(cls, ids=None, _all=False, index=None):
        """
        Empties an index, but doesn't delete it. Useful for tearDowns.

        ids -- list of IDs to unindex.
        _all -- unindex all objects.
        """
        if _all:
            # Mostly used for test tearDowns.
            qs = cls.get_model()
            if hasattr(qs, 'with_deleted'):
                qs = qs.with_deleted
            else:
                qs = qs.objects
            ids = list(qs.order_by('id').values_list('id', flat=True))
        if not ids:
            return

        task_log.info('Unindexing %s %s-%s. [%s]' %
                      (cls.get_model()._meta.model_name, ids[0], ids[-1],
                       len(ids)))

        index = index or cls.get_index()
        # Note: If reindexing is currently occurring, `get_indices` will return
        # more than one index.
        indices = Reindexing.get_indices(index)

        es = cls.get_es(urls=settings.ES_URLS)
        for id_ in ids:
            for idx in indices:
                try:
                    cls.unindex(id_=id_, es=es, index=idx)
                except elasticsearch.exceptions.NotFoundError:
                    # Ignore if it's not there.
                    task_log.info(u'[%s:%s] object not found in index' %
                                  (cls.get_model()._meta.model_name, id_))
Example #13
    def test_unflag_reindexing(self):
        assert Reindexing.objects.filter(alias='foo').count() == 0

        # Unflagging an unflagged database does nothing.
        Reindexing.unflag_reindexing(alias='foo')
        assert Reindexing.objects.filter(alias='foo').count() == 0

        # Flag, then unflag.
        Reindexing.objects.create(alias='foo', new_index='bar',
                                  old_index='baz')
        assert Reindexing.objects.filter(alias='foo').count() == 1

        Reindexing.unflag_reindexing(alias='foo')
        assert Reindexing.objects.filter(alias='foo').count() == 0

        # Unflagging another alias doesn't clash.
        Reindexing.objects.create(alias='bar', new_index='bar',
                                  old_index='baz')
        Reindexing.unflag_reindexing(alias='foo')
        assert Reindexing.objects.filter(alias='bar').count() == 1
Example #14
    def handle(self, *args, **kwargs):
        """Set up reindexing tasks.

        Creates a Tasktree that creates new indexes and indexes all objects,
        then points each alias to its new index when finished.
        """
        index_choice = kwargs.get('index', None)
        prefix = kwargs.get('prefix', '')
        force = kwargs.get('force', False)

        if index_choice:
            # If we only want to reindex a subset of indexes.
            INDEXES = INDEX_CHOICES.get(index_choice, None)
            if INDEXES is None:
                raise CommandError('Incorrect index name specified. '
                                   'Choose one of: %s' %
                                   ', '.join(INDEX_CHOICES.keys()))
        else:
            INDEXES = INDEXERS

        if Reindexing.is_reindexing() and not force:
            raise CommandError('Indexation already occurring - use --force to '
                               'bypass')
        elif force:
            Reindexing.unflag_reindexing()

        for INDEXER in INDEXES:
            index_name = INDEXER.get_mapping_type_name()
            chunk_size = INDEXER.chunk_size
            alias = ES_INDEXES[index_name]

            chunks, total = chunk_indexing(INDEXER, chunk_size)
            if not total:
                _print('No items to queue.', alias)
            else:
                total_chunks = int(ceil(total / float(chunk_size)))
                _print('Indexing {total} items into {n} chunks of size {size}'
                       .format(total=total, n=total_chunks, size=chunk_size),
                       alias)

            # Get the old index if it exists.
            try:
                aliases = ES.indices.get_alias(name=alias).keys()
            except elasticsearch.NotFoundError:
                aliases = []
            old_index = aliases[0] if aliases else None

            # Create a new index, using the index name with a timestamp.
            new_index = timestamp_index(prefix + alias)

            # See how the index is currently configured.
            if old_index:
                try:
                    s = (ES.indices.get_settings(index=old_index).get(
                        old_index, {}).get('settings', {}))
                except elasticsearch.NotFoundError:
                    s = {}
            else:
                s = {}
            num_replicas = s.get('number_of_replicas',
                                 settings.ES_DEFAULT_NUM_REPLICAS)
            num_shards = s.get('number_of_shards',
                               settings.ES_DEFAULT_NUM_SHARDS)

            pre_task = pre_index.si(
                new_index, old_index, alias, index_name, {
                    'analysis': INDEXER.get_analysis(),
                    'number_of_replicas': 0,
                    'number_of_shards': num_shards,
                    'store.compress.tv': True,
                    'store.compress.stored': True,
                    'refresh_interval': '-1'
                })
            post_task = post_index.si(new_index, old_index, alias, index_name,
                                      {
                                          'number_of_replicas': num_replicas,
                                          'refresh_interval': '5s'
                                      })

            # Ship it.
            if not total:
                # If there's no data we still create the index and alias.
                chain(pre_task, post_task).apply_async()
            else:
                index_tasks = [
                    run_indexing.si(new_index, index_name, chunk)
                    for chunk in chunks
                ]

                if settings.CELERY_ALWAYS_EAGER:
                    # Eager mode and chords don't get along. So we serialize
                    # the tasks as a workaround.
                    index_tasks.insert(0, pre_task)
                    index_tasks.append(post_task)
                    chain(*index_tasks).apply_async()
                else:
                    chain(pre_task, chord(header=index_tasks,
                                          body=post_task)).apply_async()

        _print('New index and indexing tasks all queued up.')
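
The `timestamp_index` helper is what keeps each rebuild on a fresh physical index behind the stable alias. Its definition is not shown on this page; a plausible sketch:

import datetime


def timestamp_index(index):
    # Sketch: suffix the alias with a timestamp so every rebuild gets a
    # unique index name, e.g. 'webapp' -> 'webapp-20140705133001'.
    return '{0}-{1}'.format(
        index, datetime.datetime.now().strftime('%Y%m%d%H%M%S'))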
Example #15
    def handle(self, *args, **kwargs):
        """Set up reindexing tasks.

        Creates a Tasktree that creates new indexes and indexes all objects,
        then points each alias to its new index when finished.
        """
        index_choice = kwargs.get('index', None)
        prefix = kwargs.get('prefix', '')
        force = kwargs.get('force', False)

        if index_choice:
            # If we only want to reindex a subset of indexes.
            INDEXES = INDEX_CHOICES.get(index_choice, None)
            if INDEXES is None:
                raise CommandError(
                    'Incorrect index name specified. '
                    'Choose one of: %s' % ', '.join(INDEX_CHOICES.keys()))
        else:
            INDEXES = INDEXERS

        if Reindexing.is_reindexing() and not force:
            raise CommandError('Indexation already occurring - use --force to '
                               'bypass')
        elif force:
            Reindexing.unflag_reindexing()

        for INDEXER in INDEXES:
            index_name = INDEXER.get_mapping_type_name()
            chunk_size = INDEXER.chunk_size
            alias = ES_INDEXES[index_name]

            chunks, total = chunk_indexing(INDEXER, chunk_size)
            if not total:
                _print('No items to queue.', alias)
            else:
                total_chunks = int(ceil(total / float(chunk_size)))
                _print('Indexing {total} items into {n} chunks of size {size}'
                       .format(total=total, n=total_chunks, size=chunk_size),
                       alias)

            # Get the old index if it exists.
            try:
                aliases = ES.indices.get_alias(name=alias).keys()
            except elasticsearch.NotFoundError:
                aliases = []
            old_index = aliases[0] if aliases else None

            # Create a new index, using the index name with a timestamp.
            new_index = timestamp_index(prefix + alias)

            # See how the index is currently configured.
            if old_index:
                try:
                    s = (ES.indices.get_settings(index=old_index).get(
                        old_index, {}).get('settings', {}))
                except elasticsearch.NotFoundError:
                    s = {}
            else:
                s = {}
            num_replicas = s.get('number_of_replicas',
                                 settings.ES_DEFAULT_NUM_REPLICAS)
            num_shards = s.get('number_of_shards',
                               settings.ES_DEFAULT_NUM_SHARDS)

            pre_task = pre_index.si(new_index, old_index, alias, index_name, {
                'analysis': INDEXER.get_analysis(),
                'number_of_replicas': 0,
                'number_of_shards': num_shards,
                'store.compress.tv': True,
                'store.compress.stored': True,
                'refresh_interval': '-1'})
            post_task = post_index.si(new_index, old_index, alias, index_name,
                                      {'number_of_replicas': num_replicas,
                                       'refresh_interval': '5s'})

            # Ship it.
            if not total:
                # If there's no data we still create the index and alias.
                chain(pre_task, post_task).apply_async()
            else:
                index_tasks = [run_indexing.si(new_index, index_name, chunk)
                               for chunk in chunks]

                if settings.CELERY_ALWAYS_EAGER:
                    # Eager mode and chords don't get along. So we serialize
                    # the tasks as a workaround.
                    index_tasks.insert(0, pre_task)
                    index_tasks.append(post_task)
                    chain(*index_tasks).apply_async()
                else:
                    chain(pre_task, chord(header=index_tasks,
                                          body=post_task)).apply_async()

        _print('New index and indexing tasks all queued up.')
Example #16
def flag_database(new_index, old_index, alias):
    """Flags the database to indicate that the reindexing has started."""
    sys.stdout.write('Flagging the database to start the reindexation\n')
    Reindexing.flag_reindexing(new_index=new_index, old_index=old_index,
                               alias=alias)
    time.sleep(5)  # Give celeryd some time to flag the DB.
Example #17
def unflag_database():
    """Unflag the database to indicate that the reindexing is over."""
    sys.stdout.write('Unflagging the database\n')
    Reindexing.unflag_reindexing()
Example #18
    def test_is_reindexing(self):
        assert not Reindexing.is_reindexing()

        Reindexing.objects.create(alias='foo', new_index='bar',
                                  old_index='baz')
        assert Reindexing.is_reindexing()
Example #19
    def handle(self, *args, **kwargs):
        """Set up reindexing tasks.

        Creates a Tasktree that creates new indexes and indexes all objects,
        then points each alias to its new index when finished.
        """
        global INDEXES

        index_choice = kwargs.get('index', None)
        prefix = kwargs.get('prefix', '')
        force = kwargs.get('force', False)

        if index_choice:
            # If we only want to reindex a subset of indexes.
            INDEXES = INDEX_DICT.get(index_choice, INDEXES)

        if Reindexing.is_reindexing() and not force:
            raise CommandError('Indexation already occurring - use --force to '
                               'bypass')
        elif force:
            Reindexing.unflag_reindexing()

        for ALIAS, INDEXER, CHUNK_SIZE in INDEXES:

            chunks, total = chunk_indexing(INDEXER, CHUNK_SIZE)
            if not total:
                _print('No items to queue.', ALIAS)
            else:
                total_chunks = int(ceil(total / float(CHUNK_SIZE)))
                _print('Indexing {total} items into {n} chunks of size {size}'
                       .format(total=total, n=total_chunks, size=CHUNK_SIZE),
                       ALIAS)

            # Get the old index if it exists.
            try:
                aliases = ES.indices.get_alias(name=ALIAS).keys()
            except elasticsearch.NotFoundError:
                aliases = []
            old_index = aliases[0] if aliases else None

            # Create a new index, using the index name with a timestamp.
            new_index = timestamp_index(prefix + ALIAS)

            # See how the index is currently configured.
            if old_index:
                try:
                    s = (ES.indices.get_settings(index=old_index).get(
                        old_index, {}).get('settings', {}))
                except elasticsearch.NotFoundError:
                    s = {}
            else:
                s = {}
            num_replicas = s.get('number_of_replicas',
                                 settings.ES_DEFAULT_NUM_REPLICAS)
            num_shards = s.get('number_of_shards',
                               settings.ES_DEFAULT_NUM_SHARDS)

            pre_task = pre_index.si(
                new_index, old_index, ALIAS, INDEXER, {
                    'analysis': INDEXER.get_analysis(),
                    'number_of_replicas': 0,
                    'number_of_shards': num_shards,
                    'store.compress.tv': True,
                    'store.compress.stored': True,
                    'refresh_interval': '-1'
                })
            post_task = post_index.si(new_index, old_index, ALIAS, INDEXER, {
                'number_of_replicas': num_replicas,
                'refresh_interval': '5s'
            })

            # Ship it.
            if not total:
                # If there's no data we still create the index and alias.
                chain(pre_task, post_task).apply_async()
            else:
                index_tasks = [
                    run_indexing.si(new_index, INDEXER, chunk)
                    for chunk in chunks
                ]
                chain(pre_task, chord(header=index_tasks,
                                      body=post_task)).apply_async()

        _print('New index and indexing tasks all queued up.')
Example #20
    def handle(self, *args, **kwargs):
        """Set up reindexing tasks.

        Creates a Tasktree that creates new indexes and indexes all objects,
        then points each alias to its new index when finished.
        """
        global INDEXES

        index_choice = kwargs.get('index', None)
        prefix = kwargs.get('prefix', '')
        force = kwargs.get('force', False)

        if index_choice:
            # If we only want to reindex a subset of indexes.
            INDEXES = INDEX_DICT.get(index_choice, INDEXES)

        if Reindexing.is_reindexing() and not force:
            raise CommandError('Indexation already occurring - use --force to '
                               'bypass')
        elif force:
            unflag_database()

        chain = None
        old_indexes = []
        for ALIAS, INDEXER, CHUNK_SIZE in INDEXES:
            # Get the old index if it exists.
            try:
                aliases = ES.indices.get_alias(name=ALIAS).keys()
            except elasticsearch.NotFoundError:
                aliases = []
            old_index = aliases[0] if aliases else None
            old_indexes.append(old_index)

            # Create a new index, using the index name with a timestamp.
            new_index = timestamp_index(prefix + ALIAS)

            # See how the index is currently configured.
            if old_index:
                try:
                    s = (ES.indices.get_settings(index=old_index).get(
                        old_index, {}).get('settings', {}))
                except elasticsearch.NotFoundError:
                    s = {}
            else:
                s = {}
            num_replicas = s.get('number_of_replicas',
                                 settings.ES_DEFAULT_NUM_REPLICAS)
            num_shards = s.get('number_of_shards',
                               settings.ES_DEFAULT_NUM_SHARDS)

            # Flag the database to mark as currently indexing.
            if not chain:
                chain = flag_database.si(new_index, old_index, ALIAS)
            else:
                chain |= flag_database.si(new_index, old_index, ALIAS)

            # Create the indexes and mappings.
            # Note: We set num_replicas=0 here to lower load while re-indexing.
            # In a later step we increase it, which results in more efficient
            # bulk copying in ES. For ES < 0.90 we manually enable compression.
            chain |= create_index.si(new_index, ALIAS, INDEXER, {
                'analysis': INDEXER.get_analysis(),
                'number_of_replicas': 0, 'number_of_shards': num_shards,
                'store.compress.tv': True, 'store.compress.stored': True,
                'refresh_interval': '-1'})

            # Index all the things!
            chain |= run_indexing.si(new_index, INDEXER, CHUNK_SIZE)

            # After indexing we optimize the index, adjust settings, and point
            # alias to the new index.
            chain |= update_alias.si(new_index, old_index, ALIAS, {
                'number_of_replicas': num_replicas, 'refresh_interval': '5s'})

        # Unflag the database to mark as done indexing.
        chain |= unflag_database.si()

        # Delete the old index, if any.
        for old_index in old_indexes:
            if old_index:
                chain |= delete_index.si(old_index)

        # All done!
        chain |= output_summary.si()

        # Ship it.
        self.stdout.write('\nNew index and indexing tasks all queued up.\n')
        os.environ['FORCE_INDEXING'] = '1'
        try:
            chain.apply_async()
        finally:
            del os.environ['FORCE_INDEXING']
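
The `chain |= task.si(...)` pattern above is just an incremental way of building one Celery chain of immutable signatures: `.si()` makes each step ignore the previous step's return value, so every task runs with exactly the arguments captured at queue time. Spelled out for a single pass of the loop (with `index_settings` and `live_settings` standing in for the settings dicts built above):

from celery import chain

pipeline = chain(
    flag_database.si(new_index, old_index, ALIAS),
    create_index.si(new_index, ALIAS, INDEXER, index_settings),
    run_indexing.si(new_index, INDEXER, CHUNK_SIZE),
    update_alias.si(new_index, old_index, ALIAS, live_settings),
    unflag_database.si(),
    output_summary.si(),
)
pipeline.apply_async()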
Example #21
    def handle(self, *args, **kwargs):
        """Set up reindexing tasks.

        Creates a Tasktree that creates new indexes and indexes all objects,
        then points each alias to its new index when finished.
        """
        global INDEXES

        index_choice = kwargs.get('index', None)
        prefix = kwargs.get('prefix', '')
        force = kwargs.get('force', False)

        if index_choice:
            # If we only want to reindex a subset of indexes.
            INDEXES = INDEX_DICT.get(index_choice, INDEXES)

        if Reindexing.is_reindexing() and not force:
            raise CommandError('Indexation already occurring - use --force to '
                               'bypass')
        elif force:
            Reindexing.unflag_reindexing()

        for ALIAS, INDEXER, CHUNK_SIZE in INDEXES:

            chunks, total = chunk_indexing(INDEXER, CHUNK_SIZE)
            if not total:
                _print('No items to queue.', ALIAS)
            else:
                total_chunks = int(ceil(total / float(CHUNK_SIZE)))
                _print('Indexing {total} items into {n} chunks of size {size}'
                       .format(total=total, n=total_chunks, size=CHUNK_SIZE),
                       ALIAS)

            # Get the old index if it exists.
            try:
                aliases = ES.indices.get_alias(name=ALIAS).keys()
            except elasticsearch.NotFoundError:
                aliases = []
            old_index = aliases[0] if aliases else None

            # Create a new index, using the index name with a timestamp.
            new_index = timestamp_index(prefix + ALIAS)

            # See how the index is currently configured.
            if old_index:
                try:
                    s = (ES.indices.get_settings(index=old_index).get(
                        old_index, {}).get('settings', {}))
                except elasticsearch.NotFoundError:
                    s = {}
            else:
                s = {}
            num_replicas = s.get('number_of_replicas',
                                 settings.ES_DEFAULT_NUM_REPLICAS)
            num_shards = s.get('number_of_shards',
                               settings.ES_DEFAULT_NUM_SHARDS)

            pre_task = pre_index.si(new_index, old_index, ALIAS, INDEXER, {
                'analysis': INDEXER.get_analysis(),
                'number_of_replicas': 0,
                'number_of_shards': num_shards,
                'store.compress.tv': True,
                'store.compress.stored': True,
                'refresh_interval': '-1'})
            post_task = post_index.si(new_index, old_index, ALIAS, INDEXER, {
                'number_of_replicas': num_replicas,
                'refresh_interval': '5s'})

            # Ship it.
            if not total:
                # If there's no data we still create the index and alias.
                chain(pre_task, post_task).apply_async()
            else:
                index_tasks = [run_indexing.si(new_index, INDEXER, chunk)
                               for chunk in chunks]
                chain(pre_task,
                      chord(header=index_tasks, body=post_task)).apply_async()

        _print('New index and indexing tasks all queued up.')