def handle(self, *args, **kwargs):
    """Rebuild every search index behind its alias.

    Builds a TaskTree that, for each prefixed alias, flags the database,
    creates the mapping and creates a new time-stamped index. A final
    branch then runs the collected alias actions, unflags the database and
    deletes the indexes that were aliased before the run.

    Recognised keyword arguments:
        force: proceed even when the database is already flagged.
        prefix: string prepended to every alias name (default ``''``).
        wipe: after an interactive confirmation, issue a DELETE on the ES
            root URL before reindexing.

    Returns the ``_SUMMARY`` template filled with the number of indexes and
    a pretty-printed JSON dump of the ``_aliases`` response.

    Raises CommandError when not running under Marketplace settings, when
    the database is already flagged and ``force`` is not set, or when the
    wipe confirmation is answered with "no".
    """
    if not django_settings.MARKETPLACE:
        raise CommandError(
            'This command affects both the marketplace and '
            'AMO ES storage. But the command can only be '
            'run from the Marketplace.')

    force = kwargs.get('force', False)
    already_flagged = database_flagged()
    if already_flagged and not force:
        raise CommandError(
            'Indexation already occuring - use --force to bypass')

    prefix = kwargs.get('prefix', '')
    log('Starting the reindexation')

    if kwargs.get('wipe', False):
        answer = raw_input("Are you sure you want to wipe all data from "
                           "ES ? (yes/no): ")
        # Keep asking until we get an unambiguous answer.
        while answer not in ('yes', 'no'):
            answer = raw_input('Please enter either "yes" or "no": ')
        if answer != 'yes':
            raise CommandError("Aborted.")
        unflag_database()
        requests.delete(url('/'))
    elif force:
        unflag_database()

    # (index name, alias info) pairs as currently reported by /_aliases.
    alias_entries = requests.get(url('/_aliases')).json().items()

    # The prefixed alias names that have to be rebuilt.
    indexes = set(prefix + name for name in _ALIASES.values())

    actions = []

    def add_action(*elmt):
        # Record each alias action only once.
        if elmt not in actions:
            actions.append(elmt)

    log('Building the task tree')
    tree = TaskTree()
    last_action = None
    to_remove = []

    # One new time-stamped index per alias.
    for alias in indexes:
        is_stats = 'stats' in alias
        old_index = None

        for aliased_index, details in alias_entries:
            if alias not in details['aliases']:
                continue
            # This index currently carries the alias: schedule both the
            # index and its alias for removal.
            old_index = aliased_index
            to_remove.append(aliased_index)
            add_action('remove', aliased_index, alias)

        new_index = timestamp_index(alias)

        # When something already answers at the alias URL, the alias name
        # itself is used as the old index so indexing continues in it.
        if requests.head(url('/%s' % alias)).status_code == 200:
            old_index = alias

        flag_step = tree.add_task(flag_database,
                                  args=[new_index, old_index, alias])
        mapping_step = flag_step.add_task(create_mapping,
                                          args=[new_index, alias])
        last_action = mapping_step.add_task(create_index,
                                            args=[new_index, is_stats])

        # The new index must end up behind the alias.
        add_action('add', new_index, alias)

    # Point the aliases at the new indexes and drop the old aliases.
    renaming_step = last_action.add_task(run_aliases_actions,
                                         args=[actions])

    # Indexing no longer needs to be duplicated once the aliases moved.
    unflag_step = renaming_step.add_task(unflag_database)

    # Finally get rid of the previously aliased indexes, if any.
    unflag_step.add_task(delete_indexes, args=[to_remove])

    log('Running all indexation tasks')

    os.environ['FORCE_INDEXING'] = '1'
    try:
        tree.apply_async()
        time.sleep(10)  # give celeryd some time to flag the DB
        while database_flagged():
            sys.stdout.write('.')
            sys.stdout.flush()
            time.sleep(5)
    finally:
        del os.environ['FORCE_INDEXING']

    sys.stdout.write('\n')

    # Report the state of /_aliases after the run.
    dumped = json.dumps(call_es('_aliases').json(),
                        sort_keys=True, indent=4)
    return _SUMMARY % (len(indexes), dumped)
def handle(self, *args, **kwargs):
    """Queue the chain of tasks that rebuilds the search index.

    The chain flags the database, creates a new time-stamped index, runs
    the indexing, updates the alias, unflags the database, deletes the
    previously aliased index (when there is one) and outputs a summary.
    The chain is only dispatched here; this method does not wait for it.

    Recognised keyword arguments:
        force: proceed (and unflag the database first) even when a
            reindexation is already flagged.
        prefix: string prepended to ``ALIAS`` when naming the new index.

    Raises CommandError when not running under Marketplace settings or
    when the database is already flagged and ``force`` is not set.
    """
    if not settings.MARKETPLACE:
        raise CommandError(
            'This command affects only marketplace and '
            'should be run under Marketplace settings.')

    force = kwargs.get('force', False)
    prefix = kwargs.get('prefix', '')

    already_flagged = database_flagged()
    if already_flagged and not force:
        raise CommandError(
            'Indexation already occuring - use --force to bypass')
    if force:
        unflag_database()

    # Names of the indexes currently sitting behind `ALIAS`.
    try:
        aliased = ES.aliases(ALIAS).keys()
    except pyelasticsearch.exceptions.ElasticHttpNotFoundError:
        aliased = []

    if aliased:
        old_index = aliased[0]
    else:
        old_index = None

    # The replacement index is named after the alias plus a timestamp.
    new_index = timestamp_index(prefix + ALIAS)

    # Reuse the shard/replica configuration of the existing index, if any.
    current = {}
    if old_index:
        try:
            current = (ES.get_settings(old_index)
                       .get(old_index, {})
                       .get('settings', {}))
        except pyelasticsearch.exceptions.ElasticHttpNotFoundError:
            current = {}

    num_replicas = current.get('number_of_replicas',
                               settings.ES_DEFAULT_NUM_REPLICAS)
    num_shards = current.get('number_of_shards',
                             settings.ES_DEFAULT_NUM_SHARDS)

    # Settings used while indexing. Replicas are set to 0 to decrease load
    # during the re-index; they are raised again in a later step, which
    # results in a more efficient bulk copy in Elasticsearch.
    # For ES < 0.90 compression is enabled manually.
    creation_settings = {
        'analysis': WebappIndexer.get_analysis(),
        'number_of_replicas': 0,
        'number_of_shards': num_shards,
        'store.compress.tv': True,
        'store.compress.stored': True,
        'refresh_interval': '-1',
    }

    # Settings handed to the alias update once indexing is finished.
    final_settings = {
        'number_of_replicas': num_replicas,
        'refresh_interval': '5s',
    }

    # Flag the database.
    chain = flag_database.si(new_index, old_index, ALIAS)
    # Create the index and mapping.
    chain |= create_index.si(new_index, ALIAS, creation_settings)
    # Index all the things!
    chain |= run_indexing.si(new_index)
    # Adjust the settings and point the alias at the new index.
    chain |= update_alias.si(new_index, old_index, ALIAS, final_settings)
    # Unflag the database.
    chain |= unflag_database.si()

    # Delete the old index, if any.
    if old_index:
        chain |= delete_index.si(old_index)

    chain |= output_summary.si()

    self.stdout.write('\nNew index and indexing tasks all queued up.\n')

    # FORCE_INDEXING is only set for the duration of the dispatch.
    os.environ['FORCE_INDEXING'] = '1'
    try:
        chain.apply_async()
    finally:
        del os.environ['FORCE_INDEXING']
def handle(self, *args, **kwargs):
    """Set up and dispatch the reindexing task chain.

    Steps queued, in order: flag the database, create a new time-stamped
    index, index all objects into it, update the alias, unflag the
    database, delete the previously aliased index (only when one exists)
    and output a summary. The method returns once the chain is dispatched.

    Recognised keyword arguments:
        force: bypass the "already flagged" check and unflag the database
            before starting.
        prefix: string prepended to ``ALIAS`` when naming the new index.

    Raises CommandError when not running under Marketplace settings or
    when the database is already flagged and ``force`` is not set.
    """
    if not settings.MARKETPLACE:
        raise CommandError(
            'This command affects only marketplace and '
            'should be run under Marketplace settings.')

    force = kwargs.get('force', False)
    prefix = kwargs.get('prefix', '')

    in_progress = database_flagged()
    if in_progress and not force:
        raise CommandError(
            'Indexation already occuring - use --force to bypass')
    if force:
        unflag_database()

    not_found = pyelasticsearch.exceptions.ElasticHttpNotFoundError

    # The list of indexes currently aliased by `ALIAS`.
    try:
        existing = ES.aliases(ALIAS).keys()
    except not_found:
        existing = []

    old_index = None
    if existing:
        old_index = existing[0]

    # Create the new index name from the alias name plus a timestamp.
    new_index = timestamp_index(prefix + ALIAS)

    # See how the current index is configured so the new one matches it.
    old_settings = {}
    if old_index:
        try:
            old_settings = (ES.get_settings(old_index)
                            .get(old_index, {})
                            .get('settings', {}))
        except not_found:
            old_settings = {}

    num_replicas = old_settings.get('number_of_replicas',
                                    settings.ES_DEFAULT_NUM_REPLICAS)
    num_shards = old_settings.get('number_of_shards',
                                  settings.ES_DEFAULT_NUM_SHARDS)

    # Note: number_of_replicas is 0 here to decrease load while
    # re-indexing. A later step increases it, which results in a more
    # efficient bulk copy in Elasticsearch.
    # For ES < 0.90 compression is enabled manually.
    indexing_settings = {
        'number_of_replicas': 0,
        'number_of_shards': num_shards,
        'store.compress.tv': True,
        'store.compress.stored': True,
        'refresh_interval': '-1',
    }

    # Settings passed along when the alias is moved to the new index.
    live_settings = {
        'number_of_replicas': num_replicas,
        'refresh_interval': '5s',
    }

    # Flag the database.
    chain = flag_database.si(new_index, old_index, ALIAS)

    # Create the index and mapping.
    chain |= create_index.si(new_index, ALIAS, indexing_settings)

    # Index all the things!
    chain |= run_indexing.si(new_index)

    # After indexing, adjust settings and point the alias to the new index.
    chain |= update_alias.si(new_index, old_index, ALIAS, live_settings)

    # Unflag the database.
    chain |= unflag_database.si()

    # Delete the old index, if any.
    if old_index:
        chain |= delete_index.si(old_index)

    chain |= output_summary.si()

    self.stdout.write('\nNew index and indexing tasks all queued up.\n')

    # The environment flag is removed again whether or not dispatch fails.
    os.environ['FORCE_INDEXING'] = '1'
    try:
        chain.apply_async()
    finally:
        del os.environ['FORCE_INDEXING']
def handle(self, *args, **kwargs):
    """Reindex all configured ES indexes behind their aliases.

    For every prefixed name taken from ``django_settings.ES_INDEXES`` a
    branch is added to a TaskTree: flag the database, create the mapping,
    create a new time-stamped index. After the last branch the collected
    alias actions are run, the database is unflagged and the indexes that
    were aliased before the run are deleted.

    Recognised keyword arguments:
        force: proceed even when the database is already flagged.
        prefix: string prepended to every index name (default ``''``).
        wipe: after an interactive confirmation, issue a DELETE on the ES
            root URL before reindexing.

    Returns the ``_SUMMARY`` template filled with the number of indexes and
    a pretty-printed JSON dump of the ``_aliases`` response.

    Raises CommandError when not running under Marketplace settings, when
    the database is already flagged and ``force`` is not set, or when the
    wipe confirmation is answered with "no".
    """
    if not django_settings.MARKETPLACE:
        raise CommandError(
            'This command affects both the marketplace and '
            'AMO ES storage. But the command can only be '
            'run from the Marketplace.')

    force = kwargs.get('force', False)
    flagged = database_flagged()
    if flagged and not force:
        raise CommandError(
            'Indexation already occuring - use --force to bypass')

    prefix = kwargs.get('prefix', '')
    log('Starting the reindexation')

    wipe = kwargs.get('wipe', False)
    if wipe:
        reply = raw_input("Are you sure you want to wipe all data from "
                          "ES ? (yes/no): ")
        # Insist on an explicit "yes" or "no".
        while reply not in ('yes', 'no'):
            reply = raw_input('Please enter either "yes" or "no": ')
        if reply != 'yes':
            raise CommandError("Aborted.")
        unflag_database()
        requests.delete(url('/'))
    elif force:
        unflag_database()

    # Current aliases from /_aliases. `.json` is read as an attribute here,
    # not called.
    known_aliases = requests.get(url('/_aliases')).json.items()

    # Prefixed names of the indexes to rebuild.
    indexes = set(
        prefix + name for name in django_settings.ES_INDEXES.values())

    actions = []

    def add_action(*elmt):
        # Skip actions that were already recorded.
        if elmt not in actions:
            actions.append(elmt)

    log('Building the task tree')
    tree = TaskTree()
    last_action = None
    to_remove = []

    # For each index, create a new time-stamped index.
    for alias in indexes:
        is_stats = 'stats' in alias
        old_index = None

        for aliased_index, info in known_aliases:
            if alias not in info['aliases']:
                continue
            # Remember the index so it can be deleted later, and queue
            # the removal of its alias.
            old_index = aliased_index
            to_remove.append(aliased_index)
            add_action('remove', aliased_index, alias)

        new_index = timestamp_index(alias)

        # If something already answers at the alias URL, keep indexing
        # into it by treating the alias name as the old index.
        head_status = requests.head(url('/%s' % alias)).status_code
        if head_status == 200:
            old_index = alias

        last_action = tree.add_task(
            flag_database, args=[new_index, old_index, alias]
        ).add_task(
            create_mapping, args=[new_index, alias]
        ).add_task(
            create_index, args=[new_index, is_stats]
        )

        # Queue the new index to be added behind the alias.
        add_action('add', new_index, alias)

    # Alias the new indexes and remove the old aliases, if any.
    renaming_step = last_action.add_task(run_aliases_actions,
                                         args=[actions])

    # Unflag the database: indexing no longer has to be duplicated.
    cleanup_step = renaming_step.add_task(unflag_database)

    # Delete the old indexes, if any.
    cleanup_step.add_task(delete_indexes, args=[to_remove])

    log('Running all indexation tasks')

    os.environ['FORCE_INDEXING'] = '1'
    try:
        tree.apply_async()
        time.sleep(10)  # give celeryd some time to flag the DB
        # Print a progress dot every five seconds until the flag clears.
        while database_flagged():
            sys.stdout.write('.')
            sys.stdout.flush()
            time.sleep(5)
    finally:
        del os.environ['FORCE_INDEXING']

    sys.stdout.write('\n')

    # Return the /_aliases values as part of the summary.
    final_aliases = json.dumps(call_es('_aliases').json,
                               sort_keys=True, indent=4)
    return _SUMMARY % (len(indexes), final_aliases)