def handle(self, *args, **kwargs):
    """Reindexing work.

    Creates a TaskTree that creates new indexes over the old ones so the
    search feature works while the indexation occurs.
    """
    force = kwargs.get('force', False)

    if is_reindexing_amo() and not force:
        raise CommandError('Indexation already occurring - use --force to '
                           'bypass')

    log('Starting the reindexation', stdout=self.stdout)

    modules = get_modules(with_stats=kwargs.get('with_stats', False))

    if kwargs.get('wipe', False):
        skip_confirmation = kwargs.get('noinput', False)
        confirm = ''
        if not skip_confirmation:
            confirm = raw_input('Are you sure you want to wipe all AMO '
                                'Elasticsearch indexes? (yes/no): ')
            while confirm not in ('yes', 'no'):
                confirm = raw_input('Please enter either "yes" or "no": ')

        if confirm == 'yes' or skip_confirmation:
            unflag_database(stdout=self.stdout)
            for index in set(modules.keys()):
                ES.indices.delete(index, ignore=404)
        else:
            raise CommandError('Aborted.')
    elif force:
        unflag_database(stdout=self.stdout)

    alias_actions = []

    def add_alias_action(action, index, alias):
        action = {action: {'index': index, 'alias': alias}}
        if action in alias_actions:
            return
        alias_actions.append(action)

    # Creating a task tree.
    log('Building the task tree', stdout=self.stdout)
    tree = TaskTree()
    last_action = None

    to_remove = []

    # For each alias, we create a new time-stamped index.
    for alias, module in modules.items():
        old_index = None

        olds = ES.indices.get_aliases(alias, ignore=404)
        for old_index in olds:
            # Mark the index to be removed later.
            to_remove.append(old_index)
            # Mark the alias to be removed from that index.
            add_alias_action('remove', old_index, alias)

        # Create a new index, using the alias name with a timestamp.
        new_index = timestamp_index(alias)

        # If old_index is None that could mean it's a full index.
        # In that case we want to continue indexing into it.
        if ES.indices.exists(alias):
            old_index = alias

        # Flag the database.
        step1 = tree.add_task(flag_database,
                              args=[new_index, old_index, alias])
        step2 = step1.add_task(create_new_index, args=[alias, new_index])
        step3 = step2.add_task(index_data, args=[alias, new_index])
        last_action = step3

        # Adding new index to the alias.
        add_alias_action('add', new_index, alias)

    # Alias the new index and remove the old aliases, if any.
    renaming_step = last_action.add_task(update_aliases,
                                         args=[alias_actions])

    # Unflag the database - there's no need to duplicate the
    # indexing anymore.
    delete = renaming_step.add_task(unflag_database)

    # Delete the old indexes, if any.
    if to_remove:
        delete.add_task(delete_indexes, args=[to_remove])

    # Let's do it.
    log('Running all indexation tasks', stdout=self.stdout)

    os.environ['FORCE_INDEXING'] = '1'

    # This is a bit convoluted, and more complicated than simply providing
    # the soft and hard time limits on the @task decorator. But we're not
    # using the @task decorator here, but a decorator from celery_tasktree.
    if not getattr(settings, 'CELERY_ALWAYS_EAGER', False):
        control.time_limit(
            'olympia.lib.es.management.commands.reindex.index_data',
            soft=time_limits['soft'],
            hard=time_limits['hard'])

    try:
        tree.apply_async()
        if not getattr(settings, 'CELERY_ALWAYS_EAGER', False):
            time.sleep(10)  # Give celeryd some time to flag the DB.
            while is_reindexing_amo():
                sys.stdout.write('.')
                sys.stdout.flush()
                time.sleep(5)
    finally:
        del os.environ['FORCE_INDEXING']

    sys.stdout.write('\n')

    # Let's return the /_aliases values.
    aliases = ES.indices.get_aliases()
    aliases = json.dumps(aliases, sort_keys=True, indent=4)
    summary = _SUMMARY % (len(modules), aliases)
    log(summary, stdout=self.stdout)
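
# A minimal sketch (not part of the command) of the atomic alias flip that
# update_aliases() performs with the alias_actions built in handle(),
# assuming the same elasticsearch-py client bound to ES; the index and
# alias names here are illustrative.
def _example_alias_flip(old_index, new_index, alias):
    # Both actions go through a single /_aliases call, so searches on the
    # alias never see a window with no index behind it.
    ES.indices.update_aliases({'actions': [
        {'remove': {'index': old_index, 'alias': alias}},
        {'add': {'index': new_index, 'alias': alias}},
    ]})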
def set_time_limit(task_name, soft=60, hard=120, reply=True):
    """Set soft/hard execution time limits (in seconds) on a celery task."""
    # Pass the limits as keywords so they can't bind to the wrong
    # positional parameter of control.time_limit().
    return control.time_limit(task_name, soft=soft, hard=hard, reply=reply)
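
# A minimal usage sketch for set_time_limit(), assuming the dotted task
# path below (the one handle() uses) matches where index_data is actually
# registered; the limits are in seconds.
def _example_raise_index_data_limits():
    # Allow 20 minutes before the soft timeout, 30 before the hard one.
    return set_time_limit(
        'olympia.lib.es.management.commands.reindex.index_data',
        soft=60 * 20, hard=60 * 30)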
def handle(self, *args, **kwargs):
    """Reindexing work.

    Creates a TaskTree that creates new indexes over the old ones so the
    search feature works while the indexation occurs.
    """
    force = kwargs.get("force", False)

    if is_reindexing_amo() and not force:
        raise CommandError("Indexation already occurring - use --force to "
                           "bypass")

    log("Starting the reindexation", stdout=self.stdout)

    modules = ["addons"]
    if kwargs.get("with_stats", False):
        modules.append("stats")

    if kwargs.get("wipe", False):
        skip_confirmation = kwargs.get("noinput", False)
        confirm = ""
        if not skip_confirmation:
            confirm = raw_input("Are you sure you want to wipe all AMO "
                                "Elasticsearch indexes? (yes/no): ")
            while confirm not in ("yes", "no"):
                confirm = raw_input('Please enter either "yes" or "no": ')

        if confirm == "yes" or skip_confirmation:
            unflag_database(stdout=self.stdout)
            for index in set(MODULES[m].get_alias() for m in modules):
                ES.indices.delete(index, ignore=404)
        else:
            raise CommandError("Aborted.")
    elif force:
        unflag_database(stdout=self.stdout)

    alias_actions = []

    def add_alias_action(action, index, alias):
        action = {action: {"index": index, "alias": alias}}
        if action in alias_actions:
            return
        alias_actions.append(action)

    # Creating a task tree.
    log("Building the task tree", stdout=self.stdout)
    tree = TaskTree()
    last_action = None

    to_remove = []

    # For each index, we create a new time-stamped index.
    for module in modules:
        old_index = None
        alias = MODULES[module].get_alias()

        olds = ES.indices.get_aliases(alias, ignore=404)
        for old_index in olds:
            # Mark the index to be removed later.
            to_remove.append(old_index)
            # Mark the alias to be removed from that index.
            add_alias_action("remove", old_index, alias)

        # Create a new index, using the alias name with a timestamp.
        new_index = timestamp_index(alias)

        # If old_index is None that could mean it's a full index.
        # In that case we want to continue indexing into it.
        if ES.indices.exists(alias):
            old_index = alias

        # Flag the database.
        step1 = tree.add_task(flag_database,
                              args=[new_index, old_index, alias])
        step2 = step1.add_task(create_new_index, args=[module, new_index])
        step3 = step2.add_task(index_data, args=[module, new_index])
        last_action = step3

        # Adding new index to the alias.
        add_alias_action("add", new_index, alias)

    # Alias the new index and remove the old aliases, if any.
    renaming_step = last_action.add_task(update_aliases,
                                         args=[alias_actions])

    # Unflag the database - there's no need to duplicate the
    # indexing anymore.
    delete = renaming_step.add_task(unflag_database)

    # Delete the old indexes, if any.
    if to_remove:
        delete.add_task(delete_indexes, args=[to_remove])

    # Let's do it.
    log("Running all indexation tasks", stdout=self.stdout)

    os.environ["FORCE_INDEXING"] = "1"

    # This is a bit convoluted, and more complicated than simply providing
    # the soft and hard time limits on the @task decorator. But we're not
    # using the @task decorator here, but a decorator from celery_tasktree.
    if not getattr(settings, "CELERY_ALWAYS_EAGER", False):
        control.time_limit(
            "olympia.lib.es.management.commands.reindex.index_data",
            soft=time_limits["soft"],
            hard=time_limits["hard"],
        )

    try:
        tree.apply_async()
        if not getattr(settings, "CELERY_ALWAYS_EAGER", False):
            time.sleep(10)  # Give celeryd some time to flag the DB.
            while is_reindexing_amo():
                sys.stdout.write(".")
                sys.stdout.flush()
                time.sleep(5)
    finally:
        del os.environ["FORCE_INDEXING"]

    sys.stdout.write("\n")

    # Let's return the /_aliases values.
    aliases = ES.indices.get_aliases()
    aliases = json.dumps(aliases, sort_keys=True, indent=4)
    summary = _SUMMARY % (len(modules), aliases)
    log(summary, stdout=self.stdout)
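
# Typical invocations of this command, assuming the option spellings match
# the kwargs read in handle() ("force", "wipe", "noinput", "with_stats");
# the exact flags are declared where the command registers its options, so
# treat these as illustrative:
#
#   ./manage.py reindex
#   ./manage.py reindex --force
#   ./manage.py reindex --wipe --noinput
#   ./manage.py reindex --with-stats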