def migrate_from_pulp2(migration_plan_pk, dry_run=False):
    """
    Main task to migrate from Pulp 2 to Pulp 3.

    Schedule other tasks based on the specified Migration Plan.

    Args:
        migration_plan_pk (str): The migration plan PK.
        dry_run (bool): If True, nothing is migrated, only validation happens.
    """
    plan = MigrationPlan.objects.get(pk=migration_plan_pk)

    if dry_run:
        _logger.debug('Running in a dry-run mode.')
        # TODO: Migration Plan validation
        return

    # MongoDB connection initialization
    connection.initialize()

    # TODO: Migration Plan parsing and validation
    # For now, the list of plugins to migrate is hard-coded.
    plugins_to_migrate = ['iso']

    loop = asyncio.get_event_loop()
    try:
        # The steps run strictly one after another; each one must complete
        # before the next one starts.
        loop.run_until_complete(pre_migrate_all_without_content(plan))
        loop.run_until_complete(migrate_repositories())
        loop.run_until_complete(migrate_importers(plugins_to_migrate))
        loop.run_until_complete(pre_migrate_all_content(plugins_to_migrate))
        # without RemoteArtifacts yet
        loop.run_until_complete(migrate_content(plugins_to_migrate))
        # TODO: not run yet:
        #   create_repo_versions()
        #   migrate_distributors(plugins_to_migrate)
    finally:
        # Close the loop even when a migration step raises, so it is not
        # left open on the error path.
        loop.close()
def migrate_from_pulp2(
    migration_plan_pk, validate=False, dry_run=False, skip_corrupted=False
):
    """
    Run the Pulp 2 to Pulp 3 migration described by a Migration Plan.

    The plan is checked for missing resources first; the migration steps are then
    executed one after another inside a dedicated task group.

    Args:
        migration_plan_pk (str): The migration plan PK.
        validate (bool): If True, don't migrate unless validation is successful.
        dry_run (bool): If True, nothing is migrated, only validation happens.
        skip_corrupted (bool): If True, corrupted content is skipped during migration,
                               no task failure.

    Raises:
        PlanValidationError: if `validate` or `dry_run` is set and the plan reports
                             missing resources.
    """
    # Initialize the MongoDB connection
    connection.initialize()

    plan = MigrationPlan.objects.get(pk=migration_plan_pk)
    missing_resources = plan.get_missing_resources()

    validation_requested = validate or dry_run
    if validation_requested and missing_resources:
        failure = "Validation failed: resources missing {}".format(missing_resources)
        raise PlanValidationError(failure)

    # A dry run stops once validation has passed.
    if dry_run:
        return

    task_group = TaskGroup(description="Migration Sub-tasks")
    task_group.save()

    # One group-level progress report per kind of sub-task.
    group_reports = (
        ("Repo version creation", "create.repo_version"),
        ("Distribution creation", "create.distribution"),
    )
    for report_message, report_code in group_reports:
        report = GroupProgressReport(
            message=report_message,
            code=report_code,
            task_group=task_group,
        )
        report.save()

    # Attach the running task to the group and record the group as a created resource.
    this_task = Task.current()
    this_task.task_group = task_group
    this_task.save()
    CreatedResource(content_object=task_group).save()

    # TODO: if plan is empty for a plugin, only migrate downloaded content

    pre_migrate_all_without_content(plan)
    pre_migrate_all_content(plan)
    handle_outdated_resources(plan)
    migrate_repositories(plan)
    migrate_importers(plan)
    migrate_content(plan, skip_corrupted=skip_corrupted)
    create_repoversions_publications_distributions(plan)

    task_group.finish()
def validate(self, data):
    """
    Validate that the Serializer contains valid data.

    Validates JSON structure of migration_plan.
    Checks pulp2 and pulp3 plugins are installed.

    Args:
        data (dict): Serializer data; `data['plan']` is a JSON-encoded string or a dict.

    Returns:
        dict: `data` with `data['plan']` replaced by the parsed plan.

    Raises:
        serializers.ValidationError: if the plan is of an unsupported type, is not
            valid JSON, does not match the schema, lists a plugin type more than once,
            or names a plugin that is unsupported or not installed in pulp2/pulp3.
    """
    schema = json.loads(SCHEMA)
    validator = Draft7Validator(schema)

    plan = data['plan']
    if isinstance(plan, str):
        try:
            loaded_plan = json.loads(plan)
        except ValueError as exc:
            # json.JSONDecodeError is a ValueError; report malformed JSON as a
            # validation problem instead of letting it escape unhandled.
            raise serializers.ValidationError(
                _("Provided Migration Plan is not valid JSON: {}").format(exc)
            ) from exc
    elif isinstance(plan, dict):
        loaded_plan = plan
    else:
        raise serializers.ValidationError(
            _("Must provide a (JSON-encoded) string or dict for 'plan', not list")
        )

    # Messages are sorted by str(error) so the reported order is stable.
    err = [
        error.message
        for error in sorted(validator.iter_errors(loaded_plan), key=str)
    ]
    if err:
        # Format *after* the gettext lookup so the untranslated template is
        # what gets looked up in the catalog.
        raise serializers.ValidationError(
            _("Provided Migration Plan format is invalid:'{}'").format(err)
        )

    plugins_to_migrate = {
        plugin_type['type'] for plugin_type in loaded_plan['plugins']
    }
    # The set drops duplicates, so a size mismatch means a repeated plugin type.
    if len(loaded_plan['plugins']) != len(plugins_to_migrate):
        raise serializers.ValidationError(
            _("Provided Migration Plan contains same plugin type specified more that once.")
        )

    # MongoDB connection initialization
    connection.initialize()
    db = connection.get_database()

    for plugin in plugins_to_migrate:
        plugin_migrator = PLUGIN_MIGRATORS.get(plugin)
        if not plugin_migrator:
            raise serializers.ValidationError(
                _("Migration of {} plugin is not supported.").format(plugin)
            )
        if plugin_migrator.pulp3_plugin not in INSTALLED_PULP_PLUGINS:
            raise serializers.ValidationError(
                _("Plugin {} is not installed in pulp3.").format(plugin)
            )
        try:
            # A failing "collstats" on the plugin's pulp2 collection is taken
            # as "plugin not installed in pulp2".
            db.command("collstats", plugin_migrator.pulp2_collection)
        except OperationFailure:
            raise serializers.ValidationError(
                _("Plugin {} is not installed in pulp2.").format(plugin)
            )

    data['plan'] = loaded_plan
    return data
def __init__(self, migration_plan):
    """
    Build one PluginMigrationPlan per plugin entry and check for missing resources.

    Args:
        migration_plan: object whose `plan['plugins']` is iterated to create the
                        per-plugin plans.
    """
    self._plugin_plans = [
        PluginMigrationPlan(plugin_entry)
        for plugin_entry in migration_plan.plan['plugins']
    ]

    # Start out empty, before _check_missing() runs below.
    self.repositories_missing_importers = []
    self.missing_repositories = []
    self.repositories_missing_distributors = []

    # The MongoDB connection has to be initialized before the check below
    connection.initialize()
    self._check_missing()
def validate(self, data):
    """
    Validate that the Serializer contains valid data.

    Validates JSON structure of migration_plan.
    Checks pulp2 and pulp3 plugins are installed.

    Args:
        data (dict): Serializer data; `data['plan']` is a JSON-encoded string.

    Returns:
        dict: `data` with `data['plan']` replaced by the parsed plan.

    Raises:
        serializers.ValidationError: if the plan cannot be parsed as JSON, does not
            match the schema, lists a plugin type more than once, or names a plugin
            that is not installed in pulp2/pulp3.
    """
    schema = json.loads(SCHEMA)
    validator = Draft7Validator(schema)

    try:
        loaded_plan = json.loads(data['plan'])
    except (TypeError, ValueError) as exc:
        # json.loads raises ValueError (JSONDecodeError) for malformed text and
        # TypeError for non-string input; report both as validation problems
        # instead of letting them escape unhandled.
        raise serializers.ValidationError(
            _("Provided Migration Plan is not valid JSON: {}").format(exc)
        ) from exc

    # Messages are sorted by str(error) so the reported order is stable.
    err = [
        error.message
        for error in sorted(validator.iter_errors(loaded_plan), key=str)
    ]
    if err:
        # Format *after* the gettext lookup so the untranslated template is
        # what gets looked up in the catalog.
        raise serializers.ValidationError(
            _("Provided Migration Plan format is invalid:'{}'").format(err)
        )

    plugins_to_migrate = {
        plugin_type['type'] for plugin_type in loaded_plan['plugins']
    }
    # The set drops duplicates, so a size mismatch means a repeated plugin type.
    if len(loaded_plan['plugins']) != len(plugins_to_migrate):
        raise serializers.ValidationError(
            _("Provided Migration Plan contains same plugin type specified more that once.")
        )

    # MongoDB connection initialization
    connection.initialize()
    db = connection.get_database()

    for plugin in plugins_to_migrate:
        if PULP_2TO3_PLUGIN_MAP.get(plugin) not in INSTALLED_PULP_PLUGINS:
            raise serializers.ValidationError(
                _("Plugin {} is not installed in pulp3.").format(plugin)
            )
        # Looked up outside the try block: only the database command below is
        # expected to raise OperationFailure.
        collection = PULP2_COLLECTION_MAP.get(plugin)
        try:
            # A failing "collstats" on the plugin's pulp2 collection is taken
            # as "plugin not installed in pulp2".
            db.command("collstats", collection)
        except OperationFailure:
            raise serializers.ValidationError(
                _("Plugin {} is not installed in pulp2.").format(plugin)
            )

    data['plan'] = loaded_plan
    return data
def __init__(self, migration_plan):
    """
    Set up empty bookkeeping containers, then populate them and check for gaps.

    Args:
        migration_plan: the plan object this instance is built from; stored as
                        `self.migration_plan`.
    """
    self.migration_plan = migration_plan

    # Resources that are missing; all empty until _check_missing() runs below.
    self.missing_repositories = []
    self.missing_importers = []
    self.missing_distributors = []

    # What has to be migrated.
    self.plugins_to_migrate = []
    self.importers_to_migrate = []
    self.distributors_to_migrate = []
    # these repos are the only thing pre-migration needs
    self.repositories_to_migrate = []

    # Nested structure shaped roughly like the JSON schema: keyed by the name of
    # the pulp3 repo, each value is a dict with further details (repo_versions,
    # which importer to use, etc.).
    self.repositories_to_create = {}

    # The MongoDB connection has to be initialized before the calls below
    connection.initialize()

    self._populate()
    self._check_missing()
def migrate_from_pulp2(migration_plan_pk, validate=False, dry_run=False):
    """
    Main task to migrate from Pulp 2 to Pulp 3.

    Schedule other tasks based on the specified Migration Plan.

    Args:
        migration_plan_pk (str): The migration plan PK.
        validate (bool): If True, don't migrate unless validation is successful.
        dry_run (bool): If True, nothing is migrated, only validation happens.

    Raises:
        PlanValidationError: if `validate` or `dry_run` is set and the plan reports
                             missing resources.
    """
    # MongoDB connection initialization
    connection.initialize()

    plan = MigrationPlan.objects.get(pk=migration_plan_pk)
    missing_resources = plan.get_missing_resources()

    if (validate or dry_run) and missing_resources:
        raise PlanValidationError(
            "Validation failed: resources missing {}".format(missing_resources)
        )

    if dry_run:
        return

    # TODO: if plan is empty for a plugin, only migrate downloaded content

    loop = asyncio.get_event_loop()
    try:
        # The steps run strictly one after another; each one must complete
        # before the next one starts.
        loop.run_until_complete(pre_migrate_all_without_content(plan))
        loop.run_until_complete(migrate_repositories(plan))
        loop.run_until_complete(migrate_importers(plan))
        loop.run_until_complete(pre_migrate_all_content(plan))
        loop.run_until_complete(migrate_content(plan))
        loop.run_until_complete(create_repo_versions(plan))
        # TODO: migrate_distributors is not run yet
    finally:
        # Close the loop even when a migration step raises, so it is not
        # left open on the error path.
        loop.close()
def migrate_from_pulp2(migration_plan_pk, validate=False, dry_run=False):
    """
    Run the Pulp 2 to Pulp 3 migration described by a Migration Plan.

    The plan is checked for missing resources first; the migration steps are then
    executed one after another inside a dedicated task group.

    Args:
        migration_plan_pk (str): The migration plan PK.
        validate (bool): If True, don't migrate unless validation is successful.
        dry_run (bool): If True, nothing is migrated, only validation happens.

    Raises:
        PlanValidationError: if `validate` or `dry_run` is set and the plan reports
                             missing resources.
    """

    def get_repo_types(plan):
        """
        Work out the plugin/repo type of pulp 2 repositories.

        Repositories named in the plan get the type of the plugin plan that lists them.
        Any other repository is typed by the first supported content unit found in it.
        The resulting mappings are used later during pre-migration and identification of
        removed repos from pulp 2.

        Args:
            plan(MigrationPlan): A Migration Plan

        Returns:
            repo_id_to_type(dict): mapping from a pulp 2 repo_id to a plugin/repo type
            type_to_repo_ids(dict): mapping from a plugin/repo type to the set of repo_ids
        """
        type_by_repo_id = {}
        repo_ids_by_type = defaultdict(set)

        # content type -> plugin/repo type, e.g. 'docker_blob' -> 'docker'
        plugin_by_content_type = {}

        for plugin_plan in plan.get_plugin_plans():
            plugin_by_content_type.update({
                content_type: plugin_plan.migrator.pulp2_plugin
                for content_type in plugin_plan.migrator.pulp2_content_models
            })

            # every repo the plan mentions for this plugin, in any role
            planned_repos = set(plugin_plan.get_repositories())
            planned_repos.update(plugin_plan.get_importers_repos())
            planned_repos.update(plugin_plan.get_distributors_repos())

            type_by_repo_id.update(dict.fromkeys(planned_repos, plugin_plan.type))
            repo_ids_by_type[plugin_plan.type].update(planned_repos)

        # TODO: optimizations.
        # It looks at each content at the moment. Potential optimizations:
        #  - This is a big query, paginate?
        #  - Filter by repos from the plan
        #  - Query any but one record for a repo
        content_records = (
            RepositoryContentUnit.objects()
            .only('repo_id', 'unit_type_id')
            .as_pymongo()
            .no_cache()
        )
        for record in content_records:
            repo_id = record['repo_id']
            unit_type_id = record['unit_type_id']

            # only act on repos without a known type whose content type is supported
            if repo_id not in type_by_repo_id and unit_type_id in plugin_by_content_type:
                plugin_name = plugin_by_content_type[unit_type_id]
                type_by_repo_id[repo_id] = plugin_name
                repo_ids_by_type[plugin_name].add(repo_id)

        return type_by_repo_id, repo_ids_by_type

    # Initialize the MongoDB connection
    connection.initialize()

    plan = MigrationPlan.objects.get(pk=migration_plan_pk)
    missing_resources = plan.get_missing_resources()

    validation_requested = validate or dry_run
    if validation_requested and missing_resources:
        failure = "Validation failed: resources missing {}".format(missing_resources)
        raise PlanValidationError(failure)

    # A dry run stops once validation has passed.
    if dry_run:
        return

    task_group = TaskGroup(description="Migration Sub-tasks")
    task_group.save()

    # One group-level progress report per kind of sub-task.
    group_reports = (
        ("Repo version creation", "create.repo_version"),
        ("Distribution creation", "create.distribution"),
    )
    for report_message, report_code in group_reports:
        report = GroupProgressReport(
            message=report_message,
            code=report_code,
            task_group=task_group,
        )
        report.save()

    # Attach the running task to the group and record the group as a created resource.
    this_task = Task.current()
    this_task.task_group = task_group
    this_task.save()
    CreatedResource(content_object=task_group).save()

    # build the mappings once here rather than inside each step below
    repo_id_to_type, type_to_repo_ids = get_repo_types(plan)

    # TODO: if plan is empty for a plugin, only migrate downloaded content

    pre_migrate_all_without_content(plan, type_to_repo_ids, repo_id_to_type)
    pre_migrate_all_content(plan)
    mark_removed_resources(plan, type_to_repo_ids)
    delete_old_resources(plan)
    migrate_repositories(plan)
    migrate_importers(plan)
    migrate_content(plan)
    create_repoversions_publications_distributions(plan)

    task_group.finish()
def migrate_from_pulp2(migration_plan_pk, validate=False, dry_run=False):
    """
    Main task to migrate from Pulp 2 to Pulp 3.

    Schedule other tasks based on the specified Migration Plan.

    Args:
        migration_plan_pk (str): The migration plan PK.
        validate (bool): If True, don't migrate unless validation is successful.
        dry_run (bool): If True, nothing is migrated, only validation happens.

    Raises:
        PlanValidationError: if `validate` or `dry_run` is set and the plan reports
                             missing resources.
    """

    def get_repo_types(plan):
        """
        Create mappings for pulp 2 repository types.

        Identify type by inspecting content of a repo.
        One mapping is repo_id -> repo_type, the other is repo_type -> list of repo_ids.

        It's used later during pre-migration and identification of removed repos from pulp 2

        Args:
            plan(MigrationPlan): A Migration Plan

        Returns:
            repo_id_to_type(dict): mapping from a pulp 2 repo_id to a plugin/repo type
            type_to_repo_ids(dict): mapping from a plugin/repo type to the list of repo_ids
        """
        repo_id_to_type = {}
        type_to_repo_ids = defaultdict(list)

        # mapping content type -> plugin/repo type, e.g. 'docker_blob' -> 'docker'
        content_type_to_plugin = {}

        for plugin in plan.get_plugin_plans():
            for content_type in plugin.migrator.pulp2_content_models:
                content_type_to_plugin[content_type] = plugin.migrator.pulp2_plugin

        # TODO: optimizations.
        # It looks at each content at the moment. Potential optimizations:
        #  - Filter by repos from the plan
        #  - Query any but one record for a repo
        for rec in RepositoryContentUnit.objects().only('repo_id', 'unit_type_id'):
            repo_id = rec['repo_id']
            unit_type_id = rec['unit_type_id']

            # a type for a repo is already known or this content/repo type is not supported
            if repo_id in repo_id_to_type or unit_type_id not in content_type_to_plugin:
                continue
            plugin_name = content_type_to_plugin[unit_type_id]
            repo_id_to_type[repo_id] = plugin_name
            type_to_repo_ids[plugin_name].append(repo_id)

        return repo_id_to_type, type_to_repo_ids

    # MongoDB connection initialization
    connection.initialize()

    plan = MigrationPlan.objects.get(pk=migration_plan_pk)
    missing_resources = plan.get_missing_resources()

    if (validate or dry_run) and missing_resources:
        raise PlanValidationError(
            "Validation failed: resources missing {}".format(missing_resources)
        )

    if dry_run:
        return

    # call it here and not inside steps below to generate mapping only once
    repo_id_to_type, type_to_repo_ids = get_repo_types(plan)

    # TODO: if plan is empty for a plugin, only migrate downloaded content

    loop = asyncio.get_event_loop()
    try:
        # The steps run strictly one after another; each one must complete
        # before the next one starts.
        loop.run_until_complete(
            pre_migrate_all_without_content(plan, type_to_repo_ids, repo_id_to_type)
        )
        loop.run_until_complete(mark_removed_resources(plan, type_to_repo_ids))
        loop.run_until_complete(migrate_repositories(plan))
        loop.run_until_complete(migrate_importers(plan))
        loop.run_until_complete(pre_migrate_all_content(plan))
        loop.run_until_complete(migrate_content(plan))
        loop.run_until_complete(create_repo_versions(plan))
        loop.run_until_complete(migrate_distributors(plan))
    finally:
        # Close the loop even when a migration step raises, so it is not
        # left open on the error path.
        loop.close()
from pulpcore.client.pulp_file import (
    ApiClient as FileApiClient,
    ContentFilesApi,
    RepositoriesFileApi,
    RepositoriesFileVersionsApi,
)
from pulpcore.client.pulp_2to3_migration import (
    ApiClient as MigrationApiClient,
    MigrationPlansApi,
)

from pulp_2to3_migration.pulp2.base import (
    Repository as Pulp2Repository,
    RepositoryContentUnit,
)
from pulp_2to3_migration.pulp2.connection import initialize
from pulp_2to3_migration.tests.functional.util import monitor_task

# Can't import ISO model due to PLUGIN_MIGRATORS needing Django app
# from pulp_2to3_migration.app.plugin.iso.pulp2_models import ISO

# Initialize MongoDB connection
initialize()

# Pulp 2 repositories whose '_repo-type' note, minus its last five characters,
# is 'iso' (a value such as 'iso-repo' matches). Repositories without that note
# are treated as non-ISO instead of raising a TypeError on the slice.
PULP_2_ISO_REPOSITORIES = [
    repo for repo in Pulp2Repository.objects.all()
    if (repo.notes.get('_repo-type') or '')[:-5] == 'iso'
]

# Maps repo_id -> number of RepositoryContentUnit records for that repository,
# e.g. {'file': 3, ...}
PULP_2_ISO_FIXTURE_DATA = {
    repo.repo_id: RepositoryContentUnit.objects.filter(repo_id=repo.repo_id).count()
    for repo in PULP_2_ISO_REPOSITORIES
}

EMPTY_ISO_MIGRATION_PLAN = json.dumps({"plugins": [{"type": "iso"}]})