Example #1
def _fix_dataset_urls(client):
    """Ensure dataset and its files have correct url format."""
    for dataset in get_client_datasets(client):
        dataset.url = dataset._id
        for file_ in dataset.files:
            if file_.url:
                file_.url = url_to_string(file_.url)

        dataset.to_yaml()
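The url_to_string helper is not shown on this page. A plausible sketch, consistent with how it is called above (an assumption, not the verbatim renku implementation): older metadata sometimes stored a url as a JSON-LD dict or a list, and the migration normalizes it back to a plain string.

def url_to_string(url):
    """Normalize a url field to a plain string (hypothetical sketch)."""
    if isinstance(url, dict):
        # JSON-LD style serialization, e.g. {"@id": "https://..."}
        return url.get("@id", "")
    if isinstance(url, list):
        return url_to_string(url[0]) if url else ""
    return url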
Example #2
def _migrate_dataset_and_files_project(client):
    """Ensure dataset files have correct project."""
    project = Project.from_yaml(client.renku_metadata_path, client)
    project.to_yaml(client.renku_metadata_path)

    for dataset in get_client_datasets(client):
        dataset._project = project
        if not dataset.creators:
            dataset.creators = [project.creator]
        for file_ in dataset.files:
            file_._project = project

        dataset.to_yaml()
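Steps like the two above are typically grouped behind a single migrate(client) entry point per migration module in renku-python; a minimal sketch of how they might be chained (step ordering assumed):

def migrate(client):
    """Run this migration's steps in order (sketch; ordering assumed)."""
    _migrate_dataset_and_files_project(client)
    _fix_dataset_urls(client)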
Example #3
def _migrate_broken_dataset_paths(client):
    """Ensure all paths are using correct directory structure."""
    for dataset in get_client_datasets(client):
        expected_path = client.renku_datasets_path / dataset.identifier
        if not dataset.name:
            dataset.name = dataset.title

        # migrate the refs
        ref = LinkReference.create(
            client=client,
            name="datasets/{0}".format(dataset.name),
            force=True,
        )
        ref.set_reference(expected_path / client.METADATA)

        old_dataset_path = client.renku_datasets_path / uuid.UUID(
            dataset.identifier).hex

        dataset.path = os.path.relpath(expected_path, client.path)

        if not expected_path.exists():
            shutil.move(old_dataset_path, expected_path)

        for file_ in dataset.files:
            file_path = Path(file_.path)
            if not file_path.exists() or file_.path.startswith(".."):
                new_path = Path(
                    os.path.abspath(client.renku_datasets_path /
                                    dataset.identifier /
                                    file_path)).relative_to(client.path)

                file_.path = new_path

            file_.name = os.path.basename(file_.path)

        dataset.to_yaml(expected_path / client.METADATA)
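LinkReference gives each dataset a stable, human-readable pointer into the metadata tree. Conceptually (an assumption about the mechanism, not the verbatim implementation), set_reference boils down to recreating a relative symlink under .renku/refs/, so that datasets/<name> resolves to the dataset's metadata file:

import os
from pathlib import Path

def set_reference_sketch(client_path, name, target):
    """Recreate .renku/refs/<name> as a relative symlink to target (sketch)."""
    ref = Path(client_path) / ".renku" / "refs" / name
    ref.parent.mkdir(parents=True, exist_ok=True)
    if ref.is_symlink() or ref.exists():
        ref.unlink()
    ref.symlink_to(os.path.relpath(target, start=ref.parent))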
Example #4
def _fix_labels_and_ids(client):
    """Ensure files have correct label instantiation."""
    for dataset in get_client_datasets(client):
        dataset._id = generate_dataset_id(client=client,
                                          identifier=dataset.identifier)
        dataset._label = dataset.identifier

        for file_ in dataset.files:
            if not Path(file_.path).exists():
                continue
            _, commit, _ = client.resolve_in_submodules(
                client.find_previous_commit(file_.path, revision="HEAD"),
                file_.path,
            )

            if not _is_file_id_valid(file_._id, file_.path, commit.hexsha):
                file_._id = generate_file_id(client=client,
                                             hexsha=commit.hexsha,
                                             path=file_.path)

            if (not file_._label or commit.hexsha not in file_._label
                    or file_.path not in file_._label):
                file_._label = generate_label(file_.path, commit.hexsha)

        dataset.to_yaml()
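The validity check and label generator are not shown here; plausible shapes that agree with how Example #4 uses them, namely that both the commit sha and the path must appear in the id and the label (assumptions, not the verbatim renku helpers):

def generate_label(path, hexsha):
    # Tie a file path to the commit that produced it (assumed format).
    return f"{path}@{hexsha}"

def _is_file_id_valid(id_, path, hexsha):
    # An id counts as valid when it embeds both the commit and the path.
    return bool(id_) and hexsha in id_ and path in id_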
Example #5
def _fix_dataset_metadata(client):
    """Re-serialize dataset metadata with the current schema."""
    for dataset in get_client_datasets(client):
        dataset.to_yaml()
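Example #5 looks like a no-op, but it is a real migration step: loading each dataset and immediately calling to_yaml() rewrites the metadata file using the current schema version.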
Example #6
def _migrate_submodule_based_datasets(client):
    """Migrate dataset files that live in Git submodules under .renku/vendors."""
    from renku.core.management import LocalClient
    from renku.core.management.migrate import is_project_unsupported

    submodules = client.repo.submodules
    if not submodules:
        return

    for s in submodules:
        try:
            s.update()
        except GitError:
            pass

    submodules_urls = {s.path: s.url for s in submodules}

    repo_paths = []
    symlinks = []

    for dataset in get_client_datasets(client):
        for file_ in dataset.files:
            path = client.path / file_.path
            if not path.is_symlink():
                continue

            target = path.resolve()

            if "/.renku/vendors/" not in str(target):
                continue

            repo = Repo(target.parent, search_parent_directories=True)
            repo_path = repo.working_dir
            if repo_path not in repo_paths:
                repo_paths.append(repo_path)

            symlinks.append((file_.path, target, repo_path))

    if not symlinks:
        return

    remote_clients = {p: LocalClient(p) for p in repo_paths}

    for remote_client in remote_clients.values():
        if not is_project_unsupported(remote_client):
            migrate(remote_client)

    metadata = {}

    for path, target, repo_path in symlinks:
        remote_client = remote_clients[repo_path]
        path_within_repo = target.relative_to(repo_path)

        repo_is_remote = ".renku/vendors/local" not in repo_path
        based_on = None
        submodule_path = Path(repo_path).relative_to(client.path)

        url = submodules_urls.get(str(submodule_path), "")

        if repo_is_remote:
            based_on = _fetch_file_metadata(remote_client, path_within_repo)
            if based_on:
                based_on.url = url
                based_on.based_on = None
            else:
                based_on = DatasetFile.from_revision(
                    remote_client, path=path_within_repo, url=url
                )
            data = DatasetFileSchema(client=remote_client).dump(based_on)
            based_on = DatasetFileSchemaV3(client=remote_client).load(data)
        else:
            if url:
                full_path = Path(url) / path_within_repo
                rel_path = os.path.relpath(full_path, client.path)
                url = f"file://{rel_path}"

        metadata[path] = (based_on, url)

        path = client.path / path
        path.unlink()

        try:
            shutil.move(target, path)
        except FileNotFoundError:
            raise errors.InvalidFileOperation(f"File was not found: {target}")

    for s in submodules:
        if s.path.startswith(".renku/vendors/"):
            try:
                s.remove(force=True)
            except ValueError:
                pass

    for dataset in get_client_datasets(client):
        for file_ in dataset.files:
            if file_.path in metadata:
                based_on, url = metadata[file_.path]
                file_.based_on = based_on
                file_.url = remove_credentials(url)

        dataset.to_yaml()
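remove_credentials strips any user:password@ component before the url is persisted in the dataset metadata; a minimal sketch of the assumed behaviour using only the standard library:

from urllib.parse import urlparse, urlunparse

def remove_credentials(url):
    """Drop userinfo, e.g. https://user:token@host/p -> https://host/p (sketch)."""
    if not url:
        return url
    parsed = urlparse(url)
    if parsed.username is None:
        return url
    netloc = parsed.hostname or ""
    if parsed.port:
        netloc = f"{netloc}:{parsed.port}"
    return urlunparse(parsed._replace(netloc=netloc))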