Example #1
    def get_containing_subdataset(self, path, recursion_limit=None):
        """Get the (sub-)dataset containing `path`

        Note: The "mount point" of a subdataset is classified as belonging to
        that respective subdataset.

        WARNING: This function is rather expensive, because it queries for all
        subdatasets recursively, and repeatedly -- which can take a substantial
        amount of time for datasets with many (sub-)subdatasets.  In many cases
        the `subdatasets` command can be used with its `contains` parameter to
        achieve the desired result in a less expensive way.

        Parameters
        ----------
        path : str
          Path to determine the containing (sub-)dataset for
        recursion_limit: int or None
          limit the subdatasets to take into account to the given number of
          hierarchy levels

        Returns
        -------
        Dataset
        """

        if recursion_limit is not None and (recursion_limit < 1):
            lgr.warning("recursion limit < 1 (%s) always results in self.",
                        recursion_limit)
            return self

        if is_explicit_path(path):
            path = resolve_path(path, self)
            if not path.startswith(self.path):
                raise PathOutsideRepositoryError(file_=path, repo=self)
            path = relpath(path, self.path)

        candidates = []
        # TODO: this one would follow all the sub-datasets, which might
        # be inefficient if e.g. there are lots of other sub-datasets already
        # installed but under another sub-dataset.  There is a TODO 'pattern'
        # option which we could use I guess eventually
        for subds in self.subdatasets(
                recursive=True,
                #pattern=
                recursion_limit=recursion_limit,
                result_xfm='relpaths'):
            common = commonprefix((with_pathsep(subds), with_pathsep(path)))
            if common.endswith(sep) and common == with_pathsep(subds):
                candidates.append(common)
        if candidates:
            return Dataset(path=opj(self.path, max(candidates, key=len)))
        return self
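
A minimal sketch of the containment test used above, with `with_pathsep` re-implemented locally for illustration (a stand-in, not the datalad.utils import). Normalizing both relative paths to a trailing separator before `commonprefix` makes the mount point itself count as inside, and keeps a shared name prefix (e.g. 'sub' vs. 'subdir2') from producing a false match:

from os.path import commonprefix, join as opj, sep


def with_pathsep(path):
    # append a trailing path separator unless one is already present
    return path if path.endswith(sep) else path + sep


def contains(subds_relpath, target_relpath):
    # True if target_relpath is at or below subds_relpath
    common = commonprefix((with_pathsep(subds_relpath),
                           with_pathsep(target_relpath)))
    return common.endswith(sep) and common == with_pathsep(subds_relpath)


assert contains('sub', opj('sub', 'data', 'file.dat'))
assert contains('sub', 'sub')           # the mount point belongs to the subdataset
assert not contains('sub', 'subdir2')   # a shared name prefix is not containment
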
Example #2
def test_get_recurse_dirs(o_path, c_path):

    # prepare source:
    origin = Dataset(o_path).create(force=True)
    origin.save("Initial", auto_add_changes=True)

    ds = install(c_path, source=o_path)

    file_list = ['file1.txt',
                 opj('subdir', 'file2.txt'),
                 opj('subdir', 'subsubdir', 'file3.txt'),
                 opj('subdir', 'subsubdir', 'file4.txt')]
    files_in_sub = [f for f in file_list if f.startswith(with_pathsep('subdir'))]

    # no content present:
    ok_(not any(ds.repo.file_has_content(file_list)))

    result = ds.get('subdir')

    # check result:
    eq_(set([item.get('file') for item in result]),
        set(files_in_sub))
    ok_(all([x['success'] is True for x in result if x['file'] in files_in_sub]))
    eq_(len(result), len(files_in_sub))

    # got all files beneath subdir:
    ok_(all(ds.repo.file_has_content(files_in_sub)))

    # additionally got file1.txt silently, since it has the same content as
    # subdir/subsubdir/file4.txt:
    ok_(ds.repo.file_has_content('file1.txt') is True)
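
The filtering idiom from the test, isolated as a sketch with hypothetical file names and a local stand-in for `with_pathsep`; the trailing separator keeps files of a sibling directory such as 'subdirB' out of the 'subdir' selection:

from os.path import join as opj, sep


def with_pathsep(path):
    # ensure exactly one trailing path separator
    return path if path.endswith(sep) else path + sep


file_list = ['file1.txt',
             opj('subdir', 'file2.txt'),
             opj('subdirB', 'file3.txt')]   # hypothetical sibling directory
files_in_sub = [f for f in file_list if f.startswith(with_pathsep('subdir'))]
assert files_in_sub == [opj('subdir', 'file2.txt')]
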
Example #3
def test_get_recurse_dirs(o_path, c_path):

    # prepare source:
    origin = Dataset(o_path).create(force=True)
    origin.save()

    ds = install(
        c_path, source=o_path,
        result_xfm='datasets', return_type='item-or-list')

    file_list = ['file1.txt',
                 opj('subdir', 'file2.txt'),
                 opj('subdir', 'subsubdir', 'file3.txt'),
                 opj('subdir', 'subsubdir', 'file4.txt')]
    files_in_sub = [f for f in file_list if f.startswith(with_pathsep('subdir'))]

    # no content present:
    ok_(not any(ds.repo.file_has_content(file_list)))

    result = ds.get('subdir')
    # check result:
    assert_status('ok', result)
    eq_(set([item.get('path')[len(ds.path) + 1:] for item in result
             if item['type'] == 'file']),
        set(files_in_sub))
    # we also get one report on the subdir
    eq_(len(result) - 1, len(files_in_sub))

    # got all files beneath subdir:
    ok_(all(ds.repo.file_has_content(files_in_sub)))

    # additionally got file1.txt silently, since it has the same content as
    # subdir/subsubdir/file4.txt:
    ok_(ds.repo.file_has_content('file1.txt') is True)
Example #4
def test_get_recurse_dirs(o_path, c_path):

    # prepare source:
    origin = Dataset(o_path).create(force=True)
    origin.add('.')

    ds = install(
        c_path, source=o_path,
        result_xfm='datasets', return_type='item-or-list')

    file_list = ['file1.txt',
                 opj('subdir', 'file2.txt'),
                 opj('subdir', 'subsubdir', 'file3.txt'),
                 opj('subdir', 'subsubdir', 'file4.txt')]
    files_in_sub = [f for f in file_list if f.startswith(with_pathsep('subdir'))]

    # no content present:
    ok_(not any(ds.repo.file_has_content(file_list)))

    result = ds.get('subdir')

    # check result:
    assert_status('ok', result)
    eq_(set([item.get('path')[len(ds.path) + 1:] for item in result
             if item['type'] == 'file']),
        set(files_in_sub))
    # we also get one report on the subdir
    eq_(len(result) - 1, len(files_in_sub))

    # got all files beneath subdir:
    ok_(all(ds.repo.file_has_content(files_in_sub)))

    # additionally got file1.txt silently, since it has the same content as
    # subdir/subsubdir/file4.txt:
    ok_(ds.repo.file_has_content('file1.txt') is True)
Example #5
def test_get_recurse_dirs(o_path, c_path):

    # prepare source:
    origin = Dataset(o_path).create(force=True)
    origin.save("Initial", all_changes=True)

    ds = install(c_path, source=o_path)

    file_list = ['file1.txt',
                 opj('subdir', 'file2.txt'),
                 opj('subdir', 'subsubdir', 'file3.txt'),
                 opj('subdir', 'subsubdir', 'file4.txt')]
    files_in_sub = [f for f in file_list if f.startswith(with_pathsep('subdir'))]

    # no content present:
    ok_(not any(ds.repo.file_has_content(file_list)))

    result = ds.get('subdir')

    # check result:
    eq_(set([item.get('file') for item in result]),
        set(files_in_sub))
    ok_(all([x['success'] is True for x in result if x['file'] in files_in_sub]))
    eq_(len(result), len(files_in_sub))

    # got all files beneath subdir:
    ok_(all(ds.repo.file_has_content(files_in_sub)))

    # additionally got file1.txt silently, since it has the same content as
    # subdir/subsubdir/file4.txt:
    ok_(ds.repo.file_has_content('file1.txt') is True)
Example #6
    def get_containing_subdataset(self, path, recursion_limit=None):
        """Get the (sub-)dataset containing `path`

        Note: The "mount point" of a subdataset is classified as belonging to
        that respective subdataset.

        Parameters
        ----------
        path : str
          Path to determine the containing (sub-)dataset for
        recursion_limit: int or None
          limit the subdatasets to take into account to the given number of
          hierarchy levels

        Returns
        -------
        Dataset
        """

        if recursion_limit is not None and (recursion_limit < 1):
            lgr.warning("recursion limit < 1 (%s) always results in self.",
                        recursion_limit)
            return self

        if is_explicit_path(path):
            path = resolve_path(path, self)
            if not path.startswith(self.path):
                raise PathOutsideRepositoryError(file_=path, repo=self)
            path = relpath(path, self.path)

        candidates = []
        # TODO: this one would follow all the sub-datasets, which might
        # be inefficient if e.g. there are lots of other sub-datasets already
        # installed but under another sub-dataset.  There is a TODO 'pattern'
        # option which we could use I guess eventually
        for subds in self.get_subdatasets(
                recursive=True,
                #pattern=
                recursion_limit=recursion_limit,
                absolute=False):
            common = commonprefix((with_pathsep(subds), with_pathsep(path)))
            if common.endswith(sep) and common == with_pathsep(subds):
                candidates.append(common)
        if candidates:
            return Dataset(path=opj(self.path, max(candidates, key=len)))
        return self
Example #7
    def get_containing_subdataset(self, path, recursion_limit=None):
        """Get the (sub-)dataset containing `path`

        Note: The "mount point" of a subdataset is classified as belonging to
        that respective subdataset.

        Parameters
        ----------
        path : str
          Path to determine the containing (sub-)dataset for
        recursion_limit: int or None
          limit the subdatasets to take into account to the given number of
          hierarchy levels

        Returns
        -------
        Dataset
        """

        if recursion_limit is not None and (recursion_limit < 1):
            lgr.warning("recursion limit < 1 (%s) always results in self.",
                        recursion_limit)
            return self

        if is_explicit_path(path):
            path = resolve_path(path, self)
            if not path.startswith(self.path):
                raise PathOutsideRepositoryError(file_=path, repo=self)
            path = relpath(path, self.path)

        candidates = []
        # TODO: this one would follow all the sub-datasets, which might
        # be inefficient if e.g. there are lots of other sub-datasets already
        # installed but under another sub-dataset.  There is a TODO 'pattern'
        # option which we could use I guess eventually
        for subds in self.get_subdatasets(recursive=True,
                                          #pattern=
                                          recursion_limit=recursion_limit,
                                          absolute=False):
            common = commonprefix((with_pathsep(subds), with_pathsep(path)))
            if common.endswith(sep) and common == with_pathsep(subds):
                candidates.append(common)
        if candidates:
            return Dataset(path=opj(self.path, max(candidates, key=len)))
        return self
Example #8
def fs_traverse(path,
                repo,
                parent=None,
                subdatasets=None,
                render=True,
                recurse_datasets=False,
                recurse_directories=False,
                json=None,
                basepath=None):
    """Traverse path through its nodes and returns a dictionary of relevant
    attributes attached to each node

    Parameters
    ----------
    path: str
      Path to the directory to be traversed
    repo: AnnexRepo or GitRepo
      Repo object the directory belongs to
    parent: dict
      Extracted info about parent directory
    recurse_directories: bool
      Recurse into subdirectories (note that subdatasets are not traversed)
    render: bool
      Whether to render from within the function. Set to False if results are
      to be manipulated before the final render.

    Returns
    -------
    list of dict
      Extracts and returns a (recursive) list of directory info at `path`;
      does not traverse into annex, git, or hidden directories.
    """
    subdatasets = subdatasets or []
    fs = fs_extract(path, repo, basepath=basepath or path)
    dataset = Dataset(repo.path)
    submodules = {sm.path: sm for sm in repo.get_submodules()}
    # TODO:  some submodules might not even have a local empty directory
    # (git doesn't care about those), so us relying on listdir here and
    # for _traverse_handle_subds might not work out.
    # E.g. create-sibling --ui true ... --existing=reconfigure
    #  causes removal of those empty ones on the remote end
    if isdir(path):  # if node is a directory
        children = [
            fs.copy()
        ]  # store its info in its children dict too  (Yarik is not sure why, but I guess for .?)
        # ATM seems some pieces still rely on having this duplication, so left as is
        # TODO: strip away
        for node in listdir(path):
            nodepath = opj(path, node)

            # Might contain subdatasets, so we should analyze and prepare entries
            # to pass down... in theory we could just pass full paths may be? strip
            node_subdatasets = []
            is_subdataset = False
            if isdir(nodepath):
                node_sep = with_pathsep(node)
                for subds in subdatasets:
                    if subds == node:
                        # it is the subdataset
                        is_subdataset = True
                    else:
                        # use path_is_subdir
                        if subds.startswith(node_sep):
                            node_subdatasets += [subds[len(node_sep):]]

            # TODO:  it might be a subdir which is non-initialized submodule!
            # if not ignored, append child node info to current nodes dictionary
            if is_subdataset:
                # repo.path is real, so we are doomed (for now at least)
                # to resolve nodepath as well to get relpath for it
                node_relpath = relpath(realpath(nodepath), repo.path)
                subds = _traverse_handle_subds(
                    node_relpath,
                    dataset,
                    recurse_datasets=recurse_datasets,
                    recurse_directories=recurse_directories,
                    json=json)
                # Enhance it with external url if available
                submod_url = submodules[node_relpath].url
                if submod_url and is_datalad_compat_ri(submod_url):
                    subds['url'] = submod_url
                children.append(subds)
            elif not ignored(nodepath):
                # if recursive, create info dictionary (within) each child node too
                if recurse_directories:
                    subdir = fs_traverse(
                        nodepath,
                        repo,
                        subdatasets=node_subdatasets,
                        parent=None,  # children[0],
                        recurse_datasets=recurse_datasets,
                        recurse_directories=recurse_directories,
                        json=json,
                        basepath=basepath or path)
                    subdir.pop('nodes', None)
                else:
                    # read child metadata from its metadata file if it exists
                    subdir_json = metadata_locator(path=node,
                                                   ds_path=basepath or path)
                    if exists(subdir_json):
                        with open(subdir_json) as data_file:
                            subdir = js.load(data_file)
                            subdir.pop('nodes', None)
                    # else extract whatever information you can about the child
                    else:
                        # Yarik: this one is way too lean...
                        subdir = fs_extract(nodepath,
                                            repo,
                                            basepath=basepath or path)
                # append child metadata to list
                children.extend([subdir])

        # sum sizes of all 1st level children
        children_size = {}
        for node in children[1:]:
            for size_type, child_size in node['size'].items():
                children_size[size_type] = children_size.get(
                    size_type, 0) + machinesize(child_size)

        # update current node sizes to the humanized aggregate children size
        fs['size'] = children[0]['size'] = \
            {size_type: humanize.naturalsize(child_size)
             for size_type, child_size in children_size.items()}

        children[0]['name'] = '.'   # replace current node name with '.' to emulate unix syntax
        if parent:
            parent['name'] = '..'   # replace parent node name with '..' to emulate unix syntax
            children.insert(1, parent)  # insert parent info after current node info in children dict

        fs['nodes'] = children  # add children info to main fs dictionary
        if render:  # render directory node at location(path)
            fs_render(fs, json=json, ds_path=basepath or path)
            lgr.info('Directory: %s' % path)

    return fs
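
A sketch of how the traversal above hands subdataset paths down one directory level, using plain relative paths and a local `with_pathsep` stand-in; a path equal to the child directory marks that child as a subdataset, while paths below it are re-expressed relative to the child:

from os.path import join as opj, sep


def with_pathsep(path):
    return path if path.endswith(sep) else path + sep


def split_for_child(node, subdatasets):
    # returns (is_subdataset, subdataset paths made relative to `node`)
    node_sep = with_pathsep(node)
    is_subdataset = False
    node_subdatasets = []
    for subds in subdatasets:
        if subds == node:
            is_subdataset = True
        elif subds.startswith(node_sep):
            node_subdatasets.append(subds[len(node_sep):])
    return is_subdataset, node_subdatasets


assert split_for_child('code', ['code', opj('code', 'lib'), 'data']) == (True, ['lib'])
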
Example #9
    def __call__(
            path=None,
            force=False,
            description=None,
            dataset=None,
            no_annex=False,
            save=True,
            annex_version=None,
            annex_backend='MD5E',
            native_metadata_type=None,
            shared_access=None,
            git_opts=None,
            annex_opts=None,
            annex_init_opts=None):

        # two major cases
        # 1. we got a `dataset` -> we either want to create it (path is None),
        #    or another dataset in it (path is not None)
        # 2. we got no dataset -> we want to create a fresh dataset at the
        #    desired location, either at `path` or PWD

        # sanity check first
        if git_opts:
            lgr.warning(
                "`git_opts` argument is presently ignored, please complain!")
        if no_annex:
            if description:
                raise ValueError("Incompatible arguments: cannot specify "
                                 "description for annex repo and declaring "
                                 "no annex repo.")
            if annex_opts:
                raise ValueError("Incompatible arguments: cannot specify "
                                 "options for annex and declaring no "
                                 "annex repo.")
            if annex_init_opts:
                raise ValueError("Incompatible arguments: cannot specify "
                                 "options for annex init and declaring no "
                                 "annex repo.")

        if not isinstance(force, bool):
            raise ValueError("force should be bool, got %r.  Did you mean to provide a 'path'?" % force)

        # straight from input arg, no messing around before this
        if path is None:
            if dataset is None:
                # nothing given explicitly, assume create fresh right here
                path = getpwd()
            else:
                # no path, but dataset -> create that dataset
                path = dataset.path
        else:
            # resolve the path against a potential dataset
            path = resolve_path(path, ds=dataset)

        # we know that we need to create a dataset at `path`
        assert(path is not None)

        if git_opts is None:
            git_opts = {}
        if shared_access:
            # configure `git --shared` value
            git_opts['shared'] = shared_access

        # check for sane subdataset path
        real_targetpath = with_pathsep(realpath(path))  # realpath OK
        if dataset is not None:
            # make sure we get to an expected state
            if not real_targetpath.startswith(  # realpath OK
                    with_pathsep(realpath(dataset.path))):  # realpath OK
                raise ValueError("path {} outside {}".format(path, dataset))

        # important to use the given Dataset object to avoid spurious ID
        # changes with not-yet-materialized Datasets
        tbds = dataset if dataset is not None and dataset.path == path else Dataset(path)

        # don't create in non-empty directory without `force`:
        if isdir(tbds.path) and listdir(tbds.path) != [] and not force:
            raise ValueError("Cannot create dataset in directory %s "
                             "(not empty). Use option 'force' in order to "
                             "ignore this and enforce creation." % tbds.path)

        if no_annex:
            lgr.info("Creating a new git repo at %s", tbds.path)
            GitRepo(
                tbds.path,
                url=None,
                create=True,
                git_opts=git_opts)
        else:
            # always come with annex when created from scratch
            lgr.info("Creating a new annex repo at %s", tbds.path)
            AnnexRepo(
                tbds.path,
                url=None,
                create=True,
                backend=annex_backend,
                version=annex_version,
                description=description,
                git_opts=git_opts,
                annex_opts=annex_opts,
                annex_init_opts=annex_init_opts)

        if native_metadata_type is not None:
            if not isinstance(native_metadata_type, list):
                native_metadata_type = [native_metadata_type]
            for nt in native_metadata_type:
                tbds.config.add('datalad.metadata.nativetype', nt)

        # record an ID for this repo for the afterlife
        # to be able to track siblings and children
        id_var = 'datalad.dataset.id'
        if id_var in tbds.config:
            # make sure we reset this variable completely, in case of a re-create
            tbds.config.unset(id_var, where='dataset')
        tbds.config.add(
            id_var,
            tbds.id if tbds.id is not None else uuid.uuid1().urn.split(':')[-1],
            where='dataset')

        # make sure that v6 annex repos never commit content under .datalad
        with open(opj(tbds.path, '.datalad', '.gitattributes'), 'a') as gitattr:
            # TODO this will need adjusting, when annex'ed aggregate meta data
            # comes around
            gitattr.write('** annex.largefiles=nothing\n')

        # save everything
        tbds.add('.datalad', to_git=True, save=False)

        if save:
            save_dataset(
                tbds,
                paths=['.datalad'],
                message='[DATALAD] new dataset')

            # the next only makes sense if we saved the created dataset,
            # otherwise we have no committed state to be registered
            # in the parent
            if dataset is not None and dataset.path != tbds.path:
                # we created a dataset in another dataset
                # -> make submodule
                dataset.add(tbds.path, save=save, ds2super=True)

        return tbds
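
The subdataset-path sanity check from above, reduced to a sketch on plain strings (the original additionally resolves symlinks with `realpath` first); comparing both sides with a trailing separator prevents a sibling such as '/data/ds2' from being mistaken for a path inside '/data/ds':

from os.path import join as opj, sep


def with_pathsep(path):
    return path if path.endswith(sep) else path + sep


def is_within(target, container):
    # containment test on already-resolved absolute paths
    return with_pathsep(target).startswith(with_pathsep(container))


assert is_within(opj(sep, 'data', 'ds', 'sub'), opj(sep, 'data', 'ds'))
assert not is_within(opj(sep, 'data', 'ds2'), opj(sep, 'data', 'ds'))
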
Example #10
    task = dataset.config.get("datalad.run.substitutions.bids-task")
    if task and task != "None":
        outputs.append(
            op.join(
                dataset.path,
                format_command(dataset,
                               "task-{bids-task}_{bids-modality}.json")))
    # we expect location to be a directory (with DICOMS somewhere beneath)
    if not op.isdir(location):
        raise ValueError("%s is not a directory" % location)

    from datalad.utils import with_pathsep
    # append location with /* to specify inputs for containers-run
    # we need to get those files, but nothing from within a possible .datalad
    # for example
    inputs = [with_pathsep(location) + "*", rel_spec_path]

    run_results = list()
    with patch.dict('os.environ', {
            'HIRNI_STUDY_SPEC': rel_spec_path,
            'HIRNI_SPEC2BIDS_SUBJECT': subject
    }):

        dataset.containers_run(
            [
                'heudiconv',
                # XXX absolute path will make rerun on other
                # system impossible -- hard to avoid
                # TODO: from toolbox? config?
                '-f',
                heuristic_path,
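
A sketch of the inputs construction for `containers-run`, using a hypothetical `location` and `rel_spec_path` and a local `with_pathsep` stand-in; normalizing to a trailing separator guarantees exactly one separator before the glob, whether or not the caller passed a trailing slash:

from os.path import join as opj, sep


def with_pathsep(path):
    return path if path.endswith(sep) else path + sep


location = opj('acq1', 'dicoms')          # hypothetical DICOM directory
rel_spec_path = 'studyspec.json'          # hypothetical spec file
inputs = [with_pathsep(location) + '*', rel_spec_path]
assert inputs[0] == opj('acq1', 'dicoms', '*')
assert with_pathsep(location + sep) == with_pathsep(location)   # idempotent
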
Example #11
def fs_traverse(path, repo, parent=None,
                subdatasets=None,
                render=True,
                recurse_datasets=False,
                recurse_directories=False,
                json=None, basepath=None):
    """Traverse path through its nodes and returns a dictionary of relevant
    attributes attached to each node

    Parameters
    ----------
    path: str
      Path to the directory to be traversed
    repo: AnnexRepo or GitRepo
      Repo object the directory belongs to
    parent: dict
      Extracted info about parent directory
    recurse_directories: bool
      Recurse into subdirectories (note that subdatasets are not traversed)
    render: bool
      Whether to render from within the function. Set to False if results are
      to be manipulated before the final render.

    Returns
    -------
    list of dict
      Extracts and returns a (recursive) list of directory info at `path`;
      does not traverse into annex, git, or hidden directories.
    """
    subdatasets = subdatasets or []
    fs = fs_extract(path, repo, basepath=basepath or path)
    dataset = Dataset(repo.path)
    submodules = {sm.path: sm
                  for sm in repo.get_submodules()}
    # TODO:  some submodules might not even have a local empty directory
    # (git doesn't care about those), so us relying on listdir here and
    # for _traverse_handle_subds might not work out.
    # E.g. create-sibling --ui true ... --existing=reconfigure
    #  causes removal of those empty ones on the remote end
    if isdir(path):                     # if node is a directory
        children = [fs.copy()]          # store its info in its children dict too  (Yarik is not sure why, but I guess for .?)
        # ATM seems some pieces still rely on having this duplication, so left as is
        # TODO: strip away
        for node in listdir(path):
            nodepath = opj(path, node)

            # Might contain subdatasets, so we should analyze and prepare entries
            # to pass down... in theory we could just pass full paths may be? strip
            node_subdatasets = []
            is_subdataset = False
            if isdir(nodepath):
                node_sep = with_pathsep(node)
                for subds in subdatasets:
                    if subds == node:
                        # it is the subdataset
                        is_subdataset = True
                    else:
                        # use path_is_subdir
                        if subds.startswith(node_sep):
                            node_subdatasets += [subds[len(node_sep):]]

            # TODO:  it might be a subdir which is non-initialized submodule!
            # if not ignored, append child node info to current nodes dictionary
            if is_subdataset:
                # repo.path is real, so we are doomed (for now at least)
                # to resolve nodepath as well to get relpath for it
                node_relpath = relpath(realpath(nodepath), repo.path)
                subds = _traverse_handle_subds(
                    node_relpath,
                    dataset,
                    recurse_datasets=recurse_datasets,
                    recurse_directories=recurse_directories,
                    json=json
                )
                # Enhance it with external url if available
                submod_url = submodules[node_relpath].url
                if submod_url and is_datalad_compat_ri(submod_url):
                    subds['url'] = submod_url
                children.append(subds)
            elif not ignored(nodepath):
                # if recursive, create info dictionary (within) each child node too
                if recurse_directories:
                    subdir = fs_traverse(nodepath,
                                         repo,
                                         subdatasets=node_subdatasets,
                                         parent=None,  # children[0],
                                         recurse_datasets=recurse_datasets,
                                         recurse_directories=recurse_directories,
                                         json=json,
                                         basepath=basepath or path)
                    subdir.pop('nodes', None)
                else:
                    # read child metadata from its metadata file if it exists
                    subdir_json = metadata_locator(path=node, ds_path=basepath or path)
                    if exists(subdir_json):
                        with open(subdir_json) as data_file:
                            subdir = js.load(data_file)
                            subdir.pop('nodes', None)
                    # else extract whatever information you can about the child
                    else:
                        # Yarik: this one is way too lean...
                        subdir = fs_extract(nodepath,
                                            repo,
                                            basepath=basepath or path)
                # append child metadata to list
                children.extend([subdir])

        # sum sizes of all 1st level children
        children_size = {}
        for node in children[1:]:
            for size_type, child_size in node['size'].items():
                children_size[size_type] = children_size.get(size_type, 0) + machinesize(child_size)

        # update current node sizes to the humanized aggregate children size
        fs['size'] = children[0]['size'] = \
            {size_type: humanize.naturalsize(child_size)
             for size_type, child_size in children_size.items()}

        children[0]['name'] = '.'       # replace current node name with '.' to emulate unix syntax
        if parent:
            parent['name'] = '..'       # replace parent node name with '..' to emulate unix syntax
            children.insert(1, parent)  # insert parent info after current node info in children dict

        fs['nodes'] = children          # add children info to main fs dictionary
        if render:                      # render directory node at location(path)
            fs_render(fs, json=json, ds_path=basepath or path)
            lgr.info('Directory: %s' % path)

    return fs
Example #12
    def __call__(path,
                 acqid=None,
                 dataset=None,
                 subject=None,
                 anon_subject=None,
                 properties=None):
        ds = require_dataset(dataset,
                             check_installed=True,
                             purpose="import DICOM session")
        if acqid:
            # acquisition was specified => we know where to create subds
            acq_dir = op.join(ds.path, acqid)
            if not op.exists(acq_dir):
                makedirs(acq_dir)
            # TODO: if exists: needs to be empty?

            dicom_ds = _create_subds_from_tarball(path, acq_dir)

        else:
            # we don't know the acquisition id yet => create in tmp

            acq_dir = op.join(ds.path, '.git', 'datalad', 'hirni_import')
            assert not op.exists(acq_dir)
            # TODO: don't assert; check and adapt instead

            try:
                dicom_ds = _create_subds_from_tarball(path, acq_dir)
                dicom_ds = _guess_acquisition_and_move(dicom_ds, ds)
            except OSError as e:
                # TODO: Was FileExistsError. Find more accurate PY2/3 solution
                # than just OSError
                yield dict(status='impossible',
                           path=e.filename,
                           type='file',
                           action='import DICOM tarball',
                           logger=lgr,
                           message=exc_str(e))
                rmtree(acq_dir)
                return  # we can't do anything
            finally:
                if op.exists(acq_dir):
                    lgr.debug("Killing temp dataset at %s ...", acq_dir)
                    rmtree(acq_dir)

        acqid = op.basename(op.dirname(dicom_ds.path))
        ds.save(dicom_ds.path,
                message="[HIRNI] Add aquisition {}".format(acqid))

        # Note: use path with trailing slash to indicate we want metadata about the content of this subds,
        # not the subds itself.
        ds.meta_aggregate(with_pathsep(dicom_ds.path), into='top')

        ds.hirni_dicom2spec(path=dicom_ds.path,
                            spec=op.normpath(
                                op.join(dicom_ds.path, op.pardir,
                                        "studyspec.json")),
                            subject=subject,
                            anon_subject=anon_subject,
                            acquisition=acqid,
                            properties=properties)

        # TODO: This should probably be optional
        # We have the tarball and can drop extracted stuff:
        dicom_ds.drop([
            f for f in listdir(dicom_ds.path)
            if f != ".datalad" and f != ".git"
        ])

        # finally clean up git objects:
        dicom_ds.repo.cmd_call_wrapper.run(['git', 'gc'])

        # TODO: yield error results etc.
        yield dict(status='ok',
                   path=dicom_ds.path,
                   type='dataset',
                   action='import DICOM tarball',
                   logger=lgr)
Example #13
    def __call__(url,
                 path="{subject}/{session}/{scan}/",
                 project=None,
                 force=False,
                 dataset=None):
        from pyxnat import Interface as XNATInterface

        ds = require_dataset(dataset,
                             check_installed=True,
                             purpose='initialization')

        config = ds.config
        path = with_pathsep(path)

        # prep for yield
        res = dict(
            action='xnat_init',
            path=ds.path,
            type='dataset',
            logger=lgr,
            refds=ds.path,
        )

        # obtain user credentials, use simplified/stripped URL as identifier
        # given we don't have more knowledge than the user, do not
        # give a `url` to provide hints on how to obtain credentials
        parsed_url = urlparse(url)
        no_proto_url = '{}{}'.format(parsed_url.netloc,
                                     parsed_url.path).replace(' ', '')
        cred = UserPassword(name=no_proto_url, url=None)()

        xn = XNATInterface(server=url, **cred)

        # now we make a simple request to obtain the server version
        # we don't care much, but if the URL or the credentials are wrong
        # we will not get to see one
        try:
            xnat_version = xn.version()
            lgr.debug("XNAT server version is %s", xnat_version)
        except Exception as e:
            yield dict(
                res,
                status='error',
                message=('Failed to access the XNAT server. Full error:\n%s',
                         e),
            )
            return

        if project is None:
            from datalad.ui import ui
            projects = xn.select.projects().get()
            ui.message('No project name specified. The following projects are '
                       'available on {} for user {}:'.format(
                           url, cred['user']))
            for p in sorted(projects):
                # list and prep for C&P
                # TODO multi-column formatting?
                ui.message("  {}".format(quote_cmdlinearg(p)))
            return

        # query the specified project to make sure it exists and is accessible
        proj = xn.select.project(project)

        try:
            nsubj = len(proj.subjects().get())
        except Exception as e:
            yield dict(
                res,
                status='error',
                message=(
                    'Failed to obtain information on project %s from XNAT. '
                    'Full error:\n%s', project, e),
            )
            return

        lgr.info('XNAT reports %i subjects currently on-record for project %s',
                 nsubj, project)

        # check if dataset already initialized
        auth_dir = ds.pathobj / '.datalad' / 'providers'
        if auth_dir.exists() and not force:
            yield dict(
                res,
                status='error',
                message='Dataset found already initialized, '
                'use `force` to reinitialize',
            )
            return

        # put essential configuration into the dataset
        config.set('datalad.xnat.default.url',
                   url,
                   where='dataset',
                   reload=False)
        config.set('datalad.xnat.default.project', project, where='dataset')
        config.set('datalad.xnat.default.path', path, where='dataset')

        ds.save(
            path=ds.pathobj / '.datalad' / 'config',
            to_git=True,
            message="Configure default XNAT url and project",
        )

        # Configure XNAT access authentication
        ds.run_procedure(spec='cfg_xnat_dataset')

        yield dict(
            res,
            status='ok',
        )
        return
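
A sketch of the path-template normalization performed above before the value is stored in the dataset configuration, using a hypothetical template and a local `with_pathsep` stand-in; it assumes POSIX separators ('/' == os.sep). Normalizing to a trailing separator once at init time lets downstream code append file names without further checks:

from os.path import sep


def with_pathsep(path):
    return path if path.endswith(sep) else path + sep


template = '{subject}/{session}/{scan}'   # hypothetical user-supplied value
template = with_pathsep(template)         # -> '{subject}/{session}/{scan}/' on POSIX
filename = template.format(subject='s01', session='ses1', scan='scan2') + 'image.dcm'
assert filename == 's01/ses1/scan2/image.dcm'
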