Exemplo n.º 1
0
    def __call__(dataset, pattern, ref_dir='.', makedirs=False):
        # Mark path patterns as "never annex" by setting
        # `annex.largefiles=nothing` for each of them in the `.gitattributes`
        # file located in `ref_dir` (relative to the dataset root), then save
        # that file straight to Git.
        #
        # Yields result dicts with action 'no_annex': a single
        # 'notneeded'/'error' record when the request is refused, otherwise an
        # 'ok' record followed by whatever `ds.save()` yields.
        #
        # could be extended to accept actual largefile expressions
        from os.path import exists, isabs, join
        from os import makedirs as create_dirs
        from datalad.distribution.dataset import require_dataset
        from datalad.support.annexrepo import AnnexRepo
        from datalad.utils import assure_list

        patterns = assure_list(pattern)
        ds = require_dataset(
            dataset,
            check_installed=True,
            purpose='no_annex configuration')

        # fields shared by every result record reported from here
        common = dict(path=ds.path, type='dataset', action='no_annex')

        def report(status, **extra):
            # one result record: the shared fields plus status (and message)
            return dict(common, status=status, **extra)

        # all the ways we refused to cooperate
        if not isinstance(ds.repo, AnnexRepo):
            yield report('notneeded', message='dataset has no annex')
            return

        if any(isabs(candidate) for candidate in patterns):
            yield report(
                'error',
                message=(
                    'path pattern for `no_annex` configuration must be relative paths: %s',
                    patterns))
            return

        if isabs(ref_dir):
            yield report(
                'error',
                message=(
                    '`ref_dir` for `no_annex` configuration must be a relative path: %s',
                    ref_dir))
            return

        target_dir = join(ds.path, ref_dir)
        if not exists(target_dir):
            if not makedirs:
                yield report(
                    'error',
                    message='target directory for `no_annex` does not exist (consider makedirs=True)')
                return
            create_dirs(target_dir)

        attr_path = join(target_dir, '.gitattributes')
        # one (pattern, attributes) pair per requested pattern
        rules = [
            (candidate, {'annex.largefiles': 'nothing'})
            for candidate in patterns
        ]
        ds.repo.set_gitattributes(rules, attrfile=attr_path)
        yield report('ok')

        # commit the attributes file directly to Git and relay save's results
        save_results = ds.save(
            attr_path,
            to_git=True,
            message="[DATALAD] exclude paths from annex'ing",
            result_filter=None,
            result_xfm=None)
        for save_result in save_results:
            yield save_result
Exemplo n.º 2
0
def dlplugin(dataset, pattern, ref_dir='.', makedirs='no'):
    """Configure a dataset to never put some content into the dataset's annex

    This can be useful in mixed datasets that also contain textual data, such
    as source code, which can be efficiently and more conveniently managed
    directly in Git.

    Patterns generally look like this::

      code/*

    which would match all files in the code directory. In order to match all
    files under ``code/``, including all its subdirectories use such a
    pattern::

      code/**

    Note that the plugin works incrementally, hence any existing configuration
    (e.g. from a previous plugin run) is amended, not replaced: one
    ``<pattern> annex.largefiles=nothing`` line per pattern is appended to
    the ``.gitattributes`` file.

    Parameters
    ----------
    dataset : Dataset
      dataset to configure. The value is resolved with ``require_dataset``
      and all subsequent operations use the resolved dataset.
    pattern : list
      list of path patterns. Any content whose path is matching any pattern
      will not be annexed when added to a dataset, but instead will be
      tracked directly in Git. Path patterns have to be relative to the
      directory given by the `ref_dir` option. By default, patterns should
      be relative to the root of the dataset. A single pattern is wrapped
      into a list.
    ref_dir : str, optional
      Relative path (within the dataset) to the directory that is to be
      configured. All patterns are interpreted relative to this path,
      and configuration is written to a ``.gitattributes`` file in this
      directory.
    makedirs : bool or str, optional
      If set, any missing directories will be created in order to be able
      to place a file into ``ref_dir``. The value is converted with
      ``EnsureBool``; the default is ``'no'``.

    Yields
    ------
    dict
      Result records with ``action='no_annex'``. A single record with status
      ``'notneeded'`` (dataset has no annex) or ``'error'`` (absolute
      pattern, absolute ``ref_dir``, or missing target directory) when the
      request is refused; otherwise an ``'ok'`` record once the attributes
      file has been written, followed by the results of adding that file
      to Git.
    """
    # could be extended to accept actual largefile expressions
    from os.path import join as opj
    from os.path import isabs
    from os.path import exists
    from os import makedirs as makedirsfx
    from datalad.distribution.dataset import require_dataset
    from datalad.support.annexrepo import AnnexRepo
    from datalad.support.constraints import EnsureBool
    from datalad.utils import assure_list

    makedirs = EnsureBool()(makedirs)
    pattern = assure_list(pattern)
    ds = require_dataset(dataset,
                         check_installed=True,
                         purpose='no_annex configuration')

    res_kwargs = dict(
        path=ds.path,
        type='dataset',
        action='no_annex',
    )

    # all the ways we refused to cooperate
    if not isinstance(ds.repo, AnnexRepo):
        yield dict(res_kwargs,
                   status='notneeded',
                   message='dataset has no annex')
        return
    if any(isabs(p) for p in pattern):
        yield dict(
            res_kwargs,
            status='error',
            message=(
                'path pattern for `no_annex` configuration must be relative paths: %s',
                pattern))
        return
    if isabs(ref_dir):
        yield dict(
            res_kwargs,
            status='error',
            message=(
                '`ref_dir` for `no_annex` configuration must be a relative path: %s',
                ref_dir))
        return

    gitattr_dir = opj(ds.path, ref_dir)
    if not exists(gitattr_dir):
        if makedirs:
            makedirsfx(gitattr_dir)
        else:
            yield dict(
                res_kwargs,
                status='error',
                message='target directory for `no_annex` does not exist (consider makedirs=True)')
            return

    gitattr_file = opj(gitattr_dir, '.gitattributes')

    # An existing file that does not end in a newline would otherwise get our
    # first rule glued onto its last line, corrupting both entries.
    needs_separator = False
    if exists(gitattr_file):
        with open(gitattr_file, 'rb') as fp:
            existing = fp.read()
        needs_separator = bool(existing) and not existing.endswith(b'\n')

    with open(gitattr_file, 'a') as fp:
        if needs_separator:
            fp.write('\n')
        for p in pattern:
            # gitattributes is line-oriented: every rule needs its own line
            fp.write('{} annex.largefiles=nothing\n'.format(p))
    # report success only after the file has been flushed and closed
    yield dict(res_kwargs, status='ok')

    # use the resolved dataset rather than the raw `dataset` argument
    for r in ds.add(gitattr_file,
                    to_git=True,
                    message="[DATALAD] exclude paths from annex'ing",
                    result_filter=None,
                    result_xfm=None):
        yield r