def test_gitattributes(path):
    """Round-trip GitRepo.set_gitattributes() / get_gitattributes()."""
    repo = GitRepo(path, create=True)
    attr_file = op.join(repo.path, '.gitattributes')
    # a fresh repo starts without any attributes file
    ok_(not op.exists(attr_file))
    eq_(repo.get_gitattributes('.')['.'], {})

    # boolean values act as tag (True) or unset marker (False);
    # anything else is a key/value attribute
    repo.set_gitattributes([('*', {'tag': True}), ('*', {'sec.key': 'val'})])
    ok_(op.exists(attr_file))
    eq_(repo.get_gitattributes('.')['.'], {'tag': True, 'sec.key': 'val'})

    # amending the record with False unsets, but the notion of the tag
    # is not removed entirely
    repo.set_gitattributes([('*', {'tag': False})])
    eq_(repo.get_gitattributes('.')['.'], {'tag': False, 'sec.key': 'val'})

    # the attributes file was never added or committed, hence an
    # index-only query must ignore such attributes
    eq_(repo.get_gitattributes('.', index_only=True)['.'], {})

    # absolute path patterns can be written to any attributes file and
    # are translated relative to the target file
    repo.set_gitattributes(
        [(op.join(repo.path, 'relative', 'ikethemike/**'), {'bang': True})],
        attrfile=op.join('relative', '.gitattributes'))
    # both the directory and the file get created
    ok_(op.exists(op.join(repo.path, 'relative', '.gitattributes')))
    probe = op.join(repo.path, 'relative', 'ikethemike', 'probe')
    # reported keys always come out relative to the repo root, even when
    # an absolute path goes in
    eq_(repo.get_gitattributes(probe),
        {op.join('relative', 'ikethemike', 'probe'):
         {'tag': False, 'sec.key': 'val', 'bang': True}})

    if get_encoding_info()['default'] != 'ascii':
        # skip on obscure systems without anything UTF-like; whether the
        # path exists is irrelevant, and paths with spaces and other
        # funky characters are just fine
        funky = u'{} {}'.format(
            get_most_obscure_supported_name(),
            get_most_obscure_supported_name())
        repo.set_gitattributes([(funky, {'this': 'that'})])
        eq_(repo.get_gitattributes(funky)[funky],
            {'this': 'that', 'tag': False, 'sec.key': 'val'})

    # mode='w' should replace the entire file:
    repo.set_gitattributes([('**', {'some': 'nonsense'})], mode='w')
    eq_(repo.get_gitattributes('.')['.'], {'some': 'nonsense'})
def test_gitattributes(path): gr = GitRepo(path, create=True) # starts without any attributes file ok_(not op.exists(op.join(gr.path, '.gitattributes'))) eq_(gr.get_gitattributes('.')['.'], {}) # bool is a tag or unsets, anything else is key/value gr.set_gitattributes([('*', {'tag': True}), ('*', {'sec.key': 'val'})]) ok_(op.exists(op.join(gr.path, '.gitattributes'))) eq_(gr.get_gitattributes('.')['.'], {'tag': True, 'sec.key': 'val'}) # unset by amending the record, but does not remove notion of the # tag entirely gr.set_gitattributes([('*', {'tag': False})]) eq_(gr.get_gitattributes('.')['.'], {'tag': False, 'sec.key': 'val'}) # attributes file is not added or commited, we can ignore such # attributes eq_(gr.get_gitattributes('.', index_only=True)['.'], {}) # we can send absolute path patterns and write to any file, and # the patterns will be translated relative to the target file gr.set_gitattributes([ (op.join(gr.path, 'relative', 'ikethemike/**'), {'bang': True})], attrfile=op.join('relative', '.gitattributes')) # directory and file get created ok_(op.exists(op.join(gr.path, 'relative', '.gitattributes'))) eq_(gr.get_gitattributes( op.join(gr.path, 'relative', 'ikethemike', 'probe')), # always comes out relative to the repo root, even if abs goes in {op.join('relative', 'ikethemike', 'probe'): {'tag': False, 'sec.key': 'val', 'bang': True}}) if get_encoding_info()['default'] != 'ascii': # do not perform this on obscure systems without anything like UTF # it is not relevant whether a path actually exists, and paths # with spaces and other funky stuff are just fine funky = u'{} {}'.format( get_most_obscure_supported_name(), get_most_obscure_supported_name()) gr.set_gitattributes([(funky, {'this': 'that'})]) eq_(gr.get_gitattributes(funky)[funky], { 'this': 'that', 'tag': False, 'sec.key': 'val', }) # mode='w' should replace the entire file: gr.set_gitattributes([('**', {'some': 'nonsense'})], mode='w') eq_(gr.get_gitattributes('.')['.'], {'some': 'nonsense'})
def __call__(
        path=None,
        initopts=None,
        force=False,
        description=None,
        dataset=None,
        no_annex=_NoAnnexDefault,
        annex=True,
        fake_dates=False,
        cfg_proc=None):
    """Create a new dataset at `path`, yielding result records.

    Generator of result dicts (``action='create'``). Creates either a
    plain git repo (``annex=False``) or an annex repo (default), seeds
    the dataset ID and config, saves the initial state, runs any
    requested ``cfg_*`` procedures, and -- when created inside a
    reference dataset -- registers the new dataset as a submodule there.

    Raises
    ------
    ValueError
        If a `description` is given together with ``annex=False``, or
        if `initopts` request a bare repository.
    """
    # TODO: introduced with 0.13, remove with 0.14
    if no_annex is not _NoAnnexDefault:
        # the two mirror options do not agree and the deprecated one is
        # not at default value
        warnings.warn("datalad-create's `no_annex` option is deprecated "
                      "and will be removed in a future release, "
                      "use the reversed-sign `annex` option instead.",
                      DeprecationWarning)
        # honor the old option for now
        annex = not no_annex
    # we only perform negative tests below
    no_annex = not annex

    if dataset:
        if isinstance(dataset, Dataset):
            ds = dataset
        else:
            ds = Dataset(dataset)
        refds_path = ds.path
    else:
        ds = refds_path = None

    # two major cases
    # 1. we got a `dataset` -> we either want to create it (path is None),
    #    or another dataset in it (path is not None)
    # 2. we got no dataset -> we want to create a fresh dataset at the
    #    desired location, either at `path` or PWD
    # sanity check first
    if no_annex:
        if description:
            raise ValueError("Incompatible arguments: cannot specify "
                             "description for annex repo and declaring "
                             "no annex repo.")

    if (isinstance(initopts, (list, tuple)) and '--bare' in initopts) or (
            isinstance(initopts, dict) and 'bare' in initopts):
        raise ValueError(
            "Creation of bare repositories is not supported. Consider "
            "one of the create-sibling commands, or use "
            "Git to init a bare repository and push an existing dataset "
            "into it.")

    if path:
        path = resolve_path(path, dataset)

    # fall back to CWD (no dataset given) or the reference dataset itself
    path = path if path \
        else getpwd() if ds is None \
        else refds_path

    # we know that we need to create a dataset at `path`
    assert(path is not None)

    # assure cfg_proc is a list (relevant if used via Python API)
    cfg_proc = assure_list(cfg_proc)

    # prep for yield
    res = dict(action='create', path=str(path),
               logger=lgr, type='dataset',
               refds=refds_path)

    refds = None
    if refds_path and refds_path != str(path):
        refds = require_dataset(
            refds_path, check_installed=True,
            purpose='creating a subdataset')

        path_inrefds = path_under_rev_dataset(refds, path)
        if path_inrefds is None:
            yield dict(
                res,
                status='error',
                message=(
                    "dataset containing given paths is not underneath "
                    "the reference dataset %s: %s",
                    ds, str(path)),
            )
            return

    # try to locate an immediate parent dataset
    # we want to know this (irrespective of whether we plan on adding
    # this new dataset to a parent) in order to avoid conflicts with
    # a potentially absent/uninstalled subdataset of the parent
    # in this location
    # it will cost some filesystem traversal though...
    parentds_path = get_dataset_root(
        op.normpath(op.join(str(path), os.pardir)))
    if parentds_path:
        prepo = GitRepo(parentds_path)
        parentds_path = ut.Path(parentds_path)
        # we cannot get away with a simple
        # GitRepo.get_content_info(), as we need to detect
        # uninstalled/added subdatasets too
        check_path = ut.Path(path)
        pstatus = prepo.status(
            untracked='no',
            # limit query to target path for a potentially massive speed-up
            paths=[check_path.relative_to(parentds_path)])
        if (not pstatus.get(check_path, {}).get("type") == "dataset"
                and any(check_path == p or check_path in p.parents
                        for p in pstatus)):
            # redo the check in a slower fashion, it is already broken
            # let's take our time for a proper error message
            conflict = [
                p for p in pstatus
                if check_path == p or check_path in p.parents]
            res.update({
                'status': 'error',
                'message': (
                    'collision with content in parent dataset at %s: %s',
                    str(parentds_path),
                    [str(c) for c in conflict])})
            yield res
            return
        if not force:
            # another set of checks to see whether the target path is
            # pointing into a known subdataset that is not around ATM
            subds_status = {
                parentds_path / k.relative_to(prepo.path)
                for k, v in pstatus.items()
                if v.get('type', None) == 'dataset'}
            check_paths = [check_path]
            check_paths.extend(check_path.parents)
            if any(p in subds_status for p in check_paths):
                conflict = [p for p in check_paths if p in subds_status]
                res.update({
                    'status': 'error',
                    'message': (
                        'collision with %s (dataset) in dataset %s',
                        str(conflict[0]),
                        str(parentds_path))})
                yield res
                return

    # important to use the given Dataset object to avoid spurious ID
    # changes with not-yet-materialized Datasets
    tbds = ds if isinstance(ds, Dataset) and \
        ds.path == path else Dataset(str(path))

    # don't create in non-empty directory without `force`:
    if op.isdir(tbds.path) and listdir(tbds.path) != [] and not force:
        res.update({
            'status': 'error',
            'message':
                'will not create a dataset in a non-empty directory, use '
                '`force` option to ignore'})
        yield res
        return

    # stuff that we create and want to have tracked with git (not annex)
    add_to_git = {}

    if initopts is not None and isinstance(initopts, list):
        initopts = {'_from_cmdline_': initopts}

    # Note for the code below:
    # OPT: be "smart" and avoid re-resolving .repo -- expensive in DataLad
    # Re-use tbrepo instance, do not use tbds.repo

    # create and configure desired repository
    if no_annex:
        lgr.info("Creating a new git repo at %s", tbds.path)
        tbrepo = GitRepo(
            tbds.path,
            url=None,
            create=True,
            create_sanity_checks=False,
            git_opts=initopts,
            fake_dates=fake_dates)
        # place a .noannex file to indicate annex to leave this repo alone
        stamp_path = ut.Path(tbrepo.path) / '.noannex'
        stamp_path.touch()
        add_to_git[stamp_path] = {
            'type': 'file',
            'state': 'untracked'}
    else:
        # always come with annex when created from scratch
        lgr.info("Creating a new annex repo at %s", tbds.path)
        tbrepo = AnnexRepo(
            tbds.path,
            url=None,
            create=True,
            create_sanity_checks=False,
            # do not set backend here, to avoid a dedicated commit
            backend=None,
            # None causes version to be taken from config
            version=None,
            description=description,
            git_opts=initopts,
            fake_dates=fake_dates
        )
        # set the annex backend in .gitattributes as a staged change
        tbrepo.set_default_backend(
            cfg.obtain('datalad.repo.backend'),
            persistent=True, commit=False)
        add_to_git[tbrepo.pathobj / '.gitattributes'] = {
            'type': 'file',
            'state': 'added'}
        # make sure that v6 annex repos never commit content under .datalad
        attrs_cfg = (
            ('config', 'annex.largefiles', 'nothing'),
            ('metadata/aggregate*', 'annex.largefiles', 'nothing'),
            ('metadata/objects/**', 'annex.largefiles',
             '({})'.format(cfg.obtain(
                 'datalad.metadata.create-aggregate-annex-limit'))))
        # only write attribute records that are not already in effect
        attrs = tbrepo.get_gitattributes(
            [op.join('.datalad', i[0]) for i in attrs_cfg])
        set_attrs = []
        for p, k, v in attrs_cfg:
            if not attrs.get(
                    op.join('.datalad', p), {}).get(k, None) == v:
                set_attrs.append((p, {k: v}))
        if set_attrs:
            tbrepo.set_gitattributes(
                set_attrs,
                attrfile=op.join('.datalad', '.gitattributes'))

        # prevent git annex from ever annexing .git* stuff (gh-1597)
        attrs = tbrepo.get_gitattributes('.git')
        if not attrs.get('.git', {}).get(
                'annex.largefiles', None) == 'nothing':
            tbrepo.set_gitattributes([
                ('**/.git*', {'annex.largefiles': 'nothing'})])
            # must use the repo.pathobj as this will have resolved symlinks
            add_to_git[tbrepo.pathobj / '.gitattributes'] = {
                'type': 'file',
                'state': 'untracked'}

    # OPT: be "smart" and avoid re-resolving .repo -- expensive in DataLad
    # Note, must not happen earlier (before if) since "smart" it would not be
    tbds_config = tbds.config
    # record an ID for this repo for the afterlife
    # to be able to track siblings and children
    id_var = 'datalad.dataset.id'
    # Note, that Dataset property `id` will change when we unset the
    # respective config. Therefore store it before:
    tbds_id = tbds.id
    if id_var in tbds_config:
        # make sure we reset this variable completely, in case of a
        # re-create
        tbds_config.unset(id_var, where='dataset')

    if _seed is None:
        # just the standard way
        uuid_id = uuid.uuid1().urn.split(':')[-1]
    else:
        # Let's generate preseeded ones
        uuid_id = str(uuid.UUID(int=random.getrandbits(128)))
    tbds_config.add(
        id_var,
        # re-use any pre-existing ID (re-create case), mint one otherwise
        tbds_id if tbds_id is not None else uuid_id,
        where='dataset',
        reload=False)

    # make config overrides permanent in the repo config
    # this is similar to what `annex init` does
    # we are only doing this for config overrides and do not expose
    # a dedicated argument, because it is sufficient for the cmdline
    # and unnecessary for the Python API (there could simply be a
    # subsequent ds.config.add() call)
    for k, v in tbds_config.overrides.items():
        tbds_config.add(k, v, where='local', reload=False)

    # all config manipulation is done -> full reload
    tbds_config.reload()

    # must use the repo.pathobj as this will have resolved symlinks
    add_to_git[tbrepo.pathobj / '.datalad'] = {
        'type': 'directory',
        'state': 'untracked'}

    # save everything, we need to do this now and cannot merge with the
    # call below, because we may need to add this subdataset to a parent
    # but cannot until we have a first commit
    tbrepo.save(
        message='[DATALAD] new dataset',
        git=True,
        # we have to supply our own custom status, as the repo does
        # not have a single commit yet and there is no HEAD reference
        # TODO make `GitRepo.status()` robust to this state.
        _status=add_to_git,
    )

    # run requested configuration procedures on the fresh dataset
    for cfg_proc_ in cfg_proc:
        for r in tbds.run_procedure('cfg_' + cfg_proc_):
            yield r

    # the next only makes sense if we saved the created dataset,
    # otherwise we have no committed state to be registered
    # in the parent
    if isinstance(refds, Dataset) and refds.path != tbds.path:
        # we created a dataset in another dataset
        # -> make submodule
        for r in refds.save(
                path=tbds.path,
        ):
            yield r

    res.update({'status': 'ok'})
    yield res