def track_paths_in_storage(self, *paths):
    """Register the given paths with the external (git lfs) storage.

    The attributes reported by ``self.find_attr`` decide which paths are
    skipped: anything that already has ``filter=lfs`` is left alone.
    Directories are tracked through a ``<dir>/**`` pattern and notebook
    files (``.ipynb``) are never tracked.  The tracking command is only
    executed when at least one pattern remains; its exit status is not
    inspected.

    :param paths: paths to consider for tracking.
    :raises BadParameter: if the tracking command cannot be started or is
        interrupted.
    """
    known_attrs = self.find_attr(*paths)
    patterns = []

    for candidate in paths:
        # Do not add files with filter=lfs in .gitattributes
        if known_attrs.get(candidate, {}).get('filter') == 'lfs':
            continue

        candidate = Path(candidate)
        if candidate.is_dir():
            # Track everything below the directory.
            patterns.append(str(candidate / '**'))
            continue

        # TODO create configurable filter and follow .gitattributes
        if candidate.suffix == '.ipynb':
            continue

        patterns.append(str(candidate))

    if not patterns:
        # Nothing left to track, so no command needs to run.
        return

    try:
        call(
            self._CMD_STORAGE_TRACK + patterns,
            stdout=PIPE,
            stderr=STDOUT,
            cwd=str(self.path),
        )
    except (KeyboardInterrupt, OSError) as e:
        raise BadParameter('Couldn\'t run \'git lfs\':\n{0}'.format(e))
def track_paths_in_storage(self, *paths):
    """Track paths in the external storage.

    Nothing happens when external storage is disabled
    (``self.use_external_storage`` is false).  When it is enabled but not
    installed, an error is raised.  Otherwise every path is examined:

    * paths whose attributes (from ``self.find_attr``) already contain
      ``filter=lfs`` are skipped,
    * directories are tracked through a ``<dir>/**`` pattern,
    * ``.ipynb`` files are never tracked,
    * everything else is tracked as is.

    The tracking command runs only when at least one pattern is left, so
    no subprocess is spawned just to be handed an empty path list.  Its
    exit status is not inspected.

    :param paths: paths to consider for tracking.
    :raises errors.ExternalStorageNotInstalled: if external storage is
        requested but not installed.
    """
    if not self.use_external_storage:
        return

    if not self.external_storage_installed:
        raise errors.ExternalStorageNotInstalled(self.repo)

    track_paths = []
    attrs = self.find_attr(*paths)

    for path in paths:
        # Do not add files with filter=lfs in .gitattributes
        if attrs.get(path, {}).get('filter') == 'lfs':
            continue

        path = Path(path)
        if path.is_dir():
            track_paths.append(str(path / '**'))
        elif path.suffix != '.ipynb':
            # TODO create configurable filter and follow .gitattributes
            track_paths.append(str(path))

    if not track_paths:
        # Every path was filtered out; running the command without any
        # path argument would track nothing.
        return

    call(
        self._CMD_STORAGE_TRACK + track_paths,
        stdout=PIPE,
        stderr=STDOUT,
        cwd=str(self.path),
    )
def _add_from_url(self, dataset, path, url, nocopy=False, **kwargs):
    """Process an add from url and return the location on disk.

    Local sources (empty or ``file`` scheme) are copied into ``path``, or
    hard-linked when ``nocopy`` is set; a local directory is created under
    ``path`` and its entries are added recursively.  Sources with any
    other supported scheme are downloaded with ``requests``.

    :param dataset: dataset the file is added to; its ``name`` and
        ``authors`` are recorded on the result.
    :param path: directory (``pathlib`` path) that receives the file.
    :param url: local path or URL of the source.
    :param nocopy: hard-link local files instead of copying them.
    :param kwargs: accepted and ignored.
    :returns: dict mapping the POSIX path of every added file, relative
        to the dataset directory, to its ``DatasetFile``.
    :raises NotImplementedError: if the URL scheme is not supported.
    :raises Exception: if ``nocopy`` is set and the hard link cannot be
        created.
    :raises requests.exceptions.HTTPError: if the download answers with
        an HTTP error status.
    """
    u = parse.urlparse(url)

    if u.scheme not in Dataset.SUPPORTED_SCHEMES:
        raise NotImplementedError(
            '{} URLs are not supported'.format(u.scheme)
        )

    # Derive the file name from the URL *path* only, so that a query
    # string or fragment never becomes part of the name on disk (the
    # local source below is resolved from ``u.path`` as well).
    dst = path.joinpath(os.path.basename(u.path)).absolute()

    if u.scheme in ('', 'file'):
        src = Path(u.path).absolute()

        # if we have a directory, recurse
        if src.is_dir():
            files = {}
            os.mkdir(str(dst))
            for f in src.iterdir():
                files.update(
                    self._add_from_url(
                        dataset,
                        dst,
                        f.absolute().as_posix(),
                        nocopy=nocopy,
                    )
                )
            return files

        # Plain strings keep these calls independent of os.PathLike
        # support in ``os`` and ``shutil``.
        if nocopy:
            try:
                os.link(str(src), str(dst))
            except Exception as e:
                raise Exception('Could not create hard link '
                                '- retry without nocopy.') from e
        else:
            shutil.copy(str(src), str(dst))
    else:
        response = requests.get(url)
        # Fail on 4xx/5xx instead of storing the error page as data.
        response.raise_for_status()
        dst.write_bytes(response.content)

    # Clear the executable bits of the added file; read and write
    # permissions are left as they are.
    mode = dst.stat().st_mode & 0o777
    dst.chmod(mode & ~(stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH))

    # NOTE(review): passed as a string - presumably the attribute lookup
    # done while tracking is keyed by path strings; confirm against
    # find_attr.
    self.track_paths_in_storage(str(dst.relative_to(self.path)))

    dataset_path = self.path / self.datadir / dataset.name
    result = dst.relative_to(dataset_path).as_posix()
    return {
        result: DatasetFile(
            path=result,
            url=url,
            authors=dataset.authors,
            dataset=dataset.name,
        )
    }
def _expand_directories(paths):
    """Yield the given paths with directories replaced by their content.

    An existing directory is replaced by every entry found below it
    (recursively, files and sub-directories alike), each converted to a
    string; the directory itself is not yielded.  Any other item is
    yielded exactly as it was passed in.
    """
    for entry in paths:
        candidate = Path(entry)

        if not candidate.is_dir():
            # Not a directory: hand the caller's object back untouched.
            yield entry
            continue

        yield from map(str, candidate.rglob('*'))
def get_project_config_path(path=None):
    """Return the project configuration folder, if there is one.

    The folder is looked up as ``RENKU_HOME`` below ``path`` (the current
    directory when ``path`` is empty or ``None``).

    :param path: project directory to inspect.
    :returns: absolute path of the configuration folder as a string, or
        ``None`` when it does not exist or is not a directory.
    """
    base = Path(path) if path else Path('.')
    candidate = base.absolute() / RENKU_HOME

    if not (candidate.exists() and candidate.is_dir()):
        return None

    return str(candidate)
def _add_from_url(self, dataset, path, url, link=False, **kwargs):
    """Process an add from url and return the location on disk.

    Local sources (empty or ``file`` scheme) are copied into ``path``, or
    hard-linked when ``link`` is set; a local directory is created under
    ``path`` and its entries are added recursively.  Sources with any
    other supported scheme are downloaded with ``requests``.

    :param dataset: dataset the file is added to; its ``name`` and
        ``creator`` are recorded on the result.
    :param path: directory (``pathlib`` path) that receives the file.
    :param url: local path or URL of the source.
    :param link: hard-link local files instead of copying them.
    :param kwargs: may contain ``relative_to``; when given, the
        destination keeps the source path relative to that directory
        instead of using only the base name.  Remaining entries are
        forwarded to the recursive calls.
    :returns: list with one dict per added file (keys ``path``, ``url``,
        ``creator``, ``dataset`` and ``parent``).
    :raises NotImplementedError: if the URL scheme is not supported.
    :raises Exception: if ``link`` is set and the hard link cannot be
        created.
    :raises requests.exceptions.HTTPError: if the download answers with
        an HTTP error status.
    """
    u = parse.urlparse(url)

    if u.scheme not in Dataset.SUPPORTED_SCHEMES:
        raise NotImplementedError(
            '{} URLs are not supported'.format(u.scheme)
        )

    # Respect the directory structure inside the source path.
    # ``relative_to`` is popped so that recursive calls, which already
    # receive the resolved destination directory, place their entries by
    # base name only.
    relative_to = kwargs.pop('relative_to', None)
    if relative_to:
        dst_path = Path(u.path).resolve().absolute().relative_to(
            Path(relative_to).resolve().absolute()
        )
    else:
        dst_path = os.path.basename(u.path)

    dst = path.joinpath(dst_path).absolute()

    if u.scheme in ('', 'file'):
        src = Path(u.path).absolute()

        # if we have a directory, recurse
        if src.is_dir():
            files = []
            dst.mkdir(parents=True, exist_ok=True)
            for f in src.iterdir():
                files.extend(
                    self._add_from_url(
                        dataset,
                        dst,
                        f.absolute().as_posix(),
                        link=link,
                        **kwargs
                    )
                )
            return files

        # Make sure the parent directory exists.
        dst.parent.mkdir(parents=True, exist_ok=True)

        if link:
            try:
                os.link(str(src), str(dst))
            except Exception as e:
                raise Exception(
                    'Could not create hard link '
                    '- retry without --link.'
                ) from e
        else:
            shutil.copy(str(src), str(dst))
    else:
        response = requests.get(url)
        # Fail on 4xx/5xx instead of storing the error page as data.
        response.raise_for_status()
        dst.write_bytes(response.content)

    # Clear the executable bits of the added file; read and write
    # permissions are left as they are.
    mode = dst.stat().st_mode & 0o777
    dst.chmod(mode & ~(stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH))

    self.track_paths_in_storage(str(dst.relative_to(self.path)))

    return [{
        'path': dst.relative_to(self.path),
        'url': url,
        'creator': dataset.creator,
        'dataset': dataset.name,
        'parent': self
    }]