class Archive(object):
    '''
    Minimal in-memory archive used to demonstrate version bumping.

    An instance stores a name, the indices it was created with, a
    ``BumpableVersion`` initialised from ``'0.0.1'`` and a 5x4
    ``DataFrame`` of random values as placeholder contents.

    Example
    -------

    .. code-block:: python

        >>> arch = Archive('/climate/raw/rcp85.nc', indices=None)
        >>> arch.update(5)
        <Archive /climate/raw/rcp85.nc> bumped 0.0.1 --> 0.0.2
        >>> arch.update(3)
        <Archive /climate/raw/rcp85.nc> bumped 0.0.2 --> 0.0.3

    '''

    def __init__(self, name, indices):
        self.name = name
        self.indices = indices
        self.version = BumpableVersion('0.0.1')
        # Placeholder payload: 5 rows x 4 columns of random floats.
        self.value = pd.DataFrame(np.random.random((5, 4)))

    def __repr__(self):
        return '<{cls} {name}>'.format(
            cls=type(self).__name__, name=self.name)

    def update(self, value):
        '''
        Replace the stored value, bump the patch version and report it.

        Parameters
        ----------
        value : object
            New contents for the archive; stored as-is on ``self.value``.
        '''
        # Capture the version string before the in-place bump mutates it.
        previous = str(self.version)
        self.value = value
        self.version.bump('patch', inplace=True)

        message = '{} bumped {} --> {}'.format(self, previous, self.version)
        print(message)

    @contextmanager
    def open(self, *args, **kwargs):
        '''
        Yield the current value serialised as CSV in a ``BytesIO`` buffer.

        Positional and keyword arguments are accepted but not used. The
        buffer is rewound to its start before it is yielded.
        '''
        print('loading {}'.format(self))

        buf = io.BytesIO()
        self.value.to_csv(buf)
        buf.seek(0)

        yield buf
def get_local_path(
        self,
        version=None,
        bumpversion=None,
        prerelease=None,
        dependencies=None,
        metadata=None):
    '''
    Returns a local path for read/write

    This is a generator function: it enters the context manager returned
    by ``data_file.get_local_path`` and yields the object that context
    produces (presumably a local filesystem path -- confirm against
    ``data_file``). It is presumably meant to be consumed as a context
    manager itself; the decorator, if any, is outside this block.

    Parameters
    ----------
    version : str
        Version number of the file to retrieve (default latest)

    bumpversion : str
        Version component to update on write if archive is versioned. Valid
        bumpversion values are 'major', 'minor', and 'patch', representing
        the three components of the strict version numbering system (e.g.
        "1.2.3"). If bumpversion is None the version number is not updated
        on write. Either bumpversion or prerelease (or both) must be a
        non-None value. If the archive is not versioned, bumpversion is
        ignored.

    prerelease : str
        Prerelease component of archive version to update on write if
        archive is versioned. Valid prerelease values are 'alpha' and
        'beta'. Either bumpversion or prerelease (or both) must be a
        non-None value. If the archive is not versioned, prerelease is
        ignored.

    dependencies : object
        Passed through unchanged as the ``dependencies`` entry of the
        version metadata recorded on write (expected type not visible
        here -- presumably a dict; confirm against ``_update_manager``).

    metadata : dict
        Updates to archive metadata. Pass {key: None} to remove a key from
        the archive's metadata.

    Raises
    ------
    AssertionError
        If the archive is versioned and the bumped version is not greater
        than the latest version.

    '''

    if metadata is None:
        metadata = {}

    latest_version = self.get_latest_version()
    version = _process_version(self, version)

    # Hash of the version being read; used below to decide whether a
    # local copy is still current.
    version_hash = self.get_version_hash(version)

    if self.versioned:
        if latest_version is None:
            latest_version = BumpableVersion()

        next_version = latest_version.bump(
            kind=bumpversion,
            prerelease=prerelease,
            inplace=False)

        # Raise explicitly rather than using ``assert``: assert statements
        # are removed under ``python -O``, which would let a write target
        # the path of the existing latest version. AssertionError is kept
        # so existing callers see the same exception type and message.
        if not next_version > latest_version:
            raise AssertionError(
                "Version must be bumped on write. "
                "Provide bumpversion and/or prerelease.")

        read_path = self.get_version_path(version)
        write_path = self.get_version_path(next_version)

    else:
        # Unversioned archives read from and write to the same path.
        read_path = self.archive_path
        write_path = self.archive_path
        next_version = None

    # version_check returns true if fp's hash is current as of read
    def version_check(chk):
        return chk['checksum'] == version_hash

    # Updater updates the manager with the latest version number
    def updater(checksum, algorithm):
        self._update_manager(
            archive_metadata=metadata,
            version_metadata=dict(
                version=next_version,
                dependencies=dependencies,
                checksum=checksum,
                algorithm=algorithm))

    path = data_file.get_local_path(
        self.authority,
        self.api.cache,
        updater,
        version_check,
        self.api.hash_file,
        read_path,
        write_path)

    with path as fp:
        yield fp
def update(
        self,
        filepath,
        cache=False,
        remove=False,
        bumpversion=None,
        prerelease=None,
        dependencies=None,
        metadata=None):
    '''
    Enter a new version to a DataArchive

    The file is hashed first. If its checksum equals the archive's latest
    hash, only the archive metadata is updated (and the local file is
    optionally removed) and nothing is uploaded. Otherwise the file is
    uploaded to the path of the next version and the new version is
    recorded via ``_update_manager``.

    Parameters
    ----------

    filepath : str
        The path to the file on your local file system

    cache : bool
        Turn on caching for this archive if not already on before update

    remove : bool
        removes a file from your local directory

    bumpversion : str
        Version component to update on write if archive is versioned. Valid
        bumpversion values are 'major', 'minor', and 'patch', representing
        the three components of the strict version numbering system (e.g.
        "1.2.3"). If bumpversion is None the version number is not updated
        on write. Either bumpversion or prerelease (or both) must be a
        non-None value. If the archive is not versioned, bumpversion is
        ignored.

    prerelease : str
        Prerelease component of archive version to update on write if
        archive is versioned. Valid prerelease values are 'alpha' and
        'beta'. Either bumpversion or prerelease (or both) must be a
        non-None value. If the archive is not versioned, prerelease is
        ignored.

    dependencies : object
        Stored unchanged as the ``dependencies`` entry of the new
        version's metadata (expected type not visible here -- presumably
        a dict; confirm against ``_update_manager``).

    metadata : dict
        Updates to archive metadata. Pass {key: None} to remove a key from
        the archive's metadata.

    '''

    metadata = {} if metadata is None else metadata

    current_version = self.get_latest_version()

    file_hash = self.api.hash_file(filepath)
    checksum, algorithm = file_hash['checksum'], file_hash['algorithm']

    # Identical contents: refresh metadata only, no new version is created.
    if checksum == self.get_latest_hash():
        self.update_metadata(metadata)

        if remove and os.path.isfile(filepath):
            os.remove(filepath)

        return

    next_version = None
    if self.versioned:
        # A versioned archive with no versions yet starts from a default
        # BumpableVersion.
        base_version = (
            BumpableVersion() if current_version is None
            else current_version)

        next_version = base_version.bump(
            kind=bumpversion,
            prerelease=prerelease,
            inplace=False)

    destination = self.get_version_path(next_version)

    if cache:
        self.cache(next_version)

    if not self.is_cached(next_version):
        self.authority.upload(filepath, destination, remove=remove)
    else:
        # Upload to the authority first and let the cache upload handle
        # removal, so the local file still exists for the second upload.
        self.authority.upload(filepath, destination)
        self.api.cache.upload(filepath, destination, remove=remove)

    version_record = dict(
        checksum=checksum,
        algorithm=algorithm,
        version=next_version,
        dependencies=dependencies)

    self._update_manager(
        archive_metadata=metadata,
        version_metadata=version_record)