def validate_file(self):
    """Compare cached metadata against the local file and complain on mismatch.

    Checks in priority order: etag (checksum + chunk count), then plain
    checksum, then size.  Mismatched etags/checksums are logged critically
    (not raised); mismatched sizes raise ``exc.SizeError``.  Missing
    integrity data is logged as a warning.

    Fix: the final warning concatenated an f-string with a plain string
    literal, so ``{self.remote!r}`` / ``{self.local!r}`` were logged
    verbatim instead of being interpolated.
    """
    meta = self.meta
    if meta.etag:
        local_checksum, local_count = self.local.etag(meta.chunksize)
        cache_checksum, cache_count = meta.etag
        if local_checksum != cache_checksum or local_count != cache_count:
            msg = (f'etags do not match!\n(!='
                   f'\n{local_checksum}-{local_count}'
                   f'\n{cache_checksum}-{cache_count}\n)')
            log.critical(msg)
    elif meta.checksum:
        lc = self.local.meta.checksum
        cc = self.meta.checksum
        if lc != cc:
            msg = f'Checksums do not match!\n(!=\n{lc}\n{cc}\n)'
            log.critical(msg)
            # haven't figured out how to compute the bf checksums yet
            #raise exc.ChecksumError(msg)
    elif meta.size is not None:
        # no checksum available: fall back to a size-only comparison
        log.warning(f'No checksum! Your data is at risk!\n'
                    f'{self.remote!r} -> {self.local!r}! ')
        ls = self.local.meta.size
        cs = self.meta.size
        if ls != cs:
            raise exc.SizeError(f'Sizes do not match!\n(!=\n{ls}\n{cs}\n)')
    else:
        # nothing to validate against at all
        # FIX: second literal was missing its f prefix, so the repr
        # placeholders were emitted literally in the log message
        log.warning(f'No checksum and no size! Your data is at risk!\n'
                    f'{self.remote!r} -> {self.local!r}! ')
def fetch(self, size_limit_mb=2):
    """ bypass remote to fetch directly based on stored meta

    Fetches file content for this cache path using only the stored
    metadata (no remote object is instantiated unless refresh is needed).

    :param size_limit_mb: skip the download when the known size exceeds
        this many megabytes; pass None to fetch unconditionally.
    :raises NotImplementedError: when called on a directory.
    """
    meta = self.meta
    if self.is_dir():
        raise NotImplementedError(
            'not going to fetch all data in a dir at the moment')
    if meta.file_id is None:
        # without a file_id we cannot fetch directly; refresh pulls
        # fresh metadata and data from the remote instead
        self.refresh(update_data=True, force=True)
        # the file name could be different so we have to return here
        return

    # NOTE: when meta.size.mb == size_limit_mb neither flag is set,
    # so an exactly-at-limit file is silently skipped — presumably
    # intentional, but worth confirming
    size_ok = size_limit_mb is not None and meta.size is not None and meta.size.mb < size_limit_mb
    size_not_ok = size_limit_mb is not None and meta.size is not None and meta.size.mb > size_limit_mb

    if size_ok or size_limit_mb is None:
        # FIXME should we force fetch here by default if the file exists?
        if self.is_broken_symlink():
            # FIXME touch a temporary file and set the meta first!
            # order matters: drop the symlink, create a real file,
            # then persist meta onto it before writing data
            self.unlink()
            self.touch()
            self._meta_setter(meta)

        log.info(f'Fetching remote via cache id {self.id} -> {self.local}')
        self.local.data = self.data  # note that this should trigger storage to .ops/objects

    if size_not_ok:
        log.warning(
            f'File is over the size limit {meta.size.mb} > {size_limit_mb}'
        )
def _meta_updater(self, pathmeta, fetch=True):
    """Merge ``pathmeta`` into the cached metadata, refetching data if needed.

    :param pathmeta: incoming metadata to merge with the current meta.
    :param fetch: when True, refetch file content if the merged metadata
        indicates the file changed.
    :returns: True when the file content is different from the incoming meta.
    :raises exc.LocalChangesError: when local content differs from both the
        cached and incoming checksums (refusing to clobber local edits).

    Fix: the "cant fetch" warning was missing its ``f`` prefix, so
    ``{self}`` was logged literally instead of the path.
    """
    original = self.meta
    file_is_different, updated = self._update_meta(original, pathmeta)
    # FIXME missing checksum is one source of problems here
    must_fetch = (file_is_different and self.is_file() and self.exists()
                  and fetch)

    if must_fetch:
        try:
            # FIXME performance, and pathmeta.checksum is None case
            if (self.local.content_different()
                    and self.local.meta.checksum != pathmeta.checksum):
                raise exc.LocalChangesError(f'not fetching {self}')
        except exc.NoRemoteFileWithThatIdError:
            # best effort: we cannot verify against the remote, proceed
            # FIX: was a plain string literal; {self} never interpolated
            log.warning(f'cant fetch remote file there may be '
                        f'untracked local changes for\n{self}')

        # move the existing file aside so its metadata survives a
        # failed fetch; only needed when we are actually refetching
        log.info(f'crumpling to preserve existing metadata\n{self}')
        trashed = self.crumple()

    try:
        self._meta_setter(updated)
        if must_fetch:
            self.fetch(size_limit_mb=None)
    except BaseException as e:
        log.error(e)
        if must_fetch:
            # roll back: restore the crumpled original in place
            trashed.rename(self)
        raise e

    return file_is_different
def _meta_setter(self, pathmeta, memory_only=False):
    """ we need memory_only for bootstrap I think

    Persist ``pathmeta`` for this path, creating filesystem structure or
    falling back to the not-exists cache when the path is missing, then
    invalidate any in-memory cached meta/id.

    :param pathmeta: metadata object to store; falsy values are ignored
        with a warning.
    :param memory_only: accepted but not consulted in this body —
        presumably handled by super()._meta_setter; TODO confirm.
    """
    if not pathmeta:
        log.warning(f'Trying to set empty pathmeta on {self}')
        return

    if self.exists_not_symlink():  # if a file already exists just follow instructions
        super()._meta_setter(pathmeta)
    else:
        if not hasattr(self, '_remote') or self._remote is None:
            # no remote attached yet: record the id so bootstrap can find it
            self._bootstrapping_id = pathmeta.id

        # need to run this to create directories
        self._bootstrap_prepare_filesystem(parents=False,
                                           fetch_data=False,
                                           size_limit_mb=0)

        if self.exists():  # we a directory now
            super()._meta_setter(pathmeta)
        elif self._not_exists_cache:
            # path still absent: store meta in the secondary cache instead
            cache = self._not_exists_cache(self, meta=pathmeta)

    if self._backup_cache:
        # always mirror into the backup cache regardless of branch taken
        cache = self._backup_cache(self, meta=pathmeta)

    # drop memoized values so the next access re-reads what we just wrote
    if hasattr(self, '_meta'):
        delattr(self, '_meta')
    if hasattr(self, '_id'):
        delattr(self, '_id')
def meta(self):
    """Return cached metadata for this path, with layered fallbacks.

    Lookup order: the primary store (``super().meta``) when the path
    exists; the not-exists cache when the path is a dangling symlink;
    finally the backup cache, which on a hit also repopulates the
    primary cache.  Returns None implicitly when nothing is found.
    """
    #if hasattr(self, '_in_bootstrap'):
    #if hasattr(self, '_meta'):  # if we have in memory we are bootstrapping so don't fiddle about
        #return self._meta
    exists = self.exists()
    if exists:
        #log.debug(self)  # TODO this still gets hit a lot in threes
        meta = super().meta
        if meta:  # implicit else failover to backup cache
            return meta

    elif not exists and self._not_exists_cache and self.is_symlink():
        # dangling symlink: meta may live in the not-exists cache
        try:
            cache = self._not_exists_cache(self)
            return cache.meta
        except exc.NoCachedMetadataError as e:
            # fall through to the backup cache below
            log.warning(e)

    if self._backup_cache:
        try:
            cache = self._backup_cache(self)
            meta = cache.meta
            if meta:
                log.info(f'restoring from backup {meta}')
                self._meta_setter(
                    meta)  # repopulate primary cache from backup
                return meta
        except exc.NoCachedMetadataError as e:
            log.warning(e)
def __init__(
        self,
        size=None,
        created=None,
        updated=None,
        checksum=None,
        etag=None,
        chunksize=None,  # used for properly checksumming?
        id=None,
        file_id=None,
        old_id=None,
        gid=None,  # needed to determine local writability
        user_id=None,
        mode=None,
        errors=tuple(),
        **kwargs):
    """Container for path metadata.

    :param size: file size; wrapped in FileSize unless None.
    :param created:/updated: raw string values are kept for lazy parsing;
        ints/datetimes are stored as already-ok values.
    :param id: must be a string when provided.
    :param file_id: falsy values other than None and 0 (e.g. '') are
        rejected.
    :raises TypeError: for an invalid file_id or a non-string id.

    Fix: ``file_id is not 0`` compared identity against an int literal
    (a CPython small-int caching artifact and a SyntaxWarning on 3.8+);
    use ``!=`` for the intended value comparison.
    """
    # reject falsy-but-meaningless file ids (e.g. '' or []) while still
    # allowing the legitimate values None and 0
    if not file_id and file_id is not None and file_id != 0:
        raise TypeError('wat')

    # raw (e.g. string) timestamps are deferred for later parsing in
    # _created; int/datetime values are considered already normalized.
    # NOTE(review): _created_ok/_updated_ok are only set on one branch,
    # so code reading them presumably guards with hasattr — confirm.
    if created is not None and not isinstance(
            created, int) and not isinstance(created, datetime):
        _created = created
    else:
        self._created_ok = created
        _created = None

    if updated is not None and not isinstance(
            updated, int) and not isinstance(updated, datetime):
        _updated = updated
    else:
        self._updated_ok = updated
        _updated = None

    if id is not None and not isinstance(id, str):
        # no implicit type mutation, the system providing the ids
        # is where the information about how to do the conversion lives
        # we don't handle it here
        raise TypeError(f'id must be a string! {id!r}')

    self.size = size if size is None else FileSize(size)
    self._created = _created
    self._updated = _updated
    self.checksum = checksum
    self.etag = etag
    self.chunksize = chunksize
    self.id = id
    self.file_id = file_id
    self.old_id = old_id
    self.gid = gid
    self.user_id = user_id
    self.mode = mode
    self.errors = tuple(errors) if errors else tuple()
    if kwargs:
        log.warning(f'Unexpected meta values! {kwargs}')
        self.__kwargs = kwargs  # roundtrip values we don't explicitly handle
def _datetime(self, value):
    """Serialize a datetime via the module's isoformat helper.

    Raises TypeError for non-datetime input and warns when the value
    carries no usable timezone information.
    """
    if not isinstance(value, datetime):
        raise TypeError(f'{type(value)} is not a datetime for {value}')

    tzinfo = value.tzinfo
    aware = tzinfo is not None and tzinfo.utcoffset(None) is not None
    serialized = isoformat(value)
    if not aware:
        log.warning('why do you have a timestamp without a timezone ;_;')

    return serialized
def popd(N=0, n=False):
    """ see popd --help """
    stack = AugmentedPath._stack
    if not stack:
        log.warning('popd: directory stack empty')
        return

    # python lists append in the opposite direction to the shell's
    # directory stack, so count N entries back from the end
    path = stack.pop(-(N + 1))
    path.chdir()
    print(*reversed(stack), AugmentedPath.cwd())
    return path
def encode(self, field, value):
    """Encode a metadata field value to its string form.

    errors become a list, bytes checksums are hex-encoded, everything
    else is delegated to _str_encode; unhandled types are returned
    unchanged with a warning.
    """
    if field == 'errors':
        return list(value)

    if field == 'checksum' and isinstance(value, bytes):
        value = value.hex()

    try:
        return _str_encode(field, value)
    except exc.UnhandledTypeError:
        log.warning(f'conversion not implemented for field {field}')

    return value
def encode(self, field, value):
    """Encode a metadata field value to bytes via _bytes_encode.

    Empty values (None or an empty iterable) are rejected with
    TypeError; values _bytes_encode cannot handle raise
    exc.UnhandledTypeError after a warning.
    """
    is_empty_iterable = hasattr(value, '__iter__') and not value
    if value is None or is_empty_iterable:
        raise TypeError('cannot encode an empty value')

    try:
        return _bytes_encode(field, value)
    except exc.UnhandledTypeError:
        log.warning(f'conversion not implemented for field {field}')
        raise exc.UnhandledTypeError(f'dont know what to do with {value!r}')
def decode(self, field, value):
    """Decode a single metadata field from its on-disk bytes form.

    Dispatches on ``field``: timestamps try a legacy packed-double
    format first, checksums pass through as bytes, etags split into
    (checksum bytes, chunk count), errors split on ';', ids/modes
    decode to str, and anything else known falls back to int.
    """
    if field in ('created', 'updated'):  # FIXME human readable vs integer
        try:
            # needed for legacy cases
            value, = struct.unpack('d', value)
            return datetime.fromtimestamp(value)
        except struct.error:
            pass
        vd = value.decode()
        # side effect: stash the raw decoded string on self for lazy
        # parsing elsewhere
        setattr(self, '_' + field, vd)  # FIXME with timezone vs without ...
        return vd

    elif field == 'checksum':
        # checksums stay as raw bytes
        return value

    elif field == 'etag':
        # struct pack this sucker so the count can fit as well?
        value = value.decode()  # FIXME
        checksum, strcount = value.rsplit('-', 1)
        count = int(strcount)
        return bytes.fromhex(checksum), count

    elif field == 'errors':
        value = value.decode()
        # drop empty segments produced by trailing separators
        return tuple(_ for _ in value.split(';') if _)

    elif field == 'user_id':
        try:
            return int(value)
        except ValueError:  # FIXME :/ uid vs owner_id etc ...
            # some backends use string user ids
            return value.decode()

    elif field in ('id', 'mode', 'old_id'):
        return value.decode()

    elif field not in self.fields:
        log.warning(f'Unhandled field {field}')
        return value

    else:
        # remaining known fields (e.g. sizes, gids) are integers
        try:
            return int(value)
        except ValueError as e:
            log.exception(f'{field} {value}')
            raise e
def move(self, *, remote=None, target=None, meta=None):
    """ instantiate a new cache and cleanup self because we are moving

    Either ``remote`` or both ``target`` and ``meta`` must be supplied.
    Resolves the destination, creates missing parent cache directories,
    reconciles metadata when the destination already exists with the
    same id, then performs the rename with a rollback file in case the
    rename fails.

    :returns: the target cache path.
    :raises TypeError: missing/mismatched arguments or anchor type.
    :raises exc.WhyDidntThisGetMovedBeforeError: both source and target
        are broken symlinks with differing ids.
    :raises exc.PathExistsError: target exists with a different id.
    """
    # FIXME what to do if we have data
    if remote is None and (target is None or meta is None):
        raise TypeError(
            'either remote or meta and target are required arguments')

    # deal with moving to a different directory that might not even exist yet
    if target is None:
        if not isinstance(self.anchor, self.__class__):
            raise TypeError(
                f'mismatched anchor types {self!r} {self.anchor!r}')

        target = self.anchor / remote  # FIXME why does this not try to instantiate the caches? or does it?

    if target.absolute() == self.absolute():
        log.warning(f'trying to move a file onto itself {self.absolute()}')
        return target

    common = self.commonpath(target).absolute()
    target_parent = target.parent.absolute()
    parent = self.parent.absolute()

    assert target.name != self.name or target_parent != parent

    if target_parent != parent:
        _id = remote.id if remote else meta.id
        log.warning('A parent of current file has changed location!\n'
                    f'{common}\n{self.relative_to(common)}\n'
                    f'{target.relative_to(common)}\n{_id}')

        if not target_parent.exists():
            if remote is None:  # we have to have a remote to pull parent structure
                remote = self._remote_class(meta)

            target_parent.mkdir_cache(remote)

    do_cast = not isinstance(target, self.__class__)
    if do_cast:
        target = self.__class__(target, meta=meta)

    if target.exists() or target.is_broken_symlink():
        if target.id == self.id:  #(remote.id if remote else meta.id):
            if self.is_broken_symlink():
                # we may be a package with extra metadata that needs to
                # be merged with the target before we are unlinked
                file_is_different = target._meta_updater(self.meta)
                # FIXME ... if file is different then this causes staleness
                # and we need to fetch
                if file_is_different:
                    log.critical('DO SOMETHING ABOUT THIS STALE DATA'
                                 f'\n{target}\n{target.meta.as_pretty()}')

            elif do_cast:
                # the target meta was just put there, if the ids match it should be ok
                # however since arbitrary meta can be passed in, best to double check
                file_is_different = target._meta_updater(self.meta)
                if file_is_different:
                    log.critical('Something has gone wrong'
                                 f'\n{target}\n{target.meta.as_pretty()}')

            else:
                # directory moves that are resolved during pull
                log.warning(f'what is this!?\n{target}\n{self}')

        elif target.is_broken_symlink():
            remote._cache = self  # restore the mapping for remote -> self
            raise exc.WhyDidntThisGetMovedBeforeError(
                f'\n{target}\n{self}')

        else:
            raise exc.PathExistsError(f'Target {target} already exists!')

    if self.exists():
        # park the target symlink aside so a failed rename can be undone
        safe_unlink = target.local.parent / f'.unlink-{target.name}'
        try:
            if target.is_broken_symlink():
                target.rename(safe_unlink)

            self.rename(
                target
            )  # if target is_dir then this will fail, which is ok
        except BaseException as e:
            log.exception(e)
            # restore the parked symlink on failure
            if safe_unlink.is_broken_symlink():
                safe_unlink.rename(target)
        finally:
            # success path: the parked copy is no longer needed
            if safe_unlink.is_broken_symlink():
                safe_unlink.unlink()

    elif self.is_broken_symlink():
        # we don't move to trash here because this was just a file rename
        self.unlink(
        )  # don't move the meta since it will break the naming insurance measure

    return target
def meta(self, pathmeta):
    """Write ``pathmeta`` onto a not-yet-existing path as a meta symlink.

    Only valid when the path does not exist as a real file.  If a
    symlinked meta record is already present, compare ids and updated/
    created timestamps to decide whether to keep the existing record,
    overwrite it (trashing the old version first), or raise.

    :raises exc.MetadataIdMismatchError: id mismatch where only the
        incoming meta has a created time.
    :raises exc.PathExistsError: the path exists on disk.
    """
    if not self.exists():
        # if the path does not exist write even temporary to disk
        if self.is_symlink():
            meta = self.meta
            if meta == pathmeta:
                log.debug(
                    f'Metadata unchanged for {meta.id}. Not updating.')
                return

            if meta.id != pathmeta.id:
                msg = ('Existing cache id does not match new id!\n'
                       f'{self!r}\n'
                       f'{meta.id} != {pathmeta.id}\n'
                       f'{meta.as_pretty()}\n'
                       f'{pathmeta.as_pretty()}')
                log.critical(msg)
                meta_newer = 'Meta newer. Not updating.'
                pathmeta_newer = 'Other meta newer.'
                msg = '{}'  # apparently I was out of my mind when I wrote this originally ...
                if meta.updated is None and pathmeta.updated is None:
                    log.warning(
                        'no change since either has an updated value (wat)'
                    )
                    return  #FIXME

                if meta.updated > pathmeta.updated:
                    log.info(msg.format(meta_newer))
                    return  # this is the right thing to do for a sane filesystem
                elif meta.updated < pathmeta.updated:
                    log.info(msg.format(pathmeta_newer))
                    # THIS IS EXPLICITLY ALLOWED
                else:  # they are equal
                    # break the tie on created time
                    extra = 'Both updated at the same time '
                    if meta.created is not None and pathmeta.created is not None:
                        if meta.created > pathmeta.created:
                            log.info(msg.format(extra + meta_newer))
                            return
                        elif meta.created < pathmeta.created:
                            log.info(msg.format(extra + pathmeta_newer))
                            # THIS IS EXPLICITLY ALLOWED
                        else:  # same created
                            log.info(
                                msg.format(
                                    'Identical timestamps. Not updating.'))
                            return
                    elif meta.created is not None:
                        log.info(
                            msg.format(
                                extra +
                                'Meta has datetime other does not. Not updating.'
                            ))
                        return
                    elif pathmeta.created is not None:
                        msg = msg.format(
                            extra + 'Meta has no datetime other does.')
                        log.info(msg)
                        raise exc.MetadataIdMismatchError(msg)
                    else:  # both none
                        log.info(
                            msg.format(extra + (
                                'Identical update time both missing created time. '
                                'Not updating.')))
                        return
                # equality

            # id mismatch all cases above should return or raise except for other metadata newer
            if meta.size is not None and pathmeta.size is None:
                log.error('new meta has no size so will not overwrite')
                return

            # FIXME do the timestamp dance above here
            log.debug('Metadata exists, but ids match so will update')

            # trash old versions instead of just unlinking
            pc = self.local.cache
            trash = pc.trash
            self.rename(trash / f'{pc.parent.id}-{meta.id}-{self.name}')
            #self.unlink()

        # FIXME if an id starts with / then the local name is overwritten due to pathlib logic
        # we need to error if that happens
        #symlink = pathlib.PurePosixPath(self.local.name, pathmeta.as_symlink().as_posix().strip('/'))
        symlink = pathlib.PurePosixPath(
            self.local.name) / pathmeta.as_symlink()
        self.local.symlink_to(symlink)

    else:
        raise exc.PathExistsError(f'Path exists {self}')