def fetch(self, size_limit_mb=2):
    """ bypass remote to fetch directly based on stored meta """
    meta = self.meta
    if self.is_dir():
        raise NotImplementedError(
            'not going to fetch all data in a dir at the moment')

    if meta.file_id is None:
        self.refresh(update_data=True, force=True)
        # the file name could be different so we have to return here
        return

    size_ok = (size_limit_mb is not None and meta.size is not None
               and meta.size.mb < size_limit_mb)
    size_not_ok = (size_limit_mb is not None and meta.size is not None
                   and meta.size.mb > size_limit_mb)

    if size_ok or size_limit_mb is None:
        # FIXME should we force fetch here by default if the file exists?
        if self.is_broken_symlink():
            # FIXME touch a temporary file and set the meta first!
            self.unlink()
            self.touch()
            self._meta_setter(meta)

        log.info(f'Fetching remote via cache id {self.id} -> {self.local}')
        self.local.data = self.data  # note that this should trigger storage to .ops/objects

    if size_not_ok:
        log.warning(
            f'File is over the size limit {meta.size.mb} > {size_limit_mb}')
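# Hedged usage sketch (not from the original source): assuming `cache` is an
# instance of this class whose meta.size reports megabytes via `.mb` as above,
# the size gate in fetch() behaves roughly like this:
#
#   cache.fetch()                    # fetch only if meta.size.mb < 2
#   cache.fetch(size_limit_mb=50)    # raise the threshold to 50 MB
#   cache.fetch(size_limit_mb=None)  # fetch regardless of size
#
# A file whose size equals the limit exactly matches neither size_ok nor
# size_not_ok, so it is skipped without the over-limit warning.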
def _meta_updater(self, pathmeta, fetch=True):
    original = self.meta
    file_is_different, updated = self._update_meta(original, pathmeta)
    # FIXME missing checksum is one source of problems here
    must_fetch = (file_is_different and self.is_file()
                  and self.exists() and fetch)

    if must_fetch:
        try:
            # FIXME performance, and pathmeta.checksum is None case
            if (self.local.content_different()
                    and self.local.meta.checksum != pathmeta.checksum):
                raise exc.LocalChangesError(f'not fetching {self}')
        except exc.NoRemoteFileWithThatIdError as e:
            log.warning(
                f'cant fetch remote file there may be untracked local changes for\n{self}')

        log.info(f'crumpling to preserve existing metadata\n{self}')
        trashed = self.crumple()

    try:
        self._meta_setter(updated)
        if must_fetch:
            self.fetch(size_limit_mb=None)
    except BaseException as e:
        log.error(e)
        if must_fetch:
            trashed.rename(self)

        raise e

    return file_is_different
def refresh(self, update_data=False, size_limit_mb=2, force=False):
    if self.meta is None:
        breakpoint()

    # when the known size exceeds the default limit, bump the limit just past
    # it so data for files we already hold can still be updated
    limit = (size_limit_mb
             if not self.meta.size or (size_limit_mb > self.meta.size.mb)
             else self.meta.size.mb + 1)
    new = self.remote.refresh(
        update_cache=True,
        update_data=update_data,
        update_data_on_cache=(self.is_file() and self.exists()),
        size_limit_mb=limit,
        force=force)

    if new is not None:
        return new
    else:
        log.info(f'Remote for {self} has been deleted. Moving to trash.')
        try:
            self.rename(self.trash / f'{self.parent.id}-{self.id}-{self.name}')
        except FileNotFoundError as e:
            if not self.trash.exists():
                self.trash.mkdir()
                log.info(f'created {self.trash}')
            else:
                raise e
@property
def meta(self):
    #if hasattr(self, '_in_bootstrap'):
    #if hasattr(self, '_meta'):  # if we have in memory we are bootstrapping so don't fiddle about
        #return self._meta

    exists = self.exists()
    if exists:
        #log.debug(self)  # TODO this still gets hit a lot in threes
        meta = super().meta
        if meta:  # implicit else failover to backup cache
            return meta

    elif not exists and self._not_exists_cache and self.is_symlink():
        try:
            cache = self._not_exists_cache(self)
            return cache.meta
        except exc.NoCachedMetadataError as e:
            log.warning(e)

    if self._backup_cache:
        try:
            cache = self._backup_cache(self)
            meta = cache.meta
            if meta:
                log.info(f'restoring from backup {meta}')
                self._meta_setter(meta)  # repopulate primary cache from backup
                return meta

        except exc.NoCachedMetadataError as e:
            log.warning(e)
def dedupe(self, other, pretend=False):
    # FIXME blackfynn doesn't set update when a folder name changes ??!
    if self.id != other.id:
        raise ValueError(
            f'Can only dedupe when ids match, {self.id} != {other.id}')

    su, ou = self.meta.updated, other.meta.updated
    lsu, lou = self.local.meta.updated, other.local.meta.updated
    if su < ou:
        old, new = self, other
    elif su > ou:
        new, old = self, other
    elif lsu is None and lou is None:
        new, old = self, other
    elif lsu is None:
        old, new = self, other
    elif lou is None:
        new, old = self, other
    elif lsu < lou:
        old, new = self, other
    elif lsu > lou:
        new, old = self, other
    else:  # ==
        ss, os = self.meta.size, other.meta.size
        if ss is not None and os is not None:
            new, old = self, other
        elif ss is None:
            old, new = self, other
        elif os is None:
            new, old = self, other
        else:
            raise BaseException('how did we get here!?')

    file_is_different, meta = self._update_meta(old.meta, new.meta)
    if file_is_different:
        log.info(f'{self}\n!=\n{other}\n{meta}')

    if not pretend:
        #old.rename('/dev/null')  # hah
        pass

    return new
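# Hedged summary (not from the original source): the cascade above picks the
# surviving copy in this order: remote meta.updated (later wins), then local
# meta.updated (a missing local timestamp loses), then presence of meta.size,
# with `self` winning remaining ties. Assuming `a` and `b` are caches that
# share an id:
#
#   survivor = a.dedupe(b, pretend=True)  # report differences, change nothing
#   survivor = a.dedupe(b)                # currently also a no-op on disk,
#                                         # since the rename is commented out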
@staticmethod
def _update_meta(old, new):
    if not old:
        return False, new  # if there is no file it is both different and not different

    if not new:
        return False, old

    file_is_different = False

    kwargs = {k: v for k, v in old.items()}
    if old.id != new.id:
        kwargs['old_id'] = old.id

    for k, vnew in new.items():
        vold = kwargs[k]
        if vnew is None or hasattr(vnew, '__iter__') and not vnew:
            # don't update with None or empty iterables
            continue

        if vold is not None and vold != vnew:
            log.info(f'{old.id} field {k} changed from {vold} -> {vnew}')
            if k in ('created', 'updated', 'size', 'checksum', 'file_id'):
                file_is_different = True

        kwargs[k] = vnew

    if file_is_different:
        # strip fields missing from new in the case where
        # we aren't merging metadata from two different sources
        for k, vnew in new.items():
            if k == 'old_id':
                continue

            if vnew is None:
                log.debug(kwargs.pop(k))

    #old.updated == new.updated
    #old.updated < new.updated
    #old.updated > new.updated

    #old.created == new.created
    #old.created < new.created
    #old.created > new.created

    return file_is_different, PathMeta(**kwargs)
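# Hedged sketch (not from the original source): per the loops above, only
# changes to the data-bearing fields ('created', 'updated', 'size',
# 'checksum', 'file_id') flip file_is_different, and when the file is
# different any field that is None in `new` is stripped from the merged
# result rather than carried over from `old`. Assuming PathMeta accepts
# these fields as keyword arguments:
#
#   old = PathMeta(id='x', checksum=b'1', size=1024)
#   new = PathMeta(id='x', checksum=b'2')
#   changed, merged = _update_meta(old, new)
#   # changed -> True because checksum, a data-bearing field, differs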
def _bootstrap(self, meta, *,
               parents=False,
               fetch_data=False,
               size_limit_mb=2,
               recursive=False,
               only=tuple(),
               skip=tuple(),
               sparse=tuple(),):
    """ The actual bootstrap implementation """

    # figure out if we are actually bootstrapping this class or skipping it
    if not meta or meta.id is None:
        raise exc.BootstrappingError(
            f'PathMeta to bootstrap from has no id! {meta}')

    if only or skip or sparse:
        if self._meta_is_root(meta):
            # since we only go one organization at a time right now
            # we never want to skip the top level id
            log.info(f'Bootstrapping {meta.id} -> {self.local!r}')
        elif meta.id in skip:
            log.info(f'Skipped {meta.id} since it is in skip')
            return
        elif only and meta.id not in only:
            log.info(f'Skipped {meta.id} since it is not in only')
            return
        else:
            if sparse and meta.id in sparse:
                log.info(f'Sparse strap {meta.id} -> {self.local!r}')
                self._is_sparse_root = True
                sparse = True
            else:
                # if you pass the only mask so do your children
                log.info(f'Bootstrapping {meta.id} -> {self.local!r}')

            only = tuple()

    if self.meta is not None and not recursive:
        msg = f'{self} already has meta!\n{self.meta.as_pretty()}'
        raise exc.BootstrappingError(msg)

    if self.exists() and self.meta and self.meta.id == meta.id:
        self._meta_updater(meta)

    else:
        # set single use bootstrapping id
        self._bootstrapping_id = meta.id

        # directory, file, or fake file as symlink?
        is_file_and_fetch_data = self._bootstrap_prepare_filesystem(
            parents,
            fetch_data,
            size_limit_mb,
            sparse,)

        # XXX NOTE _bootstrap_prepare_filesystem always returns None
        # remove this static assignment to False if there is a use case
        # for bootstrapping the data
        is_file_and_fetch_data = False

        self._bootstrap_data(is_file_and_fetch_data)

    if recursive:  # ah the irony of using loops to do this
        yield from self._bootstrap_recursive(only, skip, sparse)

    yield self
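# Hedged usage sketch (not from the original source): _bootstrap is a
# generator, so nothing runs until it is consumed. Assuming `cache` has no
# meta yet and `meta` is a PathMeta with a valid id:
#
#   for bootstrapped in cache._bootstrap(meta, recursive=True):
#       log.debug(bootstrapped)
#
# only/skip/sparse prune the tree by id; the root id itself is never skipped.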
@meta.setter
def meta(self, pathmeta):
    if not self.exists():
        # if the path does not exist write even temporary to disk
        if self.is_symlink():
            meta = self.meta
            if meta == pathmeta:
                log.debug(f'Metadata unchanged for {meta.id}. Not updating.')
                return

            if meta.id != pathmeta.id:
                msg = ('Existing cache id does not match new id!\n'
                       f'{self!r}\n'
                       f'{meta.id} != {pathmeta.id}\n'
                       f'{meta.as_pretty()}\n'
                       f'{pathmeta.as_pretty()}')
                log.critical(msg)
                meta_newer = 'Meta newer. Not updating.'
                pathmeta_newer = 'Other meta newer.'
                msg = '{}'  # apparently I was out of my mind when I wrote this originally ...
                if meta.updated is None and pathmeta.updated is None:
                    log.warning('no change since neither has an updated value (wat)')
                    return  # FIXME

                elif meta.updated > pathmeta.updated:
                    log.info(msg.format(meta_newer))
                    return  # this is the right thing to do for a sane filesystem

                elif meta.updated < pathmeta.updated:
                    log.info(msg.format(pathmeta_newer))
                    # THIS IS EXPLICITLY ALLOWED

                else:  # they are equal
                    extra = 'Both updated at the same time '
                    if meta.created is not None and pathmeta.created is not None:
                        if meta.created > pathmeta.created:
                            log.info(msg.format(extra + meta_newer))
                            return

                        elif meta.created < pathmeta.created:
                            log.info(msg.format(extra + pathmeta_newer))
                            # THIS IS EXPLICITLY ALLOWED

                        else:  # same created
                            log.info(msg.format('Identical timestamps. Not updating.'))
                            return

                    elif meta.created is not None:
                        log.info(msg.format(
                            extra + 'Meta has datetime other does not. Not updating.'))
                        return

                    elif pathmeta.created is not None:
                        msg = msg.format(extra + 'Meta has no datetime other does.')
                        log.info(msg)
                        raise exc.MetadataIdMismatchError(msg)

                    else:  # both none
                        log.info(msg.format(extra + (
                            'Identical update time both missing created time. '
                            'Not updating.')))
                        return

                # equality
                # id mismatch all cases above should return or raise
                # except for other metadata newer

            if meta.size is not None and pathmeta.size is None:
                log.error('new meta has no size so will not overwrite')
                return

            # FIXME do the timestamp dance above here
            log.debug('Metadata exists, but ids match so will update')

            # trash old versions instead of just unlinking
            pc = self.local.cache
            trash = pc.trash
            self.rename(trash / f'{pc.parent.id}-{meta.id}-{self.name}')
            #self.unlink()

        # FIXME if an id starts with / then the local name is overwritten due to pathlib logic
        # we need to error if that happens
        #symlink = pathlib.PurePosixPath(self.local.name, pathmeta.as_symlink().as_posix().strip('/'))
        symlink = pathlib.PurePosixPath(self.local.name) / pathmeta.as_symlink()
        self.local.symlink_to(symlink)

    else:
        raise exc.PathExistsError(f'Path exists {self}')