def _setup_state(self, update_collection):
    """
    Create a new cache file or load a previously existing one.

    If update_collection matches arvados.util.collection_uuid_pattern, the
    collection is loaded into self._remote_collection and self.update is
    set to True.

    When self.use_cache is set, the cache file (named after an MD5 of the
    API host, the sorted real input paths and the optional filename) is
    opened, locked with self._lock_file() and parsed as JSON into
    self._state. Any cache content that is not a JSON object holding both
    the 'manifest' and 'files' keys is discarded in favor of a copy of
    self.EMPTY_STATE. Without cache usage, self._state always starts as a
    copy of self.EMPTY_STATE.

    Finally self._local_collection is built from the state's manifest.

    Raises CollectionUpdateError if update_collection is set but is not a
    collection UUID, or if the collection cannot be read from the API.
    """
    # Load an already existing collection for update
    if update_collection and re.match(arvados.util.collection_uuid_pattern,
                                      update_collection):
        try:
            self._remote_collection = arvados.collection.Collection(
                update_collection)
        except arvados.errors.ApiError as error:
            raise CollectionUpdateError(
                "Cannot read collection {} ({})".format(
                    update_collection, error))
        else:
            self.update = True
    elif update_collection:
        # Collection locator provided, but unknown format
        raise CollectionUpdateError(
            "Collection locator unknown: '{}'".format(update_collection))

    if self.use_cache:
        # Set up cache file name from input paths.
        md5 = hashlib.md5()
        md5.update(arvados.config.get('ARVADOS_API_HOST', '!nohost').encode())
        realpaths = sorted(os.path.realpath(path) for path in self.paths)
        md5.update(b'\0'.join([p.encode() for p in realpaths]))
        if self.filename:
            md5.update(self.filename.encode())
        cache_filename = md5.hexdigest()
        cache_filepath = os.path.join(
            arv_cmd.make_home_conf_dir(self.CACHE_DIR, 0o700, 'raise'),
            cache_filename)
        if self.resume and os.path.exists(cache_filepath):
            self.logger.info(
                "Resuming upload from cache file {}".format(cache_filepath))
            # 'a+' keeps the existing content; 'w+' would truncate it.
            self._cache_file = open(cache_filepath, 'a+')
        else:
            # --no-resume means start with an empty cache file.
            self.logger.info(
                "Creating new cache file at {}".format(cache_filepath))
            self._cache_file = open(cache_filepath, 'w+')
        self._cache_filename = self._cache_file.name
        self._lock_file(self._cache_file)
        # Rewind so the JSON parser below reads from the beginning.
        self._cache_file.seek(0)

    with self._state_lock:
        if self.use_cache:
            try:
                cached_state = json.load(self._cache_file)
            except ValueError:
                # Cache file empty (or not valid JSON), set up new cache
                cached_state = None
            # A cache holding valid JSON that is not an object (e.g. a list
            # or null) is as unusable as one missing the required keys, so
            # both cases fall back to a fresh state instead of crashing.
            if (isinstance(cached_state, dict)
                    and {'manifest', 'files'}.issubset(cached_state)):
                self._state = cached_state
            else:
                # Cache at least partially incomplete, set up new cache
                self._state = copy.deepcopy(self.EMPTY_STATE)
        else:
            self.logger.info("No cache usage requested for this run.")
            # No cache file, set empty state
            self._state = copy.deepcopy(self.EMPTY_STATE)
        # Load the previous manifest so we can check if files were modified
        # remotely.
        self._local_collection = arvados.collection.Collection(
            self._state['manifest'],
            replication_desired=self.replication_desired,
            put_threads=self.put_threads)
def make_path(cls, args):
    """
    Return the cache file path for the upload described by args.

    The file name is the MD5 hex digest of the configured API host, the
    sorted real paths of args.paths and either the effective maximum
    manifest depth (when any path is a directory) or args.filename (when
    set). The file lives in the directory returned by
    arv_cmd.make_home_conf_dir(cls.CACHE_DIR, 0o700, 'raise').
    """
    def as_bytes(value):
        # hashlib only accepts bytes. A Python 2 str already is bytes and is
        # passed through untouched, so digests there are unchanged; a text
        # string is encoded, which is what Python 3 requires.
        return value if isinstance(value, bytes) else value.encode()

    md5 = hashlib.md5()
    md5.update(as_bytes(arvados.config.get('ARVADOS_API_HOST', '!nohost')))
    realpaths = sorted(os.path.realpath(path) for path in args.paths)
    md5.update(b'\0'.join(as_bytes(path) for path in realpaths))
    if any(os.path.isdir(path) for path in realpaths):
        md5.update(as_bytes(str(max(args.max_manifest_depth, -1))))
    elif args.filename:
        md5.update(as_bytes(args.filename))
    return os.path.join(
        arv_cmd.make_home_conf_dir(cls.CACHE_DIR, 0o700, 'raise'),
        md5.hexdigest())
def make_path(cls, args):
    """
    Build the cache file path for the upload described by args.

    The file name is an MD5 hex digest computed over the configured API
    host, the sorted real paths of args.paths, and then either the constant
    b'-1' (when at least one path is a directory) or args.filename (when
    set). The containing directory comes from
    arv_cmd.make_home_conf_dir(cls.CACHE_DIR, 0o700, 'raise').
    """
    digest = hashlib.md5()

    api_host = arvados.config.get('ARVADOS_API_HOST', '!nohost')
    digest.update(api_host.encode())

    # Sort the resolved paths so argument order does not affect the digest.
    resolved = [os.path.realpath(entry) for entry in args.paths]
    resolved.sort()
    encoded_paths = [entry.encode() for entry in resolved]
    digest.update(b'\0'.join(encoded_paths))

    has_directory = any(os.path.isdir(entry) for entry in resolved)
    if has_directory:
        digest.update(b'-1')
    elif args.filename:
        digest.update(args.filename.encode())

    cache_dir = arv_cmd.make_home_conf_dir(cls.CACHE_DIR, 0o700, 'raise')
    return os.path.join(cache_dir, digest.hexdigest())
def _get_cache_filepath(self):
    """
    Return the path of the cache file matching this instance's inputs.

    The file name is the MD5 hex digest of the configured API host, the
    sorted real paths of self.paths and, when set, self.filename. The
    directory is the one returned by
    arv_cmd.make_home_conf_dir(self.CACHE_DIR, 0o700, 'raise').
    """
    hasher = hashlib.md5()

    api_host = arvados.config.get('ARVADOS_API_HOST', '!nohost')
    hasher.update(api_host.encode())

    # Sort the resolved paths so their order does not affect the digest.
    resolved = [os.path.realpath(entry) for entry in self.paths]
    resolved.sort()
    hasher.update(b'\0'.join([entry.encode() for entry in resolved]))

    if self.filename:
        hasher.update(self.filename.encode())

    name = hasher.hexdigest()
    directory = arv_cmd.make_home_conf_dir(self.CACHE_DIR, 0o700, 'raise')
    return os.path.join(directory, name)
def prep_image_file(filename):
    """
    Return a file object ready to save a Docker image, and a boolean
    indicating whether or not we need to actually save the image (False if
    a cached save is available).

    When no cache directory is available (make_home_conf_dir returned None),
    a named temporary ".tar" file is returned and a save is always needed.
    Otherwise the file lives in the cache directory: it is opened "rb" when
    the recorded stat data matches the file on disk, and "w+b" when it does
    not or when the comparison could not be made.
    """
    cache_dir = arv_cmd.make_home_conf_dir(
        os.path.join(".cache", "arvados", "docker"), 0o700)
    if cache_dir is None:
        image_file = tempfile.NamedTemporaryFile(suffix=".tar")
        need_save = True
    else:
        file_path = os.path.join(cache_dir, filename)
        try:
            with open(stat_cache_name(file_path)) as statfile:
                prev_stat = json.load(statfile)
            now_stat = os.stat(file_path)
            need_save = any(prev_stat[field] != now_stat[field]
                            for field in [ST_MTIME, ST_SIZE])
        # KeyError / TypeError cover a stat cache that parses as JSON but is
        # not an indexable sequence (a dict, a number, null): it is treated
        # like any other unusable cache instead of aborting.
        except STAT_CACHE_ERRORS + (AttributeError, IndexError,
                                    KeyError, TypeError):
            need_save = True  # We couldn't compare against old stats
        image_file = open(file_path, "w+b" if need_save else "rb")
    return image_file, need_save
def prep_image_file(filename):
    """
    Return a file object ready to save a Docker image, and a boolean
    indicating whether or not we need to actually save the image (False if
    a cached save is available).

    Without a cache directory (make_home_conf_dir returned None) the image
    goes to a named temporary '.tar' file and always needs saving. With one,
    the cached file is reused read-only when its recorded mtime and size
    match the file on disk; otherwise it is reopened for writing.
    """
    cache_dir = arv_cmd.make_home_conf_dir(
        os.path.join('.cache', 'arvados', 'docker'), 0o700)
    if cache_dir is None:
        return tempfile.NamedTemporaryFile(suffix='.tar'), True

    file_path = os.path.join(cache_dir, filename)
    try:
        with open(stat_cache_name(file_path)) as statfile:
            prev_stat = json.load(statfile)
        now_stat = os.stat(file_path)
        need_save = any(prev_stat[field] != now_stat[field]
                        for field in [ST_MTIME, ST_SIZE])
    # A stat cache that is valid JSON but the wrong shape (dict, number,
    # null) raises KeyError or TypeError when indexed; handle it the same
    # way as a missing or unreadable cache rather than crashing.
    except STAT_CACHE_ERRORS + (AttributeError, IndexError,
                                KeyError, TypeError):
        need_save = True  # We couldn't compare against old stats
    image_file = open(file_path, 'w+b' if need_save else 'rb')
    return image_file, need_save
def get_cache_dir():
    """
    Return whatever arv_cmd.make_home_conf_dir yields for the relative
    directory '.cache/arvados/docker', requested with mode 0o700.
    """
    relative_dir = os.path.join('.cache', 'arvados', 'docker')
    return arv_cmd.make_home_conf_dir(relative_dir, 0o700)
def setup_user_cache(cls):
    """
    Return the result of arv_cmd.make_home_conf_dir for cls.CACHE_DIR,
    requested with mode 0o700.
    """
    cache_dir = arv_cmd.make_home_conf_dir(cls.CACHE_DIR, 0o700)
    return cache_dir