def latest_node_backup(self, *, fqdn):
    index_path = 'index/latest_backup/{}/backup_name.txt'.format(fqdn)
    try:
        latest_backup_name = self.storage_driver.get_blob_content_as_string(index_path)
        differential_blob = self.storage_driver.get_blob(
            '{}/{}/meta/differential'.format(fqdn, latest_backup_name))
        # Should be removed after a while. Here for backwards compatibility.
        incremental_blob = self.storage_driver.get_blob(
            '{}/{}/meta/incremental'.format(fqdn, latest_backup_name))
        node_backup = NodeBackup(
            storage=self,
            fqdn=fqdn,
            name=latest_backup_name,
            differential_blob=differential_blob if differential_blob is not None else incremental_blob)
        if not node_backup.exists():
            logging.warning('Latest backup points to non-existent backup. Deleting the marker')
            self.remove_latest_backup_marker(fqdn)
            # caught just below so we fall through to the "no latest backup" path
            raise Exception('Latest backup points to non-existent backup')
        return node_backup
    except Exception:
        logging.info('Node {} does not have a latest backup'.format(fqdn))
        return None
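# Usage sketch: assuming an initialized Storage instance named `storage` and a node FQDN
# that appears in the index (both are illustrative assumptions), the latest backup marker
# can be resolved like this; None is returned when the marker is missing or stale.
#
#     backup = storage.latest_node_backup(fqdn='node1.example.com')
#     if backup is not None:
#         logging.info('Latest backup for {} is {}'.format(backup.fqdn, backup.name))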
def get_node_backup(self, *, fqdn, name, differential_mode=False):
    return NodeBackup(
        storage=self,
        name=name,
        fqdn=fqdn,
        differential_mode=differential_mode
    )
def discover_node_backups(self, *, fqdn=None):
    """
    Discovers node backups by traversing data folders.
    This operation is very taxing for cloud backends and should be avoided.
    We keep it in the codebase for the sole reason of allowing compute-backup-indices to work.
    """
    def get_backup_name_from_blob(blob):
        blob_path = pathlib.Path(blob.name)
        if self.prefix_path == '':
            fqdn, name, *_ = blob_path.parts
        else:
            _, fqdn, name, *_ = blob_path.parts
        return fqdn, name

    def is_schema_blob(blob):
        return blob.name.endswith('/schema.cql')

    def includes_schema_blob(blobs):
        return any(map(is_schema_blob, blobs))

    prefix_path = fqdn if fqdn else ''
    logging.debug("Listing blobs with prefix '{}'".format(prefix_path))
    storage_objects = filter(
        lambda blob: "meta" in blob.name,
        self.storage_driver.list_objects(path=prefix_path))
    all_blobs = sorted(storage_objects, key=operator.attrgetter('name'))
    logging.debug("Finished listing blobs")

    for (fqdn, backup_name), blobs in itertools.groupby(all_blobs, key=get_backup_name_from_blob):
        # consume the _blobs_ iterator into a list because we need to traverse it twice
        backup_blobs = list(blobs)
        if includes_schema_blob(backup_blobs):
            logging.debug("Found backup {}.{}".format(fqdn, backup_name))
            yield NodeBackup(storage=self, fqdn=fqdn, name=backup_name, preloaded_blobs=backup_blobs)
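# Usage sketch: discover_node_backups() is a generator, so the expensive listing can be
# consumed lazily. The `storage` instance below is an assumed, already-initialized
# Storage object shown for illustration only.
#
#     for node_backup in storage.discover_node_backups():
#         logging.debug('Discovered backup {} for {}'.format(node_backup.name, node_backup.fqdn))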
def list_node_backups(self, *, fqdn=None, backup_index_blobs=None):
    """
    Lists node backups using the index.
    If there is no backup index, no backups will be found.
    Use discover_node_backups to discover backups from the data folders.
    """
    def is_tokenmap_file(blob):
        return "tokenmap" in blob.name

    def get_blob_name(blob):
        return blob.name

    def get_all_backup_blob_names(blobs):
        # if the tokenmap file exists, we assume the whole backup exists too
        all_backup_blobs = filter(is_tokenmap_file, blobs)
        return list(map(get_blob_name, all_backup_blobs))

    def get_blobs_for_fqdn(blobs, fqdn):
        return list(filter(lambda b: fqdn in b, blobs))

    if backup_index_blobs is None:
        backup_index_blobs = self.list_backup_index_blobs()

    blobs_by_backup = self.group_backup_index_by_backup_and_node(backup_index_blobs)
    all_backup_blob_names = get_all_backup_blob_names(backup_index_blobs)

    if len(all_backup_blob_names) == 0:
        logging.info('No backups found in index. Consider running "medusa build-index" if you have some backups')

    # possibly filter out backups only for the given fqdn
    if fqdn is not None:
        relevant_backup_names = get_blobs_for_fqdn(all_backup_blob_names, fqdn)
    else:
        relevant_backup_names = all_backup_blob_names

    # use the backup names and fqdns from the index entries to construct NodeBackup objects
    node_backups = list()
    for backup_index_entry in relevant_backup_names:
        _, _, backup_name, tokenmap_file = backup_index_entry.split('/')
        # the tokenmap file is in the format 'tokenmap_fqdn.json'
        tokenmap_fqdn = self.get_fqdn_from_any_index_blob(tokenmap_file)

        manifest_blob, schema_blob, tokenmap_blob = None, None, None
        started_blob, finished_blob = None, None
        started_timestamp, finished_timestamp = None, None
        differential_blob, incremental_blob = None, None
        if tokenmap_fqdn in blobs_by_backup[backup_name]:
            manifest_blob = self.lookup_blob(blobs_by_backup, backup_name, tokenmap_fqdn, 'manifest')
            schema_blob = self.lookup_blob(blobs_by_backup, backup_name, tokenmap_fqdn, 'schema')
            tokenmap_blob = self.lookup_blob(blobs_by_backup, backup_name, tokenmap_fqdn, 'tokenmap')
            started_blob = self.lookup_blob(blobs_by_backup, backup_name, tokenmap_fqdn, 'started')
            finished_blob = self.lookup_blob(blobs_by_backup, backup_name, tokenmap_fqdn, 'finished')
            differential_blob = self.lookup_blob(blobs_by_backup, backup_name, tokenmap_fqdn, 'differential')
            # Should be removed after a while. Here for backwards compatibility.
            incremental_blob = self.lookup_blob(blobs_by_backup, backup_name, tokenmap_fqdn, 'incremental')
            if started_blob is not None:
                started_timestamp = self.get_timestamp_from_blob_name(started_blob.name)
            if finished_blob is not None:
                finished_timestamp = self.get_timestamp_from_blob_name(finished_blob.name)

        nb = NodeBackup(
            storage=self,
            fqdn=tokenmap_fqdn,
            name=backup_name,
            manifest_blob=manifest_blob,
            schema_blob=schema_blob,
            tokenmap_blob=tokenmap_blob,
            started_timestamp=started_timestamp,
            started_blob=started_blob,
            finished_timestamp=finished_timestamp,
            finished_blob=finished_blob,
            differential_blob=differential_blob if differential_blob is not None else incremental_blob)
        node_backups.append(nb)

    # once we have all the backups, we sort them by their start time, so we get the oldest ones first
    sorted_node_backups = sorted(
        # before sorting the backups, ensure we can work out at least their start time
        filter(lambda nb: nb.started is not None, node_backups),
        key=lambda nb: nb.started)

    # then, before returning the backups, we pick only the existing ones
    previous_existed = False
    for node_backup in sorted_node_backups:
        # we try to be smart here - once we have seen an existing backup, we assume all later ones exist too
        if previous_existed:
            yield node_backup
            continue
        # the idea is to save .exists() calls, as they actually go to the storage backend and cost something
        # this is mostly meant to handle the transition period when backups expire before the index does,
        # which can happen after running the build-index command on pre-existing backups
        if node_backup.exists():
            previous_existed = True
            yield node_backup
        else:
            logging.debug('Backup {} for fqdn {} present only in index'.format(node_backup.name, node_backup.fqdn))
            # if a backup doesn't exist, we should remove its entry from the index too
            try:
                self.remove_backup_from_index(node_backup)
            except InvalidCredsError:
                logging.debug(
                    'This account cannot perform the cleanup_storage of backup '
                    '{} for fqdn {} present only in index. '
                    'Ignoring and continuing...'.format(node_backup.name, node_backup.fqdn))
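# Usage sketch: list_node_backups() also yields lazily and relies on the backup index being
# present (e.g. built with "medusa build-index"). The `storage` instance and the FQDN below
# are assumptions for illustration.
#
#     for node_backup in storage.list_node_backups(fqdn='node1.example.com'):
#         logging.info('{} started at {}'.format(node_backup.name, node_backup.started))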