def _find_nearest_basebackup(self, recovery_target_time=None):
    """Pick the newest basebackup usable for the given recovery target.

    When ``recovery_target_time`` is given, only backups whose timestamp is
    strictly before it qualify.  Prints the applicable backups and the chosen
    one, then returns the chosen backup's name.

    :raises RestoreError: when no backup qualifies.
    """
    candidates = []
    for backup in self.storage.list_basebackups():
        if recovery_target_time:
            # We really need the backup end time here, but pg_basebackup based
            # backup methods don't provide it for us currently, so fall back
            # to using start-time.
            meta = backup["metadata"]
            ts_key = "end-time" if "end-time" in meta else "start-time"
            if dates.parse_timestamp(meta[ts_key]) >= recovery_target_time:
                continue
        candidates.append(backup)

    if not candidates:
        raise RestoreError("No applicable basebackups found, exiting")

    # NOTE: as above, we may not have end-time so just sort by start-time,
    # the order should be the same
    candidates.sort(key=lambda backup: backup["metadata"]["start-time"])
    plural = "" if len(candidates) == 1 else "s"
    caption = "Found {} applicable basebackup{}".format(len(candidates), plural)
    print_basebackup_list(candidates, caption=caption)
    selected = candidates[-1]["name"]
    print("\nSelecting {!r} for restore".format(selected))
    return selected
def test_parse_timestamp():
    """parse_timestamp handles named local zones and unknown zone names."""
    local_aware = datetime.datetime.now(dateutil.tz.tzlocal())
    # split local_aware such as "2021-02-08T09:58:27.988218-05:00" into date,
    # time and tzoffset components
    str_date, _, str_localtime_aware = local_aware.isoformat().partition("T")
    str_localtime_naive = re.split("[+-]", str_localtime_aware, maxsplit=1)[0]
    str_local_aware_named = "{}T{} {}".format(str_date, str_localtime_naive, local_aware.tzname())
    assert parse_timestamp(str_local_aware_named) == local_aware

    local_naive = parse_timestamp(str_local_aware_named, with_tz=False, assume_local=True)
    assert local_naive == local_aware.replace(tzinfo=None)

    # an unrecognized zone name falls back to UTC
    str_unknown_aware = "2017-02-02 12:00:00 XYZ"
    unknown_aware_utc = parse_timestamp(str_unknown_aware)
    assert unknown_aware_utc.tzinfo == datetime.timezone.utc
    assert unknown_aware_utc.isoformat() == "2017-02-02T12:00:00+00:00"

    # local interpretation is only checked on hosts in Finnish time
    if local_aware.tzname() in ["EET", "EEST"]:
        unknown_aware_local = parse_timestamp(str_unknown_aware, assume_local=True)
        assert unknown_aware_local.tzinfo == dateutil.tz.tzlocal()
        assert unknown_aware_local.isoformat() == "2017-02-02T12:00:00+02:00"
def _find_nearest_basebackup(self, recovery_target_time=None):
    """Return the newest applicable basebackup entry (a dict).

    With ``recovery_target_time`` set, only backups timestamped strictly
    before that point qualify.  Raises RestoreError when none qualify.
    """
    def backup_ts(entry):
        # We really need the backup end time here, but pg_basebackup based
        # backup methods don't provide it for us currently, so fall back to
        # using start-time.
        meta = entry["metadata"]
        key = "end-time" if "end-time" in meta else "start-time"
        return dates.parse_timestamp(meta[key])

    applicable = [
        entry for entry in self.storage.list_basebackups()
        if not recovery_target_time or backup_ts(entry) < recovery_target_time
    ]
    if not applicable:
        raise RestoreError("No applicable basebackups found, exiting")

    # NOTE: as above, we may not have end-time so just sort by start-time, the order should be the same
    applicable.sort(key=lambda entry: entry["metadata"]["start-time"])
    caption = "Found {} applicable basebackup{}".format(
        len(applicable), "" if len(applicable) == 1 else "s")
    print_basebackup_list(applicable, caption=caption)
    selected = applicable[-1]
    print("\nSelecting {!r} for restore".format(selected["name"]))
    return selected
def print_basebackup_list(basebackups, *, caption="Available basebackups", verbose=True):
    """Print a table of basebackups (name, sizes, start time), sorted by name.

    Start times are rendered as naive UTC with a trailing "Z".  When
    ``verbose`` is true the remaining metadata of each entry is printed too.
    """
    print(caption, "\n")
    row = "{name:40} {size:>11} {orig_size:>11} {time:20}".format
    print(row(name="Basebackup", size="Backup size", time="Start time", orig_size="Orig size"))
    print(row(name="-" * 40, size="-" * 11, time="-" * 20, orig_size="-" * 11))
    for backup in sorted(basebackups, key=lambda entry: entry["name"]):
        meta = backup["metadata"].copy()
        started = meta.pop("start-time")
        if isinstance(started, str):
            started = dates.parse_timestamp(started)
        if started.tzinfo:
            # normalize to naive UTC so the literal "Z" suffix is correct
            started = started.astimezone(datetime.timezone.utc).replace(tzinfo=None)
        started_str = started.isoformat()[:19] + "Z"  # # pylint: disable=no-member
        size_str = "{} MB".format(backup["size"] // (1024 ** 2))
        orig_size = int(meta.pop("original-file-size", 0) or 0)
        orig_size_str = "{} MB".format(orig_size // (1024 ** 2)) if orig_size else "n/a"
        print(row(name=backup["name"], size=size_str, time=started_str, orig_size=orig_size_str))
        if verbose:
            print(" metadata:", meta)
def test_parse_timestamp():
    """parse_timestamp round-trips the local zone and defaults unknown zones to UTC."""
    local_aware = datetime.datetime.now(dateutil.tz.tzlocal())
    # chop the "+HH:MM" offset off the isoformat and append the zone name instead
    str_local_aware_naive = local_aware.isoformat().partition("+")[0]
    str_local_aware_named = "{} {}".format(str_local_aware_naive, local_aware.tzname())
    assert parse_timestamp(str_local_aware_named) == local_aware

    local_naive = parse_timestamp(str_local_aware_named, with_tz=False, assume_local=True)
    assert local_naive == local_aware.replace(tzinfo=None)

    str_unknown_aware = "2017-02-02 12:00:00 XYZ"
    unknown_aware_utc = parse_timestamp(str_unknown_aware)
    assert unknown_aware_utc.tzinfo == datetime.timezone.utc
    assert unknown_aware_utc.isoformat() == "2017-02-02T12:00:00+00:00"

    # only meaningful when the test host itself runs in Finnish time
    if local_aware.tzname() in ["EET", "EEST"]:
        unknown_aware_local = parse_timestamp(str_unknown_aware, assume_local=True)
        assert unknown_aware_local.tzinfo == dateutil.tz.tzlocal()
        assert unknown_aware_local.isoformat() == "2017-02-02T12:00:00+02:00"
def patch_basebackup_info(self, *, entry, site_config):
    """Normalize a basebackup listing entry in place.

    Strips the storage path from the entry name, converts timestamp metadata
    to datetime objects and backfills scheduling-related fields that backups
    made by old PGHoard versions are missing.
    """
    # drop path from resulting list and convert timestamps
    entry["name"] = os.path.basename(entry["name"])
    metadata = entry["metadata"]
    metadata["start-time"] = dates.parse_timestamp(metadata["start-time"])
    # If backup was created by old PGHoard version some fields related to backup
    # scheduling might be missing.  Set "best guess" values for those fields here
    # to simplify logic elsewhere.
    if "backup-decision-time" in metadata:
        metadata["backup-decision-time"] = dates.parse_timestamp(metadata["backup-decision-time"])
    else:
        # Backups are usually scheduled
        metadata["backup-decision-time"] = metadata["start-time"]
    metadata.setdefault("backup-reason", "scheduled")
    # Calculate normalized backup time based on start time if missing
    if "normalized-backup-time" not in metadata:
        metadata["normalized-backup-time"] = self.get_normalized_backup_time(
            site_config, now=metadata["start-time"])
def parse_backup_label(self, backup_label_data):
    """Extract the starting WAL segment and backup start time from a
    PostgreSQL backup_label blob.

    Accepts bytes or str (str is UTF-8 encoded first; previously a str
    argument crashed with TypeError on ``str.split(b"\\n")``, and other
    code paths hand the label around as text).

    Returns a ``(start_wal_segment, start_time_isoformat)`` tuple.
    """
    if isinstance(backup_label_data, str):
        backup_label_data = backup_label_data.encode("utf-8")
    for line in backup_label_data.split(b"\n"):
        if line.startswith(b"START WAL LOCATION"):
            # e.g. "START WAL LOCATION: 0/9000028 (file 0000000100000000000000009)"
            # -- token 5 is the file name with a trailing ")"
            start_wal_segment = line.split()[5].strip(b")").decode("utf8")
        elif line.startswith(b"START TIME: "):
            start_time_text = line[len("START TIME: "):].decode("utf8")
            # backup_label start time has no zone offset; interpret it as local time
            start_time_dt = dates.parse_timestamp(start_time_text, assume_local=True)
            start_time = start_time_dt.isoformat()
    self.log.debug("Found: %r as starting wal segment, start_time: %r",
                   start_wal_segment, start_time)
    return start_wal_segment, start_time
def parse_backup_label(self, backup_label_data):
    """Parse the start WAL segment and ISO start time out of backup_label
    contents; accepts either str or bytes input."""
    if isinstance(backup_label_data, str):
        backup_label_data = backup_label_data.encode("utf-8")
    for raw_line in backup_label_data.split(b"\n"):
        if raw_line.startswith(b"START WAL LOCATION"):
            # sixth whitespace-separated token is the WAL file name, with a trailing ")"
            start_wal_segment = raw_line.split()[5].strip(b")").decode("utf8")
        elif raw_line.startswith(b"START TIME: "):
            timestamp_text = raw_line[len("START TIME: "):].decode("utf8")
            start_time = dates.parse_timestamp(timestamp_text, assume_local=True).isoformat()
    self.log.debug("Found: %r as starting wal segment, start_time: %r",
                   start_wal_segment, start_time)
    return start_wal_segment, start_time
def test_parse_timestamp():
    """Exercise parse_timestamp with local, naive and unknown-zone inputs."""
    now_aware = datetime.datetime.now(dateutil.tz.tzlocal())
    naive_text = now_aware.isoformat().split("+", 1)[0]
    named_text = "{} {}".format(naive_text, now_aware.tzname())

    # a zone *name* matching the local zone parses back to the aware original
    assert parse_timestamp(named_text) == now_aware

    # with_tz=False strips the zone entirely
    parsed_naive = parse_timestamp(named_text, with_tz=False, assume_local=True)
    assert parsed_naive == now_aware.replace(tzinfo=None)

    # an unknown zone name is treated as UTC by default
    unknown_text = "2017-02-02 12:00:00 XYZ"
    parsed_utc = parse_timestamp(unknown_text)
    assert parsed_utc.tzinfo == datetime.timezone.utc
    assert parsed_utc.isoformat() == "2017-02-02T12:00:00+00:00"

    if now_aware.tzname() in ["EET", "EEST"]:
        parsed_local = parse_timestamp(unknown_text, assume_local=True)
        assert parsed_local.tzinfo == dateutil.tz.tzlocal()
        assert parsed_local.isoformat() == "2017-02-02T12:00:00+02:00"
def get_remote_basebackups_info(self, site):
    """List the basebackups stored for *site*, oldest first.

    Lazily creates and caches one transfer client per site.  Entry names
    are stripped to their basename and start-time metadata is converted to
    datetime before sorting.
    """
    storage = self.site_transfers.get(site)
    if not storage:
        storage = get_transfer(get_object_storage_config(self.config, site))
        self.site_transfers[site] = storage
    basebackup_path = os.path.join(self.config["backup_sites"][site]["prefix"], "basebackup")
    results = storage.list_path(basebackup_path)
    for entry in results:
        # drop path from resulting list and convert timestamps
        entry["name"] = os.path.basename(entry["name"])
        entry["metadata"]["start-time"] = dates.parse_timestamp(entry["metadata"]["start-time"])
    results.sort(key=lambda entry: entry["metadata"]["start-time"])
    return results
def get_remote_basebackups_info(self, site):
    """Fetch and normalize the basebackup listing for *site*, sorted by start time."""
    transfer = self.site_transfers.get(site)
    if not transfer:
        # first use for this site: build the client and cache it
        transfer_config = get_object_storage_config(self.config, site)
        transfer = get_transfer(transfer_config)
        self.site_transfers[site] = transfer
    site_prefix = self.config["backup_sites"][site]["prefix"]
    results = transfer.list_path(os.path.join(site_prefix, "basebackup"))
    for entry in results:
        # drop path from resulting list and convert timestamps
        entry["name"] = os.path.basename(entry["name"])
        entry["metadata"]["start-time"] = dates.parse_timestamp(entry["metadata"]["start-time"])
    results.sort(key=lambda item: item["metadata"]["start-time"])
    return results
def print_basebackup_list(basebackups, *, caption="Available basebackups", verbose=True):
    """Print a table of basebackups sorted by name.

    Sizes prefer the "total-size-enc"/"total-size-plain" metadata keys and
    fall back to the raw object size / "original-file-size".  Start times
    are shown as naive UTC with a trailing "Z".
    """
    print(caption, "\n")
    row = "{name:40} {size:>11} {orig_size:>11} {time:20}".format
    print(row(name="Basebackup", size="Backup size", time="Start time", orig_size="Orig size"))
    print(row(name="-" * 40, size="-" * 11, time="-" * 20, orig_size="-" * 11))
    for backup in sorted(basebackups, key=lambda entry: entry["name"]):
        meta = backup["metadata"].copy()
        started = meta.pop("start-time")
        if isinstance(started, str):
            started = dates.parse_timestamp(started)
        if started.tzinfo:
            # normalize to naive UTC so the "Z" suffix is correct
            started = started.astimezone(datetime.timezone.utc).replace(tzinfo=None)
        started_str = started.isoformat()[:19] + "Z"  # # pylint: disable=no-member
        size_str = "{} MB".format(int(meta.get("total-size-enc", backup["size"])) // (1024 ** 2))
        orig_size = int(meta.get("total-size-plain", meta.get("original-file-size")) or 0)
        orig_size_str = "{} MB".format(orig_size // (1024 ** 2)) if orig_size else "n/a"
        print(row(name=backup["name"], size=size_str, time=started_str, orig_size=orig_size_str))
        if verbose:
            print(" metadata:", meta)
def _get_basebackup(self, pgdata, basebackup, site, primary_conninfo=None, recovery_end_command=None,
                    recovery_target_action=None, recovery_target_name=None, recovery_target_time=None,
                    recovery_target_xid=None, restore_to_master=None, overwrite=False, tablespace_mapping=None):
    """Download, decrypt/decompress and extract a basebackup into *pgdata*,
    then write a recovery.conf pointing at this PGHoard instance.

    At most one of recovery_target_name/time/xid may be given; with
    recovery_target_time set (or basebackup == "latest") the backup to
    restore is selected automatically.  Raises RestoreError on conflicting
    targets or unusable target directories.
    """
    targets = [recovery_target_name, recovery_target_time, recovery_target_xid]
    if sum(0 if flag is None else 1 for flag in targets) > 1:
        raise RestoreError("Specify at most one of recovery_target_name, "
                           "recovery_target_time or recovery_target_xid")
    # If basebackup that we want it set as latest, figure out which one it is
    if recovery_target_time:
        try:
            recovery_target_time = dates.parse_timestamp(recovery_target_time)
        except (TypeError, ValueError) as ex:
            raise RestoreError("recovery_target_time {!r}: {}".format(recovery_target_time, ex))
        basebackup = self._find_nearest_basebackup(recovery_target_time)
    elif basebackup == "latest":
        basebackup = self._find_nearest_basebackup()
    # Grab basebackup metadata to make sure it exists and to look up tablespace requirements
    metadata = self.storage.get_basebackup_metadata(basebackup)

    # Make sure we have a proper place to write the $PGDATA and possible tablespaces
    dirs_to_create = []
    dirs_to_recheck = []
    dirs_to_wipe = []
    if not os.path.exists(pgdata):
        dirs_to_create.append(pgdata)
    elif overwrite:
        dirs_to_create.append(pgdata)
        dirs_to_wipe.append(pgdata)
    elif os.listdir(pgdata) in ([], ["lost+found"]):
        # Allow empty directories as well as ext3/4 mount points to be used, but check that we can write to them
        dirs_to_recheck.append(["$PGDATA", pgdata])
    else:
        raise RestoreError("$PGDATA target directory {!r} exists, is not empty and --overwrite not specified, aborting."
                           .format(pgdata))

    # Tablespace information is stored in object store metadata as
    # "tablespace-name-<oid>" / "tablespace-path-<oid>" key pairs.
    tablespaces = {}
    tsmetare = re.compile("^tablespace-name-([0-9]+)$")
    for kw, value in metadata.items():
        match = tsmetare.match(kw)
        if not match:
            continue
        tsoid = match.group(1)
        tsname = value
        tspath = tablespace_mapping.pop(tsname, metadata["tablespace-path-{}".format(tsoid)])
        if not os.path.exists(tspath):
            raise RestoreError("Tablespace {!r} target directory {!r} does not exist, aborting."
                               .format(tsname, tspath))
        if os.listdir(tspath) not in ([], ["lost+found"]):
            # Allow empty directories as well as ext3/4 mount points to be used, but check that we can write to them
            raise RestoreError("Tablespace {!r} target directory {!r} exists but is not empty, aborting."
                               .format(tsname, tspath))
        print("Using existing empty directory {!r} for tablespace {!r}".format(tspath, tsname))
        tablespaces[tsname] = {
            "oid": int(tsoid),
            "path": tspath,
        }
        dirs_to_recheck.append(["Tablespace {!r}".format(tsname), tspath])

    # We .pop() the elements of tablespace_mapping above - if mappings are given they must all exist or the
    # user probably made a typo with tablespace names, abort in that case.
    if tablespace_mapping:
        raise RestoreError("Tablespace mapping for {} was requested, but the tablespaces are not present in the backup"
                           .format(sorted(tablespace_mapping)))

    # First check that the existing (empty) directories are writable, then possibly wipe any directories as
    # requested by --overwrite and finally create the new dirs
    for diruse, dirname in dirs_to_recheck:
        try:
            tempfile.TemporaryFile(dir=dirname).close()
        except PermissionError:
            raise RestoreError("{} target directory {!r} is empty, but not writable, aborting."
                               .format(diruse, dirname))
    for dirname in dirs_to_wipe:
        shutil.rmtree(dirname)
    for dirname in dirs_to_create:
        os.makedirs(dirname)
        os.chmod(dirname, 0o700)

    def download_progress(current_pos, expected_max, end=""):
        # "\r" keeps rewriting the same terminal line on each update
        print("\rDownload progress: {:.2%}".format(current_pos / expected_max), end=end)

    # Spool the (compressed, possibly encrypted) backup to a temp file, then
    # stream-decode it with rohmufile and extract into pgdata/tablespaces.
    with tempfile.TemporaryFile(dir=self.config["backup_location"], prefix="basebackup.", suffix=".pghoard") as tmp:
        self.storage.get_basebackup_file_to_fileobj(basebackup, tmp, progress_callback=download_progress)
        download_progress(1, 1, end="\n")
        tmp.seek(0)
        with rohmufile.file_reader(fileobj=tmp, metadata=metadata,
                                   key_lookup=config.key_lookup_for_site(self.config, site)) as input_obj:
            if metadata.get("format") == "pghoard-bb-v1":
                self._extract_pghoard_bb_v1(input_obj, pgdata, tablespaces)
            else:
                # presumably a plain tar stream for non-v1 formats -- TODO confirm
                self._extract_basic(input_obj, pgdata)

    create_recovery_conf(
        dirpath=pgdata,
        site=site,
        port=self.config["http_port"],
        primary_conninfo=primary_conninfo,
        recovery_end_command=recovery_end_command,
        recovery_target_action=recovery_target_action,
        recovery_target_name=recovery_target_name,
        recovery_target_time=recovery_target_time,
        recovery_target_xid=recovery_target_xid,
        restore_to_master=restore_to_master,
    )

    print("Basebackup restoration complete.")
    print("You can start PostgreSQL by running pg_ctl -D %s start" % pgdata)
    print("On systemd based systems you can run systemctl start postgresql")
    print("On SYSV Init based systems you can run /etc/init.d/postgresql start")
def _get_basebackup(self, pgdata, basebackup, site, debug=False, status_output_file=None, primary_conninfo=None,
                    recovery_end_command=None, recovery_target_action=None, recovery_target_name=None,
                    recovery_target_time=None, recovery_target_xid=None, restore_to_master=None, overwrite=False,
                    tablespace_mapping=None, tablespace_base_dir=None):
    """Restore a basebackup into *pgdata* via BasebackupFetcher and write a
    recovery.conf for it.

    Backup selection: recovery_target_time picks the nearest earlier backup,
    "latest" picks the newest, any other string is looked up by name.
    Supports pghoard-bb-v2 (chunked, metadata object), pghoard-bb-v1
    (tablespace info in object-store metadata) and raw backup formats.
    Raises RestoreError on conflicting targets or unusable directories.
    """
    targets = [recovery_target_name, recovery_target_time, recovery_target_xid]
    if sum(0 if flag is None else 1 for flag in targets) > 1:
        raise RestoreError("Specify at most one of recovery_target_name, "
                           "recovery_target_time or recovery_target_xid")
    # If basebackup that we want it set as latest, figure out which one it is
    if recovery_target_time:
        try:
            recovery_target_time = dates.parse_timestamp(recovery_target_time)
        except (TypeError, ValueError) as ex:
            raise RestoreError("recovery_target_time {!r}: {}".format(recovery_target_time, ex))
        basebackup = self._find_nearest_basebackup(recovery_target_time)
    elif basebackup == "latest":
        basebackup = self._find_nearest_basebackup()
    elif isinstance(basebackup, str):
        basebackup = self._find_basebackup_for_name(basebackup)
    # Grab basebackup metadata to make sure it exists and to look up tablespace requirements
    metadata = self.storage.get_basebackup_metadata(basebackup["name"])
    tablespaces = {}

    # Make sure we have a proper place to write the $PGDATA and possible tablespaces
    dirs_to_create = []
    dirs_to_recheck = []
    dirs_to_wipe = []
    if not os.path.exists(pgdata):
        dirs_to_create.append(pgdata)
    elif overwrite:
        dirs_to_create.append(pgdata)
        dirs_to_wipe.append(pgdata)
    elif os.listdir(pgdata) in ([], ["lost+found"]):
        # Allow empty directories as well as ext3/4 mount points to be used, but check that we can write to them
        dirs_to_recheck.append(["$PGDATA", pgdata])
    else:
        raise RestoreError("$PGDATA target directory {!r} exists, is not empty and --overwrite not specified, aborting."
                           .format(pgdata))

    if metadata.get("format") == "pghoard-bb-v2":
        # "Backup file" is a metadata object, fetch it to get more information
        bmeta_compressed = self.storage.get_file_bytes(basebackup["name"])
        with rohmufile.file_reader(fileobj=io.BytesIO(bmeta_compressed), metadata=metadata,
                                   key_lookup=config.key_lookup_for_site(self.config, site)) as input_obj:
            bmeta = common.extract_pghoard_bb_v2_metadata(input_obj)
        self.log.debug("Backup metadata: %r", bmeta)
        tablespaces = bmeta["tablespaces"]
        # Each data file is a [storage_key, size] pair for the fetcher
        basebackup_data_files = [[
            os.path.join(self.config["backup_sites"][site]["prefix"], "basebackup_chunk", chunk["chunk_filename"]),
            chunk["result_size"],
        ] for chunk in bmeta["chunks"]]
        # We need the files from the main basebackup file too
        basebackup_data_files.append([(bmeta_compressed, metadata), 0])
    elif metadata.get("format") == "pghoard-bb-v1":
        # Tablespace information stored in object store metadata, look it up
        tsmetare = re.compile("^tablespace-name-([0-9]+)$")
        for kw, value in metadata.items():
            match = tsmetare.match(kw)
            if not match:
                continue
            tsoid = match.group(1)
            tsname = value
            tspath = metadata["tablespace-path-{}".format(tsoid)]
            tablespaces[tsname] = {
                "oid": int(tsoid),
                "path": tspath,
            }
        basebackup_data_files = [[basebackup["name"], basebackup["size"]]]
    else:
        # Object is a raw (encrypted, compressed) basebackup
        basebackup_data_files = [[basebackup["name"], basebackup["size"]]]

    if tablespace_base_dir and not os.path.exists(tablespace_base_dir) and not overwrite:
        # we just care that the dir exists, but we're OK if there are other objects there
        raise RestoreError("Tablespace base directory {!r} does not exist, aborting.".format(tablespace_base_dir))

    # Map tablespaces as requested and make sure the directories exist
    for tsname, tsinfo in tablespaces.items():
        tspath = tablespace_mapping.pop(tsname, tsinfo["path"])

        if tablespace_base_dir and not os.path.exists(tspath):
            # create a per-oid directory under the base dir when the original path is absent
            tspath = os.path.join(tablespace_base_dir, str(tsinfo["oid"]))
            os.makedirs(tspath, exist_ok=True)

        if not os.path.exists(tspath):
            raise RestoreError("Tablespace {!r} target directory {!r} does not exist, aborting."
                               .format(tsname, tspath))
        if os.listdir(tspath) not in ([], ["lost+found"]):
            # Allow empty directories as well as ext3/4 mount points to be used, but check that we can write to them
            raise RestoreError("Tablespace {!r} target directory {!r} exists but is not empty, aborting."
                               .format(tsname, tspath))
        tsinfo["path"] = tspath

        print("Using existing empty directory {!r} for tablespace {!r}".format(tspath, tsname))
        dirs_to_recheck.append(["Tablespace {!r}".format(tsname), tspath])

    # We .pop() the elements of tablespace_mapping above - if mappings are given they must all exist or the
    # user probably made a typo with tablespace names, abort in that case.
    if tablespace_mapping:
        raise RestoreError("Tablespace mapping for {} was requested, but the tablespaces are not present in the backup"
                           .format(sorted(tablespace_mapping)))

    # First check that the existing (empty) directories are writable, then possibly wipe any directories as
    # requested by --overwrite and finally create the new dirs
    for diruse, dirname in dirs_to_recheck:
        try:
            tempfile.TemporaryFile(dir=dirname).close()
        except PermissionError:
            raise RestoreError("{} target directory {!r} is empty, but not writable, aborting."
                               .format(diruse, dirname))
    for dirname in dirs_to_wipe:
        shutil.rmtree(dirname)
    for dirname in dirs_to_create:
        os.makedirs(dirname)
        os.chmod(dirname, 0o700)

    # The fetcher handles download + extraction of all data files
    fetcher = BasebackupFetcher(
        app_config=self.config,
        data_files=basebackup_data_files,
        status_output_file=status_output_file,
        debug=debug,
        pgdata=pgdata,
        site=site,
        tablespaces=tablespaces,
    )
    fetcher.fetch_all()

    create_recovery_conf(
        dirpath=pgdata,
        site=site,
        port=self.config["http_port"],
        primary_conninfo=primary_conninfo,
        recovery_end_command=recovery_end_command,
        recovery_target_action=recovery_target_action,
        recovery_target_name=recovery_target_name,
        recovery_target_time=recovery_target_time,
        recovery_target_xid=recovery_target_xid,
        restore_to_master=restore_to_master,
    )

    print("Basebackup restoration complete.")
    print("You can start PostgreSQL by running pg_ctl -D %s start" % pgdata)
    print("On systemd based systems you can run systemctl start postgresql")
    print("On SYSV Init based systems you can run /etc/init.d/postgresql start")
def _get_basebackup(self, pgdata, basebackup, site, primary_conninfo=None, recovery_end_command=None,
                    recovery_target_action=None, recovery_target_name=None, recovery_target_time=None,
                    recovery_target_xid=None, restore_to_master=None, overwrite=False, tablespace_mapping=None):
    """Restore a basebackup into *pgdata* using parallel download and
    extraction thread pools, then write a recovery.conf.

    Supports pghoard-bb-v2 (chunked), pghoard-bb-v1 and raw formats.
    Raises RestoreError on conflicting recovery targets, unusable target
    directories, or any download/extraction errors.
    """
    targets = [recovery_target_name, recovery_target_time, recovery_target_xid]
    if sum(0 if flag is None else 1 for flag in targets) > 1:
        raise RestoreError("Specify at most one of recovery_target_name, "
                           "recovery_target_time or recovery_target_xid")
    # If basebackup that we want it set as latest, figure out which one it is
    if recovery_target_time:
        try:
            recovery_target_time = dates.parse_timestamp(recovery_target_time)
        except (TypeError, ValueError) as ex:
            raise RestoreError("recovery_target_time {!r}: {}".format(recovery_target_time, ex))
        basebackup = self._find_nearest_basebackup(recovery_target_time)
    elif basebackup == "latest":
        basebackup = self._find_nearest_basebackup()
    # Grab basebackup metadata to make sure it exists and to look up tablespace requirements
    metadata = self.storage.get_basebackup_metadata(basebackup)
    tablespaces = {}

    # Make sure we have a proper place to write the $PGDATA and possible tablespaces
    dirs_to_create = []
    dirs_to_recheck = []
    dirs_to_wipe = []
    if not os.path.exists(pgdata):
        dirs_to_create.append(pgdata)
    elif overwrite:
        dirs_to_create.append(pgdata)
        dirs_to_wipe.append(pgdata)
    elif os.listdir(pgdata) in ([], ["lost+found"]):
        # Allow empty directories as well as ext3/4 mount points to be used, but check that we can write to them
        dirs_to_recheck.append(["$PGDATA", pgdata])
    else:
        raise RestoreError("$PGDATA target directory {!r} exists, is not empty and --overwrite not specified, aborting."
                           .format(pgdata))

    if metadata.get("format") == "pghoard-bb-v2":
        # "Backup file" is a metadata object, fetch it to get more information
        bmeta_compressed = self.storage.get_file_bytes(basebackup)
        with rohmufile.file_reader(fileobj=io.BytesIO(bmeta_compressed), metadata=metadata,
                                   key_lookup=config.key_lookup_for_site(self.config, site)) as input_obj:
            bmeta = common.extract_pghoard_bb_v2_metadata(input_obj)
        self.log.debug("Backup metadata: %r", bmeta)
        tablespaces = bmeta["tablespaces"]
        # Each entry is [storage_key, size]; in-memory objects are (fileobj, metadata) tuples
        basebackup_data_files = [
            [
                os.path.join(self.config["path_prefix"], site, "basebackup_chunk", chunk["chunk_filename"]),
                chunk["result_size"],
            ]
            for chunk in bmeta["chunks"]
        ]
        # We need the files from the main basebackup file too
        basebackup_data_files.append([(io.BytesIO(bmeta_compressed), metadata), 0])
    elif metadata.get("format") == "pghoard-bb-v1":
        # Tablespace information stored in object store metadata, look it up
        tsmetare = re.compile("^tablespace-name-([0-9]+)$")
        for kw, value in metadata.items():
            match = tsmetare.match(kw)
            if not match:
                continue
            tsoid = match.group(1)
            tsname = value
            tspath = metadata["tablespace-path-{}".format(tsoid)]
            tablespaces[tsname] = {
                "oid": int(tsoid),
                "path": tspath,
            }
        # -1 apparently marks "size unknown" for progress accounting -- TODO confirm
        basebackup_data_files = [[basebackup, -1]]
    else:
        # Object is a raw (encrypted, compressed) basebackup
        basebackup_data_files = [[basebackup, -1]]

    # Map tablespaces as requested and make sure the directories exist
    for tsname, tsinfo in tablespaces.items():
        tspath = tablespace_mapping.pop(tsname, tsinfo["path"])
        if not os.path.exists(tspath):
            raise RestoreError("Tablespace {!r} target directory {!r} does not exist, aborting."
                               .format(tsname, tspath))
        if os.listdir(tspath) not in ([], ["lost+found"]):
            # Allow empty directories as well as ext3/4 mount points to be used, but check that we can write to them
            raise RestoreError("Tablespace {!r} target directory {!r} exists but is not empty, aborting."
                               .format(tsname, tspath))
        tsinfo["path"] = tspath
        print("Using existing empty directory {!r} for tablespace {!r}".format(tspath, tsname))
        dirs_to_recheck.append(["Tablespace {!r}".format(tsname), tspath])

    # We .pop() the elements of tablespace_mapping above - if mappings are given they must all exist or the
    # user probably made a typo with tablespace names, abort in that case.
    if tablespace_mapping:
        raise RestoreError("Tablespace mapping for {} was requested, but the tablespaces are not present in the backup"
                           .format(sorted(tablespace_mapping)))

    # First check that the existing (empty) directories are writable, then possibly wipe any directories as
    # requested by --overwrite and finally create the new dirs
    for diruse, dirname in dirs_to_recheck:
        try:
            tempfile.TemporaryFile(dir=dirname).close()
        except PermissionError:
            raise RestoreError("{} target directory {!r} is empty, but not writable, aborting."
                               .format(diruse, dirname))
    for dirname in dirs_to_wipe:
        shutil.rmtree(dirname)
    for dirname in dirs_to_create:
        os.makedirs(dirname)
        os.chmod(dirname, 0o700)

    total_download_size = sum(item[1] for item in basebackup_data_files)
    # single-element list so the nested closures can mutate the shared value
    progress_report_time = [0]
    download_errors = 0
    extract_errors = 0
    # Downloads and extractions run in separate pools; completed downloads
    # are handed straight to the extraction pool.
    with futures.ThreadPoolExecutor(max_workers=self.config["compression"]["thread_count"]) as extract_executor:
        extract_jobs = []
        with futures.ThreadPoolExecutor(max_workers=self.config["transfer"]["thread_count"]) as download_executor:
            download_jobs = []
            download_progress_per_file = {
                basebackup_data_file: 0
                for basebackup_data_file, _ in basebackup_data_files
                if not isinstance(basebackup_data_file, tuple)
            }

            def download_progress(end=""):
                # report max once per second
                if time.monotonic() - progress_report_time[0] < 1:
                    return
                progress_report_time[0] = time.monotonic()
                total_downloaded = sum(download_progress_per_file.values())
                if total_download_size <= 0:
                    progress = 0
                else:
                    progress = total_downloaded / total_download_size
                print("\rDownload progress: {progress:.2%} ({dl_mib:.0f} / {total_mib:.0f} MiB)\r".format(
                    progress=progress,
                    dl_mib=total_downloaded / (1024 ** 2),
                    total_mib=total_download_size / (1024 ** 2),
                ), end=end)

            for basebackup_data_file, backup_data_file_size in basebackup_data_files:
                if isinstance(basebackup_data_file, tuple):
                    # already in memory (the v2 metadata object): extract directly
                    tmp_obj, tmp_metadata = basebackup_data_file
                    extract_jobs.append(extract_executor.submit(
                        self.extract_one_backup,
                        obj=tmp_obj,
                        metadata=tmp_metadata,
                        pgdata=pgdata,
                        site=site,
                        tablespaces=tablespaces,
                    ))
                    continue

                # default arguments bind the current loop values (avoids the
                # late-binding closure pitfall)
                def single_download_progress(current_pos, expected_max,
                                             this_file_name=basebackup_data_file,
                                             this_file_size=backup_data_file_size):
                    download_progress_per_file[this_file_name] = this_file_size * (current_pos / expected_max)
                    download_progress()

                # NOTE: Most of the transfer clients aren't thread-safe, so initialize a new transfer
                # client for each download. We could use thread local storage or pooling here, but
                # probably not worth the trouble for this use case.
                transfer = get_transfer(common.get_object_storage_config(self.config, site))
                download_jobs.append(download_executor.submit(
                    self.download_one_backup,
                    basebackup_data_file=basebackup_data_file,
                    progress_callback=single_download_progress,
                    site=site,
                    transfer=transfer,
                ))

            for future in futures.as_completed(download_jobs):
                if future.exception():
                    self.log.error("Got error from chunk download: %s", future.exception())
                    download_errors += 1
                    continue
                tmp_obj, tmp_metadata = future.result()
                extract_jobs.append(extract_executor.submit(
                    self.extract_one_backup,
                    obj=tmp_obj,
                    metadata=tmp_metadata,
                    pgdata=pgdata,
                    site=site,
                    tablespaces=tablespaces,
                ))
            # reset the throttle so the final 100% line is always printed
            progress_report_time[0] = 0
            download_progress(end="\n")

        for future in futures.as_completed(extract_jobs):
            if future.exception():
                self.log.error("Got error from chunk extraction: %s", future.exception())
                extract_errors += 1
                continue

    if download_errors:
        raise RestoreError("Backup download failed with {} errors".format(download_errors))
    if extract_errors:
        raise RestoreError("Backup extraction failed with {} errors".format(extract_errors))

    create_recovery_conf(
        dirpath=pgdata,
        site=site,
        port=self.config["http_port"],
        primary_conninfo=primary_conninfo,
        recovery_end_command=recovery_end_command,
        recovery_target_action=recovery_target_action,
        recovery_target_name=recovery_target_name,
        recovery_target_time=recovery_target_time,
        recovery_target_xid=recovery_target_xid,
        restore_to_master=restore_to_master,
    )

    print("Basebackup restoration complete.")
    print("You can start PostgreSQL by running pg_ctl -D %s start" % pgdata)
    print("On systemd based systems you can run systemctl start postgresql")
    print("On SYSV Init based systems you can run /etc/init.d/postgresql start")
def _get_basebackup(self, pgdata, basebackup, site, debug=False, status_output_file=None,
                    primary_conninfo=None, recovery_end_command=None, recovery_target_action=None,
                    recovery_target_name=None, recovery_target_time=None, recovery_target_xid=None,
                    restore_to_master=None, overwrite=False, tablespace_mapping=None,
                    tablespace_base_dir=None):
    """Download and extract a basebackup into *pgdata* and write the recovery configuration.

    :param pgdata: target $PGDATA directory; must be missing, empty (or contain only
        ``lost+found``), or ``overwrite`` must be set
    :param basebackup: backup selector: ``"latest"``, a backup name, or an already-resolved
        backup object
    :param site: backup site name used to look up storage and encryption configuration
    :param status_output_file: optional path for fetcher progress/status output
    :param recovery_target_name/time/xid: mutually exclusive PITR targets; at most one may
        be given.  ``recovery_target_time`` is parsed and used to pick the nearest backup.
    :param overwrite: wipe and recreate existing target directories instead of aborting
    :param tablespace_mapping: optional {tablespace name: target path} overrides; every
        given mapping must match a tablespace present in the backup
    :param tablespace_base_dir: base directory under which missing tablespace directories
        are created (named by tablespace oid)
    :raises RestoreError: on conflicting options, unusable target directories, or unknown
        tablespace mappings
    """
    targets = [recovery_target_name, recovery_target_time, recovery_target_xid]
    if sum(0 if flag is None else 1 for flag in targets) > 1:
        raise RestoreError("Specify at most one of recovery_target_name, "
                           "recovery_target_time or recovery_target_xid")

    # Work on a private copy: we destructively .pop() entries below, and the default
    # None must not crash when the backup actually contains tablespaces.
    tablespace_mapping = dict(tablespace_mapping or {})

    # Resolve the basebackup selector ("latest", a PITR target time, or a name)
    # into a concrete backup object.
    if recovery_target_time:
        try:
            recovery_target_time = dates.parse_timestamp(recovery_target_time)
        except (TypeError, ValueError) as ex:
            raise RestoreError("recovery_target_time {!r}: {}".format(recovery_target_time, ex)) from ex
        basebackup = self._find_nearest_basebackup(recovery_target_time)
    elif basebackup == "latest":
        basebackup = self._find_nearest_basebackup()
    elif isinstance(basebackup, str):
        basebackup = self._find_basebackup_for_name(basebackup)

    # Grab basebackup metadata to make sure it exists and to look up tablespace requirements
    metadata = self.storage.get_basebackup_metadata(basebackup["name"])
    tablespaces = {}

    # Make sure we have a proper place to write the $PGDATA and possible tablespaces
    dirs_to_create = []
    dirs_to_recheck = []
    dirs_to_wipe = []

    if not os.path.exists(pgdata):
        dirs_to_create.append(pgdata)
    elif overwrite:
        dirs_to_create.append(pgdata)
        dirs_to_wipe.append(pgdata)
    elif os.listdir(pgdata) in ([], ["lost+found"]):
        # Allow empty directories as well as ext3/4 mount points to be used, but check that we can write to them
        dirs_to_recheck.append(["$PGDATA", pgdata])
    else:
        raise RestoreError("$PGDATA target directory {!r} exists, is not empty and --overwrite not specified, aborting."
                           .format(pgdata))

    if metadata.get("format") == "pghoard-bb-v2":
        # "Backup file" is a metadata object, fetch it to get more information
        bmeta_compressed = self.storage.get_file_bytes(basebackup["name"])
        with rohmufile.file_reader(fileobj=io.BytesIO(bmeta_compressed), metadata=metadata,
                                   key_lookup=config.key_lookup_for_site(self.config, site)) as input_obj:
            bmeta = common.extract_pghoard_bb_v2_metadata(input_obj)
        self.log.debug("Backup metadata: %r", bmeta)

        tablespaces = bmeta["tablespaces"]
        basebackup_data_files = [
            [
                os.path.join(self.config["backup_sites"][site]["prefix"],
                             "basebackup_chunk", chunk["chunk_filename"]),
                chunk["result_size"],
            ]
            for chunk in bmeta["chunks"]
        ]
        # We need the files from the main basebackup file too
        basebackup_data_files.append([(bmeta_compressed, metadata), 0])

    elif metadata.get("format") == "pghoard-bb-v1":
        # Tablespace information stored in object store metadata, look it up
        tsmetare = re.compile("^tablespace-name-([0-9]+)$")
        for kw, value in metadata.items():
            match = tsmetare.match(kw)
            if not match:
                continue
            tsoid = match.group(1)
            tsname = value
            tspath = metadata["tablespace-path-{}".format(tsoid)]
            tablespaces[tsname] = {
                "oid": int(tsoid),
                "path": tspath,
            }
        basebackup_data_files = [[basebackup["name"], basebackup["size"]]]

    else:
        # Object is a raw (encrypted, compressed) basebackup
        basebackup_data_files = [[basebackup["name"], basebackup["size"]]]

    if tablespace_base_dir and not os.path.exists(tablespace_base_dir) and not overwrite:
        # we just care that the dir exists, but we're OK if there are other objects there
        raise RestoreError("Tablespace base directory {!r} does not exist, aborting."
                           .format(tablespace_base_dir))

    # Map tablespaces as requested and make sure the directories exist
    for tsname, tsinfo in tablespaces.items():
        tspath = tablespace_mapping.pop(tsname, tsinfo["path"])

        if tablespace_base_dir and not os.path.exists(tspath):
            tspath = os.path.join(tablespace_base_dir, str(tsinfo["oid"]))
            os.makedirs(tspath, exist_ok=True)

        if not os.path.exists(tspath):
            raise RestoreError("Tablespace {!r} target directory {!r} does not exist, aborting."
                               .format(tsname, tspath))
        # Allow empty directories as well as ext3/4 mount points to be used, but check that we can write to them
        if os.listdir(tspath) not in ([], ["lost+found"]):
            raise RestoreError("Tablespace {!r} target directory {!r} exists but is not empty, aborting."
                               .format(tsname, tspath))

        tsinfo["path"] = tspath
        print("Using existing empty directory {!r} for tablespace {!r}".format(tspath, tsname))
        dirs_to_recheck.append(["Tablespace {!r}".format(tsname), tspath])

    # We .pop() the elements of tablespace_mapping above - if mappings are given they must all exist or the
    # user probably made a typo with tablespace names, abort in that case.
    if tablespace_mapping:
        raise RestoreError("Tablespace mapping for {} was requested, but the tablespaces are not present in the backup"
                           .format(sorted(tablespace_mapping)))

    # First check that the existing (empty) directories are writable, then possibly wipe any directories as
    # requested by --overwrite and finally create the new dirs
    for diruse, dirname in dirs_to_recheck:
        try:
            tempfile.TemporaryFile(dir=dirname).close()
        except PermissionError as ex:
            raise RestoreError("{} target directory {!r} is empty, but not writable, aborting."
                               .format(diruse, dirname)) from ex

    for dirname in dirs_to_wipe:
        shutil.rmtree(dirname)
    for dirname in dirs_to_create:
        os.makedirs(dirname)
        os.chmod(dirname, 0o700)  # PostgreSQL requires $PGDATA to be private to the owner

    fetcher = BasebackupFetcher(
        app_config=self.config,
        data_files=basebackup_data_files,
        status_output_file=status_output_file,
        debug=debug,
        pgdata=pgdata,
        site=site,
        tablespaces=tablespaces,
    )
    fetcher.fetch_all()

    create_recovery_conf(
        dirpath=pgdata,
        site=site,
        port=self.config["http_port"],
        primary_conninfo=primary_conninfo,
        recovery_end_command=recovery_end_command,
        recovery_target_action=recovery_target_action,
        recovery_target_name=recovery_target_name,
        recovery_target_time=recovery_target_time,
        recovery_target_xid=recovery_target_xid,
        restore_to_master=restore_to_master,
    )

    print("Basebackup restoration complete.")
    print("You can start PostgreSQL by running pg_ctl -D %s start" % pgdata)
    print("On systemd based systems you can run systemctl start postgresql")
    print("On SYSV Init based systems you can run /etc/init.d/postgresql start")