def test_alert_files(self):
    """Verify alert files can be created and then removed on disk."""
    path = os.path.join(self.config["alert_file_dir"], "test_alert")
    # Creating the alert must materialize the file...
    create_alert_file(self.pghoard.config, "test_alert")
    assert os.path.exists(path)
    # ...and deleting the alert must remove it again.
    delete_alert_file(self.pghoard.config, "test_alert")
    assert not os.path.exists(path)
def handle_upload(self, site, key, file_to_transfer):
    """Push one pending transfer to the object storage configured for *site*.

    In-memory blobs are stored directly; on-disk files are uploaded and the
    local copy (plus its ``.metadata`` sidecar) is removed afterwards.  On
    failure the transfer is re-queued with an incremented retry counter.
    """
    try:
        obj_store = self.get_object_storage(site)
        remove_local_copy = False
        if "blob" in file_to_transfer:
            obj_store.store_file_from_memory(
                key, file_to_transfer["blob"], metadata=file_to_transfer["metadata"])
        else:
            # Basebackups may be multipart uploads, depending on the driver.
            # Swift needs to know about this so it can do possible cleanups.
            is_multipart = file_to_transfer["filetype"] == "basebackup"
            try:
                obj_store.store_file_from_disk(
                    key, file_to_transfer["local_path"],
                    metadata=file_to_transfer["metadata"], multipart=is_multipart)
                remove_local_copy = True
            except LocalFileIsRemoteFileError:
                # "Remote" storage already holds this exact local file, so
                # there is nothing to copy and nothing safe to delete.
                pass
        if remove_local_copy:
            try:
                self.log.debug("Deleting file: %r since it has been uploaded",
                               file_to_transfer["local_path"])
                os.unlink(file_to_transfer["local_path"])
                # The sidecar may legitimately be absent; ignore that case.
                with suppress(FileNotFoundError):
                    os.unlink(file_to_transfer["local_path"] + ".metadata")
            except Exception as err:  # pylint: disable=broad-except
                self.log.exception("Problem in deleting file: %r", file_to_transfer["local_path"])
                self.stats.unexpected_exception(err, where="handle_upload_unlink")
        return {"success": True, "opaque": file_to_transfer.get("opaque")}
    except Exception as err:  # pylint: disable=broad-except
        attempts_so_far = file_to_transfer.get("retry_number", 0)
        if attempts_so_far > 0:
            self.log.exception("Problem in moving file: %r, need to retry", file_to_transfer["local_path"])
            # Ignore the exception the first time round as some object stores have frequent Internal Errors
            # and the upload usually goes through without any issues the second time round
            self.stats.unexpected_exception(err, where="handle_upload")
        else:
            self.log.warning("Problem in moving file: %r, need to retry (%s: %s)",
                             file_to_transfer["local_path"], err.__class__.__name__, err)
        # Sleep for a bit to avoid busy looping
        time.sleep(0.5)
        file_to_transfer["retry_number"] = attempts_so_far + 1
        if file_to_transfer["retry_number"] > self.config["upload_retries_warning_limit"]:
            create_alert_file(self.config, "upload_retries_warning")
        self.transfer_queue.put(file_to_transfer)
        return {"success": False, "call_callback": False, "exception": err}
def check_pg_server_version(self, connection_string):
    """Query the PostgreSQL server version via *connection_string*.

    Returns the integer ``server_version`` of the remote server, or None
    when it cannot be determined.  Connection failures additionally create
    an alert file for the operator.
    """
    pg_version = None
    try:
        with closing(psycopg2.connect(connection_string)) as conn:
            pg_version = conn.server_version
    except psycopg2.OperationalError as err:
        self.log.warning("%s (%s) connecting to DB at: %r",
                         err.__class__.__name__, err, connection_string)
        # Distinguish bad credentials from other connection problems so the
        # operator gets a more specific alert.
        auth_failure = "password authentication" in str(err) or "authentication failed" in str(err)
        create_alert_file(self.config, "authentication_error" if auth_failure else "configuration_error")
    except Exception:  # log all errors and return None; pylint: disable=broad-except
        self.log.exception("Problem in getting PG server version")
    return pg_version
def check_pg_versions_ok(self, pg_version_server, command):
    """Validate that the remote PG version is supported and matches the
    major version of the configured *command* client binary.

    Creates an alert file and returns False on unsupported or mismatched
    versions; returns True when the versions are compatible.
    """
    if pg_version_server is None:
        # remote pg version not available, don't create version alert in this case
        return False
    if not pg_version_server or pg_version_server <= 90200:
        self.log.error("pghoard does not support versions earlier than 9.2, found: %r", pg_version_server)
        create_alert_file(self.config, "version_unsupported_error")
        return False
    pg_version_client = self.config[command + "_version"]
    # Compare major versions only (e.g. 90604 and 90601 both map to 906).
    if pg_version_server // 100 == pg_version_client // 100:
        return True
    self.log.error("Server version: %r does not match %s version: %r",
                   pg_version_server, self.config[command + "_path"], pg_version_client)
    create_alert_file(self.config, "version_mismatch_error")
    return False
def check_pg_versions_ok(self, site, pg_version_server, command):
    """Validate the remote PG version for *site* against the configured
    *command* client binary's major version.

    Creates an alert file and returns False on unsupported or mismatched
    versions; returns True when the versions are compatible.
    """
    if pg_version_server is None:
        # remote pg version not available, don't create version alert in this case
        return False
    if not pg_version_server:
        self.log.error("pghoard does not support versions earlier than 9.3, found: %r", pg_version_server)
        create_alert_file(self.config, "version_unsupported_error")
        return False
    pg_version_client = self.config["backup_sites"][site][command + "_version"]
    # Compare major versions only (e.g. 90604 and 90601 both map to 906).
    if pg_version_server // 100 == pg_version_client // 100:
        return True
    self.log.error("Server version: %r does not match %s version: %r",
                   pg_version_server, self.config[command + "_path"], pg_version_client)
    create_alert_file(self.config, "version_mismatch_error")
    return False
def handle_upload(self, site, key, file_to_transfer):
    """Upload one queued transfer to the object storage for *site*.

    In-memory blobs are stored directly; disk files are uploaded and the
    local copy (with its ``.metadata`` sidecar) removed afterwards.  On
    failure the transfer is re-queued after an exponential backoff sleep.
    """
    try:
        obj_store = self.get_object_storage(site)
        drop_local_file = False
        if "blob" in file_to_transfer:
            obj_store.store_file_from_memory(key, file_to_transfer["blob"],
                                             metadata=file_to_transfer["metadata"])
        else:
            # Basebackups may be multipart uploads, depending on the driver.
            # Swift needs to know about this so it can do possible cleanups.
            is_multipart = file_to_transfer["filetype"] in {"basebackup", "basebackup_chunk"}
            try:
                obj_store.store_file_from_disk(key, file_to_transfer["local_path"],
                                               metadata=file_to_transfer["metadata"],
                                               multipart=is_multipart)
                drop_local_file = True
            except LocalFileIsRemoteFileError:
                # "Remote" storage already holds this exact local file; do
                # not delete it.
                pass
        if drop_local_file:
            try:
                self.log.debug("Deleting file: %r since it has been uploaded", file_to_transfer["local_path"])
                os.unlink(file_to_transfer["local_path"])
                # The sidecar may legitimately be absent; ignore that case.
                with suppress(FileNotFoundError):
                    os.unlink(file_to_transfer["local_path"] + ".metadata")
            except Exception as err:  # pylint: disable=broad-except
                self.log.exception("Problem in deleting file: %r", file_to_transfer["local_path"])
                self.metrics.unexpected_exception(err, where="handle_upload_unlink")
        return {"success": True, "opaque": file_to_transfer.get("opaque")}
    except Exception as err:  # pylint: disable=broad-except
        retries_done = file_to_transfer.get("retry_number", 0)
        if retries_done > 0:
            self.log.exception("Problem in moving file: %r, need to retry", file_to_transfer["local_path"])
            # Ignore the exception the first time round as some object stores have frequent Internal Errors
            # and the upload usually goes through without any issues the second time round
            self.metrics.unexpected_exception(err, where="handle_upload")
        else:
            self.log.warning("Problem in moving file: %r, need to retry (%s: %s)",
                             file_to_transfer["local_path"], err.__class__.__name__, err)
        file_to_transfer["retry_number"] = retries_done + 1
        if file_to_transfer["retry_number"] > self.config["upload_retries_warning_limit"]:
            create_alert_file(self.config, "upload_retries_warning")
        # Sleep for a bit to avoid busy looping. Increase sleep time if the op fails multiple times
        self.sleep(min(0.5 * 2 ** (file_to_transfer["retry_number"] - 1), 20))
        self.transfer_queue.put(file_to_transfer)
        return {"success": False, "call_callback": False, "exception": err}
def check_pg_server_version(self, connection_string):
    """Query the PostgreSQL server version via *connection_string*.

    Returns the integer ``server_version`` of the remote server, or None
    when it cannot be determined.  Connection failures create an alert
    file; unexpected errors are reported to the stats sink.
    """
    pg_version = None
    try:
        with closing(psycopg2.connect(connection_string)) as conn:
            pg_version = conn.server_version  # pylint: disable=no-member
    except psycopg2.OperationalError as err:
        self.log.warning("%s (%s) connecting to DB at: %r",
                         err.__class__.__name__, err, connection_string)
        # Bad credentials get a dedicated alert; anything else is treated
        # as a configuration problem.
        if "password authentication" in str(err) or "authentication failed" in str(err):
            create_alert_file(self.config, "authentication_error")
        else:
            create_alert_file(self.config, "configuration_error")
    except Exception as err:  # log all errors and return None; pylint: disable=broad-except
        self.log.exception("Problem in getting PG server version")
        self.stats.unexpected_exception(err, where="check_pg_server_version")
    return pg_version
def handle_upload(self, site, key, file_to_transfer):
    """Upload one queued transfer to *site*'s object storage.

    In-memory blobs are stored directly; disk files are uploaded and the
    local copy (with its ``.metadata`` sidecar) removed afterwards.  On
    failure the transfer is re-queued with an incremented retry counter.
    """
    try:
        obj_store = self.get_object_storage(site)
        delete_after_upload = False
        if "blob" in file_to_transfer:
            obj_store.store_file_from_memory(key, file_to_transfer["blob"],
                                             metadata=file_to_transfer["metadata"])
        else:
            # Basebackups may be multipart uploads, depending on the driver.
            # Swift needs to know about this so it can do possible cleanups.
            try:
                obj_store.store_file_from_disk(
                    key,
                    file_to_transfer["local_path"],
                    metadata=file_to_transfer["metadata"],
                    multipart=(file_to_transfer["filetype"] == "basebackup"),
                )
                delete_after_upload = True
            except LocalFileIsRemoteFileError:
                # "Remote" storage already holds this exact local file; do
                # not delete it.
                pass
        if delete_after_upload:
            try:
                self.log.debug("Deleting file: %r since it has been uploaded", file_to_transfer["local_path"])
                os.unlink(file_to_transfer["local_path"])
                # The sidecar may legitimately be absent; ignore that case.
                with suppress(FileNotFoundError):
                    os.unlink(file_to_transfer["local_path"] + ".metadata")
            except Exception as err:  # pylint: disable=broad-except
                self.log.exception("Problem in deleting file: %r", file_to_transfer["local_path"])
                self.stats.unexpected_exception(err, where="handle_upload_unlink")
        return {"success": True, "opaque": file_to_transfer.get("opaque")}
    except Exception as err:  # pylint: disable=broad-except
        self.log.exception("Problem in moving file: %r, need to retry", file_to_transfer["local_path"])
        self.stats.unexpected_exception(err, where="handle_upload")
        # Sleep for a bit to avoid busy looping
        time.sleep(0.5)
        file_to_transfer["retries"] = file_to_transfer.get("retries", 0) + 1
        if file_to_transfer["retries"] > self.config["upload_retries_warning_limit"]:
            create_alert_file(self.config, "upload_retries_warning")
        self.transfer_queue.put(file_to_transfer)
        return {"success": False, "call_callback": False, "exception": err}
def check_pg_server_version(self, connection_string, site):
    """Return the PostgreSQL server version for *site*, querying and caching it.

    A previously cached value in the site configuration is returned
    without opening a connection; otherwise the server is queried and the
    result stored back into the configuration.  Returns None when the
    server cannot be reached (and creates an alert file in that case).
    """
    site_config = self.config["backup_sites"][site]
    if "pg_version" in site_config:
        return site_config["pg_version"]
    pg_version = None
    try:
        with closing(psycopg2.connect(connection_string)) as conn:
            pg_version = conn.server_version  # pylint: disable=no-member
        # Cache pg_version so we don't have to query it again, note that this means that for major
        # version upgrades you want to restart pghoard.
        site_config["pg_version"] = pg_version
    except psycopg2.OperationalError as err:
        self.log.warning("%s (%s) connecting to DB at: %r",
                         err.__class__.__name__, err, connection_string)
        # Bad credentials get a dedicated alert; anything else is treated
        # as a configuration problem.
        if "password authentication" in str(err) or "authentication failed" in str(err):
            create_alert_file(self.config, "authentication_error")
        else:
            create_alert_file(self.config, "configuration_error")
    except Exception as err:  # log all errors and return None; pylint: disable=broad-except
        self.log.exception("Problem in getting PG server version")
        self.metrics.unexpected_exception(err, where="check_pg_server_version")
    return pg_version
def handle_upload(self, site, key, file_to_transfer):
    """Upload one queued transfer to the object storage for *site*.

    In-memory blobs (``file_to_transfer["blob"]``) are stored directly;
    disk files are uploaded and the local copy plus its ``.metadata``
    sidecar removed afterwards.  On any upload failure the transfer is
    re-queued with an incremented ``retries`` counter and an alert file is
    created once the configured retry warning limit is exceeded.

    Returns a result dict: ``{"success": True, "opaque": ...}`` on
    success, or ``{"success": False, "call_callback": False,
    "exception": ...}`` after re-queueing on failure.
    """
    try:
        storage = self.get_object_storage(site)
        unlink_local = False
        if "blob" in file_to_transfer:
            storage.store_file_from_memory(key, file_to_transfer["blob"],
                                           metadata=file_to_transfer["metadata"])
        else:
            # Basebackups may be multipart uploads, depending on the driver.
            # Swift needs to know about this so it can do possible cleanups.
            multipart = file_to_transfer["filetype"] == "basebackup"
            try:
                storage.store_file_from_disk(key, file_to_transfer["local_path"],
                                             metadata=file_to_transfer["metadata"],
                                             multipart=multipart)
                unlink_local = True
            except LocalFileIsRemoteFileError:
                # "Remote" storage already holds this exact local file, so
                # there is nothing to copy and nothing safe to delete.
                pass
        if unlink_local:
            try:
                self.log.debug("Deleting file: %r since it has been uploaded", file_to_transfer["local_path"])
                os.unlink(file_to_transfer["local_path"])
                metadata_path = file_to_transfer["local_path"] + ".metadata"
                # The sidecar may legitimately be absent; ignore that case.
                with suppress(FileNotFoundError):
                    os.unlink(metadata_path)
            # BUGFIX: this was a bare ``except:`` which also swallowed
            # KeyboardInterrupt/SystemExit; only trap real errors here.
            except Exception:  # pylint: disable=broad-except
                self.log.exception("Problem in deleting file: %r", file_to_transfer["local_path"])
        return {"success": True, "opaque": file_to_transfer.get("opaque")}
    except Exception as ex:  # pylint: disable=broad-except
        self.log.exception("Problem in moving file: %r, need to retry", file_to_transfer["local_path"])
        # Sleep for a bit to avoid busy looping
        time.sleep(0.5)
        file_to_transfer["retries"] = file_to_transfer.get("retries", 0) + 1
        if file_to_transfer["retries"] > self.config["upload_retries_warning_limit"]:
            create_alert_file(self.config, "upload_retries_warning")
        self.transfer_queue.put(file_to_transfer)
        return {"success": False, "call_callback": False, "exception": ex}