def main(cfg_name):
    """Process received tar balls and post a one-line status report.

    Loads the server configuration, validates the quarantine
    sub-directories, hands the reception area to ``process_tb`` and posts
    the resulting counts as a "status" report.

    Args:
        cfg_name: path of the pbench server configuration file.

    Returns:
        0 on success; 1 when the configuration is bad or one of the
        quarantine sub-directory checks fails; 2 when no configuration file
        was given or the quarantine / reception directories could not be
        determined.
    """
    if not cfg_name:
        print(
            f"{_NAME_}: ERROR: No config file specified; set"
            " _PBENCH_SERVER_CONFIG env variable or use --config <file> on the"
            " command line",
            file=sys.stderr,
        )
        return 2

    try:
        config = PbenchServerConfig(cfg_name)
    except BadConfig as e:
        print(f"{_NAME_}: {e} (config file {cfg_name})", file=sys.stderr)
        return 1

    logger = get_pbench_logger(_NAME_, config)

    qdir, receive_dir = fetch_config_val(config, logger)
    # Both values are required below: qdir is fed to Path() and receive_dir
    # to process_tb(), so bail out if *either* one is missing rather than
    # only when both are.
    if qdir is None or receive_dir is None:
        return 2

    qdir_md5 = qdirs_check("quarantine", Path(qdir, "md5-002"), logger)
    duplicates = qdirs_check("duplicates", Path(qdir, "duplicates-002"), logger)

    # The following directory holds tarballs that are quarantined because
    # of operational errors on the server. They should be retried after
    # the problem is fixed: basically, move them back into the reception
    # area for 002 agents and wait.
    errors = qdirs_check("errors", Path(qdir, "errors-002"), logger)

    if qdir_md5 is None or duplicates is None or errors is None:
        return 1

    counts = process_tb(config, logger, receive_dir, qdir_md5, duplicates, errors)

    result_string = (
        f"{config.TS}: Processed {counts.ntotal} entries,"
        f" {counts.ntbs} tarballs successful,"
        f" {counts.nquarantined} quarantined tarballs,"
        f" {counts.ndups} duplicately-named tarballs,"
        f" {counts.nerrs} errors."
    )
    logger.info(result_string)

    # prepare and send report
    with tempfile.NamedTemporaryFile(mode="w+t", dir=config.TMP) as reportfp:
        reportfp.write(f"{counts.nstatus}{result_string}\n")
        # Rewind so the report can be read back from the start via its name.
        reportfp.seek(0)

        report = Report(config, _NAME_)
        report.init_report_template()
        try:
            report.post_status(config.timestamp(), "status", reportfp.name)
        except Exception as exc:
            # Posting the report is best-effort; a failure must not change
            # the exit status of the run itself.
            logger.warning("Report post Unsuccesful: '{}'", exc)

    return 0
) parsed = parser.parse_args() try: config = PbenchServerConfig(parsed.cfg_name) except BadConfig as e: print(f"{_prog}: {e}", file=sys.stderr) sys.exit(1) # We're going to need the Postgres DB to track dataset state, so setup # DB access. We don't pass a Logger here, because that introduces lots # of spurious changes in the gold CLI test output. init_db(config, None) hostname = gethostname() pid = parsed.pid group_id = parsed.group_id user_id = parsed.user_id report = Report( config, parsed.name, pid=pid, group_id=group_id, user_id=user_id, hostname=hostname ) report.init_report_template() try: report.post_status(parsed.timestamp, parsed.doctype, parsed.file_to_index[0]) except Exception: status = 1 else: status = 0 sys.exit(status)
def main(cfg_name):
    """Back up tar balls locally and/or to S3 and post a status report.

    Loads the server configuration, sets up DB access, builds the local and
    S3 backup objects, runs ``backup_data`` and posts the resulting counts
    as a "status" report.

    Args:
        cfg_name: path of the pbench server configuration file.

    Returns:
        0 on success; 1 when the configuration is bad; 2 when no
        configuration file was given; 3 when ``sanity_check`` leaves neither
        a local nor an S3 backup target available.
    """
    if not cfg_name:
        print(
            f"{_NAME_}: ERROR: No config file specified; set"
            " _PBENCH_SERVER_CONFIG env variable or use --config <file> on the"
            " command line",
            file=sys.stderr,
        )
        return 2

    try:
        config = PbenchServerConfig(cfg_name)
    except BadConfig as e:
        print(f"{_NAME_}: {e}", file=sys.stderr)
        return 1

    logger = get_pbench_logger(_NAME_, config)

    # We're going to need the Postgres DB to track dataset state, so setup
    # DB access.
    init_db(config, logger)

    # Add a BACKUP and QDIR field to the config object
    config.BACKUP = config.conf.get("pbench-server", "pbench-backup-dir")
    config.QDIR = config.get("pbench-server", "pbench-quarantine-dir")

    # call the LocalBackupObject class
    lb_obj = LocalBackupObject(config)

    # call the S3Config class
    s3_obj = S3Config(config, logger)

    lb_obj, s3_obj = sanity_check(lb_obj, s3_obj, config, logger)

    # Only give up when *neither* target survived the sanity check; a single
    # remaining target is still handed to backup_data().
    if lb_obj is None and s3_obj is None:
        return 3

    logger.info("start-{}", config.TS)

    # Initiate the backup
    counts = backup_data(lb_obj, s3_obj, config, logger)

    result_string = (
        f"Total processed: {counts.ntotal},"
        f" Local backup successes: {counts.nbackup_success},"
        f" Local backup failures: {counts.nbackup_fail},"
        f" S3 upload successes: {counts.ns3_success},"
        f" S3 upload failures: {counts.ns3_fail},"
        f" Quarantined: {counts.nquaran}"
    )
    logger.info(result_string)

    prog = Path(sys.argv[0]).name

    # prepare and send report
    with tempfile.NamedTemporaryFile(mode="w+t", dir=config.TMP) as reportfp:
        reportfp.write(
            f"{prog}.{config.timestamp()}({config.PBENCH_ENV})\n{result_string}\n"
        )
        # Rewind so the report can be read back from the start via its name.
        reportfp.seek(0)

        report = Report(config, _NAME_)
        report.init_report_template()
        try:
            report.post_status(config.timestamp(), "status", reportfp.name)
        except Exception as exc:
            # Posting the report is best-effort, but leave a trace of the
            # failure instead of discarding it silently.
            logger.warning("Report post unsuccessful: '{}'", exc)

    logger.info("end-{}", config.TS)

    return 0
def main(options):
    """Cull unpacked tar ball directories that exceed the configured age.

    Walks the tar balls reported by ``gen_list_unpacked_aged``, removes each
    unpacked directory via ``remove_unpacked`` (stopping at the first action
    set that records an error), then writes and posts a report listing the
    actions taken.

    Args:
        options: parsed command-line options; ``cfg_name`` and ``dry_run``
            are read.

    Returns:
        1 when no configuration file was given; 2 when the configuration is
        bad; 3 when the INCOMING, RESULTS or USERS directory is not valid;
        5 when ``max-unpacked-age`` is not configured; 6 when it is not an
        integer; otherwise the number of errors recorded while culling
        (0 on success).
    """
    if not options.cfg_name:
        print(
            f"{_NAME_}: ERROR: No config file specified; set"
            " _PBENCH_SERVER_CONFIG env variable",
            file=sys.stderr,
        )
        return 1

    try:
        config = PbenchServerConfig(options.cfg_name)
    except BadConfig as e:
        print(f"{_NAME_}: {e}", file=sys.stderr)
        return 2

    logger = get_pbench_logger(_NAME_, config)

    archivepath = config.ARCHIVE

    incoming = config.INCOMING
    incomingpath = config.get_valid_dir_option("INCOMING", incoming, logger)
    if not incomingpath:
        return 3

    results = config.RESULTS
    resultspath = config.get_valid_dir_option("RESULTS", results, logger)
    if not resultspath:
        return 3

    users = config.USERS
    userspath = config.get_valid_dir_option("USERS", users, logger)
    if not userspath:
        return 3

    # Fetch the configured maximum number of days a tar can remain "unpacked"
    # in the INCOMING tree.
    try:
        max_unpacked_age = config.conf.get("pbench-server", "max-unpacked-age")
    except NoOptionError as e:
        # Pass the exception as an argument so braces in its text are never
        # interpreted as format fields by the logger.
        logger.error("{}", e)
        return 5
    try:
        max_unpacked_age = int(max_unpacked_age)
    except (TypeError, ValueError):
        logger.error("Bad maximum unpacked age, {}", max_unpacked_age)
        return 6

    # First phase is to find all the tar balls which are beyond the max
    # unpacked age, and which still have an unpacked directory in INCOMING.

    # A reference date/time supplied by the configuration overrides "now".
    curr_dt = config._ref_datetime
    if curr_dt is None:
        curr_dt = datetime.utcnow()

    _msg = "Culling unpacked tar balls {} days older than {}"
    if options.dry_run:
        print(_msg.format(max_unpacked_age, curr_dt.strftime(_STD_DATETIME_FMT)))
    else:
        logger.debug(_msg, max_unpacked_age, curr_dt.strftime(_STD_DATETIME_FMT))

    actions_taken = []
    errors = 0
    start = pbench.server._time()

    gen = gen_list_unpacked_aged(incomingpath, archivepath, curr_dt, max_unpacked_age)
    if config._unittests:
        # force the generator and sort the list
        gen = sorted(gen)

    for tb_incoming_dir, controller_name in gen:
        act_set = remove_unpacked(
            tb_incoming_dir,
            controller_name,
            resultspath,
            userspath,
            logger,
            options.dry_run,
        )
        unpacked_dir_name = Path(tb_incoming_dir).name
        act_path = Path(controller_name, unpacked_dir_name)
        act_set.set_name(act_path)
        actions_taken.append(act_set)
        # Carry the per-set error count into the overall total so the report
        # summary, the report doctype and the exit status all reflect it.
        errors += act_set.errors
        if act_set.errors > 0:
            # Stop any further unpacked tar ball removal if an error is
            # encountered.
            break
    end = pbench.server._time()

    # Generate the ${TOP}/public_html prefix so we can strip it from the
    # various targets in the report.
    public_html = os.path.realpath(os.path.join(config.TOP, "public_html"))

    # Write the actions taken into a report file.
    with tempfile.NamedTemporaryFile(
        mode="w+t", prefix=f"{_NAME_}.", suffix=".report", dir=config.TMP
    ) as tfp:
        duration = end - start
        total = len(actions_taken)
        print(
            f"Culled {total:d} unpacked tar ball directories ({errors:d}"
            f" errors) in {duration:0.2f} secs",
            file=tfp,
        )
        if total > 0:
            print("\nActions Taken:", file=tfp)
        for act_set in sorted(actions_taken, key=lambda a: a.name):
            print(
                f" - {act_set.name} ({act_set.errors:d} errors,"
                f" {act_set.duration():0.2f} secs)",
                file=tfp,
            )
            for act in act_set.actions:
                assert act.noun.startswith(
                    public_html
                ), f"Logic bomb! {act.noun} not in .../public_html/"
                # Strip the public_html prefix and the separator after it.
                tgt = Path(act.noun[len(public_html) + 1:])
                if act.verb == "mv":
                    name = tgt.name
                    controller = tgt.parent
                    ex_tgt = controller / f".delete.{name}"
                    print(f" $ {act.verb} {tgt} {ex_tgt} # {act.status}", file=tfp)
                else:
                    print(f" $ {act.verb} {tgt} # {act.status}", file=tfp)

        # Flush out the report ahead of posting it.
        tfp.flush()
        tfp.seek(0)

        # We need to generate a report that lists all the actions taken.
        report = Report(config, _NAME_)
        report.init_report_template()
        try:
            report.post_status(
                config.timestamp(), "status" if errors == 0 else "errors", tfp.name
            )
        except Exception as exc:
            # Posting the report is best-effort, but leave a trace of the
            # failure instead of discarding it silently.
            logger.warning("Report post unsuccessful: '{}'", exc)

    return errors
def main():
    """Verify ARCHIVE against the local backup and S3, then post a report.

    Reads the configuration file name from the ``_PBENCH_SERVER_CONFIG``
    environment variable, builds entry lists for ARCHIVE, the local BACKUP
    and (when accessible) S3, runs MD5 checks on ARCHIVE and BACKUP,
    compares the entry lists and posts the accumulated report.

    Returns:
        2 when no configuration file is specified; 1 when the configuration
        is bad or the backup directory is unspecified or invalid; otherwise
        the number of failed phases (0 when everything succeeded).
    """
    cfg_name = os.environ.get("_PBENCH_SERVER_CONFIG")
    if not cfg_name:
        print(
            f"{_NAME_}: ERROR: No config file specified; set _PBENCH_SERVER_CONFIG env variable or"
            " use --config <file> on the command line",
            file=sys.stderr,
        )
        return 2

    try:
        config = PbenchServerConfig(cfg_name)
    except BadConfig as e:
        print(f"{_NAME_}: {e}", file=sys.stderr)
        return 1

    logger = get_pbench_logger(_NAME_, config)

    # We're going to need the Postgres DB to track dataset state, so setup
    # DB access.
    init_db(config, logger)

    # add a BACKUP field to the config object
    config.BACKUP = backup = config.conf.get("pbench-server", "pbench-backup-dir")

    if not backup:
        logger.error(
            "Unspecified backup directory, no pbench-backup-dir config in"
            " pbench-server section"
        )
        return 1

    backuppath = config.get_valid_dir_option("BACKUP", backup, logger)
    if not backuppath:
        return 1

    # instantiate the s3config class
    s3_config_obj = S3Config(config, logger)
    # The checks below treat a None result as "S3 is not accessible".
    s3_config_obj = sanity_check(s3_config_obj, logger)

    logger.info("start-{}", config.TS)
    start = config.timestamp()

    prog = Path(sys.argv[0]).name

    sts = 0
    # N.B. tmpdir is the pathname of the temp directory.
    with tempfile.TemporaryDirectory() as tmpdir:

        archive_obj = BackupObject("ARCHIVE", config.ARCHIVE, tmpdir, logger)
        local_backup_obj = BackupObject("BACKUP", config.BACKUP, tmpdir, logger)
        s3_backup_obj = BackupObject("S3", s3_config_obj, tmpdir, logger)

        with tempfile.NamedTemporaryFile(mode="w+t", dir=tmpdir) as reportfp:
            reportfp.write(
                f"{prog}.{config.TS} ({config.PBENCH_ENV}) started at {start}\n"
            )
            if s3_config_obj is None:
                reportfp.write(
                    "\nNOTICE: S3 backup service is inaccessible; skipping"
                    " ARCHIVE to S3 comparison\n\n"
                )

            # FIXME: Parallelize these three ...

            # Create entry list for archive
            logger.debug("Starting archive list creation")
            ar_start = config.timestamp()
            ret_sts = archive_obj.entry_list_creation()
            if ret_sts == Status.FAIL:
                sts += 1
            logger.debug("Finished archive list ({!r})", ret_sts)

            # Create entry list for backup
            logger.debug("Starting local backup list creation")
            lb_start = config.timestamp()
            ret_sts = local_backup_obj.entry_list_creation()
            if ret_sts == Status.FAIL:
                sts += 1
            logger.debug("Finished local backup list ({!r})", ret_sts)

            # Create entry list for S3
            s3_start = "<skipped>"
            if s3_config_obj is not None:
                logger.debug("Starting S3 list creation")
                s3_start = config.timestamp()
                ret_sts = s3_backup_obj.entry_list_creation()
                if ret_sts == Status.FAIL:
                    sts += 1
                logger.debug("Finished S3 list ({!r})", ret_sts)

            logger.debug("Checking MD5 signatures of archive")
            ar_md5_start = config.timestamp()
            try:
                # Check the data integrity in ARCHIVE (Question 1).
                md5_result_archive = archive_obj.checkmd5()
            except Exception as ex:
                msg = f"Failed to check data integrity of ARCHIVE ({config.ARCHIVE})"
                logger.exception(msg)
                reportfp.write(f"\n{msg} - '{ex}'\n")
                sts += 1
            else:
                if md5_result_archive > 0:
                    # Create a report for failed MD5 results from ARCHIVE (Question 1)
                    archive_obj.report_failed_md5(reportfp)
                    sts += 1
                    logger.debug(
                        "Checking MD5 signature of archive: {} errors",
                        md5_result_archive,
                    )
            logger.debug("Finished checking MD5 signatures of archive")

            logger.debug("Checking MD5 signatures of local backup")
            lb_md5_start = config.timestamp()
            try:
                # Check the data integrity in BACKUP (Question 2).
                md5_result_backup = local_backup_obj.checkmd5()
            except Exception as ex:
                msg = f"Failed to check data integrity of BACKUP ({config.BACKUP})"
                logger.exception(msg)
                reportfp.write(f"\n{msg} - '{ex}'\n")
                # Count this failure exactly as the ARCHIVE check does, so
                # the exit status reflects an unverifiable backup.
                sts += 1
            else:
                if md5_result_backup > 0:
                    # Create a report for failed MD5 results from BACKUP (Question 2)
                    local_backup_obj.report_failed_md5(reportfp)
                    sts += 1
                    logger.debug(
                        "Checking MD5 signature of local backup: {} errors",
                        md5_result_backup,
                    )
            logger.debug("Finished checking MD5 signatures of local backup")

            # Compare ARCHIVE with BACKUP (Questions 3 and 3a).
            msg = "Comparing ARCHIVE with BACKUP"
            reportfp.write(f"\n{msg}\n{'-' * len(msg)}\n")
            logger.debug("{}: start", msg)
            compare_entry_lists(archive_obj, local_backup_obj, reportfp, logger)
            logger.debug("{}: end", msg)

            if s3_config_obj is not None:
                # Compare ARCHIVE with S3 (Questions 4, 4a, and 4b).
                msg = "Comparing ARCHIVE with S3"
                reportfp.write(f"\n{msg}\n{'-' * len(msg)}\n")
                logger.debug("{}: start", msg)
                compare_entry_lists(archive_obj, s3_backup_obj, reportfp, logger)
                logger.debug("{}: end", msg)

            reportfp.write(
                f"\n\nPhases (started):\n"
                f"Archive List Creation: {ar_start}\n"
                f"Local Backup List Creation: {lb_start}\n"
                f"S3 List Creation: {s3_start}\n"
                f"Archive MD5 Checks: {ar_md5_start}\n"
                f"Local Backup MD5 Checks: {lb_md5_start}\n"
            )

            end = config.timestamp()
            reportfp.write(
                f"\n{prog}.{config.TS} ({config.PBENCH_ENV}) finished at {end}\n"
            )

            # Rewind to the beginning.
            reportfp.seek(0)

            logger.debug("Sending report: start")
            report = Report(config, _NAME_)
            report.init_report_template()
            try:
                report.post_status(config.timestamp(), "status", reportfp.name)
            except Exception as exc:
                # Posting the report is best-effort, but leave a trace of
                # the failure instead of discarding it silently.
                logger.warning("Report post unsuccessful: '{}'", exc)
            logger.debug("Sending report: end")

    logger.info("end-{}", config.TS)

    return sts