def __init__(self, wiki, prefetch=True, prefetchdate=None, spawn=True, job=None, skip_jobs=None, restart=False, notice="", dryrun=False, enabled=None, partnum_todo=None, checkpoint_file=None, page_id_range=None, skipdone=False, cleanup=False, do_prereqs=False, verbose=False): self.wiki = wiki self.db_name = wiki.db_name self.prefetch = prefetch self.prefetchdate = prefetchdate self.spawn = spawn self.filepart_info = FilePartInfo(wiki, self.db_name, self.log_and_print) self.restart = restart self.html_notice_file = None self.log = None self.dryrun = dryrun self._partnum_todo = partnum_todo self.checkpoint_file = checkpoint_file self.page_id_range = page_id_range self.skipdone = skipdone self.verbose = verbose self.enabled = enabled self.cleanup_old_files = cleanup self.do_prereqs = do_prereqs if self.checkpoint_file is not None: fname = DumpFilename(self.wiki) fname.new_from_filename(checkpoint_file) # we should get file partnum if any if self._partnum_todo is None and fname.partnum_int: self._partnum_todo = fname.partnum_int elif (self._partnum_todo is not None and fname.partnum_int and self._partnum_todo != fname.partnum_int): raise BackupError("specifed partnum to do does not match part number " "of checkpoint file %s to redo", self.checkpoint_file) self.checkpoint_file = fname if self.enabled is None: self.enabled = {} for setting in [StatusHtml.NAME, IndexHtml.NAME, Checksummer.NAME, RunInfoFile.NAME, SymLinks.NAME, RunSettings.NAME, Feeds.NAME, NoticeFile.NAME, "makedir", "clean_old_dumps", "cleanup_old_files", "check_trunc_files"]: self.enabled[setting] = True if not self.cleanup_old_files: if "cleanup_old_files" in self.enabled: del self.enabled["cleanup_old_files"] if self.dryrun or self._partnum_todo is not None or self.checkpoint_file is not None: for setting in [StatusHtml.NAME, IndexHtml.NAME, Checksummer.NAME, RunInfoFile.NAME, SymLinks.NAME, RunSettings.NAME, Feeds.NAME, NoticeFile.NAME, "makedir", "clean_old_dumps"]: if setting in self.enabled: del self.enabled[setting] if self.dryrun: for setting in ["check_trunc_files"]: if setting in self.enabled: del self.enabled[setting] if "logging" in self.enabled: del self.enabled["logging"] self.job_requested = job if self.job_requested == "latestlinks": for setting in [StatusHtml.NAME, IndexHtml.NAME, RunInfoFile.NAME]: if setting in self.enabled: del self.enabled[setting] if self.job_requested == "createdirs": for setting in [SymLinks.NAME, Feeds.NAME, RunSettings.NAME]: if setting in self.enabled: del self.enabled[setting] if self.job_requested == "latestlinks" or self.job_requested == "createdirs": for setting in [Checksummer.NAME, NoticeFile.NAME, "makedir", "clean_old_dumps", "check_trunc_files"]: if setting in self.enabled: del self.enabled[setting] if self.job_requested == "noop": for setting in ["clean_old_dumps", "check_trunc_files"]: if setting in self.enabled: del self.enabled[setting] self.skip_jobs = skip_jobs if skip_jobs is None: self.skip_jobs = [] self.db_server_info = DbServerInfo(self.wiki, self.db_name, self.log_and_print) self.dump_dir = DumpDir(self.wiki, self.db_name) # these must come after the dumpdir setup so we know which directory we are in if "logging" in self.enabled and "makedir" in self.enabled: file_obj = DumpFilename(self.wiki) file_obj.new_from_filename(self.wiki.config.log_file) self.log_filename = self.dump_dir.filename_private_path(file_obj) self.make_dir(os.path.join(self.wiki.private_dir(), self.wiki.date)) self.log = Logger(self.log_filename) # thread should die horribly when main script dies. no exceptions. self.log.daemon = True self.log.start() self.dumpjobdata = DumpRunJobData(self.wiki, self.dump_dir, notice, self.log_and_print, self.debug, self.enabled, self.verbose) # some or all of these dump_items will be marked to run self.dump_item_list = DumpItemList(self.wiki, self.prefetch, self.prefetchdate, self.spawn, self._partnum_todo, self.checkpoint_file, self.job_requested, self.skip_jobs, self.filepart_info, self.page_id_range, self.dumpjobdata, self.dump_dir, self.verbose) # only send email failure notices for full runs if self.job_requested: email = False else: email = True self.failurehandler = FailureHandler(self.wiki, email) self.statushtml = StatusHtml(self.wiki, self.dump_dir, self.dump_item_list.dump_items, self.dumpjobdata, self.enabled, self.failurehandler, self.log_and_print, self.verbose) self.indexhtml = IndexHtml(self.wiki, self.dump_dir, self.dump_item_list.dump_items, self.dumpjobdata, self.enabled, self.failurehandler, self.log_and_print, self.verbose)
class Runner(object): def __init__(self, wiki, prefetch=True, prefetchdate=None, spawn=True, job=None, skip_jobs=None, restart=False, notice="", dryrun=False, enabled=None, partnum_todo=None, checkpoint_file=None, page_id_range=None, skipdone=False, cleanup=False, do_prereqs=False, verbose=False): self.wiki = wiki self.db_name = wiki.db_name self.prefetch = prefetch self.prefetchdate = prefetchdate self.spawn = spawn self.filepart_info = FilePartInfo(wiki, self.db_name, self.log_and_print) self.restart = restart self.html_notice_file = None self.log = None self.dryrun = dryrun self._partnum_todo = partnum_todo self.checkpoint_file = checkpoint_file self.page_id_range = page_id_range self.skipdone = skipdone self.verbose = verbose self.enabled = enabled self.cleanup_old_files = cleanup self.do_prereqs = do_prereqs if self.checkpoint_file is not None: fname = DumpFilename(self.wiki) fname.new_from_filename(checkpoint_file) # we should get file partnum if any if self._partnum_todo is None and fname.partnum_int: self._partnum_todo = fname.partnum_int elif (self._partnum_todo is not None and fname.partnum_int and self._partnum_todo != fname.partnum_int): raise BackupError("specifed partnum to do does not match part number " "of checkpoint file %s to redo", self.checkpoint_file) self.checkpoint_file = fname if self.enabled is None: self.enabled = {} for setting in [StatusHtml.NAME, IndexHtml.NAME, Checksummer.NAME, RunInfoFile.NAME, SymLinks.NAME, RunSettings.NAME, Feeds.NAME, NoticeFile.NAME, "makedir", "clean_old_dumps", "cleanup_old_files", "check_trunc_files"]: self.enabled[setting] = True if not self.cleanup_old_files: if "cleanup_old_files" in self.enabled: del self.enabled["cleanup_old_files"] if self.dryrun or self._partnum_todo is not None or self.checkpoint_file is not None: for setting in [StatusHtml.NAME, IndexHtml.NAME, Checksummer.NAME, RunInfoFile.NAME, SymLinks.NAME, RunSettings.NAME, Feeds.NAME, NoticeFile.NAME, "makedir", "clean_old_dumps"]: if setting in self.enabled: del self.enabled[setting] if self.dryrun: for setting in ["check_trunc_files"]: if setting in self.enabled: del self.enabled[setting] if "logging" in self.enabled: del self.enabled["logging"] self.job_requested = job if self.job_requested == "latestlinks": for setting in [StatusHtml.NAME, IndexHtml.NAME, RunInfoFile.NAME]: if setting in self.enabled: del self.enabled[setting] if self.job_requested == "createdirs": for setting in [SymLinks.NAME, Feeds.NAME, RunSettings.NAME]: if setting in self.enabled: del self.enabled[setting] if self.job_requested == "latestlinks" or self.job_requested == "createdirs": for setting in [Checksummer.NAME, NoticeFile.NAME, "makedir", "clean_old_dumps", "check_trunc_files"]: if setting in self.enabled: del self.enabled[setting] if self.job_requested == "noop": for setting in ["clean_old_dumps", "check_trunc_files"]: if setting in self.enabled: del self.enabled[setting] self.skip_jobs = skip_jobs if skip_jobs is None: self.skip_jobs = [] self.db_server_info = DbServerInfo(self.wiki, self.db_name, self.log_and_print) self.dump_dir = DumpDir(self.wiki, self.db_name) # these must come after the dumpdir setup so we know which directory we are in if "logging" in self.enabled and "makedir" in self.enabled: file_obj = DumpFilename(self.wiki) file_obj.new_from_filename(self.wiki.config.log_file) self.log_filename = self.dump_dir.filename_private_path(file_obj) self.make_dir(os.path.join(self.wiki.private_dir(), self.wiki.date)) self.log = Logger(self.log_filename) # thread should die horribly when main script dies. no exceptions. self.log.daemon = True self.log.start() self.dumpjobdata = DumpRunJobData(self.wiki, self.dump_dir, notice, self.log_and_print, self.debug, self.enabled, self.verbose) # some or all of these dump_items will be marked to run self.dump_item_list = DumpItemList(self.wiki, self.prefetch, self.prefetchdate, self.spawn, self._partnum_todo, self.checkpoint_file, self.job_requested, self.skip_jobs, self.filepart_info, self.page_id_range, self.dumpjobdata, self.dump_dir, self.verbose) # only send email failure notices for full runs if self.job_requested: email = False else: email = True self.failurehandler = FailureHandler(self.wiki, email) self.statushtml = StatusHtml(self.wiki, self.dump_dir, self.dump_item_list.dump_items, self.dumpjobdata, self.enabled, self.failurehandler, self.log_and_print, self.verbose) self.indexhtml = IndexHtml(self.wiki, self.dump_dir, self.dump_item_list.dump_items, self.dumpjobdata, self.enabled, self.failurehandler, self.log_and_print, self.verbose) def log_queue_reader(self, log): if not log: return done = False while not done: done = log.do_job_on_log_queue() def log_and_print(self, message): if hasattr(self, 'log') and self.log and "logging" in self.enabled: self.log.add_to_log_queue("%s\n" % message) sys.stderr.write("%s\n" % message) def html_update_callback(self): self.indexhtml.update_index_html() self.statushtml.update_status_file() # returns 0 on success, 1 on error def save_command(self, commands, outfile): """For one pipeline of commands, redirect output to a given file.""" commands[-1].extend([">", outfile]) series = [commands] if self.dryrun: self.pretty_print_commands([series]) return 0 else: return self.run_command([series], callback_timed=self.html_update_callback) def pretty_print_commands(self, command_series_list): for series in command_series_list: for pipeline in series: command_strings = [] for command in pipeline: command_strings.append(" ".join(command)) pipeline_string = " | ".join(command_strings) print "Command to run: ", pipeline_string # command series list: list of (commands plus args) # is one pipeline. list of pipelines = 1 series. # this function wants a list of series. # be a list (the command name and the various args) # If the shell option is true, all pipelines will be run under the shell. # callbackinterval: how often we will call callback_timed (in milliseconds), # defaults to every 5 secs def run_command(self, command_series_list, callback_stderr=None, callback_stderr_arg=None, callback_timed=None, callback_timed_arg=None, shell=False, callback_interval=5000): """Nonzero return code from the shell from any command in any pipeline will cause this function to print an error message and return 1, indicating error. Returns 0 on success. If a callback function is passed, it will receive lines of output from the call. If the callback function takes another argument (which will be passed before the line of output) must be specified by the arg paraemeter. If no callback is provided, and no output file is specified for a given pipe, the output will be written to stderr. (Do we want that?) This function spawns multiple series of pipelines in parallel. """ if self.dryrun: self.pretty_print_commands(command_series_list) return 0 else: commands = CommandsInParallel(command_series_list, callback_stderr=callback_stderr, callback_stderr_arg=callback_stderr_arg, callback_timed=callback_timed, callback_timed_arg=callback_timed_arg, shell=shell, callback_interval=callback_interval) commands.run_commands() if commands.exited_successfully(): return 0 else: problem_commands = commands.commands_with_errors() error_string = "Error from command(s): " for cmd in problem_commands: error_string = error_string + "%s " % cmd self.log_and_print(error_string) return 1 def debug(self, stuff): self.log_and_print("%s: %s %s" % (TimeUtils.pretty_time(), self.db_name, stuff)) def run_handle_failure(self): if self.failurehandler.failure_count < 1: # Email the site administrator just once per database self.failurehandler.report_failure() self.failurehandler.failure_count += 1 def do_run_item(self, item): prereq_job = None Maintenance.exit_if_in_maintenance_mode( "In maintenance mode, exiting dump of %s at step %s" % (self.db_name, item.name())) if item.to_run(): item.start() self.indexhtml.update_index_html() self.statushtml.update_status_file() self.dumpjobdata.do_before_job(self.dump_item_list.dump_items) try: item.dump(self) except Exception as ex: exc_type, exc_value, exc_traceback = sys.exc_info() if self.verbose: sys.stderr.write(repr(traceback.format_exception( exc_type, exc_value, exc_traceback))) if (exc_type.__name__ == 'BackupPrereqError' or exc_type.__name__ == 'BackupError'): error_message = str(ex) if error_message.startswith("Required job "): prereq_job = error_message.split(" ")[2] self.debug(error_message) if prereq_job is None: # exception that doesn't have to do with missing prereqs. self.debug("*** exception! " + str(ex)) item.set_status("failed") if item.status() == "done": self.dumpjobdata.do_after_job(item) elif item.status() == "waiting" or item.status() == "skipped": # don't update the checksum files for this item. pass else: # Here for example status is "failed". But maybe also # "in-progress", if an item chooses to override dump(...) and # forgets to set the status. This is a failure as well. self.run_handle_failure() return prereq_job def run(self): if self.job_requested: if not self.dump_item_list.old_runinfo_retrieved and self.wiki.exists_perdump_index(): # There was a previous run of all or part of this date, but... # There was no old RunInfo to be had (or an error was encountered getting it) # so we can't rerun a step and keep all the status information # about the old run around. # In this case ask the user if they reeeaaally want to go ahead print "No information about the previous run for this date could be retrieved." print "This means that the status information about the old run will be lost, and" print "only the information about the current (and future) runs will be kept." reply = raw_input("Continue anyways? [y/N]: ") if reply not in ["y", "Y"]: raise RuntimeError("No run information available for previous dump, exiting") if not self.dump_item_list.mark_dumps_to_run(self.job_requested, self.skipdone): # probably no such job sys.stderr.write("No job marked to run, exiting") return None if self.restart: # mark all the following jobs to run as well self.dump_item_list.mark_following_jobs_to_run(self.skipdone) else: self.dump_item_list.mark_all_jobs_to_run(self.skipdone) Maintenance.exit_if_in_maintenance_mode( "In maintenance mode, exiting dump of %s" % self.db_name) self.make_dir(os.path.join(self.wiki.public_dir(), self.wiki.date)) self.make_dir(os.path.join(self.wiki.private_dir(), self.wiki.date)) self.show_runner_state("Cleaning up old dumps for %s" % self.db_name) self.clean_old_dumps() self.clean_old_dumps(private=True) # Informing what kind backup work we are about to do if self.job_requested: if self.restart: self.log_and_print("Preparing for restart from job %s of %s" % (self.job_requested, self.db_name)) else: self.log_and_print("Preparing for job %s of %s" % (self.job_requested, self.db_name)) else: self.show_runner_state("Starting backup of %s" % self.db_name) self.dumpjobdata.do_before_dump() for item in self.dump_item_list.dump_items: prereq_job = self.do_run_item(item) if self.do_prereqs and prereq_job is not None: doing = [] doing.append(item) # we have the lock so we might as well run the prereq job now. # there may be a string of prereqs not met, # i.e. articlesrecombine -> articles -> stubs # so we're willing to walk back up the list up to five items, # assume there's something really broken if it takes more than that while prereq_job is not None and len(doing) < 5: new_item = self.dump_item_list.find_item_by_name(prereq_job) new_item.set_to_run(True) prereq_job = self.do_run_item(new_item) if prereq_job is not None: # this job has a dependency too, add to the todo stack doing.insert(0, new_item) # back up the stack and do the dependents if stack isn't too long. if len(doing) < 5: for item in doing: self.do_run_item(item) # special case if self.job_requested == "createdirs": if not os.path.exists(os.path.join(self.wiki.public_dir(), self.wiki.date)): os.makedirs(os.path.join(self.wiki.public_dir(), self.wiki.date)) if not os.path.exists(os.path.join(self.wiki.private_dir(), self.wiki.date)): os.makedirs(os.path.join(self.wiki.private_dir(), self.wiki.date)) if self.dump_item_list.all_possible_jobs_done(): # All jobs are either in status "done", "waiting", "failed", "skipped" self.indexhtml.update_index_html("done") self.statushtml.update_status_file("done") else: # This may happen if we start a dump now and abort before all items are # done. Then some are left for example in state "waiting". When # afterwards running a specific job, all (but one) of the jobs # previously in "waiting" are still in status "waiting" self.indexhtml.update_index_html("partialdone") self.statushtml.update_status_file("partialdone") self.dumpjobdata.do_after_dump(self.dump_item_list.dump_items) # special case if (self.job_requested and self.job_requested == "latestlinks" and self.dump_item_list.all_possible_jobs_done()): self.dumpjobdata.do_latest_job() # Informing about completion if self.job_requested: if self.restart: self.show_runner_state("Completed run restarting from job %s for %s" % (self.job_requested, self.db_name)) else: self.show_runner_state("Completed job %s for %s" % (self.job_requested, self.db_name)) else: self.show_runner_state_complete() # let caller know if this was a successful run if self.failurehandler.failure_count > 0: return False else: return True def clean_old_dumps(self, private=False): """Removes all but the wiki.config.keep last dumps of this wiki. If there is already a directory for todays dump, this is omitted in counting and not removed.""" if "clean_old_dumps" in self.enabled: if private: old = self.wiki.dump_dirs(private=True) dumptype = 'private' else: old = self.wiki.dump_dirs() dumptype = 'public' if old: if old[-1] == self.wiki.date: # If we're re-running today's (or jobs from a given day's) dump, don't count # it as one of the old dumps to keep... or delete it halfway through! old = old[:-1] if self.wiki.config.keep > 0: # Keep the last few old = old[:-(self.wiki.config.keep)] if old: for dump in old: self.show_runner_state("Purging old %s dump %s for %s" % (dumptype, dump, self.db_name)) if private: base = os.path.join(self.wiki.private_dir(), dump) else: base = os.path.join(self.wiki.public_dir(), dump) shutil.rmtree("%s" % base) else: self.show_runner_state("No old %s dumps to purge." % dumptype) def show_runner_state(self, message): self.debug(message) def show_runner_state_complete(self): self.debug("SUCCESS: done.") def make_dir(self, dirname): if "makedir" in self.enabled: if exists(dirname): self.debug("Checkdir dir %s ..." % dirname) else: self.debug("Creating %s ..." % dirname) os.makedirs(dirname)