def get_wiki_config(self, wikiname):
    '''
    build a Config from this object's config file, apply the
    per-project settings for the given wiki to it and return it

    wikiname -- name handed to Config.parse_conffile_per_project()
    '''
    per_wiki_config = Config(self.configfile)
    # layer the wiki-specific settings on top of the file-wide ones
    per_wiki_config.parse_conffile_per_project(wikiname)
    return per_wiki_config
def main():
    '''
    main entry point, does all the work

    Parses the command line, validates the arguments, loads the
    per-project configuration for the requested wiki and passes
    everything on to dologsbackup().

    Options:
      -w/--wiki     name of the wiki (mandatory)
      -o/--outfile  output file handed to dologsbackup() (mandatory)
      -s/--start    optional, must consist of digits only
      -e/--end      optional, must consist of digits only; the value
                    passed on to dologsbackup() is end - 1
      -C/--config   config file, defaults to "wikidump.conf"
      -d/--dryrun   sets the dryrun flag passed to dologsbackup()
      -h/--help     show the usage message

    Every argument problem is reported through usage().
    NOTE(review): this code assumes usage() does not return (e.g. it
    exits); confirm against its definition.
    '''
    wiki = None
    output_file = None
    start = None
    end = None
    configfile = "wikidump.conf"
    dryrun = False

    try:
        # "d" has to be declared here, otherwise getopt rejects "-d"
        # before the loop below ever gets to handle it
        (options, remainder) = getopt.gnu_getopt(
            sys.argv[1:], "w:o:s:e:C:dh",
            ["wiki=", "outfile=", "start=", "end=", "config=",
             "help", "dryrun"])
    except getopt.GetoptError as err:
        usage("Unknown option specified: " + str(err))

    for (opt, val) in options:
        if opt in ["-w", "--wiki"]:
            wiki = val
        elif opt in ["-o", "--outfile"]:
            output_file = val
        elif opt in ["-s", "--start"]:
            start = val
        elif opt in ["-e", "--end"]:
            end = val
        elif opt in ["-C", "--config"]:
            configfile = val
        elif opt in ["-d", "--dryrun"]:
            dryrun = True
        elif opt in ["-h", "--help"]:
            usage('Help for this script\n')
        else:
            usage("Unknown option specified: <%s>" % opt)

    # this script takes no positional arguments
    if remainder:
        usage("Unknown option(s) specified: <%s>" % remainder[0])

    if wiki is None:
        usage("mandatory argument missing: --wiki")
    if output_file is None:
        # the option is called --outfile, so name it that way
        usage("mandatory argument missing: --outfile")

    if start is not None:
        if not start.isdigit():
            usage("value for --start must be a number")
        else:
            start = int(start)

    if end is not None:
        if not end.isdigit():
            usage("value for --end must be a number")
        else:
            # NOTE(review): presumably --end is exclusive on the command
            # line and inclusive for dologsbackup(); confirm with callee
            end = int(end) - 1

    if not os.path.exists(configfile):
        usage("no such file found: " + configfile)

    wikiconf = Config(configfile)
    wikiconf.parse_conffile_per_project(wiki)
    dologsbackup(wiki, output_file, wikiconf, start, end, dryrun)
def main():
    '''
    Entry point: parse the command line, choose a wiki (locking it when
    locks are enabled), run the requested dump job(s) on it and exit.

    The pid of this process is stored in the DUMPS environment variable
    before anything else happens.

    If a wiki db name is given as positional argument that wiki is used
    (unless --cutoff is given and its latest dump is not older than the
    cutoff); otherwise find_lock_next_wiki() picks one.

    Locks are not used for a dry run, when --partnum is given, or when
    specific jobs are requested without any of --restartfrom,
    --exclusive or --force.

    Exit codes, as set in this function:
      0    at least one Runner.run() returned a true value, or a wiki
           was found but was not runnable ("prereqs not complete")
      1    default; also used when a configured external command is
           unknown or missing
      255  no wiki available to run

    Bad option combinations are reported through usage().
    NOTE(review): this code assumes usage() does not return (e.g. it
    exits); confirm against its definition.
    '''
    os.environ['DUMPS'] = str(os.getpid())

    try:
        date = None
        config_file = False
        force_lock = False
        prefetch = True
        prefetchdate = None
        spawn = True
        restart = False
        jobs_requested = None
        skip_jobs = None
        enable_logging = False
        html_notice = ""
        dryrun = False
        partnum_todo = None
        after_checkpoint = False
        checkpoint_file = None
        page_id_range = None
        cutoff = None
        exitcode = 1
        skipdone = False
        do_locking = False
        verbose = False
        cleanup_files = False
        do_prereqs = False

        try:
            (options, remainder) = getopt.gnu_getopt(
                sys.argv[1:], "",
                ['date=', 'job=', 'skipjobs=', 'configfile=', 'addnotice=',
                 'delnotice', 'force', 'dryrun', 'noprefetch', 'prefetchdate=',
                 'nospawn', 'restartfrom', 'aftercheckpoint=', 'log', 'partnum=',
                 'checkpoint=', 'pageidrange=', 'cutoff=', "skipdone", "exclusive",
                 "prereqs", "cleanup", 'verbose'])
        except getopt.GetoptError:
            # only option parsing errors mean "unknown option"; anything
            # else should surface as the error it really is
            usage("Unknown option specified")

        for (opt, val) in options:
            if opt == "--date":
                date = val
            elif opt == "--configfile":
                config_file = val
            elif opt == '--checkpoint':
                checkpoint_file = val
            elif opt == '--partnum':
                partnum_todo = int(val)
            elif opt == "--force":
                force_lock = True
            elif opt == '--aftercheckpoint':
                after_checkpoint = True
                checkpoint_file = val
            elif opt == "--noprefetch":
                prefetch = False
            elif opt == "--prefetchdate":
                prefetchdate = val
            elif opt == "--nospawn":
                spawn = False
            elif opt == "--dryrun":
                dryrun = True
            elif opt == "--job":
                jobs_requested = val
            elif opt == "--skipjobs":
                skip_jobs = val
            elif opt == "--restartfrom":
                restart = True
            elif opt == "--log":
                enable_logging = True
            elif opt == "--addnotice":
                html_notice = val
            elif opt == "--delnotice":
                # False (as opposed to the default "") asks for removal
                html_notice = False
            elif opt == "--pageidrange":
                page_id_range = val
            elif opt == "--cutoff":
                cutoff = val
                if not cutoff.isdigit() or len(cutoff) != 8:
                    usage("--cutoff value must be in yyyymmdd format")
            elif opt == "--skipdone":
                skipdone = True
            elif opt == "--cleanup":
                cleanup_files = True
            elif opt == "--exclusive":
                do_locking = True
            elif opt == "--verbose":
                verbose = True
            elif opt == "--prereqs":
                do_prereqs = True

        # jobs_requested stays the raw --job string (it is used as such
        # in messages); jobs_todo is the parsed list of job names
        if jobs_requested is not None:
            jobs_todo = jobs_requested.split(',')
        else:
            jobs_todo = []

        if dryrun and not remainder:
            usage("--dryrun requires the name of a wikidb to be specified")
        if restart and not jobs_requested:
            usage("--restartfrom requires --job and the job from which to restart")
        if restart and len(jobs_todo) > 1:
            usage("--restartfrom requires --job and exactly one job from which to restart")
        if partnum_todo is not None and not jobs_requested:
            usage("--partnum option requires specific job(s) for which to rerun that part")
        if partnum_todo is not None and restart:
            usage("--partnum option can be specified only for a specific list of jobs")
        if checkpoint_file is not None and not remainder:
            usage("--checkpoint option requires the name of a wikidb to be specified")
        if checkpoint_file is not None and not jobs_requested:
            usage("--checkpoint option requires --job")
        if page_id_range and not jobs_requested:
            usage("--pageidrange option requires --job")
        if page_id_range and checkpoint_file is not None:
            usage("--pageidrange option cannot be used with --checkpoint option")
        if prefetchdate is not None and not prefetch:
            usage("prefetchdate and noprefetch options may not be specified together")
        if prefetchdate is not None and (not prefetchdate.isdigit() or len(prefetchdate) != 8):
            usage("prefetchdate must be of the form YYYYMMDD")

        if skip_jobs is None:
            skip_jobs = []
        else:
            skip_jobs = skip_jobs.split(",")

        # allow alternate config file
        if config_file:
            config = Config(config_file)
        else:
            config = Config()

        # every external command must be named in the config and must
        # exist; report all problems at once before giving up
        externals = ['php', 'mysql', 'mysqldump', 'head', 'tail',
                     'checkforbz2footer', 'grep', 'gzip', 'bzip2',
                     'writeuptopageid', 'recompressxml', 'sevenzip', 'cat']
        unknowns = []
        notfound = []
        for external in externals:
            try:
                ext = getattr(config, external)
            except AttributeError:
                unknowns.append(external)
            else:
                if not exists(ext):
                    notfound.append(ext)
        if unknowns or notfound:
            if unknowns:
                sys.stderr.write("Unknown config param(s): %s\n" % ", ".join(unknowns))
            if notfound:
                sys.stderr.write("Command(s) not found: %s\n" % ", ".join(notfound))
            sys.stderr.write("Exiting.\n")
            sys.exit(1)

        locks_enabled = not (
            dryrun or partnum_todo is not None or
            (jobs_requested is not None and
             not restart and not do_locking and not force_lock))

        if dryrun:
            # single-argument print(...) emits the same text whether it
            # is parsed as a statement or as a function call
            print("***")
            print("Dry run only, no files will be updated.")
            print("***")

        if remainder:
            wiki = Wiki(config, remainder[0])
            if cutoff:
                # fixme if we asked for a specific job then check that job only
                # not the dir
                last_ran = wiki.latest_dump()
                if last_ran >= cutoff:
                    wiki = None
            if wiki is not None and locks_enabled:
                locker = Locker(wiki, date)
                if force_lock:
                    # remove existing locks even if we do not own them
                    lockfiles = locker.is_locked()
                    locker.unlock(lockfiles, owner=False)
                locker.lock()
        else:
            # if the run is across all wikis and we are just doing one job,
            # we want the age of the wikis by the latest status update
            # and not the date the run started

            # compare against the parsed job list: jobs_requested is a
            # string, so indexing it would yield a single character
            if jobs_todo and jobs_todo[0] == 'createdirs':
                check_status_time = False
                # there won't actually be a status for this job but we want
                # to ensure that the directory and the status file are present
                # and intact
                check_job_status = True
                check_prereq_status = False
            else:
                check_status_time = bool(jobs_requested is not None)
                check_job_status = bool(skipdone)
                check_prereq_status = bool(jobs_requested is not None and skipdone)
            wiki = find_lock_next_wiki(config, locks_enabled, cutoff, prefetch,
                                       prefetchdate, spawn,
                                       dryrun, html_notice, check_status_time,
                                       check_job_status, check_prereq_status, date,
                                       jobs_todo[0] if jobs_todo else None,
                                       skip_jobs, page_id_range,
                                       partnum_todo, checkpoint_file, skipdone, restart, verbose)

        if wiki:
            # process any per-project configuration options
            config.parse_conffile_per_project(wiki.db_name)

            if date == 'last':
                dumps = sorted(wiki.dump_dirs())
                if dumps:
                    date = dumps[-1]
                else:
                    date = None

            if not date:
                date = TimeUtils.today()
            wiki.set_date(date)

            if after_checkpoint:
                fname = DumpFilename(wiki)
                fname.new_from_filename(checkpoint_file)
                if not fname.is_checkpoint_file:
                    usage("--aftercheckpoint option requires the "
                          "name of a checkpoint file, bad filename provided")
                # resume with the page right after the last one in the
                # given checkpoint file, in the same part
                page_id_range = str(int(fname.last_page_id) + 1)
                partnum_todo = fname.partnum_int
                # now we don't need this.
                checkpoint_file = None
                after_checkpoint_jobs = ['articlesdump', 'metacurrentdump',
                                         'metahistorybz2dump']
                # test the job names, not the characters of the raw
                # --job string
                if (jobs_requested is None or
                        not set(jobs_todo).issubset(set(after_checkpoint_jobs))):
                    usage("--aftercheckpoint option requires --job option with one or more of %s"
                          % ", ".join(after_checkpoint_jobs))

            enabled = {}
            if enable_logging:
                enabled = {"logging": True}

            if restart:
                sys.stderr.write("Running %s, restarting from job %s...\n" %
                                 (wiki.db_name, jobs_todo[0]))
            elif jobs_requested:
                sys.stderr.write("Running %s, jobs %s...\n" % (wiki.db_name, jobs_requested))
            else:
                sys.stderr.write("Running %s...\n" % wiki.db_name)

            # do each job requested one at a time; with no specific jobs
            # requested a single runner is given None and does them all
            for job in (jobs_todo or [None]):
                runner = Runner(wiki, prefetch, prefetchdate, spawn, job,
                                skip_jobs, restart, html_notice, dryrun, enabled,
                                partnum_todo, checkpoint_file, page_id_range, skipdone,
                                cleanup_files, do_prereqs, verbose)
                result = runner.run()
                if result:
                    exitcode = 0

            # if we are doing one piece only of the dump, we don't unlock either
            if locks_enabled:
                locker = Locker(wiki, date)
                lockfiles = locker.is_locked()
                locker.unlock(lockfiles, owner=True)
        elif wiki is not None:
            # a wiki value that is false but not None
            sys.stderr.write("Wikis available to run but prereqs not complete.\n")
            exitcode = 0
        else:
            sys.stderr.write("No wikis available to run.\n")
            exitcode = 255
    finally:
        cleanup()
    sys.exit(exitcode)