def parse_conffile_overrideables(self):
    """
    Read the settings that a specific named section may override.

    These are global-like entries from the 'wiki', 'output' and 'misc'
    sections; every value is fetched through
    get_opt_in_overrides_or_default(). The results are stored as
    attributes of this object, nothing is returned.

    The 'output' and 'misc' sections are added to the parsed config
    if they are absent.

    fileperms and fixeddumporder are converted with int(value, 0), so
    the base is taken from the prefix of the configured value.
    """
    def lookup(section, option):
        return self.get_opt_in_overrides_or_default(section, option, 0)

    def optional_db_list(option):
        # a list file that does not exist is treated as an empty list
        try:
            return MiscUtils.db_list(lookup("wiki", option))
        except FileNotFoundError:
            return []

    self.db_list_unsorted = MiscUtils.db_list(lookup("wiki", "dblist"), nosort=True)

    # permit comma-separated list of files so that eg some script
    # can skip all private and/or closed wikis in addition to some
    # other exclusion list
    self.skip_db_list = self.get_skipdbs(lookup("wiki", "skipdblist"))

    self.private_list = optional_db_list("privatelist")
    self.closed_list = optional_db_list("closedlist")
    self.flow_list = optional_db_list("flowlist")

    self.tablejobs = lookup("wiki", "tablejobs")
    self.apijobs = lookup("wiki", "apijobs")

    # set lookup, so that filtering does not rescan the skip list
    # once for every db name
    skipped = set(self.skip_db_list)
    self.db_list_unsorted = [dbname for dbname in self.db_list_unsorted
                             if dbname not in skipped]
    self.db_list = sorted(self.db_list_unsorted)

    if not self.conf.has_section('output'):
        self.conf.add_section('output')
    self.public_dir = lookup("output", "public")
    self.private_dir = lookup("output", "private")
    self.temp_dir = lookup("output", "temp")
    self.web_root = lookup("output", "webroot")
    self.index = lookup("output", "index")
    self.template_dir = lookup("output", "templatedir")
    self.perdump_index = lookup("output", "perdumpindex")
    self.log_file = lookup("output", "logfile")
    self.fileperms = int(lookup("output", "fileperms"), 0)

    if not self.conf.has_section('misc'):
        self.conf.add_section('misc')
    self.fixed_dump_order = int(lookup("misc", "fixeddumporder"), 0)
def get_db_user_and_password(self):
    """
    Ask MediaWiki for the database user and password of this wiki.

    The values are retrieved by running the getConfiguration.php
    maintenance script through the shell and parsing its JSON output;
    yes, this means you need a full installation of MediaWiki
    (but not web service) in order to use this method.

    Returns:
        tuple (value of wgDBuser, value of wgDBpassword)
    """
    script_args = MultiVersion.mw_script_as_array(self.config, "getConfiguration.php")
    wanted_settings = ["wgDBuser", "wgDBpassword"]

    template = "{php} {command} --wiki={dbname} --format=json --regex='{vars}'"
    substitutions = {
        'php': MiscUtils.shell_escape(self.config.php),
        'command': " ".join(script_args),
        'dbname': MiscUtils.shell_escape(self.db_name),
        'vars': "|".join(wanted_settings),
    }

    raw_output = RunSimpleCommand.run_with_output(
        template.format(**substitutions), shell=True).strip()
    settings = json.loads(raw_output.decode('utf-8'))
    return settings['wgDBuser'], settings['wgDBpassword']
def get_skipdbs(self, filenames):
    """
    Collect the db names listed in one or more skip-list files.

    A comma-separated list of files is permitted so that eg some
    script can skip all private and/or closed wikis in addition to
    some other exclusion list.

    Args:
        filenames: one file name, or several separated by commas
    Returns:
        list of the db names from all of the files, without duplicates
    """
    # split() hands back a one-element list when there is no comma,
    # so the single-file case needs no special treatment
    all_names = [dbname
                 for skipfile in filenames.split(',')
                 for dbname in MiscUtils.db_list(skipfile)]
    return list(set(all_names))
def get_last_lines_from_n(self, fileobj, runner, count):
    """
    Return the uncompressed content of a dump file from a given line on.

    The file is run through its uncompression command and then through
    'tail -n +<count>', without a shell.

    Args:
        fileobj: describes the file; it must have a non-empty 'filename'
        runner: supplies dump_dir.filename_public_path() for locating
            the file
        count: number of the first line to keep, as understood by
            'tail -n +<count>'
    Returns:
        the output of the pipeline, or None if the file has no name,
        does not exist, or the pipeline failed
    Raises:
        BackupError: the configured tail command does not exist
    """
    # NOTE(review): existence is checked on the path built without a
    # date, while the path handed to DumpFile is built with
    # self.wiki.date -- confirm that these are meant to differ
    if not fileobj.filename or not exists(runner.dump_dir.filename_public_path(fileobj)):
        return None

    dumpfile = DumpFile(self.wiki,
                        runner.dump_dir.filename_public_path(fileobj, self.wiki.date),
                        fileobj, self.verbose)
    pipeline = dumpfile.setup_uncompression_command()

    tail = self.wiki.config.tail
    if not exists(tail):
        raise BackupError("tail command %s not found" % tail)
    pipeline.append([tail, "-n", "+%s" % count])

    # without shell
    proc = CommandPipeline(pipeline, quiet=True)
    proc.run_pipeline_get_output()

    if not proc.exited_successfully():
        # the only tolerated failure is the first command of the
        # pipeline ending on SIGPIPE, reported either as a negative
        # signal number or as 128 + signal number
        failed = proc.get_failed_cmds_with_retcode()
        tolerated = ([[-signal.SIGPIPE, pipeline[0]]],
                     [[signal.SIGPIPE + 128, pipeline[0]]])
        if failed not in tolerated:
            # previously there was no value to return on this path
            return None
    return proc.output()
def get_first_500_lines(self):
    """
    Return the first 500 lines of the uncompressed file, caching them.

    A non-empty self.first_lines from an earlier call is returned as is.
    Otherwise the file is run through its uncompression command and
    then through 'head -500', without a shell.

    Returns:
        the output of the pipeline; None if the file has no name or
        does not exist; the unchanged self.first_lines if the pipeline
        failed
    Raises:
        BackupError: the configured head command does not exist
    """
    if self.first_lines:
        return self.first_lines

    if not self.filename or not exists(self.filename):
        return None

    pipeline = self.setup_uncompression_command()

    head = self._wiki.config.head
    if not exists(head):
        raise BackupError("head command %s not found" % head)
    pipeline.append([head, "-500"])

    # without shell
    proc = CommandPipeline(pipeline, quiet=True)
    proc.run_pipeline_get_output()

    succeeded = proc.exited_successfully()
    if not succeeded:
        # the first command of the pipeline ending on SIGPIPE still
        # counts as success; it is reported either as a negative
        # signal number or as 128 + signal number
        failed = proc.get_failed_cmds_with_retcode()
        succeeded = failed in ([[-signal.SIGPIPE, pipeline[0]]],
                               [[signal.SIGPIPE + 128, pipeline[0]]])
    if succeeded:
        self.first_lines = proc.output()
    return self.first_lines
def build_recombine_command_string(self, runner, files, output_file, compression_command,
                                   uncompression_command, end_header_marker="</siteinfo>"):
    """
    Build the shell command that recombines several compressed part
    files into one compressed output file.

    For every part file the line number of the end-of-header marker is
    found by running <uncompress> | head -2000 | grep -n <marker>
    (without a shell). The returned string has the form
        (<part 1>;<part 2>;...)|<compression_command> <output path>
    where the first part keeps everything but its last line, the last
    part keeps everything after its header, and parts in between lose
    both their header and their last line.

    Args:
        runner: supplies wiki.config (paths of head, tail, grep) and
            dump_dir.filename_public_path()
        files: the part files, in the order in which they are to be
            concatenated; must support len()
        output_file: describes the file that the result is written to
        compression_command: placed unchanged in front of the output
            path. NOTE(review): presumably a ready-made shell fragment
            (it is formatted with %s as a whole) -- confirm against
            the callers
        uncompression_command: list of the command and its arguments;
            the path of each part file is appended to a copy of it
        end_header_marker: text that grep looks for to find the end of
            the header
    Returns:
        the command string; we assume the result is always going to be
        run in a subshell, which is much quicker than this script
        trying to read output and pass it to a subprocess
    Raises:
        BackupError: head, tail or grep is not found, there are no
            files, or the marker is not found in one of the files
    """
    config = runner.wiki.config
    output_filename = runner.dump_dir.filename_public_path(output_file)

    if not exists(config.head):
        raise BackupError("head command %s not found" % config.head)
    head = config.head
    if not exists(config.tail):
        raise BackupError("tail command %s not found" % config.tail)
    tail = config.tail
    if not exists(config.grep):
        raise BackupError("grep command %s not found" % config.grep)
    grep = config.grep

    if not files:
        raise BackupError("No files for the recombine step found in %s." % self.name())

    # escaped forms go into the returned shell string only; the
    # pipelines run below use no shell and need the raw values
    head_esc = MiscUtils.shell_escape(head)
    tail_esc = MiscUtils.shell_escape(tail)
    output_filename_esc = MiscUtils.shell_escape(output_filename)

    recombines = []
    for partnum, file_obj in enumerate(files, start=1):
        fpath = runner.dump_dir.filename_public_path(file_obj)
        uncompress_this_file = list(uncompression_command)
        uncompress_this_file.append(fpath)

        # warning: we figure any header (<siteinfo>...</siteinfo>)
        # is going to be less than 2000 lines!
        pipeline = [uncompress_this_file,
                    [head, "-2000"],
                    [grep, "-n", end_header_marker]]
        # without shell
        proc = CommandPipeline(pipeline, quiet=True)
        proc.run_pipeline_get_output()

        grep_output = proc.output()
        marker_found = False
        if grep_output:
            marker_found = proc.exited_successfully()
            if not marker_found:
                # the uncompression command ending on SIGPIPE is
                # tolerated; it is reported either as a negative
                # signal number or as 128 + signal number
                failed = proc.get_failed_cmds_with_retcode()
                marker_found = failed in (
                    [[-signal.SIGPIPE, uncompress_this_file]],
                    [[signal.SIGPIPE + 128, uncompress_this_file]])
        if not marker_found:
            raise BackupError("Could not find 'end of header' marker for %s" % fpath)

        # grep -n output starts with "<line number>:"
        header_end_num = int(grep_output.split(":", 1)[0]) + 1

        recombine = " ".join(MiscUtils.shell_escape(arg) for arg in uncompress_this_file)
        # NOTE(review): with a single file the first branch applies,
        # so the last line of that file is dropped -- confirm that
        # this method is never called with just one file
        if partnum == 1:
            # first file, put header and contents
            recombine = recombine + " | %s -n -1 " % head_esc
        elif partnum == len(files):
            # last file, put footer
            recombine = recombine + (" | %s -n +%s" % (tail_esc, header_end_num))
        else:
            # put contents only
            recombine = recombine + (" | %s -n +%s" % (tail_esc, header_end_num))
            recombine = recombine + " | %s -n -1 " % head_esc
        recombines.append(recombine)

    recombine_command_string = ("(" + ";".join(recombines) + ")" + "|" +
                                "%s %s" % (compression_command, output_filename_esc))
    return recombine_command_string
def parse_conffile_globally(self):
    """
    Read all of the global settings from the parsed config file.

    Values from the 'wiki', 'database', 'output', 'reporting',
    'tools', 'cleanup' and 'query' sections are stored as attributes
    of this object; nothing is returned. Every section other than
    'wiki' is added to the parsed config if it is absent.
    """
    conf = self.conf

    def ensure_section(name):
        if not conf.has_section(name):
            conf.add_section(name)

    self.db_list = MiscUtils.db_list(conf.get("wiki", "dblist"))

    # permit comma-separated list of files so that eg some script
    # can skip all private and/or closed wikis in addition to some
    # other exclusion list; split() gives a one-element list when
    # there is no comma
    names_to_skip = [dbname
                     for skipfile in conf.get("wiki", "skipdblist").split(',')
                     for dbname in MiscUtils.db_list(skipfile)]
    self.skip_db_list = list(set(names_to_skip))

    self.private_list = MiscUtils.db_list(conf.get("wiki", "privatelist"))
    self.closed_list = MiscUtils.db_list(conf.get("wiki", "closedlist"))
    self.flow_list = MiscUtils.db_list(conf.get("wiki", "flowlist"))
    self.tablejobs = conf.get("wiki", "tablejobs")

    self.db_list = list(set(self.db_list) - set(self.skip_db_list))

    ensure_section('database')
    self.max_allowed_packet = conf.get("database", "max_allowed_packet")

    ensure_section('output')
    output_settings = (
        ("public_dir", "public"),
        ("private_dir", "private"),
        ("temp_dir", "temp"),
        ("web_root", "webroot"),
        ("index", "index"),
        ("template_dir", "templatedir"),
        ("perdump_index", "perdumpindex"),
        ("log_file", "logfile"),
    )
    for attribute, option in output_settings:
        setattr(self, attribute, conf.get("output", option))
    # base 0: the base is taken from the prefix of the configured value
    self.fileperms = int(conf.get("output", "fileperms"), 0)

    ensure_section('reporting')
    self.admin_mail = conf.get("reporting", "adminmail")
    self.mail_from = conf.get("reporting", "mailfrom")
    self.smtp_server = conf.get("reporting", "smtpserver")
    self.stale_age = conf.getint("reporting", "staleage")
    self.skip_privatetables = conf.getint("reporting", "skipprivatetables")

    ensure_section('tools')
    # for the tools, the attribute carries the same name as the option
    tool_names = ("php", "gzip", "bzip2", "sevenzip", "mysql", "mysqldump",
                  "head", "tail", "cat", "grep", "checkforbz2footer",
                  "writeuptopageid", "recompressxml")
    for tool in tool_names:
        setattr(self, tool, conf.get("tools", tool))

    ensure_section('cleanup')
    self.keep = conf.getint("cleanup", "keep")

    ensure_section('query')
    self.queryfile = conf.get("query", "queryfile")