def dump_html(self):
    '''
    dump HTML-formatted revision content from RESTBase
    for the given wiki and date

    assembles the command line for the nodejs dump script and, unless
    this is a dry run, executes it without collecting its output

    returns True on a dry run or when the command succeeds,
    False (after logging a warning) when the command fails
    '''
    wiki = self.wiki
    namespace = self.args['ns']

    dumps_path = MiscDumpDir(wiki.config, wiki.date).get_dumpdir(
        wiki.db_name, wiki.date)
    html_filename = HTMLFile(
        wiki.config, wiki.date, wiki.db_name).get_filename(namespace)

    # example of the sort of command we are modelling:
    # /usr/bin/nodejs ./bin/dump_wiki --domain en.wikipedia.org --ns 0 \
    #  --apiURL http://en.wikipedia.org/w/api.php \
    #  --dataBase /srv/www/htmldumps/dumps/20160826/en.wikipedia.org.articles.ns0.sqlite3
    wiki_domain = self.get_domain_from_wikidbname()
    target_path = os.path.join(dumps_path, html_filename)

    # FIXME: the nodejs wrapper which will do the compress etc stuff
    # for one wiki is not yet written
    # NOTE(review): --dataBase and --output=gzip: are both given the same
    # path here; confirm that is intended once the wrapper exists
    command = [
        wiki.config.nodejs,
        wiki.config.scriptpath,
        "--domain", wiki_domain,
        "--ns", namespace,
        "--apiURL", "http://%s/w/api.php" % wiki_domain,
        "--dataBase", target_path,
        "--wiki=%s" % wiki.db_name,
        "--output=gzip:%s" % target_path,
    ]

    if self.dryrun:
        print("would run command for html dump:", command)
        return True

    if RunSimpleCommand.run_with_no_output(
            command, shell=False,
            timeout=self.get_lock_timeout_interval(),
            timeout_callback=self.periodic_callback):
        return True

    self.log.warning("error producing html files for wiki %s", wiki.db_name)
    return False
def dump_stub(self, start_revid, end_revid):
    '''
    dump stubs (metadata) for revs from start_revid
    up to but not including end_revid

    args:
        start_revid -- passed to dumpBackup.php as --revstart
        end_revid   -- passed to dumpBackup.php as --revend (not included)

    returns True if the stubs step is not set to run, on a dry run,
    or when the dump command succeeds; False (after logging a warning)
    when the command fails
    '''
    if not self.steps['stubs']['run']:
        return True

    dumpdir = MiscDumpDir(self.wiki.config, self.wiki.date)
    outputdir = dumpdir.get_dumpdir(self.wiki.db_name, self.wiki.date)
    stubfile = StubFile(self.wiki.config, self.wiki.date, self.wiki.db_name)
    outputfile = stubfile.get_filename()

    script_command = MultiVersion.mw_script_as_array(self.wiki.config,
                                                     "dumpBackup.php")
    command = [self.wiki.config.php]
    command.extend(script_command)
    command.extend(["--wiki=%s" % self.wiki.db_name,
                    "--stub", "--quiet",
                    "--output=gzip:%s" % os.path.join(outputdir, outputfile),
                    "--revrange",
                    "--revstart=%s" % start_revid,
                    "--revend=%s" % end_revid])

    if self.dryrun:
        # one pre-formatted argument: prints the same text whether print
        # is the Python 2 statement or the Python 3 function
        print("would run command for stubs dump: %s" % (command,))
        return True

    # let the logger do the formatting, as the warning call below does
    log.info("running with no output: %s", " ".join(command))
    success = RunSimpleCommand.run_with_no_output(
        command, shell=False,
        timeout=self.get_lock_timeout_interval(),
        timeout_callback=self.periodic_callback)
    if not success:
        log.warning("error producing stub files for wiki %s",
                    self.wiki.db_name)
        return False
    return True
def dump_revs(self):
    '''
    dump revision content corresponding to previously-dumped
    stubs (revision metadata)

    runs dumpTextPass.php with the gzipped stub file as input and
    a bzip2-compressed file as output, both in this wiki's dump
    directory for the run date

    returns True if the revs step is not set to run, on a dry run,
    or when the dump command succeeds; False (after logging a warning)
    when the command fails
    '''
    if not self.steps['revs']['run']:
        return True

    dumpdir = MiscDumpDir(self.wiki.config, self.wiki.date)
    outputdir = dumpdir.get_dumpdir(self.wiki.db_name, self.wiki.date)
    revsfile = RevsFile(self.wiki.config, self.wiki.date, self.wiki.db_name)
    outputfile = revsfile.get_filename()

    script_command = MultiVersion.mw_script_as_array(self.wiki.config,
                                                     "dumpTextPass.php")
    command = [self.wiki.config.php]
    command.extend(script_command)

    # the stub file name is derived the same way dump_stub derives it
    stubfile = StubFile(self.wiki.config, self.wiki.date, self.wiki.db_name)
    stuboutputfile = stubfile.get_filename()
    command.extend(["--wiki=%s" % self.wiki.db_name,
                    "--stub=gzip:%s" % os.path.join(outputdir, stuboutputfile),
                    "--quiet",
                    "--spawn=%s" % self.wiki.config.php,
                    "--output=bzip2:%s" % os.path.join(outputdir, outputfile)])

    if self.dryrun:
        # one pre-formatted argument: prints the same text whether print
        # is the Python 2 statement or the Python 3 function
        print("would run command for revs dump: %s" % (command,))
        return True

    # let the logger do the formatting, as the warning call below does
    log.info("running with no output: %s", " ".join(command))
    success = RunSimpleCommand.run_with_no_output(
        command, shell=False,
        timeout=self.get_lock_timeout_interval(),
        timeout_callback=self.periodic_callback)
    if not success:
        log.warning("error producing revision text files"
                    " for wiki %s", self.wiki.db_name)
        return False
    return True
def run(self, wiki, filenameformat, output_dir, overwrite, base=None): ''' run a (maintenance) script on one wiki, expecting relevant output to go to a file ''' (outfile_base, outfile_path) = self.skip_if_done( wiki, filenameformat, output_dir, overwrite) if outfile_base is None: return True command = self.get_command(wiki, outfile_path, outfile_base, base) if not isinstance(command, basestring): # see if the list elts are lists tht need to be turned into strings command = [element if isinstance(element, basestring) else ' '.join(element) for element in command] command = '|'.join(command) if self.dryrun: print "Would run:", print command return True else: return RunSimpleCommand.run_with_no_output( command, maxtries=1, shell=True, verbose=self.verbose)