# NOTE(review): this module appears to contain two definitions of ``Importer``
# with the same logic; Python keeps whichever one is defined last.
class Importer(object):
    """Import all articles and files from configuration.datapath into the wiki.

    Pages are read from ``*.xml`` dump files found anywhere below
    ``configuration.datapath``.  Files are taken from its ``File``
    subdirectory, where every uploadable file ``X`` is accompanied by a
    checksum file ``X.sha1``.
    """

    def __init__(self, configuration, wiki):
        """Remember the configuration and wrap `wiki` in a WikiUtil."""
        self.configuration = configuration
        self.wikiutil = WikiUtil(wiki)
        self.wiki = self.wikiutil.wiki  # WikiUtil modifies wiki
        # run() resets these; they are initialised here as well so that
        # importPage()/importFile() can also be called on their own.
        self.pagecount = 0
        self.changecount = 0

    def _buildPageListPerDir(self, pagelist, dirname, fnames):
        """Append the parsed dump of every ``*.xml`` file in `dirname` to `pagelist`."""
        for fname in fnames:
            path = os.path.join(dirname, fname)
            if os.path.isfile(path) and path.endswith('.xml'):
                print("I: Considering file %s." % path)
                pagelist.append(self.readDumpFile(path))

    def _buildPageList(self):
        """Return the parsed dumps of all ``*.xml`` files below the data path."""
        pagelist = []
        # os.walk replaces the deprecated os.path.walk; the per-directory
        # helper still filters for regular files itself.
        for dirname, _dirnames, fnames in os.walk(self.configuration.datapath):
            self._buildPageListPerDir(pagelist, dirname, fnames)
        return pagelist

    def _buildFileListPerDir(self, filelist, dirname, fnames):
        """Append to `filelist` every path in `dirname` that has a ``.sha1`` companion."""
        for fname in fnames:
            path = os.path.join(dirname, fname)
            if os.path.isfile(path) and path.endswith('.sha1'):
                # Drop the trailing ".sha1" to get the path of the real file.
                path = path.rpartition('.')[0]
                print("I: Considering file %s." % path)
                filelist.append(path)

    def _buildFileList(self):
        """Return the paths of all uploadable files below ``<datapath>/File``."""
        filelist = []
        filedir = os.path.join(self.configuration.datapath, 'File')
        for dirname, _dirnames, fnames in os.walk(filedir):
            self._buildFileListPerDir(filelist, dirname, fnames)
        return filelist

    def readDumpFile(self, path):
        """Parse the XML dump at `path` and prepare it for import.

        The revision timestamp is replaced with the current time and the
        revision id is set to '1'.

        Returns a dict with the keys u'title' (page title text), u'io'
        (file-like object named "import.xml" holding the serialised XML)
        and u'xmldoc' (the parsed element).
        """
        xmlEl = self.wikiutil.readXmlFromFile(path)
        title = xmlEl.find(XMLNS + u'page/' + XMLNS + u'title').text
        revision = xmlEl.find(XMLNS + u'page/' + XMLNS + u'revision')
        revision.find(XMLNS + u'timestamp').text = \
            self.wikiutil.buildWikiTimestampNow()
        revision.find(XMLNS + u'id').text = '1'
        dumpio = StringIO(xml.etree.ElementTree.tostring(xmlEl))
        dumpio.name = "import.xml"
        return {u'title': title, u'io': dumpio, u'xmldoc': xmlEl}

    def _fetchToken(self, tokentype, title):
        """Ask the wiki for a `tokentype` token ('import' or 'edit') for `title`.

        Returns the value stored under ``<tokentype>token`` for the first
        page of the reply.
        """
        params = {
            'action': 'query',
            'prop': 'info',
            'intoken': tokentype,
            'titles': title,
        }
        request = APIRequest(self.wiki, params)
        result = request.query()['query']
        if u'warnings' in result:
            print('W: Wiki gave warning: ' + result[u'warnings'][u'info'][u'*'])
        pages = result['pages']
        # Only one title is requested; presumably the reply holds one page.
        return list(pages.values())[0][tokentype + 'token']

    def importPage(self, page):
        """Run an XML import for `page` (a dict as built by readDumpFile).

        The import is skipped when the wiki already holds the same page text.
        Returns 1 if the wiki did not report exactly one imported revision,
        None otherwise.
        """
        title = page[u'title']
        print("I: Importing page %s." % title)

        oldpage = self.wikiutil.retrievePageExportXml(title)
        if oldpage is not None:
            oldtext = self.wikiutil.getPageTextFromXml(oldpage)
            newtext = self.wikiutil.getPageTextFromXml(page[u'xmldoc'])
            if oldtext == newtext:
                print("I: Skipping, no changes.")
                return

        importtoken = self._fetchToken('import', title)

        # now post the page
        params = {'action': 'import', 'token': importtoken}
        request = APIRequest(self.wiki, params)
        request.setMultipart()
        request.changeParam('xml', page[u'io'])
        result = request.query()
        revisions = result[u'import'][0][u'revisions']
        if revisions != 1:
            print("E: Page import failed. Wiki said: %s" % (result,))
            return 1

        self.pagecount = self.pagecount + 1
        self.changecount = self.changecount + revisions

    def importFile(self, path):
        """Upload the file at `path` to the wiki unless its sha1 is unchanged.

        The expected checksum is read from ``path + '.sha1'`` and compared
        with the sha1 the wiki reports for the existing file.
        Returns 1 if the upload did not succeed, None otherwise.
        """
        prefix = os.path.commonprefix(
            [path, os.path.join(self.configuration.datapath, 'File')])
        title = path[len(prefix) + 1:]
        print("I: Importing image %s." % title)

        # get new sha1; strip line endings so the comparison below is not
        # defeated by a trailing newline in the checksum file
        with open(path + '.sha1', 'r') as sha1file:
            new_sha1sum = sha1file.read(40).strip('\r\n ')

        # get old sha1 from wiki, don't import if they match
        old_image = self.wikiutil.retrieveImageInfo('File:%s' % title)
        if 'missing' not in old_image:
            old_sha1sum = old_image['imageinfo'][0][u'sha1']
            if old_sha1sum == new_sha1sum:
                print("I: Skipping, no changes.")
                return

        token = self._fetchToken('edit', 'File:%s' % title)

        # now post the real upload request
        params = {
            'action': 'upload',
            'filename': title,
            'token': token,
            'comment': 'Origin-SICEKIT',
            'ignorewarnings': True,
        }
        # Binary mode keeps the payload intact on every platform; the with
        # block closes the file even if the request raises.
        with open(path, 'rb') as f:
            request = APIRequest(self.wiki, params)
            request.setMultipart()
            request.changeParam('file', f)
            result = request.query()

        if result[u'upload'][u'result'] != 'Success':
            print("E: File upload failed. Wiki said: %s" % (result,))
            return 1

        self.pagecount = self.pagecount + 1
        self.changecount = self.changecount + 1

    def run(self):
        """Main entry point for the Importer; imports pages, then files.

        Always returns 0.
        """
        self.pagecount = 0
        self.changecount = 0

        print("I: Looking for pages to import in %s."
              % self.configuration.datapath)
        pagelist = self._buildPageList()
        print("I: Now importing pages.")
        for page in pagelist:
            self.importPage(page)

        print("I: Looking for images to import in %s."
              % self.configuration.datapath)
        filelist = self._buildFileList()
        print("I: Now importing images.")
        for path in filelist:
            self.importFile(path)

        print("I: Imported %d pages, resulting in %d changes."
              % (self.pagecount, self.changecount))
        return 0
# NOTE(review): this module appears to contain two definitions of ``Importer``
# with the same logic; Python keeps whichever one is defined last.
class Importer(object):
    """Import all articles and files from configuration.datapath into the wiki.

    Pages are read from ``*.xml`` dump files found anywhere below
    ``configuration.datapath``.  Files are taken from its ``File``
    subdirectory, where every uploadable file ``X`` is accompanied by a
    checksum file ``X.sha1``.
    """

    def __init__(self, configuration, wiki):
        """Remember the configuration and wrap `wiki` in a WikiUtil."""
        self.configuration = configuration
        self.wikiutil = WikiUtil(wiki)
        self.wiki = self.wikiutil.wiki  # WikiUtil modifies wiki
        # run() resets these; they are initialised here as well so that
        # importPage()/importFile() can also be called on their own.
        self.pagecount = 0
        self.changecount = 0

    def _buildPageListPerDir(self, pagelist, dirname, fnames):
        """Append the parsed dump of every ``*.xml`` file in `dirname` to `pagelist`."""
        for fname in fnames:
            path = os.path.join(dirname, fname)
            if os.path.isfile(path) and path.endswith('.xml'):
                print("I: Considering file %s." % path)
                pagelist.append(self.readDumpFile(path))

    def _buildPageList(self):
        """Return the parsed dumps of all ``*.xml`` files below the data path."""
        pagelist = []
        # os.walk replaces the deprecated os.path.walk; the per-directory
        # helper still filters for regular files itself.
        for dirname, _dirnames, fnames in os.walk(self.configuration.datapath):
            self._buildPageListPerDir(pagelist, dirname, fnames)
        return pagelist

    def _buildFileListPerDir(self, filelist, dirname, fnames):
        """Append to `filelist` every path in `dirname` that has a ``.sha1`` companion."""
        for fname in fnames:
            path = os.path.join(dirname, fname)
            if os.path.isfile(path) and path.endswith('.sha1'):
                # Drop the trailing ".sha1" to get the path of the real file.
                path = path.rpartition('.')[0]
                print("I: Considering file %s." % path)
                filelist.append(path)

    def _buildFileList(self):
        """Return the paths of all uploadable files below ``<datapath>/File``."""
        filelist = []
        filedir = os.path.join(self.configuration.datapath, 'File')
        for dirname, _dirnames, fnames in os.walk(filedir):
            self._buildFileListPerDir(filelist, dirname, fnames)
        return filelist

    def readDumpFile(self, path):
        """Parse the XML dump at `path` and prepare it for import.

        The revision timestamp is replaced with the current time and the
        revision id is set to '1'.

        Returns a dict with the keys u'title' (page title text), u'io'
        (file-like object named "import.xml" holding the serialised XML)
        and u'xmldoc' (the parsed element).
        """
        xmlEl = self.wikiutil.readXmlFromFile(path)
        title = xmlEl.find(XMLNS + u'page/' + XMLNS + u'title').text
        revision = xmlEl.find(XMLNS + u'page/' + XMLNS + u'revision')
        revision.find(XMLNS + u'timestamp').text = \
            self.wikiutil.buildWikiTimestampNow()
        revision.find(XMLNS + u'id').text = '1'
        dumpio = StringIO(xml.etree.ElementTree.tostring(xmlEl))
        dumpio.name = "import.xml"
        return {u'title': title, u'io': dumpio, u'xmldoc': xmlEl}

    def _fetchToken(self, tokentype, title):
        """Ask the wiki for a `tokentype` token ('import' or 'edit') for `title`.

        Returns the value stored under ``<tokentype>token`` for the first
        page of the reply.
        """
        params = {
            'action': 'query',
            'prop': 'info',
            'intoken': tokentype,
            'titles': title,
        }
        request = APIRequest(self.wiki, params)
        result = request.query()['query']
        if u'warnings' in result:
            print('W: Wiki gave warning: ' + result[u'warnings'][u'info'][u'*'])
        pages = result['pages']
        # Only one title is requested; presumably the reply holds one page.
        return list(pages.values())[0][tokentype + 'token']

    def importPage(self, page):
        """Run an XML import for `page` (a dict as built by readDumpFile).

        The import is skipped when the wiki already holds the same page text.
        Returns 1 if the wiki did not report exactly one imported revision,
        None otherwise.
        """
        title = page[u'title']
        print("I: Importing page %s." % title)

        oldpage = self.wikiutil.retrievePageExportXml(title)
        if oldpage is not None:
            oldtext = self.wikiutil.getPageTextFromXml(oldpage)
            newtext = self.wikiutil.getPageTextFromXml(page[u'xmldoc'])
            if oldtext == newtext:
                print("I: Skipping, no changes.")
                return

        importtoken = self._fetchToken('import', title)

        # now post the page
        params = {'action': 'import', 'token': importtoken}
        request = APIRequest(self.wiki, params)
        request.setMultipart()
        request.changeParam('xml', page[u'io'])
        result = request.query()
        revisions = result[u'import'][0][u'revisions']
        if revisions != 1:
            print("E: Page import failed. Wiki said: %s" % (result,))
            return 1

        self.pagecount = self.pagecount + 1
        self.changecount = self.changecount + revisions

    def importFile(self, path):
        """Upload the file at `path` to the wiki unless its sha1 is unchanged.

        The expected checksum is read from ``path + '.sha1'`` and compared
        with the sha1 the wiki reports for the existing file.
        Returns 1 if the upload did not succeed, None otherwise.
        """
        prefix = os.path.commonprefix(
            [path, os.path.join(self.configuration.datapath, 'File')])
        title = path[len(prefix) + 1:]
        print("I: Importing image %s." % title)

        # get new sha1; strip line endings so the comparison below is not
        # defeated by a trailing newline in the checksum file
        with open(path + '.sha1', 'r') as sha1file:
            new_sha1sum = sha1file.read(40).strip('\r\n ')

        # get old sha1 from wiki, don't import if they match
        old_image = self.wikiutil.retrieveImageInfo('File:%s' % title)
        if 'missing' not in old_image:
            old_sha1sum = old_image['imageinfo'][0][u'sha1']
            if old_sha1sum == new_sha1sum:
                print("I: Skipping, no changes.")
                return

        token = self._fetchToken('edit', 'File:%s' % title)

        # now post the real upload request
        params = {
            'action': 'upload',
            'filename': title,
            'token': token,
            'comment': 'Origin-SICEKIT',
            'ignorewarnings': True,
        }
        # Binary mode keeps the payload intact on every platform; the with
        # block closes the file even if the request raises.
        with open(path, 'rb') as f:
            request = APIRequest(self.wiki, params)
            request.setMultipart()
            request.changeParam('file', f)
            result = request.query()

        if result[u'upload'][u'result'] != 'Success':
            print("E: File upload failed. Wiki said: %s" % (result,))
            return 1

        self.pagecount = self.pagecount + 1
        self.changecount = self.changecount + 1

    def run(self):
        """Main entry point for the Importer; imports pages, then files.

        Always returns 0.
        """
        self.pagecount = 0
        self.changecount = 0

        print("I: Looking for pages to import in %s."
              % self.configuration.datapath)
        pagelist = self._buildPageList()
        print("I: Now importing pages.")
        for page in pagelist:
            self.importPage(page)

        print("I: Looking for images to import in %s."
              % self.configuration.datapath)
        filelist = self._buildFileList()
        print("I: Now importing images.")
        for path in filelist:
            self.importFile(path)

        print("I: Imported %d pages, resulting in %d changes."
              % (self.pagecount, self.changecount))
        return 0
class Exporter(object):
    """Export the pages of configuration.export_category, and the images
    they use, from the wiki into configuration.datapath.
    """

    def __init__(self, configuration, wiki):
        """Remember the configuration and wrap `wiki` in a WikiUtil."""
        self.configuration = configuration
        self.wikiutil = WikiUtil(wiki)
        self.wiki = self.wikiutil.wiki
        # run() resets these; they are initialised here as well so that
        # exportPage()/exportImage() can also be called on their own.
        self.pagecount = 0
        self.bytecount = 0

    def buildPageList(self):
        """Return the members of the configured export category."""
        return self.wikiutil.retrieveCategoryMemberList(
            self.configuration.export_category)

    def buildPageFilesystemPath(self, page, extension='.xml'):
        """Map the title of `page` to a ``(directory, filename)`` tuple.

        Every ':' in the title is treated like '/'; the text before the
        last separator becomes a directory below the data path and the
        remainder, plus `extension`, becomes the file name.
        """
        parts = page[u'title'].replace(':', '/').rsplit('/', 1)
        if len(parts) == 1:
            # No separator at all: the file lives directly in the data path.
            parts = ['', parts[0]]
        directory, filename = parts
        return (os.path.join(self.configuration.datapath, directory),
                filename + extension)

    def writeDumpFile(self, page, xmlElement):
        """Write `xmlElement` to the dump file for `page`.

        Creates the target directory if needed and returns whatever
        wikiutil.writeXmlToFile returns (callers add it to the byte count).
        """
        directory, filename = self.buildPageFilesystemPath(page)
        if not os.path.exists(directory):
            os.makedirs(directory)
        return self.wikiutil.writeXmlToFile(
            os.path.join(directory, filename), xmlElement)

    def exportPage(self, page):
        """Fetch the export XML of `page`, anonymise it and write it to disk."""
        title = page[u'title']
        print("I: Exporting page %s." % title)
        xmldoc = self.wikiutil.retrievePageExportXml(title)

        # remove stuff we don't need / don't want to leak
        revision = xmldoc.find(XMLNS + u'page/' + XMLNS + u'revision')
        comment = revision.find(XMLNS + u'comment')
        # Compare against None: an Element without children is falsy, so a
        # plain truth test would never see an existing <comment> element.
        if comment is not None:
            revision.remove(comment)
        contributor = revision.find(XMLNS + u'contributor')
        contributor.find(XMLNS + u'username').text = u'SICEKIT'
        contributor.find(XMLNS + u'id').text = u'0'
        siteinfo = xmldoc.find(XMLNS + u'siteinfo')
        siteinfo.find(XMLNS + u'sitename').text = u'SICEKIT'
        siteinfo.find(XMLNS + u'base').text = u'chrome:///sicekit'
        siteinfo.find(XMLNS + u'generator').text = u'SICEKIT'
        siteinfo.remove(siteinfo.find(XMLNS + u'namespaces'))

        written = self.writeDumpFile(page, xmldoc)
        self.pagecount = self.pagecount + 1
        self.bytecount = self.bytecount + written

    def buildImageList(self, pages):
        """Return the titles of all images used on `pages`."""
        titles = '|'.join(page[u'title'] for page in pages)
        params = {'action': 'query', 'prop': 'images', 'titles': titles}
        request = APIRequest(self.wiki, params)
        result_pages = request.query()['query']['pages']

        images = []
        # filter page list so we only get images
        for page in result_pages.values():
            if 'images' in page:
                images.extend(img[u'title'] for img in page['images'])
        return images

    def exportImage(self, title):
        """Download the image `title` and store its sha1 next to it.

        Prints a warning and returns without writing anything when the wiki
        reports the image as missing.
        """
        print("I: Exporting image %s." % title)
        page = self.wikiutil.retrieveImageInfo(title)
        if 'missing' in page:
            print("W: Image %s does not exist." % title)
            return

        imageinfo = page['imageinfo'][0]
        url = imageinfo[u'url']
        sha1 = imageinfo[u'sha1']

        directory = self.buildPageFilesystemPath(page)[0]
        if not os.path.exists(directory):
            os.makedirs(directory)
        image_path = os.path.join(
            *self.buildPageFilesystemPath(page, extension=''))
        sha1_path = os.path.join(
            *self.buildPageFilesystemPath(page, extension='.sha1'))

        downloaded = self.wikiutil.downloadFile(url, image_path)
        # The checksum is written as encoded bytes, hence binary mode.
        with open(sha1_path, 'wb') as sha1file:
            sha1file.write(sha1.encode('ascii'))

        self.bytecount = self.bytecount + downloaded + 40  # sha1 is 40byte
        # The image and its .sha1 file are counted as two objects.
        self.pagecount = self.pagecount + 2

    def run(self):
        """Main entry point for the Exporter.

        Wipes the export directory if it exists, exports all pages of the
        configured category and then the images those pages use.
        Always returns 0.
        """
        if os.path.exists(self.configuration.datapath):
            print("I: Wiping export directory %s."
                  % self.configuration.datapath)
            shutil.rmtree(self.configuration.datapath)

        print("I: Exporting pages to %s." % self.configuration.datapath)
        pages = self.buildPageList()
        self.pagecount = 0
        self.bytecount = 0
        for page in pages:
            self.exportPage(page)

        print("I: Exporting images.")
        images = self.buildImageList(pages)
        for title in images:
            self.exportImage(title)

        print("I: Exported %d bytes in %d objects."
              % (self.bytecount, self.pagecount))
        return 0