def _new_page(self, pageid):
    """Make *pageid* the current page and rotate the output file if needed.

    Steps, in order:

    1. If *pageid* differs from the current page, the revision ids
       collected for the previous page (if any) are written as a single
       space-separated ``<pageid>:revs`` record, the revision list is
       reset and *pageid* becomes the current page.
    2. If a maker is open and either there is no current page (``None``)
       or the maker reports a size of at least ``self.maxsize``, the
       maker is finished and dropped.
    3. If no maker is open and there is a current page, a new
       ``CDBMaker`` is created from ``self.pathpat`` with the running
       file index, and the index is incremented.

    Passing ``None`` therefore flushes and finishes without opening a
    new file.
    """
    if self._pageid != pageid:
        pending = self._revids
        if pending:
            revs_record = ' '.join(map(str, pending))
            self._maker.add('%s:revs' % self._pageid, revs_record)
        self._revids = []
        self._pageid = pageid

    no_page = self._pageid is None
    maker = self._maker
    # get_size() is only consulted when a page is still active.
    if maker is not None and (no_page or self.maxsize <= maker.get_size()):
        maker.finish()
        self._maker = None

    if self._maker is None and not no_page:
        target = self.pathpat % {'index': self._index}
        self._maker = CDBMaker(target)
        self._index += 1
def _new_page(self, pageid):
    """Switch the writer to *pageid*, flushing state left by the last page.

    When *pageid* is not the current page, any revision ids gathered so
    far are stored under ``<pageid>:revs`` (joined with single spaces),
    the list is cleared and *pageid* is recorded as current.

    Afterwards an open maker is finished and discarded when there is no
    current page (``None``) or its reported size has reached
    ``self.maxsize``; and when no maker is open but a page is current, a
    fresh ``CDBMaker`` is opened at ``self.pathpat % {"index": ...}`` and
    the file index advances by one.
    """
    if self._pageid != pageid:
        collected = self._revids
        if collected:
            joined = " ".join(map(str, collected))
            self._maker.add("%s:revs" % self._pageid, joined)
        self._revids = []
        self._pageid = pageid

    finishing = self._pageid is None
    current = self._maker
    # The size is only queried while a page is still active.
    if current is not None and (finishing or self.maxsize <= current.get_size()):
        current.finish()
        self._maker = None

    if self._maker is None and not finishing:
        next_path = self.pathpat % {"index": self._index}
        self._maker = CDBMaker(next_path)
        self._index += 1
class WikiDBWriter(object):
    """Write wiki pages and their revisions into a series of CDB files.

    Records are stored through a ``CDBMaker`` under these keys:

    * ``<pageid>:title`` -- the encoded page title
    * ``<pageid>:revs`` -- space-separated revision ids of the page
    * ``<pageid>/<revid>:wiki<ext>`` and ``<pageid>/<revid>:text<ext>`` --
      encoded content passed through ``compress()``

    Each time a page is started, the current file is finished if its
    reported size has reached ``maxsize`` and a new one is opened at
    ``pathpat % {"index": n}``.

    Expected call order per page: ``add_page``, then ``add_revid`` for a
    revision followed by ``add_wiki`` / ``add_text`` for it. Call
    ``close()`` when done, or use the writer as a context manager.

    Args:
        pathpat: ``%``-style pattern with an ``index`` mapping key, used
            to name each output file.
        ext: suffix appended to every wiki/text key.
        codec: codec used to encode titles and content; characters that
            cannot be encoded are dropped.
        maxsize: size at which the current file is rotated when the next
            page starts.
    """

    def __init__(self, pathpat, ext="", codec="utf-8", maxsize=2 ** 31):
        self.pathpat = pathpat
        self.ext = ext
        self.codec = codec
        self.maxsize = maxsize
        self._index = 0  # number substituted into pathpat for the next file
        self._maker = None  # maker of the file being written, if any
        self._pageid = None  # page currently being written, if any
        self._revids = []  # revision ids registered for the current page

    def __enter__(self):
        """Return the writer itself so it can be used in a ``with`` block."""
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """Close the writer; exceptions from the block are not suppressed."""
        self.close()
        return False

    def _new_page(self, pageid):
        """Make *pageid* current, flushing revisions and rotating files.

        Passing ``None`` flushes the pending revision list and finishes
        the open file without starting a new one.
        """
        if self._pageid != pageid:
            if self._revids:
                revs = " ".join(str(revid) for revid in self._revids)
                self._maker.add("%s:revs" % self._pageid, revs)
            self._revids = []
            self._pageid = pageid

        closing = self._pageid is None
        if self._maker is not None:
            # The size is only queried while a page is still active.
            if closing or self.maxsize <= self._maker.get_size():
                self._maker.finish()
                self._maker = None

        if self._maker is None and not closing:
            path = self.pathpat % {"index": self._index}
            self._maker = CDBMaker(path)
            self._index += 1

    def _add_data(self, key, value):
        """Encode *value*, run it through ``compress()`` and store it."""
        data = value.encode(self.codec, "ignore")
        # compress() is defined elsewhere in the project; it receives the
        # key together with the encoded payload.
        data = compress(key, data)
        self._maker.add(key, data)

    def _add_revision_data(self, kind, pageid, revid, content):
        """Store *content* under ``<pageid>/<revid>:<kind><ext>``.

        Raises:
            AssertionError: if *revid* was not registered with
                ``add_revid`` for the current page.
        """
        # Raised explicitly (instead of ``assert``) so the check survives
        # ``python -O``; the exception type is kept for compatibility.
        if revid not in self._revids:
            raise AssertionError(
                "revision %r was not registered with add_revid()" % (revid,)
            )
        key = "%s/%s:%s" % (pageid, revid, kind)
        key += self.ext
        self._add_data(key, content)

    def close(self):
        """Flush the pending revision list and finish the open file."""
        self._new_page(None)

    def add_page(self, pageid, title):
        """Start page *pageid* and store its title.

        Raises:
            ValueError: if *pageid* is ``None``, which is reserved for
                "no current page"; no state is modified in that case.
        """
        if pageid is None:
            raise ValueError("pageid must not be None; use close() to finish")
        self._new_page(pageid)
        title = title.encode(self.codec, "ignore")
        self._maker.add("%s:title" % pageid, title)

    def add_revid(self, pageid, revid):
        """Register *revid* as a revision of the current page.

        Raises:
            AssertionError: if *pageid* is not the current page.
        """
        # Explicit raise so the check is not stripped under ``python -O``.
        if self._pageid != pageid:
            raise AssertionError(
                "page %r is not the current page (%r)" % (pageid, self._pageid)
            )
        self._revids.append(revid)

    def add_wiki(self, pageid, revid, wiki):
        """Store *wiki* under ``<pageid>/<revid>:wiki<ext>``.

        Raises:
            AssertionError: if *revid* was not registered with ``add_revid``.
        """
        self._add_revision_data("wiki", pageid, revid, wiki)

    def add_text(self, pageid, revid, wiki):
        """Store *wiki* under ``<pageid>/<revid>:text<ext>``.

        Raises:
            AssertionError: if *revid* was not registered with ``add_revid``.
        """
        self._add_revision_data("text", pageid, revid, wiki)
class WikiDBWriter(object):
    """Write wiki pages and their revisions into a series of CDB files.

    Records are stored through a ``CDBMaker`` under these keys:

    * ``<pageid>:title`` -- the encoded page title
    * ``<pageid>:revs`` -- space-separated revision ids of the page
    * ``<pageid>/<revid>:wiki<ext>`` and ``<pageid>/<revid>:text<ext>`` --
      encoded content passed through ``compress()``

    Each time a page is started, the current file is finished if its
    reported size has reached ``maxsize`` and a new one is opened at
    ``pathpat % {'index': n}``.

    Expected call order per page: ``add_page``, then ``add_revid`` for a
    revision followed by ``add_wiki`` / ``add_text`` for it. Call
    ``close()`` when done, or use the writer as a context manager.

    Args:
        pathpat: ``%``-style pattern with an ``index`` mapping key, used
            to name each output file.
        ext: suffix appended to every wiki/text key.
        codec: codec used to encode titles and content; characters that
            cannot be encoded are dropped.
        maxsize: size at which the current file is rotated when the next
            page starts.
    """

    def __init__(self, pathpat, ext='', codec='utf-8', maxsize=2**31):
        self.pathpat = pathpat
        self.ext = ext
        self.codec = codec
        self.maxsize = maxsize
        self._index = 0  # number substituted into pathpat for the next file
        self._maker = None  # maker of the file being written, if any
        self._pageid = None  # page currently being written, if any
        self._revids = []  # revision ids registered for the current page

    def __enter__(self):
        """Return the writer itself so it can be used in a ``with`` block."""
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """Close the writer; exceptions from the block are not suppressed."""
        self.close()
        return False

    def _new_page(self, pageid):
        """Make *pageid* current, flushing revisions and rotating files.

        Passing ``None`` flushes the pending revision list and finishes
        the open file without starting a new one.
        """
        if self._pageid != pageid:
            if self._revids:
                revs = ' '.join(str(revid) for revid in self._revids)
                self._maker.add('%s:revs' % self._pageid, revs)
            self._revids = []
            self._pageid = pageid

        closing = self._pageid is None
        if self._maker is not None:
            # The size is only queried while a page is still active.
            if closing or self.maxsize <= self._maker.get_size():
                self._maker.finish()
                self._maker = None

        if self._maker is None and not closing:
            path = self.pathpat % {'index': self._index}
            self._maker = CDBMaker(path)
            self._index += 1

    def _add_data(self, key, value):
        """Encode *value*, run it through ``compress()`` and store it."""
        data = value.encode(self.codec, 'ignore')
        # compress() is defined elsewhere in the project; it receives the
        # key together with the encoded payload.
        data = compress(key, data)
        self._maker.add(key, data)

    def _add_revision_data(self, kind, pageid, revid, content):
        """Store *content* under ``<pageid>/<revid>:<kind><ext>``.

        Raises:
            AssertionError: if *revid* was not registered with
                ``add_revid`` for the current page.
        """
        # Raised explicitly (instead of ``assert``) so the check survives
        # ``python -O``; the exception type is kept for compatibility.
        if revid not in self._revids:
            raise AssertionError(
                'revision %r was not registered with add_revid()' % (revid,)
            )
        key = '%s/%s:%s' % (pageid, revid, kind)
        key += self.ext
        self._add_data(key, content)

    def close(self):
        """Flush the pending revision list and finish the open file."""
        self._new_page(None)

    def add_page(self, pageid, title):
        """Start page *pageid* and store its title.

        Raises:
            ValueError: if *pageid* is ``None``, which is reserved for
                "no current page"; no state is modified in that case.
        """
        if pageid is None:
            raise ValueError('pageid must not be None; use close() to finish')
        self._new_page(pageid)
        title = title.encode(self.codec, 'ignore')
        self._maker.add('%s:title' % pageid, title)

    def add_revid(self, pageid, revid):
        """Register *revid* as a revision of the current page.

        Raises:
            AssertionError: if *pageid* is not the current page.
        """
        # Explicit raise so the check is not stripped under ``python -O``.
        if self._pageid != pageid:
            raise AssertionError(
                'page %r is not the current page (%r)' % (pageid, self._pageid)
            )
        self._revids.append(revid)

    def add_wiki(self, pageid, revid, wiki):
        """Store *wiki* under ``<pageid>/<revid>:wiki<ext>``.

        Raises:
            AssertionError: if *revid* was not registered with ``add_revid``.
        """
        self._add_revision_data('wiki', pageid, revid, wiki)

    def add_text(self, pageid, revid, wiki):
        """Store *wiki* under ``<pageid>/<revid>:text<ext>``.

        Raises:
            AssertionError: if *revid* was not registered with ``add_revid``.
        """
        self._add_revision_data('text', pageid, revid, wiki)