class FileJobQueuer(object):
    """Write a file for each new job request"""

    def __init__(self, queue_dir):
        self.queue_dir = utils.ensure_dir(queue_dir)
        self.log = Log('FileJobQueuer')

    def __call__(self, job_type, job_id, args):
        job_file = '%s.job' % os.path.join(self.queue_dir, job_id)
        if os.path.exists(job_file):
            self.log.warn('Job file %r already exists' % job_file)
            return
        open(job_file + '.tmp', 'wb').write('\n'.join(args))
        os.rename(job_file + '.tmp', job_file)

class FileJobQueuer(object):
    """Write a file for each new job request"""

    def __init__(self, queue_dir):
        self.queue_dir = utils.ensure_dir(queue_dir)
        self.log = Log('FileJobQueuer')

    def __call__(self, job_type, job_id, args):
        job_file = '%s.job' % os.path.join(self.queue_dir, job_id)
        if os.path.exists(job_file):
            self.log.warn('Job file %r already exists' % job_file)
            return
        open(job_file + '.tmp', 'wb').write(cPickle.dumps(args))
        os.rename(job_file + '.tmp', job_file)

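# A minimal usage sketch (the queue directory and argv are made up;
# assumes mwlib.utils and mwlib.log are importable). Writing to a ".tmp"
# file and renaming afterwards makes the job file appear atomically, so
# a concurrent poller never sees a half-written job:
queuer = FileJobQueuer('/tmp/queue')
queuer('render', 'job-0001', ['mw-render', '--help'])
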
#! /usr/bin/env python

# Copyright (c) 2007-2008 PediaPress GmbH
# See README.txt for additional licensing information.

import os
from ConfigParser import ConfigParser
import StringIO

from mwlib import utils, metabook
from mwlib.log import Log

log = Log('mwlib.utils')


def wiki_mwapi(
    base_url=None,
    template_blacklist=None,
    template_exclusion_category=None,
    username=None,
    password=None,
    domain=None,
    script_extension=None,
    **kwargs
):
    from mwlib import mwapidb
    return mwapidb.WikiDB(
        base_url,
        template_blacklist=template_blacklist,
        template_exclusion_category=template_exclusion_category,
        username=username,
        password=password,
        domain=domain,
        script_extension=script_extension,
        **kwargs
    )

# Copyright (c) 2007-2009 PediaPress GmbH
# See README.rst for additional licensing information.

import os
import sys

try:
    import simplejson as json
except ImportError:
    import json

from mwlib.log import Log

log = Log('mwlib.status')


class Status(object):
    qproxy = None
    stdout = sys.stdout

    def __init__(
        self,
        filename=None,
        podclient=None,
        progress_range=(0, 100),
        status=None,
    ):
        self.filename = filename
        self.podclient = podclient
        if status is not None:
            self.status = status
        else:
            self.status = {}

http://en.wikipedia.org/wiki/Wikipedia:Don%27t_use_line_breaks
http://meta.wikimedia.org/wiki/Help:Advanced_editing
http://meta.wikimedia.org/wiki/Help:HTML_in_wikitext
"""

import re
import time
import copy

from mwlib.parser import Math, Ref, Link, URL, NamedURL  # not used but imported
from mwlib.parser import CategoryLink, SpecialLink, Caption, LangLink  # not used but imported
from mwlib.parser import ArticleLink, InterwikiLink, NamespaceLink
from mwlib.parser import Item, ItemList, Node, Table, Row, Cell, Paragraph, PreFormatted
from mwlib.parser import Section, Style, TagNode, Text, Timeline
from mwlib.parser import ImageLink, Article, Book, Chapter
from mwlib.log import Log

log = Log("advtree")


def _idIndex(lst, el):
    """Return index of first appearance of element el in list lst"""
    for i, e in enumerate(lst):
        if e is el:
            return i
    raise ValueError('element %r not found' % el)


def debug(method):  # use as decorator
    def f(self, *args, **kargs):
        log("\n%s called with %r %r" % (method.__name__, args, kargs))
        log("on %r attrs:%r style:%r" % (self, self.attributes, self.style))
        p = self

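# _idIndex differs from list.index by matching on identity rather than
# equality, which matters for equal-but-distinct nodes:
a, b = [], []
lst = [a, b]
assert lst.index(b) == 0       # equality: [] == [], so the first match wins
assert _idIndex(lst, b) == 1   # identity: b is lst[1]
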
import urllib

from mwlib import parser, uparser, utils
from mwlib.log import Log

log = Log('wikidbbase')


class WikiDBBase(object):
    """Base class for WikiDBs"""

    def getLinkURL(self, link, title, revision=None):
        """Get a full HTTP URL for the given link object, parsed from an
        article in this WikiDB.

        @param link: link node from parser
        @type link: L{mwlib.parser.Link}
        @param title: title of containing article
        @type title: unicode
        @param revision: revision of containing article (optional)
        @type revision: unicode
        @returns: full HTTP URL or None if it could not be constructed
        @rtype: str or NoneType
        """
        if isinstance(link, (parser.ArticleLink,
                             parser.CategoryLink,
                             parser.NamespaceLink)):

#!/usr/bin/env python
# -*- coding: utf-8 -*-

import re

from lxml.builder import ElementMaker

from mwlib.log import Log
from mwlib.pdf.htmlfilters.misc import hash_anchor

log = Log("mwlib.pdf.generators.contributors")
E = ElementMaker()


def generate_article_contributors(articles):
    title = _("Article Sources and Contributors")
    node = E.article(
        {"class": "contributors", "data-pp-footer-text": _("Appendix")},
        E.h1(title, hash_anchor(title)),
    )
    contributors = E.div({"class": ""})
    for article in articles:
        contributors.append(
            E.p(
                E.strong({"class": "title"}, article.title, " "),
                E.span({"class": "label"}, _("Source:"), " "),
                E.span({"class": "url"}, article.url, " "),
                E.span({"class": "label"}, _("Contributors:"), " "),
                E.span({"class": "contributors"}, filter_anon_ip_edits(article.authors)),
            )
        )

import os
import re
import shutil
import tempfile
import time
import urllib
import urllib2
import urlparse

import simplejson

from mwlib import uparser, utils
from mwlib.log import Log

log = Log("mwapidb")

try:
    from mwlib.licenses import lower2normal
except ImportError:
    log.warn('no licenses found')
    lower2normal = {}

# ==============================================================================


def fetch_url(url, ignore_errors=False):
    log.info("fetching %r" % (url,))
    opener = urllib2.build_opener()
    opener.addheaders = [('User-agent', 'mwlib')]
    try:

print "you need to install odfpy: http://opendocumentfellowship.com/projects/odfpy" print "currently only version 0.7 is supported" raise from odf.opendocument import OpenDocumentText from odf import text, dc, meta, table, draw, math, element from mwlib import parser from mwlib.log import Log from mwlib import advtree from mwlib import odfstyles as style from mwlib import writerbase from mwlib.treecleaner import TreeCleaner from mwlib import odfconf log = Log("odfwriter") # using alpha software is challenging as APIs change ------------------- # check for ODF version and monkey patch stuff e = element.Element(qname = ("a","n")) if hasattr(e, "elements"): # odfpy-0.7 def _f(self, c): log("assumming odfpy-0.7x") self.elements.append(e) element.Element.appendChild = _f element.Element.lastChild = property(lambda s:s.elements[-1]) element.Element.setAttribute = element.Element.addAttribute else: # assume the odfpy-08 api is stable # but we don't support this now, as they changed their API # easy_install odfpy==0.7.0 might help
"""Simplistic threaded job scheduler""" import Queue import threading import traceback from mwlib.log import Log log = Log('mwlib.jobsched') # ============================================================================== class JobScheduler(object): """Simple threaded job scheduler""" def __init__(self, num_threads): """Init instance with a number of worker threads and a job callable @param num_threads: number of threads to start @type num_threads: int """ self.num_threads = num_threads self.job_queue = Queue.Queue() self.semaphore = None def add_job(self, job_id, do_job, **kwargs): """Schedule a job to be executed in a separate thread. The job_id and all additional kwargs are passed to the do_job callable given in the constructor.
#! /usr/bin/env python """WSGI dispatcher base class""" import cgi import os import StringIO import time import traceback from mwlib.log import Log # ============================================================================== log = Log('mwlib.wsgi') # ============================================================================== class Request(object): max_post_data_size = 1024*1024 def __init__(self, env): self.env = env self.method = self.env['REQUEST_METHOD'].upper() self.path = self.env.get('PATH_INFO') self.query = self.multi2single(cgi.parse_qs(self.env.get('QUERY_STRING', ''))) if self.method == 'POST': self.post_data = self.read_post_data() else: self.post_data = {}
import sys
import json

from mwlib.log import Log

log = Log('mwlib.statusfile')


class Status(object):
    def __init__(
        self,
        filename=None,
        podclient=None,
        progress_range=(0, 100),
        auto_dump=True,
    ):
        self.filename = filename
        self.podclient = podclient
        self.status = {}
        self.progress_range = progress_range

    def __call__(self, status=None, progress=None, article=None,
                 auto_dump=True, **kwargs):
        if status is not None and status != self.status.get('status'):
            print 'STATUS: %s' % status
            self.status['status'] = status
        if progress is not None:
            assert 0 <= progress <= 100, 'progress not in range 0..100'
            progress = int(
                self.progress_range[0]
                + progress * (self.progress_range[1] - self.progress_range[0]) / 100
            )

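# Worked example of the progress mapping above: with progress_range
# (50, 100), a local progress of 40% scales to the global value 70,
# i.e. 50 + 40 * (100 - 50) / 100.
progress_range = (50, 100)
progress = 40
assert int(progress_range[0] + progress * (progress_range[1] - progress_range[0]) / 100) == 70
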
#! /usr/bin/env python

# Copyright (c) 2007-2008 PediaPress GmbH
# See README.txt for additional licensing information.

"""usable/user parser"""

from mwlib import parser, scanner, expander
from mwlib.log import Log

log = Log('uparser')


def simplify(node, **kwargs):
    """concatenate text nodes in order to reduce the number of objects"""
    Text = parser.Text
    last = None
    toremove = []
    for i, c in enumerate(node.children):
        if c.__class__ == Text:  # would isinstance be safe?
            if last:
                last.caption += c.caption
                toremove.append(i)
            else:
                last = c
        else:
            simplify(c)
            last = None
    for i, ii in enumerate(toremove):
        del node.children[ii - i]

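# Small illustration of the index bookkeeping at the end of simplify():
# each earlier deletion shifts the remaining indices left by one, which
# is what the `ii - i` correction accounts for.
lst = ['a', 'b', 'c', 'd']
toremove = [1, 3]
for i, ii in enumerate(toremove):
    del lst[ii - i]
assert lst == ['a', 'c']
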
try:
    from __builtin__ import all
except ImportError:
    def all(iterable):
        """all(iterable) -> bool

        Return True if bool(x) is True for all values x in the iterable.
        """
        for x in iterable:
            if not x:
                return False
        return True

# ==============================================================================

log = Log('mwlib.utils')

# ==============================================================================


def fsescape(s):
    """Escape string to be safely used in path names

    @param s: some string
    @type s: basestring
    @returns: escaped string
    @rtype: str
    """
    res = []
    for x in s:

#! /usr/bin/env python

import locale
import os
import sys
import traceback

from mwlib.log import Log

import mwlib
from mwlib.pdf import html2pdf

# re-enable sys.setdefaultencoding, which site.py removes at startup
reload(sys)
sys.setdefaultencoding("UTF8")

log = Log("mwlib.pdf.writer")


def patch_logging(output_filename):
    fn = os.path.join(os.path.dirname(output_filename), "render.log")
    mwlib.utils.start_logging(fn)


def writer(env, output, status_callback, debug=True, lang=None, x=False):
    if not lang:
        _locale = locale.getlocale(locale.LC_NUMERIC)
        if _locale:
            lang = _locale[0]
    crop_marks = False
    if not x:
        patch_logging(output)
    renderer = html2pdf.PrincePdfWriter(env,

import sys
import tempfile
import time
import traceback
import urllib
import urllib2
import urlparse
import UserDict

from mwlib.log import Log
from hashlib import md5

# provide all() for python 2.4
all = all

log = Log('mwlib.utils')


def get_print_template_maker(pattern):
    assert "$1" in pattern, 'pattern %r does not contain "$1"' % pattern

    def make_print_template(title):
        if ':' in title:
            p, s = title.split(":", 1)
            s = pattern.replace("$1", s)
            return '%s:%s' % (p, s)
        else:
            return pattern.replace("$1", title)

    return make_print_template

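# Behavior of get_print_template_maker, derived from the code above (the
# pattern is a made-up example): a namespace prefix, if present, is kept
# and the pattern is applied only to the rest of the title.
make = get_print_template_maker("Print/$1")
assert make("Foo") == "Print/Foo"
assert make("Template:Foo") == "Template:Print/Foo"
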
import cgi
import StringIO

try:
    import xml.etree.ElementTree as ET
except ImportError:
    from elementtree import ElementTree as ET

from mwlib import parser
from mwlib import advtree
from mwlib import xmltreecleaner
from mwlib.log import Log
from mwlib import writerbase

version = "0.2"

log = Log("xmlwriter")


def showNode(obj):
    attrs = obj.__dict__.keys()
    log(obj.__class__.__name__)
    stuff = [
        "%s => %r" % (k, getattr(obj, k))
        for k in attrs
        if k not in ("_parentref", "children") and getattr(obj, k)
    ]
    if stuff:
        log(repr(stuff))


def indent(elem, level=0):
    i = u"\n" + level * u"  "

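# indent() is cut off after computing the per-level prefix. It follows
# the well-known ElementTree pretty-printing recipe, which continues
# roughly like this (a sketch, not the verbatim original):
def indent_sketch(elem, level=0):
    i = u"\n" + level * u"  "
    if len(elem):
        if not elem.text or not elem.text.strip():
            elem.text = i + u"  "
        for child in elem:
            indent_sketch(child, level + 1)
        if not child.tail or not child.tail.strip():
            child.tail = i
    else:
        if level and (not elem.tail or not elem.tail.strip()):
            elem.tail = i
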
class FileJobPoller(object):
    def __init__(self, queue_dir, processing_dir, sleep_time=1, max_num_jobs=5):
        self.queue_dir = utils.ensure_dir(queue_dir)
        self.processing_dir = utils.ensure_dir(processing_dir)
        self.sleep_time = sleep_time
        self.max_num_jobs = max_num_jobs
        self.num_jobs = 0
        self.log = Log('FileJobPoller')

    def run_forever(self):
        self.log.info('running with a max. of %d jobs' % self.max_num_jobs)
        try:
            while True:
                filename = self.poll()
                if self.num_jobs < self.max_num_jobs and filename:
                    self.num_jobs += 1
                    self.start_job(filename)
                    self.log.info('child started: have %d jobs' % self.num_jobs)
                else:
                    time.sleep(self.sleep_time)
                while self.num_jobs > 0:
                    pid, rc = os.waitpid(-1, os.WNOHANG)
                    if (pid, rc) == (0, 0):
                        break
                    self.num_jobs -= 1
                    self.log.info('child killed: have %d jobs' % self.num_jobs)
        except KeyboardInterrupt:
            while self.num_jobs > 0:
                os.waitpid(-1, 0)
                self.num_jobs -= 1
            self.log.info('exit')

    def poll(self):
        files = []
        for filename in os.listdir(self.queue_dir):
            path = os.path.join(self.queue_dir, filename)
            if not os.path.isfile(path):
                continue
            heapq.heappush(files, (os.stat(path).st_mtime, filename))
        if files:
            return files[0][1]
        return None

    def start_job(self, filename):
        src = os.path.join(self.queue_dir, filename)
        path = os.path.join(self.processing_dir, filename)
        try:
            os.rename(src, path)
        except Exception as exc:
            self.log.warn('Could not rename %r to %r: %s' % (src, path, exc))
            traceback.print_exc()
            return
        self.log.info('starting job %r' % filename)
        pid = os.fork()
        if pid == 0:
            try:
                args = open(path, 'rb').read().split('\n')
                self.log.info('executing: %r' % args)
                try:
                    rc = subprocess.call(args)
                    assert rc == 0, 'non-zero return code'
                except Exception as exc:
                    self.log.warn('Error executing %r: %s' % (args, exc))
                    traceback.print_exc()
            finally:
                try:
                    os.unlink(path)
                except Exception as exc:
                    self.log.warn('Could not remove file %r: %s' % (path, exc))
                    traceback.print_exc()
                os._exit(0)

import urllib
import urllib2

try:
    from hashlib import md5
except ImportError:
    from md5 import md5

import shutil
import sys
import time
import tempfile
import re

from mwlib import uparser, utils
from mwlib.log import Log

log = Log("netdb")

# ==============================================================================


def hashpath(name):
    """Compute hashpath for an image in the same way as MediaWiki does

    @param name: name of an image
    @type name: unicode
    @returns: hashpath to image
    @rtype: str
    """
    name = name.replace(' ', '_')
    name = name[:1].upper() + name[1:]

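# hashpath() cuts off before the hash computation. A sketch of the rest,
# following MediaWiki's documented image-path layout (first hex digit,
# then first two hex digits of the md5 of the normalized name); an
# assumption, not the verbatim original:
def hashpath_sketch(name):
    name = name.replace(' ', '_')
    name = name[:1].upper() + name[1:]
    d = md5(name.encode('utf-8')).hexdigest()
    return '/'.join([d[0], d[:2], name])
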
# -*- coding: utf-8 -*-

# Copyright (c) 2007-2008 PediaPress GmbH
# See README.txt for additional licensing information.

import os
from mwlib import parser, rendermath, timeline
import urllib
import cgi
# from PIL import Image
from mwlib.log import Log

log = Log("htmlwriter")


class HTMLWriter(object):
    imglevel = 0
    namedLinkCount = 1

    def __init__(self, out, images=None, math_renderer=None):
        self.out = out
        self.level = 0
        self.images = images
        # self.images = imgdb.ImageDB(os.path.expanduser("~/images"))
        self.references = []
        if math_renderer is None:
            self.math_renderer = rendermath.Renderer()
        else:
            self.math_renderer = math_renderer

import os
import re
import urllib

from PIL import Image
from lxml import etree
from lxml.builder import ElementMaker

from mwlib.log import Log
from mwlib.pdf import utils
from mwlib.pdf.htmlfilters.sizetools import resize_node_width_to_columns

from .. import config
from ..config import column_width_pt

log = Log("mwlib.pdf.html2pdf")
E = ElementMaker()

number_re = re.compile(r"^(\d+)")
valid_image_extensions = [".png", ".jpg", ".gif", ".svg", ".jpeg"]


def fix_image_src(article):
    """Replace img src with path on local disc"""
    for img in article.dom.xpath("//img"):
        src = img.get("src")
        if os.path.splitext(src)[1] == ".gif":
            img_name = src.split("/")[-1]

class FileJobPoller(object):
    def __init__(self, queue_dir, processing_dir=None, sleep_time=1, max_num_jobs=5):
        self.queue_dir = utils.ensure_dir(queue_dir)
        self.sleep_time = sleep_time
        self.max_num_jobs = max_num_jobs
        self.num_jobs = 0
        self.log = Log('FileJobPoller')
        self.files = []

    def _reap_children(self):
        while self.num_jobs > 0:
            try:
                if self.num_jobs == self.max_num_jobs:
                    # all job slots busy: block until a child exits
                    flags = 0
                else:
                    flags = os.WNOHANG
                pid, rc = os.waitpid(-1, flags)
            except OSError as exc:
                self.log.ERROR('waitpid(-1) failed: %s' % exc)
                break
            if (pid, rc) == (0, 0):
                break
            self.num_jobs -= 1
            self.log.info('child %s exited: %s. have %d jobs' % (pid, rc, self.num_jobs))

    def run_forever(self):
        self.log.info('running with a max. of %d jobs' % self.max_num_jobs)
        while True:
            try:
                self.poll()
                if not self.files:
                    time.sleep(self.sleep_time)
                while self.num_jobs < self.max_num_jobs and self.files:
                    self.start_job(self.files.pop())
                self._reap_children()
            except KeyboardInterrupt:
                while self.num_jobs > 0:
                    os.waitpid(-1, 0)
                    self.num_jobs -= 1
                break
            except Exception as err:
                self.log.error("caught exception: %r" % (err,))
                traceback.print_exc()
        self.log.info('exit')

    def poll(self):
        if self.files:
            return
        files = []
        for filename in os.listdir(self.queue_dir):
            if filename.endswith(".tmp"):
                continue
            path = os.path.join(self.queue_dir, filename)
            if not os.path.isfile(path):
                continue
            try:
                mtime = os.stat(path).st_mtime
            except Exception as exc:
                self.log.ERROR('Could not stat %r: %s' % (path, exc))
                continue
            files.append((mtime, filename))
        files.sort(reverse=True)
        self.files = [x[1] for x in files]

    def start_job(self, filename):
        """Fork, and execute job from given file

        @returns: whether a new job has been started
        @rtype: bool
        """
        src = os.path.join(self.queue_dir, filename)
        try:
            args = cPickle.loads(open(src, 'rb').read())
        finally:
            os.unlink(src)
        self.log.info('starting job %r' % filename)
        pid = os.fork()
        self.num_jobs += 1
        if pid != 0:
            return True
        # child process:
        try:
            os.execvp(args[0], args)
        except BaseException:
            traceback.print_exc()
        finally:
            # only reached if execvp failed
            self.log.warn('error running %r' % (args,))
            os._exit(10)

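# End-to-end sketch of how FileJobQueuer and FileJobPoller are meant to
# cooperate (directory and argv are hypothetical): the queuer pickles an
# argv list into queue_dir; the poller picks it up, forks, and execs it.
queuer = FileJobQueuer('/tmp/queue')
queuer('render', 'job-0001', ['echo', 'hello world'])

poller = FileJobPoller('/tmp/queue', max_num_jobs=2)
poller.run_forever()  # forks a child which exec's ['echo', 'hello world']
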
# Copyright (c) 2007-2009 PediaPress GmbH
# See README.rst for additional licensing information.

"""
class for defining DTD-like rules for the tree
"""

from advtree import Article
from mwlib.log import Log

log = Log("sanitychecker")

# -----------------------------------------------------------
# Constraints
# -----------------------------------------------------------


class ConstraintBase(object):
    def __init__(self, *klasses):
        self.klasses = klasses

    def test(self, nodes):
        return True, None  # passed

    def __repr__(self):
        return "%s(%s)" % (
            self.__class__.__name__,
            ", ".join(k.__name__ for k in self.klasses),
        )


class Forbid(ConstraintBase):
    """forbid any of the classes"""

    def test(self, nodes):
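        # (body truncated above; a plausible implementation, inferred from
        # ConstraintBase's (passed, offending_node) return convention;
        # an assumption, not the verbatim original:)
        for n in nodes:
            if n.__class__ in self.klasses:
                return False, n
        return True, None
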
#! /usr/bin/env python """WSGI dispatcher base class""" import cgi import os import StringIO import time import traceback from mwlib.log import Log # ============================================================================== log = Log('mwlib.wsgi') # ============================================================================== class Request(object): max_post_data_size = 1024 * 1024 def __init__(self, env): self.env = env self.method = self.env['REQUEST_METHOD'].upper() self.path = self.env.get('PATH_INFO') self.query = self.multi2single( cgi.parse_qs(self.env.get('QUERY_STRING', ''))) if self.method == 'POST': self.post_data = self.read_post_data() else: self.post_data = {}
def check_service():
    import sys
    import time

    from mwlib.client import Client
    from mwlib.log import Log
    from mwlib import utils

    log = Log('mw-check-service')

    parser = optparse.OptionParser(usage="%prog [OPTIONS] BASEURL METABOOK")
    default_url = 'http://localhost:8899/'
    parser.add_option(
        '-u', '--url',
        help='URL of HTTP interface to mw-serve (default: %r)' % default_url,
        default=default_url,
    )
    parser.add_option(
        '-w', '--writer',
        help='writer to use for rendering (default: rl)',
        default='rl',
    )
    parser.add_option(
        '--max-render-time',
        help='maximum number of seconds rendering may take (default: 120)',
        default='120',
        metavar='SECONDS',
    )
    parser.add_option(
        '--save-output',
        help='if specified, save rendered file with given filename',
        metavar='FILENAME',
    )
    parser.add_option(
        '-l', '--logfile',
        help='log output to LOGFILE',
    )
    parser.add_option(
        '--report-from-mail',
        help='sender of error mails (--report-recipient also needed)',
        metavar='EMAIL',
    )
    parser.add_option(
        '--report-recipient',
        help='recipient of error mails (--report-from-mail also needed)',
        metavar='EMAIL',
    )
    options, args = parser.parse_args()

    if len(args) != 2:
        parser.error('exactly 2 arguments required')
    base_url = args[0]
    metabook = open(args[1], 'rb').read()
    max_render_time = int(options.max_render_time)

    if options.report_recipient and options.report_from_mail:
        def report(msg):
            utils.report(
                system='mw-check-service',
                subject='mw-check-service error',
                from_email=options.report_from_mail.encode('utf-8'),
                mail_recipients=[options.report_recipient.encode('utf-8')],
                msg=msg,
            )
    else:
        report = log.ERROR

    writer = options.writer
    if options.logfile:
        utils.start_logging(options.logfile)

    client = Client(options.url)

    def check_req(command, **kwargs):
        try:
            success = client.request(command, kwargs, is_json=(command != 'download'))
        except Exception as exc:
            report('request failed: %s' % exc)
            sys.exit(1)
        if success:
            return client.response
        if client.error is not None:
            report('request failed: %s' % client.error)
            sys.exit(1)
        else:
            report('request failed: got response code %d' % client.response_code)
            sys.exit(1)

    start_time = time.time()

    log.info('sending render command')
    response = check_req(
        'render',
        base_url=base_url,
        metabook=metabook,
        writer=writer,
        force_render=True,
    )
    collection_id = response['collection_id']

    while True:
        time.sleep(1)
        if time.time() - start_time > max_render_time:
            report('rendering exceeded allowed time of %d s' % max_render_time)
            sys.exit(2)
        log.info('checking status')
        response = check_req(
            'render_status',
            collection_id=collection_id,
            writer=writer,
        )
        if response['state'] == 'finished':
            break

    log.info('downloading')
    response = check_req(
        'download',
        collection_id=collection_id,
        writer=writer,
    )

    if len(response) < 100:
        report('got suspiciously small file from download: size is %d Bytes' % len(response))
        sys.exit(3)
    log.info('resulting file is %d Bytes' % len(response))

    if options.save_output:
        log.info('saving to %r' % options.save_output)
        open(options.save_output, 'wb').write(response)

    render_time = time.time() - start_time
    log.info('rendering ok, took %fs' % render_time)

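# Hypothetical invocation of this check script (URL and filename made up):
#
#   mw-check-service --writer rl --max-render-time 300 \
#       http://localhost/w/ metabook.json
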
# Copyright (c) 2007-2009 PediaPress GmbH
# See README.rst for additional licensing information.

from mwlib import expander, nshandling, metabook
from mwlib.log import Log
from mwlib.refine import core, compat

log = Log('refine.uparser')


def parseString(
    title=None,
    raw=None,
    wikidb=None,
    revision=None,
    lang=None,
    magicwords=None,
    expandTemplates=True,
):
    """parse article with title from raw mediawiki text"""
    uniquifier = None
    siteinfo = None
    assert title is not None, 'no title given'
    if raw is None:
        page = wikidb.normalize_and_get_page(title, 0)
        if page:
            raw = page.rawtext
        else:
            raw = None
    assert raw is not None, "cannot get article %r" % (title,)
    input = raw

#! /usr/bin/env python

# Copyright (c) 2007-2008 PediaPress GmbH
# See README.txt for additional licensing information.

"""expand magic variables/colon functions
http://meta.wikimedia.org/wiki/Help:Colon_function
http://meta.wikimedia.org/wiki/Help:Magic_words
http://meta.wikimedia.org/wiki/ParserFunctions
"""

import datetime
import urllib

from mwlib.log import Log
from mwlib import expr

log = Log("expander")


def singlearg(fun):
    def wrap(self, args):
        rl = args
        if not rl:
            a = u''
        else:
            a = rl[0]
        return fun(self, a)

    return wrap

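# Illustration of the decorator's contract: singlearg adapts a
# one-argument handler to the (self, argument-list) calling convention
# used by the expander, substituting u'' when no argument is present.
# The class and method here are made up for demonstration:
class _Demo(object):
    @singlearg
    def lcfirst(self, a):
        return a[:1].lower() + a[1:]

assert _Demo().lcfirst([u'Foo']) == u'foo'
assert _Demo().lcfirst([]) == u''
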
import time
import urlparse
import urllib
import urllib2
import httplib

try:
    import simplejson as json
except ImportError:
    import json

from mwlib.log import Log
from mwlib.utils import get_multipart
from mwlib import conf

log = Log("mwapidb")


class PODClient(object):
    def __init__(self, posturl, redirecturl=None):
        self.posturl = posturl.encode('utf-8')
        self.redirecturl = redirecturl

    def _post(self, data, content_type=None):
        if content_type is not None:
            headers = {'Content-Type': content_type}
        else:
            headers = {}
        return urllib2.urlopen(
            urllib2.Request(self.posturl, data, headers=headers)
        ).read()

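# Hypothetical usage of PODClient._post (endpoint and payload made up):
# posts a form-encoded body and returns the response body as a string.
client = PODClient(u'http://pod.example.com/upload')
body = client._post('status=finished',
                    content_type='application/x-www-form-urlencoded')
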
# Copyright (c) 2007-2009 PediaPress GmbH
# See README.rst for additional licensing information.

import os
import zipfile
import shutil
import tempfile
import urllib

import sqlite3dbm

from hashlib import sha1

from mwlib import myjson as json
from mwlib import nshandling, utils
from mwlib.log import Log

log = Log('nuwiki')


class page(object):
    expanded = 0

    def __init__(self, meta, rawtext):
        self.__dict__.update(meta)
        self.rawtext = rawtext


class DumbJsonDB(object):
    def __init__(self, fn, allow_pickle=False):
        self.fn = fn
        self.allow_pickle = allow_pickle

#! /usr/bin/env python
# -*- coding: utf-8 -*-

# Copyright (c) 2007-2008 PediaPress GmbH
# See README.txt for additional licensing information.

import sys
import os
import re

from mwlib.scanner import tokenize, TagToken, EndTagToken
from mwlib.log import Log

log = Log("parser")

tag_li = TagToken("li")
tag_div = TagToken("div")


class TokenSet(object):
    def __init__(self, lst):
        self.types = set()
        self.values = set()
        for x in lst:
            if isinstance(x, type):
                self.types.add(x)
            else:
                self.values.add(x)

    def __contains__(self, x):
        return x in self.values or type(x) in self.types

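# Quick illustration of TokenSet's two-level membership test: plain
# values match via the values set, token instances via their type.
ts = TokenSet([u'\n', TagToken])
assert u'\n' in ts     # matched as a value
assert tag_li in ts    # matched by its type, TagToken
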
import os

from lxml.builder import ElementMaker

from mwlib.log import Log
from pycountry import languages

log = Log("mwlib.pdf.generators.front_matter")
E = ElementMaker()


def generate_cover_page(env, lang):
    site_name = env.wiki.siteinfo["general"].get("sitename")
    title = env.metabook.title or _("Wiki Articles")
    subtitle = env.metabook.subtitle or _("A collection from {}".format(site_name))
    editor = env.metabook.editor or "ckepper"
    article_count = get_article_count(env.metabook.items)
    img_path = os.path.join(os.path.dirname(__file__), "..", "images")
    wikipedia_logo = E.img({
        "src": os.path.join(img_path, "Wikipedia_wordmark.svg"),
        "class": "wikipedia_logo",
    })
    pediapress_logo = E.img({
        "src": os.path.join(img_path, "pediapress_square_bw.svg"),
        "class": "pediapress_logo",
    })

import sys
import tempfile
import time
import traceback
import urllib
import urllib2
import urlparse
import UserDict

from mwlib.log import Log
from hashlib import md5

# provide all() for python 2.4
all = all

log = Log('mwlib.utils')


def fsescape(s):
    """Escape string to be safely used in path names

    @param s: some string
    @type s: basestring
    @returns: escaped string
    @rtype: str
    """
    res = []
    for x in s:
        c = ord(x)

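# fsescape() is cut off right after c = ord(x). One way to finish it,
# consistent with the docstring (ASCII-only, path-safe str output); the
# exact escape scheme here is an assumption, not the verbatim original:
def fsescape_sketch(s):
    res = []
    for x in s:
        c = ord(x)
        if c < 128 and x not in u'~/\\':
            res.append(chr(c))
        else:
            res.append('~%d~' % c)
    return ''.join(res)
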