def do_render(self, collection_id, post_data, is_new=False):
    params = self._get_params(post_data, collection_id=collection_id)
    metabook_data = params.metabook_data
    base_url = params.base_url
    writer = params.writer

    if writer not in name2writer:
        return self.error_response("unknown writer %r" % writer)

    if is_new and not metabook_data:
        return self.error_response('POST argument metabook or collection_id required')
    if not is_new and metabook_data:
        return self.error_response('Specify either metabook or collection_id, not both')

    if base_url and not self.is_good_baseurl(base_url):
        log.bad("bad base_url: %r" % (base_url, ))
        return self.error_response(
            "bad base_url %r. check your $wgServer and $wgScriptPath variables."
            " localhost, 192.168.*.* and 127.0.*.* are not allowed."
            % (base_url, ))

    log.info('render %s %s' % (collection_id, writer))

    response = {
        'collection_id': collection_id,
        'writer': writer,
        'is_cached': False,
    }

    self.qserve.qadd(channel="makezip",
                     payload=dict(params=params.__dict__),
                     jobid="%s:makezip" % (collection_id, ),
                     timeout=20 * 60)

    self.qserve.qadd(channel="render",
                     payload=dict(params=params.__dict__),
                     jobid="%s:render-%s" % (collection_id, writer),
                     timeout=20 * 60)

    return response
def do_zip_post(self, collection_id, post_data, is_new=False):
    params = self._get_params(post_data, collection_id=collection_id)

    try:
        post_data['metabook']
    except KeyError as exc:
        return self.error_response('POST argument required: %s' % exc)

    pod_api_url = params.pod_api_url
    if pod_api_url:
        result = json.loads(
            unicode(urllib2.urlopen(pod_api_url, data="any").read(), 'utf-8'))
        post_url = result['post_url'].encode('utf-8')
        response = {
            'state': 'ok',
            'redirect_url': result['redirect_url'].encode('utf-8'),
        }
    else:
        try:
            post_url = post_data['post_url']
        except KeyError:
            return self.error_response('POST argument required: post_url')
        response = {'state': 'ok'}

    log.info('zip_post %s %s' % (collection_id, pod_api_url))
    params.post_url = post_url

    self.qserve.qadd(channel="post",  # jobid="%s:post" % collection_id,
                     payload=dict(params=params.__dict__),
                     timeout=20 * 60)
    return response
def do_download(self, collection_id, post_data, is_new=False):
    if is_new:
        return self.error_response('POST argument required: collection_id')

    writer = post_data.get('writer', self.default_writer)

    try:
        log.info('download %s %s' % (collection_id, writer))

        output_path = self.get_path(collection_id, self.output_filename, writer)
        os.utime(output_path, None)
        status = self.read_status_file(collection_id, writer)

        response = Response()
        response.app_iter = FileIterable(output_path)
        response.content_length = os.path.getsize(output_path)

        if 'content_type' in status:
            response.content_type = status['content_type'].encode('utf-8', 'ignore')
        else:
            log.warn('no content type in status file')

        if 'file_extension' in status:
            response.headers['Content-Disposition'] = 'inline; filename=collection.%s' % (
                status['file_extension'].encode('utf-8', 'ignore'),
            )
        else:
            log.warn('no file extension in status file')

        return response
    except Exception as exc:
        log.ERROR('exception in do_download(): %r' % exc)
        return Response(status=500)
def new_collection(self, post_data):
    collection_id = make_collection_id(post_data)
    collection_dir = self.get_collection_dir(collection_id)
    if not os.path.isdir(collection_dir):
        log.info('Creating new collection dir %r' % collection_dir)
        os.makedirs(collection_dir)
    return collection_id
def new_collection(self, post_data):
    collection_id = make_collection_id(post_data)
    colldir = self.get_collection_dir(collection_id)
    try:
        log.info('Creating directory %r' % colldir)
        os.mkdir(colldir)
    except OSError as exc:
        # the directory may already exist; any other error is fatal
        if getattr(exc, 'errno') not in (errno.EEXIST, errno.EISDIR):
            raise
    return collection_id  # callers assign this id (see do_render)
def postRenderKillCommand(collection_id, serviceurl, writer):
    log.info('POSTing render_kill command %r' % collection_id)
    data = {
        "collection_id": collection_id,
        "writer": writer,
        "command": "render_kill",
    }
    data = urllib.urlencode(data)
    res = urllib2.urlopen(urllib2.Request(serviceurl.encode("utf8"), data)).read()
    return json.loads(unicode(res, 'utf-8'))
def removeContainerTable(containertable):
    newtables = []
    for row in containertable:
        for cell in row:
            for item in cell:
                if item.__class__ == Table:
                    newtables.append(item)
                else:
                    log.info("unmatched node:", item.__class__)
    return newtables
def postRenderCommand(metabook, baseurl, serviceurl, writer):
    log.info('POSTing render command %s %s' % (baseurl, writer))
    data = {
        "metabook": json.dumps(metabook),
        "writer": writer,
        "writer_options": writer_options.get(writer, ''),
        "base_url": baseurl.encode('utf-8'),
        "command": "render",
    }
    data = urllib.urlencode(data)
    res = urllib2.urlopen(urllib2.Request(serviceurl.encode("utf8"), data)).read()
    return json.loads(unicode(res, 'utf-8'))
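# Usage sketch for postRenderCommand(): submit a render job and poll until it
# leaves the "progress" state, mirroring the loop in checkservice() below.
# getRenderStatus() is the status helper used there; the 'rl' default is the
# PDF writer checked by checkDoc() and is an illustrative assumption here.
def render_and_wait(metabook, baseurl, serviceurl, writer='rl'):
    res = postRenderCommand(metabook, baseurl, serviceurl, writer)
    collection_id = res['collection_id']  # matches the response dict built by do_render()
    while True:
        time.sleep(1)
        status = getRenderStatus(collection_id, serviceurl, writer)
        if status['state'] != 'progress':
            return status  # "finished" or "failed"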
def new_collection(self, post_data):
    collection_id = make_collection_id(post_data)
    collection_dirs = self.get_collection_dirs(collection_id)
    for i in range(len(collection_dirs)):
        p = os.path.join(*collection_dirs[:i + 1])
        if os.path.isdir(p):
            continue
        try:
            log.info('Creating directory %r' % p)
            os.mkdir(p)
        except OSError as exc:
            # a concurrent request may have created the directory already
            if getattr(exc, 'errno') not in (errno.EEXIST, errno.EISDIR):
                raise
    return collection_id  # callers assign this id (see do_render)
def no_job_queue(job_type, collection_id, args):
    """Just spawn a new process for the given job"""

    if os.name == 'nt':
        kwargs = {}
    else:
        kwargs = {'close_fds': True}
    try:
        log.info('queueing %r' % args)
        subprocess.Popen(args, **kwargs)
    except OSError as exc:
        raise RuntimeError('Could not execute command %r: %s' % (
            args[0], exc,
        ))
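# Usage sketch for no_job_queue(): spawn a render job directly. 'mw-render'
# is the executable configured via the --mwrender option in serve() below;
# the exact flags and paths shown are assumptions for illustration, not a
# documented invocation.
def spawn_render(collection_id, zip_path, output_path, writer='rl'):
    no_job_queue('render', collection_id,
                 ['mw-render', '-c', zip_path, '-w', writer, '-o', output_path])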
def checkservice(api, serviceurl, baseurl, writer, maxarticles,
                 from_email=None,
                 mail_recipients=None,
                 render_timeout=RENDER_TIMEOUT_DEFAULT,  # seconds or None
                 ):
    # arts = getRandomArticles(api, min=1, max=maxarticles)
    # log.info('random articles: %r' % arts)
    # metabook = getMetabook(arts)
    metabook = getRandomMetabook(api, min=5, max=maxarticles)
    if not metabook:
        reportError('render', metabook, dict(reason="getRandomMetabook Failed"),
                    baseurl, writer,
                    from_email=from_email,
                    mail_recipients=mail_recipients)
        time.sleep(60)
        return False  # nothing to render; don't POST an empty metabook

    res = postRenderCommand(metabook, baseurl, serviceurl, writer)
    collection_id = res['collection_id']

    st = time.time()
    while True:
        time.sleep(1)
        res = getRenderStatus(res["collection_id"], serviceurl, writer)
        if res["state"] != "progress":
            break
        if render_timeout and (time.time() - st) > render_timeout:
            log.timeout('Killing render proc for collection ID %r' % collection_id)
            r = postRenderKillCommand(collection_id, serviceurl, writer)
            if r['killed']:
                log.info('Killed.')
            else:
                log.warn('Nothing to kill!?')
            res["state"] = "failed"
            res["reason"] = "render_timeout (%ds)" % render_timeout
            break

    if res["state"] == "finished":
        d = download(res["collection_id"], serviceurl, writer).read()
        log.info("received %s document with %d bytes" % (writer, len(d)))
        checkDoc(d, writer)
        return True
    else:
        reportError('render', metabook, res, baseurl, writer,
                    from_email=from_email,
                    mail_recipients=mail_recipients,
                    )
        return False
def purge_cache(max_age, cache_dir):
    """Remove all subdirectories of cache_dir whose mtime is before now-max_age

    @param max_age: max age of directories in seconds
    @type max_age: int

    @param cache_dir: cache directory
    @type cache_dir: basestring
    """

    now = time.time()
    for path in get_collection_dirs(cache_dir):
        if now - os.stat(path).st_mtime < max_age:
            continue
        try:
            log.info('removing directory %r' % path)
            shutil.rmtree(path)
        except Exception as exc:
            log.ERROR('could not remove directory %r: %s' % (path, exc))
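# Usage sketch for purge_cache(): a cron-style cleanup wrapper. The directory
# is the --cache-dir default from the serve() option parser below; the 24-hour
# age is an illustrative assumption.
def purge_default_cache(hours=24):
    purge_cache(hours * 60 * 60, cache_dir='/var/cache/mw-serve/')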
def getRandomMetabook(api, min=1, max=100):
    b = bookshelf.Bookshelf(api)
    booknames = b.booknames()
    num_articles = -1
    mbook = None
    tries = 100
    # parenthesized for clarity: keep trying until the article count is in range
    while tries and (num_articles > max or num_articles < min):
        tries -= 1
        if tries == 0:
            return None
        bn = random.choice(booknames)
        log.info("getRandomMetabook trying %r" % bn)
        c = api.content_query(bn)
        if not c:
            continue
        mbook = mwlib.metabook.parse_collection_page(c)
        num_articles = len(mbook.articles())
        log.info("getRandomMetabook num arts min:%d this:%d max:%d"
                 % (min, num_articles, max))
    mbook['book_page'] = bn
    addLicense(mbook)
    return mbook
def do_render_kill(self, collection_id, post_data, is_new=False):
    if is_new:
        return self.error_response('POST argument required: collection_id')

    writer = post_data.get('writer', self.default_writer)
    log.info('render_kill %s %s' % (collection_id, writer))

    killed = False
    # pid_path = self.get_path(collection_id, self.pid_filename, writer)
    # try:
    #     pid = int(open(pid_path, 'rb').read())
    #     os.kill(pid, signal.SIGKILL)
    #     killed = True
    # except (OSError, ValueError, IOError):
    #     pass
    return {
        'collection_id': collection_id,
        'writer': writer,
        'killed': killed,
    }
def getParsedTemplate(self, name):
    if name.startswith("[["):
        return None

    if name == '':
        return ''

    if name.startswith(":"):
        log.info("including article")
        raw = self.db.getRawArticle(name[1:])
    else:
        if len(name) > 1:
            name = name[0].capitalize() + name[1:]
        name = self.templateprefix + name

        # Check to see if this is a template in our blacklist --
        # one that we don't want to bother rendering.
        if name in self.templateblacklist:
            log.info("Skipping template " + name.encode('utf8'))
            raw = None
        else:
            raw = self.db.getTemplate(name, True)

    if raw is None:
        log.warn("no template", repr(name))
        res = None
    else:
        log.info("parsing template", repr(name))
        res = parse(raw)
        if DEBUG:
            print "TEMPLATE:", name, repr(raw)
            res.show()
    return res
def do_render_status(self, collection_id, post_data, is_new=False):
    if is_new:
        return self.error_response('POST argument required: collection_id')

    def retval(**kw):
        return dict(collection_id=collection_id, writer=writer, **kw)

    writer = post_data.get('writer', self.default_writer)
    log.info('render_status %s %s' % (collection_id, writer))

    output_path = self.get_path(collection_id, self.output_filename, writer)
    if os.path.exists(output_path):
        return retval(state="finished")

    error_path = self.get_path(collection_id, self.error_filename, writer)
    if os.path.exists(error_path):
        text = unicode(open(error_path, 'rb').read(), 'utf-8', 'ignore')
        if text.startswith('traceback\n'):
            metabook_path = self.get_path(collection_id, self.metabook_filename)
            if os.path.exists(metabook_path):
                metabook = unicode(open(metabook_path, 'rb').read(), 'utf-8', 'ignore')
            else:
                metabook = None
            mail_sent = self.get_path(collection_id, "mail-sent")
            if not os.path.exists(mail_sent):
                self.send_report_mail('rendering failed',
                                      collection_id=collection_id,
                                      writer=writer,
                                      error=text,
                                      metabook=metabook,
                                      )
                open(mail_sent, "w")  # touch marker file so the report is mailed only once
        return retval(state="failed", error=text)

    status = self.read_status_file(collection_id, writer)
    if status.get('state') == 'error':
        return retval(state="failed", error="unknown error")
    return retval(state="progress", status=status)
def reformatTable(t, maxCols):
    nodeInfo = getContentType(t)
    numCols = maxCols
    numRows = len(t.rows)

    # if the table is empty, onlyTables and onlyLists are False
    onlyTables = len(t.children) > 0
    onlyLists = len(t.children) > 0
    if not nodeInfo:
        onlyTables = False
        onlyLists = False
    for row in nodeInfo:
        for cell in row:
            cellNodeTypes, cellTextLen = cell
            if not all(nodetype == Table for nodetype in cellNodeTypes):
                onlyTables = False
            if not all(nodetype == ItemList for nodetype in cellNodeTypes):
                onlyLists = False

    if onlyTables and numCols > 1:
        log.info('got table only table - removing container')
        t = removeContainerTable(t)
    if onlyLists and numCols > 2:
        log.info('got list only table - reducing columns to 2')
        t = reduceCols(t, colnum=2)
    if onlyLists:
        log.info('got list only table - splitting list items')
        t = splitListItems(t)
    return t
def checkDoc(data, writer):
    log.info('checkDoc %s' % writer)
    assert len(data) > 0
    if writer == 'rl':
        fd, filename = tempfile.mkstemp(suffix='.pdf')
        os.write(fd, data)
        os.close(fd)
        try:
            popen = subprocess.Popen(args=['pdfinfo', filename], stdout=subprocess.PIPE)
            rc = popen.wait()
            assert rc == 0, 'pdfinfo rc = %d' % rc
            for line in popen.stdout:
                line = line.strip()
                if not line.startswith('Pages:'):
                    continue
                num_pages = int(line.split()[-1])
                assert num_pages > 0, 'PDF is empty'
                break
            else:
                raise RuntimeError('invalid PDF')
        finally:
            os.unlink(filename)
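# Usage sketch: fetch a finished collection via download() (defined later in
# this file) and validate it. The default service URL is the checker's
# --serviceurl default and is an assumption for illustration.
def check_finished_collection(collection_id,
                              serviceurl='http://tools.pediapress.com/mw-serve/'):
    data = download(collection_id, serviceurl, 'rl').read()
    checkDoc(data, 'rl')  # raises (assert/RuntimeError) if the PDF is broken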
def clean_cache(max_age, cache_dir):
    """Clean all subdirectories of cache_dir whose mtime is before now-max_age

    @param max_age: max age of directories in seconds
    @type max_age: int

    @param cache_dir: cache directory
    @type cache_dir: basestring
    """

    now = time.time()
    for d in os.listdir(cache_dir):
        path = os.path.join(cache_dir, d)
        if not os.path.isdir(path) or not collection_id_rex.match(d):
            log.warn('unknown item in cache dir %r: %r' % (cache_dir, d))
            continue
        if now - os.stat(path).st_mtime < max_age:
            continue
        try:
            log.info('removing directory %r' % path)
            shutil.rmtree(path)
        except Exception as exc:
            log.ERROR('could not remove directory %r: %s' % (path, exc))
def getParsedTemplate(self, name):
    if name.startswith("[["):
        return None

    if name == '':
        return ''

    if name.startswith(":"):
        log.info("including article")
        raw = self.db.getRawArticle(name[1:])
    else:
        if len(name) > 1:
            name = name[0].capitalize() + name[1:]
        name = self.templateprefix + name

        # Check to see if this is a template in our blacklist --
        # one that we don't want to bother rendering.
        if name in self.templateblacklist:
            log.info("Skipping template " + name.encode('utf8'))
            raw = None
        else:
            raw = self.db.getTemplate(name, True)

    if raw is None:
        log.warn("no template", repr(name))
        res = None
    else:
        # add newline to templates starting with a (semi)colon, or tablemarkup
        # XXX what else? see test_implicit_newline in test_expander
        if raw.startswith(":") or raw.startswith(";") or raw.startswith("{|"):
            raw = '\n' + raw
        log.info("parsing template", repr(name))
        res = Parser(raw).parse()
        if DEBUG:
            print "TEMPLATE:", name, repr(raw)
            res.show()
    return res
def do_download(self, collection_id, post_data, is_new=False):
    if is_new:
        return self.error_response('POST argument required: collection_id')

    writer = post_data.get('writer', self.default_writer)
    w = name2writer[writer]

    jobid = "%s:render-%s" % (collection_id, writer)
    res = self.qserve.qinfo(jobid=jobid) or {}
    download_url = res["result"]["url"]
    print "fetching", download_url
    f = urllib2.urlopen(download_url)
    info = f.info()

    response = Response()
    for h in ("Content-Length",):  # "Content-Type", "Content-Disposition"):
        v = info.getheader(h)
        if v:
            print "copy header:", h, v
            response.headers[h] = v

    if w.content_type:
        response.content_type = w.content_type

    if w.file_extension:
        response.headers['Content-Disposition'] = 'inline; filename=collection.%s' % (
            w.file_extension.encode('utf-8', 'ignore'))

    def readdata():
        while 1:
            d = f.read(4096)
            if not d:
                break
            yield d

    response.app_iter = readdata()
    return response

    # everything below is unreachable: an older filesystem/redirect-based
    # implementation left behind the early return above
    try:
        log.info('download %s %s' % (collection_id, writer))

        redir = os.environ.get("NSERVE_REDIRECT")
        if redir:
            response = Response()
            response.status = 301
            url = "%s/%s/%s/output.%s" % (redir, collection_id[:2], collection_id, writer)
            print "REDIRECT:", url
            response.location = url
            return response

        if 1:
            response = Response()
            response.headers["X-Accel-Redirect"] = "/%s/%s/output.%s" % (
                collection_id[:2], collection_id, writer)
            if w.content_type:
                response.content_type = w.content_type
            if w.file_extension:
                response.headers['Content-Disposition'] = 'inline; filename=collection.%s' % (
                    w.file_extension.encode('utf-8', 'ignore'))
            return response

        output_path = self.get_path(collection_id, "output", writer)
        os.utime(output_path, None)
        data = open(output_path, "rb").read()
        response = Response(data, content_length=len(data))

        if w.content_type:
            response.content_type = w.content_type

        if w.file_extension:
            response.headers['Content-Disposition'] = 'inline; filename=collection.%s' % (
                w.file_extension.encode('utf-8', 'ignore'))
        return response
    except Exception as exc:
        log.ERROR('exception in do_download(): %r' % exc)
        return Response(status=500)
def download(colid, serviceurl, writer):
    log.info('download')
    data = urllib.urlencode({"command": "download",
                             "collection_id": colid,
                             'writer': writer})
    return urllib2.urlopen(urllib2.Request(serviceurl.encode("utf8"), data))  # fh
def _parse_qs(qs):
    # normalize a list of "host:port" strings into (host, port) tuples
    for i, x in enumerate(qs):
        if ":" in x:
            host, port = x.split(":", 1)
            port = int(port)
            qs[i] = (host, port)
        else:
def serve():
    from SocketServer import ForkingMixIn, ThreadingMixIn
    from wsgiref.simple_server import make_server, WSGIServer
    from flup.server import fcgi, fcgi_fork, scgi, scgi_fork

    class ForkingWSGIServer(ForkingMixIn, WSGIServer):
        pass

    class ThreadingWSGIServer(ThreadingMixIn, WSGIServer):
        pass

    proto2server = {
        'http': ForkingWSGIServer,
        'http_threaded': ThreadingWSGIServer,
        'fcgi': fcgi_fork.WSGIServer,
        'fcgi_threaded': fcgi.WSGIServer,
        'scgi': scgi_fork.WSGIServer,
        'scgi_threaded': scgi.WSGIServer,
    }

    parser = optparse.OptionParser(usage="%prog [OPTIONS]")
    parser.add_option('-l', '--logfile',
                      help='log output to LOGFILE')
    parser.add_option('-d', '--daemonize', action='store_true',
                      help='become daemon as soon as possible')
    parser.add_option('--pid-file',
                      help='write PID of daemonized process to this file')
    parser.add_option('-P', '--protocol',
                      help='one of %s (default: http)' % ', '.join(proto2server.keys()),
                      default='http')
    parser.add_option('-p', '--port',
                      help='port to listen on (default: 8899)',
                      default='8899')
    parser.add_option('-i', '--interface',
                      help='interface to listen on (default: 0.0.0.0)',
                      default='0.0.0.0')
    parser.add_option('--cache-dir',
                      help='cache directory (default: /var/cache/mw-serve/)',
                      default='/var/cache/mw-serve/')
    parser.add_option('--mwrender',
                      help='(path to) mw-render executable',
                      default='mw-render')
    parser.add_option('--mwrender-logfile',
                      help='global logfile for mw-render',
                      metavar='LOGFILE')
    parser.add_option('--mwzip',
                      help='(path to) mw-zip executable',
                      default='mw-zip')
    parser.add_option('--mwzip-logfile',
                      help='global logfile for mw-zip',
                      metavar='LOGFILE')
    parser.add_option('--mwpost',
                      help='(path to) mw-post executable',
                      default='mw-post')
    parser.add_option('--mwpost-logfile',
                      help='global logfile for mw-post',
                      metavar='LOGFILE')
    parser.add_option('-q', '--queue-dir',
                      help='queue dir of mw-watch (if not specified, no queue is used)')
    parser.add_option('-m', '--method',
                      help='prefork or threaded (default: prefork)',
                      default='prefork')
    parser.add_option('--max-requests',
                      help='maximum number of requests a child process can handle '
                           'before it is killed, irrelevant for --method=threaded '
                           '(default: 0 = no limit)',
                      default='0', metavar='NUM')
    parser.add_option('--min-spare',
                      help='minimum number of spare processes/threads (default: 2)',
                      default='2', metavar='NUM')
    parser.add_option('--max-spare',
                      help='maximum number of spare processes/threads (default: 5)',
                      default='5', metavar='NUM')
    parser.add_option('--max-children',
                      help='maximum number of processes/threads (default: 50)',
                      default='50', metavar='NUM')
    parser.add_option('--report-from-mail',
                      help='sender of error mails (--report-recipient also needed)',
                      metavar='EMAIL')
    parser.add_option('--report-recipient',
                      help='recipient of error mails (--report-from-mail also needed)',
                      metavar='EMAIL')
    parser.add_option('--clean-cache',
                      help='clean cache files that have not been touched for at least '
                           'HOURS hours and exit',
                      metavar='HOURS')
    options, args = parser.parse_args()

    if options.clean_cache:
        try:
            options.clean_cache = int(options.clean_cache)
        except ValueError:
            parser.error('--clean-cache value must be an integer')
        from mwlib.serve import clean_cache
        clean_cache(options.clean_cache * 60 * 60, cache_dir=options.cache_dir)
        return

    if options.protocol not in proto2server:
        parser.error('unsupported protocol (must be one of %s)' % (
            ', '.join(proto2server.keys()), ))

    def to_int(opt_name):
        try:
            setattr(options, opt_name, int(getattr(options, opt_name)))
        except ValueError:
            parser.error('--%s value must be an integer' % opt_name.replace('_', '-'))

    to_int('port')
    to_int('max_requests')
    to_int('min_spare')
    to_int('max_spare')
    to_int('max_children')

    if options.method not in ('prefork', 'threaded'):
        parser.error('the only supported values for --method are "prefork" and "threaded"')

    from mwlib import serve, log, utils

    log = log.Log('mw-serve')

    if options.logfile:
        utils.start_logging(options.logfile)

    if options.daemonize:
        utils.daemonize()
    if options.pid_file:
        open(options.pid_file, 'wb').write('%d\n' % os.getpid())

    if options.method == 'threaded':
        options.protocol += '_threaded'
        flup_kwargs = {'maxThreads': options.max_children}
    else:
        flup_kwargs = {
            'maxChildren': options.max_children,
            'maxRequests': options.max_requests,
        }

    log.info("serving %s on %s:%s" % (options.protocol, options.interface, options.port))

    if options.report_recipient and options.report_from_mail:
        report_from_mail = options.report_from_mail.encode('utf-8')
        report_recipients = [options.report_recipient.encode('utf-8')]
    else:
        report_from_mail = None
        report_recipients = None

    app = serve.Application(
        cache_dir=options.cache_dir,
        mwrender_cmd=options.mwrender,
        mwrender_logfile=options.mwrender_logfile,
        mwzip_cmd=options.mwzip,
        mwzip_logfile=options.mwzip_logfile,
        mwpost_cmd=options.mwpost,
        mwpost_logfile=options.mwpost_logfile,
        queue_dir=options.queue_dir,
        report_from_mail=report_from_mail,
        report_recipients=report_recipients,
    )

    if options.protocol.startswith('http'):
        server = make_server(options.interface, options.port, app,
                             server_class=proto2server[options.protocol])
        try:
            server.serve_forever()
        except KeyboardInterrupt:
            pass
    else:
        serverclass = proto2server[options.protocol]
        serverclass(app,
                    bindAddress=(options.interface, options.port),
                    minSpare=options.min_spare,
                    maxSpare=options.max_spare,
                    **flup_kwargs).run()

    if options.pid_file:
        utils.safe_unlink(options.pid_file)

    log.info('exit.')
    try:
        base_url = post_data['base_url']
        writer = post_data.get('writer', self.default_writer)
    except KeyError as exc:
        return self.error_response('POST argument required: %s' % exc)

    writer_options = post_data.get('writer_options', '')
    template_blacklist = post_data.get('template_blacklist', '')
    template_exclusion_category = post_data.get('template_exclusion_category', '')
    login_credentials = post_data.get('login_credentials', '')
    force_render = bool(post_data.get('force_render'))
    script_extension = post_data.get('script_extension', '')

    if not collection_id:
        collection_id = self.new_collection(post_data)

    log.info('render %s %s' % (collection_id, writer))

    response = {
        'collection_id': collection_id,
        'writer': writer,
        'is_cached': False,
    }

    pid_path = self.get_path(collection_id, self.pid_filename, writer)
    if os.path.exists(pid_path):
        log.info('mw-render already running for collection %r' % collection_id)
        return response

    output_path = self.get_path(collection_id, self.output_filename, writer)
    if os.path.exists(output_path):
        if force_render:
    except KeyError as exc:
        return self.error_response('POST argument required: %s' % exc)

    writer_options = post_data.get('writer_options', '')
    template_blacklist = post_data.get('template_blacklist', '')
    template_exclusion_category = post_data.get('template_exclusion_category', '')
    print_template_prefix = post_data.get('print_template_prefix', '')
    print_template_pattern = post_data.get('print_template_pattern', '')
    login_credentials = post_data.get('login_credentials', '')
    force_render = bool(post_data.get('force_render'))
    script_extension = post_data.get('script_extension', '')
    language = post_data.get('language', '')

    if not collection_id:
        collection_id = self.new_collection(post_data)

    log.info('render %s %s' % (collection_id, writer))

    response = {
        'collection_id': collection_id,
        'writer': writer,
        'is_cached': False,
    }

    pid_path = self.get_path(collection_id, self.pid_filename, writer)
    if os.path.exists(pid_path):
        log.info('mw-render already running for collection %r' % collection_id)
        return response

    output_path = self.get_path(collection_id, self.output_filename, writer)
    if os.path.exists(output_path):
        if force_render:
def main():
    parser = OptionParser(usage="%prog [OPTIONS]")
    parser.add_option("-b", "--baseurl", help="baseurl of wiki")
    parser.add_option("-w", "--writer", help="writer to use")
    parser.add_option('-l', '--logfile', help='log output to LOGFILE')
    parser.add_option('-f', '--from-email',
                      help='From: email address for error mails')
    parser.add_option('-r', '--mail-recipients',
                      help='To: email addresses ("," separated) for error mails')
    parser.add_option('-m', '--max-narticles',
                      help='maximum number of articles for random collections (min is 1)',
                      default=10)
    parser.add_option('-s', '--serviceurl',
                      help="location of the mw-serve server to test",
                      default='http://tools.pediapress.com/mw-serve/',
                      # default='http://localhost:8899/mw-serve/',
                      )

    use_help = 'Use --help for usage information.'

    options, args = parser.parse_args()
    assert options.from_email

    if options.logfile:
        utils.start_logging(options.logfile)

    baseurl2api = {}
    baseurls = options.baseurl.split()
    for baseurl in baseurls:
        baseurl2api[baseurl] = mwapidb.APIHelper(baseurl)

    maxarts = int(options.max_narticles)

    mail_recipients = None
    if options.mail_recipients:
        mail_recipients = options.mail_recipients.split(',')

    ok_count = 0
    fail_count = 0
    while True:
        baseurl = random.choice(baseurls)
        try:
            ok = checkservice(baseurl2api[baseurl],
                              options.serviceurl,
                              baseurl,
                              options.writer,
                              maxarts,
                              from_email=options.from_email,
                              mail_recipients=mail_recipients,
                              )
            if ok:
                ok_count += 1
                log.check('OK')
            else:
                fail_count += 1
                log.check('FAIL!')
        except KeyboardInterrupt:
            break
        except BaseException:
            fail_count += 1
            log.check('EPIC FAIL!!!')
            utils.report(system=system,
                         subject='checkservice() failed, waiting 60seconds',
                         from_email=options.from_email,
                         mail_recipients=mail_recipients,
                         )
            sys.exc_clear()
            time.sleep(60)
        log.info('%s, %s\tok: %d, failed: %d' % (
            baseurl, options.writer, ok_count, fail_count,
        ))
    except Exception as exc:
        report('request failed: %s' % exc)
        sys.exit(1)

    if success:
        return client.response

    if client.error is not None:
        report('request failed: %s' % client.error)
        sys.exit(1)
    else:
        report('request failed: got response code %d' % client.response_code)
        sys.exit(1)

start_time = time.time()

log.info('sending render command')
response = check_req('render',
                     base_url=base_url,
                     metabook=metabook,
                     writer=writer,
                     force_render=True,
                     )
collection_id = response['collection_id']

while True:
    time.sleep(1)

    if time.time() - start_time > max_render_time:
        report('rendering exceeded allowed time of %d s' % max_render_time)
        sys.exit(2)