Пример #1
0
    def do_render(self, collection_id, post_data, is_new=False):
        """Validate a render request and queue makezip + render jobs.

        Returns a status dict for the client, or an error response dict
        when validation fails.
        """
        params = self._get_params(post_data, collection_id=collection_id)
        writer = params.writer
        if writer not in name2writer:
            return self.error_response("unknown writer %r" % writer)

        # Exactly one of metabook / collection_id must supply the content.
        if is_new and not params.metabook_data:
            return self.error_response('POST argument metabook or collection_id required')
        if params.metabook_data and not is_new:
            return self.error_response('Specify either metabook or collection_id, not both')

        base_url = params.base_url
        if base_url and not self.is_good_baseurl(base_url):
            log.bad("bad base_url: %r" % (base_url, ))
            return self.error_response(
                "bad base_url %r. check your $wgServer and $wgScriptPath variables. localhost, 192.168.*.* and 127.0.*.* are not allowed." % (base_url, ))

        log.info('render %s %s' % (collection_id, writer))

        # Queue the zip build first, then the actual render job.
        self.qserve.qadd(channel="makezip", payload=dict(params=params.__dict__),
                         jobid="%s:makezip" % (collection_id, ), timeout=20 * 60)
        self.qserve.qadd(channel="render", payload=dict(params=params.__dict__),
                         jobid="%s:render-%s" % (collection_id, writer), timeout=20 * 60)

        return {
            'collection_id': collection_id,
            'writer': writer,
            'is_cached': False,
        }
Пример #2
0
    def do_render(self, collection_id, post_data, is_new=False):
        """Kick off rendering for a collection: queue makezip then render."""
        params = self._get_params(post_data, collection_id=collection_id)
        metabook_data = params.metabook_data
        writer = params.writer

        # -- request validation -------------------------------------------
        if writer not in name2writer:
            return self.error_response("unknown writer %r" % writer)
        if is_new and not metabook_data:
            return self.error_response('POST argument metabook or collection_id required')
        if metabook_data and not is_new:
            return self.error_response('Specify either metabook or collection_id, not both')
        base_url = params.base_url
        if base_url and not self.is_good_baseurl(base_url):
            log.bad("bad base_url: %r" % (base_url, ))
            return self.error_response("bad base_url %r. check your $wgServer and $wgScriptPath variables" % (base_url, ))

        log.info('render %s %s' % (collection_id, writer))

        # -- queue the jobs -----------------------------------------------
        self.qserve.qadd(channel="makezip",
                         payload=dict(params=params.__dict__),
                         jobid="%s:makezip" % (collection_id, ),
                         timeout=20 * 60)
        self.qserve.qadd(channel="render",
                         payload=dict(params=params.__dict__),
                         jobid="%s:render-%s" % (collection_id, writer),
                         timeout=20 * 60)

        return {
            'collection_id': collection_id,
            'writer': writer,
            'is_cached': False,
        }
Пример #3
0
    def do_zip_post(self, collection_id, post_data, is_new=False):
        """Queue a 'post' job that uploads the finished ZIP to a target URL."""
        params = self._get_params(post_data, collection_id=collection_id)

        # A metabook is mandatory for zip_post requests.
        try:
            post_data['metabook']
        except KeyError as exc:
            return self.error_response('POST argument required: %s' % exc)

        pod_api_url = params.pod_api_url
        if not pod_api_url:
            # No POD API: the caller must supply the target URL directly.
            if 'post_url' not in post_data:
                return self.error_response('POST argument required: post_url')
            post_url = post_data['post_url']
            response = {'state': 'ok'}
        else:
            # Ask the POD API where to post and where to redirect the user.
            raw = urllib2.urlopen(pod_api_url, data="any").read()
            result = json.loads(unicode(raw, 'utf-8'))
            post_url = result['post_url'].encode('utf-8')
            response = {
                'state': 'ok',
                'redirect_url': result['redirect_url'].encode('utf-8'),
            }

        log.info('zip_post %s %s' % (collection_id, pod_api_url))
        params.post_url = post_url

        self.qserve.qadd(channel="post",  # jobid="%s:post" % collection_id,
                         payload=dict(params=params.__dict__),
                         timeout=20 * 60)
        return response
Пример #4
0
    def do_download(self, collection_id, post_data, is_new=False):
        """Stream a previously rendered document back to the client.

        Returns a Response with the file contents and headers derived from
        the status file, or a 500 Response if anything fails (logged).
        """
        if is_new:
            return self.error_response('POST argument required: collection_id')

        writer = post_data.get('writer', self.default_writer)
        
        try:
            log.info('download %s %s' % (collection_id, writer))
        
            output_path = self.get_path(collection_id, self.output_filename, writer)
            # Touch the output so mtime-based cache cleanup sees it as fresh.
            os.utime(output_path, None)
            status = self.read_status_file(collection_id, writer)
            response = Response()
            response.app_iter = FileIterable(output_path)
            response.content_length = os.path.getsize(output_path)
            if 'content_type' in status:
                response.content_type = status['content_type'].encode('utf-8', 'ignore')
            else:
                log.warn('no content type in status file')
            if 'file_extension' in status:
                response.headers['Content-Disposition'] = 'inline; filename=collection.%s' %  (
                    status['file_extension'].encode('utf-8', 'ignore'),
                )
            else:
                log.warn('no file extension in status file')
            return response
        except Exception, exc:
            # Catch-all: report the failure as HTTP 500 instead of crashing.
            log.ERROR('exception in do_download(): %r' % exc)
            return Response(status=500)
Пример #5
0
    def do_zip_post(self, collection_id, post_data, is_new=False):
        """Queue a job that POSTs the finished ZIP (print-on-demand flow)."""
        params = self._get_params(post_data, collection_id=collection_id)

        # 'metabook' must be present in the POST data.
        try:
            post_data['metabook']
        except KeyError as exc:
            return self.error_response('POST argument required: %s' % exc)

        pod_api_url = params.pod_api_url
        if pod_api_url:
            # The POD API tells us the upload target and the user redirect.
            result = json.loads(
                unicode(
                    urllib2.urlopen(pod_api_url, data="any").read(), 'utf-8'))
            post_url = result['post_url'].encode('utf-8')
            response = {
                'state': 'ok',
                'redirect_url': result['redirect_url'].encode('utf-8'),
            }
        else:
            try:
                post_url = post_data['post_url']
            except KeyError:
                return self.error_response('POST argument required: post_url')
            response = {'state': 'ok'}

        log.info('zip_post %s %s' % (collection_id, pod_api_url))
        params.post_url = post_url

        queue_args = dict(
            channel="post",  # jobid="%s:post" % collection_id,
            payload=dict(params=params.__dict__),
            timeout=20 * 60)
        self.qserve.qadd(**queue_args)
        return response
Пример #6
0
 def new_collection(self, post_data):
     """Ensure the directory for a new collection exists; return its id."""
     cid = make_collection_id(post_data)
     target = self.get_collection_dir(cid)
     if not os.path.isdir(target):
         log.info('Creating new collection dir %r' % target)
         os.makedirs(target)
     return cid
Пример #7
0
 def new_collection(self, post_data):
     """Return the collection id for post_data, creating its directory if needed."""
     collection_id = make_collection_id(post_data)
     path = self.get_collection_dir(collection_id)
     if os.path.isdir(path):
         return collection_id
     log.info('Creating new collection dir %r' % path)
     os.makedirs(path)
     return collection_id
Пример #8
0
 def new_collection(self, post_data):
     """Create the directory for a new collection and return its id.

     The directory may already exist (e.g. a concurrent request created
     it): EEXIST/EISDIR from mkdir are ignored, any other OSError is
     propagated.
     """
     collection_id = make_collection_id(post_data)
     colldir = self.get_collection_dir(collection_id)

     try:
         log.info('Creating directory %r' % colldir)
         os.mkdir(colldir)
     except OSError as exc:
         # Tolerate races with other requests creating the same directory.
         if exc.errno not in (errno.EEXIST, errno.EISDIR):
             raise
     # BUGFIX: the id was computed but never returned, unlike the sibling
     # new_collection implementations which all return it.
     return collection_id
Пример #9
0
def postRenderKillCommand(collection_id, serviceurl, writer):
    """POST a render_kill command for collection_id; return the parsed JSON reply."""
    log.info('POSTing render_kill command %r' % collection_id)
    payload = urllib.urlencode({
        "collection_id": collection_id,
        "writer": writer,
        "command": "render_kill",
    })
    request = urllib2.Request(serviceurl.encode("utf8"), payload)
    raw = urllib2.urlopen(request).read()
    return json.loads(unicode(raw, 'utf-8'))
Пример #10
0
def removeContainerTable(containertable):
    """Return the Table children found inside every cell of *containertable*.

    Non-Table nodes are logged and dropped.
    """
    newtables = []
    for row in containertable:
        for cell in row:
            for item in cell:
                if item.__class__ != Table:
                    log.info("unmatched node:", item.__class__)
                    continue
                newtables.append(item)
    return newtables
Пример #11
0
def removeContainerTable(containertable):
    """Flatten a container table into the list of Table nodes inside its cells."""
    inner_tables = []
    for table_row in containertable:
        for table_cell in table_row:
            for child in table_cell:
                is_table = child.__class__ == Table
                if is_table:
                    inner_tables.append(child)
                else:
                    log.info("unmatched node:", child.__class__)
    return inner_tables
Пример #12
0
def postRenderCommand(metabook, baseurl, serviceurl, writer):
    """POST a render command to the service; return the decoded JSON response."""
    log.info('POSTing render command %s %s' % (baseurl, writer))
    form = {
        "metabook": json.dumps(metabook),
        "writer": writer,
        "writer_options": writer_options.get(writer, ''),
        "base_url": baseurl.encode('utf-8'),
        "command": "render",
    }
    body = urllib2.urlopen(
        urllib2.Request(serviceurl.encode("utf8"), urllib.urlencode(form))).read()
    return json.loads(unicode(body, 'utf-8'))
 def new_collection(self, post_data):
     """Compute a collection id and create its (possibly nested) directory path.

     NOTE(review): unlike the sibling new_collection implementations, this
     variant does not return the collection id -- confirm callers don't
     rely on the return value.
     """
     collection_id = make_collection_id(post_data)
     collection_dirs = self.get_collection_dirs(collection_id)
     for i in range(len(collection_dirs)):
         # Build each path prefix in turn so intermediate dirs get created.
         p = os.path.join(*collection_dirs[:i + 1])
         if os.path.isdir(p):
             continue
         try:
             log.info('Creating directory %r' % p)
             os.mkdir(p)
         except OSError, exc:
             # Tolerate races: another request may have created the dir.
             if getattr(exc, 'errno') not in (errno.EEXIST, errno.EISDIR):
                 raise
Пример #14
0
def no_job_queue(job_type, collection_id, args):
    """Just spawn a new process for the given job.

    args is the argv list for the worker process; job_type and
    collection_id are not used by this implementation. Raises
    RuntimeError when the command cannot be started.
    """
    
    # NOTE(review): close_fds is skipped on Windows ('nt') -- presumably
    # because it conflicted with handle inheritance there; confirm.
    if os.name == 'nt':
        kwargs = {}
    else:
        kwargs = {'close_fds': True}
    try:
        log.info('queueing %r' % args)
        subprocess.Popen(args, **kwargs)
    except OSError, exc:
        raise RuntimeError('Could not execute command %r: %s' % (
            args[0], exc,
        ))
def no_job_queue(job_type, collection_id, args):
    """Just spawn a new process for the given job.

    Fire-and-forget: the Popen handle is discarded, the child is not
    waited on. Raises RuntimeError if the command cannot start.
    """
    
    # close_fds is only passed on non-Windows platforms.
    if os.name == 'nt':
        kwargs = {}
    else:
        kwargs = {'close_fds': True}
    try:
        log.info('queueing %r' % args)
        subprocess.Popen(args, **kwargs)
    except OSError, exc:
        raise RuntimeError('Could not execute command %r: %s' % (
            args[0], exc,
        ))
Пример #16
0
def checkservice(api, serviceurl, baseurl, writer, maxarticles,
                 from_email=None,
                 mail_recipients=None,
                 render_timeout=RENDER_TIMEOUT_DEFAULT  # seconds or None
                 ):
    """Render one random metabook via the service and sanity-check the result.

    Returns True when the render finished and the document passed checkDoc,
    False otherwise (failures are reported via reportError).
    """
    metabook = getRandomMetabook(api, min=5, max=maxarticles)
    if not metabook:
        reportError('render', metabook, dict(reason="getRandomMetabook Failed"), baseurl, writer,
                    from_email=from_email,
                    mail_recipients=mail_recipients)
        time.sleep(60)
        # BUGFIX: without a metabook there is nothing to render; previously
        # execution fell through into postRenderCommand(None, ...).
        return False

    res = postRenderCommand(metabook, baseurl, serviceurl, writer)
    collection_id = res['collection_id']
    st = time.time()
    while True:
        time.sleep(1)
        res = getRenderStatus(res["collection_id"], serviceurl, writer)
        if res["state"] != "progress":
            break
        if render_timeout and (time.time() - st) > render_timeout:
            # Render took too long: kill it and mark the run as failed.
            log.timeout('Killing render proc for collection ID %r' % collection_id)
            r = postRenderKillCommand(collection_id, serviceurl, writer)
            if r['killed']:
                log.info('Killed.')
            else:
                log.warn('Nothing to kill!?')
            res["state"] = "failed"
            res["reason"] = "render_timeout (%ds)" % render_timeout
            break
    if res["state"] == "finished":
        d = download(res["collection_id"], serviceurl, writer).read()
        log.info("received %s document with %d bytes" % (writer, len(d)))
        checkDoc(d, writer)
        return True
    reportError('render', metabook, res, baseurl, writer,
                from_email=from_email,
                mail_recipients=mail_recipients,
                )
    return False
def purge_cache(max_age, cache_dir):
    """Remove all subdirectories of cache_dir whose mtime is before now-max_age
    
    @param max_age: max age of directories in seconds
    @type max_age: int
    
    @param cache_dir: cache directory
    @type cache_dir: basestring
    """
    
    now = time.time()
    for path in get_collection_dirs(cache_dir):
        # Keep directories that were touched recently enough.
        if now - os.stat(path).st_mtime < max_age:
            continue
        try:
            log.info('removing directory %r' % path)
            shutil.rmtree(path)
        except Exception, exc:
            # Best-effort cleanup: log and continue with the next directory.
            log.ERROR('could not remove directory %r: %s' % (path, exc))
Пример #18
0
def getRandomMetabook(api, min=1, max=100):
    """Pick a random bookshelf collection with between min and max articles.

    Returns the parsed metabook (with 'book_page' set and a license added),
    or None after 100 failed attempts.
    NOTE(review): the while condition parses as
    ``(tries and num_articles > max) or num_articles < min`` due to operator
    precedence; the in-loop ``tries == 0`` check makes the ``tries and``
    part effectively redundant -- confirm intent before refactoring.
    """
    b = bookshelf.Bookshelf(api)
    booknames = b.booknames()
    num_articles = -1  # sentinel: below any valid min, forces a first iteration
    mbook = None
    tries = 100
    while tries and num_articles > max or num_articles < min:
        tries -= 1
        if tries == 0:
            return None
        bn = random.choice(booknames)
        log.info("getRandomMetabook trying %r" % bn)
        c = api.content_query(bn)
        if not c:
            # Book page could not be fetched; try another one.
            continue
        mbook = mwlib.metabook.parse_collection_page(c)
        num_articles = len(mbook.articles())
        log.info("getRandomMetabook num arts min:%d this:%d max:%d" % (min, num_articles, max))
    mbook['book_page'] = bn
    addLicense(mbook)
    return mbook
Пример #19
0
    def do_render_kill(self, collection_id, post_data, is_new=False):
        """Handle a render_kill command.

        Actual process killing is currently disabled (see commented code);
        the reply always reports killed=False.
        """
        if is_new:
            return self.error_response('POST argument required: collection_id')

        writer = post_data.get('writer', self.default_writer)
        log.info('render_kill %s %s' % (collection_id, writer))

        killed = False
        # Killing via the pid file is disabled:
        # pid_path = self.get_path(collection_id, self.pid_filename, writer)
        # try:
        #     pid = int(open(pid_path, 'rb').read())
        #     os.kill(pid, signal.SIGKILL)
        #     killed = True
        # except (OSError, ValueError, IOError):
        #     pass
        return dict(collection_id=collection_id, writer=writer, killed=killed)
Пример #20
0
    def getParsedTemplate(self, name):
        """Fetch and parse template *name*; return the parse tree or None.

        Names starting with "[[" are rejected (None); "" maps to "";
        a leading ":" includes a raw article instead of a template.
        Blacklisted templates are skipped.
        """
        if name.startswith("[["):
            return None

        if name == '':
            return ''

        if name.startswith(":"):
            log.info("including article")
            raw = self.db.getRawArticle(name[1:])
        else:
            if len(name) > 1:
                # Capitalize the first letter and prepend the template
                # namespace prefix before the lookup.
                name = name[0].capitalize() + name[1:]
                name = self.templateprefix + name

            # Check to see if this is a template in our blacklist --
            # one that we don't want to bother rendering.
            if name in self.templateblacklist:
                log.info("Skipping template " + name.encode('utf8'))
                raw = None
            else:
                raw = self.db.getTemplate(name, True)

        if raw is None:
            log.warn("no template", repr(name))
            res = None
        else:
            log.info("parsing template", repr(name))
            res = parse(raw)
            if DEBUG:
                print "TEMPLATE:", name, repr(raw)
                res.show()

        return res
Пример #21
0
    def do_render_status(self, collection_id, post_data, is_new=False):
        """Report the state of a render job: finished, failed or progress."""
        if is_new:
            return self.error_response('POST argument required: collection_id')

        writer = post_data.get('writer', self.default_writer)

        def retval(**kw):
            return dict(collection_id=collection_id, writer=writer, **kw)

        log.info('render_status %s %s' % (collection_id, writer))

        # A finished render leaves its output file behind.
        output_path = self.get_path(collection_id, self.output_filename, writer)
        if os.path.exists(output_path):
            return retval(state="finished")

        error_path = self.get_path(collection_id, self.error_filename, writer)
        if os.path.exists(error_path):
            text = unicode(open(error_path, 'rb').read(), 'utf-8', 'ignore')
            if text.startswith('traceback\n'):
                # Mail a traceback report at most once per collection
                # (the "mail-sent" marker file records that it went out).
                metabook_path = self.get_path(collection_id, self.metabook_filename)
                metabook = None
                if os.path.exists(metabook_path):
                    metabook = unicode(open(metabook_path, 'rb').read(), 'utf-8', 'ignore')
                mail_sent = self.get_path(collection_id, "mail-sent")
                if not os.path.exists(mail_sent):
                    self.send_report_mail('rendering failed',
                        collection_id=collection_id,
                        writer=writer,
                        error=text,
                        metabook=metabook,
                    )
                    open(mail_sent, "w")
            return retval(state="failed", error=text)

        status = self.read_status_file(collection_id, writer)
        if status.get('state') == 'error':
            return retval(state="failed", error="unknown error")

        return retval(state="progress", status=status)
Пример #22
0
    def getParsedTemplate(self, name):
        """Look up template *name* and return its parse tree (or None).

        "[["-prefixed names return None, the empty name returns "", and
        ":"-prefixed names pull in a raw article. Blacklisted templates
        produce None.
        """
        if name.startswith("[["):
            return None

        if name == '':
            return ''

        if name.startswith(":"):
            log.info("including article")
            raw = self.db.getRawArticle(name[1:])
        else:
            if len(name) > 1:
                # Normalize: capitalize first letter, add template prefix.
                name = name[0].capitalize() + name[1:]
                name = self.templateprefix + name

            # Check to see if this is a template in our blacklist --
            # one that we don't want to bother rendering.
            if name in self.templateblacklist:
                log.info("Skipping template " + name.encode('utf8'))
                raw = None
            else:
                raw = self.db.getTemplate(name, True)

        if raw is None:
            log.warn("no template", repr(name))
            res = None
        else:
            log.info("parsing template", repr(name))
            res = parse(raw)
            if DEBUG:
                print "TEMPLATE:", name, repr(raw)
                res.show()
                
        return res
Пример #23
0
def reformatTable(t, maxCols):
    """Restructure table *t* depending on what its cells contain.

    Container tables holding only sub-tables are unwrapped; tables holding
    only item lists are reduced to two columns and their items split.
    """
    nodeInfo = getContentType(t)
    numCols = maxCols
    numRows = len(t.rows)

    # An empty table (no children / no nodeInfo) counts as neither kind.
    hasContent = len(t.children) > 0 and bool(nodeInfo)
    onlyTables = hasContent
    onlyLists = hasContent
    for row in nodeInfo:
        for cell in row:
            cellNodeTypes, cellTextLen = cell
            if not all(nt == Table for nt in cellNodeTypes):
                onlyTables = False
            if not all(nt == ItemList for nt in cellNodeTypes):
                onlyLists = False

    if onlyTables and numCols > 1:
        log.info('got table only table - removing container')
        t = removeContainerTable(t)
    if onlyLists and numCols > 2:
        log.info('got list only table - reducing columns to 2')
        t = reduceCols(t, colnum=2)
    if onlyLists:
        log.info('got list only table - splitting list items')
        t = splitListItems(t)
    return t
Пример #24
0
def reformatTable(t, maxCols):
    """Clean up a table whose cells hold only sub-tables or only item lists."""
    nodeInfo = getContentType(t)
    numCols = maxCols
    numRows = len(t.rows)

    #if table is empty onlyTables and onlyLists are False
    onlyTables = len(t.children) > 0
    onlyLists = onlyTables
    if not nodeInfo:
        onlyTables = onlyLists = False
    for row in nodeInfo:
        for cell in row:
            cellNodeTypes, cellTextLen = cell
            # A single non-matching node type disqualifies the whole table.
            onlyTables = onlyTables and all(nodetype == Table for nodetype in cellNodeTypes)
            onlyLists = onlyLists and all(nodetype == ItemList for nodetype in cellNodeTypes)

    if onlyTables and numCols > 1:
        log.info('got table only table - removing container')
        t = removeContainerTable(t)
    if onlyLists and numCols > 2:
        log.info('got list only table - reducing columns to 2')
        t = reduceCols(t, colnum=2)
    if onlyLists:
        log.info('got list only table - splitting list items')
        t = splitListItems(t)
    return t
Пример #25
0
def checkDoc(data, writer):
    """Sanity-check a rendered document.

    For the 'rl' (PDF) writer the bytes are written to a temp file and
    inspected with pdfinfo; the document must parse and report at least
    one page. Raises AssertionError or RuntimeError on a broken document.
    """
    log.info('checkDoc %s' % writer)
    assert len(data) > 0
    if writer == 'rl':
        fd, filename = tempfile.mkstemp(suffix='.pdf')
        os.write(fd, data)
        os.close(fd)
        try:
            popen = subprocess.Popen(args=['pdfinfo', filename], stdout=subprocess.PIPE)
            # BUGFIX: read stdout via communicate() before checking the exit
            # code -- wait() while the stdout PIPE fills up can deadlock.
            output, _ = popen.communicate()
            rc = popen.returncode
            assert rc == 0, 'pdfinfo rc = %d' % rc
            for line in output.splitlines():
                line = line.strip()
                if not line.startswith('Pages:'):
                    continue
                num_pages = int(line.split()[-1])
                assert num_pages > 0, 'PDF is empty'
                break
            else:
                # No "Pages:" line at all: pdfinfo did not recognize the file.
                raise RuntimeError('invalid PDF')
        finally:
            os.unlink(filename)
Пример #26
0
def clean_cache(max_age, cache_dir):
    """Clean all subdirectories of cache_dir whose mtime is before now-max_age
    
    @param max_age: max age of directories in seconds
    @type max_age: int
    
    @param cache_dir: cache directory
    @type cache_dir: basestring
    """
    
    now = time.time()
    for d in os.listdir(cache_dir):
        path = os.path.join(cache_dir, d)
        # Only touch directories whose name looks like a collection id.
        if not os.path.isdir(path) or not collection_id_rex.match(d):
            log.warn('unknown item in cache dir %r: %r' % (cache_dir, d))
            continue
        if now - os.stat(path).st_mtime < max_age:
            continue
        try:
            log.info('removing directory %r' % path)
            shutil.rmtree(path)
        except Exception, exc:
            # Best-effort: log the failure and keep cleaning.
            log.ERROR('could not remove directory %r: %s' % (path, exc))
Пример #27
0
def clean_cache(max_age, cache_dir):
    """Clean all subdirectories of cache_dir whose mtime is before now-max_age
    
    @param max_age: max age of directories in seconds
    @type max_age: int
    
    @param cache_dir: cache directory
    @type cache_dir: basestring
    """
    
    now = time.time()
    for d in os.listdir(cache_dir):
        path = os.path.join(cache_dir, d)
        # Skip (and warn about) entries that are not collection directories.
        if not os.path.isdir(path) or not collection_id_rex.match(d):
            log.warn('unknown item in cache dir %r: %r' % (cache_dir, d))
            continue
        if now - os.stat(path).st_mtime < max_age:
            continue
        try:
            log.info('removing directory %r' % path)
            shutil.rmtree(path)
        except Exception, exc:
            # Removal failures are logged, not fatal.
            log.ERROR('could not remove directory %r: %s' % (path, exc))
Пример #28
0
    def getParsedTemplate(self, name):
        """Fetch template *name* and parse it with Parser; return tree or None.

        Rejects "[["-prefixed names (None), maps "" to "", includes a raw
        article for ":"-prefixed names, and skips blacklisted templates.
        """
        if name.startswith("[["):
            return None

        if name == '':
            return ''

        if name.startswith(":"):
            log.info("including article")
            raw = self.db.getRawArticle(name[1:])
        else:
            if len(name) > 1:
                # Capitalize first letter, then add the template prefix.
                name = name[0].capitalize() + name[1:]
                name = self.templateprefix + name

            # Check to see if this is a template in our blacklist --
            # one that we don't want to bother rendering.
            if name in self.templateblacklist:
                log.info("Skipping template " + name.encode('utf8'))
                raw = None
            else:
                raw = self.db.getTemplate(name, True)

        if raw is None:
            log.warn("no template", repr(name))
            res = None
        else:
            # add newline to templates starting with a (semi)colon, or tablemarkup
            # XXX what else? see test_implicit_newline in test_expander
            if raw.startswith(":") or raw.startswith(";") or raw.startswith(
                    "{|"):
                raw = '\n' + raw

            log.info("parsing template", repr(name))
            res = Parser(raw).parse()
            if DEBUG:
                print "TEMPLATE:", name, repr(raw)
                res.show()

        return res
Пример #29
0
    def getParsedTemplate(self, name):
        """Resolve and parse template *name*; return the parse tree or None.

        Special cases: "[["-prefixed names -> None, "" -> "", ":"-prefixed
        names include a raw article, blacklisted templates -> None.
        """
        if name.startswith("[["):
            return None

        if name == "":
            return ""

        if name.startswith(":"):
            log.info("including article")
            raw = self.db.getRawArticle(name[1:])
        else:
            if len(name) > 1:
                # Normalize the template name before the database lookup.
                name = name[0].capitalize() + name[1:]
                name = self.templateprefix + name

            # Check to see if this is a template in our blacklist --
            # one that we don't want to bother rendering.
            if name in self.templateblacklist:
                log.info("Skipping template " + name.encode("utf8"))
                raw = None
            else:
                raw = self.db.getTemplate(name, True)

        if raw is None:
            log.warn("no template", repr(name))
            res = None
        else:
            # add newline to templates starting with a (semi)colon, or tablemarkup
            # XXX what else? see test_implicit_newline in test_expander
            if raw.startswith(":") or raw.startswith(";") or raw.startswith("{|"):
                raw = "\n" + raw

            log.info("parsing template", repr(name))
            res = Parser(raw).parse()
            if DEBUG:
                print "TEMPLATE:", name, repr(raw)
                res.show()

        return res
Пример #30
0
    def do_download(self, collection_id, post_data, is_new=False):
        """Proxy the rendered document from the render service to the client.

        NOTE(review): everything after the first ``return response`` below
        is unreachable legacy code (two older download implementations)
        left in place -- consider deleting it.
        """
        if is_new:
            return self.error_response('POST argument required: collection_id')

        writer = post_data.get('writer', self.default_writer)
        w=name2writer[writer]


        
        jobid="%s:render-%s" % (collection_id, writer)        
        res = self.qserve.qinfo(jobid=jobid) or {}
        download_url = res["result"]["url"]

        print "fetching", download_url
        f = urllib2.urlopen(download_url)
        info = f.info()

        response = Response()

        # Copy selected headers from the upstream response.
        for h in ("Content-Length",): # "Content-Type", "Content-Disposition"):            
            v = info.getheader(h)
            if v:
                print "copy header:", h, v
                response.headers[h] = v

        if w.content_type:
            response.content_type = w.content_type
                
        if w.file_extension:
            response.headers['Content-Disposition'] = 'inline; filename=collection.%s' % (w.file_extension.encode('utf-8', 'ignore'))
        
                
        def readdata():
            # Stream the upstream body in 4 KiB chunks.
            while 1:
                d = f.read(4096)
                if not d:
                    break
                yield d
                
        response.app_iter = readdata()
        return response
    
        # --- unreachable from here on: legacy implementations ---
        
        
        
        
        try:
            log.info('download %s %s' % (collection_id, writer))

            redir = os.environ.get("NSERVE_REDIRECT")
            if redir:
                response = Response()
                response.status = 301
                url = "%s/%s/%s/output.%s" % (redir, collection_id[:2], collection_id, writer)
                print "REDIRECT:", url
                response.location = url
                return response


            if 1:
                response=Response()
                response.headers["X-Accel-Redirect"] = "/%s/%s/output.%s" % (collection_id[:2], collection_id, writer)


                if w.content_type:
                    response.content_type = w.content_type
                
                if w.file_extension:
                    response.headers['Content-Disposition'] = 'inline; filename=collection.%s' % (w.file_extension.encode('utf-8', 'ignore'))

                return response
            
            output_path = self.get_path(collection_id, "output", writer)
            os.utime(output_path, None)
            
            data = open(output_path, "rb").read()
            
            response = Response(data, content_length=len(data))
            
            if w.content_type:
                response.content_type = w.content_type
                
            if w.file_extension:
                response.headers['Content-Disposition'] = 'inline; filename=collection.%s' % (
                    w.file_extension.encode('utf-8', 'ignore'))
            
            return response
        except Exception, exc:
            log.ERROR('exception in do_download(): %r' % exc)
            return Response(status=500)
Пример #31
0
def download(colid, serviceurl, writer):
    """Fetch the rendered document for *colid*; return the open file handle."""
    log.info('download')
    query = {"command": "download", "collection_id": colid, 'writer': writer}
    request = urllib2.Request(serviceurl.encode("utf8"), urllib.urlencode(query))
    return urllib2.urlopen(request)  # fh
Пример #32
0
        except KeyError, exc:
            return self.error_response("POST argument required: %s" % exc)

        pod_api_url = params.pod_api_url
        if pod_api_url:
            result = json.loads(unicode(urllib2.urlopen(pod_api_url, data="any").read(), "utf-8"))
            post_url = result["post_url"].encode("utf-8")
            response = {"state": "ok", "redirect_url": result["redirect_url"].encode("utf-8")}
        else:
            try:
                post_url = post_data["post_url"]
            except KeyError:
                return self.error_response("POST argument required: post_url")
            response = {"state": "ok"}

        log.info("zip_post %s %s" % (collection_id, pod_api_url))
        params.post_url = post_url

        self.qserve.qadd(
            channel="post", payload=dict(params=params.__dict__), timeout=20 * 60  # jobid="%s:post" % collection_id,
        )
        return response


def _parse_qs(qs):
    for i, x in enumerate(qs):
        if ":" in x:
            host, port = x.split(":", 1)
            port = int(port)
            qs[i] = (host, port)
        else:
Пример #33
0
def main():
    """Command-line entry point for the mw-serve rendering web service.

    Parses options, optionally daemonizes and writes a PID file, builds a
    ``serve.Application`` WSGI app, and serves it forever over plain HTTP
    (forking or threaded wsgiref server) or FastCGI/SCGI (via the
    third-party ``flup`` package).  Does not return a meaningful value.
    """
    # Python 2 module spellings; flup provides the FastCGI/SCGI bridges.
    from SocketServer import ForkingMixIn, ThreadingMixIn
    from wsgiref.simple_server import make_server, WSGIServer
    from flup.server import fcgi, fcgi_fork, scgi, scgi_fork

    class ForkingWSGIServer(ForkingMixIn, WSGIServer):
        # wsgiref HTTP server handling each request in a forked child.
        pass

    class ThreadingWSGIServer(ThreadingMixIn, WSGIServer):
        # wsgiref HTTP server handling each request in a new thread.
        pass

    # Maps the --protocol value to a server class.  --method=threaded
    # appends '_threaded' to the chosen protocol further below.
    proto2server = {
        'http': ForkingWSGIServer,
        'http_threaded': ThreadingWSGIServer,
        'fcgi': fcgi_fork.WSGIServer,
        'fcgi_threaded': fcgi.WSGIServer,
        'scgi': scgi_fork.WSGIServer,
        'scgi_threaded': scgi.WSGIServer,
    }

    parser = optparse.OptionParser(usage="%prog [OPTIONS]")
    parser.add_option('-l', '--logfile',
        help='log output to LOGFILE',
    )
    parser.add_option('-d', '--daemonize',
        action='store_true',
        help='become daemon as soon as possible',
    )
    parser.add_option('--pid-file',
        help='write PID of daemonized process to this file',
    )
    parser.add_option('-P', '--protocol',
        help='one of %s (default: http)' % ', '.join(proto2server.keys()),
        default='http',
    )
    parser.add_option('-p', '--port',
        help='port to listen on (default: 8899)',
        default='8899',
    )
    parser.add_option('-i', '--interface',
        help='interface to listen on (default: 0.0.0.0)',
        default='0.0.0.0',
    )
    parser.add_option('--cache-dir',
        help='cache directory (default: /var/cache/mw-serve/)',
        default='/var/cache/mw-serve/',
    )
    parser.add_option('--mwrender',
        help='(path to) mw-render executable',
        default='mw-render',
    )
    parser.add_option('--mwrender-logfile',
        help='global logfile for mw-render',
        metavar='LOGFILE',
    )
    parser.add_option('--mwzip',
        help='(path to) mw-zip executable',
        default='mw-zip',
    )
    parser.add_option('--mwzip-logfile',
        help='global logfile for mw-zip',
        metavar='LOGFILE',
    )
    parser.add_option('--mwpost',
        help='(path to) mw-post executable',
        default='mw-post',
    )
    parser.add_option('--mwpost-logfile',
        help='global logfile for mw-post',
        metavar='LOGFILE',
    )
    parser.add_option('-q', '--queue-dir',
        help='queue dir of mw-watch (if not specified, no queue is used)',
    )
    parser.add_option('-m', '--method',
        help='prefork or threaded (default: prefork)',
        default='prefork',
    )
    parser.add_option('--max-requests',
        help='maximum number of requests a child process can handle before it is killed, irrelevant for --method=threaded (default: 0 = no limit)',
        default='0',
        metavar='NUM',
    )
    parser.add_option('--min-spare',
        help='minimum number of spare processes/threads (default: 2)',
        default='2',
        metavar='NUM',
    )
    parser.add_option('--max-spare',
        help='maximum number of spare processes/threads (default: 5)',
        default='5',
        metavar='NUM',
    )
    parser.add_option('--max-children',
        help='maximum number of processes/threads (default: 50)',
        default='50',
        metavar='NUM',
    )
    parser.add_option('--report-from-mail',
        help='sender of error mails (--report-recipient also needed)',
        metavar='EMAIL',
    )
    parser.add_option('--report-recipient',
        help='recipient of error mails (--report-from-mail also needed)',
        metavar='EMAIL',
    )
    options, args = parser.parse_args()

    # This tool takes options only; any positional argument is a usage error.
    if args:
        parser.error('no arguments supported')


    if options.protocol not in proto2server:
        parser.error('unsupported protocol (must be one of %s)' % (
            ', '.join(proto2server.keys()),
        ))

    def to_int(opt_name):
        # Convert a string-valued option to int in place; abort with a
        # usage error naming the offending option on failure.
        try:
            setattr(options, opt_name, int(getattr(options, opt_name)))
        except ValueError:
            parser.error('--%s value must be an integer' % opt_name.replace('_', '-'))

    to_int('port')
    to_int('max_requests')
    to_int('min_spare')
    to_int('max_spare')
    to_int('max_children')

    if options.method not in ('prefork', 'threaded'):
        parser.error('the only supported values for --method are "prefork" and "threaded"')

    # NOTE(review): imported here rather than at module top — presumably to
    # keep option parsing cheap / avoid import side effects; confirm.
    from mwlib import serve, log, utils

    # Rebinds the local name 'log' from the mwlib.log module to a logger
    # instance; the module itself is unreachable below this point.
    log = log.Log('mw-serve')

    if options.logfile:
        utils.start_logging(options.logfile)

    if options.daemonize:
        utils.daemonize()
    if options.pid_file:
        # Written after daemonizing so the PID recorded is the daemon's.
        open(options.pid_file, 'wb').write('%d\n' % os.getpid())

    if options.method == 'threaded':
        # Switch to the *_threaded server class for the chosen protocol.
        options.protocol += '_threaded'
        flup_kwargs = {
            'maxThreads': options.max_children,
        }
    else:
        flup_kwargs = {
            'maxChildren': options.max_children,
            'maxRequests':  options.max_requests,
        }

    log.info("serving %s on %s:%s" % (options.protocol, options.interface, options.port))

    # Error-mail reporting is enabled only when BOTH addresses are given.
    if options.report_recipient and options.report_from_mail:
        report_from_mail = options.report_from_mail.encode('utf-8')
        report_recipients = [options.report_recipient.encode('utf-8')]
    else:
        report_from_mail = None
        report_recipients = None

    app = serve.Application(
        cache_dir=options.cache_dir,
        mwrender_cmd=options.mwrender,
        mwrender_logfile=options.mwrender_logfile,
        mwzip_cmd=options.mwzip,
        mwzip_logfile=options.mwzip_logfile,
        mwpost_cmd=options.mwpost,
        mwpost_logfile=options.mwpost_logfile,
        queue_dir=options.queue_dir,
        report_from_mail=report_from_mail,
        report_recipients=report_recipients,
    )
    if options.protocol.startswith('http'):
        # Plain HTTP: wsgiref server; Ctrl-C shuts down cleanly.
        server = make_server(options.interface, options.port, app,
            server_class=proto2server[options.protocol],
        )
        try:
            server.serve_forever()
        except KeyboardInterrupt:
            pass
    else:
        # FastCGI/SCGI via flup; flup_kwargs chosen by --method above.
        serverclass = proto2server[options.protocol]
        serverclass(app,
            bindAddress=(options.interface, options.port),
            minSpare=options.min_spare,
            maxSpare=options.max_spare,
            **flup_kwargs
        ).run()

    # Best-effort cleanup of the PID file once the server loop ends.
    if options.pid_file:
        utils.safe_unlink(options.pid_file)

    log.info('exit.')
Пример #34
0
def serve():
    """Command-line entry point for the mw-serve rendering web service.

    NOTE(review): near-duplicate of main() defined earlier in this file,
    extended with a --clean-cache option that prunes old cache files and
    exits instead of starting a server.  Otherwise: parses options,
    optionally daemonizes and writes a PID file, builds a
    ``serve.Application`` WSGI app and serves it over HTTP (wsgiref) or
    FastCGI/SCGI (third-party ``flup``).
    """
    # Python 2 module spellings; flup provides the FastCGI/SCGI bridges.
    from SocketServer import ForkingMixIn, ThreadingMixIn
    from wsgiref.simple_server import make_server, WSGIServer
    from flup.server import fcgi, fcgi_fork, scgi, scgi_fork

    class ForkingWSGIServer(ForkingMixIn, WSGIServer):
        # wsgiref HTTP server handling each request in a forked child.
        pass

    class ThreadingWSGIServer(ThreadingMixIn, WSGIServer):
        # wsgiref HTTP server handling each request in a new thread.
        pass

    # Maps the --protocol value to a server class.  --method=threaded
    # appends '_threaded' to the chosen protocol further below.
    proto2server = {
        'http': ForkingWSGIServer,
        'http_threaded': ThreadingWSGIServer,
        'fcgi': fcgi_fork.WSGIServer,
        'fcgi_threaded': fcgi.WSGIServer,
        'scgi': scgi_fork.WSGIServer,
        'scgi_threaded': scgi.WSGIServer,
    }

    parser = optparse.OptionParser(usage="%prog [OPTIONS]")
    parser.add_option(
        '-l',
        '--logfile',
        help='log output to LOGFILE',
    )
    parser.add_option(
        '-d',
        '--daemonize',
        action='store_true',
        help='become daemon as soon as possible',
    )
    parser.add_option(
        '--pid-file',
        help='write PID of daemonized process to this file',
    )
    parser.add_option(
        '-P',
        '--protocol',
        help='one of %s (default: http)' % ', '.join(proto2server.keys()),
        default='http',
    )
    parser.add_option(
        '-p',
        '--port',
        help='port to listen on (default: 8899)',
        default='8899',
    )
    parser.add_option(
        '-i',
        '--interface',
        help='interface to listen on (default: 0.0.0.0)',
        default='0.0.0.0',
    )
    parser.add_option(
        '--cache-dir',
        help='cache directory (default: /var/cache/mw-serve/)',
        default='/var/cache/mw-serve/',
    )
    parser.add_option(
        '--mwrender',
        help='(path to) mw-render executable',
        default='mw-render',
    )
    parser.add_option(
        '--mwrender-logfile',
        help='global logfile for mw-render',
        metavar='LOGFILE',
    )
    parser.add_option(
        '--mwzip',
        help='(path to) mw-zip executable',
        default='mw-zip',
    )
    parser.add_option(
        '--mwzip-logfile',
        help='global logfile for mw-zip',
        metavar='LOGFILE',
    )
    parser.add_option(
        '--mwpost',
        help='(path to) mw-post executable',
        default='mw-post',
    )
    parser.add_option(
        '--mwpost-logfile',
        help='global logfile for mw-post',
        metavar='LOGFILE',
    )
    parser.add_option(
        '-q',
        '--queue-dir',
        help='queue dir of mw-watch (if not specified, no queue is used)',
    )
    parser.add_option(
        '-m',
        '--method',
        help='prefork or threaded (default: prefork)',
        default='prefork',
    )
    parser.add_option(
        '--max-requests',
        help=
        'maximum number of requests a child process can handle before it is killed, irrelevant for --method=threaded (default: 0 = no limit)',
        default='0',
        metavar='NUM',
    )
    parser.add_option(
        '--min-spare',
        help='minimum number of spare processes/threads (default: 2)',
        default='2',
        metavar='NUM',
    )
    parser.add_option(
        '--max-spare',
        help='maximum number of spare processes/threads (default: 5)',
        default='5',
        metavar='NUM',
    )
    parser.add_option(
        '--max-children',
        help='maximum number of processes/threads (default: 50)',
        default='50',
        metavar='NUM',
    )
    parser.add_option(
        '--report-from-mail',
        help='sender of error mails (--report-recipient also needed)',
        metavar='EMAIL',
    )
    parser.add_option(
        '--report-recipient',
        help='recipient of error mails (--report-from-mail also needed)',
        metavar='EMAIL',
    )
    parser.add_option(
        '--clean-cache',
        help=
        'clean cache files that have not been touched for at least HOURS hours and exit',
        metavar='HOURS',
    )
    options, args = parser.parse_args()

    # --clean-cache short-circuits everything: prune old cache entries
    # and return without starting any server.
    if options.clean_cache:
        try:
            options.clean_cache = int(options.clean_cache)
        except ValueError:
            parser.error('--clean-cache value must be an integer')
        from mwlib.serve import clean_cache
        clean_cache(options.clean_cache * 60 * 60, cache_dir=options.cache_dir)
        return

    if options.protocol not in proto2server:
        parser.error('unsupported protocol (must be one of %s)' %
                     (', '.join(proto2server.keys()), ))

    def to_int(opt_name):
        # Convert a string-valued option to int in place; abort with a
        # usage error naming the offending option on failure.
        try:
            setattr(options, opt_name, int(getattr(options, opt_name)))
        except ValueError:
            parser.error('--%s value must be an integer' %
                         opt_name.replace('_', '-'))

    to_int('port')
    to_int('max_requests')
    to_int('min_spare')
    to_int('max_spare')
    to_int('max_children')

    if options.method not in ('prefork', 'threaded'):
        parser.error(
            'the only supported values for --method are "prefork" and "threaded"'
        )

    # NOTE(review): imported here rather than at module top — presumably to
    # keep option parsing cheap / avoid import side effects; confirm.
    from mwlib import serve, log, utils

    # Rebinds the local name 'log' from the mwlib.log module to a logger
    # instance; the module itself is unreachable below this point.
    log = log.Log('mw-serve')

    if options.logfile:
        utils.start_logging(options.logfile)

    if options.daemonize:
        utils.daemonize()
    if options.pid_file:
        # Written after daemonizing so the PID recorded is the daemon's.
        open(options.pid_file, 'wb').write('%d\n' % os.getpid())

    if options.method == 'threaded':
        # Switch to the *_threaded server class for the chosen protocol.
        options.protocol += '_threaded'
        flup_kwargs = {
            'maxThreads': options.max_children,
        }
    else:
        flup_kwargs = {
            'maxChildren': options.max_children,
            'maxRequests': options.max_requests,
        }

    log.info("serving %s on %s:%s" %
             (options.protocol, options.interface, options.port))

    # Error-mail reporting is enabled only when BOTH addresses are given.
    if options.report_recipient and options.report_from_mail:
        report_from_mail = options.report_from_mail.encode('utf-8')
        report_recipients = [options.report_recipient.encode('utf-8')]
    else:
        report_from_mail = None
        report_recipients = None

    app = serve.Application(
        cache_dir=options.cache_dir,
        mwrender_cmd=options.mwrender,
        mwrender_logfile=options.mwrender_logfile,
        mwzip_cmd=options.mwzip,
        mwzip_logfile=options.mwzip_logfile,
        mwpost_cmd=options.mwpost,
        mwpost_logfile=options.mwpost_logfile,
        queue_dir=options.queue_dir,
        report_from_mail=report_from_mail,
        report_recipients=report_recipients,
    )
    if options.protocol.startswith('http'):
        # Plain HTTP: wsgiref server; Ctrl-C shuts down cleanly.
        server = make_server(
            options.interface,
            options.port,
            app,
            server_class=proto2server[options.protocol],
        )
        try:
            server.serve_forever()
        except KeyboardInterrupt:
            pass
    else:
        # FastCGI/SCGI via flup; flup_kwargs chosen by --method above.
        serverclass = proto2server[options.protocol]
        serverclass(app,
                    bindAddress=(options.interface, options.port),
                    minSpare=options.min_spare,
                    maxSpare=options.max_spare,
                    **flup_kwargs).run()

    # Best-effort cleanup of the PID file once the server loop ends.
    if options.pid_file:
        utils.safe_unlink(options.pid_file)

    log.info('exit.')
Пример #35
0
        pod_api_url = params.pod_api_url
        if pod_api_url:
            result = json.loads(unicode(urllib2.urlopen(pod_api_url, data="any").read(), 'utf-8'))
            post_url = result['post_url'].encode('utf-8')
            response = {
                'state': 'ok',
                'redirect_url': result['redirect_url'].encode('utf-8'),
            }
        else:
            try:
                post_url = post_data['post_url']
            except KeyError:
                return self.error_response('POST argument required: post_url')
            response = {'state': 'ok'}

        log.info('zip_post %s %s' % (collection_id, pod_api_url))
        params.post_url = post_url

        self.qserve.qadd(channel="post",  # jobid="%s:post" % collection_id,
                         payload=dict(params=params.__dict__),
                         timeout=20 * 60)
        return response


def _parse_qs(qs):
    for i, x in enumerate(qs):
        if ":" in x:
            host, port = x.split(":", 1)
            port = int(port)
            qs[i] = (host, port)
        else:
Пример #36
0
 try:
     base_url = post_data['base_url']
     writer = post_data.get('writer', self.default_writer)
 except KeyError, exc:
     return self.error_response('POST argument required: %s' % exc)
 writer_options = post_data.get('writer_options', '')
 template_blacklist = post_data.get('template_blacklist', '')
 template_exclusion_category = post_data.get('template_exclusion_category', '')
 login_credentials = post_data.get('login_credentials', '')
 force_render = bool(post_data.get('force_render'))
 script_extension = post_data.get('script_extension', '')
 
 if not collection_id:
     collection_id = self.new_collection(post_data)
 
 log.info('render %s %s' % (collection_id, writer))
 
 response = {
     'collection_id': collection_id,
     'writer': writer,
     'is_cached': False,
 }
 
 pid_path = self.get_path(collection_id, self.pid_filename, writer)
 if os.path.exists(pid_path):
     log.info('mw-render already running for collection %r' % collection_id)
     return response
 
 output_path = self.get_path(collection_id, self.output_filename, writer)
 if os.path.exists(output_path):
     if force_render:
 except KeyError, exc:
     return self.error_response('POST argument required: %s' % exc)
 writer_options = post_data.get('writer_options', '')
 template_blacklist = post_data.get('template_blacklist', '')
 template_exclusion_category = post_data.get('template_exclusion_category', '')
 print_template_prefix = post_data.get('print_template_prefix', '')
 print_template_pattern = post_data.get('print_template_pattern', '')
 login_credentials = post_data.get('login_credentials', '')
 force_render = bool(post_data.get('force_render'))
 script_extension = post_data.get('script_extension', '')
 language = post_data.get('language', '')
 
 if not collection_id:
     collection_id = self.new_collection(post_data)
 
 log.info('render %s %s' % (collection_id, writer))
 
 response = {
     'collection_id': collection_id,
     'writer': writer,
     'is_cached': False,
 }
 
 pid_path = self.get_path(collection_id, self.pid_filename, writer)
 if os.path.exists(pid_path):
     log.info('mw-render already running for collection %r' % collection_id)
     return response
 
 output_path = self.get_path(collection_id, self.output_filename, writer)
 if os.path.exists(output_path):
     if force_render:
Пример #38
0
def main():
    """Continuously exercise a mw-serve instance with random render jobs.

    Picks a random wiki out of --baseurl (whitespace-separated list),
    runs checkservice() against it and keeps ok/fail counters.  Any
    failure other than KeyboardInterrupt is reported by mail (when
    --from-email/--mail-recipients are set) and the loop pauses for 60
    seconds before retrying.  Runs until interrupted; never returns a
    value.
    """
    parser = OptionParser(usage="%prog [OPTIONS]")
    parser.add_option("-b", "--baseurl", help="baseurl of wiki")
    parser.add_option("-w", "--writer", help="writer to use")
    parser.add_option('-l', '--logfile', help='log output to LOGFILE')
    parser.add_option('-f', '--from-email',
                      help='From: email address for error mails',
                      )
    parser.add_option('-r', '--mail-recipients',
                      help='To: email addresses ("," separated) for error mails',
                      )
    parser.add_option('-m', '--max-narticles',
                      help='maximum number of articles for random collections (min is 1)',
                      default=10,
                      )
    parser.add_option('-s', '--serviceurl',
                      help="location of the mw-serve server to test",
                      default='http://tools.pediapress.com/mw-serve/',
                      # default='http://localhost:8899/mw-serve/',
                      )
    use_help = 'Use --help for usage information.'
    options, args = parser.parse_args()

    # Proper usage error instead of `assert options.from_email`: asserts
    # are stripped under `python -O`, which would silently disable the
    # check.  parser.error() prints usage and exits with status 2.
    if not options.from_email:
        parser.error('--from-email is required. ' + use_help)

    if options.logfile:
        utils.start_logging(options.logfile)

    # One APIHelper per wiki; --baseurl may list several base URLs,
    # whitespace separated.
    baseurl2api = {}
    baseurls = options.baseurl.split()
    for baseurl in baseurls:
        baseurl2api[baseurl] = mwapidb.APIHelper(baseurl)

    # The help text promises a minimum of 1 article; enforce it so a
    # 0/negative value cannot produce empty collections.
    maxarts = max(1, int(options.max_narticles))
    mail_recipients = None
    if options.mail_recipients:
        mail_recipients = options.mail_recipients.split(',')
    ok_count = 0
    fail_count = 0
    while True:
        baseurl = random.choice(baseurls)
        try:
            ok = checkservice(baseurl2api[baseurl],
                              options.serviceurl,
                              baseurl,
                              options.writer,
                              maxarts,
                              from_email=options.from_email,
                              mail_recipients=mail_recipients,
                              )
            if ok:
                ok_count += 1
                log.check('OK')
            else:
                fail_count += 1
                log.check('FAIL!')
        except KeyboardInterrupt:
            # Ctrl-C is the only way to leave the watchdog loop.
            break
        except BaseException:
            # Deliberately broad: the watchdog must survive any error in
            # checkservice(); report it by mail and back off for a minute.
            fail_count += 1
            log.check('EPIC FAIL!!!')
            utils.report(
                system=system,
                subject='checkservice() failed, waiting 60seconds',
                from_email=options.from_email,
                mail_recipients=mail_recipients,
            )
            sys.exc_clear()  # Python 2 only: drop the stored exception
            time.sleep(60)
        log.info('%s, %s\tok: %d, failed: %d' % (
            baseurl, options.writer, ok_count, fail_count,
        ))
Пример #39
0
        except Exception, exc:
            report('request failed: %s' % exc)
            sys.exit(1)
        
        if success:
            return client.response
        if client.error is not None:
            report('request failed: %s' % client.error)
            sys.exit(1)
        else:
            report('request failed: got response code %d' % client.response_code)
            sys.exit(1)

    start_time = time.time()

    log.info('sending render command')
    response = check_req('render',
        base_url=base_url,
        metabook=metabook,
        writer=writer,
        force_render=True,
    )
    collection_id = response['collection_id']

    while True:
        time.sleep(1)

        if time.time() - start_time > max_render_time:
            report('rendering exceeded allowed time of %d s' % max_render_time)
            sys.exit(2)
Пример #40
0
        pod_api_url = params.pod_api_url
        if pod_api_url:
            result = json.loads(unicode(urllib2.urlopen(pod_api_url, data="any").read(), 'utf-8'))
            post_url = result['post_url'].encode('utf-8')
            response = {
                'state': 'ok',
                'redirect_url': result['redirect_url'].encode('utf-8'),
            }
        else:
            try:
                post_url = post_data['post_url']
            except KeyError:
                return self.error_response('POST argument required: post_url')
            response = {'state': 'ok'}

        log.info('zip_post %s %s' % (collection_id, pod_api_url))
        params.post_url = post_url

        self.qserve.qadd(channel="post",  # jobid="%s:post" % collection_id,
                         payload=dict(params=params.__dict__),
                         timeout=20 * 60)
        return response


def _parse_qs(qs):
    for i, x in enumerate(qs):
        if ":" in x:
            host, port = x.split(":", 1)
            port = int(port)
            qs[i] = (host, port)
        else: