Example #1
    def wsgi_request(self, environ, start_response):
        if self.wsgi_debug:
            print 'wsgi_request start'
            print environ
            print '...'

        outstream = StringIO()
        response = Response(stdout=outstream, stderr=sys.stderr)
        extra = {
            'SESSION': self.app.REQUEST.SESSION,
            'AUTHENTICATED_USER': self.app.REQUEST.AUTHENTICATED_USER,
        }

        publish_module('Zope2', environ=environ, response=response, extra=extra, stdin=environ['wsgi.input'])

        output = outstream.getvalue()
        # 'newline' is assumed to be a module-level constant holding the header
        # separator used in the response output (typically '\r\n').
        headers, body = output.split(newline*2, 1)
        headers = [header.split(': ', 1) for header in headers.split(newline)]
        status = headers.pop(0)[1]

        if self.wsgi_debug:
            print 'wsgi_request done, status="%s"' % status
            print '  ' + '\n'.join([': '.join(header) for header in headers])

        headers = [ (header[0], ', '.join(header[1:])) for header in headers ]
        if 'content-type' not in (header[0].lower() for header in headers):
            headers.append( ('Content-Type', 'text/html; charset=utf-8') )
        start_response(status, headers)
        return [body]
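
A minimal wiring sketch for the method above (hypothetical names): since wsgi_request follows the WSGI calling convention (environ, start_response), a bound instance method can be served directly, for example with wsgiref. ZopeWSGIBridge and app are placeholders and not part of the example.

# Hypothetical sketch: serve the bridge's wsgi_request as a WSGI application.
from wsgiref.simple_server import make_server

bridge = ZopeWSGIBridge(app, wsgi_debug=True)   # placeholder class holding the Zope app
server = make_server('localhost', 8080, bridge.wsgi_request)
server.serve_forever()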
Example #2
    def publish(self, path, basic=None, env=None, extra=None,
                request_method='GET', stdin=None, handle_errors=True):
        '''Publishes the object at 'path' returning a response object.'''

        from StringIO import StringIO
        from ZPublisher.Response import Response
        from ZPublisher.Test import publish_module

        from AccessControl.SecurityManagement import getSecurityManager
        from AccessControl.SecurityManagement import setSecurityManager

        # Save current security manager
        sm = getSecurityManager()

        # Commit the sandbox for good measure
        transaction.commit()

        if env is None:
            env = {}
        if extra is None:
            extra = {}

        request = self.app.REQUEST

        env['SERVER_NAME'] = request['SERVER_NAME']
        env['SERVER_PORT'] = request['SERVER_PORT']
        env['REQUEST_METHOD'] = request_method

        p = path.split('?')
        if len(p) == 1:
            env['PATH_INFO'] = p[0]
        elif len(p) == 2:
            [env['PATH_INFO'], env['QUERY_STRING']] = p
        else:
            raise TypeError, ''

        if basic:
            env['HTTP_AUTHORIZATION'] = "Basic %s" % base64.encodestring(basic)

        if stdin is None:
            stdin = StringIO()

        outstream = StringIO()
        response = Response(stdout=outstream, stderr=sys.stderr)

        publish_module('Zope2',
                       response=response,
                       stdin=stdin,
                       environ=env,
                       extra=extra,
                       debug=not handle_errors,
                      )

        # Restore security manager
        setSecurityManager(sm)

        return ResponseWrapper(response, outstream, path)
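
A usage sketch for this helper inside a functional test method; the path and credentials are illustrative. The returned ResponseWrapper delegates to the underlying response, so status and body can be inspected directly.

# Illustrative usage in a functional test case based on the publish() helper above.
response = self.publish('/test_folder/index_html', basic='manager:secret')
self.assertEqual(response.getStatus(), 200)
self.assertTrue('<html' in response.getBody().lower())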
Example #3
    def publish(self, path, basic=None, env=None, extra=None,
                request_method='GET', stdin=None, handle_errors=True):
        '''Publishes the object at 'path' returning a response object.'''

        from StringIO import StringIO
        from ZPublisher.Response import Response
        from ZPublisher.Test import publish_module

        # Commit the sandbox for good measure
        transaction.commit()

        if env is None:
            env = {}
        if extra is None:
            extra = {}

        request = self.app.REQUEST

        env['SERVER_NAME'] = request['SERVER_NAME']
        env['SERVER_PORT'] = request['SERVER_PORT']
        env['REQUEST_METHOD'] = request_method

        p = path.split('?')
        if len(p) == 1:
            env['PATH_INFO'] = p[0]
        elif len(p) == 2:
            [env['PATH_INFO'], env['QUERY_STRING']] = p
        else:
            raise TypeError, ''

        if basic:
            env['HTTP_AUTHORIZATION'] = "Basic %s" % base64.encodestring(basic)

        if stdin is None:
            stdin = StringIO()

        outstream = StringIO()
        response = Response(stdout=outstream, stderr=sys.stderr)

        publish_module('Zope2',
                       response=response,
                       stdin=stdin,
                       environ=env,
                       extra=extra,
                       debug=not handle_errors,
                      )

        return ResponseWrapper(response, outstream, path)
Example #4
    def __call__(self, requestString, handle_errors=True):

        from ZPublisher.Iterators import IStreamIterator
        from ZPublisher.Response import Response
        from ZPublisher.Test import publish_module

        class TestResponse(Response):

            def setBody(self, body, title='', is_error=0, **kw):
                if IStreamIterator.providedBy(body):
                    body = ''.join(body)
                Response.setBody(self, body, title, is_error, **kw)

        # Discard leading white space to make call layout simpler
        requestString = requestString.lstrip()

        # Split off and parse the command line
        l = requestString.find('\n')
        commandLine = requestString[:l].rstrip()
        requestString = requestString[l+1:]
        method, url, protocol = commandLine.split()

        instream = StringIO(requestString)

        env = {"HTTP_HOST": 'localhost',
               "HTTP_REFERER": 'localhost',
               "REQUEST_METHOD": method,
               "SERVER_PROTOCOL": protocol,
               }

        p = url.split('?', 1)
        if len(p) == 1:
            env['PATH_INFO'] = p[0]
        elif len(p) == 2:
            [env['PATH_INFO'], env['QUERY_STRING']] = p
        else:
            raise TypeError, ''

        # Note that one part of the URL gets unquoted (PATH_INFO) while the
        # other (QUERY_STRING) does not. This matches what ZServer does.
        env['PATH_INFO'] = urllib.unquote(env['PATH_INFO'])

        headers = [splitHeader(header) for header in rfc822.Message(instream).headers]

        # Store request body without headers
        instream = StringIO(instream.read())

        for name, value in headers:
            name = ('_'.join(name.upper().split('-')))
            if name not in ('CONTENT_TYPE', 'CONTENT_LENGTH'):
                name = 'HTTP_' + name
            env[name] = value.rstrip()

        if env.has_key('HTTP_AUTHORIZATION'):
            env['HTTP_AUTHORIZATION'] = authHeader(env['HTTP_AUTHORIZATION'])

        outstream = StringIO()
        response = TestResponse(stdout=outstream, stderr=sys.stderr)

        publish_module('Zope2',
                       response=response,
                       stdin=instream,
                       environ=env,
                       debug=not handle_errors,
                      )

        self.app._p_jar.sync()

        return response
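
The __call__ above expects a raw HTTP request string: a request line, optional headers, a blank line and an optional body. A usage sketch, assuming an instance of the class is bound to the name http; the path and form data are illustrative.

# Headers are mapped to HTTP_* environ keys (except Content-Type/Content-Length),
# and everything after the blank line becomes the request body (stdin).
response = http(r"""
POST /test_folder/doc_edit HTTP/1.1
Content-Type: application/x-www-form-urlencoded
Content-Length: 15

text=Hello+Zope""")
print response.getStatus()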
Example #5
def http(request_string, handle_errors=True):
    """Execute an HTTP request string via the publisher

    This is used for HTTP doc tests.
    """
    import urllib
    import rfc822
    from cStringIO import StringIO
    from ZPublisher.Response import Response
    from ZPublisher.Test import publish_module
    from AccessControl.SecurityManagement import getSecurityManager
    from AccessControl.SecurityManagement import setSecurityManager

    # Save current Security Manager
    old_sm = getSecurityManager()

    # Commit work done by previous python code.
    transaction.commit()

    # Discard leading white space to make call layout simpler
    request_string = request_string.lstrip()

    # Split off and parse the command line
    l = request_string.find('\n')
    command_line = request_string[:l].rstrip()
    request_string = request_string[l+1:]
    method, path, protocol = command_line.split()
    path = urllib.unquote(path)

    instream = StringIO(request_string)

    env = {"HTTP_HOST": 'localhost',
           "HTTP_REFERER": 'localhost',
           "REQUEST_METHOD": method,
           "SERVER_PROTOCOL": protocol,
           }

    p = path.split('?')
    if len(p) == 1:
        env['PATH_INFO'] = p[0]
    elif len(p) == 2:
        [env['PATH_INFO'], env['QUERY_STRING']] = p
    else:
        raise TypeError, ''

    header_output = HTTPHeaderOutput(
        protocol, ('x-content-type-warning', 'x-powered-by',
                   'bobo-exception-type', 'bobo-exception-file',
                   'bobo-exception-value', 'bobo-exception-line'))

    headers = [split_header(header)
               for header in rfc822.Message(instream).headers]

    # Store request body without headers
    instream = StringIO(instream.read())

    for name, value in headers:
        name = ('_'.join(name.upper().split('-')))
        if name not in ('CONTENT_TYPE', 'CONTENT_LENGTH'):
            name = 'HTTP_' + name
        env[name] = value.rstrip()

    if env.has_key('HTTP_AUTHORIZATION'):
        env['HTTP_AUTHORIZATION'] = auth_header(env['HTTP_AUTHORIZATION'])

    outstream = StringIO()
    response = Response(stdout=outstream, stderr=sys.stderr)

    publish_module('Zope2',
                   response=response,
                   stdin=instream,
                   environ=env,
                   debug=not handle_errors,
                  )
    header_output.setResponseStatus(response.getStatus(), response.errmsg)
    header_output.setResponseHeaders(response.headers)
    header_output.appendResponseHeaders(response._cookie_list())
    header_output.appendResponseHeaders(response.accumulated_headers.splitlines())

    # Restore previous security manager, which may have been changed
    # by calling the publish method above
    setSecurityManager(old_sm)

    # Sync connection
    sync()

    return DocResponseWrapper(response, outstream, path, header_output)
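
In an HTTP doctest this function is typically called with a request string, and the returned DocResponseWrapper can be printed to show the status line, headers and body assembled by HTTPHeaderOutput; a short sketch with an illustrative path.

# Illustrative usage in an HTTP doctest context.
response = http(r"""
GET /test_folder/index_html HTTP/1.1
""", handle_errors=False)
print response            # status line, headers and body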
Example #6
def http(request_string, handle_errors=True):
    """Execute an HTTP request string via the publisher

    This is used for HTTP doc tests.
    """
    import urllib
    import rfc822
    from cStringIO import StringIO
    from ZPublisher.Response import Response
    from ZPublisher.Test import publish_module

    # Commit work done by previous python code.
    transaction.commit()

    # Discard leading white space to make call layout simpler
    request_string = request_string.lstrip()

    # Split off and parse the command line
    l = request_string.find('\n')
    command_line = request_string[:l].rstrip()
    request_string = request_string[l+1:]
    method, path, protocol = command_line.split()
    path = urllib.unquote(path)

    instream = StringIO(request_string)

    env = {"HTTP_HOST": 'localhost',
           "HTTP_REFERER": 'localhost',
           "REQUEST_METHOD": method,
           "SERVER_PROTOCOL": protocol,
           }

    p = path.split('?')
    if len(p) == 1:
        env['PATH_INFO'] = p[0]
    elif len(p) == 2:
        [env['PATH_INFO'], env['QUERY_STRING']] = p
    else:
        raise TypeError, ''

    header_output = HTTPHeaderOutput(
        protocol, ('x-content-type-warning', 'x-powered-by',
                   'bobo-exception-type', 'bobo-exception-file',
                   'bobo-exception-value', 'bobo-exception-line'))

    headers = [split_header(header)
               for header in rfc822.Message(instream).headers]

    # Store request body without headers
    instream = StringIO(instream.read())

    for name, value in headers:
        name = ('_'.join(name.upper().split('-')))
        if name not in ('CONTENT_TYPE', 'CONTENT_LENGTH'):
            name = 'HTTP_' + name
        env[name] = value.rstrip()

    if env.has_key('HTTP_AUTHORIZATION'):
        env['HTTP_AUTHORIZATION'] = auth_header(env['HTTP_AUTHORIZATION'])

    outstream = StringIO()
    response = Response(stdout=outstream, stderr=sys.stderr)

    publish_module('Zope2',
                   response=response,
                   stdin=instream,
                   environ=env,
                   debug=not handle_errors,
                  )
    header_output.setResponseStatus(response.getStatus(), response.errmsg)
    header_output.setResponseHeaders(response.headers)
    header_output.appendResponseHeaders(response._cookie_list())
    header_output.appendResponseHeaders(response.accumulated_headers.splitlines())

    sync()

    return DocResponseWrapper(response, outstream, path, header_output)
Example #7
def run(app, args, rate=5):
    # Adjust root logging handler levels
    level = getConfiguration().eventlog.getLowestHandlerLevel()
    root = logging.getLogger()
    for handler in root.handlers:
        handler.setLevel(level)

    logger = logging.getLogger("linkcheck.processor")
    logger.setLevel(level)
    logger.info("looking for sites...")

    session = requests.Session(timeout=5)

    counter = 0
    sites = {}

    # Enter runloop
    while True:
        errors = set()

        for name, item in app.objectItems():
            if name in sites:
                continue

            if IPloneSiteRoot.providedBy(item):
                try:
                    tool = getToolByName(item, 'portal_linkcheck')
                except AttributeError:
                    continue

                logger.info("found site '%s'." % name)

                registry = getUtility(IRegistry, context=item)

                try:
                    settings = registry.forInterface(ISettings)
                except KeyError:
                    logger.warn("settings not available; please reinstall.")
                    continue

                responses = []

                def worker():
                    while True:
                        url = q.get()
                        r = None

                        try:
                            r = session.get(url)
                        except requests.Timeout:
                            status_code = 504
                        except requests.RequestException as exc:
                            logger.warn(exc)
                            status_code = 503
                        except UnicodeError as exc:
                            logger.warn("Unable to decode string: %r (%s)." % (
                                url, exc))
                            status_code = 502

                        if r is None:
                            r = requests.Response()
                            r.status_code = status_code
                            r.url = url

                        responses.append(r)
                        q.task_done()

                q = Queue()
                for i in range(settings.concurrency):
                    t = threading.Thread(target=worker)
                    t.daemon = True
                    t.start()

                logger.info(
                    "%d worker threads started." % settings.concurrency
                    )

                sites[name] = (tool, settings, q, responses)

        if not sites and not counter:
            logger.info(
                "no sites found; polling every %d second(s) ..." % rate
                )

        for tool, settings, queue, responses in sites.values():
            # Synchronize database
            tool._p_jar.sync()

            if not tool.is_available():
                logger.warn("Tool not available; please run update step.")
                logger.info("Sleeping for 10 seconds...")
                time.sleep(10)
                break

            if not counter % 3600:
                now = datetime.datetime.now()

                # This timestamp is the threshold for items that need an
                # update.
                needs_update = int(time.mktime(
                    (now - datetime.timedelta(hours=settings.interval)).\
                    timetuple()
                    ))

                # This timestamp is the threshold for items that are no
                # longer active.
                expired = int(time.mktime(
                    (now - datetime.timedelta(days=settings.expiration)).\
                    timetuple()
                    ))

                discard = set()
                for url, entry in tool.checked.items():
                    if url in tool.queue:
                        continue

                    # Discard items that are expired
                    if entry[0] and entry[0] < expired:
                        discard.add(url)

                    # Enqueue items with an out of date timestamp.
                    elif entry[0] and entry[0] < needs_update:
                        tool.queue.put(url)

                for url in discard:
                    del tool.checked[url]

            # Fetch set of URLs to check (up to transaction size).
            queued = tool.queue[:settings.transaction_size]
            if not queued:
                continue

            urls = filter(None, map(tool.links.get, queued))

            # This keeps track of status updates, which we'll apply at
            # the end.
            updates = []

            # Distinguish between internal and external requests.
            internal, external = partition(
                lambda url: url.startswith('/'),
                urls
                )

            # Must be HTTP or HTTPS
            external, invalid = partition(
                lambda url: url.startswith('http://') or \
                            url.startswith('https://'),
                external
                )

            for url in external:
                queue.put(url)

            # Wait for responses
            queue.join()

            while responses:
                response = responses.pop()
                status = response.status_code

                # This may be a redirect.
                if response.history:
                    url = response.history[0].url
                    if response.history[0].status_code == 301:
                        status = 301
                else:
                    url = response.url

                updates.append((url, status))

            for url in internal:
                # For now, we simply ignore internal links if we're
                # not publishing.
                if not settings.use_publisher:
                    continue

                stdout = StringIO()
                stderr = StringIO()

                env = {
                    'GATEWAY_INTERFACE': 'CGI/1.1 ',
                    'HTTP_ACCEPT': '*/*',
                    'HTTP_HOST': '127.0.0.1',
                    'HTTP_USER_AGENT': 'Bobo',
                    'REQUEST_METHOD': 'GET',
                    'SCRIPT_NAME': '',
                    'SERVER_HOSTNAME': 'bobo.server.host',
                    'SERVER_NAME': 'bobo.server',
                    'SERVER_PORT': '80',
                    'SERVER_PROTOCOL': 'HTTP/1.0 ',
                    }

                env['PATH_INFO'] = "/" + tool.aq_parent.absolute_url() + url

                try:
                    status = publish_module(
                        'Zope2', environ=env, stdout=stdout, stderr=stderr
                        )
                except ConflictError:
                    status = 503
                else:
                    # This is assumed to be a good response.
                    if status == 302:
                        status = 200

                updates.append((url, status))

            # Pull URLs out of queue, actually removing them.
            unchanged = []
            urls = set(urls)

            while urls:
                try:
                    i = tool.queue.pull()
                except IndexError:
                    transaction.abort()
                    continue

                try:
                    url = tool.links[i]
                    urls.remove(url)
                except KeyError:
                    unchanged.append(i)

            # This shouldn't happen too frequently.
            for i in unchanged:
                tool.queue.put(i)
                url = tool.links[i]
                logger.warn("putting back unprocessed url: %s." % url)

            for url in invalid:
                tool.update(url, 0)
                errors.add(url)

            # Apply status updates
            for url, status in updates:
                tool.update(url, status)

            transaction.get().note('updated link validity')
            try:
                transaction.commit()
            except ConflictError:
                transaction.abort()

        for url in errors:
            logger.warn("error checking: %s." % url)

        time.sleep(rate)
        app._p_jar.sync()
        counter += 1
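
A hedged launcher sketch for the loop above: it is meant to be started from a Zope control script, where the run convention (for example bin/instance run script.py) binds the root object as app; the import path below is an assumption.

# linkcheck_runner.py -- 'app' is injected when executed via
# "bin/instance run linkcheck_runner.py" (zopectl run convention).
import sys
from collective.linkcheck.processor import run   # assumed module path

run(app, sys.argv[1:], rate=5)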