Exemplo n.º 1
0
def _get_common_params(request, js_profiles_path):
    """Collect the request arguments shared by every endpoint.

    Returns a dict with the keys ``url``, ``baseurl``, ``wait``,
    ``viewport``, ``images``, ``headers`` and ``proxy``, extended with
    whatever ``_get_javascript_params`` returns for this request.
    """
    wait_time = getarg(
        request, "wait", defaults.WAIT_TIME,
        type=float, range=(0, defaults.MAX_WAIT_TIME),
    )
    viewport = getarg(request, "viewport", defaults.VIEWPORT)
    _check_viewport(
        viewport,
        wait_time,
        defaults.VIEWPORT_MAX_WIDTH,
        defaults.VIEWPORT_MAX_HEIGTH,
        defaults.VIEWPORT_MAX_AREA,
    )

    url = getarg(request, "url", type=None)
    baseurl = getarg(request, "baseurl", default=None, type=None)
    # unicode values are turned into UTF-8 byte strings; anything else
    # (including None) is passed through untouched.
    url, baseurl = [
        value.encode('utf8') if isinstance(value, unicode) else value
        for value in (url, baseurl)
    ]

    params = {
        'url': url,
        'baseurl': baseurl,
        'wait': wait_time,
        'viewport': viewport,
        'images': getarg_bool(request, "images", defaults.AUTOLOAD_IMAGES),
        'headers': _get_headers_params(request),
        'proxy': getarg(request, "proxy", None),
    }
    params.update(_get_javascript_params(request, js_profiles_path))
    return params
Exemplo n.º 2
0
def _get_png_params(request):
    """Return ``(url, baseurl, wait_time, width, height, viewport)``.

    The first three values come from ``_get_common_params``; ``width``,
    ``height`` and ``viewport`` are read from the request, and the viewport
    is passed through ``_check_viewport`` before returning.
    """
    url, baseurl, wait_time = _get_common_params(request)

    png_width = getarg(
        request, "width", None, type=int, range=(1, defaults.MAX_WIDTH)
    )
    png_height = getarg(
        request, "height", None, type=int, range=(1, defaults.MAX_HEIGTH)
    )
    viewport = getarg(request, "viewport", defaults.VIEWPORT)

    _check_viewport(
        viewport,
        wait_time,
        defaults.VIEWPORT_MAX_WIDTH,
        defaults.VIEWPORT_MAX_HEIGTH,
        defaults.VIEWPORT_MAX_AREA,
    )

    return url, baseurl, wait_time, png_width, png_height, viewport
Exemplo n.º 3
0
def _get_javascript_params(request, js_profiles_path):
    """Return a ``(js_source, js_profile)`` pair for the request.

    ``js_profile`` is whatever ``_check_js_profile`` returns for the ``js``
    argument.  ``js_source`` is the ``js_source`` argument when it is given;
    otherwise it is the raw body of a POST request, or ``None`` for any
    other request method.
    """
    js_profile = _check_js_profile(request, js_profiles_path, getarg(request, 'js', None))
    js_source = getarg(request, 'js_source', None)
    if js_source is not None:
        return js_source, js_profile

    if request.method != 'POST':
        return None, js_profile

    body = request.content
    if hasattr(body, 'getvalue'):
        # In-memory buffer (StringIO-like): same call as before.
        return body.getvalue(), js_profile

    # request.content may be a real file object without getvalue()
    # (Twisted uses a temporary file for large bodies).  Read it in full
    # and put the file position back so later readers are unaffected.
    position = body.tell()
    body.seek(0)
    try:
        return body.read(), js_profile
    finally:
        body.seek(position)
Exemplo n.º 4
0
def _get_common_params(request):
    """Return ``(url, baseurl, wait_time, viewport, js_source)``.

    All values are read from the request; the viewport is passed through
    ``_check_viewport`` together with the wait time and the configured
    viewport limits before the tuple is returned.
    """
    url = getarg(request, "url")
    baseurl = getarg(request, "baseurl", None)
    wait_time = getarg(
        request, "wait", defaults.WAIT_TIME,
        type=float, range=(0, defaults.MAX_WAIT_TIME),
    )
    js_source = _get_javascript_params(request)

    viewport = getarg(request, "viewport", defaults.VIEWPORT)
    _check_viewport(
        viewport,
        wait_time,
        defaults.VIEWPORT_MAX_WIDTH,
        defaults.VIEWPORT_MAX_HEIGTH,
        defaults.VIEWPORT_MAX_AREA,
    )

    return url, baseurl, wait_time, viewport, js_source
Exemplo n.º 5
0
    def _getRender(self, request):
        """Hand a ``JsonRender`` job to the pool and return its result.

        PNG parameters come from ``_get_png_params``; ``html``, ``iframes``
        and ``png`` are read as ints in the range (0, 1).
        """
        url, baseurl, wait_time, width, height, viewport = _get_png_params(request)

        def flag(name, default):
            # Every on/off option is read the same way.
            return getarg(request, name, default, type=int, range=(0, 1))

        html = flag("html", defaults.DO_HTML)
        iframes = flag("iframes", defaults.DO_IFRAMES)
        png = flag("png", defaults.DO_PNG)

        return self.pool.render(
            JsonRender, request,
            url, baseurl, wait_time,
            html, iframes, png,
            width, height, viewport,
        )
Exemplo n.º 6
0
def _get_common_params(request, js_profiles_path):
    """Return the 7-tuple of arguments shared by the render endpoints.

    The tuple is ``(url, baseurl, wait_time, viewport, js_source,
    js_profile, images)``.  The viewport is passed through
    ``_check_viewport`` before the tuple is returned.
    """
    url = getarg(request, "url")
    baseurl = getarg(request, "baseurl", None)
    wait_time = getarg(
        request, "wait", defaults.WAIT_TIME,
        type=float, range=(0, defaults.MAX_WAIT_TIME),
    )
    js_source, js_profile = _get_javascript_params(request, js_profiles_path)
    images = getarg(
        request, "images", defaults.AUTOLOAD_IMAGES,
        type=int, range=(0, 1),
    )

    viewport = getarg(request, "viewport", defaults.VIEWPORT)
    _check_viewport(
        viewport,
        wait_time,
        defaults.VIEWPORT_MAX_WIDTH,
        defaults.VIEWPORT_MAX_HEIGTH,
        defaults.VIEWPORT_MAX_AREA,
    )

    return url, baseurl, wait_time, viewport, js_source, js_profile, images
Exemplo n.º 7
0
    def _getRender(self, request):
        """Hand a ``JsonRender`` job to the pool and return its result.

        Common and PNG parameters come from ``_get_png_params``; the
        ``html``, ``iframes``, ``png``, ``script`` and ``console`` options
        are read as ints in the range (0, 1).
        """
        url, baseurl, wait_time, viewport, js_source, width, height = _get_png_params(request)

        def flag(name, default):
            # Every on/off option is read the same way.
            return getarg(request, name, default, type=int, range=(0, 1))

        html = flag("html", defaults.DO_HTML)
        iframes = flag("iframes", defaults.DO_IFRAMES)
        png = flag("png", defaults.DO_PNG)
        script = flag("script", defaults.SHOW_SCRIPT)
        console = flag("console", defaults.SHOW_CONSOLE)

        return self.pool.render(
            JsonRender, request,
            url, baseurl, wait_time, viewport, js_source,
            html, iframes, png, script, console,
            width, height,
        )
Exemplo n.º 8
0
def _get_png_params(request, js_profiles_path):
    """Return the common parameters followed by ``width`` and ``height``.

    The result is ``(url, baseurl, wait_time, viewport, js_source,
    js_profile, width, height)``; the first six values are taken from
    ``_get_common_params``.
    """
    common = _get_common_params(request, js_profiles_path)
    url, baseurl, wait_time, viewport, js_source, js_profile = common

    width = getarg(request, "width", None, type=int, range=(1, defaults.MAX_WIDTH))
    height = getarg(request, "height", None, type=int, range=(1, defaults.MAX_HEIGTH))

    return url, baseurl, wait_time, viewport, js_source, js_profile, width, height
Exemplo n.º 9
0
    def render_GET(self, request):
        """Start a render and wire up its completion handlers.

        The deferred from ``_getRender`` gets a cancellation timer set to
        ``timeout + wait`` seconds, followed by the output, error and finish
        handlers.  Returns ``NOT_DONE_YET``.
        """
        render_d = self._getRender(request)
        timeout = getarg(
            request, "timeout", defaults.TIMEOUT,
            type=float, range=(0, defaults.MAX_TIMEOUT),
        )
        wait_time = getarg(
            request, "wait", defaults.WAIT_TIME,
            type=float, range=(0, defaults.MAX_WAIT_TIME),
        )

        # Cancel the render if it has not completed in time.
        timer = reactor.callLater(timeout + wait_time, render_d.cancel)
        render_d.addCallback(self._cancelTimer, timer)
        render_d.addCallback(self._writeOutput, request)
        # Error handlers are attached in this exact order.
        for errback in (self._timeoutError, self._renderError, self._internalError):
            render_d.addErrback(errback, request)
        render_d.addBoth(self._finishRequest, request)
        request.starttime = time.time()
        return NOT_DONE_YET
Exemplo n.º 10
0
def _get_common_params(request, js_profiles_path):
    """Return ``(url, baseurl, wait_time, viewport, js_source, js_profile)``.

    All values are read from the request; the viewport is passed through
    ``_check_viewport`` together with the wait time and the configured
    viewport limits before the tuple is returned.
    """
    url = getarg(request, "url")
    baseurl = getarg(request, "baseurl", None)
    wait_time = getarg(
        request, "wait", defaults.WAIT_TIME,
        type=float, range=(0, defaults.MAX_WAIT_TIME),
    )
    js_source, js_profile = _get_javascript_params(request, js_profiles_path)

    viewport = getarg(request, "viewport", defaults.VIEWPORT)
    _check_viewport(
        viewport,
        wait_time,
        defaults.VIEWPORT_MAX_WIDTH,
        defaults.VIEWPORT_MAX_HEIGTH,
        defaults.VIEWPORT_MAX_AREA,
    )

    return url, baseurl, wait_time, viewport, js_source, js_profile
Exemplo n.º 11
0
 def render_GET(self, request):
     """Write a GIF header now and schedule ``_delayedRender`` for later.

     The delay in seconds is the ``n`` argument (float, default 1).  After
     the delay, ``_delayedRender`` receives the ``(request, n)`` tuple.
     Returns ``NOT_DONE_YET``.
     """
     request.setHeader("Content-Type", "image/gif")
     request.write("GIF89a")
     delay = getarg(request, "n", 1, type=float)

     def payload():
         return request, delay

     deferLater(reactor, delay, payload).addCallback(self._delayedRender)
     return NOT_DONE_YET
Exemplo n.º 12
0
 def render_GET(self, request):
     """Send the start of a GIF response, then finish it after a delay.

     The ``n`` argument (float, default 1) is the delay in seconds; once
     it elapses, ``_delayedRender`` is called with ``(request, n)``.
     Returns ``NOT_DONE_YET``.
     """
     request.setHeader("Content-Type", "image/gif")
     request.write("GIF89a")
     seconds = getarg(request, "n", 1, type=float)

     def make_result():
         return (request, seconds)

     delayed = deferLater(reactor, seconds, make_result)
     delayed.addCallback(self._delayedRender)
     return NOT_DONE_YET
Exemplo n.º 13
0
    def render_GET(self, request):
        """Validate filters, start a render and wire up its handlers.

        ``_check_filters`` runs first.  The deferred from ``_getRender``
        then gets a cancellation timer set to ``timeout + wait`` seconds,
        followed by the output, error and finish handlers.  Returns
        ``NOT_DONE_YET``.
        """
        # Debug aid (disabled):
        # log.msg("%s %s %s %s" % (id(request), request.method, request.path, request.args))
        _check_filters(self.pool, request)
        render_d = self._getRender(request)
        timeout = getarg(
            request, "timeout", defaults.TIMEOUT,
            type=float, range=(0, defaults.MAX_TIMEOUT),
        )
        wait_time = getarg(
            request, "wait", defaults.WAIT_TIME,
            type=float, range=(0, defaults.MAX_WAIT_TIME),
        )

        # Cancel the render if it has not completed in time.
        timer = reactor.callLater(timeout + wait_time, render_d.cancel)
        render_d.addCallback(self._cancelTimer, timer)
        render_d.addCallback(self._writeOutput, request)
        # Error handlers are attached in this exact order.
        for errback in (self._timeoutError, self._renderError, self._internalError):
            render_d.addErrback(errback, request)
        render_d.addBoth(self._finishRequest, request)
        request.starttime = time.time()
        return NOT_DONE_YET
Exemplo n.º 14
0
def _check_filters(pool, request):
    """Raise ``BadRequest`` when the ``filters`` argument has unknown names.

    Nothing is checked when the pool's network manager has no
    ``unknownFilters`` attribute.
    """
    manager = pool.network_manager
    if hasattr(manager, 'unknownFilters'):
        requested = getarg(request, 'filters', "")
        unknown = manager.unknownFilters(requested)
        if unknown:
            raise BadRequest("Invalid filter names: %s" % unknown)
Exemplo n.º 15
0
def _check_filters(pool, request):
    """Reject requests whose ``filters`` argument names unknown filters.

    Raises ``BadRequest`` listing whatever ``unknownFilters`` reports.
    """
    manager = pool.network_manager
    # Custom network access managers that do no filtering are allowed;
    # they are recognised by the missing ``unknownFilters`` attribute.
    if hasattr(manager, 'unknownFilters'):
        requested = getarg(request, 'filters', '')
        unknown = manager.unknownFilters(requested)
        if unknown:
            raise BadRequest("Invalid filter names: %s" % unknown)
Exemplo n.º 16
0
def _get_js_source(request):
    """Return the JavaScript source supplied with the request, or ``None``.

    The ``js_source`` argument wins when present.  Otherwise the body of a
    POST request is returned if its Content-Type header contains
    ``application/javascript``.
    """
    source = getarg(request, 'js_source', None)
    if source is not None:
        return source

    # Only application/javascript POST requests carry source in the body.
    if request.method != 'POST':
        return None

    content_type = request.getHeader('Content-Type')
    if not content_type or 'application/javascript' not in content_type:
        return None

    return request.content.read()
Exemplo n.º 17
0
 def render_GET(self, request):
     """Start a render and wire up its completion handlers.

     The deferred from ``_getRender`` is cancelled after ``timeout``
     seconds (float, default 30, range (0, 60)) unless it completes
     first.  Returns ``NOT_DONE_YET``.
     """
     render_d = self._getRender(request)
     timeout = getarg(request, "timeout", 30, type=float, range=(0, 60))

     # Cancel the render if it has not completed in time.
     timer = reactor.callLater(timeout, render_d.cancel)
     render_d.addCallback(self._cancelTimer, timer)
     render_d.addCallback(self._writeOutput, request)
     # Error handlers are attached in this exact order.
     for errback in (self._timeoutError, self._renderError, self._internalError):
         render_d.addErrback(errback, request)
     render_d.addBoth(self._finishRequest, request)
     request.starttime = time.time()
     return NOT_DONE_YET
Exemplo n.º 18
0
 def _getRender(self, request):
     """Hand a ``PngRender`` job to the pool and return its result.

     Image size (``width``/``height``) has no default; the viewport size
     (``vwidth``/``vheight``) defaults to 1024x768.  All four are ints
     read with ranges (0, 1920) for widths and (0, 1080) for heights.
     """
     width_range = (0, 1920)
     height_range = (0, 1080)

     url = getarg(request, "url")
     baseurl = getarg(request, "baseurl", None)
     width = getarg(request, "width", None, type=int, range=width_range)
     height = getarg(request, "height", None, type=int, range=height_range)
     vwidth = getarg(request, "vwidth", 1024, type=int, range=width_range)
     vheight = getarg(request, "vheight", 768, type=int, range=height_range)

     render_args = (url, baseurl, width, height, vwidth, vheight)
     return self.pool.render(PngRender, *render_args)
Exemplo n.º 19
0
 def __init__(self, proxy_profiles_path, request):
     """Initialise the factory from the proxy profile named in the request.

     Without a profile name the parent is initialised with three empty
     lists.  Otherwise ``<profile>.ini`` is parsed with ``_parseIni``.
     Raises ``BadRequest`` when the resolved ini path falls outside the
     profiles directory.
     """
     profiles_dir = os.path.abspath(proxy_profiles_path)
     profile_name = getarg(request, self.GET_ARGUMENT, None)

     if profile_name:
         ini_path = os.path.abspath(
             os.path.join(profiles_dir, profile_name + '.ini')
         )
         # Security check: the path must stay inside the profiles dir.
         if not ini_path.startswith(profiles_dir + os.path.sep):
             raise BadRequest(self.NO_PROXY_PROFILE_MSG)
         params = self._parseIni(ini_path)
     else:
         params = [], [], []

     super(SplashQNetworkProxyFactory, self).__init__(*params)
Exemplo n.º 20
0
    def render_GET(self, request):
        """Start a render and wire up its completion handlers.

        The deferred from ``_getRender`` gets a cancellation timer set to
        ``timeout + wait`` seconds, followed by the output, error and finish
        handlers.  Returns ``NOT_DONE_YET``.
        """
        # Debug aid (disabled):
        # log.msg("%s %s %s %s" % (id(request), request.method, request.path, request.args))
        render_d = self._getRender(request)
        timeout = getarg(request, "timeout", defaults.TIMEOUT,
                         type=float, range=(0, defaults.MAX_TIMEOUT))
        wait_time = getarg(request, "wait", defaults.WAIT_TIME,
                           type=float, range=(0, defaults.MAX_WAIT_TIME))

        # Cancel the render if it has not completed in time.
        timer = reactor.callLater(timeout + wait_time, render_d.cancel)
        render_d.addCallback(self._cancelTimer, timer)
        render_d.addCallback(self._writeOutput, request)
        # Error handlers are attached in this exact order.
        for errback in (self._timeoutError, self._renderError, self._internalError):
            render_d.addErrback(errback, request)
        render_d.addBoth(self._finishRequest, request)
        request.starttime = time.time()
        return NOT_DONE_YET
Exemplo n.º 21
0
def _get_headers_params(request):
    """Return the validated ``headers`` argument, or ``None`` if absent.

    When the request has a truthy ``inspect_me`` attribute, its own raw
    headers (flattened to ``(name, value)`` pairs) serve as the default.
    A dict is accepted as-is; a list or tuple must contain only 2-element
    lists/tuples of strings.  Anything else raises ``BadRequest``.
    """
    invalid_msg = "'headers' must be either JSON array of (name, value) pairs or JSON object"

    default_headers = None
    if getattr(request, 'inspect_me', False):
        # use headers from splash_request
        default_headers = []
        for name, values in request.requestHeaders.getAllRawHeaders():
            for value in values:
                default_headers.append((name, value))

    headers = getarg(request, "headers", default=default_headers, type=None)
    if headers is None:
        return None

    if isinstance(headers, dict):
        return headers

    if not isinstance(headers, (list, tuple)):
        raise BadRequest(invalid_msg)

    for pair in headers:
        if not isinstance(pair, (list, tuple)) or len(pair) != 2:
            raise BadRequest(invalid_msg)
        if not all(isinstance(part, basestring) for part in pair):
            raise BadRequest(invalid_msg)

    return headers
Exemplo n.º 22
0
    def process(self, request, splash_request, operation, data):
        """Drop ``request`` when one of the selected filters blocks its URL.

        Filter names come from the comma-separated ``filters`` argument of
        ``splash_request``.  ``none`` alone disables filtering; an empty
        list falls back to the ``default`` filter when the rules know it.
        The request object is returned in every case.
        """
        raw_names = getarg(splash_request, "filters", default="")
        filter_names = [name for name in raw_names.split(',') if name]

        if filter_names == ['none']:
            return request

        if not filter_names:
            if not self.rules.filter_is_known('default'):
                return request
            filter_names = ['default']

        url, options = self._url_and_options(request, splash_request)
        blocking_filter = self.rules.get_blocking_filter(filter_names, url, options)
        if not blocking_filter:
            return request

        if self.verbosity >= 2:
            details = (
                blocking_filter,
                id(splash_request),
                request_repr(request, operation),
            )
            log.msg("Filter %s: dropped %s %s" % details, system='request_middleware')
        drop_request(request)
        return request
Exemplo n.º 23
0
def _get_png_params(request):
    """Return a dict with the ``width`` and ``height`` request arguments.

    Both are read as ints with no default, using ranges
    ``(1, defaults.MAX_WIDTH)`` and ``(1, defaults.MAX_HEIGTH)``.
    """
    width = getarg(request, "width", None, type=int, range=(1, defaults.MAX_WIDTH))
    height = getarg(request, "height", None, type=int, range=(1, defaults.MAX_HEIGTH))
    return {'width': width, 'height': height}
Exemplo n.º 24
0
 def _get_allowed_domains(self, splash_request):
     """Return the ``allowed_domains`` argument split on commas.

     Returns ``None`` when the argument is absent.
     """
     raw = getarg(splash_request, "allowed_domains", None)
     if raw is None:
         return None
     return raw.split(',')
Exemplo n.º 25
0
def _get_javascript_params(request, js_profiles_path):
    """Return a dict with the ``js_profile`` and ``js_source`` values.

    ``js_profile`` is the result of ``_check_js_profile`` for the ``js``
    argument; ``js_source`` is the result of ``_get_js_source``.
    """
    profile_name = getarg(request, 'js', None)
    js_profile = _check_js_profile(request, js_profiles_path, profile_name)
    js_source = _get_js_source(request)
    return {'js_profile': js_profile, 'js_source': js_source}
Exemplo n.º 26
0
 def _url_and_options(self, request, splash_request):
     """Return ``(url, options)`` for ``request``.

     ``url`` is the request's own URL as unicode; ``options`` holds the
     network location of the ``url`` argument of ``splash_request`` under
     the ``domain`` key.
     """
     request_url = unicode(request.url().toString())
     page_url = getarg(splash_request, 'url')
     page_domain = urlparse.urlsplit(page_url).netloc
     return request_url, {'domain': page_domain}
Exemplo n.º 27
0
def _get_timeout_arg(request):
    """Return the ``timeout`` request argument as a float.

    The default is ``defaults.TIMEOUT`` and the range passed to ``getarg``
    is ``(0, defaults.MAX_TIMEOUT)``.
    """
    default_timeout = defaults.TIMEOUT
    allowed_range = (0, defaults.MAX_TIMEOUT)
    return getarg(request, "timeout", default_timeout,
                  type=float, range=allowed_range)
Exemplo n.º 28
0
 def __init__(self, proxy_profiles_path, request):
     """Initialise the factory from the proxy profile named in the request.

     The profile name is read from the argument named by
     ``self.GET_ARGUMENT``; ``_getFilterParams`` turns it into the three
     values passed on to the parent constructor.
     """
     self.proxy_profiles_path = proxy_profiles_path
     name = getarg(request, self.GET_ARGUMENT, None)
     black, white, proxies = self._getFilterParams(name)
     parent = super(ProfilesSplashProxyFactory, self)
     parent.__init__(black, white, proxies)
Exemplo n.º 29
0
def _get_png_params(request):
    """Return the common parameters followed by ``width`` and ``height``.

    The result is ``(url, baseurl, wait_time, viewport, js_source, width,
    height)``; the first five values are taken from ``_get_common_params``.
    """
    common = _get_common_params(request)
    url, baseurl, wait_time, viewport, js_source = common

    png_width = getarg(
        request, "width", None, type=int, range=(1, defaults.MAX_WIDTH)
    )
    png_height = getarg(
        request, "height", None, type=int, range=(1, defaults.MAX_HEIGTH)
    )
    return url, baseurl, wait_time, viewport, js_source, png_width, png_height
Exemplo n.º 30
0
 def render_GET(self, request):
     """Schedule ``_delayedRender`` to run after ``n`` seconds.

     ``n`` is read from the request as a float (default 1).  After the
     delay, ``_delayedRender`` receives the ``(request, n)`` tuple.
     Returns ``NOT_DONE_YET``.
     """
     delay = getarg(request, "n", 1, type=float)

     def payload():
         return request, delay

     deferLater(reactor, delay, payload).addCallback(self._delayedRender)
     return NOT_DONE_YET
Exemplo n.º 31
0
 def __init__(self, request, allow_subdomains=True):
     """Build the host regex from the ``allowed_domains`` argument.

     The argument is split on commas when present; ``None`` is passed to
     ``get_host_regex`` otherwise.  ``self.host_re`` is set before the
     parent constructor runs.
     """
     raw_domains = getarg(request, "allowed_domains", None)
     domains = None if raw_domains is None else raw_domains.split(',')
     self.host_re = self.get_host_regex(domains, allow_subdomains)
     super(FilteringQNetworkAccessManager, self).__init__()
Exemplo n.º 32
0
def _get_common_params(request):
    """Return ``(url, baseurl, wait_time)`` read from the request.

    ``url`` has no default, ``baseurl`` defaults to ``None`` and the wait
    time is a float defaulting to ``defaults.WAIT_TIME``.
    """
    target_url = getarg(request, "url")
    base = getarg(request, "baseurl", None)
    wait = getarg(
        request, "wait", defaults.WAIT_TIME,
        type=float, range=(0, defaults.MAX_WAIT_TIME),
    )
    return target_url, base, wait
Exemplo n.º 33
0
 def render_GET(self, request):
     """Call ``_delayedRender`` with ``(request, n)`` after ``n`` seconds.

     ``n`` is the float request argument of that name (default 1).
     Returns ``NOT_DONE_YET``.
     """
     seconds = getarg(request, "n", 1, type=float)

     def make_result():
         return (request, seconds)

     delayed = deferLater(reactor, seconds, make_result)
     delayed.addCallback(self._delayedRender)
     return NOT_DONE_YET
Exemplo n.º 34
0
 def _get_allowed_domains(self, splash_request):
     """Split the ``allowed_domains`` argument into a list of names.

     Returns ``None`` when the argument was not supplied.
     """
     domains_arg = getarg(splash_request, "allowed_domains", None)
     return None if domains_arg is None else domains_arg.split(',')
Exemplo n.º 35
0
 def __init__(self, proxy_profiles_path, request):
     """Set up the factory for the proxy profile requested by the client.

     Stores ``proxy_profiles_path``, reads the profile name from the
     argument named by ``self.GET_ARGUMENT`` and passes the three values
     from ``_getFilterParams`` to the parent constructor.
     """
     self.proxy_profiles_path = proxy_profiles_path
     name = getarg(request, self.GET_ARGUMENT, None)
     black, white, proxies = self._getFilterParams(name)
     parent = super(ProfilesSplashProxyFactory, self)
     parent.__init__(black, white, proxies)
Exemplo n.º 36
0
 def _getRender(self, request):
     """Hand an ``HtmlRender`` job to the pool and return its result.

     The job receives the ``url`` argument and the optional ``baseurl``
     argument (``None`` when absent).
     """
     target_url = getarg(request, "url")
     base = getarg(request, "baseurl", None)
     return self.pool.render(HtmlRender, target_url, base)