Пример #1
0
 def test_rooted_empty(self):
     """
     An absolute URL and a bare ``/`` both produce the rooted empty path.
     """
     sources = (u'http://example.com/', u'/')
     paths = [url_path(URL.from_text(text)) for text in sources]
     self.assertThat(paths, AllMatch(Equals(u'/')))
Пример #2
0
 def test_rooted(self):
     """
     URLs with and without a host both keep their rooted path.
     """
     expected = u'/foo/bar'
     with_host = url_path(URL.from_text(u'http://example.com/foo/bar'))
     without_host = url_path(URL.from_text(u'/foo/bar'))
     self.assertThat([with_host, without_host], AllMatch(Equals(expected)))
Пример #3
0
def retarget_links(html_tree, mode='external'):
    """
    Add ``target="_blank"`` and ``rel="noopener"`` to anchors in a tree.

    Anchors that already carry a truthy ``target``, that have no
    ``href``, or whose ``href`` starts with ``#`` are left untouched.

    :param html_tree: Object whose ``iter()`` yields elements exposing
        ``tag``, ``get`` and ``set``.
    :param mode: ``'none'`` does nothing, ``'all'`` retargets every
        remaining anchor, ``'external'`` retargets only anchors whose
        ``href`` parses to a URL with a host, or does not parse at all.
    :raises ValueError: If ``mode`` is not one of the three values above.
    """
    if mode == 'none':
        return
    if mode not in ('external', 'all'):
        raise ValueError('expected "none", "external", or "all", not: %r'
                         % mode)

    retarget_everything = mode == 'all'
    for el in html_tree.iter():
        # Non-string tags are skipped before the tag is compared.
        if not isinstance(el.tag, basestring) or not el.tag == 'a':
            continue
        if el.get('target'):
            continue  # an explicit target wins
        href = el.get('href')
        if not href or href.startswith('#'):
            continue

        if retarget_everything:
            should_retarget = True
        else:
            try:
                parsed = URL.from_text(href)
            except ValueError:
                # An href that cannot be parsed is treated as external.
                should_retarget = True
            else:
                should_retarget = bool(parsed.host)

        if should_retarget:
            el.set('target', '_blank')
            el.set('rel', 'noopener')
Пример #4
0
def _a_handler(a_el):
    href = a_el.attrib.get('href')
    if href:
        url = urljoin(a_el.base_url, href)
        if not isinstance(url, six.text_type):
            url = six.u(url)
        return URL.from_text(url)
Пример #5
0
 def setUp(self):
     """
     Build a ``FeedRetrieval`` backed by a stub service that maps each
     feed's ASCII-encoded host to its XML document.
     """
     documents_by_host = {}
     for feed in FEEDS:
         host = URL.from_text(feed._source).host.encode('ascii')
         documents_by_host[host] = makeXML(feed)
     service = StubFeed(documents_by_host)
     self.retriever = FeedRetrieval(treq=StubTreq(service.resource()))
Пример #6
0
    def render_GET(self, req):
        """
        Redirect the legacy "GET /uri?uri=<capability>" form to the
        more common "GET /uri/<capability>", keeping every other query
        argument. New code should use "/uri/<cap>" directly.
        """
        uri_arg = req.args.get(b"uri", [None])[0]
        if uri_arg is None:
            raise WebError("GET /uri requires uri=")

        # Values such as "%2F", a bare "/" or "../" inside <cap> could be
        # a vector for weirdness, so make sure the argument parses as a
        # known capability before building a URL from it.
        if isinstance(uri.from_string(uri_arg), uri.UnknownURI):
            raise WebError("Invalid capability")

        # Per the original author's note, URL.from_text(req.uri) does not
        # work here because req.uri lacks host/port; start from
        # prePathURL() instead.
        base = URL.from_text(req.prePathURL().decode('utf8'))
        redir_uri = base.child(urllib.quote(uri_arg).decode('utf8'))

        # Re-attach every query argument except "uri" itself.
        for k, values in req.args.items():
            if k == b"uri":
                continue
            for v in values:
                redir_uri = redir_uri.add(k.decode('utf8'), v.decode('utf8'))
        return redirectTo(redir_uri.to_text().encode('utf8'), req)
Пример #7
0
def retarget_links(html_tree, mode='external'):
    """
    Make links in ``html_tree`` open in a new browsing context.

    Every ``a`` element that has a non-fragment ``href`` and no truthy
    ``target`` of its own is considered. In ``'all'`` mode each of them
    gets ``target="_blank"`` and ``rel="noopener"``; in ``'external'``
    mode only those whose ``href`` has a host (or fails to parse) do.
    ``'none'`` returns immediately; any other mode raises ``ValueError``.
    """
    if mode == 'none':
        return
    if mode not in ('external', 'all'):
        raise ValueError('expected "none", "external", or "all", not: %r' %
                         mode)

    for el in html_tree.iter():
        is_anchor = isinstance(el.tag, basestring) and el.tag == 'a'
        if not is_anchor or el.get('target'):
            # Not a link, or the author already chose a target.
            continue

        href = el.get('href')
        if not href or href.startswith('#'):
            continue

        # Start from "retarget" and only relax it for parseable,
        # host-less links when restricted to external ones.
        retarget = True
        if mode != 'all':
            try:
                parsed = URL.from_text(href)
            except ValueError:
                pass
            else:
                retarget = bool(parsed.host)

        if retarget:
            el.set('target', '_blank')
            el.set('rel', 'noopener')
Пример #8
0
def fake_nevow_request(method='GET',
                       body=b'',
                       is_secure=False,
                       uri=b'http://example.com/one',
                       request_headers=None,
                       Request=NevowRequest):
    """
    Build a fake `NevowRequest` for tests.

    :param method: Value stored as the request method.
    :param bytes body: Request body written to ``request.content``.
    :param bool is_secure: When true the channel transport is replaced
        with ``DummyChannel.SSL()``.
    :param bytes uri: UTF-8 encoded URI; its path becomes ``request.uri``
        and its authority becomes the ``host`` header.
    :param request_headers: Optional mapping of header name to list of
        raw values, applied after the defaults.
    :param Request: Request factory called with ``channel=``.
    :return: The populated request with its content rewound to the start.
    """
    target = URL.from_text(uri.decode('utf-8')).to_uri()

    channel = DummyChannel()
    if is_secure:
        channel.transport = DummyChannel.SSL()

    request = Request(channel=channel)
    request.method = method
    request.uri = url_path(target)
    request.clientproto = b'HTTP/1.1'
    request.client = channel.transport.getPeer()

    body_size = len(body)
    host_header = target.authority().encode('utf-8')
    request.requestHeaders.setRawHeaders('host', [host_header])
    request.requestHeaders.setRawHeaders('content-length', [body_size])
    # Caller-supplied headers go last so they can override the defaults.
    for name, raw_values in (request_headers or {}).items():
        request.requestHeaders.setRawHeaders(name, raw_values)

    request.gotLength(body_size)
    request.content.write(body)
    request.content.seek(0)
    return request
Пример #9
0
def assert_valid(response):
    # type: (Response) -> None
    """
    Validate a response body according to its mime type.

    Pass-through responses, 302 redirects and requests whose URL path is
    in ``SKIPPED_PATHS`` are not validated. HTML and JSON bodies are
    handed to their validators; any other mime type raises
    ``AssertionError``.
    """
    if response.direct_passthrough:
        return

    assert isinstance(response.data, bytes)
    # assert response.status_code in [200, 302, 401]

    if response.status_code == 302:
        return

    if URL.from_text(request.url).path in SKIPPED_PATHS:
        return

    validators = {
        "text/html": assert_html_valid,
        "application/json": assert_json_valid,
    }
    mimetype = response.mimetype
    validator = validators.get(mimetype)
    if validator is None:
        raise AssertionError("Unknown mime type: " + mimetype)
    validator(response)
Пример #10
0
 def test_basic(self):
     """
     A plain request maps to the expected basic request map keys.
     """
     request = fake_twisted_request(request_headers={b'x-foo': [b'bar']})
     request_map = _nevow_request_to_request_map(request)

     expected_headers = {
         b'Content-Length': [0],
         b'X-Foo': [b'bar'],
         b'Host': [b'example.com'],
     }
     expected = ContainsDict({
         'content_type': Equals(b'application/octet-stream'),
         'content_length': Equals(0),
         'character_encoding': Is(None),
         'headers': Equals(expected_headers),
         'remote_addr': Equals(b'192.168.1.1'),
         'request_method': Equals(b'GET'),
         'server_name': Equals(b'example.com'),
         'server_port': Equals(80),
         'scheme': Equals(b'http'),
         'uri': Equals(URL.from_text(u'/one')),
     })
     self.assertThat(request_map, expected)
Пример #11
0
def _split_host_port(host, scheme):
    """
    Split a ``Host`` header value into ``(hostname, port)``.

    :param unicode host: Header value such as ``example.com``,
        ``example.com:8080`` or a bracketed IP literal like ``[::1]:8080``.
    :param unicode scheme: ``u'http'`` or ``u'https'``; selects the
        default port (80 or 443, ``None`` for anything else) when the
        header carries none.
    :return: Tuple of the host name (IP literals keep their brackets) and
        the integer port, or the scheme's default port.
    :raises ValueError: If an explicit port is not an integer.
    """
    default_port = {u'https': 443, u'http': 80}.get(scheme)
    if host.startswith(u'['):
        # Bracketed IP literal: the colons inside the brackets belong to
        # the address, so only look for a port after the closing bracket.
        closing = host.find(u']')
        if closing != -1:
            hostname, remainder = host[:closing + 1], host[closing + 1:]
            if remainder.startswith(u':'):
                return hostname, int(remainder[1:])
            return hostname, default_port
    if u':' in host:
        hostname, port = host.split(u':', 1)
        return hostname, int(port)
    return host, default_port


def _nevow_request_to_request_map(req):
    """
    Convert a Nevow request object into an immutable request map.

    The server name and port come from the ``Host`` header; when the
    header has no explicit port the scheme's default is used. Bracketed
    IPv6 literals in the header are handled so that their embedded colons
    are not mistaken for a port separator.

    :param req: Request exposing ``requestHeaders``, ``uri``, ``content``,
        ``method``, ``isSecure()`` and ``getClientIP()``.
    :return: Map built with ``m(...)`` holding the body, content
        metadata, frozen headers, client address, method, server name and
        port, scheme, IRI, path info and protocol.
    """
    headers = req.requestHeaders
    content_type, character_encoding = _get_content_type(headers)
    iri = URL.from_text(req.uri.decode('utf-8')).to_iri()
    scheme = u'https' if req.isSecure() else u'http'
    host, port = _split_host_port(
        _get_first_header(headers, b'host').decode('utf-8'), scheme)
    return m(
        body=req.content,
        content_type=content_type,
        content_length=_get_first_header(headers, b'content-length'),
        character_encoding=character_encoding,
        headers=freeze(dict(headers.getAllRawHeaders())),
        remote_addr=req.getClientIP(),
        request_method=req.method,
        server_name=host,
        server_port=port,
        scheme=scheme,
        #ssl_client_cert=XXX,
        uri=iri,
        #query_string
        path_info=url_path(iri),
        # ``clientproto`` may be absent on some request objects.
        protocol=getattr(req, 'clientproto', None))
Пример #12
0
def Europe():
    """
    Print a greeting, then the Kayak US-to-Europe listing URL after
    forcing https/443 and resolving the relative reference ``.``.
    """
    print("Time to plan your trip!")
    listing = URL.from_text(
        u'https://www.kayak.com/flight-routes/United-States-US0/Europe-EU0')
    secured = listing.replace(scheme=u'https', port=443)
    print(secured.click(u'.').to_text())
Пример #13
0
def Asia():
    """
    Print a greeting, then the JustFly Asia listing URL after forcing
    https/443 and resolving the relative reference ``.`` against it.
    """
    print("Time to plan your trip!")
    address = u'https://www.justfly.com/cheap-flights-to-Asia-2/?campaign=146&route=X-SHA&adgroupid=26220101560&gclid=EAIaIQobChMIqKD4nMnT4gIV07fACh227A5TEAAYASAAEgLFLfD_BwE'
    org_url = (
        URL.from_text(address)
        .replace(scheme=u'https', port=443)
        .click(u'.')
    )

    print(org_url.to_text())
Пример #14
0
def Africa():
    """
    Print a greeting, then the FareCompare Africa listing URL after
    forcing https/443 and resolving the relative reference ``.``.
    """
    print("Time to plan your trip!")
    source = URL.from_text(
        u'https://www.farecompare.com/flights/africa/zone.html?utm_source=google&utm_medium=cpc&utm_campaign=dsa_us_desktop&utm_term=&gclid=EAIaIQobChMI3JzdhMnT4gIVnrfACh1oFQpOEAAYAyAAEgIFHfD_BwE#quote?quoteKey=CCHICJNB20190702R20190709P1CTF'
    )
    resolved = source.replace(scheme=u'https', port=443).click(u'.')
    text = resolved.to_text()

    print(text)
Пример #15
0
 def setUp(self):
     """
     Expose ``self.client``: a stub treq bound to a ``FeedAggregation``
     resource that retrieves every feed in ``FEEDS`` from a stub service.
     """
     documents_by_host = dict(
         (URL.from_text(feed._source).host.encode('ascii'), makeXML(feed))
         for feed in FEEDS)
     feed_service = StubFeed(documents_by_host)
     treq = StubTreq(feed_service.resource())
     urls = [feed._source for feed in FEEDS]
     retriever = FeedRetrieval(treq)
     aggregation = FeedAggregation(retriever.retrieve, urls)
     self.client = StubTreq(aggregation.resource())
Пример #16
0
def update(project_dir):
    """Refresh a project's task-set from its configured upstream.

    The cache is cleaned first, then the system-wide and project
    configurations are loaded, the upstream is retrieved into the cache
    directory and the project is updated from it with ``force=True``.
    """

    # There is no caching strategy yet and everything is downloaded
    # again on each run, so always start from a clean cache.
    _clean_cache()

    project_dir = Path(project_dir)

    # The system-wide configuration names the cache directory; the
    # project configuration names the upstream and module sources.
    config = load_config()
    proj_config = load_proj_config(project_dir)

    raw_cache_dir = config['cache']['path']
    cache_path = Path(osp.expanduser(osp.expandvars(raw_cache_dir)))

    url = URL.from_text(osp.expandvars(proj_config['upstream_url']))

    raw_module_source = proj_config['modules']['source_url']
    module_url = URL.from_text(
        osp.expandvars(osp.expanduser(raw_module_source)))

    print(f"Using upstream: {url}")
    print(f"Retrieving modules from: {module_url}")

    # Fetch the upstream repository; the result is the location of the
    # retrieved source on disk.
    source_repo_path = retrieve_upstream(url, cache_path, proj_config)

    update_project(source_repo_path, project_dir, proj_config, force=True)
Пример #17
0
 def test_unrooted(self):
     """
     Building a route from a path that is not rooted raises `ValueError`
     carrying the message and the offending URL.
     """
     expected_args = MatchesListwise([
         Equals('Route must be a rooted path'),
         Equals(URL.from_text(u'foo')),
     ])
     matcher = MatchesStructure(args=expected_args)
     with ExpectedException(ValueError, matcher):
         route.route(u'foo', route.GET, [tracer('a')])
Пример #18
0
 def __iter__(self, _SRC_ATTR=(None, 'src'), _youtube_hosts=('youtube.com',
                                                             'www.youtube.com',
                                                             'youtube-nocookie.com',
                                                             'www.youtube-nocookie.com')):
     """
     Yield the token stream with YouTube embed iframes replaced.

     An HTML ``iframe`` start tag whose ``src`` is an absolute URL on one
     of ``_youtube_hosts`` with a two-segment path starting with
     ``embed`` is replaced by a link wrapping a thumbnail image, and
     every following token up to and including the iframe's end tag is
     dropped. All other tokens pass through unchanged.
     """
     html_ns = namespaces['html']
     skipping = False
     for token in BaseFilter.__iter__(self):
         kind = token['type']

         if skipping:
             # Per the original note, html5lib does not nest <iframe>
             # tags, so the first iframe end tag closes the one that
             # was replaced.
             if kind == 'EndTag' and token['name'] == 'iframe':
                 skipping = False
             continue

         has_iframe_src = (
             kind == 'StartTag' and
             token['name'] == 'iframe' and
             token['namespace'] == html_ns and
             'data' in token and
             _SRC_ATTR in token['data']
         )
         if not has_iframe_src:
             yield token
             continue

         url = URL.from_text(token['data'][_SRC_ATTR])
         is_youtube_embed = (
             url.absolute and
             url.host in _youtube_hosts and
             len(url.path) == 2 and
             url.path[0] == 'embed'
         )
         if not is_youtube_embed:
             yield token
             continue

         # Emit <a href=watch-url><img src=thumbnail ...></a> in place of
         # the iframe.
         yield {
             'type': 'StartTag',
             'namespace': html_ns,
             'name': 'a',
             'data': OrderedDict([
                 ((None, 'href'), self._watch_url(url).to_text()),
             ]),
         }
         yield {
             'type': 'EmptyTag',
             'namespace': html_ns,
             'name': u'img',
             'data': OrderedDict([
                 ((None, 'alt'), 'YouTube video'),
                 (_SRC_ATTR, self._thumbnail_url(url).to_text()),
                 ((None, 'width'), '320'),
                 ((None, 'height'), '180'),
             ]),
         }
         yield {
             'type': 'EndTag',
             'namespace': html_ns,
             'name': 'a',
         }
         skipping = True
Пример #19
0
def menuBar(btn):
    """
    Handle a menu bar selection.

    ``"Exit"`` calls ``exit()``, ``"Help"`` prints the project URL and
    shows it in an info box, ``"About"`` shows a description of the
    application. Any other value does nothing.
    """
    if btn == "Exit":
        exit()
    if btn == "Help":
        url = URL.from_text(u'https://github.com/andrewcgarber/NoteSmoosh.git')
        print(url)
        help_text = "Please refer to the User's Manual located at this URL:" + url.to_text()
        app.infoBox("Help", help_text)
    if btn == "About":
        about_text = (
            "NoteSmoosh is a prototypical application that is aimed to help law school "
            "students (and others) by compiling study products from various formats and "
            "writing it to one cohesive document."
        )
        app.infoBox("About", about_text)
Пример #20
0
def basic_request(method=route.GET, body=b'', uri=b'http://example.com/'):
    """
    Construct a suitable ``REQUEST`` context value.

    :param method: Request method stored under ``request_method``.
    :param bytes body: Request body, wrapped in a ``BytesIO``.
    :param uri: Request URI, either bytes (decoded as UTF-8) or text.
    :return: Map built with ``m(...)`` holding the body, method, a fixed
        ``text/plain`` / ``utf-8`` content type, empty headers, and the
        scheme and path taken from ``uri``.
    """
    # ``URL.from_text`` expects text, but the default value here is
    # bytes; decode first so both bytes and text URIs are accepted.
    if isinstance(uri, bytes):
        uri = uri.decode('utf-8')
    uri = URL.from_text(uri).to_uri()
    return m(body=BytesIO(body),
             request_method=method,
             content_type=u'text/plain',
             character_encoding=u'utf-8',
             headers=m(),
             scheme=uri.scheme,
             path_info=url_path(uri))
    def test_converge_complete(self):
        """
        After one convergence interval has elapsed, the
        ``s4_last_convergence_succeeded`` metric equals the reactor's
        current time.
        """
        interval = 45
        reactor = MemoryReactorClock()

        endpoint = URL.from_text(u"http://localhost:8000")
        deploy_config = DeploymentConfiguration(
            domain=u"s4.example.com",
            kubernetes_namespace=u"testing",
            subscription_manager_endpoint=endpoint,
            s3_access_key_id=u"access key id",
            s3_secret_key=u"secret key",
            introducer_image=u"introducer:abcdefgh",
            storageserver_image=u"storageserver:abcdefgh",
        )

        state_path = FilePath(self.mktemp().decode("ascii"))
        state_path.makedirs()
        subscription_client = memory_client(state_path, deploy_config.domain)
        k8s_client = KubeClient(k8s=memory_kubernetes().client())
        aws_region = FakeAWSServiceRegion(
            access_key=deploy_config.s3_access_key_id,
            secret_key=deploy_config.s3_secret_key,
        )

        # The hosted zone for the deployment domain is created up front.
        route53 = aws_region.get_route53_client()
        self.successResultOf(
            route53.create_hosted_zone(u"foo", deploy_config.domain))

        service = _convergence_service(
            reactor,
            interval,
            deploy_config,
            subscription_client,
            k8s_client,
            aws_region,
        )
        service.startService()
        reactor.advance(interval)

        # Take the value of the last sample of the matching metric.
        completion_samples = [
            metric.samples[-1][-1]
            for metric in REGISTRY.collect()
            if metric.name == u"s4_last_convergence_succeeded"
        ]
        last_completed = next(iter(completion_samples))
        self.assertThat(reactor.seconds(), Equals(last_completed))
    def test_converge_complete(self):
        """
        Advancing the reactor by one convergence interval leaves the
        ``s4_last_convergence_succeeded`` metric equal to the reactor's
        current time.
        """
        interval = 45
        reactor = MemoryReactorClock()

        config_fields = dict(
            domain=u"s4.example.com",
            kubernetes_namespace=u"testing",
            subscription_manager_endpoint=URL.from_text(u"http://localhost:8000"),
            s3_access_key_id=u"access key id",
            s3_secret_key=u"secret key",
            introducer_image=u"introducer:abcdefgh",
            storageserver_image=u"storageserver:abcdefgh",
        )
        deploy_config = DeploymentConfiguration(**config_fields)

        state_path = FilePath(self.mktemp().decode("ascii"))
        state_path.makedirs()
        subscription_client = memory_client(
            state_path,
            deploy_config.domain,
        )
        kubernetes = memory_kubernetes()
        k8s_client = KubeClient(k8s=kubernetes.client())
        aws_region = FakeAWSServiceRegion(
            access_key=deploy_config.s3_access_key_id,
            secret_key=deploy_config.s3_secret_key,
        )
        zone_creation = aws_region.get_route53_client().create_hosted_zone(
            u"foo", deploy_config.domain,
        )
        self.successResultOf(zone_creation)

        service = _convergence_service(
            reactor, interval, deploy_config,
            subscription_client, k8s_client, aws_region,
        )
        service.startService()
        reactor.advance(interval)

        # Collect the last sample value of every matching metric, then
        # use the first one found.
        completions = []
        for metric in REGISTRY.collect():
            if metric.name == u"s4_last_convergence_succeeded":
                completions.append(metric.samples[-1][-1])
        last_completed = next(iter(completions))
        self.assertThat(reactor.seconds(), Equals(last_completed))
Пример #23
0
 def __attrs_post_init__(self):
     """
     Finish construction: initialise the ``MultiService`` base, create
     the Tahoe client from the ``node.url`` configuration file, and
     attach the magic-folder web service as a child service.
     """
     MultiService.__init__(self)

     node_url = self.config.get_config_from_file(b"node.url").decode("utf-8")
     self.tahoe_client = TahoeClient(
         URL.from_text(node_url),
         Agent(self.reactor),
     )

     web_service = magic_folder_web_service(
         self.reactor,
         self.webport,
         self._get_magic_folder,
         self._get_auth_token,
     )
     web_service.setServiceParent(self)
Пример #24
0
async def on_message(message):
    """
    Reply with a random "tracc" quote when a message mentions "track".

    Messages written by the bot itself are ignored. The match on
    ``track`` is case-insensitive.
    """
    if message.author == bot.user:
        return

    repo_url = URL.from_text(u'https://github.com/wtg/shuttletracker')
    org_url = repo_url.replace(scheme=u'https', port=443).click(u'.')

    traccs_quotes = [
        ('Track? I myself prefer the more scrumptious' + ' *tracc*'),
        org_url.to_text(),
    ]

    if 'track' not in message.content.lower():
        return
    await message.channel.send(random.choice(traccs_quotes))
Пример #25
0
def retrieve_upstream(
        url: URL,
        cache_path: Path,
        proj_config = None
):
    """Fetch the upstream spec repository and any requested modules.

    Parameters
    ----------
    url
        Location of the upstream spec repository.
    cache_path
        Cache directory; the spec repository is retrieved under
        ``specs`` and the modules repository under ``modules``.
    proj_config
        Project configuration mapping. When ``None`` the configuration
        found in the retrieved upstream repository is used instead.

    Returns
    -------
    The path of the retrieved upstream repository, with each requested
    module copied into its ``tasks/modules`` directory.
    """

    # TODO: use ProjConfig API, now we are just using a dict

    # Normalize so that a plain string path also supports the ``/``
    # operator used below. (The original assigned this to a misspelled,
    # unused name, leaving ``cache_path`` unconverted.)
    cache_path = Path(cache_path)
    repo_cache_path = cache_path / "specs"

    # get the spec repo
    upstream_repo_path = get_repo(url, repo_cache_path)

    # load the local config otherwise use the upstream's default
    # config
    if proj_config is None:
        proj_config = load_proj_config(upstream_repo_path)

    ## Retrieve external modules

    # modules may come from a third-party source given by URL
    mod_source_url = URL.from_text(
        osp.expandvars(osp.expanduser(proj_config['modules']['source_url'])))

    modules = proj_config['modules']['modules']

    # only fetch the modules repo if any modules were requested
    if modules:

        mod_cache_path = cache_path / "modules"
        mod_repo_path = get_repo(mod_source_url, mod_cache_path)

        # then copy the modules to the source repo
        for module in modules:
            mod_fname = f"{module}.py"
            shutil.copyfile(
                mod_repo_path / mod_fname,
                upstream_repo_path / 'tasks' / 'modules' / mod_fname,
            )

    return upstream_repo_path
Пример #26
0
def _nevow_request_to_request_map(req):
    """
    Build an immutable request map from a Nevow request object.

    The map carries the body, content type and length, character
    encoding, frozen raw headers, client address, method, server name
    and port, scheme (as bytes) and the request URI parsed as a ``URL``.
    """
    headers = req.requestHeaders
    content_type, character_encoding = _get_content_type(headers)
    fields = dict(
        body=req.content,
        content_type=content_type,
        content_length=_get_first_header(headers, b'content-length'),
        character_encoding=character_encoding,
        headers=freeze(dict(headers.getAllRawHeaders())),
        remote_addr=req.getClientIP(),
        request_method=req.method,
        server_name=req.getRequestHostname(),
        server_port=req.host.port,
        scheme=b'https' if req.isSecure() else b'http',
        uri=URL.from_text(req.uri.decode('ascii')),
    )
    # Not populated yet: ssl_client_cert, query_string, path_info,
    # protocol.
    return m(**fields)
Пример #27
0
def assert_valid(response):
    # type: (Response) -> None
    """
    Check that a response body is valid for its mime type.

    Nothing is checked for pass-through responses, 302 redirects, or
    when the current request's URL path is listed in ``SKIPPED_PATHS``.
    Otherwise HTML and JSON bodies are validated and any other mime type
    raises ``AssertionError``.
    """
    if response.direct_passthrough:
        return

    body = response.data
    assert isinstance(body, bytes)
    # assert response.status_code in [200, 302, 401]

    is_redirect = response.status_code == 302
    if is_redirect:
        return

    request_path = URL.from_text(request.url).path
    if request_path in SKIPPED_PATHS:
        return

    if response.mimetype == "text/html":
        assert_html_valid(response)
        return
    if response.mimetype == "application/json":
        assert_json_valid(response)
        return
    raise AssertionError(f"Unknown mime type: {response.mimetype}")
Пример #28
0
def route(path, method, interceptors, name=None):
    """
    Build a fully specified route description.

    :param unicode path: Rooted path to match; identifiers and wildcards
    are allowed using Rails-like syntax, e.g. ``/users/:id/*rest``.
    UTF-8 bytes are decoded first.
    :param unicode method: Request method to match; ``'ANY'`` matches
    every method.
    :type interceptors: Iterable[`Interceptor`] or Callable[[pmap], Any]
    :param interceptors: Interceptors to enqueue when the route matches,
    or one function taking the ``REQUEST`` value and returning a
    ``RESPONSE`` value.
    :param unicode name: Route name; when ``None`` it is derived from the
    last interceptor's name. UTF-8 bytes are decoded.
    :raises ValueError: If ``path`` is not rooted.
    :rtype: Route
    :return: Route ready to be matched.
    """
    path = path.decode('utf-8') if isinstance(path, bytes) else path
    interceptors, name = _conform_interceptor(interceptors, name)
    name = name.decode('utf-8') if isinstance(name, bytes) else name

    iri = URL.from_text(path).to_iri()
    if not iri.rooted:
        raise ValueError('Route must be a rooted path', iri)

    parsed = _parse_path(iri.path)
    route_fields = dict(
        name=name,
        path=path,
        method=method,
        interceptors=interceptors,
        priority=parsed.priority,
        path_re=_path_regex(parsed.parts, parsed.constraints),
        path_parts=parsed.parts,
        path_params=parsed.params,
        path_constraints=parsed.constraints,
        matcher=constantly(None),
    )
    return Route(**route_fields)
Пример #29
0
def path_from_url(url):
    """
    Return the path of ``url`` as ``/``-joined segments with a leading
    ``/``. ``url`` is converted with ``text_type`` before parsing.
    """
    segments = URL.from_text(text_type(url)).path
    return "/" + "/".join(segments)
Пример #30
0
def path_from_url(url):
    """
    Return the ``/``-joined path segments of ``url`` with a leading
    ``/``. ``url`` is converted with ``str`` before parsing.
    """
    text = str(url)
    parsed = URL.from_text(text)
    joined_segments = "/".join(parsed.path)
    return "/" + joined_segments
Пример #31
0
 def test_unrooted(self):
     """
     A path without a leading slash is returned unrooted.
     """
     unrooted = URL.from_text(u'foo/bar')
     self.assertThat(url_path(unrooted), Equals(u'foo/bar'))
Пример #32
0
    def request(self, method, url, **kwargs):
        """
        See :func:`treq.request()`.

        Keyword arguments read by this method: ``params``, ``headers``,
        ``data``, ``files``, ``json``, ``cookies``, ``allow_redirects``,
        ``browser_like_redirects``, ``auth``, ``timeout``, ``reactor`` and
        ``unbuffered``.

        :param method: Request method; ASCII-encoded and upper-cased.
        :param url: Target URL as ``unicode``, or bytes decoded as ASCII.
        :return: The result of the wrapped agent's ``request`` call with
            the ``_BufferedResponse`` (unless ``unbuffered``) and
            ``_Response`` callbacks added.
        """
        method = method.encode('ascii').upper()

        # URL.from_text is always given text: anything that is not already
        # unicode is decoded as ASCII first.
        if isinstance(url, unicode):
            parsed_url = URL.from_text(url)
        else:
            parsed_url = URL.from_text(url.decode('ascii'))

        # Join parameters provided in the URL
        # and the ones passed as argument.
        params = kwargs.get('params')
        if params:
            parsed_url = parsed_url.replace(
                query=parsed_url.query + tuple(_coerced_query_params(params)))

        # The agent is handed the URI form of the URL as ASCII bytes.
        url = parsed_url.to_uri().to_text().encode('ascii')

        # Convert headers dictionary to
        # twisted raw headers format.
        headers = kwargs.get('headers')
        if headers:
            if isinstance(headers, dict):
                h = Headers({})
                for k, v in headers.items():
                    # NOTE: a value that is neither a string nor a list
                    # matches no branch and is silently dropped.
                    if isinstance(v, (bytes, unicode)):
                        h.addRawHeader(k, v)
                    elif isinstance(v, list):
                        h.setRawHeaders(k, v)

                headers = h
            # A truthy non-dict ``headers`` value is passed on unchanged.
        else:
            headers = Headers({})

        # Here we choose a right producer
        # based on the parameters passed in.
        bodyProducer = None
        data = kwargs.get('data')
        files = kwargs.get('files')
        # since json=None needs to be serialized as 'null', we need to
        # explicitly check kwargs for this key
        has_json = 'json' in kwargs

        # Precedence is files, then data, then json; at most one body
        # producer is created.
        if files:
            # If the files keyword is present we will issue a
            # multipart/form-data request as it suits better for cases
            # with files and/or large objects.
            files = list(_convert_files(files))
            boundary = str(uuid.uuid4()).encode('ascii')
            headers.setRawHeaders(
                b'content-type',
                [b'multipart/form-data; boundary=' + boundary])
            if data:
                data = _convert_params(data)
            else:
                data = []

            bodyProducer = multipart.MultiPartProducer(data + files,
                                                       boundary=boundary)
        elif data:
            # Otherwise stick to x-www-form-urlencoded format
            # as it's generally faster for smaller requests.
            if isinstance(data, (dict, list, tuple)):
                headers.setRawHeaders(b'content-type',
                                      [b'application/x-www-form-urlencoded'])
                data = urlencode(data, doseq=True)
            bodyProducer = self._data_to_body_producer(data)
        elif has_json:
            # If data is sent as json, set Content-Type as 'application/json'
            headers.setRawHeaders(b'content-type',
                                  [b'application/json; charset=UTF-8'])
            content = kwargs['json']
            json = json_dumps(content, separators=(u',', u':')).encode('utf-8')
            bodyProducer = self._data_to_body_producer(json)

        cookies = kwargs.get('cookies', {})

        # Anything that is not already a CookieJar is converted to one.
        if not isinstance(cookies, CookieJar):
            cookies = cookiejar_from_dict(cookies)

        # Per-request cookies are merged with this client's own cookie jar.
        cookies = merge_cookies(self._cookiejar, cookies)
        wrapped_agent = CookieAgent(self._agent, cookies)

        # Redirects are followed unless ``allow_redirects`` is falsy.
        if kwargs.get('allow_redirects', True):
            if kwargs.get('browser_like_redirects', False):
                wrapped_agent = BrowserLikeRedirectAgent(wrapped_agent)
            else:
                wrapped_agent = RedirectAgent(wrapped_agent)

        wrapped_agent = ContentDecoderAgent(wrapped_agent,
                                            [(b'gzip', GzipDecoder)])

        auth = kwargs.get('auth')
        if auth:
            wrapped_agent = add_auth(wrapped_agent, auth)

        d = wrapped_agent.request(method,
                                  url,
                                  headers=headers,
                                  bodyProducer=bodyProducer)

        timeout = kwargs.get('timeout')
        if timeout:
            # Schedule a cancellation of the request after ``timeout``.
            delayedCall = default_reactor(kwargs.get('reactor')).callLater(
                timeout, d.cancel)

            def gotResult(result):
                # A result (success or failure) arrived first, so the
                # pending cancellation is no longer needed.
                if delayedCall.active():
                    delayedCall.cancel()
                return result

            d.addBoth(gotResult)

        if not kwargs.get('unbuffered', False):
            d.addCallback(_BufferedResponse)

        return d.addCallback(_Response, cookies)
Пример #33
0
 def test_unrooted_empty(self):
     """
     The empty URL has an empty, unrooted path.
     """
     empty = URL.from_text(u'')
     self.assertThat(url_path(empty), Equals(u''))
Пример #34
0
def proxy(request,
          url=None,
          response_callback=None,
          sec_chk_hosts=True,
          sec_chk_rules=True,
          timeout=None,
          allowed_hosts=None,
          **kwargs):
    """Forward ``request`` to another URL and relay the answer.

    :param request: incoming request; ``GET``, ``method``, ``body`` and
        ``user`` are read from it.
    :param url: target URL; when falsy, ``request.GET['url']`` is used.
        A value starting with ``/`` is resolved against
        ``settings.SITEURL``.
    :param response_callback: optional callable; when given (and the
        upstream response is truthy) it is called with ``kwargs`` extended
        by ``response``, ``content``, ``status`` and ``content_type`` and
        its return value is returned.
    :param sec_chk_hosts: when true and ``settings.DEBUG`` is false, the
        target host must validate against the allowed-hosts list built
        below, otherwise a 403 response is returned.
    :param sec_chk_rules: placeholder for rule-based checks (not yet
        implemented).
    :param timeout: upstream request timeout; falls back to ``TIMEOUT``
        when falsy.
    :param allowed_hosts: forwarded to ``get_headers``; defaults to an
        empty list.
    :param kwargs: extra keyword arguments passed to ``response_callback``.
    :return: an ``HttpResponse`` (or whatever ``response_callback``
        returns).
    """
    # A fresh list per call instead of a shared mutable default argument.
    if allowed_hosts is None:
        allowed_hosts = []

    # Request default timeout
    if not timeout:
        timeout = TIMEOUT

    # Sanity url checks
    if 'url' not in request.GET and not url:
        return HttpResponse(
            "The proxy service requires a URL-encoded URL as a parameter.",
            status=400,
            content_type="text/plain")

    raw_url = url or request.GET['url']
    if raw_url.startswith("/"):
        raw_url = urljoin(settings.SITEURL, raw_url)
    url = urlsplit(raw_url)
    scheme = str(url.scheme)

    # White-Black Listing Hosts
    site_url = urlsplit(settings.SITEURL)
    if sec_chk_hosts and not settings.DEBUG:
        # Work on a private copy: extending the settings object itself would
        # leak hosts from one request into every later one.
        proxy_allowed_hosts = list(
            getattr(settings, 'PROXY_ALLOWED_HOSTS', ()))

        def _allow(host):
            # Register ``host`` once.
            if host not in proxy_allowed_hosts:
                proxy_allowed_hosts.append(host)

        # Attach current SITEURL
        _allow(site_url.hostname)

        # Attach current hostname
        if check_ogc_backend(geoserver.BACKEND_PACKAGE):
            from geonode.geoserver.helpers import ogc_server_settings
            if ogc_server_settings:
                _allow(ogc_server_settings.hostname)

        # Check OWS regexp: a GetCapabilities request for a known OGC
        # service and a supported version whitelists its own host.
        ows_match = ows_regexp.match(url.query) if url.query else None
        if ows_match:
            ows_tokens = ows_match.groups()
            if len(ows_tokens) == 4 and 'version' == ows_tokens[0]:
                ows_version = StrictVersion(ows_tokens[1])
                if (StrictVersion("1.0.0") <= ows_version
                        <= StrictVersion("3.0.0")
                        # Exact match; a substring test would accept
                        # fragments such as '' or 'get'.
                        and ows_tokens[2].lower() == 'getcapabilities'
                        and ows_tokens[3].upper() in (
                            'OWS', 'WCS', 'WFS', 'WMS', 'WPS', 'CSW')):
                    _allow(url.hostname)

        # Check Remote Services base_urls
        from geonode.services.models import Service
        for _s in Service.objects.all():
            _allow(urlsplit(_s.base_url).hostname)

        if not validate_host(url.hostname, proxy_allowed_hosts):
            return HttpResponse(
                "DEBUG is set to False but the host of the path provided to the proxy service"
                " is not in the PROXY_ALLOWED_HOSTS setting.",
                status=403,
                content_type="text/plain")

    # Security checks based on rules; allow only specific requests
    if sec_chk_rules:
        # TODO: Not yet implemented
        pass

    # Collecting headers and cookies
    headers, access_token = get_headers(request,
                                        url,
                                        raw_url,
                                        allowed_hosts=allowed_hosts)

    # Requests aimed at this site are forced onto the site's own scheme.
    parsed = urlparse(raw_url)
    if parsed.netloc == site_url.netloc and scheme != site_url.scheme:
        parsed = parsed._replace(scheme=site_url.scheme)

    _url = parsed.geturl()

    # Some clients / JS libraries generate URLs with relative URL paths, e.g.
    # "http://host/path/path/../file.css", which the requests library cannot
    # currently handle (https://github.com/kennethreitz/requests/issues/2982).
    # We parse and normalise such URLs into absolute paths before attempting
    # to proxy the request.
    _url = URL.from_text(_url).normalize().to_text()

    # Inject access_token if necessary
    if request.method == "GET" and access_token and 'access_token' not in _url:
        query_separator = '&' if '?' in _url else '?'
        _url = ('%s%saccess_token=%s' % (_url, query_separator, access_token))

    _data = request.body.decode('utf-8')

    # Avoid translating local geoserver calls into external ones
    if check_ogc_backend(geoserver.BACKEND_PACKAGE):
        from geonode.geoserver.helpers import ogc_server_settings
        public_geoserver = '%s%s' % (settings.SITEURL, 'geoserver')
        internal_geoserver = ogc_server_settings.LOCATION.rstrip('/')
        _url = _url.replace(public_geoserver, internal_geoserver)
        _data = _data.replace(public_geoserver, internal_geoserver)

    response, _ = http_client.request(_url,
                                      method=request.method,
                                      data=_data,
                                      headers=headers,
                                      timeout=timeout,
                                      user=request.user)
    content = response.content or response.reason
    status = response.status_code
    content_type = response.headers.get('Content-Type')

    if status >= 400:
        return HttpResponse(content=content,
                            reason=content,
                            status=status,
                            content_type=content_type)

    # decompress GZipped responses if not enabled
    # NOTE(review): this compares the Content-Type header with 'gzip'; the
    # previous revision looked at Content-Encoding. Confirm whether the HTTP
    # client already decompresses bodies before changing this condition.
    if content and content_type and content_type == 'gzip':
        buf = io.BytesIO(content)
        f = gzip.GzipFile(fileobj=buf)
        content = f.read()

    # Best effort: hand textual payloads over as text rather than bytes.
    PLAIN_CONTENT_TYPES = ['text', 'plain', 'html', 'json', 'xml', 'gml']
    if (content_type
            and not isinstance(content, six.string_types)
            and any(_ct in content_type for _ct in PLAIN_CONTENT_TYPES)):
        try:
            content = content.decode()
        except (UnicodeDecodeError, AttributeError):
            # Undecodable or non-bytes payload: relay it untouched.
            pass

    if response and response_callback:
        kwargs.update({
            'response': response,
            'content': content,
            'status': status,
            'content_type': content_type
        })
        return response_callback(**kwargs)

    # If we get a redirect, let's add a useful message.
    if status and status in (301, 302, 303, 307):
        # Same header access as for Content-Type above.
        location = response.headers.get('Location')
        _response = HttpResponse(
            ('This proxy does not support redirects. The server in "%s" '
             'asked for a redirect to "%s"' % (url, location)),
            status=status,
            content_type=content_type)
        _response['Location'] = location
        return _response

    def _get_message(text):
        # Extract the text following "<b>Message</b>" from an HTML error
        # page; fall back to the whole (decoded) text.
        _s = text
        if isinstance(text, bytes):
            _s = text.decode("utf-8", "replace")
        try:
            match = re.search('<b>Message</b>(.+?)</p>', _s)
        except TypeError:
            # Not a text payload; nothing to extract.
            return _s
        return match.group(1).strip() if match else _s

    return HttpResponse(
        content=content,
        reason=_get_message(content) if status not in (200, 201) else None,
        status=status,
        content_type=content_type)
Пример #35
0
def init(force, taskset_name, upstream, base_repo, project_dir):
    """Initialize a project task-set.

    If the '--upstream' option is not given will look to whether the
    '--base-repo' flag is on.

    If the '--base-repo' flag is on, jubeo will always initialize this
    directory with the 'base_repo' configuration which is included in
    the jubeo source code. Otherwise if a '.jubeo/jubeo.toml' file is
    present in the project this will be used. If it doesn't exist then
    the 'base_repo' will be used.

    """

    _clean_cache()

    project_dir = Path(project_dir)

    config = load_config()

    cache_path = Path(osp.expanduser(osp.expandvars(config['cache']['path'])))

    # see if the project has a config, and load it if there is
    try:
        proj_config = load_proj_config(project_dir)
    except FileNotFoundError:
        proj_config = None

    # get the upstream repo we are retrieving from, downloading if
    # necessary
    if upstream is not None:
        upstream_url = URL.from_text(upstream)

    elif not base_repo and proj_config is not None:
        # base_repo option not on and this project has a config: read
        # the upstream URL from that config
        upstream_url = URL.from_text(
            osp.expandvars(osp.expanduser(proj_config['upstream_url'])))

    else:
        # fall back to base_repo, either because the flag asks for it
        # or because the project has no config to read an upstream
        # from; without this branch 'upstream_url' would be unbound
        upstream_url = URL.from_text(str(base_repo_path()))

    # then we can simply copy from this downloaded repo
    source_repo_path = Path(
        retrieve_upstream(
            upstream_url,
            cache_path,
            proj_config=proj_config,
        ))

    init_project(
        source_repo_path,
        project_dir,
        force=force,
    )