Exemplo n.º 1
0
def _makeGetterFactory(url, factoryFactory, contextFactory=None,
                       *args, **kwargs):
    """
    Build a page-getting factory for C{url} and start connecting it.

    Extra positional and keyword arguments are forwarded unchanged to
    C{factoryFactory}.

    @param url: The URL to fetch; it is parsed with L{URI.fromBytes}.

    @param factoryFactory: Callable invoked as
        C{factoryFactory(url, *args, **kwargs)} to build the getter factory.

    @param contextFactory: Context factory used for C{https} URLs; when
        L{None}, an C{ssl.ClientContextFactory} is created.

    @return: The factory produced by C{factoryFactory}.
    """
    parsed = URI.fromBytes(url)
    getter = factoryFactory(url, *args, **kwargs)
    hostname = nativeString(parsed.host)

    if parsed.scheme != b'https':
        reactor.connectTCP(hostname, parsed.port, getter)
        return getter

    # The TLS support module is only imported when it is actually needed.
    from twisted.internet import ssl
    if contextFactory is None:
        contextFactory = ssl.ClientContextFactory()
    reactor.connectSSL(hostname, parsed.port, getter, contextFactory)
    return getter
Exemplo n.º 2
0
    def setUp(self):
        """
        Start a TLS test server and connect an HTTP/2 client to it.

        This is a generator: each ``yield`` waits on the value produced by
        an endpoint's ``listen`` / ``connect`` call (presumably driven by
        ``inlineCallbacks`` -- the decorator is not visible here).
        """
        # Serve the resource tree from a Site created with timeout=None.
        self.site = Site(self._init_resource(), timeout=None)

        # Listen on port 0 so the actual port is read back from the server.
        self.hostname = 'localhost'
        server_tls = ssl_context_factory(self.key_file, self.certificate_file)
        listening_endpoint = SSL4ServerEndpoint(
            reactor, 0, server_tls, interface=self.hostname)
        self.server = yield listening_endpoint.listen(self.site)
        self.port_number = self.server.getHost().port

        # Client side: trust the test certificate and offer only "h2".
        self.client_certificate = get_client_certificate(
            self.key_file, self.certificate_file)
        tls_options = optionsForClientTLS(
            hostname=self.hostname,
            trustRoot=self.client_certificate,
            acceptableProtocols=[b'h2'],
        )
        base_uri = URI.fromBytes(bytes(self.get_url('/'), 'utf-8'))

        self.conn_closed_deferred = Deferred()
        from scrapy.core.http2.protocol import H2ClientFactory
        client_factory = H2ClientFactory(
            base_uri, Settings(), self.conn_closed_deferred)
        connecting_endpoint = SSL4ClientEndpoint(
            reactor, self.hostname, self.port_number, tls_options)
        self.client = yield connecting_endpoint.connect(client_factory)
Exemplo n.º 3
0
    def from_uri(cls, reactor, uri):
        """Return an AMQEndpoint instance configured with the given AMQP uri.

        The optional ``username[:password]@`` prefix of the authority becomes
        the ``username`` / ``password`` keyword arguments, the path (minus
        its leading "/") becomes ``vhost``, and every query parameter is
        passed through as a keyword argument using its first value.
        ``heartbeat`` is converted to ``int``.  The port defaults to 5672.

        @param reactor: Passed unchanged as the first argument to C{cls}.
        @param uri: The AMQP URI, as text.
        @return: The result of C{cls(reactor, host, port, **kwargs)}.

        @see: https://www.rabbitmq.com/uri-spec.html
        """
        parsed = URI.fromBytes(uri.encode(), defaultPort=5672)
        kwargs = {}
        # URI.fromBytes only strips a numeric ":port" suffix from the
        # authority, so a "user:pass@" prefix is still part of parsed.host.
        host = parsed.host.decode()
        if "@" in host:
            # Split the host, not the netloc: the netloc still carries an
            # explicit ":port", which would end up glued to the hostname.
            # The last "@" ends the userinfo; the first ":" starts the
            # password, so passwords may themselves contain ":" or "@".
            auth, host = host.rsplit("@", 1)
            username, separator, password = auth.partition(":")
            kwargs["username"] = username
            if separator:
                kwargs["password"] = password

        vhost = parsed.path.decode()
        if len(vhost) > 1:
            vhost = vhost[1:]  # Strip leading "/"
        kwargs["vhost"] = vhost

        params = parse_qs(parsed.query)
        kwargs.update({
            name.decode(): value[0].decode()
            for name, value in params.items()
        })

        if "heartbeat" in kwargs:
            kwargs["heartbeat"] = int(kwargs["heartbeat"])
        return cls(reactor, host, parsed.port, **kwargs)
Exemplo n.º 4
0
def build_agent(req):
    """
    Build the agent stack used to issue ``req``.

    A proxy is used when ``req.get('proxy')`` is truthy and
    ``req.get('use_proxy')`` is not ``False``: https URLs get a
    ``TunnelingAgent``, everything else a ``ProxyAgent`` (adding the
    ``Proxy-Authorization`` header to the request's headers when the proxy
    has an ``auth_header``).  Without a proxy a plain ``Agent`` is used.
    The chosen agent is then wrapped in a ``RedirectAgent`` (limit 3) and a
    gzip-decoding ``ContentDecoderAgent``.

    :param req: request object exposing ``url`` and a dict-like ``get``.
    :return: the wrapped agent.
    """
    uri = URI.fromBytes(req.url)
    proxy = req.get('proxy')
    if req.get('use_proxy') is False:
        proxy = None

    if proxy:
        # URI.fromBytes yields a bytes scheme.  Comparing against a bytes
        # literal is correct on Python 3 and unchanged on Python 2, where
        # b'https' is the same object type as 'https'.
        if uri.scheme == b'https':
            agent = TunnelingAgent(
                reactor=reactor,
                proxy=proxy,
                contextFactory=ScrapexClientContextFactory(),
                connectTimeout=req.get('timeout'))
        else:
            endpoint = TCP4ClientEndpoint(reactor,
                                          host=proxy.host,
                                          port=proxy.port,
                                          timeout=req.get('timeout'))
            agent = ProxyAgent(endpoint)
            if proxy.auth_header:
                req.get('headers')['Proxy-Authorization'] = proxy.auth_header
    else:
        agent = Agent(reactor)

    agent = RedirectAgent(agent, redirectLimit=3)
    agent = ContentDecoderAgent(agent, [('gzip', GzipDecoder)])
    return agent
Exemplo n.º 5
0
 def __init__(self, reactor, proxyURI,
              connectTimeout=None, bindAddress=None, pool=None):
     """
     Initialise the base agent and remember where the proxy lives.

     ``proxyURI`` is parsed once with ``URI.fromBytes`` and stored on
     ``self._proxyURI``; the remaining arguments are handed to the parent
     class initialiser unchanged.
     """
     base_options = {
         'connectTimeout': connectTimeout,
         'bindAddress': bindAddress,
         'pool': pool,
     }
     super(ScrapyProxyAgent, self).__init__(reactor, **base_options)
     self._proxyURI = URI.fromBytes(proxyURI)
Exemplo n.º 6
0
 def makeURL(self):
     """
     Build ``self._reqURL`` from ``self._url`` and, when the host/IP checks
     below call for it, set ``self._proxyIp`` to ``self._ipAddr``.

     Returns nothing; the results are stored on the instance and logged at
     debug level.
     """
     url_data = URI.fromBytes(self._url)
     # A URL that carries its own scheme supplies every component itself;
     # otherwise self._url is used as the path and the remaining components
     # come from the instance's own settings.
     if url_data.scheme:
         args = {
             "scheme": url_data.scheme,
             "hostname": url_data.host,
             "port": url_data.port,
             "path": url_data.path,
         }
     else:
         args = {
             "scheme": "https" if self._ssl else "http",
             "hostname": self._hostname,
             "port": self._port,
             "path": self._url,
         }
     hasHost = bool(url_data.host)
     hostMatch = url_data.host.endswith(self._hostname)
     ipMatch = self._ipAddr in self._hostnameIp
     # Precedence: (hasHost and xor(hostMatch, ipMatch)) or (not ipMatch).
     # `xor` is not defined in this block -- presumably operator.xor; confirm.
     if hasHost and xor(hostMatch, ipMatch) or not ipMatch:
         self._proxyIp = self._ipAddr
     # Remove port if default (see RFC 2616, 14.23)
     # Precedence: (port in (80, 443)) or (self._proxyIp and no scheme).
     # NOTE(review): the port is dropped for 80 and 443 regardless of which
     # scheme is in use, and self._proxyIp is read even when the branch
     # above did not assign it -- it must already exist on the instance.
     if int(args['port']) in (80, 443) or \
             self._proxyIp and not url_data.scheme:
         self._reqURL = "{scheme}://{hostname}{path}".format(**args)
     else:
         self._reqURL = "{scheme}://{hostname}:{port}{path}".format(**args)
     log.debug("HTTP request URL: %s, Proxy: %s", self._reqURL,
               self._proxyIp)
Exemplo n.º 7
0
 def from_dict(cls, dct):
     """Build a cassette from *dct*, the structure obtained by
     deserializing a JSON or YAML recording.

     Every entry of ``dct['http_interactions']`` yields one
     ``SavedResponse`` appended to the cassette's ``responses``."""
     cassette = cls()
     for entry in dct['http_interactions']:
         saved_request = entry['request']
         # Blank out the scheme and netloc so that only the part of the
         # URI a real request would send remains.
         parsed = urlparse(saved_request['uri'])
         relative_uri = urlunparse(('', '') + parsed[2:])
         request = Request._construct(
             saved_request['method'],
             relative_uri,
             Headers(saved_request['headers']),
             SavedBodyProducer(body_from_dict(saved_request)),
             False,
             URI.fromBytes(saved_request['uri'].encode('utf-8')),
         )

         saved_response = entry['response']
         status = saved_response['status']
         response = Response._construct(
             ('HTTP', 1, 1),
             status['code'],
             status['message'],
             Headers(saved_response['headers']),
             AbortableStringTransport(),
             request,
         )

         length_values = response.headers.getRawHeaders('Content-Length')
         if length_values:
             try:
                 response.length = int(length_values[0])
             except ValueError:
                 # A non-numeric header leaves response.length untouched.
                 pass

         recorded = SavedResponse(response, body_from_dict(saved_response))
         cassette.responses.append(recorded)
     return cassette
Exemplo n.º 8
0
 def makeURL(self):
     """
     Build ``self._reqURL`` from ``self._url`` and, when the host/IP checks
     below call for it, set ``self._proxyIp`` to ``self._ipAddr``.

     Returns nothing; the results are stored on the instance and logged at
     debug level.
     """
     url_data = URI.fromBytes(self._url)
     # A URL that carries its own scheme supplies every component itself;
     # otherwise self._url is used as the path and the remaining components
     # come from the instance's own settings.
     if url_data.scheme:
         args = {
             "scheme": url_data.scheme,
             "hostname": url_data.host,
             "port": url_data.port,
             "path": url_data.path,
         }
     else:
         args = {
             "scheme": "https" if self._ssl else "http",
             "hostname": self._hostname,
             "port": self._port,
             "path": self._url,
         }
     hasHost = bool(url_data.host)
     hostMatch = url_data.host.endswith(self._hostname)
     ipMatch = self._ipAddr in self._hostnameIp
     # True when the port is the one paired with the instance's SSL setting:
     # 443 with self._ssl, 80 without.  Precedence:
     # (self._ssl and port == 443) or (not self._ssl and port == 80).
     portProtoMatch = self._ssl and int(args['port']) == 443 or \
                         not self._ssl and int(args['port']) == 80
     # Precedence: (hasHost and xor(hostMatch, ipMatch)) or (not ipMatch).
     # `xor` is not defined in this block -- presumably operator.xor; confirm.
     if hasHost and xor(hostMatch, ipMatch) or not ipMatch:
         self._proxyIp = self._ipAddr
     # Remove port if default (see RFC 2616, 14.23)
     # Precedence: (default port and portProtoMatch) or
     # (bool(self._proxyIp) and no scheme in the URL).
     # NOTE(review): self._proxyIp is read even when the branch above did
     # not assign it -- it must already exist on the instance.
     if (int(args['port']) in (80, 443) and portProtoMatch) or \
             bool(self._proxyIp) and not url_data.scheme:
         self._reqURL = "{scheme}://{hostname}{path}".format(**args)
     else:
         self._reqURL = "{scheme}://{hostname}:{port}{path}".format(**args)
     log.debug(
         "HTTP request URL: %s, Proxy: %s", self._reqURL, self._proxyIp
     )
Exemplo n.º 9
0
 def __init__(self, reactor, proxyURI,
              connectTimeout=None, bindAddress=None, pool=None):
     """
     Initialise the base agent and remember where the proxy lives.

     ``proxyURI`` is parsed once with ``URI.fromBytes`` and stored on
     ``self._proxyURI``; the remaining arguments are handed to the parent
     class initialiser unchanged.
     """
     base_options = {
         'connectTimeout': connectTimeout,
         'bindAddress': bindAddress,
         'pool': pool,
     }
     super(ScrapyProxyAgent, self).__init__(reactor, **base_options)
     self._proxyURI = URI.fromBytes(proxyURI)
Exemplo n.º 10
0
	def _create_agent(self, req):
		"""
		Create the right agent for a specific request.

		Proxy agents are cached in ``self._agents`` under a key built from
		the proxy host and port, so each proxy gets one tunnelling (https)
		and one plain (http) agent.  Requests without a proxy share
		``self._direct_agent``.  The chosen agent is then wrapped, in order,
		with a redirect agent (unless the configured limit is 0 or less), a
		gzip content decoder and, when ``self.cj`` is set, a cookie agent.

		:param req: request object exposing ``url`` and a dict-like ``get``.
		:return: the wrapped agent.
		"""
		uri = URI.fromBytes(req.url)
		proxy = req.get('proxy')
		if req.get('use_proxy') is False:
			proxy = None

		if proxy:
			# URI.fromBytes yields a bytes scheme.  A bytes literal compares
			# correctly on Python 3 and is unchanged on Python 2.
			if uri.scheme == b'https':
				agent_key = 'httpsproxy-%s-%s' % (proxy.host, proxy.port)
				agent = self._agents.get(agent_key)
				if not agent:
					agent = TunnelingAgent(reactor=reactor, proxy=proxy, contextFactory=ScrapexClientContextFactory(), connectTimeout=30, pool=self._pool)
					self._agents[agent_key] = agent
			else:
				# http
				agent_key = 'httpproxy-%s-%s' % (proxy.host, proxy.port)
				agent = self._agents.get(agent_key)
				if not agent:
					endpoint = TCP4ClientEndpoint(reactor, host=proxy.host, port=proxy.port, timeout=req.get('timeout'))
					agent = ProxyAgent(endpoint, pool=self._pool)
					self._agents[agent_key] = agent

				if proxy.auth_header:
					req.get('headers')['Proxy-Authorization'] = proxy.auth_header
		else:
			# use single agent when no proxies used
			agent = self._direct_agent

		redirectLimit = self.scraper.config.get('max_redirects')
		if redirectLimit is None:
			redirectLimit = 3

		if redirectLimit > 0:
			agent = BrowserLikeRedirectAgent(agent, redirectLimit=redirectLimit)

		agent = ContentDecoderAgent(agent, [('gzip', GzipDecoder)])

		if self.cj is not None:
			agent = CookieAgent(agent, self.cj)

		return agent
Exemplo n.º 11
0
    def request(self, request: Request, spider: Spider) -> Deferred:
        """
        Send ``request`` over a pooled connection to its URL's endpoint.

        Returns a Deferred that is already failed when the URL scheme is
        not supported; otherwise the Deferred from the connection pool with
        the connection's ``request`` call chained onto it.
        """
        target = URI.fromBytes(bytes(request.url, encoding='utf-8'))
        try:
            endpoint = self.get_endpoint(target)
        except SchemeNotSupported:
            # An argument-less Failure wraps the exception being handled.
            return defer.fail(Failure())

        def send(conn):
            return conn.request(request, spider)

        pool_key = self.get_key(target)
        pending = self._pool.get_connection(pool_key, target, endpoint)
        pending.addCallback(send)
        return pending
Exemplo n.º 12
0
    def request(self, method, uri, headers=None, bodyProducer=None):
        """
        Send a request through the configured proxy.

        The connection is made to the proxy endpoint, while ``uri`` is passed
        on both parsed and in its original form.
        """
        proxy = self._proxyURI
        endpoint = self._getEndpoint(proxy)

        # Every connection goes to the same destination (the proxy), so they
        # are all cached under one key.
        pool_key = ("http-proxy", proxy.host, proxy.port)
        target = URI.fromBytes(uri)
        return self._requestWithEndpoint(
            pool_key, endpoint, method, target, headers, bodyProducer, uri)
Exemplo n.º 13
0
    def request(self,
                method,
                uri,
                headers=None,
                bodyProducer=None,
                address=None,
                path=None):
        """
        Adapted from Agent.request but to allow arbitrary text in the request path, e.g. 'GET http://www.example.com HTTP/1.1'

        Path argument can be used to specifically override the path in the uri for non-conforming paths
        Address argument can be used to override the address the url connection is made to

        ``method``, ``uri``, ``address`` and ``path`` are text and are encoded
        as UTF-8 here.  Returns a Deferred with a timeout of ``self.timeout``
        and ``self.read_response`` added as a callback; it is already failed
        with ``SchemeNotSupported`` when no endpoint exists for the scheme.
        """
        method = method.encode("UTF-8")
        parsedURI = URI.fromBytes(uri.encode("UTF-8"))

        try:
            if address is not None:
                # Connect to the override address while the request itself
                # (key, Host, path) still uses the original URI.
                modifiedURI = URI.fromBytes(uri.encode("UTF-8"))
                modifiedURI.host = address.encode("UTF-8")
                endpoint = self._getEndpoint(modifiedURI)
            else:
                endpoint = self._getEndpoint(parsedURI)
        except SchemeNotSupported:
            # Failure refuses plain strings, so failing with a message
            # string raised TypeError instead of returning a Deferred.
            # With no argument, the exception being handled is wrapped.
            return defer.fail()

        if path is not None:
            parsedURI.path = path.encode("UTF-8")
        key = (parsedURI.scheme, parsedURI.host, parsedURI.port)
        d = self._requestWithEndpoint(key, endpoint, method, parsedURI,
                                      headers, bodyProducer,
                                      parsedURI.originForm)

        # Add a timeout to the deferred to prevent hangs on requests that connect but don't send any data
        d.addTimeout(self.timeout, reactor)
        d.addCallback(self.read_response, parsedURI)
        return d
Exemplo n.º 14
0
 def request(self, method, uri, headers=None, bodyProducer=None):
     """
     Send a request through the configured proxy.

     The connection is made to the proxy endpoint, while ``uri`` is passed
     on both parsed and in its original form.
     """
     proxy = self._proxyURI
     # From version 15.0.0 on, the URI object itself is handed to
     # _getEndpoint; earlier versions get scheme, host and port separately.
     if twisted_version >= (15, 0, 0):
         endpoint = self._getEndpoint(proxy)
     else:
         endpoint = self._getEndpoint(proxy.scheme, proxy.host, proxy.port)

     # Every connection goes to the same destination (the proxy), so they
     # are all cached under one key.
     pool_key = ("http-proxy", proxy.host, proxy.port)
     target = URI.fromBytes(uri)
     return self._requestWithEndpoint(
         pool_key, endpoint, method, target, headers, bodyProducer, uri)
Exemplo n.º 15
0
    def _command_download(self, data):
        """
        Download the audio file named in ``data`` from the connected peer.

        The URL is ``http://<peer host><partial_url>`` and the file is saved
        as ``<audio_id>.opus`` in the session directory.  When the download
        finishes, an ``update_downloaded`` message reporting success or
        failure is written to ``self._tcp_packetizer``.

        Returns the download factory's deferred.
        """
        reactor = self._context["reactor"]
        session_files = self._context["session_files"]
        audio_id = data["audio_id"]
        partial_url = data["partial_url"]

        peer_host = str(self.transport.getPeer().host)
        url = "http://" + peer_host + partial_url
        file_path = session_files.session_dir / f"{audio_id}.opus"

        log.info(f"Downloading file from {url} to {file_path}")

        url_bytes = url.encode("utf-8")
        url_parsed = URI.fromBytes(url_bytes)
        factory = HTTPDownloader(url_bytes, str(file_path))
        reactor.connectTCP(url_parsed.host, url_parsed.port, factory)

        def report(outcome, **extra):
            # Send the outcome of the download back over the TCP channel.
            message = {
                "command": "update_downloaded",
                "audio_id": audio_id,
                "result": outcome,
            }
            message.update(extra)
            self._tcp_packetizer.write(json.dumps(message))

        def on_success(data):
            # File downloaded successfully, tell the server
            report("success")

        def on_error(error):
            # File failed to download, tell the server
            log.error(f"Failed to download file at '{url}': {error}")
            report("failure", error=str(error))

        download = factory.deferred
        download.addCallback(on_success)
        download.addErrback(on_error)
        return download
Exemplo n.º 16
0
    def request(self, method, uri, headers, bodyProducer):
        """
        Sign the request with an HMAC-SHA1 ``AuthHMAC`` header and send it
        through ``self.agent``.

        The string to sign is the method, content type, content MD5, date
        and origin-form URI joined by newlines.  A missing ``date`` header is
        filled in from ``self._generateRequestDate``.  When there is a body
        but no ``content-md5`` header, the body is read fully so its MD5 can
        be computed, and it is then re-sent from a fresh producer.

        This is a generator that finishes with ``defer.returnValue``
        (presumably decorated with ``inlineCallbacks`` -- the decorator is
        not visible here).

        @param headers: request headers or C{None}; a copy is modified, not
            the caller's object.
        @return: the response produced by ``self.agent.request``.
        """
        if headers is None:
            headers = Headers()
        else:
            headers = headers.copy()

        contentType = headers.getRawHeaders('content-type', [""])[0]
        date = headers.getRawHeaders('date',
                                     [""])[0] or self._generateRequestDate(uri)
        headers.setRawHeaders('date', [date])

        uri_origin_form = URI.fromBytes(uri).originForm
        contentMD5 = headers.getRawHeaders('content-md5', [""])[0]

        if not contentMD5 and bodyProducer is not None:

            # Without a default, getattr raises AttributeError for an agent
            # lacking `_reactor`, so the global-reactor fallback could only
            # ever cover a falsy attribute, never a missing one.
            r = getattr(self.agent, '_reactor', None) or reactor
            bodyConsumer = StringConsumer(callLater=r.callLater)

            yield bodyProducer.startProducing(bodyConsumer)
            body = bodyConsumer.value()
            # The original producer is spent; replay the captured body.
            bodyProducer = StringBodyProducer(body)

            if body:
                contentMD5 = binascii.b2a_base64(
                    hashlib.md5(body).digest()).strip()
                headers.addRawHeader('content-md5', contentMD5)

        sts = "\n".join([
            method, contentType or "", contentMD5, date or "", uri_origin_form
        ])
        mac = hmac.new(self.secretKey, sts, digestmod=hashlib.sha1).digest()
        encodedMAC = binascii.b2a_base64(mac).strip()

        auth_header = "AuthHMAC {0}:{1}".format(self.accessKey, encodedMAC)
        headers.addRawHeader('authorization', auth_header)

        d = yield self.agent.request(method, uri, headers, bodyProducer)
        self._handleResponseDate(uri, d)
        defer.returnValue(d)
Exemplo n.º 17
0
	def __init__(self, url, outputfile, contextFactory=None, *args, **kwargs):
		"""
		Start downloading ``url`` into ``outputfile``.

		Creates an ``HTTPProgressDownloader`` (stored on ``self.factory``)
		with the extra ``args`` / ``kwargs`` and connects it over TLS for
		https URLs, plain TCP otherwise.  The connector is stored on
		``self.connection``.

		``contextFactory`` is used for https connections; when it is None an
		``ssl.ClientContextFactory`` is created.
		"""
		if hasattr(client, '_parse'):
			# Twisted versions that still ship the private URL parser.
			scheme, host, port, _path = client._parse(url)
		else:
			from twisted.web.client import URI
			uri = URI.fromBytes(url)
			scheme = uri.scheme
			host = uri.host
			port = uri.port
		# Another possible fix for newer Twisted would be urlparse(url) with
		# port = parsed.port or (443 if scheme == 'https' else 80).

		self.factory = HTTPProgressDownloader(url, outputfile, *args, **kwargs)
		# URI.fromBytes yields a bytes scheme.  A bytes literal compares
		# correctly on Python 3 and is unchanged on Python 2.
		if scheme == b"https":
			# Honour the caller's context factory instead of ignoring it.
			if contextFactory is None:
				contextFactory = ssl.ClientContextFactory()
			self.connection = reactor.connectSSL(host, port, self.factory, contextFactory)
		else:
			self.connection = reactor.connectTCP(host, port, self.factory)
Exemplo n.º 18
0
	def __init__(self, url, outputfile, contextFactory=None, *args, **kwargs):
		"""
		Start downloading ``url`` into ``outputfile``.

		Creates an ``HTTPProgressDownloader`` (stored on ``self.factory``)
		with the extra ``args`` / ``kwargs`` and connects it over TLS for
		https URLs, plain TCP otherwise.  The connector is stored on
		``self.connection``.

		``contextFactory`` is used for https connections; when it is None an
		``ssl.ClientContextFactory`` is created.
		"""
		if hasattr(client, '_parse'):
			# Twisted versions that still ship the private URL parser.
			scheme, host, port, _path = client._parse(url)
		else:
			from twisted.web.client import URI
			uri = URI.fromBytes(url)
			scheme = uri.scheme
			host = uri.host
			port = uri.port
		# Another possible fix for newer Twisted would be urlparse(url) with
		# port = parsed.port or (443 if scheme == 'https' else 80).

		self.factory = HTTPProgressDownloader(url, outputfile, *args, **kwargs)
		# URI.fromBytes yields a bytes scheme.  A bytes literal compares
		# correctly on Python 3 and is unchanged on Python 2.
		if scheme == b"https":
			# Honour the caller's context factory instead of ignoring it.
			if contextFactory is None:
				contextFactory = ssl.ClientContextFactory()
			self.connection = reactor.connectSSL(host, port, self.factory, contextFactory)
		else:
			self.connection = reactor.connectTCP(host, port, self.factory)
Exemplo n.º 19
0
    def _get_agent(self, request: Request,
                   timeout: Optional[float]) -> H2Agent:
        """
        Return the agent to use for ``request``.

        Without a ``proxy`` entry in ``request.meta`` this is a direct
        agent.  With one, a proxy agent is returned, except that https
        requests raise ``NotImplementedError`` unless the proxy URL carries
        ``noconnect`` in its parameters (which only triggers a warning).
        """
        from twisted.internet import reactor
        bind_address = request.meta.get('bindaddress') or self._bind_address
        proxy = request.meta.get('proxy')

        if not proxy:
            return self._Agent(
                reactor=reactor,
                context_factory=self._context_factory,
                connect_timeout=timeout,
                bind_address=bind_address,
                pool=self._pool,
            )

        _, _, proxy_host, proxy_port, proxy_params = _parse(proxy)
        scheme = _parse(request.url)[0]
        proxy_host = proxy_host.decode()
        omit_connect_tunnel = b'noconnect' in proxy_params

        if omit_connect_tunnel:
            warnings.warn(
                "Using HTTPS proxies in the noconnect mode is not "
                "supported by the downloader handler. If you use Zyte "
                "Smart Proxy Manager, it doesn't require this mode "
                "anymore, so you should update scrapy-crawlera to "
                "scrapy-zyte-smartproxy and remove '?noconnect' from the "
                "Zyte Smart Proxy Manager URL.")
        elif scheme == b'https':
            # ToDo
            raise NotImplementedError(
                'Tunneling via CONNECT method using HTTP/2.0 is not yet supported'
            )

        return self._ProxyAgent(
            reactor=reactor,
            context_factory=self._context_factory,
            proxy_uri=URI.fromBytes(to_bytes(proxy, encoding='ascii')),
            connect_timeout=timeout,
            bind_address=bind_address,
            pool=self._pool,
        )
Exemplo n.º 20
0
    def request(self, method, uri, headers=None, bodyProducer=None):
        """
        Send a request to the server named by C{uri}.

        The connection comes from the connection pool, which may reuse an
        existing one or open a new one.  Without further changes the pool
        is of limited use, because every connection in it uses the same Tor
        circuit.

        Only the I{HTTP} scheme is currently supported in C{uri}.

        @see: L{twisted.web.iweb.IAgent.request}
        """
        target = URI.fromBytes(uri)

        # XXX
        # perhaps the request method should take a key?
        key = (target.scheme, target.host, target.port)
        endpoint = self._getEndpoint(*key)

        return self._requestWithEndpoint(
            key, endpoint, method, target, headers, bodyProducer,
            target.originForm)
Exemplo n.º 21
0
def getPageCached(url, contextFactory=None, *args, **kwargs):
    """Download a web page as a string, keeping a cache of downloaded pages.

    Returns a deferred, which will callback with a page (as a string) or
    errback with a description of the error.

    ``url`` is parsed with ``URI.fromBytes``.  For https URLs the
    connection uses ``contextFactory``, or an
    ``HTTPSVerifyingContextFactory`` for the URL's host when it is None.

    See HTTPClientCacheFactory to see what extra args can be passed.
    """
    uri = URI.fromBytes(url)
    host = uri.host
    port = uri.port

    factory = HTTPClientCacheFactory(url, *args, **kwargs)

    # URI.fromBytes yields a bytes scheme.  A bytes literal compares
    # correctly on Python 3 and is unchanged on Python 2; a str literal
    # would silently send https URLs over plain TCP on Python 3.
    if uri.scheme == b'https':
        if contextFactory is None:
            contextFactory = HTTPSVerifyingContextFactory(host)
        reactor.connectSSL(host, port, factory, contextFactory)
    else:
        reactor.connectTCP(host, port, factory)

    return factory.deferred
Exemplo n.º 22
0
    def request(self, method, uri, headers=None, bodyProducer=None):
        """
        Send a request to the server named by C{uri}.

        The connection comes from the connection pool, which may reuse an
        existing one or open a new one.  Without further changes the pool
        is of limited use, because every connection in it uses the same Tor
        circuit.

        Only the I{HTTP} scheme is currently supported in C{uri}.

        @see: L{twisted.web.iweb.IAgent.request}
        """
        target = URI.fromBytes(uri)

        # XXX
        # perhaps the request method should take a key?
        key = (target.scheme, target.host, target.port)
        endpoint = self._getEndpoint(*key)

        return self._requestWithEndpoint(
            key, endpoint, method, target, headers, bodyProducer,
            target.originForm)
Exemplo n.º 23
0
def getPage(url, bindAddress=None, *arg, **kw):
    """
    Fetch ``url`` with an ``HTTPClientFactory``, optionally binding the
    outgoing connection to ``bindAddress``.

    Reimplemented here to insert bindAddress.  Extra positional and keyword
    arguments are passed to ``HTTPClientFactory``.  https URLs are connected
    with an ``ssl.ClientContextFactory``.

    :return: the factory's deferred.
    """
    uri = URI.fromBytes(url)
    host = uri.host
    port = uri.port

    factory = HTTPClientFactory(url, *arg, **kw)
    factory.noisy = False

    # URI.fromBytes yields a bytes scheme.  A bytes literal compares
    # correctly on Python 3 and is unchanged on Python 2; a str literal
    # would silently send https URLs over plain TCP on Python 3.
    if uri.scheme == b'https':
        from twisted.internet import ssl
        context = ssl.ClientContextFactory()
        reactor.connectSSL(host,
                           port,
                           factory,
                           context,
                           bindAddress=bindAddress)
    else:
        reactor.connectTCP(host, port, factory, bindAddress=bindAddress)
    return factory.deferred
Exemplo n.º 24
0
def get_json(url, *args, **kwargs):
    """
    Request ``url`` and pass the result through ``_json_loads``.

    :param url: URL to request; it is parsed with ``URI.fromBytes``.
    :param args: extra positional arguments for ``HTTPClientFactory``.
    :param kwargs: extra keyword arguments for ``HTTPClientFactory``.
        A ``json`` keyword is removed first; when truthy it is serialized
        with ``as_json`` and passed as ``postdata``.  ``agent`` defaults to
        ``'Twisted JSON Adapter'``.
    :return: the factory's deferred with ``_json_loads`` added as callback.
    """
    payload = kwargs.pop('json', None)
    if payload:
        kwargs['postdata'] = as_json(payload)
    if 'agent' not in kwargs:
        kwargs['agent'] = 'Twisted JSON Adapter'

    parsed = URI.fromBytes(url)
    factory = HTTPClientFactory(url, *args, **kwargs)
    factory.noisy = 0

    if parsed.scheme != b'https':
        reactor.connectTCP(nativeString(parsed.host), parsed.port, factory)
    else:
        # The TLS support module is only imported when it is needed.
        from twisted.internet import ssl
        tls_context = ssl.ClientContextFactory()
        reactor.connectSSL(
            nativeString(parsed.host), parsed.port, factory, tls_context)
    return factory.deferred.addCallback(_json_loads)
Exemplo n.º 25
0
def getPageCached(url, contextFactory=None, *args, **kwargs):
    """Download a web page as a string, keeping a cache of downloaded pages.

    Returns a deferred, which will callback with a page (as a string) or
    errback with a description of the error.

    ``url`` is parsed with ``URI.fromBytes``.  For https URLs the
    connection uses ``contextFactory``, or an
    ``HTTPSVerifyingContextFactory`` for the URL's host when it is None.

    See HTTPClientCacheFactory to see what extra args can be passed.
    """
    uri = URI.fromBytes(url)
    host = uri.host
    port = uri.port

    factory = HTTPClientCacheFactory(url, *args, **kwargs)

    # URI.fromBytes yields a bytes scheme.  A bytes literal compares
    # correctly on Python 3 and is unchanged on Python 2; a str literal
    # would silently send https URLs over plain TCP on Python 3.
    if uri.scheme == b'https':
        if contextFactory is None:
            contextFactory = HTTPSVerifyingContextFactory(host)
        reactor.connectSSL(host, port, factory, contextFactory)
    else:
        reactor.connectTCP(host, port, factory)

    return factory.deferred
Exemplo n.º 26
0
def get_json(url, *args, **kwargs):
    """
    Issue an HTTP(S) request via ``HTTPClientFactory`` and hand the result
    to ``_json_loads``.

    :param url: bytes URL; parsed with ``URI.fromBytes`` to pick the
        transport, host and port.
    :param args: positional arguments passed straight to
        ``HTTPClientFactory``.
    :param kwargs: keyword arguments passed to ``HTTPClientFactory``. The
        optional ``json`` keyword is popped; when truthy it is converted
        with ``as_json`` and sent as ``postdata``. ``agent`` is set to
        ``'Twisted JSON Adapter'`` unless the caller supplied one.
    :return: the factory's deferred, with ``_json_loads`` added as a
        callback.
    """
    body = kwargs.pop('json', None)
    if body:
        kwargs['postdata'] = as_json(body)
    kwargs.setdefault('agent', 'Twisted JSON Adapter')

    location = URI.fromBytes(url)
    http_factory = HTTPClientFactory(url, *args, **kwargs)
    http_factory.noisy = 0

    use_tls = location.scheme == b'https'
    if use_tls:
        # Lazy import: ssl support is only required for https URLs.
        from twisted.internet import ssl
        ctx = ssl.ClientContextFactory()
        reactor.connectSSL(
            nativeString(location.host), location.port, http_factory, ctx)
    else:
        reactor.connectTCP(
            nativeString(location.host), location.port, http_factory)

    result = http_factory.deferred
    return result.addCallback(_json_loads)
Exemplo n.º 27
0
    def request(self, method, uri, headers, bodyProducer):
        """
        Sign a request with an ``AuthHMAC`` authorization header and send it
        through the wrapped agent.

        This is a generator: it yields deferreds and finishes with
        ``defer.returnValue``, so it is presumably run under
        ``inlineCallbacks`` (the decorator is not visible here).

        @param method: HTTP method; it is the first line of the string that
            gets signed.
        @param uri: Request URI as bytes; its origin form is the last line
            of the signed string.
        @param headers: Headers to send, or L{None}. The caller's object is
            never mutated; a copy (or a fresh C{Headers}) is used.
        @param bodyProducer: Body producer, or L{None} for no body. When no
            C{content-md5} header is present the body is drained so its MD5
            can be computed, and a C{StringBodyProducer} over the collected
            bytes is sent instead.

        @return: (via C{defer.returnValue}) the result of
            C{self.agent.request}.
        """

        if headers is None:
            headers = Headers()
        else:
            headers = headers.copy()

        contentType = headers.getRawHeaders('content-type', [""])[0]
        # Use the caller's date header if there is one, otherwise generate it.
        date = headers.getRawHeaders('date', [""])[0] or self._generateRequestDate(uri)
        headers.setRawHeaders('date', [date])

        uri_origin_form = URI.fromBytes(uri).originForm
        contentMD5 = headers.getRawHeaders('content-md5', [""])[0]

        if not contentMD5 and bodyProducer is not None:

            # Fall back to the global reactor when the agent does not expose
            # `_reactor`. Without a default, getattr raised AttributeError
            # and the `or reactor` fallback could never be reached.
            r = getattr(self.agent, '_reactor', None) or reactor
            bodyConsumer = StringConsumer(callLater=r.callLater)

            # Drain the producer so the body can be hashed, then wrap the
            # collected value in a new producer for the actual request.
            yield bodyProducer.startProducing(bodyConsumer)
            body = bodyConsumer.value()
            bodyProducer = StringBodyProducer(body)

            if body:
                contentMD5 = binascii.b2a_base64(hashlib.md5(body).digest()).strip()
                headers.addRawHeader('content-md5', contentMD5)

        # String to sign: method, content type, content MD5, date and the
        # origin-form URI, one per line.
        sts = "\n".join([method, contentType or "", contentMD5, date or "", uri_origin_form])
        mac = hmac.new(self.secretKey, sts, digestmod=hashlib.sha1).digest()
        encodedMAC = binascii.b2a_base64(mac).strip()

        auth_header = "AuthHMAC {0}:{1}".format(self.accessKey, encodedMAC)
        headers.addRawHeader('authorization', auth_header)

        d = yield self.agent.request(method, uri, headers, bodyProducer)
        self._handleResponseDate(uri, d)
        defer.returnValue(d)
0
 def prePathURL(self):
     """
     Return ``self.uri`` re-serialised to bytes after its path has been
     replaced by the segments of ``self.prepath`` joined with ``/``.
     """
     from twisted.web.client import URI
     parsed = URI.fromBytes(self.uri)
     joined_prepath = b'/'.join(self.prepath)
     parsed.path = joined_prepath
     return parsed.toBytes()
Exemplo n.º 29
0
    def request(self, method, uri, headers=None, bodyProducer=None):
        """
        Route a request via ``self._route_matrix_uri`` and send it through an
        ``Agent`` whose endpoint factory connects to the routed target.

        This is a generator that yields deferreds; it is presumably run
        under ``inlineCallbacks`` (the decorator is not visible here).

        Args:
            method (bytes): HTTP method: GET/POST/etc

            uri (bytes): Absolute URI to be retrieved

            headers (twisted.web.http_headers.Headers|None):
                HTTP headers to send with the request, or None to
                send no extra headers.

            bodyProducer (twisted.web.iweb.IBodyProducer|None):
                An object which can generate bytes to make up the
                body of this request (for example, the properly encoded contents of
                a file for a file upload).  Or None if the request is to have
                no body.

        Returns:
            Deferred[twisted.web.iweb.IResponse]:
                fires when the header of the response has been received (regardless of the
                response status code). Fails if there is any problem which prevents that
                response from being received (including problems that prevent the request
                from being sent).
        """
        parsed_uri = URI.fromBytes(uri, defaultPort=-1)
        # Kept under its own name: the endpoint factory below closes over
        # this value, so it must not be rebound to the HTTP response later.
        routing = yield self._route_matrix_uri(parsed_uri)

        # set up the TLS connection params
        #
        # XXX disabling TLS is really only supported here for the benefit of the
        # unit tests. We should make the UTs cope with TLS rather than having to make
        # the code support the unit tests.
        if self._tls_client_options_factory is None:
            tls_options = None
        else:
            tls_options = self._tls_client_options_factory.get_options(
                routing.tls_server_name.decode("ascii"))

        # make sure that the Host header is set correctly
        if headers is None:
            headers = Headers()
        else:
            # work on a copy so the caller's headers are not modified
            headers = headers.copy()

        if not headers.hasHeader(b"host"):
            headers.addRawHeader(b"host", routing.host_header)

        class EndpointFactory(object):
            @staticmethod
            def endpointForURI(_uri):
                # The URI handed in by the Agent is ignored; the connection
                # always goes to the target chosen by the routing step.
                ep = LoggingHostnameEndpoint(self._reactor,
                                             routing.target_host,
                                             routing.target_port)
                if tls_options is not None:
                    ep = wrapClientTLS(tls_options, ep)
                return ep

        agent = Agent.usingEndpointFactory(self._reactor, EndpointFactory(),
                                           self._pool)
        response = yield make_deferred_yieldable(
            agent.request(method, uri, headers, bodyProducer))
        return response
Exemplo n.º 30
0
    def request(
        self,
        method: bytes,
        uri: bytes,
        headers: Optional["Headers"] = None,
        bodyProducer: Optional["IBodyProducer"] = None,
    ) -> Generator["defer.Deferred[Any]", Any, IResponse]:
        """
        Route ``uri`` with ``self._route_matrix_uri`` and issue the request
        through an ``Agent`` that connects to the routed target.

        :param method: HTTP method (GET/POST/etc).

        :param uri: Absolute URI to be retrieved.

        :param headers: HTTP headers to send with the request, or None to
            send no extra headers. A copy is used, so the caller's object is
            left untouched.

        :param bodyProducer: An object which can generate bytes to make up the
            body of this request (for example, the properly encoded contents of
            a file for a file upload).  Or None if the request is to have
            no body.

        :returns a deferred that fires when the header of the response has
            been received (regardless of the response status code). Fails if
            there is any problem which prevents that response from being received
            (including problems that prevent the request from being sent).
        """
        parsed_uri = URI.fromBytes(uri, defaultPort=-1)
        routing: _RoutingResult
        routing = yield defer.ensureDeferred(
            self._route_matrix_uri(parsed_uri))

        # TLS connection parameters.
        #
        # XXX running without TLS is only supported for the benefit of the
        # unit tests; ideally the tests would cope with TLS instead.
        options_factory = self._tls_client_options_factory
        tls_options = (
            None
            if options_factory is None
            else options_factory.get_options(
                routing.tls_server_name.decode("ascii"))
        )

        # Make sure a Host header is present.
        if headers is not None:
            # Type safety: Headers.copy has no annotated return type and
            # web.http_headers is not stubbed, hence the ignore.
            headers = headers.copy()  # type: ignore[no-untyped-call]
            assert headers is not None
        else:
            headers = Headers()

        if not headers.hasHeader(b"host"):
            headers.addRawHeader(b"host", routing.host_header)

        @implementer(IAgentEndpointFactory)
        class EndpointFactory:
            @staticmethod
            def endpointForURI(_uri: URI) -> IStreamClientEndpoint:
                # The URI supplied by the Agent is ignored: the endpoint
                # always targets the host/port chosen by the routing step.
                endpoint: IStreamClientEndpoint = LoggingHostnameEndpoint(
                    self._reactor,
                    routing.target_host,
                    routing.target_port,
                )
                if tls_options is None:
                    return endpoint
                return wrapClientTLS(tls_options, endpoint)

        routed_agent = Agent.usingEndpointFactory(
            self._reactor, EndpointFactory(), self._pool)
        response: IResponse
        response = yield routed_agent.request(
            method, uri, headers, bodyProducer)
        return response
Exemplo n.º 31
0
    def prePathURL(self):
        """
        Return ``self.uri`` as bytes with its path swapped for the
        ``/``-joined segments of ``self.prepath``.
        """
        from twisted.web.client import URI

        rebuilt = URI.fromBytes(self.uri)
        prepath_bytes = b"/".join(self.prepath)
        rebuilt.path = prepath_bytes
        return rebuilt.toBytes()
Exemplo n.º 32
0
    def request(self, method, uri, headers=None, bodyProducer=None):
        """
        Send a request to the server named by ``uri``, going through the
        configured HTTP or HTTPS proxy unless ``no_proxy`` says to bypass it.

        Only the `http` and `https` schemes are supported.

        The request is handed to ``self._requestWithEndpoint`` together with
        a pool key, so a pooled connection may be reused or a new one made.

        See also: twisted.web.iweb.IAgent.request

        Args:
            method (bytes): The request method to use, such as `GET`, `POST`, etc

            uri (bytes): The location of the resource to request.

            headers (Headers|None): Extra headers to send with the request

            bodyProducer (IBodyProducer|None): An object which can generate bytes to
                make up the body of this request (for example, the properly encoded
                contents of a file for a file upload). Or, None if the request is to
                have no body.

        Returns:
            Deferred[IResponse]: completes when the header of the response has
                 been received (regardless of the response status code).

                 Can fail with:
                    SchemeNotSupported: if the uri is not http or https

                    twisted.internet.error.TimeoutError if the server we are connecting
                        to (proxy or destination) does not accept a connection before
                        connectTimeout.

                    ... other things too.

        Raises:
            ValueError: synchronously, if the stripped uri does not match
                `_VALID_URI`.
        """
        uri = uri.strip()
        if not _VALID_URI.match(uri):
            raise ValueError("Invalid URI {!r}".format(uri))

        parsed_uri = URI.fromBytes(uri)
        scheme = parsed_uri.scheme
        dest_host = parsed_uri.host
        dest_port = parsed_uri.port

        # Defaults for a direct connection; the http-proxy branch below
        # overrides both.
        pool_key = (scheme, dest_host, dest_port)
        request_path = parsed_uri.originForm

        # Only consult the bypass list when one has been configured.
        should_skip_proxy = (
            self.no_proxy is not None
            and proxy_bypass_environment(
                dest_host.decode(),
                proxies={"no": self.no_proxy},
            )
        )

        if (scheme == b"http" and self.http_proxy_endpoint
                and not should_skip_proxy):
            # Every plain-http request goes to the same place (the proxy),
            # so all such connections are cached under a single key and the
            # full URI is sent as the request path.
            endpoint = self.http_proxy_endpoint
            pool_key = ("http-proxy", endpoint)
            request_path = uri
        elif (scheme == b"https" and self.https_proxy_endpoint
              and not should_skip_proxy):
            connect_headers = Headers()

            # Add a Proxy-Authorization header if proxy credentials are set.
            if self.https_proxy_creds:
                connect_headers.addRawHeader(
                    b"Proxy-Authorization",
                    self.https_proxy_creds.as_proxy_authorization_value(),
                )

            endpoint = HTTPConnectProxyEndpoint(
                self.proxy_reactor,
                self.https_proxy_endpoint,
                dest_host,
                dest_port,
                headers=connect_headers,
            )
        else:
            # No proxy applies: connect straight to the destination.
            endpoint = HostnameEndpoint(
                self._reactor, dest_host, dest_port,
                **self._endpoint_kwargs)

        logger.debug("Requesting %s via %s", uri, endpoint)

        if scheme == b"https":
            # Layer TLS to the destination on top of whichever endpoint was
            # chosen above.
            tls_connection_creator = self._policy_for_https.creatorForNetloc(
                dest_host, dest_port)
            endpoint = wrapClientTLS(tls_connection_creator, endpoint)
        elif scheme != b"http":
            unsupported = SchemeNotSupported(
                "Unsupported scheme: %r" % (scheme, ))
            return defer.fail(Failure(unsupported))

        return self._requestWithEndpoint(
            pool_key, endpoint, method, parsed_uri, headers, bodyProducer,
            request_path)
Exemplo n.º 33
0
    def request(self, method, uri, headers=None, bodyProducer=None):
        """
        Send a request to the server named by ``uri``, via
        ``self.proxy_endpoint`` when one is configured.

        Only the `http` and `https` schemes are supported; any other scheme
        yields a deferred that has already failed with SchemeNotSupported.

        The request is handed to ``self._requestWithEndpoint`` together with
        a pool key, so a pooled connection may be reused or a new one made.

        See also: twisted.web.iweb.IAgent.request

        Args:
            method (bytes): The request method to use, such as `GET`, `POST`, etc

            uri (bytes): The location of the resource to request.

            headers (Headers|None): Extra headers to send with the request

            bodyProducer (IBodyProducer|None): An object which can generate bytes to
                make up the body of this request (for example, the properly encoded
                contents of a file for a file upload). Or, None if the request is to
                have no body.

        Returns:
            Deferred[IResponse]: completes when the header of the response has
                 been received (regardless of the response status code).

        Raises:
            ValueError: synchronously, if the stripped uri does not match
                `_VALID_URI`.
        """
        uri = uri.strip()
        if not _VALID_URI.match(uri):
            raise ValueError("Invalid URI {!r}".format(uri))

        parsed_uri = URI.fromBytes(uri)
        scheme = parsed_uri.scheme
        dest_host = parsed_uri.host
        dest_port = parsed_uri.port

        # Defaults for a direct connection; the http-proxy branch below
        # overrides both.
        pool_key: tuple = (scheme, dest_host, dest_port)
        request_path = parsed_uri.originForm

        if scheme == b"http" and self.proxy_endpoint:
            # Every plain-http request goes to the same place (the proxy),
            # so all such connections are cached under a single key and the
            # full URI is sent as the request path.
            endpoint = self.proxy_endpoint
            pool_key = ("http-proxy", endpoint)
            request_path = uri
        elif scheme == b"https" and self.proxy_endpoint:
            endpoint = HTTPConnectProxyEndpoint(
                self._reactor,
                self.proxy_endpoint,
                dest_host,
                dest_port,
                self._proxy_auth,
            )
        else:
            # No proxy configured: connect straight to the destination.
            endpoint = HostnameEndpoint(
                self._reactor, dest_host, dest_port,
                **self._endpoint_kwargs)

        logger.debug("Requesting %s via %s", uri, endpoint)

        if scheme == b"https":
            # Layer TLS to the destination on top of whichever endpoint was
            # chosen above.
            tls_connection_creator = self._policy_for_https.creatorForNetloc(
                dest_host, dest_port)
            endpoint = wrapClientTLS(tls_connection_creator, endpoint)
        elif scheme != b"http":
            unsupported = SchemeNotSupported(
                "Unsupported scheme: %r" % (scheme, ))
            return defer.fail(Failure(unsupported))

        return self._requestWithEndpoint(
            pool_key, endpoint, method, parsed_uri, headers, bodyProducer,
            request_path)
Exemplo n.º 34
0
    def request(self, method, uri, headers=None, bodyProducer=None):
        """
        Route a request via ``self._route_matrix_uri`` and send it through an
        ``Agent`` whose endpoint factory connects to the routed target.

        This is a generator that finishes with ``defer.returnValue``; it is
        presumably run under ``inlineCallbacks`` (the decorator is not
        visible here).

        :param method: HTTP method (GET/POST/etc).
        :type method: bytes

        :param uri: Absolute URI to be retrieved.
        :type uri: bytes

        :param headers: HTTP headers to send with the request, or None to
            send no extra headers.
        :type headers: twisted.web.http_headers.Headers, None

        :param bodyProducer: An object which can generate bytes to make up the
            body of this request (for example, the properly encoded contents of
            a file for a file upload).  Or None if the request is to have
            no body.
        :type bodyProducer: twisted.web.iweb.IBodyProducer, None

        :returns a deferred that fires when the header of the response has
            been received (regardless of the response status code). Fails if
            there is any problem which prevents that response from being received
            (including problems that prevent the request from being sent).
        :rtype: Deferred[twisted.web.iweb.IResponse]
        """
        parsed_uri = URI.fromBytes(uri, defaultPort=-1)
        # Kept under its own name: the endpoint factory below closes over
        # this value, so it must not be rebound to the HTTP response later.
        routing = yield self._route_matrix_uri(parsed_uri)

        # set up the TLS connection params
        #
        # XXX disabling TLS is really only supported here for the benefit of the
        # unit tests. We should make the UTs cope with TLS rather than having to make
        # the code support the unit tests.
        if self._tls_client_options_factory is None:
            tls_options = None
        else:
            tls_options = self._tls_client_options_factory.get_options(
                routing.tls_server_name.decode("ascii")
            )

        # make sure that the Host header is set correctly
        if headers is None:
            headers = Headers()
        else:
            # work on a copy so the caller's headers are not modified
            headers = headers.copy()

        if not headers.hasHeader(b'host'):
            headers.addRawHeader(b'host', routing.host_header)

        class EndpointFactory(object):
            @staticmethod
            def endpointForURI(_uri):
                # The URI handed in by the Agent is ignored; the connection
                # always goes to the target chosen by the routing step.
                ep = LoggingHostnameEndpoint(
                    self._reactor, routing.target_host, routing.target_port,
                )
                if tls_options is not None:
                    ep = wrapClientTLS(tls_options, ep)
                return ep

        agent = Agent.usingEndpointFactory(self._reactor, EndpointFactory(), self._pool)
        response = yield agent.request(method, uri, headers, bodyProducer)
        defer.returnValue(response)