def _makeGetterFactory(url, factoryFactory, contextFactory=None,
                       *args, **kwargs):
    """
    Build an HTTP page getting factory for C{url} and connect it through the
    reactor.

    Extra positional and keyword arguments are forwarded unchanged to
    C{factoryFactory}.

    @param url: URL to fetch, in the form accepted by L{URI.fromBytes}.

    @param factoryFactory: Callable invoked as
        C{factoryFactory(url, *args, **kwargs)} to produce the getter.

    @param contextFactory: Context factory used for a secure connection.
        When L{None} and the scheme is C{https}, a new
        C{ssl.ClientContextFactory} is created.

    @return: The factory created by C{factoryFactory}.
    """
    parsed = URI.fromBytes(url)
    getter = factoryFactory(url, *args, **kwargs)

    if parsed.scheme != b'https':
        reactor.connectTCP(nativeString(parsed.host), parsed.port, getter)
        return getter

    # The ssl module is only imported on the https path.
    from twisted.internet import ssl
    tlsContext = contextFactory
    if tlsContext is None:
        tlsContext = ssl.ClientContextFactory()
    reactor.connectSSL(
        nativeString(parsed.host), parsed.port, getter, tlsContext)
    return getter
def setUp(self):
    """Start a TLS test server and connect an HTTP/2 client to it.

    This is a generator: it yields the Deferreds returned by the server
    ``listen`` call and the client ``connect`` call and stores their results
    on ``self.server`` and ``self.client``.
    NOTE(review): presumably wrapped by an inlineCallbacks-style decorator
    that is not visible in this block -- confirm.
    """
    # Resource tree served by the test site
    self.site = Site(self._init_resource(), timeout=None)

    # TLS server bound to localhost; port 0 is passed and the port that was
    # actually bound is read back from the listening port afterwards.
    self.hostname = 'localhost'
    server_tls = ssl_context_factory(self.key_file, self.certificate_file)
    listener = SSL4ServerEndpoint(
        reactor, 0, server_tls, interface=self.hostname
    )
    self.server = yield listener.listen(self.site)
    self.port_number = self.server.getHost().port

    # H2 client: trusts the test certificate and only offers the h2 protocol
    self.client_certificate = get_client_certificate(
        self.key_file, self.certificate_file
    )
    client_tls = optionsForClientTLS(
        hostname=self.hostname,
        trustRoot=self.client_certificate,
        acceptableProtocols=[b'h2'],
    )
    root_uri = URI.fromBytes(self.get_url('/').encode('utf-8'))
    self.conn_closed_deferred = Deferred()

    from scrapy.core.http2.protocol import H2ClientFactory
    client_factory = H2ClientFactory(
        root_uri, Settings(), self.conn_closed_deferred
    )
    connector = SSL4ClientEndpoint(
        reactor, self.hostname, self.port_number, client_tls
    )
    self.client = yield connector.connect(client_factory)
def from_uri(cls, reactor, uri):
    """Return an AMQEndpoint instance configured with the given AMQP uri.

    The URI is parsed with port 5672 as the default. Optional
    ``user:password@`` credentials become the ``username`` and ``password``
    keyword arguments (``password`` is only passed when a ``:`` separator is
    present), the path becomes ``vhost`` (with a leading "/" stripped when
    the path is longer than one character), and every query parameter is
    passed through as a keyword argument using its first value.
    ``heartbeat`` is converted to an int.

    @param reactor: Reactor handed to the endpoint constructor.
    @param uri: The AMQP URI, as text.
    @return: A new instance of C{cls}.

    @see: https://www.rabbitmq.com/uri-spec.html
    """
    uri = URI.fromBytes(uri.encode(), defaultPort=5672)
    kwargs = {}
    host = uri.host.decode()
    if "@" in host:
        # URI.fromBytes only strips a trailing ":port", so the credentials
        # are still part of ``host``. Split ``host`` itself rather than
        # ``netloc``: netloc still carries the explicit port, which would
        # otherwise end up inside the host name.
        auth, host = host.rsplit("@", 1)
        # Split on the first ":" only, so passwords may contain colons.
        username, separator, password = auth.partition(":")
        kwargs["username"] = username
        if separator:
            kwargs["password"] = password
    vhost = uri.path.decode()
    if len(vhost) > 1:
        vhost = vhost[1:]  # Strip leading "/"
    kwargs["vhost"] = vhost
    params = parse_qs(uri.query)
    kwargs.update({
        name.decode(): value[0].decode()
        for name, value in params.items()
    })
    if "heartbeat" in kwargs:
        kwargs["heartbeat"] = int(kwargs["heartbeat"])
    return cls(reactor, host, uri.port, **kwargs)
def build_agent(req):
    """Build the agent stack used to issue *req*.

    A proxy taken from ``req.get('proxy')`` is used unless
    ``req.get('use_proxy')`` is explicitly ``False``. https targets go
    through a ``TunnelingAgent``, other targets through a ``ProxyAgent``;
    without a proxy a plain ``Agent`` is used. The result is always wrapped
    in a ``RedirectAgent`` (limit 3) and a gzip ``ContentDecoderAgent``.

    :param req: request object exposing ``url`` and a dict-like ``get``.
    :return: the wrapped agent.
    """
    uri = URI.fromBytes(req.url)
    proxy = req.get('proxy')
    if req.get('use_proxy') is False:
        proxy = None

    if proxy:
        # URI.fromBytes produces a bytes scheme, so compare against a bytes
        # literal: on Python 3 a str literal never matches and https
        # requests would silently take the plain-HTTP proxy branch.
        if uri.scheme == b'https':
            agent = TunnelingAgent(
                reactor=reactor,
                proxy=proxy,
                contextFactory=ScrapexClientContextFactory(),
                connectTimeout=req.get('timeout'))
        else:
            endpoint = TCP4ClientEndpoint(
                reactor, host=proxy.host, port=proxy.port,
                timeout=req.get('timeout'))
            agent = ProxyAgent(endpoint)
            # NOTE(review): the credentials header is set for the plain-HTTP
            # proxy branch only; confirm this matches the intended nesting.
            if proxy.auth_header:
                req.get('headers')['Proxy-Authorization'] = proxy.auth_header
    else:
        agent = Agent(reactor)

    agent = RedirectAgent(agent, redirectLimit=3)
    agent = ContentDecoderAgent(agent, [('gzip', GzipDecoder)])
    return agent
def __init__(self, reactor, proxyURI, connectTimeout=None, bindAddress=None,
             pool=None):
    """Initialise the base agent and remember the parsed proxy URI.

    ``connectTimeout``, ``bindAddress`` and ``pool`` are forwarded to the
    parent initialiser; ``proxyURI`` is parsed with ``URI.fromBytes`` and
    stored on ``self._proxyURI``.
    """
    base_options = {
        "connectTimeout": connectTimeout,
        "bindAddress": bindAddress,
        "pool": pool,
    }
    super(ScrapyProxyAgent, self).__init__(reactor, **base_options)
    self._proxyURI = URI.fromBytes(proxyURI)
def makeURL(self):
    """Compute the request URL and, when needed, the proxy IP.

    ``self._url`` is parsed; if it carries a scheme its own scheme, host,
    port and path are used, otherwise they come from ``self._ssl``,
    ``self._hostname``, ``self._port`` and ``self._url``. The result is
    stored in ``self._reqURL``; ``self._proxyIp`` is set to ``self._ipAddr``
    when the host/IP checks below call for it.

    The port is left out of the URL only when it is the default for the
    protocol in use (443 with SSL, 80 without), or when a proxy IP is set
    and ``self._url`` has no scheme. Checking merely for "80 or 443" would
    also drop the port for e.g. https on port 80, yielding a URL that points
    at a different port.
    """
    url_data = URI.fromBytes(self._url)
    if url_data.scheme:
        args = {
            "scheme": url_data.scheme,
            "hostname": url_data.host,
            "port": url_data.port,
            "path": url_data.path,
        }
    else:
        args = {
            "scheme": "https" if self._ssl else "http",
            "hostname": self._hostname,
            "port": self._port,
            "path": self._url,
        }

    hasHost = bool(url_data.host)
    hostMatch = url_data.host.endswith(self._hostname)
    ipMatch = self._ipAddr in self._hostnameIp
    if hasHost and xor(hostMatch, ipMatch) or not ipMatch:
        self._proxyIp = self._ipAddr

    # Remove port if default (see RFC 2616, 14.23)
    port = int(args['port'])
    isDefaultPort = port == (443 if self._ssl else 80)
    if isDefaultPort or (self._proxyIp and not url_data.scheme):
        self._reqURL = "{scheme}://{hostname}{path}".format(**args)
    else:
        self._reqURL = "{scheme}://{hostname}:{port}{path}".format(**args)
    log.debug("HTTP request URL: %s, Proxy: %s", self._reqURL, self._proxyIp)
def from_dict(cls, dct):
    """Create a new cassette from *dct*, as deserialized from JSON or YAML
    format.

    Each entry of ``dct['http_interactions']`` is turned into a request /
    response pair, and the response is appended to ``cassette.responses``
    as a ``SavedResponse`` together with its saved body.
    """
    cassette = cls()
    for interaction in dct['http_interactions']:
        saved_request = interaction['request']
        absolute_uri = saved_request['uri']
        # Blank out scheme and netloc so that only the part of the URI that
        # would be sent in a real request remains.
        relative_uri = urlunparse(('', '') + urlparse(absolute_uri)[2:])
        request = Request._construct(
            saved_request['method'],
            relative_uri,
            Headers(saved_request['headers']),
            SavedBodyProducer(body_from_dict(saved_request)),
            False,
            URI.fromBytes(absolute_uri.encode('utf-8')))

        saved_response = interaction['response']
        status = saved_response['status']
        response = Response._construct(
            ('HTTP', 1, 1),
            status['code'],
            status['message'],
            Headers(saved_response['headers']),
            AbortableStringTransport(),
            request)

        length_values = response.headers.getRawHeaders('Content-Length')
        if length_values:
            # A Content-Length that is not an integer is ignored.
            try:
                response.length = int(length_values[0])
            except ValueError:
                pass

        cassette.responses.append(
            SavedResponse(response, body_from_dict(saved_response)))
    return cassette
def makeURL(self):
    """Compute ``self._reqURL`` and, when needed, ``self._proxyIp``.

    URL components come from ``self._url`` when it carries a scheme, and
    from the instance settings otherwise. The port is omitted from the URL
    when it is the default for the protocol selected by ``self._ssl``, or
    when a proxy IP is set and ``self._url`` has no scheme.
    """
    url_data = URI.fromBytes(self._url)
    has_scheme = bool(url_data.scheme)
    if has_scheme:
        args = dict(
            scheme=url_data.scheme,
            hostname=url_data.host,
            port=url_data.port,
            path=url_data.path,
        )
    else:
        args = dict(
            scheme="https" if self._ssl else "http",
            hostname=self._hostname,
            port=self._port,
            path=self._url,
        )

    host_present = bool(url_data.host)
    host_matches = url_data.host.endswith(self._hostname)
    ip_known = self._ipAddr in self._hostnameIp

    # 443 is the default port with SSL, 80 without.
    port = int(args['port'])
    port_is_default = port == (443 if self._ssl else 80)

    if not ip_known or (host_present and host_matches != ip_known):
        self._proxyIp = self._ipAddr

    # Remove port if default (see RFC 2616, 14.23)
    omit_port = port_is_default or (bool(self._proxyIp) and not has_scheme)
    if omit_port:
        template = "{scheme}://{hostname}{path}"
    else:
        template = "{scheme}://{hostname}:{port}{path}"
    self._reqURL = template.format(**args)

    log.debug(
        "HTTP request URL: %s, Proxy: %s", self._reqURL, self._proxyIp
    )
def _create_agent(self, req):
    """Create the right agent stack for a specific request.

    Proxy agents are cached in ``self._agents`` under a key built from the
    proxy kind, host and port; requests without a proxy share
    ``self._direct_agent``. The chosen agent is then wrapped, in order,
    with a redirect agent (unless the configured ``max_redirects`` is 0 or
    negative; 3 is used when it is not configured), a gzip content decoder
    and, when ``self.cj`` is set, a cookie agent.

    :param req: request object exposing ``url`` and a dict-like ``get``.
    :return: the wrapped agent.
    """
    uri = URI.fromBytes(req.url)
    proxy = req.get('proxy')
    if req.get('use_proxy') is False:
        proxy = None

    if proxy:
        # URI.fromBytes produces a bytes scheme, so compare against a bytes
        # literal: on Python 3 a str literal never matches and https
        # requests would silently take the plain-HTTP proxy branch.
        if uri.scheme == b'https':
            agent_key = 'httpsproxy-%s-%s' % (proxy.host, proxy.port)
            agent = self._agents.get(agent_key)
            if not agent:
                agent = TunnelingAgent(
                    reactor=reactor,
                    proxy=proxy,
                    contextFactory=ScrapexClientContextFactory(),
                    connectTimeout=30,
                    pool=self._pool)
                self._agents[agent_key] = agent
        else:
            # http
            agent_key = 'httpproxy-%s-%s' % (proxy.host, proxy.port)
            agent = self._agents.get(agent_key)
            if not agent:
                endpoint = TCP4ClientEndpoint(
                    reactor, host=proxy.host, port=proxy.port,
                    timeout=req.get('timeout'))
                agent = ProxyAgent(endpoint, pool=self._pool)
                self._agents[agent_key] = agent
            # NOTE(review): the credentials header is set for the plain-HTTP
            # proxy branch only; confirm this matches the intended nesting.
            if proxy.auth_header:
                req.get('headers')['Proxy-Authorization'] = proxy.auth_header
    else:
        # use single agent when no proxies used
        agent = self._direct_agent

    redirectLimit = self.scraper.config.get('max_redirects')
    if redirectLimit is None:
        redirectLimit = 3
    if redirectLimit > 0:
        agent = BrowserLikeRedirectAgent(agent, redirectLimit=redirectLimit)

    agent = ContentDecoderAgent(agent, [('gzip', GzipDecoder)])
    if self.cj is not None:
        agent = CookieAgent(agent, self.cj)
    return agent
def request(self, request: Request, spider: Spider) -> Deferred:
    """Send *request* over a pooled connection.

    The request URL is parsed, an endpoint and a pool key are derived from
    it, and the request is issued on the connection the pool hands back.
    If the endpoint lookup raises ``SchemeNotSupported``, an already-failed
    Deferred carrying that exception is returned instead.
    """
    uri = URI.fromBytes(request.url.encode('utf-8'))
    try:
        endpoint = self.get_endpoint(uri)
    except SchemeNotSupported:
        # Failure() with no arguments captures the exception being handled.
        return defer.fail(Failure())

    def _send(conn):
        return conn.request(request, spider)

    pool_key = self.get_key(uri)
    pending = self._pool.get_connection(pool_key, uri, endpoint)
    pending.addCallback(_send)
    return pending
def request(self, method, uri, headers=None, bodyProducer=None):
    """
    Issue a new request via the configured proxy.

    The full C{uri} is passed as the request path, and the connection is
    made to the endpoint derived from C{self._proxyURI}.
    """
    proxy = self._proxyURI
    endpoint = self._getEndpoint(proxy)
    # Every connection goes to the same destination (the proxy), so all of
    # them are cached under a single key.
    pool_key = ("http-proxy", proxy.host, proxy.port)
    target = URI.fromBytes(uri)
    return self._requestWithEndpoint(
        pool_key, endpoint, method, target, headers, bodyProducer, uri)
def request(self, method, uri, headers=None, bodyProducer=None, address=None,
            path=None):
    """
    Adapted from Agent.request but to allow arbitrary text in the request
    path, e.g. 'GET http://www.example.com HTTP/1.1'

    Path argument can be used to specifically override the path in the uri
    for non-conforming paths.

    Address argument can be used to override the address the url connection
    is made to.

    ``method``, ``uri``, ``address`` and ``path`` are text and are encoded
    as UTF-8 here. The returned Deferred has a timeout of ``self.timeout``
    and ``self.read_response`` added as a callback. If the scheme is not
    supported, an already-failed Deferred carrying the ``SchemeNotSupported``
    exception is returned.
    """
    method = method.encode("UTF-8")
    parsedURI = URI.fromBytes(uri.encode("UTF-8"))
    try:
        if address is not None:
            # Connect to ``address`` while the request itself still
            # describes the original URI.
            modifiedURI = URI.fromBytes(uri.encode("UTF-8"))
            modifiedURI.host = address.encode("UTF-8")
            endpoint = self._getEndpoint(modifiedURI)
        else:
            endpoint = self._getEndpoint(parsedURI)
    except SchemeNotSupported:
        # A plain string is not a valid failure value. Calling fail() with
        # no argument inside the handler wraps the exception being handled.
        return defer.fail()

    if path is not None:
        parsedURI.path = path.encode("UTF-8")

    key = (parsedURI.scheme, parsedURI.host, parsedURI.port)
    d = self._requestWithEndpoint(key, endpoint, method, parsedURI, headers,
                                  bodyProducer, parsedURI.originForm)
    # Add a timeout to the deferred to prevent hangs on requests that
    # connect but don't send any data
    d.addTimeout(self.timeout, reactor)
    d.addCallback(self.read_response, parsedURI)
    return d
def request(self, method, uri, headers=None, bodyProducer=None):
    """
    Issue a new request via the configured proxy.

    The full C{uri} is passed as the request path, and the connection is
    made to the endpoint derived from C{self._proxyURI}.
    """
    proxy = self._proxyURI
    # From Twisted 15.0.0 on the URI object is passed to _getEndpoint;
    # for earlier versions its scheme, host and port are passed separately.
    if twisted_version >= (15, 0, 0):
        endpoint_args = (proxy,)
    else:
        endpoint_args = (proxy.scheme, proxy.host, proxy.port)
    endpoint = self._getEndpoint(*endpoint_args)

    # Every connection goes to the same destination (the proxy), so all of
    # them are cached under a single key.
    pool_key = ("http-proxy", proxy.host, proxy.port)
    target = URI.fromBytes(uri)
    return self._requestWithEndpoint(
        pool_key, endpoint, method, target, headers, bodyProducer, uri)
def _command_download(self, data):
    """Download the audio file named in *data* from the connected peer.

    The URL is built from the peer's host plus ``data["partial_url"]`` and
    the file is saved as ``<audio_id>.opus`` in the session directory. When
    the download finishes, an ``update_downloaded`` message reporting
    success or failure is written to ``self._tcp_packetizer``.

    Returns the download factory's Deferred.
    """
    context = self._context
    reactor = context["reactor"]
    session_files = context["session_files"]
    audio_id = data["audio_id"]
    partial_url = data["partial_url"]

    peer_host = str(self.transport.getPeer().host)
    url = "http://" + peer_host + partial_url
    file_path = session_files.session_dir / f"{audio_id}.opus"
    log.info(f"Downloading file from {url} to {file_path}")

    url_bytes = url.encode("utf-8")
    target = URI.fromBytes(url_bytes)
    factory = HTTPDownloader(url_bytes, str(file_path))
    reactor.connectTCP(target.host, target.port, factory)

    def _report(outcome, **extra):
        # Tell the server how the download went.
        message = {
            "command": "update_downloaded",
            "audio_id": audio_id,
            "result": outcome,
        }
        message.update(extra)
        self._tcp_packetizer.write(json.dumps(message))

    def on_success(_result):
        # File downloaded successfully
        _report("success")

    def on_error(error):
        # File failed to download
        log.error(f"Failed to download file at '{url}': {error}")
        _report("failure", error=str(error))

    d = factory.deferred
    d.addCallback(on_success)
    d.addErrback(on_error)
    return d
def request(self, method, uri, headers, bodyProducer):
    """Sign the request with an ``AuthHMAC`` authorization header and send
    it through the wrapped agent.

    The string to sign is the newline-joined method, content type, content
    MD5, date and origin-form URI; it is signed with HMAC-SHA1 using
    ``self.secretKey``. A missing ``date`` header is generated. When no
    ``content-md5`` header is given and there is a body producer, the body
    is consumed to compute the MD5 and then replaced by a
    ``StringBodyProducer`` holding the same bytes.

    This is a generator that yields Deferreds and hands back the response
    via ``defer.returnValue``.
    NOTE(review): presumably decorated with inlineCallbacks outside this
    block -- confirm.
    """
    if headers is None:
        headers = Headers()
    else:
        # Work on a copy so the caller's headers are not modified.
        headers = headers.copy()

    contentType = headers.getRawHeaders('content-type', [""])[0]
    date = headers.getRawHeaders('date', [""])[0] or \
        self._generateRequestDate(uri)
    headers.setRawHeaders('date', [date])

    uri_origin_form = URI.fromBytes(uri).originForm
    contentMD5 = headers.getRawHeaders('content-md5', [""])[0]

    if not contentMD5 and bodyProducer is not None:
        # getattr needs an explicit default: without one a wrapped agent
        # lacking ``_reactor`` raises AttributeError and the fallback to the
        # global reactor is never reached.
        r = getattr(self.agent, '_reactor', None) or reactor
        bodyConsumer = StringConsumer(callLater=r.callLater)
        yield bodyProducer.startProducing(bodyConsumer)
        body = bodyConsumer.value()
        # The original producer has been consumed; replay the same bytes.
        bodyProducer = StringBodyProducer(body)
        if body:
            contentMD5 = binascii.b2a_base64(
                hashlib.md5(body).digest()).strip()
            headers.addRawHeader('content-md5', contentMD5)

    sts = "\n".join([
        method, contentType or "", contentMD5, date or "", uri_origin_form
    ])
    mac = hmac.new(self.secretKey, sts, digestmod=hashlib.sha1).digest()
    encodedMAC = binascii.b2a_base64(mac).strip()

    auth_header = "AuthHMAC {0}:{1}".format(self.accessKey, encodedMAC)
    headers.addRawHeader('authorization', auth_header)

    d = yield self.agent.request(method, uri, headers, bodyProducer)
    self._handleResponseDate(uri, d)
    defer.returnValue(d)
def __init__(self, url, outputfile, contextFactory=None, *args, **kwargs):
    """Create the download factory for *url* and start connecting.

    The factory is stored on ``self.factory`` and the connector returned by
    the reactor on ``self.connection``. Extra arguments are forwarded to
    ``HTTPProgressDownloader``. ``contextFactory`` is accepted but not used
    here: https connections always get a new ``ssl.ClientContextFactory``.
    """
    if hasattr(client, '_parse'):
        # Twisted versions that still provide the private _parse helper.
        scheme, host, port, _path = client._parse(url)
    else:
        from twisted.web.client import URI
        uri = URI.fromBytes(url)
        scheme = uri.scheme
        host = uri.host
        port = uri.port
    # Another possible approach, kept for reference:
    # parsed = urlparse(url)
    # scheme = parsed.scheme
    # host = parsed.hostname
    # port = parsed.port or (443 if scheme == 'https' else 80)

    self.factory = HTTPProgressDownloader(url, outputfile, *args, **kwargs)
    # URI.fromBytes yields a bytes scheme; accept both bytes and text so
    # https is recognised whichever parser produced the scheme.
    if scheme in (b"https", u"https"):
        self.connection = reactor.connectSSL(
            host, port, self.factory, ssl.ClientContextFactory())
    else:
        self.connection = reactor.connectTCP(host, port, self.factory)
def _get_agent(self, request: Request, timeout: Optional[float]) -> H2Agent:
    """Return the agent to use for *request*.

    Without a ``proxy`` entry in ``request.meta`` a direct agent is built.
    With one, a proxy agent is built, unless the request URL is https and
    the proxy URL does not carry the ``noconnect`` parameter, in which case
    ``NotImplementedError`` is raised. A warning is emitted whenever
    ``noconnect`` is present.
    """
    from twisted.internet import reactor

    bind_address = request.meta.get('bindaddress') or self._bind_address
    proxy = request.meta.get('proxy')

    if not proxy:
        return self._Agent(
            reactor=reactor,
            context_factory=self._context_factory,
            connect_timeout=timeout,
            bind_address=bind_address,
            pool=self._pool,
        )

    _, _, proxy_host, proxy_port, proxy_params = _parse(proxy)
    target_scheme = _parse(request.url)[0]
    # The decoded host is not used further in this method.
    proxy_host = proxy_host.decode()

    noconnect = b'noconnect' in proxy_params
    if noconnect:
        warnings.warn(
            "Using HTTPS proxies in the noconnect mode is not "
            "supported by the downloader handler. If you use Zyte "
            "Smart Proxy Manager, it doesn't require this mode "
            "anymore, so you should update scrapy-crawlera to "
            "scrapy-zyte-smartproxy and remove '?noconnect' from the "
            "Zyte Smart Proxy Manager URL.")

    if not noconnect and target_scheme == b'https':
        # ToDo
        raise NotImplementedError(
            'Tunneling via CONNECT method using HTTP/2.0 is not yet supported'
        )

    return self._ProxyAgent(
        reactor=reactor,
        context_factory=self._context_factory,
        proxy_uri=URI.fromBytes(to_bytes(proxy, encoding='ascii')),
        connect_timeout=timeout,
        bind_address=bind_address,
        pool=self._pool,
    )
def request(self, method, uri, headers=None, bodyProducer=None):
    """
    Send a request to the server named by C{uri}.

    A pooled connection may be reused, or a new one created. As it stands
    the pool may be of limited use, since every pooled connection will use
    the same Tor circuit.

    Currently only the I{HTTP} scheme is supported in C{uri}.

    @see: L{twisted.web.iweb.IAgent.request}
    """
    target = URI.fromBytes(uri)
    scheme, host, port = target.scheme, target.host, target.port
    endpoint = self._getEndpoint(scheme, host, port)

    # XXX
    # perhaps the request method should take a key?
    pool_key = (scheme, host, port)
    return self._requestWithEndpoint(
        pool_key, endpoint, method, target, headers, bodyProducer,
        target.originForm)
def getPageCached(url, contextFactory=None, *args, **kwargs):
    """download a web page as a string, keep a cache of already downloaded
    pages

    Download a page. Return a deferred, which will callback with a
    page (as a string) or errback with a description of the error.

    For https URLs the connection uses ``contextFactory``; when it is None
    an ``HTTPSVerifyingContextFactory`` for the target host is created.

    See HTTPClientCacheFactory to see what extra args can be passed.
    """
    uri = URI.fromBytes(url)
    host = uri.host
    port = uri.port

    factory = HTTPClientCacheFactory(url, *args, **kwargs)

    # URI.fromBytes produces a bytes scheme, so compare against a bytes
    # literal: on Python 3 a str literal never matches and https URLs would
    # be fetched over plain TCP.
    if uri.scheme == b'https':
        if contextFactory is None:
            contextFactory = HTTPSVerifyingContextFactory(host)
        reactor.connectSSL(host, port, factory, contextFactory)
    else:
        reactor.connectTCP(host, port, factory)
    return factory.deferred
def get_json(url, *args, **kwargs):
    """
    Fetch ``url`` and pass the response through ``_json_loads``.

    :param url: URL to fetch, in the form accepted by ``URI.fromBytes``
    :param args: extra positional arguments for ``HTTPClientFactory``
    :param kwargs: extra keyword arguments for ``HTTPClientFactory``; a
        truthy ``json`` entry is removed, serialised with ``as_json`` and
        sent as ``postdata``; ``agent`` defaults to 'Twisted JSON Adapter'
    :return: the factory's deferred with ``_json_loads`` added as callback
    """
    payload = kwargs.pop('json', None)
    if payload:
        kwargs['postdata'] = as_json(payload)
    kwargs.setdefault('agent', 'Twisted JSON Adapter')

    parsed = URI.fromBytes(url)
    factory = HTTPClientFactory(url, *args, **kwargs)
    factory.noisy = 0

    if parsed.scheme != b'https':
        reactor.connectTCP(nativeString(parsed.host), parsed.port, factory)
    else:
        # The ssl module is only imported on the https path.
        from twisted.internet import ssl
        tls_context = ssl.ClientContextFactory()
        reactor.connectSSL(
            nativeString(parsed.host), parsed.port, factory, tls_context)
    return factory.deferred.addCallback(_json_loads)
def get_json(url, *args, **kwargs):
    """
    Request ``url`` and run the response through ``_json_loads``.

    :param url: URL to fetch, in the form accepted by ``URI.fromBytes``
    :param args: extra positional arguments for ``HTTPClientFactory``
    :param kwargs: extra keyword arguments for ``HTTPClientFactory``; a
        truthy ``json`` entry is popped, serialised with ``as_json`` and
        passed on as ``postdata``; ``agent`` defaults to
        'Twisted JSON Adapter'
    :return: the factory's deferred with ``_json_loads`` added as callback
    """
    body = kwargs.pop('json', None)
    if body:
        kwargs['postdata'] = as_json(body)
    kwargs.setdefault('agent', 'Twisted JSON Adapter')

    target = URI.fromBytes(url)
    factory = HTTPClientFactory(url, *args, **kwargs)
    factory.noisy = 0

    secure = target.scheme == b'https'
    if secure:
        # The ssl module is only imported on the https path.
        from twisted.internet import ssl
        context = ssl.ClientContextFactory()
        reactor.connectSSL(nativeString(target.host), target.port, factory,
                           context)
    else:
        reactor.connectTCP(nativeString(target.host), target.port, factory)

    result = factory.deferred
    return result.addCallback(_json_loads)
def getPage(url, bindAddress=None, *arg, **kw):
    """Download *url* and return the factory's deferred.

    Reimplemented here so that ``bindAddress`` can be passed to the
    reactor's connect call. Extra arguments are forwarded to
    ``HTTPClientFactory``.
    """
    uri = URI.fromBytes(url)
    host = uri.host
    port = uri.port

    factory = HTTPClientFactory(url, *arg, **kw)
    factory.noisy = False

    # URI.fromBytes produces a bytes scheme, so compare against a bytes
    # literal: on Python 3 a str literal never matches and https URLs would
    # be fetched over plain TCP.
    if uri.scheme == b'https':
        from twisted.internet import ssl
        context = ssl.ClientContextFactory()
        reactor.connectSSL(host, port, factory, context,
                           bindAddress=bindAddress)
    else:
        reactor.connectTCP(host, port, factory, bindAddress=bindAddress)
    return factory.deferred
def request(self, method, uri, headers, bodyProducer):
    """Add an ``AuthHMAC`` authorization header and forward the request to
    the wrapped agent.

    Signed content: method, content type, content MD5, date and origin-form
    URI, joined with newlines and signed with HMAC-SHA1 using
    ``self.secretKey``. A ``date`` header is generated when absent. If
    there is a body producer but no ``content-md5`` header, the body is
    read to compute the MD5 and a ``StringBodyProducer`` with the same
    bytes takes the producer's place.

    Generator yielding Deferreds; the response is handed back through
    ``defer.returnValue``.
    NOTE(review): presumably decorated with inlineCallbacks outside this
    block -- confirm.
    """
    # Never modify the caller's headers object.
    headers = Headers() if headers is None else headers.copy()

    contentType = headers.getRawHeaders('content-type', [""])[0]
    date = (headers.getRawHeaders('date', [""])[0]
            or self._generateRequestDate(uri))
    headers.setRawHeaders('date', [date])

    uri_origin_form = URI.fromBytes(uri).originForm
    contentMD5 = headers.getRawHeaders('content-md5', [""])[0]

    if not contentMD5 and bodyProducer is not None:
        # Without the explicit None default, getattr raises AttributeError
        # for an agent that has no ``_reactor`` and the global reactor
        # fallback can never apply.
        r = getattr(self.agent, '_reactor', None) or reactor
        bodyConsumer = StringConsumer(callLater=r.callLater)
        yield bodyProducer.startProducing(bodyConsumer)
        body = bodyConsumer.value()
        # The original producer has been consumed; replay the same bytes.
        bodyProducer = StringBodyProducer(body)
        if body:
            contentMD5 = binascii.b2a_base64(hashlib.md5(body).digest()).strip()
            headers.addRawHeader('content-md5', contentMD5)

    sts = "\n".join([method, contentType or "", contentMD5, date or "",
                     uri_origin_form])
    mac = hmac.new(self.secretKey, sts, digestmod=hashlib.sha1).digest()
    encodedMAC = binascii.b2a_base64(mac).strip()

    auth_header = "AuthHMAC {0}:{1}".format(self.accessKey, encodedMAC)
    headers.addRawHeader('authorization', auth_header)

    d = yield self.agent.request(method, uri, headers, bodyProducer)
    self._handleResponseDate(uri, d)
    defer.returnValue(d)
def _route_matrix_uri(self, parsed_uri, lookup_well_known=True):
    """Helper for `request`: determine the routing for a Matrix URI

    The checks are tried in this order, and the first that applies wins:

    1. the host is an IP literal: connect to it directly (port 8448 if no
       port was given);
    2. an explicit port was given: connect to that host and port;
    3. (only if `lookup_well_known`) the .well-known lookup returns a
       delegated server: route again for that server name, without a
       further .well-known lookup;
    4. SRV lookup of `_matrix._tcp.<host>`: use a server picked from the
       records, or fall back to the host itself on port 8448 if there are
       none.

    This is a generator which yields Deferreds.
    NOTE(review): presumably wrapped with inlineCallbacks outside this
    block -- confirm.

    Args:
        parsed_uri (twisted.web.client.URI): uri to route. Note that it should be
            parsed with URI.fromBytes(uri, defaultPort=-1) to set the `port` to -1
            if there is no explicit port given.

        lookup_well_known (bool): True if we should look up the .well-known file if
            there is no SRV record.

    Returns:
        Deferred[_RoutingResult]
    """
    # check for an IP literal
    try:
        ip_address = IPAddress(parsed_uri.host.decode("ascii"))
    except Exception:
        # not an IP address
        ip_address = None

    if ip_address:
        port = parsed_uri.port
        if port == -1:
            # no explicit port was given: use 8448
            port = 8448
        return _RoutingResult(
            host_header=parsed_uri.netloc,
            tls_server_name=parsed_uri.host,
            target_host=parsed_uri.host,
            target_port=port,
        )

    if parsed_uri.port != -1:
        # there is an explicit port
        return _RoutingResult(
            host_header=parsed_uri.netloc,
            tls_server_name=parsed_uri.host,
            target_host=parsed_uri.host,
            target_port=parsed_uri.port,
        )

    if lookup_well_known:
        # try a .well-known lookup
        well_known_result = yield self._well_known_resolver.get_well_known(
            parsed_uri.host)
        well_known_server = well_known_result.delegated_server

        if well_known_server:
            # if we found a .well-known, start again, but don't do another
            # .well-known lookup.

            # parse the server name in the .well-known response into host/port.
            # (This code is lifted from twisted.web.client.URI.fromBytes).
            if b":" in well_known_server:
                well_known_host, well_known_port = well_known_server.rsplit(
                    b":", 1)
                try:
                    well_known_port = int(well_known_port)
                except ValueError:
                    # the part after the colon could not be parsed as an int
                    # - we assume it is an IPv6 literal with no port (the closing
                    # ']' stops it being parsed as an int)
                    well_known_host, well_known_port = well_known_server, -1
            else:
                well_known_host, well_known_port = well_known_server, -1

            # same URI as before, but pointing at the delegated server
            new_uri = URI(
                scheme=parsed_uri.scheme,
                netloc=well_known_server,
                host=well_known_host,
                port=well_known_port,
                path=parsed_uri.path,
                params=parsed_uri.params,
                query=parsed_uri.query,
                fragment=parsed_uri.fragment,
            )

            res = yield self._route_matrix_uri(new_uri, lookup_well_known=False)
            return res

    # try a SRV lookup
    service_name = b"_matrix._tcp.%s" % (parsed_uri.host, )
    server_list = yield self._srv_resolver.resolve_service(service_name)

    if not server_list:
        # no SRV records: connect to the host itself on port 8448
        target_host = parsed_uri.host
        port = 8448
        logger.debug(
            "No SRV record for %s, using %s:%i",
            parsed_uri.host.decode("ascii"),
            target_host.decode("ascii"),
            port,
        )
    else:
        target_host, port = pick_server_from_list(server_list)
        logger.debug(
            "Picked %s:%i from SRV records for %s",
            target_host.decode("ascii"),
            port,
            parsed_uri.host.decode("ascii"),
        )

    return _RoutingResult(
        host_header=parsed_uri.netloc,
        tls_server_name=parsed_uri.host,
        target_host=target_host,
        target_port=port,
    )
def request(
    self,
    method: bytes,
    uri: bytes,
    headers: Optional["Headers"] = None,
    bodyProducer: Optional["IBodyProducer"] = None,
) -> Generator["defer.Deferred[Any]", Any, IResponse]:
    """
    Route the URI, then send the request to the resolved target.

    :param method: HTTP method (GET/POST/etc).
    :param uri: Absolute URI to be retrieved.
    :param headers:
        HTTP headers to send with the request, or None to send no extra
        headers. A ``host`` header is added if none is present.
    :param bodyProducer:
        An object which can generate bytes to make up the body of this
        request (for example, the properly encoded contents of a file for
        a file upload). Or None if the request is to have no body.
    :returns a deferred that fires when the header of the response has
        been received (regardless of the response status code). Fails if
        there is any problem which prevents that response from being
        received (including problems that prevent the request from being
        sent).
    """
    # defaultPort=-1 marks "no explicit port" for the routing helper.
    parsed_uri = URI.fromBytes(uri, defaultPort=-1)
    routing: _RoutingResult
    routing = yield defer.ensureDeferred(
        self._route_matrix_uri(parsed_uri))

    # set up the TLS connection params
    #
    # XXX disabling TLS is really only supported here for the benefit of
    # the unit tests. We should make the UTs cope with TLS rather than
    # having to make the code support the unit tests.
    tls_factory = self._tls_client_options_factory
    if tls_factory is not None:
        tls_options = tls_factory.get_options(
            routing.tls_server_name.decode("ascii"))
    else:
        tls_options = None

    # make sure that the Host header is set correctly
    if headers is not None:
        # Type safety: Headers.copy doesn't have a return type annotated,
        # and I don't want to stub web.http_headers. Could use stubgen?
        # It's a pretty simple file.
        headers = headers.copy()  # type: ignore[no-untyped-call]
    else:
        headers = Headers()
    assert headers is not None

    if not headers.hasHeader(b"host"):
        headers.addRawHeader(b"host", routing.host_header)

    @implementer(IAgentEndpointFactory)
    class EndpointFactory:
        # Ignores the URI it is given and always connects to the target
        # chosen by the routing step above.
        @staticmethod
        def endpointForURI(_uri: URI) -> IStreamClientEndpoint:
            endpoint: IStreamClientEndpoint = LoggingHostnameEndpoint(
                self._reactor,
                routing.target_host,
                routing.target_port,
            )
            if tls_options is None:
                return endpoint
            return wrapClientTLS(tls_options, endpoint)

    agent = Agent.usingEndpointFactory(
        self._reactor, EndpointFactory(), self._pool)
    res: IResponse
    res = yield agent.request(method, uri, headers, bodyProducer)
    return res
def request(self, method, uri, headers=None, bodyProducer=None):
    """
    Send a request for `uri`, going through a configured proxy when one
    applies.

    Supports the `http` and `https` schemes. A pooled connection may be
    reused, or a new one created. Plain-http requests via the proxy are sent
    with the full URI as the request path; https requests via the proxy use
    an HTTP CONNECT endpoint. Hosts matched by `self.no_proxy` bypass the
    proxy.

    See also: twisted.web.iweb.IAgent.request

    Args:
        method (bytes): The request method to use, such as `GET`, `POST`, etc

        uri (bytes): The location of the resource to request.

        headers (Headers|None): Extra headers to send with the request

        bodyProducer (IBodyProducer|None): An object which can generate bytes to
            make up the body of this request (for example, the properly encoded
            contents of a file for a file upload). Or, None if the request is to
            have no body.

    Returns:
        Deferred[IResponse]: completes when the header of the response has been
            received (regardless of the response status code).

            Can fail with:
                SchemeNotSupported: if the uri is not http or https

                twisted.internet.error.TimeoutError if the server we are connecting
                    to (proxy or destination) does not accept a connection before
                    connectTimeout.

                ... other things too.

    Raises:
        ValueError: if the stripped uri does not match `_VALID_URI`.
    """
    uri = uri.strip()
    if not _VALID_URI.match(uri):
        raise ValueError("Invalid URI {!r}".format(uri))

    parsed_uri = URI.fromBytes(uri)
    scheme = parsed_uri.scheme
    target_host = parsed_uri.host
    target_port = parsed_uri.port

    # Values for a direct connection; the plain-http proxy branch replaces
    # both of them.
    pool_key = (scheme, target_host, target_port)
    request_path = parsed_uri.originForm

    skip_proxy = False
    if self.no_proxy is not None:
        skip_proxy = proxy_bypass_environment(
            target_host.decode(),
            proxies={"no": self.no_proxy},
        )

    if scheme == b"http" and self.http_proxy_endpoint and not skip_proxy:
        # Cache *all* connections under the same key, since we are only
        # connecting to a single destination, the proxy:
        pool_key = ("http-proxy", self.http_proxy_endpoint)
        endpoint = self.http_proxy_endpoint
        request_path = uri
    elif scheme == b"https" and self.https_proxy_endpoint and not skip_proxy:
        connect_headers = Headers()
        proxy_creds = self.https_proxy_creds
        if proxy_creds:
            # Credentials are configured: send them on the CONNECT request.
            connect_headers.addRawHeader(
                b"Proxy-Authorization",
                proxy_creds.as_proxy_authorization_value(),
            )
        endpoint = HTTPConnectProxyEndpoint(
            self.proxy_reactor,
            self.https_proxy_endpoint,
            target_host,
            target_port,
            headers=connect_headers,
        )
    else:
        # not using a proxy
        endpoint = HostnameEndpoint(
            self._reactor, target_host, target_port, **self._endpoint_kwargs)

    logger.debug("Requesting %s via %s", uri, endpoint)

    if scheme not in (b"http", b"https"):
        return defer.fail(
            Failure(
                SchemeNotSupported("Unsupported scheme: %r" % (scheme, ))))

    if scheme == b"https":
        tls_connection_creator = self._policy_for_https.creatorForNetloc(
            target_host, target_port)
        endpoint = wrapClientTLS(tls_connection_creator, endpoint)

    return self._requestWithEndpoint(
        pool_key, endpoint, method, parsed_uri, headers, bodyProducer,
        request_path)
def request(self, method, uri, headers=None, bodyProducer=None):
    """
    Route the URI, then send the request to the resolved target.

    :param method: HTTP method (GET/POST/etc).
    :type method: bytes

    :param uri: Absolute URI to be retrieved.
    :type uri: bytes

    :param headers: HTTP headers to send with the request, or None to
        send no extra headers. A ``host`` header is added if none is present.
    :type headers: twisted.web.http_headers.Headers, None

    :param bodyProducer: An object which can generate bytes to make up the
        body of this request (for example, the properly encoded contents of
        a file for a file upload). Or None if the request is to have
        no body.
    :type bodyProducer: twisted.web.iweb.IBodyProducer, None

    :returns a deferred that fires when the header of the response has
        been received (regardless of the response status code). Fails if
        there is any problem which prevents that response from being received
        (including problems that prevent the request from being sent).
    :rtype: Deferred[twisted.web.iweb.IResponse]
    """
    # defaultPort=-1 marks "no explicit port" for the routing helper.
    parsed_uri = URI.fromBytes(uri, defaultPort=-1)
    routing = yield self._route_matrix_uri(parsed_uri)

    # set up the TLS connection params
    #
    # XXX disabling TLS is really only supported here for the benefit of the
    # unit tests. We should make the UTs cope with TLS rather than having to make
    # the code support the unit tests.
    tls_factory = self._tls_client_options_factory
    if tls_factory is not None:
        tls_options = tls_factory.get_options(
            routing.tls_server_name.decode("ascii")
        )
    else:
        tls_options = None

    # make sure that the Host header is set correctly
    headers = Headers() if headers is None else headers.copy()
    if not headers.hasHeader(b'host'):
        headers.addRawHeader(b'host', routing.host_header)

    class EndpointFactory(object):
        # Ignores the URI it is given and always connects to the target
        # chosen by the routing step above.
        @staticmethod
        def endpointForURI(_uri):
            endpoint = LoggingHostnameEndpoint(
                self._reactor, routing.target_host, routing.target_port,
            )
            if tls_options is None:
                return endpoint
            return wrapClientTLS(tls_options, endpoint)

    agent = Agent.usingEndpointFactory(
        self._reactor, EndpointFactory(), self._pool)
    response = yield agent.request(method, uri, headers, bodyProducer)
    defer.returnValue(response)
def request(self, method, uri, headers=None, bodyProducer=None):
    """
    Route the URI, then send the request to the resolved target.

    Args:
        method (bytes): HTTP method: GET/POST/etc

        uri (bytes): Absolute URI to be retrieved

        headers (twisted.web.http_headers.Headers|None):
            HTTP headers to send with the request, or None to
            send no extra headers. A `host` header is added if none is
            present.

        bodyProducer (twisted.web.iweb.IBodyProducer|None):
            An object which can generate bytes to make up the
            body of this request (for example, the properly encoded contents of
            a file for a file upload). Or None if the request is to have
            no body.

    Returns:
        Deferred[twisted.web.iweb.IResponse]:
            fires when the header of the response has been received (regardless of the
            response status code). Fails if there is any problem which prevents that
            response from being received (including problems that prevent the request
            from being sent).
    """
    # defaultPort=-1 marks "no explicit port" for the routing helper.
    parsed_uri = URI.fromBytes(uri, defaultPort=-1)
    routing = yield self._route_matrix_uri(parsed_uri)

    # set up the TLS connection params
    #
    # XXX disabling TLS is really only supported here for the benefit of the
    # unit tests. We should make the UTs cope with TLS rather than having to make
    # the code support the unit tests.
    tls_options = None
    if self._tls_client_options_factory is not None:
        tls_options = self._tls_client_options_factory.get_options(
            routing.tls_server_name.decode("ascii"))

    # make sure that the Host header is set correctly
    headers = Headers() if headers is None else headers.copy()
    if not headers.hasHeader(b"host"):
        headers.addRawHeader(b"host", routing.host_header)

    class EndpointFactory(object):
        # Ignores the URI it is given and always connects to the target
        # chosen by the routing step above.
        @staticmethod
        def endpointForURI(_uri):
            endpoint = LoggingHostnameEndpoint(
                self._reactor, routing.target_host, routing.target_port)
            if tls_options is None:
                return endpoint
            return wrapClientTLS(tls_options, endpoint)

    agent = Agent.usingEndpointFactory(
        self._reactor, EndpointFactory(), self._pool)
    response = yield make_deferred_yieldable(
        agent.request(method, uri, headers, bodyProducer))
    return response
def prePathURL(self):
    """Return ``self.uri`` as bytes, with its path replaced by the
    ``/``-joined segments of ``self.prepath``."""
    from twisted.web.client import URI

    parsed = URI.fromBytes(self.uri)
    joined_prepath = b"/".join(self.prepath)
    parsed.path = joined_prepath
    return parsed.toBytes()
def request(self, method, uri, headers=None, bodyProducer=None):
    """
    Send a request for `uri`, going through the configured proxy if there
    is one.

    Supports the `http` and `https` schemes. A pooled connection may be
    reused, or a new one created. Plain-http requests via the proxy are sent
    with the full URI as the request path; https requests via the proxy use
    an HTTP CONNECT endpoint.

    See also: twisted.web.iweb.IAgent.request

    Args:
        method (bytes): The request method to use, such as `GET`, `POST`, etc

        uri (bytes): The location of the resource to request.

        headers (Headers|None): Extra headers to send with the request

        bodyProducer (IBodyProducer|None): An object which can generate bytes to
            make up the body of this request (for example, the properly encoded
            contents of a file for a file upload). Or, None if the request is to
            have no body.

    Returns:
        Deferred[IResponse]: completes when the header of the response has been
            received (regardless of the response status code). Fails with
            SchemeNotSupported for schemes other than http and https.

    Raises:
        ValueError: if the stripped uri does not match `_VALID_URI`.
    """
    uri = uri.strip()
    if not _VALID_URI.match(uri):
        raise ValueError("Invalid URI {!r}".format(uri))

    parsed_uri = URI.fromBytes(uri)
    scheme = parsed_uri.scheme
    target_host = parsed_uri.host
    target_port = parsed_uri.port

    # Values for a direct connection; the plain-http proxy branch replaces
    # both of them.
    pool_key: tuple = (scheme, target_host, target_port)
    request_path = parsed_uri.originForm

    if scheme == b"http" and self.proxy_endpoint:
        # Cache *all* connections under the same key, since we are only
        # connecting to a single destination, the proxy:
        pool_key = ("http-proxy", self.proxy_endpoint)
        endpoint = self.proxy_endpoint
        request_path = uri
    elif scheme == b"https" and self.proxy_endpoint:
        endpoint = HTTPConnectProxyEndpoint(
            self._reactor,
            self.proxy_endpoint,
            target_host,
            target_port,
            self._proxy_auth,
        )
    else:
        # not using a proxy
        endpoint = HostnameEndpoint(
            self._reactor, target_host, target_port, **self._endpoint_kwargs)

    logger.debug("Requesting %s via %s", uri, endpoint)

    if scheme not in (b"http", b"https"):
        return defer.fail(
            Failure(
                SchemeNotSupported("Unsupported scheme: %r" % (scheme, ))))

    if scheme == b"https":
        tls_connection_creator = self._policy_for_https.creatorForNetloc(
            target_host, target_port)
        endpoint = wrapClientTLS(tls_connection_creator, endpoint)

    return self._requestWithEndpoint(
        pool_key, endpoint, method, parsed_uri, headers, bodyProducer,
        request_path)
def prePathURL(self):
    """Rebuild ``self.uri`` with its path set to the ``prepath`` segments
    joined by ``/``, and return the result as bytes."""
    from twisted.web.client import URI

    rebuilt = URI.fromBytes(self.uri)
    prepath_bytes = b'/'.join(self.prepath)
    rebuilt.path = prepath_bytes
    return rebuilt.toBytes()