async def _send_request( self, request: MatrixFederationRequest, retry_on_dns_fail: bool = True, timeout: Optional[int] = None, long_retries: bool = False, ignore_backoff: bool = False, backoff_on_404: bool = False, ) -> IResponse: """ Sends a request to the given server. Args: request: details of request to be sent retry_on_dns_fail: true if the request should be retied on DNS failures timeout: number of milliseconds to wait for the response headers (including connecting to the server), *for each attempt*. 60s by default. long_retries: whether to use the long retry algorithm. The regular retry algorithm makes 4 attempts, with intervals [0.5s, 1s, 2s]. The long retry algorithm makes 11 attempts, with intervals [4s, 16s, 60s, 60s, ...] Both algorithms add -20%/+40% jitter to the retry intervals. Note that the above intervals are *in addition* to the time spent waiting for the request to complete (up to `timeout` ms). NB: the long retry algorithm takes over 20 minutes to complete, with a default timeout of 60s! ignore_backoff: true to ignore the historical backoff data and try the request anyway. backoff_on_404: Back off if we get a 404 Returns: Resolves with the HTTP response object on success. Raises: HttpResponseException: If we get an HTTP response code >= 300 (except 429). NotRetryingDestination: If we are not yet ready to retry this server. FederationDeniedError: If this destination is not on our federation whitelist RequestSendFailed: If there were problems connecting to the remote, due to e.g. DNS failures, connection timeouts etc. """ if timeout: _sec_timeout = timeout / 1000 else: _sec_timeout = self.default_timeout if (self.hs.config.federation.federation_domain_whitelist is not None and request.destination not in self.hs.config.federation.federation_domain_whitelist): raise FederationDeniedError(request.destination) limiter = await synapse.util.retryutils.get_retry_limiter( request.destination, self.clock, self._store, backoff_on_404=backoff_on_404, ignore_backoff=ignore_backoff, ) method_bytes = request.method.encode("ascii") destination_bytes = request.destination.encode("ascii") path_bytes = request.path.encode("ascii") if request.query: query_bytes = encode_query_args(request.query) else: query_bytes = b"" scope = start_active_span( "outgoing-federation-request", tags={ tags.SPAN_KIND: tags.SPAN_KIND_RPC_CLIENT, tags.PEER_ADDRESS: request.destination, tags.HTTP_METHOD: request.method, tags.HTTP_URL: request.path, }, finish_on_close=True, ) # Inject the span into the headers headers_dict: Dict[bytes, List[bytes]] = {} opentracing.inject_header_dict(headers_dict, request.destination) headers_dict[b"User-Agent"] = [self.version_string_bytes] with limiter, scope: # XXX: Would be much nicer to retry only at the transaction-layer # (once we have reliable transactions in place) if long_retries: retries_left = MAX_LONG_RETRIES else: retries_left = MAX_SHORT_RETRIES url_bytes = request.uri url_str = url_bytes.decode("ascii") url_to_sign_bytes = urllib.parse.urlunparse( (b"", b"", path_bytes, None, query_bytes, b"")) while True: try: json = request.get_json() if json: headers_dict[b"Content-Type"] = [b"application/json"] auth_headers = self.build_auth_headers( destination_bytes, method_bytes, url_to_sign_bytes, json) data = encode_canonical_json(json) producer: Optional[ IBodyProducer] = QuieterFileBodyProducer( BytesIO(data), cooperator=self._cooperator) else: producer = None auth_headers = self.build_auth_headers( destination_bytes, method_bytes, url_to_sign_bytes) headers_dict[b"Authorization"] = auth_headers logger.debug( "{%s} [%s] Sending request: %s %s; timeout %fs", request.txn_id, request.destination, request.method, url_str, _sec_timeout, ) outgoing_requests_counter.labels(request.method).inc() try: with Measure(self.clock, "outbound_request"): # we don't want all the fancy cookie and redirect handling # that treq.request gives: just use the raw Agent. # To preserve the logging context, the timeout is treated # in a similar way to `defer.gatherResults`: # * Each logging context-preserving fork is wrapped in # `run_in_background`. In this case there is only one, # since the timeout fork is not logging-context aware. # * The `Deferred` that joins the forks back together is # wrapped in `make_deferred_yieldable` to restore the # logging context regardless of the path taken. request_deferred = run_in_background( self.agent.request, method_bytes, url_bytes, headers=Headers(headers_dict), bodyProducer=producer, ) request_deferred = timeout_deferred( request_deferred, timeout=_sec_timeout, reactor=self.reactor, ) response = await make_deferred_yieldable( request_deferred) except DNSLookupError as e: raise RequestSendFailed( e, can_retry=retry_on_dns_fail) from e except Exception as e: raise RequestSendFailed(e, can_retry=True) from e incoming_responses_counter.labels(request.method, response.code).inc() set_tag(tags.HTTP_STATUS_CODE, response.code) response_phrase = response.phrase.decode("ascii", errors="replace") if 200 <= response.code < 300: logger.debug( "{%s} [%s] Got response headers: %d %s", request.txn_id, request.destination, response.code, response_phrase, ) pass else: logger.info( "{%s} [%s] Got response headers: %d %s", request.txn_id, request.destination, response.code, response_phrase, ) # :'( # Update transactions table? d = treq.content(response) d = timeout_deferred(d, timeout=_sec_timeout, reactor=self.reactor) try: body = await make_deferred_yieldable(d) except Exception as e: # Eh, we're already going to raise an exception so lets # ignore if this fails. logger.warning( "{%s} [%s] Failed to get error response: %s %s: %s", request.txn_id, request.destination, request.method, url_str, _flatten_response_never_received(e), ) body = None exc = HttpResponseException(response.code, response_phrase, body) # Retry if the error is a 5xx or a 429 (Too Many # Requests), otherwise just raise a standard # `HttpResponseException` if 500 <= response.code < 600 or response.code == 429: raise RequestSendFailed(exc, can_retry=True) from exc else: raise exc break except RequestSendFailed as e: logger.info( "{%s} [%s] Request failed: %s %s: %s", request.txn_id, request.destination, request.method, url_str, _flatten_response_never_received(e.inner_exception), ) if not e.can_retry: raise if retries_left and not timeout: if long_retries: delay = 4**(MAX_LONG_RETRIES + 1 - retries_left) delay = min(delay, 60) delay *= random.uniform(0.8, 1.4) else: delay = 0.5 * 2**(MAX_SHORT_RETRIES - retries_left) delay = min(delay, 2) delay *= random.uniform(0.8, 1.4) logger.debug( "{%s} [%s] Waiting %ss before re-sending...", request.txn_id, request.destination, delay, ) await self.clock.sleep(delay) retries_left -= 1 else: raise except Exception as e: logger.warning( "{%s} [%s] Request failed: %s %s: %s", request.txn_id, request.destination, request.method, url_str, _flatten_response_never_received(e), ) raise return response
async def send_request(*, instance_name="master", **kwargs): if instance_name == local_instance_name: raise Exception("Trying to send HTTP request to self") if instance_name == "master": host = master_host port = master_port elif instance_name in instance_map: host = instance_map[instance_name].host port = instance_map[instance_name].port else: raise Exception("Instance %r not in 'instance_map' config" % (instance_name, )) data = await cls._serialize_payload(**kwargs) url_args = [ urllib.parse.quote(kwargs[name], safe="") for name in cls.PATH_ARGS ] if cls.CACHE: txn_id = random_string(10) url_args.append(txn_id) if cls.METHOD == "POST": request_func = client.post_json_get_json elif cls.METHOD == "PUT": request_func = client.put_json elif cls.METHOD == "GET": request_func = client.get_json else: # We have already asserted in the constructor that a # compatible was picked, but lets be paranoid. raise Exception("Unknown METHOD on %s replication endpoint" % (cls.NAME, )) uri = "http://%s:%s/_synapse/replication/%s/%s" % ( host, port, cls.NAME, "/".join(url_args), ) try: # We keep retrying the same request for timeouts. This is so that we # have a good idea that the request has either succeeded or failed on # the master, and so whether we should clean up or not. while True: headers: Dict[bytes, List[bytes]] = {} # Add an authorization header, if configured. if replication_secret: headers[b"Authorization"] = [ b"Bearer " + replication_secret ] opentracing.inject_header_dict(headers, check_destination=False) try: result = await request_func(uri, data, headers=headers) break except RequestTimedOutError: if not cls.RETRY_ON_TIMEOUT: raise logger.warning("%s request timed out; retrying", cls.NAME) # If we timed out we probably don't need to worry about backing # off too much, but lets just wait a little anyway. await clock.sleep(1) except HttpResponseException as e: # We convert to SynapseError as we know that it was a SynapseError # on the main process that we should send to the client. (And # importantly, not stack traces everywhere) _outgoing_request_counter.labels(cls.NAME, e.code).inc() raise e.to_synapse_error() except Exception as e: _outgoing_request_counter.labels(cls.NAME, "ERR").inc() raise SynapseError(502, "Failed to talk to main process") from e _outgoing_request_counter.labels(cls.NAME, 200).inc() return result