def get_key(server_name, key_ids): limiter = yield get_retry_limiter( server_name, self.clock, self.store, ) with limiter: keys = None try: keys = yield self.get_server_verify_key_v2_direct( server_name, key_ids) except Exception as e: logger.info( "Unable to getting key %r for %r directly: %s %s", key_ids, server_name, type(e).__name__, str(e.message), ) if not keys: keys = yield self.get_server_verify_key_v1_direct( server_name, key_ids) keys = {server_name: keys} defer.returnValue(keys)
def claim_client_keys(destination): device_keys = remote_queries[destination] try: limiter = yield get_retry_limiter( destination, self.clock, self.store ) with limiter: remote_result = yield self.federation.claim_client_keys( destination, {"one_time_keys": device_keys}, timeout=timeout ) for user_id, keys in remote_result["one_time_keys"].items(): if user_id in device_keys: json_result[user_id] = keys except CodeMessageException as e: failures[destination] = { "status": e.code, "message": e.message } except NotRetryingDestination as e: failures[destination] = { "status": 503, "message": "Not ready for retry", } except Exception as e: # include ConnectionRefused and other errors failures[destination] = { "status": 503, "message": e.message }
def get_key(server_name, key_ids): limiter = yield get_retry_limiter( server_name, self.clock, self.store, ) with limiter: keys = None try: keys = yield self.get_server_verify_key_v2_direct( server_name, key_ids ) except Exception as e: logger.info( "Unable to getting key %r for %r directly: %s %s", key_ids, server_name, type(e).__name__, str(e.message), ) if not keys: keys = yield self.get_server_verify_key_v1_direct( server_name, key_ids ) keys = {server_name: keys} defer.returnValue(keys)
def do_remote_query(destination): destination_query = remote_queries[destination] try: limiter = yield get_retry_limiter( destination, self.clock, self.store ) with limiter: remote_result = yield self.federation.query_client_keys( destination, {"device_keys": destination_query}, timeout=timeout ) for user_id, keys in remote_result["device_keys"].items(): if user_id in destination_query: results[user_id] = keys except CodeMessageException as e: failures[destination] = { "status": e.code, "message": e.message } except NotRetryingDestination as e: failures[destination] = { "status": 503, "message": "Not ready for retry", }
def _get_server_verify_key_impl(self, server_name, key_ids): keys = None @defer.inlineCallbacks def get_key(perspective_name, perspective_keys): try: result = yield self.get_server_verify_key_v2_indirect( server_name, key_ids, perspective_name, perspective_keys) defer.returnValue(result) except Exception as e: logging.info( "Unable to getting key %r for %r from %r: %s %s", key_ids, server_name, perspective_name, type(e).__name__, str(e.message), ) perspective_results = yield defer.gatherResults([ get_key(p_name, p_keys) for p_name, p_keys in self.perspective_servers.items() ]) for results in perspective_results: if results is not None: keys = results limiter = yield get_retry_limiter( server_name, self.clock, self.store, ) with limiter: if not keys: try: keys = yield self.get_server_verify_key_v2_direct( server_name, key_ids) except Exception as e: logging.info( "Unable to getting key %r for %r directly: %s %s", key_ids, server_name, type(e).__name__, str(e.message), ) if not keys: keys = yield self.get_server_verify_key_v1_direct( server_name, key_ids) for key_id in key_ids: if key_id in keys: defer.returnValue(keys[key_id]) return raise ValueError("No verification key found for given key ids")
def _get_server_verify_key_impl(self, server_name, key_ids): keys = None @defer.inlineCallbacks def get_key(perspective_name, perspective_keys): try: result = yield self.get_server_verify_key_v2_indirect( server_name, key_ids, perspective_name, perspective_keys ) defer.returnValue(result) except Exception as e: logging.info( "Unable to getting key %r for %r from %r: %s %s", key_ids, server_name, perspective_name, type(e).__name__, str(e.message), ) perspective_results = yield defer.gatherResults([ get_key(p_name, p_keys) for p_name, p_keys in self.perspective_servers.items() ]) for results in perspective_results: if results is not None: keys = results limiter = yield get_retry_limiter( server_name, self.clock, self.store, ) with limiter: if not keys: try: keys = yield self.get_server_verify_key_v2_direct( server_name, key_ids ) except Exception as e: logging.info( "Unable to getting key %r for %r directly: %s %s", key_ids, server_name, type(e).__name__, str(e.message), ) if not keys: keys = yield self.get_server_verify_key_v1_direct( server_name, key_ids ) for key_id in key_ids: if key_id in keys: defer.returnValue(keys[key_id]) return raise ValueError("No verification key found for given key ids")
def _get_server_verify_key_impl(self, server_name, key_ids): keys = None perspective_results = [] for perspective_name, perspective_keys in self.perspective_servers.items( ): @defer.inlineCallbacks def get_key(): try: result = yield self.get_server_verify_key_v2_indirect( server_name, key_ids, perspective_name, perspective_keys) defer.returnValue(result) except: logging.info( "Unable to getting key %r for %r from %r", key_ids, server_name, perspective_name, ) perspective_results.append(get_key()) perspective_results = yield defer.gatherResults(perspective_results) for results in perspective_results: if results is not None: keys = results limiter = yield get_retry_limiter( server_name, self.clock, self.store, ) with limiter: if keys is None: try: keys = yield self.get_server_verify_key_v2_direct( server_name, key_ids) except: pass keys = yield self.get_server_verify_key_v1_direct( server_name, key_ids) for key_id in key_ids: if key_id in keys: defer.returnValue(keys[key_id]) return raise ValueError("No verification key found for given key ids")
def test_new_destination(self): """A happy-path case with a new destination and a successful operation""" store = self.hs.get_datastore() limiter = self.get_success(get_retry_limiter("test_dest", self.clock, store)) # advance the clock a bit before making the request self.pump(1) with limiter: pass new_timings = self.get_success(store.get_destination_retry_timings("test_dest")) self.assertIsNone(new_timings)
def _transaction_transmission_loop(self): pending_pdus = [] try: self.transmission_loop_running = True # This will throw if we wouldn't retry. We do this here so we fail # quickly, but we will later check this again in the http client, # hence why we throw the result away. yield get_retry_limiter(self._destination, self._clock, self._store) pending_pdus = [] while True: device_message_edus, device_stream_id, dev_list_id = ( yield self._get_new_device_messages()) # BEGIN CRITICAL SECTION # # In order to avoid a race condition, we need to make sure that # the following code (from popping the queues up to the point # where we decide if we actually have any pending messages) is # atomic - otherwise new PDUs or EDUs might arrive in the # meantime, but not get sent because we hold the # transmission_loop_running flag. pending_pdus = self._pending_pdus # We can only include at most 50 PDUs per transactions pending_pdus, self._pending_pdus = pending_pdus[: 50], pending_pdus[ 50:] pending_edus = [] pending_edus.extend(self._get_rr_edus(force_flush=False)) # We can only include at most 100 EDUs per transactions pending_edus.extend( self._pop_pending_edus(100 - len(pending_edus))) pending_edus.extend(self._pending_edus_keyed.values()) self._pending_edus_keyed = {} pending_edus.extend(device_message_edus) pending_presence = self._pending_presence self._pending_presence = {} if pending_presence: pending_edus.append( Edu( origin=self._server_name, destination=self._destination, edu_type="m.presence", content={ "push": [ format_user_presence_state( presence, self._clock.time_msec()) for presence in pending_presence.values() ] }, )) if pending_pdus: logger.debug( "TX [%s] len(pending_pdus_by_dest[dest]) = %d", self._destination, len(pending_pdus)) if not pending_pdus and not pending_edus: logger.debug("TX [%s] Nothing to send", self._destination) self._last_device_stream_id = device_stream_id return # if we've decided to send a transaction anyway, and we have room, we # may as well send any pending RRs if len(pending_edus) < 100: pending_edus.extend(self._get_rr_edus(force_flush=True)) # END CRITICAL SECTION success = yield self._transaction_manager.send_new_transaction( self._destination, pending_pdus, pending_edus) if success: sent_transactions_counter.inc() sent_edus_counter.inc(len(pending_edus)) for edu in pending_edus: sent_edus_by_type.labels(edu.edu_type).inc() # Remove the acknowledged device messages from the database # Only bother if we actually sent some device messages if device_message_edus: yield self._store.delete_device_msgs_for_remote( self._destination, device_stream_id) logger.info("Marking as sent %r %r", self._destination, dev_list_id) yield self._store.mark_as_sent_devices_by_remote( self._destination, dev_list_id) self._last_device_stream_id = device_stream_id self._last_device_list_stream_id = dev_list_id else: break except NotRetryingDestination as e: logger.debug( "TX [%s] not ready for retry yet (next retry at %s) - " "dropping transaction for now", self._destination, datetime.datetime.fromtimestamp( (e.retry_last_ts + e.retry_interval) / 1000.0), ) except FederationDeniedError as e: logger.info(e) except HttpResponseException as e: logger.warning( "TX [%s] Received %d response to transaction: %s", self._destination, e.code, e, ) except RequestSendFailed as e: logger.warning("TX [%s] Failed to send transaction: %s", self._destination, e) for p, _ in pending_pdus: logger.info("Failed to send event %s to %s", p.event_id, self._destination) except Exception: logger.exception( "TX [%s] Failed to send transaction", self._destination, ) for p, _ in pending_pdus: logger.info("Failed to send event %s to %s", p.event_id, self._destination) finally: # We want to be *very* sure we clear this after we stop processing self.transmission_loop_running = False
def test_limiter(self): """General test case which walks through the process of a failing request""" store = self.hs.get_datastore() limiter = self.get_success( get_retry_limiter("test_dest", self.clock, store)) self.pump(1) try: with limiter: self.pump(1) failure_ts = self.clock.time_msec() raise AssertionError("argh") except AssertionError: pass new_timings = self.get_success( store.get_destination_retry_timings("test_dest")) self.assertEqual(new_timings["failure_ts"], failure_ts) self.assertEqual(new_timings["retry_last_ts"], failure_ts) self.assertEqual(new_timings["retry_interval"], MIN_RETRY_INTERVAL) # now if we try again we should get a failure self.get_failure(get_retry_limiter("test_dest", self.clock, store), NotRetryingDestination) # # advance the clock and try again # self.pump(MIN_RETRY_INTERVAL) limiter = self.get_success( get_retry_limiter("test_dest", self.clock, store)) self.pump(1) try: with limiter: self.pump(1) retry_ts = self.clock.time_msec() raise AssertionError("argh") except AssertionError: pass new_timings = self.get_success( store.get_destination_retry_timings("test_dest")) self.assertEqual(new_timings["failure_ts"], failure_ts) self.assertEqual(new_timings["retry_last_ts"], retry_ts) self.assertGreaterEqual(new_timings["retry_interval"], MIN_RETRY_INTERVAL * RETRY_MULTIPLIER * 0.5) self.assertLessEqual(new_timings["retry_interval"], MIN_RETRY_INTERVAL * RETRY_MULTIPLIER * 2.0) # # one more go, with success # self.reactor.advance(MIN_RETRY_INTERVAL * RETRY_MULTIPLIER * 2.0) limiter = self.get_success( get_retry_limiter("test_dest", self.clock, store)) self.pump(1) with limiter: self.pump(1) # wait for the update to land self.pump() new_timings = self.get_success( store.get_destination_retry_timings("test_dest")) self.assertIsNone(new_timings)
def _attempt_new_transaction(self, destination): # list of (pending_pdu, deferred, order) if destination in self.pending_transactions: # XXX: pending_transactions can get stuck on by a never-ending # request at which point pending_pdus_by_dest just keeps growing. # we need application-layer timeouts of some flavour of these # requests logger.debug("TX [%s] Transaction already in progress", destination) return try: self.pending_transactions[destination] = 1 yield run_on_reactor() while True: pending_pdus = self.pending_pdus_by_dest.pop(destination, []) pending_edus = self.pending_edus_by_dest.pop(destination, []) pending_presence = self.pending_presence_by_dest.pop( destination, {}) pending_failures = self.pending_failures_by_dest.pop( destination, []) pending_edus.extend( self.pending_edus_keyed_by_dest.pop(destination, {}).values()) limiter = yield get_retry_limiter( destination, self.clock, self.store, ) device_message_edus, device_stream_id = ( yield self._get_new_device_messages(destination)) pending_edus.extend(device_message_edus) if pending_presence: pending_edus.append( Edu( origin=self.server_name, destination=destination, edu_type="m.presence", content={ "push": [ format_user_presence_state( presence, self.clock.time_msec()) for presence in pending_presence.values() ] }, )) if pending_pdus: logger.debug( "TX [%s] len(pending_pdus_by_dest[dest]) = %d", destination, len(pending_pdus)) if not pending_pdus and not pending_edus and not pending_failures: logger.debug("TX [%s] Nothing to send", destination) self.last_device_stream_id_by_dest[destination] = ( device_stream_id) return success = yield self._send_new_transaction( destination, pending_pdus, pending_edus, pending_failures, device_stream_id, should_delete_from_device_stream=bool(device_message_edus), limiter=limiter, ) if not success: break except NotRetryingDestination: logger.info( "TX [%s] not ready for retry yet - " "dropping transaction for now", destination, ) finally: # We want to be *very* sure we delete this after we stop processing self.pending_transactions.pop(destination, None)
def get_server_verify_key(self, server_name, key_ids): """Finds a verification key for the server with one of the key ids. Args: server_name (str): The name of the server to fetch a key for. keys_ids (list of str): The key_ids to check for. """ # Check the datastore to see if we have one cached. cached = yield self.store.get_server_verify_keys(server_name, key_ids) if cached: defer.returnValue(cached[0]) return # Try to fetch the key from the remote server. limiter = yield get_retry_limiter( server_name, self.clock, self.store, ) with limiter: (response, tls_certificate) = yield fetch_server_key( server_name, self.hs.tls_context_factory ) # Check the response. x509_certificate_bytes = crypto.dump_certificate( crypto.FILETYPE_ASN1, tls_certificate ) if ("signatures" not in response or server_name not in response["signatures"]): raise ValueError("Key response not signed by remote server") if "tls_certificate" not in response: raise ValueError("Key response missing TLS certificate") tls_certificate_b64 = response["tls_certificate"] if encode_base64(x509_certificate_bytes) != tls_certificate_b64: raise ValueError("TLS certificate doesn't match") verify_keys = {} for key_id, key_base64 in response["verify_keys"].items(): if is_signing_algorithm_supported(key_id): key_bytes = decode_base64(key_base64) verify_key = decode_verify_key_bytes(key_id, key_bytes) verify_keys[key_id] = verify_key for key_id in response["signatures"][server_name]: if key_id not in response["verify_keys"]: raise ValueError( "Key response must include verification keys for all" " signatures" ) if key_id in verify_keys: verify_signed_json( response, server_name, verify_keys[key_id] ) # Cache the result in the datastore. time_now_ms = self.clock.time_msec() yield self.store.store_server_certificate( server_name, server_name, time_now_ms, tls_certificate, ) for key_id, key in verify_keys.items(): yield self.store.store_server_verify_key( server_name, server_name, time_now_ms, key ) for key_id in key_ids: if key_id in verify_keys: defer.returnValue(verify_keys[key_id]) return raise ValueError("No verification key found for given key ids")
def get_server_verify_key_v2_indirect(self, server_name, key_ids, perspective_name, perspective_keys): limiter = yield get_retry_limiter(perspective_name, self.clock, self.store) with limiter: # TODO(mark): Set the minimum_valid_until_ts to that needed by # the events being validated or the current time if validating # an incoming request. responses = yield self.client.post_json( destination=perspective_name, path=b"/_matrix/key/v2/query", data={ u"server_keys": { server_name: { key_id: { u"minimum_valid_until_ts": 0 } for key_id in key_ids } } }, ) keys = {} for response in responses: if (u"signatures" not in response or perspective_name not in response[u"signatures"]): raise ValueError( "Key response not signed by perspective server" " %r" % (perspective_name, )) verified = False for key_id in response[u"signatures"][perspective_name]: if key_id in perspective_keys: verify_signed_json(response, perspective_name, perspective_keys[key_id]) verified = True if not verified: logging.info( "Response from perspective server %r not signed with a" " known key, signed with: %r, known keys: %r", perspective_name, list(response[u"signatures"][perspective_name]), list(perspective_keys)) raise ValueError( "Response not signed with a known key for perspective" " server %r" % (perspective_name, )) response_keys = yield self.process_v2_response( server_name, perspective_name, response) keys.update(response_keys) yield self.store_keys( server_name=server_name, from_server=perspective_name, verify_keys=keys, ) defer.returnValue(keys)
def _transaction_transmission_loop(self): pending_pdus = [] try: self.transmission_loop_running = True # This will throw if we wouldn't retry. We do this here so we fail # quickly, but we will later check this again in the http client, # hence why we throw the result away. yield get_retry_limiter(self._destination, self._clock, self._store) pending_pdus = [] while True: device_message_edus, device_stream_id, dev_list_id = ( # We have to keep 2 free slots for presence and rr_edus yield self._get_new_device_messages(MAX_EDUS_PER_TRANSACTION - 2) ) # BEGIN CRITICAL SECTION # # In order to avoid a race condition, we need to make sure that # the following code (from popping the queues up to the point # where we decide if we actually have any pending messages) is # atomic - otherwise new PDUs or EDUs might arrive in the # meantime, but not get sent because we hold the # transmission_loop_running flag. pending_pdus = self._pending_pdus # We can only include at most 50 PDUs per transactions pending_pdus, self._pending_pdus = pending_pdus[:50], pending_pdus[50:] pending_edus = [] # We can only include at most 100 EDUs per transactions # rr_edus and pending_presence take at most one slot each pending_edus.extend(self._get_rr_edus(force_flush=False)) pending_presence = self._pending_presence self._pending_presence = {} if pending_presence: pending_edus.append( Edu( origin=self._server_name, destination=self._destination, edu_type="m.presence", content={ "push": [ format_user_presence_state( presence, self._clock.time_msec() ) for presence in pending_presence.values() ] }, ) ) pending_edus.extend(device_message_edus) pending_edus.extend( self._pop_pending_edus(MAX_EDUS_PER_TRANSACTION - len(pending_edus)) ) while ( len(pending_edus) < MAX_EDUS_PER_TRANSACTION and self._pending_edus_keyed ): _, val = self._pending_edus_keyed.popitem() pending_edus.append(val) if pending_pdus: logger.debug( "TX [%s] len(pending_pdus_by_dest[dest]) = %d", self._destination, len(pending_pdus), ) if not pending_pdus and not pending_edus: logger.debug("TX [%s] Nothing to send", self._destination) self._last_device_stream_id = device_stream_id return # if we've decided to send a transaction anyway, and we have room, we # may as well send any pending RRs if len(pending_edus) < MAX_EDUS_PER_TRANSACTION: pending_edus.extend(self._get_rr_edus(force_flush=True)) # END CRITICAL SECTION success = yield self._transaction_manager.send_new_transaction( self._destination, pending_pdus, pending_edus ) if success: sent_transactions_counter.inc() sent_edus_counter.inc(len(pending_edus)) for edu in pending_edus: sent_edus_by_type.labels(edu.edu_type).inc() # Remove the acknowledged device messages from the database # Only bother if we actually sent some device messages if device_message_edus: yield self._store.delete_device_msgs_for_remote( self._destination, device_stream_id ) logger.info( "Marking as sent %r %r", self._destination, dev_list_id ) yield self._store.mark_as_sent_devices_by_remote( self._destination, dev_list_id ) self._last_device_stream_id = device_stream_id self._last_device_list_stream_id = dev_list_id else: break except NotRetryingDestination as e: logger.debug( "TX [%s] not ready for retry yet (next retry at %s) - " "dropping transaction for now", self._destination, datetime.datetime.fromtimestamp( (e.retry_last_ts + e.retry_interval) / 1000.0 ), ) except FederationDeniedError as e: logger.info(e) except HttpResponseException as e: logger.warning( "TX [%s] Received %d response to transaction: %s", self._destination, e.code, e, ) except RequestSendFailed as e: logger.warning( "TX [%s] Failed to send transaction: %s", self._destination, e ) for p, _ in pending_pdus: logger.info( "Failed to send event %s to %s", p.event_id, self._destination ) except Exception: logger.exception("TX [%s] Failed to send transaction", self._destination) for p, _ in pending_pdus: logger.info( "Failed to send event %s to %s", p.event_id, self._destination ) finally: # We want to be *very* sure we clear this after we stop processing self.transmission_loop_running = False
def get_server_verify_key_v2_indirect(self, server_name, key_ids, perspective_name, perspective_keys): limiter = yield get_retry_limiter( perspective_name, self.clock, self.store ) with limiter: # TODO(mark): Set the minimum_valid_until_ts to that needed by # the events being validated or the current time if validating # an incoming request. query_response = yield self.client.post_json( destination=perspective_name, path=b"/_matrix/key/v2/query", data={ u"server_keys": { server_name: { key_id: { u"minimum_valid_until_ts": 0 } for key_id in key_ids } } }, ) keys = {} responses = query_response["server_keys"] for response in responses: if (u"signatures" not in response or perspective_name not in response[u"signatures"]): raise ValueError( "Key response not signed by perspective server" " %r" % (perspective_name,) ) verified = False for key_id in response[u"signatures"][perspective_name]: if key_id in perspective_keys: verify_signed_json( response, perspective_name, perspective_keys[key_id] ) verified = True if not verified: logging.info( "Response from perspective server %r not signed with a" " known key, signed with: %r, known keys: %r", perspective_name, list(response[u"signatures"][perspective_name]), list(perspective_keys) ) raise ValueError( "Response not signed with a known key for perspective" " server %r" % (perspective_name,) ) response_keys = yield self.process_v2_response( server_name, perspective_name, response ) keys.update(response_keys) yield self.store_keys( server_name=server_name, from_server=perspective_name, verify_keys=keys, ) defer.returnValue(keys)
def _attempt_new_transaction(self, destination): # list of (pending_pdu, deferred, order) if destination in self.pending_transactions: # XXX: pending_transactions can get stuck on by a never-ending # request at which point pending_pdus_by_dest just keeps growing. # we need application-layer timeouts of some flavour of these # requests logger.debug( "TX [%s] Transaction already in progress", destination ) return try: self.pending_transactions[destination] = 1 yield run_on_reactor() while True: pending_pdus = self.pending_pdus_by_dest.pop(destination, []) pending_edus = self.pending_edus_by_dest.pop(destination, []) pending_presence = self.pending_presence_by_dest.pop(destination, {}) pending_failures = self.pending_failures_by_dest.pop(destination, []) pending_edus.extend( self.pending_edus_keyed_by_dest.pop(destination, {}).values() ) limiter = yield get_retry_limiter( destination, self.clock, self.store, ) device_message_edus, device_stream_id = ( yield self._get_new_device_messages(destination) ) pending_edus.extend(device_message_edus) if pending_presence: pending_edus.append( Edu( origin=self.server_name, destination=destination, edu_type="m.presence", content={ "push": [ format_user_presence_state( presence, self.clock.time_msec() ) for presence in pending_presence.values() ] }, ) ) if pending_pdus: logger.debug("TX [%s] len(pending_pdus_by_dest[dest]) = %d", destination, len(pending_pdus)) if not pending_pdus and not pending_edus and not pending_failures: logger.debug("TX [%s] Nothing to send", destination) self.last_device_stream_id_by_dest[destination] = ( device_stream_id ) return success = yield self._send_new_transaction( destination, pending_pdus, pending_edus, pending_failures, device_stream_id, should_delete_from_device_stream=bool(device_message_edus), limiter=limiter, ) if not success: break except NotRetryingDestination: logger.info( "TX [%s] not ready for retry yet - " "dropping transaction for now", destination, ) finally: # We want to be *very* sure we delete this after we stop processing self.pending_transactions.pop(destination, None)
def get_pdu(self, destinations, event_id, outlier=False, timeout=None): """Requests the PDU with given origin and ID from the remote home servers. Will attempt to get the PDU from each destination in the list until one succeeds. This will persist the PDU locally upon receipt. Args: destinations (list): Which home servers to query pdu_origin (str): The home server that originally sent the pdu. event_id (str) outlier (bool): Indicates whether the PDU is an `outlier`, i.e. if it's from an arbitary point in the context as opposed to part of the current block of PDUs. Defaults to `False` timeout (int): How long to try (in ms) each destination for before moving to the next destination. None indicates no timeout. Returns: Deferred: Results in the requested PDU. """ # TODO: Rate limit the number of times we try and get the same event. if self._get_pdu_cache: e = self._get_pdu_cache.get(event_id) if e: defer.returnValue(e) pdu = None for destination in destinations: try: limiter = yield get_retry_limiter( destination, self._clock, self.store, ) with limiter: transaction_data = yield self.transport_layer.get_event( destination, event_id, timeout=timeout, ) logger.debug("transaction_data %r", transaction_data) pdu_list = [ self.event_from_pdu_json(p, outlier=outlier) for p in transaction_data["pdus"] ] if pdu_list and pdu_list[0]: pdu = pdu_list[0] # Check signatures are correct. pdu = yield self._check_sigs_and_hashes([pdu])[0] break except SynapseError: logger.info( "Failed to get PDU %s from %s because %s", event_id, destination, e, ) continue except CodeMessageException as e: if 400 <= e.code < 500: raise logger.info( "Failed to get PDU %s from %s because %s", event_id, destination, e, ) continue except NotRetryingDestination as e: logger.info(e.message) continue except Exception as e: logger.info( "Failed to get PDU %s from %s because %s", event_id, destination, e, ) continue if self._get_pdu_cache is not None and pdu: self._get_pdu_cache[event_id] = pdu defer.returnValue(pdu)
def _attempt_new_transaction(self, destination): if destination in self.pending_transactions: # XXX: pending_transactions can get stuck on by a never-ending # request at which point pending_pdus_by_dest just keeps growing. # we need application-layer timeouts of some flavour of these # requests logger.info("TX [%s] Transaction already in progress", destination) return logger.info("TX [%s] _attempt_new_transaction", destination) # list of (pending_pdu, deferred, order) pending_pdus = self.pending_pdus_by_dest.pop(destination, []) pending_edus = self.pending_edus_by_dest.pop(destination, []) pending_failures = self.pending_failures_by_dest.pop(destination, []) if pending_pdus: logger.info("TX [%s] len(pending_pdus_by_dest[dest]) = %d", destination, len(pending_pdus)) if not pending_pdus and not pending_edus and not pending_failures: logger.info("TX [%s] Nothing to send", destination) return # Sort based on the order field pending_pdus.sort(key=lambda t: t[2]) pdus = [x[0] for x in pending_pdus] edus = [x[0] for x in pending_edus] failures = [x[0].get_dict() for x in pending_failures] deferreds = [ x[1] for x in pending_pdus + pending_edus + pending_failures ] try: self.pending_transactions[destination] = 1 limiter = yield get_retry_limiter( destination, self._clock, self.store, ) logger.debug( "TX [%s] Attempting new transaction" " (pdus: %d, edus: %d, failures: %d)", destination, len(pending_pdus), len(pending_edus), len(pending_failures)) logger.debug("TX [%s] Persisting transaction...", destination) transaction = Transaction.create_new( origin_server_ts=int(self._clock.time_msec()), transaction_id=str(self._next_txn_id), origin=self.server_name, destination=destination, pdus=pdus, edus=edus, pdu_failures=failures, ) self._next_txn_id += 1 yield self.transaction_actions.prepare_to_send(transaction) logger.debug("TX [%s] Persisted transaction", destination) logger.info( "TX [%s] Sending transaction [%s]", destination, transaction.transaction_id, ) with limiter: # Actually send the transaction # FIXME (erikj): This is a bit of a hack to make the Pdu age # keys work def json_data_cb(): data = transaction.get_dict() now = int(self._clock.time_msec()) if "pdus" in data: for p in data["pdus"]: if "age_ts" in p: unsigned = p.setdefault("unsigned", {}) unsigned["age"] = now - int(p["age_ts"]) del p["age_ts"] return data try: response = yield self.transport_layer.send_transaction( transaction, json_data_cb) code = 200 if response: for e_id, r in response.get("pdus", {}).items(): if "error" in r: logger.warn( "Transaction returned error for %s: %s", e_id, r, ) except HttpResponseException as e: code = e.code response = e.response logger.info("TX [%s] got %d response", destination, code) logger.debug("TX [%s] Sent transaction", destination) logger.debug("TX [%s] Marking as delivered...", destination) yield self.transaction_actions.delivered(transaction, code, response) logger.debug("TX [%s] Marked as delivered", destination) logger.debug("TX [%s] Yielding to callbacks...", destination) for deferred in deferreds: if code == 200: deferred.callback(None) else: deferred.errback(RuntimeError("Got status %d" % code)) # Ensures we don't continue until all callbacks on that # deferred have fired try: yield deferred except: pass logger.debug("TX [%s] Yielded to callbacks", destination) except NotRetryingDestination: logger.info( "TX [%s] not ready for retry yet - " "dropping transaction for now", destination, ) except RuntimeError as e: # We capture this here as there as nothing actually listens # for this finishing functions deferred. logger.warn( "TX [%s] Problem in _attempt_transaction: %s", destination, e, ) except Exception as e: # We capture this here as there as nothing actually listens # for this finishing functions deferred. logger.warn( "TX [%s] Problem in _attempt_transaction: %s", destination, e, ) for deferred in deferreds: if not deferred.called: deferred.errback(e) finally: # We want to be *very* sure we delete this after we stop processing self.pending_transactions.pop(destination, None) # Check to see if there is anything else to send. self._attempt_new_transaction(destination)
def _transaction_transmission_loop(self, destination): pending_pdus = [] try: self.pending_transactions[destination] = 1 # This will throw if we wouldn't retry. We do this here so we fail # quickly, but we will later check this again in the http client, # hence why we throw the result away. yield get_retry_limiter(destination, self.clock, self.store) pending_pdus = [] while True: device_message_edus, device_stream_id, dev_list_id = ( yield self._get_new_device_messages(destination) ) # BEGIN CRITICAL SECTION # # In order to avoid a race condition, we need to make sure that # the following code (from popping the queues up to the point # where we decide if we actually have any pending messages) is # atomic - otherwise new PDUs or EDUs might arrive in the # meantime, but not get sent because we hold the # pending_transactions flag. pending_pdus = self.pending_pdus_by_dest.pop(destination, []) # We can only include at most 50 PDUs per transactions pending_pdus, leftover_pdus = pending_pdus[:50], pending_pdus[50:] if leftover_pdus: self.pending_pdus_by_dest[destination] = leftover_pdus pending_edus = self.pending_edus_by_dest.pop(destination, []) # We can only include at most 100 EDUs per transactions pending_edus, leftover_edus = pending_edus[:100], pending_edus[100:] if leftover_edus: self.pending_edus_by_dest[destination] = leftover_edus pending_presence = self.pending_presence_by_dest.pop(destination, {}) pending_edus.extend( self.pending_edus_keyed_by_dest.pop(destination, {}).values() ) pending_edus.extend(device_message_edus) if pending_presence: pending_edus.append( Edu( origin=self.server_name, destination=destination, edu_type="m.presence", content={ "push": [ format_user_presence_state( presence, self.clock.time_msec() ) for presence in pending_presence.values() ] }, ) ) if pending_pdus: logger.debug("TX [%s] len(pending_pdus_by_dest[dest]) = %d", destination, len(pending_pdus)) if not pending_pdus and not pending_edus: logger.debug("TX [%s] Nothing to send", destination) self.last_device_stream_id_by_dest[destination] = ( device_stream_id ) return # END CRITICAL SECTION success = yield self._send_new_transaction( destination, pending_pdus, pending_edus, ) if success: sent_transactions_counter.inc() # Remove the acknowledged device messages from the database # Only bother if we actually sent some device messages if device_message_edus: yield self.store.delete_device_msgs_for_remote( destination, device_stream_id ) logger.info("Marking as sent %r %r", destination, dev_list_id) yield self.store.mark_as_sent_devices_by_remote( destination, dev_list_id ) self.last_device_stream_id_by_dest[destination] = device_stream_id self.last_device_list_stream_id_by_dest[destination] = dev_list_id else: break except NotRetryingDestination as e: logger.debug( "TX [%s] not ready for retry yet (next retry at %s) - " "dropping transaction for now", destination, datetime.datetime.fromtimestamp( (e.retry_last_ts + e.retry_interval) / 1000.0 ), ) except FederationDeniedError as e: logger.info(e) except Exception as e: logger.warn( "TX [%s] Failed to send transaction: %s", destination, e, ) for p, _ in pending_pdus: logger.info("Failed to send event %s to %s", p.event_id, destination) finally: # We want to be *very* sure we delete this after we stop processing self.pending_transactions.pop(destination, None)
def _attempt_new_transaction(self, destination): # list of (pending_pdu, deferred, order) if destination in self.pending_transactions: # XXX: pending_transactions can get stuck on by a never-ending # request at which point pending_pdus_by_dest just keeps growing. # we need application-layer timeouts of some flavour of these # requests logger.debug( "TX [%s] Transaction already in progress", destination ) return try: self.pending_transactions[destination] = 1 # XXX: what's this for? yield run_on_reactor() while True: limiter = yield get_retry_limiter( destination, self.clock, self.store, backoff_on_404=True, # If we get a 404 the other side has gone ) device_message_edus, device_stream_id, dev_list_id = ( yield self._get_new_device_messages(destination) ) # BEGIN CRITICAL SECTION # # In order to avoid a race condition, we need to make sure that # the following code (from popping the queues up to the point # where we decide if we actually have any pending messages) is # atomic - otherwise new PDUs or EDUs might arrive in the # meantime, but not get sent because we hold the # pending_transactions flag. pending_pdus = self.pending_pdus_by_dest.pop(destination, []) pending_edus = self.pending_edus_by_dest.pop(destination, []) pending_presence = self.pending_presence_by_dest.pop(destination, {}) pending_failures = self.pending_failures_by_dest.pop(destination, []) pending_edus.extend( self.pending_edus_keyed_by_dest.pop(destination, {}).values() ) pending_edus.extend(device_message_edus) if pending_presence: pending_edus.append( Edu( origin=self.server_name, destination=destination, edu_type="m.presence", content={ "push": [ format_user_presence_state( presence, self.clock.time_msec() ) for presence in pending_presence.values() ] }, ) ) if pending_pdus: logger.debug("TX [%s] len(pending_pdus_by_dest[dest]) = %d", destination, len(pending_pdus)) if not pending_pdus and not pending_edus and not pending_failures: logger.debug("TX [%s] Nothing to send", destination) self.last_device_stream_id_by_dest[destination] = ( device_stream_id ) return # END CRITICAL SECTION success = yield self._send_new_transaction( destination, pending_pdus, pending_edus, pending_failures, limiter=limiter, ) if success: # Remove the acknowledged device messages from the database # Only bother if we actually sent some device messages if device_message_edus: yield self.store.delete_device_msgs_for_remote( destination, device_stream_id ) logger.info("Marking as sent %r %r", destination, dev_list_id) yield self.store.mark_as_sent_devices_by_remote( destination, dev_list_id ) self.last_device_stream_id_by_dest[destination] = device_stream_id self.last_device_list_stream_id_by_dest[destination] = dev_list_id else: break except NotRetryingDestination: logger.debug( "TX [%s] not ready for retry yet - " "dropping transaction for now", destination, ) finally: # We want to be *very* sure we delete this after we stop processing self.pending_transactions.pop(destination, None)
def _attempt_new_transaction(self, destination): yield run_on_reactor() # list of (pending_pdu, deferred, order) if destination in self.pending_transactions: # XXX: pending_transactions can get stuck on by a never-ending # request at which point pending_pdus_by_dest just keeps growing. # we need application-layer timeouts of some flavour of these # requests logger.debug( "TX [%s] Transaction already in progress", destination ) return pending_pdus = self.pending_pdus_by_dest.pop(destination, []) pending_edus = self.pending_edus_by_dest.pop(destination, []) pending_failures = self.pending_failures_by_dest.pop(destination, []) if pending_pdus: logger.debug("TX [%s] len(pending_pdus_by_dest[dest]) = %d", destination, len(pending_pdus)) if not pending_pdus and not pending_edus and not pending_failures: logger.debug("TX [%s] Nothing to send", destination) return try: self.pending_transactions[destination] = 1 logger.debug("TX [%s] _attempt_new_transaction", destination) # Sort based on the order field pending_pdus.sort(key=lambda t: t[2]) pdus = [x[0] for x in pending_pdus] edus = [x[0] for x in pending_edus] failures = [x[0].get_dict() for x in pending_failures] deferreds = [ x[1] for x in pending_pdus + pending_edus + pending_failures ] txn_id = str(self._next_txn_id) limiter = yield get_retry_limiter( destination, self._clock, self.store, ) logger.debug( "TX [%s] {%s} Attempting new transaction" " (pdus: %d, edus: %d, failures: %d)", destination, txn_id, len(pending_pdus), len(pending_edus), len(pending_failures) ) logger.debug("TX [%s] Persisting transaction...", destination) transaction = Transaction.create_new( origin_server_ts=int(self._clock.time_msec()), transaction_id=txn_id, origin=self.server_name, destination=destination, pdus=pdus, edus=edus, pdu_failures=failures, ) self._next_txn_id += 1 yield self.transaction_actions.prepare_to_send(transaction) logger.debug("TX [%s] Persisted transaction", destination) logger.info( "TX [%s] {%s} Sending transaction [%s]," " (PDUs: %d, EDUs: %d, failures: %d)", destination, txn_id, transaction.transaction_id, len(pending_pdus), len(pending_edus), len(pending_failures), ) with limiter: # Actually send the transaction # FIXME (erikj): This is a bit of a hack to make the Pdu age # keys work def json_data_cb(): data = transaction.get_dict() now = int(self._clock.time_msec()) if "pdus" in data: for p in data["pdus"]: if "age_ts" in p: unsigned = p.setdefault("unsigned", {}) unsigned["age"] = now - int(p["age_ts"]) del p["age_ts"] return data try: response = yield self.transport_layer.send_transaction( transaction, json_data_cb ) code = 200 if response: for e_id, r in response.get("pdus", {}).items(): if "error" in r: logger.warn( "Transaction returned error for %s: %s", e_id, r, ) except HttpResponseException as e: code = e.code response = e.response logger.info( "TX [%s] {%s} got %d response", destination, txn_id, code ) logger.debug("TX [%s] Sent transaction", destination) logger.debug("TX [%s] Marking as delivered...", destination) yield self.transaction_actions.delivered( transaction, code, response ) logger.debug("TX [%s] Marked as delivered", destination) logger.debug("TX [%s] Yielding to callbacks...", destination) for deferred in deferreds: if code == 200: deferred.callback(None) else: deferred.errback(RuntimeError("Got status %d" % code)) # Ensures we don't continue until all callbacks on that # deferred have fired try: yield deferred except: pass logger.debug("TX [%s] Yielded to callbacks", destination) except NotRetryingDestination: logger.info( "TX [%s] not ready for retry yet - " "dropping transaction for now", destination, ) except RuntimeError as e: # We capture this here as there as nothing actually listens # for this finishing functions deferred. logger.warn( "TX [%s] Problem in _attempt_transaction: %s", destination, e, ) except Exception as e: # We capture this here as there as nothing actually listens # for this finishing functions deferred. logger.warn( "TX [%s] Problem in _attempt_transaction: %s", destination, e, ) for deferred in deferreds: if not deferred.called: deferred.errback(e) finally: # We want to be *very* sure we delete this after we stop processing self.pending_transactions.pop(destination, None) # Check to see if there is anything else to send. self._attempt_new_transaction(destination)
def get_pdu(self, destinations, event_id, outlier=False, timeout=None): """Requests the PDU with given origin and ID from the remote home servers. Will attempt to get the PDU from each destination in the list until one succeeds. This will persist the PDU locally upon receipt. Args: destinations (list): Which home servers to query pdu_origin (str): The home server that originally sent the pdu. event_id (str) outlier (bool): Indicates whether the PDU is an `outlier`, i.e. if it's from an arbitary point in the context as opposed to part of the current block of PDUs. Defaults to `False` timeout (int): How long to try (in ms) each destination for before moving to the next destination. None indicates no timeout. Returns: Deferred: Results in the requested PDU. """ # TODO: Rate limit the number of times we try and get the same event. if self._get_pdu_cache: e = self._get_pdu_cache.get(event_id) if e: defer.returnValue(e) pdu = None for destination in destinations: try: limiter = yield get_retry_limiter( destination, self._clock, self.store, ) with limiter: transaction_data = yield self.transport_layer.get_event( destination, event_id, timeout=timeout, ) logger.debug("transaction_data %r", transaction_data) pdu_list = [ self.event_from_pdu_json(p, outlier=outlier) for p in transaction_data["pdus"] ] if pdu_list and pdu_list[0]: pdu = pdu_list[0] # Check signatures are correct. pdu = yield self._check_sigs_and_hash(pdu) break except SynapseError: logger.info( "Failed to get PDU %s from %s because %s", event_id, destination, e, ) continue except CodeMessageException as e: if 400 <= e.code < 500: raise logger.info( "Failed to get PDU %s from %s because %s", event_id, destination, e, ) continue except NotRetryingDestination as e: logger.info(e.message) continue except Exception as e: logger.info( "Failed to get PDU %s from %s because %s", event_id, destination, e, ) continue if self._get_pdu_cache is not None and pdu: self._get_pdu_cache[event_id] = pdu defer.returnValue(pdu)
def _transaction_transmission_loop(self, destination): pending_pdus = [] try: self.pending_transactions[destination] = 1 # This will throw if we wouldn't retry. We do this here so we fail # quickly, but we will later check this again in the http client, # hence why we throw the result away. yield get_retry_limiter(destination, self.clock, self.store) pending_pdus = [] while True: device_message_edus, device_stream_id, dev_list_id = ( yield self._get_new_device_messages(destination)) # BEGIN CRITICAL SECTION # # In order to avoid a race condition, we need to make sure that # the following code (from popping the queues up to the point # where we decide if we actually have any pending messages) is # atomic - otherwise new PDUs or EDUs might arrive in the # meantime, but not get sent because we hold the # pending_transactions flag. pending_pdus = self.pending_pdus_by_dest.pop(destination, []) pending_edus = self.pending_edus_by_dest.pop(destination, []) pending_presence = self.pending_presence_by_dest.pop( destination, {}) pending_failures = self.pending_failures_by_dest.pop( destination, []) pending_edus.extend( self.pending_edus_keyed_by_dest.pop(destination, {}).values()) pending_edus.extend(device_message_edus) if pending_presence: pending_edus.append( Edu( origin=self.server_name, destination=destination, edu_type="m.presence", content={ "push": [ format_user_presence_state( presence, self.clock.time_msec()) for presence in pending_presence.values() ] }, )) if pending_pdus: logger.debug( "TX [%s] len(pending_pdus_by_dest[dest]) = %d", destination, len(pending_pdus)) if not pending_pdus and not pending_edus and not pending_failures: logger.debug("TX [%s] Nothing to send", destination) self.last_device_stream_id_by_dest[destination] = ( device_stream_id) return # END CRITICAL SECTION success = yield self._send_new_transaction( destination, pending_pdus, pending_edus, pending_failures, ) if success: sent_transactions_counter.inc() # Remove the acknowledged device messages from the database # Only bother if we actually sent some device messages if device_message_edus: yield self.store.delete_device_msgs_for_remote( destination, device_stream_id) logger.info("Marking as sent %r %r", destination, dev_list_id) yield self.store.mark_as_sent_devices_by_remote( destination, dev_list_id) self.last_device_stream_id_by_dest[ destination] = device_stream_id self.last_device_list_stream_id_by_dest[ destination] = dev_list_id else: break except NotRetryingDestination as e: logger.debug( "TX [%s] not ready for retry yet (next retry at %s) - " "dropping transaction for now", destination, datetime.datetime.fromtimestamp( (e.retry_last_ts + e.retry_interval) / 1000.0), ) except FederationDeniedError as e: logger.info(e) except Exception as e: logger.warn( "TX [%s] Failed to send transaction: %s", destination, e, ) for p, _ in pending_pdus: logger.info("Failed to send event %s to %s", p.event_id, destination) finally: # We want to be *very* sure we delete this after we stop processing self.pending_transactions.pop(destination, None)
def _send_new_transaction(self, destination, pending_pdus, pending_edus, pending_failures): # Sort based on the order field pending_pdus.sort(key=lambda t: t[1]) pdus = [x[0] for x in pending_pdus] edus = pending_edus failures = [x.get_dict() for x in pending_failures] try: self.pending_transactions[destination] = 1 logger.debug("TX [%s] _attempt_new_transaction", destination) txn_id = str(self._next_txn_id) limiter = yield get_retry_limiter( destination, self.clock, self.store, ) logger.debug( "TX [%s] {%s} Attempting new transaction" " (pdus: %d, edus: %d, failures: %d)", destination, txn_id, len(pending_pdus), len(pending_edus), len(pending_failures)) logger.debug("TX [%s] Persisting transaction...", destination) transaction = Transaction.create_new( origin_server_ts=int(self.clock.time_msec()), transaction_id=txn_id, origin=self.server_name, destination=destination, pdus=pdus, edus=edus, pdu_failures=failures, ) self._next_txn_id += 1 yield self.transaction_actions.prepare_to_send(transaction) logger.debug("TX [%s] Persisted transaction", destination) logger.info( "TX [%s] {%s} Sending transaction [%s]," " (PDUs: %d, EDUs: %d, failures: %d)", destination, txn_id, transaction.transaction_id, len(pending_pdus), len(pending_edus), len(pending_failures), ) with limiter: # Actually send the transaction # FIXME (erikj): This is a bit of a hack to make the Pdu age # keys work def json_data_cb(): data = transaction.get_dict() now = int(self.clock.time_msec()) if "pdus" in data: for p in data["pdus"]: if "age_ts" in p: unsigned = p.setdefault("unsigned", {}) unsigned["age"] = now - int(p["age_ts"]) del p["age_ts"] return data try: response = yield self.transport_layer.send_transaction( transaction, json_data_cb) code = 200 if response: for e_id, r in response.get("pdus", {}).items(): if "error" in r: logger.warn( "Transaction returned error for %s: %s", e_id, r, ) except HttpResponseException as e: code = e.code response = e.response logger.info("TX [%s] {%s} got %d response", destination, txn_id, code) logger.debug("TX [%s] Sent transaction", destination) logger.debug("TX [%s] Marking as delivered...", destination) yield self.transaction_actions.delivered(transaction, code, response) logger.debug("TX [%s] Marked as delivered", destination) if code != 200: for p in pdus: logger.info("Failed to send event %s to %s", p.event_id, destination) except NotRetryingDestination: logger.info( "TX [%s] not ready for retry yet - " "dropping transaction for now", destination, ) except RuntimeError as e: # We capture this here as there as nothing actually listens # for this finishing functions deferred. logger.warn( "TX [%s] Problem in _attempt_transaction: %s", destination, e, ) for p in pdus: logger.info("Failed to send event %s to %s", p.event_id, destination) except Exception as e: # We capture this here as there as nothing actually listens # for this finishing functions deferred. logger.warn( "TX [%s] Problem in _attempt_transaction: %s", destination, e, ) for p in pdus: logger.info("Failed to send event %s to %s", p.event_id, destination) finally: # We want to be *very* sure we delete this after we stop processing self.pending_transactions.pop(destination, None)