def __init__(self, hs, media_repo, media_storage):
    """Wire up the URL preview resource.

    Args:
        hs: homeserver object; its auth, clock, datastore, hostname and
            config are read here.
        media_repo: media repository; its ``filepaths`` and
            ``primary_base_path`` are copied onto this object.
        media_storage: kept as-is on ``self.media_storage``.
    """
    super().__init__()

    self.auth = hs.get_auth()
    self.clock = hs.get_clock()
    self.filepaths = media_repo.filepaths
    self.max_spider_size = hs.config.max_spider_size
    self.server_name = hs.hostname
    self.store = hs.get_datastore()

    # Options for the client used to fetch previewed URLs: it is restricted
    # by the url-preview specific IP range whitelist/blacklist.
    spider_client_options = {
        "treq_args": {"browser_like_redirects": True},
        "ip_whitelist": hs.config.url_preview_ip_range_whitelist,
        "ip_blacklist": hs.config.url_preview_ip_range_blacklist,
    }
    self.client = SimpleHttpClient(hs, **spider_client_options)

    self.media_repo = media_repo
    self.primary_base_path = media_repo.primary_base_path
    self.media_storage = media_storage
    self.url_preview_url_blacklist = hs.config.url_preview_url_blacklist

    # don't spider URLs more often than once an hour
    one_hour_ms = 60 * 60 * 1000

    # memory cache mapping urls to an ObservableDeferred returning
    # JSON-encoded OG metadata
    self._cache = ExpiringCache(
        cache_name="url_previews",
        clock=self.clock,
        expiry_ms=one_hour_ms,
    )

    # Kick off the url cache expiry job every ten seconds.
    cleanup_interval_ms = 10 * 1000
    self._cleaner_loop = self.clock.looping_call(
        self._start_expire_url_cache_data, cleanup_interval_ms
    )
def bind_threepid(self, creds, mxid):
    """Ask the identity server named in ``creds`` to bind a 3pid to ``mxid``.

    Written in generator style (``yield`` / ``defer.returnValue``).

    Args:
        creds: dict holding ``sid`` plus the identity server and client
            secret, each accepted under a snake_case or camelCase key.
        mxid: the user ID sent as ``mxid`` in the bind request.

    Returns (via ``defer.returnValue``):
        The response from the bind call, or the JSON-decoded message of the
        ``CodeMessageException`` if the call failed with one.

    Raises:
        SynapseError: 400 if the identity server or client secret is absent.
    """
    yield run_on_reactor()
    logger.debug("binding threepid %r to %s", creds, mxid)
    http_client = SimpleHttpClient(self.hs)
    data = None

    # Accept both spellings of each key, preferring the snake_case one.
    for key in ('id_server', 'idServer'):
        if key in creds:
            id_server = creds[key]
            break
    else:
        raise SynapseError(400, "No id_server in creds")

    for key in ('client_secret', 'clientSecret'):
        if key in creds:
            client_secret = creds[key]
            break
    else:
        raise SynapseError(400, "No client_secret in creds")

    try:
        bind_url = "https://%s%s" % (
            id_server, "/_matrix/identity/api/v1/3pid/bind"
        )
        bind_args = {
            'sid': creds['sid'],
            'client_secret': client_secret,
            'mxid': mxid,
        }
        data = yield http_client.post_urlencoded_get_json(bind_url, bind_args)
        logger.debug("bound threepid %r to %s", creds, mxid)
    except CodeMessageException as e:
        # Hand the identity server's error body back to the caller.
        data = json.loads(e.msg)

    defer.returnValue(data)
def _threepid_from_creds(self, creds):
    """Look up the validated 3pid for ``creds`` on a trusted identity server.

    Written in generator style (``yield`` / ``defer.returnValue``).

    Args:
        creds: dict with ``idServer``, ``sid`` and ``clientSecret`` keys.

    Returns (via ``defer.returnValue``):
        The identity server's response if it contains a ``medium`` key,
        otherwise ``None``. ``None`` is also returned, without any request
        being made, when ``creds['idServer']`` is not in the trusted list.
    """
    # TODO: get this from the homeserver rather than creating a new one for
    # each request
    http_client = SimpleHttpClient(self.hs)

    # XXX: make this configurable!
    trustedIdServers = ['matrix.org:8090', 'matrix.org']

    if creds['idServer'] not in trustedIdServers:
        # Logger.warn is a deprecated alias; use Logger.warning.
        logger.warning(
            '%s is not a trusted ID server: rejecting 3pid credentials',
            creds['idServer'],
        )
        defer.returnValue(None)

    data = {}
    try:
        data = yield http_client.get_json(
            # XXX: This should be HTTPS
            "http://%s%s" % (
                creds['idServer'],
                "/_matrix/identity/api/v1/3pid/getValidated3pid"
            ),
            {'sid': creds['sid'], 'clientSecret': creds['clientSecret']}
        )
    except CodeMessageException as e:
        # Fall through with the error body; it will lack 'medium'
        # unless the server put one there.
        data = json.loads(e.msg)

    if 'medium' in data:
        defer.returnValue(data)
    defer.returnValue(None)
def bind_threepid(self, creds, mxid):
    """Bind the 3pid session described by ``creds`` to ``mxid``.

    Written in generator style (``yield`` / ``defer.returnValue``).

    Args:
        creds: dict with ``sid``, an identity server (``id_server`` or
            ``idServer``) and a client secret (``client_secret`` or
            ``clientSecret``).
        mxid: the user ID to bind.

    Returns (via ``defer.returnValue``):
        The bind response, or the JSON-decoded ``msg`` of a
        ``CodeMessageException`` raised by the request.

    Raises:
        SynapseError: 400 when no identity server or client secret is given.
    """
    yield run_on_reactor()
    logger.debug("binding threepid %r to %s", creds, mxid)
    http_client = SimpleHttpClient(self.hs)
    data = None

    # snake_case keys win over their camelCase equivalents.
    if 'id_server' in creds:
        server_key = 'id_server'
    elif 'idServer' in creds:
        server_key = 'idServer'
    else:
        raise SynapseError(400, "No id_server in creds")
    id_server = creds[server_key]

    if 'client_secret' in creds:
        secret_key = 'client_secret'
    elif 'clientSecret' in creds:
        secret_key = 'clientSecret'
    else:
        raise SynapseError(400, "No client_secret in creds")
    client_secret = creds[secret_key]

    try:
        data = yield http_client.post_urlencoded_get_json(
            "https://%s%s" % (id_server, "/_matrix/identity/api/v1/3pid/bind"),
            {'sid': creds['sid'], 'client_secret': client_secret, 'mxid': mxid},
        )
        logger.debug("bound threepid %r to %s", creds, mxid)
    except CodeMessageException as e:
        # Return the error payload rather than raising.
        data = json.loads(e.msg)

    defer.returnValue(data)
def __init__(self, hs):
    """Create the HTTP clients and 3pid-validation ratelimiters.

    Args:
        hs: homeserver object providing config, clock and the federation
            HTTP client. ``self.store`` is read below and is not assigned
            here, so it is presumably set by the parent initialiser.
    """
    super().__init__(hs)

    # An HTTP client for contacting trusted URLs.
    self.http_client = SimpleHttpClient(hs)

    # An HTTP client for contacting identity servers specified by clients.
    self.blacklisting_http_client = SimpleHttpClient(
        hs, ip_blacklist=hs.config.federation_ip_range_blacklist
    )

    self.federation_http_client = hs.get_federation_http_client()
    self.hs = hs
    self._web_client_location = hs.config.invite_client_location

    def new_3pid_validation_ratelimiter():
        # Both ratelimiters below share the rc_3pid_validation settings.
        return Ratelimiter(
            store=self.store,
            clock=hs.get_clock(),
            rate_hz=hs.config.ratelimiting.rc_3pid_validation.per_second,
            burst_count=hs.config.ratelimiting.rc_3pid_validation.burst_count,
        )

    # Ratelimiters for `/requestToken` endpoints.
    self._3pid_validation_ratelimiter_ip = new_3pid_validation_ratelimiter()
    self._3pid_validation_ratelimiter_address = new_3pid_validation_ratelimiter()
def __init__(self, hs: "HomeServer"):
    """Create the HTTP clients, config shortcuts and 3pid ratelimiters.

    Args:
        hs: homeserver object providing the datastore, config, clock and
            federation HTTP client.
    """
    self.store = hs.get_datastore()

    # An HTTP client for contacting trusted URLs.
    self.http_client = SimpleHttpClient(hs)

    # An HTTP client for contacting identity servers specified by clients.
    # It is restricted by the federation IP range blacklist/whitelist.
    self.blacklisting_http_client = SimpleHttpClient(
        hs,
        ip_blacklist=hs.config.server.federation_ip_range_blacklist,
        ip_whitelist=hs.config.server.federation_ip_range_whitelist,
    )

    self.federation_http_client = hs.get_federation_http_client()
    self.hs = hs

    registration_config = hs.config.registration
    self.rewrite_identity_server_urls = (
        registration_config.rewrite_identity_server_urls
    )
    self._enable_lookup = registration_config.enable_3pid_lookup
    self._web_client_location = hs.config.email.invite_client_location

    def new_3pid_validation_ratelimiter():
        # Both ratelimiters below share the rc_3pid_validation settings.
        return Ratelimiter(
            store=self.store,
            clock=hs.get_clock(),
            rate_hz=hs.config.ratelimiting.rc_3pid_validation.per_second,
            burst_count=hs.config.ratelimiting.rc_3pid_validation.burst_count,
        )

    # Ratelimiters for `/requestToken` endpoints.
    self._3pid_validation_ratelimiter_ip = new_3pid_validation_ratelimiter()
    self._3pid_validation_ratelimiter_address = new_3pid_validation_ratelimiter()
def _check_recaptcha(self, authdict, clientip):
    """Verify a recaptcha response against Google's siteverify endpoint.

    Written in generator style (``yield`` / ``defer.returnValue``).

    Args:
        authdict: auth dict from the client; must contain ``response``.
        clientip: sent to the verification endpoint as ``remoteip``.

    Returns (via ``defer.returnValue``):
        ``True`` when the decoded verification body has a truthy ``success``.

    Raises:
        LoginError: 400 (CAPTCHA_NEEDED) if ``response`` is missing;
            401 (UNAUTHORIZED) if verification does not succeed.
    """
    if "response" not in authdict:
        # Client tried to provide captcha but didn't give the parameter:
        # bad request.
        raise LoginError(
            400, "Captcha response is required", errcode=Codes.CAPTCHA_NEEDED
        )
    user_response = authdict["response"]

    logger.info(
        "Submitting recaptcha response %s with remoteip %s",
        user_response, clientip
    )

    verify_args = {
        'secret': self.hs.config.recaptcha_private_key,
        'response': user_response,
        'remoteip': clientip,
    }

    # TODO: get this from the homeserver rather than creating a new one for
    # each request
    try:
        client = SimpleHttpClient(self.hs)
        data = yield client.post_urlencoded_get_json(
            "https://www.google.com/recaptcha/api/siteverify",
            args=verify_args,
        )
    except PartialDownloadError as pde:
        # Twisted is silly
        data = pde.response

    # NOTE(review): `data` is JSON-decoded on both the success and the
    # partial-download path; confirm post_urlencoded_get_json returns an
    # undecoded body here.
    resp_body = simplejson.loads(data)

    verified = 'success' in resp_body and resp_body['success']
    if verified:
        defer.returnValue(True)
    raise LoginError(401, "", errcode=Codes.UNAUTHORIZED)
def __init__(self, hs):
    """Set up the HTTP clients used by the identity handler.

    Args:
        hs: homeserver object providing config and the federation client.
    """
    super(IdentityHandler, self).__init__(hs)

    # Plain client with no IP restrictions passed in.
    self.http_client = SimpleHttpClient(hs)

    # We create a blacklisting instance of SimpleHttpClient for contacting identity
    # servers specified by clients
    blacklist = hs.config.federation_ip_range_blacklist
    self.blacklisting_http_client = SimpleHttpClient(hs, ip_blacklist=blacklist)

    self.federation_http_client = hs.get_http_client()
    self.hs = hs
def test_client_ip_range_blacklist(self):
    """Ensure that Synapse does not try to connect to blacklisted IPs"""

    # Register two hostnames that resolve into the ranges we blacklist.
    self.reactor.lookups["internal"] = "127.0.0.1"
    self.reactor.lookups["internalv6"] = "fe80:0:0:0:0:8a2e:370:7337"
    blacklisted_ranges = IPSet(["127.0.0.0/8", "fe80::/64"])
    client = SimpleHttpClient(self.hs, ip_blacklist=blacklisted_ranges)

    # --- GET request to a blacklisted IPv4 address ---
    blocked_get = defer.ensureDeferred(
        client.get_json("http://internal:8008/foo/bar")
    )
    self.pump(1)

    # No TCP connection may have been attempted, and the request must have
    # failed at the DNS lookup stage.
    self.assertEqual(len(self.reactor.tcpClients), 0)
    self.failureResultOf(blocked_get, DNSLookupError)

    # --- POST request to a blacklisted IPv6 address ---
    blocked_post = defer.ensureDeferred(
        client.post_json_get_json("http://internalv6:8008/foo/bar", {})
    )

    # Move the reactor forwards
    self.pump(1)

    # Again: no connection, and the failure is a blacklisted DNS lookup.
    self.assertEqual(len(self.reactor.tcpClients), 0)
    self.failureResultOf(blocked_post, DNSLookupError)

    # --- GET request to a non-blacklisted IPv4 address ---
    allowed_get = defer.ensureDeferred(
        client.get_json("http://testserv:8008/foo/bar")
    )

    # Nothing has happened yet
    self.assertNoResult(allowed_get)

    # Move the reactor forwards
    self.pump(1)

    # This time the address resolved, so a connection was attempted.
    self.assertNotEqual(len(self.reactor.tcpClients), 0)

    # Connection will still fail as this IP address does not resolve to anything
    self.failureResultOf(allowed_get, RequestTimedOutError)
def _query_email(self, email):
    """Look up ``email`` on the hard-coded identity server.

    Written in generator style (``yield`` / ``defer.returnValue``).

    Args:
        email: the address sent as the ``address`` lookup parameter.

    Returns (via ``defer.returnValue``):
        Whatever the lookup request produced.
    """
    # TODO FIXME This should be configurable.
    # XXX: ID servers need to use HTTPS
    lookup_url = "http://%s%s" % (
        "matrix.org:8090", "/_matrix/identity/api/v1/lookup"
    )
    lookup_args = {'medium': 'email', 'address': email}

    client = SimpleHttpClient(self.hs)
    result = yield client.get_json(lookup_url, lookup_args)
    defer.returnValue(result)
def __init__(self, hs):
    """Set up the HTTP clients and config shortcuts for this handler.

    Args:
        hs: homeserver object providing config and the federation client.
    """
    super().__init__(hs)

    # An HTTP client for contacting trusted URLs.
    self.http_client = SimpleHttpClient(hs)

    # An HTTP client for contacting identity servers specified by clients.
    blacklist = hs.config.federation_ip_range_blacklist
    self.blacklisting_http_client = SimpleHttpClient(hs, ip_blacklist=blacklist)

    self.federation_http_client = hs.get_federation_http_client()
    self.hs = hs
    self._web_client_location = hs.config.invite_client_location
def _query_email(self, email):
    """Query the hard-coded identity server for an email address.

    Written in generator style (``yield`` / ``defer.returnValue``).

    Args:
        email: the address to look up.

    Returns (via ``defer.returnValue``):
        The result of the lookup request.
    """
    id_server = "matrix.org:8090"  # TODO FIXME This should be configurable.
    lookup_path = "/_matrix/identity/api/v1/lookup"

    http = SimpleHttpClient(self.hs)
    response = yield http.get_json(
        # XXX: ID servers need to use HTTPS
        "http://%s%s" % (id_server, lookup_path),
        {'medium': 'email', 'address': email},
    )
    defer.returnValue(response)
def _bind_threepid(self, creds, mxid):
    """Bind the 3pid session in ``creds`` to ``mxid`` on its identity server.

    Written in generator style (``yield`` / ``defer.returnValue``).

    Args:
        creds: dict with ``idServer``, ``sid`` and ``clientSecret`` keys.
        mxid: the user ID sent as ``mxid``.

    Returns (via ``defer.returnValue``):
        The result of the bind request.
    """
    yield
    logger.debug("binding threepid")

    client = SimpleHttpClient(self.hs)

    # XXX: Change when ID servers are all HTTPS
    bind_url = "http://%s%s" % (
        creds['idServer'], "/_matrix/identity/api/v1/3pid/bind"
    )
    bind_args = {
        'sid': creds['sid'],
        'clientSecret': creds['clientSecret'],
        'mxid': mxid,
    }
    result = yield client.post_urlencoded_get_json(bind_url, bind_args)

    logger.debug("bound threepid")
    defer.returnValue(result)
def __init__(self, _hs, profile_tag, user_name, app_id,
             app_display_name, device_display_name, pushkey, pushkey_ts,
             data, last_token, last_success, failing_since):
    """Create an HTTP pusher.

    All arguments are forwarded unchanged to the parent initialiser.

    Raises:
        PusherConfigException: if ``data`` has no ``url`` entry.
    """
    super(HttpPusher, self).__init__(
        _hs, profile_tag, user_name, app_id, app_display_name,
        device_display_name, pushkey, pushkey_ts, data, last_token,
        last_success, failing_since,
    )

    if 'url' not in data:
        raise PusherConfigException(
            "'url' required in data for HTTP pusher")

    self.url = data['url']
    self.httpCli = SimpleHttpClient(self.hs)

    # Keep a copy of the pusher data with the 'url' entry stripped.
    stripped = dict(self.data)
    del stripped['url']
    self.data_minus_url = stripped
def __init__(self, hs, media_repo, media_storage):
    """Wire up the URL preview resource.

    Args:
        hs: homeserver object; its auth, clock, datastore, hostname and
            config are read here.
        media_repo: media repository; its ``filepaths`` and
            ``primary_base_path`` are copied onto this object.
        media_storage: kept as-is on ``self.media_storage``.
    """
    Resource.__init__(self)

    self.auth = hs.get_auth()
    self.clock = hs.get_clock()
    self.filepaths = media_repo.filepaths
    self.max_spider_size = hs.config.max_spider_size
    self.server_name = hs.hostname
    self.store = hs.get_datastore()

    # Client used to fetch previewed URLs, restricted by the url-preview
    # specific IP range whitelist/blacklist.
    spider_client_options = dict(
        treq_args={"browser_like_redirects": True},
        ip_whitelist=hs.config.url_preview_ip_range_whitelist,
        ip_blacklist=hs.config.url_preview_ip_range_blacklist,
    )
    self.client = SimpleHttpClient(hs, **spider_client_options)

    self.media_repo = media_repo
    self.primary_base_path = media_repo.primary_base_path
    self.media_storage = media_storage
    self.url_preview_url_blacklist = hs.config.url_preview_url_blacklist

    # don't spider URLs more often than once an hour
    cache_expiry_ms = 60 * 60 * 1000

    # memory cache mapping urls to an ObservableDeferred returning
    # JSON-encoded OG metadata
    self._cache = ExpiringCache(
        cache_name="url_previews",
        clock=self.clock,
        expiry_ms=cache_expiry_ms,
    )

    # Run the url cache expiry job every ten seconds.
    self._cleaner_loop = self.clock.looping_call(
        self._start_expire_url_cache_data,
        10 * 1000,
    )
def _query_email(self, email):
    """Look up ``email`` on the hard-coded identity server.

    Written in generator style (``yield`` / ``defer.returnValue``).

    Args:
        email: the address sent as the ``address`` lookup parameter.

    Returns (via ``defer.returnValue``):
        The lookup response, or the JSON-decoded ``msg`` of a
        ``CodeMessageException`` raised by the request.
    """
    http_client = SimpleHttpClient(self.hs)
    try:
        data = yield http_client.get_json(
            # TODO FIXME This should be configurable.
            # XXX: ID servers need to use HTTPS
            "http://%s%s" % (
                "matrix.org:8090", "/_matrix/identity/api/v1/lookup"
            ),
            {'medium': 'email', 'address': email},
        )
    except CodeMessageException as e:
        # Hand back the error body instead of raising.
        data = json.loads(e.msg)

    defer.returnValue(data)
def __init__(self, hs):
    """Store the homeserver, its datastore and a fresh HTTP client.

    Args:
        hs: homeserver object.
    """
    super().__init__()
    self.hs = hs
    # self.auth = hs.get_auth()
    datastore = hs.get_datastore()
    self.store = datastore
    self.http_client = SimpleHttpClient(hs)
def __init__(self, _hs, profile_tag, user_name, app_id,
             app_display_name, device_display_name, pushkey, pushkey_ts,
             data, last_token, last_success, failing_since):
    """Initialise an HTTP pusher from its stored configuration.

    Every argument is passed straight through to the parent initialiser.

    Raises:
        PusherConfigException: when ``data`` lacks a ``url`` key.
    """
    super(HttpPusher, self).__init__(
        _hs, profile_tag, user_name, app_id,
        app_display_name, device_display_name,
        pushkey, pushkey_ts, data,
        last_token, last_success, failing_since,
    )

    has_url = 'url' in data
    if not has_url:
        raise PusherConfigException(
            "'url' required in data for HTTP pusher"
        )

    self.url = data['url']
    self.httpCli = SimpleHttpClient(self.hs)

    # Pusher data without the 'url' entry.
    without_url = dict(self.data)
    del without_url['url']
    self.data_minus_url = without_url
def _bind_threepid(self, creds, mxid):
    """Send a 3pid bind request to the identity server named in ``creds``.

    Written in generator style (``yield`` / ``defer.returnValue``).

    Args:
        creds: dict with ``idServer``, ``sid`` and ``clientSecret`` keys.
        mxid: the user ID to bind.

    Returns (via ``defer.returnValue``):
        The result of the bind request.
    """
    yield
    logger.debug("binding threepid")

    http = SimpleHttpClient(self.hs)
    bind_path = "/_matrix/identity/api/v1/3pid/bind"

    response = yield http.post_urlencoded_get_json(
        # XXX: Change when ID servers are all HTTPS
        "http://%s%s" % (creds['idServer'], bind_path),
        {
            'sid': creds['sid'],
            'clientSecret': creds['clientSecret'],
            'mxid': mxid,
        },
    )

    logger.debug("bound threepid")
    defer.returnValue(response)
def threepid_from_creds(self, creds):
    """Fetch the validated 3pid for ``creds`` from a trusted identity server.

    Written in generator style (``yield`` / ``defer.returnValue``).

    Args:
        creds: dict with ``sid``, an identity server (``id_server`` or
            ``idServer``) and a client secret (``client_secret`` or
            ``clientSecret``).

    Returns (via ``defer.returnValue``):
        The identity server's response if it contains a ``medium`` key,
        otherwise ``None``. ``None`` is also returned, without any request
        being made, when the identity server is not in the trusted list.

    Raises:
        SynapseError: 400 when no identity server or client secret is given.
    """
    yield run_on_reactor()

    # TODO: get this from the homeserver rather than creating a new one for
    # each request
    http_client = SimpleHttpClient(self.hs)

    # XXX: make this configurable!
    trustedIdServers = ['matrix.org', 'vector.im']

    if 'id_server' in creds:
        id_server = creds['id_server']
    elif 'idServer' in creds:
        id_server = creds['idServer']
    else:
        raise SynapseError(400, "No id_server in creds")

    if 'client_secret' in creds:
        client_secret = creds['client_secret']
    elif 'clientSecret' in creds:
        client_secret = creds['clientSecret']
    else:
        raise SynapseError(400, "No client_secret in creds")

    if id_server not in trustedIdServers:
        # Logger.warn is a deprecated alias; use Logger.warning.
        logger.warning(
            '%s is not a trusted ID server: rejecting 3pid credentials',
            id_server,
        )
        defer.returnValue(None)

    data = {}
    try:
        data = yield http_client.get_json(
            "https://%s%s" % (
                id_server,
                "/_matrix/identity/api/v1/3pid/getValidated3pid"
            ),
            {'sid': creds['sid'], 'client_secret': client_secret}
        )
    except CodeMessageException as e:
        # Fall through with the error body; it will lack 'medium'
        # unless the server put one there.
        data = json.loads(e.msg)

    if 'medium' in data:
        defer.returnValue(data)
    defer.returnValue(None)
def _query_email(self, email):
    """Query the hard-coded identity server for an email address.

    Written in generator style (``yield`` / ``defer.returnValue``).

    Args:
        email: the address to look up.

    Returns (via ``defer.returnValue``):
        The lookup response; if the request raises a
        ``CodeMessageException``, its JSON-decoded ``msg`` instead.
    """
    # TODO FIXME This should be configurable.
    # XXX: ID servers need to use HTTPS
    lookup_url = "http://%s%s" % (
        "matrix.org:8090",
        "/_matrix/identity/api/v1/lookup"
    )
    lookup_args = {'medium': 'email', 'address': email}

    http_client = SimpleHttpClient(self.hs)
    try:
        result = yield http_client.get_json(lookup_url, lookup_args)
    except CodeMessageException as e:
        # Return the error payload rather than raising.
        result = json.loads(e.msg)

    defer.returnValue(result)
def get_proxied_http_client(self) -> SimpleHttpClient:
    """
    An HTTP client that uses configured HTTP(S) proxies.

    The proxies are read from the ``http_proxy`` and ``HTTPS_PROXY``
    environment variables (as bytes).
    """
    http_proxy = os.getenvb(b"http_proxy")
    https_proxy = os.getenvb(b"HTTPS_PROXY")
    return SimpleHttpClient(
        self, http_proxy=http_proxy, https_proxy=https_proxy
    )
def threepid_from_creds(self, creds):
    """Fetch the validated 3pid for ``creds`` from a trusted identity server.

    Written in generator style (``yield`` / ``defer.returnValue``).

    Args:
        creds: dict with ``sid``, an identity server (``id_server`` or
            ``idServer``) and a client secret (``client_secret`` or
            ``clientSecret``).

    Returns (via ``defer.returnValue``):
        The identity server's response if it contains a ``medium`` key,
        otherwise ``None``. ``None`` is also returned, without any request
        being made, when the identity server is not in the trusted list.

    Raises:
        SynapseError: 400 when no identity server or client secret is given.
    """
    yield run_on_reactor()

    # TODO: get this from the homeserver rather than creating a new one for
    # each request
    http_client = SimpleHttpClient(self.hs)

    # XXX: make this configurable!
    trustedIdServers = ['matrix.org']

    if 'id_server' in creds:
        id_server = creds['id_server']
    elif 'idServer' in creds:
        id_server = creds['idServer']
    else:
        raise SynapseError(400, "No id_server in creds")

    if 'client_secret' in creds:
        client_secret = creds['client_secret']
    elif 'clientSecret' in creds:
        client_secret = creds['clientSecret']
    else:
        raise SynapseError(400, "No client_secret in creds")

    if id_server not in trustedIdServers:
        # Logger.warn is a deprecated alias; use Logger.warning.
        logger.warning(
            '%s is not a trusted ID server: rejecting 3pid credentials',
            id_server,
        )
        defer.returnValue(None)

    data = {}
    try:
        data = yield http_client.get_json(
            "https://%s%s" % (
                id_server,
                "/_matrix/identity/api/v1/3pid/getValidated3pid"
            ),
            {'sid': creds['sid'], 'client_secret': client_secret}
        )
    except CodeMessageException as e:
        # Fall through with the error body; it will lack 'medium'
        # unless the server put one there.
        data = json.loads(e.msg)

    if 'medium' in data:
        defer.returnValue(data)
    defer.returnValue(None)
def get_proxied_blacklisted_http_client(self) -> SimpleHttpClient:
    """
    An HTTP client that uses configured HTTP(S) proxies and blacklists IPs
    based on the IP range blacklist/whitelist.
    """
    client_options = dict(
        ip_whitelist=self.config.ip_range_whitelist,
        ip_blacklist=self.config.ip_range_blacklist,
        use_proxy=True,
    )
    return SimpleHttpClient(self, **client_options)
def get_proxied_blacklisted_http_client(self) -> SimpleHttpClient:
    """
    An HTTP client that uses configured HTTP(S) proxies and blacklists IPs
    based on the IP range blacklist/whitelist.

    The proxies are read from the ``http_proxy`` and ``HTTPS_PROXY``
    environment variables (as bytes).
    """
    ip_whitelist = self.config.ip_range_whitelist
    ip_blacklist = self.config.ip_range_blacklist
    http_proxy = os.getenvb(b"http_proxy")
    https_proxy = os.getenvb(b"HTTPS_PROXY")
    return SimpleHttpClient(
        self,
        ip_whitelist=ip_whitelist,
        ip_blacklist=ip_blacklist,
        http_proxy=http_proxy,
        https_proxy=https_proxy,
    )
def on_POST(self, request):
    """Handle a login submission, dispatching on its ``type`` field.

    Written in generator style (``yield`` / ``defer.returnValue``).

    Supported types are password, SAML2 (when enabled), CAS (when enabled)
    and token login.

    Raises:
        SynapseError: 400 for a disabled password login, an unknown login
            type, or when a ``KeyError`` escapes the handling code
            (reported as missing JSON keys).
    """
    login_submission = _parse_json(request)
    try:
        login_type = login_submission["type"]

        if login_type == LoginRestServlet.PASS_TYPE:
            if not self.password_enabled:
                raise SynapseError(400, "Password login has been disabled.")
            result = yield self.do_password_login(login_submission)
            defer.returnValue(result)

        elif self.saml2_enabled and login_type == LoginRestServlet.SAML2_TYPE:
            # Append the (quoted) relay state to the redirect URL, if given.
            relay_state = (
                "&RelayState="+urllib.quote(login_submission["relay_state"])
                if "relay_state" in login_submission
                else ""
            )
            redirect = {"uri": "%s%s" % (self.idp_redirect_url, relay_state)}
            defer.returnValue((200, redirect))

        # TODO Delete this after all CAS clients switch to token login instead
        elif self.cas_enabled and login_type == LoginRestServlet.CAS_TYPE:
            # TODO: get this from the homeserver rather than creating a new one for
            # each request
            http_client = SimpleHttpClient(self.hs)
            validate_uri = "%s/proxyValidate" % (self.cas_server_url,)
            validate_args = {
                "ticket": login_submission["ticket"],
                "service": login_submission["service"]
            }
            body = yield http_client.get_raw(validate_uri, validate_args)
            result = yield self.do_cas_login(body)
            defer.returnValue(result)

        elif login_type == LoginRestServlet.TOKEN_TYPE:
            result = yield self.do_token_login(login_submission)
            defer.returnValue(result)

        else:
            raise SynapseError(400, "Bad login type.")
    except KeyError:
        raise SynapseError(400, "Missing JSON keys.")
def __init__(
    self,
    hs: "HomeServer",
    media_repo: "MediaRepository",
    media_storage: MediaStorage,
):
    """Wire up the URL preview resource.

    Args:
        hs: homeserver object; its auth, clock, datastore, hostname,
            instance name and media config are read here.
        media_repo: media repository; its ``filepaths`` and
            ``primary_base_path`` are copied onto this object.
        media_storage: kept as-is on ``self.media_storage``.
    """
    super().__init__()

    self.auth = hs.get_auth()
    self.clock = hs.get_clock()
    self.filepaths = media_repo.filepaths
    self.max_spider_size = hs.config.media.max_spider_size
    self.server_name = hs.hostname
    self.store = hs.get_datastore()

    # Client used to fetch previewed URLs: restricted by the url-preview IP
    # range lists and created with use_proxy=True.
    spider_client_options = dict(
        treq_args={"browser_like_redirects": True},
        ip_whitelist=hs.config.media.url_preview_ip_range_whitelist,
        ip_blacklist=hs.config.media.url_preview_ip_range_blacklist,
        use_proxy=True,
    )
    self.client = SimpleHttpClient(hs, **spider_client_options)

    self.media_repo = media_repo
    self.primary_base_path = media_repo.primary_base_path
    self.media_storage = media_storage

    self._oembed = OEmbedProvider(hs)

    # We run the background jobs if we're the instance specified (or no
    # instance is specified, where we assume there is only one instance
    # serving media).
    configured_instance = hs.config.media.media_instance_running_background_jobs
    if configured_instance is None:
        runs_background_jobs = True
    else:
        runs_background_jobs = configured_instance == hs.get_instance_name()
    self._worker_run_media_background_jobs = runs_background_jobs

    self.url_preview_url_blacklist = hs.config.media.url_preview_url_blacklist
    self.url_preview_accept_language = hs.config.media.url_preview_accept_language

    # memory cache mapping urls to an ObservableDeferred returning
    # JSON-encoded OG metadata
    self._cache: ExpiringCache[str, ObservableDeferred] = ExpiringCache(
        cache_name="url_previews",
        clock=self.clock,
        # don't spider URLs more often than once an hour
        expiry_ms=ONE_HOUR,
    )

    if self._worker_run_media_background_jobs:
        # Run the url cache expiry job every ten seconds.
        cleanup_interval_ms = 10 * 1000
        self._cleaner_loop = self.clock.looping_call(
            self._start_expire_url_cache_data, cleanup_interval_ms
        )
def requestEmailToken(self, id_server, email, client_secret, send_attempt, **kwargs):
    """Proxy an email validation token request to ``id_server``.

    Written in generator style (``yield`` / ``defer.returnValue``).

    Args:
        id_server: host of the identity server to contact over HTTPS.
        email: sent as the ``email`` parameter.
        client_secret: sent as the ``client_secret`` parameter.
        send_attempt: sent as the ``send_attempt`` parameter.
        **kwargs: extra request parameters; these override the ones above
            on a name clash.

    Returns (via ``defer.returnValue``):
        The identity server's response.

    Raises:
        CodeMessageException: logged and re-raised if the request fails.
    """
    yield run_on_reactor()
    http_client = SimpleHttpClient(self.hs)

    params = {
        'email': email,
        'client_secret': client_secret,
        'send_attempt': send_attempt,
    }
    params.update(kwargs)

    request_url = "https://%s%s" % (
        id_server,
        "/_matrix/identity/api/v1/validate/email/requestToken",
    )

    try:
        data = yield http_client.post_urlencoded_get_json(request_url, params)
    except CodeMessageException as e:
        logger.info("Proxied requestToken failed: %r", e)
        raise e

    defer.returnValue(data)
def _threepid_from_creds(self, creds):
    """Look up the validated 3pid for ``creds`` on a trusted identity server.

    Written in generator style (``yield`` / ``defer.returnValue``).

    Args:
        creds: dict with ``idServer``, ``sid`` and ``clientSecret`` keys.

    Returns (via ``defer.returnValue``):
        The identity server's response if it contains a ``medium`` key,
        otherwise ``None``. ``None`` is also returned, without any request
        being made, when ``creds['idServer']`` is not in the trusted list.
    """
    # TODO: get this from the homeserver rather than creating a new one for
    # each request
    httpCli = SimpleHttpClient(self.hs)

    # XXX: make this configurable!
    trustedIdServers = ['matrix.org:8090']

    if creds['idServer'] not in trustedIdServers:
        # Logger.warn is a deprecated alias; use Logger.warning.
        logger.warning(
            '%s is not a trusted ID server: rejecting 3pid credentials',
            creds['idServer'],
        )
        defer.returnValue(None)

    data = yield httpCli.get_json(
        # XXX: This should be HTTPS
        "http://%s%s" % (
            creds['idServer'],
            "/_matrix/identity/api/v1/3pid/getValidated3pid"
        ),
        {'sid': creds['sid'], 'clientSecret': creds['clientSecret']}
    )

    if 'medium' in data:
        defer.returnValue(data)
    defer.returnValue(None)
def __init__(self, hs):
    """Store the homeserver and create the login ratelimiter and HTTP client.

    Args:
        hs: homeserver object providing config and clock.
    """
    super().__init__()
    self.hs = hs

    # self.get_ver_code_cache = ExpiringCache(
    #     cache_name="get_ver_code_cache",
    #     clock=self._clock,
    #     max_len=1000,
    #     expiry_ms=10 * 60 * 1000,
    #     reset_expiry_on_get=False,
    # )

    # Ratelimiter configured from the rc_login_address settings.
    ratelimiter_options = dict(
        clock=hs.get_clock(),
        rate_hz=self.hs.config.rc_login_address.per_second,
        burst_count=self.hs.config.rc_login_address.burst_count,
    )
    self._address_ratelimiter = Ratelimiter(**ratelimiter_options)

    self.http_client = SimpleHttpClient(hs)
def __init__(self, hs):
    """Store the homeserver and create the handlers, cache and clients used.

    Args:
        hs: homeserver object providing auth, the auth handler, the openid
            cache, config and clock.
    """
    super().__init__()
    self.hs = hs
    logger.info("------------init------")

    self.auth = hs.get_auth()
    self._auth_handler = hs.get_auth_handler()
    self._cache = hs.get_eachchat_cache_for_openid()

    # self.get_ver_code_cache = ExpiringCache(
    #     cache_name="get_ver_code_cache",
    #     clock=self._clock,
    #     max_len=1000,
    #     expiry_ms=10 * 60 * 1000,
    #     reset_expiry_on_get=False,
    # )

    # Ratelimiter configured from the rc_login_address settings.
    ratelimiter_options = dict(
        clock=hs.get_clock(),
        rate_hz=self.hs.config.rc_login_address.per_second,
        burst_count=self.hs.config.rc_login_address.burst_count,
    )
    self._address_ratelimiter = Ratelimiter(**ratelimiter_options)

    self.http_client = SimpleHttpClient(hs)
def __init__(self, hs: "HomeServer"):
    """Read the login-related config and create handlers and ratelimiters.

    Args:
        hs: homeserver object providing config, clock, auth and the
            auth/registration/SSO handlers.
    """
    super().__init__()
    self.hs = hs

    # JWT configuration variables.
    self.jwt_enabled = hs.config.jwt_enabled
    self.jwt_secret = hs.config.jwt_secret
    self.jwt_algorithm = hs.config.jwt_algorithm
    self.jwt_issuer = hs.config.jwt_issuer
    self.jwt_audiences = hs.config.jwt_audiences

    # SSO configuration.
    self.saml2_enabled = hs.config.saml2_enabled
    self.cas_enabled = hs.config.cas_enabled
    self.oidc_enabled = hs.config.oidc_enabled
    self._msc2858_enabled = hs.config.experimental.msc2858_enabled

    # Handlers and helpers obtained from the homeserver.
    self.auth = hs.get_auth()
    self.auth_handler = self.hs.get_auth_handler()
    self.registration_handler = hs.get_registration_handler()
    self._sso_handler = hs.get_sso_handler()
    self._well_known_builder = WellKnownBuilder(hs)

    # Ratelimiters configured from rc_login_address / rc_login_account.
    address_options = dict(
        clock=hs.get_clock(),
        rate_hz=self.hs.config.rc_login_address.per_second,
        burst_count=self.hs.config.rc_login_address.burst_count,
    )
    self._address_ratelimiter = Ratelimiter(**address_options)

    account_options = dict(
        clock=hs.get_clock(),
        rate_hz=self.hs.config.rc_login_account.per_second,
        burst_count=self.hs.config.rc_login_account.burst_count,
    )
    self._account_ratelimiter = Ratelimiter(**account_options)

    self.http_client = SimpleHttpClient(hs)
    self._cache = hs.get_eachchat_cache_for_openid()
class PreviewUrlResource(Resource):
    """Resource that builds and serves Open Graph (OG) previews of URLs.

    A GET request names a URL; the resource checks it against the configured
    URL blacklist, then serves JSON-encoded OG data from (in order) the
    in-memory cache, the datastore's URL cache, or a fresh download of the
    URL. A looping call periodically expires cached preview files.
    """

    isLeaf = True

    def __init__(self, hs, media_repo, media_storage):
        """Wire up the URL preview resource.

        Args:
            hs: homeserver object; its auth, clock, datastore, hostname and
                config are read here.
            media_repo: media repository; used for thumbnail generation and
                for its ``filepaths`` / ``primary_base_path``.
            media_storage: used to store downloaded files.
        """
        Resource.__init__(self)

        self.auth = hs.get_auth()
        self.clock = hs.get_clock()
        self.filepaths = media_repo.filepaths
        self.max_spider_size = hs.config.max_spider_size
        self.server_name = hs.hostname
        self.store = hs.get_datastore()
        self.client = SimpleHttpClient(
            hs,
            treq_args={"browser_like_redirects": True},
            ip_whitelist=hs.config.url_preview_ip_range_whitelist,
            ip_blacklist=hs.config.url_preview_ip_range_blacklist,
        )
        self.media_repo = media_repo
        self.primary_base_path = media_repo.primary_base_path
        self.media_storage = media_storage

        self.url_preview_url_blacklist = hs.config.url_preview_url_blacklist

        # memory cache mapping urls to an ObservableDeferred returning
        # JSON-encoded OG metadata
        self._cache = ExpiringCache(
            cache_name="url_previews",
            clock=self.clock,
            # don't spider URLs more often than once an hour
            expiry_ms=60 * 60 * 1000,
        )

        # Run the url cache expiry job every ten seconds.
        self._cleaner_loop = self.clock.looping_call(
            self._start_expire_url_cache_data, 10 * 1000,
        )

    def render_OPTIONS(self, request):
        """Answer OPTIONS with an empty JSON object and CORS headers."""
        return respond_with_json(request, 200, {}, send_cors=True)

    def render_GET(self, request):
        """Start the asynchronous GET handler; the response is sent later."""
        self._async_render_GET(request)
        return NOT_DONE_YET

    @wrap_json_request_handler
    @defer.inlineCallbacks
    def _async_render_GET(self, request):
        """Authenticate the request, apply the URL blacklist, serve a preview.

        Reads the ``url`` query parameter and the optional ``ts`` parameter
        (defaulting to the current time in ms).

        Raises:
            SynapseError: 403 if the URL matches a blacklist entry.
        """
        # XXX: if get_user_by_req fails, what should we do in an async render?
        requester = yield self.auth.get_user_by_req(request)
        url = parse_string(request, "url")
        if b"ts" in request.args:
            ts = parse_integer(request, "ts")
        else:
            ts = self.clock.time_msec()

        # XXX: we could move this into _do_preview if we wanted.
        url_tuple = urlparse.urlsplit(url)
        for entry in self.url_preview_url_blacklist:
            # An entry blocks the URL only if every attribute pattern in it
            # matches the corresponding component of the URL.
            match = True
            for attrib in entry:
                pattern = entry[attrib]
                value = getattr(url_tuple, attrib)
                logger.debug(
                    "Matching attrib '%s' with value '%s' against"
                    " pattern '%s'",
                    attrib, value, pattern,
                )

                if value is None:
                    match = False
                    break

                # Patterns starting with '^' are regexes; others are globs.
                if pattern.startswith('^'):
                    if not re.match(pattern, value):
                        match = False
                        break
                elif not fnmatch.fnmatch(value, pattern):
                    match = False
                    break

            if match:
                logger.warning(
                    "URL %s blocked by url_blacklist entry %s", url, entry
                )
                raise SynapseError(
                    403, "URL blocked by url pattern blacklist entry",
                    Codes.UNKNOWN
                )

        # the in-memory cache:
        # * ensures that only one request is active at a time
        # * takes load off the DB for the thundering herds
        # * also caches any failures (unlike the DB) so we don't keep
        #   requesting the same endpoint

        observable = self._cache.get(url)

        if not observable:
            download = run_in_background(
                self._do_preview,
                url, requester.user, ts,
            )
            observable = ObservableDeferred(
                download,
                consumeErrors=True
            )
            self._cache[url] = observable
        else:
            logger.info("Returning cached response")

        og = yield make_deferred_yieldable(observable.observe())
        respond_with_json_bytes(request, 200, og, send_cors=True)

    @defer.inlineCallbacks
    def _do_preview(self, url, user, ts):
        """Check the db, and download the URL and build a preview

        Args:
            url (str):
            user (str):
            ts (int):

        Returns:
            Deferred[bytes]: json-encoded og data
        """
        # check the URL cache in the DB (which will also provide us with
        # historical previews, if we have any)
        cache_result = yield self.store.get_url_cache(url, ts)
        if (
            cache_result and
            cache_result["expires_ts"] > ts and
            # Floor division: with true division only a response code of
            # exactly 200 would compare equal to 2 on Python 3.
            cache_result["response_code"] // 100 == 2
        ):
            # It may be stored as text in the database, not as bytes (such as
            # PostgreSQL). If so, encode it back before handing it on.
            og = cache_result["og"]
            if isinstance(og, six.text_type):
                og = og.encode('utf8')
            defer.returnValue(og)

        media_info = yield self._download_url(url, user)

        logger.debug("got media_info of '%s'", media_info)

        if _is_media(media_info['media_type']):
            file_id = media_info['filesystem_id']
            dims = yield self.media_repo._generate_thumbnails(
                None, file_id, file_id, media_info["media_type"],
                url_cache=True,
            )

            # define our OG response for this media
            og = {
                "og:description": media_info['download_name'],
                "og:image": "mxc://%s/%s" % (
                    self.server_name, media_info['filesystem_id']
                ),
                "og:image:type": media_info['media_type'],
                "matrix:image:size": media_info['media_length'],
            }

            if dims:
                og["og:image:width"] = dims['width']
                og["og:image:height"] = dims['height']
            else:
                logger.warning("Couldn't get dims for %s", url)

        elif _is_html(media_info['media_type']):
            # TODO: somehow stop a big HTML tree from exploding synapse's RAM

            with open(media_info['filename'], 'rb') as file:
                body = file.read()

            encoding = None

            # Let's try and figure out if it has an encoding set in a meta tag.
            # Limit it to the first 1kb, since it ought to be in the meta tags
            # at the top.
            match = _charset_match.search(body[:1000])

            # If we find a match, it should take precedence over the
            # Content-Type header, so set it here.
            if match:
                encoding = match.group(1).decode('ascii')

            # If we don't find a match, we'll look at the HTTP Content-Type, and
            # if that doesn't exist, we'll fall back to UTF-8.
            if not encoding:
                match = _content_type_match.match(
                    media_info['media_type']
                )
                encoding = match.group(1) if match else "utf-8"

            og = decode_and_calc_og(body, media_info['uri'], encoding)

            # pre-cache the image for posterity
            # FIXME: it might be cleaner to use the same flow as the main /preview_url
            # request itself and benefit from the same caching etc.  But for now we
            # just rely on the caching on the master request to speed things up.
            if 'og:image' in og and og['og:image']:
                image_info = yield self._download_url(
                    _rebase_url(og['og:image'], media_info['uri']), user
                )

                if _is_media(image_info['media_type']):
                    # TODO: make sure we don't choke on white-on-transparent images
                    file_id = image_info['filesystem_id']
                    dims = yield self.media_repo._generate_thumbnails(
                        None, file_id, file_id, image_info["media_type"],
                        url_cache=True,
                    )
                    if dims:
                        og["og:image:width"] = dims['width']
                        og["og:image:height"] = dims['height']
                    else:
                        logger.warning(
                            "Couldn't get dims for %s", og["og:image"]
                        )

                    # Point the preview at our local copy of the image.
                    og["og:image"] = "mxc://%s/%s" % (
                        self.server_name, image_info['filesystem_id']
                    )
                    og["og:image:type"] = image_info['media_type']
                    og["matrix:image:size"] = image_info['media_length']
                else:
                    del og["og:image"]
        else:
            logger.warning("Failed to find any OG data in %s", url)
            og = {}

        logger.debug("Calculated OG for %s as %s", url, og)

        jsonog = json.dumps(og).encode('utf8')

        # store OG in history-aware DB cache
        yield self.store.store_url_cache(
            url,
            media_info["response_code"],
            media_info["etag"],
            media_info["expires"] + media_info["created_ts"],
            jsonog,
            media_info["filesystem_id"],
            media_info["created_ts"],
        )

        defer.returnValue(jsonog)

    @defer.inlineCallbacks
    def _download_url(self, url, user):
        """Download ``url`` into the url cache and record it as local media.

        Args:
            url: the URL to fetch.
            user: recorded as the ``user_id`` of the stored media.

        Returns:
            Deferred[dict]: details of the download, with keys
            ``media_type``, ``media_length``, ``download_name``,
            ``created_ts``, ``filesystem_id``, ``filename``, ``uri``,
            ``response_code``, ``expires`` and ``etag``.

        Raises:
            SynapseError: 502 on DNS lookup failure, 500 on any other
                download error; SynapseErrors from the client pass through.
        """
        # TODO: we should probably honour robots.txt... except in practice
        # we're most likely being explicitly triggered by a human rather than a
        # bot, so are we really a robot?

        file_id = datetime.date.today().isoformat() + '_' + random_string(16)

        file_info = FileInfo(
            server_name=None,
            file_id=file_id,
            url_cache=True,
        )

        with self.media_storage.store_into_file(file_info) as (f, fname, finish):
            try:
                logger.debug("Trying to get url '%s'", url)
                length, headers, uri, code = yield self.client.get_file(
                    url, output_stream=f, max_size=self.max_spider_size,
                )
            except SynapseError:
                # Pass SynapseErrors through directly, so that the servlet
                # handler will return a SynapseError to the client instead of
                # blank data or a 500.
                raise
            except DNSLookupError:
                # DNS lookup returned no results
                # Note: This will also be the case if one of the resolved IP
                # addresses is blacklisted
                raise SynapseError(
                    502, "DNS resolution failure during URL preview generation",
                    Codes.UNKNOWN
                )
            except Exception as e:
                # FIXME: pass through 404s and other error messages nicely
                logger.warning("Error downloading %s: %r", url, e)
                raise SynapseError(
                    500, "Failed to download content: %s" % (
                        traceback.format_exception_only(sys.exc_info()[0], e),
                    ),
                    Codes.UNKNOWN,
                )
            yield finish()

        try:
            if b"Content-Type" in headers:
                media_type = headers[b"Content-Type"][0].decode('ascii')
            else:
                media_type = "application/octet-stream"
            time_now_ms = self.clock.time_msec()

            download_name = get_filename_from_headers(headers)

            yield self.store.store_local_media(
                media_id=file_id,
                media_type=media_type,
                # Reuse the reading taken above so the stored time agrees
                # with the created_ts we return.
                time_now_ms=time_now_ms,
                upload_name=download_name,
                media_length=length,
                user_id=user,
                url_cache=url,
            )

        except Exception as e:
            logger.error("Error handling downloaded %s: %r", url, e)
            # TODO: we really ought to delete the downloaded file in this
            # case, since we won't have recorded it in the db, and will
            # therefore not expire it.
            raise

        # Header names are bytes (see the Content-Type lookup above), so the
        # ETag must be looked up with a bytes key and decoded the same way.
        if b"ETag" in headers:
            etag = headers[b"ETag"][0].decode('ascii')
        else:
            etag = None

        defer.returnValue({
            "media_type": media_type,
            "media_length": length,
            "download_name": download_name,
            "created_ts": time_now_ms,
            "filesystem_id": file_id,
            "filename": fname,
            "uri": uri,
            "response_code": code,
            # FIXME: we should calculate a proper expiration based on the
            # Cache-Control and Expire headers.  But for now, assume 1 hour.
            "expires": 60 * 60 * 1000,
            "etag": etag,
        })

    def _start_expire_url_cache_data(self):
        """Run `_expire_url_cache_data` as a named background process."""
        return run_as_background_process(
            "expire_url_cache_data", self._expire_url_cache_data,
        )

    @defer.inlineCallbacks
    def _expire_url_cache_data(self):
        """Clean up expired url cache content, media and thumbnails.
        """
        # TODO: Delete from backup media store

        now = self.clock.time_msec()

        logger.info("Running url preview cache expiry")

        if not (yield self.store.has_completed_background_updates()):
            logger.info("Still running DB updates; skipping expiry")
            return

        # First we delete expired url cache entries
        media_ids = yield self.store.get_expired_url_cache(now)

        removed_media = []
        for media_id in media_ids:
            fname = self.filepaths.url_cache_filepath(media_id)
            try:
                os.remove(fname)
            except OSError as e:
                # If the path doesn't exist, meh
                if e.errno != errno.ENOENT:
                    logger.warning(
                        "Failed to remove media: %r: %s", media_id, e
                    )
                    continue

            removed_media.append(media_id)

            # Best effort: tidy up the parent directories, ignoring failures.
            try:
                dirs = self.filepaths.url_cache_filepath_dirs_to_delete(media_id)
                for dir_path in dirs:
                    os.rmdir(dir_path)
            except Exception:
                pass

        yield self.store.delete_url_cache(removed_media)

        if removed_media:
            logger.info("Deleted %d entries from url cache", len(removed_media))

        # Now we delete old images associated with the url cache.
        # These may be cached for a bit on the client (i.e., they
        # may have a room open with a preview url thing open).
        # So we wait a couple of days before deleting, just in case.
        expire_before = now - 2 * 24 * 60 * 60 * 1000
        media_ids = yield self.store.get_url_cache_media_before(expire_before)

        removed_media = []
        for media_id in media_ids:
            fname = self.filepaths.url_cache_filepath(media_id)
            try:
                os.remove(fname)
            except OSError as e:
                # If the path doesn't exist, meh
                if e.errno != errno.ENOENT:
                    logger.warning(
                        "Failed to remove media: %r: %s", media_id, e
                    )
                    continue

            # Best effort: tidy up the parent directories, ignoring failures.
            try:
                dirs = self.filepaths.url_cache_filepath_dirs_to_delete(media_id)
                for dir_path in dirs:
                    os.rmdir(dir_path)
            except Exception:
                pass

            thumbnail_dir = self.filepaths.url_cache_thumbnail_directory(media_id)
            try:
                shutil.rmtree(thumbnail_dir)
            except OSError as e:
                # If the path doesn't exist, meh
                if e.errno != errno.ENOENT:
                    logger.warning(
                        "Failed to remove media: %r: %s", media_id, e
                    )
                    continue

            removed_media.append(media_id)

            # Best effort: tidy up the thumbnail parent directories.
            try:
                dirs = self.filepaths.url_cache_thumbnail_dirs_to_delete(media_id)
                for dir_path in dirs:
                    os.rmdir(dir_path)
            except Exception:
                pass

        yield self.store.delete_url_cache_media(removed_media)

        logger.info("Deleted %d media from url cache", len(removed_media))
def get_simple_http_client(self) -> SimpleHttpClient:
    """Build a new ``SimpleHttpClient``, passing this object as its sole argument."""
    client = SimpleHttpClient(self)
    return client
class PreviewUrlResource(DirectServeResource):
    """Resource serving URL previews as JSON-encoded OpenGraph ("og") metadata.

    A preview is built by downloading the requested URL, deriving OG data from
    it (thumbnailing it when it is media, parsing it when it is HTML) and then
    caching the result both in memory and in the datastore. The resource also
    schedules a recurring job which deletes expired URL cache files.
    """

    isLeaf = True

    def __init__(self, hs, media_repo, media_storage):
        """Wire up the resource from the homeserver and media repository.

        Args:
            hs: the homeserver; provides auth, clock, datastore, hostname and
                the url preview configuration.
            media_repo: the media repository; provides ``filepaths``,
                ``primary_base_path`` and thumbnail generation.
            media_storage: used to store downloaded files.
        """
        super().__init__()

        self.auth = hs.get_auth()
        self.clock = hs.get_clock()
        self.filepaths = media_repo.filepaths
        self.max_spider_size = hs.config.max_spider_size
        self.server_name = hs.hostname
        self.store = hs.get_datastore()
        self.client = SimpleHttpClient(
            hs,
            treq_args={"browser_like_redirects": True},
            ip_whitelist=hs.config.url_preview_ip_range_whitelist,
            ip_blacklist=hs.config.url_preview_ip_range_blacklist,
        )
        self.media_repo = media_repo
        self.primary_base_path = media_repo.primary_base_path
        self.media_storage = media_storage

        self.url_preview_url_blacklist = hs.config.url_preview_url_blacklist

        # memory cache mapping urls to an ObservableDeferred returning
        # JSON-encoded OG metadata
        self._cache = ExpiringCache(
            cache_name="url_previews",
            clock=self.clock,
            # don't spider URLs more often than once an hour
            expiry_ms=60 * 60 * 1000,
        )

        # Periodically clean up expired url cache data.
        # NOTE(review): the interval is presumably in milliseconds (10s) - confirm
        # against the clock's looping_call contract.
        self._cleaner_loop = self.clock.looping_call(
            self._start_expire_url_cache_data, 10 * 1000
        )

    def render_OPTIONS(self, request):
        """Answer an OPTIONS request with an empty JSON object and CORS headers."""
        request.setHeader(b"Allow", b"OPTIONS, GET")
        return respond_with_json(request, 200, {}, send_cors=True)

    @wrap_json_request_handler
    async def _async_render_GET(self, request):
        """Handle a GET: authenticate, apply the URL blacklist, send the preview.

        Reads the ``url`` query parameter and the optional ``ts`` parameter
        (defaulting to the current time).

        Raises:
            SynapseError: 403 if the URL matches a blacklist entry.
        """
        # XXX: if get_user_by_req fails, what should we do in an async render?
        requester = await self.auth.get_user_by_req(request)
        url = parse_string(request, "url")
        if b"ts" in request.args:
            ts = parse_integer(request, "ts")
        else:
            ts = self.clock.time_msec()

        # XXX: we could move this into _do_preview if we wanted.
        url_tuple = urlparse.urlsplit(url)
        for entry in self.url_preview_url_blacklist:
            # An entry blocks the URL only if *every* attribute it lists matches.
            match = True
            for attrib in entry:
                pattern = entry[attrib]
                value = getattr(url_tuple, attrib)
                logger.debug(
                    "Matching attrib '%s' with value '%s' against pattern '%s'",
                    attrib,
                    value,
                    pattern,
                )

                if value is None:
                    match = False
                elif pattern.startswith("^"):
                    # Patterns starting with "^" are regexes; others are globs.
                    if not re.match(pattern, value):
                        match = False
                elif not fnmatch.fnmatch(value, pattern):
                    match = False

                if not match:
                    # One mismatch is enough to rule this entry out.
                    break

            if match:
                logger.warning(
                    "URL %s blocked by url_blacklist entry %s", url, entry
                )
                raise SynapseError(
                    403, "URL blocked by url pattern blacklist entry", Codes.UNKNOWN
                )

        # the in-memory cache:
        # * ensures that only one request is active at a time
        # * takes load off the DB for the thundering herds
        # * also caches any failures (unlike the DB) so we don't keep
        #   requesting the same endpoint
        observable = self._cache.get(url)

        if not observable:
            download = run_in_background(self._do_preview, url, requester.user, ts)
            observable = ObservableDeferred(download, consumeErrors=True)
            self._cache[url] = observable
        else:
            logger.info("Returning cached response")

        og = await make_deferred_yieldable(defer.maybeDeferred(observable.observe))
        respond_with_json_bytes(request, 200, og, send_cors=True)

    @defer.inlineCallbacks
    def _do_preview(self, url, user, ts):
        """Check the db, and download the URL and build a preview

        Args:
            url (str):
            user (str):
            ts (int):

        Returns:
            Deferred[bytes]: json-encoded og data
        """
        # check the URL cache in the DB (which will also provide us with
        # historical previews, if we have any)
        cache_result = yield self.store.get_url_cache(url, ts)
        if (
            cache_result
            and cache_result["expires_ts"] > ts
            # Floor division: with true division only a code of exactly 200
            # would compare equal to 2, so other 2xx results were never reused.
            and cache_result["response_code"] // 100 == 2
        ):
            # It may be stored as text in the database, not as bytes (such as
            # PostgreSQL). If so, encode it back before handing it on.
            og = cache_result["og"]
            if isinstance(og, six.text_type):
                og = og.encode("utf8")
            defer.returnValue(og)

        media_info = yield self._download_url(url, user)

        logger.debug("got media_info of '%s'", media_info)

        if _is_media(media_info["media_type"]):
            file_id = media_info["filesystem_id"]
            dims = yield self.media_repo._generate_thumbnails(
                None, file_id, file_id, media_info["media_type"], url_cache=True
            )

            # define our OG response for this media
            og = {
                "og:description": media_info["download_name"],
                "og:image": "mxc://%s/%s"
                % (self.server_name, media_info["filesystem_id"]),
                "og:image:type": media_info["media_type"],
                "matrix:image:size": media_info["media_length"],
            }

            if dims:
                og["og:image:width"] = dims["width"]
                og["og:image:height"] = dims["height"]
            else:
                logger.warning("Couldn't get dims for %s", url)

        elif _is_html(media_info["media_type"]):
            # TODO: somehow stop a big HTML tree from exploding synapse's RAM

            with open(media_info["filename"], "rb") as file:
                body = file.read()

            encoding = None

            # Let's try and figure out if it has an encoding set in a meta tag.
            # Limit it to the first 1kb, since it ought to be in the meta tags
            # at the top.
            match = _charset_match.search(body[:1000])

            # If we find a match, it should take precedence over the
            # Content-Type header, so set it here.
            if match:
                encoding = match.group(1).decode("ascii")

            # If we don't find a match, we'll look at the HTTP Content-Type, and
            # if that doesn't exist, we'll fall back to UTF-8.
            if not encoding:
                match = _content_type_match.match(media_info["media_type"])
                encoding = match.group(1) if match else "utf-8"

            og = decode_and_calc_og(body, media_info["uri"], encoding)

            # pre-cache the image for posterity
            # FIXME: it might be cleaner to use the same flow as the main /preview_url
            # request itself and benefit from the same caching etc.  But for now we
            # just rely on the caching on the master request to speed things up.
            if "og:image" in og and og["og:image"]:
                image_info = yield self._download_url(
                    _rebase_url(og["og:image"], media_info["uri"]), user
                )

                if _is_media(image_info["media_type"]):
                    # TODO: make sure we don't choke on white-on-transparent images
                    file_id = image_info["filesystem_id"]
                    dims = yield self.media_repo._generate_thumbnails(
                        None,
                        file_id,
                        file_id,
                        image_info["media_type"],
                        url_cache=True,
                    )
                    if dims:
                        og["og:image:width"] = dims["width"]
                        og["og:image:height"] = dims["height"]
                    else:
                        logger.warning("Couldn't get dims for %s", og["og:image"])

                    og["og:image"] = "mxc://%s/%s" % (
                        self.server_name,
                        image_info["filesystem_id"],
                    )
                    og["og:image:type"] = image_info["media_type"]
                    og["matrix:image:size"] = image_info["media_length"]
                else:
                    del og["og:image"]
        else:
            logger.warning("Failed to find any OG data in %s", url)
            og = {}

        logger.debug("Calculated OG for %s as %s", url, og)

        jsonog = json.dumps(og).encode("utf8")

        # store OG in history-aware DB cache
        yield self.store.store_url_cache(
            url,
            media_info["response_code"],
            media_info["etag"],
            media_info["expires"] + media_info["created_ts"],
            jsonog,
            media_info["filesystem_id"],
            media_info["created_ts"],
        )

        defer.returnValue(jsonog)

    @defer.inlineCallbacks
    def _download_url(self, url, user):
        """Download a URL into the url cache and record it in the datastore.

        Args:
            url (str): the URL to fetch.
            user: passed through to the datastore as the ``user_id`` of the
                stored media.

        Returns:
            Deferred[dict]: with keys ``media_type``, ``media_length``,
            ``download_name``, ``created_ts``, ``filesystem_id``, ``filename``,
            ``uri``, ``response_code``, ``expires`` and ``etag``.

        Raises:
            SynapseError: 502 on DNS lookup failure, 500 on any other download
                failure; SynapseErrors raised by the HTTP client pass through.
        """
        # TODO: we should probably honour robots.txt... except in practice
        # we're most likely being explicitly triggered by a human rather than a
        # bot, so are we really a robot?

        file_id = datetime.date.today().isoformat() + "_" + random_string(16)

        file_info = FileInfo(server_name=None, file_id=file_id, url_cache=True)

        with self.media_storage.store_into_file(file_info) as (f, fname, finish):
            try:
                logger.debug("Trying to get url '%s'", url)
                length, headers, uri, code = yield self.client.get_file(
                    url, output_stream=f, max_size=self.max_spider_size
                )
            except SynapseError:
                # Pass SynapseErrors through directly, so that the servlet
                # handler will return a SynapseError to the client instead of
                # blank data or a 500.
                raise
            except DNSLookupError:
                # DNS lookup returned no results
                # Note: This will also be the case if one of the resolved IP
                # addresses is blacklisted
                raise SynapseError(
                    502,
                    "DNS resolution failure during URL preview generation",
                    Codes.UNKNOWN,
                )
            except Exception as e:
                # FIXME: pass through 404s and other error messages nicely
                logger.warning("Error downloading %s: %r", url, e)
                raise SynapseError(
                    500,
                    "Failed to download content: %s"
                    % (traceback.format_exception_only(sys.exc_info()[0], e),),
                    Codes.UNKNOWN,
                )
            yield finish()

        try:
            if b"Content-Type" in headers:
                media_type = headers[b"Content-Type"][0].decode("ascii")
            else:
                media_type = "application/octet-stream"
            time_now_ms = self.clock.time_msec()

            download_name = get_filename_from_headers(headers)

            yield self.store.store_local_media(
                media_id=file_id,
                media_type=media_type,
                # Reuse the timestamp so the stored media and the returned
                # "created_ts" agree.
                time_now_ms=time_now_ms,
                upload_name=download_name,
                media_length=length,
                user_id=user,
                url_cache=url,
            )

        except Exception as e:
            logger.error("Error handling downloaded %s: %r", url, e)
            # TODO: we really ought to delete the downloaded file in this
            # case, since we won't have recorded it in the db, and will
            # therefore not expire it.
            raise

        # Header names are bytes (as with b"Content-Type" above), so a str key
        # would never match and the ETag would always be dropped.
        if b"ETag" in headers:
            etag = headers[b"ETag"][0].decode("ascii", errors="replace")
        else:
            etag = None

        defer.returnValue(
            {
                "media_type": media_type,
                "media_length": length,
                "download_name": download_name,
                "created_ts": time_now_ms,
                "filesystem_id": file_id,
                "filename": fname,
                "uri": uri,
                "response_code": code,
                # FIXME: we should calculate a proper expiration based on the
                # Cache-Control and Expire headers.  But for now, assume 1 hour.
                "expires": 60 * 60 * 1000,
                "etag": etag,
            }
        )

    def _start_expire_url_cache_data(self):
        """Run ``_expire_url_cache_data`` as a named background process."""
        return run_as_background_process(
            "expire_url_cache_data", self._expire_url_cache_data
        )

    @defer.inlineCallbacks
    def _expire_url_cache_data(self):
        """Clean up expired url cache content, media and thumbnails.

        Does nothing while database background updates are still running.
        Files that cannot be removed (for reasons other than not existing) are
        logged and left registered in the datastore.
        """
        # TODO: Delete from backup media store

        now = self.clock.time_msec()

        logger.info("Running url preview cache expiry")

        if not (yield self.store.has_completed_background_updates()):
            logger.info("Still running DB updates; skipping expiry")
            return

        # First we delete expired url cache entries
        media_ids = yield self.store.get_expired_url_cache(now)

        removed_media = []
        for media_id in media_ids:
            fname = self.filepaths.url_cache_filepath(media_id)
            try:
                os.remove(fname)
            except OSError as e:
                # If the path doesn't exist, meh
                if e.errno != errno.ENOENT:
                    logger.warning("Failed to remove media: %r: %s", media_id, e)
                    continue

            removed_media.append(media_id)

            # Best-effort removal of the parent directories; any failure here
            # is deliberately ignored.
            try:
                dirs = self.filepaths.url_cache_filepath_dirs_to_delete(media_id)
                for dir_path in dirs:
                    os.rmdir(dir_path)
            except Exception:
                pass

        yield self.store.delete_url_cache(removed_media)

        if removed_media:
            logger.info("Deleted %d entries from url cache", len(removed_media))

        # Now we delete old images associated with the url cache.
        # These may be cached for a bit on the client (i.e., they
        # may have a room open with a preview url thing open).
        # So we wait a couple of days before deleting, just in case.
        expire_before = now - 2 * 24 * 60 * 60 * 1000
        media_ids = yield self.store.get_url_cache_media_before(expire_before)

        removed_media = []
        for media_id in media_ids:
            fname = self.filepaths.url_cache_filepath(media_id)
            try:
                os.remove(fname)
            except OSError as e:
                # If the path doesn't exist, meh
                if e.errno != errno.ENOENT:
                    logger.warning("Failed to remove media: %r: %s", media_id, e)
                    continue

            # Best-effort removal of the parent directories.
            try:
                dirs = self.filepaths.url_cache_filepath_dirs_to_delete(media_id)
                for dir_path in dirs:
                    os.rmdir(dir_path)
            except Exception:
                pass

            thumbnail_dir = self.filepaths.url_cache_thumbnail_directory(media_id)
            try:
                shutil.rmtree(thumbnail_dir)
            except OSError as e:
                # If the path doesn't exist, meh
                if e.errno != errno.ENOENT:
                    logger.warning("Failed to remove media: %r: %s", media_id, e)
                    continue

            removed_media.append(media_id)

            # Best-effort removal of the thumbnail parent directories.
            try:
                dirs = self.filepaths.url_cache_thumbnail_dirs_to_delete(media_id)
                for dir_path in dirs:
                    os.rmdir(dir_path)
            except Exception:
                pass

        yield self.store.delete_url_cache_media(removed_media)

        logger.info("Deleted %d media from url cache", len(removed_media))
def build_simple_http_client(self):
    """Build a new ``SimpleHttpClient``, passing this object as its sole argument."""
    client = SimpleHttpClient(self)
    return client
def get_proxied_http_client(self) -> SimpleHttpClient:
    """Build an HTTP client that uses the configured HTTP(S) proxies.

    The client is a new ``SimpleHttpClient`` created with ``use_proxy=True``.
    """
    proxied_client = SimpleHttpClient(self, use_proxy=True)
    return proxied_client
def get_simple_http_client(self) -> SimpleHttpClient:
    """Build an HTTP client with no special configuration.

    The client is a new ``SimpleHttpClient`` given only this object.
    """
    plain_client = SimpleHttpClient(self)
    return plain_client
class IdentityHandler(BaseHandler): def __init__(self, hs): super(IdentityHandler, self).__init__(hs) self.http_client = SimpleHttpClient(hs) # We create a blacklisting instance of SimpleHttpClient for contacting identity # servers specified by clients self.blacklisting_http_client = SimpleHttpClient( hs, ip_blacklist=hs.config.federation_ip_range_blacklist) self.federation_http_client = hs.get_http_client() self.hs = hs @defer.inlineCallbacks def threepid_from_creds(self, id_server, creds): """ Retrieve and validate a threepid identifier from a "credentials" dictionary against a given identity server Args: id_server (str): The identity server to validate 3PIDs against. Must be a complete URL including the protocol (http(s)://) creds (dict[str, str]): Dictionary containing the following keys: * client_secret|clientSecret: A unique secret str provided by the client * sid: The ID of the validation session Returns: Deferred[dict[str,str|int]|None]: A dictionary consisting of response params to the /getValidated3pid endpoint of the Identity Service API, or None if the threepid was not found """ client_secret = creds.get("client_secret") or creds.get("clientSecret") if not client_secret: raise SynapseError(400, "Missing param client_secret in creds", errcode=Codes.MISSING_PARAM) session_id = creds.get("sid") if not session_id: raise SynapseError(400, "Missing param session_id in creds", errcode=Codes.MISSING_PARAM) query_params = {"sid": session_id, "client_secret": client_secret} url = id_server + "/_matrix/identity/api/v1/3pid/getValidated3pid" try: data = yield self.http_client.get_json(url, query_params) except TimeoutError: raise SynapseError(500, "Timed out contacting identity server") except HttpResponseException as e: logger.info( "%s returned %i for threepid validation for: %s", id_server, e.code, creds, ) return None # Old versions of Sydent return a 200 http code even on a failed validation # check. 
Thus, in addition to the HttpResponseException check above (which # checks for non-200 errors), we need to make sure validation_session isn't # actually an error, identified by the absence of a "medium" key # See https://github.com/matrix-org/sydent/issues/215 for details if "medium" in data: return data logger.info("%s reported non-validated threepid: %s", id_server, creds) return None @defer.inlineCallbacks def bind_threepid(self, client_secret, sid, mxid, id_server, id_access_token=None, use_v2=True): """Bind a 3PID to an identity server Args: client_secret (str): A unique secret provided by the client sid (str): The ID of the validation session mxid (str): The MXID to bind the 3PID to id_server (str): The domain of the identity server to query id_access_token (str): The access token to authenticate to the identity server with, if necessary. Required if use_v2 is true use_v2 (bool): Whether to use v2 Identity Service API endpoints. Defaults to True Returns: Deferred[dict]: The response from the identity server """ logger.debug("Proxying threepid bind request for %s to %s", mxid, id_server) # If an id_access_token is not supplied, force usage of v1 if id_access_token is None: use_v2 = False # Decide which API endpoint URLs to use headers = {} bind_data = {"sid": sid, "client_secret": client_secret, "mxid": mxid} if use_v2: bind_url = "https://%s/_matrix/identity/v2/3pid/bind" % ( id_server, ) headers["Authorization"] = create_id_access_token_header( id_access_token) else: bind_url = "https://%s/_matrix/identity/api/v1/3pid/bind" % ( id_server, ) try: # Use the blacklisting http client as this call is only to identity servers # provided by a client data = yield self.blacklisting_http_client.post_json_get_json( bind_url, bind_data, headers=headers) # Remember where we bound the threepid yield self.store.add_user_bound_threepid( user_id=mxid, medium=data["medium"], address=data["address"], id_server=id_server, ) return data except HttpResponseException as e: if 
e.code != 404 or not use_v2: logger.error("3PID bind failed with Matrix error: %r", e) raise e.to_synapse_error() except TimeoutError: raise SynapseError(500, "Timed out contacting identity server") except CodeMessageException as e: data = json.loads(e.msg) # XXX WAT? return data logger.info("Got 404 when POSTing JSON %s, falling back to v1 URL", bind_url) res = yield self.bind_threepid(client_secret, sid, mxid, id_server, id_access_token, use_v2=False) return res @defer.inlineCallbacks def try_unbind_threepid(self, mxid, threepid): """Attempt to remove a 3PID from an identity server, or if one is not provided, all identity servers we're aware the binding is present on Args: mxid (str): Matrix user ID of binding to be removed threepid (dict): Dict with medium & address of binding to be removed, and an optional id_server. Raises: SynapseError: If we failed to contact the identity server Returns: Deferred[bool]: True on success, otherwise False if the identity server doesn't support unbinding (or no identity server found to contact). 
""" if threepid.get("id_server"): id_servers = [threepid["id_server"]] else: id_servers = yield self.store.get_id_servers_user_bound( user_id=mxid, medium=threepid["medium"], address=threepid["address"]) # We don't know where to unbind, so we don't have a choice but to return if not id_servers: return False changed = True for id_server in id_servers: changed &= yield self.try_unbind_threepid_with_id_server( mxid, threepid, id_server) return changed @defer.inlineCallbacks def try_unbind_threepid_with_id_server(self, mxid, threepid, id_server): """Removes a binding from an identity server Args: mxid (str): Matrix user ID of binding to be removed threepid (dict): Dict with medium & address of binding to be removed id_server (str): Identity server to unbind from Raises: SynapseError: If we failed to contact the identity server Returns: Deferred[bool]: True on success, otherwise False if the identity server doesn't support unbinding """ url = "https://%s/_matrix/identity/api/v1/3pid/unbind" % (id_server, ) url_bytes = "/_matrix/identity/api/v1/3pid/unbind".encode("ascii") content = { "mxid": mxid, "threepid": { "medium": threepid["medium"], "address": threepid["address"] }, } # we abuse the federation http client to sign the request, but we have to send it # using the normal http client since we don't want the SRV lookup and want normal # 'browser-like' HTTPS. 
auth_headers = self.federation_http_client.build_auth_headers( destination=None, method="POST", url_bytes=url_bytes, content=content, destination_is=id_server, ) headers = {b"Authorization": auth_headers} try: # Use the blacklisting http client as this call is only to identity servers # provided by a client yield self.blacklisting_http_client.post_json_get_json( url, content, headers) changed = True except HttpResponseException as e: changed = False if e.code in (400, 404, 501): # The remote server probably doesn't support unbinding (yet) logger.warning("Received %d response while unbinding threepid", e.code) else: logger.error( "Failed to unbind threepid on identity server: %s", e) raise SynapseError(500, "Failed to contact identity server") except TimeoutError: raise SynapseError(500, "Timed out contacting identity server") yield self.store.remove_user_bound_threepid( user_id=mxid, medium=threepid["medium"], address=threepid["address"], id_server=id_server, ) return changed @defer.inlineCallbacks def send_threepid_validation( self, email_address, client_secret, send_attempt, send_email_func, next_link=None, ): """Send a threepid validation email for password reset or registration purposes Args: email_address (str): The user's email address client_secret (str): The provided client secret send_attempt (int): Which send attempt this is send_email_func (func): A function that takes an email address, token, client_secret and session_id, sends an email and returns a Deferred. 
next_link (str|None): The URL to redirect the user to after validation Returns: The new session_id upon success Raises: SynapseError is an error occurred when sending the email """ # Check that this email/client_secret/send_attempt combo is new or # greater than what we've seen previously session = yield self.store.get_threepid_validation_session( "email", client_secret, address=email_address, validated=False) # Check to see if a session already exists and that it is not yet # marked as validated if session and session.get("validated_at") is None: session_id = session["session_id"] last_send_attempt = session["last_send_attempt"] # Check that the send_attempt is higher than previous attempts if send_attempt <= last_send_attempt: # If not, just return a success without sending an email return session_id else: # An non-validated session does not exist yet. # Generate a session id session_id = random_string(16) if next_link: # Manipulate the next_link to add the sid, because the caller won't get # it until we send a response, by which time we've sent the mail. if "?" in next_link: next_link += "&" else: next_link += "?" 
next_link += "sid=" + urllib.parse.quote(session_id) # Generate a new validation token token = random_string(32) # Send the mail with the link containing the token, client_secret # and session_id try: yield send_email_func(email_address, token, client_secret, session_id) except Exception: logger.exception("Error sending threepid validation email to %s", email_address) raise SynapseError( 500, "An error was encountered when sending the email") token_expires = (self.hs.clock.time_msec() + self.hs.config.email_validation_token_lifetime) yield self.store.start_or_continue_validation_session( "email", email_address, session_id, client_secret, send_attempt, next_link, token, token_expires, ) return session_id @defer.inlineCallbacks def requestEmailToken(self, id_server, email, client_secret, send_attempt, next_link=None): """ Request an external server send an email on our behalf for the purposes of threepid validation. Args: id_server (str): The identity server to proxy to email (str): The email to send the message to client_secret (str): The unique client_secret sends by the user send_attempt (int): Which attempt this is next_link: A link to redirect the user to once they submit the token Returns: The json response body from the server """ params = { "email": email, "client_secret": client_secret, "send_attempt": send_attempt, } if next_link: params["next_link"] = next_link if self.hs.config.using_identity_server_from_trusted_list: # Warn that a deprecated config option is in use logger.warning( 'The config option "trust_identity_server_for_password_resets" ' 'has been replaced by "account_threepid_delegate". 
' "Please consult the sample config at docs/sample_config.yaml for " "details and update your config file.") try: data = yield self.http_client.post_json_get_json( id_server + "/_matrix/identity/api/v1/validate/email/requestToken", params, ) return data except HttpResponseException as e: logger.info("Proxied requestToken failed: %r", e) raise e.to_synapse_error() except TimeoutError: raise SynapseError(500, "Timed out contacting identity server") @defer.inlineCallbacks def requestMsisdnToken( self, id_server, country, phone_number, client_secret, send_attempt, next_link=None, ): """ Request an external server send an SMS message on our behalf for the purposes of threepid validation. Args: id_server (str): The identity server to proxy to country (str): The country code of the phone number phone_number (str): The number to send the message to client_secret (str): The unique client_secret sends by the user send_attempt (int): Which attempt this is next_link: A link to redirect the user to once they submit the token Returns: The json response body from the server """ params = { "country": country, "phone_number": phone_number, "client_secret": client_secret, "send_attempt": send_attempt, } if next_link: params["next_link"] = next_link if self.hs.config.using_identity_server_from_trusted_list: # Warn that a deprecated config option is in use logger.warning( 'The config option "trust_identity_server_for_password_resets" ' 'has been replaced by "account_threepid_delegate". 
' "Please consult the sample config at docs/sample_config.yaml for " "details and update your config file.") try: data = yield self.http_client.post_json_get_json( id_server + "/_matrix/identity/api/v1/validate/msisdn/requestToken", params, ) except HttpResponseException as e: logger.info("Proxied requestToken failed: %r", e) raise e.to_synapse_error() except TimeoutError: raise SynapseError(500, "Timed out contacting identity server") assert self.hs.config.public_baseurl # we need to tell the client to send the token back to us, since it doesn't # otherwise know where to send it, so add submit_url response parameter # (see also MSC2078) data["submit_url"] = ( self.hs.config.public_baseurl + "_matrix/client/unstable/add_threepid/msisdn/submit_token") return data @defer.inlineCallbacks def validate_threepid_session(self, client_secret, sid): """Validates a threepid session with only the client secret and session ID Tries validating against any configured account_threepid_delegates as well as locally. Args: client_secret (str): A secret provided by the client sid (str): The ID of the session Returns: Dict[str, str|int] if validation was successful, otherwise None """ # XXX: We shouldn't need to keep wrapping and unwrapping this value threepid_creds = {"client_secret": client_secret, "sid": sid} # We don't actually know which medium this 3PID is. 
Thus we first assume it's email, # and if validation fails we try msisdn validation_session = None # Try to validate as email if self.hs.config.threepid_behaviour_email == ThreepidBehaviour.REMOTE: # Ask our delegated email identity server validation_session = yield self.threepid_from_creds( self.hs.config.account_threepid_delegate_email, threepid_creds) elif self.hs.config.threepid_behaviour_email == ThreepidBehaviour.LOCAL: # Get a validated session matching these details validation_session = yield self.store.get_threepid_validation_session( "email", client_secret, sid=sid, validated=True) if validation_session: return validation_session # Try to validate as msisdn if self.hs.config.account_threepid_delegate_msisdn: # Ask our delegated msisdn identity server validation_session = yield self.threepid_from_creds( self.hs.config.account_threepid_delegate_msisdn, threepid_creds) return validation_session @defer.inlineCallbacks def proxy_msisdn_submit_token(self, id_server, client_secret, sid, token): """Proxy a POST submitToken request to an identity server for verification purposes Args: id_server (str): The identity server URL to contact client_secret (str): Secret provided by the client sid (str): The ID of the session token (str): The verification token Raises: SynapseError: If we failed to contact the identity server Returns: Deferred[dict]: The response dict from the identity server """ body = {"client_secret": client_secret, "sid": sid, "token": token} try: return (yield self.http_client.post_json_get_json( id_server + "/_matrix/identity/api/v1/validate/msisdn/submitToken", body, )) except TimeoutError: raise SynapseError(500, "Timed out contacting identity server") except HttpResponseException as e: logger.warning( "Error contacting msisdn account_threepid_delegate: %s", e) raise SynapseError(400, "Error contacting the identity server") @defer.inlineCallbacks def lookup_3pid(self, id_server, medium, address, id_access_token=None): """Looks up a 3pid in the 
passed identity server. Args: id_server (str): The server name (including port, if required) of the identity server to use. medium (str): The type of the third party identifier (e.g. "email"). address (str): The third party identifier (e.g. "*****@*****.**"). id_access_token (str|None): The access token to authenticate to the identity server with Returns: str|None: the matrix ID of the 3pid, or None if it is not recognized. """ if id_access_token is not None: try: results = yield self._lookup_3pid_v2(id_server, id_access_token, medium, address) return results except Exception as e: # Catch HttpResponseExcept for a non-200 response code # Check if this identity server does not know about v2 lookups if isinstance(e, HttpResponseException) and e.code == 404: # This is an old identity server that does not yet support v2 lookups logger.warning( "Attempted v2 lookup on v1 identity server %s. Falling " "back to v1", id_server, ) else: logger.warning("Error when looking up hashing details: %s", e) return None return (yield self._lookup_3pid_v1(id_server, medium, address)) @defer.inlineCallbacks def _lookup_3pid_v1(self, id_server, medium, address): """Looks up a 3pid in the passed identity server using v1 lookup. Args: id_server (str): The server name (including port, if required) of the identity server to use. medium (str): The type of the third party identifier (e.g. "email"). address (str): The third party identifier (e.g. "*****@*****.**"). Returns: str: the matrix ID of the 3pid, or None if it is not recognized. 
""" try: data = yield self.blacklisting_http_client.get_json( "%s%s/_matrix/identity/api/v1/lookup" % (id_server_scheme, id_server), { "medium": medium, "address": address }, ) if "mxid" in data: if "signatures" not in data: raise AuthError(401, "No signatures on 3pid binding") yield self._verify_any_signature(data, id_server) return data["mxid"] except TimeoutError: raise SynapseError(500, "Timed out contacting identity server") except IOError as e: logger.warning("Error from v1 identity server lookup: %s" % (e, )) return None @defer.inlineCallbacks def _lookup_3pid_v2(self, id_server, id_access_token, medium, address): """Looks up a 3pid in the passed identity server using v2 lookup. Args: id_server (str): The server name (including port, if required) of the identity server to use. id_access_token (str): The access token to authenticate to the identity server with medium (str): The type of the third party identifier (e.g. "email"). address (str): The third party identifier (e.g. "*****@*****.**"). Returns: Deferred[str|None]: the matrix ID of the 3pid, or None if it is not recognised. 
""" # Check what hashing details are supported by this identity server try: hash_details = yield self.blacklisting_http_client.get_json( "%s%s/_matrix/identity/v2/hash_details" % (id_server_scheme, id_server), {"access_token": id_access_token}, ) except TimeoutError: raise SynapseError(500, "Timed out contacting identity server") if not isinstance(hash_details, dict): logger.warning( "Got non-dict object when checking hash details of %s%s: %s", id_server_scheme, id_server, hash_details, ) raise SynapseError( 400, "Non-dict object from %s%s during v2 hash_details request: %s" % (id_server_scheme, id_server, hash_details), ) # Extract information from hash_details supported_lookup_algorithms = hash_details.get("algorithms") lookup_pepper = hash_details.get("lookup_pepper") if (not supported_lookup_algorithms or not isinstance(supported_lookup_algorithms, list) or not lookup_pepper or not isinstance(lookup_pepper, str)): raise SynapseError( 400, "Invalid hash details received from identity server %s%s: %s" % (id_server_scheme, id_server, hash_details), ) # Check if any of the supported lookup algorithms are present if LookupAlgorithm.SHA256 in supported_lookup_algorithms: # Perform a hashed lookup lookup_algorithm = LookupAlgorithm.SHA256 # Hash address, medium and the pepper with sha256 to_hash = "%s %s %s" % (address, medium, lookup_pepper) lookup_value = sha256_and_url_safe_base64(to_hash) elif LookupAlgorithm.NONE in supported_lookup_algorithms: # Perform a non-hashed lookup lookup_algorithm = LookupAlgorithm.NONE # Combine together plaintext address and medium lookup_value = "%s %s" % (address, medium) else: logger.warning( "None of the provided lookup algorithms of %s are supported: %s", id_server, supported_lookup_algorithms, ) raise SynapseError( 400, "Provided identity server does not support any v2 lookup " "algorithms that this homeserver supports.", ) # Authenticate with identity server given the access token from the client headers = { "Authorization": 
create_id_access_token_header(id_access_token) } try: lookup_results = yield self.blacklisting_http_client.post_json_get_json( "%s%s/_matrix/identity/v2/lookup" % (id_server_scheme, id_server), { "addresses": [lookup_value], "algorithm": lookup_algorithm, "pepper": lookup_pepper, }, headers=headers, ) except TimeoutError: raise SynapseError(500, "Timed out contacting identity server") except Exception as e: logger.warning("Error when performing a v2 3pid lookup: %s", e) raise SynapseError( 500, "Unknown error occurred during identity server lookup") # Check for a mapping from what we looked up to an MXID if "mappings" not in lookup_results or not isinstance( lookup_results["mappings"], dict): logger.warning("No results from 3pid lookup") return None # Return the MXID if it's available, or None otherwise mxid = lookup_results["mappings"].get(lookup_value) return mxid @defer.inlineCallbacks def _verify_any_signature(self, data, server_hostname): if server_hostname not in data["signatures"]: raise AuthError( 401, "No signature from server %s" % (server_hostname, )) for key_name, signature in data["signatures"][server_hostname].items(): try: key_data = yield self.blacklisting_http_client.get_json( "%s%s/_matrix/identity/api/v1/pubkey/%s" % (id_server_scheme, server_hostname, key_name)) except TimeoutError: raise SynapseError(500, "Timed out contacting identity server") if "public_key" not in key_data: raise AuthError( 401, "No public key named %s from %s" % (key_name, server_hostname)) verify_signed_json( data, server_hostname, decode_verify_key_bytes(key_name, decode_base64(key_data["public_key"])), ) return @defer.inlineCallbacks def ask_id_server_for_third_party_invite( self, requester, id_server, medium, address, room_id, inviter_user_id, room_alias, room_avatar_url, room_join_rules, room_name, inviter_display_name, inviter_avatar_url, id_access_token=None, ): """ Asks an identity server for a third party invite. 
Args: requester (Requester) id_server (str): hostname + optional port for the identity server. medium (str): The literal string "email". address (str): The third party address being invited. room_id (str): The ID of the room to which the user is invited. inviter_user_id (str): The user ID of the inviter. room_alias (str): An alias for the room, for cosmetic notifications. room_avatar_url (str): The URL of the room's avatar, for cosmetic notifications. room_join_rules (str): The join rules of the email (e.g. "public"). room_name (str): The m.room.name of the room. inviter_display_name (str): The current display name of the inviter. inviter_avatar_url (str): The URL of the inviter's avatar. id_access_token (str|None): The access token to authenticate to the identity server with Returns: A deferred tuple containing: token (str): The token which must be signed to prove authenticity. public_keys ([{"public_key": str, "key_validity_url": str}]): public_key is a base64-encoded ed25519 public key. fallback_public_key: One element from public_keys. display_name (str): A user-friendly name to represent the invited user. 
""" invite_config = { "medium": medium, "address": address, "room_id": room_id, "room_alias": room_alias, "room_avatar_url": room_avatar_url, "room_join_rules": room_join_rules, "room_name": room_name, "sender": inviter_user_id, "sender_display_name": inviter_display_name, "sender_avatar_url": inviter_avatar_url, } # Add the identity service access token to the JSON body and use the v2 # Identity Service endpoints if id_access_token is present data = None base_url = "%s%s/_matrix/identity" % (id_server_scheme, id_server) if id_access_token: key_validity_url = "%s%s/_matrix/identity/v2/pubkey/isvalid" % ( id_server_scheme, id_server, ) # Attempt a v2 lookup url = base_url + "/v2/store-invite" try: data = yield self.blacklisting_http_client.post_json_get_json( url, invite_config, { "Authorization": create_id_access_token_header(id_access_token) }, ) except TimeoutError: raise SynapseError(500, "Timed out contacting identity server") except HttpResponseException as e: if e.code != 404: logger.info("Failed to POST %s with JSON: %s", url, e) raise e if data is None: key_validity_url = "%s%s/_matrix/identity/api/v1/pubkey/isvalid" % ( id_server_scheme, id_server, ) url = base_url + "/api/v1/store-invite" try: data = yield self.blacklisting_http_client.post_json_get_json( url, invite_config) except TimeoutError: raise SynapseError(500, "Timed out contacting identity server") except HttpResponseException as e: logger.warning( "Error trying to call /store-invite on %s%s: %s", id_server_scheme, id_server, e, ) if data is None: # Some identity servers may only support application/x-www-form-urlencoded # types. 
This is especially true with old instances of Sydent, see # https://github.com/matrix-org/sydent/pull/170 try: data = yield self.blacklisting_http_client.post_urlencoded_get_json( url, invite_config) except HttpResponseException as e: logger.warning( "Error calling /store-invite on %s%s with fallback " "encoding: %s", id_server_scheme, id_server, e, ) raise e # TODO: Check for success token = data["token"] public_keys = data.get("public_keys", []) if "public_key" in data: fallback_public_key = { "public_key": data["public_key"], "key_validity_url": key_validity_url, } else: fallback_public_key = public_keys[0] if not public_keys: public_keys.append(fallback_public_key) display_name = data["display_name"] return token, public_keys, fallback_public_key, display_name
def build_proxied_http_client(self):
    """Create a SimpleHttpClient configured with proxies from the environment.

    The HTTP proxy is read from the ``http_proxy`` environment variable.
    The HTTPS proxy is read from ``HTTPS_PROXY``; if that variable is not
    set at all, the lower-case ``https_proxy`` spelling is used instead.

    Returns:
        A new SimpleHttpClient, constructed with this object as its first
        argument and the proxy settings (bytes, or None when unset) found in
        the environment.
    """
    https_proxy = os.getenvb(b"HTTPS_PROXY")
    if https_proxy is None:
        # Many environments only export the lower-case name; honour it when
        # the upper-case one is absent. An explicitly set HTTPS_PROXY (even
        # an empty one) still takes precedence.
        https_proxy = os.getenvb(b"https_proxy")

    return SimpleHttpClient(
        self,
        http_proxy=os.getenvb(b"http_proxy"),
        https_proxy=https_proxy,
    )
class HttpPusher(Pusher):
    """Pusher that delivers notifications by POSTing JSON to an HTTP URL.

    The target URL is taken from the ``url`` entry of the pusher's ``data``
    dict; every other entry of ``data`` is forwarded with each notification.
    """

    def __init__(self, _hs, profile_tag, user_name, app_id,
                 app_display_name, device_display_name, pushkey, pushkey_ts,
                 data, last_token, last_success, failing_since):
        """Set up the pusher and its HTTP client.

        All arguments are passed through unchanged to the base class.

        Raises:
            PusherConfigException: if ``data`` has no ``url`` entry.
        """
        super(HttpPusher, self).__init__(
            _hs,
            profile_tag,
            user_name,
            app_id,
            app_display_name,
            device_display_name,
            pushkey,
            pushkey_ts,
            data,
            last_token,
            last_success,
            failing_since
        )
        if 'url' not in data:
            raise PusherConfigException(
                "'url' required in data for HTTP pusher"
            )
        self.url = data['url']
        self.httpCli = SimpleHttpClient(self.hs)
        # The URL is only needed locally, so it is left out of the copy of
        # the data that is sent along with each notification.
        self.data_minus_url = {
            key: value for key, value in self.data.items() if key != 'url'
        }

    @defer.inlineCallbacks
    def _build_notification_dict(self, event, tweaks):
        """Build the JSON body describing ``event`` for the push endpoint.

        Args:
            event (dict): The event to notify about.
            tweaks: Value placed under the device's ``tweaks`` key.

        Returns:
            Deferred resolving to the notification dict, or to None when the
            event has no ``event_id``.
        """
        # we probably do not want to push for every presence update
        # (we may want to be able to set up notifications when specific
        # people sign in, but we'd want to only deliver the pertinent ones)
        # Actually, presence events will not get this far now because we
        # need to filter them out in the main Pusher code.
        if 'event_id' not in event:
            defer.returnValue(None)

        ctx = yield self.get_context_for_event(event)

        notification = {
            'id': event['event_id'],
            'room_id': event['room_id'],
            'type': event['type'],
            'sender': event['user_id'],
            'counts': {
                # -- we don't mark messages as read yet so
                # we have no way of knowing
                # Just set the badge to 1 until we have read receipts
                'unread': 1,
                # 'missed_calls': 2
            },
            'devices': [
                {
                    'app_id': self.app_id,
                    'pushkey': self.pushkey,
                    # int() rather than the Python-2-only long()
                    'pushkey_ts': int(self.pushkey_ts / 1000),
                    'data': self.data_minus_url,
                    'tweaks': tweaks
                }
            ]
        }

        if event['type'] == 'm.room.member':
            notification['membership'] = event['content']['membership']
            notification['user_is_target'] = (
                event['state_key'] == self.user_name
            )
        if 'content' in event:
            notification['content'] = event['content']

        # Optional cosmetic fields: only included when the context has a
        # non-empty value for them.
        if ctx['aliases']:
            notification['room_alias'] = ctx['aliases'][0]
        if ctx.get('sender_display_name'):
            notification['sender_display_name'] = ctx['sender_display_name']
        if ctx.get('name'):
            notification['room_name'] = ctx['name']

        defer.returnValue({'notification': notification})

    @defer.inlineCallbacks
    def dispatch_push(self, event, tweaks):
        """POST a notification for ``event`` to the pusher's URL.

        Returns:
            Deferred resolving to:
              * ``[]`` if no notification was built for the event,
              * ``False`` if the HTTP request failed,
              * otherwise the response's ``rejected`` entry, or ``[]`` when
                the response has none.
        """
        notification_dict = yield self._build_notification_dict(event, tweaks)
        if not notification_dict:
            defer.returnValue([])
        try:
            resp = yield self.httpCli.post_json_get_json(
                self.url, notification_dict
            )
        except Exception:
            # Deliberately best-effort: report the failure to the caller
            # instead of propagating it.
            logger.warning("Failed to push %s ", self.url)
            defer.returnValue(False)
        rejected = []
        if 'rejected' in resp:
            rejected = resp['rejected']
        defer.returnValue(rejected)

    @defer.inlineCallbacks
    def reset_badge_count(self):
        """POST a notification with all counts at zero to the pusher's URL.

        Returns:
            Deferred resolving to ``False`` if the HTTP request failed,
            otherwise the response's ``rejected`` entry, or ``[]`` when the
            response has none.
        """
        d = {
            'notification': {
                'id': '',
                'type': None,
                'sender': '',
                'counts': {
                    'unread': 0,
                    'missed_calls': 0
                },
                'devices': [
                    {
                        'app_id': self.app_id,
                        'pushkey': self.pushkey,
                        # int() rather than the Python-2-only long()
                        'pushkey_ts': int(self.pushkey_ts / 1000),
                        'data': self.data_minus_url,
                    }
                ]
            }
        }
        try:
            resp = yield self.httpCli.post_json_get_json(self.url, d)
        except Exception:
            logger.exception("Failed to push %s ", self.url)
            defer.returnValue(False)
        rejected = []
        if 'rejected' in resp:
            rejected = resp['rejected']
        defer.returnValue(rejected)