def get_project_path(presidio_config):
    project_path = None
    try:
        project_path = os.path.abspath(presidio_config.get('project_path'))
    except Exception:
        LOG.error('"project_path" entry not specified in configuration!')
        LOG.error('Cannot proceed; exiting...')
        sys.exit(1)
    return project_path

def _BAD_IDEA_set_use_unverified_jwt():
    global _use_unverified_jwt
    LOG.warning('BAD IDEA: Use of unverified JWTs requested!')
    LOG.warning('BAD IDEA: This option is for debugging ONLY!')
    LOG.warning('BAD IDEA: Please, please don\'t use this in production!')
    LOG.warning('BAD IDEA: You have been warned...')
    _use_unverified_jwt = True

def update_safe_result_cache(self, url, methodParams, result):
    if (len(self.safe_result_cache) == 0 and
            self.safe_result_cache_seconds != 0):
        # Initialize cache configuration on first use.
        expire_seconds = self.app.config.get('SAFE_RESULT_CACHE_SECONDS')
        if expire_seconds is not None:
            self.safe_result_cache_seconds = expire_seconds
        else:
            LOG.info('Using default value.')
        LOG.info(f'SAFE result cache expiry time is '
                 f'{self.safe_result_cache_seconds} seconds.')
    key = f'{url}{methodParams}'
    expire_time = dt_now() + timedelta(0, self.safe_result_cache_seconds)
    self.safe_result_cache[key] = (result, expire_time)

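# Illustrative sketch of how the cache behaves. The URL, SCID, and DN
# values below are made-up examples, not real configuration:
#
#     params = ['<dataset SCID>', '<user DN>', '<ns-token>', '<project ID>']
#     self.update_safe_result_cache('http://safe.example.org/access',
#                                   params, True)
#     # Stored under key 'http://safe.example.org/access' + str(params) as
#     # (True, <now + safe_result_cache_seconds>); a later
#     # query_safe_result_cache() call with the same url/params returns
#     # True until the expiry time passes.
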
def get_web_root(presidio_config):
    default_web_root = '/datasets'
    web_root = presidio_config.get('web_root')
    if not isinstance(web_root, str) or not web_root:
        LOG.info('"web_root" configuration entry missing or invalid.')
        LOG.info('Proceeding with default value of: %s' % default_web_root)
        web_root = default_web_root
    # Ensure we begin with a /
    if web_root[0] != '/':
        web_root = '/' + web_root
    # Ensure we don't end with a /
    if web_root[-1] == '/':
        web_root = web_root[:-1]
    return web_root

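# A quick illustration of the normalization above (hypothetical values):
#
#     get_web_root({'web_root': 'datasets/'})   # -> '/datasets'
#     get_web_root({'web_root': '/mydata'})     # -> '/mydata'
#     get_web_root({})                          # -> '/datasets' (default)
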
def get_safe_server_list(presidio_config):
    safe_servers = presidio_config.get('safe_servers')
    safe_server_list = []
    if safe_servers:
        if isinstance(safe_servers, str):
            safe_server_list.append(safe_servers)
        elif isinstance(safe_servers, list):
            safe_server_list += safe_servers
        else:
            LOG.error('"safe_servers" entry incorrectly specified '
                      'in configuration!')
            LOG.error('Cannot proceed; exiting...')
            sys.exit(1)
    else:
        LOG.error('"safe_servers" entry not specified in configuration!')
        LOG.error('Cannot proceed; exiting...')
        sys.exit(1)
    return safe_server_list

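# The 'safe_servers' entry may be a single string or a list; both of the
# hypothetical configurations below yield a list (hostnames are examples):
#
#     get_safe_server_list({'safe_servers': 'safe.example.org:7777'})
#     # -> ['safe.example.org:7777']
#     get_safe_server_list({'safe_servers': ['safe1.example.org:7777',
#                                            'safe2.example.org:7777']})
#     # -> ['safe1.example.org:7777', 'safe2.example.org:7777']
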
def get_presidio_principal(presidio_config):
    key_file = presidio_config.get('key_file')
    presidio_principal = None
    if key_file:
        try:
            presidio_principal = generate_presidio_principal(key_file)
        except Exception:
            LOG.error('Error loading key file!')
            LOG.error('Please ensure that the key_file config entry points')
            LOG.error('to the correct file, that the file has the correct')
            LOG.error('format, and that it contains the data that you '
                      'expect.')
            LOG.error('Cannot proceed; exiting...')
            sys.exit(1)
    else:
        LOG.error('"key_file" entry not specified in configuration!')
        LOG.error('Cannot proceed; exiting...')
        sys.exit(1)
    return presidio_principal

def configure_ca_store(presidio_config):
    ca_file = presidio_config.get('ca_file')
    if ca_file:
        try:
            initialize_CA_store(ca_file)
        except EnvironmentError:
            LOG.warning('Error loading CA roots!')
            LOG.warning('Please ensure that the ca_file config entry points')
            LOG.warning('to the correct file, that the file has the correct')
            LOG.warning('format, and that it contains the data that you '
                        'expect.')
            LOG.warning('Continuing to run - '
                        'but presidio may behave unpredictably...')
    else:
        LOG.warning('ca_file entry not specified in config file!')
        LOG.warning('Continuing to run - '
                    'but presidio may behave unpredictably...')

def configure_safe_result_cache_seconds(presidio_app):
    presidio_config = presidio_app.config['PRESIDIO_CONFIG']
    safe_result_cache_seconds = None
    if presidio_config is not None:
        safe_result_cache_seconds = (
            presidio_config.get('safe_result_cache_seconds'))
    else:
        LOG.warning('Presidio app object somehow does not have')
        LOG.warning('PRESIDIO_CONFIG set, when trying to configure:')
        LOG.warning('safe_result_cache_seconds')
        LOG.warning('Proceeding - but this suggests something weird')
        LOG.warning('is going on...')
    if safe_result_cache_seconds is not None:
        if (isinstance(safe_result_cache_seconds, (int, float)) and
                safe_result_cache_seconds >= 0):
            presidio_app.config['SAFE_RESULT_CACHE_SECONDS'] = (
                safe_result_cache_seconds)
        else:
            LOG.warning('"safe_result_cache_seconds" incorrectly '
                        'specified in configuration!')

def safe_check_access(self, dataset_SCID, user_DN, ns_token, project_ID):
    pconf = self.app.config['PRESIDIO_CONFIG']
    bypass_safe = pconf.get('BAD_IDEA_bypass_safe_servers')
    if bypass_safe:
        LOG.warning('BAD IDEA: Bypassing SAFE servers requested!')
        LOG.warning('BAD IDEA: This option is for debugging ONLY!')
        LOG.warning('BAD IDEA: Please, please don\'t '
                    'use this in production!')
        LOG.warning('BAD IDEA: You have been warned...')
        return True

    presidio_principal = self.app.config['PRESIDIO_PRINCIPAL']
    presidio_principal = presidio_principal.decode('utf-8')
    methodParams = [dataset_SCID, user_DN, ns_token, project_ID]
    payload_dict = {'principal': presidio_principal,
                    'methodParams': methodParams}
    payload = json_dumps(payload_dict)
    headers = {'Content-Type': 'application/json',
               'Accept-Charset': 'UTF-8'}

    safe_server_list = self.app.config['SAFE_SERVER_LIST']
    shuffle(safe_server_list)
    for server in safe_server_list:
        safe_result = None
        url = f'http://{server}/access'

        # Check the cache first...
        safe_result = self.query_safe_result_cache(url, methodParams)
        if safe_result is not None:
            LOG.debug('Using cached SAFE query result')
            LOG.debug(f'Access decision for dataset {dataset_SCID} '
                      f'by {user_DN} was: {safe_result}')
            return safe_result

        # Nothing in the cache? Time to ask SAFE.
        LOG.debug(f'Trying to query SAFE at {url} with the following '
                  f'parameters: {payload}')
        resp = None
        try:
            resp = post(url, data=payload, headers=headers, timeout=4)
        except Exception as e:
            LOG.warning(f'Error occurred while trying to '
                        f'query SAFE server: {server}')
            LOG.warning('Error message:')
            LOG.warning(e)
            LOG.warning('Trying next SAFE server in list (if any)...')
            if resp:
                resp.close()
            continue

        status_code = None
        if resp:
            status_code = resp.status_code
        try:
            safe_result = resp.json()
        except Exception as e:
            LOG.warning(f'Error occurred while parsing response '
                        f'from SAFE server: {server}')
            LOG.warning('Error message:')
            LOG.warning(e)
            LOG.warning('Trying next SAFE server in list (if any)...')
            continue
        finally:
            resp.close()

        LOG.debug(f'Status code from SAFE is: {status_code}')
        if status_code == 200:
            # Default to deny.
            result_message = (f'SAFE did not permit access for {user_DN} '
                              f'to dataset {dataset_SCID}')
            result = False
            if safe_result.get('result') == 'succeed':
                # SAFE reported an affirmative result.
                result_message = (f'SAFE permitted access for {user_DN} '
                                  f'to dataset {dataset_SCID}')
                result = True
            LOG.debug(result_message)
            self.update_safe_result_cache(url, methodParams, result)
            return result
        else:
            LOG.debug(f'SAFE server {server} returned '
                      f'status code {status_code}')
            LOG.debug('Trying next SAFE server in list (if any)...')
            continue

    LOG.warning('None of the configured SAFE servers replied; '
                'denying access.')
    return False

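# For reference, the JSON body POSTed to each SAFE server's /access
# endpoint has the shape below (the values are hypothetical placeholders):
#
#     {
#         "principal": "<presidio principal ID>",
#         "methodParams": ["<dataset SCID>", "<user DN>",
#                          "<ns-token>", "<project ID>"]
#     }
#
# A 200 response whose JSON 'result' field equals 'succeed' grants access;
# anything else is treated as a denial.
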
def render_autoindex(self, path, browse_root=None, template=None,
                     template_context=None, endpoint='.autoindex',
                     show_hidden=None, sort_by='name', order=1,
                     mimetype=None):
    """Renders an autoindex with the given path.

    :param path: the relative path.
    :param browse_root: if specified, it is used as the path that is
                        served at the root address.
    :param template: the template name.
    :param template_context: passed to the Jinja2 template when
                             rendering an AutoIndex page.
    :param endpoint: the endpoint name, or a view function (in which
                     case its __name__ is used).
    :param show_hidden: whether to show hidden files (starting with '.').
    :param sort_by: the property to sort the entries by.
    :param order: the sort order; 1 for ascending, -1 for descending.
    :param mimetype: set a static MIME type for files (no auto-detection).
    """
    if browse_root:
        rootdir = RootDirectory(browse_root, autoindex=self)
    else:
        rootdir = self.rootdir
    path = re_sub(r'\/*$', '', path)
    abspath = join(rootdir.abspath, path)

    if request.cert is None:
        return abort(401, 'Client certificate not found.')
    if request.verified_jwt_claims is None:
        return abort(401, 'Notary Service JWT not found.')

    LOG.debug('Path is: %s' % abspath)

    dataset_SCID = request.verified_jwt_claims.get('data-set')
    if dataset_SCID is None:
        return abort(401, 'Unable to find data-set in JWT claims.')
    user_DN = request.verified_jwt_claims.get('sub')
    if user_DN is None:
        return abort(401, 'Unable to find sub in JWT claims.')
    ns_token = request.verified_jwt_claims.get('ns-token')
    if ns_token is None:
        return abort(401, 'Unable to find ns-token in JWT claims.')
    project_ID = request.verified_jwt_claims.get('project-id')
    if project_ID is None:
        return abort(401, 'Unable to find project-id in JWT claims.')

    if isdir(abspath):
        sort_by = request.args.get('sort_by', sort_by)
        if sort_by[0] in ['-', '+']:
            order = {'+': 1, '-': -1}[sort_by[0]]
            sort_by = sort_by[1:]
        else:
            order = {'asc': 1, 'desc': -1}[request.args.get('order', 'asc')]
        curdir = Directory(path, rootdir)
        if show_hidden is None:
            show_hidden = self.show_hidden
        entries = curdir.explore(sort_by=sort_by, order=order,
                                 show_hidden=show_hidden)
        # We wrap the "entries" generator here with our own.
        # The "safe_entries" generator will call out to SAFE,
        # which will, in turn, decide whether to display
        # a given entry.
        safe_entries = self.safe_entry_generator(abspath, request.uuid,
                                                 entries, dataset_SCID,
                                                 user_DN, ns_token,
                                                 project_ID)
        if callable(endpoint):
            endpoint = endpoint.__name__
        context = {}
        if template_context is not None:
            context.update(template_context)
        if self.template_context is not None:
            context.update(self.template_context)
        context.update(curdir=curdir, entries=safe_entries,
                       sort_by=sort_by, order=order, endpoint=endpoint)
        if template:
            return render_template(template, **context)
        try:
            template = '{0}autoindex.html'.format(self.template_prefix)
            return render_template(template, **context)
        except TemplateNotFound:
            template = '{0}/autoindex.html'.format(__autoindex__)
            return render_template(template, **context)
    elif (isfile(abspath) and
          self.is_it_safe(abspath, dataset_SCID, user_DN,
                          ns_token, project_ID)):
        if mimetype:
            return send_file(abspath, mimetype=mimetype)
        else:
            return send_file(abspath)
    else:
        return abort(404)

def process_ns_jwt(jwt, DN_from_cert):
    ns_jwt = NSJWT()
    ns_jwt.setToken(jwt)

    # First, decode without verification, to get the issuer.
    try:
        ns_jwt.decode(publicKey=None, verify=False)
    except Exception:
        return (None, 'Notary Service JWT failed unverified decode.')
    unverified_claims = None
    try:
        unverified_claims = ns_jwt.getClaims()
    except Exception:
        return (None, 'Failed to extract unverified claims from JWT.')

    verified_claims = None
    if not _use_unverified_jwt:
        ns_fqdn = unverified_claims.get('iss')
        ns_jwks_resp = None
        if ns_fqdn:
            ns_jwks_url = f'https://{ns_fqdn}/jwks'
            try:
                ns_jwks_resp = get(ns_jwks_url, verify=True)
            except Exception:
                if ns_jwks_resp:
                    ns_jwks_resp.close()
                return (None, 'GET of JWKS from Notary Service failed.')
        else:
            return (None, 'Unable to find issuer in JWT claims.')

        ns_jwks_status_code = None
        ns_jwks_keys_json = None
        if ns_jwks_resp:
            ns_jwks_status_code = ns_jwks_resp.status_code
            try:
                ns_jwks_keys_json = ns_jwks_resp.json()
            except Exception:
                return (None, 'Invalid JWKS response from Notary Service.')
            finally:
                ns_jwks_resp.close()
        if ns_jwks_status_code != 200:
            return (None,
                    'GET of JWKS from Notary Service reported an error.')

        ns_jwks_keys = None
        if ns_jwks_keys_json:
            ns_jwks_keys = ns_jwks_keys_json.get('keys')
        else:
            return (None, 'Empty JWKS returned by Notary Service.')

        ns_pubkey = None
        if ns_jwks_keys:
            num_keys = 0
            try:
                num_keys = len(ns_jwks_keys)
            except Exception:
                return (None, 'Could not determine number of keys in JWKS.')
            if not (num_keys > 0):
                return (None, 'Invalid number of keys in JWKS.')
            # Only grab the first key entry from the JWKS,
            # then try to process it.
            ns_jwk_value = ns_jwks_keys[0]
            try:
                ns_jwk_json = json_dumps(ns_jwk_value).encode('utf-8')
                ns_jwk = jwk.JWK.from_json(ns_jwk_json)
                ns_jwk_pem = ns_jwk.export_to_pem().decode('utf-8')
                ns_pubkey = crypto.load_publickey(crypto.FILETYPE_PEM,
                                                  ns_jwk_pem)
            except Exception:
                return (None, 'Key entry could not be extracted from JWKS.')
        else:
            return (None, 'JWKS from Notary Service missing key container.')

        if ns_pubkey:
            try:
                ns_pubkey_pem = crypto.dump_publickey(crypto.FILETYPE_PEM,
                                                      ns_pubkey)
                ns_jwt.decode(publicKey=ns_pubkey_pem)
            except Exception:
                return (None, 'Notary Service JWT failed verified decode.')
        else:
            return (None, 'No valid public key provided by JWT issuer.')
        try:
            verified_claims = ns_jwt.getClaims()
        except Exception:
            return (None, 'Failed to extract verified claims from JWT.')

        computed_ns_token = generate_safe_principal_id(ns_pubkey)
        ns_token = verified_claims.get('ns-token')
        if ns_token:
            if ns_token != computed_ns_token.decode('utf-8'):
                return (None, ('JWT ns-token does not match token '
                               'computed from public key.'))
        else:
            return (None, 'Unable to find ns-token in JWT claims.')
    else:
        LOG.warning('BAD IDEA: Using unverified JWT claims, '
                    'against advice...')
        verified_claims = unverified_claims

    expiry = verified_claims.get('exp')
    if expiry:
        dte = datetime.fromtimestamp(expiry)
        if datetime.now() > dte:
            return (None, 'JWT has expired.')
    else:
        return (None, 'Unable to find expiry in JWT claims.')

    userDN = verified_claims.get('sub')
    if userDN:
        if userDN != DN_from_cert:
            return (None, ('JWT subject does not match '
                           'value from client certificate.'))
    else:
        return (None, 'Unable to find subject in JWT claims.')

    return (verified_claims, None)

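# The claims this service relies on look roughly like the made-up token
# payload below; the field names come from the checks in process_ns_jwt()
# and render_autoindex(), while all values are illustrative placeholders:
#
#     {
#         "iss": "notary.example.org",   # JWKS fetched from https://<iss>/jwks
#         "sub": "<user DN from the client certificate>",
#         "data-set": "<dataset SCID>",
#         "ns-token": "<SAFE principal ID of the Notary Service>",
#         "project-id": "<project ID>",
#         "exp": 1735689600              # POSIX expiry timestamp
#     }
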
def SafeLabelsFileCheck(path, dataset_SCID):
    LOG.debug(f'_project_path is: {_project_path}')
    LOG.debug(f'_project_path.parent is: {_project_path.parent}')

    if basename(path) == _safelabels_filename:
        LOG.debug(f'Ignoring SafeLabels file {_safelabels_filename}')
        return False

    cur_path = Path(path)
    if not isdir(cur_path):
        cur_path = cur_path.parent

    safeLabels = None
    while cur_path != _project_path.parent:
        LOG.debug(f'cur_path is: {cur_path}')
        try:
            safeLabels = _get_safelabels(cur_path)
        except EnvironmentError:
            # Couldn't find a labels file in this directory, so
            # continue the loop one level up.
            cur_path = cur_path.parent
            continue
        except YAMLError as ye:
            # OK. This is bad news.
            #
            # The admin *clearly* had an intended set of controls, but
            # apparently failed to write the YAML correctly.
            #
            # Spit out a warning and the exception (to aid in debugging),
            # then refuse access (rather than walking up the directory
            # tree to check the parent's policy, which the admin may well
            # have been trying to supersede with the mis-written file).
            LOG.error('Encountered error while parsing SafeLabels file!')
            LOG.error('Error message:')
            LOG.error(ye)
            LOG.error(f'Failing safe, and disallowing access to: {path}')
            return False

        # Proceeding under the assumption that the SafeLabels file
        # loaded properly.
        file_version = safeLabels.get('version')
        if file_version is None:
            LOG.warning('SafeLabels file missing \'version\' specifier.')
            LOG.warning('Will attempt to check according to the most recent')
            LOG.warning('version specification...')
        elif file_version == 1.0:
            # Base case, since we have only one version right now.
            pass
        else:
            # Sigh. An invalid version was specified.
            # Try to parse using the most recent version,
            # and let the chips fall where they may.
            LOG.warning('SafeLabels file found with invalid '
                        '\'version\' specified.')
            LOG.warning('Will attempt to check according to the most recent')
            LOG.warning('version specification...')

        label_check = SafeLabelsChecker_v1(path, dataset_SCID, safeLabels)
        if label_check:
            LOG.debug(f'Matching SCID found for {path}')
            return True
        else:
            break

    if safeLabels is None:
        LOG.debug(f'Unable to find a SafeLabels file to apply for {path}')
    else:
        LOG.debug(f'No matching SCIDs found for {path}')
    return False

def configure_label_mech(presidio_config, project_path):
    global _project_path
    global _label_mech_fn
    global _xattr_label_base
    global _safelabels_filename

    _project_path = Path(project_path)
    _label_mech_fn = SafeLabelsFileCheck

    conf_label_mech = presidio_config.get('label_mech')
    if conf_label_mech:
        conf_label_mech = conf_label_mech.lower()
        if conf_label_mech == 'xattr':
            _label_mech_fn = ExtendedAttributeLabelCheck
        elif conf_label_mech != 'safelabels':
            LOG.warning('Unknown value specified for "label_mech"')
            LOG.warning('in configuration file.')
    else:
        LOG.warning('"label_mech" entry not specified in configuration.')

    if _label_mech_fn == ExtendedAttributeLabelCheck:
        LOG.info('Using extended attribute mechanism for SAFE labels.')
        conf_xattr_label_base = presidio_config.get('xattr_label_base')
        if conf_xattr_label_base:
            _xattr_label_base = conf_xattr_label_base
            LOG.info(f'Extended attribute label base is: '
                     f'{_xattr_label_base}')
    else:
        LOG.info('Using default SafeLabels file mechanism for SAFE labels.')
        conf_safelabels_filename = presidio_config.get('safelabels_filename')
        if conf_safelabels_filename:
            _safelabels_filename = conf_safelabels_filename
            LOG.info(f'SafeLabels file name is: {_safelabels_filename}')

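# Hypothetical configuration fragments exercising the options handled
# above (the attribute base and filename values are illustrative only):
#
#     presidio_config = {'label_mech': 'xattr',
#                        'xattr_label_base': 'user.presidio.scid'}
#
# or, for the default SafeLabels file mechanism:
#
#     presidio_config = {'label_mech': 'safelabels',
#                        'safelabels_filename': '.safelabels'}
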
def ExtendedAttributeLabelCheck(path, dataset_SCID):
    cur_path = Path(path)
    LOG.debug(f'_project_path is: {_project_path}')
    LOG.debug(f'_project_path.parent is: {_project_path.parent}')

    while cur_path != _project_path.parent:
        LOG.debug(f'cur_path is: {cur_path}')
        path_attrs = xattr(cur_path)
        attr_key_list = [
            e for e in path_attrs.list() if _xattr_label_base in e
        ]
        if attr_key_list:
            for attr in attr_key_list:
                LOG.debug(f'Checking xattr: {attr} for path: {cur_path}')
                if (path_attrs[attr]).decode('utf-8') == dataset_SCID:
                    LOG.debug(f'Matching SCID found for {path}')
                    return True
            # If we got here, we reached the end of the list of
            # matching extended attributes, but did not find a
            # matching SCID.
            #
            # Since we found matching extended attributes and
            # we should match as narrowly as possible, we need
            # to break out of the path search loop here.
            break
        cur_path = cur_path.parent

    LOG.debug(f'No matching SCIDs found for {path}')
    return False

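# To label a directory tree for this check, an administrator would set an
# extended attribute whose name contains the configured label base and
# whose value is the dataset SCID. A minimal sketch using the same xattr
# module as above (the path, attribute name, and SCID are hypothetical):
#
#     attrs = xattr('/datasets/projectA')
#     attrs['user.presidio.scid'] = b'<dataset SCID>'
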
def SafeLabelsChecker_v1(path, dataset_SCID, safeLabels):
    per_file_overrides = safeLabels.get('overrides')
    if per_file_overrides is None:
        # Not specified; perfectly valid.
        pass
    elif not isinstance(per_file_overrides, dict):
        # Gotta fail safe again...
        LOG.warning('\'overrides\' specified, but not a dictionary.')
        LOG.warning('Failing safe...')
        return False
    else:
        keys = per_file_overrides.keys()
        labels = None
        for key in keys:
            if re_search(key, path):
                labels = per_file_overrides.get(key)
                break
        if labels is None:
            # There may be no overrides found for the specified path.
            # That's perfectly valid. Log it at debug level, and move on.
            LOG.debug('No overrides found for path; proceeding to default.')
        elif isinstance(labels, str):
            return (labels == dataset_SCID)
        elif isinstance(labels, list):
            for label in labels:
                if label == dataset_SCID:
                    return True
            return False
        else:
            LOG.warning('Incorrectly specified value in '
                        '\'overrides\' entry.')
            LOG.warning('Failing safe...')
            return False

    default_labels = safeLabels.get('default')
    if default_labels is None:
        LOG.warning('\'default\' entry unspecified!')
        LOG.warning('Failing safe...')
        return False
    elif isinstance(default_labels, str):
        return (default_labels == dataset_SCID)
    elif isinstance(default_labels, list):
        for label in default_labels:
            if label == dataset_SCID:
                return True
        return False
    else:
        LOG.warning('\'default\' specified, but not a valid value.')
        LOG.warning('Failing safe...')
        return False

    # Tack a final false return at the end, to be defensive.
    return False

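# A SafeLabels file that this checker would accept looks roughly like the
# YAML below (the patterns and SCIDs are made-up examples):
#
#     version: 1.0
#     default: '<SCID granting access to most of this directory>'
#     overrides:
#         '.*\.csv$': '<SCID for CSV files>'
#         '.*restricted.*':
#             - '<SCID one>'
#             - '<SCID two>'
#
# The 'overrides' keys are regular expressions matched against the full
# path (via re_search); the first match wins, and its value (a string or
# a list of strings) is compared against the requested dataset SCID. If
# no override matches, the 'default' entry is consulted.
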