def load_dir(self, directory, ext=".xml", url=None):
    """
    Traverse a directory tree looking for metadata.

    Files ending in the specified extension are parsed and the entities they
    contain are imported as a single entity set. Directories starting with
    '.' are excluded. Nothing happens if ``directory`` is already present in
    ``self.md``.

    :param directory: A directory to walk.
    :param ext: Include files with this extension (default .xml)
    :param url: Identifier handed to ``entity_set`` for the imported entities
        (defaults to ``directory``)
    """
    if url is None:
        url = directory
    log.debug("walking %s" % directory)
    if directory in self.md:
        return

    entities = []
    for top, dirs, files in os.walk(directory):
        # Prune hidden directories in place so os.walk does not descend into
        # them. Calling dirs.remove() while iterating dirs skips the entry
        # that follows each removed one.
        dirs[:] = [dn for dn in dirs if not dn.startswith(".")]
        for nm in files:
            log.debug("found file %s" % nm)
            if not nm.endswith(ext):
                continue
            fn = os.path.join(top, nm)
            try:
                # local metadata is assumed to be ok
                t = self.parse_metadata(fn, fail_on_error=True)
                entities.extend(self.entities(t))
            except Exception as ex:
                # A broken file must not prevent the remaining files from loading.
                log.error(ex)
    self.import_metadata(self.entity_set(entities, url))
def run(self, server):
    """
    Build a fresh repository by running all plumbings and make it live.

    The update is skipped (and an error logged) when ``self.lock`` cannot be
    acquired without blocking. Any exception raised during the update is
    printed and swallowed.

    :param server: object providing ``plumbings``, ``lock.writelock`` and
        the live repository attribute ``md``
    """
    locked = False
    try:
        if self.lock.acquire(blocking=0):
            locked = True
            md = self.server.new_repository()
            for o in self.server.observers:
                md.subscribe(o)

            for p in server.plumbings:
                state = {'update': True, 'stats': {}}
                p.process(md, state)
                stats.update(state.get('stats', {}))

            if not md.sane():
                # NOTE(review): the repository is still installed below even
                # when it is reported insane - confirm this is intended.
                log.error("update produced insane active repository - will try again later...")

            with server.lock.writelock:
                log.debug("update produced new repository with %d entities" % md.index.size())
                server.md = md
                md.fire(type=EVENT_REPOSITORY_LIVE, size=md.index.size())
                stats['Repository Update Time'] = datetime.now()
                stats['Repository Size'] = md.index.size()
        else:
            log.error("another instance is running - will try again later...")
    except Exception:
        # print_exc() takes a traceback depth limit, not the exception object.
        traceback.print_exc()
    finally:
        # Without this release every later run would report
        # "another instance is running" forever.
        if locked:
            self.lock.release()
def load_dir(self, directory, ext=".xml", url=None):
    """
    Traverse a directory tree looking for metadata.

    Files ending in the specified extension are parsed and the entities they
    contain are imported as a single entity set. Directories starting with
    '.' are excluded. Nothing happens if ``directory`` is already present in
    ``self.md``.

    :param directory: A directory to walk.
    :param ext: Include files with this extension (default .xml)
    :param url: Identifier handed to ``entity_set`` for the imported entities
        (defaults to ``directory``)
    """
    if url is None:
        url = directory
    log.debug("walking %s" % directory)
    if directory in self.md:
        return

    entities = []
    for top, dirs, files in os.walk(directory):
        # In-place slice assignment prunes the walk; removing items from dirs
        # while looping over it would skip the entry after each removal.
        dirs[:] = [dn for dn in dirs if not dn.startswith(".")]
        for nm in files:
            log.debug("found file %s" % nm)
            if not nm.endswith(ext):
                continue
            fn = os.path.join(top, nm)
            try:
                t = self.parse_metadata(fn, fail_on_error=True)
                entities.extend(self.entities(t))  # local metadata is assumed to be ok
            except Exception as ex:
                # Log and continue with the next file.
                log.error(ex)
    self.import_metadata(self.entity_set(entities, url))
def test_log_syslog(self):
    """
    Check that only records at WARNING level or above reach the syslog handler.

    ``syslog.syslog`` is patched with ``self.dummy_syslog`` and the captured
    output is read back from ``self._syslog``. The root logger's handlers
    and level are restored afterwards.
    """
    with patch('syslog.syslog', new=self.dummy_syslog):
        logger = logging.getLogger()
        old_level = logger.level
        # Snapshot first: removing handlers while iterating logger.handlers
        # skips every other handler.
        old_handlers = list(logger.handlers)
        test_handler = None
        try:
            for hdl in old_handlers:
                logger.removeHandler(hdl)
            test_handler = SysLogLibHandler("USER")
            logger.addHandler(test_handler)
            logger.setLevel(logging.WARNING)

            log.info("info")
            log.warn("warn")
            log.warning("warning")
            log.error("error")
            log.critical("critical")
            log.debug("debug")

            lines = self._syslog.getvalue().split("\n")

            assert "info" not in lines
            assert "12:warn" in lines
            assert "12:warning" in lines
            assert "10:critical" in lines
            assert "11:error" in lines
            assert "debug" not in lines
        finally:
            # test_handler stays None if constructing the handler failed.
            if test_handler is not None:
                logger.removeHandler(test_handler)
            logger.setLevel(old_level)
            for hdl in old_handlers:
                logger.addHandler(hdl)
def publish(req, *opts):
    """
    Publish the working document in XML form.

    :param req: The request
    :param opts: Options (unused)
    :return: None
    :raises PipeException: if there is no working document, no output was
        specified, or the document fails schema validation

    Publish takes one argument: path to a file where the document tree will
    be written.

    **Examples**

    .. code-block:: yaml

        - publish: /tmp/idp.xml
    """
    if req.t is None:
        raise PipeException("Empty document submitted for publication")

    if req.args is None:
        raise PipeException("publish must specify output")

    try:
        validate_document(req.t)
    except DocumentInvalid as ex:
        # Log the detailed validator output; callers only see the summary.
        log.error(ex.error_log)
        raise PipeException("XML schema validation failed")
def test_log_plain(self):
    """
    Check that only records at WARNING level or above reach a stream handler.

    The root logger's handlers are swapped for a ``StreamHandler`` writing to
    an in-memory buffer. The original handlers and level are restored
    afterwards.
    """
    logfile = StringIO()
    logger = logging.getLogger()
    old_level = logger.level
    # Snapshot first: removing handlers while iterating logger.handlers
    # skips every other handler.
    old_handlers = list(logger.handlers)
    test_handler = None
    try:
        for hdl in old_handlers:
            logger.removeHandler(hdl)
        test_handler = logging.StreamHandler(logfile)
        logger.addHandler(test_handler)
        logger.setLevel(logging.WARNING)

        log.info("info")
        log.warn("warn")
        log.warning("warning")
        log.error("error")
        log.critical("critical")
        log.debug("debug")

        lines = logfile.getvalue().split("\n")

        assert "info" not in lines
        assert "warn" in lines
        assert "warning" in lines
        assert "critical" in lines
        assert "error" in lines
        assert "debug" not in lines
    finally:
        if test_handler is not None:
            logger.removeHandler(test_handler)
        logger.setLevel(old_level)
        for hdl in old_handlers:
            logger.addHandler(hdl)
def _process(self, req):
    """The inner request pipeline processor.

    Each entry of ``self.pipeline`` is resolved through ``loader.load_pipe``
    and called with the request. A non-None return value replaces the
    working document ``req.t``. Processing stops when a pipe sets
    ``req.done`` or raises ``PipeException`` (which is logged, not re-raised).

    :param req: The request to run through the pipeline
    """
    log.debug('Processing \n%s' % self)
    for p in self.pipeline:
        try:
            pipe, opts, name, args = loader.load_pipe(p)
            # isinstance (rather than exact type identity) also accepts
            # subclasses of the supported argument types.
            if isinstance(args, (str, unicode)):
                # A bare string is shorthand for a one-element argument list.
                args = [args]
            if args is not None and not isinstance(args, (dict, list, tuple)):
                raise PipeException("Unknown argument type %s" % repr(args))
            req.args = args
            req.name = name
            ot = pipe(req, *opts)
            if ot is not None:
                req.t = ot
            if req.done:
                break
        except PipeException as ex:
            log.error(ex)
            break
def test_log_syslog(self):
    """
    Check that only records at WARNING level or above reach the syslog handler.

    ``syslog.syslog`` is patched with ``self.dummy_syslog`` and the captured
    output is read back from ``self._syslog``. The root logger's handlers
    and level are restored afterwards.
    """
    with patch('syslog.syslog', new=self.dummy_syslog):
        logger = logging.getLogger()
        old_level = logger.level
        # Work on a copy: mutating logger.handlers while iterating it makes
        # the loop skip handlers.
        old_handlers = list(logger.handlers)
        test_handler = None
        try:
            for hdl in old_handlers:
                logger.removeHandler(hdl)
            test_handler = SysLogLibHandler("USER")
            logger.addHandler(test_handler)
            logger.setLevel(logging.WARNING)

            log.info("info")
            log.warn("warn")
            log.warning("warning")
            log.error("error")
            log.critical("critical")
            log.debug("debug")

            lines = self._syslog.getvalue().split("\n")

            assert "info" not in lines
            assert "12:warn" in lines
            assert "12:warning" in lines
            assert "10:critical" in lines
            assert "11:error" in lines
            assert "debug" not in lines
        finally:
            # test_handler stays None if constructing the handler failed.
            if test_handler is not None:
                logger.removeHandler(test_handler)
            logger.setLevel(old_level)
            for hdl in old_handlers:
                logger.addHandler(hdl)
def run(self, server):
    """
    Clone the live repository, run all plumbings on the clone and make it live.

    ``self.lock`` is held for the duration of the update and is always
    released, also when the update fails. Exceptions are logged and
    swallowed.

    :param server: object providing ``plumbings``, ``lock.writelock`` and
        the live repository attribute ``md``
    """
    locked = False
    try:
        self.lock.acquire()
        locked = True
        md = self.server.md.clone()

        for p in server.plumbings:
            state = {'update': True, 'stats': {}}
            p.process(md, state)
            stats.update(state.get('stats', {}))

        with server.lock.writelock:
            # Report the size of the NEW repository; server.md still refers
            # to the old one at this point.
            log.debug("update produced new repository with %d entities" % md.store.size())
            server.md = md
            server.md.fire(type=EVENT_REPOSITORY_LIVE, size=server.md.store.size())
            stats['Repository Update Time'] = datetime.now()
            stats['Repository Size'] = server.md.store.size()

        self.nruns += 1
        stats['Updates Since Server Start'] = self.nruns

        # Stores may expose an optional periodic maintenance hook.
        if hasattr(self.server.md.store, 'periodic'):
            self.server.md.store.periodic(stats)
    except Exception as ex:
        # ex.message is deprecated and empty for multi-argument exceptions.
        log.error(ex)
    finally:
        # Without this release the next run would block forever on acquire().
        if locked:
            self.lock.release()
def test_log_plain(self):
    """
    Check that only records at WARNING level or above reach a stream handler.

    The root logger's handlers are swapped for a ``StreamHandler`` writing to
    an in-memory buffer. The original handlers and level are restored
    afterwards.
    """
    logfile = StringIO()
    logger = logging.getLogger()
    old_level = logger.level
    # Work on a copy: mutating logger.handlers while iterating it makes the
    # loop skip handlers.
    old_handlers = list(logger.handlers)
    test_handler = None
    try:
        for hdl in old_handlers:
            logger.removeHandler(hdl)
        test_handler = logging.StreamHandler(logfile)
        logger.addHandler(test_handler)
        logger.setLevel(logging.WARNING)

        log.info("info")
        log.warn("warn")
        log.warning("warning")
        log.error("error")
        log.critical("critical")
        log.debug("debug")

        lines = logfile.getvalue().split("\n")

        assert "info" not in lines
        assert "warn" in lines
        assert "warning" in lines
        assert "critical" in lines
        assert "error" in lines
        assert "debug" not in lines
    finally:
        if test_handler is not None:
            logger.removeHandler(test_handler)
        logger.setLevel(old_level)
        for hdl in old_handlers:
            logger.addHandler(hdl)
def run(self, server):
    """
    Clone the live repository, run all plumbings on the clone and make it live.

    ``self.lock`` is held for the duration of the update and released in all
    cases. Exceptions are logged and swallowed.

    :param server: object providing ``plumbings``, ``lock.writelock`` and
        the live repository attribute ``md``
    """
    locked = False
    try:
        self.lock.acquire()
        locked = True
        md = self.server.md.clone()

        for p in server.plumbings:
            state = {'update': True, 'stats': {}}
            p.process(md, state)
            stats.update(state.get('stats', {}))

        with server.lock.writelock:
            # Log the size of the repository being installed, not the one
            # being replaced (server.md is only reassigned on the next line).
            log.debug("update produced new repository with %d entities" % md.store.size())
            server.md = md
            server.md.fire(type=EVENT_REPOSITORY_LIVE, size=server.md.store.size())
            stats['Repository Update Time'] = datetime.now()
            stats['Repository Size'] = server.md.store.size()

        self.nruns += 1
        stats['Updates Since Server Start'] = self.nruns

        # Stores may expose an optional periodic maintenance hook.
        if hasattr(self.server.md.store, 'periodic'):
            self.server.md.store.periodic(stats)
    except Exception as ex:
        # ex.message is deprecated and empty for multi-argument exceptions.
        log.error(ex)
    finally:
        if locked:
            self.lock.release()
def schema():
    """
    Return the shared XML schema object, compiling it on first use.

    The schema is parsed from the packaged ``schema/schema.xsd`` resource
    with a ``ResourceResolver`` attached to the parser, and cached in the
    module-level ``_SCHEMA``.

    :return: the cached ``etree.XMLSchema`` instance
    :raises etree.XMLSchemaParseError: if the packaged schema cannot be
        compiled (the error log is written to the log first)
    """
    global _SCHEMA
    if _SCHEMA is None:
        try:
            parser = etree.XMLParser()
            parser.resolvers.add(ResourceResolver())
            st = etree.parse(pkg_resources.resource_stream(__name__, "schema/schema.xsd"), parser)
            _SCHEMA = etree.XMLSchema(st)
        except etree.XMLSchemaParseError as ex:
            log.error(_e(ex.error_log))
            # Bare raise keeps the original traceback.
            raise
    # Callers use schema().validate(...) / schema().assertValid(...), so the
    # cached object has to be returned.
    return _SCHEMA
def publish(req, *opts):
    """
    Publish the working document in XML form.

    :param req: The request
    :param opts: Options (unused)
    :return: the working document ``req.t``
    :raises PipeException: if there is no working document, no output was
        specified, or the document fails schema validation

    Publish takes one argument: path to a file where the document tree will
    be written. The argument may be given as a dict with an ``output`` key
    or as the first element of a list. If the path names a directory, the
    document is written to ``<directory>/<req.id>.xml``.

    **Examples**

    .. code-block:: yaml

        - publish: /tmp/idp.xml
    """
    if req.t is None:
        raise PipeException("Empty document submitted for publication")

    if req.args is None:
        raise PipeException("publish must specify output")

    try:
        validate_document(req.t)
    except DocumentInvalid as ex:
        log.error(ex.error_log)
        raise PipeException("XML schema validation failed")

    if type(req.args) is dict:
        target = req.args.get("output", None)
    else:
        target = req.args[0]

    # No output configured: nothing to write, hand the document back as-is.
    if target is None:
        return req.t

    target = target.strip()
    log.debug("publish {}".format(target))

    # A file spec may carry a separate resource name next to the path.
    tid = target
    spec = re.match(FILESPEC_REGEX, target)
    if spec:
        target, tid = spec.group(1), spec.group(2)
    log.debug("output_file={}, resource_name={}".format(target, tid))

    if os.path.isdir(target):
        destination = "{}.xml".format(os.path.join(target, req.id))
    else:
        destination = target

    safe_write(destination, dumptree(req.t))
    req.md.store.update(req.t, tid=tid)  # TODO maybe this is not the right thing to do anymore
    return req.t
def parse_metadata(self, fn, key=None, base_url=None, fail_on_error=False, filter_invalid=True, validate=True, post=None): """Parse a piece of XML and split it up into EntityDescriptor elements. Each such element is stored in the MDRepository instance. :param fn: a file-like object containing SAML metadata :param key: a certificate (file) or a SHA1 fingerprint to use for signature verification :param base_url: use this base url to resolve relative URLs for XInclude processing :param fail_on_error: (default: False) :param filter_invalid: (default True) remove invalid EntityDescriptor elements rather than raise an errror :param validate: (default: True) set to False to turn off all XML schema validation :param post: A callable that will be called to modify the parse-tree before any validation (but after xinclude processing) """ try: t = etree.parse(fn, base_url=base_url, parser=etree.XMLParser(resolve_entities=False)) t.xinclude() if key is not None: try: log.debug("verifying signature using %s" % key) refs = xmlsec.verified(t, key) if len(refs) != 1: raise MetadataException("XML metadata contains %d signatures - exactly 1 is required" % len(refs)) t = refs[0] # prevent wrapping attacks except Exception, ex: tb = traceback.format_exc() print tb log.error(ex) return None if post is not None: t = post(t) if validate: if filter_invalid: for e in t.findall('{%s}EntityDescriptor' % NS['md']): if not schema().validate(e): error = _e(schema().error_log, m=base_url) log.debug("removing '%s': schema validation failed (%s)" % (e.get('entityID'), error)) e.getparent().remove(e) self.fire(type=EVENT_DROP_ENTITY, url=base_url, entityID=e.get('entityID'), error=error) else: # Having removed the invalid entities this should now never happen... schema().assertValid(t)
def load(req, *opts):
    """
    General-purpose resource fetcher.

    :param req: The request
    :param opts: Options: [qsize <5>] [timeout <30>] [xrd <output xrd file>]
    :return: None
    :raises PipeException: if a resource specification does not consist of
        one or two whitespace-separated parts

    Supports both remote and local resources. Fetching remote resources is
    done in parallel using threads. Local directories are loaded
    immediately; local files are queued as ``file://`` URLs together with
    the remote resources.
    """
    remote = []
    for x in req.args:
        x = x.strip()
        log.debug("load %s" % x)
        m = re.match(FILESPEC_REGEX, x)
        rid = None
        if m:
            x = m.group(1)
            rid = m.group(2)
        r = x.split()
        # Raise explicitly: an assert is stripped under -O and would surface
        # as AssertionError rather than the PipeException the pipeline handles.
        if len(r) not in (1, 2):
            raise PipeException("Usage: load: resource [as url] [verification]")
        url = r[0]
        verify = r[1] if len(r) == 2 else None

        if "://" in url:
            log.debug("remote %s %s %s" % (url, verify, rid))
            remote.append((url, verify, rid))
        elif os.path.exists(url):
            if os.path.isdir(url):
                log.debug("local directory %s %s %s" % (url, verify, rid))
                req.md.load_dir(url, url=rid)
            elif os.path.isfile(url):
                log.debug("local file %s %s %s" % (url, verify, rid))
                remote.append(("file://%s" % url, verify, rid))
            else:
                log.error("Unknown file type for load: %s" % r[0])
        else:
            log.error("Don't know how to load '%s' as %s verified by %s" % (url, rid, verify))

    # Options arrive as a flat (key, value, key, value, ...) sequence.
    opts = dict(zip(opts[::2], opts[1::2]))
    opts.setdefault('timeout', 30)
    opts.setdefault('qsize', 5)
    opts.setdefault('xrd', None)
    stats = dict()
    opts.setdefault('stats', stats)
    req.md.fetch_metadata(remote, **opts)
    req.state['stats']['Metadata URLs'] = stats
def safe_write(fn, data):
    """Safely write data to a file with name fn

    The data is first written to a hidden temporary file in the target
    directory and then renamed over ``fn``, so a failed write never leaves a
    partial ``fn`` behind. Empty data is treated as a failure and ``fn`` is
    left untouched.

    :param fn: a filename
    :param data: some data to write
    :return: True or False depending on the outcome of the write
    """
    tmpn = None
    try:
        dirname, basename = os.path.split(fn)
        with tempfile.NamedTemporaryFile("w", delete=False, prefix=".%s" % basename, dir=dirname) as tmp:
            # Record the name before writing so a failed write is cleaned up too.
            tmpn = tmp.name
            tmp.write(data)
        if os.path.exists(tmpn) and os.stat(tmpn).st_size > 0:
            os.rename(tmpn, fn)
            return True
    except Exception as ex:
        log.error(ex)
    finally:
        # After a successful rename the temporary path no longer exists; in
        # every other case remove the leftover so hidden files do not pile up.
        if tmpn is not None and os.path.exists(tmpn):
            try:
                os.unlink(tmpn)
            except OSError as ex:
                log.error(ex)
    return False
def safe_write(fn, data):
    """Safely write data to a file with name fn

    ``fn`` is user-expanded (``~``), the data is written to a hidden
    temporary file in the target directory and then renamed over ``fn``, so
    a failed write never leaves a partial ``fn`` behind. Empty data is
    treated as a failure and ``fn`` is left untouched.

    :param fn: a filename
    :param data: some data to write
    :return: True or False depending on the outcome of the write
    """
    tmpn = None
    try:
        fn = os.path.expanduser(fn)
        dirname, basename = os.path.split(fn)
        with tempfile.NamedTemporaryFile('w', delete=False, prefix=".%s" % basename, dir=dirname) as tmp:
            # Record the name before writing so a failed write is cleaned up too.
            tmpn = tmp.name
            tmp.write(data)
        if os.path.exists(tmpn) and os.stat(tmpn).st_size > 0:
            os.rename(tmpn, fn)
            return True
    except Exception as ex:
        log.error(ex)
    finally:
        # After a successful rename the temporary path no longer exists; in
        # every other case remove the leftover so hidden files do not pile up.
        if tmpn is not None and os.path.exists(tmpn):
            try:
                os.unlink(tmpn)
            except OSError as ex:
                log.error(ex)
    return False
def loadstats(req, *opts):
    """
    Log (INFO) information about the result of the last call to load

    The statistics are serialised as JSON when ``'json'`` is among the
    options and as YAML otherwise. Serialisation errors are logged, in which
    case ``None`` is reported.

    :param req: The request
    :param opts: Options: ['json']
    :return: None
    """
    from stats import metadata
    _stats = None
    try:
        if 'json' in opts:
            _stats = json.dumps(metadata)
        else:
            buf = StringIO()
            yaml.dump(metadata, buf)
            _stats = buf.getvalue()
    except Exception as ex:
        log.error(ex)

    # The function exists to log this; without the call the work is discarded.
    log.info("pyff loadstats: %s" % _stats)
def run(self):
    """
    Fetch ``self.url`` and record the outcome on the instance.

    ``file://`` URLs are read from the local filesystem; everything else is
    fetched with httplib2, optionally through the ``.cache`` directory when
    ``self.enable_cache`` is set. On success ``self.result``, ``self.cached``,
    ``self.date`` and ``self.last_modified`` are set (plus ``self.resp`` for
    HTTP fetches). On any failure the traceback is printed, the exception
    is stored in ``self.ex`` and ``self.result`` is set to None.
    """

    def _parse_date(value):
        # Fall back to the current time when the header is missing or
        # cannot be parsed (parsedate() returns None in that case).
        if value is None:
            return datetime.now()
        parsed = parsedate(value)
        if parsed is None:
            return datetime.now()
        return datetime(*parsed[:6])

    self.start_time = clock()
    try:
        cache = None
        if self.enable_cache:
            log.debug("fetching %s using cache" % self.url)
            cache = ".cache"
        else:
            log.debug("fetching %s without using cache" % self.url)

        if self.url.startswith("file://"):
            path = self.url[7:]
            if not os.path.exists(path):
                raise IOError("File not found: %s" % path)

            with open(path, "r") as fd:
                self.result = fd.read()
                self.cached = False
                self.date = datetime.now()
                self.last_modified = datetime.fromtimestamp(os.stat(path).st_mtime)
        else:
            h = httplib2.Http(
                cache=cache, timeout=20, disable_ssl_certificate_validation=True
            )  # yes this is correct!
            resp, content = h.request(self.url)
            self.resp = resp
            self.last_modified = _parse_date(resp.get("last-modified", resp.get("date", None)))
            # .get(): a response without a Date header must not abort the fetch.
            self.date = _parse_date(resp.get("date", None))
            if resp.status != 200:
                log.error("got %d: %s from %s" % (resp.status, resp.reason, self.url))
                raise IOError(resp.reason)
            self.result = content
            self.cached = resp.fromcache

        log.debug("got %d bytes from %s" % (len(self.result), self.url))
    except Exception as ex:
        traceback.print_exc()
        self.ex = ex
        self.result = None
def loadstats(req, *opts):
    """
    Log (INFO) information about the result of the last call to load

    The statistics are rendered as JSON when ``'json'`` is among the options
    and as YAML otherwise. If rendering fails the error is logged and
    ``None`` is reported.

    :param req: The request
    :param opts: Options: ['json']
    :return: None
    """
    from stats import metadata

    rendered = None
    try:
        if 'json' not in opts:
            stream = StringIO()
            yaml.dump(metadata, stream)
            rendered = stream.getvalue()
        else:
            rendered = json.dumps(metadata)
    except Exception as ex:
        log.error(ex)

    log.info("pyff loadstats: %s" % rendered)
e.get('entityID'), error)) e.getparent().remove(e) self.fire(type=EVENT_DROP_ENTITY, url=base_url, entityID=e.get('entityID'), error=error) else: # Having removed the invalid entities this should now never # happen... schema().assertValid(t) except DocumentInvalid, ex: traceback.print_exc() log.debug("schema validation failed on '%s': %s" % ( base_url, _e(ex.error_log, m=base_url))) raise MetadataException("schema validation failed") except Exception, ex: # log.debug(_e(schema().error_log)) log.error(ex) if fail_on_error: raise ex return None if key is not None: try: log.debug("verifying signature using %s" % key) refs = xmlsec.verified(t, key) if len(refs) != 1: raise MetadataException( "XML metadata contains %d signatures - exactly 1 is required" % len(refs)) t = refs[0] # prevent wrapping attacks except Exception, ex: tb = traceback.format_exc() print tb log.error(ex)
def finalize(req, *opts):
    r"""
    Prepares the working document for publication/rendering.

    :param req: The request
    :param opts: Options (not used)
    :return: returns the working document with @Name, @ID and @validUntil set
    :raises PipeException: if there is no working document

    Set Name, ID and validUntil on the toplevel EntitiesDescriptor element of
    the working document. Unless explicitly provided the @Name is set from
    the request URI if the pipeline is executed in the pyFF server. The @ID
    is set to a string representing the current date/time and will be
    prefixed with the string provided, which defaults to '_'. The
    @validUntil can be either an absolute ISO 8601 time string or (more
    commonly) a relative time on the form

    .. code-block:: none

        \+?([0-9]+d)?\s*([0-9]+h)?\s*([0-9]+m)?\s*([0-9]+s)?

    For instance +45d 2m results in a time delta of 45 days and 2 minutes.
    The '+' sign is optional.

    If operating on a single EntityDescriptor then @Name is ignored (cf
    :py:mod:`pyff.pipes.builtins.first`).

    **Examples**

    .. code-block:: yaml

        - finalize:
            cacheDuration: PT8H
            validUntil: +10d
            ID: pyff
    """
    if req.t is None:
        raise PipeException("Your plumbing is missing a select statement.")

    e = root(req.t)
    if e.tag == "{%s}EntitiesDescriptor" % NS['md']:
        # Name precedence: 'name' arg, 'Name' arg, request URL, existing attribute.
        name = req.args.get('name', None)
        if name is None or not len(name):
            name = req.args.get('Name', None)
        if name is None or not len(name):
            name = req.state.get('url', None)
        if name is None or not len(name):
            name = e.get('Name', None)
        if name is not None and len(name):
            e.set('Name', name)

    now = datetime.utcnow()

    mdid = req.args.get('ID', 'prefix _')
    if re.match(r'(\s)*prefix(\s)*', mdid):
        # "prefix X" means: X followed by a timestamp.
        prefix = re.sub(r'^(\s)*prefix(\s)*', '', mdid)
        ID = now.strftime(prefix + "%Y%m%dT%H%M%SZ")
    else:
        ID = mdid

    # Never overwrite an ID that is already present.
    if not e.get('ID'):
        e.set('ID', ID)

    # Convert to str only when a value exists: str(None) is the non-empty
    # string "None" and would be treated as a (bogus) validity.
    valid_until = req.args.get('validUntil', e.get('validUntil', None))
    if valid_until is not None:
        valid_until = str(valid_until)
    if valid_until:
        offset = duration2timedelta(valid_until)
        if offset is not None:
            dt = now + offset
            e.set('validUntil', dt.strftime("%Y-%m-%dT%H:%M:%SZ"))
        else:
            try:
                dt = iso8601.parse_date(valid_until)
                dt = dt.replace(tzinfo=None)  # make dt "naive" (tz-unaware)
                offset = dt - now
                e.set('validUntil', dt.strftime("%Y-%m-%dT%H:%M:%SZ"))
            except ValueError as ex:
                log.error("Unable to parse validUntil: %s (%s)" % (valid_until, ex))

        # set a reasonable default derived from the validity
        # NOTE(review): the original comment says "50% of the validity" but
        # the code divides by 50 - confirm which one is intended.
        # offset is None when validUntil could not be parsed at all.
        if offset is not None:
            req.state['cache'] = int(total_seconds(offset) / 50)

    return req.t
def load(req, *opts):
    """
    General-purpose resource fetcher.

    :param req: The request
    :param opts: Options: See "Options" below
    :return: None
    :raises PipeException: on a malformed resource specification, or on an
        unloadable resource when fail_on_error is true

    Supports both remote and local resources. Fetching remote resources is
    done in parallel using threads.

    Note: When downloading remote files over HTTPS the TLS server
    certificate is not validated.
    Note: Default behaviour is to ignore metadata files or entities in MD
    files that cannot be loaded

    Options are put directly after "load". E.g:

    .. code-block:: yaml

        - load fail_on_error True filter_invalid False:
          - http://example.com/some_remote_metadata.xml
          - local_file.xml
          - /opt/directory_containing_md_files/

    **Options**
    Defaults are marked with (*)

    - max_workers <5> : Number of parallel threads to use for loading MD files
    - timeout <120> : Socket timeout when downloading files
    - validate <True*|False> : When true downloaded metadata files are
      validated (schema validation)
    - fail_on_error <True|False*> : Controls whether an error during
      download, parsing or (optional) validation of a MD file aborts
      processing of the pipeline. When true a failure aborts and causes pyff
      to exit with a non zero exit code. Otherwise errors are logged but
      ignored.
    - filter_invalid <True*|False> : Controls validation behaviour. When
      true, entities that fail validation are filtered, i.e. are not loaded.
      When false the entire metadata file is either loaded, or not.
      fail_on_error controls whether failure to validate the entire MD file
      will abort processing of the pipeline.
    """
    usage = "Usage: load resource [as url] [[verify] verification] [via pipeline]"

    # Options arrive as a flat (key, value, key, value, ...) sequence.
    opts = dict(zip(opts[::2], opts[1::2]))
    opts.setdefault('timeout', 120)
    opts.setdefault('max_workers', 5)
    opts.setdefault('validate', "True")
    opts.setdefault('fail_on_error', "False")
    opts.setdefault('filter_invalid', "True")
    opts['validate'] = bool(strtobool(opts['validate']))
    opts['fail_on_error'] = bool(strtobool(opts['fail_on_error']))
    opts['filter_invalid'] = bool(strtobool(opts['filter_invalid']))

    remotes = []
    for x in req.args:
        x = x.strip()
        log.debug("load parsing '%s'" % x)
        r = x.split()

        # Raise explicitly: an assert is stripped under -O and would surface
        # as AssertionError rather than PipeException.
        if not 1 <= len(r) <= 6:
            raise PipeException(usage)

        url = r.pop(0)
        params = dict()

        while len(r) > 0:
            elt = r.pop(0)
            if elt in ("as", "verify", "via"):
                if len(r) > 0:
                    params[elt] = r.pop(0)
                else:
                    raise PipeException(usage)
            else:
                # A bare token is shorthand for "verify <token>".
                params['verify'] = elt

        for elt in ("verify", "via"):
            params.setdefault(elt, None)
        params.setdefault('as', url)

        post = None
        if params['via'] is not None:
            post = PipelineCallback(params['via'], req)

        if "://" in url:
            log.debug("load {} verify {} as {} via {}".format(
                url, params['verify'], params['as'], params['via']))
            remotes.append((url, params['verify'], params['as'], post))
        elif os.path.exists(url):
            if os.path.isdir(url):
                log.debug("directory {} verify {} as {} via {}".format(
                    url, params['verify'], params['as'], params['via']))
                req.md.load_dir(url,
                                url=params['as'],
                                validate=opts['validate'],
                                post=post,
                                fail_on_error=opts['fail_on_error'],
                                filter_invalid=opts['filter_invalid'])
            elif os.path.isfile(url):
                log.debug("file {} verify {} as {} via {}".format(
                    url, params['verify'], params['as'], params['via']))
                remotes.append(
                    ("file://%s" % url, params['verify'], params['as'], post))
            else:
                error = "Unknown file type for load: '{}'".format(url)
                if opts['fail_on_error']:
                    raise PipeException(error)
                log.error(error)
        else:
            error = "Don't know how to load '{}' as {} verify {} via {} (file does not exist?)".format(
                url, params['as'], params['verify'], params['via'])
            if opts['fail_on_error']:
                raise PipeException(error)
            log.error(error)

    req.md.fetch_metadata(remotes, **opts)
def certreport(req, *opts):
    """
    Generate a report of the certificates (optionally limited by expiration
    time or key size) found in the selection.

    :param req: The request
    :param opts: Options (not used)
    :return: always returns the unmodified working document
    :raises PipeException: if there is no working document or the arguments
        are not a dict

    **Examples**

    .. code-block:: yaml

        - certreport:
             error_seconds: 0
             warning_seconds: 864000
             error_bits: 1024
             warning_bits: 2048

    For key size checking this will report keys with a size *less* than the
    size specified, defaulting to errors for keys smaller than 1024 bits and
    warnings for keys smaller than 2048 bits. It should be understood as the
    minimum key size for each report level, as such everything below will
    create report entries.

    Remember that you need a 'publish' or 'emit' call after certreport in
    your plumbing to get useful output. PyFF ships with a couple of xslt
    transforms that are useful for turning metadata with certreport
    annotation into HTML.
    """
    if req.t is None:
        raise PipeException("Your pipeline is missing a select statement.")

    if not req.args:
        req.args = {}

    if type(req.args) is not dict:
        raise PipeException("usage: certreport {warning: 864000, error: 0}")

    error_seconds = int(req.args.get('error_seconds', "0"))
    warning_seconds = int(req.args.get('warning_seconds', "864000"))
    error_bits = int(req.args.get('error_bits', "1024"))
    warning_bits = int(req.args.get('warning_bits', "2048"))

    seen = {}  # certificate fingerprints already reported
    for eid in req.t.xpath("//md:EntityDescriptor/@entityID", namespaces=NS, smart_strings=False):
        # Pass the entityID as an XPath variable: interpolating it into the
        # expression breaks on IDs that contain a quote.
        for cd in req.t.xpath("md:EntityDescriptor[@entityID=$eid]//ds:X509Certificate",
                              namespaces=NS, smart_strings=False, eid=eid):
            try:
                cert_pem = cd.text
                cert_der = base64.b64decode(cert_pem)
                m = hashlib.sha1()
                m.update(cert_der)
                fp = m.hexdigest()
                if seen.get(fp, False):
                    continue

                # Five levels up from the X509Certificate element.
                # NOTE(review): presumably the enclosing EntityDescriptor -
                # this depends on the exact nesting of the key descriptor.
                entity_elt = cd.getparent().getparent().getparent().getparent().getparent()
                seen[fp] = True
                cdict = xmlsec.utils.b642cert(cert_pem)
                keysize = cdict['modulus'].bit_length()
                cert = cdict['cert']

                if keysize < error_bits:
                    req.md.annotate(entity_elt,
                                    "certificate-error",
                                    "keysize too small",
                                    "%s has keysize of %s bits (less than %s)" % (cert.getSubject(), keysize, error_bits))
                    log.error("%s has keysize of %s" % (eid, keysize))
                elif keysize < warning_bits:
                    req.md.annotate(entity_elt,
                                    "certificate-warning",
                                    "keysize small",
                                    "%s has keysize of %s bits (less than %s)" % (cert.getSubject(), keysize, warning_bits))
                    log.warn("%s has keysize of %s" % (eid, keysize))

                notafter = cert.getNotAfter()
                if notafter is None:
                    req.md.annotate(entity_elt,
                                    "certificate-error",
                                    "certificate has no expiration time",
                                    "%s has no expiration time" % cert.getSubject())
                else:
                    try:
                        et = datetime.strptime("%s" % notafter, "%y%m%d%H%M%SZ")
                        # The trailing 'Z' marks the timestamp as UTC, so
                        # compare against UTC rather than local time.
                        now = datetime.utcnow()
                        dt = et - now
                        if total_seconds(dt) < error_seconds:
                            req.md.annotate(entity_elt,
                                            "certificate-error",
                                            "certificate has expired",
                                            "%s expired %s ago" % (cert.getSubject(), -dt))
                            log.error("%s expired %s ago" % (eid, -dt))
                        elif total_seconds(dt) < warning_seconds:
                            req.md.annotate(entity_elt,
                                            "certificate-warning",
                                            "certificate about to expire",
                                            "%s expires in %s" % (cert.getSubject(), dt))
                            log.warn("%s expires in %s" % (eid, dt))
                    except ValueError:
                        req.md.annotate(entity_elt,
                                        "certificate-error",
                                        "certificate has unknown expiration time",
                                        "%s unknown expiration time %s" % (cert.getSubject(), notafter))
            except Exception as ex:
                # One unreadable certificate must not abort the whole report.
                log.error(ex)

    return req.t
post = None if params['via'] is not None: post = PipelineCallback(params['via'], req, stats) print post if "://" in url: log.debug("load %s verify %s as %s via %s" % (url, params['verify'], params['as'], params['via'])) remote.append((url, params['verify'], params['as'], post)) elif os.path.exists(url): if os.path.isdir(url): log.debug("directory %s verify %s as %s via %s" % (url, params['verify'], params['as'], params['via'])) req.md.load_dir(url, url=params['as'], validate=opts['validate'], post=post) elif os.path.isfile(url): log.debug("file %s verify %s as %s via %s" % (url, params['verify'], params['as'], params['via'])) remote.append(("file://%s" % url, params['verify'], params['as'], post)) else: log.error("Unknown file type for load: '%s'" % url) else: log.error("Don't know how to load '%s' as %s verify %s via %s" % (url, params['as'], params['verify'], params['via'])) print remote req.md.fetch_metadata(remote, **opts) req.state['stats']['Metadata URLs'] = stats def select(req, *opts): """ Select a set of EntityDescriptor elements as the working document. :param req: The request :param opts: Options - used for select alias :return: returns the result of the operation as a working document
for e in t.findall('{%s}EntityDescriptor' % NS['md']): if not schema().validate(e): error = _e(schema().error_log, m=base_url) log.debug("removing '%s': schema validation failed (%s)" % (e.get('entityID'), error)) e.getparent().remove(e) self.fire(type=EVENT_DROP_ENTITY, url=base_url, entityID=e.get('entityID'), error=error) else: # Having removed the invalid entities this should now never happen... schema().assertValid(t) except DocumentInvalid, ex: traceback.print_exc() log.debug("schema validation failed on '%s': %s" % (base_url, _e(ex.error_log, m=base_url))) raise MetadataException("schema validation failed") except Exception, ex: #log.debug(_e(schema().error_log)) log.error(ex) if fail_on_error: raise ex return None if key is not None: try: log.debug("verifying signature using %s" % key) refs = xmlsec.verified(t, key) if len(refs) != 1: raise MetadataException("XML metadata contains %d signatures - exactly 1 is required" % len(refs)) t = refs[0] # prevent wrapping attacks except Exception, ex: tb = traceback.format_exc() print tb log.error(ex) return None
def certreport(req, *opts):
    """
    Generate a report of the certificates (optionally limited by expiration
    time or key size) found in the selection.

    :param req: The request
    :param opts: Options (not used)
    :return: always returns the unmodified working document
    :raises PipeException: if there is no working document or the arguments
        are not a dict

    **Examples**

    .. code-block:: yaml

        - certreport:
             error_seconds: 0
             warning_seconds: 864000
             error_bits: 1024
             warning_bits: 2048

    For key size checking this will report keys with a size *less* than the
    size specified, defaulting to errors for keys smaller than 1024 bits and
    warnings for keys smaller than 2048 bits. It should be understood as the
    minimum key size for each report level, as such everything below will
    create report entries.

    Remember that you need a 'publish' or 'emit' call after certreport in
    your plumbing to get useful output. PyFF ships with a couple of xslt
    transforms that are useful for turning metadata with certreport
    annotation into HTML.
    """
    if req.t is None:
        raise PipeException("Your pipeline is missing a select statement.")

    if not req.args:
        req.args = {}

    if type(req.args) is not dict:
        raise PipeException("usage: certreport {warning: 864000, error: 0}")

    error_seconds = int(req.args.get('error_seconds', "0"))
    warning_seconds = int(req.args.get('warning_seconds', "864000"))
    error_bits = int(req.args.get('error_bits', "1024"))
    warning_bits = int(req.args.get('warning_bits', "2048"))

    seen = {}  # certificate fingerprints already reported
    for eid in req.t.xpath("//md:EntityDescriptor/@entityID", namespaces=NS, smart_strings=False):
        # Pass the entityID as an XPath variable: interpolating it into the
        # expression breaks on IDs that contain a quote.
        for cd in req.t.xpath("md:EntityDescriptor[@entityID=$eid]//ds:X509Certificate",
                              namespaces=NS, smart_strings=False, eid=eid):
            try:
                cert_pem = cd.text
                cert_der = base64.b64decode(cert_pem)
                m = hashlib.sha1()
                m.update(cert_der)
                fp = m.hexdigest()
                if seen.get(fp, False):
                    continue

                seen[fp] = True
                cdict = xmlsec.utils.b642cert(cert_pem)
                keysize = cdict['modulus'].bit_length()
                cert = cdict['cert']

                def _entity():
                    # Five levels up from the X509Certificate element; looked
                    # up lazily, only when an annotation is actually written.
                    return cd.getparent().getparent().getparent().getparent().getparent()

                if keysize < error_bits:
                    req.md.annotate(_entity(),
                                    "certificate-error",
                                    "keysize too small",
                                    "%s has keysize of %s bits (less than %s)" % (cert.getSubject(), keysize, error_bits))
                    log.error("%s has keysize of %s" % (eid, keysize))
                elif keysize < warning_bits:
                    req.md.annotate(_entity(),
                                    "certificate-warning",
                                    "keysize small",
                                    "%s has keysize of %s bits (less than %s)" % (cert.getSubject(), keysize, warning_bits))
                    log.warn("%s has keysize of %s" % (eid, keysize))

                et = datetime.strptime("%s" % cert.getNotAfter(), "%y%m%d%H%M%SZ")
                # The trailing 'Z' marks the timestamp as UTC, so compare
                # against UTC rather than local time.
                now = datetime.utcnow()
                dt = et - now
                if total_seconds(dt) < error_seconds:
                    req.md.annotate(_entity(),
                                    "certificate-error",
                                    "certificate has expired",
                                    "%s expired %s ago" % (cert.getSubject(), -dt))
                    log.error("%s expired %s ago" % (eid, -dt))
                elif total_seconds(dt) < warning_seconds:
                    req.md.annotate(_entity(),
                                    "certificate-warning",
                                    "certificate about to expire",
                                    "%s expires in %s" % (cert.getSubject(), dt))
                    log.warn("%s expires in %s" % (eid, dt))
            except Exception as ex:
                # One unreadable certificate must not abort the whole report.
                log.error(ex)

    return req.t
def load(req, *opts):
    """
    General-purpose resource fetcher.

    :param req: The request
    :param opts: Options: See "Options" below
    :return: None
    :raises PipeException: if a resource line does not follow the usage pattern, or (only when
        fail_on_error is true) if a local resource does not exist or has an unknown file type

    Supports both remote and local resources. Fetching remote resources is done in parallel using threads.

    Each resource line has the form ``resource [as url] [[verify] verification] [via pipeline]``.

    Note: When downloading remote files over HTTPS the TLS server certificate is not validated.
    Note: Default behaviour is to ignore metadata files or entities in MD files that cannot be loaded

    Options are put directly after "load". E.g:

    .. code-block:: yaml

        - load fail_on_error True filter_invalid False:
          - http://example.com/some_remote_metadata.xml
          - local_file.xml
          - /opt/directory_containing_md_files/

    **Options**
    Defaults are marked with (*)
    - max_workers <5> : Number of parallel threads to use for loading MD files
    - timeout <120> : Socket timeout when downloading files
    - validate <True*|False> : When true downloaded metadata files are validated (schema validation)
    - fail_on_error <True|False*> : Controls whether an error during download, parsing or (optional)
                                    validation of a MD file aborts processing of the pipeline. When true
                                    a failure aborts and causes pyff to exit with a non zero exit code.
                                    Otherwise errors are logged but ignored.
    - filter_invalid <True*|False> : Controls validation behaviour. When true Entities that fail validation
                                     are filtered I.e. are not loaded. When false the entire metadata file
                                     is either loaded, or not. fail_on_error controls whether failure to
                                     validate the entire MD file will abort processing of the pipeline.
    """
    usage = "Usage: load resource [as url] [[verify] verification] [via pipeline]"

    # Options arrive as a flat (name, value, name, value, ...) sequence.
    opts = dict(zip(opts[::2], opts[1::2]))
    opts.setdefault('timeout', 120)
    opts.setdefault('max_workers', 5)
    opts.setdefault('validate', "True")
    opts.setdefault('fail_on_error', "False")
    opts.setdefault('filter_invalid', "True")
    # Explicitly supplied values are presumably text (the boolean defaults above are); normalise the
    # numeric ones so fetch_metadata gets integers whether or not the defaults were used.
    opts['timeout'] = int(opts['timeout'])
    opts['max_workers'] = int(opts['max_workers'])
    opts['validate'] = bool(strtobool(opts['validate']))
    opts['fail_on_error'] = bool(strtobool(opts['fail_on_error']))
    opts['filter_invalid'] = bool(strtobool(opts['filter_invalid']))

    def report(error):
        # Abort the pipeline or merely log the problem, depending on fail_on_error.
        if opts['fail_on_error']:
            raise PipeException(error)
        log.error(error)

    remote = []
    for x in req.args:
        x = x.strip()
        log.debug("load parsing '%s'" % x)
        r = x.split()
        # Explicit check rather than ``assert``: asserts vanish under ``python -O`` and would surface
        # as AssertionError instead of the PipeException used for every other usage error here.
        if not 1 <= len(r) <= 6:
            raise PipeException(usage)

        url = r[0]
        params = dict()
        tokens = iter(r[1:])
        for elt in tokens:
            if elt in ("as", "verify", "via"):
                value = next(tokens, None)
                if value is None:
                    # keyword without a value
                    raise PipeException(usage)
                params[elt] = value
            else:
                # a bare token is shorthand for "verify <token>"
                params['verify'] = elt

        for elt in ("verify", "via"):
            params.setdefault(elt, None)
        params.setdefault('as', url)

        post = None
        if params['via'] is not None:
            post = PipelineCallback(params['via'], req)

        if "://" in url:
            log.debug("load %s verify %s as %s via %s" % (url, params['verify'], params['as'], params['via']))
            remote.append((url, params['verify'], params['as'], post))
        elif os.path.exists(url):
            if os.path.isdir(url):
                log.debug("directory %s verify %s as %s via %s" % (url, params['verify'], params['as'], params['via']))
                req.md.load_dir(url,
                                url=params['as'],
                                validate=opts['validate'],
                                post=post,
                                fail_on_error=opts['fail_on_error'],
                                filter_invalid=opts['filter_invalid'])
            elif os.path.isfile(url):
                log.debug("file %s verify %s as %s via %s" % (url, params['verify'], params['as'], params['via']))
                # single files go through the same fetcher as remote resources
                remote.append(("file://%s" % url, params['verify'], params['as'], post))
            else:
                report("Unknown file type for load: '%s'" % url)
        else:
            report("Don't know how to load '%s' as %s verify %s via %s (file does not exist?)" %
                   (url, params['as'], params['verify'], params['via']))

    req.md.fetch_metadata(remote, **opts)
def consumer(q, njobs, stats, next_jobs=None, resolved=None):
    """
    Collect finished fetch threads from a queue and import the metadata they retrieved.

    NOTE(review): ``self`` and ``timeout`` are not parameters; they are presumably captured from an
    enclosing method's scope -- confirm against the full file.
    NOTE(review): this excerpt ends at a bare ``finally:``; the clause body is not visible here, and
    neither ``nfinished`` nor ``stats`` is updated in the visible part.
    NOTE(review): the nesting below was reconstructed from text that had lost its line structure;
    verify the indentation against the real file.

    :param q: queue-like object; ``q.get(True)`` yields thread objects exposing ``url``, ``id``,
        ``verify``, ``tries``, ``result``, ``ex``, ``cached``, ``date``, ``last_modified``, ``status``
        and ``time()``
    :param njobs: the loop runs while the count of finished jobs is below this number
    :param stats: not referenced in the visible part of the function
    :param next_jobs: list receiving ``(url, verify, id, tries)`` tuples for follow-up fetches
        (links found in XRD documents, and retries); a new list is created when None
    :param resolved: set receiving ``(url, certificate)`` pairs for imported metadata; a new set is
        created when None
    """
    if next_jobs is None:
        next_jobs = []
    if resolved is None:
        resolved = set()
    nfinished = 0

    while nfinished < njobs:
        # per-thread statistics; stays None until the thread has been joined successfully
        info = None
        try:
            log.debug("waiting for next thread to finish...")
            thread = q.get(True)
            thread.join(timeout)

            # still alive after join(timeout) means the fetch did not finish in time
            if thread.isAlive():
                raise MetadataException("thread timeout fetching '%s'" % thread.url)

            info = {
                'Time Spent': thread.time()
            }

            # re-raise whatever the worker recorded so the handler below deals with it
            if thread.ex is not None:
                raise thread.ex
            else:
                if thread.result is not None:
                    info['Bytes'] = len(thread.result)
                else:
                    raise MetadataException("empty response fetching '%s'" % thread.url)
                info['Cached'] = thread.cached
                info['Date'] = str(thread.date)
                info['Last-Modified'] = str(thread.last_modified)
                info['Tries'] = thread.tries

            xml = thread.result.strip()

            if thread.status is not None:
                info['Status'] = thread.status

            t = self.parse_metadata(StringIO(xml), key=thread.verify, base_url=thread.url)
            if t is None:
                self.fire(type=EVENT_IMPORT_FAIL, url=thread.url)
                raise MetadataException("no valid metadata found at '%s'" % thread.url)

            relt = root(t)
            if relt.tag in ('{%s}XRD' % NS['xrd'], '{%s}XRDS' % NS['xrd']):
                # XRD/XRDS document: queue every metadata link it contains as a new job
                log.debug("%s looks like an xrd document" % thread.url)
                for xrd in t.xpath("//xrd:XRD", namespaces=NS):
                    log.debug("xrd: %s" % xrd)
                    for link in xrd.findall(".//{%s}Link[@rel='%s']" % (NS['xrd'], NS['md'])):
                        url = link.get("href")
                        certs = xmlsec.CertDict(link)
                        fingerprints = certs.keys()
                        # the first key of the CertDict (if any) becomes the job's verify value
                        fp = None
                        if len(fingerprints) > 0:
                            fp = fingerprints[0]
                        log.debug("fingerprint: %s" % fp)
                        next_jobs.append((url, fp, url, 0))

            elif relt.tag in ('{%s}EntityDescriptor' % NS['md'], '{%s}EntitiesDescriptor' % NS['md']):
                # SAML metadata: work out the cache lifetime, then import
                cacheDuration = self.default_cache_duration
                if self.respect_cache_duration:
                    cacheDuration = root(t).get('cacheDuration', self.default_cache_duration)
                offset = duration2timedelta(cacheDuration)

                if thread.cached:
                    # a cached copy is rejected once last-modified + cache duration lies further
                    # back than min_cache_ttl before now
                    if thread.last_modified + offset < datetime.now() - duration2timedelta(self.min_cache_ttl):
                        raise MetadataException("cached metadata expired")
                    else:
                        log.debug("found cached metadata for '%s' (last-modified: %s)" % (thread.url, thread.last_modified))
                        ne = self.import_metadata(t, url=thread.id)
                        info['Number of Entities'] = ne
                else:
                    log.debug("got fresh metadata for '%s' (date: %s)" % (thread.url, thread.date))
                    ne = self.import_metadata(t, url=thread.id)
                    info['Number of Entities'] = ne
                    info['Cache Expiration Time'] = str(thread.last_modified + offset)

                # remember the first certificate found on the root element (None if there is none)
                certs = xmlsec.CertDict(relt)
                cert = None
                if certs.values():
                    cert = certs.values()[0].strip()
                resolved.add((thread.url, cert))
            else:
                raise MetadataException("unknown metadata type for '%s' (%s)" % (thread.url, relt.tag))
        except Exception, ex:
            # NOTE(review): if q.get() itself raised, ``thread`` is not bound for this iteration and
            # the lines below would fail -- confirm whether that can happen.
            #traceback.print_exc(ex)
            log.warn("problem fetching '%s' (will retry): %s" % (thread.url, ex))
            if info is not None:
                info['Exception'] = ex
            # re-queue with an incremented try counter until retry_limit is reached
            if thread.tries < self.retry_limit:
                next_jobs.append((thread.url, thread.verify, thread.id, thread.tries + 1))
            else:
                #traceback.print_exc(ex)
                log.error("retry limit exceeded for %s (last error was: %s)" % (thread.url, ex))
        finally:
def consumer(q, njobs, stats, next_jobs=None, resolved=None):
    """
    Collect finished fetch threads from a queue and import the metadata they retrieved.

    NOTE(review): ``self`` and ``timeout`` are not parameters; they are presumably captured from an
    enclosing method's scope -- confirm against the full file.
    NOTE(review): this excerpt ends at a bare ``finally:``; the clause body is not visible here, and
    neither ``nfinished`` nor ``stats`` is updated in the visible part.
    NOTE(review): the nesting below was reconstructed from text that had lost its line structure;
    verify the indentation against the real file.

    :param q: queue-like object; ``q.get(True)`` yields thread objects exposing ``url``, ``id``,
        ``verify``, ``tries``, ``result``, ``ex``, ``cached``, ``date``, ``last_modified``, ``status``,
        ``resp`` and ``time()``
    :param njobs: the loop runs while the count of finished jobs is below this number
    :param stats: not referenced in the visible part of the function
    :param next_jobs: list receiving ``(url, verify, id, tries)`` tuples for follow-up fetches
        (links found in XRD documents, and retries); a new list is created when None
    :param resolved: set receiving ``(url, certificate)`` pairs for imported metadata; a new set is
        created when None
    """
    if next_jobs is None:
        next_jobs = []
    if resolved is None:
        resolved = set()
    nfinished = 0

    while nfinished < njobs:
        # per-thread statistics; stays None until the thread has been joined successfully
        info = None
        try:
            log.debug("waiting for next thread to finish...")
            thread = q.get(True)
            thread.join(timeout)

            # still alive after join(timeout) means the fetch did not finish in time
            if thread.isAlive():
                raise MetadataException(
                    "thread timeout fetching '%s'" % thread.url)

            info = {
                'Time Spent': thread.time()
            }

            # re-raise whatever the worker recorded so the handler below deals with it
            if thread.ex is not None:
                raise thread.ex
            else:
                if thread.result is not None:
                    info['Bytes'] = len(thread.result)
                else:
                    raise MetadataException(
                        "empty response fetching '%s'" % thread.url)
                info['Cached'] = thread.cached
                info['Date'] = str(thread.date)
                info['Last-Modified'] = str(thread.last_modified)
                info['Tries'] = thread.tries

            xml = thread.result.strip()

            # NOTE(review): the guard tests thread.status but the stored value is read from
            # thread.resp.status_code -- confirm the two always agree.
            if thread.status is not None:
                info['Status'] = thread.resp.status_code

            t = self.parse_metadata(
                StringIO(xml), key=thread.verify, base_url=thread.url)
            if t is None:
                self.fire(type=EVENT_IMPORT_FAIL, url=thread.url)
                raise MetadataException(
                    "no valid metadata found at '%s'" % thread.url)

            relt = root(t)
            if relt.tag in ('{%s}XRD' % NS['xrd'], '{%s}XRDS' % NS['xrd']):
                # XRD/XRDS document: queue every metadata link it contains as a new job
                log.debug("%s looks like an xrd document" % thread.url)
                for xrd in t.xpath("//xrd:XRD", namespaces=NS):
                    log.debug("xrd: %s" % xrd)
                    for link in xrd.findall(".//{%s}Link[@rel='%s']" % (NS['xrd'], NS['md'])):
                        url = link.get("href")
                        certs = xmlsec.CertDict(link)
                        fingerprints = certs.keys()
                        # the first key of the CertDict (if any) becomes the job's verify value
                        fp = None
                        if len(fingerprints) > 0:
                            fp = fingerprints[0]
                        log.debug("fingerprint: %s" % fp)
                        next_jobs.append((url, fp, url, 0))

            elif relt.tag in ('{%s}EntityDescriptor' % NS['md'], '{%s}EntitiesDescriptor' % NS['md']):
                # SAML metadata: work out the cache lifetime, then import
                cacheDuration = self.default_cache_duration
                if self.respect_cache_duration:
                    cacheDuration = root(t).get(
                        'cacheDuration', self.default_cache_duration)
                offset = duration2timedelta(cacheDuration)

                if thread.cached:
                    # a cached copy is rejected once last-modified + cache duration lies further
                    # back than min_cache_ttl before now
                    if thread.last_modified + offset < datetime.now() - duration2timedelta(self.min_cache_ttl):
                        raise MetadataException(
                            "cached metadata expired")
                    else:
                        log.debug("found cached metadata for '%s' (last-modified: %s)" %
                                  (thread.url, thread.last_modified))
                        ne = self.import_metadata(t, url=thread.id)
                        info['Number of Entities'] = ne
                else:
                    log.debug("got fresh metadata for '%s' (date: %s)" % (
                        thread.url, thread.date))
                    ne = self.import_metadata(t, url=thread.id)
                    info['Number of Entities'] = ne
                    info['Cache Expiration Time'] = str(
                        thread.last_modified + offset)

                # remember the first certificate found on the root element (None if there is none)
                certs = xmlsec.CertDict(relt)
                cert = None
                if certs.values():
                    cert = certs.values()[0].strip()
                resolved.add((thread.url, cert))
            else:
                raise MetadataException(
                    "unknown metadata type for '%s' (%s)" % (thread.url, relt.tag))
        except Exception, ex:
            # NOTE(review): if q.get() itself raised, ``thread`` is not bound for this iteration and
            # the lines below would fail -- confirm whether that can happen.
            # traceback.print_exc(ex)
            log.warn("problem fetching '%s' (will retry): %s" %
                     (thread.url, ex))
            if info is not None:
                info['Exception'] = ex
            # re-queue with an incremented try counter until retry_limit is reached
            if thread.tries < self.retry_limit:
                next_jobs.append(
                    (thread.url, thread.verify, thread.id, thread.tries + 1))
            else:
                # traceback.print_exc(ex)
                log.error(
                    "retry limit exceeded for %s (last error was: %s)" % (thread.url, ex))
        finally:
def certreport(req, *opts):
    """
    Generate a report of the certificates (optionally limited by expiration time) found in the selection.

    :param req: The request
    :param opts: Options (not used)
    :return: None (the working document is not replaced; findings are recorded via ``req.md.annotate``)
    :raises PipeException: if there is no current selection or the arguments are not a mapping

    **Examples**

    .. code-block:: yaml

        - certreport:
             error_seconds: 0
             warning_seconds: 864000

    The short spellings ``error`` and ``warning`` (as shown in the usage message) are accepted as well
    and take precedence when both spellings are given.

    Each distinct certificate (by SHA-1 fingerprint of its DER form) is examined once; problems with a
    single certificate are logged and do not stop the report.

    Remember that you need a 'publish' or 'emit' call after certreport in your plumbing to get useful output.
    PyFF ships with a couple of xslt transforms that are useful for turning metadata with certreport
    annotation into HTML.
    """
    if req.t is None:
        raise PipeException("Your plumbing is missing a select statement.")

    if not req.args:
        req.args = {}

    if not isinstance(req.args, dict):
        raise PipeException("usage: certreport {warning: 864000, error: 0}")

    # The docstring example always used the *_seconds names while only the short names were read, so
    # documented settings were silently ignored; honour both spellings.
    args = req.args
    error_seconds = int(args.get('error', args.get('error_seconds', "0")))
    warning_seconds = int(args.get('warning', args.get('warning_seconds', "864000")))

    # The notAfter value carries a trailing 'Z' (UTC), so it has to be compared with UTC "now".
    now = datetime.utcnow()
    seen = set()

    # Walk the entity elements themselves instead of re-querying the tree with the entityID pasted into
    # an XPath string: that broke on IDs containing a quote, cost one full search per entity, and forced
    # the code to guess the owning entity from a fixed number of getparent() hops.
    for entity in req.t.xpath("//md:EntityDescriptor[@entityID]", namespaces=NS):
        eid = entity.get('entityID')
        for cd in entity.xpath(".//ds:X509Certificate", namespaces=NS):
            try:
                cert_pem = cd.text
                fp = hashlib.sha1(base64.b64decode(cert_pem)).hexdigest()
                if fp in seen:
                    continue
                seen.add(fp)

                cdict = xmlsec.b642cert(cert_pem)
                cert = cdict['cert']

                # X.509 validity is either UTCTime (YYMMDDHHMMSSZ, 13 characters) or
                # GeneralizedTime (YYYYMMDDHHMMSSZ, 15 characters); choose the format by length.
                not_after = "%s" % cert.getNotAfter()
                time_format = "%y%m%d%H%M%SZ" if len(not_after) == 13 else "%Y%m%d%H%M%SZ"
                et = datetime.strptime(not_after, time_format)
                dt = et - now

                if total_seconds(dt) < error_seconds:
                    req.md.annotate(entity,
                                    "certificate-error",
                                    "certificate has expired",
                                    "%s expired %s ago" % (cert.getSubject(), -dt))
                    log.error("%s expired %s ago" % (eid, -dt))
                elif total_seconds(dt) < warning_seconds:
                    req.md.annotate(entity,
                                    "certificate-warning",
                                    "certificate about to expire",
                                    "%s expires in %s" % (cert.getSubject(), dt))
                    log.warn("%s expires in %s" % (eid, dt))
            except Exception as ex:
                # Best effort: one unreadable certificate must not abort the whole report.
                log.error(ex)
def debug_observer(e):
    """
    Write the ``repr`` of whatever object is handed in to the log, at error level.

    :param e: the object to describe
    """
    description = repr(e)
    log.error(description)