def _filter(req, *opts):
    """
    Refines the working document by applying a filter. The filter expression is
    a subset of the select semantics and syntax:

    .. code-block:: yaml

        - filter:
            - "!//md:EntityDescriptor[md:SPSSODescriptor]"
            - "https://idp.example.com/shibboleth"

    This would select all SPs and any entity with entityID
    "https://idp.example.com/shibboleth" from the current working document
    and return as the new working document. Filter also supports the
    "as <alias>" construction from select allowing new synthetic collections
    to be created from filtered documents.

    :param req: the pipeline request; req.t is the current working document
    :param opts: either a single collection name, or ('as', <name>)
    :return: the filtered working document
    :raises PipeException: if there is no working document or the filter is empty
    """
    if req.t is None:
        raise PipeException(
            "Unable to filter on an empty document - use select first")

    # Default collection name comes from the plumbing id; an explicit name or
    # an "as <alias>" option overrides it. (The default must be assigned
    # BEFORE parsing opts - assigning it afterwards clobbered the alias name.)
    name = req.plumbing.id
    alias = False
    if len(opts) > 0:
        if opts[0] != 'as' and len(opts) == 1:
            name = opts[0]
            alias = True
        if opts[0] == 'as' and len(opts) == 2:
            name = opts[1]
            alias = True

    args = req.args
    if args is None or not args:
        args = []

    def _find(member):
        # Resolve filter members against the current working document only.
        return req.md.find(req.t, member)

    ot = req.md.entity_set(args, name, lookup_fn=_find, copy=False)
    if alias:
        # Publish the filtered set as a synthetic collection under the alias.
        nfo = dict(Status='default', Description="Synthetic collection")
        n = req.md.store.update(ot, name)
        nfo['Size'] = str(n)
        set_metadata_info(name, nfo)

    # Filtering consumes the working document.
    req.t = None

    if ot is None:
        raise PipeException("empty filter - stop")

    return ot
def _filter(req, *opts):
    """
    Refines the working document by applying a filter. The filter expression is
    a subset of the select semantics and syntax:

    .. code-block:: yaml

        - filter:
            - "!//md:EntityDescriptor[md:SPSSODescriptor]"
            - "https://idp.example.com/shibboleth"

    This would select all SPs and any entity with entityID
    "https://idp.example.com/shibboleth" from the current working document
    and return as the new working document. Filter also supports the
    "as <alias>" construction from select allowing new synthetic collections
    to be created from filtered documents.

    :param req: the pipeline request; req.t is the current working document
    :param opts: either a single collection name, or ('as', <name>)
    :return: the filtered working document
    :raises PipeException: if there is no working document or the filter is empty
    """
    if req.t is None:
        raise PipeException("Unable to filter on an empty document - use select first")

    # Assign the default name BEFORE the opts parsing so that an explicit
    # name or "as <alias>" can override it; the previous ordering overwrote
    # the alias name unconditionally, making the opts handling dead code.
    name = req.plumbing.id
    alias = False
    if len(opts) > 0:
        if opts[0] != 'as' and len(opts) == 1:
            name = opts[0]
            alias = True
        if opts[0] == 'as' and len(opts) == 2:
            name = opts[1]
            alias = True

    args = req.args
    if args is None or not args:
        args = []

    def _find(member):
        # Look members up in the working document, not the whole repository.
        return req.md.find(req.t, member)

    ot = req.md.entity_set(args, name, lookup_fn=_find, copy=False)
    if alias:
        # Register the filtered result as a named synthetic collection.
        nfo = dict(Status='default', Description="Synthetic collection")
        n = req.md.store.update(ot, name)
        nfo['Size'] = str(n)
        set_metadata_info(name, nfo)

    # The working document is consumed by the filter.
    req.t = None

    if ot is None:
        raise PipeException("empty filter - stop")

    return ot
def load_dir(self, directory, ext=".xml", url=None, validate=False, post=None,
             description=None, fail_on_error=True, filter_invalid=True):
    """
    Traverse a directory tree looking for metadata.

    :param directory: A directory to walk.
    :param ext: Include files with this extension (default .xml)
    :param url: identifier for the resulting collection (defaults to directory)
    :param validate: schema-validate the parsed metadata
    :param post: optional post-processing hook passed to parse_metadata
    :param description: human-readable description of the collection
    :param fail_on_error: raise MetadataException on parse errors (else log only)
    :param filter_invalid: drop invalid entities instead of failing

    Files ending in the specified extension are included. Directories
    starting with '.' are excluded.
    """
    if url is None:
        url = directory

    if description is None:
        description = "All entities found in %s" % directory

    entities = []
    for top, dirs, files in os.walk(directory):
        # Prune hidden directories so os.walk does not descend into them.
        # Rebinding via slice assignment keeps the list object os.walk
        # iterates; calling dirs.remove() inside "for dn in dirs" (the old
        # approach) skipped entries when two hidden dirs were adjacent.
        dirs[:] = [dn for dn in dirs if not dn.startswith(".")]
        for nm in files:
            if nm.endswith(ext):
                log.debug("parsing from file %s" % nm)
                fn = os.path.join(top, nm)
                try:
                    validation_errors = dict()
                    t, valid_until = self.parse_metadata(fn,
                                                         base_url=url,
                                                         fail_on_error=fail_on_error,
                                                         filter_invalid=filter_invalid,
                                                         validate=validate,
                                                         validation_errors=validation_errors,
                                                         post=post)
                    entities.extend(entities_list(t))  # local metadata is assumed to be ok
                    for (eid, error) in validation_errors.items():
                        log.error(error)
                except Exception as ex:
                    if fail_on_error:
                        raise MetadataException('Error parsing "%s": %s' % (fn, str(ex)))
                    log.error(ex)

    if entities:
        info = dict(Description=description)
        n = self.store.update(self.entity_set(entities, url, validate=validate, copy=False), url)
        info['Size'] = str(n)
        set_metadata_info(url, info)
    else:
        log.info("no entities found in %s" % directory)
def select(req, *opts):
    """
    Select a set of EntityDescriptor elements as the working document.

    :param req: The request
    :param opts: Options - used for select alias
    :return: returns the result of the operation as a working document

    Picks and expands entities (with optional filtering) from the active
    repository populated via :py:mod:`pyff.pipes.builtins.load`. See
    :py:mod:`pyff.mdrepo.MDRepository.lookup` for selector syntax.

    .. code-block:: yaml

        - select                                     # everything
        - select: "/var/local-metadata"              # one source
        - select: "!//md:EntityDescriptor[md:SPSSODescriptor]"   # all SPs

    Select statements are not cumulative - each select replaces the working
    document with its own result, and an empty result stops the plumbing
    (useful together with fork to handle multiple cases).

    The 'as' keyword stores the selection as an alias in the local
    repository, e.g.

    .. code-block:: yaml

        - select as foo-2.0: "!//md:EntityDescriptor[md:IDPSSODescriptor]"

    makes /foo-2.0.json refer to the JSON-version of all IdPs. Do not put a
    file extension in the alias, or it becomes invisible for every other
    mime type.
    """
    args = _select_args(req)

    # The plumbing id is the default collection name; a bare option or an
    # "as <alias>" pair overrides it and requests alias registration.
    name = req.plumbing.id
    alias = False
    if opts:
        if len(opts) == 1 and opts[0] != 'as':
            name, alias = opts[0], True
        elif len(opts) == 2 and opts[0] == 'as':
            name, alias = opts[1], True

    entities = req.md.entity_set(args, name)
    if entities is None:
        raise PipeException("empty select - stop")

    if alias:
        # Publish the selection under the alias as a synthetic collection.
        info = dict(Status='default', Description="Synthetic collection")
        info['Size'] = str(req.md.store.update(entities, name))
        set_metadata_info(name, info)

    return entities
def _process_url(rurl, verifier, tid, post, enable_cache=True):
    """
    Fetch one remote metadata URL, record bookkeeping in an info dict and
    ingest the parsed result into the store.

    :param rurl: remote URL to fetch
    :param verifier: key/fingerprint used to verify the document signature
    :param tid: target collection id the result is stored under
    :param post: post-processing hook handed to parse_metadata
    :param enable_cache: pass-through to load_url HTTP caching
    :return: list of (url, verifier, tid, post, enable_cache) tuples to retry

    NOTE(review): this is a closure - tries, timeout, validate, max_tries
    and self come from the enclosing _fetch_metadata scope.
    """
    tries.setdefault(rurl, 0)
    resource = load_url(rurl, timeout=timeout, enable_cache=enable_cache)

    # Guard before touching resource.result: the previous code called
    # .strip() first, so an empty response died with AttributeError instead
    # of raising the intended MetadataException.
    if resource.result is None:
        raise MetadataException("empty response fetching '%s'" % resource.url)

    xml = resource.result.strip()
    retry_resources = []
    info = {'Time Spent': "%s seconds" % resource.time}
    tries[rurl] += 1
    info['Tries'] = str(tries[rurl])
    info['Bytes'] = str(len(resource.result))
    info['URL'] = str(rurl)
    info['Cached'] = str(resource.cached)
    info['Date'] = str(resource.date)
    info['Last-Modified'] = str(resource.last_modified)
    info['Validation Errors'] = dict()
    info['Description'] = "Remote metadata"
    info['Status'] = 'success'
    if not validate:
        info['Status'] = 'warning'
        info['Description'] += " (un-validated)"
    if not enable_cache:
        info['Status'] = 'info'
    if resource.resp is not None:
        info['HTTP Response'] = resource.resp

    t, offset = self.parse_metadata(StringIO(xml),
                                    key=verifier,
                                    base_url=rurl,
                                    validate=validate,
                                    validation_errors=info['Validation Errors'],
                                    expiration=self.expiration,
                                    post=post)
    # Check for parse failure BEFORE calling root(t): root(None) raised an
    # unrelated exception instead of the intended MetadataException.
    if t is None:
        self.fire(type=EVENT_IMPORT_FAIL, url=rurl)
        raise MetadataException("no valid metadata found at '%s'" % rurl)
    relt = root(t)

    expired = False
    if offset is not None:
        expire_time = datetime.now() + offset
        ttl = offset.total_seconds()
        info['Expiration Time'] = str(expire_time)
        info['Cache TTL'] = str(ttl)
        if ttl < self.min_cache_ttl:
            if tries[rurl] < max_tries:  # try to get fresh md but we'll use what we have anyway
                retry_resources.append((rurl, verifier, tid, post, False))
            else:
                log.error("giving up on %s" % rurl)
        if ttl < 0:
            expired = True

    if not expired:
        if relt.tag in ('{%s}XRD' % NS['xrd'], '{%s}XRDS' % NS['xrd']):
            # XRD(S) documents point at further metadata - queue the linked
            # URLs (verified by their embedded certs) for fetching.
            if log.isDebugEnabled():
                log.debug("%s looks like an xrd document" % rurl)
            for xrd in t.iter("{%s}XRD" % NS['xrd']):
                for link in xrd.findall(".//{%s}Link[@rel='%s']" % (NS['xrd'], NS['md'])):
                    link_href = link.get("href")
                    certs = xmlsec.CertDict(link)
                    fingerprints = certs.keys()
                    fp = None
                    if len(fingerprints) > 0:
                        fp = fingerprints[0]
                    if log.isDebugEnabled():
                        log.debug("XRD: '%s' verified by '%s'" % (link_href, fp))
                    tries.setdefault(link_href, 0)
                    if tries[link_href] < max_tries:
                        retry_resources.append((link_href, fp, link_href, post, True))
        elif relt.tag in ('{%s}EntityDescriptor' % NS['md'], '{%s}EntitiesDescriptor' % NS['md']):
            n = self.store.update(t, tid)
            info['Size'] = str(n)
        else:
            raise MetadataException(
                "unknown metadata type for '%s' (%s)" % (rurl, relt.tag))

    set_metadata_info(tid, info)
    if log.isDebugEnabled():
        log.debug(info)

    return retry_resources
# NOTE(review): this chunk begins mid-function - the enclosing try: of a
# load_dir-style loop is outside the visible source. Python 2 syntax
# (except Exception, ex / unicode) - do not port blindly.
t, valid_until = self.parse_metadata(
    fn, fail_on_error=True, validate=validate, post=post)
entities.extend(entities_list(
    t))  # local metadata is assumed to be ok
except Exception, ex:
    # Best-effort: parse errors are logged, the walk continues.
    log.error(ex)
if entities:
    # Register everything found as one collection identified by url.
    info = dict(Description=description)
    n = self.store.update(
        self.entity_set(entities, url, validate=validate), url)
    info['Size'] = str(n)
    set_metadata_info(url, info)
else:
    log.info("no entities found in %s" % directory)

def _lookup(self, member):
    """Resolve a member expression to entities.

    A string of the form "<src>!<xpath>" is split and delegated to
    self.lookup; an empty <src> means "any source". None is treated as
    the special collection "entities".

    NOTE(review): the definition appears to continue past this chunk -
    the non-string / non-'!' fall-through is not visible here.
    """
    if member is None:
        member = "entities"

    if type(member) is str or type(member) is unicode:
        if '!' in member:
            (src, xp) = member.split("!")
            if len(src) == 0:
                src = None
            return self.lookup(src, xp)

    log.debug("calling store lookup %s" % member)
def _fetch_metadata(self, resources, max_workers=5, timeout=120, max_tries=5,
                    validate=False, fail_on_error=False, filter_invalid=True):
    """Fetch and ingest a batch of remote metadata resources.

    :param resources: iterable of resources to fetch
    :param max_workers: worker pool size (used past this chunk - TODO confirm)
    :param timeout: per-request timeout in seconds passed to load_url
    :param max_tries: cap on re-fetch attempts per URL
    :param validate: schema-validate fetched documents
    :param fail_on_error: propagate parse errors from parse_metadata
    :param filter_invalid: drop invalid entities instead of failing

    NOTE(review): the method body continues past the visible chunk - only
    the nested _process_url helper is fully visible here. Python 2 syntax
    (except Exception, ex) throughout.
    """
    # Per-URL attempt counter shared with the _process_url closure.
    tries = dict()

    def _process_url(rurl, verifier, tid, post, enable_cache=True):
        # Fetch one URL, record bookkeeping in `info`, ingest the result
        # into the store, and return a list of (url, verifier, tid, post,
        # enable_cache) tuples that should be retried.
        tries.setdefault(rurl, 0)
        try:
            resource = load_url(rurl, timeout=timeout, enable_cache=enable_cache)
        except Exception, ex:
            raise MetadataException(ex, "Exception fetching '%s': %s" % (rurl, str(ex)) )
        if (not resource.result):
            raise MetadataException("error fetching '%s'" % rurl)
        xml = resource.result.strip()
        retry_resources = []
        info = {
            'Time Spent': "%s seconds" % resource.time
        }
        tries[rurl] += 1
        info['Tries'] = str(tries[rurl])
        # resource.result was already checked truthy above, so this branch
        # always records the byte count.
        if resource.result is not None:
            info['Bytes'] = str(len(resource.result))
        else:
            raise MetadataException("empty response fetching '%s'" % resource.url)
        info['URL'] = str(rurl)
        info['Cached'] = str(resource.cached)
        info['Date'] = str(resource.date)
        info['Last-Modified'] = str(resource.last_modified)
        info['Validation Errors'] = dict()
        info['Description'] = "Remote metadata"
        info['Status'] = 'success'
        if not validate:
            info['Status'] = 'warning'
            info['Description'] += " (un-validated)"
        if not enable_cache:
            info['Status'] = 'info'
        if resource.resp is not None:
            info['HTTP Response'] = resource.resp
        t, offset = self.parse_metadata(StringIO(xml),
                                        key=verifier,
                                        base_url=rurl,
                                        fail_on_error=fail_on_error,
                                        filter_invalid=filter_invalid,
                                        validate=validate,
                                        validation_errors=info['Validation Errors'],
                                        expiration=self.expiration,
                                        post=post)
        if t is None:
            self.fire(type=EVENT_IMPORT_FAIL, url=rurl)
            raise MetadataException("no valid metadata found at '%s'" % rurl)
        relt = root(t)
        expired = False
        if offset is not None:
            # offset is the document's validity window as a timedelta.
            expire_time = datetime.now() + offset
            ttl = offset.total_seconds()
            info['Expiration Time'] = str(expire_time)
            info['Cache TTL'] = str(ttl)
            if ttl < self.min_cache_ttl:
                if tries[rurl] < max_tries:  # try to get fresh md but we'll use what we have anyway
                    retry_resources.append((rurl, verifier, tid, post, False))
                else:
                    log.error("giving up on %s" % rurl)
            if ttl < 0:
                expired = True
        if not expired:
            if relt.tag in ('{%s}XRD' % NS['xrd'], '{%s}XRDS' % NS['xrd']):
                # XRD(S) indirection: queue each linked metadata URL for
                # fetching, verified by the cert embedded in the link.
                if log.isDebugEnabled():
                    log.debug("%s looks like an xrd document" % rurl)
                for xrd in t.iter("{%s}XRD" % NS['xrd']):
                    for link in xrd.findall(".//{%s}Link[@rel='%s']" % (NS['xrd'], NS['md'])):
                        link_href = link.get("href")
                        certs = xmlsec.CertDict(link)
                        fingerprints = certs.keys()
                        fp = None
                        if len(fingerprints) > 0:
                            fp = fingerprints[0]
                        if log.isDebugEnabled():
                            log.debug("XRD: '%s' verified by '%s'" % (link_href, fp))
                        tries.setdefault(link_href, 0)
                        if tries[link_href] < max_tries:
                            retry_resources.append((link_href, fp, link_href, post, True))
            elif relt.tag in ('{%s}EntityDescriptor' % NS['md'], '{%s}EntitiesDescriptor' % NS['md']):
                n = self.store.update(t, tid)
                info['Size'] = str(n)
            else:
                raise MetadataException("unknown metadata type for '%s' (%s)" % (rurl, relt.tag))
        set_metadata_info(tid, info)
        if log.isDebugEnabled():
            log.debug(info)
        return retry_resources
# NOTE(review): this chunk begins mid-call - the parse_metadata(...) call
# and the enclosing try: are outside the visible source. Python 2 syntax
# (except Exception, ex / iteritems / unicode) - do not port blindly.
validate=validate, validation_errors=validation_errors, post=post)
entities.extend(entities_list(t))  # local metadata is assumed to be ok
for (eid, error) in validation_errors.iteritems():
    log.error(error)
except Exception, ex:
    if fail_on_error:
        raise MetadataException('Error parsing "%s": %s' % (fn, str(ex)))
    # Otherwise best-effort: log and keep walking.
    log.error(ex)
if entities:
    # Register everything found as one collection identified by url.
    info = dict(Description=description)
    n = self.store.update(self.entity_set(entities, url, validate=validate, copy=False), url)
    info['Size'] = str(n)
    set_metadata_info(url, info)
else:
    log.info("no entities found in %s" % directory)

def find(self, t, member):
    """Find entities in document t matching a member expression.

    :param t: an ElementTree-style document to search
    :param member: either "<src>!<xpath>" (the xpath part is evaluated
        against the document root; the src part is split off but unused
        here) or a plain entityID string matched exactly.
    :return: a list of matching elements; implicitly None when member is
        neither str nor unicode.
    """
    relt = root(t)

    if type(member) is str or type(member) is unicode:
        if '!' in member:
            # NOTE(review): src is discarded - only the xpath is applied.
            (src, xp) = member.split("!")
            return relt.xpath(xp, namespaces=NS, smart_strings=False)
        else:
            # Exact entityID match over all entities in the document.
            lst = []
            for e in iter_entities(relt):
                if e.get('entityID') == member:
                    lst.append(e)
            return lst