def dump(req, *opts):
    """
    Print a representation of the entities set on stdout. Useful for testing.

    :param req: The request
    :param opts: Options (unused)
    :return: None
    """
    # A single parenthesised argument prints identically under the Python 2
    # print statement and the Python 3 print function, so this form runs on both.
    if req.t is not None:
        print(dumptree(req.t))
    else:
        # No working document: print an empty EntitiesDescriptor element instead.
        print("<EntitiesDescriptor xmlns=\"%s\"/>" % NS['md'])
def entity_info(e, langs=None):
    """
    Build a dictionary describing the entity ``e``.

    Starts from the result of ``entity_simple_summary(e)`` and adds one key per
    helper called below (keywords, privacy statement URL, geo location,
    organisation URL, service name/description, requested attributes, entity
    attributes, contacts, NameID formats, role flags and the serialized XML).

    :param e: the entity element
    :param langs: language preference handed to the language-aware helpers
    :return: the summary dictionary
    """
    d = entity_simple_summary(e)

    keywords = filter_lang(e.iter("{%s}Keywords" % NS['mdui']), langs=langs)
    if keywords is not None:
        # Skip elements without text so that join() never sees None.
        lst = [elt.text for elt in keywords if elt.text]
        if lst:
            d['keywords'] = ",".join(lst)

    d['privacy_statement_url'] = privacy_statement_url(e, langs)
    d['geo'] = entity_geoloc(e)
    d['orgurl'] = entity_orgurl(e, langs)
    d['service_name'] = entity_service_name(e, langs)
    d['service_descr'] = entity_service_description(e, langs)
    d['requested_attributes'] = entity_requested_attributes(e, langs)
    d['entity_attributes'] = entity_attribute_dict(e)
    d['contacts'] = entity_contacts(e)
    d['name_id_formats'] = entity_nameid_formats(e)
    d['is_idp'] = is_idp(e)
    d['is_sp'] = is_sp(e)
    d['is_aa'] = is_aa(e)

    # The angle brackets are replaced by character entities. The previous
    # replace('<', '<') / replace('>', '>') pair substituted each character
    # with itself and therefore did nothing.
    xml = dumptree(e, xml_declaration=False, pretty_print=True).decode('utf8')
    d['xml'] = xml.replace('<', '&lt;').replace('>', '&gt;')

    if d['is_idp']:
        # protocolSupportEnumeration is a whitespace separated list
        d['protocols'] = entity_idp(e).get('protocolSupportEnumeration', "").split()

    return d
def update_entity(self, relt, t, tid, ts, p=None):
    """
    Store the serialized tree and the attributes of ``relt`` under ``tid``.

    :param relt: element whose ``attrib`` mapping is written to the hash at ``tid``
    :param t: tree handed to ``dumptree``; the result is stored at ``"<tid>#metadata"``
    :param tid: key of the hash and prefix of the metadata key
    :param ts: value passed to ``expireat`` for both keys; ``None`` sets no expiry
    :param p: object the commands are issued on; ``self.rc`` when omitted
    """
    target = self.rc if p is None else p
    expires = ts is not None

    target.set("%s#metadata" % tid, dumptree(t))
    self._get_metadata.invalidate(tid)  # invalidate the parse-cache entry
    if expires:
        target.expireat("%s#metadata" % tid, ts)

    # 'expires' is written first; an attribute of the same name overrides it.
    info = {'expires': ts}
    info.update(**relt.attrib)
    target.hmset(tid, info)
    if expires:
        target.expireat(tid, ts)
def dump(req, *opts):
    """
    Write the current entities set to stdout; handy when testing a pipeline.

    :param req: The request
    :param opts: Options (unused)
    :return: None
    """
    if req.t is None:
        # Nothing selected: print an empty EntitiesDescriptor element.
        print("<EntitiesDescriptor xmlns=\"{}\"/>".format(NS['md']))
        return

    print(dumptree(req.t))
def publish(req, *opts):
    """
    Publish the working document in XML form.

    :param req: The request
    :param opts: Options (unused)
    :return: the working document (``req.t``)
    :raises PipeException: if there is no working document, no output argument,
        or the document fails schema validation

    Publish takes one argument: path to a file where the document tree will be written.

    **Examples**

    .. code-block:: yaml

        - publish: /tmp/idp.xml
    """
    if req.t is None:
        raise PipeException("Empty document submitted for publication")

    if req.args is None:
        raise PipeException("publish must specify output")

    try:
        validate_document(req.t)
    except DocumentInvalid as ex:
        log.error(ex.error_log)
        raise PipeException("XML schema validation failed")

    # isinstance also accepts dict subclasses, which "type(x) is dict" rejected.
    if isinstance(req.args, dict):
        output_file = req.args.get("output", None)
    else:
        try:
            output_file = req.args[0]
        except IndexError:
            # An empty argument list is reported like a missing argument.
            raise PipeException("publish must specify output")

    if output_file is not None:
        output_file = output_file.strip()
        log.debug("publish {}".format(output_file))

        # FILESPEC_REGEX may split the argument into a file part and a resource name.
        resource_name = output_file
        m = re.match(FILESPEC_REGEX, output_file)
        if m:
            output_file = m.group(1)
            resource_name = m.group(2)
        log.debug("output_file={}, resource_name={}".format(output_file, resource_name))

        out = output_file
        if os.path.isdir(output_file):
            # A directory target gets a file named after the request id.
            out = "{}.xml".format(os.path.join(output_file, req.id))
        safe_write(out, dumptree(req.t))
        req.md.store.update(req.t, tid=resource_name)  # TODO maybe this is not the right thing to do anymore

    return req.t
def _fmt(data: Any, accepter: MediaAccept) -> Tuple[str, str]:
    """
    Render ``data`` in a representation the requester accepts.

    Returns a ``(body, content_type)`` pair. ``None`` or zero-length data gives
    an empty ``text/plain`` body. XML data is serialized with ``dumptree`` when
    the accepter allows an XML type, and dicts/lists are dumped as JSON when the
    accepter allows ``application/json``. Anything else raises the exception
    produced by ``exc.exception_response(406)``.
    """
    is_empty = data is None or len(data) == 0
    if is_empty:
        return "", 'text/plain'

    if _is_xml(data) and _is_xml_type(accepter):
        body = dumptree(data)
        return body, 'application/samlmetadata+xml'

    json_ok = isinstance(data, (dict, list)) and accepter.get('application/json')
    if json_ok:
        body = dumps(data, default=json_serializer)
        return body, 'application/json'

    raise exc.exception_response(406)
def emit(req, ctype="application/xml", *opts):
    """
    Return the working tree as UTF-8 encoded text.

    :param req: The request
    :param ctype: The mimetype of the response.
    :param opts: Options (not used)
    :return: the UTF-8 encoded document
    :raises PipeException: if there is nothing to emit

    An element (or the root of a tree) is serialized with ``dumptree``; data
    that an earlier step already rendered is used as it is. The SHA-1 digest of
    the result is stored as the ETag response header and ``ctype`` as the
    Content-Type response header.

    **Examples**

    .. code-block:: yaml

        - emit application/xml:
        - break
    """
    doc = req.t
    log.debug("before getroot (%s) %s" % (type(doc), repr(doc)))

    getroot = getattr(doc, 'getroot', None)
    if getroot is not None and hasattr(getroot, '__call__'):
        top = getroot()
        # A tree without a root is emitted through its string form.
        doc = str(doc) if top is None else top
    log.debug("after getroot (%s) %s" % (type(doc), repr(doc)))

    if hasattr(doc, 'tag'):
        log.debug("has tag")
        doc = dumptree(doc)
    log.debug("after dumptree (%s) %s" % (type(doc), repr(doc)))

    if doc is None:
        raise PipeException("Empty")

    req.state['headers']['ETag'] = hashlib.sha1(doc).hexdigest()
    req.state['headers']['Content-Type'] = ctype
    return unicode(doc.decode('utf-8')).encode("utf-8")
def emit(req, ctype="application/xml", *opts):
    """
    Produce the UTF-8 encoded text form of the working tree.

    :param req: The request
    :param ctype: The mimetype of the response.
    :param opts: Options (not used)
    :return: the UTF-8 encoded document
    :raises PipeException: if no working document is set or nothing can be emitted

    A tree or element is rendered with ``dumptree``; a document already turned
    into text by an earlier step is passed through. The SHA-1 digest of the
    rendered data becomes the ETag header and ``ctype`` the Content-Type header.

    **Examples**

    .. code-block:: yaml

        - emit application/xml:
        - break
    """
    if req.t is None:
        raise PipeException("Your pipeline is missing a select statement.")

    payload = req.t
    if hasattr(payload, 'getroot') and hasattr(payload.getroot, '__call__'):
        root_elt = payload.getroot()
        if root_elt is not None:
            payload = root_elt
        else:
            # no root element: fall back to the string form of the tree
            payload = str(payload)

    if hasattr(payload, 'tag'):
        payload = dumptree(payload)

    if payload is None:
        raise PipeException("Empty")

    digest = hashlib.sha1()
    digest.update(payload)
    req.state['headers']['ETag'] = digest.hexdigest()
    req.state['headers']['Content-Type'] = ctype

    text = payload.decode('utf-8')
    return unicode(text).encode("utf-8")
def update(self, t, tid=None, etag=None, lazy=True):
    """
    Record the entity or entities found in ``t`` in ``self.objects`` and ``self.parts``.

    :param t: tree or element; its root decides which branch is taken
    :param tid: key for an EntitiesDescriptor part; the ``Name`` attribute when omitted
    :param etag: version marker compared with the stored part; for an
        EntitiesDescriptor it is computed from the serialized tree when omitted
    :param lazy: when false, ``self._reindex()`` is called before returning
    """
    relt = root(t)
    assert relt is not None
    if relt.tag == "{%s}EntityDescriptor" % NS['md']:
        ref = object_id(relt)
        parts = None
        if ref in self.parts:
            parts = self.parts[ref]
        # A single entity is only recorded when an etag is supplied and it
        # differs from the etag stored for this ref (or none is stored yet).
        if etag is not None and (parts is None or parts.get('etag', None) != etag):
            self.parts[ref] = {
                'id': relt.get('entityID'),
                'etag': etag,
                'count': 1,
                'items': [ref]
            }
            self.objects[ref] = relt
            self._last_modified = datetime.now()
    elif relt.tag == "{%s}EntitiesDescriptor" % NS['md']:
        if tid is None:
            tid = relt.get('Name')
        if etag is None:
            # no etag given: derive one from the sha256 digest of the serialized tree
            etag = hex_digest(dumptree(t, pretty_print=False), 'sha256')
        parts = None
        if tid in self.parts:
            parts = self.parts[tid]
        # Skip the work when the stored part already carries this etag.
        if parts is None or parts.get('etag', None) != etag:
            items = set()
            for e in iter_entities(t):
                ref = object_id(e)
                items.add(ref)
                self.objects[ref] = e
            self.parts[tid] = {
                'id': tid,
                'count': len(items),
                'etag': etag,
                'items': list(items)
            }
            self._last_modified = datetime.now()
    if not lazy:
        self._reindex()
def store(req, *opts):
    """
    Save the working document as separate files

    :param req: The request
    :param opts: Options (unused)
    :return: always returns the unmodified working document
    :raises PipeException: if there is no working document, no argument, or an
        entity without an entityID

    Split the working document into EntityDescriptor-parts and save in
    directory/sha1(@entityID).xml. Note that this does not erase files that may
    already be in the directory. If you want a "clean" directory, remove it
    before you call store.
    """
    if req.t is None:
        raise PipeException("Your pipeline is missing a select statement.")

    if not req.args:
        raise PipeException("store requires an argument")

    # isinstance also accepts dict subclasses, which "type(x) is dict" rejected.
    if isinstance(req.args, dict):
        target_dir = req.args.get('directory', None)
    else:
        target_dir = req.args[0]

    if target_dir is not None:
        if not os.path.isdir(target_dir):
            os.makedirs(target_dir)
        for e in iter_entities(req.t):
            eid = e.get('entityID')
            if not eid:
                raise PipeException("Missing entityID in %s" % e)
            # hashlib digests bytes; encode text ids explicitly so that
            # non-ASCII entityIDs hash instead of raising. The digest of an
            # ASCII id is unchanged.
            if not isinstance(eid, bytes):
                eid = eid.encode('utf-8')
            d = hashlib.sha1(eid).hexdigest()
            safe_write("%s.xml" % os.path.join(target_dir, d), dumptree(e, pretty_print=True))

    return req.t
def request(self, **kwargs):
    """
    Process one metadata request and return the rendered response.

    Keyword arguments read here: ``pfx`` (alias name), ``path``,
    ``content_type``, and, depending on the extension of ``path``,
    ``entityID``, ``return``, ``returnIDParam`` (extension ``ds``) or
    ``paged``, ``query``, ``page``, ``page_limit``, ``entity_filter``
    (extension ``s``).

    :raises NotFound: for an unknown alias, an empty lookup, or when no
        plumbing produces a result
    :raises HTTPError: 400 when a ``ds`` request lacks ``entityID`` or ``return``
    """
    stats['MD Requests'] += 1

    pfx = kwargs.get('pfx', None)
    path = kwargs.get('path', None)
    content_type = kwargs.get('content_type', None)

    log.debug("request pfx=%s, path=%s, content_type=%s" % (pfx, path, content_type))

    def _d(x):
        # Split "name.ext" into (name, ext); a "{base64}" prefix marks an encoded value.
        if x is None or len(x) == 0:
            return None, None

        if x.startswith("{base64}"):
            x = x[8:].decode('base64')

        if '.' in x:
            (p, sep, ext) = x.rpartition('.')
            return p, ext
        else:
            return x, None

    _ctypes = {'xml': 'application/xml',
               'json': 'application/json',
               'htm': 'text/html',
               'html': 'text/html',
               'ds': 'text/html',
               's': 'application/json'}

    alias = None
    if pfx:
        alias = pfx
        pfx = self.aliases.get(alias, None)
        if pfx is None:
            raise NotFound()

    path, ext = _d(path)
    if pfx and path:
        q = "{%s}%s" % (pfx, path)
    else:
        q = path

    # Use the module logger here as well (this call used the root logger).
    log.debug("request %s %s" % (path, ext))
    log.debug(cherrypy.request.headers)

    accept = {}
    if content_type is None:
        if ext is not None and ext in _ctypes:
            accept = {_ctypes[ext]: True}
        else:
            accept = MDServer.MediaAccept()
            if ext is not None:
                # not a known extension: it is part of the path after all
                path = "%s.%s" % (path, ext)
    else:
        accept = {content_type: True}

    with self.lock.readlock:
        if ext == 'ds':
            pdict = dict()
            pdict['http'] = cherrypy.request
            entityID = kwargs.get('entityID', None)
            if entityID is None:
                raise HTTPError(400, "400 Bad Request - missing entityID")
            pdict['sp'] = self.md.sha1_id(entityID)
            pdict['ret'] = kwargs.get('return', None)
            if not path:
                pdict['search'] = "/search/"
            else:
                pdict['search'] = "%s.s" % path
            if pdict['ret'] is None:
                raise HTTPError(400, "400 Bad Request - Missing 'return' parameter")
            pdict['returnIDParam'] = kwargs.get('returnIDParam', 'entityID')
            cherrypy.response.headers['Content-Type'] = 'text/html'
            pdict['style'] = '/static/css/style.css'  # TODO figure out how to sensibly set this per request
            return template("ds.html").render(**pdict)
        elif ext == 's':
            paged = bool(kwargs.get('paged', False))
            query = kwargs.get('query', None)
            page = kwargs.get('page', 0)
            page_limit = kwargs.get('page_limit', 10)
            entity_filter = kwargs.get('entity_filter', None)

            cherrypy.response.headers['Content-Type'] = 'application/json'

            if paged:
                res, more, total = self.md.search(query,
                                                  path=q,
                                                  page=int(page),
                                                  page_limit=int(page_limit),
                                                  entity_filter=entity_filter)
                # Serialize once; the same text is logged and returned.
                body = dumps({'entities': res, 'more': more, 'total': total})
                log.debug(body)
                return body
            else:
                return dumps(self.md.search(query, path=q, entity_filter=entity_filter))
        elif accept.get('text/html'):
            if not q:
                if pfx:
                    title = pfx
                else:
                    title = "Metadata By Attributes"
                return template("index.html").render(http=cherrypy.request,
                                                     md=self.md,
                                                     alias=alias,
                                                     aliases=self.aliases,
                                                     title=title)
            else:
                entities = self.md.lookup(q)
                if not entities:
                    raise NotFound()
                if len(entities) > 1:
                    return template("metadata.html").render(http=cherrypy.request,
                                                            md=self.md,
                                                            entities=entities)
                else:
                    entity = entities[0]
                    t = html.fragment_fromstring(unicode(xslt_transform(entity, "entity2html.xsl")))
                    # Replace every <code role="entity"> placeholder by the entity source text.
                    for c_elt in t.findall(".//code[@role='entity']"):
                        c_txt = dumptree(entity, pretty_print=True, xml_declaration=False).decode("utf-8")
                        p = c_elt.getparent()
                        p.remove(c_elt)
                        if p.text is not None:
                            p.text += c_txt
                        else:
                            p.text = c_txt
                    xml = dumptree(t, xml_declaration=False).decode('utf-8')
                    return template("basic.html").render(http=cherrypy.request, content=xml)
        else:
            for p in self.plumbings:
                state = {'request': True,
                         'headers': {'Content-Type': 'text/xml'},
                         'accept': accept,
                         'url': cherrypy.url(relative=False),
                         'select': q,
                         'stats': {}}
                r = p.process(self.md, state=state)
                if r is not None:
                    # the first plumbing that yields a result answers the request
                    cache_ttl = state.get('cache', 0)
                    log.debug("caching for %d seconds" % cache_ttl)
                    caching.expires(secs=cache_ttl)
                    for k, v in state.get('headers', {}).items():
                        cherrypy.response.headers[k] = v
                    return r
    raise NotFound()
def request(self, **kwargs):
    """The main request processor. This code implements all rendering of metadata.

    Keyword arguments read here: ``pfx`` (alias name), ``path``,
    ``content_type``, and, depending on the extension of ``path``,
    ``entityID`` (extension ``ds``) or ``paged``, ``query``, ``page``,
    ``page_limit``, ``entity_filter``, ``related`` (extension ``s``).

    :raises HTTPError: 503 while the repository is not ready; 400/404 for a
        ``ds`` request without ``entityID`` or without exactly one match
    :raises NotFound: for an unknown alias, an empty lookup, or when no
        plumbing produces a result
    """
    stats['MD Requests'] += 1

    if not self.ready:
        raise HTTPError(503, _("Service Unavailable (repository loading)"))

    pfx = kwargs.get('pfx', None)
    path = kwargs.get('path', None)
    content_type = kwargs.get('content_type', None)

    log.debug("MDServer pfx=%s, path=%s, content_type=%s" % (pfx, path, content_type))

    def _d(x, do_split=True):
        # Split "name.ext" into (name, ext) when ext is a known extension;
        # a "{base64}" prefix marks an encoded value.
        if x is not None:
            x = x.strip()
        log.debug("_d(%s,%s)" % (x, do_split))
        if x is None or len(x) == 0:
            return None, None

        if x.startswith("{base64}"):
            x = x[8:].decode('base64')

        if do_split and '.' in x:
            (pth, dot, extn) = x.rpartition('.')
            assert (dot == '.')
            if extn in _ctypes:
                return pth, extn

        return x, None

    _ctypes = {'xml': 'application/xml',
               'json': 'application/json',
               'htm': 'text/html',
               'html': 'text/html',
               'ds': 'text/html',
               's': 'application/json'}

    alias = None
    if pfx:
        alias = pfx
        pfx = self.aliases.get(alias, None)
        if pfx is None:
            raise NotFound()

    # The extension is only split off when no explicit content type was given.
    path, ext = _d(path, content_type is None)
    if pfx and path:
        q = "{%s}%s" % (pfx, path)
        path = "/%s/%s" % (alias, path)
    else:
        q = path

    if ext is not None:
        log.debug("request path: %s.%s, headers: %s" % (path, ext, cherrypy.request.headers))
    else:
        log.debug("request path: %s, headers: %s" % (path, cherrypy.request.headers))

    accept = {}
    if content_type is None:
        if ext is not None and ext in _ctypes:
            accept = {_ctypes[ext]: True}
        else:
            accept = MDServer.MediaAccept()
            if ext is not None:
                path = "%s.%s" % (path, ext)
    else:
        accept = {content_type: True}

    with self.lock.readlock:
        if ext == 'ds':
            pdict = dict()
            entity_id = kwargs.get('entityID', None)
            if entity_id is None:
                raise HTTPError(400, _("400 Bad Request - missing entityID"))
            pdict['sp'] = self.md.sha1_id(entity_id)
            e = self.md.store.lookup(entity_id)
            if e is None or len(e) == 0:
                raise HTTPError(404)

            if len(e) > 1:
                raise HTTPError(400, _("400 Bad Request - multiple matches for") + " %s" % entity_id)

            pdict['entity'] = self.md.simple_summary(e[0])
            if not path:
                pdict['search'] = "/search/"
                pdict['list'] = "/role/idp.json"
            else:
                pdict['search'] = "%s.s" % path
                pdict['list'] = "%s.json" % path
            cherrypy.response.headers['Content-Type'] = 'text/html'
            return render_template("ds.html", **pdict)
        elif ext == 's':
            paged = bool(kwargs.get('paged', False))
            query = kwargs.get('query', None)
            page = kwargs.get('page', 0)
            page_limit = kwargs.get('page_limit', 10)
            entity_filter = kwargs.get('entity_filter', None)
            related = kwargs.get('related', None)

            cherrypy.response.headers['Content-Type'] = 'application/json'

            if query is None:
                log.debug("empty query - creating one")
                query = [cherrypy.request.remote.ip]
                # HTTP spells the header "Referer"; the previously used
                # "referrer" spelling is kept as a fallback.
                req_headers = cherrypy.request.headers
                referrer = req_headers.get('Referer', req_headers.get('referrer', None))
                if referrer is not None:
                    log.debug("including referrer: %s" % referrer)
                    url = urlparse.urlparse(referrer)
                    # hostname leaves out port and userinfo, so no manual
                    # split(':') (which raised on more than one colon) is needed
                    host = url.hostname or ''
                    suffix = self.psl.get_public_suffix(host) if host else None
                    if suffix and host.endswith(suffix):
                        # Cut the suffix off as a whole. str.rstrip() treats its
                        # argument as a set of characters and removed too much.
                        host = host[:-len(suffix)]
                    for host_part in host.split('.'):
                        if host_part:
                            query.append(host_part)
                log.debug("created query: %s" % ",".join(query))

            if paged:
                res, more, total = self.md.search(query,
                                                  path=q,
                                                  page=int(page),
                                                  page_limit=int(page_limit),
                                                  entity_filter=entity_filter,
                                                  related=related)
                return dumps({'entities': res, 'more': more, 'total': total})
            else:
                return dumps(self.md.search(query,
                                            path=q,
                                            entity_filter=entity_filter,
                                            related=related))
        elif accept.get('text/html'):
            if not q:
                if pfx:
                    title = pfx
                else:
                    title = _("Metadata By Attributes")
                return render_template("index.html",
                                       md=self.md,
                                       alias=alias,
                                       aliases=self.aliases,
                                       title=title)
            else:
                entities = self.md.lookup(q)
                if not entities:
                    raise NotFound()
                if len(entities) > 1:
                    return render_template("metadata.html",
                                           md=self.md,
                                           subheading=q,
                                           entities=entities)
                else:
                    entity = entities[0]
                    t = html.fragment_fromstring(unicode(xslt_transform(entity, "entity2html.xsl")))
                    # Replace every <code role="entity"> placeholder by the entity source text.
                    for c_elt in t.findall(".//code[@role='entity']"):
                        # Re-parse without blank text so that pretty_print can re-indent.
                        c_txt = dumptree(entity)
                        parser = etree.XMLParser(remove_blank_text=True)
                        src = StringIO(c_txt)
                        tree = etree.parse(src, parser)
                        c_txt = dumptree(tree, pretty_print=True, xml_declaration=False).decode("utf-8")
                        p = c_elt.getparent()
                        p.remove(c_elt)
                        if p.text is not None:
                            p.text += c_txt
                        else:
                            p.text = c_txt
                    xml = dumptree(t, xml_declaration=False).decode('utf-8')
                    return render_template("entity.html",
                                           headline=self.md.display(entity).strip(),
                                           subheading=entity.get('entityID'),
                                           entity_id=entity.get('entityID'),
                                           content=xml)
        else:
            for p in self.plumbings:
                state = {'request': True,
                         'headers': {'Content-Type': 'text/xml'},
                         'accept': accept,
                         'url': cherrypy.url(relative=False),
                         'select': q,
                         'path': path,
                         'stats': {}}
                r = p.process(self.md, state=state)
                if r is not None:
                    # the first plumbing that yields a result answers the request
                    cache_ttl = state.get('cache', 0)
                    log.debug("caching for %d seconds" % cache_ttl)
                    for k, v in state.get('headers', {}).items():
                        cherrypy.response.headers[k] = v
                    caching.expires(secs=cache_ttl)
                    return r
    raise NotFound()
def request(self, **kwargs):
    """The main request processor. This code implements all rendering of metadata.

    Keyword arguments read here: ``pfx`` (alias name), ``path``,
    ``content_type``, and, depending on the extension of ``path``,
    ``entityID``, ``return``, ``returnIDParam`` (extension ``ds``) or
    ``paged``, ``query``, ``page``, ``page_limit``, ``entity_filter``
    (extension ``s``).

    :raises NotFound: for an unknown alias, an empty lookup, or when no
        plumbing produces a result
    :raises HTTPError: 400 when a ``ds`` request lacks ``entityID`` or ``return``
    """
    stats['MD Requests'] += 1

    pfx = kwargs.get('pfx', None)
    path = kwargs.get('path', None)
    content_type = kwargs.get('content_type', None)

    def _d(x):
        # Split "name.ext" into (name, ext); a "{base64}" prefix marks an encoded value.
        if x is None or len(x) == 0:
            return None, None

        if x.startswith("{base64}"):
            x = x[8:].decode('base64')

        if '.' in x:
            (pth, sep, extn) = x.rpartition('.')
            return pth, extn
        else:
            return x, None

    _ctypes = {'xml': 'application/xml',
               'json': 'application/json',
               'htm': 'text/html',
               'html': 'text/html',
               'ds': 'text/html',
               's': 'application/json'}

    alias = None
    if pfx:
        alias = pfx
        pfx = self.aliases.get(alias, None)
        if pfx is None:
            raise NotFound()

    path, ext = _d(path)
    if pfx and path:
        q = "{%s}%s" % (pfx, path)
    else:
        q = path

    log.debug("request path: %s, ext: %s, headers: %s" % (path, ext, cherrypy.request.headers))

    accept = {}
    if content_type is None:
        if ext is not None and ext in _ctypes:
            accept = {_ctypes[ext]: True}
        else:
            accept = MDServer.MediaAccept()
            if ext is not None:
                # not a known extension: it is part of the path after all
                path = "%s.%s" % (path, ext)
    else:
        accept = {content_type: True}

    with self.lock.readlock:
        if ext == 'ds':
            pdict = dict()
            entity_id = kwargs.get('entityID', None)
            if entity_id is None:
                raise HTTPError(400, "400 Bad Request - missing entityID")
            pdict['sp'] = self.md.sha1_id(entity_id)
            pdict['ret'] = kwargs.get('return', None)
            if not path:
                pdict['search'] = "/search/"
            else:
                pdict['search'] = "%s.s" % path
            if pdict['ret'] is None:
                raise HTTPError(400, "400 Bad Request - Missing 'return' parameter")
            pdict['returnIDParam'] = kwargs.get('returnIDParam', 'entityID')
            cherrypy.response.headers['Content-Type'] = 'text/html'
            return render_template("ds.html", **pdict)
        elif ext == 's':
            paged = bool(kwargs.get('paged', False))
            query = kwargs.get('query', None)
            page = kwargs.get('page', 0)
            page_limit = kwargs.get('page_limit', 10)
            entity_filter = kwargs.get('entity_filter', None)

            cherrypy.response.headers['Content-Type'] = 'application/json'

            if query is None:
                log.debug("empty query - creating one")
                # "remote.ip" is the client address; the former "remote.ipgit"
                # was a typo and raised AttributeError on every empty query.
                query = [cherrypy.request.remote.ip]
                # HTTP spells the header "Referer"; the previously used
                # "referrer" spelling is kept as a fallback.
                req_headers = cherrypy.request.headers
                referrer = req_headers.get('Referer', req_headers.get('referrer', None))
                if referrer is not None:
                    log.debug("including referrer: %s" % referrer)
                    url = urlparse.urlparse(referrer)
                    # hostname leaves out port and userinfo, so no manual
                    # split(':') (which raised on more than one colon) is needed
                    host = url.hostname or ''
                    suffix = self.psl.get_public_suffix(host) if host else None
                    if suffix and host.endswith(suffix):
                        # Cut the suffix off as a whole. str.rstrip() treats its
                        # argument as a set of characters and removed too much.
                        host = host[:-len(suffix)]
                    for host_part in host.split('.'):
                        if host_part:
                            query.append(host_part)
                log.debug("created query: %s" % ",".join(query))

            if paged:
                res, more, total = self.md.search(query,
                                                  path=q,
                                                  page=int(page),
                                                  page_limit=int(page_limit),
                                                  entity_filter=entity_filter)
                return dumps({'entities': res, 'more': more, 'total': total})
            else:
                return dumps(self.md.search(query, path=q, entity_filter=entity_filter))
        elif accept.get('text/html'):
            if not q:
                if pfx:
                    title = pfx
                else:
                    title = _("Metadata By Attributes")
                return render_template("index.html",
                                       md=self.md,
                                       alias=alias,
                                       aliases=self.aliases,
                                       title=title)
            else:
                entities = self.md.lookup(q)
                if not entities:
                    raise NotFound()
                if len(entities) > 1:
                    return render_template("metadata.html", md=self.md, entities=entities)
                else:
                    entity = entities[0]
                    t = html.fragment_fromstring(unicode(xslt_transform(entity, "entity2html.xsl")))
                    # Replace every <code role="entity"> placeholder by the entity source text.
                    for c_elt in t.findall(".//code[@role='entity']"):
                        c_txt = dumptree(entity, pretty_print=True, xml_declaration=False).decode("utf-8")
                        p = c_elt.getparent()
                        p.remove(c_elt)
                        if p.text is not None:
                            p.text += c_txt
                        else:
                            p.text = c_txt
                    xml = dumptree(t, xml_declaration=False).decode('utf-8')
                    return render_template("basic.html", content=xml)
        else:
            for p in self.plumbings:
                state = {'request': True,
                         'headers': {'Content-Type': 'text/xml'},
                         'accept': accept,
                         'url': cherrypy.url(relative=False),
                         'select': q,
                         'path': path,
                         'stats': {}}
                r = p.process(self.md, state=state)
                if r is not None:
                    # the first plumbing that yields a result answers the request
                    cache_ttl = state.get('cache', 0)
                    log.debug("caching for %d seconds" % cache_ttl)
                    for k, v in state.get('headers', {}).items():
                        cherrypy.response.headers[k] = v
                    caching.expires(secs=cache_ttl)
                    return r
    raise NotFound()
output_file = req.args.get("output", None) else: output_file = req.args[0] if output_file is not None: output_file = output_file.strip() log.debug("publish %s" % output_file) resource_name = output_file m = re.match(FILESPEC_REGEX, output_file) if m: output_file = m.group(1) resource_name = m.group(2) log.debug("output_file=%s, resource_name=%s" % (output_file, resource_name)) out = output_file if os.path.isdir(output_file): out = "%s.xml" % os.path.join(output_file, req.id) safe_write(out, dumptree(req.t)) req.md.store.update(req.t, tid=resource_name) # TODO maybe this is not the right thing to do anymore return req.t @pipe def loadstats(req, *opts): """ Log (INFO) information about the result of the last call to load :param req: The request :param opts: Options: (none) :return: None """ from stats import metadata _stats = None try: if 'json' in opts:
def entitiesdescriptor(
    entities,
    name,
    lookup_fn=None,
    cache_duration=None,
    valid_until=None,
    validate=True,
    filter_invalid=True,
    copy=True,
    nsmap=None,
):
    """
    :param lookup_fn: a function used to lookup entities by name - set to None to skip resolving
    :param entities: a set of entities specifiers (lookup is used to find entities from this set)
    :param name: the @Name attribute
    :param cache_duration: an XML timedelta expression, eg PT1H for 1hr
    :param valid_until: a relative time eg 2w 4d 1h for 2 weeks, 4 days and 1hour from now.
    :param copy: set to False to avoid making a copy of all the entities in list. This may be dangerous.
    :param validate: set to False to skip schema validation of the resulting EntitiesDescriptor element. This is dangerous!
    :param filter_invalid: remove invalid EntitiesDescriptor elements from aggregate
    :param nsmap: additional namespace definitions to include in top level EntitiesDescriptor element.
        The mapping is copied and never modified.
    :return: the EntitiesDescriptor element (the result of ``filter_or_validate`` when ``validate`` is set)

    Produce an EntitiesDescriptor element from a list of entities. The Name and the
    optional cacheDuration and validUntil attributes are affixed.
    """
    # Work on a private copy: the namespaces of NS and of every entity are
    # merged in below and must not leak into the caller's dictionary.
    nsmap = dict(nsmap) if nsmap is not None else dict()
    nsmap.update(NS)

    if lookup_fn is not None:
        entities = resolve_entities(entities, lookup_fn=lookup_fn)

    for entity in entities:
        nsmap.update(entity.nsmap)

    log.debug("selecting %d entities before validation" % len(entities))

    attrs = dict(Name=name, nsmap=nsmap)
    if cache_duration is not None:
        attrs['cacheDuration'] = cache_duration
    if valid_until is not None:
        attrs['validUntil'] = valid_until
    t = etree.Element("{%s}EntitiesDescriptor" % NS['md'], **attrs)

    for entity in entities:
        ent_insert = entity
        if copy:
            ent_insert = deepcopy(ent_insert)
        t.append(ent_insert)

    if config.devel_write_xml_to_file:
        # Development aid: write the unvalidated aggregate to a per-process file.
        import os

        with open("/tmp/pyff_entities_out-{}.xml".format(os.getpid()), "w") as fd:
            fd.write(b2u(dumptree(t)))

    if validate:
        validation_errors = dict()
        t = filter_or_validate(
            t, filter_invalid=filter_invalid, base_url=name, source="request", validation_errors=validation_errors
        )

        for base_url, err in validation_errors.items():
            log.error("Validation error: @ {}: {}".format(base_url, err))

    return t
def _pickle(self, data):
    """Return ``data`` serialized by ``dumptree``."""
    serialized = dumptree(data)
    return serialized