Пример #1
0
    def load_dir(self, directory, ext=".xml", url=None):
        """
:param directory: A directory to walk.
:param ext: Include files with this extension (default .xml)

Traverse a directory tree looking for metadata. Files ending in the specified extension are included. Directories
starting with '.' are excluded.
        """
        if url is None:
            url = directory
        log.debug("walking %s" % directory)
        if not directory in self.md:
            entities = []
            for top, dirs, files in os.walk(directory):
                for dn in dirs:
                    if dn.startswith("."):
                        dirs.remove(dn)
                for nm in files:
                    log.debug("found file %s" % nm)
                    if nm.endswith(ext):
                        fn = os.path.join(top, nm)
                        try:
                            t = self.parse_metadata(fn, fail_on_error=True)
                            # local metadata is assumed to be ok
                            entities.extend(self.entities(t))
                        except Exception, ex:
                            log.error(ex)
            self.import_metadata(self.entity_set(entities, url))
Пример #2
0
    def run(self, server):
        locked = False
        try:
            if self.lock.acquire(blocking=0):
                locked = True
                md = self.server.new_repository()
                for o in self.server.observers:
                    md.subscribe(o)

                for p in server.plumbings:
                    state = {'update': True, 'stats': {}}
                    p.process(md, state)
                    stats.update(state.get('stats', {}))
                if not md.sane():
                    log.error("update produced insane active repository - will try again later...")
                with server.lock.writelock:
                    log.debug("update produced new repository with %d entities" % md.index.size())
                    server.md = md
                    md.fire(type=EVENT_REPOSITORY_LIVE, size=md.index.size())
                    stats['Repository Update Time'] = datetime.now()
                    stats['Repository Size'] = md.index.size()
            else:
                log.error("another instance is running - will try again later...")
        except Exception, ex:
            traceback.print_exc(ex)
Пример #3
0
    def lookup(self, key):
        log.debug("redis store lookup: %s" % key)
        if '+' in key:
            hk = hex_digest(key)
            if not self.rc.exists("%s#members" % hk):
                self.rc.zinterstore("%s#members" % hk, ["%s#members" % k for k in key.split('+')], 'min')
                self.rc.expire("%s#members" % hk, 30)  # XXX bad juju - only to keep clients from hammering
            return self.lookup(hk)

        m = re.match("^(.+)=(.+)$", key)
        if m:
            return self.lookup("{%s}%s" % (m.group(1), m.group(2)))

        m = re.match("^{(.+)}(.+)$", key)
        if m and ';' in m.group(2):
            hk = hex_digest(key)
            if not self.rc.exists("%s#members" % hk):
                self.rc.zunionstore("%s#members" % hk,
                                    ["{%s}%s#members" % (m.group(1), v) for v in m.group(2).split(';')], 'min')
                self.rc.expire("%s#members" % hk, 30)  # XXX bad juju - only to keep clients from hammering
            return self.lookup(hk)
        elif self.rc.exists("%s#alias" % key):
            return self.lookup(self.rc.get("%s#alias" % key))
        elif self.rc.exists("%s#metadata" % key):
            return [self._get_metadata(key)]
        else:
            return self._members(key)
Пример #4
0
 def periodic(self, stats):
     """
     Run periodic maintenance on the store.

     :param stats: a mapping that receives the 'Last Periodic Maintenance' timestamp

     Entries of the "members" sorted set scored up to the current time are removed, then
     _drop_empty_av() is run for collections and for attributes with the same cutoff.
     """
     cutoff = _now()
     stats['Last Periodic Maintenance'] = cutoff
     log.debug("periodic maintentance...")
     self.rc.zremrangebyscore("members", "-inf", cutoff)
     for attr, tag in (("collections", "members"), ("attributes", "values")):
         self._drop_empty_av(attr, tag, cutoff)
Пример #5
0
    def parse_metadata(self, fn, key=None, base_url=None, fail_on_error=False, filter_invalid=True):
        """Parse a piece of XML and split it up into EntityDescriptor elements. Each such element
        is stored in the MDRepository instance.

:param fn: a file-like object containing SAML metadata
:param key: a certificate (file) or a SHA1 fingerprint to use for signature verification
:param base_url: use this base url to resolve relative URLs for XInclude processing
        """
        try:
            t = etree.parse(fn, base_url=base_url,
                            parser=etree.XMLParser(resolve_entities=False))
            t.xinclude()
            if filter_invalid:
                for e in t.findall('{%s}EntityDescriptor' % NS['md']):
                    if not schema().validate(e):
                        error = _e(schema().error_log, m=base_url)
                        log.debug("removing '%s': schema validation failed (%s)" % (
                            e.get('entityID'), error))
                        e.getparent().remove(e)
                        self.fire(type=EVENT_DROP_ENTITY, url=base_url,
                                  entityID=e.get('entityID'), error=error)
            else:
                # Having removed the invalid entities this should now never
                # happen...
                schema().assertValid(t)
        except DocumentInvalid, ex:
            traceback.print_exc()
            log.debug("schema validation failed on '%s': %s" % (
                base_url, _e(ex.error_log, m=base_url)))
            raise MetadataException("schema validation failed")
Пример #6
0
 def periodic(self, stats):
     """
     Run periodic maintenance on the store.

     :param stats: a mapping that receives the 'Last Periodic Maintenance' timestamp

     Entries of the "members" sorted set scored up to the current time are removed, then
     _drop_empty_av() is run for collections and for attributes with the same cutoff.
     """
     cutoff = _now()
     stats['Last Periodic Maintenance'] = cutoff
     log.debug("periodic maintentance...")
     self.rc.zremrangebyscore("members", "-inf", cutoff)
     for attr, tag in (("collections", "members"), ("attributes", "values")):
         self._drop_empty_av(attr, tag, cutoff)
Пример #7
0
    def test_log_plain(self):
        """
        Check that only messages at WARNING level and above reach a plain stream handler.

        The root logger's handlers are swapped for a single StreamHandler writing to an in-memory
        buffer for the duration of the test; handlers and level are restored afterwards.
        """
        logfile = StringIO()
        logger = logging.getLogger()
        test_handler = logging.StreamHandler(logfile)
        old_level = logger.level
        # Take a copy: removeHandler() mutates logger.handlers, and removing entries while iterating
        # over that same list skips every other handler.
        old_handlers = list(logger.handlers)
        try:
            for hdl in old_handlers:
                logger.removeHandler(hdl)
            logger.addHandler(test_handler)
            logger.setLevel(logging.WARNING)

            log.info("info")
            log.warn("warn")
            log.warning("warning")
            log.error("error")
            log.critical("critical")
            log.debug("debug")

            lines = logfile.getvalue().split("\n")

            assert "info" not in lines
            assert "warn" in lines
            assert "warning" in lines
            assert "critical" in lines
            assert "error" in lines
            assert "debug" not in lines
        finally:
            logger.removeHandler(test_handler)
            logger.setLevel(old_level)
            for hdl in old_handlers:
                logger.addHandler(hdl)
Пример #8
0
    def run(self, server):
        locked = False
        try:
            self.lock.acquire()
            locked = True
            md = self.server.md.clone()

            for p in server.plumbings:
                state = {'update': True, 'stats': {}}
                p.process(md, state)
                stats.update(state.get('stats', {}))

            with server.lock.writelock:
                log.debug("update produced new repository with %d entities" % server.md.store.size())
                server.md = md
                server.md.fire(type=EVENT_REPOSITORY_LIVE, size=server.md.store.size())
                stats['Repository Update Time'] = datetime.now()
                stats['Repository Size'] = server.md.store.size()

            self.nruns += 1

            stats['Updates Since Server Start'] = self.nruns

            if hasattr(self.server.md.store, 'periodic'):
                self.server.md.store.periodic(stats)
        except Exception, ex:
            log.error(ex.message)
Пример #9
0
def when(req, condition, *values):
    """
Conditionally execute part of the pipeline.

:param req: The request
:param condition: The condition key
:param values: The condition values
:return: The result of the inner pipeline if it was executed, otherwise the working document (req.t)

The inner pipeline (req.args) is executed if the condition key is present in the request state with a value
other than None, and either no condition values were given or the state value matches one of them.

**Examples**

.. code-block:: yaml

    - when foo
        - something
    - when bar bill
        - other

The condition operates on the state: if 'foo' is present in the state (with any value), then the something branch is
followed. If 'bar' is present in the state with the value 'bill' then the other branch is followed.
    """
    log.debug("condition key: %s" % repr(condition))
    current = req.state.get(condition, None)
    log.debug("condition %s" % repr(current))

    # Guard clauses: fall through to the unchanged document unless the condition holds.
    if current is None:
        return req.t
    if values and not _any(values, current):
        return req.t

    inner = Plumbing(pipeline=req.args, id="%s.when" % req.plumbing.id)
    return inner._process(req)
Пример #10
0
    def test_log_plain(self):
        """
        Check that only messages at WARNING level and above reach a plain stream handler.

        The root logger's handlers are swapped for a single StreamHandler writing to an in-memory
        buffer for the duration of the test; handlers and level are restored afterwards.
        """
        logfile = StringIO()
        logger = logging.getLogger()
        test_handler = logging.StreamHandler(logfile)
        old_level = logger.level
        # Take a copy: removeHandler() mutates logger.handlers, and removing entries while iterating
        # over that same list skips every other handler.
        old_handlers = list(logger.handlers)
        try:
            for hdl in old_handlers:
                logger.removeHandler(hdl)
            logger.addHandler(test_handler)
            logger.setLevel(logging.WARNING)

            log.info("info")
            log.warn("warn")
            log.warning("warning")
            log.error("error")
            log.critical("critical")
            log.debug("debug")

            lines = logfile.getvalue().split("\n")

            assert "info" not in lines
            assert "warn" in lines
            assert "warning" in lines
            assert "critical" in lines
            assert "error" in lines
            assert "debug" not in lines
        finally:
            logger.removeHandler(test_handler)
            logger.setLevel(old_level)
            for hdl in old_handlers:
                logger.addHandler(hdl)
Пример #11
0
    def _process(self, req):
        """The inner request pipeline processor.

        Each pipeline entry is loaded with loader.load_pipe() and called with the request. A string
        argument is wrapped in a list; arguments other than None, dict, list or tuple are rejected.
        A non-None result of a pipe becomes the new working document (req.t). Processing stops when
        req.done is set or when a PipeException is raised (which is logged).

        :param req: The request to run through the pipeline
        """
        log.debug('Processing \n%s' % self)
        for p in self.pipeline:
            try:
                pipe, opts, name, args = loader.load_pipe(p)
                arg_type = type(args)
                if arg_type in (str, unicode):
                    args = [args]
                elif args is not None and arg_type not in (dict, list, tuple):
                    raise PipeException("Unknown argument type %s" % repr(args))
                req.args = args
                req.name = name
                result = pipe(req, *opts)
                if result is not None:
                    req.t = result
                if req.done:
                    break
            except PipeException as ex:
                log.error(ex)
                break
        # NOTE(review): nothing is returned by the code visible here - confirm whether a trailing
        # return statement was lost.
Пример #12
0
    def test_log_syslog(self):
        """
        Check that only messages at WARNING level and above are passed on by SysLogLibHandler.

        syslog.syslog is patched with self.dummy_syslog and the output is read back from
        self._syslog. The root logger's handlers are swapped for a single SysLogLibHandler for the
        duration of the test; handlers and level are restored afterwards.
        """
        with patch('syslog.syslog', new=self.dummy_syslog):
            logger = logging.getLogger()
            test_handler = SysLogLibHandler("USER")
            old_level = logger.level
            # Take a copy: removeHandler() mutates logger.handlers, and removing entries while
            # iterating over that same list skips every other handler.
            old_handlers = list(logger.handlers)
            try:
                for hdl in old_handlers:
                    logger.removeHandler(hdl)
                logger.addHandler(test_handler)
                logger.setLevel(logging.WARNING)

                log.info("info")
                log.warn("warn")
                log.warning("warning")
                log.error("error")
                log.critical("critical")
                log.debug("debug")

                lines = self._syslog.getvalue().split("\n")

                assert "info" not in lines
                assert "12:warn" in lines
                assert "12:warning" in lines
                assert "10:critical" in lines
                assert "11:error" in lines
                assert "debug" not in lines
            finally:
                logger.removeHandler(test_handler)
                logger.setLevel(old_level)
                for hdl in old_handlers:
                    logger.addHandler(hdl)
Пример #13
0
 def producer(q, resources, cache=self.metadata_cache_enabled):
     """
     Start one URLFetch thread per resource and put each started thread on the queue.

     :param q: the queue that receives the started fetcher threads
     :param resources: an iterable of (url, verify, id, tries) tuples
     :param cache: passed to URLFetch as enable_cache
     """
     # Goes through the logger rather than a bare print statement so library code does not write to stdout.
     log.debug("resources: %s" % repr(resources))
     for url, verify, rid, tries in resources:
         log.debug("starting fetcher for '%s'" % url)
         thread = URLFetch(url, verify, rid, enable_cache=cache, tries=tries)
         thread.start()
         q.put(thread, True)
Пример #14
0
    def test_log_syslog(self):
        """
        Check that only messages at WARNING level and above are passed on by SysLogLibHandler.

        syslog.syslog is patched with self.dummy_syslog and the output is read back from
        self._syslog. The root logger's handlers are swapped for a single SysLogLibHandler for the
        duration of the test; handlers and level are restored afterwards.
        """
        with patch('syslog.syslog', new=self.dummy_syslog):
            logger = logging.getLogger()
            test_handler = SysLogLibHandler("USER")
            old_level = logger.level
            # Take a copy: removeHandler() mutates logger.handlers, and removing entries while
            # iterating over that same list skips every other handler.
            old_handlers = list(logger.handlers)
            try:
                for hdl in old_handlers:
                    logger.removeHandler(hdl)
                logger.addHandler(test_handler)
                logger.setLevel(logging.WARNING)

                log.info("info")
                log.warn("warn")
                log.warning("warning")
                log.error("error")
                log.critical("critical")
                log.debug("debug")

                lines = self._syslog.getvalue().split("\n")

                assert "info" not in lines
                assert "12:warn" in lines
                assert "12:warning" in lines
                assert "10:critical" in lines
                assert "11:error" in lines
                assert "debug" not in lines
            finally:
                logger.removeHandler(test_handler)
                logger.setLevel(old_level)
                for hdl in old_handlers:
                    logger.addHandler(hdl)
Пример #15
0
 def producer(q, resources, cache=self.metadata_cache_enabled):
     """
     Start one URLFetch thread per resource and put each started thread on the queue.

     :param q: the queue that receives the started fetcher threads
     :param resources: an iterable of (url, verify, id, tries) tuples
     :param cache: passed to URLFetch as enable_cache
     """
     # Goes through the logger rather than a bare print statement so library code does not write to stdout.
     log.debug("resources: %s" % repr(resources))
     for url, verify, rid, tries in resources:
         log.debug("Starting fetcher for %s" % url)
         thread = URLFetch(url, verify, rid, enable_cache=cache, tries=tries)
         thread.start()
         q.put(thread, True)
Пример #16
0
    def parse_metadata(self, fn, key=None, base_url=None, fail_on_error=False, filter_invalid=True):
        """Parse a piece of XML and split it up into EntityDescriptor elements. Each such element
        is stored in the MDRepository instance.

:param fn: a file-like object containing SAML metadata
:param key: a certificate (file) or a SHA1 fingerprint to use for signature verification
:param base_url: use this base url to resolve relative URLs for XInclude processing
        """
        try:
            t = etree.parse(fn, base_url=base_url, parser=etree.XMLParser(resolve_entities=False))
            t.xinclude()
            if filter_invalid:
                for e in t.findall('{%s}EntityDescriptor' % NS['md']):
                    if not schema().validate(e):
                        error = _e(schema().error_log, m=base_url)
                        log.debug("removing '%s': schema validation failed (%s)" % (e.get('entityID'), error))
                        e.getparent().remove(e)
                        self.fire(type=EVENT_DROP_ENTITY, url=base_url, entityID=e.get('entityID'), error=error)
            else:
            # Having removed the invalid entities this should now never happen...
                schema().assertValid(t)
        except DocumentInvalid, ex:
            traceback.print_exc()
            log.debug("schema validation failed on '%s': %s" % (base_url, _e(ex.error_log, m=base_url)))
            raise MetadataException("schema validation failed")
Пример #17
0
    def load_dir(self, directory, ext=".xml", url=None):
        """
:param directory: A directory to walk.
:param ext: Include files with this extension (default .xml)

Traverse a directory tree looking for metadata. Files ending in the specified extension are included. Directories
starting with '.' are excluded.
        """
        if url is None:
            url = directory
        log.debug("walking %s" % directory)
        if not directory in self.md:
            entities = []
            for top, dirs, files in os.walk(directory):
                for dn in dirs:
                    if dn.startswith("."):
                        dirs.remove(dn)
                for nm in files:
                    log.debug("found file %s" % nm)
                    if nm.endswith(ext):
                        fn = os.path.join(top, nm)
                        try:
                            t = self.parse_metadata(fn, fail_on_error=True)
                            entities.extend(self.entities(t))  # local metadata is assumed to be ok
                        except Exception, ex:
                            log.error(ex)
            self.import_metadata(self.entity_set(entities, url))
Пример #18
0
    def lookup(self, key):
        log.debug("redis store lookup: %s" % key)
        if '+' in key:
            hk = hex_digest(key)
            if not self.rc.exists("%s#members" % hk):
                self.rc.zinterstore("%s#members" % hk, ["%s#members" % k for k in key.split('+')], 'min')
                self.rc.expire("%s#members" % hk, 30)  # XXX bad juju - only to keep clients from hammering
            return self.lookup(hk)

        m = re.match("^(.+)=(.+)$", key)
        if m:
            return self.lookup("{%s}%s" % (m.group(1), m.group(2)))

        m = re.match("^{(.+)}(.+)$", key)
        if m and ';' in m.group(2):
            hk = hex_digest(key)
            if not self.rc.exists("%s#members" % hk):
                self.rc.zunionstore("%s#members" % hk,
                                    ["{%s}%s#members" % (m.group(1), v) for v in m.group(2).split(';')], 'min')
                self.rc.expire("%s#members" % hk, 30)  # XXX bad juju - only to keep clients from hammering
            return self.lookup(hk)
        elif self.rc.exists("%s#alias" % key):
            return self.lookup(self.rc.get("%s#alias" % key))
        elif self.rc.exists("%s#metadata" % key):
            return [self._get_metadata(key)]
        else:
            return self._members(key)
Пример #19
0
    def run(self, server):
        locked = False
        try:
            self.lock.acquire()
            locked = True
            md = self.server.md.clone()

            for p in server.plumbings:
                state = {'update': True, 'stats': {}}
                p.process(md, state)
                stats.update(state.get('stats', {}))

            with server.lock.writelock:
                log.debug("update produced new repository with %d entities" % server.md.store.size())
                server.md = md
                server.md.fire(type=EVENT_REPOSITORY_LIVE, size=server.md.store.size())
                stats['Repository Update Time'] = datetime.now()
                stats['Repository Size'] = server.md.store.size()

            self.nruns += 1

            stats['Updates Since Server Start'] = self.nruns

            if hasattr(self.server.md.store, 'periodic'):
                self.server.md.store.periodic(stats)
        except Exception as ex:
            log.error(ex.message)
        finally:
            if locked:
                self.lock.release()
Пример #20
0
 def _drop_empty_av(self, attr, tag, ts):
     """
     Expire old entries of every sorted set registered under "#<attr>" and unregister the empty ones.

     :param attr: name of the registry set (looked up as "#<attr>")
     :param tag: suffix of the per-entry sorted set ("<entry>#<tag>")
     :param ts: entries scored up to this value are removed
     """
     an = "#%s" % attr
     for c in self.rc.smembers(an):
         # Use the caller-supplied tag; a hard-coded "members" suffix made the ("attributes", "values")
         # pass look at a different key than the "<attr>#values" sorted sets.
         tn = "%s#%s" % (c, tag)
         self.rc.zremrangebyscore(tn, "-inf", ts)
         if not self.rc.zcard(tn) > 0:
             log.debug("dropping empty %s %s" % (attr, c))
             self.rc.srem(an, c)
Пример #21
0
 def _drop_empty_av(self, attr, tag, ts):
     """
     Expire old entries of every sorted set registered under "#<attr>" and unregister the empty ones.

     :param attr: name of the registry set (looked up as "#<attr>")
     :param tag: suffix of the per-entry sorted set ("<entry>#<tag>")
     :param ts: entries scored up to this value are removed
     """
     an = "#%s" % attr
     for c in self.rc.smembers(an):
         # Use the caller-supplied tag; a hard-coded "members" suffix made the ("attributes", "values")
         # pass look at a different key than the "<attr>#values" sorted sets.
         tn = "%s#%s" % (c, tag)
         self.rc.zremrangebyscore(tn, "-inf", ts)
         if not self.rc.zcard(tn) > 0:
             log.debug("dropping empty %s %s" % (attr, c))
             self.rc.srem(an, c)
Пример #22
0
 def default(self, *args, **kwargs):
     """
     Catch-all handler: turn the positional path segments into a server request.

     If the first segment is one of the server's aliases it becomes the prefix ('pfx') and the second
     segment, if any, becomes the path. Otherwise there is no prefix and all segments are joined into
     an absolute path.
     """
     log.debug("default args: %s, kwargs: %s" % (repr(args), repr(kwargs)))
     aliased = len(args) > 0 and args[0] in self.server.aliases
     if not aliased:
         kwargs['pfx'] = None
         kwargs['path'] = "/" + "/".join(args)
     else:
         kwargs['pfx'] = args[0]
         if len(args) > 1:
             kwargs['path'] = args[1]
     return self.server.request(**kwargs)
Пример #23
0
 def default(self, *args, **kwargs):
     """
     Catch-all handler: turn the positional path segments into a server request.

     If the first segment is one of the server's aliases it becomes the prefix ('pfx') and the second
     segment, if any, becomes the path. Otherwise there is no prefix and all segments are joined into
     an absolute path.
     """
     log.debug("default args: %s, kwargs: %s" % (repr(args), repr(kwargs)))
     aliased = len(args) > 0 and args[0] in self.server.aliases
     if not aliased:
         kwargs['pfx'] = None
         kwargs['path'] = "/" + "/".join(args)
     else:
         kwargs['pfx'] = args[0]
         if len(args) > 1:
             kwargs['path'] = args[1]
     return self.server.request(**kwargs)
Пример #24
0
 def default(self, *args, **kwargs):
     """
     Catch-all handler: turn the positional path segments into a server request.

     If the first segment is one of the server's aliases it becomes the prefix ('pfx') and the second
     segment, if any, becomes the path. Otherwise there is no prefix and all segments are joined into
     an absolute path.
     """
     log.debug("request default: %s" % ",".join(args))
     aliased = len(args) > 0 and args[0] in self.server.aliases
     if not aliased:
         log.debug("not an alias: %s" % "/".join(args))
         kwargs['pfx'] = None
         kwargs['path'] = "/" + "/".join(args)
     else:
         kwargs['pfx'] = args[0]
         if len(args) > 1:
             kwargs['path'] = args[1]
     return self.server.request(**kwargs)
Пример #25
0
def publish(req, *opts):
    """
Publish the working document in XML form.

:param req: The request
:param opts: Options (unused)
:return: The working document (req.t)
:raises PipeException: if there is no working document, no argument was given or schema validation fails

Publish takes one argument: path to a file where the document tree will be written. The argument is either the
first element of the argument list or the ``output`` entry of an argument dict. If the path is an existing
directory the document is written to ``<req.id>.xml`` inside it. If the argument matches FILESPEC_REGEX its
first group is used as the path and its second group as the resource name under which the store is updated.

**Examples**

.. code-block:: yaml

    - publish: /tmp/idp.xml
    """

    if req.t is None:
        raise PipeException("Empty document submitted for publication")

    if req.args is None:
        raise PipeException("publish must specify output")

    try:
        validate_document(req.t)
    except DocumentInvalid as ex:
        log.error(ex.error_log)
        raise PipeException("XML schema validation failed")

    if type(req.args) is dict:
        target = req.args.get("output", None)
    else:
        target = req.args[0]

    # Nothing to write to: hand the document back untouched.
    if target is None:
        return req.t

    target = target.strip()
    log.debug("publish {}".format(target))

    spec = re.match(FILESPEC_REGEX, target)
    if spec:
        target, resource_name = spec.group(1), spec.group(2)
    else:
        resource_name = target
    log.debug("output_file={}, resource_name={}".format(target, resource_name))

    if os.path.isdir(target):
        destination = "{}.xml".format(os.path.join(target, req.id))
    else:
        destination = target
    safe_write(destination, dumptree(req.t))

    # TODO maybe this is not the right thing to do anymore
    req.md.store.update(req.t, tid=resource_name)
    return req.t
Пример #26
0
    def update(self,
               t,
               tid=None,
               ts=None,
               merge_strategy=None):  # TODO: merge ?
        """
        Store an EntityDescriptor or EntitiesDescriptor in redis.

        :param t: the element (tree) to store; its root decides which branch is taken
        :param tid: the id to store it under (defaults to the entityID resp. the Name attribute)
        :param ts: expiration timestamp (defaults to four days from now)
        :param merge_strategy: unused
        :return: the number of EntityDescriptor elements stored
        :raises ValueError: if the root element is neither EntityDescriptor nor EntitiesDescriptor
        """
        log.debug("redis store update: %s: %s" % (t, tid))
        relt = root(t)
        ne = 0
        if ts is None:
            ts = int(_now() + 3600 * 24 * 4)  # 4 days is the arbitrary default expiration
        if relt.tag == "{%s}EntityDescriptor" % NS['md']:
            if tid is None:
                tid = relt.get('entityID')
            with self.rc.pipeline() as p:
                self.update_entity(relt, t, tid, ts, p)
                entity_id = relt.get("entityID")
                if entity_id is not None:
                    self.membership("entities", entity_id, ts, p)
                for ea, eav in entity_attribute_dict(relt).items():
                    for v in eav:
                        self.membership("{%s}%s" % (ea, v), tid, ts, p)
                        p.zadd("%s#values" % ea, v, ts)
                    p.sadd("#attributes", ea)

                for hn in ('sha1', 'sha256', 'md5'):
                    # Expire the alias key itself; expiring the bare hash left the alias
                    # without any expiration.
                    alias_key = "{%s}%s#alias" % (hn, hex_digest(tid, hn))
                    p.set(alias_key, tid)
                    p.expireat(alias_key, ts)
                p.execute()
            ne += 1
        elif relt.tag == "{%s}EntitiesDescriptor" % NS['md']:
            if tid is None:
                tid = relt.get('Name')
            # NOTE(review): this replaces any ts passed in by the caller - confirm that is intended.
            ts = self._expiration(relt)
            with self.rc.pipeline() as p:
                self.update_entity(relt, t, tid, ts, p)
                for e in iter_entities(t):
                    ne += self.update(e, ts=ts)
                    entity_id = e.get("entityID")
                    if entity_id is not None:
                        self.membership(tid, entity_id, ts, p)
                        self.membership("entities", entity_id, ts, p)
                p.execute()
        else:
            raise ValueError("Bad metadata top-level element: '%s'" % relt.tag)

        return ne
Пример #27
0
 def resolve(self, system_url, public_id, context):
     """
     Resolves URIs using the resource API

     Only the last path segment of the system URL is used. It is looked up as a package resource,
     first by its plain name and then below "schema/".

     :raises ValueError: if neither resource exists
     """
     log.debug("resolve SYSTEM URL' %s' for '%s'" % (system_url, public_id))
     fn = system_url.split("/")[-1]
     for candidate in (fn, "schema/%s" % fn):
         if pkg_resources.resource_exists(__name__, candidate):
             stream = pkg_resources.resource_stream(__name__, candidate)
             return self.resolve_file(stream, context)
     raise ValueError("Unable to locate %s" % fn)
Пример #28
0
 def resolve(self, system_url, public_id, context):
     """
     Resolves URIs using the resource API

     Only the last path segment of the system URL is used. It is looked up as a package resource,
     first by its plain name and then below "schema/".

     :raises ValueError: if neither resource exists
     """
     log.debug("resolve SYSTEM URL' %s' for '%s'" % (system_url, public_id))
     fn = system_url.split("/")[-1]
     for candidate in (fn, "schema/%s" % fn):
         if pkg_resources.resource_exists(__name__, candidate):
             stream = pkg_resources.resource_stream(__name__, candidate)
             return self.resolve_file(stream, context)
     raise ValueError("Unable to locate %s" % fn)
Пример #29
0
def _select_args(req):
    """
    Work out what a select should operate on.

    The candidates are tried in this order: the request arguments, the 'select' entry of the request
    state (wrapped in a list), the collections of the store and finally the result of looking up
    'entities' in the store. An empty list is returned if all of them come up empty.
    """
    selected = req.args
    log.debug("selecting using args: %s" % selected)
    if selected is None and 'select' in req.state:
        selected = [req.state.get('select')]
    if selected is None:
        selected = req.md.store.collections()
    if not selected:
        selected = req.md.store.lookup('entities')

    return selected if selected else []
Пример #30
0
def _select_args(req):
    args = req.args
    log.debug("selecting using args: %s" % args)
    if args is None and 'select' in req.state:
        args = [req.state.get('select')]
    if args is None:
        args = req.md.store.collections()
    if args is None or not args:
        args = req.md.store.lookup('entities')
    if args is None or not args:
        args = []

    return args
Пример #31
0
    def parse_metadata(self,
                       fn,
                       key=None,
                       base_url=None,
                       fail_on_error=False,
                       filter_invalid=True,
                       validate=True,
                       post=None):
        """Parse XML metadata, optionally verify its signature and validate it against the schema.

        NOTE(review): this excerpt ends inside the outer ``try`` block - no ``except``/``finally``
        clause and no final return are visible here, so the description below only covers the
        visible part.

        :param fn: a file-like object containing SAML metadata
        :param key: a certificate (file) or a SHA1 fingerprint to use for signature verification
        :param base_url: use this base url to resolve relative URLs for XInclude processing
        :param fail_on_error: (default: False) not referenced in the visible part of the method
        :param filter_invalid: (default True) remove invalid EntityDescriptor elements rather than raise an error
        :param validate: (default: True) set to False to turn off all XML schema validation
        :param post: A callable that will be called to modify the parse-tree before any validation
            (but after xinclude processing)
        :return: None when signature verification fails (the error is logged)
        """
        try:
            # Entity resolution is switched off in the parser; XInclude is processed explicitly.
            t = etree.parse(fn, base_url=base_url, parser=etree.XMLParser(resolve_entities=False))
            t.xinclude()

            if key is not None:
                try:
                    log.debug("verifying signature using %s" % key)
                    refs = xmlsec.verified(t, key)
                    if len(refs) != 1:
                        raise MetadataException("XML metadata contains %d signatures - exactly 1 is required" % len(refs))
                    # Continue with the verified reference instead of the original tree.
                    t = refs[0]  # prevent wrapping attacks
                except Exception, ex:
                    # Any verification problem is printed, logged and turned into a None result.
                    tb = traceback.format_exc()
                    print tb
                    log.error(ex)
                    return None

            if post is not None:
                t = post(t)

            if validate:
                if filter_invalid:
                    # Validate each EntityDescriptor on its own and drop the ones that fail,
                    # announcing every removal through an EVENT_DROP_ENTITY event.
                    for e in t.findall('{%s}EntityDescriptor' % NS['md']):
                        if not schema().validate(e):
                            error = _e(schema().error_log, m=base_url)
                            log.debug("removing '%s': schema validation failed (%s)" % (e.get('entityID'), error))
                            e.getparent().remove(e)
                            self.fire(type=EVENT_DROP_ENTITY, url=base_url, entityID=e.get('entityID'), error=error)
                else:
                    # Filtering is off: validate the tree as a whole instead of element by element.
                    schema().assertValid(t)
Пример #32
0
    def _lookup(self, key):
        if key == 'entities' or key is None:
            return self.entities.values()
        if '+' in key:
            key = key.strip('+')
            # log.debug("lookup intersection of '%s'" % ' and '.join(key.split('+')))
            hits = None
            for f in key.split("+"):
                f = f.strip()
                if hits is None:
                    hits = set(self._lookup(f))
                else:
                    other = self._lookup(f)
                    hits.intersection_update(other)

                if not hits:
                    log.debug("empty intersection")
                    return []

            if hits is not None and hits:
                return list(hits)
            else:
                return []

        m = re.match("^(.+)=(.+)$", key)
        if m:
            return self._lookup("{%s}%s" % (m.group(1), m.group(2).rstrip("/")))

        m = re.match("^{(.+)}(.+)$", key)
        if m:
            res = set()
            for v in m.group(2).rstrip("/").split(';'):
                # log.debug("... adding %s=%s" % (m.group(1),v))
                res.update(self._get_index(m.group(1), v))
            return list(res)

        # log.debug("trying null index lookup %s" % key)
        l = self._get_index("null", key)
        if l:
            return list(l)

        # log.debug("trying main index lookup %s: " % key)
        if key in self.md:
            # log.debug("entities list %s: %s" % (key, self.md[key]))
            lst = []
            for entityID in self.md[key]:
                lst.extend(self.lookup(entityID))
            return lst

        return []
Пример #33
0
 def default(self, *args, **kwargs):
     """Fallback request processor: derive ``pfx`` and ``path`` from the positional path segments and
     pass the request on to ``self.server.request``.

     If the first segment is a known server alias it becomes ``pfx`` and the second segment (when
     present) becomes ``path``. Otherwise ``pfx`` is None and ``path`` is all segments joined with ``/``
     behind a leading ``/``.

     :return: whatever ``self.server.request`` returns
     """
     log.debug("ROOT default args: %s, kwargs: %s" % (repr(args), repr(kwargs)))
     aliased = len(args) > 0 and args[0] in self.server.aliases
     if not aliased:
         kwargs['pfx'] = None
         kwargs['path'] = "/" + "/".join(args)
     else:
         kwargs['pfx'] = args[0]
         if len(args) > 1:
             kwargs['path'] = args[1]
     return self.server.request(**kwargs)
Пример #34
0
 def dispatch(self, path_info):
     """
     Re-encode the part of the path behind a known prefix before dispatching it.

     For the first entry of ``self.prefixes`` that ``path_info`` starts with, every
     ``%2F`` is turned into ``/``, the prefix plus one separator character is cut
     off, the remainder is passed through ``self.enc`` and the result is handed to
     ``self.next_dispatcher`` as ``<prefix>/<encoded>``. Paths matching no prefix
     are dispatched unchanged.

     :param path_info: the request path
     :return: the result of ``self.next_dispatcher``
     """
     for pfx in self.prefixes:
         if not path_info.startswith(pfx):
             continue
         log.debug("EncodingDispatcher (%s) called with %s" % (",".join(self.prefixes), path_info))
         remainder = path_info.replace("%2F", "/")[len(pfx) + 1:]
         npath = "%s/%s" % (pfx, self.enc(remainder))
         log.debug("EncodingDispatcher %s" % npath)
         return self.next_dispatcher(npath)
     return self.next_dispatcher(path_info)
Пример #35
0
 def default(self, *args, **kwargs):
     """Catch-all request processor that maps positional path segments onto the ``pfx`` and ``path``
     keyword arguments of ``self.server.request``.

     A first segment that is a registered alias is used as ``pfx`` (and the second segment, if any, as
     ``path``); in every other case ``pfx`` is None and ``path`` is ``"/"`` followed by the segments
     joined with ``/``.

     :return: whatever ``self.server.request`` returns
     """
     log.debug("ROOT default args: %s, kwargs: %s" % (repr(args), repr(kwargs)))
     if args and args[0] in self.server.aliases:
         prefix_and_rest = args[:2]
         kwargs['pfx'] = prefix_and_rest[0]
         if len(prefix_and_rest) == 2:
             kwargs['path'] = prefix_and_rest[1]
     else:
         kwargs.update(pfx=None, path="/" + "/".join(args))
     return self.server.request(**kwargs)
Пример #36
0
    def _lookup(self, key):
        if key == 'entities' or key is None:
            return self.entities.values()
        if '+' in key:
            key = key.strip('+')
            #log.debug("lookup intersection of '%s'" % ' and '.join(key.split('+')))
            hits = None
            for f in key.split("+"):
                f = f.strip()
                if hits is None:
                    hits = set(self._lookup(f))
                else:
                    other = self._lookup(f)
                    hits.intersection_update(other)

                if not hits:
                    log.debug("empty intersection")
                    return []

            if hits is not None and hits:
                return list(hits)
            else:
                return []

        m = re.match("^(.+)=(.+)$", key)
        if m:
            return self._lookup("{%s}%s" % (m.group(1), m.group(2).rstrip("/")))

        m = re.match("^{(.+)}(.+)$", key)
        if m:
            res = set()
            for v in m.group(2).rstrip("/").split(';'):
                # log.debug("... adding %s=%s" % (m.group(1),v))
                res.update(self._get_index(m.group(1), v))
            return list(res)

        # log.debug("trying null index lookup %s" % key)
        l = self._get_index("null", key)
        if l:
            return list(l)

        # log.debug("trying main index lookup %s: " % key)
        if key in self.md:
            # log.debug("entities list %s: %s" % (key, self.md[key]))
            lst = []
            for entityID in self.md[key]:
                lst.extend(self.lookup(entityID))
            return lst

        return []
Пример #37
0
 def dispatch(self, path_info):
     """
     Dispatch ``path_info``, encoding whatever follows a configured prefix.

     The first prefix in ``self.prefixes`` that the path starts with wins: ``%2F``
     sequences are replaced by ``/``, the prefix and the character after it are
     stripped, and the rest is run through ``self.enc`` before being dispatched as
     ``<prefix>/<encoded>``. When no prefix applies the path goes to
     ``self.next_dispatcher`` untouched.

     :param path_info: the request path
     :return: the result of ``self.next_dispatcher``
     """
     hit = next((p for p in self.prefixes if path_info.startswith(p)), None)
     if hit is None:
         return self.next_dispatcher(path_info)

     log.debug("EncodingDispatcher (%s) called with %s" % (",".join(self.prefixes), path_info))
     decoded = path_info.replace("%2F", "/")
     tail = decoded[len(hit) + 1:]
     npath = "%s/%s" % (hit, self.enc(tail))
     log.debug("EncodingDispatcher %s" % npath)
     return self.next_dispatcher(npath)
Пример #38
0
    def parse_metadata(self, fn, key=None, base_url=None, fail_on_error=False):
        """
        Parse XML metadata, process XIncludes and validate the tree against the schema.

        ``key`` and ``fail_on_error`` are accepted but not used by the code shown here.

        :param fn: a file-like object containing SAML metadata
        :param key: a certificate (file) or a SHA1 fingerprint to use for signature verification
        :param base_url: use this base url to resolve relative URLs for XInclude processing
        :param fail_on_error: (default: False)
        :raises ValueError: when schema validation fails; the schema errors are logged at debug level
        """
        # Entity resolution is disabled on the parser.
        safe_parser = etree.XMLParser(resolve_entities=False)
        try:
            tree = etree.parse(fn, base_url=base_url, parser=safe_parser)
            tree.xinclude()
            schema().assertValid(tree)
        except DocumentInvalid as invalid:
            log.debug(_e(invalid.error_log))
            raise ValueError("XML schema validation failed")
Пример #39
0
        def _d(x, do_split=True):
            if x is not None:
                x = x.strip()
            log.debug("_d(%s,%s)" % (x, do_split))
            if x is None or len(x) == 0:
                return None, None

            if x.startswith("{base64}"):
                x = x[8:].decode('base64')

            if do_split and '.' in x:
                (pth, dot, extn) = x.rpartition('.')
                assert (dot == '.')
                if extn in _ctypes:
                    return pth, extn

            return x, None
Пример #40
0
        def _d(x, do_split=True):
            if x is not None:
                x = x.strip()
            log.debug("_d(%s,%s)" % (x, do_split))
            if x is None or len(x) == 0:
                return None, None

            if x.startswith("{base64}"):
                x = x[8:].decode('base64')

            if do_split and '.' in x:
                (pth, dot, extn) = x.rpartition('.')
                assert (dot == '.')
                if extn in _ctypes:
                    return pth, extn

            return x, None
Пример #41
0
    def load_pipe(self, d):
        """Locate the callable implementing the pipe described by ``d``.

        :param d: one of the following:

            - a string (or unicode): the pipe name, optionally with options as understood by ``self._n``
            - a dict of the form ``{name: args}``: the first key names the pipe, its value becomes the args
            - any other non-empty iterable (eg tuple or list): ``d[0]`` names the pipe

        :return: a 4-tuple ``(callable, opts, name, args)``; ``args`` is None unless ``d`` is a dict
        :raises PipeException: if ``d`` has an unsupported shape, yields no name, or names a
            function that the target module does not provide

        A name of the form ``module:function`` selects the module to import; plain names are looked up
        in ``pyff.pipes.builtins``. The function ``_<name>`` is used when ``<name>`` itself is not a
        callable attribute of the module.
        """
        name = None
        args = None
        opts = []
        kind = type(d)
        if kind is str or kind is unicode:
            name, opts = self._n(d)
        elif hasattr(d, '__iter__') and not kind is dict:
            if not len(d):
                raise PipeException("This does not look like a length of pipe... \n%s" % repr(d))
            name, opts = self._n(d[0])
        elif kind is dict:
            first_key = d.keys()[0]
            name, opts = self._n(first_key)
            args = d[first_key]
        else:
            raise PipeException("This does not look like a length of pipe... \n%s" % repr(d))

        if name is None:
            raise PipeException("Anonymous length of pipe... \n%s" % repr(d))

        mname = "pyff.pipes.builtins"
        fn = name
        if ':' in name:
            mname, _colon, fn = name.rpartition(":")

        pm = mname
        imported = mname
        if '.' in mname:
            pm, _dot, imported = mname.rpartition('.')
        log.debug("importing %s from %s to find %s" % (imported, pm, fn))

        module = __import__(mname, fromlist=[pm])
        # Prefer the public name, fall back to the underscore-prefixed variant.
        for attr in (fn, "_%s" % fn):
            if hasattr(module, attr) and hasattr(getattr(module, attr), '__call__'):
                return getattr(module, attr), opts, fn, args
        raise PipeException("No such method %s in %s" % (fn, mname))
Пример #42
0
    def update(self, t, tid=None, ts=None, merge_strategy=None):  # TODO: merge ?
        """
        Store an EntityDescriptor or EntitiesDescriptor tree in redis.

        For a single entity the entity itself, its membership in ``entities``, its
        entity attributes and its hash aliases are written. For an entity set the
        set itself is written, every contained entity is stored through a
        recursive call and added to both the set and ``entities``.

        :param t: the metadata tree to store
        :param tid: identifier to store under; defaults to the entityID (entity) or Name (entity set)
        :param ts: expiration timestamp; defaults to four days from now. For an entity set
            it is replaced by ``self._expiration`` of the top-level element.
        :param merge_strategy: not used
        :return: the number of EntityDescriptor elements stored
        :raises ValueError: if the top-level element is neither kind of descriptor
        """
        log.debug("redis store update: %s: %s" % (t, tid))
        relt = root(t)
        ne = 0
        if ts is None:
            ts = int(_now() + 3600 * 24 * 4)  # 4 days is the arbitrary default expiration
        if relt.tag == "{%s}EntityDescriptor" % NS['md']:
            if tid is None:
                tid = relt.get('entityID')
            with self.rc.pipeline() as p:
                self.update_entity(relt, t, tid, ts, p)
                entity_id = relt.get("entityID")
                if entity_id is not None:
                    self.membership("entities", entity_id, ts, p)
                for ea, eav in entity_attribute_dict(relt).iteritems():
                    for v in eav:
                        # log.debug("%s=%s" % (ea, v))
                        self.membership("{%s}%s" % (ea, v), tid, ts, p)
                        p.zadd("%s#values" % ea, v, ts)
                    p.sadd("#attributes", ea)

                for hn in ('sha1', 'sha256', 'md5'):
                    alias_key = "{%s}%s#alias" % (hn, hex_digest(tid, hn))
                    p.set(alias_key, tid)
                    if ts is not None:
                        # Expire the alias key that was just written; expiring the
                        # bare hash would target a key that is never set.
                        p.expireat(alias_key, ts)
                p.execute()
            ne += 1
        elif relt.tag == "{%s}EntitiesDescriptor" % NS['md']:
            if tid is None:
                tid = relt.get('Name')
            ts = self._expiration(relt)
            with self.rc.pipeline() as p:
                self.update_entity(relt, t, tid, ts, p)
                for e in iter_entities(t):
                    ne += self.update(e, ts=ts)
                    entity_id = e.get("entityID")
                    if entity_id is not None:
                        self.membership(tid, entity_id, ts, p)
                        self.membership("entities", entity_id, ts, p)
                p.execute()
        else:
            raise ValueError("Bad metadata top-level element: '%s'" % root(t).tag)

        return ne
Пример #43
0
    def run(self):
        """
        Fetch ``self.url`` and record the outcome on this object.

        ``file://`` URLs are read from the local file system; everything else is
        fetched with httplib2 through a file cache in ``.cache`` (a ``no-cache``
        request header is sent when ``self.enable_cache`` is false).

        On success ``self.result``, ``self.cached``, ``self.date`` and
        ``self.last_modified`` are set (plus ``self.resp`` for HTTP fetches). Any
        exception is stored in ``self.ex`` and ``self.result`` is set to None;
        nothing is raised to the caller.
        """

        def _parse_date(value):
            # A missing header is treated as "now".
            if value is None:
                return datetime.now()
            return datetime(*parsedate(value)[:6])

        self.start_time = clock()
        try:
            cache = httplib2.FileCache(".cache")
            headers = dict()
            if not self.enable_cache:
                headers['cache-control'] = 'no-cache'

            log.debug("fetching '%s'" % self.url)

            if self.url.startswith('file://'):
                path = self.url[7:]
                if not os.path.exists(path):
                    raise IOError("file not found: %s" % path)

                with open(path, 'r') as fd:
                    self.result = fd.read()
                    self.cached = False
                    self.date = datetime.now()
                    self.last_modified = datetime.fromtimestamp(os.stat(path).st_mtime)
            else:
                h = httplib2.Http(cache=cache, timeout=60,
                                  disable_ssl_certificate_validation=True)  # trust is done using signatures over here
                resp, content = h.request(self.url, headers=headers)
                self.resp = resp
                # Fall back from Last-Modified to Date, and from there to "now".
                self.last_modified = _parse_date(resp.get('last-modified', resp.get('date', None)))
                self.date = _parse_date(resp['date'])
                if resp.status != 200:
                    raise IOError(resp.reason)
                self.result = content
                self.cached = resp.fromcache

            log.debug("got %d bytes from '%s'" % (len(self.result), self.url))
        except Exception as ex:
            # The failure is kept on the object rather than raised.
            self.ex = ex
            self.result = None
Пример #44
0
    def entity_set(self,
                   entities,
                   name,
                   cacheDuration=None,
                   validUntil=None,
                   validate=True):
        """
        Produce an EntitiesDescriptor from a list of entity specifiers.

        Every specifier is resolved with ``self.lookup``; each resulting entity
        that has an entityID is deep-copied into the new set the first time its
        entityID is seen. Optional cacheDuration and validUntil are affixed.

        :param entities: a set of entities specifiers (lookup is used to find entities from this set)
        :param name: the @Name attribute
        :param cacheDuration: an XML timedelta expression, eg PT1H for 1hr
        :param validUntil: a relative time eg 2w 4d 1h for 2 weeks, 4 days and 1hour from now.
        :param validate: validate the result against the schema; failures are only logged
        :return: the new EntitiesDescriptor element, or None when no entity was selected
        """
        attrs = dict(Name=name, nsmap=NS)
        if cacheDuration is not None:
            attrs['cacheDuration'] = cacheDuration
        if validUntil is not None:
            attrs['validUntil'] = validUntil
        t = etree.Element("{%s}EntitiesDescriptor" % NS['md'], **attrs)
        nent = 0
        seen = set()  # TODO make better de-duplication
        for member in entities:
            for ent in self.lookup(member):
                # Check for None before touching the element.
                if ent is None:
                    continue
                entityID = ent.get('entityID', None)
                if entityID is None or entityID in seen:
                    continue
                t.append(deepcopy(ent))
                seen.add(entityID)
                nent += 1

        log.debug(
            "selecting %d entities from %d entity set(s) before validation" %
            (nent, len(entities)))

        if not nent:
            return None

        if validate:
            try:
                schema().assertValid(t)
            except DocumentInvalid as ex:
                # Best effort: an invalid set is reported but still returned.
                log.debug(_e(ex.error_log))

        return t
Пример #45
0
def load(req, *opts):
    """
    General-purpose resource fetcher.

    :param req: The request
    :param opts: Options: [qsize <5>] [timeout <30>] [xrd <output xrd file>]
    :return: None

    Each entry of ``req.args`` names a resource, optionally followed by a
    verification key and optionally wrapped in the form matched by
    ``FILESPEC_REGEX`` to give it an id. URLs and local files are queued and
    fetched together through ``req.md.fetch_metadata``; local directories are
    loaded right away with ``req.md.load_dir``. Per-URL statistics end up in
    ``req.state['stats']['Metadata URLs']``.
    """
    remote = []
    for spec in req.args:
        spec = spec.strip()
        log.debug("load %s" % spec)
        rid = None
        m = re.match(FILESPEC_REGEX, spec)
        if m:
            spec, rid = m.group(1), m.group(2)
        parts = spec.split()
        assert len(parts) in [1, 2], PipeException("Usage: load: resource [as url] [verification]")
        url = parts[0]
        verify = parts[1] if len(parts) == 2 else None

        if "://" in url:
            log.debug("remote %s %s %s" % (url, verify, rid))
            remote.append((url, verify, rid))
            continue

        if not os.path.exists(url):
            log.error("Don't know how to load '%s' as %s verified by %s" % (url, rid, verify))
            continue

        if os.path.isdir(url):
            log.debug("local directory %s %s %s" % (url, verify, rid))
            req.md.load_dir(url, url=rid)
        elif os.path.isfile(url):
            # Local files go through the same fetch machinery as remote ones.
            log.debug("local file %s %s %s" % (url, verify, rid))
            remote.append(("file://%s" % url, verify, rid))
        else:
            log.error("Unknown file type for load: %s" % parts[0])

    # Options arrive as a flat sequence of alternating names and values.
    options = dict(zip(opts[::2], opts[1::2]))
    stats = dict()
    for option, fallback in (('timeout', 30), ('qsize', 5), ('xrd', None), ('stats', stats)):
        options.setdefault(option, fallback)
    req.md.fetch_metadata(remote, **options)
    req.state['stats']['Metadata URLs'] = stats
Пример #46
0
    def entity_set(self, entities, name, cacheDuration=None, validUntil=None, validate=True):
        """
        Produce an EntitiesDescriptor from a list of entity specifiers.

        Every specifier is resolved with ``self.lookup``; each resulting entity
        that has an entityID is deep-copied into the new set the first time its
        entityID is seen. Optional cacheDuration and validUntil are affixed.

        :param entities: a set of entities specifiers (lookup is used to find entities from this set)
        :param name: the @Name attribute
        :param cacheDuration: an XML timedelta expression, eg PT1H for 1hr
        :param validUntil: a relative time eg 2w 4d 1h for 2 weeks, 4 days and 1hour from now.
        :param validate: validate the result against the schema; failures are only logged
        :return: the new EntitiesDescriptor element, or None when no entity was selected
        """
        attrs = dict(Name=name, nsmap=NS)
        if cacheDuration is not None:
            attrs['cacheDuration'] = cacheDuration
        if validUntil is not None:
            attrs['validUntil'] = validUntil
        t = etree.Element("{%s}EntitiesDescriptor" % NS['md'], **attrs)
        nent = 0
        seen = set()  # TODO make better de-duplication
        for member in entities:
            for ent in self.lookup(member):
                # Check for None before touching the element.
                if ent is None:
                    continue
                entityID = ent.get('entityID', None)
                if entityID is None or entityID in seen:
                    continue
                t.append(deepcopy(ent))
                seen.add(entityID)
                nent += 1

        log.debug("selecting %d entities from %d entity set(s) before validation" % (
            nent, len(entities)))

        if not nent:
            return None

        if validate:
            try:
                schema().assertValid(t)
            except DocumentInvalid as ex:
                # Best effort: an invalid set is reported but still returned.
                log.debug(_e(ex.error_log))

        return t
Пример #47
0
def emit(req, ctype="application/xml", *opts):
    """
    Returns a UTF-8 encoded representation of the working tree.

    :param req: The request
    :param ctype: The mimetype of the response.
    :param opts: Options (not used)
    :return: the UTF-8 encoded text

    The working tree (``req.t``) is reduced to its root element and serialized
    with ``dumptree``; a value that is not a tree is used as it is. The SHA-1
    digest of the result is stored as the ``ETag`` header and ``ctype`` as the
    ``Content-Type`` header in ``req.state['headers']``. A ``PipeException`` is
    raised when there is nothing to emit.

    **Examples**

    .. code-block:: yaml

        - emit application/xml:
        - break
    """
    d = req.t
    log.debug("before getroot (%s) %s" % (type(d), repr(d)))
    if hasattr(d, 'getroot') and hasattr(d.getroot, '__call__'):
        root_elt = d.getroot()
        # A tree without a root element is rendered through str().
        d = str(d) if root_elt is None else root_elt
    log.debug("after getroot (%s) %s" % (type(d), repr(d)))
    if hasattr(d, 'tag'):
        log.debug("has tag")
        d = dumptree(d)
    log.debug("after dumptree (%s) %s" % (type(d), repr(d)))

    if d is None:
        raise PipeException("Empty")

    req.state['headers']['ETag'] = hashlib.sha1(d).hexdigest()
    req.state['headers']['Content-Type'] = ctype
    text = unicode(d.decode('utf-8'))
    return text.encode("utf-8")
Пример #48
0
 def periodic(self, stats):
     """
     Remove expired entries from the redis sorted sets and forget collections
     and attributes that are left without entries.

     Entries are expired by score: everything scored up to the current time is
     removed from ``members``, from each ``<collection>#members`` set and from
     each ``<attribute>#values`` set.

     :param stats: dict that receives the time of this run under
         ``'Last Periodic Maintenance'``
     """
     now = _now()
     stats['Last Periodic Maintenance'] = now
     log.debug("periodic maintentance...")
     self.rc.zremrangebyscore("members", "-inf", now)
     for c in self.rc.smembers("#collections"):
         members_key = "%s#members" % c
         self.rc.zremrangebyscore(members_key, "-inf", now)
         if not self.rc.zcard(members_key) > 0:
             log.debug("dropping empty collection %s" % c)
             self.rc.srem("#collections", c)
     for an in self.rc.smembers("#attributes"):
         # Attribute values live in "<attribute>#values", so both the purge and
         # the emptiness check have to use that key.
         values_key = "%s#values" % an
         self.rc.zremrangebyscore(values_key, "-inf", now)
         if not self.rc.zcard(values_key) > 0:
             log.debug("dropping empty attribute %s" % an)
             self.rc.srem("#attributes", an)
Пример #49
0
 def periodic(self, stats):
     """
     Remove expired entries from the redis sorted sets and forget collections
     and attributes that are left without entries.

     Entries are expired by score: everything scored up to the current time is
     removed from ``members``, from each ``<collection>#members`` set and from
     each ``<attribute>#values`` set.

     :param stats: dict that receives the time of this run under
         ``'Last Periodic Maintenance'``
     """
     now = _now()
     stats['Last Periodic Maintenance'] = now
     log.debug("periodic maintentance...")
     self.rc.zremrangebyscore("members", "-inf", now)
     for c in self.rc.smembers("#collections"):
         members_key = "%s#members" % c
         self.rc.zremrangebyscore(members_key, "-inf", now)
         if not self.rc.zcard(members_key) > 0:
             log.debug("dropping empty collection %s" % c)
             self.rc.srem("#collections", c)
     for an in self.rc.smembers("#attributes"):
         # Attribute values live in "<attribute>#values", so both the purge and
         # the emptiness check have to use that key.
         values_key = "%s#values" % an
         self.rc.zremrangebyscore(values_key, "-inf", now)
         if not self.rc.zcard(values_key) > 0:
             log.debug("dropping empty attribute %s" % an)
             self.rc.srem("#attributes", an)
Пример #50
0
    def run(self):
        """
        Fetch ``self.url`` from the local file system (``file://``) or over HTTP.

        HTTP fetches are attempted with httplib2 first and retried with
        ``requests`` when that attempt raises. The outcome is recorded in
        ``self.result``, ``self.cached``, ``self.last_modified`` and, for HTTP,
        ``self.status``.

        NOTE(review): this excerpt ends inside the outer ``try`` block - its
        ``except``/``finally`` clause is not visible here.
        """

        def _parse_date(str):
            # NOTE(review): ``datetime.new`` looks like a typo for ``datetime.now`` -
            # as written a missing header would raise AttributeError. Confirm and fix.
            if str is None:
                return datetime.new()
            return datetime(*parsedate(str)[:6])

        self.start_time = clock()
        try:
            cache = httplib2.FileCache(".cache")
            if not self.enable_cache:
                # Caching disabled: drop any existing cache entry for this URL up front.
                log.debug("removing '%s' from cache" % self.url)
                cache.delete(self.url)

            log.debug("fetching '%s'" % self.url)

            if self.url.startswith('file://'):
                # Strip the 'file://' scheme to obtain the local path.
                path = self.url[7:]
                if not os.path.exists(path):
                    raise IOError("file not found: %s" % path)

                with open(path, 'r') as fd:
                    self.result = fd.read()
                    self.cached = False
                    self.date = datetime.now()
                    self.last_modified = datetime.fromtimestamp(os.stat(path).st_mtime)
            else:
                try:
                    h = httplib2.Http(cache=cache, timeout=60,
                                      disable_ssl_certificate_validation=True)  # trust is done using signatures over here
                    resp, content = h.request(self.url)
                    self.status = resp.status
                    # Prefer Last-Modified, fall back to Date.
                    self.last_modified = _parse_date(resp.get('last-modified', resp.get('date', None)))
                    if resp.status != 200:
                        raise IOError(resp.reason)
                    self.result = content
                    self.cached = resp.fromcache
                except Exception, ex:
                    # Any httplib2 failure (including a non-200 status) triggers a second
                    # attempt through requests, this time without the cache.
                    resp = requests.get(self.url)
                    self.status = resp.status_code
                    self.last_modified = _parse_date(resp.headers['last-modified'] or resp.headers['date'])
                    if resp.status_code != 200:
                        raise IOError(httplib.responses[resp.status_code])
                    self.result = resp.content
                    self.cached = False

            log.debug("got %d bytes from '%s'" % (len(self.result), self.url))
Пример #51
0
    def run(self):
        """
        Fetch ``self.url`` and record the outcome on this object.

        ``file://`` URLs are read from the local file system; everything else is
        fetched with ``requests`` through the ``requests_cache`` cache installed
        as ``.cache`` (the cached entry for the URL is deleted first when
        ``self.enable_cache`` is false).

        On success ``self.result``, ``self.cached``, ``self.date`` and
        ``self.last_modified`` are set (plus ``self.resp`` and ``self.status``
        for HTTP fetches). Any exception is printed, logged as a warning and
        stored in ``self.ex`` with ``self.result`` set to None; nothing is raised
        to the caller.
        """

        def _parse_date(value):
            # A missing header is treated as "now".
            if value is None:
                return datetime.now()
            return datetime(*parsedate(value)[:6])

        self.start_time = clock()
        try:
            requests_cache.install_cache('.cache')
            if not self.enable_cache:
                log.debug("removing '%s' from cache" % self.url)
                requests_cache.get_cache().delete_url(self.url)

            log.debug("fetching '%s'" % self.url)

            if self.url.startswith('file://'):
                path = self.url[7:]
                if not os.path.exists(path):
                    raise IOError("file not found: %s" % path)

                with open(path, 'r') as fd:
                    self.result = fd.read()
                    self.cached = False
                    self.date = datetime.now()
                    self.last_modified = datetime.fromtimestamp(
                        os.stat(path).st_mtime)
            else:
                # NOTE: certificate verification is disabled for this request.
                self.resp = requests.get(self.url, timeout=60, verify=False)
                # Fall back from Last-Modified to Date, and from there to "now".
                self.last_modified = _parse_date(
                    self.resp.headers.get('last-modified',
                                          self.resp.headers.get('date', None)))
                self.date = _parse_date(self.resp.headers['date'])
                self.cached = getattr(self.resp, 'from_cache', False)
                self.status = self.resp.status_code
                if self.resp.status_code != 200:
                    raise IOError(self.resp.reason)
                self.result = self.resp.content

            log.debug("got %d bytes from '%s'" % (len(self.result), self.url))
        except Exception as ex:
            traceback.print_exc()
            log.warn("unable to fetch '%s': %s" % (self.url, ex))
            self.ex = ex
            self.result = None
Пример #52
0
    def search(self, query, path=None, page=None, page_limit=10, entity_filter=None):
        """
        Search the metadata store for entities matching ``query``.

        :param query: A string to search for.
        :param path: The repository collection (@Name) to search in - None for search in all collections
        :param page:  When using paged search, the page index
        :param page_limit: When using paged search, the maximum entry per page
        :param entity_filter: A lookup expression used to filter the entries before search is done.
        :return: the list of hits, sorted by label; when ``page`` is given a triple
            ``(hits on that page, whether more pages follow, total number of hits)``

        An EntityDescriptor matches when its entityID or the lower-cased text of any
        of its DisplayName, ServiceName, OrganizationName or OrganizationDisplayName
        elements contains the query as a substring.

        Each hit is a dict with three items:

        - label: A displayable string, useful as a UI label
        - value: The entityID of the EntityDescriptor
        - id: A sha1-ID of the entityID - on the form {sha1}<sha1-hash-of-entityID>
        """

        def _strings(e):
            lst = [e.get('entityID')]
            for attr in ['.//{%s}DisplayName' % NS['mdui'],
                         './/{%s}ServiceName' % NS['md'],
                         './/{%s}OrganizationDisplayName' % NS['md'],
                         './/{%s}OrganizationName' % NS['md']]:
                # Elements without text must be skipped before lower() is called on them.
                lst.extend([x.text.lower() for x in e.findall(attr) if x.text is not None])
            return [s for s in lst if s is not None]

        def _match(query, e):
            return any(query in qstr for qstr in _strings(e))

        # Combine collection and filter into one intersection lookup expression.
        f = []
        if path is not None:
            f.append(path)
        if entity_filter is not None:
            f.append(entity_filter)
        mexpr = "+".join(f) if f else None

        log.debug("mexpr: %s" % mexpr)

        matching = [ent for ent in self.lookup(mexpr) if _match(query, ent)]
        res = [{'label': self.display(e),
                'value': e.get('entityID'),
                'id': pyff.index.hash_id(e, 'sha1')}
               for e in pyff.index.EntitySet(matching)]

        res.sort(key=lambda i: i['label'])

        log.debug(res)

        if page is not None:
            total = len(res)
            begin = (page - 1) * page_limit
            end = begin + page_limit
            more = (end < total)
            return res[begin:end], more, total
        else:
            return res
Пример #53
0
        def consumer(q, njobs, stats, next_jobs=None, resolved=None):
            """
            Collect finished fetch threads from ``q`` and import the metadata each one retrieved.

            Uses ``self`` and ``timeout`` from the enclosing scope.

            :param q: Queue of fetch-thread objects; each one is joined before its result is inspected.
            :param njobs: The loop runs while the finished-counter is below this value.
            :param stats: Statistics mapping. Not used in the visible part of this function -
                          presumably filled in by the ``finally`` clause; TODO confirm.
            :param next_jobs: List receiving follow-up jobs as (url, verify, id, tries) tuples:
                              retries of failed fetches and links discovered in XRD documents.
            :param resolved: Set receiving a (url, certificate-or-None) pair for every metadata
                             document that was processed without raising.
            """
            if next_jobs is None:
                next_jobs = []
            if resolved is None:
                resolved = set()
            # NOTE(review): nfinished is not incremented anywhere in the visible code -
            # presumably the ``finally`` clause does it; confirm.
            nfinished = 0

            while nfinished < njobs:
                # Per-thread statistics; stays None until the thread has been joined successfully.
                info = None
                try:
                    log.debug("waiting for next thread to finish...")
                    # Blocking get: wait until the producer hands over the next thread.
                    thread = q.get(True)
                    thread.join(timeout)

                    # join() returns after the timeout even if the thread is still running.
                    if thread.isAlive():
                        raise MetadataException("thread timeout fetching '%s'" % thread.url)

                    info = {
                        'Time Spent': thread.time()
                    }

                    # Re-raise an exception captured by the fetch thread so it is handled below.
                    if thread.ex is not None:
                        raise thread.ex
                    else:
                        if thread.result is not None:
                            info['Bytes'] = len(thread.result)
                        else:
                            raise MetadataException("empty response fetching '%s'" % thread.url)
                        info['Cached'] = thread.cached
                        info['Date'] = str(thread.date)
                        info['Last-Modified'] = str(thread.last_modified)
                        info['Tries'] = thread.tries

                    xml = thread.result.strip()

                    if thread.status is not None:
                        info['Status'] = thread.status

                    t = self.parse_metadata(StringIO(xml), key=thread.verify, base_url=thread.url)
                    if t is None:
                        self.fire(type=EVENT_IMPORT_FAIL, url=thread.url)
                        raise MetadataException("no valid metadata found at '%s'" % thread.url)

                    relt = root(t)
                    if relt.tag in ('{%s}XRD' % NS['xrd'], '{%s}XRDS' % NS['xrd']):
                        # XRD document: it does not carry metadata itself, so every Link whose
                        # rel equals NS['md'] is queued as a new job for the next round.
                        log.debug("%s looks like an xrd document" % thread.url)
                        for xrd in t.xpath("//xrd:XRD", namespaces=NS):
                            log.debug("xrd: %s" % xrd)
                            for link in xrd.findall(".//{%s}Link[@rel='%s']" % (NS['xrd'], NS['md'])):
                                url = link.get("href")
                                certs = xmlsec.CertDict(link)
                                fingerprints = certs.keys()
                                # The first key of the CertDict (if any) becomes the verification
                                # argument of the new job.
                                fp = None
                                if len(fingerprints) > 0:
                                    fp = fingerprints[0]
                                log.debug("fingerprint: %s" % fp)
                                # New job tuple: (url, verify, id, tries) with the url doubling as id.
                                next_jobs.append((url, fp, url, 0))

                    elif relt.tag in ('{%s}EntityDescriptor' % NS['md'], '{%s}EntitiesDescriptor' % NS['md']):
                        # SAML metadata: work out how long it may be cached, then import it.
                        cacheDuration = self.default_cache_duration
                        if self.respect_cache_duration:
                            cacheDuration = root(t).get('cacheDuration', self.default_cache_duration)
                        offset = duration2timedelta(cacheDuration)

                        if thread.cached:
                            # A cached copy is rejected once last_modified + cache duration lies
                            # further back than min_cache_ttl before now.
                            if thread.last_modified + offset < datetime.now() - duration2timedelta(self.min_cache_ttl):
                                raise MetadataException("cached metadata expired")
                            else:
                                log.debug("found cached metadata for '%s' (last-modified: %s)" % (thread.url, thread.last_modified))
                                ne = self.import_metadata(t, url=thread.id)
                                info['Number of Entities'] = ne
                        else:
                            log.debug("got fresh metadata for '%s' (date: %s)" % (thread.url, thread.date))
                            ne = self.import_metadata(t, url=thread.id)
                            info['Number of Entities'] = ne
                        info['Cache Expiration Time'] = str(thread.last_modified + offset)
                        certs = xmlsec.CertDict(relt)
                        # Record the first value of the CertDict (stripped) together with the URL.
                        cert = None
                        if certs.values():
                            cert = certs.values()[0].strip()
                        resolved.add((thread.url, cert))
                    else:
                        raise MetadataException("unknown metadata type for '%s' (%s)" % (thread.url, relt.tag))
                except Exception, ex:
                    # NOTE(review): if q.get() itself raised, ``thread`` is unbound (first
                    # iteration) or still refers to the previous thread here.
                    # NOTE(review): the warning says "will retry" even when the retry limit
                    # has already been reached.
                    #traceback.print_exc(ex)
                    log.warn("problem fetching '%s' (will retry): %s" % (thread.url, ex))
                    if info is not None:
                        info['Exception'] = ex
                    # Re-queue the job with an incremented try counter until retry_limit is hit.
                    if thread.tries < self.retry_limit:
                        next_jobs.append((thread.url, thread.verify, thread.id, thread.tries + 1))
                    else:
                        #traceback.print_exc(ex)
                        log.error("retry limit exceeded for %s (last error was: %s)" % (thread.url, ex))
                finally:
Пример #54
0
    def _lookup(self, member, xp=None):
        """
        Find a (set of) EntityDescriptor element(s) based on the specified 'member' expression.

        :param member: What to look up. Handled forms, checked in this order:

            - ``None``: every key of this repository is looked up and the results are concatenated.
            - an object with an ``xpath`` method: ``xp`` is evaluated against it.
            - a string:
                - ``a+b+...``: intersection of the lookups of each part,
                - ``src!xpath``: lookup of ``src`` (all keys when empty) filtered by ``xpath``,
                - ``{attr}value`` or ``attr=value``: index lookup of the attribute value,
                - a value found in one of the DIGESTS indexes,
                - a key of this repository, or that key with ``.xml`` appended,
                - anything containing ``://`` that matched none of the above: fetched as a
                  remote list, each non-empty line of which is looked up in turn.
            - any other non-dict iterable: each item is looked up; an empty iterable means all keys.
        :param xp: XPath expression used to select elements (default ``//md:EntityDescriptor``).
        :return: A list of matches. For iterables and remote lists the result is a list holding
                 one result list per member (it is not flattened).
        :raises MetadataException: if ``member`` is of a type that cannot be interpreted.
        """
        if xp is None:
            xp = "//md:EntityDescriptor"

        if member is None:
            lst = []
            for m in self.keys():
                log.debug("resolving %s filtered by %s" % (m, xp))
                lst.extend(self._lookup(m, xp))
            return lst
        elif hasattr(member, 'xpath'):
            log.debug("xpath filter %s <- %s" % (xp, member))
            return member.xpath(xp, namespaces=NS)
        elif isinstance(member, str) or isinstance(member, unicode):
            log.debug("string lookup %s" % member)

            if '+' in member:
                member = member.strip('+')
                parts = member.split('+')
                log.debug("lookup intersection of '%s'" % ' and '.join(parts))
                hits = None
                for f in parts:
                    f = f.strip()
                    if hits is None:
                        hits = set(self._lookup(f, xp))
                    else:
                        hits.intersection_update(self._lookup(f, xp))

                    # Once the intersection is empty no further part can add to it.
                    if not hits:
                        log.debug("empty intersection")
                        return []

                return list(hits) if hits else []

            if "!" in member:
                # Split on the first '!' only: the XPath part may itself contain '!'
                # (e.g. a '!=' comparison), which used to break the tuple unpacking.
                (src, xp) = member.split("!", 1)
                if len(src) == 0:
                    src = None
                    log.debug("filtering using %s" % xp)
                else:
                    log.debug("selecting %s filtered by %s" % (src, xp))
                return self._lookup(src, xp)

            m = re.match(r"^\{(.+)\}(.+)$", member)
            if m is not None:
                log.debug("attribute-value match: %s='%s'" % (m.group(1), m.group(2)))
                return self.index.get(m.group(1), m.group(2).rstrip("/"))

            m = re.match(r"^(.+)=(.+)$", member)
            if m is not None:
                log.debug("attribute-value match: %s='%s'" % (m.group(1), m.group(2)))
                return self.index.get(m.group(1), m.group(2).rstrip("/"))

            log.debug("basic lookup %s" % member)
            for idx in DIGESTS:
                e = self.index.get(idx, member)
                if e:
                    log.debug("found %s in %s index" % (e, idx))
                    return e

            e = self.get(member, None)
            if e is not None:
                return self._lookup(e, xp)

            e = self.get("%s.xml" % member, None)  # hackish but helps save people from their mistakes
            if e is not None:
                if not "://" in member:  # not an absolute URL
                    log.warn("Found %s.xml as an alias - AVOID extensions in 'select as' statements" % member)
                return self._lookup(e, xp)

            if "://" in member:  # looks like a URL and wasn't an entity or collection - recurse away!
                log.debug("recursively fetching members from '%s'" % member)
                # note that this supports remote lists which may be more rope than is healthy
                # The response object has no iterlines() method; iterate it directly, drop the
                # line endings (they would never match a key) and skip blank lines.
                response = urllib.urlopen(member)
                try:
                    lines = [line.strip() for line in response]
                finally:
                    response.close()
                return [self._lookup(line, xp) for line in lines if line]

            return []
        elif hasattr(member, '__iter__') and type(member) is not dict:
            if not len(member):
                member = self.keys()
            return [self._lookup(m, xp) for m in member]
        else:
            raise MetadataException("What about %s ??" % member)
Пример #55
0
    def request(self, **kwargs):
        """The main request processor. This code implements all rendering of metadata.

        :param kwargs: Request parameters. The keys read here are ``pfx``, ``path`` and
                       ``content_type`` for every request, ``entityID`` for ``.ds`` requests, and
                       ``paged``, ``query``, ``page``, ``page_limit``, ``entity_filter`` and
                       ``related`` for ``.s`` (search) requests.
        :return: The response body: a rendered template, a JSON string or the first non-None
                 result produced by one of the plumbings.
        :raises HTTPError: 503 while the repository is not ready; 400/404 for ``.ds`` requests
                           with a missing, unknown or ambiguous entityID.
        :raises NotFound: if the prefix alias is unknown, a lookup is empty or no plumbing
                          produced a result.
        """
        stats['MD Requests'] += 1

        if not self.ready:
            raise HTTPError(503, _("Service Unavailable (repository loading)"))

        pfx = kwargs.get('pfx', None)
        path = kwargs.get('path', None)
        content_type = kwargs.get('content_type', None)

        log.debug("MDServer pfx=%s, path=%s, content_type=%s" % (pfx, path, content_type))

        # Recognised path extensions and the content type each one selects.
        _ctypes = {'xml': 'application/xml',
                   'json': 'application/json',
                   'htm': 'text/html',
                   'html': 'text/html',
                   'ds': 'text/html',
                   's': 'application/json'}

        def _d(x, do_split=True):
            # Normalise a path: strip it, decode a "{base64}" prefixed value and, when
            # requested, split off a trailing extension that is listed in _ctypes.
            # Returns a (path, extension-or-None) pair.
            if x is not None:
                x = x.strip()
            log.debug("_d(%s,%s)" % (x, do_split))
            if x is None or len(x) == 0:
                return None, None

            if x.startswith("{base64}"):
                x = x[8:].decode('base64')

            if do_split and '.' in x:
                (pth, dot, extn) = x.rpartition('.')
                assert (dot == '.')
                if extn in _ctypes:
                    return pth, extn

            return x, None

        alias = None
        if pfx:
            # The prefix in the URL is an alias; translate it to the attribute it stands for.
            alias = pfx
            pfx = self.aliases.get(alias, None)
            if pfx is None:
                raise NotFound()

        # An explicit content_type wins over the extension, so the path is not split then.
        path, ext = _d(path, content_type is None)
        if pfx and path:
            q = "{%s}%s" % (pfx, path)
            path = "/%s/%s" % (alias, path)
        else:
            q = path

        if ext is not None:
            log.debug("request path: %s.%s, headers: %s" % (path, ext, cherrypy.request.headers))
        else:
            log.debug("request path: %s, headers: %s" % (path, cherrypy.request.headers))

        if content_type is None:
            if ext is not None and ext in _ctypes:
                accept = {_ctypes[ext]: True}
            else:
                accept = MDServer.MediaAccept()
                if ext is not None:
                    path = "%s.%s" % (path, ext)
        else:
            accept = {content_type: True}

        with self.lock.readlock:
            if ext == 'ds':
                pdict = dict()
                entity_id = kwargs.get('entityID', None)
                if entity_id is None:
                    raise HTTPError(400, _("400 Bad Request - missing entityID"))
                pdict['sp'] = self.md.sha1_id(entity_id)
                e = self.md.store.lookup(entity_id)
                if e is None or len(e) == 0:
                    raise HTTPError(404)

                if len(e) > 1:
                    raise HTTPError(400, _("400 Bad Request - multiple matches for") + " %s" % entity_id)

                pdict['entity'] = self.md.simple_summary(e[0])
                if not path:
                    pdict['search'] = "/search/"
                    pdict['list'] = "/role/idp.json"
                else:
                    pdict['search'] = "%s.s" % path
                    pdict['list'] = "%s.json" % path
                cherrypy.response.headers['Content-Type'] = 'text/html'
                return render_template("ds.html", **pdict)
            elif ext == 's':
                # NOTE(review): request parameters presumably arrive as strings, so any
                # non-empty value (including "false") makes ``paged`` true - confirm.
                paged = bool(kwargs.get('paged', False))
                query = kwargs.get('query', None)
                # NOTE(review): if search() treats pages as 1-based, the default of 0 selects
                # an empty slice - confirm against the search implementation.
                page = kwargs.get('page', 0)
                page_limit = kwargs.get('page_limit', 10)
                entity_filter = kwargs.get('entity_filter', None)
                related = kwargs.get('related', None)

                cherrypy.response.headers['Content-Type'] = 'application/json'

                if query is None:
                    # No query given: build one from the client address and the labels of
                    # the referring host that precede its public suffix.
                    log.debug("empty query - creating one")
                    query = [cherrypy.request.remote.ip]
                    # The HTTP request header is spelled "Referer" (single 'r').
                    referrer = cherrypy.request.headers.get('Referer', None)
                    if referrer is not None:
                        log.debug("including referrer: %s" % referrer)
                        url = urlparse.urlparse(referrer)
                        # hostname drops any port/userinfo and copes with IPv6 literals.
                        host = url.hostname or ''
                        if host:
                            suffix = self.psl.get_public_suffix(host)
                            # Remove the suffix as a whole; str.rstrip() would strip a *set of
                            # characters* and could eat into (or wipe out) the remaining labels.
                            if suffix and host.endswith(suffix):
                                host = host[:len(host) - len(suffix)]
                        for host_part in host.split('.'):
                            if host_part:
                                query.append(host_part)
                    log.debug("created query: %s" % ",".join(query))

                if paged:
                    res, more, total = self.md.search(query,
                                                      path=q,
                                                      page=int(page),
                                                      page_limit=int(page_limit),
                                                      entity_filter=entity_filter,
                                                      related=related)
                    # log.debug(dumps({'entities': res, 'more': more, 'total': total}))
                    return dumps({'entities': res, 'more': more, 'total': total})
                else:
                    return dumps(self.md.search(query,
                                                path=q,
                                                entity_filter=entity_filter,
                                                related=related))
            elif accept.get('text/html'):
                if not q:
                    if pfx:
                        title = pfx
                    else:
                        title = _("Metadata By Attributes")
                    return render_template("index.html",
                                           md=self.md,
                                           alias=alias,
                                           aliases=self.aliases,
                                           title=title)
                else:
                    entities = self.md.lookup(q)
                    if not entities:
                        raise NotFound()
                    if len(entities) > 1:
                        return render_template("metadata.html",
                                               md=self.md,
                                               subheading=q,
                                               entities=entities)
                    else:
                        entity = entities[0]
                        t = html.fragment_fromstring(unicode(xslt_transform(entity, "entity2html.xsl")))
                        # Replace every <code role="entity"> placeholder with the
                        # pretty-printed XML source of the entity, appended to the parent's text.
                        for c_elt in t.findall(".//code[@role='entity']"):
                            c_txt = dumptree(entity)
                            parser = etree.XMLParser(remove_blank_text=True)
                            src = StringIO(c_txt)
                            tree = etree.parse(src, parser)
                            c_txt = dumptree(tree, pretty_print=True, xml_declaration=False).decode("utf-8")
                            p = c_elt.getparent()
                            p.remove(c_elt)
                            if p.text is not None:
                                p.text += c_txt
                            else:
                                p.text = c_txt
                        xml = dumptree(t, xml_declaration=False).decode('utf-8')
                        return render_template("entity.html",
                                               headline=self.md.display(entity).strip(),
                                               subheading=entity.get('entityID'),
                                               entity_id=entity.get('entityID'),
                                               content=xml)
            else:
                # Anything else is handed to the plumbings; the first one that returns a
                # result determines the response, its headers and its cache lifetime.
                for p in self.plumbings:
                    state = {'request': True,
                             'headers': {'Content-Type': 'text/xml'},
                             'accept': accept,
                             'url': cherrypy.url(relative=False),
                             'select': q,
                             'path': path,
                             'stats': {}}
                    r = p.process(self.md, state=state)
                    if r is not None:
                        cache_ttl = state.get('cache', 0)
                        log.debug("caching for %d seconds" % cache_ttl)
                        for k, v in state.get('headers', {}).iteritems():
                            cherrypy.response.headers[k] = v
                        caching.expires(secs=cache_ttl)
                        return r
        raise NotFound()
Пример #56
0
def load(req, *opts):
    """
General-purpose resource fetcher.

    :param req: The request
    :param opts: Options: See "Options" below
    :return: None
    :raises PipeException: if a resource line is malformed, or - when fail_on_error is true -
                           if a local resource does not exist or has an unknown file type

Supports both remote and local resources. Fetching remote resources is done in parallel using threads.

Each resource line has the form::

    resource [as url] [[verify] verification] [via pipeline]

Note: When downloading remote files over HTTPS the TLS server certificate is not validated.
Note: Default behaviour is to ignore metadata files or entities in MD files that cannot be loaded

Options are put directly after "load". E.g:

.. code-block:: yaml

    - load fail_on_error True filter_invalid False:
      - http://example.com/some_remote_metadata.xml
      - local_file.xml
      - /opt/directory_containing_md_files/

**Options**
Defaults are marked with (*)
- max_workers <5> : Number of parallel threads to use for loading MD files
- timeout <120> : Socket timeout when downloading files
- validate <True*|False> : When true downloaded metadata files are validated (schema validation)
- fail_on_error <True|False*> : Controls whether an error during download, parsing or (optional) validation of a
                                MD file aborts processing of the pipeline. When true a failure aborts and causes
                                pyff to exit with a non zero exit code. Otherwise errors are logged but ignored.
- filter_invalid <True*|False> : Controls validation behaviour. When true, entities that fail validation are
                                 filtered, i.e. are not loaded. When false the entire metadata file is either
                                 loaded, or not. fail_on_error controls whether failure to validate the entire
                                 MD file will abort processing of the pipeline.
    """
    usage = "Usage: load resource [as url] [[verify] verification] [via pipeline]"

    # Options arrive as a flat "name value name value ..." sequence.
    opts = dict(zip(opts[::2], opts[1::2]))
    # NOTE(review): user supplied timeout/max_workers stay strings while the defaults are
    # ints - confirm that fetch_metadata copes with both.
    opts.setdefault('timeout', 120)
    opts.setdefault('max_workers', 5)
    opts.setdefault('validate', "True")
    opts.setdefault('fail_on_error', "False")
    opts.setdefault('filter_invalid', "True")
    opts['validate'] = bool(strtobool(opts['validate']))
    opts['fail_on_error'] = bool(strtobool(opts['fail_on_error']))
    opts['filter_invalid'] = bool(strtobool(opts['filter_invalid']))

    remotes = []
    for x in req.args:
        x = x.strip()
        log.debug("load parsing '%s'" % x)
        r = x.split()

        # Validate explicitly instead of with ``assert``: assertions are stripped under -O
        # and would surface as AssertionError rather than the PipeException used elsewhere.
        if not 1 <= len(r) <= 6:
            raise PipeException(usage)

        url = r.pop(0)
        params = dict()

        while len(r) > 0:
            elt = r.pop(0)
            if elt in ("as", "verify", "via"):
                if len(r) > 0:
                    params[elt] = r.pop(0)
                else:
                    raise PipeException(usage)
            else:
                # A bare word is shorthand for "verify <word>".
                params['verify'] = elt

        for elt in ("verify", "via"):
            params.setdefault(elt, None)

        params.setdefault('as', url)

        post = None
        if params['via'] is not None:
            post = PipelineCallback(params['via'], req)

        if "://" in url:
            log.debug("load {} verify {} as {} via {}".format(
                url, params['verify'], params['as'], params['via']))
            remotes.append((url, params['verify'], params['as'], post))
        elif os.path.exists(url):
            if os.path.isdir(url):
                # Directories are loaded right away; everything else is queued in
                # ``remotes`` and fetched in one go at the end.
                log.debug("directory {} verify {} as {} via {}".format(
                    url, params['verify'], params['as'], params['via']))
                req.md.load_dir(url,
                                url=params['as'],
                                validate=opts['validate'],
                                post=post,
                                fail_on_error=opts['fail_on_error'],
                                filter_invalid=opts['filter_invalid'])
            elif os.path.isfile(url):
                log.debug("file {} verify {} as {} via {}".format(
                    url, params['verify'], params['as'], params['via']))
                remotes.append(
                    ("file://%s" % url, params['verify'], params['as'], post))
            else:
                error = "Unknown file type for load: '{}'".format(url)
                if opts['fail_on_error']:
                    raise PipeException(error)
                log.error(error)
        else:
            error = "Don't know how to load '{}' as {} verify {} via {} (file does not exist?)".format(
                url, params['as'], params['verify'], params['via'])
            if opts['fail_on_error']:
                raise PipeException(error)
            log.error(error)

    req.md.fetch_metadata(remotes, **opts)
Пример #57
0
 def __iter__(self):
     """Yield each element returned by looking up "entities", logging its entityID first."""
     found = self.lookup("entities")
     for entity in found:
         entity_id = entity.get('entityID')
         log.debug("**** yield entityID=%s" % entity_id)
         yield entity
Пример #58
0
                        info['Exception'] = ex
                    if thread.tries < self.retry_limit:
                        next_jobs.append((thread.url, thread.verify, thread.id, thread.tries + 1))
                    else:
                        #traceback.print_exc(ex)
                        log.error("retry limit exceeded for %s (last error was: %s)" % (thread.url, ex))
                finally:
                    nfinished += 1
                    if info is not None:
                        stats[thread.url] = info

        resources = [(url, verify, rid, 0) for url, verify, rid in resources]
        resolved = set()
        cache = True
        while len(resources) > 0:
            log.debug("fetching %d resources (%s)" % (len(resources), repr(resources)))
            next_jobs = []
            q = Queue(qsize)
            prod_thread = threading.Thread(target=producer, args=(q, resources, cache))
            cons_thread = threading.Thread(target=consumer, args=(q, len(resources), stats, next_jobs, resolved))
            prod_thread.start()
            cons_thread.start()
            prod_thread.join()
            cons_thread.join()
            log.debug("after fetch: %d jobs to retry" % len(next_jobs))
            if len(next_jobs) > 0:
                resources = next_jobs
                cache = False
            else:
                resources = []