def BaseUri(node, fallback=None):
    """
    Compute the absolute base URI in effect for `node` in a Domlette tree.

    This stands in for the DOM Level 3 `node.baseURI` attribute, which is
    currently broken in 4Suite XML: wherever DOM Level 3 would have you
    read `node.baseURI`, call `BaseUri(node)` instead.

    Resolution proceeds in this order, stopping as soon as the result is
    absolute:

      1. the `xml:base` of the nearest element (self included) that
         declares one;
      2. that value absolutized against the base URI of the declaring
         element's parent (computed recursively);
      3. the value absolutized against the document's `documentURI`;
      4. the value absolutized against `fallback`, if one was given.

    Returns the absolute base URI, or `None` if no *absolute* base URI
    could be established.
    """
    resolved = ''

    declaring = node.xpath('ancestor-or-self::*[@xml:base][1]')
    if declaring:
        # From here on `node` is the element that declared xml:base, not
        # the node originally passed in.
        node = declaring[0]
        resolved = node.getAttributeNS(XML_NAMESPACE, 'base')

        # A relative xml:base is resolved against the base URI of the
        # parent of the declaring element, recursively.
        if not Uri.IsAbsolute(resolved):
            parent = node.parentNode
            if parent is not None:
                # NOTE(review): BaseUri(parent) can return None, and the
                # fallback is not forwarded to the recursive call --
                # confirm that Uri.Absolutize accepts such a base.
                resolved = Uri.Absolutize(resolved, BaseUri(parent))

    # Still not absolute: resolve against the document's URI.  A node
    # exposing createElementNS is treated as being the document itself;
    # any other node reaches the document through ownerDocument.
    if not Uri.IsAbsolute(resolved):
        if hasattr(node, 'createElementNS'):
            document = node
        else:
            document = node.ownerDocument
        resolved = Uri.Absolutize(resolved, document.documentURI)

    # Last resort: the caller-supplied fallback base URI.
    if not Uri.IsAbsolute(resolved) and fallback is not None:
        resolved = Uri.Absolutize(resolved, fallback)

    # Nothing more can be done if the result is still relative.  (Open
    # question from the original author: should a URI be generated here
    # instead of returning None?)
    if Uri.IsAbsolute(resolved):
        return resolved
    return None
def pushdom(source, xpatterns, prefixes=None, validate=False):
    """
    Parse `source` with a SAX parser that hands out DOM chunks.

    source - an InputSource instance, a file-like object (anything with a
             `read` attribute), a string of XML, an absolute URI, or an
             OS file path
    xpatterns - patterns handed to `sax2dom_chunker` to select the chunks
    prefixes - optional prefix-to-namespace mapping, passed to the
               chunker as `nss`
    validate - True to turn on DTD validation; when False, processing of
               external parameter entities is switched off instead

    Returns whatever `parser.parse(source)` returns with the generator
    feature enabled (presumably an iterator over the chunks -- confirm
    against the Sax module).
    """
    def new_base_uri():
        # Streams and literal strings have no URI of their own, so a
        # unique dummy one is made up to serve as base.
        return 'urn:uuid:' + Uuid.UuidAsString(Uuid.GenerateUuid())

    parser = Sax.CreateParser()
    if validate:
        parser.setFeature(xml.sax.handler.feature_validation, True)
    else:
        parser.setFeature(xml.sax.handler.feature_external_pes, False)
    parser.setFeature(Sax.FEATURE_GENERATOR, True)

    def handle_chunk(docfrag):
        # Each finished chunk is published through the parser's
        # yield-result property.
        parser.setProperty(Sax.PROPERTY_YIELD_RESULT, docfrag)

    parser.setContentHandler(
        sax2dom_chunker(xpatterns=xpatterns, nss=prefixes,
                        chunk_consumer=handle_chunk))

    # Anything that is not already an InputSource gets wrapped in one.
    if not isinstance(source, InputSource.InputSource):
        factory = InputSource.DefaultFactory
        if hasattr(source, 'read'):
            source = factory.fromStream(source, new_base_uri())
        elif IsXml(source):
            source = factory.fromString(source, new_base_uri())
        elif Uri.IsAbsolute(source):
            source = factory.fromUri(source)
        else:
            # Not XML and not an absolute URI: treat it as an OS path.
            source = factory.fromUri(Uri.OsPathToUri(source))

    return parser.parse(source)
def CreateInputSource(obj, uri=None):
    """
    Convenience function for creating an InputSource.

    obj - a string, Unicode object (only if you really know what you're
          doing), file-like object (stream), file path or URI.  An
          InputSource object is also accepted, in which case that same
          object is returned, possibly with its URI modified
    uri - optional override URI.  When given (and non-empty), the `uri`
          attribute of the resulting InputSource is set to this value

    Returns an InputSource which can be passed to 4Suite APIs.
    """
    # Imports are done inside the function (slightly less efficient) to
    # avoid circular imports.
    from Ft.Xml import InputSource
    factory = InputSource.DefaultFactory
    from Ft.Lib import Uri, Uuid
    from Ft.Xml.Lib.XmlString import IsXml

    def throwaway_uri():
        # Streams and strings carry no URI, so a unique dummy base is
        # generated for them.
        return 'urn:uuid:' + Uuid.UuidAsString(Uuid.GenerateUuid())

    def build():
        if isinstance(obj, InputSource.InputSource):
            return obj
        if hasattr(obj, 'read'):
            return factory.fromStream(obj, throwaway_uri())
        if IsXml(obj):
            return factory.fromString(obj, throwaway_uri())
        if Uri.IsAbsolute(obj):
            return factory.fromUri(obj)
        # Neither XML text nor an absolute URI: treat it as an OS path.
        return factory.fromUri(Uri.OsPathToUri(obj))

    isrc = build()
    if uri:
        isrc.uri = uri
    return isrc
def GetDefaultCatalog(basename='default.cat'):
    """
    Load the default catalog file(s) and return the resulting catalog.

    Catalog URIs are collected, in this order, from:

      - the XML_CATALOGS environment variable (original 4Suite support):
        an os.pathsep-separated list of OS pathnames;
      - the XML_CATALOG_FILES environment variable (libxml2 support):
        a whitespace-separated list of pathnames or URLs;
      - the default 4Suite catalog, `basename` inside the configured
        DATADIR.

    The first URI that loads becomes the primary catalog; every later
    one that loads is appended to the primary's `catalogs` list.  A URI
    whose loading raises UriException is skipped.

    When the XML_DEBUG_CATALOG environment variable is set, progress is
    reported on stderr.

    Returns the primary Catalog, or None if no catalog could be loaded.
    """
    quiet = 'XML_DEBUG_CATALOG' not in os.environ

    uris = []
    # original 4Suite XML Catalog support
    if 'XML_CATALOGS' in os.environ:
        # os.pathsep separated list of pathnames
        for path in os.environ['XML_CATALOGS'].split(os.pathsep):
            uris.append(Uri.OsPathToUri(path))

    # libxml2 XML Catalog support
    if 'XML_CATALOG_FILES' in os.environ:
        # whitespace-separated list of pathnames or URLs (ick!)
        for path in os.environ['XML_CATALOG_FILES'].split():
            # if it's not already a URL, make it one
            if not Uri.IsAbsolute(path):
                uris.append(Uri.OsPathToUri(path))
            else:
                uris.append(path)

    # add the default 4Suite catalog
    pathname = os.path.join(GetConfigVar('DATADIR'), basename)
    if GetConfigVar('RESOURCEBUNDLE'):
        resource = ImportUtil.OsPathToResource(pathname)
        uri = Uri.ResourceToUri('Ft.Xml', resource)
    else:
        uri = Uri.OsPathToUri(pathname)
    uris.append(uri)

    if not quiet:
        prefix = "Catalog URIs:"
        for uri in uris:
            sys.stderr.write('%s %s\n' % (prefix, uri))
            # continuation lines are aligned under the first one
            prefix = " "*len(prefix)

    catalog = None
    for uri in uris:
        if not quiet:
            sys.stderr.write('Reading %s\n' % uri)
            sys.stderr.flush()
        try:
            # FIXME: Use dict merging rather than this inefficient cascading
            if catalog is None:
                if not quiet:
                    sys.stderr.write('Creating catalog from %s\n' % uri)
                    sys.stderr.flush()
                catalog = Catalog(uri, quiet)
            else:
                if not quiet:
                    sys.stderr.write('Appending %s\n' % uri)
                    sys.stderr.flush()
                catalog.catalogs.append(Catalog(uri, quiet))
        except UriException:
            # Best effort: an unreadable catalog must not prevent the
            # remaining ones from loading.  Stay silent by default, but
            # say what happened when catalog debugging is on.
            # sys.exc_info() is used so the handler needs no
            # version-specific "except ..., e" syntax.
            if not quiet:
                sys.stderr.write('Catalog resource (%s) disabled: %s\n'
                                 % (uri, sys.exc_info()[1]))
                sys.stderr.flush()

    # Hand the loaded catalog back to the caller; without this the
    # function always evaluated to None.
    return catalog
def _AttachStylesheetToProcessor(stylesheet, processor):
    """
    Append `stylesheet` to `processor`, wrapping it in an InputSource
    first when necessary.

    stylesheet - an InputSource instance, a string of XML, a file-like
                 object (anything with a `read` attribute), an absolute
                 URI, or an OS file path
    processor - object whose `appendStylesheet` method receives the
                resulting InputSource

    Returns None.
    """
    from Ft.Lib import Uri, Uuid
    from Ft.Xml import InputSource
    from Ft.Xml.Catalog import IsXml

    def throwaway_uri():
        # Strings and streams carry no URI, so a unique dummy base is
        # generated for them.
        return 'urn:uuid:' + Uuid.UuidAsString(Uuid.GenerateUuid())

    if isinstance(stylesheet, InputSource.InputSource):
        isrc = stylesheet
    else:
        factory = InputSource.DefaultFactory
        # The XML-string test comes before the stream test here.  A
        # stricter string test (also requiring the XSLT namespace to
        # occur in the text) is deliberately not used: it would break in
        # pathological cases such as a stylesheet string containing only
        # an XInclude of the actual XSLT.
        if IsXml(stylesheet):
            isrc = factory.fromString(stylesheet, throwaway_uri())
        elif hasattr(stylesheet, 'read'):
            isrc = factory.fromStream(stylesheet, throwaway_uri())
        elif Uri.IsAbsolute(stylesheet):
            isrc = factory.fromUri(stylesheet)
        else:
            # Not an absolute URI: treat it as an OS path.
            isrc = factory.fromUri(Uri.OsPathToUri(stylesheet))

    processor.appendStylesheet(isrc)
def __init__(self, uri, quiet=True):
    """
    Load the catalog found at `uri` and index its entries.

    uri - URI of the catalog resource.  It should be absolute; a
          warning is issued otherwise
    quiet - when False, the populated catalog attributes are dumped to
            stderr once loading has finished

    The resource is read through Uri.BASIC_RESOLVER.  If its content
    looks like XML it is parsed as an XML Catalog, otherwise as a
    TR 9401 catalog file.
    """
    self.systemIds = {}
    self.publicIds = {}
    self.uris = {}
    self.publicDelegates = []
    self.systemDelegates = []
    self.uriDelegates = []
    self.systemRewrites = []
    self.uriRewrites = []
    self.catalogs = []
    self.uri = uri
    self.quiet = quiet

    if not Uri.IsAbsolute(uri):
        # Using a relative URI here makes it hard to reliably
        # locate the catalog. Also, if the catalog doesn't set
        # its own base URI with xml:base, then we won't be able
        # to resolve relative URI references within the catalog.
        # So we should warn that this situation is undesirable.
        # The URI is interpolated into the message; previously the
        # warning text contained a literal '%s'.
        warnings.warn("Catalog URI '%s' is not absolute." % uri,
                      FtWarning, 2)

    stream = Uri.BASIC_RESOLVER.resolve(uri)
    try:
        data = stream.read()
    finally:
        # Release the stream even when reading fails.
        stream.close()

    if IsXml(data):
        # cannot be a TR 9401 document, assume an XML Catalog
        self._parseXmlCat(data)
    else:
        # cannot be an XML Catalog, assume a TR 9401 file
        self._parseTr9401(data)

    # longest match first
    for entries in (self.publicDelegates, self.systemDelegates,
                    self.uriDelegates, self.systemRewrites,
                    self.uriRewrites):
        entries.sort()
        entries.reverse()

    if not quiet:
        sys.stderr.write('Catalog contents:\n')
        for key, value in self.__dict__.items():
            sys.stderr.write(' %s = %r\n' % (key, value))
        sys.stderr.flush()
def SourceArgToUri(arg, resolver=Uri.BASIC_RESOLVER):
    """
    Turn a command-line source argument into a URI.

    Some command-line scripts take an argument that is either "-"
    (denoting standard input) or a URI reference to be resolved against
    the URI equivalent of the current working directory.  This function
    processes such an argument, given as a string, and returns an
    appropriate URI.

    Since users tend to expect local OS paths to work as URIs, an
    argument that points to an existing local file is accepted and used
    as an OS path, even though this could interfere with catalog-based
    resolution.

    arg - the argument string
    resolver - a Ft.Lib.Uri.UriResolverBase instance; its normalize()
               method is used to resolve a URI reference against a base
               URI

    Raises TypeError if `resolver` is not a UriResolverBase instance or
    `arg` is not a string.  Raises ValueError if `arg` is neither "-",
    nor an existing local file, nor a valid URI reference.
    """
    if not isinstance(resolver, Uri.UriResolverBase):
        raise TypeError(
            ('It appears there is a bug in this command-line'
             ' script. A %s was passed as URI resolver to a function that'
             ' requires an instance of Ft.Lib.Uri.UriResolverBase (or'
             ' a subclass thereof).') % type(resolver))

    if not (isinstance(arg, str) or isinstance(arg, unicode)):
        raise TypeError(
            ('It appears there is a bug in this command-line'
             ' script. A %s was passed as an argument needing to be'
             ' converted to a URI. A string must be provided instead.')
            % type(arg))

    # Standard input gets a placeholder file name.
    if arg == '-':
        return Uri.OsPathToUri('unknown-STDIN', attemptAbsolute=True)

    if arg:
        # An existing local file wins over any URI interpretation.
        if os.path.isfile(arg):
            return Uri.OsPathToUri(arg, attemptAbsolute=True)
        if not Uri.MatchesUriRefSyntax(arg):
            raise ValueError("'%s' is not a valid URI reference." % arg)
        if Uri.IsAbsolute(arg):
            return arg

    # Relative reference (or empty string): resolve it against the
    # current working directory, expressed as a URI ending in a slash.
    cwd_uri = Uri.OsPathToUri(os.getcwd(), attemptAbsolute=True)
    if cwd_uri[-1] != '/':
        cwd_uri += '/'
    return resolver.normalize(arg, cwd_uri)
def instantiate(self, context, processor):
    """
    Evaluate this instruction's `href` and turn it into an absolute URI.

    context - the context object; its `processorNss` and
              `currentInstruction` attributes are set from this
              instruction before anything is evaluated
    processor - the processor; the `name` of its writer's stream serves
                as base when `href` is relative

    Raises XsltRuntimeException (NO_EXSLTDOCUMENT_BASE_URI) when a
    relative `href` cannot be absolutized.

    NOTE(review): `uri` is computed but not used in the part of this
    method visible here -- the body presumably continues; confirm
    against the complete source.
    """
    context.processorNss = self.namespaces
    context.currentInstruction = self

    # this uses attributes directly from self
    self._output_parameters.avtParse(self, context)
    href = self._href.evaluate(context)

    if Uri.IsAbsolute(href):
        uri = href
    else:
        # A relative href is resolved against the URI form of the name
        # of the stream the processor's writer is writing to.
        try:
            uri = Uri.Absolutize(href,
                Uri.OsPathToUri(processor.writer.getStream().name))
        except Exception, e:
            # Any failure here (for instance a stream without a `name`)
            # is reported as a missing base URI.
            raise XsltRuntimeException(
                ExsltError.NO_EXSLTDOCUMENT_BASE_URI,
                context.currentInstruction, href)
def Parse(source):
    """
    Convenience function for parsing XML.

    Call it with a single argument, which must be a string (not a
    Unicode object), a file-like object (stream), a file path or a URI.

    Returns a Domlette node.

    Only pass strings or streams if the XML is self-contained, i.e. it
    does not need access to any other resource such as external
    entities or includes: strings and streams are parsed with a
    made-up urn:uuid: base URI.  If you get URI resolution errors, do
    not use this function; use the lower-level APIs instead.  For
    example, to resolve against the current working directory when
    parsing a string:

        from Ft.Xml.Domlette import NonvalidatingReader
        from Ft.Lib import Uri
        XML = '<!DOCTYPE a [ <!ENTITY b "b.xml"> ]><a>&b;</a>'
        base = Uri.OsPathToUri('')  # Turn CWD into a file: URL
        doc = NonvalidatingReader.parseString(XML, base)
        # during parsing, the replacement text for &b;
        # will be obtained from b.xml in the CWD

    For streams, use "parseStream" rather than "parseString" in the
    above.
    """
    # Imports are done inside the function (slightly less efficient) to
    # avoid circular imports.
    from Ft.Xml.Domlette import NonvalidatingReader
    from Ft.Lib import Uri, Uuid
    from Ft.Xml.Lib.XmlString import IsXml

    def throwaway_uri():
        # Dummy URI to use as base for sources that have none.
        return 'urn:uuid:' + Uuid.UuidAsString(Uuid.GenerateUuid())

    if hasattr(source, 'read'):
        return NonvalidatingReader.parseStream(source, throwaway_uri())
    if IsXml(source):
        return NonvalidatingReader.parseString(source, throwaway_uri())
    if not Uri.IsAbsolute(source):
        # Not an absolute URI: treat it as an OS path.
        source = Uri.OsPathToUri(source)
    return NonvalidatingReader.parseUri(source)
def Transform(source, stylesheet, params=None, output=None):
    """
    Convenience function for applying an XSLT transform.

    source - XML source document as a string (not a Unicode object),
             file-like object (stream), file path, URI or
             Ft.Xml.InputSource.InputSource instance.  A string or
             stream must be self-contained XML (i.e. not requiring
             access to any other resource such as external entities or
             includes)
    stylesheet - XSLT document as a string, stream, URL, file path or
                 Ft.Xml.InputSource.InputSource instance
    params - optional dictionary of stylesheet parameters, the keys of
             which may be given as unicode objects if they have no
             namespace, or as (uri, localname) tuples if they do
    output - optional file-like object to which output is written
             (incrementally, as processed)

    Returns the result of the processor run (a string).
    """
    # Imports are done inside the function (slightly less efficient) to
    # avoid circular imports.
    from Ft.Xml.Xslt import Processor
    from Ft.Xml import InputSource
    from Ft.Lib import Uri, Uuid
    from Ft.Xml.Lib.XmlString import IsXml

    def throwaway_uri():
        # Dummy URI to use as base for sources that have none.
        return 'urn:uuid:' + Uuid.UuidAsString(Uuid.GenerateUuid())

    if not params:
        params = {}

    processor = Processor.Processor()
    _AttachStylesheetToProcessor(stylesheet, processor)

    # Anything that is not already an InputSource gets wrapped in one.
    if not isinstance(source, InputSource.InputSource):
        factory = InputSource.DefaultFactory
        if hasattr(source, 'read'):
            source = factory.fromStream(source, throwaway_uri())
        elif IsXml(source):
            source = factory.fromString(source, throwaway_uri())
        elif Uri.IsAbsolute(source):
            source = factory.fromUri(source)
        else:
            # Not an absolute URI: treat it as an OS path.
            source = factory.fromUri(Uri.OsPathToUri(source))

    return processor.run(source, topLevelParams=params,
                         outputStream=output)
def parse(source, uri=None, rules=None, binderobj=None, prefixes=None,
          validate=False, binding_classes=None):
    """
    Convenience function for parsing XML into a binding.

    Call it with a single argument, which is a string (not a Unicode
    object), a file-like object (stream), a file path or a URI.

    Returns a document binding object.

    Only use this function for self-contained XML (i.e. XML that needs
    no access to any other resource); do not use it, for example, for
    XML with external entities.  If you get URI resolution errors, pass
    in a `uri` parameter.

    uri - establishes a base URI for the XML document entity being
          parsed; required if source is a string or stream containing
          XML that uses any external resources.  If source is a path or
          URI this parameter, if given, is ignored
    rules - a list of bindery rule objects to fine-tune the binding
    binderobj - optional binder object to control binding details; the
                default is None, in which case a binder object will be
                created
    prefixes - dictionary mapping prefixes to namespace URIs; the
               default is None
    validate - True to request DTD validation by the underlying parser;
               the default is False
    binding_classes - passed through unchanged to the binderytools
                      function that does the work
    """
    from Ft.Xml import InputSource
    from Ft.Lib import Uri, Uuid
    from Ft.Xml.Lib.XmlString import IsXml

    # Keyword arguments every binderytools entry point receives.
    shared = {
        'rules': rules,
        'binderobj': binderobj,
        'prefixes': prefixes,
        'validate': validate,
        'binding_classes': binding_classes,
    }

    # NOTE: InputSource instances get no special treatment here.
    # Only streams and strings are given the caller's base URI.
    if hasattr(source, 'read'):
        return binderytools.bind_stream(source, uri=uri, **shared)
    if IsXml(source):
        return binderytools.bind_string(source, uri=uri, **shared)
    if Uri.IsAbsolute(source):
        return binderytools.bind_uri(source, **shared)
    # Not an absolute URI: treat it as a file path.
    return binderytools.bind_file(source, **shared)