def xmlvalidate(log):
    """Run a validating libxml2 pass over the document and report errors.

    The document text is taken from ``aString`` and converted with
    ``xmlEncoding.asUTF8``; neither name is defined inside this function,
    so both are resolved from the surrounding scope.

    ``log`` is called once for every collected error line that starts with
    the random marker, with the text after the fourth ':' (whitespace
    stripped) as its only argument.
    """
    import libxml2
    from StringIO import StringIO
    from random import random

    # Random token handed to newTextReader(); only error lines starting with
    # it are reported.  NOTE(review): presumably libxml2 echoes this value as
    # the document name at the start of each message -- confirm.
    marker = "...%s..." % str(random()).replace('0.', '')

    # libxml2 calls the handler with (context, text); the context is our list.
    collected = []
    libxml2.registerErrorHandler(
        lambda sink, text: sink.append(text), collected)

    utf8_stream = StringIO(xmlEncoding.asUTF8(aString))
    buf = libxml2.inputBuffer(utf8_stream)
    reader = buf.newTextReader(marker)
    reader.SetParserProp(libxml2.PARSER_VALIDATE, 1)

    # Drain the reader; Read() stops returning 1 at end of input or on error.
    while reader.Read() == 1:
        pass

    for line in ''.join(collected).splitlines():
        if not line.startswith(marker):
            continue
        log(line.split(':', 4)[-1].strip())
def _validate(aString, firstOccurrenceOnly, loggedEvents, base, encoding, selfURIs=None):
    """Validate a feed held in a string; return the validator object.

    Parameters:
      aString             -- the document text (expected to be Unicode by now)
      firstOccurrenceOnly -- forwarded to validator.setFirstOccurrenceOnly()
      loggedEvents        -- events appended to validator.loggedEvents before
                             parsing starts
      base                -- passed to SAXDispatcher; also used as the only
                             self URI when selfURIs is empty or None
      encoding            -- passed to SAXDispatcher
      selfURIs            -- optional list of self URIs (default: [base])

    Returns the SAXDispatcher instance used as content handler, error handler
    and entity resolver for the parse.  A callable performing an optional
    libxml2 validation pass is attached to it as ``validator.xmlvalidator``.

    SAXException raised by the parser is swallowed and UnicodeError is
    converted into a logged event.  The extra RDF pass for RSS 1.0 feeds is
    best effort: any ordinary exception raised there is ignored.
    """
    from xml.sax import make_parser, handler
    from base import SAXDispatcher
    from exceptions import UnicodeError
    from cStringIO import StringIO

    # By now, aString should be Unicode
    source = InputSource()
    source.setByteStream(StringIO(xmlEncoding.asUTF8(aString)))

    validator = SAXDispatcher(base, selfURIs or [base], encoding)
    validator.setFirstOccurrenceOnly(firstOccurrenceOnly)

    validator.loggedEvents += loggedEvents

    # experimental RSS-Profile draft 1.06 support
    validator.setLiterals(re.findall(r'&(\w+);', aString))

    xmlver = re.match(
        r'''^<\?\s*xml\s+version\s*=\s*['"]([-a-zA-Z0-9_.:]*)['"]''', aString)
    if xmlver and xmlver.group(1) != '1.0':
        validator.log(logging.BadXmlVersion({"version": xmlver.group(1)}))

    try:
        from xml.sax.expatreader import ExpatParser

        class fake_dtd_parser(ExpatParser):
            # Ask expat to use a foreign DTD every time the parser is reset.
            def reset(self):
                ExpatParser.reset(self)
                self._parser.UseForeignDTD(1)

        parser = fake_dtd_parser()
    except Exception:
        # Expat reader unavailable or unusable: fall back to the default SAX
        # parser.  KeyboardInterrupt/SystemExit are deliberately not caught.
        parser = make_parser()

    parser.setFeature(handler.feature_namespaces, 1)
    parser.setContentHandler(validator)
    parser.setErrorHandler(validator)
    parser.setEntityResolver(validator)
    if hasattr(parser, '_ns_stack'):
        # work around bug in built-in SAX parser (doesn't recognize xml: namespace)
        # PyXML doesn't have this problem, and it doesn't have _ns_stack either
        parser._ns_stack.append({'http://www.w3.org/XML/1998/namespace': 'xml'})

    def xmlvalidate(log):
        """Run a validating libxml2 pass over aString, reporting via log()."""
        import libxml2
        from StringIO import StringIO
        from random import random

        # Random token handed to newTextReader(); only error lines that start
        # with it are reported.
        prefix = "...%s..." % str(random()).replace('0.', '')

        # libxml2 calls the handler with (context, text); context is the list.
        collected = []
        libxml2.registerErrorHandler(
            lambda sink, text: sink.append(text), collected)

        buf = libxml2.inputBuffer(StringIO(xmlEncoding.asUTF8(aString)))
        reader = buf.newTextReader(prefix)
        reader.SetParserProp(libxml2.PARSER_VALIDATE, 1)

        ret = reader.Read()
        while ret == 1:
            ret = reader.Read()

        for line in ''.join(collected).splitlines():
            if line.startswith(prefix):
                log(line.split(':', 4)[-1].strip())

    validator.xmlvalidator = xmlvalidate

    try:
        parser.parse(source)
    except SAXException:
        # The validator is installed as the parser's error handler, so the
        # exception itself is intentionally dropped here.
        pass
    except UnicodeError:
        # sys.exc_info() keeps this working with old and new "except" syntax.
        import sys
        value = sys.exc_info()[1]
        validator.log(logging.UnicodeError({"exception": value}))

    if validator.getFeedType() == TYPE_RSS1:
        try:
            from rdflib.syntax.parsers.RDFXMLHandler import RDFXMLHandler

            class Handler(RDFXMLHandler):
                ns_prefix_map = {}
                prefix_ns_map = {}

                def add(self, triple):
                    # Triples are discarded; only parse errors matter here.
                    pass

                def __init__(self, dispatcher):
                    # The handler is passed to the base class as its own
                    # store, so add() above receives the triples.
                    RDFXMLHandler.__init__(self, self)
                    self.dispatcher = dispatcher

                def error(self, message):
                    self.dispatcher.log(InvalidRDF({"message": message}))

            # Rewind the byte stream and re-parse the document as RDF/XML.
            source.getByteStream().reset()
            parser.reset()
            parser.setContentHandler(Handler(parser.getContentHandler()))
            parser.setErrorHandler(handler.ErrorHandler())
            parser.parse(source)
        except Exception:
            # Best effort: a missing rdflib or a failed RDF pass must not
            # abort validation, but interpreter-exit signals still propagate.
            pass

    return validator