Exemplo n.º 1
0
    def create(self, request, user_name, vendor, name, version):
        """Store the tags sent in ``request`` for a gadget and a user.

        The ``tags_xml`` item of ``request`` is parsed with a
        ``TagsXMLHandler``; every entry of ``handler._tags`` is then
        attached (via ``UserTag.objects.get_or_create``) to the gadget
        identified by ``vendor``, ``name`` and ``version`` on behalf of the
        user resolved from ``user_name``.

        Returns an XML ``HttpResponse`` built from ``get_tags_by_resource``,
        or an ``HttpResponseServerError`` carrying the error text when a tag
        cannot be stored.
        """
        user = user_authentication(user_name)

        # Get the xml containing the tags from the request
        tags_xml = request['tags_xml']

        # Parse the xml containing the tags
        parser = make_parser()
        handler = TagsXMLHandler()

        # Tell the parser to use our handler
        parser.setContentHandler(handler)

        # Parse the input
        try:
            from StringIO import StringIO
        except ImportError:
            from cStringIO import StringIO
        inpsrc = InputSource()
        inpsrc.setByteStream(StringIO(tags_xml))
        parser.parse(inpsrc)

        # Get the gadget's id for those vendor, name and version
        gadget = get_object_or_404(GadgetResource, short_name=name, vendor=vendor, version=version)

        # Insert the tags for these resource and user in the database
        for e in handler._tags:
            try:
                UserTag.objects.get_or_create(tag=e, idUser=user, idResource=gadget)
            except Exception as exc:
                # Report storage failures to the client instead of letting
                # them propagate; interpreter-exit exceptions are not caught.
                return HttpResponseServerError(get_xml_error(str(exc)), mimetype='text/xml; charset=UTF-8')

        response = '<?xml version="1.0" encoding="UTF-8" ?>\n'
        response += get_tags_by_resource(gadget, user)
        return HttpResponse(response, mimetype='text/xml; charset=UTF-8')
Exemplo n.º 2
0
def countriesSource() -> InputSource:
    '''
    Build the SAX input source for the countries XML.

    The byte stream is obtained by passing the path of
    'iso_3166-1_list_en.xml', located next to this module, to openURI.
    '''
    xmlPath = path.join(path.dirname(__file__), 'iso_3166-1_list_en.xml')
    countries = InputSource()
    countries.setByteStream(openURI(xmlPath))
    return countries
Exemplo n.º 3
0
    def rootElement(self):
        """Parse ``self.filename`` and return the handler's root element.

        A fresh ``ConfigurationMachine`` is seeded with copies of the
        registry and features of the startup configuration context, the
        file is parsed with ``MyConfigHandler``, and the resulting root is
        marked with ``IRootDirective`` and parented to ``self``.
        """
        # Get the context that was originally generated during startup and
        # create a new context using its registrations
        real_context = zope.app.appsetup.appsetup.getConfigContext()
        context = config.ConfigurationMachine()
        context._registry = copy.copy(real_context._registry)
        context._features = copy.copy(real_context._features)
        context.package = self.package

        # Shut up i18n domain complaints
        context.i18n_domain = 'zope'

        # Since we want to use a custom configuration handler, we need to
        # instantiate the parser object ourselves
        parser = make_parser()
        handler = MyConfigHandler(context)
        parser.setContentHandler(handler)
        parser.setFeature(feature_namespaces, True)

        # Now open the file, parse it, and release the handle even when
        # parsing fails.
        config_file = open(self.filename)
        try:
            src = InputSource(getattr(config_file, 'name', '<string>'))
            src.setByteStream(config_file)
            parser.parse(src)
        finally:
            config_file.close()

        # Finally we retrieve the root element, have it provide a special root
        # directive interface and give it a location, so that we can do local
        # lookups.
        root = handler.rootElement
        directlyProvides(root, IRootDirective)
        root.__parent__ = self
        return root
Exemplo n.º 4
0
def __loadxmlparts(z, manifest, doc, objectpath):
    """Parse the XML parts found under ``objectpath`` into ``doc``.

    For each of settings.xml, meta.xml, content.xml and styles.xml
    (prefixed with ``objectpath``) that is listed in ``manifest``, the
    member is read from ``z`` and run through a namespace-aware SAX parser
    whose content handler is ``LoadParser(doc)``. ``doc._parsing`` holds
    the member name while it is being parsed. A ``KeyError`` raised while
    handling a part is ignored.
    """
    from load import LoadParser
    from xml.sax import make_parser, handler

    part_names = ('settings.xml', 'meta.xml', 'content.xml', 'styles.xml')
    for member in [objectpath + part for part in part_names]:
        if member in manifest:
            try:
                payload = z.read(member)
                doc._parsing = member

                reader = make_parser()
                reader.setFeature(handler.feature_namespaces, 1)
                reader.setContentHandler(LoadParser(doc))
                reader.setErrorHandler(handler.ErrorHandler())

                source = InputSource()
                source.setByteStream(StringIO(payload))
                reader.parse(source)
                del doc._parsing
            except KeyError:
                pass
Exemplo n.º 5
0
 def parse(self, xml, source='string'):
     '''Parse an XML stream with ``self.parser`` and return ``self.res``.

        * If ``source`` is "string", ``xml`` must be a string containing
          valid XML content. Text that is not already a byte string is
          encoded as UTF-8 and the input source is flagged accordingly.
        * For any other ``source``, ``xml`` can be:
          - a string containing the path to the XML file on disk;
          - a file-like object opened for reading. Note that in this case,
            this method will close it.

        The stream is closed even when the parser raises.
     '''
     try:
         from cStringIO import StringIO as ByteBuffer
     except ImportError:
         # No cStringIO available: io.BytesIO is the byte-buffer equivalent.
         from io import BytesIO as ByteBuffer
     self.parser.setContentHandler(self)
     self.parser.setErrorHandler(self)
     self.parser.setFeature(feature_external_ges, False)
     inputSource = InputSource()
     stream = None
     if source == 'string':
         if not isinstance(xml, bytes):
             xml = xml.encode('utf-8')
             inputSource.setEncoding('utf-8')
         inputSource.setByteStream(ByteBuffer(xml))
     else:
         # Accept any readable object; otherwise treat ``xml`` as a path.
         stream = xml if hasattr(xml, 'read') else open(xml, 'rb')
         inputSource.setByteStream(stream)
     try:
         self.parser.parse(inputSource)
     finally:
         if stream is not None:
             stream.close()
     return self.res
Exemplo n.º 6
0
def load(odffile):
    """ Load an ODF file into memory

        Each of settings.xml, meta.xml, content.xml and styles.xml that is
        listed in the package manifest is parsed into an OpenDocument
        created from the package's mimetype entry.

        NOTE(review): the original docstring promises a reference to the
        structure, but no return statement is visible in this block --
        confirm whether the remainder of the function was cut off.
    """
    from load import LoadParser
    from xml.sax import make_parser, handler

    z = zipfile.ZipFile(odffile)
    mimetype = z.read("mimetype")
    doc = OpenDocument(mimetype, add_generator=False)

    # Look in the manifest file to see which of the four files there are
    manifestpart = z.read("META-INF/manifest.xml")
    manifest = manifestlist(manifestpart)
    for xmlfile in ("settings.xml", "meta.xml", "content.xml", "styles.xml"):
        # ``in`` works on Python 2 and 3; dict.has_key is Python 2 only.
        if xmlfile not in manifest:
            continue
        try:
            xmlpart = z.read(xmlfile)
            doc._parsing = xmlfile

            parser = make_parser()
            parser.setFeature(handler.feature_namespaces, 1)
            parser.setContentHandler(LoadParser(doc))
            parser.setErrorHandler(handler.ErrorHandler())

            inpsrc = InputSource()
            inpsrc.setByteStream(StringIO(xmlpart))
            parser.parse(inpsrc)
            del doc._parsing
        except KeyError:
            # A KeyError raised while handling a part is deliberately ignored.
            pass
Exemplo n.º 7
0
def limit_featurecollection(content, limit=200):
    """
    Re-serialize a WFS FeatureCollection XML document through
    ``_XMLFilterLimit``.

    ``content`` is read as a byte stream, filtered with
    ``_XMLFilterLimit(..., limit=limit)`` and written by an
    ``XMLGenerator`` declared as utf-8; the generated text is returned.
    Presumably the filter keeps at most ``limit`` features (200 by
    default) -- confirm against ``_XMLFilterLimit``.
    """
    raw = BytesIO(content)
    sink = StringIO()

    source = InputSource()
    source.setByteStream(raw)

    limiter = _XMLFilterLimit(make_parser(), XMLGenerator(sink, 'utf-8'),
                              limit=limit)
    limiter.parse(source)
    text = sink.getvalue()

    for stream in (raw, sink):
        stream.close()
    return text
Exemplo n.º 8
0
    def create(self, request, vendor, name, version):
        """Tag a catalogue resource with the tags posted in ``request``.

        Reads ``format`` (default ``'default'``) and ``tags_xml`` from
        ``request.POST``, parses the UTF-8 encoded XML with
        ``TagsXMLHandler`` and calls ``tag_resource`` for every entry of
        the handler's ``_tags`` on the resource matching ``vendor``,
        ``name`` and ``version``. Returns ``get_tag_response`` for that
        resource and the requesting user.
        """
        post = request.POST
        response_format = post.get('format', 'default')

        # XML payload describing the tags, as UTF-8 bytes
        raw_tags = post.get('tags_xml').encode("utf-8")

        # SAX parser wired to our tag-collecting handler
        sax_parser = make_parser()
        tag_handler = TagsXMLHandler()
        sax_parser.setContentHandler(tag_handler)

        source = InputSource()
        source.setByteStream(StringIO(raw_tags))
        sax_parser.parse(source)

        # Resource for the given vendor, name and version (404 if absent)
        resource = get_object_or_404(CatalogueResource, short_name=name,
                                     vendor=vendor, version=version)

        # Record each tag for this resource and user
        for tag in tag_handler._tags:
            tag_resource(request.user, tag, resource)

        return get_tag_response(resource, request.user, response_format)
Exemplo n.º 9
0
	def Load(self):
		"""Parse ``self.filename`` with ``SVGHandler`` and return ``self.object``.

		Calls ``self.document()`` and ``self.layer()`` first, parses the
		file with external general entities disabled, then calls
		``self.end_all()``, applies ``self.page_layout`` when it is set and
		signals completion through ``self.object.load_Completed()``.

		Any exception is reported with ``warn_tb('INTERNAL')`` and re-raised.
		"""
		try:
			self.document()
			self.layer()

			error_handler = ErrorHandler()
			entity_resolver = EntityResolver()
			dtd_handler = DTDHandler()

			stream = open(self.filename, "r")
			try:
				input_source = InputSource()
				input_source.setByteStream(stream)
				xml_reader = xml.sax.make_parser()
				xml_reader.setContentHandler(SVGHandler(self))
				xml_reader.setErrorHandler(error_handler)
				xml_reader.setEntityResolver(entity_resolver)
				xml_reader.setDTDHandler(dtd_handler)
				xml_reader.setFeature(handler.feature_external_ges, False)
				xml_reader.parse(input_source)
			finally:
				# Always release the file, even when parsing fails.
				stream.close()

			self.end_all()

			if self.page_layout:
				self.object.load_SetLayout(self.page_layout)

			self.object.load_Completed()
			return self.object
		except:
			warn_tb('INTERNAL')
			raise
Exemplo n.º 10
0
	def _build_model(self):
		"""Parse content.xml from the presenter's doc_dir into ``self.model``.

		The file is read once to count its lines (stored as a float on the
		content handler's ``lines`` attribute) and a second time by the SAX
		parser. On success ``self.model`` is taken from the content handler
		and an OK message is emitted.

		Raises IOError (after emitting an ERROR message) when anything in
		the read/parse phase fails.
		"""
		content_handler = XMLDocReader(self.presenter)
		error_handler = ErrorHandler()
		entity_resolver = EntityResolver()
		dtd_handler = DTDHandler()
		try:
			filename = os.path.join(self.presenter.doc_dir, 'content.xml')
			counter = open(filename, 'r')
			try:
				lines = float(sum(1 for _line in counter))
			finally:
				counter.close()
			self.file_handler = open(filename, "r")
			try:
				input_source = InputSource()
				input_source.setByteStream(self.file_handler)
				content_handler.lines = lines
				xml_reader = xml.sax.make_parser()
				xml_reader.setContentHandler(content_handler)
				xml_reader.setErrorHandler(error_handler)
				xml_reader.setEntityResolver(entity_resolver)
				xml_reader.setDTDHandler(dtd_handler)
				xml_reader.parse(input_source)
			finally:
				# Release the handle even when parsing fails.
				self.file_handler.close()
			content_handler.file = None
		except Exception:
			errtype, value, traceback = sys.exc_info()
			# ``value`` is an exception instance; convert it before
			# concatenating, otherwise building the message itself fails.
			msg = _('It seems content.xml is corrupted') + '\n' + str(value)
			events.emit(events.MESSAGES, msgconst.ERROR, msg)
			raise IOError(errtype, msg, traceback)
		self.model = content_handler.model

		msg = _('Content.xml is parsed successfully')
		events.emit(events.MESSAGES, msgconst.OK, msg)
Exemplo n.º 11
0
class AbstractXMLLoader(AbstractLoader, handler.ContentHandler):
	"""Loader base that acts as its own SAX content handler.

	SAX callbacks are forwarded to start_element, end_element and
	element_data, which do nothing here.
	"""

	xml_reader = None
	input_source = None

	def init_load(self):
		"""Wrap self.fileptr in an input source, set up the reader and call do_load."""
		source = InputSource()
		self.input_source = source
		source.setByteStream(self.fileptr)

		reader = xml.sax.make_parser()
		self.xml_reader = reader
		reader.setContentHandler(self)
		reader.setErrorHandler(ErrorHandler())
		reader.setEntityResolver(EntityResolver())
		reader.setDTDHandler(DTDHandler())

		self.do_load()

	def start_parsing(self):
		"""Run the configured reader over the prepared input source."""
		self.xml_reader.parse(self.input_source)

	# SAX callbacks: forward to the snake_case hooks below.

	def startElement(self, name, attrs):
		self.start_element(name, attrs)

	def endElement(self, name):
		self.end_element(name)

	def characters(self, data):
		self.element_data(data)

	# Hooks: no-ops in this class.

	def start_element(self, name, attrs):
		pass

	def end_element(self, name):
		pass

	def element_data(self, data):
		pass
Exemplo n.º 12
0
  def parse(self) :
    """Parse self.source, which must be a unicode string or an InputSource.

    This object is installed as the content handler. Validation and
    external general entities are switched off on the parser.
    Raises Exception for any other kind of source.
    """
    origin = self.source
    if isinstance(origin, unicode):
      # Wrap the text in an in-memory stream
      text_stream = io.StringIO(origin)
      document = InputSource(text_stream)
      document.setEncoding("utf-8")
      document.setCharacterStream(text_stream)
      # There is a bug in xml.sax.saxutils.prepare_input_source
      document.setByteStream(text_stream)
      document.setSystemId(None)
    elif isinstance(origin, InputSource):
      document = origin
    else:
      raise Exception("Parse source must be either string or InputSource")

    # Build the parser/xmlreader and register ourselves as content handler
    reader = xml.sax.make_parser()
    reader.setContentHandler(self)
    #reader.setErrorHandler(self)

    #reader.setFeature(xml.sax.handler.feature_namespaces,True)
    # Shut off dtd validation and external general entities
    for feature in (xml.sax.handler.feature_validation,
                    xml.sax.handler.feature_external_ges):
      reader.setFeature(feature, False)

    # Parse the document
    reader.parse(document)
Exemplo n.º 13
0
def load(odffile):
    """Load an ODF package into an OpenDocument and return it.

    The four standard XML parts listed in the manifest are parsed into the
    document; pictures and the thumbnail are added through
    ``addPicture``/``addThumbnail``; every other manifest entry is kept as
    an ``OpaqueObject`` in ``doc._extra``. Finally the first child of the
    first ``Body`` element is exposed under the attribute matching the
    package mimetype (``text``, ``graphics``, ``presentation``,
    ``spreadsheet``, ``chart``, ``image`` or ``formula``).

    The zip archive is closed even when reading or parsing fails.
    """
    from load import LoadParser
    from xml.sax import make_parser, handler

    xml_parts = ('settings.xml', 'meta.xml', 'content.xml', 'styles.xml')
    # Mimetype prefix -> attribute of ``doc`` that receives the body content.
    body_attributes = (
        ('application/vnd.oasis.opendocument.text', 'text'),
        ('application/vnd.oasis.opendocument.graphics', 'graphics'),
        ('application/vnd.oasis.opendocument.presentation', 'presentation'),
        ('application/vnd.oasis.opendocument.spreadsheet', 'spreadsheet'),
        ('application/vnd.oasis.opendocument.chart', 'chart'),
        ('application/vnd.oasis.opendocument.image', 'image'),
        ('application/vnd.oasis.opendocument.formula', 'formula'),
    )

    z = zipfile.ZipFile(odffile)
    try:
        mimetype = z.read('mimetype')
        doc = OpenDocument(mimetype, add_generator=False)

        # Look in the manifest file to see which of the four files there are
        manifestpart = z.read('META-INF/manifest.xml')
        manifest = manifestlist(manifestpart)
        for xmlfile in xml_parts:
            # ``in`` works on Python 2 and 3; dict.has_key is Python 2 only.
            if xmlfile not in manifest:
                continue
            try:
                xmlpart = z.read(xmlfile)
                doc._parsing = xmlfile

                parser = make_parser()
                parser.setFeature(handler.feature_namespaces, 1)
                parser.setContentHandler(LoadParser(doc))
                parser.setErrorHandler(handler.ErrorHandler())

                inpsrc = InputSource()
                inpsrc.setByteStream(StringIO(xmlpart))
                parser.parse(inpsrc)
                del doc._parsing
            except KeyError:
                # A KeyError raised while handling a part is deliberately ignored.
                pass
        # FIXME: Add subobjects correctly here
        for mentry, mvalue in manifest.items():
            if mentry[:9] == "Pictures/" and len(mentry) > 9:
                doc.addPicture(mvalue['full-path'], mvalue['media-type'], z.read(mentry))
            elif mentry == "Thumbnails/thumbnail.png":
                doc.addThumbnail(z.read(mentry))
            elif mentry in xml_parts:
                pass
            else:
                full_path = mvalue['full-path']
                media_type = mvalue['media-type']
                # Entries whose path ends in '/' carry no content.
                content = None if full_path[-1] == '/' else z.read(mentry)
                doc._extra.append(OpaqueObject(full_path, media_type, content))
                # Add the SUN junk here to the struct somewhere
                # It is cached data, so it can be out-of-date
    finally:
        z.close()

    b = doc.getElementsByType(Body)
    for prefix, attribute in body_attributes:
        # Prefix comparison, so variants that extend the base mimetype
        # select the same attribute as the base type.
        if mimetype[:len(prefix)] == prefix:
            setattr(doc, attribute, b[0].firstChild)
            break
    return doc
Exemplo n.º 14
0
 def resolveEntity(self, publicId, systemId):
   """Resolve ``systemId`` against the directories in ``self.path``.

   Returns an InputSource whose byte stream is the first existing
   ``<dir>/<systemId>`` file, opened in binary mode so the parser sees
   the raw bytes. When no directory contains the file, an InputSource
   carrying only the system id is returned.
   """
   for p in self.path:
     fname = os.path.join(p, systemId)
     if os.path.exists(fname):
       source = InputSource(systemId)
       # A byte stream must deliver bytes: open in binary mode.
       source.setByteStream(open(fname, 'rb'))
       return source
   return InputSource(systemId)
Exemplo n.º 15
0
	def parse_request(self,soap_body,sinfo,encoding):
		"""Parse soap_body with SOAP11ContentHandler and return its req_dict.

		sinfo and encoding are accepted but not used by this method.
		"""
		reader = make_parser()
		content = SOAP11ContentHandler(reader)
		reader.setContentHandler(content)

		body_source = InputSource()
		body_source.setByteStream(BytesIO(soap_body))
		reader.parse(body_source)

		return content.req_dict
Exemplo n.º 16
0
 def test_parse_InputSource(self):
     # Data written without an encoding declaration must still parse when
     # the InputSource states the encoding explicitly.
     make_xml_file(self.data, 'iso-8859-1', None)
     source = InputSource()
     source.setEncoding('iso-8859-1')
     with open(TESTFN, 'rb') as stream:
         source.setByteStream(stream)
         self.check_parse(source)
Exemplo n.º 17
0
 def test_byte_stream(self):
     # An InputSource with a byte stream but no character stream must come
     # out of prepare_input_source still using the byte stream.
     byte_stream = self.make_byte_stream()
     source = InputSource(self.file)
     source.setByteStream(byte_stream)

     prepared = prepare_input_source(source)

     self.assertIsNone(prepared.getCharacterStream())
     self.checkContent(prepared.getByteStream(), b"This is a byte stream.")
Exemplo n.º 18
0
def test_expat_inpsource_stream():
    """Parse test.xml through an InputSource byte stream.

    Returns True when the XMLGenerator output equals ``xml_test_out``.
    """
    parser = make_parser()
    result = StringIO()
    xmlgen = XMLGenerator(result)

    parser.setContentHandler(xmlgen)
    inpsrc = InputSource()
    stream = open(findfile("test.xml"))
    try:
        inpsrc.setByteStream(stream)
        parser.parse(inpsrc)
    finally:
        # Do not leave the test file open, whatever the parse outcome.
        stream.close()

    return result.getvalue() == xml_test_out
Exemplo n.º 19
0
def test_expat_inpsource_location():
    """Check that a parse error reports the InputSource's system id.

    Returns True when the SAXException raised for the ill-formed input
    carries the system id set on the source, and False when that id
    differs or no exception is raised at all.
    """
    parser = make_parser()
    parser.setContentHandler(ContentHandler()) # do nothing
    source = InputSource()
    source.setByteStream(StringIO("<foo bar foobar>"))   #ill-formed
    name = "a file name"
    source.setSystemId(name)
    try:
        parser.parse(source)
    except SAXException as e:
        return e.getSystemId() == name
    # Ill-formed input was accepted: report failure explicitly.
    return False
Exemplo n.º 20
0
    def test_expat_inpsource_stream(self):
        """Parsing test.xml through a byte stream must yield xml_test_out."""
        parser = create_parser()
        result = StringIO()
        xmlgen = XMLGenerator(result)

        parser.setContentHandler(xmlgen)
        inpsrc = InputSource()
        # Close the test file deterministically instead of leaking it.
        with open(findfile("test.xml")) as stream:
            inpsrc.setByteStream(stream)
            parser.parse(inpsrc)

        self.assertEqual(result.getvalue(), xml_test_out)
Exemplo n.º 21
0
 def test_expat_inpsource_location(self):
     """A parse error must report the system id set on the InputSource."""
     expected_id = "a file name"
     broken = InputSource()
     broken.setByteStream(StringIO("<foo bar foobar>"))   # ill-formed
     broken.setSystemId(expected_id)

     reader = create_parser()
     reader.setContentHandler(ContentHandler()) # do nothing
     try:
         reader.parse(broken)
     except SAXException as err:
         self.assertEqual(err.getSystemId(), expected_id)
     else:
         self.fail()
Exemplo n.º 22
0
    def test_expat_inpsource_stream(self):
        """Parsing TEST_XMLFILE through a byte stream must yield xml_test_out."""
        output = StringIO()
        reader = create_parser()
        reader.setContentHandler(XMLGenerator(output))

        source = InputSource()
        with open(TEST_XMLFILE) as stream:
            source.setByteStream(stream)
            reader.parse(source)

        self.assertEqual(output.getvalue(), xml_test_out)
def manifestlist(manifestxml):
    """Parse manifest XML and return the handler's ``manifest`` mapping.

    ``manifestxml`` is the raw content of a package manifest. It is parsed
    namespace-aware with ``ODFManifestHandler``. External general entities
    are disabled so that a DOCTYPE or entity reference inside the package
    cannot make the parser fetch files or URLs.
    """
    odhandler = ODFManifestHandler()
    parser = make_parser()
    parser.setFeature(handler.feature_namespaces, 1)
    # Package content is untrusted: never resolve external entities/DTDs.
    parser.setFeature(handler.feature_external_ges, False)
    parser.setContentHandler(odhandler)
    parser.setErrorHandler(handler.ErrorHandler())

    inpsrc = InputSource()
    inpsrc.setByteStream(StringIO(manifestxml))
    parser.parse(inpsrc)

    return odhandler.manifest
Exemplo n.º 24
0
 def resolveEntity(self, publicId, systemId):
     """Return an InputSource for a DTD listed in ``self.knownDTDs``.

     The source carries ``systemId`` and a byte stream obtained from the
     ``open()`` method of the registered DTD path.
     Raises process.ProcessingFailure for an unknown system identifier.
     """
     try:
         knownPath = self.knownDTDs[systemId]
     except KeyError:
         raise process.ProcessingFailure(
             "Invalid DTD system identifier (%r) in %s.  Only "
             "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd "
             "is allowed." % (systemId, self.filename))
     dtdSource = InputSource(systemId)
     dtdSource.setByteStream(knownPath.open())
     return dtdSource
Exemplo n.º 25
0
def manifestlist(manifestxml):
    """Parse manifest XML and return the handler's ``manifest`` attribute.

    The parser is namespace-aware, reports errors through the default
    ``ErrorHandler`` and has external general entities switched off.
    """
    collector = ODFManifestHandler()

    reader = make_parser()
    reader.setFeature(handler.feature_namespaces, 1)
    reader.setContentHandler(collector)
    reader.setErrorHandler(handler.ErrorHandler())
    reader.setFeature(handler.feature_external_ges, False)  # Changed by Kovid to ignore external DTDs

    source = InputSource()
    source.setByteStream(StringIO(manifestxml))
    reader.parse(source)

    return collector.manifest
Exemplo n.º 26
0
def listNewDatasets(config, onlyThese=None):
    """ Parse table_of_contents.xml (in the current directory) with a
        ToCParser and return the parser's ``todownload`` attribute.

        config    -- passed through to ToCParser
        onlyThese -- passed through to ToCParser; a new empty list is used
                     when omitted, so no list is shared between calls
    """
    if onlyThese is None:
        onlyThese = []
    # Start XML parsing
    parser = make_parser()
    parser.setFeature(handler.feature_namespaces, 1)
    ch = ToCParser(config, onlyThese)
    parser.setContentHandler(ch)
    parser.setErrorHandler(handler.ErrorHandler())
    inpsrc = InputSource()
    toc = open('table_of_contents.xml')
    try:
        inpsrc.setByteStream(toc)
        parser.parse(inpsrc)
    finally:
        # Release the file even when parsing fails.
        toc.close()
    return ch.todownload
Exemplo n.º 27
0
def processxmlfile(file, context, testing=False):
    """Process a configuration file

    See examples in tests/text_xmlconfig.py
    """
    src = InputSource(getattr(file, 'name', '<string>'))
    src.setByteStream(file)
    parser = make_parser()
    parser.setContentHandler(ConfigurationHandler(context, testing=testing))
    parser.setFeature(feature_namespaces, True)
    try:
        parser.parse(src)
    except SAXParseException:
        raise ZopeSAXParseException(sys.exc_info()[1]), None, sys.exc_info()[2]
Exemplo n.º 28
0
	def load_from_file(self, filename=None):
		"""
		Loads color scheme from well-formed xml file, then derives every
		menu, edit-field, disabled, even-row and tree-line color that is
		still None from the colors already set.

		filename - full path to xml file; when empty, only the derived
		defaults are applied

		A failure while reading or parsing the file is printed with its
		traceback and re-raised.
		"""
		if filename:
			content_handler = XMLPrefReader(pref=self)
			error_handler = ErrorHandler()
			entity_resolver = EntityResolver()
			dtd_handler = DTDHandler()
			try:
				stream = open(filename, "r")
				try:
					input_source = InputSource()
					input_source.setByteStream(stream)
					xml_reader = xml.sax.make_parser()
					xml_reader.setContentHandler(content_handler)
					xml_reader.setErrorHandler(error_handler)
					xml_reader.setEntityResolver(entity_resolver)
					xml_reader.setDTDHandler(dtd_handler)
					xml_reader.parse(input_source)
				finally:
					# Always release the file, even when parsing fails.
					stream.close()
			except:
				import traceback
				traceback.print_exc()
				raise
		# Order matters below: later defaults reuse values set earlier.
		if self.disabledforeground is None:
			self.disabledforeground=lighter_color(self.foreground, .3)
		if self.menubackground is None:
			self.menubackground=self.bg
		if self.menuforeground is None:
			self.menuforeground=self.foreground
		if self.menuselectbackground is None:
			self.menuselectbackground=self.selectbackground
		if self.menuselectforeground is None:
			self.menuselectforeground=self.selectforeground
		if self.menudisabledforeground is None:
			self.menudisabledforeground=self.disabledforeground
		if self.menubordercolor is None:
			self.menubordercolor=self.disabledforeground
		if self.editfieldbackground is None:
			self.editfieldbackground='#ffffff'
		if self.editfieldforeground is None:
			self.editfieldforeground=self.foreground
		if self.evencolor is None:
			self.evencolor=middle_color(self.bg, self.editfieldbackground, 0.7)
		if self.treelinescolor is None:
			self.treelinescolor=self.editfieldforeground
Exemplo n.º 29
0
def __loadxmlparts(z, manifest, doc, objectpath):
    """
    Parses a document from its zipfile
    @param z an instance of zipfile.ZipFile
    @param manifest Manifest data structured in a dictionary
    @param doc instance of OpenDocument to feed in
    @param objectpath unicode string: path to an object
    """
    assert(isinstance(z, zipfile.ZipFile))
    assert(type(manifest) is dict)
    assert(isinstance(doc, OpenDocument))
    assert(type(objectpath) is type(u""))

    from odf.load import LoadParser
    from defusedxml.sax import make_parser
    from xml.sax import handler
    # Imported explicitly so that parse failures (seen with Python 2) can
    # be caught and reported below.
    from xml.sax._exceptions import SAXParseException

    part_names = (u'settings.xml', u'meta.xml', u'content.xml', u'styles.xml')
    for member in [objectpath + part for part in part_names]:
        if member not in manifest:
            continue
        try:
            text = z.read(member).decode("utf-8")
            doc._parsing = member

            reader = make_parser()
            reader.setFeature(handler.feature_namespaces, 1)
            reader.setFeature(handler.feature_external_ges, 0)
            reader.setContentHandler(LoadParser(doc))
            reader.setErrorHandler(handler.ErrorHandler())

            source = InputSource()
            # A SAXParseException may be triggered by a missing xmlns
            # prefix such as meta or config, so the declarations are added
            # when needed (GK, 2014/10/21).
            # Is there any option to prevent xmlns checks by SAX?
            text = __fixXmlPart(text)

            source.setByteStream(BytesIO(text.encode("utf-8")))
            reader.parse(source)
            del doc._parsing
        except KeyError:
            pass
        except SAXParseException:
            print (u"====== SAX FAILED TO PARSE ==========\n", text)
Exemplo n.º 30
0
 def parse(self, charSet, file):
     '''
     Wraps the content in an input source and parses it.

     @param charSet: string
         The character set of the content.
     @param file: byte file
         The bytes file object providing the content.
     @return: object
         Whatever parseInputSource returns for the prepared input source.
     '''
     contentSource = InputSource()
     contentSource.setEncoding(charSet)
     contentSource.setByteStream(file)
     return self.parseInputSource(contentSource)
Exemplo n.º 31
0
def _validate(aString,
              firstOccurrenceOnly,
              loggedEvents,
              base,
              encoding,
              selfURIs=None,
              mediaType=None):
    """validate RSS from string, returns validator object

    aString             -- the feed text to validate
    firstOccurrenceOnly -- forwarded to validator.setFirstOccurrenceOnly
    loggedEvents        -- events added to validator.loggedEvents; a
                           WPBlankLine event may be appended to this list
    base, encoding      -- forwarded to SAXDispatcher
    selfURIs            -- forwarded to SAXDispatcher; [base] when falsy
    mediaType           -- 'application/atomsvc+xml' and
                           'application/atomcat+xml' preset the feed type

    SAXException raised while parsing is swallowed; UnicodeError is logged
    on the validator.
    """
    from xml.sax import make_parser, handler
    from .base import SAXDispatcher
    from exceptions import UnicodeError
    from cStringIO import StringIO

    # Whitespace before the XML declaration in a feed that names wordpress
    # in its generator element: log it once, then move the leading text
    # behind the first tag so that parsing can continue.
    if re.match("^\s+<\?xml", aString) and re.search(
            "<generator.*wordpress.*</generator>", aString):
        lt = aString.find('<')
        gt = aString.find('>')
        if lt > 0 and gt > 0 and lt < gt:
            loggedEvents.append(logging.WPBlankLine({'line': 1, 'column': 1}))
            # rearrange so that other errors can be found
            aString = aString[lt:gt + 1] + aString[0:lt] + aString[gt + 1:]

    # By now, aString should be Unicode
    source = InputSource()
    source.setByteStream(StringIO(xmlEncoding.asUTF8(aString)))

    validator = SAXDispatcher(base, selfURIs or [base], encoding)
    validator.setFirstOccurrenceOnly(firstOccurrenceOnly)

    if mediaType == 'application/atomsvc+xml':
        validator.setFeedType(TYPE_APP_SERVICE)
    elif mediaType == 'application/atomcat+xml':
        validator.setFeedType(TYPE_APP_CATEGORIES)

    validator.loggedEvents += loggedEvents

    # experimental RSS-Profile support
    # One boolean per input line: does the line contain '&#x'?
    validator.rssCharData = [s.find('&#x') >= 0 for s in aString.split('\n')]

    xmlver = re.match(
        "^<\?\s*xml\s+version\s*=\s*['\"]([-a-zA-Z0-9_.:]*)['\"]", aString)
    if xmlver and xmlver.group(1) != '1.0':
        validator.log(logging.BadXmlVersion({"version": xmlver.group(1)}))

    # Prefer an expat parser that calls UseForeignDTD(1) on every reset;
    # fall back to the default SAX parser if that cannot be set up.
    try:
        from xml.sax.expatreader import ExpatParser

        class fake_dtd_parser(ExpatParser):
            def reset(self):
                ExpatParser.reset(self)
                self._parser.UseForeignDTD(1)

        parser = fake_dtd_parser()
    except:
        parser = make_parser()

    # The validator serves as content handler, error handler and entity
    # resolver at the same time.
    parser.setFeature(handler.feature_namespaces, 1)
    parser.setContentHandler(validator)
    parser.setErrorHandler(validator)
    parser.setEntityResolver(validator)
    if hasattr(parser, '_ns_stack'):
        # work around bug in built-in SAX parser (doesn't recognize xml: namespace)
        # PyXML doesn't have this problem, and it doesn't have _ns_stack either
        parser._ns_stack.append(
            {'http://www.w3.org/XML/1998/namespace': 'xml'})

    def xmlvalidate(log):
        # Re-read aString with a validating libxml2 text reader and pass
        # every collected error line that starts with the random prefix
        # to ``log``.
        import libxml2
        from StringIO import StringIO
        from random import random

        prefix = "...%s..." % str(random()).replace('0.', '')
        msg = []
        libxml2.registerErrorHandler(lambda msg, str: msg.append(str), msg)

        input = libxml2.inputBuffer(StringIO(xmlEncoding.asUTF8(aString)))
        reader = input.newTextReader(prefix)
        reader.SetParserProp(libxml2.PARSER_VALIDATE, 1)
        ret = reader.Read()
        while ret == 1:
            ret = reader.Read()

        msg = ''.join(msg)
        for line in msg.splitlines():
            if line.startswith(prefix): log(line.split(':', 4)[-1].strip())

    # Stored on the validator, not called here.
    validator.xmlvalidator = xmlvalidate

    try:
        parser.parse(source)
    except SAXException:
        pass
    except UnicodeError:
        import sys
        exctype, value = sys.exc_info()[:2]
        validator.log(logging.UnicodeError({"exception": value}))

    # RSS 1.0 feeds get a second pass through rdflib's RDF/XML handler;
    # any failure in that pass (including a missing rdflib) is ignored.
    if validator.getFeedType() == TYPE_RSS1:
        try:
            from rdflib.syntax.parsers.RDFXMLHandler import RDFXMLHandler

            class Handler(RDFXMLHandler):
                ns_prefix_map = {}
                prefix_ns_map = {}

                def add(self, triple):
                    pass

                def __init__(self, dispatcher):
                    RDFXMLHandler.__init__(self, self)
                    self.dispatcher = dispatcher

                def error(self, message):
                    self.dispatcher.log(InvalidRDF({"message": message}))

            source = InputSource()
            source.setByteStream(StringIO(xmlEncoding.asUTF8(aString)))

            parser.reset()
            parser.setContentHandler(Handler(parser.getContentHandler()))
            parser.setErrorHandler(handler.ErrorHandler())
            parser.parse(source)
        except:
            pass

    return validator
        elif tag == (RELAXNS, 'choice') and self.currattr is None:
            self.optional = self.optional - 1
        self.data = []


if __name__ == "__main__":
    # Script entry point: parse the two RELAX NG files with S22RelaxParser,
    # which fills ``elements``, then print the collected data as the
    # source text of a ``required_attributes`` dictionary.
    # NOTE(review): the output below is never closed with "}" in the
    # visible lines -- the script appears to continue past this excerpt.
    elements = {}
    parser = make_parser()
    parser.setFeature(handler.feature_namespaces, 1)
    parser.setContentHandler(S22RelaxParser(elements))
    parser.setErrorHandler(handler.ErrorHandler())

    # Both schema files are fed to the same parser and handler.
    for relaxfile in ["simple-manifest-7-22.rng","simple-schema-7-22.rng"]:
        content = file(relaxfile)
        inpsrc = InputSource()
        inpsrc.setByteStream(content)
        parser.parse(inpsrc)

    # Emit the entries in sorted key order.
    slist = elements.keys()
    slist.sort()

    print "required_attributes = {"
    for s in slist:
        e = elements[s]
        # Elements in the DBNS namespace are left out of the output.
        if e.ns == DBNS: continue
        if len(e.attrs) > 0:
            print "# required_attributes"
            # Trailing comma: keep the opening "(" on the same output line.
            print "\t(%sNS,u'%s'):" % (nsdict.get(e.ns,'unknown').upper(), e.name),
            print "("
            for a in e.attrs.values():
                print "\t\t(%sNS,u'%s')," % (nsdict.get(a.ns,'unknown').upper(), a.name)
Exemplo n.º 33
0
 def resolveEntity(self, publicId, systemId):
     """Answer every entity lookup with a fixed in-memory document.

     Both identifiers are ignored; the returned InputSource streams the
     bytes ``<entity/>``.
     """
     replacement = InputSource()
     replacement.setByteStream(BytesIO(b"<entity/>"))
     return replacement
Exemplo n.º 34
0
def _validate(aString,
              firstOccurrenceOnly,
              loggedEvents,
              base,
              encoding,
              selfURIs=None):
    """Validate a feed held in a string and return the validator object.

    The string is run through a namespace-aware SAX parser whose content
    handler, error handler and entity resolver are all the same
    SAXDispatcher instance.  When the dispatcher reports the feed type
    TYPE_RSS1, the same bytes are parsed a second time with an rdflib
    based handler so that RDF errors are logged as InvalidRDF events.

    Parameters:
        aString: the feed text (expected to be Unicode by this point).
        firstOccurrenceOnly: forwarded to
            ``validator.setFirstOccurrenceOnly``.
        loggedEvents: events appended to ``validator.loggedEvents``
            before parsing starts.
        base: passed to SAXDispatcher; also the only self URI when
            ``selfURIs`` is empty or None.
        encoding: passed to SAXDispatcher.
        selfURIs: optional list passed to SAXDispatcher.

    Returns:
        The SAXDispatcher instance used for the parse.
    """
    from xml.sax import make_parser, handler
    from base import SAXDispatcher
    from exceptions import UnicodeError
    from cStringIO import StringIO

    # By now, aString should be Unicode
    source = InputSource()
    source.setByteStream(StringIO(xmlEncoding.asUTF8(aString)))

    validator = SAXDispatcher(base, selfURIs or [base], encoding)
    validator.setFirstOccurrenceOnly(firstOccurrenceOnly)

    validator.loggedEvents += loggedEvents

    # Any declared XML version other than 1.0 is reported.
    xmlver = re.match(
        "^<\?\s*xml\s+version\s*=\s*['\"]([-a-zA-Z0-9_.:]*)['\"]", aString)
    if xmlver and xmlver.group(1) != '1.0':
        validator.log(logging.BadXmlVersion({"version": xmlver.group(1)}))

    # Prefer an expat parser that enables UseForeignDTD on every reset;
    # fall back to the default SAX parser when that cannot be set up.
    try:
        from xml.sax.expatreader import ExpatParser

        class fake_dtd_parser(ExpatParser):
            def reset(self):
                ExpatParser.reset(self)
                self._parser.UseForeignDTD(1)

        parser = fake_dtd_parser()
    except Exception:
        parser = make_parser()

    parser.setFeature(handler.feature_namespaces, 1)
    parser.setContentHandler(validator)
    parser.setErrorHandler(validator)
    parser.setEntityResolver(validator)
    if hasattr(parser, '_ns_stack'):
        # work around bug in built-in SAX parser (doesn't recognize xml: namespace)
        # PyXML doesn't have this problem, and it doesn't have _ns_stack either
        parser._ns_stack.append(
            {'http://www.w3.org/XML/1998/namespace': 'xml'})

    def xmlvalidate(log):
        """Re-read the feed with libxml2 in validating mode.

        Each libxml2 error line that starts with the random prefix is
        passed to ``log`` with everything up to the fourth colon removed.
        """
        import libxml2
        from StringIO import StringIO
        from random import random

        # The random prefix is used as the reader's URI so that lines
        # belonging to this document can be picked out of the messages.
        prefix = "...%s..." % str(random()).replace('0.', '')
        msg = []
        libxml2.registerErrorHandler(
            lambda collected, text: collected.append(text), msg)

        buf = libxml2.inputBuffer(StringIO(xmlEncoding.asUTF8(aString)))
        reader = buf.newTextReader(prefix)
        reader.SetParserProp(libxml2.PARSER_VALIDATE, 1)
        ret = reader.Read()
        while ret == 1:
            ret = reader.Read()

        msg = ''.join(msg)
        for line in msg.splitlines():
            if line.startswith(prefix): log(line.split(':', 4)[-1].strip())

    validator.xmlvalidator = xmlvalidate

    try:
        parser.parse(source)
    except SAXParseException:
        # Deliberately ignored here.  NOTE(review): presumably already
        # reported through the validator acting as the error handler --
        # confirm against SAXDispatcher.
        pass
    except UnicodeError:
        import sys
        value = sys.exc_info()[1]
        validator.log(logging.UnicodeError({"exception": value}))

    if validator.getFeedType() == TYPE_RSS1:
        # Best-effort RDF check: any failure here (for example rdflib not
        # being importable) is ignored, but interpreter-exit signals such
        # as KeyboardInterrupt are no longer swallowed.
        try:
            from rdflib.syntax.parsers.RDFXMLHandler import RDFXMLHandler

            class Handler(RDFXMLHandler):
                ns_prefix_map = {}
                prefix_ns_map = {}

                def add(self, triple):
                    pass

                def __init__(self, dispatcher):
                    RDFXMLHandler.__init__(self, self)
                    self.dispatcher = dispatcher

                def error(self, message):
                    self.dispatcher.log(InvalidRDF({"message": message}))

            # Rewind the byte stream so the second parse sees the whole
            # document again.
            source.getByteStream().reset()
            parser.reset()
            parser.setContentHandler(Handler(parser.getContentHandler()))
            parser.setErrorHandler(handler.ErrorHandler())
            parser.parse(source)
        except Exception:
            pass

    return validator
Exemplo n.º 35
0
            except SAXNotRecognizedException:
                pass
    except SAXException, e:
        raise DistutilsModuleError(e.getMessage())

    handler = InclusionFilter()
    parser.setContentHandler(handler)

    if isinstance(source, (str, unicode)):
        try:
            stream = Uri.UrlOpen(source)
        except OSError:
            # Assume part of an XInclude w/fallback.
            return
        source = InputSource(source)
        source.setByteStream(stream)
    elif hasattr(source, 'read'):
        stream = source
        source = InputSource(getattr(stream, 'name', None))
        source.setByteStream(stream)
    parser.parse(source)
    return


INDEX_TEMPLATE = """<?xml version="1.0" encoding="ISO-8859-1"?>
<!DOCTYPE article PUBLIC "-//OASIS//DTD Simplified DocBook XML V1.1//EN"
          "http://docbook.org/xml/simple/1.1/sdocbook.dtd">
<?ftdb-ignore-namespace http://xmlns.4suite.org/reserved?>
<article>
  <title>%(fullname)s Document Index</title>
%(sections)s
Exemplo n.º 36
0
 def resolveEntity(self,publicId,systemId):
    """Resolve every external entity to an empty in-memory stream.

    Both identifiers are ignored.
    """
    empty_source = InputSource()
    blank = StringIO("")
    empty_source.setByteStream(blank)
    return empty_source
Exemplo n.º 37
0
 def test_byte_stream(self):
     src = InputSource(self.file)
     src.setByteStream(self.make_byte_stream())
     prep = prepare_input_source(src)
     self.assertIsNone(prep.getCharacterStream())
     self.checkContent(prep.getByteStream(), b'This is a byte stream.')
Exemplo n.º 38
0
# regression test for SAX 2.0
Exemplo n.º 39
0
class DataImporter(Digester):
    def __init__(self, ictx, file):
        """Set up the importer for one bzip2-compressed replication file.

        ictx -- import context mapping; ``ictx['conn'].connection`` is
                used as the database connection.
        file -- object whose ``name`` attribute is the path of the
                compressed file; it is kept and closed by close().
        """
        Digester.__init__(self)
        self._ictx = ictx
        self._file = file

        # The SAX input reads the decompressed bytes of the same path.
        self._input = InputSource(file.name)
        compressed = BZ2File(file.name, 'r')
        self._input.setByteStream(compressed)

        self._conn = ictx['conn'].connection
        self._cursor = self._conn.cursor()

        self.success = False
        self._closed = False
        self._add_rules()

    def _add_rules(self):
        self.addOnBegin('packet', self._check_packet)
        self.addOnBeginAndEnd('packet/transaction/event', self._on_event,
                              self._on_event_end)
        self.addOnBody('packet/transaction/event/keys/column',
                       self._on_key_column)
        self.addOnBody('packet/transaction/event/values/column',
                       self._on_value_column)
        self.addOnFinish(self._on_finish)

    def _check_packet(self, tag, attrs):
        if self._ictx['schema_seq'] != int(attrs.getValue('schema_seq')):
            raise Exception(
                '<packet> schema_seq: {0} not matched the expected seq number {1}',
                attrs.getValue('schema_seq'), self._ictx['replication_seq'])

        if self._ictx['replication_seq'] != int(
                attrs.getValue('replication_seq')):
            raise Exception(
                '<packet> replication_seq: {0} not matched the expected seq number {1}',
                attrs.getValue('replication_seq'),
                self._ictx['replication_seq'])

    def _on_key_column(self, tag, attrs, val):
        event = self.peek()
        event['keys'][attrs.getValue('name')] = val

    def _on_value_column(self, tag, attrs, val):
        event = self.peek()
        isNull = attrs.getValue("null") if attrs.has_key('null') else None
        event['values'][attrs.getValue(
            'name')] = val if isNull != "yes" else None

    def _on_event(self, tag, attrs):
        event = {
            'op': attrs.getValue('op'),
            'table': attrs.getValue('table'),
            'keys': OrderedDict(),  #array of tuples column name -> column val
            'values': OrderedDict()  #array of tuples column name -> column val
        }
        self.push(event)

    def _on_event_end(self, tag):
        event = self.pop()
        type = event['op']
        table = event['table']
        keys = event['keys']
        values = event['values']
        params = []
        if type == 'I':
            sql_columns = ', '.join(values.keys())
            sql_values = ', '.join(['%s'] * len(values))
            sql = 'INSERT INTO %s (%s) VALUES (%s)' % (table, sql_columns,
                                                       sql_values)
            params = values.values()
        elif type == 'U':
            sql_values = ', '.join('%s=%%s' % i for i in values)
            sql = 'UPDATE %s SET %s' % (table, sql_values)
            params = values.values()
        elif type == 'D':
            sql = 'DELETE FROM %s' % table
        else:
            raise Exception('Invalid <event> op: %s' % type)

        if type == 'D' or type == 'U':
            sql += ' WHERE ' + ' AND '.join(
                '%s%s%%s' % (i, ' IS ' if keys[i] is None else '=')
                for i in keys.keys())
            params.extend(keys.values())

        #print '%s %s' % (sql, params)
        self._cursor.execute(sql, params)

    def _on_finish(self):
        pass

    def load(self):
        """Parse the prepared input and flag the import as successful.

        ``success`` is only set when parse() returns without raising.
        """
        logger.warning('Saving dataset....')
        source = self._input
        self.parse(source)
        # Reached only if parsing did not raise.
        self.success = True

    def recover(self):
        """Dirty hack: remove weird characters found in some replication
        files by running them through the external ``tidy`` tool.

        The current byte stream is copied to a temporary file, ``tidy``
        writes a fixed copy to a second temporary file, and the importer
        is then closed, re-pointed at the fixed file and loaded again.
        Both temporary files are removed afterwards, whether or not the
        reload succeeds.
        """
        logger.warning('Trying to recover invalid XML...')
        originalXML = None
        fixedXML = None
        try:
            originalXML = tempfile.NamedTemporaryFile(
                suffix='.xml', delete=False)  # bunzipped tmp
            fixedXML = tempfile.NamedTemporaryFile(suffix='.xml',
                                                   delete=False)  # fixed tmp
            # Only the path is needed; tidy writes the content itself.
            fixedXML.close()

            # Fetch uncompressed file data to recover
            bzf = self._input.getByteStream()
            bzf.seek(0)
            shutil.copyfileobj(bzf, originalXML)
            originalXML.close()

            cmd = ['tidy', '-xml', '-o', fixedXML.name, originalXML.name]
            logger.warning('Running: %s', ' '.join(cmd))
            ret = subprocess.call(cmd)
            if ret:
                # A non-zero status is deliberately not fatal; the reload
                # below is still attempted.  Record it instead of
                # dropping it silently.
                logger.warning('tidy exited with status %s', ret)

            # Ready to load: finish the failed attempt, then start over
            # on the fixed file with a fresh cursor and fresh rules.
            self.close()
            self._file = open(fixedXML.name, 'r')
            self._input = InputSource(fixedXML.name)
            self._input.setByteStream(self._file)
            self._cursor = self._conn.cursor()
            self.success = self._closed = False
            self.reset()
            self._add_rules()
            self.load()
        finally:
            for f in [originalXML, fixedXML]:
                if f and not f.closed:
                    f.close()
                if f and os.path.exists(f.name):
                    os.unlink(f.name)

    def close(self):
        """Commit or roll back the transaction and release all resources.

        Commits when ``success`` is set, otherwise rolls back.  Calling
        close() again afterwards is a no-op.  The cursor, the input byte
        stream and the original file are each closed even if an earlier
        step raises.
        """
        if self._closed:
            return
        try:
            try:
                if self.success:
                    self._conn.commit()
                    logger.warning('Done')
                else:
                    logger.warning(
                        'Rolling back transaction. Seq number: {0}'.format(
                            self._ictx['replication_seq']))
                    self._conn.rollback()
            finally:
                # Close the cursor even when commit/rollback fails.
                self._cursor.close()
        finally:
            self._closed = True
            try:
                self._input.getByteStream().close()
            finally:
                # Still close the file if closing the stream raised.
                self._file.close()