예제 #1
0
    def tmx_import(self, file, REQUEST=None, RESPONSE=None):
        """ Imports a TMX level 1 file.
            We use the SAX parser. It has the benefit that it internally
            converts everything to python unicode strings.

            file -- the TMX document, either as a string or as an object
                    whose read() method returns the document.
            REQUEST, RESPONSE -- when REQUEST is given, RESPONSE is
                    redirected to 'manage_localPropertiesForm' after the
                    import, so the two must be supplied together.
        """
        # Volatile attribute that exists only while the import runs;
        # presumably read by the handler callbacks -- TODO confirm.
        self._v_srclang = self._default_language
        try:
            # Create a parser
            parser = make_parser()
            chandler = HandleTMXParsing(self._tmx_tu, self._tmx_header)
            # Tell the parser to use our handler
            parser.setContentHandler(chandler)
            # Don't load the DTD from the Internet
            parser.setFeature(handler.feature_external_ges, 0)
            inputsrc = InputSource()

            if isinstance(file, StringType):
                content = file
            else:
                content = file.read()
            inputsrc.setByteStream(StringIO(content))
            parser.parse(inputsrc)
        finally:
            # Drop the volatile attribute even when parsing fails, so a
            # broken upload does not leave it behind on the object.
            if hasattr(self, '_v_srclang'):
                del self._v_srclang

        if REQUEST is not None:
            RESPONSE.redirect('manage_localPropertiesForm')
예제 #2
0
    def parseFile(self, inputFile, stream=None):
        """Parse the document identified by ``inputFile``.

        ``inputFile`` is used as the system id of the SAX input source.
        When ``stream`` is given it is used as the byte stream and is left
        open for the caller; otherwise ``inputFile`` is opened in binary
        mode here and closed again once ``parseSource`` returns.
        """
        source = InputSource(inputFile)
        if stream is not None:
            source.setByteStream(stream)
            self.parseSource(source)
            return

        # We opened it, so we are responsible for closing it.
        with open(inputFile, 'rb') as opened:
            source.setByteStream(opened)
            self.parseSource(source)
예제 #3
0
파일: parser.py 프로젝트: chatoooo/suds-ng
 def parse(self, file=None, string=None):
     """
     Run the SAX parser over XML input and return the first parsed node.

     C{file} wins when both arguments are supplied; when neither is
     supplied nothing is parsed and C{None} is returned.
     @param file: Input handed directly to the SAX parser.
     @type file: I{file-like} object.
     @param string: XML text; text strings are encoded as UTF-8 first.
     @type string: str
     @return: The first entry of the content handler's C{nodes}.
     """
     stopwatch = metrics.Timer()
     stopwatch.start()
     reader, content = self.saxparser()
     if file is not None:
         reader.parse(file)
         stopwatch.stop()
         metrics.log.debug('sax (%s) duration: %s', file, stopwatch)
     elif string is not None:
         if isinstance(string, six.text_type):
             string = string.encode("utf-8")
         wrapped = InputSource(None)
         wrapped.setByteStream(BytesIO(string))
         reader.parse(wrapped)
         stopwatch.stop()
         metrics.log.debug('%s\nsax duration: %s', string, stopwatch)
     else:
         return None
     return content.nodes[0]
예제 #4
0
파일: parser.py 프로젝트: dvska/suds-htj
 def parse(self, file=None, string=None):
     """
     Run the SAX parser over XML input and return the first parsed node.

     C{file} wins when both arguments are supplied; when neither is
     supplied nothing is parsed and C{None} is returned.
     @param file: Input handed directly to the SAX parser.
     @type file: I{file-like} object.
     @param string: XML text; it is encoded as UTF-8 when possible and
         used unchanged when encoding raises C{UnicodeDecodeError}.
     @type string: str
     @return: The first entry of the content handler's C{nodes}.
     """
     stopwatch = metrics.Timer()
     stopwatch.start()
     reader, content = self.saxparser()
     if file is not None:
         reader.parse(file)
         stopwatch.stop()
         metrics.log.debug('sax (%s) duration: %s', file, stopwatch)
     elif string is not None:
         wrapped = InputSource(None)
         try:
             payload = StringIO(string.encode('utf8'))
         except UnicodeDecodeError:
             # Fall back to the string exactly as it was passed in.
             payload = StringIO(string)
         wrapped.setByteStream(payload)
         reader.parse(wrapped)
         stopwatch.stop()
         metrics.log.debug('%s\nsax duration: %s', string, stopwatch)
     else:
         return None
     return content.nodes[0]
예제 #5
0
파일: parser.py 프로젝트: Seedstars/suds
    def parse(self, file=None, string=None):
        """
        Parse XML with SAX and return the resulting document.

        C{file} takes precedence over C{string}; when both are C{None}
        nothing is parsed and C{None} is returned.

        @param file: Input handed directly to the SAX parser.
        @type file: I{file-like} object
        @param string: XML content, wrapped in a byte stream for parsing.
        @type string: str
        @return: Parsed XML document.
        @rtype: L{Document}

        """
        from_string = file is None
        if from_string and string is None:
            return None
        stopwatch = metrics.Timer()
        stopwatch.start()
        if from_string:
            source = InputSource(None)
            source.setByteStream(suds.BytesIO(string))
        else:
            source = file
        reader, content = self.saxparser()
        reader.parse(source)
        stopwatch.stop()
        if from_string:
            metrics.log.debug("%s\nsax duration: %s", string, stopwatch)
        else:
            metrics.log.debug("sax (%s) duration: %s", file, stopwatch)
        return content.nodes[0]
예제 #6
0
파일: parser.py 프로젝트: uhla/suds-sw
 def parse(self, file=None, string=None):
     """
     Run the SAX parser over XML input and return the first parsed node.

     C{file} wins when both arguments are supplied; when neither is
     supplied nothing is parsed and C{None} is returned.
     @param file: Input handed directly to the SAX parser.
     @type file: I{file-like} object.
     @param string: XML text; text strings are encoded as UTF-8 first.
     @type string: str
     @return: The first entry of the content handler's C{nodes}.
     """
     stopwatch = metrics.Timer()
     stopwatch.start()
     reader, content = self.saxparser()
     if file is not None:
         reader.parse(file)
         stopwatch.stop()
         metrics.log.debug('sax (%s) duration: %s', file, stopwatch)
     elif string is not None:
         if isinstance(string, six.text_type):
             string = string.encode("utf-8")
         wrapped = InputSource(None)
         wrapped.setByteStream(BytesIO(string))
         reader.parse(wrapped)
         stopwatch.stop()
         metrics.log.debug('%s\nsax duration: %s', string, stopwatch)
     else:
         return None
     return content.nodes[0]
예제 #7
0
    def parse(self, file=None, string=None):
        """
        Parse XML with SAX and return the resulting document.

        C{file} takes precedence over C{string}; when both are C{None}
        nothing is parsed and C{None} is returned.

        @param file: Input handed directly to the SAX parser.
        @type file: I{file-like} object
        @param string: XML content, wrapped in a byte stream for parsing.
        @type string: str
        @return: Parsed XML document.
        @rtype: L{Document}

        """
        from_string = file is None
        if from_string and string is None:
            return None
        stopwatch = suds.metrics.Timer()
        stopwatch.start()
        if from_string:
            source = InputSource(None)
            source.setByteStream(suds.BytesIO(string))
        else:
            source = file
        reader, content = self.saxparser()
        reader.parse(source)
        stopwatch.stop()
        if from_string:
            suds.metrics.log.debug("%s\nsax duration: %s", string, stopwatch)
        else:
            suds.metrics.log.debug("sax (%s) duration: %s", file, stopwatch)
        return content.nodes[0]
예제 #8
0
def test_ignorable():
    """Parse ``doc3`` with an ``H`` content handler and report its verdict.

    Returns the handler's ``passed`` attribute after the parse completes.
    """
    validating_parser = XMLValParserFactory.make_parser()
    source = InputSource("doc3.xml")
    source.setByteStream(StringIO(doc3))
    checker = H()
    validating_parser.setContentHandler(checker)
    validating_parser.parse(source)
    return checker.passed
예제 #9
0
파일: bodycheck.py 프로젝트: fluxer/warmux
 def resolveEntity(self, publicId, systemId):
     """Resolve ``systemId`` against ``self._path`` when a local file exists.

     Returns an ``InputSource`` whose byte stream is the local file opened
     in binary mode; otherwise defers to ``EntityResolver.resolveEntity``.
     """
     candidate = os.path.join(self._path, systemId) if systemId else None
     if candidate is not None and os.path.isfile(candidate):
         local = InputSource()
         # The stream is handed to the caller still open.
         local.setByteStream(open(candidate, "rb"))
         return local
     # No local copy: use the default resolution
     return EntityResolver.resolveEntity(self, publicId, systemId)
예제 #10
0
 def resolveEntity(self, publicId, systemId):
     """Resolve ``systemId`` against ``self._path`` when a local file exists.

     Returns an ``InputSource`` whose byte stream is the local file opened
     in binary mode; otherwise defers to ``EntityResolver.resolveEntity``.
     """
     candidate = os.path.join(self._path, systemId) if systemId else None
     if candidate is not None and os.path.isfile(candidate):
         local = InputSource()
         # The stream is handed to the caller still open.
         local.setByteStream(open(candidate, "rb"))
         return local
     # No local copy: use the default resolution
     return EntityResolver.resolveEntity(self, publicId, systemId)
예제 #11
0
def test_illformed():
    """Check that parsing ``doc2`` raises a ``SAXException``.

    Returns 1 (and prints the error) when the parser rejects the document,
    0 when it parses without complaint.
    """
    p = XMLValParserFactory.make_parser()
    i = InputSource("doc2.xml")
    i.setByteStream(StringIO(doc2))
    try:
        p.parse(i)
    except SAXException as e:
        # Single %-formatted argument prints the same text whether print
        # is a statement or a function.
        print("PASS: %s" % (e,))
        return 1
    # The document was accepted, so the check failed.
    return 0
예제 #12
0
파일: xliff_parser.py 프로젝트: eea/odfpy
 def parseXLIFFSTring(self, xml_string):
     """ Parse an XLIFF document held in a string.

     Returns the XLIFFHandler that received the parse events, or None
     when parsing raised an error.
     """
     chandler = XLIFFHandler()
     parser = make_parser()
     # Tell the parser to use our handler
     parser.setContentHandler(chandler)
     # Don't load the DTD from the Internet
     parser.setFeature(handler.feature_external_ges, 0)
     inpsrc = InputSource()
     inpsrc.setByteStream(StringIO(xml_string))
     try:
         parser.parse(inpsrc)
     except Exception:
         # Best effort: any parse failure is reported as None, but
         # KeyboardInterrupt/SystemExit are no longer swallowed.
         return None
     return chandler
예제 #13
0
 def parseXLIFFSTring(self, xml_string):
     """ Parse an XLIFF document held in a string.

     Returns the XLIFFHandler that received the parse events, or None
     when parsing raised an error.
     """
     chandler = XLIFFHandler()
     parser = make_parser()
     # Tell the parser to use our handler
     parser.setContentHandler(chandler)
     # Don't load the DTD from the Internet
     parser.setFeature(handler.feature_external_ges, 0)
     inpsrc = InputSource()
     inpsrc.setByteStream(StringIO(xml_string))
     try:
         parser.parse(inpsrc)
     except Exception:
         # Best effort: any parse failure is reported as None, but
         # KeyboardInterrupt/SystemExit are no longer swallowed.
         return None
     return chandler
예제 #14
0
    def __init__(self, session, config, parent):
        """Create the SAX parser and wire up its error and content handlers.

        Three settings are read through ``get_setting``: 'namespaces'
        turns on the SAX namespaces feature, 'attrHash' fills the content
        handler's ``hashAttributesNames`` and 'stripWhitespace' sets the
        handler's ``stripWS`` flag.
        """
        Parser.__init__(self, session, config, parent)
        self.parser = make_parser()
        self.errorHandler = ErrorHandler()
        self.parser.setErrorHandler(self.errorHandler)
        self.inputSource = SaxInput()
        ch = SaxContentHandler()
        self.contentHandler = ch
        self.parser.setContentHandler(ch)
        self.keepError = 1

        if (self.get_setting(session, 'namespaces')):
            self.parser.setFeature('http://xml.org/sax/features/namespaces',
                                   1)
        p = self.get_setting(session, 'attrHash')
        if (p):
            # Whitespace-separated "a@b" items; presumably tag@attribute
            # -- TODO confirm against the configuration docs.
            for i in p.split():
                (a, b) = i.split("@")
                try:
                    ch.hashAttributesNames[a].append(b)
                except KeyError:
                    # First value seen for this key.
                    ch.hashAttributesNames[a] = [b]
        if self.get_setting(session, 'stripWhitespace'):
            ch.stripWS = 1
예제 #15
0
 def __init__(self, parent, config):
     """Create the SAX parser and register this object as content handler.

     Also stores a ``SaxInput`` as ``inputSource`` and installs a fresh
     ``ErrorHandler`` on the parser.
     """
     C3Object.__init__(self, parent, config)
     reader = make_parser()
     self.parser = reader
     self.inputSource = SaxInput()
     errors = ErrorHandler()
     self.errorHandler = errors
     reader.setErrorHandler(errors)
     reader.setContentHandler(self)
예제 #16
0
    def parseContent(self, file):
        """Parse the content read from ``file`` with a ``GBoxHandler``.

        The content is obtained through ``utils.utRead(file)``.  Returns
        the handler that received the parse events, or the string 'err'
        when any step raised an error.
        """
        try:
            # Create a parser
            parser = make_parser()
            chandler = GBoxHandler()
            # Tell the parser to use our handler
            parser.setContentHandler(chandler)
            # Don't load the DTD from the Internet
            parser.setFeature(handler.feature_external_ges, 0)
            inputsrc = InputSource()

            gbox_content = utils.utRead(file)
            inputsrc.setByteStream(StringIO(gbox_content))
            parser.parse(inputsrc)
        except Exception:
            # Best effort: failures are reported as 'err', but
            # KeyboardInterrupt/SystemExit are no longer swallowed.
            return 'err'
        return chandler
예제 #17
0
    def tmx_import(self, howmuch, file, REQUEST=None, RESPONSE=None):
        """ Imports a TMX level 1 file.
            We use the SAX parser. It has the benefit that it internally
            converts everything to python unicode strings.

            howmuch -- when equal to 'clear', the message catalogue and
                       the language list are emptied before the import.
            file -- the TMX document, either as a string or as an object
                    whose read() method returns the document.
            REQUEST -- when given, a MessageDialog reporting the number of
                       imported messages and notes is returned; otherwise
                       nothing is returned.
        """
        if howmuch == 'clear':
            # Clear the message catalogue prior to import
            self._messages = {}
            self._languages = ()

        try:
            # Volatile attributes that exist only while the import runs;
            # presumably read/updated by the handler callbacks -- TODO
            # confirm.
            self._v_howmuch = howmuch
            self._v_srclang = self._default_language
            self._v_num_translations = 0
            self._v_num_notes = 0
            # Create a parser
            parser = make_parser()
            chandler = HandleTMXParsing(self._tmx_tu, self._tmx_header)
            # Tell the parser to use our handler
            parser.setContentHandler(chandler)
            # Don't load the DTD from the Internet
            parser.setFeature(handler.feature_external_ges, 0)
            inputsrc = InputSource()

            if isinstance(file, StringType):
                content = file
            else:
                content = file.read()
            inputsrc.setByteStream(StringIO(content))
            parser.parse(inputsrc)

            num_translations = self._v_num_translations
            num_notes = self._v_num_notes
        finally:
            # Remove the volatile attributes even when the import fails,
            # so a broken upload does not leave them on the object.
            for name in ('_v_srclang', '_v_howmuch',
                         '_v_num_translations', '_v_num_notes'):
                if hasattr(self, name):
                    delattr(self, name)

        if REQUEST is not None:
            return MessageDialog(
                title = _('Messages imported'),
                message = _('Imported %d messages and %d notes')
                          % (num_translations, num_notes),
                action = 'manage_messages')
예제 #18
0
파일: xliff_parser.py 프로젝트: eea/odfpy
    def parseXLIFFFile(self, file):
        """Parse an XLIFF document given as a string or a readable object.

        ``file`` is either the document itself (a string) or an object
        whose ``read()`` method returns it.  Returns the XLIFFHandler that
        received the parse events, or None when reading or parsing raised
        an error.
        """
        # Create a parser
        parser = make_parser()
        chandler = XLIFFHandler()
        # Tell the parser to use our handler
        parser.setContentHandler(chandler)
        # Don't load the DTD from the Internet
        parser.setFeature(handler.feature_external_ges, 0)
        inputsrc = InputSource()

        try:
            if isinstance(file, StringType):
                filecontent = file
            else:
                filecontent = file.read()
            inputsrc.setByteStream(StringIO(filecontent))
            parser.parse(inputsrc)
        except Exception:
            # Best effort: failures are reported as None, but
            # KeyboardInterrupt/SystemExit are no longer swallowed.
            return None
        return chandler
예제 #19
0
 def parse(self, file=None, string=None):
     """
     Run the SAX parser over XML input and return the first parsed node.

     C{file} wins when both arguments are supplied; when neither is
     supplied nothing is parsed and C{None} is returned.
     @param file: Input handed directly to the SAX parser.
     @type file: I{file-like} object.
     @param string: XML text; it is encoded as UTF-8 when possible and
         used unchanged when encoding raises C{UnicodeDecodeError}.
     @type string: str
     @return: The first entry of the content handler's C{nodes}.
     """
     stopwatch = metrics.Timer()
     stopwatch.start()
     reader, content = self.saxparser()
     if file is not None:
         reader.parse(file)
         stopwatch.stop()
         metrics.log.debug('sax (%s) duration: %s', file, stopwatch)
     elif string is not None:
         wrapped = InputSource(None)
         try:
             payload = StringIO(string.encode('utf8'))
         except UnicodeDecodeError:
             # Fall back to the string exactly as it was passed in.
             payload = StringIO(string)
         wrapped.setByteStream(payload)
         reader.parse(wrapped)
         stopwatch.stop()
         metrics.log.debug('%s\nsax duration: %s', string, stopwatch)
     else:
         return None
     return content.nodes[0]
예제 #20
0
def addSectionTags(content):
    """Feed *content* through a SAX parse driven by ``docHandler``.

    Returns the handler's ``document`` attribute encoded as UTF-8.
    """
    from cStringIO import StringIO

    source = InputSource()
    source.setByteStream(StringIO(content))

    # Build the XML parser and attach the handler and resolver
    reader = make_parser()
    builder = docHandler()
    reader.setContentHandler(builder)
    resolver = EntityResolver()
    reader.setEntityResolver(resolver)

    # External general entities are enabled on purpose here.
    # NOTE(review): unsafe if content can come from an untrusted source
    # -- confirm against callers.
    reader.setFeature(feature_external_ges, True)

    # The handler's callbacks fire while the source is parsed
    reader.parse(source)
    return builder.document.encode('UTF-8')
예제 #21
0
def addSectionTags(content):
    """Feed *content* through a SAX parse driven by ``docHandler``.

    Returns the handler's ``document`` attribute encoded as UTF-8.
    """
    from cStringIO import StringIO

    source = InputSource()
    source.setByteStream(StringIO(content))

    # Build the XML parser and attach the handler and resolver
    reader = make_parser()
    builder = docHandler()
    reader.setContentHandler(builder)
    resolver = EntityResolver()
    reader.setEntityResolver(resolver)

    # External general entities are enabled on purpose here.
    # NOTE(review): unsafe if content can come from an untrusted source
    # -- confirm against callers.
    reader.setFeature(feature_external_ges, True)

    # The handler's callbacks fire while the source is parsed
    reader.parse(source)
    return builder.document.encode('UTF-8')
예제 #22
0
 def parse(self, file=None, url=None, string=None):
     """
     SAX parse XML from a file-like object, a URL or a string.

     The first of C{file}, C{url} and C{string} that is not C{None} is
     used; when all three are C{None} nothing is parsed and C{None} is
     returned.
     @param file: Input handed directly to the SAX parser.
     @param url: Location opened through C{self.transport}.
     @param string: XML text, wrapped in a byte stream for parsing.
     @return: The first entry of the content handler's C{nodes}.
     """
     timer = metrics.Timer()
     timer.start()
     sax, handler = self.saxparser()
     if file is not None:
         sax.parse(file)
         timer.stop()
         metrics.log.debug('sax (%s) duration: %s', file, timer)
         return handler.nodes[0]
     if url is not None:
         fp = self.transport.open(Request(url))
         try:
             sax.parse(fp)
         finally:
             # We opened the transport stream, so release it here; the
             # lookup is guarded because the stream type is not visible
             # from this method.
             close = getattr(fp, 'close', None)
             if close is not None:
                 close()
         timer.stop()
         metrics.log.debug('sax (%s) duration: %s', url, timer)
         return handler.nodes[0]
     if string is not None:
         source = InputSource(None)
         source.setByteStream(StringIO(string))
         sax.parse(source)
         timer.stop()
         metrics.log.debug('%s\nsax duration: %s', string, timer)
         return handler.nodes[0]
예제 #23
0
    def __init__(self, session, config, parent):
        """Create the SAX parser and wire up its error and content handlers.

        Three settings are read through ``get_setting``: 'namespaces'
        turns on the SAX namespaces feature, 'attrHash' fills the content
        handler's ``hashAttributesNames`` and 'stripWhitespace' sets the
        handler's ``stripWS`` flag.
        """
        Parser.__init__(self, session, config, parent)
        self.parser = make_parser()
        self.errorHandler = ErrorHandler()
        self.parser.setErrorHandler(self.errorHandler)
        self.inputSource = SaxInput()
        ch = SaxContentHandler()
        self.contentHandler = ch
        self.parser.setContentHandler(ch)
        self.keepError = 1

        if (self.get_setting(session, 'namespaces')):
            self.parser.setFeature('http://xml.org/sax/features/namespaces', 1)
        p = self.get_setting(session, 'attrHash')
        if (p):
            # Whitespace-separated "a@b" items; presumably tag@attribute
            # -- TODO confirm against the configuration docs.
            for i in p.split():
                (a, b) = i.split("@")
                try:
                    ch.hashAttributesNames[a].append(b)
                except KeyError:
                    # First value seen for this key.
                    ch.hashAttributesNames[a] = [b]
        if self.get_setting(session, 'stripWhitespace'):
            ch.stripWS = 1
예제 #24
0
    def parseXLIFFFile(self, file):
        """Parse an XLIFF document given as a string or a readable object.

        ``file`` is either the document itself (a string) or an object
        whose ``read()`` method returns it.  Returns the XLIFFHandler that
        received the parse events, or None when reading or parsing raised
        an error.
        """
        # Create a parser
        parser = make_parser()
        chandler = XLIFFHandler()
        # Tell the parser to use our handler
        parser.setContentHandler(chandler)
        # Don't load the DTD from the Internet
        parser.setFeature(handler.feature_external_ges, 0)
        inputsrc = InputSource()

        try:
            if isinstance(file, StringType):
                filecontent = file
            else:
                filecontent = file.read()
            inputsrc.setByteStream(StringIO(filecontent))
            parser.parse(inputsrc)
        except Exception:
            # Best effort: failures are reported as None, but
            # KeyboardInterrupt/SystemExit are no longer swallowed.
            return None
        return chandler
예제 #25
0
    def parse(self, xml):
        """Parse the XML held in the string ``xml`` via ``parseSource``."""
        buffered = StringIO(xml)
        source = InputSource()
        source.setByteStream(buffered)
        self.parseSource(source)
예제 #26
0
class SaxParser(BaseParser):
    """ Default SAX based parser. Creates SaxRecord """

    _possibleSettings = {
        'namespaces': {'docs': "Enable namespace processing in SAX"},
        'stripWhitespace': {'docs': "Strip additional whitespace when processing."},
        'attrHash': {'docs': "Tag/Attribute combinations to include in hash."},
    }

    def __init__(self, session, config, parent):
        """Create the SAX parser, its handlers and apply the settings."""
        # NOTE(review): the base class is BaseParser but Parser.__init__
        # is what gets called -- confirm the two are the same class.
        Parser.__init__(self, session, config, parent)
        self.parser = make_parser()
        self.errorHandler = ErrorHandler()
        self.parser.setErrorHandler(self.errorHandler)
        self.inputSource = SaxInput()
        ch = SaxContentHandler()
        self.contentHandler = ch
        self.parser.setContentHandler(ch)
        # When true, process_document records errorPath on failure and
        # leaves the content handler state untouched.
        self.keepError = 1

        if (self.get_setting(session, 'namespaces')):
            self.parser.setFeature('http://xml.org/sax/features/namespaces', 1)
        p = self.get_setting(session, 'attrHash')
        if (p):
            # Whitespace-separated "a@b" items, one list of b per a.
            for i in p.split():
                (a, b) = i.split("@")
                try:
                    ch.hashAttributesNames[a].append(b)
                except KeyError:
                    # First value seen for this key.
                    ch.hashAttributesNames[a] = [b]
        if self.get_setting(session, 'stripWhitespace'):
            ch.stripWS = 1

    def process_document(self, session, doc):
        """Parse the raw data of ``doc`` and return it as a SaxRecord.

        Any error raised by the parse is re-raised.  Before re-raising,
        when ``keepError`` is set, ``errorPath`` is built from the content
        handler's open-element lines; otherwise the handler is reset.
        """
        xml = doc.get_raw(session)
        self.inputSource.setByteStream(cStringIO.StringIO(xml))
        ch = self.contentHandler
        ch.reinit()
        try:
            self.parser.parse(self.inputSource)
        except:
            # Splat.  Reset self and reraise
            if self.keepError:
                # Work out path
                path = []
                for l in ch.pathLines:
                    line = ch.currentText[l]
                    # Element name sits between the 2-char prefix and the
                    # character before the first '{' of the line.
                    elemName = line[2:line.index('{')-1]
                    path.append("%s[@SAXID='%s']" % (elemName, l))
                self.errorPath = '/'.join(path)
            else:
                ch.reinit()

            raise
        rec = SaxRecord(ch.currentText, xml, wordCount=ch.recordWordCount)
        rec.elementHash = ch.elementHash
        rec.byteCount = len(xml)
        self._copyData(doc, rec)
        ch.reinit()
        return rec
예제 #27
0
class SaxParser(BaseParser):
    """ Default SAX based parser. Creates SaxRecord """

    _possibleSettings = {
        'namespaces': {
            'docs': "Enable namespace processing in SAX"
        },
        'stripWhitespace': {
            'docs': "Strip additional whitespace when processing."
        },
        'attrHash': {
            'docs': "Tag/Attribute combinations to include in hash."
        }
    }

    def __init__(self, session, config, parent):
        """Create the SAX parser, its handlers and apply the settings."""
        # NOTE(review): the base class is BaseParser but Parser.__init__
        # is what gets called -- confirm the two are the same class.
        Parser.__init__(self, session, config, parent)
        self.parser = make_parser()
        self.errorHandler = ErrorHandler()
        self.parser.setErrorHandler(self.errorHandler)
        self.inputSource = SaxInput()
        ch = SaxContentHandler()
        self.contentHandler = ch
        self.parser.setContentHandler(ch)
        # When true, process_document records errorPath on failure and
        # leaves the content handler state untouched.
        self.keepError = 1

        if (self.get_setting(session, 'namespaces')):
            self.parser.setFeature('http://xml.org/sax/features/namespaces', 1)
        p = self.get_setting(session, 'attrHash')
        if (p):
            # Whitespace-separated "a@b" items, one list of b per a.
            for i in p.split():
                (a, b) = i.split("@")
                try:
                    ch.hashAttributesNames[a].append(b)
                except KeyError:
                    # First value seen for this key.
                    ch.hashAttributesNames[a] = [b]
        if self.get_setting(session, 'stripWhitespace'):
            ch.stripWS = 1

    def process_document(self, session, doc):
        """Parse the raw data of ``doc`` and return it as a SaxRecord.

        Any error raised by the parse is re-raised.  Before re-raising,
        when ``keepError`` is set, ``errorPath`` is built from the content
        handler's open-element lines; otherwise the handler is reset.
        """
        xml = doc.get_raw(session)
        self.inputSource.setByteStream(cStringIO.StringIO(xml))
        ch = self.contentHandler
        ch.reinit()
        try:
            self.parser.parse(self.inputSource)
        except:
            # Splat.  Reset self and reraise
            if self.keepError:
                # Work out path
                path = []
                for l in ch.pathLines:
                    line = ch.currentText[l]
                    # Element name sits between the 2-char prefix and the
                    # character before the first '{' of the line.
                    elemName = line[2:line.index('{') - 1]
                    path.append("%s[@SAXID='%s']" % (elemName, l))
                self.errorPath = '/'.join(path)
            else:
                ch.reinit()

            raise
        rec = SaxRecord(ch.currentText, xml, wordCount=ch.recordWordCount)
        rec.elementHash = ch.elementHash
        rec.byteCount = len(xml)
        self._copyData(doc, rec)
        ch.reinit()
        return rec
예제 #28
0
class SaxParser(Parser, ContentHandler):
    """SAX parser that is its own content handler.

    Each SAX event appends one text line to ``currentText`` ("#elem",
    "#end", "#elemNS", "#endNS", "#text"); ``process_document`` wraps the
    collected lines in a ``SaxRecord``.
    """
    # Non-zero while process_document is running on this instance.
    locked = 0
    # Class-level defaults; process_document replaces the per-document
    # containers with fresh objects before every parse.
    currentText = []
    currentPath = []
    pathLines = []
    currentLine = -1
    recordSize = 0
    elementHash = {}

    def __init__(self, parent, config):
        """Create the SAX parser and register self as its content handler."""
        C3Object.__init__(self, parent, config)
        self.parser = make_parser()
        self.inputSource = SaxInput()
        self.errorHandler = ErrorHandler()
        self.parser.setErrorHandler(self.errorHandler)
        self.parser.setContentHandler(self)

    def _reset_state(self):
        """Discard everything collected from a previous parse."""
        self.currentText = []
        self.pathLines = []
        self.currentLine = -1
        self.elementHash = {}
        self.elementIndexes = []
        self.recordSize = 0

    def _parse_fresh(self, xml):
        """Parse ``xml`` from scratch with a new byte stream and state."""
        self.inputSource.setByteStream(StringIO.StringIO(xml))
        self._reset_state()
        self.parser.parse(self.inputSource)

    def process_document(self, session, doc):
        """Parse the raw data of ``doc`` and return it as a SaxRecord.

        A failed parse is retried once; the error of the second attempt
        propagates.  Raises ValueError when the instance is already busy
        with another document.  The lock is always released on exit, so a
        failed document no longer leaves the parser unusable.
        """
        if (self.locked):
            # Shouldn't be reusing across threads anyway!
            # XXX: Can we instantiate a new self ??
            raise ValueError("SaxParser is already processing a document")
        self.locked = 1
        try:
            xml = doc.get_raw()
            try:
                self._parse_fresh(xml)
            except Exception:
                # Try again... sometimes odd things happen
                self._parse_fresh(xml)

            self.currentText.append("#hash " + repr(self.elementHash))
            return SaxRecord(self.currentText, xml,
                             recordSize=self.recordSize)
        finally:
            self.locked = 0

    # We want to fwd elems to NS elem handlers with default NS
    def startElement(self, name, attrs):
        """Record an "#elem" line with attributes, parent line and index."""
        self.currentLine += 1
        attrHash = {}
        for k in attrs.keys():
            attrHash[k] = attrs[k]
        self.pathLines.append(self.currentLine)
        if (len(self.pathLines) > 1):
            parent = self.pathLines[-2]
        else:
            parent = -1

        # npred: how many elements with this name have been opened so far
        # under the current parent, this one included.
        if (self.currentLine == 0):
            npred = 1
            self.elementIndexes = [{name: npred}]
        elif name in self.elementIndexes[-1]:
            npred = self.elementIndexes[-1][name] + 1
            self.elementIndexes[-1][name] = npred
        else:
            npred = 1
            self.elementIndexes[-1][name] = 1
        # Counter table for the children of this element.
        self.elementIndexes.append({})
        ptxt = "#elem %s %s %d %d" % (name, repr(attrHash), parent, npred)
        self.currentText.append(ptxt)

    def endElement(self, name):
        """Record an "#end" line and index the element's line span."""
        self.currentLine += 1
        start = self.pathLines.pop()
        self.currentText.append("#end %s %d" % (name, start))
        # Append the closing line number to the matching "#elem" line.
        self.currentText[start] = "%s %d" % (self.currentText[start], self.currentLine)
        self.elementIndexes.pop()
        if name in self.elementHash:
            self.elementHash[name].append([start, self.currentLine])
        else:
            self.elementHash[name] = [[start, self.currentLine]]

    def startElementNS(self, name, qname, attrs):
        """Record an "#elemNS" line for a namespaced start tag."""
        self.currentLine += 1
        attrHash = {}
        for k in attrs.keys():
            attrHash[k] = attrs[k]
        ptxt = "#elemNS %s %s %s" % (name, qname, repr(attrHash))
        self.currentText.append(ptxt)

    def endElementNS(self, name, qname):
        """Record an "#endNS" line for a namespaced end tag."""
        self.currentLine += 1
        self.currentText.append("#endNS %s %s" % (name, qname))

    def characters(self, text, start=0, length=-1):
        """Record a "#text" line; whitespace-only text becomes one space."""
        if text.isspace():
            text = " "
        self.currentLine += 1
        self.currentText.append("#text %s" % (text))
        self.recordSize += len(text.split())

    def processingInstruction(self, target, data):
        """Processing instructions are ignored."""
        pass

    def skippedEntity(self, name):
        """Skipped entities are ignored."""
        pass
예제 #29
0
def stream(str):
    """Return a SAX ``InputSource`` whose byte stream reads from *str*."""
    buffered = StringIO(str)
    source = InputSource()
    source.setByteStream(buffered)
    return source