Python XMLParser.feed示例，lxml.etree.XMLParser.feed Python示例

示例#1

0

显示文件

文件： pushparser.py 项目： seecr/meresco-xml

class PushParser(object):
    """Thin push-style wrapper around an XMLParser.

    The wrapped parser targets a SubTreesTreeBuilder; data handed to
    feed() is passed straight through to that parser.
    """

    def __init__(self, elementPath, onResultDo):
        """Create the wrapped parser.

        elementPath -- forwarded to SubTreesTreeBuilder as ``elementPath``.
        onResultDo -- forwarded to SubTreesTreeBuilder as ``onResult``.
        """
        self._parser = XMLParser(
            target=SubTreesTreeBuilder(
                elementPath=elementPath,
                onResult=onResultDo,
            ),
        )

    def feed(self, data):
        """Hand the next piece of input to the wrapped parser."""
        wrapped = self._parser
        wrapped.feed(data)

示例#2

0

显示文件

文件： ValidateFilingText.py 项目： JTYim/Arelle

def checkfile(modelXbrl, filepath):
    """Scan a file for disallowed characters/entities and return its text without the XML declaration.

    The file is obtained from ``modelXbrl.fileSource.file(filepath)`` and read
    line by line.  Every ``docCheckPattern`` match in a line is reported through
    ``modelXbrl.error``:

    * a match starting with "&" that is not in ``xhtmlEntities`` is reported as
      a disallowed entity code;
    * any other match is reported as a disallowed character, but only when the
      disclosure system's validation type is "EFM" and the root element has not
      been identified as one of the testcase-style element names.

    Lines are also fed to an ``XMLParser`` until its target has seen the first
    start tag, which is how the root element's name is discovered.

    Returns:
        A ``(io.StringIO, encoding)`` tuple: the accumulated text (with the XML
        declaration removed when one was found) and the encoding value that
        ``fileSource.file`` returned.
    """
    result = []
    lineNum = 1
    foundXmlDeclaration = False
    isEFM = modelXbrl.modelManager.disclosureSystem.validationType == "EFM"
    file, encoding = modelXbrl.fileSource.file(filepath)
    # Filled in by the parser target below once the first start tag is seen.
    parserResults = {}
    # Parser target: records whether the element's local name is a testcase-style name.
    class checkFileType(object):
        def start(self, tag, attr): # check root XML element type
            parserResults["rootIsTestcase"] = tag.rpartition("}")[2] in ("testcases", "documentation", "testSuite", "testcase", "testSet")
        def end(self, tag): pass
        def data(self, data): pass
        def close(self): pass
    _parser = XMLParser(target=checkFileType())
    # Stays False until the parser target has reported on the root element, so
    # lines read before that point are checked as if the file is not a testcase.
    _isTestcase = False
    
    with file as f:
        while True:
            line = f.readline()
            # readline() yields "" only at end of file (blank lines still contain "\n").
            if line == "":
                break;
            # check for disallowed characters or entity codes
            # (the reported column is match.start(), i.e. the offset within the
            # line as read, before any XML declaration is removed from it)
            for match in docCheckPattern.finditer(line):
                text = match.group()
                if text.startswith("&"):
                    if not text in xhtmlEntities:
                        modelXbrl.error(("EFM.5.02.02.06", "GFM.1.01.02"),
                            _("Disallowed entity code %(text)s in file %(file)s line %(line)s column %(column)s"),
                            modelDocument=filepath, text=text, file=os.path.basename(filepath), line=lineNum, column=match.start())
                elif isEFM and not _isTestcase:
                    if len(text) == 1:
                        # Single character: also report its code point as U+XXXX.
                        modelXbrl.error("EFM.5.02.01.01",
                            _("Disallowed character '%(text)s' (%(unicodeIndex)s) in file %(file)s at line %(line)s col %(column)s"),
                            modelDocument=filepath, text=text, unicodeIndex="U+{:04X}".format(ord(text)), 
                            file=os.path.basename(filepath), line=lineNum, column=match.start())
                    else:
                        modelXbrl.error("EFM.5.02.01.01",
                            _("Disallowed character '%(text)s' in file %(file)s at line %(line)s col %(column)s"),
                            modelDocument=filepath, text=text, file=os.path.basename(filepath), line=lineNum, column=match.start())
            # Only the first line is searched here; a declaration spanning
            # several lines is handled after the loop.
            if lineNum == 1:
                xmlDeclarationMatch = XMLdeclaration.search(line)
                if xmlDeclarationMatch: # remove it for lxml
                    start,end = xmlDeclarationMatch.span()
                    line = line[0:start] + line[end:]
                    foundXmlDeclaration = True
            if _parser: # feed line after removal of xml declaration
                # The parser receives UTF-8 bytes; characters that cannot be encoded are dropped.
                _parser.feed(line.encode('utf-8','ignore'))
                if "rootIsTestcase" in parserResults: # root XML element has been encountered
                    _isTestcase = parserResults["rootIsTestcase"]
                    _parser = None # no point to parse past the root element
            result.append(line)
            lineNum += 1
    result = ''.join(result)
    if not foundXmlDeclaration: # may be multiline, try again
        xmlDeclarationMatch = XMLdeclaration.search(result)
        if xmlDeclarationMatch: # remove it for lxml
            start,end = xmlDeclarationMatch.span()
            result = result[0:start] + result[end:]
            foundXmlDeclaration = True
    return (io.StringIO(initial_value=result), encoding)

示例#3

0

显示文件

文件： ValidateFilingText.py 项目： fewang0521/python_dart

def checkfile(modelXbrl, filepath):
    """Scan a file for disallowed characters/entities and return its text without the XML declaration.

    The file is obtained from ``modelXbrl.fileSource.file(filepath)`` and read
    line by line.  Every ``docCheckPattern`` match in a line is reported through
    ``modelXbrl.error``:

    * a match starting with "&" that is not in ``xhtmlEntities`` is reported as
      a disallowed entity code;
    * any other match is reported as a disallowed character, but only when the
      disclosure system's validation type is "EFM" and the root element has not
      been identified as one of the testcase-style element names.

    Lines are also fed to an ``XMLParser`` until its target has seen the first
    start tag, which is how the root element's name is discovered.

    Returns:
        A ``(io.StringIO, encoding)`` tuple: the accumulated text (with the XML
        declaration removed when one was found) and the encoding value that
        ``fileSource.file`` returned.
    """
    result = []
    lineNum = 1
    foundXmlDeclaration = False
    isEFM = modelXbrl.modelManager.disclosureSystem.validationType == "EFM"
    file, encoding = modelXbrl.fileSource.file(filepath)
    # Filled in by the parser target below once the first start tag is seen.
    parserResults = {}
    # Parser target: records whether the element's local name is a testcase-style name.
    class checkFileType(object):
        def start(self, tag, attr): # check root XML element type
            parserResults["rootIsTestcase"] = tag.rpartition("}")[2] in ("testcases", "documentation", "testSuite", "testcase", "testSet")
        def end(self, tag): pass
        def data(self, data): pass
        def close(self): pass
    _parser = XMLParser(target=checkFileType())
    # Stays False until the parser target has reported on the root element, so
    # lines read before that point are checked as if the file is not a testcase.
    _isTestcase = False
    
    with file as f:
        while True:
            line = f.readline()
            # readline() yields "" only at end of file (blank lines still contain "\n").
            if line == "":
                break;
            # check for disallowed characters or entity codes
            # (the reported column is match.start(), i.e. the offset within the
            # line as read, before any XML declaration is removed from it)
            for match in docCheckPattern.finditer(line):
                text = match.group()
                if text.startswith("&"):
                    if not text in xhtmlEntities:
                        modelXbrl.error(("EFM.5.02.02.06", "GFM.1.01.02"),
                            _("Disallowed entity code %(text)s in file %(file)s line %(line)s column %(column)s"),
                            modelDocument=filepath, text=text, file=os.path.basename(filepath), line=lineNum, column=match.start())
                elif isEFM and not _isTestcase:
                    if len(text) == 1:
                        # Single character: also report its code point as U+XXXX.
                        modelXbrl.error("EFM.5.02.01.01",
                            _("Disallowed character '%(text)s' (%(unicodeIndex)s) in file %(file)s at line %(line)s col %(column)s"),
                            modelDocument=filepath, text=text, unicodeIndex="U+{:04X}".format(ord(text)), 
                            file=os.path.basename(filepath), line=lineNum, column=match.start())
                    else:
                        modelXbrl.error("EFM.5.02.01.01",
                            _("Disallowed character '%(text)s' in file %(file)s at line %(line)s col %(column)s"),
                            modelDocument=filepath, text=text, file=os.path.basename(filepath), line=lineNum, column=match.start())
            # Only the first line is searched here; a declaration spanning
            # several lines is handled after the loop.
            if lineNum == 1:
                xmlDeclarationMatch = XMLdeclaration.search(line)
                if xmlDeclarationMatch: # remove it for lxml
                    start,end = xmlDeclarationMatch.span()
                    line = line[0:start] + line[end:]
                    foundXmlDeclaration = True
            if _parser: # feed line after removal of xml declaration
                # The parser receives UTF-8 bytes; characters that cannot be encoded are dropped.
                _parser.feed(line.encode('utf-8','ignore'))
                if "rootIsTestcase" in parserResults: # root XML element has been encountered
                    _isTestcase = parserResults["rootIsTestcase"]
                    _parser = None # no point to parse past the root element
            result.append(line)
            lineNum += 1
    result = ''.join(result)
    if not foundXmlDeclaration: # may be multiline, try again
        xmlDeclarationMatch = XMLdeclaration.search(result)
        if xmlDeclarationMatch: # remove it for lxml
            start,end = xmlDeclarationMatch.span()
            result = result[0:start] + result[end:]
            foundXmlDeclaration = True
    return (io.StringIO(initial_value=result), encoding)

示例#4

0

显示文件

class PushParser(object):
    """Thin push-style wrapper around an XMLParser.

    The wrapped parser targets a SubTreesTreeBuilder; data handed to
    feed() is passed straight through to that parser.
    """

    def __init__(self, elementPath, onResultDo):
        """Create the wrapped parser.

        elementPath -- forwarded to SubTreesTreeBuilder as ``elementPath``.
        onResultDo -- forwarded to SubTreesTreeBuilder as ``onResult``.
        """
        subtreeTarget = SubTreesTreeBuilder(
            elementPath=elementPath, onResult=onResultDo)
        self._parser = XMLParser(target=subtreeTarget)

    def feed(self, data):
        """Hand the next piece of input to the wrapped parser."""
        wrapped = self._parser
        wrapped.feed(data)

示例#5

0

显示文件

文件： subtreestreebuildertest.py 项目： seecr/meresco-xml

 def testOnResult(self):
     """Feeding two <a><b>..</b></a> fragments under <r> reports two subtrees.

     The builder is configured with elementPath ['r', 'a', 'b'] and an
     onResult callback that collects every reported tree.
     """
     trees = []
     def onResult(tree):
         trees.append(tree)
     xml = """<a><b>Dit is een tag in een tag</b></a>"""
     builder = SubTreesTreeBuilder(elementPath=['r', 'a', 'b'], onResult=onResult)
     parser = XMLParser(target=builder)
     # The root element is opened once and deliberately left open so the same
     # fragment can be fed repeatedly.
     parser.feed("<r>")
     parser.feed(xml)
     parser.feed(xml)
     # assertEqual replaces the deprecated assertEquals alias, which no longer
     # exists in Python 3.12+.
     self.assertEqual(2, len(trees))
     self.assertEqual('<b>Dit is een tag in een tag</b>', tostring(trees[0]))

示例#6

0

显示文件

文件： subtreestreebuildertest.py 项目： seecr/meresco-xml

    def testIdentityTransformWithNS(self):
        """A buildFor predicate matching only the namespaced root yields the whole document.

        The single reported subtree must be registered under the key 'one'
        and be equal to XML_NS parsed in one go.
        """
        builder = SubTreesTreeBuilder(buildFor={
            'one': lambda stack: [d['tag'] for d in stack] == ['{u:ri/default#}root'],
        })
        parser = XMLParser(target=builder)
        parser.feed(XML_NS)
        parser.close()

        subtrees = list(builder.getSubtrees())
        # assertEqual replaces the deprecated assertEquals alias, which no
        # longer exists in Python 3.12+.
        self.assertEqual(1, len(subtrees))

        # Local names chosen so they do not shadow the builtin ``id`` or the
        # ``lxml`` package name.
        identifier, subtree = subtrees[0]
        self.assertEqual('one', identifier)
        self.assertEqualsLxml(parseString(XML_NS), subtree)

示例#7

0

显示文件

文件： streaminglxmltest.py 项目： seecr/meresco-oai

 def testFilterTag(self):
     """Only the <mies> element is kept when the target is created for 'mies'.

     The document is fed in four separate pieces to exercise incremental
     parsing.
     """
     target = Target('mies')
     p = XMLParser(target = target)
     p.feed("<aap><mies>")
     p.feed("noot")
     p.feed("</mies>")
     p.feed("</aap>")
     # assertEqual replaces the deprecated assertEquals alias, which no longer
     # exists in Python 3.12+.
     self.assertEqual("<mies>noot</mies>", lxmltostring(target.root))

示例#8

0

显示文件

 def testFilterTag(self):
     """Only the <mies> element is kept when the target is created for 'mies'."""
     target = Target('mies')
     parser = XMLParser(target=target)
     # Feed the document piecewise to exercise incremental parsing.
     for piece in ("<aap><mies>", "noot", "</mies>", "</aap>"):
         parser.feed(piece)
     serialized = lxmltostring(target.root)
     self.assertEqual("<mies>noot</mies>", serialized)

示例#9

0

显示文件

文件： parse.py 项目： bukzor/RefactorLib

def dictnode_to_lxml(tree, node_lookup=None, encoding=None):
    """
    Convert a dictionary-based node tree into an lxml element tree.

    Each node dictionary is read for the following keys:
        name -- The type of node, a string. In html, this would be the tag name.
        text -- The content of the node: <b>text</b>
        tail -- Any content after the end of this node, but before the start of the next: <br/>tail
        attrs -- (optional) A dictionary of any extra attributes.
        children -- An ordered list of more node-dictionaries.

    node_lookup -- element class lookup installed on the parser that creates
        the root; when falsy, refactorlib.node.node_lookup is imported and used.
    encoding -- passed to XMLParser when creating the root element.

    Returns the root lxml element.
    """
    if not node_lookup:
        from refactorlib.node import node_lookup

    from lxml.etree import Element, XMLParser

    root = None
    # Depth-first walk with an explicit stack of (node dict, parent element).
    pending = [(tree, None)]

    while pending:
        node, parent = pending.pop()

        # Re-insert the attributes in sorted key order for determinism.
        raw_attrs = node.get('attrs', {})
        attrs = {}
        for key in sorted(raw_attrs):
            attrs[key] = raw_attrs[key]

        if parent is not None:
            lxmlnode = Element(node['name'], attrib=attrs)
            parent.append(lxmlnode)
        else:
            # The root is produced by parsing a dummy document and renaming
            # the result, because the encoding is always set to 'UTF8' if
            # parser.makeelement() is used instead.
            root_parser = XMLParser(encoding=encoding)
            root_parser.set_element_class_lookup(node_lookup)
            root_parser.feed(b'<a/>')
            lxmlnode = root_parser.close()
            lxmlnode.tag = node['name']
            lxmlnode.attrib.update(attrs)
            root = lxmlnode

        lxmlnode.text = node['text']
        lxmlnode.tail = node['tail']

        # Push children in reverse so they are popped (and appended to their
        # parent) in their original order.
        pending.extend(
            (child, lxmlnode) for child in reversed(node['children'])
        )

    return root

示例#10

0

显示文件

文件： parse.py 项目： campaul/RefactorLib

def dictnode_to_lxml(tree, node_lookup=None, encoding=None):
    """
    Convert a dictionary-based node tree into an lxml element tree.

    Each node dictionary is read for the following keys:
        name -- The type of node, a string. In html, this would be the tag name.
        text -- The content of the node: <b>text</b>
        tail -- Any content after the end of this node, but before the start of the next: <br/>tail
        attrs -- (optional) A dictionary of any extra attributes.
        children -- An ordered list of more node-dictionaries.

    node_lookup -- element class lookup installed on the parser; when falsy,
        node.node_lookup is imported and used.
    encoding -- passed to XMLParser.

    Returns the root lxml element.
    """
    if not node_lookup:
        from node import node_lookup

    from lxml.etree import XMLParser
    lxml_parser_object = XMLParser(encoding=encoding)
    lxml_parser_object.set_element_class_lookup(node_lookup)
    # Non-root elements are created through the same parser.
    Element = lxml_parser_object.makeelement

    root = None
    # Depth-first walk with an explicit stack of (node dict, parent element).
    pending = [(tree, None)]

    while pending:
        node, parent = pending.pop()

        if parent is not None:
            lxmlnode = Element(node['name'], attrib=node.get('attrs', {}))
            parent.append(lxmlnode)
        else:
            # The root is produced by parsing a dummy document and renaming
            # the result, because the encoding is always set to 'UTF8' if
            # parser.makeelement() is used instead.
            lxml_parser_object.feed('<trash></trash>')
            lxmlnode = lxml_parser_object.close()
            lxmlnode.tag = node['name']
            lxmlnode.attrib.update(node.get('attrs', {}))
            root = lxmlnode

        lxmlnode.text = node['text']
        lxmlnode.tail = node['tail']

        # Push children in reverse so they are popped (and appended to their
        # parent) in their original order.
        pending.extend(
            (child, lxmlnode) for child in reversed(node['children'])
        )

    return root

示例#11

0

显示文件

文件： subtreestreebuildertest.py 项目： seecr/meresco-xml

def parseIncrementallyBy20(builder, inputXml):
    """Parse inputXml in reads of size 20, collecting the builder's subtrees.

    After every chunk fed to the parser, and once more after the parser is
    closed, all (id, subtree) pairs from builder.getSubtrees() are collected.

    Returns a (result, loops) tuple: the list of collected pairs and the
    number of chunks that were fed.  Asserts that closing the parser returned
    None and that the number of chunks equals ceil(len(inputXml) / 20.0).
    """
    parser = XMLParser(target=builder)
    xmlStream = StringIO(inputXml)
    collected = []
    loops = 0
    while True:
        chunk = xmlStream.read(20)
        if not chunk:
            break
        loops += 1
        parser.feed(chunk)
        collected.extend(
            (ident, subtree) for ident, subtree in builder.getSubtrees())
    closeResult = parser.close()
    # Closing may complete further subtrees, so drain the builder once more.
    collected.extend(
        (ident, subtree) for ident, subtree in builder.getSubtrees())
    assert closeResult is None, 'Errr?'
    assert ceil(len(inputXml) / 20.0) == loops, 'Errr?'
    return collected, loops

示例#12

0

显示文件

文件： subtreestreebuilder.py 项目： seecr/meresco-xml

    def start(self):
        """Parse self._stream in reads of 4096 and pass each built subtree to self._callback.

        A subtree is built whenever the list of 'tag' values on the builder's
        stack equals self._path.
        """
        def matchesPath(stack):
            tags = [entry['tag'] for entry in stack]
            return tags == self._path

        builder = SubTreesTreeBuilder(buildFor={'simple': matchesPath})

        def deliverSubtrees():
            for _ident, subtree in builder.getSubtrees():
                self._callback(subtree)

        parser = XMLParser(target=builder)

        while True:
            chunk = self._stream.read(4096)
            if not chunk:
                break
            parser.feed(chunk)
            deliverSubtrees()
        parser.close()
        # Closing may complete further subtrees, so deliver once more.
        deliverSubtrees()

示例#13

0

显示文件

    def start(self):
        """Parse self._stream in reads of 4096 and pass each built subtree to self._callback.

        A subtree is built whenever the list of 'tag' values on the builder's
        stack equals self._path.
        """
        def matchesPath(stack):
            tags = [entry['tag'] for entry in stack]
            return tags == self._path

        builder = SubTreesTreeBuilder(buildFor={'simple': matchesPath})

        def deliverSubtrees():
            for _ident, subtree in builder.getSubtrees():
                self._callback(subtree)

        parser = XMLParser(target=builder)

        while True:
            chunk = self._stream.read(4096)
            if not chunk:
                break
            parser.feed(chunk)
            deliverSubtrees()
        parser.close()
        # Closing may complete further subtrees, so deliver once more.
        deliverSubtrees()

示例#14

0

显示文件

文件： streaminglxmltest.py 项目： seecr/meresco-oai

 def testTwoTags(self):
     """An <aap> element fed in three pieces ends up complete in target.root."""
     target = Target('aap')
     p = XMLParser(target = target)
     p.feed("<aap>")
     p.feed("noot")
     p.feed("</aap>")
     # assertEqual replaces the deprecated assertEquals alias, which no longer
     # exists in Python 3.12+.
     self.assertEqual("<aap>noot</aap>", lxmltostring(target.root))

示例#15

0

显示文件

 def testTwoTags(self):
     """An <aap> element fed in three pieces ends up complete in target.root."""
     target = Target('aap')
     parser = XMLParser(target=target)
     # Feed the document piecewise to exercise incremental parsing.
     for piece in ("<aap>", "noot", "</aap>"):
         parser.feed(piece)
     serialized = lxmltostring(target.root)
     self.assertEqual("<aap>noot</aap>", serialized)

示例#16

0

显示文件

文件： parser.py 项目： invenia/iterparse

def iterparse(source, events=('end',), tag=None, **kwargs):
    """
    Iteratively parse XML read from ``source``, yielding the events the
    target has completed after each chunk is fed.

    source: The XML stream to parse; read via ``source.read(size)``.
    events: Passed to MinimalTarget as ``events``.
    tag: The iterable of tags to fire events on (passed as ``tags``).
    size: (optional, 1024) The amount requested per read.
    strip_namespace, ignore_namespace, debug: (optional, False) Passed on
        to MinimalTarget.

    Any other keyword arguments are passed on to XMLParser.
    """
    # Keywords that XMLParser does not accept are popped here before the
    # remainder is handed to it.
    #
    # http://lxml.de/api/lxml.etree.XMLParser-class.html
    # http://lxml.de/api/lxml.etree.iterparse-class.html
    chunk_size = kwargs.pop('size', 1024)
    target_options = {
        option: kwargs.pop(option, False)
        for option in ('strip_namespace', 'ignore_namespace', 'debug')
    }

    target = MinimalTarget(events=events, tags=tag, **target_options)
    parser = XMLParser(target=target, **kwargs)

    while True:
        raw = source.read(chunk_size)
        if not raw:
            break
        try:
            parser.feed(raw)
        finally:
            # Note (from the original author): when exceptions are raised
            # within the parser the target's close method will be called.
            # Whatever was completed is still handed out before any
            # exception continues to propagate.
            pending = target.completed_events
            while pending:
                yield pending.pop(0)

示例#17

0

显示文件

def iterparse(source, events=('end', ), tag=None, **kwargs):
    """
    Iteratively parse XML read from ``source``, yielding the events the
    target has completed after each chunk is fed.

    source: The XML stream to parse; read via ``source.read(size)``.
    events: Passed to MinimalTarget as ``events``.
    tag: The iterable of tags to fire events on (passed as ``tags``).
    size: (optional, 1024) The amount requested per read.
    strip_namespace, ignore_namespace, debug: (optional, False) Passed on
        to MinimalTarget.

    Any other keyword arguments are passed on to XMLParser.
    """
    # Keywords that XMLParser does not accept are popped here before the
    # remainder is handed to it.
    #
    # http://lxml.de/api/lxml.etree.XMLParser-class.html
    # http://lxml.de/api/lxml.etree.iterparse-class.html
    chunk_size = kwargs.pop('size', 1024)
    target_options = {
        option: kwargs.pop(option, False)
        for option in ('strip_namespace', 'ignore_namespace', 'debug')
    }

    target = MinimalTarget(events=events, tags=tag, **target_options)
    parser = XMLParser(target=target, **kwargs)

    while True:
        raw = source.read(chunk_size)
        if not raw:
            break
        try:
            parser.feed(raw)
        finally:
            # Note (from the original author): when exceptions are raised
            # within the parser the target's close method will be called.
            # Whatever was completed is still handed out before any
            # exception continues to propagate.
            pending = target.completed_events
            while pending:
                yield pending.pop(0)

示例#18

0

显示文件

 def run(self, xmlPath, xmlFile):
     """Parse the records in xmlPath as children of a synthetic <Root> element.

     The file is fed to a recovering XMLParser line by line, wrapped in
     <Root> ... </Root>.  self.nodeTree is the parser target; its count()
     is called before each line is fed and its result() is logged as the
     row count once parsing has finished.

     xmlPath -- path of the file to open and parse.
     xmlFile -- name used for the file in log messages.

     A ParseError raised while feeding a single line is logged and parsing
     continues with the next line.  Any other exception is logged and
     re-raised.
     """
     logger.info(f'{self.name}, normalise start ...')
     # Bound up front so the outer handler can always format its message;
     # otherwise a failure before result() would raise NameError there and
     # hide the real exception.
     rowcount = None
     try:
         parser = XMLParser(target=self.nodeTree, recover=True)
         logger.info(f'parsing {xmlFile} ...')
         with open(xmlPath, 'r') as fhr:
             parser.feed('<Root>\n')
             for xmlRecord in fhr:
                 try:
                     self.nodeTree.count()
                     parser.feed(xmlRecord)
                 except ParseError:
                     # A message is required: logger.error(exc_info=True)
                     # alone raises TypeError instead of logging.
                     logger.error(f'parse error in {xmlFile}', exc_info=True)
             # Proper end tag for the synthetic root (was '<\Root>', which
             # is not a closing tag).
             parser.feed('</Root>\n')
             parser.close()
         rowcount = self.nodeTree.result()
         logger.info(f'### {xmlFile} rowcount : {rowcount}')
     except Exception:
         errMsg = f'xml inputFile, recnum : {xmlFile}, {rowcount}'
         logger.error(errMsg, exc_info=True)
         raise

示例#19

0

显示文件

文件： ValidateFilingText.py 项目： Arelle/Arelle

def checkfile(modelXbrl, filepath):
    """Scan a file for disallowed characters/entities and return its text without the XML declaration.

    The file is obtained from ``modelXbrl.fileSource.file(filepath)`` and read
    line by line.  Every ``docCheckPattern`` match in a line is reported through
    ``modelXbrl.error``:

    * a match starting with "&" that is not in ``xhtmlEntities`` is reported as
      a disallowed entity code;
    * any other match is reported as a disallowed character, but only when the
      disclosure system's validation type is "EFM" and the root element has not
      been identified as one of the testcase-style element names.

    Lines are also fed to an ``XMLParser`` until its target has seen the first
    start tag.  That tag decides whether the file is a testcase and whether it
    is, or may be, an inline document; for inline documents every
    ``inlineSelfClosedElementPattern`` match whose group 3 is not in
    ``elementsWithNoContent`` is reported through ``modelXbrl.warning``.

    Returns:
        A ``(io.StringIO, encoding)`` tuple: the accumulated text (with the XML
        declaration removed when one was found) and the encoding value that
        ``fileSource.file`` returned.
    """
    result = []
    lineNum = 1
    foundXmlDeclaration = False
    isEFM = modelXbrl.modelManager.disclosureSystem.validationType == "EFM"
    file, encoding = modelXbrl.fileSource.file(filepath)
    # Filled in by the parser target below once the first start tag is seen.
    parserResults = {}
    # Parser target: records facts about the element whose start tag it receives.
    class checkFileType(object):
        def start(self, tag, attr, nsmap=None): # check root XML element type
            parserResults["rootIsTestcase"] = tag.rpartition("}")[2] in ("testcases", "documentation", "testSuite", "testcase", "testSet")
            if tag in ("{http://www.w3.org/1999/xhtml}html", "{http://www.w3.org/1999/xhtml}xhtml"):
                # Inline for certain when one of the declared namespaces is in
                # ixbrlAll; otherwise decided later by searching the lines.
                if nsmap and any(ns in ixbrlAll for ns in nsmap.values()):
                    parserResults["isInline"] = True
                else:
                    parserResults["maybeInline"] = True
        def end(self, tag): pass
        def data(self, data): pass
        def close(self): pass
    _parser = XMLParser(target=checkFileType())
    # Stays False until the parser target has reported on the root element, so
    # lines read before that point are checked as if the file is not a testcase.
    _isTestcase = False
    mayBeInline = isInline = False
    
    with file as f:
        while True:
            line = f.readline()
            # readline() yields "" only at end of file (blank lines still contain "\n").
            if line == "":
                break;
            # check for disallowed characters or entity codes
            # (the reported column is match.start(), i.e. the offset within the
            # line as read, before any XML declaration is removed from it)
            for match in docCheckPattern.finditer(line):
                text = match.group()
                if text.startswith("&"):
                    if not text in xhtmlEntities:
                        modelXbrl.error(("EFM.5.02.02.06", "GFM.1.01.02"),
                            _("Disallowed entity code %(text)s in file %(file)s line %(line)s column %(column)s"),
                            modelDocument=filepath, text=text, file=os.path.basename(filepath), line=lineNum, column=match.start())
                elif isEFM and not _isTestcase:
                    if len(text) == 1:
                        # Single character: also report its code point as U+XXXX.
                        modelXbrl.error("EFM.5.02.01.01",
                            _("Disallowed character '%(text)s' (%(unicodeIndex)s) in file %(file)s at line %(line)s col %(column)s"),
                            modelDocument=filepath, text=text, unicodeIndex="U+{:04X}".format(ord(text)), 
                            file=os.path.basename(filepath), line=lineNum, column=match.start())
                    else:
                        modelXbrl.error("EFM.5.02.01.01",
                            _("Disallowed character '%(text)s' in file %(file)s at line %(line)s col %(column)s"),
                            modelDocument=filepath, text=text, file=os.path.basename(filepath), line=lineNum, column=match.start())
            # Only the first line is searched here; a declaration spanning
            # several lines is handled after the loop.
            if lineNum == 1:
                xmlDeclarationMatch = XMLdeclaration.search(line)
                if xmlDeclarationMatch: # remove it for lxml
                    start,end = xmlDeclarationMatch.span()
                    line = line[0:start] + line[end:]
                    foundXmlDeclaration = True
            if _parser: # feed line after removal of xml declaration
                # The parser receives UTF-8 bytes; characters that cannot be encoded are dropped.
                _parser.feed(line.encode('utf-8','ignore'))
                if "rootIsTestcase" in parserResults: # root XML element has been encountered
                    _isTestcase = parserResults["rootIsTestcase"]
                    if "isInline" in parserResults:
                        isInline = True
                    elif "maybeInline" in parserResults:
                        mayBeInline = True
                    _parser = None # no point to parse past the root element
            # An undecided document becomes inline as soon as a line matches inlinePattern.
            if mayBeInline and inlinePattern.search(line):
                mayBeInline = False
                isInline = True
            if isInline:
                for match in inlineSelfClosedElementPattern.finditer(line):
                    # Group 3 is presumably the element's local name and group 1
                    # the name as written -- confirm against the pattern definition.
                    selfClosedLocalName = match.group(3)
                    if selfClosedLocalName not in elementsWithNoContent:
                        modelXbrl.warning("ixbrl:selfClosedTagWarning",
                                          _("Self-closed element \"%(element)s\" may contain text or other elements and should not use self-closing tag syntax (/>) when empty; change these to end-tags in file %(file)s line %(line)s column %(column)s"),
                                          modelDocument=filepath, element=match.group(1), file=os.path.basename(filepath), line=lineNum, column=match.start())
            result.append(line)
            lineNum += 1
    result = ''.join(result)
    if not foundXmlDeclaration: # may be multiline, try again
        xmlDeclarationMatch = XMLdeclaration.search(result)
        if xmlDeclarationMatch: # remove it for lxml
            start,end = xmlDeclarationMatch.span()
            result = result[0:start] + result[end:]
            foundXmlDeclaration = True

    return (io.StringIO(initial_value=result), encoding)

示例#20

0

显示文件

def checkfile(modelXbrl, filepath):
    """Scan a file for disallowed characters/entities and return its text without the XML declaration.

    The file is obtained from ``modelXbrl.fileSource.file(filepath)`` and read
    line by line.  Every ``docCheckPattern`` match in a line is reported through
    ``modelXbrl.error``:

    * a match starting with "&" that is not in ``xhtmlEntities`` is reported as
      a disallowed entity code;
    * any other match is reported as a disallowed character, but only when the
      disclosure system's validation type is "EFM" and the root element has not
      been identified as one of the testcase-style element names.

    Lines are also fed to an ``XMLParser`` (created with ``huge_tree=True``)
    until its target has seen the first start tag.  That tag decides whether
    the file is a testcase and whether it is, or may be, an inline document;
    for inline documents every ``inlineSelfClosedElementPattern`` match whose
    group 3 is not in ``elementsWithNoContent`` is reported through
    ``modelXbrl.warning``.

    Returns:
        A ``(io.StringIO, encoding)`` tuple: the accumulated text (with the XML
        declaration removed when one was found) and the encoding value that
        ``fileSource.file`` returned.
    """
    result = []
    lineNum = 1
    foundXmlDeclaration = False
    isEFM = modelXbrl.modelManager.disclosureSystem.validationType == "EFM"
    file, encoding = modelXbrl.fileSource.file(filepath)
    # Filled in by the parser target below once the first start tag is seen.
    parserResults = {}
    # Parser target: records facts about the element whose start tag it receives.
    class checkFileType(object):
        def start(self, tag, attr, nsmap=None): # check root XML element type
            parserResults["rootIsTestcase"] = tag.rpartition("}")[2] in ("testcases", "documentation", "testSuite", "testcase", "testSet")
            if tag in ("{http://www.w3.org/1999/xhtml}html", "{http://www.w3.org/1999/xhtml}xhtml"):
                # Inline for certain when one of the declared namespaces is in
                # ixbrlAll; otherwise decided later by searching the lines.
                if nsmap and any(ns in ixbrlAll for ns in nsmap.values()):
                    parserResults["isInline"] = True
                else:
                    parserResults["maybeInline"] = True
        def end(self, tag): pass
        def data(self, data): pass
        def close(self): pass
    _parser = XMLParser(target=checkFileType(),huge_tree=True)
    # Stays False until the parser target has reported on the root element, so
    # lines read before that point are checked as if the file is not a testcase.
    _isTestcase = False
    mayBeInline = isInline = False
    
    with file as f:
        while True:
            line = f.readline()
            # readline() yields "" only at end of file (blank lines still contain "\n").
            if line == "":
                break;
            # check for disallowed characters or entity codes
            # (the reported column is match.start(), i.e. the offset within the
            # line as read, before any XML declaration is removed from it)
            for match in docCheckPattern.finditer(line):
                text = match.group()
                if text.startswith("&"):
                    if not text in xhtmlEntities:
                        modelXbrl.error(("EFM.5.02.02.06", "GFM.1.01.02"),
                            _("Disallowed entity code %(text)s in file %(file)s line %(line)s column %(column)s"),
                            modelDocument=filepath, text=text, file=os.path.basename(filepath), line=lineNum, column=match.start())
                elif isEFM and not _isTestcase:
                    if len(text) == 1:
                        # Single character: also report its code point as U+XXXX.
                        modelXbrl.error("EFM.5.02.01.01",
                            _("Disallowed character '%(text)s' (%(unicodeIndex)s) in file %(file)s at line %(line)s col %(column)s"),
                            modelDocument=filepath, text=text, unicodeIndex="U+{:04X}".format(ord(text)), 
                            file=os.path.basename(filepath), line=lineNum, column=match.start())
                    else:
                        modelXbrl.error("EFM.5.02.01.01",
                            _("Disallowed character '%(text)s' in file %(file)s at line %(line)s col %(column)s"),
                            modelDocument=filepath, text=text, file=os.path.basename(filepath), line=lineNum, column=match.start())
            # Only the first line is searched here; a declaration spanning
            # several lines is handled after the loop.
            if lineNum == 1:
                xmlDeclarationMatch = XMLdeclaration.search(line)
                if xmlDeclarationMatch: # remove it for lxml
                    start,end = xmlDeclarationMatch.span()
                    line = line[0:start] + line[end:]
                    foundXmlDeclaration = True
            if _parser: # feed line after removal of xml declaration
                # The parser receives UTF-8 bytes; characters that cannot be encoded are dropped.
                _parser.feed(line.encode('utf-8','ignore'))
                if "rootIsTestcase" in parserResults: # root XML element has been encountered
                    _isTestcase = parserResults["rootIsTestcase"]
                    if "isInline" in parserResults:
                        isInline = True
                    elif "maybeInline" in parserResults:
                        mayBeInline = True
                    _parser = None # no point to parse past the root element
            # An undecided document becomes inline as soon as a line matches inlinePattern.
            if mayBeInline and inlinePattern.search(line):
                mayBeInline = False
                isInline = True
            if isInline:
                for match in inlineSelfClosedElementPattern.finditer(line):
                    # Group 3 is presumably the element's local name and group 1
                    # the name as written -- confirm against the pattern definition.
                    selfClosedLocalName = match.group(3)
                    if selfClosedLocalName not in elementsWithNoContent:
                        modelXbrl.warning("ixbrl:selfClosedTagWarning",
                                          _("Self-closed element \"%(element)s\" may contain text or other elements and should not use self-closing tag syntax (/>) when empty; change these to end-tags in file %(file)s line %(line)s column %(column)s"),
                                          modelDocument=filepath, element=match.group(1), file=os.path.basename(filepath), line=lineNum, column=match.start())
            result.append(line)
            lineNum += 1
    result = ''.join(result)
    if not foundXmlDeclaration: # may be multiline, try again
        xmlDeclarationMatch = XMLdeclaration.search(result)
        if xmlDeclarationMatch: # remove it for lxml
            start,end = xmlDeclarationMatch.span()
            result = result[0:start] + result[end:]
            foundXmlDeclaration = True

    return (io.StringIO(initial_value=result), encoding)

示例#21

0

显示文件

        self.group_name = ''

    def start(self, tag, attrib):
        """Handle an opening tag.

        Tags other than 'outline' are ignored.  An outline without a truthy
        'xmlUrl' attribute sets the current group name from its 'text'
        attribute; an outline with one is written to self.writer as a row of
        (group name, text, xmlUrl, htmlUrl or '').
        """
        if tag != 'outline':
            # Ignore anything not part of the outline
            return

        feed_url = attrib.get('xmlUrl')
        if feed_url:
            # Output a podcast entry
            row = (
                self.group_name,
                attrib['text'],
                attrib['xmlUrl'],
                attrib.get('htmlUrl', ''),
            )
            self.writer.writerow(row)
            return

        # Remember the current group
        self.group_name = attrib['text']

    def end(self, tag):
        """Ignore closing tags; nothing is done with them."""

    def data(self, data):
        """Ignore data inside nodes; nothing is done with it."""

    def close(self):
        """Do nothing; there is nothing special to finish here."""


# Script: feed 'podcasts.opml' to the parser one line at a time, with a
# PodcastListToCSV instance constructed on sys.stdout as the parser target.
target = PodcastListToCSV(sys.stdout)
parser = XMLParser(target=target)
with open('podcasts.opml', 'rt') as f:
    for line in f:
        parser.feed(line)
# Signal end of input to the parser once the whole file has been fed.
parser.close()