示例#1
0
    def fetch_text(xml_file):
        """Return the concatenated text of the grabbed elements of *xml_file*.

        Elements are selected by an ``XmlPathFilter`` built from the
        ``.*/samenvatting``, ``.*/beschrijving`` and ``.*/annotatie`` patterns.
        The text of every element handed to the listener is joined without a
        separator, in the order the grabber reports it.

        Args:
            xml_file: path of the XML file to read.

        Returns:
            The joined element text (empty string when nothing is reported).

        Raises:
            AssertionError: if *xml_file* does not exist.
        """
        assert os.path.exists(xml_file)

        # Gather the pieces in a list and join once, instead of growing a
        # string stored as an attribute on the listener function.
        pieces = []

        def listener(path, element, filter):
            # Skip elements that carry no text (assumes a missing text is
            # ``None``, as in ElementTree -- TODO confirm) rather than
            # failing on the concatenation.
            if element.text is not None:
                pieces.append(element.text)

        grabber = XmlGrabber()
        grabber.addXmlElementFilter(XmlPathFilter([ ".*/samenvatting", ".*/beschrijving", ".*/annotatie"]))
        grabber.grab(xml_file, listener)

        return ''.join(pieces)
示例#2
0
文件: text.py 项目: barmalei/scalpel
class XmlText(Text):
    """Text whose content is pulled out of XML data by an ``XmlGrabber``."""

    def __init__(self, data, lang = 'en', filter = None):
        """Prepare the grabber, then delegate to ``Text.__init__``.

        Args:
            data: passed through to ``Text.__init__``.
            lang: passed through to ``Text.__init__``; defaults to ``'en'``.
            filter: element filter registered on the grabber. When omitted,
                a fresh ``XmlPathFilter(".*")`` is created for this instance.
        """
        if filter is None:
            filter = XmlPathFilter(".*")
        self.grabber = XmlGrabber()
        # Honour the caller's filter: the argument used to be ignored and
        # ".*" was always registered.
        self.grabber.addXmlElementFilter(filter)
        # The grabber is set up before the base initializer runs --
        # presumably Text.__init__ ends up calling _set_data(); TODO confirm.
        Text.__init__(self, data, lang)

    def _set_data(self, data, encoding):
        """Return the stripped text of every grabbed element, one per line.

        ``encoding`` is accepted but not used by this implementation.
        """
        text = []

        def listener(xml_path, element, filter):
            # Elements without text are skipped (assumes a missing text is
            # ``None`` -- TODO confirm) instead of failing on ``.strip()``.
            if element.text is not None:
                text.append(element.text.strip())

        # NOTE(review): this calls the grabber object directly rather than a
        # ``grab`` method; it relies on XmlGrabber being callable -- confirm.
        self.grabber(data, listener)
        return '\n'.join(text)
示例#3
0
class XmlText(Text):
    """Text built from the element texts that an ``XmlGrabber`` reports."""

    def __init__(self, data, lang='en', filter=None):
        """Create the grabber, register the filter and initialise ``Text``.

        Args:
            data: forwarded unchanged to ``Text.__init__``.
            lang: forwarded unchanged to ``Text.__init__`` (default ``'en'``).
            filter: element filter to register on the grabber; a new
                ``XmlPathFilter(".*")`` is used when none is given.
        """
        self.grabber = XmlGrabber()
        # The supplied filter is now actually used; before, the parameter
        # was dropped and ".*" was registered unconditionally.
        if filter is None:
            filter = XmlPathFilter(".*")
        self.grabber.addXmlElementFilter(filter)
        # Base initializer runs last -- presumably because it triggers
        # _set_data(), which needs self.grabber; TODO confirm.
        Text.__init__(self, data, lang)

    def _set_data(self, data, encoding):
        """Join the stripped text of all grabbed elements with newlines.

        The ``encoding`` argument is not consulted here.
        """
        text = []

        def listener(xml_path, element, filter):
            # Guard against elements with no text (assumed to be ``None`` --
            # TODO confirm), which would otherwise break on ``.strip()``.
            if element.text is not None:
                text.append(element.text.strip())

        # NOTE(review): the grabber instance is invoked as a callable here,
        # not through a ``grab`` method -- verify XmlGrabber supports that.
        self.grabber(data, listener)
        return '\n'.join(text)
示例#4
0
def fetch_text(xml_file):
    """Write the grabbed element text of *xml_file* to ``xml_file + ".txt"``.

    Elements are selected by an ``XmlPathFilter`` built from the
    ``.*/samenvatting`` pattern. Their text is written, UTF-8 encoded and
    without separators, in the order the grabber reports them.

    Args:
        xml_file: path of the XML file to read.

    Returns:
        The name of the text file that was written.

    Raises:
        AssertionError: if *xml_file* does not exist.
    """
    assert os.path.exists(xml_file)

    output_file_name = xml_file + ".txt"
    with codecs.open(output_file_name, "w", "utf-8") as f:

        def listener(path, element, filter):
            # Skip elements that carry no text (assumes a missing text is
            # ``None``, as in ElementTree -- TODO confirm) rather than
            # failing inside ``write``.
            if element.text is not None:
                f.write(element.text)

        grabber = XmlGrabber()
        # NOTE(review): the registration method here is ``addXmlPathFilter``;
        # confirm it exists on the XmlGrabber version in use.
        grabber.addXmlPathFilter(XmlPathFilter([".*/samenvatting"]))
        grabber.grab(xml_file, listener)

    return output_file_name
示例#5
0
 def test_xmlgrabber(self):
     """Count the elements reported to a listener for two grabber setups."""
     global counter

     def count_hit(xml_path, element, filter):
         # Every reported element bumps the module-level tally.
         global counter
         counter = counter + 1

     pattern = ".*/selectiepositiedragerannotatie"

     # Default-constructed grabber, pattern wrapped in a one-item list:
     # no element is expected to be reported.
     counter = 0
     default_grabber = XmlGrabber()
     default_grabber.addXmlElementFilter(XmlPathFilter([pattern]))
     default_grabber.grab(xml_file, count_hit)
     self.assertEqual(0, counter)

     # Grabber constructed with ``False``, pattern passed as a plain string:
     # exactly one element is expected to be reported.
     counter = 0
     flagged_grabber = XmlGrabber(False)
     flagged_grabber.addXmlElementFilter(XmlPathFilter(pattern))
     flagged_grabber.grab(xml_file, count_hit)
     self.assertEqual(1, counter)
示例#6
0
    def test_xml_elements_collector(self):
        """Exercise ``XmlElementsCollector`` directly and through a subclass."""
        # Grabbing with a plain XmlElementsCollector is expected to raise
        # NotImplementedError.
        with self.assertRaises(NotImplementedError):
            plain_grabber = XmlGrabber()
            plain_collector = XmlElementsCollector('root')
            plain_grabber.addXmlElementFilter(TrueXmlElementFilter())
            plain_grabber.grab(xml_file, plain_collector)

        class MyXmlElementsCollector(XmlElementsCollector):
            """Collector that attaches every grabbed element to the root."""

            def grabbed(self, path, element, filter):
                root = self.root_element()
                root.add_kid(element)

        grabber = XmlGrabber()
        collector = MyXmlElementsCollector('root')
        grabber.addXmlElementFilter(TrueXmlElementFilter())
        grabber.grab(xml_file, collector)

        # The string form of the collected tree has a known length.
        rendered = str(collector.root_element())
        self.assertEqual(len(rendered), 10026)
示例#7
0
    def fetch_text(xml_file):
        """Collect and return the text of the grabbed elements of *xml_file*.

        The grabber is given an ``XmlPathFilter`` for the patterns
        ``.*/samenvatting``, ``.*/beschrijving`` and ``.*/annotatie``. The
        text of each element passed to the listener is appended in reporting
        order and the pieces are joined with no separator.

        Args:
            xml_file: path of the XML file to read.

        Returns:
            The joined element text; ``''`` when no element is reported.

        Raises:
            AssertionError: if *xml_file* does not exist.
        """
        assert os.path.exists(xml_file)

        # A list joined once replaces repeated ``+=`` on a string that was
        # kept as an attribute of the listener function.
        collected = []

        def listener(path, element, filter):
            # Elements with no text are ignored (assumes a missing text is
            # ``None`` -- TODO confirm) so they cannot break the result.
            if element.text is not None:
                collected.append(element.text)

        grabber = XmlGrabber()
        grabber.addXmlElementFilter(
            XmlPathFilter(
                [".*/samenvatting", ".*/beschrijving", ".*/annotatie"]))
        grabber.grab(xml_file, listener)

        return ''.join(collected)
示例#8
0
    def test_xml_elements_collector(self):
        """Check the plain collector fails and a subclass collects elements."""
        # First pass: an unmodified XmlElementsCollector must make the grab
        # raise NotImplementedError.
        with self.assertRaises(NotImplementedError):
            failing_grabber = XmlGrabber()
            base_collector = XmlElementsCollector('root')
            failing_grabber.addXmlElementFilter(TrueXmlElementFilter())
            failing_grabber.grab(xml_file, base_collector)

        class MyXmlElementsCollector(XmlElementsCollector):
            """Subclass whose callback adds each element under the root."""

            def grabbed(self, path, element, filter):
                parent = self.root_element()
                parent.add_kid(element)

        # Second pass: same filter, but with the subclass as the listener.
        working_grabber = XmlGrabber()
        kid_collector = MyXmlElementsCollector('root')
        working_grabber.addXmlElementFilter(TrueXmlElementFilter())
        working_grabber.grab(xml_file, kid_collector)

        tree_text = str(kid_collector.root_element())
        self.assertEqual(len(tree_text), 10026)
示例#9
0
文件: text.py 项目: barmalei/scalpel
 def __init__(self, data, lang = 'en', filter = None):
     """Prepare the grabber, then delegate to ``Text.__init__``.

     Args:
         data: passed through to ``Text.__init__``.
         lang: passed through to ``Text.__init__``; defaults to ``'en'``.
         filter: element filter registered on the grabber. When omitted,
             a fresh ``XmlPathFilter(".*")`` is created for this instance.
     """
     if filter is None:
         filter = XmlPathFilter(".*")
     self.grabber = XmlGrabber()
     # Honour the caller's filter: the argument used to be ignored and
     # ".*" was always registered.
     self.grabber.addXmlElementFilter(filter)
     Text.__init__(self, data, lang)
示例#10
0
    def test_xmlgrabber(self):
        """Verify the listener call count for two differently built grabbers."""
        global counter

        def tally(xml_path, element, filter):
            # One increment of the module-level counter per reported element.
            global counter
            counter = counter + 1

        path_pattern = ".*/selectiepositiedragerannotatie"

        # Case 1: XmlGrabber() with the pattern inside a list -> 0 reports.
        counter = 0
        first = XmlGrabber()
        first.addXmlElementFilter(XmlPathFilter([path_pattern]))
        first.grab(xml_file, tally)
        self.assertEqual(0, counter)

        # Case 2: XmlGrabber(False) with the bare pattern -> 1 report.
        counter = 0
        second = XmlGrabber(False)
        second.addXmlElementFilter(XmlPathFilter(path_pattern))
        second.grab(xml_file, tally)
        self.assertEqual(1, counter)
示例#11
0
 def f1():
     """Grab ``xml_file`` using a plain ``XmlElementsCollector('root')``."""
     grabber = XmlGrabber()
     collector = XmlElementsCollector('root')
     accept_all = TrueXmlElementFilter()
     grabber.addXmlElementFilter(accept_all)
     grabber.grab(xml_file, collector)
示例#12
0
 def __init__(self, data, lang='en', filter=None):
     """Create the grabber, register the filter and initialise ``Text``.

     Args:
         data: forwarded unchanged to ``Text.__init__``.
         lang: forwarded unchanged to ``Text.__init__`` (default ``'en'``).
         filter: element filter to register on the grabber; a new
             ``XmlPathFilter(".*")`` is used when none is given.
     """
     self.grabber = XmlGrabber()
     # The supplied filter is now actually used; before, the parameter
     # was dropped and ".*" was registered unconditionally.
     if filter is None:
         filter = XmlPathFilter(".*")
     self.grabber.addXmlElementFilter(filter)
     Text.__init__(self, data, lang)
示例#13
0
 def f1():
     """Run a grab over ``xml_file`` with the base elements collector."""
     xml_grabber = XmlGrabber()
     root_collector = XmlElementsCollector('root')
     # Register a TrueXmlElementFilter before grabbing.
     xml_grabber.addXmlElementFilter(TrueXmlElementFilter())
     xml_grabber.grab(xml_file, root_collector)