Exemplo n.º 1
0
 def test_casview(self):
     casxmi = CasXmiParser()
     casxmi.setXmiAsFile('tests/testing_data/document1.txt.xmi')
     str1='''<cas:View xmlns:cas="http:///uima/cas.ecore" xmlns:pos="http:///de/tudarmstadt/ukp/dkpro/core/api/lexmorph/type/pos.ecore" xmlns:tcas="http:///uima/tcas.ecore" xmlns:xmi="http://www.omg.org/XMI" xmlns:tweet="http:///de/tudarmstadt/ukp/dkpro/core/api/lexmorph/type/pos/tweet.ecore" xmlns:morph="http:///de/tudarmstadt/ukp/dkpro/core/api/lexmorph/type/morph.ecore" xmlns:type3="http:///de/tudarmstadt/ukp/dkpro/core/api/frequency/tfidf/type.ecore" xmlns:dependency="http:///de/tudarmstadt/ukp/dkpro/core/api/syntax/type/dependency.ecore" xmlns:type7="http:///de/tudarmstadt/ukp/dkpro/core/api/semantics/type.ecore" xmlns:type9="http:///de/tudarmstadt/ukp/dkpro/core/api/transform/type.ecore" xmlns:type="http:///de/tudarmstadt/ukp/dkpro/core/api/anomaly/type.ecore" xmlns:type8="http:///de/tudarmstadt/ukp/dkpro/core/api/syntax/type.ecore" xmlns:type4="http:///de/tudarmstadt/ukp/dkpro/core/api/metadata/type.ecore" xmlns:type5="http:///de/tudarmstadt/ukp/dkpro/core/api/ner/type.ecore" xmlns:type6="http:///de/tudarmstadt/ukp/dkpro/core/api/segmentation/type.ecore" xmlns:type2="http:///de/tudarmstadt/ukp/dkpro/core/api/coref/type.ecore" xmlns:constituent="http:///de/tudarmstadt/ukp/dkpro/core/api/syntax/type/constituent.ecore" xmlns:chunk="http:///de/tudarmstadt/ukp/dkpro/core/api/syntax/type/chunk.ecore" sofa="878" members="91 102 107 112 122 132 142 152 162 172 182 192 202 212 222 232 242 252 262 272 282 292 302 312 322 332 342 352 362 372 485 426 540 545 550 555 560 565 570 575 580 585 590 595 600 605 610 615 620 625 630 635 640 645 650 655 660 665 670 678 686 694 702 710 718 726 734 742 750 758 766 774 782 790 798 806 814 822 830 838 846 854 862 870"/>\n\n'''
     self.assertEqual(casxmi.convertChildToString(casxmi.getCasViewChild()),str1)
     "print(casxmi.getCasChildren())"
     "print(casxmi.getNonCasChildren())"
Exemplo n.º 2
0
 def buildCAS(self,xmifilepath,typefilepath):    
     #create type ystem object
     typesystem = TypeSystemFactory.readTypeSystem(self, typefilepath)
     #create a CAS object
     cas = CAS(typesystem)
     #create cas xmi perser object to fetch elements from xmi file
     casXmiParser = CasXmiParser()
     casXmiParser.setXmiAsFile(xmifilepath)
     return self.__build(cas, casXmiParser)
    def test_others(self):
        casxmi = CasXmiParser()
        casxmi.setXmiAsFile('document1.txt.xmi')                
        """
        print('all attributes in root');
        print(casxmi.getRootKeys);
        for name, value in casxmi.getRootAttributesAsList():
            if(casxmi.getNamespace(name) !=None):
                print(casxmi.getNamespace(name),':',casxmi.getLocalname(name),' = ',value)
            else:
                print(name,'=',value);    
        """
        """
        elementType="{http:///de/tudarmstadt/ukp/dkpro/core/api/metadata/type.ecore}TagDescription"
        """
        """
        elementType="{http:///de/tudarmstadt/ukp/dkpro/core/api/metadata/type.ecore}TagsetDescription"
        """
        """
        elementType="{http:///de/tudarmstadt/ukp/dkpro/core/api/metadata/type.ecore}DocumentMetaData"
        """
        """
        elementType="cas"
        """
        
        """
        print('filter tags ');
        for child in casxmi.getChildElementsFilterBytag(elementType):
            print('******child element =',casxmi.getChildTag(child),'   at index : ',casxmi.getChildIndex(child),'   ******')
            print('tagname: ',casxmi.getLocalname(child.tag),' namespace: ',casxmi.getNamespace(child.tag))        
            print(sorted(child.keys()));
            for name, value in casxmi.getChildAttributesAsList(child):
                if(casxmi.getNamespace(name) !=None):
                    print(casxmi.getNamespace(name),':',casxmi.getLocalname(name),' = ',value)
                else:
                    print(name,'=',value);    
            print(' ')
        """
        """
        print('return list of attribute name value pairs as a dict ');
        list1 = []
        loopvar = 0    
        for child in casxmi.getChildElementsFilterBytag(elementType):
            print('******child element =',casxmi.getChildTag(child),'   at index : ',casxmi.getChildIndex(child),'   ******')
            attributes = dict(child.attrib)
            list1.append(attributes)
            "print(attributes)"
            loopvar = loopvar + 1;
        print(list1)    
        for listitem in list:
            print(listitem)
            for key, value in listitem.items():
                print(key,value)
        """        

        
Exemplo n.º 4
0
 def test_getChildAttribteNames(self):
     casxmi = CasXmiParser()
     casxmi.setXmiAsFile('tests/testing_data/document1.txt.xmi')
     for child in casxmi.getRootElement():
         self.assertEqual(casxmi.getTag(child),'{http:///uima/cas.ecore}NULL')
         self.assertEqual(casxmi.getChildIndex(child), 0)
         for name, value in casxmi.getChildAttributesAsList(child):
             self.assertEqual(name,'{http://www.omg.org/XMI}id');
             self.assertEqual(value,'0');
         break
Exemplo n.º 5
0
 def test_root(self):
     casxmi = CasXmiParser()
     casxmi.setXmiAsFile('tests/testing_data/document1.txt.xmi')
     casxmi1 = CasXmiParser()
     self.assertEqual(casxmi1.getTag(casxmi.getRootElement()),'{http://www.omg.org/XMI}XMI')
     self.assertEqual(casxmi1.getLocalname(casxmi.getRootElement()),'XMI')
     self.assertEqual(casxmi1.getNamespace(casxmi.getRootElement()),'http://www.omg.org/XMI')
     self.assertEqual(casxmi.getRootTagAsDict(), {'XMI': 'http://www.omg.org/XMI'})
     self.assertEqual(casxmi.getRootAttribteNames(), ['{http://www.omg.org/XMI}version'])
     self.assertEqual(casxmi.getRootAttributesAsDict(),{'{http://www.omg.org/XMI}version': '2.0'})
     self.assertEqual(casxmi.getRootAttributesAsList(),[('{http://www.omg.org/XMI}version', '2.0')])
Exemplo n.º 6
0
    def test_others(self):
        casxmi = CasXmiParser()
        casxmi.setXmiAsFile('tests/testing_data/document1.txt.xmi')
        """
        print('all attributes in root');
        print(casxmi.getRootKeys);
        for name, value in casxmi.getRootAttributesAsList():
            if(casxmi.getNamespace(name) !=None):
                print(casxmi.getNamespace(name),':',casxmi.getLocalname(name),' = ',value)
            else:
                print(name,'=',value);
        """
        """
        elementType="{http:///de/tudarmstadt/ukp/dkpro/core/api/metadata/type.ecore}TagDescription"
        """
        """
        elementType="{http:///de/tudarmstadt/ukp/dkpro/core/api/metadata/type.ecore}TagsetDescription"
        """
        """
        elementType="{http:///de/tudarmstadt/ukp/dkpro/core/api/metadata/type.ecore}DocumentMetaData"
        """
        """
        elementType="cas"
        """

        """
        print('filter tags ');
        for child in casxmi.getChildElementsFilterBytag(elementType):
            print('******child element =',casxmi.getChildTag(child),'   at index : ',casxmi.getChildIndex(child),'   ******')
            print('tagname: ',casxmi.getLocalname(child.tag),' namespace: ',casxmi.getNamespace(child.tag))
            print(sorted(child.keys()));
            for name, value in casxmi.getChildAttributesAsList(child):
                if(casxmi.getNamespace(name) !=None):
                    print(casxmi.getNamespace(name),':',casxmi.getLocalname(name),' = ',value)
                else:
                    print(name,'=',value);
            print(' ')
        """
        """
Exemplo n.º 7
0
 def test_findChildByAttribute(self):
     casxmi = CasXmiParser()
     casxmi.setXmiAsFile('tests/testing_data/document1.txt.xmi')
     str1='''<type4:DocumentMetaData xmlns:type4="http:///de/tudarmstadt/ukp/dkpro/core/api/metadata/type.ecore" xmlns:xmi="http://www.omg.org/XMI" xmlns:pos="http:///de/tudarmstadt/ukp/dkpro/core/api/lexmorph/type/pos.ecore" xmlns:tcas="http:///uima/tcas.ecore" xmlns:cas="http:///uima/cas.ecore" xmlns:tweet="http:///de/tudarmstadt/ukp/dkpro/core/api/lexmorph/type/pos/tweet.ecore" xmlns:morph="http:///de/tudarmstadt/ukp/dkpro/core/api/lexmorph/type/morph.ecore" xmlns:type3="http:///de/tudarmstadt/ukp/dkpro/core/api/frequency/tfidf/type.ecore" xmlns:dependency="http:///de/tudarmstadt/ukp/dkpro/core/api/syntax/type/dependency.ecore" xmlns:type7="http:///de/tudarmstadt/ukp/dkpro/core/api/semantics/type.ecore" xmlns:type9="http:///de/tudarmstadt/ukp/dkpro/core/api/transform/type.ecore" xmlns:type="http:///de/tudarmstadt/ukp/dkpro/core/api/anomaly/type.ecore" xmlns:type8="http:///de/tudarmstadt/ukp/dkpro/core/api/syntax/type.ecore" xmlns:type5="http:///de/tudarmstadt/ukp/dkpro/core/api/ner/type.ecore" xmlns:type6="http:///de/tudarmstadt/ukp/dkpro/core/api/segmentation/type.ecore" xmlns:type2="http:///de/tudarmstadt/ukp/dkpro/core/api/coref/type.ecore" xmlns:constituent="http:///de/tudarmstadt/ukp/dkpro/core/api/syntax/type/constituent.ecore" xmlns:chunk="http:///de/tudarmstadt/ukp/dkpro/core/api/syntax/type/chunk.ecore" xmi:id="91" sofa="878" begin="0" end="152" language="en" documentTitle="document1.txt" documentId="document1.txt" documentUri="file:/C:/Users/Dibyojyoti/Desktop/scriptip/document1.txt" collectionId="file:/C:/Users/Dibyojyoti/Desktop/scriptip/" documentBaseUri="file:/C:/Users/Dibyojyoti/Desktop/scriptip/" isLastSegment="false"/>\n'''
     self.assertEqual(etree.tostring(casxmi.findChildByAttribute('sofa')).decode("utf-8"),str1)
Exemplo n.º 8
0
 def test_getChildAttributesAsDict(self):
     casxmi = CasXmiParser()
     casxmi.setXmiAsFile('tests/testing_data/document1.txt.xmi')
     self.assertEqual(casxmi.getChildAttributesAsDict(casxmi.findChildByLocalname('Sentence')),
           {'end': '115', 'sofa': '878', '{http://www.omg.org/XMI}id': '102', 'begin': '0'})
Exemplo n.º 9
0
 def test_findChildrenByNamespace(self):
     casxmi = CasXmiParser()
     casxmi.setXmiAsFile('tests/testing_data/document1.txt.xmi')
     str1='''<type6:Sentence xmlns:type6="http:///de/tudarmstadt/ukp/dkpro/core/api/segmentation/type.ecore" xmlns:xmi="http://www.omg.org/XMI" xmlns:pos="http:///de/tudarmstadt/ukp/dkpro/core/api/lexmorph/type/pos.ecore" xmlns:tcas="http:///uima/tcas.ecore" xmlns:cas="http:///uima/cas.ecore" xmlns:tweet="http:///de/tudarmstadt/ukp/dkpro/core/api/lexmorph/type/pos/tweet.ecore" xmlns:morph="http:///de/tudarmstadt/ukp/dkpro/core/api/lexmorph/type/morph.ecore" xmlns:type3="http:///de/tudarmstadt/ukp/dkpro/core/api/frequency/tfidf/type.ecore" xmlns:dependency="http:///de/tudarmstadt/ukp/dkpro/core/api/syntax/type/dependency.ecore" xmlns:type7="http:///de/tudarmstadt/ukp/dkpro/core/api/semantics/type.ecore" xmlns:type9="http:///de/tudarmstadt/ukp/dkpro/core/api/transform/type.ecore" xmlns:type="http:///de/tudarmstadt/ukp/dkpro/core/api/anomaly/type.ecore" xmlns:type8="http:///de/tudarmstadt/ukp/dkpro/core/api/syntax/type.ecore" xmlns:type4="http:///de/tudarmstadt/ukp/dkpro/core/api/metadata/type.ecore" xmlns:type5="http:///de/tudarmstadt/ukp/dkpro/core/api/ner/type.ecore" xmlns:type2="http:///de/tudarmstadt/ukp/dkpro/core/api/coref/type.ecore" xmlns:constituent="http:///de/tudarmstadt/ukp/dkpro/core/api/syntax/type/constituent.ecore" xmlns:chunk="http:///de/tudarmstadt/ukp/dkpro/core/api/syntax/type/chunk.ecore" xmi:id="102" sofa="878" begin="0" end="115"/>\n\n'''
     self.assertEqual(casxmi.convertChildToString(
         casxmi.findChildrenByNamespace('http:///de/tudarmstadt/ukp/dkpro/core/api/segmentation/type.ecore')[0]),str1)
Exemplo n.º 10
0
 def test_cassofa(self):
     casxmi = CasXmiParser()
     casxmi.setXmiAsFile('tests/testing_data/document1.txt.xmi')
     str1='''<cas:Sofa xmlns:cas="http:///uima/cas.ecore" xmlns:xmi="http://www.omg.org/XMI" xmlns:pos="http:///de/tudarmstadt/ukp/dkpro/core/api/lexmorph/type/pos.ecore" xmlns:tcas="http:///uima/tcas.ecore" xmlns:tweet="http:///de/tudarmstadt/ukp/dkpro/core/api/lexmorph/type/pos/tweet.ecore" xmlns:morph="http:///de/tudarmstadt/ukp/dkpro/core/api/lexmorph/type/morph.ecore" xmlns:type3="http:///de/tudarmstadt/ukp/dkpro/core/api/frequency/tfidf/type.ecore" xmlns:dependency="http:///de/tudarmstadt/ukp/dkpro/core/api/syntax/type/dependency.ecore" xmlns:type7="http:///de/tudarmstadt/ukp/dkpro/core/api/semantics/type.ecore" xmlns:type9="http:///de/tudarmstadt/ukp/dkpro/core/api/transform/type.ecore" xmlns:type="http:///de/tudarmstadt/ukp/dkpro/core/api/anomaly/type.ecore" xmlns:type8="http:///de/tudarmstadt/ukp/dkpro/core/api/syntax/type.ecore" xmlns:type4="http:///de/tudarmstadt/ukp/dkpro/core/api/metadata/type.ecore" xmlns:type5="http:///de/tudarmstadt/ukp/dkpro/core/api/ner/type.ecore" xmlns:type6="http:///de/tudarmstadt/ukp/dkpro/core/api/segmentation/type.ecore" xmlns:type2="http:///de/tudarmstadt/ukp/dkpro/core/api/coref/type.ecore" xmlns:constituent="http:///de/tudarmstadt/ukp/dkpro/core/api/syntax/type/constituent.ecore" xmlns:chunk="http:///de/tudarmstadt/ukp/dkpro/core/api/syntax/type/chunk.ecore" xmi:id="878" sofaNum="1" sofaID="_InitialView" mimeType="text" sofaString="These steps install the basis system requirements needed to implement DKPro Core pipelines using the Java language. They need to be performed only once."/>\n\n'''
     self.assertEqual(casxmi.convertChildToString(casxmi.getCasSofaChild()),str1)
Exemplo n.º 11
0
 def test_casnull(self):
     casxmi = CasXmiParser()
     casxmi.setXmiAsFile('tests/testing_data/document1.txt.xmi')
     str1='''<cas:NULL xmlns:cas="http:///uima/cas.ecore" xmlns:xmi="http://www.omg.org/XMI" xmlns:pos="http:///de/tudarmstadt/ukp/dkpro/core/api/lexmorph/type/pos.ecore" xmlns:tcas="http:///uima/tcas.ecore" xmlns:tweet="http:///de/tudarmstadt/ukp/dkpro/core/api/lexmorph/type/pos/tweet.ecore" xmlns:morph="http:///de/tudarmstadt/ukp/dkpro/core/api/lexmorph/type/morph.ecore" xmlns:type3="http:///de/tudarmstadt/ukp/dkpro/core/api/frequency/tfidf/type.ecore" xmlns:dependency="http:///de/tudarmstadt/ukp/dkpro/core/api/syntax/type/dependency.ecore" xmlns:type7="http:///de/tudarmstadt/ukp/dkpro/core/api/semantics/type.ecore" xmlns:type9="http:///de/tudarmstadt/ukp/dkpro/core/api/transform/type.ecore" xmlns:type="http:///de/tudarmstadt/ukp/dkpro/core/api/anomaly/type.ecore" xmlns:type8="http:///de/tudarmstadt/ukp/dkpro/core/api/syntax/type.ecore" xmlns:type4="http:///de/tudarmstadt/ukp/dkpro/core/api/metadata/type.ecore" xmlns:type5="http:///de/tudarmstadt/ukp/dkpro/core/api/ner/type.ecore" xmlns:type6="http:///de/tudarmstadt/ukp/dkpro/core/api/segmentation/type.ecore" xmlns:type2="http:///de/tudarmstadt/ukp/dkpro/core/api/coref/type.ecore" xmlns:constituent="http:///de/tudarmstadt/ukp/dkpro/core/api/syntax/type/constituent.ecore" xmlns:chunk="http:///de/tudarmstadt/ukp/dkpro/core/api/syntax/type/chunk.ecore" xmi:id="0"/>\n\n'''
     self.assertEqual(casxmi.convertChildToString(casxmi.getCasNullChild()),str1)
Exemplo n.º 12
0
 def test_getChildTagAsDict(self):
     casxmi = CasXmiParser()
     casxmi.setXmiAsFile('tests/testing_data/document1.txt.xmi')
     self.assertEqual(casxmi.getRootElement().items()[0],('{http://www.omg.org/XMI}version', '2.0'))
     self.assertEqual(casxmi.getChildTagAsDict(casxmi.getCasSofaChild()),{'Sofa': 'http:///uima/cas.ecore'})
Exemplo n.º 13
0
 def test_witeXmiAsFile(self):
     casxmi = CasXmiParser()
     casxmi.setXmiAsFile('tests/testing_data/document1.txt.xmi')
     #provide proper path before running
     """casxmi.witeXmiAsFile('C:/Users/Dibyojyoti/Desktop/scriptop/document1_op.txt.xmi')