示例#1
0
    def create_tree(self,zone=None):
        """Create a new, empty tree root, register it in ``self.trees`` and return it.

        The root is a fresh ``Node`` with ``ord`` 0 and an empty
        ``_aux['descendants']`` list; ``zone`` is handed to its ``set_zone``.
        """
        tree_root = Node()

        # Initial state of a root that has no nodes below it yet.
        tree_root.ord = 0
        tree_root._aux['descendants'] = []

        tree_root.set_zone(zone)

        self.trees.append(tree_root)
        return tree_root
示例#2
0
    def load(self,args):
        """Read sentences from a CoNLL-U-like stream into ``self.bundles``.

        ``args`` is indexed by key: ``args['filehandle']`` is used when
        present; otherwise the file named by ``args['filename']`` is opened
        here and closed again before returning.  The stream is decoded as
        UTF-8.

        For each sentence a new ``Bundle`` is appended to ``self.bundles``
        and a ``Root`` is appended to that bundle's ``trees``.  Lines
        starting with ``#`` that precede the sentence are concatenated into
        ``root._aux['comment']``.  Every other non-blank line (except
        multiword-token range lines such as ``3-4``, which are skipped)
        becomes a ``Node`` whose attributes are filled from the tab-separated
        columns in the order given by ``Document.attrnames``; ``head`` and
        ``ord`` are converted to ``int`` (0 when not numeric).  A blank line
        ends the sentence: the root gets ``_aux['descendants']`` and every
        node gets its parent via ``set_parent``.

        Blank lines with no pending sentence are ignored, and a last sentence
        that is not followed by a blank line is still linked at end of input.

        Raises:
            KeyError: if ``args`` has neither ``'filehandle'`` nor
                ``'filename'``.
            IndexError: if a token line has fewer columns than
                ``Document.attrnames`` or a ``head`` points past the last
                node of its sentence.
        """

        def link_sentence(sentence):
            # sentence[0] is the root, the rest are the tokens in input order,
            # so a token's integer head indexes directly into the list.
            sentence[0]._aux['descendants'] = sentence[1:]
            for token in sentence[1:]:
                token.set_parent(sentence[token.head])

        try:
            stream = args['filehandle']
            opened_here = False
        except KeyError:
            # Binary mode: the codecs reader below does the UTF-8 decoding.
            stream = open(args['filename'], 'rb')
            opened_here = True

        try:
            fh = codecs.getreader('utf8')(stream)

            nodes = []
            comment = ''

            for line in fh:

                if line.startswith('#'):
                    comment = comment + line

                elif re.match(r'\d+\-', line):  # HACK: multiword tokens temporarily avoided
                    pass

                elif line.strip():

                    if not nodes:
                        # First token of a sentence: set up its bundle and root.
                        bundle = Bundle()
                        self.bundles.append(bundle)
                        root = Root() # TODO: replace with bundle.create_tree once it is debugged
                        root._aux['comment'] = comment # TODO: store this somewhere proper
                        nodes = [root]
                        bundle.trees.append(root)

                    columns = line.strip().split('\t')

                    node = Node()
                    nodes.append(node)

                    for index, attrname in enumerate(Document.attrnames):
                        setattr( node, attrname, columns[index] )

                    try:  # TODO: find out where the underscores in this column come from
                        node.head = int(node.head)
                    except ValueError:
                        node.head = 0

                    try:   # TODO: handle multiword tokens
                        node.ord = int(node.ord)
                    except ValueError:
                        node.ord = 0

                else: # blank line: sentence boundary

                    # Leading or repeated blank lines leave nothing to link.
                    if nodes:
                        link_sentence(nodes)

                    nodes = []
                    comment = ''

            # Input that ends without a trailing blank line still holds an
            # unlinked last sentence.
            if nodes:
                link_sentence(nodes)

        finally:
            # Only close what this method opened; a caller-supplied handle
            # stays under the caller's control.
            if opened_here:
                stream.close()