def load(self, args):
    """Read CoNLL-U sentences from a stream and append them to ``self.bundles``.

    For every sentence a new ``Bundle`` is appended to ``self.bundles`` and
    a new ``Root`` is appended to that bundle's ``trees``. Each token line
    becomes a ``Node`` whose attributes are filled from the tab-separated
    columns, using the names in ``Document.attrnames`` (presumably the
    CoNLL-U column names in file order -- confirm against ``Document``).

    Args:
        args: mapping holding either ``'filehandle'`` (an already opened
            stream, which is left open) or ``'filename'`` (a path that is
            opened here and closed again before returning). ``'filehandle'``
            takes precedence when both are present.

    Raises:
        KeyError: if ``args`` contains neither key.
        IndexError: if a token line has fewer columns than
            ``Document.attrnames`` has names, or if a node's head index
            points outside its sentence.
    """
    try:
        fh = args['filehandle']
        opened_here = False
    except KeyError:
        # Only a missing key means "fall back to the filename"; any other
        # error should surface instead of being swallowed.
        fh = open(args['filename'], 'r')
        opened_here = True

    # Same pattern as before, compiled once and written as a raw string.
    multiword_line = re.compile(r'^\d+\-')

    def finish_sentence(sentence_nodes):
        # Index 0 is the root; the rest are the tokens in file order.
        sentence_nodes[0]._aux['descendants'] = sentence_nodes[1:]
        for token in sentence_nodes[1:]:
            token.set_parent(sentence_nodes[token.head])

    try:
        reader = codecs.getreader('utf8')(fh)

        nodes = []
        comment = ''

        for line in reader:
            if line.startswith('#'):
                comment = comment + line

            elif multiword_line.search(line):
                # HACK: multiword tokens temporarily avoided
                pass

            elif line.strip():
                if not nodes:
                    bundle = Bundle()
                    self.bundles.append(bundle)
                    # TODO: replace with bundle.create_tree once it is debugged
                    root = Root()
                    # TODO: store the comment somewhere proper
                    root._aux['comment'] = comment
                    nodes = [root]
                    bundle.trees.append(root)

                columns = line.strip().split('\t')

                node = Node()
                nodes.append(node)

                # Indexing (rather than zip) keeps the IndexError on short lines.
                for index, attrname in enumerate(Document.attrnames):
                    setattr(node, attrname, columns[index])

                try:
                    # TODO: find out where the underscores in this column come from
                    node.head = int(node.head)
                except ValueError:
                    node.head = 0

                try:
                    # TODO: handle multiword tokens
                    node.ord = int(node.ord)
                except ValueError:
                    node.ord = 0

            elif nodes:
                # A blank line closes the current sentence. Blank lines with
                # no open sentence (leading or repeated) are skipped instead
                # of failing on nodes[0].
                finish_sentence(nodes)
                nodes = []
                comment = ''

        if nodes:
            # The format promises a trailing blank line, but do not leave the
            # last sentence unlinked when it is missing.
            finish_sentence(nodes)
    finally:
        if opened_here:
            fh.close()
class LinkedList:
    """Singly linked list that keeps a reference to both its first and last node.

    Nodes are created as ``Node(value, next_node)`` and are accessed through
    ``head()`` / ``setHead(value)`` for the stored value and ``tail()`` /
    ``setTail(node)`` for the following node. ``Node`` is not defined in this
    class and must be available in the enclosing module.

    Valid indices run from ``0`` to ``size() - 1``; negative indices are
    rejected with ``IndexError``.
    """

    def __init__(self, value=None):
        """Create an empty list, or a one-element list when ``value`` is given.

        ``None`` (the default) means "no initial value".
        """
        self._size = 0
        self._headNode = None
        self._tailNode = None
        if value is not None:
            self._headNode = Node(value, None)
            self._tailNode = self._headNode
            self._size = 1

    def get(self, index):
        """Return the value stored at ``index``.

        Raises:
            IndexError: if ``index`` is outside ``0 .. size() - 1``.
        """
        self._verifyIndex(index)
        if index == self._size - 1:
            # The last element is reachable without walking the list.
            return self._tailNode.head()
        return self._nodeAt(index).head()

    def set(self, index, value):
        """Replace the value stored at ``index`` with ``value``.

        Raises:
            IndexError: if ``index`` is outside ``0 .. size() - 1``.
        """
        self._verifyIndex(index)
        self._nodeAt(index).setHead(value)

    def frontadd(self, value):
        """Insert ``value`` as the new first element."""
        self._headNode = Node(value, self._headNode)
        # A head without a successor is the only node, hence also the tail.
        if self._headNode.tail() is None:
            self._tailNode = self._headNode
        self._size += 1

    def backadd(self, value):
        """Append ``value`` as the new last element."""
        if self._headNode is None:
            self.frontadd(value)
            return
        newNode = Node(value, None)
        self._tailNode.setTail(newNode)
        self._tailNode = newNode
        self._size += 1

    def indexadd(self, index, value):
        """Insert ``value`` so that it ends up at position ``index``.

        ``index`` may equal ``size()``, in which case the value is appended.

        Raises:
            IndexError: if ``index`` is negative or greater than ``size()``.
        """
        if index < 0 or index > self._size:
            raise IndexError("index out of bounds")
        if index == 0:
            self.frontadd(value)
            return
        if index == self._size:
            self.backadd(value)
            return
        previous = self._nodeAt(index - 1)
        previous.setTail(Node(value, previous.tail()))
        self._size += 1

    def indexremove(self, index):
        """Remove the element at ``index`` and return its value.

        Raises:
            IndexError: if ``index`` is outside ``0 .. size() - 1``.
        """
        self._verifyIndex(index)
        if index == 0:
            return self.frontremove()
        if index == self._size - 1:
            return self.backremove()
        previous = self._nodeAt(index - 1)
        removed = previous.tail()
        previous.setTail(removed.tail())
        self._size -= 1
        return removed.head()

    def frontremove(self):
        """Remove the first element and return its value.

        Raises:
            IndexError: if the list is empty.
        """
        if self._headNode is None:
            raise IndexError("remove from empty list")
        rval = self._headNode.head()
        self._headNode = self._headNode.tail()
        if self._headNode is None:
            # The list is empty now; do not keep the removed node as tail.
            self._tailNode = None
        self._size -= 1
        return rval

    def backremove(self):
        """Remove the last element and return its value.

        Raises:
            IndexError: if the list is empty.
        """
        if self._headNode is None:
            raise IndexError("remove from empty list")
        if self._headNode is self._tailNode:
            # Single element: there is no predecessor to walk to.
            return self.frontremove()
        rval = self._tailNode.head()
        walk = self._headNode
        # Compare by identity so equal-valued nodes cannot stop the walk early.
        while walk.tail() is not self._tailNode:
            walk = walk.tail()
        walk.setTail(None)
        self._tailNode = walk
        self._size -= 1
        return rval

    def extract(self):
        """Return all stored values, first to last, as a new Python list."""
        values = []
        walk = self._headNode
        while walk is not None:
            values.append(walk.head())
            walk = walk.tail()
        return values

    def size(self):
        """Return the number of stored elements."""
        return self._size

    def _nodeAt(self, index):
        """Return the node at ``index``; the caller must have validated it."""
        walk = self._headNode
        for _ in range(index):
            walk = walk.tail()
        return walk

    def _verifyIndex(self, index):
        """Raise ``IndexError`` unless ``0 <= index <= size() - 1``."""
        if index < 0 or index > self._size - 1:
            raise IndexError("index out of bounds")