def process_noroot(self, sSentence, wfile):
    """Convert the tree(s) parsed from one sentence and write them to *wfile*.

    If the parsed object is a node named "ROOT", it is replaced by its first
    child before conversion. Every resulting tree is converted (binarized
    when ``self.m_bBinarize`` is true, plain otherwise) and written as one
    line containing the node's ``utf8print()`` text.

    Args:
        sSentence: Text passed to ``fidtree.parse_object``; input that is
            empty after stripping whitespace is ignored.
        wfile: Object whose ``write`` method receives the output lines.
    """
    if not sSentence.strip():
        return

    parsed = fidtree.parse_object(sSentence)
    # Strip the ROOT wrapper, keeping only its first child.
    if parsed.name == "ROOT":
        parsed = parsed.children[0]

    # Normalise to a list so a single tree and several trees share one loop.
    trees = parsed if type(parsed) == list else [parsed]

    for tree in trees:
        if self.m_bBinarize:
            converted = CBinarizedTreeNode()
            self.build_binarized_node(converted, tree)
        else:
            converted = fidtree.CTreeNode()
            self.build_node(converted, tree)
        wfile.write(converted.utf8print() + "\n")
def process(self, sSentence):
    """Print the tokens of each tree parsed from *sSentence*, one per line.

    For every tree, ``self.find_head`` is called with an empty list, which is
    then iterated as the token sequence. Each token is printed as
    tab-separated columns: converted token text, ``pos``, ``link`` and, when
    ``self.m_bLabeled`` is true, ``label``. An empty line follows each tree.

    Args:
        sSentence: Text passed to ``fidtree.parse_object``; input that is
            empty after stripping whitespace is ignored.
    """
    if not sSentence.strip():
        return

    parsed = fidtree.parse_object(sSentence)
    # Normalise to a list so a single tree and several trees share one loop.
    trees = parsed if type(parsed) == list else [parsed]

    for tree in trees:
        tokens = []
        top = self.find_head(tree, tokens)
        if self.m_bLabeled:
            # The node returned by find_head is given the 'ROOT' label.
            top.label = 'ROOT'
        for tok in tokens:
            fields = [gb2utf(tok.token), tok.pos, str(tok.link)]
            if self.m_bLabeled:
                fields.append(tok.label)
            # Single-argument print(...) matches the Python 2 print statement.
            print("\t".join(fields))
        print("")  # empty line
def process(self, sSentence):
    """Print one flatness ratio per tree parsed from *sSentence*.

    For each tree, ``self.count_flatness`` returns an indexable pair; the
    printed value is element 1 divided by element 0, as floats. When element
    0 is zero (presumably a tree with no counted constituents -- confirm
    against ``count_flatness``), ``0.0`` is printed instead of raising
    ``ZeroDivisionError``.

    Args:
        sSentence: Text passed to ``fidtree.parse_object``; input that is
            empty after stripping whitespace is ignored.
    """
    if not sSentence.strip():
        return

    parsed = fidtree.parse_object(sSentence)
    # Normalise to a list so a single tree and several trees share one loop.
    trees = parsed if isinstance(parsed, list) else [parsed]

    for tree in trees:
        counts = self.count_flatness(tree)
        if counts[0] == 0:
            # Avoid division by zero when the denominator count is zero.
            print(0.0)
        else:
            # Single-argument print(...) matches the Python 2 print statement.
            print(float(counts[1]) / float(counts[0]))
def process(self, sSentence):
    """Print one flatness ratio per tree parsed from *sSentence*.

    For each tree, ``self.count_flatness`` returns an indexable pair; the
    printed value is element 1 divided by element 0, as floats, or ``0.0``
    when element 0 is zero.

    Args:
        sSentence: Text passed to ``fidtree.parse_object``; input that is
            empty after stripping whitespace is ignored.
    """
    if not sSentence.strip():
        return

    parsed = fidtree.parse_object(sSentence)
    # Normalise to a list so a single tree and several trees share one loop.
    trees = parsed if type(parsed) == list else [parsed]

    for tree in trees:
        counts = self.count_flatness(tree)
        denominator = counts[0]
        # Avoid division by zero if there is no constituent.
        if denominator != 0:
            ratio = float(counts[1]) / float(denominator)
        else:
            ratio = 0.0
        # Single-argument print(...) matches the Python 2 print statement.
        print(ratio)
def process(self, sSentence):
    """Print the flatness ratio of every tree found in *sSentence*.

    The ratio is element 1 of ``self.count_flatness(tree)`` divided by
    element 0, computed in floating point. A zero element 0 yields ``0.0``.

    Args:
        sSentence: Text passed to ``fidtree.parse_object``; input that is
            empty after stripping whitespace is ignored.
    """
    if not sSentence.strip():
        return

    parsed = fidtree.parse_object(sSentence)
    if type(parsed) == list:
        trees = parsed
    else:
        trees = [parsed]

    for tree in trees:
        counts = self.count_flatness(tree)
        if counts[0] == 0:
            # Avoid division by zero if there is no constituent.
            print(0.0)
            continue
        # Single-argument print(...) matches the Python 2 print statement.
        print(float(counts[1]) / float(counts[0]))
def process(self, sSentence):
    """Print the tokens of each tree parsed from *sSentence*, one per line.

    For every tree, ``self.find_head`` is called with an empty list, which is
    then iterated as the token sequence. Each token is printed as three
    tab-separated columns: ``token``, ``pos`` and ``link``. An empty line
    follows each tree.

    Args:
        sSentence: Text passed to ``fidtree.parse_object``; input that is
            empty after stripping whitespace is ignored.
    """
    if not sSentence.strip():
        return

    parsed = fidtree.parse_object(sSentence)
    # Normalise to a list so a single tree and several trees share one loop.
    trees = parsed if type(parsed) == list else [parsed]

    for tree in trees:
        tokens = []
        self.find_head(tree, tokens)
        for tok in tokens:
            columns = (tok.token, tok.pos, str(tok.link))
            # Single-argument print(...) matches the Python 2 print statement.
            print("\t".join(columns))
        print("")  # empty line
def process(self, sSentence):
    """Convert and print the tree(s) parsed from one sentence.

    Each tree is rebuilt into a ``CBinarizedTreeNode`` when
    ``self.m_bBinarize`` is true, or into a ``fidtree.CTreeNode`` otherwise,
    and the resulting node is printed.

    Args:
        sSentence: Text passed to ``fidtree.parse_object``; input that is
            empty after stripping whitespace is ignored.
    """
    if not sSentence.strip():
        return

    parsed = fidtree.parse_object(sSentence)
    # Normalise to a list so a single tree and several trees share one loop.
    trees = parsed if type(parsed) == list else [parsed]

    for tree in trees:
        if self.m_bBinarize:
            converted = CBinarizedTreeNode()
            self.build_binarized_node(converted, tree)
        else:
            converted = fidtree.CTreeNode()
            self.build_node(converted, tree)
        # Single-argument print(...) matches the Python 2 print statement.
        print(converted)
def process(self, sSentence):
    """Print tab-separated token rows for each tree parsed from *sSentence*.

    ``self.find_head`` receives each tree together with an empty list; that
    list is then iterated as the tree's tokens. A row holds the converted
    token text, ``pos`` and ``link``, plus ``label`` when
    ``self.m_bLabeled`` is true. Trees are separated by an empty line.

    Args:
        sSentence: Text passed to ``fidtree.parse_object``; input that is
            empty after stripping whitespace is ignored.
    """
    if not sSentence.strip():
        return

    parsed = fidtree.parse_object(sSentence)
    if type(parsed) == list:
        trees = parsed
    else:
        trees = [parsed]

    for tree in trees:
        tokens = []
        top = self.find_head(tree, tokens)
        if self.m_bLabeled:
            # The node returned by find_head is given the 'ROOT' label.
            top.label = 'ROOT'
        for tok in tokens:
            columns = [gb2utf(tok.token), tok.pos, str(tok.link)]
            if self.m_bLabeled:
                columns += [tok.label]
            # Single-argument print(...) matches the Python 2 print statement.
            print("\t".join(columns))
        print("")  # empty line
def process(self, sSentence, wfile):
    """Write each tree parsed from *sSentence* to *wfile* as 10-column rows.

    ``self.find_head`` receives each tree together with an empty list; that
    list is then iterated as the tree's tokens. Every token becomes one
    tab-separated line with the columns: 1-based position, ``token``, "_",
    ``pos``, ``pos``, "_", ``link + 1``, the token's ``label`` (or "_" when
    ``self.m_bLabeled`` is false), "_", "_". A blank line is written after
    each tree.
    NOTE(review): the layout looks like CoNLL-X -- confirm with consumers.

    Args:
        sSentence: Text passed to ``fidtree.parse_object``; input that is
            empty after stripping whitespace is ignored.
        wfile: Object whose ``write`` method receives the output lines.
    """
    if not sSentence.strip():
        return

    parsed = fidtree.parse_object(sSentence)
    # Normalise to a list so a single tree and several trees share one loop.
    trees = parsed if type(parsed) == list else [parsed]

    for tree in trees:
        tokens = []
        top = self.find_head(tree, tokens)
        if self.m_bLabeled:
            # The node returned by find_head is given the 'ROOT' label.
            top.label = 'ROOT'
        # Positions restart at 1 for every tree.
        for position, tok in enumerate(tokens, 1):
            relation = tok.label if self.m_bLabeled else "_"
            row = [
                str(position),
                tok.token,
                "_",
                tok.pos,
                tok.pos,
                "_",
                str(tok.link + 1),
                relation,
                "_",
                "_",
            ]
            wfile.write("\t".join(row) + "\n")
        wfile.write('\n')