def _parse(self, t):
    """Build a dependency tree from the text of one parsed sentence.

    The text is processed line by line:

    * A line starting with ``*`` or ``+`` opens a new node (a bunsetsu or
      tag).  Its second space-separated field must look like
      ``<parent index><relation>``, where the relation is one of
      ``A``, ``D``, ``I`` or ``P``.  A parent index of ``-1`` marks the
      node as the root; otherwise the node is recorded in its parent's
      ``deps`` list.
    * A line starting with ``#`` is ignored.
    * Any other non-empty line is a morpheme belonging to the most
      recently opened node.  It is stored as a 2-tuple of its first
      space-separated field and the remaining fields joined by a space.
    * Empty lines are skipped.

    If ``self.morphs2str`` is truthy it is called with every node's list
    of morphemes and the result replaces that list.

    :param t: text of a single parsed sentence
    :return: the result of ``tree()`` on the populated graph
    :raises AssertionError: if a header line's second field does not match
        the expected ``<parent index><relation>`` pattern
    """
    header_field = re.compile(r"([\-0-9]*)([ADIP])")
    dg = DependencyGraph()
    i = 0  # address given to the next node that is opened
    for line in t.splitlines():
        if not line:
            # splitlines() yields "" for blank lines; indexing line[0]
            # on those would raise IndexError, so skip them.
            continue

        if line[0] in "*+":
            # start of bunsetsu or tag
            cells = line.strip().split(" ", 3)
            m = header_field.match(cells[1])
            assert m is not None, "malformed header line: %r" % (line,)

            node = dg.nodes[i]
            node.update({"address": i, "rel": m.group(2), "word": []})

            dep_parent = int(m.group(1))
            if dep_parent == -1:
                dg.root = node
            else:
                dg.nodes[dep_parent]["deps"].append(i)

            i += 1
        elif line[0] != "#":
            # normal morph: convert cells to a morph tuple
            cells = line.strip().split(" ")
            morph = cells[0], " ".join(cells[1:])
            # i was already advanced, so i - 1 is the node opened last
            dg.nodes[i - 1]["word"].append(morph)

    if self.morphs2str:
        for node in dg.nodes.values():
            node["word"] = self.morphs2str(node["word"])

    return dg.tree()
def _parse(self, t):
    """Turn the raw text of one parsed sentence into a dependency tree.

    Line handling:

    * ``*`` / ``+`` at the start of a line introduces a new node (bunsetsu
      or tag).  The second space-separated field is matched against
      ``<parent index><relation>`` with the relation being ``A``, ``D``,
      ``I`` or ``P``.  Parent index ``-1`` makes the node the graph root;
      any other index gets this node's address appended to the parent's
      ``deps``.
    * ``#`` at the start of a line means the line is skipped.
    * Every other non-empty line is a morpheme of the node opened last,
      kept as ``(first field, rest of the fields joined by ' ')``.
    * Empty lines are skipped.

    When ``self.morphs2str`` is truthy, each node's morpheme list is
    replaced by ``self.morphs2str(morphemes)``.

    :param t: text of a single parsed sentence
    :return: the value of ``tree()`` called on the populated graph
    :raises AssertionError: if a header line's second field does not match
        the expected pattern
    """
    header_field = re.compile(r"([\-0-9]*)([ADIP])")
    dg = DependencyGraph()
    i = 0  # address for the next node to be opened
    for line in t.splitlines():
        if not line:
            # An empty line has no first character; line[0] would raise
            # IndexError, so there is nothing to do but move on.
            continue

        if line[0] in '*+':
            # start of bunsetsu or tag
            cells = line.strip().split(" ", 3)
            m = header_field.match(cells[1])
            assert m is not None, 'malformed header line: %r' % (line,)

            node = dg.nodes[i]
            node.update({'address': i, 'rel': m.group(2), 'word': []})

            dep_parent = int(m.group(1))
            if dep_parent == -1:
                dg.root = node
            else:
                dg.nodes[dep_parent]['deps'].append(i)

            i += 1
        elif line[0] != '#':
            # normal morph: convert cells to a morph tuple
            cells = line.strip().split(" ")
            morph = cells[0], ' '.join(cells[1:])
            # the node opened last lives at i - 1 because i was advanced
            dg.nodes[i - 1]['word'].append(morph)

    if self.morphs2str:
        for node in dg.nodes.values():
            node['word'] = self.morphs2str(node['word'])

    return dg.tree()
def flattened_node_list(graph):
    """
    Takes an instance of DependencyGraph corresponding to a parsed
    sentence.  Flattens it into a list of DependencyGraph instances, each
    with a different word from the sentence as its root node (and no
    children).

    Every entry of ``graph.nodelist`` except the first is used.  Each one
    is shallow-copied before its ``deps``, ``head`` and ``address`` are
    overwritten, so the nodes of ``graph`` itself are left untouched.

    :param graph: the parsed sentence; only ``graph.nodelist`` is read
    :return: list of single-word graphs, in ``nodelist`` order
    """
    flattened = []
    # Slicing already yields a new list; what needs protecting is the
    # node objects themselves, which the slice shares with ``graph``.
    for original in graph.nodelist[1:]:
        node = copy.copy(original)
        node["deps"] = []
        node["head"] = 0
        node["address"] = 1

        new_graph = DependencyGraph()
        new_graph.nodelist.append(node)
        new_graph.root = node
        flattened.append(new_graph)
    return flattened
def _parse(self, t):
    """Build a dependency tree from the text of one parsed sentence.

    The text is processed line by line:

    * A line starting with ``*`` or ``+`` opens a new node (a bunsetsu or
      tag).  Its second space-separated field must look like
      ``<parent index><relation>``, with the relation one of ``A``,
      ``D``, ``I`` or ``P``.  A parent index of ``-1`` marks the node as
      the root; otherwise the node's address is appended to its parent's
      ``deps``.
    * A line starting with ``#`` is ignored.
    * Any other line is a morpheme of the most recently opened node,
      stored as ``(first field, remaining fields joined by ' ')``.

    ``dg.nodelist`` is grown with ``{'word': [], 'deps': []}`` entries as
    needed so that both the node being opened and the parent it refers to
    exist before either is accessed.

    If ``self.morphs2str`` is truthy it is applied to every node's list of
    morphemes and the result replaces that list.

    :param t: text of a single parsed sentence
    :return: the result of ``tree()`` on the populated graph
    :raises AssertionError: if a header line's second field does not match
        the expected pattern
    """
    header_field = re.compile(r"([\-0-9]*)([ADIP])")
    dg = DependencyGraph()
    i = 0  # address given to the next node that is opened
    for line in t.splitlines():
        if line.startswith(("*", "+")):
            # start of bunsetsu or tag
            cells = line.strip().split(" ", 3)
            m = header_field.match(cells[1])
            assert m is not None

            dep_parent = int(m.group(1))

            # Grow the list *before* indexing it: slot i is not guaranteed
            # to exist when no earlier header referred to an index >= i.
            needed = max(i, dep_parent) + 1
            while len(dg.nodelist) < needed:
                dg.nodelist.append({'word': [], 'deps': []})

            node = dg.nodelist[i]
            node['address'] = i
            node['rel'] = m.group(2)  # dep_type
            node['word'] = []

            if dep_parent == -1:
                dg.root = node
            else:
                dg.nodelist[dep_parent]['deps'].append(i)

            i += 1
        elif not line.startswith("#"):
            # normal morph: convert cells to a morph tuple
            cells = line.strip().split(" ")
            morph = (cells[0], ' '.join(cells[1:]))
            # i was already advanced, so i - 1 is the node opened last
            dg.nodelist[i - 1]['word'].append(morph)

    if self.morphs2str:
        for node in dg.nodelist:
            node['word'] = self.morphs2str(node['word'])

    return dg.tree()
def _parse(self, t):
    """Turn the raw text of one parsed sentence into a dependency tree.

    Line handling:

    * ``*`` / ``+`` at the start of a line introduces a new node (bunsetsu
      or tag).  The second space-separated field is matched against
      ``<parent index><relation>`` with the relation being ``A``, ``D``,
      ``I`` or ``P``.  Parent index ``-1`` makes the node the graph root;
      any other index gets this node's address appended to the parent's
      ``deps``.
    * ``#`` at the start of a line means the line is skipped.
    * Every other line is a morpheme of the node opened last, kept as
      ``(first field, rest of the fields joined by ' ')``.

    Before a node or its parent is touched, ``dg.nodelist`` is extended
    with ``{'word': [], 'deps': []}`` placeholders until both indices are
    valid.

    When ``self.morphs2str`` is truthy, each node's morpheme list is
    replaced by ``self.morphs2str(morphemes)``.

    :param t: text of a single parsed sentence
    :return: the value of ``tree()`` called on the populated graph
    :raises AssertionError: if a header line's second field does not match
        the expected pattern
    """
    header_field = re.compile(r"([\-0-9]*)([ADIP])")
    dg = DependencyGraph()
    i = 0  # address for the next node to be opened
    for line in t.splitlines():
        if line.startswith(("*", "+")):
            # start of bunsetsu or tag
            cells = line.strip().split(" ", 3)
            m = header_field.match(cells[1])
            assert m is not None

            dep_parent = int(m.group(1))

            # Pad first, index second.  Indexing slot i before padding
            # raises IndexError whenever no earlier header happened to
            # reference an index at or beyond i.
            while len(dg.nodelist) < i + 1 or len(dg.nodelist) < dep_parent + 1:
                dg.nodelist.append({'word': [], 'deps': []})

            node = dg.nodelist[i]
            node['address'] = i
            node['rel'] = m.group(2)  # dep_type
            node['word'] = []

            if dep_parent == -1:
                dg.root = node
            else:
                dg.nodelist[dep_parent]['deps'].append(i)

            i += 1
        elif not line.startswith("#"):
            # normal morph: convert cells to a morph tuple
            cells = line.strip().split(" ")
            morph = (cells[0], ' '.join(cells[1:]))
            # the node opened last lives at i - 1 because i was advanced
            dg.nodelist[i - 1]['word'].append(morph)

    if self.morphs2str:
        for node in dg.nodelist:
            node['word'] = self.morphs2str(node['word'])

    return dg.tree()
def cabocha2depgraph(t):
    """Convert the text of one parsed sentence into a DependencyGraph.

    The text is processed line by line:

    * A line starting with ``*`` opens a new node (a bunsetsu).  Its third
      space-separated field must look like ``<parent index><relation>``,
      with the relation one of ``A``, ``D``, ``I`` or ``P``.  A parent
      index of ``-1`` marks the node as the root; otherwise the node's
      address is appended to its parent's ``deps``.
    * A line starting with ``EOS`` is ignored, as are blank lines.
    * Any other line is a morpheme of the most recently opened node.  It
      is split on tabs: the first field is appended to the node's
      ``word`` list, and the second field, split on commas into a tuple,
      is appended to the node's ``tag`` list.

    ``dg.nodelist`` is grown with ``{'word': [], 'deps': [], 'tag': []}``
    entries as needed so that both the node being opened and the parent
    it refers to exist before either is accessed.

    :param t: text of a single parsed sentence
    :return: the populated DependencyGraph
    :raises AttributeError: if a header line's third field does not match
        the expected pattern (``re.match`` returns ``None``)
    """
    header_field = re.compile(r"([\-0-9]*)([ADIP])")
    dg = DependencyGraph()
    i = 0  # address given to the next node that is opened
    for line in t.splitlines():
        if not line.strip():
            # A blank line has no tab-separated fields; treating it as a
            # morpheme would fail on cells[1].
            continue

        if line.startswith("*"):
            # start of bunsetsu
            cells = line.strip().split(" ", 3)
            m = header_field.match(cells[2])
            dep_parent = int(m.group(1))

            # Grow the list *before* indexing it: slot i is not guaranteed
            # to exist when no earlier header referred to an index >= i.
            needed = max(i, dep_parent) + 1
            while len(dg.nodelist) < needed:
                dg.nodelist.append({'word': [], 'deps': [], 'tag': []})

            node = dg.nodelist[i]
            node.update({
                'address': i,
                'rel': m.group(2),  # dep_type
                'word': [],
                'tag': [],
            })

            if dep_parent == -1:
                dg.root = node
            else:
                dg.nodelist[dep_parent]['deps'].append(i)

            i += 1
        elif not line.startswith("EOS"):
            # normal morph
            cells = line.strip().split("\t")
            surface = cells[0]
            features = tuple(cells[1].split(','))
            # i was already advanced, so i - 1 is the node opened last
            current = dg.nodelist[i - 1]
            current['word'].append(surface)
            current['tag'].append(features)

    return dg