Exemplos de RstDepTree.nucs em Python

Linguagem de programação: Python

Espaço de nomes / nome do pacote: educe.rst_dt.deptree

Classe / Tipo: RstDepTree

Método / Função: nucs

Exemplos em hotexamples.com: 2

RstDepTree.nucs em Python – 2 exemplos encontrados. Estes são os exemplos reais mais bem avaliados de educe.rst_dt.deptree.RstDepTree.nucs em Python, extraídos de projetos de código aberto. Você pode avaliar os exemplos para nos ajudar a melhorar a qualidade deles.

Métodos Frequentes

Exibir Ocultar

from_simple_rst_tree(5)

from_rst_tree(3)

RstDepTree(1)

add_dependency(1)

nucs(1)

ranks(1)

sent_idx(1)

set_root(1)

Métodos Frequentes

from_simple_rst_tree (5)

from_rst_tree (3)

RstDepTree (1)

add_dependency (1)

nucs (1)

ranks (1)

sent_idx (1)

set_root (1)

Exemplo n.º 1

0

Exibir arquivo

Arquivo: util.py Projeto: moreymat/attelo

def get_oracle_ctrees(dep_edges, att_edus,
                      nuc_strategy="unamb_else_most_frequent",
                      rank_strategy="closest-intra-rl-inter-rl",
                      prioritize_same_unit=True,
                      strict=False):
    """Build the oracle constituency tree(s) for a dependency tree.

    Parameters
    ----------
    dep_edges: iterable of (string, string, string)
        Dependency edges as (source id, target id, label) triples, where
        ids are the global EDU ids used by attelo and a source id of
        'ROOT' marks the root attachment.
        Cf. type of return value from
        irit-rst-dt/ctree.py:load_attelo_output_file()
        (presumably the edges of a single document; verify against caller).
    att_edus: cf return type of attelo.io.load_edus
        EDUs as they are known to attelo. Apart from the 'ROOT' entries,
        which are skipped, they must all share the same `grouping`
        (document name).
    nuc_strategy: string
        Strategy handed to DummyNuclearityClassifier.
    rank_strategy: string
        Strategy handed to InsideOutAttachmentRanker.
    prioritize_same_unit: boolean, True by default
        Forwarded as-is to InsideOutAttachmentRanker.
    strict: boolean, False by default
        If True, any link from ROOT to an EDU whose label is neither
        'ROOT' nor UNKNOWN raises an exception, otherwise a warning is
        printed and the EDU is attached to the root anyway.

    Returns
    -------
    ctrees: list of RstTree
        There can be several e.g. for leaky sentences.

    Raises
    ------
    AssertionError
        If the (non-ROOT) EDUs do not cover exactly one document.
    ValueError
        If `strict` is True and a ROOT edge carries an unexpected label.
    RstDtException
        Re-raised (after being printed) when the conversion from the
        dependency tree to constituency trees fails.
    """
    # rebuild educe EDUs from their attelo description
    # and group them by doc_name
    educe_edus = defaultdict(list)
    edu2sent_idx = defaultdict(dict)
    gid2num = dict()
    for att_edu in att_edus:
        doc_name = att_edu.grouping
        # skip ROOT (automatically added by RstDepTree.__init__)
        if att_edu.id == 'ROOT':
            continue
        # the EDU number is the last '_'-separated field of the global id
        edu_num = int(att_edu.id.rsplit('_', 1)[1])
        edu_span = EduceSpan(att_edu.start, att_edu.end)
        educe_edus[doc_name].append(
            EduceEDU(edu_num, edu_span, att_edu.text))
        # map global id of EDU to num of EDU inside doc
        gid2num[att_edu.id] = edu_num
        # map EDU to sentence
        try:
            sent_idx = int(att_edu.subgrouping.split('_sent')[1])
        except IndexError:
            # this EDU could not be attached to any sentence (ex: missing
            # text in the PTB), so a default subgrouping identifier was
            # used ; we aim for consistency with educe and map these to
            # "None"
            sent_idx = None
        edu2sent_idx[doc_name][edu_num] = sent_idx

    # check that our info covers only one document
    assert len(educe_edus) == 1, (
        'expected EDUs from exactly one document, got {}'.format(
            len(educe_edus)))
    # then restrict to this document ; next(iter(...)) rather than
    # keys()[0] because dict views are not subscriptable on Python 3
    doc_name = next(iter(educe_edus))
    educe_edus = educe_edus[doc_name]
    edu2sent_idx = edu2sent_idx[doc_name]
    # sort EDUs by num
    educe_edus = sorted(educe_edus, key=lambda e: e.num)
    # rebuild educe-style edu2sent ; prepend 0 for the fake root
    edu2sent = [0] + [edu2sent_idx[e.num] for e in educe_edus]

    # classifiers for nuclearity and ranking
    # FIXME declare, fit and predict upstream...
    nuc_classifier = DummyNuclearityClassifier(strategy=nuc_strategy)
    nuc_classifier.fit([], [])  # empty X and y for dummy fit
    rank_classifier = InsideOutAttachmentRanker(
        strategy=rank_strategy,
        prioritize_same_unit=prioritize_same_unit)

    # rebuild the RstDepTree
    dtree = RstDepTree(educe_edus)
    for src_id, tgt_id, lbl in dep_edges:
        if src_id == 'ROOT':
            if lbl not in ('ROOT', UNKNOWN):
                err_msg = 'weird root label: {} {} {}'.format(
                    src_id, tgt_id, lbl)
                if strict:
                    raise ValueError(err_msg)
                print('W: {}, using ROOT instead'.format(err_msg))
            dtree.set_root(gid2num[tgt_id])
        else:
            dtree.add_dependency(gid2num[src_id], gid2num[tgt_id], lbl)

    # add nuclearity: heuristic baseline
    dtree.nucs = nuc_classifier.predict([dtree])[0]
    # add rank: some strategies require a mapping from EDU to sentence
    # EXPERIMENTAL attach array of sentence index for each EDU in tree
    dtree.sent_idx = edu2sent
    dtree.ranks = rank_classifier.predict([dtree])[0]

    # create pred ctree
    # NOTE: SimpleRSTTree.incorporate_nuclearity_into_label is deliberately
    # not applied here ; it was disabled upstream in order to run on
    # output whose labels already embed nuclearity.
    try:
        bin_srtrees = deptree_to_simple_rst_tree(dtree, allow_forest=True)
        bin_rtrees = [SimpleRSTTree.to_binary_rst_tree(bin_srtree)
                      for bin_srtree in bin_srtrees]
    except RstDtException as rst_e:
        # surface the conversion error, then let the caller handle it
        print(rst_e)
        raise
    return bin_rtrees

Exemplo n.º 2

0

Exibir arquivo

Arquivo: util.py Projeto: eipiplusun/attelo

def get_oracle_ctrees(dep_edges, att_edus,
                      nuc_strategy="unamb_else_most_frequent",
                      rank_strategy="closest-intra-rl-inter-rl",
                      prioritize_same_unit=True,
                      strict=False):
    """Build the oracle constituency tree(s) for a dependency tree.

    Parameters
    ----------
    dep_edges: iterable of (string, string, string)
        Dependency edges as (source id, target id, label) triples, where
        ids are the global EDU ids used by attelo and a source id of
        'ROOT' marks the root attachment.
        Cf. type of return value from
        irit-rst-dt/ctree.py:load_attelo_output_file()
        (presumably the edges of a single document; verify against caller).
    att_edus: cf return type of attelo.io.load_edus
        EDUs as they are known to attelo. Apart from the 'ROOT' entries,
        which are skipped, they must all share the same `grouping`
        (document name).
    nuc_strategy: string
        Strategy handed to DummyNuclearityClassifier.
    rank_strategy: string
        Strategy handed to InsideOutAttachmentRanker.
    prioritize_same_unit: boolean, True by default
        Forwarded as-is to InsideOutAttachmentRanker.
    strict: boolean, False by default
        If True, any link from ROOT to an EDU whose label is neither
        'ROOT' nor UNKNOWN raises an exception, otherwise a warning is
        printed and the EDU is attached to the root anyway.

    Returns
    -------
    ctrees: list of RstTree
        There can be several e.g. for leaky sentences.

    Raises
    ------
    AssertionError
        If the (non-ROOT) EDUs do not cover exactly one document.
    ValueError
        If `strict` is True and a ROOT edge carries an unexpected label.
    RstDtException
        Re-raised (after being printed) when the conversion from the
        dependency tree to constituency trees fails.
    """
    # rebuild educe EDUs from their attelo description
    # and group them by doc_name
    educe_edus = defaultdict(list)
    edu2sent_idx = defaultdict(dict)
    gid2num = dict()
    for att_edu in att_edus:
        doc_name = att_edu.grouping
        # skip ROOT (automatically added by RstDepTree.__init__)
        if att_edu.id == 'ROOT':
            continue
        # the EDU number is the last '_'-separated field of the global id
        edu_num = int(att_edu.id.rsplit('_', 1)[1])
        edu_span = EduceSpan(att_edu.start, att_edu.end)
        educe_edus[doc_name].append(
            EduceEDU(edu_num, edu_span, att_edu.text))
        # map global id of EDU to num of EDU inside doc
        gid2num[att_edu.id] = edu_num
        # map EDU to sentence
        try:
            sent_idx = int(att_edu.subgrouping.split('_sent')[1])
        except IndexError:
            # this EDU could not be attached to any sentence (ex: missing
            # text in the PTB), so a default subgrouping identifier was
            # used ; we aim for consistency with educe and map these to
            # "None"
            sent_idx = None
        edu2sent_idx[doc_name][edu_num] = sent_idx

    # check that our info covers only one document
    assert len(educe_edus) == 1, (
        'expected EDUs from exactly one document, got {}'.format(
            len(educe_edus)))
    # then restrict to this document ; next(iter(...)) rather than
    # keys()[0] because dict views are not subscriptable on Python 3
    doc_name = next(iter(educe_edus))
    educe_edus = educe_edus[doc_name]
    edu2sent_idx = edu2sent_idx[doc_name]
    # sort EDUs by num
    educe_edus = sorted(educe_edus, key=lambda e: e.num)
    # rebuild educe-style edu2sent ; prepend 0 for the fake root
    edu2sent = [0] + [edu2sent_idx[e.num] for e in educe_edus]

    # classifiers for nuclearity and ranking
    # FIXME declare, fit and predict upstream...
    nuc_classifier = DummyNuclearityClassifier(strategy=nuc_strategy)
    nuc_classifier.fit([], [])  # empty X and y for dummy fit
    rank_classifier = InsideOutAttachmentRanker(
        strategy=rank_strategy,
        prioritize_same_unit=prioritize_same_unit)

    # rebuild the RstDepTree
    dtree = RstDepTree(educe_edus)
    for src_id, tgt_id, lbl in dep_edges:
        if src_id == 'ROOT':
            if lbl not in ('ROOT', UNKNOWN):
                err_msg = 'weird root label: {} {} {}'.format(
                    src_id, tgt_id, lbl)
                if strict:
                    raise ValueError(err_msg)
                print('W: {}, using ROOT instead'.format(err_msg))
            dtree.set_root(gid2num[tgt_id])
        else:
            dtree.add_dependency(gid2num[src_id], gid2num[tgt_id], lbl)

    # add nuclearity: heuristic baseline
    dtree.nucs = nuc_classifier.predict([dtree])[0]
    # add rank: some strategies require a mapping from EDU to sentence
    # EXPERIMENTAL attach array of sentence index for each EDU in tree
    dtree.sent_idx = edu2sent
    dtree.ranks = rank_classifier.predict([dtree])[0]

    # create pred ctree
    # NOTE: SimpleRSTTree.incorporate_nuclearity_into_label is deliberately
    # not applied here ; it was disabled upstream in order to run on
    # output whose labels already embed nuclearity.
    try:
        bin_srtrees = deptree_to_simple_rst_tree(dtree, allow_forest=True)
        bin_rtrees = [SimpleRSTTree.to_binary_rst_tree(bin_srtree)
                      for bin_srtree in bin_srtrees]
    except RstDtException as rst_e:
        # surface the conversion error, then let the caller handle it
        print(rst_e)
        raise
    return bin_rtrees