def postcompute_input1(self, input0_package, input1, mask0=None, mask1=None, rel1_t=None):
    # ff_score0, ff2_hid0, biaff_hid0 = input0_package
    ff_score0, biaff_hid0 = input0_package
    ret = 0
    if rel1_t is not None:
        input1 = input1 + rel1_t
    if self.use_ff:
        ret = ff_score0 + self.A1(input1)
    if self.use_ff2:
        zfatal("Not supported in this mode!")
    if self.use_biaffine:
        # [*, in1, out]
        expr0 = biaff_hid0.view(BK.get_shape(biaff_hid0)[:-1] + [self.in_size1, self.out_size])
        # [*, 1, in1] * [*, in1, out] -> [*, 1, out] -> [*, out]
        expr1 = BK.matmul(input1.unsqueeze(-2), expr0).squeeze(-2)
        ret += expr1 / self.biaffine_div
    if self.use_bias:
        ret += self.B
    # mask
    if mask0 is not None:
        ret += self.mask_value * (1. - mask0).unsqueeze(-1)
    if mask1 is not None:
        ret += self.mask_value * (1. - mask1).unsqueeze(-1)
    return self.drop_node(ret)
def valid_bigrams(scheme, tag_forms, bos):
    types = ChunksSeq.collect_types(tag_forms)
    #
    rets = {(bos, "O"), ("O", "O")}
    if scheme == "BIO":
        for t in types:
            rets.add((bos, "B-"+t))
            rets.add(("O", "B-"+t))
            rets.add(("B-"+t, "I-"+t))
            rets.add(("I-"+t, "I-"+t))
            rets.add(("B-"+t, "O"))
            rets.add(("I-"+t, "O"))
            for t2 in types:
                rets.add(("B-"+t, "B-"+t2))
                rets.add(("I-"+t, "B-"+t2))
    elif scheme == "BIOES":
        for t in types:
            rets.add((bos, "B-"+t))
            rets.add(("O", "B-"+t))
            rets.add((bos, "S-"+t))
            rets.add(("O", "S-"+t))
            rets.add(("B-"+t, "I-"+t))
            rets.add(("I-"+t, "I-"+t))
            rets.add(("B-"+t, "E-"+t))
            rets.add(("I-"+t, "E-"+t))
            rets.add(("S-"+t, "O"))
            rets.add(("E-"+t, "O"))
            for t2 in types:
                rets.add(("S-"+t, "B-"+t2))
                rets.add(("E-"+t, "B-"+t2))
                rets.add(("S-"+t, "S-"+t2))
                rets.add(("E-"+t, "S-"+t2))
    else:
        zfatal("Unknown tagging scheme")
    return rets
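# Illustrative sketch, not part of the original module: the BIO branch of valid_bigrams above,
# re-derived standalone for a single hypothetical type "PER" and a "<s>" begin-of-sentence
# marker (both names are assumptions for the demo; ChunksSeq is not needed here).
def _demo_bio_bigrams(types=("PER",), bos="<s>"):
    rets = {(bos, "O"), ("O", "O")}
    for t in types:
        rets.update({(bos, "B-"+t), ("O", "B-"+t), ("B-"+t, "I-"+t), ("I-"+t, "I-"+t),
                     ("B-"+t, "O"), ("I-"+t, "O")})
        for t2 in types:
            rets.update({("B-"+t, "B-"+t2), ("I-"+t, "B-"+t2)})
    return rets

# e.g. an I- tag may only follow a B-/I- of the same type, so ("O", "I-PER") is invalid
assert ("B-PER", "I-PER") in _demo_bio_bigrams()
assert ("O", "I-PER") not in _demo_bio_bigrams()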
def __init__(self, model_type, args):
    # top-levels
    self.model_type = model_type
    self.conf_output = ""
    self.log_file = Logger.MAGIC_CODE
    self.msp_seed = 9341
    #
    self.niconf = NIConf()  # nn-init-conf
    self.dconf = DConf()  # data-conf
    # model-conf
    if model_type == "simple":
        from ..models2.model import MySimpleIEModelConf
        self.mconf = MySimpleIEModelConf()
    elif model_type == "m3":
        from ..models3.model import M3IEModelConf
        self.mconf = M3IEModelConf()
    elif model_type == "m3a":
        from ..models3.modelA import M3AIEModelConf
        self.mconf = M3AIEModelConf()
    elif model_type == "m3r":
        from ..models3.modelR import M3RIEModelConf
        self.mconf = M3RIEModelConf()
    else:
        zfatal(f"Unknown model type: {model_type}, please provide correct type!")
    # =====
    #
    if args is not None:
        self.update_from_args(args)
    self.validate()
def get_data_writer(file_or_fd, output_format):
    if output_format == "conllu":
        return ParserConlluWriter(file_or_fd)
    elif output_format == "plain":
        zwarn("May be meaningless to write plain files for parses!")
        return ParserPlainWriter(file_or_fd)
    elif output_format == "json":
        return ParserJsonWriter(file_or_fd)
    else:
        zfatal("Unknown output_format %s, should select from {conllu,plain,json}" % output_format)
def precompute_input0(self, input0):
    ff_score0 = None
    ff2_hid0 = None
    biaff_hid0 = None
    if self.use_ff:
        ff_score0 = self.A0(input0)
    if self.use_ff2:  # todo(+3)
        zfatal("Not supported in this mode!")
    if self.use_biaffine:
        biaff_hid0 = BK.matmul(input0, self.W)
    # return (ff_score0, ff2_hid0, biaff_hid0)
    return (ff_score0, biaff_hid0)
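# Illustrative sketch, not part of the original class: the precompute_input0/postcompute_input1
# pair above factorizes the biaffine term x0^T W x1 so that the input0 side can be cached and
# reused. This numpy analogue uses made-up sizes and omits bias, masking, dropout and biaffine_div.
import numpy as np

B, in0, in1, out = 2, 5, 4, 3                 # hypothetical batch / feature sizes
x0 = np.random.randn(B, in0)
x1 = np.random.randn(B, in1)
W = np.random.randn(in0, in1 * out)           # analogue of self.W with shape [in0, in1*out]

hid0 = x0 @ W                                 # "precompute": [B, in1*out], reusable across input1
expr0 = hid0.reshape(B, in1, out)             # [B, in1, out]
score = np.einsum("bj,bjo->bo", x1, expr0)    # "postcompute": [B, out]

# identical to computing the full bilinear form in one shot
direct = np.einsum("bi,ijo,bj->bo", x0, W.reshape(in0, in1, out), x1)
assert np.allclose(score, direct)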
def _marginal(self, full_score_expr, maske_expr, lengths_arr):
    if self.alg_unproj:
        marginals_expr = nmarginal_unproj(full_score_expr, maske_expr, lengths_arr, labeled=True)
    elif self.alg_proj:
        marginals_expr = nmarginal_proj(full_score_expr, maske_expr, lengths_arr, labeled=True)
    else:
        zfatal("Unsupported marginal-calculation for the decoding algorithm of " + self.conf.iconf.dec_algorithm)
        marginals_expr = None
    return marginals_expr
def get_data_reader(file_or_fd, input_format, use_la0, noef_link0, aux_repr_file=None, max_evt_layers=100):
    if input_format == "json":
        r = MyDocReader(file_or_fd, use_la0, noef_link0, alter_carg_by_coref=True, max_evt_layers=max_evt_layers)
    else:
        zfatal("Unknown input_format %s" % input_format)
        r = None
    if aux_repr_file is not None and len(aux_repr_file) > 0:
        r = AuxDataReader(r, aux_repr_file, "aux_repr")
    return r
def build_model(model_type, conf, vpack):
    mconf = conf.mconf
    model = None
    if model_type == "simple":
        from ..models2.model import MySimpleIEModel
        model = MySimpleIEModel(mconf, vpack)
    elif model_type == "m3":
        from ..models3.model import M3IEModel
        model = M3IEModel(mconf, vpack)
    elif model_type == "m3a":
        from ..models3.modelA import M3AIEModel
        model = M3AIEModel(mconf, vpack)
    elif model_type == "m3r":
        from ..models3.modelR import M3RIEModel
        model = M3RIEModel(mconf, vpack)
    else:
        zfatal(f"Unknown model type: {model_type}, please provide correct type!")
    return model
def __init__(self, partype, args):
    # top-levels
    self.partype = partype
    self.conf_output = ""
    self.log_file = Logger.MAGIC_CODE
    self.msp_seed = 9341
    #
    self.niconf = NIConf()  # nn-init-conf
    self.dconf = DConf()  # data-conf
    # parser-conf
    if partype == "graph":
        from ..graph.parser import GraphParserConf
        self.pconf = GraphParserConf()
    elif partype == "td":
        from ..transition.topdown.parser import TdParserConf
        self.pconf = TdParserConf()
    elif partype == "ef":
        from ..ef.parser import EfParserConf
        self.pconf = EfParserConf()
    elif partype == "g1":
        from ..ef.parser import G1ParserConf
        self.pconf = G1ParserConf()
    elif partype == "g2":
        from ..ef.parser import G2ParserConf
        self.pconf = G2ParserConf()
    elif partype == "s2":
        from ..ef.parser import S2ParserConf
        self.pconf = S2ParserConf()
    elif partype == "fp":
        from ..zfp.fp import FpParserConf
        self.pconf = FpParserConf()
    else:
        zfatal(f"Unknown parser type: {partype}, please provide correct type with the option.")
    # =====
    #
    self.update_from_args(args)
    self.validate()
def output_tags(self, scheme):
    ret = []
    if scheme == "BIO":
        begin_tag, end_tag, single_tag = "B", "I", "B"
    elif scheme == "BIOES":
        begin_tag, end_tag, single_tag = "B", "E", "S"
    else:
        zfatal("Unknown tagging scheme")
    #
    for one in self.all_chunks:
        start, end, type = one
        if type:
            if end - start == 1:
                ret.append(single_tag + "-" + type)
            else:
                ret.append(begin_tag + "-" + type)
                for _ in range(end - start - 2):
                    ret.append("I-" + type)
                ret.append(end_tag + "-" + type)
        else:
            ret.append("O")
    zcheck(len(ret) == self.length, "Err length.")
    return ret
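# Illustrative sketch, not part of the original class: converting hypothetical (start, end, type)
# chunk triples into BIOES tags, mirroring the scheme handled by output_tags above.
def _demo_bioes(chunks, length):
    ret = ["O"] * length
    for start, end, ctype in chunks:
        if ctype:
            if end - start == 1:
                ret[start] = "S-" + ctype
            else:
                ret[start] = "B-" + ctype
                for i in range(start + 1, end - 1):
                    ret[i] = "I-" + ctype
                ret[end - 1] = "E-" + ctype
    return ret

# e.g. a two-token PER chunk at [1, 3) and a one-token LOC chunk at [4, 5) over 5 tokens
assert _demo_bioes([(1, 3, "PER"), (4, 5, "LOC")], 5) == ["O", "B-PER", "E-PER", "O", "S-LOC"]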
def _decode(self, full_score_expr, maske_expr, lengths_arr):
    if self.alg_unproj:
        return nmst_unproj(full_score_expr, maske_expr, lengths_arr, labeled=True, ret_arr=True)
    elif self.alg_proj:
        return nmst_proj(full_score_expr, maske_expr, lengths_arr, labeled=True, ret_arr=True)
    elif self.alg_greedy:
        return nmst_greedy(full_score_expr, maske_expr, lengths_arr, labeled=True, ret_arr=True)
    else:
        zfatal("Unknown decoding algorithm " + self.conf.iconf.dec_algorithm)
        return None
def build_model(partype, conf, vpack):
    pconf = conf.pconf
    parser = None
    if partype == "graph":
        # original first-order graph with various output constraints
        from ..graph.parser import GraphParser
        parser = GraphParser(pconf, vpack)
    elif partype == "td":
        # re-implementation of the top-down stack-pointer parser
        zwarn("Warning: Current implementation of td-mode is deprecated and outdated.")
        from ..transition.topdown.parser import TdParser
        parser = TdParser(pconf, vpack)
    elif partype == "ef":
        # generalized easy-first parser
        from ..ef.parser import EfParser
        parser = EfParser(pconf, vpack)
    elif partype == "g1":
        # first-order graph parser
        from ..ef.parser import G1Parser
        parser = G1Parser(pconf, vpack)
    elif partype == "g2":
        # higher-order graph parser
        from ..ef.parser import G2Parser
        parser = G2Parser(pconf, vpack)
    elif partype == "s2":
        # two-stage parser
        from ..ef.parser import S2Parser
        parser = S2Parser(pconf, vpack)
    elif partype == "fp":
        # the finale parser
        from ..zfp.fp import FpParser
        parser = FpParser(pconf, vpack)
    else:
        zfatal("Unknown parser type: %s" % partype)
    return parser
def get_data_reader(file_or_fd, input_format, aug_code, use_la0, aux_repr_file=None, aux_score_file=None, cut=None):
    cut = -1 if (cut is None or len(cut) == 0) else int(cut)
    if input_format == "conllu":
        r = ParseConlluReader(file_or_fd, aug_code, use_la0=use_la0, cut=cut)
    elif input_format == "plain":
        r = ParseTextReader(file_or_fd, aug_code, cut=cut)
    elif input_format == "json":
        r = ParseJsonReader(file_or_fd, aug_code, use_la0=use_la0, cut=cut)
    else:
        zfatal("Unknown input_format %s, should select from {conllu,plain,json}" % input_format)
        r = None
    if aux_repr_file is not None and len(aux_repr_file) > 0:
        r = AuxDataReader(r, aux_repr_file, "aux_repr")
    if aux_score_file is not None and len(aux_score_file) > 0:
        r = AuxDataReader(r, aux_score_file, "aux_score")
    return r
def refresh(self, rop=None):
    zfatal("Should call special_refresh instead!")
def get_data_writer(file_or_fd, output_format):
    if output_format == "json":
        return MyDocWriter(file_or_fd)
    else:
        zfatal("Unknown output_format %s" % output_format)
def _restart(self):
    if self.it_ is None or self.restartable:
        self.it_ = iter(self.src_)
    else:
        zfatal("Cannot restart a non-repeatable stream")
def _restart(self):
    if self.started:
        zfatal("Cannot restart this fd stream")
    self.started = True
def __init__(self, pc: BK.ParamCollection, econf: EncConf):
    super().__init__(pc, None, None)
    self.conf = econf
    #
    self.input_dim = econf._input_dim
    self.enc_hidden = econf.enc_hidden
    # add the sublayers
    self.layers = []  # todo(0): allowing repeated names
    last_dim = self.input_dim
    for name in econf.enc_ordering:
        if name == "rnn":
            if econf.enc_rnn_layer > 0:
                rnn_bidirect, rnn_sep_bidirection = econf.enc_rnn_bidirect, econf.enc_rnn_sep_bidirection
                rnn_enc_size = self.enc_hidden // 2 if rnn_bidirect else self.enc_hidden
                rnn_layer = self.add_sub_node(
                    "rnn", RnnLayerBatchFirstWrapper(
                        pc, RnnLayer(pc, last_dim, rnn_enc_size, econf.enc_rnn_layer,
                                     node_type=econf.enc_rnn_type, bidirection=rnn_bidirect,
                                     sep_bidirection=rnn_sep_bidirection)))
                self.layers.append(rnn_layer)
        # todo(+2): different i/o sizes for cnn and att?
        elif name == "cnn":
            if econf.enc_cnn_layer > 0:
                per_cnn_size = self.enc_hidden // len(econf.enc_cnn_windows)
                cnn_layer = self.add_sub_node(
                    "cnn", Sequential(pc, [CnnLayer(pc, last_dim, per_cnn_size, econf.enc_cnn_windows, act="elu")
                                           for _ in range(econf.enc_cnn_layer)]))
                self.layers.append(cnn_layer)
        elif name == "att":
            if econf.enc_att_layer > 0:
                zcheck(last_dim == self.enc_hidden, "I/O should have same dim for Att-Enc")
                att_layer = self.add_sub_node(
                    "att", TransformerEncoder(pc, econf.enc_att_layer, last_dim, econf.enc_att_ff,
                                              econf.enc_att_add_wrapper, econf.enc_att_conf,
                                              final_act=econf.enc_att_final_act,
                                              fixed_range_vals=econf.enc_att_fixed_ranges))
                self.layers.append(att_layer)
        elif name == "att2":
            if econf.enc_att2_layer > 0:
                zcheck(last_dim == self.enc_hidden, "I/O should have same dim for Att-Enc")
                att2_layer = self.add_sub_node(
                    "att2", Transformer2Encoder(pc, econf.enc_att2_layer, last_dim, econf.enc_att2_conf,
                                                short_range=econf.enc_att2_short_range,
                                                long_ranges=econf.enc_att2_long_ranges))
                self.layers.append(att2_layer)
        else:
            zfatal("Unknown encoder name: " + name)
        if len(self.layers) > 0:
            last_dim = self.layers[-1].get_output_dims()[-1]
    self.output_dim = last_dim
    #
    if econf.no_final_dropout:
        self.disable_final_dropout()
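# Illustrative sketch, not part of the original module: a minimal plain-PyTorch analogue of the
# enc_ordering stacking above (hypothetical sizes; the project's BK/RnnLayer/TransformerEncoder
# wrappers are replaced by torch.nn modules). It shows how each sub-encoder consumes the output
# dimension of the previous one, while attention blocks require input dim == hidden dim.
import torch.nn as nn

def _demo_build_stack(input_dim, hidden, ordering=("rnn", "att")):
    layers, last_dim = [], input_dim
    for name in ordering:
        if name == "rnn":
            # bidirectional RNN halves the per-direction size so the output is `hidden`
            layers.append(nn.LSTM(last_dim, hidden // 2, batch_first=True, bidirectional=True))
            last_dim = hidden
        elif name == "att":
            assert last_dim == hidden, "I/O should have same dim for Att-Enc"
            layers.append(nn.TransformerEncoder(
                nn.TransformerEncoderLayer(hidden, nhead=4, batch_first=True), num_layers=2))
        else:
            raise ValueError("Unknown encoder name: " + name)
    return layers, last_dim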