# Module-level imports assumed by the code below; the aliases (np, F, L) match
# how they are used throughout this file.  FeatureExtractor, Param,
# read_model_defs, read_pretrained_embeddings, DyerLSTM, QRNNLayer, Biaffine,
# Bilinear, scanl and IGNORE are project-local and assumed to be imported from
# elsewhere in the package.
import sys
import json

import numpy as np
import chainer
import chainer.functions as F
import chainer.links as L
from chainer import ChainList, Variable
from chainer.dataset.convert import concat_examples


# Only this constructor appears in the source; the class name and the
# DatasetMixin base are assumptions made so the method reads in context.
class LSTMParserTriTrainDataset(chainer.dataset.DatasetMixin):
    def __init__(self, model_path, ccgbank_path, tritrain_path, weight):
        self.model_path = model_path
        self.targets = read_model_defs(model_path + "/target.txt")
        self.extractor = FeatureExtractor(model_path)
        self.weight = weight
        self.ncopies = 15
        with open(ccgbank_path) as f:
            self.ccgbank_samples = sorted(json.load(f), key=lambda x: len(x[1][0]))
        self.ccgbank_size = len(self.ccgbank_samples)
        with open(tritrain_path) as f:
            self.tritrain_samples = sorted(json.load(f), key=lambda x: len(x[1][0]))
        self.tritrain_size = len(self.tritrain_samples)
        print >> sys.stderr, "len(ccgbank):", self.ccgbank_size
        print >> sys.stderr, "len(ccgbank) * # copies:", self.ccgbank_size * self.ncopies
        print >> sys.stderr, "len(tritrain):", self.tritrain_size
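    # A minimal sketch (not in the original source) of how this dataset could
    # expose the mixed corpus, assuming the intent is "ncopies copies of
    # CCGBank followed by the tri-train data", with `weight` down-weighting
    # the automatically parsed tri-train sentences.  The index mapping and the
    # returned tuple mirror LSTMParserDataset.get_example below; both are
    # assumptions, not the original implementation.
    def __len__(self):
        return self.ccgbank_size * self.ncopies + self.tritrain_size

    def get_example(self, i):
        # the first ncopies * ccgbank_size indices cycle over CCGBank,
        # the remaining ones index into the tri-train samples
        if i < self.ccgbank_size * self.ncopies:
            words, [cats, deps] = self.ccgbank_samples[i % self.ccgbank_size]
            weight = np.array(1, 'f')
        else:
            words, [cats, deps] = \
                self.tritrain_samples[i - self.ccgbank_size * self.ncopies]
            weight = np.array(self.weight, 'f')
        splitted = words.split(" ")
        w, s, p = self.extractor.process(splitted)
        cats = np.array([-1] + [self.targets.get(x, IGNORE) for x in cats] + [-1], 'i')
        deps = np.array([-1] + deps + [-1], 'i')
        l = len(splitted) + 2
        return w, s, p, l, cats, deps, weight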
class LSTMParserDataset(chainer.dataset.DatasetMixin):
    def __init__(self, model_path, samples_path):
        self.model_path = model_path
        self.targets = read_model_defs(model_path + "/target.txt")
        self.extractor = FeatureExtractor(model_path)
        with open(samples_path) as f:
            self.samples = sorted(json.load(f), key=lambda x: len(x[1][0]))

    def __len__(self):
        return len(self.samples)

    def get_example(self, i):
        words, [cats, deps] = self.samples[i]
        splitted = words.split(" ")
        w, s, p = self.extractor.process(splitted)
        cats = np.array([-1] + [self.targets.get(x, IGNORE) for x in cats] + [-1], 'i')
        deps = np.array([-1] + deps + [-1], 'i')
        l = len(splitted) + 2
        weight = np.array(1, 'f')
        return w, s, p, l, cats, deps, weight
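# A minimal batching sketch, not part of the original module: it shows how
# LSTMParserDataset examples can be combined into padded minibatches with
# chainer.dataset.concat_examples.  The function name, paths and batch size
# are hypothetical placeholders.
def _example_minibatch(model_path, samples_path, batchsize=4):
    dataset = LSTMParserDataset(model_path, samples_path)
    batch = [dataset.get_example(i) for i in range(min(batchsize, len(dataset)))]
    # pads the variable-length id/label arrays to a common length with -1 (= IGNORE)
    return chainer.dataset.concat_examples(batch, padding=-1)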
class PeepHoleLSTMParser(chainer.Chain):
    def __init__(self, model_path, word_dim=None, afix_dim=None, nlayers=2,
                 hidden_dim=128, elu_dim=64, dep_dim=100, dropout_ratio=0.5,
                 use_cudnn=False):
        self.model_path = model_path
        defs_file = model_path + "/tagger_defs.txt"
        if word_dim is None:
            # test time: read hyper-parameters back from tagger_defs.txt
            self.train = False
            Param.load(self, defs_file)
            self.extractor = FeatureExtractor(model_path)
        else:
            # train time: record hyper-parameters to tagger_defs.txt
            self.train = True
            p = Param(self)
            p.dep_dim = dep_dim
            p.word_dim = word_dim
            p.afix_dim = afix_dim
            p.hidden_dim = hidden_dim
            p.elu_dim = elu_dim
            p.nlayers = nlayers
            p.dump(defs_file)

        self.targets = read_model_defs(model_path + "/target.txt")
        self.words = read_model_defs(model_path + "/words.txt")
        self.suffixes = read_model_defs(model_path + "/suffixes.txt")
        self.prefixes = read_model_defs(model_path + "/prefixes.txt")
        self.in_dim = self.word_dim + 8 * self.afix_dim
        self.dropout_ratio = dropout_ratio
        super(PeepHoleLSTMParser, self).__init__(
            emb_word=L.EmbedID(len(self.words), self.word_dim, ignore_label=IGNORE),
            emb_suf=L.EmbedID(len(self.suffixes), self.afix_dim, ignore_label=IGNORE),
            emb_prf=L.EmbedID(len(self.prefixes), self.afix_dim, ignore_label=IGNORE),
            lstm_f1=DyerLSTM(self.in_dim, self.hidden_dim),
            lstm_f2=DyerLSTM(self.hidden_dim, self.hidden_dim),
            lstm_b1=DyerLSTM(self.in_dim, self.hidden_dim),
            lstm_b2=DyerLSTM(self.hidden_dim, self.hidden_dim),
            linear_cat1=L.Linear(2 * self.hidden_dim, self.elu_dim),
            linear_cat2=L.Linear(self.elu_dim, len(self.targets)),
            linear_dep=L.Linear(2 * self.hidden_dim, self.dep_dim),
            linear_head=L.Linear(2 * self.hidden_dim, self.dep_dim),
            biaffine=Biaffine(self.dep_dim))

    def load_pretrained_embeddings(self, path):
        self.emb_word.W.data = read_pretrained_embeddings(path)

    def __call__(self, ws, ss, ps, cat_ts, dep_ts):
        """
        ws, ss, ps: word / suffix / prefix ids, shape (batchsize, length)
        cat_ts: gold supertags, shape (batchsize, length)
        dep_ts: gold dependency heads, shape (batchsize, length)
        """
        batchsize, length = ws.shape
        cat_ys, dep_ys = self.forward(ws, ss, ps)

        cat_ts = [F.reshape(x, (batchsize,)) for x
                  in F.split_axis(F.transpose(cat_ts), length, 0)]
        dep_ts = [F.reshape(x, (batchsize,)) for x
                  in F.split_axis(F.transpose(dep_ts), length, 0)]

        cat_loss = reduce(lambda x, y: x + y,
                          [F.softmax_cross_entropy(y, t)
                           for y, t in zip(cat_ys, cat_ts)])
        cat_acc = reduce(lambda x, y: x + y,
                         [F.accuracy(y, t, ignore_label=IGNORE)
                          for y, t in zip(cat_ys, cat_ts)])
        dep_loss = reduce(lambda x, y: x + y,
                          [F.softmax_cross_entropy(y, t)
                           for y, t in zip(dep_ys, dep_ts)])
        dep_acc = reduce(lambda x, y: x + y,
                         [F.accuracy(y, t, ignore_label=IGNORE)
                          for y, t in zip(dep_ys, dep_ts)])
        cat_acc /= length
        dep_acc /= length

        chainer.report({
            "tagging_loss": cat_loss,
            "tagging_accuracy": cat_acc,
            "parsing_loss": dep_loss,
            "parsing_accuracy": dep_acc
        }, self)
        return cat_loss + dep_loss

    def forward(self, ws, ss, ps):
        batchsize, length = ws.shape
        xp = chainer.cuda.get_array_module(ws[0])
        ws = self.emb_word(ws)  # (batch, length, word_dim)
        ss = F.reshape(self.emb_suf(ss), (batchsize, length, -1))
        ps = F.reshape(self.emb_prf(ps), (batchsize, length, -1))
        hs = F.transpose(F.concat([ws, ss, ps], 2), (1, 0, 2))
        hs = F.dropout(hs, self.dropout_ratio, train=self.train)
        hs = F.split_axis(hs, length, 0)

        hs_f = []
        hs_b = []
        self._init_state()
        for h_in_f, h_in_b in zip(hs, reversed(hs)):
            h_f = self.lstm_f2(self.lstm_f1(F.reshape(h_in_f, (-1, self.in_dim))))
            hs_f.append(h_f)
            h_b = self.lstm_b2(self.lstm_b1(F.reshape(h_in_b, (-1, self.in_dim))))
            hs_b.append(h_b)

        # concatenate the forward and backward states at each position
        hs = [F.concat([h_f, h_b]) for h_f, h_b in zip(hs_f, reversed(hs_b))]

        cat_ys = [self.linear_cat2(F.dropout(
            F.elu(self.linear_cat1(h)), 0.5, train=self.train)) for h in hs]

        dep_ys = [self.biaffine(
            F.elu(F.dropout(self.linear_dep(h), 0.32, train=self.train)),
            F.elu(F.dropout(self.linear_head(h), 0.32, train=self.train)))
            for h in hs]

        return cat_ys, dep_ys

    def predict(self, xs):
        """
        xs: list of tokenized sentences
        """
        batchsize = len(xs)
        fs = [self.extractor.process(x) for x in xs]
        ws, ss, ps = concat_examples(fs, padding=IGNORE)
        cat_ys, dep_ys = self.forward(ws, ss, ps)
        cat_ys = F.transpose(F.stack(cat_ys, 2), (0, 2, 1))
        dep_ys = F.transpose(F.stack(dep_ys, 2), (0, 2, 1))
        cat_ys = [F.squeeze(y, 0).data[1:len(x) + 1] for x, y in
                  zip(xs, F.split_axis(cat_ys, batchsize, 0))]
        dep_ys = [F.log_softmax(F.squeeze(y, 0)[1:len(x) + 1, :-1]).data
                  for x, y in zip(xs, F.split_axis(dep_ys, batchsize, 0))]
        return cat_ys, dep_ys

    def predict_doc(self, doc, batchsize=16):
        """
        doc: list of tokenized sentences
        """
        res = []
        for i in range(0, len(doc), batchsize):
            res.extend([(i + j, 0, y) for j, y in
                        enumerate(self.predict(doc[i:i + batchsize]))])
        return res

    def _init_state(self):
        self.lstm_f1.reset_state()
        self.lstm_f2.reset_state()
        self.lstm_b1.reset_state()
        self.lstm_b2.reset_state()

    @property
    def cats(self):
        return zip(*sorted(self.targets.items(), key=lambda x: x[1]))[0]
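# A minimal inference sketch, not part of the original module: the parser is
# built in test mode (word_dim=None, so hyper-parameters are read back from
# tagger_defs.txt), trained weights are restored, and tokenized sentences are
# supertagged and scored for dependencies.  The function name and the
# "tagger_model" file name are hypothetical; this assumes the weights were
# saved with chainer.serializers.save_npz.
def _example_parse(model_path, sentences):
    parser = PeepHoleLSTMParser(model_path)
    chainer.serializers.load_npz(model_path + "/tagger_model", parser)
    # sentences: list of token lists, e.g. [["This", "is", "a", "test", "."]]
    cat_scores, dep_scores = parser.predict(sentences)
    return cat_scores, dep_scores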
class QRNNParser(chainer.Chain):
    def __init__(self, model_path, word_dim=None, afix_dim=None, nlayers=2,
                 hidden_dim=128, elu_dim=64, dep_dim=100, dropout_ratio=0.5,
                 use_cudnn=False):
        self.model_path = model_path
        defs_file = model_path + "/tagger_defs.txt"
        if word_dim is None:
            # test time: read hyper-parameters back from tagger_defs.txt
            self.train = False
            Param.load(self, defs_file)
            self.extractor = FeatureExtractor(model_path)
        else:
            # train time: record hyper-parameters to tagger_defs.txt
            self.train = True
            p = Param(self)
            p.dep_dim = dep_dim
            p.word_dim = word_dim
            p.afix_dim = afix_dim
            p.hidden_dim = hidden_dim
            p.elu_dim = elu_dim
            p.nlayers = nlayers
            p.n_words = len(read_model_defs(model_path + "/words.txt"))
            p.n_suffixes = len(read_model_defs(model_path + "/suffixes.txt"))
            p.n_prefixes = len(read_model_defs(model_path + "/prefixes.txt"))
            p.targets = read_model_defs(model_path + "/target.txt")
            p.dump(defs_file)

        self.in_dim = self.word_dim + 8 * self.afix_dim
        self.dropout_ratio = dropout_ratio
        super(QRNNParser, self).__init__(
            emb_word=L.EmbedID(self.n_words, self.word_dim, ignore_label=IGNORE),
            emb_suf=L.EmbedID(self.n_suffixes, self.afix_dim, ignore_label=IGNORE),
            emb_prf=L.EmbedID(self.n_prefixes, self.afix_dim, ignore_label=IGNORE),
            qrnn_fs=ChainList(),
            qrnn_bs=ChainList(),
            arc_dep=L.Linear(2 * self.hidden_dim, self.dep_dim),
            arc_head=L.Linear(2 * self.hidden_dim, self.dep_dim),
            rel_dep=L.Linear(2 * self.hidden_dim, self.dep_dim),
            rel_head=L.Linear(2 * self.hidden_dim, self.dep_dim),
            biaffine_arc=Biaffine(self.dep_dim),
            biaffine_tag=Bilinear(self.dep_dim, self.dep_dim, len(self.targets)))

        in_dim = self.in_dim
        for _ in range(self.nlayers):
            self.qrnn_fs.add_link(QRNNLayer(in_dim, self.hidden_dim))
            self.qrnn_bs.add_link(QRNNLayer(in_dim, self.hidden_dim))
            in_dim = self.hidden_dim
            # in_dim += self.hidden_dim

    def load_pretrained_embeddings(self, path):
        self.emb_word.W.data = read_pretrained_embeddings(path)

    def __call__(self, ws, ss, ps, ls, cat_ts, dep_ts, weights):
        """
        ws, ss, ps: word / suffix / prefix ids
        ls: sentence lengths (including the two boundary tokens)
        cat_ts, dep_ts: gold supertags and dependency heads per sentence
        weights: per-sentence loss weights
        """
        try:
            batchsize, length = ws.shape
            cat_ys, dep_ys = self.forward(
                ws, ss, ps, ls, dep_ts if self.train else None)
            cat_loss = reduce(lambda x, y: x + y,
                              [we * F.softmax_cross_entropy(y, t)
                               for y, t, we in zip(cat_ys, cat_ts, weights)])
            cat_acc = reduce(lambda x, y: x + y,
                             [F.accuracy(y, t, ignore_label=IGNORE)
                              for y, t in zip(cat_ys, cat_ts)]) / batchsize
            dep_loss = reduce(lambda x, y: x + y,
                              [we * F.softmax_cross_entropy(y, t)
                               for y, t, we in zip(dep_ys, dep_ts, weights)])
            dep_acc = reduce(lambda x, y: x + y,
                             [F.accuracy(y, t, ignore_label=IGNORE)
                              for y, t in zip(dep_ys, dep_ts)]) / batchsize
        except:
            # dump the shapes of the offending minibatch and return a zero loss
            print "caught erroneous example, ignoring..."
            print [w.shape for w in ws]
            print [w.shape for w in ss]
            print [w.shape for w in ps]
            print ls
            print [w.shape for w in cat_ts]
            print [w.shape for w in dep_ts]
            xp = chainer.cuda.get_array_module(ws[0])
            return Variable(xp.array(0, 'f'))

        chainer.report({
            "tagging_loss": cat_loss,
            "tagging_accuracy": cat_acc,
            "parsing_loss": dep_loss,
            "parsing_accuracy": dep_acc
        }, self)
        return cat_loss + dep_loss

    def forward(self, ws, ss, ps, ls, dep_ts=None):
        batchsize, length = ws.shape
        split = scanl(lambda x, y: x + y, 0, ls)[1:-1]
        xp = chainer.cuda.get_array_module(ws[0])
        ws = self.emb_word(ws)  # (batch, length, word_dim)
        ss = F.reshape(self.emb_suf(ss), (batchsize, length, -1))
        ps = F.reshape(self.emb_prf(ps), (batchsize, length, -1))
        hs = F.concat([ws, ss, ps], 2)
        hs = F.dropout(hs, self.dropout_ratio, train=self.train)

        fs = hs
        for qrnn_f in self.qrnn_fs:
            inp = fs
            fs = qrnn_f(inp)

        bs = hs[:, ::-1, :]
        for qrnn_b in self.qrnn_bs:
            inp = bs
            bs = qrnn_b(inp)

        # alternative: feed each QRNN layer the concatenation of all previous
        # layers' outputs
        # fs = [hs]
        # for qrnn_f in self.qrnn_fs:
        #     inp = F.concat(fs, 2)
        #     fs.append(F.dropout(qrnn_f(inp), 0.32, train=self.train))
        # fs = fs[-1]
        #
        # bs = [hs[:, ::-1, :]]
        # for qrnn_b in self.qrnn_bs:
        #     inp = F.concat(bs, 2)
        #     bs.append(F.dropout(qrnn_b(inp), 0.32, train=self.train))
        # bs = bs[-1]

        hs = F.concat([fs, bs[:, ::-1, :]], 2)

        _, hs_len, hidden = hs.shape
        hs = [F.reshape(var, (hs_len, hidden))[:l] for l, var in
              zip(ls, F.split_axis(hs, batchsize, 0))]

        dep_ys = [self.biaffine_arc(
            F.elu(F.dropout(self.arc_dep(h), 0.32, train=self.train)),
            F.elu(F.dropout(self.arc_head(h), 0.32, train=self.train)))
            for h in hs]

        # at train time score the labels against the gold heads,
        # at test time against the predicted ones
        if dep_ts is not None:
            heads = dep_ts
        else:
            heads = [F.argmax(y, axis=1) for y in dep_ys]

        heads = F.elu(F.dropout(
            self.rel_head(
                F.vstack([F.embed_id(t, h, ignore_label=IGNORE)
                          for h, t in zip(hs, heads)])),
            0.32, train=self.train))

        childs = F.elu(F.dropout(
            self.rel_dep(F.vstack(hs)), 0.32, train=self.train))

        cat_ys = self.biaffine_tag(childs, heads)
        cat_ys = list(F.split_axis(cat_ys, split, 0))

        return cat_ys, dep_ys

    def predict(self, xs):
        """
        xs: list of tokenized sentences
        """
        fs = [self.extractor.process(x) for x in xs]
        ws, ss, ps = concat_examples(fs)
        ls = [len(x) + 2 for x in xs]
        cat_ys, dep_ys = self.forward(ws, ss, ps, ls)
        return zip([F.log_softmax(y[1:-1]).data for y in cat_ys],
                   [F.log_softmax(y[1:-1, :-1]).data for y in dep_ys])

    def predict_doc(self, doc, batchsize=16):
        """
        doc: list of tokenized sentences
        """
        res = []
        for i in range(0, len(doc), batchsize):
            res.extend([(i + j, 0, y) for j, y in
                        enumerate(self.predict(doc[i:i + batchsize]))])
        return res

    @property
    def cats(self):
        return zip(*sorted(self.targets.items(), key=lambda x: x[1]))[0]
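# `scanl`, used in QRNNParser.forward to turn the sentence lengths `ls` into
# split points for F.split_axis, is a project-local helper not shown in this
# file.  The sketch below is an assumption about its behaviour, modelled on
# Haskell's scanl: it returns every prefix of a left fold.
def _scanl_sketch(f, init, xs):
    out = [init]
    for x in xs:
        init = f(init, x)
        out.append(init)
    return out

# e.g. _scanl_sketch(lambda x, y: x + y, 0, [5, 7, 6]) == [0, 5, 12, 18];
# dropping the first and last entries gives [5, 12], the offsets at which the
# concatenated per-sentence rows are split back into individual sentences.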