def setUp(self):
    wdic = {
        "<MASK>": 0,
        "<RARE>": 1,
        "the": 10,
        "a": 5,
        "his": 50,
        "abracadabrqmsd--qsdfmqgf-": 6
    }
    wdic2 = {
        "<MASK>": 0,
        "<RARE>": 1,
        "the": 2,
        "a": 3,
        "his": 4,
        "abracadabrqmsd--qsdfmqgf-": 5,
        "qsdfqsdf": 7
    }
    self.adapted = q.WordLinout(10, worddic=wdic, bias=False)
    self.vanilla = q.WordLinout(
        10, worddic=wdic,
        weight=self.adapted.lin.weight.detach().numpy(),
        bias=False)
    self.adapted = self.adapted.adapt(wdic2)
def __init__(self, rnn_type, ntoken, ninp, nhid, nlayers, dropout=0.5,
             dropconnect=0., tie_weights=False):
    super(NewRNNModel, self).__init__()
    worddic = dict(zip([str(x) for x in range(ntoken)], range(ntoken)))
    dims = [ninp] + [nhid] * nlayers
    self.nhid = nhid
    self.nlayers = nlayers
    self.dims = dims
    self.D = worddic
    self.states = None
    # make layers
    self.emb = q.WordEmb(dims[0], worddic=self.D)
    self.out = q.WordLinout(dims[-1], worddic=self.D)
    self.rnn = self.encodertype(*dims, bidir=False, bias=True,
                                dropout_in=dropout, dropconnect=dropconnect)
    self.rnn.ret_all_states = True
    self.dropout = nn.Dropout(p=dropout)
def run(lr=0.001):
    x = torch.randint(1, 100, (5, 8, 6), dtype=torch.int64)
    y = x[:, 1:, :-1]
    y = torch.cat([torch.ones(y.size(0), y.size(1), 1, dtype=y.dtype), y], 2)
    y = torch.cat(
        [y, torch.randint(1, 100, (y.size(0), 1, y.size(2))).long()], 1)
    D = dict(zip(["<MASK>"] + [str(i) for i in range(1, 100)], range(100)))
    m = BasicHierarchicalEncoderDecoder(q.WordEmb(10, worddic=D),
                                        q.WordLinout(25, worddic=D),
                                        10, (20,), (30,), (25,))
    pred = m(x, y)
def test_overridden(self):
    worddic = "second third fourth fifth"
    worddic = dict(zip(worddic.split(), range(len(worddic.split()))))
    linout = q.WordLinout(10, worddic=worddic)
    l = self.linout.override(linout)
    x = Variable(torch.randn(7, 10))
    msk = Variable(
        torch.FloatTensor([[1, 0, 1, 1, 0, 1, 0]] * 5 +
                          [[0, 1, 0, 0, 1, 0, 1]] * 2))
    y = l(x, mask=msk)
    print(y)
def setUp(self):
    wdic = {
        "<MASK>": 0,
        "<RARE>": 1,
        "the": 10,
        "a": 5,
        "his": 50,
        "monkey": 6
    }
    wdic2 = {
        "<MASK>": 0,
        "<RARE>": 1,
        "the": 2,
        "a": 3,
        "his": 4,
        "abracadabrqmsd--qsdfmqgf-": 5,
        "qsdfqsdf": 7
    }
    self.base = q.WordLinout(10, worddic=wdic, bias=False)
    self.over = q.WordLinout(10, worddic=wdic2, bias=False)
    self.overridden = self.base.override(self.over)
def __init__(self, D, embdim, zdim, startsym, *innerdim, **kw):
    super(Decoder, self).__init__()
    self.emb = q.WordEmb(embdim, worddic=D)
    innerdim = (embdim + zdim,) + innerdim
    self.layers = torch.nn.ModuleList(modules=[
        q.LSTMCell(innerdim[i - 1], innerdim[i])
        for i in range(1, len(innerdim))
    ])
    self.linout = q.WordLinout(innerdim[-1], worddic=D)
    self.sm = torch.nn.Softmax(-1)
    self.maxtime = q.getkw(kw, "maxtime", 100)
    self.startid = D[startsym]
    self.sm_sample = True
    self.zdim = zdim
def __init__(self, *dims: int, worddic: dict = None, bias: bool = True,
             dropout: float = 0., **kw):
    super(RNNLayer_LM, self).__init__(**kw)
    self.dims = dims
    self.D = worddic
    self.states = None
    # make layers
    self.emb = q.WordEmb(dims[0], worddic=self.D)
    self.out = q.WordLinout(dims[-1], worddic=self.D)
    self.rnn = self.encodertype(*dims, bidir=False, bias=bias,
                                dropout_in=dropout)
    self.rnn.ret_all_states = True
    self.dropout = torch.nn.Dropout(p=dropout)
def test_it(self):
    x = np.random.randint(0, 100, (1000, 7))
    y_inp = x[:, :-1]
    y_out = x[:, 1:]
    wD = dict((chr(xi), xi) for xi in range(100))
    ctx = torch.randn(1000, 8, 30)
    decoder_emb = q.WordEmb(20, worddic=wD)
    decoder_lstm = q.LSTMCell(20, 30)
    decoder_att = q.DotAttention()
    decoder_out = q.WordLinout(60, worddic=wD)
    decoder_cell = q.DecoderCell(decoder_emb, decoder_lstm, decoder_att,
                                 None, decoder_out)
    decoder_tf = q.TFDecoder(decoder_cell)
    y = decoder_tf(torch.tensor(x), ctx=ctx)
    # assertTrue(a, b) treats the second argument as a message and always
    # passes for a non-empty size; compare the shapes instead
    self.assertEqual(y.size(), (1000, 7, 100))
def run_classify(lr=0.001,
                 seqlen=6,
                 numex=500,
                 epochs=25,
                 batsize=10,
                 test=True,
                 cuda=False,
                 gpu=0):
    device = torch.device("cpu")
    if cuda:
        device = torch.device("cuda", gpu)

    # region construct data
    colors = "red blue green magenta cyan orange yellow grey salmon pink purple teal".split()
    D = dict(zip(colors, range(len(colors))))
    inpseqs = []
    targets = []
    for i in range(numex):
        inpseq = list(np.random.choice(colors, seqlen, replace=False))
        target = np.random.choice(range(len(inpseq)), 1)[0]
        target_class = D[inpseq[target]]
        inpseq[target] = "${}$".format(inpseq[target])
        inpseqs.append("".join(inpseq))
        targets.append(target_class)

    sm = q.StringMatrix()
    sm.tokenize = lambda x: list(x)
    for inpseq in inpseqs:
        sm.add(inpseq)
    sm.finalize()
    print(sm[0])
    print(sm.D)
    targets = np.asarray(targets)

    data = q.dataload(sm.matrix[:-100], targets[:-100], batch_size=batsize)
    valid_data = q.dataload(sm.matrix[-100:], targets[-100:],
                            batch_size=batsize)
    # endregion

    # region model
    embdim = 20
    enc2inpdim = 45
    encdim = 20
    outdim = 20
    emb = q.WordEmb(embdim, worddic=sm.D)   # sm dictionary (characters)
    out = q.WordLinout(outdim, worddic=D)   # target dictionary
    # encoders:
    enc1 = q.RNNEncoder(embdim, encdim, bidir=True)
    enc2 = q.RNNCellEncoder(enc2inpdim, outdim // 2, bidir=True)

    # model
    class Model(torch.nn.Module):
        def __init__(self, dim, _emb, _out, _enc1, _enc2, **kw):
            super(Model, self).__init__(**kw)
            self.dim, self.emb, self.out, self.enc1, self.enc2 = dim, _emb, _out, _enc1, _enc2
            self.score = torch.nn.Sequential(
                torch.nn.Linear(dim, 1, bias=False), torch.nn.Sigmoid())
            self.emb_expander = ExpandVecs(embdim, enc2inpdim, 2)
            self.enc_expander = ExpandVecs(encdim * 2, enc2inpdim, 2)

        def forward(self, x, with_att=False):
            # embed and encode
            xemb, xmask = self.emb(x)
            xenc = self.enc1(xemb, mask=xmask)
            # compute attention
            xatt = self.score(xenc).squeeze(2) * xmask.float()[:, :xenc.size(1)]
            # encode again
            _xemb = self.emb_expander(xemb[:, :xenc.size(1)])
            _xenc = self.enc_expander(xenc)
            _, xenc2 = self.enc2(_xemb, gate=xatt,
                                 mask=xmask[:, :xenc.size(1)],
                                 ret_states=True)
            scores = self.out(xenc2.view(xenc.size(0), -1))
            if with_att:
                return scores, xatt
            else:
                return scores

    model = Model(40, emb, out, enc1, enc2)
    # endregion

    # region test
    if test:
        inps = torch.tensor(sm.matrix[0:2])
        outs = model(inps)
    # endregion

    # region train
    optimizer = torch.optim.Adam(q.params_of(model), lr=lr)
    trainer = q.trainer(model).on(data).loss(torch.nn.CrossEntropyLoss(), q.Accuracy())\
        .optimizer(optimizer).hook(q.ClipGradNorm(5.)).device(device)
    validator = q.tester(model).on(valid_data).loss(q.Accuracy()).device(device)
    q.train(trainer, validator).run(epochs=epochs)
    # endregion

    # region check attention
    # TODO
    # feed a batch
    inpd = torch.tensor(sm.matrix[400:410])
    outd, att = model(inpd, with_att=True)
    outd = torch.max(outd, 1)[1].cpu().detach().numpy()
    inpd = inpd.cpu().detach().numpy()
    att = att.cpu().detach().numpy()
    rD = {v: k for k, v in sm.D.items()}
    roD = {v: k for k, v in D.items()}
    for i in range(len(att)):
        inpdi = " ".join([rD[x] for x in inpd[i]])
        outdi = roD[outd[i]]
        print("input: {}\nattention: {}\nprediction: {}".format(
            inpdi, " ".join(["{:.1f}".format(x) for x in att[i]]), outdi))
def run(lr=0.001,
        dropout=0.2,
        batsize=50,
        embdim=50,
        encdim=50,
        decdim=50,
        numlayers=1,
        bidir=False,
        which="geo",    # "geo", "atis", "jobs"
        test=True,
        ):
    settings = locals().copy()
    logger = q.log.Logger(prefix="seq2seq_base")
    logger.save_settings(**settings)

    # region data
    nlsm, qlsm, splitidxs = load_data(which=which)
    print(nlsm[0], qlsm[0])
    print(nlsm._rarewords)
    trainloader = q.dataload(nlsm.matrix[:splitidxs[0]],
                             qlsm.matrix[:splitidxs[0]],
                             batch_size=batsize, shuffle=True)
    devloader = q.dataload(nlsm.matrix[splitidxs[0]:splitidxs[1]],
                           qlsm.matrix[splitidxs[0]:splitidxs[1]],
                           batch_size=batsize, shuffle=False)
    testloader = q.dataload(nlsm.matrix[splitidxs[1]:],
                            qlsm.matrix[splitidxs[1]:],
                            batch_size=batsize, shuffle=False)
    # endregion

    # region model
    encdims = [encdim] * numlayers
    outdim = (encdim if not bidir else encdim * 2) + decdim
    nlemb = q.WordEmb(embdim, worddic=nlsm.D)
    qlemb = q.WordEmb(embdim, worddic=qlsm.D)
    nlenc = q.LSTMEncoder(embdim, *encdims, bidir=bidir, dropout_in=dropout)
    att = q.att.DotAtt()
    if numlayers > 1:
        qldec_core = torch.nn.Sequential(
            *[q.LSTMCell(_indim, _outdim, dropout_in=dropout)
              for _indim, _outdim in [(embdim, decdim)] +
                                     [(decdim, decdim)] * (numlayers - 1)])
    else:
        qldec_core = q.LSTMCell(embdim, decdim, dropout_in=dropout)
    qlout = q.WordLinout(outdim, worddic=qlsm.D)
    qldec = q.LuongCell(emb=qlemb, core=qldec_core, att=att, out=qlout)

    class Model(torch.nn.Module):
        def __init__(self, _nlemb, _nlenc, _qldec, train=True, **kw):
            super(Model, self).__init__(**kw)
            self.nlemb, self.nlenc, self._q_train = _nlemb, _nlenc, train
            if train:
                self.qldec = q.TFDecoder(_qldec)
            else:
                self.qldec = q.FreeDecoder(_qldec, maxtime=100)

        def forward(self, x, y):    # (batsize, seqlen) int ids
            xemb, xmask = self.nlemb(x)
            xenc = self.nlenc(xemb, mask=xmask)
            if self._q_train is False:
                assert(y.dim() == 2)
            dec = self.qldec(y, ctx=xenc, ctxmask=xmask[:, :xenc.size(1)])
            return dec

    m_train = Model(nlemb, nlenc, qldec, train=True)
    m_test = Model(nlemb, nlenc, qldec, train=False)

    if test:
        test_out = m_train(torch.tensor(nlsm.matrix[:5]),
                           torch.tensor(qlsm.matrix[:5]))
        print("test_out.size() = {}".format(test_out.size()))
def setUp(self):
    wd = dict(zip(map(lambda x: chr(x), range(100)), range(100)))
    self.base = q.WordLinout(50, worddic=wd, bias=False)
    self.merg = q.WordLinout(50, worddic=wd, bias=False)
    self.linout = self.base.merge(self.merg)
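# Hypothetical companion test for the merge setUp above: a minimal sketch, assuming
# (as in test_it earlier in this file) that q.WordLinout maps a (batsize, indim)
# float tensor to (batsize, len(worddic)) scores, and that merging two linouts over
# the same 100-word dictionary keeps that output width. The method name and the
# expected shape are assumptions, not part of the original suite.
def test_merged_output_shape(self):
    x = torch.randn(3, 50)
    y = self.linout(x)
    # merged linout should still score all 100 words of the shared dictionary
    self.assertEqual(y.size(), (3, 100))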
def setUp(self):
    worddic = "<MASK> <RARE> first second third fourth fifth sixth"
    worddic = dict(zip(worddic.split(), range(len(worddic.split()))))
    self.linout = q.WordLinout(10, worddic=worddic, cosnorm=True)
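# Hypothetical usage sketch for the cosnorm setUp above: a minimal sketch, assuming
# the cosnorm variant keeps the same (batsize, indim) -> (batsize, len(worddic))
# call convention as the other WordLinout instances in this file. Illustrative only;
# not part of the original suite.
def test_cosnorm_output_shape(self):
    x = torch.randn(4, 10)
    y = self.linout(x)
    self.assertEqual(y.size(), (4, 8))    # 8 words in worddic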
def run_normal_seqvae_toy(lr=0.001,
                          embdim=64,
                          encdim=100,
                          zdim=64,
                          batsize=50,
                          epochs=100,
                          ):
    # test
    vocsize = 100
    seqlen = 12
    wD = dict((chr(xi), xi) for xi in range(vocsize))

    # region encoder
    encoder_emb = q.WordEmb(embdim, worddic=wD)
    encoder_lstm = q.FastestLSTMEncoder(embdim, encdim)

    class EncoderNet(torch.nn.Module):
        def __init__(self, emb, core):
            super(EncoderNet, self).__init__()
            self.emb, self.core = emb, core

        def forward(self, x):
            embs, mask = self.emb(x)
            out, states = self.core(embs, mask, ret_states=True)
            top_state = states[-1][0][:, 0]
            # top_state = top_state.unsqueeze(1).repeat(1, out.size(1), 1)
            return top_state    # (batsize, encdim)

    encoder_net = EncoderNet(encoder_emb, encoder_lstm)
    encoder = Posterior(encoder_net, encdim, zdim)
    # endregion

    # region decoder
    decoder_emb = q.WordEmb(embdim, worddic=wD)
    decoder_lstm = q.LSTMCell(embdim + zdim, encdim)
    decoder_outlin = q.WordLinout(encdim, worddic=wD)

    class DecoderCell(torch.nn.Module):
        def __init__(self, emb, core, out, **kw):
            super(DecoderCell, self).__init__()
            self.emb, self.core, self.out = emb, core, out

        def forward(self, xs, z=None):
            embs, mask = self.emb(xs)
            core_inp = torch.cat([embs, z], 1)
            core_out = self.core(core_inp)
            out = self.out(core_out)
            return out

    decoder_cell = DecoderCell(decoder_emb, decoder_lstm, decoder_outlin)
    decoder = q.TFDecoder(decoder_cell)
    # endregion

    likelihood = Likelihood()
    vae = SeqVAE(encoder, decoder, likelihood)

    x = torch.randint(0, vocsize, (batsize, seqlen), dtype=torch.int64)
    ys = vae(x)

    optim = torch.optim.Adam(q.params_of(vae), lr=lr)
    x = torch.randint(0, vocsize, (batsize * 100, seqlen), dtype=torch.int64)
    dataloader = q.dataload(x, batch_size=batsize, shuffle=True)
    trainer = q.trainer(vae).on(dataloader).optimizer(optim).loss(4).epochs(epochs)
    trainer.run()
    print("done \n\n")