def setUp(self):
    words = "<MASK> <RARE> the a his monkey inception key earlgrey"
    wdic = dict(zip(words.split(), range(0, len(words.split()))))
    overwords = "he his her mine cat monkey the interstellar grey key"
    overwdic = dict(zip(overwords.split(), range(0, len(overwords.split()))))
    self.baseemb = q.WordEmb(dim=50, worddic=wdic)
    self.overemb = q.WordEmb(dim=50, worddic=overwdic)
    self.emb = self.baseemb.override(self.overemb)
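# A minimal sketch of what this fixture prepares (not one of the original tests):
# "the" (base id 2) also appears in overwdic (id 6), so the overridden embedder is
# assumed to return overemb's vector for it, while "inception" (base id 6) only
# exists in the base dictionary and should fall back to baseemb. This follows the
# override lookup semantics exercised in test_overridden below.
def test_override_lookup_sketch(self):
    out, mask = self.emb(torch.LongTensor([2, 6]))            # "the", "inception"
    over_the, _ = self.overemb(torch.LongTensor([6]))         # "the" in overwdic
    base_inception, _ = self.baseemb(torch.LongTensor([6]))   # "inception" in wdic
    self.assertTrue(np.allclose(out[0].detach().numpy(),
                                over_the[0].detach().numpy()))
    self.assertTrue(np.allclose(out[1].detach().numpy(),
                                base_inception[0].detach().numpy()))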
def __init__(self, rnn_type, ntoken, ninp, nhid, nlayers, dropout=0.5,
             dropconnect=0., tie_weights=False):
    super(NewRNNModel, self).__init__()
    worddic = dict(zip([str(x) for x in range(ntoken)], range(ntoken)))
    dims = [ninp] + [nhid] * nlayers
    self.nhid = nhid
    self.nlayers = nlayers
    self.dims = dims
    self.D = worddic
    self.states = None
    # make layers
    self.emb = q.WordEmb(dims[0], worddic=self.D)
    self.out = q.WordLinout(dims[-1], worddic=self.D)
    self.rnn = self.encodertype(*dims, bidir=False, bias=True,
                                dropout_in=dropout, dropconnect=dropconnect)
    self.rnn.ret_all_states = True
    self.dropout = nn.Dropout(p=dropout)
def test_creation_simple(self):
    dic = dict(zip(map(chr, range(97, 122)), range(122 - 97)))
    m = q.WordEmb(10, worddic=dic)
    embedding, _ = m(Variable(torch.LongTensor([0, 1, 2])))
    self.assertEqual(embedding.size(), (3, 10))
    trueemb = m.embedding.weight.cpu().detach().numpy()[0]
    self.assertTrue(np.allclose(trueemb, embedding[0].detach().numpy()))
def test_partially_loaded(self):
    D = "<MASK> <RARE> cat dog person arizonaiceteaa".split()
    D = dict(zip(D, range(len(D))))
    baseemb = q.WordEmb(dim=50, worddic=D)
    baseemb = baseemb.override(self.glove)
    q.PartiallyPretrainedWordEmb.defaultpath = "../data/glove/miniglove.%dd"
    plemb = q.PartiallyPretrainedWordEmb(
        dim=50, worddic=D,
        value=baseemb.base.embedding.weight.detach().numpy(),
        gradfracs=(1., 0.5))
    x = torch.tensor(np.asarray([0, 1, 2, 3, 4, 5]), dtype=torch.int64)
    base_out, base_mask = baseemb(x)
    pl_out, mask = plemb(x)
    self.assertTrue(np.allclose(base_out[2:].detach().numpy(),
                                pl_out[2:].detach().numpy()))
    # test gradients
    l = pl_out.sum()
    l.backward()
    gradnorm = plemb.embedding.weight.grad.norm()
    thegrad = plemb.embedding.weight.grad
    print(gradnorm)
    self.assertTrue(np.all(thegrad.detach().numpy()[0, :] == 0))
    self.assertTrue(np.all(thegrad.detach().numpy()[[1, 2, 5], :] == 1.))
    self.assertTrue(np.all(thegrad.detach().numpy()[[3, 4], :] == 0.5))
    print(base_out - pl_out)
def __init__(self, D, embdim, *innerdim, **kw):
    super(Discriminator, self).__init__()
    self.emb = q.WordEmb(embdim, worddic=D)
    self.core = q.FastestLSTMEncoder(embdim, *innerdim)
    self.outlin1 = torch.nn.Linear(innerdim[-1], innerdim[-1])
    self.outlin1_sigm = torch.nn.Sigmoid()
    self.outlin2 = torch.nn.Linear(innerdim[-1], 1)
    self.outlin2_sigm = torch.nn.Sigmoid()
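# A possible forward pass for this discriminator (not part of the original snippet).
# It assumes q.FastestLSTMEncoder returns (outputs, states) when called with
# ret_states=True and that states[-1][0][:, 0] is the topmost final hidden state,
# mirroring the EncoderNet usage in the seqvae snippet further down; that state is
# then scored by the two linear+sigmoid layers defined above.
def forward(self, x):
    embs, mask = self.emb(x)                             # (batsize, seqlen, embdim)
    _, states = self.core(embs, mask, ret_states=True)
    top_state = states[-1][0][:, 0]                      # (batsize, innerdim[-1])
    hidden = self.outlin1_sigm(self.outlin1(top_state))
    score = self.outlin2_sigm(self.outlin2(hidden))      # (batsize, 1), score in [0, 1]
    return score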
def run(lr=0.001):
    x = torch.randint(1, 100, (5, 8, 6), dtype=torch.int64)
    y = x[:, 1:, :-1]
    y = torch.cat([torch.ones(y.size(0), y.size(1), 1, dtype=y.dtype), y], 2)
    y = torch.cat([y, torch.randint(1, 100, (y.size(0), 1, y.size(2))).long()], 1)
    D = dict(zip(["<MASK>"] + [str(i) for i in range(1, 100)], range(100)))
    m = BasicHierarchicalEncoderDecoder(q.WordEmb(10, worddic=D),
                                        q.WordLinout(25, worddic=D),
                                        10, (20,), (30,), (25,))
    pred = m(x, y)
def test_creation_masked(self):
    dic = dict(zip(map(chr, range(97, 122)), range(1, 122 - 97 + 1)))
    dic[q.WordEmb.masktoken] = 0
    m = q.WordEmb(10, worddic=dic)
    embedding, mask = m(Variable(torch.LongTensor([0, 1, 2])))
    self.assertEqual(embedding.size(), (3, 10))
    trueemb = m.embedding.weight.cpu().detach().numpy()[1]
    self.assertTrue(np.allclose(trueemb, embedding[1].detach().numpy()))
    self.assertTrue(np.allclose(embedding[0].detach().numpy(), np.zeros((10,))))
    print(mask)
    self.assertTrue(np.allclose(mask.detach().numpy(), [0, 1, 1]))
def __init__(self, D, embdim, zdim, startsym, *innerdim, **kw):
    super(Decoder, self).__init__()
    self.emb = q.WordEmb(embdim, worddic=D)
    innerdim = (embdim + zdim,) + innerdim
    self.layers = torch.nn.ModuleList(modules=[
        q.LSTMCell(innerdim[i - 1], innerdim[i])
        for i in range(1, len(innerdim))
    ])
    self.linout = q.WordLinout(innerdim[-1], worddic=D)
    self.sm = torch.nn.Softmax(-1)
    self.maxtime = q.getkw(kw, "maxtime", 100)
    self.startid = D[startsym]
    self.sm_sample = True
    self.zdim = zdim
def setUp(self):
    wdic = {
        "<MASK>": 0,
        "<RARE>": 1,
        "the": 10,
        "a": 5,
        "his": 50,
        "abracadabrqmsd--qsdfmqgf-": 6
    }
    wdic2 = {
        "<MASK>": 0,
        "<RARE>": 1,
        "the": 2,
        "a": 3,
        "his": 4,
        "abracadabrqmsd--qsdfmqgf-": 5,
        "qsdfqsdf": 7
    }
    self.adapted = q.WordEmb(50, worddic=wdic)
    self.vanilla = q.WordEmb(
        50, worddic=wdic,
        value=self.adapted.embedding.weight.detach().numpy())
    self.adapted = self.adapted.adapt(wdic2)
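# A minimal sketch of the property this fixture prepares (not one of the original
# tests): after .adapt(wdic2), indices are assumed to be interpreted under wdic2,
# so looking up "the"/"a"/"his" through the adapted embedder (ids 2, 3, 4) should
# give the same vectors as the vanilla copy under the original wdic (ids 10, 5, 50),
# since both were built from the same weight values.
def test_adapt_lookup_sketch(self):
    adapted_out, _ = self.adapted(torch.LongTensor([2, 3, 4]))
    vanilla_out, _ = self.vanilla(torch.LongTensor([10, 5, 50]))
    self.assertTrue(np.allclose(adapted_out.detach().numpy(),
                                vanilla_out.detach().numpy()))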
def test_overridden(self):
    dic = dict(zip(map(chr, range(97, 122)), range(1, 122 - 97 + 1)))
    dic[q.WordEmb.masktoken] = 0
    m = q.ZeroWordEmb(10, worddic=dic)
    dic = dict(zip(map(chr, range(97, 122)), range(0, 122 - 97)))
    mo = q.WordEmb(10, worddic=dic)
    moe = m.override(mo)
    emb, mask = moe(Variable(torch.LongTensor([0, 1, 2])))
    self.assertEqual(emb.size(), (3, 10))
    self.assertTrue(np.allclose(mask.detach().numpy(), [0, 1, 1]))
    self.assertTrue(np.allclose(emb[0].detach().numpy(), np.zeros((10,))))
    oemb, mask = mo(Variable(torch.LongTensor([0, 0, 1])))
    self.assertEqual(oemb.size(), (3, 10))
    self.assertTrue(mask is None)
    self.assertTrue(np.allclose(oemb.detach().numpy()[1:], emb.detach().numpy()[1:]))
def __init__(self, *dims: int, worddic: dict = None, bias: bool = True,
             dropout: float = 0., **kw):
    super(RNNLayer_LM, self).__init__(**kw)
    self.dims = dims
    self.D = worddic
    self.states = None
    # make layers
    self.emb = q.WordEmb(dims[0], worddic=self.D)
    self.out = q.WordLinout(dims[-1], worddic=self.D)
    self.rnn = self.encodertype(*dims, bidir=False, bias=bias, dropout_in=dropout)
    self.rnn.ret_all_states = True
    self.dropout = torch.nn.Dropout(p=dropout)
def test_it(self):
    x = np.random.randint(0, 100, (1000, 7))
    y_inp = x[:, :-1]
    y_out = x[:, 1:]
    wD = dict((chr(xi), xi) for xi in range(100))
    ctx = torch.randn(1000, 8, 30)
    decoder_emb = q.WordEmb(20, worddic=wD)
    decoder_lstm = q.LSTMCell(20, 30)
    decoder_att = q.DotAttention()
    decoder_out = q.WordLinout(60, worddic=wD)
    decoder_cell = q.DecoderCell(decoder_emb, decoder_lstm, decoder_att,
                                 None, decoder_out)
    decoder_tf = q.TFDecoder(decoder_cell)
    y = decoder_tf(torch.tensor(x), ctx=ctx)
    self.assertEqual(y.size(), (1000, 7, 100))
def run_classify(lr=0.001,
                 seqlen=6,
                 numex=500,
                 epochs=25,
                 batsize=10,
                 test=True,
                 cuda=False,
                 gpu=0):
    device = torch.device("cpu")
    if cuda:
        device = torch.device("cuda", gpu)

    # region construct data
    colors = "red blue green magenta cyan orange yellow grey salmon pink purple teal".split()
    D = dict(zip(colors, range(len(colors))))
    inpseqs = []
    targets = []
    for i in range(numex):
        inpseq = list(np.random.choice(colors, seqlen, replace=False))
        target = np.random.choice(range(len(inpseq)), 1)[0]
        target_class = D[inpseq[target]]
        inpseq[target] = "${}$".format(inpseq[target])
        inpseqs.append("".join(inpseq))
        targets.append(target_class)

    sm = q.StringMatrix()
    sm.tokenize = lambda x: list(x)
    for inpseq in inpseqs:
        sm.add(inpseq)
    sm.finalize()
    print(sm[0])
    print(sm.D)

    targets = np.asarray(targets)
    data = q.dataload(sm.matrix[:-100], targets[:-100], batch_size=batsize)
    valid_data = q.dataload(sm.matrix[-100:], targets[-100:], batch_size=batsize)
    # endregion

    # region model
    embdim = 20
    enc2inpdim = 45
    encdim = 20
    outdim = 20
    emb = q.WordEmb(embdim, worddic=sm.D)   # sm dictionary (characters)
    out = q.WordLinout(outdim, worddic=D)   # target dictionary
    # encoders:
    enc1 = q.RNNEncoder(embdim, encdim, bidir=True)
    enc2 = q.RNNCellEncoder(enc2inpdim, outdim // 2, bidir=True)

    # model
    class Model(torch.nn.Module):
        def __init__(self, dim, _emb, _out, _enc1, _enc2, **kw):
            super(Model, self).__init__(**kw)
            self.dim, self.emb, self.out, self.enc1, self.enc2 = dim, _emb, _out, _enc1, _enc2
            self.score = torch.nn.Sequential(
                torch.nn.Linear(dim, 1, bias=False), torch.nn.Sigmoid())
            self.emb_expander = ExpandVecs(embdim, enc2inpdim, 2)
            self.enc_expander = ExpandVecs(encdim * 2, enc2inpdim, 2)

        def forward(self, x, with_att=False):
            # embed and encode
            xemb, xmask = self.emb(x)
            xenc = self.enc1(xemb, mask=xmask)
            # compute attention
            xatt = self.score(xenc).squeeze(2) * xmask.float()[:, :xenc.size(1)]
            # encode again
            _xemb = self.emb_expander(xemb[:, :xenc.size(1)])
            _xenc = self.enc_expander(xenc)
            _, xenc2 = self.enc2(_xemb, gate=xatt, mask=xmask[:, :xenc.size(1)],
                                 ret_states=True)
            scores = self.out(xenc2.view(xenc.size(0), -1))
            if with_att:
                return scores, xatt
            else:
                return scores

    model = Model(40, emb, out, enc1, enc2)
    # endregion

    # region test
    if test:
        inps = torch.tensor(sm.matrix[0:2])
        outs = model(inps)
    # endregion

    # region train
    optimizer = torch.optim.Adam(q.params_of(model), lr=lr)
    trainer = q.trainer(model).on(data).loss(torch.nn.CrossEntropyLoss(), q.Accuracy())\
        .optimizer(optimizer).hook(q.ClipGradNorm(5.)).device(device)
    validator = q.tester(model).on(valid_data).loss(q.Accuracy()).device(device)
    q.train(trainer, validator).run(epochs=epochs)
    # endregion

    # region check attention   # TODO
    # feed a batch
    inpd = torch.tensor(sm.matrix[400:410])
    outd, att = model(inpd, with_att=True)
    outd = torch.max(outd, 1)[1].cpu().detach().numpy()
    inpd = inpd.cpu().detach().numpy()
    att = att.cpu().detach().numpy()
    rD = {v: k for k, v in sm.D.items()}
    roD = {v: k for k, v in D.items()}
    for i in range(len(att)):
        inpdi = " ".join([rD[x] for x in inpd[i]])
        outdi = roD[outd[i]]
        print("input: {}\nattention: {}\nprediction: {}".format(
            inpdi, " ".join(["{:.1f}".format(x) for x in att[i]]), outdi))
def run(lr=0.001,
        dropout=0.2,
        batsize=50,
        embdim=50,
        encdim=50,
        decdim=50,
        numlayers=1,
        bidir=False,
        which="geo",    # "geo", "atis", "jobs"
        test=True,
        ):
    settings = locals().copy()
    logger = q.log.Logger(prefix="seq2seq_base")
    logger.save_settings(**settings)

    # region data
    nlsm, qlsm, splitidxs = load_data(which=which)
    print(nlsm[0], qlsm[0])
    print(nlsm._rarewords)
    trainloader = q.dataload(nlsm.matrix[:splitidxs[0]], qlsm.matrix[:splitidxs[0]],
                             batch_size=batsize, shuffle=True)
    devloader = q.dataload(nlsm.matrix[splitidxs[0]:splitidxs[1]],
                           qlsm.matrix[splitidxs[0]:splitidxs[1]],
                           batch_size=batsize, shuffle=False)
    testloader = q.dataload(nlsm.matrix[splitidxs[1]:], qlsm.matrix[splitidxs[1]:],
                            batch_size=batsize, shuffle=False)
    # endregion

    # region model
    encdims = [encdim] * numlayers
    outdim = (encdim if not bidir else encdim * 2) + decdim
    nlemb = q.WordEmb(embdim, worddic=nlsm.D)
    qlemb = q.WordEmb(embdim, worddic=qlsm.D)
    nlenc = q.LSTMEncoder(embdim, *encdims, bidir=bidir, dropout_in=dropout)
    att = q.att.DotAtt()
    if numlayers > 1:
        qldec_core = torch.nn.Sequential(
            *[q.LSTMCell(_indim, _outdim, dropout_in=dropout)
              for _indim, _outdim in [(embdim, decdim)] + [(decdim, decdim)] * (numlayers - 1)]
        )
    else:
        qldec_core = q.LSTMCell(embdim, decdim, dropout_in=dropout)
    qlout = q.WordLinout(outdim, worddic=qlsm.D)
    qldec = q.LuongCell(emb=qlemb, core=qldec_core, att=att, out=qlout)

    class Model(torch.nn.Module):
        def __init__(self, _nlemb, _nlenc, _qldec, train=True, **kw):
            super(Model, self).__init__(**kw)
            self.nlemb, self.nlenc, self._q_train = _nlemb, _nlenc, train
            if train:
                self.qldec = q.TFDecoder(_qldec)
            else:
                self.qldec = q.FreeDecoder(_qldec, maxtime=100)

        def forward(self, x, y):   # (batsize, seqlen) int ids
            xemb, xmask = self.nlemb(x)
            xenc = self.nlenc(xemb, mask=xmask)
            if self._q_train is False:
                assert(y.dim() == 2)
            dec = self.qldec(y, ctx=xenc, ctxmask=xmask[:, :xenc.size(1)])
            return dec

    m_train = Model(nlemb, nlenc, qldec, train=True)
    m_test = Model(nlemb, nlenc, qldec, train=False)

    if test:
        test_out = m_train(torch.tensor(nlsm.matrix[:5]),
                           torch.tensor(qlsm.matrix[:5]))
        print("test_out.size() = {}".format(test_out.size()))
def setUp(self):
    worddic = "<MASK> <RARE> first second third fourth fifth"
    worddic = dict(zip(worddic.split(), range(len(worddic.split()))))
    self.emb1 = q.WordEmb(100, worddic=worddic)
    self.emb2 = q.WordEmb(100, worddic=worddic)
def run_normal_seqvae_toy(lr=0.001,
                          embdim=64,
                          encdim=100,
                          zdim=64,
                          batsize=50,
                          epochs=100,
                          ):
    # test
    vocsize = 100
    seqlen = 12
    wD = dict((chr(xi), xi) for xi in range(vocsize))

    # region encoder
    encoder_emb = q.WordEmb(embdim, worddic=wD)
    encoder_lstm = q.FastestLSTMEncoder(embdim, encdim)

    class EncoderNet(torch.nn.Module):
        def __init__(self, emb, core):
            super(EncoderNet, self).__init__()
            self.emb, self.core = emb, core

        def forward(self, x):
            embs, mask = self.emb(x)
            out, states = self.core(embs, mask, ret_states=True)
            top_state = states[-1][0][:, 0]
            # top_state = top_state.unsqueeze(1).repeat(1, out.size(1), 1)
            return top_state    # (batsize, encdim)

    encoder_net = EncoderNet(encoder_emb, encoder_lstm)
    encoder = Posterior(encoder_net, encdim, zdim)
    # endregion

    # region decoder
    decoder_emb = q.WordEmb(embdim, worddic=wD)
    decoder_lstm = q.LSTMCell(embdim + zdim, encdim)
    decoder_outlin = q.WordLinout(encdim, worddic=wD)

    class DecoderCell(torch.nn.Module):
        def __init__(self, emb, core, out, **kw):
            super(DecoderCell, self).__init__()
            self.emb, self.core, self.out = emb, core, out

        def forward(self, xs, z=None):
            embs, mask = self.emb(xs)
            core_inp = torch.cat([embs, z], 1)
            core_out = self.core(core_inp)
            out = self.out(core_out)
            return out

    decoder_cell = DecoderCell(decoder_emb, decoder_lstm, decoder_outlin)
    decoder = q.TFDecoder(decoder_cell)
    # endregion

    likelihood = Likelihood()
    vae = SeqVAE(encoder, decoder, likelihood)

    x = torch.randint(0, vocsize, (batsize, seqlen), dtype=torch.int64)
    ys = vae(x)

    optim = torch.optim.Adam(q.params_of(vae), lr=lr)
    x = torch.randint(0, vocsize, (batsize * 100, seqlen), dtype=torch.int64)
    dataloader = q.dataload(x, batch_size=batsize, shuffle=True)
    trainer = q.trainer(vae).on(dataloader).optimizer(optim).loss(4).epochs(epochs)
    trainer.run()
    print("done \n\n")