Example #1: test fixture that overrides a base q.WordEmb with a second q.WordEmb
 def setUp(self):
     words = "<MASK> <RARE> the a his monkey inception key earlgrey"
     wdic = dict(zip(words.split(), range(0, len(words.split()))))
     overwords = "he his her mine cat monkey the interstellar grey key"
     overwdic = dict(
         zip(overwords.split(), range(0, len(overwords.split()))))
     self.baseemb = q.WordEmb(dim=50, worddic=wdic)
     self.overemb = q.WordEmb(dim=50, worddic=overwdic)
     self.emb = self.baseemb.override(self.overemb)
     pass
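
The fixture above builds a base q.WordEmb and overrides it with a second q.WordEmb over a different dictionary. Below is a minimal standalone sketch of how such an overridden embedding could be queried, assuming the (vectors, mask) return convention seen in the other examples and a `qelos as q` import; the override semantics are only illustrated here, not taken from the original tests.

import torch
import qelos as q  # assumption: the `q` alias used in all snippets here

words = "<MASK> <RARE> the a his monkey inception key earlgrey"
wdic = dict(zip(words.split(), range(len(words.split()))))
overwords = "he his her mine cat monkey the interstellar grey key"
overwdic = dict(zip(overwords.split(), range(len(overwords.split()))))

baseemb = q.WordEmb(dim=50, worddic=wdic)
overemb = q.WordEmb(dim=50, worddic=overwdic)
emb = baseemb.override(overemb)

# ids 2 ("the") and 4 ("his") exist in both dictionaries, so their vectors
# would presumably be served by overemb rather than baseemb
vectors, mask = emb(torch.LongTensor([2, 4]))
print(vectors.size())  # expected: torch.Size([2, 50])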
Example #2: basic q.WordEmb creation and lookup
 def test_creation_simple(self):
     dic = dict(zip(map(chr, range(97, 122)), range(122 - 97)))
     m = q.WordEmb(10, worddic=dic)
     embedding, _ = m(Variable(torch.LongTensor([0, 1, 2])))
     self.assertEqual(embedding.size(), (3, 10))
     trueemb = m.weight.cpu().detach().numpy()[0]
     self.assertTrue(np.allclose(trueemb, embedding[0].detach().numpy()))
Example #3: RNN language-model constructor with optional weight tying between q.WordEmb and q.WordLinout
 def __init__(self,
              *dims: int,
              worddic: dict = None,
              bias: bool = True,
              tieweights=False,
              dropout: float = 0.,
              dropouti: float = 0.,
              dropouth: float = 0.,
              dropoute: float = 0.,
              **kw):
     super(RNNLayer_LM, self).__init__(**kw)
     self.dims = dims
     self.D = worddic
     self.states = None
     # make layers
     self.emb = q.WordEmb(dims[0], worddic=self.D)
     self.out = q.WordLinout(dims[-1], worddic=self.D)
     if tieweights:
         self.out.weight = self.emb.weight
     self.rnn = self.encodertype(*dims,
                                 bidir=False,
                                 bias=bias,
                                 dropout_in=dropout)
     self.rnn.ret_all_states = True
     self.dropout = torch.nn.Dropout(p=dropout)
     self.dropouti = torch.nn.Dropout(p=dropouti)
     self.dropoute = torch.nn.Dropout(p=dropoute)
     self.dropouth = torch.nn.Dropout(p=dropouth)
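
The tieweights branch above simply points the output layer's weight at the embedding matrix, so input and output word vectors are shared. A minimal sketch of that tying idea in isolation (assuming the same q.WordEmb / q.WordLinout interfaces shown in the other examples; the dictionary and dimensions are made up for illustration):

import qelos as q  # assumption: same alias as in the snippets

wdic = dict(zip("a b c d".split(), range(4)))
emb = q.WordEmb(16, worddic=wdic)
out = q.WordLinout(16, worddic=wdic)

# tie the output weights to the embedding matrix, as RNNLayer_LM does when tieweights=True
out.weight = emb.weight
assert out.weight is emb.weight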
Example #4: q.AYNDecoder shape test with masked source and context sequences
    def test_decoder_shape(self):
        wdic = "<MASK> a b c d e f g h i j k l m n o p".split()
        wdic = dict(zip(wdic, range(len(wdic))))
        emb = q.WordEmb(10, worddic=wdic)
        m = q.AYNDecoder(emb, n_max_seq=7, n_layers=3, n_head=2,
                         d_k=4, d_v=6, d_pos_vec=6, d_model=16,
                         d_inner_hid=20, dropout=0)
        src_seq = q.var(np.random.randint(1, max(wdic.values()), (5, 7))).v
        src_seq_mask_starts = np.random.randint(1, 7, (5,), dtype="int64")
        src_seq_mask = np.ones_like(src_seq.data.numpy())
        for i in range(5):
            src_seq_mask[i, :src_seq_mask_starts[i]] = 0
        src_seq_mask = q.var(src_seq_mask).v
        src_seq.masked_fill_(src_seq_mask.byte(), 0)
        src_pos = q.var(np.arange(0, 7, dtype="int64")).v
        src_pos = src_pos.unsqueeze(0).repeat(5, 1)

        ctx = q.var(np.random.random((5, 8, 16)).astype("float32")).v

        ctx_seq_mask_starts = np.random.randint(1, 8, (5,), dtype="int64")
        ctx_seq_mask = np.ones((5, 8))
        for i in range(5):
            ctx_seq_mask[i, :ctx_seq_mask_starts[i]] = 0
        ctx_seq_mask = -1*q.var(ctx_seq_mask).v.byte()+1

        out = m(src_seq, ctx, ctx_seq_mask)

        print(out)
        self.assertEqual(out.size(), (5, 7, 16))

        loss = out.sum()
        loss.backward()
Example #5: q.SwitchedWordEmb overriding selected words of a base q.WordEmb
    def test_it(self):
        D = "<MASK> <RARE> cat dog person earlgreytea the".split()
        D = dict(zip(D, range(len(D))))
        base = q.WordEmb(50, worddic=D)
        switched = q.SwitchedWordEmb(base)
        words = "cat dog person".split()
        over = q.WordEmb(50, worddic=D)
        switched.override(over, selectwords=words)

        x = torch.arange(0, len(D)).unsqueeze(0)
        y, ymask = switched(x)

        ybase, _ = base(x)
        yover, _ = over(x)
        ymix = torch.tensor([0, 0, 1, 1, 1, 0,
                             0]).float().unsqueeze(0).unsqueeze(-1)
        y_ref = ybase * (1 - ymix) + yover * ymix
        print((y - y_ref).norm())
        self.assertTrue(np.allclose(y.detach().numpy(),
                                    y_ref.detach().numpy()))
        print(y.size())
Example #6: q.WordEmb lookup with a mask token
 def test_creation_masked(self):
     dic = dict(zip(map(chr, range(97, 122)), range(1, 122 - 97 + 1)))
     dic[q.WordEmb.masktoken] = 0
     m = q.WordEmb(10, worddic=dic)
     embedding, mask = m(Variable(torch.LongTensor([0, 1, 2])))
     self.assertEqual(embedding.size(), (3, 10))
     trueemb = m.weight.cpu().detach().numpy()[1]
     self.assertTrue(np.allclose(trueemb, embedding[1].detach().numpy()))
     self.assertTrue(
         np.allclose(embedding[0].detach().numpy(), np.zeros((10, ))))
     print(mask)
     self.assertTrue(np.allclose(mask.detach().numpy(), [0, 1, 1]))
Example #7: test fixture adapting a q.WordEmb to a new word dictionary
 def setUp(self):
     wdic = {
         "<MASK>": 0,
         "<RARE>": 1,
         "the": 10,
         "a": 5,
         "his": 50,
         "abracadabrqmsd--qsdfmqgf-": 6
     }
     wdic2 = {
         "<MASK>": 0,
         "<RARE>": 1,
         "the": 2,
         "a": 3,
         "his": 4,
         "abracadabrqmsd--qsdfmqgf-": 5,
         "qsdfqsdf": 7
     }
     self.adapted = q.WordEmb(50, worddic=wdic)
     self.vanilla = q.WordEmb(
         50, worddic=wdic, value=self.adapted.embedding.weight.data.numpy())
     self.adapted = self.adapted.adapt(wdic2)
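
The fixture above keeps a vanilla copy of the original embedding weights and then adapts the embedding to a second dictionary that maps the same words to different ids. A sketch of what such a fixture is presumably used to check (hypothetical test method, not part of the original suite, relying on the torch / numpy imports of the surrounding test module): looking up "the" by its new id in wdic2 should return the vector the vanilla embedding holds for the old id in wdic.

 def test_adapt_lookup(self):
     # "the" has id 2 in wdic2 (adapted) and id 10 in wdic (vanilla)
     adapted_vec, _ = self.adapted(torch.LongTensor([2]))
     vanilla_vec, _ = self.vanilla(torch.LongTensor([10]))
     self.assertTrue(np.allclose(adapted_vec.detach().numpy(),
                                 vanilla_vec.detach().numpy()))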
Example #8: TransformerLM constructor built from q.WordEmb, q.TransformerDecoder and q.WordLinout
 def __init__(self,
              dim=512,
              worddic=None,
              numlayers=3,
              numheads=8,
              activation=q.GeLU,
              embedding_dropout=0.,
              attention_dropout=0.,
              residual_dropout=0.,
              word_dropout=0.,
              relpos=True,
              tie_wordvecs=False,
              maxlen=512):
     super(TransformerLM, self).__init__()
     self.wordemb = q.WordEmb(dim,
                              worddic=worddic,
                              word_dropout=word_dropout)
     posemb = None
     if relpos is False:
         print("using learned absolute position embeddings")
         posembD = dict(zip(range(maxlen), range(maxlen)))
         posemb = q.WordEmb(dim, worddic=posembD)
     self.transformer = q.TransformerDecoder(
         dim=dim,
         numlayers=numlayers,
         numheads=numheads,
         activation=activation,
         embedding_dropout=embedding_dropout,
         attention_dropout=attention_dropout,
         residual_dropout=residual_dropout,
         relpos=relpos,
         noctx=True,
         maxlen=maxlen,
         posemb=posemb)
     q.RecDropout.convert_to_standard_in(self.transformer)
     self.wordout = q.WordLinout(dim, worddic=worddic)
     if tie_wordvecs:
         self.wordout.weight = self.wordemb.weight
Example #9: teacher-forced decoding with q.DecoderCell and q.TFDecoder
    def test_it(self):
        x = np.random.randint(0, 100, (1000, 7))
        y_inp = x[:, :-1]
        y_out = x[:, 1:]
        wD = dict((chr(xi), xi) for xi in range(100))

        ctx = torch.randn(1000, 8, 30)

        decoder_emb = q.WordEmb(20, worddic=wD)
        decoder_lstm = q.LSTMCell(20, 30)
        decoder_att = q.DotAttention()
        decoder_out = q.WordLinout(60, worddic=wD)

        decoder_cell = q.DecoderCell(decoder_emb, decoder_lstm, decoder_att,
                                     None, decoder_out)
        decoder_tf = q.TFDecoder(decoder_cell)

        y = decoder_tf(torch.tensor(x), ctx=ctx)

        self.assertEqual(y.size(), (1000, 7, 100))


Example #10: StackCell test with a custom combiner, output layer and token-to-action mapping
    def test_it(self):
        D = "<MASK> [RED] NT(START) NT(a) T(b) NT(c) T(d) T(e) NT(f) T(g) T(h) T(i)"
        D = dict(zip(D.split(), range(len(D.split()))))
        tok2act = {
            k: (2 if k == "[RED]" else 1 if k[:2] == "NT" else 0)
            for k in D
        }

        class CustomCombiner(StackCellCombiner):
            def forward(self, _x, mask):
                ret = (_x * mask.unsqueeze(-1).float()).sum(
                    1) / mask.float().sum(1).unsqueeze(-1).clamp_min(1e-6)
                ret = ret.detach()  # TODO: for grad debugging
                return ret

        class CustomWordLinout(q.WordLinout):
            def update(self, _):
                pass

        class Tok2Act(torch.nn.Module):
            def __init__(self, t2a, D):
                super(Tok2Act, self).__init__()
                self.D = D
                t2a_ = torch.zeros(max(D.values()) + 1).long()
                for k, v in t2a.items():
                    t2a_[D[k]] = v
                self.register_buffer("t2a", t2a_)

            def forward(self, _x):
                return self.t2a[_x]

        embdim = 4
        coredim = 5
        emb = q.WordEmb(embdim, worddic=D)
        core = q.LSTMCell(embdim, coredim, dropout_rec=.1)
        # combiner = BasicCombiner(embdim)
        combiner = CustomCombiner()
        att = BasicAttention()
        out = CustomWordLinout(coredim * 2, worddic=D)
        tok2act = Tok2Act(tok2act, D)

        cell = StackCell(emb=emb,
                         tok2act=tok2act,
                         core=core,
                         combiner=combiner,
                         att=att,
                         out=out)
        ctx = torch.randn(2, 6, coredim)
        cell.save_ctx(ctx)

        ex1 = "NT(START) NT(a) T(b) NT(c) T(d) T(e) [RED] NT(f) T(g) T(h) [RED] T(i) [RED]"
        ex2 = "NT(START) NT(a) NT(c) T(d) T(e) [RED] [RED]"
        x1 = [D[exi] for exi in ex1.split()] + [0]
        x2 = [D[exi] for exi in ex2.split()]
        x2 = x2 + [0] * (len(x1) - len(x2))
        x = torch.tensor([x1, x2])

        cell._debug_embs = torch.nn.Parameter(torch.zeros(2, len(x1), embdim))

        ys = []
        for i in range(len(x[0])):
            y = cell(x[:, i])
            ys.append(y)

        # print(cell._debug_embs)
        print(cell._debug_embs.size())
        l = ys[11][0].sum()
        l.backward()

        print(cell._debug_embs.grad)
        print(cell._stack)
Example #11: training script for a gated tree decoder (run_gatedtree)
def run_gatedtree(
    lr=0.01,
    gradclip=5.,
    batsize=20,
    epochs=80,
    embdim=200,
    encdim=200,
    numlayer=1,
    cuda=False,
    gpu=0,
    wreg=1e-8,
    dropout=0.5,
    smoothing=0.4,
    goldsmoothing=-0.1,
    which="geo",
    relatt=False,
):
    tt = q.ticktock("script")
    tt.msg("running gated tree decoder")
    device = torch.device("cpu")
    if cuda:
        device = torch.device("cuda", gpu)

    # region data
    tt.tick("generating data")
    # dss, D = gen_sort_data(seqlen=seqlen, numvoc=numvoc, numex=numex, prepend_inp=False)
    dss, nlD, flD = gen_datasets(which=which)
    tloader, vloader, xloader = [
        torch.utils.data.DataLoader(ds, batch_size=batsize, shuffle=True)
        for ds in dss
    ]
    seqlen = len(dss[0][0][1])
    id2pushpop = torch.zeros(len(flD), dtype=torch.long, device=device)
    id2pushpop[flD["("]] = +1
    id2pushpop[flD[")"]] = -1

    tt.tock("data generated")
    # endregion

    # region model
    tt.tick("building model")
    # source side
    inpemb = q.WordEmb(embdim, worddic=nlD)
    encdims = [encdim] * numlayer
    encoder = q.LSTMEncoder(embdim,
                            *encdims,
                            bidir=False,
                            dropout_in_shared=dropout)

    # target side
    decemb = q.WordEmb(embdim, worddic=flD)
    decinpdim = embdim
    decdims = [decinpdim] + [encdim] * numlayer
    dec_core = [GatedTreeLSTMCell(decdims[i - 1], decdims[i], dropout_in=dropout)
                for i in range(1, len(decdims))]
    dec_core = TreeRNNDecoderCellCore(*dec_core)
    if relatt:
        att = ComboAbsRelAttention(ctxdim=encdim, vecdim=encdim)
    else:
        att = BasicAttention()
    out = torch.nn.Sequential(q.WordLinout(encdim, worddic=flD),
                              # torch.nn.Softmax(-1)
                              )
    merge = q.rnn.FwdDecCellMerge(decdims[-1], encdims[-1], outdim=encdim)
    deccell = TreeRNNDecoderCell(emb=decemb,
                                 core=dec_core,
                                 att=att,
                                 out=out,
                                 merge=merge,
                                 id2pushpop=id2pushpop)
    train_dec = q.TFDecoder(deccell)
    test_dec = q.FreeDecoder(deccell, maxtime=seqlen + 10)
    train_encdec = EncDec(inpemb, encoder, train_dec)
    test_encdec = Test_EncDec(inpemb, encoder, test_dec)

    train_encdec.to(device)
    test_encdec.to(device)
    tt.tock("built model")
    # endregion

    # region training
    # losses:
    if smoothing == 0:
        ce = q.loss.CELoss(mode="logits", ignore_index=0)
    elif goldsmoothing < 0.:
        ce = q.loss.SmoothedCELoss(mode="logits",
                                   ignore_index=0,
                                   smoothing=smoothing)
    else:
        ce = q.loss.DiffSmoothedCELoss(mode="logits",
                                       ignore_index=0,
                                       alpha=goldsmoothing,
                                       beta=smoothing)
    acc = q.loss.SeqAccuracy(ignore_index=0)
    elemacc = q.loss.SeqElemAccuracy(ignore_index=0)
    treeacc = TreeAccuracyLambdaDFPar(flD=flD)
    # optim
    optim = torch.optim.RMSprop(train_encdec.parameters(),
                                lr=lr,
                                alpha=0.95,
                                weight_decay=wreg)
    clipgradnorm = lambda: torch.nn.utils.clip_grad_value_(
        train_encdec.parameters(), clip_value=gradclip)
    # lööps
    batchloop = partial(q.train_batch, on_before_optim_step=[clipgradnorm])
    trainloop = partial(
        q.train_epoch,
        model=train_encdec,
        dataloader=tloader,
        optim=optim,
        device=device,
        losses=[q.LossWrapper(ce),
                q.LossWrapper(elemacc),
                q.LossWrapper(acc)],
        print_every_batch=False,
        _train_batch=batchloop)
    validloop = partial(q.test_epoch,
                        model=test_encdec,
                        dataloader=vloader,
                        device=device,
                        losses=[q.LossWrapper(treeacc)],
                        print_every_batch=False)

    tt.tick("training")
    q.run_training(trainloop, validloop, max_epochs=epochs)
    tt.tock("trained")

    tt.tick("testing")
    test_results = validloop(model=test_encdec, dataloader=xloader)
    print("Test results (freerunning): {}".format(test_results))
    test_results = validloop(model=train_encdec, dataloader=xloader)
    print("Test results (TF): {}".format(test_results))
    tt.tock("tested")
    # endregion
    tt.msg("done")
Example #12: test fixture with two q.WordEmb instances over the same dictionary
 def setUp(self):
     worddic = "<MASK> <RARE> first second third fourth fifth"
     worddic = dict(zip(worddic.split(), range(len(worddic.split()))))
     self.emb1 = q.WordEmb(100, worddic=worddic)
     self.emb2 = q.WordEmb(100, worddic=worddic)
Example #13: seq2seq training script on LC-QuAD (run_seq2seq_)
def run_seq2seq_(
    lr=0.001,
    batsize=32,
    evalbatsize=256,
    epochs=100,
    warmup=5,
    embdim=50,
    encdim=100,
    numlayers=2,
    dropout=.0,
    wreg=1e-6,
    cuda=False,
    gpu=0,
):
    settings = locals().copy()
    device = torch.device("cpu") if not cuda else torch.device("cuda", gpu)
    tt = q.ticktock("script")
    tt.msg("running seq2seq on LC-QuAD")

    tt.tick("loading data")
    xsm, ysm, teststart, tok2act = load_data()
    _tok2act = {ysm.RD[k]: v for k, v in tok2act.items()}

    print("Some examples:")
    for i in range(5):
        print(
            f"{xsm[i]}\n ->{ysm[i]}\n -> {Node.from_transitions(' '.join(ysm[i].split()[1:]), _tok2act)}"
        )

    print("Non-leaf tokens:")
    print({ysm.RD[k]: v for k, v in tok2act.items() if v > 0})

    devstart = teststart - 500
    trainds = torch.utils.data.TensorDataset(
        torch.tensor(xsm.matrix[:devstart]).long(),
        torch.tensor(ysm.matrix[:devstart, :-1]).long(),
        torch.tensor(ysm.matrix[:devstart, 1:]).long())
    valds = torch.utils.data.TensorDataset(
        torch.tensor(xsm.matrix[devstart:teststart]).long(),
        torch.tensor(ysm.matrix[devstart:teststart, :-1]).long(),
        torch.tensor(ysm.matrix[devstart:teststart, 1:]).long())
    testds = torch.utils.data.TensorDataset(
        torch.tensor(xsm.matrix[teststart:]).long(),
        torch.tensor(ysm.matrix[teststart:, :-1]).long(),
        torch.tensor(ysm.matrix[teststart:, 1:]).long())
    tt.msg(
        f"Data splits: train: {len(trainds)}, valid: {len(valds)}, test: {len(testds)}"
    )

    tloader = torch.utils.data.DataLoader(trainds,
                                          batch_size=batsize,
                                          shuffle=True)
    vloader = torch.utils.data.DataLoader(valds,
                                          batch_size=evalbatsize,
                                          shuffle=False)
    xloader = torch.utils.data.DataLoader(testds,
                                          batch_size=evalbatsize,
                                          shuffle=False)
    tt.tock("data loaded")

    # model
    enclayers, declayers = numlayers, numlayers
    decdim = encdim
    xemb = q.WordEmb(embdim, worddic=xsm.D)
    yemb = q.WordEmb(embdim, worddic=ysm.D)
    encdims = [embdim] + [encdim // 2] * enclayers
    xenc = q.LSTMEncoder(embdim,
                         *encdims[1:],
                         bidir=True,
                         dropout_in_shared=dropout)
    decdims = [embdim] + [decdim] * declayers
    dec_core = torch.nn.Sequential(*[
        q.LSTMCell(decdims[i - 1],
                   decdims[i],
                   dropout_in=dropout,
                   dropout_rec=dropout) for i in range(1, len(decdims))
    ])
    yout = q.WordLinout(encdim + decdim, worddic=ysm.D)
    dec_cell = semparse.rnn.LuongCell(emb=yemb,
                                      core=dec_core,
                                      out=yout,
                                      dropout=dropout)
    decoder = q.TFDecoder(dec_cell)
    testdecoder = q.FreeDecoder(dec_cell, maxtime=100)

    m = Seq2Seq(xemb, xenc, decoder)
    testm = Seq2Seq(xemb, xenc, testdecoder, test=True)

    # test model
    tt.tick("running a batch")
    test_y = m(*next(iter(tloader))[:-1])
    q.batch_reset(m)
    test_y = testm(*next(iter(vloader))[:-1])
    q.batch_reset(m)
    tt.tock(f"ran a batch: {test_y.size()}")

    optim = torch.optim.Adam(m.parameters(), lr=lr, weight_decay=wreg)
    tlosses = [
        q.CELoss(mode="logits", ignore_index=0),
        q.Accuracy(ignore_index=0),
        q.SeqAccuracy(ignore_index=0)
    ]
    xlosses = [
        q.CELoss(mode="logits", ignore_index=0),
        q.Accuracy(ignore_index=0),
        q.SeqAccuracy(ignore_index=0)
    ]
    tlosses = [q.LossWrapper(l) for l in tlosses]
    vlosses = [q.LossWrapper(l) for l in xlosses]
    xlosses = [q.LossWrapper(l) for l in xlosses]
    trainloop = partial(q.train_epoch,
                        model=m,
                        dataloader=tloader,
                        optim=optim,
                        losses=tlosses,
                        device=device)
    devloop = partial(q.test_epoch,
                      model=testm,
                      dataloader=vloader,
                      losses=vlosses,
                      device=device)
    testloop = partial(q.test_epoch,
                       model=testm,
                       dataloader=xloader,
                       losses=xlosses,
                       device=device)

    lrplateau = q.util.ReduceLROnPlateau(optim,
                                         mode="max",
                                         factor=.1,
                                         patience=3,
                                         cooldown=1,
                                         warmup=warmup,
                                         threshold=0.,
                                         verbose=True,
                                         eps=1e-9)
    on_after_valid = [lambda: lrplateau.step(vlosses[1].get_epoch_error())]
    _devloop = partial(devloop, on_end=on_after_valid)
    stoptrain = [lambda: all([pg["lr"] <= 1e-7 for pg in optim.param_groups])]

    tt.tick("training")
    q.run_training(trainloop,
                   _devloop,
                   max_epochs=epochs,
                   check_stop=stoptrain)
    tt.tock("done training")

    tt.tick("testing")
    testres = testloop()
    print(testres)
    settings["testres"] = testres
    tt.tock("tested")

    devres = devloop()
    print(devres, vlosses[0].get_epoch_error())

    return vlosses[1].get_epoch_error()
Example #14: BiLSTM relation classifier training script (run_relations)
def run_relations(
    lr=DEFAULT_LR,
    dropout=.3,
    wreg=DEFAULT_WREG,
    initwreg=DEFAULT_INITWREG,
    batsize=DEFAULT_BATSIZE,
    epochs=10,
    smoothing=DEFAULT_SMOOTHING,
    cuda=False,
    gpu=0,
    balanced=False,
    maskentity=False,
    savep="exp_bilstm_rels_",
    test=False,
    datafrac=1.,
    glove=False,
    embdim=50,
    dim=300,
    numlayers=2,
    warmup=0.0,
    cycles=0.5,
    sched="cos",
    evalbatsize=-1,
    classweighted=False,
    fixembed=False,
):
    print(locals())
    settings = locals().copy()
    if evalbatsize < 0:
        evalbatsize = batsize
    if test:
        epochs = 0
    if cuda:
        device = torch.device("cuda", gpu)
    else:
        device = torch.device("cpu")
    # region data
    tt = q.ticktock("script")
    tt.msg("running relation classifier with BiLSTM")
    tt.tick("loading data")
    data = load_data(which="wordmat,wordborders,rels",
                     datafrac=datafrac,
                     retrelD=True)
    trainds, devds, testds, wD, relD = data
    rev_wD = {v: k for k, v in wD.items()}

    def pp(ids):
        ret = " ".join(
            [rev_wD[idse.item()] for idse in ids if idse.item() != 0])
        return ret

    print(pp(trainds.tensors[0][0]))
    print(trainds.tensors[1][0])
    if maskentity:
        trainds, devds, testds = replace_entity_span(trainds,
                                                     devds,
                                                     testds,
                                                     D=wD)
    else:
        trainds, devds, testds = [
            TensorDataset(ds.tensors[0], ds.tensors[2])
            for ds in [trainds, devds, testds]
        ]

    for i in range(10):
        question = trainds.tensors[0][i]
        print(pp(question))
    print()
    for i in range(10):
        question = devds.tensors[0][i]
        print(pp(question))
    print()
    for i in range(10):
        question = testds.tensors[0][i]
        print(pp(question))

    relcounts = torch.zeros(max(relD.values()) + 1)
    trainrelcounts = torch.tensor(
        np.bincount(trainds.tensors[1].detach().cpu().numpy()))
    relcounts[:len(trainrelcounts)] += trainrelcounts.float()
    tt.tock("data loaded")
    tt.msg("Train/Dev/Test sizes: {} {} {}".format(len(trainds), len(devds),
                                                   len(testds)))
    trainloader = DataLoader(trainds, batch_size=batsize, shuffle=True)
    devloader = DataLoader(devds, batch_size=evalbatsize, shuffle=False)
    testloader = DataLoader(testds, batch_size=evalbatsize, shuffle=False)
    evalds = TensorDataset(*testloader.dataset.tensors[:1])
    evalloader = DataLoader(evalds, batch_size=evalbatsize, shuffle=False)
    evalds_dev = TensorDataset(*devloader.dataset.tensors[:1])
    evalloader_dev = DataLoader(evalds_dev,
                                batch_size=evalbatsize,
                                shuffle=False)

    if test:
        evalloader = DataLoader(TensorDataset(*evalloader.dataset[:10]),
                                batch_size=batsize,
                                shuffle=False)
        testloader = DataLoader(TensorDataset(*testloader.dataset[:10]),
                                batch_size=batsize,
                                shuffle=False)
    # endregion

    # region model
    tt.tick("making model")
    emb = q.WordEmb(embdim, worddic=wD)
    if glove:
        print("using glove")
        stoi_, vectors_, dim = torch.load(
            "../../data/buboqa/data/sq_glove300d.pt")
        # map vectors from custom glove ids to wD ids
        vectors = torch.zeros(max(wD.values()) + 1,
                              embdim,
                              device=vectors_.device,
                              dtype=vectors_.dtype)
        stoi = {}
        for k, v in stoi_.items():
            if k in wD:
                vectors[wD[k]] = vectors_[v]
                stoi[k] = wD[k]
        print("{} words in stoi that are in wD".format(len(stoi)))
        gloveemb = q.WordEmb(embdim, worddic=stoi, _weight=vectors)
        # gloveemb = q.WordEmb.load_glove("glove.{}d".format(embdim), selectD=wD)
        if fixembed:
            gloveemb.freeze()
            emb.freeze()
        emb = q.SwitchedWordEmb(emb).override(gloveemb)

    bilstm = q.rnn.LSTMEncoder(embdim,
                               *([dim] * numlayers),
                               bidir=True,
                               dropout_in=dropout)
    # bilstm = torch.nn.LSTM(embdim, dim, batch_first=True, num_layers=numlayers, bidirectional=True, dropout=dropout)
    m = RelationClassifier(emb=emb,
                           bilstm=bilstm,
                           dim=dim,
                           relD=relD,
                           dropout=dropout)
    m.to(device)

    # model = RelationPrediction(config)
    tt.tock("made model")
    # endregion

    # region training
    totalsteps = len(trainloader) * epochs
    params = m.parameters()
    params = [param for param in params if param.requires_grad]
    sched = get_schedule(sched,
                         warmup=warmup,
                         t_total=totalsteps,
                         cycles=cycles)
    optim = BertAdam(params,
                     lr=lr,
                     weight_decay=wreg,
                     warmup=warmup,
                     t_total=totalsteps,
                     schedule=sched)
    # optim = torch.optim.Adam(params, lr=lr, weight_decay=wreg)
    # losses = [
    #     torch.nn.CrossEntropyLoss(size_average=True),
    #     q.Accuracy()
    # ]
    losses = [
        q.SmoothedCELoss(smoothing=smoothing,
                         weight=1 /
                         relcounts.clamp_min(1e-6) if classweighted else None),
        q.Accuracy()
    ]
    # xlosses = [
    #     torch.nn.CrossEntropyLoss(size_average=True),
    #     q.Accuracy()
    # ]
    xlosses = [q.SmoothedCELoss(smoothing=smoothing), q.Accuracy()]
    trainlosses = [q.LossWrapper(l) for l in losses]
    devlosses = [q.LossWrapper(l) for l in xlosses]
    testlosses = [q.LossWrapper(l) for l in xlosses]
    trainloop = partial(q.train_epoch,
                        model=m,
                        dataloader=trainloader,
                        optim=optim,
                        losses=trainlosses,
                        device=device)
    devloop = partial(q.test_epoch,
                      model=m,
                      dataloader=devloader,
                      losses=devlosses,
                      device=device)
    testloop = partial(q.test_epoch,
                       model=m,
                       dataloader=testloader,
                       losses=testlosses,
                       device=device)

    tt.tick("training")
    q.run_training(trainloop, devloop, max_epochs=epochs)
    tt.tock("done training")

    tt.tick("testing")
    testres = testloop()
    print(testres)
    tt.tock("tested")

    if len(savep) > 0:
        tt.tick("making predictions and saving")
        i = 0
        while os.path.exists(savep + str(i)):
            i += 1
        os.mkdir(savep + str(i))
        savedir = savep + str(i)
        # save model
        # torch.save(m, open(os.path.join(savedir, "model.pt"), "wb"))
        # save settings
        json.dump(settings, open(os.path.join(savedir, "settings.json"), "w"))
        # save relation dictionary
        # json.dump(relD, open(os.path.join(savedir, "relD.json"), "w"))
        # save test predictions
        testpreds = q.eval_loop(m, evalloader, device=device)
        testpreds = testpreds[0].cpu().detach().numpy()
        np.save(os.path.join(savedir, "relpreds.test.npy"), testpreds)
        testpreds = q.eval_loop(m, evalloader_dev, device=device)
        testpreds = testpreds[0].cpu().detach().numpy()
        np.save(os.path.join(savedir, "relpreds.dev.npy"), testpreds)
        tt.msg("saved in {}".format(savedir))
        # save bert-tokenized questions
        # tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")
        # with open(os.path.join(savedir, "testquestions.txt"), "w") as f:
        #     for batch in evalloader:
        #         ques, io = batch
        #         ques = ques.numpy()
        #         for question in ques:
        #             qstr = " ".join([x for x in tokenizer.convert_ids_to_tokens(question) if x != "[PAD]"])
        #             f.write(qstr + "\n")

        tt.tock("done")
Example #15: BiLSTM span-border detector training script (run_span_borders)
def run_span_borders(
    lr=DEFAULT_LR,
    dropout=.3,
    wreg=DEFAULT_WREG,
    initwreg=DEFAULT_INITWREG,
    batsize=DEFAULT_BATSIZE,
    evalbatsize=-1,
    epochs=DEFAULT_EPOCHS,
    smoothing=DEFAULT_SMOOTHING,
    dim=200,
    numlayers=1,
    cuda=False,
    gpu=0,
    savep="exp_bilstm_span_borders_",
    datafrac=1.,
    glove=False,
    fixembed=False,
    embdim=50,
    sched="cos",
    warmup=0.1,
    cycles=0.5,
):
    settings = locals().copy()
    print(locals())
    if evalbatsize < 0:
        evalbatsize = batsize
    if cuda:
        device = torch.device("cuda", gpu)
    else:
        device = torch.device("cpu")
    # region data
    tt = q.ticktock("script")
    tt.msg("running span border with BiLSTM")
    tt.tick("loading data")
    data = load_data(which="wordmat,wordborders", datafrac=datafrac)
    trainds, devds, testds, wD = data
    tt.tock("data loaded")
    tt.msg("Train/Dev/Test sizes: {} {} {}".format(len(trainds), len(devds),
                                                   len(testds)))
    trainloader = DataLoader(trainds, batch_size=batsize, shuffle=True)
    devloader = DataLoader(devds, batch_size=evalbatsize, shuffle=False)
    testloader = DataLoader(testds, batch_size=evalbatsize, shuffle=False)
    evalds = TensorDataset(*testloader.dataset.tensors[:1])
    evalloader = DataLoader(evalds, batch_size=evalbatsize, shuffle=False)
    evalds_dev = TensorDataset(*devloader.dataset.tensors[:1])
    evalloader_dev = DataLoader(evalds_dev,
                                batch_size=evalbatsize,
                                shuffle=False)
    # endregion

    # region model
    tt.tick("creating model")
    emb = q.WordEmb(embdim, worddic=wD)
    if glove:
        print("using glove")
        stoi_, vectors_, dim = torch.load(
            "../../data/buboqa/data/sq_glove300d.pt")
        # map vectors from custom glove ids to wD ids
        vectors = torch.zeros(max(wD.values()) + 1,
                              embdim,
                              device=vectors_.device,
                              dtype=vectors_.dtype)
        stoi = {}
        for k, v in stoi_.items():
            if k in wD:
                vectors[wD[k]] = vectors_[v]
                stoi[k] = wD[k]
        print("{} words in stoi that are in wD".format(len(stoi)))
        gloveemb = q.WordEmb(embdim, worddic=stoi, _weight=vectors)
        # gloveemb = q.WordEmb.load_glove("glove.{}d".format(embdim), selectD=wD)
        if fixembed:
            gloveemb.freeze()
        emb = q.SwitchedWordEmb(emb).override(gloveemb)
    # inpD = tokenizer.vocab
    # q.WordEmb.masktoken = "[PAD]"
    # emb = q.WordEmb(embdim, worddic=inpD)
    bilstm = q.rnn.LSTMEncoder(embdim,
                               *([dim] * numlayers),
                               bidir=True,
                               dropout_in_shared=dropout)
    spandet = BorderSpanDetector(emb, bilstm, dim * 2, dropout=dropout)
    spandet.to(device)
    tt.tock("model created")
    # endregion

    # region training
    totalsteps = len(trainloader) * epochs
    params = spandet.parameters()
    sched = get_schedule(sched,
                         warmup=warmup,
                         t_total=totalsteps,
                         cycles=cycles)
    optim = BertAdam(params, lr=lr, weight_decay=wreg, schedule=sched)
    # optim = torch.optim.Adam(spandet.parameters(), lr=lr, weight_decay=wreg)
    losses = [
        q.SmoothedCELoss(smoothing=smoothing),
        SpanF1Borders(),
        q.SeqAccuracy()
    ]
    xlosses = [
        q.SmoothedCELoss(smoothing=smoothing),
        SpanF1Borders(),
        q.SeqAccuracy()
    ]
    trainlosses = [q.LossWrapper(l) for l in losses]
    devlosses = [q.LossWrapper(l) for l in xlosses]
    testlosses = [q.LossWrapper(l) for l in xlosses]
    trainloop = partial(q.train_epoch,
                        model=spandet,
                        dataloader=trainloader,
                        optim=optim,
                        losses=trainlosses,
                        device=device)
    devloop = partial(q.test_epoch,
                      model=spandet,
                      dataloader=devloader,
                      losses=devlosses,
                      device=device)
    testloop = partial(q.test_epoch,
                       model=spandet,
                       dataloader=testloader,
                       losses=testlosses,
                       device=device)

    tt.tick("training")
    q.run_training(trainloop, devloop, max_epochs=epochs)
    tt.tock("done training")

    tt.tick("testing")
    testres = testloop()
    print(testres)
    tt.tock("tested")

    if len(savep) > 0:
        tt.tick("making predictions and saving")
        i = 0
        while os.path.exists(savep + str(i)):
            i += 1
        os.mkdir(savep + str(i))
        savedir = savep + str(i)
        # save model
        # torch.save(spandet, open(os.path.join(savedir, "model.pt"), "wb"))
        # save settings
        json.dump(settings, open(os.path.join(savedir, "settings.json"), "w"))

        outlen = trainloader.dataset.tensors[0].size(1)
        spandet.outlen = outlen

        # save test predictions
        testpreds = q.eval_loop(spandet, evalloader, device=device)
        testpreds = testpreds[0].cpu().detach().numpy()
        np.save(os.path.join(savedir, "borderpreds.test.npy"), testpreds)
        # save dev predictions
        testpreds = q.eval_loop(spandet, evalloader_dev, device=device)
        testpreds = testpreds[0].cpu().detach().numpy()
        np.save(os.path.join(savedir, "borderpreds.dev.npy"), testpreds)
        tt.msg("saved in {}".format(savedir))
        tt.tock("done")