def __init__(self, m, cesmoothing=0., **kw):
    super(BordersAndRelationLosses, self).__init__(**kw)
    self.m = m
    self.blosses = [
        q.SmoothedCELoss(smoothing=cesmoothing, reduction="none"),
        q.SeqAccuracy(reduction="none"),
        SpanF1Borders(reduction="none")
    ]
    self.rlosses = [
        q.SmoothedCELoss(smoothing=cesmoothing, reduction="none"),
        q.Accuracy(reduction="none")
    ]
    self.sm = torch.nn.Softmax(-1)
def __init__(self, tagger: GRUDecoderCell, vocab=None, max_size: int = 100,
             smoothing: float = 0., mode="normal", mcdropout=-1, **kw):
    super(SeqDecoderBaseline, self).__init__(**kw)
    self.tagger = tagger
    self.vocab = vocab
    self.max_size = max_size
    self.smoothing = smoothing
    self.mode = mode
    if self.smoothing > 0:
        self.loss = q.SmoothedCELoss(reduction="none", ignore_index=0,
                                     smoothing=smoothing, mode="logprobs")
    else:
        self.loss = torch.nn.NLLLoss(reduction="none", ignore_index=0)
    self.logsm = torch.nn.LogSoftmax(-1)
    self.mcdropout = mcdropout
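# The `mcdropout` flag above suggests Monte Carlo dropout at decoding time,
# but this snippet does not show how the cell uses it. A generic sketch of the
# technique (an assumption, not this class's implementation): keep dropout
# layers active at inference and average several stochastic forward passes.
import torch

def mc_dropout_predict(model, x, passes=10):
    model.eval()
    for mod in model.modules():
        if isinstance(mod, torch.nn.Dropout):
            mod.train()          # re-enable dropout only
    with torch.no_grad():
        probs = torch.stack([model(x).softmax(-1) for _ in range(passes)])
    return probs.mean(0)         # averaged predictive distribution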
def __init__(self, ptagger: PTransformerDecoderCell, qtagger: QTransformerDecoderCell,
             vocab=None, max_size: int = 100, smoothing: float = 0.,
             priorweight=1., tree_compare=None, **kw):
    super(SeqDecoderOrderVAE, self).__init__(**kw)
    self.ptagger = ptagger
    self.qtagger = qtagger
    self.vocab = vocab
    self.max_size = max_size
    self.smoothing = smoothing
    self.priorweight = priorweight
    if self.smoothing > 0:
        self.loss = q.SmoothedCELoss(reduction="none", ignore_index=0,
                                     smoothing=smoothing, mode="logprobs")
    else:
        self.loss = torch.nn.NLLLoss(reduction="none", ignore_index=0)
    self.logsm = torch.nn.LogSoftmax(-1)
    self.tree_compare = tree_compare if tree_compare is not None else partial(
        are_equal_trees, orderless=ORDERLESS, unktoken="@UNK@")
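# `priorweight` above presumably scales the KL-to-prior term of the VAE
# objective against the reconstruction loss (the beta-VAE form). As background
# only, a minimal sketch with a diagonal-Gaussian posterior (names hypothetical,
# not this class's exact computation):
def gaussian_kl(mu, logvar):
    # KL( N(mu, exp(logvar)) || N(0, I) ), summed over latent dimensions
    return 0.5 * (mu.pow(2) + logvar.exp() - logvar - 1).sum(-1)

# loss = (recon_loss + priorweight * gaussian_kl(mu, logvar)).mean()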
def __init__(self, model: TransitionModel, smoothing=0., **kw):
    super(TFActionSeqDecoder, self).__init__(**kw)
    self.model = model
    if smoothing > 0:
        self.loss = q.SmoothedCELoss(reduction="none", ignore_index=0,
                                     smoothing=smoothing, mode="probs")
    else:
        self.loss = q.CELoss(reduction="none", ignore_index=0, mode="probs")
def test_it(self):
    m = q.SmoothedCELoss(smoothing=0.2, mode="logits")
    x = torch.randn(5, 6)
    g = torch.randint(0, 6, (5,)).long()
    l = m(x, g)
    print(l)
    uniform = torch.ones_like(x) / x.size(1)
    # print(uniform)
    # note: KLDivLoss expects log-probabilities as its first argument
    kl = torch.nn.KLDivLoss(reduction="none")(x, uniform).sum(-1).mean()
    ce = q.CELoss(mode="logits")(x, g)
    print(kl, ce)
    # reference mixture: smoothing * uniform term + (1 - smoothing) * CE
    print(kl * 0.2 + ce * 0.8)
def test_it_with_weights(self):
    weights = torch.tensor([0.1, 0.2, 0.3, 1., 1., 1.])
    m = q.SmoothedCELoss(smoothing=0.2, mode="logits", weight=weights)
    x = torch.randn(5, 6)
    g = torch.randint(0, 6, (5,)).long()
    l = m(x, g)
    print(l)
    uniform = torch.ones_like(x) / x.size(1)
    # print(uniform)
    kl = torch.nn.KLDivLoss(reduction="none")(x, uniform).sum(-1).mean()
    ce = q.CELoss(mode="logits")(x, g)
    print(kl, ce)
    print(kl * 0.2 + ce * 0.8)
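# The two tests above compare q.SmoothedCELoss against the mixture
# smoothing * KL(uniform) + (1 - smoothing) * CE. Label smoothing itself can be
# reproduced in plain PyTorch by mixing the one-hot target with the uniform
# distribution; this equals the KL mixture up to an additive constant (the
# entropy of the uniform). A self-contained sketch, assuming PyTorch >= 1.10
# for the built-in label_smoothing argument used as a cross-check:
import torch
import torch.nn.functional as F

def smoothed_ce(logits, gold, smoothing=0.2):
    K = logits.size(-1)
    logprobs = F.log_softmax(logits, -1)
    # smoothed target: (1 - s) on the gold class, s spread uniformly
    target = (1 - smoothing) * F.one_hot(gold, K).float() + smoothing / K
    return -(target * logprobs).sum(-1)      # per-example loss

logits = torch.randn(5, 6)
gold = torch.randint(0, 6, (5,))
assert torch.allclose(smoothed_ce(logits, gold).mean(),
                      F.cross_entropy(logits, gold, label_smoothing=0.2),
                      atol=1e-6)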
def __init__(self, weight=None, reduction="mean", ignore_index=0, mode="logits",
             smoothing: float = 0., **kw):
    super(CELoss, self).__init__(**kw)
    self.mode = mode
    self.ce = q.CELoss(weight=weight, reduction=reduction,
                       ignore_index=ignore_index, mode=mode)
    if smoothing != 0.:
        assert 0. < smoothing < 1.
        assert mode in ("logits", "logprobs")
        self.ce = q.SmoothedCELoss(reduction=reduction, ignore_index=ignore_index,
                                   smoothing=smoothing, mode=mode, weight=weight)
def run_relations(lr=DEFAULT_LR,
                  dropout=.3,
                  wreg=DEFAULT_WREG,
                  initwreg=DEFAULT_INITWREG,
                  batsize=DEFAULT_BATSIZE,
                  epochs=10,
                  smoothing=DEFAULT_SMOOTHING,
                  cuda=False,
                  gpu=0,
                  balanced=False,
                  maskentity=False,
                  savep="exp_bilstm_rels_",
                  test=False,
                  datafrac=1.,
                  vanillaemb=False,
                  gloveemb=True,
                  embdim=300,
                  dim=300,
                  numlayers=2,
                  warmup=0.01,
                  cycles=0.5,
                  sched="cos",
                  evalbatsize=-1,
                  classweighted=False):
    print(locals())
    settings = locals().copy()
    if evalbatsize < 0:
        evalbatsize = batsize
    if test:
        epochs = 0
    if cuda:
        device = torch.device("cuda", gpu)
    else:
        device = torch.device("cpu")
    # region data
    assert not gloveemb or not vanillaemb
    tt = q.ticktock("script")
    tt.msg("running relation classifier with BiLSTM")
    tt.tick("loading data")
    data = load_data(which="rel+borders", retrelD=True, datafrac=datafrac,
                     wordlevel=gloveemb, rettokD=True)
    trainds, devds, testds, relD, tokD = data
    if maskentity:
        trainds, devds, testds = replace_entity_span(trainds, devds, testds)
    else:
        trainds, devds, testds = [
            TensorDataset(ds.tensors[0], ds.tensors[2])
            for ds in [trainds, devds, testds]
        ]
    relcounts = torch.zeros(max(relD.values()) + 1)
    trainrelcounts = torch.bincount(trainds.tensors[1])
    relcounts[:len(trainrelcounts)] += trainrelcounts.float()
    tt.tock("data loaded")
    tt.msg("Train/Dev/Test sizes: {} {} {}".format(len(trainds), len(devds), len(testds)))
    trainloader = DataLoader(trainds, batch_size=batsize, shuffle=True)
    devloader = DataLoader(devds, batch_size=evalbatsize, shuffle=False)
    testloader = DataLoader(testds, batch_size=evalbatsize, shuffle=False)
    evalds = TensorDataset(*testloader.dataset.tensors[:1])
    evalloader = DataLoader(evalds, batch_size=evalbatsize, shuffle=False)
    evalds_dev = TensorDataset(*devloader.dataset.tensors[:1])
    evalloader_dev = DataLoader(evalds_dev, batch_size=evalbatsize, shuffle=False)
    if test:
        evalloader = DataLoader(TensorDataset(*evalloader.dataset[:10]),
                                batch_size=batsize, shuffle=False)
        testloader = DataLoader(TensorDataset(*testloader.dataset[:10]),
                                batch_size=batsize, shuffle=False)
    # endregion

    # region model
    tt.tick("making model")
    if vanillaemb:
        bert = BertModel.from_pretrained("bert-base-uncased")
        emb = bert.embeddings.word_embeddings
        tt.msg("using vanilla emb of size {}".format(embdim))
        emb = torch.nn.Embedding(emb.weight.size(0), embdim)
    elif gloveemb:
        emb = q.WordEmb.load_glove("glove.50d", selectD=tokD)
    else:
        bert = BertModel.from_pretrained("bert-base-uncased")
        emb = bert.embeddings.word_embeddings
        embdim = bert.config.hidden_size
    bilstm = q.rnn.LSTMEncoder(embdim, *([dim] * numlayers), bidir=True,
                               dropout_in=dropout)
    # bilstm = torch.nn.LSTM(embdim, dim, batch_first=True, num_layers=numlayers,
    #                        bidirectional=True, dropout=dropout)
    m = RelationClassifier(emb=emb, bilstm=bilstm, dim=dim * 2, relD=relD,
                           dropout=dropout)
    m.to(device)
    tt.tock("made model")
    # endregion

    # region training
    totalsteps = len(trainloader) * epochs
    params = m.parameters()
    sched = get_schedule(sched, warmup=warmup, t_total=totalsteps, cycles=cycles)
    # optim = BertAdam(params, lr=lr, weight_decay=wreg, warmup=warmup,
    #                  t_total=totalsteps, schedule=schedmap[sched])
    optim = BertAdam(params, lr=lr, weight_decay=wreg, schedule=sched)
    losses = [
        q.SmoothedCELoss(smoothing=smoothing,
                         weight=(1 / relcounts.clamp_min(1e-6)) if classweighted else None),
        q.Accuracy()
    ]
    xlosses = [q.SmoothedCELoss(smoothing=smoothing), q.Accuracy()]
    trainlosses = [q.LossWrapper(l) for l in losses]
    devlosses = [q.LossWrapper(l) for l in xlosses]
    testlosses = [q.LossWrapper(l) for l in xlosses]
    trainloop = partial(q.train_epoch, model=m, dataloader=trainloader,
                        optim=optim, losses=trainlosses, device=device)
    devloop = partial(q.test_epoch, model=m, dataloader=devloader,
                      losses=devlosses, device=device)
    testloop = partial(q.test_epoch, model=m, dataloader=testloader,
                       losses=testlosses, device=device)

    tt.tick("training")
    q.run_training(trainloop, devloop, max_epochs=epochs)
    tt.tock("done training")

    tt.tick("testing")
    testres = testloop()
    print(testres)
    tt.tock("tested")

    if len(savep) > 0:
        tt.tick("making predictions and saving")
        i = 0
        while os.path.exists(savep + str(i)):
            i += 1
        os.mkdir(savep + str(i))
        savedir = savep + str(i)
        # save model
        # torch.save(m, open(os.path.join(savedir, "model.pt"), "wb"))
        # save settings
        json.dump(settings, open(os.path.join(savedir, "settings.json"), "w"))
        # save relation dictionary
        # json.dump(relD, open(os.path.join(savedir, "relD.json"), "w"))
        # save test predictions
        testpreds = q.eval_loop(m, evalloader, device=device)
        testpreds = testpreds[0].cpu().detach().numpy()
        np.save(os.path.join(savedir, "relpreds.test.npy"), testpreds)
        testpreds = q.eval_loop(m, evalloader_dev, device=device)
        testpreds = testpreds[0].cpu().detach().numpy()
        np.save(os.path.join(savedir, "relpreds.dev.npy"), testpreds)
        tt.msg("saved in {}".format(savedir))
        # save bert-tokenized questions
        # tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")
        # with open(os.path.join(savedir, "testquestions.txt"), "w") as f:
        #     for batch in evalloader:
        #         ques, io = batch
        #         ques = ques.numpy()
        #         for question in ques:
        #             qstr = " ".join([x for x in tokenizer.convert_ids_to_tokens(question)
        #                              if x != "[PAD]"])
        #             f.write(qstr + "\n")
    tt.tock("done")
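# The `classweighted` option above passes weight=1/relcounts.clamp_min(1e-6)
# to q.SmoothedCELoss, i.e. inverse-frequency class weighting. The same idea
# in plain PyTorch, as a standalone sketch (names hypothetical):
import torch

def inverse_frequency_weights(labels, num_classes):
    counts = torch.zeros(num_classes)
    binc = torch.bincount(labels)
    counts[:len(binc)] += binc.float()
    return 1.0 / counts.clamp_min(1e-6)     # unseen classes stay finite

labels = torch.tensor([0, 0, 0, 1, 2, 2])
crit = torch.nn.CrossEntropyLoss(weight=inverse_frequency_weights(labels, 4))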
def run_span_borders(lr=DEFAULT_LR,
                     dropout=.3,
                     wreg=DEFAULT_WREG,
                     initwreg=DEFAULT_INITWREG,
                     batsize=DEFAULT_BATSIZE,
                     evalbatsize=-1,
                     epochs=DEFAULT_EPOCHS,
                     smoothing=DEFAULT_SMOOTHING,
                     dim=200,
                     numlayers=1,
                     cuda=False,
                     gpu=0,
                     savep="exp_bilstm_span_borders_",
                     datafrac=1.,
                     vanillaemb=False,
                     embdim=300,
                     sched="cos",
                     warmup=0.1,
                     cycles=0.5):
    settings = locals().copy()
    print(locals())
    if evalbatsize < 0:
        evalbatsize = batsize
    if cuda:
        device = torch.device("cuda", gpu)
    else:
        device = torch.device("cpu")
    # region data
    tt = q.ticktock("script")
    tt.msg("running span border with BiLSTM")
    tt.tick("loading data")
    data = load_data(which="span/borders", datafrac=datafrac)
    trainds, devds, testds = data
    tt.tock("data loaded")
    tt.msg("Train/Dev/Test sizes: {} {} {}".format(len(trainds), len(devds), len(testds)))
    trainloader = DataLoader(trainds, batch_size=batsize, shuffle=True)
    devloader = DataLoader(devds, batch_size=evalbatsize, shuffle=False)
    testloader = DataLoader(testds, batch_size=evalbatsize, shuffle=False)
    evalds = TensorDataset(*testloader.dataset.tensors[:-1])
    evalloader = DataLoader(evalds, batch_size=evalbatsize, shuffle=False)
    evalds_dev = TensorDataset(*devloader.dataset.tensors[:-1])
    evalloader_dev = DataLoader(evalds_dev, batch_size=evalbatsize, shuffle=False)
    # endregion

    # region model
    tt.tick("creating model")
    # tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")
    bert = BertModel.from_pretrained("bert-base-uncased")
    emb = bert.embeddings.word_embeddings
    if vanillaemb:
        tt.msg("using vanilla emb of size {}".format(embdim))
        emb = torch.nn.Embedding(emb.weight.size(0), embdim)
    else:
        embdim = bert.config.hidden_size
    # inpD = tokenizer.vocab
    # q.WordEmb.masktoken = "[PAD]"
    # emb = q.WordEmb(embdim, worddic=inpD)
    bilstm = q.rnn.LSTMEncoder(embdim, *([dim] * numlayers), bidir=True,
                               dropout_in_shared=dropout)
    spandet = BorderSpanDetector(emb, bilstm, dim * 2, dropout=dropout)
    spandet.to(device)
    tt.tock("model created")
    # endregion

    # region training
    totalsteps = len(trainloader) * epochs
    params = spandet.parameters()
    sched = get_schedule(sched, warmup=warmup, t_total=totalsteps, cycles=cycles)
    optim = BertAdam(params, lr=lr, weight_decay=wreg, schedule=sched)
    # optim = torch.optim.Adam(spandet.parameters(), lr=lr, weight_decay=wreg)
    losses = [
        q.SmoothedCELoss(smoothing=smoothing),
        SpanF1Borders(),
        q.SeqAccuracy()
    ]
    xlosses = [
        q.SmoothedCELoss(smoothing=smoothing),
        SpanF1Borders(),
        q.SeqAccuracy()
    ]
    trainlosses = [q.LossWrapper(l) for l in losses]
    devlosses = [q.LossWrapper(l) for l in xlosses]
    testlosses = [q.LossWrapper(l) for l in xlosses]
    trainloop = partial(q.train_epoch, model=spandet, dataloader=trainloader,
                        optim=optim, losses=trainlosses, device=device)
    devloop = partial(q.test_epoch, model=spandet, dataloader=devloader,
                      losses=devlosses, device=device)
    testloop = partial(q.test_epoch, model=spandet, dataloader=testloader,
                       losses=testlosses, device=device)

    tt.tick("training")
    q.run_training(trainloop, devloop, max_epochs=epochs)
    tt.tock("done training")

    tt.tick("testing")
    testres = testloop()
    print(testres)
    tt.tock("tested")

    if len(savep) > 0:
        tt.tick("making predictions and saving")
        i = 0
        while os.path.exists(savep + str(i)):
            i += 1
        os.mkdir(savep + str(i))
        savedir = savep + str(i)
        # save model
        # torch.save(spandet, open(os.path.join(savedir, "model.pt"), "wb"))
        # save settings
        json.dump(settings, open(os.path.join(savedir, "settings.json"), "w"))
        outlen = trainloader.dataset.tensors[0].size(1)
        spandet.outlen = outlen
        # save test predictions
        testpreds = q.eval_loop(spandet, evalloader, device=device)
        testpreds = testpreds[0].cpu().detach().numpy()
        np.save(os.path.join(savedir, "borderpreds.test.npy"), testpreds)
        # save dev predictions
        testpreds = q.eval_loop(spandet, evalloader_dev, device=device)
        testpreds = testpreds[0].cpu().detach().numpy()
        np.save(os.path.join(savedir, "borderpreds.dev.npy"), testpreds)
        tt.msg("saved in {}".format(savedir))
    tt.tock("done")
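# SpanF1Borders is used but not defined in these scripts. A common definition
# of border-based span F1, stated here as an assumption about what it computes:
# token-overlap F1 between the predicted and the gold (start, end) span.
def span_f1(pred_start, pred_end, gold_start, gold_end):
    # half-open spans [start, end)
    overlap = max(0, min(pred_end, gold_end) - max(pred_start, gold_start))
    if overlap == 0:
        return 0.0
    precision = overlap / (pred_end - pred_start)
    recall = overlap / (gold_end - gold_start)
    return 2 * precision * recall / (precision + recall)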
def run_relations(lr=DEFAULT_LR,
                  dropout=.5,
                  wreg=DEFAULT_WREG,
                  initwreg=DEFAULT_INITWREG,
                  batsize=DEFAULT_BATSIZE,
                  epochs=10,
                  smoothing=DEFAULT_SMOOTHING,
                  cuda=False,
                  gpu=0,
                  balanced=False,
                  maskentity=False,
                  warmup=-1.,
                  sched="ang",
                  savep="exp_bert_rels_",
                  test=False,
                  freezeemb=False):
    settings = locals().copy()
    if test:
        epochs = 0
    print(locals())
    if cuda:
        device = torch.device("cuda", gpu)
    else:
        device = torch.device("cpu")
    # region data
    tt = q.ticktock("script")
    tt.msg("running relation classifier with BERT")
    tt.tick("loading data")
    data = load_data(which="rel+borders", retrelD=True)
    trainds, devds, testds, relD = data
    if maskentity:
        trainds, devds, testds = replace_entity_span(trainds, devds, testds)
    else:
        trainds, devds, testds = [
            TensorDataset(ds.tensors[0], ds.tensors[2])
            for ds in [trainds, devds, testds]
        ]
    tt.tock("data loaded")
    tt.msg("Train/Dev/Test sizes: {} {} {}".format(len(trainds), len(devds), len(testds)))
    trainloader = DataLoader(trainds, batch_size=batsize, shuffle=True)
    devloader = DataLoader(devds, batch_size=batsize, shuffle=False)
    testloader = DataLoader(testds, batch_size=batsize, shuffle=False)
    evalds = TensorDataset(*testloader.dataset.tensors[:1])
    evalloader = DataLoader(evalds, batch_size=batsize, shuffle=False)
    evalds_dev = TensorDataset(*devloader.dataset.tensors[:1])
    evalloader_dev = DataLoader(evalds_dev, batch_size=batsize, shuffle=False)
    if test:
        evalloader = DataLoader(TensorDataset(*evalloader.dataset[:10]),
                                batch_size=batsize, shuffle=False)
        testloader = DataLoader(TensorDataset(*testloader.dataset[:10]),
                                batch_size=batsize, shuffle=False)
    # endregion

    # region model
    tt.tick("loading BERT")
    bert = BertModel.from_pretrained("bert-base-uncased")
    m = RelationClassifier(bert, relD, dropout=dropout)
    m.to(device)
    tt.tock("loaded BERT")
    # endregion

    # region training
    totalsteps = len(trainloader) * epochs
    params = []
    for paramname, param in m.named_parameters():
        if paramname.startswith("bert.embeddings.word_embeddings"):
            if not freezeemb:
                params.append(param)
        else:
            params.append(param)
    optim = BertAdam(params, lr=lr, weight_decay=wreg, warmup=warmup,
                     t_total=totalsteps, schedule=schedmap[sched],
                     init_weight_decay=initwreg)
    losses = [q.SmoothedCELoss(smoothing=smoothing), q.Accuracy()]
    xlosses = [q.SmoothedCELoss(smoothing=smoothing), q.Accuracy()]
    trainlosses = [q.LossWrapper(l) for l in losses]
    devlosses = [q.LossWrapper(l) for l in xlosses]
    testlosses = [q.LossWrapper(l) for l in xlosses]
    trainloop = partial(q.train_epoch, model=m, dataloader=trainloader,
                        optim=optim, losses=trainlosses, device=device)
    devloop = partial(q.test_epoch, model=m, dataloader=devloader,
                      losses=devlosses, device=device)
    testloop = partial(q.test_epoch, model=m, dataloader=testloader,
                       losses=testlosses, device=device)

    tt.tick("training")
    q.run_training(trainloop, devloop, max_epochs=epochs)
    tt.tock("done training")

    tt.tick("testing")
    testres = testloop()
    print(testres)
    tt.tock("tested")

    if len(savep) > 0:
        tt.tick("making predictions and saving")
        i = 0
        while os.path.exists(savep + str(i)):
            i += 1
        os.mkdir(savep + str(i))
        savedir = savep + str(i)
        # save model
        # torch.save(m, open(os.path.join(savedir, "model.pt"), "wb"))
        # save settings
        json.dump(settings, open(os.path.join(savedir, "settings.json"), "w"))
        # save relation dictionary
        # json.dump(relD, open(os.path.join(savedir, "relD.json"), "w"))
        # save test predictions
        testpreds = q.eval_loop(m, evalloader, device=device)
        testpreds = testpreds[0].cpu().detach().numpy()
        np.save(os.path.join(savedir, "relpreds.test.npy"), testpreds)
        testpreds = q.eval_loop(m, evalloader_dev, device=device)
        testpreds = testpreds[0].cpu().detach().numpy()
        np.save(os.path.join(savedir, "relpreds.dev.npy"), testpreds)
        # save bert-tokenized questions
        # tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")
        # with open(os.path.join(savedir, "testquestions.txt"), "w") as f:
        #     for batch in evalloader:
        #         ques, io = batch
        #         ques = ques.numpy()
        #         for question in ques:
        #             qstr = " ".join([x for x in tokenizer.convert_ids_to_tokens(question)
        #                              if x != "[PAD]"])
        #             f.write(qstr + "\n")
    tt.tock("done")
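# The named_parameters loop above implements `freezeemb` by leaving the
# word-embedding weights out of the optimizer; gradients are still computed
# but never applied. An equivalent variant (a sketch, not the script's code)
# disables the gradient instead, which also skips the embedding backward pass:
def optimizer_params(model, freezeemb, prefix="bert.embeddings.word_embeddings"):
    for name, param in model.named_parameters():
        if name.startswith(prefix):
            param.requires_grad = not freezeemb
    return [p for p in model.parameters() if p.requires_grad]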
def run_span_borders(lr=DEFAULT_LR,
                     dropout=.5,
                     wreg=DEFAULT_WREG,
                     initwreg=DEFAULT_INITWREG,
                     batsize=DEFAULT_BATSIZE,
                     epochs=DEFAULT_EPOCHS,
                     smoothing=DEFAULT_SMOOTHING,
                     cuda=False,
                     gpu=0,
                     balanced=False,
                     warmup=-1.,
                     sched="ang",
                     savep="exp_bert_span_borders_",
                     freezeemb=False):
    settings = locals().copy()
    print(locals())
    if cuda:
        device = torch.device("cuda", gpu)
    else:
        device = torch.device("cpu")
    # region data
    tt = q.ticktock("script")
    tt.msg("running span border with BERT")
    tt.tick("loading data")
    data = load_data(which="span/borders")
    trainds, devds, testds = data
    tt.tock("data loaded")
    tt.msg("Train/Dev/Test sizes: {} {} {}".format(len(trainds), len(devds), len(testds)))
    trainloader = DataLoader(trainds, batch_size=batsize, shuffle=True)
    devloader = DataLoader(devds, batch_size=batsize, shuffle=False)
    testloader = DataLoader(testds, batch_size=batsize, shuffle=False)
    evalds = TensorDataset(*testloader.dataset.tensors[:-1])
    evalloader = DataLoader(evalds, batch_size=batsize, shuffle=False)
    evalds_dev = TensorDataset(*devloader.dataset.tensors[:-1])
    evalloader_dev = DataLoader(evalds_dev, batch_size=batsize, shuffle=False)
    # endregion

    # region model
    tt.tick("loading BERT")
    bert = BertModel.from_pretrained("bert-base-uncased")
    spandet = BorderSpanDetector(bert, dropout=dropout)
    spandet.to(device)
    tt.tock("loaded BERT")
    # endregion

    # region training
    totalsteps = len(trainloader) * epochs
    params = []
    for paramname, param in spandet.named_parameters():
        if paramname.startswith("bert.embeddings.word_embeddings"):
            if not freezeemb:
                params.append(param)
        else:
            params.append(param)
    optim = BertAdam(params, lr=lr, weight_decay=wreg, warmup=warmup,
                     t_total=totalsteps, schedule=schedmap[sched])
    losses = [
        q.SmoothedCELoss(smoothing=smoothing),
        SpanF1Borders(reduction="none"),
        q.SeqAccuracy()
    ]
    xlosses = [
        q.SmoothedCELoss(smoothing=smoothing),
        SpanF1Borders(reduction="none"),
        q.SeqAccuracy()
    ]
    trainlosses = [q.LossWrapper(l) for l in losses]
    devlosses = [q.LossWrapper(l) for l in xlosses]
    testlosses = [q.LossWrapper(l) for l in xlosses]
    trainloop = partial(q.train_epoch, model=spandet, dataloader=trainloader,
                        optim=optim, losses=trainlosses, device=device)
    devloop = partial(q.test_epoch, model=spandet, dataloader=devloader,
                      losses=devlosses, device=device)
    testloop = partial(q.test_epoch, model=spandet, dataloader=testloader,
                       losses=testlosses, device=device)

    tt.tick("training")
    q.run_training(trainloop, devloop, max_epochs=epochs)
    tt.tock("done training")

    tt.tick("testing")
    testres = testloop()
    print(testres)
    tt.tock("tested")

    if len(savep) > 0:
        tt.tick("making predictions and saving")
        i = 0
        while os.path.exists(savep + str(i)):
            i += 1
        os.mkdir(savep + str(i))
        savedir = savep + str(i)
        # save model
        # torch.save(spandet, open(os.path.join(savedir, "model.pt"), "wb"))
        # save settings
        json.dump(settings, open(os.path.join(savedir, "settings.json"), "w"))
        # save test predictions
        testpreds = q.eval_loop(spandet, evalloader, device=device)
        testpreds = testpreds[0].cpu().detach().numpy()
        np.save(os.path.join(savedir, "borderpreds.test.npy"), testpreds)
        # save dev predictions
        testpreds = q.eval_loop(spandet, evalloader_dev, device=device)
        testpreds = testpreds[0].cpu().detach().numpy()
        np.save(os.path.join(savedir, "borderpreds.dev.npy"), testpreds)
    tt.tock("done")
def run_relations(lr=DEFAULT_LR,
                  dropout=.3,
                  wreg=DEFAULT_WREG,
                  initwreg=DEFAULT_INITWREG,
                  batsize=DEFAULT_BATSIZE,
                  epochs=10,
                  smoothing=DEFAULT_SMOOTHING,
                  cuda=False,
                  gpu=0,
                  balanced=False,
                  maskentity=False,
                  savep="exp_bilstm_rels_",
                  test=False,
                  datafrac=1.,
                  glove=False,
                  embdim=50,
                  dim=300,
                  numlayers=2,
                  warmup=0.0,
                  cycles=0.5,
                  sched="cos",
                  evalbatsize=-1,
                  classweighted=False,
                  fixembed=False):
    print(locals())
    settings = locals().copy()
    if evalbatsize < 0:
        evalbatsize = batsize
    if test:
        epochs = 0
    if cuda:
        device = torch.device("cuda", gpu)
    else:
        device = torch.device("cpu")
    # region data
    tt = q.ticktock("script")
    tt.msg("running relation classifier with BiLSTM")
    tt.tick("loading data")
    data = load_data(which="wordmat,wordborders,rels", datafrac=datafrac, retrelD=True)
    trainds, devds, testds, wD, relD = data
    rev_wD = {v: k for k, v in wD.items()}

    def pp(ids):
        ret = " ".join([rev_wD[idse.item()] for idse in ids if idse.item() != 0])
        return ret

    print(pp(trainds.tensors[0][0]))
    print(trainds.tensors[1][0])
    if maskentity:
        trainds, devds, testds = replace_entity_span(trainds, devds, testds, D=wD)
    else:
        trainds, devds, testds = [
            TensorDataset(ds.tensors[0], ds.tensors[2])
            for ds in [trainds, devds, testds]
        ]
    for i in range(10):
        question = trainds.tensors[0][i]
        print(pp(question))
    print()
    for i in range(10):
        question = devds.tensors[0][i]
        print(pp(question))
    print()
    for i in range(10):
        question = testds.tensors[0][i]
        print(pp(question))
    relcounts = torch.zeros(max(relD.values()) + 1)
    trainrelcounts = torch.tensor(
        np.bincount(trainds.tensors[1].detach().cpu().numpy()))
    relcounts[:len(trainrelcounts)] += trainrelcounts.float()
    tt.tock("data loaded")
    tt.msg("Train/Dev/Test sizes: {} {} {}".format(len(trainds), len(devds), len(testds)))
    trainloader = DataLoader(trainds, batch_size=batsize, shuffle=True)
    devloader = DataLoader(devds, batch_size=evalbatsize, shuffle=False)
    testloader = DataLoader(testds, batch_size=evalbatsize, shuffle=False)
    evalds = TensorDataset(*testloader.dataset.tensors[:1])
    evalloader = DataLoader(evalds, batch_size=evalbatsize, shuffle=False)
    evalds_dev = TensorDataset(*devloader.dataset.tensors[:1])
    evalloader_dev = DataLoader(evalds_dev, batch_size=evalbatsize, shuffle=False)
    if test:
        evalloader = DataLoader(TensorDataset(*evalloader.dataset[:10]),
                                batch_size=batsize, shuffle=False)
        testloader = DataLoader(TensorDataset(*testloader.dataset[:10]),
                                batch_size=batsize, shuffle=False)
    # endregion

    # region model
    tt.tick("making model")
    emb = q.WordEmb(embdim, worddic=wD)
    if glove:
        print("using glove")
        stoi_, vectors_, dim = torch.load("../../data/buboqa/data/sq_glove300d.pt")
        # map vectors from custom glove ids to wD ids
        vectors = torch.zeros(max(wD.values()) + 1, embdim,
                              device=vectors_.device, dtype=vectors_.dtype)
        stoi = {}
        for k, v in stoi_.items():
            if k in wD:
                vectors[wD[k]] = vectors_[v]
                stoi[k] = wD[k]
        print("{} words in stoi that are in wD".format(len(stoi)))
        gloveemb = q.WordEmb(embdim, worddic=stoi, _weight=vectors)
        # gloveemb = q.WordEmb.load_glove("glove.{}d".format(embdim), selectD=wD)
        if fixembed:
            gloveemb.freeze()
            emb.freeze()
        emb = q.SwitchedWordEmb(emb).override(gloveemb)
    bilstm = q.rnn.LSTMEncoder(embdim, *([dim] * numlayers), bidir=True,
                               dropout_in=dropout)
    # bilstm = torch.nn.LSTM(embdim, dim, batch_first=True, num_layers=numlayers,
    #                        bidirectional=True, dropout=dropout)
    m = RelationClassifier(emb=emb, bilstm=bilstm, dim=dim, relD=relD,
                           dropout=dropout)
    m.to(device)
    # model = RelationPrediction(config)
    tt.tock("made model")
    # endregion

    # region training
    totalsteps = len(trainloader) * epochs
    params = m.parameters()
    params = [param for param in params if param.requires_grad]
    sched = get_schedule(sched, warmup=warmup, t_total=totalsteps, cycles=cycles)
    optim = BertAdam(params, lr=lr, weight_decay=wreg, warmup=warmup,
                     t_total=totalsteps, schedule=sched)
    # optim = torch.optim.Adam(params, lr=lr, weight_decay=wreg)
    # losses = [
    #     torch.nn.CrossEntropyLoss(size_average=True),
    #     q.Accuracy()
    # ]
    losses = [
        q.SmoothedCELoss(smoothing=smoothing,
                         weight=(1 / relcounts.clamp_min(1e-6)) if classweighted else None),
        q.Accuracy()
    ]
    # xlosses = [
    #     torch.nn.CrossEntropyLoss(size_average=True),
    #     q.Accuracy()
    # ]
    xlosses = [q.SmoothedCELoss(smoothing=smoothing), q.Accuracy()]
    trainlosses = [q.LossWrapper(l) for l in losses]
    devlosses = [q.LossWrapper(l) for l in xlosses]
    testlosses = [q.LossWrapper(l) for l in xlosses]
    trainloop = partial(q.train_epoch, model=m, dataloader=trainloader,
                        optim=optim, losses=trainlosses, device=device)
    devloop = partial(q.test_epoch, model=m, dataloader=devloader,
                      losses=devlosses, device=device)
    testloop = partial(q.test_epoch, model=m, dataloader=testloader,
                       losses=testlosses, device=device)

    tt.tick("training")
    q.run_training(trainloop, devloop, max_epochs=epochs)
    tt.tock("done training")

    tt.tick("testing")
    testres = testloop()
    print(testres)
    tt.tock("tested")

    if len(savep) > 0:
        tt.tick("making predictions and saving")
        i = 0
        while os.path.exists(savep + str(i)):
            i += 1
        os.mkdir(savep + str(i))
        savedir = savep + str(i)
        # save model
        # torch.save(m, open(os.path.join(savedir, "model.pt"), "wb"))
        # save settings
        json.dump(settings, open(os.path.join(savedir, "settings.json"), "w"))
        # save relation dictionary
        # json.dump(relD, open(os.path.join(savedir, "relD.json"), "w"))
        # save test predictions
        testpreds = q.eval_loop(m, evalloader, device=device)
        testpreds = testpreds[0].cpu().detach().numpy()
        np.save(os.path.join(savedir, "relpreds.test.npy"), testpreds)
        testpreds = q.eval_loop(m, evalloader_dev, device=device)
        testpreds = testpreds[0].cpu().detach().numpy()
        np.save(os.path.join(savedir, "relpreds.dev.npy"), testpreds)
        tt.msg("saved in {}".format(savedir))
        # save bert-tokenized questions
        # tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")
        # with open(os.path.join(savedir, "testquestions.txt"), "w") as f:
        #     for batch in evalloader:
        #         ques, io = batch
        #         ques = ques.numpy()
        #         for question in ques:
        #             qstr = " ".join([x for x in tokenizer.convert_ids_to_tokens(question)
        #                              if x != "[PAD]"])
        #             f.write(qstr + "\n")
    tt.tock("done")
def run_span_borders(lr=DEFAULT_LR,
                     dropout=.3,
                     wreg=DEFAULT_WREG,
                     initwreg=DEFAULT_INITWREG,
                     batsize=DEFAULT_BATSIZE,
                     evalbatsize=-1,
                     epochs=DEFAULT_EPOCHS,
                     smoothing=DEFAULT_SMOOTHING,
                     dim=200,
                     numlayers=1,
                     cuda=False,
                     gpu=0,
                     savep="exp_bilstm_span_borders_",
                     datafrac=1.,
                     glove=False,
                     fixembed=False,
                     embdim=50,
                     sched="cos",
                     warmup=0.1,
                     cycles=0.5):
    settings = locals().copy()
    print(locals())
    if evalbatsize < 0:
        evalbatsize = batsize
    if cuda:
        device = torch.device("cuda", gpu)
    else:
        device = torch.device("cpu")
    # region data
    tt = q.ticktock("script")
    tt.msg("running span border with BiLSTM")
    tt.tick("loading data")
    data = load_data(which="wordmat,wordborders", datafrac=datafrac)
    trainds, devds, testds, wD = data
    tt.tock("data loaded")
    tt.msg("Train/Dev/Test sizes: {} {} {}".format(len(trainds), len(devds), len(testds)))
    trainloader = DataLoader(trainds, batch_size=batsize, shuffle=True)
    devloader = DataLoader(devds, batch_size=evalbatsize, shuffle=False)
    testloader = DataLoader(testds, batch_size=evalbatsize, shuffle=False)
    evalds = TensorDataset(*testloader.dataset.tensors[:1])
    evalloader = DataLoader(evalds, batch_size=evalbatsize, shuffle=False)
    evalds_dev = TensorDataset(*devloader.dataset.tensors[:1])
    evalloader_dev = DataLoader(evalds_dev, batch_size=evalbatsize, shuffle=False)
    # endregion

    # region model
    tt.tick("creating model")
    emb = q.WordEmb(embdim, worddic=wD)
    if glove:
        print("using glove")
        stoi_, vectors_, dim = torch.load("../../data/buboqa/data/sq_glove300d.pt")
        # map vectors from custom glove ids to wD ids
        vectors = torch.zeros(max(wD.values()) + 1, embdim,
                              device=vectors_.device, dtype=vectors_.dtype)
        stoi = {}
        for k, v in stoi_.items():
            if k in wD:
                vectors[wD[k]] = vectors_[v]
                stoi[k] = wD[k]
        print("{} words in stoi that are in wD".format(len(stoi)))
        gloveemb = q.WordEmb(embdim, worddic=stoi, _weight=vectors)
        # gloveemb = q.WordEmb.load_glove("glove.{}d".format(embdim), selectD=wD)
        if fixembed:
            gloveemb.freeze()
        emb = q.SwitchedWordEmb(emb).override(gloveemb)
    # inpD = tokenizer.vocab
    # q.WordEmb.masktoken = "[PAD]"
    # emb = q.WordEmb(embdim, worddic=inpD)
    bilstm = q.rnn.LSTMEncoder(embdim, *([dim] * numlayers), bidir=True,
                               dropout_in_shared=dropout)
    spandet = BorderSpanDetector(emb, bilstm, dim * 2, dropout=dropout)
    spandet.to(device)
    tt.tock("model created")
    # endregion

    # region training
    totalsteps = len(trainloader) * epochs
    params = spandet.parameters()
    sched = get_schedule(sched, warmup=warmup, t_total=totalsteps, cycles=cycles)
    optim = BertAdam(params, lr=lr, weight_decay=wreg, schedule=sched)
    # optim = torch.optim.Adam(spandet.parameters(), lr=lr, weight_decay=wreg)
    losses = [
        q.SmoothedCELoss(smoothing=smoothing),
        SpanF1Borders(),
        q.SeqAccuracy()
    ]
    xlosses = [
        q.SmoothedCELoss(smoothing=smoothing),
        SpanF1Borders(),
        q.SeqAccuracy()
    ]
    trainlosses = [q.LossWrapper(l) for l in losses]
    devlosses = [q.LossWrapper(l) for l in xlosses]
    testlosses = [q.LossWrapper(l) for l in xlosses]
    trainloop = partial(q.train_epoch, model=spandet, dataloader=trainloader,
                        optim=optim, losses=trainlosses, device=device)
    devloop = partial(q.test_epoch, model=spandet, dataloader=devloader,
                      losses=devlosses, device=device)
    testloop = partial(q.test_epoch, model=spandet, dataloader=testloader,
                       losses=testlosses, device=device)

    tt.tick("training")
    q.run_training(trainloop, devloop, max_epochs=epochs)
    tt.tock("done training")

    tt.tick("testing")
    testres = testloop()
    print(testres)
    tt.tock("tested")

    if len(savep) > 0:
        tt.tick("making predictions and saving")
        i = 0
        while os.path.exists(savep + str(i)):
            i += 1
        os.mkdir(savep + str(i))
        savedir = savep + str(i)
        # save model
        # torch.save(spandet, open(os.path.join(savedir, "model.pt"), "wb"))
        # save settings
        json.dump(settings, open(os.path.join(savedir, "settings.json"), "w"))
        outlen = trainloader.dataset.tensors[0].size(1)
        spandet.outlen = outlen
        # save test predictions
        testpreds = q.eval_loop(spandet, evalloader, device=device)
        testpreds = testpreds[0].cpu().detach().numpy()
        np.save(os.path.join(savedir, "borderpreds.test.npy"), testpreds)
        # save dev predictions
        testpreds = q.eval_loop(spandet, evalloader_dev, device=device)
        testpreds = testpreds[0].cpu().detach().numpy()
        np.save(os.path.join(savedir, "borderpreds.dev.npy"), testpreds)
        tt.msg("saved in {}".format(savedir))
    tt.tock("done")
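# Both BiLSTM scripts repeat the same GloVe remapping loop: pretrained vectors
# indexed by their own string-to-id table are copied into a matrix indexed by
# the task vocabulary wD. As a standalone sketch of that pattern (names
# hypothetical):
import torch

def remap_pretrained(stoi_src, vectors_src, stoi_dst):
    out = torch.zeros(max(stoi_dst.values()) + 1, vectors_src.size(1),
                      dtype=vectors_src.dtype)
    hit = {}
    for word, src_id in stoi_src.items():
        if word in stoi_dst:
            out[stoi_dst[word]] = vectors_src[src_id]   # copy known vectors
            hit[word] = stoi_dst[word]
    return out, hit                                     # missing words stay zero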