Example #1
def train_alphaBert_stage1(TS_model,
                           dloader,
                           testloader,
                           lr=1e-4,
                           epoch=10,
                           log_interval=20,
                           cloze_fix=True,
                           use_amp=False,
                           lkahead=False,
                           parallel=True):
    global checkpoint_file
    TS_model.to(device)
    #    model_optimizer = optim.Adam(TS_model.parameters(), lr=lr)
    #    if lkahead:
    #        print('using Lookahead')
    #        model_optimizer = lookahead_pytorch.Lookahead(model_optimizer, la_steps=5, la_alpha=0.5)
    model_optimizer = Ranger(TS_model.parameters(), lr=lr)
    if use_amp:
        TS_model, model_optimizer = amp.initialize(TS_model,
                                                   model_optimizer,
                                                   opt_level="O1")
    if parallel:
        TS_model = torch.nn.DataParallel(TS_model)


#    torch.distributed.init_process_group(backend='nccl',
#                                         init_method='env://host',
#                                         world_size=0,
#                                         rank=0,
#                                         store=None,
#                                         group_name='')
#    TS_model = DDP(TS_model)
#    TS_model = apex.parallel.DistributedDataParallel(TS_model)
    TS_model.train()

    #    criterion = alphabert_loss.Alphabert_satge1_loss(device=device)
    criterion = nn.CrossEntropyLoss(ignore_index=-1).to(device)
    iteration = 0
    total_loss = []
    out_pred_res = []
    out_pred_test = []
    for ep in range(epoch):
        t0 = time.time()
        #        step_loss = 0
        epoch_loss = 0
        epoch_cases = 0
        for batch_idx, sample in enumerate(dloader):
            #            TS_model.train()
            model_optimizer.zero_grad()
            loss = 0

            src = sample['src_token']
            trg = sample['trg']
            att_mask = sample['mask_padding']
            origin_len = sample['origin_seq_length']

            bs, max_len = src.shape

            #            src, err_cloze = make_cloze(src,
            #                                        max_len,
            #                                        device=device,
            #                                        percent=0.15,
            #                                        fix=cloze_fix)

            src = src.float().to(device)
            trg = trg.long().to(device)
            att_mask = att_mask.float().to(device)
            origin_len = origin_len.to(device)

            prediction_scores, = TS_model(input_ids=src,
                                          attention_mask=att_mask)

            #            print(1111,prediction_scores.view(-1,84).shape)
            #            print(1111,trg.view(-1).shape)

            loss = criterion(
                prediction_scores.view(-1, 100).contiguous(),
                trg.view(-1).contiguous())

            if use_amp:
                with amp.scale_loss(loss, model_optimizer) as scaled_loss:
                    scaled_loss.backward()
            else:
                loss.backward()

            model_optimizer.step()

            with torch.no_grad():
                epoch_loss += loss.item() * bs
                epoch_cases += bs

                if iteration % log_interval == 0:
                    print('Ep:{} [{} ({:.0f}%)/ ep_time:{:.0f}min] L:{:.4f}'.
                          format(ep, batch_idx * bs,
                                 100. * batch_idx / len(dloader),
                                 (time.time() - t0) * len(dloader) /
                                 (60 * (batch_idx + 1)), loss.item()))

                if iteration % 400 == 0:
                    save_checkpoint(checkpoint_file,
                                    'd2s_total.pth',
                                    TS_model,
                                    model_optimizer,
                                    parallel=parallel)
                    a_ = tokenize_alphabets.convert_idx2str(
                        src[0][:origin_len[0]])
                    print(a_)
                    print(' ******** ******** ******** ')
                    _, show_pred = torch.max(prediction_scores[0], dim=1)
                    err_cloze_ = trg[0] > -1
                    src[0][err_cloze_] = show_pred[err_cloze_].float()
                    b_ = tokenize_alphabets.convert_idx2str(
                        src[0][:origin_len[0]])
                    print(b_)
                    print(' ******** ******** ******** ')
                    src[0][err_cloze_] = trg[0][err_cloze_].float()
                    c_ = tokenize_alphabets.convert_idx2str(
                        src[0][:origin_len[0]])
                    print(c_)

                    out_pred_res.append((ep, a_, b_, c_, err_cloze_))
                    out_pd_res = pd.DataFrame(out_pred_res)
                    out_pd_res.to_csv('./result/out_pred_train.csv', sep=',')

                if iteration % 999 == 0:
                    print(' ===== Show the Test of Pretrain ===== ')
                    test_res = test_alphaBert_stage1(TS_model, testloader)
                    print(' ===== Show the Test of Pretrain ===== ')

                    out_pred_test.append((ep, *test_res))
                    out_pd_test = pd.DataFrame(out_pred_test)
                    out_pd_test.to_csv('./result/out_pred_test.csv', sep=',')

            iteration += 1
        if ep % 1 == 0:
            save_checkpoint(checkpoint_file,
                            'd2s_total.pth',
                            TS_model,
                            model_optimizer,
                            parallel=parallel)

            print('======= epoch:%i ========' % ep)

        print('++ Ep Time: {:.1f} Secs ++'.format(time.time() - t0))
        total_loss.append(float(epoch_loss / epoch_cases))
        pd_total_loss = pd.DataFrame(total_loss)
        pd_total_loss.to_csv('./result/total_loss_pretrain.csv', sep=',')
    print(total_loss)
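The stage-1 pre-training loop above relies on module-level names from the surrounding script (`device`, `checkpoint_file`, `save_checkpoint`, `tokenize_alphabets`, `test_alphaBert_stage1`, plus the `Ranger` and apex `amp` imports). A minimal invocation sketch, with hypothetical loader names:

train_alphaBert_stage1(TS_model=model,
                       dloader=train_loader,
                       testloader=test_loader,
                       lr=1e-4,
                       epoch=10,
                       use_amp=False,
                       parallel=True)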
Example #2
        preds = preds.log_softmax(dim=self.dim)
        return torch.mean(torch.sum(-one_hot_target * preds, dim=self.dim))
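# The two lines above are the tail end of LabelSmoothingLoss.forward; the rest of
# the class is not part of this snippet. A minimal sketch of a compatible class
# (an assumed reconstruction, not this project's exact code):
import torch
from torch import nn


class LabelSmoothingLoss(nn.Module):
    def __init__(self, smoothing=0.1, dim=-1):
        super().__init__()
        self.smoothing = smoothing
        self.dim = dim

    def forward(self, preds, target):
        n_classes = preds.size(self.dim)
        # spread the smoothing mass uniformly over the non-target classes
        one_hot_target = torch.full_like(preds, self.smoothing / (n_classes - 1))
        one_hot_target.scatter_(self.dim, target.unsqueeze(self.dim),
                                1.0 - self.smoothing)
        preds = preds.log_softmax(dim=self.dim)
        return torch.mean(torch.sum(-one_hot_target * preds, dim=self.dim))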


# loss_fn = nn.CrossEntropyLoss()
loss_fn = LabelSmoothingLoss()
device = torch.device('cuda:1')

from ranger import Ranger

epochs = 20
patience = 15

#opt = torch.optim.AdamW(model.parameters(), lr=3e-4)
#opt = Lookahead(opt)
opt = Ranger(model.parameters(), lr=3e-4)
model = model.to(device)
rolling_loss = dict(train=RollingLoss(), valid=RollingLoss())
steps = dict(train=0, valid=0)

trials = 0
best_metric = -np.inf
history = []
stop = False

vis = Visdom(server='0.0.0.0',
             port=9090,
             username=os.environ['VISDOM_USERNAME'],
             password=os.environ['VISDOM_PASSWORD'])

# loaders = create_loaders(batch_size=7)
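`RollingLoss` and `create_loaders` come from the surrounding project and are not shown in this snippet. As a rough sketch of what a rolling loss tracker of this kind usually does (an assumption about its behaviour, not the project's implementation), it can be read as an exponential moving average of batch losses:

class RollingLoss:
    def __init__(self, smooth=0.98):
        self.smooth = smooth
        self.value = None

    def __call__(self, loss):
        loss = float(loss)
        # exponential moving average of incoming batch losses
        if self.value is None:
            self.value = loss
        else:
            self.value = self.smooth * self.value + (1 - self.smooth) * loss
        return self.value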
Example #3
def train_alphaBert(DS_model,
                    dloader,
                    lr=1e-4,
                    epoch=10,
                    log_interval=20,
                    lkahead=False):
    global checkpoint_file
    DS_model.to(device)
    #    model_optimizer = optim.Adam(DS_model.parameters(), lr=lr)
    model_optimizer = Ranger(DS_model.parameters(), lr=lr)
    DS_model = torch.nn.DataParallel(DS_model)
    DS_model.train()
    #    if lkahead:
    #        print('using Lookahead')
    #        model_optimizer = lookahead_pytorch.Lookahead(model_optimizer, la_steps=5, la_alpha=0.5)
    #    model_optimizer = Ranger(DS_model.parameters(), lr=4e-3, alpha=0.5, k=5)
    #    criterion = nn.MSELoss().to(device)
    #    criterion = alphabert_loss_v02.Alphabert_loss(device=device)
    criterion = nn.CrossEntropyLoss(ignore_index=-1).to(device)
    iteration = 0
    total_loss = []
    for ep in range(epoch):
        DS_model.train()

        t0 = time.time()
        #        step_loss = 0
        epoch_loss = 0
        epoch_cases = 0
        for batch_idx, sample in enumerate(dloader):
            model_optimizer.zero_grad()
            loss = 0

            src = sample['src_token']
            trg = sample['trg']
            att_mask = sample['mask_padding']
            origin_len = sample['origin_seq_length']

            bs = len(src)

            src = src.float().to(device)
            trg = trg.long().to(device)
            att_mask = att_mask.float().to(device)
            origin_len = origin_len.to(device)

            pred_prop, = DS_model(input_ids=src,
                                  attention_mask=att_mask,
                                  out='finehead')

            trg_view = trg.view(-1).contiguous()
            trg_mask0 = trg_view == 0
            trg_mask1 = trg_view == 1

            loss = criterion(pred_prop, trg_view)
            #            try:
            #                loss0 = criterion(pred_prop[trg_mask0],trg_view[trg_mask0])
            #                loss1 = criterion(pred_prop[trg_mask1],trg_view[trg_mask1])
            #
            #                loss += 0.2*loss0+0.8*loss1
            #            except:
            #                loss = criterion(pred_prop,trg.view(-1).contiguous())

            loss.backward()
            model_optimizer.step()

            with torch.no_grad():
                epoch_loss += loss.item() * bs
                epoch_cases += bs

            if iteration % log_interval == 0:
                #                step_loss.backward()
                #                model_optimizer.step()
                #                print('+++ update +++')
                print(
                    'Ep:{} [{} ({:.0f}%)/ ep_time:{:.0f}min] L:{:.4f}'.format(
                        ep, batch_idx * bs,
                        100. * batch_idx / len(dloader), (time.time() - t0) *
                        len(dloader) / (60 * (batch_idx + 1)), loss.item()))
#                print(0,st_target)
#                step_loss = 0

            if iteration % 400 == 0:
                save_checkpoint(checkpoint_file,
                                'd2s_total.pth',
                                DS_model,
                                model_optimizer,
                                parallel=True)
                print(
                    tokenize_alphabets.convert_idx2str(src[0][:origin_len[0]]))
            iteration += 1
        if ep % 1 == 0:
            save_checkpoint(checkpoint_file,
                            'd2s_total.pth',
                            DS_model,
                            model_optimizer,
                            parallel=True)
            #            test_alphaBert(DS_model,D2S_valloader,
            #                           is_clean_up=True, ep=ep,train=True)

            print('======= epoch:%i ========' % ep)


#        print('total loss: {:.4f}'.format(total_loss/len(dloader)))
        print('++ Ep Time: {:.1f} Secs ++'.format(time.time() - t0))
        #        total_loss.append(epoch_loss)
        total_loss.append(float(epoch_loss / epoch_cases))
        pd_total_loss = pd.DataFrame(total_loss)
        pd_total_loss.to_csv('./iou_pic/total_loss_finetune.csv', sep=',')
    print(total_loss)
Example #4
def train(fold_idx=None):
    # model = UNet(n_classes=1, n_channels=3)
    model = DeepLabV3_plus(num_classes=1, backbone='resnet', sync_bn=True)
    model = model.to(device)
    train_dataloader, valid_dataloader = get_trainval_dataloader()
    criterion = nn.BCEWithLogitsLoss()
    optimizer = Ranger(model.parameters(), lr=1e-3, weight_decay=0.0005)
    scheduler_cosine = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer,
                                                                  T_max=10)

    best_val_score = 0
    last_improved_epoch = 0
    if fold_idx is None:
        print('start')
        model_save_path = os.path.join(config.dir_weight,
                                       '{}.bin'.format(config.save_model_name))
    else:
        print('start fold: {}'.format(fold_idx + 1))
        model_save_path = os.path.join(
            config.dir_weight, '{}_fold{}.bin'.format(config.save_model_name,
                                                      fold_idx))
    for cur_epoch in range(config.num_epochs):
        start_time = int(time.time())
        model.train()
        print('epoch: ', cur_epoch + 1)
        cur_step = 0
        for batch in train_dataloader:
            batch_x = batch['image']
            batch_y = batch['mask']
            batch_x, batch_y = batch_x.to(device), batch_y.to(device)

            optimizer.zero_grad()
            mask_pred = model(batch_x)
            train_loss = criterion(mask_pred, batch_y)
            train_loss.backward()
            optimizer.step()

            cur_step += 1
            if cur_step % config.step_train_print == 0:
                train_acc = accuracy(mask_pred, batch_y)
                msg = 'the current step: {0}/{1}, train loss: {2:>5.2}, train acc: {3:>6.2%}'
                print(
                    msg.format(cur_step, len(train_dataloader),
                               train_loss.item(), train_acc[0].item()))

        val_miou = eval_net_unet_miou(model, valid_dataloader, device)
        val_score = val_miou
        if val_score > best_val_score:
            best_val_score = val_score
            torch.save(model.state_dict(), model_save_path)
            improved_str = '*'
            last_improved_epoch = cur_epoch
        else:
            improved_str = ''
        msg = 'the current epoch: {0}/{1}, val score: {2:>6.2%}, cost: {3}s {4}'
        end_time = int(time.time())
        print(
            msg.format(cur_epoch + 1, config.num_epochs, val_score,
                       end_time - start_time, improved_str))
        if cur_epoch - last_improved_epoch > config.num_patience_epoch:
            print("No optimization for a long time, auto-stopping...")
            break
        scheduler_cosine.step()
    del model
    gc.collect()
    return best_val_score
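`get_trainval_dataloader`, `accuracy`, `eval_net_unet_miou`, and `config` are project helpers that this snippet does not include. For the single-channel mask trained with `BCEWithLogitsLoss` above, a minimal batch IoU sketch (assuming a 0.5 threshold on the sigmoid output) could look like:

import torch

def binary_iou(mask_logits, mask_true, eps=1e-6):
    # threshold predicted probabilities into a hard {0, 1} mask
    pred = (torch.sigmoid(mask_logits) > 0.5).float()
    inter = (pred * mask_true).sum(dim=(-2, -1))
    union = pred.sum(dim=(-2, -1)) + mask_true.sum(dim=(-2, -1)) - inter
    # mean IoU over the batch
    return ((inter + eps) / (union + eps)).mean()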
Example #5
def train(args):
    # get configs
    epochs = args.epoch
    dim = args.dim
    lr = args.lr
    weight_decay = args.l2
    head_num = args.head_num
    aggregate = "sum"
    device = args.device
    act = args.act
    fusion = args.fusion
    beta = args.beta
    model = AttentionalTreeEmbeddig(leaf_num, importer_size, item_size,
                                    dim, head_num,
                                    fusion_type=fusion, act=act,
                                    device=device).to(device)
    #     model = torch.load("./saved_models/DATE_0.6028.pkl").to(device)
    # initialize parameters
    for p in model.parameters():
        if p.dim() > 1:
            nn.init.xavier_uniform_(p)

    # optimizer & loss
    optimizer = Ranger(model.parameters(), weight_decay=weight_decay, lr=lr)
    cls_loss_func = nn.BCELoss()
    reg_loss_func = nn.MSELoss()

    # save best model
    global_best_score = 0
    model_state = None

    # early stop settings
    stop_rounds = 3
    no_improvement = 0
    current_score = None

    for epoch in range(epochs):
        for step, (batch_feature, batch_user, batch_item, batch_cls,
                   batch_reg) in enumerate(train_loader):
            model.train()  # prep to train model
            batch_feature, batch_user, batch_item, batch_cls, batch_reg = (
                batch_feature.to(device), batch_user.to(device),
                batch_item.to(device), batch_cls.to(device),
                batch_reg.to(device))
            batch_cls, batch_reg = batch_cls.view(-1, 1), batch_reg.view(-1, 1)

            # model output
            classification_output, regression_output, hidden_vector = model(
                batch_feature, batch_user, batch_item)

            # FGM attack
            adv_vector = fgsm_attack(model, cls_loss_func, hidden_vector,
                                     batch_cls, 0.01)
            adv_output = model.pred_from_hidden(adv_vector)

            # calculate loss
            adv_loss = beta * cls_loss_func(adv_output, batch_cls)
            cls_loss = cls_loss_func(classification_output, batch_cls)
            revenue_loss = 10 * reg_loss_func(regression_output, batch_reg)
            loss = cls_loss + revenue_loss + adv_loss
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            if (step + 1) % 1000 == 0:
                print("CLS loss:%.4f, REG loss:%.4f, ADV loss:%.4f, Loss:%.4f"\
                %(cls_loss.item(),revenue_loss.item(),adv_loss.item(),loss.item()))

        # evaluate
        model.eval()
        print("Validate at epoch %s" % (epoch + 1))
        y_prob, val_loss = model.eval_on_batch(valid_loader)
        y_pred_tensor = torch.tensor(y_prob).float().to(device)
        best_threshold, val_score, roc = torch_threshold(y_prob, xgb_validy)
        overall_f1, auc, precisions, recalls, f1s, revenues = metrics(
            y_prob, xgb_validy, revenue_valid)
        select_best = np.mean(f1s)
        print("Over-all F1:%.4f, AUC:%.4f, F1-top:%.4f" %
              (overall_f1, auc, select_best))

        print("Evaluate at epoch %s" % (epoch + 1))
        y_prob, val_loss = model.eval_on_batch(test_loader)
        y_pred_tensor = torch.tensor(y_prob).float().to(device)
        overall_f1, auc, precisions, recalls, f1s, revenues = metrics(
            y_prob, xgb_testy, revenue_test, best_thresh=best_threshold)
        print("Over-all F1:%.4f, AUC:%.4f, F1-top:%.4f" %
              (overall_f1, auc, np.mean(f1s)))

        # save best model
        if select_best > global_best_score:
            global_best_score = select_best
            torch.save(model, model_path)

        # early stopping
        if current_score is None:
            current_score = select_best
            continue
        if select_best < current_score:
            current_score = select_best
            no_improvement += 1
        if no_improvement >= stop_rounds:
            print("Early stopping...")
            break
        if select_best > current_score:
            no_improvement = 0
            current_score = None
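`fgsm_attack` is defined elsewhere in this project; the call above perturbs the hidden vector and re-scores it through `model.pred_from_hidden`. A sketch of such a gradient-sign perturbation (an assumption, not the project's exact implementation):

import torch

def fgsm_attack_sketch(model, loss_func, hidden, labels, epsilon):
    hidden = hidden.detach().requires_grad_(True)
    loss = loss_func(model.pred_from_hidden(hidden), labels)
    grad, = torch.autograd.grad(loss, hidden)
    # step the hidden representation along the sign of the gradient
    return hidden + epsilon * grad.sign()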
Example #6
if not config.amp:
    model = nn.DataParallel(model)

if config.optim == 'adamw':
    optimizer = AdamW(model.parameters(),
                      lr=config.learning_rate,
                      weight_decay=config.weight_decay)
elif config.optim == 'sgd':
    optimizer = SGD(model.parameters(),
                    lr=config.learning_rate,
                    weight_decay=config.weight_decay,
                    momentum=0.9,
                    nesterov=True)
elif config.optim == 'ranger':
    optimizer = Ranger(model.parameters(),
                       lr=config.learning_rate,
                       weight_decay=config.weight_decay,
                       use_gc=True)

if config.amp:
    model, optimizer = amp.initialize(model, optimizer, opt_level="O1")
    model = nn.DataParallel(model)

if config.mode == 'multimodal':
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
                                                step_size=config.n_epoch // 2,
                                                gamma=0.1,
                                                last_epoch=-1)
else:
    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        optimizer, T_max=config.n_epoch * len(train_loader))
if config.warmup:
Example #7
File: diff.py  Project: devforfu/protein
from ranger import Ranger

epochs = 100
patience = 15

#opt = torch.optim.AdamW(model.parameters(), lr=3e-4)
#opt = Lookahead(opt)
base_lr = 3e-4
# progressively smaller learning rates for the earlier, more generic dense blocks
opt = Ranger(params=[{
    'params': model.head.parameters(),
    'lr': base_lr
}, {
    'params': model.base.features.denseblock4.parameters(),
    'lr': base_lr / 3
}, {
    'params': model.base.features.denseblock3.parameters(),
    'lr': base_lr / 5
}, {
    'params': model.base.features.denseblock2.parameters(),
    'lr': base_lr / 10
}, {
    'params': model.base.features.denseblock1.parameters(),
    'lr': base_lr / 100
}])
model = model.to(device)
rolling_loss = dict(train=RollingLoss(), valid=RollingLoss())
steps = dict(train=0, valid=0)

trials = 0
best_metric = -np.inf
history = []
stop = False