Example #1
# Library imports needed by this snippet; project-local names (Visualizer,
# AppData, DoubleSequence, val) come from the surrounding repository.
import time

import torch as t
from torch.autograd import Variable
from torch.utils.data import DataLoader
from torchnet import meter
from tqdm import tqdm


def train():
    vis = Visualizer("Kesci" + time.strftime('%m%d%H%M'))
    train_data = AppData("../kesci/data/data_v3_23d/train_ab.json",
                         iflabel=True)
    val_data = AppData("../kesci/data/data_v3_23d/val_ab.json", iflabel=True)
    train_dataloader = DataLoader(train_data, 256, shuffle=True, num_workers=4)
    val_dataloader = DataLoader(val_data, 512, shuffle=False, num_workers=2)
    test_data = AppData("../kesci/data/data_v3_23d/test_ab.json", iflabel=True)
    test_dataloader = DataLoader(test_data, 512, shuffle=False, num_workers=2)

    criterion = t.nn.BCEWithLogitsLoss().cuda()
    learning_rate = 0.002
    weight_decay = 0.0003
    model = DoubleSequence(31, 128, 1).cuda()
    optimizer = t.optim.Adam(model.parameters(),
                             lr=learning_rate,
                             weight_decay=weight_decay)

    loss_meter = meter.AverageValueMeter()
    confusion_matrix = meter.ConfusionMeter(2)
    previous_loss = 1e100

    for epoch in range(400):
        loss_meter.reset()
        confusion_matrix.reset()

        for ii, (data, property, target) in tqdm(enumerate(train_dataloader)):
            input = Variable(data).cuda()
            input2 = Variable(property).cuda()
            target = Variable(target).cuda()
            output = model(input, input2)

            optimizer.zero_grad()
            loss = criterion(output, target)
            loss.backward()
            optimizer.step()
            loss_meter.add(loss.item())  # loss.data[0] fails on PyTorch >= 0.5

            if ii % 100 == 99:
                vis.plot('loss', loss_meter.value()[0])

        if epoch % 3 == 2:
            train_cm, train_f1 = val(model, train_dataloader)
            vis.plot('train_f1', train_f1)
        val_cm, val_f1 = val(model, val_dataloader)

        vis.plot_many({'val_f1': val_f1, 'learning_rate': learning_rate})
        if loss_meter.value()[0] > previous_loss:
            learning_rate = learning_rate * 0.9
            # Second way to lower the learning rate: the optimizer keeps its
            # state (momentum etc.) because only the param groups are updated
            for param_group in optimizer.param_groups:
                param_group['lr'] = learning_rate

        previous_loss = loss_meter.value()[0]

        if epoch % 3 == 2:
            model.save()
            test_cm, test_f1 = val(model, test_dataloader)
            vis.plot('test_f1', test_f1)
            # torchnet's ConfusionMeter stores ground-truth classes as rows and
            # predictions as columns, so for class 0 precision divides by the
            # column sum and recall divides by the row sum.
            vis.log(
                "train: {train_f1:%}, {train_pre:%}, {train_rec:%} | "
                "val: {val_f1:%}, {val_pre:%}, {val_rec:%} | "
                "test: {test_f1:%}, {test_pre:%}, {test_rec:%} | "
                "{train_true_num:%}, {val_true_num:%}, {test_true_num:%}".format(
                    train_f1=train_f1,
                    val_f1=val_f1,
                    test_f1=test_f1,
                    train_true_num=train_cm.value()[:, 0].sum() / len(train_data),
                    val_true_num=val_cm.value()[:, 0].sum() / len(val_data),
                    test_true_num=test_cm.value()[:, 0].sum() / len(test_data),
                    train_pre=train_cm.value()[0][0] / train_cm.value()[:, 0].sum(),
                    train_rec=train_cm.value()[0][0] / train_cm.value()[0].sum(),
                    val_pre=val_cm.value()[0][0] / val_cm.value()[:, 0].sum(),
                    val_rec=val_cm.value()[0][0] / val_cm.value()[0].sum(),
                    test_pre=test_cm.value()[0][0] / test_cm.value()[:, 0].sum(),
                    test_rec=test_cm.value()[0][0] / test_cm.value()[0].sum()))
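
This example (like Examples 3 and 4 below) decays the learning rate by hand when the epoch loss stops improving, writing the new value into optimizer.param_groups so that Adam's momentum buffers survive. A minimal sketch of the same policy using torch.optim.lr_scheduler.ReduceLROnPlateau, which likewise mutates the param groups in place; the toy model, factor, and patience are illustrative assumptions, not values taken from the examples:

import torch

model = torch.nn.Linear(31, 1)
optimizer = torch.optim.Adam(model.parameters(), lr=0.002, weight_decay=0.0003)
# patience=0 reduces the lr as soon as the monitored loss fails to improve,
# mirroring the `loss > previous_loss` check above; factor=0.9 matches lr * 0.9.
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
    optimizer, mode='min', factor=0.9, patience=0)

for epoch in range(5):
    epoch_loss = 1.0  # stand-in for loss_meter.value()[0]
    scheduler.step(epoch_loss)  # decays the lr in place when the loss plateaus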
Example #2
# Library imports needed by this snippet; project-local names (opt, models,
# Visualizer, IcvDataset, standard_data, AverageMeter, accuracy, val,
# get_learning_rate, init_extract_model, save_checkpoint) come from the
# surrounding repository.
import torch
import torch.nn as nn
from torch.utils.data import DataLoader


def train():
    vis = Visualizer(opt.env, port=opt.vis_port)
    # step1 : load model
    model = getattr(models, opt.model)(pretrained=True)
    # load the pretrained model for fine-tuning or feature extraction
    model = init_extract_model(model, 10)
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)

    # step2: data
    train_data_list = standard_data(opt.train_data_dir, 'train')
    val_data_list = standard_data(opt.train_data_dir, 'val')
    train_dataloader = DataLoader(IcvDataset(train_data_list),
                                  batch_size=opt.batch_size,
                                  shuffle=True,
                                  num_workers=opt.num_workers)
    val_dataloader = DataLoader(IcvDataset(val_data_list, train=False),
                                batch_size=opt.batch_size,
                                shuffle=False,
                                num_workers=opt.num_workers)

    # step3: criterion and optimizer and scheduler
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=opt.lr,
                                 weight_decay=opt.weight_decay)
    # every 300 epochs, decay lr = lr * gamma
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
                                                step_size=300,
                                                gamma=0.1)

    # step4: define metrics
    train_losses = AverageMeter()
    train_top1 = AverageMeter()

    # step5.1: some parameters for K-fold and restart model
    start_epoch = 0
    best_top1 = 50

    # step5.2: restart the training process
    # PyTorch checkpoints are conventionally saved with the .tar file extension
    if opt.resum_model_dir is not None:
        checkpoint = torch.load(opt.resum_model_dir)
        start_epoch = checkpoint["epoch"]
        best_top1 = checkpoint["best_top1"]
        optimizer.load_state_dict(checkpoint["optimizer"])
        model.load_state_dict(checkpoint["state_dict"])

    # when resuming training, call model.train() so every layer is back in training mode
    model.train()

    # step6 : train
    for epoch in range(start_epoch, opt.max_epoch):
        lr = get_learning_rate(optimizer)
        train_losses.reset()
        train_top1.reset()
        for iter, (input, target) in enumerate(train_dataloader):
            input = input.to(device)
            target = target.to(device)
            optimizer.zero_grad()
            # forward pass
            output = model(input)
            loss = criterion(output, target)
            prec1 = accuracy(output, target, topk=(1, ))
            # update the loss and top-1 accuracy meters
            train_losses.update(loss.item(), input.size(0))
            train_top1.update(prec1[0].item(), input.size(0))
            # backward pass
            loss.backward()
            optimizer.step()
        # decay the learning rate once per epoch, after the optimizer updates
        # (the scheduler.step(epoch) form is deprecated in recent PyTorch)
        scheduler.step()
        val_loss, val_top1 = val(model, val_dataloader, criterion, device)

        is_best = val_top1.avg > best_top1
        best_top1 = max(val_top1.avg, best_top1)

        print("epoch : {}/{}".format(epoch, opt.max_epoch))
        print("train-->loss:{},acc:{}".format(train_losses.avg,
                                              train_top1.avg))
        print("val-->loss:{},acc:{}".format(val_loss.avg, val_top1.avg))

        vis.plot_many({
            'train_loss': train_losses.avg,
            'val_loss': val_loss.avg
        })
        # vis.plot('train_loss', train_losses.avg)
        # vis.plot('val_accuracy', val_top1.avg)

        vis.log(
            "epoch:{epoch},lr:{lr},train_loss:{train_loss},val_loss:{val_loss},"
            "train_acc:{train_acc},val_acc:{val_acc}".format(
                epoch=epoch,
                lr=lr,
                train_loss=train_losses.avg,
                val_loss=val_loss.avg,
                train_acc=train_top1.avg,
                val_acc=val_top1.avg))

        if epoch % 10 == 0:
            save_checkpoint(
                {
                    "epoch": epoch + 1,
                    "model": opt.model,
                    "state_dict": model.state_dict(),
                    "best_top1": best_top1,
                    "optimizer": optimizer.state_dict(),
                    "val_loss": val_loss.avg,
                }, opt.save_model_dir, is_best, epoch)
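
Example 2 calls a save_checkpoint helper that is not shown. A minimal sketch consistent with the resume branch in step 5.2 and the .tar convention noted above; the file names here are assumptions:

import os
import shutil

import torch


def save_checkpoint(state, save_dir, is_best, epoch):
    # A checkpoint is an ordinary dict serialized with torch.save; its keys
    # must match what the resume branch reads back (epoch, best_top1, ...).
    os.makedirs(save_dir, exist_ok=True)
    path = os.path.join(save_dir, "checkpoint_epoch{}.tar".format(epoch))
    torch.save(state, path)
    if is_best:
        # keep a separate copy of the best-scoring checkpoint
        shutil.copyfile(path, os.path.join(save_dir, "model_best.tar"))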
Example #3
# Library imports as in Example 1; project-local names (Visualizer, AppData,
# Sequence, val) come from the surrounding repository.
import time

import torch as t
from torch.autograd import Variable
from torch.utils.data import DataLoader
from torchnet import meter
from tqdm import tqdm


def train():
    vis = Visualizer("Kesci")
    train_data = AppData("data/data_16d_target/train.json", iflabel=True)
    val_data = AppData("data/data_16d_target/val.json", iflabel=True)
    train_dataloader = DataLoader(train_data, 32, shuffle=True, num_workers=4)
    val_dataloader = DataLoader(val_data, 256, shuffle=False, num_workers=2)
    test_data = AppData("data/data_16d_target/test.json", iflabel=True)
    test_dataloader = DataLoader(test_data, 256, shuffle=False, num_workers=2)

    criterion = t.nn.CrossEntropyLoss().cuda()
    learning_rate = 0.003
    weight_decay = 0.0002
    model = Sequence(15, 128, 1).cuda()
    optimizer = t.optim.Adam(model.parameters(),
                             lr=learning_rate,
                             weight_decay=weight_decay)

    loss_meter = meter.AverageValueMeter()
    confusion_matrix = meter.ConfusionMeter(2)
    previous_loss = 1e100

    for epoch in range(500):
        loss_meter.reset()
        confusion_matrix.reset()

        for ii, (data, property, label) in tqdm(enumerate(train_dataloader)):
            input = Variable(data).cuda()
            input2 = Variable(property).cuda()
            target = Variable(label).cuda().view(-1)
            output = model(input, input2)

            optimizer.zero_grad()
            loss = criterion(output, target)
            loss.backward()
            optimizer.step()

            loss_meter.add(loss.item())  # loss.data[0] fails on PyTorch >= 0.5

            confusion_matrix.add(output.data, target.data)

            if ii % 100 == 99:
                vis.plot('loss', loss_meter.value()[0])

        if epoch % 3 == 2:
            train_cm, train_f1 = val(model, train_dataloader)
            vis.plot('train_f1', train_f1)
        val_cm, val_f1 = val(model, val_dataloader)

        vis.plot_many({'val_f1': val_f1, 'learning_rate': learning_rate})

        # vis.log("epoch:{epoch},lr:{lr},loss:{loss},train_cm:{train_cm},val_cm:{val_cm}".format(
        #     epoch=epoch, loss=loss_meter.value()[0], val_cm=str(val_cm.value()),
        #     train_cm=str(confusion_matrix.value()), lr=learning_rate))

        if loss_meter.value()[0] > previous_loss:
            learning_rate = learning_rate * 0.95
            # Second way to lower the learning rate: the optimizer keeps its
            # state (momentum etc.) because only the param groups are updated
            for param_group in optimizer.param_groups:
                param_group['lr'] = learning_rate

        previous_loss = loss_meter.value()[0]

        if epoch % 10 == 9:
            model.save()
            test_cm, test_f1 = val(model, test_dataloader)
            vis.plot('test_f1', test_f1)
            vis.log(
                "model:{model} | {train_f1}, {train_pre}, {train_rec} | {val_f1}, {val_pre}, {val_rec} | {test_f1}, {test_pre}, {test_rec}"
                .format(train_f1=train_f1,
                        val_f1=val_f1,
                        test_f1=test_f1,
                        model=time.strftime('%m%d %H:%M:%S'),
                        train_pre=str(train_cm.value()[0][0] /
                                      train_cm.value()[:, 0].sum()),
                        train_rec=str(train_cm.value()[0][0] /
                                      train_cm.value()[0].sum()),
                        val_pre=str(val_cm.value()[0][0] /
                                    val_cm.value()[:, 0].sum()),
                        val_rec=str(val_cm.value()[0][0] /
                                    val_cm.value()[0].sum()),
                        test_pre=str(test_cm.value()[0][0] /
                                     test_cm.value()[:, 0].sum()),
                        test_rec=str(test_cm.value()[0][0] /
                                     test_cm.value()[0].sum())))
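
Examples 1 and 3 derive precision and recall for class 0 from a torchnet ConfusionMeter, whose value() is a NumPy array with ground-truth classes as rows and predictions as columns. A small helper showing the F1 computation that the examples' val() presumably performs; the function name is illustrative:

def f1_from_cm(cm_value, cls=0):
    # cm_value[i][j] counts samples of true class i predicted as class j, so
    # column sums are prediction counts and row sums are ground-truth counts.
    precision = cm_value[cls][cls] / cm_value[:, cls].sum()
    recall = cm_value[cls][cls] / cm_value[cls].sum()
    return 2 * precision * recall / (precision + recall)

# usage: f1 = f1_from_cm(confusion_matrix.value())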
Example #4
# Library imports needed by this snippet; project-local names (opt, models,
# Ocean, Visualizer, val) come from the surrounding repository.
import torch as t
from torch.autograd import Variable
from torch.utils.data import DataLoader
from torchnet import meter
from tqdm import tqdm


def train(**kwargs):
    opt.parse(kwargs)
    vis = Visualizer(opt.env)

    # step1: configure model
    model = getattr(models, opt.model)()
    if opt.load_model_path:
        model.load(opt.load_model_path)
    if opt.use_gpu:
        model.cuda()

    # step2: data
    train_data = Ocean(opt.train_data_root, mode='train')
    val_data = Ocean(opt.train_data_root, mode='val')
    train_dataloader = DataLoader(train_data,
                                  opt.batch_size,
                                  shuffle=True,
                                  num_workers=opt.num_workers)
    val_dataloader = DataLoader(val_data,
                                opt.batch_size,
                                shuffle=False,
                                num_workers=opt.num_workers)

    # step3: criterion and optimizer
    #loss_fn = t.nn.BCELoss(reduce=False, size_average=False)
    weight = t.Tensor([1, 5])  # upweight the rarer positive class
    if opt.use_gpu:
        weight = weight.cuda()
    criterion = t.nn.CrossEntropyLoss(weight=weight)
    lr = opt.lr
    optimizer = t.optim.Adam(model.parameters(),
                             lr=lr,
                             weight_decay=opt.weight_decay)

    # step4: meters
    loss_meter = meter.AverageValueMeter()
    confusion_matrix = meter.ConfusionMeter(2)
    previous_loss = 1e100

    # train
    for epoch in range(opt.max_epoch):
        loss_meter.reset()
        confusion_matrix.reset()

        for ii, (data, label) in tqdm(enumerate(train_dataloader)):

            # train model
            input = Variable(data)
            target = Variable(label)
            if opt.use_gpu:
                input = input.cuda()
                target = target.cuda()

            optimizer.zero_grad()
            score = model(input)
            # print(score, target)  # debug output: prints on every batch
            loss = criterion(score, target)
            loss.backward()
            optimizer.step()

            # meters update and visualize
            loss_meter.add(loss.item())  # loss.data[0] fails on PyTorch >= 0.5
            confusion_matrix.add(score.data, target.data)

            if ii % opt.print_freq == opt.print_freq - 1:
                vis.plot('loss', loss_meter.value()[0])

        # validate and visualize
        train_cm, train_accuracy = val(model, train_dataloader)
        val_cm, val_accuracy = val(model, val_dataloader)

        vis.plot_many({
            'val_accuracy': val_accuracy,
            'train_accuracy': train_accuracy,
            'learning_rate': lr
        })

        vis.log(
            "epoch:{epoch},lr:{lr},loss:{loss},train_cm:{train_cm},val_cm:{val_cm}"
            .format(epoch=epoch,
                    loss=loss_meter.value()[0],
                    val_cm=str(val_cm.value()),
                    train_cm=str(confusion_matrix.value()),
                    lr=lr))

        # update learning rate
        if loss_meter.value()[0] > previous_loss:
            lr = lr * opt.lr_decay
            # Second way to lower the learning rate: the optimizer keeps its
            # state (momentum etc.) because only the param groups are updated
            for param_group in optimizer.param_groups:
                param_group['lr'] = lr

        previous_loss = loss_meter.value()[0]

        if epoch % 10 == 9:
            model.save()
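
Example 4 passes weight=t.Tensor([1, 5]) to CrossEntropyLoss to upweight the rarer positive class. A quick, self-contained demonstration of what that weight does; the logits and targets are illustrative:

import torch

# Per-class weights rescale each sample's loss by the weight of its target
# class; the default 'mean' reduction then divides by the sum of those weights.
criterion = torch.nn.CrossEntropyLoss(weight=torch.tensor([1.0, 5.0]))
logits = torch.tensor([[2.0, -1.0], [0.5, 1.5]])
targets = torch.tensor([0, 1])
print(criterion(logits, targets))  # errors on class 1 cost five times more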