示例#1
0
def train(**kwargs):
    """Train a WaveNet regression model.

    Command-line keyword overrides are merged into the global ``opt``
    config; progress is plotted to a Visdom server via ``Visualizer``.
    One checkpoint is saved per epoch, and the learning rate is decayed
    whenever the epoch's mean loss fails to improve.

    :param kwargs: config overrides applied via ``opt._parse``.
    """
    opt._parse(kwargs)
    vis = Visualizer(opt.env, port=opt.vis_port)

    # Build the model from config hyper-parameters; optionally resume
    # from a saved checkpoint.
    model = models.WaveNet(opt.input_size, opt.out_size, opt.residual_size,
                           opt.skip_size, opt.dilation_cycles,
                           opt.dilation_depth)
    if opt.load_model_path:
        model.load(opt.load_model_path)
    device = torch.device('cuda') if opt.use_gpu else torch.device('cpu')
    model.to(device)

    # Fit a scaler on the training data and persist it so inference can
    # apply the identical normalization.
    data_utility = Data_utility(opt.train_data_root, opt.WINDOW_SIZE)
    scaler = data_utility.get_scaler()
    joblib.dump(scaler, 'scaler.pkl')

    X, Y = data_utility.get_data()

    criterion = nn.MSELoss()
    lr = opt.lr
    optimizer = model.get_optimizer(lr, opt.weight_decay)

    # Running mean/std of the per-batch loss within each epoch.
    loss_meter = meter.AverageValueMeter()
    previous_loss = 1e10

    for epoch in range(opt.max_epoch):
        loss_meter.reset()
        for i, (data, label) in tqdm(
                enumerate(data_utility.get_batches(X, Y, opt.batch_size))):

            inputs = data.to(device)
            targets = label.to(device)

            optimizer.zero_grad()
            preds = model(inputs)
            # Drop the trailing singleton dim so predictions match targets.
            # NOTE(review): assumes model output has a size-1 dim at index 2
            # -- confirm against WaveNet's output shape.
            preds = preds.squeeze(2)
            loss = criterion(preds, targets)
            loss.backward()
            optimizer.step()

            loss_meter.add(loss.item())
            if (i + 1) % opt.print_freq == 0:
                vis.plot('loss', loss_meter.value()[0])

        save_name = 'models/checkpoints/' + opt.model + str(epoch) + '.pth'
        model.save(save_name)

        # Decay the learning rate when the epoch-mean loss stops improving.
        if loss_meter.value()[0] > previous_loss:
            lr = lr * opt.lr_decay
            for param_group in optimizer.param_groups:
                param_group['lr'] = lr

        previous_loss = loss_meter.value()[0]
示例#2
0
文件: main.py 项目: zyy341/U-net-svs
def train():
    """Train the U-net source-separation model for 5 epochs.

    Loads spectrogram data from fixed local paths, optimizes with Adam,
    plots the running mean loss to Visdom every 20 batches, and saves a
    time-stamped ``state_dict`` checkpoint after each epoch.  When an
    epoch's mean loss fails to improve, the learning rate is multiplied
    by ``lr_decay``.
    """
    vis = Visualizer(env='svs')
    model = getattr(models, 'Unet')()
    model.train().cuda()

    train_data = Spg('F:/crop_test', train=True)
    val_data = Spg('F:/crop_test', train=False)
    train_dataloader = DataLoader(train_data, batch_size=4, drop_last=True)
    val_dataloader = DataLoader(val_data, batch_size=1, drop_last=True)
    loss_meter = meter.AverageValueMeter()
    lr = 0.001
    # NOTE(review): this factor is used both as Adam's weight_decay and as
    # the lr decay multiplier below -- confirm that double duty is intended.
    lr_decay = 0.05

    optimizer = t.optim.Adam(model.parameters(), lr=lr, weight_decay=lr_decay)
    previous_loss = 1e100

    for epoch in range(5):
        loss_meter.reset()
        for ii, (data, label) in tqdm(enumerate(train_dataloader)):

            input1 = Variable(data).cuda()
            target = Variable(label).cuda()
            optimizer.zero_grad()
            score = model(input1)  # fixed local typo: was `scroe`
            loss = MyLoss()(input1, score, target).cuda()
            loss.backward()
            optimizer.step()

            loss_meter.add(loss.data.item())

            if ii % 20 == 19:
                # BUG FIX: AverageValueMeter.value() returns a (mean, std)
                # tuple, so the original `.value().item()` raised
                # AttributeError; plot the mean instead.
                vis.plot('loss', loss_meter.value()[0])
        prefix = 'G:/Unet_svs/check/'
        name = time.strftime(prefix + '%m%d_%H_%M_%S.pth')

        t.save(model.state_dict(), name)
        if loss_meter.value()[0] > previous_loss:
            # BUG FIX: `opt` is not defined in this function, so the
            # original `opt.lr_decay` raised NameError; use the local
            # `lr_decay` factor.  Updating param_groups in place preserves
            # optimizer state (e.g. Adam moments), unlike rebuilding the
            # optimizer.
            lr = lr * lr_decay
            for param_group in optimizer.param_groups:
                param_group['lr'] = lr

        previous_loss = loss_meter.value()[0]
示例#3
0
def train(**kwargs):
    """Train an image-sentiment classifier (7 classes).

    Merges command-line overrides into ``opt``, builds ``opt.model`` from
    the ``Nets`` module, trains it with SGD + NLL loss, saves a checkpoint
    per epoch, and plots loss / validation accuracy to Visdom.

    :param kwargs: config overrides applied via ``opt.parse``.
    """
    opt.parse(kwargs)
    vis = Visualizer(opt.env)

    #step1: config model
    model = getattr(Nets,opt.model)()
    if opt.load_model_path:
        model.load(opt.load_model_path)
    if opt.use_gpu:
        device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
        model.to(device)

    #step2: data
    train_data = imageSentiment(opt.train_path,train = True) # training split
    val_data = imageSentiment(opt.train_path,train = False) # validation split
    train_dataloader = DataLoader(train_data,batch_size = opt.batch_size,shuffle=True,num_workers = opt.num_workers)
    val_dataloader = DataLoader(val_data,batch_size = opt.batch_size,shuffle=False,num_workers = opt.num_workers)

    #step3: loss function and optimizer
    # criterion = nn.CrossEntropyLoss()  # if used, the net must NOT apply softmax at the end
    lr = opt.lr
    # optimizer = Optim.Adam(model.parameters(),lr = lr,weight_decay= opt.weight_decay)
    # NOTE(review): lr is hard-coded to 0.001 here, so `lr = opt.lr` above
    # is unused -- confirm which value is intended.
    optimizer = Optim.SGD(model.parameters(),lr = 0.001,momentum=0.9,nesterov=True)
    #step4: metrics (mean loss and confusion matrix)
    loss_meter = meter.AverageValueMeter()
    confusion_matrix = meter.ConfusionMeter(7)
    # NOTE(review): previous_loss is never read in this function (no lr decay).
    previous_loss = 1e100

    # training loop
    for i in range(opt.max_epoch):
        loss_meter.reset()
        confusion_matrix.reset()
        total_loss = 0.
        for ii,(label,data) in tqdm(enumerate(train_dataloader),total=len(train_dataloader)):
            if opt.use_gpu:
                label,data = label.to(device),data.to(device)

            optimizer.zero_grad()
            score = model(data)
            # NOTE: for nll_loss / CrossEntropyLoss the target is a class
            # index tensor; no one-hot encoding is needed
            loss = F.nll_loss(score,label)
            total_loss += loss.item()
            loss.backward()
            optimizer.step()

            # update metrics and visualize
            loss_meter.add(loss.item())
            confusion_matrix.add(score.data,label.data)

            if ii%opt.print_freq==opt.print_freq-1:
                vis.plot('loss',loss_meter.value()[0])

        vis.plot('mach avgloss', total_loss/len(train_dataloader))
        model.save()

        # compute validation-set accuracy
        val_accuracy = val(model,val_dataloader)

        vis.plot('val_accuracy',val_accuracy)
示例#4
0
def train(**kwargs):
    """Train a dog-vs-cat classifier.

    Applies keyword config overrides via ``opt._parse``, trains
    ``opt.model`` with cross-entropy loss, logs per-batch loss/accuracy to
    stdout and Visdom, saves a checkpoint per epoch, validates, and decays
    the learning rate when the epoch-mean loss stops improving.

    :param kwargs: config overrides applied via ``opt._parse``.
    """
    opt._parse(kwargs)
    vis = Visualizer(opt.env,port = opt.vis_port)

    # step1: configure model
    model = getattr(models, opt.model)()
    if opt.load_model_path:
        model.load(opt.load_model_path)
    model.to(opt.device)

    # step2: data
    train_data = DogCat(opt.train_data_root,train=True)
    val_data = DogCat(opt.train_data_root,train=False)
    train_dataloader = DataLoader(train_data,opt.batch_size,
                        shuffle=True,num_workers=opt.num_workers)
    val_dataloader = DataLoader(val_data,opt.batch_size,
                        shuffle=False,num_workers=opt.num_workers)

    # step3: criterion and optimizer
    criterion = t.nn.CrossEntropyLoss()
    lr = opt.lr
    optimizer = model.get_optimizer(lr, opt.weight_decay)

    # step4: meters
    loss_meter = meter.AverageValueMeter()
    confusion_matrix = meter.ConfusionMeter(2)
    previous_loss = 1e10

    # train
    for epoch in range(opt.max_epoch):

        loss_meter.reset()
        confusion_matrix.reset()

        # per-epoch running totals for the stdout report; `i` is a manual
        # batch counter used only for printing (duplicates `ii`)
        train_loss = 0.
        train_acc = 0.
        i = 0

        for ii,(data,label) in tqdm(enumerate(train_dataloader)):

            # train model 
            input = data.to(opt.device)
            target = label.to(opt.device)


            optimizer.zero_grad()
            score = model(input)
            loss = criterion(score,target)

            train_loss += loss.item()
            pred = t.max(score, 1)[1]
            train_correct = (pred==target).sum()
            train_acc += train_correct.item()
            print('epoch ', epoch, ' batch ', i)
            i+=1
            print('Train Loss: %f, Acc: %f' % (loss.item(), train_correct.item() / float(len(data))))

            loss.backward()
            optimizer.step()
            
            
            # meters update and visualize
            loss_meter.add(loss.item())
            # detach so the meter never retains the autograd graph
            confusion_matrix.add(score.detach(), target.detach()) 

            if (ii + 1)%opt.print_freq == 0:
                vis.plot('loss', loss_meter.value()[0])
                
                # drop into the debugger when the debug flag file exists
                if os.path.exists(opt.debug_file):
                    import ipdb;
                    ipdb.set_trace()

        print('Train Loss: {:.6f}, Acc: {:.6f}'.format(train_loss / (len(
            train_data)), train_acc / (len(train_data))))


        # model.save()
        prefix = 'checkpoints/' + opt.model + '_a'+str(epoch)+'.pth'
        t.save(model.state_dict(), prefix)

        # validate and visualize
        val_cm,val_accuracy = val(model,val_dataloader, criterion, val_data)

        vis.plot('val_accuracy',val_accuracy)
        vis.log("epoch:{epoch},lr:{lr},loss:{loss},train_cm:{train_cm},val_cm:{val_cm}".format(
                    epoch = epoch,loss = loss_meter.value()[0],val_cm = str(val_cm.value()),train_cm=str(confusion_matrix.value()),lr=lr))
        
        # update learning rate
        if loss_meter.value()[0] > previous_loss:          
            lr = lr * opt.lr_decay
            # updating param_groups in place preserves optimizer state
            # (e.g. momentum), unlike rebuilding the optimizer
            for param_group in optimizer.param_groups:
                param_group['lr'] = lr
        

        previous_loss = loss_meter.value()[0]
示例#5
0
def train(**kwargs):
    """Train the dog-vs-cat classifier with resumable bookkeeping.

    Config overrides from *kwargs* are merged via ``opt.parse``.  Training
    state (loss history, lr, checkpoint names, confusion matrices) is
    persisted to ``opt.pars_path`` through ``save_dict``/``load_dict`` so a
    run can pick up where it left off.  Loss is plotted to Visdom, and the
    learning rate is decayed whenever the epoch-mean loss stops improving.

    :param kwargs: config overrides applied via ``opt.parse``.
    """
    opt.parse(kwargs)
    vis = Visualizer(opt.env)

    # step1: configure model
    model = getattr(models, opt.model)()
    if os.path.exists(opt.load_model_path):
        model.load(opt.load_model_path)
    if opt.use_gpu:
        model.cuda()

    # Resume the bookkeeping dict (and last recorded loss) if present.
    if os.path.exists(opt.pars_path):
        dic = load_dict(opt.pars_path)
        previous_loss = dic['loss'][-1] if 'loss' in dic.keys() else 1e100
    else:
        dic = {}
        # BUG FIX: previous_loss was left undefined on this branch, raising
        # NameError at the first lr-decay comparison below.
        previous_loss = 1e100
    # step2: data
    train_data = DogCat(opt.train_data_root, train=True)
    val_data = DogCat(opt.train_data_root, train=False)
    train_dataloader = DataLoader(train_data,
                                  opt.batch_size,
                                  shuffle=True,
                                  num_workers=opt.num_workers)
    val_dataloader = DataLoader(val_data,
                                opt.batch_size,
                                shuffle=False,
                                num_workers=opt.num_workers)

    # step3: criterion and optimizer
    criterion = nn.CrossEntropyLoss()
    lr = opt.lr
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=lr,
                                 weight_decay=opt.weight_decay)

    # step4: meters
    loss_meter = meter.AverageValueMeter()
    confusion_matrix = meter.ConfusionMeter(2)
    # train
    # NOTE(review): epoch range starts at 5 -- presumably resuming a run
    # whose first 5 epochs are done; confirm before reuse.
    for epoch in range(5, opt.max_epoch):
        loss_meter.reset()
        confusion_matrix.reset()

        for ii, (data, label) in tqdm(enumerate(train_dataloader),
                                      total=len(train_dataloader)):
            # train model
            input = Variable(data)
            target = Variable(label)
            if opt.use_gpu:
                input = input.cuda()
                target = target.cuda()

            optimizer.zero_grad()
            score = model(input)
            loss = criterion(score, target)
            loss.backward()
            optimizer.step()

            # meters update and visualize
            loss_meter.add(loss.data.item())
            confusion_matrix.add(score.data, target.data)
            if ii % opt.print_freq == opt.print_freq - 1:
                dic = save_dict(opt.pars_path,
                                dic,
                                loss_data=loss_meter.value()[0])
                vis.plot('loss', dic['loss_data'])
                name = model.save()
                # drop into the debugger when the debug flag file exists
                if os.path.exists(opt.debug_file):
                    import ipdb
                    # BUG FIX: was `ipdb.set_trave()` -> AttributeError.
                    ipdb.set_trace()

        name = model.save()
        # update learning rate: reduce it when the loss no longer decreases
        if loss_meter.value()[0] > previous_loss:
            lr = lr * opt.lr_decay
            # updating param_groups in place preserves optimizer state
            # (e.g. Adam moments), unlike rebuilding the optimizer
            for param_group in optimizer.param_groups:
                param_group['lr'] = lr
        previous_loss = loss_meter.value()[0]
        dic = save_dict(opt.pars_path,
                        dic,
                        name=name,
                        epoch=epoch,
                        lr=lr,
                        loss=loss_meter.value()[0],
                        train_cm=confusion_matrix.value())

        # validate and visualize
        val_cm, val_accuracy = val(model, val_dataloader)
        dic = save_dict(opt.pars_path,
                        dic,
                        val_accuracy=val_accuracy,
                        val_cm=val_cm.value())

        vis.log(dic)
示例#6
0
def train(**kwargs):
    '''
    Train the bearing-fault classifier on CWRU 2-D data.
    :param kwargs: tunable parameters; defaults come from config
    :return: None (the trained model is persisted via model.save())
    '''

    # update the configuration from command-line arguments
    opt.parse(kwargs)
    # visdom plotting helper
    vis = Visualizer(opt.env, port=opt.vis_port)

    # step1: build the model
    # pick the model named by opt.model from the models package
    model = getattr(models, opt.model)()
    # optionally resume from saved weights
    if opt.load_model_path:
        model.load(opt.load_model_path)

    # pin this process to GPU #2
    os.environ["CUDA_VISIBLE_DEVICES"] = "2"
    model.to(opt.device)

    # step2: data
    train_data = CWRUDataset2D(opt.train_data_root, train=True)
    # the test split doubles as the validation set; none of it is trained on
    test_data = CWRUDataset2D(opt.train_data_root, train=False)

    train_dataloader = DataLoader(train_data, opt.batch_size, shuffle=True)
    test_dataloader = DataLoader(test_data, opt.batch_size, shuffle=False)

    # step3: objective and optimizer
    # loss: cross entropy
    criterion = torch.nn.CrossEntropyLoss()
    lr = opt.lr
    # optimizer: Adam
    optimizer = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=opt.weight_decay)

    # step4: metrics -- smoothed loss plus a confusion matrix
    # running mean/std of the per-batch loss
    loss_meter = meter.AverageValueMeter()
    # confusion matrix over opt.category classes
    confusion_matrix = meter.ConfusionMeter(opt.category)
    previous_loss = 1e10

    # training loop
    for epoch in range(opt.max_epoch):

        # reset per-epoch statistics
        loss_meter.reset()
        confusion_matrix.reset()

        for ii, (data, label) in tqdm(enumerate(train_dataloader)):

            # move the batch to the configured device
            input = data.to(opt.device)
            target = label.to(opt.device)

            optimizer.zero_grad()
            score = model(input)
            loss = criterion(score, target)
            loss.backward()
            optimizer.step()

            # update metrics and visualize
            loss_meter.add(loss.item())
            # detach so the meter never retains the autograd graph
            confusion_matrix.add(score.detach(), target.detach())

            if (ii + 1) % opt.print_freq == 0:
                vis.plot('loss', loss_meter.value()[0])

                # drop into the debugger when the debug flag file exists
                if os.path.exists(opt.debug_file):
                    import ipdb;
                    ipdb.set_trace()

        # save a checkpoint every epoch
        model.save()

        # compute and visualize test-set metrics
        val_cm, val_accuracy = val(model, test_dataloader)

        vis.plot('val_accuracy', val_accuracy)
        vis.log("epoch:{epoch},lr:{lr},loss:{loss},train_cm:{train_cm},val_cm:{val_cm}".format(
            epoch=epoch, loss=loss_meter.value()[0], val_cm=str(val_cm.value()), train_cm=str(confusion_matrix.value()),
            lr=lr))

        # if the loss is no longer decreasing, lower the learning rate
        if loss_meter.value()[0] > previous_loss:
            lr = lr * opt.lr_decay
            # updating param_groups in place preserves optimizer state
            # (e.g. Adam moments), unlike rebuilding the optimizer
            for param_group in optimizer.param_groups:
                param_group['lr'] = lr

        previous_loss = loss_meter.value()[0]
示例#7
0
def train(**kwargs):
    """Train a dog-vs-cat classifier (Visdom-logged variant).

    Applies keyword config overrides via ``opt._parse``, trains
    ``opt.model`` with cross-entropy loss, saves a checkpoint and runs
    validation every epoch, and decays the learning rate when the
    epoch-mean loss stops improving.  Loss, accuracy, and lr are plotted
    to Visdom.

    :param kwargs: config overrides applied via ``opt._parse``.
    """
    opt._parse(kwargs)
    vis = Visualizer(opt.env, port=opt.vis_port)

    # step1: configure model
    model = getattr(models, opt.model)()
    if opt.load_model_path:
        model.load(opt.load_model_path)
    model.to(opt.device)

    # step2: data
    train_data = DogCat(opt.train_data_root, train=True)
    val_data = DogCat(opt.train_data_root, train=False)
    train_dataloader = DataLoader(train_data,
                                  opt.batch_size,
                                  shuffle=True,
                                  num_workers=opt.num_workers)
    val_dataloader = DataLoader(val_data,
                                opt.batch_size,
                                shuffle=False,
                                num_workers=opt.num_workers)

    # step3: criterion and optimizer
    criterion = t.nn.CrossEntropyLoss()
    lr = opt.lr
    optimizer = model.get_optimizer(lr, opt.weight_decay)
    # optimizer = torch.optim.Adam(model.parameters(), lr=lr, betas=(0.9, 0.99))

    # step4: meters
    loss_meter = meter.AverageValueMeter()
    confusion_matrix = meter.ConfusionMeter(2)
    previous_loss = 1e10

    # train
    for epoch in range(opt.max_epoch):

        loss_meter.reset()
        confusion_matrix.reset()
        # NOTE(review): "trian" typo in this log message (runtime string,
        # left unchanged here)
        print("trian epoch: ", epoch)
        for ii, (data, label) in tqdm(enumerate(train_dataloader)):

            # train model
            input = data.to(opt.device)
            target = label.to(opt.device)

            optimizer.zero_grad()
            score = model(input)
            loss = criterion(score, target)
            loss.backward()
            optimizer.step()

            # meters update and visualize
            loss_meter.add(loss.item())
            # detach so the meter never retains the autograd graph
            confusion_matrix.add(score.detach(), target.detach())

            if (ii + 1) % opt.print_freq == 0:
                vis.plot('loss', loss_meter.value()[0])

                # # drop into the debugger when the debug flag file exists
                # if os.path.exists(opt.debug_file):
                #     import ipdb;
                #     ipdb.set_trace()

        model.save()

        # validate and visualize
        print("start eval:")
        val_cm, val_accuracy = val(model, val_dataloader)

        vis.plot('val_accuracy', val_accuracy)
        vis.log(
            "epoch:{epoch},lr:{lr},loss:{loss},train_cm:{train_cm},val_cm:{val_cm}"
            .format(epoch=epoch,
                    loss=loss_meter.value()[0],
                    val_cm=str(val_cm.value()),
                    train_cm=str(confusion_matrix.value()),
                    lr=lr))

        # update learning rate
        if loss_meter.value()[0] > previous_loss:
            lr = lr * opt.lr_decay
            # updating param_groups in place preserves optimizer state
            # (e.g. momentum), unlike rebuilding the optimizer
            for param_group in optimizer.param_groups:
                param_group['lr'] = lr

        vis.plot('lr', lr)
        previous_loss = loss_meter.value()[0]
def train(**kwargs):
    """
    Train the scene classifier.

    Merges *kwargs* into the global config via ``opt.parse``, trains
    ``opt.model`` with cross-entropy loss and Adam, saves a checkpoint per
    epoch, runs validation, and decays the learning rate when the
    epoch-mean loss stops improving.  Progress is plotted to Visdom.

    :param kwargs: config overrides applied via ``opt.parse``.
    """
    # update config from the passed-in arguments
    opt.parse(kwargs)
    vis = Visualizer(opt.env)

    cudnn.enabled = True
    cudnn.benchmark = True  # autotune conv kernels (fixed input sizes)

    # step1: configure and load the model
    model = getattr(models, opt.model)()
    if opt.load_model_path:
        model.load(opt.load_model_path)
    model.to(opt.device)

    # step2: load data (training and validation splits)
    train_data = SceneData(opt.train_data_root, opt.labels, train=True)
    val_data = SceneData(opt.train_data_root, opt.labels, train=False)
    train_dataloader = DataLoader(train_data,
                                  opt.batch_size,
                                  shuffle=True,
                                  num_workers=opt.num_workers)
    val_dataloader = DataLoader(val_data,
                                opt.batch_size,
                                shuffle=True,
                                num_workers=opt.num_workers)

    # step3: objective and optimizer
    criterion = t.nn.CrossEntropyLoss()  # cross-entropy loss
    lr = opt.lr
    optimizer = t.optim.Adam(model.parameters(),
                             lr=opt.lr,
                             weight_decay=opt.weight_decay)  # Adam
    # Finetune alternative: freeze everything but the final fully-connected
    # layer (for models that have one):
    #   for para in list(model.parameters())[:-1]:
    #       para.requires_grad = False
    #   optimizer = t.optim.Adam(params=[model.fc.weight, model.fc.bias],
    #                            lr=opt.lr, weight_decay=opt.weight_decay)

    # step4: metrics -- smoothed loss and a confusion matrix
    loss_meter = meter.AverageValueMeter()  # running mean/std of the loss
    confusion_matrix = meter.ConfusionMeter(opt.num_labels)
    previous_loss = 1e100

    # training loop
    for epoch in range(opt.max_epoch):

        loss_meter.reset()
        confusion_matrix.reset()

        # one dataloader item per batch
        # BUG FIX: tqdm's `total` was len(train_data) (the *sample* count),
        # which made the progress bar wildly overestimate the epoch length;
        # the loop yields one item per batch, i.e. len(train_dataloader).
        for step, (data, label) in tqdm.tqdm(enumerate(train_dataloader),
                                             total=len(train_dataloader)):

            train_input = data.to(opt.device)
            label_input = label.to(opt.device)

            optimizer.zero_grad()  # clear gradients
            score = model(train_input)  # forward pass
            loss = criterion(score, label_input)  # compute the loss
            loss.backward()  # backprop
            optimizer.step()  # parameter update

            # update metrics and visualize
            loss_meter.add(loss.item())
            confusion_matrix.add(score.detach(), label_input.detach())

            if step % opt.print_freq == opt.print_freq - 1:
                vis.plot('loss', loss_meter.value()[0])

        model.save()

        # compute and visualize validation metrics
        val_cm, val_accuracy = val(model, val_dataloader)
        vis.plot('val_accuracy', val_accuracy)
        vis.log(
            "epoch:{epoch},lr:{lr},loss:{loss},train_cm:{train_cm},val_cm:{val_cm}"
            .format(epoch=epoch,
                    loss=loss_meter.value()[0],
                    val_cm=str(val_cm.value()),
                    train_cm=str(confusion_matrix.value()),
                    lr=lr))

        # if the loss is no longer decreasing, lower the learning rate
        if loss_meter.value()[0] > previous_loss:
            lr = lr * opt.lr_decay
            # updating param_groups in place preserves optimizer state
            # (e.g. Adam moments), unlike rebuilding the optimizer
            for param_group in optimizer.param_groups:
                param_group['lr'] = lr

        previous_loss = loss_meter.value()[0]
示例#9
0
def train(**kwargs):
    """Train the HRNet-based demoireing model with gradient accumulation.

    Keyword overrides are written straight onto the global ``opt``.  The
    run can resume from an ``opt.model_path`` checkpoint (epoch counter,
    optimizer state, lr).  Per-epoch validation loss/PSNR and rolling
    training loss are plotted to Visdom when ``opt.vis`` is set; loss
    history is also appended to a text file.  Checkpoints are written
    every ``opt.save_every`` epochs and once more at the end.

    :param kwargs: attribute overrides applied onto ``opt``.
    """
    #init: apply overrides directly onto the global config object
    for k_, v_ in kwargs.items():
        setattr(opt, k_, v_)

    if opt.vis:
        vis = Visualizer(opt.env)
        vis_val = Visualizer('valdemoire')

    #dataset
    # NOTE(review): both transform pipelines below are built but never
    # used in this function -- MoireData presumably applies its own.
    FiveCrop_transforms = transforms.Compose([
        transforms.FiveCrop(256),
        transforms.Lambda(lambda crops: torch.stack(
            [transforms.ToTensor()(crop) for crop in crops]))
    ])
    data_transforms = transforms.Compose([
        # transforms.RandomCrop(256),
        transforms.ToTensor()
    ])
    train_data = MoireData(opt.train_path)
    test_data = MoireData(opt.test_path, is_val=True)
    train_dataloader = DataLoader(train_data,
                                  batch_size=opt.train_batch_size,
                                  shuffle=True,
                                  num_workers=opt.num_workers,
                                  drop_last=True)
    test_dataloader = DataLoader(test_data,
                                 batch_size=opt.val_batch_size,
                                 shuffle=True,
                                 num_workers=opt.num_workers,
                                 drop_last=True)

    last_epoch = 0
    #model_init: build the pose net from the yaml config, optionally with
    #pretrained weights from opt.model_path
    cfg.merge_from_file("config/cfg.yaml")
    model = get_pose_net(cfg, pretrained=opt.model_path)  #initweight
    model = model.to(opt.device)

    # baseline validation pass before any training
    if opt.vis:
        val_loss, val_psnr = val(model, test_dataloader, vis_val)
        print(val_loss, val_psnr)
    else:
        val_loss, val_psnr = val(model, test_dataloader)
        print(val_loss, val_psnr)

    # two loss terms: Charbonnier on the image, Sobel-based on edges
    criterion_c = L1_Charbonnier_loss()
    criterion_s = L1_Sobel_Loss()
    lr = opt.lr
    optimizer = torch.optim.Adam(
        params=model.parameters(),
        lr=lr,
        weight_decay=0.01  #0.005
    )

    # resume epoch counter, optimizer state, and lr from the checkpoint
    if opt.model_path:
        map_location = lambda storage, loc: storage
        checkpoint = torch.load(opt.model_path, map_location=map_location)
        last_epoch = checkpoint["epoch"]
        optimizer_state = checkpoint["optimizer"]
        optimizer.load_state_dict(optimizer_state)

        lr = checkpoint["lr"]
        for param_group in optimizer.param_groups:
            param_group['lr'] = lr

    loss_meter = meter.AverageValueMeter()
    psnr_meter = meter.AverageValueMeter()
    previous_loss = 1e100
    accumulation_steps = opt.accumulation_steps

    for epoch in range(opt.max_epoch):
        # skip epochs already completed in a resumed run
        if epoch < last_epoch:
            continue
        loss_meter.reset()
        psnr_meter.reset()
        torch.cuda.empty_cache()
        loss_list = []

        for ii, (moires, clear_list) in tqdm(enumerate(train_dataloader)):
            moires = moires.to(opt.device)
            clears = clear_list[0].to(opt.device)

            output_list, edge_output_list = model(moires)
            outputs, edge_X = output_list[0], edge_output_list[0]

            # schedule the image/edge loss mix by epoch; before epoch 20
            # the configured opt.loss_alpha is left as-is
            if epoch < 20:
                pass
            elif epoch >= 20 and epoch < 40:
                opt.loss_alpha = 0.9
            else:
                opt.loss_alpha = 1.0

            c_loss = criterion_c(outputs, clears)
            s_loss = criterion_s(edge_X, clears)
            loss = opt.loss_alpha * c_loss + (1 - opt.loss_alpha) * s_loss

            # gradient accumulation: scale the loss so accumulated
            # gradients average over accumulation_steps mini-batches
            loss = loss / accumulation_steps
            loss.backward()

            # step/zero only every accumulation_steps batches
            if (ii + 1) % accumulation_steps == 0:
                optimizer.step()
                optimizer.zero_grad()

            # un-scale before recording so the meter sees the true loss
            loss_meter.add(loss.item() * accumulation_steps)

            moires = tensor2im(moires)
            outputs = tensor2im(outputs)
            clears = tensor2im(clears)

            psnr = colour.utilities.metric_psnr(outputs, clears)
            psnr_meter.add(psnr)

            if opt.vis and (ii + 1) % opt.plot_every == 0:  # plot every opt.plot_every batches
                vis.images(moires, win='moire_image')
                vis.images(outputs, win='output_image')
                vis.text(
                    "current outputs_size:{outputs_size},<br/> outputs:{outputs}<br/>"
                    .format(outputs_size=outputs.shape, outputs=outputs),
                    win="size")
                vis.images(clears, win='clear_image')
                #record the train loss to txt
                vis.plot('train_loss',
                         loss_meter.value()
                         [0])  #meter.value() returns (mean, std); plot the mean
                vis.log(
                    "epoch:{epoch}, lr:{lr}, train_loss:{loss}, train_psnr:{train_psnr}"
                    .format(epoch=epoch + 1,
                            loss=loss_meter.value()[0],
                            lr=lr,
                            train_psnr=psnr_meter.value()[0]))
                loss_list.append(str(loss_meter.value()[0]))

            torch.cuda.empty_cache()
        if opt.vis:
            val_loss, val_psnr = val(model, test_dataloader, vis_val)
            vis.plot('val_loss', val_loss)
            vis.log(
                "epoch:{epoch}, average val_loss:{val_loss}, average val_psnr:{val_psnr}"
                .format(epoch=epoch + 1, val_loss=val_loss, val_psnr=val_psnr))
        else:
            val_loss, val_psnr = val(model, test_dataloader)

        # append this epoch's recorded losses to the log file
        with open(opt.save_prefix + "loss_list.txt", 'a') as f:
            f.write("\nepoch_{}\n".format(epoch + 1))
            f.write('\n'.join(loss_list))

        if (epoch + 1) % opt.save_every == 0 or epoch == 0:  # checkpoint every opt.save_every epochs
            prefix = opt.save_prefix + 'HRnet_epoch{}_'.format(epoch + 1)
            file_name = time.strftime(prefix + '%m%d_%H_%M_%S.pth')
            checkpoint = {
                'epoch': epoch + 1,
                "optimizer": optimizer.state_dict(),
                "model": model.state_dict(),
                "lr": lr
            }
            torch.save(checkpoint, file_name)

        # decay the lr when the loss stops improving, and unconditionally
        # every 10th epoch
        if (loss_meter.value()[0] > previous_loss) or ((epoch + 1) % 10) == 0:
            lr = lr * opt.lr_decay
            for param_group in optimizer.param_groups:
                param_group['lr'] = lr
        previous_loss = loss_meter.value()[0]

    # final checkpoint after the last epoch
    prefix = opt.save_prefix + 'HRnet_final_'
    file_name = time.strftime(prefix + '%m%d_%H_%M_%S.pth')
    checkpoint = {
        'epoch': epoch + 1,
        "optimizer": optimizer.state_dict(),
        "model": model.state_dict(),
        "lr": lr
    }
    torch.save(checkpoint, file_name)
示例#10
0
    def train(self):
        """Jointly train photo and sketch embedding networks.

        Builds two identical backbones (vgg16 / resnet34 / resnet50,
        selected by ``self.net``), each wrapped in ``DataParallel`` and
        moved to CUDA, optionally resuming both from ``self.model_root``.
        Each network is optimized with its own Adam optimizer against a
        shared objective: per-branch category cross-entropy plus a
        cross-modal triplet loss.  Recall is evaluated every
        ``self.test_f`` epochs and, if enabled, models are saved and
        losses plotted to Visdom.
        """

        # NOTE(review): if self.net matches none of these, photo_net /
        # sketch_net are undefined and the code below raises NameError.
        if self.net == 'vgg16':
            photo_net = DataParallel(self._get_vgg16()).cuda()
            sketch_net = DataParallel(self._get_vgg16()).cuda()
        elif self.net == 'resnet34':
            photo_net = DataParallel(self._get_resnet34()).cuda()
            sketch_net = DataParallel(self._get_resnet34()).cuda()
        elif self.net == 'resnet50':
            photo_net = DataParallel(self._get_resnet50()).cuda()
            sketch_net = DataParallel(self._get_resnet50()).cuda()

        if self.fine_tune:
            # the sketch checkpoint path mirrors the photo path
            photo_net_root = self.model_root
            sketch_net_root = self.model_root.replace('photo', 'sketch')

            photo_net.load_state_dict(
                t.load(photo_net_root, map_location=t.device('cpu')))
            sketch_net.load_state_dict(
                t.load(sketch_net_root, map_location=t.device('cpu')))

        print('net')
        print(photo_net)

        # category-classification losses, one per branch
        # triplet_loss = nn.TripletMarginLoss(margin=self.margin, p=self.p).cuda()
        photo_cat_loss = nn.CrossEntropyLoss().cuda()
        sketch_cat_loss = nn.CrossEntropyLoss().cuda()

        my_triplet_loss = TripletLoss().cuda()

        # optimizer -- one Adam per branch
        photo_optimizer = t.optim.Adam(photo_net.parameters(), lr=self.lr)
        sketch_optimizer = t.optim.Adam(sketch_net.parameters(), lr=self.lr)

        if self.vis:
            vis = Visualizer(self.env)

        # per-batch loss meters (reset after every plot below)
        triplet_loss_meter = AverageValueMeter()
        sketch_cat_loss_meter = AverageValueMeter()
        photo_cat_loss_meter = AverageValueMeter()

        data_loader = TripleDataLoader(self.dataloader_opt)
        dataset = data_loader.load_data()

        for epoch in range(self.epochs):

            print('---------------{0}---------------'.format(epoch))

            # periodic evaluation (and optional checkpointing) BEFORE the
            # epoch's training pass
            if self.test and epoch % self.test_f == 0:

                tester_config = Config()
                tester_config.test_bs = 128
                tester_config.photo_net = photo_net
                tester_config.sketch_net = sketch_net

                tester_config.photo_test = self.photo_test
                tester_config.sketch_test = self.sketch_test

                tester = Tester(tester_config)
                test_result = tester.test_instance_recall()

                result_key = list(test_result.keys())
                vis.plot('recall',
                         np.array([
                             test_result[result_key[0]],
                             test_result[result_key[1]]
                         ]),
                         legend=[result_key[0], result_key[1]])
                if self.save_model:
                    t.save(
                        photo_net.state_dict(), self.save_dir + '/photo' +
                        '/photo_' + self.net + '_%s.pth' % epoch)
                    t.save(
                        sketch_net.state_dict(), self.save_dir + '/sketch' +
                        '/sketch_' + self.net + '_%s.pth' % epoch)

            photo_net.train()
            sketch_net.train()

            for ii, data in enumerate(dataset):

                photo_optimizer.zero_grad()
                sketch_optimizer.zero_grad()

                photo = data['P'].cuda()
                sketch = data['S'].cuda()
                label = data['L'].cuda()

                # each branch returns (category logits, embedding feature)
                p_cat, p_feature = photo_net(photo)
                s_cat, s_feature = sketch_net(sketch)

                # category loss
                p_cat_loss = photo_cat_loss(p_cat, label)
                s_cat_loss = sketch_cat_loss(s_cat, label)

                photo_cat_loss_meter.add(p_cat_loss.item())
                sketch_cat_loss_meter.add(s_cat_loss.item())

                # triplet loss
                loss = p_cat_loss + s_cat_loss

                # tri_record = 0.
                '''
                for i in range(self.batch_size):
                    # negative
                    negative_feature = t.cat([p_feature[0:i, :], p_feature[i + 1:, :]], dim=0)
                    # print('negative_feature.size :', negative_feature.size())
                    # photo_feature
                    anchor_feature = s_feature[i, :]
                    anchor_feature = anchor_feature.expand_as(negative_feature)
                    # print('anchor_feature.size :', anchor_feature.size())

                    # positive
                    positive_feature = p_feature[i, :]
                    positive_feature = positive_feature.expand_as(negative_feature)
                    # print('positive_feature.size :', positive_feature.size())

                    tri_loss = triplet_loss(anchor_feature, positive_feature, negative_feature)

                    tri_record = tri_record + tri_loss

                    # print('tri_loss :', tri_loss)
                    loss = loss + tri_loss
                '''
                # print('tri_record : ', tri_record)

                # normalized by (batch_size - 1): the number of negatives
                # per anchor in the batch-wise triplet loss
                my_tri_loss = my_triplet_loss(
                    s_feature, p_feature) / (self.batch_size - 1)
                triplet_loss_meter.add(my_tri_loss.item())
                # print('my_tri_loss : ', my_tri_loss)

                # print(tri_record - my_tri_loss)
                loss = loss + my_tri_loss
                # print('loss :', loss)
                # loss = loss / opt.batch_size

                # one backward pass through the combined loss updates both
                # branches via their respective optimizers
                loss.backward()

                photo_optimizer.step()
                sketch_optimizer.step()

                if self.vis:
                    vis.plot('triplet_loss',
                             np.array([
                                 triplet_loss_meter.value()[0],
                                 photo_cat_loss_meter.value()[0],
                                 sketch_cat_loss_meter.value()[0]
                             ]),
                             legend=[
                                 'triplet_loss', 'photo_cat_loss',
                                 'sketch_cat_loss'
                             ])

                # meters are reset every batch, so the plot shows
                # per-batch (not running-average) losses
                triplet_loss_meter.reset()
                photo_cat_loss_meter.reset()
                sketch_cat_loss_meter.reset()
示例#11
0
def train():
    """Train the Sequence model on the Kesci app dataset.

    Builds train/val/test loaders, optimizes with Adam + cross-entropy,
    decays the learning rate when the smoothed epoch loss stops improving,
    and periodically logs F1 scores to visdom via ``val``.
    """
    vis = Visualizer("Kesci")
    train_data = AppData("data/data_16d_target/train.json", iflabel=True)
    val_data = AppData("data/data_16d_target/val.json", iflabel=True)
    train_dataloader = DataLoader(train_data, 32, shuffle=True, num_workers=4)
    val_dataloader = DataLoader(val_data, 256, shuffle=False, num_workers=2)
    test_data = AppData("data/data_16d_target/test.json", iflabel=True)
    test_dataloader = DataLoader(test_data, 256, shuffle=False, num_workers=2)

    criterion = t.nn.CrossEntropyLoss().cuda()
    learning_rate = 0.003
    weight_decay = 0.0002
    model = Sequence(15, 128, 1).cuda()
    optimizer = t.optim.Adam(model.parameters(),
                             lr=learning_rate,
                             weight_decay=weight_decay)

    loss_meter = meter.AverageValueMeter()
    confusion_matrix = meter.ConfusionMeter(2)
    previous_loss = 1e100

    for epoch in range(500):
        loss_meter.reset()
        confusion_matrix.reset()

        # `props` renamed from `property` to avoid shadowing the builtin.
        for ii, (data, props, label) in tqdm(enumerate(train_dataloader)):
            input = Variable(data).cuda()
            input2 = Variable(props).cuda()
            target = Variable(label).cuda().view(-1)
            output = model(input, input2)

            optimizer.zero_grad()
            loss = criterion(output, target)
            loss.backward()
            optimizer.step()

            # loss.item() instead of loss.data[0]: indexing a 0-dim tensor
            # raises an error on PyTorch >= 0.5.
            loss_meter.add(loss.item())

            confusion_matrix.add(output.data, target.data)

            if ii % 100 == 99:
                vis.plot('loss', loss_meter.value()[0])

        if epoch % 3 == 2:
            train_cm, train_f1 = val(model, train_dataloader)
            vis.plot('train_f1', train_f1)
        val_cm, val_f1 = val(model, val_dataloader)

        vis.plot_many({'val_f1': val_f1, 'learning_rate': learning_rate})

        # Decay lr when the epoch loss stops improving; updating param_groups
        # in place keeps optimizer state (momentum estimates) intact.
        if loss_meter.value()[0] > previous_loss:
            learning_rate = learning_rate * 0.95
            for param_group in optimizer.param_groups:
                param_group['lr'] = learning_rate

        previous_loss = loss_meter.value()[0]

        if epoch % 10 == 9:
            model.save()
            test_cm, test_f1 = val(model, test_dataloader)
            vis.plot('test_f1', test_f1)
            # train_cm/train_f1 were last computed at the most recent epoch
            # with epoch % 3 == 2, which always precedes epoch % 10 == 9.
            vis.log(
                "model:{model} | {train_f1}, {train_pre}, {train_rec} | {val_f1}, {val_pre}, {val_rec} | {test_f1}, {test_pre}, {test_rec}"
                .format(train_f1=train_f1,
                        val_f1=val_f1,
                        test_f1=test_f1,
                        model=time.strftime('%m%d %H:%M:%S'),
                        train_pre=str(train_cm.value()[0][0] /
                                      train_cm.value()[:, 0].sum()),
                        train_rec=str(train_cm.value()[0][0] /
                                      train_cm.value()[0].sum()),
                        val_pre=str(val_cm.value()[0][0] /
                                    val_cm.value()[:, 0].sum()),
                        val_rec=str(val_cm.value()[0][0] /
                                    val_cm.value()[0].sum()),
                        test_pre=str(test_cm.value()[0][0] /
                                     test_cm.value()[:, 0].sum()),
                        test_rec=str(test_cm.value()[0][0] /
                                     test_cm.value()[0].sum())))
示例#12
0
class Trainer(BaseTrainer):
    """
    Trainer class for the FOTS text-detection model.

    Note:
        Inherited from BaseTrainer.
        self.optimizer is by default handled by BaseTrainer based on config.
    """
    def __init__(self,
                 model,
                 loss,
                 metrics,
                 resume,
                 config,
                 data_loader,
                 toolbox: Toolbox,
                 valid_data_loader=None,
                 train_logger=None):
        super(Trainer, self).__init__(model, loss, metrics, resume, config,
                                      train_logger)
        self.config = config
        self.batch_size = data_loader.batch_size
        self.data_loader = data_loader
        self.valid_data_loader = valid_data_loader
        # Validation runs only when a validation loader was supplied.
        self.valid = True if self.valid_data_loader is not None else False
        # Log roughly every sqrt(batch_size) batches.
        self.log_step = int(np.sqrt(self.batch_size))
        self.toolbox = toolbox
        self.visdom = Visualizer(env='FOTS')

    def _to_tensor(self, *tensors):
        # Move every given tensor to the trainer's device (self.device is
        # set by BaseTrainer) and return them in the same order, as a list.
        t = []
        for __tensors in tensors:
            t.append(__tensors.to(self.device))
        return t

    def _eval_metrics(self, output, target, mask):
        # Accumulate each configured metric over argmax class predictions.
        # NOTE(review): `mask` is currently unused here — confirm whether
        # masked evaluation was intended.
        acc_metrics = np.zeros(len(self.metrics))
        output = output.cpu().data.numpy()
        target = target.cpu().data.numpy()
        output = np.argmax(output, axis=1)
        for i, metric in enumerate(self.metrics):
            acc_metrics[i] += metric(output, target)
        return acc_metrics

    def _train_epoch(self, epoch):
        """
        Training logic for an epoch

        :param epoch: Current training epoch.
        :return: A log that contains all information you want to save.

        Note:
            If you have additional information to record, for example:
                > additional_log = {"x": x, "y": y}
            merge it with log before return. i.e.
                > log = {**log, **additional_log}
                > return log

            The metrics in log must have the key 'metrics'.
        """
        self.model.train()

        total_loss = 0
        total_metrics = np.zeros(len(self.metrics))
        for batch_idx, gt in enumerate(self.data_loader):
            # Ground truth: image, detection maps, mask, and transcript text.
            img, score_map, geo_map, training_mask, transcript = gt
            img, score_map, geo_map, training_mask = self._to_tensor(
                img, score_map, geo_map, training_mask)
            # Recognition ground truth is disabled: loss gets recog_map=None.
            recog_map = None

            self.optimizer.zero_grad()
            pred_score_map, pred_geo_map, pred_recog_map = self.model(img)

            loss = self.loss(score_map, pred_score_map, geo_map, pred_geo_map,
                             pred_recog_map, recog_map, training_mask)
            loss.backward()
            self.optimizer.step()

            total_loss += loss.item()
            #total_metrics += self._eval_metrics(output, target)

            # Metric evaluation is currently a no-op placeholder.
            total_metrics += 0

            if self.verbosity >= 2 and batch_idx % self.log_step == 0:
                self.logger.info(
                    'Train Epoch: {} [{}/{} ({:.0f}%)] Loss: {:.6f}'.format(
                        epoch, batch_idx * self.data_loader.batch_size,
                        len(self.data_loader) * self.data_loader.batch_size,
                        100.0 * batch_idx / len(self.data_loader),
                        loss.item()))
        # Plot the mean per-batch training loss for this epoch.
        self.visdom.plot('train_loss', total_loss / len(self.data_loader))
        log = {
            'loss': total_loss / len(self.data_loader),
            'metrics': (total_metrics / len(self.data_loader)).tolist()
        }

        if self.valid:
            val_log = self._valid_epoch()
            log = {**log, **val_log}

        return log

    def _valid_epoch(self):
        """
        Validate after training an epoch

        :return: A log that contains information about validation

        Note:
            The validation metrics in log must have the key 'val_metrics'.
        """
        self.model.eval()
        total_val_loss = 0
        total_val_metrics = np.zeros(len(self.metrics))
        # no_grad: inference only, no autograd bookkeeping needed.
        with torch.no_grad():
            for batch_idx, gt in enumerate(self.valid_data_loader):
                img, score_map, geo_map, training_mask, transcript = gt
                img, score_map, geo_map, training_mask = self._to_tensor(
                    img, score_map, geo_map, training_mask)
                recog_map = None

                pred_score_map, pred_geo_map, pred_recog_map = self.model(img)

                loss = self.loss(score_map, pred_score_map, geo_map,
                                 pred_geo_map, pred_recog_map, recog_map,
                                 training_mask)

                total_val_loss += loss.item()

                output = (pred_score_map, pred_geo_map, pred_recog_map)
                target = (score_map, geo_map, recog_map)
                #total_val_metrics += self._eval_metrics(output, target, training_mask) #TODO: should add AP metric
        self.visdom.plot('val_loss',
                         total_val_loss / len(self.valid_data_loader))
        return {
            'val_loss':
            total_val_loss / len(self.valid_data_loader),
            'val_metrics':
            (total_val_metrics / len(self.valid_data_loader)).tolist()
        }
示例#13
0
文件: main.py 项目: eglrp/Gist_code
def train():
    """Train a UNet nodule-segmentation model with optional checkpoint resume.

    Loss is BCE (plus optional dice loss); validation reports dice, and every
    10 epochs the test-set 2-D recall is computed. Progress goes to visdom.
    """
    t.cuda.set_device(1)

    # n_channels: medical images are single-channel grayscale; n_classes: binary
    net = UNet(n_channels=1, n_classes=1)
    optimizer = t.optim.SGD(net.parameters(),
                            lr=opt.learning_rate,
                            momentum=0.9,
                            weight_decay=0.0005)
    criterion = t.nn.BCELoss()  # binary cross-entropy (mask covers a large area)

    start_epoch = 0
    if opt.load_model_path:
        checkpoint = t.load(opt.load_model_path)

        # Load multi-GPU (DataParallel) weights into a single-GPU model.
        state_dict = checkpoint['net']
        new_state_dict = OrderedDict()
        for k, v in state_dict.items():
            name = k[7:]  # remove `module.`
            new_state_dict[name] = v
        net.load_state_dict(new_state_dict)  # model weights
        optimizer.load_state_dict(checkpoint['optimizer'])  # optimizer state
        start_epoch = checkpoint['epoch']  # resume epoch counter

    # MultiStepLR decays lr by gamma at each milestone epoch.
    if start_epoch == 0:
        scheduler = t.optim.lr_scheduler.MultiStepLR(optimizer,
                                                     milestones=opt.milestones,
                                                     gamma=0.1,
                                                     last_epoch=-1)  # default -1
        print('从头训练 ,学习率为{}'.format(optimizer.param_groups[0]['lr']))
    else:
        scheduler = t.optim.lr_scheduler.MultiStepLR(optimizer,
                                                     milestones=opt.milestones,
                                                     gamma=0.1,
                                                     last_epoch=start_epoch)
        print('加载预训练模型{}并从{}轮开始训练,学习率为{}'.format(
            opt.load_model_path, start_epoch, optimizer.param_groups[0]['lr']))

    # Move the network to GPU(s).
    if opt.use_gpu:
        net = t.nn.DataParallel(net, device_ids=opt.device_ids)
        net.cuda()
        cudnn.benchmark = True

    # Visualization helper.
    vis = Visualizer(opt.env)

    train_data = NodeDataSet(train=True)
    val_data = NodeDataSet(val=True)
    test_data = NodeDataSet(test=True)

    # Dataset loaders.
    train_dataloader = DataLoader(train_data,
                                  opt.batch_size,
                                  shuffle=True,
                                  num_workers=opt.num_workers)
    val_dataloader = DataLoader(val_data,
                                opt.batch_size,
                                shuffle=True,
                                num_workers=opt.num_workers)
    test_dataloader = DataLoader(test_data,
                                 opt.test_batch_size,
                                 shuffle=False,
                                 num_workers=opt.num_workers)
    for epoch in range(opt.max_epoch - start_epoch):
        # Absolute epoch number across resumes.
        global_epoch = start_epoch + epoch + 1
        print('开始 epoch {}/{}.'.format(global_epoch, opt.max_epoch))
        epoch_loss = 0

        # Step the lr scheduler once per epoch.
        scheduler.step()

        # Iterate the training loader (PyTorch 0.4+: no Variable wrapping).
        for ii, (img, mask) in enumerate(train_dataloader):
            # Move batch to GPU.
            if opt.use_gpu:
                img = img.cuda()
                true_masks = mask.cuda()
            masks_pred = net(img)

            # Apply sigmoid to get probabilities.
            masks_probs = t.sigmoid(masks_pred)

            # Loss = BCE (+ optional dice loss).
            loss = criterion(masks_probs.view(-1), true_masks.view(-1))

            if opt.use_dice_loss:
                loss += dice_loss(masks_probs, true_masks)

            epoch_loss += loss.item()

            if ii % 2 == 0:
                vis.plot('训练集loss', loss.item())

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        vis.log("epoch:{epoch},lr:{lr},loss:{loss}".format(
            epoch=epoch, loss=loss.item(), lr=optimizer.param_groups[0]['lr']))

        # Mean epoch loss: divide by the batch COUNT (ii + 1), not the last
        # index ii — the original was off by one and crashed for 1 batch.
        vis.plot('每轮epoch的loss均值', epoch_loss / (ii + 1))
        # Save model/optimizer/epoch. Store the absolute epoch so a resumed
        # run continues from the right place (the loop-local `epoch` would
        # reset the counter on every resume).
        state = {
            'net': net.state_dict(),
            'optimizer': optimizer.state_dict(),
            'epoch': global_epoch
        }
        t.save(state, opt.checkpoint_root + '{}_unet.pth'.format(global_epoch))

        # ============ validation ===================

        net.eval()
        # Metric: dice coefficient (similarity of two sets).
        tot = 0
        with t.no_grad():  # inference only: saves memory, no autograd graph
            for jj, (img_val, mask_val) in enumerate(val_dataloader):
                if opt.use_gpu:
                    img_val = img_val.cuda()
                    true_mask_val = mask_val.cuda()
                else:
                    true_mask_val = mask_val

                mask_pred = net(img_val)
                mask_pred = (t.sigmoid(mask_pred) > 0.5).float()  # threshold 0.5
                tot += dice_loss(mask_pred, true_mask_val).item()
        # Again divide by the batch count, not the last index.
        val_dice = tot / (jj + 1)
        vis.plot('验证集 Dice损失', val_dice)

        # ============ test-set recall ===================
        # Every 10 epochs, evaluate 2-D recall on the test set.
        if epoch % 10 == 0:
            result_test = []
            with t.no_grad():
                for kk, (img_test, mask_test) in enumerate(test_dataloader):
                    # Segmentation-only test: ground-truth mask unused.
                    if opt.use_gpu:
                        img_test = img_test.cuda()
                    mask_pred_test = net(img_test)  # [1,1,512,512]

                    probs = t.sigmoid(mask_pred_test).squeeze().squeeze().cpu(
                    ).detach().numpy()  # [512,512]
                    mask = probs > opt.out_threshold
                    result_test.append(mask)

            # Compute 2-D recall over all predicted test masks.
            vis.plot('测试集二维召回率', getRecall(result_test).getResult())
        net.train()
示例#14
0
def train(**kwargs):
    """Train a classifier on the DogCat dataset.

    Fixes over the previous revision: correct data roots and loader wiring,
    `optimizer.step()` (was the nonexistent `optimizer.stop()`), Adam built
    from `model.parameters()`, `loss.item()` instead of `loss.data[0]`, and
    checkpointing/validation/lr-decay moved out of the batch loop to run
    once per epoch.
    """
    opt.parse(kwargs)
    vis = Visualizer(opt.env)

    model = getattr(models, opt.model)()
    if opt.load_model_path:
        model.load(opt.load_model_path)
    if opt.use_gpu:
        model.cuda()

    # Data: both splits come from the training data root.
    train_data = DogCat(opt.train_data_root, train=True)
    val_data = DogCat(opt.train_data_root, train=False)
    train_dataloader = DataLoader(train_data,
                                  opt.batch_size,
                                  shuffle=True,
                                  num_workers=opt.num_workers)
    val_dataloader = DataLoader(val_data,
                                opt.batch_size,
                                shuffle=False,
                                num_workers=opt.num_workers)

    # Criterion and optimizer. Adam needs the parameter iterable,
    # not the module object itself.
    criterion = t.nn.CrossEntropyLoss()
    lr = opt.lr
    optimizer = t.optim.Adam(model.parameters(), lr=lr)

    # Meters: smoothed loss and confusion matrix.
    loss_meter = meter.AverageValueMeter()
    confusion_matrix = meter.ConfusionMeter(2)
    previous_loss = 1e100
    for epoch in range(opt.max_epoch):
        loss_meter.reset()
        confusion_matrix.reset()

        for ii, (data, label) in tqdm(enumerate(train_dataloader)):
            # Train on one batch.
            input = Variable(data)
            target = Variable(label)
            if opt.use_gpu:
                input = input.cuda()
                target = target.cuda()
            optimizer.zero_grad()
            score = model(input)
            loss = criterion(score, target)
            loss.backward()
            optimizer.step()

            # Update meters and visualize.
            loss_meter.add(loss.item())
            confusion_matrix.add(score.data, target.data)
            if ii % opt.print_freq == opt.print_freq - 1:
                vis.plot('loss', loss_meter.value()[0])

                if os.path.exists(opt.debug_file):
                    import ipdb
                    ipdb.set_trace()

        # Per-epoch: checkpoint, validate, log, and decay the learning rate.
        model.save()

        val_cm, val_accuracy = val(model, val_dataloader)
        vis.plot('val_accuracy', val_accuracy)
        vis.log(
            'epoch:{epoch},lr:{lr},loss:{loss},train_cm:{train_cm},val_cm:{val_cm}'
            .format(epoch=epoch,
                    loss=loss_meter.value()[0],
                    val_cm=str(val_cm.value()),
                    train_cm=str(confusion_matrix.value()),
                    lr=lr))

        if loss_meter.value()[0] > previous_loss:
            lr = lr * opt.lr_decay
            for param_group in optimizer.param_groups:
                param_group['lr'] = lr
        # Track the epoch loss unconditionally (the original only updated it
        # on the decay branch, so the comparison used a stale value).
        previous_loss = loss_meter.value()[0]
示例#15
0
def train(**kwargs):
    """Train a document-pair matching model.

    Steps: configure model, build train/validate DocumentPair loaders
    (validation reuses the training vocabulary), optimize with Adam +
    cross-entropy, and decay the lr when the epoch loss stops improving.
    """
    opt.parse(kwargs)
    vis = Visualizer(opt.env)

    # step1: configure model
    model = getattr(models, opt.model)(opt)
    if opt.load_model_path:
        model.load(opt.load_model_path)
    if opt.use_gpu:
        model.cuda()

    # step2: data — validation shares the vocabulary built on the train split
    train_data = DocumentPair(opt.train_data_root,
                              doc_type='train',
                              suffix='txt',
                              load=lambda x: x.strip().split(','))
    train_data.initialize(vocab_size=opt.vocab_size)
    val_data = DocumentPair(opt.validate_data_root,
                            doc_type='validate',
                            suffix='txt',
                            load=lambda x: x.strip().split(','),
                            vocab=train_data.vocab)
    val_data.initialize()
    train_dataloader = DataLoader(train_data,
                                  opt.batch_size,
                                  shuffle=False,
                                  num_workers=opt.num_workers)
    val_dataloader = DataLoader(val_data,
                                opt.batch_size,
                                shuffle=False,
                                num_workers=opt.num_workers)

    # step3: criterion and optimizer
    criterion = t.nn.CrossEntropyLoss()
    lr = opt.lr
    optimizer = t.optim.Adam(model.parameters(),
                             lr=lr,
                             weight_decay=opt.weight_decay)

    # step4: meters
    loss_meter = meter.AverageValueMeter()
    confusion_matrix = meter.ConfusionMeter(2)
    previous_loss = 1e100

    # train
    for epoch in range(opt.max_epoch):

        loss_meter.reset()
        confusion_matrix.reset()

        for ii, batch in enumerate(train_dataloader):

            data_left, data_right, label, num_pos = load_data(
                batch, opt, train_data.vocab)

            # train model
            input_data_left, input_data_right = Variable(
                t.from_numpy(data_left)), Variable(t.from_numpy(data_right))
            target = Variable(t.from_numpy(label))
            if opt.use_gpu:
                input_data_left, input_data_right = input_data_left.cuda(
                ), input_data_right.cuda()
                target = target.cuda()

            optimizer.zero_grad()
            scores, predictions = model((input_data_left, input_data_right))
            # targets are one-hot: max(1)[1] recovers the class index
            loss = criterion(scores, target.max(1)[1])
            loss.backward()
            optimizer.step()

            # meters update and visualize
            # loss.item() instead of loss.data[0]: indexing a 0-dim tensor
            # raises an error on PyTorch >= 0.5.
            loss_meter.add(loss.item())
            confusion_matrix.add(predictions.data, target.max(1)[1].data)

            if ii % opt.print_freq == opt.print_freq - 1:
                vis.plot('loss', loss_meter.value()[0])

                # drop into the debugger when the debug file exists
                if os.path.exists(opt.debug_file):
                    import ipdb
                    ipdb.set_trace()

        model.save()

        # validate and visualize
        val_cm, val_accuracy = val(model, val_dataloader)

        vis.plot('val_accuracy', val_accuracy)
        vis.log(
            "epoch:{epoch},lr:{lr},loss:{loss},train_cm:{train_cm},val_cm:{val_cm}"
            .format(epoch=epoch,
                    loss=loss_meter.value()[0],
                    val_cm=str(val_cm.value()),
                    train_cm=str(confusion_matrix.value()),
                    lr=lr))

        # update learning rate when loss stops improving; mutating
        # param_groups keeps optimizer state (momentum) intact
        if loss_meter.value()[0] > previous_loss:
            lr = lr * opt.lr_decay
            for param_group in optimizer.param_groups:
                param_group['lr'] = lr

        previous_loss = loss_meter.value()[0]
示例#16
0
def train_model(model, criterion, optimizer, dataloaders, model_path, start_epoch, iter_num, logger, device):
    """Train a YOLO-style detector with burn-in lr warmup and mAP tracking.

    Each epoch runs a 'train' and a 'val' phase over ``dataloaders``; the
    latest weights are saved every epoch and the best-mAP weights are kept
    as ``best.pth``. Loss/lr/mAP curves are plotted to visdom.
    """
    since = time.time()
    best_loss = np.inf
    best_map = 0
    trial_log = args.trial_log
    num_epochs = args.num_epochs
    test_interval = args.test_interval
    burn_in = args.burn_in
    lr = args.learning_rate
    lr_steps = args.lr_steps
    size_grid_cell = args.size_grid_cell
    num_boxes = args.num_boxes
    num_classes = args.num_classes
    conf_thresh = args.conf_thresh
    iou_thresh = args.iou_thresh
    nms_thresh = args.nms_thresh
    port = args.port
    vis = Visualizer(env=trial_log, port=port)

    for epoch in range(start_epoch, num_epochs):
        logger.info('Epoch {} / {}'.format(epoch+1, num_epochs))
        logger.info('-' * 64)

        # set learning rate manually at the configured step epochs
        if epoch in lr_steps:
            lr *= 0.1
        adjust_learning_rate(optimizer, lr)

        # Each epoch has a training and validation phase
        for phase in ['train', 'val']:
            if phase == 'train':
                # scheduler.step()
                model.train()  # Set model to training mode
            else:
                model.eval()   # Set model to evaluate mode

            total_loss = 0.0
            # Iterate over data.
            for i, (inputs, targets) in enumerate(dataloaders[phase]):
                # warming up of the learning rate during the burn-in period
                if phase == 'train':
                    if iter_num < args.burn_in:
                        burn_lr = get_learning_rate(iter_num, lr, burn_in)
                        adjust_learning_rate(optimizer, burn_lr)
                        iter_num += 1
                    else:
                        adjust_learning_rate(optimizer, lr)

                inputs = inputs.to(device)
                targets = targets.to(device)

                # zero the parameter gradients
                optimizer.zero_grad()

                # forward; track gradient history only in the train phase
                with torch.set_grad_enabled(phase == 'train'):
                    outputs = model(inputs)
                    loss, obj_coord_loss, obj_conf_loss, noobj_conf_loss, obj_class_loss = criterion(outputs, targets)

                    # backward + optimize only if in training phase
                    if phase == 'train':
                        loss.backward()
                        optimizer.step()

                # statistics
                total_loss += loss.item()

                if phase == 'train':
                    cur_lr = optimizer.state_dict()['param_groups'][0]['lr']
                    vis.plot('cur_lr', cur_lr)
                    logger.info('Epoch [{}/{}], iter [{}/{}], lr: {:g}, loss: {:.4f}, average_loss: {:.4f}'.format(
                        epoch+1, args.num_epochs, i+1, len(dataloaders[phase]), cur_lr, loss.item(), total_loss/(i+1)))
                    logger.debug('  obj_coord_loss: {:.4f}, obj_conf_loss: {:.4f}, noobj_conf_loss: {:.4f}, obj_class_loss: {:.4f}'.format(
                        obj_coord_loss, obj_conf_loss, noobj_conf_loss, obj_class_loss))
                    vis.plot('train_loss', total_loss/(i+1))

            # save model for inferencing and resuming training process
            if phase == 'train':
                torch.save(model.state_dict(), osp.join(model_path, 'latest.pth'))
                torch.save({
                    # key was 'iter_num: ' (stray colon/space) — a resume
                    # lookup of checkpoint['iter_num'] would KeyError
                    'iter_num': iter_num,
                    'epoch': epoch,
                    'model_state_dict': model.state_dict(),
                    'optimizer_state_dict': optimizer.state_dict(),
                }, osp.join(model_path, 'latest.tar'))

            # evaluate latest model
            if phase == 'val':
                current_loss = total_loss / (i+1)
                if best_loss > current_loss:
                    best_loss = current_loss
                logger.info('current val loss: {:.4f}, best val Loss: {:.4f}'.format(current_loss, best_loss))
                vis.plot('val_loss', total_loss/(i+1))

                if epoch < 10 or (epoch+1) % test_interval == 0:
                    current_map = calc_map(logger, dataloaders[phase].dataset, model_path, 
                        size_grid_cell, num_boxes, num_classes, conf_thresh, iou_thresh, nms_thresh)
                    # save the best model as so far
                    if best_map < current_map:
                        best_map = current_map
                        torch.save(model.state_dict(), osp.join(model_path, 'best.pth'))
                    logger.info('current val map: {:.4f}, best val map: {:.4f}'.format(current_map, best_map))
                    vis.plot('val_map', current_map)

    time_elapsed = time.time() - since
    logger.info('Training complete in {:.0f}m {:.0f}s'.format(
        time_elapsed // 60, time_elapsed % 60))
    logger.info('Optimization Done.')
示例#17
0
def train(**kwargs):
    """Fine-tune a pretrained resnet34 on DogCat (2-class head).

    Fixes over the previous revision: Python-2 ``print`` statements were
    syntax errors under Python 3 (the rest of the codebase is Python 3);
    bare-string pseudo-comments became real comments.
    """
    # Update config from command-line kwargs.
    opt.parse(kwargs)
    vis = Visualizer(opt.env)

    # (1) Load the network (pretrained resnet34 with a fresh 2-class head).
    #model = getattr(models,opt.model)()
    model = models.resnet34(pretrained=True)
    model.fc = nn.Linear(512, 2)
    #if opt.load_model_path:
    #	model.load(opt.load_model_path)
    if opt.use_gpu:  # GPU
        model.cuda()

    # (2) Data: train and validation splits.
    train_data = DogCat(opt.train_data_root, train=True)
    val_data = DogCat(opt.train_data_root, train=False)

    train_dataloader = DataLoader(train_data,
                                  opt.batch_size,
                                  shuffle=True,
                                  num_workers=opt.num_workers)
    val_dataloader = DataLoader(val_data,
                                opt.batch_size,
                                shuffle=False,
                                num_workers=opt.num_workers)

    # (3) Loss and optimizer.
    criterion = t.nn.CrossEntropyLoss()  # cross-entropy loss
    lr = opt.lr  # learning rate
    optimizer = t.optim.SGD(model.parameters(),
                            lr=opt.lr,
                            weight_decay=opt.weight_decay)

    # (4) Meters: smoothed loss and confusion matrix.
    loss_meter = meter.AverageValueMeter()
    confusion_matrix = meter.ConfusionMeter(2)
    previous_loss = 1e10

    # (5) Training loop.
    for epoch in range(opt.max_epoch):

        loss_meter.reset()
        confusion_matrix.reset()

        for ii, (data, label) in enumerate(train_dataloader):

            print("ii:", ii)
            # Train on one batch.
            input = Variable(data)
            target = Variable(label)

            if opt.use_gpu:
                input = input.cuda()
                target = target.cuda()

            # Zero gradients, forward, backward, update.
            optimizer.zero_grad()
            score = model(input)

            loss = criterion(score, target)
            loss.backward()

            optimizer.step()

            # Update meters and visualize.
            loss_meter.add(loss.item())
            #print score.shape,target.shape
            confusion_matrix.add(score.detach(), target.detach())

            if ii % opt.print_freq == opt.print_freq - 1:
                vis.plot('loss', loss_meter.value()[0])

                if os.path.exists(opt.debug_file):
                    import ipdb
                    ipdb.set_trace()
        #model.save()
        name = time.strftime('model' + '%m%d_%H:%M:%S.pth')
        t.save(model.state_dict(), 'checkpoints/' + name)

        # Compute and visualize validation metrics.
        val_cm, val_accuracy = val(model, val_dataloader)
        vis.plot('val_accuracy', val_accuracy)
        vis.log(
            "epoch:{epoch},lr:{lr},loss:{loss},train_cm:{train_cm},val_cm:{val_cm}"
            .format(epoch=epoch,
                    loss=loss_meter.value()[0],
                    val_cm=str(val_cm.value()),
                    train_cm=str(confusion_matrix.value()),
                    lr=lr))

        print("epoch:", epoch, "loss:", loss_meter.value()[0],
              "accuracy:", val_accuracy)

        # If the loss stopped improving, decay the learning rate.
        if loss_meter.value()[0] > previous_loss:
            lr = lr * opt.lr_decay
            for param_group in optimizer.param_groups:
                param_group["lr"] = lr

        previous_loss = loss_meter.value()[0]
示例#18
0
def train(**kwargs):
    """Train the configured model on DogCat and track metrics in visdom.

    Pipeline: parse CLI overrides, build model and loaders, run the epoch
    loop with Adam-style optimizer from the model, checkpoint each epoch,
    validate, and decay the lr whenever the smoothed loss stops improving.
    """
    opt._parse(kwargs)
    vis = Visualizer(opt.env, port=opt.vis_port)

    # step1: configure model
    model = getattr(models, opt.model)()
    if opt.load_model_path:
        model.load(opt.load_model_path)
    model.to(opt.device)

    # step2: data (train/val splits share the same root)
    train_dataloader = DataLoader(
        DogCat(opt.train_data_root, train=True),
        opt.batch_size,
        shuffle=True,
        num_workers=opt.num_workers)
    val_dataloader = DataLoader(
        DogCat(opt.train_data_root, train=False),
        opt.batch_size,
        shuffle=False,
        num_workers=opt.num_workers)

    # step3: criterion and optimizer
    criterion = t.nn.CrossEntropyLoss()
    lr = opt.lr
    optimizer = model.get_optimizer(lr, opt.weight_decay)

    # step4: meters (smoothed loss + 2-class confusion matrix)
    loss_meter = meter.AverageValueMeter()
    confusion_matrix = meter.ConfusionMeter(2)
    previous_loss = 1e10

    # train
    for epoch in range(opt.max_epoch):

        loss_meter.reset()
        confusion_matrix.reset()

        for step, (batch_data, batch_label) in tqdm(enumerate(train_dataloader)):

            # move the batch to the configured device
            inputs = batch_data.to(opt.device)
            targets = batch_label.to(opt.device)

            optimizer.zero_grad()
            scores = model(inputs)
            batch_loss = criterion(scores, targets)
            batch_loss.backward()
            optimizer.step()

            # meters update and visualization; detach() keeps the meter
            # from holding onto the autograd graph
            loss_meter.add(batch_loss.item())
            confusion_matrix.add(scores.detach(), targets.detach())

            if (step + 1) % opt.print_freq == 0:
                vis.plot('loss', loss_meter.value()[0])

                # drop into the debugger when the debug file exists
                if os.path.exists(opt.debug_file):
                    import ipdb;
                    ipdb.set_trace()

        model.save()

        # validate and visualize
        val_cm, val_accuracy = val(model, val_dataloader)

        vis.plot('val_accuracy', val_accuracy)
        vis.log("epoch:{epoch},lr:{lr},loss:{loss},train_cm:{train_cm},val_cm:{val_cm}".format(
                    epoch = epoch,loss = loss_meter.value()[0],val_cm = str(val_cm.value()),train_cm=str(confusion_matrix.value()),lr=lr))

        # update learning rate when the loss stops improving; mutating
        # param_groups in place preserves optimizer state (momentum etc.)
        if loss_meter.value()[0] > previous_loss:
            lr = lr * opt.lr_decay
            for group in optimizer.param_groups:
                group['lr'] = lr

        previous_loss = loss_meter.value()[0]
示例#19
0
文件: main.py 项目: lucineIT/GAN
def train(**kwargs):
    """Train a DCGAN: alternate discriminator and generator updates.

    Keyword arguments override attributes on the global ``opt`` config.
    Every ``opt.decay_every`` epochs, fixed-noise samples and both network
    state dicts are saved under ``opt.save_path`` / ``checkpoints/``.
    """
    for k_, v_ in kwargs.items():
        setattr(opt, k_, v_)
    if opt.vis:
        from utils.visualize import Visualizer
        vis = Visualizer(opt.env)

    transforms = tv.transforms.Compose([
        # FIX: transforms.Scale was deprecated and later removed from
        # torchvision; Resize is the drop-in replacement.
        tv.transforms.Resize(opt.image_size),
        tv.transforms.CenterCrop(opt.image_size),
        tv.transforms.ToTensor(),
        tv.transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
    ])
    dataset = tv.datasets.ImageFolder(opt.data_path, transform=transforms)
    dataloader = t.utils.data.DataLoader(dataset,
                                         batch_size=opt.batch_size,
                                         shuffle=True,
                                         num_workers=opt.num_workers,
                                         drop_last=True)

    # Networks; checkpoints are loaded onto CPU storage regardless of
    # where they were saved (map_location keeps the storage as-is).
    netg, netd = NetGenerator(opt), NetD(opt)
    map_location = lambda storage, loc: storage
    if opt.netd_path:
        netd.load_state_dict(t.load(opt.netd_path, map_location=map_location))
    if opt.netg_path:
        netg.load_state_dict(t.load(opt.netg_path, map_location=map_location))

    # Optimizers and loss.
    optimizer_g = t.optim.Adam(netg.parameters(),
                               opt.G_lr,
                               betas=(opt.beta1, 0.999))
    optimizer_d = t.optim.Adam(netd.parameters(),
                               opt.D_lr,
                               betas=(opt.beta1, 0.999))
    criterion = t.nn.BCELoss()

    # Real images get label 1, fake images label 0; `noises` is the
    # generator input, `fix_noises` a fixed batch for visual comparison.
    true_labels = Variable(t.ones(opt.batch_size))
    fake_labels = Variable(t.zeros(opt.batch_size))
    fix_noises = Variable(t.randn(opt.batch_size, opt.nz, 1, 1))
    noises = Variable(t.randn(opt.batch_size, opt.nz, 1, 1))

    errord_meter = AverageValueMeter()
    errorg_meter = AverageValueMeter()

    if opt.use_gpu:
        netd.cuda()
        netg.cuda()
        criterion.cuda()
        true_labels, fake_labels = true_labels.cuda(), fake_labels.cuda()
        fix_noises, noises = fix_noises.cuda(), noises.cuda()

    epochs = range(opt.max_epoch)
    for epoch in iter(epochs):
        for ii, (img, _) in tqdm.tqdm(enumerate(dataloader)):
            real_img = Variable(img)
            if opt.use_gpu:
                real_img = real_img.cuda()
            if ii % opt.d_every == 0:
                # Train the discriminator:
                optimizer_d.zero_grad()
                # push real images towards the "real" label ...
                output = netd(real_img)
                error_d_real = criterion(output, true_labels)
                error_d_real.backward()

                # ... and generated images towards the "fake" label.
                noises.data.copy_(t.randn(opt.batch_size, opt.nz, 1, 1))
                fake_img = netg(noises).detach()  # detach: no generator grads here
                output = netd(fake_img)
                error_d_fake = criterion(output, fake_labels)
                error_d_fake.backward()
                optimizer_d.step()

                error_d = error_d_fake + error_d_real

                # FIX: .data[0] fails on 0-dim tensors in PyTorch >= 0.5;
                # .item() is the supported scalar accessor.
                errord_meter.add(error_d.item())

            if ii % opt.g_every == 0:
                # Train the generator: make the discriminator call fakes real.
                optimizer_g.zero_grad()
                noises.data.copy_(t.randn(opt.batch_size, opt.nz, 1, 1))
                fake_img = netg(noises)
                output = netd(fake_img)
                error_g = criterion(output, true_labels)
                error_g.backward()
                optimizer_g.step()
                errorg_meter.add(error_g.item())

            if opt.vis and ii % opt.plot_every == opt.plot_every - 1:
                # Visualization / optional debugging hook.
                if os.path.exists(opt.debug_file):
                    ipdb.set_trace()
                fix_fake_imgs = netg(fix_noises)
                vis.images(fix_fake_imgs.data.cpu().numpy()[:64] * 0.5 + 0.5,
                           win='fixfake')
                vis.plot('error_d', errord_meter.value()[0])
                vis.images(real_img.data.cpu().numpy()[:64] * 0.5 + 0.5,
                           win='real')
                vis.plot('error_g', errorg_meter.value()[0])

        if epoch % opt.decay_every == 0:
            # Save sample images and model checkpoints.
            # FIX: regenerate the fixed-noise samples here -- the original
            # relied on `fix_fake_imgs` assigned only inside the `opt.vis`
            # branch above, raising NameError when visualization is off.
            fix_fake_imgs = netg(fix_noises)
            tv.utils.save_image(fix_fake_imgs.data[:64],
                                '%s/%s.png' % (opt.save_path,
                                               (epoch + opt.startpoint)),
                                normalize=True,
                                range=(-1, 1))
            t.save(netd.state_dict(),
                   'checkpoints/netd_%s.pth' % (epoch + opt.startpoint))
            t.save(netg.state_dict(),
                   'checkpoints/netg_%s.pth' % (epoch + opt.startpoint))
            errord_meter.reset()
            errorg_meter.reset()
            # Re-create the optimizers, resetting their internal state.
            optimizer_g = t.optim.Adam(netg.parameters(),
                                       opt.G_lr,
                                       betas=(opt.beta1, 0.999))
            optimizer_d = t.optim.Adam(netd.parameters(),
                                       opt.D_lr,
                                       betas=(opt.beta1, 0.999))
示例#20
0
def train(**kwargs):
    """Validate-then-train loop for DogCat: each epoch first evaluates the
    current model on the val and test splits, logs/saves the metrics, then
    runs one pass over the training data and periodically checkpoints.

    NOTE(review): at epoch 0 the meters are still empty, so the first
    logged loss is NaN -- presumably accepted by design; confirm.
    """
    # opt.parse(kwargs)
    vis = Visualizer(opt.env)

    savingData = []  # accumulates one metrics tuple per epoch (persisted below)
    # step1: configure model
    model = getattr(models, opt.model)()
    if opt.load_model_path:
        model.load(opt.load_model_path)
    if opt.use_gpu: model.cuda()

    # step2: data
    train_data = DogCat(opt.train_data_root, train=True)
    val_data = DogCat(opt.train_data_root, train=False)
    test_data = DogCat(opt.test_data_root, test=True)
    train_dataloader = DataLoader(train_data,
                                  opt.batch_size,
                                  shuffle=True,
                                  num_workers=opt.num_workers)
    val_dataloader = DataLoader(val_data,
                                opt.batch_size,
                                shuffle=False,
                                num_workers=opt.num_workers)
    test_dataloader = DataLoader(test_data,
                                 opt.batch_size,
                                 shuffle=False,
                                 num_workers=opt.num_workers)

    # step3: criterion and optimizer
    criterion = t.nn.CrossEntropyLoss()
    lr = opt.lr
    optimizer = t.optim.Adam(model.parameters(),
                             lr=lr,
                             weight_decay=opt.weight_decay)

    # step4: meters
    loss_meter = meter.AverageValueMeter()
    confusion_matrix = meter.ConfusionMeter(2)
    previous_loss = 1e100

    # train
    for epoch in range(opt.max_epoch + 1):

        # validate and visualize (runs BEFORE this epoch's training pass)
        val_cm, val_accuracy = val(model, val_dataloader)
        test_cm, test_accuracy = val(model, test_dataloader)
        vis.plot('test_accuracy', test_accuracy)
        vis.plot('lr', lr)
        vis.plot('val_accuracy', val_accuracy)
        vis.log(
            "epoch:{epoch},lr:{lr},loss:{loss},train_cm:{train_cm},val_cm:{val_cm},test_cm:{test_cm}"
            .format(epoch=epoch,
                    loss=loss_meter.value()[0],
                    val_cm=str(val_cm.value()),
                    train_cm=str(confusion_matrix.value()),
                    test_cm=str(test_cm.value()),
                    lr=lr))
        print("epoch = ", epoch, "   loss = ",
              loss_meter.value()[0], "   lr = ", lr)
        batch_results = [(epoch, loss_meter.value()[0], lr,
                          str(val_cm.value()), str(confusion_matrix.value()),
                          str(test_cm.value()), val_accuracy, test_accuracy)
                         ]  # one record per epoch
        savingData += batch_results  # keep the full history ...
        save_training_data(savingData, opt.traingData_file)  # ... and persist it every epoch
        # update learning rate
        # NOTE(review): `lr` decays unconditionally every epoch, but with
        # the block below commented out it is never pushed into the
        # optimizer -- only the *plotted* lr changes. Looks deliberate
        # (the decay condition is commented out too); confirm.
        # if loss_meter.value()[0] > previous_loss:
        lr = lr * opt.lr_decay
        # # Second way to lower the learning rate: keeps optimizer state
        # # (momentum etc.) intact.
        # for param_group in optimizer.param_groups:
        #     param_group['lr'] = lr

        if epoch == opt.max_epoch:
            return

        previous_loss = loss_meter.value()[0]
        loss_meter.reset()
        confusion_matrix.reset()
        for ii, (data, label) in tqdm(enumerate(train_dataloader),
                                      total=len(train_data) / opt.batch_size):

            # train model
            input = data
            target = label
            if opt.use_gpu:
                input = input.cuda()
                target = target.cuda()

            optimizer.zero_grad()
            score = model(input)
            loss = criterion(score, target)
            loss.backward()
            optimizer.step()

            # meters update and visualize
            loss_meter.add(loss.item())
            confusion_matrix.add(score.data, target.data)

            if ii % opt.print_freq == opt.print_freq - 1:
                vis.plot('loss', loss_meter.value()[0])

                # enter debug mode when the debug flag file exists
                if os.path.exists(opt.debug_file):
                    import ipdb
                    ipdb.set_trace()

        prefix = 'checkpoints/'
        # NOTE(review): '%H:%M:%S' puts colons in the filename, which is
        # invalid on Windows -- presumably POSIX-only; confirm.
        name = time.strftime(prefix + '%m%d_%H:%M:%S_' + str(epoch + 1) +
                             '.pth')
        if epoch == 0:
            model.save(name)
        if np.mod(epoch + 1, 10) == 0:
            model.save(name)
示例#21
0
文件: main.py 项目: wds-seu/Aceso
def train(**kwargs):
    """Train a text classifier on the Aceso corpus.

    Splits the corpus 70/30 into train/val, trains with Adam (pretrained
    embedding weights kept frozen via a requires_grad filter), plots
    per-class or joint metrics to visdom every ``opt.print_freq`` batches,
    checkpoints after each epoch, and decays the lr when the mean epoch
    loss stops improving.
    """
    opt.parse(kwargs)
    vis = Visualizer(opt.env)

    # step1: prepare data
    dh = DataHelper(opt.train_data_root, train=True)
    x_text, cuis, sentences_origin, y, vocabulary, vocabulary_inv = dh.load_data(
    )
    x_train, x_val, y_train, y_val = train_test_split(x_text,
                                                      y,
                                                      test_size=0.3,
                                                      random_state=1,
                                                      shuffle=True)
    x_train = torch.from_numpy(x_train).long()
    y_train = torch.from_numpy(y_train).long()
    y_train = y_train.view(-1)
    train_data = TensorDataset(x_train, y_train)

    x_val = torch.from_numpy(x_val).long()
    y_val = torch.from_numpy(y_val).long()
    y_val = y_val.view(-1)
    val_data = TensorDataset(x_val, y_val)

    train_dataloader = DataLoader(train_data,
                                  opt.batch_size,
                                  shuffle=True,
                                  num_workers=opt.num_workers)
    val_dataloader = DataLoader(val_data,
                                opt.batch_size,
                                shuffle=False,
                                num_workers=opt.num_workers)

    # step2: model
    if opt.mixing_train:
        pretrained_embeddings = emb_utils.load_mixing_embedding()
    else:
        pretrained_embeddings = emb_utils.load_words_embedding()
    model = getattr(models,
                    opt.model)(vocab_size=len(vocabulary),
                               pretrained_embeddings=pretrained_embeddings)
    # if opt.load_model_path:
    #    model.load(opt.load_model_path)
    if opt.use_gpu:
        model.cuda()

    # step3: loss function and optimizer
    criterion = nn.CrossEntropyLoss()
    lr = opt.lr
    # keep the (pretrained) embedding parameters frozen
    optimizer = torch.optim.Adam(filter(lambda p: p.requires_grad,
                                        model.parameters()),
                                 lr=lr,
                                 weight_decay=opt.weight_decay)

    # step4: meters
    loss_meter = meter.AverageValueMeter()
    confusion_matrix = meter.ConfusionMeter(4)
    previous_loss = 1e100

    print("train start...")
    # step5: train
    for epoch in range(opt.max_epoch):
        loss_meter.reset()
        confusion_matrix.reset()
        for ii, (data, label) in tqdm(enumerate(train_dataloader)):
            # train model parameters
            input = Variable(data)
            target = Variable(label)
            if opt.use_gpu:
                torch.cuda.set_device(opt.device)
                input = input.cuda()
                target = target.cuda()
            optimizer.zero_grad()
            if "LSTM" in opt.model or "RNN" in opt.model:
                # recurrent models expect (seq_len, batch): transpose in place
                score, _ = model(input.t_())
            else:
                score = model(input)
            loss = criterion(score, target)
            loss.backward()
            optimizer.step()
            # update meters and visualize
            # FIX: .data[0] fails on 0-dim tensors in PyTorch >= 0.5;
            # .item() is the supported scalar accessor.
            loss_meter.add(loss.item())
            m = torch.max(score, 1)[1]
            confusion_matrix.add(m.view(target.size()).data, target.data)

            if (ii + 1) % opt.print_freq == 0:
                # training-set metrics visualization
                cm_value = confusion_matrix.value()
                if not opt.together_calculate:
                    result_p, result_i, result_o, result_n = vis.calculate_and_show(
                        cm_value, together_calculate=False)
                    # FIX: renamed from `data`, which shadowed the batch
                    # tensor unpacked by the loop above.
                    plot_data = [result_p, result_i, result_o, result_n]
                    vis.plot_lprf_dependent(plot_data, env="train")
                    vis.plot('train_loss', loss_meter.value()[0])
                else:
                    train_accuracy, train_precision, train_recall, train_f1 = vis.calculate_and_show(
                        cm_value)
                    plot_data = [
                        train_accuracy, train_precision, train_recall, train_f1
                    ]
                    vis.plot_laprf(plot_data, env="train")
                    vis.plot('train_loss', loss_meter.value()[0])
        model.save(epoch=epoch)

        # compute validation metrics and visualize
        vocabulary_inv = {index: word for word, index in vocabulary.items()}
        val(model, val_dataloader, loss_meter, vis, epoch, vocabulary_inv)
        # if the loss stops decreasing, lower the learning rate
        if loss_meter.value()[0] > previous_loss:
            lr = lr * opt.lr_decay
            for param_group in optimizer.param_groups:
                param_group['lr'] = lr
        previous_loss = loss_meter.value()[0]
示例#22
0
def train(**kwargs):
    """Train a 40-output regression model (MSE loss) on CGH data.

    FIXES vs the original:
    - the batch loop unpacked ``(data, label)`` but then used the
      undefined names ``input``/``target``, failing on the first batch;
    - checkpointing, validation, logging and lr decay ran once per BATCH
      (inside the loop); they are per-epoch operations and were dedented;
    - ``loss.data[0]`` replaced by ``loss.item()`` (the former fails on
      0-dim tensors in PyTorch >= 0.5);
    - ``vak_cm`` typo corrected to ``val_cm``.
    """
    opt.parse(kwargs)
    vis = Visualizer(opt.env)
    Model = getattr(models, opt.model)
    model = Model(40)
    if opt.load_model_path:
        model.load(opt.load_model_path)
    if opt.use_gpu: model.cuda()

    train_data = CGHData(opt.train_data_root, train=True)
    val_data = CGHData(opt.train_data_root, train=False)
    train_dataloader = DataLoader(train_data,
                                  opt.batch_size,
                                  shuffle=True,
                                  num_workers=opt.num_workers)
    val_dataloader = DataLoader(val_data,
                                opt.batch_size,
                                shuffle=False,
                                num_workers=opt.num_workers)
    criterion = nn.MSELoss()
    lr = opt.lr
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=lr,
                                 weight_decay=opt.weight_decay)

    loss_meter = meter.AverageValueMeter()
    previous_loss = 1e100

    for epoch in range(opt.max_epoch):
        loss_meter.reset()

        for k, (data, label) in enumerate(train_dataloader):
            # FIX: bind the unpacked batch to the names used below.
            input, target = data, label
            if opt.use_gpu:
                input = input.cuda()
                target = target.cuda()
            optimizer.zero_grad()
            score = model(input)
            loss = criterion(score, target)
            loss.backward()
            optimizer.step()

            loss_meter.add(loss.item())

            if k % opt.print_freq == opt.print_freq - 1:
                vis.plot('loss', loss_meter.value()[0])

        # once per epoch: checkpoint, validate, log, maybe decay lr
        model.save()

        val_cm, val_accuracy = val(model, val_dataloader)
        vis.plot('val_accuracy', val_accuracy)
        vis.log("epoch:{epoch},lr:{lr},loss:{loss}".format(
            epoch=epoch, loss=loss_meter.value()[0], lr=lr))

        if loss_meter.value()[0] > previous_loss:
            lr = lr * opt.lr_decay
            for param_group in optimizer.param_groups:
                param_group['lr'] = lr

        previous_loss = loss_meter.value()[0]
def train(args, config):
    """Train an MNIST classifier with per-epoch validation and early stopping.

    Checkpoints after every epoch to ``args.ckpts_dir``; stops once the
    validation loss has gone several consecutive epochs without improving,
    then saves the final model as ``model.pth``.
    """
    vis = Visualizer()

    train_set = MNIST(data_path=config.train_data_path,
                      label_path=config.train_label_path,
                      config=config,
                      mode='train')
    valid_set = MNIST(data_path=config.train_data_path,
                      label_path=config.train_label_path,
                      config=config,
                      mode='valid')

    train_dataloader = DataLoader(train_set,
                                  config.batch_size,
                                  shuffle=True,
                                  num_workers=config.num_workers)
    valid_dataloader = DataLoader(valid_set,
                                  config.batch_size,
                                  shuffle=False,
                                  num_workers=config.num_workers)

    model = getattr(network, args.model)().eval()
    if args.load_model_path:
        model.load(args.load_model_path)
    if args.use_gpu:
        model.cuda()

    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=config.lr)

    train_loss_meter, valid_loss_meter = meter.AverageValueMeter(
    ), meter.AverageValueMeter()
    train_confusion_matrix, valid_confusion_matrix = meter.ConfusionMeter(
        10), meter.ConfusionMeter(10)

    best_valid_loss = 1e5
    best_epoch = 0
    dist_to_best = 0  # epochs since the last validation-loss improvement

    # FIX: time.clock() was deprecated since 3.3 and removed in Python 3.8;
    # perf_counter() is the documented replacement for interval timing.
    time_begin = time.perf_counter()

    for epoch in range(config.epoch):

        # train
        model.train()
        train_loss_meter.reset()
        train_confusion_matrix.reset()

        for _iter, (train_data, train_target) in enumerate(train_dataloader):

            if args.use_gpu:
                train_data = train_data.cuda()
                train_target = train_target.cuda()

            optimizer.zero_grad()
            train_logits, train_output = model(train_data)
            train_loss = criterion(train_logits, train_target)
            train_loss.backward()
            optimizer.step()

            train_loss_meter.add(train_loss.item())
            train_confusion_matrix.add(train_logits.data, train_target.data)

            if _iter % config.print_freq == 0:
                vis.plot('train_loss', train_loss_meter.value()[0])
        model.save(path=os.path.join(args.ckpts_dir, 'model_{0}.pth'.format(
            str(epoch))))

        # valid
        model.eval()
        valid_loss_meter.reset()
        valid_confusion_matrix.reset()

        # FIX: validation needs no gradients; no_grad() skips building the
        # autograd graph (less memory, faster forward passes).
        with t.no_grad():
            for _iter, (valid_data,
                        valid_target) in enumerate(valid_dataloader):

                if args.use_gpu:
                    valid_data = valid_data.cuda()
                    valid_target = valid_target.cuda()

                valid_logits, valid_output = model(valid_data)
                valid_loss = criterion(valid_logits, valid_target)

                valid_loss_meter.add(valid_loss.item())
                valid_confusion_matrix.add(valid_logits.detach().squeeze(),
                                           valid_target.type(t.LongTensor))

        valid_cm = valid_confusion_matrix.value()
        valid_accuracy = 100. * (valid_cm.diagonal().sum()) / (valid_cm.sum())

        vis.plot('valid_accuracy', valid_accuracy)

        vis.log(
            "epoch:{epoch}, train_loss:{train_loss}, train_cm:{train_cm}, valid_loss:{valid_loss}, valid_cm:{valid_cm}, valid_accuracy:{valid_accuracy}"
            .format(epoch=epoch,
                    train_loss=train_loss_meter.value()[0],
                    train_cm=str(train_confusion_matrix.value()),
                    valid_loss=valid_loss_meter.value()[0],
                    valid_cm=str(valid_cm),
                    valid_accuracy=valid_accuracy))
        print(
            "epoch:{epoch}, train_loss:{train_loss}, valid_loss:{valid_loss}, valid_accuracy:{valid_accuracy}"
            .format(epoch=epoch,
                    train_loss=train_loss_meter.value()[0],
                    valid_loss=valid_loss_meter.value()[0],
                    valid_accuracy=valid_accuracy))
        print("train_cm:\n{train_cm}\n\nvalid_cm:\n{valid_cm}".format(
            train_cm=str(train_confusion_matrix.value()),
            valid_cm=str(valid_cm),
        ))

        # early stop: dist_to_best becomes 1 (not 0) right after an
        # improvement, so training breaks after 4 consecutive epochs
        # with no new best.
        if valid_loss_meter.value()[0] < best_valid_loss:
            best_epoch = epoch
            best_valid_loss = valid_loss_meter.value()[0]
            dist_to_best = 0

        dist_to_best += 1
        if dist_to_best > 4:
            break

    model.save(path=os.path.join(args.ckpts_dir, 'model.pth'))
    vis.save()
    print("save model successfully")
    print("best epoch: ", best_epoch)
    print("best valid loss: ", best_valid_loss)
    time_end = time.perf_counter()
    print('time cost: %.2f' % (time_end - time_begin))
示例#24
0
def train():
    """Train the Kesci DoubleSequence model on app-usage data.

    Runs BCE-with-logits training for up to 400 epochs; every third epoch
    it checkpoints and evaluates on the train/test splits, logging
    precision/recall/F1 figures derived from the confusion matrices.
    The lr is multiplied by 0.9 whenever the mean epoch loss fails to
    improve.
    """
    vis = Visualizer("Kesci" + time.strftime('%m%d%H%M'))
    train_data = AppData("../kesci/data/data_v3_23d/train_ab.json",
                         iflabel=True)
    val_data = AppData("../kesci/data/data_v3_23d/val_ab.json", iflabel=True)
    train_dataloader = DataLoader(train_data, 256, shuffle=True, num_workers=4)
    val_dataloader = DataLoader(val_data, 512, shuffle=False, num_workers=2)
    test_data = AppData("../kesci/data/data_v3_23d/test_ab.json", iflabel=True)
    test_dataloader = DataLoader(test_data, 512, shuffle=False, num_workers=2)

    criterion = t.nn.BCEWithLogitsLoss().cuda()
    learning_rate = 0.002
    weight_decay = 0.0003
    model = DoubleSequence(31, 128, 1).cuda()
    optimizer = t.optim.Adam(model.parameters(),
                             lr=learning_rate,
                             weight_decay=weight_decay)

    loss_meter = meter.AverageValueMeter()
    confusion_matrix = meter.ConfusionMeter(2)
    previous_loss = 1e100

    for epoch in range(400):
        loss_meter.reset()
        confusion_matrix.reset()

        # FIX: loop variable renamed from `property`, which shadowed the
        # `property` builtin.
        for ii, (data, prop, target) in tqdm(enumerate(train_dataloader)):
            input = Variable(data).cuda()
            input2 = Variable(prop).cuda()
            target = Variable(target).cuda()
            output = model(input, input2)

            optimizer.zero_grad()
            loss = criterion(output, target)
            loss.backward()
            optimizer.step()
            # FIX: .data[0] fails on 0-dim tensors in PyTorch >= 0.5;
            # .item() is the supported scalar accessor.
            loss_meter.add(loss.item())

            if ii % 100 == 99:
                vis.plot('loss', loss_meter.value()[0])

        if epoch % 3 == 2:
            train_cm, train_f1 = val(model, train_dataloader)
            vis.plot('train_f1', train_f1)
        val_cm, val_f1 = val(model, val_dataloader)

        vis.plot_many({'val_f1': val_f1, 'learning_rate': learning_rate})
        if loss_meter.value()[0] > previous_loss:
            learning_rate = learning_rate * 0.9
            # Lower the lr in place so optimizer state (momentum etc.)
            # is preserved.
            for param_group in optimizer.param_groups:
                param_group['lr'] = learning_rate

        previous_loss = loss_meter.value()[0]

        if epoch % 3 == 2:
            model.save()
            test_cm, test_f1 = val(model, test_dataloader)
            vis.plot('test_f1', test_f1)
            vis.log(
                "训练集:{train_f1:%}, {train_pre:%}, {train_rec:%} | 验证集:{val_f1:%}, {val_pre:%}, {val_rec:%} | \
            测试集:{test_f1:%}, {test_pre:%}, {test_rec:%} | {train_true_num:%}, {val_true_num:%}, {test_true_num:%}"
                .format(
                    train_f1=train_f1,
                    val_f1=val_f1,
                    test_f1=test_f1,
                    train_true_num=train_cm.value()[:, 0].sum() /
                    len(train_data),
                    val_true_num=val_cm.value()[:, 0].sum() / len(val_data),
                    test_true_num=test_cm.value()[:, 0].sum() / len(test_data),
                    train_pre=train_cm.value()[0][0] /
                    train_cm.value()[0].sum(),
                    train_rec=train_cm.value()[0][0] /
                    train_cm.value()[:, 0].sum(),
                    val_pre=val_cm.value()[0][0] / val_cm.value()[0].sum(),
                    val_rec=val_cm.value()[0][0] / val_cm.value()[:, 0].sum(),
                    test_pre=test_cm.value()[0][0] / test_cm.value()[0].sum(),
                    test_rec=test_cm.value()[0][0] /
                    test_cm.value()[:, 0].sum()))
示例#25
0
def train(opt):
    """Fine-tune a 120-class classifier (dog breeds).

    Optimizes only the classifier head at first, calls
    ``set_requires_grad()`` at epoch 20, validates each epoch, and decays
    the lr when the mean epoch loss stops improving.
    """
    model_train = getattr(model, opt.model)()
    vis = Visualizer(opt.env)

    if opt.load_model_path:
        model_train.load(opt.load_model_path)
    if opt.use_gpu:
        model_train.cuda()

    train_dataloader = dataloader(opt.train_data_root,
                            train=True,
                            batch_size=opt.batch_size,
                            shuffle=True,
                            num_workers=opt.num_workers)
    val_dataloader = dataloader(opt.valid_data_root,
                            train=False,
                            batch_size=opt.batch_size,
                            shuffle=True,
                            num_workers=opt.num_workers)
    criterion = torch.nn.CrossEntropyLoss()
    lr = opt.lr
    # Only the classifier head is optimized here; the rest of the network
    # is handled via set_requires_grad() below.
    optimizer = torch.optim.Adam(model_train.classifier.parameters(),
                                lr=lr,
                                weight_decay=opt.weight_decay)

    # meters
    loss_meter = meter.AverageValueMeter()
    confusion_matrix = meter.ConfusionMeter(120)
    previous_loss = 1e100

    for epoch in range(opt.max_epoch):
        loss_meter.reset()
        confusion_matrix.reset()

        if epoch == 20:
            model_train.set_requires_grad()

        for ii, (data, label) in tqdm(enumerate(train_dataloader)):
            if opt.use_gpu:
                data = data.cuda()
                label = label.cuda()

            optimizer.zero_grad()
            score = model_train(data)
            loss = criterion(score, label)
            loss.backward()
            optimizer.step()

            loss_meter.add(loss.item())
            confusion_matrix.add(score.data, label.data)

            # FIX: the original condition `if ii % opt.print_freq:` was
            # inverted -- it plotted on every iteration EXCEPT multiples
            # of print_freq.
            if ii % opt.print_freq == opt.print_freq - 1:
                # print(ii, ' loss: ', loss_meter.value()[0])
                vis.plot('loss', loss_meter.value()[0])

        model_train.save(opt.save_model_path+opt.model+'_'+str(epoch))

        # validate and visualize
        val_cm, val_accuracy = val(model_train, val_dataloader, opt)
        vis.plot('val_accuracy', val_accuracy)
        # vis.log()

        # update learning rate
        if loss_meter.value()[0] > previous_loss:
            lr = lr * opt.lr_decay
            for param in optimizer.param_groups:
                param['lr'] = lr

        previous_loss = loss_meter.value()[0]
示例#26
0
def train(**kwargs):
    """Update config from command-line kwargs, then train the seizure
    (Ictal) classifier: SGD loop with per-epoch validation, visdom
    plotting, and loss-plateau lr decay. Prints total wall time at the end.
    """
    opt.parse(kwargs)
    vis = Visualizer(opt.env)
    """(1)step1:加载网络,若有预训练模型也加载"""
    # (1) build the network. NOTE(review): despite the marker above, no
    # pretrained weights are actually loaded here -- confirm intended.
    model = getattr(models, opt.model)()
    """(2)step2:处理数据"""
    # (2) datasets / dataloaders
    train_data = Ictal(opt.train_data_root, opt.model, train=True)  # training split
    val_data = Ictal(opt.train_data_root, opt.model, train=False)  # validation split

    train_dataloader = DataLoader(train_data,
                                  opt.batch_size,
                                  shuffle=True,
                                  num_workers=opt.num_workers)
    val_dataloader = DataLoader(val_data,
                                opt.batch_size,
                                shuffle=False,
                                num_workers=opt.num_workers)
    """(3)step3:定义损失函数和优化器"""
    criterion = t.nn.CrossEntropyLoss()  # cross-entropy loss
    lr = opt.lr  # learning rate (decayed below when loss plateaus)
    # NOTE(review): SGD is given opt.lr directly, not the local `lr`
    # (same value here, but decays below only touch param_groups).
    optimizer = t.optim.SGD(model.parameters(),
                            lr=opt.lr,
                            weight_decay=opt.weight_decay)
    """(4)step4:统计指标,平滑处理之后的损失,还有混淆矩阵"""
    # (4) meters: smoothed loss plus a 2-class confusion matrix
    loss_meter = meter.AverageValueMeter()
    confusion_matrix = meter.ConfusionMeter(2)
    previous_loss = 1e10

    start = time.time()
    """(5)开始训练"""
    # (5) training loop
    for epoch in range(opt.max_epoch):
        loss_meter.reset()
        confusion_matrix.reset()
        for ii, (data, label) in enumerate(train_dataloader):
            # train model parameters
            input = Variable(data)
            if opt.model == 'CNN_1d':
                # 1-D CNN expects channels before the sequence dimension
                input = input.permute(0, 2, 1)

            target = Variable(label)

            # zero gradients
            optimizer.zero_grad()
            score = model(input)

            loss = criterion(score, target)
            loss.backward()  # backpropagate

            # update parameters
            optimizer.step()

            # update meters and visualize
            loss_meter.add(loss.item())
            # print score.shape, target.shape
            confusion_matrix.add(score.detach(), target.detach())

            if ii % opt.print_freq == opt.print_freq - 1:
                vis.plot('loss', loss_meter.value()[0])
                # enter debug mode when the debug flag file exists
                if os.path.exists(opt.debug_file):
                    import ipdb
                    ipdb.set_trace()
        model.save(epoch)
        """计算验证集上的指标及可视化"""
        # validation metrics and visualization
        val_cm, val_accuracy = val(model, val_dataloader, opt.model)
        vis.plot('val_accuracy', val_accuracy)
        vis.log(
            "epoch:{epoch},lr:{lr},loss:{loss},train_cm:{train_cm},val_cm:{val_cm}"
            .format(epoch=epoch,
                    loss=loss_meter.value()[0],
                    val_cm=str(val_cm.value()),
                    train_cm=str(confusion_matrix.value()),
                    lr=lr))

        tra_cm, tra_accuracy = val(model, train_dataloader, opt.model)

        print("epoch:", epoch, "loss:",
              loss_meter.value()[0], "val_accuracy:", val_accuracy,
              "tra_accuracy:", tra_accuracy)
        """如果损失不再下降,则降低学习率"""
        # lower the learning rate if the loss stops decreasing
        if loss_meter.value()[0] > previous_loss:
            lr = lr * opt.lr_decay
            for param_group in optimizer.param_groups:
                param_group["lr"] = lr

        previous_loss = loss_meter.value()[0]
    end = time.time()

    print(end - start)
def train(**kwargs):
    """Train a 2-class text-image classifier.

    Loads a checkpoint via ``model.load_new`` when ``opt.load_model_path``
    is set, otherwise applies ``weight_init``; trains with cross-entropy,
    plots loss/accuracy to visdom, checkpoints every second epoch, and
    decays the lr when the mean epoch loss stops improving.
    """
    opt._parse(kwargs)
    vis = Visualizer(opt.env, port=opt.vis_port)

    # step1: configure model
    model = getattr(models, opt.model)()
    if opt.load_model_path:
        model.load_new(opt.load_model_path)
    else:
        print('Initialize the model!')
        model.apply(weight_init)

    model.to(opt.device)

    # step2: data
    train_data = TextData(opt.data_root, opt.train_txt_path)
    val_data = TextData(opt.data_root, opt.val_txt_path)
    train_dataloader = DataLoader(train_data,
                                  opt.batch_size,
                                  shuffle=True,
                                  num_workers=opt.num_workers)
    val_dataloader = DataLoader(val_data,
                                opt.batch_size,
                                shuffle=False,
                                num_workers=opt.num_workers)

    # step3: criterion and optimizer
    criterion = t.nn.CrossEntropyLoss()
    lr = opt.lr
    optimizer = model.get_optimizer(lr, opt.weight_decay)

    # step4: meters
    loss_meter = meter.AverageValueMeter()
    confusion_matrix = meter.ConfusionMeter(2)
    previous_loss = 1e10

    # train
    for epoch in range(opt.max_epoch):

        loss_meter.reset()
        confusion_matrix.reset()

        for ii, (data, label) in tqdm(enumerate(train_dataloader)):
            # train model
            input = data.to(opt.device)
            target = label.to(opt.device)
            optimizer.zero_grad()
            score = model(input)
            loss = criterion(score, target)
            loss.backward()
            # gradient-inspection hooks kept for debugging:
            #for n, p in model.named_parameters():
            #    print(n)
            #    h = p.register_hook(lambda grad: print(grad))
            optimizer.step()

            # meters update and visualize
            loss_meter.add(loss.item())
            confusion_matrix.add(score.data, target.data)
            if ii % opt.print_freq == 0:
                vis.plot('loss', loss_meter.value()[0])

                # enter debug mode when the debug flag file exists
                if os.path.exists(opt.debug_file):
                    ipdb.set_trace()
            if ii % (opt.print_freq * 10) == 0:
                # show the current input batch in visdom
                vis.images(input.cpu().numpy(),
                           opts=dict(title='Label', caption='Label'),
                           win=1)
                print('Epoch: {} Iter: {} Loss: {}'.format(epoch, ii, loss))

        if epoch % 2 == 0:
            model.save('./checkpoints/' + opt.env + '_' + str(epoch) + '.pth')

        # validate and visualize
        val_cm, val_accuracy = val(model, val_dataloader)

        vis.plot('val_accuracy', val_accuracy)
        vis.log(
            "epoch:{epoch},lr:{lr},loss:{loss},train_cm:{train_cm},val_cm:{val_cm}"
            .format(epoch=epoch,
                    loss=loss_meter.value()[0],
                    val_cm=str(val_cm.value()),
                    train_cm=str(confusion_matrix.value()),
                    lr=lr))
        # training accuracy from the 2x2 confusion matrix diagonal
        train_cm = confusion_matrix.value()
        t_accuracy = 100. * (train_cm[0][0] +
                             train_cm[1][1]) / (train_cm.sum())
        vis.plot('train_accuracy', t_accuracy)
        # lower the lr when the mean epoch loss stops improving
        if loss_meter.value()[0] > previous_loss:
            lr = lr * opt.lr_decay
            for param_group in optimizer.param_groups:
                param_group['lr'] = lr

        previous_loss = loss_meter.value()[0]
示例#28
0
def train(**kwargs):
    """Train a 17-class flower classifier.

    Parses CLI overrides into ``opt``, builds the model named by
    ``opt.model`` (optionally pretrained or resumed from a checkpoint),
    trains on the ``Flower`` dataset, validates and tests every epoch,
    and tracks the checkpoint with the best validation accuracy.

    Args:
        **kwargs: configuration overrides forwarded to ``opt.parse``.
    """
    # load kwargs into the global config object
    opt.parse(kwargs)
    print(kwargs)

    # visdom
    vis = Visualizer(opt.env)

    # log the effective config to visdom
    vis.log('user config:')
    for k, v in opt.__class__.__dict__.items():
        if not k.startswith('__'):
            vis.log('{} {}'.format(k, getattr(opt, k)))

    # configure model
    model = getattr(models, opt.model)()

    if opt.use_pretrained_model:
        model = load_pretrained()

    if opt.load_model_path:
        # resume from an existing checkpoint
        model.load(opt.load_model_path)
    elif opt.use_weight_init:
        # fresh model: apply custom weight initialization
        model.apply(weight_init)
    # if use GPU
    if opt.use_gpu:
        model.cuda()

    # datasets and loaders (train / validation / test splits)
    train_data = Flower(train=True)
    val_data = Flower(train=False)
    test_data = Flower(test=True)

    train_dataloader = DataLoader(train_data,
                                  opt.batch_size,
                                  shuffle=True,
                                  num_workers=opt.num_workers)
    val_dataloader = DataLoader(val_data,
                                opt.batch_size,
                                shuffle=True,
                                num_workers=opt.num_workers)
    test_dataloader = DataLoader(test_data,
                                 opt.batch_size,
                                 shuffle=False,
                                 num_workers=opt.num_workers)

    # criterion and optimizer: SGD + Nesterov momentum for DenseNet
    # variants, Adam for everything else
    criterion = torch.nn.CrossEntropyLoss()
    lr = opt.lr
    if 'Dense' in opt.model:
        optimizer = torch.optim.SGD(model.parameters(),
                                    lr=lr,
                                    momentum=0.9,
                                    nesterov=True,
                                    weight_decay=opt.weight_decay)
    else:
        optimizer = torch.optim.Adam(model.parameters(),
                                     lr=lr,
                                     weight_decay=opt.weight_decay)

    # meters: running epoch loss and a 17-class confusion matrix
    loss_meter = meter.AverageValueMeter()
    confusion_matrix = meter.ConfusionMeter(17)
    previous_loss = 1e100

    best_accuracy = 0

    # start training
    for epoch in range(opt.max_epoch):
        loss_meter.reset()
        confusion_matrix.reset()

        # FIX: loop variable was misspelled "bactch_index"
        for batch_index, (data, label) in tqdm(enumerate(train_dataloader)):

            # train model
            input = Variable(data)
            target = Variable(label)
            # gpu update
            if opt.use_gpu:
                input = input.cuda()
                target = target.cuda()

            optimizer.zero_grad()
            score = model(input)
            loss = criterion(score, target)
            loss.backward()
            optimizer.step()

            # FIX: loss.data[0] raises on PyTorch >= 0.4 (indexing a
            # 0-dim tensor); loss.item() is the supported scalar accessor
            # and is what the sibling examples in this file use.
            loss_meter.add(loss.item())

            # score: [batch_size, 17], target: [batch_size]
            confusion_matrix.add(score.data, target.data)

            # plot running loss every print_freq batches
            if batch_index % opt.print_freq == opt.print_freq - 1:
                # cross_entropy
                print('loss ', loss_meter.value()[0])
                # visualize loss
                vis.plot('loss', loss_meter.value()[0])

        # periodic checkpoint (only models exposing .save())
        if opt.use_pretrained_model is False and epoch % opt.save_freq == 0:
            model.save()

        # validate
        val_cm, val_accuracy = val(model, val_dataloader)

        # test
        test_cm, test_accuracy = val(model, test_dataloader)

        print('Epoch {}/{}: val_accuracy  {}'.format(epoch, opt.max_epoch,
                                                     val_accuracy))

        # plot vis
        vis.plot('val_accuracy', val_accuracy)
        vis.plot('test_accuracy', test_accuracy)
        vis.log('epoch:{epoch}, lr:{lr}, loss:{loss}'.format(
            epoch=epoch, loss=loss_meter.value()[0], lr=lr))

        # keep the best validation checkpoint
        if val_accuracy > best_accuracy:
            best_accuracy = val_accuracy
            torch.save(model.state_dict(),
                       './checkpoints/best_{}.pth'.format(opt.model))
            # NOTE(review): when use_pretrained_model is False this writes a
            # second checkpoint via model.save(); if model.model_name equals
            # opt.model it silently overwrites the state_dict file above in a
            # different format — confirm the intended serialization.
            if opt.use_pretrained_model is False:
                model.save('./checkpoints/best_{}.pth'.format(
                    model.model_name))

        # decay lr when the epoch loss stopped improving
        if float(loss_meter.value()[0]) > previous_loss:
            lr = lr * opt.lr_decay

            for param_group in optimizer.param_groups:
                param_group['lr'] = lr

        previous_loss = loss_meter.value()[0]
    print('Best model validation accuracy {}'.format(best_accuracy))
示例#29
0
def train(**kwargs):
    """Train a dog-vs-cat binary classifier.

    Builds the model named by ``opt.model``, trains it with Adam and
    cross-entropy on the DogCat dataset, validates after every epoch,
    and decays the learning rate whenever the epoch loss worsens.

    Args:
        **kwargs: configuration overrides forwarded to ``opt.parse``.
    """
    opt.parse(kwargs)
    vis = Visualizer(opt.env)

    # step1: configure model
    model = getattr(models, opt.model)()
    if opt.retrain:
        model.load(opt.load_model_path)
    if opt.use_gpu: model.cuda()

    # step2: data
    train_data = DogCat(opt.train_data_root,train=True)
    val_data = DogCat(opt.train_data_root,train=False)
    train_dataloader = DataLoader(train_data,opt.batch_size,
                        shuffle=True,num_workers=opt.num_workers)
    val_dataloader = DataLoader(val_data,opt.batch_size,
                        shuffle=False,num_workers=opt.num_workers)
    
    # step3: criterion and optimizer
    criterion = t.nn.CrossEntropyLoss()
    lr = opt.lr
    optimizer = t.optim.Adam(model.parameters(),lr = lr,weight_decay = opt.weight_decay)
        
    # step4: meters (running loss + 2-class confusion matrix)
    loss_meter = meter.AverageValueMeter()
    confusion_matrix = meter.ConfusionMeter(2)
    previous_loss = 1e100

    # train
    for epoch in range(opt.max_epoch):
        
        loss_meter.reset()
        confusion_matrix.reset()

        # FIX: tqdm's total must be the number of batches yielded by the
        # dataloader, not the number of samples; len(train_data) made the
        # progress bar report batch_size-times too many steps.
        for ii,(data,label) in tqdm(enumerate(train_dataloader),total=len(train_dataloader)):

            # train model 
            input = Variable(data)
            target = Variable(label)
            if opt.use_gpu:
                input = input.cuda()
                target = target.cuda()

            optimizer.zero_grad()
            score = model(input)
            loss = criterion(score,target)
            loss.backward()
            optimizer.step()
            
            
            # meters update and visualize
            loss_meter.add(loss.item())
            confusion_matrix.add(score.data, target.data)

            if ii%opt.print_freq==opt.print_freq-1:
                vis.plot('loss', loss_meter.value()[0])
                
                # enter debug mode when the sentinel file exists
                if os.path.exists(opt.debug_file):
                    import ipdb;
                    ipdb.set_trace()

        # NOTE(review): this overwrites the checkpoint at the *load* path
        # every epoch (and opt.load_model_path may be unset when
        # opt.retrain is False) — confirm an epoch-stamped path was not
        # intended, as in the sibling examples in this file.
        model.save(opt.load_model_path)

        # validate and visualize
        val_cm,val_accuracy = val(model,val_dataloader)

        vis.plot('val_accuracy',val_accuracy)
        vis.log("epoch:{epoch},lr:{lr},loss:{loss},train_cm:{train_cm},val_cm:{val_cm}".format(
                    epoch = epoch,loss = loss_meter.value()[0],val_cm = str(val_cm.value()),train_cm=str(confusion_matrix.value()),lr=lr))
        
        # update learning rate: decay when the epoch loss got worse
        if loss_meter.value()[0] > previous_loss:          
            lr = lr * opt.lr_decay
            # second way to lower the lr: mutate param_groups in place so
            # optimizer state (momentum etc.) is not lost
            for param_group in optimizer.param_groups:
                param_group['lr'] = lr
        

        previous_loss = loss_meter.value()[0]
示例#30
0
            #--update_netd--    Update D network: Ladv = |f(real) - f(fake)|_2
            #self.pred_real, self.feat_real = self.netd(self.input)
            #self.pred_fake, self.feat_fake = self.netd(self.fake.detach())
            netd.zero_grad()
            fake, latent_i, latent_o = netg(img_st)
            out_d_real, feat_true = netd(img_st)
            out_d_fake, feat_fake = netd(fake.detach())
            err_d = .5 * criterion_BCE(
                out_d_real, y_real_) + .5 * criterion_BCE(
                    out_d_fake, y_fake_)  #+ criterion_L2(feat_real, feat_fake)
            err_d.backward(retain_graph=True)
            optimizer_d.step()
            optimizer_f.step()
            errord_meter.add(err_d.data.cpu().numpy())
            vis.plot('errord', errord_meter.value()[0])
            # If D loss is zero, then re-initialize netD
            if err_d.item() < 1e-5:
                netd.apply(weights_init)

            #--update_netg--    Update G network: log(D(G(x)))  + ||G(x) - x||
            netg.zero_grad()
            #out_g, _ = netd(fake)
            err_g_bce = criterion_L2(feat_true, feat_fake)  # l_adv
            err_g_l1l = criterion_L1(fake, img_st)  # l_con
            err_g_enc = criterion_L2(latent_i, latent_o)  # l_enc
            err_g = err_g_bce * config.w_bce + err_g_l1l * config.w_rec + err_g_enc * config.w_enc
            err_g.backward()
            optimizer_g.step()
            optimizer_f.step()
            errorg_meter.add(err_g.data.cpu().numpy())
示例#31
0
def train(**kwargs):
    """Train a multi-class DogCat classifier.

    Builds the model named by ``opt.model`` with ``opt.num_class``
    outputs, trains it with Adam and cross-entropy, validates each
    epoch, and decays the learning rate when the epoch loss worsens.

    Args:
        **kwargs: configuration overrides forwarded to ``opt.parse``.
    """
    # NOTE(review): sibling examples call opt.parse(kwargs) without
    # unpacking — confirm opt.parse really accepts **kwargs here.
    opt.parse(**kwargs)
    # step1: configure model
    model = getattr(models,opt.model)(opt.num_class)
    if opt.load_model_path:
        model.load(opt.load_model_path)
    if opt.use_gpu:
        model.cuda()

    # step2: data
    train_data = DogCat(opt.train_data_path, transform=opt.train_transform, train = True)
    val_data = DogCat(opt.train_data_path, transform=opt.test_val_transform, train = False, test= False)
    train_dataloader = DataLoader(train_data, batch_size= opt.batch_size, shuffle=opt.shuffle, num_workers=opt.num_workers)
    val_dataloader   = DataLoader(val_data,   batch_size= opt.batch_size, shuffle=opt.shuffle, num_workers=opt.num_workers)

    # step3: criterion and optimizer
    criterion = t.nn.CrossEntropyLoss()
    lr = opt.lr
    optimizer = t.optim.Adam(params=model.parameters(), lr=lr, weight_decay=opt.weight_decay)

    # step4: meters
    loss_meter = meter.AverageValueMeter()                   # tracks the mean loss over one epoch
    confusion_matrix = meter.ConfusionMeter(opt.num_class)
    previous_loss=1e6
    # step5: train
    vis  = Visualizer(opt.env)
    for epoch in range(opt.max_epoch):
        loss_meter.reset()
        confusion_matrix.reset()

        for ii,(data, label) in tqdm(enumerate(train_dataloader)):
            # train model
            input = Variable(data)
            target  = Variable(label)
            if opt.use_gpu:
                input = input.cuda()
                target = target.cuda()
            optimizer.zero_grad()
            score = model(input)
            loss = criterion(score,target)
            loss.backward()
            optimizer.step()

            # FIX: loss.data passed a (deprecated) tensor to the meter;
            # loss.item() yields the Python float AverageValueMeter
            # expects, matching the sibling examples in this file.
            loss_meter.add(loss.item())
            confusion_matrix.add(score.data, target.data)

            # ipdb.set_trace()
            if ii%opt.print_freq == opt.print_freq-1:
                vis.plot(win='loss', y=loss_meter.value()[0])

        model.save()

        # step6: validate and visualize
        val_confusion_matrix, val_accuracy = val(model, val_dataloader)
        vis.plot(win='val_accuracy',y=val_accuracy)
        vis.log(win='log_text', info=
                'epoch:{epoch}, lr:{lr}, loss:{loss}, train_cm:{train_cm}, val_cm:{val_cm}'.format(
                    epoch=epoch,lr=lr,loss=loss_meter.value()[0],train_cm=str(confusion_matrix.value()),val_cm=str(val_confusion_matrix)
                )
                )

        # step7: decay learning_rate when the epoch loss got worse
        if loss_meter.value()[0] > previous_loss:
            lr=lr*opt.lr_decay
            for param_group in optimizer.param_groups:
                param_group['lr']=lr

        previous_loss=loss_meter.value()[0]