def __init__(self, net, lr, name, evalmode=False):
    """Set up a segmentation training wrapper around an already-built network.

    Args:
        net: a constructed model instance (not a class); must expose
            `decoder.dupsample.conv_w` / `conv_p` submodules.
        lr: base learning rate for both SGD optimizers.
        name: run name, stored in `self.net_name`.
        evalmode: if True, switch every BatchNorm2d layer to eval mode
            (freezes running statistics).
    """
    self.model = net
    # Multi-GPU wrapper; note the original (unwrapped) model is kept in
    # self.model and is the one the optimizers/criterion are built from.
    self.cuda_net = torch.nn.DataParallel(self.model, device_ids=range(torch.cuda.device_count()))
    #self.optimizer = torch.optim.RMSprop(params=self.net.parameters(), lr=lr)
    if evalmode:
        # Freeze BatchNorm running statistics when evaluating.
        for i in self.model.modules():
            if isinstance(i, nn.BatchNorm2d):
                i.eval()
    # NOTE(review): isTrain is set True unconditionally, even in evalmode —
    # the training branch below always runs; confirm this is intended.
    self.isTrain = True
    self.num_classes = 1  # binary segmentation
    # NOTE: attribute name contains a typo ("tensorborad") but is preserved —
    # sibling methods outside this view may reference it.
    self.tensorborad_dir = "log/tensorboard_log/"
    self.model_dir = "weights/"
    # self.lr = 0.007
    self.lr = lr
    self.lr_power = 0.9   # exponent for polynomial LR decay
    self.momentum = 0.9
    self.wd = 0.0001  # weight decay
    self.accum_steps = 1
    self.iterSize = 10
    self.net_name = name
    self.which_epoch = 0
    # self.device =
    if self.isTrain:
        # self.criterionSeg = torch.nn.CrossEntropyLoss(ignore_index=255).cuda() # maybe edit
        # Change the crossentropyloss to BCEloss
        # self.criterionSeg = torch.nn.BCELoss().cuda()
        # self.criterionSeg = loss_func().cuda()
        self.criterionSeg = dice_bce_loss().cuda()
        # Main optimizer over all trainable parameters.
        self.optimizer = torch.optim.SGD(filter(lambda p: p.requires_grad, self.model.parameters()), lr=self.lr, momentum=self.momentum, weight_decay=self.wd)
        # Separate optimizer for the DUpsample projection convs (no weight decay).
        # NOTE(review): these params are also covered by self.optimizer above,
        # so they receive two updates per step — confirm intended.
        params_w = list(self.model.decoder.dupsample.conv_w.parameters())
        params_p = list(self.model.decoder.dupsample.conv_p.parameters())
        self.optimizer_w = torch.optim.SGD(params_w + params_p, lr=self.lr, momentum=self.momentum)
        self.old_lr = self.lr
        self.averageloss = []  # per-iteration losses, averaged for logging
        self.writer = SummaryWriter(self.tensorborad_dir)
        self.counter = 0
    # Move the model to GPU after optimizer construction (SGD holds parameter
    # references, and Module.cuda() moves parameters in place, so this is safe).
    self.model.cuda()
    self.normweightgrad = 0.
def __init__(self, net, lr=2e-4, evalmode=False, batchsize=1):
    """Build a training/eval wrapper: instantiate the network on GPU, wrap it
    in DataParallel, and set up the Adam optimizer and loss functions.

    Args:
        net: network *class* (it is called here to build the instance).
        lr: Adam learning rate; also stored in `self.old_lr`.
        evalmode: if True, freeze every BatchNorm2d layer in eval mode.
        batchsize: stored for later use by callers.
    """
    self.net = net().cuda()
    self.net = torch.nn.DataParallel(self.net, device_ids=range(torch.cuda.device_count()))
    self.optimizer = torch.optim.Adam(params=self.net.parameters(), lr=lr)
    #self.optimizer = torch.optim.RMSprop(params=self.net.parameters(), lr=lr)
    # FIX: was `nn.Parameter(torch.zeros((2))).cuda()`. Calling .cuda() on a
    # Parameter returns a plain, non-leaf tensor copy, silently dropping its
    # Parameter status. Build the Parameter from a tensor already on the GPU.
    # NOTE(review): log_vars is still not registered with any optimizer here,
    # so it will not be updated unless added elsewhere — confirm intended use.
    self.log_vars = nn.Parameter(torch.zeros(2).cuda())
    self.segloss = dice_bce_loss()
    self.edgeloss = edge_loss()
    # self.Dualloss=DualTaskLoss()
    # self.edgeattention=ImageBasedCrossEntropyLoss2d(1)
    self.old_lr = lr
    # self.confusion_matrix = np.zeros((2,) * 2)
    self.batchsize = batchsize
    # self.evaluator = Evaluator(2)
    if evalmode:
        # Freeze BatchNorm running statistics for evaluation.
        for m in self.net.modules():
            if isinstance(m, nn.BatchNorm2d):
                m.eval()
# Top-level script section: resolve the run directory, build the BRU_net
# model and loss, and optionally resume from a saved epoch checkpoint.
# Relies on names defined earlier in the script (not visible here):
# `runs`, `save_dir_root`, `backbone`, `resume_epoch`, `gpu_id`.
run_id = int(runs[-1].split('_')[-1]) + 1 if runs else 0  # next free run index
save_dir = os.path.join(save_dir_root, 'run')
# Network definition
# if backbone == 'xception':
#     net = deeplab_xception.DeepLabv3_plus(nInputChannels=3, n_classes=12, os=16, pretrained=False)
# elif backbone == 'resnet':
#     net = deeplab_resnet.DeepLabv3_plus(nInputChannels=3, n_classes=12, os=16, pretrained=False)
# else:
#     raise NotImplementedError
net = BRU_net.BRU_net(n_classes=12)
# NOTE(review): modelName still says "deeplabv3plus" although BRU_net is
# used; kept because resume paths below depend on this exact string.
modelName = 'deeplabv3plus-' + backbone + '-voc'
criterion = loss.dice_bce_loss()
# criterion = loss.FocalLoss(gamma=2)
print("Backbone network is {}".format(backbone))
if resume_epoch == 0:
    print("Training deeplabv3+ from scratch...")
else:
    # Resume: load the checkpoint of the previous epoch.
    print("Initializing weights from: {}...".format(
        os.path.join(save_dir, 'models', modelName + '_epoch-' + str(resume_epoch - 1) + '.pth')))
    net.load_state_dict(
        torch.load(os.path.join(save_dir, 'models', modelName + '_epoch-' + str(resume_epoch - 1) + '.pth'),
                   map_location=lambda storage, loc: storage))  # Load all tensors onto the CPU
if gpu_id >= 0:
    torch.cuda.set_device(device=gpu_id)
def fpn_Net_Train(train_i=0):
    """Train an FPN_Net on cross-validation fold `train_i`.

    Trains with Adam + dice/BCE loss, evaluates the full test fold once per
    epoch, keeps the best model by test dice score, and decays the LR after
    `Constants.NUM_UPDATE_LR` epochs without training-loss improvement.

    Args:
        train_i: zero-based fold index (fold `train_i + 1` in file names).
    """
    NAME = 'fold' + str(train_i + 1) + '3fpn-Net'
    model = FPN_Net(1, 1).cuda()
    model = torch.nn.DataParallel(model, device_ids=range(torch.cuda.device_count()))
    # model = FPN_Net(1, 1)
    # print(model)
    folds = data2()
    batch_size = 4
    (x_train, y_train), (x_test, y_test) = load_numpy(folds, train_i)
    dataset = torch.utils.data.TensorDataset(x_train, y_train)
    data_loader = torch.utils.data.DataLoader(
        dataset, batch_size=batch_size, shuffle=True, num_workers=4)
    mylog = open('logs/' + NAME + '.log', 'w')
    tic = time()
    no_optim = 0
    lr = 2e-4
    total_epoch = 300
    train_epoch_best_loss = 10000
    best_test_score = 0
    decay_factor = 1.5
    optimizer = torch.optim.Adam(params=model.parameters(), lr=lr)
    loss_com = dice_bce_loss()
    for epoch in range(1, total_epoch + 1):
        data_loader_iter = iter(data_loader)
        train_epoch_loss = 0
        train_score = 0
        model.train()
        for img, mask in data_loader_iter:
            # Plain tensors; Variable(..., volatile=...) is long deprecated.
            img = img.cuda()
            mask = mask.cuda()
            optimizer.zero_grad()
            pre = model(img)
            loss = loss_com(mask, pre)
            loss.backward()
            optimizer.step()
            # FIX: accumulate a Python float — accumulating the loss tensor
            # kept every batch's autograd graph alive (GPU memory leak).
            train_epoch_loss += loss.item()
            train_score_b = dice_coeff(mask, pre, False)
            train_score += train_score_b * batch_size
        train_score /= x_train.size(0)
        train_epoch_loss /= len(data_loader_iter)
        # FIX: evaluate in eval mode and without building autograd graphs.
        model.eval()
        with torch.no_grad():
            test_img = x_test.cuda()
            test_mask = y_test.cuda()
            pre_test = model(test_img)
            loss_test = loss_com(test_mask, pre_test)
            test_score = dice_coeff(test_mask, pre_test, False)
        if test_score > best_test_score:
            print('1. the dice score up to ', test_score, 'from ', best_test_score, 'saving the model', file=mylog, flush=True)
            print('1. the dice score up to ', test_score, 'from ', best_test_score, 'saving the model')
            best_test_score = test_score
            # FIX: `solver.save(...)` referenced an undefined name (NameError);
            # save the model's state dict directly.
            torch.save(model.state_dict(), './weights/' + NAME + '.th')
        print('********', file=mylog, flush=True)
        print('epoch:', epoch, ' time:', int(time() - tic), 'train_loss:', train_epoch_loss, 'train_score:', train_score, file=mylog, flush=True)
        print('test_loss:', loss_test, 'test_dice_score: ', test_score, 'best_score is ', best_test_score, file=mylog, flush=True)
        print('********')
        print('epoch:', epoch, ' time:', int(time() - tic), 'train_loss:', train_epoch_loss, 'train_score:', train_score)
        print('test_loss:', loss_test, 'test_dice_score: ', test_score, 'best_score is ', best_test_score)
        if train_epoch_loss >= train_epoch_best_loss:
            no_optim += 1
        else:
            no_optim = 0
            train_epoch_best_loss = train_epoch_loss
        if no_optim > Constants.NUM_UPDATE_LR:
            # FIX: `solver.old_lr` referenced an undefined name; the local
            # `lr` is the value actually applied to the optimizer.
            if lr < 5e-7:
                break
            if lr > 5e-5:
                # Rewind to the best checkpoint before lowering the LR.
                model.load_state_dict(torch.load('./weights/' + NAME + '.th'))
            lr /= decay_factor
            for param_group in optimizer.param_groups:
                param_group['lr'] = lr
    print('Finish!', file=mylog, flush=True)
    print('Finish!')
    mylog.close()
def train_net(
    net,
    epochs=5,
    batch_size=1,
    lr=0.001,
    save_cp=True,
    gpu=False,
):
    """Train `net` on the SSDataset with SGD + dice/BCE loss.

    Saves a checkpoint per epoch (when `save_cp`) and writes a loss curve
    to ``unet_loss.jpg``.

    Args:
        net: model to train (moved to the detected device here).
        epochs: number of training epochs.
        batch_size: mini-batch size for `utils.batch`.
        lr: SGD learning rate.
        save_cp: whether to write per-epoch checkpoints.
        gpu: whether to move each batch to CUDA.
    """
    dir_img = 'img/'
    dir_label = 'label/'
    dir_checkpoint = 'checkpoints/'
    dataset = SSDataset(dir_img, dir_label)[0]
    split = int(len(dataset) * 0.8)
    train_dataset = [dataset[i] for i in range(split)]
    # FIX: the validation split previously took the FIRST 20% of the data,
    # which is entirely contained in the training split. Use the tail instead.
    test_dataset = [dataset[i] for i in range(split, len(dataset))]
    print('''
    Starting training:
        Epochs: {}
        Batch size: {}
        Learning rate: {}
        Training size: {}
        Validation size: {}
        Checkpoints: {}
        CUDA: {}
    '''.format(epochs, batch_size, lr, len(train_dataset), len(test_dataset), str(save_cp), str(gpu)))
    optimizer = optim.SGD(net.parameters(), lr=lr, momentum=0.9, weight_decay=0.0005)
    criterion = dice_bce_loss()
    USE_CUDA = torch.cuda.is_available()
    device = torch.device("cuda:0" if USE_CUDA else "cpu")
    # FIX: move the network to its device once, instead of once per batch
    # inside the training loop (Module.to is in-place, so the optimizer's
    # parameter references remain valid).
    net.to(device)
    loss_graph = []
    x = []
    count = 0
    for epoch in range(epochs):
        print('Starting epoch {}/{}.'.format(epoch + 1, epochs))
        net.train()
        epoch_loss = 0
        batch = utils.batch(train_dataset, batch_size)
        for o in range(len(batch)):
            img = torch.from_numpy(np.array(batch[o][0]).astype(np.float32))
            label = torch.from_numpy(np.array(batch[o][1]))
            img = img.permute(0, 3, 1, 2)  # NHWC -> NCHW
            #label = label.reshape(batch_size, 1, 512, 512)
            if gpu:
                img = img.cuda()
                label = label.cuda()
                #net = torch.nn.DataParallel(net).cuda()
            output = net(img)
            # Flatten both sides so the dice/BCE loss sees matching 1-D views.
            output_final = output.view(-1).float()
            label_final = label.view(-1).float()
            #print(label_final)
            loss = criterion(output_final, label_final)
            epoch_loss = epoch_loss + loss.item()
            optimizer.zero_grad()
            # NOTE(review): anomaly detection slows training considerably;
            # kept from the original — consider removing once stable.
            with torch.autograd.set_detect_anomaly(True):
                loss.backward()
            optimizer.step()
        # FIX: average over the number of batches in THIS epoch; the original
        # divided the epoch's summed loss by (epoch + 1), which is meaningless.
        avg_loss = epoch_loss / len(batch)
        print('Epoch finished ! Loss: {}'.format(avg_loss))
        loss_graph.append(avg_loss)
        count += 1
        x.append(count)
        if save_cp:
            torch.save(net.state_dict(), dir_checkpoint + 'unet_CP{}.pth'.format(epoch + 1))
            print('Checkpoint {} saved !'.format(epoch + 1))
    plt.figure()
    plt.plot(x, loss_graph)
    plt.savefig("unet_loss.jpg")
def train_model(train_i):
    """Train an FSPNet (FPN_Net) on 5-fold ultrasound data, fold `train_i`.

    Trains with Adam + dice/BCE loss, evaluates on the test fold each epoch,
    and pickles the whole model whenever the test dice score improves.

    Args:
        train_i: zero-based fold index (fold `train_i + 1` in CSV names).
    """
    batchsize = 4
    i = train_i
    # NAME = 'F51_fold'+str(i+1)+'_UNet'
    # net = UNet(usegaussian=False).cuda()
    # NAME = 'GaPF5_fold'+str(i+1)+'_UNet'
    # net = UNet(usegaussian=True).cuda()
    # NAME = 'EMF5_NOpretrain_fold'+str(i+1)+'_FSPNet'
    # NAME = 'EMF5_fold'+str(i+1)+'_FSPNet'
    NAME = 'F5_fold' + str(i + 1) + '_FSPNet'
    net = FPN_Net(is_ema=False).cuda()
    # net.apply(weights_init)
    print(NAME)
    txt_train = 'D163N5fold' + str(train_i + 1) + '_train.csv'
    txt_test = 'D163N5fold' + str(train_i + 1) + '_test.csv'
    dataset_train = MyDataset(root='/home/wangke/ultrasound_data163/', txt_path=txt_train,
                              transform=transforms.ToTensor(), target_transform=transforms.ToTensor())
    dataset_test = MyDataset(root='/home/wangke/ultrasound_data163/', txt_path=txt_test,
                             transform=transforms.ToTensor(), target_transform=transforms.ToTensor())
    train_loader = torch.utils.data.DataLoader(dataset_train, batch_size=batchsize, shuffle=True, num_workers=2)
    test_loader = torch.utils.data.DataLoader(dataset_test, batch_size=batchsize, shuffle=False, num_workers=2)
    mylog = open('models/saved/' + NAME + '.log', 'w')
    total_epoch = 300
    optimizer = torch.optim.Adam(params=net.parameters(), lr=1e-3)
    # scheduler = torch.optim.lr_scheduler.StepLR(optimizer, 1.0, gamma=0.95)
    best_test_score = 0
    dice_loss = dice_bce_loss()
    for epoch in range(1, total_epoch):
        total_loss = 0
        data_loader_iter = iter(train_loader)
        data_loader_test = iter(test_loader)
        tic = time()
        train_score = 0
        net.train()
        for img, mask in data_loader_iter:
            # Plain tensors; Variable(..., volatile=...) is long deprecated.
            img = img.cuda()
            mask_v = mask.cuda()
            optimizer.zero_grad()
            output = net(img)
            loss = dice_loss(mask_v, output)
            loss.backward()
            optimizer.step()
            # FIX: accumulate a Python float — accumulating the loss tensor
            # kept every batch's autograd graph alive (GPU memory leak).
            total_loss += loss.item()
            train_score += dice_coeff(mask, output.cpu().data, False)
        test_score = 0
        test_loss = 0
        net.eval()
        with torch.no_grad():
            for img, mask in data_loader_test:
                # print(img.shape)
                img = img.cuda()
                # mask_v = V(mask.cuda(), volatile=False)
                output = net(img)
                # test_loss += dice_loss(mask_v, output)
                # print(dice_coeff(mask, output.cpu().data, False))
                test_score += dice_coeff(mask, output.cpu().data, False)
        total_loss = total_loss / len(data_loader_iter)
        train_score = train_score / len(data_loader_iter)
        test_score = test_score / len(data_loader_test)
        # test_loss = test_loss/len(data_loader_test)
        # scheduler.step()
        if test_score > best_test_score:
            best_test_score = test_score
            # NOTE(review): saves the full module object (pickle); loaders
            # that expect this format would break if changed to a state_dict.
            torch.save(net, 'models/saved/' + NAME + '.pkl')
            print('saved, ', best_test_score, file=mylog, flush=True)
            print('saved, ', best_test_score)
        print('********', file=mylog, flush=True)
        # total_loss is now a plain float, so no .cpu().data.numpy() needed.
        print('epoch:', epoch, ' time:', int(time() - tic), 'train_loss:', total_loss,
              'train_score:', train_score, 'test_score:', test_score,
              'best_score is ', best_test_score, file=mylog, flush=True)
        print('********')
        print('epoch:', epoch, ' time:', int(time() - tic), 'train_loss:', total_loss,
              'train_score:', train_score, 'test_score:', test_score,
              'best_score is ', best_test_score)
    # FIX: the log file was never closed in the original.
    mylog.close()
self.weight = weight if os.path.exists(weight): net.load_state_dict(torch.load(weight)) train_dataset = MyDataset(img_path, mask_path) val_dataset = MyDataset(val_img_path, val_mask_path) train_dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True) val_dataloader = DataLoader(val_dataset, batch_size=batch_size) adam = torch.optim.Adam(net.parameters(), lr=2e-4) sgd = torch.optim.SGD(net.parameters(), lr=0.01, momentum=0.9) loss_fun = dice_bce_loss() if __name__ == '__main__': epoch = 1 log = open(log, 'w', encoding='utf-8') log.write('epoch' + '\t' + 'loss' + '\t' + 'pa' + '\t' + 'iou' + '\t' + 'precision' + '\n') log.flush() while epoch < 300: s_time = time.time() print('epoch - {} - training'.format(epoch)) net.train() TP = FP = TN = FN = 0 pa = 0 iou = 0
def get_train(traindata_path, weights_path, NAME, lr=2e-4):
    """Train DinkNet34 on paired image/mask data with early stopping.

    Resumes from `weights_path/NAME.pth` when present, saves the best model
    back to the same path, and lowers the LR via `update_lr` after repeated
    epochs without improvement.

    Args:
        traindata_path: directory containing 'imgs' and 'save_masks'.
        weights_path: directory for the checkpoint file.
        NAME: checkpoint base name ('.pth' is appended).
        lr: initial Adam learning rate.
    """
    NAME = NAME + '.pth'
    SHAPE = (768, 768)
    model = DinkNet34()
    model.cuda()
    optimizer = torch.optim.Adam(params=model.parameters(), lr=lr)
    loss = dice_bce_loss()
    batchsize = 2
    img_root = os.path.join(traindata_path, 'imgs')
    mask_root = os.path.join(traindata_path, 'save_masks')
    dataset = ImageFolder(img_root, mask_root, SHAPE)
    data_loader = torch.utils.data.DataLoader(dataset, batch_size=batchsize, shuffle=True, num_workers=0)
    if not os.path.exists('logs'):
        os.makedirs('logs')
    if os.path.exists(os.path.join(weights_path, NAME)):
        model.load_state_dict(torch.load(os.path.join(weights_path, NAME)))
        print('....load weights successfully')
    old_lr = lr
    no_optim = 0
    total_epoch = 300
    train_epoch_best_loss = 100
    # FIX: the log file was opened but never closed; `with` guarantees closure
    # even if training raises.
    with open('logs/' + NAME + '.log', 'w') as mylog:
        for epoch in range(1, total_epoch + 1):
            data_loader_iter = iter(data_loader)
            train_epoch_loss = 0
            for img1, img2, mask1, mask2 in data_loader_iter:
                # Plain tensors; the Variable wrapper is long deprecated.
                img1 = img1.cuda()
                img2 = img2.cuda()
                mask1 = mask1.cuda()
                mask2 = mask2.cuda()
                #print('.........',img1.shape,img2.shape,mask1.shape,mask2.shape)
                pred1, pred2 = model(img1, img2)
                loss_1 = loss(mask1, pred1)
                loss_2 = loss(mask2, pred2)
                loss_all = loss_1 + loss_2
                optimizer.zero_grad()
                loss_all.backward()
                optimizer.step()
                train_epoch_loss += loss_1.item()
                train_epoch_loss += loss_2.item()
            train_epoch_loss /= len(data_loader_iter)
            print('epoch:', epoch)
            print('train_loss:', train_epoch_loss)
            if train_epoch_loss >= train_epoch_best_loss:
                no_optim += 1
            else:
                no_optim = 0
                train_epoch_best_loss = train_epoch_loss
                torch.save(model.state_dict(), os.path.join(weights_path, NAME))
            if no_optim > 10:
                print('early stop at %d epoch' % epoch)
                break
            if no_optim > 6:
                if old_lr < 5e-7:
                    break
                # Rewind to the best checkpoint before lowering the LR.
                model.load_state_dict(torch.load(os.path.join(weights_path, NAME)), strict=False)
                old_lr = update_lr(optimizer, old_lr=old_lr, new_lr=5.0, factor=True, mylog=mylog)
            mylog.flush()
run_id = int(runs[-1].split('_')[-1]) + 1 if runs else 0 save_dir = os.path.join(save_dir_root, 'run/CE-Net-topcon-ce-loss') # Network definition # if backbone == 'xception': # net = deeplab_xception.DeepLabv3_plus(nInputChannels=3, n_classes=12, os=16, pretrained=False) # elif backbone == 'resnet': # net = deeplab_resnet.DeepLabv3_plus(nInputChannels=3, n_classes=12, os=16, pretrained=False) # else: # raise NotImplementedError net = cenet.CE_Net_OCT(num_classes=12, num_channels=3) modelName = 'CEnet-' + backbone + '-voc' criterion_mask = loss.dice_bce_loss() # criterion_edge = loss.weighted_cross_entropy(num_classes=12) # criterion = loss.FocalLoss(gamma=2) print("Backbone network is {}".format(backbone)) if resume_epoch == 0: print("Training deeplabv3+ from scratch...") else: print("Initializing weights from: {}...".format( os.path.join(save_dir, 'models', modelName + '_epoch-' + str(resume_epoch - 1) + '.pth'))) net.load_state_dict( torch.load(os.path.join(save_dir, 'models', modelName + '_epoch-' + str(resume_epoch - 1) + '.pth'), map_location=lambda storage, loc: storage)) # Load all tensors onto the CPU if gpu_id >= 0: