def val_step(images, labels, net, optimizer, loss):
    # Forward pass only: compute the validation loss and f_score
    prediction = net(images, training=False)
    loss_value = loss(labels, prediction)
    _f_score = f_score()(labels, prediction)
    return loss_value, _f_score
def forward(self, y_pr, y_gt):
    return 1 - f_score(y_pr, y_gt, beta=1., eps=self.eps,
                       threshold=None, activation=self.activation)
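The `f_score` this loss calls is not shown in the snippet. A minimal sketch of a soft F-beta score (the Dice coefficient when beta=1), consistent with the signature used in `forward` above and with the two-argument calls `f_score(outputs, labels)` in the PyTorch loops below; the activation and threshold handling are assumptions:

import torch

def f_score(pr, gt, beta=1., eps=1e-7, threshold=None, activation='sigmoid'):
    # Soft F-beta score between predictions and ground truth;
    # with beta=1 this reduces to the Dice coefficient.
    if activation == 'sigmoid':
        pr = torch.sigmoid(pr)
    elif activation == 'softmax2d':
        pr = torch.softmax(pr, dim=1)
    if threshold is not None:
        pr = (pr > threshold).float()

    tp = torch.sum(gt * pr)
    fp = torch.sum(pr) - tp
    fn = torch.sum(gt) - tp

    return (((1 + beta ** 2) * tp + eps) /
            ((1 + beta ** 2) * tp + beta ** 2 * fn + fp + eps))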
def fit_one_epoch(net, epoch, epoch_size, gen, Epoch, cuda):
    total_loss = 0
    total_f_score = 0
    net = net.train()
    with tqdm(total=epoch_size, desc=f'Epoch {epoch + 1}/{Epoch}',
              postfix=dict, mininterval=0.3) as pbar:
        for iteration, batch in enumerate(gen):
            if iteration >= epoch_size:
                break
            imgs, pngs, labels = batch
            with torch.no_grad():
                imgs = Variable(torch.from_numpy(imgs).type(torch.FloatTensor))
                pngs = Variable(torch.from_numpy(pngs).type(torch.FloatTensor)).long()
                labels = Variable(torch.from_numpy(labels).type(torch.FloatTensor))
                if cuda:
                    imgs = imgs.cuda()
                    pngs = pngs.cuda()
                    labels = labels.cuda()
            optimizer.zero_grad()
            outputs = net(imgs)
            loss = CE_Loss(outputs, pngs, num_classes=NUM_CLASSES)
            if dice_loss:
                main_dice = Dice_loss(outputs, labels)
                loss = loss + main_dice
            with torch.no_grad():
                #-------------------------------#
                #   Compute the f_score
                #-------------------------------#
                _f_score = f_score(outputs, labels)
            loss.backward()
            optimizer.step()

            total_loss += loss.item()
            total_f_score += _f_score.item()
            pbar.set_postfix(**{'total_loss': total_loss / (iteration + 1),
                                'f_score': total_f_score / (iteration + 1),
                                'lr': get_lr(optimizer)})
            pbar.update(1)

    print('Finish Training')
    print('Epoch:' + str(epoch + 1) + '/' + str(Epoch))
    print('Total Loss: %.4f' % (total_loss / (epoch_size + 1)))
    print('Saving state, iter:', str(epoch + 1))
    torch.save(net.state_dict(),
               'logs/Epoch%d-Total_Loss%.4f.pth' %
               ((epoch + 1), total_loss / (epoch_size + 1)))
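`CE_Loss` and `Dice_loss` come from the training utilities and are not shown. A minimal sketch consistent with how they are called here, assuming logits of shape [N, C, H, W], `pngs` as long class indices, and `labels` as one-hot targets with the channel dimension last:

import torch
import torch.nn as nn

def CE_Loss(inputs, target, num_classes=21):
    # Flatten [N, C, H, W] logits to [N*H*W, C] and apply
    # cross entropy, ignoring pixels labelled num_classes.
    n, c, h, w = inputs.size()
    temp_inputs = inputs.permute(0, 2, 3, 1).contiguous().view(-1, c)
    temp_target = target.view(-1)
    return nn.CrossEntropyLoss(ignore_index=num_classes)(temp_inputs, temp_target)

def Dice_loss(inputs, target, smooth=1e-5):
    # Soft Dice loss: softmax over classes, then 1 - mean per-class Dice.
    n, c, h, w = inputs.size()
    temp_inputs = torch.softmax(
        inputs.permute(0, 2, 3, 1).contiguous().view(n, -1, c), dim=-1)
    temp_target = target.view(n, -1, target.size(-1))
    tp = torch.sum(temp_target * temp_inputs, dim=[0, 1])
    fp = torch.sum(temp_inputs, dim=[0, 1]) - tp
    fn = torch.sum(temp_target, dim=[0, 1]) - tp
    dice = (2 * tp + smooth) / (2 * tp + fp + fn + smooth)
    return 1 - torch.mean(dice)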
def train_step(images, labels, net, optimizer, loss):
    with tf.GradientTape() as tape:
        # Forward pass and loss computation
        prediction = net(images, training=True)
        loss_value = loss(labels, prediction)
    grads = tape.gradient(loss_value, net.trainable_variables)
    optimizer.apply_gradients(zip(grads, net.trainable_variables))
    _f_score = f_score()(labels, prediction)
    return loss_value, _f_score
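Both `train_step` and `val_step` call `f_score()` as a factory that returns a metric function. A plausible sketch of that factory on the Keras side, assuming one-hot ground truth and per-pixel class probabilities; the smoothing constant is an assumption:

from tensorflow.keras import backend as K

def f_score(beta=1, smooth=1e-5):
    # Returns a Keras-style metric: soft F-beta score per class,
    # averaged over classes (Dice coefficient when beta=1).
    def _f_score(y_true, y_pred):
        tp = K.sum(y_true * y_pred, axis=[0, 1, 2])
        fp = K.sum(y_pred, axis=[0, 1, 2]) - tp
        fn = K.sum(y_true, axis=[0, 1, 2]) - tp
        score = (((1 + beta ** 2) * tp + smooth) /
                 ((1 + beta ** 2) * tp + beta ** 2 * fn + fp + smooth))
        return K.mean(score)
    return _f_score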
if eager:  # custom training loop; the `eager` flag is assumed from context
    for epoch in range(Init_Epoch, Freeze_Epoch):
        fit_one_epoch(model, loss, optimizer, epoch, epoch_size,
                      epoch_size_val, gen, gen_val, Freeze_Epoch,
                      get_train_step_fn())
else:
    gen = Generator(Batch_size, train_lines, inputs_size, num_classes,
                    aux_branch, dataset_path).generate()
    gen_val = Generator(Batch_size, val_lines, inputs_size, num_classes,
                        aux_branch, dataset_path).generate(False)
    model.compile(loss=dice_loss_with_CE() if dice_loss else CE(),
                  optimizer=Adam(lr=lr),
                  metrics=[f_score()])
    model.fit_generator(gen,
                        steps_per_epoch=epoch_size,
                        validation_data=gen_val,
                        validation_steps=epoch_size_val,
                        epochs=Freeze_Epoch,
                        initial_epoch=Init_Epoch,
                        callbacks=[checkpoint_period, reduce_lr,
                                   early_stopping, tensorboard])

# Unfreeze the backbone for the second training stage
for i in range(freeze_layers):
    model.layers[i].trainable = True
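`get_train_step_fn()` is passed into the eager loop above but not shown. A minimal sketch, assuming it simply wraps the `train_step` defined earlier in `tf.function` so the custom loop runs as a compiled graph:

import tensorflow as tf

def get_train_step_fn():
    # Trace train_step once and reuse the compiled graph each iteration
    return tf.function(train_step)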
def fit_one_epoch(net, epoch, epoch_size, epoch_size_val, gen, genval, Epoch,
                  cuda, aux_branch):
    net = net.train()
    total_loss = 0
    total_f_score = 0
    val_total_loss = 0
    val_total_f_score = 0
    start_time = time.time()
    with tqdm(total=epoch_size, desc=f'Epoch {epoch + 1}/{Epoch}',
              postfix=dict, mininterval=0.3) as pbar:
        for iteration, batch in enumerate(gen):
            if iteration >= epoch_size:
                break
            imgs, pngs, labels = batch
            with torch.no_grad():
                imgs = Variable(torch.from_numpy(imgs).type(torch.FloatTensor))
                pngs = Variable(torch.from_numpy(pngs).type(torch.FloatTensor)).long()
                labels = Variable(torch.from_numpy(labels).type(torch.FloatTensor))
                if cuda:
                    imgs = imgs.cuda()
                    pngs = pngs.cuda()
                    labels = labels.cuda()
            #-------------------------------#
            #   Forward through the auxiliary branch if enabled,
            #   then backpropagate
            #-------------------------------#
            optimizer.zero_grad()
            if aux_branch:
                aux_outputs, outputs = net(imgs)
                aux_loss = CE_Loss(aux_outputs, pngs, num_classes=NUM_CLASSES)
                main_loss = CE_Loss(outputs, pngs, num_classes=NUM_CLASSES)
                loss = aux_loss + main_loss
                if dice_loss:
                    aux_dice = Dice_loss(aux_outputs, labels)
                    main_dice = Dice_loss(outputs, labels)
                    loss = loss + aux_dice + main_dice
            else:
                outputs = net(imgs)
                loss = CE_Loss(outputs, pngs, num_classes=NUM_CLASSES)
                if dice_loss:
                    main_dice = Dice_loss(outputs, labels)
                    loss = loss + main_dice
            with torch.no_grad():
                #-------------------------------#
                #   Compute the f_score
                #-------------------------------#
                _f_score = f_score(outputs, labels)
            loss.backward()
            optimizer.step()

            total_loss += loss.item()
            total_f_score += _f_score.item()
            waste_time = time.time() - start_time
            pbar.set_postfix(**{'total_loss': total_loss / (iteration + 1),
                                'f_score': total_f_score / (iteration + 1),
                                's/step': waste_time,
                                'lr': get_lr(optimizer)})
            pbar.update(1)
            start_time = time.time()

    print('Start Validation')
    net = net.eval()
    with tqdm(total=epoch_size_val, desc=f'Epoch {epoch + 1}/{Epoch}',
              postfix=dict, mininterval=0.3) as pbar:
        for iteration, batch in enumerate(genval):
            if iteration >= epoch_size_val:
                break
            imgs, pngs, labels = batch
            with torch.no_grad():
                imgs = Variable(torch.from_numpy(imgs).type(torch.FloatTensor))
                pngs = Variable(torch.from_numpy(pngs).type(torch.FloatTensor)).long()
                labels = Variable(torch.from_numpy(labels).type(torch.FloatTensor))
                if cuda:
                    imgs = imgs.cuda()
                    pngs = pngs.cuda()
                    labels = labels.cuda()
                #-------------------------------#
                #   Forward, with or without the auxiliary branch
                #-------------------------------#
                if aux_branch:
                    aux_outputs, outputs = net(imgs)
                    aux_loss = CE_Loss(aux_outputs, pngs, num_classes=NUM_CLASSES)
                    main_loss = CE_Loss(outputs, pngs, num_classes=NUM_CLASSES)
                    val_loss = aux_loss + main_loss
                    if dice_loss:
                        aux_dice = Dice_loss(aux_outputs, labels)
                        main_dice = Dice_loss(outputs, labels)
                        val_loss = val_loss + aux_dice + main_dice
                else:
                    outputs = net(imgs)
                    val_loss = CE_Loss(outputs, pngs, num_classes=NUM_CLASSES)
                    if dice_loss:
                        main_dice = Dice_loss(outputs, labels)
                        val_loss = val_loss + main_dice
                #-------------------------------#
                #   Compute the f_score
                #-------------------------------#
                _f_score = f_score(outputs, labels)

            val_total_loss += val_loss.item()
            val_total_f_score += _f_score.item()
            pbar.set_postfix(**{'total_loss': val_total_loss / (iteration + 1),
                                'f_score': val_total_f_score / (iteration + 1),
                                'lr': get_lr(optimizer)})
            pbar.update(1)

    print('Finish Validation')
    print('Epoch:' + str(epoch + 1) + '/' + str(Epoch))
    print('Total Loss: %.4f || Val Loss: %.4f ' %
          (total_loss / (epoch_size + 1),
           val_total_loss / (epoch_size_val + 1)))
    print('Saving state, iter:', str(epoch + 1))
    torch.save(net.state_dict(),
               'logs/Epoch%d-Total_Loss%.4f-Val_Loss%.4f.pth' %
               ((epoch + 1), total_loss / (epoch_size + 1),
                val_total_loss / (epoch_size_val + 1)))
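Every progress bar above reads the learning rate through `get_lr`, which is not shown. A minimal sketch under the usual assumption that all parameter groups share one rate:

def get_lr(optimizer):
    # Return the current learning rate from the optimizer's parameter groups
    for param_group in optimizer.param_groups:
        return param_group['lr']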
def fit_one_epoch(net, epoch, epoch_size_train, epoch_size_val, gen_train,
                  gen_val, Epoch, cuda):
    """
    Train for one epoch.
    net:              the network model
    epoch:            the current epoch index
    epoch_size_train: number of training iterations per epoch
    epoch_size_val:   number of validation iterations per epoch
    gen_train:        training data generator
    gen_val:          validation data generator
    Epoch:            total number of epochs
    cuda:             whether to use the GPU
    """
    train_total_loss = 0
    train_total_f_score = 0
    val_total_loss = 0
    val_total_f_score = 0

    # Switch to training mode
    net.train()
    print('Start Training')
    start_time = time.time()
    with tqdm(total=epoch_size_train, desc=f'Epoch {epoch + 1}/{Epoch}',
              postfix=dict, mininterval=0.3) as pbar:
        for iteration, batch in enumerate(gen_train):
            if iteration >= epoch_size_train:
                break
            imgs, pngs, labels = batch
            with torch.no_grad():
                imgs = Variable(torch.from_numpy(imgs).type(torch.FloatTensor))
                pngs = Variable(torch.from_numpy(pngs).type(torch.FloatTensor)).long()
                labels = Variable(torch.from_numpy(labels).type(torch.FloatTensor))
                if cuda:
                    imgs = imgs.cuda()
                    pngs = pngs.cuda()
                    labels = labels.cuda()
            # Zero the gradients
            optimizer.zero_grad()
            # Forward pass through the network
            outputs = net(imgs)
            # Compute the loss for this iteration, i.e. one batch
            loss = CE_Loss(outputs, pngs, num_classes=NUM_CLASSES)
            if dice_loss:
                main_dice = Dice_loss(outputs, labels)
                loss = loss + main_dice
            # Compute the f_score
            with torch.no_grad():
                _f_score = f_score(outputs, labels)
            # Backpropagate the loss
            loss.backward()
            # Update all parameters
            optimizer.step()

            train_total_loss += loss.item()
            train_total_f_score += _f_score.item()
            waste_time = time.time() - start_time
            pbar.set_postfix(**{'train_loss': train_total_loss / (iteration + 1),
                                'f_score': train_total_f_score / (iteration + 1),
                                's/step': waste_time,
                                'lr': get_lr(optimizer)})
            pbar.update(1)
            start_time = time.time()
    print('Finish Training')

    # Switch to evaluation mode
    net.eval()
    print('Start Validation')
    with tqdm(total=epoch_size_val, desc=f'Epoch {epoch + 1}/{Epoch}',
              postfix=dict, mininterval=0.3) as pbar:
        for iteration, batch in enumerate(gen_val):
            if iteration >= epoch_size_val:
                break
            imgs, pngs, labels = batch
            with torch.no_grad():
                imgs = Variable(torch.from_numpy(imgs).type(torch.FloatTensor))
                pngs = Variable(torch.from_numpy(pngs).type(torch.FloatTensor)).long()
                labels = Variable(torch.from_numpy(labels).type(torch.FloatTensor))
                if cuda:
                    imgs = imgs.cuda()
                    pngs = pngs.cuda()
                    labels = labels.cuda()
                outputs = net(imgs)
                val_loss = CE_Loss(outputs, pngs, num_classes=NUM_CLASSES)
                if dice_loss:
                    main_dice = Dice_loss(outputs, labels)
                    val_loss = val_loss + main_dice
                # Compute the f_score
                _f_score = f_score(outputs, labels)

            val_total_loss += val_loss.item()
            val_total_f_score += _f_score.item()
            pbar.set_postfix(**{'val_loss': val_total_loss / (iteration + 1),
                                'f_score': val_total_f_score / (iteration + 1),
                                'lr': get_lr(optimizer)})
            pbar.update(1)
    print('Finish Validation')

    print('Epoch:' + str(epoch + 1) + '/' + str(Epoch))
    print('Train Loss: %.4f || Val Loss: %.4f ' %
          (train_total_loss / (epoch_size_train + 1),
           val_total_loss / (epoch_size_val + 1)))
    print('Saving state, epoch:', str(epoch + 1))
    torch.save(net.state_dict(),
               'logs/Epoch%d-Train_Loss%.4f-Val_Loss%.4f.pth' %
               ((epoch + 1), train_total_loss / (epoch_size_train + 1),
                val_total_loss / (epoch_size_val + 1)))
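A minimal driver for this `fit_one_epoch`, assuming `model`, `gen_train`, `gen_val`, and the epoch sizes are already built; the optimizer, scheduler, and hyperparameter values here are illustrative assumptions, not part of the original script:

import torch.optim as optim

lr = 1e-4                              # assumed starting learning rate
Init_Epoch, Final_Epoch = 0, 50        # assumed epoch range
optimizer = optim.Adam(model.parameters(), lr)
lr_scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=1, gamma=0.92)

for epoch in range(Init_Epoch, Final_Epoch):
    fit_one_epoch(model, epoch, epoch_size_train, epoch_size_val,
                  gen_train, gen_val, Final_Epoch, cuda)
    lr_scheduler.step()                # decay the learning rate each epoch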
def main():
    args = get_arguments()
    h, w, c = map(int, args.input_size.split(','))
    input_size = (h, w, c)

    model = pspnet(args.num_classes, input_size,
                   downsample_factor=downsample_factor,
                   backbone=BACKBONE, aux_branch=aux_branch)
    model.summary()
    model.load_weights(args.model_path, by_name=True, skip_mismatch=True)
    tf.set_random_seed(args.random_seed)

    with open('list/cityscapes_train_list.txt', 'r') as f:
        train_lines = f.readlines()
    with open('list/cityscapes_val_list.txt', 'r') as f:
        val_lines = f.readlines()

    checkpoint_period = ModelCheckpoint(
        args.log_dir + 'ep{epoch:03d}-loss{loss:.3f}-val_loss{val_loss:.3f}.h5',
        monitor='val_loss', save_weights_only=True,
        save_best_only=False, period=1)
    reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.5,
                                  patience=3, verbose=1)
    early_stopping = EarlyStopping(monitor='val_loss', min_delta=0,
                                   patience=10, verbose=1)
    tensorboard = TensorBoard(log_dir=args.log_dir)

    # Stage 1: freeze the backbone and train the rest of the network
    for i in range(RESNET_FREEZE):
        model.layers[i].trainable = False
    print('Freeze the first {} layers of total {} layers.'.format(
        RESNET_FREEZE, len(model.layers)))

    if True:
        lr = 1e-4
        Init_Epoch = 0
        Freeze_Epoch = 50
        BATCH_SIZE = args.batch_size
        # model.compile(loss=dice_loss_with_CE() if dice_loss else CE(),
        #               optimizer=Adam(lr=lr), metrics=[f_score()])
        model.compile(loss='categorical_crossentropy',
                      optimizer=Adam(lr=lr),
                      metrics=['accuracy'])
        gen = Generator(args.data_dir, args.data_list, BATCH_SIZE,
                        input_size[:2], args.ignore_label, IMG_MEAN,
                        aux_branch, len(train_lines), args.num_classes).generate()
        gen_val = Generator(args.data_dir, args.data_val, BATCH_SIZE,
                            input_size[:2], args.ignore_label, IMG_MEAN,
                            aux_branch, len(val_lines), args.num_classes).generate(False)
        model.fit_generator(gen,
                            steps_per_epoch=max(1, len(train_lines) // BATCH_SIZE) // 4,
                            validation_data=gen_val,
                            validation_steps=max(1, len(val_lines) // BATCH_SIZE) // 4,
                            epochs=Freeze_Epoch,
                            initial_epoch=Init_Epoch,
                            callbacks=[checkpoint_period, reduce_lr, tensorboard])

    # Stage 2: unfreeze the backbone and fine-tune the whole network
    for i in range(RESNET_FREEZE):
        model.layers[i].trainable = True

    if True:
        lr = 1e-5
        Freeze_Epoch = 50
        Unfreeze_Epoch = 100
        # Halve the batch size: the unfrozen network needs more GPU memory
        BATCH_SIZE = args.batch_size // 2
        model.compile(loss=dice_loss_with_CE() if dice_loss else CE(),
                      optimizer=Adam(lr=lr),
                      metrics=[f_score()])
        print('Unfreeze the first {} layers of total {} layers.'.format(
            RESNET_FREEZE, len(model.layers)))
        gen = Generator(args.data_dir, args.data_list, BATCH_SIZE,
                        input_size[:2], args.ignore_label, IMG_MEAN,
                        aux_branch, len(train_lines), args.num_classes).generate()
        gen_val = Generator(args.data_dir, args.data_val, BATCH_SIZE,
                            input_size[:2], args.ignore_label, IMG_MEAN,
                            aux_branch, len(val_lines), args.num_classes).generate(False)
        model.fit_generator(gen,
                            steps_per_epoch=max(1, len(train_lines) // BATCH_SIZE),
                            validation_data=gen_val,
                            validation_steps=max(1, len(val_lines) // BATCH_SIZE),
                            epochs=Unfreeze_Epoch,
                            initial_epoch=Freeze_Epoch,
                            callbacks=[checkpoint_period, reduce_lr, tensorboard])
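Both `compile` calls above reference the loss factories `CE()` and `dice_loss_with_CE()`, which are not shown. A plausible sketch in the same Keras-backend style as the `f_score` metric, assuming softmax outputs and one-hot targets; the smoothing constant is an assumption:

from tensorflow.keras import backend as K

def CE():
    # Per-pixel categorical cross entropy on softmax outputs
    def _CE(y_true, y_pred):
        y_pred = K.clip(y_pred, K.epsilon(), 1.0 - K.epsilon())
        return -K.mean(K.sum(y_true * K.log(y_pred), axis=-1))
    return _CE

def dice_loss_with_CE(beta=1, smooth=1e-5):
    # Cross entropy plus (1 - soft Dice), matching the optional
    # dice_loss branch used in the PyTorch loops above
    def _dice_loss_with_CE(y_true, y_pred):
        y_pred = K.clip(y_pred, K.epsilon(), 1.0 - K.epsilon())
        tp = K.sum(y_true * y_pred, axis=[0, 1, 2])
        fp = K.sum(y_pred, axis=[0, 1, 2]) - tp
        fn = K.sum(y_true, axis=[0, 1, 2]) - tp
        score = (((1 + beta ** 2) * tp + smooth) /
                 ((1 + beta ** 2) * tp + beta ** 2 * fn + fp + smooth))
        dice = 1 - K.mean(score)
        ce = -K.mean(K.sum(y_true * K.log(y_pred), axis=-1))
        return ce + dice
    return _dice_loss_with_CE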