def main(): """ :param n_model: number of models for the comittee :param n_train: number of training data to be used, this decides how long the training process will be :param batch_train_size: batch size for training process, keep it under 20 :param idx_ratio: ratio of high entropy:ratio of random :return: """ # paths save_path = os.path.join(dr(dr(dr(abspath(__file__)))), 'results', 've_test') csv_name_train = 'train.csv' csv_name_test = 'test.csv' csv_name_index = 'index.csv' dir_name = 'vote_90_5_70_' save_weights_flag = True cityscape_path = os.path.join(dr(dr(dr(abspath(__file__)))), 'data', 'cityscapes') cityscape_loss_weight_path = os.path.join(dr(dr(dr(abspath(__file__)))), 'data', 'cityscapes', 'class_weights.pkl') cityscape_pretrain_path = os.path.join(dr(dr(dr(abspath(__file__)))), 'data', 'cityscape_pretrain') inference_path = os.path.join(dr(dr(dr(abspath(__file__)))), 'data', 'cityscapes', 'inference') color_path = os.path.join(dr(dr(dr(abspath(__file__)))), 'data', 'cityscapes', 'color') print('cityscape_path: ' + cityscape_path) print(dir_name) # arguments n_train = 2880 # divisible by 8: batch size and 10: 10% increment of training data increase n_pretrain = 0 n_test = 500 n_epoch = 40 n_model = 10 test_factor = 3 # committee only tested every test_factor-th batch batch_train_size = 3 * max(torch.cuda.device_count(), 1) batch_test_size = 25 * max(torch.cuda.device_count(), 1) lr = 0.0001 loss_print = 2 continue_flag = False poly_exp = 1.0 feature_extract = True dropout_rate = 0.9 idx_ratio = [1.0, 0.0] data_limit = 0.7 manual_seed = 1 # report qbc semseg to user in terminal text = (('n_model(dropout): ' + str(n_model)) + (', n_train: ' + str(n_train)) + (', batch_train_size: ' + str(batch_train_size)) + (', idx_ratio: ' + str(idx_ratio)) + (', test_factor: ' + str(test_factor))) print(text) # CUDA cuda_flag = torch.cuda.is_available() device = torch.device("cuda" if cuda_flag else "cpu") device_cpu = torch.device("cpu") dataloader_kwargs = {'pin_memory': True} if cuda_flag else {} print(torch.cuda.device_count(), "GPUs detected") torch.manual_seed(manual_seed) # print("Max memory allocated:" + str(np.round(torch.cuda.max_memory_allocated(device) / 1e9, 3)) + ' Gb') # get data and index library mean_std = ([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]) transform = T.Compose([ T.Resize((800, 800), Image.BICUBIC), T.ToTensor(), T.Normalize(*mean_std) ]) train_dataset = dataset_preset.Dataset_Cityscapes_n( root=cityscape_path, split='train', mode='fine', target_type='semantic', transform=transform, target_transform=segmen_preset.label_id2label, n=n_train) train_dataset_idx = dataset_preset.Dataset_Cityscapes_n_i( root=cityscape_path, split='train', mode='fine', target_type='semantic', transform=transform, target_transform=segmen_preset.label_id2label, n=n_train) # also get index of data test_dataset = dataset_preset.Dataset_Cityscapes_n_i( root=cityscape_path, split='val', mode='fine', target_type='semantic', transform=transform, target_transform=segmen_preset.label_id2label, n=n_test) test_dataloader = DataLoader(test_dataset, batch_size=batch_test_size, shuffle=True, num_workers=3 * max(torch.cuda.device_count(), 1), drop_last=False) print("Datasets loaded!") # create models, optimizers, scheduler, criterion, the model fcn_model = torchvision.models.segmentation.deeplabv3_resnet101( pretrained=False, progress=True, num_classes=segmen_preset.n_labels_valid, aux_loss=True) fcn_model = fcn_model.cuda() fcn_model = nn.DataParallel(fcn_model) # the optimizers optimizer = 
torch.optim.Adam( [{ 'params': fcn_model.module.classifier.parameters() }, { 'params': list(fcn_model.module.backbone.parameters()) + list(fcn_model.module.aux_classifier.parameters()) }], lr=lr) lambda1 = lambda epoch: math.pow(1 - (epoch / n_epoch), poly_exp) scheduler = optim.lr_scheduler.LambdaLR(optimizer, lr_lambda=lambda1) with open(cityscape_loss_weight_path, "rb") as file: # (needed for python3) class_weights = np.array(pickle.load(file)) class_weights = torch.from_numpy(class_weights) class_weights = Variable(class_weights.type(torch.FloatTensor)).cuda() criterion = torch.nn.CrossEntropyLoss(weight=class_weights).cuda() # report everything print(str(n_model) + " fcn models created") text = ('n_model: ' + str(n_model)) + (', n_train: ' + str(n_train)) + (', n_epoch: ' + str(n_epoch)) +\ (', batch_train_size: ' + str(batch_train_size)) + (', idx_ratio: ' + str(idx_ratio)) print(text) # to document training process, create directory, etc train_text = [str(x) for x in range(1, n_epoch + 1)] test_text = [str(x) for x in range(1, n_epoch + 1)] train_index_text = [str(x) for x in range(1, 8)] train_index_docu = 0 train_index = [] test_text_index = 0 # write text to csv dir_number = 1 while os.path.exists( os.path.join(save_path, (dir_name + '{:03d}'.format(dir_number)))): dir_number += 1 run_path = os.path.join(save_path, (dir_name + '{:03d}'.format(dir_number))) os.makedirs(run_path) # make run_* dir f = open(os.path.join(run_path, 'info.txt'), 'w+') # write .txt file f.write(text) f.close() copy(__file__, os.path.join(run_path, os.path.basename(__file__))) # write training progress csv_path_train = os.path.join(run_path, csv_name_train) title = [ "Training progress for n_model = " + str(n_model) + ", idx_ratio: " + str(idx_ratio) + ', for multiple epoch' ] with open(csv_path_train, mode='a+', newline='') as test_file: test_writer = csv.writer(test_file, delimiter=',') test_writer.writerow(title) # write test progress csv_path_test = os.path.join(run_path, csv_name_test) title = [ "Test progress for n_model = " + str(1) + ", idx_ratio: " + str(idx_ratio) + ', for multiple epoch, torch seed: ' + str(manual_seed) + 'run_path: ' + run_path ] with open(csv_path_test, mode='a+', newline='') as test_file: test_writer = csv.writer(test_file, delimiter=',') test_writer.writerow(title) # write index and train progress csv_path_index = os.path.join(run_path, csv_name_index) title = [ "Index progress for n_model = " + str(n_model) + ", idx_ratio: " + str(idx_ratio) + ', for multiple epoch' ] with open(csv_path_train, mode='a+', newline='') as test_file: test_writer = csv.writer(test_file, delimiter=',') test_writer.writerow(title) # training start for i_epoch in range(n_epoch): # initialize with random if len(train_index) == 0: train_index = np.array( random.sample(range(n_train), k=int(n_train / 10))) train_index_text[train_index_docu] = train_index_text[train_index_docu] + ': ' \ + str([x for x in train_index]).strip('[]') # update train and index documentation text = train_index_text[train_index_docu].split(";") with open(csv_path_index, mode='a+', newline='') as test_file: test_writer = csv.writer(test_file, delimiter=';') test_writer.writerow(text) print(train_index_text) train_index_docu = train_index_docu + 1 # append with vote entropy elif (len(train_index) < int(0.7 * n_train)) and (i_epoch % 5 == 0): t = Timer() t.start() # perform vote entropy on entire dataset indices, fcn_model = vote_entropy_dropout(fcn_model, train_dataset_idx, train_index, idx_ratio, batch_test_size, device, 
n_model, dropout_rate, i_epoch, n_data=int(n_train / 10)) train_index = np.append(train_index, indices) train_index_text[train_index_docu] = train_index_text[train_index_docu] + ': ' + \ str([x for x in train_index]).strip('[]') +\ ";{:.4f}".format(np.array(t.stop()).mean()) # update train and index documentation text = train_index_text[train_index_docu].split(";") with open(csv_path_index, mode='a+', newline='') as test_file: test_writer = csv.writer(test_file, delimiter=';') test_writer.writerow(text) print(train_index_text) train_index_docu = train_index_docu + 1 # retrain with selected data print(train_index) print('length: ' + str(len(train_index))) train_subset = Subset(train_dataset_idx, train_index) train_dataloader = DataLoader(train_subset, batch_size=batch_train_size, shuffle=True) loss_epoch = [] time_epoch = [] for i_batch, (data_train, target_train, index, _) in enumerate(train_dataloader): # train batch t = Timer() t.start() output, loss, iou, fcn_model, optimizer = train_batch( fcn_model, data_train, target_train, optimizer, device, criterion) print('Epoch: ' + str(i_epoch) + '\t Batch: ' + str(i_batch) + '/' + str(len(train_dataloader)) + '; model ' + str(0) + '; train loss avg: ' + "{:.3f}".format(loss) + '; train iou avg: ' + "{:.3f}".format(iou.mean())) for param_group in optimizer.param_groups: print(param_group['lr']) loss_epoch.append(loss) time_epoch.append(t.stop()) # document train result train_text[i_epoch] = train_text[i_epoch] + ";{:.4f}".format(np.array(loss_epoch).mean()) + \ ";{:.7f}".format(np.array(optimizer.param_groups[0]['lr']))\ + ';' + str(len(train_index)) + ";{:.4f}".format(np.array(time_epoch).mean()) # update train documentation text = train_text[i_epoch].split(";") with open(csv_path_train, mode='a+', newline='') as test_file: test_writer = csv.writer(test_file, delimiter=';') test_writer.writerow(text) # save temporary model and perform test if i_epoch % 10 == 0 or (i_epoch + 1) == n_epoch: print('Save and Test Model') fcn_model.train() torch.save( fcn_model.state_dict(), os.path.join(run_path, ('model_weight_epoch_train' + '{:03d}'.format(i_epoch) + '.pt'))) fcn_model.eval() torch.save( fcn_model.state_dict(), os.path.join(run_path, ('model_weight_epoch_' + '{:03d}'.format(i_epoch) + '.pt'))) # perform test test_idx = test_text_index create_pred_img(fcn_model, test_dataloader, inference_path, color_path) all_result_dict = cityscapes_eval() # document test result test_text[test_idx] = test_text[test_idx] + ";{:.4f}".format(all_result_dict['averageScoreClasses']) +\ ";{:.7f}".format(np.array(optimizer.param_groups[0]['lr']))\ + ';' + str(len(train_index)) # update test documentation text = test_text[test_idx].split(";") with open(csv_path_test, mode='a+', newline='') as test_file: test_writer = csv.writer(test_file, delimiter=';') test_writer.writerow(text) test_text_index = test_text_index + 1 # one epoch ends here scheduler.step() print(optimizer)
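
# The helper `vote_entropy_dropout` used above is defined elsewhere in the project.
# The sketch below is only an illustration (not the project's implementation) of how
# per-image vote entropy over an MC-dropout committee can be computed. The function
# name, its arguments, and the assumption that the model returns a dict with an 'out'
# key (as torchvision's DeepLabV3 does) are assumptions made for this example only.
def _vote_entropy_sketch(model, dataloader, n_committee, n_classes, device):
    """Return the mean per-pixel vote entropy for every image in `dataloader`."""
    import torch  # repeated here so the sketch is self-contained
    model.eval()
    # keep dropout layers active so each forward pass acts as one committee member
    for m in model.modules():
        if isinstance(m, torch.nn.Dropout):
            m.train()
    scores = []
    with torch.no_grad():
        for data, *_ in dataloader:
            data = data.to(device)
            votes = torch.zeros(data.size(0), n_classes, data.size(2), data.size(3), device=device)
            for _ in range(n_committee):
                pred = model(data)['out'].argmax(dim=1, keepdim=True)      # hard vote per pixel
                votes.scatter_add_(1, pred, torch.ones_like(pred, dtype=votes.dtype))
            p = votes / n_committee                                         # per-pixel vote distribution
            entropy = -(p * p.clamp(min=1e-12).log()).sum(dim=1)           # vote entropy per pixel
            scores.extend(entropy.mean(dim=(1, 2)).tolist())                # average over the image
    return scores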

# Script variant 2: rank the whole training set by vote entropy using a previously trained model.
def main(dropout):
    """
    :param n_model: number of models in the committee
    :param n_train: number of training samples to use; this decides how long training takes
    :param batch_train_size: batch size for the training process, keep it under 20
    :param idx_ratio: ratio of high-entropy samples to randomly selected samples
    :return:
    """
    # paths
    save_path = os.path.join(dr(dr(dr(abspath(__file__)))), 'results', 'random')
    csv_name_train = 'train.csv'
    csv_name_test = 'test.csv'
    csv_name_index = 'index.csv'
    dir_name = 've_ranker_'
    index_path_name = 'random_bulk_40_003'
    save_weights_flag = True
    cityscape_path = os.path.join(dr(dr(dr(abspath(__file__)))), 'data', 'cityscapes')
    cityscape_loss_weight_path = os.path.join(dr(dr(dr(abspath(__file__)))), 'data', 'cityscapes', 'class_weights.pkl')
    cityscape_pretrain_path = os.path.join(dr(dr(dr(abspath(__file__)))), 'data', 'cityscape_pretrain')
    inference_path = os.path.join(dr(dr(dr(abspath(__file__)))), 'data', 'cityscapes', 'inference')
    color_path = os.path.join(dr(dr(dr(abspath(__file__)))), 'data', 'cityscapes', 'color')
    print('cityscape_path: ' + cityscape_path)

    # arguments
    n_train = 2880  # divisible by 8 (batch size) and by 10 (10% increments of training data)
    n_pretrain = 0
    n_test = 500
    n_epoch = 40
    n_model = 10
    test_factor = 3  # the committee is only tested every test_factor-th batch
    batch_train_size = 3 * max(torch.cuda.device_count(), 1)
    batch_test_size = 25 * max(torch.cuda.device_count(), 1)
    lr = 0.0001
    loss_print = 2
    continue_flag = False
    poly_exp = 1.0
    feature_extract = True
    dropout_rate = dropout
    idx_ratio = [1.0, 0.0]
    data_limit = 0.7

    # report the QBC semseg settings to the user in the terminal
    text = ('n_model(dropout): ' + str(n_model)
            + ', n_train: ' + str(n_train)
            + ', batch_train_size: ' + str(batch_train_size)
            + ', idx_ratio: ' + str(idx_ratio)
            + ', test_factor: ' + str(test_factor))
    print(text)

    # CUDA
    cuda_flag = torch.cuda.is_available()
    device = torch.device("cuda" if cuda_flag else "cpu")
    device_cpu = torch.device("cpu")
    dataloader_kwargs = {'pin_memory': True} if cuda_flag else {}
    print(torch.cuda.device_count(), "GPUs detected")
    # print("Max memory allocated:" + str(np.round(torch.cuda.max_memory_allocated(device) / 1e9, 3)) + ' Gb')

    # get data and index library
    mean_std = ([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    transform = T.Compose([
        T.Resize((800, 800), Image.BICUBIC),
        T.ToTensor(),
        T.Normalize(*mean_std)
    ])
    train_dataset = dataset_preset.Dataset_Cityscapes_n(
        root=cityscape_path, split='train', mode='fine', target_type='semantic',
        transform=transform, target_transform=segmen_preset.label_id2label, n=n_train)
    train_dataset_idx = dataset_preset.Dataset_Cityscapes_n_i(
        root=cityscape_path, split='train', mode='fine', target_type='semantic',
        transform=transform, target_transform=segmen_preset.label_id2label,
        n=n_train)  # also returns the index of each sample
    test_dataset = dataset_preset.Dataset_Cityscapes_n_i(
        root=cityscape_path, split='val', mode='fine', target_type='semantic',
        transform=transform, target_transform=segmen_preset.label_id2label, n=n_test)
    test_dataloader = DataLoader(test_dataset, batch_size=batch_test_size, shuffle=True,
                                 num_workers=3 * max(torch.cuda.device_count(), 1),
                                 drop_last=False)
    print("Datasets loaded!")

    # create the model, optimizer, scheduler, and criterion
    fcn_model = torchvision.models.segmentation.deeplabv3_resnet101(
        pretrained=False, progress=True,
        num_classes=segmen_preset.n_labels_valid, aux_loss=True)
    fcn_model = fcn_model.cuda()
    fcn_model = nn.DataParallel(fcn_model)
    fcn_model.load_state_dict(
        torch.load(os.path.join(save_path, index_path_name, 'model_weight_epoch_train039.pt')))

    # the optimizer
    optimizer = torch.optim.Adam(
        [{'params': fcn_model.module.classifier.parameters()},
         {'params': list(fcn_model.module.backbone.parameters())
                    + list(fcn_model.module.aux_classifier.parameters())}],
        lr=lr)
    lambda1 = lambda epoch: math.pow(1 - (epoch / n_epoch), poly_exp)
    scheduler = optim.lr_scheduler.LambdaLR(optimizer, lr_lambda=lambda1)
    with open(cityscape_loss_weight_path, "rb") as file:  # (needed for python3)
        class_weights = np.array(pickle.load(file))
    class_weights = torch.from_numpy(class_weights)
    class_weights = Variable(class_weights.type(torch.FloatTensor)).cuda()
    criterion = torch.nn.CrossEntropyLoss(weight=class_weights).cuda()

    # report everything
    print(str(n_model) + " fcn models created")
    text = ('n_model: ' + str(n_model) + ', n_train: ' + str(n_train)
            + ', n_epoch: ' + str(n_epoch)
            + ', batch_train_size: ' + str(batch_train_size)
            + ', idx_ratio: ' + str(idx_ratio))
    print(text)

    # to document the training process: per-epoch text buffers
    train_text = [str(x) for x in range(1, n_epoch + 1)]
    test_text = [str(x) for x in range(1, n_epoch + 1)]
    train_index_text = [str(x) for x in range(1, 8)]
    train_index_docu = 0
    train_index = np.array([])
    test_text_index = 0

    # create the run directory and save the run info
    dir_number = 1
    while os.path.exists(os.path.join(save_path, dir_name + '{:03d}'.format(dir_number))):
        dir_number += 1
    run_path = os.path.join(save_path, dir_name + '{:03d}'.format(dir_number))
    os.makedirs(run_path)  # make run_* dir
    with open(os.path.join(run_path, 'info.txt'), 'w+') as f:  # write .txt file
        f.write(text)
    copy(__file__, os.path.join(run_path, os.path.basename(__file__)))

    # write the training-progress header
    csv_path_train = os.path.join(run_path, csv_name_train)
    title = ["Training progress for n_model = " + str(n_model) + ", idx_ratio: "
             + str(idx_ratio) + ', for multiple epochs']
    with open(csv_path_train, mode='a+', newline='') as test_file:
        test_writer = csv.writer(test_file, delimiter=',')
        test_writer.writerow(title)

    # write the test-progress header
    csv_path_test = os.path.join(run_path, csv_name_test)
    title = ["Test progress for n_model = " + str(1) + ", idx_ratio: " + str(idx_ratio)
             + ', for multiple epochs, torch seed: ' + str(None)
             + ', run_path: ' + run_path + ', index_from: ' + index_path_name]
    with open(csv_path_test, mode='a+', newline='') as test_file:
        test_writer = csv.writer(test_file, delimiter=',')
        test_writer.writerow(title)

    # write the index-progress header
    csv_path_index = os.path.join(run_path, csv_name_index)
    title = ["Index progress for n_model = " + str(n_model) + ", idx_ratio: "
             + str(idx_ratio) + ', for multiple epochs']
    with open(csv_path_index, mode='a+', newline='') as test_file:
        test_writer = csv.writer(test_file, delimiter=',')
        test_writer.writerow(title)

    # rank the training set with vote entropy
    t = Timer()
    t.start()
    # perform vote entropy on the entire dataset
    indices, entropy, fcn_model = vote_entropy_dropout(fcn_model, train_dataset_idx,
                                                       train_index, idx_ratio,
                                                       batch_test_size, device,
                                                       n_model, dropout_rate, 1,
                                                       n_data=int(n_train))

    # save the ranked indices
    np.save(os.path.join(run_path, 'indices.npy'), indices)
    np.save(os.path.join(run_path, 'entropy.npy'), entropy)

    # write the ranking to the index csv
    csv_path_index = os.path.join(run_path, csv_name_index)
    string_txt = "1: " + " ".join([str(x) + ',' for x in indices])
    with open(csv_path_index, mode='a+', newline='') as test_file:
        test_writer = csv.writer(test_file, delimiter=',')
        test_writer.writerow(string_txt)
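
# Illustrative follow-up (not part of the original script): how a later run could load
# the ranking saved above. `run_path` is whichever directory the ranker created, and the
# sketch assumes `vote_entropy_dropout` returns indices aligned with the saved entropies;
# it re-sorts defensively in case they are not already ordered.
def _load_ranked_indices(run_path, k):
    """Return the k indices with the highest vote entropy from a saved ranking."""
    import os
    import numpy as np
    indices = np.load(os.path.join(run_path, 'indices.npy'))
    entropy = np.load(os.path.join(run_path, 'entropy.npy'))
    order = np.argsort(entropy)[::-1]   # highest vote entropy first
    return indices[order][:k]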

# Script variant 3: bulk retraining on an index selection read from a previous run.
def main(n_train, batch_train_size, n_test, batch_test_size):
    """
    :param n_model: number of models in the committee
    :param n_train: number of training samples to use; this decides how long training takes
    :param batch_train_size: batch size for the training process, keep it under 20
    :param idx_ratio: ratio of high-entropy samples to randomly selected samples
    :return:
    """
    # paths
    img_path = os.path.join(dr(dr(dr(abspath(__file__)))), 'data', 've_test', 'example.png')
    save_path = os.path.join(dr(dr(dr(abspath(__file__)))), 'results', 've_test')
    csv_name_train = 'train.csv'
    csv_name_test = 'test.csv'
    csv_name_index = 'index.csv'
    dir_name = 'vote_bulk_40_from_90_005_'
    index_path_name = 'vote_90_5_005'
    save_weights_flag = True
    cityscape_path = os.path.join(dr(dr(dr(abspath(__file__)))), 'data', 'cityscapes')
    cityscape_loss_weight_path = os.path.join(dr(dr(dr(abspath(__file__)))), 'data', 'cityscapes', 'class_weights.pkl')
    cityscape_pretrain_path = os.path.join(dr(dr(dr(abspath(__file__)))), 'data', 'cityscape_pretrain')
    inference_path = os.path.join(dr(dr(dr(abspath(__file__)))), 'data', 'cityscapes', 'inference')
    color_path = os.path.join(dr(dr(dr(abspath(__file__)))), 'data', 'cityscapes', 'color')
    print('cityscape_path: ' + cityscape_path)
    print(dir_name)
    print(index_path_name)

    # arguments (these overwrite the function parameters)
    n_train = 2880
    n_pretrain = 0
    n_test = 500
    n_epoch = 40
    test_factor = 3  # the committee is only tested every test_factor-th batch
    batch_train_size = 3 * max(torch.cuda.device_count(), 1)
    batch_train_size_pretrain = 4
    batch_test_size = 25 * max(torch.cuda.device_count(), 1)
    lr = 0.0001
    loss_print = 2
    idx_ratio = [0.0, 1.0]  # proportion of qbc:random selection
    continue_flag = False
    poly_exp = 1.0
    feature_extract = True
    manual_seed = 2
    np.random.seed(manual_seed)

    # CUDA
    cuda_flag = torch.cuda.is_available()
    device = torch.device("cuda" if cuda_flag else "cpu")
    device_cpu = torch.device("cpu")
    dataloader_kwargs = {'pin_memory': True} if cuda_flag else {}
    print(torch.cuda.device_count(), "GPUs detected")
    torch.manual_seed(manual_seed)
    # print("Max memory allocated:" + str(np.round(torch.cuda.max_memory_allocated(device) / 1e9, 3)) + ' Gb')

    # get data and index library
    mean_std = ([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    transform = T.Compose([
        T.Resize((800, 800), Image.BICUBIC),
        T.ToTensor(),
        T.Normalize(*mean_std)
    ])
    train_dataset = dataset_preset.Dataset_Cityscapes_n(
        root=cityscape_path, split='train', mode='fine', target_type='semantic',
        transform=transform, target_transform=segmen_preset.label_id2label, n=n_train)

    # read the index used by the previous run
    csv_path_index_source = os.path.join(save_path, index_path_name, csv_name_index)
    with open(csv_path_index_source) as csv_file:
        data = csv_file.readlines()
    train_index = np.array(
        list(map(int, data[-1][3:data[-1].find(';', (len(data[-1]) - 20))].split(','))))
    print(len(train_index))
    # np.random.shuffle(train_index)
    train_index = train_index[int(n_train * 0.1):int(n_train * 0.5)]
    print(len(train_index))
    train_dataset = Subset(train_dataset, indices=train_index)

    test_dataset = dataset_preset.Dataset_Cityscapes_n_i(
        root=cityscape_path, split='val', mode='fine', target_type='semantic',
        transform=transform, target_transform=segmen_preset.label_id2label,
        n=n_test)  # only test on part of the data
    train_dataloader = DataLoader(train_dataset, batch_size=batch_train_size, shuffle=True,
                                  num_workers=3 * max(torch.cuda.device_count(), 1),
                                  drop_last=True)
    test_dataloader = DataLoader(test_dataset, batch_size=batch_test_size, shuffle=True,
                                 num_workers=3 * max(torch.cuda.device_count(), 1),
                                 drop_last=True)
    print("Datasets loaded!")

    # create the model, optimizer, scheduler, and criterion
    fcn_model = torchvision.models.segmentation.deeplabv3_resnet101(
        pretrained=False, progress=True,
        num_classes=segmen_preset.n_labels_valid, aux_loss=True)
    fcn_model = fcn_model.cuda()
    fcn_model = nn.DataParallel(fcn_model)

    # the optimizer
    params_to_update = fcn_model.parameters()
    print("Params to learn:")
    if feature_extract:
        params_to_update = []
        for name, param in fcn_model.named_parameters():
            if param.requires_grad:
                params_to_update.append(param)
                print("\t", name)
    else:
        for name, param in fcn_model.named_parameters():
            if param.requires_grad:
                print("\t", name)
    params = add_weight_decay(fcn_model, l2_value=0.0001)
    # optimizer = torch.optim.SGD(
    #     [{'params': fcn_model.module.classifier.parameters()},
    #      {'params': list(fcn_model.module.backbone.parameters())
    #                 + list(fcn_model.module.aux_classifier.parameters())}],
    #     lr=lr, momentum=0.9)
    optimizer = torch.optim.Adam(
        [{'params': fcn_model.module.classifier.parameters()},
         {'params': list(fcn_model.module.backbone.parameters())
                    + list(fcn_model.module.aux_classifier.parameters())}],
        lr=lr, weight_decay=0.0001)
    lambda1 = lambda epoch: math.pow(1 - (epoch / n_epoch), poly_exp)
    scheduler = optim.lr_scheduler.LambdaLR(optimizer, lr_lambda=lambda1)
    with open(cityscape_loss_weight_path, "rb") as file:  # (needed for python3)
        class_weights = np.array(pickle.load(file))
    class_weights = torch.from_numpy(class_weights)
    class_weights = Variable(class_weights.type(torch.FloatTensor)).cuda()
    criterion = torch.nn.CrossEntropyLoss(weight=class_weights).cuda()

    # report everything
    text = ('Model created'
            + ', n_train: ' + str(n_train)
            + ', n_epoch: ' + str(n_epoch)
            + ', batch_train_size: ' + str(batch_train_size)
            + ', idx_ratio: ' + str(idx_ratio)
            + ', n_test: ' + str(n_test)
            + ', batch_test_size: ' + str(batch_test_size)
            + ', test_factor: ' + str(test_factor)
            + ', optimizer: ' + str(optimizer)
            + ', model: ' + str(fcn_model))
    print(text)

    # for documentation
    train_text = [str(x) for x in range(1, n_epoch + 1)]
    test_text = [str(x) for x in range(1, n_epoch + 1)]
    test_text_index = 0

    # create the run directory and save the run info
    dir_number = 1
    while os.path.exists(os.path.join(save_path, dir_name + '{:03d}'.format(dir_number))):
        dir_number += 1
    run_path = os.path.join(save_path, dir_name + '{:03d}'.format(dir_number))
    os.makedirs(run_path)  # make run_* dir
    with open(os.path.join(run_path, 'info.txt'), 'w+') as f:  # write .txt file
        f.write(text)
    copy(__file__, os.path.join(run_path, os.path.basename(__file__)))

    # write the training-progress header
    csv_path_train = os.path.join(run_path, csv_name_train)
    title = ["Training progress for n_model = " + str(1) + ", idx_ratio: " + str(idx_ratio)
             + ', for multiple epochs, torch seed: ' + str(manual_seed)]
    with open(csv_path_train, mode='a+', newline='') as test_file:
        test_writer = csv.writer(test_file, delimiter=',')
        test_writer.writerow(title)

    # write the test-progress header
    csv_path_test = os.path.join(run_path, csv_name_test)
    title = ["Test progress for n_model = " + str(1) + ", idx_ratio: " + str(idx_ratio)
             + ', for multiple epochs, torch seed: ' + str(manual_seed)
             + ', run_path: ' + run_path + ', index_from: ' + index_path_name]
    with open(csv_path_test, mode='a+', newline='') as test_file:
        test_writer = csv.writer(test_file, delimiter=',')
        test_writer.writerow(title)

    # load weights from a previous run if requested
    if continue_flag:
        fcn_model.load_state_dict(
            torch.load('C:\\Users\\steve\\Desktop\\projects\\al_kitti\\results\\first_test\\adam_run_005\\model_weight_epoch_10.pt'))
        print('weight loaded')

    # training process, n-th batch
    for i_epoch in range(n_epoch):
        loss_epoch = []
        iou_epoch = []
        time_epoch = []
        for i_batch, (data_train, target_train) in enumerate(train_dataloader):
            t = Timer()
            t.start()
            # train one batch
            output, loss, iou, fcn_model, optimizer = train_batch(
                fcn_model, data_train, target_train, optimizer, device, criterion)
            print('Epoch: ' + str(i_epoch) + '\t Batch: ' + str(i_batch) + '/'
                  + str(len(train_dataloader)) + '; model ' + str(0)
                  + '; train loss avg: ' + "{:.3f}".format(loss)
                  + '; train iou avg: ' + "{:.3f}".format(iou.mean()))
            for param_group in optimizer.param_groups:
                print(param_group['lr'])
            loss_epoch.append(loss)
            iou_epoch.append(iou.mean())
            time_epoch.append(t.stop())

        # document the training result
        train_text[i_epoch] = (train_text[i_epoch]
                               + ";{:.4f}".format(np.array(loss_epoch).mean())
                               + ";{:.4f}".format(np.array(iou_epoch).mean())
                               + ";{:.7f}".format(np.array(optimizer.param_groups[0]['lr']))
                               + ';' + str(len(train_index)))
        # update the train documentation
        text = train_text[i_epoch].split(";")
        with open(csv_path_train, mode='a+', newline='') as test_file:
            test_writer = csv.writer(test_file, delimiter=';')
            test_writer.writerow(text)

        # one epoch ends here
        scheduler.step()
        print(optimizer)

        # save a temporary model and perform the test
        if i_epoch % 10 == 0 or (i_epoch + 1) == n_epoch:
            fcn_model.train()
            torch.save(fcn_model.state_dict(),
                       os.path.join(run_path, 'model_weight_epoch_train' + '{:03d}'.format(i_epoch) + '.pt'))
            fcn_model.eval()
            torch.save(fcn_model.state_dict(),
                       os.path.join(run_path, 'model_weight_epoch_' + '{:03d}'.format(i_epoch) + '.pt'))

            # perform the test
            create_pred_img(fcn_model, test_dataloader, inference_path, color_path)
            all_result_dict = cityscapes_eval()
            # average training time
            mean_time = np.array(time_epoch).mean()

            # document the test result
            test_text[test_text_index] = (test_text[test_text_index]
                                          + ";{:.4f}".format(all_result_dict['averageScoreClasses'])
                                          + ";{:.7f}".format(np.array(optimizer.param_groups[0]['lr']))
                                          + ";{:.4f}".format(mean_time)
                                          + ';' + str(len(train_index)))
            # update the test documentation
            text = test_text[test_text_index].split(";")
            with open(csv_path_test, mode='a+', newline='') as test_file:
                test_writer = csv.writer(test_file, delimiter=';')
                test_writer.writerow(text)
            test_text_index = test_text_index + 1
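
# Assumed entry point (not present in the original file): the retraining variant above
# re-assigns its arguments internally, so a minimal driver only needs placeholder values.
if __name__ == '__main__':
    main(n_train=2880, batch_train_size=3, n_test=500, batch_test_size=25)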