def main(begin_date, end_date):
    extract_dict = extract.get_extracts(begin_date, end_date)
    output_dict = transform.get_transforms(extract_dict)
    load.load_data(output_dict)
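# A minimal, hypothetical CLI wrapper for the ETL entry point above. The
# --begin-date/--end-date flags and the ISO date format are illustrative
# assumptions, not part of the original module.
if __name__ == '__main__':
    import argparse

    parser = argparse.ArgumentParser(
        description='Run the extract-transform-load pipeline')
    parser.add_argument('--begin-date', required=True,
                        help='start date, e.g. 2020-01-01 (assumed format)')
    parser.add_argument('--end-date', required=True,
                        help='end date, e.g. 2020-01-31 (assumed format)')
    cli_args = parser.parse_args()
    main(cli_args.begin_date, cli_args.end_date)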
def test(use_cuda):
    # data
    transformations = get_transforms(input_size=args.image_size,
                                     test_size=args.image_size)
    test_set = data_gen.TestDataset(root=args.test_txt_path,
                                    transform=transformations['test'])
    test_loader = data.DataLoader(test_set,
                                  batch_size=args.batch_size,
                                  shuffle=False)

    # load model
    model = make_model(args)
    if args.model_path:
        model.load_state_dict(torch.load(args.model_path))
    if use_cuda:
        model.cuda()

    # evaluate
    y_pred = []
    y_true = []
    img_paths = []
    with torch.no_grad():
        model.eval()  # switch to eval mode
        for (inputs, targets, paths) in tqdm(test_loader):
            y_true.extend(targets.detach().tolist())
            img_paths.extend(list(paths))
            if use_cuda:
                inputs, targets = inputs.cuda(), targets.cuda()
            # compute output, shape (batch_size, num_classes)
            outputs = model(inputs)
            # dim=1 applies the reduction per row, i.e. per sample
            # probability = torch.nn.functional.softmax(outputs, dim=1)[:, 1].tolist()
            # probability = [1 if prob >= 0.5 else 0 for prob in probability]
            # take the index of the max logit as the predicted class
            probability = torch.max(outputs, dim=1)[1].cpu().numpy().squeeze()
            y_pred.extend(probability)

    print("y_pred=", y_pred)
    accuracy = metrics.accuracy_score(y_true, y_pred)
    print("accuracy=", accuracy)
    confusion_matrix = metrics.confusion_matrix(y_true, y_pred)
    print("confusion_matrix=", confusion_matrix)
    print(metrics.classification_report(y_true, y_pred))
    # fpr, tpr, thresholds = metrics.roc_curve(y_true, y_pred)
    print("roc-auc score=", metrics.roc_auc_score(y_true, y_pred))

    res_dict = {
        'img_path': img_paths,
        'label': y_true,
        'predict': y_pred,
    }
    df = pd.DataFrame(res_dict)
    df.to_csv(args.result_csv, index=False)
    print(f"write to {args.result_csv} succeeded")
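# Note: roc_auc_score above is fed hard 0/1 predictions, which collapses the
# ROC curve to a single operating point. The commented-out softmax variant in
# the loop gives a more informative AUC; a self-contained sketch of that
# approach (the helper name is illustrative, not from the original code):
import torch
from sklearn import metrics

def binary_auc_from_logits(logits, y_true):
    """ROC-AUC from raw two-class model outputs of shape (N, 2)."""
    # positive-class probability per sample
    y_score = torch.nn.functional.softmax(logits, dim=1)[:, 1].tolist()
    return metrics.roc_auc_score(y_true, y_score)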
def test():
    # data
    transformations = get_transforms(input_size=args.image_size,
                                     test_size=args.image_size)
    test_set = CGPIM_Data(root=args.test_txt_path,
                          transform=transformations['test'],
                          isTrain=False)
    test_loader = data.DataLoader(test_set,
                                  batch_size=args.batch_size,
                                  shuffle=False)

    # load model
    model = ResNeXt(2, 3, [3, 4, 6, 3], 2)
    if args.model_path:
        model.load_state_dict(torch.load(args.model_path))
    model.cuda()

    # evaluate
    y_pred = []
    y_true = []
    with torch.no_grad():
        model.eval()
        for (inputs, targets) in tqdm(test_loader):
            y_true.extend(targets.detach().tolist())
            inputs, targets = inputs.cuda(), targets.cuda()
            outputs = model(inputs)
            # take the index of the max logit as the predicted class
            probability = torch.max(outputs, dim=1)[1].cpu().numpy().squeeze()
            y_pred.extend(probability)

    print("y_pred=", y_pred)
    accuracy = metrics.accuracy_score(y_true, y_pred)
    print("accuracy=", accuracy)
    confusion_matrix = metrics.confusion_matrix(y_true, y_pred)
    print("confusion_matrix=", confusion_matrix)
    print(metrics.classification_report(y_true, y_pred))
    # fpr, tpr, thresholds = metrics.roc_curve(y_true, y_pred)
    print("roc-auc score=", metrics.roc_auc_score(y_true, y_pred))

    res_dict = {
        'label': y_true,
        'predict': y_pred,
    }
    df = pd.DataFrame(res_dict)
    df.to_csv(args.result_csv, index=False)
    print(f"write to {args.result_csv} succeeded")
def __getitem__(self, i):
    image_path = os.path.join(self.image_dir, self.split_filename,
                              self.images[i])
    image = Image.open(image_path).convert('RGB')
    image = np.asarray(image)

    full_labels = self.objects[i]['labels']
    labels = [f['category'] for f in full_labels]
    boxes = [f['box2d'] for f in full_labels]  # default is pascal_voc format
    if not boxes:
        print(i)  # flag samples that have no boxes

    # apply augmentation if in training stage
    if self.split == 'train':
        aug_transform = get_transforms(self.model)
        transformed = aug_transform(image=image,
                                    bboxes=boxes,
                                    class_labels=labels)
        image = transformed['image']
        boxes = transformed['bboxes']
        labels = transformed['class_labels']

    if self.model == 'yolo':
        boxes = [pascalvoc_to_coco(box) for box in boxes]
        # convert to relative (normalized) coordinates
        boxes = [
            absolute_to_relative(box, self.size, self.size) for box in boxes
        ]

    # convert to tensors
    image = torchvision.transforms.ToTensor()(image)
    boxes = torch.FloatTensor(boxes)
    labels = torch.LongTensor([self.class_index[l] for l in labels])

    if self.model == 'yolo':
        # image = self.yolo_resize(image)
        targets = torch.cat(
            [torch.zeros(size=(len(boxes), 1)),
             labels.view(-1, 1), boxes], 1)
        return image_path, image, targets
    else:
        difficulties = torch.zeros(size=labels.size()).byte()
        return image, boxes, labels, difficulties
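# pascalvoc_to_coco and absolute_to_relative are used above but defined
# elsewhere. Minimal sketches of the standard conversions, as assumptions
# about their behavior (the real helpers may differ, e.g. use center
# coordinates for YOLO):
def pascalvoc_to_coco(box):
    """(x_min, y_min, x_max, y_max) -> (x_min, y_min, width, height)."""
    x_min, y_min, x_max, y_max = box
    return [x_min, y_min, x_max - x_min, y_max - y_min]

def absolute_to_relative(box, width, height):
    """Scale absolute pixel coordinates into the [0, 1] range."""
    return [box[0] / width, box[1] / height, box[2] / width, box[3] / height]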
def get_data_loader(args):
    # Data loading code
    transformations = transform.get_transforms(input_size=args.image_size,
                                               test_size=args.image_size)
    traindir = os.path.join(args.data_local, 'train')
    valdir = os.path.join(args.data_local, 'val')
    train_dataset = datasets.ImageFolder(
        traindir,
        transformations['val_train'],
    )
    val_dataset = datasets.ImageFolder(
        valdir,
        transformations['val_test'],
    )

    # ImageFolder maps each subdirectory name under traindir to a label id and
    # uses that id as the training label. For example, if the subdirectories
    # are named 0~53, ImageFolder treats those names as class names and builds
    # a class_to_idx mapping, which (because the names are sorted as strings)
    # ends up as: {"0": 0, "1": 1, "10": 2, "11": 3, ..., "19": 11, "2": 12, ...}.
    # The key is the class name and the value is the idx used as the training
    # label. We therefore need to save this idx-to-class-name mapping together
    # with the trained model, so that at inference time the predicted idx can
    # be mapped back to the correct class name.
    idx_to_class = OrderedDict()
    for key, value in train_dataset.class_to_idx.items():
        idx_to_class[value] = key

    collater = CutMixCollator(1.0)
    train_loader = torch.utils.data.DataLoader(
        train_dataset,
        batch_size=args.batch_size,
        shuffle=True,
        # collate_fn=collater,
        collate_fn=my_collate_fn,
        num_workers=args.workers)
    val_loader = torch.utils.data.DataLoader(val_dataset,
                                             batch_size=args.batch_size,
                                             shuffle=True,
                                             collate_fn=my_collate_fn,
                                             num_workers=args.workers)
    return train_loader, val_loader, idx_to_class
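# my_collate_fn is referenced above but not shown in this file. A minimal
# sketch of one plausible implementation: drop samples that failed to load
# (None), then fall back to default collation. This is an assumption about
# its behavior, not the original code.
from torch.utils.data.dataloader import default_collate

def my_collate_fn(batch):
    batch = [sample for sample in batch if sample is not None]
    return default_collate(batch)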
def main():
    batch_size = 16
    train_dirs = config.train_dirs
    test_dirs = config.test_dirs

    train_transform, test_transform = transform.get_transforms()
    train_dataset = dataset.ChestXRayDataset(train_dirs, train_transform)
    test_dataset = dataset.ChestXRayDataset(test_dirs, test_transform)

    dl_train = torch.utils.data.DataLoader(train_dataset,
                                           batch_size=batch_size,
                                           shuffle=True)
    dl_test = torch.utils.data.DataLoader(test_dataset,
                                          batch_size=batch_size,
                                          shuffle=True)
    logging.info(f"Num of training batches {len(dl_train)}")
    logging.info(f"Num of test batches {len(dl_test)}\n")

    train.main(dl_train, dl_test, test_dataset, epochs=10)
def main():
    global best_acc
    start_epoch = args.start_epoch  # start from epoch 0 or last checkpoint epoch

    if not os.path.isdir(args.checkpoint):
        # create the directory that will hold the trained checkpoints
        mkdir_p(args.checkpoint)

    # Data
    transform = get_transforms(input_size=args.image_size,
                               test_size=args.image_size,
                               backbone=None)
    print('==> Preparing dataset %s' % args.trainroot)
    trainset = datasets.ImageFolder(root=args.trainroot,
                                    transform=transform['val_train'])
    train_loader = data.DataLoader(trainset,
                                   batch_size=args.train_batch,
                                   shuffle=True,
                                   num_workers=args.workers,
                                   pin_memory=True)
    valset = datasets.ImageFolder(root=args.valroot,
                                  transform=transform['val_test'])
    val_loader = data.DataLoader(valset,
                                 batch_size=args.test_batch,
                                 shuffle=False,
                                 num_workers=args.workers,
                                 pin_memory=True)
    """
    # Visualization check: verify that the labels match the actual images
    inputs, classes = next(iter(train_loader))
    out = torchvision.utils.make_grid(inputs)
    class_names = ["cat", "dog"]
    visualize.imshow(out, title=[class_names[x] for x in classes])
    """

    # build the model
    model = make_model(args)
    if use_cuda:
        model = model.to(device)
    # print(model, dir(model))
    print('    Total params: %.2fM' %
          (sum(p.numel() for p in model.parameters()) / 1000000.0))

    # define loss function (criterion) and optimizer
    if use_cuda:
        criterion = nn.CrossEntropyLoss().cuda()  # cuda version
    else:
        criterion = nn.CrossEntropyLoss().to(device)
    optimizer = get_optimizer(model, args)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                           mode='min',
                                                           factor=0.2,
                                                           patience=5,
                                                           verbose=False)

    # Resume
    title = 'ImageNet-' + args.arch
    if args.resume:  # resume training from where it was interrupted
        # Load checkpoint.
        print('==> Resuming from checkpoint..')
        assert os.path.isfile(
            args.resume), 'Error: no checkpoint directory found!'
        args.checkpoint = os.path.dirname(args.resume)
        checkpoint = torch.load(args.resume)
        # best_acc = checkpoint['best_acc']
        start_epoch = checkpoint['epoch']
        model.module.load_state_dict(checkpoint['state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        logger = Logger(os.path.join(args.checkpoint, 'log.txt'),
                        title=title,
                        resume=True)
    else:
        logger = Logger(os.path.join(args.checkpoint, 'log.txt'), title=title)
        logger.set_names([
            'Learning Rate', 'Train Loss', 'Valid Loss', 'Train Acc.',
            'Valid Acc.'
        ])

    if args.evaluate:
        print('\nEvaluation only')
        test_loss, test_acc = test(val_loader, model, criterion, start_epoch,
                                   use_cuda)
        print(' Test Loss: %.8f, Test Acc: %.2f' % (test_loss, test_acc))
        return

    # Train and val
    for epoch in range(start_epoch, args.epochs):
        print('\nEpoch: [%d | %d] LR: %f' %
              (epoch + 1, args.epochs, optimizer.param_groups[0]['lr']))

        train_loss, train_acc, train_5 = train(train_loader, model, criterion,
                                               optimizer, epoch, use_cuda)
        test_loss, test_acc, test_5 = test(val_loader, model, criterion,
                                           epoch, use_cuda)
        scheduler.step(test_loss)

        # append logger file
        logger.append(
            [state['lr'], train_loss, test_loss, train_acc, test_acc])
        print(
            'train_loss:%f, val_loss:%f, train_acc:%f, train_5:%f, val_acc:%f, val_5:%f'
            % (train_loss, test_loss, train_acc, train_5, test_acc, test_5))

        # save model
        is_best = test_acc > best_acc
        best_acc = max(test_acc, best_acc)
        if len(args.gpu_id) > 1:
            save_checkpoint(
                {
                    'fold': 0,
                    'epoch': epoch + 1,
                    'state_dict': model.module.state_dict(),
                    'train_acc': train_acc,
                    'acc': test_acc,
                    'best_acc': best_acc,
                    'optimizer': optimizer.state_dict(),
                },
                is_best,
                single=True,
                checkpoint=args.checkpoint)
        else:
            save_checkpoint(
                {
                    'fold': 0,
                    'epoch': epoch + 1,
                    'state_dict': model.state_dict(),
                    'train_acc': train_acc,
                    'acc': test_acc,
                    'best_acc': best_acc,
                    'optimizer': optimizer.state_dict(),
                },
                is_best,
                single=True,
                checkpoint=args.checkpoint)

    logger.close()
    logger.plot()
    savefig(os.path.join(args.checkpoint, 'log.eps'))

    print('Best acc:')
    print(best_acc)
def main():
    global best_acc
    start_epoch = args.start_epoch  # start from epoch 0 or last checkpoint epoch

    if not os.path.isdir(args.checkpoint):
        mkdir_p(args.checkpoint)

    # Data
    transform = get_transforms(input_size=args.image_size,
                               test_size=args.image_size,
                               backbone=None)
    print('==> Preparing dataset %s' % args.trainroot)
    trainset = dataset.Dataset(root=args.trainroot,
                               transform=transform['val_train'])
    train_loader = data.DataLoader(trainset,
                                   batch_size=args.train_batch,
                                   shuffle=True,
                                   num_workers=args.workers,
                                   pin_memory=True)
    valset = dataset.TestDataset(root=args.valroot,
                                 transform=transform['val_test'])
    val_loader = data.DataLoader(valset,
                                 batch_size=args.test_batch,
                                 shuffle=False,
                                 num_workers=args.workers,
                                 pin_memory=True)

    model = make_model(args)
    if args.arch.startswith('alexnet') or args.arch.startswith('vgg'):
        model.features = torch.nn.DataParallel(model.features)
        model.cuda()
    else:
        model = torch.nn.DataParallel(model).cuda()
    cudnn.benchmark = True
    print('    Total params: %.2fM' %
          (sum(p.numel() for p in model.parameters()) / 1000000.0))

    # define loss function (criterion) and optimizer
    criterion = nn.CrossEntropyLoss().cuda()
    optimizer = get_optimizer(model, args)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                           mode='min',
                                                           factor=0.2,
                                                           patience=5,
                                                           verbose=False)

    # Resume
    title = 'ImageNet-' + args.arch
    if args.resume:
        # Load checkpoint.
        print('==> Resuming from checkpoint..')
        assert os.path.isfile(
            args.resume), 'Error: no checkpoint directory found!'
        args.checkpoint = os.path.dirname(args.resume)
        checkpoint = torch.load(args.resume)
        best_acc = checkpoint['best_acc']
        start_epoch = checkpoint['epoch']
        model.module.load_state_dict(checkpoint['state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        logger = Logger(os.path.join(args.checkpoint, 'log.txt'),
                        title=title,
                        resume=True)
    else:
        logger = Logger(os.path.join(args.checkpoint, 'log.txt'), title=title)
        logger.set_names([
            'Learning Rate', 'Train Loss', 'Valid Loss', 'Train Acc.',
            'Valid Acc.'
        ])

    if args.evaluate:
        print('\nEvaluation only')
        test_loss, test_acc = test(val_loader, model, criterion, start_epoch,
                                   use_cuda)
        print(' Test Loss: %.8f, Test Acc: %.2f' % (test_loss, test_acc))
        return

    # Train and val
    for epoch in range(start_epoch, args.epochs):
        print('\nEpoch: [%d | %d] LR: %f' %
              (epoch + 1, args.epochs, optimizer.param_groups[0]['lr']))

        train_loss, train_acc, train_5 = train(train_loader, model, criterion,
                                               optimizer, epoch, use_cuda)
        test_loss, test_acc, test_5 = test(val_loader, model, criterion,
                                           epoch, use_cuda)
        scheduler.step(test_loss)

        # append logger file
        logger.append(
            [state['lr'], train_loss, test_loss, train_acc, test_acc])
        print(
            'train_loss:%f, val_loss:%f, train_acc:%f, train_5:%f, val_acc:%f, val_5:%f'
            % (train_loss, test_loss, train_acc, train_5, test_acc, test_5))

        # save model
        is_best = test_acc > best_acc
        best_acc = max(test_acc, best_acc)
        if len(args.gpu_id) > 1:
            save_checkpoint(
                {
                    'fold': 0,
                    'epoch': epoch + 1,
                    'state_dict': model.module.state_dict(),
                    'train_acc': train_acc,
                    'acc': test_acc,
                    'best_acc': best_acc,
                    'optimizer': optimizer.state_dict(),
                },
                is_best,
                single=True,
                checkpoint=args.checkpoint)
        else:
            save_checkpoint(
                {
                    'fold': 0,
                    'epoch': epoch + 1,
                    'state_dict': model.state_dict(),
                    'train_acc': train_acc,
                    'acc': test_acc,
                    'best_acc': best_acc,
                    'optimizer': optimizer.state_dict(),
                },
                is_best,
                single=True,
                checkpoint=args.checkpoint)

    logger.close()
    logger.plot()
    savefig(os.path.join(args.checkpoint, 'log.eps'))

    print('Best acc:')
    print(best_acc)
def main():
    # Data
    TRAIN = args.trainroot
    VAL = args.valroot
    # TRAIN = '/content/train'
    # VAL = '/content/val'
    transform = get_transforms(input_size=args.image_size,
                               test_size=args.image_size,
                               backbone=None)
    print('==> Preparing dataset %s' % args.trainroot)
    # trainset = datasets.ImageFolder(root=TRAIN, transform=transform['train'])
    # valset = datasets.ImageFolder(root=VAL, transform=transform['val'])
    trainset = dataset.Dataset(root=args.trainroot,
                               transform=transform['train'])
    valset = dataset.TestDataset(root=args.valroot, transform=transform['val'])
    train_loader = DataLoader(trainset,
                              batch_size=args.train_batch,
                              shuffle=True,
                              num_workers=args.workers,
                              pin_memory=True)
    val_loader = DataLoader(valset,
                            batch_size=args.test_batch,
                            shuffle=False,
                            num_workers=args.workers,
                            pin_memory=True)

    # model initialization
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model = make_model(args)
    # TODO: merge into a function
    # freeze everything except layer4 and the fc head
    for k, v in model.named_parameters():
        # print("{}: {}".format(k, v.requires_grad))
        if not k.startswith('layer4') and not k.startswith('fc'):
            # print(k)
            v.requires_grad = False
    # sys.exit(0)
    if args.arch.startswith('alexnet') or args.arch.startswith('vgg'):
        model.features = torch.nn.DataParallel(model.features)
        model.cuda()
    elif args.ngpu:
        model = torch.nn.DataParallel(model).cuda()
    model.to(device)
    cudnn.benchmark = True
    # print the number of model parameters
    print('Total params:%.2fM' %
          (sum(p.numel() for p in model.parameters()) / 1000000.0))
    print('Trainable params:%.2fM' %
          (sum(p.numel() for p in model.parameters() if p.requires_grad) /
           1000000.0))

    # define loss function (criterion) and optimizer
    criterion = nn.CrossEntropyLoss().cuda()
    optimizer = get_optimizer(model, args)
    # reduce the learning rate when the validation metric plateaus
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                           mode='min',
                                                           factor=0.2,
                                                           patience=5,
                                                           verbose=False)

    # Resume
    epochs = args.epochs
    start_epoch = args.start_epoch
    title = 'log-' + args.arch
    best_acc = 0  # initialized before resume so a resumed best_acc is kept
    if args.resume:
        # --resume checkpoint/checkpoint.pth.tar
        # load checkpoint
        print('Resuming from checkpoint...')
        assert os.path.isfile(args.resume), 'Error: no checkpoint directory found!!'
        checkpoint = torch.load(args.resume)
        best_acc = checkpoint['best_acc']
        start_epoch = checkpoint['epoch']
        state_dict = checkpoint['state_dict']
        optim = checkpoint['optimizer']
        model.load_state_dict(state_dict, strict=False)
        optimizer.load_state_dict(optim)
        logger = Logger(os.path.join(args.checkpoint, 'log.txt'),
                        title=title,
                        resume=True)
    else:
        logger = Logger(os.path.join(args.checkpoint, 'log.txt'), title=title)
        # logger.set_names(['Learning Rate', 'Train Loss', 'Valid Loss', 'Train Acc.', 'Valid Acc.'])
        logger.set_names([
            'LR', 'epoch', 'Train Loss', 'Valid Loss', 'Train Acc.',
            'Valid Acc.',
        ])

    # Evaluation: confusion matrix, precision, recall, F1-score
    if args.evaluate and args.resume:
        print('\nEvaluate only')
        test_loss, test_acc, test_acc_5, predict_all, labels_all = test_model(
            val_loader, model, criterion, device, test=True)
        print('Test Loss:%.8f,Test top1:%.2f top5:%.2f' %
              (test_loss, test_acc, test_acc_5))
        # confusion matrix
        report = metrics.classification_report(labels_all,
                                               predict_all,
                                               target_names=class_list,
                                               digits=4)
        confusion = metrics.confusion_matrix(labels_all, predict_all)
        print('\n report ', report)
        print('\n confusion', confusion)
        with open(args.resume[:-3] + "txt", "w+") as f_obj:
            f_obj.write(report)
        # plot_Matrix(args.resume[:-3], confusion, class_list)
        return

    # model train and val
    for epoch in range(start_epoch, epochs + 1):
        print('[{}/{}] Training'.format(epoch, args.epochs))
        # train
        train_loss, train_acc, train_acc_5 = train_model(
            train_loader, model, criterion, optimizer, device)
        # val
        test_loss, test_acc, test_acc_5 = test_model(val_loader,
                                                     model,
                                                     criterion,
                                                     device,
                                                     test=None)
        scheduler.step(test_loss)
        lr_ = optimizer.param_groups[0]['lr']
        # log the key metrics
        logger.append(
            [lr_, int(epoch), train_loss, test_loss, train_acc, test_acc])
        print(
            'train_loss:%f, val_loss:%f, train_acc:%f, val_acc:%f, train_acc_5:%f, val_acc_5:%f'
            % (train_loss, test_loss, train_acc, test_acc, train_acc_5,
               test_acc_5))

        # save the model, keeping the best one
        is_best = test_acc > best_acc
        best_acc = max(test_acc, best_acc)
        if not args.ngpu:
            name = 'checkpoint_' + str(epoch) + '.pth.tar'
        else:
            name = 'ngpu_checkpoint_' + str(epoch) + '.pth.tar'
        save_checkpoint(
            {
                'epoch': epoch,
                'state_dict': model.state_dict(),
                'train_acc': train_acc,
                'test_acc': test_acc,
                'best_acc': best_acc,
                'optimizer': optimizer.state_dict()
            },
            is_best,
            checkpoint=args.checkpoint,
            filename=name)

    # logger.close()
    # logger.plot()
    # savefig(os.path.join(args.checkpoint, 'log.eps'))
    print('Best acc:')
    print(best_acc)
def test_COCODataset(self):
    from transform import get_transforms
    from dataset import COCODataset
    from utils import read_yaml

    # read config file for transforms
    config_path = "configs/default_config.yml"
    config = read_yaml(config_path)

    # form basic albumentations transform
    transforms = get_transforms(config=config, mode="val")

    # init COCODataset
    DATA_ROOT = "tests/data/"
    COCO_PATH = "tests/data/coco_true_1.json"
    dataset = COCODataset(DATA_ROOT, COCO_PATH, transforms)

    # get the image and annotations (target dict) for the first sample
    image_tensor, target_tensor = next(iter(dataset))

    # apply checks for the image tensor
    self.assertEqual(image_tensor.type(), "torch.FloatTensor")
    self.assertEqual(list(image_tensor.size()), [3, 1920, 1080])
    self.assertAlmostEqual(float(image_tensor.max()), 1.0, places=2)
    self.assertAlmostEqual(float(image_tensor.mean()), 0.39, places=2)

    # apply checks for each field in the target tensor dict
    boxes_tensor_0 = target_tensor["boxes"][0]
    self.assertEqual(boxes_tensor_0.type(), "torch.FloatTensor")
    self.assertEqual(boxes_tensor_0.cpu().numpy().tolist(),
                     [97.0, 643.0, 931.0, 1185.0])

    labels_tensor_0 = target_tensor["labels"][0]
    self.assertEqual(labels_tensor_0.type(), "torch.LongTensor")
    self.assertEqual(labels_tensor_0.cpu().numpy().item(), 1)

    masks_tensor_0 = target_tensor["masks"][0]
    self.assertEqual(masks_tensor_0.type(), "torch.ByteTensor")
    self.assertEqual(list(masks_tensor_0.size()), [1920, 1080])
    self.assertAlmostEqual(float(masks_tensor_0.max()), 1.0, places=1)

    image_id_tensor_0 = target_tensor["image_id"][0]
    self.assertEqual(image_id_tensor_0.type(), "torch.LongTensor")
    self.assertEqual(image_id_tensor_0.cpu().numpy().item(), 0)

    area_tensor_0 = target_tensor["area"][0]
    self.assertEqual(area_tensor_0.type(), "torch.FloatTensor")
    self.assertEqual(area_tensor_0.cpu().numpy().item(), 452028.0)

    iscrowd_tensor_0 = target_tensor["iscrowd"][0]
    self.assertEqual(iscrowd_tensor_0.type(), "torch.LongTensor")
    self.assertEqual(iscrowd_tensor_0.cpu().numpy().item(), 0)

    boxes_tensor_1 = target_tensor["boxes"][1]
    self.assertEqual(boxes_tensor_1.type(), "torch.FloatTensor")
    self.assertEqual(boxes_tensor_1.cpu().numpy().tolist(),
                     [97.0, 500.0, 931.0, 1185.0])

    labels_tensor_1 = target_tensor["labels"][1]
    self.assertEqual(labels_tensor_1.type(), "torch.LongTensor")
    self.assertEqual(labels_tensor_1.cpu().numpy().item(), 2)

    masks_tensor_1 = target_tensor["masks"][1]
    self.assertEqual(masks_tensor_1.type(), "torch.ByteTensor")
    self.assertEqual(list(masks_tensor_1.size()), [1920, 1080])
    self.assertAlmostEqual(float(masks_tensor_1.max()), 1.0, places=1)

    area_tensor_1 = target_tensor["area"][1]
    self.assertEqual(area_tensor_1.type(), "torch.FloatTensor")
    self.assertEqual(area_tensor_1.cpu().numpy().item(), 571290.0)
def main():
    global best_acc
    if not os.path.isdir(args.checkpoint):
        os.makedirs(args.checkpoint)

    # load data
    transformations = get_transforms(input_size=args.image_size,
                                     test_size=args.image_size)
    # train data
    train_set = CGPIM_Data(root=args.train_txt_path,
                           transform=transformations['val_train'],
                           isTrain=True)
    train_loader = data.DataLoader(train_set,
                                   batch_size=args.batch_size,
                                   shuffle=True)
    # val data
    val_set = CGPIM_Data(root=args.val_txt_path,
                         transform=transformations['val_test'],
                         isTrain=False)
    val_loader = data.DataLoader(val_set,
                                 batch_size=args.batch_size,
                                 shuffle=False)

    # define model
    model = ResNeXt(2, 3, [3, 4, 6, 3], 2)
    model.cuda()

    # define loss function and optimizer
    criterion = nn.CrossEntropyLoss().cuda()
    optimizer = get_optimizer(model, args)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                           mode='min',
                                                           factor=0.2,
                                                           patience=5,
                                                           verbose=False)

    # load checkpoint
    start_epoch = args.start_epoch

    for epoch in range(start_epoch, args.epochs):
        print('\nEpoch: [%d | %d] LR: %f' %
              (epoch + 1, args.epochs, optimizer.param_groups[0]['lr']))
        train_loss, train_acc = train(train_loader, model, criterion,
                                      optimizer, epoch)
        test_loss, val_acc = val(val_loader, model, criterion, epoch)
        scheduler.step(test_loss)
        print('train_loss: %.3f, val_loss:%.3f, train_acc:%.3f, val_acc:%.3f'
              % (train_loss, test_loss, train_acc, val_acc))

        # save model
        is_best = val_acc >= best_acc
        best_acc = max(val_acc, best_acc)
        save_checkpoint(
            {
                'fold': 0,
                'epoch': epoch + 1,
                'state_dict': model.state_dict(),
                'train_acc': train_acc,
                'acc': val_acc,
                'best_acc': best_acc,
                'optimizer': optimizer.state_dict(),
            },
            is_best,
            single=True,
            checkpoint=args.checkpoint)

    print("best acc = ", best_acc)
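# get_optimizer is shared by several of these training scripts but not
# defined in this file. A minimal sketch of one plausible implementation,
# assuming args carries lr/momentum/weight_decay fields (an illustration,
# not the original helper):
import torch.optim as optim

def get_optimizer(model, args):
    # only optimize parameters that still require gradients
    params = [p for p in model.parameters() if p.requires_grad]
    return optim.SGD(params,
                     lr=args.lr,
                     momentum=args.momentum,
                     weight_decay=args.weight_decay)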
def train(config: dict = None):
    # fix the seed for reproducible results
    SEED = config["SEED"]
    torch.manual_seed(SEED)
    torch.cuda.manual_seed(SEED)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
    random.seed(SEED)

    # parse config parameters
    DATA_ROOT = config["DATA_ROOT"]
    COCO_PATH = config["COCO_PATH"]
    DATA_ROOT_VAL = config["DATA_ROOT_VAL"]
    COCO_PATH_VAL = config["COCO_PATH_VAL"]
    EXPERIMENT_NAME = config["EXPERIMENT_NAME"]
    OPTIMIZER_NAME = config["OPTIMIZER_NAME"]
    OPTIMIZER_WEIGHT_DECAY = config["OPTIMIZER_WEIGHT_DECAY"]
    OPTIMIZER_MOMENTUM = config["OPTIMIZER_MOMENTUM"]
    OPTIMIZER_BETAS = config["OPTIMIZER_BETAS"]
    OPTIMIZER_EPS = config["OPTIMIZER_EPS"]
    OPTIMIZER_AMSGRAD = config["OPTIMIZER_AMSGRAD"]
    OPTIMIZER_ADABOUND_GAMMA = config["OPTIMIZER_ADABOUND_GAMMA"]
    OPTIMIZER_ADABOUND_FINAL_LR = config["OPTIMIZER_ADABOUND_FINAL_LR"]
    LEARNING_RATE = config["LEARNING_RATE"]
    LEARNING_RATE_STEP_SIZE = config["LEARNING_RATE_STEP_SIZE"]
    LEARNING_RATE_GAMMA = config["LEARNING_RATE_GAMMA"]
    TRAINABLE_BACKBONE_LAYERS = config["TRAINABLE_BACKBONE_LAYERS"]
    RPN_ANCHOR_SIZES = config["RPN_ANCHOR_SIZES"]
    RPN_ANCHOR_ASPECT_RATIOS = config["RPN_ANCHOR_ASPECT_RATIOS"]
    RPN_PRE_NMS_TOP_N_TRAIN = config["RPN_PRE_NMS_TOP_N_TRAIN"]
    RPN_PRE_NMS_TOP_N_TEST = config["RPN_PRE_NMS_TOP_N_TEST"]
    RPN_POST_NMS_TOP_N_TRAIN = config["RPN_POST_NMS_TOP_N_TRAIN"]
    RPN_POST_NMS_TOP_N_TEST = config["RPN_POST_NMS_TOP_N_TEST"]
    RPN_NMS_THRESH = config["RPN_NMS_THRESH"]
    RPN_FG_IOU_THRESH = config["RPN_FG_IOU_THRESH"]
    RPN_BG_IOU_THRESH = config["RPN_BG_IOU_THRESH"]
    BOX_DETECTIONS_PER_IMAGE = config["BOX_DETECTIONS_PER_IMAGE"]
    LOG_FREQ = config["LOG_FREQ"]
    COCO_AP_TYPE = config["COCO_AP_TYPE"]
    TRAIN_SPLIT_RATE = config["TRAIN_SPLIT_RATE"]
    BATCH_SIZE = config["BATCH_SIZE"]
    NUM_EPOCH = config["NUM_EPOCH"]
    DEVICE = config["DEVICE"]
    NUM_WORKERS = config["NUM_WORKERS"]

    # init directories
    directories = Directories(experiment_name=EXPERIMENT_NAME)

    # copy config file to experiment dir
    yaml_path = os.path.join(directories.experiment_dir, "config.yml")
    save_yaml(config, yaml_path)

    # init tensorboard summary writer
    writer = SummaryWriter(directories.tensorboard_dir)

    # set pytorch device
    device = torch.device(DEVICE)
    if "cuda" in DEVICE and not torch.cuda.is_available():
        print("CUDA not available, switching to CPU")
        device = torch.device("cpu")

    # use our dataset and defined transformations
    dataset = COCODataset(
        DATA_ROOT, COCO_PATH, get_transforms(config=config, mode="train")
    )
    if COCO_PATH_VAL:
        dataset_val = COCODataset(
            DATA_ROOT_VAL, COCO_PATH_VAL, get_transforms(config=config, mode="val")
        )
    else:
        dataset_val = COCODataset(
            DATA_ROOT, COCO_PATH, get_transforms(config=config, mode="val")
        )

    # +1 for background class
    num_classes = dataset.num_classes + 1
    config["NUM_CLASSES"] = num_classes

    # add category mappings to config, will be used at prediction
    category_mapping = get_category_mapping_from_coco_file(COCO_PATH)
    config["CATEGORY_MAPPING"] = category_mapping

    # split the dataset into train and val sets if a val path is not defined
    if not COCO_PATH_VAL:
        indices = torch.randperm(len(dataset)).tolist()
        num_train = int(len(indices) * TRAIN_SPLIT_RATE)
        train_indices = indices[:num_train]
        val_indices = indices[num_train:]
        dataset = torch.utils.data.Subset(dataset, train_indices)
        dataset_val = torch.utils.data.Subset(dataset_val, val_indices)

    # define training and val data loaders
    data_loader_train = torch.utils.data.DataLoader(
        dataset,
        batch_size=BATCH_SIZE,
        shuffle=True,
        num_workers=NUM_WORKERS,
        collate_fn=core.utils.collate_fn,
    )
    data_loader_val = torch.utils.data.DataLoader(
        dataset_val,
        batch_size=1,
        shuffle=False,
        num_workers=NUM_WORKERS,
        collate_fn=core.utils.collate_fn,
    )

    # get the model using our helper function
    model = get_torchvision_maskrcnn(
        num_classes=num_classes,
        trainable_backbone_layers=TRAINABLE_BACKBONE_LAYERS,
        anchor_sizes=RPN_ANCHOR_SIZES,
        anchor_aspect_ratios=RPN_ANCHOR_ASPECT_RATIOS,
        rpn_pre_nms_top_n_train=RPN_PRE_NMS_TOP_N_TRAIN,
        rpn_pre_nms_top_n_test=RPN_PRE_NMS_TOP_N_TEST,
        rpn_post_nms_top_n_train=RPN_POST_NMS_TOP_N_TRAIN,
        rpn_post_nms_top_n_test=RPN_POST_NMS_TOP_N_TEST,
        rpn_nms_thresh=RPN_NMS_THRESH,
        rpn_fg_iou_thresh=RPN_FG_IOU_THRESH,
        rpn_bg_iou_thresh=RPN_BG_IOU_THRESH,
        box_detections_per_img=BOX_DETECTIONS_PER_IMAGE,
        pretrained=True,
    )

    # move model to the right device
    model.to(device)

    # construct an optimizer
    params = [p for p in model.parameters() if p.requires_grad]
    optimizer_factory = OptimizerFactory(
        learning_rate=LEARNING_RATE,
        momentum=OPTIMIZER_MOMENTUM,
        weight_decay=OPTIMIZER_WEIGHT_DECAY,
        betas=OPTIMIZER_BETAS,
        eps=OPTIMIZER_EPS,
        amsgrad=OPTIMIZER_AMSGRAD,
        adabound_gamma=OPTIMIZER_ADABOUND_GAMMA,
        adabound_final_lr=OPTIMIZER_ADABOUND_FINAL_LR,
    )
    optimizer = optimizer_factory.get(params, OPTIMIZER_NAME)

    # and a learning rate scheduler
    lr_scheduler = torch.optim.lr_scheduler.StepLR(
        optimizer, step_size=LEARNING_RATE_STEP_SIZE, gamma=LEARNING_RATE_GAMMA
    )

    # create coco index
    print("Creating COCO index...")
    coco_api_train = get_coco_api_from_dataset(data_loader_train.dataset)
    coco_api_val = get_coco_api_from_dataset(data_loader_val.dataset)

    # train for NUM_EPOCH epochs
    # initialized once, outside the loop, so the best AP persists across epochs
    best_bbox_05095_ap = -1
    for epoch in range(NUM_EPOCH):
        # train for one epoch, logging every LOG_FREQ iterations
        train_one_epoch(
            model=model,
            optimizer=optimizer,
            data_loader=data_loader_train,
            coco_api=coco_api_train,
            device=device,
            epoch=epoch,
            log_freq=LOG_FREQ,
            coco_ap_type=COCO_AP_TYPE,
            writer=writer,
        )
        # update the learning rate
        lr_scheduler.step()

        # get iteration number
        num_images = len(data_loader_train.dataset)
        iter_num = epoch * num_images

        # evaluate on the val dataset
        loss_lists, coco_evaluator = evaluate(
            model=model,
            data_loader=data_loader_val,
            coco_api=coco_api_val,
            device=device,
            iter_num=iter_num,
            coco_ap_type=COCO_AP_TYPE,
            writer=writer,
        )

        # update best model if it has the best bbox 0.50:0.95 AP
        bbox_05095_ap = coco_evaluator.coco_eval["bbox"].stats[0]
        if bbox_05095_ap > best_bbox_05095_ap:
            model_dict = {"state_dict": model.state_dict(), "config": config}
            torch.save(model_dict, directories.best_weight_path)
            best_bbox_05095_ap = bbox_05095_ap

    # save final model
    model_dict = {"state_dict": model.state_dict(), "config": config}
    torch.save(model_dict, directories.last_weight_path)
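# A minimal sketch of how train() might be driven, reusing the read_yaml
# helper and config path seen in test_COCODataset above; the entry-point
# wrapper itself is an illustrative assumption.
if __name__ == '__main__':
    from utils import read_yaml

    config = read_yaml('configs/default_config.yml')
    train(config=config)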
def main():
    global best_acc
    if not os.path.isdir(args.checkpoint):
        os.makedirs(args.checkpoint)

    # data
    transformations = get_transforms(input_size=args.image_size,
                                     test_size=args.image_size)
    train_set = data_gen.Dataset(root=args.train_txt_path,
                                 transform=transformations['val_train'])
    train_loader = data.DataLoader(train_set,
                                   batch_size=args.batch_size,
                                   shuffle=True,
                                   num_workers=4)
    val_set = data_gen.ValDataset(root=args.val_txt_path,
                                  transform=transformations['val_test'])
    val_loader = data.DataLoader(val_set,
                                 batch_size=args.batch_size,
                                 shuffle=False,
                                 num_workers=4)

    # model
    model = make_model(args)
    if use_cuda:
        model.cuda()

    # define loss function and optimizer
    if use_cuda:
        criterion = nn.CrossEntropyLoss().cuda()
    else:
        criterion = nn.CrossEntropyLoss()
    optimizer = get_optimizer(model, args)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                           mode='min',
                                                           factor=0.2,
                                                           patience=5,
                                                           verbose=False)

    # load checkpoint
    start_epoch = args.start_epoch
    # if args.resume:
    #     print("===> Resuming from checkpoint")
    #     assert os.path.isfile(args.resume), 'Error: no checkpoint directory found'
    #     args.checkpoint = os.path.dirname(args.resume)  # strip the filename, keep the directory
    #     checkpoint = torch.load(args.resume)
    #     best_acc = checkpoint['best_acc']
    #     start_epoch = checkpoint['epoch']
    #     model.module.load_state_dict(checkpoint['state_dict'])
    #     optimizer.load_state_dict(checkpoint['optimizer'])

    # train
    for epoch in range(start_epoch, args.epochs):
        print('\nEpoch: [%d | %d] LR: %f' %
              (epoch + 1, args.epochs, optimizer.param_groups[0]['lr']))
        train_loss, train_acc = train(train_loader, model, criterion,
                                      optimizer, epoch, use_cuda)
        test_loss, val_acc = val(val_loader, model, criterion, epoch,
                                 use_cuda)
        scheduler.step(test_loss)
        print(
            f'train_loss:{train_loss}\t val_loss:{test_loss}\t train_acc:{train_acc} \t val_acc:{val_acc}'
        )

        # save model
        is_best = val_acc >= best_acc
        best_acc = max(val_acc, best_acc)
        save_checkpoint(
            {
                'fold': 0,
                'epoch': epoch + 1,
                'state_dict': model.state_dict(),
                'train_acc': train_acc,
                'acc': val_acc,
                'best_acc': best_acc,
                'optimizer': optimizer.state_dict(),
            },
            is_best,
            single=True,
            checkpoint=args.checkpoint)

    print("best acc = ", best_acc)
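# save_checkpoint is called throughout these training loops but not defined
# in this file. A plausible minimal sketch, assuming the common "write
# latest, copy to best" pattern; the signature is inferred from the call
# sites above and is an assumption, not the original helper.
import os
import shutil
import torch

def save_checkpoint(state, is_best, single=True,
                    checkpoint='checkpoint', filename='checkpoint.pth.tar'):
    # `single` is accepted for call-site compatibility; unused in this sketch
    filepath = os.path.join(checkpoint, filename)
    torch.save(state, filepath)
    if is_best:
        # keep a separate copy of the best-performing checkpoint
        shutil.copyfile(filepath,
                        os.path.join(checkpoint, 'model_best.pth.tar'))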