def main(fold):
    """Run inference for one CV fold using its best-loss checkpoint."""
    # Resolve the best-loss checkpoint path for this fold, then let the
    # CLI-args helper decide the final model path.
    checkpoint_path = "%s/%s_fold_%s_model_best_loss.pth.tar" % (config.best_models, config.model_name, str(fold))
    args = get_args(checkpoint_path)

    # Rebuild the two-branch network and restore the trained weights.
    net = MultiModalNet("se_resnext101_32x4d", "dpn26", 0.5)
    saved = torch.load(args.model_path)
    net.load_state_dict(saved['state_dict'])
    net.to(device)
    net.eval()

    # Stream the test set one sample at a time and emit predictions.
    test_files = pd.read_csv("./test.csv")
    test_gen = MultiModalDataset(test_files, config.test_data, config.test_vis, augument=False, mode="test")
    test_loader = DataLoader(test_gen, 1, shuffle=False, pin_memory=True, num_workers=1)
    test(test_loader, net, fold)
def main():
    """Train the multimodal model on a 90/10 split, then run test() with the
    best-accuracy checkpoint."""
    fold = 0
    # 4.1 mkdirs
    if not os.path.exists(config.submit):
        os.makedirs(config.submit)
    if not os.path.exists(config.weights + config.model_name + os.sep + str(fold)):
        os.makedirs(config.weights + config.model_name + os.sep + str(fold))
    if not os.path.exists(config.best_models):
        os.mkdir(config.best_models)
    if not os.path.exists("./logs/"):
        os.mkdir("./logs/")
    #4.2 get model
    # NOTE(review): other variants in this file construct
    # MultiModalNet(<image-arch>, <visit-arch>, <drop>) with three arguments;
    # confirm this two-argument call matches this variant's signature.
    model = MultiModalNet("dpn26", 0.5)
    #4.3 optim & criterion
    optimizer = optim.SGD(model.parameters(), lr=config.lr, momentum=0.9, weight_decay=1e-4)
    # optimizer = torch.optim.Adam(model.parameters(), lr = config.lr)  # alternative optimiser
    criterion = nn.CrossEntropyLoss().to(device)
    # Bookkeeping: best_results / val_metrics are [accuracy, loss, f1].
    start_epoch = 0
    best_acc = 0
    best_loss = np.inf
    best_f1 = 0
    best_results = [0, np.inf, 0]
    val_metrics = [0, np.inf, 0]
    resume = False
    if resume:
        # Restores only the scalar bests and epoch counter.
        # NOTE(review): model/optimizer state is not reloaded here — confirm
        # that is intentional.
        checkpoint = torch.load(
            r'./checkpoints/best_models/seresnext101_dpn92_defrog_multimodal_fold_0_model_best_loss.pth.tar'
        )
        best_acc = checkpoint['best_acc']
        best_loss = checkpoint['best_loss']
        best_f1 = checkpoint['best_f1']
        start_epoch = checkpoint['epoch']
    if torch.cuda.device_count() > 1:
        model = nn.DataParallel(model)
    model.to(device)
    all_files = pd.read_csv("./train.csv")
    test_files = pd.read_csv("./test.csv")
    train_data_list, val_data_list = train_test_split(
        all_files, test_size=0.1, random_state=2050)  # split the training set into train and valid subsets
    # load dataset
    train_gen = MultiModalDataset(train_data_list, config.train_data, config.train_vis, mode="train")
    train_loader = DataLoader(
        train_gen,
        batch_size=config.batch_size,
        shuffle=True,
        pin_memory=True,
        num_workers=1)  #num_worker is limited by shared memory in Docker!
    val_gen = MultiModalDataset(val_data_list, config.train_data, config.train_vis, augument=False, mode="train")
    val_loader = DataLoader(val_gen, batch_size=config.batch_size, shuffle=False, pin_memory=True, num_workers=1)
    test_gen = MultiModalDataset(test_files, config.test_data, config.test_vis, augument=False, mode="test")
    test_loader = DataLoader(test_gen, 1, shuffle=False, pin_memory=True, num_workers=1)
    #scheduler = lr_scheduler.StepLR(optimizer,step_size=10,gamma=0.1)
    scheduler = lr_scheduler.ReduceLROnPlateau(optimizer)
    #n_batches = int(len(train_loader.dataset) // train_loader.batch_size)
    #scheduler = CosineAnnealingLR(optimizer, T_max=n_batches*2)
    start = timer()
    # Per-epoch buffers.
    # NOTE(review): hard-coded for 30 epochs — indexing presumably breaks if
    # config.epochs > 30; confirm against evaluate()'s use of val_accuracy.
    arr_loss = np.zeros((30, len(train_loader)))
    val_accuracy = np.zeros((30, 1))
    #train
    for epoch in range(0, config.epochs):
        # NOTE(review): ReduceLROnPlateau.step expects a metric (e.g. val
        # loss); passing the epoch index drives the schedule off the counter —
        # confirm the intent.
        scheduler.step(epoch)
        # train
        train_metrics = train(train_loader, model, criterion, optimizer, epoch, val_metrics, best_results, start)
        # val
        val_metrics = evaluate(val_loader, model, criterion, epoch, train_metrics, best_results, start, val_accuracy)
        # check results: track the best [acc, loss, f1] seen so far
        is_best_acc = val_metrics[0] > best_results[0]
        best_results[0] = max(val_metrics[0], best_results[0])
        is_best_loss = val_metrics[1] < best_results[1]
        best_results[1] = min(val_metrics[1], best_results[1])
        is_best_f1 = val_metrics[2] > best_results[2]
        best_results[2] = max(val_metrics[2], best_results[2])
        # save model
        save_checkpoint(
            {
                "epoch": epoch + 1,
                "model_name": config.model_name,
                "state_dict": model.state_dict(),
                "best_acc": best_results[0],
                "best_loss": best_results[1],
                "optimizer": optimizer.state_dict(),
                "fold": fold,
                "best_f1": best_results[2],
            }, is_best_acc, is_best_loss, is_best_f1, fold)
        # print logs
        print('\r', end='', flush=True)
        log.write('%s %5.1f %6.1f | %0.3f %0.3f %0.3f | %0.3f %0.3f %0.3f | %s %s %s | %s' % (\
            "best", epoch, epoch,
            train_metrics[0], train_metrics[1], train_metrics[2],
            val_metrics[0], val_metrics[1], val_metrics[2],
            str(best_results[0])[:8], str(best_results[1])[:8], str(best_results[2])[:8],
            time_to_str((timer() - start), 'min')))
        log.write("\n")
        time.sleep(0.01)
    # Reload the best-accuracy checkpoint and run inference on the test set.
    best_model = torch.load("%s/%s_fold_%s_model_best_acc.pth.tar" % (config.best_models, config.model_name, str(fold)))
    model.load_state_dict(best_model["state_dict"])
    test(test_loader, model, fold)
# NOTE(review): stray fragment of a function truncated above this chunk
# (`return preds + 1`). At module level it is a SyntaxError, so it is kept
# only as a comment — restore it inside its original function if needed.
# return preds + 1


def dotta(model, b_debug=False):
    """Run test-time-augmented inference over the test images and write
    submit/tta.txt (one "<id>\\t<pred>" line per image).

    Args:
        model: network; moved to `device` and switched to eval mode here.
        b_debug: when True, stop after the first image (quick smoke test).
    """
    model.to(device)
    model.eval()

    test_image_paths = list(paths.list_images(config.test_data))
    test_image_paths.sort()
    test_visit_path = config.test_vis

    # FIX: use a context manager so the submission file is closed even if
    # ttacore() raises mid-loop (the original leaked the handle on error).
    with open('submit/tta.txt', 'w') as fo:
        for i_path in test_image_paths:
            i_name = i_path.split(os.path.sep)[-1]
            # The matching visit record lives next to the image, as .npy.
            v_name = i_name.replace('jpg', 'npy')
            v_path = os.path.sep.join([test_visit_path, v_name])
            preds = ttacore(model, i_path, v_path, b_debug)
            print('[INFO] ID-->Preds: {0} --> {1}'.format(i_name[:-4].zfill(6), preds))
            line = i_name[:-4].zfill(6) + '\t' + str(preds).zfill(3) + '\n'
            fo.write(line)
            if b_debug:
                break


# ---------------------------------------------------------------------------
if __name__ == "__main__":
    #main()
    fold = 0
    model = MultiModalNet("se_resnext50_32x4d", "dpn26", 0.5)
    best_model = torch.load("%s/%s_fold_%s_model_best_loss.pth.tar" % (config.best_models, config.model_name, str(fold)))
    model.load_state_dict(best_model["state_dict"])
    dotta(model, False)
def main():
    """Train the se_resnext50+dpn26 multimodal model on a 90/10 split.

    Only trains and checkpoints; no test-set inference is run at the end of
    this variant.
    """
    fold = 0  # meaningless (single split, kept for checkpoint naming)
    # 4.1 mkdirs
    if not os.path.exists(config.submit):
        os.makedirs(config.submit)
    # if not os.path.exists(config.weights + config.model_name + os.sep +str(fold)):#checkpoints
    #     os.makedirs(config.weights + config.model_name + os.sep +str(fold))
    # if not os.path.exists(config.best_models):#best model
    #     os.mkdir(config.best_models)
    if not os.path.exists("./logs/"):
        os.mkdir("./logs/")
    #4.2 get model
    basenet = "se_resnext50_32x4d"
    model = MultiModalNet(basenet, "dpn26", 0.5)  # drop out = 0.5
    #4.3 optim & criterion
    optimizer = optim.SGD(model.parameters(), lr=config.lr, momentum=0.9,
                          weight_decay=1e-4)  # weight_decay is L2 regularization
    criterion = nn.CrossEntropyLoss().to(device)
    # Bookkeeping: best_results / val_metrics are [accuracy, loss, f1].
    start_epoch = 0
    best_acc = 0
    best_loss = np.inf
    best_f1 = 0
    best_results = [0, np.inf, 0]
    val_metrics = [0, np.inf, 0]
    if torch.cuda.device_count() > 1:
        model = nn.DataParallel(model)
    model.to(device)
    all_files = pd.read_csv("./train.csv")
    test_files = pd.read_csv("./test.csv")
    train_data_list, val_data_list = train_test_split(all_files, test_size=0.1, random_state=1996)
    # load dataset
    train_gen = MultiModalDataset(train_data_list, config.train_data, config.train_vis, mode="train")
    train_loader = DataLoader(
        train_gen,
        batch_size=config.batch_size,
        shuffle=True,
        pin_memory=True,
        num_workers=1)  #num_worker is limited by shared memory in Docker!
    val_gen = MultiModalDataset(val_data_list, config.train_data, config.train_vis, augument=False, mode="train")
    val_loader = DataLoader(val_gen, batch_size=config.batch_size, shuffle=False, pin_memory=True, num_workers=1)
    test_gen = MultiModalDataset(test_files, config.test_data, config.test_vis, augument=False, mode="test")
    test_loader = DataLoader(test_gen, 1, shuffle=False, pin_memory=True, num_workers=1)
    #scheduler = lr_scheduler.StepLR(optimizer,step_size=10,gamma=0.1)
    scheduler = lr_scheduler.ReduceLROnPlateau(optimizer)
    #n_batches = int(len(train_loader.dataset) // train_loader.batch_size)
    #scheduler = CosineAnnealingLR(optimizer, T_max=n_batches*2)
    start = timer()
    #model.load_state_dict(torch.load('checkpoints/se_resnext50_32x4d_fold_0_checkpoint.pth')['state_dict'])
    for epoch in range(0, config.epochs):
        # NOTE(review): ReduceLROnPlateau.step expects a metric (e.g. val
        # loss); passing the epoch index drives the schedule off the counter —
        # confirm the intent.
        scheduler.step(epoch)
        #train
        train_metrics = train(train_loader, model, criterion, optimizer, epoch, val_metrics, best_results, start)
        # val
        val_metrics = evaluate(val_loader, model, criterion, epoch, train_metrics, best_results, start)
        # check results: track the best [acc, loss, f1] seen so far
        is_best_acc = val_metrics[0] > best_results[0]
        best_results[0] = max(val_metrics[0], best_results[0])
        is_best_loss = val_metrics[1] < best_results[1]
        best_results[1] = min(val_metrics[1], best_results[1])
        is_best_f1 = val_metrics[2] > best_results[2]
        best_results[2] = max(val_metrics[2], best_results[2])
        # save model — separate checkpoints are written only when best_acc /
        # best_f1 / best_loss respectively improve
        save_checkpoint(
            {
                "basenet": basenet,
                "epoch": epoch + 1,
                "model_name": config.model_name,
                "state_dict": model.state_dict(),
                "best_acc": best_results[0],
                "best_loss": best_results[1],
                "optimizer": optimizer.state_dict(),
                "fold": fold,
                "best_f1": best_results[2],
            }, is_best_acc, is_best_loss, is_best_f1, fold)
        # print logs
        print('\r', end='', flush=True)
        log.write('%s %5.1f %6.1f | %0.3f %0.3f %0.3f | %0.3f %0.3f %0.3f | %s %s %s | %s' % (\
            "best", epoch, epoch,
            train_metrics[0], train_metrics[1], train_metrics[2],
            val_metrics[0], val_metrics[1], val_metrics[2],
            str(best_results[0])[:8], str(best_results[1])[:8], str(best_results[2])[:8],
            time_to_str((timer() - start), 'min')))
        log.write("\n")
        time.sleep(0.01)
# Ensemble-inference setup: load two trained MultiModalNet variants whose
# checkpoints were saved from an nn.DataParallel wrapper (keys carry a
# "module." prefix that must be stripped before load_state_dict).
import torch.nn.functional as F
from torchvision import transforms as T
from imgaug import augmenters as iaa
import random
import pathlib
import cv2
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from multimodal import MultiModalDataset, MultiModalDatasetTTA, MultiModalNet
from collections import OrderedDict

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Model 1: se_resnext50 image branch.
model1 = MultiModalNet('se_resnext50_32x4d', 'DPN26', 0.5)
checkpoint1 = torch.load(
    'checkpoints/se_resnext50_32x4d_fold_0_checkpoint.pth')
new_state_dict = OrderedDict()
for k, v in checkpoint1['state_dict'].items():
    name = k[7:]  # remove the "module." prefix added by DataParallel
    new_state_dict[name] = v
model1.load_state_dict(new_state_dict)

# Model 2: se_resnext101 image branch.
model2 = MultiModalNet('se_resnext101_32x4d', 'DPN26', 0.5)
checkpoint2 = torch.load(
    'checkpoints/se_resnext101_32x4d_fold_0_checkpoint.pth')
new_state_dict = OrderedDict()
for k, v in checkpoint2['state_dict'].items():
    name = k[7:]  # remove the "module." prefix added by DataParallel
    new_state_dict[name] = v
# NOTE(review): model2.load_state_dict(new_state_dict) is not called within
# this chunk — presumably it follows just below; verify it is not missing.
def main():
    """K-fold training / OOF prediction / test-time ensembling driver.

    Behaviour is selected by flags on `config`:
      * config.OOF     -> dump out-of-fold softmax probabilities to ../data/oof2.pkl
      * config.train   -> train (K folds when config.FOLD > 1, single split otherwise)
      * config.predict -> load the 5 per-fold best-acc checkpoints and run test()
    """
    # 4.1 create output directories
    if not os.path.exists(config.submit):
        os.makedirs(config.submit)
    for fold in range(config.FOLD):
        if not os.path.exists(config.weights + config.model_name + os.sep + str(fold)):
            os.makedirs(config.weights + config.model_name + os.sep + str(fold))
    if not os.path.exists(config.best_models):
        os.mkdir(config.best_models)
    if not os.path.exists("./logs/"):
        os.mkdir("./logs/")

    # Optional "magic" LightGBM features (required by the FOLD == 1 branch).
    # with open('../data/train_lgb.pkl', 'rb') as f:
    #     magic_trains = pickle.load(f)
    # with open('../data/test_lgb.pkl', 'rb') as f:
    #     magic_tests = pickle.load(f)

    start = timer()

    # Shuffle the full training table once; StratifiedKFold below runs with
    # shuffle=False, so this is what randomises (reproducibly) the fold rows.
    all_files = pd.read_csv("../data/train.csv")
    all_files = all_files.sample(frac=1, random_state=666)
    test_files = pd.read_csv("../data/test.csv")

    max_epoch = config.epochs
    if config.debug:
        # Tiny subsets + one epoch for a fast smoke run.
        all_files = all_files.iloc[:1000]
        test_files = test_files.iloc[:100]
        config.batch_size = 2
        max_epoch = 1
    train_label = np.array(all_files['Target'])

    if config.OOF:
        result = np.zeros((len(all_files), 9))  # 9 target classes
        # FIX: `random_state=2019` was passed together with shuffle=False; it
        # has no effect in that mode and raises ValueError on
        # scikit-learn >= 0.24.  Dropping it leaves the fold assignment
        # unchanged (rows were already shuffled above).
        skf = StratifiedKFold(n_splits=config.FOLD, shuffle=False)
        for fold, (train_idx, val_idx) in enumerate(skf.split(all_files, train_label)):
            print('fold:', fold)
            val_data_list = all_files.iloc[val_idx]
            # load dataset
            val_gen = MultiModalDataset(val_data_list, config.train_data, config.train_vis, augument=False, mode="train")
            val_loader = DataLoader(val_gen, batch_size=config.batch_size, shuffle=False, pin_memory=True, num_workers=1)

            best_model = torch.load(
                "%s/%s_fold_%s_model_best_acc.pth.tar" % (config.best_models, config.model_name, str(fold)))
            model = MultiModalNet(drop=0.5)
            if torch.cuda.device_count() > 1:
                model = nn.DataParallel(model)
            model.to(device)
            model.eval()
            # NOTE(review): when wrapped in DataParallel, this load requires
            # the checkpoint keys to carry the "module." prefix — verify
            # against how save_checkpoint() stored them.
            model.load_state_dict(best_model["state_dict"])

            result_oof = []
            with torch.no_grad():
                for i, (images, (visit, ), target) in tqdm(enumerate(val_loader)):
                    image_var = images.to(device)
                    visit = visit.to(device)
                    indx_target = target.clone()
                    target = torch.from_numpy(np.array(target)).float().to(device)
                    # FIX: dim=1 made explicit (softmax over the class axis);
                    # implicit-dim softmax is deprecated and only warned — for
                    # 2-D logits the legacy default is dim=1, so behaviour is
                    # unchanged.
                    y_oof = np.array(F.softmax(model(image_var, visit), dim=1).cpu().data.numpy())
                    result_oof.extend(y_oof)
            result_oof = np.array(result_oof)
            print(len(val_idx), result_oof.shape)
            result[val_idx] = result_oof
        print(result.shape)
        with open("../data/oof2.pkl", 'wb') as f:
            pickle.dump(result, f)

    if config.train and config.FOLD > 1:
        skf = StratifiedKFold(n_splits=config.FOLD, shuffle=False)  # FIX: see note above
        for fold, (train_idx, val_idx) in enumerate(skf.split(all_files, train_label)):
            print('fold:', fold)
            train_data_list = all_files.iloc[train_idx]
            val_data_list = all_files.iloc[val_idx]

            # load dataset
            train_gen = MultiModalDataset(train_data_list, config.train_data, config.train_vis, mode="train")
            train_loader = DataLoader(
                train_gen, batch_size=config.batch_size, shuffle=True, pin_memory=True,
                num_workers=1)  # num_worker is limited by shared memory in Docker!
            val_gen = MultiModalDataset(val_data_list, config.train_data, config.train_vis, augument=False, mode="train")
            val_loader = DataLoader(val_gen, batch_size=config.batch_size, shuffle=False, pin_memory=True, num_workers=1)

            # per-fold bookkeeping: [accuracy, loss, f1]
            start_epoch = 0
            best_acc = 0
            best_loss = np.inf
            best_f1 = 0
            best_results = [0, np.inf, 0]
            val_metrics = [0, np.inf, 0]

            # 4.2 get model
            model = MultiModalNet(drop=0.5)
            if fold == 0:
                # Print the parameter budget once.
                total_num = sum(p.numel() for p in model.parameters())
                trainable_num = sum(p.numel() for p in model.parameters() if p.requires_grad)
                print('Total', total_num, 'Trainable', trainable_num)

            # 4.3 optim & criterion
            optimizer = Nadam(model.parameters(), lr=5e-4)
            criterion = nn.CrossEntropyLoss().to(device)
            scheduler = lr_scheduler.MultiStepLR(optimizer, [6, 12, 18], gamma=0.5)

            if torch.cuda.device_count() > 1:
                model = nn.DataParallel(model)
            model.to(device)

            # train, with early stopping: give up after 5 epochs with no
            # validation-accuracy improvement
            best_acc_epoch = 0
            for epoch in range(0, max_epoch):
                if epoch - best_acc_epoch > 5:
                    break
                scheduler.step(epoch)
                # train
                train_metrics = train(train_loader, model, criterion, optimizer, epoch, val_metrics, best_results, start)
                # val
                val_metrics = evaluate(val_loader, model, criterion, epoch, train_metrics, best_results, start)
                # check results: track the best [acc, loss, f1] seen so far
                is_best_acc = val_metrics[0] > best_results[0]
                if is_best_acc:
                    best_acc_epoch = epoch
                best_results[0] = max(val_metrics[0], best_results[0])
                is_best_loss = val_metrics[1] < best_results[1]
                best_results[1] = min(val_metrics[1], best_results[1])
                is_best_f1 = val_metrics[2] > best_results[2]
                best_results[2] = max(val_metrics[2], best_results[2])
                # save model (separate files only when a metric improves)
                save_checkpoint(
                    {
                        "epoch": epoch + 1,
                        "model_name": config.model_name,
                        "state_dict": model.state_dict(),
                        "best_acc": best_results[0],
                        "best_loss": best_results[1],
                        "optimizer": optimizer.state_dict(),
                        "fold": fold,
                        "best_f1": best_results[2],
                    }, is_best_acc, is_best_loss, is_best_f1, fold)
                # print logs
                print('\r', end='', flush=True)
                log.write('%s %5.1f %6.1f | %0.3f %0.3f %0.3f | %0.3f %0.3f %0.3f | %s %s %s | %s' % (
                    "best", epoch, epoch,
                    train_metrics[0], train_metrics[1], train_metrics[2],
                    val_metrics[0], val_metrics[1], val_metrics[2],
                    str(best_results[0])[:8], str(best_results[1])[:8], str(best_results[2])[:8],
                    time_to_str((timer() - start), 'min')))
                log.write("\n")
                time.sleep(0.01)

    if config.train and config.FOLD == 1:
        # NOTE(review): `magic_trains` is only defined by the commented-out
        # pickle load at the top of this function, so this branch raises
        # NameError as written — restore that load before using FOLD == 1.
        train_data_list, val_data_list, train_magic, val_magic = train_test_split(
            all_files, magic_trains, test_size=0.1, random_state=2050)
        # load dataset
        train_gen = MultiModalDataset(train_data_list, train_magic, config.train_data, config.train_vis, mode="train")
        train_loader = DataLoader(
            train_gen, batch_size=config.batch_size, shuffle=True, pin_memory=True,
            num_workers=1)  # num_worker is limited by shared memory in Docker!
        val_gen = MultiModalDataset(val_data_list, val_magic, config.train_data, config.train_vis, augument=False, mode="train")
        val_loader = DataLoader(val_gen, batch_size=config.batch_size, shuffle=False, pin_memory=True, num_workers=1)

        # bookkeeping: [accuracy, loss, f1]
        start_epoch = 0
        best_acc = 0
        best_loss = np.inf
        best_f1 = 0
        best_results = [0, np.inf, 0]
        val_metrics = [0, np.inf, 0]

        # 4.2 get model
        model = MultiModalNet(drop=0.5)
        # 4.3 optim & criterion
        optimizer = torch.optim.Adamax(model.parameters(), 0.001)
        criterion = nn.CrossEntropyLoss().to(device)
        # NOTE(review): ReduceLROnPlateau.step expects a metric value, not the
        # epoch index passed below — verify the intended schedule.
        scheduler = lr_scheduler.ReduceLROnPlateau(optimizer)

        if torch.cuda.device_count() > 1:
            model = nn.DataParallel(model)
        model.to(device)

        # train, with the same 5-epoch early-stopping rule as above
        best_acc_epoch = 0
        for epoch in range(0, max_epoch):
            if epoch - best_acc_epoch > 5:
                break
            scheduler.step(epoch)
            # train
            train_metrics = train(train_loader, model, criterion, optimizer, epoch, val_metrics, best_results, start)
            # val
            val_metrics = evaluate(val_loader, model, criterion, epoch, train_metrics, best_results, start)
            # check results
            is_best_acc = val_metrics[0] > best_results[0]
            if is_best_acc:
                best_acc_epoch = epoch
            best_results[0] = max(val_metrics[0], best_results[0])
            is_best_loss = val_metrics[1] < best_results[1]
            best_results[1] = min(val_metrics[1], best_results[1])
            is_best_f1 = val_metrics[2] > best_results[2]
            best_results[2] = max(val_metrics[2], best_results[2])
            # save model
            save_checkpoint(
                {
                    "epoch": epoch + 1,
                    "model_name": config.model_name,
                    "state_dict": model.state_dict(),
                    "best_acc": best_results[0],
                    "best_loss": best_results[1],
                    "optimizer": optimizer.state_dict(),
                    "fold": fold,
                    "best_f1": best_results[2],
                }, is_best_acc, is_best_loss, is_best_f1, fold)
            # print logs
            print('\r', end='', flush=True)
            log.write('%s %5.1f %6.1f | %0.3f %0.3f %0.3f | %0.3f %0.3f %0.3f | %s %s %s | %s' % (
                "best", epoch, epoch,
                train_metrics[0], train_metrics[1], train_metrics[2],
                val_metrics[0], val_metrics[1], val_metrics[2],
                str(best_results[0])[:8], str(best_results[1])[:8], str(best_results[2])[:8],
                time_to_str((timer() - start), 'min')))
            log.write("\n")
            time.sleep(0.01)

    if config.predict:
        # test data: build a 5-model ensemble from the per-fold best-acc
        # checkpoints and run TTA inference.
        models = []
        for fold in range(5):
            best_model = torch.load(
                "%s/%s_fold_%s_model_best_acc.pth.tar" % (config.best_models, config.model_name, str(fold)))
            model = MultiModalNet(drop=0.5)
            if torch.cuda.device_count() > 1:
                model = nn.DataParallel(model)
            model.to(device)
            model.eval()
            model.load_state_dict(best_model["state_dict"])
            models.append(model)
        test_gen = MultiModalDataset(test_files, config.test_data, config.test_vis, augument=False, mode="test", TTA=True)
        test_loader = DataLoader(test_gen, batch_size=config.batch_size, shuffle=False, pin_memory=True, num_workers=1)
        # predict
        test(test_loader, models)
def main():
    """Train the se_resnext50+dpn26 multimodal model with Adam on a 90/10
    split, then run test() with the best-loss checkpoint.

    Cleanup: removed a ~40-line commented-out copy of a custom SGD optimizer
    class (with dual weight-decay terms) that had been pasted inside this
    function and obscured the training logic; all live code is unchanged.
    """
    fold = 0
    # 4.1 mkdirs
    if not os.path.exists(config.submit):
        os.makedirs(config.submit)
    if not os.path.exists(config.weights + config.model_name + os.sep + str(fold)):
        os.makedirs(config.weights + config.model_name + os.sep + str(fold))
    if not os.path.exists(config.best_models):
        os.mkdir(config.best_models)
    if not os.path.exists("./logs/"):
        os.mkdir("./logs/")

    #4.2 get model
    # model=MultiModalNet("se_resnext101_32x4d","dpn107",0.5)
    model = MultiModalNet("se_resnext50_32x4d", "dpn26", 0.5)

    #4.3 optim & criterion
    criterion = nn.CrossEntropyLoss().to(device)  # multi-class classification, hence cross-entropy
    # optimizer = optim.SGD(model.parameters(),lr = config.lr,momentum=0.9,weight_decay=1e-4)
    optimizer = optim.Adam(
        model.parameters(), lr=config.lr, betas=(0.9, 0.999),
        weight_decay=1e-4)  # eps defaults to 1e-08

    # Bookkeeping: best_results / val_metrics are [accuracy, loss, f1].
    start_epoch = 0
    best_acc = 0
    best_loss = np.inf
    best_f1 = 0
    best_results = [0, np.inf, 0]
    val_metrics = [0, np.inf, 0]

    resume = False
    if resume:
        # Restores only the scalar bests and epoch counter.
        # NOTE(review): model/optimizer state is not reloaded here — confirm
        # that is intentional.
        checkpoint = torch.load(
            r'./checkpoints/best_models/seresnext101_dpn107_defrog_multimodal_fold_0_model_best_loss.pth.tar'
        )
        best_acc = checkpoint['best_acc']
        best_loss = checkpoint['best_loss']
        best_f1 = checkpoint['best_f1']
        start_epoch = checkpoint['epoch']

    if torch.cuda.device_count() > 1:
        model = nn.DataParallel(model)
    model.to(device)

    all_files = pd.read_csv("./train.csv")
    test_files = pd.read_csv("./test.csv")
    train_data_list, val_data_list = train_test_split(all_files, test_size=0.1, random_state=2050)

    # load dataset
    train_gen = MultiModalDataset(train_data_list, config.train_data, config.train_vis, mode="train")
    train_loader = DataLoader(
        train_gen, batch_size=config.batch_size, shuffle=True, pin_memory=True,
        num_workers=1)  #num_worker is limited by shared memory in Docker!
    val_gen = MultiModalDataset(val_data_list, config.train_data, config.train_vis, augument=False, mode="train")
    val_loader = DataLoader(val_gen, batch_size=config.batch_size, shuffle=False, pin_memory=True, num_workers=1)
    test_gen = MultiModalDataset(test_files, config.test_data, config.test_vis, augument=False, mode="test")
    test_loader = DataLoader(test_gen, 1, shuffle=False, pin_memory=True, num_workers=1)

    #scheduler = lr_scheduler.StepLR(optimizer,step_size=10,gamma=0.1,last_epoch = -1)
    # NOTE(review): ReduceLROnPlateau.step expects a metric (e.g. val loss),
    # but the loop below passes the epoch index — verify the intended
    # schedule.
    scheduler = lr_scheduler.ReduceLROnPlateau(optimizer)

    start = timer()
    #train
    for epoch in range(0, config.epochs):
        scheduler.step(epoch)
        # train
        train_metrics = train(train_loader, model, criterion, optimizer, epoch, val_metrics, best_results, start)
        # val
        val_metrics = evaluate(val_loader, model, criterion, epoch, train_metrics, best_results, start)
        # check results: track the best [acc, loss, f1] seen so far
        is_best_acc = val_metrics[0] > best_results[0]
        best_results[0] = max(val_metrics[0], best_results[0])
        is_best_loss = val_metrics[1] < best_results[1]
        best_results[1] = min(val_metrics[1], best_results[1])
        is_best_f1 = val_metrics[2] > best_results[2]
        best_results[2] = max(val_metrics[2], best_results[2])
        # save model
        save_checkpoint(
            {
                "epoch": epoch + 1,
                "model_name": config.model_name,
                "state_dict": model.state_dict(),
                "best_acc": best_results[0],
                "best_loss": best_results[1],
                "optimizer": optimizer.state_dict(),
                "fold": fold,
                "best_f1": best_results[2],
            }, is_best_acc, is_best_loss, is_best_f1, fold)
        # print logs
        print('\r', end='', flush=True)
        log.write('%s %5.1f %6.1f | %0.3f %0.3f %0.3f | %0.3f %0.3f %0.3f | %s %s %s | %s' % (
            "best", epoch, epoch,
            train_metrics[0], train_metrics[1], train_metrics[2],
            val_metrics[0], val_metrics[1], val_metrics[2],
            str(best_results[0])[:8], str(best_results[1])[:8], str(best_results[2])[:8],
            time_to_str((timer() - start), 'min')))
        log.write("\n")
        time.sleep(0.01)

    # Reload the best-loss checkpoint and run inference on the test set.
    best_model = torch.load("%s/%s_fold_%s_model_best_loss.pth.tar" % (config.best_models, config.model_name, str(fold)))
    model.load_state_dict(best_model["state_dict"])
    test(test_loader, model, fold)
def main():
    """Train on the oversampled training CSV (90/10 split), then run
    evaluation() on the test loader with the best-loss checkpoint."""
    fold = 0
    # 4.1 mkdirs
    if not os.path.exists(config.submit):
        os.makedirs(config.submit)
    if not os.path.exists(config.weights + config.model_name + os.sep +str(fold)):
        os.makedirs(config.weights + config.model_name + os.sep +str(fold))
    if not os.path.exists(config.best_models):
        os.mkdir(config.best_models)
    if not os.path.exists("./logs/"):
        os.mkdir("./logs/")
    #4.2 get model
    model = MultiModalNet("se_resnext50_32x4d","dpn26",0.5) #se_resnext101_32x4d
    #4.3 optim & criterion
    optimizer = optim.SGD(model.parameters(),lr = config.lr,momentum=0.9,weight_decay=1e-4)
    # criterion = FocalLoss(alpha=[1,1,1,1,1,1,1,1,1]).to(device)
    criterion = nn.CrossEntropyLoss().to(device)
    # Bookkeeping: best_results / val_metrics are [accuracy, loss, f1].
    start_epoch = 0
    best_acc=0
    best_loss = np.inf
    best_f1 = 0
    best_results = [0,np.inf,0]
    val_metrics = [0,np.inf,0]
    resume = False
    if resume:
        # Unlike the other variants in this file, this resume path also
        # restores the model weights.
        checkpoint_path = r'./checkpoints/best_models/multimodal_fold_0_model_best_loss.pth.tar'
        if not os.path.isfile(checkpoint_path):
            raise RuntimeError("=> no checkpoint found at '{}'".format(checkpoint_path))
        checkpoint = torch.load(checkpoint_path,map_location=device)
        best_acc = checkpoint['best_acc']
        best_loss = checkpoint['best_loss']
        best_f1 = checkpoint['best_f1']
        start_epoch = checkpoint['epoch']
        #args.cuda
        # if torch.cuda.is_available():
        #     model.module.load_state_dict(checkpoint['state_dict'])
        # else:
        #     model.load_state_dict(checkpoint['state_dict'])
        model.load_state_dict(checkpoint['state_dict'])
        # ft = True
        # if ft:
        #     optimizer.load_state_dict(checkpoint['optimizer'])
        # # Clear start epoch if fine-tuning
        # if args.ft:
        #     args.start_epoch = 0
    # Multi-GPU wrapping is explicitly disabled here.
    muti_gpu = False
    if torch.cuda.device_count() > 1 and muti_gpu == True:
        model = nn.DataParallel(model)
    model.to(device)
    all_files = pd.read_csv("/data/BaiDuBigData19-URFC/data/train_oversampling.csv")
    test_files = pd.read_csv("/data/BaiDuBigData19-URFC/data/test.csv")
    train_data_list,val_data_list = train_test_split(all_files, test_size=0.1, random_state = 2050)
    # load dataset
    train_gen = MultiModalDataset(train_data_list,config.train_data,config.train_vis,mode="train")
    train_loader = DataLoader(train_gen,batch_size=config.batch_size,shuffle=True,pin_memory=True,num_workers=16) #num_worker is limited by shared memory in Docker!
    val_gen = MultiModalDataset(val_data_list,config.train_data,config.train_vis,augument=False,mode="train")
    val_loader = DataLoader(val_gen,batch_size=config.batch_size,shuffle=False,pin_memory=True,num_workers=16)
    test_gen = MultiModalDataset(test_files,config.test_data,config.test_vis,augument=False,mode="test")
    test_loader = DataLoader(test_gen,1,shuffle=False,pin_memory=True,num_workers=16)
    #scheduler = lr_scheduler.StepLR(optimizer,step_size=10,gamma=0.1)
    # For best_acc use mode="max"; for best_loss use mode="min".
    # NOTE(review): ReduceLROnPlateau.step expects a metric (e.g. val loss);
    # the loop below passes the epoch index — confirm the intent.
    scheduler = lr_scheduler.ReduceLROnPlateau(optimizer)
    #n_batches = int(len(train_loader.dataset) // train_loader.batch_size)
    #scheduler = CosineAnnealingLR(optimizer, T_max=n_batches*2)
    start = timer()
    #train
    for epoch in range(0,config.epochs):  #config.epochs
        scheduler.step(epoch)
        # train
        train_metrics = train(train_loader,model,criterion,optimizer,epoch,val_metrics,best_results,start)
        # val
        val_metrics = evaluate(val_loader,model,criterion,epoch,train_metrics,best_results,start)
        # check results: track the best [acc, loss, f1] seen so far
        is_best_acc = val_metrics[0] > best_results[0]
        best_results[0] = max(val_metrics[0],best_results[0])
        is_best_loss = val_metrics[1] < best_results[1]
        best_results[1] = min(val_metrics[1],best_results[1])
        is_best_f1 = val_metrics[2] > best_results[2]
        best_results[2] = max(val_metrics[2],best_results[2])
        # save model
        save_checkpoint({
            "epoch":epoch + 1,
            "model_name":config.model_name,
            "state_dict":model.state_dict(),
            "best_acc":best_results[0],
            "best_loss":best_results[1],
            "optimizer":optimizer.state_dict(),
            "fold":fold,
            "best_f1":best_results[2],
        },is_best_acc,is_best_loss,is_best_f1,fold)
        # print logs
        print('\r',end='',flush=True)
        log.write('%s %5.1f %6.1f | %0.3f %0.3f %0.3f | %0.3f %0.3f %0.3f | %s %s %s | %s' % (\
            "best", epoch, epoch,
            train_metrics[0], train_metrics[1],train_metrics[2],
            val_metrics[0],val_metrics[1],val_metrics[2],
            str(best_results[0])[:8],str(best_results[1])[:8],str(best_results[2])[:8],
            time_to_str((timer() - start),'min')))
        log.write("\n")
        time.sleep(0.01)
    # Reload the best-loss checkpoint and evaluate on the test loader.
    best_model = torch.load("%s/%s_fold_%s_model_best_loss.pth.tar"%(config.best_models,config.model_name,str(fold)))
    model.load_state_dict(best_model["state_dict"])
    evaluation(test_loader,model,fold)