def main():
    print(EXPATH)
    writer = SummaryWriter(LOGDIR)
    pss = []
    dl_list, dl_test, samples_per_cls = load_dataset()
    for fold, dl_train, dl_valid in dl_list:
        model = ww.ECATF().cuda()
        criterion = FocalLoss(gamma=2.4).cuda()
        optimizer = ww.SAM(model.parameters(), AdamW, lr=0.0001)
        trainer = Trainer(model, criterion, optimizer, writer, EXNAME, EXPATH, fold)
        trainer.fit(dl_train, dl_valid, EPOCHS)
        pss.append(trainer.submission(dl_test))
    # combine the per-fold submission files
    combine_submissions(pss, EXPATH)
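# Note: every snippet in this collection constructs a FocalLoss, but with differing
# signatures (gamma only, per-class alpha, weight=..., class_num=..., reduction flags),
# and none of them includes the definition. As a point of reference only, a minimal
# multi-class focal loss over logits might look like the sketch below; this is an
# assumption for illustration, not the implementation used by any of these scripts.
import torch
import torch.nn as nn
import torch.nn.functional as F


class FocalLossSketch(nn.Module):
    """Hypothetical multi-class focal loss: FL = -(1 - p_t)^gamma * log(p_t)."""

    def __init__(self, gamma=2.0, weight=None, reduction='mean'):
        super().__init__()
        self.gamma = gamma        # focusing parameter
        self.weight = weight      # optional 1-D tensor of per-class weights (alpha)
        self.reduction = reduction

    def forward(self, logits, targets):
        log_probs = F.log_softmax(logits, dim=1)
        log_pt = log_probs.gather(1, targets.unsqueeze(1)).squeeze(1)  # log p of true class
        pt = log_pt.exp()
        loss = -((1.0 - pt) ** self.gamma) * log_pt
        if self.weight is not None:
            loss = loss * self.weight[targets]  # per-class weighting
        if self.reduction == 'mean':
            return loss.mean()
        if self.reduction == 'sum':
            return loss.sum()
        return loss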
def evaluate(config, model, data_iter, test_flag=False):
    model.eval()
    loss_total = 0
    predict_all = np.array([], dtype=int)
    labels_all = np.array([], dtype=int)
    with torch.no_grad():
        for batch_data in data_iter:
            batch_data = tuple(t.to(config.device) for t in batch_data)
            labels = batch_data[-1]
            # Forward pass
            outputs = model(batch_data)
            # Compute loss
            if config.use_FocalLoss:
                FL_loss = FocalLoss(config.num_classes)
                loss = FL_loss(outputs, labels)
            else:
                loss = F.cross_entropy(outputs, labels)
            loss_total += loss
            # Collect predictions and labels
            labels = labels.data.cpu().numpy()
            # torch.max() returns (values, indices); [1] takes the predicted class indices
            predic = torch.max(outputs.data, dim=1)[1].cpu().numpy()
            labels_all = np.append(labels_all, labels)
            predict_all = np.append(predict_all, predic)
    acc = metrics.accuracy_score(labels_all, predict_all)
    # build classification report
    with open('./num2label_dic.pkl', 'rb') as f:
        num2label_dic = pickle.load(f)
    num2label = [num2label_dic[i] for i in set(labels_all)]
    report = metrics.classification_report(labels_all, predict_all, target_names=num2label, digits=4)
    if test_flag:
        confusion = metrics.confusion_matrix(labels_all, predict_all)
        return acc, (loss_total / len(data_iter)).cpu().numpy(), report, confusion
    return acc, (loss_total / len(data_iter)).cpu().numpy(), report
def __init__(self, pretrained_vec, w=5, hidden_dim=300, drop=.5, attn_drop=.3, n_heads=4,
             n_epochs=1000, patience=25, train_batch_size=16, transform_batch_size=256,
             lr=1e-3, weight_decay=1e-3, optim_name='adam', loss_name='cross_entropy',
             device='cuda:0', verbose=False):
    self._pretrained_vec = pretrained_vec
    self.pretrained_vec = path.basename(self._pretrained_vec)
    self.w = w
    self.hidden_dim = hidden_dim
    self.drop = drop
    self.attn_drop = attn_drop
    self.n_heads = n_heads
    self.n_epochs = n_epochs
    self.patience = patience
    self.train_batch_size = train_batch_size
    self.transform_batch_size = transform_batch_size
    self.lr = lr
    self.weight_decay = weight_decay
    self.optim_name = optim_name
    self.loss_name = loss_name
    self.device = device
    self.verbose = verbose

    self.graph_builder = GraphsizePretrained(
        w=self.w, pretrained_vec=self._pretrained_vec, verbose=verbose)
    self.in_dim = self.graph_builder.ndim

    if self.loss_name.lower() == 'focal':
        self.loss_func = FocalLoss().to(torch.device(device))
    elif self.loss_name.lower() == 'cross_entropy':
        self.loss_func = nn.CrossEntropyLoss().to(torch.device(device))

    TGA.instances += 1
    self.path_to_save = f'best_param_{TGA.instances}_{datetime.now().isoformat()}.pth'
def __init__(self, option, model, train_dataset, valid_dataset, test_dataset=None,
             weight=[[1.0, 1.0]], tasks_num=17):
    # Most important variables
    self.option = option
    self.device = torch.device("cuda:{}".format(option['gpu'][0]) if torch.cuda.is_available() else "cpu")
    self.model = DataParallel(model).to(self.device) if option['parallel'] else model.to(self.device)

    # Setting up the train, valid and test data loaders
    if self.option['parallel']:
        self.train_dataloader = DataListLoader(train_dataset, batch_size=self.option['batch_size'], shuffle=True)
        self.valid_dataloader = DataListLoader(valid_dataset, batch_size=self.option['batch_size'])
        if test_dataset:
            self.test_dataloader = DataListLoader(test_dataset, batch_size=self.option['batch_size'])
    else:
        self.train_dataloader = DataLoader(train_dataset, batch_size=self.option['batch_size'], shuffle=True)
        self.valid_dataloader = DataLoader(valid_dataset, batch_size=self.option['batch_size'])
        if test_dataset:
            self.test_dataloader = DataLoader(test_dataset, batch_size=self.option['batch_size'])
    self.save_path = self.option['exp_path']

    # Setting up the loss and the Adam optimizer with hyper-params
    if option['focalloss']:
        self.log('Using FocalLoss')
        self.criterion = [FocalLoss(alpha=1 / w[0]) for w in weight]  # alpha 0.965
    else:
        self.criterion = [torch.nn.CrossEntropyLoss(torch.Tensor(w).to(self.device), reduction='mean')
                          for w in weight]
    self.optimizer = torch.optim.Adam(self.model.parameters(), lr=self.option['lr'],
                                      weight_decay=option['weight_decay'])
    self.scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
        self.optimizer, mode='min', factor=0.7,
        patience=self.option['lr_scheduler_patience'], min_lr=1e-6
    )

    # other bookkeeping
    self.start = time.time()
    self.tasks_num = tasks_num
    self.records = {'trn_record': [], 'val_record': [], 'val_losses': [],
                    'best_ckpt': None, 'val_roc': [], 'val_prc': []}
    self.log(msgs=['\t{}:{}\n'.format(k, v) for k, v in self.option.items()], show=False)
    self.log('train set num:{} valid set num:{} test set num: {}'.format(
        len(train_dataset), len(valid_dataset), len(test_dataset)))
    self.log("total parameters:" + str(sum([p.nelement() for p in self.model.parameters()])))
    self.log(msgs=str(model).split('\n'), show=False)
    # load best model weights
    model.load_state_dict(best_model_wts)
    return model


model_ft = torchvision.models.resnext101_32x8d(pretrained=True)
num_ftrs = model_ft.fc.in_features
# Here the size of each output sample is set to 2.
# Alternatively, it can be generalized to nn.Linear(num_ftrs, len(class_names)).
model_ft.fc = torch.nn.Linear(num_ftrs, 2)
# model_ft = model_ft.to(device)

# enable multi-GPU training
model_ft = torch.nn.DataParallel(model_ft)
model_ft.cuda()

# criterion = torch.nn.CrossEntropyLoss()
criterion = FocalLoss(class_num=2, alpha=torch.tensor([[alpha], [2 - alpha]]), gamma=gamma)

# Observe that all parameters are being optimized
optimizer_ft = torch.optim.SGD(model_ft.parameters(), lr=lr, momentum=0.9)

# Decay LR by a factor of 0.3 every 7 epochs
exp_lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer_ft, step_size=7, gamma=0.3)

model_ft = train_model(model_ft, criterion, optimizer_ft, exp_lr_scheduler, num_epochs=epochs)
# torch.save(model_ft.state_dict(), 'side_model_use_restnet50_crop_and_crop.pth')
torch.save(model_ft, 'side_model' + dir_name + '.pth')
# Load pretrained model
model = models.resnext101_32x8d(pretrained=True)
model.fc = nn.Linear(in_features=2048, out_features=7)
model.to(device)
optimizer = optim.Adam(model.parameters(), lr=args.lr)

class_dist = [282, 461, 967, 103, 5380, 123, 1044]  # class distribution of the training set
norm_weights = [1 - (x / sum(class_dist)) for x in class_dist]
weights = torch.tensor(norm_weights).to(device)  # per-class weights for the loss

# Loss functions
if args.loss == 'focal':
    criterion = FocalLoss(weight=weights, gamma=2).to(device)
elif args.loss == 'weighted_ce':
    criterion = nn.CrossEntropyLoss(weight=weights).to(device)
else:
    criterion = nn.CrossEntropyLoss().to(device)

# Train, test or single-image prediction
if args.train:
    training(args, model, criterion, optimizer, device)
if args.test:
    evaluate(args, device, model)
if args.image_path is not None:
    predict(args, device=device, model=model)
    if beta < 1:
        effective_num = 1.0 - np.power(beta, N_SAMPLES_PER_CLASS)
        per_cls_weights = (1.0 - beta) / np.array(effective_num)
    else:
        per_cls_weights = 1 / np.array(N_SAMPLES_PER_CLASS)
    per_cls_weights = per_cls_weights / np.sum(per_cls_weights) * len(N_SAMPLES_PER_CLASS)
    per_cls_weights = torch.FloatTensor(per_cls_weights).to(device)
else:
    per_cls_weights = torch.ones(N_CLASSES).to(device)

## Choose a loss function ##
if ARGS.loss_type == 'CE':
    criterion = nn.CrossEntropyLoss(weight=per_cls_weights, reduction='none').to(device)
elif ARGS.loss_type == 'Focal':
    criterion = FocalLoss(weight=per_cls_weights, gamma=ARGS.focal_gamma, reduction='none').to(device)
elif ARGS.loss_type == 'LDAM':
    criterion = LDAMLoss(cls_num_list=N_SAMPLES_PER_CLASS, max_m=0.5, s=30,
                         weight=per_cls_weights, reduction='none').to(device)
else:
    raise ValueError("Wrong Loss Type")

## Training (ARGS.warm is used for deferred re-balancing) ##
if epoch >= ARGS.warm and ARGS.gen:
    train_stats = train_gen_epoch(net, net_seed, criterion, optimizer, train_loader)
    SUCCESS[epoch, :, :] = train_stats['t_success'].float()
    logger.log(SUCCESS[epoch, -10:, :])
    np.save(LOGDIR + '/success.npy', SUCCESS.cpu().numpy())
else:
    train_loss, train_acc = train_epoch(net, criterion, optimizer, train_loader, logger)
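# Quick sanity check of the class-balanced "effective number" weighting used above, with a
# toy, made-up class distribution (the real N_SAMPLES_PER_CLASS comes from the dataset).
import numpy as np

toy_counts = [1000, 100, 10]
beta = 0.999
effective_num = 1.0 - np.power(beta, toy_counts)
toy_weights = (1.0 - beta) / effective_num
toy_weights = toy_weights / toy_weights.sum() * len(toy_counts)
print(toy_weights)  # roughly [0.04, 0.28, 2.68]: rare classes get the largest weights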
def main():
    parser = get_parser()
    args = parser.parse_args()

    # load data
    trainset, testset = get_subsets(size1=(224, 224), size2=(192, 192))
    train_loader = torch.utils.data.DataLoader(trainset, batch_size=args.train_batch_size,
                                               shuffle=True, num_workers=args.train_workers)
    test_loader = torch.utils.data.DataLoader(testset, batch_size=args.test_batch_size,
                                              shuffle=False, num_workers=args.test_workers)

    # path to save models
    if not os.path.isdir(args.model_dir):
        print("Make directory: " + args.model_dir)
        os.makedirs(args.model_dir)

    # prefix of saved checkpoints
    model_prefix = args.model_dir + '/' + args.model_prefix

    # define the model: use ResNet50 as an example
    if args.arch == "resnet50":
        from resnet import resnet50
        model = resnet50(pretrained=True, num_labels=args.num_class)  # used for testing and training
        model_prefix = model_prefix + "_resnet50"
    elif args.arch == "resnet101":
        from resnet import resnet101
        model = resnet101(pretrained=True, num_labels=args.num_class)
        model_prefix = model_prefix + "_resnet101"
    else:
        raise NotImplementedError("To be implemented!")

    # decide whether to resume training from a checkpoint
    if args.start_epoch != 0:
        resume_model = torch.load(args.resume)
        resume_dict = resume_model.state_dict()
        model_dict = model.state_dict()
        resume_dict = {
            k: v
            for k, v in resume_dict.items()
            if k in model_dict and k.size() == model_dict[k].size()
        }
        model_dict.update(resume_dict)
        model.load_state_dict(model_dict)  # reload the updated state dict
        print('Resuming training')

    # multi-GPU parallel training
    cudnn.benchmark = True
    model.cuda()
    model = nn.DataParallel(model)

    # choose the optimizer
    if args.optimizer == 'Adam':
        optimizer = optim.Adam(model.parameters(), lr=args.learning_rate)
    elif args.optimizer == 'SGD':
        optimizer = optim.SGD(model.parameters(), lr=args.learning_rate,
                              momentum=args.momentum, weight_decay=args.weight_decay)
    else:
        raise NotImplementedError("Not supported yet!")

    # training the network
    model.train()

    # attention map sizes
    size1, size2 = 7, 6
    w1 = size1
    h1 = size1
    grid_l = generate_flip_grid(w1, h1)
    w2 = size2
    h2 = size2
    grid_s = generate_flip_grid(w2, h2)
    # least common multiple
    lcm = w1 * w2

    ##################################################################
    # Compute alpha from the per-class sample counts of the training set
    ##################################################################
    per_class_num_dict = load_train_per_class_num_pickle(
        path='/home/ailab/dataset/new_data/per_class_details/train_per_class_num.pickle')
    # make sure class ids run from 0 to num_classes
    alpha_list = []
    for i in range(args.num_class):
        per_class_num = per_class_num_dict[i]
        if per_class_num == 1:
            per_class_num = 1.1
        alpha_list.append(per_class_num)
    alpha_array = np.array(alpha_list)
    alpha_array = (1 / np.log(alpha_array))
    # for i in range(args.num_class):
    #     if alpha_array[i] > 0.5:
    #         alpha_array[i] = alpha_array[i] / 2
    alpha = alpha_array.tolist()
    alpha = [round(alpha_i, 4) for alpha_i in alpha]

    criterion = FocalLoss(2, alpha=alpha, size_average=True)
    criterion_mse = nn.MSELoss(size_average=True)

    for epoch in range(args.start_epoch, args.epoch_max):
        epoch_start = time.clock()
        if not args.stepsize == 0:
            adjust_learning_rate(optimizer, epoch, args)
        # num1 = 0
        for step, batch_data in enumerate(train_loader):
            # if num1 > 10:
            #     print('############')
            #     model.eval()
            #     test(model, test_loader, epoch + 1)
            #     model.train()
            #     break
            # num1 += 1
            batch_images_lo = batch_data[0]
            batch_images_lf = batch_data[1]
            batch_images_so = batch_data[2]
            batch_images_sf = batch_data[3]
            batch_images_lc = batch_data[4]  # colour-jittered images, concatenated with images_lo below
            batch_labels = batch_data[-1]

            batch_images_l = torch.cat((batch_images_lo, batch_images_lf))
            batch_images_c = torch.cat((batch_images_lo, batch_images_lc))  # colour pair
            batch_images_s = torch.cat((batch_images_so, batch_images_sf))
            batch_labels = torch.cat((batch_labels, batch_labels, batch_labels,
                                      batch_labels, batch_labels, batch_labels))  # six copies

            batch_images_l = batch_images_l.cuda()
            batch_images_c = batch_images_c.cuda()  # colour pair
            batch_images_s = batch_images_s.cuda()
            batch_labels = batch_labels.cuda()

            inputs_l = batch_images_l
            inputs_c = batch_images_c  # colour pair
            inputs_s = batch_images_s
            labels = batch_labels

            output_l, hm_l = model(inputs_l)
            output_c, hm_c = model(inputs_c)  # colour pair
            output_s, hm_s = model(inputs_s)

            output = torch.cat((output_l, output_s, output_c))
            # output = torch.cat((output_l, output_s))
            loss = criterion(output, labels)

            # flip
            # split along the batch dimension; num must be at least half and less than the whole batch
            num = hm_l.size(0) // 2
            hm1, hm2 = hm_l.split(num)
            flip_grid_large = grid_l.expand(num, -1, -1, -1)
            flip_grid_large = Variable(flip_grid_large, requires_grad=False)
            flip_grid_large = flip_grid_large.permute(0, 2, 3, 1)
            hm2_flip = F.grid_sample(hm2, flip_grid_large, mode='bilinear', padding_mode='border')
            flip_loss_l = F.mse_loss(hm1, hm2_flip)  # no size_average

            hm1_small, hm2_small = hm_s.split(num)
            flip_grid_small = grid_s.expand(num, -1, -1, -1)
            flip_grid_small = Variable(flip_grid_small, requires_grad=False)
            flip_grid_small = flip_grid_small.permute(0, 2, 3, 1)
            hm2_small_flip = F.grid_sample(hm2_small, flip_grid_small, mode='bilinear',
                                           padding_mode='border')
            flip_loss_s = F.mse_loss(hm1_small, hm2_small_flip)

            # colour-change comparison
            hm1, hm2 = hm_c.split(num)
            # color_loss = torch.FloatTensor([0])
            color_loss = F.mse_loss(hm1, hm2)  # no size_average

            # scale loss
            num = hm_l.size(0)
            hm_l = F.upsample(hm_l, lcm)
            hm_s = F.upsample(hm_s, lcm)
            scale_loss = F.mse_loss(hm_l, hm_s)

            losses = loss + flip_loss_l + flip_loss_s + color_loss + scale_loss
            # losses = loss + flip_loss_l + flip_loss_s + scale_loss

            optimizer.zero_grad()
            losses.backward()
            optimizer.step()

            if (step) % args.display == 0:
                print('epoch: {},\ttrain step: {}\tLoss: {:.6f}'.format(
                    epoch + 1, step, losses.item()))
                print('\tcls loss: {:.4f};\tflip_loss_l: {:.4f}'
                      '\tflip_loss_s: {:.4f};\tcolor_loss: {:.4f};\tscale_loss: {:.4f}'
                      .format(loss.item(), flip_loss_l.item(), flip_loss_s.item(),
                              color_loss.item(), scale_loss.item()))

        epoch_end = time.clock()
        elapsed = epoch_end - epoch_start
        print("Epoch time: ", elapsed)

        # test
        if (epoch + 1) % args.snapshot == 0:
            model_file = model_prefix + '_epoch{}.pth'
            print("Saving model to " + model_file.format(epoch + 1))
            torch.save(model, model_file.format(epoch + 1))
            if args.test:
                model.eval()
                test(model, test_loader, epoch + 1)
                model.train()  # remember to switch back to train mode after testing

    final_model = model_prefix + '_final.pth'
    print("Saving model to " + final_model)
    torch.save(model, final_model)
    model.eval()
    test(model, test_loader, epoch + 1)
def training(model, fold, args):
    # restore from the last checkpoint:
    # all model weights are restored, but not the learning rate.
    if os.path.exists(os.path.join(config.weights, config.model_name, str(fold), "checkpoint.pth.tar")):
        best_model = torch.load(os.path.join(config.weights, config.model_name, str(fold), "checkpoint.pth.tar"))
        model.load_state_dict(best_model["state_dict"])

    # logging setup
    log = Logger()
    log.open(os.path.join(config.logs_dir, "%s_log_train.txt" % config.model_name), mode="a")
    log.write(
        "\n---------------------------- [START %s] %s\n\n" %
        (datetime.now().strftime('%Y-%m-%d %H:%M:%S'), '-' * 20))
    log.write(
        '----------------------|--------- Train ---------|-------- Valid ---------|-------Best '
        'Results-------|----------|\n')
    log.write(
        'mode iter epoch | loss f1_macro | loss f1_macro | loss f1_macro | time '
        ' |\n')
    log.write(
        '----------------------------------------------------------------------------------------------------------'
        '----\n')

    # training params
    optimizer = optim.SGD(model.parameters(), lr=config.learning_rate_start,
                          momentum=0.9, weight_decay=config.weight_decay)
    if config.loss_name == 'ce':
        criterion = nn.BCEWithLogitsLoss().cuda()
    elif config.loss_name == 'focal':
        criterion = FocalLoss().cuda()
    elif config.loss_name == 'f1':
        criterion = F1Loss().cuda()
    else:
        raise ValueError('unknown loss name {}'.format(config.loss_name))

    best_results = [np.inf, 0]
    val_metrics = [np.inf, 0]
    scheduler = lr_scheduler.StepLR(optimizer, step_size=config.learning_rate_decay_epochs,
                                    gamma=config.learning_rate_decay_rate)
    start = timer()

    # load dataset
    all_files = pd.read_csv(config.train_csv)
    image_names = all_files['Id']
    labels_strs = all_files['Target']
    image_labels = []
    for cur_label_str in labels_strs:
        cur_label = np.eye(config.num_classes, dtype=np.float)[
            np.array(list(map(int, cur_label_str.split(' '))))].sum(axis=0)
        image_labels.append(cur_label)
    image_labels = np.stack(image_labels, axis=0)

    msss = MultilabelStratifiedShuffleSplit(n_splits=1, test_size=config.val_percent, random_state=0)
    for train_index, val_index in msss.split(image_names, image_labels):
        train_image_names = image_names[train_index]
        train_image_labels = image_labels[train_index]
        val_image_names = image_names[val_index]
        val_image_labels = image_labels[val_index]

    train_gen = HumanDataset(train_image_names, train_image_labels, config.train_dir, mode="train")
    sampler = WeightedRandomSampler(weights=get_sample_weights()[train_index],
                                    num_samples=int(len(all_files) * (1 - config.val_percent)))
    train_loader = DataLoader(train_gen, batch_size=config.batch_size, pin_memory=True,
                              num_workers=4, sampler=sampler)
    # train_loader = DataLoader(train_gen, batch_size=config.batch_size, shuffle=True, pin_memory=True, num_workers=4)
    val_gen = HumanDataset(val_image_names, val_image_labels, config.train_dir,
                           augument=False, mode="train")
    val_loader = DataLoader(val_gen, batch_size=config.batch_size, shuffle=False,
                            pin_memory=True, num_workers=4)

    # train
    for epoch in range(0, config.epochs):
        # training & evaluating
        scheduler.step(epoch)
        get_learning_rate(optimizer)
        train_metrics = train(train_loader, model, criterion, optimizer, epoch,
                              val_metrics, best_results, start)
        val_metrics = evaluate(val_loader, model, criterion, epoch, train_metrics,
                               best_results, start)

        # check results
        is_best_loss = val_metrics[0] < best_results[0]
        best_results[0] = min(val_metrics[0], best_results[0])
        is_best_f1 = val_metrics[1] > best_results[1]
        best_results[1] = max(val_metrics[1], best_results[1])

        # save model
        save_checkpoint({
            "epoch": epoch + 1,
            "model_name": config.model_name,
            "state_dict": model.state_dict(),
            "best_loss": best_results[0],
            "optimizer": optimizer.state_dict(),
            "fold": fold,
            "best_f1": best_results[1],
        }, is_best_loss, is_best_f1, fold)

        # print logs
        print('\r', end='', flush=True)
        log.write(
            logging_pattern % (
                "best", epoch, epoch,
                train_metrics[0], train_metrics[1],
                val_metrics[0], val_metrics[1],
                str(best_results[0])[:8], str(best_results[1])[:8],
                time_to_str((timer() - start), 'min')
            )
        )
        log.write("\n")
        time.sleep(0.01)
                            collate_fn=collate_fn)

model = Model(VOCAB_SIZE, EMBEDDING_SIZE, NUM_CODEBOOK, NUM_CODEWORD, HIDDEN_SIZE,
              IN_LENGTH, OUT_LENGTH, NUM_CLASS, ROUTING_TYPE, EMBEDDING_TYPE,
              CLASSIFIER_TYPE, NUM_ITERATIONS, NUM_REPEAT, DROP_OUT)
if PRE_MODEL is not None:
    model_weight = torch.load('epochs/{}'.format(PRE_MODEL), map_location='cpu')
    model_weight.pop('classifier.weight')
    model.load_state_dict(model_weight, strict=False)

if LOSS_TYPE == 'margin':
    loss_criterion = [MarginLoss(NUM_CLASS)]
elif LOSS_TYPE == 'focal':
    loss_criterion = [FocalLoss()]
elif LOSS_TYPE == 'cross':
    loss_criterion = [CrossEntropyLoss()]
elif LOSS_TYPE == 'mf':
    loss_criterion = [MarginLoss(NUM_CLASS), FocalLoss()]
elif LOSS_TYPE == 'mc':
    loss_criterion = [MarginLoss(NUM_CLASS), CrossEntropyLoss()]
elif LOSS_TYPE == 'fc':
    loss_criterion = [FocalLoss(), CrossEntropyLoss()]
else:
    loss_criterion = [MarginLoss(NUM_CLASS), FocalLoss(), CrossEntropyLoss()]

if torch.cuda.is_available():
def main(opt):
    if opt['manual_seed'] is None:
        opt['manual_seed'] = random.randint(1, 10000)
    print('Random Seed: ', opt['manual_seed'])
    random.seed(opt['manual_seed'])
    torch.manual_seed(opt['manual_seed'])
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(opt['manual_seed'])

    if opt['class_weight'] is not None:
        loss_weight = torch.FloatTensor(opt['class_weight']).to(device)
    else:
        loss_weight = None
    if opt['gamma'] is not None:
        criterion = FocalLoss(alpha=loss_weight, gamma=opt['gamma'], reduction=True)
    else:
        criterion = CrossEntropyLoss(weight=loss_weight)

    files = []
    for file in os.listdir(opt['path']):
        files.append(file[:-3])
    train_ids, val_ids = train_test_split(files, test_size=0.2)
    train_dataset = GRDataset(opt['path'], train_ids)
    val_dataset = GRDataset(opt['path'], val_ids)
    train_loader = DataLoader(train_dataset, batch_size=opt['batch_size'], shuffle=True, drop_last=True)
    val_loader = DataLoader(val_dataset, batch_size=opt['batch_size'], drop_last=True)

    tr_losses = np.zeros((opt['num_epochs'],))
    tr_accs = np.zeros((opt['num_epochs'],))
    val_losses = np.zeros((opt['num_epochs'],))
    val_accs = np.zeros((opt['num_epochs'],))

    model = Net(num_classes=opt['num_classes'], gnn_layers=opt['gnn_layers'],
                embed_dim=opt['embed_dim'], hidden_dim=opt['hidden_dim'],
                jk_layer=opt['jk_layer'], process_step=opt['process_step'],
                dropout=opt['dropout'])
    model = model.to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=opt['lr'], weight_decay=opt['weight_decay'])

    best_val_loss = 1e6
    for epoch in range(opt['num_epochs']):
        s = time.time()
        model.train()
        losses = 0
        acc = 0
        for i, data in enumerate(train_loader):
            data = data.to(device)
            optimizer.zero_grad()
            output = model(data)
            # print(data.y.squeeze())
            loss = criterion(output, data.y.squeeze())
            loss.backward()
            optimizer.step()
            y_true = data.y.squeeze().cpu().numpy()
            y_pred = output.data.cpu().numpy().argmax(axis=1)
            acc += accuracy_score(y_true, y_pred) * 100
            losses += loss.data.cpu().numpy()
        tr_losses[epoch] = losses / (i + 1)
        tr_accs[epoch] = acc / (i + 1)

        model.eval()
        v_losses = 0
        v_acc = 0
        y_preds = []
        y_trues = []
        for j, data in enumerate(val_loader):
            data = data.to(device)
            with torch.no_grad():
                output = model(data)
                loss = criterion(output, data.y.squeeze())
            y_pred = output.data.cpu().numpy().argmax(axis=1)
            y_true = data.y.squeeze().cpu().numpy()
            y_trues += y_true.tolist()
            y_preds += y_pred.tolist()
            v_acc += accuracy_score(y_true, y_pred) * 100
            v_losses += loss.data.cpu().numpy()
        cnf = confusion_matrix(y_trues, y_preds)
        val_losses[epoch] = v_losses / (j + 1)
        val_accs[epoch] = v_acc / (j + 1)
        current_val_loss = v_losses / (j + 1)

        if current_val_loss < best_val_loss:
            best_val_loss = current_val_loss
            best_cnf = cnf
            torch.save(model.state_dict(), os.path.join(output_path, 'best_model.ckpt'))

        print('Epoch: {:03d} | time: {:.4f} seconds\n'
              'Train Loss: {:.4f} | Train accuracy {:.4f}\n'
              'Validation Loss: {:.4f} | Validation accuracy {:.4f} | Best {:.4f}'
              .format(epoch + 1, time.time() - s, losses / (i + 1), acc / (i + 1),
                      v_losses / (j + 1), v_acc / (j + 1), best_val_loss))
        print('Validation confusion matrix:')
        print(cnf)

    np.save(os.path.join(log_path, 'train_loss.npy'), tr_losses)
    np.save(os.path.join(log_path, 'train_acc.npy'), tr_accs)
    np.save(os.path.join(log_path, 'val_loss.npy'), val_losses)
    np.save(os.path.join(log_path, 'val_acc.npy'), val_accs)
    np.save(os.path.join(log_path, 'confusion_matrix.npy'), best_cnf)
def forward(self, p, img_size, targets=None, var=None, epoch=0):
    if ONNX_EXPORT:
        bs, nG = 1, self.nG  # batch size, grid size
    else:
        bs, nG = p.shape[0], p.shape[-1]
        if self.img_size != img_size:
            create_grids(self, img_size, nG)
            if p.is_cuda:
                self.grid_xy = self.grid_xy.cuda()
                self.anchor_wh = self.anchor_wh.cuda()

    # p.view(bs, 255, 13, 13) --> (bs, 3, 13, 13, 80)
    # (bs, anchors, grid, grid, classes + xywh)
    p = p.view(bs, self.nA, self.nC + 5, nG, nG).permute(0, 1, 3, 4, 2).contiguous()  # prediction

    # xy, width and height
    xy = torch.sigmoid(p[..., 0:2])
    wh = p[..., 2:4]  # wh (yolo method)
    # wh = torch.sigmoid(p[..., 2:4])  # wh (power method)

    # Training
    if targets is not None:
        MSELoss = nn.MSELoss()  # loss used for the box predictions
        BCEWithLogitsLoss = nn.BCEWithLogitsLoss()
        CrossEntropyLoss = nn.CrossEntropyLoss()

        # Get outputs
        p_conf = p[..., 4]  # Conf
        p_cls = p[..., 5:]  # Class

        txy, twh, mask, tcls = build_targets(targets, self.anchor_vec, self.nA, self.nC, nG)
        tcls = tcls[mask]
        if xy.is_cuda:
            txy, twh, mask, tcls = txy.cuda(), twh.cuda(), mask.cuda(), tcls.cuda()

        # Compute losses
        nT = sum([len(x) for x in targets])  # number of targets
        nM = mask.sum().float()  # number of anchors (assigned to targets)
        k = 1  # nM / bs
        if nM > 0:
            lxy = k * MSELoss(xy[mask], txy[mask])
            lwh = k * MSELoss(wh[mask], twh[mask])
            # lcls = (k / 4) * CrossEntropyLoss(p_cls[mask], torch.argmax(tcls, 1))  # original cross-entropy (softmax) classification loss
            # lcls = (k * 10) * BCEWithLogitsLoss(p_cls[mask], tcls.float())  # original logistic classification loss (initially k * 10)
            if self.chose_cls_loss == 'logistic':
                lcls = BCEWithLogitsLoss(p_cls[mask], tcls.float())  # (k / 4) factor removed
            elif self.chose_cls_loss == 'softmax':
                lcls = CrossEntropyLoss(p_cls[mask], torch.argmax(tcls, 1))  # (k / 4) factor removed
            elif self.chose_cls_loss == 'focalloss':
                lcls = FocalLoss(class_num=self.nC, gamma=2)(p_cls[mask], torch.argmax(tcls, 1))  # (k / 4) factor removed
        else:
            FT = torch.cuda.FloatTensor if p.is_cuda else torch.FloatTensor
            lxy, lwh, lcls, lconf = FT([0]), FT([0]), FT([0]), FT([0])

        if self.chose_cls_loss == 'focalloss':
            lconf = (k * 64) * FocalLoss_confidence(class_num=self.nC, gamma=2)(p_conf, mask.float())
        else:
            lconf = (k * 64) * BCEWithLogitsLoss(p_conf, mask.float())

        # Sum loss components
        loss = lxy + lwh + lconf + lcls

        return loss, loss.item(), lxy.item(), lwh.item(), lconf.item(), lcls.item(), nT

    else:
        if ONNX_EXPORT:
            grid_xy = self.grid_xy.repeat((1, self.nA, 1, 1, 1)).view((1, -1, 2))
            anchor_wh = self.anchor_wh.repeat((1, 1, nG, nG, 1)).view((1, -1, 2)) / nG

            # p = p.view(-1, 85)
            # xy = xy + self.grid_xy[0]  # x, y
            # wh = torch.exp(wh) * self.anchor_wh[0]  # width, height
            # p_conf = torch.sigmoid(p[:, 4:5])  # Conf
            # p_cls = F.softmax(p[:, 5:85], 1) * p_conf  # SSD-like conf
            # return torch.cat((xy / nG, wh, p_conf, p_cls), 1).t()

            p = p.view(1, -1, 85)
            xy = xy + grid_xy  # x, y
            wh = torch.exp(p[..., 2:4]) * anchor_wh  # width, height
            p_conf = torch.sigmoid(p[..., 4:5])  # Conf
            p_cls = p[..., 5:85]
            # Broadcasting only supported on first dimension in CoreML.
            # See onnx-coreml/_operators.py
            # p_cls = F.softmax(p_cls, 2) * p_conf  # SSD-like conf
            p_cls = torch.exp(p_cls).permute((2, 1, 0))
            p_cls = p_cls / p_cls.sum(0).unsqueeze(0) * p_conf.permute((2, 1, 0))  # F.softmax() equivalent
            p_cls = p_cls.permute(2, 1, 0)
            return torch.cat((xy / nG, wh, p_conf, p_cls), 2).squeeze().t()

        p[..., 0:2] = xy + self.grid_xy  # xy
        p[..., 2:4] = torch.exp(wh) * self.anchor_wh  # wh yolo method
        # p[..., 2:4] = ((wh * 2) ** 2) * self.anchor_wh  # wh power method
        p[..., 4] = torch.sigmoid(p[..., 4])  # p_conf
        p[..., :4] *= self.stride

        # reshape from [1, 3, 13, 13, 85] to [1, 507, 85]
        return p.view(bs, -1, 5 + self.nC)
np.random.shuffle(indices)
train_indices, val_indices = indices[split:], indices[:split]

train_sampler = SubsetRandomSampler(train_indices)
valid_sampler = SubsetRandomSampler(val_indices)
train_loader = torch.utils.data.DataLoader(dataset, batch_size=batch_size, sampler=train_sampler)
validation_loader = torch.utils.data.DataLoader(dataset, batch_size=batch_size, sampler=valid_sampler)

model = resnet.resnet18(sample_size=90, sample_duration=30, num_classes=2)
model.to(device)

# criterion = nn.CrossEntropyLoss()
criterion = FocalLoss()
optimizer = optim.Adam(model.parameters(), lr=3e-4)

print("=" * 20)
print('Training Started')
print("=" * 20)

epochs = 100
idx = 0
y_true = []
y_scores = []
for epoch in range(epochs):
    g_loss = 0
    correct = 0
def train_model(train_data_words, test_data_words, model, epochs=30):
    log_file = os.path.join(LOGS_DIR, f'{model.__class__.__name__}.{str(train_data_words)}')
    checkpoint_file = f'{CHECKPOINT_PREFIX}.{model.__class__.__name__}.{str(train_data_words)}'

    model = model.cuda()
    optimizer = torch.optim.AdamW(model.parameters(), lr=LR)
    scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer, gamma=0.9)

    if os.path.exists(checkpoint_file):
        print('Loading checkpoint')
        epoch, best_score, vocabulary = load_train_state(checkpoint_file, model, optimizer, scheduler)
    else:
        epoch = 0
        best_score = -1
        vocabulary = create_vocabulary(train_data_words, vocabulary_size=VOCABULARY_SIZE)

    best_model = copy.deepcopy(model)
    train_data = WordIndexDataset(train_data_words, vocabulary, max_words=MAX_MESSAGE_LENGTH_WORDS)
    test_data = WordIndexDataset(test_data_words, vocabulary, max_words=MAX_MESSAGE_LENGTH_WORDS)
    train_loader = DataLoader(train_data, batch_size=TRAIN_BATCH_SIZE, shuffle=True,
                              num_workers=2, collate_fn=IndexVectorCollator())
    test_loader = DataLoader(test_data, batch_size=TEST_BATCH_SIZE, shuffle=True,
                             num_workers=2, collate_fn=IndexVectorCollator())

    writer = SummaryWriter(log_file, purge_step=epoch, flush_secs=60)
    sample_input, sample_lens, _ = next(iter(train_loader))
    summary(model=model, input_data=sample_input.cuda(), lens=sample_lens,
            device=torch.device('cuda'))

    print("Learning started")
    while epoch < epochs:
        epoch += 1
        print(f"Epoch: {epoch}")
        epoch_losses = []
        epoch_accuracy = []
        model.train()
        loss_fn = FocalLoss(alpha=0.5, gamma=2)
        for step, (x, x_len, y) in enumerate(train_loader):
            x, y = x.cuda(), y.cuda()
            y_pred = model(x, x_len)
            loss_val = loss_fn(y_pred, y)
            accuracy = torch.argmax(y_pred, 1).eq(y).sum().item() / y.shape[0]
            optimizer.zero_grad()
            loss_val.backward()
            optimizer.step()
            epoch_losses.append(loss_val.item())
            epoch_accuracy.append(accuracy)
            print(' Batch {} of {} loss: {}, accuracy: {}, lr: {}'.format(
                step + 1, len(train_loader), loss_val.item(), accuracy,
                optimizer.param_groups[0]["lr"]), file=sys.stderr)

        print(f'Train loss: {np.mean(epoch_losses):.4f}, accuracy: {np.mean(epoch_accuracy):.4f}')
        writer.add_scalar('Loss/train', np.mean(epoch_losses), global_step=epoch)
        writer.add_scalar('Accuracy/train', np.mean(epoch_accuracy), global_step=epoch)
        writer.add_scalar('LearningRate', optimizer.param_groups[0]["lr"], global_step=epoch)

        score = evaluate(model, test_loader, loss_fn, writer=writer, epoch=epoch)
        if score > best_score:
            best_model = copy.deepcopy(model)
            best_score = score
            print('New best score')
        save_train_state(epoch, model, optimizer, scheduler, best_score, vocabulary, checkpoint_file)
        scheduler.step()

    if best_score < 0:
        best_score = evaluate(model, test_loader, writer=writer)
    writer.close()

    save_file_path = os.path.join(
        SAVED_MODELS_PATH,
        '{}.{}.{}.{:.2f}.pck'.format(model.__class__.__name__, str(train_data_words),
                                     datetime.datetime.now().isoformat(), best_score))
    log_file_path = os.path.join(
        LOGS_DIR,
        '{}.{}.{}.{:.2f}'.format(model.__class__.__name__, str(train_data_words),
                                 datetime.datetime.now().isoformat(), best_score))
    os.makedirs(os.path.dirname(save_file_path), exist_ok=True)
    shutil.move(checkpoint_file, save_file_path)
    shutil.move(log_file, log_file_path)
    return best_model, best_score
print("[!] vocab_size: {}, num_class: {}".format(vocab_size, num_class)) test_sampler = BucketBatchSampler(test_dataset, BATCH_SIZE, False, sort_key=lambda row: len(row['text'])) test_iterator = DataLoader(test_dataset, batch_sampler=test_sampler, collate_fn=collate_fn) model = Model(vocab_size, num_class=num_class, routing_type=ROUTING_TYPE, num_iterations=NUM_ITERATIONS) if MODEL_WEIGHT is not None: model.load_state_dict(torch.load('epochs/' + MODEL_WEIGHT)) margin_loss, focal_loss = MarginLoss(), FocalLoss() if torch.cuda.is_available(): model, margin_loss, focal_loss = model.to( 'cuda:{}'.format(GPU)), margin_loss.to( 'cuda:{}'.format(GPU)), focal_loss.to('cuda:{}'.format(GPU)) optimizer = Adam(model.parameters()) results = { 'train_loss': [], 'train_accuracy': [], 'test_loss': [], 'test_accuracy': [] } meter_accuracy = tnt.meter.ClassErrorMeter(accuracy=True) meter_loss = tnt.meter.AverageValueMeter()
def training(model, fold, log, train_image_names, train_image_labels,
             val_image_names, val_image_labels):
    # logging issues
    log.write(
        "\n---------------------------- [START %s] %s\n\n" %
        (datetime.now().strftime('%Y-%m-%d %H:%M:%S'), '-' * 20))
    log.write(
        '----------------------|--------- Train ---------|-------- Valid ---------|-------Best '
        'Results-------|----------|\n')
    log.write(
        'mode iter epoch | loss f1_macro | loss f1_macro | loss f1_macro | time '
        ' |\n')
    log.write(
        '----------------------------------------------------------------------------------------------------------'
        '----\n')

    # training params
    optimizer = optim.SGD(model.parameters(), lr=config.learning_rate_start,
                          momentum=0.9, weight_decay=config.weight_decay)
    if config.loss_name == 'ce':
        criterion = nn.BCEWithLogitsLoss().cuda()
    elif config.loss_name == 'focal':
        criterion = FocalLoss().cuda()
    elif config.loss_name == 'f1':
        criterion = F1Loss().cuda()
    else:
        raise ValueError('unknown loss name {}'.format(config.loss_name))

    best_results = [np.inf, 0]
    val_metrics = [np.inf, 0]
    scheduler = lr_scheduler.StepLR(optimizer, step_size=config.learning_rate_decay_epochs,
                                    gamma=config.learning_rate_decay_rate)
    start = timer()

    train_gen = HumanDataset(train_image_names, train_image_labels, config.train_dir, mode="train")
    train_loader = DataLoader(train_gen, batch_size=config.batch_size, shuffle=True,
                              pin_memory=True, num_workers=4)
    val_gen = HumanDataset(val_image_names, val_image_labels, config.train_dir,
                           augument=False, mode="train")
    val_loader = DataLoader(val_gen, batch_size=config.batch_size, shuffle=False,
                            pin_memory=True, num_workers=4)

    # train
    for epoch in range(0, config.epochs):
        # training & evaluating
        scheduler.step(epoch)
        get_learning_rate(optimizer)
        train_metrics = train(train_loader, model, criterion, optimizer, epoch,
                              val_metrics, best_results, start)
        val_metrics = evaluate(val_loader, model, criterion, epoch, train_metrics,
                               best_results, start)

        # check results
        is_best_loss = val_metrics[0] < best_results[0]
        best_results[0] = min(val_metrics[0], best_results[0])
        is_best_f1 = val_metrics[1] > best_results[1]
        best_results[1] = max(val_metrics[1], best_results[1])

        # save model
        save_checkpoint({
            "epoch": epoch + 1,
            "model_name": config.model_name,
            "state_dict": model.state_dict(),
            "best_loss": best_results[0],
            "optimizer": optimizer.state_dict(),
            "fold": fold,
            "best_f1": best_results[1],
        }, is_best_loss, is_best_f1, fold)

        # print logs
        print('\r', end='', flush=True)
        log.write(
            logging_pattern % (
                "best", epoch, epoch,
                train_metrics[0], train_metrics[1],
                val_metrics[0], val_metrics[1],
                str(best_results[0])[:8], str(best_results[1])[:8],
                time_to_str((timer() - start), 'min')
            )
        )
        log.write("\n")
        time.sleep(0.01)
def train(config, model, train_iter, dev_iter, test_iter, save_loss=False):
    model.train()
    init_network(model)
    param_optimizer = list(model.named_parameters())
    no_decay = ['bias', 'LayerNorm.bias', 'LayerNorm.weight']
    optimizer_grouped_parameters = [{
        'params': [p for n, p in param_optimizer if not any(nd in n for nd in no_decay)],
        'weight_decay': 0.01
    }, {
        'params': [p for n, p in param_optimizer if any(nd in n for nd in no_decay)],
        'weight_decay': 0.0
    }]

    lr = config.learning_rate
    max_grad_norm = 1.0
    num_training_steps = 1000
    num_warmup_steps = 100
    warmup_proportion = float(num_warmup_steps) / float(num_training_steps)  # 0.1

    def differential_params(model, init_lr, beta_decay=0.9):
        try:
            num_layers = len(model.bert.encoder.layer)
        except AttributeError:
            return model.parameters()
        # filter out layer_params to get the other params
        layer_params = []
        for layer_id in range(num_layers):
            layer_params += list(map(id, model.bert.encoder.layer[layer_id].parameters()))
        base_params = filter(lambda p: id(p) not in layer_params, model.parameters())
        # apply a discriminative (per-layer decaying) learning rate to the BERT layers
        layer_params_lr = []
        for layer_id in range(num_layers - 1, -1, -1):
            layer_params_lr_dict = {}
            layer_params_lr_dict['params'] = model.bert.encoder.layer[layer_id].parameters()
            layer_params_lr_dict['lr'] = round(init_lr * (beta_decay) ** layer_id, 9)
            layer_params_lr.append(layer_params_lr_dict)
        # return the new joint model parameters
        model_parameters = [{'params': base_params}] + layer_params_lr
        return model_parameters

    # set the optimizer according to whether DISCR is used
    if config.DISCR:
        optimizer = AdamW(differential_params(model, init_lr=lr), lr=lr, correct_bias=False)
    else:
        optimizer = AdamW(model.parameters(), lr=lr, correct_bias=False)

    # set the scheduler according to whether STLR is used; the default is warm-up only
    if config.STLR:
        scheduler = get_linear_schedule_with_warmup(
            optimizer, num_warmup_steps=num_warmup_steps,
            num_training_steps=num_training_steps)  # PyTorch scheduler
    else:
        scheduler = get_constant_schedule_with_warmup(optimizer, num_warmup_steps=num_warmup_steps)

    loss_collect = []
    total_batch = 0  # number of batches processed so far
    for epoch in trange(config.num_epochs, desc='Epoch'):
        for step, batch_data in enumerate(tqdm(train_iter, desc='Iteration')):
            batch_data = tuple(t.to(config.device) for t in batch_data)
            labels = batch_data[-1]
            # Forward pass
            outputs = model(batch_data)
            # Backward pass and optimizer step
            optimizer.zero_grad()
            if config.use_FocalLoss:
                FL_loss = FocalLoss(config.num_classes)
                loss = FL_loss(outputs, labels)
            else:
                loss = F.cross_entropy(outputs, labels)
            loss.backward()
            loss_collect.append(loss.item())
            print("\r%f" % loss, end='')
            torch.nn.utils.clip_grad_norm_(model.parameters(), max_grad_norm)
            optimizer.step()
            scheduler.step()

    # save the loss curve
    if save_loss:
        plot_train_loss(loss_collect, config)

    # evaluate on the dev set
    dev_acc, dev_loss, dev_report = evaluate(config, model, dev_iter)
    # print(dev_report)
    # print('dev_acc:', dev_acc, 'dev_loss:', dev_loss)

    logger = logging.getLogger(__name__)
    logger.setLevel(level=logging.INFO)
    handler = logging.FileHandler("log.txt", 'a')
    handler.setLevel(logging.INFO)
    formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
    handler.setFormatter(formatter)
    console = logging.StreamHandler()
    console.setLevel(logging.INFO)
    logger.addHandler(handler)
    logger.addHandler(console)
    logger.info('USING MODEL: %s, Using PTM: %s' % (config.model_name, config.BERT_USING))
    logger.info('Batch_Size: %d, Using FL: %s, Using DISCR: %s, Using STLR: %s' %
                (config.batch_size, config.use_FocalLoss, config.DISCR, config.STLR))
    # print(dev_report)
    with open('log.txt', 'a+') as f:
        print(dev_report, file=f)
    logger.info('dev_acc: %s dev_loss: %s' % (dev_acc, dev_loss))
    logger.info('-----------------------------------------------------------\n')
def main():
    args = parse_args()
    MAX_EPOCH = args.epoch
    NEW_LABEL_START = args.model
    PROPORTION = args.p

    # Data
    print('==> Preparing data..')
    original_trainset = torchvision.datasets.CIFAR100(
        root='./data', train=True, download=True, transform=transform_train)
    testset = torchvision.datasets.CIFAR100(root='./data', train=False,
                                            download=True, transform=transform_test)
    testloader = torch.utils.data.DataLoader(testset, batch_size=100,
                                             shuffle=False, num_workers=10)

    # Model
    if args.resume:
        # Load checkpoint.
        print('==> Resuming from checkpoint..')
        assert os.path.isdir('checkpoint'), 'Error: no checkpoint directory found!'
        checkpoint = torch.load('./checkpoint/' + '100_no' + str(args.model) + '.t7')
        net = checkpoint['net']
        best_acc = 0  # checkpoint['acc']
        start_epoch = 0  # checkpoint['epoch']
    else:
        print('==> Building model..')
        print('error!!!!!!')
        # net = VGG('VGG19')
        # net = ResNet20()
        # net = ResNet110()

    if use_cuda:
        net.cuda()
        net = torch.nn.DataParallel(net, device_ids=range(torch.cuda.device_count()))
        cudnn.benchmark = True

    weights = [1 for i in range(100)]
    if args.weighted:
        for i in range(NEW_LABEL_START, 100):
            weights[i] = PROPORTION
    weights = torch.FloatTensor(weights)
    print(weights[0], weights[99])
    if use_cuda:
        weights = weights.cuda()

    criterion = nn.CrossEntropyLoss(weights)
    if args.fl:
        criterion = FocalLoss(100)
        print("####")
    criterion.cuda()

    optimizer = optim.SGD(net.parameters(), lr=args.lr, momentum=0.9,
                          weight_decay=1e-4, nesterov=True)
    # MILESTONES = [int(MAX_EPOCH*0.5), int(MAX_EPOCH*0.75)]
    MILESTONES = [args.ms]
    scheduler = MultiStepLR(optimizer, milestones=MILESTONES, gamma=0.1)

    test_with_category(args, net, testloader)
    for epoch in range(start_epoch, MAX_EPOCH + 1):
        use_all_data = False
        first_epoch = False
        if epoch == MAX_EPOCH:
            use_all_data = True
        # else:
        scheduler.step()
        if epoch == 0:
            first_epoch = True
        train(args=args, net=net, epoch=epoch, optimizer=optimizer, criterion=criterion,
              first_epoch=first_epoch, use_all_data=use_all_data)
        test(args, net, epoch, testloader, criterion)
    print(final_train_acc, final_test_acc)
# prepare dataset
vocab_size, num_class, train_dataset, test_dataset = load_data(DATA_TYPE, preprocessing=True,
                                                               fine_grained=FINE_GRAINED,
                                                               verbose=True, text_length=TEXT_LENGTH)
print("[!] vocab_size: {}, num_class: {}".format(vocab_size, num_class))
train_sampler = BucketBatchSampler(train_dataset, BATCH_SIZE, False,
                                   sort_key=lambda row: len(row['text']))
train_iterator = DataLoader(train_dataset, batch_sampler=train_sampler, collate_fn=collate_fn)
test_sampler = BucketBatchSampler(test_dataset, BATCH_SIZE, False,
                                  sort_key=lambda row: len(row['text']))
test_iterator = DataLoader(test_dataset, batch_sampler=test_sampler, collate_fn=collate_fn)

model = Model(vocab_size, num_class=num_class, num_iterations=NUM_ITERATIONS)
if MODEL_WEIGHT is not None:
    model.load_state_dict(torch.load('epochs/' + MODEL_WEIGHT))
margin_loss = MarginLoss()
focal_loss = FocalLoss()
if torch.cuda.is_available():
    model.cuda()
    margin_loss.cuda()
    focal_loss.cuda()

optimizer = Adam(model.parameters())
print("# trainable parameters:", sum(param.numel() for param in model.parameters()))

# record statistics
results = {'train_loss': [], 'train_accuracy': [], 'test_loss': [], 'test_accuracy': []}
# record the current best test accuracy
best_acc = 0
meter_loss = tnt.meter.AverageValueMeter()
meter_accuracy = tnt.meter.ClassErrorMeter(accuracy=True)
meter_confusion = tnt.meter.ConfusionMeter(num_class, normalized=True)
def main():
    args = parse_args()
    batch_size = args.batch_size
    use_cuda = torch.cuda.is_available()
    hyperparams = vars(args)
    pprint(hyperparams)

    active_set, test_set = get_datasets(hyperparams["initial_pool"], hyperparams["data_path"])

    # We will use the FocalLoss
    criterion = FocalLoss(gamma=2, alpha=0.25)

    # Our model is a simple Unet
    model = smp.Unet(
        encoder_name="resnext50_32x4d",
        encoder_depth=5,
        encoder_weights="imagenet",
        decoder_use_batchnorm=False,
        classes=len(pascal_voc_ids),
    )
    # Add a Dropout layer to use MC-Dropout
    add_dropout(model, classes=len(pascal_voc_ids), activation=None)

    # This will enable Dropout at test time.
    model = MCDropoutModule(model)

    # Put everything on GPU.
    if use_cuda:
        model.cuda()

    # Make an optimizer
    optimizer = optim.SGD(model.parameters(), lr=hyperparams["lr"],
                          momentum=0.9, weight_decay=5e-4)
    # Keep a copy of the original weights
    initial_weights = deepcopy(model.state_dict())

    # Add metrics
    model = ModelWrapper(model, criterion)
    model.add_metric("cls_report", lambda: ClassificationReport(len(pascal_voc_ids)))

    # Which heuristic do you want to use?
    # We will use our custom reduction function.
    heuristic = get_heuristic(hyperparams["heuristic"], reduction=mean_regions)

    # The ALLoop is in charge of predicting the uncertainty and selecting the next samples to label.
    loop = ActiveLearningLoop(
        active_set,
        model.predict_on_dataset_generator,
        heuristic=heuristic,
        query_size=hyperparams["query_size"],
        # Instead of predicting on the entire pool, only a subset is used
        max_sample=1000,
        batch_size=batch_size,
        iterations=hyperparams["iterations"],
        use_cuda=use_cuda,
    )

    acc = []
    for epoch in tqdm(range(args.al_step)):
        # Following Gal et al. 2016, we reset the weights.
        model.load_state_dict(initial_weights)
        # Train 50 epochs before sampling.
        model.train_on_dataset(active_set, optimizer, batch_size,
                               hyperparams["learning_epoch"], use_cuda)
        # Validation!
        model.test_on_dataset(test_set, batch_size, use_cuda)
        should_continue = loop.step()

        metrics = model.metrics
        val_loss = metrics["test_loss"].value
        logs = {
            "val": val_loss,
            "epoch": epoch,
            "train": metrics["train_loss"].value,
            "labeled_data": active_set.labelled,
            "Next Training set size": len(active_set),
            "cls_report": metrics["test_cls_report"].value,
        }
        pprint(logs)
        acc.append(logs)
        if not should_continue:
            break
def main():
    fmoment = int(time.time())
    args = parse_args()
    norm = args.norm
    backbone = args.backbone
    pretrained = args.pretrained
    lossfunc = args.loss
    size = args.size
    pk = args.pk
    nk = args.nk
    n_epoch = args.n_epoch
    gpu = args.gpu
    test_every = args.test_every
    ckpt = args.ckpt
    print('norm=%s backbone=%s pretrained=%s lossfunc=%s size=%s pk=%d nk=%d epoch=%d gpu=%d test_every=%d ckpt=%s'
          % (norm, backbone, pretrained, lossfunc, size, pk, nk, n_epoch, gpu, test_every, ckpt))

    if backbone == 'resnet18':
        model = resnet18.resnet18(norm=norm).cuda(device=gpu)
        if pretrained == 'pretrained':
            ckpt_dict = torch.load('resnet18-pretrained.pth')
            model_dict = model.state_dict()
            ckpt_dict = {k: v for k, v in ckpt_dict.items() if k in model_dict}
            model_dict.update(ckpt_dict)
            model.load_state_dict(model_dict)

    if lossfunc == 'CE':
        criterion = nn.CrossEntropyLoss().cuda(device=gpu)
    elif lossfunc == 'Focal':
        criterion = FocalLoss(class_num=2, gpu=gpu).cuda(device=gpu)
        for m in model.modules():
            if isinstance(m, nn.Linear):
                nn.init.constant_(m.bias, -math.log(99))
    elif lossfunc == 'BCE':
        criterion = BCE(class_num=2, gpu=gpu).cuda(device=gpu)

    optimizer = optim.Adam(model.parameters(), lr=1e-4, weight_decay=1e-4)
    cudnn.benchmark = True

    train_trans = transforms.Compose([
        transforms.RandomHorizontalFlip(p=0.5),
        transforms.RandomVerticalFlip(p=0.5),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.2005, 0.1490, 0.1486], std=[0.1445, 0.1511, 0.0967])
    ])
    infer_trans = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.2005, 0.1490, 0.1486], std=[0.1445, 0.1511, 0.0967])
    ])
    train_dset = XDataset('train-%s.lib' % size, train_trans=train_trans, infer_trans=infer_trans)
    train_loader = torch.utils.data.DataLoader(train_dset, batch_size=64,
                                               shuffle=False, pin_memory=True)
    test_dset = XDataset('test-%s.lib' % size, train_trans=train_trans, infer_trans=infer_trans)
    test_loader = torch.utils.data.DataLoader(test_dset, batch_size=128,
                                              shuffle=False, pin_memory=True)

    if ckpt != 'none':
        checkpoint = torch.load(ckpt)
        start = checkpoint['epoch']
        model.load_state_dict(checkpoint['state_dict'])
        best_f1 = checkpoint['best_f1']
        optimizer.load_state_dict(checkpoint['optimizer'])
        if not os.path.exists('logs/Training_%s_%s_%s_%s_%s_%d_%d_%d.csv' %
                              (norm, backbone, pretrained, lossfunc, size, pk, nk, fmoment)):
            fconv = open('logs/Training_%s_%s_%s_%s_%s_%d_%d_%d.csv' %
                         (norm, backbone, pretrained, lossfunc, size, pk, nk, fmoment), 'w')
            fconv.write('time,epoch,loss,error\n')
            fconv.write('%d,0,0,0\n' % fmoment)
            fconv.close()
        if not os.path.exists('logs/Testing_%s_%s_%s_%s_%s_%d_%d_%d.csv' %
                              (norm, backbone, pretrained, lossfunc, size, pk, nk, fmoment)):
            fconv = open('logs/Testing_%s_%s_%s_%s_%s_%d_%d_%d.csv' %
                         (norm, backbone, pretrained, lossfunc, size, pk, nk, fmoment), 'w')
            fconv.write('time,epoch,loss,error,tp,tn,fp,fn,f1,S\n')
            fconv.write('%d,0,0,0\n' % fmoment)
            fconv.close()
    else:
        start = 0
        best_f1 = 0
        fconv = open('logs/Training_%s_%s_%s_%s_%s_%d_%d_%d.csv' %
                     (norm, backbone, pretrained, lossfunc, size, pk, nk, fmoment), 'w')
        fconv.write('time,epoch,loss,error\n')
        fconv.write('%d,0,0,0\n' % fmoment)
        fconv.close()
        fconv = open('logs/Testing_%s_%s_%s_%s_%s_%d_%d_%d.csv' %
                     (norm, backbone, pretrained, lossfunc, size, pk, nk, fmoment), 'w')
        fconv.write('time,epoch,loss,error,tp,tn,fp,fn,f1,S\n')
        fconv.write('%d,0,0,0\n' % fmoment)
        fconv.close()

    for epoch in range(start, n_epoch):
        train_dset.setmode(1)
        _, probs = inference(epoch, train_loader, model, criterion, gpu)
        # torch.save(probs, 'probs/train-%d.pth' % (epoch + 1))
        probs1 = probs[:train_dset.plen]
        probs0 = probs[train_dset.plen:]
        topk1 = np.array(group_argtopk(np.array(train_dset.slideIDX[:train_dset.plen]), probs1, pk))
        topk0 = np.array(group_argtopk(np.array(train_dset.slideIDX[train_dset.plen:]),
                                       probs0, nk)) + train_dset.plen
        topk = np.append(topk1, topk0).tolist()
        # torch.save(topk, 'topk/train-%d.pth' % (epoch + 1))
        # maxs = group_max(np.array(train_dset.slideIDX), probs, len(train_dset.targets))
        # torch.save(maxs, 'maxs/%d.pth' % (epoch + 1))
        sf(topk)
        train_dset.maketraindata(topk)
        train_dset.setmode(2)
        loss, err = train(train_loader, model, criterion, optimizer, gpu)
        moment = time.time()
        writecsv([moment, epoch + 1, loss, err],
                 'logs/Training_%s_%s_%s_%s_%s_%d_%d_%d.csv' %
                 (norm, backbone, pretrained, lossfunc, size, pk, nk, fmoment))
        print('Training epoch=%d, loss=%.5f, error=%.5f' % (epoch + 1, loss, err))

        if (epoch + 1) % test_every == 0:
            test_dset.setmode(1)
            loss, probs = inference(epoch, test_loader, model, criterion, gpu)
            # torch.save(probs, 'probs/test-%d.pth' % (epoch + 1))
            # topk = group_argtopk(np.array(test_dset.slideIDX), probs, pk)
            # torch.save(topk, 'topk/test-%d.pth' % (epoch + 1))
            maxs = group_max(np.array(test_dset.slideIDX), probs,
                             len(test_dset.targets))  # maximum probability for each slide
            # torch.save(maxs, 'maxs/test-%d.pth' % (epoch + 1))
            pred = [1 if x >= 0.5 else 0 for x in maxs]
            tp, tn, fp, fn = tfpn(pred, test_dset.targets)
            err = calc_err(pred, test_dset.targets)
            S, f1 = score(tp, tn, fp, fn)
            moment = time.time()
            writecsv([moment, epoch + 1, loss, err, tp, tn, fp, fn, f1, S],
                     'logs/Testing_%s_%s_%s_%s_%s_%d_%d_%d.csv' %
                     (norm, backbone, pretrained, lossfunc, size, pk, nk, fmoment))
            print('Testing epoch=%d, loss=%.5f, error=%.5f' % (epoch + 1, loss, err))

            # Save best model
            if f1 >= best_f1:
                best_f1 = f1
                obj = {
                    'epoch': epoch + 1,
                    'state_dict': model.state_dict(),
                    'best_f1': best_f1,
                    'optimizer': optimizer.state_dict()
                }
                torch.save(obj, 'ckpt_%s_%s_%s_%s_%s_%d_%d_%d.pth' %
                           (norm, backbone, pretrained, lossfunc, size, pk, nk, fmoment))