def valid(args):
    """Multi-label validation: returns dev loss, accuracy, and micro/macro F1."""
    model.eval()
    valid_loader = create_loader(args, valid_ds)
    dev_loss = 0.
    y_true = []
    y_pred = []
    for i, (inputs, labels) in enumerate(valid_loader):
        if args.cuda:
            inputs = inputs.cuda()
            labels = labels.cuda()
        with torch.no_grad():
            # The CNN variant takes a leading boolean flag in its forward pass.
            if args.model == "CNN":
                logits = model(True, inputs)
            else:
                logits = model(inputs)
            loss = criterion(logits, labels)
        dev_loss += loss.item()
        y_true.extend(labels.cpu().detach().numpy().tolist())
        y_pred.extend(torch.sigmoid(logits).cpu().detach().numpy().tolist())
    dev_loss /= len(valid_loader)
    # Binarize the sigmoid scores at 0.5 for the multi-label metrics.
    targets, outputs = y_true, np.array(y_pred) >= 0.5
    accuracy = metrics.accuracy_score(targets, outputs)
    f1_score_micro = metrics.f1_score(targets, outputs, average='micro', zero_division=1)
    f1_score_macro = metrics.f1_score(targets, outputs, average='macro', zero_division=1)
    print(f"Accuracy Score = {accuracy}")
    print(f"F1 Score (Micro) = {f1_score_micro}")
    print(f"F1 Score (Macro) = {f1_score_macro}")
    return dev_loss, accuracy, f1_score_micro, f1_score_macro
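# Note: with 2-D multi-label indicator arrays like `targets`/`outputs` above,
# sklearn's accuracy_score computes *subset* accuracy, i.e. a sample only
# counts as correct when every one of its labels matches. For example:
#     metrics.accuracy_score([[1, 0], [1, 1]], [[1, 0], [1, 0]])  # -> 0.5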
def test(args):
    # model.load_state_dict(torch.load(args.ckpt))
    model.eval()
    test_loader = create_loader(args, test_ds, shuffle=False)
    y_true, y_pred = [], []
    for i, (inputs, labels) in enumerate(test_loader):
        if args.cuda:
            inputs = inputs.cuda()
            labels = labels.cuda()
        with torch.no_grad():
            if args.model == "CNN":
                pred = model(False, inputs)
            else:
                pred = model(inputs)
        y_true.extend(labels.cpu().detach().numpy().tolist())
        y_pred.extend(torch.sigmoid(pred).cpu().detach().numpy().tolist())
    targets, outputs = y_true, np.array(y_pred) >= 0.5
    accuracy = metrics.accuracy_score(targets, outputs)
    f1_score_micro = metrics.f1_score(targets, outputs, average='micro', zero_division=1)
    f1_score_macro = metrics.f1_score(targets, outputs, average='macro', zero_division=1)
    prf1 = precision_recall_fscore_support(targets, outputs, beta=0.5, average=None)
    print(prf1)
    print(f"Accuracy Score = {accuracy}")
    print(f"F1 Score (Micro) = {f1_score_micro}")
    print(f"F1 Score (Macro) = {f1_score_macro}")
    return accuracy, f1_score_micro, f1_score_macro, classification_report(
        targets, outputs, target_names=list(test_ds.name2id)[:-1])
def train(epoch): device = torch.device("cuda:1" if torch.cuda.is_available() else "cpu") train_loader = create_loader(args, train_ds) train_loss = 0. global_steps = 0 optimizer.zero_grad() for i, a in enumerate(train_loader): model.train() inputs, labels = a optimizer.zero_grad() # CE loss. if args.model == "CNN": out = model(True, inputs) else: out = model(inputs) loss = criterion(out, torch.max(labels, 1)[1]) train_loss += (loss.item()) loss.backward() if _grad_step(args, i): optimizer.step() optimizer.zero_grad() global_steps += 1 logger.log("Loss: ", loss.item()) print("Training Loss: ", train_loss / global_steps) return train_loss / global_steps
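# `_grad_step` is not defined in this excerpt. A minimal sketch of what both
# training loops here assume (hypothetical; `args.grad` is taken to be the
# gradient-accumulation interval):
def _grad_step(args, i):
    # True on every `args.grad`-th mini-batch, so the optimizer steps once
    # per accumulation window.
    return (i + 1) % args.grad == 0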
def valid_(args):
    model.eval()
    valid_loader = create_loader(args, valid_ds)
    dev_loss = 0.
    for i, (inputs, labels) in enumerate(valid_loader):
        if args.cuda:
            inputs = inputs.cuda()
            labels = labels.cuda()
        with torch.no_grad():
            if args.model == "CNN":
                logits = model(True, inputs)
            else:
                logits = model(inputs)
            loss = criterion(logits, labels)
        dev_loss += loss.item()
    dev_loss /= len(valid_loader)
    return dev_loss
def train(epoch, loss_type='ASYM'):
    device = torch.device("cuda:%s" % args.device if torch.cuda.is_available() else "cpu")
    train_loader = create_loader(args, train_ds, shuffle=True)
    print('start train')
    # if args.bias_type != 'frame':
    #     weights = [1.0, 10.0]
    #     weights = [1.0, 1.0]
    #     class_weights = torch.FloatTensor(weights).cuda()
    #     criterion = nn.CrossEntropyLoss(weight=class_weights)
    if loss_type == "BCE":
        criterion = torch.nn.BCEWithLogitsLoss()
    elif loss_type == "ASYM":
        criterion = AsymmetricLoss()
    train_loss = 0.
    global_steps = 0
    optimizer.zero_grad()
    for i, (inputs, labels) in enumerate(train_loader):
        model.train()
        inputs, labels = inputs.to(device), labels.to(device)
        if args.model == "CNN":
            out = model(True, inputs)
        else:
            out = model(inputs)
        loss = criterion(out, labels)
        train_loss += loss.item()
        loss.backward()
        # Accumulate gradients and step only every `args.grad` mini-batches.
        if _grad_step(args, i):
            optimizer.step()
            optimizer.zero_grad()
            global_steps += 1
            logger.log("Loss: ", loss.item())
    print("Training Loss: ", train_loss / global_steps)
    return train_loss / global_steps
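# `AsymmetricLoss` is imported from elsewhere. A minimal sketch, assuming the
# multi-label asymmetric loss of Ben-Baruch et al. (2020): stronger focusing on
# negatives (gamma_neg > gamma_pos) plus probability shifting, so the many easy
# negatives in multi-label data contribute less than the rare positives.
class AsymmetricLoss(nn.Module):
    def __init__(self, gamma_neg=4, gamma_pos=1, clip=0.05, eps=1e-8):
        super().__init__()
        self.gamma_neg, self.gamma_pos = gamma_neg, gamma_pos
        self.clip, self.eps = clip, eps

    def forward(self, logits, targets):
        p = torch.sigmoid(logits)
        # Shift negative probabilities so very easy negatives are discarded.
        p_neg = (1 - p + self.clip).clamp(max=1)
        loss_pos = targets * torch.log(p.clamp(min=self.eps)) * (1 - p) ** self.gamma_pos
        loss_neg = (1 - targets) * torch.log(p_neg.clamp(min=self.eps)) * (1 - p_neg) ** self.gamma_neg
        return -(loss_pos + loss_neg).sum()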
def write_out(args, write_file):
    # model.load_state_dict(torch.load(args.ckpt))
    model.eval()
    test_loader = create_loader(args, test_ds, shuffle=False)
    y_true, y_pred = [], []
    for i, (inputs, labels) in enumerate(test_loader):
        if args.cuda:
            inputs = inputs.cuda()
            labels = labels.cuda()
        with torch.no_grad():
            if args.model == "CNN":
                pred = model(False, inputs)
            else:
                pred = model(inputs)
        cur_label = labels.cpu().detach().numpy().tolist()
        y_true.extend(cur_label)
        prediction = torch.sigmoid(pred).cpu().detach().numpy().tolist()
        prediction_value = (np.array(prediction) >= 0.5).astype(int)
        y_pred.extend(prediction)
        # Dump "gold ||| prediction" pairs, one example per line.
        for j in range(inputs.size()[0]):
            write_file.write(str(cur_label[j]) + " |||" + str(prediction_value[j]) + "\n")
    roc_auc = cal_roc_auc(np.array(y_true), np.array(y_pred))
    targets, outputs = y_true, np.array(y_pred) >= 0.5
    accuracy = metrics.accuracy_score(targets, outputs)
    f1_score_micro = metrics.f1_score(targets, outputs, average='micro')
    f1_score_macro = metrics.f1_score(targets, outputs, average='macro')
    prf1 = precision_recall_fscore_support(targets, outputs, beta=0.5, average=None)
    print(f"Accuracy Score = {accuracy}")
    print(f"F1 Score (Micro) = {f1_score_micro}")
    print(f"F1 Score (Macro) = {f1_score_macro}")
    return (accuracy, f1_score_micro, f1_score_macro, prf1,
            classification_report(targets, outputs), roc_auc)
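# `cal_roc_auc` is not defined in this excerpt. A minimal sketch of what the
# calls above assume (a hypothetical helper, not the original implementation):
# macro-averaged per-label ROC-AUC, skipping degenerate labels where
# roc_auc_score would raise because only one class appears in the gold data.
def cal_roc_auc(y_true, y_score):
    from sklearn.metrics import roc_auc_score
    aucs = [roc_auc_score(y_true[:, k], y_score[:, k])
            for k in range(y_true.shape[1])
            if len(np.unique(y_true[:, k])) > 1]
    return float(np.mean(aucs))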
def main():
    parser = argparse.ArgumentParser(description='Training')
    # Dataset / Model parameters
    parser.add_argument('--data', metavar='DIR', help='path to dataset')
    parser.add_argument('--model', default='hypernet', type=str, metavar='MODEL',
                        help='Name of model to train (default: "hypernet")')
    parser.add_argument('--pretrained', action='store_true', default=False,
                        help='Start with pretrained version of specified network (if avail)')
    parser.add_argument('--initial-checkpoint', default='', type=str, metavar='PATH',
                        help='Initialize model from this checkpoint (default: none)')
    parser.add_argument('--resume', default='', type=str, metavar='PATH',
                        help='Resume full model and optimizer state from checkpoint (default: none)')
    parser.add_argument('--num-classes', type=int, default=1000, metavar='N',
                        help='number of label classes (default: 1000)')
    parser.add_argument('--gp', default='avg', type=str, metavar='POOL',
                        help='Type of global pool, "avg", "max", "avgmax", "avgmaxc" (default: "avg")')
    parser.add_argument('--img-size', type=int, default=None, metavar='N',
                        help='Image patch size (default: None => model default)')
    parser.add_argument('--mean', type=float, nargs='+', default=None, metavar='MEAN',
                        help='Override mean pixel value of dataset')
    parser.add_argument('--std', type=float, nargs='+', default=None, metavar='STD',
                        help='Override std deviation of dataset')
    parser.add_argument('--interpolation', default='', type=str, metavar='NAME',
                        help='Image resize interpolation type (overrides model)')
    parser.add_argument('-b', '--batch-size', type=int, default=32, metavar='N',
                        help='input batch size for training (default: 32)')
    parser.add_argument('--drop', type=float, default=0.0, metavar='DROP',
                        help='Dropout rate (default: 0.)')
    # Optimizer parameters
    parser.add_argument('--opt', default='sgd', type=str, metavar='OPTIMIZER',
                        help='Optimizer (default: "sgd")')
    parser.add_argument('--opt-eps', default=1e-8, type=float, metavar='EPSILON',
                        help='Optimizer Epsilon (default: 1e-8)')
    parser.add_argument('--momentum', type=float, default=0.9, metavar='M',
                        help='SGD momentum (default: 0.9)')
    parser.add_argument('--weight-decay', type=float, default=0.0001,
                        help='weight decay (default: 0.0001)')
    # Learning rate schedule parameters
    parser.add_argument('--sched', default='spos_linear', type=str, metavar='SCHEDULER',
                        help='LR scheduler (default: "spos_linear")')
    parser.add_argument('--lr', type=float, default=0.01, metavar='LR',
                        help='learning rate (default: 0.01)')
    parser.add_argument('--warmup-lr', type=float, default=0.0001, metavar='LR',
                        help='warmup learning rate (default: 0.0001)')
    parser.add_argument('--min-lr', type=float, default=1e-5, metavar='LR',
                        help='lower lr bound for cyclic schedulers that hit 0 (1e-5)')
    parser.add_argument('--epochs', type=int, default=120, metavar='N',
                        help='number of epochs to train (default: 120)')
    parser.add_argument('--start-epoch', default=None, type=int, metavar='N',
                        help='manual epoch number (useful on restarts)')
    parser.add_argument('--decay-epochs', type=int, default=15, metavar='N',
                        help='epoch interval to decay LR')
    parser.add_argument('--warmup-epochs', type=int, default=3, metavar='N',
                        help='epochs to warmup LR, if scheduler supports')
    parser.add_argument('--cooldown-epochs', type=int, default=10, metavar='N',
                        help='epochs to cooldown LR at min_lr, after cyclic schedule ends')
    parser.add_argument('--decay-rate', '--dr', type=float, default=0.1, metavar='RATE',
                        help='LR decay rate (default: 0.1)')
    parser.add_argument('--grad', type=int, default=1, metavar='N',
                        help='gradient accumulation interval (default: 1)')
    # Augmentation parameters
    parser.add_argument('--color-jitter', type=float, default=0.4, metavar='PCT',
                        help='Color jitter factor (default: 0.4)')
    parser.add_argument('--reprob', type=float, default=0., metavar='PCT',
                        help='Random erase prob (default: 0.)')
    parser.add_argument('--remode', type=str, default='const',
                        help='Random erase mode (default: "const")')
    parser.add_argument('--mixup', type=float, default=0.0,
                        help='mixup alpha, mixup enabled if > 0. (default: 0.)')
    parser.add_argument('--mixup-off-epoch', default=0, type=int, metavar='N',
                        help='turn off mixup after this epoch, disabled if 0 (default: 0)')
    parser.add_argument('--smoothing', type=float, default=0.1,
                        help='label smoothing (default: 0.1)')
    # Batch norm parameters (only works with gen_efficientnet based models currently)
    parser.add_argument('--bn-tf', action='store_true', default=False,
                        help='Use Tensorflow BatchNorm defaults for models that support it (default: False)')
    parser.add_argument('--bn-momentum', type=float, default=None,
                        help='BatchNorm momentum override (if not None)')
    parser.add_argument('--bn-eps', type=float, default=None,
                        help='BatchNorm epsilon override (if not None)')
    # Model Exponential Moving Average
    parser.add_argument('--model-ema', action='store_true', default=False,
                        help='Enable tracking moving average of model weights')
    parser.add_argument('--model-ema-force-cpu', action='store_true', default=False,
                        help='Force ema to be tracked on CPU, rank=0 node only. Disables EMA validation.')
    parser.add_argument('--model-ema-decay', type=float, default=0.9998,
                        help='decay factor for model weights moving average (default: 0.9998)')
    parser.add_argument('--lr-noise', type=float, nargs='+', default=None, metavar='pct, pct',
                        help='learning rate noise on/off epoch percentages')
    parser.add_argument('--lr-noise-pct', type=float, default=0.67, metavar='PERCENT',
                        help='learning rate noise limit percent (default: 0.67)')
    parser.add_argument('--lr-noise-std', type=float, default=1.0, metavar='STDDEV',
                        help='learning rate noise std-dev (default: 1.0)')
    # Misc
    parser.add_argument('--seed', type=int, default=42, metavar='S',
                        help='random seed (default: 42)')
    parser.add_argument('--log-interval', type=int, default=50, metavar='N',
                        help='how many batches to wait before logging training status')
    parser.add_argument('-j', '--workers', type=int, default=4, metavar='N',
                        help='how many training processes to use (default: 4)')
    parser.add_argument('--num-gpu', type=int, default=1,
                        help='Number of GPUs to use')
    parser.add_argument('--amp', action='store_true', default=False,
                        help='use AMP mixed precision training')
    parser.add_argument('--local_rank', default=0, type=int)
    parser.add_argument('--update_iter', default=1, type=int)
    parser.add_argument('--slice', default=4, type=int)
    parser.add_argument('--pool_size', default=10, type=int)
    parser.add_argument('--resunit', action='store_true', default=False)
    parser.add_argument('--dil_conv', action='store_true', default=False)
    parser.add_argument('--tiny', action='store_true', default=False)
    parser.add_argument('--flops_maximum', default=600, type=int)
    parser.add_argument('--flops_minimum', default=0, type=int)
    parser.add_argument('--pick_method', default='meta', type=str)
    parser.add_argument('--meta_lr', default=1e-2, type=float)
    parser.add_argument('--meta_sta_epoch', default=-1, type=int)
    parser.add_argument('--model_selection', default=14, type=int)
    parser.add_argument('--how_to_prob',
default='pre_prob', type=str) parser.add_argument('--pre_prob', default=(0.05, 0.2, 0.05, 0.5, 0.05, 0.15), type=tuple) args = parser.parse_args() seed = args.seed torch.manual_seed(seed) torch.cuda.manual_seed_all(seed) np.random.seed(seed) torch.backends.cudnn.deterministic = True args.distributed = False if 'WORLD_SIZE' in os.environ: args.distributed = int(os.environ['WORLD_SIZE']) > 1 if args.distributed and args.num_gpu > 1: logger.warning( 'Using more than one GPU per process in distributed mode is not allowed. Setting num_gpu to 1.' ) args.num_gpu = 1 args.device = 'cuda:0' args.world_size = 1 args.rank = 0 # global rank if args.distributed: args.num_gpu = 1 args.device = 'cuda:%d' % args.local_rank torch.cuda.set_device(args.local_rank) import random port = random.randint(0, 50000) torch.distributed.init_process_group( backend='nccl', init_method='env://' ) # tcp://127.0.0.1:{}'.format(port), rank=args.local_rank, world_size=8) args.world_size = torch.distributed.get_world_size() args.rank = torch.distributed.get_rank() assert args.rank >= 0 if args.distributed: logging.info( 'Training in distributed mode with multiple processes, 1 GPU per process. Process %d, total %d.' % (args.rank, args.world_size)) else: logging.info('Training with a single process on %d GPUs.' % args.num_gpu) if args.model_selection == 470: arch_list = [[0], [3, 4, 3, 1], [3, 2, 3, 0], [3, 3, 3, 1], [3, 3, 3, 3], [3, 3, 3, 3], [0]] arch_def = [ # stage 0, 112x112 in ['ds_r1_k3_s1_e1_c16_se0.25'], # stage 1, 112x112 in [ 'ir_r1_k3_s2_e4_c24_se0.25', 'ir_r1_k3_s1_e4_c24_se0.25', 'ir_r1_k3_s1_e4_c24_se0.25', 'ir_r1_k3_s1_e4_c24_se0.25' ], # stage 2, 56x56 in [ 'ir_r1_k5_s2_e4_c40_se0.25', 'ir_r1_k5_s1_e4_c40_se0.25', 'ir_r1_k5_s2_e4_c40_se0.25', 'ir_r1_k5_s2_e4_c40_se0.25' ], # stage 3, 28x28 in [ 'ir_r1_k3_s2_e6_c80_se0.25', 'ir_r1_k3_s1_e4_c80_se0.25', 'ir_r1_k3_s1_e4_c80_se0.25', 'ir_r2_k3_s1_e4_c80_se0.25' ], # stage 4, 14x14in [ 'ir_r1_k3_s1_e6_c96_se0.25', 'ir_r1_k3_s1_e6_c96_se0.25', 'ir_r1_k3_s1_e6_c96_se0.25', 'ir_r1_k3_s1_e6_c96_se0.25' ], # stage 5, 14x14in [ 'ir_r1_k5_s2_e6_c192_se0.25', 'ir_r1_k5_s1_e6_c192_se0.25', 'ir_r1_k5_s2_e6_c192_se0.25', 'ir_r1_k5_s2_e6_c192_se0.25' ], # stage 6, 7x7 in ['cn_r1_k1_s1_c320_se0.25'], ] args.img_size = 224 elif args.model_selection == 42: arch_list = [[0], [3], [3, 1], [3, 1], [3, 3, 3], [3, 3], [0]] arch_def = [ # stage 0, 112x112 in ['ds_r1_k3_s1_e1_c16_se0.25'], # stage 1, 112x112 in ['ir_r1_k3_s2_e4_c24_se0.25'], # stage 2, 56x56 in ['ir_r1_k5_s2_e4_c40_se0.25', 'ir_r1_k5_s2_e4_c40_se0.25'], # stage 3, 28x28 in ['ir_r1_k3_s2_e6_c80_se0.25', 'ir_r1_k3_s2_e6_c80_se0.25'], # stage 4, 14x14in [ 'ir_r1_k3_s1_e6_c96_se0.25', 'ir_r1_k3_s1_e6_c96_se0.25', 'ir_r1_k3_s1_e6_c96_se0.25' ], # stage 5, 14x14in ['ir_r1_k5_s2_e6_c192_se0.25', 'ir_r1_k5_s2_e6_c192_se0.25'], # stage 6, 7x7 in ['cn_r1_k1_s1_c320_se0.25'], ] args.img_size = 96 elif args.model_selection == 14: arch_list = [[0], [3], [3, 3], [3, 3], [3], [3], [0]] arch_def = [ # stage 0, 112x112 in ['ds_r1_k3_s1_e1_c16_se0.25'], # stage 1, 112x112 in ['ir_r1_k3_s2_e4_c24_se0.25'], # stage 2, 56x56 in ['ir_r1_k5_s2_e4_c40_se0.25', 'ir_r1_k3_s2_e4_c40_se0.25'], # stage 3, 28x28 in ['ir_r1_k3_s2_e6_c80_se0.25', 'ir_r1_k3_s2_e4_c80_se0.25'], # stage 4, 14x14in ['ir_r1_k3_s1_e6_c96_se0.25'], # stage 5, 14x14in ['ir_r1_k5_s2_e6_c192_se0.25'], # stage 6, 7x7 in ['cn_r1_k1_s1_c320_se0.25'], ] args.img_size = 64 elif args.model_selection == 112: arch_list = [[0], [3], [3, 3], [3, 3], [3, 3, 3], [3, 3], [0]] arch_def = [ 
            # stage 0, 112x112 in
            ['ds_r1_k3_s1_e1_c16_se0.25'],
            # stage 1, 112x112 in
            ['ir_r1_k3_s2_e4_c24_se0.25'],
            # stage 2, 56x56 in
            ['ir_r1_k5_s2_e4_c40_se0.25', 'ir_r1_k3_s2_e4_c40_se0.25'],
            # stage 3, 28x28 in
            ['ir_r1_k3_s2_e6_c80_se0.25', 'ir_r1_k3_s2_e6_c80_se0.25'],
            # stage 4, 14x14 in
            ['ir_r1_k3_s1_e6_c96_se0.25', 'ir_r1_k3_s1_e6_c96_se0.25',
             'ir_r1_k3_s1_e6_c96_se0.25'],
            # stage 5, 14x14 in
            ['ir_r1_k5_s2_e6_c192_se0.25', 'ir_r1_k5_s2_e6_c192_se0.25'],
            # stage 6, 7x7 in
            ['cn_r1_k1_s1_c320_se0.25'],
        ]
        args.img_size = 160
    elif args.model_selection == 285:
        arch_list = [[0], [3], [3, 3], [3, 1, 3], [3, 3, 3, 3], [3, 3, 3], [0]]
        arch_def = [
            # stage 0, 112x112 in
            ['ds_r1_k3_s1_e1_c16_se0.25'],
            # stage 1, 112x112 in
            ['ir_r1_k3_s2_e4_c24_se0.25'],
            # stage 2, 56x56 in
            ['ir_r1_k5_s2_e4_c40_se0.25', 'ir_r1_k5_s2_e4_c40_se0.25'],
            # stage 3, 28x28 in
            ['ir_r1_k3_s2_e6_c80_se0.25', 'ir_r1_k3_s2_e6_c80_se0.25',
             'ir_r1_k3_s2_e6_c80_se0.25'],
            # stage 4, 14x14 in
            ['ir_r1_k3_s1_e6_c96_se0.25', 'ir_r1_k3_s1_e6_c96_se0.25',
             'ir_r1_k3_s1_e6_c96_se0.25', 'ir_r1_k3_s1_e6_c96_se0.25'],
            # stage 5, 14x14 in
            ['ir_r1_k5_s2_e6_c192_se0.25', 'ir_r1_k5_s2_e6_c192_se0.25',
             'ir_r1_k5_s2_e6_c192_se0.25'],
            # stage 6, 7x7 in
            ['cn_r1_k1_s1_c320_se0.25'],
        ]
        args.img_size = 224
    elif args.model_selection == 600:
        arch_list = [[0], [3, 3, 2, 3, 3], [3, 2, 3, 2, 3], [3, 2, 3, 2, 3],
                     [3, 3, 2, 2, 3, 3], [3, 3, 2, 3, 3, 3], [0]]
        arch_def = [
            # stage 0, 112x112 in
            ['ds_r1_k3_s1_e1_c16_se0.25'],
            # stage 1, 112x112 in
            ['ir_r1_k3_s2_e4_c24_se0.25', 'ir_r1_k3_s2_e4_c24_se0.25',
             'ir_r1_k3_s2_e4_c24_se0.25', 'ir_r1_k3_s2_e4_c24_se0.25',
             'ir_r1_k3_s2_e4_c24_se0.25'],
            # stage 2, 56x56 in
            ['ir_r1_k5_s2_e4_c40_se0.25', 'ir_r1_k5_s2_e4_c40_se0.25',
             'ir_r1_k5_s2_e4_c40_se0.25', 'ir_r1_k5_s2_e4_c40_se0.25',
             'ir_r1_k5_s2_e4_c40_se0.25'],
            # stage 3, 28x28 in
            ['ir_r1_k3_s2_e6_c80_se0.25', 'ir_r1_k3_s1_e4_c80_se0.25',
             'ir_r1_k3_s1_e4_c80_se0.25', 'ir_r1_k3_s1_e4_c80_se0.25',
             'ir_r1_k3_s1_e4_c80_se0.25'],
            # stage 4, 14x14 in
            ['ir_r1_k3_s1_e6_c96_se0.25', 'ir_r1_k3_s1_e6_c96_se0.25',
             'ir_r1_k3_s1_e6_c96_se0.25', 'ir_r1_k3_s1_e6_c96_se0.25',
             'ir_r1_k3_s1_e6_c96_se0.25', 'ir_r1_k3_s1_e6_c96_se0.25'],
            # stage 5, 14x14 in
            ['ir_r1_k5_s2_e6_c192_se0.25', 'ir_r1_k5_s1_e6_c192_se0.25',
             'ir_r1_k5_s1_e6_c192_se0.25', 'ir_r1_k5_s1_e6_c192_se0.25',
             'ir_r1_k5_s1_e6_c192_se0.25', 'ir_r1_k5_s1_e6_c192_se0.25'],
            # stage 6, 7x7 in
            ['cn_r1_k1_s1_c320_se0.25'],
        ]
        args.img_size = 224

    model = _gen_childnet(arch_list, arch_def,
                          num_classes=args.num_classes,
                          drop_rate=args.drop,
                          global_pool=args.gp)
    data_config = resolve_data_config(vars(args), model=model,
                                      verbose=args.local_rank == 0)
    if args.local_rank == 0:
        logger.info(args)
        logger.info('Model %s created, param count: %d' %
                    (args.model, sum([m.numel() for m in model.parameters()])))

    if args.num_gpu > 1:
        if args.amp:
            logging.warning(
                'AMP does not work well with nn.DataParallel, disabling. Use distributed mode for multi-GPU AMP.')
            args.amp = False
        model = nn.DataParallel(model, device_ids=list(range(args.num_gpu))).cuda()
    else:
        model.cuda()

    if args.distributed:
        if has_apex:
            model = DDP(model, delay_allreduce=True)
        else:
            if args.local_rank == 0:
                logger.info(
                    "Using torch DistributedDataParallel. Install NVIDIA Apex for Apex DDP."
                )
            model = DDP(model, device_ids=[args.local_rank])  # can use device str in Torch >= 1.1
    # NOTE: EMA model does not need to be wrapped by DDP.
    model_ema = ModelEma(model,
                         decay=args.model_ema_decay,
                         device='cpu' if args.model_ema_force_cpu else '',
                         resume=args.resume)

    if args.tiny:
        from dataset.tiny_imagenet import get_newimagenet
        [loader_train, loader_eval], [train_sampler, test_sampler] = \
            get_newimagenet(args.data, args.batch_size)
    else:
        train_dir = os.path.join(args.data, 'train')
        if not os.path.exists(train_dir):
            logger.error('Training folder does not exist at: {}'.format(train_dir))
            exit(1)
        eval_dir = os.path.join(args.data, 'val')
        if not os.path.isdir(eval_dir):
            logger.error('Validation folder does not exist at: {}'.format(eval_dir))
            exit(1)
        dataset_eval = Dataset(eval_dir)
        loader_eval = create_loader(
            dataset_eval,
            input_size=data_config['input_size'],
            batch_size=4 * args.batch_size,
            is_training=False,
            interpolation=data_config['interpolation'],
            mean=data_config['mean'],
            std=data_config['std'],
            num_workers=args.workers,
            distributed=args.distributed,
        )

    def accuracy(output, target, topk=(1,)):
        """Computes the accuracy over the k top predictions for the specified values of k"""
        maxk = max(topk)
        batch_size = target.size(0)
        _, pred = output.topk(maxk, 1, True, True)
        pred = pred.t()
        correct = pred.eq(target.view(1, -1).expand_as(pred))
        return [correct[:k].view(-1).float().sum(0) * 100. / batch_size for k in topk]

    prec1_m = AverageMeter()
    prec5_m = AverageMeter()

    def reduce_tensor(tensor, n):
        # Average a metric tensor across all distributed workers.
        rt = tensor.clone()
        dist.all_reduce(rt, op=dist.ReduceOp.SUM)
        rt /= n
        return rt

    # Evaluate the EMA weights on the validation set.
    model_ema.ema.eval()
    with torch.no_grad():
        for step, (x, y) in enumerate(loader_eval):
            logits = model_ema.ema(x)
            prec1, prec5 = accuracy(logits, y, topk=(1, 5))
            if args.distributed:
                prec1 = reduce_tensor(prec1, args.world_size)
                prec5 = reduce_tensor(prec5, args.world_size)
            prec1_m.update(prec1.item(), logits.size(0))
            prec5_m.update(prec5.item(), logits.size(0))
    if args.local_rank == 0:
        logger.info("Prec1: %s Prec5: %s", prec1_m.avg, prec5_m.avg)
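# `AverageMeter` is not defined in this excerpt. A minimal sketch of the usual
# timm-style helper that the evaluation loop above assumes:
class AverageMeter:
    """Tracks the latest value and the running average of a metric."""
    def __init__(self):
        self.val = 0.
        self.sum = 0.
        self.count = 0
        self.avg = 0.

    def update(self, val, n=1):
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count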
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--data_dir', required=True, type=str)
    parser.add_argument('--data', default='CIFAR100', type=str)
    parser.add_argument('--random_seed', default=10, type=int)
    parser.add_argument('--epoch', default=200, type=int)
    parser.add_argument('--scheduler', default='step', type=str, help='step|cos')
    parser.add_argument('--schedule', default=[100, 150], type=int, nargs='+')
    parser.add_argument('--batch_size', default=128, type=int)
    parser.add_argument('--lr', default=0.1, type=float)
    parser.add_argument('--lr_decay', default=0.1, type=float)
    parser.add_argument('--momentum', default=0.9, type=float)
    parser.add_argument('--weight_decay', default=1e-4, type=float)
    parser.add_argument('--model', default='cifarresnet18', type=str)
    parser.add_argument('--num_channels', default=256, type=int)
    parser.add_argument('--num_features', default=-1, type=int)
    parser.add_argument('--repeat', default=1, type=int)
    parser.add_argument('--depth', default=2, type=int)
    parser.add_argument('--temperature', default=4, type=float)
    # distill
    parser.add_argument('--bifpn', default='BiFPNc', type=str, help='BiFPN|BiFPNc')
    parser.add_argument('--width', default=2, type=int)
    parser.add_argument('--distill', default='att', type=str)
    parser.add_argument('--alpha', default=1, type=float)
    parser.add_argument('--beta', default=0.0, type=float)
    parser.add_argument('--aux', default='none', type=str)
    parser.add_argument('--aux_lamb', default=0.0, type=float)
    # augmentation
    parser.add_argument('--aug', default='none', type=str)
    parser.add_argument('--aug_a', default=0.0, type=float)
    args = parser.parse_args()

    # Seed everything for reproducibility.
    np.random.seed(args.random_seed)
    torch.manual_seed(args.random_seed)
    torch.cuda.manual_seed(args.random_seed)
    random.seed(args.random_seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

    args_path = '{}_{}_{}_a{}_b{}_{}{}_{}{}'.format(
        args.data, args.model, args.distill, args.alpha, args.beta,
        args.aux, args.aux_lamb, args.aug, args.aug_a)
    path_log = os.path.join('logs', args_path)
    if not os.path.exists(path_log):
        os.makedirs(path_log)
    logger = create_logging(os.path.join(path_log, '%s.txt' % args.random_seed))

    train_loader, test_loader, args.num_classes = create_loader(
        args.batch_size, args.data_dir, args.data)
    for param in sorted(vars(args).keys()):
        logger.info('--{0} {1}'.format(param, vars(args)[param]))

    args.depth = [args.depth] * 3
    model = models.__dict__[args.model](num_classes=args.num_classes)
    if args.num_features == -1:
        args.num_features = len(model.network_channels)
    args.network_channels = model.network_channels[-args.num_features:]
    bifpn = models.__dict__[args.bifpn](args.network_channels, args.num_classes, args)

    if args.aux == 'sla':
        criterion_ce = distill_loss.__dict__[args.aux](args)
        criterion_ce.train()
    else:
        criterion_ce = nn.CrossEntropyLoss()
    criterion_kd = distill_loss.__dict__[args.distill](args, bifpn)
    criterion_kd.train()

    # Optimize the student, the BiFPN head, and any learnable loss modules jointly.
    train_list = nn.ModuleList()
    train_list.append(model)
    train_list.append(criterion_ce)
    train_list.append(criterion_kd)
    train_list.append(bifpn)
    bifpn.cuda()
    train_list.cuda()
    criterion = [criterion_ce, criterion_kd]
    optimizer = optim.SGD(train_list.parameters(), lr=args.lr,
                          momentum=args.momentum, weight_decay=args.weight_decay)
    scheduler = lr_scheduler(optimizer, args.scheduler, args.schedule,
                             args.lr_decay, args.epoch)

    for epoch in range(1, args.epoch + 1):
        s = time()
        loss, train_acc1 = train(model, bifpn, optimizer, criterion, train_loader, args)
        scheduler.step()
        test_acc1 = test(model, test_loader)
        logger.info(
            'Epoch: {0:>2d}|Train Loss: {1:2.4f}| Train Acc: {2:.4f}| Test Acc: {3:.4f}| Time: {4:4.2f}(s)'
            .format(epoch, loss, train_acc1, test_acc1, time() - s))
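# `lr_scheduler` is not defined in this excerpt. A plausible sketch matching the
# call above and the '--scheduler step|cos' help string (hypothetical, not the
# original implementation):
def lr_scheduler(optimizer, mode, milestones, gamma, epochs):
    if mode == 'step':
        # Decay the LR by `gamma` at each milestone epoch.
        return optim.lr_scheduler.MultiStepLR(optimizer, milestones=milestones, gamma=gamma)
    # Otherwise anneal the LR with a cosine schedule over the full run.
    return optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=epochs)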
elif args.model == 'albert':
    model = AlbertForSequenceClassification.from_pretrained("albert-base-v2").to(device)

if args.load is not None:
    model.load_state_dict(torch.load(args.load))

if args.mode != 'test':
    if args.tensorboard:
        writer = SummaryWriter(
            f'runs/{args.model}_{args.mode}_{args.lr}_{args.batchsize}')
    else:
        writer = None
    train_loader, val_loader = create_loader(args.data_dir, args.model, args.mode,
                                             batch_size=args.batchsize, ratio=args.ratio)
    optimizer = AdamW(model.parameters(), lr=args.lr)
    for epoch in range(args.epoch):
        train(epoch, train_loader, val_loader, optimizer, model, device,
              args.save, writer)
else:
    args.data_dir = './data/news_test.csv'
    test_loader, _ = create_loader(args.data_dir, args.model, args.mode,
                                   batch_size=args.batchsize)
    info_list = test(test_loader, model, device)
    submission = pd.read_csv('./data/sample_submission.csv')
def test(args, name):
    # model.load_state_dict(torch.load(args.ckpt))
    model.eval()
    test_loader = create_loader(args, test_ds, shuffle=False)
    y_true, y_pred = [], []
    for i, (inputs, labels) in enumerate(test_loader):
        if args.cuda:
            inputs = inputs.cuda()
            labels = labels.cuda()
        with torch.no_grad():
            if args.model == "CNN":
                pred = model(False, inputs)
            else:
                pred = model(inputs)
        cur_label = labels.cpu().detach().numpy()
        y_true.extend(cur_label)
        prediction = torch.sigmoid(pred).cpu().detach().numpy().tolist()
        prediction_value = (np.array(prediction) >= 0.5).astype(int)
        y_pred.extend(prediction)
        # for j in range(inputs.size()[0]):  # debugging: compare gold vs. predicted label indices
        #     gold = np.where(cur_label[j] != 0.0)
        #     pred_idx = np.where(prediction_value[j] != 0)

    # Write one {"id", "labels"} record per test example.
    import json
    test_file = pd.read_json("data/data_task6/test.json")
    ids = list(test_file.id)
    y_pred_value = np.array(y_pred) >= 0.5
    store_file = []

    def get_key(val, my_dict):
        # Reverse lookup: map a label index back to its name; the several
        # 'Thought-terminating*' variants collapse into one label.
        for key, value in my_dict.items():
            if val == value:
                if key.startswith('Thought-terminating'):
                    return 'Thought-terminating'
                return key

    for i in range(len(ids)):
        labels_name_ = [
            get_key(index, test_ds.name2id)
            for index in np.where(y_pred_value[i] != 0)[0].tolist()
        ]
        store_file.append({"id": ids[i], "labels": labels_name_})
    with open("output/%s.json" % name, "w") as out_file:
        json.dump(store_file, out_file)

    roc_auc = cal_roc_auc(np.array(y_true), np.array(y_pred))
    targets, outputs = y_true, np.array(y_pred) >= 0.5
    accuracy = metrics.accuracy_score(targets, outputs)
    f1_score_micro = metrics.f1_score(targets, outputs, average='micro')
    f1_score_macro = metrics.f1_score(targets, outputs, average='macro')
    prf1 = precision_recall_fscore_support(targets, outputs, beta=0.5, average=None)
    print(f"Accuracy Score = {accuracy}")
    print(f"F1 Score (Micro) = {f1_score_micro}")
    print(f"F1 Score (Macro) = {f1_score_macro}")
    return (accuracy, f1_score_micro, f1_score_macro, prf1,
            classification_report(targets, outputs), roc_auc)
lr = config.lr
optimizer = optim.Adam(model.parameters(), weight_decay=0.0, lr=lr)
# optimizer_smoothing = optim.Adam(model2.parameters(), betas=[.9, .999], weight_decay=0.0, lr=lr)
scheduler = optim.lr_scheduler.StepLR(optimizer, 8, gamma=0.1, last_epoch=-1)
n_epochs = config.num_epochs
log_interval = 100

# DataLoader
# Train Dataset & Loader
print("Data Loading ...")
trainset = Dataset(config.traindata_dir)
# NOTE: assuming this is timm's create_loader, is_training is left at its
# default (False) here, so no train-time augmentation is applied.
trainloader = create_loader(dataset=trainset,
                            input_size=(3, 224, 224),
                            batch_size=config.batch_size,
                            interpolation="bicubic",
                            mean=(0.485, 0.456, 0.406),
                            std=(0.229, 0.224, 0.225),
                            num_workers=2,
                            crop_pct=1.0)
# trainloader = torch.utils.data.DataLoader(trainset, batch_size=32, shuffle=True, num_workers=2, drop_last=True)

# Test Dataset & Loader
validset = Dataset(config.validdata_dir)
validloader = create_loader(dataset=validset,
                            input_size=(3, 224, 224),
                            batch_size=config.batch_size,
                            interpolation="bicubic",
                            mean=(0.485, 0.456, 0.406),
                            std=(0.229, 0.224, 0.225),
                            num_workers=2,
                            crop_pct=1.0)