def generate_submission(net, config, folds=1, SUBM_OUT=None, gen_csv=True,
                        attn=False):
    """Predict on the test set and optionally write submission files.

    ``generate_preds`` is run ``folds`` times over the test loader and the
    results are averaged. When ``gen_csv`` is true, class-wise thresholds are
    obtained from ``find_threshold``, the raw averaged predictions (plus a
    ``th`` column) are written to ``SUBM_OUT`` with ``'subm'`` replaced by
    ``'preds'``, and the thresholded submission is written via ``save_pred``.

    Args:
        net: Model to evaluate; switched to eval mode here.
        config: Object providing ``imsize``, ``num_channels`` and
            ``batch_size``; also forwarded to ``find_threshold``.
        folds: Number of prediction passes to average; must be >= 1.
        SUBM_OUT: Output path of the submission CSV. Required when
            ``gen_csv`` is true.
        gen_csv: Whether to compute thresholds and write the CSV files.
        attn: Forwarded to ``generate_preds`` and ``find_threshold``.

    Returns:
        NumPy array of averaged predictions with one row per test sample and
        28 columns.

    Raises:
        ValueError: If ``folds`` < 1, or ``gen_csv`` is true while
            ``SUBM_OUT`` is None.
    """
    # Validate before any expensive work: previously a missing SUBM_OUT only
    # failed (with an AttributeError on None) after inference had finished.
    if folds < 1:
        raise ValueError('folds must be >= 1, got {}'.format(folds))
    if gen_csv and SUBM_OUT is None:
        raise ValueError('SUBM_OUT must be provided when gen_csv is True')

    print('Generating predictions...')
    net.eval()

    test_loader = get_test_loader(imsize=config.imsize,
                                  num_channels=config.num_channels,
                                  batch_size=config.batch_size)

    # Accumulate predictions over all passes, then average.
    test_preds = torch.zeros(len(test_loader.dataset), 28)
    for _ in range(folds):
        test_preds += generate_preds(net, test_loader, test=True, attn=attn)
    test_preds = test_preds.numpy() / float(folds)

    if gen_csv:
        print('Generating submission with class wise thresholding...')
        best_th = find_threshold(net, config, class_wise=True, plot=True,
                                 attn=attn)

        # Persist raw predictions next to the thresholds so they can be
        # re-ensembled later without re-running the model.
        preds_df = pd.DataFrame(data=test_preds)
        preds_df['th'] = pd.Series(best_th)
        preds_df.to_csv(SUBM_OUT.replace('subm', 'preds'), index=False)

        save_pred(test_preds, best_th, SUBM_OUT)

    return test_preds
def subm_ensemble():
    """Majority-vote ensemble of all submission CSVs found in ``./subm/``.

    Each file's ``Predicted`` column is converted to a 28-wide label vector
    with ``label_gen_np`` and the vectors are summed over files. The sum is
    passed to ``save_pred`` with a threshold of half the number of files, and
    the result is written to a timestamped file in ``./subm/`` (optionally
    suffixed with ``args.outfile``).

    Raises:
        ValueError: If ``./subm/`` is empty or contains no ``*.csv`` file.
    """
    if len(os.listdir('./subm/')) == 0:
        raise ValueError('Submission directory is empty')

    # Snapshot the file list up front so the count is known even when the
    # directory holds files but none of them is a CSV.
    csv_paths = glob.glob('./subm/*.csv')
    if not csv_paths:
        raise ValueError('No CSV files found in submission directory')

    num_rows = len(pd.read_csv('./data/sample_submission.csv'))
    all_preds = np.zeros((num_rows, 28))

    for filepath in csv_paths:
        print('Processing file', filepath.split('/')[-1])
        predi = pd.read_csv(filepath)
        # ``np.float`` was removed from NumPy; the builtin ``float`` is the
        # same dtype (float64).
        all_preds += np.stack(
            predi['Predicted'].apply(label_gen_np)).astype(float)

    # NOTE: the output lands in ./subm/ as well, so it will be picked up as
    # an input by a subsequent run of this function.
    SUBM_OUT = './subm/subm_ensemble_{}.csv'.format(
        datetime.now().strftime('%Y-%m-%d_%H:%M:%S'))
    if args.outfile != '':
        SUBM_OUT = SUBM_OUT.replace('.csv', '_{}.csv'.format(args.outfile))

    # Threshold at half the number of voters.
    save_pred(all_preds, float(len(csv_paths)) / 2., SUBM_OUT)
def preds_ensemble():
    """Average the raw prediction CSVs in ``./preds/`` into one submission.

    Every column except the last of each file is treated as predictions and
    averaged across files. The first entry of each file's ``th`` column is
    averaged as the threshold. The result is handed to ``save_pred`` and
    written to a timestamped file in ``./subm/`` (optionally suffixed with
    ``args.outfile``).

    Raises:
        ValueError: If ``./preds/`` is empty or contains no ``*.csv`` file.
    """
    if len(os.listdir('./preds/')) == 0:
        raise ValueError('Preds directory is empty')

    # Resolve the file list first: the averaging below needs the count, and
    # a directory without any CSV previously crashed with a NameError.
    csv_paths = glob.glob('./preds/*.csv')
    if not csv_paths:
        raise ValueError('No CSV files found in preds directory')

    num_rows = len(pd.read_csv('./data/sample_submission.csv'))
    all_preds = np.zeros((num_rows, 28))
    th = 0

    for filepath in csv_paths:
        print('Processing file', filepath.split('/')[-1])
        predi = pd.read_csv(filepath)
        # The last column is 'th'; everything before it is predictions.
        # Add plain ndarray values so no pandas label alignment is involved.
        all_preds += predi[predi.columns[:-1]].values
        # NOTE(review): only the first row of 'th' is used, so per-class
        # thresholds stored down the column are not averaged — confirm this
        # is intended.
        th += predi['th'][0]

    num_files = float(len(csv_paths))
    all_preds /= num_files
    th /= num_files

    SUBM_OUT = './subm/pred_ensemble_{}.csv'.format(
        datetime.now().strftime('%Y-%m-%d_%H:%M:%S'))
    if args.outfile != '':
        SUBM_OUT = SUBM_OUT.replace('.csv', '_{}.csv'.format(args.outfile))

    save_pred(all_preds, th, SUBM_OUT)
def main(args):
    """Build the model, optionally resume, then evaluate or train.

    The model named by ``args.netType`` is wrapped in ``DataParallel`` and
    trained with RMSprop on an MSE loss. With ``args.evaluation`` set, a
    single validation pass is run and its predictions saved; otherwise the
    model is trained for epochs ``args.start_epoch`` .. ``args.epochs - 1``,
    logging each epoch and checkpointing, with "best" decided by validation
    AUC.

    Args:
        args: Parsed options. Attributes read here include ``checkpoint``,
            ``netType``, ``pointType``, ``nStacks``, ``nModules``,
            ``nFeats``, ``use_se``, ``use_attention``, ``lr``, ``momentum``,
            ``weight_decay``, ``data``, ``val_batch``, ``train_batch``,
            ``workers``, ``resume``, ``start_epoch``, ``epochs``,
            ``schedule``, ``gamma``, ``evaluation``, ``debug`` and ``flip``.
    """
    global best_acc
    global best_auc

    if not os.path.exists(args.checkpoint):
        os.makedirs(args.checkpoint)

    print("==> Creating model '{}-{}', stacks={}, blocks={}, feats={}".format(
        args.netType, args.pointType, args.nStacks, args.nModules,
        args.nFeats))
    print("=> Models will be saved at: {}".format(args.checkpoint))

    model = models.__dict__[args.netType](num_stacks=args.nStacks,
                                          num_blocks=args.nModules,
                                          num_feats=args.nFeats,
                                          use_se=args.use_se,
                                          use_attention=args.use_attention,
                                          num_classes=68)
    model = torch.nn.DataParallel(model).cuda()

    criterion = torch.nn.MSELoss(size_average=True).cuda()
    optimizer = torch.optim.RMSprop(model.parameters(),
                                    lr=args.lr,
                                    momentum=args.momentum,
                                    weight_decay=args.weight_decay)

    title = args.checkpoint.split('/')[-1] + ' on ' + args.data.split('/')[-1]

    Loader = get_loader(args.data)
    val_loader = torch.utils.data.DataLoader(Loader(args, 'A'),
                                             batch_size=args.val_batch,
                                             shuffle=False,
                                             num_workers=args.workers,
                                             pin_memory=True)

    log_path = os.path.join(args.checkpoint, 'log.txt')
    logger = None
    if args.resume:
        if os.path.isfile(args.resume):
            print("=> Loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            best_acc = checkpoint['best_acc']
            # Checkpoints store the best AUC under the 'best_acc' key (see
            # the save_checkpoint call below), and the training loop compares
            # against best_auc — so it must be restored as well.
            best_auc = checkpoint['best_acc']
            model.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            print("=> Loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch']))
            logger = Logger(log_path, title=title, resume=True)
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))

    if logger is None:
        # Fresh run, or the resume path did not exist: start a new log so
        # the training loop always has a logger to write to.
        logger = Logger(log_path, title=title)
        logger.set_names([
            'Epoch', 'LR', 'Train Loss', 'Valid Loss', 'Train Acc', 'Val Acc',
            'AUC'
        ])

    cudnn.benchmark = True
    print('=> Total params: %.2fM' %
          (sum(p.numel() for p in model.parameters()) / (1024. * 1024)))

    if args.evaluation:
        print('=> Evaluation only')
        D = args.data.split('/')[-1]
        save_dir = os.path.join(args.checkpoint, D)
        if not os.path.exists(save_dir):
            os.makedirs(save_dir)
        loss, acc, predictions, auc = validate(val_loader, model, criterion,
                                               args.netType, args.debug,
                                               args.flip)
        save_pred(predictions, checkpoint=save_dir)
        return

    train_loader = torch.utils.data.DataLoader(Loader(args, 'train'),
                                               batch_size=args.train_batch,
                                               shuffle=True,
                                               num_workers=args.workers,
                                               pin_memory=True)

    lr = args.lr
    for epoch in range(args.start_epoch, args.epochs):
        lr = adjust_learning_rate(optimizer, epoch, lr, args.schedule,
                                  args.gamma)
        print('=> Epoch: %d | LR %.8f' % (epoch + 1, lr))

        train_loss, train_acc = train(train_loader, model, criterion,
                                      optimizer, args.netType, args.debug,
                                      args.flip)
        # do not save predictions in model file
        valid_loss, valid_acc, predictions, valid_auc = validate(
            val_loader, model, criterion, args.netType, args.debug, args.flip)

        logger.append([
            int(epoch + 1), lr, train_loss, valid_loss, train_acc, valid_acc,
            valid_auc
        ])

        is_best = valid_auc >= best_auc
        best_auc = max(valid_auc, best_auc)
        save_checkpoint(
            {
                'epoch': epoch + 1,
                'netType': args.netType,
                'state_dict': model.state_dict(),
                # The best AUC is stored under the historical 'best_acc' key.
                'best_acc': best_auc,
                'optimizer': optimizer.state_dict(),
            },
            is_best,
            predictions,
            checkpoint=args.checkpoint)

    logger.close()
    logger.plot(['AUC'])
    savefig(os.path.join(args.checkpoint, 'log.eps'))
else: logger = Logger(os.path.join(args.checkpoint, 'log.txt'), title=title) logger.set_names(['Epoch', 'LR', 'Train Loss', 'Valid Loss', 'Train Acc', 'Val Acc', 'AUC']) cudnn.benchmark = True print('=> Total params: %.2fM' % (sum(p.numel() for p in model.parameters()) / (1024. * 1024))) if args.evaluation: print('=> Evaluation only') D = args.data.split('/')[-1] save_dir = os.path.join(args.checkpoint, D) if not os.path.exists(save_dir): os.makedirs(save_dir) loss, acc, predictions, auc = validate(val_loader, model, criterion, args.netType, args.debug, args.flip) save_pred(predictions, checkpoint=save_dir) return train_loader = torch.utils.data.DataLoader( Loader(args, 'train'), batch_size=args.train_batch, shuffle=True, num_workers=args.workers, pin_memory=True) lr = args.lr for epoch in range(args.start_epoch, args.epochs): lr = adjust_learning_rate(optimizer, epoch, lr, args.schedule, args.gamma) print('=> Epoch: %d | LR %.8f' % (epoch + 1, lr)) sys.stdout.flush() train_loss, train_acc, model= train(train_loader, model, criterion, optimizer, args.netType,
def main_subm(net=None, opcon=None, attn=False):
    """Load the best checkpoint and produce a submission.

    Without cosine annealing this delegates to ``generate_submission`` with
    CSV output enabled. With ``cosine_annealing`` set, predictions and
    thresholds from the best checkpoint and from every
    ``./model_weights/cycle*<exp_name>.pth`` snapshot are combined, the raw
    predictions are written to the ``preds`` path and the thresholded
    submission to ``SUBM_OUT``.

    Args:
        net: Optional model. When None, ``config.model_name`` is looked up in
            ``model_list``, instantiated and wrapped in ``DataParallel``.
        opcon: Optional configuration object; when None the module-level
            ``config`` is used.
        attn: Forwarded to ``generate_submission`` and ``find_threshold``.
    """
    # Use a separate local name: assigning to ``config`` inside this function
    # would make it local and break the fallback to the module-level config
    # (UnboundLocalError) whenever ``opcon`` is None.
    cfg = opcon if opcon is not None else config

    model_params = [cfg.model_name, cfg.exp_name]
    MODEL_CKPT = './model_weights/best_{}_{}.pth'.format(*model_params)

    if net is None:
        Net = getattr(model_list, cfg.model_name)
        net = Net(num_channels=cfg.num_channels)
        net = nn.parallel.DataParallel(net)
    # No-op when a caller-supplied net already lives on ``device``.
    net.to(device)

    print('Loading model from ' + MODEL_CKPT)
    # The checkpoint is either a bare state dict or a dict wrapping it under
    # 'state_dict'. Load once and unwrap explicitly instead of relying on a
    # bare ``except`` around a first failed attempt.
    state = torch.load(MODEL_CKPT)
    if isinstance(state, dict) and 'state_dict' in state:
        state = state['state_dict']
    net.load_state_dict(state)

    SUBM_OUT = './subm/best_{}_{}.csv'.format(*model_params)
    if args.outfile != '':
        SUBM_OUT = SUBM_OUT.replace('.csv', '_{}.csv'.format(args.outfile))

    if not cfg.cosine_annealing:
        generate_submission(net, cfg, args.folds, SUBM_OUT, gen_csv=True,
                            attn=attn)
    else:
        test_preds_avg = generate_submission(net, cfg, args.folds, SUBM_OUT,
                                             gen_csv=False, attn=attn)
        # NOTE(review): the best model's threshold is counted twice and
        # num_models starts at 2, but its predictions are accumulated only
        # once — confirm whether the predictions should be doubled too.
        best_th = 2 * find_threshold(net, cfg, plot=False, attn=attn)
        num_models = 2

        cycle_pattern = "./model_weights/cycle*{}.pth".format(cfg.exp_name)
        for cycle_ckpt in glob.glob(cycle_pattern):
            print('Loading model from ' + cycle_ckpt)
            net.load_state_dict(torch.load(cycle_ckpt))
            test_preds_avg += generate_submission(net, cfg, args.folds,
                                                  SUBM_OUT, gen_csv=False,
                                                  attn=attn)
            best_th += find_threshold(net, cfg, plot=False, attn=attn)
            num_models += 1

        test_preds_avg /= num_models
        best_th /= num_models

        preds_df = pd.DataFrame(data=test_preds_avg)
        preds_df['th'] = best_th
        preds_df.to_csv(SUBM_OUT.replace('subm', 'preds'), index=False)

        print("Generating submission with threshold = ", best_th)
        save_pred(test_preds_avg, best_th, SUBM_OUT)
def main(args):
    """Create the network, optionally resume, then run evaluation or training.

    The architecture is selected by ``args.netType``, wrapped in
    ``DataParallel`` and optimised with RMSprop against an MSE loss. If
    ``args.evaluation`` is set, one validation pass is executed and its
    predictions are saved. Otherwise training runs from ``args.start_epoch``
    up to ``args.epochs``, appending to the log and saving a checkpoint every
    epoch; the "best" flag is driven by validation AUC.

    Args:
        args: Parsed options. Attributes read here include ``checkpoint``,
            ``netType``, ``pointType``, ``nStacks``, ``nModules``,
            ``nFeats``, ``use_se``, ``use_attention``, ``lr``, ``momentum``,
            ``weight_decay``, ``data``, ``val_batch``, ``train_batch``,
            ``workers``, ``resume``, ``start_epoch``, ``epochs``,
            ``schedule``, ``gamma``, ``evaluation``, ``debug`` and ``flip``.
    """
    global best_acc
    global best_auc

    if not os.path.exists(args.checkpoint):
        os.makedirs(args.checkpoint)

    print("==> Creating model '{}-{}', stacks={}, blocks={}, feats={}".format(
        args.netType, args.pointType, args.nStacks, args.nModules,
        args.nFeats))
    print("=> Models will be saved at: {}".format(args.checkpoint))

    model = models.__dict__[args.netType](
        num_stacks=args.nStacks,
        num_blocks=args.nModules,
        num_feats=args.nFeats,
        use_se=args.use_se,
        use_attention=args.use_attention,
        num_classes=68)
    model = torch.nn.DataParallel(model).cuda()

    criterion = torch.nn.MSELoss(size_average=True).cuda()
    optimizer = torch.optim.RMSprop(
        model.parameters(),
        lr=args.lr,
        momentum=args.momentum,
        weight_decay=args.weight_decay)

    title = args.checkpoint.split('/')[-1] + ' on ' + args.data.split('/')[-1]

    Loader = get_loader(args.data)
    val_loader = torch.utils.data.DataLoader(
        Loader(args, 'A'),
        batch_size=args.val_batch,
        shuffle=False,
        num_workers=args.workers,
        pin_memory=True)

    log_file = os.path.join(args.checkpoint, 'log.txt')
    logger = None
    if args.resume:
        if os.path.isfile(args.resume):
            print("=> Loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            best_acc = checkpoint['best_acc']
            # The value saved under 'best_acc' is the best AUC (see the
            # save_checkpoint call below). Restore best_auc too, otherwise
            # the first resumed epoch is compared against a stale value.
            best_auc = checkpoint['best_acc']
            model.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            print("=> Loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch']))
            logger = Logger(log_file, title=title, resume=True)
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))

    if logger is None:
        # Either no resume was requested or the checkpoint was missing; in
        # both cases begin a new log so that training can record its epochs.
        logger = Logger(log_file, title=title)
        logger.set_names(['Epoch', 'LR', 'Train Loss', 'Valid Loss',
                          'Train Acc', 'Val Acc', 'AUC'])

    cudnn.benchmark = True
    print('=> Total params: %.2fM' %
          (sum(p.numel() for p in model.parameters()) / (1024. * 1024)))

    if args.evaluation:
        print('=> Evaluation only')
        D = args.data.split('/')[-1]
        save_dir = os.path.join(args.checkpoint, D)
        if not os.path.exists(save_dir):
            os.makedirs(save_dir)
        loss, acc, predictions, auc = validate(
            val_loader, model, criterion, args.netType, args.debug, args.flip)
        save_pred(predictions, checkpoint=save_dir)
        return

    train_loader = torch.utils.data.DataLoader(
        Loader(args, 'train'),
        batch_size=args.train_batch,
        shuffle=True,
        num_workers=args.workers,
        pin_memory=True)

    lr = args.lr
    for epoch in range(args.start_epoch, args.epochs):
        lr = adjust_learning_rate(optimizer, epoch, lr, args.schedule,
                                  args.gamma)
        print('=> Epoch: %d | LR %.8f' % (epoch + 1, lr))

        train_loss, train_acc = train(train_loader, model, criterion,
                                      optimizer, args.netType, args.debug,
                                      args.flip)
        # do not save predictions in model file
        valid_loss, valid_acc, predictions, valid_auc = validate(
            val_loader, model, criterion, args.netType, args.debug, args.flip)

        logger.append([int(epoch + 1), lr, train_loss, valid_loss, train_acc,
                       valid_acc, valid_auc])

        is_best = valid_auc >= best_auc
        best_auc = max(valid_auc, best_auc)
        save_checkpoint(
            {
                'epoch': epoch + 1,
                'netType': args.netType,
                'state_dict': model.state_dict(),
                # Best AUC is kept under the legacy 'best_acc' key.
                'best_acc': best_auc,
                'optimizer': optimizer.state_dict(),
            },
            is_best,
            predictions,
            checkpoint=args.checkpoint)

    logger.close()
    logger.plot(['AUC'])
    savefig(os.path.join(args.checkpoint, 'log.eps'))
def do_epoch(
    setname,
    loader,
    model,
    criterion,
    epochno=-1,
    optimizer=None,
    num_classes=None,
    debug=False,
    checkpoint=None,
    mean=torch.Tensor([0.5, 0.5, 0.5]),
    std=torch.Tensor([1.0, 1.0, 1.0]),
    feature_dim=1024,
    save_logits=False,
    save_features=False,
    num_figs=100,
    topk=[1],
    save_feature_dir="",
    save_fig_dir="",
):
    """Run one training or validation epoch over ``loader``.

    Each minibatch is a dict providing ``"rgb"`` (inputs) and ``"class"``
    (targets); when logits/features are saved it must also provide
    ``"index"``. If the minibatch has a truthy ``"gpu_collater"`` entry, the
    dataset's ``gpu_collater`` is applied to it on the GPU first. The model
    output is a dict with ``"logits"`` (and ``"embds"`` when features are
    saved).

    Args:
        setname: ``"train"`` or ``"val"``; selects train/eval mode and
            whether an optimisation step is taken.
        loader: Data loader to iterate.
        model: Network called on the CUDA inputs.
        criterion: Loss applied to the logits and CUDA targets.
        epochno: Epoch index, used for the progress bar and figure names.
        optimizer: Optimizer; required when ``setname`` is ``"train"``.
        num_classes: Width of the saved logits buffer (``save_logits``).
        debug: Visualise every batch and show the figures.
        checkpoint: Unused in this function.
        mean: Forwarded to ``viz_gt_pred``.
        std: Forwarded to ``viz_gt_pred``.
        feature_dim: Width of the saved features buffer (``save_features``).
        save_logits: Collect all logits and save them as ``preds.mat``.
        save_features: Collect all embeddings and save as ``features.mat``.
        num_figs: Forwarded to ``is_show`` to decide which batches to plot.
        topk: k values forwarded to ``performance``; one meter per entry.
            The default list is never mutated here.
        save_feature_dir: Directory passed to ``save_pred``.
        save_fig_dir: Directory for visualisation figures (str or Path).

    Returns:
        Tuple ``(losses, perfs)``: a one-element list with the loss meter and
        a list with one performance meter per entry of ``topk``.

    Raises:
        ValueError: If ``setname`` is ``"train"`` and ``optimizer`` is None.
    """
    from pathlib import Path

    assert setname == "train" or setname == "val"
    is_train = setname == "train"
    if is_train and optimizer is None:
        # Fail before the first forward pass instead of at the first step.
        raise ValueError("optimizer is required when setname == 'train'")

    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = [AverageMeter()]
    perfs = [AverageMeter() for _ in topk]

    if save_logits:
        all_logits = torch.Tensor(len(loader.dataset), num_classes)
    if save_features:
        all_features = torch.Tensor(len(loader.dataset), feature_dim)

    if is_train:
        model.train()
    else:
        model.eval()

    end = time.time()

    gt_win, pred_win, fig_gt_pred = None, None, None
    bar = Bar("E%d" % (epochno + 1), max=len(loader))
    for i, data in enumerate(loader):
        if data.get("gpu_collater", False):
            # We handle collation on the GPU to enable faster data augmentation
            with torch.no_grad():
                data["rgb"] = data["rgb"].cuda()
                collater_kwargs = {}
                if isinstance(loader.dataset, torch.utils.data.ConcatDataset):
                    cat_datasets = loader.dataset.datasets
                    collater = cat_datasets[0].gpu_collater
                    cat_datasets = {
                        type(x).__name__.lower(): x
                        for x in cat_datasets
                    }
                    collater_kwargs["concat_datasets"] = cat_datasets
                else:
                    collater = loader.dataset.gpu_collater
                data = collater(minibatch=data, **collater_kwargs)

        # measure data loading time
        data_time.update(time.time() - end)

        inputs = data["rgb"]
        targets = data["class"]

        inputs_cuda = inputs.cuda()
        targets_cuda = targets.cuda()

        # Forward pass and loss. Gradients are only tracked when training, so
        # validation does not build an autograd graph it never uses.
        with torch.set_grad_enabled(is_train):
            outputs_cuda = model(inputs_cuda)
            loss = criterion(outputs_cuda["logits"], targets_cuda)

        logits = outputs_cuda["logits"].data.cpu()
        topk_acc = performance(logits, targets, topk=topk)
        for ki, acc in enumerate(topk_acc):
            perfs[ki].update(acc, inputs.size(0))

        losses[0].update(loss.item(), inputs.size(0))

        # generate predictions
        if save_logits:
            all_logits[data["index"]] = logits
        if save_features:
            all_features[data["index"]] = (
                outputs_cuda["embds"].squeeze().data.cpu())

        # TODO
        if debug or is_show(num_figs, i, len(loader)):
            fname = "pred_%s_epoch%02d_iter%05d" % (setname, epochno, i)
            # Wrap in Path so a plain-string directory (including the ""
            # default) works; ``str / str`` raises TypeError.
            save_path = Path(save_fig_dir) / fname
            gt_win, pred_win, fig_gt_pred = viz_gt_pred(
                inputs,
                logits,
                targets,
                mean,
                std,
                data,
                gt_win,
                pred_win,
                fig_gt_pred,
                save_path=save_path,
                show=debug,
            )

        # compute gradient and do optim step
        if is_train:
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        # plot progress
        bar.suffix = "({batch}/{size}) Data: {data:.1f}s | Batch: {bt:.1f}s | Total: {total:} | ETA: {eta:} | Loss: {loss:} | Perf: {perf:}".format(
            batch=i + 1,
            size=len(loader),
            data=data_time.val,
            bt=batch_time.avg,
            total=bar.elapsed_td,
            eta=bar.eta_td,
            loss=", ".join(f"{meter.avg:.3f}" for meter in losses),
            perf=", ".join(f"{meter.avg:.3f}" for meter in perfs),
        )
        bar.next()
    bar.finish()

    # save outputs
    if save_logits or save_features:
        meta = {
            "clip_gt": np.asarray(loader.dataset.get_set_classes()),
            "clip_ix": loader.dataset.valid,
            "video_names": loader.dataset.get_all_videonames(),
        }
        if save_logits:
            save_pred(
                all_logits,
                checkpoint=save_feature_dir,
                filename="preds.mat",
                meta=meta,
            )
        if save_features:
            save_pred(
                all_features,
                checkpoint=save_feature_dir,
                filename="features.mat",
                meta=meta,
            )

    return losses, perfs
# Stacking step: concatenates every frame in train_dfs / test_dfs column-wise
# (axis=1), converts both to NumPy arrays, fits the classifier selected by
# args.classifier ("randomforest", or "nn"/"neuralnetwork"), predicts on the
# test features, saves the predictions to ./stacks/<name>.npy, prints the best
# score with the elapsed time, and writes a submission thresholded at 0.5 to
# ./subm/<name>.csv.
# NOTE(review): `bs` is assigned only in the "randomforest" branch, so the
# "Fitted. Best score" print looks like it raises NameError on the
# neural-network path — confirm and initialise `bs` if so.
# NOTE(review): `pred` is unset when args.classifier matches neither branch.
# NOTE(review): the original indentation is lost here, so it is unclear
# whether np.save runs only in the neural-network branch or for both
# classifiers — confirm against the upstream file before restructuring.
print("Doing all labels stacking") features = pd.concat((train_dfs[i] for i in range(len(train_dfs))), axis=1) test_features = pd.concat((test_dfs[i] for i in range(len(test_dfs))), axis=1) features = np.array(features) test_features = np.array(test_features) if args.classifier == "randomforest": rf_random = fit_features(features, labels, "f1_macro", n_iter=2, cv=3) pred = rf_random.predict(test_features) bs = rf_random.best_score_ if args.classifier in ["nn", "neuralnetwork"]: model = fit_neural_network(features, labels) pred = model.predict(test_features, batch_size=16) np.save('./stacks/{}.npy'.format(args.name), pred) t2 = time.time() print("Fitted. Best score: ", bs, ". Time taken = ", t2 - t1) save_pred(pred, th=0.5, SUBM_OUT='./subm/{}.csv'.format(args.name), fill_empty=False)