def get_responses(model: Model,
                  device: int,
                  instances: List[Instance],
                  num_responses: int,
                  temperature: float = 1e-5,
                  flow: bool = True) -> Dict[str, List[str]]:
    """Sample `num_responses` responses per instance; returns a dict keyed
    by response number (the original `List[List[str]]` annotation did not
    match the returned dict)."""
    # note: `flow` is unused in this function
    iterator = BasicIterator(batch_size=128)
    iterator.index_with(model.vocab)
    predictions_dict = {f'response_{rno + 1}': [] for rno in range(num_responses)}
    for batch in tqdm(iterator(instances, shuffle=False, num_epochs=1),
                      desc='Predicting Responses'):
        for rno in range(num_responses):
            # Re-encode the query for each sample so the latent z is re-drawn.
            z = model.encode_query(
                nn_util.move_to_device(batch['source_tokens'], device),
                temperature=temperature)
            preds = model.decode_predictions(model._decoder(z)['predictions'])
            predictions_dict[f'response_{rno + 1}'].extend(preds)
    return predictions_dict
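# Hedged usage sketch for get_responses; `my_model` and `dev_instances` are
# hypothetical names, not part of the snippet above. Temperature controls
# sampling diversity, so repeated responses differ at higher values.
# responses = get_responses(my_model, device=0, instances=dev_instances,
#                           num_responses=5, temperature=0.8)
# print(responses['response_1'][:3])  # first three sampled responses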
def main(input_filepath, model_filepath, output_filepath, config_file):
    """Load the training and test sets plus a trained model, predict,
    evaluate with RMSE, and save the prediction plots."""
    logger = logging.getLogger(__name__)
    logger.info('Loading training set, test set and model and predicting.')

    # Parse config file
    config = parse_config(config_file)

    # Load data
    X_train = pd.read_csv(input_filepath + '/X_train.csv')
    y_train = pd.read_csv(input_filepath + '/y_train.csv').values.ravel()
    X_test = pd.read_csv(input_filepath + '/X_test.csv')
    y_test = pd.read_csv(input_filepath + '/y_test.csv').values.ravel()

    # Load model
    model = Model.load(model_filepath + config['predicting']['model_name'])

    # Make predictions
    train_pred = model.predict(X_train)
    test_pred = model.predict(X_test)

    # Evaluate model (root mean squared error)
    train_score = np.sqrt(mean_squared_error(y_train, train_pred))
    test_score = np.sqrt(mean_squared_error(y_test, test_pred))

    # Plot predictions
    scores = (
        (r'$RMSE={:,.0f}$' + ' EUR').format(train_score),
        (r'$RMSE={:,.0f}$' + ' EUR').format(test_score),
    )
    pred_plots = plot_predictions(scores, train_pred, test_pred, y_train, y_test)
    pred_plots.savefig(output_filepath + '/pred_plots.png')
def predict(model_filepath, config, input_data):
    """Return a prediction from user input, rounded to the nearest thousand."""
    # Load model
    model = Model.load(model_filepath + config['predicting']['model_name'])

    # Predict; np.round(..., -3) rounds to the nearest 1,000
    prediction = int(np.round(model.predict(input_data), -3)[0])
    return prediction
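# Hedged usage sketch for predict(); the config loading and feature columns
# below are illustrative assumptions, not this repo's actual schema.
# config = parse_config('config.yaml')
# user_df = pd.DataFrame([{'area_sqm': 75, 'rooms': 3}])  # one-row input
# price = predict('models/', config, user_df)  # rounded to the nearest 1,000
# print(f'Predicted price: {price} EUR')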
def main(data_path, config_path):
    config = json.loads(evaluate_file(config_path))
    os.environ["CUDA_VISIBLE_DEVICES"] = config['gpu']

    data_loader = DatasetLoader(data_path, config['num_y'], config['num_z'])
    data = data_loader.load_data()

    model = Model.by_name(config['type'])(config)
    acc1, acc2 = model.evaluate(data)
    print(f'acc1 = {acc1}, acc2 = {acc2}')
def main(input_data, output_model):
    """
    Runs modeling scripts on the processed data to create a model.
    The model is saved as a pickle in ../models.
    """
    logger = logging.getLogger(__name__)
    logger.info('training model')

    data = DataSet(train_dir=input_data)
    train = data.get_train_set()
    X_train = data.get_features(train)
    y = data.get_label(train)

    # Index 4 selects the estimator / parameter-grid pair to tune
    clf = models[4]
    param_grid = params[4]
    model = Model.tune(clf, X_train, y, param_grid)
    model.save(output_model + model.name)
def main(input_train, input_test, input_model, output_prediction):
    """
    Runs modeling scripts using the model pickle (../models) to predict outcomes.
    The outcomes file is saved as a .csv.
    """
    logger = logging.getLogger(__name__)
    logger.info('predicting outcomes')

    data = DataSet(train_dir=input_train, test_dir=input_test)
    test = data.get_test_set()
    X_test = data.get_features(test)

    model = Model.load(input_model + 'XGBClassifier')
    y_pred = model.predict(X_test)

    output = pd.DataFrame({
        'PassengerId': test['PassengerId'],
        'Survived': y_pred
    })
    output.to_csv(output_prediction + 'submission_{}.csv'.format(model.name),
                  index=False)
def kenya_crop_type_mapper():
    data_dir = "../data"
    test_folder = Path("PATH_TO_TIF_FILES")
    # Materialize the glob so it can be iterated and reported
    # (printing the raw generator would only show its repr).
    test_files = list(test_folder.glob("*.tif"))
    print(f"Found {len(test_files)} tif files")

    model_path = "PATH_TO_MODEL_CKPT"
    print(f"Using model {model_path}")
    model = Model.load_from_checkpoint(model_path)

    for test_path in test_files:
        save_dir = Path(data_dir) / "Autoencoder"
        save_dir.mkdir(exist_ok=True)

        print(f"Running for {test_path}")
        savepath = save_dir / f"preds_{test_path.name}"
        if savepath.exists():
            print("File already generated. Skipping")
            continue

        # Predict twice: once with the forecaster filling in future
        # timesteps, once on the full input.
        out_forecasted = model.predict(test_path, with_forecaster=True)
        plot_results(out_forecasted, test_path, savepath=save_dir,
                     prefix="forecasted_")
        out_normal = model.predict(test_path, with_forecaster=False)
        plot_results(out_normal, test_path, savepath=save_dir,
                     prefix="full_input_")

        out_forecasted.to_netcdf(save_dir / f"preds_forecasted_{test_path.name}.nc")
        out_normal.to_netcdf(save_dir / f"preds_normal_{test_path.name}.nc")
def main(input_filepath, output_filepath, config_file):
    """Load the training data, set up the pre-processing and modeling
    pipeline, train the final model, and save it."""
    logger = logging.getLogger(__name__)
    logger.info('Loading training data set, setting up pipeline, tuning, '
                'training and evaluating final model.')

    # Parse config file
    # config = parse_config(config_file)

    # Load training data
    X_train = pd.read_csv(input_filepath + '/X_train.csv')
    y_train = pd.read_csv(input_filepath + '/y_train.csv').values.ravel()

    # Pre-processing and modeling pipeline; the target is log-transformed
    # (log1p) before fitting and mapped back (expm1) at prediction time.
    cat_features = X_train.select_dtypes(exclude='float64').columns
    num_features = X_train.select_dtypes(include='float64').columns
    pipe = Pipeline([('preprocessing',
                      preprocessing_pipeline(cat_features, num_features)),
                     ('model',
                      TransformedTargetRegressor(regressor=SVR(),
                                                 func=np.log1p,
                                                 inverse_func=np.expm1))])

    # Tune or select model
    # kf = KFold(config['modeling']['num_folds'], shuffle=True,
    #            random_state=rng).get_n_splits(X_train.values)
    model = Model(model=pipe)

    # Train model
    model.train(X_train, y_train)

    # Save model
    model.save(output_filepath + model.name + '.pkl')
model = Model(
    period=[2018],
    entities=[
        {'slug': 'bitcoin', 'symbol': 'btc', 'algo': 'sha-256'},
        {'slug': 'bitcoin-cash', 'symbol': 'bch', 'algo': 'sha-256'},
        {'slug': 'bitcoin-diamond', 'symbol': 'bcd', 'algo': 'X13'},
        {'slug': 'bitcoin-gold', 'symbol': 'bcg', 'algo': 'equihash'},
        {'slug': 'bitcoin-private', 'symbol': 'btcp', 'algo': 'equihash'},
        {'slug': 'dash', 'symbol': 'dash', 'algo': 'X11'},
        {'slug': 'dogecoin', 'symbol': 'doge', 'algo': 'X11'},
        {'slug': 'electroneum', 'symbol': 'etn', 'algo': 'cryptonight'},
        {'slug': 'ethereum', 'symbol': 'eth', 'algo': 'ethash'},
        {'slug': 'ethereum-classic', 'symbol': 'etc', 'algo': 'ethash'},
        {'slug': 'litecoin', 'symbol': 'ltc', 'algo': 'scrypt'},
        {'slug': 'galactrum', 'symbol': 'ore', 'algo': 'lyra2rev2'},
        {'slug': 'monero', 'symbol': 'xmr', 'algo': 'cryptonight'},
        {'slug': 'ravencoin', 'symbol': 'rvn', 'algo': 'X16R'},
        {'slug': 'zcash', 'symbol': 'zec', 'algo': 'equihash'},
    ])
def __init__(self, model_path, full_path=False):
    self.model = Model(model_path, full_path)
    # Expose the encoder sub-network: the second layer of the wrapped model.
    self.encoder = self.model.model.layers[1]
import sys
from argparse import ArgumentParser
from pathlib import Path

sys.path.append("..")
from src.models import Model
from src.models import train_model

if __name__ == "__main__":
    parser = ArgumentParser()
    parser.add_argument("--max_epochs", type=int, default=1000)
    parser.add_argument("--patience", type=int, default=10)
    model_args = Model.add_model_specific_args(parser).parse_args()

    model = Model(model_args)
    train_model(model, model_args)
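# For context, a minimal sketch of the PyTorch-Lightning-style
# add_model_specific_args pattern assumed above; this repo's Model may
# differ and --hidden_size is purely illustrative.
from argparse import ArgumentParser

class SketchModel:
    @staticmethod
    def add_model_specific_args(parent_parser: ArgumentParser) -> ArgumentParser:
        # Extend the caller's parser with model hyper-parameters.
        parser = ArgumentParser(parents=[parent_parser], add_help=False)
        parser.add_argument("--hidden_size", type=int, default=128)
        return parser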
from src.models import Model
import numpy as np
import matplotlib.pyplot as plt

# Each call below loads a different dataset; only the last assignment to `m`
# is used for the balance plot.
m = Model(
    data_path='/home/weiss/workspace/studies/Supervised Learning/project/projet_app_sup_20/data/Aggregation.txt',
    sep='\t')
m = Model(
    data_path='/home/weiss/workspace/studies/Supervised Learning/project/projet_app_sup_20/data/creditcard.csv',
    sep=',')
m = Model(
    data_path='/home/weiss/workspace/studies/Supervised Learning/project/projet_app_sup_20/data/flame.txt',
    sep='\t')
m = Model(
    data_path='/home/weiss/workspace/studies/Supervised Learning/project/projet_app_sup_20/data/spiral.txt',
    sep='\t')
m = Model(
    data_path='/home/weiss/workspace/studies/Supervised Learning/project/projet_app_sup_20/data/VisaPremier.txt',
    sep='\t',
    ys=-2)

# Plot the class balance of the loaded dataset.
b = m._check_balance()
plt.bar(range(len(b)), list(b.values()), align='center')
plt.xticks(range(len(b)), list(b.keys()))
plt.show()
def __init__(self):
    super(CrowdCount, self).__init__()
    self.features = Model()
    self.my_loss = None
def main():
    # Params
    DEVICE = 'cuda'
    GROUP_SIZE = 6
    EPOCHS = 800
    TBOARD = False  # Set to True if you have TensorBoard running

    # Load data
    coseg = Coseg(
        img_set='images/',
        gt_set='ground_truth/',
        root_dir="data/042_reproducible/",
    )
    trloader = DataLoader(coseg, batch_size=1, shuffle=False, num_workers=1)
    imgs = []
    GTs = []
    for i, (In, GTn) in enumerate(trloader):
        if i == GROUP_SIZE:
            break
        In = In.to(DEVICE)
        GTn = GTn.to(DEVICE)
        imgs.append(In)
        GTs.append(GTn)
    print("[ OK ] Data loaded")

    # Precompute features with a frozen VGG-19 backbone
    vgg19_original = models.vgg19()
    phi = nn.Sequential(*(list(vgg19_original.children())[:-2]))
    for param in phi.parameters():
        param.requires_grad = False
    phi = phi.to(DEVICE)
    features = precompute_features(imgs, GTs, phi)
    print("[ OK ] Features precomputed")

    # Instantiate the model
    if DEVICE == 'cuda':
        groupnet = Model((1, 3, 224, 224)).cuda()
    else:
        groupnet = Model((1, 3, 224, 224))
    print("[ OK ] Model instantiated")

    # Optimizer
    # [ PAPER ] suggests SGD with these parameters, but it doesn't work here:
    # optimizer = optim.SGD(groupnet.parameters(), momentum=0.99, lr=0.00005,
    #                       weight_decay=0.0005)
    optimizer = optim.Adam(groupnet.parameters(), lr=0.00002)

    # Train loop
    losses = []
    if TBOARD:
        writer = SummaryWriter()
    for epoch in range(EPOCHS):
        optimizer.zero_grad()
        lss = 0
        lcs = 0
        loss = 0
        masks = groupnet(imgs)
        for i in range(len(imgs)):
            lss += Ls(masks[i], GTs[i])
            # [ PAPER ] suggests activating the group loss after 100 epochs
            if epoch >= 100:
                lcs += Lc(i, imgs, masks, features, phi)
        lss /= len(imgs)
        if epoch >= 100:
            lcs /= len(imgs)
        # [ PAPER ] suggests a 0.1 weight, but it does not work here
        loss = lss + 1. * lcs
        loss.backward(retain_graph=True)
        optimizer.step()
        if TBOARD:
            writer.add_scalar("loss", loss.item(), epoch)
            utils.tboard_imlist(masks, "masks", epoch, writer)
        losses.append(loss.item())
        print(f'[ ep {epoch} ] - Loss: {loss.item():.4f}')
    if TBOARD:
        writer.close()

    # Plot results in the same folder
    fig, axs = plt.subplots(nrows=3, ncols=GROUP_SIZE, figsize=(10, 5))
    for i in range(len(imgs)):
        axs[0, i].imshow(imgs[i].detach().cpu().numpy().squeeze(0).transpose(1, 2, 0))
        axs[0, i].axis('off')
        axs[1, i].imshow(GTs[i].detach().cpu().numpy().squeeze(0).squeeze(0))
        axs[1, i].axis('off')
        axs[2, i].imshow(masks[i].detach().cpu().numpy().squeeze(0).squeeze(0))
        axs[2, i].axis('off')
    plt.savefig("predictions.png")
    plt.close()

    fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(10, 5))
    ax.plot(losses)
    if epoch > 100:
        ax.axvline(100, c='r', ls='--', label="Activate Lc loss")
    ax.set_xlabel("Epoch")
    ax.set_ylabel("Loss")
    ax.legend()
    plt.savefig("loss.png")
    plt.close()
    print("[ OK ] Plot")
                    required=False)
parser.add_argument('--save_period', type=int, default=1000, required=False)
parser.add_argument('--max_to_keep', type=int, default=3, required=False)
parser.add_argument('--restore', action='store_true', required=False)
args = parser.parse_args()
print(args)

sess = tf.Session()
if args.restore:
    print("restoring pretrained model...")
    if os.path.isdir(args.model_dir):
        model = Model(sess=sess, config=None)
        model.restore(model_dir=args.model_dir)
    else:
        raise Exception(f"invalid model dir: {args.model_dir}")
else:
    print("building new model...")
    os.makedirs(args.model_dir, exist_ok=True)
    config = {
        "model": {
            "encoder": args.encoder,
            "params": json.loads(args.enc_params)
        },
        "training": {
            "model_dir": args.model_dir,
            "num_games": args.num_games_training,
            "val_period": args.val_period,
def __init__(self):
    super(CrowdCount, self).__init__()
    self.features = Model()
    self.my_loss = None
    self.this_dataset_density_level = dataset_density_level['shtA1_train_8_4']
def main():
    # Get config for this run
    hparams = parse_args()

    # Setup logger
    config = {
        "handlers": [
            {"sink": sys.stdout, "format": "{time:[MM-DD HH:mm]} - {message}"},
            {"sink": f"{hparams.outdir}/logs.txt",
             "format": "{time:[MM-DD HH:mm]} - {message}"},
        ],
    }
    logger.configure(**config)
    logger.info(f"Parameters used for training: {hparams}")

    # Fix seeds for reproducibility
    pt.utils.misc.set_random_seed(hparams.seed)

    # Save config
    os.makedirs(hparams.outdir, exist_ok=True)
    yaml.dump(vars(hparams), open(hparams.outdir + "/config.yaml", "w"))

    # Get model
    model = Model(arch=hparams.arch,
                  model_params=hparams.model_params,
                  embedding_size=hparams.embedding_size,
                  pooling=hparams.pooling).cuda()

    # Get loss
    # loss = LOSS_FROM_NAME[hparams.criterion](in_features=hparams.embedding_size,
    #                                          **hparams.criterion_params).cuda()
    loss = LOSS_FROM_NAME["cross_entropy"].cuda()
    logger.info(f"Loss for this run is: {loss}")

    if hparams.resume:
        checkpoint = torch.load(hparams.resume,
                                map_location=lambda storage, loc: storage.cuda())
        model.load_state_dict(checkpoint["state_dict"], strict=True)
        loss.load_state_dict(checkpoint["loss"], strict=True)

    if hparams.freeze_bn:
        freeze_batch_norm(model)

    # Get optimizer; the loss parameters are included because margin losses
    # carry learnable weights.
    # optim_params = pt.utils.misc.filter_bn_from_wd(model)
    optim_params = list(loss.parameters()) + list(model.parameters())
    optimizer = optimizer_from_name(hparams.optim)(optim_params,
                                                   lr=0,
                                                   weight_decay=hparams.weight_decay,
                                                   amsgrad=True)

    num_params = pt.utils.misc.count_parameters(model)[0]
    logger.info(f"Model size: {num_params / 1e6:.02f}M")
    # logger.info(model)

    # Scheduler is an advanced way of planning the experiment
    scheduler = pt.fit_wrapper.callbacks.PhasesScheduler(hparams.phases)

    # Save logs
    TB_callback = pt_clb.TensorBoard(hparams.outdir, log_every=20)

    # Get dataloaders
    train_loader, val_loader, val_indexes = get_dataloaders(
        root=hparams.root,
        augmentation=hparams.augmentation,
        size=hparams.size,
        val_size=hparams.val_size,
        batch_size=hparams.batch_size,
        workers=hparams.workers,
    )

    # Load validation query / gallery split and resort it according to
    # indexes from the sampler
    df_val = pd.read_csv(os.path.join(hparams.root, "train_val.csv"))
    df_val = df_val[df_val["is_train"].astype(bool) == False]
    val_is_query = df_val.is_query.values[val_indexes].astype(bool)

    logger.info("Start training")
    # Init runner
    runner = pt.fit_wrapper.Runner(
        model,
        optimizer,
        criterion=loss,
        callbacks=[
            # pt_clb.BatchMetrics([pt.metrics.Accuracy(topk=1)]),
            ContestMetricsCallback(
                is_query=val_is_query[:1280] if hparams.debug else val_is_query),
            pt_clb.Timer(),
            pt_clb.ConsoleLogger(),
            pt_clb.FileLogger(),
            TB_callback,
            CheckpointSaver(hparams.outdir, save_name="model.chpn",
                            monitor="target", mode="max"),
            CheckpointSaver(hparams.outdir, save_name="model_mapr.chpn",
                            monitor="mAP@R", mode="max"),
            CheckpointSaver(hparams.outdir, save_name="model_loss.chpn"),
            scheduler,
            # EMA must go after other checkpoints
            pt_clb.ModelEma(model, hparams.ema_decay)
            if hparams.ema_decay else pt_clb.Callback(),
        ],
        use_fp16=hparams.use_fp16,  # use mixed precision by default
        # hparams.opt_level != "O0",
    )

    if hparams.head_warmup_epochs > 0:
        # Freeze model, train only the head
        for p in model.parameters():
            p.requires_grad = False
        runner.fit(
            train_loader,
            # val_loader=val_loader,
            epochs=hparams.head_warmup_epochs,
            steps_per_epoch=20 if hparams.debug else None,
            # val_steps=20 if hparams.debug else None,
        )
        # Unfreeze model
        for p in model.parameters():
            p.requires_grad = True
        if hparams.freeze_bn:
            freeze_batch_norm(model)
        # Re-init to avoid NaNs in the loss
        optim_params = list(loss.parameters()) + list(model.parameters())
        optimizer = optimizer_from_name(hparams.optim)(optim_params,
                                                       lr=0,
                                                       weight_decay=hparams.weight_decay,
                                                       amsgrad=True)
        runner.state.model = model
        runner.state.optimizer = optimizer
        runner.state.criterion = loss

    # Train
    runner.fit(
        train_loader,
        # val_loader=val_loader,
        start_epoch=hparams.head_warmup_epochs,
        epochs=scheduler.tot_epochs,
        steps_per_epoch=20 if hparams.debug else None,
        # val_steps=20 if hparams.debug else None,
    )

    logger.info("Loading best model")
    checkpoint = torch.load(os.path.join(hparams.outdir, "model.chpn"))
    model.load_state_dict(checkpoint["state_dict"], strict=True)
    # runner.state.model = model
    # loss.load_state_dict(checkpoint["loss"], strict=True)

    # Evaluate
    _, [acc1, map10, target, mapR] = runner.evaluate(
        val_loader,
        steps=20 if hparams.debug else None,
    )
    logger.info(f"Val: Acc@1 {acc1:0.5f}, mAP@10 {map10:0.5f}, "
                f"Target {target:0.5f}, mAP@R {mapR:0.5f}")

    # Save params used for training and final metrics into a separate
    # TensorBoard file (the mAP@R / Target values were swapped here before)
    metric_dict = {
        "hparam/Acc@1": acc1,
        "hparam/mAP@10": map10,
        "hparam/mAP@R": mapR,
        "hparam/Target": target,
    }
    # Convert all lists / dicts to str to avoid a TensorBoard error
    hparams.phases = str(hparams.phases)
    hparams.model_params = str(hparams.model_params)
    hparams.criterion_params = str(hparams.criterion_params)
    with pt.utils.tensorboard.CorrectedSummaryWriter(hparams.outdir) as writer:
        writer.add_hparams(hparam_dict=vars(hparams), metric_dict=metric_dict)
def run(X_seq_train, X_cont_train, y_train, X_seq_test, X_cont_test,
        timestamp, random_state):
    seed_everything(random_state)
    oof_preds = np.zeros(len(X_seq_train))
    test_preds = np.zeros(len(X_seq_test))
    cv_scores = []
    for i, (trn_idx, val_idx) in enumerate(
            get_folds(5, "stratified", random_state).split(X_cont_train, y_train)):
        print(f"fold {i + 1}")
        train_dataset = TensorDataset(
            torch.from_numpy(X_seq_train[trn_idx]).float(),
            torch.from_numpy(X_cont_train[trn_idx]).float(),
            torch.from_numpy(y_train[trn_idx]).float(),
        )
        valid_dataset = TensorDataset(
            torch.from_numpy(X_seq_train[val_idx]).float(),
            torch.from_numpy(X_cont_train[val_idx]).float(),
            torch.from_numpy(y_train[val_idx]).float(),
        )
        test_dataset = TensorDataset(
            torch.from_numpy(X_seq_test).float(),
            torch.from_numpy(X_cont_test).float())

        train_loader = DataLoader(train_dataset, shuffle=True, batch_size=32)
        valid_loader = DataLoader(valid_dataset, shuffle=False, batch_size=128)
        test_loader = DataLoader(test_dataset, shuffle=False, batch_size=128)
        loaders = {"train": train_loader, "valid": valid_loader}

        runner = CustomRunner(device="cuda")
        model = Model(
            in_channels=X_seq_train.shape[1],
            n_cont_features=X_cont_train.shape[1],
            hidden_channels=64,
            kernel_sizes=[3, 5, 7, 15, 21, 51, 101],
            out_dim=1,
        )
        criterion = torch.nn.BCELoss()
        optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)
        scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer,
                                                               T_max=30,
                                                               eta_min=1e-6)
        logdir = f"./logdir/{timestamp}_fold{i}"
        runner.train(
            model=model,
            criterion=criterion,
            optimizer=optimizer,
            scheduler=scheduler,
            loaders=loaders,
            logdir=logdir,
            num_epochs=30,
            verbose=True,
        )

        # Out-of-fold predictions for this fold's validation split
        pred = np.concatenate(
            [x.cpu().numpy() for x in runner.predict_loader(
                loader=valid_loader,
                resume=f"{logdir}/checkpoints/best.pth",
                model=model,
            )])
        oof_preds[val_idx] = pred
        score = average_precision_score(y_train[val_idx], pred)
        cv_scores.append(score)
        print("score", score)

        # Accumulate test predictions averaged over the 5 folds
        pred = np.concatenate(
            [x.cpu().numpy() for x in runner.predict_loader(
                loader=test_loader,
                resume=f"{logdir}/checkpoints/best.pth",
                model=model,
            )])
        test_preds += pred / 5
    return oof_preds, test_preds, cv_scores
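# Hedged example of calling run(); the arrays are synthetic and only
# illustrate the expected shapes: sequences as (N, channels, length), since
# in_channels is read from X_seq_train.shape[1], and continuous features as
# (N, n_features). The actual call needs CUDA plus this repo's Model and
# CustomRunner, so it is left commented.
import numpy as np

X_seq = np.random.randn(100, 4, 128).astype(np.float32)    # (N, C, L)
X_cont = np.random.randn(100, 10).astype(np.float32)       # (N, F)
y = np.random.randint(0, 2, size=100).astype(np.float32)   # binary target
X_seq_te = np.random.randn(20, 4, 128).astype(np.float32)
X_cont_te = np.random.randn(20, 10).astype(np.float32)
# oof, test_preds, scores = run(X_seq, X_cont, y, X_seq_te, X_cont_te,
#                               timestamp="debug", random_state=42)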
                          batch_size=config.batch_size,
                          num_workers=8,
                          drop_last=True,
                          shuffle=True)
val_dataset = InpaintingDataset(config, val_list, fix_mask_path=val_fix_mask,
                                training=False)
val_loader = DataLoader(dataset=val_dataset,
                        batch_size=config.batch_size,
                        num_workers=2,
                        drop_last=False,
                        shuffle=False)
sample_iterator = val_dataset.create_iterator(config.sample_size)

model = Model(config, logger=logger)
model.load(is_test=False)

steps_per_epoch = len(train_dataset) // config.batch_size
iteration = model.iteration
epoch = model.iteration // steps_per_epoch
logger.info('Start from epoch:{}, iteration:{}'.format(epoch, iteration))

model.train()
keep_training = True
best_score = {}
while keep_training:
    epoch += 1
    stateful_metrics = ['epoch', 'iter', 'g_lr']
    progbar = Progbar(len(train_dataset), max_iters=steps_per_epoch,
def main():
    args, device, checkpoint = init_pipeline()
    train_loader, _, _, _, init_params = load_train_data(args, device)
    model = Model(*init_params).to(device)
    util.load_state_dict(checkpoint, model)
    visualize(model, train_loader)
def test(hparams):
    # Check that the config folder exists
    assert hparams.config_path.exists()

    # Read config
    with open(hparams.config_path / "config.yaml", "r") as file:
        model_configs = yaml.safe_load(file)
    model_configs.update(vars(hparams))
    hparams = argparse.Namespace(**model_configs)

    # Get model
    model = Model(arch=hparams.arch,
                  model_params=hparams.model_params,
                  embedding_size=hparams.embedding_size,
                  pooling=hparams.pooling).cuda()
    # logger.info(model)

    # Init from the best checkpoint
    checkpoint = torch.load(hparams.config_path / "model.chpn")
    model.load_state_dict(checkpoint["state_dict"], strict=False)

    # -------------- Get embeddings for val and test data --------------
    if hparams.extract_embeddings:
        if hparams.validation:
            print(f"Using size {hparams.val_size}")
            loader, indexes = get_val_dataloader(
                root=hparams.root,
                augmentation="val",
                batch_size=hparams.batch_size,
                size=hparams.val_size,
                workers=hparams.workers,
            )
            # Load validation query / gallery split and sort it according to
            # indexes from the sampler
            df_val = pd.read_csv(os.path.join(hparams.root, "train_val.csv"))
            df_val = df_val[df_val["is_train"].astype(bool) == False].iloc[indexes]

            val_embeddings = predict_from_loader(model, loader)
            # Hack to save torch.Tensor into pd.DataFrame
            df_val["embeddings"] = list(map(lambda r: np.array(r).tolist(), val_embeddings))
            # Save results into the folder with logs
            df_val.to_csv(hparams.config_path / "train_val.csv", index=False)
            del val_embeddings
            logger.info("Finished extracting validation embeddings")

        if hparams.test:
            loader, indexes = get_test_dataloader(
                root=hparams.root,
                augmentation="test",
                batch_size=hparams.batch_size,
                size=hparams.val_size,
                workers=hparams.workers,
            )
            # Load test DF and sort it according to indexes from the sampler
            df_test = pd.read_csv(os.path.join(hparams.root, "test_A.csv")).iloc[indexes]

            test_embeddings = predict_from_loader(model, loader)
            # Hack to save torch.Tensor into pd.DataFrame
            df_test["embeddings"] = list(map(lambda r: np.array(r).tolist(), test_embeddings))
            # Save results into the folder with logs
            df_test.to_csv(hparams.config_path / "test_A.csv", index=False)
            del test_embeddings
            logger.info("Finished extracting test embeddings")

    # -------------- Test model on validation dataset --------------
    if hparams.validation:
        # Read DF
        df_val = pd.read_csv(hparams.config_path / "train_val.csv")
        val_embeddings = torch.tensor(list(map(eval, df_val["embeddings"].values)))
        query_mask = df_val["is_query"].values.astype(bool)
        val_labels = df_val["label"].values

        # Shape (n_embeddings, embedding_dim)
        query_embeddings, gallery_embeddings = val_embeddings[query_mask], val_embeddings[~query_mask]
        query_labels, gallery_labels = val_labels[query_mask], val_labels[~query_mask]
        logger.info(f"Validation query size - {len(query_embeddings)}, "
                    f"gallery size - {len(gallery_embeddings)}")
        del val_embeddings

        if hparams.dba:
            gallery_embeddings = query_expansion(gallery_embeddings, gallery_embeddings, topk=10, alpha=None)
        if hparams.aqe:
            query_embeddings = query_expansion(query_embeddings, gallery_embeddings, topk=3, alpha=3)

        # Shape (query_size x gallery_size)
        conformity_matrix = torch.tensor(query_labels.reshape(-1, 1) == gallery_labels)
        # Matrix of pairwise (Euclidean) distances via torch.cdist
        distances = torch.cdist(query_embeddings, gallery_embeddings)

        acc1 = cmc_score_count(distances, conformity_matrix, topk=1)
        map10 = map_at_k(distances, conformity_matrix, topk=10)
        mapR = map_at_k(distances, conformity_matrix, topk=None)
        logger.info(f"Val: Acc@1 {acc1:0.5f}, mAP@10 {map10:0.5f}, "
                    f"Target {0.5 * acc1 + 0.5 * map10:0.5f}, mAP@R {mapR:0.5f}")

    # -------------- Predict on test dataset --------------
    if hparams.test:
        df_test = pd.read_csv(hparams.config_path / "test_A.csv")
        test_embeddings = torch.tensor(list(map(eval, df_test["embeddings"].values)))
        query_mask = df_test["is_query"].values.astype(bool)
        query_files, gallery_files = df_test["file_path"].values[query_mask], df_test["file_path"].values[~query_mask]

        # Shape (n_embeddings, embedding_dim)
        query_embeddings, gallery_embeddings = test_embeddings[query_mask], test_embeddings[~query_mask]
        logger.info(f"Test query size - {len(query_embeddings)}, "
                    f"gallery size - {len(gallery_embeddings)}")
        del test_embeddings

        if hparams.dba:
            gallery_embeddings = query_expansion(gallery_embeddings, gallery_embeddings, topk=10, alpha=None)
        if hparams.aqe:
            query_embeddings = query_expansion(query_embeddings, gallery_embeddings, topk=3, alpha=3)

        # Matrix of pairwise (Euclidean) distances via torch.cdist
        distances = torch.cdist(query_embeddings, gallery_embeddings)
        perm_matrix = torch.argsort(distances)

        logger.info(f"Creating submission{'_dba' if hparams.dba else ''}"
                    f"{'_aqe' if hparams.aqe else ''}_{hparams.val_size}.csv")
        data = {"image_id": [], "gallery_img_list": []}
        for idx in tqdm(range(len(query_files))):
            query_file = query_files[idx].split("/")[1]
            # Take the 10 nearest gallery files for this query
            predictions = gallery_files[perm_matrix[idx, :10]]
            predictions = [p.split("/")[1] for p in predictions]
            data["image_id"].append(query_file)
            data["gallery_img_list"].append(predictions)

        df = pd.DataFrame(data=data)
        df["gallery_img_list"] = df["gallery_img_list"].apply(
            lambda x: '{{{}}}'.format(",".join(x))).astype(str)
        lines = [f"{x},{y}" for x, y in zip(data["image_id"], df["gallery_img_list"])]
        with open(hparams.config_path /
                  f"submission{'_dba' if hparams.dba else ''}"
                  f"{'_aqe' if hparams.aqe else ''}_{hparams.val_size}.csv", "w") as f:
            for line in lines:
                f.write(line + '\n')
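# A minimal sketch of the alpha query expansion (aQE) / DBA step used above;
# the repo's actual query_expansion may differ. Each query is replaced by
# itself plus its topk gallery neighbours, weighted by cosine
# similarity ** alpha (alpha=None means uniform weights, as in the DBA call).
from typing import Optional
import torch
import torch.nn.functional as F

def query_expansion_sketch(queries: torch.Tensor,
                           gallery: torch.Tensor,
                           topk: int = 3,
                           alpha: Optional[float] = 3.0) -> torch.Tensor:
    # Cosine similarity between every query and every gallery embedding
    q = F.normalize(queries, dim=1)
    g = F.normalize(gallery, dim=1)
    sims = q @ g.t()                                   # (n_query, n_gallery)
    topk_sims, topk_idx = sims.topk(topk, dim=1)       # nearest gallery items
    weights = (torch.ones_like(topk_sims) if alpha is None
               else topk_sims.clamp(min=0) ** alpha)
    neighbours = gallery[topk_idx]                     # (n_query, topk, dim)
    expanded = queries + (weights.unsqueeze(-1) * neighbours).sum(dim=1)
    return F.normalize(expanded, dim=1)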