Example #1
def get_responses(model: Model, device: int, instances: List[Instance],
                  num_responses: int, temperature: float = 1e-5,
                  flow: bool = True) -> Dict[str, List[str]]:
    iterator = BasicIterator(batch_size=128)
    iterator.index_with(model.vocab)
    predictions_dict = {f'response_{rno+1}': [] for rno in range(num_responses)}
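    # For each batch, the query is re-encoded once per response slot so every response decodes from its own latent z.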
    for batch in tqdm(iterator(instances, shuffle=False, num_epochs=1), desc='Predicting Responses'):
        for rno in range(num_responses):
            z = model.encode_query(nn_util.move_to_device(batch['source_tokens'], device),
                                   temperature=temperature)
            preds = model.decode_predictions(model._decoder(z)['predictions'])
            predictions_dict[f'response_{rno+1}'].extend(preds)
    return predictions_dict
Example #2
def main(input_filepath, model_filepath, output_filepath, config_file):
    """Loads the train and test sets and a fitted model, makes predictions,
    computes RMSE scores and saves prediction plots."""
    logger = logging.getLogger(__name__)
    logger.info('Loading training set, test set and model and predicting.')

    # Parse config file
    config = parse_config(config_file)

    # Load data
    X_train = pd.read_csv(input_filepath + '/X_train.csv')
    y_train = pd.read_csv(input_filepath + '/y_train.csv').values.ravel()

    X_test = pd.read_csv(input_filepath + '/X_test.csv')
    y_test = pd.read_csv(input_filepath + '/y_test.csv').values.ravel()

    # Load model
    model = Model.load(model_filepath + config['predicting']['model_name'])

    # Make predictions
    train_pred = model.predict(X_train)
    test_pred = model.predict(X_test)

    # Evaluate model
    train_score = np.sqrt(mean_squared_error(y_train, train_pred))
    test_score = np.sqrt(mean_squared_error(y_test, test_pred))

    # Plot predictions
    scores = (
        (r'$RMSE={:,.0f}$' + ' EUR').format(train_score),
        (r'$RMSE={:,.0f}$' + ' EUR').format(test_score),
    )
    pred_plots = plot_predictions(scores, train_pred, test_pred, y_train,
                                  y_test)
    pred_plots.savefig(output_filepath + '/pred_plots.png')
Example #3
def predict(model_filepath, config, input_data):
    """Return prediction from user input."""
    # Load model
    model = Model.load(model_filepath + config['predicting']['model_name'])

    # Predict
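    # np.round(..., -3) rounds the predicted value to the nearest thousand before casting to int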
    prediction = int(np.round(model.predict(input_data), -3)[0])
    return prediction
Example #4
def main(data_path, config_path):
    config = json.loads(evaluate_file(config_path))
    os.environ["CUDA_VISIBLE_DEVICES"] = config['gpu']
    data_loader = DatasetLoader(data_path, config['num_y'], config['num_z'])
    data = data_loader.load_data()
    model = Model.by_name(config['type'])(config)
    acc1, acc2 = model.evaluate(data)
    print(f'acc1 = {acc1}, acc2 = {acc2}')
Example #5
def main(input_data, output_model):
    """ Runs modeling scripts on processed data (../raw) to
        create a model. The model is saved as a pickle in ../models.
    """
    logger = logging.getLogger(__name__)
    logger.info('training model')

    data = DataSet(train_dir=input_data)
    train = data.get_train_set()
    X_train = data.get_features(train)
    y = data.get_label(train)

    clf = models[4]
    param_grid = params[4]

    model = Model.tune(clf, X_train, y, param_grid)
    model.save(output_model + model.name)
Example #6
def main(input_train, input_test, input_model, output_prediction):
    """ Runs modeling scripts using the model pickle (../models) to predict
        outcomes. The outcomes file is saved as a .csv in ../models.
    """
    logger = logging.getLogger(__name__)
    logger.info('predicting outcomes')

    data = DataSet(train_dir=input_train, test_dir=input_test)
    test = data.get_test_set()
    X_test = data.get_features(test)

    model = Model.load(input_model + 'XGBClassifier')
    y_pred = model.predict(X_test)

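    # Assemble the submission frame: one Survived prediction per PassengerId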
    output = pd.DataFrame({
        'PassengerId': test['PassengerId'],
        'Survived': y_pred
    })
    output.to_csv(output_prediction + 'submission_{}.csv'.format(model.name),
                  index=False)
Example #7
def kenya_crop_type_mapper():
    data_dir = "../data"

    test_folder = Path("PATH_TO_TIF_FILES")
    test_files = list(test_folder.glob("*.tif"))
    print(test_files)

    model_path = "PATH_TO_MODEL_CKPT"
    print(f"Using model {model_path}")

    model = Model.load_from_checkpoint(model_path)

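    # For each test tile: skip if predictions already exist, otherwise predict with and without the forecaster, plot both, and save NetCDF outputs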
    for test_path in test_files:

        save_dir = Path(data_dir) / "Autoencoder"
        save_dir.mkdir(exist_ok=True)

        print(f"Running for {test_path}")

        savepath = save_dir / f"preds_{test_path.name}"
        if savepath.exists():
            print("File already generated. Skipping")
            continue

        out_forecasted = model.predict(test_path, with_forecaster=True)
        plot_results(out_forecasted,
                     test_path,
                     savepath=save_dir,
                     prefix="forecasted_")

        out_normal = model.predict(test_path, with_forecaster=False)
        plot_results(out_normal,
                     test_path,
                     savepath=save_dir,
                     prefix="full_input_")

        out_forecasted.to_netcdf(save_dir /
                                 f"preds_forecasted_{test_path.name}.nc")
        out_normal.to_netcdf(save_dir / f"preds_normal_{test_path.name}.nc")
Example #8
def main(input_filepath, output_filepath, config_file):
    """Loads the training data, sets up the preprocessing and modeling
    pipeline, trains the final model and saves it."""
    logger = logging.getLogger(__name__)
    logger.info('Loading training data set, setting up pipeline, tuning, '
                'training and evaluating final model.')

    # Parse config file
    # config = parse_config(config_file)

    # Load training data
    X_train = pd.read_csv(input_filepath + '/X_train.csv')
    y_train = pd.read_csv(input_filepath + '/y_train.csv').values.ravel()

    # Pre-processing and modeling pipeline
    cat_features = X_train.select_dtypes(exclude='float64').columns
    num_features = X_train.select_dtypes(include='float64').columns

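    # Categorical/numeric preprocessing feeds an SVR fit on log1p-transformed targets; expm1 maps predictions back to the original scale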
    pipe = Pipeline([('preprocessing',
                      preprocessing_pipeline(cat_features, num_features)),
                     ('model',
                      TransformedTargetRegressor(regressor=SVR(),
                                                 func=np.log1p,
                                                 inverse_func=np.expm1))])

    # Tune or select model
    #   kf = KFold(config['modeling']['num_folds'], shuffle=True,
    #   random_state=rng).get_n_splits(X_train.values)

    model = Model(model=pipe)

    # Train model
    model.train(X_train, y_train)

    # Save model
    model.save(output_filepath + model.name + '.pkl')
Example #9
 model = Model(period=[2018],
               entities=[
                   {
                       'slug': 'bitcoin',
                       'symbol': 'btc',
                       'algo': 'sha-256'
                   },
                   {
                       'slug': 'bitcoin-cash',
                       'symbol': 'bch',
                       'algo': 'sha-256'
                   },
                   {
                       'slug': 'bitcoin-diamond',
                       'symbol': 'bcd',
                       'algo': 'X13'
                   },
                   {
                       'slug': 'bitcoin-gold',
                       'symbol': 'bcg',
                       'algo': 'equihash'
                   },
                   {
                       'slug': 'bitcoin-private',
                       'symbol': 'btcp',
                       'algo': 'equihash'
                   },
                   {
                       'slug': 'dash',
                       'symbol': 'dash',
                       'algo': 'X11'
                   },
                   {
                       'slug': 'dogecoin',
                       'symbol': 'doge',
                       'algo': 'X11'
                   },
                   {
                       'slug': 'electroneum',
                       'symbol': 'etn',
                       'algo': 'cryptonight'
                   },
                   {
                       'slug': 'ethereum',
                       'symbol': 'eth',
                       'algo': 'ethash'
                   },
                   {
                       'slug': 'ethereum-classic',
                       'symbol': 'etc',
                       'algo': 'ethash'
                   },
                   {
                       'slug': 'litecoin',
                       'symbol': 'ltc',
                       'algo': 'scrypt'
                   },
                   {
                       'slug': 'galactrum',
                       'symbol': 'ore',
                       'algo': 'lyra2rev2'
                   },
                   {
                       'slug': 'monero',
                       'symbol': 'xmr',
                       'algo': 'cryptonight'
                   },
                   {
                       'slug': 'ravencoin',
                       'symbol': 'rvn',
                       'algo': 'X16R'
                   },
                   {
                       'slug': 'zcash',
                       'symbol': 'zec',
                       'algo': 'equihash'
                   },
               ])
Example #10
 def __init__(self, model_path, full_path=False):
     self.model = Model(model_path, full_path)
     self.encoder = self.model.model.layers[1]
Example #11
import sys
from argparse import ArgumentParser
from pathlib import Path

sys.path.append("..")

from src.models import Model
from src.models import train_model

if __name__ == "__main__":
    parser = ArgumentParser()

    parser.add_argument("--max_epochs", type=int, default=1000)
    parser.add_argument("--patience", type=int, default=10)

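    # Merge the model-specific CLI arguments into the parser before parsing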
    model_args = Model.add_model_specific_args(parser).parse_args()
    model = Model(model_args)

    train_model(model, model_args)
Example #12
from src.models import Model
import numpy as np
import matplotlib.pyplot as plt

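# Each Model(...) call below rebinds m, so only the last dataset (VisaPremier.txt) feeds the balance plot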
m = Model(
    data_path=
    '/home/weiss/workspace/studies/Supervised Learning/project/projet_app_sup_20/data/Aggregation.txt',
    sep='\t')
m = Model(
    data_path=
    '/home/weiss/workspace/studies/Supervised Learning/project/projet_app_sup_20/data/creditcard.csv',
    sep=',')
m = Model(
    data_path=
    '/home/weiss/workspace/studies/Supervised Learning/project/projet_app_sup_20/data/flame.txt',
    sep='\t')
m = Model(
    data_path=
    '/home/weiss/workspace/studies/Supervised Learning/project/projet_app_sup_20/data/spiral.txt',
    sep='\t')
m = Model(
    data_path=
    '/home/weiss/workspace/studies/Supervised Learning/project/projet_app_sup_20/data/VisaPremier.txt',
    sep='\t',
    ys=-2)

b = m._check_balance()
plt.bar(range(len(b)), b.values(), align='center')
plt.xticks(range(len(b)), list(b.keys()))
plt.show()
Example #13
 def __init__(self):
     super(CrowdCount, self).__init__()
     self.features = Model()
     self.my_loss = None
Example #14
def main():

    # Params
    DEVICE = 'cuda'
    GROUP_SIZE = 6
    EPOCHS = 800
    TBOARD = False  # Set to True if you have TensorBoard running

    # Load data
    coseg = Coseg(
        img_set='images/',
        gt_set='ground_truth/',
        root_dir="data/042_reproducible/",
    )
    trloader = DataLoader(coseg, batch_size=1, shuffle=False, num_workers=1)
    imgs = []
    GTs = []
    for i, (In, GTn) in enumerate(trloader):
        if i == GROUP_SIZE:
            break
        else:
            In = In.to(DEVICE)
            GTn = GTn.to(DEVICE)
            imgs.append(In)
            GTs.append(GTn)
    print("[ OK ] Data loaded")

    # Precompute features
    vgg19_original = models.vgg19()
    phi = nn.Sequential(*list(vgg19_original.children())[:-2])
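    # Freeze the truncated VGG-19 feature extractor; only groupnet's parameters are optimized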
    for param in phi.parameters():
        param.requires_grad = False
    phi = phi.to(DEVICE)
    features = precompute_features(imgs, GTs, phi)
    print("[ OK ] Feature precomputed")

    # Instantiate the model
    if DEVICE == 'cuda':
        groupnet = Model((1, 3, 224, 224)).cuda()
    else:
        groupnet = Model((1, 3, 224, 224))
    print("[ OK ] Model instantiated")

    # Optimizer
    # [ PAPER ] suggests SGD with these parameters, but it doesn't work
    #optimizer = optim.SGD(groupnet.parameters(), momentum=0.99,lr=0.00005, weight_decay=0.0005)
    optimizer = optim.Adam(groupnet.parameters(), lr=0.00002)

    # Train Loop
    losses = []
    if TBOARD:
        writer = SummaryWriter()
    for epoch in range(EPOCHS):

        optimizer.zero_grad()
        lss = 0
        lcs = 0
        loss = 0

        masks = groupnet(imgs)
        for i in range(len(imgs)):
            lss += Ls(masks[i], GTs[i])

            # [ PAPER ] suggests activating the group loss after 100 epochs
            if epoch >= 100:
                lcs += Lc(i, imgs, masks, features, phi)

        lss /= len(imgs)

        if epoch >= 100:
            lcs /= len(imgs)

        # [ PAPER ] suggests 0.1, but it does not work
        loss = lss + 1. * lcs
        loss.backward(retain_graph=True)
        optimizer.step()

        if TBOARD:
            writer.add_scalar("loss", loss.item(), epoch)
            utils.tboard_imlist(masks, "masks", epoch, writer)
        losses.append(loss.item())
        print(f'[ ep {epoch} ] - Loss: {loss.item():.4f}')

    if TBOARD:
        writer.close()

    # Plot results in the same folder
    fig, axs = plt.subplots(nrows=3, ncols=GROUP_SIZE, figsize=(10, 5))
    for i in range(len(imgs)):
        axs[0, i].imshow(imgs[i].detach().cpu().numpy().squeeze(0).transpose(
            1, 2, 0))
        axs[0, i].axis('off')
        axs[1, i].imshow(GTs[i].detach().cpu().numpy().squeeze(0).squeeze(0))
        axs[1, i].axis('off')
        axs[2, i].imshow(masks[i].detach().cpu().numpy().squeeze(0).squeeze(0))
        axs[2, i].axis('off')
    plt.savefig("predictions.png")
    plt.close()

    fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(10, 5))
    ax.plot(losses)
    if epoch > 100:
        ax.axvline(100, c='r', ls='--', label="Activate Lc loss")
    ax.set_xlabel("Epoch")
    ax.set_ylabel("Loss")
    ax.legend()
    plt.savefig("loss.png")
    plt.close()
    print("[ OK ] Plot")
Example #15
                        required=False)
    parser.add_argument('--save_period',
                        type=int,
                        default=1000,
                        required=False)
    parser.add_argument('--max_to_keep', type=int, default=3, required=False)
    parser.add_argument('--restore', action='store_true', required=False)
    args = parser.parse_args()
    print(args)

    sess = tf.Session()

    if args.restore:
        print("restoring pretrained model...")
        if os.path.isdir(args.model_dir):
            model = Model(sess=sess, config=None)
            model.restore(model_dir=args.model_dir)
        else:
            raise Exception(f"invalid model dir: {args.model_dir}")
    else:
        print("building new model...")
        os.makedirs(args.model_dir, exist_ok=True)
        config = {
            "model": {
                "encoder": args.encoder,
                "params": json.loads(args.enc_params)
            },
            "training": {
                "model_dir": args.model_dir,
                "num_games": args.num_games_training,
                "val_period": args.val_period,
Example #16
 def __init__(self):
     super(CrowdCount, self).__init__()
     self.features = Model()
     self.my_loss = None
     self.this_dataset_density_level = dataset_density_level[
         'shtA1_train_8_4']
Example #17
def main():
    # Get config for this run
    hparams = parse_args()

    # Setup logger
    config = {
        "handlers": [
            {
                "sink": sys.stdout,
                "format": "{time:[MM-DD HH:mm]} - {message}"
            },
            {
                "sink": f"{hparams.outdir}/logs.txt",
                "format": "{time:[MM-DD HH:mm]} - {message}"
            },
        ],
    }
    logger.configure(**config)
    logger.info(f"Parameters used for training: {hparams}")

    # Fix seeds for reproducibility
    pt.utils.misc.set_random_seed(hparams.seed)

    # Save config
    os.makedirs(hparams.outdir, exist_ok=True)
    yaml.dump(vars(hparams), open(hparams.outdir + "/config.yaml", "w"))

    # Get model
    model = Model(arch=hparams.arch,
                  model_params=hparams.model_params,
                  embedding_size=hparams.embedding_size,
                  pooling=hparams.pooling).cuda()

    # Get loss
    # loss = LOSS_FROM_NAME[hparams.criterion](in_features=hparams.embedding_size, **hparams.criterion_params).cuda()
    loss = LOSS_FROM_NAME["cross_entropy"].cuda()
    logger.info(f"Loss for this run is: {loss}")

    if hparams.resume:
        checkpoint = torch.load(
            hparams.resume, map_location=lambda storage, loc: storage.cuda())
        model.load_state_dict(checkpoint["state_dict"], strict=True)
        loss.load_state_dict(checkpoint["loss"], strict=True)

    if hparams.freeze_bn:
        freeze_batch_norm(model)

    # Get optimizer
    # optim_params = pt.utils.misc.filter_bn_from_wd(model)
    optim_params = list(loss.parameters()) + list(
        model.parameters())  # add loss params
    optimizer = optimizer_from_name(hparams.optim)(
        optim_params, lr=0, weight_decay=hparams.weight_decay, amsgrad=True)

    num_params = pt.utils.misc.count_parameters(model)[0]
    logger.info(f"Model size: {num_params / 1e6:.02f}M")
    # logger.info(model)

    # The scheduler plans the experiment as a sequence of training phases
    scheduler = pt.fit_wrapper.callbacks.PhasesScheduler(hparams.phases)

    # Save logs
    TB_callback = pt_clb.TensorBoard(hparams.outdir, log_every=20)

    # Get dataloaders
    train_loader, val_loader, val_indexes = get_dataloaders(
        root=hparams.root,
        augmentation=hparams.augmentation,
        size=hparams.size,
        val_size=hparams.val_size,
        batch_size=hparams.batch_size,
        workers=hparams.workers,
    )

    # Load validation query / gallery split and resort it according to indexes from sampler
    df_val = pd.read_csv(os.path.join(hparams.root, "train_val.csv"))
    df_val = df_val[~df_val["is_train"].astype(bool)]
    val_is_query = df_val.is_query.values[val_indexes].astype(bool)

    logger.info(f"Start training")
    # Init runner
    runner = pt.fit_wrapper.Runner(
        model,
        optimizer,
        criterion=loss,
        callbacks=[
            # pt_clb.BatchMetrics([pt.metrics.Accuracy(topk=1)]),
            ContestMetricsCallback(
                is_query=val_is_query[:1280] if hparams.debug else val_is_query
            ),
            pt_clb.Timer(),
            pt_clb.ConsoleLogger(),
            pt_clb.FileLogger(),
            TB_callback,
            CheckpointSaver(hparams.outdir,
                            save_name="model.chpn",
                            monitor="target",
                            mode="max"),
            CheckpointSaver(hparams.outdir,
                            save_name="model_mapr.chpn",
                            monitor="mAP@R",
                            mode="max"),
            CheckpointSaver(hparams.outdir, save_name="model_loss.chpn"),
            scheduler,
            # EMA must go after other checkpoints
            pt_clb.ModelEma(model, hparams.ema_decay)
            if hparams.ema_decay else pt_clb.Callback(),
        ],
        use_fp16=hparams.use_fp16,  # use mixed precision by default.  # hparams.opt_level != "O0"
    )

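    # Optional head warm-up: with the backbone frozen, only the loss module's parameters are updated for the first epochs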
    if hparams.head_warmup_epochs > 0:
        #Freeze model
        for p in model.parameters():
            p.requires_grad = False

        runner.fit(
            train_loader,
            # val_loader=val_loader,
            epochs=hparams.head_warmup_epochs,
            steps_per_epoch=20 if hparams.debug else None,
            # val_steps=20 if hparams.debug else None,
        )

        # Unfreeze model
        for p in model.parameters():
            p.requires_grad = True

        if hparams.freeze_bn:
            freeze_batch_norm(model)

        # Re-init to avoid nan's in loss
        optim_params = list(loss.parameters()) + list(model.parameters())

        optimizer = optimizer_from_name(hparams.optim)(
            optim_params,
            lr=0,
            weight_decay=hparams.weight_decay,
            amsgrad=True)

        runner.state.model = model
        runner.state.optimizer = optimizer
        runner.state.criterion = loss

    # Train
    runner.fit(
        train_loader,
        # val_loader=val_loader,
        start_epoch=hparams.head_warmup_epochs,
        epochs=scheduler.tot_epochs,
        steps_per_epoch=20 if hparams.debug else None,
        # val_steps=20 if hparams.debug else None,
    )

    logger.info(f"Loading best model")
    checkpoint = torch.load(os.path.join(hparams.outdir, f"model.chpn"))
    model.load_state_dict(checkpoint["state_dict"], strict=True)
    # runner.state.model = model
    # loss.load_state_dict(checkpoint["loss"], strict=True)

    # Evaluate
    _, [acc1, map10, target, mapR] = runner.evaluate(
        val_loader,
        steps=20 if hparams.debug else None,
    )

    logger.info(
        f"Val: Acc@1 {acc1:0.5f}, mAP@10 {map10:0.5f}, Target {target:0.5f}, mAP@R {mapR:0.5f}"
    )

    # Save params used for training and final metrics into separate TensorBoard file
    metric_dict = {
        "hparam/Acc@1": acc1,
        "hparam/mAP@10": map10,
        "hparam/mAP@R": mapR,
        "hparam/Target": target,
    }

    # Convert all lists / dicts to avoid TB error
    hparams.phases = str(hparams.phases)
    hparams.model_params = str(hparams.model_params)
    hparams.criterion_params = str(hparams.criterion_params)

    with pt.utils.tensorboard.CorrectedSummaryWriter(hparams.outdir) as writer:
        writer.add_hparams(hparam_dict=vars(hparams), metric_dict=metric_dict)
Example #18
def run(X_seq_train, X_cont_train, y_train, X_seq_test, X_cont_test, timestamp,
        random_state):
    seed_everything(random_state)

    oof_preds = np.zeros(len(X_seq_train))
    test_preds = np.zeros(len(X_seq_test))
    cv_scores = []
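    # 5-fold stratified CV: collect out-of-fold predictions and average test predictions over the folds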
    for i, (trn_idx, val_idx) in enumerate(
            get_folds(5, "stratified",
                      random_state).split(X_cont_train, y_train)):
        print(f"fold {i + 1}")
        train_dataset = TensorDataset(
            torch.from_numpy(X_seq_train[trn_idx]).float(),
            torch.from_numpy(X_cont_train[trn_idx]).float(),
            torch.from_numpy(y_train[trn_idx]).float(),
        )
        valid_dataset = TensorDataset(
            torch.from_numpy(X_seq_train[val_idx]).float(),
            torch.from_numpy(X_cont_train[val_idx]).float(),
            torch.from_numpy(y_train[val_idx]).float(),
        )
        test_dataset = TensorDataset(
            torch.from_numpy(X_seq_test).float(),
            torch.from_numpy(X_cont_test).float())

        train_loader = DataLoader(train_dataset, shuffle=True, batch_size=32)
        valid_loader = DataLoader(valid_dataset, shuffle=False, batch_size=128)
        test_loader = DataLoader(test_dataset, shuffle=False, batch_size=128)
        loaders = {"train": train_loader, "valid": valid_loader}

        runner = CustomRunner(device="cuda")

        model = Model(
            in_channels=X_seq_train.shape[1],
            n_cont_features=X_cont_train.shape[1],
            hidden_channels=64,
            kernel_sizes=[3, 5, 7, 15, 21, 51, 101],
            out_dim=1,
        )
        criterion = torch.nn.BCELoss()
        optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)
        scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer,
                                                               T_max=30,
                                                               eta_min=1e-6)

        logdir = f"./logdir/{timestamp}_fold{i}"
        runner.train(
            model=model,
            criterion=criterion,
            optimizer=optimizer,
            scheduler=scheduler,
            loaders=loaders,
            logdir=logdir,
            num_epochs=30,
            verbose=True,
        )

        pred = np.concatenate(
            list(
                map(
                    lambda x: x.cpu().numpy(),
                    runner.predict_loader(
                        loader=valid_loader,
                        resume=f"{logdir}/checkpoints/best.pth",
                        model=model,
                    ),
                )))
        oof_preds[val_idx] = pred
        score = average_precision_score(y_train[val_idx], pred)
        cv_scores.append(score)
        print("score", score)

        pred = np.concatenate(
            list(
                map(
                    lambda x: x.cpu().numpy(),
                    runner.predict_loader(
                        loader=test_loader,
                        resume=f"{logdir}/checkpoints/best.pth",
                        model=model,
                    ),
                )))
        test_preds += pred / 5
    return oof_preds, test_preds, cv_scores
Example #19
                              batch_size=config.batch_size,
                              num_workers=8,
                              drop_last=True,
                              shuffle=True)
    val_dataset = InpaintingDataset(config,
                                    val_list,
                                    fix_mask_path=val_fix_mask,
                                    training=False)
    val_loader = DataLoader(dataset=val_dataset,
                            batch_size=config.batch_size,
                            num_workers=2,
                            drop_last=False,
                            shuffle=False)
    sample_iterator = val_dataset.create_iterator(config.sample_size)

    model = Model(config, logger=logger)
    model.load(is_test=False)
    steps_per_epoch = len(train_dataset) // config.batch_size
    iteration = model.iteration
    epoch = model.iteration // steps_per_epoch
    logger.info('Start from epoch:{}, iteration:{}'.format(epoch, iteration))

    model.train()
    keep_training = True
    best_score = {}
    while (keep_training):
        epoch += 1

        stateful_metrics = ['epoch', 'iter', 'g_lr']
        progbar = Progbar(len(train_dataset),
                          max_iters=steps_per_epoch,
Example #20
def main():
    args, device, checkpoint = init_pipeline()
    train_loader, _, _, _, init_params = load_train_data(args, device)
    model = Model(*init_params).to(device)
    util.load_state_dict(checkpoint, model)
    visualize(model, train_loader)
Example #21
def test(hparams):
    # Check that folder exists
    assert hparams.config_path.exists()

    # Read config
    with open(hparams.config_path / "config.yaml", "r") as file:
        model_configs = yaml.safe_load(file)
    model_configs.update(vars(hparams))
    hparams = argparse.Namespace(**model_configs)

    # Get model
    model = Model(
        arch=hparams.arch,
        model_params=hparams.model_params,
        embedding_size=hparams.embedding_size,
        pooling=hparams.pooling).cuda()
    # logger.info(model)

    # Init
    checkpoint = torch.load(hparams.config_path / f"model.chpn")
    model.load_state_dict(checkpoint["state_dict"], strict=False)

    # -------------- Get embeddings for val and test data --------------
    if hparams.extract_embeddings:
        if hparams.validation:
            print(f"Using size {hparams.val_size}")
            loader, indexes = get_val_dataloader(
                root=hparams.root,
                augmentation="val",
                batch_size=hparams.batch_size,
                size=hparams.val_size,
                workers=hparams.workers,
            )

            # Load validation query / gallery split and sort it according to indexes from sampler
            df_val = pd.read_csv(os.path.join(hparams.root, "train_val.csv"))
            df_val = df_val[~df_val["is_train"].astype(bool)].iloc[indexes]

            val_embeddings = predict_from_loader(model, loader)

            # Hack to save torch.Tensor into pd.DataFrame
            df_val["embeddings"] = list(map(lambda r: np.array(r).tolist(), val_embeddings))
            # Save results into folder with logs
            df_val.to_csv(hparams.config_path / "train_val.csv", index=None)
            del val_embeddings
            logger.info("Finished extracting validation embeddings")

        if hparams.test:
            loader, indexes = get_test_dataloader(
                root=hparams.root,
                augmentation="test",
                batch_size=hparams.batch_size,
                size=hparams.val_size,
                workers=hparams.workers,
            )
            # Load test DF and sort it according to indexes from sampler
            df_test = pd.read_csv(os.path.join(hparams.root, "test_A.csv")).iloc[indexes]
            test_embeddings = predict_from_loader(model, loader)

            # Hack to save torch.Tensor into pd.DataFrame
            df_test["embeddings"] = list(map(lambda r: np.array(r).tolist(), test_embeddings))

            # Save results into folder with logs
            df_test.to_csv(hparams.config_path / "test_A.csv", index=None)
            del test_embeddings
            logger.info("Finished extracting test embeddings")

    # -------------- Test model on validation dataset --------------
    if hparams.validation:
        # Read DF
        df_val = pd.read_csv(hparams.config_path / "train_val.csv")
        val_embeddings = torch.tensor(list(map(eval, df_val["embeddings"].values)))
        query_mask = df_val["is_query"].values.astype(bool)
        val_labels = df_val["label"].values

        # Shape (n_embeddings, embedding_dim)
        query_embeddings, gallery_embeddings = val_embeddings[query_mask], val_embeddings[~query_mask]
        query_labels, gallery_labels = val_labels[query_mask], val_labels[~query_mask]
        logger.info(f"Validation query size - {len(query_embeddings)}, gallery size - {len(gallery_embeddings)}")
        del val_embeddings

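        # Optional refinement: DBA augments gallery embeddings with their top-10 gallery neighbours; aQE expands queries with their top-3 gallery neighbours (alpha=3)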
        if hparams.dba:
            gallery_embeddings = query_expansion(gallery_embeddings, gallery_embeddings, topk=10, alpha=None)

        if hparams.aqe:
            query_embeddings = query_expansion(query_embeddings, gallery_embeddings, topk=3, alpha=3)

        # Shape (query_size x gallery_size)
        conformity_matrix = torch.tensor(query_labels.reshape(-1, 1) == gallery_labels)

        # Matrix of pairwise distances between query and gallery embeddings
        distances = torch.cdist(query_embeddings, gallery_embeddings)

        acc1 = cmc_score_count(distances, conformity_matrix, topk=1)
        map10 = map_at_k(distances, conformity_matrix, topk=10)
        mapR = map_at_k(distances, conformity_matrix, topk=None)

        logger.info(
            f"Val: Acc@1 {acc1:0.5f}, mAP@10 {map10:0.5f}, Target {0.5 * acc1 + 0.5 * map10:0.5f}, mAP@R {mapR:0.5f}")

    # -------------- Predict on  test dataset  --------------
    if hparams.test:
        df_test = pd.read_csv(hparams.config_path / "test_A.csv")
        test_embeddings = torch.tensor(list(map(eval, df_test["embeddings"].values)))
        query_mask = df_test["is_query"].values.astype(bool)
        query_files, gallery_files = df_test["file_path"].values[query_mask], df_test["file_path"].values[~query_mask]

        # Shape (n_embeddings, embedding_dim)
        query_embeddings, gallery_embeddings = test_embeddings[query_mask], test_embeddings[~query_mask]
        logger.info(f"Test query size - {len(query_embeddings)}, gallery size - {len(gallery_embeddings)}")
        del test_embeddings

        if hparams.dba:
            gallery_embeddings = query_expansion(gallery_embeddings, gallery_embeddings, topk=10, alpha=None)

        if hparams.aqe:
            query_embeddings = query_expansion(query_embeddings, gallery_embeddings, topk=3, alpha=3)

        # Matrix of pairwise distances between query and gallery embeddings
        distances = torch.cdist(query_embeddings, gallery_embeddings)
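        # argsort orders gallery indices by ascending distance per query; the 10 nearest form the submission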
        perm_matrix = torch.argsort(distances)

        logger.info(f"Creating submission{'_dba' if hparams.dba else ''}{'_aqe' if hparams.aqe else ''}_{hparams.val_size}.csv")
        data = {
            "image_id": [],
            "gallery_img_list": []
        }

        for idx in tqdm(range(len(query_files))):
            query_file = query_files[idx].split("/")[1]
            predictions = gallery_files[perm_matrix[:, : 10][idx]]
            predictions = [p.split("/")[1] for p in predictions]
            data["image_id"].append(query_file)
            data["gallery_img_list"].append(predictions)

        df = pd.DataFrame(data=data)
        df["gallery_img_list"] = df["gallery_img_list"].apply(lambda x: '{{{}}}'.format(",".join(x))).astype(str)
        lines = [f"{x},{y}" for x, y in zip(data["image_id"], df["gallery_img_list"])]
        with open(hparams.config_path \
            / f"submission{'_dba' if hparams.dba else ''}{'_aqe' if hparams.aqe else ''}_{hparams.val_size}.csv", "w") as f:
            for line in lines:
                f.write(line + '\n')