def get_df_and_dists(topk=100):
    """Find the ``topk`` nearest train images of every test image.

    The test and train descriptors of six models are loaded through
    ``utils.prepare_ids_and_feats`` (one weight per model, ``normalize=True``).
    The train descriptors are added to a flat L2 faiss index and the test
    descriptors are used as queries.

    Args:
        topk: number of neighbours retrieved per test image.

    Returns:
        A tuple ``(df, dists)``. ``df`` has an ``id`` column holding the test
        ids and an ``images`` column holding the space-joined ids of the
        retrieved train images; ``dists`` is the distance array returned by
        the faiss search.
    """
    # (experiment version, run tag) of every ensemble member. The test and
    # train feature directories of a model only differ in their prefix.
    runs = [
        ('v19c',
         'ms_L2_ep4_scaleup_ep3_freqthresh-2_loss-cosface_pooling-G,G,G,G_verifythresh-30'),
        ('v20c',
         'ms_L2_ep5_augmentation-middle_epochs-7_freqthresh-3_loss-arcface_verifythresh-30'),
        ('v21c',
         'ms_L2_ep6_scaleup_ep5_augmentation-middle_epochs-7_freqthresh-3_loss-arcface_verifythresh-30'),
        ('v22c',
         'ms_L2_ep4_scaleup_ep3_base_margin-0.4_freqthresh-2_verifythresh-30'),
        ('v23c',
         'ms_L2_ep6_scaleup_ep5_augmentation-middle_epochs-7_freqthresh-3_verifythresh-30'),
        ('v24c',
         'ms_L2_ep5_augmentation-middle_epochs-7_freqthresh-3_loss-cosface_verifythresh-30'),
    ]
    # One ensemble weight per entry of ``runs``, in the same order.
    model_weights = [0.5, 1.0, 1.0, 0.5, 1.0, 1.0]

    test_dirs = [
        ROOT + f'exp/{version}/feats_test19_{tag}/' for version, tag in runs
    ]
    train_dirs = [
        ROOT + f'exp/{version}/feats_train_{tag}/' for version, tag in runs
    ]

    logger.info('load ids and features.')
    ids_test, feats_test = utils.prepare_ids_and_feats(
        test_dirs, model_weights, normalize=True)
    ids_train, feats_train = utils.prepare_ids_and_feats(
        train_dirs, model_weights, normalize=True)
    logger.info('done.')

    logger.info('build index...')
    feat_dim = feats_train.shape[1]
    train_index = faiss.IndexFlatL2(feat_dim)
    train_index.add(feats_train)
    dists, topk_idx = train_index.search(x=feats_test, k=topk)
    logger.info('query search done.')

    # One row per test image: its id and the retrieved train ids as a single
    # space-separated string.
    neighbour_ids = ids_train[topk_idx]
    df = pd.DataFrame(ids_test, columns=['id'])
    df['images'] = np.apply_along_axis(' '.join, axis=1, arr=neighbour_ids)

    return df, dists
def load_train_ensemble_dataset():
    """Load the weighted ensemble descriptors of the test and train sets.

    Both sets are read with ``utils.prepare_ids_and_feats`` from the feature
    directories of six models, using one weight per model and
    ``normalize=True``.

    Returns:
        An ``edict`` with the entries ``ids_test``, ``ids_train``,
        ``feats_test`` and ``feats_train``.
    """
    # (experiment version, run tag) of every ensemble member. The test and
    # train feature directories of a model only differ in their prefix.
    runs = [
        ('v19c',
         'ms_L2_ep4_scaleup_ep3_freqthresh-2_loss-cosface_pooling-G,G,G,G_verifythresh-30'),  # noqa
        ('v20c',
         'ms_L2_ep5_augmentation-middle_epochs-7_freqthresh-3_loss-arcface_verifythresh-30'),  # noqa
        ('v21c',
         'ms_L2_ep6_scaleup_ep5_augmentation-middle_epochs-7_freqthresh-3_loss-arcface_verifythresh-30'),  # noqa
        ('v22c',
         'ms_L2_ep4_scaleup_ep3_base_margin-0.4_freqthresh-2_verifythresh-30'),  # noqa
        ('v23c',
         'ms_L2_ep6_scaleup_ep5_augmentation-middle_epochs-7_freqthresh-3_verifythresh-30'),  # noqa
        ('v24c',
         'ms_L2_ep5_augmentation-middle_epochs-7_freqthresh-3_loss-cosface_verifythresh-30'),  # noqa
    ]
    # One ensemble weight per entry of ``runs``, in the same order.
    model_weights = [0.5, 1.0, 1.0, 0.5, 1.0, 1.0]

    test_dirs = [
        ROOT + f'experiments/{version}/feats_test19_{tag}/'
        for version, tag in runs
    ]
    train_dirs = [
        ROOT + f'experiments/{version}/feats_train_{tag}/'
        for version, tag in runs
    ]

    ids_test, feats_test = utils.prepare_ids_and_feats(
        test_dirs, model_weights, normalize=True)
    ids_train, feats_train = utils.prepare_ids_and_feats(
        train_dirs, model_weights, normalize=True)

    dataset = edict(ids_test=ids_test,
                    ids_train=ids_train,
                    feats_test=feats_test,
                    feats_train=feats_train)
    return dataset
# Example no. 3
# 0
def main():
    """Build a single-model retrieval submission, rerank it and submit it.

    The first part loads the index, test and train descriptors of one model,
    retrieves the 100 nearest index images for every test image on the GPUs,
    passes the result through ``reranking_submission``, writes the gzipped CSV
    and runs the ``kaggle`` submit command.

    NOTE(review): everything from the first ``parser.add_argument`` call
    onward uses ``parser``, ``args.index_dirs``, ``args.test_dirs`` and
    ``args.setting``, none of which is created in this function. It looks like
    the tail of a different, argparse-driven entry point that was concatenated
    here without its beginning — confirm and split before relying on it.
    """
    index_dirs = [
        '../exp/v2clean/feats_index19_ms_L2_ep4_freqthresh-3_loss-arcface_verifythresh-30/'
    ]
    test_dirs = [
        '../exp/v2clean/feats_test19_ms_L2_ep4_freqthresh-3_loss-arcface_verifythresh-30/'
    ]
    train_dirs = [
        '../exp/v2clean/feats_train_ms_L2_ep4_freqthresh-3_loss-arcface_verifythresh-30/'
    ]

    # No weights are passed here, unlike the ensemble variants that pass one
    # weight per feature directory.
    ids_index, feats_index = utils.prepare_ids_and_feats(index_dirs,
                                                         normalize=True)
    ids_test, feats_test = utils.prepare_ids_and_feats(test_dirs,
                                                       normalize=True)
    ids_train, feats_train = utils.prepare_ids_and_feats(train_dirs,
                                                         normalize=True)

    print('build index...')
    # Flat L2 index over the index-set descriptors, moved to all GPUs; the
    # test descriptors are the queries.
    cpu_index = faiss.IndexFlatL2(feats_index.shape[1])
    gpu_index = faiss.index_cpu_to_all_gpus(cpu_index)
    gpu_index.add(feats_index)
    dists, topk_idx = gpu_index.search(x=feats_test, k=100)
    print('query search done.')

    # One row per test image: its id and the retrieved index ids joined by
    # single spaces.
    subm = pd.DataFrame(ids_test, columns=['id'])
    subm['images'] = np.apply_along_axis(' '.join,
                                         axis=1,
                                         arr=ids_index[topk_idx])

    subm = reranking_submission(ids_index,
                                feats_index,
                                ids_test,
                                feats_test,
                                ids_train,
                                feats_train,
                                subm,
                                topk=100)

    # NOTE(review): the inputs above use relative '../exp/...' paths while
    # the output is placed under ROOT — confirm this is intended.
    output_name = ROOT + f'output/submit_retrieval.csv.gz'
    subm[['id', 'images']].to_csv(output_name, compression='gzip', index=False)
    print('saved to ' + output_name)

    # The command is interpolated into a string and executed through the
    # shell, with an empty submission message.
    cmd = f'kaggle c submit -c landmark-retrieval-2019 -f {output_name} -m "" '
    print(cmd)
    subprocess.run(cmd, shell=True)
    # NOTE(review): `parser` is not defined anywhere in this function; the
    # statements below presumably belong to another entry point (see the
    # docstring).
    parser.add_argument('-w', '--weights', default='1')
    parser.add_argument('-d',
                        '--devices',
                        default='0',
                        help='gpu device indexes')
    args = parser.parse_args()

    # Comma-separated command-line values are split into lists.
    index_dirs = args.index_dirs.split(',')
    test_dirs = args.test_dirs.split(',')
    setting = args.setting
    # Weights are parsed with int(), so a fractional value such as '0.5'
    # raises ValueError.
    weights = list(map(int, args.weights.split(',')))
    os.environ['CUDA_VISIBLE_DEVICES'] = args.devices
    n_gpus = len(args.devices.split(','))

    ids_index, feats_index = utils.prepare_ids_and_feats(index_dirs,
                                                         weights,
                                                         normalize=True)
    ids_test, feats_test = utils.prepare_ids_and_feats(test_dirs,
                                                       weights,
                                                       normalize=True)
    # ids_train, feats_train = utils.prepare_ids_and_feats(train_dirs, weights, normalize=True)

    # Cloner options with sharding enabled; not used within the visible part
    # of this function.
    co = faiss.GpuMultipleClonerOptions()
    co.shard = True
    # co.float16 = False

    # One faiss GPU resources object per device listed in --devices.
    vres = []
    for _ in range(n_gpus):
        res = faiss.StandardGpuResources()
        vres.append(res)
def get_df_and_dists(train19_csv,
                     topk=100,
                     n_iter=1,
                     qe_topk=3,
                     weighting_scheme='alpha',
                     alpha=3,
                     t=0.8,
                     verifythresh=40,
                     freqthresh=5,
                     ):
    """Find the ``topk`` nearest train images of every test image.

    The weighted ensemble descriptors of the test and train sets are loaded
    with ``utils.prepare_ids_and_feats``. When ``n_iter`` is positive the
    train descriptors are first replaced by the output of ``supervised_dba``.
    The train descriptors are then added to a flat L2 faiss index and queried
    with the test descriptors. The distance and neighbour-index arrays are
    also saved as ``.npy`` files under ``data/working/exp44/``.

    Args:
        train19_csv: forwarded unchanged to ``supervised_dba``.
        topk: number of neighbours retrieved per test image.
        n_iter: forwarded to ``supervised_dba``; a value of 0 or less skips
            that step entirely.
        qe_topk, weighting_scheme, alpha, t, verifythresh, freqthresh:
            forwarded unchanged to ``supervised_dba``.

    Returns:
        A tuple ``(df, dists)``. ``df`` has an ``id`` column holding the test
        ids and an ``images`` column holding the space-joined ids of the
        retrieved train images; ``dists`` is the distance array returned by
        the faiss search.
    """
    ROOT = '/fs2/groups2/gca50080/yokoo/landmark/'

    # (experiment version, run tag) of every ensemble member. The test and
    # train feature directories of a model only differ in their prefix.
    runs = [
        ('v19c',
         'ms_L2_ep4_scaleup_ep3_freqthresh-2_loss-cosface_pooling-G,G,G,G_verifythresh-30'),
        ('v20c',
         'ms_L2_ep5_augmentation-middle_epochs-7_freqthresh-3_loss-arcface_verifythresh-30'),
        ('v21c',
         'ms_L2_ep6_scaleup_ep5_augmentation-middle_epochs-7_freqthresh-3_loss-arcface_verifythresh-30'),
        ('v22c',
         'ms_L2_ep4_scaleup_ep3_base_margin-0.4_freqthresh-2_verifythresh-30'),
        ('v23c',
         'ms_L2_ep6_scaleup_ep5_augmentation-middle_epochs-7_freqthresh-3_verifythresh-30'),
        ('v24c',
         'ms_L2_ep5_augmentation-middle_epochs-7_freqthresh-3_loss-cosface_verifythresh-30'),
    ]
    # One ensemble weight per entry of ``runs``, in the same order.
    model_weights = [0.5, 1.0, 1.0, 0.5, 1.0, 1.0]

    test_dirs = [
        ROOT + f'experiments/{version}/feats_test19_{tag}/'
        for version, tag in runs
    ]
    train_dirs = [
        ROOT + f'experiments/{version}/feats_train_{tag}/'
        for version, tag in runs
    ]

    logger.info('load ids and features.')
    ids_test, feats_test = utils.prepare_ids_and_feats(
        test_dirs, model_weights, normalize=True)
    ids_train, feats_train = utils.prepare_ids_and_feats(
        train_dirs, model_weights, normalize=True)
    logger.info('done.')

    if n_iter > 0:
        dba_options = dict(
            train19_csv=train19_csv,
            n_iter=n_iter,
            qe_topk=qe_topk,
            weighting_scheme=weighting_scheme,
            alpha=alpha,
            t=t,
            verifythresh=verifythresh,
            freqthresh=freqthresh,
        )
        feats_train = supervised_dba(ids_train=ids_train,
                                     feats_train=feats_train,
                                     **dba_options)

    logger.info('build index...')
    feat_dim = feats_train.shape[1]
    train_index = faiss.IndexFlatL2(feat_dim)
    train_index.add(feats_train)
    dists, topk_idx = train_index.search(x=feats_test, k=topk)
    logger.info('query search done.')

    # One row per test image: its id and the retrieved train ids as a single
    # space-separated string.
    neighbour_ids = ids_train[topk_idx]
    df = pd.DataFrame(ids_test, columns=['id'])
    df['images'] = np.apply_along_axis(' '.join, axis=1, arr=neighbour_ids)

    # Keep the raw search output on disk.
    np.save('data/working/exp44/dists_top5__ens3_DBAx1.npy', dists)
    np.save('data/working/exp44/topk_idx_top5__ens3_DBAx1.npy', topk_idx)

    return df, dists
def main():
    """Build, rerank and submit the six-model ensemble retrieval submission.

    Steps:
        1. Load the weighted ensemble descriptors of the index, test and
           train sets with ``utils.prepare_ids_and_feats``.
        2. Retrieve the 100 nearest index images of every test image with a
           flat L2 faiss index cloned to all GPUs.
        3. Pass the initial ranking through ``reranking_submission``.
        4. Write ``output/submit_retrieval.csv.gz`` under ``ROOT`` and submit
           it with the ``kaggle`` command-line tool.
    """
    # (experiment version, run tag) of every ensemble member. All three
    # directory lists are derived from this one table so they cannot drift
    # apart; the hand-written train list previously contained the misspelt
    # path component 'freqthresih-3' for v23c.
    runs = [
        ('v19c',
         'ms_L2_ep4_scaleup_ep3_freqthresh-2_loss-cosface_pooling-G,G,G,G_verifythresh-30'),
        ('v20c',
         'ms_L2_ep5_augmentation-middle_epochs-7_freqthresh-3_loss-arcface_verifythresh-30'),
        ('v21c',
         'ms_L2_ep6_scaleup_ep5_augmentation-middle_epochs-7_freqthresh-3_loss-arcface_verifythresh-30'),
        ('v22c',
         'ms_L2_ep4_scaleup_ep3_base_margin-0.4_freqthresh-2_verifythresh-30'),
        ('v23c',
         'ms_L2_ep6_scaleup_ep5_augmentation-middle_epochs-7_freqthresh-3_verifythresh-30'),
        ('v24c',
         'ms_L2_ep5_augmentation-middle_epochs-7_freqthresh-3_loss-cosface_verifythresh-30'),
    ]
    index_dirs = [
        ROOT + f'experiments/{version}/feats_index19_{tag}/'
        for version, tag in runs
    ]
    test_dirs = [
        ROOT + f'experiments/{version}/feats_test19_{tag}/'
        for version, tag in runs
    ]
    train_dirs = [
        ROOT + f'experiments/{version}/feats_train_{tag}/'
        for version, tag in runs
    ]

    # One ensemble weight per entry of ``runs``; chosen by intuition.
    weights = [0.5, 1.0, 1.0, 0.5, 1.0, 1.0]

    ids_index, feats_index = utils.prepare_ids_and_feats(index_dirs,
                                                         weights,
                                                         normalize=True)
    ids_test, feats_test = utils.prepare_ids_and_feats(test_dirs,
                                                       weights,
                                                       normalize=True)
    ids_train, feats_train = utils.prepare_ids_and_feats(train_dirs,
                                                         weights,
                                                         normalize=True)

    print('build index...')
    cpu_index = faiss.IndexFlatL2(feats_index.shape[1])
    gpu_index = faiss.index_cpu_to_all_gpus(cpu_index)
    gpu_index.add(feats_index)
    dists, topk_idx = gpu_index.search(x=feats_test, k=100)
    print('query search done.')

    # One row per test image: its id and the retrieved index ids joined by
    # single spaces.
    subm = pd.DataFrame(ids_test, columns=['id'])
    subm['images'] = np.apply_along_axis(' '.join,
                                         axis=1,
                                         arr=ids_index[topk_idx])

    subm = reranking_submission(ids_index,
                                feats_index,
                                ids_test,
                                feats_test,
                                ids_train,
                                feats_train,
                                subm,
                                topk=100)

    output_name = ROOT + 'output/submit_retrieval.csv.gz'
    subm[['id', 'images']].to_csv(output_name, compression='gzip', index=False)
    print('saved to ' + output_name)

    # The printable form of the command is kept for the log, but the command
    # itself is run as an argument list so that a path containing spaces or
    # shell metacharacters is passed to kaggle unchanged.
    cmd = f'kaggle c submit -c landmark-retrieval-2019 -f {output_name} -m "" '
    print(cmd)
    subprocess.run([
        'kaggle', 'c', 'submit',
        '-c', 'landmark-retrieval-2019',
        '-f', output_name,
        '-m', '',
    ])
def ban_final():
    """Zero the score of test images that lie close to a "banned" image.

    A previously written submission is read, and every landmark id that is
    predicted for more than ``ban_thresh`` test images is treated as banned.
    The test images carrying such a prediction are used as queries against a
    sharded multi-GPU flat L2 index built from the remaining test images.
    Every remaining image that is retrieved with a distance below
    ``dist_thresh`` gets its ``landmarks`` entry rewritten to
    ``'<landmark_id> 0'``. The result is written to
    ``../output/l2dist_0.5.csv.gz``.
    """
    import faiss
    import numpy as np
    import pandas as pd
    from src import utils

    topk = 100          # neighbours retrieved per banned query image
    n_gpus = 4          # number of GPU resource objects the index is sharded over
    ban_thresh = 30     # a landmark id predicted more often than this is banned
    dist_thresh = 0.5   # neighbours closer than this are banned as well
    ROOT = '/opt/landmark/'
    submission_path = (
        '../output/stage2_submit_banthresh30_ens3_top3_DBAx1_v44r7.csv.gz')
    output_filename = '../output/l2dist_0.5.csv.gz'

    # (experiment version, run tag) of every ensemble member.
    runs = [
        ('v19c',
         'ms_L2_ep4_scaleup_ep3_freqthresh-2_loss-cosface_pooling-G,G,G,G_verifythresh-30'),
        ('v20c',
         'ms_L2_ep5_augmentation-middle_epochs-7_freqthresh-3_loss-arcface_verifythresh-30'),
        ('v21c',
         'ms_L2_ep6_scaleup_ep5_augmentation-middle_epochs-7_freqthresh-3_loss-arcface_verifythresh-30'),
        ('v22c',
         'ms_L2_ep4_scaleup_ep3_base_margin-0.4_freqthresh-2_verifythresh-30'),
        ('v23c',
         'ms_L2_ep6_scaleup_ep5_augmentation-middle_epochs-7_freqthresh-3_verifythresh-30'),
        ('v24c',
         'ms_L2_ep5_augmentation-middle_epochs-7_freqthresh-3_loss-cosface_verifythresh-30'),
    ]
    test_dirs = [
        ROOT + f'experiments/{version}/feats_test19_{tag}/'
        for version, tag in runs
    ]
    # One ensemble weight per entry of ``runs``; chosen by intuition.
    weights = [0.5, 1.0, 1.0, 0.5, 1.0, 1.0]

    ids_test, feats_test = utils.prepare_ids_and_feats(test_dirs,
                                                       weights,
                                                       normalize=True)

    # Each 'landmarks' cell is split on a single space into the landmark id
    # and its score. The frame is loaded once and reused below: nothing
    # between here and the final update modifies it, so re-reading the same
    # file a second time was redundant.
    subm = pd.read_csv(submission_path)
    subm['landmark_id'], subm['score'] = list(
        zip(*subm['landmarks'].apply(lambda x: str(x).split(' '))))
    subm['score'] = subm['score'].astype(np.float32)
    subm = subm.sort_values('score', ascending=False).set_index('id')

    # Landmark ids that occur more than ``ban_thresh`` times are banned.
    freq = subm['landmark_id'].value_counts()
    ban_lids = freq[freq > ban_thresh].index

    # Split the test set into images predicted as a banned landmark (the
    # queries) and all other images (the database).
    is_ban = np.isin(ids_test, subm[subm['landmark_id'].isin(ban_lids)].index)
    ban_feats_test = feats_test[is_ban]
    not_ban_ids_test = ids_test[~is_ban]
    not_ban_feats_test = feats_test[~is_ban]

    print('build index...')
    co = faiss.GpuMultipleClonerOptions()
    co.shard = True
    vres = [faiss.StandardGpuResources() for _ in range(n_gpus)]

    cpu_index = faiss.IndexFlatL2(not_ban_feats_test.shape[1])
    gpu_index = faiss.index_cpu_to_gpu_multiple_py(vres, cpu_index, co)
    gpu_index.add(not_ban_feats_test)
    dists, topk_idx = gpu_index.search(x=ban_feats_test, k=topk)
    print('query search done.')

    # Non-banned images retrieved below the distance threshold keep their
    # landmark id but get the score replaced by 0.
    new_ban_ids = np.unique(not_ban_ids_test[topk_idx[dists < dist_thresh]])
    subm.loc[new_ban_ids,
             'landmarks'] = subm.loc[new_ban_ids, 'landmark_id'] + ' 0'

    subm.reset_index()[['id', 'landmarks']].to_csv(output_filename,
                                                   index=False,
                                                   compression='gzip')