Exemplo n.º 1
0
def load_dataset(params,
                 eval_split,
                 train_remove_invalid,
                 eval_remove_invalid,
                 load_cnn_predictions=False,
                 load_cnn_features=False,
                 load_cnn_features_train=False):
    """Load train/eval observations, class names and optional CNN outputs.

    Args:
        params: Settings dict. ``params['dataset']`` selects the dataset and
            must be one of 'inat_2017', 'inat_2018', 'birdsnap', 'nabirds'
            or 'yfcc'. 'birdsnap' and 'nabirds' also read
            ``params['meta_type']``; 'inat_2018' reads
            ``params['inat2018_resolution']`` when any CNN output is
            requested. For 'yfcc' the dict is mutated in place:
            'use_date_feats' and 'balanced_train_loader' are set to False.
        eval_split: Name of the evaluation split. The iNat datasets accept
            only 'val' or 'test'; for the other datasets the value is passed
            to the loader and used to build the feature file names.
        train_remove_invalid: Forwarded to the train-split loader (unused by
            the yfcc loader). When true, loaded train features are
            re-indexed with the index array that loader returned.
        eval_remove_invalid: Same as above, for the evaluation split. When
            true, loaded eval features and eval predictions are re-indexed.
        load_cnn_predictions: Also load the image classifier's predictions
            for the evaluation split.
        load_cnn_features: Also load the image classifier's features for the
            evaluation split.
        load_cnn_features_train: Also load the image classifier's features
            for the train split.

    Returns:
        dict with keys 'train_locs', 'train_classes', 'train_users',
        'train_dates', 'val_locs', 'val_classes', 'val_users', 'val_dates',
        'class_of_interest', 'classes' (class id -> class name),
        'num_classes' and 'val_split'. 'val_preds', 'val_feats' and
        'train_feats' are present only when the matching flag is set.
        'val_split' is the boolean "usage == 'Private'" column of the Kaggle
        solution file for the iNat test splits, and an all-ones integer
        array with one entry per eval observation otherwise.

    Raises:
        ValueError: If ``params['dataset']`` is not a known dataset, or an
            iNat dataset is requested with an unsupported ``eval_split``.
    """
    dataset = params['dataset']
    val_split = None  # only the iNat test splits provide one

    if dataset in ('inat_2017', 'inat_2018'):

        # fail early: the iNat annotation file names only exist for these
        if eval_split not in ('val', 'test'):
            raise ValueError(
                "eval_split must be 'val' or 'test' for " + dataset +
                ', got ' + repr(eval_split))

        year = dataset[-4:]
        data_dir = get_paths(dataset + '_data_dir')
        num_classes = {'inat_2017': 5089, 'inat_2018': 8142}[dataset]
        # same id for both years; the inat_2018 code labelled it "wood thrush"
        class_of_interest = 3731

        # load observations
        train_locs, train_classes, train_users, train_dates, train_inds = \
            load_inat_data(data_dir, 'train' + year + '_locations.json',
                           'train' + year + '.json', train_remove_invalid)

        if eval_split == 'val':
            eval_ann_file = eval_split + year + '.json'
        else:
            eval_ann_file = eval_split + year + '_DO_NOT_SHARE.json'
        val_locs, val_classes, val_users, val_dates, val_inds = \
            load_inat_data(data_dir, eval_split + year + '_locations.json',
                           eval_ann_file, eval_remove_invalid)

        if eval_split == 'test':
            solution = pd.read_csv(
                data_dir + 'kaggle_solution_' + year + '_DO_NOT_SHARE.csv')
            val_split = solution['usage'].values == 'Private'

        # load class names
        with open(data_dir + 'categories' + year + '.json') as da:
            cls_data = json.load(da)
        classes = {cc['id']: cc['name'] for cc in cls_data}

        # 2018 has a second, high resolution, set of CNN outputs. The
        # resolution setting is only read when CNN outputs are requested so
        # that params without that key keep working otherwise.
        feat_dir = 'features_inception/'
        wants_cnn = (load_cnn_predictions or load_cnn_features or
                     load_cnn_features_train)
        if (dataset == 'inat_2018' and wants_cnn and
                params['inat2018_resolution'] == 'high_res'):
            feat_dir = 'features_inception_hr/'
        feat_prefix = data_dir + feat_dir + 'inat' + year + '_'
        sparse_preds = True

    elif dataset in ('birdsnap', 'nabirds'):

        data_dir = get_paths(dataset + '_data_dir')
        ann_file_name = dataset + '_with_loc_2019.json'
        num_classes = {'birdsnap': 500, 'nabirds': 555}[dataset]
        class_of_interest = 0

        # load observations
        train_locs, train_classes, train_users, train_dates, train_inds = \
            load_bird_data(data_dir, ann_file_name, 'train',
                           train_remove_invalid, params['meta_type'])
        val_locs, val_classes, val_users, val_dates, val_inds = \
            load_bird_data(data_dir, ann_file_name, eval_split,
                           eval_remove_invalid, params['meta_type'])

        # load class names - ids are the positions in the annotation list
        with open(data_dir + ann_file_name) as da:
            class_names = json.load(da)['classes']
        classes = dict(enumerate(class_names))

        feat_prefix = data_dir + 'features_inception/' + dataset + '_'
        sparse_preds = True

    elif dataset == 'yfcc':

        data_dir = get_paths('yfcc_data_dir')
        print('  No user or date features for yfcc.')
        params['use_date_feats'] = False
        params['balanced_train_loader'] = False
        num_classes = 100
        class_of_interest = 9  # beach

        # load observations
        train_locs, train_classes, train_users, train_dates = load_yfcc_data(
            data_dir, 'train_test_split.csv', 'train')
        val_locs, val_classes, val_users, val_dates = load_yfcc_data(
            data_dir, 'train_test_split.csv', eval_split)
        # the yfcc loader returns no index arrays, so keep every row
        train_inds = np.arange(train_locs.shape[0])
        val_inds = np.arange(val_locs.shape[0])

        # load class names
        da = pd.read_csv(data_dir + 'class_names.csv')
        classes = dict(zip(da['id'].values, da['name'].values))

        feat_prefix = data_dir + 'features_inception/YFCC_'
        sparse_preds = False  # yfcc predictions are stored as a dense .npy

    else:
        raise ValueError('Unknown dataset: ' + repr(dataset))

    # optional outputs of the trained image classifier
    if load_cnn_predictions:
        if sparse_preds:
            val_preds = load_sparse_feats(
                feat_prefix + eval_split + '_preds_sparse.npz')
        else:
            val_preds = np.load(feat_prefix + eval_split + '_preds.npy')

    if load_cnn_features:
        val_feats = np.load(feat_prefix + eval_split + '_net_feats.npy')

    if load_cnn_features_train:
        train_feats = np.load(feat_prefix + 'train_net_feats.npy')

    # keep only the rows the loaders kept, so rows line up with *_locs
    if load_cnn_features_train and train_remove_invalid:
        train_feats = train_feats[train_inds, :]

    # features and predictions are filtered independently: each one may be
    # requested without the other
    if eval_remove_invalid:
        if load_cnn_features:
            val_feats = val_feats[val_inds, :]
        if load_cnn_predictions:
            val_preds = val_preds[val_inds, :]

    # return data in dictionary
    op = {}
    op['train_locs'] = train_locs
    op['train_classes'] = train_classes
    op['train_users'] = train_users
    op['train_dates'] = train_dates

    op['val_locs'] = val_locs
    op['val_classes'] = val_classes
    op['val_users'] = val_users
    op['val_dates'] = val_dates

    op['class_of_interest'] = class_of_interest
    op['classes'] = classes
    op['num_classes'] = num_classes

    if load_cnn_predictions:
        op['val_preds'] = val_preds  # class predictions from trained image classifier
    if load_cnn_features:
        op['val_feats'] = val_feats  # features from trained image classifier
        assert val_feats.shape[0] == val_locs.shape[0]
    if load_cnn_features_train:
        op['train_feats'] = train_feats  # features from trained image classifier
        assert train_feats.shape[0] == train_locs.shape[0]

    # if no data split was loaded fall back to a single all-ones split
    # (builtin int: the np.int alias was removed in NumPy 1.24)
    if val_split is None:
        val_split = np.ones(val_locs.shape[0], dtype=int)
    op['val_split'] = val_split

    return op
import json
from sklearn.manifold import TSNE
import os

import sys
sys.path.append('../')
from geo_prior import models
from geo_prior.paths import get_paths
import geo_prior.datasets as dt
import geo_prior.grid_predictor as grid

# Users to plot and, in the same order, the code paired with each one
# (presumably matplotlib colour codes - confirm where they are consumed).
users_of_interest = [344, 345, 42]
users_of_interest_cols = ['r', 'y', 'k']

# Script settings.
model_path = '../models/model_inat_2018_full_final.pth.tar'
data_dir = get_paths('inat_2018_data_dir')
min_num_exs = 100
seed = 2001
dpi = 150.0
num_time_steps = 12
op_dir = 'images/user_ims/'
# exist_ok avoids the check-then-create race of a separate isdir() test
os.makedirs(op_dir, exist_ok=True)

# this has been precomputed by plot_class_embedding.py
if os.path.isfile('images/class_ims/all_classes.npz'):
    class_embedding = np.load('images/class_ims/all_classes.npz')
else:
    print('Error: Need to run plot_class_embedding.py first.')
    # non-zero status so that callers can tell the script did not run
    sys.exit(1)
Exemplo n.º 3
0
# Make sure the output directory exists before anything is written to it.
if not os.path.isdir(op_dir):
    os.makedirs(op_dir)

# load model
# The checkpoint is read onto the CPU first; the model is then moved to
# params['device'], a value stored inside the checkpoint itself.
# NOTE(review): torch.load unpickles the file - only use trusted checkpoints.
net_params = torch.load(model_path, map_location='cpu')
params = net_params['params']
model = models.FCNet(num_inputs=params['num_feats'],
                     num_classes=params['num_classes'],
                     num_filts=params['num_filts'],
                     num_users=params['num_users']).to(params['device'])

model.load_state_dict(net_params['state_dict'])
# presumably FCNet is a torch.nn.Module, so this selects inference mode
model.eval()

# load ocean mask
mask = np.load(get_paths('mask_dir') + 'ocean_mask.npy')

# grid predictor - for making dense predictions for each lon/lat location
gp = grid.GridPredictor(mask, params, mask_only_pred=True)

# compute intermediate network features
# One feature array per time step. linspace(0, 1, n + 1)[:-1] gives n evenly
# spaced values in [0, 1): the end point 1 is dropped.
print('Computing features.')
feats = []
for time_step in np.linspace(0, 1, num_time_steps + 1)[:-1]:
    net_feats = gp.dense_prediction_masked_feats(model,
                                                 time_step).data.cpu().numpy()
    feats.append(net_feats)

# downsample features - choose middle time step
# The FastICA model is fitted on the features of the middle time step only.
# `decomposition` and `num_ds_dims` are defined outside this excerpt
# (presumably sklearn.decomposition - confirm).
dsf = decomposition.FastICA(n_components=num_ds_dims, random_state=seed)
dsf.fit(feats[len(feats) // 2])
Exemplo n.º 4
0
import torch
import numpy as np
import matplotlib.pyplot as plt
import json
import os
from sklearn.manifold import TSNE

import sys
sys.path.append('../')
from geo_prior import models
from geo_prior.paths import get_paths

# Script settings.
seed = 2001
model_path = '../models/model_inat_2018_full_final.pth.tar'
data_dir = get_paths('inat_2018_data_dir')
op_dir = 'images/class_ims/'
# Make sure the output directory exists before anything is written to it.
if not os.path.isdir(op_dir):
    os.makedirs(op_dir)

# load class info: one entry per category with 'name', 'id' and
# 'supercategory' fields
with open(data_dir + 'categories2018.json') as da:
    cls_data = json.load(da)
class_names = [cc['name'] for cc in cls_data]
class_ids = [cc['id'] for cc in cls_data]
supercat_names = [cc['supercategory'] for cc in cls_data]
# supercat_un: sorted unique supercategory names.
# supercat_ids: for every class, the index of its supercategory in supercat_un.
supercat_un, supercat_ids = np.unique(supercat_names, return_inverse=True)

# load model
# The checkpoint is read onto the CPU; its 'params' entry holds the settings
# used to build the network below.
# NOTE(review): torch.load unpickles the file - only use trusted checkpoints.
net_params = torch.load(model_path, map_location='cpu')
params = net_params['params']
model = models.FCNet(num_inputs=params['num_feats'],
Exemplo n.º 5
0
import torch
import numpy as np
import matplotlib.pyplot as plt
import json
import os

import sys
sys.path.append('../')
from geo_prior import models
from geo_prior.paths import get_paths
import geo_prior.datasets as dt

# Script settings. inat_year selects the model checkpoint, the data
# directory and the categories file used below.
inat_year = '2018'  # '2017' or '2018'
model_path = '../models/model_inat_' + inat_year + '_full_final.pth.tar'
data_dir = get_paths('inat_' + inat_year + '_data_dir')
min_num_exs = 100
seed = 2001
dpi = 150.0
op_dir = 'images/metadata_stats/'
# Make sure the output directory exists before anything is written to it.
if not os.path.isdir(op_dir):
    os.makedirs(op_dir)

# load class info: one entry per category with 'name', 'id' and
# 'supercategory' fields
with open(data_dir + 'categories' + inat_year + '.json') as da:
    cls_data = json.load(da)
class_names = [cc['name'] for cc in cls_data]
class_ids = [cc['id'] for cc in cls_data]
supercat_names = [cc['supercategory'] for cc in cls_data]
# supercat_un: sorted unique supercategory names.
# supercat_ids: for every class, the index of its supercategory in supercat_un.
supercat_un, supercat_ids = np.unique(supercat_names, return_inverse=True)