def predict(optimizer, **kwargs):
    """Run test-time-augmented (TTA) inference on the test set.

    Loads `X_test.npy` from kwargs['data_path'], runs the model through
    `optimizer.test(...)`, averages softmax probabilities over all patches/TTA
    views of each image, and writes two CSVs next to kwargs['predict_from']:
    `proba.csv` (per-class probabilities) and `submission.csv` (fname, camera).

    Expected kwargs: 'data_path', 'batch_size', 'not_lazy', 'predict_from'.
    """
    # load data; labels are dummies (unknown at test time)
    X_test = np.load(os.path.join(kwargs['data_path'], 'X_test.npy'))
    y_test = np.zeros((len(X_test),), dtype=np.int64)

    # deterministic "clean" view
    test_transform = transforms.Compose([
        transforms.Lambda(lambda x: Image.fromarray(x)),
        transforms.ToTensor(),
        transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])
    ])

    # TTA: random flips, optional 90-degree rotation, gamma jitter and
    # re-JPEG-compression (mirrors the training-time augmentation)
    rng = RNG(seed=1337)
    base_transform = transforms.Compose([
        transforms.Lambda(lambda x: Image.fromarray(x)),
        transforms.RandomHorizontalFlip(),
        transforms.RandomVerticalFlip(),
        transforms.Lambda(
            lambda img: [img, img.transpose(Image.ROTATE_90)][int(rng.rand() < 0.5)]),
        transforms.Lambda(
            lambda img: adjust_gamma(img, gamma=rng.uniform(0.8, 1.25))),
        transforms.Lambda(
            lambda img: jpg_compress(img, quality=rng.randint(70, 100 + 1))),
        transforms.ToTensor(),
        transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])
    ])

    tta_n = 10

    def tta_f(img, n=tta_n - 1):
        # 1 clean view + (tta_n - 1) augmented views, stacked along dim 0
        out = [test_transform(img)]
        for _ in range(n):  # fix: `xrange` is Python-2-only
            out.append(base_transform(img))
        return torch.stack(out, 0)

    tta_transform = transforms.Compose([
        transforms.Lambda(lambda img: tta_f(img)),
    ])

    # shuffle must stay False so predictions align with test_dataset.X order
    test_loader = DataLoader(dataset=make_numpy_dataset(X_test, y_test, tta_transform),
                             batch_size=kwargs['batch_size'],
                             shuffle=False,
                             num_workers=4)
    test_dataset = KaggleCameraDataset(kwargs['data_path'], train=False,
                                       lazy=not kwargs['not_lazy'])

    # compute predictions
    logits, _ = optimizer.test(test_loader)

    # compute and save raw probs
    logits = np.vstack(logits)
    proba = softmax(logits)

    # group and average predictions: 16 patches per image x tta_n views each.
    # fix: use floor division `//` — plain `/` yields a float under Python 3
    # and breaks reshape.
    K = 16 * tta_n
    proba = proba.reshape(len(proba) // K, K, -1).mean(axis=1)

    fnames = [os.path.split(fname)[-1] for fname in test_dataset.X]
    df = pd.DataFrame(proba)
    df['fname'] = fnames
    # fix: wrap range in list() — list + range concatenation fails in Python 3
    df = df[['fname'] + list(range(10))]
    dirpath = os.path.split(kwargs['predict_from'])[0]
    df.to_csv(os.path.join(dirpath, 'proba.csv'), index=False)

    # compute predictions and save in submission format
    index_pred = unhot(one_hot_decision_function(proba))
    data = {
        'fname': fnames,
        'camera': [KaggleCameraDataset.target_labels()[int(c)] for c in index_pred]
    }
    df2 = pd.DataFrame(data, columns=['fname', 'camera'])
    df2.to_csv(os.path.join(dirpath, 'submission.csv'), index=False)
def train(optimizer, **kwargs):
    """Load (or create and cache) a stratified train/val patch split, build
    augmented data loaders, and run `optimizer.train(...)`.

    If `X_train.npy` etc. exist in kwargs['data_path'] they are reused;
    otherwise `X_patches.npy`/`y_patches.npy` are split once via
    StratifiedShuffleSplit and the four arrays are saved for next time.

    Expected kwargs: 'data_path', 'n_val', 'random_seed', 'batch_size'.
    """
    # load training data
    # fix: `print` statements are Python-2-only syntax; use the function form
    print('Loading and splitting data ...')
    if os.path.isfile(os.path.join(kwargs['data_path'], 'X_train.npy')):
        # cached split found — reuse it for reproducibility across runs
        X_train = np.load(os.path.join(kwargs['data_path'], 'X_train.npy'))
        y_train = np.load(os.path.join(kwargs['data_path'], 'y_train.npy'))
        X_val = np.load(os.path.join(kwargs['data_path'], 'X_val.npy'))
        y_val = np.load(os.path.join(kwargs['data_path'], 'y_val.npy'))
    else:
        X = np.load(os.path.join(kwargs['data_path'], 'X_patches.npy'))
        y = np.load(os.path.join(kwargs['data_path'], 'y_patches.npy'))
        # split into train, val in stratified fashion
        sss = StratifiedShuffleSplit(n_splits=1, test_size=kwargs['n_val'],
                                     random_state=kwargs['random_seed'])
        train_ind, val_ind = list(sss.split(np.zeros_like(y), y))[0]
        X_train = X[train_ind]
        y_train = y[train_ind]
        X_val = X[val_ind]
        y_val = y[val_ind]
        # cache the split so subsequent runs use identical data
        np.save(os.path.join(kwargs['data_path'], 'X_train.npy'), X_train)
        np.save(os.path.join(kwargs['data_path'], 'y_train.npy'), y_train)
        np.save(os.path.join(kwargs['data_path'], 'X_val.npy'), X_val)
        np.save(os.path.join(kwargs['data_path'], 'y_val.npy'), y_val)

    rng = RNG()
    # train-time augmentation: flips, optional 90-degree rotation, gamma
    # jitter and re-JPEG-compression (mirrored by the TTA in `predict`)
    # noinspection PyTypeChecker
    train_transform = transforms.Compose([
        transforms.Lambda(lambda x: Image.fromarray(x)),
        transforms.RandomHorizontalFlip(),
        transforms.RandomVerticalFlip(),
        transforms.Lambda(
            lambda img: [img, img.transpose(Image.ROTATE_90)][int(rng.rand() < 0.5)]),
        transforms.Lambda(
            lambda img: adjust_gamma(img, gamma=rng.uniform(0.8, 1.25))),
        transforms.Lambda(
            lambda img: jpg_compress(img, quality=rng.randint(70, 100 + 1))),
        transforms.ToTensor(),
        transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])
    ])
    val_transform = transforms.Compose([
        transforms.Lambda(lambda x: Image.fromarray(x)),
        transforms.ToTensor(),
        transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])
    ])

    train_dataset = make_numpy_dataset(X_train, y_train, train_transform)
    val_dataset = make_numpy_dataset(X_val, y_val, val_transform)

    # define loaders; shuffle=False because the stratified sampler controls
    # the ordering of training batches
    train_loader = DataLoader(dataset=train_dataset,
                              batch_size=kwargs['batch_size'],
                              shuffle=False,
                              num_workers=4,
                              sampler=StratifiedSampler(class_vector=y_train,
                                                        batch_size=kwargs['batch_size']))
    val_loader = DataLoader(dataset=val_dataset,
                            batch_size=kwargs['batch_size'],
                            shuffle=False,
                            num_workers=4)

    print('Starting training ...')
    optimizer.train(train_loader, val_loader)