def submit_cv_ensemble(ensemble, output_file):
    """Average a cross-validated ensemble and write a submission CSV.

    For every ensemble member, F2 thresholds are fitted with
    ``find_best_threshold.optimise_f2_thresholds_fast`` on that member's
    training labels and training predictions. Thresholds and test
    predictions are then averaged over the members, the averaged test
    predictions are binarised against the averaged thresholds, decoded
    with ``mlb.inverse_transform`` and written to
    ``paths.submissions + output_file``.

    Args:
        ensemble: Iterable of ``(net, val, train)`` triples. ``net`` is
            indexed with ``'train'`` and ``'test'``; ``train`` is indexed
            with ``'tags'`` and must support ``.str.split()``. ``val`` is
            accepted for interface compatibility but not used.
        output_file: File name appended to ``paths.submissions``.
    """
    thresholds = []
    tests = []
    for net, _val, train in ensemble:
        # Thresholds are fitted on the training labels/predictions of the
        # member, not on its validation split.
        y_train = mlb.transform(train['tags'].str.split()).astype(np.float32)
        thresholds.append(
            find_best_threshold.optimise_f2_thresholds_fast(
                y_train, net['train']))
        tests.append(net['test'])

    threshold_avg = np.average(np.stack(thresholds, axis=0), axis=0)
    test_avg = np.average(np.stack(tests, axis=0), axis=0)

    # NOTE(review): prediction rows are paired with os.listdir() order below;
    # this assumes the test predictions were produced in that same order.
    test_images = [path[:-len('.jpg')] for path in os.listdir(paths.test_jpg)]

    # Binarise with a single comparison. Doing it as two in-place masked
    # assignments makes the second comparison read values the first one has
    # already overwritten with 1, which wipes positives when a threshold >= 1.
    binarised = (test_avg > threshold_avg).astype(test_avg.dtype)
    predictions = mlb.inverse_transform(binarised)

    with open(paths.submissions + output_file, 'w') as submission:
        submission.write('image_name,tags\n')
        for tags, target in zip(predictions, test_images):
            output = target + ',' + ' '.join(tags)
            submission.write("%s\n" % output)

    print('Submission ready!')
def do(model_fold):
    """Fit and save F2 thresholds for one ``(model, fold)`` pair.

    Loads ``paths.predictions + '<model>-split_<i>.npz'``, fits one
    threshold vector per entry along the first axis of its ``'train'``
    array against the true training labels of fold ``i``, stacks the
    results along axis 1 and saves them with ``np.save`` under
    ``paths.thresholds + '<model>-split_<i>'``.

    Args:
        model_fold: Two-item sequence ``(model, i)``; ``model`` is the
            name used in the file names, ``i`` indexes ``split``.
    """
    model, i = model_fold

    # np.load on an .npz returns an archive object that keeps the file open;
    # read what is needed inside a context manager so the handle is released.
    with np.load(paths.predictions + '{}-split_{}.npz'.format(model, i)) as net:
        train_predictions = net['train']

    # Only the training indices of the fold are used here.
    train_idx, _val_idx = split[i]
    train_true = mlb.transform(
        labels_df.ix[train_idx]['tags'].str.split()).astype(np.float32)

    thresholds = [
        optimise_f2_thresholds_fast(train_true, train, verbose=True)
        for train in train_predictions
    ]
    thresholds = np.stack(thresholds, axis=1)

    np.save(paths.thresholds + '{}-split_{}'.format(model, i), thresholds)
    print('Saved {}-split_{}'.format(model, i))
def train_net(train, val, unsupervised, model, name):
    """Train ``model`` on labelled data mixed with pseudo-labelled samples.

    Builds a supervised dataset from ``train``, an "unsupervised" dataset
    from ``unsupervised`` (images read from ``paths.test_jpg``, initial
    targets taken from its ``'tags'`` column), combines both into one
    training loader, and fits the model for 16 epochs with Adam, BCE loss,
    a per-parameter-group learning-rate schedule, per-epoch checkpoints and
    CSV logging.

    Args:
        train: Passed to ``KaggleAmazonJPGDataset`` as the training samples.
        val: Passed to ``KaggleAmazonJPGDataset`` as the validation samples.
        unsupervised: Indexed with ``'tags'`` and ``'image_name'``.
        model: Must expose ``classifier`` and ``layer1`` .. ``layer4``
            sub-modules with ``parameters()``.
        name: Used for the checkpoint name and appended to ``paths.logs``.
    """
    # Initial targets for the unlabelled images, encoded from their tag strings.
    unsupervised_initialization = mlb.transform(unsupervised['tags'].str.split()).astype(np.float32)
    # NOTE(review): Series.as_matrix() only exists in older pandas releases.
    unsupervised_samples = unsupervised['image_name'].as_matrix()

    # NOTE(review): `len(x)//2*3` evaluates as (len(x)//2)*3, which is >= len(x)
    # for any length >= 2, so these slices keep everything (and keep nothing
    # for length 1). Possibly a different fraction was intended - confirm.
    unsupervised_initialization = unsupervised_initialization[:len(unsupervised_initialization)//2*3]
    unsupervised_samples = unsupervised_samples[:len(unsupervised_samples)//2*3]

    # Training pipeline: random flips + project augmentation, then tensor conversion.
    transformations_train = transforms.apply_chain([
        transforms.random_fliplr(),
        transforms.random_flipud(),
        transforms.augment(),
        torchvision.transforms.ToTensor()
    ])

    # Validation pipeline: tensor conversion only.
    transformations_val = transforms.apply_chain([
        torchvision.transforms.ToTensor()
    ])

    # The unsupervised dataset receives both pipelines and the initial targets.
    dset_train_unsupervised = KaggleAmazonUnsupervisedDataset(
        unsupervised_samples,
        paths.test_jpg,
        '.jpg',
        transformations_train,
        transformations_val,
        unsupervised_initialization
    )

    dset_train_supervised = KaggleAmazonJPGDataset(train, paths.train_jpg, transformations_train, divide=False)
    dset_train = KaggleAmazonSemiSupervisedDataset(dset_train_supervised, dset_train_unsupervised, None, indices=False)

    train_loader = DataLoader(dset_train, batch_size=64, shuffle=True, num_workers=10, pin_memory=True)

    dset_val = KaggleAmazonJPGDataset(val, paths.train_jpg, transformations_val, divide=False)
    val_loader = DataLoader(dset_val, batch_size=64, num_workers=10, pin_memory=True)

    # ids of every parameter that gets its own optimizer group below.
    # NOTE(review): this is a list, so each membership test in the filter is linear.
    ignored_params = list(map(id, chain(
        model.classifier.parameters(),
        model.layer1.parameters(),
        model.layer2.parameters(),
        model.layer3.parameters(),
        model.layer4.parameters()
    )))
    # Everything not in classifier/layer1..layer4 goes into the first group.
    base_params = filter(lambda p: id(p) not in ignored_params, model.parameters())

    # Six parameter groups, in order: base, layer1, layer2, layer3, layer4,
    # classifier. lr=0 here; the schedule callback supplies the real rates.
    optimizer = optim.Adam([
        {'params': base_params},
        {'params': model.layer1.parameters()},
        {'params': model.layer2.parameters()},
        {'params': model.layer3.parameters()},
        {'params': model.layer4.parameters()},
        {'params': model.classifier.parameters()}
    ], lr=0, weight_decay=0.0001)

    trainer = ModuleTrainer(model)

    # NOTE(review): the nesting of the inner `if` below was reconstructed from
    # whitespace-collapsed source - confirm against the original layout.
    def schedule(current_epoch, current_lrs, **logs):
        # Returns current_lrs after overwriting entries in place.
        # presumably current_lrs[k] corresponds to optimizer group k - verify
        # against LearningRateScheduler.
        lrs = [1e-3, 1e-4, 0.5e-4, 1e-5, 0.5e-5]
        epochs = [0, 1, 6, 8, 12]

        # Every milestone already reached overwrites the rates, so the last
        # reached milestone wins.
        for lr, epoch in zip(lrs, epochs):
            if current_epoch >= epoch:
                current_lrs[5] = lr
                # Entries 0-4 are only assigned from epoch 2 onwards; before
                # that they keep whatever value was passed in.
                if current_epoch >= 2:
                    current_lrs[4] = lr * 1
                    current_lrs[3] = lr * 1
                    current_lrs[2] = lr * 1
                    current_lrs[1] = lr * 1
                    current_lrs[0] = lr * 0.1

        return current_lrs

    trainer.set_callbacks([
        # saving_strategy always returns True and save_best_only is off.
        ModelCheckpoint(
            paths.models,
            name,
            save_best_only=False,
            saving_strategy=lambda epoch: True
        ),
        CSVLogger(paths.logs + name),
        LearningRateScheduler(schedule),
        SemiSupervisedUpdater(trainer, dset_train_unsupervised, start_epoch=6, momentum=0.25)
    ])

    trainer.compile(loss=nn.BCELoss(),
                    optimizer=optimizer)

    trainer.fit_loader(train_loader,
                       val_loader,
                       nb_epoch=16,
                       verbose=1,
                       cuda_device=0)
net = np.load(paths.predictions + '{}-split_{}.npz'.format(model_name, i)) thresholds = np.load(paths.thresholds + '{}-split_{}.npy'.format(model_name, i)) thresholds_all.append(np.average(thresholds, axis=1)) train = net['train'] val = net['val'] test = net['test'] x_train_all.append(train) x_val_all.append(val) x_test_all.append(test) labels_train = mlb.transform( labels_df.ix[labels_train]['tags'].str.split()).astype( np.float32) labels_val = mlb.transform( labels_df.ix[labels_val]['tags'].str.split()).astype( np.float32) labels_train_all.append(labels_train) labels_val_all.append(labels_val) train = x_train_all = np.concatenate(x_train_all, axis=1) val = x_val_all = np.concatenate(x_val_all, axis=1) # Test gets stacked over folds instead of concat test = x_test_all = np.stack(x_test_all, axis=0) # Thresholds get averaged as comparison thresholds_average = np.average(np.stack(thresholds_all, axis=0),