def out_of_fold_predict(model_name: str) -> NpArray:
    """ Predicts result on train in OOF way. """
    folds = list(glob.glob(f"../models/{model_name}__fold_*_best.hdf5"))
    k_folds = len(folds)
    print(f"predicting model={model_name}, k_folds={k_folds}")
    assert k_folds in (7, 10, 20)

    cache_path = f"../output/predict_{model_name}.pkl"
    if os.path.exists(cache_path):
        # reuse cached OOF predictions if they already exist
        return pickle.load(open(cache_path, "rb"))
    else:
        # rebuild the same deterministic folds that were used during training
        kf = StratifiedKFold(n_splits=k_folds, shuffle=False)
        pred = np.zeros((len(train_indices), NUM_CLASSES))

        for k, (train_idx, val_idx) in enumerate(kf.split(train_indices, train_labels)):
            print(f"predicting fold {k}")

            if model_name.startswith("71"):
                x_train, y_train, x_val, y_val, x_test, label_binarizer, \
                    clips_per_sample = data_v4.load_data(train_idx, val_idx)
            else:
                x_train, y_train, x_val, y_val, x_test, label_binarizer, \
                    clips_per_sample = load_data(train_idx, val_idx)

            # predict the held-out samples of this fold with its own checkpoint
            p = predict(x_val, folds[k])

            for i, idx in enumerate(val_idx):
                pred[idx, :] = p[i]

        pickle.dump(pred, open(cache_path, "wb"))
        return pred
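

# A minimal usage sketch, not part of the original pipeline: OOF predictions
# from several trained models could be blended by simple averaging before
# being compared against train_labels. The model names in the example call
# are hypothetical placeholders.
def blend_oof_predictions(model_names) -> NpArray:
    """ Averages OOF predictions of several models into one (samples, classes) array. """
    oof_preds = [out_of_fold_predict(name) for name in model_names]
    return np.mean(oof_preds, axis=0)

# Example (hypothetical names): blend_oof_predictions(["71_model_a", "64_model_b"])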
    y_test = np.array(y_merged)
    print("y_test.shape after merge", y_test.shape)
    return y_test


if __name__ == "__main__":
    train_indices, test_idx, train_labels, class_weights = load_everything()

    if not ENABLE_KFOLD:
        train_idx, val_idx = train_test_split(train_indices, stratify=train_labels,
                                              random_state=0, test_size=TEST_SIZE)

        x_train, y_train, x_val, y_val, x_test, label_binarizer, \
            clips_per_sample = load_data(train_idx, val_idx)

        if not PREDICT_ONLY:
            train_model()

        pred = predict(x_test, label_binarizer, clips_per_sample, "nofolds")
    else:
        kf = StratifiedKFold(n_splits=KFOLDS, shuffle=False)
        pred = np.zeros((len(test_idx), KFOLDS, NUM_CLASSES))

        for k, (train_idx, val_idx) in enumerate(kf.split(train_indices, train_labels)):
            print("fold %d ==============================================" % k)

            x_train, y_train, x_val, y_val, x_test, label_binarizer, \
                clips_per_sample = load_data(train_idx, val_idx)
args.bidirectional = not args.unidirectional
print('the model name: ', model_name)

args.data_root += ''
args.save_root += ''
args.dataset = args.cell_1 + '_' + args.cell_2
args.data_root = os.path.join(args.data_root)
print('loading data from: ', args.data_root)
args.save_root = os.path.join(args.save_root, args.dataset)
print('saving results in: ', args.save_root)

model_dir = os.path.join(args.save_root, model_name)
if not os.path.exists(model_dir):
    os.makedirs(model_dir)
attentionmapfile = model_dir + '/' + args.attentionfilename

print('==>processing data')
Train, Valid, Test = data_v1.load_data(args)
CON = False
AUX = False

print('==>building model')
# select the model variant by name
if args.model_name == 'raw_d':
    model = Model.raw_d(args)
elif args.model_name == 'raw_c':
    model = Model.raw_c(args)
elif args.model_name == 'raw':
    model = Model.raw(args)
elif args.model_name == 'aux':
    args.shared = False
    model = Model.aux(args)
    AUX = True
    args.gamma = 0.0