] Xt, X_test, yt, y_test = train_test_split(train_ffm.values, y.values, test_size = 0.2) ll = [] for i, (train_index, valid_index) in enumerate(StratifiedKFold(yt, n_folds = 10, random_state=0)): print('Fold {}'.format(i)) X_train, X_valid = Xt[train_index], Xt[valid_index] y_train, y_valid = yt[train_index], yt[valid_index] valid_set = [] lb = LabelBinarizer() ybin = lb.fit_transform(yt) for ylabel in lb.classes_: print(ylabel) tdf = pd.DataFrame(np.vstack([(y_train == ylabel).T,X_train.T]).T) vdf = pd.DataFrame(np.vstack([(y_valid == ylabel).T,X_valid.T]).T) train_file = './ffm/ffm_train_fold_{}_{}.csv'.format(i, ylabel) valid_file = './ffm/ffm_valid_fold_{}_{}.csv'.format(i, ylabel) model_file = './ffm/ffm_model_fold_{}_{}.csv'.format(i, ylabel) predt_file = './ffm/ffm_predt_fold_{}_{}.csv'.format(i, ylabel) tdf.to_csv(train_file, sep=" ", header=False, index=False, quote=csv.QUOTE_NONE, quotechar=" ") vdf.to_csv(valid_file, sep=" ", header=False, index=False, quote=csv.QUOTE_NONE, quotechar=" ") check_call(['ffm-train'] + ffm_params + ['-p', valid_file, train_file, model_file]) check_call(['ffm-predict', valid_file, model_file, predt_file]) valid_set.append(np.loadtxt(predt_file)) yp = np.array(valid_set).T yp = (yp / yp.sum(axis=1)[:, np.newaxis]) ll.append(OttoCompetition.score(y_valid, yp, lb.classes_.tolist()))