Пример #1
0
 ]
 
 # Hold out 20% of the rows; only the remaining split (Xt, yt) feeds the
 # cross-validation below. X_test / y_test are not used in this section.
 Xt, X_test, yt, y_test = train_test_split(train_ffm.values, y.values, test_size = 0.2)
 
 # The class list depends only on yt, so fit the binarizer once instead of
 # refitting it inside every fold. Only lb.classes_ is needed afterwards.
 lb = LabelBinarizer()
 lb.fit(yt)
 
 # One entry per fold: the value returned by OttoCompetition.score.
 ll = []
 for i, (train_index, valid_index) in enumerate(StratifiedKFold(yt, n_folds = 10, random_state=0)):
     print('Fold {}'.format(i))
     X_train, X_valid = Xt[train_index], Xt[valid_index]
     y_train, y_valid = yt[train_index], yt[valid_index]
     # Predictions for the validation rows, one array per class, appended in
     # lb.classes_ order (one binary model is trained per class).
     valid_set = []
     for ylabel in lb.classes_:
         print(ylabel)
         # First column is the binary target for this class, the remaining
         # columns are the features. The target is cast to int so it is
         # written as 1/0 even when the feature matrix is object/string typed
         # (a bool would be serialised as True/False).
         # NOTE(review): assumes ffm-train wants a numeric label -- confirm.
         tdf = pd.DataFrame(np.vstack([(y_train == ylabel).astype(int).T, X_train.T]).T)
         vdf = pd.DataFrame(np.vstack([(y_valid == ylabel).astype(int).T, X_valid.T]).T)
         train_file = './ffm/ffm_train_fold_{}_{}.csv'.format(i, ylabel)
         valid_file = './ffm/ffm_valid_fold_{}_{}.csv'.format(i, ylabel)
         model_file = './ffm/ffm_model_fold_{}_{}.csv'.format(i, ylabel)
         predt_file = './ffm/ffm_predt_fold_{}_{}.csv'.format(i, ylabel)
         # `quoting` is the keyword to_csv understands (`quote` is not a
         # to_csv parameter). With QUOTE_NONE and no escapechar the writer
         # raises if a field contains the separator rather than quoting it.
         tdf.to_csv(train_file, sep=" ", header=False, index=False,
                    quoting=csv.QUOTE_NONE, quotechar=" ")
         vdf.to_csv(valid_file, sep=" ", header=False, index=False,
                    quoting=csv.QUOTE_NONE, quotechar=" ")
         # Train on the fold's training file (validation file passed via -p),
         # then write predictions for the validation file to predt_file.
         # check_call raises CalledProcessError on a non-zero exit status.
         check_call(['ffm-train'] + ffm_params + ['-p', valid_file, train_file, model_file])
         check_call(['ffm-predict', valid_file, model_file, predt_file])
         valid_set.append(np.loadtxt(predt_file))
     # Build the (rows x classes) matrix once all classes are predicted and
     # rescale each row so the per-class values sum to 1.
     yp = np.array(valid_set).T
     yp = yp / yp.sum(axis=1, keepdims=True)
     ll.append(OttoCompetition.score(y_valid, yp, lb.classes_.tolist()))