Пример #1
0
            'colsample_bytree': 0.5,
            'target': 'target',
            'validation_set': xg_valid,
            'num_class' : 9,
            'objective': 'multi:softprob',
            'eval:metric': 'mlogloss',
            'silent': 1,
            }
    
    watchlist = [ (xg_train, 'train'), (xg_valid, 'valid') ]
    bst = xgb.train(params, xg_train, rounds, watchlist,
                    early_stopping_rounds=100, evals_result=evals)
    return bst, evals

# Script entry point: load the Otto training and test data, then iterate over
# five stratified folds, holding one fold out per iteration.
if __name__ == '__main__':
    # Labelled training data and the unlabelled test features (the second
    # value returned for the test set is discarded).
    X, y = OttoCompetition.load_data(train=True)
    X_test, _ = OttoCompetition.load_data(train=False)
    # Label encoder fitted on the training labels.
    le = LabelEncoder().fit(y)

    # Accumulators spanning all outer folds; they are filled by code that is
    # not visible in this excerpt.
    all_hold = []
    all_hold_predict = []
    all_test_predict = []
    all_weights = []
    # 20% holdout: each of the 5 stratified folds serves once as the holdout
    # set while the other four form the working data.
    for i, (data_index, hold_index) in enumerate(StratifiedKFold(y, n_folds = 5, random_state=0)):
        # Index arrays select rows; assumes X and y support integer-array
        # indexing (e.g. numpy arrays) -- TODO confirm against load_data.
        X_data, X_hold = X[data_index], X[hold_index]
        y_data, y_hold = y[data_index], y[hold_index]
        # Per-fold prediction lists, reset for every outer fold.
        y_hold_predict = []
        y_test_predict = []
        # train with 50%, validation with 5%
        for j, (train_index, valid_index) in enumerate(StratifiedShuffleSplit(y_data, 20, test_size = 0.05, train_size = 0.5, random_state=0)):
Пример #2
0
from lasagne.layers import DenseLayer
from lasagne.layers import InputLayer
from lasagne.layers import DropoutLayer
from lasagne.nonlinearities import softmax
from lasagne.updates import nesterov_momentum
from nolearn.lasagne import NeuralNet

# Script entry point: load and scale the Otto data, encode the labels and
# carve out a stratified 20% holdout set.
if __name__ == '__main__':
    encoder = LabelEncoder()
    
    # Identical to StandardScaler using all train and test data.
    # NOTE(review): transform() is called below without a visible fit();
    # presumably OttoScaler is already fitted on construction -- confirm.
    scaler = OttoScaler()

    # Training data: labels are encoded and cast to int32, features are
    # scaled and cast to float32.
    X, y = OttoCompetition.load_data(train=True)
    y = encoder.fit_transform(y).astype('int32')
    X = scaler.transform(X).astype('float32')
    # Number of distinct encoded labels and number of feature columns.
    n_classes = np.unique(y).shape[0]
    n_features = X.shape[1]

    # Split a holdout set: take the single stratified shuffle split
    # (20% holdout, fixed seed) produced by the splitter.
    data_idx, hold_idx = next(iter(StratifiedShuffleSplit(y, 1, test_size = 0.2, random_state=0)))
    X_data, X_hold = X[data_idx], X[hold_idx]
    y_data, y_hold = y[data_idx], y[hold_idx]

    # Test data: scaled with the same scaler and cast like the training
    # features; the second value returned by load_data is discarded.
    X_test, _ = OttoCompetition.load_data(train=False)
    X_test = scaler.transform(X_test).astype('float32')

Пример #3
0
 ]
 
 # Keep 20% of the rows aside as a test split; the one-vs-rest FFM models
 # below are cross-validated on the remaining 80%.
 Xt, X_test, yt, y_test = train_test_split(train_ffm.values, y.values, test_size = 0.2)

 # The set of class labels is the same for every fold, so the binarizer is
 # fitted once up front; only lb.classes_ is used by the loop below.
 lb = LabelBinarizer()
 ybin = lb.fit_transform(yt)

 # One score per fold, as returned by OttoCompetition.score.
 ll = []
 for i, (train_index, valid_index) in enumerate(StratifiedKFold(yt, n_folds = 10, random_state=0)):
     print('Fold {}'.format(i))
     X_train, X_valid = Xt[train_index], Xt[valid_index]
     y_train, y_valid = yt[train_index], yt[valid_index]
     # Predictions loaded for this fold, one array per class label.
     valid_set = []
     for ylabel in lb.classes_:
         print(ylabel)
         # One-vs-rest frames: the first column is the "is this class"
         # indicator, the remaining columns are the feature columns.
         tdf = pd.DataFrame(np.vstack([(y_train == ylabel).T,X_train.T]).T)
         vdf = pd.DataFrame(np.vstack([(y_valid == ylabel).T,X_valid.T]).T)
         train_file = './ffm/ffm_train_fold_{}_{}.csv'.format(i, ylabel)
         valid_file = './ffm/ffm_valid_fold_{}_{}.csv'.format(i, ylabel)
         model_file = './ffm/ffm_model_fold_{}_{}.csv'.format(i, ylabel)
         predt_file = './ffm/ffm_predt_fold_{}_{}.csv'.format(i, ylabel)
         # `quoting` is the to_csv keyword that disables quoting; the former
         # `quote=` is not a to_csv parameter.
         # NOTE(review): quotechar equals the separator here -- confirm the
         # installed csv module accepts identical delimiter and quotechar.
         tdf.to_csv(train_file, sep=" ", header=False, index=False,
                    quoting=csv.QUOTE_NONE, quotechar=" ")
         vdf.to_csv(valid_file, sep=" ", header=False, index=False,
                    quoting=csv.QUOTE_NONE, quotechar=" ")
         # Train with the validation file passed via -p, then write the
         # predictions for the validation rows to predt_file.
         check_call(['ffm-train'] + ffm_params + ['-p', valid_file, train_file, model_file])
         check_call(['ffm-predict', valid_file, model_file, predt_file])
         valid_set.append(np.loadtxt(predt_file))
     # Assemble the per-class predictions into one column per class and
     # rescale every row to sum to 1. Done once per fold, after all classes
     # are predicted, instead of after every single class.
     yp = np.array(valid_set).T
     yp = (yp / yp.sum(axis=1)[:, np.newaxis])
     ll.append(OttoCompetition.score(y_valid, yp, lb.classes_.tolist()))