import xgboost as xgb

from otto_utils import load_train_data


def load_xgb_train_data(train_csv, train_buf):
    X, y, encoder, scaler = load_train_data(train_csv)
    # save data for xgboost in its binary buffer format for fast reloading
    dtrain = xgb.DMatrix(X, label=y)
    dtrain.save_binary(train_buf)
    return X, y, encoder, scaler, dtrain
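# A minimal usage sketch (the paths mirror the ones used elsewhere in this
# project); once saved, the binary buffer can be reloaded directly with
# xgb.DMatrix, skipping the CSV parse on later runs:
X, y, encoder, scaler, dtrain = load_xgb_train_data('../data/train.csv',
                                                    'data/train.buffer')
dtrain_reloaded = xgb.DMatrix('data/train.buffer')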
import os
import sys

from lasagne.layers import InputLayer
from lasagne.layers import DropoutLayer
from lasagne.nonlinearities import softmax
from lasagne.updates import nesterov_momentum
from nolearn.lasagne import NeuralNet
from sklearn import cross_validation

pardir = os.path.realpath('..')
if pardir not in sys.path:
    sys.path.append(pardir)
from otto_utils import (load_train_data, load_test_data, mkdir_p,
                        calc_ll_from_proba)

X, y, encoder, scaler = load_train_data('../data/train.csv')
X_test, ids = load_test_data('../data/test.csv', scaler)
num_classes = len(encoder.classes_)
num_features = X.shape[1]

n = X.shape[0]
n_folds = 5
# create cv number of files for cross validation
kf = cross_validation.KFold(n, n_folds=n_folds, shuffle=True,
                            random_state=1234)

# Train Neural Net
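# A minimal sketch of the net definition the comment above announces, assuming
# a single dense hidden layer with dropout; the hidden-layer size and update
# hyperparameters here are illustrative, not the script's actual values.
from lasagne.layers import DenseLayer

net = NeuralNet(
    layers=[('input', InputLayer),
            ('dense0', DenseLayer),
            ('dropout0', DropoutLayer),
            ('output', DenseLayer)],
    input_shape=(None, num_features),  # one row of num_features inputs
    dense0_num_units=512,              # illustrative hidden-layer size
    dropout0_p=0.5,                    # drop half the hidden units
    output_num_units=num_classes,      # one unit per Otto class
    output_nonlinearity=softmax,       # class probabilities
    update=nesterov_momentum,
    update_learning_rate=0.01,
    update_momentum=0.9,
    max_epochs=20,
    verbose=1,
)
net.fit(X.astype('float32'), y.astype('int32'))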
import json

import xgboost as xgb
from sklearn import cross_validation

import otto_utils as ou

# simdir and ncv (the output directory and the fold to run) are assumed to be
# defined earlier in the script, e.g. from command-line arguments
ou.mkdir_p(simdir)

num_rounds = 2000
params = """{"eval_metric": "mlogloss", "early_stopping_rounds": 10,
             "colsample_bytree": "0.5", "num_class": 9, "silent": 1,
             "nthread": 16, "min_child_weight": "4", "subsample": "0.8",
             "eta": "0.0125", "objective": "multi:softprob",
             "max_depth": "14", "gamma": "0.025"}"""
params = json.loads(params)

# files
train_csv = '../data/train.csv'
train_buf = 'data/train.buffer'

# first clean the train data and save
print 'loading data...'
X, y, encoder, scaler = ou.load_train_data(train_csv)

n_folds = 5
n, p = X.shape
# create cv number of files for cross validation
kf = cross_validation.KFold(n, n_folds=n_folds, shuffle=True,
                            random_state=1234)

ll = []
i = 0
for train_index, test_index in kf:
    # run only the fold selected by ncv
    if i != ncv:
        i += 1
        continue
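    # A minimal sketch of the per-fold step that would follow, assuming
    # ou.calc_ll_from_proba takes (predicted probabilities, true labels);
    # early stopping monitors mlogloss on the held-out fold.
    dtrain = xgb.DMatrix(X[train_index], label=y[train_index])
    dvalid = xgb.DMatrix(X[test_index], label=y[test_index])
    watchlist = [(dtrain, 'train'), (dvalid, 'valid')]
    bst = xgb.train(params, dtrain, num_rounds, watchlist,
                    early_stopping_rounds=params['early_stopping_rounds'])
    proba = bst.predict(dvalid)
    ll.append(ou.calc_ll_from_proba(proba, y[test_index]))
    break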