# set booster parameters
# Command line: <simdir> <simnum> <num_rounds> <eta> <gamma> <max_depth>
#               <min_child_weight> <colsample_bytree> <subsample>
argv = sys.argv
simdir = argv[1]
simnum = int(argv[2])
num_rounds = int(argv[3])

# The six tunable values are stored exactly as typed on the command line
# (strings); only the run index and the round count are converted to int.
cv_params = {
    "eval_metric": "mlogloss",
    "objective": "multi:softprob",
    "silent": 1,
    "num_class": 9,
    "nthread": 16,
    "eta": argv[4],
    "gamma": argv[5],
    "max_depth": argv[6],
    "min_child_weight": argv[7],
    "colsample_bytree": argv[8],
    "subsample": argv[9],
    "early_stopping_rounds": 10,
}
print(cv_params)

# Record the exact parameter set next to the results of this run.
mkdir_p(simdir)
paramfile = simdir + ("/param_%d.txt" % simnum)
with open(paramfile, "w") as fp:
    json.dump(cv_params, fp)

# Run 3-fold cross-validation and save whatever xu.xgb_cv returns as text.
lls = xu.xgb_cv(cv_params, dtrain, num_rounds, nfold=3)
logfile = simdir + ("/log_%d.txt" % simnum)
np.savetxt(logfile, lls)
# Cross-validate a standardised SVC for one hyper-parameter setting, then
# write the mean/std of the fold scores and the settings used to disk.
# Command line: <simdir> <simnum> <kernel> <C> <gamma>
cv_params = {}
cv_params['simdir'] = sys.argv[1]
cv_params['simnum'] = int(sys.argv[2])
cv_params['kernel'] = sys.argv[3]
cv_params['C'] = float(sys.argv[4])
cv_params['gamma'] = float(sys.argv[5])

clf_svc = svm.SVC(
    probability=True,
    verbose=False,
    C=cv_params['C'],
    kernel=cv_params['kernel'],
    gamma=cv_params['gamma'],
    cache_size=1000,
)
# Features are standardised inside the pipeline, so the scaler is re-fit on
# the training part of every fold.
clf = make_pipeline(preprocessing.StandardScaler(), clf_svc)

mkdir_p(cv_params['simdir'])

# NOTE(review): sklearn.cross_validation was removed in scikit-learn 0.20;
# switch to sklearn.model_selection when the dependency is upgraded.
scores = cross_validation.cross_val_score(
    clf, train, labels, cv=n_cvs, n_jobs=-1, scoring=logloss_mc, verbose=1
)
# Same output as the Python 2 statement `print "scores: ", scores`, but this
# form also parses on Python 3.
print("%s %s" % ("scores: ", scores))

# Build both output names from one shared stem. Deriving the parameter file
# with logfile.replace('.txt', 'p.txt') would also rewrite any '.txt' that
# occurs inside simdir and send the file to the wrong directory.
stem = cv_params['simdir'] + '/%d' % cv_params['simnum']
logfile = stem + '.txt'
with open(logfile, 'w') as fscores:
    fscores.write('%f %f\n' % (scores.mean(), scores.std()))

pfile = stem + 'p.txt'
with open(pfile, 'w') as f:
    json.dump(cv_params, f)
# Make the helper modules in `pardir` importable before importing them.
sys.path.append(pardir)
import xgb_utils as xu
import otto_utils as ou

# Command line: <ncv>, an integer in 0..4.
# sys.exit(<str>) prints the text to stderr and exits with status 1, so the
# exit code for a missing or out-of-range argument is unchanged; the caller
# now also sees why the script stopped.
usage = "usage: %s <ncv>   (ncv must be an integer in 0..4)" % sys.argv[0]
if len(sys.argv) < 2:
    sys.exit(usage)
try:
    ncv = int(sys.argv[1])
except ValueError:
    # A non-numeric argument used to end in a raw traceback.
    sys.exit(usage)
if ncv not in [0, 1, 2, 3, 4]:
    sys.exit(usage)

# Results go to kfcv/<ncv>; the parent is created first, then the
# sub-directory.
simdir = 'kfcv'
ou.mkdir_p(simdir)
simdir = simdir + '/' + str(ncv)
ou.mkdir_p(simdir)

num_rounds = 2000
# Parameter set kept as JSON text; several numeric settings are stored as
# strings and are passed on in that form after parsing.
params = """{"eval_metric": "mlogloss", "early_stopping_rounds": 10, "colsample_bytree": "0.5", "num_class": 9, "silent": 1, "nthread": 16, "min_child_weight": "4", "subsample": "0.8", "eta": "0.0125","objective": "multi:softprob", "max_depth": "14", "gamma": "0.025"}"""
params = json.loads(params)

# files
train_csv = '../data/train.csv'
train_buf = 'data/train.buffer'