r = clone(reg)
    r.min_sample_split = i    
    regs.append(r)
    
# Random-forest classifier handed to the stacked model; `n` and `seed`
# come from outside this section.
clf = RandomForestClassifier(n_estimators=n,
                             min_samples_split=5,
                             random_state=seed)

# Combine the classifier with the regressors already collected in `regs`.
stacked = StackedClassiferRegressor(clf, regs=regs)

# get bins and labels
bins, labels = HazardBins(2)

# train the stacked classifier & regressor, timing the fit
start_time = time.time()
stacked.fit(train[columns], train['Hazard'], bins, labels)
stop_time = time.time()
# A parenthesised single-argument print emits the same text under both
# Python 2 and Python 3 (the bare print statement is a Python 3 syntax error).
print("training time: %.2fs" % (stop_time - start_time))

# NOTE: predictions are made on the same rows used for fitting, so this
# Gini is a training-set score, not a held-out estimate.
haz_pred = stacked.predict(train[columns])
train_gini = Gini(haz_pred, train.Hazard)
# The one-element tuple keeps %-formatting safe whatever type Gini returns.
print("Gini (training): %s" % (train_gini,))
#print('Importances: %s' % (clf.feature_importances_,))

# Disabled diagnostics: distribution of predictions per hazard class.
#train['predicted'] = haz_pred
#train['haz_class'] = stacked._classes
#g = sns.factorplot(x="haz_class", y="predicted", data=train)
#g = sns.FacetGrid(train, col="haz_class")
#g = g.map(plt.hist, "predicted")

# Actual hazard (x) against predicted hazard (y) on the training set.
plt.scatter(train.Hazard, haz_pred)
# Build one regressor per entry, each a fresh clone of `reg` with its own
# `min_samples_split`.
# The previous code assigned to `min_sample_split` (missing "s"), which only
# created an unused attribute and left every clone with `reg`'s setting.
# `set_params` applies the value and raises ValueError on an unknown
# parameter name, so a typo like that can no longer pass silently.
# NOTE(review): assumes `reg` is a scikit-learn estimator that exposes
# `min_samples_split` (e.g. a tree ensemble) - confirm where `reg` is defined.
regs = []
for i in (10, 10, 10, 5):
    r = clone(reg)
    r.set_params(min_samples_split=i)
    regs.append(r)

# Gradient-boosting classifier handed to the stacked model; `seed` comes
# from outside this section.
clf = GradientBoostingClassifier(
    n_estimators=500,
    min_samples_split=5,
    learning_rate=0.5,
    random_state=seed,
)

stacked = StackedClassiferRegressor(clf, regs=regs)

# get bins and labels
bins, labels = HazardBins(3)

# train the stacked classifier & regressor, timing the fit
start_time = time.time()
stacked.fit(train[columns], train["Hazard"], bins, labels)
stop_time = time.time()
# A parenthesised single-argument print emits the same text under both
# Python 2 and Python 3 (the bare print statement is a Python 3 syntax error).
print("training time: %.2fs" % (stop_time - start_time))

# NOTE: predictions are made on the same rows used for fitting, so this
# Gini is a training-set score, not a held-out estimate.
haz_pred = stacked.predict(train[columns])
train_gini = Gini(haz_pred, train.Hazard)
# The one-element tuple keeps %-formatting safe whatever type Gini returns.
print("Gini (training): %s" % (train_gini,))
# print('Importances: %s' % (clf.feature_importances_,))

# Disabled diagnostics: distribution of predictions per hazard class.
# train['predicted'] = haz_pred
# train['haz_class'] = stacked._classes
# g = sns.factorplot(x="haz_class", y="predicted", data=train)
# g = sns.FacetGrid(train, col="haz_class")
# g = g.map(plt.hist, "predicted")

# Actual hazard (x) against predicted hazard (y) on the training set.
plt.scatter(train.Hazard, haz_pred)
#    r = clone(reg)
#    r.min_sample_split = i    
#    regs.append(r)
    
# XGBoost classifier handed to the stacked model; `n` and `seed` come from
# outside this section.
clf = xgb.XGBClassifier(max_depth=5, n_estimators=n, silent=False,
                        learning_rate=0.3, gamma=0, seed=seed)

# `gbm` (defined outside this section) is passed as the second positional
# argument, where the other experiments pass a list via `regs=`.
stacked = StackedClassiferRegressor(clf, gbm)

# get bins and labels
bins, labels = HazardBins(3)
#labels = range(len(bins)-1)

# train the stacked classifier & regressor, timing the fit
# The data is passed as plain NumPy arrays. `.values` replaces
# `.as_matrix()`, which was deprecated in pandas 0.23 and removed in 1.0;
# `.values` exists on old and new pandas alike.
start_time = time.time()
stacked.fit(train[columns].values, train['Hazard'].values, bins, labels)
stop_time = time.time()
# A parenthesised single-argument print emits the same text under both
# Python 2 and Python 3 (the bare print statement is a Python 3 syntax error).
print("training time: %.2fs" % (stop_time - start_time))

# NOTE: predictions are made on the same rows used for fitting, so this
# Gini is a training-set score, not a held-out estimate.
haz_pred = stacked.predict(train[columns].values)
train_gini = Gini(haz_pred, train.Hazard)
# The one-element tuple keeps %-formatting safe whatever type Gini returns.
print("Gini (training): %s" % (train_gini,))
#print('Importances: %s' % (clf.feature_importances_,))

# Disabled diagnostics: distribution of predictions per hazard class.
#train['predicted'] = haz_pred
#train['haz_class'] = stacked._classes
#g = sns.factorplot(x="haz_class", y="predicted", data=train)
#g = sns.FacetGrid(train, col="haz_class")
#g = g.map(plt.hist, "predicted")

# Actual hazard (x) against predicted hazard (y) on the training set.
plt.scatter(train.Hazard, haz_pred)