from util import get_split_training_dataset
from metrics import suite

from sklearn.naive_bayes import GaussianNB

def train(Xtrain, Ytrain):
    """ Fit a Gaussian naive Bayes model on all of the provided X, Y

    Arguments
    Xtrain -- Training data
    Ytrain -- Training prediction

    Returns
    classifier -- a GaussianNB instance fitted to Xtrain and Ytrain
    """
    # fit() hands back the estimator it was called on, so the fitted
    # model can be returned directly from the chained call.
    return GaussianNB().fit(Xtrain, Ytrain)

if __name__ == "__main__":
    # Fit Gaussian naive Bayes on the training part of the split
    # returned by get_split_training_dataset, then run the metrics
    # suite on predictions for the held-out part.
    Xt, Xv, Yt, Yv = get_split_training_dataset()
    Classifier = train(Xt, Yt)
    # Parenthesised single-argument print behaves the same under
    # Python 2 and Python 3.
    print("Naive Bayes Classifier")
    suite(Yv, Classifier.predict(Xv))
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.grid_search import GridSearchCV


def train(Xtrain, Ytrain, n=250, d=None, max_features=36):
    """ Use entirety of provided X, Y to train an extra-trees ensemble

    Arguments
    Xtrain -- Training data
    Ytrain -- Training prediction

    Named Arguments
    n -- number of trees in the ensemble (passed as n_estimators)
    d -- maximum depth of each tree (passed as max_depth)
    max_features -- number of features considered per split; the
                    default of 36 presumably matches the project's
                    dataset width (TODO confirm against the data)

    Returns
    classifier -- an ExtraTreesClassifier fitted to Xtrain and Ytrain
    """
    # min_samples_split=2 is the smallest value scikit-learn accepts
    # (releases >= 0.18 raise on 1). A node holding a single sample can
    # never be split, so this yields the same trees as the former 1.
    # random_state is pinned so repeated runs build the same ensemble.
    classifier = ExtraTreesClassifier(n_estimators=n,
                                      max_depth=d,
                                      min_samples_split=2,
                                      random_state=0,
                                      max_features=max_features)
    classifier.fit(Xtrain, Ytrain)
    return classifier


if __name__ == "__main__":
    # Fit the extra-trees ensemble on the training part of the split
    # returned by get_split_training_dataset, then run the metrics
    # suite on predictions for the held-out part.
    Xt, Xv, Yt, Yv = get_split_training_dataset()
    Classifier = train(Xt, Yt)
    # Parenthesised single-argument print behaves the same under
    # Python 2 and Python 3.
    print("Extra Random Trees Ensemble Classifier")
    suite(Yv, Classifier.predict(Xv))