Example #1
def make_model(size_ts=10000, size_ls=250, data_set=1, graph=False, depth=None, random_state=0):

    if data_set == 1:
        X_train, y_train, X_test, y_test = make_data1(size_ts, size_ls, 0, False, random_state=random_state)
    else:
        X_train, y_train, X_test, y_test = make_data2(size_ts, size_ls, 0, False, random_state=random_state)

    clf = DecisionTreeClassifier(max_depth=depth)
    clf = clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)

    if graph:
        if depth is None:
            plot_boundary(fname="figures/data" + str(data_set) + "_depthNone", fitted_estimator=clf,
                          X=X_test, y=y_pred, title="data set " + str(data_set) + " with depth = None")

            tree.plot_tree(clf)
            dot_data = tree.export_graphviz(clf, out_file=None)
            # Use a dedicated name so the boolean `graph` argument is not shadowed
            gviz = graphviz.Source(dot_data)
            gviz.render("figures/tree_data" + str(data_set) + "_depthNone")

        else:
            plot_boundary(fname="figures/data" + str(data_set) + "_depth" + str(depth), fitted_estimator=clf,
                          X=X_test, y=y_pred, title="data set " + str(data_set) + " with depth = " + str(depth))

    return accuracy_score(y_test, y_pred)
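
A minimal usage sketch (not part of the original snippet), assuming make_data1/make_data2 and plot_boundary are importable as above; it sweeps a few depths and prints the returned test accuracies:

# Hypothetical driver loop for make_model
for d in [1, 2, 4, 8, None]:
    acc = make_model(data_set=1, depth=d)
    print("depth = {}: test accuracy = {:.4f}".format(d, acc))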
Example #2
def test_and_plot(title, X, y, n_neighbors):
    """Generate an image of our data and the predictions of the k-nearest
    neighbors classifier.

    Parameters
    ----------
    title : str
        The title given to the image.

    X : array of shape [n_samples, 2]
        The input samples.

    y : array of shape [n_samples]
        The output values.

    n_neighbors : int > 0
        The number of neighbors used by the model.
    """
    X_train = X[:150]
    y_train = y[:150]
    X_test = X[-1850:]
    y_test = y[-1850:]
    clf = KNeighborsClassifier(n_neighbors=n_neighbors)
    clf.fit(X_train, y_train)
    plot_boundary(title, clf, X_test, y_test)
Example #3
def compute_accuracy(nbPoints, nbGen, dataset="dataset1"):
    """Computes the test set accuracies over nbGen generations of the dataset
        using a LinearDiscriminantAnalysis() as a classifier

        Parameters
        ----------
        -   nbPoints : number of samples.
        -   nbGen : number of generations of the dataset.

        Returns
        -------
        accuracy : accuracies mean over ngGen generations
    """
    accuracy = []

    for gen in range(nbGen):

        if dataset == "dataset2":
            X, y = make_dataset2(nbPoints, gen)
        else:
            X, y = make_dataset1(nbPoints, gen)
        X_ls, X_ts, y_ls, y_ts = train_test_split(X,
                                                  y,
                                                  train_size=0.8,
                                                  test_size=0.2)

        estimator = LinearDiscriminantAnalysis().fit(X_ls, y_ls)
        accuracy.append(estimator.score(X_ts, y_ts))
        if gen == 1:
            plot_boundary("LDA {}".format(dataset), estimator, X_ts, y_ts, 0.1)
    return np.array(accuracy)
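
A short usage sketch, assuming make_dataset1/make_dataset2 and plot_boundary are available as in the project above:

accs = compute_accuracy(nbPoints=1500, nbGen=5, dataset="dataset1")
print("mean accuracy = {:.4f}, std = {:.4f}".format(accs.mean(), accs.std()))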
Example #4
def test_and_plot(title, X, y, m_depth=None):
    """Generate an image of our data and the predictions of the decision tree.

    Parameters
    ----------
    title : str
        The title given to the image.

    X : array of shape [n_samples, 2]
        The input samples.

    y : array of shape [n_samples]
        The output values.

    m_depth : int > 0, optional (default = None)
        The maximum depth allowed for our decision tree.
    """
    X_train = X[:150]
    y_train = y[:150]
    X_test = X[-1850:]
    y_test = y[-1850:]
    clf = DecisionTreeClassifier(max_depth=m_depth)
    clf.fit(X_train, y_train)
    plot_boundary(title, clf, X_test, y_test)
Example #5
def q21(x, y):
    trainSample = (x[:1000, :], y[:1000])
    testSample = (x[1000:, :], y[1000:])
    for i in (1, 5, 50, 100, 500):
        knn = KNeighborsClassifier(n_neighbors=i)
        estimator = knn.fit(trainSample[0], trainSample[1])
        yPredicted = estimator.predict(testSample[0])
        print("Accuracy with {} neighbors is: {}.".format(i, accuracy_score(testSample[1], yPredicted)))
        name = "boundaryKNN" + str(i)
        title = "Distribution for n_neighbors = " + str(i)
        plot_boundary(name, estimator, testSample[0], testSample[1], title=title)
Example #6
def predLR(n_iter, learning_rate, trainSample, testSample, plot=False):
    lr = LogisticRegressionClassifier(n_iter=n_iter,
                                      learning_rate=learning_rate)
    lr.fit(trainSample[0], trainSample[1])
    yPredicted = lr.predict(testSample[0])
    acc = accuracy_score(testSample[1], yPredicted)
    if plot:
        name = "boundaryLR"
        title = ("Distribution for " + str(n_iter) + " iterations and a "
                 "learning rate of " + str(learning_rate) + ".")
        plot_boundary(name, lr, testSample[0], testSample[1], title=title)
    return acc
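
A quick usage sketch with hypothetical values, assuming trainSample and testSample are (X, y) tuples built from the project's dataset generators:

for rate in [0.01, 0.1, 1]:
    acc = predLR(n_iter=100, learning_rate=rate,
                 trainSample=trainSample, testSample=testSample)
    print("learning_rate = {}: accuracy = {:.4f}".format(rate, acc))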
Example #7
def make_model(size_ts=10000, size_ls=250, data_set=1, graph=False, n_neigh=1, cv=False):

    if data_set == 1:
        [X_train, y_train, X_test, y_test] = make_data1(size_ts, size_ls, 0, None)
    else:
        [X_train, y_train, X_test, y_test] = make_data2(size_ts, size_ls, 0, None)

    clf = KNeighborsClassifier(n_neighbors=n_neigh)
    clf = clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)

    if graph:
        # The original referenced an undefined `n`; `n_neigh` is the intended variable
        plot_boundary(fname="figures/data_" + str(data_set) + "_neighbors" + str(n_neigh), fitted_estimator=clf,
                      X=X_test, y=y_pred, title="data set " + str(data_set) + " with neighbors = " + str(n_neigh))

    return accuracy_score(y_test, y_pred)
Example #8
File: dt.py Project: nDerroitte/ML1
def predPlotDT(trainSample, testSample, max_depth=None, plot=False):
    # trainSample and testSample are (X, y) tuples generated by the caller
    global DScount

    # Compute the accuracy. The predDT function is not reused here because the
    # fitted tree is needed for plotting.
    dt = DecisionTreeClassifier(max_depth=max_depth, random_state=42)
    estimator = dt.fit(trainSample[0], trainSample[1])
    yPredicted = estimator.predict(testSample[0])
    acc = accuracy_score(testSample[1], yPredicted)

    if max_depth is not None:
        print("The accuracy of dataset {} with max depth of {} is {}.".format(
            DScount, max_depth, acc))
    else:
        print("The accuracy of dataset {} without max depth is {}.".format(
            DScount, acc))

    if plot:
        print("Saving the file.")
        name = "boundaryDT" + str(max_depth)
        title = ("Distribution for max depth tree of " + str(max_depth)
                 if max_depth is not None
                 else "Distribution for tree without max depth")
        plot_boundary(name,
                      estimator,
                      testSample[0],
                      testSample[1],
                      title=title)
        name = "boundaryDTLS" + str(max_depth)
        title = ("Distribution for max depth tree of " + str(max_depth) +
                 " for the learning sample"
                 if max_depth is not None
                 else "Distribution for tree without max depth for the learning sample")
        plot_boundary(name,
                      estimator,
                      trainSample[0],
                      trainSample[1],
                      title=title)
        nameTree = "Tree" + str(max_depth) + ".dot"
        tree.export_graphviz(dt, out_file=nameTree)

    return acc
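
A hedged usage sketch, assuming the DScount global and the (X, y) sample tuples are set up as in the project:

DScount = 1
for depth in [1, 2, 4, 8, None]:
    predPlotDT(trainSample, testSample, max_depth=depth)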
Example #9
def tree(max_depth_input=None, fname=""):
    # All other hyperparameters are left at their scikit-learn defaults; the
    # original also spelled out deprecated ones (min_impurity_split, presort),
    # which are dropped here for compatibility with recent scikit-learn.
    model = DecisionTreeClassifier(criterion='gini',
                                   splitter='best',
                                   max_depth=max_depth_input)

    model.fit(x_data_train, y_data_train)

    y_pred = model.predict(x_data_test)

    # x_data_train, y_data_train, x_data_test, y_data_test, X and y are
    # module-level globals in the original script
    plot_boundary(fname, model, X, y)

    return accuracy_score(y_data_test, y_pred)
Example #10
def get_accuracy(n_neighbors, seed, which, dataset_size, trainingSet_size):
    """
    This function will predict with the KNN class and build a graph based on the prediction,
    it will also print the accuracy corresponding to the graph
    
    Arguments:
        n_neighbors: an array containing all the number of neighbors of which we should apply KNN
        seed: this is used to make random operation
        which: which dataset should be used
        dataset_size: the number of samples in the dataset
        trainingSet_size: the number of samples in the training set
        
    Return:
        /
    """
    # Get the sets
    x_train_sample, x_test_sample, y_train_sample, y_test_sample = get_sets(
        dataset_size, trainingSet_size, seed, which)

    for i in range(len(n_neighbors)):
        # Get the KN neighbours for each n_neighbors
        knn = KNeighborsClassifier(n_neighbors=n_neighbors[i]).fit(
            x_train_sample, y_train_sample)

        # Predictions done from the training samples
        prediction = knn.predict(x_test_sample)

        # Compute the accuracy
        accuracy = accuracy_score(y_test_sample, prediction)

        # Plot
        fname = "KNN=" + str(n_neighbors[i]) + "_ds=" + str(which)
        title = "KNN of " + str(n_neighbors[i]) \
                 + " neighbours and with an accuracy of %0.4f" %accuracy

        plot_boundary(fname, knn, x_test_sample, y_test_sample, 0.1, title)

        print("The accuracy for the dataset " + str(which) +
              " is: %0.4f" % accuracy)
Example #11
def compute_accuracy(nb_gen, max_depth, nb_points):
    """Computes the test set accurencies over n generations of the dataset
    using the DecisionTreeClassifier class from sklearn.tree with a
    particular max depth.

    Parameters
    ----------
    -   nb_gen : number of generations of the dataset.
    -   max_depth : maximum depth of the decision tree for the DT model.
    -   nb_points : number of samples.

    Returns
    -------
    accuracy : a list of the test set accuracies of the different
    generations.
    """
    accuracy = []

    for generation in range(nb_gen):
        X, y = make_dataset2(nb_points, generation)
        X_ls, X_ts, y_ls, y_ts = train_test_split(X,
                                                  y,
                                                  train_size=.8,
                                                  test_size=.2)

        if max_depth == "None":
            estimator = DecisionTreeClassifier().fit(X_ls, y_ls)
        else:
            estimator = DecisionTreeClassifier(max_depth=max_depth).fit(
                X_ls, y_ls)

        y_pred = estimator.predict(X_ts)
        accuracy.append(accuracy_score(y_ts, y_pred))

        if generation == 1:
            plot_boundary("DT maxdepth {}".format(max_depth), estimator, X_ts,
                          y_ts, 0.1)

    return np.array(accuracy)
Example #12
def compute_accuracy(nb_gen, nb_neighbors, nb_points):
    """Computes the test set accurencies over n generations of the dataset
    for the KNeighborsClassifier class from sklearn.neighbors with a
    particular number of nearest neighbors.

    Parameters
    ----------
    -   nb_gen : number of generations of the dataset.
    -   nb_neighbors : number of nearest neighbors for the KNN model.
    -   nb_points : number of samples.

    Returns
    -------
    accuracy : a list of the test set accuracies of the different
    generations.
    """
    accuracy = []

    for generation in range(nb_gen):

        X, y = make_dataset2(nb_points, generation)
        X_ls, X_ts, y_ls, y_ts = train_test_split(X,
                                                  y,
                                                  train_size=.8,
                                                  test_size=.2)

        estimator = KNeighborsClassifier(n_neighbors=nb_neighbors).fit(
            X_ls, y_ls)
        y_pred = estimator.predict(X_ts)
        accuracy.append(accuracy_score(y_ts, y_pred))

        if generation == 1:
            plot_boundary("KNN neighbors {}".format(nb_neighbors), estimator,
                          X_ts, y_ts, 0.1)

    return np.array(accuracy)
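
A usage sketch for this helper, assuming make_dataset2 is importable; it reports the mean and standard deviation over five generations for a few neighborhood sizes:

for k in [1, 5, 25, 125]:
    accs = compute_accuracy(nb_gen=5, nb_neighbors=k, nb_points=1500)
    print("k = {}: {:.4f} +/- {:.4f}".format(k, accs.mean(), accs.std()))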
Example #13
def findNbIter(trainSample, testSample, plot=False):
    nbIter = [1, 10, 20, 50, 100, 200, 500, 1000]
    bestAcc = 0
    bestIter = 0
    for i in nbIter:
        start_time = time.time()
        lr = LogisticRegressionClassifier(n_iter=i)
        lr.fit(trainSample[0], trainSample[1])
        yPredicted = lr.predict(testSample[0])
        currentAcc = accuracy_score(testSample[1], yPredicted)
        if bestAcc < currentAcc:
            bestAcc = currentAcc
            bestIter = i
        print("Accuracy for {} iterations is {}. It took {} sec.".format(
            i, currentAcc,
            time.time() - start_time))
        if plot:
            name = "boundaryLR" + str(i)
            title = "Distribution for " + str(i) + " iterations."
            plot_boundary(name, lr, testSample[0], testSample[1], title=title)

    print("The optimal number of iterations is {}".format(bestIter))

    return bestIter
Example #14
                    Py *= (1 / factor_den) * exp

                p[h].append(Py)
                Z += Py

            for i in range(len(p[h])):
                p[h][i] /= Z

        p = np.matrix(p)

        return p


if __name__ == "__main__":

    dataset_size = 2000
    trainingSet_size = 150

    for i in range(2):
        x_train_sample, x_test_sample, y_train_sample, y_test_sample = get_sets(
            dataset_size, trainingSet_size, 1, i + 1)

        nb = GaussianNaiveBayes().fit(x_train_sample, y_train_sample)
        prediction = nb.predict(x_test_sample)
        accuracy = accuracy_score(y_test_sample, prediction)

        fname = "NB_ds=" + str(i + 1)
        title = "Naive Bayes classification with an accuracy of %0.4f" % accuracy

        plot_boundary(fname, nb, x_test_sample, y_test_sample, 0.1, title)
Example #15
        ypr = Sigmoid(theta, X[i])
        error = (ypr - Y[i]) * xij
        Err += error
    J = Err / len(Y)
    return J

niter = [50]  # [10, 200, 1000]
learningrate = [0.01, 0.1, 1, 10]

# Main body
if __name__ == "__main__":
    for ni in niter:
        for lr in learningrate:
            cnf = np.zeros((2, 2))
            a = 0
            st = 0
            for k in range(5):  # five generations of the dataset
                b = make_unbalanced_dataset(3000)
                Xtr = np.array(b[0][0:1000, :])
                ytr = b[1][0:1000]
                Xte = np.array(b[0][1000:, :])
                yte = b[1][1000:]
                c = LogisticRegressionClassifier(n_iter=ni, learning_rate=lr)
                t = c.fit(Xtr, ytr)
                plot_boundary(fname="Logistic_regression_learn_rate_%s_n_iter_%s.png" % (lr, ni),
                              fitted_estimator=t, X=Xte, y=yte)
                pr = t.predict(Xte)
                cnf += confusion_matrix(yte, pr)
                a += round(accuracy_score(yte, pr), 3)
                st += round(np.std(pr - yte), 2)
            print("Average results for learning rate %s and n_iter %s: "
                  "true negatives %s, false negatives %s, true positives %s, "
                  "false positives %s, accuracy %s, std %s"
                  % (lr, ni, cnf[0, 0] / 5., cnf[1, 0] / 5., cnf[1, 1] / 5.,
                     cnf[0, 1] / 5., a / 5., st / 5.))
Example #16
File: knn.py Project: ced211/ml1
# (Question 2)


if __name__ == "__main__":
    n_table = [1, 5, 25, 125, 300, 625, 1200]
    data = make_dataset2(1500, 1997)
    scores = {}
    mean = {}
    var = {}
    for n in n_table:

        # part 1
        estimator = KNeighborsClassifier(n_neighbors=n).fit(data[0], data[1])
        print("computing" + str(n))
        plot_boundary("knn_" + str(n), estimator, data[0], data[1])

        scores[n] = cross_val_score(
            estimator, data[0], data[1], cv=10).tolist()
        for i in range(9):
            cv = StratifiedKFold(n_splits=10, random_state=i, shuffle=True)
            scores[n].extend(cross_val_score(
                estimator, data[0], data[1], cv=cv).tolist())
        print(len(scores[n]))
        mean[n] = np.mean(scores[n])
        var[n] = np.var(scores[n])
    print("mean" + str(mean))
    print("var" + str(var))

    # part2
    # desired accuracy
Example #17
                numerator.append(num[k])

            numerator = np.asarray(numerator)
            p[i] = numerator / den
            i += 1

        return p


if __name__ == "__main__":

    # 1st dataset
    train_set = make_dataset1(1200, 565354)
    lda = LinearDiscriminantAnalysis()
    lda.fit(train_set[0], train_set[1])
    plot_boundary('lda_trainDataset1', lda, train_set[0], train_set[1])

    # 2nd dataset
    train_set = make_dataset2(1200, 565354)
    lda = LinearDiscriminantAnalysis()
    lda.fit(train_set[0], train_set[1])
    plot_boundary('lda_trainDataset2', lda, train_set[0], train_set[1])

    # Accuracy and std for five generations of different seeds

    accuracy1 = np.zeros(5)
    accuracy2 = np.zeros(5)

    seed = 10000  # Will change for each generation
    for i in range(5):
        (train_set1, test_set1) = (make_dataset1(1200, seed),
Example #18
    # (Question 1) dt.py: Decision tree
    SAMPLE_NUMBER = 2000
    TRAIN_SET_SAMPLE_NUM = 150

    X, y = get_dataset(SAMPLE_NUMBER)

    X_train, y_train = X[:TRAIN_SET_SAMPLE_NUM], y[:TRAIN_SET_SAMPLE_NUM]
    X_test, y_test = X[TRAIN_SET_SAMPLE_NUM:], y[TRAIN_SET_SAMPLE_NUM:]

    # 1.
    decisionTreeClassifier = DecisionTreeClassifier(random_state=get_random_state())
    decisionTreeClassifier.fit(X_train, y_train)
    y_dtc = decisionTreeClassifier.predict(X_test)

    # Plot
    plot_boundary("1-1-Ground-Truth", decisionTreeClassifier, X_test, y_test, title="Ground Truth data")
    plot_boundary("1-1-Prediction", decisionTreeClassifier, X_test, y_dtc, title="Prediction data")

    # 2.
    max_depths = list(range(1, 20))
    training_scores = []
    for max_depth in max_depths:
        decisionTreeClassifier = DecisionTreeClassifier(random_state=get_random_state(), max_depth=max_depth)
        decisionTreeClassifier.fit(X_train, y_train)
        y_dtc = decisionTreeClassifier.predict(X_test)

        # Plot
        plot_boundary("1-2-Max-Depth_%s" % str(max_depth), decisionTreeClassifier, X_test, y_test, title="Real data with max_depth = %s" % str(max_depth))

        training_scores.append(decisionTreeClassifier.score(X_train, y_train))
Example #19
File: dt.py Project: ced211/ml1
from plot import plot_boundary
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score
# Note: make_dataset2 is imported from the project's data-generation module in
# the original file; that import is not shown in this excerpt.

if __name__ == "__main__":

    train_set = make_dataset2(1200, 565354)
    test_set = make_dataset2(300, 156)
    seeds = [5, 36, 47, 9898]
    depth_test = [1, 2, 4, 8]
    scores = {}

    # create tree and figure of unconstrained depth

    estimator = DecisionTreeClassifier().fit(train_set[0], train_set[1])
    plot_boundary("inf_train_tree", estimator, train_set[0], train_set[1])
    plot_boundary("inf_test_tree", estimator, test_set[0], test_set[1])
    prediction = estimator.predict(test_set[0])
    scores[0] = []
    scores[0].append(accuracy_score(test_set[1], prediction))

    # part 2, test model against 5 test set.
    for seed in seeds:
        test_set = make_dataset2(300, seed)
        prediction = estimator.predict(test_set[0])
        scores[0].append(accuracy_score(test_set[1], prediction))

    # create tree and figure for each depth
    for depth in depth_test:
        print("create tree" + str(depth))
        estimator = DecisionTreeClassifier(max_depth=depth).fit(
Example #20
if __name__ == "__main__":
    print("Nearest Neighbour: Standard calculation")
    for i in range(len(d)):
        cnf = np.zeros((2, 2))
        a = 0
        st = 0
        for k in range(5):
            b = make_unbalanced_dataset(3000)
            Xtr = np.array(b[0][0:1000, :])
            ytr = b[1][0:1000]
            Xte = np.array(b[0][1000:, :])
            yte = b[1][1000:]
            c = KNeighborsClassifier(n_neighbors=d[i])
            t = c.fit(Xtr, ytr)
            plot_boundary(fname="K_nearest_neighbors_%s.png" % (str(d[i])),
                          fitted_estimator=t,
                          X=Xte,
                          y=yte)
            pr = t.predict(Xte)
            cnf += confusion_matrix(yte, pr)
            a += round(accuracy_score(yte, pr), 3)
            st += round(np.std(pr - yte), 2)
        print(
            "Average results for n_neighbors %s: true negatives %s, false negatives %s, "
            "true positives %s, false positives %s, accuracy %s, std %s"
            % (d[i], cnf[0, 0] / 5., cnf[1, 0] / 5., cnf[1, 1] / 5.,
               cnf[0, 1] / 5., a / 5., st / 5.))
    print("Ten-fold cross validation")
    for i in range(len(d)):
        cnf = np.zeros((2, 2))
        a = 0
        st = 0
        for k in range(5):
Example #21
def knn_plot(n_neighbors_input=1, fname=""):
    # x_data_train, y_data_train, x_data_test, y_data_test, x and y are
    # module-level globals in the original script
    model = KNeighborsClassifier(n_neighbors=n_neighbors_input)
    model.fit(x_data_train, y_data_train)
    y_pred = model.predict(x_data_test)
    plot_boundary(fname, model, x, y)
    return accuracy_score(y_data_test, y_pred)
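
For comparison, a fully self-contained variant of the same train/predict/score pattern, using only scikit-learn built-ins (make_moons stands in for the project's dataset generators, which is an assumption of this sketch):

from sklearn.datasets import make_moons
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score

# Generate a toy two-class dataset and hold out 20% for testing
X, y = make_moons(n_samples=1500, noise=0.3, random_state=0)
X_tr, X_te, y_tr, y_te = train_test_split(X, y, train_size=0.8, random_state=0)

for k in (1, 5, 50):
    model = KNeighborsClassifier(n_neighbors=k).fit(X_tr, y_tr)
    print("k = {}: test accuracy = {:.4f}".format(
        k, accuracy_score(y_te, model.predict(X_te))))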
Example #22
        # Get training and testing sets
        X, y = f(n_samples, random_state=0)  # seed fixed to 0
        X_train, X_test, y_train, y_test = train_test_split(X,
                                                            y,
                                                            test_size=p_test,
                                                            shuffle=False)

        # K Neighbors algorithm
        for n in n_neighbors:
            estimator = KNeighborsClassifier(n_neighbors=n).fit(
                X_train, y_train)

            # Save results
            fig_name = f.__name__ + '_knn_' + str(n)
            plot_boundary(fig_name, estimator, X_test[:n_show],
                          y_test[:n_show])

    ##############
    # Question 2 #
    ##############

    # Variables
    k, k_neighbors = 10, range(5, 150, 1)  # ten-fold cross validation
    accuracies_mean = np.zeros((len(k_neighbors)))

    # Get the second dataset
    X, y = datasets[1](n_samples, random_state=0)  # seed fixed to 0

    # Apply the algorithm with k-fold cross validation on second dataset
    for i, n in enumerate(k_neighbors):
        neigh = KNeighborsClassifier(n_neighbors=n)
Example #23
    X_test, y_test = X[TRAIN_SET_SAMPLE_NUM:], y[TRAIN_SET_SAMPLE_NUM:]

    # 1.
    knc = KNeighborsClassifier(n_neighbors=1)
    knc.fit(X_train, y_train)
    y_predict = knc.predict(X_test)

    n_errors = compare(y_test, y_predict)
    print("[Q2-1] 1-NN - Error percentage : {}%".format(n_errors*100/len(X_test)))

    # 2.
    oneNN = sklearn.neighbors.KNeighborsClassifier(n_neighbors=1)
    oneNN.fit(X_train, y_train)
    y_predict = oneNN.predict(X_test)

    plot_boundary("2-2-Ground-Truth", oneNN, X_test, y_test, title="Ground Truth data")
    plot_boundary("2-2-Prediction", oneNN, X_test, y_predict, title="Prediction data")

    n_errors = compare(y_test, y_predict)
    print("[Q2-2] 1-NN - Error percentage : {}%".format(n_errors*100/len(X_test)))

    plot_boundary("2-2-Training-set", oneNN, X_train, y_train, title="Training set boundaries")

    # 3.
    n_neighbors = [1, 2, 4, 7, 10, 30, 90, 150]
    for n in n_neighbors:
        nearest_neighb_class = sklearn.neighbors.KNeighborsClassifier(n_neighbors=n)
        nearest_neighb_class.fit(X_train, y_train)
        y_predict = nearest_neighb_class.predict(X_test)

        plot_boundary("2-3-Prediction-%s" % str(n), nearest_neighb_class, X_test, y_predict, title="Prediction data")
Example #24
np.random.seed(0)
if __name__ == "__main__":
    cnf = np.zeros((2, 2))
    a = 0
    st = 0
    for k in range(5):
        # Choose either the unbalanced or the balanced dataset and comment out the other
        b = make_unbalanced_dataset(3000)
        # b = make_balanced_dataset(3000)

        Xtr = np.array(b[0][0:1000, :])
        ytr = b[1][0:1000]
        Xte = np.array(b[0][1000:, :])
        yte = b[1][1000:]
        c = GaussianNB()
        t = c.fit(Xtr, ytr)
        if k == 0:
            plot_boundary(fname="Naive_Bayes_%s.png" % (k),
                          fitted_estimator=t,
                          X=Xte,
                          y=yte)
        pr = t.predict(Xte)
        cnf += confusion_matrix(yte, pr)
        a += round(accuracy_score(yte, pr), 2)
        st += round(np.std(pr - yte), 2)
    print(
        "Average results: true negatives %s, false negatives %s, true positives %s, "
        "false positives %s, accuracy %s, std %s"
        % (cnf[0, 0] / 5, cnf[1, 0] / 5, cnf[1, 1] / 5, cnf[0, 1] / 5, a / 5,
           st / 5))
Example #25
                # Compute the accuracy
                accuracy = accuracy_score(y_test_sample, prediction)
                accuracies[k].append(accuracy)

                # Plot the best accuracy
                if accuracy > best_accuracy:
                    to_plot = [
                        decisionTree, x_test_sample, y_test_sample, accuracy
                    ]
                    best_accuracy = accuracy

                if j == 4:
                    fname = "DTC_depth=" + str(depth[k]) + "_ds=" + str(i + 1)
                    title = "Decision Tree Classifier with a depth of " + str(depth[k]) \
                            + " with an accuracy of %0.4f" %to_plot[3]

                    plot_boundary(fname, to_plot[0], to_plot[1], to_plot[2],
                                  0.1, title)

        # Compute the average accuracies over 5 generations of the dataset
        for j in range(5):
            avg_accuracy = sum(accuracies[j]) / 5
            deviation = np.std(accuracies[j])

            print("From dataset %d:" % (i + 1))
            print("Depth = " + str(depth[j]))
            print("Average accuracy = %0.4f" % avg_accuracy)
            print("Deviation = %0.4f" % deviation)
            print()
Example #26
# ...
d = [1, 2, 4, 6, 8, None]

if __name__ == "__main__":
    for i in range(len(d)):
        cnf = np.zeros((2, 2))
        a = 0
        st = 0
        for k in range(5):
            b = make_unbalanced_dataset(3000)
            Xtr = np.array(b[0][0:1000, :])
            ytr = b[1][0:1000]
            Xte = np.array(b[0][1000:, :])
            yte = b[1][1000:]
            c = DecisionTreeClassifier(max_depth=d[i])
            t = c.fit(Xtr, ytr)
            plot_boundary(fname="Decision_Tree_Depth_%s.png" % (str(d[i])),
                          fitted_estimator=t,
                          X=Xte,
                          y=yte)
            pr = t.predict(Xte)
            cnf += confusion_matrix(yte, pr)
            a += round(accuracy_score(yte, pr), 2)
            st += round(np.std(pr - yte), 2)
        print(
            "Average results for depth %s: true negatives %s, false negatives %s, "
            "true positives %s, false positives %s, accuracy %s, std %s"
            % (d[i], cnf[0, 0] / 5., cnf[1, 0] / 5., cnf[1, 1] / 5.,
               cnf[0, 1] / 5., a / 5., st / 5.))