Exemplo n.º 1
0
class PerceptronImpl():
    """Lazy wrapper that stores hyper-parameters and builds ``SKLModel`` on fit.

    The constructor only records its keyword arguments; the underlying
    estimator (``SKLModel``, defined outside this block) is created anew on
    every call to :meth:`fit`.
    """

    def __init__(self,
                 penalty=None,
                 alpha=0.0001,
                 fit_intercept=True,
                 max_iter=None,
                 tol=None,
                 shuffle=True,
                 verbose=0,
                 eta0=1.0,
                 n_jobs=None,
                 random_state=None,
                 early_stopping=False,
                 validation_fraction=0.1,
                 n_iter_no_change=5,
                 class_weight='balanced',
                 warm_start=False,
                 n_iter=None):
        # Kept verbatim so fit() can forward them as keyword arguments.
        self._hyperparams = dict(
            penalty=penalty,
            alpha=alpha,
            fit_intercept=fit_intercept,
            max_iter=max_iter,
            tol=tol,
            shuffle=shuffle,
            verbose=verbose,
            eta0=eta0,
            n_jobs=n_jobs,
            random_state=random_state,
            early_stopping=early_stopping,
            validation_fraction=validation_fraction,
            n_iter_no_change=n_iter_no_change,
            class_weight=class_weight,
            warm_start=warm_start,
            n_iter=n_iter,
        )

    def fit(self, X, y=None):
        """Build a fresh ``SKLModel`` from the stored settings and train it.

        ``y`` is only forwarded when it is not ``None``. Returns ``self``.
        """
        estimator = SKLModel(**self._hyperparams)
        self._sklearn_model = estimator
        if y is None:
            estimator.fit(X)
        else:
            estimator.fit(X, y)
        return self

    def predict(self, X):
        """Delegate prediction to the estimator created by the last fit()."""
        fitted = self._sklearn_model
        return fitted.predict(X)
Exemplo n.º 2
0
    def preceptron(self):
        """Fit an L2-penalised Perceptron and store its test predictions.

        Trains on ``self.X_train`` / ``self.y_train``, prints the accuracy
        measured on that same training data (as a percentage rounded to two
        decimals) and writes the predictions for ``self.X_test`` to
        ``self.y_pred``.
        """
        model = Perceptron(penalty='l2', max_iter=1000, shuffle=True)
        model.fit(self.X_train, self.y_train)

        # score() is evaluated on the training set, not on held-out data.
        train_score = model.score(self.X_train, self.y_train)
        acc = round(train_score * 100, 2)
        print("acc with Perceptron:", acc)

        self.y_pred = model.predict(self.X_test)
def simulation(n, runs, margin=0, p_runs=100, d=2):
    '''
    Run a given number of simulations to compare an SVM with perceptrons.

    Every run generates a fresh set of labelled training and testing points
    with ``generate_labeled_points``, fits ``p_runs`` perceptrons and one
    linear-kernel SVM on the training points, and records how they score on
    the testing points (the perceptron scores are averaged).

    :param n: number of training points per run
    :param runs: number of times to run the simulation
    :param margin: value passed as ``gamma`` to the generator on every run
    :param p_runs: number of perceptrons to average per run
    :param d: dimensionality of points
    :return: pandas dataframe with one row per run and the columns
        ``n``, ``margin`` (the value returned by the generator for that
        run), ``avg perceptron_error`` and ``svm_error``
    '''

    all_data = []
    for _ in range(runs):
        # The generator returns the data together with a margin value. It is
        # kept in its own variable so that the caller's requested ``margin``
        # is what every run passes as ``gamma`` (previously the returned value
        # overwrote the parameter and leaked into the following runs).
        # NOTE(review): the original comment described an "80-20" split,
        # which would be n * .25 test points rather than n * 25 -- confirm.
        train_dat, test_dat, run_margin = generate_labeled_points(
            n_train=n,
            n_test=ceil(n * 25),
            gamma=margin,
            dim=d)

        # Separate train points from labels
        train_points = [x[0] for x in train_dat]
        train_labels = [x[1] for x in train_dat]

        # Separate test points from their labels
        test_points = [x[0] for x in test_dat]
        test_labels = [x[1] for x in test_dat]

        # Fit p_runs perceptrons on the same training data and average their
        # test scores. One RandomState instance is shared by all of them.
        # NOTE(review): the stored values come straight from ``score()``;
        # for scikit-learn classifiers that is mean accuracy, not an error
        # rate, despite the column names -- confirm what consumers expect.
        p_errors = []
        seed = np.random.RandomState()
        for _ in range(p_runs):
            perceptron = Perceptron(random_state=seed)
            perceptron.fit(train_points, train_labels)
            p_errors.append(perceptron.score(test_points, test_labels))
        p_error = np.mean(p_errors)

        # Train and test with single SVM
        svm = SVC(kernel="linear")
        svm.fit(train_points, train_labels)
        svm_error = svm.score(test_points, test_labels)

        all_data.append([n, run_margin, p_error, svm_error])

    df = pd.DataFrame(
        all_data, columns=['n', 'margin', 'avg perceptron_error', 'svm_error'])
    return df
Exemplo n.º 4
0
def main():
    """Train a Perceptron on the demo CSV files and print its results.

    Reads the training features, training targets, evaluation features and
    evaluation labels from the ``Data`` directory (every row is used, no
    header row is skipped), fits the model, then prints the accuracy on the
    evaluation set, each individual prediction and the result of
    ``fit_transform`` on the training data.
    """

    def load_csv(path):
        # Parse inside a ``with`` block so the handle is closed even when
        # genfromtxt raises; the original left all file objects open.
        with open(path, 'r') as handle:
            return genfromtxt(handle, delimiter=',', dtype='f8')

    dataset_T = load_csv('Data/demoTrain.csv')
    dataset_R = load_csv('Data/demoTarget.csv')
    dataset_v = load_csv('Data/demoTest.csv')
    trueData = load_csv('Data/validate.csv')

    target = [x for x in dataset_R]
    train = [x[:] for x in dataset_T]
    validate = [x[:] for x in dataset_v]
    y = [x for x in trueData]

    # NOTE(review): ``n_iter`` and ``fit_transform`` are only accepted by old
    # scikit-learn releases; kept because this snippet targets that API.
    per = Perceptron(n_iter=2, shuffle=True)
    per.fit(train, target)

    val = per.predict(validate)
    score = per.score(validate, y)

    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print(str(score) + "\n")

    for v in val:
        print(v)

    a = per.fit_transform(train, target)
    print(a)
Exemplo n.º 5
0
def main():
    """Fit a Perceptron on the demo CSV data and print score and predictions.

    The four input files under ``Data`` are read in full (no header row is
    skipped). After fitting, the accuracy on the evaluation set, every
    predicted value and the output of ``fit_transform`` on the training data
    are printed.
    """

    def read_matrix(path):
        # The context manager guarantees the file is closed; previously the
        # handles returned by open() were never closed.
        with open(path, 'r') as source:
            return genfromtxt(source, delimiter=',', dtype='f8')

    dataset_T = read_matrix('Data/demoTrain.csv')
    dataset_R = read_matrix('Data/demoTarget.csv')
    dataset_v = read_matrix('Data/demoTest.csv')
    trueData = read_matrix('Data/validate.csv')

    target = [x for x in dataset_R]
    train = [x[:] for x in dataset_T]
    validate = [x[:] for x in dataset_v]
    y = [x for x in trueData]

    # NOTE(review): ``n_iter`` and ``fit_transform`` exist only in old
    # scikit-learn releases; left unchanged for that environment.
    per = Perceptron(n_iter=2, shuffle=True)
    per.fit(train, target)

    val = per.predict(validate)
    score = per.score(validate, y)

    # Parenthesised single-argument prints run on both Python 2 and 3.
    print(str(score) + "\n")

    for v in val:
        print(v)

    a = per.fit_transform(train, target)
    print(a)
Exemplo n.º 6
0
def task2(sneakers_data: pd.DataFrame, boots_data: pd.DataFrame,
          sneakers_labels: pd.DataFrame, boots_labels: pd.DataFrame):
    """Cross-validate a Perceptron on the combined sneaker and boot samples.

    The two data frames (and their label frames) are stacked row-wise, then
    evaluated with a shuffled 4-fold split. For every fold the training time,
    prediction time, accuracy and the four confusion-matrix cells (as a
    percentage of the fold's test samples) are printed; afterwards the
    minimum, maximum, mean and total of the collected timings and accuracies
    are printed. Nothing is returned.

    :param sneakers_data: feature rows of the first class
    :param boots_data: feature rows of the second class
    :param sneakers_labels: labels matching ``sneakers_data``
    :param boots_labels: labels matching ``boots_data``
    """
    # DataFrame.append was removed in pandas 2.0; pd.concat stacks the rows
    # the same way (original index labels kept) on old and new versions.
    full_data = pd.concat([sneakers_data, boots_data])
    full_labels = pd.concat([sneakers_labels, boots_labels])

    train_times = []
    predict_times = []
    accuracies = []

    print("\tTask 2 output")
    num_splits = 4
    for train_index, test_index in KFold(n_splits=num_splits,
                                         shuffle=True).split(full_data):
        train_data = full_data.iloc[train_index]
        test_data = full_data.iloc[test_index]

        train_labels = full_labels.iloc[train_index]
        test_labels = full_labels.iloc[test_index]

        pctrn = Perceptron()

        train_start_time = timeit.default_timer()
        pctrn.fit(X=train_data, y=train_labels)
        train_end_time = timeit.default_timer()

        train_time = train_end_time - train_start_time
        train_times.append(train_time)

        print("\tPerceptron took", train_time, "seconds to train on data")

        predict_start_time = timeit.default_timer()
        prediction = pctrn.predict(test_data)
        predict_end_time = timeit.default_timer()

        predict_time = predict_end_time - predict_start_time
        predict_times.append(predict_time)

        print("\tPerceptron took", predict_time,
              "seconds to make a prediction")

        accuracy = accuracy_score(test_labels, prediction) * 100
        accuracies.append(accuracy)

        print("\tAccuracy for perceptron", accuracy, "%")

        confusion = confusion_matrix(test_labels, prediction)

        # Cell [i, j] is indexed as (true label, predicted label); the names
        # below treat index 0 as the "positive" class.
        # NOTE(review): confirm which label ends up at index 0.
        fold_size = len(test_labels)
        percent_true_pos = (confusion[0, 0] / fold_size) * 100
        percent_false_pos = (confusion[0, 1] / fold_size) * 100
        percent_true_neg = (confusion[1, 1] / fold_size) * 100
        percent_false_neg = (confusion[1, 0] / fold_size) * 100

        print("\tPerceptron confusion matrix true positive:", percent_true_pos,
              "%")
        print("\tPerceptron confusion matrix false positive:",
              percent_false_pos, "%")
        print("\tPerceptron confusion matrix true negative:", percent_true_neg,
              "%")
        print("\tPerceptron confusion matrix false negative:",
              percent_false_neg, "%\n")

    print("\tThe minimum train time was", np.min(train_times), "seconds")
    print("\tThe maximum train time was", np.max(train_times), "seconds")
    print("\tThe average train time was", np.mean(train_times), "seconds\n")

    print("\tThe minimum prediction time was", np.min(predict_times),
          "seconds")
    print("\tThe maximum prediction time was", np.max(predict_times),
          "seconds")
    print("\tThe average prediction time was", np.mean(predict_times),
          "seconds\n")

    print("\tMinimum Accuracy was", np.min(accuracies), "%")
    print("\tMaximum accuracy was", np.max(accuracies), "%")
    print("\tAverage accuracy was", np.mean(accuracies), "%\n")

    print(
        "\tTotal train time for all k-folds in perceptron with gamma value of =",
        np.sum(train_times), "seconds")
    print(
        "\tTotal prediction time for all k-folds in perceptron with gamma of =",
        np.sum(predict_times), "seconds\n")
def test_perceptron(x: list, y: list, learning_rate: float, max_iter: int) -> None:
    """Fit a Perceptron on ``x``/``y`` and hand it to ``plot_model``.

    :param x: training samples
    :param y: labels matching ``x``
    :param learning_rate: step size for the weight updates
    :param max_iter: maximum number of passes over the training data
    """
    # The learning rate is ``eta0``. ``alpha`` is the regularisation
    # multiplier and is only used when a penalty is set, so passing the
    # learning rate there silently had no effect.
    perceptron = Perceptron(max_iter=max_iter, eta0=learning_rate)
    perceptron.fit(x, y)
    plot_model(np.array(x), perceptron)
Exemplo n.º 8
0
def main():
    """Run the 2048 trainer: show the menu, play one game, repeat.

    Each pass of the outer loop draws the menu, loads any previously
    recorded moves (``2048_train.csv``) and evolved weights
    (``best_weights.txt``), waits for a mode key and then plays a game in
    that mode:

    * ``r`` / ``g`` -- the user plays and every board-changing move is
      recorded (``r`` additionally replaces the board through
      ``generateRandomGrid`` every 30 key presses);
    * ``e`` / ``f`` -- a KNN model picks the moves (with / without delay);
    * ``p`` / ``s`` -- a Perceptron picks the moves (with / without delay);
    * ``n`` / ``m`` -- random moves (with / without delay);
    * ``l`` -- random-search "learning" of two weight vectors, saved to
      ``best_weights.txt`` after the requested number of generations;
    * ``q`` -- close the window and return.

    Returns:
        0 when the user quits from the menu.
    """
    while True:
        intro = Text(
            Point(250, 300),
            "2048 TRAINER\n\nTrain model...R\nFull game train...G\n\n>>>Delays between moves<<<\nTest KNN model...E\nTest Perceptron model...P\nRandom model...N\n\n>>>No delays<<<\nTest KNN model...F\nTest Perceptron model...S\nRandom model...M\n\nSimple learning model...L\n\nPRESS Q TO QUIT"
        )
        intro.setSize(20)
        intro.setTextColor(color_rgb(255, 255, 255))

        # Previously recorded games: column 0 holds the move, columns 1-16
        # the board cells.
        if os.path.isfile("2048_train.csv"):
            data = pd.read_csv("2048_train.csv",
                               header=None,
                               usecols=[
                                   1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13,
                                   14, 15, 16
                               ])
            direction = pd.read_csv("2048_train.csv", header=None, usecols=[0])
            # 70% training; the remaining 30% is halved into test and
            # development sets.
            splitRatio = 0.7
            datatrainingSet, datatestSet = splitData(data, splitRatio)
            directiontrainingSet, directiontestSet = splitData(
                direction, splitRatio)
            splitRatio = 0.5
            datatestSet, datadevelopementSet = splitData(
                datatestSet, splitRatio)
            directiontestSet, directiondevelopementSet = splitData(
                directiontestSet, splitRatio)
            direction = np.transpose(direction)
            isPrevData = True
        else:
            isPrevData = False

        # File layout: 17 X weights, 17 Y weights, search radius, best score.
        if os.path.isfile("best_weights.txt"):
            # ``with`` closes the file even if a line fails to parse.
            with open("best_weights.txt", 'r') as fileContent:
                weightString = fileContent.readlines()
            # The ``np.float`` alias was removed from NumPy; the builtin
            # ``float`` is the same dtype and works on every version.
            bestWeightsX = np.array(weightString[0:17])
            bestWeightsX = bestWeightsX.astype(float)
            bestWeightsY = np.array(weightString[17:34])
            bestWeightsY = bestWeightsY.astype(float)
            randomRadius = float(weightString[34])
            bestLearnScore = float(weightString[35])
        else:
            bestWeightsX = np.zeros(17)
            bestWeightsY = np.zeros(17)
            randomRadius = 100
            bestLearnScore = 0

        win = GraphWin("2048", 500, 600)
        win.setBackground(color_rgb(0, 103, 105))
        intro.draw(win)

        # Model-testing modes are only offered when training data exists.
        if isPrevData:
            options = ['r', 'g', 'e', 'n', 'f', 'm', 'l', 'q', 's', 'p']
        else:
            options = ['r', 'g', 'n', 'm', 'l', 'q']
        mode = '-'
        while mode not in options:
            mode = win.getKey()

        if mode == 'q':
            win.close()
            return 0

        if mode == 'e' or mode == 'f':
            knn = KNeighborsClassifier(n_neighbors=10)
            knn.fit(datatrainingSet,
                    np.ravel(np.transpose(directiontrainingSet)))

        if mode == 'p' or mode == 's':
            # NOTE(review): ``n_iter`` is only accepted by old scikit-learn
            # releases (newer ones use ``max_iter``); left as-is because the
            # replacement is not available on those old releases.
            ppn = Perceptron(eta0=0.01, n_iter=10000)
            ppn.fit(datatrainingSet,
                    np.ravel(np.transpose(directiontrainingSet)))

        intro.undraw()
        win.setBackground(color_rgb(100, 100, 100))

        score = 0
        scoreText = Text(Point(250, 550), str(score))
        scoreText.setTextColor(color_rgb(255, 255, 255))
        scoreText.setSize(30)

        board = np.array([[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0],
                          [0, 0, 0, 0]])

        # Snapshot of the board taken before each move, used to detect
        # whether the move changed anything.
        isChangeBoard = np.array([[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0],
                                  [0, 0, 0, 0]])

        #DO THE SAME THING FOR THE TILE NUMBERS
        tileList = []
        numberList = []

        board = spawn(board)
        board = spawn(board)

        for i in range(0, 4):
            for j in range(0, 4):
                tileList.append(
                    Rectangle(Point(i * 125 + 5, j * 125 + 5),
                              Point(i * 125 + 120, j * 125 + 120)))
                numberList.append(
                    Text(Point(i * 125 + 60, j * 125 + 60), str(board[j][i])))
                numberList[i * 4 + j].setSize(20)
                tileList[i * 4 + j].setWidth(4)
                tileList[i * 4 + j].setOutline(color_rgb(255, 255, 255))
                numberList[i * 4 + j].setTextColor(color_rgb(0, 0, 0))
                tileList[i * 4 + j].draw(win)
                numberList[i * 4 + j].draw(win)

        #THE TRAINING DATA
        if not isPrevData:
            data = np.empty((0, 16), float)
            direction = np.empty(0)

        drawBoard(board, win, tileList, numberList)

        move = '-'
        classes = ['w', 's', 'd', 'a']
        # Number of consecutive attempts that left the board unchanged.
        moveIter = 0
        nMoves = 0

        totalIterations = 1
        iteration = 1

        currentWeightsX = generateRandomWeights(bestWeightsX,
                                                math.floor(randomRadius))
        currentWeightsY = generateRandomWeights(bestWeightsY,
                                                math.floor(randomRadius))
        currentBestWeightsX = currentWeightsX
        currentBestWeightsY = currentWeightsY

        if mode == 'l':
            maxIterations = int(
                input("Enter number of generations to simulate:"))

        while True:

            score = calcScore(board)

            # Learning mode skips drawing.
            if mode != 'l':
                drawBoard(board, win, tileList, numberList)

            scoreText.undraw()
            if mode != 'l':
                scoreText.setSize(30)
                scoreText.setText(str(score))
                scoreText.draw(win)

            for i in range(0, 4):
                for j in range(0, 4):
                    isChangeBoard[i][j] = board[i][j]

            if mode == 'r' or mode == 'g':
                move = win.getKey()
                if nMoves % 30 == 0 and mode == 'r':
                    board = generateRandomGrid(board)
                nMoves += 1
            else:
                if mode == 'e' or mode == 'n' or mode == 'p':
                    time.sleep(0.5)
                if mode == 'e' or mode == 'f':
                    probs = np.ravel(
                        knn.predict_proba(gridToData(board).reshape(1, -1)))
                    ranks = [0] * len(probs)
                    for i, x in enumerate(
                            sorted(range(len(probs)), key=lambda y: probs[y])):
                        ranks[x] = i
                    move = classes[ranks[moveIter]]
                    moveIter += 1
                elif mode == 'm' or mode == 'n':
                    # 'n' is the delayed random model from the menu. It used
                    # to fall through without ever choosing a move, leaving
                    # the game loop spinning forever.
                    move = classes[random.randint(0, 3)]
                elif mode == 'l':
                    move = classes[calculateDirection(currentWeightsX, board) *
                                   2 +
                                   calculateDirection(currentWeightsY, board)]
                    # The previous choice did not change the board: fall back
                    # to a random move.
                    if moveIter > 0:
                        move = classes[random.randint(0, 3)]
                    moveIter += 1
                elif mode == 'p' or mode == 's':
                    move = ppn.predict(gridToData(board).reshape(1, -1))
                    if moveIter > 0:
                        move = classes[random.randint(0, 3)]
                    moveIter += 1

            if move == 'w':
                board = shift(board, 0)
            elif move == 'd':
                board = shift(board, 1)
            elif move == 's':
                board = shift(board, 2)
            elif move == 'a':
                board = shift(board, 3)
            elif move == 'q':
                break

            if lose(board):
                print(score)
                if mode == 'l':
                    # Learning mode restarts immediately with a new board and
                    # a new random candidate around the best weights.
                    board = np.array([[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0],
                                      [0, 0, 0, 0]])

                    isChangeBoard = np.array([[0, 0, 0, 0], [0, 0, 0, 0],
                                              [0, 0, 0, 0], [0, 0, 0, 0]])

                    board = spawn(board)
                    board = spawn(board)

                    scoreText.setSize(16)
                    scoreText.setText("Score: " + str(score) +
                                      " -- iteration: " + str(iteration) +
                                      " -- r: " + str(randomRadius))

                    if score > bestLearnScore:
                        bestLearnScore = score
                        currentBestWeightsX = currentWeightsX
                        currentBestWeightsY = currentWeightsY
                        print("current best" + str(currentBestWeightsX) +
                              str(currentBestWeightsY))

                    # Every 1000 games close a generation: shrink the search
                    # radius and re-centre on the best weights seen so far.
                    if iteration == 1000:
                        drawBoard(board, win, tileList, numberList)
                        print("best score: " + str(bestLearnScore))
                        iteration = 1
                        randomRadius *= 0.9
                        bestWeightsX = currentBestWeightsX
                        bestWeightsY = currentBestWeightsY
                        print("best weights:" + str(bestWeightsX) +
                              str(bestWeightsY))
                        if totalIterations / 1000 >= maxIterations:
                            toWrite = np.empty(0)
                            toWrite = np.append(toWrite, bestWeightsX)
                            toWrite = np.append(toWrite, bestWeightsY)
                            toWrite = np.append(toWrite, randomRadius)
                            toWrite = np.append(toWrite, bestLearnScore)
                            np.savetxt("best_weights.txt", toWrite)
                            #WRITE WEIGHTS N STUFF
                            break

                    totalIterations += 1
                    iteration += 1

                    currentWeightsX = generateRandomWeights(
                        bestWeightsX, math.floor(randomRadius))
                    currentWeightsY = generateRandomWeights(
                        bestWeightsY, math.floor(randomRadius))
                else:
                    break

            isChanged = False
            for i in range(0, 4):
                for j in range(0, 4):
                    if isChangeBoard[i][j] != board[i][j]:
                        isChanged = True

            if isChanged:
                moveIter = 0
                if mode == 'r' or mode == 'g':
                    direction = np.append(direction, move)
                    data = np.vstack([data, gridToData2(board)])
                board = spawn(board)

        score = calcScore(board)

        scoreText.setSize(16)
        scoreText.setText(str(score) + " -- YOU LOSE (Press Q)")
        while move != 'q':
            move = win.getKey()
        win.close()

        if mode == 'r' or mode == 'g':
            # Save the full history plus the moves recorded in this game.
            # The old code wrote ``directiontrainingSet``/``datatrainingSet``,
            # i.e. only the stale 70% split without the new moves, and raised
            # NameError when no previous file existed.
            recordedMoves = np.reshape(np.ravel(direction), [-1, 1])
            recordedBoards = np.asarray(data)
            # Skip the write when nothing was recorded, so an empty file is
            # never left behind for the next pd.read_csv.
            if recordedMoves.shape[0] > 0:
                pd.DataFrame.to_csv(pd.DataFrame(
                    np.hstack([recordedMoves, recordedBoards])),
                                    "2048_train.csv",
                                    index=False,
                                    header=False)
Exemplo n.º 9
0
# Import from the public package: the private ``sklearn.linear_model.perceptron``
# module path no longer exists in current scikit-learn releases.
from sklearn.linear_model import Perceptron
from numbers_mass import one, two
import itertools
from generate_picture import read_image

# Flatten each nested sequence into one flat feature vector per sample.
x = [
    list(itertools.chain.from_iterable(one)),
    list(itertools.chain.from_iterable(two))
]
print(x)
y = [1, 2]

# The classifier is trained at import time on the two reference samples.
clf = Perceptron(random_state=241)
clf.fit(x, y)

if __name__ == "__main__":
    # Predict for the first training sample, then for an image read from disk.
    print(clf.predict([
        list(itertools.chain.from_iterable(one)),
    ]))
    print(
        clf.predict([
            list(itertools.chain.from_iterable(read_image('1.png'))),
        ]))
Exemplo n.º 10
0
import numpy as np
from matplotlib import pyplot

# Import from the public package: the private ``sklearn.linear_model.perceptron``
# module path no longer exists in current scikit-learn releases.
from sklearn.linear_model import Perceptron
from sklearn.metrics import accuracy_score

import usps
import perceptron # For the two_classes function

data, labels           = usps.load_train()
data_test, labels_test = usps.load_test()

# Train one Perceptron per value k in 0..9 on the labels produced by
# two_classes(labels, k), and report its accuracy on the train and test sets.
for k in range(10):
    labels_k = perceptron.two_classes(labels, k)

    net = Perceptron()
    net.fit(data, labels_k)
    output_train = net.predict(data)
    output_test  = net.predict(data_test)
    print(k)
    print("  Score (train)", accuracy_score(labels_k, output_train))
    labels_k_test = perceptron.two_classes(labels_test, k)
    print("  Score (test)", accuracy_score(labels_k_test, output_test))
Exemplo n.º 11
0
class PerceptronMask(StreamModel):
    """ PerceptronMask

    Adapter around scikit-learn's Perceptron classifier.

    It exposes the wrapped estimator through the method names used by
    ``StreamModel`` (fit, partial_fit, predict, predict_proba, score,
    get_info, reset) and forwards every call to ``self.classifier``.

    """
    def __init__(self,
                 penalty=None,
                 alpha=0.0001,
                 fit_intercept=True,
                 max_iter=1000,
                 tol=1e-3,
                 shuffle=True,
                 verbose=0,
                 eta0=1.0,
                 n_jobs=1,
                 random_state=0,
                 class_weight=None,
                 warm_start=False):
        self.penalty = penalty
        self.alpha = alpha
        self.fit_intercept = fit_intercept
        self.max_iter = max_iter
        self.tol = tol
        self.shuffle = shuffle
        self.verbose = verbose
        self.eta0 = eta0
        self.n_jobs = n_jobs
        self.random_state = random_state
        self.class_weight = class_weight
        self.warm_start = warm_start
        super().__init__()
        self.classifier = Perceptron(**self._perceptron_kwargs())

    def _perceptron_kwargs(self):
        """Return the stored hyper-parameters as a keyword-argument dict."""
        return {
            'penalty': self.penalty,
            'alpha': self.alpha,
            'fit_intercept': self.fit_intercept,
            'max_iter': self.max_iter,
            'tol': self.tol,
            'shuffle': self.shuffle,
            'verbose': self.verbose,
            'random_state': self.random_state,
            'eta0': self.eta0,
            'warm_start': self.warm_start,
            'class_weight': self.class_weight,
            'n_jobs': self.n_jobs,
        }

    def fit(self, X, y, classes=None, weight=None):
        """ fit

        Trains the wrapped Perceptron through its ``fit`` method.

        Parameters
        ----------
        X: numpy.ndarray of shape (n_samples, n_features)
            The feature matrix.

        y: Array-like
            The class labels for all samples in X.

        classes: Not used.

        weight: Forwarded to the wrapped estimator as ``sample_weight``.

        Returns
        -------
        PerceptronMask
            self

        """
        estimator = self.classifier
        estimator.fit(X, y, sample_weight=weight)
        return self

    def partial_fit(self, X, y, classes=None, weight=None):
        """ partial_fit

        Trains the wrapped Perceptron incrementally through its
        ``partial_fit`` method.

        Parameters
        ----------
        X: numpy.ndarray of shape (n_samples, n_features)
            The feature matrix.

        y: Array-like
            The class labels for all samples in X.

        classes: list, optional
            Forwarded as the third positional argument of the wrapped
            ``partial_fit``.

        weight: Forwarded as the fourth positional argument of the wrapped
            ``partial_fit``.

        Returns
        -------
        PerceptronMask
            self

        """
        estimator = self.classifier
        estimator.partial_fit(X, y, classes, weight)
        return self

    def predict(self, X):
        """ predict

        Predicts labels for the samples in X with the wrapped estimator.

        Parameters
        ----------
        X: numpy.ndarray of shape (n_samples, n_features)
            The feature matrix.

        Returns
        -------
        numpy.ndarray
            The wrapped estimator's predictions, converted with
            ``numpy.asarray``.

        """
        predicted = self.classifier.predict(X)
        return np.asarray(predicted)

    def predict_proba(self, X):
        """ predict_proba

        Returns per-class probability estimates for the samples in X.

        The values come from the wrapped estimator's private
        ``_predict_proba_lr`` helper.

        Parameters
        ----------
        X: Numpy.ndarray of shape (n_samples, n_features)
            A matrix of the samples we want to predict.

        Returns
        -------
        numpy.ndarray
            Whatever ``_predict_proba_lr`` returns -- presumably one row per
            sample and one column per known class (TODO confirm).

        """
        estimator = self.classifier
        return estimator._predict_proba_lr(X)

    def score(self, X, y):
        """ score

        Returns the wrapped estimator's score for the samples in X.

        Parameters
        ----------
        X: numpy.ndarray of shape (n_sample, n_features)
            The features matrix.

        y: Array-like
            An array-like containing the class labels for all samples in X.

        Returns
        -------
        float
            The classifier's score.

        """
        estimator = self.classifier
        return estimator.score(X, y)

    def get_info(self):
        """Return a one-line summary of the wrapped estimator's parameters."""
        params = self.classifier.get_params()
        reported = ('penalty', 'alpha', 'fit_intercept', 'max_iter', 'tol',
                    'shuffle', 'eta0', 'warm_start', 'class_weight', 'n_jobs')
        pieces = [' - {}: {}'.format(key, params[key]) for key in reported]
        return type(self).__name__ + ':' + ''.join(pieces)

    def reset(self):
        """Re-run ``__init__`` with the stored hyper-parameters.

        This replaces ``self.classifier`` with a new, unfitted Perceptron.
        """
        self.__init__(**self._perceptron_kwargs())
Exemplo n.º 12
0
print(acc_svc)  # 0.81218

# Linear support vector classifier (LinearSVC)
from sklearn.svm import LinearSVC

linear_svc = LinearSVC()
linear_svc.fit(x_train, y_train)
y_pred = linear_svc.predict(x_test)
# accuracy_score takes (y_true, y_pred); the value is the same either way,
# but the conventional order avoids surprises if the metric is swapped later.
acc_linear_svc = accuracy_score(y_test, y_pred)
print(acc_linear_svc)  # 0.77157

# Perceptron
# Import from the public package: the private ``sklearn.linear_model.perceptron``
# module path no longer exists in current scikit-learn releases.
from sklearn.linear_model import Perceptron

perceptron = Perceptron()
perceptron.fit(x_train, y_train)
y_pred = perceptron.predict(x_test)
acc_perceptron = accuracy_score(y_test, y_pred)
print(acc_perceptron)  # 0.77665

# Decision tree
from sklearn.tree import DecisionTreeClassifier

decisiontree = DecisionTreeClassifier()
decisiontree.fit(x_train, y_train)
y_pred = decisiontree.predict(x_test)
acc_decisiontree = accuracy_score(y_test, y_pred)
print(acc_decisiontree)  # 0.82233

# Random forest
from sklearn.ensemble import RandomForestClassifier
Exemplo n.º 13
0
        [1, -1, -1],  # Naranja
        [1, 1, -1],  # Manzana
        [1, -1, -1],  # Naranja
    ]), array([
        1,
        -1,
        1,
        1
    ])
]

# Create a Perceptron instance capped at 1000 epochs (max_iter=1000)
perceptron = Perceptron(max_iter=1000)

# Train the network on the samples (data[0]) and their labels (data[1])
perceptron.fit(data[0], data[1])


def test_net_accuracy():
    # Probar la certeza de la red neuronal con la data previamente alimentada
    # para filtrar una manzana y una naranja.
    data = [
        array([
            [1, 1, -1],   # Manzana
            [1, -1, -1],  # Naranja
            [-1, -1, -1]  # Una naranja que es elíptica
        ]), array([
            1,
            1,
            1
        ])
Exemplo n.º 14
0
class PerceptronMask(BaseClassifier):
    """ PerceptronMask

    A mask for scikit-learn's Perceptron classifier.

    Because scikit-multiflow's framework requires a few interfaces not
    present in scikit-learn, this mask allows the former to use classifiers
    native to the latter. Every method forwards to ``self.classifier``.

    """
    def __init__(self):
        super().__init__()
        # NOTE(review): ``n_iter`` is only accepted by old scikit-learn
        # releases (newer ones use ``max_iter``); get_info() reads the same
        # parameter, so both would have to change together.
        self.classifier = Perceptron(n_iter=50)

    def fit(self, X, y, classes=None, weight=None):
        """ fit

        Calls the Perceptron fit function from sklearn.

        Parameters
        ----------
        X: numpy.ndarray of shape (n_samples, n_features)
            The feature's matrix.

        y: Array-like
            The class labels for all samples in X.

        classes: Not used.

        weight: Instance weight, forwarded as ``sample_weight``.

        Returns
        -------
        PerceptronMask
            self

        """
        self.classifier.fit(X, y, sample_weight=weight)
        return self

    def partial_fit(self, X, y, classes=None, weight=None):
        """ partial_fit

        Calls the Perceptron partial_fit from sklearn.

        Parameters
        ----------
        X: numpy.ndarray of shape (n_samples, n_features)
            The feature's matrix.

        y: Array-like
            The class labels for all samples in X.

        classes: list, optional
            A list with all the possible labels of the classification problem.

        weight: Instance weight, forwarded as ``sample_weight``.

        Returns
        -------
        PerceptronMask
            self

        """
        # Keywords make the mapping onto sklearn's parameters explicit.
        self.classifier.partial_fit(X, y, classes=classes,
                                    sample_weight=weight)
        return self

    def predict(self, X):
        """ predict

        Uses the current model to predict samples in X.

        Parameters
        ----------
        X: numpy.ndarray of shape (n_samples, n_features)
            The feature's matrix.

        Returns
        -------
        The wrapped estimator's predictions for all instances in X.

        """
        return self.classifier.predict(X)

    def predict_proba(self, X):
        """ predict_proba

        Predicts the probability of each sample belonging to each one of the
        known classes.

        scikit-learn's Perceptron does not provide ``predict_proba``, so the
        previous direct call raised AttributeError. The estimates are taken
        from the estimator's ``_predict_proba_lr`` helper instead, which is
        what the other PerceptronMask variant in this file does.

        Parameters
        ----------
        X: Numpy.ndarray of shape (n_samples, n_features)
            A matrix of the samples we want to predict.

        Returns
        -------
        numpy.ndarray
            Whatever ``_predict_proba_lr`` returns -- presumably one row per
            sample of X and one column per known class (TODO confirm).

        """
        return self.classifier._predict_proba_lr(X)

    def score(self, X, y):
        """ score

        Returns the predict performance for the samples in X.

        Parameters
        ----------
        X: numpy.ndarray of shape (n_sample, n_features)
            The features matrix.

        y: Array-like
            An array-like containing the class labels for all samples in X.

        Returns
        -------
        float
            The classifier's score.

        """
        return self.classifier.score(X, y)

    def get_info(self):
        """Return a one-line description of the wrapped estimator's settings.

        Reports penalty, alpha (rounded to three decimals), fit_intercept,
        n_iter and shuffle as read from ``get_params()``.
        """
        params = self.classifier.get_params()
        penalty = params['penalty']
        # A missing penalty is reported as the text 'None' so it can be
        # concatenated with the rest of the message.
        penalty = 'None' if penalty is None else penalty
        fit_int = 'True' if params['fit_intercept'] else 'False'
        shuffle = 'True' if params['shuffle'] else 'False'
        return 'Perceptron: penalty: ' + penalty + \
               '  -  alpha: ' + str(round(params['alpha'], 3)) + \
               '  -  fit_intercept: ' + fit_int + \
               '  -  n_iter: ' + str(params['n_iter']) + \
               '  -  shuffle: ' + shuffle
Exemplo n.º 15
0
#encoding=utf8
import os
from sklearn.linear_model.perceptron import Perceptron
import pandas as pd

# Location of the prediction file produced by this script.
RESULT_PATH = './step2/result.csv'

# Remove any result file left over from a previous run.
if os.path.exists(RESULT_PATH):
    os.remove(RESULT_PATH)

# Load the training features, the training labels ('target' column) and the
# test features.
train_data = pd.read_csv('./step2/train_data.csv')
train_label = pd.read_csv('./step2/train_label.csv')['target']
test_data = pd.read_csv('./step2/test_data.csv')

# Fit the perceptron on the training set, then predict the test set.
clf = Perceptron(eta0=0.1, max_iter=500)
clf.fit(train_data, train_label)
res = clf.predict(test_data)

# Save the predictions as a single 'result' column, without the index.
res = pd.DataFrame({"result": res})
res.to_csv(RESULT_PATH, index=0)
def result():
    """Train a panel of text classifiers on an uploaded CSV and keep the best.

    On a POST request this view:

    1. reads the uploaded CSV (form file ``myFile``, ISO-8859-1 encoded);
    2. takes the text column named by the ``feature`` form field and the
       target column named by the ``label`` form field;
    3. strips URLs and every character other than ASCII letters, digits
       and spaces from the text, then lower-cases it;
    4. splits the data (33% held out) and fits a unigram TF-IDF vectoriser
       on the training part;
    5. fits twelve classifiers, printing the test accuracy and elapsed time
       of each;
    6. pickles the first classifier that reaches the highest accuracy to
       ``<filename>_model`` and the fitted vectoriser to
       ``<filename>_tfidfVect``, where ``filename`` is the form field of
       that name.

    The test set is vectorised (and densified) once up front, so the printed
    times cover estimator construction, fitting, prediction and scoring only.

    Returns
    -------
    The rendered ``nameError.html`` template when either requested column is
    missing from the CSV, otherwise ``result.html`` rendered with the twelve
    accuracies as ``ac1`` .. ``ac12``. Any non-POST request returns ``None``.
    """
    # NOTE(review): non-POST requests fall through with no response value,
    # exactly as before; confirm the route is registered for POST only.
    if request.method != 'POST':
        return None

    path = request.files.get('myFile')
    df = pd.read_csv(path, encoding="ISO-8859-1")

    # NOTE(review): `filename` is taken verbatim from the form and used below
    # as a filesystem path prefix; it should be validated before use.
    filename = request.form['filename']

    str1 = request.form['feature']
    str2 = request.form['label']

    columns = list(df)
    if str1 not in columns or str2 not in columns:
        return render_template('nameError.html')
    y = df[str2]
    X = df[str1]

    # Drop URLs first, then anything that is not an ASCII letter, digit or
    # space, and finally normalise the case.
    # (A lemmatisation / stop-word removal step used to be sketched here as
    # disabled code; it was never executed.)
    cleaned = [
        re.sub(r'[^a-zA-Z0-9 ]+', '', re.sub(r"http\S+", "", subject))
        for subject in X
    ]
    X = pd.Series(cleaned).str.lower()

    X_train, X_test, y_train, y_test = train_test_split(X,
                                                        y,
                                                        test_size=0.33)

    tfidfvect = TfidfVectorizer(ngram_range=(1, 1))
    X_train_tfidf = tfidfvect.fit_transform(X_train)

    # Vectorise the test set once, and build the dense copies once, instead
    # of recomputing them for every single model.
    X_test_tfidf = tfidfvect.transform(X_test)
    X_train_dense = X_train_tfidf.toarray()
    X_test_dense = X_test_tfidf.toarray()

    # (report template, estimator class, constructor kwargs, wants dense input)
    # The order matters: it defines ac1..ac12 and the tie-break when several
    # models share the best accuracy (the earliest one wins).
    # NOTE(review): the last entry is reported as "XGBC" but is a second
    # SGDClassifier, identical to the one before it.
    candidates = [
        ("accuracy SVC: {} and time: {} s", LinearSVC, {}, False),
        ("accuracy LR: {} and time: {}", LogisticRegression,
         {'n_jobs': -1, 'multi_class': 'multinomial', 'solver': 'newton-cg'},
         False),
        ("accuracy RFC: {} and time: {}", RandomForestClassifier,
         {'n_jobs': -1}, False),
        ("accuracy MNB: {} and time: {}", MultinomialNB, {}, False),
        ("accuracy GNB: {} and time: {}", GaussianNB, {}, True),
        ("accuracy LRCV: {} and time: {}", LogisticRegressionCV,
         {'n_jobs': -1}, False),
        ("accuracy ABC: {} and time: {}", AdaBoostClassifier, {}, False),
        ("accuracy BNB: {} and time: {}", BernoulliNB, {}, True),
        ("accuracy Per: {} and time: {}", Perceptron, {'n_jobs': -1}, True),
        ("accuracy RidCV: {} and time: {}", RidgeClassifierCV, {}, True),
        ("accuracy SGDC: {} and time: {}", SGDClassifier,
         {'n_jobs': -1}, True),
        ("accuracy XGBC: {} and time: {}", SGDClassifier,
         {'n_jobs': -1}, True),
    ]

    models = []
    accuracies = []
    for report, estimator_cls, params, wants_dense in candidates:
        train_features = X_train_dense if wants_dense else X_train_tfidf
        test_features = X_test_dense if wants_dense else X_test_tfidf

        start = time()
        model = estimator_cls(**params)
        model.fit(train_features, y_train)
        accuracy = accuracy_score(y_test, model.predict(test_features))
        end = time()
        print(report.format(accuracy, (end - start)))

        models.append(model)
        accuracies.append(accuracy)

    # list.index returns the first position holding the maximum, which is
    # the same winner the former if/elif chain would have picked.
    best_model = models[accuracies.index(max(accuracies))]

    # Context managers make sure both files are flushed and closed.
    with open(filename + '_model', 'wb') as model_file:
        pickle.dump(best_model, model_file)
    with open(filename + '_tfidfVect', 'wb') as vectorizer_file:
        pickle.dump(tfidfvect, vectorizer_file)

    scores = {
        'ac{}'.format(position): accuracy
        for position, accuracy in enumerate(accuracies, start=1)
    }
    return render_template("result.html", **scores)
Exemplo n.º 17
0
    X_train, X_test, y_train, y_test = train_test_split(X,
                                                        y,
                                                        test_size=0.20,
                                                        random_state=i)

    # Normalizando as variaveis de treino e teste
    from sklearn.preprocessing import StandardScaler
    sc = StandardScaler()
    X_train = sc.fit_transform(X_train)
    X_test = sc.transform(X_test)

    # Treinando o Perceptron com o conjunto de treino previamente separado
    #Passa-se 100  vezes pelo conjunto de dados e o learning rate = 0.001
    from sklearn.linear_model.perceptron import Perceptron
    classifier = Perceptron(max_iter=100, eta0=0.1)
    classifier.fit(X_train, y_train)

    # Apresentando os dados de teste a rede Perceptron e obtendo as predicoes
    y_pred = classifier.predict(X_test)

    # Fazendo a matriz de confusao
    from sklearn.metrics import confusion_matrix
    cm = confusion_matrix(y_test, y_pred)
    '''TP = # True Positives, TN = # True Negatives,
     FP = # False Positives, FN = # False Negatives
    
    Acerto = TP / (TP + FP)
    '''
    #Aplicando o metodo de avaliacao

    TP = cm[0][0]