예제 #1
0
    def solve(self):
        """Fit a decision tree on a 70/30 split and print the test accuracy.

        ``self.inputs`` / ``self.outputs`` are split with a fixed seed. The
        column groups named by ``self.repository.numericCols`` and
        ``self.repository.stringCols`` are extracted separately: the numeric
        group goes through ``normalisation`` and the string group through
        ``self.vectorizeStringFeatures``. Both groups are then recombined with
        ``self.unionFeatures`` before the classifier is trained.

        Returns nothing; the accuracy is written to stdout.
        """
        train_rows, test_rows, train_labels, test_labels = train_test_split(
            self.inputs, self.outputs, test_size=0.3, random_state=1
        )
        repo = self.repository

        # Extract each column group from each subset (train first, then test).
        train_num = self.getFeatures(train_rows, repo.numericCols)
        train_str = self.getFeatures(train_rows, repo.stringCols)
        test_num = self.getFeatures(test_rows, repo.numericCols)
        test_str = self.getFeatures(test_rows, repo.stringCols)

        # Numeric columns are normalised as a train/test pair.
        train_num, test_num = normalisation(train_num, test_num)

        # String columns are vectorised (presumably into numeric values —
        # confirm against vectorizeStringFeatures).
        train_str = self.vectorizeStringFeatures(train_str)
        test_str = self.vectorizeStringFeatures(test_str)

        train_matrix = self.unionFeatures(train_num, train_str)
        test_matrix = self.unionFeatures(test_num, test_str)

        model = tree.DecisionTreeClassifier(criterion='gini', random_state=1)
        model.fit(train_matrix, train_labels)

        acc = accuracy(model.predict(test_matrix), test_labels)
        print("accuracy:", acc)
예제 #2
0
from utils import loadData, splitData,flatten,normalisation
import matplotlib.image as mpimg
from sklearn import neural_network
from sklearn.metrics import accuracy_score

if __name__ == "__main__":
    # Load the samples and split them into training and validation subsets.
    samples, labels = loadData("data.csv")
    train_x, train_y, valid_x, valid_y = splitData(samples, labels)

    # Apply the utils `flatten` helper to every sample of both subsets.
    train_flat = list(map(flatten, train_x))
    valid_flat = list(map(flatten, valid_x))

    train_norm, valid_norm = normalisation(train_flat, valid_flat)

    # One hidden layer of 5 ReLU units, trained with SGD and a fixed seed.
    mlp_settings = dict(
        hidden_layer_sizes=(5,),
        activation='relu',
        max_iter=1000,
        solver='sgd',
        verbose=10,
        random_state=1,
        learning_rate_init=.1,
    )
    classifier = neural_network.MLPClassifier(**mlp_settings)
    classifier.fit(train_norm, train_y)
    predicted = classifier.predict(valid_norm)

    # Report the expected vs. predicted labels and the resulting accuracy.
    print("Real labels: ", valid_y)
    print("Computed labels: ", predicted)
    acc = accuracy_score(valid_y, predicted)
    print("Accuracy: ", acc)





import variables
import variables_image_recognition
import lime
import lime.lime_tabular
import sklearn
import sklearn.metrics
from sklearn.linear_model import LogisticRegression
from sklearn.datasets import fetch_20newsgroups
from lime.lime_text import LimeTextExplainer
from utils_lime import Draw_Lime_On_Logistic_Regression

# NOTE(review): this unconditional exit() terminates the script right after
# the imports, so none of the statements below run as written — confirm
# whether that is intentional (e.g. the workflow was temporarily disabled).
exit()

# NOTE(review): load_data, normalisation, NN, accuracy_score and plt are not
# brought in by the import lines visible above this script — presumably they
# are expected from elsewhere; TODO confirm.

# Workflow 1: train the NN model on normalised data, print its error rate and
# show a LIME tabular explanation for the first test row.
if variables.choice_of_work == 1:
    xtrain, ytrain, xtest, ytest = load_data(variables.file_name)
    xtrain, xtest = normalisation(xtrain, xtest)
    n = NN(variables.choice, variables.w)
    n.fit(xtrain, ytrain)
    ypred = n.pred(xtest)
    # Printed value is the classification error (1 - accuracy), not accuracy.
    print(1 - accuracy_score(ytest, ypred))
    explainer = lime.lime_tabular.LimeTabularExplainer(xtrain,
                                                       mode='classification')
    # NOTE(review): LIME documents `labels` as an iterable of labels; a bare
    # int is passed here together with top_labels=1 — confirm this is intended.
    exp = explainer.explain_instance(xtest[0],
                                     n.pred_proba,
                                     labels=1,
                                     top_labels=1)
    exp.as_pyplot_figure()
    plt.show()
# Workflow 2: starts with the same load + normalise steps as workflow 1; the
# rest of this branch is not visible in this excerpt.
elif variables.choice_of_work == 2:
    xtrain, ytrain, xtest, ytest = load_data(variables.file_name)
    xtrain, xtest = normalisation(xtrain, xtest)
예제 #4
0
from utils import loadData, splitData, normalisation, transform, myNormalisation
from sklearn import linear_model
from sklearn.metrics import accuracy_score

if __name__ == "__main__":
    # 1. Read the samples and their class labels from the iris data file.
    features, labels = loadData("iris.data")

    # 2. Separate a training subset from a validation subset.
    trainX, trainY, validX, validY = splitData(features, labels)

    # 3. Normalise both subsets with the shared utils helper.
    trainScaled, validScaled = normalisation(trainX, validX)

    # 4. Fit a logistic-regression model; labels go through `transform` first.
    model = linear_model.LogisticRegression()
    model.fit(trainScaled, transform(trainY))

    # 5. Predict the validation subset.
    predictions = model.predict(validScaled)

    # 6. Evaluate: show predictions next to the transformed true labels and
    #    report the classification error (1 - accuracy).
    print("Computed outputs", predictions)
    print("Real outputs", transform(validY))
    accuracy = accuracy_score(transform(validY), predictions)
    print("classification error (tool): ", 1 - accuracy)
# Per-feature views of the full data set (column 0 and column 1 of `inputs`).
feature1 = [ex[0] for ex in inputs]  # Economy..GDP.per.Capita.
feature2 = [ex[1] for ex in inputs]  # Freedom

# Random 80/20 train/test partition of the sample indices.
import numpy as np
np.random.seed()  # no argument: reseed from fresh OS entropy on every run
indexes = list(range(len(inputs)))
trainSample = np.random.choice(indexes, int(0.8 * len(inputs)), replace=False)
# Membership is checked against a set of plain ints: testing `i in trainSample`
# on the NumPy array scans the whole array for every index (quadratic overall).
trainIndexSet = set(trainSample.tolist())
testSample = [i for i in indexes if i not in trainIndexSet]

trainInputs = [inputs[i] for i in trainSample]
trainOutputs = [outputs[i] for i in trainSample]
testInputs = [inputs[i] for i in testSample]
testOutputs = [outputs[i] for i in testSample]

# Inputs and outputs are each normalised as a train/test pair.
trainInputs, testInputs = normalisation(trainInputs, testInputs)
trainOutputs, testOutputs = normalisation(trainOutputs, testOutputs)

# Per-feature views of the normalised subsets.
feature1train = [ex[0] for ex in trainInputs]
feature2train = [ex[1] for ex in trainInputs]

feature1test = [ex[0] for ex in testInputs]
feature2test = [ex[1] for ex in testInputs]

# Fit a two-feature linear model with scikit-learn's SGD regressor.
from sklearn import linear_model
from sklearn.metrics import mean_squared_error
regressor = linear_model.SGDRegressor(alpha=0.01, max_iter=1000, average=True)
regressor.fit(trainInputs, trainOutputs)
# Intercept plus one coefficient per input feature.
w0, w1, w2 = regressor.intercept_[0], regressor.coef_[0], regressor.coef_[1]
print('the learnt model: f(x1,x2) = ', w0, ' + ', w1, ' * x1', '+', w2, '*x2')