Exemplo n.º 1
0
Arquivo: Main.py Projeto: oldteb/AI
def main():
	"""Construct a ``KNN`` from ``hw5data.txt`` (second argument 15) and run it.

	The ANN and A* maze runs are kept below, disabled.
	"""
	# ann = ANN("hw5data.txt",10)
	# ann.run()

	# maze = AStarMaze("Maze.txt",10)
	# maze.ASSolver()

	KNN("hw5data.txt", 15).run()
Exemplo n.º 2
0
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

from KNN import KNN

# Fetch the iris data; the file has no header row, so column names are supplied.
path = "https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data"
names = ["sepal_length", "sepal_width", "petal_length", "petal_width", "class"]
dataset = pd.read_csv(path, names=names)

# Replace the class column with the integer codes produced by LabelEncoder.
encoder = LabelEncoder()
dataset["class"] = encoder.fit_transform(dataset["class"])

# Fixed random_state keeps the 75/25 split reproducible.
train_set, test_set = train_test_split(dataset, test_size=0.25, random_state=0)

print("Train set size : ", len(train_set))
print("Test set size : ", len(test_set))

knn = KNN(7)
# One prediction per test row; the last column (the class) is dropped from
# the row before it is handed to the classifier.
predictions = [
    knn.predict(train_set, row[:-1]) for _, row in test_set.iterrows()
]

true_labels = np.array(test_set.iloc[:, -1])
score = knn.evaluate(true_labels, predictions)
print("KNN Score = ", score)
import RiskParser as rp
from ANN import ANN
from Dtree import Dtree
from KNN import KNN
from LogR import LogR
from NaiveEnsemble import NaiveEnsemble
import numpy as np

# Parse the risk-assessment CSV and echo what was read.
inputs, outputs = rp.parse_data("RiskAssessData.csv")
print(inputs)
print(outputs)
print("### ALL TESTS ###")

# One instance of each base model; the same instances are handed to a fresh
# NaiveEnsemble on every pass of the loop below.
test_ANN = ANN()
test_DTree = Dtree()
test_KNN = KNN(10)
test_LogR = LogR()
# NOTE(review): neither list is filled in the visible part of this script;
# presumably the loop body continues beyond this excerpt -- confirm.
ensemble_results = []
individual_results = []
# Three split/train/report rounds.
for i in range(0, 3):
    # The Dtree instance is used here only for its split_data helper.
    # Presumably .25 is the test fraction -- TODO confirm against Dtree.
    x_train, x_test, y_train, y_test = test_DTree.split_data(
        inputs, outputs, .25)
    test_NaiveEnsemble = NaiveEnsemble(
        [test_ANN, test_KNN, test_LogR, test_DTree])
    test_NaiveEnsemble.train(x_train, y_train)
    # Accuracy as reported by the ensemble on the x_test/y_test split.
    ensemble = test_NaiveEnsemble.report_accuracy(x_test, y_test)
    print("Naive Ensemble Test accuracy: test data set", ensemble)
    #print("Naive Ensemble Test accuracy: train data set",test_NaiveEnsemble.report_accuracy(x_train,y_train))

    #test_KNN.train(x_train,y_train)
    #test_NaiveEnsemble.model_list.append(test_KNN)
Exemplo n.º 4
0
#***Train SVM***#
classifier = svm.SVC(C=1, decision_function_shape="ovr")
classifier.fit(X_train, y_train)

# Predict on X_test with the fitted SVM.
prediction = classifier.predict(X_test)

# Count the positions where the prediction equals the entry of y_test.
correct = sum(
    1 for guess, truth in zip(np.nditer(prediction), np.nditer(y_test))
    if guess == truth)

acc = float(correct) / y_test.size
print("SVM accuracy: {}".format(acc))

#***KNN Model***#
knn = KNN(k=10)
predictions = knn.predict(X_train, X_test, y_train)

# Same agreement count, this time for the KNN predictions.
correct = sum(
    1 for guess, truth in zip(np.nditer(predictions), np.nditer(y_test))
    if guess == truth)

acc = float(correct) / y_test.size
print("KNN accuracy: {}".format(acc))
    index_col='tripId')

# Read the tab-separated test set; each "Trajectory" cell is parsed from its
# textual form with literal_eval.
testSet = pd.read_csv(
    'test_dataset/test_set_a2.csv',  # replace with the correct path
    converters={"Trajectory": literal_eval},
    sep="\t")

# Create the root output directory if it is not there yet.
path = 'Test_Subways/'
output_root_exists = os.path.exists(path)
if not output_root_exists:
    os.makedirs(path)
#trainSet = trainSet[:400]

for i in range(len(testSet)):
    start_time = time.time()
    origin = testSet['Trajectory'].iloc[i]
    knn = KNN(5, LCSS(Compare_Harvesine), origin, True)
    for j in range(len(trainSet)):
        knn.calculate_neighbor(trainSet['Trajectory'].iloc[j], j)
    results = knn.results()
    elapsed_time = time.time() - start_time
    path = 'Test_Subways/Test_Subways_' + str(i + 1)
    if not os.path.exists(path):
        os.makedirs(path)
    file = open(path + '/results', 'w')
    file.write("Test_Subways_" + str(i + 1) + "\n")
    print_map(origin, 'Test_Subways_' + str(i + 1), path, None)
    count = 1
    for k in results:
        print_map(trainSet['Trajectory'].iloc[k[0]], 'Neighbor_' + str(count),
                  path, Find_Subsequence(k[2]))
        file.write('Neighbor_' + str(count) + '\nJP_ID: ' +
from FeatureScaling import FeatureScaling
# Scale the feature matrix; y is passed to the scaler but only X is replaced.
fs = FeatureScaling(X, y)
X = fs.fit_transform_X()

# First train_size rows are used for training, the remainder for testing.
X_train, X_test = X[:train_size, :], X[train_size:, :]
Y_train, Y_test = y[:train_size], y[train_size:]

#importing KNN class
from KNN import KNN

# Time construction plus prediction together.
l = time.time()
knn = KNN(X_train, Y_train, 5)
y_pred = knn.predict(X_test)
r = time.time()
KNN_learn_time = r - l
print(r - l)

# Tally (actual, predicted) label pairs over the test set.
# NOTE(review): if class 0 is the positive class, the (actual 0, predicted 1)
# count is conventionally a false negative; the name `tn` is kept as-is
# because code outside this excerpt may rely on it -- confirm.
tp = sum(
    1 for i in range(Y_test.shape[0]) if Y_test[i] == 0 and y_pred[i] == 0)
tn = sum(
    1 for i in range(Y_test.shape[0]) if Y_test[i] == 0 and y_pred[i] == 1)
fp = sum(
    1 for i in range(Y_test.shape[0]) if Y_test[i] == 1 and y_pred[i] == 0)
Exemplo n.º 7
0
    y_test = test_np[:, -1]
    property_list = [x for x in range(len(test_np[0, :-1]))]
    max_acc = 0
    best_sub = ()
    tmp_sub = 0
    flag = 0
    while True:
        for i in range(8):
            if i not in best_sub:
                subset = best_sub + (i,)
            else:
                continue
            # subset = (1,7,6,5,2)
            train_tmp = norm_train[:, (subset + (8,))]
            test_tmp = norm_test[:, subset]
            y_pred = KNN(train_tmp, test_tmp, 9)
            acc = accuracy_score(y_test, y_pred)
            # acc = float(np.sum(y_test == y_pred) / len(y_test))
            # print("for element " + str(subset) +" recieved acc of " + str(acc) + "\n")
            if acc >= max_acc:
                flag = 1
                max_acc = acc
                tmp_sub = subset
        best_sub = tmp_sub
        if flag == 0:
            break
        flag = 0
    print(list(best_sub))
    # print(max_acc)

Exemplo n.º 8
0
        elif "t_" in item:
            topic_list.append(item[2:])

    word_list = word_list[1:] # Remove 'Article #'
    words_topics_size = len(topic_list) + len(word_list)

    for row in dataMatrix[2:]:
        matrix.append( [row[0]] + map(int, row[1:1 + words_topics_size]) )
    return {"topic_list":topic_list, "word_list": word_list, "matrix": matrix}


##### MAIN #####
# Command-line driver: expects exactly "-k <int> -t <int>" in either order.
dataMatrix = parseDM()
arg_list = sys.argv
usage = "Usage: ./DM2_KNN.py -k <neighborcount> -t <testpercentage>"
if len(arg_list) != 5:
    # Single-argument print(...) behaves the same on Python 2 and 3.
    print(usage)
    sys.exit(1)

# Start from "not given" so a missing or duplicated flag is detected below
# instead of surfacing later as a NameError on k or t.
k = None
t = None
for flag, value in ((arg_list[1], arg_list[2]), (arg_list[3], arg_list[4])):
    if flag == '-k':
        k = int(value)
    elif flag == '-t':
        t = int(value)

if k is None or t is None:
    print(usage)
    sys.exit(1)

# k is passed to the KNN constructor, t to test_split.
knn = KNN(dataMatrix, k)
knn.test_split(t)
Exemplo n.º 9
0
from KNN import KNN
from numberClassification import NumClassification
import numpy as np


def acc(pred, label):
    """Return the number of positions where *pred* equals *label*, divided by ``len(pred)``."""
    hits = np.equal(pred, label).sum()
    return hits / len(pred)


# Point the loader at the training and test digit directories.
nc = NumClassification(
    testPath="digits/testDigits",
    trainingPath="digits/trainingDigits",
)

trainingDataset = nc.buildTrainingDataset()
testDataset, labels = nc.buildTestDataset()

# KNN is built with 3 as its second argument and isnorm turned off.
knn = KNN(trainingDataset, 3, isnorm=False)
pred = knn.infer(testDataset)

accuracy = acc(pred, labels)
print(accuracy)
Exemplo n.º 10
0
# Convert the right image from BGR to RGB channel order.
# (presumably img_right was read with cv2.imread earlier -- confirm)
img_right = cv2.cvtColor(img_right, cv2.COLOR_BGR2RGB)
# img_right is RGB from here on, so the grayscale conversion has to use the
# RGB variant; COLOR_BGR2GRAY would apply the red and blue weights to the
# wrong channels.
img_r = cv2.cvtColor(img_right, cv2.COLOR_RGB2GRAY)

# Same pipeline for the left image: load (BGR), convert to RGB, then to gray.
img_left = cv2.imread('Resources/left.jpg')
img_left = cv2.cvtColor(img_left, cv2.COLOR_BGR2RGB)
img_l = cv2.cvtColor(img_left, cv2.COLOR_RGB2GRAY)

# SIFT extractor, capped at 10000 features.
sift = cv2.xfeatures2d.SIFT_create(nfeatures=10000)

# Keypoints/descriptors: index 1 comes from img_r, index 2 from img_l.
kp1, des1 = sift.detectAndCompute(img_r, None)
kp2, des2 = sift.detectAndCompute(img_l, None)

# Match descriptors with the project's KNN solver (per the original note,
# the matches are validated with a ratio test -- presumably inside solve()).
knn_solver = KNN(des1, des2, 2)
matches = knn_solver.solve()

# For every match, take the coordinates of the keypoint in image 1 and of
# the first matched keypoint in image 2.
matched_pts = [
    (np.array(kp1[m.index_1].pt), np.array(kp2[m.indices_2[0]].pt))
    for m in matches
]

# Transpose so that each column is one point.
valid_kp1 = np.array([first for first, _ in matched_pts]).T
valid_kp2 = np.array([second for _, second in matched_pts]).T

H = find_homography(valid_kp1, valid_kp2, 5000)
print(H)
    'test_dataset/test_set_a2.csv',  # replace with the correct path
    sep="\t",
    converters={"Trajectory": literal_eval})

#train,test = train_test_split(trainSet,test_size=0.01)

#print len(train)
#print len(test)
#Initialize Encoder
# Encode the journey-pattern ids of the training set as integer labels.
le = preprocessing.LabelEncoder()
le.fit(trainSet["journeyPatternId"])
y = le.transform(trainSet["journeyPatternId"])

# X: training trajectories, Y: test trajectories (Y is NOT a label vector).
X = trainSet['Trajectory']
Y = testSet['Trajectory']
knn = KNN(5, DTW(Harvesine))

knn.fit(X, y)

# Predict encoded labels for the test trajectories and map them back to the
# original journey-pattern ids.
knn_pred = knn.predict(Y)
predicted_categories = le.inverse_transform(knn_pred)
print(predicted_categories)

# NOTE(review): 'wb' is the mode the Python 2 csv module expects; under
# Python 3 this would need open(..., 'w', newline='') -- confirm the target
# interpreter before changing it.
with open('testSet_JourneyPatternIDs.csv', 'wb') as csvfile:
    csvwriter = csv.writer(csvfile,
                           delimiter=',',
                           quotechar='|',
                           quoting=csv.QUOTE_MINIMAL)
    # Header row; the stray ']' that used to end the second column name was
    # a typo and has been removed.
    csvwriter.writerow(['Test_Trip_ID', 'Predicted_JourneyPatternID'])
    # Trip ids are 1-based positions in the test set.
    for i in range(len(testSet)):
        csvwriter.writerow([str(i + 1), predicted_categories[i]])
Exemplo n.º 12
0
def main():
    """Build the Database, KNN, FaceRecog and Telegram objects in dependency order and call ``Telegram.start()``."""
    database = Database()
    classifier = KNN(database)
    recogniser = FaceRecog(database, classifier)
    Telegram(recogniser, database).start()
Exemplo n.º 13
0
from KNN import KNN
from sklearn.model_selection import train_test_split
from sklearn.datasets import load_digits

# scikit-learn digits data set: features and targets.
digits = load_digits()
X, y = digits.data, digits.target

# Hold out 33% of the samples; the seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.33, random_state=42)

kNN = KNN(1)
kNN.train(X_train, y_train)
res = kNN.predict(X_test)

print('Real--->Predicted')

# One line per test sample: actual label next to the predicted one.
for idx, actual in enumerate(y_test):
    print('  %d ---> %d' % (actual, res[idx]))

# The label printed below is Chinese for "prediction accuracy:".
print('预测准确率:')
print(kNN.score(X_test, y_test))
Exemplo n.º 14
0
    # TODO: think about extend the code to other classification methods, features extractors ... maybe a switch?

    if args.descriptor == 'sift':
        features_descriptor = SIFT(nfeatures=100)

    elif args.descriptor == 'surf':
        features_descriptor = SURF(nOctaves=4, nOctaveLayers=2)

    elif args.descriptor == 'hog':
        features_descriptor = HOG()

    else:
        features_descriptor = None
        print('Invalid descriptor')

    myKNN = KNN(nneighbors=100, features_descriptor=features_descriptor)
    myEvaluation = Evaluation(evaluation_path=args.evaluation_path,
                              save_plots=True)

    if args.do_train:

        start_time = time.time()

        if args.train_method == 'kfold':

            # make K trainings, save the evaluation metrics and models, then decide the best model
            evaluation_metrics = np.array([], dtype=float)
            model = []

            for k in range(args.kfold_k):
Exemplo n.º 15
0
#     data = Data()  # loads the data and checks if complete
#
#     while True:
#         data.load_data()
#         data.split_data()  # split into both test and train
#         predicted_class = {}  # holds data_set_name and a list of predicted classes
#
#         for name, train_data_set in data.train_dict.items():  # iterate through data and get key(Data name) and data_set
#             print("Current Data Set: ", name)
#             predicted_class[name] = []  # create a list of for a data set of predicted values
#             test_data_set = data.test_dict[name]  # TODO: Use same keys for all dictionaries; Access testing data by key.
#             for _, query_point in train_data_set.iterrows():
#                 # give query example and its corresponding train_data_set, along with # of desired neighbors to consider
#                 predicted_class[name].append(knn.perform_knn(query_point, train_data_set, 5, name, data))

# Module-level objects; `data` and `lf` are used by run_zero_loss() below.
knn = KNN()
data = Data()  # loads the data and checks if complete
lf = LF()
# Load once at import time; splitting happens later, inside run_zero_loss().
data.load_data()


def run_zero_loss():
    """
    Split the loaded data, draw 400 rows from the 'abalone' test set and pass
    them to ``lf.zero_one_loss`` together with 5, the data-set name and the
    shared ``data`` object.
    :return: None
    """
    # split into both test and train
    data.split_data()
    abalone_sample = data.test_dict['abalone'].sample(n=400)
    lf.zero_one_loss(abalone_sample, 5, 'abalone', data)


def run_k_means(indata):  # Run k-means on wine data set'knn = KNN()
Exemplo n.º 16
0
from KNN import KNN
from utils import get_xor
import matplotlib.pyplot as plt

if __name__ == '__main__':
    X, Y = get_xor()

    # Plot the first two feature columns, coloured by Y.
    plt.scatter(X[:, 0], X[:, 1], s=100, c=Y, alpha=0.5)
    plt.show()

    # Fit and score on the same data for constructor arguments 1 through 20.
    for n_neighbours in range(1, 21):
        model = KNN(n_neighbours)
        model.fit(X, Y)
        train_accuracy = model.score(X, Y)
        print('Neighbours:', n_neighbours, 'Train accuracy:', train_accuracy)
Exemplo n.º 17
0
     input_instances.calcNumClases()
     #2. PREPROCESO
     preproceso.randomizarInstancias(input_instances)
     
     #3. kNN
     #70% instancias con clase, 30% instancias a clasificar
     k = int(sys.argv[2]) #num de vecinos a explorar
     m = float(sys.argv[3]) #m de la dist de Minkowski
     porcentaje = sys.argv[4]
     porcentaje = float(porcentaje)
     if(porcentaje>100.0 or porcentaje<0.0):
         raise Exception()
     train = input_instances.getPorcentaje(porcentaje) #obtenemos el % de las instancias (para train) 
     test = input_instances #lo restante (test) (el getPorcentaje actualiza la lista haciendo pop())
     print("Usando k="+str(k)+", m="+str(int(m))+" y usando el " + str(porcentaje) +"% de las instancias para train\n")
     print("Clasificando...")
     
     clasificador = KNN(train, test, k, m) #creamos el clasificador con las instancias de las q sabemos la clase, las q queremos predecir la clase, k, m
     clasificador.execute() #calculamos los vecinos proximos, y predecimos la clase
     prediccion = clasificador.getPrediccion()
     
     #4. EVALUACION    
     evaluacion.mostrarFigurasMerito(prediccion, input_instances) #Specificity y Recall
     #evaluacion.espacioROC() #representar graficamente los modelos en el espacio ROC.
     time = str(datetime.now()-start_time)
     time = time[5:]
     print("Duración de la ejecución del programa: " + time + " segundos")
 except (IOError):
     print("Error de lectura de fichero")
 #except(Exception):
     #print("Error desconocido. Seguramente hayas introducido incorrectamente algún argumento")
from KNN import KNN
import os
# Change the model type variable value to "CNN" to use the Convolutional Neural Network
# Change the model type variable value to "KNN" to use the K Nearest Neighbours Classifier
modeltype = "KNN"

# Make sure a saved model of the selected kind exists before the UI starts;
# nothing is done when the file is already on disk.
if modeltype == "KNN":
    if not os.path.exists("knn.sav"):
        print("Saved KNN Classifier not found....")
        print(
            "Downloading MNIST Data, training KNN classifier and saving as knn.sav......"
        )
        print("Kindly wait for a few minutes............")
        knnobj = KNN(3)
        knnobj.skl_knn()
elif not os.path.exists("cnn.hdf5"):
    # Any modeltype other than "KNN" takes the CNN path.
    print("cnn.hdf5 not found...")
    print("Loading MNIST Data, training CNN and saving as cnn.hdf5.....")
    print("Kindly wait a few minutes.........")
    cnnobj = CNN()
    cnnobj.build_and_compile_model()
    cnnobj.train_and_evaluate_model()
    cnnobj.save_model()

# Run the UI with the chosen model type, then let it clean up.
MainUIobj = MainUI(modeltype)
MainUIobj.mainloop()
MainUIobj.cleanup()
Exemplo n.º 19
0
 print("2. Density Based Clustering")
 print("3. Hierarchical Clustering")
 print("4. K-means Clustering")
 print("5. Learning Vector Quantization Clustering")
 print("6. Mixture of Gaussian Clustering")
 print("-------Decision Tree-------")
 print("7. ID3")
 print("8. C4.5")
 print("9. CART")
 print("---Recommendation Algorithm---")
 print("10. MF")
 print("11. PMF")
 print("-------------------------------------------------------")
 num = input("Enter the number of the algorithm you want to execute:")
 if num == str(1):
     algorithm = KNN.KNN()
     algorithm.execute()
 elif num == str(2):
     algorithm = DensityBasedClustering.DensityBasedClustering()
     algorithm.execute()
 elif num == str(3):
     algorithm = HierarchicalClustering.HierarchicalClustering()
     algorithm.execute()
 elif num == str(4):
     algorithm = KMeansAlgorithm.KMeansAlgorithm()
     algorithm.execute()
 elif num == str(5):
     algorithm = LearningVectorQuantization.LearningVectorQuantization()
     algorithm.execute()
 elif num == str(6):
     algorithm = MixtureOfGaussianAlgorithm.MixtureOfGaussianAlgorithm()
Exemplo n.º 20
0
        binarizer = LabelBinarizer()
        Y = binarizer.fit_transform(y)
        # By inspection I know that the number of examples will be 50,000
        # This allows me to set an upper limit on the number of neighbours I want to
        # Test For. I inspected the 'X' matrix using the debugger tool in PYCHARM

        k_values = np.array(
            range(1, 20)
        )  #np.array(range(1,10)) # range of K values I want to test. This is okay for my computers memory capabilities

        y_pred = np.zeros(k_values.size)
        best_k = 0  #This is a dummy variable I will use to keep track of the k value which returns the best test error
        best_error = 1000000000000000000  #Initalizing a random value to store the best test error and track the value of it

        for k in k_values:
            model = KNN(k)
            model.fit(X, y)

            #Computing the validation Error with Xtest and yest
            y_pred = model.predict(Xtest)
            test_error = np.mean(y_pred != ytest)
            if test_error < best_error:
                best_k = k
                best_error = test_error

        print(best_k)
        print(best_error)

    elif question == "1.2":

        with gzip.open('mnist.pkl.gz', 'rb') as f: