def Classification(data, centroids):
    """Assign every row of ``data`` to its nearest centroid, in place.

    The last column of ``data`` is treated as the class label and is
    overwritten with the label stored in the last column of the closest
    centroid. Distances are computed on the feature columns only.

    Args:
        data: matrix of samples; last column is the (mutable) class label.
        centroids: matrix of centroids; last column is the centroid's label.

    Returns:
        None. ``data`` is modified in place.
    """
    # Hoist the label-stripping out of the loops: the original rebuilt a
    # full copy of `data` and `centroids` for every (row, centroid) pair,
    # which is O(rows * k) whole-matrix copies for no benefit.
    features = np.asarray(_matrix.RemoveColumn(_matrix.Copy(data), -1))
    centroid_features = np.asarray(
        _matrix.RemoveColumn(_matrix.Copy(centroids), -1))
    for row in range(len(data)):
        # float("inf") is the idiomatic "no distance seen yet" sentinel;
        # any real distance is guaranteed to beat it.
        distance = float("inf")
        for i in range(len(centroid_features)):
            distAux = DistancePoints(features[row], centroid_features[i])
            if distAux < distance:
                distance = distAux
                data[row][-1] = centroids[i][-1]
def dataset_test(self, path, prediction_array=None, datatype="float"):
    """Fit linear, quadratic and robust least squares to a CSV dataset.

    Reads ``path`` (';'-separated), uses every column but the last as
    features and the last column as the target, prints the beta vector of
    each fit, and — when ``prediction_array`` is given — also prints each
    model's prediction for it.

    Args:
        path: CSV file to read.
        prediction_array: optional sample to predict on (converted to a
            matrix via ``_matrix.ConvertArrayToMatrix``).
        datatype: cell datatype passed to ``_matrix.ReadCsv``.
    """
    prediction_test = []
    # `is not None` rather than `!= None`: identity is the correct test
    # and avoids the ambiguous-truth error if an ndarray is ever passed.
    if prediction_array is not None:
        prediction_test = _matrix.ConvertArrayToMatrix(prediction_array)
    print(">>>>>> " + path + " <<<<<<")
    readmatrix = _matrix.ReadCsv(path, ";", datatype)
    # Features: everything except the last column.
    matrix = numpy.delete(readmatrix, len(readmatrix[0]) - 1, 1)
    # Target y: delete the first n-1 columns so only the last one remains.
    y = _matrix.Copy(readmatrix)
    for col in range(len(readmatrix[0]) - 1):
        y = numpy.delete(y, 0, 1)

    def report(label, beta, predict):
        # One fit report: beta vector plus optional prediction printout.
        _matrix.matrix_print("\n Beta for " + label, beta)
        if prediction_array is not None:
            _matrix.matrix_print(">> Prediction for: ", prediction_test)
            print("Is: " + str(predict(prediction_test, beta)))

    report("LinearLeastSquares",
           leastSquares.LinearLeastSquares(matrix, y),
           leastSquares.LinearPredict)
    report("QuadraticLeastSquares",
           leastSquares.QuadraticLeastSquares(matrix, y),
           leastSquares.QuadraticPredict)
    # NOTE: robust betas are evaluated with the *linear* predictor, as in
    # the original code (the robust fit is a reweighted linear model).
    report("RobustLeastSquares",
           leastSquares.RobustLeastSquares(matrix, y),
           leastSquares.LinearPredict)
    print("\n--------------------------------------------------")
def Kmeans(data, k, centroidMethod='random', stopCriteria='default', distanceMethod='euclidean'):
    """Cluster ``data`` into ``k`` groups with the k-means algorithm.

    A label column (initialised to -1) is appended to a copy of ``data``.
    Rows are then repeatedly re-assigned to their nearest centroid and the
    centroids re-computed, until ``StopCriteria`` reports convergence
    between two consecutive labelings.

    Returns:
        (labelled_data, centroids): the data with its final label column,
        and the final centroid matrix.
    """
    # Two snapshots of the labelled data: the previous iteration's labels
    # and the freshly computed ones, compared by StopCriteria.
    previous = np.asarray(_matrix.AddColumn(_matrix.Copy(data), -1))
    current = np.asarray(_matrix.Copy(previous))
    # Initial centroid matrix, then a first assignment pass.
    centroids = GenerateCentroid(k, data, centroidMethod)
    Classification(current, centroids)
    while not StopCriteria(previous, current):
        previous = np.asarray(_matrix.Copy(current))
        centroids = UpdateCentroid(current, centroids)  # recompute per class
        Classification(current, centroids)              # re-assign rows
    return current, centroids
def Exercise1(self):
    """Run k-means (k=3) on a small hard-coded 2-D point set and plot.

    First plots the raw points (all labelled -1), then the clustered
    points together with the final centroids.
    """
    points = [[1.9, 7.3], [3.4, 7.5], [2.5, 6.8], [1.5, 6.5], [3.5, 6.4],
              [2.2, 5.8], [3.4, 5.2], [3.6, 4], [5, 3.2], [4.5, 2.4],
              [6, 2.6], [1.9, 3], [1, 2.7], [1.9, 2.4], [0.8, 2],
              [1.6, 1.8], [1, 1]]
    # Append a -1 label column so the "pure data" plot has a y vector.
    labelled = np.asarray(_matrix.AddColumn(_matrix.Copy(points), -1))
    raw_x, raw_y = _matrix.DivideXY(labelled)
    plot.Kmeans(raw_x, np.int_(raw_y), [[]], 'Plot pure data')
    # Cluster into three groups and plot the result with its centroids.
    clustered, centroids = kmeans.Kmeans(points, 3)
    clu_x, clu_y = _matrix.DivideXY(clustered)
    plot.Kmeans(clu_x, np.int_(clu_y), centroids, 'Plot Kmeans')
def dataset_test(self, path, datatype="float"):
    """Load a CSV dataset, run k-means (k=3) on it, and plot both views.

    Args:
        path: ';'-separated CSV file to read.
        datatype: cell datatype passed to ``_matrix.ReadCsv``.
    """
    print(">>>>>> " + path + " <<<<<<")
    samples = _matrix.ReadCsv(path, ";", datatype)
    # A -1 label column makes the raw data plottable via DivideXY.
    labelled = np.asarray(_matrix.AddColumn(_matrix.Copy(samples), -1))
    raw_x, raw_y = _matrix.DivideXY(labelled)
    plot.Kmeans(raw_x, np.int_(raw_y), [[]], path + ' - Plot pure data')
    # Cluster and plot the labelled result together with its centroids.
    clustered, centroids = kmeans.Kmeans(samples, 3)
    clu_x, clu_y = _matrix.DivideXY(clustered)
    plot.Kmeans(clu_x, np.int_(clu_y), centroids, path + ' - Plot Kmeans')
    print("\n--------------------------------------------------")
def Exercise3(self):
    """Project the iris dataset to 2-D with PCA, then cluster with k-means.

    Plots the PCA-reduced points unlabelled, then the k-means (k=3)
    clustering with its centroids.
    """
    iris = datasets.load_iris()
    samples = iris.data
    # Dimensionality reduction: keep the two leading principal components.
    reducer = PCA(n_components=2)
    reducer.fit(samples)
    projected = reducer.transform(samples)
    # A -1 label column makes the raw projection plottable via DivideXY.
    labelled = np.asarray(_matrix.AddColumn(_matrix.Copy(projected), -1))
    raw_x, raw_y = _matrix.DivideXY(labelled)
    plot.Kmeans(raw_x, np.int_(raw_y), [[]], 'Iris - Plot pure data')
    # Cluster the projected points and plot the result with centroids.
    clustered, centroids = kmeans.Kmeans(projected, 3)
    clu_x, clu_y = _matrix.DivideXY(clustered)
    plot.Kmeans(clu_x, np.int_(clu_y), centroids, 'Iris - Plot Kmeans')
def RobustLeastSquares(matrix, y):
    """One reweighting step of robust least squares.

    Computes B = (X^T (W.X))^-1 X^T (W.y), where the weights
    w_i = 1 / |r_i| come from the residuals r of an ordinary linear fit —
    points the linear model fits poorly get down-weighted.

    Args:
        matrix: feature matrix X (without the intercept column).
        y: target column vector.

    Returns:
        Beta column vector of the reweighted fit.
    """
    # Floor on |residual|: a point fitted exactly by the linear model has
    # residual 0 and would otherwise raise ZeroDivisionError.
    _EPS = 1e-10
    matrixlinear = LinearLeastSquares(matrix, y)
    newy = LinearPredictMatrix(matrix, matrixlinear)
    # Weight vector: inverse absolute residual, clamped away from zero.
    w = _matrix.Copy(newy)
    for i in range(len(w)):
        w[i][0] = 1 / max(abs(y[i][0] - newy[i][0]), _EPS)
    # Scale both sides of the system by the weights, then solve the
    # ordinary normal equations on the weighted system.
    y = _matrix.MultiplicationEscalarMatrix(y, w)
    matrix = _matrix.AddBeginColumn(matrix, 1)  # intercept column of ones
    matrix = _matrix.MultiplicationEscalarMatrix(matrix, w)
    matrixtranspose = _matrix.Transpose(matrix)
    section_1 = _matrix.Inverse(
        _matrix.Multiplication(matrixtranspose, matrix), 1)        # (X^T * X)^-1
    section_2 = _matrix.Multiplication(section_1, matrixtranspose)  # (X^T * X)^-1 * X^t
    section_3 = _matrix.Multiplication(section_2, y)                # (X^T * X)^-1 * X^t * y
    return section_3
def dataset_test(self, path, datatype="float"):
    """Run PCA on a CSV dataset and plot original vs. transformed data.

    Reads ``path`` (';'-separated), transposes it so samples become
    columns, prints the eigen decomposition, plots component relevance,
    then plots the data before and after projection onto the eigenbasis.

    Args:
        path: CSV file to read.
        datatype: cell datatype passed to ``_matrix.ReadCsv``.
    """
    print(">>>>>> " + path + " <<<<<<")
    originalMatrix = _matrix.Transpose(_matrix.ReadCsv(path, ";", datatype))
    # Pca works on a scratch copy so the original stays plottable.
    eValues, eVectors = pca.Pca(_matrix.Copy(originalMatrix))
    _matrix.matrix_print("EigenVector", eVectors)
    _matrix.matrix_print("EigenValue", [eValues])
    # How much variance each principal component explains.
    pca.PlotRelevance(eValues, "Relevance Components " + path)
    # Side-by-side: raw data, then the same data in the eigenvector basis.
    plot.SimplePointData2D(originalMatrix, "Original " + path, "PC1", "PC2")
    transformedData = pca.Transformation(originalMatrix, eVectors)
    plot.SimplePointData2D(transformedData, "Transformed " + path, "PC1", "PC2")
    print("\n--------------------------------------------------")