def pca(dataset):
    """Plot the dataset projected onto its two leading principal components.

    Args:
        dataset: Indexable container. ``dataset[1]`` is read as the per-sample
            labels and ``dataset[2]`` as the list of class names; the whole
            object is handed to ``common.separate_data``, whose third return
            value is used as the sample matrix (one row per sample).

    Returns:
        None. Prints the first principal component and a colour legend, and
        shows figures through ``pyplot``: every pair of classes on the first
        component only, every pair of classes on the first two components,
        and finally all classes together on the first two components.
    """
    colors = ["r", "g", "b"]
    labels = dataset[1]
    label_names = dataset[2]

    # Center every column on its mean before estimating the covariance.
    (separated_data, tranposed, data) = common.separate_data(dataset)
    means = data.mean(0)
    std_data = data - means

    # Covariance matrix of the features and its eigen-decomposition.
    cov_mat = cov(std_data.T)
    [values, vectors] = linalg.eig(cov_mat)

    # eig() gives the eigenvalues in no guaranteed order and stores the
    # eigenvectors as the COLUMNS of `vectors`, so rank the eigenvalues from
    # largest to smallest and pick the matching columns.
    order = values.real.argsort()[::-1]
    first_pc = vectors[:, order[0]]
    second_pc = vectors[:, order[1]]
    print(first_pc)

    # Projections onto each component, bucketed per class. The raw
    # (uncentered) rows are projected, as before; compared with projecting
    # the centered rows this only shifts every point by the same offset.
    n_classes = len(label_names)
    projected_data_1 = [[] for _ in range(n_classes)]
    projected_data_2 = [[] for _ in range(n_classes)]
    for idx, row in enumerate(data):
        class_idx = label_names.index(labels[idx])
        projected_data_1[class_idx].append(dot(row, first_pc))
        projected_data_2[class_idx].append(dot(row, second_pc))

    # One plotting colour per class; the palette repeats if there are more
    # classes than colours.
    class_colors = [colors[i % len(colors)] for i in range(n_classes)]

    # The legend uses the same class names that bucketed the projections, so
    # it cannot disagree with what is drawn.

    # Single component: each pair of classes on a horizontal line.
    for i in range(n_classes - 1):
        for j in range(i + 1, n_classes):
            for k in (i, j):
                pyplot.plot(projected_data_1[k],
                            zeros(len(projected_data_1[k])),
                            "o" + class_colors[k])
            for k in (i, j):
                print("Clase %s: %s" % (class_colors[k], label_names[k]))
            pyplot.show()

    # Two components: each pair of classes on the PC1/PC2 plane.
    for i in range(n_classes - 1):
        for j in range(i + 1, n_classes):
            for k in (i, j):
                pyplot.plot(projected_data_1[k], projected_data_2[k],
                            "o" + class_colors[k])
            for k in (i, j):
                print("Clase %s: %s" % (class_colors[k], label_names[k]))
            pyplot.show()

    # Same plane, with every class in a single figure.
    for k in range(n_classes):
        pyplot.plot(projected_data_1[k], projected_data_2[k],
                    "o" + class_colors[k])
        print("Clase %s: %s" % (class_colors[k], label_names[k]))
    pyplot.show()
def plot(dataset):
    """Show one scatter figure for every pair of the first four attributes.

    ``dataset`` is passed to ``common.separate_data``; only the second of its
    three return values is used, indexed as ``[class][attribute]``. Each class
    is drawn with the marker ``'o'`` plus its entry in the module-level
    ``colors`` sequence, and the axes are labelled with 1-based attribute
    numbers.
    """
    (_by_class, per_class_columns, _as_array) = common.separate_data(dataset)

    # Every (x, y) attribute combination with x < y, in the order plotted.
    attribute_pairs = [(0, 1), (0, 2), (0, 3), (1, 2), (1, 3), (2, 3)]

    for x_attr, y_attr in attribute_pairs:
        # One series per class on the current pair of attributes.
        for class_idx, class_columns in enumerate(per_class_columns):
            pyplot.plot(class_columns[x_attr], class_columns[y_attr],
                        'o' + colors[class_idx])
        pyplot.xlabel(str(x_attr + 1))
        pyplot.ylabel(str(y_attr + 1))
        pyplot.show()
def plot(dataset):
    """Draw pairwise attribute scatter plots, one figure per attribute pair.

    The second value returned by ``common.separate_data(dataset)`` is read as
    ``[class][attribute]``. For each pair of the first four attributes every
    class is plotted with ``'o'`` plus its colour from the module-level
    ``colors``; the axes carry the 1-based attribute numbers.
    """
    attribute_count = 4
    _, grouped_columns, _ = common.separate_data(dataset)

    # Build the list of axis pairs up front: x runs over attributes 1..3 and
    # y over the attributes that come after x.
    axis_pairs = [
        (x_axis, y_axis)
        for x_axis in range(attribute_count - 1)
        for y_axis in range(x_axis + 1, attribute_count)
    ]

    for x_axis, y_axis in axis_pairs:
        for class_number, columns in enumerate(grouped_columns):
            marker = 'o' + colors[class_number]
            pyplot.plot(columns[x_axis], columns[y_axis], marker)
        pyplot.xlabel(str(x_axis + 1))
        pyplot.ylabel(str(y_axis + 1))
        pyplot.show()
def fischer(dataset):
    """Plot a one-vs-rest Fisher linear discriminant projection per class.

    For every class name in ``dataset[2]`` the samples are relabelled as
    0 (that class) or 1 (any other class), the within-class scatter matrix is
    computed, and all samples are projected onto the direction
    ``inv(Sw) . (mu0 - mu1)``. The projections are drawn on a horizontal
    line: red for the selected class, blue for the rest.

    Args:
        dataset: Indexable container. ``dataset[0]`` is forwarded to
            ``common.separate_data`` together with the new binary labels,
            ``dataset[1]`` is read as the per-sample labels and
            ``dataset[2]`` as the list of class names (strings).

    Returns:
        None. Prints a legend and shows one figure per class.
    """
    labels = dataset[1]
    label_names = dataset[2]

    # One-vs-all: repeat the analysis once for each class.
    for c1 in label_names:
        # Binary labels: 0 for the current class, 1 for everything else.
        new_labels = [0 if label == c1 else 1 for label in labels]

        # Split the data again using the binary labels.
        (separated_data, tranposed, data) = common.separate_data(
            (dataset[0], new_labels, [0, 1]))

        # Per-class feature means.
        means = [separated_data[0].mean(0), separated_data[1].mean(0)]

        # Scatter matrix of each class: sum over its rows of the outer
        # product (x - mu)(x - mu)^T, computed as centered^T . centered so
        # it works for any number of features.
        centered_0 = separated_data[0] - means[0]
        centered_1 = separated_data[1] - means[1]
        s1 = dot(centered_0.T, centered_0)
        s2 = dot(centered_1.T, centered_1)

        # Within-class scatter.
        sw = s1 + s2
        inv_sw = inv(sw)

        # Optimal projection direction v = Sw^-1 (mu0 - mu1), kept 1-D so
        # each projection below is a scalar.
        direction = dot(inv_sw, means[0] - means[1])

        # Project every sample and bucket it by its binary label.
        # NOTE(review): assumes rows of `data` are in the same order as
        # `new_labels` -- confirm against common.separate_data.
        p1 = [[], []]
        for idx, row in enumerate(data):
            p1[new_labels[idx]].append(dot(row, direction))

        # Plot the projected data.
        print('Rojo: ' + c1)
        print('Azul: las otras')
        pyplot.plot(p1[0], zeros(len(p1[0])), 'or')
        pyplot.plot(p1[1], zeros(len(p1[1])), 'ob')
        pyplot.show()
def pca(dataset):
    """Plot the dataset projected onto its two leading principal components.

    Args:
        dataset: Indexable container. ``dataset[1]`` is read as the per-sample
            labels and ``dataset[2]`` as the list of class names; the whole
            object is handed to ``common.separate_data``, whose third return
            value is used as the sample matrix (one row per sample).

    Returns:
        None. Prints the first principal component and a colour legend, and
        shows figures through ``pyplot``: every pair of classes on the first
        component only, every pair of classes on the first two components,
        and finally all classes together on the first two components.
    """
    colors = ['r', 'g', 'b']
    labels = dataset[1]
    label_names = dataset[2]

    # Center every column on its mean before estimating the covariance.
    (separated_data, tranposed, data) = common.separate_data(dataset)
    means = data.mean(0)
    std_data = data - means

    # Covariance matrix of the features and its eigen-decomposition.
    cov_mat = cov(std_data.T)
    [values, vectors] = linalg.eig(cov_mat)

    # eig() gives the eigenvalues in no guaranteed order and stores the
    # eigenvectors as the COLUMNS of `vectors`, so rank the eigenvalues from
    # largest to smallest and pick the matching columns.
    order = values.real.argsort()[::-1]
    first_pc = vectors[:, order[0]]
    second_pc = vectors[:, order[1]]
    print(first_pc)

    # Projections onto each component, bucketed per class. The raw
    # (uncentered) rows are projected, as before; compared with projecting
    # the centered rows this only shifts every point by the same offset.
    n_classes = len(label_names)
    projected_data_1 = [[] for _ in range(n_classes)]
    projected_data_2 = [[] for _ in range(n_classes)]
    for idx, row in enumerate(data):
        class_idx = label_names.index(labels[idx])
        projected_data_1[class_idx].append(dot(row, first_pc))
        projected_data_2[class_idx].append(dot(row, second_pc))

    # One plotting colour per class; the palette repeats if there are more
    # classes than colours.
    class_colors = [colors[i % len(colors)] for i in range(n_classes)]

    # The legend uses the same class names that bucketed the projections, so
    # it cannot disagree with what is drawn.

    # Single component: each pair of classes on a horizontal line.
    for i in range(n_classes - 1):
        for j in range(i + 1, n_classes):
            for k in (i, j):
                pyplot.plot(projected_data_1[k],
                            zeros(len(projected_data_1[k])),
                            'o' + class_colors[k])
            for k in (i, j):
                print('Clase %s: %s' % (class_colors[k], label_names[k]))
            pyplot.show()

    # Two components: each pair of classes on the PC1/PC2 plane.
    for i in range(n_classes - 1):
        for j in range(i + 1, n_classes):
            for k in (i, j):
                pyplot.plot(projected_data_1[k], projected_data_2[k],
                            'o' + class_colors[k])
            for k in (i, j):
                print('Clase %s: %s' % (class_colors[k], label_names[k]))
            pyplot.show()

    # Same plane, with every class in a single figure.
    for k in range(n_classes):
        pyplot.plot(projected_data_1[k], projected_data_2[k],
                    'o' + class_colors[k])
        print('Clase %s: %s' % (class_colors[k], label_names[k]))
    pyplot.show()