Пример #1
0
def pca(dataset):
    """Project the samples onto their first two principal components and plot them.

    ``dataset[1]`` is read as one label per sample and ``dataset[2]`` as the
    list of distinct label names; the position of a name in ``dataset[2]``
    is the class index used for grouping, colouring and the printed legend.
    The numeric samples are the third value returned by
    ``common.separate_data(dataset)``.
    NOTE(review): that value is assumed to be a 2-D array with one row per
    sample, in the same order as ``dataset[1]`` -- confirm against
    ``common.separate_data``.

    Prints the first principal component and a colour legend, then shows:
    one single-axis plot (first component only) per pair of classes, one
    two-component plot per pair of classes, and one two-component plot with
    every class. Returns nothing.
    """
    colors = ["r", "g", "b"]

    labels = dataset[1]
    label_names = dataset[2]

    _, _, data = common.separate_data(dataset)

    # Centre every column on its mean; the centred samples are what gets
    # projected, so the plots are centred on the origin.
    centered = data - data.mean(0)

    # Covariance matrix of the features.
    cov_mat = cov(centered.T)

    # The covariance matrix is symmetric, so eigh applies: it returns real
    # eigenvalues in ascending order and the eigenvectors as COLUMNS.
    # The principal components are therefore the last columns.
    _, vectors = linalg.eigh(cov_mat)
    first_pc = vectors[:, -1]
    second_pc = vectors[:, -2]
    print(first_pc)

    # Projections onto each component, grouped by class index.
    class_count = len(label_names)
    projected_data_1 = [[] for _ in range(class_count)]
    projected_data_2 = [[] for _ in range(class_count)]

    for idx, row in enumerate(centered):
        class_idx = label_names.index(labels[idx])
        projected_data_1[class_idx].append(dot(row, first_pc))
        projected_data_2[class_idx].append(dot(row, second_pc))

    # Marker and legend text per class; colours are reused cyclically when
    # there are more classes than colours.
    class_colors = [colors[k % len(colors)] for k in range(class_count)]
    markers = ["o" + color for color in class_colors]
    legends = ["Clase %s: %s" % (color, name)
               for color, name in zip(class_colors, label_names)]

    class_pairs = [(i, j)
                   for i in range(class_count - 1)
                   for j in range(i + 1, class_count)]

    # First component only: each pair of classes on a single axis.
    for pair in class_pairs:
        for k in pair:
            pyplot.plot(projected_data_1[k], zeros(len(projected_data_1[k])), markers[k])
        for k in pair:
            print(legends[k])
        pyplot.show()

    # Two components: each pair of classes.
    for pair in class_pairs:
        for k in pair:
            pyplot.plot(projected_data_1[k], projected_data_2[k], markers[k])
        for k in pair:
            print(legends[k])
        pyplot.show()

    # Two components: all classes in one figure.
    for k in range(class_count):
        pyplot.plot(projected_data_1[k], projected_data_2[k], markers[k])
        print(legends[k])
    pyplot.show()
Пример #2
0
def plot(dataset):
    """Show one scatter plot for every pair of the first four attributes.

    The second value returned by ``common.separate_data(dataset)`` is
    indexed as ``[class][attribute]``. Each class is drawn with the marker
    ``'o'`` followed by its entry in ``colors``, and the axes are labelled
    with the 1-based attribute numbers. One figure is shown per pair.
    NOTE(review): ``colors`` is not defined in this function; presumably it
    is a module-level sequence -- confirm.
    """
    _separated, per_class, _as_array = common.separate_data(dataset)

    # Every (x, y) attribute pair with x < y, x in 0..2 and y in 1..3.
    attribute_pairs = [(x, y) for x in xrange(0, 3) for y in xrange(x + 1, 4)]

    for x_attr, y_attr in attribute_pairs:
        for class_idx in xrange(0, len(per_class)):
            class_rows = per_class[class_idx]
            pyplot.plot(class_rows[x_attr], class_rows[y_attr], 'o' + colors[class_idx])
            pyplot.xlabel(str(x_attr + 1))
            pyplot.ylabel(str(y_attr + 1))
        pyplot.show()
Пример #3
0
def plot(dataset):
    """Scatter-plot each pair of attributes (1-2, 1-3, ... 3-4), one figure per pair.

    Uses the second value returned by ``common.separate_data(dataset)``,
    indexed as ``[class][attribute]``; every class gets the marker ``'o'``
    plus its entry in ``colors``. Axis labels are the 1-based attribute
    numbers.
    NOTE(review): ``colors`` comes from outside this function, presumably a
    module-level sequence -- confirm.
    """
    (_, plot_data, _) = common.separate_data(dataset)

    for first in xrange(0, 3):  # x-axis attribute: 1, 2, 3
        x_label = str(first + 1)
        for second in xrange(first + 1, 4):  # y-axis attribute: 2, 3, 4
            y_label = str(second + 1)
            for cls, series in enumerate(plot_data):  # one series per class
                pyplot.plot(series[first], series[second], 'o' + colors[cls])
                pyplot.xlabel(x_label)
                pyplot.ylabel(y_label)
            pyplot.show()
Пример #4
0
def fischer(dataset):
    """Plot a one-vs-rest Fisher discriminant projection for every class.

    For each name in ``dataset[2]`` the labels in ``dataset[1]`` are
    rewritten as 0 (that class) or 1 (any other class) and the data is
    re-separated with ``common.separate_data``. The samples are then
    projected onto ``inv(Sw) . (mean_0 - mean_1)``, where ``Sw`` is the sum
    of the two class scatter matrices, and drawn on a single axis: red for
    the selected class, blue for the rest. A legend is printed and one
    figure is shown per class. Returns nothing.

    NOTE(review): assumes ``common.separate_data`` returns the per-class
    samples as 2-D arrays (rows are samples) in its first value, and all
    samples, in the same order as the labels, in its third -- confirm.
    """
    labels = dataset[1]
    label_names = dataset[2]

    for c1 in label_names:  # one-vs-rest for each class
        # Binary labels: 0 for the selected class, 1 for every other class.
        new_labels = [0 if label == c1 else 1 for label in labels]

        # Separate the data again, now under the binary labelling.
        separated_data, _, data = common.separate_data((dataset[0], new_labels, [0, 1]))

        # Per-class mean of every feature.
        means = [separated_data[0].mean(0), separated_data[1].mean(0)]

        # Class scatter matrices: the sum over samples of the outer product
        # (x - mean)(x - mean)^T, computed as centered^T . centered so it
        # works for any number of features.
        centered_0 = separated_data[0] - means[0]
        centered_1 = separated_data[1] - means[1]
        s1 = dot(centered_0.T, centered_0)
        s2 = dot(centered_1.T, centered_1)

        # Within-class scatter.
        sw = s1 + s2
        inv_sw = inv(sw)

        # Optimal projection direction, kept 1-D so each projection is a
        # scalar.
        direction = dot(inv_sw, means[0] - means[1])

        # Projections grouped by binary label (the label is the index).
        p1 = [[], []]
        for idx, row in enumerate(data):
            p1[new_labels[idx]].append(dot(row, direction))

        # Plot the projected data on a single axis.
        print('Rojo: ' + c1)
        print('Azul: las otras')
        pyplot.plot(p1[0], zeros(len(p1[0])), 'or')
        pyplot.plot(p1[1], zeros(len(p1[1])), 'ob')
        pyplot.show()
    
    
        
       
        
Пример #5
0
def pca(dataset):
    """Plot the samples projected onto their first two principal components.

    ``dataset[1]`` is read as one label per sample and ``dataset[2]`` as the
    list of distinct label names; a name's position in ``dataset[2]`` is the
    class index used for grouping, colouring and the printed legend. The
    numeric samples are the third value returned by
    ``common.separate_data(dataset)``.
    NOTE(review): that value is assumed to be a 2-D array with one row per
    sample, ordered like ``dataset[1]`` -- confirm against
    ``common.separate_data``.

    Prints the first principal component and a colour legend, then shows a
    single-axis plot (first component) for each pair of classes, a
    two-component plot for each pair, and a two-component plot with all
    classes. Returns nothing.
    """
    colors = ['r', 'g', 'b']

    labels = dataset[1]
    label_names = dataset[2]

    _, _, data = common.separate_data(dataset)

    # Subtract the column means; the centred samples are the ones projected.
    centered = data - data.mean(0)

    # Covariance matrix of the features.
    cov_mat = cov(centered.T)

    # eigh fits a symmetric matrix: real eigenvalues in ascending order,
    # eigenvectors stored as columns. The last column has the largest
    # eigenvalue, the one before it the second largest.
    _, vectors = linalg.eigh(cov_mat)
    first_pc = vectors[:, -1]
    second_pc = vectors[:, -2]
    print(first_pc)

    # Projected values per class, one list per entry of label_names.
    class_count = len(label_names)
    projected_data_1 = [[] for _ in range(class_count)]
    projected_data_2 = [[] for _ in range(class_count)]

    for idx, row in enumerate(centered):
        class_idx = label_names.index(labels[idx])
        projected_data_1[class_idx].append(dot(row, first_pc))
        projected_data_2[class_idx].append(dot(row, second_pc))

    def color_of(class_idx):
        # Colours repeat cyclically if there are more classes than colours.
        return colors[class_idx % len(colors)]

    def print_legend(class_idx):
        # The legend names come from the dataset, so they always match the
        # grouping above.
        print('Clase %s: %s' % (color_of(class_idx), label_names[class_idx]))

    class_pairs = [(i, j)
                   for i in range(class_count - 1)
                   for j in range(i + 1, class_count)]

    # First component only, one figure per pair of classes.
    for i, j in class_pairs:
        pyplot.plot(projected_data_1[i], zeros(len(projected_data_1[i])),
                    'o' + color_of(i))
        pyplot.plot(projected_data_1[j], zeros(len(projected_data_1[j])),
                    'o' + color_of(j))
        print_legend(i)
        print_legend(j)
        pyplot.show()

    # Two components, one figure per pair of classes.
    for i, j in class_pairs:
        pyplot.plot(projected_data_1[i], projected_data_2[i],
                    'o' + color_of(i))
        pyplot.plot(projected_data_1[j], projected_data_2[j],
                    'o' + color_of(j))
        print_legend(i)
        print_legend(j)
        pyplot.show()

    # Two components, every class in the same figure.
    for i in range(class_count):
        pyplot.plot(projected_data_1[i], projected_data_2[i], 'o' + color_of(i))
        print_legend(i)
    pyplot.show()