def alg(groups):
    """Refine ``groups`` in place until every group counts as stable.

    One pass visits each group and, for every instance, looks up the single
    nearest representant among the representants of all groups.  Instances
    whose nearest representant is not their own group's representant are
    treated as misplaced.  Each group then falls into one of three cases:

    * no misplaced instance: the group is counted as stable;
    * every instance misplaced: the instances are projected on the first
      principal component of the group and those whose projection is
      greater than the representant's projection move to a new group;
    * some instances misplaced: each one moves to a new group when the
      nearest group has a different class, otherwise to that nearest group.

    Passes are repeated until the number of stable groups equals the number
    of groups.

    :param groups: list of group objects; it is mutated (groups lose or gain
        instances and new groups are appended).
    :return: the same ``groups`` list.
    """
    # A loop replaces the original tail recursion (one call per pass) so a
    # long refinement cannot exhaust the interpreter's recursion limit.
    while True:
        exit_count = 0

        # NOTE: ``groups`` can grow while it is being iterated; groups
        # appended during a pass are visited later in that same pass, as in
        # the original implementation.
        for group in groups:
            # Recomputed for every group because earlier iterations may have
            # changed the groups.
            all_representants = get_all_representants(groups)
            instance_and_nearest = []
            new_group = Group()

            for sample in group.get_instances():
                [nearest_rep] = knn.get_knn(1, sample, all_representants)
                if nearest_rep != group.get_representant():
                    nearest_g = get_group_by_representant(nearest_rep, groups)
                    instance_and_nearest.append((sample, nearest_g))

            if not instance_and_nearest:
                # Nothing is misplaced: this group is stable for this pass.
                exit_count = exit_count + 1
            elif len(instance_and_nearest) == group.count_instances():
                # Snapshot the instances: the group is modified inside the
                # loop below, and iterating a live container while removing
                # from it would skip elements.
                elements = list(group.get_instances())
                # The last element of each instance is left out of the PCA
                # (presumably the class label -- confirm against the loader).
                P, med = hama.pca([e[:-1] for e in elements], 1)
                prototype = group.get_representant()[:-1]
                [d] = hama.proj(P, prototype, med)
                for inst in elements:
                    [di] = hama.proj(P, inst[:-1], med)
                    if d < di:
                        group.remove_instance(inst)
                        new_group.add_instance(inst)
            else:
                for bad_instance, nearest_g in instance_and_nearest:
                    if (nearest_g.get_representant() != group.get_representant()
                            and nearest_g.get_classe() != group.get_classe()):
                        group.remove_instance(bad_instance)
                        new_group.add_instance(bad_instance)
                    elif nearest_g.get_representant() != group.get_representant():
                        # Different representant, but both groups are of the
                        # same class: hand the instance over to that group.
                        nearest_g.add_instance(bad_instance)
                        group.remove_instance(bad_instance)
                    # Groups are not refreshed after each move; that is left
                    # to the update_all_groups() call below.

            if not new_group.is_empty():
                new_group.update_all()
                groups.append(new_group)

            update_all_groups(groups)

        if exit_count == len(groups):
            return groups
import numpy as np

# Script entry point: compares k-NN on the raw features against k-NN on the
# PCA-projected features for one database and prints one result row per k.
if __name__ == '__main__':
    database = 'glass'  # the name of the database you want to run
    components = 3  # the number of components you want to use in PCA

    training = load_database('databases/' + database + '.train')
    test = load_database('databases/' + database + '.test')

    # Each sample is split into everything but its last element (features)
    # and its last element (class).
    training_feats = [t[:-1] for t in training]
    training_class = [t[-1] for t in training]

    test_feats = [t[:-1] for t in test]
    test_class = [t[-1] for t in test]

    # The projection is fitted on the training features only and then
    # applied to both sets.
    P, mn = pca(training_feats, components)

    # Re-attach the class as the last element of every projected sample so
    # the projected data has the same layout as the raw data.
    pca_training = proj(P, training_feats, mn).tolist()
    pca_training = [a + [b] for (a, b) in zip(pca_training, training_class)]

    pca_test = proj(P, test_feats, mn).tolist()
    pca_test = [a + [b] for (a, b) in zip(pca_test, test_class)]

    # Single-argument parenthesised print produces the same output under
    # Python 2 and is also valid Python 3.
    print('\tK\tKNN\tPCA')
    for k in [1, 3, 5]:
        # get_acertion_tax is presumably the hit rate of k-NN on the given
        # test/training pair -- confirm against its definition.
        print('\t%d %.2f %.2f' % (k,
                                  get_acertion_tax(k, test, training),
                                  get_acertion_tax(k, pca_test, pca_training)))