import numpy as np from repository import Repository from configuration import config from sklearn.cross_validation import train_test_split from sklearn.cross_validation import ShuffleSplit from sklearn import tree from sklearn.decomposition import PCA from geopy.distance import vincenty import matplotlib.pyplot as plt # Import data repository = Repository(config) dataset, labels = repository.get_dataset_and_labels() dataset = dataset.fillna(-85) # Data: Accuracy && Error acc = [] err = [] size = [] #Files: AccuracyDTrea && ErrorDTrea AccuracyDTrea = open("AccuracyDTrea.txt", "w") ErrorDTrea = open("ErrorDTrea.txt", "w") # Iterate accross the PCA dimentionality for i in range(93, 2094, 100): print "########################" print "Iteration number: " + str(i / 100) size.append(i) # PCA number of components pca = PCA(n_components=i)