import numpy as np
from repository import Repository
from configuration import config
from sklearn.cross_validation import train_test_split
from sklearn.cross_validation import ShuffleSplit
from sklearn import tree
from sklearn.decomposition import PCA
from geopy.distance import vincenty
import matplotlib.pyplot as plt

# Load the dataset and its labels through the project repository.
repository = Repository(config)
dataset, labels = repository.get_dataset_and_labels()
# Replace missing entries with -85.
# NOTE(review): presumably a floor value for absent readings -- confirm.
dataset = dataset.fillna(-85)

# Result accumulators: `size` receives the PCA component count of each
# iteration; `acc` and `err` are presumably filled with the matching
# accuracy and error values further down -- confirm.
acc, err, size = [], [], []

# Output files for the accuracy and error results; mode "w" truncates
# any existing content.
AccuracyDTrea = open("AccuracyDTrea.txt", "w")
ErrorDTrea = open("ErrorDTrea.txt", "w")

# Iterate across the PCA dimensionality
for i in range(93, 2094, 100):
    print "########################"
    print "Iteration number: " + str(i / 100)
    size.append(i)
    # PCA number of components
    pca = PCA(n_components=i)