def setup_dataset(self):
    """
    Build a corpus describing the integers below 105 and store it.

    Each row holds four booleans (divisibility of the integer by 2, 3,
    5 and 7) followed by the result of ``self.isprime`` for that integer,
    which is used as the classification target (last column).

    Nothing is returned: the rows are stored in ``self.corpus`` and the
    classification problem built from them in ``self.problem``.
    """
    # Magic number, kept from the original: chosen to avoid an "error"
    # that cannot be patched in Dtree Pseudo (presumably because a patch
    # would mean modifying the pseudocode itself).
    size = 105
    divisors = (2, 3, 5, 7)

    rows = [
        [number % divisor == 0 for divisor in divisors]
        + [self.isprime(number)]
        for number in range(size)
    ]

    classification = VectorDataClassificationProblem(rows, target_index=-1)
    classification.distance = euclidean_vector_distance

    self.corpus = rows
    self.problem = classification
def setup_dataset(self):
    """
    Build a corpus from the iris data file and store it.

    The file at ``self.IRIS_PATH`` is read in full; every line that has
    at least one whitespace-separated field becomes one row, with each
    field converted through ``round(float(field))``. Column 4 of each row
    is used as the classification target.

    Nothing is returned: the rows are stored in ``self.corpus`` and the
    classification problem built from them in ``self.problem``.
    """
    with open(self.IRIS_PATH) as iris_file:
        raw_lines = iris_file.read().split("\n")

    rows = []
    for raw_line in raw_lines:
        fields = raw_line.split()
        if not fields:
            # Blank (or whitespace-only) line: contributes no row.
            continue
        rows.append([round(float(field)) for field in fields])

    classification = VectorDataClassificationProblem(rows, target_index=4)
    classification.distance = euclidean_vector_distance

    self.corpus = rows
    self.problem = classification
def setup_dataset(self):
    """
    Build a corpus describing the integers below 105 and store it.

    Each row holds four booleans (divisibility of the integer by 2, 3,
    5 and 7) followed by the result of ``self.isprime`` for that integer,
    which is used as the classification target (last column).

    Nothing is returned: the rows are stored in ``self.corpus`` and the
    classification problem built from them in ``self.problem``.
    """
    # Magic number, kept from the original: chosen to avoid an "error"
    # that cannot be patched in Dtree Pseudo (presumably because a patch
    # would mean modifying the pseudocode itself).
    size = 105

    # ``range`` rather than ``xrange``: ``xrange`` does not exist on
    # Python 3, while ``range`` yields the same 105 values on both
    # Python 2 and Python 3.
    dataset = [
        [
            i % 2 == 0,
            i % 3 == 0,
            i % 5 == 0,
            i % 7 == 0,
            self.isprime(i),
        ]
        for i in range(size)
    ]

    problem = VectorDataClassificationProblem(dataset, target_index=-1)
    problem.distance = euclidean_vector_distance

    self.corpus = dataset
    self.problem = problem