def setup_dataset(self):
    """
    Builds a small corpus describing the integers 0..104.

    Each row holds four divisibility flags (by 2, 3, 5 and 7) followed by
    the result of ``self.isprime`` for that integer. The classification
    problem is created with ``target_index=-1`` and uses
    ``euclidean_vector_distance`` as its distance.

    Nothing is returned: the rows are stored in ``self.corpus`` and the
    problem in ``self.problem``.
    """
    # Magic number, chosen to avoid an "error" in Dtree Pseudo that cannot
    # be patched (short of modifying the pseudocode itself).
    size = 105

    rows = [
        [n % 2 == 0, n % 3 == 0, n % 5 == 0, n % 7 == 0, self.isprime(n)]
        for n in range(size)
    ]

    classification = VectorDataClassificationProblem(rows, target_index=-1)
    classification.distance = euclidean_vector_distance

    self.corpus = rows
    self.problem = classification
def setup_dataset(self):
    """
    Builds the corpus from the iris data file at ``self.IRIS_PATH``.

    Every non-blank line is split on whitespace and each field is converted
    with ``round(float(...))``. The classification problem is created with
    ``target_index=4`` and uses ``euclidean_vector_distance`` as its
    distance.

    Nothing is returned: the rows are stored in ``self.corpus`` and the
    problem in ``self.problem``.
    """
    with open(self.IRIS_PATH) as iris_file:
        raw_lines = iris_file.read().split("\n")

    rows = []
    for raw_line in raw_lines:
        values = [round(float(field)) for field in raw_line.split()]
        if not values:
            # Blank line (e.g. the trailing one after the last newline).
            continue
        rows.append(values)

    classification = VectorDataClassificationProblem(rows, target_index=4)
    classification.distance = euclidean_vector_distance

    self.corpus = rows
    self.problem = classification
def setup_dataset(self):
    """
    Creates a corpus of primes.

    Each row describes one integer in ``range(size)``: whether it is
    divisible by 2, 3, 5 and 7, followed by ``self.isprime(i)``. The
    problem is built with ``target_index=-1`` and its distance is set to
    ``euclidean_vector_distance``.

    Returns nothing; the dataset is stored in ``self.corpus`` and the
    classification problem in ``self.problem``.
    """
    # Magic number, chosen to avoid an "error" in Dtree Pseudo that cannot
    # be patched (short of modifying the pseudocode itself).
    size = 105

    # ``range`` rather than the Python-2-only ``xrange``: same iteration on
    # Python 2, and no NameError on Python 3.
    dataset = [
        [
            i % 2 == 0,
            i % 3 == 0,
            i % 5 == 0,
            i % 7 == 0,
            self.isprime(i),
        ]
        for i in range(size)
    ]

    problem = VectorDataClassificationProblem(dataset, target_index=-1)
    problem.distance = euclidean_vector_distance
    self.corpus = dataset
    self.problem = problem
def setup_dataset(self):
    """
    Creates a corpus with n k-bit examples of the parity problem:
    k pseudo-random bits followed by a 1 if an odd number of bits are 1,
    else 0.

    The bits are derived deterministically from the row index, so the
    corpus is identical on every call. The parity bit sits at index ``k``,
    which is passed as ``target_index``.

    Returns nothing; the dataset is stored in ``self.corpus`` and the
    classification problem in ``self.problem``.
    """
    k = 2
    n = 100
    dataset = []
    # ``range`` rather than the Python-2-only ``xrange``: same iteration on
    # Python 2, and no NameError on Python 3.
    for i in range(n):
        # Pseudo random generation of bits
        bits = [(((i + j) * 1223) % (n + 1)) % 2 for j in range(k)]
        # Parity label: 1 when an odd number of the k bits are set.
        bits.append(sum(bits) % 2)
        dataset.append(bits)
    problem = VectorDataClassificationProblem(dataset, target_index=k)
    self.corpus = dataset
    self.problem = problem