# Experiment 1: vary K from 1 to 29 (odd values only) and compare the three
# tree-based nearest-neighbour search algorithms on the full data set.
kdtree_euclid.setDistanceFunction(EuclideanDistance())  # only Euclidean Distance function
tree_algorithms.append(kdtree_euclid)
ball = BallTree()
ball.setDistanceFunction(EuclideanDistance())  # only Euclidean Distance function
tree_algorithms.append(ball)
cover = CoverTree()
cover.setDistanceFunction(EuclideanDistance())  # only Euclidean Distance function
tree_algorithms.append(cover)
data.setClassIndex(data.numAttributes() - 1)
for num in range(1, 30, 2):
    file.write(str(num))
    for algoknn in tree_algorithms:
        log.write("---------------------------------\nK: " + str(num) + ", Search Algorithm: " + algoknn.__class__.__name__ + "\n")
        algo = IBk()
        algo.setNearestNeighbourSearchAlgorithm(algoknn)
        algo.setKNN(num)
        x = time.time()
        algo.buildClassifier(data)
        log.write("Time to build classifier: " + str(time.time() - x) + "\n")
        evaluation = Evaluation(data)
        output = PlainText()  # plain text output for predictions
        output.setHeader(data)
        buffer = StringBuffer()  # buffer to use
        output.setBuffer(buffer)
        attRange = Range()  # no additional attributes output
        outputDistribution = Boolean(False)  # we don't want distribution
        x = time.time()
        # evaluation.evaluateModel(algo, data, [output, attRange, outputDistribution])
        evaluation.crossValidateModel(algo, data, 10, rand, [output, attRange, outputDistribution])
        log.write("Time to evaluate model: " + str(time.time() - x) + "\n")
        log.write(evaluation.toSummaryString())
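# Note: the listing above (and the one that follows) assumes the Weka and Java
# classes it references were imported earlier in the Jython script. A rough
# import block, assuming Weka 3.7+ (where the PlainText prediction-output class
# lives under weka.classifiers.evaluation.output.prediction), would be:
#
#   import time
#   from java.lang import Boolean, StringBuffer
#   from java.util import Random
#   from weka.core import EuclideanDistance, Instances, Range
#   from weka.core.neighboursearch import BallTree, CoverTree, KDTree
#   from weka.classifiers import Evaluation
#   from weka.classifiers.lazy import IBk
#   from weka.classifiers.evaluation.output.prediction import PlainText
#
# Variables such as data, tree_algorithms, kdtree_euclid, file, log and rand
# are assumed to have been set up before this point in the script.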
# Experiment 2: learning curve - grow the training-set size in steps and evaluate
# a fixed-K IBk classifier on both the held-out test set and the full training set.
tree_algorithms_instance.append(ball)
cover = CoverTree()
cover.setDistanceFunction(EuclideanDistance())  # only Euclidean Distance function
tree_algorithms_instance.append(cover)
for num in range(int(p['knn.initial']), fulltrainset.numInstances(), fulltrainset.numInstances() / int(p['knn.numdatapoints'])):
    filelimit.write(str(num))
    trainset = Instances(fulltrainset, 0, num)  # create training set
    trainset.setClassIndex(trainset.numAttributes() - 1)
    for algoknn in tree_algorithms_instance:
        log.write("---------------------------------\nTraining Set Size: " + str(trainset.numInstances()) + ", Test Set Size: " + str(testset.numInstances()) + ", Full training set size: " + str(fulltrainset.numInstances()) + "\n")
        for dataset in [testset, fulltrainset]:
            algo = IBk()
            algo.setNearestNeighbourSearchAlgorithm(algoknn)
            algo.setKNN(int(p['knn.K']))
            algo.buildClassifier(trainset)
            evaluation = Evaluation(trainset)
            output = PlainText()  # plain text output for predictions
            output.setHeader(trainset)
            buffer = StringBuffer()  # buffer to use
            output.setBuffer(buffer)
            attRange = Range()  # no additional attributes output
            outputDistribution = Boolean(False)  # we don't want distribution
            x = time.time()
            if int(crossvalidate):
                evaluation.crossValidateModel(algo, dataset, 10, rand, [output, attRange, outputDistribution])
            else:
                evaluation.evaluateModel(algo, dataset, [output, attRange, outputDistribution])
            log.write("Time to evaluate model: " + str(time.time() - x) + "\n")
            log.write(evaluation.toSummaryString())
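# The fragment above only writes the training-set size to filelimit; how the
# matching accuracies are recorded is not shown here. As a purely illustrative
# sketch (using the standard weka.classifiers.Evaluation accessors), the
# per-dataset accuracy could be appended to the same row after each evaluation:
#
#   filelimit.write("," + str(evaluation.pctCorrect()))  # accuracy on this dataset
#   ...
#   filelimit.write("\n")  # end the row once both datasets have been evaluated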