# Loop over different confidence factors using the full dataset: for every
# value, build a J48 tree, evaluate it on the same data, log the timings and
# the evaluation summary, append "<CF>,<RMSE>" to the results file and dump
# the tree as a .dot graph.
data.setClassIndex(data.numAttributes() - 1)  # class label is the last attribute

# Both parts of the graph file name are loop-invariant, so compute them once.
script_stem = str(os.path.splitext(os.path.basename(__file__))[0])
dataset_stem = str(os.path.splitext(os.path.basename(sys.argv[1]))[0])

# Confidence factors 0.05, 0.10, ..., 0.45.  The sweep starts at 0.05 rather
# than 0.0 because J48 only accepts confidence factors greater than zero.
for num in [step * 0.05 for step in range(1, 10)]:
    log.write("---------------------------------\nCF: " + str(num) + "\n")

    algo = J48()
    # The confidence factor steers pruning while the tree is being built, so
    # it must be set before buildClassifier(); setting it afterwards leaves
    # the already-built tree untouched and every iteration would report the
    # same default-CF model.
    algo.setConfidenceFactor(num)

    start = time.time()
    algo.buildClassifier(data)
    log.write("Time to build classifier: " + str(time.time() - start) + "\n")

    evaluation = Evaluation(data)
    output = PlainText()  # plain text output for predictions
    output.setHeader(data)
    buffer = StringBuffer()  # buffer to use
    output.setBuffer(buffer)
    attRange = Range()  # no additional attributes output
    outputDistribution = Boolean(False)  # we don't want distribution

    # Evaluate on the training data itself.
    # Alternative: evaluation.crossValidateModel(algo, data, 10, rand,
    #                  [output, attRange, outputDistribution])
    start = time.time()
    evaluation.evaluateModel(algo, data, [output, attRange, outputDistribution])
    log.write("Time to evaluate model: " + str(time.time() - start) + "\n")
    log.write(evaluation.toSummaryString())
    file.write(str(num) + "," + str(evaluation.rootMeanSquaredError()) + "\n")

    # create graph
    graphfilename = "image/" + script_stem + "_" + dataset_stem + "_" + str(num) + ".dot"
    graphfile = open(graphfilename, 'wb')
    try:
        graphfile.write(algo.graph())
    finally:
        # Release the handle even if graph() or write() raises.
        graphfile.close()

file.close()
log.close()
tree_algorithms.append(cover)
data.setClassIndex(data.numAttributes() - 1)  # class label is the last attribute

# Sweep the odd neighbourhood sizes 1, 3, ..., 29.  Every k yields one CSV
# row: k followed by the cross-validated RMSE of each search algorithm.
for k in range(1, 30, 2):
    file.write(str(k))

    for search in tree_algorithms:
        search_name = search.__class__.__name__
        log.write("---------------------------------\nK: " + str(k) +
                  ", Search Algorithm: " + search_name + "\n")

        # Fresh IBk per (k, search algorithm) combination.
        classifier = IBk()
        classifier.setNearestNeighbourSearchAlgorithm(search)
        classifier.setKNN(k)

        # Time a stand-alone build on the full dataset.
        started = time.time()
        classifier.buildClassifier(data)
        build_seconds = time.time() - started
        log.write("Time to build classifier: " + str(build_seconds) + "\n")

        evaluation = Evaluation(data)

        # Prediction printing: plain text into a string buffer, with no
        # additional attributes and no class distribution.
        predictions = PlainText()
        predictions.setHeader(data)
        predictions.setBuffer(StringBuffer())
        no_extra_attributes = Range()
        without_distribution = Boolean(False)

        # Time the 10-fold cross-validation.  (Evaluating on the training
        # data instead would use evaluation.evaluateModel(classifier, data,
        # [...]) with the same trailing list.)
        started = time.time()
        evaluation.crossValidateModel(
            classifier, data, 10, rand,
            [predictions, no_extra_attributes, without_distribution])
        eval_seconds = time.time() - started
        log.write("Time to evaluate model: " + str(eval_seconds) + "\n")

        log.write(evaluation.toSummaryString())
        file.write("," + str(evaluation.rootMeanSquaredError()))

    file.write("\n")

file.close()
log.close()
# CoverTree search, configured with the Euclidean distance function only.
cover = CoverTree()
cover.setDistanceFunction(EuclideanDistance())
tree_algorithms.append(cover)

# The last attribute holds the class label.
data.setClassIndex(data.numAttributes() - 1)

# One results row per neighbour count (1, 3, ..., 29): the count itself,
# then one RMSE column for each nearest-neighbour search algorithm.
for neighbours in range(1, 30, 2):
    file.write(str(neighbours))

    for searcher in tree_algorithms:
        banner = "".join([
            "---------------------------------\nK: ", str(neighbours),
            ", Search Algorithm: ", searcher.__class__.__name__, "\n",
        ])
        log.write(banner)

        knn = IBk()
        knn.setNearestNeighbourSearchAlgorithm(searcher)
        knn.setKNN(neighbours)

        # Measure how long building on the whole dataset takes.
        t0 = time.time()
        knn.buildClassifier(data)
        log.write("".join(["Time to build classifier: ", str(time.time() - t0), "\n"]))

        evaluator = Evaluation(data)

        # Arguments controlling prediction printing during evaluation.
        text_output = PlainText()               # plain text output for predictions
        text_output.setHeader(data)
        text_buffer = StringBuffer()            # buffer the output is written to
        text_output.setBuffer(text_buffer)
        attribute_range = Range()               # no additional attributes output
        print_distribution = Boolean(False)     # distribution is not wanted

        # Measure the 10-fold cross-validation; evaluateModel(knn, data, ...)
        # is the non-cross-validated alternative.
        t0 = time.time()
        evaluator.crossValidateModel(
            knn, data, 10, rand,
            [text_output, attribute_range, print_distribution])
        log.write("".join(["Time to evaluate model: ", str(time.time() - t0), "\n"]))

        log.write(evaluator.toSummaryString())
        file.write("," + str(evaluator.rootMeanSquaredError()))

    # Terminate the CSV row for this neighbour count.
    file.write("\n")

file.close()
log.close()
# Sweep J48's pruning confidence factor on the full dataset.  Each iteration
# builds a tree, evaluates it on the same data, logs timings and the
# evaluation summary, appends "<CF>,<RMSE>" to the results file and writes
# the tree out as a .dot graph.
data.setClassIndex(data.numAttributes() - 1)  # class label is the last attribute

# Both parts of the graph file name are loop-invariant, so compute them once.
script_stem = str(os.path.splitext(os.path.basename(__file__))[0])
dataset_stem = str(os.path.splitext(os.path.basename(sys.argv[1]))[0])

# Confidence factors 0.05, 0.10, ..., 0.45.  The sweep starts at 0.05 rather
# than 0.0 because J48 only accepts confidence factors greater than zero.
for num in [step * 0.05 for step in range(1, 10)]:
    log.write("---------------------------------\nCF: " + str(num) + "\n")

    algo = J48()
    # The confidence factor steers pruning while the tree is being built, so
    # it must be set before buildClassifier(); setting it afterwards leaves
    # the already-built tree untouched and every iteration would report the
    # same default-CF model.
    algo.setConfidenceFactor(num)

    start = time.time()
    algo.buildClassifier(data)
    log.write("Time to build classifier: " + str(time.time() - start) + "\n")

    evaluation = Evaluation(data)
    output = PlainText()  # plain text output for predictions
    output.setHeader(data)
    buffer = StringBuffer()  # buffer to use
    output.setBuffer(buffer)
    attRange = Range()  # no additional attributes output
    outputDistribution = Boolean(False)  # we don't want distribution

    # Evaluate on the training data itself.
    # Alternative: evaluation.crossValidateModel(algo, data, 10, rand,
    #                  [output, attRange, outputDistribution])
    start = time.time()
    evaluation.evaluateModel(algo, data, [output, attRange, outputDistribution])
    log.write("Time to evaluate model: " + str(time.time() - start) + "\n")
    log.write(evaluation.toSummaryString())
    file.write(str(num) + "," + str(evaluation.rootMeanSquaredError()) + "\n")

    # create graph
    graphfilename = "image/" + script_stem + "_" + dataset_stem + "_" + str(num) + ".dot"
    graphfile = open(graphfilename, 'wb')
    try:
        graphfile.write(algo.graph())
    finally:
        # Release the handle even if graph() or write() raises.
        graphfile.close()

file.close()
log.close()