# Sweep the SVM cost parameter C over powers of two (2**-10 .. 2**8).
# For every C, each LibSVM kernel type is built once (timed) and then
# cross-validated; one CSV row per C is emitted through file.write():
# the C value followed by one root-mean-squared error per kernel.
data.setClassIndex(data.numAttributes() - 1)  # last attribute is the class
for exponent in range(-10, 10, 2):
    cost = 2 ** exponent
    file.write(str(cost))
    for kerneltype in range(4):
        # Kernel codes: 0 = linear, 1 = polynomial,
        # 2 = radial basis function, 3 = sigmoid
        algo = LibSVM()
        kernel_tag = SelectedTag(str(kerneltype), algo.TAGS_KERNELTYPE)
        algo.setKernelType(kernel_tag)
        algo.setCost(cost)
        log.write("---------------------------------\nC: " + str(cost)
                  + ", KernelType: " + str(kerneltype) + "\n")

        # Build on the full data set only to log how long training takes.
        started = time.time()
        algo.buildClassifier(data)
        build_seconds = time.time() - started
        log.write("Time to build classifier: " + str(build_seconds) + "\n")

        # Prediction-output plumbing handed to the evaluation call.
        evaluation = Evaluation(data)
        buffer = StringBuffer()  # receives the plain-text predictions
        output = PlainText()
        output.setHeader(data)
        output.setBuffer(buffer)
        attRange = Range()  # no additional attributes in the output
        outputDistribution = Boolean(False)  # class distribution not wanted
        prediction_args = [output, attRange, outputDistribution]

        # Cross-validation is used here rather than evaluating on `data`.
        started = time.time()
        evaluation.crossValidateModel(algo, data, 10, rand, prediction_args)
        eval_seconds = time.time() - started
        log.write("Time to evaluate model: " + str(eval_seconds) + "\n")
        log.write(evaluation.toSummaryString())
        file.write("," + str(evaluation.rootMeanSquaredError()))
    file.write("\n")  # terminate the CSV row for this C value
# Learning-curve experiment: train LibSVM on growing prefixes of the full
# training set and, for each kernel type, write the percentage of
# incorrectly classified instances on the test set and on the full training
# set to `filelimit` (one row per training-set size).
logfile = "logs/" + classifiername + "_" + dataname + crossvalidate + ".log"
log = open(logfile, 'w', bufsize)  # open general log file

fullsize = fulltrainset.numInstances()
# Floor division keeps the step an integer regardless of division mode.
# When more data points are requested than there are instances the quotient
# is 0, and range() raises ValueError on a zero step, so fall back to 1.
sizestep = fullsize // int(p['svm.numdatapoints'])
if sizestep == 0:
    sizestep = 1

for num in range(int(p['svm.initial']), fullsize, sizestep):
    trainset = Instances(fulltrainset, 0, num)  # create training set
    trainset.setClassIndex(trainset.numAttributes() - 1)
    filelimit.write(str(num))
    for kerneltype in range(0, 4):
        log.write("---------------------------------\nTraining Set Size: "
                  + str(trainset.numInstances())
                  + ", Test Set Size: " + str(testset.numInstances())
                  + ", Full data set size: "
                  + str(fulltrainset.numInstances()) + "\n")

        # The classifier depends only on the kernel type, the cost and the
        # training set, so it is built once per kernel instead of once per
        # evaluation dataset.
        # 0 = linear, 1 = polynomial, 2 = radial basis function, 3 = sigmoid
        algo = LibSVM()
        tag = SelectedTag(str(kerneltype), algo.TAGS_KERNELTYPE)
        algo.setKernelType(tag)
        # float() accepts fractional costs (e.g. "0.5") that int() rejected;
        # whole-number settings yield the same cost as before.
        algo.setCost(float(p['svm.C']))
        algo.buildClassifier(trainset)

        for dataset in [testset, fulltrainset]:
            evaluation = Evaluation(trainset)
            output = PlainText()  # plain text output for predictions
            output.setHeader(trainset)
            buffer = StringBuffer()  # buffer to use
            output.setBuffer(buffer)
            attRange = Range()  # no additional attributes output
            outputDistribution = Boolean(False)  # we don't want distribution
            x = time.time()
            if int(crossvalidate):
                # 10-fold cross-validation on the evaluation dataset
                evaluation.crossValidateModel(
                    algo, dataset, 10, rand,
                    [output, attRange, outputDistribution])
            else:
                # evaluate the classifier trained above on the dataset
                evaluation.evaluateModel(
                    algo, dataset, [output, attRange, outputDistribution])
            log.write("Time to evaluate model: "
                      + str(time.time() - x) + "\n")
            log.write(evaluation.toSummaryString())
            filelimit.write("," + str(evaluation.pctIncorrect()))
# Per-training-size step of the learning-curve experiment: for each kernel
# type, train LibSVM on `trainset` and evaluate it on the test set and on
# the full training set.  `trainset` and `num` come from code outside this
# excerpt.
trainset.setClassIndex(trainset.numAttributes() - 1)
filelimit.write(str(num))
for kerneltype in range(0, 4):
    log.write("---------------------------------\nTraining Set Size: "
              + str(trainset.numInstances())
              + ", Test Set Size: " + str(testset.numInstances())
              + ", Full data set size: "
              + str(fulltrainset.numInstances()) + "\n")

    # The classifier depends only on the kernel type, the cost and the
    # training set, so it is built once per kernel instead of once per
    # evaluation dataset.
    # 0 = linear, 1 = polynomial, 2 = radial basis function, 3 = sigmoid
    algo = LibSVM()
    tag = SelectedTag(str(kerneltype), algo.TAGS_KERNELTYPE)
    algo.setKernelType(tag)
    # float() accepts fractional costs (e.g. "0.5") that int() rejected;
    # whole-number settings yield the same cost as before.
    algo.setCost(float(p['svm.C']))
    algo.buildClassifier(trainset)

    for dataset in [testset, fulltrainset]:
        evaluation = Evaluation(trainset)
        output = PlainText()  # plain text output for predictions
        output.setHeader(trainset)
        buffer = StringBuffer()  # buffer to use
        output.setBuffer(buffer)
        attRange = Range()  # no additional attributes output
        outputDistribution = Boolean(False)  # we don't want distribution
        x = time.time()  # start of the evaluation timing
        if int(crossvalidate):
            # 10-fold cross-validation on the evaluation dataset
            evaluation.crossValidateModel(
                algo, dataset, 10, rand,
                [output, attRange, outputDistribution])
        else:
            # evaluate the classifier trained above on the dataset
            evaluation.evaluateModel(
                algo, dataset, [output, attRange, outputDistribution])
str(os.path.splitext(os.path.basename(sys.argv[1]))[0]) + "_tunable.log"
# NOTE(review): the expression above looks like the tail of a statement that
# starts before this excerpt (presumably the `logfile` path) -- confirm.
log = open(logfile, 'w', bufsize)  # general log file

# Sweep the SVM cost parameter C over powers of two (2**-10 .. 2**8).
# For every C, each LibSVM kernel type is built once (timed) and then
# cross-validated; one CSV row per C is emitted through file.write():
# the C value followed by one root-mean-squared error per kernel.
data.setClassIndex(data.numAttributes() - 1)  # last attribute is the class
for exponent in range(-10, 10, 2):
    cost = 2 ** exponent
    file.write(str(cost))
    for kerneltype in range(4):
        # Kernel codes: 0 = linear, 1 = polynomial,
        # 2 = radial basis function, 3 = sigmoid
        algo = LibSVM()
        kernel_tag = SelectedTag(str(kerneltype), algo.TAGS_KERNELTYPE)
        algo.setKernelType(kernel_tag)
        algo.setCost(cost)
        log.write("---------------------------------\nC: " + str(cost)
                  + ", KernelType: " + str(kerneltype) + "\n")

        # Build on the full data set only to log how long training takes.
        started = time.time()
        algo.buildClassifier(data)
        build_seconds = time.time() - started
        log.write("Time to build classifier: " + str(build_seconds) + "\n")

        # Prediction-output plumbing handed to the evaluation call.
        evaluation = Evaluation(data)
        buffer = StringBuffer()  # receives the plain-text predictions
        output = PlainText()
        output.setHeader(data)
        output.setBuffer(buffer)
        attRange = Range()  # no additional attributes in the output
        outputDistribution = Boolean(False)  # class distribution not wanted
        prediction_args = [output, attRange, outputDistribution]

        # Cross-validation is used here rather than evaluating on `data`.
        started = time.time()
        evaluation.crossValidateModel(algo, data, 10, rand, prediction_args)
        eval_seconds = time.time() - started
        log.write("Time to evaluate model: " + str(eval_seconds) + "\n")
        log.write(evaluation.toSummaryString())
        file.write("," + str(evaluation.rootMeanSquaredError()))
    file.write("\n")  # terminate the CSV row for this C value