# Fragment: sweep the LibSVM cost parameter C (2**-10 .. 2**8, step of 2 in the
# exponent) and, for each of the four kernel types, build a classifier on
# `data`, logging the build time.  `bufsize`, `data`, `file`, and the Weka
# classes (LibSVM, SelectedTag, Evaluation, ...) are defined elsewhere in the
# original script.
logfile = ("logs/"
           + str(os.path.splitext(os.path.basename(__file__))[0])
           + "_"
           + str(os.path.splitext(os.path.basename(sys.argv[1]))[0])
           + "_tunable.log")
log = open(logfile, "w", bufsize)  # general log file

# the last attribute is the dependent (class) variable
data.setClassIndex(data.numAttributes() - 1)

# loop over different values of C
for exponent in range(-10, 10, 2):
    c = 2 ** exponent
    file.write(str(c))
    for kerneltype in range(0, 4):
        # 0 = linear, 1 = polynomial, 2 = radial basis function, 3 = sigmoid
        algo = LibSVM()
        tag = SelectedTag(str(kerneltype), algo.TAGS_KERNELTYPE)
        algo.setKernelType(tag)
        algo.setCost(c)
        log.write("---------------------------------\nC: " + str(c) + ", KernelType: " + str(kerneltype) + "\n")
        x = time.time()
        algo.buildClassifier(data)
        log.write("Time to build classifier: " + str(time.time() - x) + "\n")
        evaluation = Evaluation(data)
        output = PlainText()      # plain text output for predictions
        output.setHeader(data)
        buffer = StringBuffer()   # buffer to use
        output.setBuffer(buffer)
        attRange = Range()        # no additional attributes output
sys.exit() # load data file print "Loading data..." file = FileReader(sys.argv[1]) data = Instances(file) # set the class Index - the index of the dependent variable data.setClassIndex(data.numAttributes() - 1) # define the algorithms to be used. algo_list = [(NaiveBayes(), 'NaiveBayes'), (BayesNet(), 'BayesNet'), (J48(), 'J48'), (JRip(), 'JRip'), (KStar(), 'KStar'), (RandomForest(), 'RandomForest'), (AdaBoostM1(), 'AdaBoostM1'), (MultilayerPerceptron(), 'MultilayerPerceptron'), (LibSVM(), 'LibSVM')] algo_dict = dict([(x[1], x[0]) for x in algo_list]) algo_keys = [ 'NaiveBayes', 'J48', 'BayesNet', 'JRip', 'RandomForest', 'KStar', 'AdaBoostM1', 'LibSVM', 'MultilayerPerceptron' ] # example to set kernal type on libsvm. Default is 2 #algo = algo_dict['LibSVM'] #tag = SelectedTag("1",algo.TAGS_KERNELTYPE) # 0 = linear, 1 = polynomial, 2 = radial basis function, 3 = sigmoid #algo.setKernelType(tag) # train classifiers but filter out the name column first print "Training classifiers..." for key in algo_keys: algo = algo_dict[key]
# NOTE(review): extraction-mangled fragment — an entire chunk of the original
# script collapsed onto one line, ending mid-statement on a dangling
# "if (int(crossvalidate)):" whose body lies outside this view.  Left
# byte-identical; do not reflow without the continuation.
# What it does: builds a LibSVM learning curve — training-set sizes grow from
# p['svm.initial'] in steps of numInstances()/p['svm.numdatapoints']; for each
# size and each of the 4 kernel types it trains on a prefix of `fulltrainset`
# and evaluates against both `testset` and `fulltrainset`.
# `classifiername`, `dataname`, `crossvalidate`, `bufsize`, `p`, `filelimit`,
# `testset`, `fulltrainset` are presumably defined earlier in the original
# file — confirm against the un-mangled source.
logfile = "logs/" + classifiername + "_" + dataname + crossvalidate + ".log" log = open(logfile, 'w', bufsize) # open general log file for num in range(int(p['svm.initial']), fulltrainset.numInstances(), (fulltrainset.numInstances() / int(p['svm.numdatapoints']))): trainset = Instances(fulltrainset, 0, num) # create training set trainset.setClassIndex(trainset.numAttributes() - 1) filelimit.write(str(num)) for kerneltype in range(0, 4): log.write("---------------------------------\nTraining Set Size: " + str(trainset.numInstances()) + ", Test Set Size: " + str(testset.numInstances()) + ", Full data set size: " + str(fulltrainset.numInstances()) + "\n") for dataset in [testset, fulltrainset]: algo = LibSVM() tag = SelectedTag( str(kerneltype), algo.TAGS_KERNELTYPE ) # 0 = linear, 1 = polynomial, 2 = radial basis function, 3 = sigmoid algo.setKernelType(tag) algo.setCost(int(p['svm.C'])) algo.buildClassifier(trainset) evaluation = Evaluation(trainset) output = PlainText() # plain text output for predictions output.setHeader(trainset) buffer = StringBuffer() # buffer to use output.setBuffer(buffer) attRange = Range() # no additional attributes output outputDistribution = Boolean(False) # we don't want distribution x = time.time() if (int(crossvalidate)):
# NOTE(review): extraction-mangled fragment — a chunk of the original script
# collapsed onto one line, ending mid-statement on a dangling "else:" whose
# body lies outside this view.  Left byte-identical; do not reflow without the
# continuation.
# What it does: same LibSVM learning-curve loop as the sibling fragment, plus
# a CSV ("instances,lineartest,lineartrain,...") opened under data/plot/ to
# record per-size results; when int(crossvalidate) is truthy it runs 10-fold
# cross-validation via evaluation.crossValidateModel(...).
# `classifiername`, `crossvalidate`, `bufsize`, `p`, `rand`, `testset`,
# `fulltrainset` are presumably defined earlier in the original file —
# confirm against the un-mangled source.
dataname = str(os.path.splitext(os.path.basename(sys.argv[1]))[0]) datafilelimit = "data/plot/" + classifiername + "_" + dataname + crossvalidate + "_instances.csv" filelimit=open(datafilelimit, 'w', bufsize) filelimit.write("instances,lineartest,lineartrain,polytest,polytrain,radialtest,radialtrain,sigmoidtest,sigmoidtrain\n") logfile = "logs/" + classifiername + "_" + dataname + crossvalidate + ".log" log=open(logfile, 'w', bufsize) # open general log file for num in range(int(p['svm.initial']),fulltrainset.numInstances(),(fulltrainset.numInstances() / int(p['svm.numdatapoints']))): trainset = Instances(fulltrainset,0,num) # create training set trainset.setClassIndex(trainset.numAttributes() - 1) filelimit.write(str(num)) for kerneltype in range(0,4): log.write("---------------------------------\nTraining Set Size: " + str(trainset.numInstances()) + ", Test Set Size: " + str(testset.numInstances()) + ", Full data set size: " + str(fulltrainset.numInstances()) + "\n") for dataset in [testset, fulltrainset]: algo = LibSVM() tag = SelectedTag(str(kerneltype),algo.TAGS_KERNELTYPE) # 0 = linear, 1 = polynomial, 2 = radial basis function, 3 = sigmoid algo.setKernelType(tag) algo.setCost(int(p['svm.C'])) algo.buildClassifier(trainset) evaluation = Evaluation(trainset) output = PlainText() # plain text output for predictions output.setHeader(trainset) buffer = StringBuffer() # buffer to use output.setBuffer(buffer) attRange = Range() # no additional attributes output outputDistribution = Boolean(False) # we don't want distribution x = time.time() if (int(crossvalidate)): evaluation.crossValidateModel(algo, dataset, 10, rand, [output, attRange, outputDistribution]) else:
# Fragment: tune the LibSVM cost parameter C.  Opens a CSV for RMSE results
# and a log file, then for each C in 2**-10 .. 2**8 (exponent step 2) and each
# of the four kernel types builds a classifier on `data` and logs the build
# time.  `bufsize`, `data`, and the Weka classes (LibSVM, SelectedTag,
# Evaluation, ...) are defined elsewhere in the original script.
datafile = ("data/plot/"
            + str(os.path.splitext(os.path.basename(__file__))[0])
            + "_"
            + str(os.path.splitext(os.path.basename(sys.argv[1]))[0])
            + "_rmse.csv")
file = open(datafile, 'w', bufsize)  # file for rmse data
file.write("c,linear,polynomial,radial,sigmoid\n")

logfile = ("logs/"
           + str(os.path.splitext(os.path.basename(__file__))[0])
           + "_"
           + str(os.path.splitext(os.path.basename(sys.argv[1]))[0])
           + "_tunable.log")
log = open(logfile, 'w', bufsize)  # general log file

# the last attribute is the dependent (class) variable
data.setClassIndex(data.numAttributes() - 1)

# loop over different values of C
for exponent in range(-10, 10, 2):
    c = 2 ** exponent
    file.write(str(c))
    for kerneltype in range(0, 4):
        # 0 = linear, 1 = polynomial, 2 = radial basis function, 3 = sigmoid
        algo = LibSVM()
        tag = SelectedTag(str(kerneltype), algo.TAGS_KERNELTYPE)
        algo.setKernelType(tag)
        algo.setCost(c)
        log.write("---------------------------------\nC: " + str(c) + ", KernelType: " + str(kerneltype) + "\n")
        x = time.time()
        algo.buildClassifier(data)
        log.write("Time to build classifier: " + str(time.time() - x) + "\n")
        evaluation = Evaluation(data)
        output = PlainText()               # plain text output for predictions
        output.setHeader(data)
        buffer = StringBuffer()            # buffer to use
        output.setBuffer(buffer)
        attRange = Range()                 # no additional attributes output
        outputDistribution = Boolean(False)  # we don't want distribution
        x = time.time()
if (not (len(sys.argv) == 2)): print "Usage: supervised.py <ARFF-file>" sys.exit() # load data file print "Loading data..." file = FileReader(sys.argv[1]) data = Instances(file) # set the class Index - the index of the dependent variable data.setClassIndex(data.numAttributes() - 1) # define the algorithms to be used. algo_list = [(NaiveBayes(), 'NaiveBayes'), (BayesNet(),'BayesNet'), (J48(),'J48'), (JRip(), 'JRip'), (KStar(), 'KStar'), (RandomForest(), 'RandomForest'), (AdaBoostM1(),'AdaBoostM1'), (MultilayerPerceptron(),'MultilayerPerceptron'), (LibSVM(), 'LibSVM')] algo_dict = dict([(x[1], x[0]) for x in algo_list]) algo_keys = ['NaiveBayes', 'J48', 'BayesNet', 'JRip', 'RandomForest', 'KStar', 'AdaBoostM1', 'LibSVM', 'MultilayerPerceptron'] # example to set kernal type on libsvm. Default is 2 algo = algo_dict['LibSVM'] tag = SelectedTag("1",algo.TAGS_KERNELTYPE) # 0 = linear, 1 = polynomial, 2 = radial basis function, 3 = sigmoid algo.setKernelType(tag) # train classifiers print "Training classifiers..." for key in algo_keys : algo = algo_dict[key] algo.buildClassifier(data) # evaluate classifiers and print a result summary including confusion matrix