def createPerceptronClassifier(img, samples, class_names, n_samples, ops=None, filepath=None, params=None):
    """Build and train a Weka MultilayerPerceptron classifier.

    Parameters
    ----------
    img, samples, class_names, n_samples, ops, filepath :
        Passed straight through to trainClassifier (defined elsewhere in
        this module) together with the configured perceptron.
    params : dict or None
        Optional hyper-parameters. Recognized keys:
        - "learning_rate": float in (0, 1], applied via setLearningRate.
        - "hidden_layers": comma-separated node counts per layer, or one of
          the Weka shorthands:
          'a' = (number of attributes + number of classes) / 2
          'i' = number of attributes
          'o' = number of classes
          't' = number of attributes + number of classes
          See MultilayerPerceptron.setHiddenLayers:
          https://weka.sourceforge.io/doc.dev/weka/classifiers/functions/MultilayerPerceptron.html#setHiddenLayers-java.lang.String-
          Defaults to "10,5".

    Returns
    -------
    Whatever trainClassifier returns (presumably the trained classifier --
    TODO confirm against trainClassifier's definition).
    """
    # Default to a fresh dict each call: a mutable default argument ({})
    # would be shared across invocations and could leak state.
    if params is None:
        params = {}
    mp = MultilayerPerceptron()
    if "learning_rate" in params:
        # Key is known to exist here, so index directly; the old
        # params.get(key, mp.getLearningRate()) fallback was dead code.
        mp.setLearningRate(params["learning_rate"])  # In (0, 1]
    mp.setHiddenLayers(params.get("hidden_layers", "10,5"))
    return trainClassifier(mp, img, samples, class_names, n_samples, ops=ops, filepath=filepath)
# NOTE(review): this chunk opens mid-statement -- the two lines below appear
# to be the body of an argument-count guard whose `if` header precedes this
# chunk (compare the complete guard in the sibling script), hence the indent.
    print "Usage: supervised.py <ARFF-file>"
    sys.exit()
# load data file
print "Loading data..."
file = FileReader(sys.argv[1])
data = Instances(file)
# set the class Index - the index of the dependent variable
data.setClassIndex(data.numAttributes() - 1)
# define the algorithms to be used.
# Each entry pairs a fresh Weka classifier instance with a display name.
algo_list = [(NaiveBayes(), 'NaiveBayes'),
             (BayesNet(), 'BayesNet'),
             (J48(), 'J48'),
             (JRip(), 'JRip'),
             (KStar(), 'KStar'),
             (RandomForest(), 'RandomForest'),
             (AdaBoostM1(), 'AdaBoostM1'),
             (MultilayerPerceptron(), 'MultilayerPerceptron'),
             (LibSVM(), 'LibSVM')]
# Name -> classifier lookup built from the (classifier, name) pairs above.
algo_dict = dict([(x[1], x[0]) for x in algo_list])
# Training order; roughly fastest algorithms first, slowest (SVM/MLP) last.
algo_keys = [ 'NaiveBayes', 'J48', 'BayesNet', 'JRip', 'RandomForest', 'KStar', 'AdaBoostM1', 'LibSVM', 'MultilayerPerceptron' ]
# example to set kernal type on libsvm. Default is 2
#algo = algo_dict['LibSVM']
#tag = SelectedTag("1",algo.TAGS_KERNELTYPE) # 0 = linear, 1 = polynomial, 2 = radial basis function, 3 = sigmoid
#algo.setKernelType(tag)
# train classifiers but filter out the name column first
print "Training classifiers..."
# NOTE(review): the loop body continues beyond this chunk.
for key in algo_keys:
# check commandline parameters if (not (len(sys.argv) == 3)): print "Usage: weka.py <ARFF-file>" sys.exit() file = FileReader(sys.argv[1]) file2 = FileReader(sys.argv[2]) data = Instances(file) test = Instances(file2) data.setClassIndex(data.numAttributes() - 1) test.setClassIndex(test.numAttributes() - 1) evaluation = Evaluation(data) buffer = StringBuffer() attRange = Range() # no additional attributes output outputDistribution = Boolean(False) # we don't want distribution nn = MultilayerPerceptron() nn.buildClassifier(data) # only a trained classifier can be evaluated #print evaluation.evaluateModel(nn, ['-t', sys.argv[1], '-T', sys.argv[2]])#;, [buffer, attRange, outputDistribution]) res = evaluation.evaluateModel(nn, test, [buffer, attRange, outputDistribution]) f = open('predictions/' + data.relationName(), 'w') for d in res: f.write(str(d) + '\n') f.close() SerializationHelper.write("models/" + data.relationName() + ".model", nn) # print out the built model #print "--> Generated model:\n" #print nn
# NOTE(review): this chunk starts mid-script -- `file`, `data`, `bufsize`,
# `rand`, `sys.argv[1]` handling, and the imports are defined before it.
file.write("epochs,rmse\n")  # CSV header for the (presumed) RMSE plot file opened earlier
# Wall-clock-time CSV named after this script and the input data file.
wallfile = "data/plot/" + str(os.path.splitext(os.path.basename(__file__))[0]) + "_" + \
    str(os.path.splitext(os.path.basename(sys.argv[1]))[0]) + "_wall.csv"
filewall=open(wallfile, 'w', bufsize) # open a file for wall clock time
filewall.write("epochs,seconds\n")
# Matching log file for free-form progress output.
logfile = "logs/" + str(os.path.splitext(os.path.basename(__file__))[0]) + "_" + \
    str(os.path.splitext(os.path.basename(sys.argv[1]))[0]) + "_tunable.log"
log=open(logfile, 'w', bufsize) # open general log file
# loop for different number of training epochs
# the class index is the index of the dependent variable (last attribute)
data.setClassIndex(data.numAttributes() - 1)
for num in range(1,1000,50):  # epochs swept: 1, 51, 101, ..., 951
    log.write("---------------------------------\nEpoch: " + str(num) + "\n")
    # Fresh network per setting so runs don't contaminate each other;
    # setTrainingTime is Weka's "number of epochs" knob.
    algo = MultilayerPerceptron()
    algo.setTrainingTime(num)
    x = time.time()
    algo.buildClassifier(data)
    log.write("Time to build classifier: " + str(time.time() - x) + "\n")
    filewall.write(str(num) + "," + str(time.time() - x) + "\n")
    evaluation = Evaluation(data)
    output = PlainText()  # plain text output for predictions
    output.setHeader(data)
    buffer = StringBuffer()  # buffer to use
    output.setBuffer(buffer)
    attRange = Range()  # no additional attributes output
    outputDistribution = Boolean(False)  # we don't want distribution
    x = time.time()  # re-used timer: now times the evaluation step
    #evaluation.evaluateModel(algo, data, [output, attRange, outputDistribution])
    # 10-fold cross-validation on the training data with the pre-seeded `rand`.
    # NOTE(review): loop body appears to continue beyond this chunk.
    evaluation.crossValidateModel(algo, data, 10, rand, [output, attRange, outputDistribution])
# Guard: this script expects exactly one argument, the ARFF data file.
if (not (len(sys.argv) == 2)):
    print "Usage: supervised.py <ARFF-file>"
    sys.exit()
# load data file
print "Loading data..."
file = FileReader(sys.argv[1])
data = Instances(file)
# set the class Index - the index of the dependent variable
data.setClassIndex(data.numAttributes() - 1)
# define the algorithms to be used.
# Each entry pairs a fresh Weka classifier instance with a display name.
algo_list = [(NaiveBayes(), 'NaiveBayes'),
             (BayesNet(),'BayesNet'),
             (J48(),'J48'),
             (JRip(), 'JRip'),
             (KStar(), 'KStar'),
             (RandomForest(), 'RandomForest'),
             (AdaBoostM1(),'AdaBoostM1'),
             (MultilayerPerceptron(),'MultilayerPerceptron'),
             (LibSVM(), 'LibSVM')]
# Name -> classifier lookup built from the (classifier, name) pairs above.
algo_dict = dict([(x[1], x[0]) for x in algo_list])
# Training order; roughly fastest algorithms first, slowest (SVM/MLP) last.
algo_keys = ['NaiveBayes', 'J48', 'BayesNet', 'JRip', 'RandomForest', 'KStar', 'AdaBoostM1', 'LibSVM', 'MultilayerPerceptron']
# example to set kernel type on libsvm. Default is 2
algo = algo_dict['LibSVM']
tag = SelectedTag("1",algo.TAGS_KERNELTYPE) # 0 = linear, 1 = polynomial, 2 = radial basis function, 3 = sigmoid
algo.setKernelType(tag)
# train classifiers
print "Training classifiers..."
for key in algo_keys :
    algo = algo_dict[key]
    algo.buildClassifier(data)
# evaluate classifiers and print a result summary including confusion matrix