# Learning-curve experiment: train a MultilayerPerceptron on progressively
# larger prefixes of the full training set, recording wall-clock train time
# and evaluation time for each size.  Writes one CSV row per training size.
# Relies on names defined earlier in the script: classifiername, dataname,
# crossvalidate, bufsize, p (parameter dict), log, testset, fulltrainset,
# filelimit, rand.
timefilename = "data/plot/" + classifiername + "_" + dataname + crossvalidate + "_traintime.csv"
timefile = open(timefilename, 'w', bufsize)
timefile.write("instances,timetest,timetrain\n")

# Guard the range step: integer division yields 0 whenever the data set has
# fewer instances than mlp.numdatapoints, and range() rejects a zero step.
step = max(1, fulltrainset.numInstances() // int(p['mlp.numdatapoints']))
for num in range(int(p['mlp.initial']), fulltrainset.numInstances(), step):
    trainset = Instances(fulltrainset, 0, num)  # create training set: first num instances
    trainset.setClassIndex(trainset.numAttributes() - 1)
    log.write("---------------------------------\nTraining Set Size: " + str(trainset.numInstances()) +
              ", Test Set Size: " + str(testset.numInstances()) +
              ", Full data set size: " + str(fulltrainset.numInstances()) + "\n")
    filelimit.write(str(trainset.numInstances()))
    timefile.write(str(num))
    # Evaluate once against the held-out test set, once against the full
    # training set (train-set error), timing each pass.
    for dataset in [testset, fulltrainset]:
        algo = MultilayerPerceptron()
        algo.setTrainingTime(int(p['mlp.N']))  # number of training epochs
        x = time.time()
        algo.buildClassifier(trainset)
        evaluation = Evaluation(trainset)
        timefile.write("," + str(time.time() - x))  # training wall time
        output = PlainText()  # plain text output for predictions
        output.setHeader(trainset)
        predbuf = StringBuffer()  # prediction buffer (renamed: 'buffer' shadows the builtin)
        output.setBuffer(predbuf)
        attRange = Range()  # no additional attributes output
        outputDistribution = Boolean(False)  # we don't want distribution
        x = time.time()
        if (int(crossvalidate)):
            evaluation.crossValidateModel(algo, dataset, 10, rand, [output, attRange, outputDistribution])
        else:
            evaluation.evaluateModel(algo, dataset, [output, attRange, outputDistribution])
        log.write("Time to evaluate model: " + str(time.time() - x) + "\n")
        log.write(evaluation.toSummaryString())
# Tunable-parameter sweep: vary the MLP's number of training epochs and
# record, for each setting, the wall-clock build time (CSV + log) and the
# 10-fold cross-validated error (written to the externally opened 'file'
# handle).  Relies on names defined earlier in the script: bufsize, data,
# rand, file.
# NOTE(review): the first statement of this chunk was truncated in the
# source; the "logs/" prefix is reconstructed from the parallel `logfile`
# pattern below -- confirm against the original file.
wallfile = "logs/" + str(os.path.splitext(os.path.basename(__file__))[0]) + "_" + \
           str(os.path.splitext(os.path.basename(sys.argv[1]))[0]) + "_wall.csv"
filewall = open(wallfile, 'w', bufsize)  # open a file for wall clock time
filewall.write("epochs,seconds\n")
logfile = "logs/" + str(os.path.splitext(os.path.basename(__file__))[0]) + "_" + \
          str(os.path.splitext(os.path.basename(sys.argv[1]))[0]) + "_tunable.log"
log = open(logfile, 'w', bufsize)  # open general log file

# loop for different number of training epochs
data.setClassIndex(data.numAttributes() - 1)
for num in range(1, 1000, 50):
    log.write("---------------------------------\nEpoch: " + str(num) + "\n")
    algo = MultilayerPerceptron()
    algo.setTrainingTime(num)  # epochs for this sweep point
    x = time.time()
    algo.buildClassifier(data)
    # Sample the clock once so the log line and the CSV row agree
    # (previously time.time() was read twice, so the CSV value also
    # included the latency of the log write).
    elapsed = time.time() - x
    log.write("Time to build classifier: " + str(elapsed) + "\n")
    filewall.write(str(num) + "," + str(elapsed) + "\n")
    evaluation = Evaluation(data)
    output = PlainText()  # plain text output for predictions
    output.setHeader(data)
    predbuf = StringBuffer()  # prediction buffer (renamed: 'buffer' shadows the builtin)
    output.setBuffer(predbuf)
    attRange = Range()  # no additional attributes output
    outputDistribution = Boolean(False)  # we don't want distribution
    x = time.time()
    evaluation.crossValidateModel(algo, data, 10, rand, [output, attRange, outputDistribution])
    log.write("Time to evaluate model: " + str(time.time() - x) + "\n")
    log.write(evaluation.toSummaryString())
    # 'file' is a CSV handle opened earlier in the script (it shadows the
    # builtin -- left as-is since it is defined outside this chunk).
    file.write(str(num) + "," + str(evaluation.rootMeanSquaredError()) + "\n")
# Train a MultilayerPerceptron on a training ARFF file (argv[1]), evaluate it
# on a separate test ARFF file (argv[2]), dump one prediction per line under
# predictions/, and serialize the trained model under models/.
if len(sys.argv) != 3:
    # Two data files are required; the old message only advertised one.
    print("Usage: weka.py <train-ARFF-file> <test-ARFF-file>")
    sys.exit()

# NOTE: 'file' shadows the builtin but is kept for compatibility with the
# rest of the script.
file = FileReader(sys.argv[1])
file2 = FileReader(sys.argv[2])
data = Instances(file)
test = Instances(file2)
# Weka convention: the class attribute is the last column.
data.setClassIndex(data.numAttributes() - 1)
test.setClassIndex(test.numAttributes() - 1)

evaluation = Evaluation(data)
buffer = StringBuffer()
attRange = Range()  # no additional attributes output
outputDistribution = Boolean(False)  # we don't want distribution

nn = MultilayerPerceptron()
nn.buildClassifier(data)  # only a trained classifier can be evaluated
res = evaluation.evaluateModel(nn, test, [buffer, attRange, outputDistribution])

# One prediction per line, in a file named after the data set's relation.
f = open('predictions/' + data.relationName(), 'w')
for d in res:
    f.write(str(d) + '\n')
f.close()

SerializationHelper.write("models/" + data.relationName() + ".model", nn)
# Learning-curve loop: train a MultilayerPerceptron on progressively larger
# prefixes of the full training set, timing training and evaluation at each
# size.  Relies on names defined earlier in the script: p (parameter dict),
# log, testset, fulltrainset, filelimit, timefile, crossvalidate, rand.
# NOTE(review): the final evaluateModel() call was truncated in this chunk;
# its argument list is reconstructed from the identical, complete call in
# the cross-validate branch -- confirm against the original file.

# Guard the range step: integer division yields 0 whenever the data set has
# fewer instances than mlp.numdatapoints, and range() rejects a zero step.
step = max(1, fulltrainset.numInstances() // int(p['mlp.numdatapoints']))
for num in range(int(p['mlp.initial']), fulltrainset.numInstances(), step):
    trainset = Instances(fulltrainset, 0, num)  # create training set: first num instances
    trainset.setClassIndex(trainset.numAttributes() - 1)
    log.write("---------------------------------\nTraining Set Size: " + str(trainset.numInstances()) +
              ", Test Set Size: " + str(testset.numInstances()) +
              ", Full data set size: " + str(fulltrainset.numInstances()) + "\n")
    filelimit.write(str(trainset.numInstances()))
    timefile.write(str(num))
    # Evaluate once against the held-out test set, once against the full
    # training set, timing each pass.
    for dataset in [testset, fulltrainset]:
        algo = MultilayerPerceptron()
        algo.setTrainingTime(int(p['mlp.N']))  # number of training epochs
        x = time.time()
        algo.buildClassifier(trainset)
        evaluation = Evaluation(trainset)
        timefile.write("," + str(time.time() - x))  # training wall time
        output = PlainText()  # plain text output for predictions
        output.setHeader(trainset)
        predbuf = StringBuffer()  # prediction buffer (renamed: 'buffer' shadows the builtin)
        output.setBuffer(predbuf)
        attRange = Range()  # no additional attributes output
        outputDistribution = Boolean(False)  # we don't want distribution
        x = time.time()
        if (int(crossvalidate)):
            evaluation.crossValidateModel(algo, dataset, 10, rand, [output, attRange, outputDistribution])
        else:
            evaluation.evaluateModel(algo, dataset, [output, attRange, outputDistribution])
# Train a MultilayerPerceptron on a training ARFF file (argv[1]), evaluate it
# on a separate test ARFF file (argv[2]), dump one prediction per line under
# predictions/, and serialize the trained model under models/.
if len(sys.argv) != 3:
    # Two data files are required; the old message only advertised one.
    print("Usage: weka.py <train-ARFF-file> <test-ARFF-file>")
    sys.exit()

# NOTE: 'file' shadows the builtin but is kept for compatibility with the
# rest of the script.
file = FileReader(sys.argv[1])
file2 = FileReader(sys.argv[2])
data = Instances(file)
test = Instances(file2)
# Weka convention: the class attribute is the last column.
data.setClassIndex(data.numAttributes() - 1)
test.setClassIndex(test.numAttributes() - 1)

evaluation = Evaluation(data)
buffer = StringBuffer()
attRange = Range()  # no additional attributes output
outputDistribution = Boolean(False)  # we don't want distribution

nn = MultilayerPerceptron()
nn.buildClassifier(data)  # only a trained classifier can be evaluated
res = evaluation.evaluateModel(nn, test, [buffer, attRange, outputDistribution])

# One prediction per line, in a file named after the data set's relation.
f = open('predictions/' + data.relationName(), 'w')
for d in res:
    f.write(str(d) + '\n')
f.close()

SerializationHelper.write("models/" + data.relationName() + ".model", nn)