def test_save_to_file(self):
    """Tests if one can save a set of experiments to a file.

    Saves two experiments to a temporary XML file and checks that every
    time point, measure expression and value appears in the file text.
    """
    times = [1, 2, 3, 4, 5]
    measure1 = 'ax + b'
    measure2 = 'e = mc ** 2'
    values1 = [123, 44, 3.2, 4, 30]
    values2 = [321, 44, 2.3, 4, 3]
    experiment_set = ExperimentSet()
    experiment_set.add(Experiment(times, values1, measure1))
    experiment_set.add(Experiment(times, values2, measure2))

    out_file = 'tmp_exp_set.xml'
    experiment_set.save_to_file(out_file)
    try:
        # 'with' closes the handle even if read() raises; the original
        # leaked the handle (and the temp file) on failure.
        with open(out_file, 'r') as f:
            data = f.read()
    finally:
        # always remove the temporary file, even when the test fails
        os.remove(out_file)

    assert all(str(t) in data for t in times)
    assert all(m in data for m in [measure1, measure2])
    assert all(str(v) in data for v in values1 + values2)
def read_txt_experiment_file(file_name, var):
    """Read a whitespace-separated experiment file.

    The file is a sequence of tables separated by blank lines.  In each
    table, the first column holds time points and every remaining column
    holds one experiment's observed values.  Tokens from a literal '#'
    token onward are treated as a comment.

    Parameters
        file_name: path of the text file to read.
        var: measure expression attached to every created Experiment.

    Returns a list of Experiment objects, one per value column per table.
    """
    experiments = []
    experiment_values = []
    times = []
    # 'with' guarantees the file is closed even on a parse error;
    # the original left the handle open if float() raised.
    with open(file_name) as f:
        for line in f:
            tokens = line.split()
            # drop an inline comment: only an exact '#' token starts one
            if '#' in tokens:
                tokens = tokens[:tokens.index('#')]
            if not tokens:
                # blank (or comment-only) line ends the current table
                for exp_val in experiment_values:
                    experiments.append(Experiment(times, exp_val, var))
                experiment_values = []
                times = []
                continue
            # NOTE(review): times stay strings while values become floats —
            # confirm whether callers rely on string time points.
            times.append(tokens[0])
            if not experiment_values:
                # first data row fixes the number of experiment columns
                experiment_values = [[] for _ in range(len(tokens) - 1)]
            for i in range(1, len(tokens)):
                experiment_values[i - 1].append(float(tokens[i]))
    # flush the trailing table (the file may not end with a blank line)
    for exp_val in experiment_values:
        experiments.append(Experiment(times, exp_val, var))
    return experiments
def test_output_with_abcsysbio_syntax(self):
    """We should be able to echo experiments with the ABC-SysBio syntax."""
    exp1 = Experiment([1, 2, 3, 4], [.1, .2, .3, .4], '')
    exp2 = Experiment([1, 2, 3, 4], [.2, .4, .6, .8], '')
    exps = ExperimentSet()
    exps.add(exp1)
    exps.add(exp2)
    out = exps.get_as_abcsysbio_syntax()
    expected = ('<var1> 0.1 0.2 0.3 0.4 </var1>'
                '<var2> 0.2 0.4 0.6 0.8 </var2>')
    # r'' avoids the invalid-escape warning of '\s'; \s already matches
    # newlines, so the '|\n' alternative and MULTILINE flag were redundant.
    space_re = re.compile(r'\s')
    # compare ignoring all whitespace
    self.assertEqual(space_re.sub('', out), space_re.sub('', expected))
def initExperiment(_args):
    """Run the experiment selected on the command line.

    Dispatches on the mutually exclusive classification / correlation /
    regression options of ``_args``, optionally exporting code and
    visualizing the results.

    Bug fix: the body referenced an undefined global ``args`` in several
    places (``args.name``, ``args.classification``, ``args.correlation``,
    ``args.regression``) instead of the ``_args`` parameter.
    """
    FileHandler().createFolder("results")
    resultFolder = "results/" + _args.name + "/"
    FileHandler().createFolder(resultFolder)
    resultFile = resultFolder + "result.csv"

    if _args.classification:
        e = Experiment(_args.classification, _args.name)
        models = initModels(_args, Type.CLASSIFICATION)
        e.classification(models, 10)
        if _args.gen_code:
            exportCode(_args, resultFolder, _args.classification, models)
        if _args.visualize:
            files = [e.path("cv_" + str(i) + ".csv") for i in range(len(models))]
            xTicks = [model.modelName for model in models]
            ResultVisualizer().boxplots(files, _args.visualize, xTicks,
                                        ylabel=_args.visualize)
    elif _args.correlation:
        csv = CSV()
        csv.load(_args.correlation)
        csv.computeCorrelationMatrix(resultFile)
        if _args.visualize:
            ResultVisualizer().colorMap(resultFile)
    elif _args.regression:
        e = Experiment(_args.regression, _args.name)
        models = initModels(_args, Type.REGRESSION)
        e.regression(models, 10)
        if _args.gen_code:
            exportCode(_args, resultFolder, _args.regression, models)
        if _args.visualize:
            files = [e.path("cv_" + str(i) + ".csv") for i in range(len(models))]
            xTicks = [model.modelName for model in models]
            ResultVisualizer().boxplots(files, _args.visualize, xTicks,
                                        ylabel=_args.visualize)

    print("[LIMITS]: results written to src/" + resultFolder)
def test_write_then_read(self):
    """Round-trip check: experiments saved to XML can be read back intact."""
    out_file = 'tmp_exp_set_file.xml'
    exp_set = ExperimentSet()
    exp_set.add(Experiment([1, 2, 3, 4], [.1, .2, .3, .4], 'x1'))
    exp_set.add(Experiment([1, 3, 9, 27], [.1, .2, .3, .4], 'x2'))
    exp_set.save_to_file(out_file)

    read_exp_set = ExperimentSet(out_file)
    self.assertEqual(read_exp_set.get_size(), 2)

    # compare every read experiment against what was written
    expected = [
        ([1, 2, 3, 4], [.1, .2, .3, .4], 'x1'),
        ([1, 3, 9, 27], [.1, .2, .3, .4], 'x2'),
    ]
    for idx, (times, values, measure) in enumerate(expected):
        exp = read_exp_set[idx]
        self.assertListEqual(list(exp.times), times)
        self.assertListEqual(list(exp.values), values)
        self.assertEqual(exp.measure_expression, measure)
    os.remove(out_file)
def test_get_likelihood_over_time(self):
    """Tests if the likelihood can return the correct value when the
    observation occurs in multiple time points."""
    time_points = [0, .25, .5, .75, 1]
    observations = [np.exp(t) for t in time_points]
    experiments = [Experiment(time_points, observations, "x1")]

    # analytic density: product of independent Gaussians centered on
    # each observation
    density = 1
    for obs in observations:
        density *= self.__gaussian(obs, 1, obs)
    expected = np.log(density)

    likelihood_f = LikelihoodFunction(self.odes)
    log_l = likelihood_f.get_log_likelihood(experiments, self.theta)
    assert abs(expected - log_l) < 1e-2
def regressionRF(_training, _trees, _depth, _file):
    """Sweep random-forest size and depth, recording CV results and memory.

    For every (trees, depth) combination up to the given maxima, runs a
    10-fold cross validation and appends the scores plus the per-platform
    memory footprint to a result matrix saved at _file.
    """
    experiment = Experiment(_training, verbose=False)
    results = ResultMatrix()
    for num_trees in range(1, _trees + 1):
        for tree_depth in range(1, _depth + 1):
            forest = RandomForest()
            forest.config.trees = num_trees
            forest.config.depth = tree_depth
            header, row = experiment.regression([forest], 10)
            mem = computeMemorySize(_training, forest, True)
            header = header + ["arduino", "msp", "esp"]
            row = np.hstack([row, mem])
            print([
                "#trees=" + str(num_trees) + "/" + str(_trees) + " depth="
                + str(tree_depth) + "/" + str(_depth) + ' mem=', mem
            ], flush=True)
            results.add(header, row)
    results.save(_file)
def regressionRF(_training, _trees, _depth, _file, _resultFolder,
                 _discretization):
    """Sweep random-forest size and depth on discretized data.

    For every (trees, depth) combination, runs a cross validation to
    generate fold files, re-evaluates the generated code on each fold,
    and records mean/std scores plus per-platform memory in _file.

    Bug fix: the first line used the undefined name ``training`` instead
    of the ``_training`` parameter, which raised NameError.
    """
    csv = CSV(_training)
    attributes = csv.findAttributes(0)
    R = ResultMatrix()
    for numTrees in range(1, _trees + 1):
        for depth in range(1, _depth + 1):
            rf = RandomForest()
            rf.config.trees = numTrees
            rf.config.depth = depth
            # perform a cross validation to generate the training/test files
            e = Experiment(_training, "example_rf_sweet_spot_disc",
                           verbose=False)
            e.regression([rf], 10)
            # evaluate the generated code on every fold
            r, c = CodeEvaluator().crossValidation(rf, _training, attributes,
                                                   e.tmp(), _discretization)
            # aggregate: means followed by standard deviations
            result = np.hstack([r.data.mean(0), r.data.std(0)])
            header = r.header + [x + "_std" for x in r.header]
            mem = computeMemorySize(_training, rf, _resultFolder,
                                    _discretization)
            header += ["arduino", "msp", "esp"]
            result = np.hstack([result, mem])
            print([
                "#trees=" + str(numTrees) + "/" + str(_trees) + " depth="
                + str(depth) + "/" + str(_depth) + ' mem=', mem
            ], flush=True)
            R.add(header, result)
    R.save(_file)
def run(self, _training, _models, _platforms):
    """Cross-validate every model, then train on all data and export code."""
    R = ResultMatrix()
    model_names = []
    for model in _models:
        # run the cross validation to compute the model performance
        model_names.append(model.toString())
        header, result = Experiment(_training).regression([model], 10)
        R.add(header, result)

    # train with the global training data and export code
    training_arff = "tmp/recommend.arff"
    csv = CSV()
    csv.load(_training)
    csv.convertToARFF(training_arff, False)
    attributes = csv.findAttributes(0)
    lAtt = len(attributes) - 1  # NOTE(review): unused — confirm before removing
    # 'model' here is the last model of the loop above, as in the original
    WEKA().train(model, training_arff, "0")
    data = "\n".join(FileHandler().read("tmp/raw0.txt"))
    codeFile = "recommend.c"
    model.exportCode(data, csv, attributes, codeFile)

    # compile platform-specific code (currently a no-op placeholder)
    for platform in _platforms:
        pass  # print(model.toString() + " : " + platform.toString())

    print(R.header, R.data)
    print(model_names)
def classificationANN(_training, _layers, _nodes, _file):
    """Sweep ANN layer count and width, recording CV accuracy and memory.

    For every (layers, nodes) combination up to the given maxima, runs a
    10-fold cross validation and appends the scores plus per-platform
    memory footprint to a result matrix saved at _file.
    """
    experiment = Experiment(_training, verbose=False)
    results = ResultMatrix()
    for layer_count in range(1, _layers + 1):
        for node_count in range(1, _nodes + 1):
            ann = ANN()
            # layer_count hidden layers of node_count nodes each
            ann.config.hiddenLayers = [node_count] * layer_count
            header, row = experiment.classification([ann], 10)
            mem = computeMemorySize(_training, ann, False)
            header = header + ["arduino", "msp", "esp"]
            row = np.hstack([row, mem])
            print([
                "#layers=" + str(layer_count) + "/" + str(_layers)
                + " nodes=" + str(node_count) + "/" + str(_nodes)
                + ' mem=', mem
            ], flush=True)
            results.add(header, row)
    results.save(_file)
def load_data_file(self, file_name):
    """Reads and adds all experiments of an experiment set file.

    Parameters
        file_name: an xml file that contains a set of experiments.

    Fixes: Element.getchildren() is deprecated (removed in Python 3.9's
    ElementTree) — elements are iterated directly; and 'interp' is now
    guarded, where the original raised UnboundLocalError (or silently
    reused the previous experiment's interpretation) when an
    <Experiment> had no <interpretation> child.
    """
    tree = etree.parse(file_name)
    root = tree.getroot()
    if clean_tag(root) != "ExperimentSet":
        print("Wrong experiment data syntax. Root tag should be"
              + "<ExperimentSet>")
        return

    experiments_arr = []
    for experiment_tag in root:
        rows = []
        interp = None
        if clean_tag(experiment_tag) != "Experiment":
            print("Wrong experiment data syntax. The children of"
                  + " <ExperimentSet> can only be of tag"
                  + " <Experiment>.")
        for children in experiment_tag:
            if clean_tag(children) == "row":
                rows.append(self.__read_xml_row(children, file_name))
            elif clean_tag(children) == "condition":
                continue  # conditions are intentionally ignored here
            elif clean_tag(children) == "interpretation":
                interp = self.__read_interpretation(children)
            else:
                print("Unexpected child of dataset in " + file_name)
        if interp is None:
            # without an <interpretation> the columns cannot be mapped
            print("Missing <interpretation> tag in " + file_name)
            continue
        rows = np.array(rows)
        time_idx = interp.index("time")
        times = rows[:, time_idx]
        # every non-time column becomes one Experiment
        for i in range(len(interp)):
            if i == time_idx:
                continue
            experiments_arr.append(Experiment(times, rows[:, i], interp[i]))
    for e in experiments_arr:
        self.add(e)
def test_get_likelihood_point(self):
    """Tests if the likelihood can return the correct value when there's
    only one observation."""
    # x1(0) = 1.0
    # D ~ Gaussian (x1(0), 1) ~ Gaussian (1, 1)
    # f_D (1) = e ^ -{[(0) ^ 2] / [2 * 1]} * {1 * sqrt (2pi)} ^ -1
    expected = np.log(self.__gaussian(1, 1, 1))
    experiments = [Experiment([0], [1.0], "x1")]
    likelihood = LikelihoodFunction(self.odes)
    log_l = likelihood.get_log_likelihood(experiments, self.theta)
    assert abs(expected - log_l) < 1e-8
def test_get_likelihood_experiment_set(self):
    """Tests if can return the likelihood of data in respect to multiple
    experiments."""
    time_points = [0, .25, .5, .75, 1]
    observations = [np.exp(t) for t in time_points]
    experiment = Experiment(time_points, observations, "x1")
    experiments = [experiment, experiment]

    density = 1
    for obs in observations:
        density *= self.__gaussian(obs, 1, obs)
    density **= 2  # two identical experiments -> the density is squared
    expected = np.log(density)

    log_l = LikelihoodFunction(self.odes).get_log_likelihood(
        experiments, self.theta)
    assert abs(expected - log_l) < 1e-2
def run(self, _training, _model, _batchSize, _resultFile):
    """Grow the training subset batch by batch, recording CV results."""
    csv = CSV(_training)
    csv.randomize(1000)
    csv.removeIndices()
    results = ResultMatrix()
    num_batches = int(len(csv.data) / _batchSize)
    for batch in range(num_batches):
        # prefix of (batch + 1) * _batchSize shuffled rows
        subset = CSV()
        subset.header = csv.header
        subset.data = csv.data[0:(batch + 1) * _batchSize]
        subset_file = self.resultFolder + "subset_" + str(batch) + ".csv"
        subset.save(subset_file)
        header, row = Experiment(subset_file).regression([_model], 10)
        results.add(header, row)
    results.save(_resultFile)
# NOTE(review): fragment — the enclosing plotting function begins before this
# chunk; y, S, i, axs, fig, plot, _file, _layers, _nodes and _example are all
# defined above the visible code.
yStd = S[:, i]
x = range(len(y))
ax = axs[0, 0]
# accuracy curve with error bars in the top-left panel
ax.errorbar(x, y, yerr=yStd, capsize=7)
ax.set_title('Model Performance')
ax.set(xlabel='#Nodes on Hidden Layer', ylabel='Accuracy')
# per-platform memory matrices, scaled by 1/1000 before plotting
plot(ResultVisualizer().readAsMatrix(_file, "msp", _layers, _nodes) / 1000,
     axs[0, 1], "MSP430", _nodes)
plot(ResultVisualizer().readAsMatrix(_file, "arduino", _layers, _nodes) / 1000,
     axs[1, 0], "Atmega", _nodes)
plot(ResultVisualizer().readAsMatrix(_file, "esp", _layers, _nodes) / 1000,
     axs[1, 1], "ESP32", _nodes)
fig.tight_layout()
fig.set_size_inches(8, 5)
fig.savefig(_example.path("example_ann_sweet_spot.png"), format="png")
plt.show()

# driver: sweep an ANN grid of up to 3 layers x 26 nodes and plot the result
e = Experiment("", "example_ann_sweet_spot")
layers = 3
nodes = 26
resultFile = e.path("ann_classification_mem.csv")
classificationANN("../examples/vehicleClassification.csv", layers, nodes,
                  resultFile)
plotSweetSpot(resultFile, layers, nodes)
# Example script: feature importances of a random forest on the mnoA data.
from models.randomforest.RandomForest import RandomForest
from experiment.Experiment import Experiment
from code.CodeGenerator import CodeGenerator
from data.CSV import CSV
from data.ResultMatrix import ResultMatrix
import numpy as np
import matplotlib.pyplot as plt
from plot.PlotTool import PlotTool
from plot.ResultVisualizer import ResultVisualizer

# define the training data set and set up the model
training = "../examples/mnoA.csv"
model = RandomForest()
model.config.trees = 10
model.config.depth = 5

# perform a 10-fold cross validation
e = Experiment(training, "example_rf_mdi")
e.regression([model], 10)

# load the per-fold feature matrix, normalize each row, sort by mean, save
M = CSV(e.path("features_0.csv")).toMatrix()
M.normalizeRows()
M.sortByMean()
M.save(e.path("rf_features.csv"))

# bar chart of the ranked feature importances
ResultVisualizer().barchart(e.path("rf_features.csv"),
                            xlabel="Feature",
                            ylabel="Relative Feature Importance",
                            savePNG=e.path(e.id + ".png"))
"fig": fig, "ax": axs[1][0] }).colorMap( ResultVisualizer().readAsMatrix(_file, "arduino", _sX, _sY) / 1000, "Atmega Program Memory Occupation [kB]") PlotTool({ "fig": fig, "ax": axs[1][1] }).colorMap(ResultVisualizer().readAsMatrix(_file, "esp", _sX, _sY) / 1000, "ESP32 Program Memory Occupation [kB]") for ax in axs.flat: ax.set_xticks(range(4, _sX + 1, 5)) ax.set_xticklabels(range(5, _sX + 1, 5)) ax.set_yticks(range(0, _sY, 5)) ax.set_yticklabels(np.flipud(range(5, _sY + 1, 5))) ax.set(xlabel='Number of Trees', ylabel='Maximum Depth') fig.tight_layout() fig.set_size_inches(8, 5) fig.savefig('example_rf_sweet_spot.png', format='png') plt.show() # e = Experiment("", "example_rf_sweet_spot") trees = 30 depth = 15 resultFile = e.path("rf_regression_mem.csv") regressionRF("../examples/mnoA.csv", trees, depth, resultFile) plotSweetSpot(resultFile, trees, depth)
# Example script: export an M5 regression model and build it for MSP430.
from models.m5.M5 import M5
from experiment.Experiment import Experiment
from code.CodeGenerator import CodeGenerator
from data.CSV import CSV
from code.MSP430 import MSP430
import os

# define the training data set and set up the model
training = "../examples/mnoA.csv"
model = M5()

# perform a 10-fold cross validation
e = Experiment(training, "example_msp")
e.regression([model], 10)

# export the raw C++ code
codeFile = "example_msp.cpp"
CodeGenerator().export(training, model, codeFile)

# create a dummy MSP430 project which executes the model
csv = CSV()
csv.load(training)
attributes = csv.findAttributes(0)
# NOTE(review): len(attributes) - 1 presumably excludes the target column —
# confirm against findAttributes()
mem = MSP430().run(codeFile, "float", len(attributes) - 1)
print(mem)

# all results are written to results/example_msp/
# Example script: train an ANN and export its code plus an EPS visualization.
from models.ann.ANN import ANN
from experiment.Experiment import Experiment
from code.CodeGenerator import CodeGenerator
from data.FileHandler import FileHandler
from data.CSV import CSV

# define the training data set and set up the model
training = "../examples/mnoA.csv"
model = ANN()
# NOTE(review): other scripts configure the net via model.config.hiddenLayers;
# verify that assigning model.hiddenLayers directly is honored by ANN.
model.hiddenLayers = [10, 10]

# perform a 10-fold cross validation
e = Experiment(training, "example_ann_visualization")
e.regression([model], 10)

# export the C++ code
CodeGenerator().export(training, model, e.path("ann.cpp"))
# render the network structure as an EPS figure
model.exportEps(e.path("ann_vis.eps"))
# Script: simulate model1 and save three noisy replicates of "Rpp".
from marginal_likelihood.SBMLtoODES import sbml_to_odes
from experiment.ExperimentSet import ExperimentSet
from experiment.Experiment import Experiment
import numpy as np


def add_noise(values):
    """Add Gaussian noise (sd .01) to each entry in place.

    A perturbation is skipped when it would make the value non-positive.
    """
    for i in range(len(values)):
        eps = np.random.normal(0, .01)
        if values[i] + eps > 0:
            values[i] += eps


# NOTE(review): SBML is used below but not imported in this snippet — an
# import (presumably from the marginal_likelihood package) is missing,
# otherwise this line raises NameError.
sbml = SBML()
sbml.load_file('model1.xml')
odes = sbml_to_odes(sbml)
time = [0, 2, 5, 10, 20, 40, 60, 100]
values = odes.evaluate_on(time)

experiment_set = ExperimentSet()
for i in range(3):
    # copy every trajectory so each replicate gets independent noise
    noised_values = {}
    for x in values:
        noised_values[x] = list(values[x])
    add_noise(noised_values["Rpp"])
    # the t = 0 point is dropped — presumably the fixed initial condition;
    # confirm with the model definition
    experiment = Experiment(time[1:], noised_values["Rpp"][1:], "Rpp")
    experiment_set.add(experiment)
experiment_set.save_to_file('experiment.data')
# Example script: iterative feature reduction with a random forest.
from models.randomforest.RandomForest import RandomForest
from experiment.Experiment import Experiment
from data.CSV import CSV
from plot.ResultVisualizer import ResultVisualizer

# define the training data set and set up the model
training = "../examples/mnoA.csv"
csv = CSV(training)
model = RandomForest()

# perform a 10-fold cross validation on the full feature set
e = Experiment(training, "example_feature_reduction")
e.regression([model], 10)
CSV(e.path("cv_0.csv")).save(e.path("subset_0.csv"))
xTicks = ["None"]

# obtain a feature ranking (rows normalized, sorted by mean)
M = CSV(e.path("features_0.csv")).toMatrix()
M.normalizeRows()
M.sortByMean()

# sequentially remove the least important feature from the training data
# and retrain the model
subset = e.path("subset.csv")
for i in range(len(M.header) - 1):
    # last-ranked header entry — assumes sortByMean orders features from
    # most to least important; confirm against Matrix.sortByMean
    key = M.header[-1]
    M.header = M.header[0:-1]
    csv.removeColumnWithKey(key)
    csv.save(subset)
    e = Experiment(subset, "example_feature_reduction")
    e.regression([model], 10)
def run_experiment(data_directory, num_browsers=20, num_blocks=10):
    """Run the staged YouTube/Google experiment and return its results.

    Returns a (observations, assignments) tuple collected after the
    measurement stage.
    """
    experiment = Experiment(data_directory=data_directory,
                            num_browsers=num_browsers,
                            num_blocks=num_blocks,
                            feature_extract=extract_topics,
                            save_path=TEST_NAME + "_data.txt")
    # (name, group, url, actions) for each stage, in execution order
    stages = [
        ("start", "all", "https://www.youtube.com/", [visit, scroll]),
        ("treatment", "experimental", "https://www.google.com/", [visit]),
        ("measurement", "all", "https://www.youtube.com/",
         [visit, scroll, save_page_source]),
    ]
    for name, group, url, actions in stages:
        experiment.add_stage(name, group, url, actions)
    experiment.run()
    experiment.save_data()
    return experiment.get_observations(), experiment.get_assignments()
# Example script: train a random forest and export it as C++ code.
from models.randomforest.RandomForest import RandomForest
from experiment.Experiment import Experiment
from code.CodeGenerator import CodeGenerator

# define the training data set and set up the model
training = "../examples/mnoA.csv"
model = RandomForest()
model.config.trees = 10
model.config.depth = 5

# perform a 10-fold cross validation
e = Experiment(training, "example_rf")
e.regression([model], 10)

# export the C++ code
CodeGenerator().export(training, model, e.path("rf.cpp"))

# all results are written to results/example_rf/
# Example script: compare four regression models and plot their CV metrics.
from models.ann.ANN import ANN
from models.m5.M5 import M5
from models.randomforest.RandomForest import RandomForest
from models.svm.SVM import SVM
from experiment.Experiment import Experiment
from plot.ResultVisualizer import ResultVisualizer
import matplotlib.pyplot as plt

# define the training data set and set up the models
training = "../examples/mnoA.csv"
models = [ANN(), M5(), RandomForest(), SVM()]

# perform a 10-fold cross validation
e = Experiment(training, "example_experiment")
e.regression(models, 10)

# visualize: one boxplot panel per metric on a 2x2 grid
files = [e.path("cv_" + str(i) + ".csv") for i in range(len(models))]
fig, axs = plt.subplots(2, 2)
fig.set_size_inches(8, 5)
xticks = [model.modelName for model in models]
ResultVisualizer().boxplots(files, "r2", xticks, ylabel='R2',
                            fig=fig, ax=axs[0][0], show=False)
# NOTE(review): snippet is truncated here — the "mae" boxplot call is cut off.
ResultVisualizer().boxplots(files, "mae",
def run_experiment(data_directory, num_browsers=20, num_blocks=10):
    """Run the staged YouTube playback-treatment experiment.

    Returns a (observations, assignments) tuple collected after the
    measurement stage.
    """
    experiment = Experiment(data_directory=data_directory,
                            num_browsers=num_browsers,
                            num_blocks=num_blocks,
                            feature_extract=extract_topics,
                            save_path=TEST_NAME + "_data.txt")
    # (name, group, url, actions) for each stage, in execution order
    stages = [
        ("start", "all", "https://www.youtube.com", [visit, scroll]),
        ("treatment", "experimental",
         "https://pnandak1.github.io/treatments/playback_video.html",
         [visit]),
        ("measurement", "all", "https://www.youtube.com",
         [visit, scroll, save_page_source]),
    ]
    for name, group, url, actions in stages:
        experiment.add_stage(name, group, url, actions)
    experiment.run()
    experiment.save_data()
    return experiment.get_observations(), experiment.get_assignments()
from data.CSV import CSV # define the training data set and set up the model training = "../examples/mnoA.csv" training = "../examples/vehicleClassification.csv" csv = CSV(training) attributes = csv.findAttributes(0) d = csv.discretizeData() model = RandomForest() model.config.trees = 10 model.config.depth = 5 # perform a 10-fold cross validation e = Experiment(training, "example_rf_disc") e.classification([model], 10) # export the C++ code CodeGenerator().export(training, model, e.path("rf.cpp"), d) # ce = CodeEvaluator() R, C = ce.crossValidation(model, training, attributes, e.tmp(), d) R.printAggregated() # all results are written to results/example_rf_disc/
# Example script: export an M5 regression model and build it for Arduino.
from models.m5.M5 import M5
from experiment.Experiment import Experiment
from code.CodeGenerator import CodeGenerator
from data.CSV import CSV
from code.Arduino import Arduino

# define the training data set and set up the model
training = "../examples/mnoA.csv"
model = M5()

# perform a 10-fold cross validation
e = Experiment(training, "example_arduino")
e.regression([model], 10)

# export the raw C++ code
codeFile = e.path("arduino.cpp")
CodeGenerator().export(training, model, codeFile)

# create a dummy Arduino project which executes the model
csv = CSV()
csv.load(training)
attributes = csv.findAttributes(0)
# NOTE(review): len(attributes) - 1 presumably excludes the target column —
# confirm against findAttributes()
mem = Arduino().run(codeFile, "float", len(attributes) - 1)
print(mem)

# all results are written to results/example_arduino/
# Example script: ANN input-layer feature importance across CV folds.
from models.ann.ANN import ANN
from weka.models.ANN import ANN as ANN_WEKA
from experiment.Experiment import Experiment
from code.CodeGenerator import CodeGenerator
from data.CSV import CSV
from data.FileHandler import FileHandler
from data.ResultMatrix import ResultMatrix
import numpy as np
from plot.ResultVisualizer import ResultVisualizer

# define the training data set and set up the model
training = "../examples/mnoA.csv"
model = ANN()

# perform a 10-fold cross validation
e = Experiment(training, "example_ann_feature_importance")
e.regression([model], 10)

# rebuild the WEKA model of every fold and collect its input-layer ranking
M = ResultMatrix()
csv = CSV(training)
attributes = csv.findAttributes(0)
for i in range(10):
    # rebinds 'training' to the fold's file (shadows the path set above)
    training = e.tmp() + "training_mnoA_" + str(i) + ".csv"
    data = "\n".join(FileHandler().read(e.tmp() + "raw0_" + str(i) + ".txt"))
    ANN_WEKA(model).initModel(data, csv, attributes, training)
    # header[1:] — presumably skips the target/first column; confirm
    M.add(csv.header[1:], model.computeInputLayerRanking())
M.normalizeRows()
M.sortByMean()
# NOTE(review): fragment — the first two lines are the tail of add_noise();
# its header, the imports (SBML, sbml_to_odes, ExperimentSet, Experiment) and
# the rest of the perturbation experiments lie outside the visible chunk.
        if values[i] + eps > 0:
            values[i] += eps


sbml = SBML()
sbml.load_file('final_model.sbml')
odes = sbml_to_odes(sbml)
time = [30, 60, 180, 300, 900, 1800]

# Simple experiment: run final_model simulations adding a Gaussian noise
values = odes.evaluate_exp_on('MAPK_PP + MAPK_P', time)
experiment_set = ExperimentSet()
for i in range(3):
    # copy so each replicate receives independent noise
    noised_values = list(values)
    add_noise(noised_values)
    experiment = Experiment(time, noised_values, 'MAPK_PP + MAPK_P')
    experiment_set.add(experiment)
experiment_set.save_to_file('gauss_noise.data')

# NOTE(review): 'gaus_noise_abcsysbio.data' looks like a typo for 'gauss_...'
# — kept as is because downstream tools may expect this exact name.
with open('gaus_noise_abcsysbio.data', 'w') as f:
    f.write(experiment_set.get_as_abcsysbio_syntax())
    f.close()  # redundant inside 'with', but harmless

# More complex experiment: run final_model with perturbations on
# catalytic constants
perturbation_exp = ExperimentSet()

# First perturbation experiments
theta = odes.get_all_parameters()
changed_param = ''
changed_param_value = 0
# Example script: re-apply generated random-forest code and plot predictions.
# NOTE(review): fragment — RandomForest, CSV, Experiment and ResultVisualizer
# are used below but their imports are cut off above this chunk.
from code.CodeGenerator import CodeGenerator
from code.CodeEvaluator import CodeEvaluator
from data.FileHandler import FileHandler
from data.ResultMatrix import ResultMatrix

# define the training data set and set up the model
training = "../examples/mnoA.csv"
model = RandomForest()
model.config.trees = 10
model.config.depth = 10
csv = CSV(training)
attributes = csv.findAttributes(0)

# perform a 10-fold cross validation
e = Experiment(training, "example_model_reapplication")
e.regression([model], 10)

# re-apply the generated code on each CV fold and print aggregated results
ce = CodeEvaluator()
R, C = ce.crossValidation(model, training, attributes, e.tmp())
R.printAggregated()

# scatter-plot predictions vs labels over all ten folds
# NOTE(review): the ylabel string is missing its closing ']' — kept as is
# since it is only a plot label.
ResultVisualizer().scatter(
    [e.tmp() + "predictions_" + str(i) + ".csv" for i in range(10)],
    "prediction", "label",
    xlabel='Predicted Data Rate [MBit/s]',
    ylabel='Measured Data Rate [MBit/s',
    savePNG=e.path("example_model_reapplication.png"))