def main(input_files, output_file, levels):
    """Plot "precision" against "included_contigs_ratio" and save the figure.

    For every input file an ExperimentData is loaded, standardized and
    classified at each requested level; one curve is then drawn per
    (input file, level) pair, labelled "<file name>, <level>".

    Args:
        input_files: iterable of '/'-separated paths to the score files.
        output_file: destination handed to ``plt.savefig``.
        levels: classification level names. It is iterated more than once
            per file, so pass a list or tuple, not a one-shot iterator.
    """
    # Only the side effect of opening a new figure is needed; the handle
    # returned by plt.figure() was never used.
    plt.figure()
    ax = plt.subplot(111)

    x_fun_name = "included_contigs_ratio"
    y_fun_name = "precision"
    axis_funs = RocAxisFuns(x_fun_name, y_fun_name)

    for input_file in input_files:
        data = ExperimentData(axis_funs)
        data.load_data_frame(input_file)
        data.standardize()
        for level in levels:
            data.classify(level)
        data.calculate_roc()

        # The legend label only depends on the file, so compute it once
        # per file instead of once per level.
        file_name = input_file.split('/')[-1]

        # Plot ROC curve
        for level in levels:
            curve = data.roc_data[level]
            plt.plot(curve.x, curve.y, label=file_name + ", " + level)

    # Shrink the axes to 80% of their height (bottom edge unchanged);
    # presumably this leaves room for the legend anchored above the plot.
    box = ax.get_position()
    ax.set_position([box.x0, box.y0, box.width, box.height * 0.8])
    plt.legend(loc='upper center', bbox_to_anchor=(0.5, 1.35),
               ncol=2, fancybox=True, shadow=True)

    plt.xlabel(x_fun_name)
    plt.ylabel(y_fun_name)
    plt.savefig(output_file)
def main(input_files, level, fun_name):
    """Write one " & "-separated row of scores to stdout, one per input file.

    Each file is loaded, standardized and classified at ``level``; the
    function selected by ``fun_name`` is then applied to the values found
    at position 1 of every row of the classification frame (presumably the
    ``real_classif`` column -- confirm) together with an empty list.

    Args:
        input_files: iterable of paths to the score files.
        level: classification level name passed to ``classify``.
        fun_name: name used to look up the function via ``RocAxisFun``.
    """
    fun = RocAxisFun(fun_name).fun

    scores = []
    for path in input_files:
        experiment = ExperimentData(fun)
        experiment.load_data_frame(path)
        experiment.standardize()
        experiment.classify(level)

        frame = experiment.classification[level]
        second_column = [record[1] for record in frame.values]
        scores.append(fun(second_column, []))

    # No trailing newline is emitted, matching the original output exactly.
    sys.stdout.write(" & ".join(map(str, scores)))
def test_experiment_data(self):
    """Check standardization and per-level classification on a fixture.

    Loads ``fixtures/score_mul_test.tsv`` (relative to this file), verifies
    a handful of standardized p-values, then classifies at family, genus
    and genome level and checks selected rows plus the number of True
    entries in ``real_classif`` for each level.
    """
    axis_funs = RocAxisFuns("included_contigs_ratio", "precision")
    d = ExperimentData(axis_funs)
    fixture_dir = os.path.dirname(__file__)
    fixture_path = os.path.join(fixture_dir, "fixtures/score_mul_test.tsv")
    d.load_data_frame(fixture_path)
    d.standardize()

    # Random sample to test if the standardization works:
    # (row label, expected standardized p-value)
    standardized_samples = (
        (0, 0.7767953),
        (5, -0.67591835),
        (10, -0.1388929),
        (12, -1.13672827),
    )
    for row, expected in standardized_samples:
        assert_almost_equal(
            d.df[row:row + 1].p_value_standardized[row], expected)

    for level in ("family", "genus", "genome"):
        d.classify(level)

    # (level, index values expected to be False, expected number of True)
    expectations = (
        ("family", (1010,), 28),
        ("genus", (1010, 1013, 1027), 26),
        ("genome",
         (1000, 1001, 1003, 1004, 1007, 1010, 1013, 1018, 1027),
         20),
    )
    for level, false_ids, true_count in expectations:
        df = d.classification[level]
        for idx in false_ids:
            assert_equal(df[df.index == idx].real_classif.values[0], False)
        assert_equal(list(df.real_classif.values).count(True), true_count)
#!/usr/bin/env python
# Profiling driver: loads and standardizes a score fixture once, then runs
# the genome-level classification inside a function decorated with @profile.
# NOTE(review): `profile` is not imported here; presumably it is injected as
# a builtin by the profiler used to launch this script -- confirm.
import os

from corrbin.score import ExperimentData, RocAxisFun

# "<this file>/../.." collapses, once normalised by abspath, to the parent
# of the directory containing this script.
file_path = os.path.realpath(__file__)
fixture_rel_path = os.path.join("..", "..", "fixtures/score_500000.tsv")
input_file = os.path.abspath(os.path.join(file_path, fixture_rel_path))

# Setup happens at module level so only classify() is inside the
# profiled function.
fun = RocAxisFun("precision").fun
data = ExperimentData(fun)
data.load_data_frame(input_file)
data.standardize()


@profile
def my_fun(data):
    """Run the genome-level classification on already-prepared data."""
    data.classify("genome")


my_fun(data)
def my_fun():
    """Load the module-level ``input_file`` and standardize it.

    Builds an ExperimentData around the "precision" function, reads the
    file named by the global ``input_file`` and standardizes the result.
    Nothing is returned; the ExperimentData is discarded afterwards.
    """
    precision_fun = RocAxisFun("precision").fun
    experiment = ExperimentData(precision_fun)
    experiment.load_data_frame(input_file)
    experiment.standardize()