Пример #1
0
def main(input_files, output_file, levels):
    """Plot one curve per (input file, level) pair and save the figure.

    Every input file is loaded into an ``ExperimentData`` object, standardized,
    classified at each requested level, and then its ROC data is calculated.
    The x axis is "included_contigs_ratio" and the y axis is "precision".

    Args:
        input_files: iterable of paths handed to ``load_data_frame``.
        output_file: destination passed to ``savefig``.
        levels: iterable of level names used for ``classify`` and as keys
            into ``roc_data``; any iterable (including a generator) works.
    """
    # Function-scope import keeps this block self-contained.
    import os

    x_fun_name = "included_contigs_ratio"
    y_fun_name = "precision"
    axis_funs = RocAxisFuns(x_fun_name, y_fun_name)

    # levels is walked twice per input file (classify, then plot); materialize
    # it so a one-shot iterator does not come up empty on the second pass.
    levels = list(levels)

    fig = plt.figure()
    ax = fig.add_subplot(111)

    for input_file in input_files:
        data = ExperimentData(axis_funs)
        data.load_data_frame(input_file)
        data.standardize()
        for level in levels:
            data.classify(level)
        data.calculate_roc()

        # The legend label only needs the file's base name; compute it once
        # per file instead of once per level.
        file_name = os.path.basename(input_file)
        for level in levels:
            curve = data.roc_data[level]
            ax.plot(curve.x, curve.y,
                    label="{0}, {1}".format(file_name, level))

    # Shrink the axes to 80% of their height so the legend, anchored above the
    # plot area, has room inside the figure.
    box = ax.get_position()
    ax.set_position([box.x0, box.y0, box.width, box.height * 0.8])
    ax.legend(loc='upper center', bbox_to_anchor=(0.5, 1.35),
              ncol=2, fancybox=True, shadow=True)
    ax.set_xlabel(x_fun_name)
    ax.set_ylabel(y_fun_name)
    fig.savefig(output_file)
Пример #2
0
def main(input_files, level, fun_name):
    """Write one " & "-separated row of scores, one score per input file.

    For each input file the data is loaded, standardized and classified at
    *level*; the function looked up through ``RocAxisFun(fun_name)`` is then
    applied to the second value of every row of the classification frame
    (presumably the real classification flag -- verify against
    ``ExperimentData``) together with an empty list.  The row is written to
    standard output without a trailing newline.
    """
    fun = RocAxisFun(fun_name).fun

    def score_for(path):
        # Run the full load/standardize/classify pipeline for one file and
        # reduce its classification frame to a single score.
        experiment = ExperimentData(fun)
        experiment.load_data_frame(path)
        experiment.standardize()
        experiment.classify(level)
        frame = experiment.classification[level]
        second_column = [record[1] for record in frame.values]
        return fun(second_column, [])

    scores = [score_for(path) for path in input_files]
    sys.stdout.write(" & ".join(map(str, scores)))
Пример #3
0
    def test_experiment_data(self):
        """Check standardization and per-level classification on a fixture.

        Loads ``fixtures/score_mul_test.tsv`` (relative to this file), spot
        checks four standardized p-values, then classifies at the family,
        genus and genome levels and verifies both selected rows that must be
        ``False`` and the total number of ``True`` classifications per level.
        """
        fixture = os.path.join(os.path.dirname(__file__),
                               "fixtures/score_mul_test.tsv")
        d = ExperimentData(RocAxisFuns("included_contigs_ratio", "precision"))
        d.load_data_frame(fixture)
        d.standardize()

        # Random sample to test if the standardization works:
        # (row label, expected standardized p-value)
        standardized_samples = (
            (0, 0.7767953),
            (5, -0.67591835),
            (10, -0.1388929),
            (12, -1.13672827),
        )
        for row, expected in standardized_samples:
            assert_almost_equal(
                d.df[row:row + 1].p_value_standardized[row], expected)

        # (level, index labels expected to be False, expected count of True)
        level_expectations = (
            ("family", (1010,), 28),
            ("genus", (1010, 1013, 1027), 26),
            ("genome",
             (1000, 1001, 1003, 1004, 1007, 1010, 1013, 1018, 1027),
             20),
        )

        # Classify every level up front, then inspect the results.
        for level, _, _ in level_expectations:
            d.classify(level)

        for level, false_labels, true_count in level_expectations:
            df = d.classification[level]
            for label in false_labels:
                assert_equal(
                    df[df.index == label].real_classif.values[0], False)
            assert_equal(list(df.real_classif.values).count(True), true_count)
Пример #4
0
#!/usr/bin/env python
import os
from corrbin.score import ExperimentData, RocAxisFun
# Locate the fixture relative to this script.  file_path names the script file
# itself, so the first ".." drops the file name and the second one climbs out
# of the script's directory before "fixtures/" is appended.
file_path = os.path.realpath(__file__)
input_file = os.path.abspath(os.path.join(file_path,"..","..","fixtures/score_500000.tsv"))

# Loading and standardizing happen here, outside the decorated function, so
# only the classification step sits under the @profile decorator.
fun = RocAxisFun("precision").fun
data = ExperimentData(fun)
data.load_data_frame(input_file)
data.standardize()

# `profile` is neither defined nor imported in this script; presumably it is
# injected as a builtin by a line profiler such as kernprof.  Fall back to a
# no-op decorator so the script also runs when started directly.
try:
    profile
except NameError:
    def profile(func):
        """Return *func* unchanged when no profiler provides ``profile``."""
        return func


@profile
def my_fun(data):
    """Classify *data* at the "genome" level."""
    data.classify("genome")


my_fun(data)
Пример #5
0
def my_fun():
    """Load and standardize experiment data built with the "precision" function.

    The path comes from the module-level ``input_file``, which is defined
    outside this excerpt.  Nothing is returned by the lines visible here.
    NOTE(review): the excerpt may end before the function does -- confirm
    against the full file.
    """
    fun = RocAxisFun("precision").fun
    data = ExperimentData(fun)
    data.load_data_frame(input_file)
    data.standardize()