Example #1
def test_fastq_unzipped():

    for thisdata in [data, datagz]:
        # instantiation
        f = fastq.FastQ(thisdata)
        assert f.data_format == "Illumina_1.8+"
        # count lines
        # run it twice to make sure re-running count_lines
        # (decompression with zlib) works on a second pass
        assert f.count_lines() == 1000
        assert f.count_lines() == 1000
        assert f.count_reads() == 250
        assert f.count_reads() == 250

        # extract head of the file into an unzipped file
        ft = TempFile()
        f.extract_head(100, ft.name)
        fcheck = fastq.FastQ(ft.name)
        assert fcheck.count_lines() == 100
        ft.delete()

        # extract head of the file and zip output
        ft = TempFile(suffix=".gz")
        f.extract_head(100, ft.name)
        fcheck = fastq.FastQ(ft.name)
        assert fcheck.count_lines() == 100
        ft.delete()

        with FastQ(thisdata) as ff:
            assert len(ff) == 250

        with TempFile() as fh:
            selection = f.select_random_reads(10, fh.name)
            f.select_random_reads(selection, fh.name)
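# Note: count_lines()/count_reads() are called twice above to check that a
# second pass re-opens the compressed stream rather than reading a consumed
# one. A minimal sketch of a re-runnable counter (plain stdlib, not sequana's
# implementation):
import gzip

def count_lines(filename):
    # re-open on every call so repeated counts return the same value
    opener = gzip.open if filename.endswith(".gz") else open
    with opener(filename, "rt") as fin:
        return sum(1 for _ in fin)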
Example #2
def test_snpeff():
    # a custom reference
    fh_log = TempFile()

    mydata = snpeff.SnpEff(reference=sequana_data("JB409847.gbk"), log=fh_log.name)
    with TempFile() as fh:
        mydata.launch_snpeff(sequana_data("JB409847.vcf"), fh.name)
    fh_log.delete()

    # cleanup
    for created in ("snpEff.config", "snpEff_genes.txt", "snpEff_summary.html"):
        try:
            os.remove(created)
        except OSError:
            pass

    # an unknown reference should trigger sys.exit()
    try:
        snpeff.SnpEff(reference="dummy")
    except SystemExit:
        pass
    else:
        raise AssertionError("SnpEff(reference='dummy') should raise SystemExit")
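# Note: SystemExit derives from BaseException, not Exception, which is why
# the test above names it explicitly; a generic handler would not see it.
# A quick illustration:
try:
    raise SystemExit(1)
except Exception:
    raise AssertionError("never reached: SystemExit is not an Exception")
except SystemExit:
    pass  # sys.exit() lands here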
Example #3
def test_attrdict():

    a = tools.AttrDict(value=1)
    assert a.value == 1
    assert 'value' in list(a.keys())
    assert 1 in a.values()

    a.description = 'test'
    assert a['description'] == 'test'

    a['output'] = 'txt'
    assert a.output == 'txt'

    d = {'a': {'b': 1}, 'aa': 2}
    ad = tools.AttrDict(**d)
    assert ad.a.b == 1
    ad.a.b = 2
    assert ad.a.b == 2

    ad['d'] = 4
    assert ad.d == 4

    # update() must reject a non-mapping argument
    try:
        ad.update(1)
    except Exception:
        pass
    else:
        raise AssertionError("update(1) should raise an exception")

    # check json capabilities
    fh = TempFile()
    js = ad.to_json()
    ad.to_json(filename=fh.name)
    ad.from_json(fh.name)
    fh.delete()
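# Note: the widespread `try: ...; assert False; except: assert True` idiom is
# unreliable because a bare except also catches the AssertionError raised by
# `assert False`, so the test can never fail; the try/else form above avoids
# that. Under pytest the same check can be written as (a sketch, assuming
# pytest is available):
import pytest

def test_attrdict_update_rejects_non_mapping():
    ad = tools.AttrDict(a=1)
    with pytest.raises(Exception):
        ad.update(1)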
Example #4
def test_add_locus_with_modification():

    # Alter the original GBK to alter the locus name
    with open(sequana_data("JB409847.gbk"), "r") as fin:
        data = fin.read()
    newdata = data.replace("JB409847", "DUMMY_JB409847")

    fh = TempFile(suffix="gbk")
    with open(fh.name, 'w') as fout:
        fout.write(newdata)

    # Now read the new GBK file, whose locus name differs from the FASTA's
    mydata = snpeff.SnpEff(reference=fh.name)

    # Here is the corresponding FASTA
    fasta = sequana_data("JB409847.fasta")

    with TempFile(suffix="fasta") as fh2:
        mydata.add_locus_in_fasta(fasta, fh2.name)

        # The newly created FASTA file should contain the DUMMY tag
        # cleanup
        try:
            os.remove("snpEff.config")
        except OSError:
            pass

        with open(fh2.name, "r") as fin:
            data = fin.read()
        assert "DUMMY" in data
    fh.delete()
Example #5
def test_yeast_annotations():
    from easydev import gsf
    filename = gsf('msdas', "data", "YEAST_raw_sample.csv")
    r = MassSpecReader(filename, verbose=verbose)
    a = AnnotationsYeast(r, verbose=verbose)
    # the first 200 rows are enough to cover gene-name cases and ambiguous
    # gene names, e.g., ALD3_YEAST ['P54114', 'P40047']
    a.df = a.df.iloc[0:200]
    a.get_uniprot_entries()
    a.update_mapping()
    a.set_annotations()
    a.annotations.Sequence

    t = TempFile()
    a.to_csv(t.name)
    t.delete()

    a.to_pickle("test", overwrite=True)
    try:
        a.to_pickle("test", overwrite=False)
        assert False
    except IOError:
        assert True
    a.read_pickle("YEAST_annotations_test.pkl")

    # create constructor given the annotations
    a = AnnotationsYeast(r, verbose=verbose, annotations="YEAST_annotations_test.pkl")
    a.get_uniprot_entries() # populate entry and entry_names in the df
    a.plot_goid_histogram()

    # cleanup
    os.remove("YEAST_annotations_test.pkl")
Example #6
def test_pacbio_input_bam(tmpdir):
    # we need a summary and a bunch of images
    filename = sequana_data("summary_pacbio_qc1.json")

    # mock the PNG files found in the summary
    import json
    with open(filename) as fin:
        summary = json.load(fin)
    pngname = sequana_data("no_data.jpg")
    summary["images"]["gc_vs_length"] = pngname
    summary["images"]["hist_gc_content"] = pngname
    summary["images"]["hist_read_length"] = pngname
    summary["images"]["hist_snr"] = pngname
    summary["images"]["hist_zmw"] = pngname

    summary_file = TempFile()
    with open(summary_file.name, "w") as ff:
        json.dump(summary, ff)

    # Now that we have this new summary file, let us use it
    # we also need an output handler
    ff = TempFile()

    from sequana.utils import config
    config.output_dir = "/tmp"
    # ff.name is an absolute path such as /tmp/djhfjh4dz; keep only the file
    # name, since config.output_dir already provides the directory
    pacbio_input_bam.PacbioInputBAMModule(summary_file.name,
                                          os.path.basename(ff.name))

    # cleanup
    summary_file.delete()
    ff.delete()
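# Note: on Unix, TempFile names are absolute paths, so splitting on "/" puts
# an empty string first and the file name last; os.path.basename is the
# robust way to strip the directory:
import os
assert "/tmp/djhfjh4dz".split("/") == ["", "tmp", "djhfjh4dz"]
assert os.path.basename("/tmp/djhfjh4dz") == "djhfjh4dz"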
Example #7
def test_sequana_config():
    s = snaketools.Module("compressor")
    config = snaketools.SequanaConfig(s.config)

    assert config.config.get("compressor")["source"] == "fastq.gz"
    assert config.config.get("kraken:dummy") == None

    # --------------------------------- tests different constructors
    config = snaketools.SequanaConfig()
    config = snaketools.SequanaConfig({"test": 1})
    assert config.config.test == 1
    # with a dictionary
    config = snaketools.SequanaConfig(config.config)
    # with a sequanaConfig instance
    config = snaketools.SequanaConfig(config)
    # with a non-yaml file
    json_file = sequana_data('test_summary_fastq_stats.json')
    try:
        config = snaketools.SequanaConfig(json_file)
    except Exception:
        pass
    else:
        raise AssertionError("a JSON file is not a valid config")
    # with a non-existing file
    try:
        config = snaketools.SequanaConfig("dummy_dummy")
    except Exception:
        pass
    else:
        raise AssertionError("a missing file should raise an error")

    # Test an exception
    s = snaketools.Module("compressor")
    config = snaketools.SequanaConfig(s.config)
    config._recursive_update(config._yaml_code,
                             {"input_directory_dummy": "test"})

    #config.check_config_with_schema(s.schema_config)
    # Loop over all pipelines: read the config, save it, and check that the
    # content is identical. This requires removing the templates. Empty
    # strings and "no value" entries must both survive the round trip:
    #
    #    field1: ""
    #    field2:
    #
    # must be unchanged.
    from easydev import TempFile
    output = TempFile(suffix=".yaml")
    for pipeline in snaketools.pipeline_names:
        config_filename = Module(pipeline)._get_config()
        cfg1 = SequanaConfig(config_filename)
        cfg1.cleanup()  # remove templates and strip strings

        cfg1.save(output.name)
        cfg2 = SequanaConfig(output.name)
        assert cfg2._yaml_code == cfg1._yaml_code
        cfg2._update_config()
        assert cfg1.config == cfg2.config
    output.delete()
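# Note on the round-trip check above: in YAML, `field1: ""` loads as an empty
# string while a bare `field2:` loads as None, and both must survive a
# save/reload unchanged. A minimal sketch of the distinction (assuming PyYAML):
import yaml

cfg = yaml.safe_load('field1: ""\nfield2:\n')
assert cfg["field1"] == ""
assert cfg["field2"] is None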
Example #8
def test_input():

    from easydev import TempFile
    fh = TempFile(suffix=".fastq.gz")
    filename = sequana_data('Hm2_GTGAAA_L005_R2_001.fastq.gz')
    df = fastq_head.main(
        [prog, '--input', filename, '--nlines', "100", "--output", fh.name])

    df = fastq_head.main([prog, filename, "100", fh.name])
    fh.delete()
Example #9
    def score_sc2(self, prediction_file):
        fh = TempFile()
        _, gs2 = self.download_gs()
        script = self.classpath + os.sep + "DREAM_Olfaction_scoring_Q2.pl"
        cmd = "perl %s %s %s %s"
        cmd = cmd % (script, prediction_file, fh.name, gs2)
        shellcmd(cmd)
        df = pd.read_csv(fh.name, sep='\t', index_col=None).iloc[0]
        fh.delete()
        return df
Example #10
def test_sequana_config():
    s = snaketools.Module("quality_control")
    config = snaketools.SequanaConfig(s.config)

    assert config.config.get("kraken:dummy", "test") == "test"
    assert config.config.get("kraken:dummy") == None

    # --------------------------------- tests different constructors
    config = snaketools.SequanaConfig()
    config = snaketools.SequanaConfig({"test":1})
    assert config.config.test == 1
    # with a dictionary
    config = snaketools.SequanaConfig(config.config)
    # with a sequanaConfig instance
    config = snaketools.SequanaConfig(config)
    # with a non-yaml file
    json_file = sequana_data('test_summary_fastq_stats.json')
    try:
        config = snaketools.SequanaConfig(json_file)
    except Exception:
        pass
    else:
        raise AssertionError("a JSON file is not a valid config")
    # with a non-existing file
    try:
        config = snaketools.SequanaConfig("dummy_dummy")
    except Exception:
        pass
    else:
        raise AssertionError("a missing file should raise an error")

    # Test an exception
    s = snaketools.Module("quality_control")
    config = snaketools.SequanaConfig(s.config)
    config._recursive_update(config._yaml_code, {"input_directory_dummy": "test"})

    # Loop over all pipelines: read the config, save it, and check that the
    # content is identical. This requires removing the templates. Empty
    # strings and "no value" entries must both survive the round trip:
    #
    #    field1: ""
    #    field2:
    #
    # must be unchanged.
    from easydev import TempFile
    output = TempFile(suffix=".yaml")
    for pipeline in snaketools.pipeline_names:
        config_filename = Module(pipeline)._get_config()
        cfg1 = SequanaConfig(config_filename)
        cfg1.cleanup() # remove templates and strip strings

        cfg1.save(output.name)
        cfg2 = SequanaConfig(output.name)
        assert cfg2._yaml_code == cfg1._yaml_code
        cfg2._update_config()
        assert cfg1.config == cfg2.config
    output.delete()
Example #11
def sbmlqual_from_datasets(identifier):

    # a simple model
    s1 = SIF()
    s2 = SIF(cnodata("PKN-" + identifier + ".sif"))
    fh = TempFile()
    s2.to_sbmlqual(fh.name)
    s1.read_sbmlqual(fh.name)
    fh.delete()
    assert s1 == s2
    s3 = SIF(cnodata("PKN-" + identifier + ".xml"))
    assert s1 == s3 and s2 == s3
Example #12
def test_phosphogrid():
    m = MassSpecReader(get_yeast_small_data(), verbose=False)
    gene_names = set(list(m.df.Protein))
    p = phosphogrid.PhosphoGRID(directory="../share/data")
    p.run(gene_names=gene_names)
    fh = TempFile(suffix='.sif')
    p.export2sif(filename=fh.name)
    p.plot()
    #p.run()

    fh.delete()
Example #13
    def score_A(self, filename):
        from easydev import TempFile
        fh = TempFile()
        script = self._pj(
            [self.classpath, 'weighted_average_concordance_index.pl'])
        datadir = self._pj([self.classpath, 'data'])
        cmd = "perl %s %s %s %s"
        cmd = cmd % (script, filename, datadir, fh.name)

        shellcmd(cmd, verbose=True, ignore_errors=True)
        try:
            df = pd.read_csv(fh.name, sep='\t', header=None)
        except Exception:
            print("Something went wrong in the scoring while executing:\n  %s" %
                  cmd)
            print(
                "\n The D7C4 challenge requires a Perl package to be installed"
            )
            print("See D7C4 documentation e.g., on dreamtools.readthedocs.org")
            import sys
            sys.exit(1)
        df.columns = [
            'DrugID', 'probabilistic c-index',
            'weighted probabilistic c-index', 'zscores'
        ]
        df = df.set_index('DrugID')
        fh.delete()

        ws = (df.sum() / df.sum()['zscores'])
        ws = ws['weighted probabilistic c-index']

        results = df.mean()
        results['weight average probabilistic c-index'] = ws

        del results['zscores']

        # Finally compute pvalues based on precomputed scores
        precomp = pd.read_csv(self._pj([
            self.classpath, 'data', 'DREAM7_DrugSensitivity1_drug_zscores.txt'
        ]),
                              sep='\t',
                              skiprows=6,
                              header=None)

        overall_mean = precomp.iloc[31, 1]
        overall_var = precomp.iloc[31, 2]

        pval = 1 - (.5 * (math.erf(
            (ws - overall_mean) / (math.sqrt(2 * overall_var))) + 1))

        results['weight average probabilistic c-index p-value'] = pval

        return {'Results': results}
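# Note: the p-value above is the upper-tail probability of a normal
# distribution; 1 - 0.5*(erf(z/sqrt(2)) + 1) equals 1 - Phi(z) with
# z = (ws - mean)/sigma. A self-contained check of that identity:
import math

def normal_sf(x, mean, var):
    # survival function 1 - Phi(z), written with math.erf
    z = (x - mean) / math.sqrt(var)
    return 1 - 0.5 * (math.erf(z / math.sqrt(2)) + 1)

assert abs(normal_sf(0.0, 0.0, 1.0) - 0.5) < 1e-12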
Example #14
def test_read_ic50():
    # -------------------------------- functionalities
    r = IC50(ic50_test)
    # we can also instantiate from a valid dataframe
    r = IC50(r)

    # test repr
    r

    # and the print statement
    print(r)

    # the copy method
    assert r == r.copy()

    r.hist()
    r.plot_ic50_count()
    r.cosmicIds

    f = TempFile()
    r.to_csv(f.name)
    f.delete()

    # columns may be duplicated
    r = IC50(ic50_test)
    df = pd.concat([r.df, r.df[999]], axis=1)
    # creating a new instance from duplicated columns should raise an error
    try:
        IC50(df)
    except Exception:
        pass
    else:
        raise AssertionError("duplicated columns should raise an error")

    # ---------------------------------------- different IC50 formats
    # test all files available
    for key in testing.keys():
        filename = testing[key].location
        if filename.startswith('ic50_test'):
            ic = IC50(filename)
    # some specific checks:
    #ic = IC50(testing['ic50_test_header_drug_prefix_only'].location)
    #assert ic.df.shape == (2,2)
    #assert all(ic.df.columns == ['1','2'])
    ic = IC50(testing['ic50_test_header_no_drug_prefix'].location)
    assert ic.drugIds == [1, 2]

    ic = IC50(testing['ic50_test_header_drug_prefix_only'].location)
    assert ic.drugIds == [1, 2]

    ic = IC50(testing['ic50_test_header_mixed_drug_prefix'].location)
    assert ic.drugIds == [1, 2]
Example #15
def test_d2c1():
    s = D2C1()
    s.test()

    filename = s.download_template()
    d = s.score(filename)
    assert_almost_equal(d['AUPR'], 0.2563463, 7)

    from easydev import TempFile
    fh = TempFile()
    s._create_templates(filename=fh.name)
    fh.delete()

    s.score_and_compare_with_lb(s.download_template())
Example #16
    def score_sc1(self, prediction_file):
        """Compute all results and compare user prediction with all official participants

        This scoring function can take a long time (about 5-10 minutes).
        """
        fh = TempFile()
        gs1, _ = self.download_gs()
        script = self.classpath + os.sep + "DREAM_Olfaction_scoring_Q1.pl"
        cmd = "perl %s %s %s %s"
        cmd = cmd % (script, prediction_file, fh.name, gs1)
        shellcmd(cmd)
        df = pd.read_csv(fh.name, sep='\t', index_col=None).iloc[0]
        fh.delete()
        return df
Example #17
def test_read_write_from_cnograph():
    c = CNOGraph(cnodata("PKN-ToyPB.sif"))
    fh = TempFile(suffix='.xml')
    c.to_sbmlqual(fh.name)
    c2 = CNOGraph(fh.name)
    assert c == c2
    fh.delete()

    c = CNOGraph(cnodata("PKN-ToyPB.sif"))
    c.expand_and_gates()
    fh = TempFile(suffix='.xml')
    c.to_sbmlqual(fh.name)
    c2 = CNOGraph(fh.name)
    fh.delete()
    assert c == c2
Example #18
def test_simple_sbmlqual():
    # a simple example mixing plain links, OR gates, and AND gates
    c = CNOGraph()
    c.add_reaction("!A=C")
    c.add_reaction("C=D")
    c.add_reaction("B=C")
    c.expand_and_gates()
    c.add_reaction("a1=b")
    c.add_reaction("a2=b")
    c.add_reaction("D^b=E")


    fh = TempFile(suffix='.xml')
    c.to_sbmlqual(fh.name)
    c2 = CNOGraph(fh.name)
    fh.delete()
    assert c == c2
Example #19
def test_MSReader():

    # we can just create an instance
    r = MassSpecReader(verbose=verbose)

    # fails if the file does not exist
    try:
        r = MassSpecReader("dummy.csv", verbose=verbose)
    except Exception:
        pass
    else:
        raise AssertionError("a missing file should raise an error")

    filename = yeast.get_yeast_filenames()[0]
    r = MassSpecReader(filename, verbose=verbose)
    print(r)
    r.mode
    r.N
    r.df
    r.measurements
    r.metadata

    # setting r.mode is expected to raise an error
    try:
        r.mode = None
    except Exception:
        pass
    else:
        raise AssertionError("setting mode should raise an error")

    r.sort_psites_ors_only()
    r['DIG1']
    r['DIG1',"S142"]
    r['DIG1_S142']
    try:
        r['DIG1', 'S142', 'dummy']
    except Exception:
        pass
    else:
        raise AssertionError("3-element keys should raise an error")
    r.sequences
    r.psites

    from easydev import TempFile
    f = TempFile()
    r.to_csv(f.name)
    f.delete()
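# Note: r['DIG1', 'S142'] is ordinary subscription with a tuple key; Python
# packs comma-separated subscripts into a single tuple before calling
# __getitem__. A minimal illustration:
class _KeyEcho:
    def __getitem__(self, key):
        return key

echo = _KeyEcho()
assert echo['DIG1'] == 'DIG1'
assert echo['DIG1', 'S142'] == ('DIG1', 'S142')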
Example #20
def test_yeast_june():
    #y = yeast.YEAST2MIDAS(get_yeast_small_data(), get_yeast_raw_data(),  verbose=False)
    #y.cleanup_june()
    #y.cleanup_june()
    #len(y.df)<100
    filename = gsf("msdas", "data", "PKN-yeastScaffold.sif")
    data.cleanup_june()
    c, m, e = data.export_pkn_and_midas_june(filename)

    from easydev import TempFile
    f = TempFile()
    data.to_midas(f.name)
    f.delete()

    cv = data.get_cv()
    m = data.get_midas()
    data.pcolor_na()
    data.plot_timeseries("DIG1_S126+S127")
Example #21
def test_config_parser():
    s1 = ParamsGA()
    s2 = ParamsGeneral()
    c1 = CNOConfigParser()
    c1.add_section(s2)
    c1.add_section(s1)

    s1 = ParamsGA()
    s2 = ParamsGeneral()
    c2 = CNOConfigParser()
    c2.add_section(s2)
    c2.add_section(s1)

    assert c1 == c2

    from easydev import TempFile
    fh = TempFile()
    c1.save(fh.name)
    c2 = CNOConfigParser(fh.name)
    fh.delete()
    assert c1 == c2
Example #22
def test_models():
    data = np.array([[1, 1, 1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 0],
       [1, 1, 1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1],
       [1, 1, 1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0],
       [1, 1, 1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 0],
       [1, 1, 1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1],
       [1, 1, 1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1],
       [1, 1, 1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 0, 1, 0],
       [1, 1, 0, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 0],
       [1, 1, 0, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 0, 1, 0],
       [1, 1, 0, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 0],
       [1, 1, 0, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 0, 1, 1],
       [1, 1, 0, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0],
       [1, 1, 1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 0, 1, 1],
       [1, 1, 0, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1],
       [1, 1, 0, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1],
       [1, 1, 0, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1]])

    columns = ['EGF=PI3K', 'TNFa=PI3K', 'Jnk=cJun', 'PI3K=Akt', 'Raf=Mek',
               '!Akt=Mek', 'Mek=p90RSK', 'Mek=Erk', 'Erk=Hsp27', 'TNFa=Jnk',
               'TNFa=NFkB', 'TNFa=Hsp27', 'EGF=Raf', 'EGF^TNFa=PI3K',
               'Raf^!Akt=Mek', 'Erk^TNFa=Hsp27']

    df = pd.DataFrame(data, columns=columns)
    fh = TempFile()
    df.to_csv(fh.name)

    m1 = Models(df)
    m2 = Models(m1)
    m3 = Models(fh.name, index_col=0)  # there is an index column with no name
    fh.delete()

    # an invalid constructor argument should raise an error
    try:
        Models(1)
    except Exception:
        pass
    else:
        raise AssertionError("Models(1) should raise an error")

    assert m1 == m2
    assert m1 == m3

    # plots
    m1.plot()
    m1.plot(1)
    m1.plot('cv')
    m1.errorbar()
    m1.heatmap()

    # exporters
    fh = TempFile()
    m1.to_csv(fh.name)
    fh.delete()

    fh = TempFile()
    m1.to_sif(fh.name)
    fh.delete()

    # m1 and m2 are identical; adding them removes duplicates, so the sum
    # should equal m1 itself.
    assert m1 == m1 + m2

    return m1, m2
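# Note: the last assertion relies on model addition removing duplicate rows,
# so adding an identical set changes nothing. A hypothetical sketch of that
# property with pandas (not the real Models class):
import pandas as pd

class ModelSet:
    def __init__(self, df):
        # normalise: drop duplicate rows so equality is well defined
        self.df = df.drop_duplicates().reset_index(drop=True)

    def __add__(self, other):
        return ModelSet(pd.concat([self.df, other.df]))

    def __eq__(self, other):
        return self.df.equals(other.df)

models = ModelSet(pd.DataFrame({"A": [0, 1], "B": [1, 1]}))
assert models == models + models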
Example #23
    def score_A(self, filename):
        from easydev import TempFile
        fh = TempFile()
        script = self._pj([self._path2data,
            'weighted_average_concordance_index.pl'])
        datadir = self._pj([self._path2data, 'data'])
        cmd = "perl %s %s %s %s"
        cmd = cmd % (script, filename, datadir, fh.name)

        shellcmd(cmd, verbose=True, ignore_errors=True)
        df = pd.read_csv(fh.name, sep='\t', header=None)
        df.columns = ['DrugID', 'probabilistic c-index',
            'weighted probabilistic c-index', 'zscores']
        df = df.set_index('DrugID')
        fh.delete()

        ws = (df.sum() / df.sum()['zscores'])
        ws = ws['weighted probabilistic c-index']

        results = df.mean()
        results['weight average probabilistic c-index'] = ws

        del results['zscores']

        # Finally, compute p-values based on precomputed scores
        precomp = pd.read_csv(self._pj([self._path2data, 'data',
            'DREAM7_DrugSensitivity1_drug_zscores.txt']), sep='\t',
            skiprows=6, header=None)

        overall_mean = precomp.iloc[31, 1]
        overall_var = precomp.iloc[31, 2]

        pval = 1 - (.5 * (math.erf((ws - overall_mean) / (math.sqrt(2 * overall_var))) + 1))

        results['weight average probabilistic c-index p-value'] = pval

        return {'Results': results}