Exemplo n.º 1
0
def test_snpeff():
    """Run SnpEff on a custom GenBank reference and reject an unknown one.

    Files named ``snpEff.config``, ``snpEff_genes.txt`` and
    ``snpEff_summary.html`` are removed from the working directory on a
    best-effort basis once the annotation step is over.
    """
    # a custom reference
    fh_log = TempFile()
    try:
        mydata = snpeff.SnpEff(reference=sequana_data("JB409847.gbk"),
                               log=fh_log.name)
        with TempFile() as fh:
            mydata.launch_snpeff(sequana_data("JB409847.vcf"), fh.name)
    finally:
        # release the log file even when the annotation fails
        fh_log.delete()

        # cleanup: a missing file is fine, anything else should surface
        for leftover in ("snpEff.config", "snpEff_genes.txt",
                         "snpEff_summary.html"):
            try:
                os.remove(leftover)
            except OSError:
                pass

    # an invalid reference is expected to abort with SystemExit
    try:
        snpeff.SnpEff(reference="dummy")
    except SystemExit:
        pass
    else:
        raise AssertionError("SnpEff accepted an invalid reference")
Exemplo n.º 2
0
def test_attrdict():
    """Check attribute/item access, nested dictionaries and JSON I/O of AttrDict."""

    a = tools.AttrDict(value=1)
    assert a.value == 1
    assert 'value' in list(a.keys())
    assert 1 in (a.values())

    # attribute and item access are interchangeable
    a.description = 'test'
    assert a['description'] == 'test'

    a['output'] = 'txt'
    assert a.output == 'txt'

    # nested dictionaries are reachable through chained attributes
    d = {'a':{'b':1}, 'aa':2}
    ad = tools.AttrDict(**d)
    assert ad.a.b == 1
    ad.a.b = 2
    assert ad.a.b == 2

    ad['d'] = 4
    assert ad.d == 4

    # update() with a non-dictionary argument is expected to raise. The
    # failure marker lives in the ``else`` clause so that it cannot be
    # swallowed by the exception handler itself.
    try:
        ad.update(1)
    except Exception:
        pass
    else:
        raise AssertionError("AttrDict.update(1) should have raised")

    # check json capabilities
    fh = TempFile()
    try:
        ad.to_json()
        ad.to_json(filename=fh.name)
        ad.from_json(fh.name)
    finally:
        fh.delete()
Exemplo n.º 3
0
def test_add_locus_with_modification():
    """Check that add_locus_in_fasta writes the GenBank locus into the FASTA.

    The GenBank file is copied with its ``JB409847`` identifier replaced by
    ``DUMMY_JB409847``; the FASTA produced by ``add_locus_in_fasta`` must
    then contain ``DUMMY``.
    """
    # Alter the original GBK to change the locus name
    with open(sequana_data("JB409847.gbk"), "r") as fin:
        newdata = fin.read().replace("JB409847", "DUMMY_JB409847")

    fh = TempFile(suffix="gbk")
    try:
        with open(fh.name, 'w') as fout:
            fout.write(newdata)

        # Now we read this new GBK file that has a different locus name as
        # compared to the fasta
        mydata = snpeff.SnpEff(reference=fh.name)

        # Here is the corresponding FASTA
        fasta = sequana_data("JB409847.fasta")

        with TempFile(suffix="fasta") as fh2:
            mydata.add_locus_in_fasta(fasta, fh2.name)

            # cleanup (best effort: the file may not exist)
            try:
                os.remove("snpEff.config")
            except OSError:
                pass

            # In theory, in the newly created fasta file, we should find
            # back the DUMMY tag
            with open(fh2.name, "r") as fin:
                data = fin.read()
            assert "DUMMY" in data
    finally:
        fh.delete()
Exemplo n.º 4
0
def test_yeast_annotations():
    """Build yeast annotations on a data subset and save/reload them."""
    from easydev import gsf
    filename = gsf('msdas', "data", "YEAST_raw_sample.csv")
    r = MassSpecReader(filename, verbose=verbose)
    a = AnnotationsYeast(r, verbose=verbose)
    # 200 is enough to get gene name cases and ambiguous gene names cases
    # e.g., ALD3_YEAST ['P54114', 'P40047']
    # NOTE(review): ``.ix`` was removed in pandas 1.0; switch to .loc/.iloc
    # once the index type of ``a.df`` is confirmed.
    a.df = a.df.ix[0:200]
    a.get_uniprot_entries()
    a.update_mapping()
    a.set_annotations()
    a.annotations.Sequence

    t = TempFile()
    try:
        a.to_csv(t.name)
    finally:
        t.delete()

    # presumably the file written by to_pickle("test") — it is the name
    # read back and removed below
    pickle_name = "YEAST_annotations_test.pkl"
    try:
        a.to_pickle("test", overwrite=True)
        # refusing to overwrite must be reported as an IOError
        try:
            a.to_pickle("test", overwrite=False)
        except IOError:
            pass
        else:
            raise AssertionError("to_pickle(overwrite=False) should have raised")
        a.read_pickle(pickle_name)

        # create constructor given the annotations
        a = AnnotationsYeast(r, verbose=verbose, annotations=pickle_name)
        a.get_uniprot_entries() # populate entry and entry_names in the df
        a.plot_goid_histogram()
    finally:
        # cleanup, also when one of the steps above failed
        if os.path.exists(pickle_name):
            os.remove(pickle_name)
Exemplo n.º 5
0
 def score_sc2(self, prediction_file):
     """Run the Q2 Perl scoring script and return the first result row.

     :param prediction_file: path passed to the script as the prediction
     :return: first row of the tab-separated file written by the script
     """
     fh = TempFile()
     try:
         _, gs2 = self.download_gs()
         script = self.classpath + os.sep + "DREAM_Olfaction_scoring_Q2.pl"
         cmd = "perl %s %s %s %s" % (script, prediction_file, fh.name, gs2)
         shellcmd(cmd)
         # index_col=None yields a default 0..n-1 index, so the row labelled
         # 0 is also the first row; .iloc replaces the removed .ix accessor
         df = pd.read_csv(fh.name, sep='\t', index_col=None).iloc[0]
     finally:
         # never leave the temporary output behind, even on failure
         fh.delete()
     return df
Exemplo n.º 6
0
def test_sequana_config():
    """Exercise the SequanaConfig constructors and the save/reload round trip."""
    s = snaketools.Module("quality_control")
    config = snaketools.SequanaConfig(s.config)

    assert config.config.get("kraken:dummy", "test") == "test"
    assert config.config.get("kraken:dummy") is None

    # --------------------------------- tests different constructors
    config = snaketools.SequanaConfig()
    config = snaketools.SequanaConfig({"test":1})
    assert config.config.test == 1
    # with a dictionary
    config = snaketools.SequanaConfig(config.config)
    # with a sequanaConfig instance
    config = snaketools.SequanaConfig(config)

    # with a non-yaml file, then with a file that does not exist: both are
    # expected to fail. The failure marker sits in the ``else`` clause so it
    # cannot be swallowed by the handler.
    json_file = sequana_data('test_summary_fastq_stats.json')
    for invalid in (json_file, "dummy_dummy"):
        try:
            snaketools.SequanaConfig(invalid)
        except (Exception, SystemExit):
            pass
        else:
            raise AssertionError("SequanaConfig accepted %r" % invalid)

    # exercise _recursive_update directly
    s = snaketools.Module("quality_control")
    config = snaketools.SequanaConfig(s.config)
    config._recursive_update(config._yaml_code, {"input_directory_dummy": "test"})

    # loop over all pipelines, read the config, save it and check the content is
    # identical. This requires to remove the templates. We want to make sure the
    # empty strings are kept and that "no value" are kept as well
    #
    #    field1: ""
    #    field2:
    #
    # is unchanged
    from easydev import TempFile
    output = TempFile(suffix=".yaml")
    try:
        for pipeline in snaketools.pipeline_names:
            config_filename = snaketools.Module(pipeline)._get_config()
            cfg1 = snaketools.SequanaConfig(config_filename)
            cfg1.cleanup() # remove templates and strip strings

            cfg1.save(output.name)
            cfg2 = snaketools.SequanaConfig(output.name)
            assert cfg2._yaml_code == cfg1._yaml_code
            cfg2._update_config()
            assert cfg1.config == cfg2.config
    finally:
        output.delete()
Exemplo n.º 7
0
def test_models():
    """Build Models from a DataFrame, another Models and a CSV file.

    :return: the tuple ``(m1, m2)`` built from the DataFrame and from ``m1``
    """
    data = np.array([[1, 1, 1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 0],
       [1, 1, 1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1],
       [1, 1, 1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0],
       [1, 1, 1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 0],
       [1, 1, 1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1],
       [1, 1, 1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1],
       [1, 1, 1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 0, 1, 0],
       [1, 1, 0, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 0],
       [1, 1, 0, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 0, 1, 0],
       [1, 1, 0, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 0],
       [1, 1, 0, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 0, 1, 1],
       [1, 1, 0, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0],
       [1, 1, 1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 0, 1, 1],
       [1, 1, 0, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1],
       [1, 1, 0, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1],
       [1, 1, 0, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1]])

    columns = [u'EGF=PI3K', u'TNFa=PI3K', u'Jnk=cJun', u'PI3K=Akt', u'Raf=Mek', u'!Akt=Mek', u'Mek=p90RSK', u'Mek=Erk', u'Erk=Hsp27', u'TNFa=Jnk', u'TNFa=NFkB', u'TNFa=Hsp27', u'EGF=Raf', u'EGF^TNFa=PI3K', u'Raf^!Akt=Mek', u'Erk^TNFa=Hsp27']

    df = pd.DataFrame(data, columns=columns)
    fh = TempFile()
    try:
        df.to_csv(fh.name)

        m1 = Models(df)
        m2 = Models(m1)
        m3 = Models(fh.name, index_col=0)  # there is an index column with no name
    finally:
        fh.delete()

    # an invalid constructor argument is expected to raise; the failure
    # marker is in ``else`` so the handler cannot swallow it
    try:
        Models(1)
    except Exception:
        pass
    else:
        raise AssertionError("Models(1) should have raised")

    # NOTE(review): this early return makes everything below unreachable.
    # It is kept because removing it changes the function's return value
    # and enables checks that have not been running; confirm before
    # deleting it.
    return m1, m2
    assert m1 == m2
    assert m1 == m3

    # plots
    m1.plot()
    m1.plot(1)
    m1.plot('cv')
    m1.errorbar()
    m1.heatmap()

    # exporters
    fh = TempFile()
    m1.to_csv(fh.name)
    fh.delete()

    fh = TempFile()
    m1.to_sif(fh.name)
    fh.delete()

    # m1 and m2 are identical. Adding them gets rid of duplicates so it should be
    # equal to itself.
    assert m1 == m1 + m2
Exemplo n.º 8
0
def install_package(query, dependencies=False, verbose=True,
    repos = "http://cran.univ-paris1.fr/"):
    """Install a R package

    The query is first treated as a URL: the file is downloaded to a
    temporary tar ball and installed with ``repos=NULL``. If any step of
    that attempt fails, the query is handed as is to ``install.packages``
    (with ``repos=NULL`` when the query is an existing local path).

    :param str query: It can be a valid URL to a R package (tar ball), a CRAN
        package, a path to a R package (tar ball), or simply the directory
        containing a R package source.
    :param bool dependencies: forwarded to the ``dependencies`` argument of
        R's ``install.packages``
    :param bool verbose: print progress messages and create a verbose
        R session
    :param repos: if provided, install_packages automatically select the
        provided repositories otherwise a popup window will ask you to select a repo

    ::

        >>> rtools.install_package("path_to_a_valid_Rpackage.tar.gz")
        >>> rtools.install_package("http://URL_to_a_valid_Rpackage.tar.gz")
        >>> rtools.install_package("hash") # a CRAN package
        >>> rtools.install_package("path to a valid R package directory")

    .. seealso:: :class:`biokit.rtools.RPackageManager`
    """
    session = RSession(verbose=verbose)

    # Is it a local file?
    if os.path.exists(query):
        repos = 'NULL'
    else:
        repos = '"{0}"'.format(repos) # we want the " to be part of the string later on

    try:
        # PART for fetching a file on the web, download and install locally
        if verbose:
            print("Trying from the web ?")
        data = urlopen(query)
        tarball = TempFile(suffix=".tar.gz")
        try:
            try:
                # the response yields bytes, so the file must be opened in
                # binary mode (text mode raises TypeError on Python 3)
                with open(tarball.name, 'wb') as fout:
                    for chunk in iter(lambda: data.read(65536), b""):
                        fout.write(chunk)
            finally:
                data.close()
            code = """install.packages("%s", dependencies=%s """ % \
                (tarball.name, bool2R(dependencies))
            code += """ , repos=NULL) """
            session.run(code)
        finally:
            # the downloaded tar ball is only needed during installation
            tarball.delete()

    except Exception as err:
        if verbose:
            print(err)
            print("trying local or from repos")
            print("RTOOLS warning: URL provided does not seem to exist %s. Trying from CRAN" % query)
        code = """install.packages("%s", dependencies=%s """ % \
            (query, bool2R(dependencies))

        code += """ , repos=%s) """ % repos
        session.run(code)
Exemplo n.º 9
0
def sbmlqual_from_datasets(identifier):
    """Round-trip the ``PKN-<identifier>`` network through SBML-qual.

    The SIF file is exported with ``to_sbmlqual`` and read back with
    ``read_sbmlqual``; the result must equal both the original SIF model and
    the model loaded from the ``.xml`` file of the same name.
    """
    prefix = "PKN-" + identifier

    reloaded = SIF()
    from_sif = SIF(cnodata(prefix + ".sif"))

    # export then re-import through a temporary file
    scratch = TempFile()
    from_sif.to_sbmlqual(scratch.name)
    reloaded.read_sbmlqual(scratch.name)
    scratch.delete()
    assert reloaded == from_sif

    from_xml = SIF(cnodata(prefix + ".xml"))
    assert reloaded == from_xml
    assert from_sif == from_xml
Exemplo n.º 10
0
def test_phosphogrid():
    """Run PhosphoGRID on the proteins of the small yeast data set."""
    reader = MassSpecReader(get_yeast_small_data(), verbose=False)
    proteins = set(reader.df.Protein)

    grid = phosphogrid.PhosphoGRID(directory="../share/data")
    grid.run(gene_names=proteins)

    # export to a temporary SIF file, plot, then discard the file
    sif_file = TempFile(suffix='.sif')
    grid.export2sif(filename=sif_file.name)
    grid.plot()
    sif_file.delete()
Exemplo n.º 11
0
def test_read_ic50():
    """Exercise the IC50 reader: constructors, plots, export and header formats."""
    # -------------------------------- functionalities
    r = IC50(ic50_test)
    # we can also instantiate from an existing IC50 instance
    r = IC50(r)

    # test repr (a bare ``r`` statement would not call it)
    repr(r)

    # and print statement
    print(r)

    # the copy method
    assert r == r.copy()


    r.hist()
    r.plot_ic50_count()
    r.cosmicIds

    f = TempFile()
    try:
        r.to_csv(f.name)
    finally:
        f.delete()

    # columns may be duplicated
    r = IC50(ic50_test)
    df = pd.concat([r.df, r.df[999]], axis=1)
    # creating a new instance from it should raise an error; the failure
    # marker is in ``else`` so the handler cannot swallow it
    try:
        IC50(df)
    except Exception:
        pass
    else:
        raise AssertionError("IC50 accepted duplicated columns")

    # ---------------------------------------- different IC50 formats
    # test all files available
    for key in testing.keys() :
        filename = testing[key].location
        if filename.startswith('ic50_test'):
            ic = IC50(filename)
    # some specific checks:
    #ic = IC50(testing['ic50_test_header_drug_prefix_only'].location)
    #assert ic.df.shape == (2,2)
    #assert all(ic.df.columns == ['1','2'])
    ic = IC50(testing['ic50_test_header_no_drug_prefix'].location)
    assert ic.drugIds == [1, 2]

    ic = IC50(testing['ic50_test_header_drug_prefix_only'].location)
    assert ic.drugIds == [1, 2]

    ic = IC50(testing['ic50_test_header_mixed_drug_prefix'].location)
    assert ic.drugIds == [1, 2]
Exemplo n.º 12
0
    def score_sc1(self, prediction_file):
        """Compute all results and compare user prediction with all official participants

        This scoring function can take a long time (about 5-10 minutes).

        :param prediction_file: path passed to the Q1 Perl script as the
            prediction
        :return: first row of the tab-separated file written by the script
        """
        fh = TempFile()
        try:
            gs1, _ = self.download_gs()
            script = self.classpath + os.sep + "DREAM_Olfaction_scoring_Q1.pl"
            cmd = "perl %s %s %s %s" % (script, prediction_file, fh.name, gs1)
            shellcmd(cmd)
            # index_col=None yields a default 0..n-1 index, so the row
            # labelled 0 is also the first row; .iloc replaces the removed
            # .ix accessor
            df = pd.read_csv(fh.name, sep='\t', index_col=None).iloc[0]
        finally:
            # never leave the temporary output behind, even on failure
            fh.delete()
        return df
Exemplo n.º 13
0
def test_toypb_bioservices():
    """Compare the BioModels SBML model with the local ToyPB SIF network."""
    from bioservices import biomodels
    b = biomodels.BioModels()

    sbml = b.getModelSBMLById('MODEL1305240000')

    fh = TempFile(suffix='.xml')
    try:
        # the payload is encoded to bytes, hence binary mode; a distinct
        # name keeps ``fh`` bound to the TempFile so it can be deleted
        with open(fh.name, 'wb') as fout:
            fout.write(sbml.encode('utf-8'))

        c = CNOGraph(fh.name)
    finally:
        fh.delete()

    c2 = CNOGraph(cnodata("PKN-ToyPB.sif"))
    assert c == c2
Exemplo n.º 14
0
def test_d2c1():
    """Score the D2C1 template and check the resulting AUPR."""
    challenge = D2C1()
    challenge.test()

    # the downloaded template is scored as if it were a submission
    template = challenge.download_template()
    scores = challenge.score(template)
    assert_almost_equal(scores['AUPR'], 0.2563463, 7)

    # template creation into a throw-away file
    from easydev import TempFile
    scratch = TempFile()
    challenge._create_templates(filename=scratch.name)
    scratch.delete()

    challenge.score_and_compare_with_lb(challenge.download_template())
Exemplo n.º 15
0
def test_simple_sbmlqual():
    """Export a small hand-built graph to SBML-qual and read it back.

    The graph mixes a simple OR, simple links, an OR/AND combination and a
    single AND gate.
    """
    graph = CNOGraph()
    for reaction in ("!A=C", "C=D", "B=C"):
        graph.add_reaction(reaction)
    graph.expand_and_gates()
    for reaction in ("a1=b", "a2=b", "D^b=E"):
        graph.add_reaction(reaction)

    xml_file = TempFile(suffix='.xml')
    graph.to_sbmlqual(xml_file.name)
    reloaded = CNOGraph(xml_file.name)
    xml_file.delete()
    assert graph == reloaded
Exemplo n.º 16
0
def test_MSReader():
    """Exercise MassSpecReader: construction, attributes, indexing and export."""

    # we can just create an instance
    r = MassSpecReader(verbose=verbose)

    # fails if wrong file. As in the checks below, the failure marker is in
    # the ``else`` clause so that the handler cannot swallow it.
    try:
        MassSpecReader("dummy.csv", verbose=verbose)
    except Exception:
        pass
    else:
        raise AssertionError("MassSpecReader accepted a wrong file")


    filename = yeast.get_yeast_filenames()[0]
    r = MassSpecReader(filename, verbose=verbose)
    print(r)
    r.mode
    r.N
    r.df
    r.measurements
    r.metadata


    # setting the mode to None is expected to be rejected
    try:
        r.mode = None
    except Exception:
        pass
    else:
        raise AssertionError("setting mode to None should have raised")

    r.sort_psites_ors_only()
    r['DIG1']
    r['DIG1',"S142"]
    r['DIG1_S142']
    # a three-item key is expected to be rejected
    try:
        r['DIG1', 'S142', 'dummy']
    except Exception:
        pass
    else:
        raise AssertionError("a three-item key should have raised")
    r.sequences
    r.psites

    from easydev import TempFile
    f = TempFile()
    try:
        r.to_csv(f.name)
    finally:
        f.delete()
Exemplo n.º 17
0
def test_yeast_june():
    """Run the June cleanup/export workflow on the module-level ``data``."""
    #y = yeast.YEAST2MIDAS(get_yeast_small_data(), get_yeast_raw_data(),  verbose=False)
    #y.cleanup_june()
    #y.cleanup_june()
    #len(y.df)<100
    pkn_file = gsf("msdas", "data", "PKN-yeastScaffold.sif")
    data.cleanup_june()
    # the export must return exactly three items
    _c, _m, _e = data.export_pkn_and_midas_june(pkn_file)

    # MIDAS export into a throw-away file
    from easydev import TempFile
    scratch = TempFile()
    data.to_midas(scratch.name)
    scratch.delete()

    # accessors and plots only need to run without error
    data.get_cv()
    data.get_midas()
    data.pcolor_na()
    data.plot_timeseries("DIG1_S126+S127")
Exemplo n.º 18
0
    def score_A(self, filename):
        """Score a submission with the weighted concordance index Perl script.

        :param filename: path given to the Perl script as the submission
        :return: dictionary with a single ``'Results'`` key holding the
            per-column means, the weighted average c-index and its p-value
        :raises SystemExit: when the output of the script cannot be read
        """
        from easydev import TempFile
        fh = TempFile()
        script = self._pj([self.classpath,
            'weighted_average_concordance_index.pl'])
        datadir = self._pj([self.classpath, 'data'])
        cmd = "perl %s %s %s %s" % (script, filename, datadir, fh.name)

        try:
            shellcmd(cmd, verbose=True, ignore_errors=True)
            try:
                df = pd.read_csv(fh.name, sep='\t', header=None)
            except Exception:
                print("Something wrong in the Scoring while executing \n  %s. " % cmd)
                print("\n The D7C4 challenge requires a Perl package to be installed")
                print("See D7C4 documentation e.g., on dreamtools.readthedocs.org")
                import sys
                sys.exit(1)
        finally:
            # remove the temporary output on every path, including exit
            fh.delete()

        df.columns = ['DrugID', 'probabilistic c-index',
        'weighted probabilistic c-index', 'zscores']
        df = df.set_index('DrugID')

        # sum of the weighted c-index column divided by the sum of zscores
        totals = df.sum()
        ws = totals.loc['weighted probabilistic c-index'] / totals.loc['zscores']

        results = df.mean()
        results['weight average probabilistic c-index'] = ws

        del results['zscores']

        # Finally compute pvalues based on precomputed scores
        precomp = pd.read_csv(self._pj([self.classpath, 'data',
            'DREAM7_DrugSensitivity1_drug_zscores.txt']), sep='\t',
            skiprows=6,  header=None)

        # header=None gives integer row and column labels, so .loc[31, 1]
        # addresses the same cell as the removed .ix[31][1]
        overall_mean = precomp.loc[31, 1]
        overall_var = precomp.loc[31, 2]

        pval = 1 -  (.5 * (math.erf((ws - overall_mean)/(math.sqrt(2*overall_var))) + 1))

        results['weight average probabilistic c-index p-value'] = pval

        return {'Results': results}
Exemplo n.º 19
0
def test_config_parser():
    """Check equality of identically built parsers and of a saved copy."""

    def build_parser():
        # same construction order as the sections are meant to be added
        ga = ParamsGA()
        general = ParamsGeneral()
        parser = CNOConfigParser()
        parser.add_section(general)
        parser.add_section(ga)
        return parser

    c1 = build_parser()
    c2 = build_parser()
    assert c1 == c2

    # save to a temporary file and reload
    from easydev import TempFile
    scratch = TempFile()
    c1.save(scratch.name)
    reloaded = CNOConfigParser(scratch.name)
    scratch.delete()
    assert c1 == reloaded
Exemplo n.º 20
0
def test_fastq_unzipped():
    """Run the same FastQ checks on the plain and on the gzipped test file."""

    for source in [data, datagz]:
        # instantiation
        reader = fastq.FastQ(source)
        assert reader.data_format == "Illumina_1.8+"

        # Each counter is called twice to make sure that re-running it
        # (decompression with zlib) works when run again.
        for _ in range(2):
            assert reader.count_lines() == 1000
        for _ in range(2):
            assert reader.count_reads() == 250

        def check_head(target):
            # extract the first 100 lines into ``target`` and count them
            reader.extract_head(100, target.name)
            extracted = fastq.FastQ(target.name)
            assert extracted.count_lines() == 100
            target.delete()

        # head of the file into an unzipped file, then into a zipped one
        check_head(TempFile())
        check_head(TempFile(suffix=".gz"))

        with FastQ(source) as handle:
            assert len(handle) == 250

        with TempFile() as scratch:
            picked = reader.select_random_reads(10, scratch.name)
            reader.select_random_reads(picked, scratch.name)
Exemplo n.º 21
0
def test_pacbio_input_bam(tmpdir):
    """Build a PacbioInputBAMModule from a summary pointing at a mock image."""
    import json
    import os

    # we need a summary and a bunch of images
    filename = sequana_data("summary_pacbio_qc1.json")
    with open(filename) as fin:
        summary = json.load(fin)

    # mock the PNG files found in the summary
    pngname = sequana_data("no_data.jpg")
    for key in ("gc_vs_length", "hist_gc_content", "hist_read_length",
                "hist_snr", "hist_zmw"):
        summary["images"][key] = pngname

    summary_file = TempFile()
    # we also need an output handler
    ff = TempFile()
    try:
        with open(summary_file.name, "w") as fout:
            json.dump(summary, fout)

        # Now that we have this new summary file, let us use it.
        # The module is given an output directory and a bare file name, so
        # the temporary path is split into those two parts. The previous
        # ``ff.name.split("/")[1]`` returned the directory component
        # ("tmp" for "/tmp/xyz") rather than the file name.
        from sequana.utils import config
        config.output_dir = os.path.dirname(ff.name)
        pacbio_input_bam.PacbioInputBAMModule(summary_file.name,
                                              os.path.basename(ff.name))
    finally:
        # cleanup
        summary_file.delete()
        ff.delete()
Exemplo n.º 22
0
    def score_A(self, filename):
        """Score a submission with the weighted concordance index Perl script.

        :param filename: path given to the Perl script as the submission
        :return: dictionary with a single ``'Results'`` key holding the
            per-column means, the weighted average c-index and its p-value
        """
        from easydev import TempFile
        fh = TempFile()
        script = self._pj([self._path2data,
            'weighted_average_concordance_index.pl'])
        datadir = self._pj([self._path2data, 'data'])
        cmd = "perl %s %s %s %s" % (script, filename, datadir, fh.name)

        try:
            shellcmd(cmd, verbose=True, ignore_errors=True)
            df = pd.read_csv(fh.name, sep='\t', header=None)
        finally:
            # remove the temporary output even when scoring fails
            fh.delete()

        df.columns = ['DrugID','probabilistic c-index',
        'weighted probabilistic c-index', 'zscores']
        df = df.set_index('DrugID')

        # sum of the weighted c-index column divided by the sum of zscores
        totals = df.sum()
        ws = totals.loc['weighted probabilistic c-index'] / totals.loc['zscores']

        results = df.mean()
        # NOTE(review): "probabilitis" looks like a typo, but the key is part
        # of the returned data and is kept as is for existing callers.
        results['weight average probabilitis c-index'] = ws

        del results['zscores']

        # Finally compute pvalues based on precomputed scores
        precomp = pd.read_csv(self._pj([self._path2data, 'data',
            'DREAM7_DrugSensitivity1_drug_zscores.txt']), sep='\t',
            skiprows=6,  header=None)

        # header=None gives integer row and column labels, so .loc[31, 1]
        # addresses the same cell as the removed .ix[31][1]
        overall_mean = precomp.loc[31, 1]
        overall_var = precomp.loc[31, 2]

        pval = 1 -  (.5 * (math.erf((ws - overall_mean)/(math.sqrt(2*overall_var))) + 1))

        results['weight average probabilitis c-index p-value'] = pval

        return {'Results': results}
Exemplo n.º 23
0
def test_conv():
    """Convert SCF v2 and v3 samples to FASTA and compare checksums."""
    cases = [
        # Scf V2 file
        (bioconvert_data("sample_v2.scf"), bioconvert_data("sample_v2.fasta")),
        # Scf V3 file
        (bioconvert_data("sample_v3.scf"), bioconvert_data("sample_v3.fasta")),
    ]

    # the same temporary output is reused for both conversions
    with TempFile(suffix=".fasta") as tempfile:
        for scf_path, expected_path in cases:
            convert = SCF2FASTA(scf_path, tempfile.name)
            convert()
            # Check that the output is correct with a checksum
            assert md5(tempfile.name) == md5(expected_path)
Exemplo n.º 24
0
def test_conv(method):
    """Convert an EMBL file to GenBank with the requested method.

    Only the ``biopython`` output is checked against known checksums;
    ``squizz`` has no checksum yet and any other method raises
    NotImplementedError after the conversion has run.
    """
    embl_path = bioconvert_data("JB409847.embl")
    biopython_md5s = ("cdd34902975a68e58ad5f105b44ff495",
                      "63002093c1aaef8c3a6fd693c2bbd9f4")

    with TempFile(suffix=".gbk") as tempfile:
        converter = EMBL2GENBANK(embl_path, tempfile.name)
        converter(method=method)

        if method not in ("biopython", "squizz"):
            raise NotImplementedError

        # Check that the output is correct with a checksum
        if method == "biopython":
            assert md5(tempfile.name) in biopython_md5s
        # TODO (squizz)
        # embl input is not understood by squizz if generated by biopython
        #     assert md5(tempfile.name) == "????"
Exemplo n.º 25
0
    def wrapped(inst, *args, **kwargs):
        """Call ``func`` on uncompressed files and compress the result.

        A ``.gz`` input is decompressed with ``unpigz`` into a temporary
        file that replaces ``inst.infile`` for the duration of the call. A
        ``.gz``, ``.bz2`` or ``.dsrc`` suffix on ``inst.outfile`` is stripped
        before the call, then the output is compressed accordingly and the
        suffix restored.

        :return: whatever ``func`` returns
        """
        infile_name = inst.infile

        # Strip a compression suffix so that func writes uncompressed output
        # (!!! .dsrc is only for fastq files)
        output_compressed = None
        if inst.outfile.endswith((".gz", ".bz2", ".dsrc")):
            (inst.outfile, output_compressed) = splitext(inst.outfile)
        # Now inst has the uncompressed output file name

        if infile_name.endswith(".gz"):
            # decompress input
            # TODO: https://stackoverflow.com/a/29371584/1878788
            _log.info("Generating uncompressed version of {} ".format(infile_name))
            (ungz_name, _) = splitext(infile_name)
            (_, base_suffix) = splitext(ungz_name)
            with TempFile(suffix=base_suffix) as ungz_infile:
                inst.infile = ungz_infile.name
                try:
                    inst.shell("unpigz -c -p {} {} > {}".format(
                        inst.threads, infile_name, inst.infile))
                    # computation
                    results = func(inst, *args, **kwargs)
                finally:
                    # restore the original name even when the call fails, so
                    # inst never keeps pointing at the temporary file
                    inst.infile = infile_name
        else:
            results = func(inst, *args, **kwargs)

        # Compress output and restore inst output file name
        if output_compressed == ".gz":
            # TODO: this uses -f ; should be a
            _log.info("Compressing output into .gz")
            inst.shell("pigz -f -p {} {}".format(inst.threads, inst.outfile))
            inst.outfile = inst.outfile + ".gz"
        elif output_compressed == ".bz2":
            _log.info("Compressing output into .bz2")
            inst.shell("pbzip2 -f -p{} {}".format(inst.threads, inst.outfile))
            inst.outfile = inst.outfile + ".bz2"
        elif output_compressed == ".dsrc":  # !!! only for FastQ files
            _log.info("Compressing output into .dsrc")
            inst.shell("dsrc c -t{} {} {}.dsrc".format(
                inst.threads, inst.outfile, inst.outfile))
            inst.outfile = inst.outfile + ".dsrc"
        return results
Exemplo n.º 26
0
def test_kraken_results():
    """Exercise the plotting, taxonomy and export methods of KrakenResults."""
    kraken_output = sequana_data("test_kraken.out", "testing")
    results = KrakenResults(kraken_output)

    pie = results.plot(kind='pie')
    results.boxplot_classified_vs_read_length()
    print(pie)

    results.plot(kind='barh')

    taxonomy = results.get_taxonomy_db(11234)
    assert 11234 in taxonomy.index

    # every exporter writes to the same throw-away file
    from easydev import TempFile
    with TempFile() as scratch:
        results.kraken_to_csv(scratch.name, "toydb")
        results.kraken_to_json(scratch.name, "toydb")
        results.kraken_to_krona(scratch.name)
        results.to_js(scratch.name)
    results.plot2(kind='pie')
Exemplo n.º 27
0
def test_SIRV():
    """Convert the SIRV Excel reference to FASTA and check group sizes."""
    # expected number of entries for SIRV1 .. SIRV7
    counts = (7, 6, 11, 7, 12, 18, 7)
    expected_groups = {
        "SIRV%d" % rank: count for rank, count in enumerate(counts, start=1)
    }

    with TempFile() as fasta_file:
        excel_path = sequana_data("test_sirv.xls")
        reference = SIRVReference()
        reference.from_excel(excel_path)
        reference.to_fasta(fasta_file.name)

        sirv = SIRV(fasta_file.name)
        assert sirv.group_lengths == expected_groups
        assert sum(sirv.SIRV.lengths) == 75469
Exemplo n.º 28
0
def _test_conv(method):
    """Convert the measles BAM file to bigwig with the given method.

    The output checksum is currently not asserted.
    """
    # the input file
    bam_path = bioconvert_data('test_measles.sorted.bam')

    # checksum of the reference output (computed, not compared yet)
    reference_bigwig = bioconvert_data('test_measles.bigwig')
    reference_md5 = md5(reference_bigwig)

    with TempFile(suffix=".bigwig") as outfile:
        convert = BAM2BIGWIG(bam_path, outfile.name)
        # only the ucsc method receives the chromosome sizes
        extra = {}
        if method == 'ucsc':
            extra["chrom_sizes"] = bioconvert_data("hg38.chrom.sizes")
            # TODO Failed in Oct 2018. Why? bamCoverage version in header?
            #assert md5(outfile.name) == '61abd0de51bd614136ad85ae0a1ff85b', "{} failed".format(method)
        convert(method=method, **extra)
Exemplo n.º 29
0
def test_conv():
    """Convert a MAF file to SAM and compare it with the reference SAM.

    Lines containing ``bioconvert`` are ignored on both sides.
    """

    def lines_without_version(path):
        # In the SAM, the version may be different when using other
        # bioconvert version, so we need to get rid of the line that
        # contains the version and program. The file is closed on return.
        with open(path) as handle:
            return [line for line in handle if "bioconvert" not in line]

    infile = bioconvert_data("test_maf2sam.maf")
    outfile = bioconvert_data("test_maf2sam.sam")
    with TempFile(suffix=".sam") as tempfile:
        convert = MAF2SAM(infile, tempfile.name)
        convert(method="python")

        assert lines_without_version(outfile) == lines_without_version(tempfile.name)
Exemplo n.º 30
0
def test_fasta2clustal_goalign():
    """Convert FASTA to CLUSTAL with goalign and compare checksums.

    Only the first space-separated token of the first output line is kept
    before hashing, which drops the goalign version.
    """
    fasta_path = bioconvert_data("goalign.fasta")
    expected_path = bioconvert_data("goalign.clustal")

    with TempFile(suffix=".clustal") as tempfile:
        converter = FASTA2CLUSTAL(fasta_path, tempfile.name)
        converter(method='goalign')

        with open(tempfile.name) as stream:
            content = stream.readlines()
        # We remove goalign version from the first line
        if content:
            content[0] = content[0].split(" ")[0] + "\n"
        normalised = ''.join(content)

        # Check that the output is correct with a checksum
        digest = hashlib.md5(normalised.encode('utf-8')).hexdigest()
        assert digest == md5(expected_path)
Exemplo n.º 31
0
def test_bam2tsv():
    """Convert the measles BAM file to TSV with pysam and samtools."""
    expected_md5 = "4c5f3336be8a03c95a6c56be28581fb7"
    bam_path = bioconvert_data("test_measles.sorted.bam")
    #expected_outfile = bioconvert_data("test_measles.tsv")

    with TempFile(suffix=".tsv") as tempfile:
        convert = BAM2TSV(bam_path, tempfile.name)

        convert(method="pysam")
        # The pysam checksum is skipped on python3.6: it was impossible to
        # track down why it fails there. It looks like pytest changes the
        # behaviour of the BAM2TSV class when saving the file: only the
        # header is saved and the content of the BAM file is not.
        if sys.version_info[:2] != (3, 6):
            assert md5(tempfile.name) == expected_md5

        convert(method="samtools")
        assert md5(tempfile.name) == expected_md5
Exemplo n.º 32
0
def test_conv():
    """Convert the measles BAM file to FASTA with bamtools and samtools."""
    bam_path = bioconvert_data("test_measles.sorted.bam")
    bamtools_md5 = "ea5511c3c8913626be152609887c8c4d"
    # samtools 1.6 / hstlib 1.6 gives different results on travis and
    # locally
    samtools_md5s = [
        "0924d2a11b43094680d1a7374aaaa45e",
        "cc9afcef458f3402fbdef1a091e05c39",
    ]

    with TempFile(suffix=".fa") as tempfile:
        # Checksums are taken on the uncompressed output: the md5 of a
        # gzip file cannot be tested, only that of the original data.
        by_bamtools = BAM2Fasta(bam_path, tempfile.name)
        by_bamtools(method="bamtools")
        assert md5(tempfile.name) == bamtools_md5

        by_samtools = BAM2Fasta(bam_path, tempfile.name)
        by_samtools(method="samtools")
        assert md5(tempfile.name) in samtools_md5s
Exemplo n.º 33
0
def test_read_write_from_cnograph():
    """Round-trip the ToyPB network through SBML-qual, with and without AND gates."""

    def roundtrip(graph):
        # Export to a temporary XML file and read it back. The file is
        # removed on every path, including a failing export or import.
        fh = TempFile(suffix='.xml')
        try:
            graph.to_sbmlqual(fh.name)
            return CNOGraph(fh.name)
        finally:
            fh.delete()

    c  = CNOGraph(cnodata("PKN-ToyPB.sif"))
    assert c == roundtrip(c)

    c  = CNOGraph(cnodata("PKN-ToyPB.sif"))
    c.expand_and_gates()
    assert c == roundtrip(c)
Exemplo n.º 34
0
def test_conv():
    """Convert the measles BAM file to SAM with several methods."""
    bam_path = bioconvert_data("test_measles.sorted.bam")
    sambamba_md5 = "ad83af4d159005a77914c5503bc43802"
    #outfile = biokit_data("converters/measles.sam")

    with TempFile(suffix=".bam") as tempfile:
        convert = BAM2SAM(bam_path, tempfile.name)
        convert()

        # No reference checksum is compared for the default method:
        #assert md5(tempfile.name) == md5(outfile)
        # instead the output must be readable and hold 60 records
        import pysam
        alignments = pysam.AlignmentFile(tempfile.name)
        assert alignments.count() == 60
        convert(method="pysam")

        # a fresh converter for sambamba, checked with a checksum
        sambamba_convert = BAM2SAM(bam_path, tempfile.name)
        sambamba_convert(method="sambamba")
        assert md5(tempfile.name) == sambamba_md5
Exemplo n.º 35
0
def test_gz2dsrc():
    """
    Test that fastq gz file is converted as expected to a fastq .dsrc file
    """
    from bioconvert import bioconvert_data
    in_gz = bioconvert_data("test_SP1.fq.gz")
    exp_fq = bioconvert_data("exp_SP1.fq")
    with TempFile(suffix=".dsrc") as tempfile:
        converter = GZ2DSRC(in_gz, tempfile.name)
        converter()

        # uncompress the created dsrc file, and compare uncompressed file
        # to the expected one. We do not directly compare dsrc files as
        # it depends on the dsrc version used...
        assert os.path.isfile(tempfile.name)
        tmp_fq = tempfile.name + ".fq"
        try:
            # argument list: no splitting issue with unusual paths, and a
            # failing dsrc is reported instead of being ignored
            subprocess.check_call(["dsrc", "d", tempfile.name, tmp_fq])

            # Check that the output is correct with a checksum
            assert md5(tmp_fq) == md5(exp_fq)
        finally:
            # the decompressed copy is not managed by TempFile
            if os.path.exists(tmp_fq):
                os.remove(tmp_fq)
Exemplo n.º 36
0
def test_gz2dsrc():
    """
    Test that a fastq .dsrc file is converted to a fastq gz file whose
    decompressed content has the expected checksum
    """
    from bioconvert import bioconvert_data
    dsrc_path = bioconvert_data("test_SP1.fq.dsrc")

    with TempFile(suffix=".fq.gz") as tempfile:
        converter = DSRC2GZ(dsrc_path, tempfile.name)
        converter()

        # The created file is uncompressed and the checksum is taken on the
        # uncompressed content. We do not directly compare dsrc or gz files
        # as it is not deterministic
        assert os.path.isfile(tempfile.name)

        pipeline = "gunzip -c {} | md5sum -".format(tempfile.name)
        raw_output = subprocess.check_output(pipeline, shell=True)
        digest = raw_output.split()[0].decode()

        # NOTE(review): the expected value is the MD5 of empty input, so
        # this only passes when gunzip emits nothing — confirm the intended
        # checksum.
        assert digest == "d41d8cd98f00b204e9800998ecf8427e"
Exemplo n.º 37
0
def test_summary():
    """Exercise the Summary constructor, its dict/JSON export and the
    validation of ``data_description``."""

    def must_fail(action):
        """Call *action* and fail the test if it does not raise.

        The AssertionError is raised outside of the ``try`` block on purpose:
        an ``assert False`` placed inside a ``try`` guarded by a bare
        ``except:`` is swallowed and the check can never fail.
        """
        try:
            action()
        except (Exception, SystemExit):
            return
        raise AssertionError("the call was expected to raise but did not")

    s = Summary("test2", sample_name="chr1", data={"mean": 1})
    assert s.data == {"mean": 1}
    assert s.version
    assert s.date
    d = s.as_dict()
    assert "name" in d
    assert "version" in d
    assert "data" in d
    assert "date" in d

    # test wrong constructor
    must_fail(lambda: Summary("test"))
    must_fail(lambda: Summary("test", "test"))

    # test data_description
    s = Summary("test2", data={"mean": 1})
    s.data_description = {"mean": "mean of the data set"}
    assert s.data_description == {"mean": "mean of the data set"}
    # a description with a key other than the one in data must be rejected
    must_fail(lambda: setattr(s, "data_description", {"dummy": 1}))

    from easydev import TempFile
    with TempFile(suffix=".json") as fh:
        s.to_json(fh.name)
Exemplo n.º 38
0
def test_sra2fastq_gz(method):
    """Convert the SRR390728 run to gzipped paired FASTQ files and compare
    each decompressed mate with its reference file.

    :param method: name of the conversion method handed to the converter.
    """
    infile = "SRR390728"
    references = {
        "_1": bioconvert_data("SRR390728_1.fastq"),
        "_2": bioconvert_data("SRR390728_2.fastq"),
    }
    with TempFile(suffix=".fastq.gz") as tempfile:
        converter = SRA2FASTQ(infile, tempfile.name, True)
        converter(method=method)

        # Strip the (possibly double) extension to get the common base name
        # of the <base>_1.fastq.gz / <base>_2.fastq.gz files read below.
        outbasename, ext = os.path.splitext(tempfile.name)
        if ext == ".gz":
            outbasename, ext = os.path.splitext(outbasename)

        # None of these files is managed by TempFile: remember them so that
        # they can be removed whatever the outcome of the test.
        generated = []
        try:
            for mate in ("_1", "_2"):
                compressed = outbasename + mate + ".fastq.gz"
                plain = outbasename + mate + ".fastq"
                generated.extend([compressed, plain])
                with gzip.open(compressed, 'rb') as f_in, \
                        open(plain, 'wb') as f_out:
                    shutil.copyfileobj(f_in, f_out)

            # Check that the output is correct with a checksum
            for mate in ("_1", "_2"):
                assert md5(outbasename + mate + ".fastq") == \
                    md5(references[mate])
        finally:
            for path in generated:
                if os.path.exists(path):
                    os.remove(path)
Exemplo n.º 39
0
    def _method_wiggletools(self, *args, **kwargs):
        """Convert ``self.infile`` into ``self.outfile`` with ``wiggletools``.

        wiggletools is not run on the input file itself but on a temporary
        symbolic link ending in ``.bb`` that points to it. The link is
        removed once the command has run, whether it succeeded or not.

        :param args: not used by this method.
        :param kwargs: not used by this method.
        """
        import os
        from easydev import TempFile

        # We need a unique name that does not exist yet for the symlink
        # (fixes #233). TempFile is only used to obtain such a name: the
        # symlink below can only be created once the temporary file is gone,
        # so this relies on TempFile removing its file on exit.
        with TempFile(suffix=".bb") as ftemp:
            fname = ftemp.name

        os.symlink(os.path.abspath(self.infile), fname)

        try:
            cmd = "wiggletools {} > {}".format(fname, self.outfile)
            self.execute(cmd)
        finally:
            # clean symlink; any error raised above propagates unchanged
            os.unlink(fname)
Exemplo n.º 40
0
def test_af1():
    """Check ``is_valid_af1`` on the first two variants of the test VCF and
    the counts returned by ``filter_vcf`` when the AF1 filter is enabled."""
    vcf_path = sequana_data("test_vcf_mpileup_4dot1.vcf")
    reader = VCF(vcf_path)

    # first variant: AF1 = 1 is accepted, AF1 = 0.5 is rejected
    first = next(reader.vcf)
    first.INFO['AF1'] = 1
    accepted = reader.vcf.is_valid_af1(first)
    assert accepted is True
    first.INFO['AF1'] = 0.5
    accepted = reader.vcf.is_valid_af1(first)
    assert accepted is False

    # polymorphic case: here AF1 = 1 is rejected
    # (the AF1 = 0.5 case is not checked for this variant)
    second = next(reader.vcf)
    second.INFO['AF1'] = 1
    accepted = reader.vcf.is_valid_af1(second)
    assert accepted is False

    # filtering of the whole file, starting from a fresh reader
    reader = VCF(vcf_path)
    reader.vcf.apply_af1_filter = True
    with TempFile() as filtered:
        counts = reader.vcf.filter_vcf(filtered.name)
    expected_counts = {'N': 573, 'filtered': 391, 'unfiltered': 182}
    assert counts == expected_counts
Exemplo n.º 41
0
def test_copy_requirements():
    """Check that ``SequanaConfig.copy_requirements`` copies the supported
    kinds of requirements into a target directory and rejects an unknown
    one."""
    # We need 4 cases:
    # 1- http
    # 2- a sequana file (phix)
    # 3- an existing file elsewhere (here just a temporary file)
    # 4- an existing file in the same directory as the target dir

    from easydev import TempFile

    def must_fail(action):
        """Call *action* and fail the test if it does not raise.

        The AssertionError is raised outside of the ``try`` block so that it
        cannot be swallowed by the exception handler.
        """
        try:
            action()
        except (Exception, SystemExit):
            return
        raise AssertionError("the call was expected to raise but did not")

    # Both the target directory and the temporary requirement (case 3) are
    # released when the block exits, even if an assertion fails.
    with tempfile.TemporaryDirectory() as targetdir, \
            TempFile() as temprequire:

        # Case 4: a local file (copy of the temp file)
        # TODO: not covered yet

        cfg = snaketools.SequanaConfig()
        cfg.config.requirements = [
            "phiX174.fa",
            temprequire.name,
            "https://raw.githubusercontent.com/sequana/sequana/master/README.rst"
        ]
        cfg._update_yaml()
        cfg.copy_requirements(target=targetdir)

        # error
        cfg.config.requirements = ['dummy']
        must_fail(lambda: cfg.copy_requirements(target=targetdir))
Exemplo n.º 42
0
    def _method_chain(self, *args, **kwargs):
        """This method successively uses the default conversion method of each
        converter in the conversion path.

        The first step reads ``self.infile`` and the step numbered
        ``self.nb_steps`` writes ``self.outfile``; every other step writes to
        a temporary file that becomes the input of the next step and is
        deleted once that step is over.

        :param args: positional arguments forwarded to each converter call.
        :param kwargs: keyword arguments forwarded to each converter call.
        :raises NotImplementedError: if an intermediate step does not have
            exactly one output format.
        """
        def conv_step(converter, infile, outfile):
            """Performs one conversion step."""
            converter(infile, outfile)(*args, **kwargs)

        # Temporary files written by one step and not yet consumed by the
        # following one.
        pipe_files = deque()
        try:
            for (step_num, ((_, out_fmt), converter)) \
                    in enumerate(self.converter_map, start=1):
                if step_num == 1:
                    step_infile = None
                    step_input = self.infile
                else:
                    step_infile = pipe_files.popleft()
                    step_input = step_infile.name

                try:
                    if step_num == self.nb_steps:
                        step_output = self.outfile
                    elif len(out_fmt) == 1:
                        step_outfile = TempFile(suffix=out_fmt[0].lower())
                        step_output = step_outfile.name
                        pipe_files.append(step_outfile)
                    else:
                        # FIXME: for multiple IO converters. Fail explicitly
                        # instead of running the step with an unbound or
                        # stale output name.
                        raise NotImplementedError(
                            "Chained conversion through a step with {} "
                            "output formats is not supported".format(
                                len(out_fmt)))

                    conv_step(converter, step_input, step_output)
                finally:
                    # The intermediate input is no longer needed, whether
                    # the step succeeded or not.
                    if step_infile is not None:
                        step_infile.delete()
        finally:
            # Only non-empty when a step failed (or the chain stopped early):
            # do not leave unconsumed temporary files behind.
            while pipe_files:
                pipe_files.popleft().delete()
Exemplo n.º 43
0
def test_stats_file():
    """Run the StatsFile exports and plots on two demultiplexing reports
    (the regular one and the one with undetermined reads)."""
    reports = (
        "test_demultiplex_Stats.json",
        "test_demultiplex_Stats_undetermined.json",
    )
    for report in reports:
        stats = StatsFile(sequana_data(report))

        with TempFile() as fout:
            stats.to_summary_reads(fout.name)

        with TempFile() as fout:
            stats.barplot_summary(fout.name)

        with TempFile() as fout:
            stats.barplot()
            # one lane<N>_status.png per lane is removed from the current
            # directory (presumably written there by barplot())
            lanes = stats.get_data_reads().lane.unique()
            for lane in lanes:
                os.remove("lane{}_status.png".format(lane))
Exemplo n.º 44
0
def test_xmfa2phy(method):
    """Run the XMFA2PHYLIP converter on the test XMFA file.

    Only checks that the conversion runs; the output is not compared with
    a reference file.

    :param method: name of the conversion method handed to the converter.
    """
    xmfa_input = bioconvert_data("test_phylip2xmfa.xmfa")
    # NOTE(review): the output of a PHYLIP conversion is given an .xmfa
    # suffix -- confirm this is intended.
    with TempFile(suffix=".xmfa") as output:
        run_conversion = XMFA2PHYLIP(xmfa_input, output.name)
        run_conversion(method=method)
Exemplo n.º 45
0
def create_graph(filename, layout="dot", use_singularity=False):
    """Draw the graph of the registered conversions into *filename*.

    :param filename: should end in .png or .svg or .dot
    :param layout: layout name handed to pygraphviz ``AGraph.layout``.
    :param use_singularity: if True, pygraphviz is skipped and the ``dot``
        command is run through a singularity image downloaded on demand.

    pygraphviz is tried first, unless *filename* ends in .dot or
    *use_singularity* is True. If it cannot be imported or fails to draw,
    a DOT description of the graph is written to a temporary file and
    rendered with ``dot -T<extension>``.

    This is useful if you have issues installing graphviz.
    If so, under Linux you could use our singularity container
    see github.com/cokelaer/graphviz4all

    """
    from bioconvert.core.registry import Registry
    rr = Registry()

    if not filename.endswith(".dot") and use_singularity is not True:
        try:
            from pygraphviz import AGraph
            dg = AGraph(directed=True)

            for a, b in rr.get_conversions():
                dg.add_edge(a, b)

            dg.layout(layout)
            dg.draw(filename)
            return
        except Exception:
            # pygraphviz is missing or could not draw the graph: fall back
            # on the dot executable below.
            pass

    dot = """
strict digraph{
    node [label="\\N"];

    """
    nodes = set([item for items in rr.get_conversions() for item in items])

    for node in nodes:
        dot += "\"{}\";\n".format(node)
    for a, b in rr.get_conversions():
        dot += "\"{}\" -> \"{}\";\n".format(a, b)
    dot += "}\n"

    from easydev import TempFile
    from bioconvert import shell
    dotfile = TempFile(suffix=".dot")
    try:
        with open(dotfile.name, "w") as fout:
            fout.write(dot)

        dotpath = ""
        if use_singularity:
            from bioconvert.core.downloader import download_singularity_image
            singfile = download_singularity_image(
                "graphviz.simg", "shub://cokelaer/graphviz4all:v1",
                "4288088d91c848e5e3a327282a1ab3d1")

            dotpath = "singularity run {} ".format(singfile)
            # On ReadTheDocs the plain dot executable is used instead of
            # the singularity image.
            on_rtd = environ.get('READTHEDOCS', None) == 'True'
            if on_rtd:
                dotpath = ""

        ext = filename.rsplit(".", 1)[1]
        cmd = "{}dot -T{} {} -o {}".format(dotpath, ext, dotfile.name,
                                           filename)
        try:
            shell(cmd)
        except Exception:
            # second chance with a plain system call
            import os
            os.system(cmd)
    finally:
        # the temporary DOT description is not needed once rendered
        dotfile.delete()
Exemplo n.º 46
0
def main(args=None):
    """Entry point of the compressor command-line tool.

    Parses the command line, validates the requested source/target
    extensions, saves a temporary YAML configuration of the "compressor"
    module in the current directory, then builds the matching snakemake
    command and executes it.

    :param args: list of command-line arguments, program name first.
        Defaults to ``sys.argv``.
    :raises ValueError: if more than 20 jobs are requested without the
        bypass option, if no cluster option is given on the TARS cluster,
        if the source/target combination is not valid, or if -s / -j are
        found in the extra snakemake options.
    """
    user_options = Options(prog="sequana")

    if args is None:
        args = sys.argv

    # If --help or no options provided, show the help. The parser is
    # expected to exit after printing it; the result is bound anyway so that
    # `options` is always defined below.
    if len(args) == 1:
        options = user_options.parse_args(["prog", "--help"])
    else:
        options = user_options.parse_args(args[1:])

    if options.version:
        import sequana
        print(sequana.version)
        sys.exit()

    if options.jobs > 20 and options.bypass is False:
        raise ValueError('The number of jobs is limited to 20. You can ' +
            'force this limit by using --bypass-job-limit')

    if misc.on_cluster("tars-") and options.unlock is False:
        if options.cluster is None:
            raise ValueError("You are on TARS (Institut Pasteur). You " +
                " must use --cluster option to provide the scheduler " +
                " options (typically ' --cluster 'sbatch --qos normal' )")

    # valid codecs: fastq / fq, alone or followed by a compression suffix
    valid_extensions = [(prefix + ext2).rstrip(".")
                        for prefix in ("fastq.", "fq.")
                        for ext2 in ['', 'bz2', 'gz', 'dsrc']]

    # source and target must both be valid and must differ
    if (options.source not in valid_extensions
            or options.target not in valid_extensions
            or options.source == options.target):
        raise ValueError("""--target and --source combo not valid.
Must be one of fastq, fastq.gz, fastq.bz2 or fastq.dsrc""")

    # Create the config file locally
    module = Module("compressor")

    with TempFile(suffix=".yaml", dir=".") as temp:
        cfg = SequanaConfig(module.config)
        cfg.config.compressor.source = options.source
        cfg.config.compressor.target = options.target
        cfg.config.compressor.recursive = options.recursive
        cfg.config.compressor.verbose = options.verbose
        cfg.config.compressor.threads = options.threads
        cfg._update_yaml()
        cfg.save(filename=temp.name)

        # The Snakefile can stay in its original place:
        rule = module.path + os.sep + "compressor.rules"

        # Run the snakemake command itself.
        cmd = 'snakemake -s %s  --configfile %s -j %s ' % \
                (rule, temp.name, options.jobs)

        if options.dryrun:
            cmd += " --dryrun "

        if options.verbose is False:
            cmd += " --quiet "
        else:
            cmd += " -p "

        # for slurm only: --cores-per-socket
        if options.cluster:
            cluster = ' --cluster "%s" ' % options.cluster
            cmd += cluster

        if options.snakemake:
            if " -s " in options.snakemake or " -j " in options.snakemake:
                raise ValueError("-s or -j cannot be used in " +
                    " --snakemake-options    (already used internally")
            cmd += options.snakemake

        if options.unlock:
            cmd += " --unlock "

        if options.verbose:
            print(cmd)

        # On travis, snakemake.shell command from snakemake fails.
        # Most probably because travis itself uses a subprocess.
        # execute from easydev uses pexpect.spawn, which seems to work well
        from easydev import execute
        execute(cmd, showcmd=False)
Exemplo n.º 47
0
def test_converter1():
    """Run ``converter.main()`` with a BAM input and a temporary BED output
    passed through ``sys.argv``."""
    import sys
    infile = bioconvert_data("test_measles.sorted.bam")
    with TempFile(suffix=".bed") as tempfile:
        # sys.argv is global state: put the original value back afterwards
        # so that the fake command line does not leak into other tests.
        saved_argv = sys.argv
        sys.argv = ["bioconvert", infile, tempfile.name, "--force"]
        try:
            converter.main()
        finally:
            sys.argv = saved_argv
Exemplo n.º 48
0
def test_wget():
    """Download the sequana README into a temporary file with ``wget``."""
    from easydev import TempFile
    url = "https://github.com/sequana/sequana/raw/master/README.rst"
    with TempFile() as target:
        wget(url, target.name)
Exemplo n.º 49
0
def test_fasta():
    """Run a FastaSim simulation with ``nreads`` set to 1000 on a temporary
    FASTA file name."""
    with TempFile(suffix=".fasta") as target:
        simulator = fasta.FastaSim(target.name)
        simulator.nreads = 1000
        simulator.simulate()
Exemplo n.º 50
0
def test_savePathwayAs(wikipath):
    """Save pathway WP232 as a PNG image into a temporary file.

    :param wikipath: object providing ``savePathwayAs`` (supplied by the
        caller, presumably a test fixture).
    """
    # Note that not all WP have the PDF format available.
    # WP4 has not (march 2018)
    pathway_id = "WP232"
    with TempFile(suffix=".png") as image:
        wikipath.savePathwayAs(pathway_id, image.name, display=False)
Exemplo n.º 51
0
def test_converter():
    """Run the ``bioconvert`` command-line tool on a BAM file, writing to a
    temporary BED file, and require a zero exit status."""

    infile = bioconvert_data("test_measles.sorted.bam")
    with TempFile(suffix=".bed") as tempfile:
        # check_call waits for the command to finish (so the temporary file
        # is not removed while the process is still running) and raises if
        # it fails. An argument list avoids shell quoting issues with paths.
        subprocess.check_call(
            ["bioconvert", infile, tempfile.name, "--force"])
Exemplo n.º 52
0
def test_genomecov():
    """Exercise bedtools.GenomeCov on the JB409847 data set: invalid
    inputs, equality, read-only / validated attributes, per-chromosome
    statistics and plots, and the CSV export."""

    def must_fail(action):
        """Call *action* and fail the test if it does not raise.

        The AssertionError is raised outside of the ``try`` block on purpose:
        an ``assert False`` placed inside a ``try`` guarded by a bare
        ``except:`` is swallowed and the check can never fail.
        """
        try:
            action()
        except (Exception, SystemExit):
            return
        raise AssertionError("the call was expected to raise but did not")

    filename = sequana_data('JB409847.bed')

    # wrong input file, then wrong genbank file
    must_fail(lambda: bedtools.GenomeCov("dummy.csv"))
    must_fail(lambda: bedtools.GenomeCov(filename, "dummy.gbk"))

    # !now let us read the good data sets
    bed = bedtools.GenomeCov(filename, sequana_data('JB409847.gbk'))
    bed.compute_coverage(4001)

    bed = bedtools.GenomeCov(filename, sequana_data('JB409847.gbk'))
    bed2 = bedtools.GenomeCov(filename, sequana_data('JB409847.gbk'))
    assert bed == bed
    # two objects built from the same files must compare equal as well
    assert bed == bed2

    # test equality for same chromosome but different data
    bed2.chr_list[0].df["cov"] += 100
    assert bed != bed2
    # test equality for same data but a different number of chromosomes
    bed2.chr_list[0].df["cov"] -= 100
    bed2.chr_list.append("dummy")
    assert bed != bed2

    # setter must be bool
    must_fail(lambda: setattr(bed, "circular", 1))

    # cant use setter
    must_fail(lambda: setattr(bed, "feature_dict", {}))

    assert len(bed) == 1
    # a getter for the first chromosome
    bed[0]

    # setter available but not sure this is useful
    bed.window_size = 4001
    bed.hist()

    # This requires to call other method before
    for chrom in bed:
        chrom.moving_average(n=501)
        chrom.running_median(n=501, circular=True)
        chrom.running_median(n=501, circular=False)

        chrom.compute_zscore()
        chrom.get_roi()
        with TempFile(suffix='.png') as fh:
            chrom.plot_coverage(filename=fh.name)
        with TempFile(suffix='.png') as fh:
            chrom.plot_hist_zscore(filename=fh.name)
        with TempFile(suffix='.png') as fh:
            chrom.plot_hist_normalized_coverage(filename=fh.name)

        len(chrom)
        print(chrom)
        chrom.get_size()
        chrom.get_mean_cov()
        chrom.get_var_coef()

    # export to CSV and read the exported file back
    with TempFile(suffix='.csv') as fh:
        bed.to_csv(fh.name)
        bed2 = bedtools.GenomeCov(fh.name, sequana_data('JB409847.gbk'))

    # plotting
    bed.chr_list[0].plot_hist_coverage()
    bed.chr_list[0].plot_hist_coverage(logx=False, logy=True)
    bed.chr_list[0].plot_hist_coverage(logx=True, logy=False)
    with TempFile(suffix=".png") as fh:
        bed.chr_list[0].plot_hist_coverage(logx=False, logy=False,
            filename=fh.name)
Exemplo n.º 53
0
def test_genomecov():
    """Exercise bedtools.GenomeCov on the JB409847 data set read by chunks:
    invalid inputs and thresholds, read-only / validated attributes,
    per-chromosome statistics and plots, and the CSV export."""

    def must_fail(action):
        """Call *action* and fail the test if it does not raise.

        The AssertionError is raised outside of the ``try`` block on purpose:
        an ``assert False`` placed inside a ``try`` guarded by a bare
        ``except:`` is swallowed and the check can never fail.
        """
        try:
            action()
        except (Exception, SystemExit):
            return
        raise AssertionError("the call was expected to raise but did not")

    filename = sequana_data('JB409847.bed')

    # wrong file
    must_fail(lambda: bedtools.GenomeCov("dummy.csv"))

    # wrong threshold
    must_fail(lambda: bedtools.GenomeCov(filename, high_threshold=2))

    # wrong threshold
    must_fail(lambda: bedtools.GenomeCov(filename, low_threshold=-2))

    # wrong genbank
    must_fail(lambda: bedtools.GenomeCov(filename, "dummy.gbk"))

    # !now let us read the good data sets by chunks
    bed = bedtools.GenomeCov(filename,
                             sequana_data('JB409847.gbk'),
                             chunksize=5000)
    for c in bed.chr_list:
        c.run(1001, k=2)

    # setter must be bool
    must_fail(lambda: setattr(bed, "circular", 1))

    # cant use setter
    must_fail(lambda: setattr(bed, "feature_dict", {}))

    assert len(bed) == 1
    # a getter for the first chromosome
    bed[0]

    # setter available but not sure this is useful
    bed.window_size = 4000
    bed.window_size = 4001
    bed.hist()

    # This requires to call other method before
    for chrom in bed:
        chrom.moving_average(n=501)
        chrom.running_median(n=501, circular=True)
        chrom.running_median(n=501, circular=False)

        chrom.compute_zscore()
        chrom.get_rois()
        with TempFile(suffix='.png') as fh:
            chrom.plot_coverage(filename=fh.name)
        with TempFile(suffix='.png') as fh:
            chrom.plot_hist_zscore(filename=fh.name)
        with TempFile(suffix='.png') as fh:
            chrom.plot_hist_normalized_coverage(filename=fh.name)

        len(chrom)
        print(chrom)
        chrom.get_size()
        # attribute accesses only: check that they can be evaluated
        chrom.DOC
        chrom.CV

    with TempFile(suffix='.csv') as fh:
        bed.gc_window_size = 100
        bed.to_csv(fh.name)

    # plotting
    bed.chr_list[0].plot_hist_coverage()
    bed.chr_list[0].plot_hist_coverage(logx=False, logy=True)
    bed.chr_list[0].plot_hist_coverage(logx=True, logy=False)
    with TempFile(suffix=".png") as fh:
        bed.chr_list[0].plot_hist_coverage(logx=False,
                                           logy=False,
                                           filename=fh.name)