Пример #1
0
def test_ena():
    """Exercise ``df.download_fasta`` with each method and with a bad one.

    Downloads accession FN433596.1 into temporary files using the "ENA"
    method, the "EUtils" method, and the "EUtils" method with an explicit
    ``ENA`` service instance, then checks that an unknown method name makes
    ``download_fasta`` raise.
    """
    from easydev import TempFile

    with TempFile() as f1:
        df.download_fasta("FN433596.1", output_filename=f1.name, method="ENA")

    with TempFile() as f2:
        df.download_fasta("FN433596.1", output_filename=f2.name, method="EUtils")

    with TempFile() as f3:
        from bioservices import ENA
        ena = ENA()
        # Write into f3: the f2 context manager has already exited here.
        df.download_fasta("FN433596.1", output_filename=f3.name, method="EUtils",
            service=ena)

    # The failure is raised from the ``else`` clause, outside the ``try``
    # body, so it cannot be swallowed by the handler. With the previous
    # ``assert False`` inside a bare ``except`` the check always passed.
    try:
        df.download_fasta("FN433596.1", method="dummy")
    except Exception:
        pass
    else:
        raise AssertionError("download_fasta accepted the unknown method 'dummy'")
Пример #2
0
def test_ena():
    """Exercise ``df.download_fasta`` with each method and with a bad one.

    Downloads accession FN433596.1 into temporary files using the "ENA"
    method, the "EUtils" method, and the "EUtils" method with an explicit
    ``ENA`` service instance, then checks that an unknown method name makes
    ``download_fasta`` raise.
    """
    from easydev import TempFile

    with TempFile() as f1:
        df.download_fasta("FN433596.1", output_filename=f1.name, method="ENA")

    with TempFile() as f2:
        df.download_fasta("FN433596.1", output_filename=f2.name, method="EUtils")

    with TempFile() as f3:
        from bioservices import ENA
        ena = ENA()
        # Write into f3: the f2 context manager has already exited here.
        df.download_fasta("FN433596.1", output_filename=f3.name, method="EUtils",
            service=ena)

    # The failure is raised from the ``else`` clause, outside the ``try``
    # body, so it cannot be swallowed by the handler. With the previous
    # ``assert False`` inside a bare ``except`` the check always passed.
    try:
        df.download_fasta("FN433596.1", method="dummy")
    except Exception:
        pass
    else:
        raise AssertionError("download_fasta accepted the unknown method 'dummy'")
Пример #3
0
def main(args=None):
    """Command-line entry point: optional downloads, coverage analysis, report.

    The steps, in order:

    1. Parse the options; with no option at all, the help is requested.
    2. If requested, download a reference (FASTA) and/or a genbank file and
       return without running any analysis.
    3. Convert a BAM input into a BED file with ``bedtools genomecov``
       (a BED input is used as is).
    4. Build a ``GenomeCov`` instance, optionally compute the GC content,
       and call ``run_analysis`` on the selected chromosome(s).
    5. Build the HTML report and optionally open it.

    :param args: argument list laid out like ``sys.argv`` (program name
        first). Defaults to a copy of ``sys.argv``.
    :raises ValueError: if the input is neither a ``.bam`` nor a ``.bed``
        file, or if the chromosome index is out of range.
    :raises AssertionError: if the genbank file given does not exist.
    """
    if args is None:
        args = sys.argv[:]

    user_options = Options(prog="sequana")

    # If --help or no options provided, show the help
    if len(args) == 1:
        # NOTE(review): presumably parse_args exits after printing the help
        # (argparse behaviour); otherwise ``options`` is unbound below.
        user_options.parse_args(["prog", "--help"])
    else:
        options = user_options.parse_args(args[1:])

    sequana_debug_level(options.logging_level)

    if options.download_reference:
        logger.info("Downloading reference %s from %s\n" %
            (options.download_reference, options.database))

        from bioservices.apps import download_fasta as df
        df.download_fasta(options.download_reference, method=options.database)
        # Stop here unless a genbank download was requested as well
        if options.download_genbank is None:
            return

    if options.download_genbank:
        # Report the genbank identifier (not the reference one)
        logger.info("Downloading genbank %s from %s\n" %
            (options.download_genbank, options.database))
        from sequana.snpeff import download_fasta_and_genbank
        download_fasta_and_genbank(options.download_genbank,
                                   options.download_genbank,
                                   genbank=True, fasta=False)
        return

    if options.genbank:
        assert os.path.exists(options.genbank), \
            "%s does not exists" % options.genbank

    if options.verbose:
        logger.info("Reading %s. This may take time depending on "
            "your input file" % options.input)

    # Convert BAM to BED
    if options.input.endswith(".bam"):
        bedfile = options.input.replace(".bam", ".bed")
        if options.verbose:
            logger.info("Converting BAM into BED file")
        shellcmd("bedtools genomecov -d -ibam %s > %s" % (options.input, bedfile))
    elif options.input.endswith(".bed"):
        bedfile = options.input
    else:
        raise ValueError("Input file must be a BAM or BED file")

    # Set the thresholds: both default to the symmetric --threshold value
    if options.low_threshold is None:
        options.low_threshold = -options.threshold

    if options.high_threshold is None:
        options.high_threshold = options.threshold

    # and output directory
    config.output_dir = options.output_directory
    config.sample_name = os.path.basename(options.input).split('.')[0]

    # Now we can create the instance of GenomeCoverage
    gc = GenomeCov(bedfile, options.genbank, options.low_threshold,
                   options.high_threshold, 0.5, 0.5)

    # if we have the reference, let us use it
    if options.reference:
        logger.info('Computing GC content')
        gc.compute_gc_content(options.reference, options.w_gc,
                              options.circular)

    # Now we scan the chromosomes,
    if len(gc.chr_list) == 1:
        if options.verbose:
            logger.warning("There is only one chromosome. Selected automatically.")
        chrom = gc.chr_list[0]
        chromosomes = [chrom]
        run_analysis(chrom, options, gc.feature_dict)
    elif options.chromosome <= -1 or options.chromosome > len(gc.chr_list):
        # The largest accepted index is len(gc.chr_list), as tested above
        raise ValueError("invalid chromosome index ; must be in [1-{}]".format(len(gc.chr_list)))
    else:
        # For user, we start at position 1 but in python, we start at zero.
        # A zero index means that all chromosomes are analysed.
        if options.chromosome:
            chromosomes = [gc[options.chromosome-1]]
        else:
            chromosomes = gc

        if options.verbose:
            print("There are %s chromosomes/contigs." % len(gc))
            for this in gc.chr_list:
                print("    {}".format(this.chrom_name))

        for i, chrom in enumerate(chromosomes):
            if options.verbose:
                # 1-based position: the selected index when one chromosome
                # was requested, i + 1 when all of them are scanned
                print("==================== analysing chrom/contig %s/%s (%s)"
                      % (i + (options.chromosome or 1), len(gc),
                      chrom.chrom_name))
            run_analysis(chrom, options, gc.feature_dict)

    if options.verbose:
        logger.info("Creating report in %s. Please wait" % config.output_dir)

    if options.chromosome:
        # Single chromosome report; ``chrom`` still holds the last (here
        # the only) chromosome analysed above
        cc = options.chromosome - 1
        datatable = CoverageModule.init_roi_datatable(gc[cc])
        ChromosomeCoverageModule(chromosomes[0], datatable, None)
        page = "{0}{1}{2}.cov.html".format(config.output_dir, os.sep,
                                           chrom.chrom_name)
    else:
        CoverageModule(gc)
        page = "{0}{1}coverage.html".format(config.output_dir, os.sep)

    if options.show_html:
        from easydev import onweb
        onweb(page)
Пример #4
0
def main(args=None):
    """Command-line entry point: optional downloads, coverage analysis, multiqc.

    The steps, in order:

    1. Parse the options; with no option at all, the help is requested.
    2. If requested, download a reference (FASTA) and/or a genbank file and
       return without running any analysis.
    3. Convert a BAM input into a BED file with ``bedtools genomecov``
       (a BED input is used as is).
    4. Build a ``GenomeCov`` instance, optionally compute the GC content,
       and call ``run_analysis`` on the selected chromosome(s).
    5. Unless ``skip_multiqc`` is set, run ``multiqc`` in the output
       directory.

    :param args: argument list laid out like ``sys.argv`` (program name
        first). Defaults to a copy of ``sys.argv``.
    :raises ValueError: if the input is neither a ``.bam`` nor a ``.bed``
        file.
    :raises AssertionError: if the genbank file given does not exist.
    :raises SystemExit: if the chromosome index is out of range.
    """
    if args is None:
        args = sys.argv[:]

    user_options = Options(prog="sequana")

    # If --help or no options provided, show the help
    if len(args) == 1:
        # NOTE(review): presumably parse_args exits after printing the help
        # (argparse behaviour); otherwise ``options`` is unbound below.
        user_options.parse_args(["prog", "--help"])
    else:
        options = user_options.parse_args(args[1:])

    logger.level = options.logging_level

    if options.download_reference:
        logger.info("Downloading reference %s from %s\n" %
            (options.download_reference, options.database))

        from bioservices.apps import download_fasta as df
        df.download_fasta(options.download_reference, method=options.database)
        # Stop here unless a genbank download was requested as well
        if options.download_genbank is None:
            return

    if options.download_genbank:
        logger.info("Downloading genbank %s from %s\n" %
            (options.download_genbank, options.database))
        from sequana.snpeff import download_fasta_and_genbank
        download_fasta_and_genbank(options.download_genbank,
                                   options.download_genbank,
                                   genbank=True, fasta=False)
        return

    if options.genbank:
        assert os.path.exists(options.genbank), \
            "%s does not exists" % options.genbank

    logger.info("Reading %s. This may take time depending on "
        "your input file" % options.input)

    # Convert BAM to BED
    if options.input.endswith(".bam"):
        bedfile = options.input.replace(".bam", ".bed")
        logger.info("Converting BAM into BED file")
        shellcmd("bedtools genomecov -d -ibam %s > %s" % (options.input, bedfile))
    elif options.input.endswith(".bed"):
        bedfile = options.input
    else:
        raise ValueError("Input file must be a BAM or BED file")

    # Set the thresholds: both default to the symmetric --threshold value
    if options.low_threshold is None:
        options.low_threshold = -options.threshold

    if options.high_threshold is None:
        options.high_threshold = options.threshold

    # and output directory
    config.output_dir = options.output_directory
    config.sample_name = os.path.basename(options.input).split('.')[0]

    # Now we can create the instance of GenomeCoverage.
    # A chromosome index of -1 means "all chromosomes" (empty selection).
    if options.chromosome == -1:
        chrom_list = []
    else:
        chrom_list = [options.chromosome]
    gc = GenomeCov(bedfile, options.genbank, options.low_threshold,
                   options.high_threshold, options.double_threshold,
                   options.double_threshold, chunksize=options.chunksize,
                   chromosome_list=chrom_list)

    # if we have the reference, let us use it
    if options.reference:
        logger.info('Computing GC content')
        gc.compute_gc_content(options.reference, options.w_gc,
                              options.circular)

    # Now we scan the chromosomes,
    if len(gc.chrom_names) == 1:
        logger.warning("There is only one chromosome. Selected automatically.")
        run_analysis(gc.chr_list[0], options, gc.feature_dict)
    elif options.chromosome < -1 or options.chromosome > len(gc.chrom_names):
        # NOTE(review): an index of 0 passes this check and then selects the
        # last contig through index -1 below -- confirm whether 0 should be
        # rejected as well.
        msg = "invalid chromosome index; must be in [1;{}]".format(len(gc.chrom_names))
        logger.error(msg)
        sys.exit(1)
    else:
        if options.chromosome == -1:
            chromosomes = gc.chrom_names # take all chromosomes
        else:
            # For user, we start at position 1 but in python, we start at zero
            chromosomes = [gc.chrom_names[options.chromosome-1]]

        logger.info("There are %s chromosomes/contigs." % len(gc))
        for this in gc.chrom_names:
            data = (this, gc.positions[this]["start"], gc.positions[this]["end"])
            logger.info("    {} (starting pos: {}, ending pos: {})".format(*data))

        for i, chrom in enumerate(chromosomes):
            # Report the contig actually analysed: ``chrom`` differs from
            # gc.chrom_names[i] when a single chromosome was selected.
            if options.chromosome == -1:
                position = i + 1
            else:
                position = options.chromosome
            logger.info("==================== analysing chrom/contig %s/%s (%s)"
                  % (position, len(gc), chrom))
            # NOTE(review): when a single chromosome was requested, chr_list
            # presumably holds only that contig, so i == 0 is the right
            # index -- confirm against GenomeCov.
            run_analysis(gc.chr_list[i], options, gc.feature_dict)

    if options.skip_multiqc is False:
        logger.info("=========================")
        logger.info("Creating multiqc report")
        pathtocfg = sequana_data("multiqc_config.yaml", "../multiqc/")
        # Build the argument list directly: splitting a formatted string
        # would break a configuration path containing whitespace.
        cmd = ["multiqc", ".", "-m", "sequana_coverage", "-f", "-c",
               str(pathtocfg)]
        import subprocess
        proc = subprocess.Popen(cmd, cwd=options.output_directory)
        status = proc.wait()
        if status != 0:
            logger.warning("multiqc exited with status %s" % status)
Пример #5
0
def main(args=None):
    """Command-line entry point: optional downloads, coverage analysis, multiqc.

    The steps, in order:

    1. Parse the options; with no option at all, the help is requested.
    2. If requested, download a reference (FASTA) and/or a genbank file and
       return without running any analysis.
    3. Convert a BAM input into a BED file with ``bedtools genomecov``
       (a BED input is used as is).
    4. Build a ``GenomeCov`` instance, optionally compute the GC content,
       and call ``run_analysis`` on the selected chromosome(s).
    5. Unless ``skip_multiqc`` is set, run ``multiqc`` in the output
       directory.

    :param args: argument list laid out like ``sys.argv`` (program name
        first). Defaults to a copy of ``sys.argv``.
    :raises ValueError: if the input is neither a ``.bam`` nor a ``.bed``
        file.
    :raises AssertionError: if the genbank file given does not exist.
    :raises SystemExit: if the chromosome index is out of range.
    """
    if args is None:
        args = sys.argv[:]

    user_options = Options(prog="sequana")

    # If --help or no options provided, show the help
    if len(args) == 1:
        # NOTE(review): presumably parse_args exits after printing the help
        # (argparse behaviour); otherwise ``options`` is unbound below.
        user_options.parse_args(["prog", "--help"])
    else:
        options = user_options.parse_args(args[1:])

    logger.level = options.logging_level

    if options.download_reference:
        logger.info("Downloading reference %s from %s\n" %
            (options.download_reference, options.database))

        from bioservices.apps import download_fasta as df
        df.download_fasta(options.download_reference, method=options.database)
        # Stop here unless a genbank download was requested as well
        if options.download_genbank is None:
            return

    if options.download_genbank:
        logger.info("Downloading genbank %s from %s\n" %
            (options.download_genbank, options.database))
        from sequana.snpeff import download_fasta_and_genbank
        download_fasta_and_genbank(options.download_genbank,
                                   options.download_genbank,
                                   genbank=True, fasta=False)
        return

    if options.genbank:
        assert os.path.exists(options.genbank), \
            "%s does not exists" % options.genbank

    logger.info("Reading %s. This may take time depending on "
        "your input file" % options.input)

    # Convert BAM to BED
    if options.input.endswith(".bam"):
        bedfile = options.input.replace(".bam", ".bed")
        logger.info("Converting BAM into BED file")
        shellcmd("bedtools genomecov -d -ibam %s > %s" % (options.input, bedfile))
    elif options.input.endswith(".bed"):
        bedfile = options.input
    else:
        raise ValueError("Input file must be a BAM or BED file")

    # Set the thresholds: both default to the symmetric --threshold value
    if options.low_threshold is None:
        options.low_threshold = -options.threshold

    if options.high_threshold is None:
        options.high_threshold = options.threshold

    # Now we can create the instance of GenomeCoverage.
    # A chromosome index of -1 means "all chromosomes" (empty selection).
    if options.chromosome == -1:
        chrom_list = []
    else:
        chrom_list = [options.chromosome]

    gc = GenomeCov(bedfile, options.genbank, options.low_threshold,
                   options.high_threshold, options.double_threshold,
                   options.double_threshold, chunksize=options.chunksize,
                   chromosome_list=chrom_list)

    # if we have the reference, let us use it
    if options.reference:
        logger.info('Computing GC content')
        gc.compute_gc_content(options.reference, options.w_gc,
                              options.circular)

    # Now we scan the chromosomes,
    if len(gc.chrom_names) == 1:
        logger.warning("There is only one chromosome. Selected automatically.")
        run_analysis(gc.chr_list[0], options, gc.feature_dict)
    elif options.chromosome < -1 or options.chromosome > len(gc.chrom_names):
        # NOTE(review): an index of 0 passes this check and then selects the
        # last contig through index -1 below -- confirm whether 0 should be
        # rejected as well.
        msg = "invalid chromosome index; must be in [1;{}]".format(len(gc.chrom_names))
        logger.error(msg)
        sys.exit(1)
    else:
        if options.chromosome == -1:
            chromosomes = gc.chrom_names # take all chromosomes
        else:
            # For user, we start at position 1 but in python, we start at zero
            chromosomes = [gc.chrom_names[options.chromosome-1]]

        logger.info("There are %s chromosomes/contigs." % len(gc))
        for this in gc.chrom_names:
            start = gc.positions[this]["start"]
            end = gc.positions[this]["end"]
            data = (this, start, end, end - start)
            logger.info("    {} (starting pos: {}, ending pos: {}, length: {})".format(*data))

        for i, chrom in enumerate(chromosomes):
            # Report the contig actually analysed: ``chrom`` differs from
            # gc.chrom_names[i] when a single chromosome was selected.
            if options.chromosome == -1:
                position = i + 1
            else:
                position = options.chromosome
            logger.info("==================== analysing chrom/contig %s/%s (%s)"
                  % (position, len(gc), chrom))
            # NOTE(review): when a single chromosome was requested, chr_list
            # presumably holds only that contig, so i == 0 is the right
            # index -- confirm against GenomeCov.
            run_analysis(gc.chr_list[i], options, gc.feature_dict)
            # logging level seems to be reset to warning somewhere
            logger.level = options.logging_level

    if options.skip_multiqc is False:
        logger.info("Creating multiqc report")
        pathtocfg = sequana_data("multiqc_config.yaml", "../multiqc/")
        # Build the argument list directly: splitting a formatted string
        # would break a configuration path containing whitespace.
        cmd = ["multiqc", ".", "-m", "sequana_coverage", "-f", "-c",
               str(pathtocfg)]
        import subprocess
        proc = subprocess.Popen(cmd, cwd=options.output_directory)
        status = proc.wait()
        if status != 0:
            logger.warning("multiqc exited with status %s" % status)
    logger.info("Done")