Пример #1
0
def bed_to_bg(input_file, output_file, size_file, extras):
    """Run genomeCoverageBed on a BED file, then delete the BED file.

    Args:
        input_file: Path given to ``genomeCoverageBed -i``. It is removed
            with ``os.unlink`` after the command has been dispatched.
        output_file: Path placed after a ``>`` token in the argument list.
        size_file: Path given to ``-g``.
        extras: Not used by this function.
    """
    log.info("Converting %s to a genome coverage file", input_file)

    command = ["genomeCoverageBed", "-bg", "-split"]
    command += ["-i", input_file, "-g", size_file]
    # NOTE(review): ">" is an ordinary list element here, so the redirect
    # only works if utils.run_cmd hands the command to a shell -- confirm.
    command += [">", output_file]
    utils.run_cmd(command, "bed_to_bg")

    # The intermediate BED file is not kept once coverage has been requested.
    log.info("Deleting old file %s", input_file)
    os.unlink(input_file)
Пример #2
0
def pear_fastq_files(input_files, output_file, extras):
    """Run pear on a pair of FASTQ files.

    Args:
        input_files: Sequence whose first element is passed to ``pear -f``
            and whose second element is passed to ``pear -r``.
        output_file: Expected output path; ``.assembled.fastq`` is stripped
            from it and the remainder is passed to ``pear -o``.
        extras: Not used by this function.
    """
    forward_reads, reverse_reads = input_files[0], input_files[1]
    log.info("Starting pear run on %s and %s", forward_reads, reverse_reads)

    # pear receives an output prefix rather than the final file name
    # (presumably it appends ".assembled.fastq" itself -- confirm).
    output_prefix = re.sub(r"\.assembled\.fastq", "", output_file)
    args = ["pear", "-f", forward_reads, "-r", reverse_reads, "-o", output_prefix]

    # Pass the command list plus a label, as every other step in this file
    # does; the original passed only the label and never used ``args``.
    utils.run_cmd(args, "pear")
Пример #3
0
def bam_to_bed(input_file, output_file, output, extras):
    """Run bamToBed on a BAM file, then move the BAM file into ``output``.

    Args:
        input_file: Path given to ``bamToBed -i``; renamed into ``output``
            after the command has been dispatched.
        output_file: Path placed after a ``>`` token in the argument list.
        output: Directory that receives ``input_file`` (same base name).
        extras: Not used by this function.
    """
    log.info("Converting %s to a bed file", input_file)

    # The original list literal lacked the comma between ``input_file`` and
    # ">" and therefore did not parse.
    # NOTE(review): ">" is an ordinary list element, so the redirect only
    # works if utils.run_cmd hands the command to a shell -- confirm.
    args = ["bamToBed", "-i", input_file, ">", output_file]
    utils.run_cmd(args, "bam_to_bed")

    # now we can move sorted bam to output
    file_name = os.path.basename(input_file)
    new_name = os.path.join(output, file_name)
    os.rename(input_file, new_name)
Пример #4
0
def bw_stats(input_files, output_file):
    """Append bwtool summary and distribution reports for each input file.

    For every path in ``input_files`` two bwtool commands are run through
    ``utils.run_cmd`` (third argument ``True``) and the text it returns is
    appended to ``output_file``:

    * ``bwtool summary`` output is written verbatim.
    * ``bwtool distribution`` output is written under a ``depth\\tcount``
      header, keeping only rows whose second tab-separated field is not
      ``"0"``. Rows without a second field (including a completely empty
      result) are dropped instead of raising ``IndexError``.

    Args:
        input_files: Iterable of paths; each is made absolute before being
            handed to bwtool.
        output_file: Report path, opened in append mode once per input file.
    """
    # bwtool is told to write to /dev/stdout explicitly so that its report
    # ends up in the text returned by utils.run_cmd.
    summary = "bwtool summary 10000 -header -with-sum {} /dev/stdout"
    dist = "bwtool distribution {} /dev/stdout"

    for input_file in input_files:
        log.info("Running bigwig stats on {}".format(input_file))
        bigwig_path = os.path.abspath(input_file)

        with open(output_file, "a+") as stats:
            for command in (summary, dist):
                # Split the template first and substitute the path afterwards
                # so a path containing whitespace stays a single argument.
                args = [token.format(bigwig_path) for token in command.split()]
                output = utils.run_cmd(args, command, True)

                if command.startswith("bwtool summary"):
                    stats.write("#### bwtool summary for {}\n".format(input_file))
                    stats.write(output)
                    stats.write("####\n")
                    continue

                # filter zeros out; guard the column access so blank or
                # malformed rows cannot raise IndexError
                output_clean = []
                for line in output.rstrip().split("\n"):
                    fields = line.split("\t")
                    if len(fields) > 1 and fields[1] != "0":
                        output_clean.append(line)

                stats.write("#### bwtool distribution for {}\n".format(input_file))
                stats.write("depth\tcount\n")
                stats.write("\n".join(output_clean))
                stats.write("\n####\n")

            stats.write("\n\n")
Пример #5
0
def align_with_bowtie_two(input_file, output_file, options, stats_file, extras):
    """Run bowtie2 on one input file and record what the run reported.

    Args:
        input_file: Path passed to bowtie2 as a positional argument.
        output_file: Path passed to ``bowtie2 -S``.
        options: Object providing ``cores`` (passed to ``-p``) and ``index``
            (passed to ``-x``).
        stats_file: Forwarded to ``utils.bowtie_record_output``.
        extras: Not used by this function.
    """
    log.info("Running bowtie2 on %s", input_file)

    bowtie_cmd = ["bowtie2", "-t", "--no-unal"]
    bowtie_cmd.extend(["-p", str(options.cores)])
    bowtie_cmd.extend(["-x", options.index])
    bowtie_cmd.append(input_file)
    bowtie_cmd.extend(["-S", output_file])

    # The third argument asks run_cmd to hand back what the process printed
    # (presumably stderr, where bowtie2 reports its statistics -- confirm).
    captured = utils.run_cmd(bowtie_cmd, "bowtie2", True)

    # pass along to be saved
    utils.bowtie_record_output(captured, input_file, stats_file)
Пример #6
0
def upload_to_one_codex(input_file, output_file, extras):
    """Run ``onecodex upload`` on a single file.

    Args:
        input_file: Path passed to ``onecodex upload``.
        output_file: Not used by this function.
        extras: Not used by this function.
    """
    log.info("uploading %s to One Codex", input_file)
    utils.run_cmd(["onecodex", "upload", input_file], "One Codex")
Пример #7
0
    args = ["onecodex", "upload", input_file]
    log.info("uploading %s to One Codex", input_file)

    utils.run_cmd(args, "One Codex")

# only bowtie
# rsem does this for you
@transform(rename_accepted_hits, suffix(".bam"),".sorted.bam", extras)
def sort_bam(input_file, output_file, extras):
    """Sort a BAM file with ``samtools-rs rocksort`` and delete the unsorted input.

    Args:
        input_file: Unsorted BAM path; removed with ``os.unlink`` after the
            sort command has been dispatched.
        output_file: Target path ending in ``.sorted.bam`` (per the
            ``@transform`` suffix above). The trailing ``.bam`` is stripped
            before the path is handed to the sorter.
        extras: Not used by this function.
    """
    log.info("Sorting %s ", input_file)

    # hacky: the sorter is given the name without its ".bam" extension
    # (presumably it appends the extension itself -- confirm). The pattern is
    # anchored so a ".bam" elsewhere in the path is left untouched.
    output_prefix = re.sub(r"\.bam$", "", output_file)

    # The original line ended in a stray "):" after the list literal, which
    # is a syntax error.
    args = ["samtools-rs", "rocksort", "-@", "8", "-m", "16G", input_file, output_prefix]
    utils.run_cmd(args, "samtools rocksort")

    # careful
    log.info("Deleting old file %s", input_file)
    os.unlink(input_file)


@transform(sort_bam, suffix(".sorted.bam"), ".bed", options.output, extras)
def bam_to_bed(input_file, output_file, output, extras):

    log.info("Converting %s to a bed file", input_file)
    args = ["bamToBed", "-i", input_file ">", output_file]
    utils.run_cmd(args, "bam_to_bed")

    # now we can move sorted bam to output
    file_name = os.path.basename(input_file)