示例#1
0
def _update_data(align_file, out_dir, names, data):
    """Record the alignment outputs on ``data`` and return the updated value.

    Args:
        align_file: Path of the alignment file; stored as both the work BAM
            and the align BAM.
        out_dir: Directory passed to ``_move_transcriptome_file`` and
            ``get_splicejunction_file`` to locate their files.
        names: Passed through to ``_move_transcriptome_file``.
        data: Sample data object updated through the ``dd`` setters.

    Returns:
        The updated ``data``. The junction BED entry is only set when a
        splice junction file was found.
    """
    data = dd.set_work_bam(data, align_file)
    data = dd.set_align_bam(data, align_file)
    transcriptome_file = _move_transcriptome_file(out_dir, names)
    data = dd.set_transcriptome_bam(data, transcriptome_file)
    sjfile = get_splicejunction_file(out_dir, data)
    # NOTE(review): get_splicejunction_file presumably returns a falsy value
    # when no junction file is available -- confirm against the helper. Skip
    # the BED conversion in that case instead of handing it a missing path.
    if sjfile:
        sjbed = junction2bed(sjfile)
        data = dd.set_junction_bed(data, sjbed)
    return data
示例#2
0
def _update_data(align_file, out_dir, names, data):
    """Attach the alignment results to ``data`` and return it.

    ``align_file`` is stored as both the work BAM and the align BAM, the
    file returned by ``_move_transcriptome_file`` is stored as the
    transcriptome BAM, and -- when ``get_splicejunction_file`` yields a
    truthy result -- its ``junction2bed`` conversion is stored as the
    junction BED.
    """
    # The same alignment file backs both the work and the align BAM entries.
    data = dd.set_align_bam(dd.set_work_bam(data, align_file), align_file)
    data = dd.set_transcriptome_bam(
        data, _move_transcriptome_file(out_dir, names))

    junctions = get_splicejunction_file(out_dir, data)
    if not junctions:
        # No junction file: leave the junction BED entry untouched.
        return data
    return dd.set_junction_bed(data, junction2bed(junctions))
示例#3
0
def align(fastq_file, pair_file, ref_file, names, align_dir, data):
    """Run hisat2 on the given reads and record the outputs on ``data``.

    A hisat2 command line is assembled from the configuration in ``data``,
    piped into the command provided by ``postalign.tobam_cl`` and executed
    with ``do.run``. The run is skipped when ``file_exists`` reports the
    output (or, for split alignments, the combined final file) as present.
    The work BAM and the splice junction file are then set on ``data``.

    Args:
        fastq_file: First (or only) read file.
        pair_file: Second read file; a falsy value selects single-end mode.
        ref_file: Value passed to hisat2 ``-x``.
        names: Passed to ``_get_rg_flags`` to build the read group flags.
        align_dir: Directory in which output files are placed.
        data: Sample data object.

    Returns:
        The updated ``data``.
    """
    # NOTE: the command template is filled via ``locals()`` below, so the
    # local names in this function must keep matching its {placeholders}.
    paired = bool(pair_file)
    hisat2 = config_utils.get_program("hisat2", data)
    num_cores = dd.get_num_cores(data)
    quality_flag = _get_quality_flag(data)
    stranded_flag = _get_stranded_flag(data, paired)
    rg_flags = _get_rg_flags(names)
    out_file = os.path.join(
        align_dir, "{0}-sort.bam".format(dd.get_sample_name(data)))

    final_file = None
    if data.get("align_split"):
        # Split alignment: write to a part file that is combined later.
        final_file = out_file
        out_file, data = alignprep.setup_combine(final_file, data)
        fastq_file, pair_file = alignprep.split_namedpipe_cls(
            fastq_file, pair_file, data)

    already_aligned = file_exists(out_file) or (
        final_file is not None and file_exists(final_file))
    if not already_aligned:
        pieces = [
            "{hisat2} --new-summary -x {ref_file} -p {num_cores} {quality_flag} {stranded_flag} ",
            "{rg_flags} ",
        ]
        pieces.append("-1 {fastq_file} -2 {pair_file} " if paired
                      else "-U {fastq_file} ")
        if dd.get_analysis(data).lower() == "smallrna-seq":
            pieces.append("-k 1000 ")
        # when assembling transcripts, emit flags cufflinks/stringtie can use
        if dd.get_transcript_assembler(data):
            pieces.append("--dta-cufflinks ")
        if dd.get_analysis(data).lower() == "rna-seq":
            splicesites = get_known_splicesites_file(align_dir, data)
            if file_exists(splicesites):
                pieces.append("--known-splicesite-infile {splicesites} ")
        novel_splicesite_file = os.path.join(
            align_dir,
            "{0}-novelsplicesites.bed".format(dd.get_sample_name(data)))
        pieces.append("--novel-splicesite-outfile {novel_splicesite_file} ")
        # user-configured extra hisat2 options go last
        pieces.append(" ".join(_get_options_from_config(data)))

        message = "Aligning %s and %s with hisat2." % (fastq_file, pair_file)
        with postalign.tobam_cl(data, out_file, pair_file
                                is not None) as (tobam_cl, tx_out_file):
            cmd = "".join(pieces) + " | " + tobam_cl
            do.run(cmd.format(**locals()), message)

    data = dd.set_work_bam(data, out_file)
    return dd.set_junction_bed(data, get_splicejunction_file(align_dir, data))
示例#4
0
def align(fastq_file, pair_file, ref_file, names, align_dir, data):
    """Align reads with hisat2, then store the BAM and junction file on ``data``.

    The hisat2 command is built as a format template, joined with the
    command from ``postalign.tobam_cl`` through a shell pipe and run via
    ``do.run``. Nothing is run when ``file_exists`` already reports the
    output file (or the final combined file of a split alignment).

    Args:
        fastq_file: First (or only) read file.
        pair_file: Second read file; falsy means single-end input.
        ref_file: Value passed to hisat2 ``-x``.
        names: Input for ``_get_rg_flags``.
        align_dir: Directory receiving the output files.
        data: Sample data object.

    Returns:
        ``data`` with the work BAM and junction BED entries set.
    """
    # NOTE: ``cmd`` is expanded with ``locals()``; every {placeholder} in it
    # must therefore correspond to a local variable of exactly that name.
    paired = bool(pair_file)
    hisat2 = config_utils.get_program("hisat2", data)
    num_cores = dd.get_num_cores(data)
    quality_flag = _get_quality_flag(data)
    stranded_flag = _get_stranded_flag(data, paired)
    rg_flags = _get_rg_flags(names)
    out_file = os.path.join(
        align_dir,
        "{0}-sort.bam".format(dd.get_sample_name(data)),
    )

    if not data.get("align_split"):
        final_file = None
    else:
        final_file = out_file
        out_file, data = alignprep.setup_combine(final_file, data)
        fastq_file, pair_file = alignprep.split_namedpipe_cls(
            fastq_file, pair_file, data
        )

    # Align only if neither the output nor the combined final file exists.
    needs_alignment = not file_exists(out_file)
    if needs_alignment and final_file is not None:
        needs_alignment = not file_exists(final_file)

    if needs_alignment:
        cmd = "{hisat2} --new-summary -x {ref_file} -p {num_cores} {quality_flag} {stranded_flag} "
        cmd += "{rg_flags} "
        cmd += "-1 {fastq_file} -2 {pair_file} " if paired else "-U {fastq_file} "
        if dd.get_analysis(data).lower() == "smallrna-seq":
            cmd += "-k 1000 "
        # transcript assembly requested: add flags cufflinks/stringtie can use
        if dd.get_transcript_assembler(data):
            cmd += "--dta-cufflinks "
        if dd.get_analysis(data).lower() == "rna-seq":
            splicesites = get_known_splicesites_file(align_dir, data)
            if file_exists(splicesites):
                cmd += "--known-splicesite-infile {splicesites} "
        novel_splicesite_file = os.path.join(
            align_dir,
            "{0}-novelsplicesites.bed".format(dd.get_sample_name(data)),
        )
        cmd += "--novel-splicesite-outfile {novel_splicesite_file} "
        # append any additional hisat2 options from the configuration
        cmd += " ".join(_get_options_from_config(data))

        message = "Aligning %s and %s with hisat2." % (fastq_file, pair_file)
        is_paired_output = pair_file is not None
        with postalign.tobam_cl(data, out_file, is_paired_output) as (tobam_cl, tx_out_file):
            cmd += " | " + tobam_cl
            do.run(cmd.format(**locals()), message)

    data = dd.set_work_bam(data, out_file)
    junctionbed = get_splicejunction_file(align_dir, data)
    return dd.set_junction_bed(data, junctionbed)