Exemplo n.º 1
0
def run_stringtie_expression(data):
    """
    Estimate expression from Stringtie, using the bcbio datadict.

    Does not do transcriptome assembly.

    Outputs live under ``stringtie/<sample_name>``. When both the gene-level
    and isoform-level FPKM files already exist, Stringtie is not re-run and
    only the bookkeeping on ``data`` is performed.

    Returns the updated ``data`` with the stringtie directory, gene FPKM file
    and isoform FPKM file recorded. If "stringtie" is among the configured
    transcript assemblers, the absolute path of ``stringtie-assembly.gtf`` is
    also added to the assembled GTFs.
    """
    bam = dd.get_work_bam(data)
    sample_name = dd.get_sample_name(data)
    out_dir = os.path.join("stringtie", sample_name)
    isoform_fpkm = os.path.join(out_dir, sample_name + ".isoform.fpkm")
    gene_fpkm = os.path.join(out_dir, sample_name + ".fpkm")
    assembly = os.path.abspath(os.path.join(out_dir, "stringtie-assembly.gtf"))

    # Only run Stringtie when at least one of the two FPKM tables is missing;
    # the bookkeeping below is identical for fresh and cached results.
    if not (file_exists(isoform_fpkm) and file_exists(gene_fpkm)):
        with file_transaction(data, out_dir) as tx_out_dir:
            transcript_file = _stringtie_expression(bam, data, tx_out_dir)
            df = _parse_ballgown(transcript_file)
            _write_fpkms(df, tx_out_dir, sample_name)

    data = dd.set_stringtie_dir(data, out_dir)
    data = dd.set_fpkm(data, gene_fpkm)
    data = dd.set_fpkm_isoform(data, isoform_fpkm)
    if "stringtie" in dd.get_transcript_assembler(data):
        # Build a new list instead of appending in place, so the list object
        # handed back by the getter (which may be shared with the input
        # sample) is never mutated.
        assembled_gtfs = dd.get_assembled_gtf(data) + [assembly]
        data = dd.set_assembled_gtf(data, assembled_gtfs)
    return data
Exemplo n.º 2
0
def run_stringtie_expression(data):
    """
    Estimate expression from Stringtie, using the bcbio datadict.

    Does not do transcriptome assembly.

    Results are written under ``stringtie/<sample_name>``. If the gene and
    isoform FPKM files are both already present, the Stringtie run is skipped
    and only the paths are recorded on ``data``.

    Returns the updated ``data`` carrying the stringtie directory and the gene
    and isoform FPKM file paths. When "stringtie" is one of the configured
    transcript assemblers, the absolute path of ``stringtie-assembly.gtf`` is
    added to the assembled GTFs as well.
    """
    bam = dd.get_work_bam(data)
    sample_name = dd.get_sample_name(data)
    out_dir = os.path.join("stringtie", sample_name)
    isoform_fpkm = os.path.join(out_dir, sample_name + ".isoform.fpkm")
    gene_fpkm = os.path.join(out_dir, sample_name + ".fpkm")
    assembly = os.path.abspath(os.path.join(out_dir, "stringtie-assembly.gtf"))

    outputs_present = file_exists(isoform_fpkm) and file_exists(gene_fpkm)
    if not outputs_present:
        # Compute inside a transaction directory so the FPKM tables are
        # produced under tx_out_dir before landing in out_dir.
        with file_transaction(data, out_dir) as tx_out_dir:
            transcript_file = _stringtie_expression(bam, data, tx_out_dir)
            df = _parse_ballgown(transcript_file)
            _write_fpkms(df, tx_out_dir, sample_name)

    # Shared bookkeeping for both the cached and the freshly computed case.
    data = dd.set_stringtie_dir(data, out_dir)
    data = dd.set_fpkm(data, gene_fpkm)
    data = dd.set_fpkm_isoform(data, isoform_fpkm)
    if "stringtie" in dd.get_transcript_assembler(data):
        # Concatenate into a fresh list rather than appending in place, so
        # the list returned by the getter is left untouched.
        assembled_gtfs = dd.get_assembled_gtf(data) + [assembly]
        data = dd.set_assembled_gtf(data, assembled_gtfs)
    return data
Exemplo n.º 3
0
def cufflinks_merge(*samples):
    """
    Merge the per-sample assembled GTFs and record the result on every sample.

    ``samples`` are the pipeline sample groups; the first item of the first
    group supplies the reference, annotation GTF and core count used for the
    merge. Assembled GTFs are collected from every sample, passed through
    ``filter_missing`` and handed to ``cufflinks.merge``.

    Returns a list with one ``[data]`` entry per sample, each carrying the
    merged GTF as its assembled GTF.
    """
    to_merge = filter_missing([dd.get_assembled_gtf(data) for data in
                               dd.sample_data_iterator(samples)])
    first = samples[0][0]
    ref_file = dd.get_sam_ref(first)
    gtf_file = dd.get_gtf_file(first)
    num_cores = dd.get_num_cores(first)
    merged_gtf = cufflinks.merge(to_merge, ref_file, gtf_file, num_cores, first)
    # Keep the value returned by the setter: discarding it and returning the
    # original samples loses the update whenever the setter returns an
    # updated copy instead of mutating in place.
    # NOTE(review): presumably dd.set_assembled_gtf returns the updated
    # sample - confirm against the datadict implementation.
    return [[dd.set_assembled_gtf(data, merged_gtf)]
            for data in dd.sample_data_iterator(samples)]
Exemplo n.º 4
0
def cufflinks_assemble(data):
    """
    Run the cufflinks assembly for one sample and record the resulting GTF.

    The assembly is written to the "assembly" directory inside the sample's
    work directory. Returns the updated sample wrapped as ``[[data]]``.
    """
    work_bam = dd.get_work_bam(data)
    reference = dd.get_sam_ref(data)
    assembly_dir = os.path.join(dd.get_work_dir(data), "assembly")
    cores = dd.get_num_cores(data)
    gtf = cufflinks.assemble(work_bam, reference, cores, assembly_dir, data)
    return [[dd.set_assembled_gtf(data, gtf)]]
Exemplo n.º 5
0
def cufflinks_assemble(data):
    """
    Assemble transcripts for a single sample with cufflinks.

    Reads the work BAM, reference and core count from ``data``, runs
    ``cufflinks.assemble`` into ``<work_dir>/assembly`` and stores the GTF it
    returns as the sample's assembled GTF. The updated sample is returned
    nested as ``[[data]]``.
    """
    alignment = dd.get_work_bam(data)
    genome = dd.get_sam_ref(data)
    target_dir = os.path.join(dd.get_work_dir(data), "assembly")
    n_cores = dd.get_num_cores(data)
    result_gtf = cufflinks.assemble(alignment, genome, n_cores, target_dir, data)
    updated = dd.set_assembled_gtf(data, result_gtf)
    return [[updated]]
Exemplo n.º 6
0
def cufflinks_merge(*samples):
    """
    Merge the assembled GTFs of all samples into one and set it on each sample.

    The first item of the first sample group provides the reference file,
    annotation GTF and core count passed to ``cufflinks.merge``. The GTFs to
    merge are gathered from every sample and passed through
    ``filter_missing`` first.

    Returns a list with one ``[data]`` entry per sample, where each ``data``
    is the value returned by ``dd.set_assembled_gtf`` for the merged GTF.
    """
    to_merge = filter_missing([dd.get_assembled_gtf(data) for data in
                               dd.sample_data_iterator(samples)])
    first = samples[0][0]
    ref_file = dd.get_sam_ref(first)
    gtf_file = dd.get_gtf_file(first)
    num_cores = dd.get_num_cores(first)
    merged_gtf = cufflinks.merge(to_merge, ref_file, gtf_file, num_cores, first)
    # Re-wrap each updated sample in its own list to keep the nested
    # one-sample-per-group shape of the input.
    return [[dd.set_assembled_gtf(data, merged_gtf)]
            for data in dd.sample_data_iterator(samples)]