def run_stringtie_expression(data):
    """
    Estimate expression with Stringtie, using the bcbio datadict.

    Per the original note on this function, it does not do transcriptome
    assembly itself.

    Outputs are expected under ``stringtie/<sample_name>``:
    ``<sample_name>.fpkm`` (gene level) and ``<sample_name>.isoform.fpkm``
    (isoform level). When both files already exist the Stringtie run is
    skipped; otherwise the expression step runs inside a file transaction on
    the output directory and the FPKM tables are written from the parsed
    ballgown transcript file.

    In both cases the returned data has the stringtie directory and the two
    FPKM paths recorded. If "stringtie" is among the configured transcript
    assemblers, the absolute path of ``stringtie-assembly.gtf`` is appended
    to the sample's assembled GTF list as well.

    Returns the updated data.
    """
    bam = dd.get_work_bam(data)
    sample_name = dd.get_sample_name(data)
    out_dir = os.path.join("stringtie", sample_name)
    isoform_fpkm = os.path.join(out_dir, sample_name + ".isoform.fpkm")
    gene_fpkm = os.path.join(out_dir, sample_name + ".fpkm")
    assembly = os.path.abspath(os.path.join(out_dir, "stringtie-assembly.gtf"))

    # Only the computation is conditional; the bookkeeping below is the same
    # whether the outputs were just produced or were already on disk.
    if not (file_exists(isoform_fpkm) and file_exists(gene_fpkm)):
        with file_transaction(data, out_dir) as tx_out_dir:
            transcript_file = _stringtie_expression(bam, data, tx_out_dir)
            df = _parse_ballgown(transcript_file)
            _write_fpkms(df, tx_out_dir, sample_name)

    data = dd.set_stringtie_dir(data, out_dir)
    data = dd.set_fpkm(data, gene_fpkm)
    data = dd.set_fpkm_isoform(data, isoform_fpkm)
    if "stringtie" in dd.get_transcript_assembler(data):
        assembled_gtfs = dd.get_assembled_gtf(data)
        assembled_gtfs.append(assembly)
        data = dd.set_assembled_gtf(data, assembled_gtfs)
    return data
def cufflinks_merge(*samples):
    """
    Merge the per-sample assembled GTFs and attach the result to every sample.

    Collects the assembled GTF of each sample (dropping missing entries via
    ``filter_missing``), merges them with ``cufflinks.merge`` using the
    reference, annotation GTF and core count taken from the first sample, and
    records the merged GTF as the assembled GTF of each sample.

    Returns a list with one single-element list ``[data]`` per sample, each
    holding the updated sample.
    """
    to_merge = filter_missing([dd.get_assembled_gtf(data) for data in
                               dd.sample_data_iterator(samples)])
    # Shared settings are read from the first sample.
    first = samples[0][0]
    ref_file = dd.get_sam_ref(first)
    gtf_file = dd.get_gtf_file(first)
    num_cores = dd.get_num_cores(first)
    merged_gtf = cufflinks.merge(to_merge, ref_file, gtf_file, num_cores,
                                 first)
    # Keep the value returned by the setter, as the rest of this file does,
    # instead of relying on the setter mutating its argument in place.
    return [[dd.set_assembled_gtf(sample, merged_gtf)]
            for sample in dd.sample_data_iterator(samples)]
def cufflinks_assemble(data):
    """
    Run the Cufflinks assembly step for one sample.

    The work BAM, reference and core count are read from the datadict and
    the assembly is written to the "assembly" directory under the sample's
    work directory. The resulting GTF is stored as the sample's assembled
    GTF.

    Returns the updated sample wrapped as ``[[data]]``.
    """
    assembly_dir = os.path.join(dd.get_work_dir(data), "assembly")
    gtf = cufflinks.assemble(
        dd.get_work_bam(data),
        dd.get_sam_ref(data),
        dd.get_num_cores(data),
        assembly_dir,
        data,
    )
    updated = dd.set_assembled_gtf(data, gtf)
    return [[updated]]
def cufflinks_merge(*samples):
    """
    Merge the per-sample assembled GTFs and attach the result to every sample.

    Collects the assembled GTF of each sample (dropping missing entries via
    ``filter_missing``), merges them with ``cufflinks.merge`` using the
    reference, annotation GTF and core count taken from the first sample, and
    records the merged GTF as the assembled GTF of each sample.

    Returns a list with one single-element list ``[data]`` per sample, each
    holding the updated sample.
    """
    to_merge = filter_missing([dd.get_assembled_gtf(data) for data in
                               dd.sample_data_iterator(samples)])
    # Shared settings are read from the first sample.
    first = samples[0][0]
    ref_file = dd.get_sam_ref(first)
    gtf_file = dd.get_gtf_file(first)
    num_cores = dd.get_num_cores(first)
    merged_gtf = cufflinks.merge(to_merge, ref_file, gtf_file, num_cores,
                                 first)
    # Each sample is re-wrapped in its own list to keep the nested
    # [[data], [data], ...] layout.
    return [[dd.set_assembled_gtf(sample, merged_gtf)]
            for sample in dd.sample_data_iterator(samples)]