def rapmap_index(gtf_file, ref_file, algorithm, data, out_dir): valid_indexes = ["pseudoindex", "quasiindex"] index_type = algorithm + "index" assert index_type in valid_indexes, \ "RapMap only supports %s indices." % valid_indexes out_dir = os.path.join(out_dir, index_type, dd.get_genome_build(data)) if dd.get_disambiguate(data): out_dir = "-".join([out_dir] + dd.get_disambguate(data)) rapmap = config_utils.get_program("rapmap", dd.get_config(data)) # use user supplied transcriptome FASTA file if it exists if dd.get_transcriptome_fasta(data): out_dir = os.path.join(out_dir, index_type, dd.get_genome_build(data)) gtf_fa = dd.get_transcriptome_fasta(data) else: gtf_fa = sailfish.create_combined_fasta(data) gtf_fa = fasta.strip_transcript_versions(gtf_fa, os.path.join(out_dir, os.path.basename(gtf_fa))) tmpdir = dd.get_tmp_dir(data) if file_exists(out_dir + "rapidx.jfhash"): return out_dir files = dd.get_input_sequence_files(data) kmersize = sailfish.pick_kmersize(files[0]) message = "Creating rapmap {index_type} for {gtf_fa} with {kmersize} bp kmers." with file_transaction(out_dir) as tx_out_dir: cmd = "{rapmap} {index_type} -k {kmersize} -i {tx_out_dir} -t {gtf_fa}" do.run(cmd.format(**locals()), message.format(**locals()), None) return out_dir
def rapmap_index(gtf_file, ref_file, algorithm, data, out_dir): out_dir = os.path.join(out_dir, "index") valid_indexes = ["pseudoindex", "quasiindex"] index_type = algorithm + "index" assert index_type in valid_indexes, \ "RapMap only supports %s indices." % valid_indexes out_dir = os.path.join(out_dir, index_type, sailfish.get_build_string(data)) if dd.get_disambiguate(data): out_dir = "-".join([out_dir] + dd.get_disambguate(data)) rapmap = config_utils.get_program("rapmap", dd.get_config(data)) # use user supplied transcriptome FASTA file if it exists if dd.get_transcriptome_fasta(data): gtf_fa = dd.get_transcriptome_fasta(data) else: gtf_fa = sailfish.create_combined_fasta(data) gtf_fa = fasta.strip_transcript_versions(gtf_fa, os.path.join(out_dir, os.path.basename(gtf_fa))) tmpdir = dd.get_tmp_dir(data) if file_exists(os.path.join(out_dir, "sa.bin")): return out_dir files = dd.get_input_sequence_files(data) kmersize = sailfish.pick_kmersize(files[0]) message = "Creating rapmap {index_type} for {gtf_fa} with {kmersize} bp kmers." with file_transaction(out_dir) as tx_out_dir: cmd = "{rapmap} {index_type} -k {kmersize} -i {tx_out_dir} -t {gtf_fa}" do.run(cmd.format(**locals()), message.format(**locals()), None) return out_dir
def run_pizzly(data): work_dir = dd.get_work_dir(data) pizzlydir = os.path.join(work_dir, "pizzly") samplename = dd.get_sample_name(data) gtf = dd.get_gtf_file(data) if dd.get_transcriptome_fasta(data): gtf_fa = dd.get_transcriptome_fasta(data) else: gtf_fa = sailfish.create_combined_fasta(data) stripped_fa = os.path.splitext( os.path.basename(gtf_fa))[0] + "-noversions.fa" stripped_fa = os.path.join(pizzlydir, stripped_fa) gtf_fa = fasta.strip_transcript_versions(gtf_fa, stripped_fa) fraglength = get_fragment_length(data) cachefile = os.path.join(pizzlydir, "pizzly.cache") fusions = kallisto.get_kallisto_fusions(data) pizzlypath = config_utils.get_program("pizzly", dd.get_config(data)) outdir = pizzly(pizzlypath, gtf, gtf_fa, fraglength, cachefile, pizzlydir, fusions, samplename, data) return outdir
def run_pizzly(data): samplename = dd.get_sample_name(data) work_dir = dd.get_work_dir(data) pizzlydir = os.path.join(work_dir, "pizzly") gtf = dd.get_transcriptome_gtf(data) if not gtf: gtf = dd.get_gtf_file(data) if dd.get_transcriptome_fasta(data): gtf_fa = dd.get_transcriptome_fasta(data) else: gtf_fa = sailfish.create_combined_fasta(data) stripped_fa = os.path.splitext(os.path.basename(gtf_fa))[0] + "-noversions.fa" stripped_fa = os.path.join(pizzlydir, stripped_fa) gtf_fa = fasta.strip_transcript_versions(gtf_fa, stripped_fa) fraglength = get_fragment_length(data) cachefile = os.path.join(pizzlydir, "pizzly.cache") fusions = kallisto.get_kallisto_fusions(data) pizzlypath = config_utils.get_program("pizzly", dd.get_config(data)) outdir = pizzly(pizzlypath, gtf, gtf_fa, fraglength, cachefile, pizzlydir, fusions, samplename, data) return outdir