def run_sorting(blat_psl, results):
    """Sort a PSL file on field 10 and derive a copy without its edge lines.

    Two files are produced under ``results`` (``<name>`` is whatever
    ``utils.get_name(blat_psl)`` returns):

    * ``<name>.sorted.psl`` -- output of ``sort -k 10,10`` on ``blat_psl``.
    * ``<name>.sorted.only_entries.psl`` -- the sorted file with its first
      four lines and its last line removed (``tail -n +5 | head -n -1``).

    Each file is first written to the path handed out by
    ``utils.atomic_tmp_file`` and then passed to ``utils.atomic_install``.

    Args:
        blat_psl: Path of the PSL file to sort.
        results: Directory that receives both output files.

    Returns:
        Path of the ``.sorted.only_entries.psl`` file.

    Note:
        The exit status of the shell commands is discarded, so a failed
        command is not reported by this function.
    """
    # A single pre-formatted string prints the same text whether ``print``
    # is the Python 2 statement or the Python 3 function.
    print('blat_psl is %s' % (blat_psl,))
    utils.check_existence_or_raise(blat_psl)

    sorted_psl = os.path.join(
        results, utils.get_name(blat_psl) + '.sorted.psl')
    tmp = utils.atomic_tmp_file(sorted_psl)
    params = ['sort', '-k 10,10', blat_psl, '>', tmp]
    # shell=True is required here: the redirection is performed by the shell.
    subprocess.call(" ".join(params), shell=True)
    utils.atomic_install(tmp, sorted_psl)

    sorted_psl_only_entries = os.path.join(
        results, utils.get_name(blat_psl) + '.sorted.only_entries.psl')
    tmp = utils.atomic_tmp_file(sorted_psl_only_entries)
    # NOTE(review): presumably this strips the PSL header lines that ended up
    # at the top and bottom after sorting -- confirm against real output.
    # ``head -n -1`` (negative count) is a GNU coreutils extension.
    subprocess.call(
        'tail -n +5 ' + sorted_psl + '| head -n -1 > ' + tmp, shell=True)
    utils.atomic_install(tmp, sorted_psl_only_entries)
    return sorted_psl_only_entries
def run_faToBit(fasta, path_to_results):
    """Run ``faToTwoBit -noMask`` on ``fasta`` and return the output path.

    The output is ``<name>.2bit`` inside ``path_to_results``, where
    ``<name>`` comes from ``utils.get_name(fasta)``. The tool writes to the
    path returned by ``utils.atomic_tmp_file``; that path is then handed to
    ``utils.atomic_install``.

    Args:
        fasta: Path of the input file; checked with
            ``utils.check_existence_or_raise`` before anything else.
        path_to_results: Directory that receives the ``.2bit`` file.

    Returns:
        Path of the ``.2bit`` file.

    Note:
        The command's exit status is not inspected.
    """
    utils.check_existence_or_raise(fasta)

    out_name = utils.get_name(fasta) + '.2bit'
    twobit = os.path.join(path_to_results, out_name)
    staging = utils.atomic_tmp_file(twobit)

    command = " ".join(['faToTwoBit', '-noMask', fasta, staging])
    subprocess.call(command, shell=True)

    utils.atomic_install(staging, twobit)
    return twobit
def run_halLiftover(hal, specie_source, genome_bed_path, specie_target,
                    output_prefix):
    """Run ``halLiftover --inMemory`` and return the resulting BED path.

    The command line is, in order: ``hal``, ``specie_source``,
    ``genome_bed_path``, ``specie_target`` and the output path. The output
    path is the one returned by ``utils.atomic_tmp_file`` for
    ``output_prefix + '.bed'``; it is passed to ``utils.atomic_install``
    after the command returns.

    Args:
        hal: First positional argument given to ``halLiftover``.
        specie_source: Second positional argument (source genome name).
        genome_bed_path: Third positional argument (input BED path).
        specie_target: Fourth positional argument (target genome name).
        output_prefix: Output path without the ``.bed`` suffix.

    Returns:
        ``output_prefix + '.bed'``.

    Note:
        The command's exit status is not inspected.
    """
    bed = output_prefix + '.bed'
    staging = utils.atomic_tmp_file(bed)

    argv = [
        'halLiftover --inMemory',
        hal,
        specie_source,
        genome_bed_path,
        specie_target,
        staging,
    ]
    subprocess.call(' '.join(argv), shell=True)

    utils.atomic_install(staging, bed)
    return bed
def run_blat(fasta, ncrna, results):
    """Run ``blat -q=rna`` with ``fasta`` and ``ncrna`` and return the PSL path.

    ``fasta`` and ``ncrna`` are passed to ``blat`` in that order, followed by
    the output path. The final file is ``<name>.psl`` inside ``results``,
    where ``<name>`` comes from ``utils.get_name(fasta)``.

    Args:
        fasta: First input path; checked with
            ``utils.check_existence_or_raise`` before anything else.
        ncrna: Second input path (not checked here).
        results: Directory that receives the ``.psl`` file.

    Returns:
        Path of the ``.psl`` file.

    Note:
        The command's exit status is not inspected.
    """
    utils.check_existence_or_raise(fasta)

    psl_name = utils.get_name(fasta) + '.psl'
    blat_psl = os.path.join(results, psl_name)
    staging = utils.atomic_tmp_file(blat_psl)

    command = " ".join(['blat', "-q=rna", fasta, ncrna, staging])
    subprocess.call(command, shell=True)

    utils.atomic_install(staging, blat_psl)
    return blat_psl
def run_pslToBigBed(blat_psl, sizes, results):
    """Turn a PSL file into a bigBed file via three external commands.

    Steps, each writing into ``results`` (``<name>`` is
    ``utils.get_name(blat_psl)``):

    1. ``pslToBed``     -> ``<name>.bed``
    2. ``bedSort``      -> ``<name>.sorted.bed``
    3. ``bedToBigBed``  -> ``<name>.bigBed`` (also given ``sizes``)

    Every step writes to the path from ``utils.atomic_tmp_file`` and then
    calls ``utils.atomic_install`` on it.

    Args:
        blat_psl: Path of the input PSL file; checked with
            ``utils.check_existence_or_raise`` before anything else.
        sizes: Path passed to ``bedToBigBed`` as its second argument.
        results: Directory that receives all three output files.

    Returns:
        Path of the ``.bigBed`` file. (Earlier revisions returned ``None``;
        the path is now returned to match the other ``run_*`` helpers.)

    Note:
        The exit status of the commands is not inspected.
    """
    utils.check_existence_or_raise(blat_psl)

    # Step 1: PSL -> BED.
    blat_bed = os.path.join(results, utils.get_name(blat_psl) + '.bed')
    tmp = utils.atomic_tmp_file(blat_bed)
    params = ['pslToBed', blat_psl, tmp]
    subprocess.call(" ".join(params), shell=True)
    utils.atomic_install(tmp, blat_bed)

    # Step 2: sort the BED file.
    sorted_bed = os.path.join(
        results, utils.get_name(blat_psl) + '.sorted.bed')
    tmp = utils.atomic_tmp_file(sorted_bed)
    params = ['bedSort', blat_bed, tmp]
    subprocess.call(" ".join(params), shell=True)
    utils.atomic_install(tmp, sorted_bed)

    # Step 3: sorted BED -> bigBed.
    bigBed = os.path.join(results, utils.get_name(blat_psl) + '.bigBed')
    tmp = utils.atomic_tmp_file(bigBed)
    params = ['bedToBigBed', sorted_bed, sizes, tmp]
    subprocess.call(" ".join(params), shell=True)
    utils.atomic_install(tmp, bigBed)

    # Hand the final artefact back so callers can chain on it.
    return bigBed
def run_faToBit(fasta, path_to_results):
    """Produce ``<name>.2bit`` from ``fasta`` with ``faToTwoBit -noMask``.

    ``<name>`` is the value of ``utils.get_name(fasta)``. The command writes
    to a path obtained from ``utils.atomic_tmp_file``, which is afterwards
    given to ``utils.atomic_install`` together with the final path.

    Args:
        fasta: Input path, verified up front by
            ``utils.check_existence_or_raise``.
        path_to_results: Destination directory.

    Returns:
        The final ``.2bit`` path.

    Note:
        The return code of the command is ignored.
    """
    utils.check_existence_or_raise(fasta)

    twobit = os.path.join(path_to_results, utils.get_name(fasta) + '.2bit')
    scratch = utils.atomic_tmp_file(twobit)

    argv = ['faToTwoBit', '-noMask']
    argv.extend([fasta, scratch])
    subprocess.call(" ".join(argv), shell=True)

    utils.atomic_install(scratch, twobit)
    return twobit
def run_wigToBigWig(fasta, wig, sizes, path_to_results):
    """Run ``wigToBigWig`` on ``wig`` and return the ``.bw`` path.

    The output file is ``<name>.gc5Base.bw`` in ``path_to_results``, where
    ``<name>`` is ``utils.get_name(fasta)``; ``fasta`` is used only to build
    that name.

    Args:
        fasta: Path whose name (per ``utils.get_name``) prefixes the output.
        wig: Input path; checked with ``utils.check_existence_or_raise``.
        sizes: Path passed to ``wigToBigWig`` as its second argument.
        path_to_results: Directory that receives the output file.

    Returns:
        Path of the ``.gc5Base.bw`` file.

    Note:
        The command's exit status is not inspected.
    """
    utils.check_existence_or_raise(wig)

    bw_name = utils.get_name(fasta) + '.gc5Base' + '.bw'
    bigWig = os.path.join(path_to_results, bw_name)
    staging = utils.atomic_tmp_file(bigWig)

    command = " ".join(['wigToBigWig', wig, sizes, staging])
    subprocess.call(command, shell=True)

    utils.atomic_install(staging, bigWig)
    return bigWig
def run_gcPercent(fasta, twobit, path_to_results):
    """Run ``hgGcPercent`` in wiggle-output mode and return the ``.wig`` path.

    The output file is ``gc5Base.<name>.wig`` in ``path_to_results``, where
    ``<name>`` is ``utils.get_name(twobit)``. The command is invoked with
    ``-wigOut -doGaps -win=5 -file=<tmp> -verbose=0`` followed by
    ``utils.get_name(fasta)`` and ``path_to_results`` as positional
    arguments.

    Args:
        fasta: Path whose name (per ``utils.get_name``) is passed to the tool.
        twobit: Path checked with ``utils.check_existence_or_raise``; its
            name is used in the output file name.
        path_to_results: Output directory, also passed to the tool as its
            last argument.

    Returns:
        Path of the ``.wig`` file.

    Note:
        The command's exit status is not inspected.
    """
    utils.check_existence_or_raise(twobit)

    wig_name = 'gc5Base.' + utils.get_name(twobit) + '.wig'
    wig = os.path.join(path_to_results, wig_name)
    staging = utils.atomic_tmp_file(wig)

    argv = [
        'hgGcPercent',
        '-wigOut',
        '-doGaps',
        '-win=5',
        '-file=' + staging,
        '-verbose=0',
        utils.get_name(fasta),
        path_to_results,
    ]
    command = " ".join(argv)
    subprocess.call(command, shell=True)

    utils.atomic_install(staging, wig)
    return wig
def run_wigToBigWig(fasta, wig, sizes, path_to_results):
    """Convert ``wig`` with ``wigToBigWig`` into ``<name>.gc5Base.bw``.

    ``<name>`` is ``utils.get_name(fasta)``; the file is placed in
    ``path_to_results``. The tool writes to a path from
    ``utils.atomic_tmp_file`` that is then handed to
    ``utils.atomic_install``.

    Args:
        fasta: Only used to derive the output file name.
        wig: Input path, verified by ``utils.check_existence_or_raise``.
        sizes: Second argument given to ``wigToBigWig``.
        path_to_results: Destination directory.

    Returns:
        The final ``.gc5Base.bw`` path.

    Note:
        The return code of the command is ignored.
    """
    utils.check_existence_or_raise(wig)

    bigWig = os.path.join(
        path_to_results,
        utils.get_name(fasta) + '.gc5Base' + '.bw',
    )
    scratch = utils.atomic_tmp_file(bigWig)

    argv = ['wigToBigWig']
    argv.extend([wig, sizes, scratch])
    subprocess.call(" ".join(argv), shell=True)

    utils.atomic_install(scratch, bigWig)
    return bigWig
def run_genomecov(bed, size):
    """Run ``bedtools genomecov`` on ``bed`` and return the output path.

    The output path is ``bed`` with ``genomecov`` inserted after the first
    dot-separated component of its *file name*, e.g.
    ``dir/x.bed`` -> ``dir/x.genomecov.bed``. Only the file name is split, so
    dots in directory components (``./x.bed``, ``/data/v1.2/x.bed``) no
    longer cause the tag to land inside the directory part.

    Args:
        bed: Path of the input BED file (passed to ``-i``).
        size: Path passed to ``-g``.

    Returns:
        Path of the genomecov output file.

    Note:
        The command's exit status is not inspected.
    """
    # Local import keeps this block self-contained.
    import os.path

    filename = os.path.basename(bed)
    # Keep the directory prefix byte-for-byte (separators included).
    prefix = bed[:len(bed) - len(filename)]
    pieces = filename.split('.')
    pieces.insert(1, 'genomecov')
    genomecov = prefix + '.'.join(pieces)

    tmp = utils.atomic_tmp_file(genomecov)
    params = ['bedtools genomecov -i', bed, '-g', size, '>', tmp]
    # shell=True is required here: the redirection is performed by the shell.
    subprocess.call(' '.join(params), shell=True)
    utils.atomic_install(tmp, genomecov)
    return genomecov
def run_sort(bed):
    """Run ``bedtools sort`` on ``bed`` and return the output path.

    The output path is ``bed`` with ``sorted`` inserted after the first
    dot-separated component of its *file name*, e.g.
    ``dir/x.bed`` -> ``dir/x.sorted.bed``. Only the file name is split, so
    dots in directory components (``./x.bed``, ``/data/v1.2/x.bed``) no
    longer cause the tag to land inside the directory part.

    Args:
        bed: Path of the input BED file (passed to ``-i``).

    Returns:
        Path of the sorted output file.

    Note:
        The command's exit status is not inspected.
    """
    # Local import keeps this block self-contained.
    import os.path

    filename = os.path.basename(bed)
    # Keep the directory prefix byte-for-byte (separators included).
    prefix = bed[:len(bed) - len(filename)]
    pieces = filename.split('.')
    pieces.insert(1, 'sorted')
    # Named ``sorted_bed`` rather than ``sorted`` to avoid shadowing the
    # builtin.
    sorted_bed = prefix + '.'.join(pieces)

    tmp = utils.atomic_tmp_file(sorted_bed)
    params = ['bedtools sort -i', bed, '>', tmp]
    # shell=True is required here: the redirection is performed by the shell.
    subprocess.call(' '.join(params), shell=True)
    utils.atomic_install(tmp, sorted_bed)
    return sorted_bed
def run_gcPercent(fasta, twobit, path_to_results):
    """Write ``gc5Base.<name>.wig`` using ``hgGcPercent -wigOut``.

    ``<name>`` is ``utils.get_name(twobit)`` and the file goes into
    ``path_to_results``. The tool receives the options
    ``-wigOut -doGaps -win=5 -file=<tmp> -verbose=0`` and then two
    positional arguments: ``utils.get_name(fasta)`` and ``path_to_results``.

    Args:
        fasta: Its name (per ``utils.get_name``) is the first positional
            argument of the tool.
        twobit: Verified by ``utils.check_existence_or_raise``; its name is
            embedded in the output file name.
        path_to_results: Destination directory and last tool argument.

    Returns:
        The final ``.wig`` path.

    Note:
        The return code of the command is ignored.
    """
    utils.check_existence_or_raise(twobit)

    wig = os.path.join(
        path_to_results,
        'gc5Base.' + utils.get_name(twobit) + '.wig',
    )
    scratch = utils.atomic_tmp_file(wig)

    options = ['-wigOut', '-doGaps', '-win=5', '-file=' + scratch,
               '-verbose=0']
    positional = [utils.get_name(fasta), path_to_results]
    subprocess.call(
        " ".join(['hgGcPercent'] + options + positional), shell=True)

    utils.atomic_install(scratch, wig)
    return wig
def run_filtering(sorted_psl, results):
    """Filter a PSL file with ``pslCDnaFilter`` and return the output path.

    The filter runs with
    ``-localNearBest=0.1 -ignoreNs -minCover=0.6 -repsAsMatch`` and writes
    four side files into ``<results>/filtering_stats`` (created through
    ``utils.create_dir_if_not_exists``): ``dropped.out``,
    ``weird_overlapped.out``, ``align_stats.out`` and ``overall_stats.out``.
    The filtered alignments go to ``<name>.filtered.psl`` in ``results``,
    where ``<name>`` is ``utils.get_name(sorted_psl)``.

    Args:
        sorted_psl: Input PSL path; checked with
            ``utils.check_existence_or_raise`` before anything else.
        results: Directory for the filtered file and the stats directory.

    Returns:
        Path of the ``.filtered.psl`` file.

    Note:
        The command's exit status is not inspected.
    """
    utils.check_existence_or_raise(sorted_psl)

    stats = os.path.join(results, 'filtering_stats')
    utils.create_dir_if_not_exists(stats)

    filtered_name = utils.get_name(sorted_psl) + '.filtered.psl'
    filtered_psl = os.path.join(results, filtered_name)
    staging = utils.atomic_tmp_file(filtered_psl)

    dropped_out = os.path.join(stats, 'dropped.out')
    weird_out = os.path.join(stats, 'weird_overlapped.out')
    align_out = os.path.join(stats, 'align_stats.out')
    overall_out = os.path.join(stats, 'overall_stats.out')

    argv = [
        'pslCDnaFilter',
        '-localNearBest=0.1 -ignoreNs -minCover=0.6 -repsAsMatch',
        ' -dropped=' + dropped_out,
        '-weirdOverlapped=' + weird_out,
        '-alignStats=' + align_out,
        '-statsOut=' + overall_out,
        sorted_psl,
        staging,
    ]
    subprocess.call(" ".join(argv), shell=True)

    utils.atomic_install(staging, filtered_psl)
    return filtered_psl