def evm_partition(ref_fa, evm, gffs=[''], otherParams=['']):
    '''Run the EVM input partitioning command.

    The command uses a segment size of 50000000 and an overlap size of 10000
    and writes its partition listing to ``partitions_list.out``.

    * ref_fa: str. Genome fasta file passed as ``--genome``.
    * evm: str. Path of the script to execute.
    * gffs: list of str. Evidence arguments, joined with single spaces.
    * otherParams: list of str. Extra arguments, joined with single spaces.
    '''
    # Adjacent literals replace the backslash continuation that used to sit
    # inside the string and leaked an escape/indentation into the command.
    cmd = ('{evm} --genome {ref} {gffs} {other} --segmentSize 50000000 '
           '--overlapSize 10000 --partition_listing partitions_list.out'
           ).format(evm=evm, ref=ref_fa, gffs=' '.join(gffs),
                    other=' '.join(otherParams))
    print(cmd)
    sarge.run(cmd)
def main_evm(thread):
    '''Run the EVM workflow inside ``evm_path``.

    Steps: partition the inputs, write the per-partition command list, run
    those commands in a process pool, recombine the partial outputs, convert
    them to GFF3, concatenate the GFF3 files into ``evm.merge.gff`` and call
    ``filter_evm_gff``.

    Relies on names defined outside this function: ``evm_path``, ``evm``,
    ``ref_fa``, ``weight_fn``, ``genemark_gff``, ``tr_gff`` and ``pr_gff``.

    * thread: int or str. Number of worker processes used in step 3.
    '''
    os.chdir(evm_path)
    evm_gffs = [
        '--gene_predictions ' + genemark_gff,
        '--transcript_alignments ' + tr_gff,
        '--protein_alignments ' + pr_gff,
    ]
    # 1. partition input
    evm_partition(ref_fa, evm + '/EvmUtils/partition_EVM_inputs.pl', evm_gffs)
    # 2. generate command lines
    evm_cmd_out = 'evm.out'
    cmd_fn = 'commands.list'
    evm_cmd_list(evm_cmd_out, cmd_fn, evm + '/EvmUtils/write_EVM_commands.pl',
                 ref_fa, weight_fn, 'partitions_list.out', evm_gffs)
    # 3. run commands
    # Read the list through a context manager so the handle is always closed.
    with open(cmd_fn) as cmd_file:
        cmds = cmd_file.readlines()
    pool = mp.Pool(processes=int(thread))
    for cmd in cmds:
        pool.apply_async(run_cmd, args=(cmd, ))
    pool.close()
    pool.join()
    # 4. combine results
    evm_combine = evm + '/EvmUtils/recombine_EVM_partial_outputs.pl'
    combine_partition(evm_combine, 'partitions_list.out')
    # 5. transfer to gff
    to_gff = evm + '/EvmUtils/convert_EVM_outputs_to_GFF3.pl'
    cmd = (
        '{evm} --partitions partitions_list.out --output evm.out --genome {ref}'
    ).format(evm=to_gff, ref=ref_fa)
    sarge.run(cmd)
    # 6. merge gff
    fns = glob.glob('*/*.out.gff3')
    cmd = ('cat {input} > evm.merge.gff').format(input=' '.join(fns))
    sarge.run(cmd)
    # 7. extract genes supported by two algorithm
    filter_evm_gff(evm_path)
def remove_cref(tex, cwd):
    """Apply cleveref's ``poorman`` sed script to a LaTeX document.

    The ``poorman`` option is added to the cleveref import in
    ``<cwd>/dynlearn.sty``, the document is compiled, ``<name>.sed`` (expected
    to be produced by that compile step) is applied to ``<name>.tex``, and the
    cleveref import in the style file is handed to ``comment_lines``.

    Args:
        tex: File name of the main ``.tex`` document, relative to ``cwd``.
        cwd: Directory holding the document and ``dynlearn.sty``.
    """
    sty_file = cwd + '/dynlearn.sty'

    def _with_poorman(match):
        # One pass over each import: running two substitutions in sequence
        # turned a bare import into ``[poorman,poorman]``.
        options = match.group(1)
        if not options:
            return 'usepackage[poorman]{cleveref}'
        if options == 'poorman':
            return match.group(0)
        return 'usepackage[%s,poorman]{cleveref}' % options

    # add poorman to cleveref import
    with open(sty_file) as dynlearn:
        source = dynlearn.read()
    source = re.sub(r'usepackage(?:\[(\w*)\])?\{cleveref\}', _with_poorman,
                    source)
    with open(sty_file, 'w') as dynlearn:
        dynlearn.write(source)

    # generate and run the sed script
    compile_tex(tex, cwd)
    # Strip only the final extension so names such as "my.paper.tex" survive.
    stem = tex.rsplit('.', 1)[0]
    sed_cmd = sarge.shell_format("sed -f {0}.sed {0}.tex > {0}.tex.temp", stem)
    sarge.run(sed_cmd, cwd=cwd)
    sarge.run(sarge.shell_format("mv {0}.temp {0}", tex), cwd=cwd)

    # remove the cleveref import
    cref_regex3 = r'\\usepackage\[.*\]\{cleveref\}'
    comment_lines(sty_file, [cref_regex3])
def git_dir(dir_with_file):
    """Initialise a git repository in the directory and commit its contents.

    Runs ``git init``, sets a user e-mail and name, stages everything and
    creates an initial commit, then returns ``dir_with_file`` unchanged.
    """
    repo = str(dir_with_file)
    setup_commands = (
        'git init',
        'git config user.email "*****@*****.**"',
        'git config user.name "You"',
        'git add .',
        'git commit -m "Initial commit"',
    )
    for command in setup_commands:
        run(command, cwd=repo)
    return dir_with_file
def bwa_Db(db_path, ref_fa):
    """Create the index directory when missing and run ``bwa index``.

    * db_path: str. Directory for the index; the prefix is ``<db_path>/bwa``.
    * ref_fa: str. Reference fasta file to index.
    """
    index_dir_missing = not os.path.exists(db_path)
    if index_dir_missing:
        os.mkdir(db_path)
    template = 'bwa index -p {db_path}/bwa -a bwtsw {fa}'
    index_cmd = template.format(db_path=db_path, fa=ref_fa)
    print(index_cmd)
    sys.stdout.flush()
    sarge.run(index_cmd)
def STAR(fastqFiles, outSamFile, db_path, thread=1, annotation='', otherParameters=['']):
    """Align single-end or paired-end reads with STAR and rename the output.

    * fastqFiles: list of str. One (single-end) or two (paired-end) fastq
      files; they are read through ``zcat``.
    * outSamFile: str. Used as STAR's output prefix and as the final name the
      alignment file is renamed to.
    * db_path: str. STAR genome directory.
    * thread: int. Value for ``--runThreadN``.
    * annotation: str. Optional annotation file passed as ``--sjdbGTFfile``;
      names ending in ``gff``/``gff3`` also get
      ``--sjdbGTFtagExonParentTranscript Parent``.
    * otherParameters: list of str. Extra STAR arguments; not modified.

    Raises ValueError when fastqFiles does not hold one or two files.
    """
    # Work on a copy: extending the argument in place grew the shared default
    # list (and the caller's list) on every call with an annotation.
    extra = list(otherParameters)
    if annotation != '':
        extra.append('--sjdbGTFfile {gff}'.format(gff=annotation))
        if annotation.endswith(('gff', 'gff3')):
            extra.append('--sjdbGTFtagExonParentTranscript Parent')
    # generate command
    if len(fastqFiles) not in (1, 2):
        raise ValueError(
            'STAR expects 1 or 2 fastq files, got {n}'.format(n=len(fastqFiles)))
    starCmd = ('STAR --genomeDir {ref} --readFilesCommand zcat '
               '--readFilesIn {reads} --runThreadN {thread} '
               '--outFileNamePrefix {output} --outSAMstrandField intronMotif '
               '--outFilterIntronMotifs RemoveNoncanonical').format(
                   ref=db_path, reads=' '.join(str(fq) for fq in fastqFiles),
                   thread=thread, output=outSamFile)
    cmd = starCmd + ' ' + ' '.join(extra)
    print(cmd)
    sys.stdout.flush()
    sarge.run(cmd)
    # Look inside each argument as well, so the option is recognised when it is
    # passed as one string such as '--outSAMtype BAM SortedByCoordinate'.
    if any('SortedByCoordinate' in param for param in extra):
        outFile = outSamFile + 'Aligned.sortedByCoord.out.bam'
    else:
        outFile = outSamFile + 'Aligned.out.bam'
    os.rename(outFile, outSamFile)
    if os.path.exists(outSamFile + '_STARgenome'):
        shutil.rmtree(outSamFile + '_STARgenome')
def install_package(self):
    """
    Run the package's install command in a sub-process, capturing
    stdout and stderr. The command line is the space-joined result of
    ``self.install_command()``.
    """
    command_line = " ".join(self.install_command())
    captured_streams = dict(stdout=Capture(), stderr=Capture())
    run(command_line, **captured_streams)
def gff2gb(gff, out_gb, ref):
    '''Convert a gff file to a genbank file with ``gff2gbSmallDNA.pl``.

    * gff: str. Input gff file.
    * out_gb: str. Output genbank file.
    * ref: str. Reference sequence file passed as the second argument.
    '''
    fields = {'gff': gff, 'ref': ref, 'gb': out_gb}
    command = 'gff2gbSmallDNA.pl {gff} {ref} 1000 {gb}'.format(**fields)
    print(command)
    sarge.run(command)
def bash5tool(hdf5File, seqtype):
    """Run ``bash5tools.py`` to extract subreads from an hdf5 file.

    * hdf5File: str. Input file; its name minus the last seven characters is
      used as the output prefix.
    * seqtype: str. Value for ``--outType``.
    """
    prefix = hdf5File[:-7]
    template = (
        'bash5tools.py --outFilePrefix {h5} --readType subreads --outType {type} '
        '--minReadScore 0.8 {input}')
    sarge.run(template.format(h5=prefix, type=seqtype, input=hdf5File))
def extract_topics(self, topics, topic_format):
    """Prepare the topics file for a run.

    For the "TREC" format, ``<bin_path>/extract_topics`` is run with output
    name ``topics``; for any other format the file is copied to
    ``topics.title``.

    * topics: str. Path of the topics file.
    * topic_format: str. Format name; only "TREC" is treated specially.
    """
    # Compare by value: ``is`` tests object identity and matched the literal
    # only when the interpreter happened to intern both strings.
    if topic_format == "TREC":
        cmd = "%s/extract_topics -i %s -o topics" % (self.bin_path, topics)
    else:
        cmd = "cp %s topics.title" % (topics)
    sarge.run(cmd)
def Message(string, email):
    """Send an e-mail with an empty body through ``mailx``.

    * string: str. Used as the subject line.
    * email: str. Recipient address.
    """
    template = 'echo {quote}|mailx -s "{string}" {email}'
    fields = {'quote': "", 'string': string, 'email': email}
    mail_cmd = template.format(**fields)
    sarge.run(mail_cmd)
def Honey_tails(finalBam, bamTail, otherParams=['']):
    """Run ``Honey.py tails`` on a bam file.

    * finalBam: str. Input bam file.
    * bamTail: str. Output name passed with ``-o``.
    * otherParams: list of str. Extra arguments appended to the command.
    """
    base = 'Honey.py tails -o {out} {input} '.format(out=bamTail, input=finalBam)
    full_cmd = base + ' '.join(otherParams)
    print(full_cmd)
    sarge.run(full_cmd)
def _render(self, template_filename):
    """Render a badge template and return the result with its MIME type.

    The page URL points at the instance view when ``self.badge_instance`` is
    set and at the preview view otherwise. The ``pdf`` format is rendered to a
    temporary file through ``settings.CHROME_PDF_BIN``; every other format is
    read from the stdout of ``settings.CAPTURE_BIN``.

    Args:
        template_filename: Name of the template file to render.

    Returns:
        tuple: ``(image, mime)`` - the rendered bytes and the MIME type
        reported by ``magic.from_buffer``.
    """
    render_config = self.render_config(template_filename)
    if self.badge_instance:
        url = reverse(
            "main:instance-filename",
            kwargs={
                "slug": self.badge_template.slug,
                "pk": self.badge_instance.pk,
                "filename": template_filename,
            },
        )
    else:
        url = reverse(
            "main:preview-filename",
            kwargs={
                "slug": self.badge_template.slug,
                "filename": template_filename,
            },
        )
    file_format = render_config.get("format", "png")
    if file_format == "pdf":
        f = tempfile.NamedTemporaryFile(delete=False)
        tmppdf = f.name
        f.close()
        try:
            # Each fragment ends with a space; without it the paper height and
            # the next flag were glued together as "11.69--no-margins".
            cmd = (
                "node {chrome_pdf_bin} "
                "--paper-width 8.27 --paper-height 11.69 "
                "--no-margins --landscape --include-background "
                "--url {url} --pdf {tmppdf}".format(
                    chrome_pdf_bin=settings.CHROME_PDF_BIN,
                    tmppdf=tmppdf,
                    url="{}{}".format(settings.RENDER_PREFIX_URL, url),
                ))
            sarge.run(cmd, stdout=sarge.Capture())
            with open(tmppdf, "rb") as pdf_file:
                image = pdf_file.read()
        finally:
            # Remove the temporary file even when rendering or reading fails.
            os.unlink(tmppdf)
    else:
        cmd = ("{capture_bin} {url} --width={width} --height={height} "
               "--type={type} --element={element} --no-default-background".
               format(
                   width=render_config.get("screen_width", 1000),
                   height=render_config.get("screen_height", 1000),
                   type=file_format,
                   element=sarge.shell_quote(
                       render_config.get("element", ".screenshot")),
                   url="{}{}".format(settings.RENDER_PREFIX_URL, url),
                   capture_bin=settings.CAPTURE_BIN,
               ))
        p = sarge.run(cmd, stdout=sarge.Capture())
        image = p.stdout.bytes
    mime = magic.from_buffer(image, mime=True)
    return image, mime
def checkRaspiTemp(self):
    """Read the Raspberry Pi temperature and publish it as a plugin message.

    On Linux the temperature comes from ``vcgencmd measure_temp`` and, when
    ``self.displayRaspiMem`` is set, the available memory from
    ``/home/pi/bin/memavail``. On other platforms a random temperature is
    generated when ``self.debugMode`` is set. If no temperature can be parsed
    from the response, ``self.isRaspi`` is set to False and nothing is sent.
    """
    from sarge import run, Capture

    self._logger.debug("Checking Raspberry Pi internal temperature")
    mem = 0
    # Start from an empty response so that a platform matching neither branch
    # reaches the "no match" handling instead of an unbound name.
    p = ""
    # Python 2 reports "linux2" while Python 3 reports "linux".
    if sys.platform.startswith("linux"):
        p = run("/opt/vc/bin/vcgencmd measure_temp", stdout=Capture())
        p = p.stdout.text
        if self.displayRaspiMem:
            m = run("/home/pi/bin/memavail", stdout=Capture())
            mem = int(m.stdout.text)
    elif self.debugMode:
        import random

        def randrange_float(start, stop, step):
            return random.randint(0, int(
                (stop - start) / step)) * step + start

        p = "temp=%s'C" % randrange_float(5, 60, 0.1)
    self._logger.debug("response from sarge: %s" % p)
    # The value sits between "=" and the quote character, e.g. temp=42.0'C
    match = re.search('=(.*)\'', p)
    if not match:
        self.isRaspi = False
    else:
        temp = match.group(1)
        self._logger.debug("match: %s" % temp)
        self._plugin_manager.send_plugin_message(
            self._identifier,
            dict(israspi=self.isRaspi, raspitemp=temp, memavail=mem))
def move_window(window, geometry, config):
    """Move or resize a window through ``wmctrl``.

    The maximized state is removed first, then the new geometry is applied.

    Args:
        window: An instance of Window; its ``id`` selects the target window.
        geometry: An instance of Geometry with the new position and size of
            the window (incl. the decorations).
        config: Passed on to ``remove_window_decorations``.
    """
    inner = remove_window_decorations(geometry, config)

    # Unmaximize the window
    unmaximize_cmd = (
        'wmctrl -i -r {} -b "remove,maximized_vert,maximized_horz"'.format(
            window.id))
    run(unmaximize_cmd)

    # Change the geometry
    placement = {
        'id': window.id,
        'x': int(inner.x),
        'y': int(inner.y),
        'w': int(inner.w),
        'h': int(inner.h),
    }
    run('wmctrl -i -r {id} -e "0,{x},{y},{w},{h}"'.format(**placement))
def run_cmd(cmd):
    """Print and execute one command, failing loudly when it cannot be run.

    * cmd: str. Command line handed to ``sarge.run``.

    Raises AssertionError when printing or running the command raises.
    """
    try:
        print(cmd)
        sys.stdout.flush()
        sarge.run(cmd)
    except Exception:
        # A pre-formatted string prints the same under Python 2 and 3, unlike
        # the former ``print cmd,'error'`` statement.
        message = '%s error' % (cmd,)
        print(message)
        # An explicit raise survives ``python -O``, which strips ``assert``.
        raise AssertionError(message)
def evm_partition(ref_fa, evm, gffs=[''], otherParams=['']):
    '''Run the EVM input partitioning command.

    Uses ``--segmentSize 50000000`` and ``--overlapSize 10000``; the partition
    listing is written to ``partitions_list.out``.

    * ref_fa: str. Genome fasta file passed as ``--genome``.
    * evm: str. Path of the script to execute.
    * gffs: list of str. Evidence arguments, joined with single spaces.
    * otherParams: list of str. Extra arguments, joined with single spaces.
    '''
    # The template is built from adjacent literals; the old backslash
    # continuation inside the string leaked into the command line.
    template = ('{evm} --genome {ref} {gffs} {other} --segmentSize 50000000 '
                '--overlapSize 10000 --partition_listing partitions_list.out')
    cmd = template.format(evm=evm, ref=ref_fa, gffs=' '.join(gffs),
                          other=' '.join(otherParams))
    print(cmd)
    sarge.run(cmd)
def evm_cmd_list(out_fn, cmd_fn, evm, ref_fa, weight_fn, partition, gffs=['']):
    '''Write the list of EVM commands to a file.

    * out_fn: str. Value for ``--output_file_name``.
    * cmd_fn: str. File that receives the command's stdout.
    * evm: str. Path of the script to execute.
    * ref_fa: str. Genome fasta file passed as ``--genome``.
    * weight_fn: str. Weights file passed as ``--weights``.
    * partition: str. Partition listing passed as ``--partitions``.
    * gffs: list of str. Evidence arguments, joined with single spaces.
    '''
    # Adjacent literals replace the backslash continuation that used to sit
    # inside the string and leaked an escape/indentation into the command.
    cmd = ('{evm} --genome {ref} --weights {w} {gffs} --output_file_name {out_fn} '
           '--partitions {par} > {cmd_l}').format(
               evm=evm, ref=ref_fa, w=weight_fn, gffs=' '.join(gffs),
               out_fn=out_fn, par=partition, cmd_l=cmd_fn)
    print(cmd)
    sarge.run(cmd)
def uninstall_package(self):
    """
    Uninstall ``self.package`` with pip in a sub-process, using the
    current interpreter and capturing stdout and stderr.
    """
    command_line = ' '.join(
        [sys.executable, '-m pip uninstall -y', self.package])
    captured_streams = dict(stdout=Capture(), stderr=Capture())
    run(command_line, **captured_streams)
def sniffle(bam, outVCF, otherParameters=['']):
    """Run ``sniffles`` on a bam file and write a VCF.

    * bam: str. Input bam file (``-m``).
    * outVCF: str. Output VCF file (``-v``).
    * otherParameters: list of str. Extra arguments; the default ``['']``
      adds nothing.
    """
    pieces = ['sniffles -m {bam} -v {outVCF} '.format(bam=bam, outVCF=outVCF)]
    if otherParameters != ['']:
        pieces.append(' '.join(otherParameters))
    sniffles_cmd = ''.join(pieces)
    print(sniffles_cmd)
    sys.stdout.flush()
    sarge.run(sniffles_cmd)
def upgrade_package(self):
    """
    Upgrade ``self.package`` with pip in a sub-process, using the current
    interpreter and capturing stdout and stderr.
    """
    command_line = ' '.join(
        [sys.executable, '-m pip install', self.package, '--upgrade'])
    captured_streams = dict(stdout=Capture(), stderr=Capture())
    run(command_line, **captured_streams)
def RNA_BaseRecalibrator4(realiBam, recalBam, gatk, table, ref_fa, gold_vcf, thread='1'):
    '''Step 4 of base recalibration: run GATK PrintReads with a BQSR table.

    * realiBam: str. Input bam file.
    * recalBam: str. Output bam file.
    * gatk: str. Path of the GATK jar.
    * table: str. Table passed as ``-BQSR``.
    * ref_fa: str. Reference fasta file.
    * gold_vcf: not used by this step.
    * thread: int or str. Value for ``-nct``.
    '''
    template = ('java -jar {gatk} -T PrintReads -R {ref_fa} '
                '-I {input} -BQSR {table} -o {output} -nct {thread}')
    values = dict(gatk=gatk, ref_fa=ref_fa, input=realiBam, table=table,
                  output=recalBam, thread=str(thread))
    print_reads_cmd = template.format(**values)
    print(print_reads_cmd)
    sys.stdout.flush()
    sarge.run(print_reads_cmd)
def RNA_BaseRecalibrator3(table, plot, post_table, gatk, ref_fa):
    '''Step 3 of base recalibration: compare two tables with AnalyzeCovariates.

    * table: str. Table passed as ``-before``.
    * plot: str. Output file passed as ``-plots``.
    * post_table: str. Table passed as ``-after``.
    * gatk: str. Path of the GATK jar.
    * ref_fa: str. Reference fasta file.
    '''
    template = ('java -jar {gatk} -T AnalyzeCovariates -R {ref_fa} '
                '-before {table} -after {post_table} -plots {output}')
    values = {
        'gatk': gatk,
        'ref_fa': ref_fa,
        'table': table,
        'post_table': post_table,
        'output': plot,
    }
    compare_cmd = template.format(**values)
    print(compare_cmd)
    sys.stdout.flush()
    sarge.run(compare_cmd)
def main_evm(thread):
    '''Run the EVM workflow inside ``evm_path``.

    Partitions the inputs, writes the per-partition command list, runs those
    commands in a process pool, recombines the partial outputs, converts them
    to GFF3, concatenates the GFF3 files into ``evm.merge.gff`` and calls
    ``filter_evm_gff``.

    Uses names defined outside this function: ``evm_path``, ``evm``,
    ``ref_fa``, ``weight_fn``, ``genemark_gff``, ``tr_gff`` and ``pr_gff``.

    * thread: int or str. Number of worker processes used in step 3.
    '''
    os.chdir(evm_path)
    evm_gffs = ['--gene_predictions ' + genemark_gff,
                '--transcript_alignments ' + tr_gff,
                '--protein_alignments ' + pr_gff]
    # 1. partition input
    evm_partition(ref_fa, evm + '/EvmUtils/partition_EVM_inputs.pl', evm_gffs)
    # 2. generate command lines
    evm_cmd_out = 'evm.out'
    cmd_fn = 'commands.list'
    evm_cmd_list(evm_cmd_out, cmd_fn, evm + '/EvmUtils/write_EVM_commands.pl',
                 ref_fa, weight_fn, 'partitions_list.out', evm_gffs)
    # 3. run commands
    # The context manager closes the command list; it used to stay open.
    with open(cmd_fn) as cmd_file:
        cmds = cmd_file.readlines()
    pool = mp.Pool(processes=int(thread))
    for cmd in cmds:
        pool.apply_async(run_cmd, args=(cmd,))
    pool.close()
    pool.join()
    # 4. combine results
    evm_combine = evm + '/EvmUtils/recombine_EVM_partial_outputs.pl'
    combine_partition(evm_combine, 'partitions_list.out')
    # 5. transfer to gff
    to_gff = evm + '/EvmUtils/convert_EVM_outputs_to_GFF3.pl'
    cmd = ('{evm} --partitions partitions_list.out --output evm.out --genome {ref}').format(
        evm=to_gff, ref=ref_fa)
    sarge.run(cmd)
    # 6. merge gff
    fns = glob.glob('*/*.out.gff3')
    cmd = ('cat {input} > evm.merge.gff').format(input=' '.join(fns))
    sarge.run(cmd)
    # 7. extract genes supported by two algorithm
    filter_evm_gff(evm_path)
def ffmpeg_from_mjpeg(self):
    """Start webcamd and launch ffmpeg against its MJPEG stream.

    Waits for port 8080 on localhost to close, starts the ``webcamd`` service,
    grabs one frame (retried with exponential backoff, up to four tries) to
    learn the image size, stores the matching bitrate on ``self.bitrate`` and
    starts ffmpeg through the wrapper.
    """

    @backoff.on_exception(backoff.expo, Exception, jitter=None, max_tries=4)
    def capture_with_retry(settings_for_webcam):
        return capture_jpeg(settings_for_webcam)

    # wait for WebcamServer to be clear of port 8080
    wait_for_port_to_close('127.0.0.1', 8080)
    sarge.run('sudo service webcamd start')

    webcam_settings = self.plugin._settings.global_get(["webcam"])
    frame = capture_with_retry(webcam_settings)
    (_, width, height) = get_image_info(frame)

    stream_path = webcam_settings.get("stream", "/webcam/?action=stream")
    source_url = webcam_full_url(stream_path)
    self.bitrate = bitrate_for_dim(width, height)

    ffmpeg_args = (
        '-re -i {} -b:v {} -pix_fmt yuv420p -s {}x{} -flags:v +global_header -vcodec h264_omx'
        .format(source_url, self.bitrate, width, height))
    self.start_ffmpeg(ffmpeg_args, via_wrapper=True)
def htseq_count(sortedBam, countFile, annotation, strand, annotationSource):
    """Count reads per feature with ``htseq-count``.

    * sortedBam: str. Input bam file.
    * countFile: str. File that receives the counts (stdout redirect).
    * annotation: str. Annotation file.
    * strand: str. Value for ``-s``.
    * annotationSource: str. One of 'ncbi', 'ensembl', 'genedb' or
      'plasmodium'; selects the feature type (``-t``) and id attribute (``-i``).

    Raises ValueError for an unknown annotationSource.
    """
    # Feature type and id attribute for each supported annotation source.
    source_settings = {
        'ncbi': ('exon', 'gene'),
        'ensembl': ('exon', 'gene_id'),
        'genedb': ('CDS', 'Parent'),
        'plasmodium': ('exon', 'Parent'),
    }
    # An unknown source used to surface later as an unbound-variable error.
    if annotationSource not in source_settings:
        raise ValueError(
            'unknown annotationSource {src!r}; expected one of {known}'.format(
                src=annotationSource, known=sorted(source_settings)))
    seqType, id_attr = source_settings[annotationSource]
    # run htseq-count
    cmd = ('htseq-count -f bam -s {strand} -t {type} -i {gene} {bam} {annotation} > {output}').format(
        strand=strand, type=seqType, gene=id_attr, bam=sortedBam,
        annotation=annotation, output=countFile)
    print(cmd)
    sys.stdout.flush()
    sarge.run(cmd)
def copy_fq_files(path, target_path, index=''):
    '''Concatenate the R1 and R2 fastq files of each sample folder.

    path: path of raw fastq files, one sub-folder per sample
    target_path: where ``<folder>_1.fq.gz`` and ``<folder>_2.fq.gz`` are written
    index: list of integers selecting folders (in natural sort order);
           '' means all folders are processed
    '''
    os.chdir(path)
    sample_dirs = natsorted(
        [entry for entry in os.listdir(path) if os.path.isdir(entry)])
    if index == '':
        index = range(0, len(sample_dirs))
    selected = [sample_dirs[i] for i in index]
    # Read-name marker and the output template for each mate.
    mates = (
        ('R1', 'cat {input} > {tar}/{folder}_1.fq.gz'),
        ('R2', 'cat {input} > {tar}/{folder}_2.fq.gz'),
    )
    for folder in selected:
        fq_path = path + '/' + folder
        os.chdir(fq_path)
        gz_files = natsorted(
            [name for name in os.listdir(fq_path) if name.endswith('.fastq.gz')])
        for marker, template in mates:
            mate_files = [name for name in gz_files if marker in name]
            cat_cmd = template.format(
                input=' '.join(mate_files), folder=folder, tar=target_path)
            print(cat_cmd)
            sarge.run(cat_cmd)
def test_capture_bytes(self):
    """Captured stderr is available both as bytes and as text."""
    with Capture() as err:
        pipeline = run("cat >&2", stderr=err, shell=True, input="bar")
        self.assertEqual(pipeline.returncode, 0)
    self.assertEqual(err.bytes, b"bar")

    with Capture() as err:
        pipeline = run("cat >&2", stderr=err, shell=True, input="bar")
        self.assertEqual(pipeline.returncode, 0)
    self.assertEqual(err.text, "bar")
def load_gff(gff, ref_fa, ppl_fn, config):
    """Run the given script with ``-c``, ``-g`` and ``-P`` arguments.

    * gff: str. File passed as ``-P``.
    * ref_fa: str. Genome fasta file passed as ``-g``.
    * ppl_fn: str. Path of the script to execute.
    * config: str. Configuration file passed as ``-c``.
    """
    fields = {'ppl': ppl_fn, 'config': config, 'ref': ref_fa, 'gff': gff}
    load_cmd = '{ppl} -c {config} -g {ref} -P {gff}'.format(**fields)
    print(load_cmd)
    sarge.run(load_cmd)
def get_tags():
    """Return a dict of system tags, computing it once and caching it.

    The dict always holds ``os``, ``os_ver`` and ``arch`` from
    ``platform.uname()``. ``v4l2`` and ``usb`` are added when the matching
    commands produce output; failures of those commands are ignored.
    The result is stored in the module-level ``system_tags``.
    """
    global system_tags
    if system_tags:
        return system_tags

    # Named so the local does not shadow the ``os`` module inside this function.
    (os_name, _, os_release, _, arch, _) = platform.uname()
    tags = dict(os=os_name, os_ver=os_release, arch=arch)

    try:
        v4l2 = run('v4l2-ctl --list-devices', stdout=Capture())
        # Keep only the text before the first tab of each line.
        v4l2_out = ''.join(
            re.compile(r"^([^\t]+)",
                       re.MULTILINE).findall(v4l2.stdout.text)).replace(
                           '\n', '')
        if v4l2_out:
            tags['v4l2'] = v4l2_out
    except Exception:
        # Best effort: a missing tool must not break tag collection, but
        # KeyboardInterrupt/SystemExit are no longer swallowed.
        pass

    try:
        usb = run(
            "lsusb | cut -d ' ' -f 7- | grep -vE ' hub| Hub' | grep -v 'Standard Microsystems Corp'",
            stdout=Capture())
        usb_out = usb.stdout.text.replace('\n', '')
        if usb_out:
            tags['usb'] = usb_out
    except Exception:
        # Best effort, as above.
        pass

    system_tags = tags
    return system_tags
def build_fa_dict(ref_fa, picard):
    '''Create the sequence dictionary of a fasta file with picard.

    The output name is the fasta name with its last dot-separated part
    replaced by ``dict``.

    * ref_fa: str. Reference fasta file.
    * picard: str. Path of the picard jar.
    '''
    name_parts = ref_fa.split('.')
    dict_fn = '.'.join(name_parts[:-1]) + '.dict'
    template = 'java -jar {picard} CreateSequenceDictionary R={ref} O={out}'
    dict_cmd = template.format(picard=picard, ref=ref_fa, out=dict_fn)
    print(dict_cmd)
    sys.stdout.flush()
    sarge.run(dict_cmd)
def exonerate(ref_fa, pr_fn, out_fn):
    '''Map protein sequences to a DNA sequence with exonerate (model p2g).

    * ref_fa: str. Target fasta file (``-t``).
    * pr_fn: str. Query protein file (``-q``).
    * out_fn: str. File that receives exonerate's stdout.
    '''
    # Adjacent literals replace the backslash continuations that used to sit
    # inside the string and leaked escapes/indentation into the command.
    cmd = ('exonerate -m p2g -q {pr} -t {ref} --showalignment no '
           '--showvulgar no --showtargetgff yes --minintron 20 --percent 50 '
           '--score 100 --geneseed 250 -n 10 > {gff}').format(
               pr=pr_fn, ref=ref_fa, gff=out_fn)
    print(cmd)
    sarge.run(cmd)
def conda_Trimmomatic(fqFiles, trim_fqFiles, thread, adapter_file='', min_len=36):
    """Trim reads with trimmomatic in single-end or paired-end mode.

    * fqFiles: list of str. One (SE) or two (PE) input fastq files.
    * trim_fqFiles: list of str. Output file(s), same length as fqFiles.
    * thread: int or str. Value for ``-threads``.
    * adapter_file: str. Optional adapter fasta for ``ILLUMINACLIP``.
    * min_len: int. Value for ``MINLEN``.

    Unpaired-read files (``<input>unpair``) are removed after the run.
    Raises ValueError when fqFiles does not hold one or two files.
    """
    n_files = len(fqFiles)
    # Other counts used to fail later with an unbound-variable error.
    if n_files not in (1, 2):
        raise ValueError(
            'trimmomatic expects 1 or 2 fastq files, got {n}'.format(n=n_files))
    # main parameters
    unpair = [f + 'unpair' for f in fqFiles]
    phred = get_phred_score(fqFiles[0])
    if n_files == 1:
        trimCmd1st = ('trimmomatic SE -threads {thread} -phred{type} '
                      '{input} {output} ').format(
                          thread=int(thread), input=fqFiles[0],
                          output=trim_fqFiles[0], type=phred)
    else:
        trimCmd1st = ('trimmomatic PE -threads {thread} -phred{type} {fastq1} {fastq2} '
                      '{Trimmed1} {unpair1} {Trimmed2} {unpair2} ').format(
                          thread=int(thread), type=phred,
                          fastq1=fqFiles[0], fastq2=fqFiles[1],
                          Trimmed1=trim_fqFiles[0], Trimmed2=trim_fqFiles[1],
                          unpair1=unpair[0], unpair2=unpair[1])
    # The trimming steps are the same for both modes, so build them once.
    trimCmd2nd = 'SLIDINGWINDOW:5:10 LEADING:15 TRAILING:10 MINLEN:{len} TOPHRED33 '.format(
        len=min_len)
    # adapter file
    if adapter_file != '':
        adaptCmd = 'ILLUMINACLIP:{adapter}:2:30:10 '.format(adapter=adapter_file)
    else:
        adaptCmd = ''
    cmd = trimCmd1st + adaptCmd + trimCmd2nd
    print(cmd)
    sys.stdout.flush()
    sarge.run(cmd)
    for un in unpair:
        if os.path.exists(un):
            os.remove(un)
def _nosetests():
    """Run nosetests with coverage over every ``zato-*`` path under ``curdir``."""
    runner = os.path.join(curdir, 'bin', 'nosetests')
    package_glob = os.path.join(curdir, 'zato-*')
    targets = ' '.join(glob.iglob(package_glob))
    command = '{} {} --with-coverage --cover-package=zato --nocapture'.format(
        runner, targets)
    run(command)
def start_component(self, py_path, name, program_dir, on_keyboard_interrupt=None):
    """ Starts a component in background or foreground, depending on the 'fg' flag.

    stderr of the started program is redirected to a temporary file that is
    watched for errors; stdout is sent to /dev/null unless running in
    foreground. If an error shows up, it is logged and the process exits with
    ``self.SYS_ERROR.FAILED_TO_START``. On KeyboardInterrupt the optional
    ``on_keyboard_interrupt`` callback is invoked and the process exits with 0.
    """
    import os

    fd, tmp_path = mkstemp('-zato-start-{}.txt'.format(name.replace(' ','')))
    # Only the path is needed (the shell redirect reopens the file), so close
    # the descriptor instead of leaking it.
    os.close(fd)

    stdout_redirect = '' if self.args.fg else '1> /dev/null'
    stderr_redirect = '2> {}'.format(tmp_path)
    program = '{} -m {} {} {} {}'.format(
        get_executable(), py_path, program_dir, stdout_redirect, stderr_redirect)

    try:
        _stderr = _StdErr(
            tmp_path, stderr_sleep_fg if self.args.fg else stderr_sleep_bg)

        # ``async`` is a reserved word since Python 3.7, so the keyword is
        # passed through a dict to keep this module importable there.
        # NOTE(review): newer sarge releases spell this option ``async_`` -
        # confirm against the installed version.
        run(program, **{'async': not self.args.fg})

        # Wait a moment for any potential errors
        _err = _stderr.wait_for_error()
        if _err:
            self.logger.warn(_err)
            sys.exit(self.SYS_ERROR.FAILED_TO_START)

    except KeyboardInterrupt:
        if on_keyboard_interrupt:
            on_keyboard_interrupt()
        sys.exit(0)

    if self.show_output:
        if not self.args.fg and self.verbose:
            self.logger.debug('Zato {} `{}` starting in background'.format(name, self.component_dir))
        else:
            self.logger.info('OK')
def ngmlr(in_fa, outBam, ref_fa, thread):
    '''Align reads with ngmlr and convert the output to bam.

    * in_fa: str. Query reads file (``-q``).
    * outBam: str. Output bam file.
    * ref_fa: str. Reference fasta file (``-r``).
    * thread: int or str. Value for ``-t``.
    '''
    # The redirect target used to be the literal text "outBam", so every run
    # wrote to a file of that name and ignored the outBam argument.
    cmd = ('ngmlr -t {thread} -r {ref} -q {fa} | samtools view -hb - > {out}'
           ).format(thread=str(thread), ref=ref_fa, fa=in_fa, out=outBam)
    print(cmd)
    sys.stdout.flush()
    sarge.run(cmd)
def rnaseq_map_and_extract_by_chr(path, target_path, batch, rna_pipeline_file, rna_pipeline_param, chrom=''):
    """Map samples in batches and optionally keep one chromosome per bam.

    For each batch of sample folders: copy the files to ``target_path``, run
    the pipeline script there, and, when ``chrom`` is given, extract that
    region from every ``*.sort.bam`` in ``<target_path>/sortBam`` into its
    ``chr`` sub-folder before deleting the bam and its index. The
    ``<target_path>/bam`` folder is removed after each batch.

    * path: str. Folder holding one sub-folder per sample.
    * target_path: str. Working folder of the pipeline.
    * batch: passed to ``chunk`` to group the sample folders.
    * rna_pipeline_file: str. Pipeline script run with ``python``.
    * rna_pipeline_param: str. Argument passed to the pipeline script.
    * chrom: str. Region for ``samtools view``; '' skips the extraction.
    """
    os.chdir(path)
    sample_dirs = natsorted(
        [entry for entry in os.listdir(path) if os.path.isdir(entry)])
    pipeline_cmd_template = 'python {pipe} {pipe_param}'
    for group in chunk(sample_dirs, batch):
        # 1. copy files
        copy_files(path, target_path, group)
        # 2. map using STAR
        os.chdir(target_path)
        sarge.run(pipeline_cmd_template.format(
            pipe=rna_pipeline_file, pipe_param=rna_pipeline_param))
        # 3. extract chromosome
        if chrom != '':
            bam_path = target_path + '/sortBam'
            os.chdir(bam_path)
            chr_dir = bam_path + '/chr'
            if not os.path.exists(chr_dir):
                os.mkdir(chr_dir)
            sorted_bams = [
                name for name in os.listdir(bam_path)
                if name.endswith('.sort.bam')
            ]
            for bam in sorted_bams:
                # The output name drops the first five characters of the bam name.
                view_cmd = 'samtools view {input} {chr} > chr/{out}'.format(
                    input=bam, chr=chrom, out=bam[5:])
                print(view_cmd)
                sarge.run(view_cmd)
                os.remove(bam)
                os.remove(bam + '.bai')
        shutil.rmtree(target_path + '/bam')
def bwa_mem(fqFile, outSam, db_name, thread, otherParameters=['']):
    """Align reads with ``bwa mem`` and pipe the result into a bam file.

    * fqFile: list of str. One fastq file, or two for paired-end reads.
    * outSam: str. Output file written by ``samtools view -bh``.
    * db_name: str. bwa index prefix.
    * thread: int or str. Value for ``-t``.
    * otherParameters: list of str. Extra bwa arguments; ``['']`` adds none.
    """
    extra = ''
    if otherParameters != ['']:
        extra = ' '.join(otherParameters) + ' '
    if len(fqFile) == 1:
        reads = '{fq}'.format(fq=fqFile[0])
    else:
        reads = '{fq1} {fq2}'.format(fq1=fqFile[0], fq2=fqFile[1])
    template = ('bwa mem -t {thread} {other}{db} {reads} '
                '| samtools view -bh - > {out} ')
    align_cmd = template.format(thread=str(thread), other=extra, db=db_name,
                                reads=reads, out=outSam)
    print(align_cmd)
    sys.stdout.flush()
    sarge.run(align_cmd)
def hisat2(fqFile, outBam, db_name, thread, otherParameters=['']):
    """Align reads with ``hisat2`` and pipe the result into a bam file.

    * fqFile: list of str. One fastq file (``-U``), or two (``-1``/``-2``).
    * outBam: str. Output file written by ``samtools view -bh``.
    * db_name: str. hisat2 index prefix (``-x``).
    * thread: int or str. Value for ``-p``.
    * otherParameters: list of str. Extra hisat2 arguments; ``['']`` adds none.
    """
    extra = ''
    if otherParameters != ['']:
        extra = ' '.join(otherParameters) + ' '
    if len(fqFile) == 1:
        reads = '-U {fq}'.format(fq=fqFile[0])
    else:
        reads = '-1 {fq1} -2 {fq2}'.format(fq1=fqFile[0], fq2=fqFile[1])
    template = ('hisat2 -x {db} {reads} -t {other} -p {thread} '
                '| samtools view -bh - > {out}')
    align_cmd = template.format(db=db_name, reads=reads, other=extra,
                                thread=str(thread), out=outBam)
    print(align_cmd)
    sys.stdout.flush()
    sarge.run(align_cmd)
def make_server():
    """Run ``make server`` in ``project_base`` and echo its output, indented."""
    with Capture() as captured:
        print("Running make server:")
        pipeline = run('make server', cwd=project_base, stdout=captured)
        pipeline.wait()
        for raw_line in captured:
            print(' %s' % raw_line.rstrip())
        print('make server completed')
def focus_window(window):
    """Give focus to a window through ``wmctrl -ia``.

    Args:
        window: An instance of Window; its ``id`` selects the target window.
    """
    activate_cmd = 'wmctrl -ia ' + window.id
    run(activate_cmd)
def align_assemble(ppl_fn, config, ref_fa, rna_fa, thread, otherParameters=['']):
    '''Run the alignment assembly pipeline script with the gmap aligner.

    According to the original notes, the run generates four types of files:
    sample_mydb_pasa.assemblies.fasta :the PASA assemblies in FASTA format.
    sample_mydb_pasa.pasa_assemblies.gff3,.gtf,.bed :the PASA assembly structures.
    sample_mydb_pasa.pasa_alignment_assembly_building.ascii_illustrations.out :descriptions
        of alignment assemblies and how they were constructed from the
        underlying transcript alignments.
    sample_mydb_pasa.pasa_assemblies_described.txt :tab-delimited format
        describing the contents of the PASA assemblies, including the identity
        of those transcripts that were assembled into the corresponding structure.

    * ppl_fn: str. Path of the pipeline script to execute.
    * config: str. Configuration file (``-c``).
    * ref_fa: str. Genome fasta file (``-g``).
    * rna_fa: str. Transcript fasta file (``-t``).
    * thread: int or str. Value for ``--CPU``.
    * otherParameters: list of str. Extra arguments, joined with spaces.
    '''
    # Adjacent literals replace the backslash continuation that used to sit
    # inside the string and leaked an escape/indentation into the command.
    cmd = ('{ppl} -c {config} -C -r -R -g {ref_fa} '
           '-t {rna_fa} --ALIGNERS gmap --CPU {thread} {other}').format(
               ppl=ppl_fn, config=config, ref_fa=ref_fa, rna_fa=rna_fa,
               thread=str(thread), other=' '.join(otherParameters))
    print(cmd)
    sys.stdout.flush()
    sarge.run(cmd)
def rnaseq_map_and_extract_by_chr(path, target_path, batch, rna_pipeline_file,
                                  rna_pipeline_param, chrom=''):
    """Run the RNA-seq pipeline batch by batch, optionally keeping one region.

    Each batch of sample folders is copied to ``target_path`` and processed by
    the pipeline script. When ``chrom`` is set, that region is extracted from
    every ``*.sort.bam`` in ``<target_path>/sortBam`` into the ``chr``
    sub-folder and the bam plus its index are deleted. ``<target_path>/bam``
    is removed at the end of each batch.

    * path: str. Folder holding one sub-folder per sample.
    * target_path: str. Working folder of the pipeline.
    * batch: passed to ``chunk`` to group the sample folders.
    * rna_pipeline_file: str. Pipeline script run with ``python``.
    * rna_pipeline_param: str. Argument passed to the pipeline script.
    * chrom: str. Region for ``samtools view``; '' skips the extraction.
    """
    os.chdir(path)
    candidates = os.listdir(path)
    sample_dirs = natsorted([d for d in candidates if os.path.isdir(d)])
    batches = chunk(sample_dirs, batch)
    extract_region = chrom != ''
    for batch_dirs in batches:
        # 1. copy files
        copy_files(path, target_path, batch_dirs)
        # 2. map using STAR
        os.chdir(target_path)
        pipeline_cmd = 'python {pipe} {pipe_param}'.format(
            pipe=rna_pipeline_file, pipe_param=rna_pipeline_param)
        sarge.run(pipeline_cmd)
        # 3. extract chromosome
        if extract_region:
            bam_path = target_path + '/sortBam'
            os.chdir(bam_path)
            if not os.path.exists(bam_path + '/chr'):
                os.mkdir(bam_path + '/chr')
            for bam in [f for f in os.listdir(bam_path)
                        if f.endswith('.sort.bam')]:
                # Output name: the bam name without its first five characters.
                short_name = bam[5:]
                region_cmd = 'samtools view {input} {chr} > chr/{out}'.format(
                    input=bam, chr=chrom, out=short_name)
                print(region_cmd)
                sarge.run(region_cmd)
                os.remove(bam)
                os.remove(bam + '.bai')
        shutil.rmtree(target_path + '/bam')
def test_working_dir(self):
    """A command run with ``cwd`` creates its file inside that directory."""
    workdir = tempfile.mkdtemp()
    try:
        run('touch newfile.txt', cwd=workdir)
        created = os.listdir(workdir)
        self.assertEqual(created, ['newfile.txt'])
    finally:
        shutil.rmtree(workdir)
def _setup_gpio(self):
    """Run ``gpio -g mode 2 out`` and log any failure instead of raising."""
    import sarge

    command = ["gpio", "-g", "mode", "2", "out"]
    try:
        sarge.run(command)
    except Exception:
        # Log and continue on ordinary errors; KeyboardInterrupt and
        # SystemExit are no longer swallowed as with the bare ``except``.
        self._logger.exception("{} failed".format(" ".join(command)))
def BaseRecalibrator_2(realiBam, post_table, table, gold_pair, gatk, ref_fa, thread):
    '''Step 2 of base recalibration: build the post-recalibration table.

    * realiBam: str. Input bam file.
    * post_table: str. Output table (``-o``).
    * table: str. First-pass table passed as ``-BQSR``.
    * gold_pair: sequence of two str. Known-sites files; element 0 and
      element 1 are each passed as ``-knownSites``.
    * gatk: str. Path of the GATK jar.
    * ref_fa: str. Reference fasta file.
    * thread: int or str. Value for ``-nct``.
    '''
    template = ('java -jar {gatk} -T BaseRecalibrator -R {ref_fa} '
                '-I {realignbam} -knownSites {snp} -knownSites {indel} -BQSR {table} '
                '-o {output} -nct {thread}')
    values = dict(gatk=gatk, ref_fa=ref_fa, realignbam=realiBam,
                  snp=gold_pair[0], indel=gold_pair[1], output=post_table,
                  table=table, thread=str(thread))
    recal_cmd = template.format(**values)
    print(recal_cmd)
    sys.stdout.flush()
    sarge.run(recal_cmd)
def RNA_BaseRecalibrator_1(realiBam, table, gatk, ref_fa, gold_vcf, thread='1'):
    '''Step 1 of base recalibration: generate the recalibration table.

    * realiBam: str. Input bam file.
    * table: str. Output table (``-o``).
    * gatk: str. Path of the GATK jar.
    * ref_fa: str. Reference fasta file.
    * gold_vcf: str. Known-sites file passed as ``-knownSites``.
    * thread: int or str. Value for ``-nct``.
    '''
    template = ('java -jar {gatk} -T BaseRecalibrator -R {ref_fa} '
                '-I {realignbam} -knownSites {gold} '
                '-o {output} -nct {thread}')
    values = {
        'gatk': gatk,
        'ref_fa': ref_fa,
        'realignbam': realiBam,
        'gold': gold_vcf,
        'output': table,
        'thread': str(thread),
    }
    recal_cmd = template.format(**values)
    print(recal_cmd)
    sys.stdout.flush()
    sarge.run(recal_cmd)
def make_tag_directory(in_bam, tag_dir, ref_fa):
    '''Run ``makeTagDirectory`` on a bam file.

    * in_bam: str. Input bam file.
    * tag_dir: str. Output tag directory.
    * ref_fa: str. Genome passed as ``-genome``.
    '''
    # Adjacent literals replace the backslash continuation that used to sit
    # inside the string and leaked an escape/indentation into the command.
    cmd = ('makeTagDirectory {o_dir} -genome {g} -checkGC '
           '-single {bam}').format(o_dir=tag_dir, g=ref_fa, bam=in_bam)
    print(cmd)
    sys.stdout.flush()
    sarge.run(cmd)
def test_is_repo_clean_no_master(git_dir):
    """A non-master branch fails the check unless ``master=False`` is given."""
    repo = str(git_dir)
    run('git checkout -b new_branch', cwd=repo)

    with pytest.raises(FatalError) as excinfo:
        git.is_repo_clean(repo_path=repo)
    error_text = str(excinfo.value)
    assert 'branch should be master' in error_text

    relaxed_result = git.is_repo_clean(repo_path=repo, master=False)
    assert relaxed_result is None
def splitN(dedupBam, splitBam, gatk, ref_fa):
    '''Run GATK SplitNCigarReads on a bam file.

    * dedupBam: str. Input bam file.
    * splitBam: str. Output bam file.
    * gatk: str. Path of the GATK jar.
    * ref_fa: str. Reference fasta file.
    '''
    template = ('java -jar {gatk} -T SplitNCigarReads -R {ref_fa} '
                '-I {input} -o {output} -rf ReassignOneMappingQuality '
                '-RMQF 255 -RMQT 60 -U ALLOW_N_CIGAR_READS')
    values = dict(gatk=gatk, ref_fa=ref_fa, input=dedupBam, output=splitBam)
    split_cmd = template.format(**values)
    print(split_cmd)
    sys.stdout.flush()
    sarge.run(split_cmd)
def makeblast(ref_fa, out, db_type):
    '''Build a blast database from a gzipped fasta file.

    * ref_fa: str. Gzipped fasta file, streamed through ``gunzip -c``.
    * out: str. Database name, also used as its title.
    * db_type: str. Value for ``-dbtype``.
    '''
    template = ('gunzip -c {ref} | makeblastdb -in - -dbtype {type} '
                '-out {out} -title {title}')
    fields = {'ref': ref_fa, 'type': db_type, 'out': out, 'title': out}
    build_cmd = template.format(**fields)
    print(build_cmd)
    sarge.run(build_cmd)
def mark_duplicates(sortBam, dedupBam, picard):
    '''Run picard MarkDuplicates on a bam file.

    The metrics go to ``metrics.txt`` and Java's temp dir is set to ``tmp``.

    * sortBam: str. Input bam file.
    * dedupBam: str. Output bam file.
    * picard: str. Path of the picard jar.
    '''
    template = ('java -Djava.io.tmpdir=tmp -jar {picard} MarkDuplicates I={input} O={out} '
                'CREATE_INDEX=true METRICS_FILE=metrics.txt MAX_RECORDS_IN_RAM=8000000 '
                'MAX_FILE_HANDLES_FOR_READ_ENDS_MAP=1000 '
                'VALIDATION_STRINGENCY=LENIENT')
    fields = {'picard': picard, 'input': sortBam, 'out': dedupBam}
    dedup_cmd = template.format(**fields)
    print(dedup_cmd)
    sys.stdout.flush()
    sarge.run(dedup_cmd)
def cnv_extract_bam(in_bam, out_root, others=['']):
    '''Run ``cnvnator -tree`` to extract read mappings from a bam file.

    * in_bam: str. Input bam file.
    * out_root: str. Root file passed as ``-root``.
    * others: list of str. Extra arguments, joined with spaces.
    '''
    extra = ' '.join(others)
    template = 'cnvnator -root {out} -unique -tree {bam} {other}'
    tree_cmd = template.format(out=out_root, bam=in_bam, other=extra)
    print(tree_cmd)
    sys.stdout.flush()
    sarge.run(tree_cmd)
def sniffle(bam, outVCF, otherParameters=['']):
    """Call ``sniffles`` on a bam file, writing the result to a VCF.

    * bam: str. Input bam file (``-m``).
    * outVCF: str. Output VCF file (``-v``).
    * otherParameters: list of str. Extra arguments appended to the command;
      the default ``['']`` appends nothing.
    """
    has_extra = otherParameters != ['']
    suffix = ' '.join(otherParameters) if has_extra else ''
    sv_cmd = 'sniffles -m {bam} -v {outVCF} '.format(bam=bam, outVCF=outVCF) + suffix
    print(sv_cmd)
    sys.stdout.flush()
    sarge.run(sv_cmd)
def purge_install_node():
    """
    Purges the install node cache: removes ``cache_root`` and recreates
    it together with ``ansible_facts``.
    """
    click.echo("Purging install node caches")
    remove_cmd = 'rm -rf {0}'.format(cache_root)
    run(remove_cmd)
    recreate_cmd = 'mkdir -p {0} {1}'.format(cache_root, ansible_facts)
    run(recreate_cmd)
    click.echo("OK")
def Honey_spots(finalBam, spotFile, ref_fa, thread, otherParams=['']):
    """Run ``Honey.py spots`` on a bam file.

    * finalBam: str. Input bam file.
    * spotFile: str. Output name passed with ``-o``.
    * ref_fa: str. Reference passed with ``--reference``.
    * thread: int or str. Value for ``-n``.
    * otherParams: list of str. Extra arguments appended to the command.
    """
    template = 'Honey.py spots --reference {ref} -n {thread} -o {out} {input} '
    base = template.format(input=finalBam, ref=ref_fa, thread=str(thread),
                           out=spotFile)
    spots_cmd = base + ' '.join(otherParams)
    print(spots_cmd)
    sarge.run(spots_cmd)