def bamtofq(self, bam, oid, rm_bam=True):
    """Convert a bam file to paired fastq files via self.run.

    Output fastq files are written next to the input bam, named after
    oid (_1.fastq/_2.fastq), with singleton/orphan reads captured in
    s.fq, o.fq and o2.fq. Deletes the input bam on success when
    rm_bam is True. Returns the status of the underlying run.
    """
    out_dir = pu.get_file_directory(bam)
    fastq_name = os.path.join(out_dir, oid)
    # singleton and orphan read outputs
    single_fq = os.path.join(out_dir, 's.fq')
    orphan1_fq = os.path.join(out_dir, 'o.fq')
    orphan2_fq = os.path.join(out_dir, 'o2.fq')
    # temp file location: prefer the $LOCAL scratch dir, else cwd
    scratch = os.environ.get('LOCAL') or './'
    tmpfile = os.path.join(scratch,
                           pu.get_file_basename(bam) + pu.get_timestamp())
    internal_kwargs = {
        'F': fastq_name + '_1.fastq',
        'F2': fastq_name + '_2.fastq',
        'S': single_fq,
        'O': orphan1_fq,
        'O2': orphan2_fq,
        'T': tmpfile,
        'filename': bam
    }
    # execute the conversion
    status = self.run(None, objectid=oid, **internal_kwargs)
    if status and rm_bam:
        pe.delete_file(bam)
    return status
def create_lock(self, target_list, message):
    """
    Create a temporary .Lock file associated with each target file and
    write a timestamped message in it.

    Parameters
    ----------
    target_list : list
        List of target files.
    message : str
        Message to write in each lock file.

    Returns
    -------
    templist : list
        A list of .Lock file names corresponding to the target files.
    """
    templist = []
    for f in target_list:
        temp_path = pu.get_file_directory(f)
        if not pu.check_paths_exist(temp_path):
            pu.mkdir(temp_path)
        prefix = pu.get_filename(f) + '_'
        #TODO: dump command in lock
        timestamp = pu.get_timestamp()
        # delete=False: the lock must persist after the handle closes.
        # fix: use a context manager so the file handle is closed
        # (the original left it open, leaking a descriptor per lock)
        with tempfile.NamedTemporaryFile(prefix=prefix, suffix='.Lock',
                                         dir=temp_path, delete=False) as temp:
            temp.write(str.encode(timestamp + '\t' + message))
            templist.append(temp.name)
    return templist
def sortbam(bam, oid, threads=25, memory='100G'):
    """Sort a bam file with sambamba.

    Parameters
    ----------
    bam : str
        Path to the input bam file.
    oid : str
        Object id attached to the command for logging/benchmarking.
    threads : int
        Threads for sambamba (-t). Default 25, matching the previous
        hard-coded value.
    memory : str
        Memory limit for sambamba (-m). Default '100G', matching the
        previous hard-coded value.

    Returns
    -------
    str
        Path to the sorted bam, or "" if the command failed.
    """
    outfile = pu.get_file_basename(bam) + "_sorted.bam"
    outdir = pu.get_file_directory(bam)
    outpath = os.path.join(outdir, outfile)
    # NOTE(review): cmd.split() breaks on paths containing spaces --
    # pre-existing behavior, kept for compatibility
    cmd = 'sambamba sort -t {} -m {} -o {} {}'.format(threads, memory,
                                                      outpath, bam)
    st = pe.execute_command(cmd.split(), logs=True, objectid=oid)
    if not st:
        return ""
    return outpath
def checkEnvLog(logFile):
    """Verify that the log file and its companion ENV log both exist.

    Exits the program with status 1 if either file is missing;
    otherwise returns the path to the ENV log.
    """
    log_dir = pu.get_file_directory(logFile)
    stem = pu.get_file_basename(logFile)
    envLog = os.path.join(log_dir, stem + "ENV.log")
    # guard clause: bail out when either log is missing
    if pu.check_files_exist(logFile, envLog):
        return envLog
    print("Please check missing log files. Exiting.")
    sys.exit(1)
def perform_assembly(self, bam_file, out_dir=None, out_suffix="_stringtie", objectid="NA"):
    """Run stringtie on a bam file and return the output GTF path.

    Parameters
    ----------
    bam_file: string
        path to the bam file
    out_dir: string
        Path to the output directory; defaults to the bam's directory
    out_suffix: string
        Suffix for the output gtf file
    objectid: str
        Provide an id to attach with this command e.g. the SRR accession.
        This is useful for debugging, benchmarking and reports.

    :return: Returns the path to output GTF file
    :rtype: string
    """
    basename = pu.get_file_basename(bam_file)
    out_dir = out_dir or pu.get_file_directory(bam_file)
    if not pu.check_paths_exist(out_dir):
        pu.mkdir(out_dir)
    out_gtf_file = os.path.join(out_dir,
                                "{}{}.gtf".format(basename, out_suffix))
    # -o names the output gtf; the bam is a positional argument
    internal_kwargs = {"-o": out_gtf_file, "--": (bam_file, )}
    status = self.run(None, objectid=objectid, target=out_gtf_file,
                      **internal_kwargs)
    if not status:
        return ""
    # verify the gtf was produced (skip the check during a dry run)
    if not pu.check_files_exist(out_gtf_file) and not _dryrun:
        return ""
    return out_gtf_file
def init_object(self, srr_accession, directory, fastq, fastq2, sra):
    """Initialize this object from fastq file(s), or fall back to an
    SRR accession.

    Two fastq files mean a PAIRED layout, one means SINGLE; in both
    cases the object's directory is taken from the first fastq. A
    ValueError is raised when the named fastq files are missing
    (unless running in dry-run mode). With no fastq input, delegates
    to init_from_accession.
    """
    # paired-end: both fastq files supplied
    if fastq and fastq2:
        self.layout = "PAIRED"
        self.directory = pu.get_file_directory(fastq)
        if not (pu.check_files_exist(fastq, fastq2) or _dryrun):
            pu.print_boldred("ERROR: File not found")
            raise ValueError("Please check fastq files {} {}".format(fastq, fastq2))
        return True
    # single-end: only the first fastq supplied
    if fastq:
        self.layout = "SINGLE"
        self.directory = pu.get_file_directory(fastq)
        if not (pu.check_files_exist(fastq) or _dryrun):
            pu.print_boldred("ERROR: File not found")
            raise ValueError("Please check fastq files {}".format(fastq))
        return True
    # no fastq input: initialize from the accession and directory
    return self.init_from_accession(srr_accession, directory)
def get_lock_files(self, target):
    """
    Return .Lock files associated with a target file.

    Parameters
    ----------
    target : str
        Target file name.

    Returns
    -------
    lock_files : list
        List of .Lock files present.
    """
    #check if lock exists
    filepath = pu.get_file_directory(target)
    filename = pu.get_filename(target)
    # fix: raw strings -- '\.' in a plain string literal is an invalid
    # escape sequence (DeprecationWarning, SyntaxWarning in newer Pythons)
    # NOTE(review): filename is not re.escape()d, so dots in the name
    # match any character -- confirm this looseness is intended
    pattern = r'.*' + filename + r'.*\.Lock$'
    lock_files = pu.find_files(filepath, pattern)
    return lock_files
def checkEnvLog(logFile):
    """
    Check that a log file and its ENV log both exist.

    Parameters
    ----------
    logFile : str
        Path to the log file.

    Returns
    -------
    envLog : str
        Path to the corresponding ENV log. Exits the program when
        either file is missing.
    """
    directory = pu.get_file_directory(logFile)
    stem = pu.get_file_basename(logFile)
    envLog = os.path.join(directory, stem + "ENV.log")
    #both logs must be present to continue
    if not pu.check_files_exist(logFile, envLog):
        print("Please check missing log files. Exiting.")
        sys.exit(1)
    return envLog
def runRibocode(self, gtf, genome, bam, l="no", outsuffix="ribocode_out", verbose=False, quiet=False, logs=True, objectid="NA"):
    """Wrapper to run RiboCode_onestep.

    Parameters
    ----------
    gtf: str
        Path to the gtf annotation (-g)
    genome: str
        Path to the genome fasta (-f)
    bam: str
        Path to the alignment bam (-r)
    l: str
        Value for RiboCode's -l option. Default "no".
    outsuffix: str
        Output name, joined onto the gtf's directory (-o)
    verbose: bool
        Print stdout and std error
    quiet: bool
        Print nothing
    logs: bool
        Log this command to pyrpipe logs
    objectid: str
        Provide an id to attach with this command e.g. the SRR accession.

    :return: Path to the output on success, "" on failure
    :rtype: str
    """
    #check input
    if not pu.check_files_exist(gtf, genome, bam):
        pu.print_boldred("Please check input files for Ribocode")
        return ""
    out_dir = pu.get_file_directory(gtf)
    outFile = os.path.join(out_dir, outsuffix)
    # fix: the genome fasta option is '-f'; it was passed as bare 'f',
    # which parse_unix_args would not match against valid_args
    newOpts = {"-g": gtf, "-f": genome, "-r": bam, "-l": l, "-o": outFile}
    ribocode_Cmd = ['RiboCode_onestep']
    ribocode_Cmd.extend(pu.parse_unix_args(self.valid_args, newOpts))
    status = pe.execute_command(ribocode_Cmd, verbose=verbose, quiet=quiet,
                                logs=logs, objectid=objectid)
    if not status:
        pu.print_boldred("ribocode failed")
        return ""
    return outFile
def perform_assembly(self,bam_file,out_dir="",out_suffix="_cufflinks",reference_gtf=None,threads=None,overwrite=True,verbose=False,quiet=False,logs=True,objectid="NA",**kwargs):
    """Function to run cufflinks with BAM file as input.

    Parameters
    ----------
    bam_file: string
        path to bam file
    out_dir:
        output directory
    out_suffix: string
        Suffix for the output gtf file
    reference_gtf: str
        Path to reference gtf
    threads: int
        Number of threads to use
    overwrite: bool
        Overwrite if output file already exists.
    verbose: bool
        Print stdout and std error
    quiet: bool
        Print nothing
    logs: bool
        Log this command to pyrpipe logs
    objectid: str
        Provide an id to attach with this command e.g. the SRR accession.
    kwargs: dict
        Options to pass to cufflinks.

    :return: Returns the path to output GTF file; "" on failure
    :rtype: string
    """
    #create path to output file
    fname=pu.get_file_basename(bam_file)
    if not out_dir:
        out_dir=pu.get_file_directory(bam_file)
    else:
        if not pu.check_paths_exist(out_dir):
            pu.mkdir(out_dir)
    out_gtf_file=os.path.join(out_dir,fname+out_suffix+".gtf")
    #handle overwrite: keep the existing result if present
    if not overwrite:
        if os.path.isfile(out_gtf_file):
            print("The file "+out_gtf_file+" already exists. Exiting..")
            return out_gtf_file
    if not threads:
        threads=self.threads
    #Add output dir, input bam (positional) and threads
    new_opts={"-o":out_dir,"--":(bam_file,),"-p":str(threads)}
    #add ref gtf
    if reference_gtf:
        if not pu.check_files_exist(reference_gtf):
            pu.print_boldred("Error: Provided reference GTF {} doesn't exist. Exiting...".format(reference_gtf))
            return ""
        new_opts["-g"]=reference_gtf
    merged_opts={**new_opts,**kwargs}
    #call cufflinks
    status=self.run_cufflinks(verbose=verbose,quiet=quiet,logs=logs,objectid=objectid,**merged_opts)
    if status:
        #cufflinks writes transcripts.gtf; rename to the final target
        pe.move_file(os.path.join(out_dir,"transcripts.gtf"),out_gtf_file)
        if pu.check_files_exist(out_gtf_file):
            return out_gtf_file
        else:
            return ""
    #fix: previously fell off the end returning None when the command failed
    return ""
def stringtie_merge(self,*args,out_dir=None,out_suffix="_stringtieMerge",threads=None,overwrite=False,verbose=False,quiet=False,logs=True,objectid="NA",**kwargs):
    """Function to run stringtie merge.

    Parameters
    ----------
    args: tuple
        path to gtf files to merge
    out_dir: string
        Output directory; defaults to the first gtf's directory
    out_suffix: string
        Suffix for output gtf file name
    threads: int
        Number of threads to use
    overwrite: bool
        Overwrite if output file already exists.
    verbose: bool
        Print stdout and std error
    quiet: bool
        Print nothing
    logs: bool
        Log this command to pyrpipe logs
    objectid: str
        Provide an id to attach with this command e.g. the SRR accession.
        This is useful for debugging, benchmarking and reports.
    kwargs: dict
        Options to pass to stringtie.

    :return: Returns the path to the merged GTF file; "" on failure
    :rtype: string
    """
    if len(args) < 1:
        print("ERROR: No input gtf for stringtie merge.")
        return ""
    #name the merged file after the first input gtf
    fname=pu.get_file_basename(args[0])
    if not out_dir:
        out_dir=pu.get_file_directory(args[0])
    out_gtf_file=os.path.join(out_dir,fname+out_suffix+".gtf")
    #handle overwrite: keep the existing result if present
    if not overwrite:
        if os.path.isfile(out_gtf_file):
            print("The file "+out_gtf_file+" already exists. Exiting..")
            return out_gtf_file
    if not threads:
        threads=self.threads
    #Add merge flag, output file name and input gtfs (positional)
    new_opts={"--merge":"","-o":out_gtf_file,"--":args,"-p":str(threads)}
    merged_opts={**new_opts,**kwargs}
    #call stringtie
    status=self.run_stringtie(verbose=verbose,quiet=quiet,logs=logs,objectid=objectid,**merged_opts)
    if status:
        if pu.check_files_exist(out_gtf_file):
            return out_gtf_file
        else:
            return ""
    #fix: previously fell off the end returning None when the command failed
    return ""
def sam_to_bam(self, sam_file, out_dir="", out_suffix="", delete_sam=False, verbose=False, quiet=False, logs=True, objectid="NA", **kwargs):
    """Convert a sam file to bam via samtools view.

    The output bam keeps the sam's base name (plus out_suffix) and is
    written to out_dir, which defaults to the sam's own directory.

    Parameters
    ----------
    sam_file: string
        Path to the input sam file
    out_dir: string
        Output directory; created when it does not exist
    out_suffix: string
        Suffix for the output bam file
    delete_sam: bool
        Delete the sam file after a successful conversion
    verbose: bool
        Print stdout and std error
    quiet: bool
        Print nothing
    logs: bool
        Log this command to pyrpipe logs
    objectid: str
        Provide an id to attach with this command e.g. the SRR accession.
    kwargs: dict
        Options passed through to samtools view

    :return: Path to the bam file, or "" on failure.
    :rtype: string
    """
    if not out_dir:
        out_dir = pu.get_file_directory(sam_file)
    elif not pu.check_paths_exist(out_dir):
        pu.mkdir(out_dir)
    stem = pu.get_file_basename(sam_file)
    out_bam = os.path.join(out_dir, stem + out_suffix + '.bam')
    # -b: emit bam; the sam file goes in as a positional argument.
    # internal options take precedence over caller kwargs.
    opts = {**kwargs, "--": (sam_file, ), "-o": out_bam, "-b": ""}
    status = self.run_samtools("view", verbose=verbose, quiet=quiet,
                               logs=logs, objectid=objectid, **opts)
    if not status:
        print("Sam to bam failed for:" + sam_file)
        return ""
    # confirm the bam landed on disk
    if not pu.check_files_exist(out_bam):
        return ""
    if delete_sam and not pe.deleteFileFromDisk(sam_file):
        print("Error deleting sam file:" + sam_file)
    return out_bam
def merge_bam(self, *args, out_file="merged", out_dir="", delete_bams=False, verbose=False, quiet=False, logs=True, objectid="NA", **kwargs):
    """Merge two or more bam files into one via samtools merge.

    Parameters
    ----------
    args: tuple
        Paths to the bam files to combine (at least two)
    out_file: string
        Output file name; ".bam" is appended
    out_dir: string
        Where to save the merged bam; defaults to the first bam's directory
    delete_bams: bool
        Delete the input bam files after a successful merge
    verbose: bool
        Print stdout and std error
    quiet: bool
        Print nothing
    logs: bool
        Log this command to pyrpipe logs
    objectid: str
        Provide an id to attach with this command e.g. the SRR accession.
    kwargs: dict
        Options passed through to samtools merge

    :return: Path to the merged bam file, or "" on failure.
    :rtype: string
    """
    if len(args) < 2:
        print("Please supply at least 2 files to merge")
        return ""
    if not out_dir:
        out_dir = pu.get_file_directory(args[0])
    elif not pu.check_paths_exist(out_dir):
        pu.mkdir(out_dir)
    outMergedFile = os.path.join(out_dir, out_file + ".bam")
    # samtools merge positional order: merged_output input1 input2 ...
    opts = {**kwargs, "--": (outMergedFile, ) + args}
    status = self.run_samtools("merge", verbose=verbose, quiet=quiet,
                               logs=logs, objectid=objectid, **opts)
    if not status:
        print("Bam merge failed for:" + outMergedFile)
        return ""
    # confirm the merged bam landed on disk
    if not pu.check_files_exist(outMergedFile):
        return ""
    if delete_bams:
        for bam_file in args:
            if not pe.deleteFileFromDisk(bam_file):
                print("Error deleting sam file:" + bam_file)
    return outMergedFile
def perform_assembly(self, bam_file, out_dir=None, out_suffix="_cufflinks", objectid="NA"):
    """Run cufflinks on a bam file and return the output GTF path.

    Parameters
    ----------
    bam_file: string
        path to bam file
    out_dir:
        output directory; defaults to the bam's directory
    out_suffix: string
        Suffix for the output gtf file
    objectid: str
        Provide an id to attach with this command e.g. the SRR accession.

    :return: Returns the path to output GTF file
    :rtype: string
    """
    basename = pu.get_file_basename(bam_file)
    if out_dir:
        if not pu.check_paths_exist(out_dir):
            pu.mkdir(out_dir)
    else:
        out_dir = pu.get_file_directory(bam_file)
    # -o names the output dir; the bam is a positional argument
    internal_kwargs = {"-o": out_dir, "--": (bam_file, )}
    # cufflinks always writes transcripts.gtf; it is renamed afterwards
    cufflinks_out = os.path.join(out_dir, "transcripts.gtf")
    out_gtf_file = os.path.join(out_dir, basename + out_suffix + ".gtf")
    # skip the run when the final target already exists (unless forced)
    if not _force and pu.check_files_exist(out_gtf_file):
        pu.print_green(
            'Target files {} already exist.'.format(out_gtf_file))
        return out_gtf_file
    status = self.run(None, objectid=objectid, target=cufflinks_out,
                      **internal_kwargs)
    if not status:
        return ""
    if not _dryrun:
        # rename cufflinks' fixed output to the suffixed target
        pe.move_file(cufflinks_out, out_gtf_file)
        if not pu.check_files_exist(out_gtf_file):
            return ""
    return out_gtf_file
def perform_cleaning(self,sra_object,bbsplit_index,out_dir="",out_suffix="_bbsplit",objectid="NA",**kwargs):
    """
    Remove contaminated reads mapping to given reference using bbsplit.

    Accepts either an existing bbsplit index directory or a fasta file;
    in the latter case an index is created next to the fasta (in a
    "ref" subdirectory) if one is not already there.

    Parameters
    ----------
    sra_object: SRA
        an SRA object
    bbsplit_index: string
        Path to bbsplit index or fasta file which will generate index
    out_dir: string
        Path to output dir. Default: sra_object.directory
    out_suffix: string
        Suffix for output file name
    objectid: str
        Provide an id to attach with this command e.g. the SRR accession.
        This is useful for debugging, benchmarking and reports.
    kwargs: dict
        options passed to bbsplit

    :return: Returns the path of fastq files after QC. tuple has one item for single end files and 2 for paired.
    :rtype: tuple
    """
    #check index
    indexPath=""
    if not pu.check_paths_exist(bbsplit_index):
        #index folder doesn't exist
        #check if input is path to fasta
        if not pu.check_files_exist(bbsplit_index):
            print("Error: Please check bbsplit index")
            return ("",)
        #check if index folder "ref" exists in this directory
        indexPath=os.path.join(pu.get_file_directory(bbsplit_index),"ref")
        if pu.check_paths_exist(indexPath):
            print("Using bbsplit index: "+indexPath)
        else:
            #create new index
            print("Creating new index"+indexPath)
            # NOTE(review): bbsplit's reference-build option is normally
            # 'ref'; 'ref_x' looks suspicious -- confirm against
            # run_bbsplit's argument handling
            newOpts={"ref_x":bbsplit_index,"path": pu.get_file_directory(bbsplit_index)}
            mergedOpts={**kwargs,**newOpts}
            #run bbsplit to build the index
            if not self.run_bbsplit(objectid=objectid,**mergedOpts):
                print("Error creating bbsplit index.")
                return ("",)
            if not pu.check_paths_exist(indexPath):
                print("Error creating bbsplit index.")
                return ("",)
    else:
        indexPath=bbsplit_index
    #indexPath points to the ref directory, go one directory higher
    #(bbsplit's path= option expects the parent of "ref")
    indexPath=os.path.dirname(indexPath)
    #make out_dir
    if not out_dir:
        out_dir=sra_object.directory
    else:
        if not pu.check_paths_exist(out_dir):
            pu.mkdir(out_dir)
    if sra_object.layout=='PAIRED':
        fq1=sra_object.fastq_path
        fq2=sra_object.fastq2_path
        #append input and output options; outu1/outu2 collect the
        #unmapped (clean) reads
        out_fileName1=pu.get_file_basename(fq1)+out_suffix+".fastq"
        out_fileName2=pu.get_file_basename(fq2)+out_suffix+".fastq"
        out_file1Path=os.path.join(out_dir,out_fileName1)
        out_file2Path=os.path.join(out_dir,out_fileName2)
        newOpts={"in1":fq1,"in2":fq2,"outu1":out_file1Path,"outu2":out_file2Path,"path":indexPath}
        mergedOpts={**kwargs,**newOpts}
        #run bbsplit
        if self.run_bbsplit(objectid=objectid,target=[out_file1Path,out_file2Path],**mergedOpts):
            if pu.check_files_exist(out_file1Path,out_file2Path):
                return(out_file1Path,out_file2Path)
        return("",)
    else:
        fq=sra_object.fastq_path
        #append input and output options; outu collects the clean reads
        out_fileName=pu.get_file_basename(fq)+out_suffix+".fastq"
        out_filePath=os.path.join(out_dir,out_fileName)
        newOpts={"in":fq,"outu":out_filePath,"path":indexPath}
        mergedOpts={**kwargs,**newOpts}
        #run bbsplit
        if self.run_bbsplit(objectid=objectid,target=out_filePath,**mergedOpts):
            if pu.check_files_exist(out_filePath):
                return(out_filePath,)
        return("",)
def build_index(self,index_path,transcriptome,objectid="NA"): """Function to build kallisto index index_path: str path to the index transcriptome: str Path to transcriptome objectid: str Provide an id to attach with this command e.g. the SRR accession. This is useful for debugging, benchmarking and reports. :return: Status of kallisto index :rtype: bool """ #if index already exists then exit if not _force: #check if files exists if pu.check_files_exist(index_path): pu.print_green("Kallisto index {} already exists.".format(index_path)) self.index=index_path return True #check input if not pu.check_files_exist(transcriptome): pu.print_boldred("{} does not exist. Exiting".format(transcriptome)) raise ValueError("Please check input to kallisto index") #create out dir indexdir=pu.get_file_directory(index_path) #create the out dir if not pu.check_paths_exist(indexdir): if not pu.mkdir(indexdir): raise OSError("Error creating kallisto index. Failed to create index directory.") args=(transcriptome,) internal_kwargs={"-i":index_path} #read build parameters yamlfile=os.path.join(_params_dir,'kallisto_index.yaml') if pu.check_files_exist(yamlfile): yaml_params=pl.YAML_loader(yamlfile) yaml_kwargs=yaml_params.get_kwargs() internal_kwargs={**yaml_kwargs,**internal_kwargs} #add positional args internal_kwargs['--']=args validArgsIndex=valid_args._args_KALLISTO_INDEX kallisto_cmd=['kallisto','index'] kallisto_cmd.extend(pu.parse_unix_args(validArgsIndex,internal_kwargs)) #call kallisto status=pe.execute_command(kallisto_cmd,objectid=objectid) if status: if pu.check_files_exist(index_path) and not _dryrun: #update object's index self.index=index_path if self.check_index(): return True else: raise OSError("Error building kallisto index") return False
def build_index(self,index_path,transcriptome,objectid="NA"): """ Parameters ---------- index_path : TYPE DESCRIPTION. transcriptome : TYPE DESCRIPTION. objectid : TYPE, optional DESCRIPTION. The default is "NA". Raises ------ OSError DESCRIPTION. Returns ------- bool DESCRIPTION. """ #if index already exists then exit if not _force: #check if files exists if pu.check_salmonindex(index_path): pu.print_green("Salmon index {} already exists.".format(index_path)) self.index=index_path return True #check input if not pu.check_files_exist(transcriptome): pu.print_boldred("{} does not exist. Exiting".format(transcriptome)) return False #create out dir indexdir=pu.get_file_directory(index_path) #create the out dir if not pu.check_paths_exist(indexdir): if not pu.mkdir(indexdir): raise OSError("Error creating salmon index. Failed to create index directory.") validArgsIndex=valid_args._args_SALMON_INDEX internal_kwargs={"--threads":_threads,"-t":transcriptome,"-i":index_path} #read build parameters yamlfile=os.path.join(_params_dir,'salmon_index.yaml') if pu.check_files_exist(yamlfile): yaml_params=pl.YAML_loader(yamlfile) yaml_kwargs=yaml_params.get_kwargs() internal_kwargs={**yaml_kwargs,**internal_kwargs} salmon_cmd=['salmon','index'] salmon_cmd.extend(pu.parse_unix_args(validArgsIndex,internal_kwargs)) #call salmon status=pe.execute_command(salmon_cmd,objectid=objectid) if status: if pu.check_salmonindex(index_path) and not _dryrun: #update object's index self.index=index_path if self.check_index(): return True else: raise OSError("Error building salmon index") return False
def sort_bam(self, bam_file, out_dir="", out_suffix="", threads=None, delete_bam=False, verbose=False, quiet=False, logs=True, objectid="NA", **kwargs):
    """Sort a bam file via samtools sort; output ends in _sorted.bam.

    Parameters
    ----------
    bam_file: str
        Path to the input bam file
    out_dir: str
        Output directory; defaults to the bam's own directory
    out_suffix: str
        Output file suffix (inserted before _sorted.bam)
    threads: int
        Number of threads. Default: self.threads from init().
    delete_bam: bool
        Delete the input bam after a successful sort
    verbose: bool
        Print stdout and std error
    quiet: bool
        Print nothing
    logs: bool
        Log this command to pyrpipe logs
    objectid: str
        Provide an id to attach with this command e.g. the SRR accession.
    kwargs: dict
        Options passed through to samtools sort; these override the
        internally-set options

    :return: Path to the sorted bam, or "" on failure.
    :rtype: string
    """
    if not out_dir:
        out_dir = pu.get_file_directory(bam_file)
    elif not pu.check_paths_exist(out_dir):
        pu.mkdir(out_dir)
    stem = pu.get_file_basename(bam_file)
    outSortedbam_file = os.path.join(out_dir,
                                     stem + out_suffix + '_sorted.bam')
    # fall back to the object's configured thread count
    threads = threads or self.threads
    # caller kwargs deliberately override the internal options here
    opts = {
        "--": (bam_file, ),
        "-o": outSortedbam_file,
        "-@": str(threads),
        **kwargs
    }
    status = self.run_samtools("sort", verbose=verbose, quiet=quiet,
                               logs=logs, objectid=objectid, **opts)
    if not status:
        print("Bam sort failed for:" + bam_file)
        return ""
    # confirm the sorted bam landed on disk
    if not pu.check_files_exist(outSortedbam_file):
        return ""
    if delete_bam and not pe.deleteFileFromDisk(bam_file):
        print("Error deleting sam file:" + bam_file)
    return outSortedbam_file
def build_index(self, index_path, genome, objectid="NA"):
    """Build a bowtie2 index with given parameters and saves the new
    index to self.index.

    Parameters
    ----------
    index_path: string
        Path where the index will be created
    genome: string
        Path to the reference genome
    objectid : string
        Provide an id to attach with this command e.g. the SRR accession.
        This is useful for debugging, benchmarking and reports.

    :return: Returns the status of bowtie2-build
    :rtype: bool

    :raises ValueError: when the genome fasta is missing
    :raises OSError: when the index directory cannot be created or the
        built index fails verification
    """
    #reuse an existing index unless _force is set
    if not _force:
        if pu.check_bowtie2index(index_path):
            pu.print_green(
                "bowtie index {} already exists.".format(index_path))
            self.index = index_path
            return True
    #check input files
    if not (pu.check_files_exist(genome)):
        pu.print_boldred(
            "Please provide a valid input fasta file to build bowtie2 index"
        )
        # fix: removed the unreachable 'return False' that followed this raise
        raise ValueError("Please check input to star build index")
    #options recognized by bowtie2-build
    bowtie2_build_args = [
        '-f', '-c', '--large-index', '--debug', '--sanitized',
        '--verbose', '-a', '--noauto', '-p', '--packed', '--bmax',
        '--bmaxdivn', '--dcv', '--nodc', '-r', '--noref', '-3',
        '--justref', '-o', '--offrate', '-t', '--ftabchars',
        '--threads', '--seed', '-q', '--quiet'
    ]
    #create the out dir
    indexdir = pu.get_file_directory(index_path)
    if not pu.check_paths_exist(indexdir):
        if not pu.mkdir(indexdir):
            raise OSError(
                "Error creating bowtie2 index. Failed to create index directory."
            )
    args = (genome, index_path)
    internal_kwargs = {"--threads": self._threads}
    #read build parameters from yaml, if present; internal kwargs win
    yamlfile = os.path.join(_params_dir, 'bowtie2_index.yaml')
    if pu.check_files_exist(yamlfile):
        yaml_params = pl.YAML_loader(yamlfile)
        yaml_kwargs = yaml_params.get_kwargs()
        internal_kwargs = {**yaml_kwargs, **internal_kwargs}
    #add positional args
    internal_kwargs['--'] = args
    bowtie2Build_Cmd = ['bowtie2-build']
    #add options
    bowtie2Build_Cmd.extend(
        pu.parse_unix_args(bowtie2_build_args, internal_kwargs))
    #start execution
    status = pe.execute_command(bowtie2Build_Cmd, objectid=objectid)
    if not status:
        pu.print_boldred("bowtie2-build failed")
        return False
    #fix: dropped the redundant 'if status:' -- status is always truthy here
    #verify the built index on disk (skipped during a dry run)
    if pu.check_bowtie2index(index_path) and not _dryrun:
        #update object's index
        self.index = index_path
        if self.check_index():
            return True
        else:
            raise OSError("Error building bowtie2 index")
    return True
def perform_assembly(self,sra_object=None,bam_file=None,out_dir="trinity_out_dir",max_memory=None,max_intron=10000,threads=None,overwrite=True,verbose=False,quiet=False,logs=True,objectid="NA",**kwargs):
    """Function to run trinity with sra object or BAM file as input.

    Parameters
    ----------
    sra_object: SRA
        object of SRA class (de novo assembly)
    bam_file: string
        path to bam file (genome-guided assembly)
    out_dir: string
        path to out directory; "_trinity" is appended when the name
        does not already contain "trinity" (a Trinity requirement)
    max_memory: string
        Max memory argument e.g. "2G"
    max_intron: int
        specify the "--genome_guided_max_intron" argument
    threads: int
        Number of threads to use
    overwrite: bool
        Overwrite if output file already exists
    verbose: bool
        Print stdout and std error
    quiet: bool
        Print nothing
    logs: bool
        Log this command to pyrpipe logs
    objectid: str
        Provide an id to attach with this command e.g. the SRR accession.
    kwargs: dict
        Options to pass to Trinity.

    :return: Return the path to output directory; "" on failure
    :rtype: string
    """
    #Trinity requires 'trinity' in the output directory name
    if "trinity" not in out_dir:
        out_dir+="_trinity"
    if not threads:
        threads=self.threads
    if not max_memory:
        max_memory=self.max_memory
    new_opts={}
    if sra_object is not None:
        #de novo assembly from fastq files
        parent_dir=sra_object.location
        out_dir=os.path.join(parent_dir,out_dir)
        if sra_object.layout == 'PAIRED':
            new_opts={"--seqType":"fq","--left":sra_object.localfastq1Path,"--right":sra_object.localfastq2Path,"--output":out_dir,"--max_memory":str(max_memory)+"G","--CPU":str(threads)}
        else:
            new_opts={"--seqType":"fq","--single":sra_object.localfastqPath,"--output":out_dir,"--max_memory":str(max_memory)+"G","--CPU":str(threads)}
    elif bam_file is not None:
        #genome-guided assembly from a bam file
        if not pu.check_files_exist(bam_file):
            pu.print_boldred("Input to trinity does not exist:"+bam_file)
            return ""
        parent_dir=pu.get_file_directory(bam_file)
        out_dir=os.path.join(parent_dir,out_dir)
        new_opts={"--genome_guided_bam":bam_file,"--output":out_dir,"--max_memory":str(max_memory)+"G","--CPU":str(threads),"--genome_guided_max_intron":max_intron}
    else:
        pu.print_boldred("Please provide valid input to run trinity")
        return ""
    merged_opts={**new_opts,**kwargs}
    #call trinity
    #fix: forward the caller's verbose/quiet/logs/objectid -- the
    #original hard-coded these, silently ignoring the parameters
    status=self.run_trinity(valid_args_list=None,verbose=verbose,quiet=quiet,logs=logs,objectid=objectid,**merged_opts)
    if status:
        #check out dir
        if pu.check_paths_exist(out_dir):
            return out_dir
        else:
            return ""
    #fix: previously fell off the end returning None when the command failed
    return ""
def test_get_dir():
    """pu.get_file_directory should return the parent directory part."""
    cases = {
        'aaa/bb': 'aaa',
        'bbb/aaa/bb': 'bbb/aaa',
    }
    for path, parent in cases.items():
        assert pu.get_file_directory(path) == parent, 'get_dir failed'
def perform_assembly(self, bam_file, out_dir="", out_suffix="_stringtie", overwrite=True, verbose=False, quiet=False, logs=True, objectid="NA", **kwargs):
    """Function to run stringtie using a bam file.

    Parameters
    ----------
    bam_file: string
        path to the bam file
    out_dir: string
        Output directory; defaults to the bam's directory
    out_suffix: string
        Suffix for the output gtf file
    overwrite: bool
        Overwrite if output file already exists.
    verbose: bool
        Print stdout and std error
    quiet: bool
        Print nothing
    logs: bool
        Log this command to pyrpipe logs
    objectid: str
        Provide an id to attach with this command e.g. the SRR accession.
        This is useful for debugging, benchmarking and reports.
    kwargs: dict
        Options to pass to stringtie. This will override the existing
        options in self.passed_args_dict (only replace existing
        arguments and not replace all the arguments).

    :return: Returns the path to output GTF file; "" on failure
    :rtype: string
    """
    #create path to output file
    fname = pu.get_file_basename(bam_file)
    if not out_dir:
        out_dir = pu.get_file_directory(bam_file)
    out_gtf_file = os.path.join(out_dir, fname + out_suffix + ".gtf")
    #handle overwrite: keep the existing result if present
    if not overwrite:
        if os.path.isfile(out_gtf_file):
            print("The file " + out_gtf_file + " already exists. Exiting..")
            return out_gtf_file
    #Add output file name and input bam (positional);
    #internal options take precedence over caller kwargs
    new_opts = {"-o": out_gtf_file, "--": (bam_file, )}
    merged_opts = {**kwargs, **new_opts}
    #call stringtie
    status = self.run_stringtie(verbose=verbose, quiet=quiet, logs=logs,
                                objectid=objectid, **merged_opts)
    if status:
        #check the gtf was produced
        if pu.check_files_exist(out_gtf_file):
            return out_gtf_file
        else:
            return ""
    #fix: previously fell off the end returning None when the command failed
    return ""
##################################################################################### #argv[1]: file containg gtex/tcga ids idsfile = sys.argv[1] analysis = sys.argv[2] runquant = False runalign = False if analysis == 'quant': runquant = True if analysis == 'align': runalign = True with open(idsfile) as f: data = f.read().splitlines() #set infile dir as workdir basedir = pu.get_file_directory(idsfile) #pyrpipe objects star = mapping.Star() #Create stringtie object stieobj = assembly.Stringtie() #biobambam biobb = Bamtofastq() #salmon for quant salmon = quant.Salmon() #delete final sorted bam delete_bam = True #out_dir is same name as input file out_dir = basedir
def perform_assembly(self,bam_file,out_dir=None,out_suffix="_stringtie",reference_gtf=None,threads=None,overwrite=False,verbose=False,quiet=False,logs=True,objectid="NA",**kwargs):
    """Function to run stringtie using a bam file.

    Parameters
    ----------
    bam_file: string
        path to the bam file
    out_dir: string
        Output directory; defaults to the bam's directory
    out_suffix: string
        Suffix for the output gtf file
    reference_gtf: str
        Path to the reference gtf used as guide
    threads: int
        Number of threads to use
    overwrite: bool
        Overwrite if output file already exists.
    verbose: bool
        Print stdout and std error
    quiet: bool
        Print nothing
    logs: bool
        Log this command to pyrpipe logs
    objectid: str
        Provide an id to attach with this command e.g. the SRR accession.
        This is useful for debugging, benchmarking and reports.
    kwargs: dict
        Options to pass to stringtie.

    :return: Returns the path to output GTF file; "" on failure
    :rtype: string
    """
    #create path to output file
    fname=pu.get_file_basename(bam_file)
    if not out_dir:
        out_dir=pu.get_file_directory(bam_file)
    out_gtf_file=os.path.join(out_dir,fname+out_suffix+".gtf")
    #handle overwrite: keep the existing result if present
    if not overwrite:
        if os.path.isfile(out_gtf_file):
            print("The file "+out_gtf_file+" already exists. Exiting..")
            return out_gtf_file
    if not threads:
        threads=self.threads
    #Add output file name, input bam (positional) and threads
    new_opts={"-o":out_gtf_file,"--":(bam_file,),"-p":str(threads)}
    #use the reference gtf as a guide (-G), when provided
    if reference_gtf:
        if not pu.check_files_exist(reference_gtf):
            pu.print_boldred("Error: Provided reference GTF {} doesn't exist. Exiting...".format(reference_gtf))
            return ""
        new_opts["-G"]=reference_gtf
    merged_opts={**new_opts,**kwargs}
    #call stringtie
    status=self.run_stringtie(verbose=verbose,quiet=quiet,logs=logs,objectid=objectid,**merged_opts)
    if status:
        #check the gtf was produced
        if pu.check_files_exist(out_gtf_file):
            return out_gtf_file
        else:
            return ""
    #fix: previously fell off the end returning None when the command failed
    return ""
def build_index(self, index_path, genome, objectid="NA"): """Build a hisat index with given parameters and saves the new index to self.index. Parameters ---------- index_path: string Path where the index will be created genome: string Path to the reference genome objectid : string Provide an id to attach with this command e.g. the SRR accession. This is useful for debugging, benchmarking and reports. :return: Returns the status of hisat2-build :rtype: bool """ #if index already exists then exit if not _force: #check if files exists if pu.check_hisatindex(index_path): pu.print_green( "Hisat2 index {} already exists.".format(index_path)) self.index = os.path.join(index_path) return True #check input files if not pu.check_files_exist(genome): pu.print_boldred( "Please provide a valid input fasta file to build Hisat2 index" ) raise ValueError("Please check input to hisat2 build index") indexdir = pu.get_file_directory(index_path) #create the out dir if not pu.check_paths_exist(indexdir): if not pu.mkdir(indexdir): raise OSError( "Error creating hisat2 index. Failed to create index directory." ) hisat2Buildvalid_args = valid_args._args_HISAT2BUILD args = (genome, index_path) internal_kwargs = {"-p": self._threads} #read build parameters yamlfile = os.path.join(_params_dir, 'hisat2_index.yaml') if pu.check_files_exist(yamlfile): yaml_params = pl.YAML_loader(yamlfile) yaml_kwargs = yaml_params.get_kwargs() internal_kwargs = {**yaml_kwargs, **internal_kwargs} #add positional args internal_kwargs['--'] = args hisat2Build_Cmd = ['hisat2-build'] hisat2Build_Cmd.extend( pu.parse_unix_args(hisat2Buildvalid_args, internal_kwargs)) #execute command status = pe.execute_command(hisat2Build_Cmd, objectid=objectid) if status: if pu.check_hisatindex(index_path) and not _dryrun: #update object's index self.index = index_path if self.check_index(): return True else: raise OSError("Error building Hisat2 index") return True