def runMikado(self, sub_command, valid_args=None, verbose=False, quiet=False, logs=True, objectid="NA", **kwargs): """Wrapper to run mikado subcommands """ valid_commands = [ 'configure', 'prepare', 'serialise', 'pick', 'compare' ] if sub_command not in valid_commands: pu.print_boldred("Invalid command: " + sub_command + ". Exiting...") return False mikado_Cmd = ['mikado', sub_command] #add options mikado_Cmd.extend(pu.parse_unix_args(valid_args, kwargs)) #start execution status = pe.execute_command(mikado_Cmd, verbose=verbose, quiet=quiet, logs=logs, objectid=objectid) if not status: pu.print_boldred("mikado failed") #return status return status
def test_linux_args(): res = ['-O', './test', 'IN1', 'IN2'] assert pu.parse_unix_args(['-O', '-t', '-q'], { "-O": "./test", "Attr2": "XX", "--": ("IN1", "IN2") }) == res, 'linux args failed'
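#A hedged companion sketch (not part of the original test suite) spelling out the
#parse_unix_args contract exercised by the assertion above: options whose keys are not
#in the valid-args list are silently dropped, and the special '--' key carries
#positional arguments that are appended after the options. The option and file names
#below are made up for illustration; the expected result is inferred from the test above.
def example_parse_unix_args():
    valid = ['-o', '-t']
    opts = {'-o': 'out_dir', '--not-valid': 'x', '--': ('reads_1.fastq', 'reads_2.fastq')}
    #expected: ['-o', 'out_dir', 'reads_1.fastq', 'reads_2.fastq']
    return pu.parse_unix_args(valid, opts)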
def run_trinity(self,valid_args_list=None,verbose=False,quiet=False,logs=True,objectid="NA",**kwargs): """Wrapper for running trinity Parameters ---------- valid_args_list: list list of valid arguments verbose: bool Print stdout and std error quiet: bool Print nothing logs: bool Log this command to pyrpipe logs objectid: str Provide an id to attach with this command e.g. the SRR accession. kwargs: dict Options passed to trinity :return: Return the status of trinity command. :rtype: bool """ trinity_cmd=['Trinity'] #add options trinity_cmd.extend(pu.parse_unix_args(valid_args_list,kwargs)) #start execution status=pe.execute_command(trinity_cmd,verbose=verbose,quiet=quiet,logs=logs,objectid=objectid) if not status: pu.print_boldred("trinity failed") #return status return status
def run_star(self, verbose=False, quiet=False, logs=True, objectid="NA", **kwargs): """Wrapper for running star. The index in self.star_index is used. Parameters ---------- verbose: bool Print stdout and std error quiet: bool Print nothing logs: bool Log this command to pyrpipe logs objectid: str Provide an id to attach with this command e.g. the SRR accession. This is useful for debugging, benchmarking and reports. kwargs: dict arguments to pass to star. This will override parameters already existing in self.passedArgumentDict (only replace existing arguments and NOT replace all of them). :return: Returns the status of star. True if passed, False if failed. :rtype: bool """ #check for a valid index if not self.check_index(): raise Exception( "ERROR: Invalid star index. Please run build index to generate an index." ) #override existing arguments mergedArgsDict = {**self.passedArgumentDict, **kwargs} star_cmd = ['STAR'] #add options star_cmd.extend(pu.parse_unix_args(self.valid_args, mergedArgsDict)) #execute command cmd_status = pe.execute_command(star_cmd, verbose=verbose, quiet=quiet, logs=logs, objectid=objectid) if not cmd_status: print("STAR failed:" + " ".join(star_cmd)) #return status return cmd_status
def run_cuff(self, command, verbose=False, quiet=False, logs=True, objectid="NA", **kwargs): """Wrapper for running cuff* commands Parameters ---------- command: string the command name verbose: bool Print stdout and std error quiet: bool Print nothing logs: bool Log this command to pyrpipe logs objectid: str Provide an id to attach with this command e.g. the SRR accession. kwargs: dict Options passed to command :return: Returns the status of the command. :rtype: bool """ validCommands = [ 'cuffcompare', 'cuffdiff', 'cufflinks', 'cuffmerge', 'cuffnorm', 'cuffquant' ] if command in validCommands: #override existing arguments merged_args_dict = {**self.passed_args_dict, **kwargs} cuff_cmd = [command] #add options cuff_cmd.extend( pu.parse_unix_args(self.valid_args_list, merged_args_dict)) #start execution status = pe.execute_command(cuff_cmd, verbose=verbose, quiet=quiet, logs=logs, objectid=objectid) if not status: pu.print_boldred(command + " failed") #return status return status else: pu.print_boldred("Unknown command {}".format(command)) return False
def run_hisat2(self, verbose=False, quiet=False, logs=True, objectid="NA", **kwargs): """Wrapper for running hisat2. Run HISAT2 using an SRA object and produce a .bam file as result. The HISAT2 index used will be self.hisat2_index. All output will be written to SRA.location by default. Parameters ---------- verbose: bool Print stdout and std error quiet: bool Print nothing logs: bool Log this command to pyrpipe logs objectid: str Provide an id to attach with this command e.g. the SRR accession. This is useful for debugging, benchmarking and reports. kwargs: dict arguments to pass to hisat2. This will override parameters already existing in self.passedArgumentDict but NOT replace all of them. :return: Returns the status of hisat2. True if passed, False if failed. :rtype: bool """ #check for a valid index if not self.check_index(): raise Exception( "ERROR: Invalid HISAT2 index. Please run build index to generate an index." ) #override existing arguments mergedArgsDict = {**self.passedArgumentDict, **kwargs} hisat2_Cmd = ['hisat2'] #add options hisat2_Cmd.extend(pu.parse_unix_args(self.valid_args, mergedArgsDict)) #execute command cmd_status = pe.execute_command(hisat2_Cmd, verbose=verbose, quiet=quiet, logs=logs, objectid=objectid) if not cmd_status: print("hisat2 failed:" + " ".join(hisat2_Cmd)) #return status return cmd_status
def run_salmon(self, subcommand, valid_args=None, verbose=False, quiet=False, logs=True, objectid="NA", **kwargs): """Wrapper for running salmon. Parameters ---------- subcommand: str subcommand for salmon valid_args: list List of valid arguments verbose: bool Print stdout and std error quiet: bool Print nothing logs: bool Log this command to pyrpipe logs objectid: str Provide an id to attach with this command e.g. the SRR accession. This is useful for debugging, benchmarking and reports. kwargs: dict Options to pass to salmon. This will override the existing options :return: Returns the status of salmon. True if passed, False if failed. :rtype: bool """ #check for a valid index if subcommand != "index": if not self.check_index(): raise Exception( "ERROR: Invalid salmon index. Please run build index to generate an index." ) salmon_Cmd = ['salmon', subcommand] salmon_Cmd.extend(pu.parse_unix_args(valid_args, kwargs)) #start execution status = pe.execute_command(salmon_Cmd, verbose=verbose, quiet=quiet, logs=logs, objectid=objectid, command_name=" ".join(salmon_Cmd[0:2])) if not status: pu.print_boldred("salmon failed") return status
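#Hedged usage sketch (not part of the original module): quantify paired-end reads
#through the run_salmon wrapper above. `salmon_obj` is assumed to be an instance of the
#class defining run_salmon with a valid index; the flags are standard salmon quant
#options, and valid_args=None accepts all arguments (see the run_portcullis docstring).
def example_salmon_quant(salmon_obj, index, r1, r2, outdir, srr="NA"):
    """Illustrative only; file and index paths are placeholders."""
    return salmon_obj.run_salmon("quant", valid_args=None, objectid=srr,
                                 **{"-i": index, "-l": "A", "-1": r1, "-2": r2,
                                    "-o": outdir})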
def run_portcullis(self, sub_command, verbose=False, quiet=False, logs=True, objectid="NA", **kwargs): """ Wrapper to run portcullis. Parameters ---------- sub_command: string sub_command to pass to portcullis e.g. full, prep, junc etc. verbose: bool Print stdout and std error quiet: bool Print nothing logs: bool Log this command to pyrpipe logs objectid: str Provide an id to attach with this command e.g. the SRR accession. This is useful for debugging, benchmarking and reports. kwargs: dict arguments to pass to portcullis. This will override parameters already existing in self.passedArgumentDict but NOT replace all of them. :return: Returns the status of portcullis. True if passed, False if failed. :rtype: bool """ #override existing arguments mergedArgsDict = {**self.passedArgumentDict, **kwargs} portcullis_cmd = ['portcullis', sub_command] #add options portcullis_cmd.extend( pu.parse_unix_args(self.valid_args, mergedArgsDict)) print("Executing:" + " ".join(portcullis_cmd)) #start execution status = pe.execute_command(portcullis_cmd, verbose=verbose, quiet=quiet, logs=logs, objectid=objectid) if not status: pu.print_boldred("portcullis failed") #return status return status
def run_diamond(self, subcommand, valid_args=None, verbose=False, quiet=False, logs=True, objectid="NA", **kwargs): """Wrapper for running diamond. Parameters ---------- subcommand: str subcommand for diamond e.g. makedb, blastx, blastp valid_args: list list of valid arguments verbose: bool Print stdout and std error quiet: bool Print nothing logs: bool Log this command to pyrpipe logs objectid: str Provide an id to attach with this command e.g. the SRR accession. This is useful for debugging, benchmarking and reports. kwargs: dict arguments to pass to diamond. :return: Returns the status of diamond. True if passed, False if failed. :rtype: bool """ #check for a valid index if subcommand == "blastx" or subcommand == "blastp": if not self.check_index(): raise Exception( "ERROR: Invalid Diamond index. Please run build_index() to generate an index." ) diamond_cmd = ['diamond', subcommand] #add options diamond_cmd.extend(pu.parse_unix_args(valid_args, kwargs)) #execute command cmd_status = pe.execute_command(diamond_cmd, verbose=verbose, quiet=quiet, logs=logs, objectid=objectid) if not cmd_status: print("Diamond failed:" + " ".join(diamond_cmd)) #return status return cmd_status
def run_hisat2(self, valid_args=None, verbose=False, quiet=False, logs=True, objectid="NA", **kwargs): """Wrapper for running hisat2. Parameters ---------- valid_args: list list of valid arguments verbose: bool Print stdout and std error quiet: bool Print nothing logs: bool Log this command to pyrpipe logs objectid: str Provide an id to attach with this command e.g. the SRR accession. This is useful for debugging, benchmarking and reports. kwargs: dict arguments to pass to hisat2. :return: Returns the status of hisat2. True is passed, False if failed. :rtype: bool """ #check for a valid index if not self.check_index(): raise Exception( "ERROR: Invalid HISAT2 index. Please run build index to generate an index." ) hisat2_Cmd = ['hisat2'] #add options hisat2_Cmd.extend(pu.parse_unix_args(valid_args, kwargs)) #execute command cmd_status = pe.execute_command(hisat2_Cmd, verbose=verbose, quiet=quiet, logs=logs, objectid=objectid) if not cmd_status: print("hisat2 failed:" + " ".join(hisat2_Cmd)) #return status return cmd_status
def run_bowtie2(self, verbose=False, quiet=False, logs=True, objectid="NA", **kwargs): """Wrapper for running bowtie2. Parameters ---------- verbose: bool Print stdout and std error quiet: bool Print nothing logs: bool Log this command to pyrpipe logs objectid: str Provide an id to attach with this command e.g. the SRR accession. This is useful for debugging, benchmarking and reports. kwargs: dict arguments to pass to bowtie2. This will override parameters already existing in self.passedArgumentDict but NOT replace all of them. :return: Returns the status of bowtie2. True if passed, False if failed. :rtype: bool """ #check for a valid index if not self.check_index(): raise Exception( "ERROR: Invalid Bowtie2 index. Please run build index to generate an index." ) #override existing arguments mergedArgsDict = {**self.passedArgumentDict, **kwargs} bowtie2_cmd = ['bowtie2'] bowtie2_cmd.extend(pu.parse_unix_args(self.valid_args, mergedArgsDict)) #print("Executing:"+" ".join(bowtie2_cmd)) #start execution status = pe.execute_command(bowtie2_cmd, verbose=verbose, quiet=quiet, logs=logs, objectid=objectid) if not status: pu.print_boldred("bowtie2 failed") return status
def run_samtools(self, sub_command, valid_args=None, verbose=False, quiet=False, logs=True, objectid="NA", **kwargs): """A wrapper to run samtools. Parameters ---------- sub_command: string sub_command to pass to samtools e.g. sort, merge etc valid_args: list A list containing valid parameters. Parameters in kwargs not in this list will be ignored. Default: None verbose: bool Print stdout and std error quiet: bool Print nothing logs: bool Log this command to pyrpipe logs objectid: str Provide an id to attach with this command e.g. the SRR accession. This is useful for debugging, benchmarking and reports. kwargs: dict Options to pass to samtools. This will override the existing options :return: Returns the status of samtools. True if passed, False if failed. :rtype: bool """ samtools_cmd = ['samtools', sub_command] #add options samtools_cmd.extend(pu.parse_unix_args(valid_args, kwargs)) #start execution status = pe.execute_command(samtools_cmd, verbose=verbose, quiet=quiet, logs=logs, objectid=objectid) if not status: pu.print_boldred("samtools failed") #return status return status
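#Hedged usage sketch (not part of the original module): sort a BAM through the
#run_samtools wrapper above. `samtools_obj` is assumed to be an instance of the class
#defining run_samtools; '-o' and '-@' are standard samtools sort options, and the
#positional input file travels under the special '--' key consumed by pu.parse_unix_args.
def example_samtools_sort(samtools_obj, in_bam, out_bam, threads=4, srr="NA"):
    """Illustrative only; file names are placeholders."""
    kwargs = {"-o": out_bam, "-@": str(threads), "--": (in_bam,)}
    return samtools_obj.run_samtools("sort", valid_args=None, objectid=srr, **kwargs)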
def run_portcullis(self, sub_command, valid_args=None, verbose=False, quiet=False, logs=True, objectid="NA", **kwargs): """ Wrapper to run portcullis. Parameters ---------- sub_command: string sub_command to pass to portcullis e.g. full, prep, junc etc. valid_args: list A list of valid arguments. Arguments outside this list will be ignored. If empty or None, accepts all arguments. verbose: bool Print stdout and std error quiet: bool Print nothing logs: bool Log this command to pyrpipe logs objectid: str Provide an id to attach with this command e.g. the SRR accession. This is useful for debugging, benchmarking and reports. kwargs: dict arguments to pass to portcullis. :return: Returns the status of portcullis. True if passed, False if failed. :rtype: bool """ portcullis_cmd = ['portcullis', sub_command] #add options portcullis_cmd.extend(pu.parse_unix_args(valid_args, kwargs)) #start execution status = pe.execute_command(portcullis_cmd, verbose=verbose, quiet=quiet, logs=logs, objectid=objectid) if not status: pu.print_boldred("portcullis failed") #return status return status
def run_stringtie(self, verbose=False, quiet=False, logs=True, objectid="NA", **kwargs): """Wrapper for running stringtie. This can be used to run stringtie without using the perform_assembly() function. Parameters ---------- verbose: bool Print stdout and std error quiet: bool Print nothing logs: bool Log this command to pyrpipe logs objectid: str Provide an id to attach with this command e.g. the SRR accession. This is useful for debugging, benchmarking and reports. kwargs: dict Options to pass to stringtie. This will override the existing options in self.passed_args_dict (only replace existing arguments and not replace all the arguments). :return: Returns the status of stringtie command. :rtype: bool """ #override existing arguments merged_args_dict = {**self.passed_args_dict, **kwargs} stie_cmd = ['stringtie'] #add options stie_cmd.extend( pu.parse_unix_args(self.valid_args_list, merged_args_dict)) #start execution status = pe.execute_command(stie_cmd, verbose=verbose, quiet=quiet, logs=logs, objectid=objectid) if not status: pu.print_boldred("stringtie failed") #return status return status
def run_trimgalore(self, verbose=False, quiet=False, logs=True, objectid="NA", **kwargs): """Wrapper for running trimgalore Parameters ---------- verbose: bool Print stdout and std error quiet: bool Print nothing logs: bool Log this command to pyrpipe logs objectid: str Provide an id to attach with this command e.g. the SRR accession. This is useful for debugging, benchmarking and reports. kwargs: dict Options to pass to trimgalore (will override existing parameters) :return: Status of trimgalore command :rtype: bool """ #override existing arguments mergedArgsDict = {**self.passedArgumentDict, **kwargs} #create command to run trimgalore_cmd = ['trim_galore'] trimgalore_cmd.extend( pu.parse_unix_args(self.valid_args, mergedArgsDict)) #start execution status = pe.execute_command(trimgalore_cmd, verbose=verbose, quiet=quiet, logs=logs, objectid=objectid) if not status: pu.print_boldred("trimgalore failed") #return status return status
def run_cufflinks(self, verbose=False, quiet=False, logs=True, objectid="NA", **kwargs): """Wrapper for running cufflinks Parameters ---------- verbose: bool Print stdout and std error quiet: bool Print nothing logs: bool Log this command to pyrpipe logs objectid: str Provide an id to attach with this command e.g. the SRR accession. kwargs: dict Options passed to cufflinks :return: Returns the status of cufflinks command. :rtype: bool """ #override existing arguments merged_args_dict = {**self.passed_args_dict, **kwargs} cufflinks_cmd = ['cufflinks'] #add options cufflinks_cmd.extend( pu.parse_unix_args(self.valid_args_list, merged_args_dict)) #start execution status = pe.execute_command(cufflinks_cmd, verbose=verbose, quiet=quiet, logs=logs, objectid=objectid) if not status: pu.print_boldred("cufflinks failed") #return status return status
def run_transdecoder(self, command, valid_args=None, verbose=False, quiet=False, logs=True, objectid="NA", **kwargs): """Wrapper for running transdecoder. Parameters ---------- command: str the transdecoder command to run e.g. TransDecoder.LongOrfs or TransDecoder.Predict valid_args: list list of valid arguments verbose: bool Print stdout and std error quiet: bool Print nothing logs: bool Log this command to pyrpipe logs objectid: str Provide an id to attach with this command e.g. the SRR accession. This is useful for debugging, benchmarking and reports. kwargs: dict arguments to pass to transdecoder. :return: Returns the status of transdecoder. True if passed, False if failed. :rtype: bool """ txd_cmd = [command] #add options txd_cmd.extend(pu.parse_unix_args(valid_args, kwargs)) #execute command cmd_status = pe.execute_command(txd_cmd, verbose=verbose, quiet=quiet, logs=logs, objectid=objectid) if not cmd_status: print("Transdecoder failed:" + " ".join(txd_cmd)) #return status return cmd_status
def runRibocode(self, gtf, genome, bam, l="no", outsuffix="ribocode_out", verbose=False, quiet=False, logs=True, objectid="NA"): """Wrapper to run ribocode in one step """ #check input if not pu.check_files_exist(gtf, genome, bam): pu.print_boldred("Please check input files for Ribocode") return "" out_dir = pu.get_file_directory(gtf) outFile = os.path.join(out_dir, outsuffix) newOpts = {"-g": gtf, "-f": genome, "-r": bam, "-l": l, "-o": outFile} ribocode_Cmd = ['RiboCode_onestep'] ribocode_Cmd.extend(pu.parse_unix_args(self.valid_args, newOpts)) status = pe.execute_command(ribocode_Cmd, verbose=verbose, quiet=quiet, logs=logs, objectid=objectid) if not status: pu.print_boldred("ribocode failed") return "" return outFile
def build_index(self, index_path, index_name, *args, threads=None, overwrite=False, verbose=False, quiet=False, logs=True, objectid="NA", **kwargs): """Build a hisat2 index with given parameters and saves the new index to self.hisat2_index. Parameters ---------- index_path: string Path where the index will be created index_name: string A name for the index args: tuple Path to reference input files threads: int Num threads to use verbose : bool Print stdout and std error quiet : bool Print nothing logs : bool Log this command to pyrpipe logs objectid : string Provide an id to attach with this command e.g. the SRR accession. This is useful for debugging, benchmarking and reports. kwargs: dict Parameters for the hisat2-build command :return: Returns the status of hisat2-build :rtype: bool """ #check input references if len(args) < 1: pu.print_boldred( "No reference sequence provided to hisat2-build. Exiting") return False if not pu.check_files_exist(*args): pu.print_boldred( "Please check input reference sequences provided to hisat2-build. Exiting" ) return False print("Building hisat2 index...") hisat2Buildvalid_args = [ '-c', '--large-index', '-a', '-p', '--bmax', '--bmaxdivn', '--dcv', '--nodc', '-r', '-3', '-o', '-t', '--localoffrate', '--localftabchars', '--snp', '--haplotype', '--ss', '--exon', '--seed', '-q', '-h', '--usage', '--version' ] #create the out dir if not pu.check_paths_exist(index_path): if not pu.mkdir(index_path): print( "ERROR in building hisat2 index. Failed to create index directory." ) return False if not overwrite: #check if files exists if pu.check_hisatindex(os.path.join(index_path, index_name)): print("Hisat2 index with same name already exists. Exiting...") self.hisat2_index = os.path.join(index_path, index_name) return True #handle threads if not threads: threads = self.threads hisat2Build_Cmd = ['hisat2-build'] newOpts = {"-p": str(threads)} mergedOpts = {**newOpts, **kwargs} #add options hisat2Build_Cmd.extend( pu.parse_unix_args(hisat2Buildvalid_args, mergedOpts)) #add input files hisat2Build_Cmd.append(str(",".join(args))) #add dir/basename hisat2Build_Cmd.append(os.path.join(index_path, index_name)) #print("Executing:"+str(" ".join(hisat2Build_Cmd))) #start execution status = pe.execute_command(hisat2Build_Cmd, verbose=verbose, quiet=quiet, logs=logs, objectid=objectid) if not status: pu.print_boldred("hisat2-build failed") return False #check index files if not pu.check_hisatindex(os.path.join(index_path, index_name)): pu.print_boldred("hisat2-build failed") return False #set the index path self.hisat2_index = os.path.join(index_path, index_name) #return status return True
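#Hedged usage sketch (not part of the original module): build a HISAT2 index with the
#build_index method above. `hisat2_obj` is assumed to be an instance of the class
#defining build_index; on success the index basename is also stored in
#hisat2_obj.hisat2_index. Directory and index names are placeholders.
def example_build_hisat2_index(hisat2_obj, genome_fasta, threads=4):
    """Illustrative only."""
    return hisat2_obj.build_index("hisat2_index", "genome_index", genome_fasta,
                                  threads=threads, overwrite=False)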
def run_fasterqdump(self, delete_sra=False, verbose=False, quiet=False, logs=True, **kwargs): """Execute fasterq-dump to convert .sra file to fastq files. The fastq files will be stored in the same directory as the sra file. All fastq files should be consistently named using the extension .fastq Parameters ---------- delete_sra: bool delete sra file after completion verbose: bool Print stdout and std error quiet: bool Print nothing logs: bool Log this command to pyrpipe logs kwargs: dict A dict containing fasterq-dump arguments :return: Return status of the fasterq-dump command. True if successful download and False if failed. :rtype: bool Examples -------- >>> object.run_fasterqdump() True """ #check if fastq files exists already if self.fastqFilesExistsLocally(): pu.print_green("Fastq files exist already") return True #first check is sra exists if not self.sraFileExistsLocally(): pu.print_boldred( "Error executing fasterq-dump: .sra file not found. Please run download_sra()." ) return False #else directly run fasterq-dump on accession ? fasterqdumpArgsList = [ '-f', '-t', '-s', '-N', '-X', '-a', '-p', '-c', '-o', '-O', '-h', '-V', '-L', '-v', '-q', '-b', '-m', '-e', '-x', '-S', '-3', '-P', '-M', '-B', '--option-file', '--strict', '--table', '--include-technical', '--skip-technical', '--concatenate-reads' ] #ignore location and file name arguments if given if '-O' in kwargs: print("Ignoring -O flag." + " location is: " + self.location) #delete -O parameter del kwargs['-O'] if '-o' in kwargs: print("Ignoring -o flag." + " File name is: " + self.srr_accession) #delete -o parameter del kwargs['-o'] #execute command fstrqd_Cmd = ['fasterq-dump'] fstrqd_Cmd.extend(pu.parse_unix_args(fasterqdumpArgsList, kwargs)) #add location fstrqd_Cmd.extend(['-O', self.location]) #add output filename. output will be <srr_accession>.fastq or <srr_accession>_1.fastq and <srr_accession>_2.fastq fstrqd_Cmd.extend(['-o', self.srr_accession + ".fastq"]) fstrqd_Cmd.append(self.localSRAFilePath) #execute command cmdStatus = pe.execute_command(fstrqd_Cmd, objectid=self.srr_accession) if not cmdStatus: print("fasterqdump failed for:" + self.srr_accession) return False #check if fastq files are downloaded if (self.layout == "SINGLE"): self.localfastqPath = os.path.join(self.location, self.srr_accession + ".fastq") if not pu.check_files_exist(self.localfastqPath): pu.print_boldred("Error running fasterq-dump file. File " + self.localfastqPath + " does not exist!!!") return False else: self.localfastq1Path = os.path.join( self.location, self.srr_accession + "_1.fastq") self.localfastq2Path = os.path.join( self.location, self.srr_accession + "_2.fastq") if not pu.check_files_exist(self.localfastq1Path, self.localfastq2Path): pu.print_boldred("Error running fasterq-dump file. File " + self.localfastq1Path + " does not exist!!!") return False #delete sra file if specified if delete_sra: self.delete_sra() return True
def download_sra(self, verbose=False, quiet=False, logs=True, **kwargs): """This function downloads .sra file from NCBI SRA servers using the prefetch command. NCBI sra-toolkit 2.9 or higher must be installed on the system in order to use prefetch. prefetch will create a folder with name same as <srr_accession> under the location (path) specified. The path of downloaded file is saved in the object as localSRAPath. This localSRAPath is then used by other functions to access the downloaded data. The **kwargs is for passing arguments to the prefetch command. Parameters ---------- kwargs: dict dict containing additional prefetch arguments :return: Return status of the prefetch command. True if successful download and False if failed. :rtype: bool Examples -------- >>> object.download_sra() True """ #store path to the downloaded sra file self.localSRAFilePath = os.path.join(self.location, self.srr_accession + ".sra") #check if already exists if pu.check_files_exist(self.localSRAFilePath): pu.print_green("File already exists:" + self.localSRAFilePath) #save file .sra file size self.sraFileSize = pu.get_file_size(self.localSRAFilePath) #test if file is paired or single end if pe.is_paired(self.localSRAFilePath): self.layout = "PAIRED" else: self.layout = "SINGLE" return True pu.print_info("Downloading " + self.srr_accession + " ...") #scan for prefetch arguments prefetchArgsList = [ '-f', '-t', '-l', '-n', '-s', '-R', '-N', '-X', '-o', '-a', '--ascp-options', '-p', '--eliminate-quals', '-c', '-o', '-O', '-h', '-V', '-L', '-v', '-q' ] #ignore location and file name arguments if given if '-O' in kwargs: print("Ignoring -O flag." + " location is: " + self.location) #delete -O parameter del kwargs['-O'] if '-o' in kwargs: print("Ignoring -o flag." + " File name is: " + self.srr_accession) #delete -o parameter del kwargs['-o'] prefetch_Cmd = ['prefetch'] prefetch_Cmd.extend(pu.parse_unix_args(prefetchArgsList, kwargs)) prefetch_Cmd.extend(['-O', self.location]) prefetch_Cmd.append(self.srr_accession) cmdStatus = pe.execute_command(prefetch_Cmd, objectid=self.srr_accession) if not cmdStatus: pu.print_boldred("prefetch failed for:" + self.srr_accession) return False #validate path exists if not pu.check_files_exist(self.localSRAFilePath): pu.print_boldred("Error downloading file. File " + self.localSRAFilePath + " does not exist!!!") return False print("Downloaded file: " + self.localSRAFilePath + " {0} ".format(pu.get_file_size(self.localSRAFilePath))) #save file .sra file size self.sraFileSize = pu.get_file_size(self.localSRAFilePath) #test if file is paired or single end if pe.is_paired(self.localSRAFilePath): self.layout = "PAIRED" else: self.layout = "SINGLE" return True
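#Hedged usage sketch (not part of the original module): chain download_sra() and
#run_fasterqdump() on an SRA-like object exposing the two methods above. '-e' (threads)
#and '-f' (force) are standard fasterq-dump options already listed in
#fasterqdumpArgsList; the empty value marks a boolean flag, following the convention
#used elsewhere in this file.
def example_sra_to_fastq(sra_obj):
    """Illustrative only."""
    if not sra_obj.download_sra():
        return False
    return sra_obj.run_fasterqdump(delete_sra=True, **{"-e": "8", "-f": ""})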
def download_fastq(self, verbose=False, quiet=False, logs=True, procs=2, **kwargs): """Function to download fastq files """ #check if fastq files exists already if self.fastqFilesExistsLocally(): pu.print_green("Fastq files exist already") return True fasterqdumpArgsList = [ '-f', '-t', '-s', '-N', '-X', '-a', '-p', '-c', '-o', '-O', '-h', '-V', '-L', '-v', '-q', '-b', '-m', '-x', '-S', '-3', '-P', '-M', '-B', '--option-file', '--strict', '--table', '--include-technical', '--skip-technical', '--concatenate-reads' ] fstrqd_Cmd = ['fasterq-dump'] fstrqd_Cmd.extend(pu.parse_unix_args(fasterqdumpArgsList, kwargs)) #add location fstrqd_Cmd.extend(['-O', self.location]) #add output filename. output will be <srr_accession>.fastq or <srr_accession>_1.fastq and <srr_accession>_2.fastq fstrqd_Cmd.extend(['-o', self.srr_accession + ".fastq"]) fstrqd_Cmd.extend(['-e', str(procs)]) if self.sraFileExistsLocally(): fstrqd_Cmd.append(self.localSRAFilePath) else: fstrqd_Cmd.append(self.srr_accession) #execute command cmdStatus = pe.execute_command(fstrqd_Cmd, objectid=self.srr_accession) if not cmdStatus: print("fasterqdump failed for:" + self.srr_accession) return False if not hasattr(self, 'layout'): fq_files = pe.find_files(self.location, self.srr_accession + "*.fastq") if len(fq_files) == 1: self.layout = 'SINGLE' else: self.layout = 'PAIRED' #check if fastq files are downloaded if (self.layout == "SINGLE"): self.localfastqPath = os.path.join(self.location, self.srr_accession + ".fastq") if not pu.check_files_exist(self.localfastqPath): pu.print_boldred("Error running fasterq-dump file. File " + self.localfastqPath + " does not exist!!!") return False else: self.localfastq1Path = os.path.join( self.location, self.srr_accession + "_1.fastq") self.localfastq2Path = os.path.join( self.location, self.srr_accession + "_2.fastq") if not pu.check_files_exist(self.localfastq1Path, self.localfastq2Path): pu.print_boldred("Error running fasterq-dump file. File " + self.localfastq1Path + " does not exist!!!") return False return True
def build_index(self,index_path,transcriptome,objectid="NA"): """Function to build kallisto index index_path: str path to the index transcriptome: str Path to transcriptome objectid: str Provide an id to attach with this command e.g. the SRR accession. This is useful for debugging, benchmarking and reports. :return: Status of kallisto index :rtype: bool """ #if index already exists then exit if not _force: #check if files exists if pu.check_files_exist(index_path): pu.print_green("Kallisto index {} already exists.".format(index_path)) self.index=index_path return True #check input if not pu.check_files_exist(transcriptome): pu.print_boldred("{} does not exist. Exiting".format(transcriptome)) raise ValueError("Please check input to kallisto index") #create out dir indexdir=pu.get_file_directory(index_path) #create the out dir if not pu.check_paths_exist(indexdir): if not pu.mkdir(indexdir): raise OSError("Error creating kallisto index. Failed to create index directory.") args=(transcriptome,) internal_kwargs={"-i":index_path} #read build parameters yamlfile=os.path.join(_params_dir,'kallisto_index.yaml') if pu.check_files_exist(yamlfile): yaml_params=pl.YAML_loader(yamlfile) yaml_kwargs=yaml_params.get_kwargs() internal_kwargs={**yaml_kwargs,**internal_kwargs} #add positional args internal_kwargs['--']=args validArgsIndex=valid_args._args_KALLISTO_INDEX kallisto_cmd=['kallisto','index'] kallisto_cmd.extend(pu.parse_unix_args(validArgsIndex,internal_kwargs)) #call kallisto status=pe.execute_command(kallisto_cmd,objectid=objectid) if status: if pu.check_files_exist(index_path) and not _dryrun: #update object's index self.index=index_path if self.check_index(): return True else: raise OSError("Error building kallisto index") return False
def build_index(self,index_path,transcriptome,objectid="NA"): """Build a salmon index and save it to self.index. Parameters ---------- index_path : str Path where the salmon index will be created. transcriptome : str Path to the transcriptome fasta file. objectid : str, optional Provide an id to attach with this command e.g. the SRR accession. The default is "NA". Raises ------ OSError If the output directory for the index can not be created. Returns ------- bool True if the index was built successfully or already exists, False otherwise. """ #if index already exists then exit if not _force: #check if files exists if pu.check_salmonindex(index_path): pu.print_green("Salmon index {} already exists.".format(index_path)) self.index=index_path return True #check input if not pu.check_files_exist(transcriptome): pu.print_boldred("{} does not exist. Exiting".format(transcriptome)) return False #create out dir indexdir=pu.get_file_directory(index_path) #create the out dir if not pu.check_paths_exist(indexdir): if not pu.mkdir(indexdir): raise OSError("Error creating salmon index. Failed to create index directory.") validArgsIndex=valid_args._args_SALMON_INDEX internal_kwargs={"--threads":_threads,"-t":transcriptome,"-i":index_path} #read build parameters yamlfile=os.path.join(_params_dir,'salmon_index.yaml') if pu.check_files_exist(yamlfile): yaml_params=pl.YAML_loader(yamlfile) yaml_kwargs=yaml_params.get_kwargs() internal_kwargs={**yaml_kwargs,**internal_kwargs} salmon_cmd=['salmon','index'] salmon_cmd.extend(pu.parse_unix_args(validArgsIndex,internal_kwargs)) #call salmon status=pe.execute_command(salmon_cmd,objectid=objectid) if status: if pu.check_salmonindex(index_path) and not _dryrun: #update object's index self.index=index_path if self.check_index(): return True else: raise OSError("Error building salmon index") return False
def build_index(self, index_path, genome, objectid="NA"): """Build a STAR index with given parameters and saves the new index to self.index. Parameters ---------- index_path: string Path where the index will be created genome: string Path to the reference genome objectid : string Provide an id to attach with this command e.g. the SRR accession. This is useful for debugging, benchmarking and reports. :return: Returns the status of STAR-build index :rtype: bool """ #if index already exists then exit if not _force: if pu.check_starindex(index_path): pu.print_green( "STAR index {} already exists.".format(index_path)) self.index = index_path return True #check input files if not (pu.check_files_exist(genome)): pu.print_boldred( "Please provide a valid input fasta file to build STAR index") raise ValueError("Please check input to build star index") #create index path if doesnt exist if not pu.check_paths_exist(index_path): if not pu.mkdir(index_path): raise OSError( "Error creating STAR index. Failed to create index directory." ) return False #determine parameters and execute cmd #internal_args=() internal_kwargs = { "--runMode": "genomeGenerate", "--genomeDir": index_path, "--genomeFastaFiles": genome, "--runThreadN": self._threads } #read build parameters yamlfile = os.path.join(_params_dir, 'star_index.yaml') if pu.check_files_exist(yamlfile): yaml_params = pl.YAML_loader(yamlfile) yaml_kwargs = yaml_params.get_kwargs() internal_kwargs = {**yaml_kwargs, **internal_kwargs} starbuild_Cmd = ['STAR'] starbuild_Cmd.extend( pu.parse_unix_args(valid_args._args_STAR, internal_kwargs)) #execute command status = pe.execute_command(starbuild_Cmd, objectid=objectid) if status: if pu.check_paths_exist(index_path) and not _dryrun: #update object's index self.index = index_path if self.check_index(): return True else: raise OSError("Error building STAR index") return True
def build_index(self, index_path, genome, objectid="NA"): """Build a bowtie2 index with given parameters and saves the new index to self.index. Parameters ---------- index_path: string Path where the index will be created genome: string Path to the reference genome objectid : string Provide an id to attach with this command e.g. the SRR accession. This is useful for debugging, benchmarking and reports. :return: Returns the status of bowtie2-build :rtype: bool """ #if index already exists then exit if not _force: if pu.check_bowtie2index(index_path): pu.print_green( "bowtie2 index {} already exists.".format(index_path)) self.index = index_path return True #check input files if not (pu.check_files_exist(genome)): pu.print_boldred( "Please provide a valid input fasta file to build bowtie2 index" ) raise ValueError("Please check input to bowtie2 build index") bowtie2_build_args = [ '-f', '-c', '--large-index', '--debug', '--sanitized', '--verbose', '-a', '--noauto', '-p', '--packed', '--bmax', '--bmaxdivn', '--dcv', '--nodc', '-r', '--noref', '-3', '--justref', '-o', '--offrate', '-t', '--ftabchars', '--threads', '--seed', '-q', '--quiet' ] #create the out dir indexdir = pu.get_file_directory(index_path) if not pu.check_paths_exist(indexdir): if not pu.mkdir(indexdir): raise OSError( "Error creating bowtie2 index. Failed to create index directory." ) args = (genome, index_path) internal_kwargs = {"--threads": self._threads} #read build parameters yamlfile = os.path.join(_params_dir, 'bowtie2_index.yaml') if pu.check_files_exist(yamlfile): yaml_params = pl.YAML_loader(yamlfile) yaml_kwargs = yaml_params.get_kwargs() internal_kwargs = {**yaml_kwargs, **internal_kwargs} #add positional args internal_kwargs['--'] = args bowtie2Build_Cmd = ['bowtie2-build'] #add options bowtie2Build_Cmd.extend( pu.parse_unix_args(bowtie2_build_args, internal_kwargs)) #start execution status = pe.execute_command(bowtie2Build_Cmd, objectid=objectid) if not status: pu.print_boldred("bowtie2-build failed") return False if status: if pu.check_bowtie2index(index_path) and not _dryrun: #update object's index self.index = index_path if self.check_index(): return True else: raise OSError("Error building bowtie2 index") return True
def download_fastq(self,*args,**kwargs): """Function to download fastq files """ #check if fastq files exists already if self.fastq_exists(): pu.print_green("Fastq files exist already") return True #internal_args are created by pyrpipe and will always replace external passed args #add the positional args if self.sra_exists(): internal_args=(self.sra_path,) else: #fstrqd_Cmd.append(self.srr_accession) internal_args=(self.srr_accession,) #keyword args; boolean flags have empty values internal_kwargs={'-O':self.directory, '-o':self.srr_accession+".fastq", '-e':_threads, '-f':"" } #merge args, kwargs, internal_args, internal_kwargs #If args and kwargs are present if args or kwargs: internal_kwargs={**kwargs,**internal_kwargs} internal_args=tuple(set(args+internal_args)) #append the args to the kwargs using special key '--' internal_kwargs['--']=internal_args else: #check for yaml parameters filepath=os.path.join(_params_dir,'fasterq-dump.yaml') yaml_params=pl.YAML_loader(filepath) yaml_kwargs=yaml_params.get_kwargs() #yaml_args=yaml_params.get_args() internal_kwargs={**yaml_kwargs,**internal_kwargs} #internal_args=tuple(set(yaml_args+internal_args)) internal_kwargs['--']=internal_args params_list=pu.parse_unix_args(valid_args._args_FASTERQDUMP,internal_kwargs) fstrqd_Cmd=['fasterq-dump'] #add command and params fstrqd_Cmd.extend(params_list) #execute command cmdStatus=pe.execute_command(fstrqd_Cmd,objectid=self.srr_accession) if not cmdStatus: pu.print_boldred("fasterqdump failed for:"+self.srr_accession) return False #self.search_fastq(self.directory) #determine layout self.layout='PAIRED' #check files with names <SRR>_1.fastq and <SRR>_2.fastq fq=os.path.join(self.directory,self.srr_accession+'_1.fastq') fq2=os.path.join(self.directory,self.srr_accession+'_2.fastq') self.fastq_path=fq self.fastq2_path=fq2 #if dry run if _dryrun: return True if pu.check_files_exist(fq,fq2): self.fastq_path=fq self.fastq2_path=fq2 self.layout="PAIRED" #remove SRA self.delete_sra() return True #check single end file fq=os.path.join(self.directory,self.srr_accession+'.fastq') if pu.check_files_exist(fq): self.fastq_path=fq self.layout="SINGLE" #remove SRA self.delete_sra() return True return False
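#Note on parameter precedence in the newer wrappers above (download_fastq and the
#kallisto/salmon/STAR/bowtie2 build_index methods): options loaded from the YAML file in
#_params_dir are merged first and the internally generated options win, i.e.
#merged = {**yaml_kwargs, **internal_kwargs}. A minimal, self-contained sketch of that
#merge; the option values below are made up for illustration.
def example_param_precedence():
    yaml_kwargs = {"-e": "2", "-m": "1G"}           #as if read from fasterq-dump.yaml
    internal_kwargs = {"-O": "sra_dir", "-e": "8"}  #generated by the wrapper itself
    merged = {**yaml_kwargs, **internal_kwargs}
    #merged == {"-e": "8", "-m": "1G", "-O": "sra_dir"}: the internal '-e' overrides YAML
    return merged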
def run(self, *args, subcommand=None, target=None, requires=None, objectid=None, verbose=None, logs=None, **kwargs): """Run the wrapped command after checking required files, and verify the target files when it finishes. Parameters ---------- *args : Tuple Positional arguments passed to a command. This will completely REPLACE the existing self._args created during initialization of the runnable object. subcommand : String or List, optional subcommand passed to the command. The default is None. target : Str or List of Str, optional The expected output/target files produced by the run operation. False is returned if the target files are not found after the command. The default is None. requires : Str or List of Str, optional Files required to start the run method. Exception is thrown if files are missing. The default is None. objectid : Str, optional A unique id to identify the run operation in the logs. This is useful for benchmarks. The default is None. **kwargs : Keyword arguments The options to be passed to the command. This will OVERRIDE ANY EXISTING options in the self._kwargs created during initialization of the runnable object. Raises ------ TypeError If incorrect types are used for target and requires. FileNotFoundError Raises FileNotFoundError if any of the required files are missing. OSError Raises OSError if the command is incorrect or not present in path. ValueError Raises ValueError if args_style is something other than LINUX or JAVA. Returns ------- bool Return the status of command as True or False. True implies command had 0 exit-code and all target files were found after the command finished. """ #create target list target_list = [] locks = [] requires_list = [] if target: if isinstance(target, str): target_list = [target] elif isinstance(target, list): target_list = target else: raise TypeError("target must be a string or a list object") #check for locks and remove previous locks and associated targets if exist for target in target_list: self.verify_integrity(target) #if target already present and not overwrite exists then return if not _force and target_list: if self.verify_target_list(target_list): pu.print_green('Target files {} already exist.'.format( ', '.join(target_list))) return True #check if all requirements are satisfied if requires: if isinstance(requires, str): requires_list = [requires] elif isinstance(requires, list): requires_list = requires else: raise TypeError("requires must be a string or a list object") #Raise exception if requirements not satisfied if requires_list: if not self.verify_target_list(requires_list): pu.print_boldred('Required files {} not found.'.format( ', '.join(requires_list))) raise FileNotFoundError("FilesNotFound") #check if any required file had lock for file in requires_list: if len(self.get_lock_files(file)): pu.print_boldred( 'Required file {} is corrupt. Please verify file is correct and remove any .Lock files'.format(', '.join(requires_list))) raise FileNotFoundError("FilesNotFound") #override class kwargs by passed kwargs kwargs = {**self._kwargs, **kwargs} #if no args provided use constructor's args if not args: args = self._args #if args are not None if args and args[0]: kwargs['--'] = args #make a copy of self._command if not self._command: pu.print_boldred("Error: command can not be None or empty") raise OSError("CommandNotFoundException") cmd = [] if isinstance(self._command, list): cmd = self._command.copy() elif isinstance(self._command, str): cmd = [self._command] #get valid args for the subcommand valid_args_subcommand = self.get_valid_parameters(subcommand) #if subcommand supplied, add it to the command if subcommand: if isinstance(subcommand, str): subcommand = [subcommand] #add to command cmd.extend(subcommand) #parse and add parameters if self._args_style == 'LINUX': cmd.extend(pu.parse_unix_args(valid_args_subcommand, kwargs)) elif self._args_style == 'JAVA': cmd.extend(pu.parse_java_args(valid_args_subcommand, kwargs)) else: pu.print_boldred("Unknown args style: {}".format(self._args_style)) raise ValueError("Unknown args style") #create locks on target; locks indicate incomplete commands if not _dryrun: locks = self.create_lock(target_list, ' '.join(cmd)) #execute command cmd_status = pe.execute_command(cmd, objectid=objectid, verbose=verbose, logs=logs) #if command finished remove locks self.remove_locks(locks) if not cmd_status: pu.print_boldred("{} failed: {}".format(self._command, " ".join(cmd))) #remove target files if not _dryrun and target_list: pu.print_boldred("Removing target files {}: ".format( ', '.join(target_list))) pe.delete_files(*target_list) return False if cmd_status and target_list and not _dryrun: return self.verify_target_list(target_list, verbose=True) #return status return cmd_status
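#Hedged usage sketch (not part of the original module): invoke the generic run() method
#above on a Runnable-like object wrapping 'samtools'. target/requires give run() the
#files to lock, verify and clean up; '-o' and '-@' are standard samtools sort options
#and are assumed to be accepted by the wrapped tool's valid parameters. Names below are
#placeholders.
def example_runnable_run(runnable_obj, in_bam, out_bam, srr="NA"):
    """Illustrative only."""
    return runnable_obj.run(in_bam, subcommand="sort", target=out_bam,
                            requires=in_bam, objectid=srr,
                            **{"-o": out_bam, "-@": "4"})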
def build_index(self, index_path, *args, threads=None, overwrite=False, verbose=False, quiet=False, logs=True, objectid="NA", **kwargs): """Build a star index with given parameters and saves the new index to self.star_index. Parameters ---------- index_path: string Path where the index will be created args: tuple Path to reference input files threads: int Num threads to use overwrite: bool Overwrite if index already exists verbose: bool Print stdout and std error quiet: bool Print nothing logs: bool Log this command to pyrpipe logs objectid: str Provide an id to attach with this command e.g. the SRR accession. This is useful for debugging, benchmarking and reports. kwargs: dict Parameters for the star command :return: Returns status of star command :rtype: bool """ #if index already exists then exit if not overwrite: if pu.check_starindex(index_path): pu.print_green("STAR index already exists. Using it...") self.star_index = index_path return True #check input files if len(args) < 1: pu.print_boldred( "Please provide input fasta file to build STAR index") return False if not pu.check_files_exist(*args): raise Exception("Please check input to star index") #create path if it doesn't exist if not pu.check_paths_exist(index_path): if not pu.mkdir(index_path): raise Exception("Error creating STAR index. Exiting.") if not threads: threads = self.threads #add runMode newOpts = { "--runMode": "genomeGenerate", "--genomeDir": index_path, "--genomeFastaFiles": " ".join(args), "--runThreadN": str(threads) } mergedOpts = {**newOpts, **kwargs} starbuild_Cmd = ['STAR'] starbuild_Cmd.extend(pu.parse_unix_args(None, mergedOpts)) #execute command status = pe.execute_command(starbuild_Cmd, verbose=verbose, quiet=quiet, logs=logs, objectid=objectid) if status: if pu.check_paths_exist(index_path): #update object's index self.star_index = index_path if self.check_index(): return True else: return False
def build_index(self, index_path, index_name, *args, threads=None, overwrite=False, verbose=False, quiet=False, logs=True, objectid="NA", **kwargs): """Build a bowtie2 index with given parameters and saves the new index to self.bowtie2_index. Parameters ---------- index_path: string Path where the index will be created index_name: string A name for the index args: tuple Path to reference input files threads: int Num threads to use overwrite: bool Overwrite already existing index verbose: bool Print stdout and std error quiet: bool Print nothing logs: bool Log this command to pyrpipe logs objectid: str Provide an id to attach with this command e.g. the SRR accession. This is useful for debugging, benchmarking and reports. kwargs: dict Parameters for the bowtie2-build command :return: Returns the status of bowtie2-build :rtype: bool """ #check input references if len(args) < 1: pu.print_boldred( "No reference sequence provided to bowtie2-build. Exiting") return False if not pu.check_files_exist(*args): pu.print_boldred( "Please check input reference sequences provided to bowtie2-build. Exiting" ) return False bowtie2_build_args = [ '-f', '-c', '--large-index', '--debug', '--sanitized', '--verbose', '-a', '--noauto', '-p', '--packed', '--bmax', '--bmaxdivn', '--dcv', '--nodc', '-r', '--noref', '-3', '--justref', '-o', '--offrate', '-t', '--ftabchars', '--threads', '--seed', '-q', '--quiet', '-h', '--help', '--usage', '--version' ] #create the out dir if not pu.check_paths_exist(index_path): if not pu.mkdir(index_path): print( "ERROR in building bowtie2 index. Failed to create index directory." ) return False if not overwrite: #check if files exists if pu.check_bowtie2index(os.path.join(index_path, index_name)): print( "bowtie2 index with same name already exists. Exiting...") self.bowtie2_index = os.path.join(index_path, index_name) return True bowtie2Build_Cmd = ['bowtie2-build'] if not threads: threads = self.threads newopts = {"--threads": str(threads)} mergedopts = {**newopts, **kwargs} #add options bowtie2Build_Cmd.extend( pu.parse_unix_args(bowtie2_build_args, mergedopts)) #add input files bowtie2Build_Cmd.append(str(",".join(args))) #add dir/basename bowtie2Build_Cmd.append(os.path.join(index_path, index_name)) #print("Executing:"+str(" ".join(bowtie2Build_Cmd))) #start execution status = pe.execute_command(bowtie2Build_Cmd, verbose=verbose, quiet=quiet, logs=logs, objectid=objectid) if not status: pu.print_boldred("bowtie2-build failed") return False #check index files if not pu.check_bowtie2index(os.path.join(index_path, index_name)): pu.print_boldred("bowtie2-build failed") return False #set the index path self.bowtie2_index = os.path.join(index_path, index_name) #return status return True