示例#1
0
 def perform_quant(self,sra_object,out_suffix="",out_dir="",objectid="NA"):
     """Quantify the reads of an SRA object with ``kallisto quant``.

     Parameters
     ----------
     sra_object: SRA
         Object providing fastq_path (plus fastq2_path when its layout is
         'PAIRED'), directory and srr_accession.
     out_suffix: str
         Text placed between "abundance" and ".tsv" in the final file name.
     out_dir: str
         Output directory. When empty, <sra_object.directory>/kallisto_out
         is used; a user supplied directory is created if it is missing.
     objectid: str
         Not read by this method; the command is tagged with
         sra_object.srr_accession.

     :return: Path to the (renamed) abundance file, "" on failure
     :rtype: string
     """
     if out_dir:
         #only a user supplied directory is created here
         if not pu.check_paths_exist(out_dir):
             pu.mkdir(out_dir)
     else:
         out_dir=os.path.join(sra_object.directory,"kallisto_out")

     #positional read files and options; "-i" always comes last
     if sra_object.layout == 'PAIRED':
         reads=(sra_object.fastq_path,sra_object.fastq2_path)
         quant_opts={"-o":out_dir}
     else:
         reads=(sra_object.fastq_path,)
         quant_opts={"-o":out_dir,"--single":""}
     quant_opts["-i"]=self.index

     #raw_tsv is the file expected from kallisto, final_tsv the name handed back
     raw_tsv=os.path.join(out_dir,"abundance.tsv")
     final_tsv=os.path.join(out_dir,"abundance"+out_suffix+".tsv")

     #nothing to do if the final file is already there and _force is off
     if not _force and pu.check_files_exist(final_tsv):
         pu.print_green('Target files {} already exist.'.format(final_tsv))
         return final_tsv

     succeeded=self.run(*reads,subcommand='quant',objectid=sra_object.srr_accession,target=raw_tsv,**quant_opts)
     if not succeeded:
         return ""

     #in a dry run no file is produced, so there is nothing to rename
     if _dryrun:
         return final_tsv

     pe.move_file(raw_tsv,final_tsv)
     return final_tsv if pu.check_files_exist(final_tsv) else ""
示例#2
0
 def perform_quant(self,sra_object,out_suffix="",out_dir="",objectid="NA"):
     """Run ``salmon quant`` on the fastq files of an SRA object.

     Parameters
     ----------
     sra_object: SRA
         Object providing fastq_path (plus fastq2_path when its layout is
         'PAIRED'), directory and srr_accession.
     out_suffix: str
         Text placed between "quant" and ".sf" in the final file name.
     out_dir: str
         Output directory. When empty, <sra_object.directory>/salmon_out
         is used; a user supplied directory is created if it is missing.
     objectid: str
         Not read by this method; the command is tagged with
         sra_object.srr_accession.

     :return: Path to the (renamed) quant.sf file, "" on failure
     :rtype: string
     """
     if out_dir:
         #create a user supplied out_dir if it does not exist
         if not pu.check_paths_exist(out_dir):
             pu.mkdir(out_dir)
     else:
         out_dir=os.path.join(sra_object.directory,"salmon_out")

     #option order is kept stable: -o, -l, read files, -i
     internal_kwargs={"-o":out_dir,"-l":"A"}
     if sra_object.layout == 'PAIRED':
         internal_kwargs["-1"]=sra_object.fastq_path
         internal_kwargs["-2"]=sra_object.fastq2_path
     else:
         internal_kwargs["-r"]=sra_object.fastq_path
     internal_kwargs["-i"]=self.index

     #outfile is the file expected from salmon, newfile the name handed back
     outfile=os.path.join(out_dir,"quant.sf")
     newfile=os.path.join(out_dir,"quant"+out_suffix+".sf")

     #skip the run if the final file already exists and _force is off
     if not _force and pu.check_files_exist(newfile):
         pu.print_green('Target files {} already exist.'.format(newfile))
         return newfile

     #The target handed to run() is the file the command itself is expected
     #to create (quant.sf). The suffixed name only exists after the move
     #below, so it cannot serve as the target of the command.
     status=self.run(None,subcommand='quant',objectid=sra_object.srr_accession,target=outfile,**internal_kwargs)
     if not status:
         return ""

     #rename quant.sf to the suffixed name unless this is a dry run
     if not _dryrun:
         pe.move_file(outfile,newfile)
         if not pu.check_files_exist(newfile):
             return ""
     return newfile
示例#3
0
    def run_transdecoder_predict(self,
                                 infasta,
                                 longorfs_dir,
                                 out_dir=None,
                                 verbose=False,
                                 quiet=False,
                                 logs=True,
                                 objectid="NA",
                                 **kwargs):
        """Run TransDecoder.Predict and optionally collect its output files.

        Parameters
        ----------
        infasta: str
            Path to the input fasta file; passed to the command as "-t".
        longorfs_dir: str
            Existing directory passed to the command as "-O".
        out_dir: str
            Directory that receives the .bed/.cds/.gff3/.pep files. When not
            given, the current working directory is used and no files are
            moved.
        verbose: bool
            Forwarded to run_transdecoder and to the file moves.
        quiet: bool
            Forwarded to run_transdecoder.
        logs: bool
            Forwarded to run_transdecoder.
        objectid: str
            Forwarded to run_transdecoder.
        kwargs: dict
            Extra options for the command; these override "-t" and "-O".

        :return: The output directory, or "" if an input is missing or the
                 command fails
        :rtype: string
        """
        # Stop early on missing inputs instead of launching a command that
        # cannot succeed; "" is the same value returned for a failed run.
        if not pu.check_files_exist(infasta):
            pu.print_boldred("Please check input file:" + infasta)
            return ""
        if not pu.check_paths_exist(longorfs_dir):
            pu.print_boldred("Path {} doesn't exist".format(longorfs_dir))
            return ""

        # Files are only moved when the caller asked for a specific out_dir.
        move_flag = bool(out_dir)
        if not out_dir:
            out_dir = os.getcwd()

        if not pu.check_paths_exist(out_dir):
            pu.mkdir(out_dir)

        # Caller supplied options take precedence over the defaults.
        mergedOpts = {"-t": infasta, "-O": longorfs_dir, **kwargs}

        #execute Predict
        status = self.run_transdecoder('TransDecoder.Predict',
                                       verbose=verbose,
                                       quiet=quiet,
                                       logs=logs,
                                       objectid=objectid,
                                       **mergedOpts)
        if not status:
            pu.print_boldred("Transdecoder failed")
            return ""

        #move output files to outdir
        if move_flag:
            # The result files are looked up by relative name, i.e. in the
            # current working directory.
            outfile_prefix = pu.get_filename(infasta) + ".transdecoder"
            for extension in (".bed", ".cds", ".gff3", ".pep"):
                produced = outfile_prefix + extension
                pe.move_file(produced, os.path.join(out_dir, produced),
                             verbose)
        return out_dir
示例#4
0
 def perform_assembly(self,bam_file,out_dir="",out_suffix="_cufflinks",reference_gtf=None,threads=None,overwrite=True,verbose=False,quiet=False,logs=True,objectid="NA",**kwargs):
     """Function to run cufflinks with BAM file as input.

     Parameters
     ----------
     bam_file: string
         path to bam file
     out_dir: string
         output directory; defaults to the directory of bam_file and is
         created if a given directory does not exist
     out_suffix: string
         Suffix for the output gtf file
     reference_gtf: str
         Path to reference gtf; passed to cufflinks as "-g"
     threads: int
         Number of threads to use; defaults to self.threads
     overwrite: bool
         When False and the output file already exists, return it without
         running cufflinks.
     verbose: bool
         Print stdout and std error
     quiet: bool
         Print nothing
     logs: bool
         Log this command to pyrpipe logs
     objectid: str
         Provide an id to attach with this command e.g. the SRR accession.
     kwargs: dict
         Options to pass to cufflinks. These override the options built here.

     :return: Returns the path to output GTF file, "" on any failure
     :rtype: string
     """

     #create path to output file
     fname=pu.get_file_basename(bam_file)
     if not out_dir:
         out_dir=pu.get_file_directory(bam_file)
     elif not pu.check_paths_exist(out_dir):
         pu.mkdir(out_dir)
     out_gtf_file=os.path.join(out_dir,fname+out_suffix+".gtf")

     #handle overwrite: reuse an existing result when overwriting is disabled
     if not overwrite and os.path.isfile(out_gtf_file):
         print("The file "+out_gtf_file+" already exists. Exiting..")
         return out_gtf_file

     if not threads:
         threads=self.threads

     #Add output dir, input bam (positional, under "--") and thread count
     new_opts={"-o":out_dir,"--":(bam_file,),"-p":str(threads)}

     #add ref gtf
     if reference_gtf:
         if not pu.check_files_exist(reference_gtf):
             pu.print_boldred("Error: Provided reference GTF {} doesn't exist. Exiting...".format(reference_gtf))
             return ""
         new_opts["-g"]=reference_gtf

     merged_opts={**new_opts,**kwargs}

     #call cufflinks
     status=self.run_cufflinks(verbose=verbose,quiet=quiet,logs=logs,objectid=objectid,**merged_opts)
     if not status:
         return ""

     #move out_dir/transcripts.gtf to the suffixed output name
     pe.move_file(os.path.join(out_dir,"transcripts.gtf"),out_gtf_file)
     if pu.check_files_exist(out_gtf_file):
         return out_gtf_file

     #the renamed file is missing: report failure as "" instead of falling
     #through and returning None
     return ""
示例#5
0
    def perform_qc(self,sra_object,out_dir="",out_suffix="_trimgalore",objectid="NA"):
        """Trim the fastq files of an SRA object with trimgalore.

        Parameters
        ----------
        sra_object: SRA
            Object providing layout, directory, fastq_path and, for the
            'PAIRED' layout, fastq2_path.
        out_dir: str
            Output directory. Defaults to sra_object.directory; a given
            directory is created when it does not exist.
        out_suffix: string
            Suffix appended to the input base name of each result file
            (followed by ".fastq").
        objectid: str
            Id attached to the executed command.

        :return: Paths of the trimmed fastq files: two items for paired
                 data, one item for single end data. Failures are reported
                 as ("",) when the command fails and as "" when the renamed
                 files are missing afterwards.
        :rtype: tuple
        """
        if out_dir:
            if not pu.check_paths_exist(out_dir):
                pu.mkdir(out_dir)
        else:
            out_dir=sra_object.directory

        if sra_object.layout!='PAIRED':
            #---- single end ----
            reads=sra_object.fastq_path
            positional=(reads,)
            options={"-o":out_dir}

            #trim galore is expected to write <input>_trimmed.fq into out_dir
            produced=os.path.join(out_dir,pu.get_file_basename(reads)+"_trimmed.fq")
            #name under which the result is handed back
            final=os.path.join(out_dir, pu.get_file_basename(reads)+out_suffix+".fastq")

            #reuse an existing result unless _force is set
            if not _force and pu.check_files_exist(final):
                pu.print_green('Target files {} already exist.'.format(final))
                return (final,)

            ok=self.run(*positional,objectid=objectid,target=produced,**options)
            if not ok:
                return ("",)

            #a dry run produces nothing to rename
            if _dryrun:
                return (final,)

            pe.move_file(produced,final)
            if not pu.check_files_exist(final):
                return ""
            return (final,)

        #---- paired end ----
        reads1=sra_object.fastq_path
        reads2=sra_object.fastq2_path
        positional=(reads1,reads2)
        options={"--paired":"","-o":out_dir}

        #trim galore is expected to write <input>_val_1.fq and <input>_val_2.fq
        produced1=os.path.join(out_dir,pu.get_file_basename(reads1)+"_val_1.fq")
        produced2=os.path.join(out_dir,pu.get_file_basename(reads2)+"_val_2.fq")
        #names under which the results are handed back
        final1=os.path.join(out_dir,pu.get_file_basename(reads1)+out_suffix+".fastq")
        final2=os.path.join(out_dir,pu.get_file_basename(reads2)+out_suffix+".fastq")

        #reuse existing results unless _force is set
        if not _force and pu.check_files_exist(final1,final2):
            pu.print_green('Target files {}, {} already exist.'.format(final1,final2))
            return final1,final2

        ok=self.run(*positional,objectid=objectid,target=[produced1,produced2],**options)
        if not ok:
            return ("",)

        #a dry run produces nothing to rename
        if _dryrun:
            return final1,final2

        pe.move_file(produced1,final1,verbose=False)
        pe.move_file(produced2,final2,verbose=False)
        if not pu.check_files_exist(final1,final2):
            return ""
        return final1,final2
示例#6
0
文件: qc.py 项目: lijing28101/pyrpipe
    def perform_qc(self,
                   sra_object,
                   out_dir="",
                   out_suffix="_trimgalore",
                   verbose=False,
                   quiet=False,
                   logs=True,
                   objectid="NA",
                   **kwargs):
        """Trim the fastq files of an SRA object with trimgalore.

        Parameters
        ----------
        sra_object: SRA
            Object providing layout, location, and localfastqPath (single
            end) or localfastq1Path/localfastq2Path (paired).
        out_dir: str
            Output directory. Defaults to sra_object.location; a given
            directory is created when it does not exist.
        out_suffix: string
            Suffix appended to the input base name of each result file
            (followed by ".fastq").
        verbose: bool
            Forwarded to run_trimgalore.
        quiet: bool
            Forwarded to run_trimgalore.
        logs: bool
            Forwarded to run_trimgalore.
        objectid: str
            Forwarded to run_trimgalore.
        kwargs: dict
            Extra trimgalore options. The input files, "-o" and "--paired"
            set by this method take precedence over them.

        :return: Paths of the trimmed fastq files: two items for paired
                 data, one for single end data, ("",) on failure.
        :rtype: tuple
        """
        if out_dir:
            if not pu.check_paths_exist(out_dir):
                pu.mkdir(out_dir)
        else:
            out_dir = sra_object.location

        if sra_object.layout != 'PAIRED':
            # ---- single end ----
            reads = sra_object.localfastqPath
            final = os.path.join(
                out_dir,
                pu.get_file_basename(reads) + out_suffix + ".fastq")
            # the input file is passed as a one-item tuple under "--"
            trim_opts = {**kwargs, "--": (reads, ), "-o": out_dir}

            self.run_trimgalore(verbose=verbose,
                                quiet=quiet,
                                logs=logs,
                                objectid=objectid,
                                **trim_opts)

            # trim galore is expected to leave <input>_trimmed.fq in out_dir
            produced = os.path.join(
                out_dir,
                pu.get_file_basename(reads) + "_trimmed.fq")
            pe.move_file(produced, final)

            # success is judged by the presence of the renamed file
            if pu.check_files_exist(final):
                return (final, )
            print("Trimgalore failed")
            return ("", )

        # ---- paired end ----
        reads1 = sra_object.localfastq1Path
        reads2 = sra_object.localfastq2Path
        final1 = os.path.join(
            out_dir,
            pu.get_file_basename(reads1) + out_suffix + ".fastq")
        final2 = os.path.join(
            out_dir,
            pu.get_file_basename(reads2) + out_suffix + ".fastq")
        trim_opts = {
            **kwargs, "--paired": "",
            "--": (reads1, reads2),
            "-o": out_dir
        }

        self.run_trimgalore(verbose=verbose,
                            quiet=quiet,
                            logs=logs,
                            objectid=objectid,
                            **trim_opts)

        # trim galore is expected to leave <input>_val_1.fq and
        # <input>_val_2.fq in out_dir
        produced1 = os.path.join(out_dir,
                                 pu.get_file_basename(reads1) + "_val_1.fq")
        produced2 = os.path.join(out_dir,
                                 pu.get_file_basename(reads2) + "_val_2.fq")
        pe.move_file(produced1, final1)
        pe.move_file(produced2, final2)

        # success is judged by the presence of both renamed files
        if pu.check_files_exist(final1, final2):
            return final1, final2
        print("Trimgalore failed")
        return ("", )
示例#7
0
文件: sra.py 项目: shinyfluba/pyrpipe
    def download_sra(self,**kwargs):
        """Fetch the .sra file of this accession with the prefetch command.

        The expected file is <self.directory>/<srr_accession>.sra and its
        path is stored in self.sra_path. If that file already exists no
        command is executed. After a download, a file found one level
        deeper (<self.directory>/<srr_accession>/<srr_accession>.sra) is
        moved to the expected location. On success the file size is stored
        in self.sraFileSize and self.layout is set to "PAIRED" or "SINGLE".

        Parameters
        ----------

        kwargs: dict
            Additional prefetch arguments. "-O" and "-o" are ignored (and
            removed from the dict) because the location and file name are
            fixed by this object.

        :return: True if the file is available (or in a dry run), False if
                 prefetch failed or the file is missing afterwards.
        :rtype: bool

        Examples
        --------
        >>> object.download_sra()
        True
        """
        accession=self.srr_accession
        #remember where the .sra file is expected
        self.sra_path=os.path.join(self.directory,accession+".sra")

        #download only when the file is not already present
        if not pu.check_files_exist(self.sra_path):
            #options recognised for prefetch
            prefetchArgsList=['-f','-t','-l','-n','-s','-R','-N','-X','-o','-a','--ascp-options','-p','--eliminate-quals','-c','-o','-O','-h','-V','-L','-v','-q']

            #output directory and file name cannot be overridden
            if '-O' in kwargs:
                print("Ignoring -O flag."+" directory is: "+self.directory)
                del kwargs['-O']
            if '-o' in kwargs:
                print("Ignoring -o flag."+" File name is: "+self.srr_accession)
                del kwargs['-o']

            #prefetch <parsed options> -O <directory> <accession>
            prefetch_Cmd=['prefetch',*pu.parse_unix_args(prefetchArgsList,kwargs),'-O',self.directory,accession]

            cmdStatus=pe.execute_command(prefetch_Cmd,objectid=accession)

            #a dry run counts as success without any further checks
            if _dryrun:
                return True

            if not cmdStatus:
                pu.print_boldred("prefetch failed for:"+accession)
                return False

            #the file may have been written into a sub-directory named after the accession
            nested_path=os.path.join(self.directory,accession,accession+".sra")
            if not pu.check_files_exist(self.sra_path) and pu.check_files_exist(nested_path):
                pe.move_file(nested_path,self.sra_path)

            #the file must be in place by now
            if not pu.check_files_exist(self.sra_path):
                pu.print_boldred("Error downloading file. File "+self.sra_path+" does not exist!!!\n Please check you SRA-Tools config")
                return False

        #bookkeeping shared by the "already present" and "just downloaded" cases
        self.sraFileSize=pu.get_file_size(self.sra_path)
        self.layout="PAIRED" if pe.is_paired(self.sra_path) else "SINGLE"

        return True
    
    
    #def destroy(self):
        """
        Delete everything for this object from memory and disk
        """
      #  pass
    
        
示例#8
0
    def perform_assembly(self,
                         bam_file,
                         out_dir="",
                         out_suffix="_cufflinks",
                         overwrite=True,
                         verbose=False,
                         quiet=False,
                         logs=True,
                         objectid="NA",
                         **kwargs):
        """Function to run cufflinks with BAM file as input.

        Parameters
        ----------
        bam_file: string
            path to bam file
        out_dir: string
            output directory; defaults to the directory of bam_file and is
            created if a given directory does not exist
        out_suffix: string
            Suffix for the output gtf file
        overwrite: bool
            When False and the output file already exists, return it without
            running cufflinks.
        verbose: bool
            Print stdout and std error
        quiet: bool
            Print nothing
        logs: bool
            Log this command to pyrpipe logs
        objectid: str
            Provide an id to attach with this command e.g. the SRR accession.
        kwargs: dict
            Options to pass to cufflinks. The "-o" and "--" entries built by
            this method take precedence over them.

        :return: Returns the path to output GTF file, "" on any failure
        :rtype: string
        """

        #create path to output file
        fname = pu.get_file_basename(bam_file)
        if not out_dir:
            out_dir = pu.get_file_directory(bam_file)
        elif not pu.check_paths_exist(out_dir):
            pu.mkdir(out_dir)
        out_gtf_file = os.path.join(out_dir, fname + out_suffix + ".gtf")

        #handle overwrite: reuse an existing result when overwriting is disabled
        if not overwrite and os.path.isfile(out_gtf_file):
            print("The file " + out_gtf_file + " already exists. Exiting..")
            return out_gtf_file

        #Add output dir and input bam (positional, under "--")
        new_opts = {"-o": out_dir, "--": (bam_file, )}
        merged_opts = {**kwargs, **new_opts}

        #call cufflinks
        status = self.run_cufflinks(verbose, quiet, logs, objectid,
                                    **merged_opts)
        if not status:
            return ""

        #move out_dir/transcripts.gtf to the suffixed output name
        pe.move_file(os.path.join(out_dir, "transcripts.gtf"), out_gtf_file)
        if pu.check_files_exist(out_gtf_file):
            return out_gtf_file

        #the renamed file is missing: report failure as "" instead of falling
        #through and returning None
        return ""
示例#9
0
    def perform_assembly(self,
                         bam_file,
                         out_dir=None,
                         out_suffix="_cufflinks",
                         objectid="NA"):
        """Assemble transcripts from a BAM file with cufflinks.

        Parameters
        ----------
        bam_file: string
            Path to the input bam file.
        out_dir: string
            Output directory. Defaults to the directory of bam_file; a given
            directory is created when it does not exist.
        out_suffix: string
            Suffix appended to the bam base name to form the gtf file name.
        objectid: str
            Id attached to the executed command.

        :return: Path to the renamed GTF file, "" on failure
        :rtype: string
        """
        base_name = pu.get_file_basename(bam_file)

        if out_dir:
            if not pu.check_paths_exist(out_dir):
                pu.mkdir(out_dir)
        else:
            out_dir = pu.get_file_directory(bam_file)

        # output directory plus the positional input, stored under '--'
        cufflinks_opts = {"-o": out_dir, "--": (bam_file, )}

        # raw_gtf is the file expected from cufflinks, final_gtf the name
        # handed back to the caller
        raw_gtf = os.path.join(out_dir, "transcripts.gtf")
        final_gtf = os.path.join(out_dir, base_name + out_suffix + ".gtf")

        # reuse an existing result unless _force is set
        if not _force and pu.check_files_exist(final_gtf):
            pu.print_green(
                'Target files {} already exist.'.format(final_gtf))
            return final_gtf

        succeeded = self.run(None,
                             objectid=objectid,
                             target=raw_gtf,
                             **cufflinks_opts)
        if not succeeded:
            return ""

        # a dry run produces nothing to rename
        if _dryrun:
            return final_gtf

        pe.move_file(raw_gtf, final_gtf)
        if pu.check_files_exist(final_gtf):
            return final_gtf
        return ""
示例#10
0
    def perform_alignment(self,
                          sra_object,
                          out_suffix="_star",
                          out_dir="",
                          objectid="NA"):
        """Function to perform STAR alignment using sra_object.

        Parameters
        ----------

        sra_object: SRA object
            An object of type SRA. The path to fastq files will be obtained from this object.
        out_suffix: string
            Suffix inserted before ".bam" in the name of the returned file
        out_dir: string
            Directory to save the results. Default value is sra_object.directory
        objectid: str
            Not read by this method; the command is tagged with
            sra_object.srr_accession.

        Note: if self._kwargs has no '--outSAMtype' entry, this method
        stores 'BAM SortedByCoordinate' there.

        :return: Returns the path to output bam, "" on failure
        :rtype: string
        """

        if not out_dir:
            out_dir = sra_object.directory
        elif not pu.check_paths_exist(out_dir):
            #create out_dir if not exists
            pu.mkdir(out_dir)

        #find layout and fq file paths
        if sra_object.layout == 'PAIRED':
            internal_kwargs = {
                "--readFilesIn":
                sra_object.fastq_path + " " + sra_object.fastq2_path
            }
        else:
            internal_kwargs = {"--readFilesIn": sra_object.fastq_path}
        #add out dir
        internal_kwargs["--outFileNamePrefix"] = out_dir + "/"

        #if outSAMtype is not specified make it sorted bam by default
        if '--outSAMtype' not in self._kwargs:
            self._kwargs['--outSAMtype'] = 'BAM SortedByCoordinate'

        #the expected out file depends on whether sorting was requested
        if 'SortedByCoordinate' in self._kwargs['--outSAMtype']:
            bam = os.path.join(out_dir, 'Aligned.sortedByCoord.out.bam')
        else:
            bam = os.path.join(out_dir, 'Aligned.out.bam')

        #Strip only the trailing ".bam". Splitting on '.bam' would cut the
        #path at the first occurrence, which is wrong when out_dir itself
        #contains ".bam".
        finalbam = os.path.splitext(bam)[0] + out_suffix + '.bam'

        #check if final bam already exists
        if not _force and pu.check_files_exist(finalbam):
            pu.print_green('Target files {} already exist.'.format(finalbam))
            return finalbam

        #call star
        status = self.run(None,
                          objectid=sra_object.srr_accession,
                          target=bam,
                          **internal_kwargs)
        if not status:
            return ""

        #rename the bam file unless this is a dry run
        if not _dryrun:
            pe.move_file(bam, finalbam)
            if not pu.check_files_exist(finalbam):
                return ""

        return finalbam