Exemplo n.º 1
0
    def fastqFilesExistsLocally(self):
        """Report whether the local fastq file(s) of this object exist on disk.

        For a ``PAIRED`` layout both ``localfastq1Path`` and
        ``localfastq2Path`` must be set and pass ``pu.check_files_exist``.
        For any other layout only ``localfastqPath`` is checked.

        :return: False when ``layout`` or a required path attribute is
            missing or a file check fails; otherwise True (paired) or the
            result of ``pu.check_files_exist`` (non-paired).
        """
        if not hasattr(self, 'layout'):
            return False

        if self.layout == 'PAIRED':
            # Both mates are required; give up at the first one that fails.
            for attr_name in ('localfastq1Path', 'localfastq2Path'):
                if not hasattr(self, attr_name):
                    return False
                if not pu.check_files_exist(getattr(self, attr_name)):
                    return False
            return True

        if not hasattr(self, 'localfastqPath'):
            return False
        return pu.check_files_exist(self.localfastqPath)
Exemplo n.º 2
0
 def perform_qc(self,sra_object,out_dir="",out_suffix="_bbduk",objectid="NA"):
     """Run bbduk on fastq files specified by the sra_object

     sra_object: SRA
         An SRA object whose fastq files will be used
     out_dir: str
         Path to output directory. Defaults to sra_object.directory; created if it does not exist.
     out_suffix: string
         Suffix appended to the input basename to name the output fastq file(s)
     objectid: str
         Provide an id to attach with this command e.g. the SRR accession. This is useful for debugging, benchmarking and reports.

     :return: Returns the path of fastq files after QC. tuple has one item for single end files and 2 for paired.
              Returns ("",) if the command fails or the expected output is missing (outside dry-run mode).
     :rtype: tuple

     """
     #make out_dir
     if not out_dir:
         out_dir=sra_object.directory
     elif not pu.check_paths_exist(out_dir):
         pu.mkdir(out_dir)

     if sra_object.layout=='PAIRED':
         fq1=sra_object.fastq_path
         fq2=sra_object.fastq2_path
         out_file1Path=os.path.join(out_dir,pu.get_file_basename(fq1)+out_suffix+".fastq")
         out_file2Path=os.path.join(out_dir,pu.get_file_basename(fq2)+out_suffix+".fastq")
         internal_kwargs={"in":fq1,"in2":fq2,"out":out_file1Path,"out2":out_file2Path}
         target=[out_file1Path,out_file2Path]
         outputs=(out_file1Path,out_file2Path)
     else:
         fq=sra_object.fastq_path
         out_filePath=os.path.join(out_dir,pu.get_file_basename(fq)+out_suffix+".fastq")
         internal_kwargs={"in":fq,"out":out_filePath}
         target=out_filePath
         outputs=(out_filePath,)

     #run bbduk
     status=self.run(objectid=objectid,target=target,**internal_kwargs)

     #a failed run must not hand back paths to files that were never produced
     if not status:
         return ("",)

     #in dry-run mode the outputs are not expected to exist
     if not pu.check_files_exist(*outputs) and not _dryrun:
         return ("",)

     return outputs
Exemplo n.º 3
0
def test_samtools():
    """Exercise Samtools.sam_sorted_bam and Samtools.merge_bam on the test fixtures."""
    samtools = tools.Samtools()

    # SAM -> sorted BAM
    sorted_bam = samtools.sam_sorted_bam(testVars.hisatSam,
                                         out_dir=testVars.testDir)
    print("check:" + sorted_bam)
    assert pu.check_files_exist(sorted_bam) == True, "Failed to convert sam to sorted bam"

    # merge two sorted BAMs; "-f" is forwarded to the merge command as a flag
    merge_opts = {"-f": ""}
    merged_bam = samtools.merge_bam(testVars.hisatSortedBam,
                                    testVars.starSortedBam,
                                    out_dir=testVars.testDir,
                                    **merge_opts)
    assert pu.check_files_exist(merged_bam) == True, "Failed to merge bam"
Exemplo n.º 4
0
 def perform_quant(self,sra_object,out_suffix="",out_dir="",objectid="NA"):
     """Run kallisto quant

     sra_object: SRA
         SRA object containing paths to fastq files
     out_suffix: str
         suffix for output file; the result is stored as abundance<out_suffix>.tsv
     out_dir: str
         path to output directory. Defaults to <sra_object.directory>/kallisto_out
     objectid: str
         Provide an id to attach with this command e.g. the SRR accession. This is useful for debugging, benchmarking and reports.
         When left at the default "NA" the SRR accession of sra_object is used.

     :return: Path to the kallisto abundance file, or "" on failure
     :rtype: string
     """

     if not out_dir:
         out_dir=os.path.join(sra_object.directory,"kallisto_out")
     elif not pu.check_paths_exist(out_dir):
         #create out_dir if not exists
         pu.mkdir(out_dir)

     if sra_object.layout == 'PAIRED':
         args=(sra_object.fastq_path,sra_object.fastq2_path)
         internal_kwargs={"-o":out_dir,"-i":self.index}
     else:
         args=(sra_object.fastq_path,)
         internal_kwargs={"-o":out_dir,"--single":"","-i":self.index}

     #targets
     outfile=os.path.join(out_dir,"abundance.tsv")
     newfile=os.path.join(out_dir,"abundance"+out_suffix+".tsv")
     #check if final files already exists
     if not _force and pu.check_files_exist(newfile):
         pu.print_green('Target files {} already exist.'.format(newfile))
         return newfile

     #honor a caller-supplied id; fall back to the accession as before
     if not objectid or objectid == "NA":
         objectid=sra_object.srr_accession

     #call kallisto
     status=self.run(*args,subcommand='quant',objectid=objectid,target=outfile,**internal_kwargs)
     if not status:
         return ""

     if not _dryrun:
         #with an empty suffix both paths are identical; skip the self-move
         if newfile != outfile:
             pe.move_file(outfile,newfile)
         if not pu.check_files_exist(newfile):
             return ""
     return newfile
Exemplo n.º 5
0
    def check_index(self):
        """Check whether a diamond index is available.

        The index counts as present when either ``self.index`` itself or
        ``self.index + ".dmnd"`` passes ``pu.check_files_exist``.

        :return: True if one of the two paths exists, False otherwise
            (including when ``self.index`` is not set at all).
        """
        if not hasattr(self, "index"):
            return False

        # The ".dmnd" variant is only evaluated when the bare path is missing.
        found = (pu.check_files_exist(self.index)
                 or pu.check_files_exist(self.index + ".dmnd"))
        return True if found else False
Exemplo n.º 6
0
 def build_index(self,index_path,index_name,fasta,verbose=False,quiet=False,logs=True,objectid="NA",**kwargs):
     """Function to  build kallisto index

     index_path: str
         path to the output directory
     index_name: str
         index name
     fasta: str
         path to the fasta file used to build the index
     verbose: bool
         Print stdout and std error
     quiet: bool
         Print nothing
     logs: bool
         Log this command to pyrpipe logs
     objectid: str
         Provide an id to attach with this command e.g. the SRR accession. This is useful for debugging, benchmarking and reports.
     kwargs: dict
         Extra options to pass to kallisto index. The input fasta ("--") and output index ("-i") set by this function take precedence over the same keys in kwargs.

     :return: Status of kallisto index. True only if the command succeeded and the index file exists.
     :rtype: bool
     """

     #check input
     if not pu.check_files_exist(fasta):
         pu.print_boldred("{} does not exist. Exiting".format(fasta))
         return False

     #create out dir
     if not pu.check_paths_exist(index_path):
         if not pu.mkdir(index_path):
             print("ERROR in building kallisto index. Failed to create index directory.")
             return False

     indexOut=os.path.join(index_path,index_name)
     newOpts={"--":(fasta,),"-i":indexOut}
     mergedOpts={**kwargs,**newOpts}

     #call kallisto
     status=self.run_kallisto("index",verbose=verbose,quiet=quiet,logs=logs,objectid=objectid,**mergedOpts)

     #the index only counts as built if the file is really there
     if status and pu.check_files_exist(indexOut):
         self.kallisto_index=indexOut
         self.passedArgumentDict['-i']=self.kallisto_index
         pu.print_green("kallisto_index is:"+self.kallisto_index)
         return True

     #command failed, or reported success without producing the index file
     pu.print_boldred("Failed to create kallisto index")
     return False
Exemplo n.º 7
0
 def search_fastq(self,path):
     """Look for fastq files under path and record them on this object.

     Lookup order:
       1. <SRR>_1.fastq and <SRR>_2.fastq (paired)
       2. <SRR>.fastq (single)
       3. files returned by pu.find_files(path,".fastq$"); accepted only
          when exactly one (single) or two (paired, in sorted order) are found

     On success fastq_path (and fastq2_path for paired data) and layout are set.

     :return: True if fastq files were found otherwise False
     :rtype: bool
     """
     accession=self.srr_accession

     #paired files named after the accession
     mate1=os.path.join(path,accession+'_1.fastq')
     mate2=os.path.join(path,accession+'_2.fastq')
     if pu.check_files_exist(mate1,mate2):
         self.fastq_path=mate1
         self.fastq2_path=mate2
         pu.print_green("Found .fastq "+self.fastq_path+" "+self.fastq2_path)
         self.layout="PAIRED"
         return True

     #single end file named after the accession
     single=os.path.join(path,accession+'.fastq')
     if pu.check_files_exist(single):
         self.fastq_path=single
         pu.print_green("Found .fastq "+self.fastq_path)
         self.layout="SINGLE"
         return True

     #fall back to whatever fastq files live under the path
     candidates=pu.find_files(path,".fastq$")
     count=len(candidates)
     if count<1 or count>2:
         #nothing found, or too many files to decide
         return False

     candidates.sort()
     if count==1:
         self.fastq_path=candidates[0]
         pu.print_green("Found .fastq "+self.fastq_path)
         self.layout="SINGLE"
     else:
         self.fastq_path=candidates[0]
         self.fastq2_path=candidates[1]
         pu.print_green("Found .fastq "+self.fastq_path+" "+self.fastq2_path)
         self.layout="PAIRED"

     return True
Exemplo n.º 8
0
 def perform_quant(self,sra_object,out_suffix="",out_dir="",objectid="NA"):
     """run salmon quant

     sra_object: SRA
         An SRA object with valid fastq files
     out_suffix: str
         suffix string for out file; the result is stored as quant<out_suffix>.sf
     out_dir: str
         path to outdir. Defaults to <sra_object.directory>/salmon_out
     objectid: str
         Provide an id to attach with this command e.g. the SRR accession. This is useful for debugging, benchmarking and reports.
         When left at the default "NA" the SRR accession of sra_object is used.

     :return: Path to salmon out file, or "" on failure
     :rtype: string
     """

     if not out_dir:
         out_dir=os.path.join(sra_object.directory,"salmon_out")
     elif not pu.check_paths_exist(out_dir):
         #create out_dir if not exists
         pu.mkdir(out_dir)

     if sra_object.layout == 'PAIRED':
         internal_kwargs={"-o":out_dir,"-l":"A","-1":sra_object.fastq_path,"-2":sra_object.fastq2_path,"-i":self.index}
     else:
         internal_kwargs={"-o":out_dir,"-l":"A","-r":sra_object.fastq_path,"-i":self.index}

     #targets
     outfile=os.path.join(out_dir,"quant.sf")
     newfile=os.path.join(out_dir,"quant"+out_suffix+".sf")
     #check if final files already exists
     if not _force and pu.check_files_exist(newfile):
         pu.print_green('Target files {} already exist.'.format(newfile))
         return newfile

     #honor a caller-supplied id; fall back to the accession as before
     if not objectid or objectid == "NA":
         objectid=sra_object.srr_accession

     #call salmon
     status=self.run(None,subcommand='quant',objectid=objectid,target=newfile,**internal_kwargs)
     if not status:
         return ""

     if not _dryrun:
         #with an empty suffix both paths are identical; skip the self-move
         if newfile != outfile:
             pe.move_file(outfile,newfile)
         if not pu.check_files_exist(newfile):
             return ""
     return newfile
Exemplo n.º 9
0
    def run_transdecoder_longorfs(self,
                                  infasta,
                                  out_dir=None,
                                  verbose=False,
                                  quiet=False,
                                  logs=True,
                                  objectid="NA",
                                  **kwargs):
        """Run TransDecoder.LongOrfs on a fasta file.

        Parameters
        ----------
        infasta: str
            Path to the input fasta file (passed as ``-t``).
        out_dir: str
            Output directory (passed as ``-O``). Defaults to the current
            working directory.
        verbose, quiet, logs, objectid:
            Forwarded unchanged to ``self.run_transdecoder``.
        kwargs: dict
            Extra options for the command; these override ``-t``/``-O``.

        :return: ``out_dir`` on success, "" if the input file is missing or
            the command fails
        :rtype: string
        """
        if not pu.check_files_exist(infasta):
            pu.print_boldred("Please check input file:{}".format(infasta))
            # Nothing to run on; fail the same way a failed command does.
            return ""

        if not out_dir:
            out_dir = os.getcwd()

        newOpts = {"-t": infasta, "-O": out_dir}
        mergedOpts = {**newOpts, **kwargs}

        #execute LongOrfs
        status = self.run_transdecoder('TransDecoder.LongOrfs',
                                       verbose=verbose,
                                       quiet=quiet,
                                       logs=logs,
                                       objectid=objectid,
                                       **mergedOpts)
        if not status:
            pu.print_boldred("Transdecoder failed")
            return ""

        return out_dir
Exemplo n.º 10
0
    def verify_integrity(self, target, verbose=False):
        """
        Clean up after an interrupted command that left lock files behind.

        If lock files exist for ``target`` they are removed; when the target
        itself is also present it is treated as incomplete and removed
        together with the locks.

        Parameters
        ----------
        target : Str
            The target file.
        verbose : bool, optional
            Not used by this method. The default is False.

        Returns
        -------
        bool
            Always True, whether or not any locks were found.

        """
        locks = self.get_lock_files(target)
        if len(locks) > 0:
            if pu.check_files_exist(target):
                # A target next to its lock was never finished.
                pu.print_notification(
                    "Found incomplete target {}. Restarting command...".format(
                        target))
                stale = locks + [target]
            else:
                stale = locks
            self.remove_locks(stale)

        return True
Exemplo n.º 11
0
def download_gtex_bams(manifest_file, outdir):
    """Download the bam files listed in a manifest and sort them into per-sample folders.

    Files already present under ``outdir/<sample id>/`` are first moved back
    to ``outdir``. After the download command has run, every listed file is
    moved into ``outdir/<sample id>/``, where the sample id is the part of
    the file name before ``.Aligned``.

    Relies on the module-level names ``profile``, ``m``, ``threads``, ``cwd``
    and ``dtn_ssh`` to build the remote command.
    """
    with open(manifest_file, 'r') as handle:
        entries = json.load(handle)

    bam_names = []
    for entry in entries:
        bam_name = entry["file_name"]
        sample_id = bam_name.split('.Aligned')[0]
        existing = os.path.join(outdir, sample_id, bam_name)
        if pu.check_files_exist(existing):
            print("Outfile {} exists. Skipping...".format(existing))
            # move it back to outdir before the download command runs
            os.rename(existing, os.path.join(outdir, bam_name))
        bam_names.append(bam_name)

    # NOTE(review): the manifest passed here is the global `m`, not the
    # `manifest_file` argument -- confirm they refer to the same file.
    download_cmd = 'gen3-client download-multiple --profile={} --manifest={} --download-path={} --protocol=s3 --numparallel={} --skip-completed --no-prompt'.format(
        profile, m, outdir, threads)
    remote_cmd = dtn_ssh + " '{}; {}'".format('cd {}'.format(cwd), download_cmd)
    out = pe.get_shell_output(remote_cmd, verbose=True)

    # file every bam under its sample directory
    for bam_name in bam_names:
        sample_dir = os.path.join(outdir, bam_name.split('.Aligned')[0])
        pu.mkdir(sample_dir)
        os.rename(os.path.join(outdir, bam_name),
                  os.path.join(sample_dir, bam_name))
Exemplo n.º 12
0
 def __init__(self,kallisto_index,**kwargs):
     """Create a kallisto wrapper.

     kallisto_index: str
         Path to an existing kallisto index. May be empty or None, in which
         case build_index() has to be called before quantification.
     kwargs: dict
         Options to pass to kallisto; stored in self.passedArgumentDict.

     Raises Exception if the kallisto executable is not found.
     """
     super().__init__() 
     self.programName="kallisto"
     self.dep_list=[self.programName]        
     if not pe.check_dependencies(self.dep_list):
         raise Exception("ERROR: "+ self.programName+" not found.")

     ##kallisto index
     self.validArgsIndex=['-i','--index','-k','--kmer-size','--make-unique']
     ##kallisto quant
     self.validArgsQuant=['-i','--index','-o','--output-dir','--bias','-b','--bootstrap-samples',
                          '--seed','--plaintext','--fusion','--single','--fr-stranded','--rf-stranded',
                          '-l','--fragment-length','-s','--sd','-t','--threads','--pseudobam']
     ##kallisto pseudo
     self.validArgsPseudo=['-i','--index','-o','--output-dir','-u','--umi','-b','--batch',
                           '--single','-l','--fragment-length','-s','--sd','-t','--threads','--pseudobam']
     ##kallisto h5dump
     self.validArgsh5dump=['-o','--output-dir']

     self.valid_args=pu.get_union(self.validArgsIndex,self.validArgsQuant,self.validArgsPseudo,self.validArgsh5dump)

     #initialize the passed arguments
     self.passedArgumentDict=kwargs

     #if index is passed, update the passed arguments
     #truthiness instead of len() so that None does not raise TypeError
     if kallisto_index and pu.check_files_exist(kallisto_index):
         print("kallisto index is: "+kallisto_index)
         self.kallisto_index=kallisto_index
         self.passedArgumentDict['-i']=self.kallisto_index
     else:
         print("No kallisto index provided. Please use build_index() now to generate an index...")
Exemplo n.º 13
0
def is_paired(sra_file):
    """Function to test whether a .sra file is paired or single.

    The first spot is dumped with ``fastq-dump -X 1 -Z --split-spot`` and the
    newlines in the output are counted: 4 means single, 8 means paired.

    Parameters
    ----------

    sra_file (string)  the path to sra file


    :return: True if sra is paired
    :rtype: bool

    Raises Exception if the file does not exist, fastq-dump fails, or the
    output has an unexpected number of lines.
    """
    if not pu.check_files_exist(sra_file):
        raise Exception(
            "Error checking layout. {0} doesn't exist".format(sra_file))

    dump_cmd = ["fastq-dump", "-X", "1", "-Z", "--split-spot", sra_file]
    try:
        raw = subprocess.check_output(dump_cmd, stderr=subprocess.DEVNULL)
    except subprocess.CalledProcessError as e:
        raise Exception("Error running fastq-dump: {}".format(str(e)))

    line_count = raw.decode("utf-8").count("\n")
    if line_count not in (4, 8):
        raise Exception("Unexpected output from fast-dump")
    return line_count == 8
Exemplo n.º 14
0
        def goodbye():
            """Report where the logs were written and generate the reports.

            Does nothing beyond a notice when no log file exists or when this
            was a dry run. Otherwise it generates the summary, the two
            bash-script exports and, if ``_multiqc`` is set, a MultiQC report.
            """
            log_path = os.path.join(_logs_dir, _log_name + '.log')

            if not pu.check_files_exist(log_path):
                pu.print_yellow("No logs written")
                return

            if _dryrun:
                pu.print_yellow(
                    "This was a dry run. Logs were saved to {}".format(
                        log_path))
                return

            pu.print_yellow("Logs were saved to {}".format(log_path))
            pu.print_yellow(
                "A copy of script is saved to {} with md5 checksum {}".format(
                    target, _md5))

            # summary built from the command log and its ENV companion
            env_log_path = log_path.replace('.log', 'ENV.log')
            reports.generate_summary(log_path, env_log_path)

            # shell-command exports: "<log>_commands" and "<log>_failed"
            reports.generateBashScript(log_path,
                                       log_path + '_commands',
                                       None,
                                       verbose=False)
            reports.generateBashScript(log_path,
                                       log_path + '_failed',
                                       None,
                                       coverage='i',
                                       verbose=False)

            if _multiqc:
                reports.generate_multiqc(directory=os.getcwd(),
                                         tempDir='MultiQC_temp',
                                         outDir='MultiQC_out')
Exemplo n.º 15
0
    def build_index(self,
                    in_fasta,
                    dbname,
                    out_dir=None,
                    threads=None,
                    verbose=False,
                    quiet=False,
                    logs=True,
                    objectid="NA",
                    **kwargs):
        """Build a diamond index with ``makedb`` and store its path in self.index.

        in_fasta: str
            Fasta file to index (passed as ``--in``).
        dbname: str
            Name of the index inside out_dir.
        out_dir: str
            Output directory; defaults to the current working directory and
            is created when missing.
        threads: int
            Threads to use; falls back to self.threads.
        kwargs: dict
            Extra options; these override ``--in``, ``-d`` and ``--threads``.

        :return: True if the index already exists or was built, else False
        :rtype: bool
        """
        if not pu.check_files_exist(in_fasta):
            pu.print_boldred(
                "Input fasta: {} not found...\n diamond makedb failed".format(
                    in_fasta))
            return False

        out_dir = out_dir or os.getcwd()
        if not pu.check_paths_exist(out_dir):
            pu.mkdir(out_dir)

        # self.index is set first because check_index() reads it
        target_index = os.path.join(out_dir, dbname)
        self.index = target_index
        if self.check_index():
            pu.print_green("Diamond index: {} exists, using it...".format(
                self.index))
            self.index = target_index
            return True

        threads = threads or self.threads

        # defaults first so that kwargs can override them
        makedb_opts = {
            "--in": in_fasta,
            "-d": target_index,
            "--threads": str(threads),
            **kwargs,
        }

        built = self.run_diamond("makedb",
                                 verbose=verbose,
                                 quiet=quiet,
                                 logs=logs,
                                 objectid=objectid,
                                 **makedb_opts)
        if not built:
            return False

        self.index = target_index
        return True
Exemplo n.º 16
0
def test_stringtie():
    """Assemble the hisat2 sorted bam with Stringtie and check the output file exists."""
    sorted_bam = testVars.hisatSortedBam
    gtf = testVars.gtf  # not used by this test
    assembler = assembly.Stringtie()
    assembled = assembler.perform_assembly(sorted_bam,
                                           out_dir=testVars.testDir,
                                           objectid="test")
    produced = pu.check_files_exist(assembled)
    assert produced == True, "Failed stringtie"
Exemplo n.º 17
0
def test_mikado():
    """Create a Mikado GTF list from the fixture directory and check the list file exists."""
    gtf_source_dir = testVars.mikadofiles
    work_dir = testVars.testDir + "/mikadoout"
    mikado = tools.Mikado()

    list_path = mikado.createMikadoGTFlist("mikadolist", work_dir,
                                           gtf_source_dir)
    assert pu.check_files_exist(list_path) == True, "Mikado list failed"
Exemplo n.º 18
0
def test_cufflinks():
    """Run Cufflinks with a reference GTF on the hisat2 sorted bam and check the output exists."""
    bam = testVars.hisatSortedBam
    gtf = testVars.gtf
    cl = assembly.Cufflinks(reference_gtf=gtf)
    result = cl.perform_assembly(bam,
                                 out_dir=testVars.testDir,
                                 objectid="test")
    # The message previously named stringtie (copy-paste), which would
    # misreport the failing tool.
    assert pu.check_files_exist(result) == True, "Failed cufflinks"
Exemplo n.º 19
0
def test_cufflinks():
    """Run Cufflinks with default options on the hisat2 sorted bam and check the output exists."""
    sorted_bam = testVars.hisatSortedBam
    gtf = testVars.gtf  # not used by this test
    cufflinks = assembly.Cufflinks()
    assembled = cufflinks.perform_assembly(sorted_bam,
                                           out_dir=testVars.testDir,
                                           objectid="test")
    print('RES:', assembled)
    produced = pu.check_files_exist(assembled)
    assert produced == True, "Failed cufflinks"
Exemplo n.º 20
0
def checkEnvLog(logFile):
    """Return the path of the ENV log that belongs to logFile.

    The ENV log is expected next to logFile, named ``<basename>ENV.log``.
    Exits the program with status 1 if either file is missing.
    """
    log_dir = pu.get_file_directory(logFile)
    env_log_name = pu.get_file_basename(logFile) + "ENV.log"
    envLog = os.path.join(log_dir, env_log_name)

    if pu.check_files_exist(logFile, envLog):
        return envLog

    print("Please check missing log files. Exiting.")
    sys.exit(1)
Exemplo n.º 21
0
def deleteFileFromDisk(filePath):
    """Delete a given file from disk using ``rm``.

    Returns True if the file doesn't exist; otherwise returns whatever
    ``getReturnStatus`` reports for the ``rm`` command.
    """
    if not pu.check_files_exist(filePath):
        # nothing to delete counts as success
        return True
    return getReturnStatus(['rm', filePath])
Exemplo n.º 22
0
    def __init__(self, file):
        """Load parameters from a yaml file, if it exists.

        When ``file`` is missing both ``__params`` and ``__kwargs`` stay
        None; otherwise the yaml content is read with the safe loader and
        ``parse_params`` is called.
        """
        self.__params = None
        self.__kwargs = None

        if pu.check_files_exist(file):
            with open(file) as yaml_handle:
                self.__params = yaml.load(yaml_handle, Loader=yaml.SafeLoader)
                self.parse_params()
Exemplo n.º 23
0
    def perform_alignment(self,
                          sra_object,
                          out_suffix="_bowtie2",
                          out_dir="",
                          objectid="NA"):
        """Function to perform alignment using sra_object.
        
        Parameters
        ----------
        
        sra_object SRA object
            An object of type SRA. The path to fastq files will be obtained from this object.
        out_suffix: string
            Suffix for the output sam file
        out_dir: string
            Directory to save the results. Default value is sra_object.directory
        objectid: str
            Provide an id to attach with this command e.g. the SRR accession. This is useful for debugging, benchmarking and reports.
            When left at the default "NA" the SRR accession of sra_object is used.
        :return: Returns the sorted bam file path after converting sam to bam and sorting it; "" on failure
        :rtype: string
        """
        if not out_dir:
            out_dir = sra_object.directory
        elif not pu.check_paths_exist(out_dir):
            pu.mkdir(out_dir)

        #create path to output sam file
        outSamFile = os.path.join(
            out_dir, sra_object.srr_accession + out_suffix + ".sam")

        #find layout and fq file paths
        if sra_object.layout == 'PAIRED':
            internal_kwargs = {
                "-1": sra_object.fastq_path,
                "-2": sra_object.fastq2_path,
                "-S": outSamFile
            }
        else:
            internal_kwargs = {"-U": sra_object.fastq_path, "-S": outSamFile}

        #honor a caller-supplied id; fall back to the accession as before
        if not objectid or objectid == "NA":
            objectid = sra_object.srr_accession

        status = self.run(None,
                          objectid=objectid,
                          target=outSamFile,
                          **internal_kwargs)
        if not status:
            return ""

        #in dry-run mode the sam file is not expected to exist
        if not pu.check_files_exist(outSamFile) and not _dryrun:
            return ""

        #convert to bam before returning; returns outBamFile
        return tools.Samtools().sam_sorted_bam(outSamFile)
Exemplo n.º 24
0
    def run_transdecoder_predict(self,
                                 infasta,
                                 longorfs_dir,
                                 out_dir=None,
                                 verbose=False,
                                 quiet=False,
                                 logs=True,
                                 objectid="NA",
                                 **kwargs):
        """Run TransDecoder.Predict on a fasta file.

        Parameters
        ----------
        infasta: str
            Path to the input fasta file (passed as ``-t``).
        longorfs_dir: str
            Existing directory passed as ``-O``.
        out_dir: str
            When given, the ``.bed``, ``.cds``, ``.gff3`` and ``.pep`` result
            files are moved into it (it is created if missing). When
            omitted, nothing is moved and the current working directory is
            returned.
        verbose, quiet, logs, objectid:
            Forwarded unchanged to ``self.run_transdecoder``.
        kwargs: dict
            Extra options for the command; these override ``-t``/``-O``.

        :return: ``out_dir`` on success, "" if an input is missing or the
            command fails
        :rtype: string
        """
        # Fail before running anything when an input is missing.
        if not pu.check_files_exist(infasta):
            pu.print_boldred("Please check input file:{}".format(infasta))
            return ""
        if not pu.check_paths_exist(longorfs_dir):
            pu.print_boldred("Path {} doesn't exist".format(longorfs_dir))
            return ""

        # Results are only moved when the caller asked for a specific out_dir.
        move_flag = True
        if not out_dir:
            out_dir = os.getcwd()
            move_flag = False

        if not pu.check_paths_exist(out_dir):
            pu.mkdir(out_dir)

        newOpts = {"-t": infasta, "-O": longorfs_dir}
        mergedOpts = {**newOpts, **kwargs}

        #execute Predict
        status = self.run_transdecoder('TransDecoder.Predict',
                                       verbose=verbose,
                                       quiet=quiet,
                                       logs=logs,
                                       objectid=objectid,
                                       **mergedOpts)
        if not status:
            pu.print_boldred("Transdecoder failed")
            return ""

        #move output files to outdir
        if move_flag:
            outfile_prefix = pu.get_filename(infasta) + ".transdecoder"
            for extension in (".bed", ".cds", ".gff3", ".pep"):
                result_name = outfile_prefix + extension
                # source path is relative, i.e. resolved against the cwd
                pe.move_file(result_name,
                             os.path.join(out_dir, result_name),
                             verbose)
        return out_dir
Exemplo n.º 25
0
    def __init__(self, *args, index=None, genome=None, threads=None, **kwargs):
        """
        Initialise a Hisat2 object.

        Parameters
        ----------
        *args : tuple
            Positional arguments, forwarded to the parent initialiser.
        index : Str, optional
            Path to the Hisat2 index. If ``check_index()`` fails, an index
            is built from ``genome``. The default is None.
        genome : Str, optional
            Path to the reference genome, used to build the index when it
            is not present. The default is None.
        threads : int, optional
            Threads to use for hisat2; resolved against the module-level
            ``_threads`` via ``resolve_parameter``. The default is None.
        **kwargs : dict
            Keyword arguments, forwarded to the parent initialiser.

        Raises
        ------
        ValueError
            If the index is not found and the genome file does not exist.

        Returns
        -------
        None.

        """
        super().__init__(*args, **kwargs)
        self._command = 'hisat2'
        self._deps = [self._command, 'samtools']
        self.index = index
        self.genome = genome
        self._param_yaml = 'hisat2.yaml'
        self._valid_args = valid_args._args_HISAT2

        # settle the thread count ("-p") and the index ("-x")
        self.resolve_parameter("-p", threads, _threads, '_threads')
        self.resolve_parameter("-x", index, index, 'index')

        # a usable index means there is nothing left to do
        if self.check_index():
            return

        # otherwise build one, provided the genome is available
        if pu.check_files_exist(self.genome):
            self.build_index(self.index, self.genome)
            return

        pu.print_boldred(
            "Hisat2 index '{}' not found; New index could not be created as genome file '{}' not found."
            .format(self.index, self.genome))
        raise ValueError(
            "Please provide a valid Hisat2 index, or a valid fasta file to generate the index"
        )
Exemplo n.º 26
0
    def perform_assembly(self,
                         bam_file,
                         out_dir=None,
                         out_suffix="_stringtie",
                         objectid="NA"):
        """Run stringtie on a bam file.

        Parameters
        ----------

        bam_file: string
            path to the bam file
        out_dir: string
            Output directory; defaults to the directory of bam_file and is
            created when missing
        out_suffix: string
            Suffix for the output gtf file
        objectid: str
            Provide an id to attach with this command e.g. the SRR accession. This is useful for debugging, benchmarking and reports.
        :return: Returns the path to output GTF file, or "" on failure
        :rtype: string
        """
        bam_basename = pu.get_file_basename(bam_file)

        if not out_dir:
            out_dir = pu.get_file_directory(bam_file)
        if not pu.check_paths_exist(out_dir):
            pu.mkdir(out_dir)

        gtf_path = os.path.join(out_dir, bam_basename + out_suffix + ".gtf")

        # "-o" names the output; the bam goes in as a positional under "--"
        run_opts = {"-o": gtf_path, "--": (bam_file, )}

        succeeded = self.run(None,
                             objectid=objectid,
                             target=gtf_path,
                             **run_opts)
        if not succeeded:
            return ""

        # outside dry-run mode the gtf has to be there
        if not pu.check_files_exist(gtf_path) and not _dryrun:
            return ""
        return gtf_path
Exemplo n.º 27
0
 def init_object(self,srr_accession,directory,fastq,fastq2,sra):
     """Initialise layout and directory from fastq files or from an accession.

     With fastq and fastq2 the layout is PAIRED, with only fastq it is SINGLE;
     in both cases the directory is that of fastq and the files must exist
     unless running in dry-run mode. Otherwise initialisation is delegated to
     init_from_accession(srr_accession,directory).

     Raises ValueError if the given fastq files are not found.

     :return: True when initialised from fastq files, else the result of init_from_accession
     """
     #no usable fastq: init from srr_accession and directory
     if not fastq:
         return self.init_from_accession(srr_accession,directory)

     if fastq2:
         #both mates provided
         self.layout="PAIRED"
         self.directory=pu.get_file_directory(fastq)
         found=pu.check_files_exist(fastq,fastq2)
         if not (found or _dryrun):
             pu.print_boldred("ERROR: File not found")
             raise ValueError("Please check fastq files {} {}".format(fastq,fastq2))
         return True

     #only one fastq (single)
     self.layout="SINGLE"
     self.directory=pu.get_file_directory(fastq)
     found=pu.check_files_exist(fastq)
     if not (found or _dryrun):
         pu.print_boldred("ERROR: File not found")
         raise ValueError("Please check fastq files {}".format(fastq))
     return True
Exemplo n.º 28
0
def test_diamond():
    """Build a diamond index, run a blastx alignment and check the output file exists."""
    work_dir = testVars.testDir + "/dout"
    diamond = tools.Diamond(index="", mode='sensitive')

    diamond.build_index(testVars.uniprot, "diamondDB", out_dir=work_dir)
    diamond.run_align(testVars.cdna,
                      "diamond_out",
                      command="blastx",
                      out_fmt=6,
                      fmt_string="qseqid sseqid evalue pident",
                      out_dir=work_dir)

    produced = pu.check_files_exist(work_dir + "/diamond_out")
    assert produced == True, "Diamond failed"
Exemplo n.º 29
0
 def build_index(self,index_path,index_name,fasta,verbose=False,quiet=False,logs=True,objectid="NA",**kwargs):
     """
     build salmon index and store the path to index in self

     index_path: str
         path to the output directory
     index_name: str
         index name
     fasta: str
         path to the fasta file used to build the index
     verbose: bool
         Print stdout and std error
     quiet: bool
         Print nothing
     logs: bool
         Log this command to pyrpipe logs
     objectid: str
         Provide an id to attach with this command e.g. the SRR accession. This is useful for debugging, benchmarking and reports.
     kwargs: dict
         Extra options to pass to salmon index. The input fasta ("-t") and output index ("-i") set by this function take precedence over the same keys in kwargs.

     :return: status of salmon index. True only if the command succeeded and the index directory exists.
     :rtype: bool
     """

     #check input
     if not pu.check_files_exist(fasta):
         pu.print_boldred("{} does not exist. Exiting".format(fasta))
         return False
     #create out dir
     if not pu.check_paths_exist(index_path):
         if not pu.mkdir(index_path):
             #message previously named hisat2 (copy-paste)
             print("ERROR in building salmon index. Failed to create index directory.")
             return False
     indexOut=os.path.join(index_path,index_name)
     newOpts={"-t":fasta,"-i":indexOut}
     mergedOpts={**kwargs,**newOpts}

     #call salmon
     status=self.run_salmon("index",verbose=verbose,quiet=quiet,logs=logs,objectid=objectid,**mergedOpts)

     #the index is checked with check_paths_exist, i.e. as a path rather than a single file
     if status and pu.check_paths_exist(indexOut):
         self.salmon_index=indexOut
         self.passedArgumentDict['-i']=self.salmon_index
         pu.print_green("salmon index is:"+self.salmon_index)
         return True

     pu.print_boldred("Failed to create salmon index")
     return False
Exemplo n.º 30
0
 def fastq_exists(self):
     """Check whether the fastq file(s) of this object are present on disk.

     For a PAIRED layout both fastq_path and fastq2_path must be set,
     non-empty and exist. For any other layout only fastq_path is checked.

     :return: False when layout or a required path is missing/empty or a
              file check fails; otherwise True (paired) or the result of
              pu.check_files_exist (non-paired).
     """
     if not hasattr(self,'layout'):
         return False

     if self.layout=='PAIRED':
         #both mates are required; stop at the first one that fails
         for attr_name in ('fastq_path','fastq2_path'):
             mate_path=getattr(self,attr_name,None)
             if not mate_path:
                 #attribute missing, None or empty
                 return False
             if not pu.check_files_exist(mate_path):
                 return False
         return True

     single_path=getattr(self,'fastq_path',None)
     if not single_path:
         #attribute missing, None or empty
         return False
     return pu.check_files_exist(single_path)