Exemplo n.º 1
0
    def __init__(self, index="", threads=None):
        """Initialise the STAR wrapper and optionally attach an existing index.

        Parameters
        ----------

        index: string
            Path to an existing STAR index. It is stored in self.star_index
            only if it passes pu.check_starindex; otherwise self.star_index
            is left unset.
        threads: int
            Number of threads to use. When falsy (None or 0), os.cpu_count()
            is used instead.

        :raises Exception: if the STAR dependency check fails
        """

        super().__init__()
        self.programName = "STAR"

        self.dep_list = [self.programName]
        #check if star exists
        if not pe.check_dependencies(self.dep_list):
            raise Exception("ERROR: " + self.programName + " not found.")

        if not threads:
            threads = os.cpu_count()

        self.threads = threads

        #if a usable index is passed, remember it on the object
        if index and pu.check_starindex(index):
            print("STAR index is: " + index)
            self.star_index = index
        elif index:
            #an index path was given but it failed the index check; say so
            #instead of claiming that no index was provided
            print("STAR index check failed for: " + index +
                  ". Please build index now to generate an index using build_index()....")
        else:
            print(
                "No STAR index provided. Please build index now to generate an index using build_index()...."
            )
Exemplo n.º 2
0
 def check_index(self):
     """Report whether this object has a STAR index that passes validation.

     Returns False when no star_index attribute has been set; otherwise
     returns whatever pu.check_starindex reports for self.star_index.
     """
     # Short-circuit: the index check only runs when the attribute exists.
     return hasattr(self, 'star_index') and pu.check_starindex(self.star_index)
Exemplo n.º 3
0
    def build_index(self,
                    index_path,
                    *args,
                    threads=None,
                    overwrite=False,
                    verbose=False,
                    quiet=False,
                    logs=True,
                    objectid="NA",
                    **kwargs):
        """Build a star index with given parameters and saves the new index to self.star_index.

        Parameters
        ----------

        index_path: string
            Path where the index will be created
        args: tuple
            Path to reference input files
        threads: int
            Num threads to use. Defaults to self.threads when falsy.
        overwrite: bool
            Overwrite if index already exists
        verbose: bool
            Print stdout and std error
        quiet: bool
            Print nothing
        logs: bool
            Log this command to pyrpipe logs
        objectid: str
            Provide an id to attach with this command e.g. the SRR accession. This is useful for debugging, benchmarking and reports.

        kwargs: dict
            Parameters for the star command. These override the options
            generated here (--runMode, --genomeDir, --genomeFastaFiles,
            --runThreadN) when the same key is supplied.

        :return: True if an existing index is reused or a new index was built
            and passes check_index(); False if no input files were given, the
            command failed, the index path is missing afterwards, or the new
            index fails check_index()
        :rtype: bool
        :raises Exception: if the input files do not exist or the index
            directory cannot be created
        """

        #reuse an existing index unless the caller asked to overwrite it
        if not overwrite and pu.check_starindex(index_path):
            pu.print_green("STAR index already exists. Using it...")
            self.star_index = index_path
            return True

        #check input files
        if not args:
            pu.print_boldred(
                "Please provide input fasta file to build STAR index")
            return False

        if not pu.check_files_exist(*args):
            raise Exception("Please check input to star index")

        #create path if doesnt exist
        if not pu.check_paths_exist(index_path) and not pu.mkdir(index_path):
            raise Exception("Error creating STAR index. Exiting.")

        if not threads:
            threads = self.threads

        #add runMode
        newOpts = {
            "--runMode": "genomeGenerate",
            "--genomeDir": index_path,
            "--genomeFastaFiles": " ".join(args),
            "--runThreadN": str(threads)
        }

        #user supplied kwargs take precedence over the generated options
        mergedOpts = {**newOpts, **kwargs}

        starbuild_Cmd = ['STAR']
        starbuild_Cmd.extend(pu.parse_unix_args(None, mergedOpts))

        #execute command
        status = pe.execute_command(starbuild_Cmd,
                                    verbose=verbose,
                                    quiet=quiet,
                                    logs=logs,
                                    objectid=objectid)

        if not status:
            return False

        #the command reported success but produced nothing at index_path
        if not pu.check_paths_exist(index_path):
            return False

        #update object's index, then report whether it validates; always an
        #explicit bool instead of falling off the end with None
        self.star_index = index_path
        return bool(self.check_index())
Exemplo n.º 4
0
    def build_index(self, index_path, genome, objectid="NA"):
        """Build a STAR index with given parameters and saves the new index to self.index.

        Parameters
        ----------

        index_path: string
            Path where the index will be created
        genome: string
            Path to the reference genome
        objectid : string
            Provide an id to attach with this command e.g. the SRR accession. This is useful for debugging, benchmarking and reports.

        :return: True when an existing index is reused or the STAR command
            reports success
        :rtype: bool
        :raises ValueError: if the genome file does not exist
        :raises OSError: if the index directory cannot be created or the
            STAR command fails
        """

        #reuse an existing index unless _force is set
        #NOTE(review): _force, _dryrun and _params_dir are not defined in this
        #block; presumably module-level settings -- confirm
        if not _force and pu.check_starindex(index_path):
            pu.print_green(
                "STAR index {} already exists.".format(index_path))
            self.index = index_path
            return True

        #check input files
        if not pu.check_files_exist(genome):
            pu.print_boldred(
                "Please provide a valid input fasta file to build STAR index")
            raise ValueError("Please check input to build star index")

        #create index path if doesnt exist
        if not pu.check_paths_exist(index_path) and not pu.mkdir(index_path):
            raise OSError(
                "Error creating STAR index. Failed to create index directory."
            )

        #determine parameters and execute cmd
        internal_kwargs = {
            "--runMode": "genomeGenerate",
            "--genomeDir": index_path,
            "--genomeFastaFiles": genome,
            "--runThreadN": self._threads
        }

        #read build parameters; the options set above override any value
        #the yaml file gives for the same key
        yamlfile = os.path.join(_params_dir, 'star_index.yaml')
        if pu.check_files_exist(yamlfile):
            yaml_params = pl.YAML_loader(yamlfile)
            yaml_kwargs = yaml_params.get_kwargs()
            internal_kwargs = {**yaml_kwargs, **internal_kwargs}

        starbuild_Cmd = ['STAR']
        starbuild_Cmd.extend(
            pu.parse_unix_args(valid_args._args_STAR, internal_kwargs))

        #execute command
        status = pe.execute_command(starbuild_Cmd, objectid=objectid)
        if not status:
            raise OSError("Error building STAR index")

        if pu.check_paths_exist(index_path) and not _dryrun:
            #update object's index
            self.index = index_path
            #NOTE(review): the result of check_index() has never affected the
            #return value here (True is returned either way); the call is kept
            #in case it has side effects -- confirm whether a failed check
            #should be reported as an error
            self.check_index()

        return True