def __init__(self,
             fastqInput=None,
             fileFormat=None,
             fastqcOutputDir=None,
             threads=None,
             cmdParam=None,
             **kwargs):
    """
    Store the IO parameters and the run-time parameters of this step.

    fastqInput: value stored as the 'fastqInput' IO parameter.
    fileFormat: value stored as the 'fileFormat' parameter.
    fastqcOutputDir: value stored as the 'fastqcOutputDir' IO parameter;
        Configure.getTmpDir() is stored instead when it is None.
    threads: value stored as the 'threads' parameter;
        Configure.getThreads() is stored instead when it is None.
    cmdParam, **kwargs: forwarded unchanged to the parent initializer.
    """
    super(Step, self).__init__(cmdParam, **kwargs)

    # set all input and output parameters
    self.setParamIO('fastqInput', fastqInput)
    # identity check: '== None' would defer to a custom __eq__ on the argument
    if fastqcOutputDir is None:
        fastqcOutputDir = Configure.getTmpDir()
    self.setParamIO('fastqcOutputDir', fastqcOutputDir)

    # IO parameters must be stored before initIO() is called
    self.initIO()

    # set other parameters
    self.setParam('fileFormat', fileFormat)
    if threads is None:
        threads = Configure.getThreads()
    self.setParam('threads', threads)

    # echo of the stored parameters, kept from the original implementation
    print(self.params)
def impInitIO(self, ):
    """
    Register the input and output files derived from the IO parameters
    that __init__() stored.
    """
    # read back the IO parameters
    readsA = self.getParamIO('fastqInput1')
    readsB = self.getParamIO('fastqInput2')
    outDirA = self.getParamIO('fastqOutputDir1')
    outDirB = self.getParamIO('fastqOutputDir2')

    # inputs
    for inputName, inputValue in (('fastqInput1', readsA),
                                  ('fastqInput2', readsB)):
        self.setInputDirOrFile(inputName, inputValue)

    # a missing output directory parameter falls back to the tmp directory
    for dirName, dirValue in (('fastqOutputDir1', outDirA),
                              ('fastqOutputDir2', outDirB)):
        if dirValue is None:
            self.setParamIO(dirName, Configure.getTmpDir())

    # outputs: (output name, directory as read above, suffix, source input)
    outputSpecs = (
        ('fastqOutput1', outDirA, 'fastq', 'fastqInput1'),
        ('fastqOutput2', outDirB, 'fastq', 'fastqInput2'),
        ('adapterOutput', None, 'adapter.txt', 'fastqInput1'),
        ('settingsOutput', None, 'settings', 'fastqInput1'),
    )
    for outName, outDir, suffix, source in outputSpecs:
        self.setOutputDir1To1(outName, outDir, None, suffix, source)

    # the sample count is only known once the first input is set
    if readsA is None:
        return
    self._setInputSize(len(self.getInputList('fastqInput1')))
def impInitIO(self, ):
    """
    Register the fastq inputs, the HISAT2 index files and the sam output
    from the IO parameters stored by __init__().

    The index files are registered under the input name 'ht2IdxFiles'
    whether they come from the configuration or from an explicit
    'ht2Idx' prefix.
    """
    # obtain all input and output parameters
    fastqInput1 = self.getParamIO('fastqInput1')
    fastqInput2 = self.getParamIO('fastqInput2')
    samOutputDir = self.getParamIO('samOutputDir')
    ht2Idx = self.getParamIO('ht2Idx')

    if samOutputDir is None:
        self.setParamIO('samOutputDir', Configure.getTmpDir())

    # set all input files
    self.setInputDirOrFile('fastqInput1', fastqInput1)
    self.setInputDirOrFile('fastqInput2', fastqInput2)

    if ht2Idx is None:
        # Same input name as the explicit branch below, so the index is
        # always found under one key.
        # NOTE(review): the configuration keys 'ht2IdxFile' and 'ht2Indx'
        # are kept as found; confirm they match what Configure defines.
        self.setInput('ht2IdxFiles', Configure.getConfig('ht2IdxFile'))
        self.setParamIO('ht2Idx', Configure.getConfig('ht2Indx'))
    else:
        # the index prefix expands to the eight files <prefix>.1.ht2 .. .8.ht2
        ht2IdxFiles = [ht2Idx + '.%d.ht2' % n for n in range(1, 9)]
        self.setInput('ht2IdxFiles', ht2IdxFiles)

    # create output file paths and set
    self.setOutputDir1To1('samOutput', samOutputDir, 'hisat', 'sam',
                          'fastqInput1')

    # set how many samples there are
    if fastqInput1 is not None:
        self._setInputSize(len(self.getInputList('fastqInput1')))
def impInitIO(self, ):
    """
    Register the inputs and the merged-gtf outputs from the IO parameters
    stored by __init__().

    'faInput1' and 'gtfInput1' fall back to Configure.getConfig('') when
    unset, and 'gtfOutputDir' falls back to Configure.getTmpDir(). One
    output path <gtfOutputDir>/cuffmerge_<i>/merged.gtf is registered per
    entry of 'assembliesInput'; the output is None when that input is
    unset.
    """
    faInput1 = self.getParamIO('faInput1')
    gtfInput1 = self.getParamIO('gtfInput1')
    assembliesInput = self.getParamIO('assembliesInput')
    gtfOutputDir = self.getParamIO('gtfOutputDir')

    if gtfOutputDir is None:
        # keep the local in sync: it is joined into the output paths below
        # and os.path.join() rejects None
        gtfOutputDir = Configure.getTmpDir()
        self.setParamIO('gtfOutputDir', gtfOutputDir)

    # set all input files
    self.setInputDirOrFile('assembliesInput', assembliesInput)

    for inputName, inputValue in (('faInput1', faInput1),
                                  ('gtfInput1', gtfInput1)):
        if inputValue is None:
            # NOTE(review): the configuration key is an empty string in the
            # original code; confirm which key is intended.
            inputValue = Configure.getConfig('')
            self.setInput(inputName, inputValue)
            self.setParamIO(inputName, inputValue)
        else:
            self.setInput(inputName, inputValue)

    if assembliesInput is None:
        self.setOutput('merged_gtf', None)
        return

    sampleCount = len(self.getInputList('assembliesInput'))
    self._setInputSize(sampleCount)
    merged_gtf = [
        os.path.join(gtfOutputDir, 'cuffmerge_' + str(i), 'merged.gtf')
        for i in range(sampleCount)
    ]
    self.setOutput('merged_gtf', merged_gtf)
def __init__(self,
             bamInput=None,
             figureOutput=None,
             peakInput=None,
             genome=None,
             threads=1,
             rScript='./ChromVarUtility.R',
             cmdParam=None,
             **kwargs):
    """
    Store the IO parameters, initialize IO, then store 'threads' and
    'genome' (Configure.getGenome() when genome is None) and set the
    upstream size to 2.
    """
    super(Step, self).__init__(cmdParam, **kwargs)

    # IO parameters, stored in the same order as the signature lists them
    ioParams = (
        ('bamInput', bamInput),
        ('figureOutput', figureOutput),
        ('peakInput', peakInput),
        ('rScript', rScript),
    )
    for ioName, ioValue in ioParams:
        self.setParamIO(ioName, ioValue)

    self.initIO()

    # other parameters
    self.setParam('threads', threads)
    self.setParam('genome',
                  Configure.getGenome() if genome is None else genome)

    self._setUpstreamSize(2)
def __init__(self,
             fastqInput1=None,
             fastqInput2=None,
             ht2Idx=None,
             samOutputDir=None,
             threads=None,
             cmdParam=None,
             **kwargs):
    """
    Store the four IO parameters, initialize IO and store 'threads'
    (Configure.getThreads() when threads is None).
    """
    super(Step, self).__init__(cmdParam, **kwargs)

    # input and output parameters
    ioParams = (
        ('fastqInput1', fastqInput1),
        ('fastqInput2', fastqInput2),
        ('ht2Idx', ht2Idx),
        ('samOutputDir', samOutputDir),
    )
    for ioName, ioValue in ioParams:
        self.setParamIO(ioName, ioValue)

    self.initIO()

    # other parameters
    threadCount = Configure.getThreads() if threads is None else threads
    self.setParam('threads', threadCount)
def __init__(self,
             bamInput = None,
             gtfInput = None,
             outputDir = None,
             threads = None,
             ismultiReadCorrect = None,
             isupperQuartileForm = None,
             istotalHitsNorm = True,
             fragLenMean = 200,
             fragLenStdDev = 80,
             cmdParam = None,
             **kwargs
             ):
    """
    Store the IO parameters, initialize IO and store the remaining
    option values; 'threads' defaults to Configure.getThreads().
    """
    super(Step, self).__init__(cmdParam, **kwargs)

    # IO parameters
    for ioName, ioValue in (('bamInput', bamInput),
                            ('gtfInput', gtfInput),
                            ('outputDir', outputDir)):
        self.setParamIO(ioName, ioValue)

    self.initIO()

    # option values, stored in the original order
    options = (
        ('ismultiReadCorrect', ismultiReadCorrect),
        ('fragLenMean', fragLenMean),
        ('fragLenStdDev', fragLenStdDev),
        ('isupperQuartileForm', isupperQuartileForm),
        ('istotalHitsNorm', istotalHitsNorm),
    )
    for optName, optValue in options:
        self.setParam(optName, optValue)

    threadCount = threads if threads is not None else Configure.getThreads()
    self.setParam('threads', threadCount)
def impInitIO(self, ):
    """
    Register the sra input and the two fastq outputs ('1.fastq' and
    '2.fastq', joined with '_') from the stored IO parameters.
    """
    sraPath = self.getParamIO('sraInput1')
    outDir = self.getParamIO('fastqOutputDir')

    # a missing output directory parameter falls back to the tmp directory
    if outDir is None:
        self.setParamIO('fastqOutputDir', Configure.getTmpDir())

    # input files
    self.setInputDirOrFile('sraInput1', sraPath)

    # one output per read mate, each derived 1-to-1 from the sra input
    for mate in ('1', '2'):
        self.setOutputDir1To1('fastqOutput' + mate, outDir, None,
                              mate + '.fastq', 'sraInput1', sep='_')

    if sraPath is None:
        return
    self._setInputSize(len(self.getInputList('sraInput1')))
def impInitIO(self, ):
    """
    Register the fastq inputs, the bowtie2 index files and the sam /
    mapping-result outputs from the IO parameters stored by __init__().
    """
    # read back the IO parameters
    readsA = self.getParamIO('fastqInput1')
    readsB = self.getParamIO('fastqInput2')
    indexPrefix = self.getParamIO('bt2Idx')
    samDir = self.getParamIO('samOutputDir')
    resultDir = self.getParamIO('mapRsOutputDir')

    # fastq inputs
    for inputName, inputValue in (('fastqInput1', readsA),
                                  ('fastqInput2', readsB)):
        self.setInputDirOrFile(inputName, inputValue)

    # index: explicit prefix from __init__, otherwise the configured one
    if indexPrefix is not None:
        indexSuffixes = ('.1.bt2', '.2.bt2', '.3.bt2', '.4.bt2',
                         '.rev.1.bt2', '.rev.2.bt2')
        indexFiles = []
        for ending in indexSuffixes:
            indexFiles.append(indexPrefix + ending)
        self.setInput('bt2IdxFiles', indexFiles)
    else:
        self.setInput('bt2IdxFiles', Configure.getConfig('bt2IdxFiles'))
        self.setParamIO('bt2Idx', Configure.getConfig('bt2Idx'))

    # missing output directory parameters fall back to the tmp directory
    for dirName, dirValue in (('samOutputDir', samDir),
                              ('mapRsOutputDir', resultDir)):
        if dirValue is None:
            self.setParamIO(dirName, Configure.getTmpDir())

    # outputs, each derived 1-to-1 from the first fastq input
    self.setOutputDir1To1('samOutput', samDir, None, 'sam', 'fastqInput1')
    self.setOutputDir1To1('mapRsOutput', resultDir, None, 'result.txt',
                          'fastqInput1')

    # the sample count is only known once the first input is set
    if readsA is None:
        return
    self._setInputSize(len(self.getInputList('fastqInput1')))
def impInitIO(self,):
    """
    Register the bam/gtf inputs and the per-sample cufflinks outputs from
    the IO parameters stored by __init__().

    'outputDir' falls back to Configure.getTmpDir() and 'gtfInput' to
    Configure.getConfig('') when unset. For every entry of 'bamInput' four
    result paths below <outputDir>/cufflinks_<i>/ are registered; those
    four outputs are None when 'bamInput' is unset. 'assembliesOutput' is
    always <tmp dir>/assemblies.txt.
    """
    bamInput = self.getParamIO('bamInput')
    gtfInput = self.getParamIO('gtfInput')
    outputDir = self.getParamIO('outputDir')

    if outputDir is None:
        # keep the local in sync: it is joined into the output paths below
        # and os.path.join() rejects None
        outputDir = Configure.getTmpDir()
        self.setParamIO('outputDir', outputDir)

    self.setInputDirOrFile('bamInput', bamInput)

    if gtfInput is None:
        # NOTE(review): the configuration key is an empty string in the
        # original code; confirm which key is intended.
        gtfInput = Configure.getConfig('')
        self.setInput('gtfInput', gtfInput)
        self.setParamIO('gtfInput', gtfInput)
    else:
        self.setInput('gtfInput', gtfInput)

    self.setOutput('assembliesOutput',
                   os.path.join(Configure.getTmpDir(), 'assemblies.txt'))

    # (output name, file name inside each per-sample directory)
    resultFiles = (
        ('genes_fpkm_tracking', 'genes.fpkm_tracking'),
        ('isoforms_fpkm_tracking', 'isoforms.fpkm_tracking'),
        ('skipped_gtf', 'skipped.gtf'),
        ('transcripts_gtf', 'transcripts.gtf'),
    )

    if bamInput is None:
        for outputName, _ in resultFiles:
            self.setOutput(outputName, None)
        return

    sampleCount = len(self.getInputList('bamInput'))
    self._setInputSize(sampleCount)
    sampleDirs = [os.path.join(outputDir, 'cufflinks_' + str(i))
                  for i in range(sampleCount)]
    for outputName, fileName in resultFiles:
        self.setOutput(outputName,
                       [os.path.join(sampleDir, fileName)
                        for sampleDir in sampleDirs])
def impInitIO(self, ):
    """
    Register the fastq input, the reference files found below 'refile'
    and the three matrix outputs below the output directory.

    When 'outputdir' is unset the tmp directory is stored for it and
    self.resultdir becomes 'Cellranger'; otherwise self.resultdir is ''.
    """
    fastqInput = self.getParamIO('fastqInput')
    refile = self.getParamIO('refile')
    outputdir = self.getParamIO('outputdir')

    self.setInputDirOrFile('fastqInput', fastqInput)

    # files directly inside the reference directory: (input name, file name)
    topLevelFiles = (
        ('version', 'version'),
        ('Reference', 'reference.json'),
        ('README', 'README.BEFORE.MODIFYING'),
    )
    for inputName, fileName in topLevelFiles:
        self.setInputDirOrFile(inputName, os.path.join(refile, fileName))

    # files inside <refile>/star, registered under their own file name
    starFiles = (
        'chrLength.txt', 'chrName.txt', 'exonGeTrInfo.tab', 'geneInfo.tab',
        'genomeParameters.txt', 'SAindex', 'sjdbList.fromGTF.out.tab',
        'transcriptInfo.tab', 'chrNameLength.txt', 'chrStart.txt',
        'exonInfo.tab', 'Genome', 'SA', 'sjdbInfo.txt', 'sjdbList.out.tab',
    )
    for fileName in starFiles:
        self.setInputDirOrFile(fileName,
                               os.path.join(refile, 'star', fileName))

    # remaining reference files: (sub directory, file name)
    for subDir, fileName in (('pickle', 'genes.pickle'),
                             ('genes', 'genes.gtf'),
                             ('fasta', 'genome.fa')):
        self.setInputDirOrFile(fileName,
                               os.path.join(refile, subDir, fileName))

    if outputdir is not None:
        self.resultdir = ''
    else:
        self.setParamIO('outputdir', Configure.getTmpDir())
        outputdir = self.getParamIO('outputdir')
        self.resultdir = 'Cellranger'

    # directory that holds the filtered matrix files
    matrixDir = os.path.join(outputdir, self.resultdir, 'outs',
                             'filtered_gene_bc_matrices', 'hg19')
    self.setParamIO('finaldir', matrixDir)

    for outputName, fileName in (('genes', 'genes.tsv'),
                                 ('matrix', 'matrix.mtx'),
                                 ('barcodes', 'barcodes.tsv')):
        self.setOutputDirNTo1(outputName,
                              os.path.join(matrixDir, fileName),
                              '', 'fastqInput')
def __init__(self,
             fastqInput1=None,
             fastqInput2=None,
             fastqOutputDir1=None,
             fastqOutputDir2=None,
             adapter1=None,
             adapter2=None,
             threads=None,
             cmdParamFindAdapter=None,
             cmdParam=None,
             **kwargs):
    """
    called by 'AdapterRemoval()'
    __init__(): Initialize the class with inputs, outputs and other
    parameters. Setting all parameters is the main target of this function.

    fastqInput1, fastqInput2, fastqOutputDir1, fastqOutputDir2: stored as
        IO parameters under the same names.
    adapter1, adapter2: each is handled independently. None is stored as
        None; a value whose len() is greater than 1 is stored as given;
        otherwise, when os.path.exists() accepts the value, the result of
        self.getListInFile() is stored, else the value itself.
    threads: Configure.getThreads() is stored when it is None.
    cmdParamFindAdapter, cmdParam: forwarded to the parent initializer as
        the list [cmdParamFindAdapter, cmdParam].
    """
    super(Step, self).__init__(cmdParam=[cmdParamFindAdapter, cmdParam],
                               **kwargs)

    # set all input and output parameters
    self.setParamIO('fastqInput1', fastqInput1)
    self.setParamIO('fastqInput2', fastqInput2)
    self.setParamIO('fastqOutputDir1', fastqOutputDir1)
    self.setParamIO('fastqOutputDir2', fastqOutputDir2)

    # call self.initIO()
    self.initIO()

    # set other parameters
    if threads is None:
        threads = Configure.getThreads()
    self.setParam('threads', threads)

    # Each adapter is judged by its own length; the adapter2 branch used to
    # test len(adapter1), which failed when only adapter2 was given.
    for adapterName, adapter in (('adapter1', adapter1),
                                 ('adapter2', adapter2)):
        if adapter is None:
            self.setParam(adapterName, None)
        elif len(adapter) > 1:
            self.setParam(adapterName, adapter)
        elif os.path.exists(adapter):
            # NOTE(review): getListInFile() is called without the path, as
            # in the original code; confirm its expected arguments.
            self.setParam(adapterName, self.getListInFile())
        else:
            self.setParam(adapterName, adapter)

    self.adapter1 = {}
    self.adapter2 = {}
def impInitIO(self):
    """
    Register the sam input and the bam output (derived 1-to-1 from it)
    from the stored IO parameters.
    """
    samPath = self.getParamIO('samInput')
    bamDir = self.getParamIO('bamOutputDir')

    # a missing output directory parameter falls back to the tmp directory
    if bamDir is None:
        self.setParamIO('bamOutputDir', Configure.getTmpDir())

    # input files, then output files
    self.setInputDirOrFile('samInput', samPath)
    self.setOutputDir1To1('bamOutput', bamDir, None, 'bam', 'samInput')

    if samPath is None:
        return
    self._setInputSize(len(self.getInputList('samInput')))
def impInitIO(self):
    """
    Register the sra input and the two fastq outputs ('_1.fastq' and
    '_2.fastq') from the stored IO parameters.
    """
    sraPath = self.getParamIO('sraInput')
    outDir = self.getParamIO('fastqOutputDir')

    # input files
    self.setInputDirOrFile('sraInput', sraPath)

    # output files, one per read mate; the last positional argument is ''
    for mate in ('1', '2'):
        self.setOutputDir1To1('fastqOutput' + mate, outDir, None,
                              '_' + mate + '.fastq', 'sraInput', '')

    # NOTE(review): the default directory is stored only after the outputs
    # were registered with the value read above; confirm that
    # setOutputDir1To1 accepts None here.
    if outDir is None:
        self.setParamIO('fastqOutputDir', Configure.getTmpDir())

    if sraPath is None:
        return
    self._setInputSize(len(self.getInputList('sraInput')))
def impInitIO(self, ):
    """
    Register the sam and gtf inputs and the count output (derived 1-to-1
    from the sam input) from the stored IO parameters.
    """
    samPath = self.getParamIO('samInput1')
    gtfPath = self.getParamIO('gtfInput1')
    countDir = self.getParamIO('countOutputDir')

    # a missing output directory parameter falls back to the tmp directory
    if countDir is None:
        self.setParamIO('countOutputDir', Configure.getTmpDir())

    # input files
    for inputName, inputValue in (('samInput1', samPath),
                                  ('gtfInput1', gtfPath)):
        self.setInputDirOrFile(inputName, inputValue)

    # output files
    self.setOutputDir1To1('countOutput', countDir, None, 'count',
                          'samInput1')

    if samPath is None:
        return
    self._setInputSize(len(self.getInputList('samInput1')))
def __init__(self,
             faInput1=None,
             gtfInput1=None,
             assembliesInput=None,
             threads=None,
             gtfOutputDir=None,
             cmdParam=None,
             **kwargs):
    """
    Store the four IO parameters, initialize IO and store 'threads'
    (Configure.getThreads() when threads is None).
    """
    super(Step, self).__init__(cmdParam, **kwargs)

    ioParams = (
        ('faInput1', faInput1),
        ('gtfInput1', gtfInput1),
        ('assembliesInput', assembliesInput),
        ('gtfOutputDir', gtfOutputDir),
    )
    for ioName, ioValue in ioParams:
        self.setParamIO(ioName, ioValue)

    self.initIO()

    threadCount = threads if threads is not None else Configure.getThreads()
    self.setParam('threads', threadCount)
def __init__(self,
             fastqInput = None,
             outFileDir = None,
             genomeDir = None,
             threads = None,
             cmdParam = None,
             **kwargs):
    """
    Store the three IO parameters, initialize IO and store 'threads'
    (Configure.getThreads() when threads is None).
    """
    super(Step, self).__init__(cmdParam, **kwargs)

    for ioName, ioValue in (('fastqInput', fastqInput),
                            ('outFileDir', outFileDir),
                            ('genomeDir', genomeDir)):
        self.setParamIO(ioName, ioValue)

    self.initIO()

    threadCount = threads if threads is not None else Configure.getThreads()
    self.setParam('threads', threadCount)
def __init__(self,
             fastqInput1=None,
             fastqInput2=None,
             bt2Idx=None,
             samOutputDir=None,
             mapRsOutputDir=None,
             threads=None,
             isNoDiscordant=True,
             isNoUnal=True,
             isNoMixed=True,
             X=2000,
             cmdParam=None,
             **kwargs):
    """
    called by 'Bowtie()'
    __init__(): Initialize the class with inputs, outputs and other
    parameters. Setting all parameters is the main target of this function.

    fastqInput1, fastqInput2, bt2Idx, samOutputDir, mapRsOutputDir: stored
        as IO parameters under the same names.
    isNoDiscordant, isNoUnal, isNoMixed, X: stored as parameters under the
        same names.
    threads: Configure.getThreads() is stored when it is None.
    cmdParam, **kwargs: forwarded unchanged to the parent initializer.
    """
    # The text above must be the first statement to act as the docstring;
    # it used to follow the super() call and was therefore discarded.
    super(Step, self).__init__(cmdParam, **kwargs)

    # set all input and output parameters
    self.setParamIO('fastqInput1', fastqInput1)
    self.setParamIO('fastqInput2', fastqInput2)
    self.setParamIO('bt2Idx', bt2Idx)
    self.setParamIO('samOutputDir', samOutputDir)
    self.setParamIO('mapRsOutputDir', mapRsOutputDir)

    # call self.initIO()
    self.initIO()

    # set other parameters
    self.setParam('isNoDiscordant', isNoDiscordant)
    self.setParam('isNoUnal', isNoUnal)
    self.setParam('isNoMixed', isNoMixed)
    self.setParam('X', X)

    if threads is None:
        threads = Configure.getThreads()
    self.setParam('threads', threads)
def impInitIO(self, ):
    """
    Register the R script, the three matrix input files (only when
    'inputdir' is set) and the five jpeg outputs below 'outputdir'.
    """
    plotDir = self.getParamIO('outputdir')
    matrixDir = self.getParamIO('inputdir')
    scriptPath = self.getParamIO('rscript')

    self.setInputDirOrFile('rscript', scriptPath)

    # os.path.join() cannot take None, so resolve the output directory first
    if plotDir is None:
        self.setParamIO('outputdir', Configure.getTmpDir())
        plotDir = self.getParamIO('outputdir')

    # inputs are only registered when an input directory is known
    if matrixDir is not None:
        for inputName, fileName in (('barcodes', 'barcodes.tsv'),
                                    ('genes', 'genes.tsv'),
                                    ('matrix', 'matrix.mtx')):
            self.setInputDirOrFile(inputName,
                                   os.path.join(matrixDir, fileName))

    # every plot is registered against the 'barcodes' input
    plotNames = ('violinplot', 'geneplot', 'variableGenes', 'Elbowplot',
                 'TSNEplot')
    for plotName in plotNames:
        self.setOutputDirNTo1(plotName,
                              os.path.join(plotDir, plotName + '.jpeg'),
                              '', 'barcodes')
def _singleRun(self,i):
    """
    Build and submit the cufflinks command line for the i-th bam input,
    followed by an 'echo' that appends the sample's transcripts.gtf path
    to the 'assembliesOutput' file.
    """
    bamFiles = self.getInputList('bamInput')
    annotation = self.getParamIO('gtfInput')
    outRoot = self.getParamIO('outputDir')
    print(os.path.join(Configure.getTmpDir(), 'assemblies.txt'))

    # per-sample result directory
    sampleDir = os.path.join(outRoot, 'cufflinks_'+str(i))

    cmdline = ['cufflinks', '-p', str(self.getParam('threads'))]
    # boolean switches, in the original order
    for flag, paramName in (('-u', 'ismultiReadCorrect'),
                            ('-N', 'isupperQuartileForm'),
                            ('--total-hits-norm', 'istotalHitsNorm')):
        cmdline.append(self.getBoolParamCmd(flag, paramName))
    cmdline += [
        '-m', str(self.getParam('fragLenMean')),
        '-s', str(self.getParam('fragLenStdDev')),
        '-G', annotation,
        '-o', sampleDir,
        bamFiles[i],
        ';',
        'echo', '"'+os.path.join(sampleDir, 'transcripts.gtf')+'" >>',
        self.getOutput("assembliesOutput"),
    ]
    self.callCmdline('V1', cmdline)
# -*- coding: utf-8 -*- from SamToBam import SamToBam from StepBase import Configure, Schedule Configure.setRefDir('/home/hca/zhangwei1/hg19_bowtie2') Configure.setGenome('hg19') Configure.setIdentity('ATAC') test = SamToBam(samInput='./minidata/atac/SamForTest', threads=5, bamOutputDir='./bamOutputDir') Schedule.run()
# Driver script: configures the pipeline, builds one Bowtie2 step on the
# paired-end test data and calls Schedule.run().
# AdapterRemoval and os are imported but not used in the visible statements.
from Bowtie2 import Bowtie2
from AdapterRemoval import AdapterRemoval
from StepBase import Configure,Schedule
import os

# Global configuration must be in place before the step is constructed.
# NOTE(review): the reference directory is a machine-specific absolute path.
Configure.setRefDir('/home/wzhang/genome/hg19_bowtie2/')
Configure.setGenome('hg19')
#Configure.setIdentity('zwei')
Configure.enableDocker(False)

# The step object is only kept in 'rs'; it is not used again below.
rs=Bowtie2(fastqInput1='./minidata/atac/end1', fastqInput2='./minidata/atac/end2')

# presumably executes the steps created above -- confirm against StepBase
Schedule.run()
# Driver script: chains FastqDump -> Hisat2 -> SamToBam -> BamSort ->
# Cufflinks on the smart-seq test data. Each step is created and then
# called with the previous step object as its argument.
# Schedule and Cuffmerge are imported but not used in the visible statements.
from StepBase import Schedule, Configure
from FastqDump import FastqDump
from Hisat2 import Hisat2
from SamToBam import SamToBam
from BamSort import BamSort
from Cufflinks import Cufflinks
from Cuffmerge import Cuffmerge

Configure.setIdentity('sqchen')

#Fastq-dump
fastq_dump = FastqDump(sraInput1='./minidata/smartseq/sra')
# print(fastq_dump.outputs)

#Hisat2
hisat = Hisat2(ht2Idx="./minidata/smartseq/hg19_index/genome")(fastq_dump)
# print (hisat.outputs)

# Bam2Sam
# Configure.setRefDir('../../yinqijin/hg19_bowtie2/')
# Configure.setGenome('hg19')
sam2bam = SamToBam(threads=5)(hisat)
# print (sam2bam.outputs)

# #BamSort
bamsort = BamSort()(sam2bam)
# print (bamsort.outputs)

# #Cufflinks
cufflinks = Cufflinks(gtfInput='./minidata/smartseq/genome.gtf', threads=16)(bamsort)