def __init__(self, **keywords):
    """
    2011-7-11
        Constructor. Performs no work of its own: every keyword argument is
        forwarded unchanged to
        CompareAlleleFrequencyOfTwoPopulationFromOneVCFFolder.__init__.
    """
    CompareAlleleFrequencyOfTwoPopulationFromOneVCFFolder.__init__(self, **keywords)
    # NOTE(review): the triple-quote below has no visible closing partner
    # belonging to this method; it looks like the opener of a quoted-out
    # region that follows the constructor -- TODO confirm against the full file.
"""
def addAllJobs(self, workflow=None, inputVCFData=None, chr2IntervalDataLs=None,
        GenomeAnalysisTKJar=None, samtools=None,
        CreateSequenceDictionaryJava=None, CreateSequenceDictionaryJar=None,
        BuildBamIndexFilesJava=None, BuildBamIndexJar=None,
        mv=None,
        refFastaFList=None,
        needFastaIndexJob=False, needFastaDictJob=False,
        data_dir=None, no_of_gatk_threads=1,
        intervalSize=3000, intervalOverlapSize=0,
        outputDirPrefix="", transferOutput=True, job_max_memory=2000, **keywords):
    """
    2012.10.15
        architect of the whole map-reduce framework
        call the parent's addAllJobs in a loop

    Steps, in the order the jobs are added:
        1. Call the parent class's addAllJobs once per sampling round
            (self.noOfSamplings rounds). Round i uses the output prefix
            '<outputDirPrefix>_<i>_'.
        2. Add a mkdir job for the '<outputDirPrefix>FinalReduce' folder.
        3. Add a ReduceMatrixByAverageColumnsWithSameKey job
            (medianAlleleSharingStatAcrossAllSampling.tsv) and a
            MergeSameHeaderTablesIntoOne job
            (alleleSharingStatAcrossAllSampling.tsv), then hand every round's
            estimateOutlierJob to both through addInputToStatMergeJob.
        4. Add two DrawHistogram jobs on the merged table, one for the
            "outlierFraction" column and one for the "corr" column.
        5. Write self.no_of_jobs to stderr.

    Arguments:
        workflow: the workflow object jobs are added to; its mkdirWrap,
            ReduceMatrixByAverageColumnsWithSameKey,
            MergeSameHeaderTablesIntoOne and DrawHistogram attributes are
            used as executables.
        outputDirPrefix: prefix for the per-round prefixes and for the
            final reduce folder.
        no_of_gatk_threads: forwarded to the parent's addAllJobs.
            (Previously this argument was accepted but a literal 1 was
            passed on, so the caller's value was ignored.)
        every other named argument and **keywords: forwarded unchanged to
            the parent's addAllJobs.

    The final reduce/merge/histogram jobs always use transferOutput=True and
    the histogram jobs always use job_max_memory=2000, regardless of the
    transferOutput / job_max_memory arguments (which only reach the parent).

    No return statement appears in the visible body.
    """
    # Map phase: one parent addAllJobs call per sampling round, each under its
    # own output prefix so the rounds do not share output folders.
    samplingReturnDataLs = []
    # range() is equivalent to xrange() here and also works on Python 3.
    for samplingIndex in range(self.noOfSamplings):
        oneSamplingReturnData = CompareAlleleFrequencyOfTwoPopulationFromOneVCFFolder.addAllJobs(
            self,
            workflow=workflow, inputVCFData=inputVCFData,
            chr2IntervalDataLs=chr2IntervalDataLs, samtools=samtools,
            GenomeAnalysisTKJar=GenomeAnalysisTKJar,
            CreateSequenceDictionaryJava=CreateSequenceDictionaryJava,
            CreateSequenceDictionaryJar=CreateSequenceDictionaryJar,
            BuildBamIndexFilesJava=BuildBamIndexFilesJava, BuildBamIndexJar=BuildBamIndexJar,
            mv=mv,
            refFastaFList=refFastaFList,
            needFastaIndexJob=needFastaIndexJob, needFastaDictJob=needFastaDictJob,
            # Honour the caller's thread count instead of a hard-coded 1.
            data_dir=data_dir, no_of_gatk_threads=no_of_gatk_threads,
            intervalSize=intervalSize, intervalOverlapSize=intervalOverlapSize,
            outputDirPrefix='%s_%s_' % (outputDirPrefix, samplingIndex),
            transferOutput=transferOutput, job_max_memory=job_max_memory,
            **keywords)
        samplingReturnDataLs.append(oneSamplingReturnData)

    # Reduce phase: everything below writes into one shared folder.
    topOutputDir = "%sFinalReduce" % (outputDirPrefix)
    topOutputDirJob = yh_pegasus.addMkDirJob(workflow, mkdir=workflow.mkdirWrap, outputDir=topOutputDir)

    # a ReduceMatrixByAverageColumnsWithSameKey job
    # NOTE(review): the output file is named "median..." while the program
    # name says "Average" -- confirm which statistic is intended.
    outputFile = File(os.path.join(topOutputDir, 'medianAlleleSharingStatAcrossAllSampling.tsv'))
    medianReduceJob = self.addStatMergeJob(
        workflow, statMergeProgram=workflow.ReduceMatrixByAverageColumnsWithSameKey,
        outputF=outputFile, extraArguments='--keyColumnLs 0 -v 1-8',
        parentJobLs=[topOutputDirJob],
        extraDependentInputLs=None, transferOutput=True)

    # a MergeSameHeaderTablesIntoOne job
    outputFile = File(os.path.join(topOutputDir, 'alleleSharingStatAcrossAllSampling.tsv'))
    mergeJob = self.addStatMergeJob(
        workflow, statMergeProgram=workflow.MergeSameHeaderTablesIntoOne,
        outputF=outputFile, extraArguments=None, parentJobLs=[topOutputDirJob],
        extraDependentInputLs=None, transferOutput=True)

    # Feed each round's estimateOutlierJob into both reducers.
    for oneSamplingReturnData in samplingReturnDataLs:
        self.addInputToStatMergeJob(
            workflow=workflow, statMergeJob=medianReduceJob,
            parentJobLs=[oneSamplingReturnData.estimateOutlierJob])
        self.addInputToStatMergeJob(
            workflow=workflow, statMergeJob=mergeJob,
            parentJobLs=[oneSamplingReturnData.estimateOutlierJob])

    # (output file name, column header to plot, axis label)
    # no spaces or parenthesis or any other shell-vulnerable letters in the
    # x or y axis labels (whichColumnPlotLabel, xColumnPlotLabel)
    histogramSpecLs = [
        ('outlierFraction_Hist.png', "outlierFraction", "outlierFraction"),
        ('AFS_cor_Hist.png', "corr", "AFSCorrelation"),
    ]
    for histogramFname, columnHeader, plotLabel in histogramSpecLs:
        outputFile = File(os.path.join(topOutputDirJob.output, histogramFname))
        self.addDrawHistogramJob(
            workflow=workflow, executable=workflow.DrawHistogram,
            inputFileList=[mergeJob.output],
            outputFile=outputFile,
            whichColumn=None, whichColumnHeader=columnHeader, whichColumnPlotLabel=plotLabel,
            logY=False, positiveLog=True, logCount=False, valueForNonPositiveYValue=-1,
            minNoOfTotal=5,
            figureDPI=100, samplingRate=1,
            parentJobLs=[topOutputDirJob, mergeJob],
            extraDependentInputLs=None,
            extraArguments=None, transferOutput=True, job_max_memory=2000)

    sys.stderr.write("%s jobs.\n" % (self.no_of_jobs))