示例#1
0
 def getChunks(self, sequenceFiles, chunksDir):
     """Run cactus_blast_chunkSequences over sequenceFiles, writing into chunksDir.

     The chunk size and overlap size come from self.blastOptions. Returns the
     non-empty newline-separated entries of whatever popenCatch returns for
     the command.
     """
     command = "cactus_blast_chunkSequences %s %i %i %s %s" % (
         getLogLevelString(),
         self.blastOptions.chunkSize,
         self.blastOptions.overlapSize,
         chunksDir,
         " ".join(sequenceFiles),
     )
     outputLines = popenCatch(command).split("\n")
     return [line for line in outputLines if line != ""]
示例#2
0
def getRandomConfigFile():
    """Write a randomised variant of cactus_config.xml and return its path.

    The stock config is loaded from cactusRootPath(), several attributes of
    its single "caf" node are replaced with random values, the "check" node
    gets runCheck="1" and the "normal" node gets iterations="2". The result
    is written to a fresh temp file in the current directory; when the log
    level is "DEBUG" the file is also cat'ed via system().
    """
    tempConfigFile = getTempFile(rootDir="./", suffix=".xml")
    config = ET.parse(os.path.join(cactusRootPath(), "cactus_config.xml")).getroot()
    cafNode = config.find("caf")
    assert len(config.findall("caf")) == 1

    # One random value per annealing round; "trim" below reuses the same count.
    annealingRounds = 1 + int(random.random() * 10)
    cafNode.attrib["annealingRounds"] = " ".join([ str(1 + int(random.random() * 10)) for i in xrange(annealingRounds) ])
    # De-duplicated and sorted, so this may be empty or shorter than requested.
    deannealingRounds = list(set([ 1 + int(random.random() * 10) for i in xrange(int(random.random() * 10)) ]))
    deannealingRounds.sort()
    cafNode.attrib["deannealingRounds"] = " ".join([ str(i) for i in deannealingRounds ])
    cafNode.attrib["trim"] = " ".join([ str(1 + int(random.random() * 5)) for i in xrange(annealingRounds) ])

    cafNode.attrib["alignRepeatsAtLoop"] = str(random.random() * annealingRounds)

    cafNode.attrib["minimumTreeCoverage"] = str(random.random())
    cafNode.attrib["blockTrim"] = str(int(random.random() * 5))
    cafNode.attrib["ignoreAllChainsLessThanMinimumTreeCoverage"] = str(random.choice([0, 1]))
    cafNode.attrib["minimumBlockDegree"] = str(random.choice([0, 5]))

    checkNode = config.find("check")
    checkNode.attrib["runCheck"] = "1"

    normalNode = config.find("normal")
    normalNode.attrib["iterations"] = "2"

    #Now print the file; the with-block closes the handle even if write() raises.
    with open(tempConfigFile, 'w') as fileHandle:
        ET.ElementTree(config).write(fileHandle)
    if getLogLevelString() == "DEBUG":
        system("cat %s" % tempConfigFile)
    return tempConfigFile
示例#3
0
def createJob(attrib, parent, config):
    """Creates an XML record for the job in a file within the hierarchy of jobs.

    attrib is copied onto the job's first "followOn" element and must contain
    a "time" entry. parent, when not None, is stored as the "parent"
    attribute. config.attrib supplies the temp-file/temp-dir providers and
    the default values read below. Returns the new "job" element, which has
    a "followOns" and an (empty) "children" sub-element.
    """
    job = ET.Element("job")
    job.attrib["file"] = config.attrib["job_file_dir"].getTempFile(".xml")
    job.attrib["remaining_retry_count"] = config.attrib["retry_count"]
    job.attrib["colour"] = "grey"
    followOns = ET.SubElement(job, "followOns")
    ET.SubElement(followOns, "followOn", attrib.copy())
    if parent is not None:
        job.attrib["parent"] = parent
    job.attrib["child_count"] = str(0)
    job.attrib["black_child_count"] = str(0)
    job.attrib["log_level"] = getLogLevelString()
    job.attrib["log_file"] = config.attrib["log_file_dir"].getTempFile(".log") #The log file for the actual command
    job.attrib["slave_log_file"] = config.attrib["slave_log_file_dir"].getTempFile(".log") #The log file for the slave
    job.attrib["global_temp_dir"] = config.attrib["temp_dir_dir"].getTempDirectory()
    job.attrib["job_creation_time"] = str(time.time())
    job.attrib["environment_file"] = config.attrib["environment_file"]
    job.attrib["job_time"] = config.attrib["job_time"]
    job.attrib["max_log_file_size"] = config.attrib["max_log_file_size"]
    job.attrib["default_memory"] = config.attrib["default_memory"]
    job.attrib["default_cpu"] = config.attrib["default_cpu"]
    job.attrib["total_time"] = attrib["time"]
    # The flag is stored as an integer string; any non-zero value enables it.
    if int(config.attrib["reportAllJobLogFiles"]):
        job.attrib["reportAllJobLogFiles"] = ""
    # "in" replaces dict.has_key(), which no longer exists in Python 3.
    if "stats" in config.attrib:
        job.attrib["stats"] = config.attrib["log_file_dir"].getTempFile(".xml") #The file to store stats in..
    ET.SubElement(job, "children")
    return job
 def run(self):
     """Chunk the input sequence and schedule one PreprocessChunk per chunk.

     The input at self.inSequencePath is split with
     cactus_blast_chunkSequences (overlap 0). For every input chunk a child
     PreprocessChunk target is added; it receives a window of input chunks
     around the current one (wrapping to the start of the list when the
     window runs off the end) plus the current chunk and its output path.
     A MergeChunks follow-on is set with the list of output chunk paths.
     """
     logger.info("Preparing sequence for preprocessing")
     # chunk it up
     inChunkDirectory = makeSubDir(os.path.join(self.getGlobalTempDir(), "preprocessChunksIn"))
     inChunkList = [
         chunk
         for chunk in popenCatch(
             "cactus_blast_chunkSequences %s %i 0 %s %s"
             % (getLogLevelString(), self.prepOptions.chunkSize, inChunkDirectory, self.inSequencePath)
         ).split("\n")
         if chunk != ""
     ]
     outChunkDirectory = makeSubDir(os.path.join(self.getGlobalTempDir(), "preprocessChunksOut"))
     outChunkList = []
     totalChunks = len(inChunkList)
     # The number of chunks to sample does not depend on the current chunk,
     # so calculate it once instead of on every iteration.
     inChunkNumber = int(max(1, math.ceil(totalChunks * self.prepOptions.proportionToSample)))
     # Only checked when there are chunks, as before (no chunks, no iterations).
     assert totalChunks == 0 or 0 < inChunkNumber <= totalChunks
     # For each input chunk we create an output chunk, it is the output chunks that get concatenated together.
     for i, inChunk in enumerate(inChunkList):
         outChunk = os.path.join(outChunkDirectory, "chunk_%i" % i)
         outChunkList.append(outChunk)
         # Now get the list of chunks flanking and including the current chunk.
         # Floor division keeps the slice index an integer on every Python version.
         j = max(0, i - inChunkNumber // 2)
         inChunks = inChunkList[j : j + inChunkNumber]
         if len(inChunks) < inChunkNumber:  # This logic is like making the list circular
             inChunks += inChunkList[: inChunkNumber - len(inChunks)]
         assert len(inChunks) == inChunkNumber
         self.addChildTarget(
             PreprocessChunk(
                 self.prepOptions, inChunks, float(inChunkNumber) / totalChunks, inChunk, outChunk
             )
         )
     # follow on to merge chunks
     self.setFollowOnTarget(MergeChunks(self.prepOptions, outChunkList, self.outSequencePath))
示例#5
0
 def testBlastRandom(self):
     """Make some sequences, put them in a file, call blast with random parameters
     and check it runs okay.

     Each of the self.testNo rounds writes between 0 and 9 mutated (and
     randomly reverse-complemented) copies of one random sequence to a fasta
     file, then calls runCactusBlast with a random chunk and overlap size.
     """
     tempSeqFile = os.path.join(self.tempDir, "tempSeq.fa")
     self.tempFiles.append(tempSeqFile)
     for test in xrange(self.testNo):
         seqNo = random.choice(xrange(0, 10))
         seq = getRandomSequence(8000)[1]
         # The with-block closes the fasta file even if a write fails.
         with open(tempSeqFile, 'w') as fileHandle:
             # Every copy is mutated from the same original sequence; a
             # separate name avoids shadowing it inside the loop.
             mutatedSeqs = [(str(i), mutateSequence(seq, 0.3 * random.random()))
                            for i in xrange(seqNo)]
             for fastaHeader, mutatedSeq in mutatedSeqs:
                 if random.random() > 0.5:
                     mutatedSeq = reverseComplement(mutatedSeq)
                 fastaWrite(fileHandle, fastaHeader, mutatedSeq)
         chunkSize = random.choice(xrange(500, 9000))
         overlapSize = random.choice(xrange(2, 100))
         toilDir = os.path.join(getTempDirectory(self.tempDir), "toil")
         runCactusBlast([tempSeqFile], self.tempOutputFile, toilDir,
                        chunkSize, overlapSize)
         #runToilStatusAndFailIfNotComplete(toilDir)
         if getLogLevelString() == "DEBUG":
             system("cat %s" % self.tempOutputFile)
         system("rm -rf %s " % toilDir)
示例#6
0
 def test_bedParsing(self):
     """ mafComparator should parse a bed file and use the intervals for testing

     For every entry of self.knownValues the two mafs and the bed text are
     written out, mafComparator is run through system(), and the 'totalTrue'
     attributes of the first homologyTests element in the output XML are
     compared with the expected numbers. An expected interval total of None
     means the 'A' element must be absent.
     """
     outputPath = os.path.join('tempTestFiles', 'output.xml')
     for maf1, maf2, bed, totalTrue, totalTrueInInterval in self.knownValues:
         if not os.path.exists('tempTestFiles'):
             os.mkdir('tempTestFiles')
         # with-blocks close each input file even if a write fails.
         with open(self.maf1path, 'w') as f:
             f.write('%s%s%s' % (self.header, maf1, self.footer))
         with open(self.maf2path, 'w') as f:
             f.write('%s%s%s' % (self.header, maf2, self.footer))
         with open(self.bedpath, 'w') as f:
             f.write('%s' % bed)
         cmd = ['mafComparator']
         cmd.append('--mafFile1=%s' % self.maf1path)
         cmd.append('--mafFile2=%s' % self.maf2path)
         cmd.append('--outputFile=%s' % outputPath)
         if bed != '':
             cmd.append('--bedFiles=%s' % os.path.join('tempTestFiles', 'bed.bed'))
         cmd.append('--sampleNumber=1000 --logLevel %s' % getLogLevelString())
         system(" ".join(cmd))
         tree = ET.parse(outputPath)
         aggregate = tree.findall('homologyTests')[0].find('aggregateResults')
         # assertAlmostEqual is the supported name; the *Equals alias is deprecated.
         self.assertAlmostEqual(totalTrue,
                                float(aggregate.find('all').attrib['totalTrue']))
         if totalTrueInInterval is None:
             self.assertIsNone(aggregate.find('A'))
         else:
             self.assertAlmostEqual(totalTrueInInterval,
                                    float(aggregate.find('A').attrib['totalTrue']))
         shutil.rmtree(os.path.dirname(self.maf1path))
示例#7
0
 def run(self):
     """Chunk the flower's sequences and add the all-against-all blast child.

     cactus_blast_chunkFlowerSequences is run with the cactus disk, flower
     name and blast options held on self; the non-empty newline-separated
     entries of popenCatch's result are passed to MakeBlastsAllAgainstAll
     together with self.finalResultsFile.
     """
     chunksDir = makeSubDir(os.path.join(self.getGlobalTempDir(), "chunks"))
     command = "cactus_blast_chunkFlowerSequences %s '%s' %s %i %i %i %s" % (
         getLogLevelString(),
         self.cactusDisk,
         self.flowerName,
         self.blastOptions.chunkSize,
         self.blastOptions.overlapSize,
         self.blastOptions.minimumSequenceLength,
         chunksDir,
     )
     chunks = [line for line in popenCatch(command).split("\n") if line != ""]
     logger.info("Broken up the flowers into individual 'chunk' files")
     blastTarget = MakeBlastsAllAgainstAll(self.blastOptions, chunks, self.finalResultsFile)
     self.addChildTarget(blastTarget)
示例#8
0
def reloadJobTree(jobTree):
    """Reload an existing job tree from its directory.

    Checks that the config file, environment file and job directory exist,
    re-parses the config, overwrites its "log_level" with the current log
    level and writes it back, then loads the batch system.
    Returns the tuple (config, batchSystem).
    """
    logger.info("The job tree appears to already exist, so we'll reload it")
    configFileName = getConfigFileName(jobTree)
    #A valid job tree must contain the config file
    assert os.path.isfile(configFileName)
    #A valid job tree must contain a pickle file which encodes the path environment of the job
    assert os.path.isfile(getEnvironmentFileName(jobTree))
    #A job tree must have a directory of jobs.
    assert os.path.isdir(getJobFileDirName(jobTree))

    root = ET.parse(configFileName).getroot()
    root.attrib["log_level"] = getLogLevelString()
    #This updates the on disk config file with the new logging setting
    writeConfig(root)

    loadedBatchSystem = loadTheBatchSystem(root)
    logger.info("Reloaded the jobtree")
    return root, loadedBatchSystem
示例#9
0
 def testCPecanEmMultipleTrials(self):
     """Runs cPecanEm with multiple different trials.

     For each sequence pair: computes alignments, runs runCPecanEm with 3
     random-start trials, checks that the "maxLikelihood" attribute of the
     XML model equals the likelihood of the loaded best model, recomputes
     the alignments with the generated blast scoring matrix, and finally
     runs cPecanModifyHmm on the model and reloads it.
     """
     for seqFile1, seqFile2 in seqFilePairGenerator():
         tempDir = getTempDirectory(rootDir=os.getcwd())
         jobTreeDir = os.path.join(tempDir, "jobTree")
         alignmentsFile = os.path.join(tempDir, "alignments.cigars")
         computeAlignments(seqFile1, seqFile2, alignmentsFile)
         logger.info("Computed alignments for seqs %s and %s" % (seqFile1, seqFile2))
         outputModelFile = os.path.join(tempDir, "outputModel.txt")
         outputModelXMLFile = os.path.join(tempDir, "outputModel.xml")
         outputBlastFile = os.path.join(tempDir, "outputBlast.txt")
         #First run the script to generate a model and do one iteration of EM to
         #get the likelihood to compare with the final likelihood
         trials=3
         runCPecanEm(sequenceFiles=[ seqFile1, seqFile2 ],
                      alignmentsFile=alignmentsFile, outputModelFile=outputModelFile,
                      jobTreeDir=jobTreeDir,
                      trials=trials,
                      outputTrialHmms=True,
                      iterations=5, randomStart=True, logLevel=getLogLevelString(),
                      optionsToRealign="--diagonalExpansion=6 --splitMatrixBiggerThanThis=100",
                      outputXMLModelFile=outputModelXMLFile,
                      blastScoringMatrixFile=outputBlastFile)
         #Each trial's model is read from outputModelFile with a "_<trial index>" suffix
         trialHmms = [ Hmm.loadHmm(outputModelFile + ("_%i" % i)) for i in xrange(trials) ]
         hmm = Hmm.loadHmm(outputModelFile)
         node = ET.parse(outputModelXMLFile).getroot()
         logger.info("After multiple trials and iterations of EM the best likelihood found was %s, the likelihoods of the variants were: %s" %
                     (hmm.likelihood, " ".join([ str(trialHmm.likelihood) for trialHmm in trialHmms ])))

         matchProbs, gapOpen, gapExtend = makeBlastScoringMatrix(hmm, ("ACTG",))
         logger.info("Gap open: %s, Gap extend: %s, Match probs %s" % (gapOpen, gapExtend, " ".join(map(str, matchProbs))))

         #assertEqual reports both values on failure, unlike assertTrue(a == b)
         self.assertEqual(float(node.attrib["maxLikelihood"]), hmm.likelihood)

         #Now use the blast file to compute a new matrix
         computeAlignments(seqFile1, seqFile2, alignmentsFile, lastzArguments=("--ambiguous=iupac --scores=%s" % outputBlastFile))

         #Run modifyHmm to check it works; the reloads below only need to succeed
         system("cPecanModifyHmm %s %s --gcContent=0.5 --substitutionRate=0.05 --setFlatIndelEmissions" % (outputModelFile, outputModelFile))
         hmm = Hmm.loadHmm(outputModelFile)
         node = ET.parse(outputModelXMLFile).getroot()

         system("rm -rf %s" % tempDir)
示例#10
0
def createJobTree(options):
    """Create the job tree directories and config for the first time.

    Makes options.jobTree (converted with absSymPath) and its job file
    directory, builds a "config" element from the values on options, loads
    the batch system, records the rescue-jobs polling frequency and writes
    the config with writeConfig(). Returns the tuple (config, batchSystem).
    os.mkdir raises if the job tree directory already exists.
    """
    logger.info("Starting to create the job tree setup for the first time")
    options.jobTree = absSymPath(options.jobTree)
    os.mkdir(options.jobTree)
    os.mkdir(getJobFileDirName(options.jobTree))
    config = ET.Element("config")
    config.attrib["log_level"] = getLogLevelString()
    config.attrib["job_tree"] = options.jobTree
    config.attrib["parasol_command"] = options.parasolCommand
    # try_count is one more than the retry count: the first attempt plus retries.
    config.attrib["try_count"] = str(int(options.retryCount) + 1)
    config.attrib["max_job_duration"] = str(float(options.maxJobDuration))
    config.attrib["batch_system"] = options.batchSystem
    config.attrib["job_time"] = str(float(options.jobTime))
    config.attrib["max_log_file_size"] = str(int(options.maxLogFileSize))
    config.attrib["default_memory"] = str(int(options.defaultMemory))
    config.attrib["default_cpu"] = str(int(options.defaultCpu))
    config.attrib["max_cpus"] = str(int(options.maxCpus))
    config.attrib["max_memory"] = str(int(options.maxMemory))
    config.attrib["max_threads"] = str(int(options.maxThreads))
    # The "big" batch system settings are only recorded when one is configured.
    if options.bigBatchSystem is not None:
        config.attrib["big_batch_system"] = options.bigBatchSystem
        config.attrib["big_memory_threshold"] = str(
            int(options.bigMemoryThreshold))
        config.attrib["big_cpu_threshold"] = str(int(options.bigCpuThreshold))
        config.attrib["big_max_cpus"] = str(int(options.bigMaxCpus))
        config.attrib["big_max_memory"] = str(int(options.bigMaxMemory))

    if options.stats:
        config.attrib["stats"] = ""
    #Load the batch system.
    batchSystem = loadTheBatchSystem(config, options)
    logger.info("Loaded the batch system %s" % batchSystem)

    #Set the parameters determining the polling frequency of the system.
    #The batch system's own value is the default; an explicit option overrides it.
    config.attrib["rescue_jobs_frequency"] = str(
        float(batchSystem.getRescueJobFrequency()))
    if options.rescueJobsFrequency is not None:
        config.attrib["rescue_jobs_frequency"] = str(
            float(options.rescueJobsFrequency))

    writeConfig(config)

    logger.info("Finished the job tree setup")
    return config, batchSystem
示例#11
0
def getRandomConfigFile():
    """Write a randomised variant of cactus_config.xml and return its path.

    Loads the stock config from cactusRootPath(), overwrites several
    attributes of its single "caf" node with random values, sets
    runCheck="1" on the "check" node and iterations="2" on the "normal"
    node, and writes the result to a new temp file in the current
    directory. The file is cat'ed via system() when the log level is "DEBUG".
    """
    tempConfigFile = getTempFile(rootDir="./", suffix=".xml")
    config = ET.parse(os.path.join(cactusRootPath(),
                                   "cactus_config.xml")).getroot()
    cafNode = config.find("caf")
    assert len(config.findall("caf")) == 1

    # One random value per annealing round; "trim" below uses the same count.
    annealingRounds = 1 + int(random.random() * 10)
    cafNode.attrib["annealingRounds"] = " ".join(
        [str(1 + int(random.random() * 10)) for i in xrange(annealingRounds)])
    # De-duplicated and sorted, so the list may be empty.
    deannealingRounds = list(
        set([
            1 + int(random.random() * 10)
            for i in xrange(int(random.random() * 10))
        ]))
    deannealingRounds.sort()
    cafNode.attrib["deannealingRounds"] = " ".join(
        [str(i) for i in deannealingRounds])
    cafNode.attrib["trim"] = " ".join(
        [str(1 + int(random.random() * 5)) for i in xrange(annealingRounds)])

    cafNode.attrib["alignRepeatsAtLoop"] = str(random.random() *
                                               annealingRounds)

    cafNode.attrib["minimumTreeCoverage"] = str(random.random())
    cafNode.attrib["blockTrim"] = str(int(random.random() * 5))
    cafNode.attrib["ignoreAllChainsLessThanMinimumTreeCoverage"] = str(
        random.choice([0, 1]))
    cafNode.attrib["minimumBlockDegree"] = str(random.choice([0, 5]))

    checkNode = config.find("check")
    checkNode.attrib["runCheck"] = "1"

    normalNode = config.find("normal")
    normalNode.attrib["iterations"] = "2"

    #Now print the file; the with-block closes the handle even if write() raises.
    with open(tempConfigFile, 'w') as fileHandle:
        ET.ElementTree(config).write(fileHandle)
    if getLogLevelString() == "DEBUG":
        system("cat %s" % tempConfigFile)
    return tempConfigFile
示例#12
0
 def testCPecanEm(self):
     """Runs cPecanEm.

     For every model type and sequence pair: trains a model for one EM
     iteration, then continues from that model for five more iterations and
     asserts that the likelihood strictly increased.
     """
     trial = 0
     for modelType in ("fiveState", "fiveStateAsymmetric", "threeState", "threeStateAsymmetric"):
         for seqFile1, seqFile2 in seqFilePairGenerator():
             tempDir = getTempDirectory(rootDir=os.getcwd())
             jobTreeDir = os.path.join(tempDir, "jobTree")
             alignmentsFile = os.path.join(tempDir, "alignments.cigars")
             computeAlignments(seqFile1, seqFile2, alignmentsFile)
             logger.info("Computed alignments for seqs %s and %s" % (seqFile1, seqFile2))
             outputModelFile = os.path.join(tempDir, "outputModel.txt")
             #First run the script to generate a model and do one iteration of EM to
             #get the likelihood to compare with the final likelihood
             runCPecanEm(sequenceFiles=[ seqFile1, seqFile2 ],
                          alignmentsFile=alignmentsFile, outputModelFile=outputModelFile,
                          modelType=modelType,
                          jobTreeDir=jobTreeDir,
                          iterations=1, trials=1, randomStart=False, logLevel=getLogLevelString(),
                          setJukesCantorStartingEmissions=0.2,
                          #useDefaultModelAsStart=,
                          trainEmissions=True,
                          tieEmissions=True,
                          optionsToRealign="--diagonalExpansion=6 --splitMatrixBiggerThanThis=100")
             hmm = Hmm.loadHmm(outputModelFile)
             system("rm -rf %s" % jobTreeDir) #Cleanup the old jobTree
             logger.info("For trial %s the likelihood after 1 iteration of EM is %s" % (trial, hmm.likelihood))
             iterations = 5
             #The second run starts from the model written by the first run
             runCPecanEm(sequenceFiles=[ seqFile1, seqFile2 ],
                         alignmentsFile=alignmentsFile, outputModelFile=outputModelFile, jobTreeDir=jobTreeDir,
                         optionsToRealign="--diagonalExpansion=6 --splitMatrixBiggerThanThis=100",
                         iterations=iterations, inputModelFile=outputModelFile, logLevel=getLogLevelString(),
                         maxAlignmentLengthPerJob=10000) #, updateTheBand=True)
             hmm2 = Hmm.loadHmm(outputModelFile)
             logger.info("For trial %s the likelihood after a further %s iterations of EM is %s" % (trial, iterations, hmm2.likelihood))
             #assertLess reports both likelihoods on failure, unlike assertTrue(a < b)
             self.assertLess(hmm.likelihood, hmm2.likelihood)
             hmm2.normalise()
             logger.info("Final transitions: %s" % " ".join(map(str, hmm2.transitions)))
             logger.info("Final emissions: %s" % " ".join(map(str, hmm2.emissions)))
             system("rm -rf %s" % tempDir)
             trial += 1
示例#13
0
def createJobTree(options):
    """Create the job tree directories and config for the first time.

    Makes options.jobTree (converted with absSymPath) and its job file
    directory, fills a "config" element from the values on options, loads
    the batch system from that config, records the rescue-jobs polling
    frequency and writes the config with writeConfig().
    Returns the tuple (config, batchSystem). os.mkdir raises if the job
    tree directory already exists.
    """
    logger.info("Starting to create the job tree setup for the first time")
    options.jobTree = absSymPath(options.jobTree)
    os.mkdir(options.jobTree)
    os.mkdir(getJobFileDirName(options.jobTree))
    config = ET.Element("config")
    config.attrib["log_level"] = getLogLevelString()
    config.attrib["job_tree"] = options.jobTree
    config.attrib["parasol_command"] = options.parasolCommand
    # try_count is one more than the retry count: the first attempt plus retries.
    config.attrib["try_count"] = str(int(options.retryCount) + 1)
    config.attrib["max_job_duration"] = str(float(options.maxJobDuration))
    config.attrib["batch_system"] = options.batchSystem
    config.attrib["job_time"] = str(float(options.jobTime))
    config.attrib["max_log_file_size"] = str(int(options.maxLogFileSize))
    config.attrib["default_memory"] = str(int(options.defaultMemory))
    config.attrib["default_cpu"] = str(int(options.defaultCpu))
    config.attrib["max_cpus"] = str(int(options.maxCpus))
    config.attrib["max_memory"] = str(int(options.maxMemory))
    config.attrib["max_threads"] = str(int(options.maxThreads))
    # The "big" batch system settings are only recorded when one is configured.
    if options.bigBatchSystem is not None:
        config.attrib["big_batch_system"] = options.bigBatchSystem
        config.attrib["big_memory_threshold"] = str(int(options.bigMemoryThreshold))
        config.attrib["big_cpu_threshold"] = str(int(options.bigCpuThreshold))
        config.attrib["big_max_cpus"] = str(int(options.bigMaxCpus))
        config.attrib["big_max_memory"] = str(int(options.bigMaxMemory))

    if options.stats:
        config.attrib["stats"] = ""
    #Load the batch system.
    batchSystem = loadTheBatchSystem(config)

    #Set the parameters determining the polling frequency of the system.
    #The batch system's own value is the default; an explicit option overrides it.
    config.attrib["rescue_jobs_frequency"] = str(float(batchSystem.getRescueJobFrequency()))
    if options.rescueJobsFrequency is not None:
        config.attrib["rescue_jobs_frequency"] = str(float(options.rescueJobsFrequency))

    writeConfig(config)

    logger.info("Finished the job tree setup")
    return config, batchSystem
示例#14
0
 def testBlastRandom(self):
     """Make some sequences, put them in a file, call blast with random parameters
     and check it runs okay.

     Each of the self.testNo rounds writes between 0 and 9 mutated (and
     randomly reverse-complemented) copies of one random sequence to a fasta
     file, then calls runCactusBlast with a random chunk and overlap size.
     """
     tempSeqFile = os.path.join(self.tempDir, "tempSeq.fa")
     self.tempFiles.append(tempSeqFile)
     for test in xrange(self.testNo):
         seqNo = random.choice(xrange(0, 10))
         seq = getRandomSequence(8000)[1]
         # The with-block closes the fasta file even if a write fails.
         with open(tempSeqFile, 'w') as fileHandle:
             # Every copy is mutated from the same original sequence; a
             # separate name avoids shadowing it inside the loop.
             mutatedSeqs = [ (str(i), mutateSequence(seq, 0.3*random.random())) for i in xrange(seqNo) ]
             for fastaHeader, mutatedSeq in mutatedSeqs:
                 if random.random() > 0.5:
                     mutatedSeq = reverseComplement(mutatedSeq)
                 fastaWrite(fileHandle, fastaHeader, mutatedSeq)
         chunkSize = random.choice(xrange(500, 9000))
         overlapSize = random.choice(xrange(2, 100))
         toilDir = os.path.join(getTempDirectory(self.tempDir), "toil")
         runCactusBlast([ tempSeqFile ], self.tempOutputFile, toilDir, chunkSize, overlapSize)
         #runToilStatusAndFailIfNotComplete(toilDir)
         if getLogLevelString() == "DEBUG":
             system("cat %s" % self.tempOutputFile)
         system("rm -rf %s " % toilDir)
示例#15
0
 def test_seedTesting(self):
     """ mafComparator should have replicatable runs via the --seed command

     For every maf pair in self.knownValues: run mafComparator once, read the
     'seed' attribute from the root of its output XML, then re-run ten times
     with --seed set to that value and check that the seed and the
     totalTrue/totalFalse/average attributes of the first two homologyTests
     elements match the first run.
     """
     outputPath = os.path.join('tempTestFiles', 'output.xml')
     for maf1, maf2  in self.knownValues:
         if not os.path.exists('tempTestFiles'):
             os.mkdir('tempTestFiles')
         # with-blocks close each input file even if a write fails.
         with open(self.maf1path, 'w') as f:
             f.write('%s%s%s' % (self.header, maf1, self.footer))
         with open(self.maf2path, 'w') as f:
             f.write('%s%s%s' % (self.header, maf2, self.footer))
         cmd = ['mafComparator']
         cmd.append('--mafFile1=%s' % self.maf1path)
         cmd.append('--mafFile2=%s' % self.maf2path)
         cmd.append('--outputFile=%s' % outputPath)
         cmd.append('--sampleNumber=10 --logLevel %s' % getLogLevelString())
         system(" ".join(cmd))
         tree = ET.parse(outputPath)
         ac = tree.getroot()
         seed = int(ac.attrib['seed'])
         origHomTests = tree.findall('homologyTests')
         # From here on every run is pinned to the seed of the first run.
         cmd.append('--seed=%d' % seed)
         for i in xrange(0, 10):
             system(" ".join(cmd))
             tree = ET.parse(outputPath)
             ac = tree.getroot()
             homTests = tree.findall('homologyTests')
             self.assertEqual(seed, int(ac.attrib['seed']))
             for elm in ['totalTrue', 'totalFalse', 'average']:
                 for index in (0, 1):
                     self.assertEqual(homTests[index].find('aggregateResults').find('all').attrib[elm],
                                      origHomTests[index].find('aggregateResults').find('all').attrib[elm])
             os.remove(outputPath)
         shutil.rmtree(os.path.dirname(self.maf1path))
示例#16
0
 def testHalGeneratorFunctions(self):
     """Invoke cactus_halGeneratorTests through cactus_call at the current log level.
     """
     logLevel = getLogLevelString()
     cactus_call(parameters=["cactus_halGeneratorTests", logLevel])
示例#17
0
 def testCuTest(self):
     """Invoke referenceTests through cactus_call at the current log level."""
     logLevel = getLogLevelString()
     cactus_call(parameters=["referenceTests", logLevel])
示例#18
0
def getLogLevelString2(logLevelString):
    """Gets the log level string for the binary.

    Returns logLevelString unchanged unless it is None, in which case the
    result of getLogLevelString() is returned instead.
    """
    # Identity test: only a missing value (None) falls back to the default.
    if logLevelString is None:
        return getLogLevelString()
    return logLevelString
 def testCuTest(self):
     """Invoke referenceTests through cactus_call at the current log level."""
     testParameters = ["referenceTests", getLogLevelString()]
     cactus_call(parameters=testParameters)
示例#20
0
 def testCuTest(self):
     """Invoke stCafTests through cactus_call at the current log level."""
     testParameters = ["stCafTests", getLogLevelString()]
     cactus_call(parameters=testParameters)
示例#21
0
 def testHalGeneratorFunctions(self):
     """Invoke cactus_halGeneratorTests through cactus_call at the current log level.
     """
     testParameters = ["cactus_halGeneratorTests", getLogLevelString()]
     cactus_call(parameters=testParameters)
示例#22
0
 def testCuTest(self):
     """Run the matchingAndOrderingTests command at the current log level."""
     command = "matchingAndOrderingTests %s" % getLogLevelString()
     system(command)
示例#23
0
 def testAPI(self):
     """Run the cactusAPITests command via system() at the current log level.
     """
     command = "cactusAPITests %s" % getLogLevelString()
     system(command)
示例#24
0
 def run(self):
     """Sort the alignments in self.cigarFile, replacing the file in place.

     cactus_blast_sortAlignments writes the sorted result to a file in the
     local temp dir, which is then moved over self.cigarFile.
     """
     tempResultsFile = os.path.join(self.getLocalTempDir(), "tempResults.cig")
     # The output path is a string, so it must be formatted with %s; the
     # previous %i raised TypeError before the command could run.
     system("cactus_blast_sortAlignments %s %s %s" % (getLogLevelString(), self.cigarFile, tempResultsFile))
     logger.info("Sorted the alignments okay")
     system("mv %s %s" % (tempResultsFile, self.cigarFile))
示例#25
0
 def testSonLibCTests(self):
     """Run the sonLibTests command via system() at the current log level.
     """
     command = "sonLibTests %s" % getLogLevelString()
     system(command)
示例#26
0
 def testHalGeneratorFunctions(self):
     """Run the cactus_halGeneratorTests command via system() at the current log level.
     """
     command = "cactus_halGeneratorTests %s" % getLogLevelString()
     system(command)
示例#27
0
 def testCuTest(self):
     """Run the stPinchesAndCactiTests command at the current log level."""
     command = "stPinchesAndCactiTests %s" % getLogLevelString()
     system(command)
 def test3Edge(self):
     """Run the 3EdgeTests command via system() at the current log level.
     """
     command = "3EdgeTests %s" % getLogLevelString()
     system(command)
示例#29
0
 def testPosetAlignerAPI(self):
     """Run the cactus_barTests command via system() at the current log level.
     """
     command = "cactus_barTests %s" % getLogLevelString()
     system(command)
 def testReferenceAndAsMedianAlgorithms(self):
     """Iterates through a list of simulation variants and prints results
     """
     headerLine = "\t".join(("elementNumber", "chromosomeNumber", "leafGenomeNumber", 
                              "operationNumber",
                              "totalOperationNumber",
                              "doInversion", "doShortInversion", "doDcj", "doTranslocation", "doShortTranslocation", 
                              "greedyIterations",
                              "theta",
                              "replicate", 
                              "medianDCJDistance", "medianOutOfOrderDistance", 
                              "weightedMedianOutOfOrderDistance", 
                              "medianDCJDistanceForReferenceAlgorithm",
                              "medianOutOfOrderDistanceForReferenceAlgorithm", 
                              "weightedMedianOutOfOrderDistanceForReferenceAlgorithm",
                              "dCJDistanceForReferenceAlgorithmFromMedian",
                              "outOfOrderDistanceForReferenceAlgorithmFromMedian",
                              "weightedOutOfOrderDistanceForReferenceAlgorithmFromMedian",
                              "medianDCJDistanceForAsMedian", 
                              "medianOutOfOrderDistanceForAsMedian", 
                              "weightedMedianOutOfOrderDistanceForAsMedian", 
                              "dCJDistanceForAsMedianFromMedian",
                              "outOfOrderDistanceForAsMedianFromMedian",
                              "weightedOutOfOrderDistanceForAsMedianFromMedian",
                              "medianGenomeForReferenceAlgorithm", 
                              "medianGenomeForAsMedian"))
     if getLogLevelString() in  ("DEBUG", "INFO" ):
         print headerLine
     for elementNumber in self.elementNumbers:
         for chromosomeNumber in self.chromosomeNumbers:
             for leafGenomeNumber in self.leafGenomeNumbers:
                 for operationNumber in self.operationNumber:
                     for doInversion, doShortInversion, doDcj, doTranslocation, doShortTranslocation in self.operationType:
                         for greedyIterations in self.greedyIterations:
                             for theta in self.theta:
                                 for replicate in xrange(self.replicates):
                                     medianHistory = MedianHistory(Genome(elementNumber=elementNumber, chromosomeNumber=chromosomeNumber), leafGenomeNumber=leafGenomeNumber)
                                     medianHistory.permuteLeafGenomes(operationNumber=operationNumber, doInversion=doInversion, doDcj=doDcj, doTranslocation=doTranslocation,
                                                                      doShortInversion=doShortInversion, doShortTranslocation=doShortTranslocation)
                                     medianDCJDistance = medianHistory.getMedianDcjDistance(medianHistory.getMedianGenome())
                                     medianOutOfOrderDistance = medianHistory.getMedianOutOfOrderDistance(medianHistory.getMedianGenome())
                                     weightedMedianOutOfOrderDistance = medianHistory.getWeightedMedianOutOfOrderDistance(medianHistory.getMedianGenome(), theta=theta)
                                     #Now run reference problem algorithm   
                                     referenceProblemMedianGenome = runReferenceMedianProblemTest(medianHistory, greedyIterations, theta)      
                                     medianDCJDistanceForReferenceAlgorithm = medianHistory.getMedianDcjDistance(referenceProblemMedianGenome)
                                     medianOutOfOrderDistanceForReferenceAlgorithm = medianHistory.getMedianOutOfOrderDistance(referenceProblemMedianGenome)
                                     weightedMedianOutOfOrderDistanceForReferenceAlgorithm = medianHistory.getWeightedMedianOutOfOrderDistance(referenceProblemMedianGenome, theta=theta)
                                     dCJDistanceForReferenceAlgorithmFromMedian = medianHistory.getMedianGenome().getCircularDcjDistance(referenceProblemMedianGenome)
                                     outOfOrderDistanceForReferenceAlgorithmFromMedian = medianHistory.getMedianGenome().getOutOfOrderDistance(referenceProblemMedianGenome)
                                     weightedOutOfOrderDistanceForReferenceAlgorithmFromMedian = medianHistory.getMedianGenome().getWeightedOutOfOrderDistance(referenceProblemMedianGenome, theta=theta)
                                     totalOperationNumber = operationNumber * len([  i for i in (doInversion, doShortInversion, doDcj, doTranslocation, doShortTranslocation) if i == True ])
                                     #Biomedian comparison turned off
                                     if False and leafGenomeNumber == 3 and doDcj == False and float(totalOperationNumber) / elementNumber <= 0.5:
                                         asMedianProblemMedianGenome = runAsMedianMedianProblemTest(medianHistory)
                                         medianDCJDistanceForAsMedian = medianHistory.getMedianDcjDistance(asMedianProblemMedianGenome)
                                         medianOutOfOrderDistanceForAsMedian = medianHistory.getMedianOutOfOrderDistance(asMedianProblemMedianGenome)
                                         weightedMedianOutOfOrderDistanceForAsMedian = medianHistory.getWeightedMedianOutOfOrderDistance(asMedianProblemMedianGenome, theta=theta)
                                         dCJDistanceForAsMedianFromMedian = medianHistory.getMedianGenome().getCircularDcjDistance(asMedianProblemMedianGenome)
                                         outOfOrderDistanceForAsMedianFromMedian = medianHistory.getMedianGenome().getOutOfOrderDistance(asMedianProblemMedianGenome)
                                         weightedOutOfOrderDistanceForAsMedianFromMedian = medianHistory.getMedianGenome().getWeightedOutOfOrderDistance(asMedianProblemMedianGenome, theta=theta)
                                     else:
                                         asMedianProblemMedianGenome = "n/a"
                                         medianDCJDistanceForAsMedian = "n/a"
                                         medianOutOfOrderDistanceForAsMedian = "n/a"
                                         weightedMedianOutOfOrderDistanceForAsMedian = "n/a"
                                         dCJDistanceForAsMedianFromMedian = "n/a"
                                         outOfOrderDistanceForAsMedianFromMedian = "n/a"
                                         weightedOutOfOrderDistanceForAsMedianFromMedian = "n/a"
                                     #Now prepare line to print
                                     line = "\t".join([ str(i) for i in 
                                     (elementNumber, chromosomeNumber, leafGenomeNumber, 
                                      operationNumber,
                                      totalOperationNumber,
                                      doInversion, doShortInversion, doDcj, doTranslocation, doShortTranslocation,
                                      greedyIterations,
                                      theta,
                                      replicate, 
                                      medianDCJDistance, medianOutOfOrderDistance, 
                                      weightedMedianOutOfOrderDistance,
                                      medianDCJDistanceForReferenceAlgorithm,
                                      medianOutOfOrderDistanceForReferenceAlgorithm, 
                                      weightedMedianOutOfOrderDistanceForReferenceAlgorithm, 
                                      dCJDistanceForReferenceAlgorithmFromMedian,
                                      outOfOrderDistanceForReferenceAlgorithmFromMedian,
                                      weightedOutOfOrderDistanceForReferenceAlgorithmFromMedian,
                                      medianDCJDistanceForAsMedian, 
                                      medianOutOfOrderDistanceForAsMedian,
                                      weightedMedianOutOfOrderDistanceForAsMedian,
                                      dCJDistanceForAsMedianFromMedian,
                                      outOfOrderDistanceForAsMedianFromMedian,
                                      weightedOutOfOrderDistanceForAsMedianFromMedian,
                                      "'%s'" % str(referenceProblemMedianGenome),
                                      "'%s'" % str(asMedianProblemMedianGenome)) ])
                                     #Print line
                                     if getLogLevelString() in ("DEBUG", "INFO"):
                                         print line
示例#31
0
 def testSonLibCTests(self):
     """Run most of the sonLib CuTests, fail if any of them fail.
     """
     #Build the command first so the log level lookup is separate from the call
     command = "sonLibTests %s" % getLogLevelString()
     system(command)
示例#32
0
    def testCPecanEmMultipleTrials(self):
        """Runs cPecanEm with multiple random-start trials for each sequence pair.

        For every pair the per-trial models and the final model are loaded, the
        maxLikelihood attribute of the XML model is checked against the final
        model's likelihood, the alignments are recomputed with the generated
        blast scoring file and cPecanModifyHmm is run on the model file.
        """
        trials = 3
        realignOptions = "--diagonalExpansion=6 --splitMatrixBiggerThanThis=100"
        for seqFile1, seqFile2 in seqFilePairGenerator():
            #All working files live in one temporary directory per sequence pair
            tempDir = getTempDirectory(rootDir=os.getcwd())
            jobTreeDir = os.path.join(tempDir, "jobTree")
            alignmentsFile = os.path.join(tempDir, "alignments.cigars")
            computeAlignments(seqFile1, seqFile2, alignmentsFile)
            logger.info("Computed alignments for seqs %s and %s" %
                        (seqFile1, seqFile2))
            modelFile = os.path.join(tempDir, "outputModel.txt")
            xmlModelFile = os.path.join(tempDir, "outputModel.xml")
            blastFile = os.path.join(tempDir, "outputBlast.txt")

            #Train the model, keeping the HMM produced by each trial
            runCPecanEm(sequenceFiles=[seqFile1, seqFile2],
                        alignmentsFile=alignmentsFile,
                        outputModelFile=modelFile,
                        jobTreeDir=jobTreeDir,
                        trials=trials,
                        outputTrialHmms=True,
                        iterations=5,
                        randomStart=True,
                        logLevel=getLogLevelString(),
                        optionsToRealign=realignOptions,
                        outputXMLModelFile=xmlModelFile,
                        blastScoringMatrixFile=blastFile)

            #Per-trial models are read from the model file name plus "_<trial index>"
            trialHmms = []
            for trialIndex in xrange(trials):
                trialHmms.append(
                    Hmm.loadHmm("%s_%i" % (modelFile, trialIndex)))
            bestHmm = Hmm.loadHmm(modelFile)
            xmlRoot = ET.parse(xmlModelFile).getroot()
            trialLikelihoods = " ".join(
                [str(trialHmm.likelihood) for trialHmm in trialHmms])
            logger.info(
                "After multiple trials and iterations of EM the best likelihood found was %s, the likelihoods of the variants were: %s"
                % (bestHmm.likelihood, trialLikelihoods))

            matchProbs, gapOpen, gapExtend = makeBlastScoringMatrix(
                bestHmm, ("ACTG", ))
            matchProbsString = " ".join([str(prob) for prob in matchProbs])
            logger.info("Gap open: %s, Gap extend: %s, Match probs %s" %
                        (gapOpen, gapExtend, matchProbsString))

            #The XML model must report the same likelihood as the loaded model
            xmlLikelihood = float(xmlRoot.attrib["maxLikelihood"])
            self.assertTrue(xmlLikelihood == bestHmm.likelihood)

            #Now use the blast file to compute a new matrix
            lastzArguments = "--ambiguous=iupac --scores=%s" % blastFile
            computeAlignments(seqFile1,
                              seqFile2,
                              alignmentsFile,
                              lastzArguments=lastzArguments)

            #Run modifyHmm to check it works
            system(
                "cPecanModifyHmm %s %s --gcContent=0.5 --substitutionRate=0.05 --setFlatIndelEmissions"
                % (modelFile, modelFile))
            #Reload both model files afterwards; the loaded objects are not used further
            Hmm.loadHmm(modelFile)
            ET.parse(xmlModelFile).getroot()

            system("rm -rf %s" % tempDir)
示例#33
0
def runDbTestScript(options,
                    firstKey=0,
                    keyNumber=0,
                    addRecords=False,
                    setRecords=False):
    """Assemble the dbTestScript command line and run it with system().

    options is passed to getDatabaseConf() and supplies minRecordSize and
    maxRecordSize. The --addRecords and --setRecords switches are included only
    when the corresponding argument is truthy; otherwise an empty string is
    substituted in their place.
    """
    addFlag = "--addRecords" if addRecords else ""
    setFlag = "--setRecords" if setRecords else ""
    arguments = (getDatabaseConf(options), firstKey, keyNumber, addFlag,
                 setFlag, options.minRecordSize, options.maxRecordSize,
                 getLogLevelString())
    system("dbTestScript --databaseConf '%s' --firstKey %s --keyNumber %s %s %s --minRecordSize %s --maxRecordSize %s --logLevel %s" % arguments)
示例#34
0
 def test3Edge(self):
     """Run the 3-edge connected CuTests, fail if any of them fail.
     """
     #The test binary takes the log level string as its only argument
     logLevel = getLogLevelString()
     system("3EdgeTests %s" % logLevel)
示例#35
0
def runDbTestScript(options, firstKey=0, keyNumber=0, addRecords=False, setRecords=False):
    """Run dbTestScript against the database described by options.

    The optional --addRecords / --setRecords switches appear on the command
    line only when the matching argument is truthy. Record size limits are read
    from options.minRecordSize and options.maxRecordSize.
    """
    #Each switch defaults to an empty string, which leaves a blank slot in the command
    addSwitch = ""
    if addRecords:
        addSwitch = "--addRecords"
    setSwitch = ""
    if setRecords:
        setSwitch = "--setRecords"
    template = "dbTestScript --databaseConf '%s' --firstKey %s --keyNumber %s %s %s --minRecordSize %s --maxRecordSize %s --logLevel %s"
    system(template % (getDatabaseConf(options), firstKey, keyNumber,
                       addSwitch, setSwitch, options.minRecordSize,
                       options.maxRecordSize, getLogLevelString()))
示例#36
0
 def testCuTest(self):
     """Run the referenceTests binary, passing it the current log level string.
     """
     command = "referenceTests %s" % getLogLevelString()
     system(command)
示例#37
0
 def testCPecanLib(self):
     """Run all the cPecanLib CuTests, fail if any of them fail.
     """
     #The test binary takes the log level string as its only argument
     logLevel = getLogLevelString()
     system("cPecanLibTests %s" % logLevel)
示例#38
0
 def testCactusWorkflow_Blanchette(self):
     """Runs the workflow on blanchette's simulated (colinear) regions.

     Returns immediately unless SON_TRACE_DATASETS is set in the environment.
     For each of self.testNo repetitions a random window of 5000 columns is
     taken from the true alignment, the ungapped sequences of the first nine
     rows are written to fasta files and the cactus workflow is run on them.
     When mfaToMaf, cactus_MAFGenerator, mafComparator and cactus_treeStats
     are all runnable, the generated MAF is compared against the true one.
     The experiment database and the temporary directories are cleaned up at
     the end of every repetition, whether or not the comparison was run.
     """
     if "SON_TRACE_DATASETS" not in os.environ:
         return
     for test in xrange(self.testNo):
         tempDir = getTempDirectory(os.getcwd())

         trueAlignment = os.path.join(TestStatus.getPathToDataSets(), "blanchettesSimulation", "00.job", "true.mfa")

         #Load the true alignment.
         columnAlignment = list(fastaAlignmentRead(trueAlignment))
         fastaHeaders = list(fastaReadHeaders(trueAlignment))
         sequenceNumber = 9

         #The tree
         newickTreeString = "((((HUMAN:0.006969, CHIMP:0.009727):0.025291, BABOON:0.044568):0.11,(RAT:0.072818, MOUSE:0.081244):0.260342):0.023260,((DOG:0.07, CAT:0.07):0.087381,(PIG:0.06, COW:0.06):0.104728):0.04);"

         #Get random dir
         testDir = getTempDirectory(tempDir)

         #random alignment
         alignmentLength = 5000
         randomStart = random.choice(xrange(len(columnAlignment)-alignmentLength))
         subAlignment = columnAlignment[randomStart:randomStart+alignmentLength]
         logger.info("Got a sub alignment, it is %i columns long" % len(subAlignment))

         #Get sequences (gap characters are dropped from each row)
         sequences = [ (fastaHeaders[seqNo], "".join([ column[seqNo] for column in subAlignment if column[seqNo] != '-' ])) for seqNo in xrange(sequenceNumber) ]
         logger.info("Got the sequences")

         #Write sequences into temp files
         tempFastaFiles = []
         for seqNo, (header, sequence) in enumerate(sequences):
             logger.info("Making temp file for header: %s, seq: %s" % (header, sequence))
             tempFastaFile = os.path.join(testDir, "%i.fa" % seqNo)
             tempFastaFiles.append(tempFastaFile)
             #The with-block closes the handle even if fastaWrite raises
             with open(tempFastaFile, "w") as fileHandle:
                 fastaWrite(fileHandle, header, sequence)
         logger.info("Got the temp sequence files")

         experiment = getCactusWorkflowExperimentForTest(tempFastaFiles, newickTreeString, testDir)
         experimentFile = os.path.join(testDir, "experiment.xml")
         experiment.writeXML(experimentFile)
         cactusDiskDatabaseString = experiment.getDiskDatabaseString()

         jobTree = os.path.join(testDir, "jobTree")

         runCactusWorkflow(experimentFile, jobTree)
         logger.info("Ran the the workflow")

         #Check the output alignment
         runJobTreeStatusAndFailIfNotComplete(jobTree)
         logger.info("Checked the job tree dir")

         #The comparison is optional: it only runs when every helper binary can be invoked
         requiredTools = ("mfaToMaf", "cactus_MAFGenerator", "mafComparator", "cactus_treeStats")
         if all(os.system("%s --help > /dev/null 2>&1" % tool) == 0 for tool in requiredTools):
             #Output the 'TRUE' alignment file
             trueMFAFile = os.path.join(testDir, "true.mfa")
             fastaAlignmentWrite(subAlignment, fastaHeaders, len(fastaHeaders), trueMFAFile)
             trueMAFFile = os.path.join(testDir, "true.maf")
             system("mfaToMaf --mfaFile %s --outputFile %s --logLevel %s" % (trueMFAFile, trueMAFFile, getLogLevelString()))
             system("cat %s" % trueMAFFile)

             #Now get mafs for the region.
             mAFFile = os.path.join(testDir, "flower.maf")
             system("cactus_MAFGenerator --flowerName 0 --cactusDisk '%s' --outputFile %s --logLevel %s" % (cactusDiskDatabaseString, mAFFile, getLogLevelString()))
             logger.info("Got the MAFs from the flower disk")
             system("cat %s" % mAFFile)

             statsFile = os.path.join(testDir, "stats.xml")
             system("cactus_treeStats --cactusDisk '%s' --flowerName 0 --outputFile %s --logLevel %s" % (cactusDiskDatabaseString, statsFile, getLogLevelString()))
             system("cat %s" % statsFile)
             logger.info("Got the cactus tree stats")

             #Now compare the mafs to the output.
             resultsFile = os.path.join(testDir, "results.xml")
             system("mafComparator --mafFile1 %s --mafFile2 %s --outputFile %s --logLevel %s" % (trueMAFFile, mAFFile, resultsFile, getLogLevelString()))
             logger.info("Ran the maf comparator")

             system("cat %s" % resultsFile)
             logger.info("Successfully ran test for the problem")

         #Cleanup happens unconditionally so the database is released even
         #when the comparison tools are missing
         experiment.cleanupDb()
         system("rm -rf %s" % testDir)
         system("rm -rf %s" % tempDir)
示例#39
0
 def testPosetAlignerAPI(self):
     """Run all the cactus base aligner CuTests, fail if any of them fail.
     """
     #Command and argument are passed as a list to cactus_call
     arguments = ["cactus_barTests", getLogLevelString()]
     cactus_call(parameters=arguments)
示例#40
0
 def testPosetAlignerAPI(self):
     """Run all the cactus base aligner CuTests, fail if any of them fail.
     """
     logLevel = getLogLevelString()
     cactus_call(parameters=["cactus_barTests", logLevel])
示例#41
0
 def testCuTest(self):
     """Run the stCafTests binary, passing it the current log level string.
     """
     command = "stCafTests %s" % getLogLevelString()
     system(command)
示例#42
0
 def testCPecanEm(self):
     """Runs cPecanEm for each model type and sequence pair.

     A model is first trained for a single EM iteration; training is then
     continued from that model for further iterations and the test asserts
     that the likelihood has strictly increased.
     """
     modelTypes = ("fiveState", "fiveStateAsymmetric", "threeState",
                   "threeStateAsymmetric")
     realignOptions = "--diagonalExpansion=6 --splitMatrixBiggerThanThis=100"
     furtherIterations = 5
     trial = 0
     for modelType in modelTypes:
         for seqFile1, seqFile2 in seqFilePairGenerator():
             tempDir = getTempDirectory(rootDir=os.getcwd())
             jobTreeDir = os.path.join(tempDir, "jobTree")
             alignmentsFile = os.path.join(tempDir, "alignments.cigars")
             computeAlignments(seqFile1, seqFile2, alignmentsFile)
             logger.info("Computed alignments for seqs %s and %s" %
                         (seqFile1, seqFile2))
             modelFile = os.path.join(tempDir, "outputModel.txt")
             inputSequences = [seqFile1, seqFile2]

             #First run the script to generate a model and do one iteration of EM to
             #get the likelihood to compare with the final likelihood
             runCPecanEm(sequenceFiles=inputSequences,
                         alignmentsFile=alignmentsFile,
                         outputModelFile=modelFile,
                         modelType=modelType,
                         jobTreeDir=jobTreeDir,
                         iterations=1,
                         trials=1,
                         randomStart=False,
                         logLevel=getLogLevelString(),
                         setJukesCantorStartingEmissions=0.2,
                         trainEmissions=True,
                         tieEmissions=True,
                         optionsToRealign=realignOptions)
             initialHmm = Hmm.loadHmm(modelFile)
             system("rm -rf %s" % jobTreeDir)  #Cleanup the old jobTree
             logger.info(
                 "For trial %s the likelihood after 1 iteration of EM is %s"
                 % (trial, initialHmm.likelihood))

             #Continue training, using the model just written as the input model
             runCPecanEm(sequenceFiles=inputSequences,
                         alignmentsFile=alignmentsFile,
                         outputModelFile=modelFile,
                         jobTreeDir=jobTreeDir,
                         optionsToRealign=realignOptions,
                         iterations=furtherIterations,
                         inputModelFile=modelFile,
                         logLevel=getLogLevelString(),
                         maxAlignmentLengthPerJob=10000)
             trainedHmm = Hmm.loadHmm(modelFile)
             logger.info(
                 "For trial %s the likelihood after a further %s iterations of EM is %s"
                 % (trial, furtherIterations, trainedHmm.likelihood))
             self.assertTrue(initialHmm.likelihood < trainedHmm.likelihood)

             trainedHmm.normalise()
             transitionsString = " ".join(
                 [str(value) for value in trainedHmm.transitions])
             logger.info("Final transitions: %s" % transitionsString)
             emissionsString = " ".join(
                 [str(value) for value in trainedHmm.emissions])
             logger.info("Final emissions: %s" % emissionsString)
             system("rm -rf %s" % tempDir)
             trial += 1
示例#43
0
 def testAPI(self):
     """Run all the cactusAPI CuTests, fail if any of them fail.
     """
     #Command and argument are passed as a list to cactus_call
     arguments = ["cactusAPITests", getLogLevelString()]
     cactus_call(parameters=arguments)
示例#44
0
def getLogLevelString2(logLevelString):
    """Gets the log level string for the binary.

    Returns logLevelString unchanged unless it is None, in which case the
    value of getLogLevelString() is returned instead.
    """
    #Identity test: "== None" would call a custom __eq__ on the argument and
    #could wrongly treat a real value as missing
    if logLevelString is None:
        return getLogLevelString()
    return logLevelString
示例#45
0
 def testReferenceAndAsMedianAlgorithms(self):
     """Iterates through a list of simulation variants and prints results

     For every combination of the parameter collections held on self
     (elementNumbers, chromosomeNumbers, leafGenomeNumbers, operationNumber,
     operationType, greedyIterations, theta) and for each replicate, a
     MedianHistory is built and permuted, the reference median algorithm is
     run on it and one tab-separated result line is assembled.  The header
     line and the result lines are printed only when getLogLevelString()
     returns "DEBUG" or "INFO".  The AsMedian comparison is switched off (its
     condition starts with False), so every AsMedian column is "n/a".
     """
     #Column names, listed in the same order as the values joined into each result line below
     headerLine = "\t".join(
         ("elementNumber", "chromosomeNumber", "leafGenomeNumber",
          "operationNumber", "totalOperationNumber", "doInversion",
          "doShortInversion", "doDcj", "doTranslocation",
          "doShortTranslocation", "greedyIterations", "theta", "replicate",
          "medianDCJDistance", "medianOutOfOrderDistance",
          "weightedMedianOutOfOrderDistance",
          "medianDCJDistanceForReferenceAlgorithm",
          "medianOutOfOrderDistanceForReferenceAlgorithm",
          "weightedMedianOutOfOrderDistanceForReferenceAlgorithm",
          "dCJDistanceForReferenceAlgorithmFromMedian",
          "outOfOrderDistanceForReferenceAlgorithmFromMedian",
          "weightedOutOfOrderDistanceForReferenceAlgorithmFromMedian",
          "medianDCJDistanceForAsMedian",
          "medianOutOfOrderDistanceForAsMedian",
          "weightedMedianOutOfOrderDistanceForAsMedian",
          "dCJDistanceForAsMedianFromMedian",
          "outOfOrderDistanceForAsMedianFromMedian",
          "weightedOutOfOrderDistanceForAsMedianFromMedian",
          "medianGenomeForReferenceAlgorithm", "medianGenomeForAsMedian"))
     if getLogLevelString() in ("DEBUG", "INFO"):
         print headerLine
     #Walk the full cross product of the simulation parameters
     for elementNumber in self.elementNumbers:
         for chromosomeNumber in self.chromosomeNumbers:
             for leafGenomeNumber in self.leafGenomeNumbers:
                 for operationNumber in self.operationNumber:
                     for doInversion, doShortInversion, doDcj, doTranslocation, doShortTranslocation in self.operationType:
                         for greedyIterations in self.greedyIterations:
                             for theta in self.theta:
                                 for replicate in xrange(self.replicates):
                                     #Build a fresh history for this replicate and permute its leaf genomes
                                     medianHistory = MedianHistory(
                                         Genome(elementNumber=elementNumber,
                                                chromosomeNumber=
                                                chromosomeNumber),
                                         leafGenomeNumber=leafGenomeNumber)
                                     medianHistory.permuteLeafGenomes(
                                         operationNumber=operationNumber,
                                         doInversion=doInversion,
                                         doDcj=doDcj,
                                         doTranslocation=doTranslocation,
                                         doShortInversion=doShortInversion,
                                         doShortTranslocation=
                                         doShortTranslocation)
                                     #Score the history's own median genome
                                     medianDCJDistance = medianHistory.getMedianDcjDistance(
                                         medianHistory.getMedianGenome())
                                     medianOutOfOrderDistance = medianHistory.getMedianOutOfOrderDistance(
                                         medianHistory.getMedianGenome())
                                     weightedMedianOutOfOrderDistance = medianHistory.getWeightedMedianOutOfOrderDistance(
                                         medianHistory.getMedianGenome(),
                                         theta=theta)
                                     #Now run reference problem algorithm
                                     referenceProblemMedianGenome = runReferenceMedianProblemTest(
                                         medianHistory, greedyIterations,
                                         theta)
                                     #Score the genome returned by the reference algorithm with the same three measures
                                     medianDCJDistanceForReferenceAlgorithm = medianHistory.getMedianDcjDistance(
                                         referenceProblemMedianGenome)
                                     medianOutOfOrderDistanceForReferenceAlgorithm = medianHistory.getMedianOutOfOrderDistance(
                                         referenceProblemMedianGenome)
                                     weightedMedianOutOfOrderDistanceForReferenceAlgorithm = medianHistory.getWeightedMedianOutOfOrderDistance(
                                         referenceProblemMedianGenome,
                                         theta=theta)
                                     #Distances between the history's median genome and the reference algorithm's genome
                                     dCJDistanceForReferenceAlgorithmFromMedian = medianHistory.getMedianGenome(
                                     ).getCircularDcjDistance(
                                         referenceProblemMedianGenome)
                                     outOfOrderDistanceForReferenceAlgorithmFromMedian = medianHistory.getMedianGenome(
                                     ).getOutOfOrderDistance(
                                         referenceProblemMedianGenome)
                                     weightedOutOfOrderDistanceForReferenceAlgorithmFromMedian = medianHistory.getMedianGenome(
                                     ).getWeightedOutOfOrderDistance(
                                         referenceProblemMedianGenome,
                                         theta=theta)
                                     #operationNumber multiplied by the number of operation types that are switched on
                                     totalOperationNumber = operationNumber * len(
                                         [
                                             i for i in
                                             (doInversion, doShortInversion,
                                              doDcj, doTranslocation,
                                              doShortTranslocation)
                                             if i == True
                                         ])
                                     #Biomedian comparison turned off
                                     #(the leading False short-circuits the condition, so only the else branch runs)
                                     if False and leafGenomeNumber == 3 and doDcj == False and float(
                                             totalOperationNumber
                                     ) / elementNumber <= 0.5:
                                         asMedianProblemMedianGenome = runAsMedianMedianProblemTest(
                                             medianHistory)
                                         medianDCJDistanceForAsMedian = medianHistory.getMedianDcjDistance(
                                             asMedianProblemMedianGenome)
                                         medianOutOfOrderDistanceForAsMedian = medianHistory.getMedianOutOfOrderDistance(
                                             asMedianProblemMedianGenome)
                                         weightedMedianOutOfOrderDistanceForAsMedian = medianHistory.getWeightedMedianOutOfOrderDistance(
                                             asMedianProblemMedianGenome,
                                             theta=theta)
                                         dCJDistanceForAsMedianFromMedian = medianHistory.getMedianGenome(
                                         ).getCircularDcjDistance(
                                             asMedianProblemMedianGenome)
                                         outOfOrderDistanceForAsMedianFromMedian = medianHistory.getMedianGenome(
                                         ).getOutOfOrderDistance(
                                             asMedianProblemMedianGenome)
                                         weightedOutOfOrderDistanceForAsMedianFromMedian = medianHistory.getMedianGenome(
                                         ).getWeightedOutOfOrderDistance(
                                             asMedianProblemMedianGenome,
                                             theta=theta)
                                     else:
                                         #Placeholders keep the AsMedian columns present in every output line
                                         asMedianProblemMedianGenome = "n/a"
                                         medianDCJDistanceForAsMedian = "n/a"
                                         medianOutOfOrderDistanceForAsMedian = "n/a"
                                         weightedMedianOutOfOrderDistanceForAsMedian = "n/a"
                                         dCJDistanceForAsMedianFromMedian = "n/a"
                                         outOfOrderDistanceForAsMedianFromMedian = "n/a"
                                         weightedOutOfOrderDistanceForAsMedianFromMedian = "n/a"
                                     #Now prepare line to print
                                     #(the two genome columns are wrapped in single quotes)
                                     line = "\t".join([
                                         str(i) for i in
                                         (elementNumber, chromosomeNumber,
                                          leafGenomeNumber, operationNumber,
                                          totalOperationNumber, doInversion,
                                          doShortInversion, doDcj,
                                          doTranslocation,
                                          doShortTranslocation,
                                          greedyIterations, theta,
                                          replicate, medianDCJDistance,
                                          medianOutOfOrderDistance,
                                          weightedMedianOutOfOrderDistance,
                                          medianDCJDistanceForReferenceAlgorithm,
                                          medianOutOfOrderDistanceForReferenceAlgorithm,
                                          weightedMedianOutOfOrderDistanceForReferenceAlgorithm,
                                          dCJDistanceForReferenceAlgorithmFromMedian,
                                          outOfOrderDistanceForReferenceAlgorithmFromMedian,
                                          weightedOutOfOrderDistanceForReferenceAlgorithmFromMedian,
                                          medianDCJDistanceForAsMedian,
                                          medianOutOfOrderDistanceForAsMedian,
                                          weightedMedianOutOfOrderDistanceForAsMedian,
                                          dCJDistanceForAsMedianFromMedian,
                                          outOfOrderDistanceForAsMedianFromMedian,
                                          weightedOutOfOrderDistanceForAsMedianFromMedian,
                                          "'%s'" %
                                          str(referenceProblemMedianGenome),
                                          "'%s'" %
                                          str(asMedianProblemMedianGenome))
                                     ])
                                     #Print line
                                     if getLogLevelString() in ("DEBUG",
                                                                "INFO"):
                                         print line
示例#46
0
 def testCuTest(self):
     """Run the matchingAndOrderingTests binary, passing it the current log level string.
     """
     command = "matchingAndOrderingTests %s" % getLogLevelString()
     system(command)