示例#1
0
 def testMoveRoot(self):
     for test in range(0, self.testNo):
         binaryTree = getRandomTree()
         binaryTree_depthFirstNumbers(binaryTree)
         node = getRandomNode(binaryTree)
         print("before", printBinaryTree(binaryTree, True),
               printBinaryTree(node, True))
         remodTree = moveRoot(binaryTree, node.traversalID.mid)
         print("test", test, printBinaryTree(binaryTree, True),
               printBinaryTree(node, True),
               printBinaryTree(remodTree, True))
         binaryTree_depthFirstNumbers(remodTree)
示例#2
0
 def testNewickTreeParser(self):
     if self.testNo > 0:
         d = '((human,baboon),chimp);'
         e = newickTreeParser(d)
         f = printBinaryTree(e, False)
         print(d, f)
         assert d == f
示例#3
0
def moveRoot(root, branch):
    """
    Builds a new tree whose root sits at the mid point of the given branch.

    ``branch`` is compared with ``traversalID.mid`` of the nodes visited while
    walking down from ``root``. The result is produced by assembling a Newick
    string and handing it to ``bioio.newickTreeParser``; the input tree is only
    serialised (the selected node's ``distance`` is halved while it is printed
    and then restored). When ``branch`` is the root's own ID, a re-parsed copy
    of the whole tree is returned.
    """
    from sonLib import bioio

    def serialise(subtree):
        # Printed subtree minus its final character (presumably the closing
        # ';' emitted by printBinaryTree -- confirm).
        return bioio.printBinaryTree(subtree, True)[:-1]

    if root.traversalID.mid == branch:
        return bioio.newickTreeParser(bioio.printBinaryTree(root, True))

    current = root
    carried = None  # Newick text of everything left behind on the way down
    while True:
        if current.traversalID.mid == branch:
            fullLength = current.distance
            # Print the selected subtree with half its branch length; the
            # other half is attached to the carried-over remainder.
            current.distance /= 2
            newick = '(' + serialise(current) + ',(' + carried + ('):%s' % current.distance) + ');'
            current.distance = fullLength
            break
        if not current.internal:
            # Reached a leaf that is not the requested branch.
            newick = serialise(current)
            break
        if branch < current.traversalID.mid:
            leftBehind, onward = current.right, current.left
        else:
            assert branch > current.traversalID.mid
            leftBehind, onward = current.left, current.right
        sideText = serialise(leftBehind)
        if carried is None:
            carried = sideText
        else:
            carried = '(' + sideText + ',' + carried + ')'
        current = onward
    return bioio.newickTreeParser(newick)
示例#4
0
 def fn(tree, seq):
     """Walk down from ``tree`` towards ``branch`` and return a Newick string.

     ``branch``, ``fn2`` and ``bioio`` are taken from the enclosing scope,
     which is not part of this excerpt. ``seq`` is the text accumulated so
     far and is extended through ``fn2`` at every internal node passed.
     """
     mid = tree.traversalID.mid
     if mid == branch:
         # Halve the branch length only for the duration of the printing.
         savedDistance = tree.distance
         tree.distance /= 2
         joined = '(' + bioio.printBinaryTree(tree, True)[:-1] + ',(' + seq + ('):%s' % tree.distance) + ');'
         tree.distance = savedDistance
         return joined
     if not tree.internal:
         return bioio.printBinaryTree(tree, True)[:-1]
     if branch < mid:
         return fn(tree.left, fn2(tree.right, seq))
     assert branch > mid
     return fn(tree.right, fn2(tree.left, seq))
示例#5
0
 def testNewickTreeParser_UnaryNodes(self):
     #tests with unary nodes
     for test in range(0, self.testNo):
         tree = getRandomTreeString()
         logger.debug("tree to try\t", tree)
         tree2 = newickTreeParser(tree, reportUnaryNodes=True)
         tree3 = printBinaryTree(tree2, True)
         logger.debug("tree found\t", tree3)
         assert tree == tree3
示例#6
0
 def testRemodelTreeRemovingRoot(self):
     for test in range(0, self.testNo):
         binaryTree = getRandomTree()
         binaryTree_depthFirstNumbers(binaryTree)
         node = getRandomLeafNode(binaryTree)
         remodTree = remodelTreeRemovingRoot(binaryTree,
                                             node.traversalID.mid)
         print("test", test, printBinaryTree(binaryTree, True),
               printBinaryTree(node, True),
               printBinaryTree(remodTree, True))
         binaryTree_depthFirstNumbers(remodTree)
         distances = mapTraversalIDsBetweenTrees(binaryTree, remodTree)
         d = getDistancesBetweenLeaves(binaryTree)
         d2 = getDistancesBetweenLeaves(remodTree)
         print(d)
         print(d2)
         for key in d2:
             assert close(d2[key], d[key], 0.0001)
示例#7
0
    def testCalculateDupsAndLossesByReconcilingTrees_Examples(self):
        """Check duplication/loss counts for a fixed set of gene trees.

        Every gene tree is reconciled against one fixed species tree with
        calculateDupsAndLossesByReconcilingTrees (using an identity
        ``processID``), and both returned counts must match the table below.
        """
        speciesTree = newickTreeParser(
            '(((((((((((((human:0.006969,chimp:0.009727):0.025291,((baboon:0.008968):0.011019):0.024581):0.023649):0.066673):0.018405,((rat:0.081244,mouse:0.072818):0.238435):0.021892):0.02326,(((cow:0.164728,(cat:0.109852,dog:0.107805):0.049576):0.004663):0.010883):0.033242):0.028346):0.016015):0.226853):0.063898):0.126639):0.119814):0.16696);')
        binaryTree_depthFirstNumbers(speciesTree)

        # (gene tree, expected duplication count, expected loss count)
        expectations = [
            ('((human,baboon),chimp);', 1, 3),
            ('((human,chimp),baboon);', 0, 0),
            ('((human,(human, chimp)),baboon);', 1, 1),
            ('((human,(human, chimp)),(chimp, baboon));', 2, 3),
            ('(dog,cat);', 0, 0),
            ('((dog,cat), cow);', 0, 0),
            ('(cow,(dog,cat));', 0, 0),
            ('(cow,(cat,dog));', 0, 0),
            ('((cow,dog),(dog,cow));', 1, 2),
            ('((cow,(cow,cow)),(dog,cat));', 2, 0),
            ('((cow,(cow,cow)),(dog,((cat,cat),cat)));', 4, 0),
        ]

        print("")
        for geneNewick, wantDups, wantLosses in expectations:
            geneTree = newickTreeParser(geneNewick)
            binaryTree_depthFirstNumbers(geneTree)
            print(printBinaryTree(geneTree, True),
                  printBinaryTree(speciesTree, True))
            gotDups, gotLosses = calculateDupsAndLossesByReconcilingTrees(
                speciesTree, geneTree, processID=lambda x: x)
            print(geneNewick, "dups", wantDups, gotDups, "losses", wantLosses,
                  gotLosses)
            assert wantDups == gotDups
            assert wantLosses == gotLosses
示例#8
0
    def testRandom(self):
        """Makes random sequences and tests that Ortheus can align them and produce a valid output.

        For each of ``self.testNo`` rounds a random tree and its sequences are
        generated. Rounds that yield more than 2 and at most ``MAX_SEQS``
        sequences are written to FASTA files, aligned by running
        ``ortheus_core`` through ``system``, and the alignment read back from
        the output file is passed to ``checkAlignment``. Other rounds are
        skipped. Every temp file created is registered in ``self.tempFiles``.
        """
        outputFile = getTempFile()
        self.tempFiles.append(outputFile)

        MAX_SEQS = 20

        # Keep a dedicated list of the per-sequence files rather than reading
        # them back from self.tempFiles[1:], which is only correct when
        # self.tempFiles was empty on entry.
        seqTempFiles = []
        for _ in range(MAX_SEQS):
            tempFile = getTempFile()
            seqTempFiles.append(tempFile)
            self.tempFiles.append(tempFile)

        # Each print below takes one pre-formatted string so the output is the
        # same whether print is a statement (Python 2) or a function.
        for test in range(self.testNo):
            print("test no : %i " % test)
            binaryTree = randomTree()
            middleSeq = getRandomSequence(250)[1]
            seqs = []
            getTreeSeqs(binaryTree, middleSeq, seqs)

            if not 2 < len(seqs) <= MAX_SEQS:
                continue

            seqFiles = seqTempFiles[:len(seqs)]
            for index, seqFile in enumerate(seqFiles):
                # 'with' closes the handle even if fastaWrite raises.
                with open(seqFile, 'w') as fileHandle:
                    fastaWrite(fileHandle, "%i" % index, seqs[index])
            print("Have seq files  %s" % (seqFiles,))

            treeString = printBinaryTree(binaryTree, True)
            print("For tree  %s" % treeString)

            #align seqs and check no failure
            command = "ortheus_core -a %s -b '%s' -d %s -e" % (
                " ".join(seqFiles), treeString, outputFile)
            print("command to call %s" % command)
            system(command)

            #check alignment is complete
            alignment = [column[:] for column in fastaAlignmentRead(outputFile)]
            checkAlignment(alignment, seqs)

            print("test no is finished : %i " % test)
示例#9
0
    def testCalculateProbableRootOfGeneTree_Examples(self):
        """Run calculateProbableRootOfGeneTree on fixed gene trees and print the results.

        Each case pairs a gene tree with a second Newick string (presumably
        the rooting that is expected -- confirm). The second string is parsed
        but never compared with the computed tree; this method only prints.
        """
        speciesTree = newickTreeParser(
            '(((((((((((((human:0.006969,chimp:0.009727):0.025291,((baboon:0.008968):0.011019):0.024581):0.023649):0.066673):0.018405,((rat:0.081244,mouse:0.072818):0.238435):0.021892):0.02326,(((cow:0.164728,(cat:0.109852,dog:0.107805):0.049576):0.004663):0.010883):0.033242):0.028346):0.016015):0.226853):0.063898):0.126639):0.119814):0.16696);')
        binaryTree_depthFirstNumbers(speciesTree)

        # (gene tree, rooted gene tree)
        cases = [
            ('((human,baboon),chimp);',
             '((human,chimp),baboon);'),
            ('((human,chimp),baboon);',
             '((human,chimp),baboon);'),
            ('((((human,chimp),baboon),((dog,cat),cow)),(mouse,rat));',
             '((((human,chimp),baboon),(mouse,rat)),((dog,cat),cow));'),
            ('((((human,chimp),baboon),(mouse,rat)),((dog,cat),cow));',
             '((((human,chimp),baboon),(mouse,rat)),((dog,cat),cow));'),
            ('((((human,(chimp, chimp)),baboon),((dog,cat),cow)),(mouse,rat));',
             '((((human,(chimp,chimp)),baboon),(mouse,rat)),((dog,cat),cow));'),
        ]

        for geneNewick, rootedNewick in cases:
            geneTree = newickTreeParser(geneNewick)
            # Parsed only; the resulting tree is not used afterwards.
            newickTreeParser(rootedNewick)
            binaryTree_depthFirstNumbers(geneTree)
            outcome = calculateProbableRootOfGeneTree(speciesTree, geneTree)
            computedTree, dupCount, lossCount = outcome
            print("rootedGeneTree", rootedNewick, dupCount, lossCount,
                  printBinaryTree(computedTree, False))
示例#10
0
    def testRandom(self):
        """Makes random sequences and tests that Ortheus can align them and produce a valid output.

        Runs ``self.testNo`` rounds. A round is only aligned when the random
        tree yields more than 2 and no more than ``MAX_SEQS`` sequences; those
        sequences are written one per FASTA file, ``ortheus_core`` is invoked
        via ``system``, and the alignment read from the output file is handed
        to ``checkAlignment``. All temp files are appended to
        ``self.tempFiles``.
        """
        outputFile = getTempFile()
        self.tempFiles.append(outputFile)

        MAX_SEQS = 20

        # Remember the sequence files locally: indexing self.tempFiles[1 + i]
        # picks the wrong files whenever the list already had entries.
        sequenceSlots = []
        for _ in range(MAX_SEQS):
            slot = getTempFile()
            self.tempFiles.append(slot)
            sequenceSlots.append(slot)

        # print is always called with a single formatted string, which behaves
        # identically as a Python 2 statement and a Python 3 function.
        for test in range(self.testNo):
            print("test no : %i " % test)
            binaryTree = randomTree()
            middleSeq = getRandomSequence(250)[1]
            seqs = []
            getTreeSeqs(binaryTree, middleSeq, seqs)

            seqCount = len(seqs)
            if seqCount <= 2 or seqCount > MAX_SEQS:
                continue

            seqFiles = sequenceSlots[:seqCount]
            for number, (path, sequence) in enumerate(zip(seqFiles, seqs)):
                # The context manager closes the file even when writing fails.
                with open(path, "w") as fileHandle:
                    fastaWrite(fileHandle, "%i" % number, sequence)
            print("Have seq files  %s" % (seqFiles,))

            treeString = printBinaryTree(binaryTree, True)
            print("For tree  %s" % treeString)

            # align seqs and check no failure
            command = "ortheus_core -a %s -b '%s' -d %s -e" % (" ".join(seqFiles), treeString, outputFile)
            print("command to call %s" % command)
            system(command)

            # check alignment is complete
            alignment = [entry[:] for entry in fastaAlignmentRead(outputFile)]
            checkAlignment(alignment, seqs)

            print("test no is finished : %i " % test)
import sys
import xml.etree.ElementTree as ET
from sonLib.tree import BinaryTree
from sonLib.tree import njI
from sonLib.tree import upgmaI
from sonLib.tree import DistancePair
from sonLib.bioio import printBinaryTree

# BinaryTree node created for each event name seen so far, keyed by name.
l = {}


def fn(eventName):
    """Return the BinaryTree node for ``eventName``, creating it on first use."""
    # dict.has_key() only exists on Python 2; `in` works on every version.
    if eventName not in l:
        l[eventName] = BinaryTree(0.0, False, None, None, eventName)
    return l[eventName]


# One DistancePair per <indelDistanceForEvents> element of the XML file named
# by the first command-line argument, skipping elements whose two event names
# are equal.
distancePairs = [
    DistancePair(float(element.attrib["indelsPerBase"]),
                 fn(element.attrib["eventName1"]), 1,
                 fn(element.attrib["eventName2"]), 1)
    for element in ET.parse(sys.argv[1]).getroot().findall("indelDistanceForEvents")
    if element.attrib["eventName1"] != element.attrib["eventName2"]
]

# Each print receives one pre-formatted string, so the output is the same
# under the Python 2 print statement and the Python 3 print function.
print("%s %s" % (len(distancePairs), l))
print("NJ %s" % printBinaryTree(njI(distancePairs, len(l)), includeDistances=True))
print("UPGMA %s" % printBinaryTree(upgmaI(distancePairs, len(l)), includeDistances=True))
示例#12
0
文件: test.py 项目: zoumingr/cactus
def getCactusInputs_random(regionNumber=0,
                           tempDir=None,
                           sequenceNumber=None,
                           avgSequenceLength=None,
                           treeLeafNumber=None):
    """Gets a random set of sequences and a species tree relating them.

    One temp directory is created per leaf of a random binary tree, and
    mutated copies of random parent sequences are written as FASTA records
    into files spread over those directories.

    Args:
        regionNumber: Accepted but not used by this function.
        tempDir: Passed as ``rootDir`` when creating the leaf directories.
        sequenceNumber: Minimum number of sequences to write; drawn from
            [0, 30) when None. More are written if needed so that every
            directory receives at least one file.
        avgSequenceLength: Parent sequence lengths are drawn from
            [1, 2 * avgSequenceLength); drawn from [1, 3000) when None.
        treeLeafNumber: Passed to makeRandomBinaryTree; 2 or 3 when None.

    Returns:
        A tuple ``(sequenceDirs, newickTreeString)``.
    """
    # randrange draws the same integer as choice(list(range(...))) without
    # building the list first.
    if sequenceNumber is None:
        sequenceNumber = random.randrange(30)
    if avgSequenceLength is None:
        avgSequenceLength = random.randrange(1, 3000)
    if treeLeafNumber is None:
        treeLeafNumber = random.randrange(2, 4)

    #Make tree
    binaryTree = makeRandomBinaryTree(treeLeafNumber)
    newickTreeString = printBinaryTree(binaryTree, includeDistances=True)
    newickTreeLeafNames = []

    def collectLeafNames(tree):
        if tree.internal:
            collectLeafNames(tree.left)
            collectLeafNames(tree.right)
        else:
            newickTreeLeafNames.append(tree.iD)

    collectLeafNames(binaryTree)
    logger.info("Made random binary tree: %s" % newickTreeString)

    # One directory per leaf of the tree.
    sequenceDirs = [getTempDirectory(rootDir=tempDir)
                    for _ in newickTreeLeafNames]

    logger.info("Made a set of random directories: %s" %
                " ".join(sequenceDirs))

    #Random sequences and species labelling
    fileHandle = None  # currently open FASTA file, if any
    parentSequence = getRandomSequence(
        length=random.randrange(1, 2 * avgSequenceLength))[1]
    emptySequenceDirs = set(sequenceDirs)
    sequencesWritten = 0
    try:
        # Keep going until enough sequences exist AND every directory has
        # been given at least one file.
        while sequencesWritten < sequenceNumber or emptySequenceDirs:
            if fileHandle is None:
                if random.random() > 0.5:  #Randomly choose the files to be attached or not
                    suffix = ".fa.complete"
                else:
                    suffix = ".fa"
                sequenceDir = random.choice(sequenceDirs)
                emptySequenceDirs.discard(sequenceDir)
                sequenceFile = getTempFile(rootDir=sequenceDir, suffix=suffix)
                fileHandle = open(sequenceFile, 'w')
            if random.random() > 0.8:  #Get a new root sequence
                parentSequence = getRandomSequence(
                    length=random.randrange(1, 2 * avgSequenceLength))[1]
            sequence = mutateSequence(parentSequence,
                                      distance=random.random() * 0.25)
            name = getRandomAlphaNumericString(15)
            if random.random() > 0.5:
                sequence = reverseComplement(sequence)
            fastaWrite(fileHandle, name, sequence)
            if random.random() > 0.5:
                # Finish this file; the next sequence starts a new one.
                fileHandle.close()
                fileHandle = None
            sequencesWritten += 1
    finally:
        # Never leave the last file open, even when a helper raised.
        if fileHandle is not None:
            fileHandle.close()

    # Report what was actually written, which can exceed sequenceNumber.
    logger.info("Made %s sequences in %s directories" %
                (sequencesWritten, len(sequenceDirs)))

    return sequenceDirs, newickTreeString
示例#13
0
 def fn2(tree, seq):
     """Return the printed ``tree`` (last character dropped), joined with ``seq`` if given.

     With ``seq`` set the result is ``(<tree>,<seq>)``; with ``seq`` None it
     is just the printed tree. ``bioio`` comes from the enclosing scope.
     """
     treeText = bioio.printBinaryTree(tree, True)[:-1]
     if seq is None:
         return treeText
     return '(' + treeText + ',' + seq + ')'
示例#14
0
 def fn3(bT):
     """Build a parenthesised Newick fragment for ``bT``, recursing via ``hash``.

     ``hash``, ``root``, ``fn2`` and ``bioio`` come from the enclosing scope,
     which is not part of this excerpt. ``hash[bT]`` is presumably the parent
     of ``bT`` -- confirm against the code that fills it.
     """
     linked = hash[bT]
     stopHere = linked == root
     # NOTE(review): bT is passed as printBinaryTree's second positional
     # argument, exactly as in the original -- confirm this is intended.
     parts = [bioio.printBinaryTree(fn2(linked, bT), bT, True)[:-1]]
     if not stopHere:
         # Not at the root yet: append the fragment built for the next node up.
         parts.append(fn3(linked))
     return '(' + ','.join(parts) + ')' + ":" + str(bT.distance)
示例#15
0
def getCactusInputs_random(regionNumber=0, tempDir=None,
                           sequenceNumber=None,
                           avgSequenceLength=None,
                           treeLeafNumber=None):
    """Gets a random set of sequences and a species tree relating them.

    A random binary tree is made, one temp directory is created per leaf, and
    mutated copies of random parent sequences are written as FASTA records
    into files spread over those directories.

    Args:
        regionNumber: Accepted but not used by this function.
        tempDir: Passed as ``rootDir`` when creating the leaf directories.
        sequenceNumber: Minimum number of sequences to write; drawn from
            [0, 30) when None. The loop continues past this number until
            every directory has received a file.
        avgSequenceLength: Parent sequence lengths are drawn from
            [1, 2 * avgSequenceLength); drawn from [1, 3000) when None.
        treeLeafNumber: Passed to makeRandomBinaryTree; 2 or 3 when None.

    Returns:
        A tuple ``(sequenceDirs, newickTreeString)``.
    """
    # random.randrange replaces random.choice(xrange(...)): xrange exists
    # only on Python 2, and randrange draws from the same range.
    if sequenceNumber is None:
        sequenceNumber = random.randrange(30)
    if avgSequenceLength is None:
        avgSequenceLength = random.randrange(1, 3000)
    if treeLeafNumber is None:
        treeLeafNumber = random.randrange(2, 4)
    #Make tree
    binaryTree = makeRandomBinaryTree(treeLeafNumber)
    newickTreeString = printBinaryTree(binaryTree, includeDistances=True)
    newickTreeLeafNames = []
    def gatherLeaves(tree):
        if tree.internal:
            gatherLeaves(tree.left)
            gatherLeaves(tree.right)
        else:
            newickTreeLeafNames.append(tree.iD)
    gatherLeaves(binaryTree)
    logger.info("Made random binary tree: %s" % newickTreeString)

    # One directory per leaf name collected above.
    sequenceDirs = []
    for _ in newickTreeLeafNames:
        sequenceDirs.append(getTempDirectory(rootDir=tempDir))

    logger.info("Made a set of random directories: %s" % " ".join(sequenceDirs))

    #Random sequences and species labelling
    fileHandle = None  # FASTA file currently being filled, if any
    parentSequence = getRandomSequence(length=random.randrange(1, 2*avgSequenceLength))[1]
    emptySequenceDirs = set(sequenceDirs)
    written = 0
    try:
        # Stop only once enough sequences exist and no directory is empty.
        while written < sequenceNumber or len(emptySequenceDirs) > 0:
            if fileHandle is None:
                if random.random() > 0.5: #Randomly choose the files to be attached or not
                    suffix = ".fa.complete"
                else:
                    suffix = ".fa"
                sequenceDir = random.choice(sequenceDirs)
                emptySequenceDirs.discard(sequenceDir)
                sequenceFile = getTempFile(rootDir=sequenceDir, suffix=suffix)
                fileHandle = open(sequenceFile, 'w')
            if random.random() > 0.8: #Get a new root sequence
                parentSequence = getRandomSequence(length=random.randrange(1, 2*avgSequenceLength))[1]
            sequence = mutateSequence(parentSequence, distance=random.random()*0.5)
            name = getRandomAlphaNumericString(15)
            if random.random() > 0.5:
                sequence = reverseComplement(sequence)
            fastaWrite(fileHandle, name, sequence)
            if random.random() > 0.5:
                # Close this file so the next sequence opens a fresh one.
                fileHandle.close()
                fileHandle = None
            written += 1
    finally:
        # Close whatever is still open, including after an exception.
        if fileHandle is not None:
            fileHandle.close()

    # Log the number really written; it can be larger than sequenceNumber.
    logger.info("Made %s sequences in %s directories" % (written, len(sequenceDirs)))

    return sequenceDirs, newickTreeString
示例#16
0
import sys
import xml.etree.ElementTree as ET

from sonLib.bioio import printBinaryTree
from sonLib.tree import BinaryTree
from sonLib.tree import DistancePair
from sonLib.tree import njI
from sonLib.tree import upgmaI

# BinaryTree node created for each event name seen so far, keyed by name.
l = {}


def fn(eventName):
    """Return the BinaryTree node for ``eventName``, creating it on first use."""
    # `in` replaces dict.has_key(), which is not available on Python 3.
    if eventName not in l:
        l[eventName] = BinaryTree(0.0, False, None, None, eventName)
    return l[eventName]


# One DistancePair per <distancesForSamples> element of the XML file named by
# the first command-line argument.
distancePairs = [
    DistancePair(float(sample.attrib["substitutionRate"]),
                 fn(sample.attrib["eventName1"]), 1,
                 fn(sample.attrib["eventName2"]), 1)
    for sample in ET.parse(sys.argv[1]).getroot().findall("distancesForSamples")
]
# Add the mirror image of every pair (leaf1 and leaf2 swapped).
distancePairs += [
    DistancePair(pair.distance, pair.leaf2, 1, pair.leaf1, 1)
    for pair in distancePairs
]

# A single formatted string per print keeps the output identical under the
# Python 2 print statement and the Python 3 print function.
print("%s %s" % (len(distancePairs), l))
print("NJ %s" % printBinaryTree(njI(distancePairs, len(l)),
                                includeDistances=True))
print("UPGMA %s" % printBinaryTree(upgmaI(distancePairs, len(l)),
                                   includeDistances=True))