/
OrtheusTests.py
161 lines (133 loc) · 7.17 KB
/
OrtheusTests.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
#!/usr/bin/env python
#Copyright (C) 2008-2011 by Benedict Paten (benedictpaten@gmail.com)
#
#Released under the MIT license, see LICENSE.txt
"""Tests ortheus_core and the old Ortheus python scripts.
"""
import os
import random
import sys
import unittest
from sonLib.bioio import printBinaryTree
from sonLib.bioio import fastaAlignmentRead
from sonLib.bioio import fastaWrite
from sonLib.bioio import getTempFile
from sonLib.tree import BinaryTree
from sonLib.bioio import system
from sonLib.bioio import getRandomSequence
from sonLib.bioio import mutateSequence
from sonLib.bioio import TestStatus
from sonLib.bioio import parseSuiteTestOptions
class TestCase(unittest.TestCase):
    """Runs Ortheus.py / ortheus_core on stored data sets and on random inputs.

    The three data-set tests only do work when TestStatus.getTestStatus()
    equals the level they check for; testRandom always runs and repeats
    self.testNo times.
    """

    def setUp(self):
        """Stores TestStatus.getTestSetup() as self.testNo and starts an empty temp-file list."""
        self.testNo = TestStatus.getTestSetup()
        self.tempFiles = []
        unittest.TestCase.setUp(self)

    def tearDown(self):
        """Removes every file registered in self.tempFiles."""
        for tempFile in self.tempFiles:
            os.remove(tempFile)
        unittest.TestCase.tearDown(self)

    def _log(self, *parts):
        """Prints the parts separated by single spaces.

        Produces the same output as the Python 2 statement ``print a, b``
        while remaining valid syntax on Python 3.
        """
        print(" ".join([str(part) for part in parts]))

    def _runOrtheusScript(self, seqFiles, treeString, outputFile):
        """Builds the Ortheus.py command line, prints it and runs it with system().

        seqFiles are passed after -e, treeString (single-quoted) after -d and
        outputFile after -f.
        """
        command = "Ortheus.py -e %s -d '%s' -f %s -j -a -b" % \
            (" ".join(seqFiles), treeString, outputFile)
        self._log("running command", command)
        system(command)

    def testENm001(self):
        """Runs Ortheus.py on the eight ENm001 sequence files.

        Does nothing unless the test status is TEST_VERY_LONG.
        """
        if TestStatus.getTestStatus() != TestStatus.TEST_VERY_LONG:
            return
        encodePath = TestStatus.getPathToDataSets() + "/MAY-2005/ENm001"
        outputPath = TestStatus.getPathToDataSets() + "/ortheus/encodeTest"
        # Alternative tree containing single-child internal nodes, kept for reference:
        #treeString = '(((((((((((((human:0.006969,chimp:0.009727):0.025291,((baboon:0.008968):0.011019):0.024581):0.023649):0.066673):0.018405,((rat:0.081244,mouse:0.072818):0.238435):0.021892):0.02326,(((cow:0.164728,(cat:0.109852,dog:0.107805):0.049576):0.004663):0.010883):0.033242):0.028346):0.016015):0.226853):0.063898):0.126639):0.119814):0.16696);'
        treeString = '((((human:0.006969,chimp:0.009727):0.025291,baboon:0.044568):0.108727,(rat:0.081244,mouse:0.072818):0.260327):0.02326,(cow:0.164728,(cat:0.109852,dog:0.107805):0.049576):0.048788):0.749525;'
        seqFiles = [ "human.ENm001.fa", "chimp.ENm001.fa", "baboon.ENm001.fa", "rat.ENm001.fa", "mouse.ENm001.fa", "cow.ENm001.fa", "cat.ENm001.fa", "dog.ENm001.fa" ]
        seqFiles = [ encodePath + "/" + name for name in seqFiles ]
        outputFile = outputPath + "/outputENm001.mfa"
        self._runOrtheusScript(seqFiles, treeString, outputFile)

    def testSimulation(self):
        """Runs Ortheus.py on job 00 of the blanchettesSimulation data set.

        Does nothing unless the test status is TEST_LONG.
        """
        if TestStatus.getTestStatus() != TestStatus.TEST_LONG:
            return
        blanchettePath = TestStatus.getPathToDataSets() + "/blanchettesSimulation/00.job"
        outputPath = TestStatus.getPathToDataSets() + "/ortheus/blanchettesSimulationTest"
        treeString = '(((((((((((((human:0.006969,chimp:0.009727):0.025291,((baboon:0.008968):0.011019):0.024581):0.023649):0.066673):0.018405,((rat:0.081244,mouse:0.072818):0.238435):0.021892):0.02326,(((cow:0.164728,(cat:0.109852,dog:0.107805):0.049576):0.004663):0.010883):0.033242):0.028346):0.016015):0.226853):0.063898):0.126639):0.119814):0.16696);'
        seqFiles = [ "HUMAN", "CHIMP", "BABOON", "RAT", "MOUSE", "COW", "CAT", "DOG" ]
        seqFiles = [ blanchettePath + "/" + name for name in seqFiles ]
        outputFile = outputPath + "/outputJob1.mfa"
        self._runOrtheusScript(seqFiles, treeString, outputFile)

    def testAndyYatesFirstExample(self):
        """Runs Ortheus.py on the 36 sequences of the andyYatesExample1 data set.

        Does nothing unless the test status is TEST_LONG.
        """
        if TestStatus.getTestStatus() != TestStatus.TEST_LONG:
            return
        filePath = TestStatus.getPathToDataSets() + "/ortheus/andyYatesExample1"
        # The inputs are seq1.fa ... seq36.fa, in numeric order.
        seqs = " ".join([ "%s/seq%i.fa" % (filePath, i) for i in range(1, 37) ])
        # NOTE(review): "output.16163.tree-a" below may be missing a space
        # before "-a" -- confirm against the intended Ortheus.py options.
        # The continuation lines start at column 0 so that no extra
        # whitespace ends up inside the string literal.
        command = 'Ortheus.py -l "#-j 0 -e" -e %s -z \
"(((1012:0.0112,1051:0.0119):0.0026,(1055:0.0015,1052:0.0018):0.0370):0.0022,1054:0.0108,1053:0.0116);" \
-A 1054 1051 1054 1054 1053 1012 1054 1054 1053 1054 1051 1054 1051 1051 1053 1051 1051 1012 1051 1054 1012 1054 1053 1051 1053 \
1054 1054 1051 1012 1012 1054 1053 1053 1012 1054 1051 -f %s/output.16163.mfa -g %s/output.16163.tree-a -k "# -A" -m "java -Xmx1800m -Xms1800m" -a -b' % \
            (seqs, filePath, filePath)
        self._log("running command", command)
        system(command)

    def testRandom(self):
        """Makes random sequences and tests that Ortheus can align them and produce a valid output.
        """
        outputFile = getTempFile()
        self.tempFiles.append(outputFile)
        MAX_SEQS = 20
        # Reserve one temp file per possible input sequence; tempFiles[0] is
        # the output file, tempFiles[1:] are the sequence files.
        for _ in range(MAX_SEQS):
            self.tempFiles.append(getTempFile())
        for test in range(0, self.testNo):
            print("test no : %i " % test)
            binaryTree = randomTree()
            middleSeq = getRandomSequence(250)[1]
            seqs = []
            getTreeSeqs(binaryTree, middleSeq, seqs)
            # Trees with too many or too few leaves are skipped for this iteration.
            if len(seqs) <= MAX_SEQS and len(seqs) > 2:
                seqFiles = self.tempFiles[1:1 + len(seqs)]
                for i, seqFile in enumerate(seqFiles):
                    # "with" closes the handle even if fastaWrite raises.
                    with open(seqFile, 'w') as fileHandle:
                        fastaWrite(fileHandle, "%i" % i, seqs[i])
                self._log("Have seq files ", seqFiles)
                treeString = printBinaryTree(binaryTree, True)
                self._log("For tree ", treeString)
                #align seqs and check no failure
                command = "ortheus_core -a %s -b '%s' -d %s -e" % (" ".join(seqFiles), treeString, outputFile)
                self._log("command to call", command)
                system(command)
                #check alignment is complete
                alignment = [ column[:] for column in fastaAlignmentRead(outputFile) ]
                checkAlignment(alignment, seqs)
                print("test no is finished : %i " % test)
def randomTree():
    """Builds a random binary tree whose root is always an internal node.

    Each child is drawn independently: when random.random() exceeds 0.6 it
    becomes an internal node with two further random children, otherwise a
    leaf. Leaves receive the strings "0", "1", ... in creation order as the
    final BinaryTree argument (presumably the leaf name -- confirm against
    sonLib.tree.BinaryTree). Non-root nodes get a distance of
    random.random()*0.8; the root gets random.random().
    """
    leafLabels = []

    def makeSubtree():
        if random.random() <= 0.6:
            # Leaf: label it with the number of leaves created so far.
            label = str(len(leafLabels))
            leafLabels.append(label)
            return BinaryTree(random.random()*0.8, False, None, None, label)
        # Internal node: draw the distance first, then the left and the
        # right child, so random numbers are consumed in a fixed order.
        distance = random.random()*0.8
        leftChild = makeSubtree()
        rightChild = makeSubtree()
        return BinaryTree(distance, True, leftChild, rightChild, None)

    rootDistance = random.random()
    leftChild = makeSubtree()
    rightChild = makeSubtree()
    return BinaryTree(rootDistance, True, leftChild, rightChild, None)
def getTreeSeqs(binaryTree, seq, l):
    """Appends one sequence per leaf of binaryTree to the list l.

    At every node the incoming sequence is passed through
    mutateSequence(seq, node.distance); internal nodes hand the result to
    their left and then right child, leaves append it to l. Nodes are
    visited in pre-order (node, left subtree, right subtree), so leaf
    sequences end up in l in left-to-right order.
    """
    pending = [(binaryTree, seq)]
    while pending:
        node, inherited = pending.pop()
        mutated = mutateSequence(inherited, node.distance)
        if not node.internal:
            l.append(mutated)
            continue
        # Push right before left so the left subtree is popped and fully
        # processed first.
        pending.append((node.right, mutated))
        pending.append((node.left, mutated))
def checkAlignment(align, seqs):
    """Checks that the alignment spells out every input sequence exactly.

    align is an iterable of alignment columns. The character belonging to
    seqs[k] is read from index k*2 of each column; the odd indices are
    ignored (presumably they hold the inferred ancestral sequences --
    confirm against the ortheus_core output format). A '-' is a gap and
    consumes nothing from the sequence.

    Raises AssertionError when a column character differs from the next
    unconsumed character of its sequence, when the alignment holds more
    characters for a sequence than the sequence contains, or when a
    sequence has not been fully consumed once all columns are read. The
    checks are raised explicitly rather than via "assert" so that they
    still run under "python -O".
    """
    consumed = [0] * len(seqs)
    for column in align:
        for k, seq in enumerate(seqs):
            residue = column[k * 2]
            if residue == '-':
                continue
            position = consumed[k]
            if position >= len(seq):
                raise AssertionError(
                    "alignment has more residues for sequence %i than its length %i"
                    % (k, len(seq)))
            if residue != seq[position]:
                raise AssertionError(
                    "sequence %i position %i: alignment has %r, expected %r"
                    % (k, position, residue, seq[position]))
            consumed[k] = position + 1
    for k, seq in enumerate(seqs):
        if consumed[k] != len(seq):
            raise AssertionError(
                "sequence %i: alignment covers %i of %i residues"
                % (k, consumed[k], len(seq)))
def main():
    """Parses the suite test options, then runs the tests of this module.

    parseSuiteTestOptions() is called first (presumably it reads the
    suite's own command-line options -- confirm in sonLib.bioio). sys.argv
    is then cut down to its first element before unittest.main() is
    invoked, because unittest.main() parses sys.argv itself.
    """
    parseSuiteTestOptions()
    programNameOnly = sys.argv[:1]
    sys.argv = programNameOnly
    unittest.main()
# Run the test suite only when this file is executed as a script.
if __name__ == "__main__":
    main()