-
Notifications
You must be signed in to change notification settings - Fork 0
/
SNPMatrixGenerator.py
74 lines (58 loc) · 2.18 KB
/
SNPMatrixGenerator.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
'''
Created on Aug 28, 2013
@author: Matthew Demarest
'''
import random
from nexus import NexusReader
from nexus import NexusWriter
# Characters accepted as nucleotide calls, in upper and lower case.
# _findDifferences intersects each column's values with this set, so any
# other symbol found in a column is ignored when looking for variation.
NUCLEOTIDES = {'A', 'T', 'G', 'C', 'a', 't', 'g', 'c'}
def _scanChunk(chunk, destNexus, snpCol, recordAll, recordRandomSample):
    """Parse one chunk of NEXUS text and record its variable columns.

    The chunk is read with a fresh NexusReader. If the reader reports a
    "data" block, the work is delegated to _findDifferences; otherwise the
    chunk is ignored.

    Returns the next free column index in destNexus (snpCol unchanged when
    nothing was recorded).
    """
    sourceNexus = NexusReader()
    sourceNexus.read_string(chunk)
    if "data" in sourceNexus.blocks:
        snpCol = _findDifferences(sourceNexus, destNexus, snpCol,
                                  recordAll, recordRandomSample)
    return snpCol


def snpMatrixGenerator(sourceFile, destFile, recordAll=False,
                       recordRandomSample=True):
    """Build a matrix of variable sites from a multi-block NEXUS file.

    sourceFile is read line by line and split into chunks: every line that
    contains both "begin" and "data" (case-insensitively) starts a new
    chunk. Each chunk is parsed on its own and its variable columns are
    appended to a single NexusWriter, which is finally written to destFile
    followed by a newline.

    Parameters:
        sourceFile -- open, iterable text file holding the NEXUS input.
        destFile -- open, writable text file receiving the result.
        recordAll -- record every variable column of every chunk.
        recordRandomSample -- record one randomly chosen variable column
            per chunk.

    Exactly one of recordAll / recordRandomSample must be true. Otherwise
    "Invalid Options" is printed and SystemExit is raised before either
    file is touched.

    Both files are closed before returning, and also when parsing or
    writing raises, so a failure does not leak the handles.
    """
    if recordAll == recordRandomSample:
        # Parenthesised form prints the same text on Python 2 and 3.
        print("Invalid Options")
        # Same exception the site-provided exit() raises, without
        # depending on the site module having been loaded.
        raise SystemExit

    try:
        destNexus = NexusWriter()
        chunkLines = []
        snpCol = 0
        for line in sourceFile:
            lowered = line.lower()
            if "begin" in lowered and "data" in lowered:
                # A new data block begins: flush what was collected so far
                # (for the first block this is just the file preamble).
                snpCol = _scanChunk("".join(chunkLines), destNexus, snpCol,
                                    recordAll, recordRandomSample)
                chunkLines = [line]
            else:
                chunkLines.append(line)
        # The last chunk has no following "begin data" line to trigger it.
        snpCol = _scanChunk("".join(chunkLines), destNexus, snpCol,
                            recordAll, recordRandomSample)
        destFile.write(destNexus.make_nexus() + '\n')
    finally:
        destFile.close()
        sourceFile.close()
def _findDifferences(sourceNexus, destNexus, destCol,
                     recordAll=False, recordRandomSample=True):
    """Copy the variable columns of one parsed data block into destNexus.

    A column is variable when its values, restricted to NUCLEOTIDES and
    compared case-insensitively, contain at least two different bases.
    Symbols outside NUCLEOTIDES never make a column variable, and neither
    does the same base appearing in both upper and lower case.

    Parameters:
        sourceNexus -- reader whose data.characters is indexed by column
            position and yields a taxon -> character mapping.
        destNexus -- writer that receives the selected columns.
        destCol -- first free column index in destNexus.
        recordAll -- copy every variable column, in source order.
        recordRandomSample -- when recordAll is false, copy a single
            variable column chosen with random.choice (nothing is copied
            if the block has no variable column).

    Returns the next free column index in destNexus.
    """
    characters = sourceNexus.data.characters
    differentCols = []
    # Positional lookup is kept because the container type of
    # `characters` is not visible from this module.
    for i in range(len(characters)):
        # Fold case so that e.g. 'A' and 'a' count as one allele.
        alleles = {state.upper() for state in characters[i].values()
                   if state in NUCLEOTIDES}
        if len(alleles) < 2:
            continue
        if recordAll:
            _addCol(sourceNexus, destNexus, i, destCol)
            destCol += 1
        elif recordRandomSample:
            differentCols.append(i)

    # differentCols is only filled in random-sample mode without recordAll.
    if recordRandomSample and differentCols:
        _addCol(sourceNexus, destNexus, random.choice(differentCols),
                destCol)
        destCol += 1
    return destCol
def _addCol(sourceNexus, destNexus, sourceCol, destCol):
    """Copy column sourceCol of sourceNexus into column destCol of destNexus.

    Every (taxon, character) pair stored for the source column is passed
    to destNexus.add under the destination column index.
    """
    column = sourceNexus.data.characters[sourceCol]
    for taxon, state in column.items():
        destNexus.add(taxon, destCol, state)
if __name__ == '__main__':
    # Script entry point: record every variable column of the bundled
    # example file into output.nex in the current directory.
    snpMatrixGenerator(
        sourceFile=open("../Example/Nexus_Ex.nex"),
        destFile=open("output.nex", "w"),
        recordAll=True,
        recordRandomSample=False,
    )