forked from argriffing/xgcode
-
Notifications
You must be signed in to change notification settings - Fork 0
/
20100603e.py
103 lines (89 loc) · 2.77 KB
/
20100603e.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
"""Convert a .hud file and a MAT_pheno.txt file to an .ind file.
The .hud file provides the names of the OTUs.
The MAT_pheno.txt file provides the 'case-control' status.
The output file is in eigenstrat format.
"""
from StringIO import StringIO
import sys
import os
import argparse
from SnippetUtil import HandlingError
import Form
import FormOut
import iterutils
import hud
# NOTE(review): presumably read by the surrounding snippet framework
# to categorize this script -- confirm against the framework.
g_tags = ['pca:convert']

# Sample .hud contents used as the default text of the 'hud' form field.
# Each row starts with an OTU name.
g_default_hud_string = '\n'.join((
    'IC31 1 1 1 0',
    'IC32 1 1 1 0',
    'IC33 1 0 1 0',
    'IC34 1 0 1 0',
    ))

# Sample MAT_pheno.txt contents used as the default text
# of the 'matpheno' form field.
# The rows cover every classification that process() recognizes.
g_default_matpheno_string = '\n'.join((
    'IC34 null',
    'IC33 12',
    'IC32 1',
    'IC31 2',
    ))
def process(hud_lines, matpheno_lines):
    """
    Build the contents of an eigenstrat .ind file.
    One tab-separated row of name, gender and status is written
    for each name returned by hud.decode, in the order returned.
    The gender is always 'U'.
    The status is 'Case' for names with MAT_pheno classification '1',
    'Control' for names with classification '2', and 'Ignore' otherwise.
    Names that are classified as '12' or 'null',
    or that do not appear in the MAT_pheno lines at all, are ignored.
    @param hud_lines: lines of a .hud file
    @param matpheno_lines: lines of a MAT_pheno.txt file
    @return: contents of an .ind file, without a trailing newline
    @raise ValueError: if a MAT_pheno line has no classification column
    @raise Exception: if a MAT_pheno classification is not recognized
    """
    # get the ordered names from the .hud file;
    # the decoded data itself is not needed for the .ind file
    names, _hud_data = hud.decode(hud_lines)
    # get case and control status from the matpheno file
    cases = set()
    controls = set()
    for line in iterutils.stripped_lines(matpheno_lines):
        # split only once so that everything after the name
        # is treated as the classification
        fields = line.split(None, 1)
        if len(fields) != 2:
            # report the offending line instead of failing
            # with an opaque tuple unpacking error
            msg = 'expected a name and a classification '
            msg += 'on each MAT_pheno line but found: ' + repr(line)
            raise ValueError(msg)
        name, classification = fields
        if classification == '1':
            cases.add(name)
        elif classification == '2':
            controls.add(name)
        elif classification in ('12', 'null'):
            # skip individuals classified like this
            pass
        else:
            msg = 'invalid MAT_pheno classification: ' + classification
            raise Exception(msg)
    # write the .ind file contents
    rows = []
    for name in names:
        gender = 'U'
        # a name is checked against the cases before the controls
        if name in cases:
            status = 'Case'
        elif name in controls:
            status = 'Control'
        else:
            status = 'Ignore'
        rows.append('\t'.join((name, gender, status)))
    return '\n'.join(rows)
def get_form():
    """
    Describe the two multi-line inputs of the form.
    The first input holds the .hud contents
    and the second holds the MAT_pheno.txt contents;
    each is pre-filled with the corresponding module-level default.
    @return: the body of a form
    """
    hud_field = Form.MultiLine(
            'hud',
            'contents of a .hud file',
            g_default_hud_string)
    matpheno_field = Form.MultiLine(
            'matpheno',
            'contents of a MAT_pheno.txt file',
            g_default_matpheno_string)
    return [hud_field, matpheno_field]
def get_form_out():
    """
    @return: a FormOut.EigenstratInd object constructed with 'out'
    """
    output_description = FormOut.EigenstratInd('out')
    return output_description
def get_response_content(fs):
    """
    Convert the submitted form fields to .ind file contents.
    @param fs: an object whose hud and matpheno attributes hold text
    @return: contents of an .ind file followed by a newline
    """
    hud_lines = fs.hud.splitlines()
    matpheno_lines = fs.matpheno.splitlines()
    ind_contents = process(hud_lines, matpheno_lines)
    return ind_contents + '\n'
def main(args):
with open(os.path.expanduser(args.hud)) as fin_hud:
with open(os.path.expanduser(args.matpheno)) as fin_matpheno:
print process(fin_hud, fin_matpheno)
if __name__ == '__main__':
    # both input paths are mandatory command line options
    cli = argparse.ArgumentParser(description=__doc__)
    cli.add_argument('--hud', help='a .hud file', required=True)
    cli.add_argument(
            '--matpheno', help='a MAT_pheno.txt file', required=True)
    parsed_args = cli.parse_args()
    main(parsed_args)