/
subtraction.py
186 lines (169 loc) · 7.04 KB
/
subtraction.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
import sys, glob, os.path, logging
import numpy as np
from optparse import OptionParser
from scipy.io.matlab import savemat, loadmat
from xformats.yamlformats import read_ydat, read_yaml, write_ydat
from xformats.matformats import read_matclean
from utils import md5_file, clean_indices
def get_bg(indir, scanno, posno):
    """Load the background (buffer) SAXS curve for one scan position.

    The curve is read with `read_ydat` from the file
    ``<indir>/bufs<scanno>.p<posno>.out.ydat``, where the scan number is
    zero-padded to three digits and the position number to two digits.
    Whatever `read_ydat` returns for that file is returned unchanged.
    """
    basename = "bufs%03d.p%02d.out.ydat" % (scanno, posno)
    return read_ydat(indir + "/" + basename)
def highq_scale(sam, buf, qrange=(4.0, 5.0)):
    """Return the scale factor to match `buf` to `sam` in the high-q region.

    Both inputs are indexed as 2-D arrays with q on row 0 and intensity on
    row 1.  The factor is the ratio of the mean sample intensity to the
    mean buffer intensity over the points whose q-value (taken from `buf`)
    lies strictly inside `qrange`.

    Parameters
    ----------
    sam, buf : array-like
        Sample and buffer curves.  They are expected to share the same
        q-grid, because the window selected from `buf` is also applied
        to `sam` (this is not checked).
    qrange : sequence of two floats, optional
        Open interval (q_low, q_high) used for the matching.  Defaults to
        (4.0, 5.0), the window that was previously hard-coded.

    Returns
    -------
    Scale factor mean(sam I) / mean(buf I) inside the window.  If no point
    falls inside the window, both means are taken over empty selections
    and the result is not a usable number (NaN with NumPy's defaults).
    """
    q = buf[0, :]
    q_low, q_high = qrange
    window = np.logical_and(q > q_low, q < q_high)
    return np.mean(sam[1, window]) / np.mean(buf[1, window])
def errsubtract(a, b, bscale=1.0):
    """Return `a` - `b` with propagated errors for (3, N) input arrays.

    Row 0 of the result is copied from row 0 of `a`.  Row 1 is
    ``a[1] - bscale*b[1]`` and row 2 is the quadrature sum
    ``sqrt(a[2]**2 + (bscale*b[2])**2)``; i.e. both the intensity and the
    error of `b` are multiplied by `bscale` before the subtraction.

    Only the points selected by ``clean_indices(a, b)`` are computed; rows
    1 and 2 of all remaining points are set to NaN.  (Presumably the
    selection marks points that are valid in both inputs -- confirm
    against `utils.clean_indices`.)

    The inputs should share the same q-scale; this is not checked.
    """
    valid = clean_indices(a, b)
    invalid = np.logical_not(valid)

    npoints = a.shape[1]
    result = np.zeros((3, npoints))
    result[0, :] = a[0, :]

    # Scale the subtrahend once, then subtract and add errors in quadrature.
    scaled_int = bscale * b[1, valid]
    scaled_err = bscale * b[2, valid]
    result[1, valid] = a[1, valid] - scaled_int
    result[2, valid] = np.sqrt(np.square(a[2, valid]) + np.square(scaled_err))

    # Points outside the selection carry no usable intensity or error.
    result[1:3, invalid] = np.nan
    return result
def subtract_background_from_stacks(scanfile, indir, outdir, scannumber=-1):
    """Subtract background from SAXS data in MAT-file stacks.

    The scan table is read from `scanfile` with `read_yaml`.  For each
    scan number, entry ``[0]`` is the scan number of the matching buffer
    and entry ``[1]`` is the concentration.  Entries that cannot be
    indexed at all are reported as buffers and skipped.  If the
    concentration is missing, 1.0 is used.

    For every processed scan the stack ``s<scanno>`` is loaded from
    ``<indir>/s<scanno>.mat``.  Its shape is unpacked as
    (positions, repetitions, _, _).  The buffer curve for each position is
    fetched with `get_bg` and subtracted from every repetition with
    `errsubtract`; rows 1 and 2 of each result are then divided by the
    concentration.  The output is saved as variable ``subs<scanno>`` in
    ``<outdir>/subs<scanno>.mat``.

    Parameters
    ----------
    scanfile : path of the YAML scan table.
    indir : directory containing the input stacks and buffer files.
    outdir : directory where the subtracted stacks are written.
    scannumber : if positive, process only this scan; otherwise process
        every scan in the table in ascending order.
    """
    scans = read_yaml(scanfile)
    if scannumber > 0:
        scannos = [scannumber]
    else:
        # sorted() works both on Python 2 key lists and Python 3 key views,
        # whereas keys().sort() fails on Python 3.
        scannos = sorted(scans.keys())

    for scanno in scannos:
        print("Scan #%03d" % scanno)
        try:
            bufscan = scans[scanno][0]
        except TypeError:
            print("Scan #%03d is a buffer" % scanno)
            continue
        try:
            conc = scans[scanno][1]
        except (TypeError, IndexError):
            # A list entry without a concentration raises IndexError, so
            # it has to be caught for this fallback to be reachable.
            print("No concentration for scan #%03d." % scanno)
            conc = 1.0
        print("Using concentration %g g/l." % conc)

        stackname = "s%03d" % scanno
        stack = loadmat(indir + '/' + stackname + '.mat')[stackname]
        subs = np.zeros_like(stack)
        (npos, nrep, _, _) = stack.shape
        for pos in range(npos):
            print(pos)
            # One buffer curve per position, shared by all repetitions.
            buf = get_bg(indir, bufscan, pos)
            for rep in range(nrep):
                subs[pos, rep, ...] = errsubtract(stack[pos, rep, ...], buf)
                # Normalize intensity and error (rows 1 and 2), not q.
                subs[pos, rep, 1:3, :] = subs[pos, rep, 1:3, :] / conc

        outname = "subs%03d" % scanno
        savemat(outdir + '/' + outname + ".mat", {outname: subs},
                do_compression=1, oned_as='row')
def excess_ratio(scanfile, qrange=(4.0, 5.0), cnorm=True):
    """Return ratio of (sam/buf)-1 in subtractions in the `qrange` given.

    Sample and buffer files are located from the data given in `scanfile`
    as in 'subtract_background_from_ydats()', but no subtraction is
    written; only the excess ratios of sample to buffer intensity are
    computed.  Input files are looked up in the current directory ('.').

    For every scan that has a buffer, and for every position file
    ``s<scanno>.p<posno>.fil.ydat``, the ratio is
    ``mean(I_sam) / mean(I_buf) - 1`` over the points whose q-value (taken
    from the sample) lies strictly inside `qrange`.

    Results from this function can be used to calibrate high-q normalized
    subtraction.

    Parameters
    ----------
    scanfile : path of the YAML scan table read with `read_yaml`.
    qrange : sequence of two floats (q_low, q_high), default (4.0, 5.0).
    cnorm : if True (default), divide each ratio by the concentration
        read from the scan table.

    Returns
    -------
    (mlist, indlist, clist) : three parallel lists holding, per processed
        scan, the array of ratios (one element per position), the scan
        number, and the concentration.

    Raises
    ------
    TypeError or IndexError
        If a non-buffer scan has no concentration; the original lookup
        error is re-raised after a message is printed.
    """
    scans = read_yaml(scanfile)
    # sorted() works on Python 2 lists and Python 3 key views alike.
    scannos = sorted(scans.keys())
    indir = '.'
    mlist = []
    indlist = []
    clist = []
    for scanno in scannos:
        try:
            bufscan = scans[scanno][0]
        except TypeError:
            logging.warning("Scan #%03d is a buffer", scanno)
            continue
        try:
            conc = scans[scanno][1]
        except (TypeError, IndexError):
            print("No concentration for scan #%03d." % scanno)
            # Re-raise the lookup error so callers see the same exception
            # type as before.
            raise
        logging.warning("Scan #%03d", scanno)

        filelist = glob.glob(indir + "/s%03d.*.fil.ydat" % scanno)
        npos = len(filelist)
        marr = np.zeros(npos)
        # Positions are addressed by number, not by the globbed names; this
        # assumes files are numbered p00 .. p<npos-1> without gaps.
        for posno in range(npos):
            bufname = indir + "/bufs%03d.p%02d.out.ydat" % (bufscan, posno)
            buf, _ = read_ydat(bufname, addict=1)
            fname = indir + "/s%03d.p%02d.fil.ydat" % (scanno, posno)
            sam, _ = read_ydat(fname, addict=1)
            # Assumes the standard q, I, Ierr ordering in index 0 columns
            q = sam[0, :]
            qind = np.logical_and(q > qrange[0], q < qrange[1])
            ratio = np.mean(sam[1, qind]) / np.mean(buf[1, qind]) - 1.0
            if cnorm:
                ratio = ratio / conc
            marr[posno] = ratio
        mlist.append(marr)
        indlist.append(scanno)
        clist.append(conc)
    return mlist, indlist, clist
def subtract_background_from_ydats(scanfile, indir, outdir, scannumber=-1, highqnorm=False):
    """Subtract background from SAXS data in .ydat files.

    The scan table is read from `scanfile` with `read_yaml`.  For each
    scan number, entry ``[0]`` is the scan number of the matching buffer
    and entry ``[1]`` is the concentration.  Entries that cannot be
    indexed at all are reported as buffers and skipped.  If the
    concentration is missing, 1.0 is used.

    For each position file ``<indir>/s<scanno>.p<posno>.fil.ydat`` the
    buffer ``<indir>/bufs<bufscan>.p<posno>.out.ydat`` is subtracted with
    `errsubtract`, rows 1 and 2 of the result are divided by the
    concentration, and the curve is written to
    ``<outdir>/s<scanno>.p<posno>.sub.ydat``.  The written metadata holds
    the input file names with their MD5 sums, the position, the units
    copied from the sample file, and the normalization used.

    If `highqnorm` is True, normalize the buffer to the sample intensity
    in q-range [4.0, 5.0] 1/nm and adjust with a constant before
    subtracting.  Otherwise the buffer is used as read and the
    normalization is recorded as 'transmission'.

    Parameters
    ----------
    scanfile : path of the YAML scan table.
    indir : directory containing the sample and buffer .ydat files.
    outdir : directory where the subtracted .ydat files are written.
    scannumber : if positive, process only this scan; otherwise process
        every scan in the table in ascending order.
    highqnorm : enable the high-q buffer normalization described above.
    """
    scans = read_yaml(scanfile)
    if scannumber > 0:
        scannos = [scannumber]
    else:
        # sorted() works both on Python 2 key lists and Python 3 key views,
        # whereas keys().sort() fails on Python 3.
        scannos = sorted(scans.keys())

    for scanno in scannos:
        print("Scan #%03d" % scanno)
        try:
            bufscan = scans[scanno][0]
        except TypeError:
            print("Scan #%03d is a buffer" % scanno)
            continue
        try:
            conc = scans[scanno][1]
        except (TypeError, IndexError):
            # A list entry without a concentration raises IndexError, so
            # it has to be caught for this fallback to be reachable.
            print("No concentration for scan #%03d." % scanno)
            conc = 1.0
        print("Using concentration %g g/l." % conc)

        filelist = glob.glob(indir + "/s%03d.*.fil.ydat" % scanno)
        # Positions are addressed by number, not by the globbed names; this
        # assumes files are numbered p00 .. p<n-1> without gaps.
        for posno in range(len(filelist)):
            bufname = indir + "/bufs%03d.p%02d.out.ydat" % (bufscan, posno)
            buf, dbuf = read_ydat(bufname, addict=1)
            fname = indir + "/s%03d.p%02d.fil.ydat" % (scanno, posno)
            sam, dsam = read_ydat(fname, addict=1)
            outname = os.path.basename(fname)
            outname = outdir + '/' + outname[:outname.find('.fil.ydat')] + '.sub.ydat'
            ad = {
                'samfile': [os.path.basename(fname), md5_file(fname)],
                'buffile': [os.path.basename(bufname), md5_file(bufname)],
                'position': dsam.get('inputposition', "unknown"),
                'q~unit': dsam.get('q~unit', "unknown"),
                'I~unit': dsam.get('I~unit', "unknown"),
                'Ierr~unit': dsam.get('Ierr~unit', "unknown"),
            }
            if highqnorm:
                # 1 + 0.007 1/(g/l) is the excess of scattered intensity
                # in a protein sample versus buffer in the q-range
                # used [4.0, 5.0] 1/nm per concentration.
                scale = highq_scale(sam, buf)
                bufscale = scale * 1.0/(1.0 + 0.007*conc)
                print("scale: %g, bufscale: %g" % (scale, bufscale))
                # Scale intensity and error of the buffer in place.
                buf[1, :] = bufscale * buf[1, :]
                buf[2, :] = bufscale * buf[2, :]
                ad['normalization'] = float(bufscale)
            else:
                ad['normalization'] = 'transmission'
            # Assumes the standard q, I, Ierr ordering in index 0 columns
            sub = errsubtract(sam, buf)
            sub[1:3, :] = sub[1:3, :] / conc
            write_ydat(sub, outname, addict=ad, attributes=['~unit'])
            print(os.path.basename(outname))