def average_positions(filenames, chi2cutoff=1.15, write=True, plot=1):
    """Filter and average over positions in a capillary.

    The files in `filenames` are stacked with `stack_datafiles`, clustered
    with `cluster_reps` using `chi2cutoff` as the threshold, and the
    positions selected by the clustering are averaged with `mean_stack`.

    Note that `filenames` is sorted in place; the indices stored in the
    output metadata refer to the sorted order.

    If `write` is true, a 7-row array (q, I, Ierr of the included average,
    I, Ierr of the first file and I, Ierr of the average over all files)
    is written with `write_ydat` to "<stem>.clu.ydat", where <stem> is the
    first (sorted) file name up to its first '.p'. If the name contains no
    '.p', the whole name is used as the stem.

    Returns the mean over the included positions.
    """
    filenames.sort()
    stack = stack_datafiles(filenames)
    incinds, cdm, links = cluster_reps(stack, threshold=chi2cutoff, plot=plot)
    ms = mean_stack(stack[incinds, ...])

    # Plain Python ints/floats keep the metadata serializable and make the
    # membership test below independent of the index container type.
    inc_list = [int(i) for i in incinds]
    inc_set = set(inc_list)
    dis_list = [i for i in range(len(filenames)) if i not in inc_set]

    included = [[filenames[i], md5_file(filenames[i])] for i in inc_list]
    discarded = [[filenames[i], md5_file(filenames[i])] for i in dis_list]
    ad = {
        'chi2cutoff': float(chi2cutoff),
        'included': included,
        'discarded': discarded,
        'chi2matrix': [float(x) for x in cdm],
        'incinds': inc_list,
        'linkage': [[float(x) for x in row] for row in links],
    }

    outarr = np.zeros((7, ms.shape[1]))
    outarr[0:3, :] = ms
    outarr[3:5, :] = stack[0, 1:3, :]
    outarr[5:7, :] = mean_stack(stack)[1:3, :]

    if write:
        first = filenames[0]
        cut = first.find('.p')
        # str.find returns -1 when '.p' is absent; slicing with -1 would
        # silently chop the last character off the name.
        stem = first[:cut] if cut >= 0 else first
        fname = "%s.clu.ydat" % stem
        print(fname)
        write_ydat(outarr, fname, addict=ad,
                   cols=['q', 'I', 'Ierr', 'I_first', 'Ierr_first',
                         'I_all', 'Ierr_all'])
    return ms
def subtract_background_from_ydats(scanfile, indir, outdir, scannumber=-1, highqnorm=False):
    """Subtract background from SAXS data in .ydat files.

    `scanfile` is read with `read_yaml` and is indexed by scan number; each
    sample entry is indexed as [buffer scan number, concentration]. Entries
    that cannot be indexed are treated as buffers and skipped. If the
    concentration is missing, 1.0 is used.

    For every scan (only `scannumber` if it is > 0, otherwise all scans in
    sorted order) the files "s<scan>.p<pos>.fil.ydat" in `indir` are read,
    the matching "bufs<bufscan>.p<pos>.out.ydat" is subtracted with
    `errsubtract`, the result is divided by the concentration and written
    to `outdir` as "s<scan>.p<pos>.sub.ydat".

    If `highqnorm` is True, normalize the buffer to the sample intensity
    in q-range [4.0, 5.0] 1/nm and adjust with a constant before
    subtracting.
    """
    scans = read_yaml(scanfile)
    if scannumber > 0:
        scannos = [scannumber]
    else:
        scannos = sorted(scans.keys())

    for scanno in scannos:
        print("Scan #%03d" % scanno)
        try:
            bufscan = scans[scanno][0]
        except TypeError:
            print("Scan #%03d is a buffer" % scanno)
            continue
        try:
            conc = scans[scanno][1]
        except (TypeError, IndexError):
            # Once [0] has succeeded, a missing second element shows up as
            # IndexError, so it must be caught for the fallback to work.
            print("No concentration for scan #%03d." % scanno)
            conc = 1.0
        print("Using concentration %g g/l." % conc)

        filelist = glob.glob(indir + "/s%03d.*.fil.ydat" % scanno)
        # Only the number of matches is used; file names are rebuilt from
        # consecutive position numbers starting at 0.
        for posno in range(len(filelist)):
            bufname = indir + "/bufs%03d.p%02d.out.ydat" % (bufscan, posno)
            buf, dbuf = read_ydat(bufname, addict=1)
            fname = indir + "/s%03d.p%02d.fil.ydat" % (scanno, posno)
            sam, dsam = read_ydat(fname, addict=1)
            outname = os.path.basename(fname)
            outname = outdir + '/' + outname[:outname.find('.fil.ydat')] + '.sub.ydat'
            ad = {
                'samfile': [os.path.basename(fname), md5_file(fname)],
                'buffile': [os.path.basename(bufname), md5_file(bufname)],
                'position': dsam.get('inputposition', "unknown"),
                'q~unit': dsam.get('q~unit', "unknown"),
                'I~unit': dsam.get('I~unit', "unknown"),
                'Ierr~unit': dsam.get('Ierr~unit', "unknown"),
            }
            if highqnorm:
                # 1 + 0.007 1/(g/l) is the excess of scattered intensity
                # in a protein sample versus buffer in the q-range
                # used [4.0, 5.0] 1/nm per concentration.
                scale = highq_scale(sam, buf)
                bufscale = scale * 1.0 / (1.0 + 0.007 * conc)
                print("scale: %g, bufscale: %g" % (scale, bufscale))
                buf[1, :] = bufscale * buf[1, :]
                buf[2, :] = bufscale * buf[2, :]
                ad['normalization'] = float(bufscale)
            else:
                ad['normalization'] = 'transmission'
            # Assumes the standard q, I, Ierr ordering in index 0 columns
            sub = errsubtract(sam, buf)
            sub[1:3, :] = sub[1:3, :] / conc
            write_ydat(sub, outname, addict=ad, attributes=['~unit'])
            print(os.path.basename(outname))
def write_filtered(filtered, first, aver, incmap, fname, inputfile=None, pos=-1):
    """Write an 'ydat' YAML file `fname` with filtered data and index array.

    `filtered` contains the filtered data, `incmap` the point by point
    inclusion array (bool matrix) of points used in the averaging and
    `first` the data set used in comparison for the filtering. `aver`
    supplies the I and Ierr rows written as 'I_all' and 'Ierr_all'.

    If `inputfile` is given, its name and md5 sum are stored in the
    metadata; if `pos` is >= 0 it is stored as 'inputposition'.

    The inclusion map is written first as a YAML sequence, wrapped to
    roughly 80 characters per row, followed by the data written with
    `write_ydat` to the same open file.
    """
    # FIXME: take a list of filenames which are filtered as an argument
    # and write them to the file
    with open(fname, "w") as fp:
        indent = ' '
        fp.write('incmap: !!seq [\n' + indent)
        slist = incmap_to_strings(incmap.T)
        # Floor division keeps the row arithmetic integral regardless of
        # whether true division is in effect.
        perrow = 1 + (80 // len(slist[0]))
        nfull = perrow * ((len(slist) - 1) // perrow)
        last = len(slist) - 1

        # All complete rows before the last one.
        for i in range(nfull):
            fp.write(slist[i])
            if (i + 1) % perrow or not i:
                fp.write(', ')
            else:
                fp.write(',\n' + indent)

        # Last row, terminated by the closing bracket.
        for i in range(nfull, len(slist)):
            fp.write(slist[i])
            if i < last:
                fp.write(', ')
            else:
                fp.write(']\n')

        ad = {
            'method': "filter_repetitions",
            'q~unit': '1/nm',
            'I~unit': 'arb.',
            'Ierr~unit': 'arb.',
            'I_first~unit': 'arb.',
            'Ierr_first~unit': 'arb.',
            'I_all~unit': 'arb.',
            'Ierr_all~unit': 'arb.',
        }
        if inputfile:
            ad['inputfile'] = [inputfile, md5_file(inputfile)]
        if pos >= 0:
            ad['inputposition'] = int(pos)

        outarr = np.zeros((7, filtered.shape[1]))
        outarr[0:3, :] = filtered
        outarr[3:5, :] = first[1:3, :]
        outarr[5:7, :] = aver[1:3, :]
        cols = ['q', 'I', 'Ierr', 'I_first', 'Ierr_first', 'I_all', 'Ierr_all']
        write_ydat(outarr, fp, cols=cols, addict=ad, attributes=['~unit'])
def filter_matfile(fname, outstem, p_reject=0.001, plot=1):
    """Filter outlier repetitions position by position from a stack file.

    The stack is read from `fname` with `read_mat`. The chi-squared
    rejection threshold is chi2.ppf(1 - `p_reject`, N) / N, where N is the
    number of non-NaN values in stack[0, 0, 1, :].

    For each index `pos` along the first axis of the stack, the repetitions
    are filtered with `filter_outliers`, the included ones are averaged with
    `mean_stack`, and a 7-row array (q, I, Ierr of the filtered average,
    I, Ierr of the first repetition and I, Ierr of the average over all
    repetitions) is written to "<outstem>.p<pos>.out.ydat" together with
    the filtering metadata.
    """
    stack = read_mat(fname)
    md5 = md5_file(fname)
    print("Rejection probability: %0.3g" % p_reject)
    N = np.sum(np.logical_not(np.isnan(stack[0, 0, 1, :])))
    print("Number of valid channels: %d" % N)
    threshold = chi2.ppf(1.0 - p_reject, N) / N
    print("Chisq rejection threshold: %0.3g" % threshold)

    for pos in range(stack.shape[0]):
        reps = stack[pos, ...]
        incinds, cdm = filter_outliers(reps, threshold=threshold, plot=plot)
        ms = mean_stack(reps[incinds, ...])

        # Plain Python ints/floats keep the metadata serializable; the set
        # gives a cheap membership test for the discarded indices.
        inc_list = [int(i) for i in incinds]
        inc_set = set(inc_list)
        disinds = [i for i in range(reps.shape[0]) if i not in inc_set]
        print("Pos %d, discarded: %s" % (pos, str(disinds)))

        ad = {
            'chi2cutoff': float(threshold),
            'rejection_prob': float(p_reject),
            'incinds': inc_list,
            'disinds': [int(i) for i in disinds],
            'chi2matrix': [float(x) for x in cdm],
            'method': "filter_outliers",
            'inputfile': [fname, md5],
            'inputposition': int(pos),
            'q~unit': '1/nm',
            'I~unit': 'arb.',
            'Ierr~unit': 'arb.',
            'I_first~unit': 'arb.',
            'Ierr_first~unit': 'arb.',
            'I_all~unit': 'arb.',
            'Ierr_all~unit': 'arb.',
        }

        outarr = np.zeros((7, ms.shape[1]))
        outarr[0:3, :] = ms
        outarr[3:5, :] = reps[0, 1:3, :]
        outarr[5:7, :] = mean_stack(reps)[1:3, :]

        outname = "%s.p%02d.out.ydat" % (outstem, pos)
        print(outname)
        write_ydat(outarr, outname, addict=ad,
                   cols=['q', 'I', 'Ierr', 'I_first', 'Ierr_first',
                         'I_all', 'Ierr_all'],
                   attributes=['~unit'])
def write_stack_ydat(fname, stack, fnames, dvals, conf):
    """Dump the stack of a single position into the .ydat file `fname`.

    The output array holds q (taken from the first entry of `stack`) in
    row 0, followed by an (I, Ierr) row pair for every entry along the
    first axis of `stack`. Columns are named 'q', 'I00', 'Ierr00', 'I01',
    ... with one pair per name in `fnames`.

    `fnames` is stored as 'frames', `dvals` as 'transmissions', and the
    basename and md5 sum of conf['Indfile'] as 'indfile' in the metadata.
    """
    nrep = stack.shape[0]
    nchan = stack.shape[-1]

    table = np.zeros((2 * nrep + 1, nchan))
    table[0, :] = stack[0, 0, :]      # q
    table[1::2, :] = stack[:, 1, :]   # I    -> odd rows
    table[2::2, :] = stack[:, 2, :]   # Ierr -> even rows after q

    indfile = conf['Indfile']
    meta = {
        'frames': list(fnames),
        'transmissions': dvals,
        'indfile': [os.path.basename(indfile), md5_file(indfile)],
        'q~unit': '1/nm',
    }

    frame_numbers = range(len(fnames))

    # Interleave intensity and error column names after q.
    columns = ['q']
    for n in frame_numbers:
        columns.append("I%02d" % n)
        columns.append("Ierr%02d" % n)

    # Unit attributes: all intensities first, then all errors.
    for n in frame_numbers:
        meta["I%02d~unit" % n] = "arb."
    for n in frame_numbers:
        meta["Ierr%02d~unit" % n] = "arb."

    write_ydat(table, fname, cols=columns, addict=meta, attributes=['~unit'])