def get_lists_of_mzs(sf):
    """Return the profile isotope pattern of ``sf`` plus its gradient-picked peaks.

    The pattern is generated with ``pyisocalc.isodist`` (sigma 0.01,
    charges -2, resolution 100000, no centroiding) and candidate peak
    indices are obtained from ``gradient``.

    Args:
        sf: sum formula, passed unchanged to ``pyisocalc.isodist``.

    Returns:
        A dict with five list-valued keys:

        * ``isodist_mzs`` / ``isodist_int`` -- profile m/z values and
          intensities from the first sample whose intensity exceeds 0.01 up
          to (but not including) the last such sample.
        * ``grad_mzs`` / ``grad_int`` -- m/z and intensity at every picked
          peak index.
        * ``grad_ind`` -- the picked indices as plain ints, shifted so that
          they are relative to the start of the ``isodist_*`` slice.

        Every value is an empty list when the pattern cannot be computed
        (any ``Exception`` raised along the way).
    """
    try:
        isotope_ms = pyisocalc.isodist(sf, plot=False, sigma=0.01, charges=-2,
                                       resolution=100000.0, do_centroid=False)
        mzlist = list(isotope_ms.get_mzs())
        intenslist = list(isotope_ms.get_intensities())
        # Only the indices returned by gradient() are used; the m/z and
        # intensity values are re-read from the profile lists below.
        _, _, indices_list = gradient(isotope_ms.get_mzs(),
                                      isotope_ms.get_intensities(),
                                      max_output=-1, weighted_bins=0)
        n_points = len(intenslist)
        # Move each index to its right-hand neighbour unless the sample at
        # the index is strictly more intense.  An index on the very last
        # sample has no neighbour and is kept as is instead of raising.
        indices_list = [
            i + 1
            if (i + 1 < n_points and not intenslist[i] > intenslist[i + 1])
            else i
            for i in indices_list
        ]
        mzs_list = [mzlist[i] for i in indices_list]
        intensities_list = [intenslist[i] for i in indices_list]
        # Indices of all samples above the 0.01 intensity threshold, in
        # ascending order; an empty list raises IndexError -> empty result.
        above = [i for i, value in enumerate(intenslist) if value > 0.01]
        min_i, max_i = above[0], above[-1]
        # NOTE(review): the slice stops before max_i, so the last sample
        # above the threshold is not returned -- confirm this is intended.
        return {
            "isodist_mzs": mzlist[min_i:max_i],
            "isodist_int": intenslist[min_i:max_i],
            "grad_mzs": mzs_list,
            "grad_int": intensities_list,
            "grad_ind": [int(i) - min_i for i in indices_list],
        }
    except Exception:
        # Best effort: callers get an empty pattern instead of an error.
        # KeyboardInterrupt / SystemExit are deliberately not swallowed.
        return {
            "isodist_mzs": [],
            "isodist_int": [],
            "grad_mzs": [],
            "grad_int": [],
            "grad_ind": [],
        }
def load_queries(self):
    """Load the sum-formula database and compute a pattern per formula/adduct.

    Values read from ``self.config``:

    * ``['file_inputs']['database_file']`` -- text file, one sum formula per
      line.
    * ``['isotope_generation']['isocalc_sig']`` and
      ``['isotope_generation']['isocalc_resolution']`` -- forwarded to
      ``pyisocalc.isodist`` as ``sigma`` and ``resolution``.
    * ``['isotope_generation']['charge']`` -- list of dicts with
      ``'polarity'`` and ``'n_charges'``; only the first entry is used.
    * ``['isotope_generation']['adducts']`` -- list of dicts with an
      ``'adduct'`` entry.

    Attributes set on ``self``:

    * ``adducts`` -- list of adduct strings.
    * ``sum_formulae`` -- stripped lines of the database file.
    * ``mz_list`` -- ``mz_list[sum_formula][adduct] = (mzs, ints)`` with both
      arrays ordered by decreasing intensity.
    """
    config = self.config
    db_filename = config['file_inputs']['database_file']
    iso_config = config['isotope_generation']
    isocalc_sig = iso_config['isocalc_sig']
    isocalc_resolution = iso_config['isocalc_resolution']

    charge_states = iso_config['charge']
    if len(charge_states) > 1:
        print('Warning: only first charge state currently accepted')
    # Polarity and count are concatenated (e.g. '+' and 1 -> '+1') and parsed
    # as a signed integer.  Currently only supports first charge!!
    charge = int('{}{}'.format(charge_states[0]['polarity'],
                               charge_states[0]['n_charges']))
    self.adducts = [a['adduct'] for a in iso_config['adducts']]

    # Read in molecules; the context manager closes the file promptly.
    with open(db_filename) as db_file:
        self.sum_formulae = [line.strip() for line in db_file]

    self.mz_list = {}
    for adduct in self.adducts:
        for sum_formula in self.sum_formulae:
            isotope_ms = pyisocalc.isodist(sum_formula + adduct,
                                           plot=False,
                                           sigma=isocalc_sig,
                                           charges=charge,
                                           resolution=isocalc_resolution,
                                           do_centroid=True)
            mzs, ints = [np.array(part) for part in
                         isotope_ms.get_spectrum(source='centroids')]
            order = ints.argsort()[::-1]  # most intense centroid first
            self.mz_list.setdefault(sum_formula, {})[adduct] = (mzs[order],
                                                                ints[order])
def make_sf_adduct_optimusfilter(sum_formulae, adducts, output_filename, sigma=0.001, resolution=10000, charge=1):
    """Append one ``<formula> [M<adduct>],-1,<mz>`` line per formula/adduct pair.

    For every combination the adduct is merged into the formula with
    ``pyisocalc.complex_to_simple``, the isotope pattern is computed with
    ``pyisocalc.isodist`` and the first centroid m/z is written out.

    Args:
        sum_formulae: iterable of sum formula strings.
        adducts: iterable of adduct strings appended to each formula.
        output_filename: file opened in append mode; existing content is kept.
        sigma, resolution, charge: forwarded to ``pyisocalc.isodist``
            (``charge`` as its ``charges`` argument).

    Pairs are skipped (nothing is written) when ``complex_to_simple`` returns
    ``None``, when a ``KeyError`` is raised, or when a ``ValueError`` whose
    message starts with "Element not recognised" is raised.

    Raises:
        Any other exception from pyisocalc, after printing the offending
        formula and adduct.
    """
    from pyMS.pyisocalc import pyisocalc

    with open(output_filename, 'a') as f_out:
        for sum_formula in sum_formulae:
            for adduct in adducts:
                # Bound before the try so the diagnostic print below can
                # never fail with an unbound name.
                sf = None
                try:
                    sf = pyisocalc.complex_to_simple(sum_formula + adduct)
                    if sf is None:  # not possible to form adduct
                        continue
                    isotope_ms = pyisocalc.isodist(sf, plot=False, sigma=sigma,
                                                   charges=charge,
                                                   resolution=resolution)
                except KeyError as e:
                    # Always skip: falling through here would write the
                    # pattern left over from the previous pair.
                    print(str(e))
                    continue
                except ValueError as e:
                    if str(e).startswith("Element not recognised"):
                        print(str(e))
                        continue
                    print('{} {}'.format(sum_formula, adduct))
                    raise
                except Exception:
                    print('{} {} {}'.format(sf == "", sum_formula, adduct))
                    raise
                first_mz = isotope_ms.get_spectrum(source='centroids')[0][0]
                f_out.write("{} [M{}],-1,{}\n".format(sum_formula, adduct,
                                                      first_mz))
def calcualte_isotope_patterns(sum_formulae, adducts='', isocalc_sig=0.01, isocalc_resolution=200000., isocalc_do_centroid=True, charge='1'):
    """Generate the centroided isotope pattern of every sum formula.

    The (misspelled) function name is kept so existing callers keep working.

    Args:
        sum_formulae: iterable of sum formula strings.
        adducts: a single adduct string appended to every formula (despite
            the plural name); defaults to no adduct.
        isocalc_sig: forwarded to ``pyisocalc.isodist`` as ``sigma``.
        isocalc_resolution: forwarded as ``resolution``.
        isocalc_do_centroid: forwarded as ``do_centroid``.
        charge: charge state; converted with ``int()`` so both ``'1'`` and
            ``1`` are accepted.

    Returns:
        dict: ``mz_list[sum_formula][adduct]`` holds the result of
        ``get_spectrum(source='centroids')`` for that formula.
    """
    # todo - parse sum formula and adduct properly so that subtractions
    # (losses) can be utilised (this code already exists somewhere)
    adduct = adducts  # the body previously referenced an undefined `adduct`
    charge = int(charge)
    mz_list = {}
    for sum_formula in sum_formulae:
        isotope_ms = pyisocalc.isodist(sum_formula + adduct,
                                       plot=False,
                                       sigma=isocalc_sig,
                                       charges=charge,
                                       resolution=isocalc_resolution,
                                       do_centroid=isocalc_do_centroid)
        mz_list.setdefault(sum_formula, {})[adduct] = isotope_ms.get_spectrum(
            source='centroids')
    return mz_list
def show_images_get():
    """Build the ``show_images`` page for one formula across H, K and Na adducts.

    Request parameters (with defaults): ``dataset`` (first key of
    ``app.paths``), ``formula`` (''), ``tolerance`` (5.0), ``resolution``
    (1e5), ``adduct`` ('H'), ``hs_removal`` (False; the value 'on' becomes
    True), ``npeaks`` (4), ``pts`` (10) and ``pyisocalc_cutoff`` (1e-3).

    For each adduct the isotope pattern is computed, reduced to at most
    ``npeaks`` centroids, and scored against the dataset's ion images; the
    per-adduct ``(mzs, intensities, stats)`` tuples are handed to the
    template.
    """
    params = bottle.request.params
    dataset = params.get('dataset', next(app.paths.iterkeys()))
    formula = params.get('formula', '')
    tolerance = float(params.get('tolerance', 5.0))
    resolution = float(params.get('resolution', 1e5))
    selected_adduct = params.get('adduct', 'H')
    hs_removal = bottle.request.GET.get('hs_removal', False)
    k = int(params.get('npeaks', 4))
    # A ticked checkbox arrives as 'on'; any other value is passed through.
    hs_removal = True if hs_removal == 'on' else hs_removal
    pts = float(params.get('pts', 10))
    cutoff = float(params.get('pyisocalc_cutoff', 1e-3))

    isotope_patterns = {}
    for adduct in ('H', 'K', 'Na'):
        parsed = pyisocalc.SumFormulaParser.parse_string(formula + adduct)
        raw_pattern = pyisocalc.isodist(parsed, cutoff)
        # Peak width derived from the first m/z of the raw pattern.
        fwhm = raw_pattern.get_spectrum()[0][0] / resolution
        pattern = pyisocalc.apply_gaussian(raw_pattern, fwhm, pts, exact=True)
        mzs, intensities = [
            np.array(part)
            for part in pattern.get_spectrum(source='centroids')
        ]

        if len(mzs) > k:
            # Keep the k most intense centroids ...
            strongest = intensities.argsort()[::-1]
            mzs = mzs[strongest][:k]
            intensities = intensities[strongest][:k]
            # ... and put the survivors back in ascending m/z order.
            by_mz = mzs.argsort()
            mzs = mzs[by_mz]
            intensities = intensities[by_mz]

        datacube = app.get_datacube(dataset, mzs, tolerance)

        if hs_removal:
            # Clip every non-empty image at its own 99th percentile.
            for img in datacube.xic:
                if len(img) == 0:
                    continue
                ceiling = np.percentile(img, 99)
                img[img > ceiling] = ceiling

        chaos = measure_of_chaos(datacube.xic_to_image(0), 30, overwrite=False)
        iso_corr = isotope_pattern_match(datacube.xic, intensities)

        secondary = intensities[1:]
        if len(secondary) > 1:
            img_corr = isotope_image_correlation(datacube.xic,
                                                 weights=secondary)
        else:
            img_corr = 1.0  # return 1 if there's a single peak

        stats = {
            'measure of chaos': chaos,
            'image correlation score': img_corr,
            'isotope pattern score': iso_corr,
        }
        isotope_patterns[adduct] = (mzs, intensities, stats)

    # Digits in the formula are wrapped in <sub> tags for display.
    pretty_formula = re.sub(r"(\d+)", r"<sub>\1</sub>", formula)
    return bottle.template('show_images',
                           hs_removal=hs_removal,
                           isotope_patterns=isotope_patterns,
                           formula=formula,
                           selected_adduct=selected_adduct,
                           pretty_formula=pretty_formula,
                           resolution=resolution,
                           tol=tolerance,
                           datasets=app.paths.keys(),
                           npeaks=k,
                           selected_dataset=dataset)
def calculate_isotope_patterns(sum_formulae, adduct='', isocalc_sig=0.01, isocalc_resolution=200000., isocalc_do_centroid=True, charge=1, verbose=True):
    """Generate a list of peak centroids for each sum formula with the adduct.

    Args:
        sum_formulae: iterable of sum formula strings.
        adduct: adduct string appended to every formula before it is
            simplified with ``pyisocalc.complex_to_simple``.
        isocalc_sig: forwarded to ``pyisocalc.isodist`` as ``sigma``.
        isocalc_resolution: forwarded as ``resolution``.
        isocalc_do_centroid: forwarded as ``do_centroid``.
        charge: forwarded as ``charges``.
        verbose: when true, print each formula/adduct pair before processing.

    Returns:
        dict: ``mz_list[sum_formula][adduct]`` holds the result of
        ``get_spectrum(source='centroids')``.  Formulae are left out when
        simplification returns ``None``, when pyisocalc raises ``KeyError``,
        or when it raises a ``ValueError`` starting with
        "Element not recognised".

    Raises:
        ValueError: any other ``ValueError`` from ``complex_to_simple``.
    """
    from pyMS.pyisocalc import pyisocalc

    mz_list = {}
    for sum_formula in sum_formulae:
        if verbose:
            print('{} {}'.format(sum_formula, adduct))
        try:
            sf = pyisocalc.complex_to_simple(sum_formula + adduct)
        except KeyError as e:
            # Always skip: continuing would reuse `sf` from the previous
            # formula (or fail on an unbound name for the first one).
            print(str(e))
            continue
        except ValueError as e:
            if str(e).startswith("Element not recognised"):
                print(str(e))
                continue
            print('{} {}'.format(sum_formula, adduct))
            raise
        if sf is None:  # negative atoms as a result of simplification
            print('negative adduct for {} : {}'.format(sum_formula, adduct))
            continue
        try:
            isotope_ms = pyisocalc.isodist(sf,
                                           plot=False,
                                           sigma=isocalc_sig,
                                           charges=charge,
                                           resolution=isocalc_resolution,
                                           do_centroid=isocalc_do_centroid)
        except KeyError as e:
            # Same reasoning as above: never store a stale pattern.
            print(str(e))
            continue
        mz_list.setdefault(sum_formula, {})[adduct] = isotope_ms.get_spectrum(
            source='centroids')
    return mz_list
def calculate_isotope_patterns(sum_formulae, adduct, isocalc_resolution, isocalc_do_centroid=True, charge=1):
    """Generate the five most intense peak centroids for each sum formula.

    Args:
        sum_formulae: iterable of sum formulae; each is concatenated with
            ``adduct`` and parsed via ``SumFormulaParser.parse_string``.
        adduct: adduct appended to every formula.
        isocalc_resolution: resolving power; the peak width is the first m/z
            of the raw pattern divided by this value.
        isocalc_do_centroid: accepted but not used by this implementation.
        charge: forwarded to ``pyisocalc.isodist``.

    Returns:
        dict: ``result[sum_formula][adduct] = (mzs, intensities)``, at most
        five centroids each, ordered by decreasing intensity.
    """
    patterns = {}
    for formula in sum_formulae:
        parsed = pyisocalc.SumFormulaParser.parse_string(str(formula + adduct))
        theoretical = pyisocalc.isodist(parsed, cutoff=1e-4, charge=charge)
        first_mz = theoretical.get_spectrum()[0][0]
        # (A 200-2000 m/z window filter on first_mz is currently disabled.)
        # TODO: resolution = resolution(mz)
        peak_width = first_mz / isocalc_resolution
        blurred = pyisocalc.apply_gaussian(theoretical, peak_width, exact=False)
        centroid_mzs, centroid_ints = blurred.get_spectrum(source='centroids')
        top_five = centroid_ints.argsort()[::-1][:5]
        per_adduct = patterns.setdefault(formula, {})
        per_adduct[adduct] = (centroid_mzs[top_five], centroid_ints[top_five])
    return patterns
def generate_patterns(formulas_fn, resolution_func, mz_range, adducts=('H', 'K', 'Na')):
    """Compute centroided isotope patterns for every formula/adduct pair.

    Args:
        formulas_fn: path of a text file with one sum formula per line;
            blank lines are ignored.
        resolution_func: callable mapping an m/z value to the resolving
            power used at that m/z.
        mz_range: ``(mz_min, mz_max)``; pairs whose first raw-pattern m/z
            lies outside this closed interval are skipped.
        adducts: adducts appended to each formula (defaults to H, K, Na).

    Returns:
        dict: ``patterns[(formula, adduct)] = (mzs, intensities)`` as numpy
        arrays ordered by decreasing intensity.
    """
    mz_min, mz_max = mz_range
    # Read everything up front so the file is closed before the slow part.
    with open(formulas_fn) as formulas_file:
        formulae = [line.strip() for line in formulas_file]

    patterns = {}
    for f in formulae:
        if not f:
            # A blank line would otherwise be treated as an empty formula.
            continue
        for a in adducts:
            sf = pyisocalc.SumFormulaParser.parse_string(f + a)
            raw_pattern = pyisocalc.isodist(sf, cutoff=1e-4, charge=1)
            mz = raw_pattern.get_spectrum()[0][0]
            if mz < mz_min or mz > mz_max:
                continue
            fwhm = mz / resolution_func(mz)
            mzs, intensities = pyisocalc.apply_gaussian(
                raw_pattern, fwhm, exact=False).get_spectrum(source="centroids")
            mzs = np.array(mzs)
            intensities = np.array(intensities)
            order = np.argsort(intensities)[::-1]  # most intense first
            patterns[(f, a)] = (mzs[order], intensities[order])
    return patterns
def get_lists_of_mzs(sf):
    """Return the profile isotope pattern of ``sf`` plus its gradient-picked peaks.

    The pattern is generated with ``pyisocalc.isodist`` (sigma 0.01,
    charges -2, resolution 100000, no centroiding) and candidate peak
    indices are obtained from ``gradient``.

    Args:
        sf: sum formula, passed unchanged to ``pyisocalc.isodist``.

    Returns:
        A dict with five list-valued keys:

        * ``isodist_mzs`` / ``isodist_int`` -- profile m/z values and
          intensities from the first sample whose intensity exceeds 0.01 up
          to (but not including) the last such sample.
        * ``grad_mzs`` / ``grad_int`` -- m/z and intensity at every picked
          peak index.
        * ``grad_ind`` -- the picked indices as plain ints, shifted so that
          they are relative to the start of the ``isodist_*`` slice.

        Every value is an empty list when the pattern cannot be computed
        (any ``Exception`` raised along the way).
    """
    try:
        isotope_ms = pyisocalc.isodist(sf, plot=False, sigma=0.01, charges=-2,
                                       resolution=100000.0, do_centroid=False)
        mzlist = list(isotope_ms.get_mzs())
        intenslist = list(isotope_ms.get_intensities())
        # Only the indices returned by gradient() are used; the m/z and
        # intensity values are re-read from the profile lists below.
        _, _, indices_list = gradient(isotope_ms.get_mzs(),
                                      isotope_ms.get_intensities(),
                                      max_output=-1, weighted_bins=0)
        n_points = len(intenslist)
        # Move each index to its right-hand neighbour unless the sample at
        # the index is strictly more intense.  An index on the very last
        # sample has no neighbour and is kept as is instead of raising.
        indices_list = [
            i + 1
            if (i + 1 < n_points and not intenslist[i] > intenslist[i + 1])
            else i
            for i in indices_list
        ]
        mzs_list = [mzlist[i] for i in indices_list]
        intensities_list = [intenslist[i] for i in indices_list]
        # Indices of all samples above the 0.01 intensity threshold, in
        # ascending order; an empty list raises IndexError -> empty result.
        above = [i for i, value in enumerate(intenslist) if value > 0.01]
        min_i, max_i = above[0], above[-1]
        # NOTE(review): the slice stops before max_i, so the last sample
        # above the threshold is not returned -- confirm this is intended.
        return {
            "isodist_mzs": mzlist[min_i:max_i],
            "isodist_int": intenslist[min_i:max_i],
            "grad_mzs": mzs_list,
            "grad_int": intensities_list,
            "grad_ind": [int(i) - min_i for i in indices_list],
        }
    except Exception:
        # Best effort: callers get an empty pattern instead of an error.
        # KeyboardInterrupt / SystemExit are deliberately not swallowed.
        return {
            "isodist_mzs": [],
            "isodist_int": [],
            "grad_mzs": [],
            "grad_int": [],
            "grad_ind": [],
        }
def calculate_isotope_patterns(sum_formulae, adduct='', isocalc_sig=0.01, isocalc_resolution=200000., isocalc_do_centroid=True, charge=1, verbose=True):
    """Generate a list of peak centroids for each sum formula with the adduct.

    Args:
        sum_formulae: iterable of sum formula strings.
        adduct: adduct string appended to every formula before it is
            simplified with ``pyisocalc.complex_to_simple``.
        isocalc_sig: forwarded to ``pyisocalc.isodist`` as ``sigma``.
        isocalc_resolution: forwarded as ``resolution``.
        isocalc_do_centroid: forwarded as ``do_centroid``.
        charge: forwarded as ``charges``.
        verbose: when true, print each formula/adduct pair before processing.

    Returns:
        dict: ``mz_list[sum_formula][adduct]`` holds the result of
        ``get_spectrum(source='centroids')``.  Formulae are left out when
        simplification returns ``None``, when pyisocalc raises ``KeyError``,
        or when it raises a ``ValueError`` starting with
        "Element not recognised".

    Raises:
        ValueError: any other ``ValueError`` from ``complex_to_simple``.
    """
    from pyMS.pyisocalc import pyisocalc

    mz_list = {}
    for sum_formula in sum_formulae:
        if verbose:
            print('{} {}'.format(sum_formula, adduct))
        try:
            sf = pyisocalc.complex_to_simple(sum_formula + adduct)
        except KeyError as e:
            # Always skip: continuing would reuse `sf` from the previous
            # formula (or fail on an unbound name for the first one).
            print(str(e))
            continue
        except ValueError as e:
            if str(e).startswith("Element not recognised"):
                print(str(e))
                continue
            print('{} {}'.format(sum_formula, adduct))
            raise
        if sf is None:  # negative atoms as a result of simplification
            print('negative adduct for {} : {}'.format(sum_formula, adduct))
            continue
        try:
            isotope_ms = pyisocalc.isodist(sf,
                                           plot=False,
                                           sigma=isocalc_sig,
                                           charges=charge,
                                           resolution=isocalc_resolution,
                                           do_centroid=isocalc_do_centroid)
        except KeyError as e:
            # Same reasoning as above: never store a stale pattern.
            print(str(e))
            continue
        mz_list.setdefault(sum_formula, {})[adduct] = isotope_ms.get_spectrum(
            source='centroids')
    return mz_list
def load_queries(self):
    """Load the sum-formula database and compute a pattern per formula/adduct.

    Values read from ``self.config``:

    * ``['file_inputs']['database_file']`` -- text file, one sum formula per
      line.
    * ``['isotope_generation']['isocalc_sig']`` and
      ``['isotope_generation']['isocalc_resolution']`` -- forwarded to
      ``pyisocalc.isodist`` as ``sigma`` and ``resolution``.
    * ``['isotope_generation']['charge']`` -- list of dicts with
      ``'polarity'`` and ``'n_charges'``; only the first entry is used.
    * ``['isotope_generation']['adducts']`` -- list of dicts with an
      ``'adduct'`` entry.

    Attributes set on ``self``:

    * ``adducts`` -- list of adduct strings.
    * ``sum_formulae`` -- stripped lines of the database file.
    * ``mz_list`` -- ``mz_list[sum_formula][adduct] = (mzs, ints)`` with both
      arrays ordered by decreasing intensity.
    """
    config = self.config
    db_filename = config['file_inputs']['database_file']
    iso_config = config['isotope_generation']
    isocalc_sig = iso_config['isocalc_sig']
    isocalc_resolution = iso_config['isocalc_resolution']

    charge_states = iso_config['charge']
    if len(charge_states) > 1:
        print('Warning: only first charge state currently accepted')
    # Polarity and count are concatenated (e.g. '+' and 1 -> '+1') and parsed
    # as a signed integer.  Currently only supports first charge!!
    charge = int('{}{}'.format(charge_states[0]['polarity'],
                               charge_states[0]['n_charges']))
    self.adducts = [a['adduct'] for a in iso_config['adducts']]

    # Read in molecules; the context manager closes the file promptly.
    with open(db_filename) as db_file:
        self.sum_formulae = [line.strip() for line in db_file]

    self.mz_list = {}
    for adduct in self.adducts:
        for sum_formula in self.sum_formulae:
            isotope_ms = pyisocalc.isodist(sum_formula + adduct,
                                           plot=False,
                                           sigma=isocalc_sig,
                                           charges=charge,
                                           resolution=isocalc_resolution,
                                           do_centroid=True)
            mzs, ints = [np.array(part) for part in
                         isotope_ms.get_spectrum(source='centroids')]
            order = ints.argsort()[::-1]  # most intense centroid first
            self.mz_list.setdefault(sum_formula, {})[adduct] = (mzs[order],
                                                                ints[order])
# Script: build (or load from cache) the centroided isotope pattern of every
# formula in formulae.txt for the H, Na and K adducts, then flatten all
# pattern masses into one sorted array for neighbour-distance analysis.
import os
import cPickle

import numpy as np
from pyMS.pyisocalc import pyisocalc

# One sum formula per line.
with open("formulae.txt") as formulae_file:
    sum_formulae = [l.strip() for l in formulae_file]
adducts = ['H', 'Na', 'K']
patterns = {}

if os.path.isfile("patterns.pkl"):
    # NOTE: unpickling can execute arbitrary code; patterns.pkl must only
    # ever be the cache written by this script.
    with open("patterns.pkl") as cache_file:
        patterns = cPickle.load(cache_file)
else:
    print("generating patterns...")
    for sum_formula in sum_formulae:
        for adduct in adducts:
            isotope_ms = pyisocalc.isodist(sum_formula + adduct,
                                           plot=False,
                                           sigma=0.01,
                                           charges=1,
                                           resolution=200000,
                                           do_centroid=True)
            patterns.setdefault(sum_formula, {})[adduct] = \
                isotope_ms.get_spectrum(source='centroids')
    # File modes are left as they were (text mode with the default pickle
    # protocol) so caches written by earlier runs stay readable.
    with open('patterns.pkl', 'w') as f:
        cPickle.dump(patterns, f)

# Both lists walk `patterns` in the same order, so formulas[i] labels
# masses[i].
formulas = [k + '+' + a for k in patterns for a in patterns[k]]
masses = [patterns[k][a][0] for k in patterns for a in patterns[k]]

all_masses = np.concatenate(masses)
order = all_masses.argsort()
all_masses = all_masses[order]
# mol_indices[j] is the index into `masses`/`formulas` that the j-th sorted
# mass came from.
mol_indices = np.repeat(np.arange(len(masses)),
                        [len(m) for m in masses])[order]
# Gaps between consecutive masses in the sorted list.
mass_diffs = np.diff(all_masses)