def get_features_from_mean_spectrum(self, ppm=3., w=5., min_int=15., max_peaks=3):
    """Fill ``self.feature_list`` with TLC peaks found for each mean-spectrum centroid.

    The dataset summary spectrum is centroided, one ion image is extracted per
    centroid m/z, and every image is median-filtered and passed through
    ``tlc_smoothing.sqrt_apodization``. Peaks are then picked on the result with
    ``centroid_detection.gradient``. When fewer than ``max_peaks`` peaks are
    found, one ``(mz, position, intensity)`` tuple per peak is recorded.

    :param ppm: tolerance used for the summary spectrum and the ion images
    :param w: ``w`` argument forwarded to ``tlc_smoothing.sqrt_apodization``
    :param min_int: ``min_intensity`` used when picking peaks on the smoothed trace
    :param max_peaks: traces with this many peaks or more contribute nothing
    :return: None; results are stored on ``self.feature_list``
    """
    summary = self.ims_dataset.generate_summary_spectrum(ppm=ppm)
    spectrum_peaks = centroid_detection.gradient(
        np.asarray(summary[0]), np.asarray(summary[1]), min_intensity=3.)
    peak_mzs = spectrum_peaks[0]
    datacube = self.ims_dataset.get_ion_image(peak_mzs, ppm)
    self.feature_list = []
    for idx, mz in enumerate(peak_mzs):
        image = im_smoothing.median(datacube.xic_to_image(idx), size=3)
        trace = tlc_smoothing.sqrt_apodization(image, w=w)
        positions = np.asarray(range(len(trace)))
        trace_peaks = centroid_detection.gradient(
            positions, np.asarray(trace), min_intensity=min_int)
        if len(trace_peaks[0]) >= max_peaks:
            # too many peaks on this trace: skip the m/z entirely
            continue
        self.feature_list.extend(
            (mz, pos, height) for pos, height in zip(trace_peaks[0], trace_peaks[1]))
def get_isotope_pattern(self, formula_adduct_string, charge):
    """Build a Gaussian-broadened isotope pattern with centroids attached.

    :param formula_adduct_string: sum formula string parsed by ``pyisocalc.parseSumFormula``
    :param charge: charge forwarded to ``pyisocalc.perfect_pattern``
    :return: the object returned by ``pyisocalc.apply_gaussian`` after
        ``add_centroids`` has been called on it
    """
    sum_formula = pyisocalc.parseSumFormula(formula_adduct_string)
    pattern = pyisocalc.perfect_pattern(sum_formula, charge=charge)
    # presumably the m/z of the first theoretical centroid -- confirm against pyisocalc
    reference_mz = pattern.get_spectrum(source='centroids')[0][0]
    sigma = self.sigma_at_mz(reference_mz)
    resolution = self.points_per_mz(sigma)
    profile = pyisocalc.apply_gaussian(pattern, sigma, resolution)
    peak_mzs, peak_ints, _ = gradient(*profile.get_spectrum())
    profile.add_centroids(peak_mzs, peak_ints)
    return profile
def preprocess_spectrum(mzs, ints):
    """Smooth a spectrum, centroid it and return the peaks sorted by m/z.

    :param mzs: m/z values of the profile spectrum
    :param ints: intensities matching ``mzs``
    :return: ``(mzs, ints)`` of the detected centroids in ascending m/z order
    """
    # Savitzky-Golay smoothing: window of 5 points, polynomial order 2
    smoothed = signal.savgol_filter(ints, 5, 2)
    peak_mzs, peak_ints, _ = gradient(
        np.asarray(mzs), np.asarray(smoothed), max_output=-1, weighted_bins=3)
    ascending = peak_mzs.argsort()
    return peak_mzs[ascending], peak_ints[ascending]
def get_peak_list(ds_id):
    """Return the centroid m/z values of a dataset's mean spectrum.

    Peaks are kept only if they reach ten times the smallest positive
    intensity of the mean spectrum.

    :param ds_id: dataset identifier passed to ``get_mean_spectrum``
    :return: first element of the ``gradient`` result (the peak m/z values)
    """
    print('Get Mean Spectrum')
    mean_spec = get_mean_spectrum(ds_id)
    intensities = mean_spec[1]
    threshold = 10 * intensities[intensities > 0].min()
    peaks = gradient(mean_spec[0], intensities, min_intensity=threshold)
    mz_list = peaks[0]
    print(len(mz_list))
    return mz_list
def rebin(self):
    """Return ion images for bins spanning consecutive summary-spectrum peaks.

    Up to 2500 peaks are requested from the summary spectrum. Each pair of
    neighbouring peaks defines one bin: its centre is the midpoint and its
    absolute tolerance is the distance between the two peaks.

    :return: ``np.asarray`` of the ``xic`` attribute of the resulting ion images
    """
    from pyMSpec.centroid_detection import gradient

    summary = self.generate_summary_spectrum()
    peak_mzs = gradient(summary[0], summary[1], max_output=2500)[0]
    upper = peak_mzs[1:]
    lower = peak_mzs[0:-1]
    bin_centres = (upper + lower) / 2.
    bin_widths = upper - lower
    ion_images = self.get_ion_image(bin_centres, tols=bin_widths, tol_type='abs')
    return np.asarray(ion_images.xic)
def get_isotope_pattern(self, formula_adduct_string, charge):
    """Simulate the isotope pattern of a sum formula and attach its centroids.

    The theoretical pattern is broadened with a Gaussian whose sigma comes from
    ``self.sigma_at_mz`` and whose sampling density comes from
    ``self.points_per_mz``; the broadened profile is then centroided with
    ``gradient``.

    :param formula_adduct_string: sum formula string parsed by ``pyisocalc.parseSumFormula``
    :param charge: charge forwarded to ``pyisocalc.perfect_pattern``
    :return: the broadened spectrum object, with centroids added
    """
    parsed_formula = pyisocalc.parseSumFormula(formula_adduct_string)
    theoretical = pyisocalc.perfect_pattern(parsed_formula, charge=charge)
    centroid_spectrum = theoretical.get_spectrum(source='centroids')
    # presumably the m/z of the first theoretical centroid -- confirm against pyisocalc
    sigma = self.sigma_at_mz(centroid_spectrum[0][0])
    pts_per_mz = self.points_per_mz(sigma)
    broadened = pyisocalc.apply_gaussian(theoretical, sigma, pts_per_mz)
    detected = gradient(*broadened.get_spectrum())
    broadened.add_centroids(detected[0], detected[1])
    return broadened
def get_xic(self, mz, tol, w=5, min_int=1):
    """Build an ``Xic`` for a single m/z, including its detected peaks.

    :param mz: target m/z (wrapped into a one-element array)
    :param tol: tolerance passed as second argument to ``get_ion_image``
    :param w: ``w`` argument forwarded to ``tlc_smoothing.sqrt_apodization``
    :param min_int: ``min_intensity`` used when picking peaks on the trace
    :return: ``Xic`` holding ``[self.x_pos, trace]`` and the ``gradient`` result
    """
    target_mz = np.asarray([mz, ])
    tolerance = np.asarray(tol)
    image = self.ims_dataset.get_ion_image(target_mz, tolerance).xic_to_image(0)
    image = im_smoothing.median(image, size=3)
    trace = tlc_smoothing.sqrt_apodization(image, w=w)
    # peaks are located on the index axis of the trace, not on self.x_pos
    positions = np.asarray(range(len(trace)))
    features = centroid_detection.gradient(
        positions, np.asarray(trace), min_intensity=min_int)
    return Xic(xic=[self.x_pos, trace], xic_features=features)
def exact_mass(JSON_config_file):
    """Run an exact-mass search on the mean spectrum of a dataset and write the results.

    :param JSON_config_file: configuration file read by ``get_variables``
    :return: None; results are written by ``output_results_exactMass``
    """
    config = get_variables(JSON_config_file)
    sum_formulae, adducts, mz_list = generate_isotope_patterns(config)
    IMS_dataset = load_data(config)
    # the summary spectrum is built at half the image-generation ppm
    half_ppm = config['image_generation']['ppm'] / 2.
    spec_axis, mean_spec = IMS_dataset.generate_summary_spectrum(
        summary_type='mean', ppm=half_ppm)

    from pyMSpec.centroid_detection import gradient
    import numpy as np

    mzs, counts, _ = gradient(
        np.asarray(spec_axis), np.asarray(mean_spec), weighted_bins=2)
    ppm_value_score = run_exact_mass_search(
        config, mzs, counts, sum_formulae, adducts, mz_list)
    output_results_exactMass(
        config, ppm_value_score, sum_formulae, adducts, mz_list,
        fname='exactMass_all_adducts')
def centroid_IMS(input_filename, output_filename, instrumentInfo={}, sharedDataInfo={}):
    """Centroid every spectrum of an ``slFile`` dataset and write an HDF5 file.

    The output has three root groups: ``spectral_data`` (one sub-group per
    spectrum index holding ``centroid_mzs``, ``centroid_intensities`` and
    ``coordinates``), ``spatial_data`` (created empty) and ``shared_data``
    (instrument parameters, one root region of interest, one sample). Every
    spectrum group soft-links to the shared entries.

    :param input_filename: path handed to ``slFile``
    :param output_filename: HDF5 file to create (opened with mode ``'w'``)
    :param instrumentInfo: mapping copied to the attributes of
        ``shared_data/instrument_parameters/001``
    :param sharedDataInfo: mapping copied to the attributes of
        ``shared_data/samples/001``
    :return: None
    """
    from pyMS.centroid_detection import gradient

    sl = slFile(input_filename)
    n_total = np.shape(sl.spectra)[0]
    with h5py.File(output_filename, 'w') as f_out:
        # root groups for output data
        spectral_data = f_out.create_group('spectral_data')
        f_out.create_group('spatial_data')  # created but never populated here
        shared_data = f_out.create_group('shared_data')

        # populate common variables - hardcoded for h5 data
        # instrument parameters
        instrument_parameters_1 = shared_data.create_group('instrument_parameters/001')
        for tag in instrumentInfo:
            instrument_parameters_1.attrs[tag] = instrumentInfo[tag]
        # ROIs
        # todo - determine and propagate all ROIs
        roi_1 = shared_data.create_group('regions_of_interest/001')
        roi_1.attrs['name'] = 'root region'
        roi_1.attrs['parent'] = ''
        # sample
        sample_1 = shared_data.create_group('samples/001')
        for tag in sharedDataInfo:
            sample_1.attrs[tag] = sharedDataInfo[tag]

        done = 0
        for key in range(0, n_total):
            mzs, intensities = sl.get_spectrum(key)
            mzs_c, intensities_c, _ = gradient(mzs, intensities)
            this_spectrum = spectral_data.create_group(str(key))
            this_spectrum.create_dataset('centroid_mzs', data=np.float32(mzs_c),
                                         compression="gzip", compression_opts=9)
            this_spectrum.create_dataset('centroid_intensities', data=np.float32(intensities_c),
                                         compression="gzip", compression_opts=9)
            this_spectrum.create_dataset(
                'coordinates',
                data=(sl.coords[0, key], sl.coords[1, key], sl.coords[2, key]))
            # link to shared parameters
            this_spectrum['ROIs/001'] = h5py.SoftLink('/shared_data/regions_of_interest/001')
            this_spectrum['samples/001'] = h5py.SoftLink('/shared_data/samples/001')
            this_spectrum['instrument_parameters'] = h5py.SoftLink(
                '/shared_data/instrument_parameters/001')
            done += 1
            if done % 1000 == 0:
                # single-argument print(): valid and identical on Python 2 and 3
                print("[%s] progress: %.1f%%" % (input_filename, float(done) * 100.0 / n_total))
    print("finished!")
def generate_spectrum(self, x, y, mode='centroid', cent_kwargs={}):
    """Simulate the spectrum at a position, optionally centroided.

    :param x: first argument forwarded to ``self.get_peaks``
    :param y: second argument forwarded to ``self.get_peaks``
    :param mode: ``'centroid'`` to smooth and centroid the simulated spectrum,
        ``'profile'`` to return it unchanged
    :param cent_kwargs: extra keyword arguments forwarded to ``gradient``
        (only read, never modified)
    :return: ``(mzs, intensities)``
    :raises ValueError: if ``mode`` is neither ``'centroid'`` nor ``'profile'``
    """
    # the spectrum is simulated before the mode is validated
    peaks = self.get_peaks(x, y)
    mzs, intensities = self.simulate_spectrum(peaks)
    if mode == 'profile':
        return mzs, intensities
    if mode != 'centroid':
        raise ValueError("{} not recognised mode".format(mode))

    from pyMSpec.centroid_detection import gradient
    from pyMSpec import smoothing

    mzs, intensities = smoothing.fast_change(mzs, intensities)
    mzs, intensities, _ = gradient(np.asarray(mzs), np.asarray(intensities), **cent_kwargs)
    return mzs, intensities
def centroid_imzml(input_filename, output_filename, step=[], apodization=False, w_size=10, min_intensity=1e-5,
                   region_name="", prevent_duplicate_pixels=False):
    """Centroid an ``slFile`` dataset and write it as a centroided imzML file.

    :param input_filename: path handed to ``slFile``
    :param output_filename: imzML file written through ``ImzMLWriter``
    :param step: pixel spacing for the three coordinate axes; when empty it is
        estimated as the median spacing of the unique coordinates per axis
    :param apodization: if True, ``smoothing.apodization`` is applied before centroiding
    :param w_size: accepted for interface compatibility; not used by this function
    :param min_intensity: ``min_intensity`` passed to ``gradient``
    :type min_intensity: float
    :param region_name: ``region_name`` passed to ``slFile``
    :param prevent_duplicate_pixels: if True, only the spectrum at the first
        index reported by ``np.unique`` is written for each pixel position
    :return: None
    """
    from pyimzml.ImzMLWriter import ImzMLWriter
    from pyMSpec.centroid_detection import gradient

    sl = slFile(input_filename, region_name=region_name)
    mz_dtype = sl.Mzs.dtype
    int_dtype = sl.get_spectrum(0)[1].dtype
    # Convert coords to index -> kinda hacky
    coords = np.asarray(sl.coords.copy()).T.round(5)
    coords -= np.amin(coords, axis=0)
    # len() rather than `== []` so that (), [] and empty arrays all trigger the guess
    if len(step) == 0:
        step = np.array([np.median(np.diff(np.unique(coords[sl.spotlist, i]))) for i in range(3)])
        step[np.isnan(step)] = 1  # an axis with a single unique value yields NaN
        print('estimated pixel size: {} x {}'.format(step[0], step[1]))
    coords = coords / np.reshape(step, (3,)).T
    coords = coords.round().astype(int)
    ncol, nrow, _ = np.amax(coords, axis=0) + 1
    print('new image size: {} x {}'.format(nrow, ncol))
    if prevent_duplicate_pixels:
        # view each coordinate row as one opaque item so np.unique works row-wise
        b = np.ascontiguousarray(coords).view(np.dtype((np.void, coords.dtype.itemsize * coords.shape[1])))
        _, coord_idx = np.unique(b, return_index=True)
        print('{} {}'.format(np.shape(sl.spotlist), np.shape(coord_idx)))
        print("original number of spectra: {}".format(len(coords)))
        # set gives O(1) membership tests inside the write loop
        keep = set(int(i) for i in coord_idx)
    else:
        coord_idx = range(len(coords))
        keep = None
    n_total = len(coord_idx)
    print('spectra to write: {}'.format(n_total))
    with ImzMLWriter(output_filename, mz_dtype=mz_dtype, intensity_dtype=int_dtype) as imzml:
        done = 0
        for key in sl.spotlist:
            if keep is not None and key not in keep:
                continue  # skip duplicate pixels
            mzs, intensities = sl.get_spectrum(key)
            if apodization:
                from pyMSpec import smoothing
                # todo - add to processing list in imzml
                mzs, intensities = smoothing.apodization(mzs, intensities)
            mzs_c, intensities_c, _ = gradient(mzs, intensities, weighted_bins=5,
                                               min_intensity=min_intensity)
            pos = coords[key]
            # second axis is flipped so row 0 ends up at the other edge of the image
            pos = (pos[0], nrow - 1 - pos[1], pos[2])
            imzml.addSpectrum(mzs_c, intensities_c, pos)
            done += 1
            if done % 1000 == 0:
                # single-argument print(): valid and identical on Python 2 and 3
                print("[%s] progress: %.1f%%" % (input_filename, float(done) * 100.0 / n_total))
    print("finished!")
def correlation(self, basemz, mz_list=None):
    """Correlate the ion image at ``basemz`` with the ion images of other m/z values.

    :param basemz: m/z of the reference ion image
    :param mz_list: m/z values to compare against; when ``None`` or empty, the
        peaks of the summary spectrum at or above three times its smallest
        positive intensity are used
    :return: ``(mz_list, corr)`` where ``corr[i]`` is the Pearson correlation
        coefficient between the reference image and the image at ``mz_list[i]``
    """
    # `not mz_list` raises ValueError for multi-element numpy arrays (the very
    # type this function returns), so test for None / emptiness explicitly.
    if mz_list is None or len(mz_list) == 0:
        from pyMSpec.centroid_detection import gradient
        mean_spec = self.generate_summary_spectrum(ppm=self.ppm)
        min_positive = mean_spec[1][mean_spec[1] > 0].min()
        mz_list = gradient(mean_spec[0], mean_spec[1], min_intensity=3 * min_positive)[0]
    baseim = self.get_ion_image(basemz, self.ppm).xic[0]
    corr = np.zeros(len(mz_list))
    for ii, mz in enumerate(mz_list):
        ionim = self.get_ion_image(mz, self.ppm)
        corr[ii] = np.corrcoef(baseim, ionim.xic[0])[0][1]
    return mz_list, corr
def find_centroid_mzs(mzs, ints):
    """Return the centroid m/z values found in a small profile window.

    :param mzs: m/z values of the sampled points
    :param ints: intensities matching ``mzs``
    :return: array of centroid m/z values; an empty array when there are two
        or fewer points or when smoothing/centroiding raises ``ValueError``
    """
    try:
        # Fixed: the original tested `len(mzs <= 2)` (misplaced parenthesis),
        # which is truthy for every non-empty array and raises TypeError for lists.
        if len(mzs) <= 2:
            # If there aren't enough peaks to model a centroid, assume the centroid is
            # outside of the sampled range and return nothing.
            return np.empty(0)
        # NOTE(review): the original indentation was lost; the centroiding call is
        # assumed to sit inside this guard because it cannot run when `gradient`
        # is None -- confirm against the upstream file.
        if signal is not None and gradient is not None:
            ints = signal.savgol_filter(ints, 5, 2)
            mzs, ints, _ = gradient(
                np.asarray(mzs),
                np.asarray(ints),
                max_output=-1,
                # at most 3 bins, fewer for short windows, never less than 1
                weighted_bins=max(min(3, (len(mzs) - 1) // 2), 1),
            )
        return mzs
    except ValueError:
        return np.empty(0)
def correlation(ds_id, basemz, mz_list=None):
    """Correlate the ion image at ``basemz`` with the ion images of other m/z values.

    :param ds_id: dataset identifier passed to ``get_mean_spectrum`` and ``get_ds_info``
    :param basemz: m/z of the reference ion image
    :param mz_list: m/z values to compare against; when ``None`` or empty, the
        peaks of the mean spectrum at or above ten times its smallest positive
        intensity are used
    :return: ``(mz_list, corr)`` where ``corr[i]`` is the Pearson correlation
        coefficient between the flattened reference image and the flattened
        image at ``mz_list[i]``
    """
    # `not mz_list` raises ValueError for multi-element numpy arrays (the very
    # type this function returns), so test for None / emptiness explicitly.
    if mz_list is None or len(mz_list) == 0:
        print('Get Mean Spectrum')
        mean_spec = get_mean_spectrum(ds_id)
        min_positive = mean_spec[1][mean_spec[1] > 0].min()
        mz_list = gradient(mean_spec[0], mean_spec[1], min_intensity=10 * min_positive)[0]
        print(len(mz_list))
    ds_info = get_ds_info(ds_id)
    imzb = ImzbReader(ds_info['imzb'])
    baseim = imzb.get_mz_image(basemz, ds_info['ppm']).flatten()
    corr = np.zeros(len(mz_list))
    for ii, mz in enumerate(mz_list):
        ionim = imzb.get_mz_image(mz, ds_info['ppm'])
        corr[ii] = np.corrcoef(baseim, ionim.flatten())[0][1]
    return mz_list, corr
def do_peak_plot(im, m_s, m_s_f, label):
    """Show an image above its row traces, mean trace, smoothed trace and picked peaks.

    ``im`` should always be presented so that the x-axis goes along the TLC track.

    :param im: 2-D image; every row is drawn as a black line
    :param m_s: not used by this function
    :param m_s_f: processed trace drawn in red and used for peak picking
    :param label: legend label for ``m_s_f``
    :return: None; the figure is displayed with ``plt.show()``
    """
    plt.figure(figsize=(10, 10))
    plt.subplot(211)
    plt.imshow(im)
    plt.subplot(212)
    for row in range(im.shape[0]):
        line_kwargs = {'color': "black"}
        if row == 1:
            # only one row carries the legend entry, so it is not repeated per line
            line_kwargs['label'] = 'data peak'
        plt.plot(im[row, :], **line_kwargs)
    plt.plot(np.mean(im, axis=0), color='blue', label='data mean')
    plt.plot(m_s_f, color='red', label=label)
    positions = np.asarray(range(np.shape(m_s_f)[0]))
    peaks = centroid_detection.gradient(positions, np.asarray(m_s_f), min_intensity=1.)
    if len(peaks[0]) != 0:
        plt.stem(peaks[0], peaks[1])
    plt.xlim((0, im.shape[1]))
    plt.legend()
    plt.show()
def centroid_imzml(input_filename, output_filename, step=[], apodization=False, w_size=10, min_intensity=1e-5,
                   prevent_duplicate_pixels=False):
    """Centroid a profile imzML file and write the result to a new imzML file.

    :param input_filename: source file path (must be .imzml)
    :param output_filename: output file path (must be .imzml)
    :param step: grid spacing of pixels per axis; when empty it is estimated
        as the median spacing of the unique coordinates per axis
    :param apodization: whether to try to remove FT wiggle artefacts before centroiding
    :param w_size: window size (m/z bins) passed to ``smoothing.apodization``
    :param min_intensity: minimum intensity of peaks returned during centroiding
    :type min_intensity: float
    :param prevent_duplicate_pixels: if True, only the spectrum at the first
        index reported by ``np.unique`` is written for pixels that share the
        same coordinates
    :return: None
    """
    from pyimzml.ImzMLParser import ImzMLParser
    from pyimzml.ImzMLWriter import ImzMLWriter
    from pyMSpec.centroid_detection import gradient

    imzml_in = ImzMLParser(input_filename)
    precisionDict = {
        'f': ("32-bit float", np.float32),
        'd': ("64-bit float", np.float64),
        'i': ("32-bit integer", np.int32),
        'l': ("64-bit integer", np.int64),
    }
    mz_dtype = precisionDict[imzml_in.mzPrecision][1]
    int_dtype = precisionDict[imzml_in.intensityPrecision][1]
    # Convert coords to index -> kinda hacky
    coords = np.asarray(imzml_in.coordinates).round(5)
    if coords.shape[1] == 2:
        # Pad 2-D coordinates with z = 0 up front. The original tried to do this
        # per spectrum with `pos.append(0)`, which fails on numpy arrays, and the
        # 3-axis reshape/unpack below failed even earlier.
        coords = np.hstack([coords, np.zeros((coords.shape[0], 1), dtype=coords.dtype)])
    coords -= np.amin(coords, axis=0)
    # len() rather than `== []` so that (), [] and empty arrays all trigger the guess
    if len(step) == 0:
        step = np.array([
            np.median(np.diff(np.unique(coords[:, i])))
            for i in range(coords.shape[1])
        ])
        step[np.isnan(step)] = 1  # an axis with a single unique value yields NaN
        print('estimated pixel size: {} x {}'.format(step[0], step[1]))
    else:
        step = np.asarray(step, dtype=float).ravel()
        if step.size == 2:
            step = np.append(step, 1.)  # unit spacing for the padded z axis
    coords = coords / np.reshape(step, (3, )).T
    coords = coords.round().astype(int)
    ncol, nrow, _ = np.amax(coords, axis=0) + 1
    print('new image size: {} x {}'.format(nrow, ncol))
    if prevent_duplicate_pixels:
        # view each coordinate row as one opaque item so np.unique works row-wise
        b = np.ascontiguousarray(coords).view(
            np.dtype((np.void, coords.dtype.itemsize * coords.shape[1])))
        _, coord_idx = np.unique(b, return_index=True)
        print('{} {}'.format(np.shape(imzml_in.coordinates), np.shape(coord_idx)))
        print("original number of spectra: {}".format(len(coords)))
        # set gives O(1) membership tests inside the write loop
        keep = set(int(i) for i in coord_idx)
    else:
        coord_idx = range(len(coords))
        keep = None
    n_total = len(coord_idx)
    print('spectra to write: {}'.format(n_total))
    with ImzMLWriter(output_filename, mz_dtype=mz_dtype, intensity_dtype=int_dtype) as imzml_out:
        done = 0
        for key in range(np.shape(imzml_in.coordinates)[0]):
            print(key)
            if keep is not None and key not in keep:
                continue  # skip duplicate pixels
            mzs, intensities = imzml_in.getspectrum(key)
            if apodization:
                from pyMSpec import smoothing
                # todo - add to processing list in imzml
                mzs, intensities = smoothing.apodization(
                    mzs, intensities, {'w_size': w_size})
            mzs_c, intensities_c, _ = gradient(mzs, intensities, min_intensity=min_intensity)
            pos = coords[key]
            # second axis is flipped so row 0 ends up at the other edge of the image
            pos = (pos[0], nrow - 1 - pos[1], pos[2])
            imzml_out.addSpectrum(mzs_c, intensities_c, pos)
            done += 1
            if done % 1000 == 0:
                # single-argument print(): valid and identical on Python 2 and 3
                print("[%s] progress: %.1f%%" % (input_filename, float(done) * 100.0 / n_total))
    print("finished!")
def centroid_IMS(input_filename, output_filename, instrumentInfo={}, sharedDataInfo={}):
    """Centroid every spectrum of an ``slFile`` dataset and write an HDF5 file.

    The output has three root groups: ``spectral_data`` (one sub-group per
    spectrum index holding ``centroid_mzs``, ``centroid_intensities`` and
    ``coordinates``), ``spatial_data`` (created empty) and ``shared_data``
    (instrument parameters, one root region of interest, one sample). Every
    spectrum group soft-links to the shared entries.

    :param input_filename: path handed to ``slFile``
    :param output_filename: HDF5 file to create (opened with mode ``'w'``)
    :param instrumentInfo: mapping copied to the attributes of
        ``shared_data/instrument_parameters/001``
    :param sharedDataInfo: mapping copied to the attributes of
        ``shared_data/samples/001``
    :return: None
    """
    from pyMS.centroid_detection import gradient

    sl = slFile(input_filename)
    n_total = np.shape(sl.spectra)[0]
    with h5py.File(output_filename, 'w') as f_out:
        # root groups for output data
        spectral_data = f_out.create_group('spectral_data')
        f_out.create_group('spatial_data')  # created but never populated here
        shared_data = f_out.create_group('shared_data')

        # populate common variables - hardcoded for h5 data
        # instrument parameters
        instrument_parameters_1 = shared_data.create_group(
            'instrument_parameters/001')
        for tag in instrumentInfo:
            instrument_parameters_1.attrs[tag] = instrumentInfo[tag]
        # ROIs
        # todo - determine and propagate all ROIs
        roi_1 = shared_data.create_group('regions_of_interest/001')
        roi_1.attrs['name'] = 'root region'
        roi_1.attrs['parent'] = ''
        # sample
        sample_1 = shared_data.create_group('samples/001')
        for tag in sharedDataInfo:
            sample_1.attrs[tag] = sharedDataInfo[tag]

        done = 0
        for key in range(0, n_total):
            mzs, intensities = sl.get_spectrum(key)
            mzs_c, intensities_c, _ = gradient(mzs, intensities)
            this_spectrum = spectral_data.create_group(str(key))
            this_spectrum.create_dataset('centroid_mzs',
                                         data=np.float32(mzs_c),
                                         compression="gzip",
                                         compression_opts=9)
            this_spectrum.create_dataset('centroid_intensities',
                                         data=np.float32(intensities_c),
                                         compression="gzip",
                                         compression_opts=9)
            this_spectrum.create_dataset('coordinates',
                                         data=(sl.coords[0, key],
                                               sl.coords[1, key],
                                               sl.coords[2, key]))
            # link to shared parameters
            this_spectrum['ROIs/001'] = h5py.SoftLink(
                '/shared_data/regions_of_interest/001')
            this_spectrum['samples/001'] = h5py.SoftLink(
                '/shared_data/samples/001')
            this_spectrum['instrument_parameters'] = h5py.SoftLink(
                '/shared_data/instrument_parameters/001')
            done += 1
            if done % 1000 == 0:
                # single-argument print(): valid and identical on Python 2 and 3
                print("[%s] progress: %.1f%%" %
                      (input_filename, float(done) * 100.0 / n_total))
    print("finished!")
def centroid_imzml(input_filename, output_filename, step=[], apodization=False, w_size=10, min_intensity=1e-5,
                   region_name="", prevent_duplicate_pixels=False):
    """Centroid an ``slFile`` dataset and write it as a centroided imzML file.

    :param input_filename: path handed to ``slFile``
    :param output_filename: imzML file written through ``ImzMLWriter``
    :param step: pixel spacing for the three coordinate axes; when empty it is
        estimated as the median spacing of the unique coordinates per axis
    :param apodization: if True, ``smoothing.apodization`` is applied before centroiding
    :param w_size: accepted for interface compatibility; not used by this function
    :param min_intensity: ``min_intensity`` passed to ``gradient``
    :type min_intensity: float
    :param region_name: ``region_name`` passed to ``slFile``
    :param prevent_duplicate_pixels: if True, only the spectrum at the first
        index reported by ``np.unique`` is written for each pixel position
    :return: None
    """
    from pyimzml.ImzMLWriter import ImzMLWriter
    from pyMSpec.centroid_detection import gradient

    sl = slFile(input_filename, region_name=region_name)
    mz_dtype = sl.Mzs.dtype
    int_dtype = sl.get_spectrum(0)[1].dtype
    # Convert coords to index -> kinda hacky
    coords = np.asarray(sl.coords.copy()).T.round(5)
    coords -= np.amin(coords, axis=0)
    # len() rather than `== []` so that (), [] and empty arrays all trigger the guess
    if len(step) == 0:
        step = np.array([
            np.median(np.diff(np.unique(coords[sl.spotlist, i])))
            for i in range(3)
        ])
        step[np.isnan(step)] = 1  # an axis with a single unique value yields NaN
        print('estimated pixel size: {} x {}'.format(step[0], step[1]))
    coords = coords / np.reshape(step, (3, )).T
    coords = coords.round().astype(int)
    ncol, nrow, _ = np.amax(coords, axis=0) + 1
    print('new image size: {} x {}'.format(nrow, ncol))
    if prevent_duplicate_pixels:
        # view each coordinate row as one opaque item so np.unique works row-wise
        b = np.ascontiguousarray(coords).view(
            np.dtype((np.void, coords.dtype.itemsize * coords.shape[1])))
        _, coord_idx = np.unique(b, return_index=True)
        print('{} {}'.format(np.shape(sl.spotlist), np.shape(coord_idx)))
        print("original number of spectra: {}".format(len(coords)))
        # set gives O(1) membership tests inside the write loop
        keep = set(int(i) for i in coord_idx)
    else:
        coord_idx = range(len(coords))
        keep = None
    n_total = len(coord_idx)
    print('spectra to write: {}'.format(n_total))
    with ImzMLWriter(output_filename, mz_dtype=mz_dtype, intensity_dtype=int_dtype) as imzml:
        done = 0
        for key in sl.spotlist:
            if keep is not None and key not in keep:
                continue  # skip duplicate pixels
            mzs, intensities = sl.get_spectrum(key)
            if apodization:
                from pyMSpec import smoothing
                # todo - add to processing list in imzml
                mzs, intensities = smoothing.apodization(mzs, intensities)
            mzs_c, intensities_c, _ = gradient(mzs, intensities, min_intensity=min_intensity)
            pos = coords[key]
            # second axis is flipped so row 0 ends up at the other edge of the image
            pos = (pos[0], nrow - 1 - pos[1], pos[2])
            imzml.addSpectrum(mzs_c, intensities_c, pos)
            done += 1
            if done % 1000 == 0:
                # single-argument print(): valid and identical on Python 2 and 3
                print("[%s] progress: %.1f%%" % (input_filename, float(done) * 100.0 / n_total))
    print("finished!")