def resolution_estimate(raw_data, n_spectra=25):
    """Estimate a resolution-vs-m/z model from randomly sampled spectra.

    For each sampled spectrum, detects peaks, keeps the higher-intensity
    ones, measures their resolution, and fits
    log(resolution) = intercept + slope * log(mz) with RANSAC.
    The median slope/intercept over all samples defines the model.

    :param raw_data: dataset handle accepted by read_random_spectrum
    :param n_spectra: number of random spectra to sample
    :return: callable mapping an m/z value to the estimated resolution
    """
    slopes = []
    intercepts = []
    # NOTE: the loop index is intentionally anonymous; the original shadowed
    # it with the inner enumerate() index.
    for _ in range(n_spectra):
        mzs, intensities = read_random_spectrum(raw_data)
        peak_positions = np.array(gradient(mzs, intensities)[-1])
        intensities_at_peaks = intensities[peak_positions]
        # keep only the top 60% most intense peaks
        high_intensity_threshold = np.percentile(intensities_at_peaks, 40)
        peak_positions = peak_positions[intensities[peak_positions] > high_intensity_threshold]
        resolutions = np.array([resolution_at_peak(peak_pos, mzs, intensities)
                                for peak_pos in peak_positions])
        mzs = mzs[peak_positions]
        # discard peaks where no resolution could be measured
        mzs = mzs[resolutions > 0]
        resolutions = resolutions[resolutions > 0]
        ransac = RANSACRegressor()
        ransac.fit(np.log(mzs).reshape((-1, 1)),
                   np.log(resolutions).reshape((-1, 1)))
        slopes.append(ransac.estimator_.coef_[0][0])
        intercepts.append(ransac.estimator_.intercept_[0])
    slope = np.median(slopes)
    intercept = np.median(intercepts)
    return lambda mz: np.exp(intercept + slope * np.log(mz))
def imzml(input_filename, output_filename, smoothMethod="nosmooth", centroid=False): import h5py import numpy as np ### Open files h5 = h5py.File(input_filename, 'r') # Readonly, file must exist ### get root groups from input data root_group_names = h5.keys() spots = h5['Spots'] spectraGroup = 'InitialMeasurement' mzs = np.asarray( h5['/SamplePositions/GlobalMassAxis/']['SamplePositions'] ) # we don't write this but will use it for peak detection file_version = h5['Version'][ 0] # some hard-coding to deal with different file versions if file_version > 5: coords = h5['Registrations']['0']['Coordinates'] else: coords = h5['Coordinates'] coords = np.asarray(coords).T.round(5) coords -= np.amin(coords, axis=0) step = np.array( [np.mean(np.diff(np.unique(coords[:, i]))) for i in range(3)]) step[np.isnan(step)] = 1 coords /= np.reshape(step, (3, )) coords = coords.round().astype(int) ncol, nrow, _ = np.amax(coords, axis=0) + 1 g = h5['Spots/0/' + spectraGroup + '/'] mz_dtype = g['SamplePositions/SamplePositions'][:].dtype int_dtype = g['Intensities'][:].dtype print 'dim: {} x {}'.format(nrow, ncol) n_total = len(spots.keys()) done = 0 keys = map(str, sorted(map(int, h5['Spots'].keys()))) ### write spectra with ImzMLWriter(output_filename, mz_dtype=mz_dtype, intensity_dtype=int_dtype) as imzml: n = 0 for key, pos in zip(keys, coords): spot = spots[key] ## make new spectrum intensities = np.asarray(spot[spectraGroup]['Intensities']) if smoothMethod != []: intensities = smooth_spectrum(mzs, intensities, smoothMethod) if centroid: from pyMS import centroid_detection mzs, intensities, _ = centroid_detection.gradient( mzs, intensities, max_output=-1, weighted_bins=3) # write to file pos = (nrow - 1 - pos[1], pos[0], pos[2]) imzml.addSpectrum(mzs, intensities, pos) done += 1 if done % 1000 == 0: print "[%s] progress: %.1f%%" % (input_filename, float(done) * 100.0 / n_total) print "finished!"
def get_lists_of_mzs(sf):
    """Compute the theoretical isotope pattern and its centroids for sum formula sf.

    :param sf: sum formula string accepted by pyisocalc.isodist
    :return: dict with the continuous distribution ("isodist_mzs/_int"),
             centroided peaks ("grad_mzs/_int") and centroid indices relative
             to the trimmed distribution ("grad_ind"); all lists are empty on
             failure.
    """
    try:
        isotope_ms = pyisocalc.isodist(sf, plot=False, sigma=0.01, charges=-2,
                                       resolution=100000.0, do_centroid=False)
        mzlist = list(isotope_ms.get_mzs())
        intenslist = list(isotope_ms.get_intensities())
        mzs_list, intensities_list, indices_list = gradient(
            isotope_ms.get_mzs(), isotope_ms.get_intensities(),
            max_output=-1, weighted_bins=0)
        # snap each centroid index to the higher-intensity neighbour
        indices_list = [i if intenslist[i] > intenslist[i + 1] else i + 1
                        for i in indices_list]
        mzs_list = [mzlist[i] for i in indices_list]
        intensities_list = [intenslist[i] for i in indices_list]
        # trim the distribution to the region above 1% relative intensity
        significant = [i for i in xrange(len(intenslist)) if intenslist[i] > 0.01]
        min_i = np.min(significant)
        max_i = np.max(significant)
        # NOTE(review): the max_i slice bound is exclusive, dropping the last
        # significant point -- confirm whether max_i + 1 was intended
        return {
            "isodist_mzs": mzlist[min_i:max_i],
            "isodist_int": intenslist[min_i:max_i],
            "grad_mzs": list(mzs_list),
            "grad_int": list(intensities_list),
            # BUG FIX: indices_list is a plain list, so `indices_list - min_i`
            # raised TypeError (silently swallowed by the except), making this
            # function always return empty results
            "grad_ind": [i - min_i for i in indices_list],
        }
    except Exception:
        # best-effort: callers expect empty lists when the pattern fails
        return {"isodist_mzs": [], "isodist_int": [],
                "grad_mzs": [], "grad_int": [], "grad_ind": []}
def exact_mass(JSON_config_file):
    """Run an exact-mass search over the dataset's mean spectrum.

    Loads the dataset described by JSON_config_file, centroids its mean
    summary spectrum and writes the exact-mass search results to disk.
    """
    from pyMS.centroid_detection import gradient
    import numpy as np
    config = get_variables(JSON_config_file)
    sum_formulae, adducts, mz_list = generate_isotope_patterns(config)
    IMS_dataset = load_data(config)
    half_ppm = config['image_generation']['ppm'] / 2
    spec_axis, mean_spec = IMS_dataset.generate_summary_spectrum(
        summary_type='mean', ppm=half_ppm)
    mzs, counts, idx_list = gradient(np.asarray(spec_axis),
                                     np.asarray(mean_spec),
                                     weighted_bins=2)
    ppm_value_score = run_exact_mass_search(config, mzs, counts,
                                            sum_formulae, adducts, mz_list)
    output_results_exactMass(config, ppm_value_score, sum_formulae,
                             adducts, mz_list, fname='exactMass_all_adducts')
def mzImages(filename_in_list, save_dir):
    """Export PNG ion images for peaks common to two datasets.

    :param filename_in_list: exactly two hdf5 filenames; peaks are detected
        in each dataset's frequency spectrum and images are written for m/z
        values present (within ppm) in both
    :param save_dir: directory the PNG files are written into
    """
    import sys
    sys.path.append('C:\\Users\\Luca Rappez\\Desktop\\python_codebase\\')
    from pyMS.centroid_detection import gradient
    from pyIMS.hdf5.inMemoryIMS_hdf5 import inMemoryIMS_hdf5
    from pyIMS.image_measures import level_sets_measure
    import matplotlib.pyplot as plt
    import numpy as np
    #%matplotlib inline
    import bokeh as bk
    from bokeh.plotting import output_notebook
    output_notebook()
    ppm = 0.75
    # BUG FIX: the original tested len(list) -- `list` is the builtin type,
    # which raises TypeError. Validate the actual argument, and require
    # exactly two entries since both mz_list_all[0] and [1] are used below.
    if len(filename_in_list) != 2:
        raise ValueError('list should only have two entries')
    c_thresh = 0.05
    mz_list_all = []
    for filename_in in filename_in_list:
        # Parse data
        IMS_dataset = inMemoryIMS_hdf5(filename_in)
        hist_axis, freq_spec = IMS_dataset.generate_summary_spectrum(summary_type='freq', ppm=ppm / 2)
        # Centroid detection of frequency spectrum
        mz_list, count_list, idx_list = gradient(np.asarray(hist_axis), np.asarray(freq_spec), weighted_bins=2)
        mz_list = [m for m, c in zip(mz_list, count_list) if c > c_thresh]
        mz_list_all.append(mz_list)
    # peaks from the first dataset that are matched (within ppm) in the second
    # NOTE(review): no abs() around the ppm difference -- confirm one-sided
    # matching is intended
    im_list = [m for m in mz_list_all[0]
               if any([1e6 * (m - mm) / m < ppm for mm in mz_list_all[1]])]
    """# Calcualte MoC for images of all peaks
    nlevels=30
    im_list={}
    for ii, c in enumerate(count_list):
        ion_image = IMS_dataset.get_ion_image(np.asarray([mz_list[ii],]),ppm)
        im = ion_image.xic_to_image(0)
        m,im_moc,levels,nobjs = level_sets_measure.measure_of_chaos(im,nlevels,interp='median') #just output measure value
        m=1-m
        im_list[mz_list[ii]]={'image':im,'moc':m,'freq':c}
    """
    from pySpatialMetabolomics.tools import colourmaps
    c_map = colourmaps.get_colormap('grey')  # if black images: open->save->rerun
    c_pal = [[int(255 * cc) for cc in c_map(c)] for c in range(0, 254)]
    # Export all images
    import png as pypng
    for filename_in in filename_in_list:
        # Parse data
        IMS_dataset = inMemoryIMS_hdf5(filename_in)
        for mz in im_list:
            with open('{}/{}_{}.png'.format(save_dir, mz, filename_in), 'wb') as f_out:
                im_out = IMS_dataset.get_ion_image([mz, ], [ppm, ]).xic_to_image(0)
                # scale into the 0..254 palette range
                im_out = 254 * im_out / np.max(im_out)
                w, h = np.shape(im_out)
                # keep a distinct name: the original clobbered `w` with the writer
                writer = pypng.Writer(h, w, palette=c_pal, bitdepth=8)
                writer.write(f_out, im_out)
def prepare(mzs, ints, centroids=True):
    """Return (mzs, intensities) arrays ready for downstream processing.

    If centroids is falsy, the profile spectrum is Savitzky-Golay smoothed
    and centroided first. Output m/z values are float64, intensities are
    float32 with negative values clamped to zero.

    :param mzs: sequence of m/z values
    :param ints: sequence of intensities
    :param centroids: True when the input is already centroided
    :return: (mzs_array, intensities_array)
    """
    if centroids:  # idiom fix: was `centroids == True`
        mzs_list, intensity_list = mzs, ints
    else:
        # smooth the profile before peak picking
        ints = signal.savgol_filter(ints, 5, 2)
        mzs_list, intensity_list, indices_list = \
            centroid_detection.gradient(np.asarray(mzs), np.asarray(ints),
                                        max_output=-1, weighted_bins=3)
    mzs_list = np.asarray(mzs_list).astype(np.float64)
    intensity_list = np.asarray(intensity_list).astype(np.float32)
    # clamp negative intensities to zero
    intensity_list[intensity_list < 0] = 0
    return mzs_list, intensity_list
def centroid_IMS(input_filename, output_filename,instrumentInfo={},sharedDataInfo={}): from pyMS.centroid_detection import gradient # write out a IMS_centroid.hdf5 file sl = slFile(input_filename) n_total = np.shape(sl.spectra)[0] with h5py.File(output_filename,'w') as f_out: ### make root groups for output data spectral_data = f_out.create_group('spectral_data') spatial_data = f_out.create_group('spatial_data') shared_data = f_out.create_group('shared_data') ### populate common variables - can hardcode as I know what these are for h5 data # parameters instrument_parameters_1 = shared_data.create_group('instrument_parameters/001') if instrumentInfo != {}: for tag in instrumentInfo: instrument_parameters_1.attrs[tag] = instrumentInfo[tag] # ROIs #todo - determine and propagate all ROIs roi_1 = shared_data.create_group('regions_of_interest/001') roi_1.attrs['name'] = 'root region' roi_1.attrs['parent'] = '' # Sample sample_1 = shared_data.create_group('samples/001') if sharedDataInfo != {}: for tag in sharedDataInfo: sample_1.attrs[tag] = sharedDataInfo[tag] done = 0 for key in range(0,n_total): mzs,intensities = sl.get_spectrum(key) mzs_c, intensities_c, _ = gradient(mzs, intensities) this_spectrum = spectral_data.create_group(str(key)) _ = this_spectrum.create_dataset('centroid_mzs',data=np.float32(mzs_c),compression="gzip",compression_opts=9) # intensities _ = this_spectrum.create_dataset('centroid_intensities',data=np.float32(intensities_c),compression="gzip",compression_opts=9) # coordinates _ = this_spectrum.create_dataset('coordinates',data=(sl.coords[0, key],sl.coords[1, key],sl.coords[2, key])) ## link to shared parameters # ROI this_spectrum['ROIs/001'] = h5py.SoftLink('/shared_data/regions_of_interest/001') # Sample this_spectrum['samples/001'] = h5py.SoftLink('/shared_data/samples/001') # Instrument config this_spectrum['instrument_parameters'] = h5py.SoftLink('/shared_data/instrument_parameters/001') done += 1 if done % 1000 == 0: print "[%s] progress: 
%.1f%%" % (input_filename, float(done) * 100.0 / n_total) print "finished!"
def imzml(input_filename, output_filename,smoothMethod="nosmooth",centroid=False): import h5py import numpy as np ### Open files h5 = h5py.File(input_filename, 'r') # Readonly, file must exist ### get root groups from input data root_group_names = h5.keys() spots = h5['Spots'] spectraGroup = 'InitialMeasurement' mzs = np.asarray(h5['/SamplePositions/GlobalMassAxis/']['SamplePositions']) # we don't write this but will use it for peak detection file_version = h5['Version'][0] # some hard-coding to deal with different file versions if file_version > 5: coords = h5['Registrations']['0']['Coordinates'] else: coords = h5['Coordinates'] coords = np.asarray(coords).T.round(5) coords -= np.amin(coords, axis=0) step = np.array([np.mean(np.diff(np.unique(coords[:, i]))) for i in range(3)]) step[np.isnan(step)] = 1 coords /= np.reshape(step, (3,)) coords = coords.round().astype(int) ncol, nrow, _ = np.amax(coords, axis=0) + 1 g = h5['Spots/0/'+spectraGroup+'/'] mz_dtype = g['SamplePositions/SamplePositions'][:].dtype int_dtype = g['Intensities'][:].dtype print 'dim: {} x {}'.format(nrow,ncol) n_total = len(spots.keys()) done = 0 keys = map(str, sorted(map(int, h5['Spots'].keys()))) ### write spectra with ImzMLWriter(output_filename, mz_dtype=mz_dtype, intensity_dtype=int_dtype) as imzml: n = 0 for key, pos in zip(keys, coords): spot = spots[key] ## make new spectrum intensities = np.asarray(spot[spectraGroup]['Intensities']) if smoothMethod != []: intensities = smooth_spectrum(mzs,intensities,smoothMethod) if centroid: from pyMS import centroid_detection mzs, intensities, _ = centroid_detection.gradient(mzs,intensities, max_output=-1, weighted_bins=3) # write to file pos = (nrow - 1 - pos[1], pos[0], pos[2]) imzml.addSpectrum(mzs, intensities, pos) done += 1 if done % 1000 == 0: print "[%s] progress: %.1f%%" % (input_filename, float(done) * 100.0 / n_total) print "finished!"
def exact_mass(JSON_config_file):
    """Run an exact-mass search over the dataset's mean spectrum.

    Loads the dataset described by JSON_config_file, centroids its mean
    summary spectrum and writes the exact-mass search results to disk.
    """
    from pyMS.centroid_detection import gradient
    import numpy as np
    config = get_variables(JSON_config_file)
    sum_formulae, adducts, mz_list = generate_isotope_patterns(config)
    IMS_dataset = load_data(config)
    half_ppm = config['image_generation']['ppm'] / 2
    spec_axis, mean_spec = IMS_dataset.generate_summary_spectrum(
        summary_type='mean', ppm=half_ppm)
    mzs, counts, idx_list = gradient(np.asarray(spec_axis),
                                     np.asarray(mean_spec),
                                     weighted_bins=2)
    ppm_value_score = run_exact_mass_search(config, mzs, counts,
                                            sum_formulae, adducts, mz_list)
    output_results_exactMass(config, ppm_value_score, sum_formulae,
                             adducts, mz_list, fname='exactMass_all_adducts')
def get_lists_of_mzs(sf):
    """Compute the theoretical isotope pattern and its centroids for sum formula sf.

    :param sf: sum formula string accepted by pyisocalc.isodist
    :return: dict with the continuous distribution ("isodist_mzs/_int"),
             centroided peaks ("grad_mzs/_int") and centroid indices relative
             to the trimmed distribution ("grad_ind"); all lists are empty on
             failure.
    """
    try:
        isotope_ms = pyisocalc.isodist(sf, plot=False, sigma=0.01, charges=-2,
                                       resolution=100000.0, do_centroid=False)
        mzlist = list(isotope_ms.get_mzs())
        intenslist = list(isotope_ms.get_intensities())
        mzs_list, intensities_list, indices_list = gradient(
            isotope_ms.get_mzs(), isotope_ms.get_intensities(),
            max_output=-1, weighted_bins=0)
        # snap each centroid index to the higher-intensity neighbour
        indices_list = [i if intenslist[i] > intenslist[i + 1] else i + 1
                        for i in indices_list]
        mzs_list = [mzlist[i] for i in indices_list]
        intensities_list = [intenslist[i] for i in indices_list]
        # trim the distribution to the region above 1% relative intensity
        significant = [i for i in xrange(len(intenslist)) if intenslist[i] > 0.01]
        min_i = np.min(significant)
        max_i = np.max(significant)
        # NOTE(review): the max_i slice bound is exclusive, dropping the last
        # significant point -- confirm whether max_i + 1 was intended
        return {
            "isodist_mzs": mzlist[min_i:max_i],
            "isodist_int": intenslist[min_i:max_i],
            "grad_mzs": list(mzs_list),
            "grad_int": list(intensities_list),
            # BUG FIX: indices_list is a plain list, so `indices_list - min_i`
            # raised TypeError (silently swallowed by the except), making this
            # function always return empty results
            "grad_ind": [i - min_i for i in indices_list],
        }
    except Exception:
        # best-effort: callers expect empty lists when the pattern fails
        return {"isodist_mzs": [], "isodist_int": [],
                "grad_mzs": [], "grad_int": [], "grad_ind": []}
def centroid_imzml(input_filename, output_filename, step=[], apodization=False, w_size=10, min_intensity=1e-5): # write a file to imzml format (centroided) """ :type min_intensity: float """ from pyimzml.ImzMLWriter import ImzMLWriter from pyMS.centroid_detection import gradient sl = slFile(input_filename) mz_dtype = sl.Mzs.dtype int_dtype = sl.get_spectrum(0)[1].dtype # Convert coords to index -> kinda hacky coords = np.asarray(sl.coords).T.round(5) coords -= np.amin(coords, axis=0) if step==[]: #have a guesss step = np.array([np.mean(np.diff(np.unique(coords[:, i]))) for i in range(3)]) step[np.isnan(step)] = 1 coords /= np.reshape(step, (3,)) coords = coords.round().astype(int) ncol, nrow, _ = np.amax(coords, axis=0) + 1 print 'dim: {} x {}'.format(nrow,ncol) n_total = np.shape(sl.spectra)[0] with ImzMLWriter(output_filename, mz_dtype=mz_dtype, intensity_dtype=int_dtype) as imzml: done = 0 for key in range(0,n_total): mzs,intensities = sl.get_spectrum(key) if apodization: import scipy.signal as signal #todo - add to processing list in imzml win = signal.hann(w_size) intensities = signal.fftconvolve(intensities, win, mode='same') / sum(win) mzs_c, intensities_c, _ = gradient(mzs, intensities, min_intensity=min_intensity) pos = coords[key] pos = (nrow - 1 - pos[1], pos[0], pos[2]) imzml.addSpectrum(mzs_c, intensities_c, pos) done += 1 if done % 1000 == 0: print "[%s] progress: %.1f%%" % (input_filename, float(done) * 100.0 / n_total) print "finished!"
def isodist(molecules, charges=0, output='', plot=False, sigma=0.35, resolution=250, cutoff=0.0001, do_centroid=True, verbose=False): exit = checkhelpcall(molecules) save = checkoutput(output) if exit == True: sys.exit(0) molecules = molecules.split(',') for element in molecules: element = formulaExpander(element) if verbose: print ( 'The mass of %(substance)s is %(Mass)f and the calculated charge is %(Charge)d with m/z of %(Mz)f.' % { 'substance': \ element, 'Mass': molmass(element), 'Charge': molcharge(element), 'Mz': mz(molmass(element), molcharge(element), charges)}) if charges == 0: charges = molcharge(element) if charges == 0: charges = 1 else: if verbose: print "Using user-supplied charge of %d for mass spectrum" % charges isomasses = isotopemasses(element) isoratios = isotoperatios(element) if len(isomasses) != 1: ratios, masses = isotopes(isoratios, isomasses, cutoff) # slow final = genDict(masses, ratios, charges, cutoff) # very slow else: final = genDict(isomasses[0], isoratios[0], charges, cutoff) # for i in sorted(final.keys()): #fast # if final[i]>cutoff: # print i,final[i] ms_output = mass_spectrum() if do_centroid: from pyMS.centroid_detection import gradient pts = resolution2pts(min(final.keys()), max(final.keys()), resolution) xvector, yvector = genGaussian(final, sigma, pts) # slow ms_output.add_spectrum(xvector, yvector) mz_list, intensity_list, centroid_list = gradient(ms_output.get_spectrum()[0], ms_output.get_spectrum()[1], max_output=-1, weighted_bins=5) ms_output.add_centroids(mz_list, intensity_list) else: mz_idx = sorted(final.keys()) ms_output.add_centroids(mz_idx, [final[f] for f in mz_idx]) if plot == True: import matplotlib.pyplot as plt # for plotting plt.plot(xvector, yvector) plt.plot(mz_list, intensity_list, 'rx') plt.show() if save == True: g = open(savefile, 'w') xs = xvector.tolist() ys = yvector.tolist() for i in range(0, len(xs)): g.write(str(xs[i]) + "\t" + str(ys[i]) + "\n") g.close return ms_output
def hdf5(filename_in, filename_out, info, smoothMethod="nosmooth"): import h5py import numpy as np import datetime import scipy.signal as signal from pyMS import centroid_detection import sys #from IPython.display import display, clear_output ### Open files f_in = h5py.File(filename_in, 'r') # Readonly, file must exist f_out = h5py.File(filename_out, 'w') # create file, truncate if exists print filename_in print filename_out ### get root groups from input data root_group_names = f_in.keys() spots = f_in['Spots'] file_version = f_in['Version'][0] # some hard-coding to deal with different file versions if file_version > 5: coords = f_in['Registrations']['0']['Coordinates'] else: coords = f_in['Coordinates'] spectraGroup = 'InitialMeasurement' Mzs = np.asarray( f_in['/SamplePositions/GlobalMassAxis/']['SamplePositions'] ) # we don't write this but will use it for peak detection ### make root groups for output data spectral_data = f_out.create_group('spectral_data') spatial_data = f_out.create_group('spatial_data') shared_data = f_out.create_group('shared_data') ### populate common variables - can hardcode as I know what these are for h5 data # parameters instrument_parameters_1 = shared_data.create_group( 'instrument_parameters/001') instrument_parameters_1.attrs['instrument name'] = 'Bruker Solarix 7T' instrument_parameters_1.attrs['mass range'] = [Mzs[0], Mzs[-1]] instrument_parameters_1.attrs['analyser type'] = 'FTICR' instrument_parameters_1.attrs['smothing during convertion'] = smoothMethod instrument_parameters_1.attrs['data conversion'] = 'h5->hdf5:' + str( datetime.datetime.now()) # ROIs #todo - determine and propagate all ROIs sample_1 = shared_data.create_group('samples/001') sample_1.attrs['name'] = info["sample_name"] sample_1.attrs['source'] = info["sample_source"] sample_1.attrs['preparation'] = info["sample_preparation"] sample_1.attrs['MALDI matrix'] = info["maldi_matrix"] sample_1.attrs['MALDI matrix application'] = info["matrix_application"] ### 
write spectra n = 0 for key in spots.keys(): spot = spots[key] ## make new spectrum #mzs,intensities = nosmooth(Mzs,np.asarray(spot[spectraGroup]['Intensities'])) if smoothMethod == 'nosmooth': mzs, intensities = mzs, intensities = nosmooth( Mzs, np.asarray(spot[spectraGroup]['Intensities'])) elif smoothMethod == 'nosmooth': mzs, intensities = sg_smooth( Mzs, np.asarray(spot[spectraGroup]['Intensities'])) elif smoothMethod == 'apodization': mzs, intensities = apodization( Mzs, np.asarray(spot[spectraGroup]['Intensities'])) else: raise ValueError( 'smooth method not one of: [nosmooth,nosmooth,apodization]') mzs_list, intensity_list, indices_list = centroid_detection.gradient( mzs, intensities, max_output=-1, weighted_bins=3) # add intensities this_spectrum = spectral_data.create_group(key) this_intensities = this_spectrum.create_dataset( 'centroid_intensities', data=np.float32(intensity_list), compression="gzip", compression_opts=9) # add coordinates key_dbl = float(key) this_coordiantes = this_spectrum.create_dataset( 'coordinates', data=(coords[0, key_dbl], coords[1, key_dbl], coords[2, key_dbl])) ## link to shared parameters # mzs this_mzs = this_spectrum.create_dataset('centroid_mzs', data=np.float32(mzs_list), compression="gzip", compression_opts=9) # ROI this_spectrum['ROIs/001'] = h5py.SoftLink( '/shared_data/regions_of_interest/001') # Sample this_spectrum['samples/001'] = h5py.SoftLink( '/shared_data/samples/001') # Instrument config this_spectrum['instrument_parameters'] = h5py.SoftLink( '/shared_data/instrument_parameters/001') n += 1 if np.mod(n, 10) == 0: #clear_output(wait=True) print('{:3.2f}\% complete\r'.format(100. * n / np.shape(spots.keys())[0], end="\r")), sys.stdout.flush() f_in.close() f_out.close() print 'fin'
def mzImages(filename_in,save_dir): import sys sys.path.append('C:\\Users\\Luca Rappez\\Desktop\\python_codebase\\') from pyMS.centroid_detection import gradient from pyIMS.hdf5.inMemoryIMS_hdf5 import inMemoryIMS_hdf5 from pyIMS.image_measures import level_sets_measure import matplotlib.pyplot as plt import numpy as np #%matplotlib inline import bokeh as bk from bokeh.plotting import output_notebook output_notebook() print 'step1' #filename_in = '//psi.embl.de/alexandr/shared/Luca/20150825_CoCulture_Candida_Ecoli/20150821_ADP_LR_colony250K12_DHBsub_260x280_50um.hdf5' #using a temporary hdf5 based format #save_dir= '//psi.embl.de/alexandr/shared/Luca/20150825_CoCulture_Candida_Ecoli/20150821_ADP_LR_colony250K12_DHBsub_260x280_50um_figures' # Parse data IMS_dataset = inMemoryIMS_hdf5(filename_in) print 'In memory' ppm = 1.5 # Generate mean spectrum #hist_axis,mean_spec =IMS_dataset.generate_summary_spectrum(summary_type='mean') hist_axis,freq_spec = IMS_dataset.generate_summary_spectrum(summary_type='freq',ppm=ppm/2) #p1 = bk.plotting.figure() #p1.line(hist_axis,mean_spec/np.max(mean_spec),color='red') #p1.line(hist_axis,freq_spec/np.max(freq_spec),color='orange') #bk.plotting.show(p1) print len(hist_axis) #plt.figure(figsize=(20,10)) #plt.plot(hist_axis,freq_spec) #plt.show() # Centroid detection of frequency spectrum mz_list,count_list,idx_list = gradient(np.asarray(hist_axis),np.asarray(freq_spec),weighted_bins=2) c_thresh=0.05 moc_thresh=0.99 print np.sum(count_list>c_thresh) # Calcualte MoC for images of all peaks nlevels=30 im_list={} for ii, c in enumerate(count_list): #print ii #print c if c>c_thresh: ion_image = IMS_dataset.get_ion_image(np.asarray([mz_list[ii],]),ppm) im = ion_image.xic_to_image(0) m,im_moc,levels,nobjs = level_sets_measure.measure_of_chaos(im,nlevels,interp='median') #just output measure value m=1-m im_list[mz_list[ii]]={'image':im,'moc':m,'freq':c} from pySpatialMetabolomics.tools import colourmaps c_map = 
colourmaps.get_colormap('grey')#if black images: open->save->rerun c_pal=[[int(255*cc) for cc in c_map(c)] for c in range(0,254)] # Export all images import png as pypng for mz in im_list: if im_list[mz]['moc']>moc_thresh: with open('{}/{}_{}.png'.format(save_dir,mz,im_list[mz]['moc']),'wb') as f_out: im_out = im_list[mz]['image'] im_out = 254*im_out/np.max(im_out) w,h = np.shape(im_out) w = pypng.Writer(h, w, palette=c_pal, bitdepth=8) w.write(f_out,im_out) #im_out = im_list[mz]['image'] mz=333.334188269 ion_image = IMS_dataset.get_ion_image(np.asarray([mz,]),ppm) im_out=ion_image.xic_to_image(0) m,im_moc,levels,nobjs = level_sets_measure.measure_of_chaos(im,nlevels,interp='') #just output measure value print 1-m im_out = 254.*im_out/np.max(im_out) print mz #print im_list[mz]['moc'] #plt.figure() #plt.imshow(im_moc) #plt.show()
sample_1.attrs["MALDI matrix application"] = matrix_application ### write spectra n = 0 for i, coords in enumerate(f_in.coordinates): ## rename as I'm using old code :S spot = i key = str(i) ## make new spectrum mzs, ints = f_in.getspectrum(i) if centroids == True: mzs_list, intensity_list = mzs, ints else: ints = signal.savgol_filter(ints, 5, 2) mzs_list, intensity_list, indices_list = centroid_detection.gradient( np.asarray(mzs), np.asarray(ints), max_output=-1, weighted_bins=3 ) if not all([m > 0 for m in intensity_list]): raise ValueError("whoa, wtf?") # add intensities this_spectrum = spectral_data.create_group(key) this_intensities = this_spectrum.create_dataset( "centroid_intensities", data=np.float32(intensity_list), compression="gzip", compression_opts=9 ) # add coordinates if len(coords) == 2: coords = (coords[0], coords[1], 0) this_coordiantes = this_spectrum.create_dataset("coordinates", data=(coords[0], coords[1], coords[2])) ## link to shared parameters # mzs this_mzs = this_spectrum.create_dataset(
def hdf5(filename_in, filename_out,info,smoothMethod="nosmooth"): import h5py import numpy as np import datetime import scipy.signal as signal from pyMS import centroid_detection import sys #from IPython.display import display, clear_output ### Open files f_in = h5py.File(filename_in, 'r') # Readonly, file must exist f_out = h5py.File(filename_out, 'w') # create file, truncate if exists print filename_in print filename_out ### get root groups from input data root_group_names = f_in.keys() spots = f_in['Spots'] file_version = f_in['Version'][0] # some hard-coding to deal with different file versions if file_version > 5: coords = f_in['Registrations']['0']['Coordinates'] else: coords = f_in['Coordinates'] spectraGroup = 'InitialMeasurement' Mzs = np.asarray(f_in['/SamplePositions/GlobalMassAxis/']['SamplePositions']) # we don't write this but will use it for peak detection ### make root groups for output data spectral_data = f_out.create_group('spectral_data') spatial_data = f_out.create_group('spatial_data') shared_data = f_out.create_group('shared_data') ### populate common variables - can hardcode as I know what these are for h5 data # parameters instrument_parameters_1 = shared_data.create_group('instrument_parameters/001') instrument_parameters_1.attrs['instrument name'] = 'Bruker Solarix 7T' instrument_parameters_1.attrs['mass range'] = [Mzs[0],Mzs[-1]] instrument_parameters_1.attrs['analyser type'] = 'FTICR' instrument_parameters_1.attrs['smothing during convertion'] = smoothMethod instrument_parameters_1.attrs['data conversion'] = 'h5->hdf5:'+str(datetime.datetime.now()) # ROIs #todo - determine and propagate all ROIs sample_1 = shared_data.create_group('samples/001') sample_1.attrs['name'] = info["sample_name"] sample_1.attrs['source'] = info["sample_source"] sample_1.attrs['preparation'] = info["sample_preparation"] sample_1.attrs['MALDI matrix'] = info["maldi_matrix"] sample_1.attrs['MALDI matrix application'] = info["matrix_application"] ### write spectra 
n = 0 for key in spots.keys(): spot = spots[key] ## make new spectrum #mzs,intensities = nosmooth(Mzs,np.asarray(spot[spectraGroup]['Intensities'])) if smoothMethod == 'nosmooth': mzs,intensities = mzs,intensities = nosmooth(Mzs,np.asarray(spot[spectraGroup]['Intensities'])) elif smoothMethod == 'nosmooth': mzs,intensities = sg_smooth(Mzs,np.asarray(spot[spectraGroup]['Intensities'])) elif smoothMethod == 'apodization': mzs,intensities = apodization(Mzs,np.asarray(spot[spectraGroup]['Intensities'])) else: raise ValueError('smooth method not one of: [nosmooth,nosmooth,apodization]') mzs_list, intensity_list, indices_list = centroid_detection.gradient(mzs,intensities, max_output=-1, weighted_bins=3) # add intensities this_spectrum = spectral_data.create_group(key) this_intensities = this_spectrum.create_dataset('centroid_intensities', data=np.float32(intensity_list), compression="gzip", compression_opts=9) # add coordinates key_dbl = float(key) this_coordiantes = this_spectrum.create_dataset('coordinates', data=(coords[0, key_dbl], coords[1, key_dbl], coords[2, key_dbl])) ## link to shared parameters # mzs this_mzs = this_spectrum.create_dataset('centroid_mzs', data=np.float32(mzs_list), compression="gzip", compression_opts=9) # ROI this_spectrum['ROIs/001'] = h5py.SoftLink('/shared_data/regions_of_interest/001') # Sample this_spectrum['samples/001'] = h5py.SoftLink('/shared_data/samples/001') # Instrument config this_spectrum['instrument_parameters'] = h5py.SoftLink('/shared_data/instrument_parameters/001') n += 1 if np.mod(n, 10) == 0: #clear_output(wait=True) print('{:3.2f}\% complete\r'.format(100.*n/np.shape(spots.keys())[0], end="\r")), sys.stdout.flush() f_in.close() f_out.close() print 'fin'
def preprocess_spectrum(mzs, ints):
    """Smooth, centroid and m/z-sort a profile spectrum.

    Applies a Savitzky-Golay filter, runs centroid detection, and returns
    the centroided (mzs, intensities) arrays sorted by ascending m/z.
    """
    smoothed = signal.savgol_filter(ints, 5, 2)
    peak_mzs, peak_ints, _ = gradient(np.asarray(mzs), np.asarray(smoothed),
                                      max_output=-1, weighted_bins=3)
    sort_idx = peak_mzs.argsort()
    return peak_mzs[sort_idx], peak_ints[sort_idx]