Example #1
 def get_features_from_mean_spectrum(self, ppm=3., w=5., min_int=15., max_peaks=3):
     # Generate mean spectrum, go through all peaks and score TLC feature presence
     mean_spec = self.ims_dataset.generate_summary_spectrum(ppm=ppm)
     mean_spec_c = centroid_detection.gradient(np.asarray(mean_spec[0]), np.asarray(mean_spec[1]), min_intensity=3.)
     ion_datacube = self.ims_dataset.get_ion_image(mean_spec_c[0], ppm)
     self.feature_list=[]
     for ii, m in enumerate(mean_spec_c[0]):
         im = ion_datacube.xic_to_image(ii)
         im = im_smoothing.median(im, size=3)
         m_s_f_s = tlc_smoothing.sqrt_apodization(im, w=w)
         m_s_f_s_c = centroid_detection.gradient(np.asarray(range(len(m_s_f_s))), np.asarray(m_s_f_s),
                                                 min_intensity=min_int)
         n_peaks = len(m_s_f_s_c[0])
          if n_peaks < max_peaks:
             for x, i in zip(m_s_f_s_c[0],m_s_f_s_c[1]):
                 self.feature_list.append((m,x,i))
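In the example above, gradient is applied twice: first to the dataset's mean spectrum to pick candidate m/z values, then to each smoothed ion-image trace to score feature presence along the TLC track. Each accepted feature is stored as a (mz, track_position, intensity) tuple, so a hypothetical consumer (the instance name tlc below is a placeholder) could rank features like this:

# Hypothetical consumer of self.feature_list; each entry is (mz, track_position, intensity).
features = sorted(tlc.feature_list, key=lambda f: f[2], reverse=True)
for mz, pos, intensity in features[:10]:
    print("m/z %.4f at position %.1f (intensity %.1f)" % (mz, pos, intensity))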
Example #2
 def get_isotope_pattern(self, formula_adduct_string, charge):
     perfect_pattern = pyisocalc.perfect_pattern(pyisocalc.parseSumFormula(formula_adduct_string), charge=charge)
     sigma = self.sigma_at_mz(perfect_pattern.get_spectrum(source='centroids')[0][0])
     pts_per_mz = self.points_per_mz(sigma)
     spec = pyisocalc.apply_gaussian(perfect_pattern, sigma, pts_per_mz)
     centroided_mzs, centroided_ints, _ = gradient(*spec.get_spectrum())
     spec.add_centroids(centroided_mzs, centroided_ints)
     return spec
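The last three lines above turn the Gaussian-broadened profile back into centroids. A minimal sketch of that step in isolation, on a synthetic single peak, assuming gradient returns (centroid_mzs, centroid_intensities, extra) as in the other examples on this page:

# Sketch only: centroid a synthetic Gaussian profile; the recovered centroid
# should sit close to the apex at m/z 500.0.
import numpy as np
from pyMSpec.centroid_detection import gradient

mz_axis = np.linspace(499.5, 500.5, 2001)
profile = 1e4 * np.exp(-0.5 * ((mz_axis - 500.0) / 0.01) ** 2)
centroid_mzs, centroid_ints, _ = gradient(mz_axis, profile, min_intensity=1.)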
Example #3
def preprocess_spectrum(mzs, ints):
    ints = signal.savgol_filter(ints, 5, 2)
    mzs, ints, _ = gradient(np.asarray(mzs),
                            np.asarray(ints),
                            max_output=-1,
                            weighted_bins=3)
    order = mzs.argsort()
    return mzs[order], ints[order]
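A hypothetical call of preprocess_spectrum, assuming signal is scipy.signal and gradient is pyMSpec.centroid_detection.gradient as in the surrounding examples; savgol_filter(ints, 5, 2) needs at least five samples, so very short inputs should be guarded against:

# Sketch only: smooth and centroid a synthetic spectrum.
import numpy as np
mzs = np.linspace(100., 101., 500)
ints = 1e3 * np.exp(-0.5 * ((mzs - 100.5) / 0.02) ** 2)
peak_mzs, peak_ints = preprocess_spectrum(mzs, ints)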
Example #4
def get_peak_list(ds_id):
    print('Get Mean Spectrum')
    mean_spec = get_mean_spectrum(ds_id)
    mz_list = gradient(mean_spec[0],
                       mean_spec[1],
                       min_intensity=10 *
                       mean_spec[1][mean_spec[1] > 0].min())[0]
    print(len(mz_list))
    return mz_list
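The min_intensity argument above is a heuristic threshold: ten times the smallest non-zero intensity in the mean spectrum. The thresholding arithmetic in isolation:

# Keep only centroids at least 10x the smallest non-zero intensity.
import numpy as np
intensities = np.array([0., 0.2, 5., 40., 0.])
threshold = 10 * intensities[intensities > 0].min()  # 2.0, so only 5. and 40. survive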
Example #5
 def rebin(self):
     from pyMSpec.centroid_detection import gradient
     ms = self.generate_summary_spectrum()
     p = gradient(ms[0], ms[1], max_output=2500)
     mzs = p[0]
     return np.asarray(
         self.get_ion_image((mzs[1:] + mzs[0:-1]) / 2.,
                            tols=mzs[1:] - mzs[0:-1],
                            tol_type='abs').xic)
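rebin uses the centroid m/z values returned by gradient (capped at 2500 by max_output) as bin edges: midpoints of consecutive centroids become image m/z centres and the gaps become absolute tolerances. The bin arithmetic in isolation:

# Consecutive centroid m/z values become bin edges.
import numpy as np
mzs = np.array([100., 100.5, 101.5, 103.])
centres = (mzs[1:] + mzs[0:-1]) / 2.  # [100.25, 101.0, 102.25]
widths = mzs[1:] - mzs[0:-1]          # [0.5, 1.0, 1.5]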
Example #6
 def get_isotope_pattern(self, formula_adduct_string, charge):
     perfect_pattern = pyisocalc.perfect_pattern(
         pyisocalc.parseSumFormula(formula_adduct_string), charge=charge)
     sigma = self.sigma_at_mz(
         perfect_pattern.get_spectrum(source='centroids')[0][0])
     pts_per_mz = self.points_per_mz(sigma)
     spec = pyisocalc.apply_gaussian(perfect_pattern, sigma, pts_per_mz)
     centroided_mzs, centroided_ints, _ = gradient(*spec.get_spectrum())
     spec.add_centroids(centroided_mzs, centroided_ints)
     return spec
Example #7
 def get_xic(self, mz, tol, w=5, min_int=1):
     mz = np.asarray([mz,])
     tol = np.asarray(tol)
     im = self.ims_dataset.get_ion_image(mz, tol).xic_to_image(0)
     #if np.max(im)>0.0:
     print "Max IM:",np.max(im)
     im = im_smoothing.median(im, size=3)
     xic = tlc_smoothing.sqrt_apodization(im, w=w)
      xic = Xic(xic=[self.x_pos, xic],
                xic_features=centroid_detection.gradient(np.asarray(range(len(xic))),
                                                         np.asarray(xic),
                                                         min_intensity=min_int))
     return xic
Example #8
def exact_mass(JSON_config_file):
    config = get_variables(JSON_config_file)
    sum_formulae, adducts, mz_list = generate_isotope_patterns(config)
    IMS_dataset = load_data(config)
    spec_axis, mean_spec = IMS_dataset.generate_summary_spectrum(summary_type='mean', ppm=config['image_generation']['ppm'] / 2.)
    from pyMSpec.centroid_detection import gradient
    import numpy as np
    mzs, counts, idx_list = gradient(np.asarray(spec_axis), np.asarray(mean_spec), weighted_bins=2)
    ppm_value_score = run_exact_mass_search(config, mzs, counts, sum_formulae, adducts, mz_list)
    output_results_exactMass(config, ppm_value_score, sum_formulae, adducts, mz_list, fname='exactMass_all_adducts')
Example #9
def centroid_IMS(input_filename, output_filename, instrumentInfo={}, sharedDataInfo={}):
    from pyMS.centroid_detection import gradient
    # write out a IMS_centroid.hdf5 file
    sl = slFile(input_filename)
    n_total = np.shape(sl.spectra)[0]
    with h5py.File(output_filename, 'w') as f_out:
        ### make root groups for output data
        spectral_data = f_out.create_group('spectral_data')
        spatial_data = f_out.create_group('spatial_data')
        shared_data = f_out.create_group('shared_data')

        ### populate common variables - can hardcode as I know what these are for h5 data
        # parameters
        instrument_parameters_1 = shared_data.create_group('instrument_parameters/001')
        if instrumentInfo != {}:
            for tag in instrumentInfo:
                instrument_parameters_1.attrs[tag] = instrumentInfo[tag]
                # ROIs
                # todo - determine and propagate all ROIs
        roi_1 = shared_data.create_group('regions_of_interest/001')
        roi_1.attrs['name'] = 'root region'
        roi_1.attrs['parent'] = ''
        # Sample
        sample_1 = shared_data.create_group('samples/001')
        if sharedDataInfo != {}:
            for tag in sharedDataInfo:
                sample_1.attrs[tag] = sharedDataInfo[tag]

        done = 0
        for key in range(0, n_total):
            mzs, intensities = sl.get_spectrum(key)
            mzs_c, intensities_c, _ = gradient(mzs, intensities)
            this_spectrum = spectral_data.create_group(str(key))
            _ = this_spectrum.create_dataset('centroid_mzs', data=np.float32(mzs_c), compression="gzip",
                                             compression_opts=9)
            # intensities
            _ = this_spectrum.create_dataset('centroid_intensities', data=np.float32(intensities_c), compression="gzip",
                                             compression_opts=9)
            # coordinates
            _ = this_spectrum.create_dataset('coordinates',
                                             data=(sl.coords[0, key], sl.coords[1, key], sl.coords[2, key]))
            ## link to shared parameters
            # ROI
            this_spectrum['ROIs/001'] = h5py.SoftLink('/shared_data/regions_of_interest/001')
            # Sample
            this_spectrum['samples/001'] = h5py.SoftLink('/shared_data/samples/001')
            # Instrument config
            this_spectrum['instrument_parameters'] = h5py.SoftLink('/shared_data/instrument_parameters/001')
            done += 1
            if done % 1000 == 0:
                print "[%s] progress: %.1f%%" % (input_filename, float(done) * 100.0 / n_total)
        print "finished!"
Example #10
 def generate_spectrum(self,x,y,mode='centroid', cent_kwargs={}):
     peakList = self.get_peaks(x,y)
     mzs,intensities = self.simulate_spectrum(peakList)
     if mode=='centroid':
         from pyMSpec.centroid_detection import gradient
         from pyMSpec import smoothing
         mzs, intensities = smoothing.fast_change(mzs,intensities)
         mzs,intensities,_ = gradient(np.asarray(mzs),np.asarray(intensities), **cent_kwargs)
         return mzs,intensities
     elif mode=='profile':
         return mzs,intensities
     else:
         raise ValueError("{} not recognised mode".format(mode))
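A hypothetical use of the method above; sim stands in for an instance of the simulator class and x, y for a pixel coordinate. cent_kwargs is forwarded straight to gradient, so keywords such as min_intensity (seen in the other examples) can be passed through:

# Sketch only: compare profile and centroid output for one pixel.
mzs_p, ints_p = sim.generate_spectrum(x, y, mode='profile')
mzs_c, ints_c = sim.generate_spectrum(x, y, mode='centroid',
                                      cent_kwargs={'min_intensity': 0.01})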
Example #11
def centroid_imzml(input_filename, output_filename, step=[], apodization=False, w_size=10, min_intensity=1e-5,
                   region_name="", prevent_duplicate_pixels=False):
    # write a file to imzml format (centroided)
    """
    :type min_intensity: float
    """
    from pyimzml.ImzMLWriter import ImzMLWriter
    from pyMSpec.centroid_detection import gradient
    sl = slFile(input_filename, region_name=region_name)
    mz_dtype = sl.Mzs.dtype
    int_dtype = sl.get_spectrum(0)[1].dtype
    # Convert coords to index -> kinda hacky
    coords = np.asarray(sl.coords.copy()).T.round(5)
    coords -= np.amin(coords, axis=0)
    if step == []:  # have a guess
        step = np.array([np.median(np.diff(np.unique(coords[sl.spotlist, i]))) for i in range(3)])
        step[np.isnan(step)] = 1
    print 'estimated pixel size: {} x {}'.format(step[0], step[1])
    coords = coords / np.reshape(step, (3,)).T
    coords = coords.round().astype(int)
    ncol, nrow, _ = np.amax(coords, axis=0) + 1
    print 'new image size: {} x {}'.format(nrow, ncol)
    if prevent_duplicate_pixels:
        b = np.ascontiguousarray(coords).view(np.dtype((np.void, coords.dtype.itemsize * coords.shape[1])))
        _, coord_idx = np.unique(b, return_index=True)
        print np.shape(sl.spotlist), np.shape(coord_idx)

        print "original number of spectra: {}".format(len(coords))
    else:
        coord_idx = range(len(coords))
    n_total = len(coord_idx)
    print 'spectra to write: {}'.format(n_total)
    with ImzMLWriter(output_filename, mz_dtype=mz_dtype, intensity_dtype=int_dtype) as imzml:
        done = 0
        for key in sl.spotlist:
            if all((prevent_duplicate_pixels, key not in coord_idx)):# skip duplicate pixels
                #print 'skip {}'.format(key)
                continue
            mzs, intensities = sl.get_spectrum(key)
            if apodization:
                from pyMSpec import smoothing
                # todo - add to processing list in imzml
                mzs, intensities = smoothing.apodization(mzs, intensities)
            mzs_c, intensities_c, _ = gradient(mzs, intensities, weighted_bins=5, min_intensity=min_intensity)
            pos = coords[key]
            pos = (pos[0], nrow - 1 - pos[1], pos[2])
            imzml.addSpectrum(mzs_c, intensities_c, pos)
            done += 1
            if done % 1000 == 0:
                print "[%s] progress: %.1f%%" % (input_filename, float(done) * 100.0 / n_total)
        print "finished!"
Example #12
 def correlation(self, basemz, mz_list=None):
     if not mz_list:
         from pyMSpec.centroid_detection import gradient
         mean_spec = self.generate_summary_spectrum(ppm=self.ppm)
         mz_list = gradient(mean_spec[0],
                            mean_spec[1],
                            min_intensity=3 *
                            mean_spec[1][mean_spec[1] > 0].min())[0]
     baseim = self.get_ion_image(basemz, self.ppm).xic[0]
     corr = np.zeros(len(mz_list))
     for ii, mz in enumerate(mz_list):
         ionim = self.get_ion_image(mz, self.ppm)
         corr[ii] = np.corrcoef(baseim, ionim.xic[0])[0][1]
     return mz_list, corr
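The correlation itself is a plain Pearson coefficient between the flattened base ion image and each candidate image; np.corrcoef returns a 2x2 matrix whose off-diagonal entry is the value stored. In isolation:

# Pearson correlation between two flattened ion images.
import numpy as np
base = np.array([0., 1., 2., 3.])
other = np.array([0., 2., 4., 6.1])
r = np.corrcoef(base, other)[0][1]  # close to 1.0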
Example #13
def find_centroid_mzs(mzs, ints):
    try:
        if len(mzs) <= 2:
            # If there aren't enough peaks to model a centroid, assume the centroid is
            # outside of the sampled range and return nothing.
            return np.empty(0)

        if signal is not None and gradient is not None:
            ints = signal.savgol_filter(ints, 5, 2)
            mzs, ints, _ = gradient(
                np.asarray(mzs),
                np.asarray(ints),
                max_output=-1,
                weighted_bins=max(min(3, (len(mzs) - 1) // 2), 1),
            )
            return mzs
    except ValueError:
        return np.empty(0)
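The weighted_bins expression above clamps the centroiding window: at most 3 bins, at most half of the available points (rounded down), and never fewer than 1. Its values for a few input lengths:

# Clamp behaviour of max(min(3, (n - 1) // 2), 1).
for n in (3, 4, 5, 7, 9, 100):
    print(n, max(min(3, (n - 1) // 2), 1))
# 3 -> 1, 4 -> 1, 5 -> 2, 7 -> 3, 9 -> 3, 100 -> 3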
Example #14
def correlation(ds_id, basemz, mz_list=None):
    if not mz_list:
        print('Get Mean Spectrum')
        mean_spec = get_mean_spectrum(ds_id)
        mz_list = gradient(mean_spec[0],
                           mean_spec[1],
                           min_intensity=10 *
                           mean_spec[1][mean_spec[1] > 0].min())[0]
        print(len(mz_list))
    ds_info = get_ds_info(ds_id)
    imzb = ImzbReader(ds_info['imzb'])
    baseim = imzb.get_mz_image(basemz, ds_info['ppm']).flatten()
    corr = np.zeros(len(mz_list))
    for ii, mz in enumerate(mz_list):
        ionim = imzb.get_mz_image(mz, ds_info['ppm'])
        corrcoeff = np.corrcoef(baseim, ionim.flatten())
        corr[ii] = corrcoeff[0][1]
    return mz_list, corr
Example #15
def do_peak_plot(im, m_s, m_s_f,label):
    # Im should always be presented so that x-axis goes along the tlc track
    plt.figure(figsize=(10,10))
    plt.subplot(211)
    plt.imshow(im)
    plt.subplot(212)
    for x in range(im.shape[0]):
        if x==1:
            plt.plot(im[x,:],color="black", label='data peak')
        else:
            plt.plot(im[x,:],color="black")
    plt.plot(np.mean(im,axis=0),color='blue', label='data mean')
    plt.plot(m_s_f, color='red', label=label)
    m_s_f_c = centroid_detection.gradient(np.asarray(range(np.shape(m_s_f)[0])), np.asarray(m_s_f), min_intensity=1.)
    if not len(m_s_f_c[0]) == 0:
        plt.stem(m_s_f_c[0], m_s_f_c[1])
    plt.xlim((0,im.shape[1]))
    plt.legend()
    plt.show()
Example #16
def centroid_imzml(input_filename,
                   output_filename,
                   step=[],
                   apodization=False,
                   w_size=10,
                   min_intensity=1e-5,
                   prevent_duplicate_pixels=False):

    # write a file to imzml format (centroided)
    """
    :type input_filename string - source file path (must be .imzml)
    :type output_filename string - output file path (must be .imzml)
    :type step tuple grid spacing of pixels (if [] the script will try and guess it)
    :type apodization boolean whether to try and remove FT wiggle artefacts
    :type w_size window side (m/z bins) for apodization
    :type min_intensity: float minimum intensity peaks to return during centroiding
    :type prevent_duplicate_pixels bool if True will only return the first spectrum for pixels with the same coordinates
    """
    from pyimzml.ImzMLParser import ImzMLParser
    from pyimzml.ImzMLWriter import ImzMLWriter
    from pyMSpec.centroid_detection import gradient

    imzml_in = ImzMLParser(input_filename)
    precisionDict = {
        'f': ("32-bit float", np.float32),
        'd': ("64-bit float", np.float64),
        'i': ("32-bit integer", np.int32),
        'l': ("64-bit integer", np.int64)
    }
    mz_dtype = precisionDict[imzml_in.mzPrecision][1]
    int_dtype = precisionDict[imzml_in.intensityPrecision][1]
    # Convert coords to index -> kinda hacky
    coords = np.asarray(imzml_in.coordinates).round(5)
    coords -= np.amin(coords, axis=0)
    if step == []:  # have a guess
        step = np.array([
            np.median(np.diff(np.unique(coords[:, i])))
            for i in range(coords.shape[1])
        ])
        step[np.isnan(step)] = 1
    print 'estimated pixel size: {} x {}'.format(step[0], step[1])
    coords = coords / np.reshape(step, (3, )).T
    coords = coords.round().astype(int)
    ncol, nrow, _ = np.amax(coords, axis=0) + 1
    print 'new image size: {} x {}'.format(nrow, ncol)
    if prevent_duplicate_pixels:
        b = np.ascontiguousarray(coords).view(
            np.dtype((np.void, coords.dtype.itemsize * coords.shape[1])))
        _, coord_idx = np.unique(b, return_index=True)
        print np.shape(imzml_in.coordinates), np.shape(coord_idx)

        print "original number of spectra: {}".format(len(coords))
    else:
        coord_idx = range(len(coords))
    n_total = len(coord_idx)
    print 'spectra to write: {}'.format(n_total)
    with ImzMLWriter(output_filename,
                     mz_dtype=mz_dtype,
                     intensity_dtype=int_dtype) as imzml_out:
        done = 0
        for key in range(np.shape(imzml_in.coordinates)[0]):
            print key
            if all((prevent_duplicate_pixels, key
                    not in coord_idx)):  # skip duplicate pixels
                continue
            mzs, intensities = imzml_in.getspectrum(key)
            if apodization:
                from pyMSpec import smoothing
                # todo - add to processing list in imzml
                mzs, intensities = smoothing.apodization(
                    mzs, intensities, {'w_size': w_size})
            mzs_c, intensities_c, _ = gradient(mzs,
                                               intensities,
                                               min_intensity=min_intensity)
            pos = coords[key]
            if len(pos) == 2:
                pos.append(0)
            pos = (pos[0], nrow - 1 - pos[1], pos[2])
            imzml_out.addSpectrum(mzs_c, intensities_c, pos)
            done += 1
            if done % 1000 == 0:
                print "[%s] progress: %.1f%%" % (input_filename,
                                                 float(done) * 100.0 / n_total)
        print "finished!"
Example #17
def preprocess_spectrum(mzs, ints):
    ints = signal.savgol_filter(ints, 5, 2)
    mzs, ints, _ = gradient(np.asarray(mzs), np.asarray(ints), max_output=-1, weighted_bins=3)
    order = mzs.argsort()
    return mzs[order], ints[order]
Example #18
def centroid_IMS(input_filename,
                 output_filename,
                 instrumentInfo={},
                 sharedDataInfo={}):
    from pyMS.centroid_detection import gradient
    # write out a IMS_centroid.hdf5 file
    sl = slFile(input_filename)
    n_total = np.shape(sl.spectra)[0]
    with h5py.File(output_filename, 'w') as f_out:
        ### make root groups for output data
        spectral_data = f_out.create_group('spectral_data')
        spatial_data = f_out.create_group('spatial_data')
        shared_data = f_out.create_group('shared_data')

        ### populate common variables - can hardcode as I know what these are for h5 data
        # parameters
        instrument_parameters_1 = shared_data.create_group(
            'instrument_parameters/001')
        if instrumentInfo != {}:
            for tag in instrumentInfo:
                instrument_parameters_1.attrs[tag] = instrumentInfo[tag]
                # ROIs
                # todo - determine and propagate all ROIs
        roi_1 = shared_data.create_group('regions_of_interest/001')
        roi_1.attrs['name'] = 'root region'
        roi_1.attrs['parent'] = ''
        # Sample
        sample_1 = shared_data.create_group('samples/001')
        if sharedDataInfo != {}:
            for tag in sharedDataInfo:
                sample_1.attrs[tag] = sharedDataInfo[tag]

        done = 0
        for key in range(0, n_total):
            mzs, intensities = sl.get_spectrum(key)
            mzs_c, intensities_c, _ = gradient(mzs, intensities)
            this_spectrum = spectral_data.create_group(str(key))
            _ = this_spectrum.create_dataset('centroid_mzs',
                                             data=np.float32(mzs_c),
                                             compression="gzip",
                                             compression_opts=9)
            # intensities
            _ = this_spectrum.create_dataset('centroid_intensities',
                                             data=np.float32(intensities_c),
                                             compression="gzip",
                                             compression_opts=9)
            # coordinates
            _ = this_spectrum.create_dataset('coordinates',
                                             data=(sl.coords[0, key],
                                                   sl.coords[1, key],
                                                   sl.coords[2, key]))
            ## link to shared parameters
            # ROI
            this_spectrum['ROIs/001'] = h5py.SoftLink(
                '/shared_data/regions_of_interest/001')
            # Sample
            this_spectrum['samples/001'] = h5py.SoftLink(
                '/shared_data/samples/001')
            # Instrument config
            this_spectrum['instrument_parameters'] = h5py.SoftLink(
                '/shared_data/instrument_parameters/001')
            done += 1
            if done % 1000 == 0:
                print "[%s] progress: %.1f%%" % (input_filename,
                                                 float(done) * 100.0 / n_total)
        print "finished!"
Example #19
def centroid_imzml(input_filename,
                   output_filename,
                   step=[],
                   apodization=False,
                   w_size=10,
                   min_intensity=1e-5,
                   region_name="",
                   prevent_duplicate_pixels=False):
    # write a file to imzml format (centroided)
    """
    :type min_intensity: float
    """
    from pyimzml.ImzMLWriter import ImzMLWriter
    from pyMSpec.centroid_detection import gradient
    sl = slFile(input_filename, region_name=region_name)
    mz_dtype = sl.Mzs.dtype
    int_dtype = sl.get_spectrum(0)[1].dtype
    # Convert coords to index -> kinda hacky
    coords = np.asarray(sl.coords.copy()).T.round(5)
    coords -= np.amin(coords, axis=0)
    if step == []:  # have a guess
        step = np.array([
            np.median(np.diff(np.unique(coords[sl.spotlist, i])))
            for i in range(3)
        ])
        step[np.isnan(step)] = 1
    print 'estimated pixel size: {} x {}'.format(step[0], step[1])
    coords = coords / np.reshape(step, (3, )).T
    coords = coords.round().astype(int)
    ncol, nrow, _ = np.amax(coords, axis=0) + 1
    print 'new image size: {} x {}'.format(nrow, ncol)
    if prevent_duplicate_pixels:
        b = np.ascontiguousarray(coords).view(
            np.dtype((np.void, coords.dtype.itemsize * coords.shape[1])))
        _, coord_idx = np.unique(b, return_index=True)
        print np.shape(sl.spotlist), np.shape(coord_idx)

        print "original number of spectra: {}".format(len(coords))
    else:
        coord_idx = range(len(coords))
    n_total = len(coord_idx)
    print 'spectra to write: {}'.format(n_total)
    with ImzMLWriter(output_filename,
                     mz_dtype=mz_dtype,
                     intensity_dtype=int_dtype) as imzml:
        done = 0
        for key in sl.spotlist:
            if all((prevent_duplicate_pixels, key
                    not in coord_idx)):  # skip duplicate pixels
                #print 'skip {}'.format(key)
                continue
            mzs, intensities = sl.get_spectrum(key)
            if apodization:
                from pyMSpec import smoothing
                # todo - add to processing list in imzml
                mzs, intensities = smoothing.apodization(mzs, intensities)
            mzs_c, intensities_c, _ = gradient(mzs,
                                               intensities,
                                               min_intensity=min_intensity)
            pos = coords[key]
            pos = (pos[0], nrow - 1 - pos[1], pos[2])
            imzml.addSpectrum(mzs_c, intensities_c, pos)
            done += 1
            if done % 1000 == 0:
                print "[%s] progress: %.1f%%" % (input_filename,
                                                 float(done) * 100.0 / n_total)
        print "finished!"