def load_imzml_data_set(file):
    """
    Read every spectrum of an imzML file into two pandas data frames.

    The file is looked up inside the module-level ``data_path_imzml``
    directory. Nothing is written to disk.

    :param file: file name of the imzML file, relative to ``data_path_imzml``
    :return: dict with the keys ``"mass"`` and ``"intensity"`` (data frames
        with one column per spectrum index), ``"x"`` and ``"y"`` (first two
        components of the last entry of the parser's coordinate list) and
        ``"f_name"`` (``file`` cut at its first ``'.'``)
    """
    parser = ImzMLParser(os.path.join(data_path_imzml, file))
    last_coordinate = parser.coordinates[-1]
    x_cord, y_cord = last_coordinate[0], last_coordinate[1]

    # m/z values and the matching ion abundances, keyed by spectrum index
    masses_by_spectrum = {}
    intensities_by_spectrum = {}
    for spectrum_idx, (_x, _y, _z) in enumerate(parser.coordinates):
        mzs, intensities = parser.getspectrum(spectrum_idx)
        masses_by_spectrum[spectrum_idx] = mzs
        intensities_by_spectrum[spectrum_idx] = intensities

    return {
        "mass": pd.DataFrame(masses_by_spectrum),
        "intensity": pd.DataFrame(intensities_by_spectrum),
        "x": x_cord,
        "y": y_cord,
        "f_name": file.split('.')[0],
    }
Exemplo n.º 2
0
def imzml_to_sbd(filepath_imzml, filepath_sbd):
    """Convert a pair of .imzml and .ibd files to a single .sbd file.

    The output is little-endian and consists of a 10-byte header
    (``<BQB``: 0, number of spectra, 8), one 20-byte ``<QLfHH`` record per
    spectrum (data offset, number of points, summed intensity, x, y) and
    finally every spectrum, appended through ``write_spectrum``.

    Args:
        filepath_imzml: path of the .imzml file to read.
        filepath_sbd: path of the .sbd file to create (opened with ``'wb'``).

    Returns:
        bool: always True once the file has been written.
    """
    with open(filepath_sbd, 'wb') as sbd_file:
        parser = ImzMLParser(filepath_imzml)
        spectrum_count = len(parser.coordinates)

        # Pass 1: build the index records. Spectrum data starts right after
        # the header (10 bytes) and the index (20 bytes per spectrum); the
        # running offset assumes 12 bytes per data point.
        records = []
        next_offset = 20 * spectrum_count + 10
        for index, (x, y, z) in enumerate(parser.coordinates):
            mzs, intensities = parser.getspectrum(index)
            point_count = len(mzs)
            records.append(
                (next_offset, point_count, np.sum(intensities), x, y))
            next_offset += point_count * 12

        # Header, then one index record per spectrum
        sbd_file.write(struct.pack('<BQB', 0, spectrum_count, 8))
        for record in records:
            sbd_file.write(struct.pack('<QLfHH', *record))

        # Pass 2: re-read each spectrum and append its data
        for index in range(spectrum_count):
            mzs, intensities = parser.getspectrum(index)
            write_spectrum(sbd_file, (mzs, intensities))

    return True
Exemplo n.º 3
0
    def test_writer_image(get_temp_path, data_mode):
        """Write a 3x3 image with ImzMLWriter and read it back unchanged."""
        mz_axis = np.linspace(100, 1000, 20)
        # 3x3 pixel grid in z-plane 1; x varies slowest, y fastest
        pixel_coords = [[x, y, 1] for x in (1, 2, 3) for y in (1, 2, 3)]
        pixel_intensities = np.random.rand(len(pixel_coords), mz_axis.shape[0])

        imzml_file = os.path.join(get_temp_path, "test.imzML")
        with ImzMLWriter(imzml_file, mode=data_mode) as writer:
            for intensities, coord in zip(pixel_intensities, pixel_coords):
                writer.add_spectrum(mz_axis, intensities, coords=coord)

        with ImzMLParser(imzml_file) as parser:
            for px, (read_mzs, read_intensities) in enumerate(parser):
                assert_array_almost_equal(read_mzs, mz_axis, 4)
                assert_array_almost_equal(read_intensities,
                                          pixel_intensities[px], 4)
                assert parser.n_pixels == len(pixel_coords)
Exemplo n.º 4
0
 def test_parser_get_spectrum(data_path, parse_lib):
     """Each pixel yields non-empty m/z and intensity arrays of equal length."""
     parser = ImzMLParser(data_path, parse_lib=parse_lib)
     for pixel in range(parser.n_pixels):
         mzs, intensities = parser.get_spectrum(pixel)
         n_mzs, n_intensities = len(mzs), len(intensities)
         assert n_mzs == n_intensities
         assert n_mzs > 0
         assert n_intensities > 0
Exemplo n.º 5
0
 def on_pushButton_clicked(self):
     """
     Ask the user for an imzML file and start processing it.

     When a file is chosen, its path is shown in ``lineEdit_1``, a progress
     form with a 'Cancel' button (wired to ``thread_terminate``) is shown,
     the file is parsed, and an ``Average_mz_cal`` object is started whose
     ``trigger`` / ``trigger2`` signals are connected to ``progress_update``
     and ``avg_mz_plot``. Any exception is reported through ``self.error``.
     """
     try:
         file_name, _ = QFileDialog.getOpenFileName(
             self, u'Choose Imzml file', os.getcwd(),
             'Imzml files (*.imzml)')
         if not file_name:
             # Dialog was dismissed without a selection: nothing to do
             return

         self.lineEdit_1.setText(file_name)

         # Progress window with a cancel button
         self.progressBar = My_Progress_Form()
         progress_form = self.progressBar
         progress_form.progressBar.setValue(0)
         cancel_button = progress_form.pushButton
         cancel_button.setVisible(True)
         cancel_button.setText('Cancel')
         cancel_button.clicked.connect(self.thread_terminate)
         progress_form.show()

         # Parse the chosen file and start the averaging object
         self.p = ImzMLParser(self.lineEdit_1.text())
         self.mbt = Average_mz_cal(self.p)
         self.mbt.trigger.connect(self.progress_update)
         self.mbt.trigger2.connect(self.avg_mz_plot)
         self.mbt.start()
     except Exception as exc:
         self.error('Running error, info: ' + str(exc))
def get_ds_spots(ds_id):
    """Build one merged, normalised spectrum per named spot of a dataset.

    Reads ``raw_datasets/{ds_id}.imzML``, the spot grid
    ``spotting/grids/{ds_id}.npy`` and the list of mask names
    ``spotting/grids/{ds_id}_mask_names.json``. Position ``i`` in the name
    list corresponds to value ``i`` in the grid; the entry called
    ``'background'`` is skipped.

    :param ds_id: dataset identifier used to build the three file paths
    :return: dict mapping mask name to ``(mzs, ints, n_spectra)``, where
        ``mzs, ints`` is the result of ``merge_spectra`` and ``n_spectra``
        is the number of spectra that went into it
    """
    parser = ImzMLParser(f'raw_datasets/{ds_id}.imzML')
    grid_mask = np.load(f'spotting/grids/{ds_id}.npy')
    # Use a context manager so the JSON file handle is closed right away
    with open(f'spotting/grids/{ds_id}_mask_names.json') as mask_names_file:
        mask_names = json.load(mask_names_file)

    # Make a mapping of coordinate -> spectrum index (-1 = no spectrum)
    coords = np.array(parser.coordinates)[:, :2]
    base_coord = np.min(coords, axis=0)
    coord_to_idx = np.ones(np.max(coords, axis=0) - base_coord + 1,
                           dtype='i') * -1
    for i, (x, y) in enumerate(coords):
        coord_to_idx[x - base_coord[0], y - base_coord[1]] = i

    # Collect spectra for each mask item
    spots = {}
    for i, mask_name in enumerate(mask_names):
        if mask_name != 'background':
            # presumably grid_mask is indexed [y, x] - TODO confirm
            spectra_ys, spectra_xs = np.nonzero(grid_mask == i)
            # NOTE(review): a mask pixel without a spectrum maps to -1 here
            # and is passed to getspectrum unchanged - confirm this cannot
            # happen for real grids.
            spectra = [
                parser.getspectrum(idx)
                for idx in coord_to_idx[spectra_xs, spectra_ys]
            ]
            # Scale every spectrum so that its intensities sum to 1e6
            norm_spectra = [(mzs, ints * 1e6 / np.sum(ints))
                            for mzs, ints in spectra]
            mzs, ints = merge_spectra(norm_spectra)
            spots[mask_name] = mzs, ints, len(norm_spectra)
    return spots
Exemplo n.º 7
0
    def __read_all(self, filename):
        """
        Internal helper function used to read all data. The function directly
        modifies the self.data entry: it allocates an array of shape
        ``self.shape`` and fills ``self.data[x, y, :]`` for every spectrum.

        :param filename: path of the imzML file handed to ImzMLParser
        """

        self.data = np.zeros(shape=self.shape, dtype=self.data_type)
        log_helper.info(__name__, 'Datacube shape is %s' % [self.data.shape])
        reader = ImzMLParser(filename)
        log_helper.debug(__name__, 'READING ALL DATA!! GIVE ME RAM (please)!')

        # Compute the bin edges for reinterpolation if needed
        if self.imzml_type == self.available_imzml_types['processed']:
            shift = np.diff(self.mz).mean()
            bin_edges = np.append(self.mz, self.mz[-1] + shift)
        else:
            bin_edges = None
        # enumerate() replaces the Python-2-only xrange() index loop
        for ind, coordinate in enumerate(reader.coordinates):
            # Only x and y are needed; slicing accepts both (x, y) and
            # (x, y, z) coordinate tuples.
            xidx, yidx = coordinate[:2]
            # Coordinates may start at arbitrary locations, hence, we need to
            # subtract the minimum to recenter at (0,0)
            xidx -= self.x_pos_min
            yidx -= self.y_pos_min
            # Read the spectrum
            mz, intens = reader.getspectrum(ind)
            # Reinterpolate intensities if we are in processed mode
            if bin_edges is not None:
                intens, _ = np.histogram(mz,
                                         bins=bin_edges,
                                         weights=intens)
            # Save the intensity values in our data cube
            self.data[xidx, yidx, :] = intens
Exemplo n.º 8
0
    def run(self):
        """
        Compute per-spectrum intensity statistics and store them as JSON.

        For every spectrum of ``self.imzml_filename`` the number of peaks and
        the minimum, maximum, peak-to-peak range and the 5/25/50/75/95th
        percentiles of the intensities are collected. The resulting dict of
        lists is written to ``self.output().path``.
        """
        from pyimzml.ImzMLParser import ImzMLParser
        import json
        pcts = [5, 25, 50, 75, 95]
        n_peaks = []
        s_min = []
        s_max = []
        s_ptp = []
        s_pcts = []
        p = ImzMLParser(self.imzml_filename)
        for i, _coords in enumerate(p.coordinates):
            mzs, ints = p.getspectrum(i)
            n_peaks.append(len(mzs))
            # .item() / .tolist() convert NumPy scalars to plain Python
            # numbers; json cannot serialise e.g. numpy.float32 directly.
            s_min.append(np.min(ints).item())
            s_max.append(np.max(ints).item())
            s_ptp.append(np.ptp(ints).item())
            s_pcts.append(np.percentile(ints, pcts).tolist())

        stats = {
            'n_peaks': n_peaks,
            's_min': s_min,
            's_max': s_max,
            's_ptp': s_ptp,
            's_pcts': s_pcts
        }
        with open(self.output().path, 'w+') as f:
            json.dump(stats, f)
        # Call form works on both Python 2 and Python 3
        print('wrote spec stats')
Exemplo n.º 9
0
    def spectrum_iter(self):
        """
        Generator function that yields a position and the associated spectrum.

        The imzML file ``self.basename`` is opened and its spectra are visited
        in file order. In 'processed' mode the intensities are linearly
        interpolated onto ``self.mz`` (0 outside the measured m/z range).

        :yield: (xidx, yidx) a tuple with the x and y position of the
                spectrum, shifted by ``self.x_pos_min`` / ``self.y_pos_min``
        :yield: a numpy array with the intensities at that position
        """
        reader = ImzMLParser(self.basename)
        # enumerate() replaces the Python-2-only xrange() index loop
        for idx, (xidx, yidx, _zidx) in enumerate(reader.coordinates):
            # Coordinates may start at arbitrary locations, hence, we need to
            # subtract the minimum to recenter at (0,0)
            xidx -= self.x_pos_min
            yidx -= self.y_pos_min
            mz, intens = reader.getspectrum(idx)
            # Resample onto the common m/z axis if we are in processed mode
            if self.imzml_type == self.available_imzml_types['processed']:
                f = interpolate.interp1d(mz,
                                         intens,
                                         fill_value=0,
                                         bounds_error=False)
                intens = f(self.mz)

            yield (xidx, yidx), np.asarray(intens)
def write_corrected_msi(msi, output_file, tolerance, database_exactmass, step,
                        dalim):
    """Write a copy of an MSI dataset with corrected m/z values.

    Each pixel goes through peak selection, hit generation, hit selection
    and error-model fitting. A pixel is written to ``output_file`` only if
    it has more than 30 selected peaks, more than 10 hit errors, a region
    of interest summing to more than 10 and a truthy error model; all other
    pixels are left out of the output.

    :param msi: path of the input imzML file
    :param output_file: path handed to ``ImzMLWriter``
    :param tolerance: forwarded to the hit generation, hit selection and
        model helpers
    :param database_exactmass: forwarded to ``hits_generation``
    :param step: forwarded to ``hits_selection`` and ``create_lm``
    :param dalim: forwarded as ``da_limit``
    """
    with ImzMLWriter(output_file) as writer:
        parser = ImzMLParser(msi, parse_lib='ElementTree')
        # iterate through each pixel of the MSI
        for index, (x, y, z) in enumerate(parser.coordinates):
            mzs, intensities = parser.getspectrum(index)
            peak_mzs = mzs[peak_selection(intensities)]
            if len(peak_mzs) <= 30:
                continue

            hit_exp, hit_errors = hits_generation(peak_mzs,
                                                  database_exactmass,
                                                  tolerance)
            if len(hit_errors) <= 10:
                continue

            roi = hits_selection(hit_errors, step, tolerance, da_limit=dalim)
            # Written as "not >" so a NaN sum is skipped, as before
            if not np.sum(roi) > 10:
                continue

            error_model = create_lm(hit_exp,
                                    hit_errors,
                                    tolerance=tolerance,
                                    da_limit=dalim,
                                    step=step)
            if not error_model:
                continue

            writer.addSpectrum(correct_mz_lm(mzs, error_model), intensities,
                               (x, y, z))
Exemplo n.º 11
0
 def __init__(self, imzml_path: pathlib.Path):
     """Keep only a portable spectrum reader for the given imzML file.

     The ImzMLParser is a temporary that is not stored on the instance.
     Any failure while parsing is re-raised as ImzMLError carrying the
     formatted traceback.
     """
     try:
         self.spectrum_reader = ImzMLParser(
             imzml_path, parse_lib="ElementTree").portable_spectrum_reader()
     except Exception as exc:
         raise ImzMLError(format_exc()) from exc
     self._stream = None
Exemplo n.º 12
0
    def test_portable_get_spectrum(imzml_path, ibd_path, parse_lib):
        """A pickled portable reader returns the same spectra as the parser."""
        # Reference parser used for the expected values
        reference = ImzMLParser(imzml_path, parse_lib=parse_lib)

        # Second parser whose portable reader is pickled and unpickled, to
        # ensure the reader survives its intended use case
        detached = ImzMLParser(imzml_path, parse_lib=parse_lib)
        reader = pickle.loads(
            pickle.dumps(detached.portable_spectrum_reader()))

        for px in range(reference.n_pixels):
            expected_mzs, expected_ints = reference.get_spectrum(px)
            actual_mzs, actual_ints = reader.get_spectrum(px)
            assert np.all(expected_mzs == actual_mzs)
            assert np.all(expected_ints == actual_ints)
Exemplo n.º 13
0
 def collect_metadata(self):
     """Return the parser's ``imzmldict`` with numpy.int64 values as int.

     The imzML file at ``self.path`` is parsed; values whose type is exactly
     ``numpy.int64`` are converted to ``int``, everything else is passed
     through unchanged.
     """
     print('parsing imzML from %s' % self.path)
     with ImzMLParser(self.path) as parser:
         raw_metadata = parser.imzmldict

     metadata = {}
     for key, value in raw_metadata.items():
         if type(value) == np.int64:
             value = int(value)
         metadata[key] = value
     return metadata
Exemplo n.º 14
0
    def __init__(self,
                 filename,
                 startX=1,
                 startY=1,
                 width=None,
                 height=None,
                 cropToData=False):
        """
        Parse an imzML file and build an index image for a rectangular window.

        ``self.indexImage[row, col]`` holds the spectrum index of the pixel at
        that position of the window, or -1 where no spectrum exists.
        ``self.coordinates`` lists ``(index, x, y)`` for every spectrum inside
        the window; with ``cropToData`` x and y are re-based so that the
        smallest data coordinate becomes 1.

        :param filename: path of the imzML file
        :param startX: first x coordinate of the window (replaced by the
            smallest data x when ``cropToData`` is set)
        :param startY: first y coordinate of the window (replaced by the
            smallest data y when ``cropToData`` is set)
        :param width: window width; defaults to reaching the largest data x
        :param height: window height; defaults to reaching the largest data y
        :param cropToData: start the window at the smallest data coordinates
        """
        self.imzML = ImzMLParser(filename)

        # Find the min and max row and column where data is present
        # (-1 marks "no minimum seen yet")
        maxWidth = 0
        maxHeight = 0

        minWidth = -1
        minHeight = -1

        for (x, y, z) in self.imzML.coordinates:
            if x > maxWidth:
                maxWidth = x
            if y > maxHeight:
                maxHeight = y
            if minWidth == -1 or minWidth > x:
                minWidth = x
            if minHeight == -1 or minHeight > y:
                minHeight = y

        if cropToData:
            startX = minWidth
            startY = minHeight

        if width is None:
            width = maxWidth - startX + 1
        if height is None:
            height = maxHeight - startY + 1

        self.startX = startX
        self.startY = startY
        self.width = width
        self.height = height
        self.coordinates = []
        self.cropToData = cropToData

        # np.int was only an alias of the builtin int and was removed in
        # NumPy 1.24; the builtin gives the same dtype.
        self.indexImage = np.ones((height, width), dtype=int) * -1

        # index counts every spectrum of the file, including those outside
        # the window, so it stays usable as a spectrum index
        for index, (x, y, z) in enumerate(self.imzML.coordinates):
            inside_window = (startX <= x < startX + width
                             and startY <= y < startY + height)
            if not inside_window:
                continue
            if cropToData:
                self.coordinates.append(
                    (index, x - minWidth + 1, y - minHeight + 1))
                self.indexImage[y - minHeight, x - minWidth] = index
            else:
                self.coordinates.append((index, x, y))
                self.indexImage[y - startY, x - startX] = index
Exemplo n.º 15
0
    def test_parser_init_paths_as_with(data_path, parse_lib):
        """The parser works as a context manager and exposes nine pixels."""
        with ImzMLParser(data_path, parse_lib=parse_lib) as parser:
            n_coordinates = len(parser.coordinates)
            assert n_coordinates == 9
            assert parser.n_pixels == 9

            # The first spectrum must be non-empty and consistent
            mzs, intensities = parser.get_spectrum(0)
            n_mzs, n_intensities = len(mzs), len(intensities)
            assert n_mzs == n_intensities
            assert n_mzs > 0
            assert n_intensities > 0
Exemplo n.º 16
0
    def test_parser_init_ibd_as_filename(imzml_path, ibd_path, parse_lib):
        """The parser accepts the .ibd file as an explicit ``ibd_file`` path."""
        parser_cm = ImzMLParser(imzml_path,
                                parse_lib=parse_lib,
                                ibd_file=ibd_path)
        with parser_cm as parser:
            n_coordinates = len(parser.coordinates)
            assert n_coordinates == 9
            assert parser.n_pixels == 9

            # The first spectrum must be non-empty and consistent
            mzs, intensities = parser.get_spectrum(0)
            n_mzs, n_intensities = len(mzs), len(intensities)
            assert n_mzs == n_intensities
            assert n_mzs > 0
            assert n_intensities > 0
Exemplo n.º 17
0
def test_browse(data_path, parse_lib, item_ids):
    """browse() returns a truthy browser that finds at least one item id."""
    parser = ImzMLParser(data_path, parse_lib=parse_lib)
    browser = browse(parser)
    assert browser

    # Union of the ids reported for every spectrum
    found_ids = set()
    for pixel in range(parser.n_pixels):
        spectrum_view = browser.for_spectrum(pixel)
        found_ids |= set(spectrum_view.get_ids(item_ids))

    assert len(found_ids) != 0
Exemplo n.º 18
0
def get_spec(x, y1, y2, imzML_file):
    """Collect the spectra of the pixels ``(x, y, 1)`` for y in range(y1, y2).

    Pixels that are missing from the file are reported on stdout and
    skipped. Errors raised while reading or converting a spectrum are no
    longer swallowed and misreported as a missing pixel.

    :param x: x coordinate of the pixels to look up
    :param y1: first y coordinate (inclusive)
    :param y2: end of the y range (exclusive)
    :param imzML_file: path of the imzML file
    :return: dict mapping spectrum index to a numpy array built from the
        values of ``tupel2map(spectrum)``
    """
    parser = ImzMLParser(imzML_file)
    part_map = dict()
    for y in range(y1, y2):
        try:
            idx = parser.coordinates.index((x, y, 1))
        except ValueError:
            # list.index raises ValueError when the pixel does not exist
            print(f"({x}, {y}, 1) is not in list.")
            continue
        spec_map = tupel2map(parser.getspectrum(idx))
        part_map[idx] = np.array(list(spec_map.values()))
    return part_map
Exemplo n.º 19
0
    def __init__(self, path: Path):
        """Locate the imzML file for ``path`` and parse it.

        The file name comes from ``find_file_by_ext(path, 'imzml')``. Any
        error raised while parsing is re-raised as ImzMLError carrying the
        formatted traceback; on success the parser is handed to the parent
        initialiser.
        """
        self.filename = find_file_by_ext(path, 'imzml')
        try:
            parser_options = dict(
                parse_lib='ElementTree',
                include_spectra_metadata=METADATA_FIELDS,
            )
            self._imzml_parser = ImzMLParser(self.filename, **parser_options)
        except Exception as exc:
            raise ImzMLError(format_exc()) from exc
        else:
            super().__init__(self._imzml_parser)
Exemplo n.º 20
0
def main(argv):
    """Copy an imzML file as float32 and check that the copy reads back.

    Options: ``-i/--ifile`` input file (required), ``-o/--ofile`` output file
    (defaults to the input path with ``.imzML`` appended), ``-h`` usage.
    After copying, the first spectrum of the input and of the output are
    compared and the result (True/False) is printed.

    :param argv: command-line arguments without the program name
    :raises IOError: if no input file was given
    """
    from pyimzml.ImzMLParser import ImzMLParser
    usage = 'test.py -i <inputfile> -o <outputfile>'
    inputfile = ''
    outputfile = ''
    try:
        opts, args = getopt.getopt(argv, "hi:o:", ["ifile=", "ofile="])
    except getopt.GetoptError:
        print(usage)
        sys.exit(2)
    for opt, arg in opts:
        if opt == '-h':
            print(usage)
            sys.exit()
        elif opt in ("-i", "--ifile"):
            inputfile = arg
        elif opt in ("-o", "--ofile"):
            outputfile = arg
    if inputfile == '':
        print(usage)
        raise IOError('input file not specified')
    if outputfile == '':
        outputfile = inputfile + '.imzML'

    # Copy every spectrum into the new file, remembering what was written
    imzml = ImzMLParser(inputfile)
    spectra = []
    with ImzMLWriter(outputfile,
                     mz_dtype=np.float32,
                     intensity_dtype=np.float32) as writer:
        for i, coords in enumerate(imzml.coordinates):
            mzs, intensities = imzml.getspectrum(i)
            writer.addSpectrum(mzs, intensities, coords)
            spectra.append((mzs, intensities, coords))

    # Read the copy back
    imzml = ImzMLParser(outputfile)
    spectra2 = []
    for i, coords in enumerate(imzml.coordinates):
        mzs, intensities = imzml.getspectrum(i)
        spectra2.append((mzs, intensities, coords))

    def _arrays_match(left, right):
        # The copy is stored as float32, so compare with a tolerance; the
        # shape check keeps allclose from broadcasting or raising.
        return (np.shape(left) == np.shape(right)
                and bool(np.allclose(left, right)))

    if not spectra or not spectra2:
        # No first spectrum to compare: equal only if both files are empty
        print(len(spectra) == len(spectra2))
        return

    # Comparing the tuples with == would raise ValueError, because the
    # truth value of an element-wise array comparison is ambiguous.
    mzs_in, ints_in, coords_in = spectra[0]
    mzs_out, ints_out, coords_out = spectra2[0]
    print(tuple(coords_in) == tuple(coords_out)
          and _arrays_match(mzs_in, mzs_out)
          and _arrays_match(ints_in, ints_out))
Exemplo n.º 21
0
    def __init__(self, filename):
        """ Initialize Filtering Framework from an imzml file

        Loads every spectrum of ``filename`` into ``self.mzlist`` and
        ``self.intensity_list``. ``self.mzs`` / ``self.intensities`` are left
        holding the last spectrum read. ``self.filename`` starts as an empty
        list, and both filter arrays start as empty arrays.
        """
        self.spectrum = ImzMLParser(filename)
        self.mzlist = []
        self.intensity_list = []
        self.filename = []
        # Both lists are still empty here, so the filter arrays have shape (0,)
        self.filter_spec_mass = np.zeros((0,))
        self.filter_spec_intens = np.zeros((0,))

        for index, (_x, _y, _z) in enumerate(self.spectrum.coordinates):
            self.mzs, self.intensities = self.spectrum.getspectrum(index)
            self.mzlist.append(self.mzs)
            self.intensity_list.append(self.intensities)
Exemplo n.º 22
0
    def __init__(self, storage: Storage, imzml_cobject: CloudObject,
                 ibd_cobject: CloudObject):
        """Parse an imzML cloud object and keep a portable spectrum reader.

        The imzML document is read from ``storage`` as a stream and parsed
        without an .ibd file (``ibd_file=None``). The .ibd cloud object is
        only stored on the instance. The parser itself is handed to the
        parent initialiser.
        """
        imzml_stream = storage.get_cloudobject(imzml_cobject, stream=True)
        parser = ImzMLParser(
            imzml_stream,
            ibd_file=None,
            parse_lib='ElementTree',
            include_spectra_metadata=METADATA_FIELDS,
        )

        self.imzml_reader = parser.portable_spectrum_reader()
        self._ibd_cobject = ibd_cobject

        super().__init__(parser)
Exemplo n.º 23
0
def convert_imzml_to_txt(input_imzml, output_txt, output_coords_txt):
    """Export the spectra and coordinates of an imzML file as text files.

    ``output_txt`` receives one ``index|mzs|intensities`` line per spectrum,
    with the arrays formatted by ``_to_space_separated_string``.
    ``output_coords_txt`` receives one ``index,x,y`` line per spectrum.

    :param input_imzml: path of the imzML file to read
    :param output_txt: path of the spectra text file to write
    :param output_coords_txt: path of the coordinates text file to write
    """
    from pyimzml.ImzMLParser import ImzMLParser

    with ImzMLParser(input_imzml, parse_lib='ElementTree') as parser:
        with open(output_txt, 'w') as spectra_out:
            for index, _coord in enumerate(parser.coordinates):
                mzs, ints = parser.getspectrum(index)
                mzs_text = _to_space_separated_string(mzs)
                ints_text = _to_space_separated_string(ints)
                spectra_out.write(f'{index}|{mzs_text}|{ints_text}\n')

        with open(output_coords_txt, 'w') as coords_out:
            for index, coord in enumerate(parser.coordinates):
                coords_out.write(f'{index},{coord[0]},{coord[1]}\n')
Exemplo n.º 24
0
def robust_recalibration(imzml_fn,
                         imzml_fn_r,
                         ref_formula,
                         numpeaks,
                         smoothing,
                         x0=None):
    """Fit a recalibration for an imzML dataset and write the result.

    :param imzml_fn: path of the imzML file to recalibrate
    :param imzml_fn_r: first argument of ``recal`` - presumably the path of
        the recalibrated output file, TODO confirm
    :param ref_formula: forwarded to ``fit_dataset``
    :param numpeaks: forwarded to ``fit_dataset`` as ``max_delta_ppm``
        (NOTE(review): the name suggests a peak count - confirm the mapping)
    :param smoothing: forwarded to ``recal`` as ``m``
    :param x0: forwarded to ``fit_dataset`` as ``x0``; defaults to a fresh
        ``[1, 1]`` per call
    :return: the fit returned by ``fit_dataset``
    """
    # A list literal as default would be shared between calls, so any
    # in-place change by fit_dataset would leak into the next call.
    if x0 is None:
        x0 = [1, 1]
    imzml = ImzMLParser(imzml_fn)
    # calculate the fit parameters
    fit = fit_dataset(imzml, ref_formula, x0=x0, max_delta_ppm=numpeaks)
    # apply the fit with the requested smoothing
    recal(imzml_fn_r, imzml, fit, m=smoothing)
    return fit
Exemplo n.º 25
0
    def test_writer_single_pixel(get_temp_path):
        """A one-pixel 'processed' dataset survives a write/read round trip."""
        mz_axis = np.linspace(100, 1000, 20)
        intensities = np.random.rand(mz_axis.shape[0])
        pixel = [1, 1, 1]

        imzml_file = os.path.join(get_temp_path, "test.imzML")
        with ImzMLWriter(imzml_file, mode="processed") as writer:
            writer.add_spectrum(mz_axis, intensities, coords=pixel)

        with ImzMLParser(imzml_file) as parser:
            read_mzs, read_intensities = parser.get_spectrum(0)
            assert_array_almost_equal(read_mzs, mz_axis, 4)
            assert_array_almost_equal(read_intensities, intensities, 4)
            assert parser.n_pixels == 1
Exemplo n.º 26
0
def import_imzml_dataset(filepath):
    """Read an .imzml file and return all of its spectra.

    Args:
        filepath: path of the .imzml file.

    Returns:
        list: one ``spectrum(mzs, intensities, x, y, z)`` object per pixel,
        in file order.
    """
    parser = ImzMLParser(filepath)

    def _load(index, coordinate):
        # Combine the pixel position with the data stored at that index
        x, y, z = coordinate
        mzs, intensities = parser.getspectrum(index)
        return spectrum(mzs, intensities, x, y, z)

    return [
        _load(index, coordinate)
        for index, coordinate in enumerate(parser.coordinates)
    ]
Exemplo n.º 27
0
    def test_parser_iter(data_path, parse_lib):
        """Iterating the parser matches get_spectrum() pixel by pixel."""
        parser = ImzMLParser(data_path, parse_lib=parse_lib)

        visited = 0
        for px, (iter_mzs, iter_ints) in enumerate(parser):
            direct_mzs, direct_ints = parser.get_spectrum(px)
            assert len(iter_mzs) == len(iter_ints)
            assert len(iter_mzs) == len(direct_mzs)
            assert len(iter_ints) == len(direct_ints)
            assert_equal(direct_mzs, iter_mzs)
            assert_equal(direct_ints, iter_ints)
            visited = px + 1

        assert visited == parser.n_pixels
def save_data_to_csv(filename):
    """Dump all spectra of an imzML file into two CSV files.

    The file is looked up inside the module-level ``data_path`` directory.
    m/z values go to ``mass_data.csv`` and intensities to
    ``intensities.csv`` in the current working directory, one column per
    spectrum index.

    :param filename: file name of the imzML file, relative to ``data_path``
    """
    parser = ImzMLParser(os.path.join(data_path, filename))

    # m/z values and the matching ion abundances, keyed by spectrum index
    masses_by_spectrum = {}
    intensities_by_spectrum = {}
    for spectrum_idx, (_x, _y, _z) in enumerate(parser.coordinates):
        mzs, intensities = parser.getspectrum(spectrum_idx)
        masses_by_spectrum[spectrum_idx] = mzs
        intensities_by_spectrum[spectrum_idx] = intensities

    mass_frame = pd.DataFrame(masses_by_spectrum)
    intensity_frame = pd.DataFrame(intensities_by_spectrum)
    mass_frame.to_csv('mass_data.csv')
    intensity_frame.to_csv('intensities.csv')
Exemplo n.º 29
0
    def __init__(self, fname, specStart=0):
        """Open an imzML file and remember its m/z axis.

        The m/z values are taken from the first spectrum (index 0). With a
        non-zero ``specStart`` the axis is cut to start at that position and
        a warning is printed. ``find_regions()`` is called at the end.

        :param fname: path of the imzML file
        :param specStart: index of the first m/z value to keep
        """
        self.fname = fname
        self.parser = ImzMLParser(fname)
        self.dregions = None
        self.specStart = specStart

        # m/z axis of the first spectrum
        first_mzs = self.parser.getspectrum(0)[0]
        if specStart != 0:
            first_mzs = first_mzs[specStart:]
            print("WARNING: SPECTRA STARTING AT POSITION", self.specStart)
        self.mzValues = first_mzs

        self.find_regions()
def load_and_split_ds_vm(storage, imzml_cobject, ibd_cobject, ds_segm_size_mb,
                         sort_memory):
    """Download a dataset, split it into segments and upload the segments.

    All work happens inside a temporary directory holding an ``imzml`` and a
    ``segments`` sub-directory. Each step is timed with ``time()`` and
    recorded in ``stats`` as ``(step name, elapsed)``.

    :param storage: storage client passed to ``download_dataset`` and
        ``upload_segments``
    :param imzml_cobject: cloud object of the .imzML file
    :param ibd_cobject: cloud object of the .ibd file
    :param ds_segm_size_mb: forwarded to ``define_ds_segments``
    :param sort_memory: forwarded to ``make_segments``
    :return: tuple ``(imzml_reader, ds_segments_bounds, ds_segms_cobjects,
        ds_segms_len, stats)``
    """
    stats = []

    with TemporaryDirectory() as tmp_dir:
        logger.info("Temp dir is {}".format(tmp_dir))

        # Working directories for the raw files and the generated segments
        imzml_dir = Path(tmp_dir) / 'imzml'
        logger.info("Create {} result {}".format(imzml_dir,
                                                 imzml_dir.mkdir()))
        segments_dir = Path(tmp_dir) / 'segments'
        logger.info("Create {} result {}".format(segments_dir,
                                                 segments_dir.mkdir()))

        logger.info('Downloading dataset...')
        started = time()
        imzml_path, ibd_path = download_dataset(imzml_cobject, ibd_cobject,
                                                imzml_dir, storage)
        stats.append(('download_dataset', time() - started))

        logger.info('Loading parser...')
        started = time()
        imzml_parser = ImzMLParser(str(imzml_path))
        imzml_reader = imzml_parser.portable_spectrum_reader()
        stats.append(('load_parser', time() - started))

        logger.info('Defining segments bounds...')
        started = time()
        ds_segments_bounds = define_ds_segments(
            imzml_parser, ds_segm_size_mb=ds_segm_size_mb)
        segments_n = len(ds_segments_bounds)
        stats.append(('define_segments', time() - started))

        logger.info('Segmenting...')
        started = time()
        chunks_n, ds_segms_len = make_segments(imzml_reader, ibd_path,
                                               ds_segments_bounds,
                                               segments_dir, sort_memory)
        stats.append(('dataset_segmentation', time() - started))

        logger.info('Uploading segments...')
        started = time()
        ds_segms_cobjects = upload_segments(storage, segments_dir, chunks_n,
                                            segments_n)
        stats.append(('upload_segments', time() - started))

        return (imzml_reader, ds_segments_bounds, ds_segms_cobjects,
                ds_segms_len, stats)