def load_imzml_data_set(file):
    """Load every spectrum of an imzML file into pandas DataFrames.

    The file is looked up inside the module-level ``data_path_imzml``
    directory.

    :param file: name of the imzML file inside ``data_path_imzml``
    :return: dict with the keys ``"mass"`` and ``"intensity"`` (DataFrames
        with one column per spectrum index), ``"x"`` and ``"y"`` (first two
        components of the last entry of the parser's coordinate list) and
        ``"f_name"`` (``file`` truncated at its first ``'.'``)
    """
    parser = ImzMLParser(os.path.join(data_path_imzml, file))

    # The last coordinate entry is reported back to the caller as x / y.
    x_cord, y_cord = parser.coordinates[-1][0], parser.coordinates[-1][1]

    masses_by_index = {}
    intensities_by_index = {}
    for spectrum_index, (_x, _y, _z) in enumerate(parser.coordinates):
        # getspectrum yields the m/z values and the matching intensities
        mz_values, abundances = parser.getspectrum(spectrum_index)
        masses_by_index[spectrum_index] = mz_values
        intensities_by_index[spectrum_index] = abundances

    return {
        "mass": pd.DataFrame(masses_by_index),
        "intensity": pd.DataFrame(intensities_by_index),
        "x": x_cord,
        "y": y_cord,
        "f_name": file.split('.')[0],
    }
def imzml_to_sbd(filepath_imzml, filepath_sbd):
    """Convert an .imzml/.ibd pair into a single .sbd file.

    Layout written to ``filepath_sbd``: a 10-byte header (``'<BQB'``), one
    20-byte index record per spectrum (``'<QLfHH'``: data offset, number of
    points, summed intensity, x, y), then the spectra themselves as emitted
    by ``write_spectrum``.

    Returns:
        bool: True on success
    """
    with open(filepath_sbd, 'wb') as sbd_stream:
        parser = ImzMLParser(filepath_imzml)
        spectrum_count = len(parser.coordinates)

        # Pass 1: build the index. Spectrum data begins right after the
        # header (10 bytes) and the index table (20 bytes per spectrum).
        index_records = []
        data_offset = 20 * spectrum_count + 10
        for spectrum_idx, (x, y, _z) in enumerate(parser.coordinates):
            mz_values, abundances = parser.getspectrum(spectrum_idx)
            point_count = len(mz_values)
            index_records.append(
                (data_offset, point_count, np.sum(abundances), x, y))
            # 12 bytes per point — presumably what write_spectrum emits;
            # verify against its implementation.
            data_offset = data_offset + point_count * 12

        # Header, then the index table.
        sbd_stream.write(struct.pack('<BQB', 0, spectrum_count, 8))
        for record in index_records:
            sbd_stream.write(struct.pack('<QLfHH', *record))

        # Pass 2: stream the spectra in index order.
        for spectrum_idx in range(spectrum_count):
            mz_values, abundances = parser.getspectrum(spectrum_idx)
            write_spectrum(sbd_stream, (mz_values, abundances))
    return True
def test_writer_image(get_temp_path, data_mode):
    """Write a 3x3 image with ImzMLWriter and read it back unchanged."""
    mz_x = np.linspace(100, 1000, 20)
    # 3x3 grid in a single z-plane, x-major order.
    coordinates = [[x, y, 1] for x in (1, 2, 3) for y in (1, 2, 3)]
    mz_ys = np.random.rand(len(coordinates), mz_x.shape[0])
    output_filename = os.path.join(get_temp_path, "test.imzML")

    with ImzMLWriter(output_filename, mode=data_mode) as writer:
        for pixel, pixel_coords in enumerate(coordinates):
            writer.add_spectrum(mz_x, mz_ys[pixel], coords=pixel_coords)

    with ImzMLParser(output_filename) as parser:
        for pixel, (read_mz, read_intensity) in enumerate(parser):
            assert_array_almost_equal(read_mz, mz_x, 4)
            assert_array_almost_equal(read_intensity, mz_ys[pixel], 4)
        assert parser.n_pixels == len(coordinates)
def test_parser_get_spectrum(data_path, parse_lib):
    """Every pixel yields non-empty m/z and intensity arrays of equal length."""
    parser = ImzMLParser(data_path, parse_lib=parse_lib)
    for pixel in range(parser.n_pixels):
        mzs, intensities = parser.get_spectrum(pixel)
        mz_count = len(mzs)
        intensity_count = len(intensities)
        assert mz_count == intensity_count
        assert mz_count > 0
        assert intensity_count > 0
def on_pushButton_clicked(self):
    """Let the user pick an imzML file and start the average-m/z worker.

    Opens a file dialog in the current working directory. If a file is
    chosen, its path is stored in ``lineEdit_1``, a progress window with a
    Cancel button is shown, and an ``Average_mz_cal`` worker is started on
    the parsed file. Any exception is reported through ``self.error``.
    """
    try:
        file_name, _ = QFileDialog.getOpenFileName(
            self, u'Choose Imzml file', os.getcwd(), 'Imzml files (*.imzml)')
        if not file_name:
            # Dialog was dismissed without a selection.
            return

        self.lineEdit_1.setText(file_name)

        progress_form = My_Progress_Form()
        self.progressBar = progress_form
        progress_form.progressBar.setValue(0)
        cancel_button = progress_form.pushButton
        cancel_button.setVisible(True)
        cancel_button.setText('Cancel')
        cancel_button.clicked.connect(self.thread_terminate)
        progress_form.show()

        self.p = ImzMLParser(self.lineEdit_1.text())
        worker = Average_mz_cal(self.p)
        self.mbt = worker
        worker.trigger.connect(self.progress_update)
        worker.trigger2.connect(self.avg_mz_plot)
        worker.start()
    except Exception as err:
        self.error('Running error, info: ' + str(err))
def get_ds_spots(ds_id):
    """Collect one merged, TIC-normalised spectrum per named grid spot.

    Reads ``raw_datasets/{ds_id}.imzML``, the grid mask
    ``spotting/grids/{ds_id}.npy`` and the mask names
    ``spotting/grids/{ds_id}_mask_names.json``.

    :param ds_id: dataset identifier used to build the three file paths
    :return: dict mapping each mask name except ``'background'`` to a tuple
        ``(mzs, ints, n_spectra)`` where ``mzs, ints`` come from
        ``merge_spectra`` and ``n_spectra`` is the number of spectra merged
    """
    parser = ImzMLParser(f'raw_datasets/{ds_id}.imzML')
    grid_mask = np.load(f'spotting/grids/{ds_id}.npy')
    # Use a context manager so the JSON file handle is closed promptly.
    with open(f'spotting/grids/{ds_id}_mask_names.json') as names_file:
        mask_names = json.load(names_file)

    # Make a mapping of coordinate -> spectrum index (-1 = no spectrum).
    coords = np.array(parser.coordinates)[:, :2]
    base_coord = np.min(coords, axis=0)
    coord_to_idx = np.ones(np.max(coords, axis=0) - base_coord + 1, dtype='i') * -1
    for i, (x, y) in enumerate(coords):
        coord_to_idx[x - base_coord[0], y - base_coord[1]] = i

    # Collect spectra for each mask item; the mask value is the position of
    # the name in mask_names.
    spots = {}
    for i, mask_name in enumerate(mask_names):
        if mask_name == 'background':
            continue
        spectra_ys, spectra_xs = np.nonzero(grid_mask == i)
        # NOTE(review): cells without a spectrum hold -1, which would be
        # passed to getspectrum unchanged — confirm masks only cover
        # acquired pixels.
        spectra = [
            parser.getspectrum(idx)
            for idx in coord_to_idx[spectra_xs, spectra_ys]
        ]
        # Scale each spectrum so its intensities sum to 1e6.
        norm_spectra = [(mzs, ints * 1e6 / np.sum(ints)) for mzs, ints in spectra]
        mzs, ints = merge_spectra(norm_spectra)
        spots[mask_name] = mzs, ints, len(norm_spectra)
    return spots
def __read_all(self, filename):
    """
    Internal helper function used to read all data. The function directly
    modifies the self.data entry, which is (re)allocated as a zero array of
    shape ``self.shape`` and dtype ``self.data_type`` and then filled
    spectrum by spectrum.

    :param filename: path of the imzML file to read
    """
    self.data = np.zeros(shape=self.shape, dtype=self.data_type)
    log_helper.info(__name__, 'Datacube shape is %s' % [self.data.shape])
    reader = ImzMLParser(filename)
    log_helper.debug(__name__, 'READING ALL DATA!! GIVE ME RAM (please)!')

    # Compute the bin edges for reinterpolation if needed
    if self.imzml_type == self.available_imzml_types['processed']:
        shift = np.diff(self.mz).mean()
        bin_edges = np.append(self.mz, self.mz[-1] + shift)
    else:
        bin_edges = None

    for ind in xrange(0, len(reader.coordinates)):
        # Only x and y are used. Slicing accepts both (x, y) and (x, y, z)
        # coordinate tuples; plain two-name unpacking fails on the latter.
        xidx, yidx = reader.coordinates[ind][:2]
        # Coordinates may start at arbitrary locations, hence, we need to
        # subtract the minimum to recenter at (0,0)
        xidx -= self.x_pos_min
        yidx -= self.y_pos_min
        # Read the spectrum
        mz, intens = reader.getspectrum(ind)
        # Rebin intensities onto the common m/z axis in processed mode
        if bin_edges is not None:
            intens, _ = np.histogram(mz, bins=bin_edges, weights=intens)
        # Save the intensity values in our data cube
        self.data[xidx, yidx, :] = intens
def run(self):
    """Compute per-spectrum statistics and write them as JSON.

    For every spectrum of ``self.imzml_filename`` this records the number of
    peaks and the minimum, maximum, peak-to-peak range and the
    5/25/50/75/95th percentiles of the intensities. The result is written to
    ``self.output().path`` as a dict of parallel lists.
    """
    from pyimzml.ImzMLParser import ImzMLParser
    import json

    pcts = [5, 25, 50, 75, 95]
    n_peaks = []
    s_min = []
    s_max = []
    s_ptp = []
    s_pcts = []

    p = ImzMLParser(self.imzml_filename)
    for i, (x, y, z_) in enumerate(p.coordinates):
        mzs, ints = p.getspectrum(i)
        n_peaks.append(len(mzs))
        # .item() / .tolist() turn NumPy scalars into native Python numbers;
        # json cannot serialise e.g. numpy.float32.
        s_min.append(np.min(ints).item())
        s_max.append(np.max(ints).item())
        s_ptp.append(np.ptp(ints).item())
        s_pcts.append(np.percentile(ints, pcts).tolist())

    stats = {
        'n_peaks': n_peaks,
        's_min': s_min,
        's_max': s_max,
        's_ptp': s_ptp,
        's_pcts': s_pcts
    }
    with open(self.output().path, 'w+') as f:
        json.dump(stats, f)
    # Parenthesised form is valid on both Python 2 and Python 3.
    print('wrote spec stats')
def spectrum_iter(self):
    """
    Generator function that yields a position and associated spectrum for
    every entry of the imzML file ``self.basename``.

    :yield: (xidx, yidx) a tuple with the x and y position after subtracting
        ``self.x_pos_min`` / ``self.y_pos_min``
    :yield: yi, a numpy 1D-array with the spectral intensities at that
        position; in processed mode they are linearly interpolated onto
        ``self.mz`` (0 outside the measured range)
    """
    reader = ImzMLParser(self.basename)
    for spectrum_index in xrange(0, len(reader.coordinates)):
        x_abs, y_abs, _z_abs = reader.coordinates[spectrum_index]
        # Coordinates may start at arbitrary locations, so recenter at (0,0)
        position = (x_abs - self.x_pos_min, y_abs - self.y_pos_min)

        mz_axis, intensities = reader.getspectrum(spectrum_index)
        if self.imzml_type == self.available_imzml_types['processed']:
            # Each processed spectrum has its own m/z axis; resample it
            # onto the shared axis.
            resample = interpolate.interp1d(
                mz_axis, intensities, fill_value=0, bounds_error=False)
            intensities = resample(self.mz)

        yield position, np.asarray(intensities)
def write_corrected_msi(msi, output_file, tolerance, database_exactmass, step, dalim):
    """Write a mass-corrected copy of an MSI dataset.

    Each pixel of ``msi`` is recalibrated with a per-pixel error model and
    written to ``output_file``. A pixel is written only if it passes every
    gate: more than 30 selected peaks, more than 10 database hits, a hit
    selection summing to more than 10, and a truthy error model. Pixels that
    fail any gate are left out of the output.
    """
    with ImzMLWriter(output_file) as writer:
        parser = ImzMLParser(msi, parse_lib='ElementTree')
        for pixel_idx, (x, y, z) in enumerate(parser.coordinates):
            mzs, intensities = parser.getspectrum(pixel_idx)
            selected_mzs = mzs[peak_selection(intensities)]
            if len(selected_mzs) <= 30:
                continue

            hit_exp, hit_errors = hits_generation(
                selected_mzs, database_exactmass, tolerance)
            if len(hit_errors) <= 10:
                continue

            roi = hits_selection(hit_errors, step, tolerance, da_limit=dalim)
            if not np.sum(roi) > 10:
                continue

            error_model = create_lm(hit_exp, hit_errors, tolerance=tolerance,
                                    da_limit=dalim, step=step)
            if not error_model:
                continue

            writer.addSpectrum(
                correct_mz_lm(mzs, error_model), intensities, (x, y, z))
def __init__(self, imzml_path: pathlib.Path):
    """Parse ``imzml_path`` and keep only its portable spectrum reader.

    The parser itself is not retained. Any failure while parsing or while
    creating the reader is re-raised as ``ImzMLError`` carrying the
    formatted traceback.
    """
    try:
        # The parser is a temporary; only the portable reader is stored.
        self.spectrum_reader = ImzMLParser(
            imzml_path, parse_lib="ElementTree"
        ).portable_spectrum_reader()
    except Exception as err:
        raise ImzMLError(format_exc()) from err
    self._stream = None
def test_portable_get_spectrum(imzml_path, ibd_path, parse_lib):
    """A pickled portable reader returns the same spectra as the parser."""
    # Reference: the regular parser.
    parser = ImzMLParser(imzml_path, parse_lib=parse_lib)

    # A second parser provides the portable reader under test.
    detached_parser = ImzMLParser(imzml_path, parse_lib=parse_lib)
    # Round-trip through pickle, since that is the reader's intended use.
    pickled_reader = pickle.dumps(detached_parser.portable_spectrum_reader())
    portable_reader = pickle.loads(pickled_reader)

    for pixel in range(parser.n_pixels):
        expected_mz, expected_intensity = parser.get_spectrum(pixel)
        actual_mz, actual_intensity = portable_reader.get_spectrum(pixel)
        assert np.all(expected_mz == actual_mz)
        assert np.all(expected_intensity == actual_intensity)
def collect_metadata(self):
    """Return the imzML metadata dict with ``np.int64`` values as ``int``.

    Reads ``imzmldict`` from the parser for ``self.path``. Values whose type
    is exactly ``np.int64`` are converted to ``int``; all other values are
    returned as they are.
    """
    print('parsing imzML from %s' % self.path)
    with ImzMLParser(self.path) as parser:
        raw_metadata = parser.imzmldict
        metadata = {}
        for key, value in raw_metadata.items():
            if type(value) is np.int64:
                value = int(value)
            metadata[key] = value
    return metadata
def __init__(self, filename, startX=1, startY=1, width=None, height=None, cropToData=False):
    """Open an imzML file and build an image of spectrum indices.

    :param filename: path of the imzML file
    :param startX: first column of the region of interest
    :param startY: first row of the region of interest
    :param width: region width; defaults to everything up to the largest x
    :param height: region height; defaults to everything up to the largest y
    :param cropToData: if True, ``startX`` / ``startY`` are replaced by the
        smallest x / y in the data and stored coordinates are re-based so
        that this corner becomes (1, 1)

    Sets ``self.indexImage`` (shape ``(height, width)``, -1 where no spectrum
    falls) and ``self.coordinates`` (list of ``(index, x, y)`` for spectra
    inside the region), where ``index`` is the spectrum's position in the
    parser's coordinate list.
    """
    self.imzML = ImzMLParser(filename)

    # Find the min and max row and column where data is present.
    xs = [x for (x, y, z) in self.imzML.coordinates]
    ys = [y for (x, y, z) in self.imzML.coordinates]
    maxWidth = max([0] + xs)
    maxHeight = max([0] + ys)
    # -1 is kept as the "no data" value for an empty coordinate list.
    minWidth = min(xs) if xs else -1
    minHeight = min(ys) if ys else -1

    if cropToData:
        startX = minWidth
        startY = minHeight
    if width is None:
        width = maxWidth - startX + 1
    if height is None:
        height = maxHeight - startY + 1

    self.startX = startX
    self.startY = startY
    self.width = width
    self.height = height
    self.coordinates = []
    self.cropToData = cropToData
    # Builtin int replaces np.int, an alias of it that NumPy has removed.
    self.indexImage = np.ones((height, width), dtype=int) * -1

    for index, (x, y, z) in enumerate(self.imzML.coordinates):
        inside = (startX <= x < startX + width) and (startY <= y < startY + height)
        if not inside:
            continue
        if cropToData:
            self.coordinates.append(
                (index, x - minWidth + 1, y - minHeight + 1))
            self.indexImage[y - minHeight, x - minWidth] = index
        else:
            self.coordinates.append((index, x, y))
            self.indexImage[y - startY, x - startX] = index
def test_parser_init_paths_as_with(data_path, parse_lib):
    """The parser works as a context manager when given only the path."""
    with ImzMLParser(data_path, parse_lib=parse_lib) as parser:
        assert len(parser.coordinates) == 9
        assert parser.n_pixels == 9

        first_mzs, first_intensities = parser.get_spectrum(0)
        mz_count = len(first_mzs)
        intensity_count = len(first_intensities)
        assert mz_count == intensity_count
        assert mz_count > 0
        assert intensity_count > 0
def test_parser_init_ibd_as_filename(imzml_path, ibd_path, parse_lib):
    """The parser accepts the .ibd location through ``ibd_file``."""
    parser_cm = ImzMLParser(imzml_path, parse_lib=parse_lib, ibd_file=ibd_path)
    with parser_cm as parser:
        assert len(parser.coordinates) == 9
        assert parser.n_pixels == 9

        first_mzs, first_intensities = parser.get_spectrum(0)
        mz_count = len(first_mzs)
        intensity_count = len(first_intensities)
        assert mz_count == intensity_count
        assert mz_count > 0
        assert intensity_count > 0
def test_browse(data_path, parse_lib, item_ids):
    """Browsing all spectra finds at least one of the requested item ids."""
    parser = ImzMLParser(data_path, parse_lib=parse_lib)
    browser = browse(parser)
    assert browser

    found_ids = set()
    for pixel in range(parser.n_pixels):
        spectrum_view = browser.for_spectrum(pixel)
        found_ids.update(spectrum_view.get_ids(item_ids))
    assert len(found_ids) != 0
def get_spec(x, y1, y2, imzML_file):
    """Collect the spectra of one image column segment.

    For every ``y`` in ``range(y1, y2)`` the pixel ``(x, y, 1)`` is looked up
    in the file's coordinate list. Pixels that are not present are reported
    on stdout and skipped.

    :param x: x coordinate of the column
    :param y1: first y coordinate (inclusive)
    :param y2: last y coordinate (exclusive)
    :param imzML_file: path of the imzML file
    :return: dict mapping spectrum index to a numpy array of the values of
        ``tupel2map(spectrum)``
    """
    parser = ImzMLParser(imzML_file)
    part_map = {}
    for y in range(y1, y2):
        # Only the lookup is guarded: list.index raises ValueError for a
        # missing pixel. Errors while reading or converting a spectrum are
        # no longer reported as "not in list".
        try:
            idx = parser.coordinates.index((x, y, 1))
        except ValueError:
            print(f"({x}, {y}, 1) is not in list.")
            continue
        spec_map = tupel2map(parser.getspectrum(idx))
        part_map[idx] = np.array(list(spec_map.values()))
    return part_map
def __init__(self, path: Path):
    """Locate the imzML file for ``path`` and parse it.

    The file name comes from ``find_file_by_ext(path, 'imzml')``. A parsing
    failure is re-raised as ``ImzMLError`` carrying the formatted traceback.
    The parser is then handed to the parent initialiser.
    """
    self.filename = find_file_by_ext(path, 'imzml')
    try:
        parser_options = {
            'parse_lib': 'ElementTree',
            'include_spectra_metadata': METADATA_FIELDS,
        }
        self._imzml_parser = ImzMLParser(self.filename, **parser_options)
    except Exception as err:
        raise ImzMLError(format_exc()) from err
    super().__init__(self._imzml_parser)
def main(argv):
    """Re-write an imzML file as float32 and compare the first spectrum.

    Command line: ``-i <inputfile>`` (required) and ``-o <outputfile>``
    (defaults to ``<inputfile>.imzML``). The input is copied spectrum by
    spectrum through ``ImzMLWriter`` with float32 m/z and intensity arrays.
    The output is then read back and the function prints whether the first
    spectrum (m/z values, intensities and coordinates) is unchanged.

    :param argv: list of command-line arguments without the program name
    :raises IOError: if no input file was given
    """
    from pyimzml.ImzMLParser import ImzMLParser

    usage = 'test.py -i <inputfile> -o <outputfile>'
    inputfile = ''
    outputfile = ''
    try:
        opts, args = getopt.getopt(argv, "hi:o:", ["ifile=", "ofile="])
    except getopt.GetoptError:
        print(usage)
        sys.exit(2)
    for opt, arg in opts:
        if opt == '-h':
            print(usage)
            sys.exit()
        elif opt in ("-i", "--ifile"):
            inputfile = arg
        elif opt in ("-o", "--ofile"):
            outputfile = arg
    if inputfile == '':
        print(usage)
        raise IOError('input file not specified')
    if outputfile == '':
        outputfile = inputfile + '.imzML'

    imzml = ImzMLParser(inputfile)
    spectra = []
    with ImzMLWriter(outputfile, mz_dtype=np.float32, intensity_dtype=np.float32) as writer:
        for i, coords in enumerate(imzml.coordinates):
            mzs, intensities = imzml.getspectrum(i)
            writer.addSpectrum(mzs, intensities, coords)
            spectra.append((mzs, intensities, coords))

    imzml = ImzMLParser(outputfile)
    spectra2 = []
    for i, coords in enumerate(imzml.coordinates):
        mzs, intensities = imzml.getspectrum(i)
        spectra2.append((mzs, intensities, coords))

    # Comparing the tuples with == would compare the NumPy arrays
    # element-wise and then raise "truth value of an array is ambiguous",
    # so compare each part explicitly.
    mzs_in, intensities_in, coords_in = spectra[0]
    mzs_out, intensities_out, coords_out = spectra2[0]
    print(bool(np.array_equal(mzs_in, mzs_out)
               and np.array_equal(intensities_in, intensities_out)
               and coords_in == coords_out))
def __init__(self, filename):
    """Initialize the filtering framework from an imzml file.

    Loads every spectrum into ``self.mzlist`` / ``self.intensity_list``.
    ``self.mzs`` and ``self.intensities`` end up holding the last spectrum
    read. The two ``filter_spec_*`` arrays are sized from the lists while
    they are still empty.
    """
    self.spectrum = ImzMLParser(filename)
    self.mzlist = []
    self.intensity_list = []
    self.filename = []
    # Both lists are empty here, so these are zero-length arrays.
    self.filter_spec_mass = np.zeros(np.shape(self.mzlist))
    self.filter_spec_intens = np.zeros(np.shape(self.intensity_list))

    for spectrum_index, (_x, _y, _z) in enumerate(self.spectrum.coordinates):
        loaded = self.spectrum.getspectrum(spectrum_index)
        self.mzs, self.intensities = loaded
        self.mzlist.append(self.mzs)
        self.intensity_list.append(self.intensities)
def __init__(self, storage: Storage, imzml_cobject: CloudObject, ibd_cobject: CloudObject):
    """Parse an imzML cloud object streamed from ``storage``.

    Only the imzML part is read here (``ibd_file=None``). The ibd cloud
    object is stored in ``self._ibd_cobject`` and a portable spectrum reader
    in ``self.imzml_reader``. The parser is then handed to the parent
    initialiser.
    """
    imzml_stream = storage.get_cloudobject(imzml_cobject, stream=True)
    parser = ImzMLParser(
        imzml_stream,
        ibd_file=None,
        parse_lib='ElementTree',
        include_spectra_metadata=METADATA_FIELDS,
    )
    self._ibd_cobject = ibd_cobject
    self.imzml_reader = parser.portable_spectrum_reader()
    super().__init__(parser)
def convert_imzml_to_txt(input_imzml, output_txt, output_coords_txt):
    """Dump an imzML dataset into two text files.

    ``output_txt`` gets one line per spectrum, ``index|mzs|intensities``,
    with the arrays formatted by ``_to_space_separated_string``.
    ``output_coords_txt`` gets one line per spectrum, ``index,x,y``.
    """
    from pyimzml.ImzMLParser import ImzMLParser

    with ImzMLParser(input_imzml, parse_lib='ElementTree') as parser:
        with open(output_txt, 'w') as spectra_file:
            for i, _coord in enumerate(parser.coordinates):
                mzs, ints = parser.getspectrum(i)
                mzs_formatted = _to_space_separated_string(mzs)
                ints_formatted = _to_space_separated_string(ints)
                spectra_file.write(f'{i}|{mzs_formatted}|{ints_formatted}\n')

        with open(output_coords_txt, 'w') as coord_file:
            for i, coord in enumerate(parser.coordinates):
                coord_file.write(f'{i},{coord[0]},{coord[1]}\n')
def robust_recalibration(imzml_fn, imzml_fn_r, ref_formula, numpeaks, smoothing, x0=None):
    """Fit a recalibration for an imzML dataset and apply it.

    :param imzml_fn: path of the input imzML file
    :param imzml_fn_r: output path passed to ``recal``
    :param ref_formula: reference formula(s) passed to ``fit_dataset``
    :param numpeaks: forwarded to ``fit_dataset`` as ``max_delta_ppm``
    :param smoothing: forwarded to ``recal`` as ``m``
    :param x0: starting values for the fit; defaults to ``[1, 1]``
    :return: the fit returned by ``fit_dataset``
    """
    # A fresh list per call: a mutable default would be shared between calls.
    if x0 is None:
        x0 = [1, 1]
    imzml = ImzMLParser(imzml_fn)
    # NOTE(review): numpeaks is passed as max_delta_ppm — confirm this
    # mapping is intended.
    fit = fit_dataset(imzml, ref_formula, x0=x0, max_delta_ppm=numpeaks)
    # Apply the fit with the requested spatial smoothing.
    recal(imzml_fn_r, imzml, fit, m=smoothing)
    return fit
def test_writer_single_pixel(get_temp_path):
    """A one-pixel processed-mode file round-trips through the parser."""
    mz_x = np.linspace(100, 1000, 20)
    mz_y = np.random.rand(mz_x.shape[0])
    pixel_coords = [1, 1, 1]
    output_filename = os.path.join(get_temp_path, "test.imzML")

    with ImzMLWriter(output_filename, mode="processed") as writer:
        writer.add_spectrum(mz_x, mz_y, coords=pixel_coords)

    with ImzMLParser(output_filename) as parser:
        read_mz, read_intensity = parser.get_spectrum(0)
        assert_array_almost_equal(read_mz, mz_x, 4)
        assert_array_almost_equal(read_intensity, mz_y, 4)
        assert parser.n_pixels == 1
def import_imzml_dataset(filepath):
    """Read an .imzml file into a list of ``spectrum`` objects.

    Returns:
        list: one ``spectrum(mzs, intensities, x, y, z)`` per pixel, in the
        order of the parser's coordinate list
    """
    parser = ImzMLParser(filepath)

    def _load(index, x, y, z):
        # Pair the pixel's data arrays with its coordinates.
        mzs, intensities = parser.getspectrum(index)
        return spectrum(mzs, intensities, x, y, z)

    return [
        _load(index, x, y, z)
        for index, (x, y, z) in enumerate(parser.coordinates)
    ]
def test_parser_iter(data_path, parse_lib):
    """Iterating the parser matches ``get_spectrum`` for every pixel."""
    parser = ImzMLParser(data_path, parse_lib=parse_lib)
    visited = 0
    for pixel, (iter_mz, iter_intensity) in enumerate(parser):
        direct_mz, direct_intensity = parser.get_spectrum(pixel)
        assert len(iter_mz) == len(iter_intensity)
        assert len(iter_mz) == len(direct_mz)
        assert len(iter_intensity) == len(direct_intensity)
        assert_equal(direct_mz, iter_mz)
        assert_equal(direct_intensity, iter_intensity)
        visited += 1
    assert visited == parser.n_pixels
def save_data_to_csv(filename):
    """Export all spectra of an imzML file to two CSV files.

    The file is looked up inside the module-level ``data_path`` directory.
    m/z values go to ``mass_data.csv`` and intensities to
    ``intensities.csv`` in the current working directory, one column per
    spectrum index.
    """
    parser = ImzMLParser(os.path.join(data_path, filename))

    masses_by_index = {}
    intensities_by_index = {}
    for spectrum_index, (_x, _y, _z) in enumerate(parser.coordinates):
        # getspectrum yields the m/z values and the matching intensities
        mz_values, abundances = parser.getspectrum(spectrum_index)
        masses_by_index[spectrum_index] = mz_values
        intensities_by_index[spectrum_index] = abundances

    mass_frame = pd.DataFrame(masses_by_index)
    intensity_frame = pd.DataFrame(intensities_by_index)
    mass_frame.to_csv('mass_data.csv')
    intensity_frame.to_csv('intensities.csv')
def __init__(self, fname, specStart=0):
    """Open ``fname`` and take the m/z axis from its first spectrum.

    :param fname: path of the imzML file
    :param specStart: number of leading m/z values to drop; a warning is
        printed when it is not 0
    """
    self.fname = fname
    self.parser = ImzMLParser(fname)
    self.dregions = None
    self.specStart = specStart

    # The first spectrum's m/z values are used as the m/z axis.
    mz_axis = self.parser.getspectrum(0)[0]
    if self.specStart != 0:
        self.mzValues = mz_axis[self.specStart:]
        print("WARNING: SPECTRA STARTING AT POSITION", self.specStart)
    else:
        self.mzValues = mz_axis

    self.find_regions()
def load_and_split_ds_vm(storage, imzml_cobject, ibd_cobject, ds_segm_size_mb, sort_memory):
    """Download a dataset, split it into segments and upload them.

    All work happens inside a temporary directory. Each stage (download,
    parser load, segment definition, segmentation, upload) is timed.

    :return: tuple ``(imzml_reader, ds_segments_bounds, ds_segms_cobjects,
        ds_segms_len, stats)`` where ``stats`` is a list of
        ``(stage_name, seconds)`` pairs in execution order
    """
    stats = []

    with TemporaryDirectory() as tmp_dir:
        logger.info(f"Temp dir is {tmp_dir}")
        work_root = Path(tmp_dir)

        imzml_dir = work_root / 'imzml'
        res = imzml_dir.mkdir()
        logger.info(f"Create {imzml_dir} result {res}")

        segments_dir = work_root / 'segments'
        res = segments_dir.mkdir()
        logger.info(f"Create {segments_dir} result {res}")

        logger.info('Downloading dataset...')
        started = time()
        imzml_path, ibd_path = download_dataset(
            imzml_cobject, ibd_cobject, imzml_dir, storage)
        stats.append(('download_dataset', time() - started))

        logger.info('Loading parser...')
        started = time()
        imzml_parser = ImzMLParser(str(imzml_path))
        imzml_reader = imzml_parser.portable_spectrum_reader()
        stats.append(('load_parser', time() - started))

        logger.info('Defining segments bounds...')
        started = time()
        ds_segments_bounds = define_ds_segments(
            imzml_parser, ds_segm_size_mb=ds_segm_size_mb)
        segments_n = len(ds_segments_bounds)
        stats.append(('define_segments', time() - started))

        logger.info('Segmenting...')
        started = time()
        chunks_n, ds_segms_len = make_segments(
            imzml_reader, ibd_path, ds_segments_bounds, segments_dir, sort_memory)
        stats.append(('dataset_segmentation', time() - started))

        logger.info('Uploading segments...')
        started = time()
        ds_segms_cobjects = upload_segments(
            storage, segments_dir, chunks_n, segments_n)
        stats.append(('upload_segments', time() - started))

        return imzml_reader, ds_segments_bounds, ds_segms_cobjects, ds_segms_len, stats