def __init__(self, imzml_path: pathlib.Path):
    """Parse the imzML file and keep only its portable spectrum reader.

    Args:
        imzml_path: Location of the imzML file handed to ``ImzMLParser``.

    Raises:
        ImzMLError: If anything goes wrong while parsing or while building
            the portable reader. The message is the formatted traceback and
            the original exception is chained as the cause.
    """
    try:
        # Chaining the two calls leaves no local name bound to the parser,
        # so it is released as soon as the portable reader has been built.
        self.spectrum_reader = ImzMLParser(
            imzml_path, parse_lib="ElementTree"
        ).portable_spectrum_reader()
    except Exception as err:
        raise ImzMLError(format_exc()) from err

    # Only reached when parsing succeeded.
    self._stream = None
def __init__(self, storage: Storage, imzml_cobject: CloudObject, ibd_cobject: CloudObject):
    """Build the reader from an imzML cloud object, parsed as a stream.

    Args:
        storage: Object providing ``get_cloudobject``; used to open the
            imzML cloud object with ``stream=True``.
        imzml_cobject: Cloud object holding the imzML document.
        ibd_cobject: Cloud object for the ibd data; only stored here, not
            opened (the parser is created with ``ibd_file=None``).
    """
    imzml_stream = storage.get_cloudobject(imzml_cobject, stream=True)
    parser = ImzMLParser(
        imzml_stream,
        ibd_file=None,
        parse_lib='ElementTree',
        include_spectra_metadata=METADATA_FIELDS,
    )

    self._ibd_cobject = ibd_cobject
    self.imzml_reader = parser.portable_spectrum_reader()

    # The base class initialiser receives the full parser, not the reader.
    super().__init__(parser)
def test_portable_get_spectrum(imzml_path, ibd_path, parse_lib):
    """Check that a pickled portable reader returns the same spectra as the parser.

    Two parsers are built from the same imzML file: one is queried directly,
    the other only supplies a portable spectrum reader. That reader goes
    through a pickle round trip before use, and every spectrum index in
    ``range(n_pixels)`` is compared element-wise against the direct parser.
    """
    # Reference: spectra read straight from a regular parser.
    reference = ImzMLParser(imzml_path, parse_lib=parse_lib)

    # Second parser whose only job is to hand out the portable reader.
    reader_source = ImzMLParser(imzml_path, parse_lib=parse_lib)
    reader = reader_source.portable_spectrum_reader()

    # Round-trip through pickle so the reader is exercised the way it is
    # meant to be used.
    payload = pickle.dumps(reader)
    reader = pickle.loads(payload)

    for pixel in range(reference.n_pixels):
        ref_x, ref_y = reference.get_spectrum(pixel)
        got_x, got_y = reader.get_spectrum(pixel)
        assert np.all(ref_x == got_x)
        assert np.all(ref_y == got_y)
def load_and_split_ds_vm(storage, imzml_cobject, ibd_cobject, ds_segm_size_mb, sort_memory):
    """Download a dataset, split it into segments and upload the segments.

    All intermediate files live in a temporary directory that is removed
    when the function returns. Each stage is timed and recorded in ``stats``.

    Args:
        storage: Passed through to ``download_dataset`` and ``upload_segments``.
        imzml_cobject: Cloud object of the imzML file to download.
        ibd_cobject: Cloud object of the ibd file to download.
        ds_segm_size_mb: Forwarded to ``define_ds_segments``.
        sort_memory: Forwarded to ``make_segments``.

    Returns:
        A tuple ``(imzml_reader, ds_segments_bounds, ds_segms_cobjects,
        ds_segms_len, stats)`` where ``stats`` is a list of
        ``(stage_name, elapsed)`` pairs in execution order.
    """
    stats = []

    def _record(stage, started_at):
        # Store how long the stage took, measured up to this call.
        stats.append((stage, time() - started_at))

    with TemporaryDirectory() as tmp_dir:
        logger.info(f"Temp dir is {tmp_dir}")
        tmp_root = Path(tmp_dir)
        imzml_dir = tmp_root / 'imzml'
        segments_dir = tmp_root / 'segments'
        for work_dir in (imzml_dir, segments_dir):
            # mkdir() has no meaningful return value; it is logged anyway to
            # keep the log output unchanged.
            mkdir_result = work_dir.mkdir()
            logger.info(f"Create {work_dir} result {mkdir_result}")

        logger.info('Downloading dataset...')
        started = time()
        imzml_path, ibd_path = download_dataset(imzml_cobject, ibd_cobject, imzml_dir, storage)
        _record('download_dataset', started)

        logger.info('Loading parser...')
        started = time()
        imzml_parser = ImzMLParser(str(imzml_path))
        imzml_reader = imzml_parser.portable_spectrum_reader()
        _record('load_parser', started)

        logger.info('Defining segments bounds...')
        started = time()
        ds_segments_bounds = define_ds_segments(imzml_parser, ds_segm_size_mb=ds_segm_size_mb)
        segments_n = len(ds_segments_bounds)
        _record('define_segments', started)

        logger.info('Segmenting...')
        started = time()
        chunks_n, ds_segms_len = make_segments(
            imzml_reader, ibd_path, ds_segments_bounds, segments_dir, sort_memory
        )
        _record('dataset_segmentation', started)

        logger.info('Uploading segments...')
        started = time()
        ds_segms_cobjects = upload_segments(storage, segments_dir, chunks_n, segments_n)
        _record('upload_segments', started)

        return imzml_reader, ds_segments_bounds, ds_segms_cobjects, ds_segms_len, stats
def get_portable_imzml_reader(storage):
    """Parse a remote imzML file and store its pickled portable reader.

    The imzML document is fetched over HTTP as a stream from ``imzml_path``.
    ``imzml_path`` is not a parameter; it is resolved from the enclosing or
    module scope. The parser is created with ``ibd_file=None``.

    Args:
        storage: Object providing ``put_cobject``; receives the pickled reader.

    Returns:
        A tuple ``(imzml_reader, imzml_cobject)``: the portable reader and
        whatever ``storage.put_cobject`` returned for its pickled form.
    """
    raw_stream = requests.get(imzml_path, stream=True).raw
    imzml_reader = ImzMLParser(raw_stream, ibd_file=None).portable_spectrum_reader()
    pickled_reader = pickle.dumps(imzml_reader)
    return imzml_reader, storage.put_cobject(pickled_reader)
def get_portable_imzml_reader(storage):
    """Parse an imzML cloud object and store its serialised portable reader.

    The imzML document is opened as a stream via ``storage.get_cloudobject``.
    ``imzml_cobject`` is not a parameter; it is resolved from the enclosing
    or module scope. The parser is created with ``ibd_file=None``.

    Args:
        storage: Object providing ``get_cloudobject`` and ``put_cloudobject``.

    Returns:
        A tuple ``(imzml_reader, imzml_reader_cobject)``: the portable reader
        and whatever ``storage.put_cloudobject`` returned for its serialised
        form.
    """
    source_stream = storage.get_cloudobject(imzml_cobject, stream=True)
    imzml_reader = ImzMLParser(source_stream, ibd_file=None).portable_spectrum_reader()
    serialised_reader = serialise(imzml_reader)
    return imzml_reader, storage.put_cloudobject(serialised_reader)