class BlockIndex:
    """Index of blocks that answers "which blocks cover this byte range?".

    Blocks are objects exposing ``offset`` and ``size`` attributes. Each
    block is stored in an ordered tree under its exclusive end offset
    (``offset + size``), so the first key strictly greater than a position
    belongs to the only block that can contain that position.
    """

    def __init__(self):
        # key: block.offset + block.size (exclusive end), value: the block
        self.avl = FastAVLTree()

    def add_block(self, block):
        """Register *block* under its exclusive end offset."""
        self.avl[block.offset + block.size] = block

    def get_blocks(self, start, length):
        """Yield the blocks that may overlap ``[start, start + length)``.

        Blocks are yielded in ascending key (end offset) order. Gaps between
        blocks are not reported; callers that need exact coverage should use
        :meth:`get_chunks`.
        """
        end = start + length
        last_key = None
        for key, block in self.avl.iter_items(start, end):
            last_key = key
            # A block whose end equals ``start`` stops right before the
            # range and contributes nothing to it.
            if key > start:
                yield block

        # The block that covers the tail of the range ends at or after
        # ``end``, so the range iteration above cannot have returned it.
        try:
            if last_key is not None:
                _, block = self.avl.succ_item(last_key)
            else:
                _, block = self.avl.ceiling_item(start)
        except KeyError:
            return
        # Only report it when it really begins inside the range; this check
        # now applies to both lookups above.
        if block.offset < end:
            yield block

    def get_chunks(self, offset, length):
        """Yield ``(chunk_offset, chunk_size, block)`` pieces of a range.

        ``chunk_offset`` is relative to the start of ``block``. Iteration
        stops contributing chunks at the first gap, because later blocks
        no longer contain the current position.

        :raises ValueError: if *length* is negative (raised when iteration
            starts, since this is a generator).
        """
        if length < 0:
            raise ValueError('length must be non-negative')
        for block in self.get_blocks(offset, length):
            if length == 0:
                break
            if block.offset <= offset < block.offset + block.size:
                chunk_offset = offset - block.offset
                chunk_size = min(length, block.size - chunk_offset)
                yield (chunk_offset, chunk_size, block)
                offset += chunk_size
                length -= chunk_size
class RangeTree(Generic[V]):
    """A specialized tree dealing with ranges.

    Each stored interval is half-open (``start <= key < stop``) and is kept
    in an ordered tree as ``anchor -> (value, end)``:

    * closed interval ``[s:e]``: anchor ``s``, end ``e``;
    * right-open interval ``[s:]``: anchor ``s``, end ``INF_PLUS``;
    * left-open interval ``[:e]``: anchor ``e - 1`` (its last member),
      end ``INF_MINUS``.
    """

    def __init__(self) -> None:
        # Map ints to tuples (val, Union[end, InfinityMarker])
        self._tree = FastAVLTree()

    def __setitem__(self, key: Union[slice, range], value: V) -> None:
        """Set a value to the given interval.

        Only slices and ranges with the default step (1) are supported.
        If the slice or range is inverted (end < start), the interval will
        be flipped. Half-open slices ([:1], [1:]) are supported; a fully
        open slice ([:]) is not.

        :raises ValueError: if *key* is not a slice or range, has a custom
            step, or is open on both sides.
        :raises KeyError: if the interval overlaps an existing one.
        """
        if not isinstance(key, (slice, range)):
            raise ValueError('Only slices and ranges supported.')
        if key.step is not None and key.step != 1:
            m = 'Intervals with custom steps ({}) not' \
                ' supported.'.format(key)
            raise ValueError(m)

        s, e = key.start, key.stop
        if s is None and e is None:
            # Without this guard the anchor computation below would fail
            # with an opaque TypeError (None - 1).
            raise ValueError('Intervals open on both sides are not'
                             ' supported.')
        if s is not None and e is not None and s > e:
            s, e = e, s

        anchor = s if s is not None else e - 1

        # The check for an empty space is a little complex.
        # First check the lower bound: the nearest interval anchored at or
        # below ours must end before our start.
        try:
            _, (_, lower_end) = self._tree.floor_item(anchor)
        except KeyError:
            pass
        else:
            if (s is None or lower_end is InfinityMarker.INF_PLUS
                    or (lower_end is not InfinityMarker.INF_MINUS
                        and lower_end > s)):
                raise KeyError('Overlapping intervals.')

        # Now the higher bound: the nearest interval anchored at or above
        # ours must begin at or after our end.
        try:
            higher_start, (_, higher_end) = self._tree.ceiling_item(anchor)
        except KeyError:
            pass
        else:
            if (e is None or higher_end is InfinityMarker.INF_MINUS
                    or higher_start < e):
                raise KeyError('Overlapping intervals')

        if e is None:
            e = InfinityMarker.INF_PLUS
        elif s is None:
            e = InfinityMarker.INF_MINUS
        self._tree[anchor] = (value, e)

    def _lookup(self, key: int) -> tuple:
        """Return ``(found, value)`` for the interval containing *key*.

        ``value`` is only meaningful when ``found`` is true.
        """
        try:
            start, (val, end) = self._tree.floor_item(key)
        except KeyError:
            # Nothing is anchored at or below the key, so only a left-open
            # interval (anchored at its last member, above the key) can
            # contain it.
            try:
                _, (val, end) = self._tree.ceiling_item(key)
            except KeyError:
                return False, None
            return end is InfinityMarker.INF_MINUS, val

        if end is InfinityMarker.INF_PLUS:
            return True, val
        if end is InfinityMarker.INF_MINUS:
            # Reached via floor lookup, a left-open interval matches only
            # at its anchor, i.e. its last member.
            return start == key, val
        return key < end, val

    def __getitem__(self, key: int) -> V:
        """Return the value of the interval containing *key*.

        :raises KeyError: if no stored interval contains *key*.
        """
        found, val = self._lookup(key)
        if not found:
            raise KeyError(key)
        return val

    def get(self, key, default: D = None) -> Union[V, D]:
        """Return the value of the interval containing *key*, or *default*."""
        found, val = self._lookup(key)
        return val if found else default

    def __contains__(self, key: int) -> bool:
        """Return whether some stored interval contains *key*."""
        found, _ = self._lookup(key)
        return found
def _tree_key_bounds(tree):
    """Return ``(smallest_key, largest_key)`` of *tree*.

    For an empty tree returns ``(inf, -inf)`` so that no value lies within
    the bounds and callers never query an empty tree.
    """
    try:
        low, _ = tree.min_item()
        high, _ = tree.max_item()
    except ValueError:  # the tree signals "empty" this way
        return float('inf'), float('-inf')
    return low, high


def get_ROIs(path, delta_mz=0.005, required_points=15, dropped_points=3,
             progress_callback=None):
    '''
    Detect regions of interest (ROIs) in the MS1 scans of an mzML file.

    Scans are processed in order. Each non-zero peak either extends the
    tracked ROI whose mean m/z is closest to it, or starts a new ROI.

    :param path: path to mzml file
    :param delta_mz: a peak joins the closest ROI only if the absolute
        difference between the ROI mean m/z and the peak m/z is strictly
        below this value
    :param required_points: minimal number of peaks an ROI must have
        collected to be returned
    :param dropped_points: number of consecutive scans an ROI may miss
        before it is closed; every returned ROI is also padded with this
        many zero-intensity points on both sides (can be zero)
    :param progress_callback: optional object with an ``emit(int)`` method
        that receives a percentage (capped at 100) from time to time
    :return: ROIs - a list of ROI objects found in current file
        (empty if the file has no MS1 scans)
    '''
    # read all MS1 scans in mzML file
    run = pymzml.run.Reader(path)
    scans = [scan for scan in run if scan.ms_level == 1]
    if not scans:
        return []

    ROIs = []  # completed ROIs
    process_ROIs = FastAVLTree()  # ROIs still being extended, keyed by m/z

    # seed the tree with every non-zero peak of the first scan
    init_scan = scans[0]
    start_time = init_scan.scan_time[0]
    for mz, i in zip(init_scan.mz, init_scan.i):
        if i != 0:
            process_ROIs[mz] = ProcessROI([1, 1],
                                          [start_time, start_time],
                                          [i],
                                          [mz],
                                          mz)
    # Key range of the tree as of the end of the previous scan. No key is
    # removed while a scan is processed, so both bounds are guaranteed to
    # still be present and the guarded lookups below cannot fail.
    min_mz, max_mz = _tree_key_bounds(process_ROIs)

    for number, scan in enumerate(tqdm(scans), start=1):
        if number == 1:  # already processed scan
            continue

        # expand ROI
        for n, mz in enumerate(scan.mz):
            intensity = scan.i[n]
            if intensity == 0:
                continue

            ceiling_item = floor_item = None
            # Inclusive bounds: a peak equal to a boundary key must still
            # find that ROI instead of overwriting it with a new one.
            if mz <= max_mz:
                _, ceiling_item = process_ROIs.ceiling_item(mz)
            if mz >= min_mz:
                _, floor_item = process_ROIs.floor_item(mz)

            # choose closest (by mean m/z); ties go to the ceiling ROI
            if ceiling_item is None:
                closest_item = floor_item
            elif floor_item is None:
                closest_item = ceiling_item
            elif ceiling_item.mzmean - mz > mz - floor_item.mzmean:
                closest_item = floor_item
            else:
                closest_item = ceiling_item

            if (closest_item is not None
                    and abs(closest_item.mzmean - mz) < delta_mz):
                roi = closest_item
                roi.mzmean = (roi.mzmean * roi.points + mz) / (roi.points + 1)
                roi.points += 1
                if roi.scan[1] == number:
                    # ROI is already extended in this scan (two peaks in
                    # one mz window): merge into the last point using an
                    # intensity-weighted m/z
                    roi.mz[-1] = ((roi.i[-1] * roi.mz[-1] + intensity * mz)
                                  / (roi.i[-1] + intensity))
                    roi.i[-1] = roi.i[-1] + intensity
                else:
                    roi.mz.append(mz)
                    roi.i.append(intensity)
                    roi.scan[1] = number  # show that we extended the roi
                    roi.rt[1] = scan.scan_time[0]
            else:
                rt = scan.scan_time[0]
                process_ROIs[mz] = ProcessROI([number, number],
                                              [rt, rt],
                                              [intensity],
                                              [mz],
                                              mz)

        # Check and cleanup
        to_delete = []
        for key, roi in process_ROIs.items():
            if roi.scan[1] < number <= roi.scan[1] + dropped_points:
                # missed this scan but still alive: insert 'zero' in the end
                roi.mz.append(roi.mzmean)
                roi.i.append(0)
            elif roi.scan[1] != number:
                # missed more than dropped_points scans: close the ROI
                to_delete.append(key)
                if roi.points >= required_points:
                    ROIs.append(ROI(roi.scan,
                                    roi.rt,
                                    roi.i,
                                    roi.mz,
                                    roi.mzmean))
        process_ROIs.remove_items(to_delete)
        min_mz, max_mz = _tree_key_bounds(process_ROIs)

        if progress_callback is not None and (number + 1) % 10 == 0:
            percent = int((number + 1) * 100 / len(scans))
            progress_callback.emit(min(100, percent))

    # add final rois
    last_scan = len(scans)
    for _, roi in process_ROIs.items():
        if roi.points >= required_points:
            # top up the trailing zeros to dropped_points
            for _ in range(dropped_points - (last_scan - roi.scan[1])):
                roi.mz.append(roi.mzmean)
                roi.i.append(0)
            ROIs.append(ROI(roi.scan,
                            roi.rt,
                            roi.i,
                            roi.mz,
                            roi.mzmean))

    # expand constructed roi
    for roi in ROIs:
        for _ in range(dropped_points):
            # insert in the begin
            roi.i.insert(0, 0)
            roi.mz.insert(0, roi.mzmean)
        # change scan numbers (necessary for future matching)
        roi.scan = (roi.scan[0] - dropped_points,
                    roi.scan[1] + dropped_points)
        # internal sanity check: one intensity value per covered scan
        assert roi.scan[1] - roi.scan[0] == len(roi.i) - 1
    return ROIs