def __getitem__(self, identifier):
    """
    Access the item with id 'identifier' by iterating the xml-tree.

    The file is parsed sequentially from its beginning. The position the
    file handler had before the call is restored afterwards, whether or
    not a matching element was found.

    Arguments:
        identifier (int or str): for spectra, the integer that
            ``regex_patterns.SPECTRUM_ID_PATTERN`` extracts from the
            element's ``id`` attribute; for chromatograms, the full ``id``
            attribute string.

    Returns:
        spec.Spectrum or spec.Chromatogram: object built from the first
        matching element, with ``measured_precision=5e-6``.

    Raises:
        StopIteration: if the whole file was parsed without finding a
            match (kept for backward compatibility with existing callers).
    """
    old_pos = self.file_handler.tell()
    try:
        self.file_handler.seek(0, 0)
        # Only 'end' events are requested, so every yielded element is
        # completely parsed when we look at it.
        for _event, element in iterparse(self.file_handler, events=['end']):
            tag = element.tag
            if tag.endswith('}spectrum'):
                id_match = regex_patterns.SPECTRUM_ID_PATTERN.search(
                    element.get('id')
                )
                if int(id_match.group(0)) == identifier:
                    return spec.Spectrum(element, measured_precision=5e-6)
            elif tag.endswith('}chromatogram'):
                if element.get('id') == identifier:
                    return spec.Chromatogram(
                        element, measured_precision=5e-6
                    )
        # Same exception the exhausted iterator used to leak to the caller.
        raise StopIteration
    finally:
        # Restore the caller's file position on every exit path, including
        # "not found" and parse errors.
        self.file_handler.seek(old_pos, 0)
def __getitem__(self, identifier):
    """
    Access the item with id 'identifier'.

    Lookup strategy, tried in this order:

    1. ``'TIC'`` (case-insensitive): scan the XML tree for the chromatogram
       whose id is ``'TIC'``.
    2. ``identifier`` is a key of ``self.offset_dict``: read the spectrum
       directly from the stored offset.
    3. ``identifier`` is a ``str``: delegate to
       ``self._search_string_identifier``.
    4. Anything else: delegate to ``self._interpol_search``.

    Arguments:
        identifier (str or int): native id of the item to access

    Returns:
        spec.Chromatogram for the TIC lookup, spec.Spectrum for the offset
        lookup, otherwise whatever the delegated search helper returns.

    Raises:
        StopIteration: if 'TIC' was requested but no such chromatogram
            exists in the file.
    """
    # Known limitation carried over from the original author:
    # DOES NOT HOLD IF NUMBERS DONT START WITH ONE AND/OR DONT INCREASE
    # BY ONE -- TODO FIXME
    self.file_handler.seek(0)

    if str(identifier).upper() == 'TIC':
        for event, element in iterparse(self.file_handler, events=['end']):
            if event != 'end':
                continue
            is_chromatogram = element.tag.endswith('}chromatogram')
            if is_chromatogram and element.get('id') == 'TIC':
                return spec.Chromatogram(element, measured_precision=5e-6)
        # Parser exhausted without finding the TIC chromatogram.
        raise StopIteration

    if identifier in self.offset_dict:
        offset_entry = self.offset_dict[identifier]
        with open(self.path, 'rb') as seeker:
            seeker.seek(offset_entry[0])
            begin, stop = self._read_to_spec_end(seeker)
            self.file_handler.seek(begin, 0)
            raw_xml = self.file_handler.read(stop - begin)
        return spec.Spectrum(XML(raw_xml), measured_precision=5e-6)

    if type(identifier) is str:
        return self._search_string_identifier(identifier)

    return self._interpol_search(identifier)
def _search_string_identifier(self, search_string, chunk_size=8):
    """
    Scan the file chunk-wise for an item matching ``search_string``.

    The file at ``self.path`` is read in blocks of ``chunk_size * 512``
    bytes, each extended by whatever ``self._read_until_tag_end`` returns.
    Within each block the first spectrum opening tag whose ``id`` attribute
    contains ``search_string`` is returned as a spectrum. If no such
    spectrum is in the block, the first match of
    ``regex_patterns.CHROMO_OPEN_PATTERN`` is returned as a chromatogram
    when its first capture group equals ``search_string``.

    Arguments:
        search_string (str): text to look for; matched literally
            (regex metacharacters are escaped).
        chunk_size (int): number of 512-byte units read per iteration.

    Returns:
        spec.Spectrum (with ``measured_precision=5e-6``) or
        spec.Chromatogram.

    Raises:
        Exception: if an empty block is reached without any match.
    """
    # NOTE: This needs to go into regex_patterns.py
    # Raw string: '\s' is a regex escape, not a Python string escape.
    # re.escape: the identifier is searched for literally, so characters
    # such as '.' or '+' in it must not act as regex operators.
    spectrum_open_pattern = re.compile(
        r'<\s*spectrum[^>]*index="[0-9]+"\sid="({0})"\sdefaultArrayLength="[0-9]+">'.format(
            ''.join(['.*', re.escape(search_string), '.*'])
        ).encode()
    )
    needle = search_string.encode()
    total_chunk_size = chunk_size * 512

    with open(self.path, 'rb') as seeker:
        while True:
            chunk_offset = seeker.tell()
            data = seeker.read(total_chunk_size)
            # The helper hands the (advanced) file object back together
            # with the bytes it consumed.
            tail, seeker = self._read_until_tag_end(seeker, byte_mode=True)
            data += tail

            spec_start = spectrum_open_pattern.search(data)
            chrom_start = regex_patterns.CHROMO_OPEN_PATTERN.search(data)

            if spec_start:
                # With the needle escaped, a regex hit implies the id
                # contains the search string literally.
                seeker.seek(chunk_offset + spec_start.start())
                start, end = self._read_to_spec_end(seeker)
                seeker.seek(start)
                return spec.Spectrum(
                    XML(seeker.read(end - start)),
                    measured_precision=5e-6
                )
            elif chrom_start:
                if needle == chrom_start.group(1):
                    seeker.seek(chunk_offset + chrom_start.start())
                    start, end = self._read_to_spec_end(seeker)
                    seeker.seek(start)
                    return spec.Chromatogram(XML(seeker.read(end - start)))
            elif not data:
                # Nothing left to read: the identifier is not in the file.
                raise Exception('cant find specified string')
def __getitem__(self, key):
    """
    Execute a SQL request, process the data and return a spectrum object.

    Args:
        key (str or int): unique identifier for the given spectrum in the
            database. A tuple or list is passed through unchanged as the
            parameter sequence.

    Returns:
        spec.Spectrum or spec.Chromatogram: built from the XML stored in
        the second column of the matching ``spectra`` row.

    Raises:
        KeyError: if no row with the given id exists.
        ValueError: if the stored element is neither a spectrum nor a
            chromatogram.
    """
    # The qmark placeholder needs a *sequence* of parameters. Passing a
    # bare int fails, and a bare str is treated as one parameter per
    # character, so wrap scalars in a 1-tuple.
    params = key if isinstance(key, (tuple, list)) else (key,)
    self.cursor.execute("SELECT * FROM spectra WHERE id=?", params)

    row = self.cursor.fetchone()
    if row is None:
        raise KeyError(key)

    _row_id, xml_text = row
    element = et.XML(xml_text)
    if "spectrum" in element.tag:
        return spec.Spectrum(element)
    if "chromatogram" in element.tag:
        return spec.Chromatogram(element)
    raise ValueError(
        "unexpected element tag {0!r} for id {1!r}".format(element.tag, key)
    )
def __getitem__(self, identifier):
    """
    Access the item with id 'identifier' in the file.

    The raw block returned by ``self.Reader.read_block`` is decoded as
    UTF-8 and wrapped in a synthetic ``<mzML>`` root element carrying the
    mzML namespace declarations, so it can be parsed on its own.

    Arguments:
        identifier (str): native id of the item to access

    Returns:
        spec.Chromatogram if the tag of the first child of the wrapped
        document contains 'chromatogram', otherwise spec.Spectrum; both
        are created with ``measured_precision=5e-6``.
    """
    raw_block = self.Reader.read_block(identifier)

    # TODO more elegant way to add NameSpace (.register_namespace maybe??)
    root_open = (
        '<mzML xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" '
        'xsi:schemaLocation="http://psi.hupo.org/ms/mzml '
        'http://psidev.info/files/ms/mzML/xsd/mzML1.1.0.xsd" '
        'id="test_Creinhardtii_QE_pH8" version="1.1.0" '
        'xmlns="http://psi.hupo.org/ms/mzml">'
    )
    root_close = '</mzML>'

    wrapped = XML(root_open + raw_block.decode('utf-8') + root_close)
    # The item of interest is the first child of the synthetic root.
    item = wrapped[0]

    item_class = (
        spec.Chromatogram if 'chromatogram' in item.tag else spec.Spectrum
    )
    return item_class(item, measured_precision=5e-6)