示例#1
0
 def test_getAllNestedItems(self):
     """
     Check the items that elementFunctions.getAllNestedItems returns for the first
     element that iterparse yields from peaksMzmlTestfile.peaks.mzML.

     All returned items are merged into one dict and compared with the expected dict.
     """
     # every key is listed exactly once; the original literal repeated 'fullName' and 'id',
     # where the later 'id': 'MS' silently replaced the earlier 'id': ''
     expectedItems = {
         'fullName': 'Proteomics Standards Initiative Mass Spectrometry Ontology',
         'id': 'MS',
         'URI': 'http://psidev.cvs.sourceforge.net/*checkout*/psidev/psi/psi-ms/mzML/controlledVocabulary/psi-ms.obo',
     }

     # dict to hold everything that getAllNestedItems returns
     actualItems = {}
     # the with-block closes the test file, also when parsing or the helper raises
     with open(testFolder+'peaksMzmlTestfile.peaks.mzML') as elementFile:
         for event, element in cElementTree.iterparse(elementFile):
             for item in elementFunctions.getAllNestedItems(element):
                 actualItems.update(item)
             # only the first element is needed for this test
             break

     self.assertDictEqual(expectedItems, actualItems)
示例#2
0
 def getSimpleSpectraInfo(self):
     """
     Iterator function that yields every spectrum element in the file given to Reader().

     For each spectrum, all items returned by elementFunctions.getAllNestedItems are inspected.
     An item with a 'name' is saved in self.spectraInfo[element] under the lower-cased name; if the
     item has no 'value', the name itself is used as value. An item without a 'name' but with an 'id'
     is saved under 'scan_id': the text after 'scan=' in the id, or the complete id if it does not
     contain 'scan='. The keys saved for the current spectrum are collected in self.__spectraKeySet,
     which is reset for every spectrum.

     After the caller resumes the iterator, the yielded element and its direct children are cleared
     to lower the memory usage, so an element should not be used after asking for the next one.

     NOTE(review): self.spectraInfo[element] is written to without being created here, so it is
     presumably a defaultdict-like mapping - confirm. The original documentation listed a RuntimeError
     for files without spectra, but nothing in this method raises it; when no spectrum element is found
     the iterator is simply empty (self.getAllElements is not visible here and may raise itself).

     @rtype: Element
     @return: Iterator of all the elements in the file where element.tag == 'spectrum' (namespace stripped)

     B{Example}:

     Print all the information of all the MS/MS spectra in examplePeaksfile.peaks.mzML. Only showing one result:

     >>> peaksMzML = Reader('example_peaks_file.peaks.mzML')   # make a Reader instance
     >>> spectra = peaksMzML.getSimpleSpectraInfo()            # iterator over all the spectra of the Reader instance
     >>> for spectrum in spectra:                              # loop through all the spectra
     ...     if int(peaksMzML['ms level']) == 2:
     ...         for keys in peaksMzML.getKeys():
     ...             print('key: '+str(keys)+'\tvalue: '+str(peaksMzML[keys]))
     ...         print('-'*20)
     key: scan_id    value: 1
     key: centroid spectrum    value: centroid spectrum
     key: ms level    value: 2
     key: mass spectrum    value: mass spectrum
     key: positive scan    value: positive scan
     key: base peak m/z    value: 368.750823974609
     key: base peak intensity    value: 37719.2890625
     key: total ion current    value: 110887.0078125
     key: lowest observed m/z    value: 108.770645141602
     key: highest observed m/z    value: 754.29296875
     key: filter string    value: ITMS + c NSI d Full ms2 [email protected] [90.00-770.00]
     key: preset scan configuration    value: 4
     key: no combination    value: no combination
     key: scan start time    value: 1158.9672
     key: [thermo trailer extra]monoisotopic m/z:    value: 377.673858642578
     key: scan window lower limit    value: 90
     key: scan window upper limit    value: 770
     key: isolation window target m/z    value: 377.673858642578
     key: isolation window lower offset    value: 1
     key: isolation window upper offset    value: 1
     key: selected ion m/z    value: 377.673858642578
     key: charge state    value: 2
     key: peak intensity    value: 55344.1875
     key: activation energy    value: 0
     key: collision-induced dissociation    value: collision-induced dissociation
     key: collision energy    value: 35
     """
     # looping through all the elements to find the spectrum elements
     for element in self.getAllElements():
         # the tag looks like '{namespace}spectrum', only the part after the namespace is compared
         if element.tag.split('}')[1] == 'spectrum':
             # reset the keyset, it only describes the spectrum that is currently being yielded
             self.__spectraKeySet = []
             # Everything that is nested in the spectrum is saved instead of a hand-picked selection,
             # so whatever turns out to be needed later (e.g. positive or negative scan) is available
             for info in elementFunctions.getAllNestedItems(element):
                 if 'name' in info:
                     # some dicts have a name but not a value, in that case the name is also the
                     # value (more informative than just null)
                     value = info['value'] if 'value' in info else info['name']
                     key = info['name'].lower()
                     self.spectraInfo[element][key] = value
                     self.__spectraKeySet.append(key)
                 elif 'id' in info:
                     # setting the id to the scan number instead of a big string (there is more in
                     # front of 'scan='); ids without 'scan=' are kept complete instead of
                     # failing with an IndexError
                     idParts = info['id'].split('scan=')
                     if len(idParts) > 1:
                         scanId = idParts[1]
                     else:
                         scanId = info['id']
                     self.spectraInfo[element]['scan_id'] = scanId
                     self.__spectraKeySet.append('scan_id')
             yield element
             # this runs when the caller asks for the next element. The nested elements of the
             # current element have already been used and the results are saved in self.spectraInfo,
             # so they are not necessary anymore and clearing them lowers the memory usage.
             for nestedElement in element:
                 nestedElement.clear()
             element.clear()
示例#3
0
 def getSimpleSpectraInfo(self):
     """
     Iterator function that yields every spectrum element in the file given to Reader().

     For each spectrum, all items returned by elementFunctions.getAllNestedItems are inspected.
     An item with a 'name' is saved in self.spectraInfo[element] under the lower-cased name; if the
     item has no 'value', the name itself is used as value. An item without a 'name' but with an 'id'
     is saved under 'scan_id': the text after 'scan=' in the id, or the complete id if it does not
     contain 'scan='. The keys saved for the current spectrum are collected in self.__spectraKeySet,
     which is reset for every spectrum.

     After the caller resumes the iterator, the yielded element and its direct children are cleared
     to lower the memory usage, so an element should not be used after asking for the next one.

     NOTE(review): self.spectraInfo[element] is written to without being created here, so it is
     presumably a defaultdict-like mapping - confirm. The original documentation listed a RuntimeError
     for files without spectra, but nothing in this method raises it; when no spectrum element is found
     the iterator is simply empty (self.getAllElements is not visible here and may raise itself).

     @rtype: Element
     @return: Iterator of all the elements in the file where element.tag == 'spectrum' (namespace stripped)

     B{Example}:

     Print all the information of all the MS/MS spectra in examplePeaksfile.peaks.mzML. Only showing one result:

     >>> peaksMzML = Reader('example_peaks_file.peaks.mzML')   # make a Reader instance
     >>> spectra = peaksMzML.getSimpleSpectraInfo()            # iterator over all the spectra of the Reader instance
     >>> for spectrum in spectra:                              # loop through all the spectra
     ...     if int(peaksMzML['ms level']) == 2:
     ...         for keys in peaksMzML.getKeys():
     ...             print('key: '+str(keys)+'\tvalue: '+str(peaksMzML[keys]))
     ...         print('-'*20)
     key: scan_id    value: 1
     key: centroid spectrum    value: centroid spectrum
     key: ms level    value: 2
     key: mass spectrum    value: mass spectrum
     key: positive scan    value: positive scan
     key: base peak m/z    value: 368.750823974609
     key: base peak intensity    value: 37719.2890625
     key: total ion current    value: 110887.0078125
     key: lowest observed m/z    value: 108.770645141602
     key: highest observed m/z    value: 754.29296875
     key: filter string    value: ITMS + c NSI d Full ms2 [email protected] [90.00-770.00]
     key: preset scan configuration    value: 4
     key: no combination    value: no combination
     key: scan start time    value: 1158.9672
     key: [thermo trailer extra]monoisotopic m/z:    value: 377.673858642578
     key: scan window lower limit    value: 90
     key: scan window upper limit    value: 770
     key: isolation window target m/z    value: 377.673858642578
     key: isolation window lower offset    value: 1
     key: isolation window upper offset    value: 1
     key: selected ion m/z    value: 377.673858642578
     key: charge state    value: 2
     key: peak intensity    value: 55344.1875
     key: activation energy    value: 0
     key: collision-induced dissociation    value: collision-induced dissociation
     key: collision energy    value: 35
     """
     # looping through all the elements to find the spectrum elements
     for element in self.getAllElements():
         # the tag looks like '{namespace}spectrum', only the part after the namespace is compared
         if element.tag.split('}')[1] == 'spectrum':
             # reset the keyset, it only describes the spectrum that is currently being yielded
             self.__spectraKeySet = []
             # Everything that is nested in the spectrum is saved instead of a hand-picked selection,
             # so whatever turns out to be needed later (e.g. positive or negative scan) is available
             for info in elementFunctions.getAllNestedItems(element):
                 if 'name' in info:
                     # some dicts have a name but not a value, in that case the name is also the
                     # value (more informative than just null)
                     value = info['value'] if 'value' in info else info['name']
                     key = info['name'].lower()
                     self.spectraInfo[element][key] = value
                     self.__spectraKeySet.append(key)
                 elif 'id' in info:
                     # setting the id to the scan number instead of a big string (there is more in
                     # front of 'scan='); ids without 'scan=' are kept complete instead of
                     # failing with an IndexError
                     idParts = info['id'].split('scan=')
                     if len(idParts) > 1:
                         scanId = idParts[1]
                     else:
                         scanId = info['id']
                     self.spectraInfo[element]['scan_id'] = scanId
                     self.__spectraKeySet.append('scan_id')
             yield element
             # this runs when the caller asks for the next element. The nested elements of the
             # current element have already been used and the results are saved in self.spectraInfo,
             # so they are not necessary anymore and clearing them lowers the memory usage.
             for nestedElement in element:
                 nestedElement.clear()
             element.clear()