def test_getAllNestedElementInformation(self):
    """
    Test elementFunctions.getAllNestedElementInformation on the first element that
    cElementTree.iterparse produces for three test files: a peaks mzML file, a regular
    mzML file and a featureXML file.

    Every test file is opened inside a with-block so the file handle is closed again
    as soon as the first element has been read.
    """
    expectedResultPeaks = {'fullName': 'Proteomics Standards Initiative Mass Spectrometry Ontology', 'id': 'MS',
                           'tagName': '{http://psi.hupo.org/ms/mzml}cv',
                           'URI': 'http://psidev.cvs.sourceforge.net/*checkout*/psidev/psi/psi-ms/mzML/controlledVocabulary/psi-ms.obo'}
    expectedResultMzml = {'fullName': 'Proteomics Standards Initiative Mass Spectrometry Ontology', 'id': 'MS',
                          'tagName': '{http://psi.hupo.org/ms/mzml}cv',
                          'URI': 'http://psidev.cvs.sourceforge.net/*checkout*/psidev/psi/psi-ms/mzML/controlledVocabulary/psi-ms.obo',
                          'version': '2.26.0'}
    expectedResultFeatureXML = {'name': 'FeatureFinder', 'tagName': 'software', 'version': '1.8.0'}

    def firstElementInformation(fileName):
        # Returns the nested element information of the first parsed element of
        # testFolder+fileName, or an empty dict if iterparse yields nothing.
        with open(testFolder + fileName) as elementFile:
            for event, element in cElementTree.iterparse(elementFile):
                # only doing one to test, so return straight away
                return elementFunctions.getAllNestedElementInformation(element)
        return {}

    actualResultPeaks = firstElementInformation('peaksMzmlTestfile.peaks.mzML')
    actualResultMzml = firstElementInformation('mzml_test_file_1.mzML')
    actualResultFeatureXML = firstElementInformation('featurexmlTestFile_1.featureXML')

    self.assertDictEqual(expectedResultPeaks, actualResultPeaks)
    self.assertDictEqual(expectedResultMzml, actualResultMzml)
    self.assertDictEqual(expectedResultFeatureXML, actualResultFeatureXML)
def getAllFeatureInfo(self):
    """
    Iterator function that yields all the feature elements in the file given to Reader().
    It saves info from the features in a dict, self.elementInfo, which is used in the
    L{parseFeatureXML.Reader.__getitem__} retrieval function.
    This function gets all the information from a feature element and does no processing.
    Because of this the key names are not very intuitive. If you want a more intuitive
    key-name system, use L{parseFeatureXML.Reader.getSimpleFeatureInfo}. That comes at the
    cost of loss of information though.

    Because this is a generator, the RuntimeError for a file without features is only
    raised once the iterator has been consumed completely.

    @rtype: Element
    @return: Iterator of all the elements in the file where element.tag == 'feature'
    @raise RuntimeError: No features in the file

    B{Example}:

    Printing all the features in a file (one list is printed per feature, the output
    below shows the first feature only and is abbreviated with '...'):

    >>> featureXML = Reader('example_feature_file.featureXML')    # make a reader instance
    >>> allElements = featureXML.getAllElements()    # get all feature elements of the reader instance, you can now iterate over allElements
    >>> features = featureXML.getAllFeatureInfo()
    >>> for feature in features:        # loop through all the features
    ...     print(featureXML['nestedElement'])
    [{'content': '7052.29224', 'dim': '0', 'tagName': 'position'},
     {'content': '322.251104824796', 'dim': '1', 'tagName': 'position'},
     {'content': '52234', 'tagName': 'intensity'},
     {'content': '0', 'dim': '0', 'tagName': 'quality'},
     {'content': '0', 'dim': '1', 'tagName': 'quality'},
     {'content': '225053', 'tagName': 'overallquality'},
     {'content': '2', 'tagName': 'charge'},
     {'nr': '0', 'nestedElement': [{'y': '336.125209180674', 'x': '7052.29224', 'tagName': 'pt'},
                                   {'y': '336.124751115092', 'x': '7052.29224', 'tagName': 'pt'},
                                   ...,
                                   {'y': '338.251292301377', 'x': '7052.29224', 'tagName': 'pt'}], 'tagName': 'convexhull'},
     {'value': '421', 'type': 'int', 'name': 'spectrum_index', 'tagName': 'userParam'},
     {'value': 'controllerType=0 controllerNumber=1 scan=5342', 'type': 'string', 'name': 'spectrum_native_id', 'tagName': 'userParam'}]
    ...
    """
    # Set the simpleFlag to false for __getitem__
    self.simpleFlag = False
    # reset elementKeySet (because this is hard coded in __init__ for getSimpleFeatureInfo)
    self.__elementKeySet = set()
    # counter for the amount of elements with a feature tag. If it stays 0 at the end of
    # the yielding this function raises a runtime error
    featureCount = 0

    # for all the elements
    for element in self.getAllElements():
        # if the element is a feature
        if element.tag == 'feature':
            # keep a count of the amount of features
            featureCount += 1
            # get all the info from the feature element and put it in elementInfo.
            # update() merges into the existing dict, so keys that were set by an earlier
            # feature stay in elementInfo unless this feature overwrites them
            self.elementInfo.update(elementFunctions.getAllNestedElementInformation(element))
            self.__elementKeySet = set(self.elementInfo.keys())
            # also yield the element so anyone can get any information out of it
            yield element
            # this gets called after every yield statement and clears every element that is
            # under the current element. Because all the nested elements of the current
            # element have already been used and the results saved in self.elementInfo, they
            # are not necessary anymore and clearing them lowers the memory usage.
            for nestedElement in element:
                nestedElement.clear()
            element.clear()

    if featureCount == 0:
        # call syntax instead of "raise RuntimeError, '...'" so it is valid on python 2 and 3
        raise RuntimeError('There were no features found in self.getAllElements(). Not a valid featureXML file:' + str(self.path))
def getAllFeatureInfo(self):
    """
    Iterator function that yields all the feature elements in the file given to Reader().
    It saves info from the features in a dict, self.elementInfo, which is used in the
    L{parseFeatureXML.Reader.__getitem__} retrieval function.
    This function gets all the information from a feature element and does no processing.
    Because of this the key names are not very intuitive. If you want a more intuitive
    key-name system, use L{parseFeatureXML.Reader.getSimpleFeatureInfo}. That comes at the
    cost of loss of information though.

    Because this is a generator, the RuntimeError for a file without features is only
    raised once the iterator has been consumed completely.

    @rtype: Element
    @return: Iterator of all the elements in the file where element.tag == 'feature'
    @raise RuntimeError: No features in the file

    B{Example}:

    Printing all the features in a file (one list is printed per feature, the output
    below shows the first feature only and is abbreviated with '...'):

    >>> featureXML = Reader('example_feature_file.featureXML')    # make a reader instance
    >>> allElements = featureXML.getAllElements()    # get all feature elements of the reader instance, you can now iterate over allElements
    >>> features = featureXML.getAllFeatureInfo()
    >>> for feature in features:        # loop through all the features
    ...     print(featureXML['nestedElement'])
    [{'content': '7052.29224', 'dim': '0', 'tagName': 'position'},
     {'content': '322.251104824796', 'dim': '1', 'tagName': 'position'},
     {'content': '52234', 'tagName': 'intensity'},
     {'content': '0', 'dim': '0', 'tagName': 'quality'},
     {'content': '0', 'dim': '1', 'tagName': 'quality'},
     {'content': '225053', 'tagName': 'overallquality'},
     {'content': '2', 'tagName': 'charge'},
     {'nr': '0', 'nestedElement': [{'y': '336.125209180674', 'x': '7052.29224', 'tagName': 'pt'},
                                   {'y': '336.124751115092', 'x': '7052.29224', 'tagName': 'pt'},
                                   ...,
                                   {'y': '338.251292301377', 'x': '7052.29224', 'tagName': 'pt'}], 'tagName': 'convexhull'},
     {'value': '421', 'type': 'int', 'name': 'spectrum_index', 'tagName': 'userParam'},
     {'value': 'controllerType=0 controllerNumber=1 scan=5342', 'type': 'string', 'name': 'spectrum_native_id', 'tagName': 'userParam'}]
    ...
    """
    # Set the simpleFlag to false for __getitem__
    self.simpleFlag = False
    # reset elementKeySet (because this is hard coded in __init__ for getSimpleFeatureInfo)
    self.__elementKeySet = set()
    # counter for the amount of elements with a feature tag. If it stays 0 at the end of
    # the yielding this function raises a runtime error
    featureCount = 0

    # for all the elements
    for element in self.getAllElements():
        # if the element is a feature
        if element.tag == 'feature':
            # keep a count of the amount of features
            featureCount += 1
            # get all the info from the feature element and put it in elementInfo.
            # update() merges into the existing dict, so keys that were set by an earlier
            # feature stay in elementInfo unless this feature overwrites them
            self.elementInfo.update(elementFunctions.getAllNestedElementInformation(element))
            self.__elementKeySet = set(self.elementInfo.keys())
            # also yield the element so anyone can get any information out of it
            yield element
            # this gets called after every yield statement and clears every element that is
            # under the current element. Because all the nested elements of the current
            # element have already been used and the results saved in self.elementInfo, they
            # are not necessary anymore and clearing them lowers the memory usage.
            for nestedElement in element:
                nestedElement.clear()
            element.clear()

    # check the counter that is actually maintained above (featureCount); the name
    # elementCount was never defined and made this check fail with a NameError
    if featureCount == 0:
        # call syntax instead of "raise RuntimeError, '...'" so it is valid on python 2 and 3
        raise RuntimeError('There were no features found in self.getAllElements(). Not a valid featureXML file:' + str(self.path))
def getAllSpectraInfo(self):
    """
    Iterator function that walks over self.getAllElements() and yields every element whose
    tag equals 'feature'. Before an element is yielded, all its nested element information
    is merged into the dict self.elementInfo, which is used in the L{Reader.__getitem__}
    retrieval function.
    This function gets all the information from an element and does no processing. Because
    of this the key names are not very intuitive. If you want a more intuitive key-name
    system, use L{parsePeaksMzML.Reader.getSimpleSpectraInfo}. That comes at the cost of
    loss of information though.

    NOTE(review): the function name and the earlier documentation talk about 'spectrum'
    elements and a self.spectraInfo dict, but the code filters on the tag 'feature', stores
    in self.elementInfo and reports a featureXML error. This looks like it was carried over
    from the featureXML reader - confirm which tag and dict are intended before relying on it.

    Because this is a generator, the RuntimeError is only raised once the iterator has been
    consumed completely.

    @rtype: Element
    @return: Iterator of all the elements in the file where element.tag == 'feature'
    @raise RuntimeError: No elements with the tag 'feature' in the file

    B{Example}:

    <TODO>
    """
    # Set the simpleFlag to false for __getitem__
    self.simpleFlag = False
    # counter for the amount of elements with a feature tag. If it stays 0 at the end of
    # the yielding this function raises a runtime error
    featureCount = 0

    # for all the elements
    for element in self.getAllElements():
        # if the element is a feature
        if element.tag == 'feature':
            # keep a count of the amount of features
            featureCount += 1
            # get all the info from the feature element and put it in elementInfo
            self.elementInfo.update(elementFunctions.getAllNestedElementInformation(element))
            # also yield the element so anyone can get any information out of it
            yield element
            # this gets called after every yield statement and clears every element that is
            # under the current element. Because all the nested elements of the current
            # element have already been used and the results saved in self.elementInfo, they
            # are not necessary anymore and clearing them lowers the memory usage.
            for nestedElement in element:
                nestedElement.clear()
            element.clear()

    if featureCount == 0:
        # call syntax instead of "raise RuntimeError, '...'" so it is valid on python 2 and 3
        raise RuntimeError('There were no features found in self.getAllElements(). Not a valid featureXML file:' + str(self.path))
def getAllSpectraInfo(self):
    """
    Iterator function that walks over self.getAllElements() and yields every element whose
    tag equals 'feature'. Before an element is yielded, all its nested element information
    is merged into the dict self.elementInfo, which is used in the L{Reader.__getitem__}
    retrieval function.
    This function gets all the information from an element and does no processing. Because
    of this the key names are not very intuitive. If you want a more intuitive key-name
    system, use L{parsePeaksMzML.Reader.getSimpleSpectraInfo}. That comes at the cost of
    loss of information though.

    NOTE(review): the function name and the earlier documentation talk about 'spectrum'
    elements and a self.spectraInfo dict, but the code filters on the tag 'feature', stores
    in self.elementInfo and reports a featureXML error. This looks like it was carried over
    from the featureXML reader - confirm which tag and dict are intended before relying on it.

    Because this is a generator, the RuntimeError is only raised once the iterator has been
    consumed completely.

    @rtype: Element
    @return: Iterator of all the elements in the file where element.tag == 'feature'
    @raise RuntimeError: No elements with the tag 'feature' in the file

    B{Example}:

    <TODO>
    """
    # Set the simpleFlag to false for __getitem__
    self.simpleFlag = False
    # counter for the amount of elements with a feature tag. If it stays 0 at the end of
    # the yielding this function raises a runtime error
    featureCount = 0

    # for all the elements
    for element in self.getAllElements():
        # if the element is a feature
        if element.tag == 'feature':
            # keep a count of the amount of features
            featureCount += 1
            # get all the info from the feature element and put it in elementInfo
            self.elementInfo.update(elementFunctions.getAllNestedElementInformation(element))
            # also yield the element so anyone can get any information out of it
            yield element
            # this gets called after every yield statement and clears every element that is
            # under the current element. Because all the nested elements of the current
            # element have already been used and the results saved in self.elementInfo, they
            # are not necessary anymore and clearing them lowers the memory usage.
            for nestedElement in element:
                nestedElement.clear()
            element.clear()

    if featureCount == 0:
        # call syntax instead of "raise RuntimeError, '...'" so it is valid on python 2 and 3
        raise RuntimeError('There were no features found in self.getAllElements(). Not a valid featureXML file:' + str(self.path))