Exemplo n.º 1
0
    def get_dataset_metadata(self):
        """
        Retrieve the additional metadata associated with the current dataset.

        NOTE: When multiple regions and/or datasets are supported, only the
        metadata of the currently selected dataset should be returned. When no
        particular dataset is selected, the metadata of all datasets should
        be returned.

        This implementation simply returns a newly constructed metadata_dict.

        :return: Instance of omsi.shared.metadata_data.metadata_dict
        """
        dataset_metadata = metadata_dict()
        return dataset_metadata
Exemplo n.º 2
0
    def get_metadata(self, key=None):
        """
        Get the full description of the metadata for the given key, or of all
        metadata if no key is given.

        Both access paths read a metadata entry in the same way: scalar datasets
        are read via ``[()]`` and all others via ``[:]``, and values flagged with
        the is-json attribute are decoded with ``json.loads``. If decoding fails
        an error is logged and the raw value is returned instead.

        :param key: The name of the metadata object to be retrieved. Default is None in
                    which case all metadata will be retrieved.

        :returns: `omsi.shared.metadata_data.metadata_value` object if a key is given
            or a `omsi.shared.metadata_data.metadata_dict` with all metadata
            if key is set to None.

        :raises: KeyError is raised in case that the specified key does not exist
        """
        descr_attr = omsi_format_metadata_collection.description_value_attribute
        unit_attr = omsi_format_metadata_collection.unit_value_attribute
        ontology_attr = omsi_format_metadata_collection.ontology_value_attribute
        isjson_attr = omsi_format_metadata_collection.is_json_dict_attribute

        def read_entry(metadata_name, metadata_dataset):
            """Build the metadata_value describing a single stored metadata dataset."""
            attrs = metadata_dataset.attrs
            unit = attrs[unit_attr] if unit_attr in attrs else None
            description = attrs[descr_attr] if descr_attr in attrs else None
            ontology = json.loads(attrs[ontology_attr]) if ontology_attr in attrs else None
            # Scalar datasets (empty shape) cannot be sliced with [:] and must be read with [()]
            if len(metadata_dataset.shape) > 0:
                md_value = metadata_dataset[:]
            else:
                md_value = metadata_dataset[()]
            if isjson_attr in attrs:
                try:
                    md_value = json.loads(md_value)
                except Exception:
                    # Best effort: keep the undecoded value but report the problem
                    log_helper.error(__name__, "Parsing of json metadata object failed for " + str(metadata_name))
            return metadata_value(
                name=metadata_name,
                value=md_value,
                description=description,
                unit=unit,
                ontology=ontology)

        if key is not None:
            # Indexing the group raises KeyError for unknown keys
            return read_entry(key, self.managed_group[key])

        output_meta_dict = metadata_dict()
        # items() is used instead of iteritems() so the loop works on Python 2 and 3
        for metadata_name, metadata_dataset in self.managed_group.items():
            output_meta_dict[metadata_name] = read_entry(metadata_name, metadata_dataset)
        return output_meta_dict
Exemplo n.º 3
0
    def __create__(cls,
                   parent_group,
                   instrument_name=None,
                   mzdata=None,
                   flush_io=True):
        """
        Create an instrument group and populate it with the given data.

        The instrument name and the m/z data are each stored as a separate metadata
        entry, keyed by ``omsi_format_instrument.instrument_name`` and
        ``omsi_format_instrument.instrument_mz_name`` respectively. If neither is
        given, the group is created without any initial metadata.

        :param parent_group: The parent h5py group where the instrument group should be created in.
        :type parent_group: h5py.Group
        :param instrument_name: The name of the instrument
        :type instrument_name: string, None
        :param mzdata: Numpy array of the mz data values of the instrument
        :type mzdata: numpy array or None
        :param flush_io: Call flush on the HDF5 file to ensure all HDF5 buffers are flushed so that all
                         data has been written to file

        :returns: The function returns the h5py HDF5 handler to the instrument info group created for the experiment.

        """
        all_meta = None
        if instrument_name is not None or mzdata is not None:
            all_meta = metadata_dict()
            if instrument_name is not None:
                all_meta[omsi_format_instrument.instrument_name] = \
                    metadata_value(value=instrument_name,
                                   name=omsi_format_instrument.instrument_name,
                                   description='Name of the instrument')
            if mzdata is not None:
                # Store the m/z axis under its own key so that it does not
                # overwrite the instrument name entry
                all_meta[omsi_format_instrument.instrument_mz_name] = \
                    metadata_value(value=mzdata,
                                   name=omsi_format_instrument.instrument_mz_name,
                                   description='The global m/z axis for the recordings')

        # Initialize the group and populate the data using the create method of the parent class
        metadata_obj = omsi_file_metadata_collection.___create___(
            parent_group=parent_group,
            group_name=omsi_format_instrument.instrument_groupname,
            metadata=all_meta,
            type_attr_value="omsi_file_instrument",
            version_attr_value=omsi_format_instrument.current_version,
            flush_io=flush_io)

        if flush_io:
            parent_group.file.flush()
        return omsi_file_instrument.__create_instrument_info___(instrument_group=metadata_obj.managed_group)
Exemplo n.º 4
0
    def __parse_scan_parameters(self):
        """
        Internal helper function used to parse out scan parameters from the scan filter strings.

        For every entry of ``self.scan_types`` a metadata_dict is created with the entries:

        * ``msn_value_of_n``: Integer following ``ms`` in the filter string, 1 if there is none
        * ``dissociation_energy``: Number following the dissociation type (only if found)
        * ``msn_precursor_mz``: Number directly preceding ``@`` (only if found)
        * ``dissociation_type``: Letters directly following ``<digit>@``, the string 'None' if not found
        * ``polarity``: 'pos' if the string contains '+ p', 'neg' if it contains '- p', else 'unk'

        :returns: List with one metadata_dict per entry of ``self.scan_types``, in the same order.
        """
        # Expected shape of a scan filter string (derived from the patterns below):
        #   MS2: ITMS + p MALDI Z ms2 <precursor m/z>@<dissociation type><energy> [500.00-700.00]
        #   MS1: FTMS + p MALDI Full ms [850.00-1000.00]
        msn_pattern = re.compile(r'(?<=ms)\d*')
        precursor_pattern = re.compile(r'[\d.]+(?=@)')
        # [A-Za-z] rather than [A-z]: the latter also matches the characters [ \ ] ^ _ `
        dissociation_type_pattern = re.compile(r'(?<=\d@)[A-Za-z]*')

        def first_nonempty(pattern, text):
            """Return the first non-empty match of pattern in text or None if there is none."""
            for match in pattern.findall(text):
                if match:
                    return match
            return None

        msn_von_ontology = METADATA_ONTOLOGIES['msn_value_of_n']
        polarity_ontology = METADATA_ONTOLOGIES['polarity']
        scan_params = []

        for scantype in self.scan_types:
            # MSn value of n
            msn_match = first_nonempty(msn_pattern, scantype)
            msn_value_of_n = int(msn_match) if msn_match is not None else 1

            # Precursor m/z
            precursor_match = first_nonempty(precursor_pattern, scantype)
            ms2_precursor = float(precursor_match) if precursor_match is not None else None

            # Dissociation type and energy. The energy is only searched for when a
            # dissociation type was found, since it is located relative to the type.
            dissociation_match = first_nonempty(dissociation_type_pattern, scantype)
            dissociationenergy = None
            if dissociation_match is None:
                dissociationtype = 'None'
            else:
                dissociationtype = dissociation_match
                # dissociationtype consists of ASCII letters only, so no escaping is required
                energy_pattern = re.compile(r'(?<=' + dissociationtype + r')[\d.]+')
                energy_match = first_nonempty(energy_pattern, scantype)
                if energy_match is not None:
                    dissociationenergy = float(energy_match)

            # Polarity
            if '+ p' in scantype:
                polarity = 'pos'
            elif '- p' in scantype:
                polarity = 'neg'
            else:
                polarity = 'unk'

            # Put all parameters in the dictionary
            paramdict = metadata_dict()
            paramdict['msn_value_of_n'] = metadata_value(name='msn_value_of_n',
                                                         value=msn_value_of_n,
                                                         unit=msn_von_ontology['unit'],
                                                         description=msn_von_ontology['description'],
                                                         ontology=msn_von_ontology)
            # Compare against None so that an energy of 0.0 is still recorded
            if dissociationenergy is not None:
                paramdict['dissociation_energy'] = metadata_value(name='dissociation_energy',
                                                                  value=dissociationenergy,
                                                                  unit='V',
                                                                  description='Dissociation energy')
            if ms2_precursor is not None:
                paramdict['msn_precursor_mz'] = metadata_value(name='msn_precursor_mz',
                                                               value=ms2_precursor,
                                                               unit='m/z',
                                                               description='The precursor m/z value')
            paramdict['dissociation_type'] = metadata_value(name='dissociation_type',
                                                            value=dissociationtype,
                                                            unit=None,
                                                            description='Dissociation type')
            paramdict['polarity'] = metadata_value(name='polarity',
                                                   value=polarity,
                                                   unit=polarity_ontology['unit'],
                                                   description=polarity_ontology['description'],
                                                   ontology=polarity_ontology)

            scan_params.append(paramdict)

        return scan_params
Exemplo n.º 5
0
    def __parse_scan_parameters(self):
        """
        Internal helper function used to parse out scan parameters from the scan filter strings.

        For every entry of ``self.scan_types`` a metadata_dict is created with the entries:

        * ``msn_value_of_n``: Integer following ``ms`` in the filter string, 1 if there is none
        * ``dissociation_energy``: Number following the dissociation type (only if found)
        * ``msn_precursor_mz``: Number directly preceding ``@`` (only if found)
        * ``dissociation_type``: Letters directly following ``<digit>@``, the string 'None' if not found
        * ``polarity``: 'pos' if the string contains '+ p', 'neg' if it contains '- p', else 'unk'

        :returns: List with one metadata_dict per entry of ``self.scan_types``, in the same order.
        """
        # Expected shape of a scan filter string (derived from the patterns below):
        #   MS2: ITMS + p MALDI Z ms2 <precursor m/z>@<dissociation type><energy> [500.00-700.00]
        #   MS1: FTMS + p MALDI Full ms [850.00-1000.00]
        msn_pattern = re.compile(r'(?<=ms)\d*')
        precursor_pattern = re.compile(r'[\d.]+(?=@)')
        # [A-Za-z] rather than [A-z]: the latter also matches the characters [ \ ] ^ _ `
        dissociation_type_pattern = re.compile(r'(?<=\d@)[A-Za-z]*')

        def first_nonempty(pattern, text):
            """Return the first non-empty match of pattern in text or None if there is none."""
            for match in pattern.findall(text):
                if match:
                    return match
            return None

        msn_von_ontology = METADATA_ONTOLOGIES['msn_value_of_n']
        polarity_ontology = METADATA_ONTOLOGIES['polarity']
        scan_params = []

        for scantype in self.scan_types:
            # MSn value of n
            msn_match = first_nonempty(msn_pattern, scantype)
            msn_value_of_n = int(msn_match) if msn_match is not None else 1

            # Precursor m/z
            precursor_match = first_nonempty(precursor_pattern, scantype)
            ms2_precursor = float(precursor_match) if precursor_match is not None else None

            # Dissociation type and energy. The energy is only searched for when a
            # dissociation type was found, since it is located relative to the type.
            dissociation_match = first_nonempty(dissociation_type_pattern, scantype)
            dissociationenergy = None
            if dissociation_match is None:
                dissociationtype = 'None'
            else:
                dissociationtype = dissociation_match
                # dissociationtype consists of ASCII letters only, so no escaping is required
                energy_pattern = re.compile(r'(?<=' + dissociationtype + r')[\d.]+')
                energy_match = first_nonempty(energy_pattern, scantype)
                if energy_match is not None:
                    dissociationenergy = float(energy_match)

            # Polarity
            if '+ p' in scantype:
                polarity = 'pos'
            elif '- p' in scantype:
                polarity = 'neg'
            else:
                polarity = 'unk'

            # Put all parameters in the dictionary
            paramdict = metadata_dict()
            paramdict['msn_value_of_n'] = metadata_value(
                name='msn_value_of_n',
                value=msn_value_of_n,
                unit=msn_von_ontology['unit'],
                description=msn_von_ontology['description'],
                ontology=msn_von_ontology)
            # Compare against None so that an energy of 0.0 is still recorded
            if dissociationenergy is not None:
                paramdict['dissociation_energy'] = metadata_value(
                    name='dissociation_energy',
                    value=dissociationenergy,
                    unit='V',
                    description='Dissociation energy')
            if ms2_precursor is not None:
                paramdict['msn_precursor_mz'] = metadata_value(
                    name='msn_precursor_mz',
                    value=ms2_precursor,
                    unit='m/z',
                    description='The precursor m/z value')
            paramdict['dissociation_type'] = metadata_value(
                name='dissociation_type',
                value=dissociationtype,
                unit=None,
                description='Dissociation type')
            paramdict['polarity'] = metadata_value(
                name='polarity',
                value=polarity,
                unit=polarity_ontology['unit'],
                description=polarity_ontology['description'],
                ontology=polarity_ontology)

            scan_params.append(paramdict)

        return scan_params
Exemplo n.º 6
0
    def __compute_file_info(cls, filename, resolution):
        """
        Internal helper function used to compute the mz axis, data type for the intensities,
        format type and metadata of an imzML file.

        The file is treated as 'continuous' if every spectrum has the same m/z values as the
        first spectrum and as 'processed' otherwise. For processed files a new log-spaced
        m/z axis with ``ceil(1e6 * ln(max_mz / min_mz) / resolution)`` points spanning the
        global m/z range is generated.

        Extraction of the metadata from the XML header is best effort: if it fails, a
        warning is logged and the metadata collected up to that point is returned.

        :param filename: Path of the imzML file to be read.
        :param resolution: Value used to determine the number of points of the
            reinterpolated m/z axis for processed files (see formula above).

        :return: Tuple of (coordinates, mz_axes, dtype, file_type, dataset_metadata,
            instrument_metadata, method_metadata), where coordinates and mz_axes are numpy
            arrays, dtype is the numpy dtype string of the intensities of the first
            spectrum, file_type is an entry of ``cls.available_imzml_types`` and the
            remaining three are metadata_dict objects.
        """
        # TODO completely refactor this to make it smartly handle profile or centroid datasets
        # TODO: centroid datasets should take in a user parameter "Resolution" and resample data at that resolution
        # TODO: profile datasets should work as is
        # TODO: checks for profile data vs. centroid data on the variation in length of ['m/z array']
        reader = ImzMLParser(filename)
        # Read the first spectrum
        mz_axes, intens = reader.getspectrum(0)
        # Read the coordinates. NOTE: They are used as stored in the file, i.e., they
        # are not shifted to start at the origin.
        coordinates = np.asarray(reader.coordinates)
        # Determine the data type for the intensity values
        dtype = np.asarray(intens).dtype.str

        # Compute the mz axis and file type
        file_type = cls.available_imzml_types['continuous']
        min_mz, max_mz = np.amin(mz_axes), np.amax(mz_axes)
        for ind in range(coordinates.shape[0]):
            mz, _ = reader.getspectrum(ind)
            # array_equal yields a single bool for tuples, lists and numpy arrays
            # alike and also for axes of different length
            if np.array_equal(mz, mz_axes):
                continue
            file_type = cls.available_imzml_types['processed']
            spectrum_min, spectrum_max = np.amin(mz), np.amax(mz)
            if min_mz > spectrum_min:
                min_mz = spectrum_min
            if max_mz < spectrum_max:
                max_mz = spectrum_max
        # Reinterpolate the mz-axis if we have a processed mode imzml file
        if file_type == cls.available_imzml_types['processed']:
            # logspace requires an integer number of samples
            num_mz_values = int(np.ceil(1e6 * np.log(max_mz / min_mz) / resolution))
            mz_axes = np.logspace(np.log10(min_mz), np.log10(max_mz), num_mz_values)
            log_helper.info(
                __name__, "Reinterpolated m/z axis for processed imzML file")

        # Construct the imzml metadata information
        dataset_metadata = metadata_dict()
        instrument_metadata = metadata_dict()
        method_metadata = metadata_dict()
        for k, v in reader.imzmldict.items():
            dataset_metadata[k] = metadata_value(name=k,
                                                 value=v,
                                                 unit=None,
                                                 description=k,
                                                 ontology=None)

        # Delete the parser and read the metadata
        del reader

        # Header sections that are not stored with the dataset metadata
        header_targets = {
            'instrumentConfigurationList': instrument_metadata,
            'dataProcessingList': method_metadata,
            'softwareList': method_metadata,
            'sampleList': method_metadata,
        }

        # Parse the metadata for the file. We try to parse only the header and ignore the
        # <run > group in the XML file to avoid going through the whole file again
        # while extracting the majority of the relevant metadata
        try:
            header_lines = []
            with open(filename, 'r') as ins:
                for line in ins:
                    if '<run' in line:
                        break
                    header_lines.append(line)
            # Close the root element that was cut off by stopping at <run
            header_lines.append('</mzML>')
            metdata_header = ''.join(header_lines)
            metdata_header_dict = xmltodict.parse(metdata_header)['mzML']
            for k, v in metdata_header_dict.items():
                store_value = metadata_value(
                    name=k,
                    value=v,
                    unit=None,
                    description=str(k) +
                    " extracted from imzML XML header.",
                    ontology=None)
                header_targets.get(k, dataset_metadata)[k] = store_value
            dataset_metadata['imzml_xml_metadata_header'] = metadata_value(
                name='imzml_xml_metadata_header',
                value=metdata_header,
                unit=None,
                description='XML imzML header',
                ontology=None)
        except Exception:
            # Best effort: the additional metadata is optional
            log_helper.warning(
                __name__, "Extraction of additional imzML metadata failed")

        return coordinates, np.asarray(
            mz_axes
        ), dtype, file_type, dataset_metadata, instrument_metadata, method_metadata
Exemplo n.º 7
0
    def __compute_file_info(cls, filename, resolution):
        """
        Internal helper function used to compute the mz axis, data type for the intensities,
        format type and metadata of an imzML file.

        The file is treated as 'continuous' if every spectrum has the same m/z values as the
        first spectrum and as 'processed' otherwise. For processed files a new log-spaced
        m/z axis with ``ceil(1e6 * ln(max_mz / min_mz) / resolution)`` points spanning the
        global m/z range is generated.

        Extraction of the metadata from the XML header is best effort: if it fails, a
        warning is logged and the metadata collected up to that point is returned.

        :param filename: Path of the imzML file to be read.
        :param resolution: Value used to determine the number of points of the
            reinterpolated m/z axis for processed files (see formula above).

        :return: Tuple of (coordinates, mz_axes, dtype, file_type, dataset_metadata,
            instrument_metadata, method_metadata), where coordinates and mz_axes are numpy
            arrays, dtype is the numpy dtype string of the intensities of the first
            spectrum, file_type is an entry of ``cls.available_imzml_types`` and the
            remaining three are metadata_dict objects.
        """
        # TODO completely refactor this to make it smartly handle profile or centroid datasets
        # TODO: centroid datasets should take in a user parameter "Resolution" and resample data at that resolution
        # TODO: profile datasets should work as is
        # TODO: checks for profile data vs. centroid data on the variation in length of ['m/z array']
        reader = ImzMLParser(filename)
        # Read the first spectrum
        mz_axes, intens = reader.getspectrum(0)
        # Read the coordinates. NOTE: They are used as stored in the file, i.e., they
        # are not shifted to start at the origin.
        coordinates = np.asarray(reader.coordinates)

        # Determine the data type for the intensity values
        dtype = np.asarray(intens).dtype.str

        # Compute the mz axis and file type
        file_type = cls.available_imzml_types['continuous']
        min_mz, max_mz = np.amin(mz_axes), np.amax(mz_axes)
        for ind in range(coordinates.shape[0]):
            mz, _ = reader.getspectrum(ind)
            # array_equal yields a single bool for tuples, lists and numpy arrays
            # alike and also for axes of different length
            if np.array_equal(mz, mz_axes):
                continue
            file_type = cls.available_imzml_types['processed']
            spectrum_min, spectrum_max = np.amin(mz), np.amax(mz)
            if min_mz > spectrum_min:
                min_mz = spectrum_min
            if max_mz < spectrum_max:
                max_mz = spectrum_max
        # Reinterpolate the mz-axis if we have a processed mode imzml file
        if file_type == cls.available_imzml_types['processed']:
            # logspace requires an integer number of samples
            num_mz_values = int(np.ceil(1e6 * np.log(max_mz/min_mz)/resolution))
            mz_axes = np.logspace(np.log10(min_mz), np.log10(max_mz), num_mz_values)
            log_helper.info(__name__, "Reinterpolated m/z axis for processed imzML file")

        # Construct the imzml metadata information
        dataset_metadata = metadata_dict()
        instrument_metadata = metadata_dict()
        method_metadata = metadata_dict()
        for k, v in reader.imzmldict.items():
            dataset_metadata[k] = metadata_value(name=k,
                                                 value=v,
                                                 unit=None,
                                                 description=k,
                                                 ontology=None)

        # Delete the parser and read the metadata
        del reader

        # Header sections that are not stored with the dataset metadata
        header_targets = {
            'instrumentConfigurationList': instrument_metadata,
            'dataProcessingList': method_metadata,
            'softwareList': method_metadata,
            'sampleList': method_metadata,
        }

        # Parse the metadata for the file. We try to parse only the header and ignore the
        # <run > group in the XML file to avoid going through the whole file again
        # while extracting the majority of the relevant metadata
        try:
            header_lines = []
            with open(filename, 'r') as ins:
                for line in ins:
                    if '<run' in line:
                        break
                    header_lines.append(line)
            # Close the root element that was cut off by stopping at <run
            header_lines.append('</mzML>')
            metdata_header = ''.join(header_lines)
            metdata_header_dict = xmltodict.parse(metdata_header)['mzML']
            for k, v in metdata_header_dict.items():
                store_value = metadata_value(name=k,
                                             value=v,
                                             unit=None,
                                             description=str(k) + " extracted from imzML XML header.",
                                             ontology=None)
                header_targets.get(k, dataset_metadata)[k] = store_value
            dataset_metadata['imzml_xml_metadata_header'] = metadata_value(name='imzml_xml_metadata_header',
                                                                           value=metdata_header,
                                                                           unit=None,
                                                                           description='XML imzML header',
                                                                           ontology=None)
        except Exception:
            # Best effort: the additional metadata is optional
            log_helper.warning(__name__, "Extraction of additional imzML metadata failed")

        return coordinates, np.asarray(mz_axes), dtype, file_type, dataset_metadata, instrument_metadata, method_metadata