Exemplo n.º 1
0
class mzFile(baseFile):
    def __init__(self, datafile, **kwargs):
        """
        Opens the data file at `datafile` through the MassSpecDataReader COM
        object.

        Extra keyword arguments are accepted but not used here.

        Raises IOError if the reader's OpenDataFile() returns a false value.
        """
        # Plain bookkeeping attributes.
        self.data_file = datafile
        self.file_type = '.d'

        # Lazily-filled caches: ticObj is set by time_range(), info by
        # scan_info() and _filters by filters().
        self.ticObj = None
        self.info = None
        self._filters = None

        # A peak filter left exactly as created; scan() and cscan() pass it
        # for both filter arguments of GetSpectrum_8.
        self.noFilter = CreateObject(
            r'Agilent.MassSpectrometry.DataAnalysis.MsdrPeakFilter')

        reader = CreateObject(
            r'Agilent.MassSpectrometry.DataAnalysis.MassSpecDataReader')
        self.source = reader

        opened = reader.OpenDataFile(datafile)
        if not opened:
            raise IOError("Error opening %s" % datafile)

    def close(self):
        self.source.CloseDataFile()

    def time_range(self):
        if not self.ticObj:
            self.ticObj = self.source.GetTIC()

        # DEEP IN OBSCURE AND FORGOTTEN COMTYPES DOCUMENTATION, I DISCOVERED
        # THE LONG-LOST METHOD OF BENDING POINTER(IUnknown) TO OBEY HUMAN
        # COMMAND!
        ranges = self.ticObj.AcquiredTimeRange
        assert len(
            ranges
        ) == 1, "Multiple time ranges per file not currently supported."
        timerange = ranges[0].QueryInterface(bc.IRange)
        return timerange.Start, timerange.End

    def scan_range(self):
        # There's probably a more efficient way of doing this.
        info = list(zip(*self.scan_info()))[2]
        return min(info), max(info)

    def filters(self):
        """
        Thermo-style filter strings for all spectra; used for compatibility with
        various legacy functions.
        """

        ionization = self.source.MSScanFileInformation.IonModes
        if not ionization:
            vprint(
                "Could not determine separation/ionization; defaulting to GCMS."
            )
            separator = 'GC'
        elif ionization & (4 | 2):  # Bitwise OR and AND.
            separator = 'GC'
        else:
            separator = 'TOF'

        colEs = self.source.MSScanFileInformation.CollisionEnergy
        if len(colEs) == 1:
            colE = colEs[0]
        else:
            colE = None

        if not self._filters:
            self._filters = []
            for rt, mz, index, level, polarity in self.scan_info():
                scanObj = self.source.GetSpectrum_6(index)
                rangeobj = scanObj.MeasuredMassRange.QueryInterface(
                    bc.IRange)  # Yep, definitely spectrum-specific.

                if colE:  # Singular collision energy in the file.
                    energy = colE
                else:
                    energy = float(scanObj.CollisionEnergy)

                if level != 'MS1':
                    precstr = '%.4f@%.2f' % (mz, energy)
                else:
                    precstr = ''
                string = "%s MS %s NSI Full ms%s %s[%.2f-%.2f]" % (
                    separator, polarity,
                    int(level[2]) if level != 'MS1' else '', precstr,
                    (rangeobj.Start), (rangeobj.End))
                self._filters.append((rt, string))

        return self._filters

    def headers(self):
        return self.scan_info()

    def scan(self, scan, centroid=None):
        """
        Returns a spectrum from the specified scan index, as a list of
        (x, y) pairs taken from the spectrum's XArray and YArray.

        If both centroided and profile-mode data are present in the file,
        which is returned can be controlled by the centroid argument:

        - None (the default) selects 'PeakElseProfile'.
        - A string is lower-cased and looked up in desiredModeDict; usually
          'profile' or 'centroid'. 'ProfileElsePeak' or 'PeakElseProfile'
          will return spectrum of the preferred kind if present, else the
          other.
        - Any other value is treated as a boolean: true selects 'centroid',
          false selects 'profile'.

        If the requested mode is not present, an empty spectrum will be
        returned.

        Raises KeyError if centroid is a string that is not a key of
        desiredModeDict.
        """
        # Identity test: `== None` would invoke the argument's own __eq__.
        if centroid is None:
            key = 'PeakElseProfile'.lower()
        elif isinstance(centroid, str):
            key = centroid.lower()
        else:
            key = 'centroid' if centroid else 'profile'
        mode = desiredModeDict[key]

        # The same unconfigured filter object is passed for both filter
        # arguments of GetSpectrum_8.
        scanObj = self.source.GetSpectrum_8(scan, self.noFilter, self.noFilter,
                                            mode)
        return list(zip(scanObj.XArray, scanObj.YArray))

    def cscan(self, scan):
        """
        Calculates a centroided scan from profile-mode data.

        Points whose intensity exceeds the mean intensity of the scan are
        grouped into runs of consecutive points; each run becomes one peak,
        reported as (intensity-weighted mean m/z, maximum intensity). A run
        that is still open when the data ends is reported too.

        Raises IOError if no profile-mode data comes back for the scan; in
        that case, you can use mzFile.scan() with centroid = 'centroid' to
        return the machine-centroided scan.
        """
        from itertools import groupby

        mode = desiredModeDict['profile']
        scanObj = self.source.GetSpectrum_8(scan, self.noFilter, self.noFilter,
                                            mode)
        mzs, ints = scanObj.XArray, scanObj.YArray
        # Length check rather than truthiness, so array-like values (whose
        # truth value may be ambiguous) are handled as well.
        if mzs is None or len(mzs) == 0:
            raise IOError("Profile data for scan %s not available." % scan)

        threshold = average(ints)

        def above_threshold(pt):
            return bool(pt[1] > threshold)

        peaks = []
        # groupby yields maximal runs of consecutive points on the same side
        # of the threshold, including a final run that reaches the end of
        # the data.
        for is_peak, run in groupby(zip(mzs, ints), key=above_threshold):
            if not is_peak:
                continue
            run_mzs, run_ints = zip(*run)
            centroid = average(run_mzs, weights=run_ints), max(run_ints)
            peaks.append(centroid)

        return peaks

    def scan_info(self,
                  start_time=0,
                  stop_time=999999,
                  start_mz=0,
                  stop_mz=99999):
        if self.info == None:
            self.info = []
            for index in range(1000000):  # Largenum.
                infoObj = self.source.GetScanRecord(index)
                rt = infoObj.RetentionTime
                mz = infoObj.MZOfInterest  # I *think* this is MZ when applicable?
                if not rt:
                    break
                if not (start_time <= rt <= stop_time
                        and start_mz <= mz <= stop_mz):
                    continue

                level = 'MS%d' % infoObj.MSLevel
                #scantype = infoObj.MSScanType
                polarity = infoObj.IonPolarity

                self.info.append(
                    (rt, mz, index, level, ionPolarityDict[polarity]))
            if index == 1000000:
                raise IOError("File too large for constant!")

        return self.info

    def xic(self,
            start_time=0,
            stop_time=None,
            start_mz=0,
            stop_mz=None,
            filter=None,
            UV=False):
        if filter:
            assert filter.strip().lower(
            ) == 'full ms', 'Thermo-style XIC filters are not supported for Agilent files.'

        # A full XIC can be performed with the TIC object that may have been
        # retrieved regardless.
        if self.ticObj and not any([start_time, stop_time, start_mz, stop_mz]):
            return list(zip(self.ticObj.XArray, self.ticObj.YArray))

        if stop_time == None:
            stop_time = 999999
        if stop_mz == None:
            stop_mz = 999999

        chromFilter = CreateObject(
            r'Agilent.MassSpectrometry.DataAnalysis.BDAChromFilter')

        chromFilter.MSLevelFilter = 0  # "All", should perhaps instead be 1 for "ms1"?
        if not UV:
            chromFilter.ChromatogramType = 7  # Extracted-Ion
        else:
            chromFilter.ChromatogramType = 4  # ExtractedWavelength
        chromFilter.SingleChromatogramForAllMasses = True

        mzRange = CreateObject(
            r'Agilent.MassSpectrometry.DataAnalysis.MinMaxRange')
        mzRange.Min = start_mz
        mzRange.Max = stop_mz
        mzRangeIR = mzRange.QueryInterface(bc.IRange)
        chromFilter.IncludeMassRanges = (mzRange, )
        # If THAT works...!

        rtRange = CreateObject(
            r'Agilent.MassSpectrometry.DataAnalysis.MinMaxRange')
        rtRange.Min = start_time
        rtRange.Max = stop_time
        rtRangeIR = rtRange.QueryInterface(bc.IRange)
        chromFilter.ScanRange = rtRangeIR

        xic = self.source.GetChromatogram(chromFilter)[0].QueryInterface(
            bda.IBDAChromData)
        return list(zip(xic.XArray, xic.YArray))

    def uv_trace(self):
        """
        Returns the result of GetTWC() for the first device listed by the
        reader's INonmsDataReader interface.
        """
        reader = self.source.QueryInterface(msdr.INonmsDataReader)
        first_device = reader.GetNonmsDevices()[0]
        return reader.GetTWC(first_device)

    def deisotope_scan(self,
                       scan,
                       tolerance_da=0.0025,
                       tolerance_ppm=7,
                       max_charge=None,
                       require_peptide_profile=False):
        """
        Returns the specified scan after running the reader's own
        Deisotope() routine on it, as a list of (x, y) pairs taken from the
        spectrum's XArray and YArray.

        tolerance_da (Daltons) and tolerance_ppm (parts-per-million) are,
        per the original author's description, ADDED TOGETHER to give the
        total tolerance for each peak.

        max_charge can be set to an integer to only consider isotopic
        envelopes of that charge or less; None performs no charge filtering.

        require_peptide_profile filters on isotopic sequences having the
        relative intensity profile caused by standard relative isotopic
        abundances.

        The filter object is left exactly as created when every argument
        has its default value.
        """
        spectrum = self.source.GetSpectrum_6(scan)

        chargeFilter = CreateObject(
            r'Agilent.MassSpectrometry.DataAnalysis.MsdrChargeStateAssignmentFilter'
        )

        # Any departure from the defaults means the filter gets configured.
        customised = (tolerance_da != 0.0025 or tolerance_ppm != 7
                      or max_charge or require_peptide_profile)
        if customised:
            chargeFilter.AbsoluteTolerance = tolerance_da
            if max_charge:
                # NOTE(review): the property name suggests an on/off switch
                # rather than the numeric limit itself - confirm against the
                # vendor API whether a separate property holds the value.
                chargeFilter.LimitMaxChargeState = max_charge
            chargeFilter.RelativeTolerance = tolerance_ppm
            chargeFilter.RequirePeptideLikeAbundanceProfile = require_peptide_profile

        # Deisotope() returns nothing; the result is read back from the
        # spectrum object that was passed in.
        self.source.Deisotope(spectrum, chargeFilter)

        return list(zip(spectrum.XArray, spectrum.YArray))