Exemplo n.º 1
0
    def _make_deconvoluted_peak_solution(self, fit, composition, charge_carrier):
        """Build a :class:`DeconvolutedPeakSolution` from an isotopic fit.

        Parameters
        ----------
        fit : object
            The isotopic fit. Its ``experimental``, ``theoretical``,
            ``charge`` and ``score`` attributes are read.
        composition : object
            Passed through unchanged as the first constructor argument.
        charge_carrier : object
            Forwarded to :func:`neutral_mass` for every mass computed here.

        Returns
        -------
        DeconvolutedPeakSolution
        """
        eid = fit.experimental
        tid = fit.theoretical
        charge = fit.charge
        rep_eid = drop_placeholders(eid)
        # Only peaks whose intensity exceeds 1 are summed
        # (presumably placeholders carry an intensity of 1 -- TODO confirm).
        total_abundance = sum(
            p.intensity for p in eid if p.intensity > 1)

        monoisotopic_mz = tid.monoisotopic_mz
        monoisotopic_mass = neutral_mass(
            monoisotopic_mz, charge, charge_carrier)

        reference_peak = first_peak(eid)
        # The most abundant and average masses use the same charge carrier as
        # the monoisotopic mass so that all three masses stay consistent when
        # a non-default carrier is supplied.
        peak = DeconvolutedPeakSolution(
            composition, fit,
            monoisotopic_mass, total_abundance, charge,
            signal_to_noise=mean(p.signal_to_noise for p in rep_eid),
            index=reference_peak.index,
            full_width_at_half_max=mean(
                p.full_width_at_half_max for p in rep_eid),
            a_to_a2_ratio=a_to_a2_ratio(tid),
            most_abundant_mass=neutral_mass(
                most_abundant_mz(eid), charge, charge_carrier),
            average_mass=neutral_mass(
                average_mz(eid), charge, charge_carrier),
            score=fit.score,
            envelope=[(p.mz, p.intensity) for p in rep_eid],
            mz=monoisotopic_mz, area=sum(e.area for e in eid))
        return peak
Exemplo n.º 2
0
    def _make_deconvoluted_peak_solution(self, fit, composition, charge_carrier):
        """Assemble a :class:`DeconvolutedPeakSolution` for ``composition``.

        Parameters
        ----------
        fit : object
            Isotopic fit providing ``experimental``, ``theoretical``,
            ``charge`` and ``score``.
        composition : object
            Handed to the solution constructor as-is.
        charge_carrier : object
            Charge carrier forwarded to every :func:`neutral_mass` call.

        Returns
        -------
        DeconvolutedPeakSolution
        """
        eid = fit.experimental
        tid = fit.theoretical
        charge = fit.charge
        rep_eid = drop_placeholders(eid)
        # Peaks with intensity <= 1 are excluded from the abundance total
        # (presumably these are placeholder peaks -- TODO confirm).
        total_abundance = sum(
            p.intensity for p in eid if p.intensity > 1)

        monoisotopic_mz = tid.monoisotopic_mz
        monoisotopic_mass = neutral_mass(
            monoisotopic_mz, charge, charge_carrier)

        reference_peak = first_peak(eid)
        # Apply the caller's charge carrier to the derived masses too; using
        # the default carrier here would disagree with ``monoisotopic_mass``.
        peak = DeconvolutedPeakSolution(
            composition, fit,
            monoisotopic_mass, total_abundance, charge,
            signal_to_noise=mean(p.signal_to_noise for p in rep_eid),
            index=reference_peak.index,
            full_width_at_half_max=mean(
                p.full_width_at_half_max for p in rep_eid),
            a_to_a2_ratio=a_to_a2_ratio(tid),
            most_abundant_mass=neutral_mass(
                most_abundant_mz(eid), charge, charge_carrier),
            average_mass=neutral_mass(
                average_mz(eid), charge, charge_carrier),
            score=fit.score,
            envelope=[(p.mz, p.intensity) for p in rep_eid],
            mz=monoisotopic_mz, area=sum(e.area for e in eid))
        return peak
Exemplo n.º 3
0
    def _make_deconvoluted_peak(self, fit, charge_carrier):
        """Build a :class:`DeconvolutedPeak` from an isotopic fit.

        Parameters
        ----------
        fit : object
            Unpacked as ``(score, charge, experimental, theoretical)``.
        charge_carrier : object
            Forwarded to :func:`neutral_mass` for every mass computed here.

        Returns
        -------
        DeconvolutedPeak
        """
        score, charge, eid, tid = fit
        rep_eid = drop_placeholders(eid)
        total_abundance = sum(p.intensity for p in rep_eid)
        monoisotopic_mass = neutral_mass(tid.monoisotopic_mz, charge,
                                         charge_carrier)
        reference_peak = first_peak(eid)

        # The most abundant and average masses use the same charge carrier as
        # the monoisotopic mass so the three values are mutually consistent.
        dpeak = DeconvolutedPeak(
            neutral_mass=monoisotopic_mass,
            intensity=total_abundance,
            charge=charge,
            signal_to_noise=mean(p.signal_to_noise for p in rep_eid),
            index=reference_peak.index,
            full_width_at_half_max=mean(p.full_width_at_half_max
                                        for p in rep_eid),
            a_to_a2_ratio=a_to_a2_ratio(tid),
            most_abundant_mass=neutral_mass(most_abundant_mz(eid), charge,
                                            charge_carrier),
            average_mass=neutral_mass(average_mz(eid), charge,
                                      charge_carrier),
            score=score,
            # The stored envelope keeps every experimental peak, including
            # any that ``drop_placeholders`` removed from ``rep_eid``.
            envelope=[(p.mz, p.intensity) for p in eid],
            mz=tid.monoisotopic_mz,
            fit=fit,
            area=sum(e.area for e in eid))
        return dpeak
Exemplo n.º 4
0
def deserialize_deconvoluted_peak_set(scan_dict):
    """Rebuild a deconvoluted peak set from the arrays stored in ``scan_dict``.

    Parameters
    ----------
    scan_dict : mapping
        Must provide the keys ``"isotopic envelopes array"``, ``'m/z array'``,
        ``'intensity array'``, ``'charge array'`` and
        ``'deconvolution score array'``.

    Returns
    -------
    peak_set.DeconvolutedPeakSet
        The re-indexed peak set. The deconvolution score is stored as both
        the score and the signal-to-noise of each peak; the remaining
        descriptive fields are zero-filled.
    """
    envelopes = decode_envelopes(scan_dict["isotopic envelopes array"])
    mz_values = scan_dict['m/z array']
    intensities = scan_dict['intensity array']
    charges = scan_dict['charge array']
    scores = scan_dict['deconvolution score array']

    rebuilt = []
    for position, mz in enumerate(mz_values):
        z = charges[position]
        rebuilt.append(
            peak_set.DeconvolutedPeak(
                neutral_mass(mz, z),
                intensities[position],
                charge=z,
                signal_to_noise=scores[position],
                index=0,
                full_width_at_half_max=0,
                a_to_a2_ratio=0,
                most_abundant_mass=0,
                average_mass=0,
                score=scores[position],
                envelope=envelopes[position],
                mz=mz))

    result = peak_set.DeconvolutedPeakSet(rebuilt)
    result._reindex()
    return result
Exemplo n.º 5
0
def from_fitted_peak(peak, charge=1):
    """Build a :class:`~.DeconvolutedPeak` from a single :class:`~.FittedPeak`.

    The neutral mass computed from the peak's m/z at ``charge`` is reused for
    three constructor slots, and the envelope contains only the source peak.

    Parameters
    ----------
    peak : :class:`~.FittedPeak`
        The fitted peak to use as the template
    charge : int, optional
        The charge state to assume, defaults to 1+

    Returns
    -------
    :class:`~.DeconvolutedPeak`
    """
    mass = neutral_mass(peak.mz, charge)
    single_peak_envelope = Envelope([(peak.mz, peak.intensity)])
    # Positional slots presumably follow the constructor order: neutral mass,
    # intensity, charge, signal-to-noise, index, FWHM, a-to-a2 ratio, most
    # abundant mass, average mass, score, envelope, m/z -- TODO confirm.
    return DeconvolutedPeak(
        mass, peak.intensity, charge, peak.signal_to_noise,
        -1,
        peak.full_width_at_half_max,
        0,
        mass, mass,
        0,
        single_peak_envelope,
        peak.mz,
        area=peak.area)
    def retain_peaks(self,
                     peaklist,
                     original_peaklist=None,
                     charge_range=None,
                     solutions=None):
        """Select the most intense peaks that pass the base-peak threshold.

        Parameters
        ----------
        peaklist : iterable
            Candidate peaks; each must expose ``mz`` and ``intensity``.
        original_peaklist : iterable, optional
            When given, the base peak intensity is taken from this sequence
            instead of ``peaklist``.
        charge_range : object, optional
            Passed to :meth:`infer_minimum_charge`.
        solutions : object, optional
            Not used by this implementation.

        Returns
        -------
        list
            Peaks created through :meth:`create_peak`, at most
            ``self.n_peaks`` of them; empty when the base peak sequence
            is empty.
        """
        reference = peaklist if original_peaklist is None else original_peaklist
        try:
            top_intensity = max(p.intensity for p in reference)
        except ValueError:
            # ``max`` raises ValueError for an empty sequence.
            return []

        unit_charge = self.infer_minimum_charge(charge_range)
        # Reduce the charge to its sign (true division, as in the original).
        unit_charge = unit_charge / abs(unit_charge)
        cutoff = self.base_peak_coefficient * top_intensity

        kept = []
        for candidate in sorted(peaklist, key=intensity_getter, reverse=True):
            if neutral_mass(candidate.mz, unit_charge) > self.max_mass:
                continue
            # Candidates are visited in descending key order, so the first
            # one failing the cutoff ends the scan.
            if not (candidate.intensity >= cutoff):
                break
            kept.append(self.create_peak(candidate, unit_charge))
            if len(kept) == self.n_peaks:
                break

        return kept
Exemplo n.º 7
0
    def default(self, orphan=False):
        '''Fill the ``extracted_*`` attributes from the originally reported values.

        When :attr:`charge` equals ``ChargeNotProvided`` a warning is issued,
        the extracted charge stays ``ChargeNotProvided`` and the neutral mass is
        computed from :attr:`mz` with ``DEFAULT_CHARGE_WHEN_NOT_RESOLVED``.
        Otherwise the charge is coerced to :class:`int` and :attr:`neutral_mass`
        is copied. In both cases the intensity is copied and :attr:`defaulted`
        is set to :const:`True`.

        Parameters
        ----------
        orphan: :class:`bool`
            When true, :attr:`orphan` is also set to :const:`True`.
        '''
        charge_unknown = self.charge == ChargeNotProvided
        if charge_unknown:
            warnings.warn(
                "A precursor has been defaulted with an unknown charge state.")
            self.extracted_charge = ChargeNotProvided
            self.extracted_neutral_mass = neutral_mass(
                self.mz, DEFAULT_CHARGE_WHEN_NOT_RESOLVED)
        else:
            self.extracted_charge = int(self.charge)
            self.extracted_neutral_mass = self.neutral_mass

        # Shared by both branches.
        self.extracted_intensity = self.intensity
        self.defaulted = True

        if orphan:
            self.orphan = True
Exemplo n.º 8
0
 def _package_precursor_information(self, product):
     precursor_information = product.precursor_information
     if precursor_information.extracted_neutral_mass != 0:
         package = {
             "neutral_mass": precursor_information.extracted_neutral_mass,
             "mz": precursor_information.extracted_mz,
             "intensity": precursor_information.extracted_intensity,
             "charge": precursor_information.extracted_charge,
             "precursor_scan_id": precursor_information.precursor_scan_id,
             "product_scan_id": product.id,
             "scan_time": product.scan_time
         }
     else:
         package = {
             "neutral_mass":
             neutral_mass(precursor_information.mz,
                          precursor_information.charge),
             "mz":
             precursor_information.mz,
             "intensity":
             precursor_information.intensity,
             "charge":
             precursor_information.charge,
             "precursor_scan_id":
             precursor_information.precursor_scan_id,
             "product_scan_id":
             product.id,
             "scan_time":
             product.scan_time
         }
     return package
Exemplo n.º 9
0
 def _fit_feature_set(self,
                      mz,
                      error_tolerance,
                      charge,
                      charge_carrier=PROTON,
                      truncate_after=0.8,
                      max_missed_peaks=1,
                      threshold_scale=0.3,
                      feature=None):
     """Fit every combination of matched features against a theoretical pattern.

     Parameters
     ----------
     mz : float
         The m/z used to create the theoretical distribution and, when the
         monoisotopic feature is missing, the placeholder ``EmptyFeature``.
     error_tolerance : float
         Passed to :meth:`match_theoretical_isotopic_distribution`.
     charge : int
         The charge state being tested.
     charge_carrier : float, optional
         Passed to the theoretical distribution and to :func:`neutral_mass`.
     truncate_after : float, optional
         Passed to :meth:`create_theoretical_distribution`.
     max_missed_peaks : int, optional
         Time points whose conformed envelope is missing more peaks than
         this are not scored.
     threshold_scale : float, optional
         Passed to :meth:`_find_thresholded_score`.
     feature : object, optional
         Passed as ``interval`` when matching features.

     Returns
     -------
     list
         The :class:`LCMSFeatureSetFit` instances not rejected by the scorer.
     """
     base_tid = self.create_theoretical_distribution(
         mz, charge, charge_carrier, truncate_after)
     feature_groups = self.match_theoretical_isotopic_distribution(
         base_tid, error_tolerance, interval=feature)
     feature_fits = []
     for features in product(*feature_groups):
         if all(f is None for f in features):
             continue
         # If the monoisotopic feature wasn't actually observed, create a dummy feature
         # since the monoisotopic feature cannot be None
         if features[0] is None:
             features = list(features)
             features[0] = EmptyFeature(mz)
         feat_iter = FeatureSetIterator(features)
         scores = []
         times = []
         for eid in feat_iter:
             cleaned_eid, tid, n_missing = self.conform_envelopes(
                 eid, base_tid)
             if n_missing > max_missed_peaks:
                 continue
             score = self.scorer.evaluate(None, cleaned_eid, tid)
             if np.isnan(score):
                 continue
             scores.append(score)
             times.append(feat_iter.current_time)
         final_score = self._find_thresholded_score(scores, threshold_scale)
         # Counted after the placeholder substitution above, so a missing
         # monoisotopic feature is not included in this total.
         missing_features = sum(f is None for f in features)
         # NOTE(review): ``missing_features`` is supplied both positionally
         # and by keyword; if the fifth positional parameter of
         # LCMSFeatureSetFit is itself named ``missing_features`` this call
         # raises TypeError -- confirm against the constructor signature.
         fit = LCMSFeatureSetFit(features,
                                 base_tid,
                                 final_score,
                                 charge,
                                 missing_features,
                                 neutral_mass=neutral_mass(
                                     features[0].mz, charge,
                                     charge_carrier),
                                 missing_features=missing_features,
                                 scores=scores,
                                 times=times)
         if self.scorer.reject_score(fit.score):
             continue
         feature_fits.append(fit)
     return feature_fits
Exemplo n.º 10
0
def build_deconvoluted_peak_set_from_arrays(mz_array, intensity_array, charge_array):
    """Create a re-indexed :class:`DeconvolutedPeakSet` from parallel arrays.

    Each position ``i`` yields one peak whose neutral mass is computed from
    ``mz_array[i]`` and ``charge_array[i]``. The intensity is passed twice
    (the second time presumably as the signal-to-noise slot -- TODO confirm),
    followed by the position and a literal ``0``.

    Parameters
    ----------
    mz_array, intensity_array, charge_array : sequence
        Parallel sequences indexed by peak position.

    Returns
    -------
    DeconvolutedPeakSet
    """
    built = []
    for position, mz in enumerate(mz_array):
        z = charge_array[position]
        mass = neutral_mass(mz, z)
        abundance = intensity_array[position]
        built.append(
            DeconvolutedPeak(mass, abundance, z, abundance, position, 0))
    result = DeconvolutedPeakSet(built)
    result.reindex()
    return result
Exemplo n.º 11
0
def build_deconvoluted_peak_set_from_arrays(mz_array, intensity_array, charge_array):
    """Assemble a :class:`DeconvolutedPeakSet` from m/z, intensity and charge arrays.

    One :class:`DeconvolutedPeak` is made per position in ``mz_array``; its
    neutral mass comes from the m/z and charge at that position. The set is
    re-indexed before it is returned.

    Parameters
    ----------
    mz_array, intensity_array, charge_array : sequence
        Parallel sequences indexed by peak position.

    Returns
    -------
    DeconvolutedPeakSet
    """
    def peak_at(position, mz):
        # Build the peak for one array position.
        z = charge_array[position]
        mass = neutral_mass(mz, z)
        abundance = intensity_array[position]
        return DeconvolutedPeak(mass, abundance, z, abundance, position, 0)

    collection = DeconvolutedPeakSet(
        [peak_at(position, mz) for position, mz in enumerate(mz_array)])
    collection.reindex()
    return collection
Exemplo n.º 12
0
    def _make_deconvoluted_peak(self, fit, charge_carrier):
        """Turn an isotopic fit into a :class:`DeconvolutedPeak`.

        Parameters
        ----------
        fit : object
            Unpacked as ``(score, charge, experimental, theoretical)``.
        charge_carrier : object
            Charge carrier forwarded to every :func:`neutral_mass` call.

        Returns
        -------
        DeconvolutedPeak
        """
        score, charge, eid, tid = fit
        rep_eid = drop_placeholders(eid)
        total_abundance = sum(p.intensity for p in rep_eid)
        monoisotopic_mass = neutral_mass(
            tid.monoisotopic_mz, charge, charge_carrier)
        reference_peak = first_peak(eid)

        # Apply the caller's charge carrier to the derived masses as well;
        # relying on the default there would disagree with the monoisotopic
        # mass whenever a non-default carrier is supplied.
        dpeak = DeconvolutedPeak(
            neutral_mass=monoisotopic_mass, intensity=total_abundance, charge=charge,
            signal_to_noise=mean(p.signal_to_noise for p in rep_eid),
            index=reference_peak.index,
            full_width_at_half_max=mean(
                p.full_width_at_half_max for p in rep_eid),
            a_to_a2_ratio=a_to_a2_ratio(tid),
            most_abundant_mass=neutral_mass(
                most_abundant_mz(eid), charge, charge_carrier),
            average_mass=neutral_mass(
                average_mz(eid), charge, charge_carrier),
            score=score,
            envelope=[(p.mz, p.intensity) for p in eid],
            mz=tid.monoisotopic_mz, fit=fit,
            area=sum(e.area for e in eid))
        return dpeak
Exemplo n.º 13
0
 def _fit_feature_set(self, mz, error_tolerance, charge,
                      charge_carrier=PROTON, truncate_after=0.8, max_missed_peaks=1,
                      threshold_scale=0.3, feature=None):
     """Score each combination of matched features against a theoretical pattern.

     Parameters
     ----------
     mz : float
         The m/z used to create the theoretical distribution and, when the
         monoisotopic feature is missing, the placeholder ``EmptyFeature``.
     error_tolerance : float
         Passed to :meth:`match_theoretical_isotopic_distribution`.
     charge : int
         The charge state being tested.
     charge_carrier : float, optional
         Passed to the theoretical distribution and to :func:`neutral_mass`.
     truncate_after : float, optional
         Passed to :meth:`create_theoretical_distribution`.
     max_missed_peaks : int, optional
         Time points whose conformed envelope is missing more peaks than
         this are not scored.
     threshold_scale : float, optional
         Passed to :meth:`_find_thresholded_score`.
     feature : object, optional
         Passed as ``interval`` when matching features.

     Returns
     -------
     list
         The :class:`LCMSFeatureSetFit` instances not rejected by the scorer.
     """
     base_tid = self.create_theoretical_distribution(mz, charge, charge_carrier, truncate_after)
     feature_groups = self.match_theoretical_isotopic_distribution(base_tid, error_tolerance, interval=feature)
     feature_fits = []
     for features in product(*feature_groups):
         if all(f is None for f in features):
             continue
         # If the monoisotopic feature wasn't actually observed, create a dummy feature
         # since the monoisotopic feature cannot be None
         if features[0] is None:
             features = list(features)
             features[0] = EmptyFeature(mz)
         feat_iter = FeatureSetIterator(features)
         scores = []
         times = []
         for eid in feat_iter:
             cleaned_eid, tid, n_missing = self.conform_envelopes(eid, base_tid)
             if n_missing > max_missed_peaks:
                 continue
             score = self.scorer.evaluate(None, cleaned_eid, tid)
             if np.isnan(score):
                 continue
             scores.append(score)
             times.append(feat_iter.current_time)
         final_score = self._find_thresholded_score(scores, threshold_scale)
         # Counted after the placeholder substitution above, so a missing
         # monoisotopic feature is not included in this total.
         missing_features = sum(f is None for f in features)
         # NOTE(review): ``missing_features`` is supplied both positionally
         # and by keyword; if the fifth positional parameter of
         # LCMSFeatureSetFit is itself named ``missing_features`` this call
         # raises TypeError -- confirm against the constructor signature.
         fit = LCMSFeatureSetFit(
             features, base_tid, final_score, charge, missing_features,
             neutral_mass=neutral_mass(features[0].mz, charge, charge_carrier),
             missing_features=missing_features,
             scores=scores, times=times)
         if self.scorer.reject_score(fit.score):
             continue
         feature_fits.append(fit)
     return feature_fits
Exemplo n.º 14
0
def deserialize_deconvoluted_peak_set(scan_dict):
    """Reconstruct a :class:`DeconvolutedPeakSet` from serialized scan arrays.

    Parameters
    ----------
    scan_dict : mapping
        Must provide the keys ``"isotopic envelopes array"``, ``'m/z array'``,
        ``'intensity array'``, ``'charge array'`` and
        ``'deconvolution score array'``.

    Returns
    -------
    DeconvolutedPeakSet
        The re-indexed peak set. Each peak stores the deconvolution score as
        both its score and its signal-to-noise; index, peak width, a-to-a2
        ratio, most abundant mass and average mass are set to zero.
    """
    envelopes = decode_envelopes(scan_dict["isotopic envelopes array"])
    mz_values = scan_dict['m/z array']
    intensities = scan_dict['intensity array']
    charges = scan_dict['charge array']
    scores = scan_dict['deconvolution score array']

    def rebuild(position, mz):
        # Recreate the peak stored at one array position.
        z = charges[position]
        return DeconvolutedPeak(
            neutral_mass(mz, z),
            intensities[position],
            charge=z,
            signal_to_noise=scores[position],
            index=0,
            full_width_at_half_max=0,
            a_to_a2_ratio=0,
            most_abundant_mass=0,
            average_mass=0,
            score=scores[position],
            envelope=envelopes[position],
            mz=mz,
        )

    restored = DeconvolutedPeakSet(
        [rebuild(position, mz) for position, mz in enumerate(mz_values)])
    restored._reindex()
    return restored
Exemplo n.º 15
0
 def _package_precursor_information(self, product):
     precursor_information = product.precursor_information
     if precursor_information.extracted_neutral_mass != 0:
         package = {
             "neutral_mass": precursor_information.extracted_neutral_mass,
             "mz": precursor_information.extracted_mz,
             "intensity": precursor_information.extracted_intensity,
             "charge": precursor_information.extracted_charge,
             "precursor_scan_id": precursor_information.precursor_scan_id,
             "product_scan_id": product.id,
             "scan_time": product.scan_time
         }
     else:
         package = {
             "neutral_mass": neutral_mass(
                 precursor_information.mz, precursor_information.charge),
             "mz": precursor_information.mz,
             "intensity": precursor_information.intensity,
             "charge": precursor_information.charge,
             "precursor_scan_id": precursor_information.precursor_scan_id,
             "product_scan_id": product.id,
             "scan_time": product.scan_time
         }
     return package
Exemplo n.º 16
0
 def neutral_mass(self):
     """Compute the neutral mass from :attr:`mz` and :attr:`charge`.

     When the charge equals ``ChargeNotProvided`` a warning is issued and
     ``DEFAULT_CHARGE_WHEN_NOT_RESOLVED`` is used in its place.
     """
     charge = self.charge
     if charge == ChargeNotProvided:
         warnings.warn(
             "A precursor with an unknown charge state was used to compute a neutral mass.")
         charge = DEFAULT_CHARGE_WHEN_NOT_RESOLVED
     return neutral_mass(self.mz, charge)
Exemplo n.º 17
0
    def finalize_fit(self, feature_fit, charge_carrier=PROTON, subtract=True,
                     detection_threshold=0.1, max_missed_peaks=1):
        """Convert a feature fit into a :class:`DeconvolutedLCMSFeature`.

        Walks the fitted features over the time range reported by
        ``find_bounds``, builds one deconvoluted peak per acceptable time
        point, optionally subtracts the explained signal from the
        experimental peaks, and invalidates the source features.

        Parameters
        ----------
        feature_fit : object
            The fit to finalize; its ``features``, ``theoretical``,
            ``charge``, ``score`` and ``supporters`` attributes are read.
        charge_carrier : float, optional
            Forwarded to every :func:`neutral_mass` call.
        subtract : bool, optional
            When true, experimental peak intensities are reduced in place.
        detection_threshold : float, optional
            Forwarded to ``find_bounds``.
        max_missed_peaks : int, optional
            Time points missing more peaks than this are skipped.

        Returns
        -------
        DeconvolutedLCMSFeature or None
            ``None`` when fewer than ``self.minimum_size`` nodes were built.
        """
        nodes = []
        start_time, end_time = find_bounds(feature_fit, detection_threshold)
        feat_iter = FeatureSetIterator(
            feature_fit.features, start_time, end_time)
        base_tid = feature_fit.theoretical
        charge = feature_fit.charge
        abs_charge = abs(charge)
        for eid in feat_iter:
            cleaned_eid, tid, n_missing = conform_envelopes(eid, base_tid.truncated_tid)
            rep_eid = drop_placeholders(cleaned_eid)
            n_real_peaks = len(rep_eid)
            # Skip time points with no real peaks, with a single real peak
            # for a multiply charged fit, or with too many missing peaks.
            if n_real_peaks == 0 or (n_real_peaks == 1 and abs_charge > 1) or \
               n_missing > max_missed_peaks:
                continue
            score = self.scorer.evaluate(None, cleaned_eid, tid)
            # A NaN or negative score produces no node, but subtraction
            # below still runs for this time point.
            is_valid = True
            if np.isnan(score) or score < 0:
                is_valid = False
            # Each envelope entry is capped at the theoretical intensity.
            envelope = [(e.mz, min(e.intensity, t.intensity)) for e, t in zip(cleaned_eid, tid)]
            if is_valid:
                total_abundance = sum(p[1] for p in envelope)
                monoisotopic_mass = neutral_mass(
                    base_tid.monoisotopic_mz, charge, charge_carrier=charge_carrier)
                reference_peak = first_peak(cleaned_eid)

                dpeak = DeconvolutedPeak(
                    neutral_mass=monoisotopic_mass, intensity=total_abundance,
                    charge=charge,
                    signal_to_noise=mean(p.signal_to_noise for p in rep_eid),
                    index=reference_peak.index,
                    full_width_at_half_max=mean(p.full_width_at_half_max for p in rep_eid),
                    a_to_a2_ratio=a_to_a2_ratio(tid),
                    most_abundant_mass=neutral_mass(
                        most_abundant_mz(cleaned_eid), charge, charge_carrier=charge_carrier),
                    average_mass=neutral_mass(
                        average_mz(cleaned_eid), charge, charge_carrier=charge_carrier),
                    score=score,
                    envelope=envelope,
                    mz=base_tid.monoisotopic_mz,
                    area=sum(e.area for e in cleaned_eid))

                time = feat_iter.current_time
                # Collect precursor records registered for any real peak
                # at this time point.
                precursor_info_set = []
                for peak in rep_eid:
                    pinfo = self.precursor_map.precursor_for_peak((time, peak.index))
                    if pinfo is not None:
                        precursor_info_set.append(pinfo)

                node = DeconvolutedLCMSFeatureTreeNode(time, [dpeak], precursor_info_set)
                nodes.append(node)
            if subtract:
                for fpeak, tpeak in zip(cleaned_eid, envelope):
                    # If a theoretical peak uses up more than 70%
                    # of the abundance of a single peak, this peak
                    # should not contribute meaningfully to any other
                    # fits from now on. Set its abundance to 1.0 as
                    # if it were fully used up.
                    ruin = (fpeak.intensity * 0.7) < tpeak[1]
                    if ruin:
                        fpeak.intensity = 1.0
                    else:
                        fpeak.intensity -= tpeak[1]
                    # Guard against a negative residual after subtraction.
                    if fpeak.intensity < 0:
                        fpeak.intensity = 1.0
        # Invalidate every real (non-placeholder) feature, whether or not a
        # result is returned below.
        for feature in feature_fit.features:
            if feature is None or isinstance(feature, EmptyFeature):
                continue
            feature.invalidate()
        if len(nodes) < self.minimum_size:
            return None

        result_feature = DeconvolutedLCMSFeature(
            nodes, feature_fit.charge,
            score=feature_fit.score, n_features=len(feature_fit),
            supporters=feature_fit.supporters)

        return result_feature
Exemplo n.º 18
0
    def finalize_fit(self,
                     feature_fit,
                     charge_carrier=PROTON,
                     subtract=True,
                     detection_threshold=0.1,
                     max_missed_peaks=1):
        """Convert a feature fit into a :class:`DeconvolutedLCMSFeature`.

        Walks the fitted features over the time range reported by
        ``find_bounds``, builds one deconvoluted peak per acceptable time
        point, optionally subtracts the explained signal from the
        experimental peaks, and invalidates the source features.

        Parameters
        ----------
        feature_fit : object
            The fit to finalize; its ``features``, ``theoretical``,
            ``charge``, ``score`` and ``supporters`` attributes are read.
        charge_carrier : float, optional
            Forwarded to every :func:`neutral_mass` call.
        subtract : bool, optional
            When true, experimental peak intensities are reduced in place.
        detection_threshold : float, optional
            Forwarded to ``find_bounds``.
        max_missed_peaks : int, optional
            Time points missing more peaks than this are skipped.

        Returns
        -------
        DeconvolutedLCMSFeature or None
            ``None`` when fewer than ``self.minimum_size`` nodes were built.
        """
        nodes = []
        # The third value returned by ``find_bounds`` is not used here.
        start_time, end_time, _segments = find_bounds(feature_fit,
                                                      detection_threshold)
        feat_iter = FeatureSetIterator(feature_fit.features, start_time,
                                       end_time)
        base_tid = feature_fit.theoretical
        charge = feature_fit.charge
        abs_charge = abs(charge)
        for eid in feat_iter:
            cleaned_eid, tid, n_missing = conform_envelopes(
                eid, base_tid.peaklist)
            rep_eid = drop_placeholders(cleaned_eid)
            n_real_peaks = len(rep_eid)
            # Skip time points with no real peaks, with a single real peak
            # for a multiply charged fit, or with too many missing peaks.
            if n_real_peaks == 0 or (n_real_peaks == 1 and abs_charge > 1) or \
               n_missing > max_missed_peaks:
                continue
            score = self.scorer.evaluate(None, cleaned_eid, tid)
            # A NaN or negative score produces no node, but subtraction
            # below still runs for this time point.
            is_valid = True
            if np.isnan(score) or score < 0:
                is_valid = False
            # Each envelope entry is capped at the theoretical intensity.
            envelope = [(e.mz, min(e.intensity, t.intensity))
                        for e, t in zip(cleaned_eid, tid)]
            if is_valid:
                total_abundance = sum(p[1] for p in envelope)
                monoisotopic_mass = neutral_mass(base_tid.monoisotopic_mz,
                                                 charge,
                                                 charge_carrier=charge_carrier)
                reference_peak = first_peak(cleaned_eid)

                dpeak = DeconvolutedPeak(
                    neutral_mass=monoisotopic_mass,
                    intensity=total_abundance,
                    charge=charge,
                    signal_to_noise=mean(p.signal_to_noise for p in rep_eid),
                    index=reference_peak.index,
                    full_width_at_half_max=mean(p.full_width_at_half_max
                                                for p in rep_eid),
                    a_to_a2_ratio=a_to_a2_ratio(tid),
                    most_abundant_mass=neutral_mass(
                        most_abundant_mz(cleaned_eid),
                        charge,
                        charge_carrier=charge_carrier),
                    average_mass=neutral_mass(average_mz(cleaned_eid),
                                              charge,
                                              charge_carrier=charge_carrier),
                    score=score,
                    envelope=envelope,
                    mz=base_tid.monoisotopic_mz,
                    area=sum(e.area for e in cleaned_eid))

                time = feat_iter.current_time
                # Collect precursor records registered for any real peak
                # at this time point.
                precursor_info_set = []
                for peak in rep_eid:
                    pinfo = self.precursor_map.precursor_for_peak(
                        (time, peak.index))
                    if pinfo is not None:
                        precursor_info_set.append(pinfo)

                node = DeconvolutedLCMSFeatureTreeNode(time, [dpeak],
                                                       precursor_info_set)
                nodes.append(node)
            if subtract:
                for fpeak, tpeak in zip(cleaned_eid, envelope):
                    # If a theoretical peak uses up more than 70%
                    # of the abundance of a single peak, this peak
                    # should not contribute meaningfully to any other
                    # fits from now on. Set its abundance to 1.0 as
                    # if it were fully used up.
                    ruin = (fpeak.intensity * 0.7) < tpeak[1]
                    if ruin:
                        fpeak.intensity = 1.0
                    else:
                        fpeak.intensity -= tpeak[1]
                    # Guard against a negative residual after subtraction.
                    if fpeak.intensity < 0:
                        fpeak.intensity = 1.0
        # Invalidate every real (non-placeholder) feature, whether or not a
        # result is returned below.
        for feature in feature_fit.features:
            if feature is None or isinstance(feature, EmptyFeature):
                continue
            feature.invalidate()
        if len(nodes) < self.minimum_size:
            return None

        result_feature = DeconvolutedLCMSFeature(
            nodes,
            feature_fit.charge,
            score=feature_fit.score,
            n_features=len(feature_fit),
            supporters=feature_fit.supporters)

        return result_feature