def _make_deconvoluted_peak_solution(self, fit, composition, charge_carrier):
    """Package an isotopic fit and its composition into a peak solution.

    Parameters
    ----------
    fit : object
        Fit record; its ``experimental``, ``theoretical``, ``charge`` and
        ``score`` attributes are read.
    composition : object
        Forwarded unchanged as the first argument of
        :class:`DeconvolutedPeakSolution`.
    charge_carrier : object
        Charge carrier forwarded to :func:`neutral_mass` for every
        m/z-to-mass conversion made here.

    Returns
    -------
    DeconvolutedPeakSolution
    """
    experimental = fit.experimental
    theoretical = fit.theoretical
    charge = fit.charge
    # Signal-to-noise, peak width and the stored envelope are taken from the
    # peaks that survive ``drop_placeholders``.
    observed = drop_placeholders(experimental)
    # Only intensities above 1 count toward the abundance
    # (presumably placeholder peaks carry an intensity of 1 -- confirm).
    total_abundance = sum(
        p.intensity for p in experimental if p.intensity > 1)
    monoisotopic_mass = neutral_mass(
        theoretical.monoisotopic_mz, charge, charge_carrier)
    monoisotopic_mz = theoretical.monoisotopic_mz
    reference_peak = first_peak(experimental)
    return DeconvolutedPeakSolution(
        composition, fit, monoisotopic_mass, total_abundance, charge,
        signal_to_noise=mean(p.signal_to_noise for p in observed),
        index=reference_peak.index,
        full_width_at_half_max=mean(
            p.full_width_at_half_max for p in observed),
        a_to_a2_ratio=a_to_a2_ratio(theoretical),
        # The charge carrier is passed here as well so that all three masses
        # are derived with the same carrier as the monoisotopic mass.
        most_abundant_mass=neutral_mass(
            most_abundant_mz(experimental), charge, charge_carrier),
        average_mass=neutral_mass(
            average_mz(experimental), charge, charge_carrier),
        score=fit.score,
        envelope=[(p.mz, p.intensity) for p in observed],
        mz=monoisotopic_mz,
        area=sum(e.area for e in experimental))
def _make_deconvoluted_peak(self, fit, charge_carrier):
    """Package a finished isotopic fit into a :class:`DeconvolutedPeak`.

    Parameters
    ----------
    fit : iterable
        Unpacked as ``(score, charge, experimental, theoretical)``; the fit
        itself is also stored on the resulting peak.
    charge_carrier : object
        Charge carrier forwarded to :func:`neutral_mass` for every
        m/z-to-mass conversion made here.

    Returns
    -------
    DeconvolutedPeak
    """
    score, charge, experimental, theoretical = fit
    # Abundance, signal-to-noise and peak width are computed from the peaks
    # that survive ``drop_placeholders``; the envelope and area use every
    # experimental peak.
    observed = drop_placeholders(experimental)
    total_abundance = sum(p.intensity for p in observed)
    monoisotopic_mass = neutral_mass(
        theoretical.monoisotopic_mz, charge, charge_carrier)
    reference_peak = first_peak(experimental)
    return DeconvolutedPeak(
        neutral_mass=monoisotopic_mass,
        intensity=total_abundance,
        charge=charge,
        signal_to_noise=mean(p.signal_to_noise for p in observed),
        index=reference_peak.index,
        full_width_at_half_max=mean(
            p.full_width_at_half_max for p in observed),
        a_to_a2_ratio=a_to_a2_ratio(theoretical),
        # The charge carrier is passed here as well so that all three masses
        # are derived with the same carrier as the monoisotopic mass.
        most_abundant_mass=neutral_mass(
            most_abundant_mz(experimental), charge, charge_carrier),
        average_mass=neutral_mass(
            average_mz(experimental), charge, charge_carrier),
        score=score,
        envelope=[(p.mz, p.intensity) for p in experimental],
        mz=theoretical.monoisotopic_mz,
        fit=fit,
        area=sum(e.area for e in experimental))
def deserialize_deconvoluted_peak_set(scan_dict):
    """Rebuild a deconvoluted peak set from the arrays stored in *scan_dict*.

    The ``"isotopic envelopes array"`` entry is decoded with
    ``decode_envelopes``; the ``"m/z array"``, ``"intensity array"``,
    ``"charge array"`` and ``"deconvolution score array"`` entries are read
    element-wise, one peak per m/z value.

    Returns
    -------
    peak_set.DeconvolutedPeakSet
        The rebuilt peaks, after ``_reindex()`` has been called on the set.
    """
    decoded_envelopes = decode_envelopes(scan_dict["isotopic envelopes array"])
    mz_values = scan_dict['m/z array']
    intensities = scan_dict['intensity array']
    charges = scan_dict['charge array']
    scores = scan_dict['deconvolution score array']

    rebuilt = []
    for i, mz in enumerate(mz_values):
        z = charges[i]
        rebuilt.append(peak_set.DeconvolutedPeak(
            neutral_mass(mz, z),
            intensities[i],
            charge=z,
            # The stored score is used for both signal_to_noise and score;
            # the remaining descriptive fields are filled with zeros.
            signal_to_noise=scores[i],
            index=0,
            full_width_at_half_max=0,
            a_to_a2_ratio=0,
            most_abundant_mass=0,
            average_mass=0,
            score=scores[i],
            envelope=decoded_envelopes[i],
            mz=mz))

    result = peak_set.DeconvolutedPeakSet(rebuilt)
    result._reindex()
    return result
def from_fitted_peak(peak, charge=1):
    """Build a :class:`~.DeconvolutedPeak` from a single :class:`~.FittedPeak`.

    The neutral mass computed from ``peak.mz`` at *charge* is used for the
    first, eighth and ninth positional constructor arguments, and the
    envelope holds only the fitted peak itself.

    Parameters
    ----------
    peak : :class:`~.FittedPeak`
        The fitted peak used as the template
    charge : int, optional
        The charge state to assume, defaults to 1+

    Returns
    -------
    :class:`~.DeconvolutedPeak`
    """
    neutral = neutral_mass(peak.mz, charge)
    single_peak_envelope = Envelope([(peak.mz, peak.intensity)])
    # NOTE(review): the positional order presumably mirrors the keyword order
    # used by other DeconvolutedPeak call sites -- confirm against the class.
    return DeconvolutedPeak(
        neutral,
        peak.intensity,
        charge,
        peak.signal_to_noise,
        -1,
        peak.full_width_at_half_max,
        0,
        neutral,
        neutral,
        0,
        single_peak_envelope,
        peak.mz,
        area=peak.area)
def retain_peaks(self, peaklist, original_peaklist=None, charge_range=None, solutions=None):
    """Convert the most intense peaks of *peaklist* with ``self.create_peak``.

    Peaks are visited in order of decreasing intensity. A peak is kept when
    its intensity is at least ``self.base_peak_coefficient`` times the base
    peak intensity; peaks whose neutral mass at the unit charge exceeds
    ``self.max_mass`` are skipped. The walk stops once ``self.n_peaks`` peaks
    were kept or the first peak below the threshold is reached.

    Parameters
    ----------
    peaklist : iterable
        Candidate peaks; ``mz`` and ``intensity`` are read from each.
    original_peaklist : iterable, optional
        When given, the base peak intensity is taken from this sequence
        instead of *peaklist*.
    charge_range : object, optional
        Forwarded to ``self.infer_minimum_charge``.
    solutions : object, optional
        Not used by this method.

    Returns
    -------
    list
        The results of ``self.create_peak``; empty when the sequence used to
        find the base peak is empty.
    """
    base_peak_sequence = peaklist if original_peaklist is None else original_peaklist
    try:
        base_peak = max(peak.intensity for peak in base_peak_sequence)
    except ValueError:
        # max() of an empty sequence: there is nothing to retain.
        return []

    min_charge = self.infer_minimum_charge(charge_range)
    # Reduce the charge to a unit magnitude that keeps only its sign. Floor
    # division is used so an integer charge stays an integer on Python 3,
    # where ``/`` would silently produce a float charge.
    min_charge //= abs(min_charge)

    threshold = self.base_peak_coefficient * base_peak
    result = []
    for peak in sorted(peaklist, key=intensity_getter, reverse=True):
        if neutral_mass(peak.mz, min_charge) > self.max_mass:
            continue
        if peak.intensity < threshold:
            # Sorted by decreasing intensity: nothing further can qualify.
            break
        result.append(self.create_peak(peak, min_charge))
        if len(result) == self.n_peaks:
            break
    return result
def default(self, orphan=False):
    '''Copy the original attributes of this object into its extracted ones.

    This usually reflects a failure to find an acceptable deconvolution
    solution, and may indicate that there was no peak at the specified
    location when ``orphan`` is :const:`True`.

    When :attr:`charge` equals ``ChargeNotProvided`` a warning is issued and
    the extracted neutral mass is computed from :attr:`mz` with
    ``DEFAULT_CHARGE_WHEN_NOT_RESOLVED``; otherwise the charge is coerced to
    :class:`int` and :attr:`neutral_mass` is used. In both cases
    :attr:`defaulted` is set to :const:`True`.

    Parameters
    ----------
    orphan: :class:`bool`
        Whether or not to set :attr:`orphan` to :const:`True`, indicating
        no peak was found near :attr:`mz`.
    '''
    charge_unknown = self.charge == ChargeNotProvided
    if charge_unknown:
        warnings.warn(
            "A precursor has been defaulted with an unknown charge state.")
        self.extracted_charge = ChargeNotProvided
        self.extracted_neutral_mass = neutral_mass(
            self.mz, DEFAULT_CHARGE_WHEN_NOT_RESOLVED)
    else:
        self.extracted_charge = int(self.charge)
        self.extracted_neutral_mass = self.neutral_mass

    # Shared by both branches.
    self.extracted_intensity = self.intensity
    self.defaulted = True

    if orphan:
        self.orphan = True
def _package_precursor_information(self, product):
    """Summarize the precursor information of *product* as a plain dict.

    When ``extracted_neutral_mass`` is non-zero the ``extracted_*`` values
    are reported. Otherwise the original ``mz``, ``intensity`` and ``charge``
    are reported, with the neutral mass computed from ``mz`` and ``charge``.

    Parameters
    ----------
    product : object
        Its ``precursor_information``, ``id`` and ``scan_time`` are read.

    Returns
    -------
    dict
        Keys ``neutral_mass``, ``mz``, ``intensity``, ``charge``,
        ``precursor_scan_id``, ``product_scan_id`` and ``scan_time``.
    """
    pinfo = product.precursor_information
    if pinfo.extracted_neutral_mass != 0:
        mass = pinfo.extracted_neutral_mass
        mz = pinfo.extracted_mz
        intensity = pinfo.extracted_intensity
        charge = pinfo.extracted_charge
    else:
        mass = neutral_mass(pinfo.mz, pinfo.charge)
        mz = pinfo.mz
        intensity = pinfo.intensity
        charge = pinfo.charge
    return {
        "neutral_mass": mass,
        "mz": mz,
        "intensity": intensity,
        "charge": charge,
        "precursor_scan_id": pinfo.precursor_scan_id,
        "product_scan_id": product.id,
        "scan_time": product.scan_time,
    }
def _fit_feature_set(self, mz, error_tolerance, charge, charge_carrier=PROTON, truncate_after=0.8,
                     max_missed_peaks=1, threshold_scale=0.3, feature=None):
    """Score every combination of features matching a theoretical pattern.

    A theoretical distribution is created for *mz* at *charge*, candidate
    features are matched against it, and each combination of candidates is
    walked with a ``FeatureSetIterator``. The scores collected along the way
    are reduced with ``self._find_thresholded_score``, and fits rejected by
    ``self.scorer.reject_score`` are dropped.

    Parameters
    ----------
    mz : float
        The m/z the theoretical distribution is created for.
    error_tolerance : float
        Forwarded to ``self.match_theoretical_isotopic_distribution``.
    charge : int
        The charge state being tested.
    charge_carrier : object, optional
        Forwarded to the theoretical distribution and to ``neutral_mass``.
    truncate_after : float, optional
        Forwarded to ``self.create_theoretical_distribution``.
    max_missed_peaks : int, optional
        Envelopes missing more peaks than this are not scored.
    threshold_scale : float, optional
        Forwarded to ``self._find_thresholded_score``.
    feature : object, optional
        Forwarded as ``interval`` when matching features.

    Returns
    -------
    list
        The accepted ``LCMSFeatureSetFit`` instances.
    """
    base_tid = self.create_theoretical_distribution(
        mz, charge, charge_carrier, truncate_after)
    candidate_groups = self.match_theoretical_isotopic_distribution(
        base_tid, error_tolerance, interval=feature)

    accepted = []
    for features in product(*candidate_groups):
        # A combination with no observed feature at all cannot be fit.
        if not any(f is not None for f in features):
            continue
        # If the monoisotopic feature wasn't actually observed, create a
        # dummy feature since the monoisotopic feature cannot be None
        if features[0] is None:
            features = list(features)
            features[0] = EmptyFeature(mz)

        walker = FeatureSetIterator(features)
        scores = []
        times = []
        for eid in walker:
            cleaned_eid, tid, n_missing = self.conform_envelopes(eid, base_tid)
            if n_missing > max_missed_peaks:
                continue
            point_score = self.scorer.evaluate(None, cleaned_eid, tid)
            if not np.isnan(point_score):
                scores.append(point_score)
                times.append(walker.current_time)

        final_score = self._find_thresholded_score(scores, threshold_scale)
        missing_features = sum(1 for f in features if f is None)
        # NOTE(review): ``missing_features`` is passed both positionally and
        # by keyword -- confirm against the LCMSFeatureSetFit signature.
        fit = LCMSFeatureSetFit(
            features, base_tid, final_score, charge, missing_features,
            neutral_mass=neutral_mass(features[0].mz, charge, charge_carrier),
            missing_features=missing_features,
            scores=scores,
            times=times)
        if not self.scorer.reject_score(fit.score):
            accepted.append(fit)
    return accepted
def build_deconvoluted_peak_set_from_arrays(mz_array, intensity_array, charge_array):
    """Build a :class:`DeconvolutedPeakSet` from parallel arrays.

    One :class:`DeconvolutedPeak` is created per entry of *mz_array*. The
    intensity is passed twice (second and fourth positional arguments), the
    array position is passed fifth and ``0`` sixth.

    Parameters
    ----------
    mz_array : sequence
        The m/z of each peak.
    intensity_array : sequence
        The intensity of each peak, indexed like *mz_array*.
    charge_array : sequence
        The charge of each peak, indexed like *mz_array*.

    Returns
    -------
    DeconvolutedPeakSet
        The new set, after ``reindex()`` has been called on it.
    """
    peaks = [
        DeconvolutedPeak(
            neutral_mass(mz, charge_array[i]),
            intensity_array[i],
            charge_array[i],
            intensity_array[i],
            i,
            0)
        for i, mz in enumerate(mz_array)
    ]
    result = DeconvolutedPeakSet(peaks)
    result.reindex()
    return result
def _make_deconvoluted_peak(self, fit, charge_carrier):
    """Package a finished isotopic fit into a :class:`DeconvolutedPeak`.

    Parameters
    ----------
    fit : iterable
        Unpacked as ``(score, charge, experimental, theoretical)``; the fit
        itself is also stored on the resulting peak.
    charge_carrier : object
        Charge carrier forwarded to :func:`neutral_mass` for every
        m/z-to-mass conversion made here.

    Returns
    -------
    DeconvolutedPeak
    """
    score, charge, eid, tid = fit
    # Placeholder-free view used for abundance, signal-to-noise and width;
    # the envelope and the area still cover every experimental peak.
    real_peaks = drop_placeholders(eid)
    total_abundance = sum(p.intensity for p in real_peaks)
    monoisotopic_mz = tid.monoisotopic_mz
    monoisotopic_mass = neutral_mass(monoisotopic_mz, charge, charge_carrier)
    reference_peak = first_peak(eid)

    mean_signal_to_noise = mean(p.signal_to_noise for p in real_peaks)
    mean_width = mean(p.full_width_at_half_max for p in real_peaks)
    isotope_ratio = a_to_a2_ratio(tid)
    # Use the same charge carrier as the monoisotopic mass so that the three
    # reported masses agree when the carrier differs from the default.
    most_abundant_mass = neutral_mass(
        most_abundant_mz(eid), charge, charge_carrier)
    average_mass = neutral_mass(average_mz(eid), charge, charge_carrier)

    dpeak = DeconvolutedPeak(
        neutral_mass=monoisotopic_mass,
        intensity=total_abundance,
        charge=charge,
        signal_to_noise=mean_signal_to_noise,
        index=reference_peak.index,
        full_width_at_half_max=mean_width,
        a_to_a2_ratio=isotope_ratio,
        most_abundant_mass=most_abundant_mass,
        average_mass=average_mass,
        score=score,
        envelope=[(p.mz, p.intensity) for p in eid],
        mz=tid.monoisotopic_mz,
        fit=fit,
        area=sum(e.area for e in eid))
    return dpeak
def _fit_feature_set(self, mz, error_tolerance, charge, charge_carrier=PROTON, truncate_after=0.8,
                     max_missed_peaks=1, threshold_scale=0.3, feature=None):
    """Fit a theoretical isotopic pattern against combinations of features.

    Every combination drawn from the matched feature groups is iterated with
    a ``FeatureSetIterator``; each envelope that is not missing too many
    peaks and does not score NaN contributes a score and a time. The
    combination's final score comes from ``self._find_thresholded_score``,
    and only fits not rejected by ``self.scorer.reject_score`` are returned.

    Parameters
    ----------
    mz : float
        The m/z the theoretical distribution is created for.
    error_tolerance : float
        Forwarded to ``self.match_theoretical_isotopic_distribution``.
    charge : int
        The charge state being tested.
    charge_carrier : object, optional
        Forwarded to the theoretical distribution and to ``neutral_mass``.
    truncate_after : float, optional
        Forwarded to ``self.create_theoretical_distribution``.
    max_missed_peaks : int, optional
        Upper bound on missing peaks for an envelope to be scored.
    threshold_scale : float, optional
        Forwarded to ``self._find_thresholded_score``.
    feature : object, optional
        Forwarded as ``interval`` when matching features.

    Returns
    -------
    list
        The retained ``LCMSFeatureSetFit`` instances.
    """
    theoretical = self.create_theoretical_distribution(
        mz, charge, charge_carrier, truncate_after)
    groups = self.match_theoretical_isotopic_distribution(
        theoretical, error_tolerance, interval=feature)

    feature_fits = []
    for combination in product(*groups):
        if all(member is None for member in combination):
            continue
        # If the monoisotopic feature wasn't actually observed, create a
        # dummy feature since the monoisotopic feature cannot be None
        if combination[0] is None:
            combination = list(combination)
            combination[0] = EmptyFeature(mz)

        feat_iter = FeatureSetIterator(combination)
        scores, times = [], []
        for eid in feat_iter:
            cleaned_eid, tid, n_missing = self.conform_envelopes(
                eid, theoretical)
            if n_missing > max_missed_peaks:
                continue
            score = self.scorer.evaluate(None, cleaned_eid, tid)
            if np.isnan(score):
                continue
            scores.append(score)
            times.append(feat_iter.current_time)

        final_score = self._find_thresholded_score(scores, threshold_scale)
        n_absent = len([member for member in combination if member is None])
        # NOTE(review): the missing-feature count is passed both positionally
        # and as ``missing_features=`` -- confirm against the
        # LCMSFeatureSetFit signature.
        fit = LCMSFeatureSetFit(
            combination, theoretical, final_score, charge, n_absent,
            neutral_mass=neutral_mass(combination[0].mz, charge, charge_carrier),
            missing_features=n_absent,
            scores=scores,
            times=times)
        if self.scorer.reject_score(fit.score):
            continue
        feature_fits.append(fit)
    return feature_fits
def deserialize_deconvoluted_peak_set(scan_dict):
    """Rebuild a :class:`DeconvolutedPeakSet` from serialized scan arrays.

    Reads the ``"isotopic envelopes array"`` (decoded with
    ``decode_envelopes``), ``"m/z array"``, ``"intensity array"``,
    ``"charge array"`` and ``"deconvolution score array"`` entries of
    *scan_dict* and creates one peak per m/z value.

    Returns
    -------
    DeconvolutedPeakSet
        The rebuilt peaks, after ``_reindex()`` has been called on the set.
    """
    envelopes = decode_envelopes(scan_dict["isotopic envelopes array"])
    mz_array = scan_dict['m/z array']
    intensity_array = scan_dict['intensity array']
    charge_array = scan_dict['charge array']
    score_array = scan_dict['deconvolution score array']

    def rebuild(position, mz):
        # The stored deconvolution score is used for both signal_to_noise and
        # score; the other descriptive fields are filled with zeros.
        charge = charge_array[position]
        return DeconvolutedPeak(
            neutral_mass(mz, charge),
            intensity_array[position],
            charge=charge,
            signal_to_noise=score_array[position],
            index=0,
            full_width_at_half_max=0,
            a_to_a2_ratio=0,
            most_abundant_mass=0,
            average_mass=0,
            score=score_array[position],
            envelope=envelopes[position],
            mz=mz)

    restored = DeconvolutedPeakSet(
        [rebuild(position, mz) for position, mz in enumerate(mz_array)])
    restored._reindex()
    return restored
def _package_precursor_information(self, product):
    """Flatten the precursor information of *product* into a dictionary.

    The ``extracted_*`` values are used when ``extracted_neutral_mass`` is
    non-zero. Otherwise the original ``mz``, ``intensity`` and ``charge``
    are used and the neutral mass is computed from ``mz`` and ``charge``.

    Parameters
    ----------
    product : object
        Its ``precursor_information``, ``id`` and ``scan_time`` are read.

    Returns
    -------
    dict
        Keys ``neutral_mass``, ``mz``, ``intensity``, ``charge``,
        ``precursor_scan_id``, ``product_scan_id`` and ``scan_time``.
    """
    info = product.precursor_information
    if info.extracted_neutral_mass != 0:
        package = {
            "neutral_mass": info.extracted_neutral_mass,
            "mz": info.extracted_mz,
            "intensity": info.extracted_intensity,
            "charge": info.extracted_charge,
        }
    else:
        package = {
            "neutral_mass": neutral_mass(info.mz, info.charge),
            "mz": info.mz,
            "intensity": info.intensity,
            "charge": info.charge,
        }
    # Scan bookkeeping is identical for both cases.
    package["precursor_scan_id"] = info.precursor_scan_id
    package["product_scan_id"] = product.id
    package["scan_time"] = product.scan_time
    return package
def neutral_mass(self):
    """Compute the neutral mass from :attr:`mz` and :attr:`charge`.

    When :attr:`charge` equals ``ChargeNotProvided`` a warning is issued and
    ``DEFAULT_CHARGE_WHEN_NOT_RESOLVED`` is used as the charge instead.
    """
    charge = self.charge
    if charge == ChargeNotProvided:
        warnings.warn(
            "A precursor with an unknown charge state was used to compute a neutral mass.")
        charge = DEFAULT_CHARGE_WHEN_NOT_RESOLVED
    return neutral_mass(self.mz, charge)
def finalize_fit(self, feature_fit, charge_carrier=PROTON, subtract=True,
                 detection_threshold=0.1, max_missed_peaks=1):
    """Convert a feature set fit into a deconvoluted LC-MS feature.

    The features of *feature_fit* are iterated between the bounds returned
    by ``find_bounds``. Each envelope that passes the peak-count checks is
    scored; envelopes with a valid score yield one tree node holding a
    :class:`DeconvolutedPeak`. When *subtract* is true the fitted signal is
    removed from the experimental peaks whether or not the score was valid.
    Afterwards every real feature of the fit is invalidated.

    Parameters
    ----------
    feature_fit : object
        The fit to finalize; its ``features``, ``theoretical``, ``charge``,
        ``score`` and ``supporters`` are read, and ``len()`` is taken of it.
    charge_carrier : object, optional
        Forwarded to ``neutral_mass`` for every mass computed here.
    subtract : bool, optional
        Whether to subtract the fitted envelope from the experimental peaks.
    detection_threshold : float, optional
        Forwarded to ``find_bounds``.
    max_missed_peaks : int, optional
        Envelopes missing more peaks than this are skipped.

    Returns
    -------
    DeconvolutedLCMSFeature or None
        ``None`` when fewer than ``self.minimum_size`` nodes were produced.
    """
    nodes = []
    start_time, end_time = find_bounds(feature_fit, detection_threshold)
    feat_iter = FeatureSetIterator(
        feature_fit.features, start_time, end_time)
    base_tid = feature_fit.theoretical
    charge = feature_fit.charge
    abs_charge = abs(charge)
    for eid in feat_iter:
        cleaned_eid, tid, n_missing = conform_envelopes(eid, base_tid.truncated_tid)
        rep_eid = drop_placeholders(cleaned_eid)
        n_real_peaks = len(rep_eid)
        # Skip envelopes with no real peak, with a single real peak at a
        # charge magnitude above 1, or with too many missing peaks.
        if n_real_peaks == 0 or (n_real_peaks == 1 and abs_charge > 1) or \
                n_missing > max_missed_peaks:
            continue
        score = self.scorer.evaluate(None, cleaned_eid, tid)
        is_valid = True
        if np.isnan(score) or score < 0:
            is_valid = False
        # Each envelope intensity is capped at the theoretical intensity. It
        # is built before the validity check because the subtraction below
        # uses it as well.
        envelope = [(e.mz, min(e.intensity, t.intensity)) for e, t in zip(cleaned_eid, tid)]
        if is_valid:
            total_abundance = sum(p[1] for p in envelope)
            monoisotopic_mass = neutral_mass(
                base_tid.monoisotopic_mz, charge, charge_carrier=charge_carrier)
            reference_peak = first_peak(cleaned_eid)
            dpeak = DeconvolutedPeak(
                neutral_mass=monoisotopic_mass, intensity=total_abundance, charge=charge,
                signal_to_noise=mean(p.signal_to_noise for p in rep_eid),
                index=reference_peak.index,
                full_width_at_half_max=mean(p.full_width_at_half_max for p in rep_eid),
                a_to_a2_ratio=a_to_a2_ratio(tid),
                most_abundant_mass=neutral_mass(
                    most_abundant_mz(cleaned_eid), charge, charge_carrier=charge_carrier),
                average_mass=neutral_mass(
                    average_mz(cleaned_eid), charge, charge_carrier=charge_carrier),
                score=score,
                envelope=envelope,
                mz=base_tid.monoisotopic_mz,
                area=sum(e.area for e in cleaned_eid))
            time = feat_iter.current_time
            # Collect the precursor information registered for any real peak
            # of this envelope at the current time.
            precursor_info_set = []
            for peak in rep_eid:
                pinfo = self.precursor_map.precursor_for_peak((time, peak.index))
                if pinfo is not None:
                    precursor_info_set.append(pinfo)
            node = DeconvolutedLCMSFeatureTreeNode(time, [dpeak], precursor_info_set)
            nodes.append(node)
        if subtract:
            for fpeak, tpeak in zip(cleaned_eid, envelope):
                # If a theoretical peak uses up more than 70%
                # of the abundance of a single peak, this peak
                # should not contribute meaningfully to any other
                # fits from now on. Set its abundance to 1.0 as
                # if it were fully used up.
                ruin = (fpeak.intensity * 0.7) < tpeak[1]
                if ruin:
                    fpeak.intensity = 1.0
                else:
                    fpeak.intensity -= tpeak[1]
                # Never leave a negative intensity behind.
                if fpeak.intensity < 0:
                    fpeak.intensity = 1.0
    # Invalidate every feature of the fit that is neither missing nor an
    # EmptyFeature stand-in.
    for feature in feature_fit.features:
        if feature is None or isinstance(feature, EmptyFeature):
            continue
        feature.invalidate()
    if len(nodes) < self.minimum_size:
        return None
    result_feature = DeconvolutedLCMSFeature(
        nodes, feature_fit.charge,
        score=feature_fit.score, n_features=len(feature_fit),
        supporters=feature_fit.supporters)
    return result_feature
def finalize_fit(self, feature_fit, charge_carrier=PROTON, subtract=True,
                 detection_threshold=0.1, max_missed_peaks=1):
    """Convert a feature set fit into a deconvoluted LC-MS feature.

    The features of *feature_fit* are iterated between the start and end
    times returned by ``find_bounds``. Each envelope that passes the
    peak-count checks is scored; envelopes with a valid score yield one tree
    node holding a :class:`DeconvolutedPeak`. When *subtract* is true the
    fitted signal is removed from the experimental peaks whether or not the
    score was valid. Afterwards every real feature of the fit is invalidated.

    Parameters
    ----------
    feature_fit : object
        The fit to finalize; its ``features``, ``theoretical``, ``charge``,
        ``score`` and ``supporters`` are read, and ``len()`` is taken of it.
    charge_carrier : object, optional
        Forwarded to ``neutral_mass`` for every mass computed here.
    subtract : bool, optional
        Whether to subtract the fitted envelope from the experimental peaks.
    detection_threshold : float, optional
        Forwarded to ``find_bounds``.
    max_missed_peaks : int, optional
        Envelopes missing more peaks than this are skipped.

    Returns
    -------
    DeconvolutedLCMSFeature or None
        ``None`` when fewer than ``self.minimum_size`` nodes were produced.
    """
    nodes = []
    # The third value returned by find_bounds is not used here.
    start_time, end_time, _segments = find_bounds(feature_fit, detection_threshold)
    feat_iter = FeatureSetIterator(feature_fit.features, start_time, end_time)
    base_tid = feature_fit.theoretical
    charge = feature_fit.charge
    abs_charge = abs(charge)
    for eid in feat_iter:
        cleaned_eid, tid, n_missing = conform_envelopes(
            eid, base_tid.peaklist)
        rep_eid = drop_placeholders(cleaned_eid)
        n_real_peaks = len(rep_eid)
        # Skip envelopes with no real peak, with a single real peak at a
        # charge magnitude above 1, or with too many missing peaks.
        if n_real_peaks == 0 or (n_real_peaks == 1 and abs_charge > 1) or \
                n_missing > max_missed_peaks:
            continue
        score = self.scorer.evaluate(None, cleaned_eid, tid)
        is_valid = True
        if np.isnan(score) or score < 0:
            is_valid = False
        # Each envelope intensity is capped at the theoretical intensity. It
        # is built before the validity check because the subtraction below
        # uses it as well.
        envelope = [(e.mz, min(e.intensity, t.intensity)) for e, t in zip(cleaned_eid, tid)]
        if is_valid:
            total_abundance = sum(p[1] for p in envelope)
            monoisotopic_mass = neutral_mass(base_tid.monoisotopic_mz, charge, charge_carrier=charge_carrier)
            reference_peak = first_peak(cleaned_eid)
            dpeak = DeconvolutedPeak(
                neutral_mass=monoisotopic_mass, intensity=total_abundance, charge=charge,
                signal_to_noise=mean(p.signal_to_noise for p in rep_eid),
                index=reference_peak.index,
                full_width_at_half_max=mean(p.full_width_at_half_max for p in rep_eid),
                a_to_a2_ratio=a_to_a2_ratio(tid),
                most_abundant_mass=neutral_mass(
                    most_abundant_mz(cleaned_eid), charge, charge_carrier=charge_carrier),
                average_mass=neutral_mass(average_mz(cleaned_eid), charge, charge_carrier=charge_carrier),
                score=score,
                envelope=envelope,
                mz=base_tid.monoisotopic_mz,
                area=sum(e.area for e in cleaned_eid))
            time = feat_iter.current_time
            # Collect the precursor information registered for any real peak
            # of this envelope at the current time.
            precursor_info_set = []
            for peak in rep_eid:
                pinfo = self.precursor_map.precursor_for_peak(
                    (time, peak.index))
                if pinfo is not None:
                    precursor_info_set.append(pinfo)
            node = DeconvolutedLCMSFeatureTreeNode(time, [dpeak], precursor_info_set)
            nodes.append(node)
        if subtract:
            for fpeak, tpeak in zip(cleaned_eid, envelope):
                # If a theoretical peak uses up more than 70%
                # of the abundance of a single peak, this peak
                # should not contribute meaningfully to any other
                # fits from now on. Set its abundance to 1.0 as
                # if it were fully used up.
                ruin = (fpeak.intensity * 0.7) < tpeak[1]
                if ruin:
                    fpeak.intensity = 1.0
                else:
                    fpeak.intensity -= tpeak[1]
                # Never leave a negative intensity behind.
                if fpeak.intensity < 0:
                    fpeak.intensity = 1.0
    # Invalidate every feature of the fit that is neither missing nor an
    # EmptyFeature stand-in.
    for feature in feature_fit.features:
        if feature is None or isinstance(feature, EmptyFeature):
            continue
        feature.invalidate()
    if len(nodes) < self.minimum_size:
        return None
    result_feature = DeconvolutedLCMSFeature(
        nodes, feature_fit.charge,
        score=feature_fit.score, n_features=len(feature_fit),
        supporters=feature_fit.supporters)
    return result_feature