Exemplo n.º 1
0
 def __init__(self, scan, sequence, model=None, mass_shift=None):
     """Create a scorer with no score, an empty match map and zeroed counters.

     ``scan``, ``sequence`` and ``mass_shift`` are forwarded to the parent
     initializer; ``model`` is stored on the instance unchanged.
     """
     super(FrequencyScorer, self).__init__(scan, sequence, mass_shift)
     # Nothing has been scored or matched yet.
     self._score = None
     self.solution_map = FragmentMatchMap()
     self.glycosylated_b_ion_count = self.glycosylated_y_ion_count = 0
     self.model = model
Exemplo n.º 2
0
    def match(self, error_tolerance=2e-5):
        """Match theoretical b, y and stub fragments against the spectrum.

        Each fragment mass is looked up with ``self.spectrum.has_peak`` using
        ``error_tolerance``; every truthy result is recorded in a fresh
        ``FragmentMatchMap``. While walking the b and y series, the number of
        positions holding at least one glycosylated fragment is tallied.

        The tallies are stored in ``glycosylated_b_ion_count`` and
        ``glycosylated_y_ion_count``, the map is stored in ``solution_map``
        and also returned.
        """
        matched = FragmentMatchMap()
        peaks = self.spectrum
        glycosylated_positions = {}

        # The b and y ladders are handled identically, b first.
        for series_name in ('b', 'y'):
            tally = 0
            for position_fragments in self.target.get_fragments(series_name):
                any_glycosylated = False
                for fragment in position_fragments:
                    any_glycosylated |= fragment.is_glycosylated
                    hit = peaks.has_peak(fragment.mass, error_tolerance)
                    if hit:
                        matched.add(hit, fragment)
                if any_glycosylated:
                    tally += 1
            glycosylated_positions[series_name] = tally

        for stub in self.target.stub_fragments(extended=True):
            hit = peaks.has_peak(stub.mass, error_tolerance)
            if hit:
                matched.add(hit, stub)

        self.glycosylated_b_ion_count = glycosylated_positions['b']
        self.glycosylated_y_ion_count = glycosylated_positions['y']
        self.solution_map = matched
        return matched
Exemplo n.º 3
0
class SimpleCoverageScorer(PeptideSpectrumMatcherBase):
    """Score a match by how much of the sequence its matched fragments cover."""

    def __init__(self, scan, sequence, mass_shift=None):
        """Forward the arguments to the parent and start with no score or matches."""
        super(SimpleCoverageScorer, self).__init__(scan, sequence, mass_shift)
        self._score = None
        self.solution_map = FragmentMatchMap()

    def _compute_coverage_vectors(self):
        """Return two 0/1 arrays of length ``len(self.target)``.

        The first array is set to 1 at the position of every matched b or c
        fragment, the second at the position of every matched y or z fragment.
        """
        n_terminal_series = (IonSeries.b, IonSeries.c)
        c_terminal_series = (IonSeries.y, IonSeries.z)
        n_term_hits = np.zeros(len(self.target))
        c_term_hits = np.zeros(len(self.target))

        for fragment in self.solution_map.fragments():
            if fragment.series in n_terminal_series:
                n_term_hits[fragment.position] = 1
            elif fragment.series in c_terminal_series:
                c_term_hits[fragment.position] = 1
        return n_term_hits, c_term_hits

    def compute_coverage(self):
        """Return the mean of ``log2(n + reversed(c) + 1) / log2(3)`` over positions."""
        n_term_hits, c_term_hits = self._compute_coverage_vectors()
        # The C-terminal vector is reversed before being added to the
        # N-terminal one.
        combined = n_term_hits + c_term_hits[::-1] + 1
        per_position = np.log2(combined) / np.log2(3)
        return np.mean(per_position)

    def calculate_score(self, **kwargs):
        """Compute the coverage score, cache it in ``_score`` and return it."""
        result = self._coverage_score()
        self._score = result
        return result

    def _coverage_score(self):
        """Return the value of :meth:`compute_coverage`."""
        coverage = self.compute_coverage()
        return coverage
 def __init__(self, scan, target, mass_shift=None):
     """Forward the arguments to the parent and reset all matching state.

     After the parent initializer runs, the spectrum's peaks are copied into
     a set (``_sanitized_spectrum``), and the score, match map, theoretical
     fragment count and backbone mass list are set to their empty values.
     """
     super(BinomialSpectrumMatcher, self).__init__(scan, target, mass_shift)
     # Set copy of the spectrum's peaks.
     self._sanitized_spectrum = set(self.spectrum)
     self._score = None
     self.solution_map = FragmentMatchMap()
     self.n_theoretical = 0
     self._backbone_mass_series = list()
 def __init__(self, scan, sequence, model=None, mass_shift=None):
     """Initialize the scorer state.

     The parent initializer receives ``scan``, ``sequence`` and
     ``mass_shift``. The cached score is cleared, an empty
     ``FragmentMatchMap`` is installed, both glycosylated ion counters are
     zeroed and ``model`` is kept as given.
     """
     super(FrequencyScorer, self).__init__(scan, sequence, mass_shift)
     self._score = None
     self.solution_map = FragmentMatchMap()
     # Both ladder counters start at zero (b is assigned first, then y).
     self.glycosylated_b_ion_count = self.glycosylated_y_ion_count = 0
     self.model = model
Exemplo n.º 6
0
class SimpleCoverageScorer(PeptideSpectrumMatcherBase):
    """Coverage-based scorer built on the matched fragments in ``solution_map``."""

    def __init__(self, scan, sequence, mass_shift=None):
        """Delegate to the parent initializer, then clear score and matches."""
        super(SimpleCoverageScorer, self).__init__(scan, sequence, mass_shift)
        self._score = None
        self.solution_map = FragmentMatchMap()

    def _compute_coverage_vectors(self):
        """Build per-position indicator arrays for the matched fragments.

        Returns a pair of arrays of length ``len(self.target)``: the first is
        1 wherever a b or c fragment was matched, the second wherever a y or
        z fragment was matched. Positions are taken from ``frag.position``.
        """
        from_n_terminus = np.zeros(len(self.target))
        from_c_terminus = np.zeros(len(self.target))

        for matched_fragment in self.solution_map.fragments():
            if matched_fragment.series in (IonSeries.b, IonSeries.c):
                destination = from_n_terminus
            elif matched_fragment.series in (IonSeries.y, IonSeries.z):
                destination = from_c_terminus
            else:
                # Other series do not contribute to backbone coverage.
                continue
            destination[matched_fragment.position] = 1
        return from_n_terminus, from_c_terminus

    def compute_coverage(self):
        """Return the mean normalized log2 coverage over all positions."""
        from_n_terminus, from_c_terminus = self._compute_coverage_vectors()
        # Reverse the C-terminal vector before summing; log2(3) normalizes
        # the per-position value (largest possible sum is 1 + 1 + 1 = 3).
        summed = from_n_terminus + from_c_terminus[::-1] + 1
        return np.mean(np.log2(summed) / np.log2(3))

    def calculate_score(self, **kwargs):
        """Store the coverage score in ``_score`` and return it."""
        value = self._coverage_score()
        self._score = value
        return value

    def _coverage_score(self):
        """Return the mean coverage as the score."""
        return self.compute_coverage()
    def match(self, error_tolerance=2e-5):
        """Find spectrum peaks for the b, y and stub fragments of the target.

        A new ``FragmentMatchMap`` collects every ``(peak, fragment)`` pair
        for which ``self.spectrum.has_peak(fragment.mass, error_tolerance)``
        is truthy. For the b and y series the method also counts how many
        positions contain at least one glycosylated fragment.

        Side effects: sets ``glycosylated_b_ion_count``,
        ``glycosylated_y_ion_count`` and ``solution_map``.

        Returns the populated match map.
        """
        result_map = FragmentMatchMap()
        observed = self.spectrum

        def count_glycosylated_positions(series_name):
            # Match one backbone series and report how many of its positions
            # hold a glycosylated fragment.
            glycosylated_total = 0
            for group in self.target.get_fragments(series_name):
                seen_glycosylated = False
                for candidate in group:
                    seen_glycosylated |= candidate.is_glycosylated
                    found = observed.has_peak(candidate.mass, error_tolerance)
                    if found:
                        result_map.add(found, candidate)
                if seen_glycosylated:
                    glycosylated_total += 1
            return glycosylated_total

        b_total = count_glycosylated_positions('b')
        y_total = count_glycosylated_positions('y')

        for candidate in self.target.stub_fragments(extended=True):
            found = observed.has_peak(candidate.mass, error_tolerance)
            if found:
                result_map.add(found, candidate)

        self.glycosylated_b_ion_count = b_total
        self.glycosylated_y_ion_count = y_total
        self.solution_map = result_map
        return result_map
    def match(self, error_tolerance=2e-5, *args, **kwargs):
        """Match oxonium, backbone (b/y) and stub fragments against the spectrum.

        Oxonium-ion peaks are matched first and removed from
        ``self._sanitized_spectrum``; backbone fragments are not allowed to
        claim a peak already matched by an oxonium ion. Stub fragments are
        matched last without that restriction.

        Parameters
        ----------
        error_tolerance : float
            Mass tolerance handed to ``spectrum.all_peaks_for``.
        *args, **kwargs
            Accepted for signature compatibility and ignored.

        Returns
        -------
        FragmentMatchMap
            The matches, also stored in ``self.solution_map``. The number of
            theoretical backbone fragments is stored in ``self.n_theoretical``
            and their masses in ``self._backbone_mass_series``.
        """
        n_theoretical = 0
        solution_map = FragmentMatchMap()
        spectrum = self.spectrum
        backbone_mass_series = []

        oxonium_ion_matches = set()
        for frag in self.target.glycan_fragments(
                all_series=False,
                allow_ambiguous=False,
                include_large_glycan_fragments=False,
                maximum_fragment_size=4):
            for peak in spectrum.all_peaks_for(frag.mass, error_tolerance):
                solution_map.add(peak, frag)
                oxonium_ion_matches.add(peak)
                try:
                    self._sanitized_spectrum.remove(peak)
                except KeyError:
                    # The peak was already removed by an earlier oxonium match.
                    pass

        # Both backbone ladders are treated identically. The mass of every
        # fragment is recorded as that fragment is visited (the previous code
        # appended a stale ``frag`` once per y position).
        for series_name in ('b', 'y'):
            for frags in self.target.get_fragments(series_name):
                for frag in frags:
                    backbone_mass_series.append(frag.mass)
                    n_theoretical += 1
                    for peak in spectrum.all_peaks_for(frag.mass, error_tolerance):
                        if peak in oxonium_ion_matches:
                            continue
                        solution_map.add(peak, frag)

        for frag in self.target.stub_fragments(extended=True):
            for peak in spectrum.all_peaks_for(frag.mass, error_tolerance):
                solution_map.add(peak, frag)

        self.solution_map = solution_map
        self.n_theoretical = n_theoretical
        self._backbone_mass_series = backbone_mass_series
        return solution_map
Exemplo n.º 9
0
    def match(self, error_tolerance=2e-5, *args, **kwargs):
        """Reset ``solution_map`` and match the fragment series for this scan.

        Recognized keyword arguments:

        ``include_neutral_losses`` (default ``False``)
            A truthy int/bool is replaced by a single water-loss entry,
            ``[(glypy.Composition("H2O"), "-H2O")]``; any other value is
            passed through unchanged.
        ``max_cleavages`` (default ``2``)
            Passed through to ``_match_fragments``.

        When ``self.is_hcd()`` is truthy the series string ``"BY"`` is
        matched, otherwise ``"ABCXYZ"``.
        """
        self.solution_map = FragmentMatchMap()
        neutral_losses = kwargs.get("include_neutral_losses", False)
        cleavage_limit = kwargs.get("max_cleavages", 2)
        hcd = self.is_hcd()
        # The result is not used here; the call is kept so that the sequence
        # of method calls is unchanged.
        self.is_exd()
        if neutral_losses and isinstance(neutral_losses, (int, bool)):
            neutral_losses = [(glypy.Composition("H2O"), "-H2O")]

        series_letters = "BY" if hcd else "ABCXYZ"
        self._match_fragments(
            series_letters, error_tolerance, max_cleavages=cleavage_limit,
            include_neutral_losses=neutral_losses)
Exemplo n.º 10
0
 def __init__(self, scan, sequence, mass_shift=None):
     """Forward the arguments to the parent and reset score, matches and counters."""
     super(SimpleCoverageScorer, self).__init__(scan, sequence, mass_shift)
     self._score = None
     self.solution_map = FragmentMatchMap()
     # Counters for glycosylated positions on each terminal ladder
     # (n-term is assigned first, then c-term).
     self.glycosylated_n_term_ion_count = self.glycosylated_c_term_ion_count = 0
Exemplo n.º 11
0
class SimpleCoverageScorer(GlycopeptideSpectrumMatcherBase):
    """Score a glycopeptide match from backbone, glycosylated and stub coverage.

    The class attributes below are the default weights used by
    :meth:`calculate_score` when a weight is not supplied by the caller.
    """
    backbone_weight = 0.5
    glycosylated_weight = 0.5
    stub_weight = 0.2

    def __init__(self, scan, sequence, mass_shift=None):
        """Forward the arguments to the parent and reset score, matches and counters."""
        super(SimpleCoverageScorer, self).__init__(scan, sequence, mass_shift)
        self._score = None
        self.solution_map = FragmentMatchMap()
        self.glycosylated_n_term_ion_count = 0
        self.glycosylated_c_term_ion_count = 0

    @property
    def glycosylated_b_ion_count(self):
        """Alias for ``glycosylated_n_term_ion_count``."""
        return self.glycosylated_n_term_ion_count

    @glycosylated_b_ion_count.setter
    def glycosylated_b_ion_count(self, value):
        self.glycosylated_n_term_ion_count = value

    @property
    def glycosylated_y_ion_count(self):
        """Alias for ``glycosylated_c_term_ion_count``."""
        return self.glycosylated_c_term_ion_count

    @glycosylated_y_ion_count.setter
    def glycosylated_y_ion_count(self, value):
        self.glycosylated_c_term_ion_count = value

    def _match_backbone_series(self, series, error_tolerance=2e-5, masked_peaks=None, strategy=None,
                               include_neutral_losses=False):
        """Match one backbone series and count its glycosylated positions.

        Parameters
        ----------
        series
            The ion series to generate; its ``direction`` attribute decides
            which counter is incremented (positive: N-terminal, otherwise
            C-terminal).
        error_tolerance : float
            Mass tolerance handed to ``spectrum.all_peaks_for``.
        masked_peaks : container or None
            Values compared against ``peak.index.neutral_mass``; peaks found
            in it are skipped. ``None`` (the default) masks nothing.
        strategy
            Fragmentation strategy, defaulting to ``HCDFragmentationStrategy``.
        include_neutral_losses
            Passed through to ``get_fragments``.
        """
        if strategy is None:
            strategy = HCDFragmentationStrategy
        if masked_peaks is None:
            # The default used to reach the ``in`` test below as None and
            # raise TypeError.
            masked_peaks = frozenset()
        # Assumes that fragmentation proceeds from the start of the ladder (series position 1)
        # which means that if the last fragment could be glycosylated then the next one will be
        # but if the last fragment wasn't the next one might be.
        previous_position_glycosylated = False
        for frags in self.get_fragments(series, strategy=strategy, include_neutral_losses=include_neutral_losses):
            glycosylated_position = previous_position_glycosylated
            for frag in frags:
                if not glycosylated_position:
                    glycosylated_position |= frag.is_glycosylated
                for peak in self.spectrum.all_peaks_for(frag.mass, error_tolerance):
                    if peak.index.neutral_mass in masked_peaks:
                        continue
                    self.solution_map.add(peak, frag)
            if glycosylated_position:
                if series.direction > 0:
                    self.glycosylated_n_term_ion_count += 1
                else:
                    self.glycosylated_c_term_ion_count += 1
            previous_position_glycosylated = glycosylated_position

    def _compute_coverage_vectors(self):
        """Summarize the matched fragments.

        Returns a 5-tuple: the N-terminal (b/c) and C-terminal (y/z) 0/1
        position arrays, the number of matched stub glycopeptide fragments,
        and the numbers of distinct ``(series, position)`` pairs matched by a
        glycosylated N-terminal and C-terminal fragment respectively.
        """
        n_term_ions = np.zeros(len(self.target))
        c_term_ions = np.zeros(len(self.target))
        stub_count = 0
        glycosylated_n_term_ions = set()
        glycosylated_c_term_ions = set()

        for frag in self.solution_map.fragments():
            series = frag.get_series()
            if series in (IonSeries.b, IonSeries.c):
                n_term_ions[frag.position] = 1
                if frag.is_glycosylated:
                    glycosylated_n_term_ions.add((series, frag.position))
            elif series in (IonSeries.y, IonSeries.z):
                c_term_ions[frag.position] = 1
                if frag.is_glycosylated:
                    glycosylated_c_term_ions.add((series, frag.position))
            elif series == IonSeries.stub_glycopeptide:
                stub_count += 1
        return n_term_ions, c_term_ions, stub_count, len(glycosylated_n_term_ions), len(glycosylated_c_term_ions)

    def _compute_glycosylated_coverage(self, glycosylated_n_term_ions, glycosylated_c_term_ions):
        """Return matched / theoretical glycosylated positions, pooled over ladders.

        A ladder only contributes when its theoretical glycosylated count is
        positive; when neither does, the coverage is ``0.0``.
        """
        numer = 0.0
        denom = 0.0
        if self.glycosylated_n_term_ion_count > 0:
            numer += glycosylated_n_term_ions
            denom += self.glycosylated_n_term_ion_count
        if self.glycosylated_c_term_ion_count > 0:
            numer += glycosylated_c_term_ions
            denom += self.glycosylated_c_term_ion_count
        if denom == 0.0:
            return 0.0
        return numer / denom

    def _get_internal_size(self, glycan_composition):
        """Return ``approximate_internal_size_of_glycan(glycan_composition)``."""
        return approximate_internal_size_of_glycan(glycan_composition)

    def compute_coverage(self):
        """Return ``(mean_coverage, glycosylated_coverage, stub_fraction)``.

        ``mean_coverage`` is the mean over positions of
        ``log2(n + reversed(c) + 1) / log2(3)``; ``stub_fraction`` is the
        matched stub count capped at 3, divided by 3.
        """
        (n_term_ions, c_term_ions, stub_count,
         glycosylated_n_term_ions,
         glycosylated_c_term_ions) = self._compute_coverage_vectors()

        mean_coverage = np.mean(np.log2(n_term_ions + c_term_ions[::-1] + 1) / np.log2(3))

        glycosylated_coverage = self._compute_glycosylated_coverage(
            glycosylated_n_term_ions,
            glycosylated_c_term_ions)

        stub_fraction = min(stub_count, 3) / 3.

        return mean_coverage, glycosylated_coverage, stub_fraction

    @classmethod
    def get_params(cls, backbone_weight=None, glycosylated_weight=None, stub_weight=None, **kwargs):
        """Fill missing weights from the class defaults.

        Returns ``(backbone_weight, glycosylated_weight, stub_weight, kwargs)``
        where ``kwargs`` holds the remaining keyword arguments.
        """
        if backbone_weight is None:
            backbone_weight = cls.backbone_weight
        if glycosylated_weight is None:
            glycosylated_weight = cls.glycosylated_weight
        if stub_weight is None:
            stub_weight = cls.stub_weight
        return backbone_weight, glycosylated_weight, stub_weight, kwargs

    def _resolve_weights(self, backbone_weight, glycosylated_weight, stub_weight):
        """Replace every ``None`` weight with the value of the same-named attribute.

        Unlike :meth:`get_params`, the lookup goes through the instance.
        """
        if backbone_weight is None:
            backbone_weight = self.backbone_weight
        if glycosylated_weight is None:
            glycosylated_weight = self.glycosylated_weight
        if stub_weight is None:
            stub_weight = self.stub_weight
        return backbone_weight, glycosylated_weight, stub_weight

    def calculate_score(self, backbone_weight=None, glycosylated_weight=None, stub_weight=None, **kwargs):
        """Compute the coverage score, cache it in ``_score`` and return it.

        Weights left as ``None`` fall back to the attributes of the same name.
        """
        backbone_weight, glycosylated_weight, stub_weight = self._resolve_weights(
            backbone_weight, glycosylated_weight, stub_weight)
        score = self._coverage_score(backbone_weight, glycosylated_weight, stub_weight)
        self._score = score
        return score

    def _coverage_score(self, backbone_weight=None, glycosylated_weight=None, stub_weight=None):
        """Combine the three coverage terms into one score.

        The weighted sum of backbone and glycosylated coverage is scaled by
        ``1 - stub_weight`` and the stub fraction, scaled by ``stub_weight``,
        is added to it.
        """
        backbone_weight, glycosylated_weight, stub_weight = self._resolve_weights(
            backbone_weight, glycosylated_weight, stub_weight)
        mean_coverage, glycosylated_coverage, stub_fraction = self.compute_coverage()
        score = (((mean_coverage * backbone_weight) + (glycosylated_coverage * glycosylated_weight)) * (
            1 - stub_weight)) + (stub_fraction * stub_weight)
        return score
    def match(self, error_tolerance=2e-5, *args, **kwargs):
        """Match oxonium, stub and backbone fragments, masking shared peaks.

        Steps, in order:

        1. Run ``GlycanCompositionSignatureMatcher.match`` on this instance.
        2. Match small glycan (oxonium) fragments with ``has_peak``; each hit
           is recorded, its ``index.neutral_mass`` is masked and the peak is
           removed from ``self._sanitized_spectrum``.
        3. Match stub fragments with ``all_peaks_for``; each hit is recorded
           and masked.
        4. Match the b then y series (with the ``neutral_losses`` keyword
           argument, default empty), skipping masked peaks, counting one
           theoretical unit per position and the glycosylated positions per
           series.

        Sets ``n_theoretical``, ``glycosylated_b_ion_count``,
        ``glycosylated_y_ion_count``, ``solution_map`` and
        ``_backbone_mass_series`` (the list of backbone fragments visited),
        and returns the match map.
        """
        GlycanCompositionSignatureMatcher.match(
            self, error_tolerance=error_tolerance)
        matched = FragmentMatchMap()
        peaks = self.spectrum
        position_total = 0
        backbone_fragments = []
        neutral_losses = tuple(kwargs.pop("neutral_losses", []))

        masked_masses = set()
        oxonium_fragments = self.target.glycan_fragments(
            all_series=False,
            allow_ambiguous=False,
            include_large_glycan_fragments=False,
            maximum_fragment_size=4)
        for fragment in oxonium_fragments:
            hit = peaks.has_peak(fragment.mass, error_tolerance)
            if not hit:
                continue
            matched.add(hit, fragment)
            masked_masses.add(hit.index.neutral_mass)
            try:
                self._sanitized_spectrum.remove(hit)
            except KeyError:
                # Already removed by an earlier fragment.
                pass

        for fragment in self.target.stub_fragments(extended=True):
            for hit in peaks.all_peaks_for(fragment.mass, error_tolerance):
                # Should we be masking these? Peptides which have amino acids
                # that are approximately the same mass as a monosaccharide
                # unit at their terminus can produce cases where a stub ion
                # and a backbone fragment match the same peak.
                masked_masses.add(hit.index.neutral_mass)
                matched.add(hit, fragment)

        glycosylated_positions = {}
        for series_name in ('b', 'y'):
            glycosylated_total = 0
            for group in self.target.get_fragments(series_name, neutral_losses):
                group_glycosylated = False
                position_total += 1
                for fragment in group:
                    backbone_fragments.append(fragment)
                    group_glycosylated |= fragment.is_glycosylated
                    for hit in peaks.all_peaks_for(fragment.mass, error_tolerance):
                        if hit.index.neutral_mass not in masked_masses:
                            matched.add(hit, fragment)
                if group_glycosylated:
                    glycosylated_total += 1
            glycosylated_positions[series_name] = glycosylated_total

        self.n_theoretical = position_total
        self.glycosylated_b_ion_count = glycosylated_positions['b']
        self.glycosylated_y_ion_count = glycosylated_positions['y']
        self.solution_map = matched
        self._backbone_mass_series = backbone_fragments
        return matched
Exemplo n.º 13
0
 def __init__(self, scan, target, mass_shift=None):
     """Initialize the matcher with an empty match map and binomial state.

     ``scan``, ``target`` and ``mass_shift`` are forwarded to the parent
     initializer.
     """
     super(BinomialSpectrumMatcher, self).__init__(scan, target, mass_shift)
     # Start with no fragment matches recorded.
     self.solution_map = FragmentMatchMap()
     # Remaining setup is delegated to _init_binomial(), defined elsewhere
     # on the class.
     self._init_binomial()
Exemplo n.º 14
0
    def match(self,
              error_tolerance=2e-5,
              include_compound=False,
              combination_size=3,
              *args,
              **kwargs):
        """Match oxonium-ion signatures of the target glycan composition.

        For every monosaccharide key of ``self.target`` (except those for
        which ``is_dhex`` is true) a fragment and its water-loss variant are
        searched in the spectrum. When ``include_compound`` is true, every
        multiset of 2..``combination_size`` monosaccharides that the
        composition can supply is searched as well.

        Parameters
        ----------
        error_tolerance : float
            Mass tolerance handed to ``spectrum.all_peaks_for``.
        include_compound : bool
            Whether to search compound (multi-residue) oxonium ions.
        combination_size : int
            Largest number of residues in a compound ion.
        *args, **kwargs
            Accepted for signature compatibility and ignored.

        Returns
        -------
        FragmentMatchMap
            The matches, also stored in ``self.solution_map``.
            ``self.fragments_searched`` receives the number of candidates
            considered and ``self.spectrum_graph`` the peak-pair graph.
        """
        glycan_composition = self.target
        peak_set = self.spectrum
        matches = FragmentMatchMap()
        water = Composition("H2O")
        counter = 0
        try:
            max_peak = max([p.intensity for p in peak_set])
        except ValueError:
            # Empty spectrum: nothing can match. ``pairs`` is kept for
            # backward compatibility; ``spectrum_graph`` is set too so the
            # attribute exists on every return path.
            empty_graph = SpectrumGraph()
            self.solution_map = matches
            self.fragments_searched = counter
            self.pairs = empty_graph
            self.spectrum_graph = empty_graph
            return matches

        # Absolute intensity cut-offs. Comparing against a product instead of
        # dividing by max_peak avoids a division by zero when every peak has
        # zero intensity.
        threshold = max_peak * self.minimum_intensity_threshold
        compound_threshold = max_peak * 0.01

        def record_hits(fragment, cutoff):
            # Record every peak near the fragment mass that reaches the cutoff.
            for hit in peak_set.all_peaks_for(fragment.mass, error_tolerance):
                if hit.intensity < cutoff:
                    continue
                matches.add(hit, fragment)

        # Simple oxonium ions
        for k in glycan_composition.keys():
            # dhex does not produce a reliable oxonium ion
            if is_dhex(k):
                continue
            counter += 1
            record_hits(
                Fragment('B', {}, [],
                         k.mass(),
                         name=str(k),
                         composition=k.total_composition()),
                threshold)
            record_hits(
                Fragment('B', {}, [],
                         k.mass() - water.mass,
                         name="%s-H2O" % str(k),
                         composition=k.total_composition() - water),
                threshold)

        # Compound oxonium ions
        if include_compound:
            # The sorted residue list does not change between sizes.
            residues = sorted(glycan_composition, key=str)
            for i in range(2, combination_size + 1):
                for kk in itertools.combinations_with_replacement(residues, i):
                    counter += 1
                    # Skip combinations needing more copies of a residue than
                    # the composition contains.
                    if any(glycan_composition[k] < v
                           for k, v in Counter(kk).items()):
                        continue
                    key = '-'.join(map(str, kk))
                    mass = sum(k.mass() for k in kk)
                    composition = sum((k.total_composition() for k in kk),
                                      Composition())
                    record_hits(
                        Fragment('B', {}, [],
                                 mass,
                                 name=key,
                                 composition=composition),
                        compound_threshold)
                    record_hits(
                        Fragment('B', {}, [],
                                 mass - water.mass,
                                 name="%s-H2O" % key,
                                 composition=composition - water),
                        compound_threshold)

        self.spectrum_graph = self._find_peak_pairs(error_tolerance,
                                                    include_compound)
        self.solution_map = matches
        self.fragments_searched = counter
        return matches
    def match(self, error_tolerance=2e-5, include_compound=False, combination_size=3, *args, **kwargs):
        """Search the spectrum for oxonium ions of the target glycan composition.

        Each monosaccharide key of ``self.target`` that is not a dhex yields
        two candidates: the residue itself and the residue minus water. With
        ``include_compound`` true, combinations (with replacement) of 2 up to
        ``combination_size`` residues are searched too, provided the
        composition holds enough copies of each residue.

        Peaks weaker than ``max intensity * self.minimum_intensity_threshold``
        are ignored for simple ions; compound ions use a fixed 1% cut-off.

        Sets ``self.solution_map``, ``self.fragments_searched`` and
        ``self.spectrum_graph``; returns the match map. ``*args`` and
        ``**kwargs`` are accepted and ignored.
        """
        glycan_composition = self.target
        peak_set = self.spectrum
        matches = FragmentMatchMap()
        water = Composition("H2O")
        counter = 0
        try:
            max_peak = max([p.intensity for p in peak_set])
        except ValueError:
            # max() of an empty spectrum. ``pairs`` is retained for backward
            # compatibility; ``spectrum_graph`` is now populated as well so
            # both return paths leave the same attributes behind.
            empty_graph = SpectrumGraph()
            self.solution_map = matches
            self.fragments_searched = counter
            self.pairs = empty_graph
            self.spectrum_graph = empty_graph
            return matches

        # Cut-offs are expressed as absolute intensities, so no division by
        # max_peak (which may be zero) is needed.
        threshold = max_peak * self.minimum_intensity_threshold
        compound_threshold = max_peak * 0.01

        def search(mass, name, composition, cutoff):
            # Build the candidate fragment and record every sufficiently
            # intense peak found near its mass.
            f = Fragment('B', {}, [], mass, name=name, composition=composition)
            for hit in peak_set.all_peaks_for(f.mass, error_tolerance):
                if hit.intensity >= cutoff:
                    matches.add(hit, f)

        # Simple oxonium ions
        for k in glycan_composition.keys():
            # dhex does not produce a reliable oxonium ion
            if is_dhex(k):
                continue
            counter += 1
            search(k.mass(), str(k), k.total_composition(), threshold)
            search(k.mass() - water.mass, "%s-H2O" % str(k),
                   k.total_composition() - water, threshold)

        # Compound oxonium ions
        if include_compound:
            residues = sorted(glycan_composition, key=str)
            for i in range(2, combination_size + 1):
                for kk in itertools.combinations_with_replacement(residues, i):
                    counter += 1
                    # The composition must contain enough of every residue.
                    if any(glycan_composition[k] < v for k, v in Counter(kk).items()):
                        continue
                    key = '-'.join(map(str, kk))
                    mass = sum(k.mass() for k in kk)
                    composition = sum((k.total_composition() for k in kk), Composition())
                    search(mass, key, composition, compound_threshold)
                    search(mass - water.mass, "%s-H2O" % key,
                           composition - water, compound_threshold)

        self.spectrum_graph = self._find_peak_pairs(error_tolerance, include_compound)
        self.solution_map = matches
        self.fragments_searched = counter
        return matches
 def _sanitize_solution_map(self):
     """Return a new ``FragmentMatchMap`` without the oxonium-ion matches.

     Every pair in ``self.solution_map`` whose fragment series differs from
     ``"oxonium_ion"`` is added to the new map.
     """
     kept = FragmentMatchMap()
     for match_pair in self.solution_map:
         keep = match_pair.fragment.series != "oxonium_ion"
         if keep:
             kept.add(match_pair)
     return kept
class FrequencyScorer(GlycopeptideSpectrumMatcherBase):
    """Score a match by the model probability of its observed backbone cleavages.

    ``model`` must provide ``n_term_probability`` and ``c_term_probability``,
    each called with one of a fragment's flanking amino acids.
    """

    def __init__(self, scan, sequence, model=None, mass_shift=None):
        """Forward the arguments to the parent, reset state and keep ``model``."""
        super(FrequencyScorer, self).__init__(scan, sequence, mass_shift)
        self._score = None
        self.solution_map = FragmentMatchMap()
        self.glycosylated_b_ion_count = 0
        self.glycosylated_y_ion_count = 0
        self.model = model

    def match(self, error_tolerance=2e-5):
        """Match b, y and stub fragments against the spectrum.

        Each fragment mass is looked up with ``spectrum.has_peak``; truthy
        results are recorded in a new ``FragmentMatchMap``. The number of
        b and y positions holding a glycosylated fragment is stored in
        ``glycosylated_b_ion_count`` / ``glycosylated_y_ion_count``.

        Returns the match map, which is also stored in ``solution_map``.
        """
        solution_map = FragmentMatchMap()
        spectrum = self.spectrum

        glycosylated_counts = {}
        for series_name in ('b', 'y'):
            n_glycosylated = 0
            for frags in self.target.get_fragments(series_name):
                glycosylated_position = False
                for frag in frags:
                    glycosylated_position |= frag.is_glycosylated
                    peak = spectrum.has_peak(frag.mass, error_tolerance)
                    if peak:
                        solution_map.add(peak, frag)
                if glycosylated_position:
                    n_glycosylated += 1
            glycosylated_counts[series_name] = n_glycosylated

        for frag in self.target.stub_fragments(extended=True):
            peak = spectrum.has_peak(frag.mass, error_tolerance)
            if peak:
                solution_map.add(peak, frag)

        self.glycosylated_b_ion_count = glycosylated_counts['b']
        self.glycosylated_y_ion_count = glycosylated_counts['y']
        self.solution_map = solution_map
        return solution_map

    def _compute_total(self):
        """Return the total score available over all theoretical b and y fragments.

        Each fragment contributes half of the product of the model's N- and
        C-terminal probabilities for its flanking amino acids.
        """
        total = 0.
        # The probabilities come from ``self.model``, the attribute assigned
        # in ``__init__``; ``self.frequency_counter`` was never set.
        model = self.model
        for frags in chain(self.target.get_fragments('b'), self.target.get_fragments('y')):
            for frag in frags:
                n_term, c_term = frag.flanking_amino_acids
                score = model.n_term_probability(
                    n_term) * model.c_term_probability(c_term)
                total += score * 0.5
        return total

    def _score_backbone(self):
        """Return the observed fraction of the total available backbone score.

        The first matched b/y fragment seen at a position is weighted 0.6 and
        any later one at the same position 0.4. Returns ``0.0`` when the
        theoretical total is zero instead of dividing by zero.
        """
        total = self._compute_total()
        if total == 0:
            return 0.0
        observed = 0.0
        track_site = set()
        for frag in self.solution_map.fragments():
            if (frag.series == 'b') or (frag.series == 'y'):
                position = frag.position
                n_term, c_term = frag.flanking_amino_acids
                score = self.model.n_term_probability(
                    n_term) * self.model.c_term_probability(c_term)
                weight = 0.6 if position not in track_site else 0.4
                track_site.add(position)
                observed += score * weight
        return observed / total
Exemplo n.º 18
0
class BinomialSpectrumMatcher(GlycopeptideSpectrumMatcherBase):
    """Score a match with binomial fragment-count and intensity components."""

    def __init__(self, scan, target, mass_shift=None):
        """Forward the arguments to the parent and reset the matching state."""
        super(BinomialSpectrumMatcher, self).__init__(scan, target, mass_shift)
        self.solution_map = FragmentMatchMap()
        self._init_binomial()

    def _init_binomial(self):
        """Copy the spectrum's peaks into a set and zero the theoretical count."""
        self._sanitized_spectrum = set(self.spectrum)
        self.n_theoretical = 0

    @staticmethod
    def _floor_probability(value):
        """Raise ``value`` to 1e-170 when it is smaller, keeping log10 finite."""
        if value < 1e-170:
            value = 1e-170
        return value

    def _match_oxonium_ions(self, error_tolerance=2e-5, masked_peaks=None):
        """Run the parent's oxonium matching and drop the masked peaks.

        Every entry of ``masked_peaks`` is used as an index into
        ``self.spectrum`` and the resulting peak is removed from
        ``self._sanitized_spectrum``. Returns the parent's return value.
        """
        if masked_peaks is None:
            masked_peaks = set()
        val = super(BinomialSpectrumMatcher,
                    self)._match_oxonium_ions(error_tolerance=error_tolerance,
                                              masked_peaks=masked_peaks)
        self._sanitized_spectrum -= {self.spectrum[i] for i in masked_peaks}
        return val

    def _match_backbone_series(self,
                               series,
                               error_tolerance=2e-5,
                               masked_peaks=None,
                               strategy=None,
                               include_neutral_losses=False):
        """Match one backbone series, counting one theoretical unit per position.

        Parameters
        ----------
        series
            The ion series passed to ``get_fragments``.
        error_tolerance : float
            Mass tolerance handed to ``spectrum.all_peaks_for``.
        masked_peaks : container or None
            Values compared against ``peak.index.neutral_mass``; matching
            peaks are skipped. ``None`` (the default) masks nothing.
        strategy
            Fragmentation strategy, defaulting to ``HCDFragmentationStrategy``.
        include_neutral_losses
            Passed through to ``get_fragments``.
        """
        if strategy is None:
            strategy = HCDFragmentationStrategy
        if masked_peaks is None:
            # The default used to reach the ``in`` test below as None and
            # raise TypeError.
            masked_peaks = frozenset()
        for frags in self.get_fragments(
                series,
                strategy=strategy,
                include_neutral_losses=include_neutral_losses):
            # Should this be on the level of position, or the level of the individual fragment ions?
            # At the level of position, this makes missing only glycosylated or unglycosylated ions
            # less punishing, while at the level of the fragment makes more sense by the definition
            # of the geometric mass accuracy interpretation.
            #
            # Using the less severe case to be less pessimistic
            self.n_theoretical += 1
            for frag in frags:
                for peak in self.spectrum.all_peaks_for(
                        frag.mass, error_tolerance):
                    if peak.index.neutral_mass in masked_peaks:
                        continue
                    self.solution_map.add(peak, frag)

    def _sanitize_solution_map(self):
        """Return the list of matched pairs whose series is not ``"oxonium_ion"``."""
        san = list()
        for pair in self.solution_map:
            if pair.fragment.series != "oxonium_ion":
                san.append(pair)
        return san

    def _compute_average_window_size(self, error_tolerance=2e-5):
        """Return ``(peptide composition mass / 3) * error_tolerance * 2``."""
        average_window_size = ((self.target.peptide_composition().mass) /
                               3.) * error_tolerance * 2
        return average_window_size

    def _fragment_matched_binomial(self, error_tolerance=2e-5):
        """Return the fragment-count component, floored at 1e-170."""
        precursor_mass = calculate_precursor_mass(self)

        fragment_match_component = binomial_fragments_matched(
            self.n_theoretical, len(self._sanitize_solution_map()),
            self._compute_average_window_size(error_tolerance), precursor_mass)
        return self._floor_probability(fragment_match_component)

    def _intensity_component_binomial(self):
        """Return the intensity component, floored at 1e-170."""
        intensity_component = binomial_intensity(self._sanitized_spectrum,
                                                 self._sanitize_solution_map(),
                                                 self.n_theoretical)
        return self._floor_probability(intensity_component)

    def _binomial_score(self, error_tolerance=2e-5, *args, **kwargs):
        """Return ``-log10(intensity) - log10(fragment match)``.

        The score is ``0`` when there are no non-oxonium matches or the
        sanitized spectrum is empty. Both components are floored at 1e-170
        before the logarithm is taken.
        """
        precursor_mass = calculate_precursor_mass(self)

        solution_map = self._sanitize_solution_map()
        n_matched = len(solution_map)
        if n_matched == 0 or len(self._sanitized_spectrum) == 0:
            return 0

        fragment_match_component = self._floor_probability(
            binomial_fragments_matched(
                self.n_theoretical, n_matched,
                self._compute_average_window_size(error_tolerance),
                precursor_mass))

        intensity_component = self._floor_probability(
            binomial_intensity(self._sanitized_spectrum,
                               solution_map,
                               self.n_theoretical))

        score = -np.log10(intensity_component) + -np.log10(
            fragment_match_component)

        if np.isinf(score):
            print("infinite score", self.scan, self.target,
                  intensity_component, fragment_match_component, self.scan)

        return score

    def calculate_score(self, error_tolerance=2e-5, *args, **kwargs):
        """Compute the binomial score, cache it in ``_score`` and return it."""
        score = self._binomial_score(error_tolerance)
        self._score = score
        return score
Exemplo n.º 19
0
 def __init__(self, scan, target, mass_shift=None):
     """Set up the matcher: parent state, an empty match map, binomial state.

     ``scan``, ``target`` and ``mass_shift`` are handed to the parent
     initializer unchanged.
     """
     super(BinomialSpectrumMatcher, self).__init__(scan, target, mass_shift)
     # No fragment matches have been recorded yet.
     self.solution_map = FragmentMatchMap()
     # _init_binomial() (defined elsewhere on the class) performs the rest
     # of the setup.
     self._init_binomial()
Exemplo n.º 20
0
class SimpleCoverageScorer(GlycopeptideSpectrumMatcherBase):
    """Score a match by how much of the b/y ladders and stub ions were observed.

    ``match`` collects peak/fragment pairs into ``solution_map`` and records
    how many ladder positions offered a glycosylated fragment;
    ``calculate_score`` blends backbone coverage, glycosylated-position
    coverage and the number of matched stub glycopeptide ions.
    """

    def __init__(self, scan, sequence, mass_shift=None):
        super(SimpleCoverageScorer, self).__init__(scan, sequence, mass_shift)
        self._score = None
        self.solution_map = FragmentMatchMap()
        # Number of b / y ladder positions that had a glycosylated fragment,
        # filled in by ``match``.
        self.glycosylated_b_ion_count = 0
        self.glycosylated_y_ion_count = 0

    def _match_series(self, series, solution_map, error_tolerance):
        """Match one backbone series and count its glycosylated positions.

        Every peak returned by ``spectrum.all_peaks_for`` for a fragment is
        added to ``solution_map``.

        Returns
        -------
        int
            The number of positions in ``series`` where at least one
            theoretical fragment is glycosylated.
        """
        spectrum = self.spectrum
        n_glycosylated = 0
        for frags in self.target.get_fragments(series):
            glycosylated_position = False
            for frag in frags:
                glycosylated_position |= frag.is_glycosylated
                for peak in spectrum.all_peaks_for(frag.mass, error_tolerance):
                    solution_map.add(peak, frag)
            if glycosylated_position:
                n_glycosylated += 1
        return n_glycosylated

    def match(self, error_tolerance=2e-5):
        """Match b, y and extended stub fragments against the spectrum.

        Parameters
        ----------
        error_tolerance : float
            Mass error tolerance passed to ``spectrum.all_peaks_for``.

        Returns
        -------
        FragmentMatchMap
            The freshly built match map, also stored on ``solution_map``.
        """
        solution_map = FragmentMatchMap()

        n_glycosylated_b_ions = self._match_series(
            'b', solution_map, error_tolerance)
        n_glycosylated_y_ions = self._match_series(
            'y', solution_map, error_tolerance)

        for frag in self.target.stub_fragments(extended=True):
            for peak in self.spectrum.all_peaks_for(frag.mass, error_tolerance):
                solution_map.add(peak, frag)

        self.glycosylated_b_ion_count = n_glycosylated_b_ions
        self.glycosylated_y_ion_count = n_glycosylated_y_ions
        self.solution_map = solution_map
        return solution_map

    def _compute_coverage_vectors(self):
        """Summarise the matched fragments by series.

        Returns
        -------
        tuple
            ``(b_ions, y_ions, stub_count, glycosylated_b_ions,
            glycosylated_y_ions)`` where the first two are 0/1 arrays indexed
            by fragment position, ``stub_count`` is the number of matched stub
            glycopeptide fragments, and the last two are the numbers of
            distinct positions with a matched glycosylated fragment.
        """
        b_ions = np.zeros(len(self.target))
        y_ions = np.zeros(len(self.target))
        stub_count = 0
        # Track positions rather than fragments: the totals recorded by
        # ``match`` are per position, so several glycosylated fragments
        # matched at one position must only count once or the coverage
        # ratio could exceed 1.
        glycosylated_b_positions = set()
        glycosylated_y_positions = set()

        for frag in self.solution_map.fragments():
            if frag.series == IonSeries.b:
                b_ions[frag.position] = 1
                if frag.is_glycosylated:
                    glycosylated_b_positions.add(frag.position)
            elif frag.series == IonSeries.y:
                y_ions[frag.position] = 1
                if frag.is_glycosylated:
                    glycosylated_y_positions.add(frag.position)
            elif frag.series == IonSeries.stub_glycopeptide:
                stub_count += 1
        return (b_ions, y_ions, stub_count,
                len(glycosylated_b_positions), len(glycosylated_y_positions))

    def compute_coverage(self):
        """Compute the three coverage terms used by the score.

        Returns
        -------
        tuple of float
            ``(mean_coverage, glycosylated_coverage, stub_fraction)``.
        """
        (b_ions, y_ions, stub_count,
         glycosylated_b_ions,
         glycosylated_y_ions) = self._compute_coverage_vectors()

        # The y vector is reversed before being added to the b vector. Each
        # entry of the sum is 0, 1 or 2 and is mapped to 0, log2(2)/log2(3)
        # or 1 respectively.
        mean_coverage = np.mean(np.log2(b_ions + y_ions[::-1] + 1) / np.log2(3))

        # Average the matched/possible ratio over the ladders that actually
        # have glycosylated positions.
        glycosylated_coverage = 0.
        ladders = 0.
        if self.glycosylated_b_ion_count > 0:
            glycosylated_coverage += (glycosylated_b_ions / float(self.glycosylated_b_ion_count))
            ladders += 1.
        if self.glycosylated_y_ion_count > 0:
            glycosylated_coverage += (glycosylated_y_ions / float(self.glycosylated_y_ion_count))
            ladders += 1.
        if ladders > 0:
            glycosylated_coverage /= ladders

        # Stub ion evidence saturates at three matched stub fragments.
        stub_fraction = min(stub_count, 3) / 3.

        return mean_coverage, glycosylated_coverage, stub_fraction

    def calculate_score(self, backbone_weight=0.5, glycosylated_weight=0.5, stub_weight=0.2, **kwargs):
        """Compute the coverage score, cache it on ``_score`` and return it.

        Extra keyword arguments are accepted and ignored.
        """
        score = self._coverage_score(backbone_weight, glycosylated_weight, stub_weight)
        self._score = score
        return score

    def _coverage_score(self, backbone_weight=0.5, glycosylated_weight=0.5, stub_weight=0.2):
        """Blend the coverage terms into a single score.

        The weighted sum of backbone and glycosylated coverage is scaled by
        ``1 - stub_weight`` and the stub fraction contributes the remaining
        ``stub_weight`` share.
        """
        mean_coverage, glycosylated_coverage, stub_fraction = self.compute_coverage()
        score = (((mean_coverage * backbone_weight) + (glycosylated_coverage * glycosylated_weight)) * (
            1 - stub_weight)) + (stub_fraction * stub_weight)
        return score
Exemplo n.º 21
0
 def __init__(self, scan, sequence, mass_shift=None):
     """Set up the scorer with no cached score and an empty match map.

     ``scan``, ``sequence`` and ``mass_shift`` are forwarded unchanged to the
     parent initializer.
     """
     super(HyperscoreScorer, self).__init__(scan, sequence, mass_shift)
     # No score has been calculated yet.
     self._score = None
     # Start with no peak/fragment matches recorded.
     self.solution_map = FragmentMatchMap()
Exemplo n.º 22
0
class SimpleCoverageScorer(GlycopeptideSpectrumMatcherBase):
    """Score a match by N-/C-terminal ladder coverage and stub ion evidence.

    The class-level weights are the defaults used whenever a scoring method
    receives ``None`` for the corresponding argument.
    """

    backbone_weight = 0.5
    glycosylated_weight = 0.5
    stub_weight = 0.2

    def __init__(self, scan, sequence, mass_shift=None):
        super(SimpleCoverageScorer, self).__init__(scan, sequence, mass_shift)
        self._score = None
        self.solution_map = FragmentMatchMap()
        # Number of ladder positions that could carry a glycosylated fragment,
        # accumulated by ``_match_backbone_series``.
        self.glycosylated_n_term_ion_count = 0
        self.glycosylated_c_term_ion_count = 0

    # The b/y names are kept as aliases of the n-term/c-term counters.
    @property
    def glycosylated_b_ion_count(self):
        return self.glycosylated_n_term_ion_count

    @glycosylated_b_ion_count.setter
    def glycosylated_b_ion_count(self, value):
        self.glycosylated_n_term_ion_count = value

    @property
    def glycosylated_y_ion_count(self):
        return self.glycosylated_c_term_ion_count

    @glycosylated_y_ion_count.setter
    def glycosylated_y_ion_count(self, value):
        self.glycosylated_c_term_ion_count = value

    def _match_backbone_series(self,
                               series,
                               error_tolerance=2e-5,
                               masked_peaks=None,
                               strategy=None,
                               include_neutral_losses=False):
        """Match one backbone series and count its glycosylated positions.

        Parameters
        ----------
        series
            The ion series to generate fragments for; its ``direction``
            decides whether the n-term or c-term counter is incremented.
        error_tolerance : float
            Mass error tolerance passed to ``spectrum.all_peaks_for``.
        masked_peaks : collection, optional
            Peaks whose ``index.neutral_mass`` is in this collection are
            skipped. ``None`` means nothing is masked.
        strategy : optional
            Fragmentation strategy; defaults to ``HCDFragmentationStrategy``.
        include_neutral_losses : bool
            Forwarded to ``get_fragments``.
        """
        if strategy is None:
            strategy = HCDFragmentationStrategy
        if masked_peaks is None:
            # The membership test below needs a container; without this the
            # documented default raised TypeError on the first matched peak.
            masked_peaks = set()
        # Assumes that fragmentation proceeds from the start of the ladder (series position 1)
        # which means that if the last fragment could be glycosylated then the next one will be
        # but if the last fragment wasn't the next one might be.
        previous_position_glycosylated = False
        for frags in self.get_fragments(
                series,
                strategy=strategy,
                include_neutral_losses=include_neutral_losses):
            glycosylated_position = previous_position_glycosylated
            for frag in frags:
                if not glycosylated_position:
                    glycosylated_position |= frag.is_glycosylated
                for peak in self.spectrum.all_peaks_for(
                        frag.mass, error_tolerance):
                    if peak.index.neutral_mass in masked_peaks:
                        continue
                    self.solution_map.add(peak, frag)
            if glycosylated_position:
                if series.direction > 0:
                    self.glycosylated_n_term_ion_count += 1
                else:
                    self.glycosylated_c_term_ion_count += 1
            previous_position_glycosylated = glycosylated_position

    def _compute_coverage_vectors(self):
        """Summarise the matched fragments by terminal direction.

        Returns
        -------
        tuple
            ``(n_term_ions, c_term_ions, stub_count, n_glycosylated_n_term,
            n_glycosylated_c_term)``. The first two are 0/1 arrays indexed by
            fragment position; the last two count distinct
            ``(series, position)`` pairs with a matched glycosylated fragment.
        """
        n_term_ions = np.zeros(len(self.target))
        c_term_ions = np.zeros(len(self.target))
        stub_count = 0
        glycosylated_n_term_ions = set()
        glycosylated_c_term_ions = set()

        for frag in self.solution_map.fragments():
            series = frag.get_series()
            if series in (IonSeries.b, IonSeries.c):
                n_term_ions[frag.position] = 1
                if frag.is_glycosylated:
                    glycosylated_n_term_ions.add((series, frag.position))
            elif series in (IonSeries.y, IonSeries.z):
                c_term_ions[frag.position] = 1
                if frag.is_glycosylated:
                    glycosylated_c_term_ions.add((series, frag.position))
            elif series == IonSeries.stub_glycopeptide:
                stub_count += 1
        return n_term_ions, c_term_ions, stub_count, len(
            glycosylated_n_term_ions), len(glycosylated_c_term_ions)

    def _compute_glycosylated_coverage(self, glycosylated_n_term_ions,
                                       glycosylated_c_term_ions):
        """Return matched / possible glycosylated positions, pooled over ladders.

        A ladder only contributes when its recorded position count is
        positive; ``0.0`` is returned when neither ladder contributes.
        """
        numer = 0.0
        denom = 0.0
        if self.glycosylated_n_term_ion_count > 0:
            numer += glycosylated_n_term_ions
            denom += self.glycosylated_n_term_ion_count
        if self.glycosylated_c_term_ion_count > 0:
            numer += glycosylated_c_term_ions
            denom += self.glycosylated_c_term_ion_count
        if denom == 0.0:
            return 0.0
        return numer / denom

    def _get_internal_size(self, glycan_composition):
        """Delegate to ``approximate_internal_size_of_glycan``."""
        return approximate_internal_size_of_glycan(glycan_composition)

    def compute_coverage(self):
        """Compute the three coverage terms used by the score.

        Returns
        -------
        tuple of float
            ``(mean_coverage, glycosylated_coverage, stub_fraction)``.
        """
        (n_term_ions, c_term_ions, stub_count, glycosylated_n_term_ions,
         glycosylated_c_term_ions) = self._compute_coverage_vectors()

        # The c-term vector is reversed before being added to the n-term
        # vector. Each entry of the sum is 0, 1 or 2 and is mapped to 0,
        # log2(2)/log2(3) or 1 respectively.
        mean_coverage = np.mean(
            np.log2(n_term_ions + c_term_ions[::-1] + 1) / np.log2(3))

        glycosylated_coverage = self._compute_glycosylated_coverage(
            glycosylated_n_term_ions, glycosylated_c_term_ions)

        # Stub ion evidence saturates at three matched stub fragments.
        stub_fraction = min(stub_count, 3) / 3.

        return mean_coverage, glycosylated_coverage, stub_fraction

    @classmethod
    def get_params(cls,
                   backbone_weight=None,
                   glycosylated_weight=None,
                   stub_weight=None,
                   **kwargs):
        """Fill in class-level defaults for any weight passed as ``None``.

        Returns
        -------
        tuple
            ``(backbone_weight, glycosylated_weight, stub_weight, kwargs)``
            where ``kwargs`` is the dict of remaining keyword arguments.
        """
        if backbone_weight is None:
            backbone_weight = cls.backbone_weight
        if glycosylated_weight is None:
            glycosylated_weight = cls.glycosylated_weight
        if stub_weight is None:
            stub_weight = cls.stub_weight
        return backbone_weight, glycosylated_weight, stub_weight, kwargs

    def _resolve_weights(self, backbone_weight, glycosylated_weight,
                         stub_weight):
        """Replace ``None`` weights with this instance's attribute values."""
        if backbone_weight is None:
            backbone_weight = self.backbone_weight
        if glycosylated_weight is None:
            glycosylated_weight = self.glycosylated_weight
        if stub_weight is None:
            stub_weight = self.stub_weight
        return backbone_weight, glycosylated_weight, stub_weight

    def calculate_score(self,
                        backbone_weight=None,
                        glycosylated_weight=None,
                        stub_weight=None,
                        **kwargs):
        """Compute the coverage score, cache it on ``_score`` and return it.

        ``None`` weights fall back to the attributes of the same name; extra
        keyword arguments are accepted and ignored.
        """
        backbone_weight, glycosylated_weight, stub_weight = self._resolve_weights(
            backbone_weight, glycosylated_weight, stub_weight)
        score = self._coverage_score(backbone_weight, glycosylated_weight,
                                     stub_weight)
        self._score = score
        return score

    def _coverage_score(self,
                        backbone_weight=None,
                        glycosylated_weight=None,
                        stub_weight=None):
        """Blend the coverage terms into a single score.

        The weighted sum of backbone and glycosylated coverage is scaled by
        ``1 - stub_weight`` and the stub fraction contributes the remaining
        ``stub_weight`` share.
        """
        backbone_weight, glycosylated_weight, stub_weight = self._resolve_weights(
            backbone_weight, glycosylated_weight, stub_weight)
        mean_coverage, glycosylated_coverage, stub_fraction = self.compute_coverage(
        )
        score = (((mean_coverage * backbone_weight) +
                  (glycosylated_coverage * glycosylated_weight)) *
                 (1 - stub_weight)) + (stub_fraction * stub_weight)
        return score
Exemplo n.º 23
0
class FrequencyScorer(GlycopeptideSpectrumMatcherBase):
    """Score backbone matches using a model of flanking amino acid probabilities.

    ``model`` must provide ``n_term_probability`` and ``c_term_probability``;
    both are called with the flanking amino acids of each fragment.
    """

    def __init__(self, scan, sequence, model=None, mass_shift=None):
        super(FrequencyScorer, self).__init__(scan, sequence, mass_shift)
        self._score = None
        self.solution_map = FragmentMatchMap()
        # Number of b / y ladder positions that had a glycosylated fragment,
        # filled in by ``match``.
        self.glycosylated_b_ion_count = 0
        self.glycosylated_y_ion_count = 0
        self.model = model

    def _match_series(self, series, solution_map, error_tolerance):
        """Match one backbone series and count its glycosylated positions.

        A fragment is recorded with the peak returned by
        ``spectrum.has_peak`` when that result is truthy.

        Returns
        -------
        int
            The number of positions in ``series`` where at least one
            theoretical fragment is glycosylated.
        """
        spectrum = self.spectrum
        n_glycosylated = 0
        for frags in self.target.get_fragments(series):
            glycosylated_position = False
            for frag in frags:
                glycosylated_position |= frag.is_glycosylated
                peak = spectrum.has_peak(frag.mass, error_tolerance)
                if peak:
                    solution_map.add(peak, frag)
            if glycosylated_position:
                n_glycosylated += 1
        return n_glycosylated

    def match(self, error_tolerance=2e-5):
        """Match b, y and extended stub fragments against the spectrum.

        Parameters
        ----------
        error_tolerance : float
            Mass error tolerance passed to ``spectrum.has_peak``.

        Returns
        -------
        FragmentMatchMap
            The freshly built match map, also stored on ``solution_map``.
        """
        solution_map = FragmentMatchMap()

        n_glycosylated_b_ions = self._match_series(
            'b', solution_map, error_tolerance)
        n_glycosylated_y_ions = self._match_series(
            'y', solution_map, error_tolerance)

        for frag in self.target.stub_fragments(extended=True):
            peak = self.spectrum.has_peak(frag.mass, error_tolerance)
            if peak:
                solution_map.add(peak, frag)

        self.glycosylated_b_ion_count = n_glycosylated_b_ions
        self.glycosylated_y_ion_count = n_glycosylated_y_ions
        self.solution_map = solution_map
        return solution_map

    def _compute_total(self):
        """Sum the model probability of every theoretical b and y fragment.

        Each fragment contributes half of the product of its n-term and
        c-term flanking probabilities.
        """
        # The model is stored on ``self.model`` by ``__init__``; the
        # previously referenced ``frequency_counter`` attribute was never
        # assigned and raised AttributeError.
        model = self.model
        total = 0.
        for frags in chain(self.target.get_fragments('b'),
                           self.target.get_fragments('y')):
            for frag in frags:
                n_term, c_term = frag.flanking_amino_acids
                score = model.n_term_probability(
                    n_term) * model.c_term_probability(c_term)
                total += score * 0.5
        return total

    def _score_backbone(self):
        """Return the observed share of the total backbone probability mass.

        The first matched fragment seen at a position is weighted 0.6 and any
        further fragment at that position 0.4. ``0.0`` is returned when the
        theoretical total is zero, instead of dividing by zero.
        """
        total = self._compute_total()
        if total == 0:
            return 0.0
        observed = 0.0
        track_site = set()
        for frag in self.solution_map.fragments():
            if (frag.series == 'b') or (frag.series == 'y'):
                position = frag.position
                n_term, c_term = frag.flanking_amino_acids
                score = self.model.n_term_probability(
                    n_term) * self.model.c_term_probability(c_term)
                weight = 0.6 if position not in track_site else 0.4
                track_site.add(position)
                observed += score * weight
        return observed / total
Exemplo n.º 24
0
 def __init__(self, scan, sequence, mass_shift=None):
     """Set up the scorer with no cached score, no matches and zeroed counters.

     ``scan``, ``sequence`` and ``mass_shift`` are forwarded unchanged to the
     parent initializer.
     """
     super(SimpleCoverageScorer, self).__init__(scan, sequence, mass_shift)
     # No score has been calculated yet.
     self._score = None
     # Start with no peak/fragment matches recorded.
     self.solution_map = FragmentMatchMap()
     # Counters for the N-terminal and C-terminal ladders start at zero.
     self.glycosylated_n_term_ion_count = 0
     self.glycosylated_c_term_ion_count = 0
Exemplo n.º 25
0
class BinomialSpectrumMatcher(GlycopeptideSpectrumMatcherBase):
    """Score a match with a binomial model of fragment counts and intensity.

    The score is the sum of the negative base-10 logarithms of two
    probabilities, one from ``binomial_fragments_matched`` and one from
    ``binomial_intensity``. Oxonium ion matches and the peaks masked while
    matching them are excluded from both.
    """

    def __init__(self, scan, target, mass_shift=None):
        super(BinomialSpectrumMatcher, self).__init__(scan, target, mass_shift)
        self.solution_map = FragmentMatchMap()
        self._init_binomial()

    def _init_binomial(self):
        """Reset the peak set and theoretical fragment counter used for scoring."""
        self._sanitized_spectrum = set(self.spectrum)
        self.n_theoretical = 0

    def _match_oxonium_ions(self, error_tolerance=2e-5, masked_peaks=None):
        """Match oxonium ions, then drop the masked peaks from the scored set.

        ``masked_peaks`` is handed to the parent implementation; afterwards
        each of its entries is used as an index into ``self.spectrum`` and
        the corresponding peak is removed from ``_sanitized_spectrum``.
        """
        if masked_peaks is None:
            masked_peaks = set()
        val = super(BinomialSpectrumMatcher, self)._match_oxonium_ions(
            error_tolerance=error_tolerance, masked_peaks=masked_peaks)
        self._sanitized_spectrum -= {self.spectrum[i] for i in masked_peaks}
        return val

    def _match_backbone_series(self, series, error_tolerance=2e-5, masked_peaks=None, strategy=None,
                               include_neutral_losses=False):
        """Match one backbone series and count its theoretical positions.

        Parameters
        ----------
        series
            The ion series to generate fragments for.
        error_tolerance : float
            Mass error tolerance passed to ``spectrum.all_peaks_for``.
        masked_peaks : collection, optional
            Peaks whose ``index.neutral_mass`` is in this collection are
            skipped. ``None`` means nothing is masked.
        strategy : optional
            Fragmentation strategy; defaults to ``HCDFragmentationStrategy``.
        include_neutral_losses : bool
            Forwarded to ``get_fragments``.
        """
        if strategy is None:
            strategy = HCDFragmentationStrategy
        if masked_peaks is None:
            # The membership test below needs a container; without this the
            # documented default raised TypeError on the first matched peak.
            masked_peaks = set()
        for frags in self.get_fragments(series, strategy=strategy, include_neutral_losses=include_neutral_losses):
            # Should this be on the level of position, or the level of the individual fragment ions?
            # At the level of position, this makes missing only glycosylated or unglycosylated ions
            # less punishing, while at the level of the fragment makes more sense by the definition
            # of the geometric mass accuracy interpretation.
            #
            # Using the less severe case to be less pessimistic
            self.n_theoretical += 1
            for frag in frags:
                for peak in self.spectrum.all_peaks_for(frag.mass, error_tolerance):
                    if peak.index.neutral_mass in masked_peaks:
                        continue
                    self.solution_map.add(peak, frag)

    def _sanitize_solution_map(self):
        """Return the matched pairs whose fragment is not an oxonium ion."""
        san = list()
        for pair in self.solution_map:
            if pair.fragment.series != "oxonium_ion":
                san.append(pair)
        return san

    def _compute_average_window_size(self, error_tolerance=2e-5):
        """Return a third of the peptide composition mass times twice the tolerance."""
        average_window_size = (
            (self.target.peptide_composition(
            ).mass) / 3.) * error_tolerance * 2
        return average_window_size

    def _fragment_matched_binomial(self, error_tolerance=2e-5):
        """Return the fragments-matched probability, floored at 1e-170."""
        precursor_mass = calculate_precursor_mass(self)

        fragment_match_component = binomial_fragments_matched(
            self.n_theoretical,
            len(self._sanitize_solution_map()),
            self._compute_average_window_size(error_tolerance),
            precursor_mass
        )
        if fragment_match_component < 1e-170:
            fragment_match_component = 1e-170
        return fragment_match_component

    def _intensity_component_binomial(self):
        """Return the intensity probability, floored at 1e-170."""
        intensity_component = binomial_intensity(
            self._sanitized_spectrum,
            self._sanitize_solution_map(),
            self.n_theoretical)

        if intensity_component < 1e-170:
            intensity_component = 1e-170
        return intensity_component

    def _binomial_score(self, error_tolerance=2e-5, *args, **kwargs):
        """Combine the fragment-count and intensity terms into a score.

        Returns
        -------
        float or int
            ``-log10(intensity) + -log10(fragments matched)`` with each
            probability floored at 1e-170, or ``0`` when nothing was matched
            or the sanitized spectrum is empty. Extra positional and keyword
            arguments are accepted and ignored.
        """
        precursor_mass = calculate_precursor_mass(self)

        solution_map = self._sanitize_solution_map()
        n_matched = len(solution_map)
        if n_matched == 0 or len(self._sanitized_spectrum) == 0:
            return 0

        fragment_match_component = binomial_fragments_matched(
            self.n_theoretical,
            len(solution_map),
            self._compute_average_window_size(error_tolerance),
            precursor_mass
        )

        # Floor both probabilities so that their logarithms stay finite.
        if fragment_match_component < 1e-170:
            fragment_match_component = 1e-170

        intensity_component = binomial_intensity(
            self._sanitized_spectrum,
            solution_map,
            self.n_theoretical)

        if intensity_component < 1e-170:
            intensity_component = 1e-170
        score = -np.log10(intensity_component) + -np.log10(fragment_match_component)

        # Diagnostic only: the score is still returned unchanged.
        if np.isinf(score):
            print("infinite score", self.scan, self.target, intensity_component, fragment_match_component, self.scan)

        return score

    def calculate_score(self, error_tolerance=2e-5, *args, **kwargs):
        """Compute the binomial score, cache it on ``_score`` and return it."""
        score = self._binomial_score(error_tolerance)
        self._score = score
        return score
Exemplo n.º 26
0
 def __init__(self, scan, sequence, mass_shift=None):
     super(SimpleCoverageScorer, self).__init__(scan, sequence, mass_shift)
     self._score = None
     self.solution_map = FragmentMatchMap()