示例#1
0
 def _get_sequence(self, gpm):
     """Return a freshly parsed ``PeptideSequence`` for ``gpm``.

     The text of ``gpm.structure`` is parsed when that attribute can be
     read; if an ``AttributeError`` is raised along the way, the text of
     ``gpm`` itself is parsed instead.
     """
     try:
         return PeptideSequence(str(gpm.structure))
     except AttributeError:
         # Reached when ``gpm`` has no ``structure`` attribute, and also if
         # parsing the structure text itself raises AttributeError.
         return PeptideSequence(str(gpm))
示例#2
0
 def build_structures(self):
     """Build two ``PeptideSequence`` objects from fixed sequence strings.

     The first string annotates the ``N`` of ``YLGN...`` with
     ``N-Glycosylation`` and appends a glycan composition in braces; the
     second annotates the same position with an ``#:iupac`` glycan
     description instead.

     Returns
     -------
     tuple
         ``(gp, gp2)`` in that order.
     """
     composition_text = (
         'YLGN(N-Glycosylation)ATAIFFLPDEGK{Hex:5; HexNAc:4; Neu5Ac:1}')
     iupac_text = (
         'YLGN(#:iupac,glycosylation_type=N-Linked:?-?-Hexp-(?-?)-?-?-'
         'Hexp2NAc-(?-?)-a-D-Manp-(1-6)-[a-D-Neup5Ac-(?-?)-?-?-Hexp-(?-?'
         ')-?-?-Hexp2NAc-(?-?)-a-D-Manp-(1-3)]b-D-Manp-(1-4)-b-D-Glcp2NA'
         'c-(1-4)-b-D-Glcp2NAc)ATAIFFLPDEGK')
     return PeptideSequence(composition_text), PeptideSequence(iupac_text)
示例#3
0
def glycopeptide_string(sequence, long=False, include_glycan=True):
    """Render a glycopeptide sequence as text with HTML modification chips.

    ``sequence`` is first re-parsed via ``PeptideSequence(str(sequence))``.
    Every residue contributes its ``symbol``; every modification contributes
    a ``<span class='modification-chip'>`` element coloured through
    ``colors.get_color``.

    Parameters
    ----------
    sequence : object
        Anything whose ``str()`` form ``PeptideSequence`` can parse.
    long : bool
        When true the chip shows the full modification name, otherwise only
        its first character.
    include_glycan : bool
        When true and the parsed sequence has a glycan, a space followed by
        ``glycan_composition_string(str(sequence.glycan))`` is appended.

    Returns
    -------
    str
        The concatenated markup.
    """
    sequence = PeptideSequence(str(sequence))

    # Chips on residues are wrapped in parentheses; terminal chips drop the
    # parentheses and are joined to the sequence with a dash instead.
    residue_chip = "(<span class='modification-chip'"\
        " style='background-color:%s;padding-left:1px;padding-right:2px;border-radius:2px;'"\
        " title='%s' data-modification='%s'>%s</span>)"
    n_term_chip = residue_chip.replace("(", "").replace(")", "") + '-'
    c_term_chip = "-" + (residue_chip.replace("(", "").replace(")", ""))

    def chip_markup(mod, layout=residue_chip):
        """Return the filled-in chip markup for one modification."""
        background = colors.get_color(str(mod))
        label = escape(mod.name if long else mod.name[0])
        title = escape(mod.name)
        return layout % (rgbpack(background), title, title, label)

    pieces = []

    n_term_mod = sequence.n_term.modification
    if n_term_mod is not None:
        pieces.append(chip_markup(n_term_mod, n_term_chip))

    for residue, modifications in sequence:
        pieces.append(residue.symbol)
        if modifications:
            pieces.extend(chip_markup(mod) for mod in modifications)

    c_term_mod = sequence.c_term.modification
    if c_term_mod is not None:
        pieces.append(chip_markup(c_term_mod, c_term_chip))

    if include_glycan and sequence.glycan is not None:
        pieces.append(' ' + glycan_composition_string(str(sequence.glycan)))

    return ''.join(pieces)
示例#4
0
 def _compute_sequence_color(self, gpm):
     """Choose a ``(color name, 0.5)`` pair from the modifications on ``gpm``.

     ``gpm.structure`` is re-parsed as a ``PeptideSequence`` and its
     ``modification_index`` is checked, in order, for ``N-Glycosylation``,
     ``O-Glycosylation`` and ``GAG-Linker``; the first hit decides the
     color.

     Raises
     ------
     ValueError
         When none of the three names is in the modification index.

     An ``AttributeError`` anywhere in the lookup (for example when ``gpm``
     has no ``structure``) yields ``('red', 0.5)`` instead of propagating.
     """
     # Ordered: earlier entries win when several names are present.
     color_by_modification = (
         ("N-Glycosylation", 'forestgreen'),
         ('O-Glycosylation', 'aquamarine'),
         ('GAG-Linker', 'orange'),
     )
     try:
         glycopeptide = PeptideSequence(str(gpm.structure))
         for modification_name, color in color_by_modification:
             if modification_name in glycopeptide.modification_index:
                 return color, 0.5
         raise ValueError(glycopeptide)
     except AttributeError:
         return 'red', 0.5
示例#5
0
def classify_proton_mobility(scan: ProcessedScan, structure: glycopeptidepy.PeptideSequence) -> str:
    """Label a scan/structure pair as ``'mobile'``, ``'partial'`` or ``'immobile'``.

    The structure's proton mobility value is read from its
    ``proton_mobility`` attribute when present, otherwise computed with
    ``proton_mobility(structure)`` and, where the object allows it, stored
    back on the structure for later calls.

    The value is then compared with ``scan.precursor_information.charge``:

    * charge equals ``ChargeNotProvided``, or value below charge -> ``'mobile'``
    * value equal to charge -> ``'partial'``
    * otherwise -> ``'immobile'``
    """
    try:
        mobility = structure.proton_mobility
    except AttributeError:
        mobility = proton_mobility(structure)
        # Best-effort cache on the structure; objects that refuse new
        # attributes simply get recomputed next time.
        try:
            structure.proton_mobility = mobility
        except AttributeError:
            pass

    precursor_charge = scan.precursor_information.charge
    if precursor_charge == ChargeNotProvided or mobility < precursor_charge:
        return "mobile"
    if mobility == precursor_charge:
        return 'partial'
    return 'immobile'
示例#6
0
def get_base_peptide(peptide_obj):
    """Return a ``PeptideSequence`` for the base sequence of ``peptide_obj``.

    ``Peptide`` instances are parsed from their ``base_peptide_sequence``
    attribute; anything else is parsed from its ``str()`` form.
    """
    if not isinstance(peptide_obj, Peptide):
        return PeptideSequence(str(peptide_obj))
    return PeptideSequence(peptide_obj.base_peptide_sequence)
示例#7
0
import unittest

import glypy
from glycopeptidepy import PeptideSequence
from ms_deisotope.output import ProcessedMzMLDeserializer

from glycan_profiling.test.fixtures import get_test_data

from glycan_profiling.tandem.glycopeptide import core_search
from glycan_profiling.tandem.glycopeptide.core_search import (
    GlycanCombinationRecord, GlycanTypes, GlycanFilteringPeptideMassEstimator)

# Mass of the peptide parsed from a plain residue string (no annotations).
peptide_mass = PeptideSequence("YLGNATAIFFLPDEGK").mass

# Three glycan compositions parsed from their brace-delimited text form.
# They stay available both individually and as an ordered list.
gc1, gc2, gc3 = glycan_compositions = [
    glypy.glycan_composition.HashableGlycanComposition.parse(composition_text)
    for composition_text in (
        "{Hex:5; HexNAc:4; Neu5Ac:1}",
        "{Hex:5; HexNAc:4; Neu5Ac:2}",
        "{Hex:6; HexNAc:5; Neu5Ac:2}",
    )
]

# One GlycanCombinationRecord per composition, numbered from 1. The mass
# passed in is the composition mass minus the mass of its composition offset.
glycan_database = []
for i, gc in enumerate(glycan_compositions):
    record = GlycanCombinationRecord(
        i + 1,
        gc.mass() - gc.composition_offset.mass,
        gc,
        1,
        [GlycanTypes.n_glycan, GlycanTypes.o_glycan],
    )
    glycan_database.append(record)
示例#8
0
 def _get_peptide_key(self, chromatogram):
     """Return the deglycosylated form of ``chromatogram.structure``.

     The structure is re-parsed from its string form first, and
     ``deglycosylate()`` is called on that new ``PeptideSequence``.
     """
     parsed = PeptideSequence(str(chromatogram.structure))
     return parsed.deglycosylate()
示例#9
0
 def from_obj(cls, obj, **kwargs):
     """Delegate to the parent ``from_obj`` with a re-parsed ``structure``.

     ``obj.structure`` is converted to text and parsed into a new
     ``PeptideSequence``, which is passed to the parent implementation as
     the ``structure`` keyword along with any extra ``kwargs``.
     """
     gp = PeptideSequence(str(obj.structure))
     parent = super(GlycopeptideChromatogramProxy, cls)
     return parent.from_obj(obj, structure=gp, **kwargs)
示例#10
0
 def structure(self):
     """Return the parsed structure, building and caching it on first use.

     While ``self._structure`` is ``None`` the value is parsed from the
     string form of ``self.kwargs["structure"]`` and stored in
     ``self._structure``; afterwards the stored object is returned as is.
     """
     if self._structure is not None:
         return self._structure
     self._structure = PeptideSequence(str(self.kwargs["structure"]))
     return self._structure
示例#11
0
 def _get_sequence(self, gpm):
     """Return ``gpm.structure``, or parse ``gpm`` itself when that is missing.

     Unlike a re-parse, an existing ``structure`` attribute is handed back
     unchanged; only the ``AttributeError`` fallback builds a new
     ``PeptideSequence`` from ``str(gpm)``.
     """
     try:
         sequence = gpm.structure
     except AttributeError:
         sequence = PeptideSequence(str(gpm))
     return sequence
def get_sequoninfodict_from_files_accid(csvfilelist,
                                        accessionid=None,
                                        seq='',
                                        score_cutoff=30,
                                        replicate_cutoff=2,
                                        sequon_length=4):
    """Map each N-glycan sequon of one protein to its site and glycan list.

    Takes a list of DictReader CSV file objects produced from GlycReSoft
    glycopeptide identification. Either a protein sequence or a UniProt
    accession ID is required; if both are provided, the sequence is used and
    ``get_seq`` is not called.

    Parameters
    ----------
    csvfilelist : list
        One entry per replicate; the list index is used as the replicate ID.
    accessionid : str, optional
        Accession ID, passed to ``get_seq`` when no sequence is given and
        forwarded to ``update_siteinfodict``.
    seq : str, optional
        Protein sequence. An empty string or ``None`` counts as not provided.
    score_cutoff : number
        Forwarded to ``update_siteinfodict`` (per the original description,
        glycans scoring below it are excluded -- confirm against that helper).
    replicate_cutoff : int
        Forwarded to ``prune_siteinfodict`` (per the original description,
        glycans seen in fewer replicates are excluded and the rest averaged
        -- confirm against that helper). Must not exceed ``len(csvfilelist)``.
    sequon_length : int
        Number of residues, starting at the site index, used as the sequon.

    Returns
    -------
    dict
        Keyed by sequon string, of the form::

            {
                "XXXX": {
                    "site": SITE,
                    "glycans": [glycan1, glycan2, ...]
                },
                ...
            }

        Because the sequon text is the key, two sites that share the same
        sequon collapse into one entry (the later site wins).

    Raises
    ------
    AssertionError
        If ``replicate_cutoff`` exceeds the number of CSVs, or if neither an
        accession ID nor a non-empty sequence is provided.
    """
    # The number of CSVs must not be less than the number of required
    # replicates.
    replicate_count = len(csvfilelist)
    assert replicate_cutoff <= replicate_count, \
        'Replicate cutoff = {} > {} = number of csvs'.format(
            replicate_cutoff, replicate_count)

    # Either accessionid or a non-empty seq must be provided. Testing
    # truthiness (rather than ``seq is not None``) matters because seq
    # defaults to '' and would otherwise always pass this check.
    assert accessionid is not None or seq, 'accessionid or sequence required'

    # If the sequence was not provided, look it up from the accession ID.
    if not seq:
        seq = get_seq(accessionid)
    pepseq = PeptideSequence(seq)

    # Start with an empty glycan list for every N-glycan sequon site.
    site_to_glycans = {x: [] for x in pepseq.n_glycan_sequon_sites}

    # Use the CSV index as replicate ID and fold each replicate's content
    # into the per-site mapping.
    for replicate, content in enumerate(csvfilelist):
        site_to_glycans = update_siteinfodict(site_to_glycans,
                                              content,
                                              replicate,
                                              accessionid,
                                              pepseq,
                                              score_cutoff=score_cutoff)

    # Apply the replicate cutoff to the accumulated glycans.
    site_to_glycans = prune_siteinfodict(site_to_glycans,
                                         replicate_cutoff=replicate_cutoff)

    # Build the dict relating sequon to site and glycan list, then return.
    sequoninfodict = {
        seq[site:site + sequon_length]: {
            'site': site,
            'glycans': glycans
        }
        for site, glycans in site_to_glycans.items()
    }
    return sequoninfodict