Exemplo n.º 1
0
    def _init_sites(self):
        """Record the N-glycosylation sequons of this protein in ``self.sites``.

        The sequence is obtained from ``self._get_sequence()``. If that raises
        ``residue.UnknownAminoAcidException`` the method returns immediately
        and ``self.sites`` is left untouched. The same exception raised while
        locating sequons is swallowed; whatever sites were built before it
        are still stored.
        """
        try:
            parsed_sequence = self._get_sequence()
        except residue.UnknownAminoAcidException:
            # Nothing can be located on a sequence that cannot be parsed.
            return

        located = []
        try:
            for position in sequence.find_n_glycosylation_sequons(parsed_sequence):
                located.append(
                    ProteinSite(name=ProteinSite.N_GLYCOSYLATION, location=position))
        except residue.UnknownAminoAcidException:
            pass

        # O-glycosylation and glycosaminoglycan-linker sites are intentionally
        # not recorded: they are not determined by a multi amino acid sequon,
        # so there is nothing to abstract away, and they are much too common.
        self.sites.extend(located)
Exemplo n.º 2
0
    def __init__(self, peptide_dict, enzyme=None, constant_modifications=None,
                 modification_translation_table=None, process=True):
        """Set up the conversion state for a single peptide record.

        Parameters
        ----------
        peptide_dict : mapping
            Must provide a ``"PeptideSequence"`` entry. An ``'id'`` entry is
            read with ``.get`` and stored as ``mzid_id`` (``None`` if absent).
        enzyme : optional
            Stored unchanged on the instance.
        constant_modifications : list, optional
            Stored on the instance; a new empty list is used when omitted.
        modification_translation_table : dict, optional
            Stored on the instance; a new empty dict is used when omitted.
        process : bool
            When true, ``self.process()`` is called at the end of construction.
        """
        self.peptide_dict = peptide_dict

        # Bookkeeping that starts out empty / zeroed.
        # NOTE(review): "deleteion_sites" looks like a misspelling, but the
        # attribute name is kept because other code may read it.
        self.insert_sites = []
        self.deleteion_sites = []
        self.modification_counter = 0
        self.missed_cleavages = 0

        raw_sequence = peptide_dict["PeptideSequence"]
        self.base_sequence = raw_sequence
        self.peptide_sequence = PeptideSequence(raw_sequence)

        self.glycosite_candidates = sequence.find_n_glycosylation_sequons(
            self.peptide_sequence, WHITELIST_GLYCOSITE_PTMS)

        # Defaults are created per call so instances never share a container.
        self.constant_modifications = (
            [] if constant_modifications is None else constant_modifications)
        self.modification_translation_table = (
            {} if modification_translation_table is None
            else modification_translation_table)
        self.enzyme = enzyme
        self.mzid_id = peptide_dict.get('id')

        if process:
            self.process()
Exemplo n.º 3
0
    def __init__(self, peptide_dict, enzyme=None, constant_modifications=None,
                 modification_translation_table=None, process=True):
        """Initialise per-peptide state from ``peptide_dict``.

        Parameters
        ----------
        peptide_dict : mapping
            Must contain ``"PeptideSequence"``; ``'id'`` is optional and is
            stored as ``mzid_id`` (``None`` when missing).
        enzyme : optional
            Kept on the instance as given.
        constant_modifications : list, optional
            Kept on the instance; replaced by a new empty list when ``None``.
        modification_translation_table : dict, optional
            Kept on the instance; replaced by a new empty dict when ``None``.
        process : bool
            If true, ``self.process()`` runs once all attributes are set.
        """
        self.peptide_dict = peptide_dict

        # NOTE(review): "deleteion_sites" appears to be a typo; the name is
        # preserved because it is part of the instance's attribute surface.
        self.insert_sites = []
        self.deleteion_sites = []
        self.modification_counter = 0
        self.missed_cleavages = 0

        sequence_text = peptide_dict["PeptideSequence"]
        self.base_sequence = sequence_text
        self.peptide_sequence = PeptideSequence(sequence_text)

        self.glycosite_candidates = sequence.find_n_glycosylation_sequons(
            self.peptide_sequence, WHITELIST_GLYCOSITE_PTMS)

        if constant_modifications is None:
            constant_modifications = []
        if modification_translation_table is None:
            modification_translation_table = {}
        self.constant_modifications = constant_modifications
        self.modification_translation_table = modification_translation_table
        self.enzyme = enzyme
        self.mzid_id = peptide_dict.get('id')

        if process:
            self.process()
Exemplo n.º 4
0
 def n_glycan_sequon_sites(self):
     """Return the N-glycosylation sequon positions, computing them on first use.

     The result of ``sequence.find_n_glycosylation_sequons`` applied to
     ``self.protein_sequence`` is stored in ``self._n_glycan_sequon_sites``
     and reused afterwards. If the lookup raises
     ``residue.UnknownAminoAcidException`` an empty list is returned and
     nothing is cached.
     """
     if self._n_glycan_sequon_sites is not None:
         return self._n_glycan_sequon_sites
     try:
         located = sequence.find_n_glycosylation_sequons(
             self.protein_sequence)
     except residue.UnknownAminoAcidException:
         return []
     self._n_glycan_sequon_sites = located
     return self._n_glycan_sequon_sites
Exemplo n.º 5
0
def n_glycan_sequon_sites(peptide, protein, use_local_sequence=False):
    """Return the sorted N-glycosylation sequon positions relevant to ``peptide``.

    Positions from ``protein.n_glycan_sequon_sites`` are selected with
    ``span_test`` using the peptide's start and end positions, then shifted
    by ``peptide.start_position``. When ``use_local_sequence`` is true, the
    sequons found directly on ``peptide.modified_peptide_sequence`` are
    merged in as well.
    """
    selected = span_test(
        protein.n_glycan_sequon_sites, peptide.start_position, peptide.end_position)
    relative_sites = {position - peptide.start_position for position in selected}
    if use_local_sequence:
        local_sites = sequence.find_n_glycosylation_sequons(
            peptide.modified_peptide_sequence)
        relative_sites.update(local_sites)
    return sorted(relative_sites)
def n_glycan_sequon_sites(peptide, protein, use_local_sequence=False):
    """Return the N-glycosylation sequon positions relevant to ``peptide``.

    Parameters
    ----------
    peptide
        Object exposing ``start_position``, ``end_position`` and (only read
        when ``use_local_sequence`` is true) ``modified_peptide_sequence``.
    protein
        Object exposing ``n_glycan_sequon_sites``, an iterable of positions.
    use_local_sequence : bool
        When true, sequons found directly on the peptide's own sequence are
        merged into the result.

    Returns
    -------
    list
        Unique positions relative to ``peptide.start_position``, in ascending
        order. Only protein positions with ``start <= position < end`` are
        kept. The list is sorted so callers get a deterministic order rather
        than the arbitrary iteration order of a set.
    """
    start = peptide.start_position
    end = peptide.end_position
    sites = {
        site - start
        for site in protein.n_glycan_sequon_sites
        if start <= site < end
    }
    if use_local_sequence:
        sites.update(sequence.find_n_glycosylation_sequons(
            peptide.modified_peptide_sequence))
    return sorted(sites)
def parent_sequence_aware_n_glycan_sequon_sites(peptide, protein):
    """Combine the peptide's own sequons with the protein's glycosylation sites.

    Sequons found on ``peptide.modified_peptide_sequence`` are merged with
    every entry of ``protein.glycosylation_sites`` that satisfies
    ``start_position <= site < end_position``, shifted by
    ``peptide.start_position``. The result is a list of unique positions in
    no particular order.
    """
    local_sites = sequence.find_n_glycosylation_sequons(
        peptide.modified_peptide_sequence)
    combined = set(local_sites)
    inherited = set(
        position - peptide.start_position
        for position in protein.glycosylation_sites
        if peptide.start_position <= position < peptide.end_position)
    combined |= inherited
    return list(combined)
Exemplo n.º 8
0
    def extract_proteins(self):
        """Load proteins from ``self.fasta_file`` with reversed sequences.

        Each protein read by ``ProteinFastaFileParser`` has its sequence
        reversed and is tagged with ``self.hypothesis_id``. N-glycosylation
        sequons are located on the original (forward) sequence and stored as
        ``ProteinSite`` entries whose location is mirrored onto the reversed
        sequence (``n - position - 1``). Proteins whose sequence raises
        ``UnknownAminoAcidException`` while being reversed or parsed are
        skipped. The session is committed every 5000 proteins and once more
        at the end.

        Returns
        -------
        int
            The number of proteins added to the session.
        """
        extracted = 0
        for protein in ProteinFastaFileParser(self.fasta_file):
            forward_text = protein.protein_sequence
            # NOTE(review): n is the length of the raw text, taken before
            # parsing; for sequences containing "(" it may not equal the
            # residue count -- confirm the mirrored locations are as intended.
            n = len(forward_text)
            if "(" not in forward_text:
                # Plain sequence: a string reversal is sufficient.
                protein.protein_sequence = forward_text[::-1]
            else:
                try:
                    protein.protein_sequence = str(
                        reverse_sequence(forward_text, suffix_len=0))
                except UnknownAminoAcidException:
                    continue
            protein.hypothesis_id = self.hypothesis_id

            try:
                forward_sequence = PeptideSequence(forward_text)
            except UnknownAminoAcidException:
                continue

            mirrored_sites = []
            try:
                for position in find_n_glycosylation_sequons(forward_sequence):
                    mirrored_sites.append(
                        ProteinSite(name=ProteinSite.N_GLYCOSYLATION,
                                    location=n - position - 1))
            except UnknownAminoAcidException:
                pass

            # O-glycosylation and glycosaminoglycan sites are deliberately not
            # extracted; see Protein._init_sites for the explanation.
            protein.sites.extend(mirrored_sites)

            self.session.add(protein)
            extracted += 1
            if extracted % 5000 == 0:
                self.log("... %d Proteins Extracted" % (extracted, ))
                self.session.commit()

        self.session.commit()
        self.log("%d Proteins Extracted" % (extracted, ))
        return extracted
Exemplo n.º 9
0
 def n_glycan_sequon_sites(self):
     """Return the N-glycosylation site positions, caching them after first use.

     Resolution order when nothing is cached yet:

     1. Entries of ``self.sites`` named ``ProteinSite.N_GLYCOSYLATION``,
        converted with ``int`` and cached.
     2. If ``self.sites`` holds no entries at all, the sequons found on
        ``self._get_sequence()``, cached; an empty list (not cached) if that
        raises ``residue.UnknownAminoAcidException``.
     3. Otherwise (sites exist, but none are N-glycosylation) an empty list,
        which is not cached.
     """
     if self._n_glycan_sequon_sites is not None:
         return self._n_glycan_sequon_sites

     stored = self.sites.filter(ProteinSite.name == ProteinSite.N_GLYCOSYLATION).all()
     if stored:
         self._n_glycan_sequon_sites = [int(site) for site in stored]
         return self._n_glycan_sequon_sites

     if self.sites.count() != 0:
         # Other kinds of sites are recorded, so none are N-glycosylation.
         return []

     try:
         self._n_glycan_sequon_sites = sequence.find_n_glycosylation_sequons(self._get_sequence())
     except residue.UnknownAminoAcidException:
         return []
     return self._n_glycan_sequon_sites
    def extract_proteins(self):
        """Load proteins from ``self.fasta_file`` with reversed sequences.

        Each protein read by ``ProteinFastaFileParser`` has its sequence
        replaced by ``reverse_sequence(..., suffix_len=0)`` and is tagged with
        ``self.hypothesis_id``. N-glycosylation, O-glycosylation and
        glycosaminoglycan sequons are located on the original (forward)
        sequence and appended to ``protein.sites`` with their location
        mirrored onto the reversed sequence (``n - position - 1``). An
        ``UnknownAminoAcidException`` raised while locating one kind of site
        is swallowed and the next kind is still attempted. The session is
        committed every 100 proteins and once more at the end.

        Returns
        -------
        int
            The number of proteins added to the session.
        """
        # (finder, site name) pairs, processed in this order for every protein.
        site_finders = (
            (find_n_glycosylation_sequons, ProteinSite.N_GLYCOSYLATION),
            (find_o_glycosylation_sequons, ProteinSite.O_GLYCOSYLATION),
            (find_glycosaminoglycan_sequons, ProteinSite.GAGYLATION),
        )

        extracted = 0
        for protein in ProteinFastaFileParser(self.fasta_file):
            forward_text = protein.protein_sequence
            n = len(forward_text)
            # NOTE(review): neither the reversal nor the parse below is
            # guarded, so an exception raised here ends the whole extraction.
            protein.protein_sequence = str(reverse_sequence(forward_text, suffix_len=0))
            protein.hypothesis_id = self.hypothesis_id

            forward_sequence = PeptideSequence(forward_text)
            for find_sequons, site_name in site_finders:
                try:
                    for position in find_sequons(forward_sequence):
                        protein.sites.append(
                            ProteinSite(name=site_name, location=n - position - 1))
                except UnknownAminoAcidException:
                    pass

            self.session.add(protein)
            extracted += 1
            if extracted % 100 == 0:
                self.log("... %d Proteins Extracted" % (extracted,))
                self.session.commit()

        self.session.commit()
        self.log("%d Proteins Extracted" % (extracted,))
        return extracted
Exemplo n.º 11
0
    def _init_sites(self):
        """Append every recognised glycosylation site to ``self.sites``.

        N-glycosylation, O-glycosylation and glycosaminoglycan sequons are
        located, in that order, on the sequence returned by
        ``self._get_sequence()`` (fetched anew for each kind). A
        ``residue.UnknownAminoAcidException`` raised while handling one kind
        is swallowed and the remaining kinds are still attempted; sites
        appended before the exception are kept.
        """
        # (finder, site name) pairs, processed in this order.
        site_finders = (
            (sequence.find_n_glycosylation_sequons, ProteinSite.N_GLYCOSYLATION),
            (sequence.find_o_glycosylation_sequons, ProteinSite.O_GLYCOSYLATION),
            (sequence.find_glycosaminoglycan_sequons, ProteinSite.GAGYLATION),
        )
        for find_sequons, site_name in site_finders:
            try:
                for position in find_sequons(self._get_sequence()):
                    self.sites.append(
                        ProteinSite(name=site_name, location=position))
            except residue.UnknownAminoAcidException:
                pass