def split_proteins(self):
        """Split every target protein into peptides and persist them in batches.

        Builds a ``ProteinSplitter`` configured with this object's constant
        modifications (and no variable modifications), feeds it each protein
        returned by ``retrieve_target_protein_ids`` and bulk-saves the peptides
        it yields through ``self.session``. Progress is reported via
        ``self.log`` roughly every tenth of the protein list.
        """
        self.log("... Begin Applying Protein Annotations")
        mod_table = modification.RestrictedModificationTable(
            constant_modifications=self.constant_modifications,
            variable_modifications=[])
        const_modifications = [
            mod_table[c] for c in self.constant_modifications
        ]

        splitter = ProteinSplitter(constant_modifications=const_modifications,
                                   variable_modifications=[])
        protein_ids = self.retrieve_target_protein_ids()
        n = len(protein_ids)
        # The reporting interval must be a whole number >= 1: a float interval
        # (e.g. 3.3 for n == 33) makes ``i % interval == 0`` fail through
        # rounding error, so progress would silently never be logged.
        interval = max(1, min(n // 10, 100000))
        # Pending peptides are written out once the buffer exceeds this size.
        batch_size = 100000
        acc = []
        j = 0  # total number of peptides produced so far
        for i, protein_id in enumerate(protein_ids, 1):
            protein = self.query(Protein).get(protein_id)
            if i % interval == 0:
                self.log(
                    "... %0.3f%% Complete (%d/%d). %d Peptides Produced." %
                    (i * 100. / n, i, n, j))
            for peptide in splitter.handle_protein(protein):
                acc.append(peptide)
                j += 1
                if len(acc) > batch_size:
                    self.session.bulk_save_objects(acc)
                    self.session.commit()
                    acc = []
        # Persist whatever is left over from the final, partially filled batch.
        self.session.bulk_save_objects(acc)
        self.session.commit()
示例#2
0
    def build_baseline_peptides(self):
        """Run every target protein through a digestor and store the peptides.

        The digestor is built from the first configured enzyme, the constant
        modifications and the configured peptide length range. Each peptide it
        produces is tagged with ``self.hypothesis_id`` and saved through
        ``self.session`` in batches; a progress line is logged for every 1000
        peptides created.
        """
        table = modification.RestrictedModificationTable(
            constant_modifications=self.constant_modifications,
            variable_modifications=[])
        fixed_rules = [table[name] for name in self.constant_modifications]
        enzyme = self.enzymes[0]
        digestor = ProteinDigestor(
            enzyme, fixed_rules,
            max_missed_cleavages=enzyme.used_missed_cleavages,
            min_length=self.peptide_length_range[0],
            max_length=self.peptide_length_range[1])

        def flush(batch):
            # Write one batch of peptides and commit it.
            self.session.bulk_save_objects(batch)
            self.session.commit()

        # Lazily chain the peptides of all target proteins into one stream.
        peptide_stream = (
            peptide
            for protein in self.get_target_proteins()
            for peptide in digestor.process_protein(protein))

        pending = []
        for produced, peptide in enumerate(peptide_stream, 1):
            peptide.hypothesis_id = self.hypothesis_id
            pending.append(peptide)
            if len(pending) > 5000:
                flush(pending)
                pending = []
            if not produced % 1000:
                self.log("... %d Baseline Peptides Created" % produced)

        # Save the remainder that never filled a complete batch.
        flush(pending)
示例#3
0
    def split_proteins(self):
        """Hand the target proteins to a ``UniprotProteinAnnotator`` and run it.

        The annotator receives this object, the target protein ids, the
        constant modifications looked up in a restricted modification table,
        and an empty list of variable modifications.
        """
        table = modification.RestrictedModificationTable(
            constant_modifications=self.constant_modifications,
            variable_modifications=[])
        fixed_rules = []
        for name in self.constant_modifications:
            fixed_rules.append(table[name])
        target_ids = self.retrieve_target_protein_ids()

        UniprotProteinAnnotator(self, target_ids, fixed_rules, []).run()
示例#4
0
# FASTA-formatted protein record (header names it as bovine decorin, accession
# P21793); presumably the input sequence for the tests in this module.
decorin = """
>sp|P21793|PGS2_BOVIN Decorin
MKATIIFLLVAQVSWAGPFQQKGLFDFMLEDEASGIGPEEHFPEVPEIEPMGPVCPFRCQ
CHLRVVQCSDLGLEKVPKDLPPDTALLDLQNNKITEIKDGDFKNLKNLHTLILINNKISK
ISPGAFAPLVKLERLYLSKNQLKELPEKMPKTLQELRVHENEITKVRKSVFNGLNQMIVV
ELGTNPLKSSGIENGAFQGMKKLSYIRIADTNITTIPQGLPPSLTELHLDGNKITKVDAA
SLKGLNNLAKLGLSFNSISAVDNGSLANTPHLRELHLNNNKLVKVPGGLADHKYIQVVYL
HNNNISAIGSNDFCPPGYNTKKASYSGVSLFSNPVQYWEIQPSTFRCVYVRAAVQLGNYK
"""

# Modification names as plain strings. NOTE: both names are re-bound further
# down to the objects looked up in ``mt``, so they only hold strings until then.
constant_modifications = ["Carbamidomethyl (C)"]
variable_modifications = ["Deamidation (N)", "Pyro-glu from Q (Q@N-term)"]

# Modification table built from the names listed above.
mt = modification.RestrictedModificationTable(
    constant_modifications=constant_modifications,
    variable_modifications=variable_modifications)

# Replace each list of names with the corresponding entries from the table.
variable_modifications = [mt[v] for v in variable_modifications]
constant_modifications = [mt[c] for c in constant_modifications]


class FastaGlycopeptideTests(unittest.TestCase):
    def setup_tempfile(self, source):
        file_name = tempfile.mktemp()
        open(file_name, 'w').write(source)
        return file_name

    def clear_file(self, path):
        open(path, 'wb')