def split_proteins(self):
    """Split every target protein into peptides and persist them in batches.

    Builds a ``ProteinSplitter`` configured with this object's constant
    modifications (and no variable modifications), loads each target protein
    by id, and bulk-saves the peptides the splitter produces. Progress is
    reported through ``self.log``.

    Returns
    -------
    None
    """
    self.log("... Begin Applying Protein Annotations")
    mod_table = modification.RestrictedModificationTable(
        constant_modifications=self.constant_modifications,
        variable_modifications=[])
    const_modifications = [mod_table[c] for c in self.constant_modifications]
    splitter = ProteinSplitter(
        constant_modifications=const_modifications,
        variable_modifications=[])

    protein_ids = self.retrieve_target_protein_ids()
    n = len(protein_ids)
    # Report roughly every 10% of the proteins (capped at every 100000).
    # The interval must be a whole number >= 1: a fractional interval makes
    # ``i % interval == 0`` fire erratically (or almost never) whenever ``n``
    # is not a multiple of 10.
    interval = max(1, min(n // 10, 100000))

    j = 0  # total peptides produced so far
    acc = []  # peptides waiting to be written
    for i, protein_id in enumerate(protein_ids, 1):
        protein = self.query(Protein).get(protein_id)
        if i % interval == 0:
            self.log(
                "... %0.3f%% Complete (%d/%d). %d Peptides Produced." % (
                    i * 100. / n, i, n, j))
        for peptide in splitter.handle_protein(protein):
            acc.append(peptide)
            j += 1
            # Flush periodically so the pending batch stays bounded.
            if len(acc) > 100000:
                self.session.bulk_save_objects(acc)
                self.session.commit()
                acc = []
    # Persist whatever is left over from the final, partial batch.
    self.session.bulk_save_objects(acc)
    self.session.commit()
def build_baseline_peptides(self):
    """Digest the target proteins and store the resulting peptides.

    A ``ProteinDigestor`` is configured from the first entry of
    ``self.enzymes``, the constant modifications, and the bounds in
    ``self.peptide_length_range``. Each peptide it produces is tagged with
    ``self.hypothesis_id`` and written to ``self.session`` in batches.

    Returns
    -------
    None
    """
    table = modification.RestrictedModificationTable(
        constant_modifications=self.constant_modifications,
        variable_modifications=[])
    fixed_rules = [table[name] for name in self.constant_modifications]

    digestor = ProteinDigestor(
        self.enzymes[0],
        fixed_rules,
        max_missed_cleavages=self.enzymes[0].used_missed_cleavages,
        min_length=self.peptide_length_range[0],
        max_length=self.peptide_length_range[1])

    def iter_peptides():
        # Lazily walk every peptide of every target protein, in order.
        for protein in self.get_target_proteins():
            for produced in digestor.process_protein(protein):
                yield produced

    pending = []
    for created, peptide in enumerate(iter_peptides(), 1):
        peptide.hypothesis_id = self.hypothesis_id
        pending.append(peptide)
        # Write out the batch once it grows past 5000 entries.
        if len(pending) > 5000:
            self.session.bulk_save_objects(pending)
            self.session.commit()
            pending = []
        # Progress note every 1000 peptides.
        if not created % 1000:
            self.log("... %d Baseline Peptides Created" % created)

    # Save the trailing, partially filled batch.
    self.session.bulk_save_objects(pending)
    self.session.commit()
def split_proteins(self):
    """Run a ``UniprotProteinAnnotator`` over the target proteins.

    Resolves this object's constant modification names through a
    ``RestrictedModificationTable``, fetches the target protein ids, and
    hands both to the annotator, which does the actual work in ``run()``.

    Returns
    -------
    None
    """
    table = modification.RestrictedModificationTable(
        constant_modifications=self.constant_modifications,
        variable_modifications=[])

    fixed_rules = []
    for name in self.constant_modifications:
        fixed_rules.append(table[name])

    target_ids = self.retrieve_target_protein_ids()
    # The trailing empty list is presumably the variable modifications,
    # mirroring the table above -- confirm against the annotator signature.
    UniprotProteinAnnotator(self, target_ids, fixed_rules, []).run()
# FASTA-style fixture text for bovine decorin (UniProt P21793).
# NOTE(review): the literal is reproduced exactly as found, with single spaces
# between the header and the sequence chunks; FASTA normally separates these
# with newlines -- confirm against the upstream file before relying on it.
decorin = """ >sp|P21793|PGS2_BOVIN Decorin MKATIIFLLVAQVSWAGPFQQKGLFDFMLEDEASGIGPEEHFPEVPEIEPMGPVCPFRCQ CHLRVVQCSDLGLEKVPKDLPPDTALLDLQNNKITEIKDGDFKNLKNLHTLILINNKISK ISPGAFAPLVKLERLYLSKNQLKELPEKMPKTLQELRVHENEITKVRKSVFNGLNQMIVV ELGTNPLKSSGIENGAFQGMKKLSYIRIADTNITTIPQGLPPSLTELHLDGNKITKVDAA SLKGLNNLAKLGLSFNSISAVDNGSLANTPHLRELHLNNNKLVKVPGGLADHKYIQVVYL HNNNISAIGSNDFCPPGYNTKKASYSGVSLFSNPVQYWEIQPSTFRCVYVRAAVQLGNYK """

# Modification rule names; below they are rebound to the rule objects looked
# up from the restricted table.
constant_modifications = ["Carbamidomethyl (C)"]
variable_modifications = ["Deamidation (N)", "Pyro-glu from Q (Q@N-term)"]

mt = modification.RestrictedModificationTable(
    constant_modifications=constant_modifications,
    variable_modifications=variable_modifications)

variable_modifications = [mt[v] for v in variable_modifications]
constant_modifications = [mt[c] for c in constant_modifications]


class FastaGlycopeptideTests(unittest.TestCase):

    def setup_tempfile(self, source):
        """Write *source* to a new temporary file and return the file's path.

        The file is left on disk for the caller to use.
        """
        # NamedTemporaryFile creates the file atomically (unlike the
        # deprecated, race-prone ``tempfile.mktemp``), and the ``with`` block
        # guarantees the handle is flushed and closed before the path is used.
        with tempfile.NamedTemporaryFile(mode='w', delete=False) as handle:
            handle.write(source)
        return handle.name

    def clear_file(self, path):
        """Truncate the file at *path* to zero length."""
        # Opening in 'wb' mode truncates; the context manager closes the
        # handle instead of leaking it.
        with open(path, 'wb'):
            pass