def forge_prediction_record(sequence, reference):
    """Derive a prediction record for ``sequence`` from an existing record.

    A copy of ``reference`` is returned with its mass, identifier and glycan
    fields rewritten to describe ``sequence``; ``reference`` itself is only
    read (``.copy()`` and ``.ppm_error``).

    Parameters
    ----------
    sequence:
        Either a string, which is parsed with ``Sequence``, or an existing
        sequence object, which is deep-copied so the caller's object is not
        mutated.
    reference:
        Record supporting ``.copy()``, item assignment, attribute assignment
        and a ``ppm_error`` attribute (presumably a pandas Series -- confirm
        against callers).

    Returns
    -------
    The modified copy of ``reference``.

    Raises
    ------
    AttributeError
        Re-raised (after printing the offending modification) when a
        non-glycan modification cannot be validated.
    """
    if isinstance(sequence, basestring):
        working_seq = Sequence(sequence)
    else:
        working_seq = copy.deepcopy(sequence)

    # The mass is captured before any glycan modification is swapped out.
    calculated_mass = working_seq.mass

    glycan_by_site = {}
    validated_mods = []
    position_rules = modification.get_position_modifier_rules_dict(working_seq)
    for index, (residue, residue_mods) in enumerate(working_seq):
        for mod in residue_mods:
            if "Glycan" in mod.name:
                glycan_by_site[index] = mod.name
                continue
            # Work out the rule that justifies this modification at this
            # position; an invalid modification surfaces as AttributeError.
            try:
                reason = mod.why_valid(residue, position_rules[index])
                validated_mods.append(
                    modification.Modification(reason, (index,)))
            except AttributeError:
                print(mod)
                raise

    # Swap each explicit glycan for an anonymous HexNAc so the sequence
    # string conforms to the SequenceSpace expectations.
    for site, glycan_name in glycan_by_site.items():
        if glycan_name != "HexNAc":
            working_seq.drop_modification(site, glycan_name)
            working_seq.add_modification(site, "HexNAc")

    # Sum the per-site compositions element-wise and render them as a
    # bracketed, semicolon separated string.
    per_site_counts = [
        [int(count) for count in
         name.replace("Glycan", '').replace("[", "").replace("]", "").split(";")]
        for name in glycan_by_site.values()
    ]
    summed_counts = [sum(column) for column in zip(*per_site_counts)]
    glycan_composition_restring = (
        "[" + ";".join([str(total) for total in summed_counts]) + "]")

    forgery = reference.copy()
    forgery["_old_Glycopeptide_identifier"] = str(sequence)
    forgery.Calc_mass = calculated_mass
    forgery.Obs_Mass = forgery.Calc_mass - reference.ppm_error
    forgery.Glycopeptide_identifier = str(working_seq)
    forgery.Glycan = glycan_composition_restring
    forgery.glyco_sites = len(glycan_by_site)
    forgery.Seq_with_mod = str(working_seq)
    return forgery
def random_glycopeptide_to_fragments(sequence_record):
    """Generate the theoretical fragment ions for one glycopeptide record.

    Parameters
    ----------
    sequence_record:
        Record exposing the attributes ``Glycopeptide_identifier``,
        ``Glycan``, ``MS1_Score``, ``Obs_Mass``, ``Calc_mass``,
        ``ppm_error``, ``Peptide_mod``, ``vol`` and ``_batch_id``.

    Returns
    -------
    dict
        Record metadata plus the generated ions, under the keys
        ``bare_b_ions``, ``b_ions_with_HexNAc``, ``bare_y_ions``,
        ``y_ions_with_HexNAc``, ``pep_stub_ions`` and ``Oxonium_ions``.
        Each b/y ion is a ``{"key": name, "mass": mass}`` dict.

    Raises
    ------
    Exception
        Whatever ``Sequence`` raises for an unparsable identifier; the
        record is printed first.
    AttributeError
        Re-raised (after printing the modification) when a non-glycan
        modification cannot be validated.
    AssertionError
        When fewer than two peptide stub ions are produced.
    """
    try:
        seq_obj = Sequence(sequence_record.Glycopeptide_identifier)
    except Exception:
        # Show the offending record, then let the error propagate.
        # ``Exception`` rather than a bare ``except`` so that
        # KeyboardInterrupt / SystemExit are not intercepted.
        print(sequence_record)
        raise

    glycan_map = {}
    # Never read afterwards; building each entry is what validates the
    # non-glycan modifications.
    modifications = []
    loc_rules = modification.get_position_modifier_rules_dict(seq_obj)
    for i, (aa, mods) in enumerate(seq_obj):
        for mod in mods:
            if "Glycan" in mod.name or "HexNAc" in mod.name:
                glycan_map[i] = mod.name
            else:
                # Construct the set of acceptable reasons why this
                # modification is here. Infer the least permissive
                # modification rule.
                try:
                    why = mod.why_valid(aa, loc_rules[i])
                    modifications.append(
                        modification.Modification(why, (i,)))
                except AttributeError:
                    print(mod)
                    raise

    # Remove glycans from the sequence string to conform to the
    # SequenceSpace expectations.
    for site, glycan in glycan_map.items():
        # Don't discard anonymous HexNAcs. Downstream functions can handle
        # them.
        if glycan != "HexNAc":
            seq_obj.drop_modification(site, glycan)

    glycosylation_site_count = len(glycan_map)

    if sequence_record.Glycan is None:
        # Build the semicolon separated string for glycan compositions by
        # summing the per-site compositions element-wise.
        # NOTE(review): a bare "HexNAc" entry in glycan_map is not a
        # ";"-separated integer list, so int() raises ValueError here --
        # confirm whether such records can reach this branch.
        per_site = [
            [int(count) for count in
             glycan.replace("Glycan", '').replace("[", "").replace("]", "").split(";")]
            for glycan in glycan_map.values()
        ]
        totals = [sum(column) for column in zip(*per_site)]
        glycan_composition_restring = (
            "[" + ";".join([str(total) for total in totals]) + "]")
    else:
        glycan_composition_restring = sequence_record.Glycan

    # Begin generating fragment ions.
    b_ions = []
    b_ions_HexNAc = []
    for b in seq_obj.get_fragments('B'):
        for fm in b:
            key = fm.get_fragment_name()
            if key == "B1" or re.search(r'B1\+', key):
                # B1 ions aren't actually seen in reality, but are an
                # artefact of the generation process, so do not include
                # them in the output.
                continue
            ion = {"key": key, "mass": fm.get_mass()}
            if "HexNAc" in key:
                b_ions_HexNAc.append(ion)
            else:
                b_ions.append(ion)

    y_ions = []
    y_ions_HexNAc = []
    for y in seq_obj.get_fragments('Y'):
        for fm in y:
            key = fm.get_fragment_name()
            ion = {"key": key, "mass": fm.get_mass()}
            if "HexNAc" in key:
                y_ions_HexNAc.append(ion)
            else:
                y_ions.append(ion)

    peptide_seq = strip_modifications(seq_obj.get_sequence())
    # NOTE(review): the glycan composition string is passed as both the
    # second and the fourth argument -- confirm against the
    # StubGlycopeptide signature.
    pep_stubs = StubGlycopeptide(
        peptide_seq, glycan_composition_restring,
        glycosylation_site_count, glycan_composition_restring)
    stub_ions = pep_stubs.get_stubs()
    assert len(stub_ions) > 1, "expected more than one peptide stub ion"
    oxonium_ions = pep_stubs.get_oxonium_ions()

    seq_without_glycan = seq_obj.get_sequence(include_glycan=False)
    ions = {
        "MS1_Score": sequence_record.MS1_Score,
        "Obs_Mass": sequence_record.Obs_Mass,
        "Calc_mass": sequence_record.Calc_mass,
        "ppm_error": sequence_record.ppm_error,
        "Peptide": peptide_seq,
        "Peptide_mod": sequence_record.Peptide_mod,
        "Glycan": glycan_composition_restring,
        "vol": sequence_record.vol,
        "glyco_sites": len(glycan_map),
        "startAA": None,
        "endAA": None,
        "Seq_with_mod": seq_without_glycan,
        "bare_b_ions": b_ions,
        "b_ions_with_HexNAc": b_ions_HexNAc,
        "bare_y_ions": y_ions,
        "y_ions_with_HexNAc": y_ions_HexNAc,
        "pep_stub_ions": stub_ions,
        "Oxonium_ions": oxonium_ions,
        "Glycopeptide_identifier": seq_without_glycan + glycan_composition_restring,
        "_batch_id": int(sequence_record._batch_id),
    }
    return ions