def random_glycopeptide_to_sequence_space(sequence, proxy, glycan_string=None):
    """Build the theoretical ion record for a glycopeptide sequence.

    Parameters
    ----------
    sequence: str or sequence object
        A string is parsed with ``Sequence``; any other object is used as-is.
        Note that the object is modified in place: every glycan modification
        other than an anonymous "HexNAc" is dropped from it.
    proxy: mapping
        Must provide the keys "MS1_Score", "ppm_error" and "vol", which are
        copied into the result.
    glycan_string: str, optional
        Glycan composition string to use verbatim. When ``None`` the
        composition is derived from the names of the glycan modifications
        found on the sequence.

    Returns
    -------
    dict:
        Precursor information together with the bare and HexNAc-carrying
        b/y ions, the peptide stub ions and the oxonium ions.
    """
    if isinstance(sequence, basestring):
        seq_obj = Sequence(sequence)
    else:
        seq_obj = sequence

    # Walk every site once, separating glycans from the other modifications.
    glycan_map = {}
    modifications = []  # collected here, but not read again in this function
    for position, (residue, site_mods) in enumerate(seq_obj):
        for mod in site_mods:
            if mod.name in {"Glycan", "HexNAc"}:
                glycan_map[position] = mod.name
                continue
            # Construct the set of acceptable reasons why this modification
            # is here, inferring the least permissive modification rule.
            reason = mod.why_valid(residue, position)
            modifications.append(
                modification.Modification(reason, (position,)))

    # Remove glycans from the sequence so it conforms to the SequenceSpace
    # expectations. Anonymous HexNAcs are kept: downstream functions can
    # handle them.
    for position, glycan_name in glycan_map.items():
        if glycan_name != "HexNAc":
            seq_obj.drop_modification(position, glycan_name)

    if glycan_string is not None:
        glycan_composition_restring = glycan_string
    else:
        # Build the semicolon separated string for glycan compositions by
        # summing the per-site counts position by position.
        per_site_counts = []
        for glycan_name in glycan_map.values():
            digits = glycan_name.replace("Glycan", '')
            digits = digits.replace("[", "").replace("]", "")
            per_site_counts.append(map(int, digits.split(";")))
        summed_counts = map(sum, zip(*per_site_counts))
        glycan_composition_restring = (
            "[" + ";".join(map(str, summed_counts)) + "]")

    # Begin generating fragment ions
    b_ions = []
    b_ions_HexNAc = []
    for series in seq_obj.get_fragments('B'):
        for fragment in series:
            name = fragment.get_fragment_name()
            # B1 ions aren't actually seen in reality, but are an artefact
            # of the generation process, so they are left out of the output.
            is_b1 = name == "B1" or re.search(r'B1\+', name)
            if not is_b1:
                record = {"key": name, "mass": fragment.get_mass()}
                target = b_ions_HexNAc if "HexNAc" in name else b_ions
                target.append(record)

    y_ions = []
    y_ions_HexNAc = []
    for series in seq_obj.get_fragments('Y'):
        for fragment in series:
            name = fragment.get_fragment_name()
            record = {"key": name, "mass": fragment.get_mass()}
            target = y_ions_HexNAc if "HexNAc" in name else y_ions
            target.append(record)

    peptide_seq = strip_modifications(seq_obj.get_sequence())
    pep_stubs = StubGlycopeptide(
        peptide_seq, None, len(glycan_map), glycan_composition_restring)
    stub_ions = pep_stubs.get_stubs()
    oxonium_ions = pep_stubs.get_oxonium_ions()

    ions = {
        "MS1_Score": proxy["MS1_Score"],
        "Obs_Mass": seq_obj.mass - proxy["ppm_error"],
        "Calc_mass": seq_obj.mass,
        "ppm_error": proxy["ppm_error"],
        "Peptide": peptide_seq,
        "Peptide_mod": None,
        "Glycan": glycan_composition_restring,
        "vol": proxy['vol'],
        "glyco_sites": len(glycan_map),
        "startAA": -1,
        "endAA": -1,
        "Seq_with_mod": seq_obj.get_sequence(),
        "bare_b_ions": b_ions,
        "b_ions_with_HexNAc": b_ions_HexNAc,
        "bare_y_ions": y_ions,
        "y_ions_with_HexNAc": y_ions_HexNAc,
        "pep_stub_ions": stub_ions,
        "Oxonium_ions": oxonium_ions,
        "Glycopeptide_identifier":
            seq_obj.get_sequence() + glycan_composition_restring,
    }
    return ions
def generate_fragments(seq, ms1_result):
    """Consumes a :class:`.sequence.Sequence` object, and the contents of an MS1 Result
    row to generate the set of all theoretically observed fragments

    Besides returning the fragment dictionary, this function also stores the
    same information on ``ms1_result`` (``glycopeptide_identifier``,
    ``modified_sequence``, ``oxonium_ions``, ``stub_ions``, ``bare_b_ions``,
    ``bare_y_ions``, ``b_ions_hexnac`` and ``y_ions_hexnac``).

    Parameters
    ----------
    seq: :class:`.sequence.Sequence`
        The binding of modifications to particular sites on a peptide sequence
    ms1_result: :class:`MS1GlycopeptideResult`
        Description of the precursor match

    Returns
    -------
    dict:
        Collection of theoretical ions from the given sequence, as well as the
        precursor information. Entries coming from ``ms1_result.as_dict()``
        take precedence over the fragment entries when keys collide.
    """
    seq_mod = seq.get_sequence()

    # Break the sequence at every internal position and transpose the results
    # so that row 0 holds the b-type series and row 1 the y-type series.
    # ``list`` is required because ``zip`` is not subscriptable when it
    # returns an iterator.
    fragments = list(zip(*map(seq.break_at, range(1, len(seq)))))
    if fragments:
        b_type, y_type = fragments[0], fragments[1]
    else:
        # A sequence shorter than two residues has no break points.
        b_type, y_type = (), ()

    b_ions = []
    b_ions_hexnac = []
    for b in b_type:
        for fm in b:
            key = fm.get_fragment_name()
            # B1 ions aren't actually seen in reality, but are an artefact of
            # the generation process, so do not include them in the output.
            # Both the bare "B1" and its "B1+..." variants must be skipped.
            if constants.EXCLUDE_B1 and (
                    key == "B1" or re.search(r'B1\+', key)):
                continue
            record = {
                "key": key,
                "mass": fm.get_mass(),
                "golden_pairs": fm.golden_pairs,
            }
            if "HexNAc" in key:
                b_ions_hexnac.append(record)
            else:
                b_ions.append(record)

    y_ions = []
    y_ions_hexnac = []
    for y in y_type:
        for fm in y:
            key = fm.get_fragment_name()
            record = {
                "key": key,
                "mass": fm.get_mass(),
                "golden_pairs": fm.golden_pairs,
            }
            if "HexNAc" in key:
                y_ions_hexnac.append(record)
            else:
                y_ions.append(record)

    pep_stubs = StubGlycopeptide(
        ms1_result.peptide_sequence,
        ms1_result.peptide_modifications,
        ms1_result.num_glycosylation_sites,
        ms1_result.glycan_composition_str)

    stub_ions = pep_stubs.get_stubs()
    oxonium_ions = pep_stubs.get_oxonium_ions()

    glycopeptide_identifier = seq_mod + ms1_result.glycan_composition_str

    # Eventually, return this object instead of a dictionary
    # The identifier is stored as a plain string, matching the value placed
    # in the returned dictionary.
    ms1_result.glycopeptide_identifier = glycopeptide_identifier
    ms1_result.modified_sequence = seq_mod
    ms1_result.oxonium_ions = oxonium_ions
    ms1_result.stub_ions = stub_ions
    ms1_result.bare_b_ions = b_ions
    ms1_result.bare_y_ions = y_ions
    ms1_result.b_ions_hexnac = b_ions_hexnac
    ms1_result.y_ions_hexnac = y_ions_hexnac

    fragments_dict = {
        "Seq_with_mod": seq_mod,
        "Glycopeptide_identifier": glycopeptide_identifier,
        "Oxonium_ions": oxonium_ions,
        "pep_stub_ions": stub_ions,
        "bare_b_ions": b_ions,
        "bare_y_ions": y_ions,
        "b_ions_with_HexNAc": b_ions_hexnac,
        "y_ions_with_HexNAc": y_ions_hexnac
    }

    fragments_dict.update(ms1_result.as_dict())

    return fragments_dict