def forge_prediction_record(sequence, reference):
    """Return a copy of *reference* re-labelled to describe *sequence*.

    Parameters
    ----------
    sequence:
        Either a string, which is parsed with ``Sequence``, or an existing
        sequence object, which is deep-copied so the caller's object is not
        modified by the glycan rewriting done here.
    reference:
        Record that supports ``copy()``, item assignment, attribute
        assignment and exposes ``ppm_error``.
        NOTE(review): presumably a pandas Series row -- confirm with callers.

    Returns
    -------
    The copied record with ``Calc_mass``, ``Obs_Mass``,
    ``Glycopeptide_identifier``, ``Glycan``, ``glyco_sites`` and
    ``Seq_with_mod`` overwritten, plus an ``"_old_Glycopeptide_identifier"``
    entry holding ``str(sequence)``.

    Raises
    ------
    AttributeError
        Re-raised (after printing the offending modification) when building
        the rule for a non-glycan modification fails.
    """
    if isinstance(sequence, basestring):
        seq_obj = Sequence(sequence)
    else:
        seq_obj = copy.deepcopy(sequence)

    # Read the mass before any modification is swapped out below.
    calculated_mass = seq_obj.mass

    glycan_map = {}
    # Filled in below; not read again inside this function.
    modifications = []
    loc_rules = modification.get_position_modifier_rules_dict(seq_obj)
    for position, (residue, site_mods) in enumerate(seq_obj):
        for mod in site_mods:
            if "Glycan" in mod.name:
                glycan_map[position] = mod.name
                continue
            # Work out why this modification is acceptable at this position
            # and record the corresponding rule.
            try:
                why = mod.why_valid(residue, loc_rules[position])
                modifications.append(
                    modification.Modification(why, (position,)))
            except AttributeError:
                print(mod)
                raise

    # Swap every named glycan for a plain HexNAc so the sequence string
    # conforms to the SequenceSpace expectations.
    for site, glycan in glycan_map.items():
        if glycan == "HexNAc":
            # Anonymous HexNAcs are left alone; downstream code handles them.
            continue
        seq_obj.drop_modification(site, glycan)
        seq_obj.add_modification(site, "HexNAc")

    # Sum the per-glycan compositions column by column and render the totals
    # as a bracketed, semicolon separated string.
    per_glycan_counts = []
    for glycan_name in glycan_map.values():
        stripped = glycan_name.replace("Glycan", '')
        stripped = stripped.replace("[", "").replace("]", "")
        per_glycan_counts.append([int(tok) for tok in stripped.split(";")])
    totals = [sum(column) for column in zip(*per_glycan_counts)]
    glycan_composition_restring = "[" + ";".join(
        [str(total) for total in totals]) + "]"

    rewritten_sequence = str(seq_obj)

    forgery = reference.copy()
    forgery["_old_Glycopeptide_identifier"] = str(sequence)
    forgery.Calc_mass = calculated_mass
    forgery.Obs_Mass = forgery.Calc_mass - reference.ppm_error
    forgery.Glycopeptide_identifier = rewritten_sequence
    forgery.Glycan = glycan_composition_restring
    forgery.glyco_sites = len(glycan_map)
    forgery.Seq_with_mod = rewritten_sequence

    return forgery
def _partition_ions_by_hexnac(fragment_groups, skip_b1=False):
    """Split fragment ions by whether their name contains "HexNAc".

    Parameters
    ----------
    fragment_groups:
        Iterable of iterables of fragment objects exposing
        ``get_fragment_name()`` and ``get_mass()``.
    skip_b1:
        When true, fragments named exactly ``"B1"`` or whose name contains
        ``"B1+"`` are left out of both result lists.

    Returns
    -------
    ``(bare, with_hexnac)``: two lists of ``{"key": name, "mass": mass}``
    dicts, in the order the fragments were encountered.
    """
    bare = []
    with_hexnac = []
    for group in fragment_groups:
        for fragment in group:
            key = fragment.get_fragment_name()
            if skip_b1 and (key == "B1" or re.search(r'B1\+', key)):
                # B1 Ions aren't actually seen in reality, but are an artefact
                # of the generation process so do not include them in the
                # output
                continue
            entry = {"key": key, "mass": fragment.get_mass()}
            if "HexNAc" in key:
                with_hexnac.append(entry)
            else:
                bare.append(entry)
    return bare, with_hexnac


def random_glycopeptide_to_fragments(sequence_record):
    """Generate the theoretical fragment ions for one glycopeptide record.

    Parameters
    ----------
    sequence_record:
        Record exposing the attributes ``Glycopeptide_identifier``,
        ``Glycan``, ``MS1_Score``, ``Obs_Mass``, ``Calc_mass``, ``ppm_error``,
        ``Peptide_mod``, ``vol`` and ``_batch_id``.

    Returns
    -------
    dict
        The scalar fields copied from *sequence_record* together with the
        bare / HexNAc-bearing B and Y ion lists, the peptide stub ions, the
        oxonium ions and a rebuilt ``"Glycopeptide_identifier"``.
        ``"startAA"`` and ``"endAA"`` are always ``None``.

    Raises
    ------
    Exception
        Any error raised while parsing the identifier is re-raised after the
        record has been printed.
    AttributeError
        Re-raised (after printing the offending modification) when building
        the rule for a non-glycan modification fails.
    AssertionError
        If no more than one stub ion is produced.
    """
    try:
        seq_obj = Sequence(sequence_record.Glycopeptide_identifier)
    except Exception:
        # Show which record could not be parsed, then propagate. Narrowed from
        # a bare ``except`` so interpreter-exit signals are not intercepted.
        print(sequence_record)
        raise

    glycan_map = {}
    # Filled in below; not read again inside this function.
    modifications = []
    loc_rules = modification.get_position_modifier_rules_dict(seq_obj)
    for i, (aa, mods) in enumerate(seq_obj):
        for mod in mods:
            if "Glycan" in mod.name or "HexNAc" in mod.name:
                glycan_map[i] = mod.name
                continue
            # Construct the set of acceptable reasons why this modification
            # is here. Infer the least permissive modification rule.
            try:
                why = mod.why_valid(aa, loc_rules[i])
                modifications.append(modification.Modification(why, (i,)))
            except AttributeError:
                print(mod)
                raise

    # Remove glycans from the sequence string to conform to the SequenceSpace
    # expectations
    for site, glycan in glycan_map.items():
        # Don't discard anonymous HexNAcs. Downstream functions can handle them
        if glycan != "HexNAc":
            seq_obj.drop_modification(site, glycan)

    glyco_site_count = len(glycan_map)

    if sequence_record.Glycan is None:
        # Build the semicolon separated string for glycan compositions by
        # summing the per-glycan counts column by column.
        # NOTE(review): glycan_map can also hold plain "HexNAc" names here;
        # parsing such a name with int() raises ValueError. Confirm whether
        # records with Glycan=None and anonymous HexNAcs can reach this path.
        per_glycan_counts = []
        for glycan_name in glycan_map.values():
            stripped = glycan_name.replace("Glycan", '')
            stripped = stripped.replace("[", "").replace("]", "")
            per_glycan_counts.append(
                [int(tok) for tok in stripped.split(";")])
        totals = [sum(column) for column in zip(*per_glycan_counts)]
        glycan_composition_restring = "[" + ";".join(
            [str(total) for total in totals]) + "]"
    else:
        glycan_composition_restring = sequence_record.Glycan

    # Begin generating fragment ions
    b_ions, b_ions_HexNAc = _partition_ions_by_hexnac(
        seq_obj.get_fragments('B'), skip_b1=True)
    y_ions, y_ions_HexNAc = _partition_ions_by_hexnac(
        seq_obj.get_fragments('Y'))

    peptide_seq = strip_modifications(seq_obj.get_sequence())
    pep_stubs = StubGlycopeptide(peptide_seq, glycan_composition_restring,
                                 glyco_site_count, glycan_composition_restring)
    stub_ions = pep_stubs.get_stubs()
    # Sanity check carried over from the original implementation.
    assert len(stub_ions) > 1
    oxonium_ions = pep_stubs.get_oxonium_ions()

    seq_without_glycan = seq_obj.get_sequence(include_glycan=False)
    ions = {
        "MS1_Score": sequence_record.MS1_Score,
        "Obs_Mass": sequence_record.Obs_Mass,
        "Calc_mass": sequence_record.Calc_mass,
        "ppm_error": sequence_record.ppm_error,
        "Peptide": peptide_seq,
        "Peptide_mod": sequence_record.Peptide_mod,
        "Glycan": glycan_composition_restring,
        "vol": sequence_record.vol,
        "glyco_sites": glyco_site_count,
        "startAA": None,
        "endAA": None,
        "Seq_with_mod": seq_without_glycan,
        "bare_b_ions": b_ions,
        "b_ions_with_HexNAc": b_ions_HexNAc,
        "bare_y_ions": y_ions,
        "y_ions_with_HexNAc": y_ions_HexNAc,
        "pep_stub_ions": stub_ions,
        "Oxonium_ions": oxonium_ions,
        "Glycopeptide_identifier": seq_without_glycan + glycan_composition_restring,
        "_batch_id": int(sequence_record._batch_id),
    }
    return ions