예제 #1
0
def apply_msgfdb(in_file, msms_run_summary, modifications, num_mods):
    """Read an MS-GFDB output file and add its hits to msms_run_summary.

    Every row yielded by ``read_msgfdb(in_file)`` becomes one ``search_hit``
    element.  Hits whose spectrum name is identical are collected under a
    single ``spectrum_query``/``search_result`` pair, which is created the
    first time that spectrum name is seen.  Once all rows are processed, the
    ``sample_enzyme`` child of ``msms_run_summary`` is annotated from the
    enzyme candidate lists returned by ``what_enzyme``.

    Args:
        in_file: Passed unchanged to ``read_msgfdb``.
        msms_run_summary: Element that receives the ``spectrum_query``
            children.  Its ``sample_enzyme`` child is looked up with
            ``find`` and modified in place.
        modifications: Iterable of modification descriptions; each one is
            passed to ``find_modifications`` together with the peptide.
        num_mods: Forwarded to ``modified_peptide_mass``.

    Returns:
        None.  ``msms_run_summary`` is modified in place.
    """
    # NOTE(review): if msms_run_summary has no 'sample_enzyme' child, find()
    # presumably returns None and the .set() calls at the end fail -- confirm
    # that callers always create that child first.
    sample_enzyme = msms_run_summary.find('sample_enzyme')

    spectrum2element = {}
    # Both lists are handed to what_enzyme() with every peptide and replaced
    # by whatever it returns.
    enzyme_list = [ArgC, LysC, Trypsin, LysCP, Chymotrypsin, TrypChymo,
                   TrypsinP, PepsinA, CNBr, V8E, AspN, Formic_acid,
                   AspNambic, V8DE]
    semi_list = []

    for f in read_msgfdb(in_file):
        spectrum = '%(name)s.%(scan)05i.%(scan)05i.%(charge)i' % {
            'name': remove_file_extention(f['#SpecFile']),
            'scan': f['Scan#'],
            'charge': f['Charge'],
        }
        peptide = f['Peptide']
        enzyme_list, semi_list = what_enzyme(enzyme_list, semi_list, peptide)

        # The first and last characters are the flanking residues and the
        # slice [2:-2] is the peptide itself (presumably an "X.PEPTIDE.Y"
        # layout -- confirm against read_msgfdb).  '_' is rewritten to '-'.
        peptide_prev_aa = '-' if peptide[0] == '_' else peptide[0]
        peptide_middle = peptide[2:-2]
        peptide_next_aa = '-' if peptide[-1] == '_' else peptide[-1]

        # Text before the first space is the name, the rest the description;
        # without a space the description is empty.
        protein_name, _, protein_descr = f['Protein'].partition(' ')

        precursor_neutral_mass = (f['Precursor'] * f['Charge']
                                  - f['Charge'] * H_plus)

        if spectrum not in spectrum2element:
            # First hit for this spectrum: create its query/result container.
            new_query = SubElement(msms_run_summary, 'spectrum_query')
            spectrum2element[spectrum] = new_query
            new_query.append(Element('search_result'))
            new_query.set('spectrum', spectrum)
            new_query.set('start_scan', str(f['Scan#']))
            new_query.set('end_scan', str(f['Scan#']))
            new_query.set('assumed_charge', str(f['Charge']))
            new_query.set('precursor_neutral_mass',
                          str(precursor_neutral_mass))

        search_result = spectrum2element[spectrum].find('search_result')
        search_hit = SubElement(search_result, 'search_hit')
        # Only alphabetic characters are kept for the 'peptide' attribute;
        # the unstripped string is still used for the modification lookup.
        search_hit.set('peptide',
                       ''.join(aa for aa in peptide_middle if aa.isalpha()))
        search_hit.set('peptide_prev_aa', peptide_prev_aa)
        search_hit.set('peptide_next_aa', peptide_next_aa)
        search_hit.set('protein', protein_name)
        search_hit.set('protein_descr', protein_descr)

        # Flatten the per-modification results into a single list.
        modification_instances = [
            instance
            for mod in modifications
            for instance in find_modifications(mod, peptide_middle)
        ]
        calc_neutral_pep_mass = modified_peptide_mass(
            modification_instances, peptide_middle, num_mods)
        if modification_instances:
            modification_info = SubElement(search_hit, 'modification_info')
            for mass, _mass_diff, aa_number, _is_opt in modification_instances:
                maam = SubElement(modification_info, 'mod_aminoacid_mass')
                maam.set('position', str(aa_number))
                maam.set('mass', str(mass))
        search_hit.set('calc_neutral_pep_mass', str(calc_neutral_pep_mass))
        search_hit.set('massdiff',
                       str(precursor_neutral_mass - calc_neutral_pep_mass))

        # NOTE(review): score values are passed through unconverted;
        # presumably read_msgfdb leaves these fields as strings -- confirm.
        for field in score_fields:
            if field in f:
                SubElement(search_hit, 'search_score',
                           name=field, value=f[field])

    if not enzyme_list and not semi_list:
        # No candidate is left in either list.
        sample_enzyme.set('name', 'NoEnzyme')
        sample_enzyme.set('fidelity', 'nonspecific')
        return

    # enzyme_list takes precedence; semi_list is only used when it is empty.
    if enzyme_list:
        fidelity, candidates = 'specific', enzyme_list
    else:
        fidelity, candidates = 'semispecific', semi_list
    sample_enzyme.set('fidelity', fidelity)

    # The first candidate's first item contains the enzyme key in angle
    # brackets; enzyme2name maps that key to 'name|cut|no_cut|sense'.
    enzyme_key = re.search(r'<(\w+)>', candidates[0][0]).group(1)
    enzyme = enzyme2name[enzyme_key].split('|')

    sample_enzyme.set('name', enzyme[0])
    specificity = SubElement(sample_enzyme, 'specificity')
    specificity.set('cut', enzyme[1])
    if enzyme[2]:
        # 'no_cut' is only written when the field is non-empty.
        specificity.set('no_cut', enzyme[2])
    specificity.set('sense', enzyme[3])
예제 #2
0
def apply_msgfdb(in_file, msms_run_summary, modifications, num_mods):
    """Read an MS-GFDB output file and add its hits to msms_run_summary.

    Every row yielded by ``read_msgfdb(in_file)`` becomes one ``search_hit``
    element.  Hits whose spectrum name is identical are collected under a
    single ``spectrum_query``/``search_result`` pair, which is created the
    first time that spectrum name is seen.  Once all rows are processed, the
    ``sample_enzyme`` child of ``msms_run_summary`` is annotated from the
    enzyme candidate lists returned by ``what_enzyme``.

    Args:
        in_file: Passed unchanged to ``read_msgfdb``.
        msms_run_summary: Element that receives the ``spectrum_query``
            children.  Its ``sample_enzyme`` child is looked up with
            ``find`` and modified in place.
        modifications: Iterable of modification descriptions; each one is
            passed to ``find_modifications`` together with the peptide.
        num_mods: Forwarded to ``modified_peptide_mass``.

    Returns:
        None.  ``msms_run_summary`` is modified in place.
    """
    # NOTE(review): if msms_run_summary has no 'sample_enzyme' child, find()
    # presumably returns None and the .set() calls at the end fail -- confirm
    # that callers always create that child first.
    sample_enzyme = msms_run_summary.find('sample_enzyme')

    spectrum2element = {}
    # Both lists are handed to what_enzyme() with every peptide and replaced
    # by whatever it returns.
    enzyme_list = [
        ArgC, LysC, Trypsin, LysCP, Chymotrypsin, TrypChymo, TrypsinP, PepsinA,
        CNBr, V8E, AspN, Formic_acid, AspNambic, V8DE
    ]
    semi_list = []

    for f in read_msgfdb(in_file):
        spectrum = '%(name)s.%(scan)05i.%(scan)05i.%(charge)i' % {
            'name': remove_file_extention(f['#SpecFile']),
            'scan': f['Scan#'],
            'charge': f['Charge'],
        }
        peptide = f['Peptide']
        enzyme_list, semi_list = what_enzyme(enzyme_list, semi_list, peptide)

        # The first and last characters are the flanking residues and the
        # slice [2:-2] is the peptide itself (presumably an "X.PEPTIDE.Y"
        # layout -- confirm against read_msgfdb).  '_' is rewritten to '-'.
        peptide_prev_aa = '-' if peptide[0] == '_' else peptide[0]
        peptide_middle = peptide[2:-2]
        peptide_next_aa = '-' if peptide[-1] == '_' else peptide[-1]

        # Text before the first space is the name, the rest the description;
        # without a space the description is empty.
        protein_name, _, protein_descr = f['Protein'].partition(' ')

        precursor_neutral_mass = (f['Precursor'] * f['Charge']
                                  - f['Charge'] * H_plus)

        if spectrum not in spectrum2element:
            # First hit for this spectrum: create its query/result container.
            new_query = SubElement(msms_run_summary, 'spectrum_query')
            spectrum2element[spectrum] = new_query
            new_query.append(Element('search_result'))
            new_query.set('spectrum', spectrum)
            new_query.set('start_scan', str(f['Scan#']))
            new_query.set('end_scan', str(f['Scan#']))
            new_query.set('assumed_charge', str(f['Charge']))
            new_query.set('precursor_neutral_mass',
                          str(precursor_neutral_mass))

        search_result = spectrum2element[spectrum].find('search_result')
        search_hit = SubElement(search_result, 'search_hit')
        # Only alphabetic characters are kept for the 'peptide' attribute;
        # the unstripped string is still used for the modification lookup.
        search_hit.set('peptide',
                       ''.join(aa for aa in peptide_middle if aa.isalpha()))
        search_hit.set('peptide_prev_aa', peptide_prev_aa)
        search_hit.set('peptide_next_aa', peptide_next_aa)
        search_hit.set('protein', protein_name)
        search_hit.set('protein_descr', protein_descr)

        # Flatten the per-modification results into a single list.
        modification_instances = [
            instance
            for mod in modifications
            for instance in find_modifications(mod, peptide_middle)
        ]
        calc_neutral_pep_mass = modified_peptide_mass(
            modification_instances, peptide_middle, num_mods)
        if modification_instances:
            modification_info = SubElement(search_hit, 'modification_info')
            for mass, _mass_diff, aa_number, _is_opt in modification_instances:
                maam = SubElement(modification_info, 'mod_aminoacid_mass')
                maam.set('position', str(aa_number))
                maam.set('mass', str(mass))
        search_hit.set('calc_neutral_pep_mass', str(calc_neutral_pep_mass))
        search_hit.set('massdiff',
                       str(precursor_neutral_mass - calc_neutral_pep_mass))

        # NOTE(review): score values are passed through unconverted;
        # presumably read_msgfdb leaves these fields as strings -- confirm.
        for field in score_fields:
            if field in f:
                SubElement(search_hit,
                           'search_score',
                           name=field,
                           value=f[field])

    if not enzyme_list and not semi_list:
        # No candidate is left in either list.
        sample_enzyme.set('name', 'NoEnzyme')
        sample_enzyme.set('fidelity', 'nonspecific')
        return

    # enzyme_list takes precedence; semi_list is only used when it is empty.
    if enzyme_list:
        fidelity, candidates = 'specific', enzyme_list
    else:
        fidelity, candidates = 'semispecific', semi_list
    sample_enzyme.set('fidelity', fidelity)

    # The first candidate's first item contains the enzyme key in angle
    # brackets; enzyme2name maps that key to 'name|cut|no_cut|sense'.
    enzyme_key = re.search(r'<(\w+)>', candidates[0][0]).group(1)
    enzyme = enzyme2name[enzyme_key].split('|')

    sample_enzyme.set('name', enzyme[0])
    specificity = SubElement(sample_enzyme, 'specificity')
    specificity.set('cut', enzyme[1])
    if enzyme[2]:
        # 'no_cut' is only written when the field is non-empty.
        specificity.set('no_cut', enzyme[2])
    specificity.set('sense', enzyme[3])