示例#1
0
def testPeptideHit():
    """
    Exercise construction, comparison, getters and setters of PeptideHit.

    @tests:
     PeptideHit.__init__
     PeptideHit.addProteinAccession
     PeptideHit.clearMetaInfo
     PeptideHit.getAAAfter
     PeptideHit.getAABefore
     PeptideHit.getKeys
     PeptideHit.getMetaValue
     PeptideHit.getProteinAccessions
     PeptideHit.getRank
     PeptideHit.getScore
     PeptideHit.getSequence
     PeptideHit.isMetaEmpty
     PeptideHit.metaValueExists
     PeptideHit.removeMetaValue
     PeptideHit.setAAAfter
     PeptideHit.setAABefore
     PeptideHit.setCharge
     PeptideHit.setMetaValue
     PeptideHit.setProteinAccessions
     PeptideHit.setRank
     PeptideHit.setScore
     PeptideHit.setSequence
     PeptideHit.__eq__
     PeptideHit.__ge__
     PeptideHit.__gt__
     PeptideHit.__le__
     PeptideHit.__lt__
     PeptideHit.__ne__
    """
    # A default-constructed hit must compare equal to itself via both
    # the == and the != operator.
    default_hit = pyopenms.PeptideHit()
    assert default_hit == default_hit
    assert not default_hit != default_hit

    # Four-argument constructor; the getters below confirm that the first
    # two arguments are the score and the rank, and the last the sequence.
    initial_sequence = pyopenms.AASequence("A")
    hit = pyopenms.PeptideHit(1.0, 1, 0, initial_sequence)
    _testMetaInfoInterface(hit)

    hit.addProteinAccession("A")
    accessions = hit.getProteinAccessions()
    assert accessions == ["A"]

    # Values handed to the constructor are readable through the getters.
    assert hit.getScore() == 1.0
    assert hit.getRank() == 1
    assert hit.getSequence().toString() == "A"

    # Each setter must be reflected by its matching getter.
    hit.setScore(2.0)
    assert hit.getScore() == 2.0

    hit.setRank(30)
    assert hit.getRank() == 30

    hit.setSequence(pyopenms.AASequence("AAA"))
    assert hit.getSequence().toString() == "AAA"

    hit.setAABefore("B")
    assert hit.getAABefore() == "B"

    hit.setAAAfter("C")
    assert hit.getAAAfter() == "C"

    # Self-comparison still holds after all the mutations above.
    assert hit == hit
    assert not hit != hit
示例#2
0
import pyopenms
"""
Producing the test data for TOPP_FeatureLinkerUnlabeledQT_5 and TOPP_FeatureLinkerUnlabeledQT_6
"""

def _single_hit_peptide_id(sequence, identifier):
    """Return a PeptideIdentification with one hit for ``sequence``, tagged ``identifier``."""
    peptide_id = pyopenms.PeptideIdentification()
    peptide_hit = pyopenms.PeptideHit()
    peptide_hit.setSequence(pyopenms.AASequence.fromString(sequence, True))
    peptide_id.insertHit(peptide_hit)
    peptide_id.setIdentifier(identifier)
    return peptide_id


# Three empty feature maps, and one peptide identification per sequence.
fmaps = [pyopenms.FeatureMap() for _ in range(3)]
pepseq = ["PEPTIDEA", "PEPTIDEK", "PEPTIDER"]
pepids = [_single_hit_peptide_id(seq, "Protein0") for seq in pepseq]

# A single protein identification, shared by all maps, carrying the same
# identifier as the peptide identifications.
protid = pyopenms.ProteinIdentification()
protid.setIdentifier("Protein0")

for map_index, feature_map in enumerate(fmaps):
    feature_map.setProteinIdentifications([protid])
    # Every map receives three features; the peptide attached to each feature
    # is rotated by the map index, so the maps carry different peptide ids at
    # the same feature slot.
    for feature_index in range(3):
        peptide_index = (map_index + feature_index) % 3
        feature = pyopenms.Feature()
        feature.setRT(300 + feature_index * 100 + map_index * 10)
        feature.setMZ(500 + feature_index * 0.001 + map_index * 0.01)
        feature.setIntensity(500 + map_index * 100)
        # Plain-text copy of the sequence for easier viewing in TOPPView.
        feature.setMetaValue("sequence", pepseq[peptide_index])
        feature.setPeptideIdentifications([pepids[peptide_index]])
        feature_map.push_back(feature)
    # One featureXML file per map: output_0, output_1, output_2.
    output_name = "output_{}.featureXML".format(map_index)
    pyopenms.FeatureXMLFile().store(output_name, feature_map)
示例#3
0
def testPeptideIdentification():
    """
    Exercise hit storage, metadata accessors and hit lookup of
    PeptideIdentification.

    @tests:
     PeptideIdentification.__init__
     PeptideIdentification.assignRanks
     PeptideIdentification.clearMetaInfo
     PeptideIdentification.empty
     PeptideIdentification.getHits
     PeptideIdentification.getIdentifier
     PeptideIdentification.getKeys
     PeptideIdentification.getMetaValue
     PeptideIdentification.getNonReferencingHits
     PeptideIdentification.getReferencingHits
     PeptideIdentification.getScoreType
     PeptideIdentification.getSignificanceThreshold
     PeptideIdentification.insertHit
     PeptideIdentification.isHigherScoreBetter
     PeptideIdentification.isMetaEmpty
     PeptideIdentification.metaValueExists
     PeptideIdentification.removeMetaValue
     PeptideIdentification.setHigherScoreBetter
     PeptideIdentification.setHits
     PeptideIdentification.setIdentifier
     PeptideIdentification.setMetaValue
     PeptideIdentification.setScoreType
     PeptideIdentification.sort
     PeptideIdentification.__eq__
     PeptideIdentification.__ge__
     PeptideIdentification.__gt__
     PeptideIdentification.__le__
     PeptideIdentification.__lt__
     PeptideIdentification.__ne__
     """

    def _assert_is_stored_hit(candidate):
        # The hit found by a lookup must carry the sequence, score and rank
        # of the hit that was stored in the identification.
        assert candidate.getSequence().toString() == "A"
        assert candidate.getScore() == 1.0
        assert candidate.getRank() == 1

    pep_id = pyopenms.PeptideIdentification()
    _testMetaInfoInterface(pep_id)
    assert pep_id == pep_id
    assert not pep_id != pep_id

    # Store one hit via insertHit and read it back; the unpacking also
    # checks that exactly one hit is returned.
    pep_hit = pyopenms.PeptideHit(1.0, 1, 0, pyopenms.AASequence("A"))
    pep_id.insertHit(pep_hit)
    (stored,) = pep_id.getHits()
    assert stored == pep_hit

    # Same round trip through setHits.
    pep_id.setHits([pep_hit])
    (stored,) = pep_id.getHits()
    assert stored == pep_hit

    # Return types of the scalar accessors, plus the simple mutators.
    assert isinstance(pep_id.getSignificanceThreshold(), float)
    assert isinstance(pep_id.getScoreType(), str)
    pep_id.setScoreType("A")
    assert isinstance(pep_id.isHigherScoreBetter(), int)
    assert isinstance(pep_id.getIdentifier(), str)
    pep_id.setIdentifier("id")
    pep_id.assignRanks()
    pep_id.sort()
    assert not pep_id.empty()

    # Lookup by a single accession string; results arrive in the list
    # passed as second argument.
    found = []
    pep_id.getReferencingHits("A", found)
    assert found == []
    pep_id.getNonReferencingHits("A", found)
    (match,) = found
    _assert_is_stored_hit(match)

    # Lookup by a list of accession strings.
    found = []
    pep_id.getReferencingHits(["A"], found)
    assert found == []
    pep_id.getNonReferencingHits(["A"], found)
    (match,) = found
    _assert_is_stored_hit(match)

    # Lookup by a list of ProteinHit objects. Note that the output list is
    # not reset before the first call: it still holds the previous result.
    protein_hit = pyopenms.ProteinHit()
    pep_id.getReferencingHits([protein_hit], found)
    (match,) = found
    _assert_is_stored_hit(match)

    found = []
    pep_id.getNonReferencingHits([protein_hit], found)
    (match,) = found
    _assert_is_stored_hit(match)
# Convert the PSMs of an mzTab file into an idXML file.
#
# Step 1: scan the lines preceding the PSM header ('PSH') row. While doing so,
# pick up the run location from the metadata and count how many lines come
# before the header row.
run_name = 'unknown'
skiplines = 0
with open(args.mztab) as f_in:
    for skiplines, line in enumerate(f_in):
        if line.split('\t', 1)[0] == 'PSH':
            break
        if 'ms_run[1]-location' in line:
            # The location is read from the third tab-separated field. Lines
            # read from the file keep their terminator, so strip it to avoid
            # a trailing newline ending up in the identifiers written below.
            run_name = line.split('\t')[2].strip()
    else:
        # Reached the end of the file (or the file was empty) without ever
        # seeing a PSH row: fail with an explicit message instead of letting
        # a bare StopIteration escape.
        raise ValueError(
            "No 'PSH' line found in mzTab file: {}".format(args.mztab))

# Step 2: load the PSM table, using the PSH row as the column header.
# NOTE(review): pandas' `header` index ignores blank lines when
# skip_blank_lines is enabled (the default), whereas `skiplines` counts every
# physical line. This assumes no blank lines precede the PSH row -- confirm
# for the mzTab files in use.
psms = pd.read_csv(args.mztab, sep='\t', header=skiplines, index_col='PSM_ID')

# Step 3: build one PeptideIdentification, holding a single rank-1 hit, per
# PSM row.
peptide_ids = []
for _, psm in psms.iterrows():
    peptide_id = pyms.PeptideIdentification()
    peptide_id.setRT(psm['retention_time'])
    peptide_id.setMZ(psm['exp_mass_to_charge'])
    # The value of 'search_engine_score[2]' is stored as the hit score and
    # labelled as a q-value, for which lower is better.
    peptide_id.setScoreType('q-value')
    peptide_id.setHigherScoreBetter(False)
    peptide_id.setIdentifier(run_name)
    peptide_hit = pyms.PeptideHit()
    peptide_hit.setScore(psm['search_engine_score[2]'])
    peptide_hit.setRank(1)
    peptide_hit.setCharge(psm['charge'])
    peptide_hit.setSequence(pyms.AASequence.fromString(psm['sequence']))
    peptide_id.setHits([peptide_hit])
    peptide_ids.append(peptide_id)

# Step 4: write everything out together with a ProteinIdentification that
# carries the same identifier as the peptide identifications.
protein_id = pyms.ProteinIdentification()
protein_id.setIdentifier(run_name)
pyms.IdXMLFile().store(output_path, [protein_id], peptide_ids)