Пример #1
0
        return cmp(x_num, y_num)

    for ppi in ppis:
        ppi.p1 = sorted(xrefs[ppi.p1].swissprot_id, cmp=s_cmp)[0]
        ppi.p2 = sorted(xrefs[ppi.p2].swissprot_id, cmp=s_cmp)[0]

    for ppi in unknown_ppis:
        if ppi.p1 != '-':
            ppi.p1 = sorted(xrefs[ppi.p1].swissprot_id, cmp=s_cmp)[0]
        if ppi.p2 != '-':
            ppi.p2 = sorted(xrefs[ppi.p2].swissprot_id, cmp=s_cmp)[0]

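    # Filter out interactions with a missing partner ('-') and self-interactions,
    # split and de-duplicate what remains, then drop "small samples": interactions
    # whose reaction-type string occurs fewer than 5 times.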
    ppis = filter(lambda ppi: ppi.p1 != '-' and ppi.p2 != '-', ppis)
    ppis = filter(lambda ppi: ppi.p1 != ppi.p2, ppis)
    ppis = [ppi.split() for ppi in ppis]
    ppis = set([ppi for sl in ppis for ppi in sl])

    types = [ppi.get_reaction_type_string() for ppi in ppis]
    type_counts = {k:0 for k in types}
    for t in types:
        type_counts[t] += 1
    ppis = [ppi for ppi in ppis if type_counts[ppi.get_reaction_type_string()] >= 5]

    fp1 = open('tmp/hprd_training_ppi.tsv', 'w')
    fp2 = open('tmp/hprd_testing_ppi.tsv', 'w')
    fp3 = open('tmp/unknown_hprd_ppi.tsv', 'w')
    fp1.write("uniprot_a\tuniprot_a\tlabel\n")
    fp2.write("uniprot_a\tuniprot_a\tlabel\n")
    fp3.write("uniprot_a\tuniprot_a\tlabel\n")