return cmp(x_num, y_num) for ppi in ppis: ppi.p1 = sorted(xrefs[ppi.p1].swissprot_id, cmp=s_cmp)[0] ppi.p2 = sorted(xrefs[ppi.p2].swissprot_id, cmp=s_cmp)[0] for ppi in unknown_ppis: if ppi.p1 != '-': ppi.p1 = sorted(xrefs[ppi.p1].swissprot_id, cmp=s_cmp)[0] if ppi.p2 != '-': ppi.p2 = sorted(xrefs[ppi.p2].swissprot_id, cmp=s_cmp)[0] # Filter out small samples, self interactions and interactions with missing p2/p1 ppis = filter(lambda ppi: ppi.p1 != '-' and ppi.p2 != '-', ppis) ppis = filter(lambda ppi: ppi.p1 != ppi.p2, ppis) ppis = [ppi.split() for ppi in ppis] ppis = set([ppi for sl in ppis for ppi in sl]) types = [ppi.get_reaction_type_string() for ppi in ppis] type_counts = {k:0 for k in types} for t in types: type_counts[t] += 1 ppis = [ppi for ppi in ppis if type_counts[ppi.get_reaction_type_string()] >= 5] fp1 = open('tmp/hprd_training_ppi.tsv', 'w') fp2 = open('tmp/hprd_testing_ppi.tsv', 'w') fp3 = open('tmp/unknown_hprd_ppi.tsv', 'w') fp1.write("uniprot_a\tuniprot_a\tlabel\n") fp2.write("uniprot_a\tuniprot_a\tlabel\n") fp3.write("uniprot_a\tuniprot_a\tlabel\n")