def test_write_2(self):
    """Round-trip an A3M alignment through ``A3mParser`` with inserts kept.

    The input is read with ``remove_insert=False`` and written straight back.
    The written text must equal the reference alignment below, in which the
    lower-case insert segments are retained and the other rows carry ``-``
    in those columns.
    """
    msa = [
        ">d1a1x__ b.63.1.1 (-) p13-MTCP1 {Human (H**o sapiens)}",
        "PPDHLWVHQEGIYRDEYQRTWVAVVEEETSFLRARVQQIQVPLGDAARPSHLLTSQL",
        ">gi|6678257|ref|NP_033363.1|:(7-103) T-cell lymphoma breakpoint 1 [Mus musculus]",
        "HPNRLWIWEKHVYLDEFRRSWLPVVIKSNEKFQVILRQEDVTLGEAMSPSQLVPYEL",
        ">gi|7305557|ref|NP_038800.1|:(8-103) T-cell leukemia/lymphoma 1B, 3 [Mus musculus]",
        "PPRFLVCTRDDIYEDENGRQWVVAKVETSRSpygsrietcITVHLQHMTTIPQEPTPQQPINNNSL",
        ">gi|11415028|ref|NP_068801.1|:(2-106) T-cell lymphoma-1; T-cell lymphoma-1A [H**o sapiens]",
        "HPDRLWAWEKFVYLDEKQHAWLPLTIEikDRLQLRVLLRREDVVLGRPMTPTQIGPSLL",
        ">gi|7305561|ref|NP_038804.1|:(7-103) T-cell leukemia/lymphoma 1B, 5 [Mus musculus]",
        "----------GIYEDEHHRVWIAVNVETSHSSHgnrietcvtVHLQHMTTLPQEPTPQQPINNNSL",
        ">gi|7305553|ref|NP_038801.1|:(5-103) T-cell leukemia/lymphoma 1B, 1 [Mus musculus]",
        "LPVYLVSVRLGIYEDEHHRVWIVANVETshSSHGNRRRTHVTVHLWKLIPQQVIPFNplnydFL",
        ">gi|27668591|ref|XP_234504.1|:(7-103) similar to Chain A, Crystal Structure Of Murine Tcl1",
        "-PDRLWLWEKHVYLDEFRRSWLPIVIKSNGKFQVIMRQKDVILGDSMTPSQLVPYEL",
        ">gi|27668589|ref|XP_234503.1|:(9-91) similar to T-cell leukemia/lymphoma 1B, 5;",
        "-PHILTLRTHGIYEDEHHRLWVVLDLQAShlSFSNRLLIYLTVYLQqgvafplESTPPSPMNLNGL",
        ">gi|7305559|ref|NP_038802.1|:(8-102) T-cell leukemia/lymphoma 1B, 4 [Mus musculus]",
        "PPCFLVCTRDDIYEDEHGRQWVAAKVETSSHSPycskietcvtVHLWQMTTLFQEPSPDSLKTFNFL",
        ">gi|7305555|ref|NP_038803.1|:(9-102) T-cell leukemia/lymphoma 1B, 2 [Mus musculus]",
        "---------PGFYEDEHHRLWMVAKLETCSHSPycnkietcvtVHLWQMTRYPQEPAPYNPMNYNFL",
    ]
    # Register removal right after creation so the files are deleted even
    # when an assertion below fails.
    f_name_in = create_tmp_f(content=os.linesep.join(msa))
    self.addCleanup(os.unlink, f_name_in)
    f_name_out = create_tmp_f()
    self.addCleanup(os.unlink, f_name_out)

    parser = A3mParser()
    with open(f_name_in, 'r') as f_in, open(f_name_out, 'w') as f_out:
        sequence_file = parser.read(f_in, remove_insert=False)
        parser.write(f_out, sequence_file)

    ref_lines = [
        ">d1a1x__ b.63.1.1 (-) p13-MTCP1 {Human (H**o sapiens)}",
        "PPDHLWVHQEGIYRDEYQRTWVAVVEE--E--T--SF---------LR----------ARVQQIQVPLG-------DAARPSHLLTS-----QL",
        ">gi|6678257|ref|NP_033363.1|:(7-103) T-cell lymphoma breakpoint 1 [Mus musculus]",
        "HPNRLWIWEKHVYLDEFRRSWLPVVIK--S--N--EK---------FQ----------VILRQEDVTLG-------EAMSPSQLVPY-----EL",
        ">gi|7305557|ref|NP_038800.1|:(8-103) T-cell leukemia/lymphoma 1B, 3 [Mus musculus]",
        "PPRFLVCTRDDIYEDENGRQWVVAKVE--T--S--RSpygsrietcIT----------VHLQHMTTIPQ-------EPTPQQPINNN-----SL",
        ">gi|11415028|ref|NP_068801.1|:(2-106) T-cell lymphoma-1; T-cell lymphoma-1A [H**o sapiens]",
        "HPDRLWAWEKFVYLDEKQHAWLPLTIEikD--R--LQ---------LR----------VLLRREDVVLG-------RPMTPTQIGPS-----LL",
        ">gi|7305561|ref|NP_038804.1|:(7-103) T-cell leukemia/lymphoma 1B, 5 [Mus musculus]",
        "----------GIYEDEHHRVWIAVNVE--T--S--HS---------SHgnrietcvt-VHLQHMTTLPQ-------EPTPQQPINNN-----SL",
        ">gi|7305553|ref|NP_038801.1|:(5-103) T-cell leukemia/lymphoma 1B, 1 [Mus musculus]",
        "LPVYLVSVRLGIYEDEHHRVWIVANVE--TshS--SH---------GN----------RRRTHVTVHLW-------KLIPQQVIPFNplnydFL",
        ">gi|27668591|ref|XP_234504.1|:(7-103) similar to Chain A, Crystal Structure Of Murine Tcl1",
        "-PDRLWLWEKHVYLDEFRRSWLPIVIK--S--N--GK---------FQ----------VIMRQKDVILG-------DSMTPSQLVPY-----EL",
        ">gi|27668589|ref|XP_234503.1|:(9-91) similar to T-cell leukemia/lymphoma 1B, 5;",
        "-PHILTLRTHGIYEDEHHRLWVVLDLQ--A--ShlSF---------SN----------RLLIYLTVYLQqgvafplESTPPSPMNLN-----GL",
        ">gi|7305559|ref|NP_038802.1|:(8-102) T-cell leukemia/lymphoma 1B, 4 [Mus musculus]",
        "PPCFLVCTRDDIYEDEHGRQWVAAKVE--T--S--SH---------SPycskietcvtVHLWQMTTLFQ-------EPSPDSLKTFN-----FL",
        ">gi|7305555|ref|NP_038803.1|:(9-102) T-cell leukemia/lymphoma 1B, 2 [Mus musculus]",
        "---------PGFYEDEHHRLWMVAKLE--T--C--SH---------SPycnkietcvtVHLWQMTRYPQ-------EPAPYNPMNYN-----FL",
        "",
    ]
    expected = os.linesep.join(ref_lines)

    with open(f_name_out, 'r') as f_in:
        written = f_in.read()
    self.assertEqual(expected, written)
def test_read_3(self):
    """Read a CLUSTAL alignment in which every identifier occurs twice.

    Each parsed entry is expected to carry the concatenation of the two
    rows that share its identifier.
    """
    msa = """CLUSTAL FORMAT for seq_0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA seq_1 BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB seq_2 CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC seq_0 AAAAAAAAA seq_1 BBBBBBBBB seq_2 CCCCCCCCC """
    # NOTE(review): this fixture is a single-line literal here; confirm the
    # record separators the parser relies on are intact.
    f_name = create_tmp_f(content=msa)
    # Remove the temp file even when an assertion below fails.
    self.addCleanup(os.unlink, f_name)

    parser = ClustalParser()
    with open(f_name, 'r') as f_in:
        sequence_file = parser.read(f_in)

    expected = [
        ('seq_0', 'AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA'),
        ('seq_1', 'BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB'),
        ('seq_2', 'CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC'),
    ]
    # Compare the complete list: an index-guarded loop would pass silently
    # if the parser returned fewer (or no) entries.
    observed = [(entry.id, entry.seq) for entry in sequence_file]
    self.assertEqual(expected, observed)
def test_read_1(self):
    """Parse comma-separated ``res1,res2,score`` records with ``PlmDCAParser``."""
    content = """1,2,0.12212 1,3,0.14004 1,4,0.12926 1,5,0.089211 1,6,0.079976 1,7,0.078954 1,8,0.052275 1,9,0.026012 1,10,0.049844 1,11,0.045109 """
    # NOTE(review): this fixture is a single-line literal here; confirm the
    # record separators the parser relies on are intact.
    f_name = create_tmp_f(content=content)
    # Remove the temp file even when an assertion below fails.
    self.addCleanup(os.unlink, f_name)

    with open(f_name, 'r') as f_in:
        contact_file = PlmDCAParser().read(f_in)
    contact_map1 = contact_file.top_map

    expected_res1 = [1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
    expected_res2 = [2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
    expected_scores = [
        0.12212, 0.14004, 0.12926, 0.089211, 0.079976,
        0.078954, 0.052275, 0.026012, 0.049844, 0.045109,
    ]

    self.assertEqual(1, len(contact_file))
    self.assertEqual(10, len(contact_map1))
    self.assertEqual(expected_res1, [c.res1_seq for c in contact_map1])
    self.assertEqual(expected_res2, [c.res2_seq for c in contact_map1])
    self.assertEqual(expected_scores, [c.raw_score for c in contact_map1])
def test_read_2(self):
    """Parse a ``PconsParser`` input that starts with non-record text.

    Sixteen contacts from residue 1 to residues 2..17 are expected; only the
    first five raw scores are compared.
    """
    content = """# Check one two Hello WOrld 1 2 0.93514 1 3 0.67324 1 4 0.23692 1 5 0.13166 1 6 0.09188 1 7 0.07957 1 8 0.06556 1 9 0.05188 1 10 0.04146 1 11 0.03264 1 12 0.02515 1 13 0.02137 1 14 0.01961 1 15 0.01710 1 16 0.01397 1 17 0.01192 """
    # NOTE(review): this fixture is a single-line literal here; confirm the
    # record separators the parser relies on are intact.
    f_name = create_tmp_f(content=content)
    # Remove the temp file even when an assertion below fails.
    self.addCleanup(os.unlink, f_name)

    with open(f_name, 'r') as f_in:
        contact_file = PconsParser().read(f_in)
    contact_map1 = contact_file.top_map

    self.assertEqual(1, len(contact_file))
    self.assertEqual(16, len(contact_map1))
    self.assertEqual([1] * 16, [c.res1_seq for c in contact_map1])
    self.assertEqual(list(range(2, 18)), [c.res2_seq for c in contact_map1])
    leading_scores = [c.raw_score for c in contact_map1][:5]
    self.assertEqual([0.93514, 0.67324, 0.23692, 0.13166, 0.09188], leading_scores)
def test_write_1(self):
    """Write four contacts with ``ComsatParser`` and compare the output lines."""
    contact_file = ContactFile('RR')
    contact_file.target = 'R9999'
    contact_file.author = '1234-5678-9000'
    contact_file.remark = ['Predictor remarks']
    contact_file.method = [
        'Description of methods used',
        'Description of methods used',
    ]
    contact_map = ContactMap('1')
    contact_file.add(contact_map)

    # Tuples are (res1, res2, lower bound, upper bound, raw score).
    specs = [(1, 9, 0, 8, 0.7), (1, 10, 0, 8, 0.7), (2, 8, 0, 8, 0.9), (3, 12, 0, 8, 0.4)]
    for res1, res2, lower, upper, score in specs:
        contact_map.add(Contact(res1, res2, score, distance_bound=(lower, upper)))
    contact_map.sequence = Sequence('1', 'HLEGSIGILLKKHEIVFDGCHDFGRTYIWQMSD')
    contact_map.set_sequence_register()

    f_name = create_tmp_f()
    # Remove the temp file even when an assertion below fails.
    self.addCleanup(os.unlink, f_name)
    with open(f_name, 'w') as f_out:
        ComsatParser().write(f_out, contact_file)

    content = [
        "1 H 9 L Hx-Hx",
        "1 H 10 L Hx-Hx",
        "2 L 8 I Hx-Hx",
        "3 E 12 K Hx-Hx",
    ]
    with open(f_name, 'r') as f_in:
        output = f_in.read().splitlines()
    self.assertEqual(content, output)
def test_read_1(self):
    """Parse ``BCLContactParser`` records and check residues and raw scores.

    The expected raw score of every contact equals the last numeric field
    of its record in the fixture.
    """
    content = """5 I 9 Q 0.000 0.286 0.185 0.836 0.875 0.749 5 I 10 R 0.000 0.000 0.105 0.875 0.482 0.634 5 I 11 I 0.000 0.178 0.066 0.730 0.876 0.727 5 I 21 I 0.030 0.021 0.233 0.645 0.733 0.557 5 I 58 G 0.000 0.054 0.010 0.642 0.799 0.535 6 T 62 V 0.000 0.000 0.027 0.485 0.428 0.585 6 T 63 S 0.000 0.004 0.051 0.547 0.387 0.529 6 T 78 L 0.000 0.000 0.039 0.624 0.384 0.581 6 T 79 T 0.000 0.000 0.036 0.657 0.415 0.679 6 T 80 I 0.000 0.076 0.003 0.513 0.386 0.578 6 T 94 Q 0.000 0.068 0.041 0.534 0.489 0.679 """
    # NOTE(review): this fixture is a single-line literal here; confirm the
    # record separators the parser relies on are intact.
    f_name = create_tmp_f(content=content)
    # Remove the temp file even when an assertion below fails.
    self.addCleanup(os.unlink, f_name)

    with open(f_name, 'r') as f_in:
        contact_file = BCLContactParser().read(f_in)
    contact_map1 = contact_file.top_map

    expected_res1 = [5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6]
    expected_res2 = [9, 10, 11, 21, 58, 62, 63, 78, 79, 80, 94]
    expected_scores = [
        0.749, 0.634, 0.727, 0.557, 0.535, 0.585,
        0.529, 0.581, 0.679, 0.578, 0.679,
    ]

    self.assertEqual(1, len(contact_file))
    self.assertEqual(11, len(contact_map1))
    self.assertEqual(expected_res1, [c.res1_seq for c in contact_map1])
    self.assertEqual(expected_res2, [c.res2_seq for c in contact_map1])
    self.assertEqual(expected_scores, [c.raw_score for c in contact_map1])
def test_write_1(self):
    """Write four contacts with ``PsicovParser`` and compare the full text."""
    contact_file = ContactFile('RR')
    contact_file.target = 'R9999'
    contact_file.author = '1234-5678-9000'
    contact_file.remark = ['Predictor remarks']
    contact_file.method = ['Description of methods used', 'Description of methods used']
    contact_map = ContactMap('1')
    contact_file.add(contact_map)

    # Tuples are (res1, res2, lower bound, upper bound, raw score).
    specs = [(1, 9, 0, 8, 0.7), (1, 10, 0, 8, 0.7), (2, 8, 0, 8, 0.9), (3, 12, 0, 8, 0.4)]
    for res1, res2, lower, upper, score in specs:
        contact_map.add(Contact(res1, res2, score, distance_bound=(lower, upper)))
    contact_map.sequence = Sequence('1', 'HLEGSIGILLKKHEIVFDGCHDFGRTYIWQMSD')
    contact_map.assign_sequence_register()

    f_name = create_tmp_f()
    # Remove the temp file even when an assertion below fails.
    self.addCleanup(os.unlink, f_name)
    with open(f_name, 'w') as f_out:
        PsicovParser().write(f_out, contact_file)

    expected_lines = [
        "1 9 0 8 0.700000",
        "1 10 0 8 0.700000",
        "2 8 0 8 0.900000",
        "3 12 0 8 0.400000",
        "",
    ]
    content = os.linesep.join(expected_lines)
    with open(f_name, 'r') as f_in:
        data = f_in.read()
    self.assertEqual(content, data)
def test_read_1(self):
    """Parse a CASP RR file that carries a sequence split over two rows.

    Checks the joined sequence and the ``repr_sequence`` string expected for
    the nine contacts.
    """
    content = """PFRMAT RR TARGET R9999 AUTHOR 1234-5678-9000 REMARK Predictor remarks METHOD Description of methods used METHOD Description of methods used MODEL 1 HLEGSIGILLKKHEIVFDGC HDFGRTYIWQMSD 1 9 0 8 0.70 1 10 0 8 0.70 1 12 0 8 0.60 2 8 0 8 0.90 3 7 0 8 0.70 3 12 0 8 0.40 4 6 0 8 0.90 7 14 0 8 0.30 9 14 0 8 0.50 END """
    # NOTE(review): this fixture is a single-line literal here; confirm the
    # record separators the parser relies on are intact.
    f_name = create_tmp_f(content=content)
    # Remove the temp file even when an assertion below fails.
    self.addCleanup(os.unlink, f_name)

    with open(f_name, 'r') as f_in:
        contact_file = CaspParser().read(f_in)
    contact_map1 = contact_file.top_map

    self.assertEqual(1, len(contact_file))
    self.assertEqual(9, len(contact_map1))
    self.assertEqual("HLEGSIGILLKKHEIVFDGCHDFGRTYIWQMSD", contact_map1.sequence.seq)
    self.assertEqual("HLEG-IGILL-K-E-------------------", contact_map1.repr_sequence.seq)
def test_read_3(self):
    """Derive contacts from PDB ``ATOM`` records with ``PdbParser``.

    Read with ``distance_cutoff=7`` and ``atom_type='CB'``; exactly one
    contact (residues 36 and 86, raw score 0.934108) is expected and it must
    be flagged ``true_positive``.
    """
    content = """ATOM 1 N TYR A 36 39.107 51.628 3.103 0.50 43.13 N ATOM 2 CA TYR A 36 38.300 50.814 2.204 0.50 41.80 C ATOM 3 O TYR A 36 38.712 48.587 1.405 0.50 41.03 O ATOM 4 CB TYR A 36 37.586 51.694 1.175 0.50 41.61 C ATOM 5 N PHE A 86 32.465 47.498 5.487 0.50 25.81 N ATOM 6 CA PHE A 86 32.670 48.303 4.288 0.50 26.45 C ATOM 7 O PHE A 86 31.469 50.326 3.758 0.50 28.47 O ATOM 8 CB PHE A 86 32.977 47.392 3.090 0.50 25.35 C ATOM 9 N TRP A 171 23.397 37.507 -1.161 0.50 18.04 N ATOM 10 CA TRP A 171 23.458 36.846 0.143 0.50 20.46 C ATOM 11 O TRP A 171 22.235 34.954 0.951 0.50 22.45 O ATOM 12 CB TRP A 171 23.647 37.866 1.275 0.50 18.83 C ATOM 13 N PHE A 208 32.221 42.624 -5.829 0.50 19.96 N ATOM 14 CA PHE A 208 31.905 43.710 -4.909 0.50 20.31 C ATOM 15 O PHE A 208 32.852 45.936 -5.051 0.50 17.69 O ATOM 16 CB PHE A 208 31.726 43.102 -3.518 0.50 19.90 C END """
    # NOTE(review): this fixture is a single-line literal with single-space
    # separators here; confirm line breaks and column layout are intact.
    f_name = create_tmp_f(content=content)
    # Remove the temp file even when an assertion below fails.
    self.addCleanup(os.unlink, f_name)

    with open(f_name, 'r') as f_in:
        contact_file = PdbParser().read(f_in, distance_cutoff=7, atom_type='CB')
    contact_map1 = contact_file.top_map

    self.assertEqual(1, len(contact_file))
    self.assertEqual(1, len(contact_map1))
    self.assertEqual([36], [c.res1_seq for c in contact_map1 if c.true_positive])
    self.assertEqual([86], [c.res2_seq for c in contact_map1 if c.true_positive])
    self.assertEqual([0.934108], [c.raw_score for c in contact_map1 if c.true_positive])
def test_write_2(self):
    """Round-trip a FASTA record preceded by a ``#`` line through ``FastaParser``.

    The written lines must equal the input lines, including the leading
    ``# Hello World`` line.
    """
    seq = [
        "# Hello World",
        ">00FAF_A|<unknown description>",
        "GSMFTPKPPQDSAVIKAGYCVKQGAVMKNWKRRYFQLDENTIGYFKSELEKEPLRVIPLK",
    ]
    # The former ``map(os.unlink, [...])`` is lazy on Python 3 and was never
    # consumed, so the files were never removed. Registering cleanups deletes
    # them, also when an assertion fails.
    f_name_in = create_tmp_f(content='\n'.join(seq))
    self.addCleanup(os.unlink, f_name_in)
    f_name_out = create_tmp_f()
    self.addCleanup(os.unlink, f_name_out)

    parser = FastaParser()
    with open(f_name_in, 'r') as f_in, open(f_name_out, 'w') as f_out:
        sequence_file = parser.read(f_in)
        parser.write(f_out, sequence_file)

    with open(f_name_out, 'r') as f_in:
        output = f_in.read().splitlines()
    self.assertEqual(seq, output)
def test_read_1(self):
    """Read four header-less alignment rows with ``A2mParser``.

    Entries are expected to be named ``seq_0`` .. ``seq_3`` and to carry the
    rows unchanged.
    """
    msa = """GSMFTPKPPQDSAVI--GYCVKQGAVMKNWKRRY--LDENTIGYF EVHK--ECKQSDIMMRD--FEIVTTSRTFYVQADSPEEMHSWIKA EVHKVQECK--DIMMRDNLFEI--TSRTFWKRRY--LDENTIGYF EVHKVQECK--DIMMRDNLFEI--TSRTF--RRY--LDENTIGYF """
    # NOTE(review): this fixture is a single-line literal here; confirm the
    # record separators the parser relies on are intact.
    f_name = create_tmp_f(content=msa)
    # Remove the temp file even when an assertion below fails.
    self.addCleanup(os.unlink, f_name)

    parser = A2mParser()
    with open(f_name, 'r') as f_in:
        sequence_file = parser.read(f_in)

    expected = [
        ('seq_0', 'GSMFTPKPPQDSAVI--GYCVKQGAVMKNWKRRY--LDENTIGYF'),
        ('seq_1', 'EVHK--ECKQSDIMMRD--FEIVTTSRTFYVQADSPEEMHSWIKA'),
        ('seq_2', 'EVHKVQECK--DIMMRDNLFEI--TSRTFWKRRY--LDENTIGYF'),
        ('seq_3', 'EVHKVQECK--DIMMRDNLFEI--TSRTF--RRY--LDENTIGYF'),
    ]
    # Compare the complete list: an index-guarded loop would pass silently
    # if the parser returned fewer (or no) entries.
    observed = [(entry.id, entry.seq) for entry in sequence_file]
    self.assertEqual(expected, observed)
def test_read_1(self):
    """Parse ten PSICOV records and check residues and raw scores."""
    content = """46 78 0 8 9.301869 80 105 0 8 8.856009 111 129 0 8 7.252451 75 205 0 8 6.800462 19 44 0 8 6.588349 111 130 0 8 6.184269 23 41 0 8 6.163786 171 205 0 8 5.519271 53 126 0 8 5.440612 100 140 0 8 5.382865 """
    # NOTE(review): this fixture is a single-line literal here; confirm the
    # record separators the parser relies on are intact.
    f_name = create_tmp_f(content=content)
    # Remove the temp file even when an assertion below fails.
    self.addCleanup(os.unlink, f_name)

    with open(f_name, 'r') as f_in:
        contact_file = PsicovParser().read(f_in)
    contact_map1 = contact_file.top_map

    expected_res1 = [46, 80, 111, 75, 19, 111, 23, 171, 53, 100]
    expected_res2 = [78, 105, 129, 205, 44, 130, 41, 205, 126, 140]
    expected_scores = [
        9.301869, 8.856009, 7.252451, 6.800462, 6.588349,
        6.184269, 6.163786, 5.519271, 5.440612, 5.382865,
    ]

    self.assertEqual(1, len(contact_file))
    self.assertEqual(10, len(contact_map1))
    self.assertEqual(expected_res1, [c.res1_seq for c in contact_map1])
    self.assertEqual(expected_res2, [c.res2_seq for c in contact_map1])
    self.assertEqual(expected_scores, [c.raw_score for c in contact_map1])
def test_read_3(self):
    """Parse Gremlin output that has a ``gene`` column into three maps.

    The maps of the parsed file are compared, in iteration order, against
    the expectations for genes ``A``, ``AB`` and ``B``.
    """
    content = """i j gene i_id j_id r_sco s_sco prob I_prob 127 187 A 127_V 187_I 0.183 3.635 1.000 N/A 83 87 A 83_E 87_Q 0.183 3.633 1.000 N/A 108 111 A 108_P 111_P 0.105 2.095 0.989 N/A 431 435 B 241_L 245_L 0.104 2.076 0.988 N/A 63 83 A 63_T 83_E 0.098 1.952 0.980 N/A 23 434 AB 23_T 244_L 0.082 1.624 0.924 0.519 20 438 AB 20_Y 248_T 0.059 1.178 0.647 0.181 265 275 B 75_E 85_V 0.059 1.175 0.644 N/A 263 267 B 73_A 77_G 0.059 1.172 0.641 N/A 19 438 AB 19_L 248_T 0.059 1.17 0.640 0.176 211 215 B 21_D 25_A 0.054 1.069 0.536 N/A 30 65 A 30_A 65_T 0.054 1.065 0.532 N/A 24 434 AB 24_A 244_L 0.054 1.064 0.531 0.123 """
    # NOTE(review): this fixture is a single-line literal here; confirm the
    # record separators the parser relies on are intact.
    f_name = create_tmp_f(content=content)
    # Remove the temp file even when an assertion below fails.
    self.addCleanup(os.unlink, f_name)

    with open(f_name, 'r') as f_in:
        contact_file = GremlinParser().read(f_in)
    self.assertEqual(3, len(contact_file))

    chain_a_res1seq = [127, 83, 108, 63, 30]
    chain_a_rawscore = [0.183, 0.183, 0.105, 0.098, 0.054]
    chain_b_res1seq = [431, 265, 263, 211]
    chain_b_rawscore = [0.104, 0.059, 0.059, 0.054]
    chain_ab_res1seq = [23, 20, 19, 24]
    chain_ab_rawscore = [0.082, 0.059, 0.059, 0.054]

    # One (count, res1 list, raw-score list) triple per map: A, AB, then B.
    expectations = [
        (5, chain_a_res1seq, chain_a_rawscore),
        (4, chain_ab_res1seq, chain_ab_rawscore),
        (4, chain_b_res1seq, chain_b_rawscore),
    ]
    for (count, res1_seqs, raw_scores), cmap in zip(expectations, contact_file):
        self.assertEqual(count, len(cmap))
        self.assertEqual(res1_seqs, [c.res1_seq for c in cmap])
        self.assertEqual(raw_scores, [c.raw_score for c in cmap])
def test_write_6(self):
    """Write inter-chain contacts with ``CaspParser``.

    Each contact has ``res1_chain='A'`` and ``res2_chain='B'``; the expected
    records prefix the residue numbers with those chain letters.
    """
    contact_file = ContactFile('RR')
    contact_map = ContactMap('1')
    contact_file.add(contact_map)

    # Tuples are (chain1, res1, chain2, res2, lower bound, upper bound, score).
    specs = [
        ('A', 1, 'B', 9, 0, 8, 0.7),
        ('A', 1, 'B', 10, 0, 8, 0.7),
        ('A', 2, 'B', 8, 0, 8, 0.9),
        ('A', 3, 'B', 12, 0, 8, 0.4),
    ]
    for chain1, res1, chain2, res2, lower, upper, score in specs:
        contact = Contact(res1, res2, score, distance_bound=(lower, upper))
        contact.res1_chain = chain1
        contact.res2_chain = chain2
        contact_map.add(contact)
    contact_map.sequence = Sequence(
        '1', 'HLEGSIGILLKKHEIVFDGCHDFGRTYIWQMSDHLEGSIGILLKKHEIVFDGCHDFGRTYIWQMSD'
    )

    f_name = create_tmp_f()
    # Remove the temp file even when an assertion below fails.
    self.addCleanup(os.unlink, f_name)
    with open(f_name, 'w') as f_out:
        CaspParser().write(f_out, contact_file)

    content = [
        "PFRMAT RR",
        "MODEL 1",
        "HLEGSIGILLKKHEIVFDGCHDFGRTYIWQMSDHLEGSIGILLKKHEIVF",
        "DGCHDFGRTYIWQMSD",
        "A1 B9 0 8 0.700000",
        "A1 B10 0 8 0.700000",
        "A2 B8 0 8 0.900000",
        "A3 B12 0 8 0.400000",
        "ENDMDL",
        "END",
    ]
    with open(f_name, 'r') as f_in:
        output = f_in.read().splitlines()
    self.assertEqual(content, output)
def test_write_1(self):
    """Write four contacts with ``GremlinParser`` and compare the full text."""
    contact_file = ContactFile('test')
    contact_map = ContactMap('A')
    contact_file.add(contact_map)

    # Tuples are (res1, res2, lower bound, upper bound, raw score).
    specs = [(1, 9, 0, 8, 0.7), (1, 10, 0, 8, 0.7), (2, 8, 0, 8, 0.9), (3, 12, 0, 8, 0.4)]
    for res1, res2, lower, upper, score in specs:
        contact_map.add(Contact(res1, res2, score, distance_bound=(lower, upper)))
    contact_map.sequence = Sequence('1', 'HLEGSIGILLKKHEIVFDGCHDFGRTYIWQMSDHLEGSIGILLKKHEIVFDGCHDFGRTYIWQMSD')
    contact_map.assign_sequence_register()

    f_name = create_tmp_f()
    # Remove the temp file even when an assertion below fails.
    self.addCleanup(os.unlink, f_name)
    with open(f_name, 'w') as f_out:
        GremlinParser().write(f_out, contact_file)

    expected_lines = [
        "i j i_id j_id r_sco s_sco prob",
        "1 9 1_H 9_L 0.7 1.0 1.0",
        "1 10 1_H 10_L 0.7 1.0 1.0",
        "2 8 2_L 8_I 0.9 1.3 1.0",
        "3 12 3_E 12_K 0.4 0.6 1.0",
        "",
    ]
    content = os.linesep.join(expected_lines)
    with open(f_name, 'r') as f_in:
        data = f_in.read()
    self.assertEqual(content, data)
def test_read_2(self):
    """Parse Gremlin output that is preceded by ``#`` comment text.

    A single map with ten contacts is expected; first residues and raw
    scores are compared.
    """
    content = """# Some comments # That are here for whatever reason i j i_id j_id r_sco s_sco prob 179 246 179_C 246_L 0.2019 4.740 1.000 262 305 262_G 305_Y 0.1742 4.090 1.000 428 448 428_A 448_N 0.1638 3.846 1.000 214 231 214_F 231_V 0.1342 3.150 1.000 457 488 457_L 488_Y 0.1254 2.945 1.000 220 223 220_A 223_A 0.1187 2.786 0.999 143 209 143_I 209_D 0.1139 2.674 0.999 79 365 79_M 365_I 0.1114 2.615 0.998 215 268 215_V 268_A 0.1109 2.604 0.998 262 266 262_G 266_K 0.1040 2.442 0.997 """
    # NOTE(review): this fixture is a single-line literal here; confirm the
    # record separators the parser relies on are intact.
    f_name = create_tmp_f(content=content)
    # Remove the temp file even when an assertion below fails.
    self.addCleanup(os.unlink, f_name)

    with open(f_name, 'r') as f_in:
        contact_file = GremlinParser().read(f_in)
    contact_map1 = contact_file.top_map

    expected_res1 = [179, 262, 428, 214, 457, 220, 143, 79, 215, 262]
    expected_scores = [
        0.2019, 0.1742, 0.1638, 0.1342, 0.1254,
        0.1187, 0.1139, 0.1114, 0.1109, 0.1040,
    ]

    self.assertEqual(1, len(contact_file))
    self.assertEqual(10, len(contact_map1))
    self.assertEqual(expected_res1, [c.res1_seq for c in contact_map1])
    self.assertEqual(expected_scores, [c.raw_score for c in contact_map1])
def test_write_1(self):
    """Write four contacts with ``CCMpredParser`` and compare the matrix rows.

    The expected output is twelve tab-separated rows of twelve values in
    which every contact score appears at both (res1, res2) and (res2, res1).
    """
    contact_file = ContactFile('test')
    contact_map = ContactMap('1')
    contact_file.add(contact_map)

    # Tuples are (res1, res2, lower bound, upper bound, raw score).
    specs = [(1, 9, 0, 8, 0.7), (1, 10, 0, 8, 0.7), (2, 8, 0, 8, 0.9), (3, 12, 0, 8, 0.4)]
    for res1, res2, lower, upper, score in specs:
        contact_map.add(Contact(res1, res2, score, distance_bound=(lower, upper)))
    contact_map.sequence = Sequence(
        '1', 'HLEGSIGILLKKHEIVFDGCHDFGRTYIWQMSDHLEGSIGILLKKHEIVFDGCHDFGRTYIWQMSD'
    )

    f_name = create_tmp_f()
    # Remove the temp file even when an assertion below fails.
    self.addCleanup(os.unlink, f_name)
    # Not sure if bug in Python3 numpy or intended purpose [Implemented: 21.11.2016]
    mode = 'wb' if sys.version_info.major == 3 else 'w'
    with open(f_name, mode) as f_out:
        CCMpredParser().write(f_out, contact_file)

    content = [
        '0.000000000000000000e+00\t0.000000000000000000e+00\t0.000000000000000000e+00\t0.000000000000000000e+00\t0.000000000000000000e+00\t0.000000000000000000e+00\t0.000000000000000000e+00\t0.000000000000000000e+00\t6.999999999999999556e-01\t6.999999999999999556e-01\t0.000000000000000000e+00\t0.000000000000000000e+00',
        '0.000000000000000000e+00\t0.000000000000000000e+00\t0.000000000000000000e+00\t0.000000000000000000e+00\t0.000000000000000000e+00\t0.000000000000000000e+00\t0.000000000000000000e+00\t9.000000000000000222e-01\t0.000000000000000000e+00\t0.000000000000000000e+00\t0.000000000000000000e+00\t0.000000000000000000e+00',
        '0.000000000000000000e+00\t0.000000000000000000e+00\t0.000000000000000000e+00\t0.000000000000000000e+00\t0.000000000000000000e+00\t0.000000000000000000e+00\t0.000000000000000000e+00\t0.000000000000000000e+00\t0.000000000000000000e+00\t0.000000000000000000e+00\t0.000000000000000000e+00\t4.000000000000000222e-01',
        '0.000000000000000000e+00\t0.000000000000000000e+00\t0.000000000000000000e+00\t0.000000000000000000e+00\t0.000000000000000000e+00\t0.000000000000000000e+00\t0.000000000000000000e+00\t0.000000000000000000e+00\t0.000000000000000000e+00\t0.000000000000000000e+00\t0.000000000000000000e+00\t0.000000000000000000e+00',
        '0.000000000000000000e+00\t0.000000000000000000e+00\t0.000000000000000000e+00\t0.000000000000000000e+00\t0.000000000000000000e+00\t0.000000000000000000e+00\t0.000000000000000000e+00\t0.000000000000000000e+00\t0.000000000000000000e+00\t0.000000000000000000e+00\t0.000000000000000000e+00\t0.000000000000000000e+00',
        '0.000000000000000000e+00\t0.000000000000000000e+00\t0.000000000000000000e+00\t0.000000000000000000e+00\t0.000000000000000000e+00\t0.000000000000000000e+00\t0.000000000000000000e+00\t0.000000000000000000e+00\t0.000000000000000000e+00\t0.000000000000000000e+00\t0.000000000000000000e+00\t0.000000000000000000e+00',
        '0.000000000000000000e+00\t0.000000000000000000e+00\t0.000000000000000000e+00\t0.000000000000000000e+00\t0.000000000000000000e+00\t0.000000000000000000e+00\t0.000000000000000000e+00\t0.000000000000000000e+00\t0.000000000000000000e+00\t0.000000000000000000e+00\t0.000000000000000000e+00\t0.000000000000000000e+00',
        '0.000000000000000000e+00\t9.000000000000000222e-01\t0.000000000000000000e+00\t0.000000000000000000e+00\t0.000000000000000000e+00\t0.000000000000000000e+00\t0.000000000000000000e+00\t0.000000000000000000e+00\t0.000000000000000000e+00\t0.000000000000000000e+00\t0.000000000000000000e+00\t0.000000000000000000e+00',
        '6.999999999999999556e-01\t0.000000000000000000e+00\t0.000000000000000000e+00\t0.000000000000000000e+00\t0.000000000000000000e+00\t0.000000000000000000e+00\t0.000000000000000000e+00\t0.000000000000000000e+00\t0.000000000000000000e+00\t0.000000000000000000e+00\t0.000000000000000000e+00\t0.000000000000000000e+00',
        '6.999999999999999556e-01\t0.000000000000000000e+00\t0.000000000000000000e+00\t0.000000000000000000e+00\t0.000000000000000000e+00\t0.000000000000000000e+00\t0.000000000000000000e+00\t0.000000000000000000e+00\t0.000000000000000000e+00\t0.000000000000000000e+00\t0.000000000000000000e+00\t0.000000000000000000e+00',
        '0.000000000000000000e+00\t0.000000000000000000e+00\t0.000000000000000000e+00\t0.000000000000000000e+00\t0.000000000000000000e+00\t0.000000000000000000e+00\t0.000000000000000000e+00\t0.000000000000000000e+00\t0.000000000000000000e+00\t0.000000000000000000e+00\t0.000000000000000000e+00\t0.000000000000000000e+00',
        '0.000000000000000000e+00\t0.000000000000000000e+00\t4.000000000000000222e-01\t0.000000000000000000e+00\t0.000000000000000000e+00\t0.000000000000000000e+00\t0.000000000000000000e+00\t0.000000000000000000e+00\t0.000000000000000000e+00\t0.000000000000000000e+00\t0.000000000000000000e+00\t0.000000000000000000e+00'
    ]
    with open(f_name, 'r') as f_in:
        output = f_in.read().splitlines()
    self.assertEqual(content, output)
def test_read_3(self):
    """Read a FASTA file whose records are preceded by ``#`` remark text.

    The remarks ``foo`` and ``bar`` must end up in ``sequence_file.remark``
    and the three records must be returned with their ids and sequences.
    """
    msa = """#foo #bar >seq1 GSMFTPKPPQDSAVIKAGYCVKQGAVMKNWKRRYFQLDENTIGYF >seq2 EVHKVQECKQSDIMMRDNLFEIVTTSRTFYVQADSPEEMHSWIKA >seq3 EVHKVQECKQSDIMMRDNLFEIVTTSRTFWKRRYFQLDENTIGYF """
    # NOTE(review): this fixture is a single-line literal here; confirm the
    # record separators the parser relies on are intact.
    f_name = create_tmp_f(content=msa)
    # Remove the temp file even when an assertion below fails.
    self.addCleanup(os.unlink, f_name)

    parser = FastaParser()
    with open(f_name, 'r') as f_in:
        sequence_file = parser.read(f_in)
    self.assertEqual(['foo', 'bar'], sequence_file.remark)

    expected = [
        ('seq1', 'GSMFTPKPPQDSAVIKAGYCVKQGAVMKNWKRRYFQLDENTIGYF'),
        ('seq2', 'EVHKVQECKQSDIMMRDNLFEIVTTSRTFYVQADSPEEMHSWIKA'),
        ('seq3', 'EVHKVQECKQSDIMMRDNLFEIVTTSRTFWKRRYFQLDENTIGYF'),
    ]
    # Compare the complete list: an index-guarded loop would pass silently
    # if the parser returned fewer (or no) entries.
    observed = [(entry.id, entry.seq) for entry in sequence_file]
    self.assertEqual(expected, observed)
def test_read_1(self):
    """Parse helix-helix contact records with ``MemBrainParser``.

    The expected ``res1_seq`` / ``res2_seq`` values are the two position
    fields of each record and the raw score is its probability field.
    """
    content = """Helix Position Residue Helix Position Residue Probability H1 30 F H2 55 F 1.000000 H1 33 L H2 51 A 0.944091 H1 18 G H2 65 C 0.942259 H1 30 F H2 54 G 0.919241 H1 26 I H2 57 L 0.817638 H1 18 G H2 58 S 0.797449 H1 33 L H2 63 L 0.795520 H1 12 A H2 68 V 0.795462 H1 29 V H2 55 F 0.791829 H1 24 I H2 51 A 0.790044 H1 19 L H2 62 G 0.784613 H1 19 L H2 55 F 0.782741 """
    # NOTE(review): this fixture is a single-line literal here; confirm the
    # record separators the parser relies on are intact.
    f_name = create_tmp_f(content=content)
    # Remove the temp file even when an assertion below fails.
    self.addCleanup(os.unlink, f_name)

    with open(f_name, 'r') as f_in:
        contact_file = MemBrainParser().read(f_in)
    contact_map1 = contact_file.top_map

    expected_res1 = [30, 33, 18, 30, 26, 18, 33, 12, 29, 24, 19, 19]
    expected_res2 = [55, 51, 65, 54, 57, 58, 63, 68, 55, 51, 62, 55]
    expected_scores = [
        1.000000, 0.944091, 0.942259, 0.919241, 0.817638, 0.797449,
        0.795520, 0.795462, 0.791829, 0.790044, 0.784613, 0.782741,
    ]

    self.assertEqual(1, len(contact_file))
    self.assertEqual(12, len(contact_map1))
    self.assertEqual(expected_res1, [c.res1_seq for c in contact_map1])
    self.assertEqual(expected_res2, [c.res2_seq for c in contact_map1])
    self.assertEqual(expected_scores, [c.raw_score for c in contact_map1])
def test_write_3(self):
    """Write a contact map without a sequence using ``CaspParser``.

    The expected output has no sequence rows between ``MODEL 1`` and the
    contact records.
    """
    contact_file = ContactFile('RR')
    contact_map = ContactMap('1')
    contact_file.add(contact_map)

    # Tuples are (res1, res2, lower bound, upper bound, raw score).
    specs = [(1, 9, 0, 8, 0.7), (1, 10, 0, 8, 0.7), (2, 8, 0, 8, 0.9), (3, 12, 0, 8, 0.4)]
    for res1, res2, lower, upper, score in specs:
        contact_map.add(Contact(res1, res2, score, distance_bound=(lower, upper)))

    f_name = create_tmp_f()
    # Remove the temp file even when an assertion below fails.
    self.addCleanup(os.unlink, f_name)
    with open(f_name, 'w') as f_out:
        CaspParser().write(f_out, contact_file)

    content = [
        "PFRMAT RR",
        "MODEL 1",
        "1 9 0 8 0.700000",
        "1 10 0 8 0.700000",
        "2 8 0 8 0.900000",
        "3 12 0 8 0.400000",
        "ENDMDL",
        "END",
    ]
    with open(f_name, 'r') as f_in:
        output = f_in.read().splitlines()
    self.assertEqual(content, output)
def test_read_1(self):
    """Parse ten ``FreeContactParser`` records and check residues and scores.

    The expected raw score of each contact is the fifth field of its record.
    """
    content = """1 M 2 V 0.0338619 0 1 M 3 G 0.0307956 0 1 M 4 L 0.0268079 0 1 M 5 T 0.0219783 0 1 M 6 T 0.0222061 0 1 M 7 L 0.0213079 0 1 M 8 F 0.0119054 0 1 M 9 W 0.0275182 0 1 M 10 L 0.0134577 0 1 M 11 G 0.0234555 0 """
    # NOTE(review): this fixture is a single-line literal here; confirm the
    # record separators the parser relies on are intact.
    f_name = create_tmp_f(content=content)
    # Remove the temp file even when an assertion below fails.
    self.addCleanup(os.unlink, f_name)

    with open(f_name, 'r') as f_in:
        contact_file = FreeContactParser().read(f_in)
    contact_map1 = contact_file.top_map

    expected_res1 = [1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
    expected_res2 = [2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
    expected_scores = [
        0.0338619, 0.0307956, 0.0268079, 0.0219783, 0.0222061,
        0.0213079, 0.0119054, 0.0275182, 0.0134577, 0.0234555,
    ]

    self.assertEqual(1, len(contact_file))
    self.assertEqual(10, len(contact_map1))
    self.assertEqual(expected_res1, [c.res1_seq for c in contact_map1])
    self.assertEqual(expected_res2, [c.res2_seq for c in contact_map1])
    self.assertEqual(expected_scores, [c.raw_score for c in contact_map1])
def test_write_5(self):
    """Write contacts whose raw scores fall outside ``[0, 1]`` with ``CaspParser``.

    Inputs 1.5, -0.3, 0.9 and 0.4 are expected to be written as 1.000000,
    0.000000, 0.666667 and 0.388889, which matches a min-max rescaling of
    the input scores.
    """
    contact_file = ContactFile('RR')
    contact_map = ContactMap('1')
    contact_file.add(contact_map)

    # Tuples are (res1, res2, lower bound, upper bound, raw score).
    specs = [(1, 9, 0, 8, 1.5), (1, 10, 0, 8, -0.3), (2, 8, 0, 8, 0.9), (3, 12, 0, 8, 0.4)]
    for res1, res2, lower, upper, score in specs:
        contact_map.add(Contact(res1, res2, score, distance_bound=(lower, upper)))
    contact_map.sequence = Sequence(
        '1', 'HLEGSIGILLKKHEIVFDGCHDFGRTYIWQMSDHLEGSIGILLKKHEIVFDGCHDFGRTYIWQMSD'
    )

    f_name = create_tmp_f()
    # Remove the temp file even when an assertion below fails.
    self.addCleanup(os.unlink, f_name)
    with open(f_name, 'w') as f_out:
        CaspParser().write(f_out, contact_file)

    content = [
        "PFRMAT RR",
        "MODEL 1",
        "HLEGSIGILLKKHEIVFDGCHDFGRTYIWQMSDHLEGSIGILLKKHEIVF",
        "DGCHDFGRTYIWQMSD",
        "1 9 0 8 1.000000",
        "1 10 0 8 0.000000",
        "2 8 0 8 0.666667",
        "3 12 0 8 0.388889",
        "ENDMDL",
        "END",
    ]
    with open(f_name, 'r') as f_in:
        output = f_in.read().splitlines()
    self.assertEqual(content, output)
def test_write_4(self):
    """Write contacts plus a 66-residue sequence with ``CaspParser``.

    The expected output carries the sequence as a 50-character row followed
    by the remaining 16 characters, and ends with a line separator.
    """
    contact_file = ContactFile('RR')
    contact_map = ContactMap('1')
    contact_file.add(contact_map)

    # Tuples are (res1, res2, lower bound, upper bound, raw score).
    specs = [(1, 9, 0, 8, 0.7), (1, 10, 0, 8, 0.7), (2, 8, 0, 8, 0.9), (3, 12, 0, 8, 0.4)]
    for res1, res2, lower, upper, score in specs:
        contact_map.add(Contact(res1, res2, score, distance_bound=(lower, upper)))
    contact_map.sequence = Sequence(
        '1', 'HLEGSIGILLKKHEIVFDGCHDFGRTYIWQMSDHLEGSIGILLKKHEIVFDGCHDFGRTYIWQMSD'
    )

    f_name = create_tmp_f()
    # Remove the temp file even when an assertion below fails.
    self.addCleanup(os.unlink, f_name)
    with open(f_name, 'w') as f_out:
        CaspParser().write(f_out, contact_file)

    expected_lines = [
        "PFRMAT RR",
        "MODEL 1",
        "HLEGSIGILLKKHEIVFDGCHDFGRTYIWQMSDHLEGSIGILLKKHEIVF",
        "DGCHDFGRTYIWQMSD",
        "1 9 0 8 0.700000",
        "1 10 0 8 0.700000",
        "2 8 0 8 0.900000",
        "3 12 0 8 0.400000",
        "ENDMDL",
        "END",
        "",
    ]
    content = os.linesep.join(expected_lines)
    with open(f_name, 'r') as f_in:
        data = f_in.read()
    self.assertEqual(content, data)
def test_read_2(self):
    """Parse a CASP RR file that has no sequence rows.

    ``sequence`` must be ``None`` and accessing ``repr_sequence`` must raise
    ``TypeError``.
    """
    content = """PFRMAT RR TARGET R9999 AUTHOR 1234-5678-9000 REMARK Predictor remarks METHOD Description of methods used MODEL 1 1 9 0 8 0.70 1 10 0 8 0.70 1 12 0 8 0.60 2 8 0 8 0.90 3 7 0 8 0.70 3 12 0 8 0.40 4 6 0 8 0.90 7 14 0 8 0.30 9 14 0 8 0.50 END """
    # NOTE(review): this fixture is a single-line literal here; confirm the
    # record separators the parser relies on are intact.
    f_name = create_tmp_f(content=content)
    # Remove the temp file even when an assertion below fails.
    self.addCleanup(os.unlink, f_name)

    with open(f_name, 'r') as f_in:
        contact_file = CaspParser().read(f_in)
    contact_map1 = contact_file.top_map

    self.assertEqual(1, len(contact_file))
    self.assertEqual(9, len(contact_map1))
    self.assertIsNone(contact_map1.sequence)
    with self.assertRaises(TypeError):
        _ = contact_map1.repr_sequence
def test_read_3(self):
    """Parse bbcontacts records with ``del_one_two=False``.

    All nine records are expected as contacts, with the smaller residue
    number of each record as ``res1_seq``. Raw scores are compared after
    sorting, so their order is not checked.
    """
    content = """#identifier diversity direction viterbiscore indexpred state res1 res2 1EAZ 0.65 Antiparallel 9.860725 1 first 29 24 1EAZ 0.65 Antiparallel 9.860725 1 internal 30 23 1EAZ 0.65 Antiparallel 9.860725 1 last 31 22 1EAZ 0.65 Parallel -6.855870 29 first 87 54 1EAZ 0.65 Parallel -6.855870 29 internal 88 55 1EAZ 0.65 Parallel -6.855870 29 last 89 56 1EAZ 0.65 Antiparallel 0.000000 1 first 100 24 1EAZ 0.65 Antiparallel 0.000000 1 last 101 23 1EAZ 0.65 Parallel 0.000000 29 first 100 15 """
    # NOTE(review): this fixture is a single-line literal here; confirm the
    # record separators the parser relies on are intact.
    f_name = create_tmp_f(content=content)
    # Remove the temp file even when an assertion below fails.
    self.addCleanup(os.unlink, f_name)

    with open(f_name, 'r') as f_in:
        contact_file = BbcontactsParser().read(f_in, del_one_two=False)
    contact_map1 = contact_file.top_map

    expected_res1 = [24, 23, 22, 54, 55, 56, 24, 23, 15]
    expected_res2 = [29, 30, 31, 87, 88, 89, 100, 101, 100]
    expected_scores = [
        9.860725, 9.860725, 9.860725, -6.855870, -6.855870, -6.855870, 0.0, 0.0, 0.0
    ]

    self.assertEqual(1, len(contact_file))
    self.assertEqual(9, len(contact_map1))
    self.assertEqual(expected_res1, [c.res1_seq for c in contact_map1])
    self.assertEqual(expected_res2, [c.res2_seq for c in contact_map1])
    self.assertEqual(sorted(expected_scores), sorted([c.raw_score for c in contact_map1]))
def test_write_1(self):
    """Round-trip four alignment rows through ``A2mParser``.

    The written lines must equal the input rows.
    """
    msa = [
        'GSMFTPKPPQDSAVI--GYCVKQGAVMKNWKRRY--LDENTIGYF',
        'EVHK--ECKQSDIMMRD--FEIVTTSRTFYVQADSPEEMHSWIKA',
        'EVHKVQECK--DIMMRDNLFEI--TSRTFWKRRY--LDENTIGYF',
        'EVHKVQECK--DIMMRDNLFEI--TSRTF--RRY--LDENTIGYF',
    ]
    # The former ``map(os.unlink, [...])`` is lazy on Python 3 and was never
    # consumed, so the files were never removed. Registering cleanups deletes
    # them, also when an assertion fails.
    f_name_in = create_tmp_f(content='\n'.join(msa))
    self.addCleanup(os.unlink, f_name_in)
    f_name_out = create_tmp_f()
    self.addCleanup(os.unlink, f_name_out)

    parser = A2mParser()
    with open(f_name_in, 'r') as f_in, open(f_name_out, 'w') as f_out:
        sequence_file = parser.read(f_in)
        parser.write(f_out, sequence_file)

    with open(f_name_out, 'r') as f_in:
        output = f_in.read().splitlines()
    self.assertEqual(msa, output)
def test_create_tmp_f_2(self):
    """``create_tmp_f(content=..., mode='w')`` returns a file holding the text."""
    content = 'Hello, World!'
    fname = _iotools.create_tmp_f(content=content, mode='w')
    self.assertTrue(os.path.isfile(fname))
    # Registered only once the file is known to exist, so a failed existence
    # check is not followed by a spurious cleanup error. The file is then
    # removed even if the content comparison fails.
    self.addCleanup(os.unlink, fname)

    with open(fname, 'r') as f_in:
        written_content = f_in.read()
    self.assertEqual(content, written_content)
def test_write_1(self):
    """Round-trip a multi-row FASTA record through ``FastaParser``.

    The written lines must equal the input: the header followed by two
    60-character sequence rows and a final 5-character row.
    """
    seq = [
        ">00FAF_A|<unknown description>",
        "GSMFTPKPPQDSAVIKAGYCVKQGAVMKNWKRRYFQLDENTIGYFKSELEKEPLRVIPLK",
        "EVHKVQECKQSDIMMRDNLFEIVTTSRTFYVQADSPEEMHSWIKAVSGAIVAQRGPGRSA",
        "SSEHP",
    ]
    # The former ``map(os.unlink, [...])`` is lazy on Python 3 and was never
    # consumed, so the files were never removed. Registering cleanups deletes
    # them, also when an assertion fails.
    f_name_in = create_tmp_f(content='\n'.join(seq))
    self.addCleanup(os.unlink, f_name_in)
    f_name_out = create_tmp_f()
    self.addCleanup(os.unlink, f_name_out)

    parser = FastaParser()
    with open(f_name_in, 'r') as f_in, open(f_name_out, 'w') as f_out:
        sequence_file = parser.read(f_in)
        parser.write(f_out, sequence_file)

    with open(f_name_out, 'r') as f_in:
        output = f_in.read().splitlines()
    self.assertEqual(seq, output)
def test_read_4(self):
    """Derive contacts from a two-chain PDB fixture with ``PdbParser``.

    Read with ``distance_cutoff=8`` and ``atom_type='CB'``. Three maps are
    expected, keyed ``'A'``, ``'AB'`` and ``'BA'``, each holding a single
    contact whose chains and residue numbers are checked.
    """
    content = """ATOM 1 N TYR A 36 39.107 51.628 3.103 0.50 43.13 N ATOM 2 CA TYR A 36 38.300 50.814 2.204 0.50 41.80 C ATOM 3 O TYR A 36 38.712 48.587 1.405 0.50 41.03 O ATOM 4 CB TYR A 36 37.586 51.694 1.175 0.50 41.61 C ATOM 5 N PHE A 86 32.465 47.498 5.487 0.50 25.81 N ATOM 6 CA PHE A 86 32.670 48.303 4.288 0.50 26.45 C ATOM 7 O PHE A 86 31.469 50.326 3.758 0.50 28.47 O ATOM 8 CB PHE A 86 32.977 47.392 3.090 0.50 25.35 C TER ATOM 9 N TRP B 171 23.397 37.507 -1.161 0.50 18.04 N ATOM 10 CA TRP B 171 23.458 36.846 0.143 0.50 20.46 C ATOM 11 O TRP B 171 22.235 34.954 0.951 0.50 22.45 O ATOM 12 CB TRP B 171 23.647 37.866 1.275 0.50 18.83 C ATOM 13 N PHE B 208 32.221 42.624 -5.829 0.50 19.96 N ATOM 14 CA PHE B 208 31.905 43.710 -4.909 0.50 20.31 C ATOM 15 O PHE B 208 32.852 45.936 -5.051 0.50 17.69 O ATOM 16 CB PHE B 208 31.726 43.102 -3.518 0.50 19.90 C END """
    # NOTE(review): this fixture is a single-line literal with single-space
    # separators here; confirm line breaks and column layout are intact.
    f_name = create_tmp_f(content=content)
    # Remove the temp file even when an assertion below fails.
    self.addCleanup(os.unlink, f_name)

    with open(f_name, 'r') as f_in:
        contact_file = PdbParser().read(f_in, distance_cutoff=8, atom_type='CB')

    # Three maps (A, AB, BA) are asserted below; the original note attributes
    # the missing B map to there being no contacts within chain B.
    contact_map1 = contact_file['A']  # chain A
    contact_map2 = contact_file['AB']  # chain AB
    contact_map3 = contact_file['BA']  # chain BA
    self.assertEqual(3, len(contact_file))

    top_a = contact_map1.top_contact
    self.assertEqual(1, len(contact_map1))
    self.assertEqual(['A', 'A'], [top_a.res1_chain, top_a.res2_chain])
    self.assertEqual([36, 86], [top_a.res1_seq, top_a.res2_seq])

    top_ab = contact_map2.top_contact
    self.assertEqual(1, len(contact_map2))
    self.assertEqual(['A', 'B'], [top_ab.res1_chain, top_ab.res2_chain])
    self.assertEqual([86, 208], [top_ab.res1_seq, top_ab.res2_seq])

    top_ba = contact_map3.top_contact
    self.assertEqual(1, len(contact_map3))
    self.assertEqual(['B', 'A'], [top_ba.res1_chain, top_ba.res2_chain])
    self.assertEqual([208, 86], [top_ba.res1_seq, top_ba.res2_seq])
def test_open_f_handle_4(self):
    """``open_f_handle`` maps ``'write'`` / ``'read'`` to file modes ``'w'`` / ``'r'``.

    Text written through the ``'write'`` handle must be readable through a
    subsequent ``'read'`` handle on the same path.
    """
    fname = _iotools.create_tmp_f()
    # Remove the temp file even when an assertion below fails.
    self.addCleanup(os.unlink, fname)

    with _iotools.open_f_handle(fname, 'write') as fhandle:
        self.assertEqual('w', fhandle.mode)
        fhandle.write("hello world!")

    with _iotools.open_f_handle(fname, 'read') as fhandle:
        self.assertEqual('r', fhandle.mode)
        self.assertEqual("hello world!", fhandle.read().strip())