def test_missing_fasta_file(self):
    """Check process_input_sequences() with fasta_file='missing_file.fasta'.

    The test never creates that file, and the expected outcome is an
    empty dict rather than an exception.
    """
    mm = AddSequenceToMmcif(input_mmcif='input.cif',
                            output_mmcif='output.cif',
                            fasta_file='missing_file.fasta')
    data = mm.process_input_sequences()
    self.assertEqual(data, {})
    # assertEqual (instead of assertTrue(... == 0)) reports the actual
    # length when the check fails.
    self.assertEqual(len(data), 0)
def test_invalid_fasta_file(self):
    """Check process_input_sequences() with the invalid-FASTA fixture.

    The FASTA path comes from
    self.test_files.TEST_INVALID_FASTA_ONE_SEQUENCE (presumably a
    malformed single-sequence file, judging by its name -- confirm in the
    fixture module). The expected outcome is an empty dict.
    """
    mm = AddSequenceToMmcif(
        input_mmcif='input.cif',
        output_mmcif='output.cif',
        fasta_file=self.test_files.TEST_INVALID_FASTA_ONE_SEQUENCE)
    data = mm.process_input_sequences()
    self.assertEqual(data, {})
    # assertEqual (instead of assertTrue(... == 0)) reports the actual
    # length when the check fails.
    self.assertEqual(len(data), 0)
def test_get_data_from_3zt9_via_AddSequenceToMmcif_process_data(self):
    """Run process_data() end to end and expect a truthy result.

    self.test_files.one_sequence() is called first; it presumably points
    self.test_files.cif / .fasta at the single-sequence fixture (confirm
    in the fixture module). Output is written to a fresh temporary
    directory.
    """
    self.test_files.one_sequence()
    test_dir = tempfile.mkdtemp()
    try:
        output_cif = os.path.join(test_dir, 'output.cif')
        mm = AddSequenceToMmcif(input_mmcif=self.test_files.cif,
                                output_mmcif=output_cif,
                                fasta_file=self.test_files.fasta)
        worked = mm.process_data()
        self.assertTrue(worked)
    finally:
        # Remove the temp dir even when process_data() raises or the
        # assertion fails, so failed runs do not leak directories.
        shutil.rmtree(test_dir)
def test_process_fasta_five_chains(self):
    """Check that process_input_sequences() yields the five expected entries.

    self.test_files.five_sequences() is called first (presumably it
    selects the five-sequence fixture -- confirm in the fixture module).
    Every returned key must be present in self.test_files.sample_seq with
    an equal value.
    """
    self.test_files.five_sequences()
    expected_result = self.test_files.sample_seq
    mm = AddSequenceToMmcif(input_mmcif='input.cif',
                            output_mmcif='output.cif',
                            fasta_file=self.test_files.fasta)
    data = mm.process_input_sequences()
    # The dedicated assert* methods report the offending values on failure,
    # which assertTrue(a == b) cannot do.
    self.assertEqual(len(data), 5)
    for key in data:
        self.assertIn(key, expected_result)
        self.assertEqual(data[key], expected_result[key])
def test_get_data_from_3zt9_via_AddSequenceToMmcif_get_best_match(self):
    """Check get_best_match() against the observed sequence of entity '1'.

    After loading the input sequences, the best match for
    self.test_files.observed_seq['1']['sequence'] must equal the sample
    sequence stored under 'pdb|3zt9|A', and its score must be positive.
    """
    self.test_files.one_sequence()
    expected_result = self.test_files.sample_seq['pdb|3zt9|A']
    entity_dict = self.test_files.observed_seq
    mm = AddSequenceToMmcif(input_mmcif=self.test_files.cif,
                            output_mmcif='output.cif',
                            fasta_file=self.test_files.fasta)
    # Called for its side effect only; the return value is not needed here.
    mm.process_input_sequences()
    best_seq, best_score = mm.get_best_match(
        mmcif_sequence=entity_dict['1']['sequence'])
    # assertEqual / assertGreater show the compared values on failure.
    self.assertEqual(best_seq, expected_result)
    self.assertGreater(best_score, 0)
def test_process_sequence_two_chains(self):
    """Check process_input_sequences() with an inline sequence and chain IDs.

    One sequence is supplied via input_sequence together with
    input_chainids='A,B'. The result must have exactly two entries, each
    keyed by one of those chain IDs and holding the full sequence.
    """
    sequence = 'MEKLEVGIYTRAREGEIACGDACLVKRVEGVIFLAVGDGIGHGPEAARAAEIAIASMESSMNTGLVNIFQLCHRELRGTRGAVAALCRVDRRQGLWQAAIVGNIHVKILSAKGIITPLATPGILGYNYPHQLLIAKGSYQEGDLFLIHSDGIQEGAVPLALLANYRLTAEELVRLIGEKYGRRDDDVAVIVAR'
    chains = 'A,B'
    # Split once instead of on every loop iteration.
    expected_chains = chains.split(',')
    mm = AddSequenceToMmcif(input_mmcif='input.cif',
                            output_mmcif='output.cif',
                            input_chainids=chains,
                            input_sequence=sequence)
    data = mm.process_input_sequences()
    # The dedicated assert* methods report the offending values on failure.
    self.assertEqual(len(data), 2)
    for key in data:
        self.assertIn(key, expected_chains)
        self.assertEqual(data[key], sequence)
def test_get_data_from_5liz_via_AddSequenceToMmcif(self):
    """Check process_mmcif() against the expected per-entity data.

    For every entity ID in self.test_files.observed_seq, the parsed
    result must carry the same 'sequence' and the same set of 'chains'
    (compared order-insensitively). A missing entity is treated as an
    empty sequence with no chains, so it fails the comparison instead of
    raising KeyError.
    """
    self.test_files.five_sequences()
    mm = AddSequenceToMmcif(input_mmcif=self.test_files.cif,
                            output_mmcif='output.cif')
    entity_dict = self.test_files.observed_seq
    sequence_dict = mm.process_mmcif()
    self.assertNotEqual(sequence_dict, {})
    for entity_id in entity_dict:
        # Look the entity up once; {} stands in for a missing entity.
        parsed = sequence_dict.get(entity_id, {})
        chains = parsed.get('chains', [])
        # assertEqual reports both sides on mismatch.
        self.assertEqual(parsed.get('sequence', ''),
                         entity_dict[entity_id]['sequence'])
        self.assertEqual(sorted(chains),
                         sorted(entity_dict[entity_id]['chains']))
def common_function(self):
    """Run process_data() and verify the sequences written to the output mmCIF.

    Uses whatever self.test_files.cif / .fasta currently point at
    (presumably the calling test selects the fixture first -- confirm at
    the call sites). The output file is written to a temporary directory,
    parsed back with mmcifHandling, and every entity_poly row's
    pdbx_seq_one_letter_code is compared with the sample sequence mapped
    to it through self.test_files.sample_seq_to_obs_remapping.
    """
    sample_seq = self.test_files.sample_seq
    temp_dir = tempfile.mkdtemp()
    try:
        output_cif = os.path.join(temp_dir, 'output.cif')
        mm = AddSequenceToMmcif(input_mmcif=self.test_files.cif,
                                output_mmcif=output_cif,
                                fasta_file=self.test_files.fasta)
        worked = mm.process_data()
        self.assertTrue(worked)
        self.assertTrue(os.path.exists(output_cif))

        om = mmcifHandling()
        # Fail loudly if the written file cannot be parsed. Previously a
        # parse failure skipped every sequence check and the test passed
        # without verifying anything.
        self.assertTrue(om.parse_mmcif(fileName=output_cif))
        entity_poly = om.getCategory('entity_poly')
        for cat in entity_poly:
            for instance, entity_id in enumerate(
                    entity_poly[cat]['entity_id']):
                sequence = entity_poly[cat]['pdbx_seq_one_letter_code'][
                    instance]
                sample_seq_key = self.test_files.sample_seq_to_obs_remapping[
                    entity_id]
                # assertEqual reports both sequences on mismatch.
                self.assertEqual(sequence, sample_seq[sample_seq_key])
    finally:
        # Always remove the temp dir, including when an assertion fails.
        shutil.rmtree(temp_dir)