def test_clean_pdb_and_get_chain(self):
    """Check that writing a PDB through ``CleanPDB(keep_chains='A')`` gives the expected file.

    For every (input, reference) pair in ``files`` the input structure is
    loaded with ``StructureIO`` and written out with the custom selection.
    Three things are then asserted: the output file name, equality of the
    output with the stored reference file, and inequality of the output with
    the untouched input file.
    """

    def read_text(path):
        # Read through a context manager so each handle is closed right away;
        # a bare open(...).read() leaves the handle open until garbage
        # collection and raises ResourceWarning under unittest.
        with open(path, 'r') as handle:
            return handle.read()

    files = [('1kf6.pdb', '1kf6_clean_chainA_tester.pdb')]
    outdir = op.join('test_files', 'out')
    working_dir = 'test_structures'
    out_suffix = '_clean_chainA'
    custom_clean = CleanPDB(keep_chains='A')

    for infile, outfile in files:
        outfile_new = '{}{}.pdb'.format(op.splitext(infile)[0], out_suffix)
        infile_path = op.join(working_dir, infile)

        my_pdb = StructureIO(infile_path)
        default_cleaned_pdb = my_pdb.write_pdb(custom_selection=custom_clean,
                                               out_suffix=out_suffix,
                                               out_dir=outdir,
                                               force_rerun=True)
        default_cleaned_pdb_basename = op.basename(default_cleaned_pdb)

        # test if the filename is correct
        self.assertEqual(default_cleaned_pdb_basename, outfile_new)

        # The freshly written file is compared twice below; read it once.
        cleaned_text = read_text(default_cleaned_pdb)

        # test if the file contents are equal
        self.assertEqual(cleaned_text, read_text(op.join(working_dir, outfile)))

        # test that the file does not equal the original file
        self.assertNotEqual(cleaned_text, read_text(infile_path))
def clean_pdb(pdb_file, out_suffix='_clean', outdir=None, force_rerun=False,
              remove_atom_alt=True, keep_atom_alt_id='A', remove_atom_hydrogen=True, add_atom_occ=True,
              remove_res_hetero=True, keep_chemicals=None, keep_res_only=None,
              add_chain_id_if_empty='X', keep_chains=None):
    """Write a cleaned copy of a PDB file and return its path.

    The expected output path is built first with ``ssbio.utils.outfile_maker``.
    If ``ssbio.utils.force_rerun`` reports that no run is needed for that path,
    the path is returned without touching the structure. Otherwise the file is
    loaded with ``StructureIO`` and written through a ``CleanPDB`` selection
    configured from the arguments below.

    Args:
        pdb_file (str): Path to the input PDB file
        out_suffix (str): Suffix appended to the original file name
        outdir (str): Path to the output directory
        force_rerun (bool): Re-clean the structure even if a cleaned file already exists
        remove_atom_alt (bool): Remove alternate positions
        keep_atom_alt_id (str): Alternate ID to keep when alternate positions are removed
        remove_atom_hydrogen (bool): Remove hydrogen atoms
        add_atom_occ (bool): Add atom occupancy fields if not present
        remove_res_hetero (bool): Remove all HETATMs
        keep_chemicals (str, list): Chemical names to keep when HETATMs are removed
        keep_res_only (str, list): Keep ONLY the specified resnames, deleting everything else
        add_chain_id_if_empty (str): Chain ID to add if none is present
        keep_chains (str, list): Keep only these chains

    Returns:
        str: Path to the cleaned PDB file

    """
    outfile = ssbio.utils.outfile_maker(inname=pdb_file,
                                        append_to_name=out_suffix,
                                        outdir=outdir,
                                        outext='.pdb')

    # Nothing to do: hand back the precomputed output path.
    if not ssbio.utils.force_rerun(flag=force_rerun, outfile=outfile):
        return outfile

    structure = StructureIO(pdb_file)

    # Collect the cleaning options in one place before building the selection.
    cleaner_options = dict(
        remove_atom_alt=remove_atom_alt,
        remove_atom_hydrogen=remove_atom_hydrogen,
        keep_atom_alt_id=keep_atom_alt_id,
        add_atom_occ=add_atom_occ,
        remove_res_hetero=remove_res_hetero,
        keep_res_only=keep_res_only,
        add_chain_id_if_empty=add_chain_id_if_empty,
        keep_chains=keep_chains,
        keep_chemicals=keep_chemicals,
    )
    selection = CleanPDB(**cleaner_options)

    return structure.write_pdb(out_suffix=out_suffix,
                               out_dir=outdir,
                               custom_selection=selection,
                               force_rerun=force_rerun)
def copy_results(self, copy_to_dir, rename_model_to=None, force_rerun=False): """Copy the raw information from I-TASSER modeling to a new folder. Copies all files in the list _attrs_to_copy. Args: copy_to_dir (str): Directory to copy the minimal set of results per sequence. rename_model_to (str): New file name (without extension) force_rerun (bool): If existing models and results should be overwritten. """ # Save path to the structure and copy it if specified if not rename_model_to: rename_model_to = self.model_to_use new_model_path = op.join(copy_to_dir, '{}.pdb'.format(rename_model_to)) if self.structure_path: if ssbio.utils.force_rerun(flag=force_rerun, outfile=new_model_path): # Clean and save it custom_clean = CleanPDB() my_pdb = StructureIO(self.structure_path) new_model_path = my_pdb.write_pdb( custom_selection=custom_clean, custom_name=rename_model_to, out_dir=copy_to_dir, force_rerun=force_rerun) # Update the structure_path to be the new clean file self.load_structure_path(structure_path=new_model_path, file_type='pdb') # Other modeling results - store in a new folder dest_itasser_dir = op.join(copy_to_dir, '{}_itasser'.format(rename_model_to)) if not op.exists(dest_itasser_dir): os.mkdir(dest_itasser_dir) for attr in self._attrs_to_copy: old_file_path = getattr(self, attr) new_file_path = op.join(dest_itasser_dir, op.basename(old_file_path)) if ssbio.utils.force_rerun(flag=force_rerun, outfile=new_file_path): shutil.copy2(old_file_path, new_file_path) log.debug('{}: copied from {}'.format( new_file_path, old_file_path)) else: log.debug('{}: file already exists'.format(new_file_path)) setattr(self, attr, new_file_path)
# Remaining command-line options, then parse.
p.add_argument('mutations',
               help='Mutations in the form of Chain1.ResNum1.Mutation1,Chain2.ResNum2.Mutation2. Example: A.4.TYR,B.4.TYR')
p.add_argument('--outsuffix', '-o',
               default='_mutated',
               help='Suffix appended to PDB file')
p.add_argument('--clean', '-c',
               action='store_true',
               help='Clean PDB and keep only chain with mutation')
args = p.parse_args()

mutations = parse_mutation_input(args.mutations)
my_pdb = StructureIO(args.infile)

if args.clean:
    # Keep only the chains named by the mutations (first field of each parsed
    # entry - presumably the chain ID, per the help text above).
    mutated_chains = [mutation[0] for mutation in mutations]
    my_cleaner = CleanPDB(keep_chains=mutated_chains)

    # The cleaned intermediate goes to the system temp directory and is then
    # reloaded so the mutation is applied to the cleaned structure.
    clean_dir = tempfile.gettempdir()
    my_clean_pdb = my_pdb.write_pdb(out_suffix='_clean',
                                    out_dir=clean_dir,
                                    custom_selection=my_cleaner)
    my_pdb = StructureIO(my_clean_pdb)

# Apply the mutations while writing the final structure.
my_mutation = MutatePDB(mutations)
my_mutated_pdb = my_pdb.write_pdb(out_suffix=args.outsuffix,
                                  out_dir='mutated_pdbs',
                                  custom_selection=my_mutation)

print('Mutated PDB at: {}'.format(my_mutated_pdb))
# Create the output directory if needed. os.makedirs also creates missing
# parent directories, so a nested --outdir works (os.mkdir would raise).
if not op.isdir(args.outdir):
    os.makedirs(args.outdir)

infiles = ssbio.utils.input_list_parser(args.infile)

for pdb in tqdm(infiles):
    # Expected output path for this input; used to decide whether to re-run
    outfile = ssbio.utils.outfile_maker(inname=pdb,
                                        append_to_name=args.outsuffix,
                                        outdir=args.outdir,
                                        outext='.pdb')

    if ssbio.utils.force_rerun(flag=args.force, outfile=outfile):
        my_pdb = StructureIO(pdb)

        # NOTE(review): the keep* command-line flags are passed straight through
        # as the remove_* options. That is only correct if those flags are
        # declared with action='store_false' - confirm against the parser setup.
        my_cleaner = CleanPDB(remove_atom_alt=args.keepalt,
                              remove_atom_hydrogen=args.keephydro,
                              keep_atom_alt_id='A',
                              add_atom_occ=True,
                              remove_res_hetero=args.keephetero,
                              add_chain_id_if_empty='X',
                              keep_chains=args.chain)

        my_clean_pdb = my_pdb.write_pdb(out_suffix=args.outsuffix,
                                        out_dir=args.outdir,
                                        custom_selection=my_cleaner,
                                        force_rerun=args.force)

print('Clean PDBs at: {}'.format(args.outdir))