示例#1
0
    def test_clean_pdb_and_get_chain(self):
        """Clean a PDB down to chain A and compare it against a reference file.

        For every ``(input, reference)`` pair in ``files`` the input structure
        is written through ``write_pdb`` with a ``CleanPDB(keep_chains='A')``
        selection. The test then checks that:

        * the written file is named ``<input stem>_clean_chainA.pdb``,
        * its contents equal the stored reference file, and
        * its contents differ from the unmodified input file.
        """
        files = [('1kf6.pdb', '1kf6_clean_chainA_tester.pdb')]
        outdir = op.join('test_files', 'out')
        working_dir = 'test_structures'
        out_suffix = '_clean_chainA'
        custom_clean = CleanPDB(keep_chains='A')

        def read_text(path):
            # Use a context manager so every handle is closed right away;
            # a bare open(...).read() leaves the file open until it is
            # garbage collected and triggers ResourceWarning under unittest.
            with open(path, 'r') as handle:
                return handle.read()

        for infile, outfile in files:
            outfile_new = '{}{}.pdb'.format(op.splitext(infile)[0], out_suffix)
            infile_path = op.join(working_dir, infile)

            my_pdb = StructureIO(infile_path)
            default_cleaned_pdb = my_pdb.write_pdb(
                custom_selection=custom_clean,
                out_suffix=out_suffix,
                out_dir=outdir,
                force_rerun=True)
            default_cleaned_pdb_basename = op.basename(default_cleaned_pdb)

            # test if the filename is correct
            self.assertEqual(default_cleaned_pdb_basename, outfile_new)

            # Read the generated file once and reuse it for both comparisons.
            cleaned_contents = read_text(default_cleaned_pdb)

            # test if the file contents are equal to the stored reference
            self.assertEqual(
                cleaned_contents,
                read_text(op.join(working_dir, outfile)))

            # test that the file does not equal the original file
            self.assertNotEqual(
                cleaned_contents,
                read_text(infile_path))
示例#2
0
def clean_pdb(pdb_file,
              out_suffix='_clean',
              outdir=None,
              force_rerun=False,
              remove_atom_alt=True,
              keep_atom_alt_id='A',
              remove_atom_hydrogen=True,
              add_atom_occ=True,
              remove_res_hetero=True,
              keep_chemicals=None,
              keep_res_only=None,
              add_chain_id_if_empty='X',
              keep_chains=None):
    """Write a cleaned copy of a PDB file and return its path.

    The expected output path is derived from ``pdb_file``, ``out_suffix`` and
    ``outdir``. When ``ssbio.utils.force_rerun`` reports that no run is
    needed, that path is returned without loading the structure.

    Args:
        pdb_file (str): Path to input PDB file
        out_suffix (str): Suffix to append to original filename
        outdir (str): Path to output directory
        force_rerun (bool): If structure should be re-cleaned if a clean file exists already
        remove_atom_alt (bool): Remove alternate positions
        keep_atom_alt_id (str): If removing alternate positions, which alternate ID to keep
        remove_atom_hydrogen (bool): Remove hydrogen atoms
        add_atom_occ (bool): Add atom occupancy fields if not present
        remove_res_hetero (bool): Remove all HETATMs
        keep_chemicals (str, list): If removing HETATMs, keep specified chemical names
        keep_res_only (str, list): Keep ONLY specified resnames, deletes everything else!
        add_chain_id_if_empty (str): Add a chain ID if not present
        keep_chains (str, list): Keep only these chains

    Returns:
        str: Path to cleaned PDB file

    """
    expected_path = ssbio.utils.outfile_maker(
        inname=pdb_file,
        append_to_name=out_suffix,
        outdir=outdir,
        outext='.pdb',
    )

    # Nothing to do: hand back the path the cleaned file is expected at.
    if not ssbio.utils.force_rerun(flag=force_rerun, outfile=expected_path):
        return expected_path

    structure = StructureIO(pdb_file)

    # Collect the cleaning options in one place before building the selector.
    cleaner_options = {
        'remove_atom_alt': remove_atom_alt,
        'remove_atom_hydrogen': remove_atom_hydrogen,
        'keep_atom_alt_id': keep_atom_alt_id,
        'add_atom_occ': add_atom_occ,
        'remove_res_hetero': remove_res_hetero,
        'keep_res_only': keep_res_only,
        'add_chain_id_if_empty': add_chain_id_if_empty,
        'keep_chains': keep_chains,
        'keep_chemicals': keep_chemicals,
    }
    selector = CleanPDB(**cleaner_options)

    return structure.write_pdb(out_suffix=out_suffix,
                               out_dir=outdir,
                               custom_selection=selector,
                               force_rerun=force_rerun)
示例#3
0
    def copy_results(self,
                     copy_to_dir,
                     rename_model_to=None,
                     force_rerun=False):
        """Copy the model and the raw I-TASSER result files into a new folder.

        The model at ``self.structure_path`` is written to ``copy_to_dir``
        through a default ``CleanPDB`` selection, and every file named by the
        attributes in ``_attrs_to_copy`` is copied into a
        ``<model name>_itasser`` subfolder. The corresponding attributes are
        then pointed at the new locations. Nothing happens when
        ``self.structure_path`` is empty.

        Args:
            copy_to_dir (str): Directory to copy the minimal set of results per sequence.
            rename_model_to (str): New file name (without extension); falls
                back to ``self.model_to_use`` when not given.
            force_rerun (bool): If existing models and results should be overwritten.

        """
        model_name = rename_model_to or self.model_to_use
        cleaned_model_path = op.join(copy_to_dir, '{}.pdb'.format(model_name))

        # Without a structure there is nothing to clean or copy.
        if not self.structure_path:
            return

        if ssbio.utils.force_rerun(flag=force_rerun,
                                   outfile=cleaned_model_path):
            # Clean the model and save it under the requested name
            selector = CleanPDB()
            structure = StructureIO(self.structure_path)
            cleaned_model_path = structure.write_pdb(
                custom_selection=selector,
                custom_name=model_name,
                out_dir=copy_to_dir,
                force_rerun=force_rerun)

        # Point structure_path at the cleaned copy
        self.load_structure_path(structure_path=cleaned_model_path,
                                 file_type='pdb')

        # The remaining modeling results live in their own subfolder
        results_dir = op.join(copy_to_dir, '{}_itasser'.format(model_name))
        if not op.exists(results_dir):
            os.mkdir(results_dir)

        for attr_name in self._attrs_to_copy:
            source_path = getattr(self, attr_name)
            target_path = op.join(results_dir, op.basename(source_path))
            needs_copy = ssbio.utils.force_rerun(flag=force_rerun,
                                                 outfile=target_path)
            if not needs_copy:
                log.debug('{}: file already exists'.format(target_path))
            else:
                shutil.copy2(source_path, target_path)
                log.debug('{}: copied from {}'.format(
                    target_path, source_path))
            # The attribute is repointed whether or not a copy was made.
            setattr(self, attr_name, target_path)
示例#4
0
    # Positional argument: the mutation specification string, parsed below.
    p.add_argument(
        'mutations',
        help=
        'Mutations in the form of Chain1.ResNum1.Mutation1,Chain2.ResNum2.Mutation2. Example: A.4.TYR,B.4.TYR'
    )
    # Optional suffix used when naming the mutated output file.
    p.add_argument('--outsuffix',
                   '-o',
                   default='_mutated',
                   help='Suffix appended to PDB file')
    # Optional flag: pre-clean the structure before mutating it.
    p.add_argument('--clean',
                   '-c',
                   action='store_true',
                   help='Clean PDB and keep only chain with mutation')
    args = p.parse_args()

    mutations = parse_mutation_input(args.mutations)

    # NOTE(review): args.infile is defined on the parser outside this excerpt.
    my_pdb = StructureIO(args.infile)
    if args.clean:
        # Keep only the chains named by the first element of each parsed
        # mutation (presumably the chain ID, per the help text - confirm
        # against parse_mutation_input).
        my_cleaner = CleanPDB(keep_chains=[x[0] for x in mutations])
        # The cleaned intermediate is written to the system temp directory
        # and reloaded so the mutation is applied to the cleaned structure.
        my_clean_pdb = my_pdb.write_pdb(out_suffix='_clean',
                                        out_dir=tempfile.gettempdir(),
                                        custom_selection=my_cleaner)
        my_pdb = StructureIO(my_clean_pdb)

    # Apply the mutations while writing; output goes to the relative
    # 'mutated_pdbs' directory.
    my_mutation = MutatePDB(mutations)
    my_mutated_pdb = my_pdb.write_pdb(out_suffix=args.outsuffix,
                                      out_dir='mutated_pdbs',
                                      custom_selection=my_mutation)
    print('Mutated PDB at: {}'.format(my_mutated_pdb))
示例#5
0
文件: cleanpdb.py 项目: feiranl/ssbio
    # Create the output directory if it is not already there (single level
    # only: os.mkdir does not create missing parent directories).
    if not op.isdir(args.outdir):
        os.mkdir(args.outdir)

    # Expand the input argument into the list of PDB files to process.
    infiles = ssbio.utils.input_list_parser(args.infile)

    # tqdm wraps the iterable to show a progress bar.
    for pdb in tqdm(infiles):

        # Path the cleaned file is expected at, used for the rerun check.
        outfile = ssbio.utils.outfile_maker(inname=pdb,
                                            append_to_name=args.outsuffix,
                                            outdir=args.outdir,
                                            outext='.pdb')

        # Presumably true when forced or when outfile does not exist yet -
        # confirm against ssbio.utils.force_rerun.
        if ssbio.utils.force_rerun(flag=args.force, outfile=outfile):

            my_pdb = StructureIO(pdb)
            # NOTE(review): the keep* CLI flags feed remove_* parameters
            # directly; this is only correct if those flags are declared with
            # inverted defaults (e.g. action='store_false') - confirm in the
            # argument definitions outside this excerpt.
            my_cleaner = CleanPDB(remove_atom_alt=args.keepalt,
                                  remove_atom_hydrogen=args.keephydro,
                                  keep_atom_alt_id='A',
                                  add_atom_occ=True,
                                  remove_res_hetero=args.keephetero,
                                  add_chain_id_if_empty='X',
                                  keep_chains=args.chain)

            # The returned path is not used in the visible code; only the
            # output directory is reported below.
            my_clean_pdb = my_pdb.write_pdb(out_suffix=args.outsuffix,
                                            out_dir=args.outdir,
                                            custom_selection=my_cleaner,
                                            force_rerun=args.force)

    print('Clean PDBs at: {}'.format(args.outdir))