Пример #1
0
    def test_svg_annotation(component: Component, tmpdir_factory):
        """Export the 2D annotation of a component to JSON and sanity-check it.

        Components without any atom ids are skipped (the test returns
        without asserting anything).
        """
        if not component.atoms_ids:
            return

        work_dir = tmpdir_factory.mktemp('svg_json_test')
        out_file = os.path.join(work_dir, f'{component.id}.json')
        component.compute_2d(depictions)
        component.export_2d_annotation(out_file)

        # the export has to leave a non-empty file behind
        assert os.path.isfile(out_file)
        assert os.path.getsize(out_file) > 0

        with open(out_file, 'r') as handle:
            annotation = json.load(handle)

        assert annotation['ccd_id'] == component.id
        resolution = annotation['resolution']
        assert resolution['x'] >= 0
        assert resolution['y'] >= 0

        # atoms: one entry per heavy atom, every entry named, at least one
        # entry labelled (not every atom carries a label)
        atoms = annotation['atoms']
        atom_names = [atom['name'] for atom in atoms]
        assert len(atoms) == component.mol_no_h.GetNumAtoms()

        bonds = annotation['bonds']
        assert len(bonds) >= component.mol_no_h.GetNumBonds()
        assert any(atom['label'] for atom in atoms)
        assert all(atom['name'] for atom in atoms)

        # bonds: both ends refer to known atoms, coordinates and styling
        # are present
        assert all(
            bond['bgn'] in atom_names and bond['end'] in atom_names
            for bond in bonds
        )
        assert all(bond['coords'] for bond in bonds)
        assert all(bond['style'] for bond in bonds)
Пример #2
0
    def test_svg_annotation(component: Component, tmpdir_factory):
        """Write the 2D annotation JSON for a component and validate its content.

        Nothing is checked for components that have no atom ids.
        """
        if not component.atoms_ids:
            return

        target_dir = tmpdir_factory.mktemp("svg_json_test")
        out_file = os.path.join(target_dir, f"{component.id}.json")
        component.compute_2d(depictions)
        component.export_2d_annotation(out_file)

        # a non-empty file must have been produced
        assert os.path.isfile(out_file)
        assert os.path.getsize(out_file) > 0

        with open(out_file, "r") as stream:
            exported = json.load(stream)

        assert exported["ccd_id"] == component.id
        assert exported["resolution"]["x"] >= 0
        assert exported["resolution"]["y"] >= 0

        # every heavy atom is exported and carries a name
        atom_entries = exported["atoms"]
        atom_names = [entry["name"] for entry in atom_entries]
        assert len(atom_entries) == component.mol_no_h.GetNumAtoms()

        bond_entries = exported["bonds"]
        assert len(bond_entries) >= component.mol_no_h.GetNumBonds()
        assert all(entry["name"] for entry in atom_entries)

        # every bond references exported atoms and has coordinates and style
        for checked in (
            all(
                entry["bgn"] in atom_names and entry["end"] in atom_names
                for entry in bond_entries
            ),
            all(entry["coords"] for entry in bond_entries),
            all(entry["style"] for entry in bond_entries),
        ):
            assert checked
    def _compute_component_scaffolds(self, component: Component):
        """Run scaffold detection on a component and log the outcome.

        A CCDUtilsError raised by the detection is logged at error level
        and not propagated; otherwise the number of scaffolds found is
        logged at debug level.

        Args:
            component (Component): Component to be processed
        """
        try:
            component.get_scaffolds()
        except CCDUtilsError as err:
            logging.error(str(err))
        else:
            logging.debug(f"{len(component.scaffolds)} scaffold(s) were found.")
Пример #4
0
def _parse_pdb_mmcif(cif_dict):
    """
    Build a component from the mmcif representation of a molecule.

    Args:
        cif_dict (dict): mmcif category

    Returns:
        CCDReaderResult: warnings and errors collected while parsing,
            together with the component wrapping the Mol object.
    """
    warnings = []
    errors = []
    rw_mol = rdkit.Chem.RWMol()

    def preprocessed(category):
        # every call receives the same shared `warnings` list
        return _preprocess_pdb_parser_output(cif_dict, category, warnings)

    atoms = preprocessed('_chem_comp_atom')
    bonds = preprocessed('_chem_comp_bond')
    identifiers = preprocessed('_pdbx_chem_comp_identifier')
    descriptor_rows = preprocessed('_pdbx_chem_comp_descriptor')
    chem_comp = preprocessed('_chem_comp')

    # molecule graph: atoms, conformers, bonds, then implicit hydrogens
    _parse_pdb_atoms(rw_mol, atoms)
    _parse_pdb_conformers(rw_mol, atoms)
    _parse_pdb_bonds(rw_mol, bonds, atoms, errors)
    _handle_implicit_hydrogens(rw_mol)

    # descriptors first, identifiers appended after them
    descriptors = _parse_pdb_descriptors(descriptor_rows, 'descriptor')
    descriptors += _parse_pdb_descriptors(identifiers, 'identifier')
    properties = _parse_pdb_properties(chem_comp)

    return CCDReaderResult(
        warnings=warnings,
        errors=errors,
        component=Component(rw_mol.GetMol(), cif_dict, properties, descriptors),
    )
Пример #5
0
def to_pdb_ccd_cif_file(path, component: Component, remove_hs=True):
    """Write the component to a file in the PDB CCD CIF format.

    If `component.ccd_cif_dict` is not a dict, one is generated with
    `_to_pdb_ccd_cif_dict` and stored on the component. A deep copy of
    that dictionary is then enriched by the `_add_*_cif` helpers (software
    info, 2D depiction, fragments and scaffolds, rdkit properties, UniChem
    mapping, rdkit conformer) and written to `path` keyed by the
    component id. The dictionary kept on the component is not modified by
    the enrichment or by hydrogen removal.

    NOTE(review): the earlier documentation stated that both model and
    ideal coordinates are stored and that rdkit generates 3D coordinates
    when the ideal ones are missing; that logic lives in helpers that are
    not visible here - confirm.

    Args:
        path (str): Path to save cif file.
        component (Component): Component to be exported.
        remove_hs (bool, optional): Whether atoms whose `type_symbol` is
            "H", and every bond referring to one of them, are left out of
            the written file. Defaults to True.
    """
    if not isinstance(component.ccd_cif_dict, dict):
        component.ccd_cif_dict = _to_pdb_ccd_cif_dict(component)

    cif_copy = copy.deepcopy(component.ccd_cif_dict)

    _add_sw_info_cif(cif_copy)
    _add_2d_depiction_cif(component, cif_copy)
    _add_fragments_and_scaffolds_cif(component, cif_copy)
    _add_rdkit_properties_cif(component, cif_copy)
    _add_unichem_mapping_cif(component, cif_copy)
    _add_rdkit_conformer_cif(component, cif_copy, remove_hs)

    if remove_hs:
        atoms = cif_copy['_chem_comp_atom']
        bonds = cif_copy['_chem_comp_bond']

        # Sets keep the membership tests in the filters below O(1); with
        # lists every test was a linear scan, making the filtering
        # quadratic in the number of atoms/bonds.
        h_indices = {
            i for i, x in enumerate(atoms['type_symbol']) if x == "H"
        }
        h_names = {atoms['atom_id'][i] for i in h_indices}

        # rows of the bond table where either end is a hydrogen
        hb_indices = {
            i
            for key in ('atom_id_1', 'atom_id_2')
            for i, k in enumerate(bonds[key])
            if k in h_names
        }

        # scrap hydrogen atoms (every column of the atom table)
        for key in atoms:
            atoms[key] = [
                k for i, k in enumerate(atoms[key]) if i not in h_indices
            ]

        # scrap bonds to hydrogen atoms (every column of the bond table)
        for key in bonds:
            bonds[key] = [
                k for i, k in enumerate(bonds[key]) if i not in hb_indices
            ]

    cfd = mmcif.CifFileWriter(path)
    cfd.write({component.id: cif_copy})
    def _generate_ideal_structure(self, component: Component):
        """Compute 3D coordinates for the component and log diagnostics.

        The computation is always attempted. Afterwards one debug message
        is logged if the ideal conformer is degenerated and another one if
        the computation reported a falsy result.

        Args:
            component (Component): Component to be
                processed.
        Return:
            The value returned by `component.compute_3d()`.
        """
        outcome = component.compute_3d()
        ideal_is_degenerated = component.has_degenerated_conformer(
            ConformerType.Ideal)

        if ideal_is_degenerated:
            logging.debug("has degenerated ideal coordinates.")

        if not outcome:
            logging.debug("error in generating 3D conformation.")

        return outcome
Пример #7
0
def to_sdf_str(
    component: Component,
    remove_hs: bool = True,
    conf_type: ConformerType = ConformerType.Ideal,
):
    """Render the component as an SDF string.

    One record (title line, mol block, `$$$$` terminator) is produced per
    exported conformer. Conformers for which rdkit reports
    "Bad Conformer Id" are skipped. Any other exception raised while the
    records are built makes the function switch to
    `_to_sdf_str_fallback` for the whole output.

    Args:
        component (Component): Component to be exported.
        remove_hs (bool, optional): Defaults to True.
        conf_type (ConformerType, optional): Defaults to ConformerType.Ideal.

    Raises:
        CCDUtilsError: In case the structure could not be exported.
            NOTE(review): the CCDUtilsError raised in this function is
            caught by its own broad handler; whether the helpers raise it
            is not visible here.

    Returns:
        str: String representation of the component in the SDF format
    """
    mol_to_save, _, conf_type = _prepate_structure(component, remove_hs,
                                                   conf_type)

    # "all conformers" expands to the three concrete conformer types
    conformers = (
        [ConformerType.Model, ConformerType.Ideal, ConformerType.Computed]
        if conf_type == ConformerType.AllConformers
        else [conf_type]
    )

    mol_block = []
    try:
        for c in conformers:
            try:
                conf_id = (
                    component.get_conformer(c).GetId()
                    if c != ConformerType.AllConformers
                    else -1
                )
                mol_block.extend([
                    f"{component.id} - {c.name} conformer",
                    rdkit.Chem.MolToMolBlock(mol_to_save,
                                             confId=conf_id).strip(),
                    "$$$$\n",
                ])
            except ValueError as e:
                # a missing conformer is skipped; anything else is an error
                if str(e) != "Bad Conformer Id":
                    raise CCDUtilsError(f"Error writing SDF file - {e}")
    except Exception:
        mol_block = _to_sdf_str_fallback(mol_to_save, component.id, conformers)

    return "\n".join(mol_block)
    def _search_fragment_library(self, component: Component):
        """Look the component up in the fragment library and log the hit count.

        Nothing is logged when the search yields no matches.

        Args:
            component (Component): Component to be processed
        """
        matches = component.library_search(self.fragment_library)

        if not matches:
            return

        logging.debug(
            f"{len(matches)} matches found in the library `{self.fragment_library.name}`."
        )
Пример #9
0
def _parse_pdb_mmcif(cif, sanitize=True):
    """
    Build a component from the mmcif representation of a molecule.

    Args:
        cif (dict): mmcif dictionary
        sanitize (bool): Whether or not the rdkit component should
            be sanitized. Defaults to True.

    Returns:
        CCDReaderResult: warnings and errors collected while parsing,
            the sanitization outcome (False when sanitization was not
            requested) and the component wrapping the Mol object.
    """
    errors = []
    rw_mol = rdkit.Chem.RWMol()

    # preprocess every known category, keeping only non-empty warnings
    preprocessing_output = (
        cif_tools.preprocess_cif_category(cif, category)
        for category in preprocessable_categories
    )
    warnings = [message for message in preprocessing_output if message]

    # molecule graph: atoms, conformers, bonds, then implicit hydrogens
    _parse_pdb_atoms(rw_mol, cif)
    _parse_pdb_conformers(rw_mol, cif)
    _parse_pdb_bonds(rw_mol, cif, errors)
    _handle_implicit_hydrogens(rw_mol)

    sanitized = mol_tools.sanitize(rw_mol) if sanitize else False

    # descriptors first, identifiers appended after them
    descriptors = _parse_pdb_descriptors(
        cif, "_pdbx_chem_comp_descriptor", "descriptor"
    )
    descriptors += _parse_pdb_descriptors(
        cif, "_pdbx_chem_comp_identifier", "identifier"
    )
    properties = _parse_pdb_properties(cif["_chem_comp"])

    component = Component(rw_mol.GetMol(), cif, properties, descriptors)

    return CCDReaderResult(
        warnings=warnings,
        errors=errors,
        component=component,
        sanitized=sanitized,
    )
Пример #10
0
    def test_plain_cif_write(component: Component, tmpdir, rem_hs):
        """Write a component without a stored CIF dictionary and read it back.

        The stored dictionary is reset to None before writing. The file
        read back must contain the component id and every expected
        category.
        """
        cif_path = tmpdir.join(f"{component.id}.cif")
        expected_categories = must_have_categories.copy()

        component.ccd_cif_dict = None
        ccd_writer.write_molecule(str(cif_path), component, remove_hs=rem_hs)
        parsed = reader.read(cif_path)

        # Na is an atom! -> no "_chem_comp_bond" to expect
        is_single_atom = component.id == "NA"
        if is_single_atom:
            expected_categories.pop(2)

        # D3O has single heavy atom once hydrogens are removed
        is_d3o_without_hs = component.id == "D3O" and rem_hs
        if is_d3o_without_hs:
            expected_categories.pop(2)

        assert parsed
        assert component.id in parsed

        for category in expected_categories:
            assert category in parsed[component.id]
    def _generate_depictions(self, component: Component):
        """Generate 2D depictions of the component and a JSON annotation.

        SVG images are exported in the widths 100, 200, 300, 400 and 500,
        each with and without atom names, followed by one annotation
        file. Everything is written below
        `<self.output_dir>/<first character of the id>/<id>`. Wedge bonds
        are turned off when the depiction template is named 'cube'.

        Args:
            component (Component): Component to be depicted.
        """
        parent_dir = os.path.join(self.output_dir, component.id[0],
                                  component.id)
        depiction_result = component.compute_2d(self.depictions)
        debug = self.logger.debug

        if depiction_result.source == DepictionSource.Failed:
            debug('failed to generate 2D image.')
        else:
            if depiction_result.score > 0.99:
                debug('collision free image could not be generated.')
            debug(
                f'2D generated using {depiction_result.source.name} with score {depiction_result.score}.'
            )

        wedge_bonds = depiction_result.template_name != 'cube'

        for i in range(100, 600, 100):
            # plain image first, then the variant with atom names
            plain_svg = os.path.join(parent_dir, f'{component.id}_{i}.svg')
            component.export_2d_svg(plain_svg,
                                    width=i,
                                    wedge_bonds=wedge_bonds)

            named_svg = os.path.join(parent_dir,
                                     f'{component.id}_{i}_names.svg')
            component.export_2d_svg(named_svg,
                                    width=i,
                                    names=True,
                                    wedge_bonds=wedge_bonds)

        annotation_json = os.path.join(parent_dir,
                                       f'{component.id}_annotation.json')
        component.export_2d_annotation(annotation_json,
                                       wedge_bonds=wedge_bonds)
    def _generate_depictions(self, component: Component, out_dir: str):
        """Export SVG depictions and a JSON depiction annotation.

        SVG images are exported in the widths 100, 200, 300, 400 and 500,
        each with and without atom names, followed by one annotation
        file. Wedge bonds are turned off when the depiction template is
        named "cube".

        Args:
            component (Component): Component to be depicted.
            out_dir (str): Where the depictions should be stored.
        """
        depiction_result = component.compute_2d(self.depictions)

        if depiction_result.source == DepictionSource.Failed:
            logging.debug("failed to generate 2D image.")
        else:
            if depiction_result.score > 0.99:
                logging.debug("collision free image could not be generated.")
            logging.debug(
                f"2D generated using {depiction_result.source.name} with score {depiction_result.score}."
            )

        wedge_bonds = depiction_result.template_name != "cube"

        for i in range(100, 600, 100):
            # plain image first, then the variant with atom names
            plain_svg = os.path.join(out_dir, f"{component.id}_{i}.svg")
            component.export_2d_svg(plain_svg, width=i, wedge_bonds=wedge_bonds)

            named_svg = os.path.join(out_dir, f"{component.id}_{i}_names.svg")
            component.export_2d_svg(
                named_svg, width=i, names=True, wedge_bonds=wedge_bonds
            )

        annotation_json = os.path.join(out_dir, f"{component.id}_annotation.json")
        component.export_2d_annotation(annotation_json, wedge_bonds=wedge_bonds)