Пример #1
0
def fix_valence_charge(mol: Chem.rdchem.Mol, inplace: bool = False) -> Optional[Chem.rdchem.Mol]:
    """Repair valence problems by reassigning formal charges.

    The molecule is first checked with ``rdMolStandardize.RDKitValidation``.
    If the validator reports nothing, the input is returned as-is. Otherwise
    every atom's formal charge is overwritten with
    ``implicit valence + explicit valence - default valence`` for its element.

    Args:
        mol: Input molecule, possibly with incorrect valence on some atoms.
        inplace: If True, modify ``mol`` directly; if False, work on a copy
            made with ``copy.copy`` (only when a fix is actually attempted).

    Returns:
        The input molecule unchanged when validation reports no issue,
        otherwise the molecule (or its copy) with reassigned formal charges.
    """
    validator = rdMolStandardize.RDKitValidation()
    issues = validator.validate(mol)

    # Don't fix something that is not broken: a clean molecule is returned
    # untouched and without being copied.
    if len(issues) == 0:
        return mol

    target = mol if inplace else copy.copy(mol)

    # The positional False is RDKit's ``strict`` flag; the valence queries
    # below need the property cache to be populated first.
    target.UpdatePropertyCache(False)

    for atom in target.GetAtoms():
        observed_valence = atom.GetImplicitValence() + atom.GetExplicitValence()
        expected_valence = dm.PERIODIC_TABLE.GetDefaultValence(atom.GetSymbol())
        # Whatever exceeds (or falls short of) the default valence is
        # attributed to the atom's formal charge.
        atom.SetFormalCharge(observed_valence - expected_valence)

    return target
Пример #2
0
def featurization(r_mol: Chem.rdchem.Mol,
                  p_mol: Chem.rdchem.Mol,
                  ):
    """
    Generates features of the reactant and product for one reaction as input for the network.

    The graph is fully connected: every ordered pair of distinct atoms gets an
    edge, and each edge carries two features, the 3D distance between the two
    atoms in the reactant and in the product. Node features come from
    ``atom_features`` applied to the reactant atoms only.

    Args:
        r_mol: RDKit molecule object for the reactant. Needs 3D coordinates,
            since ``Chem.Get3DDistanceMatrix`` is called on it.
        p_mol: RDKit molecule object for the product. Needs 3D coordinates as
            well. NOTE(review): atom indices are assumed to correspond
            one-to-one with ``r_mol`` (same count, same ordering) - confirm
            against the dataset loader.

    Returns:
        data: Torch Geometric Data object with
            ``x``          - atom features, one row per reactant atom,
            ``edge_index`` - long tensor of shape (2, n_edges),
            ``edge_attr``  - float tensor of shape (n_edges, 2).
    """

    # compute properties with rdkit (only works if dataset is clean)
    r_mol.UpdatePropertyCache()
    p_mol.UpdatePropertyCache()

    # the reactant defines how many nodes the graph has
    n_atoms = r_mol.GetNumAtoms()

    # 3d distance matrices (the only pairwise quantity used as edge feature)
    D_r = Chem.Get3DDistanceMatrix(r_mol)
    D_p = Chem.Get3DDistanceMatrix(p_mol)

    # each edge carries [reactant distance, product distance]
    n_edge_feats = 2

    f_atoms = list()        # atom (node) features
    edge_index = list()     # list of tuples indicating presence of bonds
    f_bonds = list()        # bond (edge) features

    for a1 in range(n_atoms):

        # Node features
        f_atoms.append(atom_features(r_mol.GetAtomWithIdx(a1)))

        # Edge features
        for a2 in range(a1 + 1, n_atoms):
            # fully connected graph: add both directions of every pair
            edge_index.extend([(a1, a2), (a2, a1)])

            # for now, naively include both reac and prod distances
            f_bonds.append([D_r[a1][a2], D_p[a1][a2]])
            f_bonds.append([D_r[a2][a1], D_p[a2][a1]])

    n_edges = len(edge_index)

    data = tg.data.Data()
    data.x = torch.tensor(f_atoms, dtype=torch.float)
    # Explicit reshapes keep the expected (2, n_edges) / (n_edges, n_feats)
    # layout even when there are no edges at all (molecule with < 2 atoms);
    # without them an empty list would yield 1-D tensors of shape (0,).
    data.edge_index = (
        torch.tensor(edge_index, dtype=torch.long)
        .reshape(n_edges, 2)
        .t()
        .contiguous()
    )
    data.edge_attr = torch.tensor(f_bonds, dtype=torch.float).reshape(n_edges, n_edge_feats)

    return data