def fix_valence_charge(mol: Chem.rdchem.Mol, inplace: bool = False) -> Optional[Chem.rdchem.Mol]:
    """Repair valence problems by reassigning formal charges.

    The molecule is first run through the RDKit validation from
    ``rdMolStandardize``. When the validator reports nothing, the input is
    returned untouched. Otherwise every atom gets its formal charge
    overwritten with
    ``implicit valence + explicit valence - default valence of the element``.

    Args:
        mol: Input molecule with incorrect valence for some atoms.
        inplace: When True the input molecule itself is modified; otherwise
            the changes are applied to a copy made with ``copy.copy``.

    Returns:
        The molecule with reassigned formal charges, or the input molecule
        unchanged when the validator found no problem.
    """
    validator = rdMolStandardize.RDKitValidation()
    problems = validator.validate(mol)

    # Don't fix something that is not broken.
    if len(problems) == 0:
        return mol

    target = mol if inplace else copy.copy(mol)

    # Refresh the cached valence information, passing False so the update
    # is non-strict and tolerates the broken valences we are about to fix.
    target.UpdatePropertyCache(False)

    for atom in target.GetAtoms():
        total_valence = atom.GetImplicitValence() + atom.GetExplicitValence()
        default_valence = dm.PERIODIC_TABLE.GetDefaultValence(atom.GetSymbol())
        atom.SetFormalCharge(total_valence - default_valence)

    return target
def featurization(
    r_mol: Chem.rdchem.Mol,
    p_mol: Chem.rdchem.Mol,
):
    """
    Generates features of the reactant and product for one reaction as input for the network.

    The graph is fully connected: every ordered pair of distinct atoms gets an
    edge, so a molecule with ``n`` atoms yields ``n * (n - 1)`` directed edges.
    Each edge carries two features, the 3D distance between the two atoms in
    the reactant and in the product. Node features come from ``atom_features``
    applied to the reactant atoms only.

    Args:
        r_mol: RDKit molecule object for the reactant.
        p_mol: RDKit molecule object for the product.

    Returns:
        data: Torch Geometric Data object, storing the atom and bond features:
            ``x`` (float, one row per reactant atom), ``edge_index`` (long,
            shape ``[2, num_edges]``) and ``edge_attr`` (float, shape
            ``[num_edges, 2]``). The edge tensors keep these 2-D shapes even
            when the molecule has fewer than two atoms and there are no edges.

    Note:
        The atom count is taken from ``r_mol`` and the same indices are used
        to read the product distance matrix, so ``p_mol`` is presumably
        expected to have the same atoms in the same order -- verify against
        the caller. Both molecules must provide what
        ``Chem.Get3DDistanceMatrix`` needs.
    """
    # compute properties with rdkit (only works if dataset is clean)
    r_mol.UpdatePropertyCache()
    p_mol.UpdatePropertyCache()

    n_atoms = r_mol.GetNumAtoms()

    # 3d distance matrices of reactant and product
    D_r = Chem.Get3DDistanceMatrix(r_mol)
    D_p = Chem.Get3DDistanceMatrix(p_mol)

    f_atoms = []  # atom (node) features
    edge_index = []  # (source, target) pairs, both directions of each edge
    f_bonds = []  # bond (edge) features, aligned with edge_index

    for a1 in range(n_atoms):
        # Node features
        f_atoms.append(atom_features(r_mol.GetAtomWithIdx(a1)))

        # Edge features: fully connected graph, both directions per pair
        for a2 in range(a1 + 1, n_atoms):
            edge_index.extend([(a1, a2), (a2, a1)])

            # for now, naively include both reac and prod distances;
            # bond-type features are not part of the edge features
            f_bonds.append([D_r[a1][a2], D_p[a1][a2]])
            f_bonds.append([D_r[a2][a1], D_p[a2][a1]])

    n_edges = len(edge_index)

    data = tg.data.Data()
    data.x = torch.tensor(f_atoms, dtype=torch.float)
    # The explicit reshape is a no-op when edges exist, but keeps the
    # [2, 0] / [0, 2] shapes for molecules without any atom pair, where
    # torch.tensor([]) would otherwise produce 1-D tensors.
    data.edge_index = (
        torch.tensor(edge_index, dtype=torch.long).reshape(n_edges, 2).t().contiguous()
    )
    data.edge_attr = torch.tensor(f_bonds, dtype=torch.float).reshape(n_edges, 2)

    return data