def _get_charge_delta_label(reactant_mol, product_mol):
    """Return the per-atom formal-charge change, scaled by -2.

    Each atom's formal charge is stored at the position ``idxfunc(atom)``.
    Positions that no product atom maps to keep a product charge of 0.

    Args:
        reactant_mol: Molecule exposing ``GetNumAtoms()`` and ``GetAtoms()``;
            its atom count fixes the length of the result.
        product_mol: Molecule exposing ``GetAtoms()``.

    Returns:
        1-D integer array of length ``reactant_mol.GetNumAtoms()`` holding
        ``(product_charge - reactant_charge) * -2``.
    """
    num_atom = reactant_mol.GetNumAtoms()
    # ``np.int`` was removed in NumPy 1.24; it was only an alias for the
    # builtin ``int``, so the resulting dtype is unchanged.
    reactant_c_count = np.zeros(num_atom, dtype=int)
    product_c_count = np.zeros_like(reactant_c_count)
    for atom in reactant_mol.GetAtoms():
        reactant_c_count[idxfunc(atom)] = atom.GetFormalCharge()
    for atom in product_mol.GetAtoms():
        product_c_count[idxfunc(atom)] = atom.GetFormalCharge()
    return (product_c_count - reactant_c_count) * -2
def _get_hydrogen_delta_label(reactant_mol, product_mol):
    """Return the per-atom change in total hydrogen count, scaled by 2.

    Each atom's ``GetTotalNumHs()`` value is stored at the position
    ``idxfunc(atom)``. Positions that no product atom maps to keep a product
    hydrogen count of 0.

    Args:
        reactant_mol: Molecule exposing ``GetNumAtoms()`` and ``GetAtoms()``;
            its atom count fixes the length of the result.
        product_mol: Molecule exposing ``GetAtoms()``.

    Returns:
        1-D integer array of length ``reactant_mol.GetNumAtoms()`` holding
        ``(product_h - reactant_h) * 2``.
    """
    num_atom = reactant_mol.GetNumAtoms()
    # ``np.int`` was removed in NumPy 1.24; it was only an alias for the
    # builtin ``int``, so the resulting dtype is unchanged.
    reactant_h_count = np.zeros(num_atom, dtype=int)
    product_h_count = np.zeros_like(reactant_h_count)
    for atom in reactant_mol.GetAtoms():
        reactant_h_count[idxfunc(atom)] = atom.GetTotalNumHs()
    for atom in product_mol.GetAtoms():
        product_h_count[idxfunc(atom)] = atom.GetTotalNumHs()
    return (product_h_count - reactant_h_count) * 2
def count_reactant_atom(mol_str):
    """Count the atoms of a SMILES string whose index is in ``reactant_idx``.

    ``reactant_idx`` is not a parameter; it is resolved from the surrounding
    scope at call time.

    Args:
        mol_str: SMILES string handed to ``Chem.MolFromSmiles``.

    Returns:
        Number of atoms for which ``idxfunc(atom) in reactant_idx`` holds.
    """
    parsed = Chem.MolFromSmiles(mol_str)
    return sum(1 for member in parsed.GetAtoms() if idxfunc(member) in reactant_idx)
def get_delta_labels(reactant_mol, product_mol):
    """Build the edge, hydrogen, charge and octet-sum delta labels.

    The raw deltas come from ``_get_edge_delta_label``,
    ``_get_hydrogen_delta_label`` and ``_get_charge_delta_label``. They are
    then adjusted for every index that does not occur in the product:

    * edge deltas between two such indices are reset to 0;
    * the charge delta is reset to 0;
    * the hydrogen delta becomes the negated row sum of the edge deltas when
      the index is in the set returned by ``get_reactant_atom_idx``, and 0
      otherwise.

    Args:
        reactant_mol: Reactant molecule; its atom count sizes all arrays.
        product_mol: Product molecule.

    Returns:
        Dict with ``EDGE_DELTA_KEY``, ``H_DELTA_KEY``, ``C_DELTA_KEY`` and
        ``OCTET_SUM_KEY`` entries. The octet sum of an index is its edge-delta
        row sum plus its hydrogen and charge deltas.
    """
    product_atom_idx = {idxfunc(atom) for atom in product_mol.GetAtoms()}
    reactant_atom_idx = get_reactant_atom_idx(
        get_reactant_mols(reactant_mol), product_mol)

    edge_deltas = _get_edge_delta_label(reactant_mol, product_mol)
    h_deltas = _get_hydrogen_delta_label(reactant_mol, product_mol)
    c_deltas = _get_charge_delta_label(reactant_mol, product_mol)

    num_atom = reactant_mol.GetNumAtoms()
    # ``np.int`` was removed in NumPy 1.24; the builtin ``int`` is equivalent.
    octet_sum = np.zeros(num_atom, dtype=int)

    for idx in range(num_atom):
        if idx not in product_atom_idx:
            # Clear edges whose two endpoints are both absent from the product.
            for idx_other in range(num_atom):
                if idx_other not in product_atom_idx:
                    edge_deltas[idx, idx_other] = 0
                    edge_deltas[idx_other, idx] = 0
            if idx in reactant_atom_idx:
                # assume h on break
                h_deltas[idx] = -np.sum(edge_deltas[idx])
            else:
                h_deltas[idx] = 0
            c_deltas[idx] = 0
        octet_sum[idx] = (
            np.sum(edge_deltas[idx]) + h_deltas[idx] + c_deltas[idx])

    return {
        EDGE_DELTA_KEY: edge_deltas,
        H_DELTA_KEY: h_deltas,
        C_DELTA_KEY: c_deltas,
        OCTET_SUM_KEY: octet_sum,
    }
def __init__(self, delta_pred, num_candidates=10, calibration=(EDGE_CALIBRATION_KEY), soften=True, octet_rule=True, verbose=False):
    """Set up the sampler from one delta prediction and run the optimization.

    Args:
        delta_pred: Mapping read at ``OUTPUT_REACTION_STR_KEY``,
            ``OUTPUT_EDGE_DELTA_KEY``, ``OUTPUT_C_DELTA_KEY`` and
            ``OUTPUT_H_DELTA_KEY``.
        num_candidates: Stored as ``self.num_candidates``.
        calibration: Collection of calibration keys. If it contains
            ``EDGE_CALIBRATION_KEY`` / ``H_CALIBRATION_KEY`` the matching
            calibration function is ``smooth_calibrate`` instead of
            ``no_calibration``. A single string is treated as a collection
            holding just that one key.
        soften: When true, the three predictions are passed through
            ``soften_matrix`` before being stored.
        octet_rule: Stored as ``self.octet_rule``.
        verbose: When false, the model's ``OutputFlag`` parameter is set to 0.
    """
    self.reaction_str = delta_pred[OUTPUT_REACTION_STR_KEY]
    self.edge_delta_pred = delta_pred[OUTPUT_EDGE_DELTA_KEY]
    self.c_delta_pred = delta_pred[OUTPUT_C_DELTA_KEY]
    self.h_delta_pred = delta_pred[OUTPUT_H_DELTA_KEY]
    if soften:
        self.edge_delta_pred = soften_matrix(self.edge_delta_pred)
        self.c_delta_pred = soften_matrix(self.c_delta_pred)
        self.h_delta_pred = soften_matrix(self.h_delta_pred)

    self.num_candidates = num_candidates
    self.edge_coefficient = 5.0
    self.h_coefficient = 1.0
    self.c_coefficient = 1.0
    self.octet_rule = octet_rule

    # The default ``(EDGE_CALIBRATION_KEY)`` is a parenthesized value, not a
    # 1-tuple. If that value is a string, ``key in calibration`` would do
    # substring matching, so a lone string is wrapped in a tuple to get
    # whole-key membership tests.
    if isinstance(calibration, str):
        calibration = (calibration,)
    self.edge_calibration_fn = no_calibration
    self.h_calibration_fn = no_calibration
    if EDGE_CALIBRATION_KEY in calibration:
        self.edge_calibration_fn = smooth_calibrate
    if H_CALIBRATION_KEY in calibration:
        self.h_calibration_fn = smooth_calibrate

    self.verbose = verbose

    self.reactant_mol, self.product_mol = get_reactant_product_molecule(
        self.reaction_str)
    Chem.SanitizeMol(self.reactant_mol)
    # Work on the kekulized reactant with aromatic flags cleared.
    Chem.Kekulize(self.reactant_mol, clearAromaticFlags=True)

    self.n_atom = self.reactant_mol.GetNumAtoms()
    self.reactant_atom_map = {
        idxfunc(atom): atom for atom in self.reactant_mol.GetAtoms()
    }
    self.reactant_bond_map = {
        bond_idx_tuple(bond): bond for bond in self.reactant_mol.GetBonds()
    }
    self.reactant_atom_idx = get_reactant_atom_idx(
        get_reactant_mols(self.reactant_mol), self.product_mol)

    self.idx_to_delta_vars = {}
    self.reaction_center_delta_vars = []
    self.model_objective = []

    self.model = Model('Gurobi Sampler for Octet Sampling')
    if not self.verbose:
        self.model.setParam(GRB.Param.OutputFlag, 0)

    # Build the model step by step, then solve it.
    self._set_variables()
    self._set_constraints()
    self._set_model_objective()
    self._set_model_param()
    self._optimize_model()
def _get_edge_delta_label(reactant_mol, product_mol):
    """Return the pairwise change in bond charge between product and reactant.

    For each molecule a symmetric matrix is filled with
    ``BOND_TYPE_TO_CHARGE[bond.GetBondType()]`` at the positions given by
    ``idxfunc`` of the bond's two end atoms. Pairs without a bond stay 0.

    Args:
        reactant_mol: Molecule exposing ``GetNumAtoms()`` and ``GetBonds()``;
            its atom count fixes the matrix size.
        product_mol: Molecule exposing ``GetBonds()``.

    Returns:
        Integer array of shape ``(num_atom, num_atom)`` holding the product
        bond charges minus the reactant bond charges.
    """
    num_atom = reactant_mol.GetNumAtoms()
    # ``np.int`` was removed in NumPy 1.24; it was only an alias for the
    # builtin ``int``, so the resulting dtype is unchanged.
    reactant_edge_charge = np.zeros((num_atom, num_atom), dtype=int)
    product_edge_charge = np.zeros_like(reactant_edge_charge)

    # Both molecules are filled the same way, reactant first.
    for mol, edge_charge in ((reactant_mol, reactant_edge_charge),
                             (product_mol, product_edge_charge)):
        for bond in mol.GetBonds():
            begin_atom_idx = idxfunc(bond.GetBeginAtom())
            end_atom_idx = idxfunc(bond.GetEndAtom())
            bond_charge = BOND_TYPE_TO_CHARGE[bond.GetBondType()]
            edge_charge[begin_atom_idx, end_atom_idx] = bond_charge
            edge_charge[end_atom_idx, begin_atom_idx] = bond_charge

    return product_edge_charge - reactant_edge_charge
def _build_solution_mol(self, solution_dict):
    """Apply one solution's deltas to the reactant and return the new molecule.

    Bond deltas are applied first, in ascending order of delta, on an
    editable copy of ``self.reactant_mol``. Hydrogen and formal-charge deltas
    are then applied atom by atom, each delta being halved before use.

    Args:
        solution_dict: Mapping with ``EDGE_DELTA_VAR_NAME_HEADER``,
            ``H_DELTA_VAR_NAME_HEADER`` and ``C_DELTA_VAR_NAME_HEADER``
            entries, each itself a mapping from index (or index pair) to delta.

    Returns:
        The sanitized molecule, or ``None`` when the final sanitize/kekulize
        step raises ``ValueError``.

    Raises:
        ValueError: If a bond's resulting charge would be negative.
    """
    editable = Chem.rdchem.EditableMol(self.reactant_mol)

    # Modify Edge.
    edge_changes = sorted(
        solution_dict[EDGE_DELTA_VAR_NAME_HEADER].items(),
        key=operator.itemgetter(1))
    for pair, change in edge_changes:
        if not abs(change) > 0:
            continue
        first_idx, second_idx = pair
        begin = self._idx_to_atom_idx(first_idx)
        end = self._idx_to_atom_idx(second_idx)
        existing = self.reactant_mol.GetBondBetweenAtoms(begin, end)
        current_charge = 0
        if existing is not None:
            current_charge += BOND_TYPE_TO_CHARGE[existing.GetBondType()]
            editable.RemoveBond(begin, end)
        updated_charge = current_charge + change
        if updated_charge < 0:
            raise ValueError('New charge cannot be negative: {}'.format(updated_charge))
        if updated_charge > 0:
            editable.AddBond(begin, end, CHARGE_TO_BOND_TYPE[updated_charge])

    # Get the modified solution mol.
    result_mol = editable.GetMol()
    atoms_by_idx = {idxfunc(member): member for member in result_mol.GetAtoms()}

    # Modify H.
    for key, change in solution_dict[H_DELTA_VAR_NAME_HEADER].items():
        if not abs(change) > 0:
            continue
        target = atoms_by_idx[key]
        # Read the current count before implicit hydrogens are switched off.
        previous_h = int(target.GetTotalNumHs())
        target.SetNoImplicit(True)
        target.SetNumExplicitHs(int(previous_h + (change / 2)))

    # Modify Charge.
    for key, change in solution_dict[C_DELTA_VAR_NAME_HEADER].items():
        if not abs(change) > 0:
            continue
        target = atoms_by_idx[key]
        target.SetFormalCharge(int(target.GetFormalCharge() - (change / 2)))

    try:
        result_mol = Chem.Mol(result_mol)
        Chem.SanitizeMol(result_mol)
        Chem.Kekulize(result_mol, clearAromaticFlags=False)
        Chem.SanitizeMol(result_mol)
    except ValueError:
        return None
    return result_mol
def get_mol_atom_features(reactant_mol, num_atom=None, reactant_atom_idx=None):
    """Build the per-atom feature matrix of a molecule.

    Row ``idxfunc(atom)`` receives the concatenation of
    ``_get_atomic_features(atom.GetAtomicNum())`` and
    ``_get_atom_features(atom)`` in its first ``ATOM_FEATURE_DIM - 1``
    columns. The last column is set to 1 when the index is contained in
    ``reactant_atom_idx``.

    Args:
        reactant_mol: Molecule exposing ``GetNumAtoms()`` and ``GetAtoms()``.
        num_atom: Number of rows; defaults to ``reactant_mol.GetNumAtoms()``.
        reactant_atom_idx: Optional container of indices to flag in the last
            column; ``None`` leaves that column at 0.

    Returns:
        Dict mapping ``ATOM_FEATURES_KEY`` to a float array of shape
        ``(num_atom, ATOM_FEATURE_DIM)``.
    """
    if num_atom is None:
        num_atom = reactant_mol.GetNumAtoms()
    # ``np.float`` was removed in NumPy 1.24; it was only an alias for the
    # builtin ``float``, so the resulting dtype is unchanged.
    atom_features = np.zeros((num_atom, ATOM_FEATURE_DIM), dtype=float)
    for atom in reactant_mol.GetAtoms():
        idx = idxfunc(atom)
        atom_feature = np.append(
            _get_atomic_features(atom.GetAtomicNum()),
            _get_atom_features(atom))
        atom_features[idx, :ATOM_FEATURE_DIM - 1] = atom_feature
        if reactant_atom_idx is not None and idx in reactant_atom_idx:
            atom_features[idx, -1] = 1
    return {ATOM_FEATURES_KEY: atom_features}
def bond_idx_tuple(bond):
    """Return the bond's two end-atom indices as an ascending 2-tuple.

    The indices are obtained by applying ``idxfunc`` to the bond's begin and
    end atoms.
    """
    begin_idx = idxfunc(bond.GetBeginAtom())
    end_idx = idxfunc(bond.GetEndAtom())
    ordered = [begin_idx, end_idx]
    ordered.sort()
    return tuple(ordered)
def get_mol_bond_features(reactant_mol, num_atom=None, reactant_atom_idx=None, reactant_component_map=None):
    """Build pairwise bond features and per-atom neighbor tables.

    For every bond, the feature vector from ``_get_bond_features`` is extended
    with a flag that is 1 when either end atom is in ``reactant_atom_idx``,
    followed by a constant 1 marking that the bond exists. The result is
    written symmetrically into the first ``BOND_FEATURE_DIM - 2`` entries of
    the atom pair. The last two entries of every pair are filled afterwards:

    * index ``-2``: 1 when both atoms have equal entries in
      ``reactant_component_map[COMPONENT_MAP_KEY]``;
    * index ``-1``: 1 when ``reactant_component_map[COMPONENT_NUM_KEY] > 1``.

    Both are 0 when ``reactant_component_map`` is ``None``.

    Args:
        reactant_mol: Molecule exposing ``GetNumAtoms()`` and ``GetBonds()``.
        num_atom: Size of the atom axes; defaults to
            ``reactant_mol.GetNumAtoms()``.
        reactant_atom_idx: Optional container of atom indices to flag.
        reactant_component_map: Optional mapping read at
            ``COMPONENT_MAP_KEY`` and ``COMPONENT_NUM_KEY``.

    Returns:
        Dict with ``BOND_FEATURES_KEY`` (float array of shape
        ``(num_atom, num_atom, BOND_FEATURE_DIM)``), ``NEIGHBOR_ATOM_KEY``,
        ``NEIGHBOR_BOND_KEY`` and ``NEIGHBOR_MASK_KEY`` (integer arrays with
        ``MAX_NEIGHBOR_NUM`` slots per atom).
    """
    if num_atom is None:
        num_atom = reactant_mol.GetNumAtoms()

    # ``np.float`` / ``np.int`` were removed in NumPy 1.24; they were only
    # aliases for the builtins, so the resulting dtypes are unchanged.
    bond_features = np.zeros((num_atom, num_atom, BOND_FEATURE_DIM), dtype=float)
    neighbor_atom = np.zeros((num_atom, MAX_NEIGHBOR_NUM), dtype=int)
    neighbor_bond = np.zeros((num_atom, MAX_NEIGHBOR_NUM, 2), dtype=int)
    neighbor_mask = np.zeros_like(neighbor_atom)
    # Next free neighbor slot for each atom.
    neighbor_num = np.zeros(num_atom, dtype=int)

    for bond in reactant_mol.GetBonds():
        begin_atom_idx = idxfunc(bond.GetBeginAtom())
        end_atom_idx = idxfunc(bond.GetEndAtom())

        neighbor_atom[begin_atom_idx, neighbor_num[begin_atom_idx]] = end_atom_idx
        neighbor_atom[end_atom_idx, neighbor_num[end_atom_idx]] = begin_atom_idx
        neighbor_bond[begin_atom_idx, neighbor_num[begin_atom_idx]] = [
            begin_atom_idx, end_atom_idx
        ]
        neighbor_bond[end_atom_idx, neighbor_num[end_atom_idx]] = [
            end_atom_idx, begin_atom_idx
        ]
        neighbor_mask[begin_atom_idx, neighbor_num[begin_atom_idx]] = 1
        neighbor_mask[end_atom_idx, neighbor_num[end_atom_idx]] = 1
        neighbor_num[begin_atom_idx] += 1
        neighbor_num[end_atom_idx] += 1

        bond_feature = _get_bond_features(bond)
        touches_reactant = reactant_atom_idx is not None and (
            begin_atom_idx in reactant_atom_idx
            or end_atom_idx in reactant_atom_idx)
        bond_feature = np.append(bond_feature, 1 if touches_reactant else 0)
        # bond exists
        bond_feature = np.append(bond_feature, 1)
        bond_features[begin_atom_idx, end_atom_idx, :BOND_FEATURE_DIM - 2] = bond_feature
        bond_features[end_atom_idx, begin_atom_idx, :BOND_FEATURE_DIM - 2] = bond_feature

    for i in range(num_atom):
        for j in range(num_atom):
            same_component = (
                reactant_component_map is not None
                and reactant_component_map[COMPONENT_MAP_KEY][i]
                == reactant_component_map[COMPONENT_MAP_KEY][j])
            bond_features[i, j, -2] = bond_features[j, i, -2] = (
                1 if same_component else 0)

            multi_component = (
                reactant_component_map is not None
                and reactant_component_map[COMPONENT_NUM_KEY] > 1)
            bond_features[i, j, -1] = bond_features[j, i, -1] = (
                1 if multi_component else 0)

    return {
        BOND_FEATURES_KEY: bond_features,
        NEIGHBOR_ATOM_KEY: neighbor_atom,
        NEIGHBOR_BOND_KEY: neighbor_bond,
        NEIGHBOR_MASK_KEY: neighbor_mask,
    }