def atom_features(atom: Chem.rdchem.Atom, args) -> List[Union[bool, int, float]]:
    """
    Assembles the feature vector describing a single atom.

    The vector is the concatenation, in this order, of the encodings produced
    by ``onek_encoding_unk`` for the element symbol, total degree, formal
    charge, total hydrogen count and hybridization, followed by an aromaticity
    flag and the scaled atomic mass. Chirality encodings are appended last,
    and only when the matching flag on ``args`` is set.

    :param atom: An RDKit atom.
    :param args: An object exposing the truthy/falsy attributes
                 ``chiral_features`` and ``global_chiral_features``.
    :return: A list containing the atom features.
    """
    # NOTE(review): the element *symbol* is encoded against
    # ATOM_FEATURES['atomic_num'] -- confirm that entry holds symbols.
    symbol_part = onek_encoding_unk(atom.GetSymbol(), ATOM_FEATURES['atomic_num'])
    degree_part = onek_encoding_unk(atom.GetTotalDegree(), ATOM_FEATURES['degree'])
    charge_part = onek_encoding_unk(atom.GetFormalCharge(), ATOM_FEATURES['formal_charge'])
    features = symbol_part + degree_part + charge_part

    hydrogen_part = onek_encoding_unk(int(atom.GetTotalNumHs()), ATOM_FEATURES['num_Hs'])
    hybridization_part = onek_encoding_unk(int(atom.GetHybridization()), ATOM_FEATURES['hybridization'])
    aromatic_part = [int(bool(atom.GetIsAromatic()))]
    # Mass is scaled to about the same range as the other features.
    mass_part = [atom.GetMass() * 0.01]
    features += hydrogen_part + hybridization_part + aromatic_part + mass_part

    if args.chiral_features:
        features += onek_encoding_unk(int(atom.GetChiralTag()), ATOM_FEATURES['chiral_tag'])

    if args.global_chiral_features:
        # Atoms without a '_CIPCode' property are encoded using None as the value.
        cip_label = atom.GetProp('_CIPCode') if atom.HasProp('_CIPCode') else None
        features += onek_encoding_unk(cip_label, ATOM_FEATURES['global_chiral_tag'])

    return features
def atom_features(atom: Chem.rdchem.Atom) -> List[Union[bool, int, float]]:
    """
    Assembles the feature vector describing a single atom.

    The segments are concatenated in this order: element symbol encoding,
    aromaticity flag, total degree encoding, formal charge encoding, total
    hydrogen count encoding, scaled atomic mass. The encodings come from
    ``onek_encoding_unk`` applied against the matching ``ATOM_FEATURES`` entry.

    :param atom: An RDKit atom.
    :return: A list containing the atom features.
    """
    # NOTE(review): the element *symbol* is encoded against
    # ATOM_FEATURES['atomic_num'] -- confirm that entry holds symbols.
    element = onek_encoding_unk(atom.GetSymbol(), ATOM_FEATURES['atomic_num'])
    aromatic = [1] if atom.GetIsAromatic() else [0]
    degree = onek_encoding_unk(atom.GetTotalDegree(), ATOM_FEATURES['degree'])
    charge = onek_encoding_unk(atom.GetFormalCharge(), ATOM_FEATURES['formal_charge'])
    hydrogens = onek_encoding_unk(int(atom.GetTotalNumHs()), ATOM_FEATURES['num_Hs'])
    # Mass is scaled to about the same range as the other features.
    scaled_mass = [atom.GetMass() * 0.01]

    return element + aromatic + degree + charge + hydrogens + scaled_mass
def get_atom_features(atom:Chem.rdchem.Atom) -> np.ndarray:
    """Concatenate every per-atom encoding into a single numpy array.

    Segment order in the result: element, degree, total hydrogen count,
    implicit valence, formal charge, aromaticity flag. Each segment except the
    last is whatever ``one_of_k`` returns for the value and its allowed set
    (presumably a list of bools -- confirm against ``one_of_k``).

    :param atom: An RDKit atom.
    :return: The concatenated feature vector as a numpy array.
    """
    # Element vocabulary; the original note attributes it to Duvenaud's code.
    # 'H' is included in this list. 'Unknown' is the final entry (presumably
    # the fallback slot used by one_of_k -- confirm).
    element_vocab = [
        'C', 'N', 'O', 'S', 'F', 'Si', 'P', 'Cl', 'Br', 'Mg', 'Na', 'Ca',
        'Fe', 'As', 'Al', 'I', 'B', 'V', 'K', 'Tl', 'Yb', 'Sb', 'Sn', 'Ag',
        'Pd', 'Co', 'Se', 'Ti', 'Zn', 'H', 'Li', 'Ge', 'Cu', 'Au', 'Ni',
        'Cd', 'In', 'Mn', 'Zr', 'Cr', 'Pt', 'Hg', 'Pb', 'Unknown',
    ]

    element_bits = one_of_k(atom.GetSymbol(), element_vocab)
    degree_bits = one_of_k(atom.GetDegree(), list(range(6)))
    hydrogen_bits = one_of_k(atom.GetTotalNumHs(), list(range(5)))
    # The order of the allowed charges fixes the slot layout; keep it as is.
    charge_bits = one_of_k(atom.GetFormalCharge(), [-1, -2, 1, 2, 0])
    valence_bits = one_of_k(atom.GetImplicitValence(), list(range(6)))
    aromatic_bit = [atom.GetIsAromatic()]

    # Implicit valence is placed before formal charge in the final vector.
    combined = (
        element_bits
        + degree_bits
        + hydrogen_bits
        + valence_bits
        + charge_bits
        + aromatic_bit
    )
    return np.array(combined)
def copy_atom(atom: Chem.rdchem.Atom) -> Chem.rdchem.Atom:
    """
    Creates a new atom carrying over selected properties of an existing one.

    Only the element symbol, the formal charge and the atom map number are
    transferred; nothing else is read from the source atom.

    :param atom: The RDKit atom to copy from.
    :return: A newly constructed RDKit atom.
    """
    clone = Chem.Atom(atom.GetSymbol())

    charge = atom.GetFormalCharge()
    clone.SetFormalCharge(charge)

    map_number = atom.GetAtomMapNum()
    clone.SetAtomMapNum(map_number)

    return clone