def _getPropertiesRDkit(smallmol):
    """
    Returns a boolean ndarray of shape (n_atoms x 8) flagging, per atom, the
    RDKit feature families it belongs to. Columns, in the order the code
    assigns them:
        0. Hydrophobic (RDKit families "Hydrophobe" and "LumpedHydrophobe")
        1. Aromatic
        2. Acceptor
        3. Donor
        4. + Ionizable ("PosIonizable")
        5. - Ionizable ("NegIonizable")
        6. Metal (never set by this function)
        7. Occupancy (True for every atom whose element is not "H")
    """
    from moleculekit.smallmol.util import factory

    atom_count = smallmol.numAtoms

    # RDKit feature family -> output column. Both hydrophobe families share
    # column 0; families missing from this table are ignored.
    family_to_column = {
        "Hydrophobe": 0,
        "LumpedHydrophobe": 0,
        "Aromatic": 1,
        "Acceptor": 2,
        "Donor": 3,
        "PosIonizable": 4,
        "NegIonizable": 5,
    }

    mol_features = factory.GetFeaturesForMol(smallmol._mol)
    properties = np.zeros((atom_count, 8), dtype=bool)

    for feature in mol_features:
        column = family_to_column.get(feature.GetFamily())
        if column is None:
            # Family we do not track.
            continue
        # A feature may span several atoms; flag all of them at once.
        properties[feature.GetAtomIds(), column] = True

    # Occupancy channel: every non-hydrogen atom.
    properties[:, 7] = smallmol.get("element") != "H"
    return properties
def _getPropertiesRDkit(smallmol):
    """
    Returns an ndarray with one row per ligand atom, built by concatenating
    four groups of per-atom channels along the last axis, in this order:
        1. Three RDKit feature-family flags:
           0. Hydrophobe, 1. LumpedHydrophobe, 2. ZnBinder
        2. len(arpeggio_atomtypes_unique) arpeggio atom-type flags; each type
           is placed at the column index stored in arpeggio_atomtypes_unique
        3. The per-element vectors returned by get_atomic_feature (width and
           dtype are decided by that helper, not here)
        4. One "protein" flag column, always 0 because these atoms are ligand

    The molecule is written to a temporary PDB file so that process_arpeggio
    can type its atoms. The file lives in a private temporary directory that
    is removed even if writing or arpeggio processing raises.

    NOTE(review): `factory`, `process_arpeggio`, `get_atomic_feature`,
    `arpeggio_atomtypes_unique`, `np` and `os` are expected to be available at
    module level; they are not imported here.
    """
    from tempfile import TemporaryDirectory

    n_atoms = smallmol.numAtoms

    # A private directory avoids reusing the name of an already-deleted
    # NamedTemporaryFile and guarantees cleanup on every exit path.
    with TemporaryDirectory() as tmpdir:
        pdb_path = os.path.join(tmpdir, "ligand.pdb")
        smallmol.write(pdb_path)
        s_atoms = process_arpeggio(pdb_path)

    # Assumes process_arpeggio yields one entry per atom, in the molecule's
    # atom order; the final concatenate fails if the row counts differ.
    n_arpeggio_types = len(arpeggio_atomtypes_unique)
    arpeggio_features = []
    # Index access is kept as in the original; the container type of s_atoms
    # is not visible from this function.
    for i, _ in enumerate(s_atoms):
        raw_features = sorted(s_atoms[i].atom_types)
        feature_vector = np.zeros(n_arpeggio_types, dtype=bool)
        for rf in raw_features:
            # Atom types without an assigned column are ignored.
            if rf in arpeggio_atomtypes_unique:
                feature_vector[arpeggio_atomtypes_unique[rf]] = 1
        arpeggio_features.append(feature_vector)
        # Diagnostic output whenever the last arpeggio channel is set.
        if feature_vector[-1] != 0:
            print(raw_features, feature_vector)
    arpeggio_features = np.stack(arpeggio_features, axis=0)

    # Only these RDKit families are taken from RDKit; the remaining
    # pharmacophore-like features come from arpeggio.
    atom_mapping1 = {
        "Hydrophobe": 0,
        "LumpedHydrophobe": 1,
        "ZnBinder": 2,
    }

    feats = factory.GetFeaturesForMol(smallmol._mol)
    properties = np.zeros((n_atoms, 3), dtype=bool)
    protein_feature = np.zeros(shape=(len(properties), 1), dtype=bool)  # 0 for ligand
    for feat in feats:
        fam = feat.GetFamily()
        if fam not in atom_mapping1:
            # Non relevant property
            continue
        # A feature may span several atoms; flag all of them at once.
        properties[feat.GetAtomIds(), atom_mapping1[fam]] = 1

    els = np.stack([get_atomic_feature(e) for e in smallmol.get('element')], axis=0)

    # Diagnostic output: shapes of the four channel groups.
    print([
        c.shape for c in [properties, arpeggio_features, els, protein_feature]
    ])
    channels = np.concatenate(
        [properties, arpeggio_features, els, protein_feature], axis=-1)
    return channels