def test_canonize_ring_path():
    """Rotated and reversed ring paths must canonize to the same list."""
    expected = list(range(6))

    # Same ring, different starting atom.
    ring = deque(expected)
    ring.rotate(3)
    assert canonize_ring_path(ring) == expected

    # Same (still rotated) ring, walked in the opposite direction.
    ring.reverse()
    assert canonize_ring_path(ring) == expected

    # A tuple is rejected with ValueError.
    with pytest.raises(ValueError):
        canonize_ring_path(tuple(range(6)))
def _dicts(self):
    """Build the atom, ring and residue lookup arrays cached on the molecule.

    Walks ``self.atoms`` once to fill a NumPy structured array with per-atom
    data (coordinates, charges, neighbors, residue info and boolean property
    flags), then sets the acceptor/donor/basic/acidic flags from SMARTS
    matches. When ``self.protein`` is truthy, backbone coordinates are also
    collected per residue and passed to ``detect_secondary_structure`` so
    that ``isalpha``/``isbeta`` can be copied back onto the atoms. Finally,
    one record is built for every aromatic ring in ``self.sssr``.

    Nothing is returned. The results are stored as ``self._atom_dict``,
    ``self._ring_dict`` and (proteins only) ``self._res_dict``; each array
    is switched to read-only before the method exits.

    Warns
    -----
    UserWarning
        If an atom has more than ``max_neighbors`` neighbors; the surplus
        neighbors are ignored.
    """
    max_neighbors = 6  # max of 6 neighbors should be enough
    resname_dtype = 'U3' if PY3 else 'a3'

    # Atoms
    atom_dtype = [
        ('id', np.uint32),
        # atom info
        ('coords', np.float32, 3),
        ('radius', np.float32),
        ('charge', np.float32),
        ('atomicnum', np.int8),
        ('atomtype', 'U5' if PY3 else 'a5'),
        ('hybridization', np.int8),
        ('numhs', np.uint8),
        ('formalcharge', np.int8),
        ('neighbors_id', np.int16, max_neighbors),
        ('neighbors', np.float32, (max_neighbors, 3)),
        # residue info
        ('resid', np.int16),
        ('resnum', np.int16),
        ('resname', resname_dtype),
        ('isbackbone', bool),
        # atom properties
        ('isacceptor', bool),
        ('isdonor', bool),
        ('isdonorh', bool),
        ('ismetal', bool),
        ('ishydrophobe', bool),
        ('isaromatic', bool),
        ('isminus', bool),
        ('isplus', bool),
        ('ishalogen', bool),
        # secondary structure
        ('isalpha', bool),
        ('isbeta', bool),
    ]
    # Scratch layout for one atom's neighbors; identical for every atom, so
    # it is defined once instead of on each loop iteration.
    neighbor_dtype = [('id', np.int16),
                      ('coords', np.float32, 3),
                      ('atomicnum', np.int8)]

    atom_dict = np.empty(self.OBMol.NumAtoms(), dtype=atom_dtype)

    # Atomic numbers treated as metals; a frozenset keeps the per-atom
    # membership test constant-time.
    metals = frozenset([
        3, 4, 11, 12, 13, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30,
        31, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 55, 56,
        57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73,
        74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 87, 88, 89, 90, 91, 92, 93,
        94, 95, 96, 97, 98, 99, 100, 101, 102, 103
    ])
    halogens = (9, 17, 35, 53)

    for i, atom in enumerate(self.atoms):
        atomicnum = atom.atomicnum
        atomtype = typetable.Translate(atom.type)  # sybyl atom type
        partialcharge = atom.partialcharge
        coords = atom.coords

        if self.protein:
            residue = Residue(atom.OBAtom.GetResidue())
        else:
            residue = False

        # Collect neighbors. Slots that are never written keep id 0,
        # atomicnum 0 and NaN coordinates.
        neighbors = np.zeros(max_neighbors, dtype=neighbor_dtype)
        neighbors['coords'].fill(np.nan)
        for n, nbr_atom in enumerate(atom.neighbors):
            if n >= max_neighbors:
                warnings.warn(
                    'Error while parsing molecule "%s" '
                    'for `atom_dict`. Atom #%i (%s) has %i '
                    'neighbors (max_neighbors=%i). Additional '
                    'neighbors are ignored.' % (self.title,
                                                atom.idx0,
                                                atomtype,
                                                len(atom.neighbors),
                                                max_neighbors),
                    UserWarning)
                break
            # Hydrogens are not stored, but they still consume slot ``n``,
            # so that slot keeps its placeholder values.
            if nbr_atom.atomicnum == 1:
                continue
            neighbors[n] = (nbr_atom.idx0,
                            nbr_atom.coords,
                            nbr_atom.atomicnum)

        # Row index and 0-based atom index must agree: every later lookup
        # addresses atom_dict by atom index.
        assert i == atom.idx0

        if residue:
            resid = residue.idx0
            resnum = residue.number
            resname = residue.name
            # is backbone
            isbackbone = residue.OBResidue.GetAtomProperty(atom.OBAtom, 2)
        else:
            resid, resnum, resname, isbackbone = 0, 0, '', False

        atom_dict[i] = (
            i,
            coords,
            ob.GetVdwRad(atomicnum),
            partialcharge,
            atomicnum,
            atomtype,
            atom.OBAtom.GetHyb(),
            atom.OBAtom.GetTotalDegree() - atom.OBAtom.GetHvyDegree(),
            atom.formalcharge,
            neighbors['id'],
            neighbors['coords'],
            # residue info
            resid,
            resnum,
            resname,
            isbackbone,
            # atom properties
            False,  # isacceptor: set below from SMARTS matches
            False,  # isdonor: set below from SMARTS matches
            False,  # isdonorh: set below from the donors' hydrogens
            atomicnum in metals,
            # hydrophobe: a carbon whose neighbor slots hold only carbon
            # (6) or placeholder (0) entries; 1 is listed as well
            atomicnum == 6 and np.in1d(neighbors['atomicnum'],
                                       [6, 1, 0]).all(),
            atom.OBAtom.IsAromatic(),
            atom.formalcharge < 0,  # is charged (minus)
            atom.formalcharge > 0,  # is charged (plus)
            atomicnum in halogens,  # is halogen?
            False,  # alpha
            False,  # beta
        )

    # Rows whose element is neither hydrogen nor carbon; only these may
    # receive the SMARTS-derived flags below.
    not_carbon = np.argwhere(
        ~np.in1d(atom_dict['atomicnum'], [1, 6])).flatten()

    def _flag_matches(field, smarts):
        """Set ``atom_dict[field]`` on non-C/H atoms matched by ``smarts``."""
        matches = np.array(Smarts(smarts).findall(self)).flatten()
        if len(matches) > 0:
            # Match indices are shifted by one to address atom_dict rows.
            atom_dict[field][np.intersect1d(matches - 1, not_carbon)] = True

    # Acceptors
    _flag_matches(
        'isacceptor',
        '[$([O;H1;v2]),'
        '$([O;H0;v2;!$(O=N-*),'
        '$([O;-;!$(*-N=O)]),'
        '$([o;+0])]),'
        '$([n;+0;!X3;!$([n;H1](cc)cc),'
        '$([$([N;H0]#[C&v4])]),'
        '$([N&v3;H0;$(Nc)])]),'
        '$([F;$(F-[#6]);!$(FC[F,Cl,Br,I])])]')

    # Donors
    _flag_matches(
        'isdonor',
        '[$([N&!H0&v3,N&!H0&+1&v4,n&H1&+0,$([$([Nv3](-C)(-C)-C)]),'
        '$([$(n[n;H1]),'
        '$(nc[n;H1])])]),'
        # Guanidine can be tautomeric - e.g. Arginine
        '$([NX3,NX2]([!O,!S])!@C(!@[NX3,NX2]([!O,!S]))!@[NX3,NX2]([!O,!S])),'
        '$([O,S;H1;+0])]')
    # Hydrogens bonded to a donor atom
    atom_dict['isdonorh'][[
        n.idx0
        for idx in np.argwhere(atom_dict['isdonor']).flatten()
        for n in self.atoms[int(idx)].neighbors
        if n.atomicnum == 1
    ]] = True

    # Basic group
    _flag_matches(
        'isplus',
        '[$([N;H2&+0][$([C,a]);!$([C,a](=O))]),'
        '$([N;H1&+0]([$([C,a]);!$([C,a](=O))])[$([C,a]);!$([C,a](=O))]),'
        '$([N;H0&+0]([C;!$(C(=O))])([C;!$(C(=O))])[C;!$(C(=O))]),'
        '$([N,n;X2;+0])]')

    # Acidic group
    _flag_matches('isminus', '[CX3](=O)[OX1H0-,OX2H1]')

    if self.protein:
        # Protein Residues (alpha helix and beta sheet)
        res_dtype = [('id', np.int16),
                     ('resnum', np.int16),
                     ('resname', resname_dtype),
                     ('N', np.float32, 3),
                     ('CA', np.float32, 3),
                     ('C', np.float32, 3),
                     ('O', np.float32, 3),
                     ('isalpha', bool),
                     ('isbeta', bool)]  # N, CA, C, O

        b = []
        for residue in self.residues:
            backbone = {}
            for atom in residue:
                # NOTE(review): property 1 is presumably Open Babel's
                # amino-acid backbone flag - confirm against the
                # OBResidueAtomProperty enum.
                if residue.OBResidue.GetAtomProperty(atom.OBAtom, 1):
                    if atom.atomicnum == 7:
                        backbone['N'] = atom.coords
                    elif atom.atomicnum == 6:
                        # A backbone carbon typed 'C3' is stored as CA,
                        # any other backbone carbon as C.
                        if atom.type == 'C3':
                            backbone['CA'] = atom.coords
                        else:
                            backbone['C'] = atom.coords
                    elif atom.atomicnum == 8:
                        backbone['O'] = atom.coords
            # Residues lacking any of N, CA, C, O are left out.
            if len(backbone) == 4:
                b.append((residue.idx0,
                          residue.number,
                          residue.name,
                          backbone['N'],
                          backbone['CA'],
                          backbone['C'],
                          backbone['O'],
                          False,
                          False))
        res_dict = np.array(b, dtype=res_dtype)
        res_dict = detect_secondary_structure(res_dict)

        # Copy the per-residue secondary structure flags onto the atoms.
        alpha_mask = np.in1d(atom_dict['resid'],
                             res_dict[res_dict['isalpha']]['id'])
        atom_dict['isalpha'][alpha_mask] = True
        beta_mask = np.in1d(atom_dict['resid'],
                            res_dict[res_dict['isbeta']]['id'])
        atom_dict['isbeta'][beta_mask] = True

    # Aromatic Rings
    r = []
    for ring in self.sssr:
        if ring.IsAromatic():
            path = [x - 1 for x in ring._path]  # NOTE: mol.sssr is 1-based
            atoms = atom_dict[canonize_ring_path(path)]
            if len(atoms):
                # Residue info for the ring is taken from its first atom.
                atom = atoms[0]
                coords = atoms['coords']
                centroid = coords.mean(axis=0)
                # get vector perpendicular to ring
                ring_vectors = coords - centroid
                vector = np.cross(ring_vectors,
                                  np.roll(ring_vectors, shift=-1, axis=0)
                                  ).mean(axis=0)
                r.append((centroid,
                          vector,
                          atom['resid'],
                          atom['resnum'],
                          atom['resname'],
                          atom['isalpha'],
                          atom['isbeta']))
    ring_dict = np.array(r, dtype=[('centroid', np.float32, 3),
                                   ('vector', np.float32, 3),
                                   ('resid', np.int16),
                                   ('resnum', np.int16),
                                   ('resname', resname_dtype),
                                   ('isalpha', bool),
                                   ('isbeta', bool)])

    # Publish the arrays read-only.
    self._atom_dict = atom_dict
    self._atom_dict.setflags(write=False)
    self._ring_dict = ring_dict
    self._ring_dict.setflags(write=False)
    if self.protein:
        self._res_dict = res_dict
        self._res_dict.setflags(write=False)