#Nd += len(X) #Nt += Nu Nt += 1 if len(X) > 0: Nd += 1 X = np.stack(X) x = X[0].reshape(1, X.shape[1], 3) # keep only the first guy print(' -kept', len(x), 'of', Nu) if len(X) > 0: S = gdb.get_symbols_rdkitmol(m) hdn.write_rcdb_input(x[0], S, int(id), wdir, fpf, 100, LOT, '500.0', fill=4, comment='smiles: ' + Chem.MolToSmiles(m) + ' GDB-ID: ' + str(ridx)) hdn.writexyzfile(wdir + fpf + '-' + str(id).zfill(4) + '.xyz', x.reshape(1, x.shape[1], x.shape[2]), S) #print(str(id).zfill(8)) molnfo.close() print('Total mols:', Nd, 'of', Nt, 'percent:', "{:.2f}".format(100.0 * Nd / float(Nt)))
# For every file name in `files`: read the structure from `idir`, optimize it
# with the ANI calculator, and write the optimized geometry as a new input
# file in `sdir`.
# NOTE(review): assumes `Atoms` and `LBFGS` are the ASE classes of the same
# name -- the imports are not visible in this fragment; confirm.
for i, f in enumerate(files):
    # Starting geometry and species labels as stored in the source file.
    data = hdn.read_rcdb_coordsandnm(idir + f)
    X = data['coordinates']
    S = data['species']

    # Build the structure and attach a fresh calculator.  The `calc`
    # attribute replaces the deprecated `Atoms.set_calculator()`; the next
    # line already accesses the calculator through this same attribute.
    mol = Atoms(positions=X, symbols=S)
    mol.calc = ANI(False)
    mol.calc.setnc(nc)

    # Optimize with fmax=1e-5 and a cap of 1000 steps.  Every structure logs
    # to the same 'optimization.log' path.
    dyn = LBFGS(mol, logfile='optimization.log')
    dyn.run(fmax=0.00001, steps=1000)
    X = mol.get_positions()  # replace the input geometry with the optimized one

    # The file name is parsed as '<prefix>-<number>.<extension>': the number
    # becomes the index of the new input file and the prefix its file prefix.
    Nc = int(f.split(".")[0].split("-")[1])
    Fp = f.split("-")[0]
    smiles = get_smiles(idir + f)

    print(i, 'of', len(files), ':', f, dyn.get_number_of_steps(), Nc)
    hdn.write_rcdb_input(X, S, Nc, sdir, Fp, 50, 'wb97x/6-31g*', '800.0', fill=4, comment='Smiles: ' + smiles)
# Export every readable molecule of the DrugBank SDF file as an .xyz file
# (under '<storedir>xyz/') and as an input file (under '<storedir>').
storedir = '/home/jujuman/Research/extensibility_test_sets/drugbank/'
suppl = Chem.SDMolSupplier(
    '/home/jujuman/Dropbox/ChemSciencePaper.AER/Benchmark_Datasets/drugbank/drugbank_3d_1564.sdf',
    removeHs=False)

# The loop index is called `idx` rather than `id` so that the builtin id()
# is not shadowed at module scope.
for idx, m in enumerate(suppl):
    # Skip entries the supplier returned as None.  `idx` still advances, so
    # file numbering follows the position in the SDF file.
    if m is None:
        continue

    name = m.GetProp('_Name')
    xyz, spc = pya.__convert_rdkitconfs_to_nparr__(m)

    print(xyz.shape)
    print(name, idx, spc)

    # All of xyz goes into the .xyz file; only xyz[0] is used for the input file.
    hdt.writexyzfile(storedir + 'xyz/drugbank_' + str(idx).zfill(4) + '.xyz', xyz, spc)
    hdt.write_rcdb_input(xyz[0], spc, idx, storedir, 'drugbank', 10, 'wb97x/6-31g*', '300.0', fill=4, comment='Name: ' + name)

    # Print size
    #print(m.GetNumAtoms(), m.GetNumHeavyAtoms())
# Stack the collected energy arrays row-wise, take the standard deviation
# down the rows and scale it by the number of entries in spc.
# (The row reversal is kept exactly as in the original expression.)
energies = np.vstack(energies)
n_spc = float(len(spc))
modl_std = np.std(energies[::-1], axis=0) / n_spc

# Every structure whose scaled deviation exceeds 0.05 is flagged, remembered
# together with its position in xyz.
flagged = [
    (pos, geom)
    for pos, (geom, spread) in enumerate(zip(xyz, modl_std))
    if spread > 0.05
]
bad_cnt = len(flagged)

# Only every third flagged structure (1st, 4th, 7th, ...) is kept.
kept = flagged[::3]
bad_xyz = [geom for _, geom in kept]
bad_idx = [pos for pos, _ in kept]

# One input file per kept structure.  The job number is s_idx followed by a
# three-digit running counter; the comment records the position in xyz.
for j, (geom, pos) in enumerate(zip(bad_xyz, bad_idx)):
    job_id = int(s_idx + str(j).zfill(3))
    hdt.write_rcdb_input(geom, spc, job_id, r, fp, 5, 'wb97x/6-31g*', '500.0', freq='1', opt='0', fill=6, comment=' index: ' + str(pos))

print(s_idx, ' ', bad_cnt, 'of', modl_std.shape[0], ' Bad kept: ', len(bad_xyz))

# Disabled diagnostics and plotting, kept for reference.
#print("CV1:", modl_std)
#plt.plot(Rc[:, 1], np.abs(hdt.hatokcal*Eact[::-1] - energies[0,:][::-1]), color='red',label='Delta')
#plt.plot(Rc[:, 1], energies[0,:][::-1]-energies[0,:][::-1].min(),color='green',label='ANI-0')
#plt.plot(Rc[:, 1], energies[1,:][::-1]-energies[1,:][::-1].min(),color='green',label='ANI-1')
#plt.plot(Rc[:, 1], energies[2,:][::-1]-energies[2,:][::-1].min(),color='green',label='ANI-2')
#plt.plot(Rc[:, 1], energies[3,:][::-1]-energies[3,:][::-1].min(),color='green',label='ANI-3')
#plt.plot(Rc[:, 1], energies[4,:][::-1]-energies[4,:][::-1].min(),color='green',label='ANI-4')
#plt.plot(Rc[:, 1], float(len(spc))*modl_std[::-1],color='blue', label='std')
#plt.plot([Rc[:, 1].min(),Rc[:, 1].max()],[float(len(spc))*0.05,float(len(spc))*0.05],r'--')
#plt.plot(Rc[:, 1], hdt.hatokcal*(Rc[:, 0]-Rc[:, 0].min()),color='Black', label='Act')
#plt.legend(bbox_to_anchor=(0.01, 0.99), loc=2, borderaxespad=0., fontsize=14)
X = np.stack(X) #X = X[0].reshape(1, X.shape[1], 3) # keep only the first guy print(' -kept', len(X), 'of', Nu) if len(X) > 0: S = gdb.get_symbols_rdkitmol(m) P = np.random.binomial(1, Pr, Ns) for i, (x, p) in enumerate(zip(X, P)): #print(' -Keep:', p) if p: id = int(str(n) + str(i)) hdn.write_rcdb_input(x, S, id, wdir, fpf, LOT, charge=str(chg), fill=8, comment='smiles: ' + Chem.MolToSmiles(m)) hdn.writexyzfile( wdir + fpf + '-' + str(id).zfill(8) + '.xyz', x.reshape(1, x.shape[0], x.shape[1]), S) #print(str(id).zfill(8)) print('Total mols:', Nd, 'of', Nt, 'percent:', "{:.2f}".format(100.0 * Nd / float(Nt)))
# NOTE(review): this fragment was reconstructed from a whitespace-collapsed
# source.  The enclosing loop and the definitions of X, Nu, m, n, Pr, Ns, Nc,
# wdir, fpf, LOT and Nd are not visible here, so the nesting below is a best
# guess -- confirm against the original script.
if len(X) > 0:
    Nd += 1  # one more entry that yielded at least one structure
    X = np.stack(X)
    #X = X[0].reshape(1, X.shape[1], 3) # keep only the first guy
print(' -kept', len(X),'of',Nu)
if len(X) > 0:
    S = gdb.get_symbols_rdkitmol(m)
    # Ns independent 0/1 draws, each 1 with probability Pr; a structure is
    # written only when its draw is 1.
    P = np.random.binomial(1, Pr, Ns)
    # zip stops at the shorter of X and P.
    for i,(x,p) in enumerate(zip(X,P)):
        print(' -Keep:', p)
        if p:
            # NOTE(review): concatenating the decimal digits of n and i is not
            # unique (n=1, i=23 and n=12, i=3 both give 123) -- confirm that
            # collisions cannot occur for the ranges actually used.
            id = int(str(n)+str(i))
            hdn.write_rcdb_input(x,S,id,wdir,fpf,100,LOT,'500.0',fill=8,comment='smiles: '+Chem.MolToSmiles(m))
            # The reshape adds a leading axis of length 1 before the structure
            # is written to '<wdir><fpf>-<id zero-padded to 8>.xyz'.
            hdn.writexyzfile(wdir+fpf+'-'+str(id).zfill(8)+'.xyz',x.reshape(1,x.shape[0],x.shape[1]),S)
            #print(str(id).zfill(8))
# NOTE(review): the next line is a leftover merge-conflict separator and is not
# valid Python.  The code above and below it appear to come from two sides of
# an unresolved merge; decide which side to keep and remove the marker.
=======
# Sum the formal charges of all atoms of m to get the total molecular charge.
chg = 0
for a in m.GetAtoms():
    chg += a.GetFormalCharge()
print(' -Total Charge:',chg)
# Only molecules with zero total charge are embedded and optimized.
if chg == 0:
    # generate Nc conformers
    cids = AllChem.EmbedMultipleConfs(m, Nc, useRandomCoords=True)
    # Classical Optimization
    for cid in cids:
        # The return value of the optimizer is deliberately discarded.
        _ = AllChem.MMFFOptimizeMolecule(m, confId=cid, maxIters=1000)
"""Write one input file per geometry of a forward IRC scan log."""
import pygau09tools as g09
import hdnntools as hdn
import numpy as np

# Output directory for the generated input files and the IRC log to read.
wkdir = '/home/jujuman/Research/GDB-11-wB97X-6-31gd/dnnts_rxns/scans_double_bond_migration_1/'
filef = '/home/jujuman/Research/GDB-11-wB97X-6-31gd/dnnts_rxns/scans_double_bond_migration_1/IRC_fwd.log'
#fileb = '/home/jujuman/Research/GDB-11-wB97X-6-31gd/dnnts_rxns/scans_double_bond_migration/IRC_bck.log'

dataf = g09.read_irc(filef)
#datab = g09.read_irc(fileb)

# Only the forward scan is used.  The disabled line shows how a backward scan
# would have been flipped and prepended.
#xyz = np.concatenate([np.flipud(datab[1]),dataf[1]])
xyz = dataf[1]

# Field 2 of the parsed log is handed to the writer unchanged for every frame
# (presumably the element labels -- confirm against g09.read_irc).
labels = dataf[2]

# The frame number doubles as the index of the generated input file.
for frame, geom in enumerate(xyz):
    hdn.write_rcdb_input(geom, labels, frame, wkdir, 'double_B_mig', 50, 'wb97x/6-31g*', '1000.0', opt='0')