def get_properties(self, keys):
    """ Extract properties from the Molpro output/log files by grepping them.

    Parameters
    ----------
    keys : sequence of str or None
        Property names (matched case-insensitively). If `keys is None,
        empty, or its first entry is 'e', all energies listed in
        `self.eprops are extracted instead. Besides the entries of
        `self.eprops, the keys 'dipole', 'h**o', 'lumo' and 'gap' are
        handled.

    Side effects
    ------------
    * `self.props is updated with the extracted values; energies are stored
      under the key `key + self.basis and are multiplied by `self.e_const.
    * `self.energetic_props is set to the list of energy keys written
      during this call.

    Raises
    ------
    Exception
        if a key is not supported, or if an energy is requested for an
        optimization job (`self.ioptg) run with a method that is not in the
        hard-coded list below.
    """
    if (keys is None) or len(keys) == 0 or (keys[0].lower() == 'e'):
        keys = self.eprops

    props = {}
    # Initialised once, before the loop: the list then exists even when
    # there is nothing to iterate over, and it collects the energy keys of
    # *all* requested properties.
    energetic_props = []
    for _key in keys:
        key = _key.lower()
        _props = {}
        if key in self.eprops:
            const = self.e_const
            patt = self.epatts[self.eprops.index(key)]
            if self.ioptg:
                if ''.join(key.split('-')) in [
                        'b3lyp', 'dfb3lyp', 'mp2', 'dfmp2', 'mp2f12',
                        'dfmp2f12'
                ]:
                    # the energy is the 3rd field of the line found two
                    # lines above the 'END OF GEOMETRY OPTIMIZATION' marker
                    cmd = "grep -B2 ' END OF GEOMETRY OPTIMIZATION' %s | head -n 1" % self.fo
                    cs = cmdout1(cmd).split()
                    # float() instead of eval(): never execute file content
                    v = float(cs[2]) * const
                else:
                    raise Exception('geom optimizer not supported!')
            else:
                # last line matching the energy pattern, last field
                cmd = "grep -E '%s' %s | tail -n 1" % (patt, self.fo)
                cs = cmdout1(cmd).split()
                v = float(cs[-1]) * const
            key2 = key + self.basis
            energetic_props.append(key2)
            _props[key2] = v
        elif key in [
                'dipole',
        ]:
            assert os.path.exists(self.fl)
            # raw string: `\/` is not a valid Python escape sequence
            cmd = r"grep ' Dipole moment \/Debye' %s | tail -1 | awk '{print $4,$5,$6}'" % self.fl
            dip = np.asarray(cmdout1(cmd).split(), dtype=float)
            _props[key] = dip
        elif key in ['h**o', 'lumo', 'gap']:
            # NOTE(review): the literal key 'h**o' looks like a masked
            # spelling; it is kept as found -- confirm against callers.
            assert os.path.exists(self.fl)
            cmd = "grep '^ [HL][OU]MO' %s | tail -3 | awk '{print $NF}' | sed 's/eV//g'" % self.fl
            # the three values are divided by `io2.Units().h2e and are
            # always stored together, whichever of the three was requested
            mos = np.asarray(cmdout(cmd), dtype=float) / io2.Units().h2e
            _props.update(dict(zip(['h**o', 'lumo', 'gap'], mos)))
        else:
            raise Exception('#ERROR: method %s not supported' % key)
        props.update(_props)
    self.props.update(props)
    self.energetic_props = energetic_props
from aqml.cheminfo.core import * import os, sys, io2 import scipy.spatial.distance as ssd #import torch from representation.xb import * import cml.fkernels as qk import cml.fdist as qd from functools import reduce #from qml.math import cho_solve home = os.environ['HOME'] np.set_printoptions(precision=3, suppress=True) T, F = True, False UN = io2.Units() h2e = UN.h2e h2kc = UN.h2kc class dmml(object): def __init__(self, xd): #, yd): self.__dict__ = xd.__dict__.copy() def init_YData(self, yd): self.yd = yd self.yobj = yd.yobj def krr(self, x1, y1, x2, kernel='g', icenter=F, c=1.0, l=1e-8): #kf = qk.gaussian_kernel if kernel[0] == 'g' else qk.laplacian_kernel g = T
def __init__(self, objs, rcut=4.8, fitmorse=F, property_names=None,
             idxsr=None, iae=F, no_strain=F, Delta=F, saveblk=F,
             unit='kcal', prog='g09', itarget=F, use_morse_db=F,
             check_boundary=T, xparam={}):
    """ Collect molecules and target values, and set up the model options.

    Parameters
    ----------
    objs : list/tuple of str, or a `molecules instance
        Each string is either a directory (all its `*.xyz files are used)
        or a single file. Anything that is not a list/tuple must be an
        object whose class is named `molecules.
    rcut : float
        stored as `self.xparam['rcut']
    property_names : list of str
        names of the properties to read; the first one is the target.
        With `Delta and exactly two names, the target is
        `props[names[1]] - props[names[0]].
    iae : bool
        if true, `mols.get_atomization_energies(name, prog=prog) is called
        for every property name.
    unit : str
        one of 'h', 'ev', 'kcal' (case-insensitive); selects the factor
        applied to `mols.ys.
    itarget : bool
        use target molecule to calc dmax (to save memory) or not?
    xparam : dict
        overrides for the default entries of `self.xparam. The default
        dict is only read, never modified.
    fitmorse, idxsr, no_strain, saveblk, use_morse_db, check_boundary
        stored unchanged as attributes of the same name.

    Raises
    ------
    Exception
        if an entry of `objs is neither an existing file/dir nor a string,
        or if `objs is not a `molecules instance.
    """
    self.saveblk = saveblk
    self.itarget = itarget
    if isinstance(objs, (tuple, list)):
        fs = []
        for obj in objs:
            if not isinstance(obj, str):
                raise Exception('Not a class or aqml.cheminfo.core.atoms?')
            if not os.path.exists(obj):
                print('input object: %s' % obj)
                raise Exception('#ERROR: not a file/dir??')
            if os.path.isdir(obj):
                fs += cmdout('ls %s/*.xyz' % obj)
            else:  # assume a file
                fs += [obj]
        mols = cc.molecules(fs, property_names)
    else:
        if objs.__class__.__name__ == 'molecules':
            mols = objs
        else:
            raise Exception('Not a class or aqml.cheminfo.core.molecules?')

    pns = property_names
    pn1 = pns[0]

    # attach strains: an array of T/F
    imcs = []
    strains = []
    for i in range(mols.nm):
        rawm = coo.ConnMol(mols[i])
        strains.append(rawm.strained)
        imcs.append(rawm.is_mcplx)
    mols.strains = np.array(strains)
    # builtin `bool: the alias `np.bool no longer exists in recent NumPy
    mols.imcs = np.array(imcs, dtype=bool)

    if iae:
        # A dedicated loop variable is used on purpose: re-using `pn1 here
        # would leave it pointing at the *last* property, which makes the
        # Delta target below identically zero.
        for pn in pns:
            mols.get_atomization_energies(pn, prog=prog)

    if len(pns) == 2 and Delta:
        # second property minus first property
        pn2 = pns[1]
        ys = mols.props[pn2] - mols.props[pn1]
    else:
        ys = mols.props[pn1]

    uc = io2.Units()
    const = dict(zip(['h', 'ev', 'kcal'],
                     [uc.h2kc, uc.e2kc, 1.0]))
    mols.ys = ys * const[unit.lower()]
    # NOTE(review): `self.ys keeps the *unscaled* values while `mols.ys is
    # scaled by the unit factor -- kept as found; confirm this is intended.
    self.ys = ys

    coeffs = [1.0]
    local = T
    self.xparam = {'local': local, 'kernel': 'g', 'rcut': rcut,
                   'reuses': [F, F, F], 'saves': [F, F, F],
                   'coeffs': coeffs, 'ws': [1., 1., 1.]}
    for k in xparam:
        self.xparam[k] = xparam[k]

    self.mols = mols
    self.fitmorse = fitmorse
    self.idxsr = idxsr
    self.no_strain = no_strain  # for morse param fit
    self.check_boundary = check_boundary
    self.use_morse_db = use_morse_db
from aqml.cheminfo.molecule.molecule import * from aqml.cheminfo.molecule.nbody import NBody from aqml.cheminfo.rw.xyz import write_xyz from aqml.cheminfo.core import * #import aqml.cheminfo.molecule.amon_f as cma import aqml.cheminfo.oechem.amon as coa import indigo import tempfile as tpf import aqml.cheminfo.rdkit.core as crk import cml.sd as dd try: import representation.x as sl except: pass h2kc = io2.Units().h2kc T, F = True, False np.set_printoptions(formatter={'float': '{: 0.4f}'.format}) _hyb = { Chem.rdchem.HybridizationType.SP3: 3, \ Chem.rdchem.HybridizationType.SP2: 2, \ Chem.rdchem.HybridizationType.SP: 1, \ Chem.rdchem.HybridizationType.UNSPECIFIED: 0} bt2bo = { Chem.BondType.SINGLE: 1.0, Chem.BondType.DOUBLE: 2.0, Chem.BondType.TRIPLE: 3.0, Chem.BondType.AROMATIC: 1.5, Chem.BondType.UNSPECIFIED: 0.0 }
def read_molecule(self):
    """ read geometry info, basis and hamiltonian

    The file `self.f is parsed according to `self.fmt:

    * 'com'/'inp': the geometry is taken from the block that follows the
      line starting with 'geometry' (atom count on the next line, atoms
      starting two lines further down).
    * 'out': `self.check_status() is called first. For a geometry
      optimization the first ' Current geometry' frame of the out file is
      used when `self.itn is true, otherwise the last such frame of the
      matching '.log' file; if that fails the input geometry printed in
      the out file is used. For all other jobs the ' ATOMIC COORDINATES'
      table of the out file is used and scaled by `io2.Units().b2a.

    Attributes set: fo, itn, igc, icalc, ioptg, task, zs, symbols, nheav,
    coords, na, m, meths, eprops, epatts, meth, basis, basis_c; the
    geometry, method and basis entries are also written to `self.props.

    Raises
    ------
    Exception
        for an unsupported `self.fmt, a missing geometry block, a KS line
        whose functional cannot be parsed, an input without any
        recognised method, or a missing/unterminated basis specification.
    """

    def _read_xyz_frame(path, ln):
        """ atom lines of the xyz frame located after line `ln (1-based,
        as printed by `grep -n) of file `path """
        lines = []
        with open(path) as fid:
            for _ in range(ln + 1):
                next(fid)
            natom = int(next(fid))
            next(fid)  # skip the line that follows the atom count
            for _ in range(natom):
                lines.append(next(fid))
        return lines

    def _basis_name(value):
        """ cut a basis name at the first delimiter, so that e.g.
        'vtz}' or 'vtz; i' both give 'vtz' """
        return re.split(r'[;,}\s]', value.strip())[0].lower()

    fo = self.f[:-4] + '.out'
    self.fo = fo
    ioptg = F
    with open(self.f) as fid:
        _cs = fid.readlines()
    icalc = F
    itn, igc = F, F
    # reset before `check_status(), which is expected to update them
    self.itn = itn
    self.igc = igc
    ratio = 1.0
    if self.fmt in ['com', 'inp']:  # read molecule from Molpro input file
        for i, ci in enumerate(_cs):
            if ci.strip()[:8] in ['geometry']:
                break
        else:
            raise Exception('#ERROR: no geometry block found in %s' % self.f)
        na = int(_cs[i + 1])
        cs = _cs[i + 3:i + 3 + na]
    elif self.fmt in ['out']:
        self.check_status()
        itn, igc = self.itn, self.igc
        if not np.all([itn, igc]):
            icalc = T
        # grep exits with 0 (i.e. falsy) when the marker is present
        ioptg = not os.system(r"grep ' PROGRAM \* OPT' %s >/dev/null" % fo)
        self.ioptg = ioptg
        if ioptg:
            if self.itn:
                cmd = "grep -n ' Current geometry' %s | head -n 1 | cut -d: -f1" % fo
                ln = int(cmdout1(cmd))
                cs = _read_xyz_frame(fo, ln)
            else:  # retrieve last config from log file
                fl = self.f[:-4] + '.log'
                print(' *** read geom from log file %s' % fl)
                assert os.path.exists(fl)
                cmd = "grep -n ' Current geometry' %s | tail -n 1 | cut -d: -f1" % fl
                try:
                    ln = int(cmdout1(cmd))
                    cs = _read_xyz_frame(fl, ln)
                except (ValueError, StopIteration, OSError):
                    # no (complete) frame in the log file
                    print(
                        ' ** no optg cycle found in log file! use geom from input'
                    )
                    cmd = r"sed -n '/ ATOMIC COORDINATES/,/ Bond lengths in Bohr/p' %s | grep '^\s*[0-9]' | awk '{print $2,$4,$5,$6}'" % fo
                    # NOTE(review): `ratio stays 1.0 on this path although
                    # the same table is scaled by b2a in the single-point
                    # branch below -- kept as found; confirm the units.
                    cs = cmdout(cmd)
        else:  # single point energy/force calc, coordinates in Bohr
            ratio = io2.Units().b2a
            cmd0 = "grep 'Molecule type: Atom' %s" % fo
            if cmdout1(cmd0):
                # single atom: the only entry is 4 lines below the header
                ln = int(
                    cmdout1(
                        "grep -n ' ATOMIC COORDINATES' %s | sed 's/:/ /g' | awk '{print $1}'"
                        % fo)) + 4
                cs = cmdout("sed -n '%dp' %s | awk '{print $2,$4,$5,$6}'" %
                            (ln, fo))
            else:
                cmd = r"sed -n '/ ATOMIC COORDINATES/,/ Bond lengths in Bohr/p' %s | grep '^\s*[0-9]' | awk '{print $2,$4,$5,$6}'" % fo
                cs = cmdout(cmd)
        na = len(cs)
    else:
        raise Exception('#ERROR: file format not supported')

    self.icalc = icalc  # need for further calculation??
    self.itn, self.igc = itn, igc

    # job type
    task = None
    if ioptg:  # may be assigned T when fmt='out'
        task = 'optg'
    else:
        for key in ['force', 'freq']:
            if self.is_jobtype(key):
                task = key
                break
    if not task:
        task = 'energy'
    self.task = task
    self.ioptg = ioptg

    symbols = []
    zs = []
    coords = []
    for ci in cs:
        csi = ci.strip().split()
        si = csi[0]
        try:
            zi = chemical_symbols.index(si)
        except ValueError:
            # symbol written in a different case, e.g. 'CL' or 'cl'
            zi = chemical_symbols_lowercase.index(si.lower())
        coords_i = np.array(csi[1:4], dtype=float) * ratio
        symbols.append(si)
        zs.append(zi)
        coords.append(coords_i)
    m = atoms(zs, coords)
    zs = np.array(zs, dtype=int)
    self.zs = zs
    nheav = (zs > 1).sum()
    self.symbols = symbols
    self.nheav = nheav
    self.coords = np.array(coords)
    self.na = len(zs)
    self.m = m
    self.props.update(
        dict(zip(['m', 'na', 'nheav', 'zs', 'symbols', 'symbs', 'coords'],
                 [m, na, nheav, zs, symbols, symbols, self.coords])))

    #
    # now method, i.e., hamiltonian
    # first, get contents of input
    if self.fmt in ['out']:
        # the input is echoed before the 'Commands initialized' line
        ie = int(
            cmdout1(
                r"grep -nE 'Commands\s\s*initialized' %s | cut -d: -f1" %
                self.f))
        cs0 = _cs[:ie]
    elif self.fmt in ['com', 'inp']:
        cs0 = _cs
    else:
        raise Exception('#ERROR: format not supported')

    # The five tables below are index-aligned: entry i of each list
    # belongs to method `_meths[i].
    _meths = ['df-hf', 'df-ks', 'hf', 'ks',
              'mp2-f12', 'df-mp2-f12', 'pno-lmp2-f12', 'mp2', 'df-mp2',
              'ccsd-f12', 'df-ccsd-f12', 'pno-lccsd-f12', 'ccsd', 'df-ccsd',
              'ccsd(t)-f12', 'df-ccsd(t)-f12', 'pno-lccsd(t)-f12', 'ccsd(t)', 'df-ccsd(t)']
    _eprops = ['hf', 'ks', ] * 2 + \
              ['mp2f12'] * 2 + ['lmp2f12'] + ['mp2'] * 2 + \
              ['cc2f12'] * 2 + ['lcc2f12'] + ['cc2'] * 2 + \
              ['cc2tf12'] * 2 + ['lcc2tf12'] + ['cc2t'] * 2
    # NOTE(review): the patterns '^-hf' and '^-ks' are reproduced as found;
    # confirm that they match plain `hf / `ks input lines as intended.
    _meths_patts = ['df-hf', 'df-ks', '^-hf', '^-ks',
                    'mp2-f12', 'df-mp2-f12', 'pno-lmp2-f12', 'mp2', 'df-mp2',
                    'ccsd-f12', 'df-ccsd-f12', 'pno-lccsd-f12', 'ccsd', 'df-ccsd',
                    r'ccsd\(t\)-f12', r'df-ccsd\(t\)-f12', r'pno-lccsd\(t\)-f12', r'ccsd\(t\)', r'df-ccsd\(t\)']
    spp = r'\s\s*'
    p1 = spp.join(['![UR](HF|KS)', 'STATE', '1.1', 'Energy'])
    p2 = spp.join(['!MP2-F12', 'total', 'energy'])
    p3 = spp.join(['!MP2', 'total', 'energy'])
    p4 = spp.join([r'!PNO-LMP2-F12\(PNO\)', 'total', 'energy'])
    # From Molpro manual,
    #   "Thus, we currently recommend CCSD-F12A for AVDZ and AVTZ basis
    #    sets, and CCSD-F12B for larger basis sets (rarely needed)."
    aux = 'a'
    p5 = spp.join(['!PNO-LCCSD-F12%s' % aux, 'total', 'energy'])
    p6 = spp.join([r'!LCCSD\(T\)-F12%s' % aux, 'total', 'energy'])
    p7 = spp.join(['CCSD-F12%s' % aux, 'total', 'energy'])
    p8 = spp.join([r'CCSD\(T\)-F12%s' % aux, 'total', 'energy'])
    p9 = spp.join(['CCSD', 'total', 'energy'])
    p10 = spp.join([r'CCSD\(T\)', 'total', 'energy'])
    _epatts = [p1] * 4 + \
              [p2] * 2 + [p4] + [p3] * 2 + \
              [p7] * 2 + [p5] + [p9] * 2 + \
              [p8] * 2 + [p6] + [p10] * 2
    _levels = [0.35, 0.45, 0.5, 0.6,
               1.65, 1.45, 1.25, 1.15, 1.05,
               2.65, 2.45, 2.25, 2.15, 2.05,
               3.65, 3.45, 3.25, 3.15, 3.05]

    meths = []
    levels = []  # collected alongside `meths; not stored on `self
    eprops = []  # energy properties
    epatts = []  # patterns to match different energies
    for line in cs0:
        ci = line.strip().lower()
        if ci == '' or ci[0] == '!':  # blank line or comment
            continue
        for imeth, meth in enumerate(_meths):
            if meth in meths:
                continue
            mp = _meths_patts[imeth]
            patts = [
                r'^%s$' % mp,
                r'^%s[},\s!]' % mp,
                r'[{\s]%s[},\s]' % mp,
            ]
            if not any(re.search(p, ci, flags=re.MULTILINE) for p in patts):
                continue
            if 'ks' in meth:  # now get xc function
                pt1 = r'([^{]*)ks,\s*([a-zA-Z][a-zA-Z0-9]*)[,}\s!]'
                pt2 = r'([^{]*)ks,\s*([a-zA-Z][a-zA-Z0-9]*)$'
                ot = re.search(pt1, ci) or \
                    re.search(pt2, ci, flags=re.MULTILINE)
                if not ot:
                    raise Exception(
                        '#ERROR: no match found for %s or %s!' % (pt1, pt2))
                # prefix before 'ks' + name of the functional
                meth = ''.join(ot.groups())
                eprop = meth
            else:
                eprop = _eprops[imeth]
            if meth == 'df-mp2-f12' and re.search(r'cabs_singles\s*=\s*-1',
                                                  ci):
                continue
            # coupled-cluster jobs also provide the matching MP2 energy
            if meth in [
                    'ccsd-f12', 'df-ccsd-f12', 'ccsd(t)-f12',
                    'df-ccsd(t)-f12'
            ]:
                meth2 = 'mp2-f12'
            elif meth in ['ccsd', 'df-ccsd', 'ccsd(t)', 'df-ccsd(t)']:
                meth2 = 'mp2'
            elif meth in ['pno-lccsd-f12', 'pno-lccsd(t)-f12']:
                meth2 = 'pno-lmp2-f12'
            else:
                meth2 = None
            if meth2 is not None:
                i2 = _meths.index(meth2)
                meths.append(meth2)
                eprops.append(_eprops[i2])
                epatts.append(_epatts[i2])
                levels.append(_levels[i2])
            meths.append(meth)
            eprops.append(eprop)
            epatts.append(_epatts[imeth])
            levels.append(_levels[imeth])

    if not meths:
        raise Exception('#ERROR: no supported method found in %s' % self.f)
    self.meths = meths
    self.eprops = eprops
    self.epatts = epatts
    self.meth = meths[-1]
    self.props.update(dict(zip(['meths', 'meth'], [self.meths, self.meth])))

    # now basis set
    for idxl, line in enumerate(_cs):
        ci = line.strip()
        if ci[:5] == 'basis':
            break
    else:
        raise Exception('#ERROR: no basis specification found in %s' % self.f)
    if '{' not in ci:
        # i.e., simple basis set input, e.g., basis=vtz
        basis = ci.split('!')[0].split('=')[-1].strip().lower()
        basis_c = basis
    else:
        # i.e., detailed basis setting, e.g.,
        #   basis={default=vtz; I=vtz-pp; ...}
        if 'default' not in ci:
            # default basis is given on the line after 'basis={'
            idxl += 1
            ci2 = _cs[idxl].strip()
            basis = _basis_name(ci2.split('!')[0].strip().split('=')[1])
        else:
            c2 = ci.split('!')[0].split('{', 1)[1]
            basis = _basis_name(c2.split('=')[1])
        ### search for '}'
        parts = []
        closed = F
        for cj in (l.strip() for l in _cs[idxl:]):
            if '}' in cj:
                cj2 = cj.split('}')[0]
                closed = T
            else:
                cj2 = cj.split('!')[0]
            if cj2 != '':
                parts.append(cj2)
            if closed:
                break
        if not closed:
            raise Exception('#ERROR: basis block not closed in %s' % self.f)
        csb = ';'.join(parts)
        # [-1]: the 'basis={' prefix is absent from `csb when the scan
        # started on the line after it
        basis_c = '{' + csb.split('basis={')[-1].lower() + '}'
    basis = re.sub('-', '', basis)
    self.basis = basis
    self.basis_c = basis_c
    self.props.update(dict(zip(['basis', 'basis_c'], [basis, basis_c])))
def get_atoms(self):
    """ Build a `co.atoms object from the geometry found in the job files.

    For a geometry optimization (detected by grepping `self.f) the
    ' Current geometry' frame is used: the first one of the out file when
    `self.is_job_done is true, otherwise the last one of the matching
    '.log' file (in which case `self.cs is replaced by the lines of that
    log file). In every other case the ' ATOMIC COORDINATES' table of the
    out file is read and the coordinates are multiplied by
    `io2.Units().b2a.

    Side effects: sets `self.ioc (geometry taken from an optimization
    frame?) and `self.iofmt ('out' or 'log'), and prints a one-line note.

    Returns
    -------
    the object returned by `co.atoms(zs, coords)
    """
    fo = self.f
    fl = fo[:-4] + '.log'
    cmd = "grep 'OPT (Geometry optimization' %s" % fo
    cmd2 = "grep -n ' Current geometry' %s | head -n 1 | cut -d: -f1" % fo
    cmd3 = "grep -n ' Current geometry' %s | tail -n 1 | cut -d: -f1" % fl

    ln = None  # 0-based index of the atom-count line of the frame to use
    iofmt = 'out'  # use geom from *.out file
    if cmdout1(cmd):
        # NOTE(review): `is_job_done is read as an attribute; if it is a
        # method this test is always true -- confirm.
        if self.is_job_done:
            ln = int(cmdout1(cmd2)) + 1
        elif os.path.exists(fl):
            s3 = cmdout1(cmd3)
            if s3:
                ln = int(s3) + 1
                iofmt = 'log'
                with open(fl) as fid:
                    self.cs = fid.readlines()
    ioc = ln is not None  # use output coords
    self.ioc = ioc
    self.iofmt = iofmt

    zs = []
    coords = []
    if ioc:
        print(' ** read fully/partially optimized geom from %s.%s' %
              (fo[:-4], iofmt))
        na = int(self.cs[ln])
        for li in self.cs[ln + 2:ln + 2 + na]:
            tsi = li.strip().split()
            zs.append(co.chemical_symbols_lowercase.index(tsi[0].lower()))
            # float() instead of eval(): never execute file content
            coords.append([float(x) for x in tsi[1:]])
    else:
        # single point energy/force calc, coordinates in Bohr
        print(' ** read input geom from %s.out' % fo[:-4])
        const = io2.Units().b2a
        ln = int(
            cmdout1(
                "grep -n ' ATOMIC COORDINATES' %s | sed 's/:/ /g' | awk '{print $1}'"
                % fo)) + 3
        # the table ends at the first blank line (or at end of file)
        while ln < len(self.cs):
            ci = self.cs[ln].strip()
            if ci == '':
                break
            tsi = ci.split()
            zs.append(co.chemical_symbols_lowercase.index(tsi[1].lower()))
            # apply the conversion factor, which was previously computed
            # but never used
            coords.append([float(x) * const for x in tsi[3:6]])
            ln += 1
    return co.atoms(zs, coords)
#!/usr/bin/env python import re, io2, os, sys import numpy as np import aqml.cheminfo as co from aqml.cheminfo.core import * import aqml.cheminfo.rw.xyz as crx import shutil T, F = True, False spp = '\s\s*' uc = io2.Units() cardinal = {'vdz': 2, 'vtz': 3, 'vqz': 4} cmdout1 = lambda cmd: os.popen(cmd).read().strip() cmdout = lambda cmd: os.popen(cmd).read().strip().split('\n') iu = io2.Units() class Molpro(object): jobs = ['optg', 'force', 'freq'] jobs_a = ['optg', 'force', 'forces', 'freq', 'frequency'] def __init__(self, f, keys=[], iprop=T, units=['kcal', 'a']): self.f = f self.units = units # kcal/mol and Angstrom # note that later `f may change (when more properties from
#!/usr/bin/env python import io2, re, os, sys import numpy as np from io2.gaussian_reader import GaussianReader as GR0 import aqml.cheminfo.molecule.molecule as cmm from aqml.cheminfo.core import * import aqml.cheminfo.rdkit.core as crk from aqml.cheminfo.rw.ctab import * import scipy.spatial.distance as ssd h2kc = io2.Units().h2kc T, F = True, False np.set_printoptions(formatter={'float': '{: 0.4f}'.format}) class _atoms(object): """ `atoms object from file formats other than xyz""" def __init__(self, f): import ase.io as aio m = aio.read(f) self.zs = m.numbers self.coords = m.positions self.na = len(self.zs) uc = io2.Units() # unit converter def get_val(dic, key): assert key in list(dic.keys()), '#ERROR: prop not found!'
#!/usr/bin/env python """ This module defines an ASE interface to MOPAC. """ import os, sys, re import numpy as np import ase.io as aio import ase import io2 iu = io2.Units() class mopac(object): def __init__(self, obj, label=None, method='PM7', task='OPT', \ ias_fix=[], ias_relax=[], iwrite=False): """ atomic indices in either `ias_fix or `ias_relax starts from 1 """ self.method = method self.task = task self.iwrite = iwrite self.obj = obj typ = type(obj) if typ is str: suffix = obj[-3:] if obj[-3:] in ['com', 'gjf']: