def _banner(self, calc, mp2=False, mp2_dm=False): # type: (calcid, Optional[bool], Optional[bool]) core.print_out('\n') p4util.banner(' Scheduling {fmt_calc} ({c}) '.format( fmt_calc=self._display_name(calc), c='MP2 density matrix' if mp2_dm else ('MP2' if mp2 else 'HF'))) core.print_out('\n')
def run_espx(): core.tstart() p4util.banner('ESPX') dimer_basis = ddhigh.basisset() aux_basis = core.BasisSet.build(molecule, "DF_BASIS_SCF", HIGH + '-JKFIT', "JKFIT", HIGH) decomp = ESHLOVLPDecomposition(m1_basis=m1mhigh.basisset(), m2_basis=m2mhigh.basisset(), dimer_basis=dimer_basis, dimer_aux_basis=aux_basis) esovlpfunc = decomp.esovlp() eshf, ovlhf = esovlpfunc(m1mhigh.reference_wavefunction(), m2mhigh.reference_wavefunction()) esmp, ovlmp = esovlpfunc(m1mhigh, m2mhigh) del esovlpfunc hlfunc = decomp.hl() hl = hlfunc(m1mhigh.reference_wavefunction(), m2mhigh.reference_wavefunction()) core.tstop() return { 'eshf': eshf, 'ovlhf': ovlhf, 'esmp': esmp, 'ovlmp': ovlmp, 'hl': hl }, dimer_basis
def run_cfour(name, **kwargs): """Function that prepares environment and input files for a calculation calling Stanton and Gauss's CFOUR code. Also processes results back into Psi4 format. This function is not called directly but is instead called by :py:func:`~driver.energy` or :py:func:`~driver.optimize` when a Cfour method is requested (through *name* argument). In order to function correctly, the Cfour executable ``xcfour`` must be present in :envvar:`PATH` or :envvar:`PSIPATH`. .. hlist:: :columns: 1 * Many :ref:`PSI Variables <apdx:cfour_psivar>` extracted from the Cfour output * Python dictionary of associated file constants accessible as ``P4C4_INFO['zmat']``, ``P4C4_INFO['output']``, ``P4C4_INFO['grd']``, *etc.* :type name: string :param name: ``'c4-scf'`` || ``'c4-ccsd(t)'`` || ``'cfour'`` || etc. First argument, usually unlabeled. Indicates the computational method to be applied to the system. :type keep: :ref:`boolean <op_py_boolean>` :param keep: ``'on'`` || |dl| ``'off'`` |dr| Indicates whether to delete the Cfour scratch directory upon completion of the Cfour job. :type path: string :param path: Indicates path to Cfour scratch directory (with respect to Psi4 scratch directory). Otherwise, the default is a subdirectory within the Psi4 scratch directory. If specified, GENBAS and/or ZMAT within will be used. :type genbas: string :param genbas: Indicates that contents should be used for GENBAS file. GENBAS is a complicated topic. It is quite unnecessary if the molecule is from a molecule {...} block and basis is set through |Psifours| BASIS keyword. In that case, a GENBAS is written from LibMints and all is well. Otherwise, a GENBAS is looked for in the usual places: PSIPATH, PATH, PSIDATADIR/basis. If path kwarg is specified, also looks there preferentially for a GENBAS. Can also specify GENBAS within an input file through a string and setting the genbas kwarg. Note that due to the input parser's aggression, blank lines need to be replaced by the text blankline. """ lowername = name.lower() internal_p4c4_info = {} return_wfn = kwargs.pop('return_wfn', False) # Make sure the molecule the user provided is the active one molecule = kwargs.pop('molecule', core.get_active_molecule()) molecule.update_geometry() optstash = p4util.OptionsState(['CFOUR', 'TRANSLATE_PSI4']) # Determine calling function and hence dertype calledby = inspect.stack()[1][3] dertype = ['energy', 'gradient', 'hessian'].index(calledby) #print('I am %s called by %s called by %s.\n' % # (inspect.stack()[0][3], inspect.stack()[1][3], inspect.stack()[2][3])) # Save submission directory current_directory = os.getcwd() # Move into job scratch directory psioh = core.IOManager.shared_object() psio = core.IO.shared_object() os.chdir(psioh.get_default_path()) # Construct and move into cfour subdirectory of job scratch directory cfour_tmpdir = kwargs['path'] if 'path' in kwargs else \ 'psi.' + str(os.getpid()) + '.' + psio.get_default_namespace() + \ '.cfour.' + str(uuid.uuid4())[:8] if not os.path.exists(cfour_tmpdir): os.mkdir(cfour_tmpdir) os.chdir(cfour_tmpdir) # Find environment by merging PSIPATH and PATH environment variables lenv = { 'PATH': ':'.join([os.path.abspath(x) for x in os.environ.get('PSIPATH', '').split(':') if x != '']) + \ ':' + os.environ.get('PATH') + \ ':' + core.get_datadir() + '/basis', 'GENBAS_PATH': core.get_datadir() + '/basis', 'CFOUR_NUM_CORES': os.environ.get('CFOUR_NUM_CORES'), 'MKL_NUM_THREADS': os.environ.get('MKL_NUM_THREADS'), 'OMP_NUM_THREADS': os.environ.get('OMP_NUM_THREADS'), 'LD_LIBRARY_PATH': os.environ.get('LD_LIBRARY_PATH') } if 'path' in kwargs: lenv['PATH'] = kwargs['path'] + ':' + lenv['PATH'] # Filter out None values as subprocess will fault on them lenv = {k: v for k, v in lenv.items() if v is not None} # Load the GENBAS file genbas_path = qcdb.search_file('GENBAS', lenv['GENBAS_PATH']) if genbas_path: try: shutil.copy2(genbas_path, psioh.get_default_path() + cfour_tmpdir) except shutil.Error: # should only fail if src and dest equivalent pass core.print_out("\n GENBAS loaded from %s\n" % (genbas_path)) core.print_out(" CFOUR to be run from %s\n" % (psioh.get_default_path() + cfour_tmpdir)) else: message = """ GENBAS file for CFOUR interface not found. Either: [1] Supply a GENBAS by placing it in PATH or PSIPATH [1a] Use cfour {} block with molecule and basis directives. [1b] Use molecule {} block and CFOUR_BASIS keyword. [2] Allow Psi4's internal basis sets to convert to GENBAS [2a] Use molecule {} block and BASIS keyword. """ core.print_out(message) core.print_out(' Search path that was tried:\n') core.print_out(lenv['PATH'].replace(':', ', ')) # Generate the ZMAT input file in scratch if 'path' in kwargs and os.path.isfile('ZMAT'): core.print_out(" ZMAT loaded from %s\n" % (psioh.get_default_path() + kwargs['path'] + '/ZMAT')) else: with open('ZMAT', 'w') as cfour_infile: cfour_infile.write(write_zmat(lowername, dertype, molecule)) internal_p4c4_info['zmat'] = open('ZMAT', 'r').read() #core.print_out('\n====== Begin ZMAT input for CFOUR ======\n') #core.print_out(open('ZMAT', 'r').read()) #core.print_out('======= End ZMAT input for CFOUR =======\n\n') #print('\n====== Begin ZMAT input for CFOUR ======') #print(open('ZMAT', 'r').read()) #print('======= End ZMAT input for CFOUR =======\n') if 'genbas' in kwargs: with open('GENBAS', 'w') as cfour_basfile: cfour_basfile.write(kwargs['genbas'].replace( '\nblankline\n', '\n\n')) core.print_out(' GENBAS loaded from kwargs string\n') # Close psi4 output file and reopen with filehandle print('output in', current_directory + '/' + core.outfile_name()) pathfill = '' if os.path.isabs( core.outfile_name()) else current_directory + os.path.sep # Handle threading # OMP_NUM_THREADS from env is in lenv from above # threads from psi4 -n (core.get_num_threads()) is ignored # CFOUR_OMP_NUM_THREADS psi4 option takes precedence, handled below if core.has_option_changed('CFOUR', 'CFOUR_OMP_NUM_THREADS') == True: lenv['OMP_NUM_THREADS'] = str( core.get_option('CFOUR', 'CFOUR_OMP_NUM_THREADS')) #print("""\n\n<<<<< RUNNING CFOUR ... >>>>>\n\n""") # Call executable xcfour, directing cfour output to the psi4 output file cfour_executable = kwargs['c4exec'] if 'c4exec' in kwargs else 'xcfour' try: retcode = subprocess.Popen([cfour_executable], bufsize=0, stdout=subprocess.PIPE, env=lenv) except OSError as e: sys.stderr.write( 'Program %s not found in path or execution failed: %s\n' % (cfour_executable, e.strerror)) message = ('Program %s not found in path or execution failed: %s\n' % (cfour_executable, e.strerror)) raise ValidationError(message) c4out = '' while True: data = retcode.stdout.readline() data = data.decode('utf-8') if not data: break core.print_out(data) c4out += data internal_p4c4_info['output'] = c4out c4files = {} core.print_out('\n') for item in ['GRD', 'FCMFINAL', 'DIPOL']: try: with open(psioh.get_default_path() + cfour_tmpdir + '/' + item, 'r') as handle: c4files[item] = handle.read() core.print_out(' CFOUR scratch file %s has been read\n' % (item)) core.print_out('%s\n' % c4files[item]) internal_p4c4_info[item.lower()] = c4files[item] except IOError: pass core.print_out('\n') if molecule.name() == 'blank_molecule_psi4_yo': qcdbmolecule = None else: molecule.update_geometry() qcdbmolecule = qcdb.Molecule( molecule.create_psi4_string_from_molecule()) qcdbmolecule.update_geometry() # c4mol, if it exists, is dinky, just a clue to geometry of cfour results psivar, c4grad, c4mol = qcdb.cfour.harvest(qcdbmolecule, c4out, **c4files) # Absorb results into psi4 data structures for key in psivar.keys(): core.set_variable(key.upper(), float(psivar[key])) if qcdbmolecule is None and c4mol is not None: molecule = geometry(c4mol.create_psi4_string_from_molecule(), name='blank_molecule_psi4_yo') molecule.update_geometry() # This case arises when no Molecule going into calc (cfour {} block) but want # to know the orientation at which grad, properties, etc. are returned (c4mol). # c4mol is dinky, w/o chg, mult, dummies and retains name # blank_molecule_psi4_yo so as to not interfere with future cfour {} blocks if c4grad is not None: mat = core.Matrix.from_list(c4grad) core.set_gradient(mat) #print ' <<< [3] C4-GRD-GRAD >>>' #mat.print() # exit(1) # # Things needed core.so module to do # collect c4out string # read GRD # read FCMFINAL # see if theres an active molecule # # Things delegatable to qcdb # parsing c4out # reading GRD and FCMFINAL strings # reconciling p4 and c4 molecules (orient) # reconciling c4out and GRD and FCMFINAL results # transforming frame of results back to p4 # # Things run_cfour needs to have back # psivar # qcdb.Molecule of c4? # coordinates? # gradient in p4 frame # # Process the cfour output # psivar, c4coord, c4grad = qcdb.cfour.cfour_harvest(c4out) # for key in psivar.keys(): # core.set_variable(key.upper(), float(psivar[key])) # # # Awful Hack - Go Away TODO # if c4grad: # molecule = core.get_active_molecule() # molecule.update_geometry() # # if molecule.name() == 'blank_molecule_psi4_yo': # p4grad = c4grad # p4coord = c4coord # else: # qcdbmolecule = qcdb.Molecule(molecule.create_psi4_string_from_molecule()) # #p4grad = qcdbmolecule.deorient_array_from_cfour(c4coord, c4grad) # #p4coord = qcdbmolecule.deorient_array_from_cfour(c4coord, c4coord) # # with open(psioh.get_default_path() + cfour_tmpdir + '/GRD', 'r') as cfour_grdfile: # c4outgrd = cfour_grdfile.read() # print('GRD\n',c4outgrd) # c4coordGRD, c4gradGRD = qcdb.cfour.cfour_harvest_files(qcdbmolecule, grd=c4outgrd) # # p4mat = core.Matrix.from_list(p4grad) # core.set_gradient(p4mat) # print(' <<< P4 PSIVAR >>>') # for item in psivar: # print(' %30s %16.8f' % (item, psivar[item])) #print(' <<< P4 COORD >>>') #for item in p4coord: # print(' %16.8f %16.8f %16.8f' % (item[0], item[1], item[2])) # print(' <<< P4 GRAD >>>') # for item in c4grad: # print(' %16.8f %16.8f %16.8f' % (item[0], item[1], item[2])) # Clean up cfour scratch directory unless user instructs otherwise keep = yes.match(str(kwargs['keep'])) if 'keep' in kwargs else False os.chdir('..') try: if keep or ('path' in kwargs): core.print_out('\n CFOUR scratch files have been kept in %s\n' % (psioh.get_default_path() + cfour_tmpdir)) else: shutil.rmtree(cfour_tmpdir) except OSError as e: print('Unable to remove CFOUR temporary directory %s' % e, file=sys.stderr) exit(1) # Return to submission directory and reopen output file os.chdir(current_directory) core.print_out('\n') p4util.banner(' Cfour %s %s Results ' % (name.lower(), calledby.capitalize())) core.print_variables() if c4grad is not None: core.get_gradient().print_out() core.print_out('\n') p4util.banner(' Cfour %s %s Results ' % (name.lower(), calledby.capitalize())) core.print_variables() if c4grad is not None: core.get_gradient().print_out() # Quit if Cfour threw error if core.variable('CFOUR ERROR CODE'): raise ValidationError("""Cfour exited abnormally.""") P4C4_INFO.clear() P4C4_INFO.update(internal_p4c4_info) optstash.restore() # new skeleton wavefunction w/mol, highest-SCF basis (just to choose one), & not energy # Feb 2017 hack. Could get proper basis in skel wfn even if not through p4 basis kw gobas = core.get_global_option('BASIS') if core.get_global_option( 'BASIS') else 'sto-3g' basis = core.BasisSet.build(molecule, "ORBITAL", gobas) if basis.has_ECP(): raise ValidationError("""ECPs not hooked up for Cfour""") wfn = core.Wavefunction(molecule, basis) optstash.restore() if dertype == 0: finalquantity = psivar['CURRENT ENERGY'] elif dertype == 1: finalquantity = core.get_gradient() wfn.set_gradient(finalquantity) if finalquantity.rows(0) < 20: core.print_out('CURRENT GRADIENT') finalquantity.print_out() elif dertype == 2: pass #finalquantity = finalhessian #wfn.set_hessian(finalquantity) #if finalquantity.rows(0) < 20: # core.print_out('CURRENT HESSIAN') # finalquantity.print_out() return wfn
def run_cfour(name, **kwargs): """Function that prepares environment and input files for a calculation calling Stanton and Gauss's CFOUR code. Also processes results back into Psi4 format. This function is not called directly but is instead called by :py:func:`~driver.energy` or :py:func:`~driver.optimize` when a Cfour method is requested (through *name* argument). In order to function correctly, the Cfour executable ``xcfour`` must be present in :envvar:`PATH` or :envvar:`PSIPATH`. .. hlist:: :columns: 1 * Many :ref:`PSI Variables <apdx:cfour_psivar>` extracted from the Cfour output * Python dictionary of associated file constants accessible as ``P4C4_INFO['zmat']``, ``P4C4_INFO['output']``, ``P4C4_INFO['grd']``, *etc.* :type name: string :param name: ``'c4-scf'`` || ``'c4-ccsd(t)'`` || ``'cfour'`` || etc. First argument, usually unlabeled. Indicates the computational method to be applied to the system. :type keep: :ref:`boolean <op_py_boolean>` :param keep: ``'on'`` || |dl| ``'off'`` |dr| Indicates whether to delete the Cfour scratch directory upon completion of the Cfour job. :type path: string :param path: Indicates path to Cfour scratch directory (with respect to Psi4 scratch directory). Otherwise, the default is a subdirectory within the Psi4 scratch directory. If specified, GENBAS and/or ZMAT within will be used. :type genbas: string :param genbas: Indicates that contents should be used for GENBAS file. GENBAS is a complicated topic. It is quite unnecessary if the molecule is from a molecule {...} block and basis is set through |Psifours| BASIS keyword. In that case, a GENBAS is written from LibMints and all is well. Otherwise, a GENBAS is looked for in the usual places: PSIPATH, PATH, PSIDATADIR/basis. If path kwarg is specified, also looks there preferentially for a GENBAS. Can also specify GENBAS within an input file through a string and setting the genbas kwarg. Note that due to the input parser's aggression, blank lines need to be replaced by the text blankline. """ lowername = name.lower() internal_p4c4_info = {} return_wfn = kwargs.pop('return_wfn', False) # Make sure the molecule the user provided is the active one molecule = kwargs.pop('molecule', core.get_active_molecule()) molecule.update_geometry() optstash = p4util.OptionsState( ['CFOUR', 'TRANSLATE_PSI4']) # Determine calling function and hence dertype calledby = inspect.stack()[1][3] dertype = ['energy', 'gradient', 'hessian'].index(calledby) #print('I am %s called by %s called by %s.\n' % # (inspect.stack()[0][3], inspect.stack()[1][3], inspect.stack()[2][3])) # Save submission directory current_directory = os.getcwd() # Move into job scratch directory psioh = core.IOManager.shared_object() psio = core.IO.shared_object() os.chdir(psioh.get_default_path()) # Construct and move into cfour subdirectory of job scratch directory cfour_tmpdir = kwargs['path'] if 'path' in kwargs else \ 'psi.' + str(os.getpid()) + '.' + psio.get_default_namespace() + \ '.cfour.' + str(uuid.uuid4())[:8] if not os.path.exists(cfour_tmpdir): os.mkdir(cfour_tmpdir) os.chdir(cfour_tmpdir) # Find environment by merging PSIPATH and PATH environment variables lenv = { 'PATH': ':'.join([os.path.abspath(x) for x in os.environ.get('PSIPATH', '').split(':') if x != '']) + \ ':' + os.environ.get('PATH') + \ ':' + core.get_datadir() + '/basis', 'GENBAS_PATH': core.get_datadir() + '/basis', 'CFOUR_NUM_CORES': os.environ.get('CFOUR_NUM_CORES'), 'MKL_NUM_THREADS': os.environ.get('MKL_NUM_THREADS'), 'OMP_NUM_THREADS': os.environ.get('OMP_NUM_THREADS'), 'LD_LIBRARY_PATH': os.environ.get('LD_LIBRARY_PATH') } if 'path' in kwargs: lenv['PATH'] = kwargs['path'] + ':' + lenv['PATH'] # Filter out None values as subprocess will fault on them lenv = {k: v for k, v in lenv.items() if v is not None} # Load the GENBAS file genbas_path = qcdb.search_file('GENBAS', lenv['GENBAS_PATH']) if genbas_path: try: shutil.copy2(genbas_path, psioh.get_default_path() + cfour_tmpdir) except shutil.Error: # should only fail if src and dest equivalent pass core.print_out("\n GENBAS loaded from %s\n" % (genbas_path)) core.print_out(" CFOUR to be run from %s\n" % (psioh.get_default_path() + cfour_tmpdir)) else: message = """ GENBAS file for CFOUR interface not found. Either: [1] Supply a GENBAS by placing it in PATH or PSIPATH [1a] Use cfour {} block with molecule and basis directives. [1b] Use molecule {} block and CFOUR_BASIS keyword. [2] Allow Psi4's internal basis sets to convert to GENBAS [2a] Use molecule {} block and BASIS keyword. """ core.print_out(message) core.print_out(' Search path that was tried:\n') core.print_out(lenv['PATH'].replace(':', ', ')) # Generate the ZMAT input file in scratch if 'path' in kwargs and os.path.isfile('ZMAT'): core.print_out(" ZMAT loaded from %s\n" % (psioh.get_default_path() + kwargs['path'] + '/ZMAT')) else: with open('ZMAT', 'w') as cfour_infile: cfour_infile.write(write_zmat(lowername, dertype, molecule)) internal_p4c4_info['zmat'] = open('ZMAT', 'r').read() #core.print_out('\n====== Begin ZMAT input for CFOUR ======\n') #core.print_out(open('ZMAT', 'r').read()) #core.print_out('======= End ZMAT input for CFOUR =======\n\n') #print('\n====== Begin ZMAT input for CFOUR ======') #print(open('ZMAT', 'r').read()) #print('======= End ZMAT input for CFOUR =======\n') if 'genbas' in kwargs: with open('GENBAS', 'w') as cfour_basfile: cfour_basfile.write(kwargs['genbas'].replace('\nblankline\n', '\n\n')) core.print_out(' GENBAS loaded from kwargs string\n') # Close psi4 output file and reopen with filehandle print('output in', current_directory + '/' + core.outfile_name()) pathfill = '' if os.path.isabs(core.outfile_name()) else current_directory + os.path.sep # Handle threading # OMP_NUM_THREADS from env is in lenv from above # threads from psi4 -n (core.get_num_threads()) is ignored # CFOUR_OMP_NUM_THREADS psi4 option takes precedence, handled below if core.has_option_changed('CFOUR', 'CFOUR_OMP_NUM_THREADS') == True: lenv['OMP_NUM_THREADS'] = str(core.get_option('CFOUR', 'CFOUR_OMP_NUM_THREADS')) #print("""\n\n<<<<< RUNNING CFOUR ... >>>>>\n\n""") # Call executable xcfour, directing cfour output to the psi4 output file cfour_executable = kwargs['c4exec'] if 'c4exec' in kwargs else 'xcfour' try: retcode = subprocess.Popen([cfour_executable], bufsize=0, stdout=subprocess.PIPE, env=lenv) except OSError as e: sys.stderr.write('Program %s not found in path or execution failed: %s\n' % (cfour_executable, e.strerror)) message = ('Program %s not found in path or execution failed: %s\n' % (cfour_executable, e.strerror)) raise ValidationError(message) c4out = '' while True: data = retcode.stdout.readline() data = data.decode('utf-8') if not data: break core.print_out(data) c4out += data internal_p4c4_info['output'] = c4out c4files = {} core.print_out('\n') for item in ['GRD', 'FCMFINAL', 'DIPOL']: try: with open(psioh.get_default_path() + cfour_tmpdir + '/' + item, 'r') as handle: c4files[item] = handle.read() core.print_out(' CFOUR scratch file %s has been read\n' % (item)) core.print_out('%s\n' % c4files[item]) internal_p4c4_info[item.lower()] = c4files[item] except IOError: pass core.print_out('\n') if molecule.name() == 'blank_molecule_psi4_yo': qcdbmolecule = None else: molecule.update_geometry() qcdbmolecule = qcdb.Molecule(molecule.create_psi4_string_from_molecule()) qcdbmolecule.update_geometry() # c4mol, if it exists, is dinky, just a clue to geometry of cfour results psivar, c4grad, c4mol = qcdb.cfour.harvest(qcdbmolecule, c4out, **c4files) # Absorb results into psi4 data structures for key in psivar.keys(): core.set_variable(key.upper(), float(psivar[key])) if qcdbmolecule is None and c4mol is not None: molecule = geometry(c4mol.create_psi4_string_from_molecule(), name='blank_molecule_psi4_yo') molecule.update_geometry() # This case arises when no Molecule going into calc (cfour {} block) but want # to know the orientation at which grad, properties, etc. are returned (c4mol). # c4mol is dinky, w/o chg, mult, dummies and retains name # blank_molecule_psi4_yo so as to not interfere with future cfour {} blocks if c4grad is not None: mat = core.Matrix.from_list(c4grad) core.set_gradient(mat) #print ' <<< [3] C4-GRD-GRAD >>>' #mat.print() # exit(1) # # Things needed core.so module to do # collect c4out string # read GRD # read FCMFINAL # see if theres an active molecule # # Things delegatable to qcdb # parsing c4out # reading GRD and FCMFINAL strings # reconciling p4 and c4 molecules (orient) # reconciling c4out and GRD and FCMFINAL results # transforming frame of results back to p4 # # Things run_cfour needs to have back # psivar # qcdb.Molecule of c4? # coordinates? # gradient in p4 frame # # Process the cfour output # psivar, c4coord, c4grad = qcdb.cfour.cfour_harvest(c4out) # for key in psivar.keys(): # core.set_variable(key.upper(), float(psivar[key])) # # # Awful Hack - Go Away TODO # if c4grad: # molecule = core.get_active_molecule() # molecule.update_geometry() # # if molecule.name() == 'blank_molecule_psi4_yo': # p4grad = c4grad # p4coord = c4coord # else: # qcdbmolecule = qcdb.Molecule(molecule.create_psi4_string_from_molecule()) # #p4grad = qcdbmolecule.deorient_array_from_cfour(c4coord, c4grad) # #p4coord = qcdbmolecule.deorient_array_from_cfour(c4coord, c4coord) # # with open(psioh.get_default_path() + cfour_tmpdir + '/GRD', 'r') as cfour_grdfile: # c4outgrd = cfour_grdfile.read() # print('GRD\n',c4outgrd) # c4coordGRD, c4gradGRD = qcdb.cfour.cfour_harvest_files(qcdbmolecule, grd=c4outgrd) # # p4mat = core.Matrix.from_list(p4grad) # core.set_gradient(p4mat) # print(' <<< P4 PSIVAR >>>') # for item in psivar: # print(' %30s %16.8f' % (item, psivar[item])) #print(' <<< P4 COORD >>>') #for item in p4coord: # print(' %16.8f %16.8f %16.8f' % (item[0], item[1], item[2])) # print(' <<< P4 GRAD >>>') # for item in c4grad: # print(' %16.8f %16.8f %16.8f' % (item[0], item[1], item[2])) # Clean up cfour scratch directory unless user instructs otherwise keep = yes.match(str(kwargs['keep'])) if 'keep' in kwargs else False os.chdir('..') try: if keep or ('path' in kwargs): core.print_out('\n CFOUR scratch files have been kept in %s\n' % (psioh.get_default_path() + cfour_tmpdir)) else: shutil.rmtree(cfour_tmpdir) except OSError as e: print('Unable to remove CFOUR temporary directory %s' % e, file=sys.stderr) exit(1) # Return to submission directory and reopen output file os.chdir(current_directory) core.print_out('\n') p4util.banner(' Cfour %s %s Results ' % (name.lower(), calledby.capitalize())) core.print_variables() if c4grad is not None: core.get_gradient().print_out() core.print_out('\n') p4util.banner(' Cfour %s %s Results ' % (name.lower(), calledby.capitalize())) core.print_variables() if c4grad is not None: core.get_gradient().print_out() # Quit if Cfour threw error if 'CFOUR ERROR CODE' in core.variables(): raise ValidationError("""Cfour exited abnormally.""") P4C4_INFO.clear() P4C4_INFO.update(internal_p4c4_info) optstash.restore() # new skeleton wavefunction w/mol, highest-SCF basis (just to choose one), & not energy # Feb 2017 hack. Could get proper basis in skel wfn even if not through p4 basis kw gobas = core.get_global_option('BASIS') if core.get_global_option('BASIS') else 'sto-3g' basis = core.BasisSet.build(molecule, "ORBITAL", gobas) if basis.has_ECP(): raise ValidationError("""ECPs not hooked up for Cfour""") wfn = core.Wavefunction(molecule, basis) optstash.restore() if dertype == 0: finalquantity = psivar['CURRENT ENERGY'] elif dertype == 1: finalquantity = core.get_gradient() wfn.set_gradient(finalquantity) if finalquantity.rows(0) < 20: core.print_out('CURRENT GRADIENT') finalquantity.print_out() elif dertype == 2: pass #finalquantity = finalhessian #wfn.set_hessian(finalquantity) #if finalquantity.rows(0) < 20: # core.print_out('CURRENT HESSIAN') # finalquantity.print_out() return wfn
def database(name, db_name, **kwargs): r"""Function to access the molecule objects and reference energies of popular chemical databases. :aliases: db() :returns: (*float*) Mean absolute deviation of the database in kcal/mol :PSI variables: .. hlist:: :columns: 1 * :psivar:`db_name DATABASE MEAN SIGNED DEVIATION <db_nameDATABASEMEANSIGNEDDEVIATION>` * :psivar:`db_name DATABASE MEAN ABSOLUTE DEVIATION <db_nameDATABASEMEANABSOLUTEDEVIATION>` * :psivar:`db_name DATABASE ROOT-MEAN-SQUARE DEVIATION <db_nameDATABASEROOT-MEAN-SQUARESIGNEDDEVIATION>` * Python dictionaries of results accessible as ``DB_RGT`` and ``DB_RXN``. .. note:: It is very easy to make a database from a collection of xyz files using the script :source:`share/scripts/ixyz2database.py`. See :ref:`sec:createDatabase` for details. .. caution:: Some features are not yet implemented. Buy a developer some coffee. - In sow/reap mode, use only global options (e.g., the local option set by ``set scf scf_type df`` will not be respected). .. note:: To access a database that is not embedded in a |PSIfour| distribution, add the path to the directory containing the database to the environment variable :envvar:`PYTHONPATH`. :type name: string :param name: ``'scf'`` || ``'sapt0'`` || ``'ccsd(t)'`` || etc. First argument, usually unlabeled. Indicates the computational method to be applied to the database. May be any valid argument to :py:func:`~driver.energy`. :type db_name: string :param db_name: ``'BASIC'`` || ``'S22'`` || ``'HTBH'`` || etc. Second argument, usually unlabeled. Indicates the requested database name, matching (case insensitive) the name of a python file in ``psi4/share/databases`` or :envvar:`PYTHONPATH`. Consult that directory for available databases and literature citations. :type func: :ref:`function <op_py_function>` :param func: |dl| ``energy`` |dr| || ``optimize`` || ``cbs`` Indicates the type of calculation to be performed on each database member. The default performs a single-point ``energy('name')``, while ``optimize`` perfoms a geometry optimization on each reagent, and ``cbs`` performs a compound single-point energy. If a nested series of python functions is intended (see :ref:`sec:intercalls`), use keyword ``db_func`` instead of ``func``. :type mode: string :param mode: |dl| ``'continuous'`` |dr| || ``'sow'`` || ``'reap'`` Indicates whether the calculations required to complete the database are to be run in one file (``'continuous'``) or are to be farmed out in an embarrassingly parallel fashion (``'sow'``/``'reap'``). For the latter, run an initial job with ``'sow'`` and follow instructions in its output file. :type cp: :ref:`boolean <op_py_boolean>` :param cp: ``'on'`` || |dl| ``'off'`` |dr| Indicates whether counterpoise correction is employed in computing interaction energies. Use this option and NOT the :py:func:`~wrappers.cp` function for BSSE correction in database(). Option available (See :ref:`sec:availableDatabases`) only for databases of bimolecular complexes. :type rlxd: :ref:`boolean <op_py_boolean>` :param rlxd: ``'on'`` || |dl| ``'off'`` |dr| Indicates whether correction for deformation energy is employed in computing interaction energies. Option available (See :ref:`sec:availableDatabases`) only for databases of bimolecular complexes with non-frozen monomers, e.g., HBC6. :type symm: :ref:`boolean <op_py_boolean>` :param symm: |dl| ``'on'`` |dr| || ``'off'`` Indicates whether the native symmetry of the database reagents is employed (``'on'``) or whether it is forced to :math:`C_1` symmetry (``'off'``). Some computational methods (e.g., SAPT) require no symmetry, and this will be set by database(). :type zpe: :ref:`boolean <op_py_boolean>` :param zpe: ``'on'`` || |dl| ``'off'`` |dr| Indicates whether zero-point-energy corrections are appended to single-point energy values. Option valid only for certain thermochemical databases. Disabled until Hessians ready. :type benchmark: string :param benchmark: |dl| ``'default'`` |dr| || ``'S22A'`` || etc. Indicates whether a non-default set of reference energies, if available (See :ref:`sec:availableDatabases`), are employed for the calculation of error statistics. :type tabulate: array of strings :param tabulate: |dl| ``[]`` |dr| || ``['scf total energy', 'natom']`` || etc. Indicates whether to form tables of variables other than the primary requested energy. Available for any PSI variable. :type subset: string or array of strings :param subset: Indicates a subset of the full database to run. This is a very flexible option and can be used in three distinct ways, outlined below. Note that two take a string and the last takes an array. See `Available Databases`_ for available values. * ``'small'`` || ``'large'`` || ``'equilibrium'`` Calls predefined subsets of the requested database, either ``'small'``, a few of the smallest database members, ``'large'``, the largest of the database members, or ``'equilibrium'``, the equilibrium geometries for a database composed of dissociation curves. * ``'BzBz_S'`` || ``'FaOOFaON'`` || ``'ArNe'`` || ``'HB'`` || etc. For databases composed of dissociation curves, or otherwise divided into subsets, individual curves and subsets can be called by name. Consult the database python files for available molecular systems (case insensitive). * ``[1,2,5]`` || ``['1','2','5']`` || ``['BzMe-3.5', 'MeMe-5.0']`` || etc. Specify a list of database members to run. Consult the database python files for available molecular systems. This is the only portion of database input that is case sensitive; choices for this keyword must match the database python file. :examples: >>> # [1] Two-stage SCF calculation on short, equilibrium, and long helium dimer >>> db('scf','RGC10',cast_up='sto-3g',subset=['HeHe-0.85','HeHe-1.0','HeHe-1.5'], tabulate=['scf total energy','natom']) >>> # [2] Counterpoise-corrected interaction energies for three complexes in S22 >>> # Error statistics computed wrt an old benchmark, S22A >>> database('mp2','S22',cp=1,subset=[16,17,8],benchmark='S22A') >>> # [3] SAPT0 on the neon dimer dissociation curve >>> db('sapt0',subset='NeNe',cp=0,symm=0,db_name='RGC10') >>> # [4] Optimize system 1 in database S22, producing tables of scf and mp2 energy >>> db('mp2','S22',db_func=optimize,subset=[1], tabulate=['mp2 total energy','current energy']) >>> # [5] CCSD on the smallest systems of HTBH, a hydrogen-transfer database >>> database('ccsd','HTBH',subset='small', tabulate=['ccsd total energy', 'mp2 total energy']) """ lowername = name #TODO kwargs = p4util.kwargs_lower(kwargs) # Wrap any positional arguments into kwargs (for intercalls among wrappers) if not ('name' in kwargs) and name: kwargs['name'] = name #.lower() if not ('db_name' in kwargs) and db_name: kwargs['db_name'] = db_name # Establish function to call func = kwargs.pop('db_func', kwargs.pop('func', energy)) kwargs['db_func'] = func # Bounce to CP if bsse kwarg (someday) if kwargs.get('bsse_type', None) is not None: raise ValidationError( """Database: Cannot specify bsse_type for database. Use the cp keyword withing database instead.""" ) allowoptexceeded = kwargs.get('allowoptexceeded', False) optstash = p4util.OptionsState(['WRITER_FILE_LABEL'], ['SCF', 'REFERENCE']) # Wrapper wholly defines molecule. discard any passed-in kwargs.pop('molecule', None) # Paths to search for database files: here + PSIPATH + library + PYTHONPATH db_paths = [] db_paths.append(os.getcwd()) db_paths.extend(os.environ.get('PSIPATH', '').split(os.path.pathsep)) db_paths.append(os.path.join(core.get_datadir(), 'databases')) db_paths.append(os.path.dirname(__file__)) db_paths = list(map(os.path.abspath, db_paths)) sys.path[1:1] = db_paths # TODO this should be modernized a la interface_cfour # Define path and load module for requested database database = p4util.import_ignorecase(db_name) if database is None: core.print_out('\nPython module for database %s failed to load\n\n' % (db_name)) core.print_out('\nSearch path that was tried:\n') core.print_out(", ".join(map(str, sys.path))) raise ValidationError("Python module loading problem for database " + str(db_name)) else: dbse = database.dbse HRXN = database.HRXN ACTV = database.ACTV RXNM = database.RXNM BIND = database.BIND TAGL = database.TAGL GEOS = database.GEOS try: DATA = database.DATA except AttributeError: DATA = {} user_writer_file_label = core.get_global_option('WRITER_FILE_LABEL') user_reference = core.get_global_option('REFERENCE') # Configuration based upon e_name & db_name options # Force non-supramolecular if needed if not hasattr(lowername, '__call__') and re.match(r'^.*sapt', lowername): try: database.ACTV_SA except AttributeError: raise ValidationError( 'Database %s not suitable for non-supramolecular calculation.' % (db_name)) else: ACTV = database.ACTV_SA # Force open-shell if needed openshell_override = 0 if user_reference in ['RHF', 'RKS']: try: database.isOS except AttributeError: pass else: if p4util.yes.match(str(database.isOS)): openshell_override = 1 core.print_out( '\nSome reagents in database %s require an open-shell reference; will be reset to UHF/UKS as needed.\n' % (db_name)) # Configuration based upon database keyword options # Option symmetry- whether symmetry treated normally or turned off (currently req'd for dfmp2 & dft) db_symm = kwargs.get('symm', True) symmetry_override = 0 if db_symm is False: symmetry_override = 1 elif db_symm is True: pass else: raise ValidationError("""Symmetry mode '%s' not valid.""" % (db_symm)) # Option mode of operation- whether db run in one job or files farmed out db_mode = kwargs.pop('db_mode', kwargs.pop('mode', 'continuous')).lower() kwargs['db_mode'] = db_mode if db_mode == 'continuous': pass elif db_mode == 'sow': pass elif db_mode == 'reap': db_linkage = kwargs.get('linkage', None) if db_linkage is None: raise ValidationError( """Database execution mode 'reap' requires a linkage option.""" ) else: raise ValidationError("""Database execution mode '%s' not valid.""" % (db_mode)) # Option counterpoise- whether for interaction energy databases run in bsse-corrected or not db_cp = kwargs.get('cp', False) if db_cp is True: try: database.ACTV_CP except AttributeError: raise ValidationError( """Counterpoise correction mode 'yes' invalid for database %s.""" % (db_name)) else: ACTV = database.ACTV_CP elif db_cp is False: pass else: raise ValidationError( """Counterpoise correction mode '%s' not valid.""" % (db_cp)) # Option relaxed- whether for non-frozen-monomer interaction energy databases include deformation correction or not? db_rlxd = kwargs.get('rlxd', False) if db_rlxd is True: if db_cp is True: try: database.ACTV_CPRLX database.RXNM_CPRLX except AttributeError: raise ValidationError( 'Deformation and counterpoise correction mode \'yes\' invalid for database %s.' % (db_name)) else: ACTV = database.ACTV_CPRLX RXNM = database.RXNM_CPRLX elif db_cp is False: try: database.ACTV_RLX except AttributeError: raise ValidationError( 'Deformation correction mode \'yes\' invalid for database %s.' % (db_name)) else: ACTV = database.ACTV_RLX elif db_rlxd is False: #elif no.match(str(db_rlxd)): pass else: raise ValidationError('Deformation correction mode \'%s\' not valid.' % (db_rlxd)) # Option zero-point-correction- whether for thermochem databases jobs are corrected by zpe db_zpe = kwargs.get('zpe', False) if db_zpe is True: raise ValidationError( 'Zero-point-correction mode \'yes\' not yet implemented.') elif db_zpe is False: pass else: raise ValidationError('Zero-point-correction \'mode\' %s not valid.' % (db_zpe)) # Option benchmark- whether error statistics computed wrt alternate reference energies db_benchmark = 'default' if 'benchmark' in kwargs: db_benchmark = kwargs['benchmark'] if db_benchmark.lower() == 'default': pass else: BIND = p4util.getattr_ignorecase(database, 'BIND_' + db_benchmark) if BIND is None: raise ValidationError( 'Special benchmark \'%s\' not available for database %s.' % (db_benchmark, db_name)) # Option tabulate- whether tables of variables other than primary energy method are formed # TODO db(func=cbs,tabulate=[non-current-energy]) # broken db_tabulate = [] if 'tabulate' in kwargs: db_tabulate = kwargs['tabulate'] # Option subset- whether all of the database or just a portion is run db_subset = HRXN if 'subset' in kwargs: db_subset = kwargs['subset'] if isinstance(db_subset, (str, bytes)): if db_subset.lower() == 'small': try: database.HRXN_SM except AttributeError: raise ValidationError( """Special subset 'small' not available for database %s.""" % (db_name)) else: HRXN = database.HRXN_SM elif db_subset.lower() == 'large': try: database.HRXN_LG except AttributeError: raise ValidationError( """Special subset 'large' not available for database %s.""" % (db_name)) else: HRXN = database.HRXN_LG elif db_subset.lower() == 'equilibrium': try: database.HRXN_EQ except AttributeError: raise ValidationError( """Special subset 'equilibrium' not available for database %s.""" % (db_name)) else: HRXN = database.HRXN_EQ else: HRXN = p4util.getattr_ignorecase(database, db_subset) if HRXN is None: HRXN = p4util.getattr_ignorecase(database, 'HRXN_' + db_subset) if HRXN is None: raise ValidationError( """Special subset '%s' not available for database %s.""" % (db_subset, db_name)) else: temp = [] for rxn in db_subset: if rxn in HRXN: temp.append(rxn) else: raise ValidationError( """Subset element '%s' not a member of database %s.""" % (str(rxn), db_name)) HRXN = temp temp = [] for rxn in HRXN: temp.append(ACTV['%s-%s' % (dbse, rxn)]) HSYS = p4util.drop_duplicates(sum(temp, [])) # Sow all the necessary reagent computations core.print_out("\n\n") p4util.banner(("Database %s Computation" % (db_name))) core.print_out("\n") # write index of calcs to output file instructions = """\n The database single-job procedure has been selected through mode='continuous'.\n""" instructions += """ Calculations for the reagents will proceed in the order below and will be followed\n""" instructions += """ by summary results for the database.\n\n""" for rgt in HSYS: instructions += """ %-s\n""" % (rgt) core.print_out(instructions) # Loop through chemical systems ERGT = {} ERXN = {} VRGT = {} VRXN = {} for rgt in HSYS: VRGT[rgt] = {} core.print_out('\n') p4util.banner(' Database {} Computation: Reagent {} \n {}'.format( db_name, rgt, TAGL[rgt])) core.print_out('\n') molecule = core.Molecule.from_dict(GEOS[rgt].to_dict()) molecule.set_name(rgt) molecule.update_geometry() if symmetry_override: molecule.reset_point_group('c1') molecule.fix_orientation(True) molecule.fix_com(True) molecule.update_geometry() if (openshell_override) and (molecule.multiplicity() != 1): if user_reference == 'RHF': core.set_global_option('REFERENCE', 'UHF') elif user_reference == 'RKS': core.set_global_option('REFERENCE', 'UKS') core.set_global_option( 'WRITER_FILE_LABEL', user_writer_file_label + ('' if user_writer_file_label == '' else '-') + rgt) if allowoptexceeded: try: ERGT[rgt] = func(molecule=molecule, **kwargs) except ConvergenceError: core.print_out(f"Optimization exceeded cycles for {rgt}") ERGT[rgt] = 0.0 else: ERGT[rgt] = func(molecule=molecule, **kwargs) core.print_variables() core.print_out(" Database Contributions Map:\n {}\n".format('-' * 75)) for rxn in HRXN: db_rxn = dbse + '-' + str(rxn) if rgt in ACTV[db_rxn]: core.print_out( ' reagent {} contributes by {:.4f} to reaction {}\n'. format(rgt, RXNM[db_rxn][rgt], db_rxn)) core.print_out('\n') for envv in db_tabulate: VRGT[rgt][envv.upper()] = core.variable(envv) core.set_global_option("REFERENCE", user_reference) core.clean() #core.opt_clean() core.clean_variables() # Reap all the necessary reaction computations core.print_out("\n") p4util.banner(("Database %s Results" % (db_name))) core.print_out("\n") maxactv = [] for rxn in HRXN: maxactv.append(len(ACTV[dbse + '-' + str(rxn)])) maxrgt = max(maxactv) table_delimit = '-' * (62 + 20 * maxrgt) tables = '' # find any reactions that are incomplete FAIL = collections.defaultdict(int) for rxn in HRXN: db_rxn = dbse + '-' + str(rxn) for i in range(len(ACTV[db_rxn])): if abs(ERGT[ACTV[db_rxn][i]]) < 1.0e-12: if not allowoptexceeded: FAIL[rxn] = 1 # tabulate requested process::environment variables tables += """ For each VARIABLE requested by tabulate, a 'Reaction Value' will be formed from\n""" tables += """ 'Reagent' values according to weightings 'Wt', as for the REQUESTED ENERGY below.\n""" tables += """ Depending on the nature of the variable, this may or may not make any physical sense.\n""" for rxn in HRXN: db_rxn = dbse + '-' + str(rxn) VRXN[db_rxn] = {} for envv in db_tabulate: envv = envv.upper() tables += """\n ==> %s <==\n\n""" % (envv.title()) tables += _tblhead(maxrgt, table_delimit, 2) for rxn in HRXN: db_rxn = dbse + '-' + str(rxn) if FAIL[rxn]: tables += """\n%23s %8s %8s %8s %8s""" % (db_rxn, '', '****', '', '') for i in range(len(ACTV[db_rxn])): tables += """ %16.8f %2.0f""" % (VRGT[ ACTV[db_rxn][i]][envv], RXNM[db_rxn][ACTV[db_rxn][i]]) else: VRXN[db_rxn][envv] = 0.0 for i in range(len(ACTV[db_rxn])): VRXN[db_rxn][envv] += VRGT[ ACTV[db_rxn][i]][envv] * RXNM[db_rxn][ACTV[db_rxn][i]] tables += """\n%23s %16.8f """ % ( db_rxn, VRXN[db_rxn][envv]) for i in range(len(ACTV[db_rxn])): tables += """ %16.8f %2.0f""" % (VRGT[ ACTV[db_rxn][i]][envv], RXNM[db_rxn][ACTV[db_rxn][i]]) tables += """\n %s\n""" % (table_delimit) # tabulate primary requested energy variable with statistics count_rxn = 0 minDerror = 100000.0 maxDerror = 0.0 MSDerror = 0.0 MADerror = 0.0 RMSDerror = 0.0 tables += """\n ==> %s <==\n\n""" % ('Requested Energy') tables += _tblhead(maxrgt, table_delimit, 1) for rxn in HRXN: db_rxn = dbse + '-' + str(rxn) if FAIL[rxn]: tables += """\n%23s %8.4f %8s %10s %10s""" % ( db_rxn, BIND[db_rxn], '****', '****', '****') for i in range(len(ACTV[db_rxn])): tables += """ %16.8f %2.0f""" % (ERGT[ACTV[db_rxn][i]], RXNM[db_rxn][ACTV[db_rxn][i]]) else: ERXN[db_rxn] = 0.0 for i in range(len(ACTV[db_rxn])): ERXN[db_rxn] += ERGT[ACTV[db_rxn][i]] * RXNM[db_rxn][ ACTV[db_rxn][i]] error = constants.hartree2kcalmol * ERXN[db_rxn] - BIND[db_rxn] tables += """\n%23s %8.4f %8.4f %10.4f %10.4f""" % ( db_rxn, BIND[db_rxn], constants.hartree2kcalmol * ERXN[db_rxn], error, error * constants.cal2J) for i in range(len(ACTV[db_rxn])): tables += """ %16.8f %2.0f""" % (ERGT[ACTV[db_rxn][i]], RXNM[db_rxn][ACTV[db_rxn][i]]) if abs(error) < abs(minDerror): minDerror = error if abs(error) > abs(maxDerror): maxDerror = error MSDerror += error MADerror += abs(error) RMSDerror += error * error count_rxn += 1 tables += """\n %s\n""" % (table_delimit) if count_rxn: MSDerror /= float(count_rxn) MADerror /= float(count_rxn) RMSDerror = math.sqrt(RMSDerror / float(count_rxn)) tables += """%23s %19s %10.4f %10.4f\n""" % ( 'Minimal Dev', '', minDerror, minDerror * constants.cal2J) tables += """%23s %19s %10.4f %10.4f\n""" % ( 'Maximal Dev', '', maxDerror, maxDerror * constants.cal2J) tables += """%23s %19s %10.4f %10.4f\n""" % ( 'Mean Signed Dev', '', MSDerror, MSDerror * constants.cal2J) tables += """%23s %19s %10.4f %10.4f\n""" % ( 'Mean Absolute Dev', '', MADerror, MADerror * constants.cal2J) tables += """%23s %19s %10.4f %10.4f\n""" % ( 'RMS Dev', '', RMSDerror, RMSDerror * constants.cal2J) tables += """ %s\n""" % (table_delimit) core.set_variable('%s DATABASE MEAN SIGNED DEVIATION' % (db_name), MSDerror) core.set_variable('%s DATABASE MEAN ABSOLUTE DEVIATION' % (db_name), MADerror) core.set_variable('%s DATABASE ROOT-MEAN-SQUARE DEVIATION' % (db_name), RMSDerror) core.print_out(tables) finalenergy = MADerror else: finalenergy = 0.0 optstash.restore() DB_RGT.clear() DB_RGT.update(VRGT) DB_RXN.clear() DB_RXN.update(VRXN) return finalenergy
def database(name, db_name, **kwargs): r"""Function to access the molecule objects and reference energies of popular chemical databases. :aliases: db() :returns: (*float*) Mean absolute deviation of the database in kcal/mol :PSI variables: .. hlist:: :columns: 1 * :psivar:`db_name DATABASE MEAN SIGNED DEVIATION <db_nameDATABASEMEANSIGNEDDEVIATION>` * :psivar:`db_name DATABASE MEAN ABSOLUTE DEVIATION <db_nameDATABASEMEANABSOLUTEDEVIATION>` * :psivar:`db_name DATABASE ROOT-MEAN-SQUARE DEVIATION <db_nameDATABASEROOT-MEAN-SQUARESIGNEDDEVIATION>` * Python dictionaries of results accessible as ``DB_RGT`` and ``DB_RXN``. .. note:: It is very easy to make a database from a collection of xyz files using the script :source:`share/scripts/ixyz2database.py`. See :ref:`sec:createDatabase` for details. .. caution:: Some features are not yet implemented. Buy a developer some coffee. - In sow/reap mode, use only global options (e.g., the local option set by ``set scf scf_type df`` will not be respected). .. note:: To access a database that is not embedded in a |PSIfour| distribution, add the path to the directory containing the database to the environment variable :envvar:`PYTHONPATH`. :type name: string :param name: ``'scf'`` || ``'sapt0'`` || ``'ccsd(t)'`` || etc. First argument, usually unlabeled. Indicates the computational method to be applied to the database. May be any valid argument to :py:func:`~driver.energy`. :type db_name: string :param db_name: ``'BASIC'`` || ``'S22'`` || ``'HTBH'`` || etc. Second argument, usually unlabeled. Indicates the requested database name, matching (case insensitive) the name of a python file in ``psi4/share/databases`` or :envvar:`PYTHONPATH`. Consult that directory for available databases and literature citations. :type func: :ref:`function <op_py_function>` :param func: |dl| ``energy`` |dr| || ``optimize`` || ``cbs`` Indicates the type of calculation to be performed on each database member. The default performs a single-point ``energy('name')``, while ``optimize`` perfoms a geometry optimization on each reagent, and ``cbs`` performs a compound single-point energy. If a nested series of python functions is intended (see :ref:`sec:intercalls`), use keyword ``db_func`` instead of ``func``. :type mode: string :param mode: |dl| ``'continuous'`` |dr| || ``'sow'`` || ``'reap'`` Indicates whether the calculations required to complete the database are to be run in one file (``'continuous'``) or are to be farmed out in an embarrassingly parallel fashion (``'sow'``/``'reap'``). For the latter, run an initial job with ``'sow'`` and follow instructions in its output file. :type cp: :ref:`boolean <op_py_boolean>` :param cp: ``'on'`` || |dl| ``'off'`` |dr| Indicates whether counterpoise correction is employed in computing interaction energies. Use this option and NOT the :py:func:`~wrappers.cp` function for BSSE correction in database(). Option available (See :ref:`sec:availableDatabases`) only for databases of bimolecular complexes. :type rlxd: :ref:`boolean <op_py_boolean>` :param rlxd: ``'on'`` || |dl| ``'off'`` |dr| Indicates whether correction for deformation energy is employed in computing interaction energies. Option available (See :ref:`sec:availableDatabases`) only for databases of bimolecular complexes with non-frozen monomers, e.g., HBC6. :type symm: :ref:`boolean <op_py_boolean>` :param symm: |dl| ``'on'`` |dr| || ``'off'`` Indicates whether the native symmetry of the database reagents is employed (``'on'``) or whether it is forced to :math:`C_1` symmetry (``'off'``). Some computational methods (e.g., SAPT) require no symmetry, and this will be set by database(). :type zpe: :ref:`boolean <op_py_boolean>` :param zpe: ``'on'`` || |dl| ``'off'`` |dr| Indicates whether zero-point-energy corrections are appended to single-point energy values. Option valid only for certain thermochemical databases. Disabled until Hessians ready. :type benchmark: string :param benchmark: |dl| ``'default'`` |dr| || ``'S22A'`` || etc. Indicates whether a non-default set of reference energies, if available (See :ref:`sec:availableDatabases`), are employed for the calculation of error statistics. :type tabulate: array of strings :param tabulate: |dl| ``[]`` |dr| || ``['scf total energy', 'natom']`` || etc. Indicates whether to form tables of variables other than the primary requested energy. Available for any PSI variable. :type subset: string or array of strings :param subset: Indicates a subset of the full database to run. This is a very flexible option and can be used in three distinct ways, outlined below. Note that two take a string and the last takes an array. See `Available Databases`_ for available values. * ``'small'`` || ``'large'`` || ``'equilibrium'`` Calls predefined subsets of the requested database, either ``'small'``, a few of the smallest database members, ``'large'``, the largest of the database members, or ``'equilibrium'``, the equilibrium geometries for a database composed of dissociation curves. * ``'BzBz_S'`` || ``'FaOOFaON'`` || ``'ArNe'`` || ``'HB'`` || etc. For databases composed of dissociation curves, or otherwise divided into subsets, individual curves and subsets can be called by name. Consult the database python files for available molecular systems (case insensitive). * ``[1,2,5]`` || ``['1','2','5']`` || ``['BzMe-3.5', 'MeMe-5.0']`` || etc. Specify a list of database members to run. Consult the database python files for available molecular systems. This is the only portion of database input that is case sensitive; choices for this keyword must match the database python file. :examples: >>> # [1] Two-stage SCF calculation on short, equilibrium, and long helium dimer >>> db('scf','RGC10',cast_up='sto-3g',subset=['HeHe-0.85','HeHe-1.0','HeHe-1.5'], tabulate=['scf total energy','natom']) >>> # [2] Counterpoise-corrected interaction energies for three complexes in S22 >>> # Error statistics computed wrt an old benchmark, S22A >>> database('mp2','S22',cp=1,subset=[16,17,8],benchmark='S22A') >>> # [3] SAPT0 on the neon dimer dissociation curve >>> db('sapt0',subset='NeNe',cp=0,symm=0,db_name='RGC10') >>> # [4] Optimize system 1 in database S22, producing tables of scf and mp2 energy >>> db('mp2','S22',db_func=optimize,subset=[1], tabulate=['mp2 total energy','current energy']) >>> # [5] CCSD on the smallest systems of HTBH, a hydrogen-transfer database >>> database('ccsd','HTBH',subset='small', tabulate=['ccsd total energy', 'mp2 total energy']) """ lowername = name #TODO kwargs = p4util.kwargs_lower(kwargs) # Wrap any positional arguments into kwargs (for intercalls among wrappers) if not('name' in kwargs) and name: kwargs['name'] = name #.lower() if not('db_name' in kwargs) and db_name: kwargs['db_name'] = db_name # Establish function to call func = kwargs.pop('db_func', kwargs.pop('func', energy)) kwargs['db_func'] = func # Bounce to CP if bsse kwarg (someday) if kwargs.get('bsse_type', None) is not None: raise ValidationError("""Database: Cannot specify bsse_type for database. Use the cp keyword withing database instead.""") allowoptexceeded = kwargs.get('allowoptexceeded', False) optstash = p4util.OptionsState( ['WRITER_FILE_LABEL'], ['SCF', 'REFERENCE']) # Wrapper wholly defines molecule. discard any passed-in kwargs.pop('molecule', None) # Paths to search for database files: here + PSIPATH + library + PYTHONPATH db_paths = [] db_paths.append(os.getcwd()) db_paths.extend(os.environ.get('PSIPATH', '').split(os.path.pathsep)) db_paths.append(os.path.join(core.get_datadir(), 'databases')) db_paths.append(os.path.dirname(__file__)) db_paths = list(map(os.path.abspath, db_paths)) sys.path[1:1] = db_paths # TODO this should be modernized a la interface_cfour # Define path and load module for requested database database = p4util.import_ignorecase(db_name) if database is None: core.print_out('\nPython module for database %s failed to load\n\n' % (db_name)) core.print_out('\nSearch path that was tried:\n') core.print_out(", ".join(map(str, sys.path))) raise ValidationError("Python module loading problem for database " + str(db_name)) else: dbse = database.dbse HRXN = database.HRXN ACTV = database.ACTV RXNM = database.RXNM BIND = database.BIND TAGL = database.TAGL GEOS = database.GEOS try: DATA = database.DATA except AttributeError: DATA = {} user_writer_file_label = core.get_global_option('WRITER_FILE_LABEL') user_reference = core.get_global_option('REFERENCE') # Configuration based upon e_name & db_name options # Force non-supramolecular if needed if not hasattr(lowername, '__call__') and re.match(r'^.*sapt', lowername): try: database.ACTV_SA except AttributeError: raise ValidationError('Database %s not suitable for non-supramolecular calculation.' % (db_name)) else: ACTV = database.ACTV_SA # Force open-shell if needed openshell_override = 0 if user_reference in ['RHF', 'RKS']: try: database.isOS except AttributeError: pass else: if p4util.yes.match(str(database.isOS)): openshell_override = 1 core.print_out('\nSome reagents in database %s require an open-shell reference; will be reset to UHF/UKS as needed.\n' % (db_name)) # Configuration based upon database keyword options # Option symmetry- whether symmetry treated normally or turned off (currently req'd for dfmp2 & dft) db_symm = kwargs.get('symm', True) symmetry_override = 0 if db_symm is False: symmetry_override = 1 elif db_symm is True: pass else: raise ValidationError("""Symmetry mode '%s' not valid.""" % (db_symm)) # Option mode of operation- whether db run in one job or files farmed out db_mode = kwargs.pop('db_mode', kwargs.pop('mode', 'continuous')).lower() kwargs['db_mode'] = db_mode if db_mode == 'continuous': pass elif db_mode == 'sow': pass elif db_mode == 'reap': db_linkage = kwargs.get('linkage', None) if db_linkage is None: raise ValidationError("""Database execution mode 'reap' requires a linkage option.""") else: raise ValidationError("""Database execution mode '%s' not valid.""" % (db_mode)) # Option counterpoise- whether for interaction energy databases run in bsse-corrected or not db_cp = kwargs.get('cp', False) if db_cp is True: try: database.ACTV_CP except AttributeError: raise ValidationError("""Counterpoise correction mode 'yes' invalid for database %s.""" % (db_name)) else: ACTV = database.ACTV_CP elif db_cp is False: pass else: raise ValidationError("""Counterpoise correction mode '%s' not valid.""" % (db_cp)) # Option relaxed- whether for non-frozen-monomer interaction energy databases include deformation correction or not? db_rlxd = kwargs.get('rlxd', False) if db_rlxd is True: if db_cp is True: try: database.ACTV_CPRLX database.RXNM_CPRLX except AttributeError: raise ValidationError('Deformation and counterpoise correction mode \'yes\' invalid for database %s.' % (db_name)) else: ACTV = database.ACTV_CPRLX RXNM = database.RXNM_CPRLX elif db_cp is False: try: database.ACTV_RLX except AttributeError: raise ValidationError('Deformation correction mode \'yes\' invalid for database %s.' % (db_name)) else: ACTV = database.ACTV_RLX elif db_rlxd is False: #elif no.match(str(db_rlxd)): pass else: raise ValidationError('Deformation correction mode \'%s\' not valid.' % (db_rlxd)) # Option zero-point-correction- whether for thermochem databases jobs are corrected by zpe db_zpe = kwargs.get('zpe', False) if db_zpe is True: raise ValidationError('Zero-point-correction mode \'yes\' not yet implemented.') elif db_zpe is False: pass else: raise ValidationError('Zero-point-correction \'mode\' %s not valid.' % (db_zpe)) # Option benchmark- whether error statistics computed wrt alternate reference energies db_benchmark = 'default' if 'benchmark' in kwargs: db_benchmark = kwargs['benchmark'] if db_benchmark.lower() == 'default': pass else: BIND = p4util.getattr_ignorecase(database, 'BIND_' + db_benchmark) if BIND is None: raise ValidationError('Special benchmark \'%s\' not available for database %s.' % (db_benchmark, db_name)) # Option tabulate- whether tables of variables other than primary energy method are formed # TODO db(func=cbs,tabulate=[non-current-energy]) # broken db_tabulate = [] if 'tabulate' in kwargs: db_tabulate = kwargs['tabulate'] # Option subset- whether all of the database or just a portion is run db_subset = HRXN if 'subset' in kwargs: db_subset = kwargs['subset'] if isinstance(db_subset, (str, bytes)): if db_subset.lower() == 'small': try: database.HRXN_SM except AttributeError: raise ValidationError("""Special subset 'small' not available for database %s.""" % (db_name)) else: HRXN = database.HRXN_SM elif db_subset.lower() == 'large': try: database.HRXN_LG except AttributeError: raise ValidationError("""Special subset 'large' not available for database %s.""" % (db_name)) else: HRXN = database.HRXN_LG elif db_subset.lower() == 'equilibrium': try: database.HRXN_EQ except AttributeError: raise ValidationError("""Special subset 'equilibrium' not available for database %s.""" % (db_name)) else: HRXN = database.HRXN_EQ else: HRXN = p4util.getattr_ignorecase(database, db_subset) if HRXN is None: HRXN = p4util.getattr_ignorecase(database, 'HRXN_' + db_subset) if HRXN is None: raise ValidationError("""Special subset '%s' not available for database %s.""" % (db_subset, db_name)) else: temp = [] for rxn in db_subset: if rxn in HRXN: temp.append(rxn) else: raise ValidationError("""Subset element '%s' not a member of database %s.""" % (str(rxn), db_name)) HRXN = temp temp = [] for rxn in HRXN: temp.append(ACTV['%s-%s' % (dbse, rxn)]) HSYS = p4util.drop_duplicates(sum(temp, [])) # Sow all the necessary reagent computations core.print_out("\n\n") p4util.banner(("Database %s Computation" % (db_name))) core.print_out("\n") # write index of calcs to output file instructions = """\n The database single-job procedure has been selected through mode='continuous'.\n""" instructions += """ Calculations for the reagents will proceed in the order below and will be followed\n""" instructions += """ by summary results for the database.\n\n""" for rgt in HSYS: instructions += """ %-s\n""" % (rgt) core.print_out(instructions) # Loop through chemical systems ERGT = {} ERXN = {} VRGT = {} VRXN = {} for rgt in HSYS: VRGT[rgt] = {} core.print_out('\n') p4util.banner(' Database {} Computation: Reagent {} \n {}'.format(db_name, rgt, TAGL[rgt])) core.print_out('\n') molecule = core.Molecule.from_dict(GEOS[rgt].to_dict()) molecule.set_name(rgt) molecule.update_geometry() if symmetry_override: molecule.reset_point_group('c1') molecule.fix_orientation(True) molecule.fix_com(True) molecule.update_geometry() if (openshell_override) and (molecule.multiplicity() != 1): if user_reference == 'RHF': core.set_global_option('REFERENCE', 'UHF') elif user_reference == 'RKS': core.set_global_option('REFERENCE', 'UKS') core.set_global_option('WRITER_FILE_LABEL', user_writer_file_label + ('' if user_writer_file_label == '' else '-') + rgt) if allowoptexceeded: try: ERGT[rgt] = func(molecule=molecule, **kwargs) except ConvergenceError: core.print_out(f"Optimization exceeded cycles for {rgt}") ERGT[rgt] = 0.0 else: ERGT[rgt] = func(molecule=molecule, **kwargs) core.print_variables() core.print_out(" Database Contributions Map:\n {}\n".format('-' * 75)) for rxn in HRXN: db_rxn = dbse + '-' + str(rxn) if rgt in ACTV[db_rxn]: core.print_out(' reagent {} contributes by {:.4f} to reaction {}\n'.format(rgt, RXNM[db_rxn][rgt], db_rxn)) core.print_out('\n') for envv in db_tabulate: VRGT[rgt][envv.upper()] = core.variable(envv) core.set_global_option("REFERENCE", user_reference) core.clean() #core.opt_clean() core.clean_variables() # Reap all the necessary reaction computations core.print_out("\n") p4util.banner(("Database %s Results" % (db_name))) core.print_out("\n") maxactv = [] for rxn in HRXN: maxactv.append(len(ACTV[dbse + '-' + str(rxn)])) maxrgt = max(maxactv) table_delimit = '-' * (62 + 20 * maxrgt) tables = '' # find any reactions that are incomplete FAIL = collections.defaultdict(int) for rxn in HRXN: db_rxn = dbse + '-' + str(rxn) for i in range(len(ACTV[db_rxn])): if abs(ERGT[ACTV[db_rxn][i]]) < 1.0e-12: if not allowoptexceeded: FAIL[rxn] = 1 # tabulate requested process::environment variables tables += """ For each VARIABLE requested by tabulate, a 'Reaction Value' will be formed from\n""" tables += """ 'Reagent' values according to weightings 'Wt', as for the REQUESTED ENERGY below.\n""" tables += """ Depending on the nature of the variable, this may or may not make any physical sense.\n""" for rxn in HRXN: db_rxn = dbse + '-' + str(rxn) VRXN[db_rxn] = {} for envv in db_tabulate: envv = envv.upper() tables += """\n ==> %s <==\n\n""" % (envv.title()) tables += _tblhead(maxrgt, table_delimit, 2) for rxn in HRXN: db_rxn = dbse + '-' + str(rxn) if FAIL[rxn]: tables += """\n%23s %8s %8s %8s %8s""" % (db_rxn, '', '****', '', '') for i in range(len(ACTV[db_rxn])): tables += """ %16.8f %2.0f""" % (VRGT[ACTV[db_rxn][i]][envv], RXNM[db_rxn][ACTV[db_rxn][i]]) else: VRXN[db_rxn][envv] = 0.0 for i in range(len(ACTV[db_rxn])): VRXN[db_rxn][envv] += VRGT[ACTV[db_rxn][i]][envv] * RXNM[db_rxn][ACTV[db_rxn][i]] tables += """\n%23s %16.8f """ % (db_rxn, VRXN[db_rxn][envv]) for i in range(len(ACTV[db_rxn])): tables += """ %16.8f %2.0f""" % (VRGT[ACTV[db_rxn][i]][envv], RXNM[db_rxn][ACTV[db_rxn][i]]) tables += """\n %s\n""" % (table_delimit) # tabulate primary requested energy variable with statistics count_rxn = 0 minDerror = 100000.0 maxDerror = 0.0 MSDerror = 0.0 MADerror = 0.0 RMSDerror = 0.0 tables += """\n ==> %s <==\n\n""" % ('Requested Energy') tables += _tblhead(maxrgt, table_delimit, 1) for rxn in HRXN: db_rxn = dbse + '-' + str(rxn) if FAIL[rxn]: tables += """\n%23s %8.4f %8s %10s %10s""" % (db_rxn, BIND[db_rxn], '****', '****', '****') for i in range(len(ACTV[db_rxn])): tables += """ %16.8f %2.0f""" % (ERGT[ACTV[db_rxn][i]], RXNM[db_rxn][ACTV[db_rxn][i]]) else: ERXN[db_rxn] = 0.0 for i in range(len(ACTV[db_rxn])): ERXN[db_rxn] += ERGT[ACTV[db_rxn][i]] * RXNM[db_rxn][ACTV[db_rxn][i]] error = constants.hartree2kcalmol * ERXN[db_rxn] - BIND[db_rxn] tables += """\n%23s %8.4f %8.4f %10.4f %10.4f""" % (db_rxn, BIND[db_rxn], constants.hartree2kcalmol * ERXN[db_rxn], error, error * constants.cal2J) for i in range(len(ACTV[db_rxn])): tables += """ %16.8f %2.0f""" % (ERGT[ACTV[db_rxn][i]], RXNM[db_rxn][ACTV[db_rxn][i]]) if abs(error) < abs(minDerror): minDerror = error if abs(error) > abs(maxDerror): maxDerror = error MSDerror += error MADerror += abs(error) RMSDerror += error * error count_rxn += 1 tables += """\n %s\n""" % (table_delimit) if count_rxn: MSDerror /= float(count_rxn) MADerror /= float(count_rxn) RMSDerror = math.sqrt(RMSDerror / float(count_rxn)) tables += """%23s %19s %10.4f %10.4f\n""" % ('Minimal Dev', '', minDerror, minDerror * constants.cal2J) tables += """%23s %19s %10.4f %10.4f\n""" % ('Maximal Dev', '', maxDerror, maxDerror * constants.cal2J) tables += """%23s %19s %10.4f %10.4f\n""" % ('Mean Signed Dev', '', MSDerror, MSDerror * constants.cal2J) tables += """%23s %19s %10.4f %10.4f\n""" % ('Mean Absolute Dev', '', MADerror, MADerror * constants.cal2J) tables += """%23s %19s %10.4f %10.4f\n""" % ('RMS Dev', '', RMSDerror, RMSDerror * constants.cal2J) tables += """ %s\n""" % (table_delimit) core.set_variable('%s DATABASE MEAN SIGNED DEVIATION' % (db_name), MSDerror) core.set_variable('%s DATABASE MEAN ABSOLUTE DEVIATION' % (db_name), MADerror) core.set_variable('%s DATABASE ROOT-MEAN-SQUARE DEVIATION' % (db_name), RMSDerror) core.print_out(tables) finalenergy = MADerror else: finalenergy = 0.0 optstash.restore() DB_RGT.clear() DB_RGT.update(VRGT) DB_RXN.clear() DB_RXN.update(VRXN) return finalenergy