def __init__(self, data=None):
    """Create an empty container, optionally restoring it from ``data``.

    **Parameters:**

    data : dict-like, optional
      When truthy, the keys ``'ao_spec'``, ``'geo_info'``,
      ``'geo_spec_all'``, ``'mo_data'``, ``'sym'`` and ``'index_list'`` are
      read from it (in that order) to repopulate the instance.
    """
    self.geo_spec_all = []  #: Contains all molecular geometries, i.e., :literal:`geo_spec`. (See :ref:`Central Variables` for details.)
    self.geo_info = []  #: See :ref:`Central Variables` for details.
    self.ao_spec = []  #: See :ref:`Central Variables` for details.
    self.mo_coeff_all = []  #: Contains all molecular orbital coefficients. List of numpy.ndarray
    self.mo_energy_all = []  #: Contains all molecular orbital energies. List of numpy.ndarray
    self.mo_occ_all = []  #: Contains all molecular orbital occupations. List of numpy.ndarray
    self.sym = []  #: Python dictionary containing the molecular orbital self.symmetries and the corresponding position in self.mo_coeff_all, self.mo_energy_all, and self.mo_occ_all, respectively.
    self.index_list = []  #: After the execution of the ordering routine, it contains the new indices of the molecular orbitals. If index < 0, the molecular orbital changes its sign. shape=(Nfiles,NMO)

    self.geo_spec_tck = []
    self.mo_coeff_tck = []
    self.mo_energy_tck = []
    self.mo_occ_tck = []
    self.MO_Spec = []
    self.QC = []

    if not data:
        return

    def unwrapped(entry):
        # Indexing an ndarray with ``[numpy.newaxis][0]`` hands back the
        # stored object when the array is 0-dimensional; anything that is
        # not an ndarray is passed through untouched.
        # NOTE(review): presumably needed for dicts restored via numpy
        # save/load -- confirm against the code that writes ``data``.
        if isinstance(entry, numpy.ndarray):
            return entry[numpy.newaxis][0]
        return entry

    # The statement order mirrors the order in which the keys are consumed.
    self.ao_spec = AOClass(restart=unwrapped(data['ao_spec']))
    self.geo_info = data['geo_info']
    self.geo_spec_all = data['geo_spec_all']
    self.mo_list_parsing(unwrapped(data['mo_data']))
    self.sym = unwrapped(data['sym'])
    self.index_list = data['index_list']
def read_gamess(fname, all_mo=False, spin=None, read_properties=False,
                **kwargs):
    '''Reads all information desired from a Gamess-US output file.

    **Parameters:**

    fname : str, file descriptor
      Specifies the filename for the input file.
      fname can also be used with a file descriptor instead of a filename.
    all_mo : bool, optional
      If True, all molecular orbitals are returned. Otherwise orbitals with
      an occupation below 1e-7 are removed.
    spin : {None, 'alpha', or 'beta'}, optional
      If not None, returns exclusively 'alpha' or 'beta' molecular orbitals.
      Only supported for unrestricted calculations.
    read_properties : bool, optional
      If True, the total energy, ground/excited state energies and
      multiplicities, (transition) dipole moments, and Mulliken/Lowdin
      populations are parsed as well.

    **Returns:**

    qc (class QCinfo) with attributes geo_spec, geo_info, ao_spec, mo_spec, etot :
      See :ref:`Central Variables` for details.

    **Raises:**

    IOError
      If different basis sets are found for the same atom type, or if the
      ``spin`` keyword is invalid / cannot be satisfied.
    '''
    if isinstance(fname, str):
        filename = fname
        fname = descriptor_from_file(filename, index=0)
    else:
        filename = fname.name

    from io import TextIOWrapper
    if isinstance(fname, TextIOWrapper):
        flines = fname.readlines()  # Read the WHOLE file into RAM
    else:
        # Binary descriptor: decode and split into lines, keeping the '\n'
        # at the end of every line (mimics readlines()).
        magic = 'This is an Orbkit magic string'
        text = fname.read().decode("iso-8859-1").replace(
            '\n', '\n{}'.format(magic))
        flines = text.split(magic)
        flines.pop()

    # Initialize the variables
    qc = QCinfo()
    qc.ao_spec = AOClass([])
    qc.mo_spec = MOClass([])
    has_alpha = False   # Flag for alpha electron set
    has_beta = False    # Flag for beta electron set
    restricted = True   # Flag for restricted calculation
    sec_flag = None     # A Flag specifying the current section
    is_pop_ana = True   # Flag for population analysis for ground state
    # Keywords for single point calculation and geometry optimization.
    # NOTE(review): all marker strings below are matched as substrings, so
    # their internal whitespace is significant -- confirm the spacing
    # against real GAMESS output.
    keyword = [' ATOM ATOMIC COORDINATES', '']
    mokey = 'EIGENVECTORS'  # Keyword for MOs
    sym = {}            # Symmetry of MOs
    geo_skip = 1        # Number of lines to skip in geometry section

    for il, line in enumerate(flines):
        thisline = line.split()  # The current line split into segments

        # Check the file for keywords
        if 'RUNTYP=OPTIMIZE' in line:
            keyword = [
                ' COORDINATES OF ALL ATOMS ARE',
                '***** EQUILIBRIUM GEOMETRY LOCATED *****'
            ]
            geo_skip = 2
            if 'SCFTYP=UHF' in line:
                mokey = ' SET ****'
                restricted = False
            else:
                mokey = 'EIGENVECTORS'
        elif keyword[0] in line and keyword[1] in flines[il - 1]:
            # The section containing information about
            # the molecular geometry begins
            sec_flag = 'geo_info'
            atom_count = 0  # Counter for Atoms
            angstrom = '(BOHR)' not in line
        elif 'ATOMIC BASIS SET' in line:
            # The section containing information about
            # the atomic orbitals begins
            sec_flag = 'ao_info'
            ao_skip = 6  # Number of lines to skip
            AO = []      # Atomic orbitals
        elif '----- ALPHA SET ' in line:
            # The section for alpha electrons
            has_alpha = True
            has_beta = False
            restricted = False
        elif '----- BETA SET ' in line:
            # The section for beta electrons
            restricted = False
            has_alpha = False
            has_beta = True
        elif mokey in line and len(thisline) < 3:
            # The section containing information about
            # the molecular orbitals begins
            sec_flag = 'mo_info'
            mo_skip = 1
            len_mo = 0       # Number of MOs
            init_mo = False  # Initialize new MO section
            info_key = None  # A Flag specifying the energy and symmetry section
            lxlylz = []
            if 'ALPHA' in line:
                has_alpha = True
                mo_skip = 0
            elif 'BETA' in line:
                has_beta = True
                has_alpha = False
                mo_skip = 0
        elif 'NATURAL ORBITALS' in line and len(thisline) <= 3:
            display('The natural orbitals are not extracted.')
        elif ' NUMBER OF OCCUPIED ORBITALS (ALPHA) =' in line:
            occ = []  # occupation number of molecular orbitals
            occ.append(int(thisline[-1]))
        elif ' NUMBER OF OCCUPIED ORBITALS (BETA ) =' in line:
            occ.append(int(thisline[-1]))
        # NOTE: the trigger for the 'ecp_info' section is disabled, so the
        # corresponding branch further below is currently unreachable:
        # elif 'ECP POTENTIALS' in line:
        #   sec_flag = 'ecp_info'
        #   ecp = ''
        elif ' NUMBER OF OCCUPIED ORBITALS (ALPHA) KEPT IS =' in line:
            occ = []  # occupation number of molecular orbitals
            occ.append(int(thisline[-1]))
        elif ' NUMBER OF OCCUPIED ORBITALS (BETA ) KEPT IS =' in line:
            occ.append(int(thisline[-1]))
        elif 'NUMBER OF STATES REQUESTED' in line and read_properties:
            # get the number of excited states and initialize variables for
            # transition dipole moment and energies
            exc_states = int(line.split('=')[1])  # Number of excited states
            # Dipole moments matrix:
            #   Diagonal elements     -> permanent dipole moments
            #   Off-diagonal elements -> transition dipole moments
            qc.dipole_moments = numpy.zeros(
                ((exc_states + 1), (exc_states + 1), 3))
            # Multiplicity of ground and excited states
            qc.states['multiplicity'] = numpy.zeros(exc_states + 1)
            # Energies of ground and excited states
            qc.states['energy'] = numpy.zeros(exc_states + 1)
            qc.states['energy'][0] = qc.etot
            qc.states['multiplicity'][0] = gs_multi
            dm_flag = None  # Flag specifying the dipole moments section
        elif 'TRANSITION DIPOLE MOMENTS' in line and read_properties:
            # Section containing energies of excited states
            sec_flag = 'dm_info'
        elif 'SPIN MULTIPLICITY' in line and read_properties:
            # Energy and Multiplicity for ground state
            # (odd way to get ground state multiplicity)
            gs_multi = int(line.split()[3])
        elif 'FINAL' in line and read_properties:
            # get (last) energy
            qc.etot = float(line.split()[4])
        elif ('TOTAL MULLIKEN AND LOWDIN ATOMIC POPULATIONS' in line
              and is_pop_ana and read_properties):
            # Read Mulliken and Lowdin Atomic Populations
            sec_flag = 'pop_info'
            pop_skip = 1
            # Only the first population analysis is read. (This used to be
            # the no-op comparison ``is_pop_ana == False``.)
            is_pop_ana = False
            qc.pop_ana['Lowdin'] = []
            qc.pop_ana['Mulliken'] = []
        else:
            # Check if we are in a specific section
            if sec_flag == 'geo_info':
                if not geo_skip:
                    if len(line) < 2:
                        sec_flag = None
                    else:
                        qc.geo_info.append(
                            [thisline[0], atom_count + 1, thisline[1]])
                        qc.geo_spec.append([float(ii) for ii in thisline[2:]])
                        atom_count += 1
                elif geo_skip:
                    geo_skip -= 1
            elif sec_flag == 'ao_info':
                if not ao_skip:
                    if ' TOTAL NUMBER OF BASIS SET SHELLS' in line:
                        sec_flag = None
                    else:
                        if len(thisline) == 1:
                            # Read atom type
                            at_type = thisline[0]
                            AO.append([])
                            new_ao = False
                        elif len(thisline) == 0 and new_ao == False:
                            # A blank line announces a new shell
                            new_ao = True
                        else:
                            coeffs = [float(ii) for ii in thisline[3:]]
                            if new_ao:
                                # First primitive of a shell; an 'l' shell
                                # is expanded into separate s and p entries
                                ao_type = thisline[1].lower().replace(
                                    'l', 'sp')
                                for i_ao, t_ao in enumerate(ao_type):
                                    AO[-1].append({
                                        'atom_type': at_type,
                                        'type': t_ao,
                                        'pnum': 1,
                                        'coeffs': [[coeffs[0],
                                                    coeffs[1 + i_ao]]]
                                    })
                                new_ao = False
                            else:
                                # Further primitive of the current shell
                                for i_ao in range(len(ao_type)):
                                    shell = AO[-1][-len(ao_type) + i_ao]
                                    shell['coeffs'].append(
                                        [coeffs[0], coeffs[1 + i_ao]])
                                    shell['pnum'] += 1
                elif ao_skip:
                    ao_skip -= 1
            elif sec_flag == 'mo_info':
                if not mo_skip:
                    if (('END OF' in line and 'CALCULATION' in line)
                            or '-----------' in line):
                        sec_flag = None
                        has_alpha = False
                        has_beta = False
                    else:
                        if thisline == []:
                            info_key = None
                            init_mo = True
                            # The section ends if the line after a blank
                            # line does not start with an integer
                            try:
                                int(flines[il + 1].split()[0])
                            except ValueError:
                                sec_flag = None
                                init_mo = False
                        elif init_mo:
                            # Line with the MO numbers of this column block
                            init_len = len(thisline)
                            lxlylz = []
                            for ii in range(len(thisline)):
                                if has_alpha == True or has_beta == True:
                                    qc.mo_spec.append({
                                        'coeffs': [],
                                        'energy': 0.0,
                                        'occ_num': 0.0,
                                        'sym': '',
                                        'spin': ''
                                    })
                                else:
                                    qc.mo_spec.append({
                                        'coeffs': [],
                                        'energy': 0.0,
                                        'occ_num': 0.0,
                                        'sym': ''
                                    })
                            init_mo = False
                            info_key = 'energy'
                        elif len(thisline) == init_len and info_key == 'energy':
                            for ii in range(init_len, 0, -1):
                                qc.mo_spec[-ii]['energy'] = float(
                                    thisline[init_len - ii])
                            info_key = 'symmetry'
                        elif (len(thisline) == init_len
                              and info_key == 'symmetry'):
                            for ii in range(init_len, 0, -1):
                                len_mo += 1
                                a = thisline[init_len - ii]
                                # NOTE(review): a repeated label receives
                                # the running MO count instead of a
                                # per-label counter -- confirm this is the
                                # intended numbering.
                                if a not in sym.keys():
                                    sym[a] = 1
                                else:
                                    sym[a] = len_mo
                                if has_alpha:
                                    qc.mo_spec[-ii]['sym'] = '%d.%s_a' % (
                                        sym[a], thisline[init_len - ii])
                                    qc.mo_spec[-ii]['spin'] = 'alpha'
                                elif has_beta:
                                    qc.mo_spec[-ii]['sym'] = '%d.%s_b' % (
                                        sym[a], thisline[init_len - ii])
                                    qc.mo_spec[-ii]['spin'] = 'beta'
                                else:
                                    qc.mo_spec[-ii]['sym'] = '%d.%s' % (
                                        sym[a], thisline[init_len - ii])
                            info_key = 'coeffs'
                        elif thisline != [] and info_key == 'coeffs':
                            lxlylz.append((line[11:17]))
                            for ii, m in enumerate(
                                    re.finditer(r'-?\d+\.\d+', line[16:])):
                                qc.mo_spec[-init_len + ii]['coeffs'].append(
                                    float(m.group()))
                elif mo_skip:
                    mo_skip -= 1
            elif sec_flag == 'ecp_info':
                if 'THE ECP RUN REMOVES' in line:
                    sec_flag = None
                elif 'PARAMETERS FOR' in line:
                    if line[17:25].split()[0] != ecp:
                        ecp = line[17:25].split()[0]
                        zcore = float(line[51:55].split()[0])
                        ii_geo = int(line[35:41].split()[0]) - 1
                        qc.geo_info[ii_geo][2] = str(
                            float(qc.geo_info[ii_geo][2]) - zcore)
                    else:
                        ii_geo = int(line[35:41].split()[0]) - 1
                        qc.geo_info[ii_geo][2] = str(
                            float(qc.geo_info[ii_geo][2]) - zcore)
            elif sec_flag == 'dm_info':
                # instead of giving the output in a useful human and machine
                # readable way, gamess output syntax differs for transitions
                # involving the ground state compared to transitions between
                # excited states...
                if 'GROUND STATE (SCF) DIPOLE=' in line:
                    # ground state dipole is in debye...convert to atomic units
                    for ii in range(3):
                        qc.dipole_moments[0][0][ii] = float(
                            thisline[ii + 4]) * 0.393430307
                if 'EXPECTATION VALUE DIPOLE MOMENT FOR EXCITED STATE' in line:
                    state = (int(line.replace('STATE', 'STATE ').split()[7]))
                    dm_flag = 'state_info'
                if 'TRANSITION FROM THE GROUND STATE TO EXCITED STATE' in line:
                    state = [
                        0,
                        int(line.replace('STATE', 'STATE ').split()[8])
                    ]
                    dm_flag = 'transition_info'
                if 'TRANSITION BETWEEN EXCITED STATES' in line:
                    state = [
                        int(thisline[4]),
                        int(line.replace('AND', 'AND ').split()[6])
                    ]
                    dm_flag = 'transition_info'
                if 'NATURAL ORBITAL OCCUPATION NUMBERS FOR EXCITED STATE' in line:
                    sec_flag = None
                    dm_flag = None
                if dm_flag == 'state_info':
                    if 'STATE MULTIPLICITY' in line:
                        qc.states['multiplicity'][state] = int(
                            line.split('=')[1])
                    if 'STATE ENERGY' in line:
                        qc.states['energy'][state] = float(line.split('=')[1])
                    # Both markers have to be present. (The former
                    # ``'STATE DIPOLE' and 'E*BOHR' in line`` only tested
                    # the second one.)
                    if 'STATE DIPOLE' in line and 'E*BOHR' in line:
                        for ii in range(3):
                            qc.dipole_moments[state][state][ii] = float(
                                thisline[ii + 3])
                elif dm_flag == 'transition_info':
                    if 'TRANSITION DIPOLE' in line and 'E*BOHR' in line:
                        for ii in range(3):
                            # The matrix is filled symmetrically
                            qc.dipole_moments[state[0]][state[1]][ii] = float(
                                thisline[ii + 3])
                            qc.dipole_moments[state[1]][state[0]][ii] = float(
                                thisline[ii + 3])
            elif sec_flag == 'pop_info':
                if not pop_skip:
                    if line == '\n':
                        sec_flag = None
                    else:
                        # The lists were created when the section was
                        # entered; they must not be reset here (doing so
                        # made the following appends raise a KeyError).
                        qc.pop_ana['Lowdin'].append(float(thisline[5]))
                        qc.pop_ana['Mulliken'].append(float(thisline[3]))
                elif pop_skip:
                    pop_skip -= 1

    # Check usage of same atomic basis sets
    basis_set = {}
    for atom_ao in AO:
        atom_type = atom_ao[0]['atom_type']
        if atom_type not in basis_set:
            basis_set[atom_type] = atom_ao
        else:
            for jj in range(len(atom_ao)):
                if atom_ao[jj]['coeffs'] != basis_set[atom_type][jj]['coeffs']:
                    raise IOError('Different basis sets for the same atom.')

    # Numpy array
    for shells in basis_set.values():
        for shell in shells:
            shell['coeffs'] = numpy.array(shell['coeffs'])

    for kk in range(len(qc.mo_spec)):
        qc.mo_spec[kk]['coeffs'] = numpy.array(qc.mo_spec[kk]['coeffs'])

    # Complement atomic basis sets
    for kk in range(len(qc.geo_info)):
        for shell in basis_set[qc.geo_info[kk][0]]:
            qc.ao_spec.append({
                'atom': qc.geo_info[kk][1] - 1,
                'type': shell['type'],
                'pnum': shell['pnum'],
                'coeffs': shell['coeffs'],
                'lxlylz': None
            })

    # Reconstruct exponents list for ao_spec from the basis function labels
    # collected in the last MO coefficient block
    count = 0
    for j in qc.ao_spec:
        l = l_deg(lquant[j['type']])
        j['lxlylz'] = []
        for _ in range(l):
            label = lxlylz[count].lower()
            j['lxlylz'].append(
                (label.count('x'), label.count('y'), label.count('z')))
            count += 1
        j['lxlylz'] = numpy.array(j['lxlylz'], dtype=numpy.int64)

    if restricted:
        # Distribute the electrons over the MOs in file order. The branches
        # are mutually exclusive so that one orbital is never credited
        # twice (independent ``if`` statements could give an occupation of
        # 3.0 when the alpha and beta counts differ).
        for ii in range(len(qc.mo_spec)):
            if occ[0] and occ[1]:
                qc.mo_spec[ii]['occ_num'] += 2.0
                occ[0] -= 1
                occ[1] -= 1
            elif not occ[0] and occ[1]:
                qc.mo_spec[ii]['occ_num'] += 1.0
                occ[1] -= 1
            elif not occ[1] and occ[0]:
                qc.mo_spec[ii]['occ_num'] += 1.0
                occ[0] -= 1
    else:
        for ii in range(len(qc.mo_spec)):
            if qc.mo_spec[ii]['spin'] == 'alpha' and occ[0] > 0:
                qc.mo_spec[ii]['occ_num'] += 1.0
                occ[0] -= 1
                has_alpha = True
            elif qc.mo_spec[ii]['spin'] == 'beta' and occ[1] > 0:
                qc.mo_spec[ii]['occ_num'] += 1.0
                occ[1] -= 1
                has_beta = True

    if spin is not None:
        if restricted:
            raise IOError(
                'The keyword `spin` is only supported for unrestricted calculations.'
            )
        if spin != 'alpha' and spin != 'beta':
            raise IOError('`spin=%s` is not a valid option' % spin)
        elif spin == 'alpha' and has_alpha == True:
            display('Reading only molecular orbitals of spin alpha.')
        elif spin == 'beta' and has_beta == True:
            display('Reading only molecular orbitals of spin beta.')
        elif (not has_alpha) and (not has_beta):
            raise IOError('No spin molecular orbitals available')
        elif ((spin == 'alpha' and not has_alpha)
              or (spin == 'beta' and not has_beta)):
            raise IOError(
                'You requested `%s` orbitals, but None of them are present.' %
                spin)

    # Are all MOs requested for the calculation?
    # (Iterate backwards so that deleting does not shift pending indices.)
    if not all_mo:
        for i in range(len(qc.mo_spec))[::-1]:
            if qc.mo_spec[i]['occ_num'] < 0.0000001:
                del qc.mo_spec[i]

    # Only molecular orbitals of one spin requested?
    if spin is not None:
        for i in range(len(qc.mo_spec))[::-1]:
            if qc.mo_spec[i]['spin'] != spin:
                del qc.mo_spec[i]

    # Convert geo_info and geo_spec to numpy.ndarrays
    qc.format_geo(is_angstrom=angstrom)

    qc.mo_spec.update()
    qc.ao_spec.update()
    return qc
def read_gaussian_log(fname, all_mo=False, spin=None, orientation='standard',
                      i_link=-1, i_geo=-1, i_ao=-1, i_mo=-1, interactive=True,
                      **kwargs):
    '''Reads all information desired from a Gaussian .log file.

    **Parameters:**

    fname: str, file descriptor
      Specifies the filename for the input file.
      fname can also be used with a file descriptor instead of a filename.
    all_mo : bool, optional
      If True, all molecular orbitals are returned.
    spin : {None, 'alpha', or 'beta'}, optional
      If not None, returns exclusively 'alpha' or 'beta' molecular orbitals.
    orientation : string, choices={'input', 'standard'}, optional
      Specifies orientation of the molecule in Gaussian nomenclature. [#first]_
    i_link : int, default=-1
      Selects the file for linked Gaussian jobs.
    i_geo : int, default=-1
      Selects the geometry section of the output file.
    i_ao : int, default=-1
      Selects the atomic orbital section of the output file.
    i_mo : int, default=-1
      Selects the molecular orbital section of the output file.
    interactive : bool
      If True, the user is asked to select the different sets.

    **Returns:**

    qc (class QCinfo) with attributes geo_spec, geo_info, ao_spec, mo_spec, etot :
      See :ref:`Central Variables` for details.

    **Raises:**

    IOError
      If a required section (link, geometry, atomic orbitals, molecular
      orbitals) is missing, a selection index is invalid, the basis set is
      neither (5D, 7F) nor (6D, 10F), or the requested spin is unavailable.

    .. [#first] Attention: The MOs in the output are only valid for the standard orientation!

    '''
    if isinstance(fname, str):
        filename = fname
        fname = descriptor_from_file(filename, index=0)
    else:
        filename = fname.name

    from io import TextIOWrapper
    if isinstance(fname, TextIOWrapper):
        flines = fname.readlines()  # Read the WHOLE file into RAM
    else:
        # Binary descriptor: decode and split into lines, keeping the '\n'
        # at the end of every line (mimics readlines()).
        magic = 'This is an Orbkit magic string'
        text = fname.read().decode("iso-8859-1").replace(
            '\n', '\n{}'.format(magic))
        flines = text.split(magic)
        flines.pop()

    # ``raw_input`` only exists on Python 2 (or if the module provides a
    # shim); fall back to the Python 3 built-in otherwise.
    try:
        read_input = raw_input
    except NameError:
        read_input = input

    # Search the file the specific sections
    count = {
        'link': 0,
        'geometry': 0,
        'geometry_input': 0,
        'atomic orbitals': 0,
        'molecular orbitals': [],
        'state': []
    }

    def check_sel(count, i, interactive=False, default=-1):
        '''Returns the validated (non-negative) index ``i`` out of ``count``
        available sections. Raises IndexError if there is no section at all
        and IOError if the selection is invalid.'''
        if count == 0:
            raise IndexError
        elif count == 1:
            return 0
        message = '\tPlease give an integer from 0 to {0} (default: {0}): '.format(
            count - 1)

        try:
            if interactive:
                i = read_input(message)
                i = default if i == '' else int(i)
            # Normalizes negative indices and checks the bounds
            i = range(count)[i]
        except (IndexError, ValueError):
            raise IOError(message.replace(':', '!'))
        else:
            display('\tSelecting the %s' %
                    ('last element.' if
                     (i == count - 1) else 'element %d.' % i))
        return i

    # First pass: count the linked jobs
    for line in flines:
        if ' Entering Link 1' in line:
            count['link'] += 1

    try:
        display('\tFound %d linked GAUSSIAN files.' % count['link'])
        i_link = check_sel(count['link'], i_link, interactive=interactive)
    except IndexError:
        raise IOError('Found no `Entering Link 1` keyword!')

    # Second pass: count the sections available in the selected job
    cartesian_basis = True
    c_link = 0
    for line in flines:
        thisline = line.split()  # The current line split into segments

        # Check the file for keywords
        if ' Entering Link 1' in line:
            c_link += 1
        if i_link == (c_link - 1):
            if ' orientation:' in line:
                if '%s orientation:' % orientation in line.lower():
                    count['geometry'] += 1
                if 'input orientation:' in line.lower():
                    count['geometry_input'] += 1
            elif 'Standard basis:' in line or 'General basis read from cards:' in line:
                # Check if a cartesian basis has been applied
                if '(5D, 7F)' in line:
                    cartesian_basis = False
                elif '(6D, 10F)' not in line:
                    raise IOError(
                        'Please apply a Spherical Harmonics (5D, 7F) or ' +
                        'a Cartesian Gaussian Basis Set (6D, 10F)!')
            elif 'AO basis set in the form of general basis input' in line:
                count['atomic orbitals'] += 1
            elif 'The electronic state is ' in line:
                count['state'].append(thisline[-1][:-1])
            elif 'Orbital Coefficients:' in line:
                mo_type = thisline[0]
                if mo_type != 'Beta':
                    count['molecular orbitals'].append(mo_type)
                else:
                    # A beta block belongs to the preceding alpha block
                    count['molecular orbitals'][-1] = 'Alpha&Beta'

    display('\nContent of the GAUSSIAN .log file:')

    display('\tFound %d geometry section(s). (%s orientation)' %
            (count['geometry'], orientation))
    try:
        i_geo = check_sel(count['geometry'], i_geo, interactive=interactive)
    except IndexError:
        # Fall back to the input orientation
        count['geometry'] = count['geometry_input']
        orientation = 'input'
        display('\tLooking for "Input orientation": \n' +
                '\tFound %d geometry section(s). (%s orientation)' %
                (count['geometry'], orientation))
        try:
            i_geo = check_sel(count['geometry'], i_geo,
                              interactive=interactive)
        except IndexError:
            raise IOError('Found no geometry section!' +
                          ' Are you sure this is a GAUSSIAN .log file?')

    try:
        display('\tFound %d atomic orbitals section(s) %s.' %
                (count['atomic orbitals'],
                 '(6D, 10F)' if cartesian_basis else '(5D, 7F)'))
        i_ao = check_sel(count['atomic orbitals'], i_ao,
                         interactive=interactive)
    except IndexError:
        raise IOError('Write GFINPUT in your GAUSSIAN route section to print' +
                      ' the basis set information!')

    display('\tFound the following %d molecular orbitals section(s):' %
            len(count['molecular orbitals']))
    for i, j in enumerate(count['molecular orbitals']):
        string = '\t\tSection %d: %s Orbitals' % (i, j)
        try:
            string += ' (electronic state: %s)' % count['state'][i]
        except IndexError:
            pass
        display(string)
    # check_sel() raises the IndexError for "no section found", so it has
    # to be inside the try block for the hint below to be reachable.
    try:
        i_mo = check_sel(len(count['molecular orbitals']), i_mo,
                         interactive=interactive)
    except IndexError:
        raise IOError(
            'Write IOP(6/7=3) in your GAUSSIAN route section to print\n' +
            ' all molecular orbitals!')

    if spin is not None:
        if spin != 'alpha' and spin != 'beta':
            raise IOError('`spin=%s` is not a valid option' % spin)
        else:
            display('Reading only molecular orbitals of spin %s.' % spin)

    # Set a counter for the AOs
    basis_count = 0

    # Initialize some variables
    sec_flag = None
    skip = 0
    c_link = 0
    c_geo = 0
    c_ao = 0
    c_mo = 0
    c_sao = 0
    old_ao = -1
    orb_sym = []
    orb_spin = []  # Stays empty if no spin information has been read
    qc = QCinfo()
    qc.ao_spec = AOClass([])
    qc.mo_spec = MOClass([])
    index = []

    # Third pass: read the selected sections
    for il, line in enumerate(flines):
        thisline = line.split()  # The current line split into segments

        # Check the file for keywords
        if ' Entering Link 1' in line:
            c_link += 1
        if i_link == (c_link - 1):
            if '%s orientation:' % orientation in line.lower():
                # The section containing information about
                # the molecular geometry begins
                if i_geo == c_geo:
                    qc.geo_info = []
                    qc.geo_spec = []
                    sec_flag = 'geo_info'
                c_geo += 1
                skip = 4
            elif 'Standard basis:' in line or 'General basis read from cards:' in line:
                # Check if a cartesian basis has been applied
                if '(5D, 7F)' in line:
                    cartesian_basis = False
                elif '(6D, 10F)' not in line:
                    raise IOError(
                        'Please apply a Spherical Harmonics (5D, 7F) or ' +
                        'a Cartesian Gaussian Basis Sets (6D, 10F)!')
            elif 'AO basis set in the form of general basis input' in line:
                # The section containing information about
                # the atomic orbitals begins
                if i_ao == c_ao:
                    qc.ao_spec = AOClass([])
                    if not cartesian_basis:
                        qc.ao_spec.spherical = True
                    sec_flag = 'ao_info'
                    basis_count = 0
                c_ao += 1
                bNew = True  # Indication for start of new AO section
            elif 'Orbital symmetries:' in line:
                sec_flag = 'mo_sym'
                add = ''
                orb_sym = []
            elif 'Orbital Coefficients:' in line:
                # The section containing information about
                # the molecular orbitals begins
                if (i_mo == c_mo):
                    sec_flag = 'mo_info'
                    mo_type = count['molecular orbitals'][i_mo]
                    qc.mo_spec = MOClass([])
                    offset = 0
                    add = ''
                    orb_spin = []
                    if orb_sym == []:
                        # No symmetry labels were printed: label everything
                        # as A1 and derive the spin from the section type
                        if 'Alpha' in mo_type:
                            add = '_a'
                            orb_spin = ['alpha'] * basis_count
                        orb_sym = ['A1' + add] * basis_count
                        if 'Beta' in mo_type:
                            add = '_b'
                            orb_spin += ['beta'] * basis_count
                            orb_sym += ['A1' + add] * basis_count
                    for i in range(len(orb_sym)):
                        # Running count of this symmetry label
                        # (written for numpy version < 1.6)
                        c = ((numpy.array(orb_sym[:i + 1]) == orb_sym[i]) !=
                             0).sum()
                        # for numpy version >= 1.6 this could be used:
                        #c = numpy.count_nonzero(numpy.array(orb_sym[:i+1]) == orb_sym[i])
                        qc.mo_spec.append({
                            'coeffs': numpy.zeros(basis_count),
                            'energy': 0.,
                            'sym': '%d.%s' % (c, orb_sym[i])
                        })
                        if orb_spin != []:
                            qc.mo_spec[-1]['spin'] = orb_spin[i]
                # NOTE(review): ``mo_type`` here is whatever was assigned
                # last (possibly during the counting pass), not the type of
                # the current line -- confirm this is intended.
                if mo_type != 'Beta':
                    c_mo += 1
                bNew = True  # Indication for start of new MO section
            elif 'E(' in line:
                qc.etot = float(line.split('=')[1].split()[0])
            else:
                # Check if we are in a specific section
                if sec_flag == 'geo_info':
                    if not skip:
                        qc.geo_info.append(
                            [thisline[1], thisline[0], thisline[1]])
                        qc.geo_spec.append([float(ij) for ij in thisline[3:]])
                        if '-----------' in flines[il + 1]:
                            sec_flag = None
                    else:
                        skip -= 1
                if sec_flag == 'ao_info':
                    # Atomic orbital section
                    if ' ****' in line:
                        # There is a line with stars after every AO
                        bNew = True
                        # If there is an additional blank line, the AO section is complete
                        if flines[il + 1].split() == []:
                            sec_flag = None
                    elif bNew:
                        # The following AOs are for which atom?
                        bNew = False
                        at_num = int(thisline[0]) - 1
                        ao_num = 0
                    elif len(thisline) == 4:
                        # AO information section
                        # Initialize a new dict for this AO
                        ao_num = 0  # Initialize number of atomic orbitals
                        ao_type = thisline[0].lower()  # Type of atomic orbital
                        pnum = int(thisline[1])  # Number of primitives
                        for i_ao in ao_type:
                            # Calculate the degeneracy of this AO and increase basis_count
                            basis_count += l_deg(
                                lquant[i_ao], cartesian_basis=cartesian_basis)
                            qc.ao_spec.append({
                                'atom': at_num,
                                'type': i_ao,
                                'pnum': pnum,
                                'coeffs': numpy.zeros((pnum, 2))
                            })
                            if not cartesian_basis:
                                qc.ao_spec[-1]['lm'] = []
                    else:
                        # Append the AO coefficients
                        # ('D' exponent markers are converted to 'e')
                        coeffs = numpy.array(line.replace('D', 'e').split(),
                                             dtype=numpy.float64)
                        for i_ao in range(len(ao_type)):
                            qc.ao_spec[-len(ao_type) +
                                       i_ao]['coeffs'][ao_num, :] = [
                                           coeffs[0], coeffs[1 + i_ao]
                                       ]
                        ao_num += 1
                if sec_flag == 'mo_sym':
                    if 'electronic state' in line:
                        sec_flag = None
                    else:
                        info = line[18:].replace('(', '').replace(')',
                                                                  '').split()
                        if 'Alpha' in line:
                            add = '_a'
                        elif 'Beta' in line:
                            add = '_b'
                        for i in info:
                            orb_sym.append(i + add)
                if sec_flag == 'mo_info':
                    # Molecular orbital section
                    info = line[:21].split()
                    if info == []:
                        coeffs = line[21:].split()
                        if bNew:
                            # Header line with the MO numbers of this block
                            index = [offset + i for i in range(len(coeffs))]
                            bNew = False
                        else:
                            # Line with the occupied ('O') / virtual markers
                            for i, j in enumerate(index):
                                qc.mo_spec[j]['occ_num'] = int(
                                    'O' in coeffs[i])
                                # Substring test: true for every type other
                                # than 'Alpha', 'Beta', and 'Alpha&Beta'
                                if mo_type not in 'Alpha&Beta':
                                    qc.mo_spec[j]['occ_num'] *= 2
                    elif 'Eigenvalues' in info:
                        coeffs = line[21:].replace('-', ' -').split()
                        if mo_type == 'Natural':
                            key = 'occ_num'
                        else:
                            key = 'energy'
                        for i, j in enumerate(index):
                            qc.mo_spec[j][key] = float(coeffs[i])
                    else:
                        try:
                            int(info[0])
                        except ValueError:
                            # The section ended: drop the MOs that have
                            # not been filled
                            for j in list(
                                    range(index[-1] + 1,
                                          len(qc.mo_spec)))[::-1]:
                                del qc.mo_spec[j]
                            sec_flag = None
                            orb_sym = []
                            bNew = True
                            continue
                        coeffs = line[21:].replace('-', ' -').split()
                        if not cartesian_basis and offset == 0:
                            # Collect the (l,m) labels from the first
                            # coefficient block
                            if old_ao != line[:14].split()[-1] or len(
                                    line[:14].split()) == 4:
                                old_ao = line[:14].split()[-1]
                                c_sao += 1
                            i = c_sao - 1
                            l = lquant[line[13].lower()]
                            m = line[14:21].replace(' ', '').lower()
                            p = 'yzx'.find(m) if len(m) == 1 else -1
                            if p != -1:
                                m = p - 1
                            elif m == '':
                                m = 0
                            else:
                                m = int(m)
                            qc.ao_spec[i]['lm'].append((l, m))
                        for i, j in enumerate(index):
                            qc.mo_spec[j]['coeffs'][int(info[0]) -
                                                    1] = float(coeffs[i])
                        if int(info[0]) == basis_count:
                            # Last AO row of this block
                            bNew = True
                            offset = index[-1] + 1
                            if index[-1] + 1 == len(orb_sym):
                                sec_flag = None
                                orb_sym = []

    # Are all MOs requested for the calculation?
    # (Iterate backwards so that deleting does not shift pending indices.)
    if not all_mo:
        for i in range(len(qc.mo_spec))[::-1]:
            if qc.mo_spec[i]['occ_num'] < 0.0000001:
                del qc.mo_spec[i]
    if spin is not None:
        if orb_spin == []:
            raise IOError(
                'You requested `%s` orbitals, but None of them are present.' %
                spin)
        else:
            for i in range(len(qc.mo_spec))[::-1]:
                if qc.mo_spec[i]['spin'] != spin:
                    del qc.mo_spec[i]

    # Convert geo_info and geo_spec to numpy.ndarrays
    qc.format_geo(is_angstrom=True)

    qc.mo_spec.update()
    qc.ao_spec.update()
    return qc
def read_wfx(fname, all_mo=False, spin=None, **kwargs):
    '''Reads all information desired from a wfx file.

    **Parameters:**

    fname: str, file descriptor
      Specifies the filename for the input file.
      fname can also be used with a file descriptor instead of a filename.
    all_mo : bool, optional
      Accepted for interface compatibility with the other readers; it is
      not evaluated by this function.
    spin : {None, 'alpha', or 'beta'}, optional
      If not None, returns exclusively 'alpha' or 'beta' molecular orbitals.

    **Returns:**

    qc (class QCinfo) with attributes geo_spec, geo_info, ao_spec, mo_spec, etot :
      See :ref:`Central Variables` for details.

    **Raises:**

    IOError
      If the file does not contain a ``<Keywords>`` tag followed by ``GTO``,
      or if a section is found before the section defining its length.
    '''
    # Initialize the variables
    qc = QCinfo()
    qc.ao_spec = AOClass([])
    qc.mo_spec = MOClass([])
    # Flat table of exponent triples; primitive type n refers to row n-1
    lxlylz = []
    for j in exp_wfn:
        lxlylz.extend(j)
    lxlylz = numpy.array(lxlylz, dtype=numpy.int64)

    if isinstance(fname, str):
        filename = fname
        fname = descriptor_from_file(filename, index=0)
    else:
        filename = fname.name

    from io import TextIOWrapper
    if isinstance(fname, TextIOWrapper):
        flines = fname.readlines()  # Read the WHOLE file into RAM
    else:
        # Binary descriptor: decode and split into lines, keeping the '\n'
        # at the end of every line (mimics readlines()).
        magic = 'This is an Orbkit magic string'
        text = fname.read().decode("iso-8859-1").replace(
            '\n', '\n{}'.format(magic))
        flines = text.split(magic)
        flines.pop()

    # zip() stops one line early, so a '<Keywords>' tag on the very last
    # line yields the IOError below instead of an IndexError.
    is_valid = any('<Keywords>' in this and 'GTO' in following
                   for this, following in zip(flines, flines[1:]))
    if not is_valid:
        raise IOError('No valid .wfx file!\nMissing:\n' +
                      '<Keywords>\n  GTO\n</Keywords>')

    def require(is_missing, needed, section):
        # Sections rely on counts defined by earlier sections
        if is_missing:
            raise IOError('`%s` has to be found before `%s`.' %
                          (needed, section))

    sec_flag = None  # A Flag specifying the current section
    at_num = None
    mo_num = None
    ao_num = None
    restricted = True
    count = 0

    # Go through the file line by line
    for il, line in enumerate(flines):
        if '<Number of Nuclei>' in line:
            at_num = int(flines[il + 1])
            qc.geo_info = [[None, i + 1, None] for i in range(at_num)]
            qc.geo_spec = []
        elif '<Nuclear Names>' in line:
            require(not at_num, '<Number of Nuclei>', '<Nuclear Names>')
            for i in range(at_num):
                qc.geo_info[i][0] = flines[il + i + 1].replace(
                    ' ', '').replace('\n', '')
        elif '<Atomic Numbers>' in line:
            require(not at_num, '<Number of Nuclei>', '<Atomic Numbers>')
            for i in range(at_num):
                qc.geo_info[i][2] = flines[il + i + 1].replace(
                    ' ', '').replace('\n', '')
        elif '<Nuclear Cartesian Coordinates>' in line:
            require(not at_num, '<Number of Nuclei>',
                    '<Nuclear Cartesian Coordinates>')
            for i in range(at_num):
                qc.geo_spec.append(flines[il + i + 1].split())
        elif '<Number of Primitives>' in line:
            ao_num = int(flines[il + 1])
            qc.ao_spec = AOClass([{
                'atom': None,
                'pnum': -1,
                'coeffs': None,
                'lxlylz': None,
            } for i in range(ao_num)])
        elif '<Primitive Centers>' in line:
            sec_flag = 'ao_center'
            count = 0
        elif '<Primitive Types>' in line:
            sec_flag = 'ao_type'
            count = 0
        elif '<Primitive Exponents>' in line:
            sec_flag = 'ao_exp'
            count = 0
        elif '<Number of Occupied Molecular Orbitals>' in line:
            require(ao_num is None, '<Number of Primitives>',
                    '<Number of Occupied Molecular Orbitals>')
            mo_num = int(flines[il + 1])
            qc.mo_spec = MOClass([{
                'coeffs': numpy.zeros(ao_num),
                'energy': None,
                'occ_num': None,
                'spin': None,
                'sym': '%s.1' % (i + 1)
            } for i in range(mo_num)])
        elif '<Molecular Orbital Occupation Numbers>' in line:
            require(mo_num is None, '<Number of Occupied Molecular Orbitals>',
                    '<Molecular Orbital Occupation Numbers>')
            for i in range(mo_num):
                qc.mo_spec[i]['occ_num'] = float(flines[il + 1 + i])
        elif '<Molecular Orbital Energies>' in line:
            require(mo_num is None, '<Number of Occupied Molecular Orbitals>',
                    '<Molecular Orbital Energies>')
            for i in range(mo_num):
                qc.mo_spec[i]['energy'] = float(flines[il + 1 + i])
        elif '<Molecular Orbital Spin Types>' in line:
            require(mo_num is None, '<Number of Occupied Molecular Orbitals>',
                    '<Molecular Orbital Spin Types>')
            for i in range(mo_num):
                qc.mo_spec[i]['spin'] = (flines[il + 1 + i].replace(
                    ' ', '').replace('\n', '')).replace('and', '_').lower()
                # Only combined labels (containing '_' after the
                # replacement) keep the calculation flagged as restricted
                restricted = restricted and ('_' in qc.mo_spec[i]['spin'])
        elif '<MO Number>' in line:
            require(ao_num is None, '<Number of Primitives>', '<MO Number>')
            index = int(flines[il + 1]) - 1
            # The coefficients are read one per line, starting three lines
            # below the tag
            for i in range(ao_num):
                qc.mo_spec[index]['coeffs'][i] = float(flines[il + 3 + i])
        elif '</' in line:
            sec_flag = None
        elif sec_flag is not None:
            if sec_flag == 'ao_center':
                for i in line.split():
                    qc.ao_spec[count]['atom'] = int(i) - 1
                    count += 1
            if sec_flag == 'ao_type':
                for i in line.split():
                    qc.ao_spec[count]['lxlylz'] = lxlylz[int(i) -
                                                         1][numpy.newaxis]
                    qc.ao_spec[count]['type'] = orbit[sum(lxlylz[int(i) - 1])]
                    count += 1
            if sec_flag == 'ao_exp':
                for i in line.split():
                    qc.ao_spec[count]['coeffs'] = numpy.array(
                        [[float(i), 1.0]])
                    count += 1

    has_alpha = any([i['spin'] == 'alpha' for i in qc.mo_spec])
    has_beta = any([i['spin'] == 'beta' for i in qc.mo_spec])

    spin_check(spin, restricted, has_alpha, has_beta)
    qc.select_spin(restricted, spin=spin)

    # Remove numbers from atom names
    for i in qc.geo_info:
        i[0] = ''.join([k for k in i[0] if not k.isdigit()])

    # Convert geo_info and geo_spec to numpy.ndarrays
    qc.format_geo()

    qc.mo_spec.update()
    qc.ao_spec.update()
    return qc
class Multi():
  '''Collects the data of several quantum chemistry outputs (one per
  geometry) and provides routines to order, interpolate, and plot the
  molecular orbitals along that series.

  The molecular orbital data are stored per symmetry: ``self.sym`` maps a
  symmetry label to a position in the lists ``self.mo_coeff_all``,
  ``self.mo_energy_all``, and ``self.mo_occ_all``.

  **Parameters:**

  data : dict, optional
    If given, the instance is restored from this dictionary. The keys read
    are ``'ao_spec'``, ``'geo_info'``, ``'geo_spec_all'``, ``'mo_data'``,
    ``'sym'``, and ``'index_list'``, i.e., the ones written by
    :func:`todict`.
  '''

  def __init__(self, data=None):
    self.geo_spec_all  = [] #: Contains all molecular geometries, i.e., :literal:`geo_spec`. (See :ref:`Central Variables` for details.)
    self.geo_info      = [] #: See :ref:`Central Variables` for details.
    self.ao_spec       = [] #: See :ref:`Central Variables` for details.
    self.mo_coeff_all  = [] #: Contains all molecular orbital coefficients. List of numpy.ndarray
    self.mo_energy_all = [] #: Contains all molecular orbital energies. List of numpy.ndarray
    self.mo_occ_all    = [] #: Contains all molecular orbital occupations. List of numpy.ndarray
    self.sym           = [] #: Python dictionary containing the molecular orbital symmetries and the corresponding position in self.mo_coeff_all, self.mo_energy_all, and self.mo_occ_all, respectively.
    self.index_list    = [] #: After the execution of the ordering routine, it contains the new indices of the molecular orbitals. If index < 0, the molecular orbital changes its sign. shape=(Nfiles,NMO)

    # Spline representations, filled by splrep_all
    self.geo_spec_tck  = []
    self.mo_coeff_tck  = []
    self.mo_energy_tck = []
    self.mo_occ_tck    = []
    self.MO_Spec = []       # One mo_spec per input file, filled by read
    self.QC = []            # List of QCinfo instances, filled by construct_qc

    if data:
      self.ao_spec = AOClass(restart=self._unwrap(data['ao_spec']))
      self.geo_info = data['geo_info']
      self.geo_spec_all = data['geo_spec_all']
      self.mo_list_parsing(self._unwrap(data['mo_data']))
      self.sym = self._unwrap(data['sym'])
      self.index_list = data['index_list']

  @staticmethod
  def _unwrap(value):
    '''Returns the first element of ``value[numpy.newaxis]`` if value is a
    numpy.ndarray, and value itself otherwise.'''
    if isinstance(value, numpy.ndarray):
      return value[numpy.newaxis][0]
    return value

  @staticmethod
  def _get_criterion(kwargs, n_ao):
    '''Returns the function ``test(x,y)`` that decides whether the deviation
    x is smaller than the deviation y.

    The keyword ``criterion`` in kwargs selects '1-norm', '2-norm',
    'infty-norm', or 'perc'. Without this keyword the 2-norm is taken.
    Raises a ValueError for any other choice.
    '''
    if 'criterion' not in kwargs:
      # Take 2-norm by default
      return lambda x,y: ((x**2).sum()) < ((y**2).sum())
    criterion = kwargs['criterion']
    if criterion == '1-norm':
      return lambda x,y: numpy.sum(numpy.abs(x)) < numpy.sum(numpy.abs(y))
    elif criterion == '2-norm':
      return lambda x,y: ((x**2).sum()) < ((y**2).sum())
    elif criterion == 'infty-norm':
      return lambda x,y: abs(x).max() < abs(y).max()
    elif criterion == 'perc':
      return lambda x,y: ((x**2 < y**2).sum()/float(n_ao)) > 1./2.
    raise ValueError('creterion %s is not defined!' % criterion)

  def read(self,fid_list,itype='auto',all_mo=True,nosym=False, sort=True, **kwargs_all):
    '''Reads a list of input files.

    **Parameters:**

      fid_list : list of str
        List of input file names.
      itype : str, choices={'auto', 'tar', 'molden', 'gamess', 'gaussian.log', 'gaussian.fchk'}
        Specifies the type of the input files.
      all_mo : bool
        Is passed on to ``main_read``.
      nosym : bool
        If True, the symmetry labels of the molecular orbitals are replaced
        by a simple counter ('1.1', '2.1', ...).
      sort : bool
        Sort input files by name. (Is only passed on when fid_list points
        to a tar archive.)
      kwargs_all :
        Additional keywords for ``main_read``. If it contains the key
        ``'kwargs'``, the i-th entry of that list is used for the i-th file.

    **Raises:**

      IOError if ``geo_info`` or the AO coefficients differ between files.
    '''
    # self.geo_info and ao_info have to stay unchanged
    geo_old = []
    ao_old = []

    sym_list = {}
    n_ao = {}

    # Check if fname points to a tar archive and
    # read all files from archive if that is the case
    if is_tar_file(fid_list):
      fid_list, itypes = get_all_files_from_tar(fid_list, sort=sort)
    else:
      itypes = [itype]*len(fid_list)

    for i,fname in enumerate(fid_list):
      kwargs = kwargs_all['kwargs'][i] if 'kwargs' in kwargs_all else kwargs_all
      qc = main_read(fname, itype=itypes[i], all_mo=all_mo, **kwargs)

      # Geo Section
      if i > 0 and (geo_old != qc.geo_info).sum():
        raise IOError('qc.geo_info has changed!')
      else:
        geo_old = deepcopy(qc.geo_info)
      self.geo_spec_all.append(qc.geo_spec)

      # AO Section
      # (builtin all instead of numpy.alltrue, which is gone in NumPy 2.0)
      if (i > 0 and not
          all([numpy.allclose(ao_old[j]['coeffs'],qc.ao_spec[j]['coeffs'])
               for j in range(len(ao_old))])):
        raise IOError('qc.ao_spec has changed!')
      else:
        ao_old = deepcopy(qc.ao_spec)

      # MO Section
      sym_tmp = {}
      self.MO_Spec.append(qc.mo_spec)
      for j,mo in enumerate(qc.mo_spec):
        if nosym:
          qc.mo_spec[j]['sym'] = '%d.1' % (j+1)
        key = mo['sym'].split('.')
        if key[1] not in sym_tmp.keys():
          sym_tmp[key[1]] = 0
          n_ao[key[1]] = len(qc.mo_spec[0]['coeffs'])
        sym_tmp[key[1]] += 1

      # Keep the largest number of MOs found for each symmetry
      for k,it in sym_tmp.items():
        if k in sym_list:
          sym_list[k] = max(sym_list[k],it)
        else:
          sym_list[k] = it

    self.geo_spec_all = numpy.array(self.geo_spec_all)
    self.geo_info = qc.geo_info
    self.ao_spec = qc.ao_spec

    # Presorting of the MOs according to their symmetry
    n_r = len(fid_list)
    self.sym = []
    for k in sorted(sym_list.keys()):
      it = sym_list[k]
      self.sym.append((k,len(self.sym)))
      self.mo_coeff_all.append(numpy.zeros((n_r,it,n_ao[k])))
      self.mo_energy_all.append(numpy.zeros((n_r,it)))
      self.mo_occ_all.append(numpy.zeros((n_r,it)))

    self.sym = odict(self.sym)

    for i,spec in enumerate(self.MO_Spec):
      for mo in spec:
        index,k = mo['sym'].split('.')
        index = int(index)-1
        self.mo_coeff_all[self.sym[k]][i,index,:] = mo['coeffs']
        self.mo_energy_all[self.sym[k]][i,index] = mo['energy']
        self.mo_occ_all[self.sym[k]][i,index] = mo['occ_num']
    return

  def get_extrapolation(self,r1,r2,mo_coeff,deg=1,grid1d=None):
    '''Extrapolates the molecular orbital coefficients :literal:`mo_coeff`
    using a polynomial of degree :literal:`deg`.

    **Parameters:**

    r1 : int
      Specifies the index of the last known molecular orbital.
    r2 : int
      Specifies the index to which the molecular orbital coefficients are
      extrapolated.
    mo_coeff : numpy.ndarray, shape=(Nfiles,NMO,NAO)
      Contains the molecular orbital coefficients.
    deg : int
      Specifies the degree of the extrapolation polynomial.
    grid1d : list or numpy.1darray, optional
      Specifies the grid for the extrapolation. If None, ``range(r2+1)`` is
      used.

    **Returns:**

    epol : numpy.ndarray, shape=(NMO,NAO)
      Contains the extrapolated molecular orbital coefficients.
    '''
    if grid1d is None:
      grid1d = range(r2+1)

    if deg < 2:
      # Linear extrapolation from the points r1-1 and r1
      m = (mo_coeff[r1-1,:,:] - mo_coeff[r1,:,:])/float(grid1d[r1-1] - grid1d[r1])
      epol = (m * (grid1d[r2] - grid1d[r1]) + mo_coeff[r1,:,:])
    else:
      # Least squares polynomial fit to all points before r2
      shape = mo_coeff.shape
      epol = numpy.zeros(shape[1:])
      for i in range(shape[1]):
        for j in range(shape[2]):
          x = grid1d[:r2]
          y = mo_coeff[:r2,i,j]
          z = numpy.polyfit(x, y, deg)
          epol[i,j] = numpy.poly1d(z)(grid1d[r2])
    return epol

  def order_using_analytical_overlap(self,fid_list=None,itype=None,deg=0,numproc=1,
                                     **kwargs):
    '''Performs an ordering routine using analytical overlap integrals between
    molecular orbitals. Set fid_list to None to omit the reading of input files.

    If :literal:`deg` is set to a value larger than zero, the molecular orbital
    coefficients are extrapolated with a polynomial of degree :literal:`deg`,
    before computing the molecular orbital overlap matrix.

    **Parameters:**

    fid_list : list of str or None
      If not None, it contains the list of input file names.
    itype : str, choices={'auto', 'tar', 'molden', 'gamess', 'gaussian.log', 'gaussian.fchk'}
      Specifies the type of the input files.
    deg : None|int, optional
      - If deg is None, atomic orbitals of two successive geometries will be
        assumed to be on the same positions.
      - If greater than zero, specifies the degree of the extrapolation
        polynomial for the molecular orbital coefficients.
    numproc : int
      Is passed on to ``get_mo_overlap_matrix``.

    **Returns:**

    index_list : list of numpy.ndarray, shape=(Nfiles,NMO)
      Contains the new indices of the molecular orbitals (one array per
      symmetry). If index < 0, the molecular orbital changes its sign.
    mo_overlap : list of numpy.ndarray, shape=((Nfiles - 1),NMO,NMO)
      Contains the overlap matrix between the molecular orbitals of two
      neighboring geometries, i.e., mo_overlap[i,j,k] corresponds to overlap
      between the jth molecular orbital at geometry i to the kth molecular
      orbital at geometry (i+1).
    '''
    if fid_list is not None:
      self.read(fid_list,itype=itype,**kwargs)

    display('\nStarting the ordering routine using the molecular orbital overlap...')

    iterate = list(range(1,len(self.geo_spec_all)))

    if deg is not None and deg > 0:
      display('\tThe molecular orbital coefficients will be extrapolated')
      display('\tusing a least squares polynomial fit of degree %d.' % deg)
      # Grid for the extrapolation: deviation from the first geometry
      std = numpy.array([numpy.std(i-self.geo_spec_all[0]) for i in self.geo_spec_all])

    sym_sorted_keys = sorted(self.sym.keys())
    mo_overlap = [[] for i in sym_sorted_keys]
    index_list = [[] for i in sym_sorted_keys]

    for ik in sym_sorted_keys:
      s = self.sym[ik]
      shape = numpy.shape(self.mo_coeff_all[s])
      index_list[s] = numpy.ones((shape[0],shape[1]),dtype=int)
      index_list[s] *= numpy.arange(shape[1],dtype=int)

    c = 0
    t = time()
    for rr in iterate:
      r1 = rr-1
      r2 = rr

      if (deg is None) or (deg > 0 and r1 >= deg):
        ao_overlap = get_ao_overlap(self.geo_spec_all[r2],self.geo_spec_all[r2],self.ao_spec)
      else:
        ao_overlap = get_ao_overlap(self.geo_spec_all[r1],self.geo_spec_all[r2],self.ao_spec)

      cs = 0
      for ik in sym_sorted_keys:
        s = self.sym[ik]
        mo_coeff = self.mo_coeff_all[s]
        shape = numpy.shape(mo_coeff)
        if deg is not None and deg > 0 and r1 >= deg:
          mo_r1 = self.get_extrapolation(r1,r2,mo_coeff,grid1d=std,deg=deg)
        else:
          mo_r1 = mo_coeff[r1]

        overlap = get_mo_overlap_matrix(mo_r1,mo_coeff[r2],ao_overlap,
                                        numproc=numproc)

        for i in range(shape[1]):
          # Iterate the rows of the overlap matrix
          line_max = None # variable for maximum value in the current row
          line_sort = numpy.argsort(numpy.abs(overlap[i,:]))[::-1] # sort the row
          # NOTE(review): line_sort is reversed a second time here, i.e., the
          # columns are visited in ascending order of |overlap| - confirm
          # that this is intended.
          for k in line_sort[::-1]:
            # Is this value the maximum in the current column?
            col_max = numpy.argmax(numpy.abs(overlap[:,k]))
            if i == col_max:
              line_max = k
              break
          if line_max is not None:
            # Interchange the coefficients
            mo_coeff[r2,[i,line_max],:] = mo_coeff[r2,[line_max,i],:]
            overlap[:,[i,line_max]] = overlap[:,[line_max,i]]
            index_list[s][r2,[i,line_max]] = index_list[s][r2,[line_max,i]]

        for i in range(shape[1]):
          # Change the signs
          mo_coeff[r2,i,:] *= numpy.sign(overlap[i,i])
          overlap[:,i] *= numpy.sign(overlap[i,i])
          index_list[s][r2,i] *= numpy.sign(overlap[i,i])

        mo_overlap[cs].append(overlap)
        cs += 1

        self.mo_coeff_all[s] = mo_coeff
        index = numpy.abs(index_list[s])[r2,:]
        self.mo_energy_all[s][r2,:] = self.mo_energy_all[s][r2,index]
        self.mo_occ_all[s][r2,:] = self.mo_occ_all[s][r2,index]

      c += 1
      #if not c % int(numpy.ceil(len(iterate)/10.)):
      display('\tFinished %d of %d geometries (%.1f s)' % (c, len(iterate), time()-t))
      t = time()

    tmp = []
    for i in mo_overlap:
      tmp.append(numpy.array(i))

    mo_overlap = tmp

    return index_list, mo_overlap

  def order_using_extrapolation(self,fid_list=None,itype=None,deg=1,
                                use_mo_values=False,matrix=None,**kwargs):
    '''Performs an ordering routine using extrapolation of quantities related to
    the molecular orbitals. Set fid_list to None to omit the reading of input
    files.

    The molecular orbital coefficients (If use_mo_values is False) are
    extrapolated with a polynomial of degree :literal:`deg` and ordered by
    minimizing a selected norm (default: Euclidian norm).

    **Parameters:**

    fid_list : list of str or None
      If not None, it contains the list of input file names.
    itype : str, choices={None, 'tar', 'molden', 'gamess', 'gaussian.log', 'gaussian.fchk'}
      Specifies the type of the input files.
    deg : int
      Specifies the degree of the extrapolation polynomial.
    use_mo_values : bool, optional
      If True, some molecular orbital values and their derivatives are computed
      at the nuclear positions. The ordering routine is applied for those
      values instead.
    matrix : None or numpy.ndarray with shape=(Nfiles,N,M)
      If not None, contains the data to be ordered.

    **Returns:**

    :if matrix is None:
      - index_list
    :else:
      - matrix, index_list

    index_list : numpy.ndarray, shape=(Nfiles,NMO)
      Contains the new indices of the molecular orbitals. If index < 0, the
      molecular orbital changes its sign.
    matrix : numpy.ndarray, shape=(Nfiles,N,M)
      Contains the ordered matrix.
    '''
    # Read all input files
    if fid_list is not None:
      self.read(fid_list,itype=itype,**kwargs)

    if deg < 2:
      function = self.order_mo
    else:
      function = self.order_mo_higher_deg

    mu = 5e-2

    if matrix is not None:
      # Order the user-supplied matrix; the first pass starts without an
      # index list, the second pass continues with the result of the first.
      display('\tOdering backward')
      matrix, index_list = function(matrix,index_list=None,backward=True,mu=mu,deg=deg)
      display('\tOdering forward')
      matrix, index_list = function(matrix,index_list=index_list,backward=False,mu=mu,deg=deg)
      return matrix, index_list

    index_list = [None for i in self.sym.keys()]

    for s,ii_s in self.sym.items():
      display('Starting ordering of MOs of self.symmetry %s' % s)

      shape = numpy.shape(self.mo_coeff_all[ii_s])

      matrix = self.mo_coeff_all[ii_s]
      if use_mo_values:
        display('\tComputing molecular orbitals at the nuclear positions')
        matrix = self.compute_mo_list(self.ao_spec,matrix,
                                      iter_drv=[None, 'x', 'y', 'z'])

      display('\tOdering backward')
      matrix, index_list[ii_s] = function(matrix,index_list=index_list[ii_s],backward=True,mu=mu,deg=deg)
      display('\tOdering forward')
      matrix, index_list[ii_s] = function(matrix,index_list=index_list[ii_s],backward=False,mu=mu,deg=deg)

      # NOTE(review): if use_mo_values is False, matrix is the very same
      # array as self.mo_coeff_all[ii_s] and has already been reordered in
      # place by the ordering function - verify that applying index_list to
      # the coefficients again below is intended.
      for rr in range(shape[0]):
        index = numpy.abs(index_list[ii_s])[rr,:]
        sign = (-1)**(index_list[ii_s][rr] < 0)
        self.mo_energy_all[ii_s][rr,:] = self.mo_energy_all[ii_s][rr,index]
        self.mo_occ_all[ii_s][rr,:] = self.mo_occ_all[ii_s][rr,index]
        self.mo_coeff_all[ii_s][rr,:,:] = sign[:,numpy.newaxis]*self.mo_coeff_all[ii_s][rr,index,:] # numpy.array(matrix,copy=True)

    return index_list

  def order_manually(self,matrix,i_0,i_1,r_range,using_sign=True):
    '''Performs the ordering manually.

    For every index rr in r_range, the entries abs(i_0) and abs(i_1) of
    matrix[rr] are interchanged. If using_sign is True and i_1 is negative,
    both interchanged entries are multiplied by -1.
    The matrix is modified in place and returned.
    '''
    def sign(x): return -1 if x < 0 and using_sign else 1

    for rr in r_range:
      matrix[rr,[abs(i_0),abs(i_1)]] = sign(i_1)*matrix[rr,[abs(i_1),abs(i_0)]]

    return matrix

  def order_mo(self,mo,index_list=None,backward=True,mu=1e-1,use_factor=False,**kwargs):
    '''Orders a 3d-matrix (shape=(Nfiles,NMO,NAO)) by interchanging the axis=1,
    i.e., NMO, applying linear extrapolation.

    **Parameters:**

    mo : numpy.ndarray, shape=(Nfiles,NMO,NAO)
      Contains the data to be ordered. It is modified in place.
    index_list : None or numpy.ndarray, shape=(Nfiles,NMO)
      Index list of a previous ordering run. If None, a new one is created.
    backward : bool
      If True, the data is traversed from the last to the first file.
    mu : float
      Threshold for the weighting factors (is only used if use_factor is
      True).
    use_factor : bool
      If True, deviations are weighted with 1/abs(mo) wherever abs(mo) > mu.
    criterion : str, optional keyword
      Selects the norm: '1-norm', '2-norm' (default), 'infty-norm', or 'perc'.

    **Returns:**

    mo, index_list
    '''
    shape = numpy.shape(mo)

    # Identity test: `index_list == None` is an element-wise comparison for
    # numpy arrays and has no unambiguous truth value
    if index_list is None:
      index_list = numpy.ones((shape[0],shape[1]),dtype=int)
      index_list *= numpy.arange(shape[1],dtype=int)

    test = self._get_criterion(kwargs,shape[2])

    if backward:
      st = [-1, 1,-1]
    else:
      st = [ 0,-2, 1]

    x = 2.*st[2] # Extrapolate linearly to the next point

    for i,i_0 in enumerate(range(shape[1])[:-1]):
      for rr in range(shape[0])[st[0]:st[1]:st[2]]:
        f = numpy.ones(shape[2])
        if use_factor:
          is_larger = abs(mo[rr,i_0,:]) > mu
          f[is_larger] = 1/abs(mo[rr,i_0,is_larger])
        for ii_s,sign in enumerate([1,-1]):
          m = (sign*mo[rr+st[2],i_0,:] - mo[rr,i_0,:])/float(st[2])
          epol = (m * x + mo[rr,i_0,:])

          cp = ((f[:]*mo[rr+2*st[2],i_0,:] - f[:]*epol[:]))
          cm = ((-1*f[:]*mo[rr+2*st[2],i_0,:] - f[:]*epol[:]))
          is_smaller = test(cm,cp)
          current = cm if is_smaller else cp

          if ii_s == 0:
            diff = current
            i_1 = i_0
            new_signs = [sign,(-1)**is_smaller]
          elif test(current,diff):
            diff = current
            i_1 = i_0
            new_signs = [sign,(-1)**is_smaller]
          # Check other molecular orbitals
          for ik in range(shape[1])[i+1:]:
            cp = ((f[:]*mo[rr+2*st[2],ik,:] - f[:]*epol[:]))
            cm = ((-1*f[:]*mo[rr+2*st[2],ik,:] - f[:]*epol[:]))
            is_smaller = test(cm,cp)
            current = cm if is_smaller else cp

            if test(current,diff):
              diff = current
              i_1 = ik
              new_signs = [sign,(-1)**is_smaller]
        if i_0 != i_1:
          mo[rr+2*st[2],[i_0,i_1],:] = mo[rr+2*st[2],[i_1,i_0],:]
          index_list[rr+2*st[2],[i_0,i_1]] = index_list[rr+2*st[2],[i_1,i_0]]
        mo[rr+st[2],i_0,:] *= new_signs[0]
        mo[rr+2*st[2],i_0,:] *= new_signs[1]
        index_list[rr+st[2],i_0] *= new_signs[0]
        index_list[rr+2*st[2],i_0] *= new_signs[1]

    return mo, index_list

  def order_mo_higher_deg(self,mo,index_list=None,backward=True,mu=1e-1,deg=2,**kwargs):
    '''Orders a 3d-matrix (shape=(Nfiles,NMO,NAO)) by interchanging the axis=1,
    i.e., NMO, applying an extrapolation a polynomial fit with a Vandermonde
    matrix as implemented in numpy.

    **Parameters:**

    mo : numpy.ndarray, shape=(Nfiles,NMO,NAO)
      Contains the data to be ordered. It is modified in place.
    index_list : None or numpy.ndarray, shape=(Nfiles,NMO)
      Index list of a previous ordering run. If None, a new one is created.
    backward : bool
      If True, the data is traversed from the last to the first file.
    mu : float
      Not used by this routine.
    deg : int
      Degree of the fitting polynomial, 1 <= deg <= Nfiles-1.
    criterion : str, optional keyword
      Selects the norm: '1-norm', '2-norm' (default), 'infty-norm', or 'perc'.

    **Returns:**

    mo, index_list

    **Raises:**

    IOError if deg is not an integer within the allowed range.
    '''
    shape = numpy.shape(mo)

    # Check if degree is correctly set
    if not isinstance(deg, int) or deg < 1 or deg > (shape[0]-1):
      raise IOError('Wrong choice for degree of the fitting polynomial!')

    display('\tusing a least squares polynomial fit of degree %d.' % deg)

    # Identity test: see order_mo
    if index_list is None:
      index_list = numpy.ones((shape[0],shape[1]),dtype=int)
      index_list *= numpy.arange(shape[1],dtype=int)

    test = self._get_criterion(kwargs,shape[2])

    if backward:
      st = [-(deg + 1), 0,-1]
      x = numpy.arange(0,deg+1)
    else:
      st = [deg, -1,1]
      x = numpy.arange(-deg,1)

    for i,i_0 in enumerate(range(shape[1])[:-1]):
      for rr in range(shape[0])[st[0]:st[1]:st[2]]:
        epol = numpy.zeros(shape[2])
        for k in range(shape[2]):
          if mo[rr,i_0,k] != 0.:
            xnew = rr+st[2]
            y = mo[rr+x,i_0,k]
            z = numpy.polyfit(rr+x, y, deg)
            epol[k] = numpy.poly1d(z)(xnew)

        cp = ((mo[rr+st[2],i_0,:] - epol[:])**2)
        cm = ((-1*mo[rr+st[2],i_0,:] - epol[:])**2)
        is_smaller = test(cm,cp)
        current = cm if is_smaller else cp

        diff = current
        i_1 = i_0
        new_signs = [1,(-1)**is_smaller]

        # Check other molecular orbitals
        for ik in range(shape[1])[i+1:]:
          cp = ((mo[rr+st[2],ik,:] - epol[:])**2)
          cm = ((-1*mo[rr+st[2],ik,:] - epol[:])**2)
          is_smaller = test(cm,cp)
          current = cm if is_smaller else cp

          if test(current,diff):
            diff = current
            i_1 = ik
            new_signs = [1,(-1)**is_smaller]

        if i_0 != i_1:
          mo[rr+st[2],[i_0,i_1],:] = mo[rr+st[2],[i_1,i_0],:]
          index_list[rr+st[2],[i_0,i_1]] = index_list[rr+st[2],[i_1,i_0]]

        mo[rr,i_0,:] *= new_signs[0]
        mo[rr+st[2],i_0,:] *= new_signs[1]
        index_list[rr,i_0] *= new_signs[0]
        index_list[rr+st[2],i_0] *= new_signs[1]

    return mo, index_list

  def order_pm(self,x,y,backward=True,mu=1e-1,use_factor=False):
    '''Outdated function to order exclusively the sign of a data set.

    **Parameters:**

    x : list or numpy.ndarray
      Grid of the data set.
    y : numpy.ndarray
      Data set (vector or 2D matrix). For any other dimension a message is
      displayed and y is returned unchanged.
    backward : bool
      If True, the data is traversed from the last to the first point.
    mu, use_factor :
      Weighting of the deviations, see :func:`order_mo`. (Only used for 2D
      matrices.)

    **Returns:**

    y : the data set with corrected signs.
    '''
    if backward:
      st = [-2,1,-1]
    else:
      st = [1,-2,1]

    if numpy.ndim(y) == 1:
      diff = numpy.zeros(2)
      for rr in range(len(y))[st[0]:st[1]:st[2]]:
        m = (y[rr+st[2]]-y[rr])/(x[rr+st[2]]-x[rr])
        epol = m * (x[rr+2*st[2]]-x[rr]) + y[rr]
        for ii_d in range(2):
          diff[ii_d] = (((-1)**ii_d * y[rr+2*st[2]])-epol)**2
        if numpy.argmin(numpy.abs(diff)) == 1:
          y[rr+2*st[2]] = -y[rr+2*st[2]]
    elif numpy.ndim(y) == 2:
      y = numpy.array(y)
      shape = numpy.shape(y)
      for rr in range(shape[0])[st[0]:st[1]:st[2]]:
        for ii_s,sign in [(0,-1),(1,+1)]:
          f = numpy.ones(shape[1])
          if use_factor:
            f[numpy.abs(y[rr,:]) > mu] = 1/numpy.abs(y[rr,numpy.abs(y[rr,:]) > mu])
          m = (y[rr+st[2],:]-y[rr,:])/(x[rr+st[2]]-x[rr])
          epol = f[:]*(m * (x[rr+2*st[2]]-x[rr]) + y[rr,:])
          # Euclidean norm (2 norm)
          current = numpy.sum((f[:]*sign*y[rr+2*st[2],:] - epol[:])**2)
          # Current value
          if ii_s == 0:
            diff = current
            new_sign = sign
          elif current < diff:
            new_sign = sign
        y[rr+2*st[2],:] *= new_sign
    else:
      display('Function order_pm only works for vectors and 2D matrices')
    return y

  def mo_list_parsing(self,indata=None):
    '''Parses lists of mos to and from the native Orbkit format.

    If indata is None, a dictionary with the keys ``'<param>#<i>'``
    (param = 'energy', 'coeff', or 'occ'; i = position in the respective
    list) is returned. Otherwise, indata has to be such a dictionary and its
    entries are appended - sorted by i - to self.mo_energy_all,
    self.mo_coeff_all, and self.mo_occ_all.
    '''
    parameters = {'energy': self.mo_energy_all,
                  'coeff': self.mo_coeff_all,
                  'occ': self.mo_occ_all}
    if indata is None:
      outdata = {}
      for param in parameters:
        for i in range(len(parameters[param])):
          outdata[param+'#'+str(i)] = parameters[param][i]
          print(param,parameters[param][i].shape)
      return outdata
    else:
      order = {'energy': [], 'coeff': [], 'occ': []}
      param_tmp = {'energy': [], 'coeff': [], 'occ': []}
      for name in indata:
        param = name.split('#')[0]
        param_tmp[param].append(indata[name])
        order[param].append(int(name.split('#')[-1]))
      for param in parameters:
        sort = numpy.argsort(numpy.array(order[param],dtype=numpy.intc))
        for s in sort:
          parameters[param].append(param_tmp[param][s])
      return

  def todict(self):
    '''Returns the data of this instance as a dictionary, which can be
    passed as ``data`` to the constructor.'''
    data = {}
    data['ao_spec'] = self.ao_spec.todict()
    data['geo_info'] = self.geo_info
    data['geo_spec_all'] = self.geo_spec_all
    data['mo_data'] = self.mo_list_parsing()
    data['sym'] = self.sym
    data['index_list'] = self.index_list
    data['parent_class_name'] = self.__module__ + '.' + self.__class__.__name__
    return data

  def construct_qc(self, all_mo=True):
    '''Converts all global variables to a list of `QCinfo` classes.

    **Parameters:**

    all_mo : bool
      If False, the molecular orbital lists of all geometries are cut at
      the largest value returned by ``mo_spec.get_lumo()``.

    **Returns:**

    self.QC : list of QCinfo
    '''
    self.QC = []
    ilumo = None
    for rr in range(len(self.geo_spec_all)):
      qc = QCinfo()
      qc.geo_spec = self.geo_spec_all[rr]
      qc.geo_info = self.geo_info
      qc.ao_spec = self.ao_spec
      qc.mo_spec = []
      for s,ii_s in self.sym.items():
        for i,coeffs in enumerate(self.mo_coeff_all[ii_s][rr]):
          qc.mo_spec.append({'coeffs': coeffs,
                             'energy' : self.mo_energy_all[ii_s][rr,i],
                             'occ_num' : self.mo_occ_all[ii_s][rr,i],
                             'sym': '%d.%s' % (i+1,s)})
      qc.ao_spec.update()
      qc.mo_spec = MOClass(qc.mo_spec)
      qc.mo_spec.update()
      if not all_mo:
        ilumo = max(ilumo or 0, qc.mo_spec.get_lumo())

      self.QC.append(qc)

    if not all_mo:
      for i in range(len(self.QC)):
        self.QC[i].mo_spec = self.QC[i].mo_spec[slice(None,ilumo)]

    return self.QC

  def compute_mo_list(self,ao_spec,mo_matrix,
                      iter_drv=[None, 'x', 'y', 'z']):
    '''Computes the values of the molecular orbitals and, if requested, their
    derivatives at the nuclear positions for a complete
    mo_matrix (shape=(Nfiles,NMO,NAO)).

    **Parameters:**

    ao_spec :
      Is accepted for compatibility; the routine uses self.ao_spec.
    mo_matrix : numpy.ndarray, shape=(Nfiles,NMO,NAO)
      Contains the molecular orbital coefficients.
    iter_drv : list
      Derivatives passed one after another as ``drv`` to ``ao_creator``.
      The output array provides space for four entries.

    **Returns:**

    mo_list : numpy.ndarray, shape=(Nfiles,NMO,4*Natoms)
    '''
    from orbkit.core import ao_creator

    shape = numpy.shape(mo_matrix)
    mo_list = numpy.zeros((shape[0],shape[1],4*numpy.shape(self.geo_spec_all)[1]))

    for rr in range(shape[0]):
      geo_spec = self.geo_spec_all[rr]
      x = geo_spec[:,0]
      y = geo_spec[:,1]
      z = geo_spec[:,2]
      N = len(x)
      for i,drv in enumerate(iter_drv):
        ao_list = ao_creator(geo_spec,self.ao_spec,
                             exp_list=False,
                             is_vector=True,
                             drv=drv,
                             x=x,y=y,z=z)
        for i_mo in range(shape[1]):
          for i_ao in range(shape[2]):
            mo_list[rr,i_mo,N*i+numpy.arange(N)] += mo_matrix[rr,i_mo,i_ao] * ao_list[i_ao,:]

    return mo_list

  def data_interp(self,x,y,xnew,k=3,der=0,s=0,**kwargs):
    '''Interpolates a dataset y(x) to y(xnew) using B-Splines of order k.

    der and s are passed on to scipy's ``splev`` and ``splrep``,
    respectively. Additional keywords are ignored.
    '''
    from scipy import interpolate
    tck = interpolate.splrep(x,y,s=s,k=k)
    ynew = interpolate.splev(xnew,tck,der=der)

    return ynew

  def splrep_all(self,x,k=3,**kwargs):
    '''Computes the B-Spline representations (scipy's ``splrep``) of order k
    of all geometries, molecular orbital coefficients, energies, and
    occupations on the grid x.

    The results are stored in self.geo_spec_tck, self.mo_coeff_tck,
    self.mo_energy_tck, and self.mo_occ_tck. Additional keywords are passed
    on to ``splrep``.
    '''
    from scipy import interpolate

    geo_spec_tck  = []
    mo_coeff_tck  = []
    mo_energy_tck = []
    mo_occ_tck    = []

    shape = self.geo_spec_all.shape
    for i in range(shape[1]):
      geo_spec_tck.append([])
      for j in range(shape[2]):
        geo_spec_tck[-1].append(interpolate.splrep(x,self.geo_spec_all[:,i,j],
                                k=k,**kwargs))

    for i_mo in range(len(self.mo_coeff_all)):
      mo_coeff_tck.append([])
      mo_energy_tck.append([])
      mo_occ_tck.append([])
      shape = self.mo_coeff_all[i_mo].shape
      for i in range(shape[1]):
        mo_coeff_tck[-1].append([])
        mo_energy_tck[-1].append(interpolate.splrep(x,self.mo_energy_all[i_mo][:,i],
                                 k=k,**kwargs))
        mo_occ_tck[-1].append(interpolate.splrep(x,self.mo_occ_all[i_mo][:,i],
                              k=k,**kwargs))
        for j in range(shape[2]):
          mo_coeff_tck[-1][-1].append(interpolate.splrep(x,
                                      self.mo_coeff_all[i_mo][:,i,j],
                                      k=k,**kwargs))

    # Keep the representations; before they were computed and discarded
    self.geo_spec_tck  = geo_spec_tck
    self.mo_coeff_tck  = mo_coeff_tck
    self.mo_energy_tck = mo_energy_tck
    self.mo_occ_tck    = mo_occ_tck

  def interpolate_all(self,x,xnew,k=3,**kwargs):
    '''Interpolates a dataset y(x) to y(xnew) using B-Splines of order k.

    This is applied to all geometries, molecular orbital coefficients,
    energies, and occupations; the respective attributes are replaced by
    the interpolated data, i.e., their first dimension becomes len(xnew).
    '''
    shape = list(self.geo_spec_all.shape)
    shape[0] = len(xnew)
    tmp = numpy.zeros(shape)
    for i in range(shape[1]):
      for j in range(shape[2]):
        tmp[:,i,j] = self.data_interp(x,self.geo_spec_all[:,i,j],xnew,k=k,**kwargs)
    self.geo_spec_all = numpy.copy(tmp)

    for i_mo in range(len(self.mo_coeff_all)):
      shape = list(self.mo_coeff_all[i_mo].shape)
      shape[0] = len(xnew)
      tmp = numpy.zeros(shape)
      for i in range(shape[1]):
        for j in range(shape[2]):
          tmp[:,i,j] = self.data_interp(x,self.mo_coeff_all[i_mo][:,i,j],xnew,k=k,**kwargs)
      self.mo_coeff_all[i_mo] = numpy.copy(tmp)

      shape = list(self.mo_energy_all[i_mo].shape)
      shape[0] = len(xnew)
      tmp = numpy.zeros(shape)
      for i in range(shape[1]):
        tmp[:,i] = self.data_interp(x,self.mo_energy_all[i_mo][:,i],xnew,k=k,**kwargs)
      self.mo_energy_all[i_mo] = numpy.copy(tmp)

      shape = list(self.mo_occ_all[i_mo].shape)
      shape[0] = len(xnew)
      tmp = numpy.zeros(shape)
      for i in range(shape[1]):
        tmp[:,i] = self.data_interp(x,self.mo_occ_all[i_mo][:,i],xnew,k=k,**kwargs)
      self.mo_occ_all[i_mo] = numpy.copy(tmp)

  def plot(self,mo_matrix,symmetry='1',title='All',x_label='index',
           y_label='MO coefficients',output_format='png',
           plt_dir='Plots',ylim=None,thresh=0.1,x0=0,grid=True,x_grid=None,**kwargs):
    '''Plots all molecular orbital coefficients of one symmetry.

    For every molecular orbital one figure is created, which shows all
    coefficients whose maximum absolute value exceeds thresh. The figures
    are saved to plt_dir as one multi-page pdf file (output_format='pdf')
    or as one png file per orbital (output_format='png'). Additional
    keywords are passed on to ``savefig``.

    Raises a ValueError for any other output_format.
    '''
    import pylab as plt
    from matplotlib.ticker import MultipleLocator
    import os

    display('Plotting data of self.symmetry %s to %s/' % (symmetry,plt_dir))
    if not os.path.exists(plt_dir):
      os.makedirs(plt_dir)

    if numpy.ndim(mo_matrix) == 2:
      mo_matrix = mo_matrix[:,numpy.newaxis,:]

    shape = numpy.shape(mo_matrix)

    def plot_mo(i):
      fig=plt.figure()
      plt.rc('xtick', labelsize=16)
      plt.rc('ytick', labelsize=16)
      ax = plt.subplot(111)
      curves=[]
      for ij in range(shape[2]):
        Y = mo_matrix[:,i,ij]
        if x_grid is None:
          X = numpy.arange(len(Y))+x0
        else:
          X = x_grid
        if max(numpy.abs(Y)) > thresh:
          curves.append(ax.plot(X,Y, '.-' ,linewidth=1.5))

      plt.xlabel(x_label, fontsize=16)
      plt.ylabel(y_label, fontsize=16)
      plt.title('%s: %d.%s'% (title,i+1,symmetry))
      plt.ylim(ylim)
      plt.tight_layout()
      return fig

    if output_format == 'pdf':
      from matplotlib.backends.backend_pdf import PdfPages
      output_fid = '%s.%s.pdf'% (title,symmetry.replace(' ','_'))
      display('\t%s' % output_fid)
      with PdfPages(os.path.join(plt_dir,output_fid)) as pdf:
        for i in range(shape[1]):
          fig = plot_mo(i)
          pdf.savefig(fig,**kwargs)
          plt.close()
    elif output_format == 'png':
      for i in range(shape[1]):
        fig = plot_mo(i)
        output_fid = '%d.%s.png' % (i+1,symmetry.replace(' ','_'))
        display('\t%s' % output_fid)
        fig.savefig(os.path.join(plt_dir, output_fid),format='png',**kwargs)
        plt.close()
    else:
      raise ValueError('output_format `%s` is not supported' % output_format)

  def show_selected_mos(self,selected_mos,r0=0,steps=1,select_slice='xz',where=0.0,
                        npts=[26,51],minpts=[-3,-6],maxpts=[3,6],nuclear_pos='x'):
    '''Uses orbkit to compute selected molecular orbitals and plots it with
    :func:`contour_mult_mo`.

    **Parameters:**

    selected_mos : list of str
      Labels of the molecular orbitals, e.g., '3.1' (index.symmetry).
    r0, steps : int
      The geometries r0, ..., r0+steps-1 are shown.
    select_slice : str, choices={'xy', 'yz', 'xz'}
      Plane of the slice.
    where : float
      Position of the slice on the remaining axis.
    npts, minpts, maxpts : list
      Number of grid points and grid boundaries for the two in-plane axes.
    nuclear_pos : str
      Marker used for the nuclear positions.

    **Raises:**

    ValueError for an unknown select_slice.
    '''
    from orbkit import grid
    from orbkit.core import ao_creator,mo_creator

    r = range(r0,r0+steps)

    grid.N_ = [1,1,1]
    grid.min_ = [0,0,0]
    grid.max_ = [0,0,0]
    if select_slice == 'xy':
      k = [0,1]
      grid.min_[2] += where
      grid.max_[2] += where
    elif select_slice == 'yz':
      k = [1,2]
      grid.min_[0] += where
      grid.max_[0] += where
    elif select_slice == 'xz':
      k = [0,2]
      grid.min_[1] += where
      grid.max_[1] += where
    else:
      raise ValueError('`show_selected_mos` currently only' +
                       'supports slices parallel to the following planes:' +
                       'select_slice = `xy`, `yz`, or `xz`')
    for i,j in enumerate(k):
      grid.N_[j] = npts[i]
      grid.min_[j] = minpts[i]
      grid.max_[j] = maxpts[i]

    # Initialize grid
    grid.is_initialized = False
    grid.grid_init(force=True)
    xyz = grid.x,grid.y,grid.z
    for mo_sel in selected_mos:
      i,j = mo_sel.split('.')
      mo = []
      for rr in r:
        ao_list = ao_creator(self.geo_spec_all[rr],self.ao_spec)
        mo.append(mo_creator(ao_list,self.mo_coeff_all[self.sym[j]][rr,int(i)-1,numpy.newaxis])[0].reshape(tuple(npts)))

      f, pics = self.contour_mult_mo(xyz[k[0]],xyz[k[1]],mo,
                                     xlabel=select_slice[0],ylabel=select_slice[1],
                                     title='MO:%s' % mo_sel,r0=r0)
      # The i-th panel shows the geometry r[i]; mark its nuclear positions
      for i_pic,pic in enumerate(pics):
        pic.plot(self.geo_spec_all[r[i_pic],:,k[1]],self.geo_spec_all[r[i_pic],:,k[0]],nuclear_pos,
                 markersize=10,markeredgewidth=2)

  def contour_mult_mo(self,x,y,mo,xlabel='x',ylabel='y',title='',r0=0):
    '''Uses matplotlib to show slices of a molecular orbitals.

    **Parameters:**

    x, y : numpy.ndarray
      Grid of the two in-plane axes.
    mo : list of numpy.ndarray
      One slice per data point; every slice gets its own panel.
    xlabel, ylabel, title : str
      Labels of the plot.
    r0 : int
      Index of the first data point (is only used for the panel titles).

    **Returns:**

    f : the matplotlib figure
    pics : numpy.ndarray containing the axes of all panels
    '''
    import matplotlib.pyplot as plt

    # Plot slices
    f, pics = \
      plt.subplots(len(mo),1,sharex=True,sharey=True,figsize=(6,2+4*len(mo)))
    # For a single panel, subplots returns the bare axes object
    pics = numpy.atleast_1d(pics)
    plt.suptitle(title)
    vmax = numpy.max(numpy.abs(mo))
    for i,pic in enumerate(pics):
      pic.contour(y,x,mo[i],50,linewidths=0.5,colors='k')
      pic.contourf(\
        y,x,mo[i],50,cmap=plt.cm.rainbow,vmax=vmax,vmin=-vmax)
      pic.set_ylabel(xlabel)
      pic.set_xlabel(ylabel)
      pic.set_title('Data Point %d' % (r0+i))

    f.subplots_adjust(left=0.15,bottom=0.05,top=0.95,right=0.95)
    f.show()
    return f,pics
def read_aomix(fname,
               all_mo=False,
               spin=None,
               i_md=-1,
               interactive=True,
               created_by_tmol=True,
               **kwargs):
  '''Reads all information desired from a aomix file.

  **Parameters:**

    fname : str, file descriptor
      Specifies the filename for the input file.
      fname can also be used with a file descriptor instead of a filename.
    all_mo : bool, optional
      If True, all molecular orbitals are returned.
    spin : {None, 'alpha', or 'beta'}, optional
      If not None, returns exclusively 'alpha' or 'beta' molecular orbitals.
    i_md : int, default=-1
      Selects the `[AOMix Format]` section of the output file.
    interactive : bool
      If True, the user is asked to select the different sets.
    created_by_tmol : bool
      If True and if Cartesian basis set is found, the molecular orbital
      coefficients will be converted.

  **Returns:**

    qc (class QCinfo) with attributes geo_spec, geo_info, ao_spec, mo_spec, etot :
      See :ref:`Central Variables` for details.

  **Raises:**

    IOError if the file contains no `[AOMix Format]` keyword, if the
    selection of the section fails, or if Slater type orbitals are found.
    ValueError if a molecular orbital coefficient cannot be converted to
    float.
  '''
  aomix_regex = re.compile(r"\[[ ]{,}[Aa][Oo][Mm]ix[ ]+[Ff]ormat[ ]{,}\]")

  # NOTE(review): a descriptor opened here via descriptor_from_file is never
  # closed - confirm whether the helper or the caller takes care of that.
  if isinstance(fname, str):
    filename = fname
    fname = descriptor_from_file(filename, index=0)
  else:
    filename = fname.name

  from io import TextIOWrapper
  if isinstance(fname, TextIOWrapper):
    flines = fname.readlines()  # Read the WHOLE file into RAM
  else:
    # Binary descriptor: decode and split into lines, keeping the newlines
    magic = 'This is an Orbkit magic string'
    text = fname.read().decode("iso-8859-1").replace(
        '\n', '\n{}'.format(magic))
    flines = text.split(magic)
    flines.pop()

  # Is this really a aomix file?
  if not '[AOMix Format]\n' in flines:
    raise IOError('The input file %s is no valid aomix file!\n\nIt does' %
                  filename + ' not contain the keyword: [AOMix Format]\n')

  def check_sel(count, i, interactive=False):
    # Returns the index of the selected section (asks the user if requested)
    if count == 0:
      raise IndexError
    elif count == 1:
      return 0
    message = '\tPlease give an integer from 0 to %d: ' % (count - 1)

    try:
      if interactive:
        i = int(input(message))
      i = range(count)[i]
    except (IndexError, ValueError):
      raise IOError(message.replace(':', '!'))
    else:
      display('\tSelecting the %s' %
              ('last element.' if (i == count - 1) else 'element %d.' % i))
    return i

  def check_int(i):
    # Can i be converted to an integer?
    try:
      int(i)
      return True
    except ValueError:
      return False

  has_alpha = []
  has_beta = []
  restricted = []
  count = 0
  # First pass: count the [AOMix Format] sections and collect their
  # spin/occupation characteristics
  for il in range(len(flines)):
    line = flines[il]  # The current line as string
    # Check the file for keywords
    if aomix_regex.search(line):
      count += 1
      has_alpha.append(False)
      has_beta.append(False)
      restricted.append(False)
    if 'Spin' in line and 'alpha' in line.lower():
      has_alpha[-1] = True
    if 'Spin' in line and 'beta' in line.lower():
      has_beta[-1] = True
    if 'Occup' in line:
      restricted[-1] = restricted[-1] or (float(line.split('=')[1]) >
                                          1. + 1e-4)

  if count == 0:
    raise IOError('The input file %s is no valid aomix file!\n\nIt does' %
                  filename + ' not contain the keyword: [AOMix Format]\n')
  else:
    if count > 1:
      display('\nContent of the aomix file:')
      display('\tFound %d [AOMix Format] keywords, i.e., ' % count +
              'this file contains %d aomix files.' % count)
    i_md = check_sel(count, i_md, interactive=interactive)

  spin_check(spin, restricted[i_md], has_alpha[i_md], has_beta[i_md])

  # Set a counter for the AOs
  basis_count = 0

  # Declare synonyms for molden keywords
  synonyms = {
      'Sym': 'sym',
      'Ene': 'energy',
      'Occup': 'occ_num',
      'Spin': 'spin'
  }
  MO_keys = synonyms.keys()

  lxlylz = []
  count = 0
  start_reading = False
  # Stays False if the selected section has no [Atoms] header
  angstrom = False
  # Second pass: go through the file line by line
  for il in range(len(flines)):
    line = flines[il]  # The current line as string
    thisline = line.split()  # The current line split into segments

    # Check the file for keywords
    if '[aomix format]' in line.lower():
      # A new file begins
      # Initialize the variables
      if i_md == count:
        qc = QCinfo()
        qc.ao_spec = AOClass([])
        qc.mo_spec = MOClass([])
        sec_flag = False  # A Flag specifying the current section
        start_reading = True  # Found the selected section
      else:
        start_reading = False
      count += 1
      continue
    if start_reading:
      if '[SCF Energy / Hartree]' in line:
        try:
          qc.etot = float(flines[il + 1].split()[0])
        except IndexError:
          pass
      elif '[atoms]' in line.lower():
        # The section containing information about
        # the molecular geometry begins
        sec_flag = 'geo_info'
        angstrom = 'Angs' in line
      elif '[gto]' in line.lower():
        # The section containing information about
        # the atomic orbitals begins
        sec_flag = 'ao_info'
        bNew = True  # Indication for start of new AO section
      elif '[mo]' in line.lower():
        # The section containing information about
        # the molecular orbitals begins
        sec_flag = 'mo_info'
        bNew = True  # Indication for start of new MO section
      elif '[sto]' in line.lower():
        # The orbkit does not support Slater type orbitals
        raise IOError('orbkit does not work for STOs!\nEXIT\n')
      else:
        # Check if we are in a specific section
        if sec_flag == 'geo_info':
          # Geometry section
          qc.geo_info.append(thisline[0:3])
          qc.geo_spec.append([float(ii) for ii in thisline[3:]])
        if sec_flag == 'ao_info':
          # Atomic orbital section
          if thisline == []:
            # There is a blank line after every AO
            bNew = True
          elif bNew:
            # The following AOs are for which atom?
            bNew = False
            at_num = int(thisline[0]) - 1
            ao_num = 0
          elif len(thisline) == 3 and check_int(thisline[1]):
            # AO information section
            # Initialize a new dict for this AO
            ao_num = 0  # Initialize number of atomic orbitals
            ao_type = thisline[0]  # Which type of atomic orbital do we have
            pnum = int(thisline[1])  # Number of primitives
            for i_ao in ao_type:
              # Calculate the degeneracy of this AO and increase basis_count
              basis_count += l_deg(lquant[i_ao])
              qc.ao_spec.append({
                  'atom': at_num,
                  'type': i_ao,
                  'pnum': pnum,
                  #'ao_spherical': None,
                  'coeffs': numpy.zeros((pnum, 2))
              })
          else:
            # Append the AO coefficients
            coeffs = numpy.array(line.replace('D', 'e').split(),
                                 dtype=numpy.float64)
            for i_ao in range(len(ao_type)):
              qc.ao_spec[-len(ao_type) + i_ao]['coeffs'][ao_num, :] = [
                  coeffs[0], coeffs[1 + i_ao]
              ]
            ao_num += 1
        if sec_flag == 'mo_info':
          # Molecular orbital section
          if '=' in line:
            # MO information section
            if bNew:
              # Create a numpy array for the MO coefficients and
              # for backward compatibility create a simple counter for 'sym'
              qc.mo_spec.append({
                  'coeffs': numpy.zeros(basis_count),
                  'sym': '%d.1' % (len(qc.mo_spec) + 1)
              })
              bNew = False
            # Append information to dict of this MO
            info = line.replace('\n', '').replace(' ', '')
            info = info.split('=')
            if info[0] in MO_keys:
              if info[0] == 'Spin':
                info[1] = info[1].lower()
              elif info[0] != 'Sym':
                info[1] = float(info[1])
              elif not '.' in info[1]:
                # Bring the symmetry label to the format '<number>.<label>'
                a = re.search(r'\d+', info[1]).group()
                if a == info[1]:
                  info[1] = '%s.1' % a
                else:
                  info[1] = info[1].replace(a, '%s.' % a, 1)
              qc.mo_spec[-1][synonyms[info[0]]] = info[1]
          else:
            # `('[' or ']') in line` only ever tested for '['
            if '[' in line or ']' in line:
              # start of another section that is not (yet) read
              sec_flag = None
            else:
              # Append the MO coefficients
              bNew = True  # Reset bNew
              index = int(thisline[0]) - 1
              try:
                # Try to convert coefficient to float
                qc.mo_spec[-1]['coeffs'][index] = float(thisline[-1])
                if len(qc.mo_spec) == 1:
                  lxlylz.append(thisline[-2])
              except ValueError:
                # If it cannot be converted print error message
                raise ValueError('Error in coefficient %d of MO %s!' %
                                 (index, qc.mo_spec[-1]['sym']) +
                                 '\nSetting this coefficient to zero...')

  # Check usage of same atomic basis sets
  # Convert the AO labels of the first MO to exponents (lx,ly,lz)
  for ii in range(len(lxlylz)):
    s = lxlylz[ii]
    exp = [0, 0, 0]
    c_last = None
    for jj in s[1:]:
      try:
        c = int(jj)
        exp[c_last] += (c - 1)
      except ValueError:
        for kk, ll in enumerate('xyz'):
          if jj == ll:
            exp[kk] += 1
            c_last = kk
    lxlylz[ii] = exp

  count = 0
  for ao in qc.ao_spec:
    l = l_deg(lquant[ao['type']])
    ao['lxlylz'] = []
    for _ in range(l):
      ao['lxlylz'].append(
          (lxlylz[count][0], lxlylz[count][1], lxlylz[count][2]))
      count += 1
    ao['lxlylz'] = numpy.array(ao['lxlylz'], dtype=numpy.int64)

  # For Cartesian basis sets in Turbomole, the molecular orbital coefficients
  # have to be converted.
  is_tmol_cart = not (len(qc.mo_spec) % len(qc.mo_spec[0]['coeffs']))

  # Are all MOs requested for the calculation?
  if not all_mo:
    for i in range(len(qc.mo_spec))[::-1]:
      if qc.mo_spec[i]['occ_num'] < 0.0000001:
        del qc.mo_spec[i]

  # Modify qc.mo_spec to support spin
  qc.select_spin(restricted[i_md], spin=spin)

  # Convert geo_info and geo_spec to numpy.ndarrays
  qc.format_geo(is_angstrom=angstrom)

  if is_tmol_cart and created_by_tmol:
    display('\nFound a Cartesian basis set in the AOMix file.')
    display('We assume that this file has been created by Turbomole.')
    display('Applying a conversion to the molecular orbital coefficients, ')
    display('in order to get normalized orbitals.')

    # Convert MO coefficients
    def dfact(n):
      # Double factorial
      if n <= 0:
        return 1
      else:
        return n * dfact(n - 2)

    mo = qc.mo_spec.get_coeffs()
    for i, exponents in enumerate(qc.ao_spec.get_lxlylz()):
      norm = (dfact(2 * exponents[0] - 1) * dfact(2 * exponents[1] - 1) *
              dfact(2 * exponents[2] - 1))
      if sum(exponents) > 1:
        mo[:, i] *= numpy.sqrt(norm)
    for ii in range(len(qc.mo_spec)):
      qc.mo_spec[ii]['coeffs'] = mo[ii]

  qc.mo_spec.update()
  qc.ao_spec.update()
  return qc
def convert_cclib(ccData, all_mo=False, spin=None):
    '''Converts a ccData class created by cclib to an instance of
    orbkit's QCinfo class.

    **Parameters:**

    ccData : class
      Contains the input data created by cclib.
    all_mo : bool, optional
      If True, all molecular orbitals are returned. Otherwise, orbitals
      with an occupation number below 1e-7 are removed.
    spin : {None, 'alpha', or 'beta'}, optional
      If not None, returns exclusively 'alpha' or 'beta' molecular orbitals.

    **Returns:**

    qc (class QCinfo) with attributes geo_spec, geo_info, ao_spec, mo_spec, etot :
        See :ref:`Central Variables` for details.

    **Raises:**

    IOError
      If natural orbital coefficients are present without natural occupation
      numbers, if `spin` is neither 'alpha' nor 'beta', or if `spin` is
      requested for a restricted calculation.
    '''
    # Initialize the variables
    qc = QCinfo()
    qc.ao_spec = AOClass([])
    qc.mo_spec = MOClass([])

    # --- Atoms and geometry (first geometry only, scaled by aa_to_a0) ---
    qc.geo_spec = ccData.atomcoords[0] * aa_to_a0
    for atom_idx in range(ccData.natom):
        atomic_number = ccData.atomnos[atom_idx]
        symbol = get_atom_symbol(atom=atomic_number)
        qc.geo_info.append([symbol, str(atom_idx + 1), str(atomic_number)])

    # Convert geo_info and geo_spec to numpy.ndarrays
    qc.format_geo()

    # --- Atomic basis set: one ao_spec entry per contracted shell ---
    for atom_idx in range(ccData.natom):
        for shell in ccData.gbasis[atom_idx]:
            primitives = shell[1]
            pnum = len(primitives)
            qc.ao_spec.append({
                'atom': atom_idx,
                'type': str(shell[0]).lower(),
                'pnum': pnum,
                'coeffs': numpy.zeros((pnum, 2)),
            })
            # Column 0 and column 1 are copied element-wise from each primitive
            target = qc.ao_spec[-1]['coeffs']
            for kk in range(pnum):
                target[kk][0] = primitives[kk][0]
                target[kk][1] = primitives[kk][1]

    if hasattr(ccData, 'aonames'):
        # Reconstruct the exponents (or l,m) list for ao_spec from the AO labels.
        # A '+' or '-' in any label marks the basis as spherical.
        cartesian_basis = not any('+' in label or '-' in label
                                  for label in ccData.aonames)
        if not cartesian_basis:
            qc.ao_spec.spherical = True

        count = 0  # Running index into ccData.aonames
        for ao in qc.ao_spec:
            degeneracy = l_deg(lquant[ao['type']],
                               cartesian_basis=cartesian_basis)
            if cartesian_basis:
                ao['lxlylz'] = []
            else:
                ao['lm'] = []
            for _ in range(degeneracy):
                label = ccData.aonames[count].lower()
                if cartesian_basis:
                    # Exponents are the number of x, y and z characters
                    ao['lxlylz'].append((label.count('x'),
                                         label.count('y'),
                                         label.count('z')))
                else:
                    m = label.split('_')[-1]
                    m = (m.replace('+', ' +')
                          .replace('-', ' -')
                          .replace('s', 's 0')
                          .split(' '))
                    # Labels ending in y/z/x map to m = -1/0/+1;
                    # otherwise the signed integer suffix is used
                    p = 'yzx'.find(m[0][-1])
                    if p != -1:
                        m = p - 1
                    else:
                        m = int(m[-1])
                    ao['lm'].append((lquant[ao['type']], m))
                count += 1

    # --- Molecular orbitals ---
    # Electron count used to assign occupations in the restricted case
    ele_num = (numpy.sum(ccData.atomnos)
               - numpy.sum(ccData.coreelectrons)
               - ccData.charge)
    ue = (ccData.mult - 1)  # Number of unpaired electrons

    # Check for natural orbitals and occupation numbers
    is_natorb = False
    if hasattr(ccData, 'nocoeffs'):
        if not hasattr(ccData, 'nooccnos'):
            raise IOError('There are natural orbital coefficients (`nocoeffs`) in the cclib' +
                          ' ccData, but no natural occupation numbers (`nooccnos`)!')
        is_natorb = True

    restricted = (len(ccData.mosyms) == 1)
    if spin is not None:
        if spin not in ('alpha', 'beta'):
            raise IOError('`spin=%s` is not a valid option' % spin)
        elif restricted:
            raise IOError('The keyword `spin` is only supported for unrestricted calculations.')
        else:
            # NOTE(review): bare attribute access with no visible effect here;
            # possibly a truncated assignment -- confirm against MOClass.
            qc.mo_spec.spinpola
            display('Converting only molecular orbitals of spin %s.' % spin)

    sym = {}  # Counter per symmetry label (with spin suffix)
    if len(ccData.mosyms) == 1:
        add = ['']
        orb_sym = [None]
    else:
        add = ['_a', '_b']
        orb_sym = ['alpha', 'beta']

    nmo = ccData.nmo if hasattr(ccData, 'nmo') else len(ccData.mocoeffs[0])
    for ii in range(nmo):
        for i, suffix in enumerate(add):
            a = '%s%s' % (ccData.mosyms[i][ii], suffix)
            sym[a] = sym.get(a, 0) + 1

            if is_natorb:
                occ_num = ccData.nooccnos[ii]
            elif not restricted:
                occ_num = 1.0 if ii <= ccData.homos[i] else 0.0
            elif ele_num > ue:
                # Doubly occupied orbital
                occ_num = 2.0
                ele_num -= 2.0
            elif ele_num > 0.0 and ele_num <= ue:
                # Singly occupied orbital
                occ_num = 1.0
                ele_num -= 1.0
                ue -= 1.0
            else:
                occ_num = 0.0

            coeff_source = ccData.nocoeffs if is_natorb else ccData.mocoeffs[i]
            qc.mo_spec.append({
                'coeffs': coeff_source[ii],
                'energy': 0.0 if is_natorb else ccData.moenergies[i][ii] * ev_to_ha,
                'occ_num': occ_num,
                'sym': '%d.%s' % (sym[a], a),
            })
            if orb_sym[i] is not None:
                qc.mo_spec[-1]['spin'] = orb_sym[i]
                # Drop the orbital again if it is not of the requested spin
                if spin is not None and spin != orb_sym[i]:
                    del qc.mo_spec[-1]

    # Use default order for atomic basis functions if aonames is not present
    if not hasattr(ccData, 'aonames'):
        display('The attribute `aonames` is not present in the parsed data.')
        display('Using the default order of basis functions.')

        # Check which basis functions have been used by comparing the number
        # of MO coefficients with the Cartesian and spherical function counts
        c_cart = sum(l_deg(l=ao['type'], cartesian_basis=True)
                     for ao in qc.ao_spec)
        c_sph = sum(l_deg(l=ao['type'], cartesian_basis=False)
                    for ao in qc.ao_spec)

        c = qc.mo_spec.get_coeffs().shape[-1]
        if c != c_cart and c == c_sph:  # Spherical basis
            qc.ao_spec.set_lm_dict(p=[0, 1])
        elif c != c_cart:
            display('Warning: The basis set type does not match with pure spherical ' +
                    'or pure Cartesian basis!')
            display('Please specify qc.ao_spec["lxlylz"] and/or qc.ao_spec["lm"] by your self.')

    # Are all MOs requested for the calculation?
    if not all_mo:
        # Walk backwards so deletions do not shift pending indices
        for idx in reversed(range(len(qc.mo_spec))):
            if qc.mo_spec[idx]['occ_num'] < 0.0000001:
                del qc.mo_spec[idx]

    qc.mo_spec.update()
    qc.ao_spec.update()
    return qc
def read_wfn(fname, all_mo=False, spin=None, **kwargs):
    '''Reads all information desired from a wfn file.

    **Parameters:**

    fname : str, file descriptor
      Specifies the filename for the input file.
      fname can also be used with a file descriptor instead of a filename.
    all_mo : bool, optional
      Accepted for interface compatibility with the other readers. This
      reader does not filter the molecular orbitals.
    spin : None, optional
      Must be None; the `.wfn` reader does not support spin selection.

    **Returns:**

    qc (class QCinfo) with attributes geo_spec, geo_info, ao_spec, mo_spec, etot :
        See :ref:`Central Variables` for details.

    **Raises:**

    IOError
      If `spin` is not None.
    '''
    if spin is not None:
        raise IOError(
            'The option `spin` is not supported for the `.wfn` reader.')

    # Initialize the variables
    qc = QCinfo()
    qc.ao_spec = AOClass([])
    qc.mo_spec = MOClass([])
    sec_flag = None  # A flag specifying the current section
    ao_num = 0       # Number of AO (primitives)
    mo_num = 0       # Number of MO
    at_num = 0       # Number of atoms still to be read
    c_type = 0       # Counting variable for AO type
    c_exp = 0        # Counting variable for AO exponents
    c_mo = 0         # Counting variable for the coefficients of the current MO

    # Flatten the exponent table so that row (type number - 1) holds the
    # (lx, ly, lz) exponents of the corresponding wfn type assignment.
    lxlylz = []
    for j in exp_wfn:
        lxlylz.extend(j)
    lxlylz = numpy.array(lxlylz, dtype=numpy.int64)

    # Remember whether the descriptor is opened here, so that it can be
    # closed again; descriptors passed in by the caller are left alive.
    opened_here = isinstance(fname, str)
    fd = descriptor_from_file(fname, index=0) if opened_here else fname

    from io import TextIOWrapper
    try:
        if isinstance(fd, TextIOWrapper):
            flines = fd.readlines()  # Read the WHOLE file into RAM
        else:
            # Binary descriptor: decode and split after every newline
            magic = 'This is an Orbkit magic string'
            text = fd.read().decode("iso-8859-1").replace(
                '\n', '\n{}'.format(magic))
            flines = text.split(magic)
            flines.pop()
    finally:
        if opened_here:
            fd.close()

    for line in flines:
        thisline = line.split()  # The current line split into segments

        # Check the file for keywords
        if 'GAUSSIAN' in line or 'GTO' in line:
            if len(thisline) == 8:
                mo_num = int(thisline[1])
                ao_num = int(thisline[4])
                at_num = int(thisline[6])
                sec_flag = 'geo_info'
        elif 'CENTRE ASSIGNMENTS' in line:
            # One ao_spec entry per primitive; the remaining fields are
            # filled in by the TYPE ASSIGNMENTS and EXPONENTS sections.
            for center in line[20:].split():
                qc.ao_spec.append({
                    'atom': int(center) - 1,
                    'pnum': -1,
                    'coeffs': None,
                    'lxlylz': None,
                    #'lm': None
                })
        elif 'TYPE ASSIGNMENTS' in line:
            for type_id in line[18:].split():
                # Both the exponents and the shell label are derived from
                # the type number given in the file (1-based).
                exponents = lxlylz[int(type_id) - 1]
                qc.ao_spec[c_type]['lxlylz'] = exponents[numpy.newaxis]
                qc.ao_spec[c_type]['type'] = orbit[sum(exponents)]
                c_type += 1
        elif 'EXPONENTS' in line:
            # Fortran 'D' exponents are converted to 'E' before parsing
            values = line.replace('EXPONENTS', '').replace('D', 'E').split()
            for value in values:
                qc.ao_spec[c_exp]['coeffs'] = numpy.array([[float(value), 1.0]])
                c_exp += 1
        elif 'MO' in line and 'OCC NO =' in line and 'ORB. ENERGY =' in line:
            fields = line[25:].split()
            qc.mo_spec.append({
                'coeffs': numpy.zeros(ao_num),
                'energy': float(fields[7]),
                'occ_num': float(fields[3]),
                'sym': '%s.1' % thisline[1]
            })
            sec_flag = 'mo_info'
            c_mo = 0  # Restart the coefficient counter for this MO
        else:
            if sec_flag == 'geo_info':
                if not at_num:
                    sec_flag = None
                elif at_num:
                    qc.geo_info.append(
                        [thisline[0], thisline[-7][:-1], thisline[-1]])
                    qc.geo_spec.append([float(ii) for ii in thisline[-6:-3]])
                    at_num -= 1
            elif sec_flag == 'mo_info':
                for i in thisline:
                    if c_mo < ao_num:
                        qc.mo_spec[-1]['coeffs'][c_mo] = float(
                            i.replace('D', 'E'))
                        c_mo += 1
                    if c_mo == ao_num:
                        sec_flag = None

    # Remove numbers from atom names
    for i in qc.geo_info:
        i[0] = ''.join([k for k in i[0] if not k.isdigit()])

    # Convert geo_info and geo_spec to numpy.ndarrays
    qc.format_geo(is_angstrom=False)

    qc.mo_spec.update()
    qc.ao_spec.update()
    return qc