def xyzfile(xyzfile, ccxyz=False):
    """Parse an XYZ file into a ccData or ccData_xyz object.

    The first line of the file is not used. The second line is kept as the
    comment line and handed to ``_chargemult`` to obtain charge and
    multiplicity. Every following non-blank line is read as
    ``symbol x y z ...``.

    Args:
        xyzfile: path of the XYZ file to read.
        ccxyz: when true, build a ccData_xyz that additionally carries the
            'elements', 'comment' and 'filename' attributes; otherwise build
            a plain ccData.

    Returns:
        The ccData_xyz or ccData object built from the parsed attributes.

    Raises:
        TypeError: if ``xyzfile`` is not a string.
    """
    if not isinstance(xyzfile, str):
        raise TypeError("{!r} is not a xyzfilename".format(xyzfile))

    with open(xyzfile, 'r') as handle:
        lines = handle.readlines()

    ptable = PeriodicTable()
    charge, mult = _chargemult(lines[1])

    # Blank lines (typically at the end of the file) carry no atom and would
    # otherwise break the symbol lookup below.
    geometry = [fields for fields in (row.split() for row in lines[2:]) if fields]
    # Store numbers rather than the raw text columns, as multixyzfile does.
    coordinates = [[float(value) for value in fields[1:]] for fields in geometry]
    atomnos = [ptable.number[fields[0]] for fields in geometry]

    attributes = {
        'charge': charge,
        'mult': mult,
        'atomcoords': [np.array(coordinates)],
        'atomnos': np.array(atomnos),
        'natom': len(atomnos),
    }
    # Masses are looked up through the element entry of each atomic number.
    attributes['atommasses'] = [pt.Mass[pt.Element[number]] for number in atomnos]

    if ccxyz:
        # Custom ccData_xyz attributes
        attributes['elements'] = [fields[0] for fields in geometry]
        attributes['comment'] = lines[1]
        attributes['filename'] = os.path.split(xyzfile.rstrip())[1]
        return ccData_xyz(attributes=attributes)
    return ccData(attributes=attributes)
def makecclib(iodat):
    """Create a cclib ccData object from a horton IOData object.

    Only attributes that are present on ``iodat`` and not None are copied.

    Args:
        iodat: the horton IOData instance to convert.

    Returns:
        A ccData built from the attributes that could be read.
    """
    check_horton()
    attributes = {}

    # Horton 3 IOData class uses attr and does not have __dict__.
    # In horton 3, some attributes have a default value of None, so each
    # value is checked against None and not merely for existence.
    atcoords = getattr(iodat, "atcoords", None)
    if atcoords is not None:
        # cclib parses the whole history of coordinates in the list, horton keeps the last one.
        attributes["atomcoords"] = [atcoords]

    if hasattr(iodat, "mo") and hasattr(iodat.mo, "norba"):
        norba = iodat.mo.norba
        # MO coefficient should be transposed to match the dimensions.
        attributes["mocoeffs"] = [iodat.mo.coeffs[:norba].T]
        if iodat.mo.kind == "unrestricted":
            attributes["mocoeffs"].append(iodat.mo.coeffs[norba:].T)

    if hasattr(iodat, "spinpol") and isinstance(iodat.spinpol, int):
        # IOData stores 2S, ccData stores 2S+1.
        attributes["mult"] = iodat.spinpol + 1

    atnums = getattr(iodat, "atnums", None)
    if atnums is not None:
        # Stored under "atomnos", the key the other converters in this file
        # use for atomic numbers (the previous "atnums" key matched none of them).
        attributes["atomnos"] = numpy.asanyarray(atnums)

    atcorenums = getattr(iodat, "atcorenums", None)
    if atcorenums is not None and isinstance(atnums, numpy.ndarray):
        # cclib stores num of electrons screened out by pseudopotential
        # horton stores num of electrons after applying pseudopotential
        attributes["coreelectrons"] = numpy.asanyarray(atnums) - numpy.asanyarray(atcorenums)

    atcharges = getattr(iodat, "atcharges", None)
    if atcharges is not None:
        attributes["atomcharges"] = atcharges

    return ccData(attributes)
def xyzfile(xyzfile, ccxyz=False):
    """Parse an XYZ file into a ccData or ccData_xyz object.

    The first line of the file is not used. The second line is kept as the
    comment line and handed to ``_chargemult`` to obtain charge and
    multiplicity. Every following non-blank line is read as
    ``symbol x y z ...``.

    Args:
        xyzfile: path of the XYZ file to read.
        ccxyz: when true, build a ccData_xyz that additionally carries the
            'elements', 'comment' and 'filename' attributes; otherwise build
            a plain ccData.

    Returns:
        The ccData_xyz or ccData object built from the parsed attributes.

    Raises:
        TypeError: if ``xyzfile`` is not a string.
    """
    if not isinstance(xyzfile, str):
        raise TypeError("{!r} is not a xyzfilename".format(xyzfile))

    with open(xyzfile, 'r') as handle:
        lines = handle.readlines()

    ptable = PeriodicTable()
    charge, mult = _chargemult(lines[1])

    # Blank lines (typically at the end of the file) carry no atom and would
    # otherwise break the symbol lookup below.
    geometry = [fields for fields in (row.split() for row in lines[2:]) if fields]
    # Store numbers rather than the raw text columns, as multixyzfile does.
    coordinates = [[float(value) for value in fields[1:]] for fields in geometry]
    atomnos = [ptable.number[fields[0]] for fields in geometry]

    attributes = {
        'charge': charge,
        'mult': mult,
        'atomcoords': [np.array(coordinates)],
        'atomnos': np.array(atomnos),
        'natom': len(atomnos),
    }
    # Masses are looked up through the element entry of each atomic number.
    attributes['atommasses'] = [pt.Mass[pt.Element[number]] for number in atomnos]

    if ccxyz:
        # Custom ccData_xyz attributes
        attributes['elements'] = [fields[0] for fields in geometry]
        attributes['comment'] = lines[1]
        attributes['filename'] = os.path.split(xyzfile.rstrip())[1]
        return ccData_xyz(attributes=attributes)
    return ccData(attributes=attributes)
def generate_repr(self):
    """Convert the raw contents of the source into the internal representation.

    Reads ``self.filecontents`` as one or more XYZ coordinate sets and stores
    the result in ``self.data`` as a ccData carrying 'natom', 'atomnos',
    'atomcoords' (one entry per complete set) and the comment lines under
    ``metadata['comments']``. 'natom' and 'atomnos' describe the last
    complete set.

    Raises:
        ValueError: if the contents hold no complete set of coordinates.
        AssertionError: if an atom line has fewer than four columns.
    """
    assert hasattr(self, 'filecontents')

    it = iter(self.filecontents.splitlines())

    # Ordering of lines:
    # 1. number of atoms
    # 2. comment line
    # 3. line of at least 4 columns: 1 is atomic symbol (str), 2-4 are atomic coordinates (float)
    #    repeat for number of atoms
    # (4. optional blank lines)
    # repeat for multiple sets of coordinates

    all_atomcoords = []
    comments = []
    natom = None
    atomnos = None

    while True:
        # A set is read completely before anything is recorded, so running
        # out of lines part-way through leaves the earlier sets untouched.
        try:
            line = next(it)
            while line.strip() == '':
                line = next(it)
            frame_natom = int(line.split()[0])
            comment = next(it)
            rows = []
            for _ in range(frame_natom):
                tokens = next(it).split()
                assert len(tokens) >= 4, "atom line needs a symbol and three coordinates"
                rows.append(tokens)
        except StopIteration:
            break

        natom = frame_natom
        atomnos = [self.pt.number[row[0]] for row in rows]
        comments.append(comment)
        # Everything beyond the fourth column is ignored.
        all_atomcoords.append([row[1:4] for row in rows])

    if natom is None:
        raise ValueError("no complete set of coordinates found")

    attributes = {
        'natom': natom,
        'atomnos': atomnos,
        'atomcoords': all_atomcoords,
        'metadata': {
            "comments": comments
        },
    }

    self.data = ccData(attributes)
def read_trajectory(filename):
    """Read an ASE Trajectory and return it as a single ccData object.

    Every frame is converted with ``makecclib``. The last coordinates of each
    frame are collected into 'atomcoords', and likewise for 'scfenergies' and
    'grads' where a frame provides them (these two are left out entirely when
    no frame had them).

    All remaining attributes come from the last frame only: atomnos,
    atomcharges, atomspins, atommasses, moments and freeenergy (the latter
    two only when present), plus temperature, charge, mult and natom.

    Inputs:
        filename - path to traj file to be read.
    """
    _check_ase(_found_ase)

    coords = []
    energies = []
    gradients = []
    for atoms in Trajectory(filename, "r"):
        ccdata = makecclib(atoms)
        coords.append(ccdata.atomcoords[-1])
        if hasattr(ccdata, "scfenergies"):
            energies.append(ccdata.scfenergies[-1])
        if hasattr(ccdata, "grads"):
            gradients.append(ccdata.grads[-1])

    attributes = {"atomcoords": coords}
    # per-frame series are only reported when at least one frame had them
    if energies:
        attributes["scfenergies"] = energies
    if gradients:
        attributes["grads"] = gradients

    # from here on ccdata refers to the last frame
    for name in ("atomnos", "atomcharges", "atomspins", "atommasses"):
        attributes[name] = getattr(ccdata, name)
    for name in ("moments", "freeenergy"):
        if hasattr(ccdata, name):
            attributes[name] = getattr(ccdata, name)

    # extra stuff we can't write in write_trajectory
    for name in ("temperature", "charge", "mult", "natom"):
        attributes[name] = getattr(ccdata, name)

    return ccData(attributes)
def generate_repr(self):
    """Convert the raw contents of the source into the internal representation.

    Reads ``self.filecontents`` as one or more XYZ coordinate sets and stores
    the result in ``self.data`` as a ccData carrying 'natom', 'atomnos' and
    'atomcoords' (one entry per complete set). 'natom' and 'atomnos'
    describe the last complete set. Comment lines are skipped.

    Raises:
        ValueError: if the contents hold no complete set of coordinates.
        AssertionError: if an atom line has fewer than four columns.
    """
    assert hasattr(self, 'filecontents')

    it = iter(self.filecontents.splitlines())

    # Ordering of lines:
    # 1. number of atoms
    # 2. comment line
    # 3. line of at least 4 columns: 1 is atomic symbol (str), 2-4 are atomic coordinates (float)
    #    repeat for number of atoms
    # (4. optional blank lines)
    # repeat for multiple sets of coordinates

    all_atomcoords = []
    natom = None
    atomnos = None

    while True:
        # A set is read completely before anything is recorded, so running
        # out of lines part-way through leaves the earlier sets untouched.
        try:
            line = next(it)
            while line.strip() == '':
                line = next(it)
            frame_natom = int(line.split()[0])
            next(it)  # comment line, not stored
            rows = []
            for _ in range(frame_natom):
                tokens = next(it).split()
                assert len(tokens) >= 4, "atom line needs a symbol and three coordinates"
                rows.append(tokens)
        except StopIteration:
            break

        natom = frame_natom
        atomnos = [self.pt.number[row[0]] for row in rows]
        # Everything beyond the fourth column is ignored.
        all_atomcoords.append([row[1:4] for row in rows])

    if natom is None:
        raise ValueError("no complete set of coordinates found")

    attributes = {
        'natom': natom,
        'atomnos': atomnos,
        'atomcoords': all_atomcoords,
    }

    self.data = ccData(attributes)
def makecclib(mol):
    """Build a ccData from an OpenBabel molecule.

    Copies the atom count, and for every atom its Cartesian coordinates,
    atomic mass and atomic number. Total charge and multiplicity are
    deliberately not set: they are often derived from atomic formal charges,
    so it is better to assume they would not be correct.
    """
    natom = mol.NumAtoms()

    coords = []
    masses = []
    numbers = []
    for atom in ob.OBMolAtomIter(mol):
        position = [atom.GetX(), atom.GetY(), atom.GetZ()]
        coords.append(position)
        masses.append(atom.GetAtomicMass())
        numbers.append(atom.GetAtomicNum())

    return ccData({
        "atomcoords": coords,
        "atommasses": masses,
        "atomnos": numbers,
        "natom": natom,
    })
def multixyzfile(multixyzfile):
    """Parse a multi-structure XYZ file into a list of ccData objects.

    Each structure consists of an atom-count line, a comment line (handed to
    ``_chargemult`` for charge and multiplicity) and that many
    ``symbol x y z ...`` lines. Blank lines before, between or after
    structures are skipped. The number of parsed structures is printed.

    Args:
        multixyzfile: path of the file to read.

    Returns:
        A list with one ccData per structure, each carrying 'charge', 'mult',
        'atomcoords' and 'atomnos'.

    Raises:
        EOFError: if the file is empty or contains only whitespace.
    """
    assert isinstance(multixyzfile, str)

    with open(multixyzfile, 'r') as handle:
        lines = handle.readlines()

    # Check that the file is not empty, if it is not, parse away!
    if not any(row.strip() for row in lines):
        raise EOFError(multixyzfile + " is empty")

    ptable = PeriodicTable()
    attributeslist = []
    filelength = len(lines)
    idx = 0
    while idx < filelength:
        # Separator or trailing blank lines do not start a structure.
        if not lines[idx].strip():
            idx += 1
            continue

        # Get number of atoms and charge/mult from comment line
        numatoms = int(lines[idx])
        charge, mult = _chargemult(lines[idx + 1])

        atomnos = []
        atomcoords = []
        for row in lines[idx + 2:idx + 2 + numatoms]:
            fields = row.split()
            atomnos.append(ptable.number[fields[0]])
            atomcoords.append([float(value) for value in fields[1:]])
        idx += numatoms + 2

        attributeslist.append({
            'charge': charge,
            'mult': mult,
            'atomcoords': [np.array(atomcoords)],
            'atomnos': np.array(atomnos),
        })

    print('Number of conformers parsed:', len(attributeslist))
    return [ccData(attributes=attrs) for attrs in attributeslist]
def multixyzfile(multixyzfile):
    """Parse a multi-structure XYZ file into a list of ccData objects.

    Each structure consists of an atom-count line, a comment line (handed to
    ``_chargemult`` for charge and multiplicity) and that many
    ``symbol x y z ...`` lines. Blank lines before, between or after
    structures are skipped. The number of parsed structures is printed.

    Args:
        multixyzfile: path of the file to read.

    Returns:
        A list with one ccData per structure, each carrying 'charge', 'mult',
        'atomcoords' and 'atomnos'.

    Raises:
        EOFError: if the file is empty or contains only whitespace.
    """
    assert isinstance(multixyzfile, str)

    with open(multixyzfile, 'r') as handle:
        lines = handle.readlines()

    # Check that the file is not empty, if it is not, parse away!
    if not any(row.strip() for row in lines):
        raise EOFError(multixyzfile+" is empty")

    ptable = PeriodicTable()
    attributeslist = []
    filelength = len(lines)
    idx = 0
    while idx < filelength:
        # Separator or trailing blank lines do not start a structure.
        if not lines[idx].strip():
            idx += 1
            continue

        # Get number of atoms and charge/mult from comment line
        numatoms = int(lines[idx])
        charge, mult = _chargemult(lines[idx+1])

        atomnos = []
        atomcoords = []
        for row in lines[idx+2:idx+2+numatoms]:
            fields = row.split()
            atomnos.append(ptable.number[fields[0]])
            atomcoords.append([float(value) for value in fields[1:]])
        idx += numatoms+2

        attributeslist.append({
            'charge': charge,
            'mult': mult,
            'atomcoords': [np.array(atomcoords)],
            'atomnos': np.array(atomnos),
        })

    print('Number of conformers parsed:', len(attributeslist))
    return [ccData(attributes=attrs) for attrs in attributeslist]
def makecclib(mol):
    """Build a ccData from an OpenBabel molecule.

    After the OpenBabel availability check, copies the atom count, and for
    every atom its Cartesian coordinates, atomic mass and atomic number.
    Total charge and multiplicity are deliberately not set: they are often
    derived from atomic formal charges, so it is better to assume they
    would not be correct.
    """
    _check_openbabel(_found_openbabel)

    natom = mol.NumAtoms()
    xyz_rows, masses, numbers = [], [], []
    for atom in ob.OBMolAtomIter(mol):
        xyz_rows.append([atom.GetX(), atom.GetY(), atom.GetZ()])
        masses.append(atom.GetAtomicMass())
        numbers.append(atom.GetAtomicNum())

    attributes = {}
    attributes['atomcoords'] = xyz_rows
    attributes['atommasses'] = masses
    attributes['atomnos'] = numbers
    attributes['natom'] = natom
    return ccData(attributes)
def mopacoutputfile(mopacoutputfile, nogeometry=True):
    """Parse a MOPAC output file into a ccData object.

    Scans the file line by line for the system charge, the spin state, the
    geometry tables and the total energy. Scanning stops at the first
    "TOTAL ENERGY" line; its value is passed through
    ``convertor(value, 'eV', 'kcal')`` before being stored.

    Args:
        mopacoutputfile: path of the MOPAC output file.
        nogeometry: must be left true; the alternative is not implemented.

    Returns:
        A ccData carrying 'natom', 'atomcoords', 'atomnos', 'scfenergies',
        'charge' and 'mult'. Charge defaults to 0 and multiplicity to 1 when
        the file does not state them.

    Raises:
        NotImplementedError: if ``nogeometry`` is false.
    """
    if not nogeometry:
        # NotImplementedError derives from RuntimeError, which is what the
        # former bare ``raise`` produced, so existing handlers keep working.
        raise NotImplementedError(
            "MOPAC geometry parsing not yet implemented - IN PROGRESS")

    spinstate = {
        'SINGLET': 1,
        'DOUBLET': 2,
        'TRIPLET': 3,
        'QUARTET': 4,
        'QUINTET': 5,
        'SEXTET': 6,
        'HEPTET': 7,
        'OCTET': 8,
        'NONET': 9
    }

    with open(mopacoutputfile, 'r') as handle:
        lines = handle.readlines()

    # Whether or not we are in geometry printout
    geometry = False

    # Defaults
    charge = 0
    mult = 1

    # Empties
    atomcoords = []
    atomelements = []
    atomnos = []
    natom = None
    scfenergies = []
    subatomelements = []
    subatomcoords = []

    for line in lines:
        if 'CHARGE ON SYSTEM =' in line and charge == 0:
            charge = int(line.split()[5])
        elif 'SPIN STATE DEFINED AS ' in line and mult == 1:
            mult = spinstate[line.split()[1]]
        elif "TOTAL ENERGY" in line:
            scf = float(line.split()[3])
            scfenergies.append(convertor(scf, 'eV', 'kcal'))
            break
        elif geometry and line != ' \n':
            entry = line.split()
            if entry:
                subatomelements.append(entry[1])
                # every second column starting at the third is a coordinate
                subatomcoords.append(list(map(float, entry[2::2])))
            else:
                # an empty line closes the current geometry table
                geometry = False
                atomcoords.append(subatomcoords)
                if not atomelements:
                    # elements are taken from the first table only
                    atomelements = subatomelements
                    atomnos = [pt.AtomicNum[element] for element in atomelements]
                    natom = len(atomnos)
        elif 'NUMBER SYMBOL (ANGSTROMS) (ANGSTROMS) (ANGSTROMS)' in line:
            geometry = True
            subatomelements = []
            subatomcoords = []

    attributes = {
        'natom': natom,
        'atomcoords': atomcoords,
        'atomnos': atomnos,
        'scfenergies': scfenergies,
        'charge': charge,
        'mult': mult,
    }
    return ccData(attributes=attributes)
def mopacoutputfile(mopacoutputfile, nogeometry=True):
    """Parse a MOPAC output file into a ccData object.

    Scans the file line by line for the system charge, the spin state, the
    geometry tables and the total energy. Scanning stops at the first
    "TOTAL ENERGY" line; its value is passed through
    ``convertor(value, 'eV', 'kcal')`` before being stored.

    Args:
        mopacoutputfile: path of the MOPAC output file.
        nogeometry: must be left true; the alternative is not implemented.

    Returns:
        A ccData carrying 'natom', 'atomcoords', 'atomnos', 'scfenergies',
        'charge' and 'mult'. Charge defaults to 0 and multiplicity to 1 when
        the file does not state them.

    Raises:
        NotImplementedError: if ``nogeometry`` is false.
    """
    if not nogeometry:
        # NotImplementedError derives from RuntimeError, which is what the
        # former bare ``raise`` produced, so existing handlers keep working.
        raise NotImplementedError(
            "MOPAC geometry parsing not yet implemented - IN PROGRESS")

    spinstate = {'SINGLET': 1, 'DOUBLET': 2, 'TRIPLET': 3,
                 'QUARTET': 4, 'QUINTET': 5, 'SEXTET': 6,
                 'HEPTET': 7, 'OCTET': 8, 'NONET': 9}

    with open(mopacoutputfile, 'r') as handle:
        lines = handle.readlines()

    # Whether or not we are in geometry printout
    geometry = False

    # Defaults
    charge = 0
    mult = 1

    # Empties
    atomcoords = []
    atomelements = []
    atomnos = []
    natom = None
    scfenergies = []
    subatomelements = []
    subatomcoords = []

    for line in lines:
        if 'CHARGE ON SYSTEM =' in line and charge == 0:
            charge = int(line.split()[5])
        elif 'SPIN STATE DEFINED AS ' in line and mult == 1:
            mult = spinstate[line.split()[1]]
        elif "TOTAL ENERGY" in line:
            scf = float(line.split()[3])
            scfenergies.append(convertor(scf, 'eV', 'kcal'))
            break
        elif geometry and line != ' \n':
            entry = line.split()
            if entry:
                subatomelements.append(entry[1])
                # every second column starting at the third is a coordinate
                subatomcoords.append(list(map(float, entry[2::2])))
            else:
                # an empty line closes the current geometry table
                geometry = False
                atomcoords.append(subatomcoords)
                if not atomelements:
                    # elements are taken from the first table only
                    atomelements = subatomelements
                    atomnos = [pt.AtomicNum[element] for element in atomelements]
                    natom = len(atomnos)
        elif 'NUMBER SYMBOL (ANGSTROMS) (ANGSTROMS) (ANGSTROMS)' in line:
            geometry = True
            subatomelements = []
            subatomcoords = []

    attributes = {
        'natom': natom,
        'atomcoords': atomcoords,
        'atomnos': atomnos,
        'scfenergies': scfenergies,
        'charge': charge,
        'mult': mult,
    }
    return ccData(attributes=attributes)
def makecclib(atoms, popname="mulliken"):
    """Build a ccData from an ASE Atoms object.

    Positions, atomic numbers, masses, atom count and temperature are always
    copied. Atomic charges and magnetic moments come from the calculated
    values when ASE can provide them and from the initial values otherwise;
    the molecular charge is their sum and the multiplicity is the summed
    moments plus one. Energy, gradients, moments and free energy are added
    only when the corresponding ASE getters do not raise RuntimeError.

    Bear in mind that ASE calculates temperature from the kinetic energy, so
    anything "static" (which includes anything cclib parses) will have zero
    temperature.

    Inputs:
        atoms - an instance of ASE `Atoms`
        popname - key under which the atomic charges and atomic spins are
            stored in 'atomcharges' and 'atomspins'.
    """
    _check_ase(_found_ase)

    def _calculated_or_initial(calculated, initial):
        # fall back to the initial values when no calculated ones exist
        try:
            return calculated()
        except (PropertyNotImplementedError, RuntimeError):
            return initial()

    positions = atoms.get_positions()
    attributes = {
        "atomcoords": np.array([positions]),
        "atomnos": atoms.get_atomic_numbers(),
        "atommasses": atoms.get_masses(),
        "natom": atoms.get_global_number_of_atoms(),
    }

    charges = _calculated_or_initial(atoms.get_charges, atoms.get_initial_charges)
    attributes["atomcharges"] = {popname: charges}
    spins = _calculated_or_initial(
        atoms.get_magnetic_moments, atoms.get_initial_magnetic_moments
    )
    attributes["atomspins"] = {popname: spins}

    # the following is how ASE determines charge and multiplicity from initial
    # charges and initial magnetic moments in its Gaussian interface
    # (https://gitlab.com/ase/ase/-/blob/a26bda2160527ca7afc0135c69e4367a5bc5a264/ase/io/gaussian.py#L105)
    attributes["charge"] = charges.sum()
    attributes["mult"] = spins.sum() + 1

    try:
        energy = atoms.get_potential_energy()
        attributes["scfenergies"] = np.array([energy])
    except RuntimeError:
        pass

    try:
        forces = atoms.get_forces()
        attributes["grads"] = -np.array([forces]) * units.Bohr / units.Hartree
    except RuntimeError:
        pass

    try:
        center_of_mass = atoms.get_center_of_mass()
        dipole = atoms.get_dipole_moment() / units.Bohr
        attributes["moments"] = [center_of_mass, dipole]
    except RuntimeError:
        pass

    try:
        consistent_energy = atoms.get_potential_energy(force_consistent=True)
        attributes["freeenergy"] = consistent_energy / units.Hartree
    except RuntimeError:
        pass

    attributes["temperature"] = atoms.get_temperature()

    return ccData(attributes)
def makecclib(iodat):
    """Create a cclib ccData object from a horton IOData object.

    The conversion depends on the version reported by ``check_horton()``:
    version 2 objects are read through their ``__dict__`` and attribute
    names such as ``coordinates``; version 3 objects through names such as
    ``atcoords``. For any other version an empty ccData is built.

    Args:
        iodat: the horton IOData instance to convert.

    Returns:
        A ccData built from the attributes that could be read.
    """
    hortonver = check_horton()
    attributes = {}

    if hortonver == 2:
        # For a few attributes, a simple renaming suffices
        renameAttrs = {
            "numbers": "atomnos",
            "ms2": "mult",
            "polar": "polarizability",
        }
        attributes = {
            renameAttrs[oldKey]: val
            for (oldKey, val) in iodat.__dict__.items()
            if oldKey in renameAttrs
        }

        # Rest of attributes need some manipulation in data structure.
        if hasattr(iodat, "coordinates"):
            # cclib parses the whole history of coordinates in the list, horton keeps the last one.
            attributes["atomcoords"] = [iodat.coordinates]
        if hasattr(iodat, "orb_alpha"):
            attributes["mocoeffs"] = [iodat.orb_alpha]
            if hasattr(iodat, "orb_beta"):
                attributes["mocoeffs"].append(iodat.orb_beta)
        if hasattr(iodat, "pseudo_numbers"):
            # cclib stores num of electrons screened out by pseudopotential
            # horton stores num of electrons after applying pseudopotential
            attributes["coreelectrons"] = iodat.numbers - iodat.pseudo_numbers
        if hasattr(iodat, "mulliken_charges"):
            attributes["atomcharges"] = {"mulliken": iodat.mulliken_charges}
            if hasattr(iodat, "npa_charges"):
                attributes["atomcharges"]["natural"] = iodat.npa_charges
        elif hasattr(iodat, "npa_charges"):
            attributes["atomcharges"] = {"natural": iodat.npa_charges}

    elif hortonver == 3:
        # Horton 3 IOData class uses attr and does not have __dict__.
        # In horton 3, some attributes have a default value of None, so each
        # value is checked against None and not merely for existence.
        atcoords = getattr(iodat, "atcoords", None)
        if atcoords is not None:
            # cclib parses the whole history of coordinates in the list, horton keeps the last one.
            attributes["atomcoords"] = [atcoords]

        if hasattr(iodat, "mo") and hasattr(iodat.mo, "norba"):
            norba = iodat.mo.norba
            # MO coefficient should be transposed to match the dimensions.
            attributes["mocoeffs"] = [iodat.mo.coeffs[:norba].T]
            if iodat.mo.kind == "unrestricted":
                attributes["mocoeffs"].append(iodat.mo.coeffs[norba:].T)

        if hasattr(iodat, "spinpol") and isinstance(iodat.spinpol, int):
            # IOData stores 2S, ccData stores 2S+1.
            attributes["mult"] = iodat.spinpol + 1

        atnums = getattr(iodat, "atnums", None)
        if atnums is not None:
            # Same "atomnos" key the horton 2 branch uses for atomic numbers
            # (the previous "atnums" key did not match it).
            attributes["atomnos"] = numpy.asanyarray(atnums)

        atcorenums = getattr(iodat, "atcorenums", None)
        if atcorenums is not None and isinstance(atnums, numpy.ndarray):
            # cclib stores num of electrons screened out by pseudopotential
            # horton stores num of electrons after applying pseudopotential
            attributes["coreelectrons"] = numpy.asanyarray(atnums) - numpy.asanyarray(
                atcorenums
            )

        atcharges = getattr(iodat, "atcharges", None)
        if atcharges is not None:
            attributes["atomcharges"] = atcharges

    return ccData(attributes)