def get_center_of_mass(xyz=None, coords=None, symbols=None):
    """
    Get the center of mass of xyz coordinates.
    Assumes arc.converter.standardize_xyz_string() was already called for xyz.
    Note that xyz from ESS output is usually already centered at the center of mass (to some precision).
    Either xyz or coords and symbols must be given. If xyz is given, it takes precedence
    and any coords / symbols arguments are ignored.

    Args:
        xyz (string, optional): The xyz coordinates in a string format
                                (one "symbol x y z" entry per non-blank line).
        coords (list, optional): The xyz coordinates in an array format.
        symbols (list, optional): The chemical element symbols corresponding to `coords`.

    Returns:
        tuple: The center of mass coordinates.

    Raises:
        InputError: If neither `xyz` nor both `coords` and `symbols` are given, if `coords` and
                    `symbols` have different lengths, or if the total mass is zero (e.g., no atoms).
    """
    if xyz is not None:
        masses, coords = list(), list()
        for line in xyz.splitlines():
            if line.strip():
                splits = line.split()
                masses.append(get_element_mass(str(splits[0]))[0])
                coords.append([float(splits[1]), float(splits[2]), float(splits[3])])
    elif coords is not None and symbols is not None:
        # zip() below would silently drop the extra entries, giving a wrong result.
        if len(coords) != len(symbols):
            raise InputError('The number of coordinates ({0}) does not match the number of symbols ({1})'.format(
                len(coords), len(symbols)))
        masses = [get_element_mass(str(symbol))[0] for symbol in symbols]
    else:
        raise InputError('Either xyz or coords and symbols must be given')

    # Compute the total mass once instead of once per Cartesian component.
    total_mass = sum(masses)
    if not total_mass:
        raise InputError('Cannot determine the center of mass: no atoms were given (total mass is zero)')

    cm_x, cm_y, cm_z = 0, 0, 0
    for coord, mass in zip(coords, masses):
        cm_x += coord[0] * mass
        cm_y += coord[1] * mass
        cm_z += coord[2] * mass
    return cm_x / total_mass, cm_y / total_mass, cm_z / total_mass
def test_get_mass(self):
    """Test that the correct mass/number/isotope is returned from get_element_mass"""
    # `assertEqual` replaces the deprecated `assertEquals` alias (removed in Python 3.12).
    self.assertEqual(get_element_mass(1), (1.00782503224, 1))  # test input by integer
    self.assertEqual(get_element_mass('Si'), (27.97692653465, 14))  # test string input and most common isotope
    self.assertEqual(get_element_mass('C', 13), (13.00335483507, 6))  # test specific isotope
    self.assertEqual(get_element_mass('Bk'), (247.0703073, 97))  # test a two-element array (no isotope data)
def load_geometry(self):
    """
    Return the optimum geometry of the molecular configuration from the
    Orca log file. If multiple such geometries are identified, only the
    last is returned.

    Returns:
        tuple: ``(coord, number, mass)`` as numpy arrays: the Cartesian coordinates
        (float64, one row per atom), the atomic numbers (int), and the atomic
        masses (float64) as reported by ``get_element_mass``.

    Raises:
        LogError: If the file has no 'ORCA TERMINATED NORMALLY' line, or if no atoms
            could be read from it.
    """
    atoms, coords, numbers, mass = [], [], [], []

    with open(self.path) as f:
        log = f.readlines()

    # First check that the Orca job file (not necessarily a geometry optimization)
    # has successfully completed, if not an error is thrown
    completed_job = False
    for line in reversed(log):
        if 'ORCA TERMINATED NORMALLY' in line:
            logging.debug('Found a successfully completed Orca Job')
            completed_job = True
            break

    if not completed_job:
        raise LogError('Could not find a successfully completed Orca job in Orca output file {0}'.format(self.path))

    # Now look for the geometry.
    # Scan backwards so that the last 'CARTESIAN COORDINATES (ANGSTROEM)' block in the file is used.
    geometry_flag = False
    for i in reversed(range(len(log))):
        line = log[i]
        if 'CARTESIAN COORDINATES (ANGSTROEM)' in line:
            # Skip the header and the line below it; the table ends at the first blank line.
            for line in log[(i + 2):]:
                if not line.strip():
                    break
                if '---------------------------------' not in line:
                    data = line.split()
                    atoms.append(data[0])
                    coords.append([float(c) for c in data[1:]])
                    geometry_flag = True

            if geometry_flag:
                break

    # Assign appropriate mass to each atom in the molecule
    for atom1 in atoms:
        mass1, num1 = get_element_mass(atom1)
        mass.append(mass1)
        numbers.append(num1)
    coord = numpy.array(coords, numpy.float64)
    # `numpy.int` was removed in NumPy 1.24; the builtin `int` is the same dtype.
    number = numpy.array(numbers, int)
    mass = numpy.array(mass, numpy.float64)
    if len(number) == 0 or len(coord) == 0 or len(mass) == 0:
        raise LogError('Unable to read atoms from Orca geometry output file {0}'.format(self.path))

    # Return the arrays built above (previously the raw Python lists were returned by mistake).
    return coord, number, mass
def loadGeometry(self):
    """
    Return the optimum geometry of the molecular configuration from the
    Molpro .out file. If multiple such geometries are identified, only the
    last is returned.

    If no 'Current geometry' section yields coordinates, the geometry listed under
    an 'atomic coordinates' header is used instead.

    Returns:
        tuple: ``(coord, number, mass)`` as numpy arrays: the Cartesian coordinates
        (float64, one row per atom), the atomic numbers (int), and the atomic
        masses (float64) as reported by ``get_element_mass``.

    Raises:
        InputError: If no atoms could be read from the file.
    """
    symbol, coord, mass, number = [], [], [], []

    # The context manager closes the file even if a line fails to parse.
    with open(self.path, 'r') as f:
        line = f.readline()
        while line != '':
            # Automatically determine the number of atoms
            if 'Current geometry' in line:
                symbol, coord = [], []
                # Stop at end-of-file as well; readline() then returns '' forever,
                # which previously made this loop spin endlessly on truncated files.
                while line != '' and 'ENERGY' not in line:
                    line = f.readline()
                if line == '':
                    break
                line = f.readline()
                while line != '\n':
                    data = line.split()
                    symbol.append(str(data[0]))
                    coord.append([float(data[1]), float(data[2]), float(data[3])])
                    line = f.readline()
                line = f.readline()
            line = f.readline()

    # If no optimized coordinates were found, uses the input geometry
    # (for example if reading the geometry from a frequency file)
    if not coord:
        with open(self.path, 'r') as f:
            line = f.readline()
            while line != '':
                if 'atomic coordinates' in line.lower():
                    symbol, coord = [], []
                    # Advance four lines past the header to reach the first atom line.
                    for _ in range(4):
                        line = f.readline()
                    while line != '\n':
                        data = line.split()
                        symbol.append(str(data[1]))
                        coord.append([float(data[3]), float(data[4]), float(data[5])])
                        line = f.readline()
                line = f.readline()

    # Assign appropriate mass to each atom in the molecule
    for atom1 in symbol:
        mass1, num1 = get_element_mass(atom1)
        mass.append(mass1)
        number.append(num1)
    # `numpy.int` was removed in NumPy 1.24; the builtin `int` is the same dtype.
    number = numpy.array(number, int)
    mass = numpy.array(mass, numpy.float64)
    coord = numpy.array(coord, numpy.float64)
    if len(number) == 0 or len(coord) == 0 or len(mass) == 0:
        raise InputError('Unable to read atoms from Molpro geometry output file {0}'.format(self.path))

    return coord, number, mass
def loadGeometry(self):
    """
    Return the optimum geometry of the molecular configuration from the
    QChem log file. If multiple such geometries are identified, only the
    last is returned.

    Returns:
        tuple: ``(coord, number, mass)`` as numpy arrays: the Cartesian coordinates
        (float64, one row per atom), the atomic numbers (int), and the atomic
        masses (float64) as reported by ``get_element_mass``.

    Raises:
        InputError: If the file has no 'Total job time:' line, or if no atoms
            could be read from it.
    """
    atom, coord, number, mass = [], [], [], []

    with open(self.path) as f:
        log = f.read().splitlines()

    # First check that the QChem job file (not necessarily a geometry optimization)
    # has successfully completed, if not an error is thrown
    completed_job = False
    for line in reversed(log):
        if 'Total job time:' in line:
            logging.debug('Found a sucessfully completed QChem Job')
            completed_job = True
            break

    if not completed_job:
        raise InputError(
            'Could not find a successfully completed QChem job in QChem output file {0}'.format(self.path))

    # Now look for the geometry.
    # Will return the final geometry in the file under Standard Nuclear Orientation.
    geometry_flag = False
    # `range` replaces the Python 2-only `xrange`, which is undefined in Python 3.
    for i in reversed(range(len(log))):
        line = log[i]
        if 'Standard Nuclear Orientation' in line:
            # Atom lines start three lines after the header and run until the dashed rule.
            for line in log[(i + 3):]:
                if '------------' not in line:
                    data = line.split()
                    atom.append(data[1])
                    coord.append([float(c) for c in data[2:]])
                    geometry_flag = True
                else:
                    break
            if geometry_flag:
                break

    # Assign appropriate mass to each atom in the molecule
    for atom1 in atom:
        mass1, num1 = get_element_mass(atom1)
        mass.append(mass1)
        number.append(num1)
    coord = numpy.array(coord, numpy.float64)
    # `numpy.int` was removed in NumPy 1.24; the builtin `int` is the same dtype.
    number = numpy.array(number, int)
    mass = numpy.array(mass, numpy.float64)
    if len(number) == 0 or len(coord) == 0 or len(mass) == 0:
        raise InputError('Unable to read atoms from QChem geometry output file {0}'.format(self.path))

    return coord, number, mass
def loadGeometry(self):
    """
    Return the optimum geometry of the molecular configuration from the
    QChem log file. If multiple such geometries are identified, only the
    last is returned.

    Returns:
        tuple: ``(coord, number, mass)`` as numpy arrays: the Cartesian coordinates
        (float64, one row per atom), the atomic numbers (int), and the atomic
        masses (float64) as reported by ``get_element_mass``.

    Raises:
        InputError: If the file has no 'Total job time:' line, or if no atoms
            could be read from it.
    """
    atom, coord, number, mass = [], [], [], []

    with open(self.path) as f:
        log = f.read().splitlines()

    # First check that the QChem job file (not necessarily a geometry optimization)
    # has successfully completed, if not an error is thrown
    completed_job = any('Total job time:' in line for line in log)
    if completed_job:
        logging.debug('Found a sucessfully completed QChem Job')
    else:
        raise InputError(
            'Could not find a successfully completed QChem job in QChem output file {0}'.format(self.path))

    # Now look for the geometry.
    # Will return the final geometry in the file under Standard Nuclear Orientation.
    geometry_flag = False
    # `range` replaces the Python 2-only `xrange`, which is undefined in Python 3.
    for i in reversed(range(len(log))):
        if 'Standard Nuclear Orientation' not in log[i]:
            continue
        # Atom lines start three lines after the header and run until the dashed rule.
        for line in log[(i + 3):]:
            if '------------' in line:
                break
            data = line.split()
            atom.append(data[1])
            coord.append([float(c) for c in data[2:]])
            geometry_flag = True
        if geometry_flag:
            break

    # Assign appropriate mass to each atom in the molecule
    for atom1 in atom:
        mass1, num1 = get_element_mass(atom1)
        mass.append(mass1)
        number.append(num1)
    coord = numpy.array(coord, numpy.float64)
    # `numpy.int` was removed in NumPy 1.24; the builtin `int` is the same dtype.
    number = numpy.array(number, int)
    mass = numpy.array(mass, numpy.float64)
    if len(number) == 0 or len(coord) == 0 or len(mass) == 0:
        raise InputError('Unable to read atoms from QChem geometry output file {0}'.format(self.path))

    return coord, number, mass
def load_geometry(self):
    """
    Return the optimum geometry of the molecular configuration from the
    Gaussian log file. If multiple such geometries are identified, only the
    last is returned.

    Returns:
        tuple: ``(coord, number, mass)`` as numpy arrays: the Cartesian coordinates
        (float64, one row per atom), the atomic numbers (int), and the atomic
        masses (float64) as reported by ``get_element_mass``.

    Raises:
        LogError: If no atoms could be read from the file.
    """
    number, coord, mass = [], [], []

    with open(self.path, 'r') as f:
        line = f.readline()
        while line != '':
            # Automatically determine the number of atoms
            if 'Input orientation:' in line:
                # Start over so that only the last geometry in the file is kept.
                number, coord = [], []
                # Advance five lines past the header to reach the first atom line.
                for _ in range(5):
                    line = f.readline()
                while '---------------------------------------------------------------------' not in line:
                    data = line.split()
                    number.append(int(data[1]))
                    coord.append([float(data[3]), float(data[4]), float(data[5])])
                    line = f.readline()
            line = f.readline()

    # Assign appropriate mass to each atom in the molecule
    mass = []
    for num in number:
        mass1, _ = get_element_mass(num)
        mass.append(mass1)
    coord = np.array(coord, np.float64)
    # `np.int` was removed in NumPy 1.24; the builtin `int` is the same dtype.
    number = np.array(number, int)
    mass = np.array(mass, np.float64)
    if len(number) == 0 or len(coord) == 0 or len(mass) == 0:
        raise LogError('Unable to read atoms from Gaussian geometry output file {0}. '
                       'Make sure the output file is not corrupt.\nNote: if your species has '
                       '50 or more atoms, you will need to add the `iop(2/9=2000)` keyword to your '
                       'input file so Gaussian will print the input orientation geometry.'.format(self.path))

    return coord, number, mass
def loadGeometry(self):
    """
    Return the optimum geometry of the molecular configuration from the
    Gaussian log file. If multiple such geometries are identified, only the
    last is returned.

    Returns:
        tuple: ``(coord, number, mass)`` as numpy arrays: the Cartesian coordinates
        (float64, one row per atom), the atomic numbers (int), and the atomic
        masses (float64) as reported by ``get_element_mass``.

    Raises:
        InputError: If no atoms could be read from the file.
    """
    number, coord, mass = [], [], []

    # The context manager closes the file even if a line fails to parse.
    with open(self.path, 'r') as f:
        line = f.readline()
        while line != '':
            # Automatically determine the number of atoms
            if 'Input orientation:' in line:
                # Start over so that only the last geometry in the file is kept.
                number, coord = [], []
                # Advance five lines past the header to reach the first atom line.
                for _ in range(5):
                    line = f.readline()
                while '---------------------------------------------------------------------' not in line:
                    data = line.split()
                    number.append(int(data[1]))
                    coord.append([float(data[3]), float(data[4]), float(data[5])])
                    line = f.readline()
            line = f.readline()

    # Assign appropriate mass to each atom in the molecule
    mass = []
    for num in number:
        mass1, _ = get_element_mass(num)
        mass.append(mass1)
    coord = numpy.array(coord, numpy.float64)
    # `numpy.int` was removed in NumPy 1.24; the builtin `int` is the same dtype.
    number = numpy.array(number, int)
    mass = numpy.array(mass, numpy.float64)
    if len(number) == 0 or len(coord) == 0 or len(mass) == 0:
        raise InputError('Unable to read atoms from Gaussian geometry output file {0}'.format(self.path))

    return coord, number, mass
def load_geometry(self):
    """
    Return the optimum geometry of the molecular configuration from the
    QChem log file. If multiple such geometries are identified, only the
    last is returned.

    Returns:
        tuple: ``(coord, number, mass)`` as numpy arrays: the Cartesian coordinates
        (float64, one row per atom), the atomic numbers (int), and the atomic
        masses (float64) as reported by ``get_element_mass``.

    Raises:
        LogError: If no atoms could be read from the file.
    """
    atom, coord, number, mass = [], [], [], []

    with open(self.path) as f:
        log = f.readlines()

    # Now look for the geometry.
    # Will return the final geometry in the file under Standard Nuclear Orientation.
    geometry_flag = False
    for i in reversed(range(len(log))):
        line = log[i]
        if 'Standard Nuclear Orientation' in line:
            # Atom lines start three lines after the header and run until the dashed rule.
            for line in log[(i + 3):]:
                if '------------' not in line:
                    data = line.split()
                    atom.append(data[1])
                    coord.append([float(c) for c in data[2:]])
                    geometry_flag = True
                else:
                    break
            if geometry_flag:
                break

    # Assign appropriate mass to each atom in the molecule
    for atom1 in atom:
        mass1, num1 = get_element_mass(atom1)
        mass.append(mass1)
        number.append(num1)
    coord = np.array(coord, np.float64)
    # `np.int` was removed in NumPy 1.24; the builtin `int` is the same dtype.
    number = np.array(number, int)
    mass = np.array(mass, np.float64)
    if len(number) == 0 or len(coord) == 0 or len(mass) == 0:
        raise LogError('Unable to read atoms from QChem geometry output file {0}.'.format(self.path))

    return coord, number, mass
def loadGeometry(self):
    """
    Return the optimum geometry of the molecular configuration from the
    Gaussian log file. If multiple such geometries are identified, only the
    last is returned.

    Returns:
        tuple: ``(coord, number, mass)`` as numpy arrays: the Cartesian coordinates
        (float64, one row per atom), the atomic numbers (int), and the atomic
        masses (float64) as reported by ``get_element_mass``.

    Raises:
        InputError: If no atoms could be read from the file.
    """
    number, coord, mass = [], [], []

    # The context manager closes the file even if a line fails to parse.
    with open(self.path, 'r') as f:
        line = f.readline()
        while line != '':
            # Automatically determine the number of atoms
            if 'Input orientation:' in line:
                # Start over so that only the last geometry in the file is kept.
                number, coord = [], []
                # Advance five lines past the header to reach the first atom line.
                for _ in range(5):
                    line = f.readline()
                while '---------------------------------------------------------------------' not in line:
                    data = line.split()
                    number.append(int(data[1]))
                    coord.append([float(data[3]), float(data[4]), float(data[5])])
                    line = f.readline()
            line = f.readline()

    # Assign appropriate mass to each atom in the molecule
    mass = []
    for num in number:
        mass1, _ = get_element_mass(num)
        mass.append(mass1)
    coord = numpy.array(coord, numpy.float64)
    # `numpy.int` was removed in NumPy 1.24; the builtin `int` is the same dtype.
    number = numpy.array(number, int)
    mass = numpy.array(mass, numpy.float64)
    if len(number) == 0 or len(coord) == 0 or len(mass) == 0:
        raise InputError('Unable to read atoms from Gaussian geometry output file {0}. '
                         'Make sure the output file is not corrupt.\nNote: if your species has '
                         '50 or more atoms, you will need to add the `iop(2/9=2000)` keyword to your '
                         'input file so Gaussian will print the input orientation geomerty.'.format(self.path))

    return coord, number, mass
def load_geometry(self):
    """
    Return the optimum geometry of the molecular configuration from the
    TeraChem log file. If multiple such geometries are identified, only the
    last is returned.

    Three inputs are handled: a file whose extension is '.xyz', a file containing a
    'Type X Y Z Mass' table header, and a file containing a
    '*** Reference Geometry ***' section.

    Returns:
        tuple: ``(coords, numbers, masses)`` as numpy arrays: the Cartesian coordinates
        (float64, one row per atom), the atomic numbers (int), and the atomic
        masses (float64).

    Raises:
        LogError: If no atoms could be read, or if the number of atoms read does not
            match the atom count declared in an xyz file.
    """
    coords, numbers, masses = list(), list(), list()

    with open(self.path) as f:
        lines = f.readlines()

    num_of_atoms = None  # used to verify the result
    if os.path.splitext(self.path)[1] == '.xyz':
        skip_line = False
        for line in lines:
            if not skip_line and line.rstrip():
                if len(line.split()) == 1 and line[0].isdigit():
                    num_of_atoms = int(line.rstrip())
                    skip_line = True  # the next line is just a comment, skip it
                    continue
                splits = line.split()
                coords.append([float(c) for c in splits[1:]])
                mass, num = get_element_mass(splits[0])
                masses.append(mass)
                numbers.append(num)
            if skip_line:
                # A new frame starts here: discard atoms collected from any previous
                # frame so that only the last geometry in the file is kept.
                skip_line = False
                coords, numbers, masses = list(), list(), list()
    else:
        num_of_lines = len(lines)
        for i, line in enumerate(lines):
            if 'Type X Y Z Mass' in line:
                # this is an output.geometry file
                j = i + 1
                # Also stop at end-of-file so a table without a trailing blank line
                # does not raise an IndexError.
                while j < num_of_lines and lines[j].strip():
                    # example: ' C 0.6640965100 0.0039526500 0.0710079300 12.0000000000'
                    # or: ' C 0.512276 -0.516064 0.779232'
                    splits = lines[j].split()
                    coords.append([float(c) for c in splits[1:-1]])
                    masses.append(float(splits[-1]))
                    # Reverse lookup: find the atomic number whose symbol matches.
                    numbers.append(
                        list(symbol_by_number.keys())[list(symbol_by_number.values()).index(splits[0])])
                    j += 1
                break
            if '*** Reference Geometry ***' in line:
                # this is an output.out file, e.g., from a freq run
                j = i + 2
                while j < num_of_lines and lines[j].strip():
                    # example: ' C 0.512276 -0.516064 0.779232'
                    splits = lines[j].split()
                    coords.append([float(c) for c in splits[1:]])
                    mass, num = get_element_mass(splits[0])
                    masses.append(mass)
                    numbers.append(num)
                    j += 1
                break

    coords = np.array(coords, np.float64)
    # `np.int` was removed in NumPy 1.24; the builtin `int` is the same dtype.
    numbers = np.array(numbers, int)
    masses = np.array(masses, np.float64)
    if len(coords) == 0 or len(numbers) == 0 or len(masses) == 0 \
            or ((len(coords) != num_of_atoms or len(numbers) != num_of_atoms or len(masses) != num_of_atoms)
                and num_of_atoms is not None):
        raise LogError(f'Unable to read atoms from TeraChem geometry output file {self.path}. '
                       f'If this is a TeraChem optimization log file, try using either the '
                       f'frequencies calculation log file (important if torsion modes exist) or '
                       f'the "output.geometry" or a ".xyz" file instead.')

    return coords, numbers, masses
def load_geometry(self):
    """
    Return the optimum geometry of the molecular configuration from the
    QChem log file. If multiple such geometries are identified, only the
    last is returned.

    When ``self.is_QM_MM_INTERFACE()`` is true, this additionally sets
    ``self.QM_mass``, ``self.QM_atom`` and ``self.QM_coord`` from the log.

    Returns:
        tuple: ``(coord, number, mass)`` as numpy arrays: the Cartesian coordinates
        (float64, one row per atom), the atomic numbers (int), and the atomic
        masses (float64) as reported by ``get_element_mass``.

    Raises:
        LogError: If the file has no 'Total job time:' line, or if no atoms
            could be read from it.
    """
    atom, coord, number, mass = [], [], [], []

    with open(self.path) as f:
        log = f.readlines()

    # First check that the QChem job file (not necessarily a geometry optimization)
    # has successfully completed, if not an error is thrown
    completed_job = False
    for line in reversed(log):
        if 'Total job time:' in line:
            completed_job = True
            break

    if not completed_job:
        raise LogError('Could not find a successfully completed QChem job '
                       'in QChem output file {0}'.format(self.path))

    # Now look for the geometry.
    # Will return the final geometry in the file under Standard Nuclear Orientation.
    geometry_flag = False
    for i in reversed(range(len(log))):
        line = log[i]
        if 'Standard Nuclear Orientation' in line:
            # Atom lines start three lines after the header and run until the dashed rule.
            for line in log[(i + 3):]:
                if '------------' not in line:
                    data = line.split()
                    atom.append(data[1])
                    coord.append([float(c) for c in data[2:]])
                    geometry_flag = True
                else:
                    break
            if geometry_flag:
                break

    # Assign appropriate mass to each atom in the molecule
    for atom1 in atom:
        mass1, num1 = get_element_mass(atom1)
        mass.append(mass1)
        number.append(num1)
    coord = np.array(coord, np.float64)
    # `np.int` was removed in NumPy 1.24; the builtin `int` is the same dtype.
    number = np.array(number, int)
    mass = np.array(mass, np.float64)
    if len(number) == 0 or len(coord) == 0 or len(mass) == 0:
        raise LogError('Unable to read atoms from QChem geometry output file {0}.'.format(self.path))

    if self.is_QM_MM_INTERFACE():
        # Masses of the QM atoms, followed by the ISOTOPES values in sorted-key order.
        # NOTE(review): get_QM_ATOMS() presumably yields 1-based atom indices
        # (the code subtracts 1 before indexing) - confirm against its definition.
        QM_mass = []
        for i in self.get_QM_ATOMS():
            QM_mass.append(mass[int(i) - 1])
        ISOTOPES = self.get_ISOTOPES()
        for i in sorted(ISOTOPES.keys()):
            QM_mass.append(ISOTOPES[i])
            # QM_mass.append(get_element_mass('H')[0])
        self.QM_mass = np.array(QM_mass, np.float64)

        QM_atom = []
        QM_coord = []
        # Locate the last 'In VibMan new if statement' line; `i` keeps its index
        # (or ends at 0 if the marker is absent).
        for i in reversed(range(len(log))):
            line = log[i]
            if 'In VibMan new if statement' in line:
                break
        # Read the closest 'Standard Nuclear Orientation' table located before that line.
        geometry_flag = False
        for j in reversed(range(i)):
            line = log[j]
            if 'Standard Nuclear Orientation' in line:
                for line in log[(j + 3):]:
                    if '------------' not in line:
                        data = line.split()
                        QM_atom.append(data[1])
                        QM_coord.append([float(c) for c in data[2:]])
                        geometry_flag = True
                    else:
                        break
                if geometry_flag:
                    break
        self.QM_atom = QM_atom
        self.QM_coord = np.array(QM_coord, np.float64)

    return coord, number, mass
def test_get_mass(self):
    """Test that the correct mass/number/isotope is returned from get_element_mass"""
    # `assertEqual` replaces the deprecated `assertEquals` alias (removed in Python 3.12).
    self.assertEqual(get_element_mass(1), (1.00782503224, 1))  # test input by integer
    self.assertEqual(get_element_mass('Si'), (27.97692653465, 14))  # test string input and most common isotope
    self.assertEqual(get_element_mass('C', 13), (13.00335483507, 6))  # test specific isotope
    self.assertEqual(get_element_mass('Bk'), (247.0703073, 97))  # test a two-element array (no isotope data)