def parse_all(input_file):
    """Collect every SCF energy and matching geometry from a Gaussian log.

    The log is scanned front to back. For each 'SCF Done' entry the next
    'Input orientation:' coordinate table after it is parsed; scanning stops
    at the first entry for which the energy or the table cannot be found.

    Args:
        input_file: Path of the Gaussian log file to read.

    Returns:
        A tuple ``(energies, atom_frames, time)``:
            energies: list of SCF energies (float), one per parsed step.
            atom_frames: list of ``utils.Atom`` lists, parallel to energies.
                Each atom's ``element`` is the second column of the
                coordinate table, passed through unchanged as a string.
            time: job CPU time in seconds, or None when the log does not
                contain 'Normal termination of Gaussian 09' or has no
                'Job cpu time' line.

    Raises:
        ValueError: If a coordinate table header is found but its '---'
            delimiters are not, or a coordinate is not a number.
    """
    with open(input_file) as log:
        contents = log.read()

    time = None
    if 'Normal termination of Gaussian 09' in contents:
        m = re.search(
            r'Job cpu time: +(\S+) +days +(\S+) +hours +(\S+) +minutes '
            r'+(\S+) +seconds',
            contents)
        # A terminated log without a timing line simply reports no time.
        if m is not None:
            days, hours, minutes, seconds = [float(g) for g in m.groups()]
            time = (days * 24 * 60 * 60 + hours * 60 * 60 + minutes * 60
                    + seconds)

    # Compiled once so each step can search from an offset instead of
    # copying the remainder of the log.
    scf_energy = re.compile(r'SCF Done: +\S+ += +(\S+)')

    energies = []
    atom_frames = []
    start = 0
    while True:
        try:
            start = contents.index('SCF Done', start)
            energy_this_step = float(
                scf_energy.search(contents, start).group(1))
            # find() yields -1 when no table follows; index() from -1 then
            # raises ValueError, which ends the scan.
            start = contents.find('Input orientation:', start)
            next_coordinates = contents.index('Coordinates (Angstroms)',
                                              start)
        except (ValueError, AttributeError):
            break

        # The table body sits between the dashed rule under the header and
        # the next dashed rule.
        start = contents.index('---\n', next_coordinates) + 4
        end = contents.index('\n ---', start)
        lines = contents[start:end].splitlines()
        start = end

        atoms = []
        for line in lines:
            columns = line.split()
            element = columns[1]
            x, y, z = columns[3:6]
            atoms.append(
                utils.Atom(element=element, x=float(x), y=float(y),
                           z=float(z)))

        atom_frames.append(atoms)
        energies.append(energy_this_step)

    return energies, atom_frames, time
def read_cml(name, parameter_file='oplsaa.prm', extra_parameters=None,
             test_charges=True, allow_errors=False, pair_style='lj/cut'):
    """Read atoms and bonds from a CML file and derive angles and dihedrals.

    Args:
        name: Path of the CML file; '.cml' is appended when missing.
        parameter_file: Force-field parameter file handed to
            ``set_forcefield_parameters``. A falsy value skips that step.
        extra_parameters: Extra parameters forwarded to
            ``set_forcefield_parameters``. Defaults to a fresh empty dict.
        test_charges: Forwarded to ``set_forcefield_parameters``.
        allow_errors: Forwarded to ``set_forcefield_parameters``.
        pair_style: Forwarded to ``set_forcefield_parameters``.

    Returns:
        A tuple ``(atoms, bonds, angles, dihedrals)``. Angles and dihedrals
        stay empty lists when the file has no 'bondArray' element.
    """
    # A dict literal as the default would be shared between calls.
    if extra_parameters is None:
        extra_parameters = {}

    if not name.endswith('.cml'):
        name += '.cml'
    root = xml.parse(name).getroot()

    # When periodic box information is present the first child is the
    # crystal element; otherwise the first child is the atomArray.
    atoms, bonds, angles, dihedrals = [], [], [], []

    for child in root:
        # Skip crystal information
        if child.tag == 'crystal':
            continue

        # Add atoms
        if child.tag == 'atomArray':
            for atom in child:
                a = utils.Atom(
                    element=atom.attrib['elementType'],
                    x=float(atom.attrib['x3']),
                    y=float(atom.attrib['y3']),
                    z=float(atom.attrib['z3']))
                a.bonded = []
                # The id attribute is a one-character prefix followed by
                # the integer index.
                a.index = int(atom.attrib['id'][1:])
                if 'label' in atom.attrib:
                    a.type_index = int(atom.attrib['label'])
                atoms.append(a)

        # Add bonds
        if child.tag == 'bondArray':
            for bond in child:
                a, b = [int(n[1:])
                        for n in bond.attrib['atomRefs2'].split()]
                # Atoms are looked up by list position, so this assumes the
                # ids run 1..N in file order -- TODO confirm.
                first, second = atoms[a - 1], atoms[b - 1]
                bonds.append(utils.Bond(first, second))
                first.bonded.append(second)
                second.bonded.append(first)
            angles, dihedrals = utils.get_angles_and_dihedrals(atoms)

    if parameter_file:
        atoms, bonds, angles, dihedrals = set_forcefield_parameters(
            atoms, bonds=bonds, angles=angles, dihedrals=dihedrals,
            name=name, parameter_file=parameter_file,
            extra_parameters=extra_parameters, test_charges=test_charges,
            allow_errors=allow_errors, pair_style=pair_style)

    return atoms, bonds, angles, dihedrals
def read_xyz(name):
    """Read one or more frames of atoms from an XYZ file.

    The atom count on the first line fixes the frame length (count + 2
    lines) for the whole file. Within a frame, lines with fewer than four
    columns are ignored, and frames with no atoms are dropped.

    Args:
        name: Path of the file. '.xyz' is appended when the name neither
            ends with '.xyz' nor contains a '.' anywhere.

    Returns:
        The list of ``utils.Atom`` for the single frame when exactly one
        frame was read; otherwise the list of frames (each a list of
        atoms). An empty file gives an empty list.
    """
    if not name.endswith('.xyz') and '.' not in name:
        name += '.xyz'

    with open(name) as xyz_file:
        lines = xyz_file.readlines()

    # Nothing to parse: same result as a file whose frames hold no atoms.
    if not lines:
        return []

    atom_count = int(lines[0].split()[0])
    frame_length = atom_count + 2

    frames = []
    for first in range(0, len(lines), frame_length):
        atoms = []
        # Skip the two header lines (count and comment) of each frame.
        for line in lines[first + 2:first + frame_length]:
            columns = line.split()
            if len(columns) >= 4:
                x, y, z = [float(s) for s in columns[1:4]]
                atoms.append(
                    utils.Atom(element=columns[0], x=x, y=y, z=z,
                               index=len(atoms) + 1))
        if atoms:
            frames.append(atoms)

    if len(frames) == 1:
        return frames[0]
    return frames
def parse_scan(input_file):
    """Read the final energy and geometry of each point of a Gaussian scan.

    The log is split on the text 'on scan point'. A chunk is kept when the
    scan-point number it starts with differs from the one starting the
    following chunk, i.e. it is the last chunk reported for that number.

    Args:
        input_file: Path of the Gaussian log file to read.

    Returns:
        None when the log lacks 'Normal termination of Gaussian 09';
        otherwise a tuple ``(energy_list, atoms_list)`` holding, per kept
        chunk, the last 'SCF Done' energy and the atoms of the last
        'Coordinates (Angstroms)' table.

    Raises:
        ValueError: If a kept chunk has no 'SCF Done' entry or no complete
            coordinate table.
    """
    with open(input_file) as log:
        contents = log.read()

    if 'Normal termination of Gaussian 09' not in contents:
        return None

    chunks = contents.split('on scan point')
    # NOTE(review): the first chunk (text before any marker) is skipped on
    # purpose, but the final chunk is also never kept because it has no
    # successor to compare against -- confirm that is intended.
    scan_steps = [
        step for step, following in zip(chunks[1:], chunks[2:])
        if step[:10].split()[0] != following[:10].split()[0]
    ]

    energy_list = []
    atoms_list = []
    for scan_step in scan_steps:
        scf_at = scan_step.rindex('SCF Done')
        energy_line = scan_step[scf_at:scan_step.index('\n', scf_at)]
        energy = float(
            re.search(r'SCF Done: +\S+ += +(\S+)', energy_line).group(1))

        last_coordinates = scan_step.rindex('Coordinates (Angstroms)')
        start = scan_step.index('---\n', last_coordinates) + 4
        end = scan_step.index('\n ---', start)

        atoms = []
        for line in scan_step[start:end].splitlines():
            columns = line.split()
            x, y, z = [float(s) for s in columns[3:6]]
            # Column 1 is the atomic number; translate it to a symbol.
            atoms.append(
                utils.Atom(
                    element=utils.elements_by_atomic_number[int(columns[1])],
                    x=x, y=y, z=z))

        energy_list.append(energy)
        atoms_list.append(atoms)

    return energy_list, atoms_list
def inp_to_xyz(name, write=False, outName=None):
    """Extract the atom lines from ``gaussian/<name>.inp``.

    Deprecated: emits a DeprecationWarning on every call.

    The atom block starts three lines after the first line whose first
    word is 'run' and ends at the next empty line.

    Args:
        name: Base name of the input file inside the 'gaussian' directory.
        write: When true, also write the atoms out as an XYZ file.
        outName: Path of the XYZ file to write; defaults to
            'inp_<name>.xyz'.

    Returns:
        A list of ``utils.Atom`` with zero-based ``index`` values. The list
        is empty when no atom lines follow the header.
    """
    warn(
        "this function is not used and will be removed soon.",
        DeprecationWarning
    )
    with open("gaussian/" + name + ".inp", 'r') as inp_file:
        data = inp_file.read().split('\n')

    # Get start of data. When no 'run' line exists the search ends on the
    # last line, as the original loop did.
    run_line = len(data) - 1
    for position, row in enumerate(data):
        # Slicing keeps blank lines from raising on an empty split.
        if row.split()[:1] == ['run']:
            run_line = position
            break
    first = run_line + 3

    # Get end of data: the first empty line, or the last line scanned.
    last = first
    for offset, row in enumerate(data[first:]):
        last = first + offset
        if row == '':
            break
    data = data[first:last]

    if write:
        target = 'inp_' + name + '.xyz' if outName is None else outName
        with open(target, 'w') as f:
            f.write(str(len(data)) + '\n')
            f.write('Atoms' + '\n')
            for row in data:
                f.write(row + '\n')
            f.write('\n')

    atoms = []
    for i, row in enumerate(data):
        d = row.split()
        atoms.append(utils.Atom(element=d[0], x=float(d[1]), y=float(d[2]),
                                z=float(d[3]), index=i))
    return atoms
def parse_atoms(input_file, get_atoms=True, get_energy=True,
                check_convergence=True, get_time=False, counterpoise=False):
    """Read energy, final geometry and CPU time from a Gaussian 09 log.

    NOTE(review): a second function named ``parse_atoms`` appears further
    down; if both live in the same module the later definition is the one
    callers get.

    Args:
        input_file: Path of the log. Anything not ending in '.log' is
            treated as a job name and expanded to 'gaussian/<name>.log'.
        get_atoms: Include the atoms in the result.
        get_energy: Include the energy in the result.
        check_convergence: With ``get_energy``, return None when the log
            lacks 'Normal termination of Gaussian 09'.
        get_time: Include the job CPU time (seconds) in the result.
        counterpoise: Read the counterpoise-corrected energy instead of the
            last 'SCF Done' energy (ignored for MP2 and CCSD logs).

    Returns:
        None if the convergence check fails. Otherwise, in this order:
            * scan-summary log with ``get_energy``: ``(energies, frames)``,
              a list of energies and a list of atom lists;
            * ``get_energy`` without ``get_atoms``: ``energy`` or, with
              ``get_time``, ``(energy, time)``;
            * ``get_time``: ``(energy, atoms, time)`` or, without
              ``get_atoms``, ``(energy, time)``;
            * otherwise ``(energy, atoms)`` with ``get_energy``, else
              ``atoms``.
        ``energy`` is None when it was not requested and the log is not a
        scan summary; ``time`` is None when the log has no 'Job cpu time'
        line.

    Raises:
        Exception: If no 'SCF Done' entry exists when one is needed.
        ValueError: If the final 'Input orientation:' table is missing.
    """
    if input_file[-4:] != '.log':
        input_file = 'gaussian/' + input_file + '.log'
    with open(input_file) as log:
        contents = log.read()

    if (check_convergence and get_energy
            and 'Normal termination of Gaussian 09' not in contents):
        return None

    # Bound up front so the final returns never reference an unassigned
    # name when a value was not requested or could not be found.
    energy = None
    time = None

    if 'Summary of Optimized Potential Surface Scan' in contents:
        end_section = contents[
            contents.rindex('Summary of Optimized Potential Surface Scan'):]
        energy_lines = re.findall(r'Eigenvalues -- ([^\n]+)', end_section)
        energy = [
            float(s)
            for line in energy_lines
            for s in re.findall(r'-[\d]+\.[\d]+', line)
        ]

        atoms = []
        for minimum in re.split('Stationary point found', contents)[1:]:
            coordinates = minimum.index('Coordinates (Angstroms)')
            start = minimum.index('---\n', coordinates) + 4
            end = minimum.index('\n ---', start)
            frame = []
            for line in minimum[start:end].splitlines():
                columns = line.split()
                x, y, z = [float(s) for s in columns[3:6]]
                frame.append(
                    utils.Atom(
                        element=utils.elements_by_atomic_number[
                            int(columns[1])],
                        x=x, y=y, z=z, index=len(frame) + 1))
            atoms.append(frame)
        if get_energy:
            return energy, atoms
    elif get_energy:
        if ' MP2/' in contents:
            # MP2 files don't have just SCF energy; the value uses a
            # Fortran 'D' exponent.
            energy = float(
                re.findall(r'EUMP2 = +(\S+)', contents)[-1].replace('D', 'e'))
        elif ' CCSD/' in contents:
            energy = float(re.findall(r'E\(CORR\)= +(\S+)', contents)[-1])
        elif not counterpoise:
            try:
                scf_at = contents.rindex('SCF Done')
                energy_line = contents[scf_at:contents.index('\n', scf_at)]
            except ValueError:
                raise Exception('No SCF for ' + input_file)
            energy = float(
                re.search(r'SCF Done: +\S+ += +(\S+)', energy_line).group(1))
        else:
            energy = float(
                re.findall(r'Counterpoise: corrected energy = +(\S+)',
                           contents)[-1])

    if get_time:
        m = re.search(
            r'Job cpu time: +(\S+) +days +(\S+) +hours +(\S+) +minutes '
            r'+(\S+) +seconds',
            contents)
        # Leave time as None when the log has no timing line.
        if m is not None:
            days, hours, minutes, seconds = [float(g) for g in m.groups()]
            time = (days * 24 * 60 * 60 + hours * 60 * 60 + minutes * 60
                    + seconds)

    if get_energy and not get_atoms:
        if get_time:
            return energy, time
        return energy

    # get coordinates from the last 'Input orientation:' table
    last_coordinates = contents.rindex('Input orientation:')
    last_coordinates = contents.index('Coordinates (Angstroms)',
                                      last_coordinates)
    start = contents.index('---\n', last_coordinates) + 4
    end = contents.index('\n ---', start)
    atoms = []
    for line in contents[start:end].splitlines():
        columns = line.split()
        x, y, z = [float(s) for s in columns[3:6]]
        atoms.append(
            utils.Atom(
                element=utils.elements_by_atomic_number[int(columns[1])],
                x=x, y=y, z=z, index=len(atoms) + 1))

    # get forces from the last force table and attach them to the atoms
    if 'Forces (Hartrees/Bohr)' in contents:
        last_forces = contents.rindex('Forces (Hartrees/Bohr)')
        start = contents.index('---\n', last_forces) + 4
        end = contents.index('\n ---', start)
        for i, line in enumerate(contents[start:end].splitlines()):
            columns = line.split()
            atoms[i].fx, atoms[i].fy, atoms[i].fz = [
                float(s) for s in columns[2:5]
            ]

    # return the appropriate values
    if get_time:
        if get_atoms:
            return energy, atoms, time
        return energy, time
    if get_energy:
        return energy, atoms
    return atoms
def parse_atoms(input_file, get_atoms=True, get_energy=True,
                check_convergence=True, get_time=False, counterpoise=False,
                parse_all=False):
    """Read energy, geometry and CPU time from a Gaussian 09 log.

    Args:
        input_file: Path of the log. Anything not ending in '.log' is
            treated as a job name and expanded to 'gaussian/<name>.log'.
        get_atoms: Include the final atoms in the result.
        get_energy: Include the final energy in the result.
        check_convergence: With ``get_energy`` and without ``parse_all``,
            return None when the log lacks
            'Normal termination of Gaussian 09'.
        get_time: Include the job CPU time (seconds) in the result.
        counterpoise: Read the counterpoise-corrected energy instead of the
            last 'SCF Done' energy (ignored for MP2 and CCSD logs).
        parse_all: Return every step of the log instead of the final one.

    Returns:
        None if the convergence check fails. Otherwise, in this order:
            * scan-summary log with ``get_energy`` and not ``parse_all``:
              ``(energies, frames)``, a list of energies and a list of atom
              lists;
            * ``parse_all``: ``(energies, atom_frames, time)``, with one
              energy and one atom list per step;
            * ``get_energy`` without ``get_atoms``: ``energy`` or, with
              ``get_time``, ``(energy, time)``;
            * ``get_time``: ``(energy, atoms, time)`` or, without
              ``get_atoms``, ``(energy, time)``;
            * otherwise ``(energy, atoms)`` with ``get_energy``, else
              ``atoms``.
        ``energy`` is None when it was not requested and the log is not a
        scan summary. ``time`` is None unless the log terminated normally,
        a time was asked for (``get_time`` or ``parse_all``) and a readable
        'Job cpu time' line exists.

    Raises:
        Exception: If no 'SCF Done' entry exists when one is needed.
        ValueError: If no coordinate table can be located for the final
            geometry.
    """
    if input_file[-4:] != '.log':
        input_file = 'gaussian/' + input_file + '.log'
    with open(input_file) as log:
        contents = log.read()

    # Bound up front so the final returns never reference an unassigned
    # name when a value was not requested or could not be found.
    energy = None
    time = None

    terminated = 'Normal termination of Gaussian 09' in contents
    if check_convergence and get_energy and not parse_all and not terminated:
        return None

    if terminated and (get_time or parse_all):
        m = re.search(
            r'Job cpu time: +(\S+) +days +(\S+) +hours +(\S+) +minutes '
            r'+(\S+) +seconds',
            contents)
        # Best effort: a missing or unreadable timing line leaves time None.
        try:
            days, hours, minutes, seconds = [float(g) for g in m.groups()]
            time = (days * 24 * 60 * 60 + hours * 60 * 60 + minutes * 60
                    + seconds)
        except (AttributeError, ValueError):
            pass

    if ('Summary of Optimized Potential Surface Scan' in contents
            and not parse_all):
        end_section = contents[
            contents.rindex('Summary of Optimized Potential Surface Scan'):]
        energy_lines = re.findall(r'Eigenvalues -- ([^\n]+)', end_section)
        energy = [
            float(s)
            for line in energy_lines
            for s in re.findall(r'-[\d]+\.[\d]+', line)
        ]

        atoms = []
        for minimum in re.split('Stationary point found', contents)[1:]:
            coordinates = minimum.index('Coordinates (Angstroms)')
            start = minimum.index('---\n', coordinates) + 4
            end = minimum.index('\n ---', start)
            frame = []
            for line in minimum[start:end].splitlines():
                columns = line.split()
                x, y, z = [float(s) for s in columns[3:6]]
                frame.append(
                    utils.Atom(
                        element=constants.PERIODIC_TABLE[
                            int(columns[1])]['sym'],
                        x=x, y=y, z=z, index=len(frame) + 1))
            atoms.append(frame)
        if get_energy:
            return energy, atoms
    elif get_energy and not parse_all:
        if ' MP2/' in contents:
            # MP2 files don't have just SCF energy; the value uses a
            # Fortran 'D' exponent.
            energy = float(
                re.findall(r'EUMP2 = +(\S+)', contents)[-1].replace('D', 'e'))
        elif ' CCSD/' in contents:
            energy = float(re.findall(r'E\(CORR\)= +(\S+)', contents)[-1])
        elif not counterpoise:
            try:
                scf_at = contents.rindex('SCF Done')
                energy_line = contents[scf_at:contents.index('\n', scf_at)]
            except ValueError:
                raise Exception('No SCF for ' + input_file)
            energy = float(
                re.search(r'SCF Done: +\S+ += +(\S+)', energy_line).group(1))
        else:
            energy = float(
                re.findall(r'Counterpoise: corrected energy = +(\S+)',
                           contents)[-1])

    if parse_all:
        # Compiled once so each step can search from an offset instead of
        # copying the remainder of the log.
        scf_energy = re.compile(r'SCF Done: +\S+ += +(\S+)')
        energies = []
        atom_frames = []
        start = 0
        orientation = 'Input orientation:'
        while True:
            try:
                input_orientation = contents.find(orientation, start)
                if input_orientation == -1:
                    # No (more) input-orientation tables: fall back to the
                    # standard orientation for the rest of the scan.
                    # NOTE(review): for logs holding both kinds of table
                    # this may pick up one extra frame after the last
                    # input-orientation one -- confirm that is intended.
                    orientation = 'Standard orientation'
                    input_orientation = contents.find(orientation, start)
                if input_orientation >= 0:
                    start = input_orientation
                next_coordinates = contents.index('Coordinates (Angstroms)',
                                                  start)
                start = contents.index('SCF Done', start)
                energies.append(
                    float(scf_energy.search(contents, start).group(1)))
            except (ValueError, AttributeError):
                break

            start = contents.index('---\n', next_coordinates) + 4
            end = contents.index('\n ---', start)
            lines = contents[start:end].splitlines()
            start = end

            atoms = []
            for line in lines:
                columns = line.split()
                # In this mode the second column is passed through as-is.
                element = columns[1]
                x, y, z = columns[3:6]
                atoms.append(
                    utils.Atom(element=element, x=float(x), y=float(y),
                               z=float(z)))
            atom_frames.append(atoms)
        return energies, atom_frames, time

    if get_energy and not get_atoms:
        if get_time:
            return energy, time
        return energy

    # get coordinates: prefer the last input-orientation table, otherwise
    # fall back to the last coordinate table of any kind
    try:
        last_coordinates = contents.rindex('Input orientation:')
        last_coordinates = contents.index('Coordinates (Angstroms)',
                                          last_coordinates)
    except ValueError:
        last_coordinates = contents.rindex('Coordinates (Angstroms)')
    start = contents.index('---\n', last_coordinates) + 4
    end = contents.index('\n ---', start)
    atoms = []
    for line in contents[start:end].splitlines():
        columns = line.split()
        x, y, z = [float(s) for s in columns[3:6]]
        atoms.append(
            utils.Atom(
                element=constants.PERIODIC_TABLE[int(columns[1])]['sym'],
                x=x, y=y, z=z, index=len(atoms) + 1))

    # get forces from the last force table and attach them to the atoms
    if 'Forces (Hartrees/Bohr)' in contents:
        last_forces = contents.rindex('Forces (Hartrees/Bohr)')
        start = contents.index('---\n', last_forces) + 4
        end = contents.index('\n ---', start)
        for i, line in enumerate(contents[start:end].splitlines()):
            columns = line.split()
            atoms[i].fx, atoms[i].fy, atoms[i].fz = [
                float(s) for s in columns[2:5]
            ]

    # return the appropriate values
    if get_time:
        if get_atoms:
            return energy, atoms, time
        return energy, time
    if get_energy:
        return energy, atoms
    return atoms
def read_lammpstrj_legacy(name, read_atoms=True, read_timesteps=True,
                          read_num_atoms=True, read_box_bounds=True,
                          last_frame=False, verbose=True):
    """Read a LAMMPS trajectory by loading the whole file into memory.

    Args:
        name: Path of the trajectory. '.lammpstrj' is appended when the
            name neither ends with it nor contains a '.' anywhere.
        read_atoms: Parse the atom blocks into frames.
        read_timesteps: Parse the 'ITEM: TIMESTEP' values.
        read_num_atoms: Parse the 'ITEM: NUMBER OF ATOMS' values.
        read_box_bounds: Parse the 'ITEM: BOX BOUNDS' blocks.
        last_frame: Keep only the text after the last 'ITEM: TIMESTEP'
            header before parsing. That header is cut away too, so no
            timesteps are found in this mode.
        verbose: Print which coordinate style was detected.

    Returns:
        A ``utils.sim_out`` object carrying ``frames``, ``atoms`` (last
        frame or None), ``timesteps``, ``final_timestep``, ``atom_counts``,
        ``atom_count``, ``box_bounds_list``, ``box_bounds`` and
        ``last_modified`` ('Null'). A missing file produces a warning and an
        object with empty lists and None values.
    """
    if not name.endswith('.lammpstrj') and '.' not in name:
        name += '.lammpstrj'

    # If file does not exist, return empty lammpstrj object
    if not os.path.isfile(name):
        warn('Expected lammps trajectory file does not exist at %s' % (name))
        data = ''
    else:
        with open(name, 'r') as trajectory:
            data = trajectory.read()

    # Keep only what follows the last timestep header if last_frame=True
    if last_frame:
        data = data.rpartition('ITEM: TIMESTEP')[2]

    # Determine coordinate type
    coords = ''
    if read_atoms:
        if 'x y z' in data:
            if verbose:
                print('%s: Reading wrapped, unscaled atom coordinates'
                      % (name))
            coords = 'x y z'
        elif 'xs ys zs' in data:
            if verbose:
                print('%s: Reading wrapped, scaled atom coordinates'
                      % (name))
            coords = 'xs ys zs'
        elif 'xu yu zu' in data:
            if verbose:
                print('%s: Reading unwrapped, unscaled atom coordinates'
                      % (name))
            coords = 'xu yu zu'
        elif 'xsu ysu zsu' in data:
            if verbose:
                print('%s: Reading unwrapped, scaled atom coordinates'
                      % (name))
            coords = 'xsu ysu zsu'
        else:
            print('No valid coordinates found')

    # Get all the positions
    section, frames = data, []
    s = 'ITEM: ATOMS id type ' + coords
    while read_atoms and (s in section):
        section = section[section.find(s) + len(s):]
        # Drop the remainder of the header line, keep the rows up to the
        # next timestep header.
        atom_block = section[:section.find('\nITEM: TIMESTEP')]
        frame = []
        for line in atom_block.split('\n')[1:]:
            a = line.split()
            # Check if atom has expected number of characteristics
            if len(a) == 5:
                frame.append(
                    utils.Atom(a[1], float(a[2]), float(a[3]), float(a[4]),
                               index=a[0]))
            else:
                print('Atom skipped due to missing information')
        frames.append(frame)
    atoms = frames[-1] if frames else None

    # Get all timesteps
    section, timesteps = data, []
    s = 'ITEM: TIMESTEP'
    while read_timesteps and (s in section):
        section = section[section.find(s) + len(s):]
        tmp = section[:section.find('\nITEM: NUMBER OF ATOMS')]
        for line in tmp.split('\n')[1:]:
            timesteps.append(int(line.split()[0]))
    final_timestep = timesteps[-1] if timesteps else None

    # Get number of atoms. Useful if number of atoms change during
    # simulation, such as during a deposition
    section, atom_counts = data, []
    s = 'ITEM: NUMBER OF ATOMS'
    while read_num_atoms and (s in section):
        section = section[section.find(s) + len(s):]
        tmp = section[:section.find('\nITEM: BOX BOUNDS')]
        for line in tmp.split('\n')[1:]:
            atom_counts.append(int(line.split()[0]))
    atom_count = atom_counts[-1] if atom_counts else None

    # Get box bounds
    # Currently only imports orthorhombic crystal information aka all
    # angles = 90 degrees
    section, box_bounds_list = data, []
    s = 'ITEM: BOX BOUNDS'
    while read_box_bounds and (s in section):
        section = section[section.find(s) + len(s):]
        tmp = section[:section.find('\nITEM: ATOMS')]
        box_bounds = utils.Struct(xlo=None, xhi=None, ylo=None, yhi=None,
                                  zlo=None, zhi=None)
        # Rows fill x, then y, then z; any further rows are ignored.
        for line in tmp.split('\n')[1:]:
            a = line.split()
            if box_bounds.xlo is None:
                box_bounds.xlo = float(a[0])
                box_bounds.xhi = float(a[1])
            elif box_bounds.ylo is None:
                box_bounds.ylo = float(a[0])
                box_bounds.yhi = float(a[1])
            elif box_bounds.zlo is None:
                box_bounds.zlo = float(a[0])
                box_bounds.zhi = float(a[1])
        box_bounds_list.append(box_bounds)
    box_bounds = box_bounds_list[-1] if box_bounds_list else None

    # Create object to store all results
    data = utils.sim_out(name, 'lammps')

    # Record all lammps trajectory data into results object
    data.frames = frames
    data.atoms = atoms
    data.timesteps = timesteps
    data.final_timestep = final_timestep
    data.atom_counts = atom_counts
    data.atom_count = atom_count
    data.box_bounds_list = box_bounds_list
    data.box_bounds = box_bounds
    # Stores when lammpstrj was last modified in seconds
    data.last_modified = 'Null'

    return data
def read_lammpstrj(name, read_atoms=True, read_timesteps=True,
                   read_num_atoms=True, read_box_bounds=True, verbose=True,
                   last_frame_only=False):
    """Read a LAMMPS trajectory line by line.

    The file is streamed rather than loaded whole. Atom columns are taken
    from the first 'ITEM: ATOMS' header and reused for every later frame.

    Args:
        name: Path of the trajectory. '.lammpstrj' is appended when the
            name neither ends with it nor contains a '.' anywhere.
        read_atoms: Parse atom rows into ``utils.Atom`` objects.
        read_timesteps: Record the timestep values.
        read_num_atoms: Record the atom counts.
        read_box_bounds: Record the box bounds.
        verbose: Print which coordinate style the first frame uses.
        last_frame_only: Keep only the final entry of each result list.

    Returns:
        A ``utils.sim_out`` object carrying ``frames``, ``atoms`` (last
        frame or None), ``timesteps``, ``final_timestep``, ``atom_counts``,
        ``atom_count``, ``box_bounds_list``, ``box_bounds`` and
        ``last_modified`` ('Null'). A missing file produces a warning and an
        object with empty lists and None values.
    """
    if not name.endswith('.lammpstrj') and '.' not in name:
        name += '.lammpstrj'

    # If file does not exist, return empty lammpstrj object
    file_exists = os.path.isfile(name)
    if not file_exists:
        warn('Expected lammps trajectory file does not exist at %s' % (name))

    # Initialize variables
    timesteps, atom_counts, box_bounds_list, frames = [], [], [], []

    # Column names of the atom rows, taken from the first ATOMS header
    atom_attrs = []

    # Which section the current line belongs to: None, 'timesteps',
    # 'num_atoms', 'box_bounds' or 'atoms'
    section = None

    # Atoms of the frame being read; None until an ATOMS header is seen
    frame = None

    # Flag to keep track if this is the first step analyzed
    first_step = True

    # Frame skipping: a frame is skipped while skip_count is in skip_set,
    # and the counter restarts once it reaches max(skip_set). With [0]
    # nothing is skipped except lines ahead of the first timestep header.
    skip_set = [0]
    skip_count = 0

    # Iterate file line by line. This reduces the memory required since it
    # only loads the current line. A missing file is replaced by the null
    # device so the loop simply sees no lines.
    with open(name if file_exists else os.devnull) as f:
        for line in f:
            # Check for new section
            if 'ITEM: TIMESTEP' in line:
                # If skipped previous frame, reset skip counter
                if skip_count == max(skip_set):
                    skip_count = 0

                # Increment skip counter and skip if necessary
                skip_count = skip_count + 1
                if skip_count in skip_set:
                    continue

                section = 'timesteps'

                # Add previous timestep to list of frames. Not for the
                # first step (no atoms read yet), and not when only the
                # final frame is wanted.
                if not first_step and not last_frame_only:
                    frames.append(frame)
                continue

            # If it is not the timestep section, and a skip has triggered,
            # skip all lines till next timestep
            elif skip_count in skip_set:
                continue

            elif 'ITEM: NUMBER OF ATOMS' in line:
                section = 'num_atoms'
                continue

            elif 'ITEM: BOX BOUNDS' in line:
                section = 'box_bounds'
                box_bounds = utils.Struct(xlo=None, xhi=None, ylo=None,
                                          yhi=None, zlo=None, zhi=None)
                continue

            elif 'ITEM: ATOMS' in line:
                section = 'atoms'
                box_bounds_list.append(box_bounds)
                frame = []

                if first_step:
                    # Everything after 'ITEM: ATOMS' names a column
                    atom_attrs = line.split()[2:]

                    # Report the coordinates style once
                    if verbose:
                        if 'x y z' in line:
                            print('%s: Reading wrapped, unscaled atom '
                                  'coordinates' % (name))
                        elif 'xs ys zs' in line:
                            print('%s: Reading wrapped, scaled atom '
                                  'coordinates' % (name))
                        elif 'xu yu zu' in line:
                            print('%s: Reading unwrapped, unscaled atom '
                                  'coordinates' % (name))
                        elif 'xsu ysu zsu' in line:
                            print('%s: Reading unwrapped, scaled atom '
                                  'coordinates' % (name))
                        else:
                            print('No valid coordinates found')

                first_step = False
                continue

            # Record information as required by the section
            a = line.split()

            if section == 'timesteps' and read_timesteps:
                timesteps.append(int(a[0]))

            elif section == 'num_atoms' and read_num_atoms:
                atom_counts.append(int(a[0]))

            elif section == 'box_bounds' and read_box_bounds:
                # Rows fill x, then y, then z; further rows are ignored
                if box_bounds.xlo is None:
                    box_bounds.xlo = float(a[0])
                    box_bounds.xhi = float(a[1])
                elif box_bounds.ylo is None:
                    box_bounds.ylo = float(a[0])
                    box_bounds.yhi = float(a[1])
                elif box_bounds.zlo is None:
                    box_bounds.zlo = float(a[0])
                    box_bounds.zhi = float(a[1])

            elif section == 'atoms' and read_atoms:
                # Initialize all atom values
                index, lammps_type = 0, 0
                x, y, z = 0, 0, 0
                vx, vy, vz = 0, 0, 0
                fx, fy, fz = 0, 0, 0

                # Assign atom attributes
                for (value, attr) in zip(a, atom_attrs):
                    if attr == 'id':
                        index = int(value)
                    elif attr == 'type':
                        lammps_type = value
                    elif attr in ['x', 'xs', 'xu', 'xsu']:
                        x = float(value)
                    elif attr in ['y', 'ys', 'yu', 'ysu']:
                        y = float(value)
                    elif attr in ['z', 'zs', 'zu', 'zsu']:
                        z = float(value)
                    elif attr == 'vx':
                        vx = float(value)
                    elif attr == 'vy':
                        vy = float(value)
                    elif attr == 'vz':
                        vz = float(value)
                    elif attr == 'fx':
                        fx = float(value)
                    elif attr == 'fy':
                        fy = float(value)
                    elif attr == 'fz':
                        fz = float(value)

                # Check if atom has expected number of characteristics
                if len(a) == len(atom_attrs):
                    frame.append(
                        utils.Atom(lammps_type, x, y, z, index=index,
                                   vx=vx, vy=vy, vz=vz,
                                   fx=fx, fy=fy, fz=fz))
                else:
                    print('Atom skipped due to missing information')

    # Add final frame, if any atom section was reached at all
    if frame is not None:
        frames.append(frame)

    # Record final data point as necessary
    final_timestep = timesteps[-1] if timesteps else None
    atom_count = atom_counts[-1] if atom_counts else None
    box_bounds = box_bounds_list[-1] if box_bounds_list else None
    atoms = frames[-1] if frames else None

    # If only looking for final frame, erase all other timesteps. Slices
    # keep this safe when a list is empty.
    if last_frame_only:
        timesteps = timesteps[-1:]
        atom_counts = atom_counts[-1:]
        box_bounds_list = box_bounds_list[-1:]
        frames = frames[-1:]

    # Create object to store all results
    data = utils.sim_out(name, 'lammps')

    # Record all lammps trajectory data into results object
    data.frames = frames
    data.atoms = atoms
    data.timesteps = timesteps
    data.final_timestep = final_timestep
    data.atom_counts = atom_counts
    data.atom_count = atom_count
    data.box_bounds_list = box_bounds_list
    data.box_bounds = box_bounds
    # Stores when lammpstrj was last modified in seconds
    data.last_modified = 'Null'

    return data