def compile_training_set(path="./training_sets"):
    """
    Gather sequentially numbered cml files into a single xyz file.

    Starting at ``0.cml`` and counting upwards, every ``<n>.cml`` found
    directly inside *path* is read with ``files.read_cml`` and the first
    entry of its result is kept.  The scan stops at the first missing
    number, so files that come after a gap are ignored.  Everything that
    was collected is then handed to ``files.write_xyz`` under the name
    "full_training_set".

    **Parameters**

        path: *str, optional*
            Directory holding the numbered cml files.  A single trailing
            slash is tolerated.

    **Returns**

        None

    **Raises**

        Exception
            If *path* does not exist.
    """
    folder = path[:-1] if path.endswith("/") else path
    if not os.path.exists(folder):
        raise Exception("Unable to find training set directory")

    collected = []
    index = 0
    while True:
        cml_file = "%s/%d.cml" % (folder, index)
        if not os.path.exists(cml_file):
            # First gap in the numbering ends the scan.
            break
        parsed = files.read_cml(cml_file,
                                return_molecules=False,
                                allow_errors=True,
                                test_charges=False)
        collected.append(parsed[0])
        index += 1

    files.write_xyz(collected, "full_training_set")
def calculate(coords):
    """
    Evaluate the NEB objective (DFT energies plus spring terms) for a flat
    coordinate vector and store the result on ``NEB``.

    This function relies on names supplied by the enclosing scope: ``NEB``
    (holding ``states``, ``name``, ``step``, ``theory`` and ``k``), ``job``,
    ``parse_atoms``, ``queue``, ``extra_section``, ``spring_atoms`` and
    ``name``.

    Steps performed:

    1. Copy *coords* onto the interior images ``NEB.states[1:-1]``, three
       values (x, y, z) per atom.  The end points are never moved.
    2. Launch one job per interior image and wait for all of them.
    3. Read back each job's energy and per-atom forces.
    4. Add harmonic spring forces and energies between every interior
       image and its two neighbours, for atoms listed in ``spring_atoms``.
    5. Store ``NEB.error`` (sum of the energies) and ``NEB.forces`` (the
       negated forces, i.e. the derivative of the error), increment
       ``NEB.step``, rewrite ``<name>.xyz`` and print a progress line.

    **Parameters**

        coords: *sequence, float*
            Flat sequence of x, y, z values for every atom of every interior
            image, in image order.

    **Returns**

        None
    """
    interior = NEB.states[1:-1]

    # Unpack the flat coordinate vector onto the interior images.
    coord_count = 0
    for s in interior:
        for a in s:
            a.x = coords[coord_count]
            a.y = coords[coord_count + 1]
            a.z = coords[coord_count + 2]
            coord_count += 3

    # Start DFT jobs.  After the first step each job reads its guess from
    # the matching job of the previous step.
    running_jobs = []
    for i, state in enumerate(interior):
        guess = '' if NEB.step == 0 else ' Guess=Read'
        if NEB.step > 0:
            previous = '%s-%d-%d' % (NEB.name, NEB.step - 1, i)
        else:
            previous = None
        running_jobs.append(
            job('%s-%d-%d' % (NEB.name, NEB.step, i),
                NEB.theory + ' Force' + guess,
                state,
                queue=queue,
                force=True,
                previous=previous,
                extra_section=extra_section))

    # Wait for jobs to finish.
    for j in running_jobs:
        j.wait()

    # Get forces and energies from the DFT calculations.
    energies = []
    for i, state in enumerate(interior):
        job_name = '%s-%d-%d' % (NEB.name, NEB.step, i)
        try:
            new_energy, new_atoms = parse_atoms(job_name)
        except Exception:
            # A bare ``except`` would also swallow KeyboardInterrupt.
            print('Job failed: %s' % job_name)
            exit()
        energies.append(new_energy)
        for a, b in zip(state, new_atoms):
            a.fx = b.fx
            a.fy = b.fy
            a.fz = b.fz
    dft_energies = list(energies)

    # Add spring forces to atoms.  ``interior[i]`` is ``NEB.states[i + 1]``,
    # so its neighbours are ``NEB.states[i]`` and ``NEB.states[i + 2]``.
    # (Indexing with i - 1 / i + 1 would pair the first interior image with
    # the last state and every image with itself.)
    for i, state in enumerate(interior):
        before, after = NEB.states[i], NEB.states[i + 2]
        for j, b in enumerate(state):
            if j not in spring_atoms:
                continue
            a, c = before[j], after[j]
            b.fx += NEB.k * (a.x - b.x) + NEB.k * (c.x - b.x)
            b.fy += NEB.k * (a.y - b.y) + NEB.k * (c.y - b.y)
            b.fz += NEB.k * (a.z - b.z) + NEB.k * (c.z - b.z)
            energies[i] += 0.5 * NEB.k * (utils.dist_squared(a, b) +
                                          utils.dist_squared(b, c))

    # Set error.
    NEB.error = sum(energies)

    # Set forces: the derivative of the error is the negated force.
    NEB.forces = []
    for state in interior:
        for a in state:
            NEB.forces += [-a.fx, -a.fy, -a.fz]

    # Increment step.
    NEB.step += 1

    # Write every image (end points included) to the xyz file; close the
    # handle even if writing fails part-way.
    NEB.xyz = open(name + '.xyz', 'w')
    try:
        for state in NEB.states:
            files.write_xyz(state, NEB.xyz)
    finally:
        NEB.xyz.close()

    # Print data: step, total error, then the raw DFT energies.
    print('%s %s %s' % (NEB.step, NEB.error,
                        ('%10.7g ' * len(dft_energies)) % tuple(dft_energies)))
def compile_training_set(path="./training_set", extra_parameters=None):
    """
    Collect consecutively numbered cml files into one xyz file.

    Files ``0.cml``, ``1.cml``, ... inside *path* are read in order with
    ``files.read_cml`` until the first missing number.  The first entry of
    each read result is kept, and the collected list is passed to
    ``files.write_xyz`` under the name "full_training_set".

    **Parameters**

        path: *str, optional*
            Directory holding the numbered cml files.  A single trailing
            slash is tolerated.
        extra_parameters: *dict, optional*
            Forwarded unchanged to ``files.read_cml``.  Defaults to a fresh
            empty dictionary on every call, so nothing the reader stores in
            it can leak into later calls.

    **Returns**

        None

    **Raises**

        Exception
            If *path* does not exist.
    """
    if extra_parameters is None:
        extra_parameters = {}

    if path.endswith("/"):
        path = path[:-1]
    if not os.path.exists(path):
        raise Exception("Unable to find training set directory")

    frames = []
    i = 0
    while True:
        cml_file = "%s/%d.cml" % (path, i)
        if not os.path.exists(cml_file):
            # First gap in the numbering ends the scan.
            break
        atoms = files.read_cml(cml_file,
                               return_molecules=False,
                               allow_errors=True,
                               test_charges=False,
                               extra_parameters=extra_parameters)[0]
        frames.append(atoms)
        i += 1

    files.write_xyz(frames, "full_training_set")
def method_CLANCELOT(opt_method="LBFGS"):
    """
    Run the CNH/HCN pathway through ``neb.NEB`` and save the last result.

    The starting frames are read from ``./../xyz/CNH_HCN.xyz``.  The module
    level settings ``RIGID_ROTATION``, ``ALPHA``, ``MAX_STEP`` and ``FMAX``
    are read here.  ``RIGID_ROTATION`` only affects the banner printed by
    this function; it is not passed to ``neb.NEB`` from here.

    **Parameters**

        opt_method: *str, optional*
            Optimizer name handed to ``neb.NEB`` as ``opt``; also used in
            the run name and in the output file name.

    **Returns**

        None
    """
    from files import read_xyz, write_xyz
    import neb
    from units import convert, convert_energy

    initial_frames = read_xyz("./../xyz/CNH_HCN.xyz")
    theory = '! HF-3c'

    rotation_state = "ON" if RIGID_ROTATION else "OFF"
    print("\nRUNNING CLANCELOT SIMULATION WITH RIGID_ROTATION %s...\n"
          % rotation_state)

    label = 'CNH_HCN_c_' + opt_method

    # Both gradient thresholds are stated in eV/Ang and converted to Ha/Ang.
    optimizer_settings = dict(
        step_size=ALPHA,
        step_size_adjustment=0.5,
        max_step=MAX_STEP,
        linesearch='backtrack',
        accelerate=True,
        reset_step_size=5,
        g_rms=convert("eV/Ang", "Ha/Ang", 0.03),
        g_max=convert("eV/Ang", "Ha/Ang", FMAX),
    )

    simulation = neb.NEB(label,
                         initial_frames,
                         theory,
                         k=convert_energy("eV", "Ha", 0.1),
                         opt=opt_method,
                         new_opt_params=optimizer_settings)

    # Only the last element of what optimize() returns is written out.
    final_frames = simulation.optimize()[-1]
    write_xyz(final_frames, "CNH_HCN_opt_%s" % opt_method)

    print("\nDONE WITH CLANCELOT SIMULATION...\n")
def pickle_training_set(run_name,
                        training_sets_folder="training_set",
                        pickle_file_name="training_set",
                        high_energy_cutoff=500.0,
                        system_x_offset=1000.0,
                        verbose=False,
                        extra_parameters=None):
    """
    A function to pickle together the training set in a manner that is
    readable for MCSMRFF.  Every usable training subset is added to one
    system, each offset along the x-axis by system_x_offset.  The pickle
    file holds a list of two objects.  The first is the entire system as
    described above.  The second is a dictionary of all molecules in the
    system, organized by composition.

    Side effects: writes ``<training_sets_folder>/<pickle_file_name>.pickle``
    and an xyz file of the used subsets next to it, creates the directory
    *run_name* if needed, writes the MCSMRFF files inside it, and copies the
    pickle to ``<run_name>/<run_name>.pickle``.

    **Parameters**

        run_name: *str*
            Name of final training set.
        training_sets_folder: *str, optional*
            Path to the folder where all the training set data is.  Each
            subset ``<name>`` is expected in ``<folder>/<name>/`` as
            ``<name>.out``, ``<name>.orca.engrad`` and ``<name>.cml``.
        pickle_file_name: *str, optional*
            A name for the pickle file and training set system.
        high_energy_cutoff: *float, optional*
            A cutoff for systems that are too large in energy, as MD is
            likely never to sample them.  Compared against the energy
            relative to the lowest-energy subset of the same composition,
            after conversion with ``units.convert_energy("Ha", "kcal/mol")``.
        system_x_offset: *float, optional*
            The x offset for the systems to be added by.
        verbose: *bool, optional*
            Whether to have additional stdout or not.
        extra_parameters: *dict, optional*
            A dictionary of additional parameters that do not exist in the
            default OPLSAA parameter file.  Defaults to a fresh empty
            dictionary per call.

    **Returns**

        system: *System*
            The entire training set system.
        systems_by_composition: *dict, list, Molecule*
            Molecules keyed by composition (the sorted element symbols
            joined by spaces); subsets above the energy cutoff are removed.

    **Raises**

        Exception
            If the pickle file already exists, or if an atom position in
            the cml file differs from the orca output by more than 1e-4.
    """
    if extra_parameters is None:
        extra_parameters = {}

    # Take care of pickle file I/O
    if training_sets_folder.endswith("/"):
        training_sets_folder = training_sets_folder[:-1]
    if pickle_file_name is not None and pickle_file_name.endswith(".pickle"):
        pickle_file_name = pickle_file_name.split(".pickle")[0]
    pfile = training_sets_folder + "/" + pickle_file_name + ".pickle"
    sys_name = pickle_file_name
    if os.path.isfile(pfile):
        raise Exception("Pickled training set already exists!")

    # Generate empty system for your training set
    system = structures.System(box_size=[1e3, 100.0, 100.0], name=sys_name)
    systems_by_composition = {}

    # Convert force from Ha/Bohr to kcal/mol-Ang
    def convert(x):
        return units.convert_dist("Ang", "Bohr",
                                  units.convert_energy("Ha", "kcal", x))

    # For each folder in the training_sets folder lets get the cml file we
    # want and write the energies and forces for that file
    for name in os.listdir(training_sets_folder):
        prefix = "%s/%s/%s" % (training_sets_folder, name, name)

        # We'll read in any training subset that succeeded and print a
        # warning on those that failed
        try:
            result = orca.read(prefix + ".out")
        except IOError:
            print("Warning - Training Subset %s not included as "
                  "out file not found..." % name)
            continue

        # Check for convergence
        if not result.converged:
            print("Warning - Results for %s have not converged." % name)
            continue

        # Parse the force output and change units. In the case of no force
        # found, do not use this set of data
        try:
            forces = orca.engrad_read(prefix + ".orca.engrad", pos="Ang")[0]
            for a, b in zip(result.atoms, forces):
                a.fx, a.fy, a.fz = convert(b.fx), convert(b.fy), convert(b.fz)
        except (IndexError, IOError):
            print("Warning - Training Subset %s not included as "
                  "results not found..." % name)
            continue

        # Get the bonding information
        with_bonds = structures.Molecule(prefix + ".cml",
                                         extra_parameters=extra_parameters,
                                         allow_errors=True,
                                         test_charges=False)

        # Copy over the forces read in into the system that has the bonding
        # information
        for a, b in zip(with_bonds.atoms, result.atoms):
            a.fx, a.fy, a.fz = b.fx, b.fy, b.fz
            # sanity check on atom positions
            if geometry.dist(a, b) > 1e-4:
                raise Exception('Atoms are different:', (a.x, a.y, a.z),
                                (b.x, b.y, b.z))

        # Rename and save energy
        with_bonds.energy = result.energy
        with_bonds.name = name

        # Group the molecule under its composition key
        composition = ' '.join(sorted([a.element for a in result.atoms]))
        systems_by_composition.setdefault(composition, []).append(with_bonds)

    # Generate:
    #  (1) xyz file of various systems as different time steps
    #  (2) system to simulate
    xyz_atoms = []
    to_delete = []
    for composition in systems_by_composition:
        subsets = systems_by_composition[composition]

        # Sort so that the lowest energy training subset is first
        subsets.sort(key=lambda s: s.energy)
        baseline_energy = subsets[0].energy

        # Offset the energies by the lowest energy, and convert energy units
        for j, s in enumerate(subsets):
            s.energy -= baseline_energy
            s.energy = units.convert_energy("Ha", "kcal/mol", s.energy)

            # Don't use high-energy systems, because these will not likely
            # be sampled in MD
            if s.energy > high_energy_cutoff:
                to_delete.append([composition, j])
                continue

            # For testing purposes, output
            if verbose:
                print("Using: %s %s" % (s.name, s.energy))

            xyz_atoms.append(s.atoms)
            system.add(s, len(system.molecules) * system_x_offset)

    # Delete the subsets we aren't using.  Going from the highest list
    # index down keeps the remaining recorded indices valid.
    to_delete = sorted(to_delete, key=lambda x: x[1])[::-1]
    for d1, d2 in to_delete:
        if verbose:
            print("Warning - Training Subset %s not included as energy "
                  "is too high..." % systems_by_composition[d1][d2].name)
        del systems_by_composition[d1][d2]

    # Make the box just a little bigger (100) so that we can fit all our
    # systems
    system.xhi = len(system.molecules) * system_x_offset + 100.0

    # Write all of the states we are using to an xyz file
    files.write_xyz(xyz_atoms, training_sets_folder + '/' + pickle_file_name)

    # Generate our pickle file
    print("Saving pickle file %s..." % pfile)
    with open(pfile, "wb") as fptr:
        pickle.dump([system, systems_by_composition], fptr)

    # Now we have the data, save it to files for this simulation of
    # "run_name" and return parameters
    if not os.path.isdir(run_name):
        os.mkdir(run_name)
    start_dir = os.getcwd()
    os.chdir(run_name)
    try:
        mcsmrff_files.write_system_and_training_data(run_name, system,
                                                     systems_by_composition)
    finally:
        # Always return to where we started, even if writing failed.
        os.chdir(start_dir)
    shutil.copyfile(pfile, "%s/%s.pickle" % (run_name, run_name))

    return system, systems_by_composition
else: low_x = min(x_vals) high_x = max(x_vals) for y in yy: plt.plot(x_vals,y,marker='.',label=str(int(start_val) + i)) if min(y) < low_y: low_y = min(y) if max(y) > high_y: high_y = max(y) plt.xlabel(x_label) plt.ylabel('%s (%s)' % (y_label,u2)) plt.title(title) if x_range is None: x_range = [low_x, high_x] if y_range is None: y_range = [low_y, high_y*1.05] plt.axis([x_range[0], x_range[1], y_range[0], y_range[1]]) plt.legend() plt.show() if comp[0] != None: start -= 1 plot(energies,start,x_label,y_label,title,x_range,y_range) # Write files files.write_xyz(frames,out_name[:-4]) # Print out values if desired if p_vals: for y in energies: print(str(y))
def pretty_xyz(name, R_MAX=1, F_MAX=50, PROCRUSTS=False, outName=None,
               write_xyz=False, verbose=False):
    """
    Insert interpolated frames until the largest per-frame motion, as
    reported by ``motion_per_frame``, drops below R_MAX.

    On every pass the frame with the largest motion is located and the
    frames around it are rebuilt with ``interpolate(..., 3)``.  The process
    exits the interpreter if the frame count exceeds F_MAX before the
    motion target is reached.

    **Parameters**

        name: *str or list*
            Name of an xyz file to read in, or a list of frames.
        R_MAX: *float, optional*
            Maximum motion per frame.
        F_MAX: *int, optional*
            Maximum number of frames allowed.
        PROCRUSTS: *bool, optional*
            Whether to call ``procrustes`` on the frames (before each pass
            and once at the end).
        outName: *str, optional*
            Output name used when write_xyz is True.  Defaults to
            'pretty_xyz'.
        write_xyz: *bool, optional*
            Write the result to file instead of returning it.
        verbose: *bool, optional*
            Print progress while working.

    **Returns**

        frames: *list*
            The refined frames.  Only returned when write_xyz is False;
            otherwise the frames are written and None is returned.
    """
    from copy import deepcopy

    # Get data as either frames or a file
    if isinstance(name, str):
        frames = files.read_xyz(name)
    elif isinstance(name, list):
        frames = name
    else:
        print("Error - Invalid name input. Should be either the name of an "
              "xyz file or a list. %s" % (sys.exc_info()[0],))
        sys.exit()

    # Loop till we're below R_MAX
    while True:
        # Find largest motion_per_frame
        if PROCRUSTS:
            procrustes(frames)
        tmp = motion_per_frame(frames)
        r2 = max(tmp)
        i = tmp.index(r2)

        # Check if we're done
        if r2 < R_MAX:
            break

        if len(frames) > F_MAX:
            print("-------------------------------------------------------")
            print(tmp)
            print("-------------------------------------------------------")
            print("\n\nError - Could not lower motion below %lg in %d frames. %s"
                  % (R_MAX, F_MAX, sys.exc_info()[0]))
            sys.exit()
        if verbose:
            print("Currently Frames = %d\tr2 = %lg" % (len(frames), r2))

        # Now, split the list, interpolate, and regenerate
        if 0 < i < len(frames) - 1:
            f_low = deepcopy(frames[:i])
            f_high = deepcopy(frames[i + 1:])
            f_mid = interpolate(frames[i - 1], frames[i + 1], 3)
            frames = f_low + f_mid + f_high
        elif i == 0:
            f_low = deepcopy(frames[i])
            f_mid = interpolate(frames[i], frames[i + 1], 3)
            f_high = deepcopy(frames[i + 1:])
            frames = [f_low] + f_mid + f_high
        else:
            f_low = deepcopy(frames[:i])
            f_mid = interpolate(frames[i - 1], frames[i], 3)
            f_high = deepcopy(frames[i])
            frames = f_low + f_mid + [f_high]

        if verbose:
            # The frame index is ``i``; the name previously used here was
            # never defined and raised NameError in verbose mode.
            print("\tInterpolated %d,%d ... %lg"
                  % (i - 1, i + 1, max(motion_per_frame(frames))))

    if PROCRUSTS:
        procrustes(frames)

    if write_xyz:
        files.write_xyz(frames, 'pretty_xyz' if outName is None else outName)
    else:
        return frames
def pickle_training_set(run_name,
                        training_sets_folder="training_set",
                        pickle_file_name="training_set",
                        high_energy_cutoff=500.0,
                        system_x_offset=1000.0,
                        verbose=False,
                        extra_parameters=None):
    """
    A function to pickle together the training set in a manner that is
    readable for MCSMRFF.  Every usable training subset is added to one
    system, each offset along the x-axis by system_x_offset.  The pickle
    file holds a list of two objects.  The first is the entire system as
    described above.  The second is a dictionary of all molecules in the
    system, organized by composition.

    Side effects: writes ``<training_sets_folder>/<pickle_file_name>.pickle``
    and an xyz file of the used subsets next to it, creates the directory
    *run_name* if needed, writes the MCSMRFF files inside it, and copies the
    pickle to ``<run_name>/<run_name>.pickle``.

    **Parameters**

        run_name: *str*
            Name of final training set.
        training_sets_folder: *str, optional*
            Path to the folder where all the training set data is.  Each
            subset ``<name>`` is expected in ``<folder>/<name>/`` as
            ``<name>.out``, ``<name>.orca.engrad`` and ``<name>.cml``.
        pickle_file_name: *str, optional*
            A name for the pickle file and training set system.
        high_energy_cutoff: *float, optional*
            A cutoff for systems that are too large in energy, as MD is
            likely never to sample them.  Compared against the energy
            relative to the lowest-energy subset of the same composition,
            after conversion with ``units.convert_energy("Ha", "kcal/mol")``.
        system_x_offset: *float, optional*
            The x offset for the systems to be added by.
        verbose: *bool, optional*
            Whether to have additional stdout or not.
        extra_parameters: *dict, optional*
            A dictionary of additional parameters that do not exist in the
            default OPLSAA parameter file.  Defaults to a fresh empty
            dictionary per call.

    **Returns**

        system: *System*
            The entire training set system.
        systems_by_composition: *dict, list, Molecule*
            Molecules keyed by composition (the sorted element symbols
            joined by spaces); subsets above the energy cutoff are removed.

    **Raises**

        Exception
            If the pickle file already exists, or if an atom position in
            the cml file differs from the orca output by more than 1e-4.
    """
    if extra_parameters is None:
        extra_parameters = {}

    # Take care of pickle file I/O
    if training_sets_folder.endswith("/"):
        training_sets_folder = training_sets_folder[:-1]
    if pickle_file_name is not None and pickle_file_name.endswith(".pickle"):
        pickle_file_name = pickle_file_name.split(".pickle")[0]
    pfile = training_sets_folder + "/" + pickle_file_name + ".pickle"
    sys_name = pickle_file_name
    if os.path.isfile(pfile):
        raise Exception("Pickled training set already exists!")

    # Generate empty system for your training set
    system = structures.System(box_size=[1e3, 100.0, 100.0], name=sys_name)
    systems_by_composition = {}

    # Convert force from Ha/Bohr to kcal/mol-Ang
    def convert(x):
        return units.convert_dist("Ang", "Bohr",
                                  units.convert_energy("Ha", "kcal", x))

    # For each folder in the training_sets folder lets get the cml file we
    # want and write the energies and forces for that file
    for name in os.listdir(training_sets_folder):
        prefix = "%s/%s/%s" % (training_sets_folder, name, name)

        # We'll read in any training subset that succeeded and print a
        # warning on those that failed
        try:
            result = orca.read(prefix + ".out")
        except IOError:
            print("Warning - Training Subset %s not included as "
                  "out file not found..." % name)
            continue

        # Check for convergence
        if not result.converged:
            print("Warning - Results for %s have not converged." % name)
            continue

        # Parse the force output and change units. In the case of no force
        # found, do not use this set of data
        try:
            forces = orca.engrad_read(prefix + ".orca.engrad", pos="Ang")[0]
            for a, b in zip(result.atoms, forces):
                a.fx, a.fy, a.fz = convert(b.fx), convert(b.fy), convert(b.fz)
        except (IndexError, IOError):
            print("Warning - Training Subset %s not included as "
                  "results not found..." % name)
            continue

        # Get the bonding information
        with_bonds = structures.Molecule(prefix + ".cml",
                                         extra_parameters=extra_parameters,
                                         allow_errors=True,
                                         test_charges=False)

        # Copy over the forces read in into the system that has the bonding
        # information
        for a, b in zip(with_bonds.atoms, result.atoms):
            a.fx, a.fy, a.fz = b.fx, b.fy, b.fz
            # sanity check on atom positions
            if geometry.dist(a, b) > 1e-4:
                raise Exception('Atoms are different:', (a.x, a.y, a.z),
                                (b.x, b.y, b.z))

        # Rename and save energy
        with_bonds.energy = result.energy
        with_bonds.name = name

        # Group the molecule under its composition key
        composition = ' '.join(sorted([a.element for a in result.atoms]))
        systems_by_composition.setdefault(composition, []).append(with_bonds)

    # Generate:
    #  (1) xyz file of various systems as different time steps
    #  (2) system to simulate
    xyz_atoms = []
    to_delete = []
    for composition in systems_by_composition:
        subsets = systems_by_composition[composition]

        # Sort so that the lowest energy training subset is first
        subsets.sort(key=lambda s: s.energy)
        baseline_energy = subsets[0].energy

        # Offset the energies by the lowest energy, and convert energy units
        for j, s in enumerate(subsets):
            s.energy -= baseline_energy
            s.energy = units.convert_energy("Ha", "kcal/mol", s.energy)

            # Don't use high-energy systems, because these will not likely
            # be sampled in MD
            if s.energy > high_energy_cutoff:
                to_delete.append([composition, j])
                continue

            # For testing purposes, output
            if verbose:
                print("Using: %s %s" % (s.name, s.energy))

            xyz_atoms.append(s.atoms)
            system.add(s, len(system.molecules) * system_x_offset)

    # Delete the subsets we aren't using.  Going from the highest list
    # index down keeps the remaining recorded indices valid.
    to_delete = sorted(to_delete, key=lambda x: x[1])[::-1]
    for d1, d2 in to_delete:
        if verbose:
            print("Warning - Training Subset %s not included as energy "
                  "is too high..." % systems_by_composition[d1][d2].name)
        del systems_by_composition[d1][d2]

    # Make the box just a little bigger (100) so that we can fit all our
    # systems
    system.xhi = len(system.molecules) * system_x_offset + 100.0

    # Write all of the states we are using to an xyz file
    files.write_xyz(xyz_atoms, training_sets_folder + '/' + pickle_file_name)

    # Generate our pickle file
    print("Saving pickle file %s..." % pfile)
    with open(pfile, "wb") as fptr:
        pickle.dump([system, systems_by_composition], fptr)

    # Now we have the data, save it to files for this simulation of
    # "run_name" and return parameters
    if not os.path.isdir(run_name):
        os.mkdir(run_name)
    start_dir = os.getcwd()
    os.chdir(run_name)
    try:
        mcsmrff_files.write_system_and_training_data(run_name, system,
                                                     systems_by_composition)
    finally:
        # Always return to where we started, even if writing failed.
        os.chdir(start_dir)
    shutil.copyfile(pfile, "%s/%s.pickle" % (run_name, run_name))

    return system, systems_by_composition
def job(run_name,
        atoms,
        ecut,
        ecutrho=None,
        atom_units="Ang",
        route=None,
        pseudopotentials=None,
        periodic_distance=15,
        dumps="dump End Ecomponents ElecDensity",
        queue=None,
        walltime="00:30:00",
        procs=1,
        threads=None,
        redundancy=False,
        previous=None,
        mem=2000,
        priority=None,
        xhost=None,
        slurm_allocation=sysconst.slurm_default_allocation):
    """
    Wrapper to submitting a JDFTx simulation.

    All files are written to ``jdftx/<run_name>/`` relative to the current
    working directory.  The working directory is changed while the job is
    prepared and is restored before returning, also when an error occurs.

    **Parameters**

        run_name: *str*
            Name of the simulation to be run.
        atoms: *list,* :class:`structures.Atom` *, or str*
            A list of atoms for the simulation.  If a string is passed, it
            is assumed to be an xyz file.  If None is passed, then
            *previous* must be specified.  Note that the ``element`` of
            every atom is rewritten in place via ``units.elem_i2s``.
        ecut: *float*
            The planewave cutoff energy in Hartree.
        ecutrho: *float, optional*
            The charge density cutoff in Hartree.  When None, nothing is
            written after the planewave cutoff and the choice is left to
            JDFTx.
        atom_units: *str, optional*
            What units your atoms are in.  NOTE(review): this argument is
            not referenced in the function body; the xyz file is handed to
            the ``xyzToIonposOpt`` script as is.
        route: *str, optional*
            Any additional script to add to the JDFTx simulation.
        pseudopotentials: *list, str, optional*
            The pseudopotentials to use in this simulation.  If nothing is
            passed, one ultra-soft GBRV pbe pseudopotential per element is
            picked from the JDFTx installation.
        periodic_distance: *float, optional*
            The periodic box distance in Bohr.  Formatted with ``%d``, so a
            fractional part is dropped.
        dumps: *str, optional*
            The outputs for this simulation.
        queue: *str, optional*
            What queue to run the simulation on (queueing system
            dependent).  None runs JDFTx locally; 'debug' only prints what
            would be run.
        walltime: *str, optional*
            Walltime forwarded to ``jobs.submit_job``.
        procs: *int, optional*
            How many processors to run the simulation on.
        threads: *int, optional*
            How many threads to run the simulation on.  By default this is
            procs.
        redundancy: *bool, optional*
            With redundancy on, if the job is submitted and unique_name is
            on, then if another job of the same name is running, a pointer
            to that job will instead be returned.
        previous: *str, optional*
            Name of a previous simulation whose xyz, ionpos and lattice
            files are copied into this run and whose state is used as the
            initial state.
        mem: *float, optional*
            Amount of memory per processor that is available (in MB).
        priority: *int, optional*
            Priority of the simulation (queueing system dependent).
            Priority ranges (in NBS) from a low of 1 (start running
            whenever) to a high of 255 (start running ASAP).
        xhost: *list, str or str, optional*
            Which processor to run the simulation on (queueing system
            dependent).
        slurm_allocation: *str, optional*
            Whether to use a slurm allocation for this job or not.  If so,
            specify the name.

    **Returns**

        job: :class:`jobs.Job`
            Return the job container.

    **Raises**

        Exception
            If the run name is longer than 31 characters while a queue is
            given, if no pseudopotential can be found for an element, or
            if both *atoms* and *previous* are None.
    """
    if len(run_name) > 31 and queue is not None:
        raise Exception("Job name too long (%d) for NBS. "
                        "Max character length is 31." % len(run_name))

    # Generate the JDFTx run directory
    start_dir = os.getcwd()
    run_dir = 'jdftx/%s' % run_name
    if not os.path.isdir(run_dir):
        os.makedirs(run_dir)

    if previous is not None:
        shutil.copyfile("jdftx/%s/%s.xyz" % (previous, previous),
                        "jdftx/%s/%s.xyz" % (run_name, previous))
        shutil.copyfile("jdftx/%s/%s.xyz" % (previous, previous),
                        "jdftx/%s/%s.xyz" % (run_name, run_name))
        shutil.copyfile("jdftx/%s/%s.ionpos" % (previous, previous),
                        "jdftx/%s/%s.ionpos" % (run_name, previous))
        shutil.copyfile("jdftx/%s/%s.lattice" % (previous, previous),
                        "jdftx/%s/%s.lattice" % (run_name, previous))

    os.chdir(run_dir)
    try:
        # Start from a blank output file
        if os.path.isfile("%s.out" % run_name):
            os.rename("%s.out" % run_name, "%s_prev.out" % run_name)

        if threads is None:
            threads = procs

        path_jdftx = sysconst.jdftx_path
        if path_jdftx.endswith("/"):
            path_jdftx = path_jdftx[:-1]
        path_jdftx_scripts = sysconst.jdftx_script_path
        if path_jdftx_scripts.endswith("/"):
            path_jdftx_scripts = path_jdftx_scripts[:-1]

        if atoms is not None:
            # NOTE(review): a relative xyz path is resolved from inside
            # jdftx/<run_name>, because the directory change has already
            # happened - confirm this is what callers expect.
            if isinstance(atoms, str):
                atoms = files.read_xyz(atoms)
            for a in atoms:
                a.element = units.elem_i2s(a.element)
            files.write_xyz(atoms, "%s.xyz" % run_name)

            # NOTE(review): the next two steps are assumed to belong to the
            # "atoms were given" branch - confirm against the original
            # layout of this function.
            if run_name.endswith(".xyz"):
                run_name = run_name.split(".xyz")[0]

            # NOTE! xyzToIonposOpt will convert xyz Angstroms to Bohr
            os.system("%s/xyzToIonposOpt %s.xyz %d > xyzToIonpos.log" %
                      (path_jdftx_scripts, run_name, periodic_distance))

        previous_name = None
        if previous:
            previous_name = previous
            previous = "initial-state %s.$VAR" % previous

        # First, determine unique elements and pick one potential for each
        if pseudopotentials is None and atoms is not None:
            pseudopotentials = []
            elements = geometry.reduce_list(
                [a.element.lower() for a in atoms])
            all_pps = [
                fname
                for fname in os.listdir("%s/pseudopotentials/GBRV" %
                                        path_jdftx)
                if fname.endswith("uspp") and "pbe" in fname
            ]
            for e in elements:
                potential_pps = [
                    pp for pp in all_pps if pp.startswith("%s_" % e)
                ]
                if len(potential_pps) < 1:
                    raise Exception(
                        "Unable to automatically grab potential for element %s."
                        % e)
                # In theory this should be the "largest" number based on
                # the naming convention.
                # NOTE(review): an ascending sort followed by [0] takes the
                # lexicographically smallest name - confirm that is the
                # intended version.
                potential_pps.sort()
                pseudopotentials.append("GBRV/" + potential_pps[0])

        if atoms is None:
            # Wildcard entries; $ID is left in the text for JDFTx.
            pseudopotentials = "\n".join([
                "ion-species GBRV/$ID_pbe_v1.2.uspp",
                "ion-species GBRV/$ID_pbe_v1.01.uspp",
                "ion-species GBRV/$ID_pbe_v1.uspp",
            ])
        else:
            pseudopotentials = "\n".join(
                ["ion-species %s" % pp for pp in pseudopotentials])

        # NOTE(review): the line layout of this template was reconstructed;
        # one command per line, comments trailing their command.
        script = "\n".join([
            "",
            "# --------------- Molecular Structure ----------------",
            "",
            "$$ATOMS$$",
            "coords-type cartesian",
            "",
            "$$PREVIOUS$$",
            "",
            "# --------------- System Parameters ----------------",
            "",
            "elec-cutoff $$ECUT$$ $$ECUTRHO$$",
            "",
            "# Specify the pseudopotentials (this defines species O and H):",
            "$$PSEUDOPOTENTIALS$$",
            "",
            "# --------------- Outputs ----------------",
            "",
            "dump-name $$NAME$$.$VAR #Filename pattern for outputs",
            "$$DUMPS$$ #Output energy components and electron density at the end",
            "",
            "",
        ])

        atom_str = "\n".join([
            "include $$NAME$$.lattice",
            "include $$NAME$$.ionpos",
        ])
        if atoms is not None:
            atom_str = atom_str.replace("$$NAME$$", run_name)
        else:
            if previous_name is None:
                raise Exception(
                    "Forgot to specify previous when atoms is None!")
            atom_str = atom_str.replace("$$NAME$$", previous_name)

        # str.replace already substitutes every occurrence, so one call per
        # placeholder is enough (and cannot loop forever).
        script = script.replace("$$ATOMS$$", atom_str)
        script = script.replace("$$NAME$$", run_name)
        if ecutrho is None:
            ecutrho = ""
        script = script.replace("$$ECUTRHO$$", str(ecutrho))
        script = script.replace("$$ECUT$$", str(ecut))
        script = script.replace("$$PSEUDOPOTENTIALS$$", pseudopotentials)
        if previous is not None:
            script = script.replace("$$PREVIOUS$$", previous)
        else:
            script = script.replace("$$PREVIOUS$$", "")
        script = script.replace("$$DUMPS$$", dumps)

        if route is not None:
            script += "\n# --------------- Outputs ----------------\n\n"
            script += route.strip() + "\n\n"

        with open("%s.in" % run_name, 'w') as fptr:
            fptr.write(script)

        # Run the simulation
        process_handle = None
        if queue is None:
            process_handle = subprocess.Popen(
                "%s/jdftx -i %s.in -o %s.out" %
                (path_jdftx, run_name, run_name),
                shell=True)
        elif queue == 'debug':
            print('Would run %s' % run_name)
        else:
            job_to_submit = "source ~/.zshrc\nmpirun -n %d jdftx -c %d -i %s.in -o %s.out" % (
                procs, threads, run_name, run_name)
            jobs.submit_job(run_name,
                            job_to_submit,
                            procs=procs,
                            queue=queue,
                            mem=mem,
                            priority=priority,
                            walltime=walltime,
                            xhosts=xhost,
                            redundancy=redundancy,
                            unique_name=True,
                            slurm_allocation=slurm_allocation)
            # NOTE(review): the pause is assumed to follow queue submission
            # only - confirm against the original layout.
            time.sleep(0.5)

        # Copy run script
        fname = sys.argv[0]
        if '/' in fname:
            fname = fname.split('/')[-1]
        try:
            shutil.copyfile('../../%s' % fname, fname)
        except IOError:
            # Submitted a job oddly enough that sys.argv[0]
            # is not the original python file name, so don't do this
            pass
    finally:
        # Return to the directory we started from, even on failure.
        os.chdir(start_dir)

    if queue is None:
        return jobs.Job(run_name, process_handle=process_handle)
    return jobs.Job(run_name)
def get_pdf(frames,
            start=0.0,
            stop=5.0,
            step=0.1,
            cutoff=10.0,
            rho=1.0,
            quanta=0.001,
            output=None,
            persist=False):
    """
    Obtain the pair distribution function of a list of atoms using the
    Debyer code.  The ``debyer`` executable is run through the shell and
    its ``.g`` output is parsed.

    **Parameters**

        frames: *str or list,* :class:`structures.Atom`
            An xyz file name (with or without the .xyz extension) or an
            input frame to calculate the pdf for.
        start: *float, optional*
            The starting radial distance in Angstroms for the calculated
            pattern.
        stop: *float, optional*
            The ending radial distance in Angstroms for the calculated
            pattern.  Must not exceed *cutoff*.
        step: *float, optional*
            Step in Angstroms for the calculated pattern.
        cutoff: *float, optional*
            Cutoff distance in Angstroms for Interatomic Distance (ID)
            calculations.
        rho: *float, optional*
            Numeric density of the system.
        quanta: *float, optional*
            Interatomic Distance (ID) discretization quanta.
        output: *str, optional*
            Output file name with NO extension given.  Defaults to the
            input file name.
        persist: *bool, optional*
            Whether to persist made .g and .xyz files (True), or remove
            them (False).  An xyz file supplied by the caller is never
            removed; only the temporary one written here is.

    **Returns**

        pdf: *list, tuple, float*
            A list of tuples holding the pdf data (distance in Angstroms
            and Intensity).

    **Raises**

        Exception
            If *stop* is larger than *cutoff*.

    **References**

        * https://debyer.readthedocs.io/en/latest/
    """
    # Validate before anything is written to disk.
    if stop > cutoff:
        raise Exception(
            "Stopping position should be less than or equal to the cutoff.")

    # If passed frames and not an xyz file name, write to xyz
    wrote_xyz = not isinstance(frames, str)
    if wrote_xyz:
        file_name = "tmp_for_pdf_%s" % str(int(random.random() * 1E12))
        files.write_xyz(frames, file_name)
    else:
        file_name = frames
        if file_name.endswith(".xyz"):
            file_name = file_name.split(".xyz")[0]

    if output is None:
        output = file_name

    def number(value):
        # A fixed "%.2f" would turn the default quanta of 0.001 into
        # "0.00"; repr keeps the value as given.
        return repr(float(value))

    # Make command for debyer
    cmd = "debyer --cutoff=%s --quanta=%s -g -f%s -t%s -s%s --ro=%s -o %s.g %s.xyz" % (
        number(cutoff), number(quanta), number(start), number(stop),
        number(step), number(rho), output, file_name)

    # Run debyer and read in the pdf
    os.system(cmd)
    with open("%s.g" % output, 'r') as fptr_pdf:
        raw_lines = fptr_pdf.read().split("\n")

    pdf = []
    for line in raw_lines:
        text = line.strip()
        if text == "" or text.startswith("#"):
            # Header comments and blank padding carry no data.
            continue
        columns = text.split()
        pdf.append((float(columns[0]), float(columns[1])))

    if not persist:
        made_files = ["%s.g" % output]
        if wrote_xyz:
            made_files.append("%s.xyz" % file_name)
        for made in made_files:
            try:
                os.remove(made)
            except OSError:
                # Best effort clean-up, same as a failing ``rm``.
                pass

    return pdf
def calculate(coords):
    """
    Evaluate the NEB objective (DFT energies plus spring terms) for a flat
    coordinate vector and store the result on ``NEB``.

    This function relies on names supplied by the enclosing scope: ``NEB``
    (holding ``states``, ``name``, ``step``, ``theory`` and ``k``), ``job``,
    ``parse_atoms``, ``queue``, ``extra_section``, ``spring_atoms`` and
    ``name``.

    Steps performed:

    1. Copy *coords* onto the interior images ``NEB.states[1:-1]``, three
       values (x, y, z) per atom.  The end points are never moved.
    2. Launch one job per interior image and wait for all of them.
    3. Read back each job's energy and per-atom forces.
    4. Add harmonic spring forces and energies between every interior
       image and its two neighbours, for atoms listed in ``spring_atoms``.
    5. Store ``NEB.error`` (sum of the energies) and ``NEB.forces`` (the
       negated forces, i.e. the derivative of the error), increment
       ``NEB.step``, rewrite ``<name>.xyz`` and print a progress line.

    **Parameters**

        coords: *sequence, float*
            Flat sequence of x, y, z values for every atom of every interior
            image, in image order.

    **Returns**

        None
    """
    interior = NEB.states[1:-1]

    # Unpack the flat coordinate vector onto the interior images.
    coord_count = 0
    for s in interior:
        for a in s:
            a.x = coords[coord_count]
            a.y = coords[coord_count + 1]
            a.z = coords[coord_count + 2]
            coord_count += 3

    # Start DFT jobs.  After the first step each job reads its guess from
    # the matching job of the previous step.
    running_jobs = []
    for i, state in enumerate(interior):
        guess = '' if NEB.step == 0 else ' Guess=Read'
        if NEB.step > 0:
            previous = '%s-%d-%d' % (NEB.name, NEB.step - 1, i)
        else:
            previous = None
        running_jobs.append(
            job('%s-%d-%d' % (NEB.name, NEB.step, i),
                NEB.theory + ' Force' + guess,
                state,
                queue=queue,
                force=True,
                previous=previous,
                extra_section=extra_section))

    # Wait for jobs to finish.
    for j in running_jobs:
        j.wait()

    # Get forces and energies from the DFT calculations.
    energies = []
    for i, state in enumerate(interior):
        job_name = '%s-%d-%d' % (NEB.name, NEB.step, i)
        try:
            new_energy, new_atoms = parse_atoms(job_name)
        except Exception:
            # A bare ``except`` would also swallow KeyboardInterrupt.
            print('Job failed: %s' % job_name)
            exit()
        energies.append(new_energy)
        for a, b in zip(state, new_atoms):
            a.fx = b.fx
            a.fy = b.fy
            a.fz = b.fz
    dft_energies = list(energies)

    # Add spring forces to atoms.  ``interior[i]`` is ``NEB.states[i + 1]``,
    # so its neighbours are ``NEB.states[i]`` and ``NEB.states[i + 2]``.
    # (Indexing with i - 1 / i + 1 would pair the first interior image with
    # the last state and every image with itself.)
    for i, state in enumerate(interior):
        before, after = NEB.states[i], NEB.states[i + 2]
        for j, b in enumerate(state):
            if j not in spring_atoms:
                continue
            a, c = before[j], after[j]
            b.fx += NEB.k * (a.x - b.x) + NEB.k * (c.x - b.x)
            b.fy += NEB.k * (a.y - b.y) + NEB.k * (c.y - b.y)
            b.fz += NEB.k * (a.z - b.z) + NEB.k * (c.z - b.z)
            energies[i] += 0.5 * NEB.k * (utils.dist_squared(a, b) +
                                          utils.dist_squared(b, c))

    # Set error.
    NEB.error = sum(energies)

    # Set forces: the derivative of the error is the negated force.
    NEB.forces = []
    for state in interior:
        for a in state:
            NEB.forces += [-a.fx, -a.fy, -a.fz]

    # Increment step.
    NEB.step += 1

    # Write every image (end points included) to the xyz file; close the
    # handle even if writing fails part-way.
    NEB.xyz = open(name + '.xyz', 'w')
    try:
        for state in NEB.states:
            files.write_xyz(state, NEB.xyz)
    finally:
        NEB.xyz.close()

    # Print data: step, total error, then the raw DFT energies.
    print('%s %s %s' % (NEB.step, NEB.error,
                        ('%10.7g ' * len(dft_energies)) % tuple(dft_energies)))
else: tail = 'Job has not converged.' length = max([len(tmp) for tmp in head.split('\n')] + [len(tmp) for tmp in body.split('\n')] + [len(tmp) for tmp in tail.split('\n')]) dash = '\n' + ''.join(['-'] * length) + '\n' if body != '': print(dash + head + dash + body + dash + tail + dash) else: print(dash + head + dash + tail + dash) try: if len(data.frames) > 0: if me: me = '/fs/home/%s/' % USERNAME else: me = '' files.write_xyz(data.frames, me + out_name[:-4]) if vmd: os.system('"' + sysconst.vmd_path + '" ' + me + out_name) elif ovito: os.system('"' + sysconst.ovito_path + '" ' + me + out_name) except TypeError: print("No atomic coordinates available yet...") except: print("An unexpected error has occurred.") sys.exit()
for frame in frames_to_plot: if frame == 0: energy = first_E atoms = first_frame elif frame == max_frame: energy = last_E atoms = last_frame else: energy = read("%s-%d-%d" % (run_name,iteration,frame)).energies[-1] atoms = read("%s-%d-%d" % (run_name,iteration,frame)).atoms energies.append(units.convert_energy(u1,u2,energy-first_E) * scale) pathway.append(atoms) full_energy_list.append(energies) # Save the final iteration xyz files.write_xyz(pathway, "%s" % out_name) # Plot the graph plot(full_energy_list,iterations_to_plot[0],x_label,y_label,title,x_range,y_range, x_low=frames_to_plot[0]) else: start = int(sys.argv[2]) stop = int(sys.argv[3]) if '-dft' in sys.argv: dft = sys.argv[sys.argv.index('-dft') + 1].lower() if dft not in dft_list: print("Error - %s not recognized for dft." % dft) sys.exit() if [s for s in ['-units','-u'] if s in sys.argv]:
def ovito_xyz_to_gif(frames, scratch, fname="image", camera_pos=(10, 0, 0),
                     camera_dir=(-1, 0, 0), size=(800, 600), delay=10,
                     display_cell=False, renderer="OpenGLRenderer",
                     renderer_settings=None, overwrite=False):
    """
    This function will, using the ovito python api, generate either a single
    image or a gif of the input frames.  Note, a gif is only generated when
    more than one frame exists.  The gif is written to "output.gif" in the
    current working directory.

    **Parameters**

        frames: *str* or *list,* :class:`structures.Atom`
            A list of frames you wish to generate an image for, or a path to
            an xyz file.
        scratch: *str*
            A directory you want to have each image saved to.
        fname: *str, optional*
            The prefix for the image names.
            NOTE(review): this value is currently never read; images are
            named by their zero-padded frame index ("0000", "0001", ...).
        camera_pos: *tuple, float, optional*
            A tuple of x, y, and z coordinates for the camera to be
            positioned.
        camera_dir: *tuple, float, optional*
            The direction the camera is facing.
        size: *tuple, int, optional*
            Image size (width, height).
        delay: *int, optional*
            In the event of a gif, the value handed to the "-delay" option
            of the "convert" command.
        display_cell: *bool, optional*
            Whether to display the box around the system or not.
        renderer: *str, optional*
            What kind of renderer you wish to use: OpenGL or Tachyon.
        renderer_settings: *dict, optional*
            Here you can change specific renderer settings.  Defaults to an
            empty dictionary.
        overwrite: *bool, optional*
            Whether to delete png files already existing in the scratch dir.

    **Returns**

        None
    """
    # Imported locally so this function does not depend on a file-level
    # import of glob.
    import glob

    # A fresh dict per call instead of a shared mutable default
    if renderer_settings is None:
        renderer_settings = {}

    # First ensure we have frames and things in the correct format.  A
    # string is a path to an xyz file, so read its frames (indexing an open
    # file handle would fail).
    # NOTE(review): assumes files.read_xyz returns the frame(s) of the
    # file - confirm against the files module.
    if isinstance(frames, str):
        frames = files.read_xyz(frames)
    if not isinstance(frames[0], list):
        frames = [frames]
    if not scratch.endswith("/"):
        scratch += "/"

    # Next, ensure scratch exists
    if not os.path.exists(scratch):
        os.makedirs(scratch)
    elif len(os.listdir(scratch)) > 0:
        if overwrite:
            for old_image in glob.glob(os.path.join(scratch, "*.png")):
                os.remove(old_image)
        else:
            raise Exception("Error - Scratch directory is not empty!")

    # For each frame, generate an image.  The temporary xyz file is removed
    # even if rendering fails.
    try:
        for i, frame in enumerate(frames):
            files.write_xyz(frame, "tmp.xyz")
            ovito_xyz_to_image("tmp.xyz", scratch,
                               fname="%04d" % i,
                               camera_pos=camera_pos,
                               camera_dir=camera_dir,
                               size=size,
                               renderer=renderer,
                               renderer_settings=renderer_settings,
                               display_cell=display_cell)
    finally:
        if os.path.exists("tmp.xyz"):
            os.remove("tmp.xyz")

    # If more than one frame exists, compile to gif.  "ls -v" feeds the
    # images to convert in natural (numeric) order.
    if len(frames) > 1:
        cmd = "convert -delay %s -loop 0 $(ls -v %s/*.png) output.gif" % (
            str(delay), str(scratch))
        os.system(cmd)
def get_training_set(run_name, use_pickle=True, pickle_file_name=None):
    """
    Load (or build) the training-set system and write its LAMMPS inputs.

    If the pickle file exists and use_pickle is True, the system is read
    from it.  Otherwise every folder in "training_sets" is parsed (orca
    output, engrad forces and system.cml bonding), grouped by elemental
    composition, offset to the lowest energy of its group, and assembled
    into one system.  Either way the result is handed to
    mcsmrff_files.write_system_and_training_data inside "lammps/<run_name>".

    **Parameters**

        run_name: *str*
            Name of the simulation; used for the lammps sub-folder and, when
            loading from a pickle, as the system name.
        use_pickle: *bool, optional*
            Whether to read from / save to the pickle file.
        pickle_file_name: *str, optional*
            Pickle file to read.  Defaults to
            "training_sets/training_set.pickle".

    **Returns**

        system: :class:`structures.System`
            System holding every training subset that was kept.
        systems_by_composition: *dict*
            Maps a composition string (sorted element names joined by
            spaces) to the list of molecules with that composition.

    **Raises**

        Exception
            If pickle_file_name was given but cannot be used, or if parsed
            atom positions disagree with those of the cml file.
    """
    # Take care of pickle file I/O
    if pickle_file_name is None:
        pfile = "training_sets/training_set.pickle"
    else:
        pfile = pickle_file_name
    system = None

    # Remember where we started so every chdir below can be undone, even
    # when something fails part-way through.
    start_dir = os.getcwd()

    # Convert force from Ha/Bohr to kcal/mol-Ang.
    # NOTE(review): uses the "kcal" unit label while energies below use
    # "kcal/mol" - confirm both mean the same to units.convert_energy.
    def convert(x):
        return units.convert_dist(
            "Ang", "Bohr", units.convert_energy("Ha", "kcal", x))

    # If the pickle file does not exist, or use_pickle is False, read the
    # data in from the training_sets folder
    if not os.path.isfile(pfile) or not use_pickle:
        if pickle_file_name is not None:
            raise Exception("Requested file %s, but unable to read it in."
                            % pickle_file_name)

        # Create the size of the box to be 1000 x 100 x 100 to hold your
        # training sets
        system = structures.System(box_size=[1e3, 100.0, 100.0],
                                   name="training_set")
        systems_by_composition = {}

        # For each folder in the training_sets folder lets get the cml file
        # we want and write the energies and forces for that file
        for name in os.listdir("training_sets"):
            # We'll read in any training subset that succeeded and print
            # a warning on those that failed
            try:
                result = orca.read("training_sets/%s/%s.out" % (name, name))
            except IOError:
                print("Warning - Training Subset %s not included as results "
                      "not found..." % name)
                continue

            # Parse the force output and change units.  In the case of no
            # force found, do not use this set of data
            try:
                forces = orca.engrad_read(
                    "training_sets/%s/%s.orca.engrad" % (name, name),
                    pos="Ang")[0]
                for a, b in zip(result.atoms, forces):
                    a.fx = convert(b.fx)
                    a.fy = convert(b.fy)
                    a.fz = convert(b.fz)
            except (IndexError, IOError):
                print("Warning - Training Subset %s not included as results "
                      "not found..." % name)
                continue

            # Get the bonding information
            with_bonds = structures.Molecule(
                "training_sets/%s/system.cml" % name,
                extra_parameters=extra_Pb,
                test_charges=False)

            # Copy over the forces read in into the system that has the
            # bonding information
            for a, b in zip(with_bonds.atoms, result.atoms):
                a.fx, a.fy, a.fz = b.fx, b.fy, b.fz
                # Sanity check on atom positions
                if geometry.dist(a, b) > 1e-4:
                    raise Exception('Atoms are different:',
                                    (a.x, a.y, a.z), (b.x, b.y, b.z))

            # Rename some things
            with_bonds.energy = result.energy
            with_bonds.name = name

            # Group the subset with the others of the same composition
            composition = ' '.join(sorted([a.element for a in result.atoms]))
            systems_by_composition.setdefault(composition, []).append(
                with_bonds)

        # Generate (1) xyz file of various systems as different time steps
        # and (2) system to simulate
        xyz_atoms = []
        to_delete = []
        for composition in systems_by_composition:
            subsets = systems_by_composition[composition]
            # Sort so that the lowest energy training subset is first
            subsets.sort(key=lambda s: s.energy)
            baseline_energy = subsets[0].energy
            # Offset the energies by the lowest energy, convert units of
            # the energy
            for j, s in enumerate(subsets):
                s.energy -= baseline_energy
                s.energy = units.convert_energy("Ha", "kcal/mol", s.energy)
                # Don't use high-energy systems, because these will not
                # likely be sampled in MD
                if s.energy > 500.0:
                    to_delete.append([composition, j])
                    continue
                # For testing purposes, output
                print("DEBUG: %s %s" % (s.name, s.energy))
                xyz_atoms.append(s.atoms)
                system.add(s, len(system.molecules) * 1000.0)

        # Delete the subsets that we aren't actually using due to energy
        # being too high.  Highest index first, so earlier deletions do not
        # shift the indices still to be removed.
        to_delete = sorted(to_delete, key=lambda x: x[1])[::-1]
        for d1, d2 in to_delete:
            print("Warning - Training Subset %s not included as energy is "
                  "too high..." % systems_by_composition[d1][d2].name)
            del systems_by_composition[d1][d2]

        # Make the box just a little bigger (100) so that we can fit all
        # our systems
        system.xhi = len(system.molecules) * 1000.0 + 100.0

        # Write all of the states we are using to training_sets.xyz
        if not os.path.isdir("training_sets"):
            os.mkdir("training_sets")
        os.chdir("training_sets")
        try:
            files.write_xyz(xyz_atoms, 'training_sets')
        finally:
            os.chdir(start_dir)

        # Generate our pickle file if desired
        if use_pickle:
            print("Saving pickle file %s..." % pfile)
            with open(pfile, "wb") as fptr:
                pickle.dump([system, systems_by_composition], fptr)

    # If use_pickle is true AND the pickle file exists, then we can just
    # read it in
    if system is None and use_pickle:
        print("Reading pickle file %s..." % pfile)
        # NOTE: unpickling runs code stored in the file; only load pickle
        # files from a trusted source.
        with open(pfile, "rb") as fptr:
            system, systems_by_composition = pickle.load(fptr)
        system.name = run_name
    elif system is None:
        raise Exception("Requested file %s, but unable to read it in."
                        % pfile)

    # Now we have the data, save it to files for this simulation of
    # "run_name" and return parameters
    if not os.path.isdir("lammps"):
        os.mkdir("lammps")
    if not os.path.isdir("lammps/%s" % run_name):
        os.mkdir("lammps/%s" % run_name)
    os.chdir("lammps/%s" % run_name)
    try:
        mcsmrff_files.write_system_and_training_data(
            run_name, system, systems_by_composition)
    finally:
        os.chdir(start_dir)

    return system, systems_by_composition
def get_training_set(run_name, use_pickle=True, pickle_file_name=None):
    """
    Load (or build) the training-set system and write its LAMMPS inputs.

    If the pickle file exists and use_pickle is True, the system is read
    from it.  Otherwise every folder in "training_sets" is parsed (orca
    output, engrad forces and system.cml bonding), grouped by elemental
    composition, offset to the lowest energy of its group, and assembled
    into one system.  Either way the result is handed to
    mcsmrff_files.write_system_and_training_data inside "lammps/<run_name>".

    **Parameters**

        run_name: *str*
            Name of the simulation; used for the lammps sub-folder and, when
            loading from a pickle, as the system name.
        use_pickle: *bool, optional*
            Whether to read from / save to the pickle file.
        pickle_file_name: *str, optional*
            Pickle file to read.  Defaults to
            "training_sets/training_set.pickle".

    **Returns**

        system: :class:`structures.System`
            System holding every training subset that was kept.
        systems_by_composition: *dict*
            Maps a composition string (sorted element names joined by
            spaces) to the list of molecules with that composition.

    **Raises**

        Exception
            If pickle_file_name was given but cannot be used, or if parsed
            atom positions disagree with those of the cml file.
    """
    # Take care of pickle file I/O
    if pickle_file_name is None:
        pfile = "training_sets/training_set.pickle"
    else:
        pfile = pickle_file_name
    system = None

    # Remember where we started so every chdir below can be undone, even
    # when something fails part-way through.
    start_dir = os.getcwd()

    # Convert force from Ha/Bohr to kcal/mol-Ang.
    # NOTE(review): uses the "kcal" unit label while energies below use
    # "kcal/mol" - confirm both mean the same to units.convert_energy.
    def convert(x):
        return units.convert_dist(
            "Ang", "Bohr", units.convert_energy("Ha", "kcal", x))

    # If the pickle file does not exist, or use_pickle is False, read the
    # data in from the training_sets folder
    if not os.path.isfile(pfile) or not use_pickle:
        if pickle_file_name is not None:
            raise Exception("Requested file %s, but unable to read it in."
                            % pickle_file_name)

        # Create the size of the box to be 1000 x 100 x 100 to hold your
        # training sets
        system = structures.System(box_size=[1e3, 100.0, 100.0],
                                   name="training_set")
        systems_by_composition = {}

        # For each folder in the training_sets folder lets get the cml file
        # we want and write the energies and forces for that file
        for name in os.listdir("training_sets"):
            # We'll read in any training subset that succeeded and print
            # a warning on those that failed
            try:
                result = orca.read("training_sets/%s/%s.out" % (name, name))
            except IOError:
                print("Warning - Training Subset %s not included as results "
                      "not found..." % name)
                continue

            # Parse the force output and change units.  In the case of no
            # force found, do not use this set of data
            try:
                forces = orca.engrad_read(
                    "training_sets/%s/%s.orca.engrad" % (name, name),
                    pos="Ang")[0]
                for a, b in zip(result.atoms, forces):
                    a.fx = convert(b.fx)
                    a.fy = convert(b.fy)
                    a.fz = convert(b.fz)
            except (IndexError, IOError):
                print("Warning - Training Subset %s not included as results "
                      "not found..." % name)
                continue

            # Get the bonding information
            with_bonds = structures.Molecule(
                "training_sets/%s/system.cml" % name,
                extra_parameters=extra_Pb,
                test_charges=False)

            # Copy over the forces read in into the system that has the
            # bonding information
            for a, b in zip(with_bonds.atoms, result.atoms):
                a.fx, a.fy, a.fz = b.fx, b.fy, b.fz
                # Sanity check on atom positions
                if geometry.dist(a, b) > 1e-4:
                    raise Exception('Atoms are different:',
                                    (a.x, a.y, a.z), (b.x, b.y, b.z))

            # Rename some things
            with_bonds.energy = result.energy
            with_bonds.name = name

            # Group the subset with the others of the same composition
            composition = ' '.join(sorted([a.element for a in result.atoms]))
            systems_by_composition.setdefault(composition, []).append(
                with_bonds)

        # Generate (1) xyz file of various systems as different time steps
        # and (2) system to simulate
        xyz_atoms = []
        to_delete = []
        for composition in systems_by_composition:
            subsets = systems_by_composition[composition]
            # Sort so that the lowest energy training subset is first
            subsets.sort(key=lambda s: s.energy)
            baseline_energy = subsets[0].energy
            # Offset the energies by the lowest energy, convert units of
            # the energy
            for j, s in enumerate(subsets):
                s.energy -= baseline_energy
                s.energy = units.convert_energy("Ha", "kcal/mol", s.energy)
                # Don't use high-energy systems, because these will not
                # likely be sampled in MD
                if s.energy > 500.0:
                    to_delete.append([composition, j])
                    continue
                # For testing purposes, output
                print("DEBUG: %s %s" % (s.name, s.energy))
                xyz_atoms.append(s.atoms)
                system.add(s, len(system.molecules) * 1000.0)

        # Delete the subsets that we aren't actually using due to energy
        # being too high.  Highest index first, so earlier deletions do not
        # shift the indices still to be removed.
        to_delete = sorted(to_delete, key=lambda x: x[1])[::-1]
        for d1, d2 in to_delete:
            print("Warning - Training Subset %s not included as energy is "
                  "too high..." % systems_by_composition[d1][d2].name)
            del systems_by_composition[d1][d2]

        # Make the box just a little bigger (100) so that we can fit all
        # our systems
        system.xhi = len(system.molecules) * 1000.0 + 100.0

        # Write all of the states we are using to training_sets.xyz
        if not os.path.isdir("training_sets"):
            os.mkdir("training_sets")
        os.chdir("training_sets")
        try:
            files.write_xyz(xyz_atoms, 'training_sets')
        finally:
            os.chdir(start_dir)

        # Generate our pickle file if desired
        if use_pickle:
            print("Saving pickle file %s..." % pfile)
            with open(pfile, "wb") as fptr:
                pickle.dump([system, systems_by_composition], fptr)

    # If use_pickle is true AND the pickle file exists, then we can just
    # read it in
    if system is None and use_pickle:
        print("Reading pickle file %s..." % pfile)
        # NOTE: unpickling runs code stored in the file; only load pickle
        # files from a trusted source.
        with open(pfile, "rb") as fptr:
            system, systems_by_composition = pickle.load(fptr)
        system.name = run_name
    elif system is None:
        raise Exception("Requested file %s, but unable to read it in."
                        % pfile)

    # Now we have the data, save it to files for this simulation of
    # "run_name" and return parameters
    if not os.path.isdir("lammps"):
        os.mkdir("lammps")
    if not os.path.isdir("lammps/%s" % run_name):
        os.mkdir("lammps/%s" % run_name)
    os.chdir("lammps/%s" % run_name)
    try:
        mcsmrff_files.write_system_and_training_data(
            run_name, system, systems_by_composition)
    finally:
        os.chdir(start_dir)

    return system, systems_by_composition
def pdf_metric(A, ref=None, persist=False, lammps_job=False, start=0.0,
               stop=10.0, step=0.1, cutoff=3.0, quanta=0.001,
               disregard=None):
    """
    Compare the pair distribution functions of the first and last frames.

    A PDF is generated with debyer.get_pdf for the first and for the last
    frame of A, and the root-mean-square difference between the two is
    returned.

    **Parameters**

        A: *str* or *list*
            Either a list of frames, the path to an xyz file (the ".xyz"
            extension is optional), or, if lammps_job is True, the name of
            a lammps run whose "lammps/<A>/<A>2.dump" file is read.
        ref: *optional*
            Reference frame.  Only None (use the first frame) is supported.
        persist: *bool, optional*
            Passed on to debyer.get_pdf; when False the debug xyz file
            written by this function is removed again.
        lammps_job: *bool, optional*
            Whether A names a lammps run rather than an xyz file.
        start, stop, step, cutoff, quanta: *float, optional*
            Passed unchanged to debyer.get_pdf.
        disregard: *list, str, optional*
            Elements whose atoms are removed before the PDFs are generated.

    **Returns**

        rms: *float*
            Root-mean-square difference between the two PDFs.
        pdfs: *list*
            [pdf_ref, pdf_final], each transposed so that index 1 holds the
            values that were compared.
            NOTE(review): presumably index 0 is the distance axis - confirm
            against debyer.get_pdf.

    **Raises**

        Exception
            If ref is not None.
    """
    if disregard is None:
        disregard = []

    # If we are checking a lammps job, grab the dump file from
    # lammps/run_name/run_name2.dump
    if lammps_job is True:
        # Strip a trailing extension so only the run name is left
        for extension in (".xyz", ".dump", ".data"):
            if A.endswith(extension):
                A = A[:-len(extension)]
        A = read_dump_PbCl3MA("lammps/%s/%s2.dump" % (A, A))

    # If we passed a string, then read in the file
    if isinstance(A, str):
        if not A.endswith(".xyz"):
            A += ".xyz"
        A = files.read_xyz(A)

    # The reference is always the first frame; anything else is unsupported
    if ref is not None:
        raise Exception("This is not coded yet.")

    # Work on copies of the first and last frames, with anything in
    # disregard removed
    B = [[atom for atom in frame if atom.element not in disregard]
         for frame in copy.deepcopy([A[0], A[-1]])]

    pdf_ref = debyer.get_pdf(B[0], persist=persist, output="tmp_pdf_ref",
                             start=start, stop=stop, step=step,
                             cutoff=cutoff, quanta=quanta)
    pdf_final = debyer.get_pdf(B[-1], persist=persist,
                               output="tmp_pdf_final", start=start,
                               stop=stop, step=step, cutoff=cutoff,
                               quanta=quanta)

    files.write_xyz([B[0], B[-1]], "pdf_metric_debug")

    # Split lists.  list() keeps the result indexable on Python 3, where
    # zip returns an iterator.
    pdf_ref = list(zip(*pdf_ref))
    pdf_final = list(zip(*pdf_final))

    difference = [(a - b)**2 for a, b in zip(pdf_ref[1], pdf_final[1])]
    rms = (np.asarray(difference).sum() / float(len(difference)))**0.5

    if not persist and os.path.exists("pdf_metric_debug.xyz"):
        os.remove("pdf_metric_debug.xyz")

    return rms, [pdf_ref, pdf_final]
body += '\t'.join([str(s) for s in line])+'\n' body = utils.spaced_print(body, delim='\t') if data.converged: tail = 'Job converged in %.2e seconds' % data.time else: tail = 'Job has not converged.' length = max([len(tmp) for tmp in head.split('\n')] + [len(tmp) for tmp in body.split('\n')] + [len(tmp) for tmp in tail.split('\n')]) dash = '\n'+''.join(['-']*length)+'\n' if body != '': print(dash+head+dash+body+dash+tail+dash) else: print(dash+head+dash+tail+dash) try: if len(data.frames) > 0: if me: me = '/fs/home/%s/' % USERNAME else: me = '' files.write_xyz(data.frames,me + out_name[:-4]) if vmd: os.system('"'+sysconst.vmd_path + '" ' + me + out_name) except TypeError: print("No atomic coordinates available yet...") except: print("An unexpected error has occurred.") sys.exit()
energy = first_E atoms = first_frame elif frame == max_frame: energy = last_E atoms = last_frame else: energy = read("%s-%d-%d" % (run_name, iteration, frame)).energies[-1] atoms = read("%s-%d-%d" % (run_name, iteration, frame)).atoms energies.append( units.convert_energy(u1, u2, energy - first_E) * scale) pathway.append(atoms) full_energy_list.append(energies) # Save the final iteration xyz files.write_xyz(pathway, "%s" % out_name) # Plot the graph plot(full_energy_list, iterations_to_plot[0], x_label, y_label, title, x_range, y_range, x_low=frames_to_plot[0]) else: start = int(sys.argv[2]) stop = int(sys.argv[3])