def minimize_seeds(nprocs=4):
    """
    Optimize the geometry of every cml file found in the "seed" directory.
    Each seed is optimized with a low level route first, then with a high
    level route chained to the low level job through ``previous``.  Every
    converged structure is written to "seed/<name>_opt".

    **Parameters**

        nprocs: *int, optional*
            How many processors each orca job is given.

    **Returns**

        None
    """
    route_low = "! OPT B97-D3 def2-TZVP ECP{def2-TZVP} Grid7"
    extra_section_low = ""
    route_high = "! OPT PW6B95 def2-TZVP ECP{def2-TZVP} Grid7"
    extra_section_high = ""

    # Only cml files are seeds; anything else in the folder (notes, backups,
    # job output) is ignored instead of being handed to the cml reader.
    seed_files = [f for f in os.listdir("seed") if f.endswith(".cml")]
    seed_names = [f.split(".cml")[0] for f in seed_files]
    seeds = [
        files.read_cml("seed/%s" % f, allow_errors=True,
                       test_charges=False)[0]
        for f in seed_files
    ]
    charges = [sum([a.type.charge for a in seed]) for seed in seeds]

    # Low level pass: submit everything, then block until all are done.
    low_jobs = [
        orca.job("seed_%d_low" % i, route_low, atoms=seed,
                 extra_section=extra_section_low, charge=charges[i],
                 grad=False, queue=QUEUE_TO_RUN_ON, procs=nprocs,
                 sandbox=False)
        for i, seed in enumerate(seeds)
    ]
    for job in low_jobs:
        job.wait()

    # High level pass; geometry comes from the matching low level job.
    high_jobs = [
        orca.job("seed_%d_high" % i, route_high, atoms=[],
                 extra_section=extra_section_high, charge=charges[i],
                 grad=False, queue=QUEUE_TO_RUN_ON, procs=nprocs,
                 previous="seed_%d_low" % i, sandbox=False)
        for i in range(len(seeds))
    ]
    for job in high_jobs:
        job.wait()

    for i, seed_file in enumerate(seed_files):
        result = orca.read("seed_%d_high" % i)
        if not result.converged:
            print("Failed to optimize %s" % seed_names[i])
            continue
        new_pos = result.atoms
        # Re-read the same file, this time split into molecules, so the
        # optimized coordinates can be written back molecule by molecule.
        cml_file = files.read_cml("seed/%s" % seed_file, allow_errors=True,
                                  test_charges=False, return_molecules=True)
        # new_pos is one flat list; walk it in step with the nested atoms.
        flat_index = 0
        for mol in cml_file:
            for a in mol.atoms:
                b = new_pos[flat_index]
                a.x, a.y, a.z = b.x, b.y, b.z
                flat_index += 1
        files.write_cml(cml_file, name="seed/%s_opt" % seed_names[i])
def run_low_level(training_sets_folder="training_set", procs=1, queue=None,
                  extra_parameters=None):
    """
    Submit one low level (B97-D3) gradient calculation per "<int>.cml" file
    in the training set folder.  Jobs are named "ts_<int>" and are returned
    without being waited on.

    **Parameters**

        training_sets_folder: *str, optional*
            Folder holding the numbered cml frames.
        procs: *int, optional*
            How many processors each job is given.
        queue: *str, optional*
            What queue to submit to.
        extra_parameters: *dict, optional*
            Additional parameters forwarded to files.read_cml.

    **Returns**

        running_jobs: *list*
            The objects returned by orca.job, ordered by frame number.

    **Raises**

        Exception if the folder is missing or holds no cml file.
        ValueError if a cml file name is not an integer.
    """
    # A fresh dict per call; a shared ``{}`` default would leak state between
    # calls if anything downstream mutated it.
    if extra_parameters is None:
        extra_parameters = {}

    if not os.path.exists(training_sets_folder):
        raise Exception("No training set folder to run.")

    frange = sorted(
        int(a.split('.cml')[0])
        for a in os.listdir(training_sets_folder)
        if a.endswith(".cml")
    )
    if not frange:
        raise Exception("No viable files in training sets folder.")

    route = "! B97-D3 def2-TZVP GCP(DFT/TZ) ECP{def2-TZVP} Grid7"
    extra_section = ""

    running_jobs = []
    for i in frange:
        atoms = files.read_cml("%s/%d.cml" % (training_sets_folder, i),
                               allow_errors=True, test_charges=False,
                               return_molecules=False,
                               extra_parameters=extra_parameters)[0]
        charge = sum([a.type.charge for a in atoms])
        running_jobs.append(
            orca.job("ts_%d" % i, route, atoms=atoms,
                     extra_section=extra_section, charge=charge, grad=True,
                     queue=queue, procs=procs, sandbox=False))
    return running_jobs
def __init__(self, atoms_or_filename_or_all, bonds=None, angles=None,
             dihedrals=None, parameter_file='oplsaa.prm',
             extra_parameters=None, check_charges=True):
    """
    Build the object from one of three kinds of input:

    * a cml file name (str): the file is read with files.read_cml and the
      name is kept on ``self.filename``;
    * a single 4-item sequence ``(atoms, bonds, angles, dihedrals)`` when
      ``bonds`` is not given;
    * separate ``atoms``, ``bonds``, ``angles`` and ``dihedrals`` arguments.

    **Parameters**

        atoms_or_filename_or_all: *str or list or tuple*
            File name, atom list, or the packed 4-item sequence.
        bonds, angles, dihedrals: *list, optional*
            Stored as given when ``bonds`` is supplied.
        parameter_file: *str, optional*
            Forwarded to files.read_cml (file name input only).
        extra_parameters: *dict, optional*
            Forwarded to files.read_cml (file name input only).
        check_charges: *bool, optional*
            Forwarded to files.read_cml (file name input only).
    """
    if isinstance(atoms_or_filename_or_all, str):
        if extra_parameters is None:
            extra_parameters = {}
        self.filename = atoms_or_filename_or_all
        atoms, bonds, angles, dihedrals = files.read_cml(
            self.filename, parameter_file=parameter_file,
            extra_parameters=extra_parameters, check_charges=check_charges)
    elif bonds is None:
        # Only one argument was passed: it packs all four lists.  Testing
        # ``is None`` (not truthiness) keeps an explicitly passed empty bond
        # list from being mistaken for "not supplied".
        atoms, bonds, angles, dihedrals = atoms_or_filename_or_all
    else:
        atoms = atoms_or_filename_or_all

    self.atoms = atoms
    self.bonds = bonds
    self.angles = angles
    self.dihedrals = dihedrals
def compile_training_set(path="./training_sets"):
    """
    Gather the consecutively numbered frames "0.cml", "1.cml", ... found in
    *path* and write them together through files.write_xyz under the name
    "full_training_set".  Reading stops at the first missing number.

    **Parameters**

        path: *str, optional*
            Directory holding the numbered cml frames.  One trailing "/" is
            ignored.

    **Returns**

        None

    **Raises**

        Exception if the directory does not exist.
    """
    root = path[:-1] if path.endswith("/") else path
    if not os.path.exists(root):
        raise Exception("Unable to find training set directory")

    collected = []
    index = 0
    while True:
        frame_file = root + "/" + str(index) + ".cml"
        if not os.path.exists(frame_file):
            break
        # read_cml's first return value is the atom list for this frame.
        frame_atoms = files.read_cml(frame_file, return_molecules=False,
                                     allow_errors=True,
                                     test_charges=False)[0]
        collected.append(frame_atoms)
        index += 1

    files.write_xyz(collected, "full_training_set")
def opls_options(molecule, parameter_file='oplsaa.prm'):
    """
    Print, for every atom of a molecule, the OPLS structure indices (index2)
    allowed by each dihedral the atom takes part in, followed by the
    intersection of those sets.  Dihedrals whose element pattern has no
    entry in the parameter file are reported and skipped.

    **Parameters**

        molecule: *str*
            Passed to files.read_cml as the file to read.
        parameter_file: *str, optional*
            Passed to files.read_opls_parameters.

    **Returns**

        None
    """
    _, atom_types, _, _, dihedral_types = \
        files.read_opls_parameters(parameter_file)

    elements_by_structure_indices = dict(
        (t.index2, elements_by_atomic_number[t.element]) for t in atom_types)
    elements_by_structure_indices[0] = 'X'

    # Map an element pattern to the index2 tuples that realise it.  Each
    # dihedral type is stored in both directions, and the index2 tuple is
    # reversed together with the element key so that position i of the tuple
    # always belongs to position i of the pattern.
    # NOTE(review): patterns containing 'X' (index2 0) can only match atoms
    # whose element is literally 'X' - presumably wildcards; confirm whether
    # they should be expanded.
    index2s_by_elements = {}
    for d_type in dihedral_types:
        index2s = tuple(d_type.index2s)
        key = tuple(elements_by_structure_indices[i] for i in index2s)
        index2s_by_elements.setdefault(key, []).append(index2s)
        index2s_by_elements.setdefault(key[::-1], []).append(index2s[::-1])

    atoms, _, _, dihedrals = files.read_cml(molecule, parameter_file=None)
    for a in atoms:
        a.index2_options = []

    for d in dihedrals:
        pattern = tuple(a.element for a in d.atoms)
        matches = index2s_by_elements.get(pattern)
        if matches is None:
            print('Error: dihedral %s does not exist in OPLS file %s'
                  % (pattern, parameter_file))
            continue
        print(pattern)
        # One fresh option set per atom for this dihedral.
        for a in d.atoms:
            a.index2_options.append(set())
        for index2s in matches:
            for i in range(4):
                d.atoms[i].index2_options[-1].add(index2s[i])

    for a in atoms:
        print(a.element)
        for option in a.index2_options:
            print('\t%s' % (option,))
        if not a.index2_options:
            # Atom is in no recognised dihedral: nothing to intersect.
            continue
        common = set.intersection(*a.index2_options)
        print('\t\t%s' % (common,))
def read_seed(path="./seed", extra_parameters=None):
    """
    Read in all cml files from the seed directory.

    **Parameters**

        path: *str, optional*
            A path to the seed directory.  One trailing "/" is ignored.
        extra_parameters: *dict, optional*
            Additional parameters forwarded to files.read_cml.

    **Returns**

        molecules_A: *list*
            One entry per cml file: whatever files.read_cml returns with
            return_molecules=True (iterated below as a list of molecules).
        molecules_B: *list*
            One structures.Molecule per cml file, built by concatenating
            the atoms, bonds, angles and dihedrals of that file's molecules.

    **Raises**

        Exception if the directory is missing or contains no cml file.
    """
    # A fresh dict per call; a shared ``{}`` default would leak state between
    # calls if anything downstream mutated it.
    if extra_parameters is None:
        extra_parameters = {}

    if path.endswith("/"):
        path = path[:-1]
    if not os.path.exists(path):
        raise Exception("Unable to find seed directory")

    molecules_A = [
        files.read_cml(path + "/" + fptr, return_molecules=True,
                       allow_errors=True, test_charges=False,
                       extra_parameters=extra_parameters)
        for fptr in os.listdir(path)
        if fptr.endswith(".cml")
    ]
    if not molecules_A:
        raise Exception("Seed directory is empty")

    molecules_B = []
    for seed in molecules_A:
        atoms, bonds, angles, dihedrals = [], [], [], []
        for mol in seed:
            atoms += mol.atoms
            bonds += mol.bonds
            angles += mol.angles
            dihedrals += mol.dihedrals
        molecules_B.append(
            structures.Molecule(atoms, bonds, angles, dihedrals))

    return molecules_A, molecules_B
def read_seed(path="./seed", extra_parameters=None):
    """
    Read in all cml files from the seed directory.

    **Parameters**

        path: *str, optional*
            A path to the seed directory.  One trailing "/" is ignored.
        extra_parameters: *dict, optional*
            Additional parameters forwarded to files.read_cml.

    **Returns**

        molecules_A: *list*
            One entry per cml file: whatever files.read_cml returns with
            return_molecules=True (iterated below as a list of molecules).
        molecules_B: *list*
            One structures.Molecule per cml file, in which the child
            molecules of that file are merged into one.

    **Raises**

        Exception if the directory is missing or contains no cml file.
    """
    # Never share one default dict between calls.
    if extra_parameters is None:
        extra_parameters = {}

    if path.endswith("/"):
        path = path[:-1]
    if not os.path.exists(path):
        raise Exception("Unable to find seed directory")

    molecules_A = []
    for fptr in os.listdir(path):
        if not fptr.endswith(".cml"):
            continue
        molecules_A.append(
            files.read_cml(path + "/" + fptr, return_molecules=True,
                           allow_errors=True, test_charges=False,
                           extra_parameters=extra_parameters))
    if not molecules_A:
        raise Exception("Seed directory is empty")

    molecules_B = []
    for seed in molecules_A:
        # Concatenate every child molecule's lists into a single molecule.
        atoms, bonds, angles, dihedrals = [], [], [], []
        for mol in seed:
            atoms += mol.atoms
            bonds += mol.bonds
            angles += mol.angles
            dihedrals += mol.dihedrals
        molecules_B.append(
            structures.Molecule(atoms, bonds, angles, dihedrals))

    return molecules_A, molecules_B
def compile_training_set(path="./training_set", extra_parameters=None):
    """
    Gather the consecutively numbered frames "0.cml", "1.cml", ... found in
    *path* and write them together through files.write_xyz under the name
    "full_training_set".  Reading stops at the first missing number.

    **Parameters**

        path: *str, optional*
            Directory holding the numbered cml frames.  One trailing "/" is
            ignored.
        extra_parameters: *dict, optional*
            Additional parameters forwarded to files.read_cml.

    **Returns**

        None

    **Raises**

        Exception if the directory does not exist.
    """
    # A fresh dict per call; a shared ``{}`` default would leak state between
    # calls if anything downstream mutated it.
    if extra_parameters is None:
        extra_parameters = {}

    if path.endswith("/"):
        path = path[:-1]
    if not os.path.exists(path):
        raise Exception("Unable to find training set directory")

    frames = []
    i = 0
    while os.path.exists(path + "/" + str(i) + ".cml"):
        # read_cml's first return value is the atom list for this frame.
        atoms = files.read_cml(path + "/" + str(i) + ".cml",
                               return_molecules=False, allow_errors=True,
                               test_charges=False,
                               extra_parameters=extra_parameters)[0]
        frames.append(atoms)
        i += 1

    files.write_xyz(frames, "full_training_set")
def run_high_level(training_sets_folder="training_set", procs=1, queue=None,
                   extra_parameters=None):
    """
    Submit one high level (PW6B95) gradient calculation per "<int>.cml" file
    in the training set folder, but only for frames whose low level job
    "ts_<int>" reports convergence.  Jobs are named "ts_<int>_high", are
    chained to the low level job through ``previous`` and are returned
    without being waited on.

    **Parameters**

        training_sets_folder: *str, optional*
            Folder holding the numbered cml frames.
        procs: *int, optional*
            How many processors each job is given.
        queue: *str, optional*
            What queue to submit to.
        extra_parameters: *dict, optional*
            Additional parameters forwarded to files.read_cml.

    **Returns**

        running_jobs: *list*
            The objects returned by orca.job for the submitted frames.
        previous_failed: *list, int*
            Frame numbers skipped because the low level job did not converge.

    **Raises**

        Exception if the folder is missing or holds no cml file.
        ValueError if a cml file name is not an integer.
    """
    # A fresh dict per call; a shared ``{}`` default would leak state between
    # calls if anything downstream mutated it.
    if extra_parameters is None:
        extra_parameters = {}

    if not os.path.exists(training_sets_folder):
        raise Exception("No training set folder to run.")

    frange = sorted(
        int(a.split('.cml')[0])
        for a in os.listdir(training_sets_folder)
        if a.endswith(".cml")
    )
    if not frange:
        raise Exception("No viable files in training sets folder.")

    route = "! PW6B95 def2-TZVP GCP(DFT/TZ) ECP{def2-TZVP} Grid7"
    extra_section = ""

    running_jobs = []
    previous_failed = []
    for i in frange:
        # The frame is read only for its total charge; the geometry itself
        # is taken from the previous job (atoms=[] below).
        atoms = files.read_cml("%s/%d.cml" % (training_sets_folder, i),
                               allow_errors=True, test_charges=False,
                               return_molecules=False,
                               extra_parameters=extra_parameters)[0]
        charge = sum([a.type.charge for a in atoms])

        if not orca.read("ts_%d" % i).converged:
            previous_failed.append(i)
            continue

        running_jobs.append(
            orca.job("ts_%d_high" % i, route, atoms=[],
                     extra_section=extra_section, charge=charge, grad=True,
                     queue=queue, procs=procs, previous="ts_%d" % i,
                     sandbox=False))

    return running_jobs, previous_failed
def run_high_level():
    """
    Submit one high level (PW6B95) gradient calculation per "<int>.cml" file
    in the "training_sets" folder, but only for frames whose low level job
    "ts_<int>" reports convergence.  Jobs are named "ts_<int>_high" and are
    submitted with the module level QUEUE_TO_RUN_ON / QUEUE_PROCS settings.

    **Returns**

        running_jobs: *list*
            The objects returned by orca.job for the submitted frames.
        previous_failed: *list, int*
            Frame numbers skipped because the low level job did not converge.

    **Raises**

        Exception if the folder is missing or holds no cml file.
    """
    folder = "training_sets"
    if not os.path.exists(folder):
        raise Exception("No training set folder to run.")

    frame_ids = sorted(
        int(fname.split('.cml')[0])
        for fname in os.listdir(folder)
        if fname.endswith(".cml")
    )
    if not frame_ids:
        raise Exception("No viable files in training sets folder.")

    high_route = "! PW6B95 def2-TZVP GCP(DFT/TZ) ECP{def2-TZVP} Grid7"
    submitted, skipped = [], []

    for frame in frame_ids:
        low_name = "ts_%d" % frame
        # The frame is read only for its total charge; the geometry itself
        # is taken from the previous job (atoms=[] below).
        frame_atoms = files.read_cml("training_sets/%d.cml" % frame,
                                     allow_errors=True, test_charges=False,
                                     return_molecules=False)[0]
        total_charge = sum(atom.type.charge for atom in frame_atoms)

        if not orca.read(low_name).converged:
            skipped.append(frame)
            continue

        submitted.append(
            orca.job("ts_%d_high" % frame, high_route, atoms=[],
                     extra_section="", charge=total_charge, grad=True,
                     queue=QUEUE_TO_RUN_ON, procs=QUEUE_PROCS,
                     previous=low_name, sandbox=False))

    return submitted, skipped
def minimize_seeds(procs=4, queue=None, extra_parameters=None):
    """
    A function to optimize the geometry of the supplied seeds in the seed
    directory.  Each seed is optimized with a low level route first, then
    with a high level route chained to the low level job through
    ``previous``.  Each converged structure is then added to the seed
    directory under the name "previous_name_opt.cml".

    **Parameters**

        procs: *int, optional*
            How many processors to use for this.
        queue: *str, optional*
            What queue to run the simulation on.
        extra_parameters: *dict, optional*
            Additional parameters forwarded to files.read_cml.

    **Returns**

        None
    """
    # A fresh dict per call; a shared ``{}`` default would leak state between
    # calls if anything downstream mutated it.
    if extra_parameters is None:
        extra_parameters = {}

    route_low = "! OPT B97-D3 def2-TZVP ECP{def2-TZVP} Grid7"
    extra_section_low = ""
    route_high = "! OPT PW6B95 def2-TZVP ECP{def2-TZVP} Grid7"
    extra_section_high = ""

    # Only cml files are seeds; anything else in the folder (notes, backups,
    # job output) is ignored instead of being handed to the cml reader.
    seed_files = [f for f in os.listdir("seed") if f.endswith(".cml")]
    seed_names = [f.split(".cml")[0] for f in seed_files]
    seeds = [
        files.read_cml("seed/%s" % f, allow_errors=True, test_charges=False,
                       extra_parameters=extra_parameters)[0]
        for f in seed_files
    ]
    charges = [sum([a.type.charge for a in seed]) for seed in seeds]

    # Low level pass: submit everything, then block until all are done.
    low_jobs = [
        orca.job("seed_%d_low" % i, route_low, atoms=seed,
                 extra_section=extra_section_low, charge=charges[i],
                 grad=False, queue=queue, procs=procs, sandbox=False)
        for i, seed in enumerate(seeds)
    ]
    for job in low_jobs:
        job.wait()

    # High level pass; geometry comes from the matching low level job.
    high_jobs = [
        orca.job("seed_%d_high" % i, route_high, atoms=[],
                 extra_section=extra_section_high, charge=charges[i],
                 grad=False, queue=queue, procs=procs,
                 previous="seed_%d_low" % i, sandbox=False)
        for i in range(len(seeds))
    ]
    for job in high_jobs:
        job.wait()

    for i, seed_file in enumerate(seed_files):
        result = orca.read("seed_%d_high" % i)
        if not result.converged:
            print("Failed to optimize %s" % seed_names[i])
            continue
        new_pos = result.atoms
        # Re-read the same file, this time split into molecules, so the
        # optimized coordinates can be written back molecule by molecule.
        cml_file = files.read_cml("seed/%s" % seed_file, allow_errors=True,
                                  test_charges=False, return_molecules=True,
                                  extra_parameters=extra_parameters)
        # new_pos is one flat list; walk it in step with the nested atoms.
        flat_index = 0
        for mol in cml_file:
            for a in mol.atoms:
                b = new_pos[flat_index]
                a.x, a.y, a.z = b.x, b.y, b.z
                flat_index += 1
        files.write_cml(cml_file, name="seed/%s_opt" % seed_names[i])
def generate_lead_halide_cation(halide, cation, ion="Pb", run_opt=True):
    """
    Build (or load from the cml folder) the system made of a lead halide
    unit plus a cation placed above it.

    If a cml file for this ion/halide/cation combination already exists in
    fpl_constants.cml_dir it is read and returned straight away.  Otherwise
    the cation is read from "<cation>.cml", oriented, placed above the
    halide unit, optionally relaxed with orca, typed, and saved to the cml
    folder for next time.

    **Parameters**

        halide: passed to reduce_to_name and generate_lead_halide.
        cation: *str*
            Base name of the cation cml file in the cml folder.
        ion: *str, optional*
            Element symbol of the central ion.
        run_opt: *bool, optional*
            Whether to run an orca job and adopt its final coordinates.

    **Returns**

        system: *structures.Molecule*
            The combined system.
    """
    cml_dir = fpl_constants.cml_dir
    name = reduce_to_name(ion, halide, cation)
    if not cml_dir.endswith("/"):
        cml_dir += "/"
    cached_cml = cml_dir + name + ".cml"

    # Reuse a previously generated system when one is on disk.
    if os.path.exists(cached_cml):
        print("Found system in cml folder, returning system")
        cached_atoms = files.read_cml(cached_cml, test_charges=False,
                                      allow_errors=True)[0]
        return structures.Molecule(cached_atoms)

    # Get the PbX3 system
    halide_unit = generate_lead_halide(halide, ion=ion)

    # Get the cation from the cml file
    cation_atoms, cation_bonds, _, _ = files.read_cml(
        cml_dir + cation + ".cml", test_charges=False, allow_errors=True)
    system = structures.Molecule(cation_atoms)

    # Align along X axis
    system.atoms = geometry.align_centroid(system.atoms)[0]

    # Rotate to Z axis.
    # NOTE! In case of FA, we want flat so only translate instead.
    # NOTE! Exactly 3 cations are expected: Cs, MA, FA.  If 2 N, then FA.
    symbols = [atom.element for atom in system.atoms]
    if symbols.count("N") == 2:
        # NOTE(review): this shifts BY the centre of mass; confirm that is
        # the intended way to bring the cation to the origin.
        system.translate(system.get_center_of_mass())
    else:
        system.rotate(geometry.rotation_matrix([0, 1, 0], 90, units="deg"))

    # If N and C in system, ensure N is below C (closer to Pb)
    if "N" in symbols and "C" in symbols:
        first_n = [k for k, atom in enumerate(system.atoms)
                   if atom.element == "N"][0]
        first_c = [k for k, atom in enumerate(system.atoms)
                   if atom.element == "C"][0]
        if system.atoms[first_n].z > system.atoms[first_c].z:
            # Flip if needed
            system.rotate(
                geometry.rotation_matrix([0, 1, 0], 180, units="deg"))

    # Offset system so lowest point is at 0 in the z dir
    lowest_z = min(atom.z for atom in system.atoms)
    system.translate([0, 0, lowest_z * -1])

    # Add to the PbX3 system with an offset of the ion's vdw radius
    vdw_radius = PERIODIC_TABLE[units.elem_s2i(ion)]['vdw_r']
    system.translate([0, 0, vdw_radius])
    system.atoms += halide_unit.atoms

    # Run a geometry optimization of this system
    if run_opt:
        opt_job = orca.job(name, fpl_constants.default_routes[0],
                           atoms=system.atoms,
                           extra_section=fpl_constants.extra_section,
                           queue="batch", procs=2)
        opt_job.wait()
        for atom, relaxed in zip(system.atoms, orca.read(name).atoms):
            atom.x, atom.y, atom.z = relaxed.x, relaxed.y, relaxed.z

    # Set OPLS types
    typed_elements = [ion, "Cl", "Br", "I"]
    for atom in system.atoms:
        if atom.element not in typed_elements:
            continue
        atom.type = fpl_constants.atom_types[atom.element]
        atom.type_index = atom.type["index"]

    # Write cml file so we don't re-generate, and return system
    files.write_cml(system, bonds=cation_bonds, name=cached_cml)
    return system
def opt_opls(molecule, parameter_file='oplsaa.prm', taboo_time=100,
             steps=100000):
    """
    Guess OPLS atom types for a molecule with two randomized taboo searches
    and print the result.

    The first search picks a structure index (index2) per atom so that as
    few bonds, angles and dihedrals as possible are missing from the
    parameter file.  The second search picks, among the atom types sharing
    those structure indices, a combination whose net charge is as close to
    zero as possible.  Progress is printed every 10000 steps and the final
    element/notes of every chosen type are printed at the end.

    Side effects: sets ``possible_types``, ``index2`` and ``index_options``
    on every atom read from the cml file.

    **Parameters**

        molecule: *str*
            Passed to files.read_cml as the file to read.
        parameter_file: *str, optional*
            Passed to files.read_opls_parameters.
        taboo_time: *int, optional*
            Number of steps an atom is avoided after an accepted change.
        steps: *int, optional*
            Number of iterations of each search.

    **Returns**

        None

    **Raises**

        IndexError if an atom has no candidate type to choose from
        (random.choice on an empty list).
    """
    import random

    _, atom_types, bond_types, angle_types, dihedral_types = \
        files.read_opls_parameters(parameter_file)
    atoms, bonds, angles, dihedrals = files.read_cml(molecule,
                                                     parameter_file=None)

    def index2_keys(known_types):
        # Every known index2 tuple, in both directions; only membership is
        # ever tested, so a set is enough.
        keys = set()
        for t in known_types:
            forward = tuple(t.index2s)
            keys.add(forward)
            keys.add(forward[::-1])
        return keys

    known_bonds = index2_keys(bond_types)
    known_angles = index2_keys(angle_types)
    known_dihedrals = index2_keys(dihedral_types)
    charges_by_index = dict((t.index, t.charge) for t in atom_types)

    # Candidate structure indices: same element and same number of bonds.
    for a in atoms:
        a.possible_types = list(set(
            t.index2 for t in atom_types
            if elements_by_atomic_number[t.element] == a.element
            and t.bond_count == len(a.bonded)))

    def count_conflicts(types):
        # Number of bonds/angles/dihedrals with no matching parameter entry.
        for i, a in enumerate(atoms):
            a.index2 = types[i]
        conflicts = 0
        for b in bonds:
            if (b.atoms[0].index2, b.atoms[1].index2) not in known_bonds:
                conflicts += 1
        for ang in angles:
            key = (ang.atoms[0].index2, ang.atoms[1].index2,
                   ang.atoms[2].index2)
            if key not in known_angles:
                conflicts += 1
        for d in dihedrals:
            i0, i1, i2, i3 = [atom.index2 for atom in d.atoms[:4]]
            # A 0 at either end is also accepted as a match.
            candidates = ((i0, i1, i2, i3), (0, i1, i2, i3), (i0, i1, i2, 0))
            if not any(key in known_dihedrals for key in candidates):
                conflicts += 1
        return conflicts

    def net_charge(types):
        charge = 0.0
        for t in types:
            charge += charges_by_index[t]
        return charge

    def taboo_search(choices, score, rank):
        # choices[i] lists the values allowed at position i; a change is kept
        # when rank(score) does not get worse, and the changed position is
        # then avoided for taboo_time steps.
        current = [random.choice(options) for options in choices]
        taboo = [0 for _ in current]
        best = score(current)
        for step in range(steps):
            i = random.randint(0, len(current) - 1)
            # Re-draw while the pick is taboo, at most len(current) times.
            for _ in current:
                if taboo[i] > 0:
                    i = random.randint(0, len(current) - 1)
                else:
                    break
            previous = current[i]
            current[i] = random.choice(choices[i])
            value = score(current)
            if rank(value) <= rank(best):
                best = value
                taboo[i] = taboo_time
            else:
                current[i] = previous
            taboo = [t - 1 if t > 0 else 0 for t in taboo]
            if step % 10000 == 0:
                print("%s %s %s" % (best, value, current))
        return current

    # Stage 1: structure indices with the fewest missing parameters.
    types = taboo_search([a.possible_types for a in atoms],
                         count_conflicts, lambda conflicts: conflicts)

    # NOTE(review): the 440 cut-off on t.index is hard-coded; its meaning is
    # not visible from this function.
    for a, index2 in zip(atoms, types):
        a.index_options = [t.index for t in atom_types
                           if t.index2 == index2 and t.index <= 440]

    # Stage 2: atom types whose total charge is closest to zero.
    types = taboo_search([a.index_options for a in atoms], net_charge, abs)

    for t in types:
        for t2 in atom_types:
            if t2.index == t:
                print("%s %s" % (t2.element, t2.notes))
def minimize_seeds(procs=4, queue=None, extra_parameters=None):
    """
    A function to optimize the geometry of the supplied seeds in the seed
    directory.  Each seed is optimized with a low level route first, then
    with a high level route chained to the low level job through
    ``previous``.  Each converged structure is then added to the seed
    directory under the name "previous_name_opt.cml".

    **Parameters**

        procs: *int, optional*
            How many processors to use for this.
        queue: *str, optional*
            What queue to run the simulation on.
        extra_parameters: *dict, optional*
            Additional parameters forwarded to files.read_cml.

    **Returns**

        None
    """
    # Never share one default dict between calls.
    if extra_parameters is None:
        extra_parameters = {}

    route_low = "! OPT B97-D3 def2-TZVP ECP{def2-TZVP} Grid7"
    extra_section_low = ""
    route_high = "! OPT PW6B95 def2-TZVP ECP{def2-TZVP} Grid7"
    extra_section_high = ""

    # Keep only cml files so stray files in the folder are not parsed.
    seed_files = [f for f in os.listdir("seed") if f.endswith(".cml")]
    seed_names = [f.split(".cml")[0] for f in seed_files]
    seeds = [
        files.read_cml("seed/%s" % f, allow_errors=True, test_charges=False,
                       extra_parameters=extra_parameters)[0]
        for f in seed_files
    ]
    charges = [sum([a.type.charge for a in seed]) for seed in seeds]

    # Low level pass: submit all jobs, then wait for all of them.
    low_jobs = []
    for i, seed in enumerate(seeds):
        low_jobs.append(
            orca.job("seed_%d_low" % i, route_low, atoms=seed,
                     extra_section=extra_section_low, charge=charges[i],
                     grad=False, queue=queue, procs=procs, sandbox=False))
    for job in low_jobs:
        job.wait()

    # High level pass; geometry comes from the matching low level job.
    high_jobs = []
    for i in range(len(seeds)):
        high_jobs.append(
            orca.job("seed_%d_high" % i, route_high, atoms=[],
                     extra_section=extra_section_high, charge=charges[i],
                     grad=False, queue=queue, procs=procs,
                     previous="seed_%d_low" % i, sandbox=False))
    for job in high_jobs:
        job.wait()

    for i, seed_file in enumerate(seed_files):
        result = orca.read("seed_%d_high" % i)
        if not result.converged:
            print("Failed to optimize %s" % seed_names[i])
            continue
        new_pos = result.atoms
        # Same file as before, now split into molecules for writing back.
        cml_file = files.read_cml("seed/%s" % seed_file, allow_errors=True,
                                  test_charges=False, return_molecules=True,
                                  extra_parameters=extra_parameters)
        # new_pos is one flat list; walk it in step with the nested atoms.
        flat_index = 0
        for mol in cml_file:
            for a in mol.atoms:
                b = new_pos[flat_index]
                a.x, a.y, a.z = b.x, b.y, b.z
                flat_index += 1
        files.write_cml(cml_file, name="seed/%s_opt" % seed_names[i])