Пример #1
0
def minimize_seeds(nprocs=4):
	"""
	Optimize the geometry of every seed structure found in the "seed"
	directory.

	Each seed is first optimized with orca using the B97-D3 route and then,
	restarting from that job, with the PW6B95 route.  For every seed whose
	second job reports convergence, the optimized coordinates are copied onto
	the seed's molecules and written with files.write_cml under the name
	"seed/<seed name>_opt".  Seeds that did not converge are reported on
	stdout and skipped.

	**Parameters**

		nprocs: *int, optional*
			Number of processors requested for each orca job.

	**Returns**

		None
	"""
	route_low = "! OPT B97-D3 def2-TZVP ECP{def2-TZVP} Grid7"
	extra_section_low = ""
	route_high = "! OPT PW6B95 def2-TZVP ECP{def2-TZVP} Grid7"
	extra_section_high = ""

	# Only .cml files are seeds.  Anything else living in the directory
	# (notes, editor backups, ...) is skipped instead of being handed to the
	# cml reader.
	seed_files = [f for f in os.listdir("seed") if f.endswith(".cml")]
	seed_names = [f.split(".cml")[0] for f in seed_files]
	seeds = [files.read_cml("seed/%s" % f, allow_errors=True, test_charges=False)[0] for f in seed_files]
	# The net charge is a property of the seed, so compute it once and reuse
	# it for both levels of theory.
	charges = [sum([a.type.charge for a in seed]) for seed in seeds]

	# Low level optimization: start from the seed geometry
	low_jobs = []
	for i, seed in enumerate(seeds):
		low_jobs.append( orca.job("seed_%d_low" % i, route_low, atoms=seed, extra_section=extra_section_low, charge=charges[i], grad=False, queue=QUEUE_TO_RUN_ON, procs=nprocs, sandbox=False) )
	for job in low_jobs:
		job.wait()

	# High level optimization: no atoms are passed, the job is chained to the
	# low level one through `previous`
	high_jobs = []
	for i in range(len(seeds)):
		high_jobs.append( orca.job("seed_%d_high" % i, route_high, atoms=[], extra_section=extra_section_high, charge=charges[i], grad=False, queue=QUEUE_TO_RUN_ON, procs=nprocs, previous="seed_%d_low" % i, sandbox=False) )
	for job in high_jobs:
		job.wait()

	for i, name in enumerate(seed_names):
		result = orca.read("seed_%d_high" % i)
		if not result.converged:
			print("Failed to optimize %s" % name)
			continue
		new_pos = result.atoms
		molecules = files.read_cml("seed/%s" % name, allow_errors=True, test_charges=False, return_molecules=True)
		# Copy coordinates atom by atom; assumes orca.read returns the atoms
		# in the same order in which the molecules store them - TODO confirm
		j = 0
		for mol in molecules:
			for a in mol.atoms:
				b = new_pos[j]
				a.x, a.y, a.z = b.x, b.y, b.z
				j += 1
		files.write_cml(molecules, name="seed/%s_opt" % name)
Пример #2
0
def run_low_level(training_sets_folder="training_set", procs=1, queue=None,
                  extra_parameters={}):
    """
    Submit one low level (B97-D3) orca gradient job per numbered cml file
    found in the training set folder.

    **Parameters**

        training_sets_folder: *str, optional*
            Folder holding the frames as "<integer>.cml" files.
        procs: *int, optional*
            Number of processors passed to every orca job.
        queue: *str, optional*
            Queue passed to every orca job.
        extra_parameters: *dict, optional*
            Forwarded unchanged to files.read_cml.

    **Returns**

        running_jobs: *list*
            The objects returned by orca.job, ordered by frame number.  Job
            names are "ts_<frame number>".

    **Raises**

        Exception if the folder does not exist or holds no .cml file.
    """
    if not os.path.exists(training_sets_folder):
        raise Exception("No training set folder to run.")

    # Frame numbers are taken from the file names, in ascending order
    frame_ids = sorted(int(fname.split('.cml')[0])
                       for fname in os.listdir(training_sets_folder)
                       if fname.endswith(".cml"))
    if not frame_ids:
        raise Exception("No viable files in training sets folder.")

    route = "! B97-D3 def2-TZVP GCP(DFT/TZ) ECP{def2-TZVP} Grid7"
    extra_section = ""

    def submit(frame_id):
        # Read the frame, sum its atomic charges and hand it to orca
        frame_atoms = files.read_cml(
            "%s/%d.cml" % (training_sets_folder, frame_id),
            allow_errors=True,
            test_charges=False,
            return_molecules=False,
            extra_parameters=extra_parameters)[0]
        net_charge = sum(a.type.charge for a in frame_atoms)
        return orca.job("ts_%d" % frame_id, route,
                        atoms=frame_atoms,
                        extra_section=extra_section,
                        charge=net_charge,
                        grad=True,
                        queue=queue,
                        procs=procs,
                        sandbox=False)

    return [submit(frame_id) for frame_id in frame_ids]
Пример #3
0
	def __init__(self, atoms_or_filename_or_all, bonds=None, angles=None, dihedrals=None, parameter_file='oplsaa.prm', extra_parameters={}, check_charges=True):
		"""
		Set atoms, bonds, angles and dihedrals from one of three input forms.

		* a string: treated as a cml filename; it is stored in self.filename
		  and read with files.read_cml (parameter_file, extra_parameters and
		  check_charges are forwarded to the reader);
		* a single object and no `bonds` argument: the object is unpacked as
		  (atoms, bonds, angles, dihedrals);
		* atoms plus an explicit `bonds` argument: the four arguments are
		  stored as given.

		`bonds` is compared against None rather than tested for truthiness,
		so that an explicitly supplied empty bond list (e.g. a structure with
		no bonds) is stored as given instead of triggering the unpacking of
		the atom list.
		"""
		if isinstance(atoms_or_filename_or_all, str):
			self.filename = atoms_or_filename_or_all
			atoms, bonds, angles, dihedrals = files.read_cml(self.filename, parameter_file=parameter_file, extra_parameters=extra_parameters, check_charges=check_charges)
		elif bonds is None:
			# Only one argument was passed: it holds all four lists
			atoms, bonds, angles, dihedrals = atoms_or_filename_or_all
		else:
			atoms = atoms_or_filename_or_all
		self.atoms = atoms
		self.bonds = bonds
		self.angles = angles
		self.dihedrals = dihedrals
Пример #4
0
def compile_training_set(path="./training_sets"):
	"""
	Gather the consecutively numbered frames 0.cml, 1.cml, ... of a training
	set directory and write them out together with files.write_xyz under
	the name "full_training_set".

	Reading stops at the first missing number.  Raises Exception when the
	directory does not exist.
	"""
	if path.endswith("/"):
		path = path[:-1]
	if not os.path.exists(path):
		raise Exception("Unable to find training set directory")

	frames = []
	index = 0
	while True:
		frame_file = "%s/%d.cml" % (path, index)
		if not os.path.exists(frame_file):
			break
		# read_cml returns several items; the first one holds the atoms
		frames.append(files.read_cml(frame_file, return_molecules=False, allow_errors=True, test_charges=False)[0])
		index += 1
	files.write_xyz(frames, "full_training_set")
Пример #5
0
def opls_options(molecule, parameter_file='oplsaa.prm'):
	"""
	Print, for every atom of a cml structure, which OPLS structural indices
	(index2 values) are compatible with the dihedrals the atom belongs to.

	The dihedral types of the parameter file are grouped by their element
	pattern (in both reading directions).  Each dihedral of the structure
	then contributes, to each of its four atoms, the set of index2 values
	found at that position in the matching dihedral types.  Finally the
	per-dihedral sets of every atom and their intersection are printed.

	Dihedrals whose element pattern has no entry in the parameter file are
	reported and skipped.  Atoms without any option set get an empty
	intersection.

	Nothing is returned; all output goes to stdout.
	"""
	elements, atom_types, bond_types, angle_types, dihedral_types = files.read_opls_parameters(parameter_file)

	# index2 -> element symbol.  index2 0 is labelled 'X' (presumably the
	# wildcard entry of the parameter file - TODO confirm).
	elements_by_structure_indices = dict( [ (t.index2, elements_by_atomic_number[t.element] ) for t in atom_types ] )
	elements_by_structure_indices[0] = 'X'

	# Group dihedral types by element pattern, registered forwards and
	# backwards so a dihedral matches whichever way it is listed.
	dihedral_types_by_element = {}
	for d in dihedral_types:
		symbols = tuple([ elements_by_structure_indices[i] for i in d.index2s ])
		dihedral_types_by_element.setdefault(symbols, []).append(d)
		dihedral_types_by_element.setdefault(tuple(reversed(symbols)), []).append(d)

	atoms, bonds, angles, dihedrals = files.read_cml(molecule, parameter_file=None)

	for a in atoms:
		a.index2_options = []

	for d in dihedrals:
		symbols = tuple([ a.element for a in d.atoms ])
		# Look the pattern up with .get so that an unknown dihedral reaches
		# the error message below instead of raising KeyError.
		matches = dihedral_types_by_element.get(symbols)
		if matches is None:
			print('Error: dihedral %s does not exist in OPLS file %s' % (symbols, parameter_file))
			continue
		print(symbols)
		# One fresh option set per atom for this dihedral
		for a in d.atoms:
			a.index2_options.append( set() )
		for t in matches:
			for i in range(4):
				d.atoms[i].index2_options[-1].add( t.index2s[i] )

	for a in atoms:
		print(a.element)
		for option in a.index2_options:
			print('\t%s' % (option,))

		# Atoms that collected no option set have nothing to intersect
		if a.index2_options:
			options = a.index2_options[0]
			for other in a.index2_options[1:]:
				options = options.intersection(other)
		else:
			options = set()

		print('\t\t%s' % (options,))
Пример #6
0
def read_seed(path="./seed", extra_parameters={}):
    """
    Load every cml file of the seed directory, both as read and merged.

    **Parameters**

        path: *str, optional*
            Location of the seed directory.  One trailing "/" is ignored.
        extra_parameters: *dict, optional*
            Forwarded unchanged to files.read_cml.

    **Returns**

        molecules_A: *list*
            One entry per cml file: whatever files.read_cml returns with
            return_molecules=True (iterated below as a list of molecules).
        molecules_B: *list*
            One structures.Molecule per cml file, built from the
            concatenated atoms, bonds, angles and dihedrals of that file's
            molecules.

    **Raises**

        Exception if the directory is missing or contains no .cml file.
    """
    if path.endswith("/"):
        path = path[:-1]
    if not os.path.exists(path):
        raise Exception("Unable to find seed directory")

    molecules_A = [
        files.read_cml(path + "/" + entry,
                       return_molecules=True,
                       allow_errors=True,
                       test_charges=False,
                       extra_parameters=extra_parameters)
        for entry in os.listdir(path)
        if entry.endswith(".cml")
    ]
    if not molecules_A:
        raise Exception("Seed directory is empty")

    molecules_B = []
    for children in molecules_A:
        # Collect the four component lists of all child molecules in order
        merged_atoms, merged_bonds = [], []
        merged_angles, merged_dihedrals = [], []
        for child in children:
            merged_atoms.extend(child.atoms)
            merged_bonds.extend(child.bonds)
            merged_angles.extend(child.angles)
            merged_dihedrals.extend(child.dihedrals)
        merged = structures.Molecule(merged_atoms, merged_bonds,
                                     merged_angles, merged_dihedrals)
        molecules_B.append(merged)

    return molecules_A, molecules_B
Пример #7
0
def read_seed(path="./seed", extra_parameters={}):
    """
    Read all cml files of the seed directory.

    **Parameters**

        path: *str, optional*
            The seed directory; a single trailing "/" is stripped.
        extra_parameters: *dict, optional*
            Passed through to files.read_cml.

    **Returns**

        molecules_A: *list*
            For each cml file, the result of files.read_cml called with
            return_molecules=True.
        molecules_B: *list*
            For each cml file, a single structures.Molecule holding the
            atoms, bonds, angles and dihedrals of all of its molecules
            appended together.

    **Raises**

        Exception when the directory does not exist, or when no file
        ending in ".cml" is found in it.
    """
    if path.endswith("/"):
        path = path[:-1]
    if not os.path.exists(path):
        raise Exception("Unable to find seed directory")

    def combine(parts):
        # Merge the child molecules of one seed into a single Molecule
        pooled = {"atoms": [], "bonds": [], "angles": [], "dihedrals": []}
        for part in parts:
            pooled["atoms"] += part.atoms
            pooled["bonds"] += part.bonds
            pooled["angles"] += part.angles
            pooled["dihedrals"] += part.dihedrals
        return structures.Molecule(pooled["atoms"], pooled["bonds"],
                                   pooled["angles"], pooled["dihedrals"])

    molecules_A = []
    for fname in os.listdir(path):
        if fname.endswith(".cml"):
            loaded = files.read_cml(path + "/" + fname,
                                    return_molecules=True,
                                    allow_errors=True,
                                    test_charges=False,
                                    extra_parameters=extra_parameters)
            molecules_A.append(loaded)

    if len(molecules_A) == 0:
        raise Exception("Seed directory is empty")

    molecules_B = [combine(seed) for seed in molecules_A]

    return molecules_A, molecules_B
Пример #8
0
def compile_training_set(path="./training_set", extra_parameters={}):
    """
    Collect the frames 0.cml, 1.cml, 2.cml, ... of the training set
    directory and write them with files.write_xyz as "full_training_set".

    **Parameters**

        path: *str, optional*
            The training set directory; a single trailing "/" is stripped.
        extra_parameters: *dict, optional*
            Passed through to files.read_cml.

    **Returns**

        None

    **Raises**

        Exception when the directory does not exist.
    """
    if path.endswith("/"):
        path = path[:-1]
    if not os.path.exists(path):
        raise Exception("Unable to find training set directory")

    frames = []
    # Frames are numbered consecutively; stop at the first gap
    frame_file = path + "/" + str(len(frames)) + ".cml"
    while os.path.exists(frame_file):
        frame_atoms = files.read_cml(frame_file,
                                     return_molecules=False,
                                     allow_errors=True,
                                     test_charges=False,
                                     extra_parameters=extra_parameters)[0]
        frames.append(frame_atoms)
        frame_file = path + "/" + str(len(frames)) + ".cml"
    files.write_xyz(frames, "full_training_set")
Пример #9
0
def run_high_level(training_sets_folder="training_set",
                   procs=1,
                   queue=None,
                   extra_parameters={}):
    """
    Submit a high level (PW6B95) orca gradient job for every training set
    frame whose earlier "ts_<n>" job is reported as converged.

    **Parameters**

        training_sets_folder: *str, optional*
            Folder holding the frames as "<integer>.cml" files.
        procs: *int, optional*
            Number of processors passed to every orca job.
        queue: *str, optional*
            Queue passed to every orca job.
        extra_parameters: *dict, optional*
            Forwarded unchanged to files.read_cml.

    **Returns**

        running_jobs: *list*
            The objects returned by orca.job for the submitted
            "ts_<n>_high" jobs.
        previous_failed: *list, int*
            Frame numbers that were skipped because orca.read("ts_<n>")
            did not report convergence.

    **Raises**

        Exception if the folder does not exist or holds no .cml file.
    """
    if not os.path.exists(training_sets_folder):
        raise Exception("No training set folder to run.")

    frame_ids = sorted(
        int(fname.split('.cml')[0])
        for fname in os.listdir(training_sets_folder)
        if fname.endswith(".cml"))
    if not frame_ids:
        raise Exception("No viable files in training sets folder.")

    route = "! PW6B95 def2-TZVP GCP(DFT/TZ) ECP{def2-TZVP} Grid7"
    extra_section = ""

    submitted, skipped = [], []
    for frame_id in frame_ids:
        low_level_name = "ts_%d" % frame_id
        frame_atoms = files.read_cml(
            "%s/%d.cml" % (training_sets_folder, frame_id),
            allow_errors=True,
            test_charges=False,
            return_molecules=False,
            extra_parameters=extra_parameters)[0]
        net_charge = sum(a.type.charge for a in frame_atoms)

        if not orca.read(low_level_name).converged:
            skipped.append(frame_id)
            continue

        # No atoms are passed: the job is chained to the earlier one
        # through `previous`
        high_job = orca.job("ts_%d_high" % frame_id,
                            route,
                            atoms=[],
                            extra_section=extra_section,
                            charge=net_charge,
                            grad=True,
                            queue=queue,
                            procs=procs,
                            previous=low_level_name,
                            sandbox=False)
        submitted.append(high_job)
    return submitted, skipped
Пример #10
0
def run_high_level(training_sets_folder="training_sets"):
	"""
	Submit a high level (PW6B95) orca gradient job for every training set
	frame whose earlier "ts_<n>" job is reported as converged.

	**Parameters**

		training_sets_folder: *str, optional*
			Folder holding the frames as "<integer>.cml" files.  Defaults to
			"training_sets", the folder this function previously had
			hard-coded.

	**Returns**

		running_jobs: *list*
			The objects returned by orca.job for the submitted
			"ts_<n>_high" jobs.
		previous_failed: *list, int*
			Frame numbers skipped because orca.read("ts_<n>") did not report
			convergence.

	**Raises**

		Exception if the folder does not exist or holds no .cml file.

	The queue and processor count come from the module level names
	QUEUE_TO_RUN_ON and QUEUE_PROCS.
	"""
	if not os.path.exists(training_sets_folder):
		raise Exception("No training set folder to run.")

	frange = sorted(int(a.split('.cml')[0]) for a in os.listdir(training_sets_folder) if a.endswith(".cml"))
	if not frange:
		raise Exception("No viable files in training sets folder.")

	route = "! PW6B95 def2-TZVP GCP(DFT/TZ) ECP{def2-TZVP} Grid7"
	extra_section = ""

	running_jobs = []
	previous_failed = []
	for i in frange:
		atoms = files.read_cml("%s/%d.cml" % (training_sets_folder, i), allow_errors=True, test_charges=False, return_molecules=False)[0]
		charge = sum([a.type.charge for a in atoms])
		prev_converged = orca.read("ts_%d" % i).converged
		if prev_converged:
			# No atoms are passed: the job is chained to "ts_<n>" through `previous`
			running_jobs.append( orca.job("ts_%d_high" % i, route, atoms=[], extra_section=extra_section, charge=charge, grad=True, queue=QUEUE_TO_RUN_ON, procs=QUEUE_PROCS, previous="ts_%d" % i, sandbox=False) )
		else:
			previous_failed.append(i)
	return running_jobs, previous_failed
Пример #11
0
def minimize_seeds(procs=4, queue=None, extra_parameters={}):
    """
    A function to optimize the geometry of the supplied seeds in the seed
    directory.  Each seed is optimized with orca at a low level of theory
    (B97-D3) and then, restarting from that job, at a higher one (PW6B95).
    Every structure whose high level job converged is written back with
    files.write_cml under the name "seed/<previous_name>_opt"; seeds that
    failed to converge are reported on stdout and skipped.

    **Parameters**

        procs: *int, optional*
            How many processors to use for this.
        queue: *str, optional*
            What queue to run the simulation on.
        extra_parameters: *dict, optional*
            Forwarded unchanged to files.read_cml.

    **Returns**

        None
    """
    route_low = "! OPT B97-D3 def2-TZVP ECP{def2-TZVP} Grid7"
    extra_section_low = ""
    route_high = "! OPT PW6B95 def2-TZVP ECP{def2-TZVP} Grid7"
    extra_section_high = ""

    # Only .cml files are seeds.  Anything else living in the directory
    # (notes, editor backups, ...) is skipped instead of being handed to
    # the cml reader.
    seed_files = [f for f in os.listdir("seed") if f.endswith(".cml")]
    seed_names = [f.split(".cml")[0] for f in seed_files]
    seeds = [
        files.read_cml("seed/%s" % f,
                       allow_errors=True,
                       test_charges=False,
                       extra_parameters=extra_parameters)[0]
        for f in seed_files
    ]
    # The net charge is a property of the seed: compute it once and reuse
    # it for both levels of theory.
    charges = [sum([a.type.charge for a in seed]) for seed in seeds]

    # Low level optimization, starting from the seed geometry
    low_jobs = [
        orca.job("seed_%d_low" % i,
                 route_low,
                 atoms=seed,
                 extra_section=extra_section_low,
                 charge=charges[i],
                 grad=False,
                 queue=queue,
                 procs=procs,
                 sandbox=False)
        for i, seed in enumerate(seeds)
    ]
    for job in low_jobs:
        job.wait()

    # High level optimization: no atoms are passed, each job is chained to
    # its low level counterpart through `previous`
    high_jobs = [
        orca.job("seed_%d_high" % i,
                 route_high,
                 atoms=[],
                 extra_section=extra_section_high,
                 charge=charges[i],
                 grad=False,
                 queue=queue,
                 procs=procs,
                 previous="seed_%d_low" % i,
                 sandbox=False)
        for i in range(len(seeds))
    ]
    for job in high_jobs:
        job.wait()

    for i, name in enumerate(seed_names):
        result = orca.read("seed_%d_high" % i)
        if not result.converged:
            print("Failed to optimize %s" % name)
            continue
        new_pos = result.atoms
        cml_file = files.read_cml("seed/%s" % name,
                                  allow_errors=True,
                                  test_charges=False,
                                  return_molecules=True,
                                  extra_parameters=extra_parameters)
        # Copy coordinates atom by atom; assumes orca.read returns the
        # atoms in the same order in which the molecules store them -
        # TODO confirm
        j = 0
        for mol in cml_file:
            for a in mol.atoms:
                b = new_pos[j]
                a.x, a.y, a.z = b.x, b.y, b.z
                j += 1
        files.write_cml(cml_file, name="seed/%s_opt" % name)
Пример #12
0
def generate_lead_halide_cation(halide, cation, ion="Pb", run_opt=True):
    """
    Build (or load from cache) the system made of a lead-halide unit and a
    cation placed above it.

    If "<cml_dir>/<name>.cml" already exists, with <name> given by
    reduce_to_name(ion, halide, cation), that file is read and returned as
    a structures.Molecule.  Otherwise the cation is read from
    "<cml_dir>/<cation>.cml", oriented, shifted so that its lowest atom
    sits at the van der Waals radius of `ion` along z, combined with the
    atoms returned by generate_lead_halide, optionally optimized with
    orca, typed, written to the cache file and returned.

    **Parameters**

        halide:
            Forwarded to reduce_to_name and generate_lead_halide.
        cation: *str*
            Name of the cation cml file (without extension) in cml_dir.
        ion: *str, optional*
            Element symbol of the central ion.
        run_opt: *bool, optional*
            Whether to run an orca job and adopt the atom positions it
            returns.

    **Returns**

        system: *structures.Molecule*
    """
    cml_dir = fpl_constants.cml_dir
    fname = reduce_to_name(ion, halide, cation)
    if not cml_dir.endswith("/"):
        cml_dir = cml_dir + "/"
    cached_cml = cml_dir + fname + ".cml"

    # Reuse a previously generated system when one is on disk
    if os.path.exists(cached_cml):
        print("Found system in cml folder, returning system")
        cached_atoms = files.read_cml(cached_cml,
                                      test_charges=False,
                                      allow_errors=True)[0]
        return structures.Molecule(cached_atoms)

    # The lead-halide part, then the cation read from its cml file
    PbX3 = generate_lead_halide(halide, ion=ion)
    atoms, bonds, _, _ = files.read_cml(cml_dir + cation + ".cml",
                                        test_charges=False,
                                        allow_errors=True)
    system = structures.Molecule(atoms)
    system.atoms = geometry.align_centroid(system.atoms)[0]

    # Exactly two nitrogens is taken to mean FA, which is kept flat; every
    # other cation is rotated by 90 degrees about the y axis.
    # NOTE(review): the FA branch translates by +center_of_mass although
    # the original comment says "translate to origin" - confirm the sign
    # convention of translate().
    elems = [a.element for a in system.atoms]
    if elems.count("N") != 2:
        system.rotate(geometry.rotation_matrix([0, 1, 0], 90, units="deg"))
    else:
        system.translate(system.get_center_of_mass())

    # When both N and C are present, the first N must not sit above the
    # first C; flip by 180 degrees about y otherwise.
    if "N" in elems and "C" in elems:
        first_N = next(a for a in system.atoms if a.element == "N")
        first_C = next(a for a in system.atoms if a.element == "C")
        if first_N.z > first_C.z:
            system.rotate(
                geometry.rotation_matrix([0, 1, 0], 180, units="deg"))

    # Bring the lowest atom to z = 0 ...
    lowest_z = min(a.z for a in system.atoms)
    system.translate([0, 0, lowest_z * -1])

    # ... then lift by the vdw radius of the ion and add the PbX3 atoms
    ion_vdw_radius = PERIODIC_TABLE[units.elem_s2i(ion)]['vdw_r']
    system.translate([0, 0, ion_vdw_radius])
    system.atoms += PbX3.atoms

    if run_opt:
        opt_job = orca.job(fname,
                           fpl_constants.default_routes[0],
                           atoms=system.atoms,
                           extra_section=fpl_constants.extra_section,
                           queue="batch",
                           procs=2)
        opt_job.wait()
        optimized = orca.read(fname).atoms
        for old, new in zip(system.atoms, optimized):
            old.x, old.y, old.z = new.x, new.y, new.z

    # Assign the types stored in fpl_constants to the ion and the halides
    typed_elements = (ion, "Cl", "Br", "I")
    for a in system.atoms:
        if a.element not in typed_elements:
            continue
        a.type = fpl_constants.atom_types[a.element]
        a.type_index = a.type["index"]

    # Cache the result so the next call returns early
    files.write_cml(system, bonds=bonds, name=cached_cml)
    return system
Пример #13
0
def opt_opls(molecule, parameter_file='oplsaa.prm', taboo_time=100):
	"""
	Randomly search for an OPLS atom type assignment for a cml structure
	and print the result.

	Two searches of 100000 steps each are run one after the other:

	1. over structural types (index2), accepting a random change of one
	   atom when the number of bonds/angles/dihedrals without a matching
	   entry in the parameter file does not increase;
	2. over concrete type indices sharing the chosen index2 (restricted to
	   index <= 440), accepting a change when the absolute net charge does
	   not increase.

	In both searches an atom whose change was accepted is marked in the
	`taboo` list for `taboo_time` steps, during which the atom selection
	tries to avoid it.

	Progress is printed every 10000 steps and the element and notes of the
	finally chosen types are printed at the end.  Nothing is returned.
	"""
	elements, atom_types, bond_types, angle_types, dihedral_types = files.read_opls_parameters(parameter_file)
	atoms, bonds, angles, dihedrals = files.read_cml(molecule, parameter_file=None)
	
	# Lookup tables keyed by the tuple of index2 values, registered in both
	# reading directions
	bond_types_by_index2 = dict( [ (tuple(t.index2s),t) for t in bond_types ] + [ (tuple(reversed(t.index2s)),t) for t in bond_types ] )
	angle_types_by_index2 = dict( [ (tuple(t.index2s),t) for t in angle_types ] + [ (tuple(reversed(t.index2s)),t) for t in angle_types ] )
	dihedral_types_by_index2 = dict( [ (tuple(t.index2s),t) for t in dihedral_types ] + [ (tuple(reversed(t.index2s)),t) for t in dihedral_types ] )
	
	charges_by_index = dict( [ (t.index,t.charge) for t in atom_types ] )
	
	# Candidate index2 values per atom: same element and same number of
	# bonded neighbours as the atom type
	for a in atoms:
		a.possible_types = set()
		for t in atom_types:
			if elements_by_atomic_number[t.element] == a.element and t.bond_count==len(a.bonded):
				a.possible_types.add(t.index2)
		a.possible_types = list(a.possible_types)
		#a.possible_types.append(0)
	
	# Assigns `types` to the atoms (as a.index2) and counts the bonds,
	# angles and dihedrals that have no entry in the lookup tables
	def count_conflicts(types):
		for i,a in enumerate(atoms):
			a.index2 = types[i]
		conflicts = 0
		for b in bonds:
			index2s = (b.atoms[0].index2, b.atoms[1].index2)
			if not index2s in bond_types_by_index2:
				conflicts += 1
		
		for a in angles:
			index2s = (a.atoms[0].index2, a.atoms[1].index2, a.atoms[2].index2)
			if not index2s in angle_types_by_index2:
				conflicts += 1
		
		# A dihedral is accepted if it matches exactly, or with 0 in place
		# of its first or of its last index2
		for d in dihedrals: 
			index2s_0 = (d.atoms[0].index2, d.atoms[1].index2, d.atoms[2].index2, d.atoms[3].index2)
			index2s_1 = (0,                 d.atoms[1].index2, d.atoms[2].index2, d.atoms[3].index2)
			index2s_2 = (d.atoms[0].index2, d.atoms[1].index2, d.atoms[2].index2,        0)
			in0 = index2s_0 in dihedral_types_by_index2
			in1 = index2s_1 in dihedral_types_by_index2
			in2 = index2s_2 in dihedral_types_by_index2
			if not in0 and not in1 and not in2:
				conflicts += 1
		return conflicts
	
	# First search: minimise the number of conflicts over index2 values
	import random
	types = [random.choice(a.possible_types) for a in atoms]
	taboo = [0 for a in atoms]
	best = count_conflicts(types)
	
	step = 0
	for step in range(100000):
		# Pick a random atom; re-draw while the pick is taboo, at most
		# len(types) times (the last draw is used even if still taboo)
		i = random.randint( 0, len(types)-1 )
		for guess in types:
			if taboo[i]>0:
				i = random.randint( 0, len(types)-1 )
			else:
				break
		old_type = types[i]
		types[i] = random.choice(atoms[i].possible_types)
		
		# Keep the change if it is not worse, otherwise revert it
		conflicts = count_conflicts(types)
		if conflicts <= best:
			best = conflicts
			taboo[i] = taboo_time
		else:
			types[i] = old_type
	
		# Age the taboo counters by one step
		taboo = [t-1 if t>0 else 0 for t in taboo]
	
		if step % 10000 == 0:
			print best, conflicts, types
		# No effect on the iteration: the for loop rebinds step each pass
		step += 1

	# Atom types having the given index2, limited to index <= 440
	def types_from_index2(x):
		return [t for t in atom_types if t.index2==x and t.index<=440]

	for i,tt in enumerate( [ types_from_index2(x) for x in types] ):
		#print i, atoms[i].element
		atoms[i].index_options = [t.index for t in tt]
		#for t in tt:
		#	print '\t', t.index, t.notes
	
	
	
	
	
	# Sum of the charges of the given type indices
	def net_charge(types):
		charge = 0.0
		for t in types:
			charge += charges_by_index[t]
		return charge
	
	# Second search: same scheme over concrete type indices, minimising the
	# absolute net charge
	types = [random.choice(a.index_options) for a in atoms]
	taboo = [0 for a in atoms]
	best = net_charge(types)
	
	for step in range(100000):
		i = random.randint( 0, len(types)-1 )
		for guess in types:
			if taboo[i]>0:
				i = random.randint( 0, len(types)-1 )
			else:
				break
		old_type = types[i]
		types[i] = random.choice(atoms[i].index_options)
		
		charge = net_charge(types)
		if abs(charge) <= abs(best):
			best = charge
			taboo[i] = taboo_time
		else:
			types[i] = old_type
	
		taboo = [t-1 if t>0 else 0 for t in taboo]
	
		if step % 10000 == 0:
			print best, charge, types
		# No effect on the iteration: the for loop rebinds step each pass
		step += 1

	# Report the element and notes of every chosen type
	for t in types:
		for t2 in atom_types:
			if t2.index==t:
				print t2.element, t2.notes
Пример #14
0
def minimize_seeds(procs=4, queue=None, extra_parameters={}):
    """
    A function to optimize the geometry of the supplied seeds in the seed
    directory.  Seeds are optimized with orca first with the B97-D3 route
    and then, chained to that job, with the PW6B95 route.  Each structure
    whose second job converged is written with files.write_cml under the
    name "seed/<previous_name>_opt"; a message is printed for every seed
    that did not converge and that seed is left untouched.

    **Parameters**

        procs: *int, optional*
            How many processors to use for this.
        queue: *str, optional*
            What queue to run the simulation on.
        extra_parameters: *dict, optional*
            Forwarded unchanged to files.read_cml.

    **Returns**

        None
    """
    route_low = "! OPT B97-D3 def2-TZVP ECP{def2-TZVP} Grid7"
    extra_section_low = ""
    route_high = "! OPT PW6B95 def2-TZVP ECP{def2-TZVP} Grid7"
    extra_section_high = ""

    seeds = []
    seed_names = []
    charges = []
    for seed in os.listdir("seed"):
        # Skip anything that is not a cml file (notes, backups, ...)
        # rather than passing it to the cml reader.
        if not seed.endswith(".cml"):
            continue
        seed_atoms = files.read_cml("seed/%s" % seed,
                                    allow_errors=True,
                                    test_charges=False,
                                    extra_parameters=extra_parameters)[0]
        seeds.append(seed_atoms)
        seed_names.append(seed.split(".cml")[0])
        # Same net charge for both levels of theory, so compute it once
        charges.append(sum([a.type.charge for a in seed_atoms]))

    # Low level pass, starting from the seed geometry
    low_jobs = []
    for i, seed in enumerate(seeds):
        low_jobs.append(orca.job("seed_%d_low" % i, route_low,
                                 atoms=seed,
                                 extra_section=extra_section_low,
                                 charge=charges[i],
                                 grad=False,
                                 queue=queue,
                                 procs=procs,
                                 sandbox=False))
    for job in low_jobs:
        job.wait()

    # High level pass: no atoms are passed, each job is chained to its low
    # level counterpart through `previous`
    high_jobs = []
    for i in range(len(seeds)):
        high_jobs.append(orca.job("seed_%d_high" % i, route_high,
                                  atoms=[],
                                  extra_section=extra_section_high,
                                  charge=charges[i],
                                  grad=False,
                                  queue=queue,
                                  procs=procs,
                                  previous="seed_%d_low" % i,
                                  sandbox=False))
    for job in high_jobs:
        job.wait()

    for i, name in enumerate(seed_names):
        result = orca.read("seed_%d_high" % i)
        if not result.converged:
            print("Failed to optimize %s" % name)
            continue
        new_pos = result.atoms
        cml_file = files.read_cml("seed/%s" % name,
                                  allow_errors=True,
                                  test_charges=False,
                                  return_molecules=True,
                                  extra_parameters=extra_parameters)
        # Copy coordinates atom by atom; assumes orca.read returns the
        # atoms in the same order in which the molecules store them -
        # TODO confirm
        j = 0
        for mol in cml_file:
            for a in mol.atoms:
                b = new_pos[j]
                a.x, a.y, a.z = b.x, b.y, b.z
                j += 1
        files.write_cml(cml_file, name="seed/%s_opt" % name)