def _get_solute_index(fpl_obj): try: xyz = fpl_obj.data[-1] except TypeError: xyz = [fpl_obj.data.atoms] system = fpl_obj.system ## Store end of last LAMMPs simulation to system.atoms variable for a, b in zip(system.atoms, xyz[-1]): a.x, a.y, a.z = b.x, b.y, b.z if any([np.isnan(x) for x in (a.x, a.y, a.z)]): return None ## Grab only molecules we're interested in. Here we find relative distances to the solute in question molecules_in_cluster = [] m_solute = None if fpl_obj.solute: m_solute = structures.Molecule(fpl_constants.cml_dir + fpl_obj.solute, test_charges=False, allow_errors=True) diffs = [] for molec in system.molecules: # NOTE, ORDER MATTERS! As procrustes WILL change the atomic positions of the # second list of atoms to best match the first. We don't care if m_solute # changes, but if everything else overlaps with m_solute then we have an issue. chk = [molec.atoms, m_solute.atoms] if len(chk[0]) != len(chk[1]): continue #chk = [copy.deepcopy(molec.atoms), copy.deepcopy(m_solute.atoms)] geometry.procrustes(chk) diffs.append(geometry.motion_per_frame(chk)[-1]) index_of_solute = diffs.index(min(diffs)) else: index_of_solute = 0 return index_of_solute
def makeMoleculePattern(mol):
    '''
    Build a Species holding a single bare Molecule that carries
    the name and idx of the given molecule
    '''
    molecule = st.Molecule(mol.name, mol.idx)
    pattern = st.Species()
    pattern.addMolecule(molecule)
    return pattern
def makeUnboundPattern(mol, comp):
    '''
    Build a Species holding one Molecule with one Component.
    The component is created with two empty lists, and no bond
    or state is added to it afterwards
    '''
    component = st.Component(comp.name, comp.idx, [], [])
    molecule = st.Molecule(mol.name, mol.idx)
    molecule.addComponent(component)

    pattern = st.Species()
    pattern.addMolecule(molecule)
    return pattern
def generate_system(self, halide, cation, ion="Pb"):
    """
    Build a packed system of solvent (plus the solute, when one is set) and
    store it on ``self.system``.

    The solvent cml file is looked up in ``fpl_constants.cml_dir`` using the
    lower-case solvent name first and the upper-case name second.

    **Parameters**

        halide, cation, ion:
            Forwarded to ``fpl_utils.generate_lead_halide_cation`` when
            ``self.solute`` is not None.

    **Returns**

        None
    """
    # Start from an empty box
    system = structures.System(box_size=(25, 25, 25), name=self.run_name)

    # Use whichever spelling of the solvent file exists (lower case wins)
    for candidate in (self.solvent_name.lower(), self.solvent_name.upper()):
        solvent_path = fpl_constants.cml_dir + candidate + ".cml"
        if os.path.exists(solvent_path):
            solvent = structures.Molecule(
                solvent_path,
                extra_parameters=self.extra,
                allow_errors=True,
                default_angles=fpl_constants.default_angles)
            break
    else:
        raise Exception(
            "Solvent file %s.cml does not exist in %s. Ensure you gave the file exists and re-run."
            % (self.solvent_name, fpl_constants.cml_dir))

    # The solute, if any, goes in before packing
    if self.solute is not None:
        fpl_utils.generate_lead_halide_cation(halide, cation, ion=ion)
        solute_path = fpl_constants.cml_dir + self.solute
        solute = structures.Molecule(
            solute_path,
            test_charges=False,
            allow_errors=True,
            default_angles=fpl_constants.default_angles)
        system.add(solute)

    # Fill the box with solvent at its tabulated density
    density = fpl_constants.solvent[self.solvent_name]["density"]
    system.packmol((solvent, ), (1, ), density, self.seed)

    self.system = system
def makeBondPattern(mc1, mc2):
    '''
    Build a two-molecule Species from two [Molecule Component] pairs.
    Each molecule gets one component, and both components carry bond '1'
    '''
    first_mol, first_comp = mc1
    second_mol, second_comp = mc2

    def bonded_molecule(mol, comp):
        # one molecule holding a single component that carries bond '1'
        component = st.Component(comp.name, comp.idx, [], [])
        component.addBond('1')
        molecule = st.Molecule(mol.name, mol.idx)
        molecule.addComponent(component)
        return molecule

    left = bonded_molecule(first_mol, first_comp)
    right = bonded_molecule(second_mol, second_comp)

    pattern = st.Species()
    for molecule in (left, right):
        pattern.addMolecule(molecule)
    return pattern
def makeStatePattern(mol, comp, state):
    '''
    Build a Species holding one Molecule with one Component whose
    only (and active) state is the given state
    '''
    component = st.Component(comp.name, comp.idx, [], [])
    component.addState(state)
    component.setActiveState(state)

    molecule = st.Molecule(mol.name, mol.idx)
    molecule.addComponent(component)

    pattern = st.Species()
    pattern.addMolecule(molecule)
    return pattern
def parseMolecules(molecules):
    '''
    Parses an XML molecule section.

    Reads the 'name' and 'id' attributes of the given element to create
    a Molecule, then adds one Component (built from 'name' and 'id') for
    every child of its ListOfComponentTypes element, if there is one.

    Returns: a molecule structure
    '''
    mol = st.Molecule(molecules.get('name'), molecules.get('id'))
    components = molecules.find(
        './/{http://www.sbml.org/sbml/level3}ListOfComponentTypes')
    # explicit None test: the truth value of an element is not a reliable
    # "was it found" check
    if components is not None:
        # iterate the element directly; Element.getchildren() no longer
        # exists in xml.etree.ElementTree on recent Python versions
        for component in components:
            comp = st.Component(component.get('name'), component.get('id'))
            mol.addComponent(comp)
    return mol
def read_seed(path="./seed", extra_parameters=None):
    """
    Read in all cml files from the seed directory.

    **Parameters**

        path: *str, optional*
            A path to the seed directory.
        extra_parameters: *dict, optional*
            Additional parameters to add to OPLSAA.  Defaults to a fresh
            empty dict on every call.

    **Returns**

        molecules_A: *list, list, molecules*
            A list of molecules from the seed directory
        molecules_B: *list, molecules*
            A list of molecules from the seed directory.  In this case, we
            merge child molecules into one.

    **Raises**

        Exception
            If the seed directory does not exist, or holds no .cml file.
    """
    # A shared mutable default would leak state between calls if the reader
    # ever modified it, so build a new dict per call instead.
    if extra_parameters is None:
        extra_parameters = {}

    if path.endswith("/"):
        path = path[:-1]
    if not os.path.exists(path):
        raise Exception("Unable to find seed directory")

    molecules_A = [
        files.read_cml(path + "/" + fptr,
                       return_molecules=True,
                       allow_errors=True,
                       test_charges=False,
                       extra_parameters=extra_parameters)
        for fptr in os.listdir(path)
        if fptr.endswith(".cml")
    ]
    if not molecules_A:
        raise Exception("Seed directory is empty")

    # Merge the child molecules of every seed file into one molecule
    molecules_B = []
    for seed in molecules_A:
        atoms, bonds, angles, dihedrals = [], [], [], []
        for mol in seed:
            atoms += mol.atoms
            bonds += mol.bonds
            angles += mol.angles
            dihedrals += mol.dihedrals
        molecules_B.append(
            structures.Molecule(atoms, bonds, angles, dihedrals))

    return molecules_A, molecules_B
def generate_lead_halide(halide, ion="Pb"):
    """
    Build a molecule made of one central ion and its halides.

    The ion is placed at the origin.  Each halide is appended at
    (v, 0, 0.5 * v), where v is its 'vdw_r' entry in PERIODIC_TABLE, and
    the whole molecule is then rotated by 120 degrees about [0, 0, 1]
    before the next halide is added.

    **Parameters**

        halide: *str or list, str*
            A single halide element, which is used three times, or a
            list of halide elements.
        ion: *str, optional*
            The element of the central ion.

    **Returns**

        PbX: *Molecule*
            The ion + halide molecule.
    """
    PbX = structures.Molecule([structures.Atom(ion, 0, 0, 0)])
    if isinstance(halide, str):
        halide = [halide, halide, halide]

    def vdw(y):
        return PERIODIC_TABLE[units.elem_s2i(y)]['vdw_r']

    # The rotation does not depend on the halide, so build it only once
    R = geometry.rotation_matrix([0, 0, 1], 120, units="deg")
    for x in halide:
        v = vdw(x)
        PbX.atoms.append(structures.Atom(x, v, 0, 0.5 * v))
        PbX.rotate(R)
    return PbX
def merge(reactant1, reactant2, r1, r2, translator, outputFlag=False):
    '''
    Receives two species reactant1 and reactant2, and their
    intersection points r1 and r2 and creates a new complex
    that is the union

    The function has no return statement; its only lasting effect is on
    the entries of <translator> keyed by the two molecule names.

    NOTE(review): binding1 and binding2 are used below but are neither
    parameters nor assigned anywhere in this function, so unless they exist
    as module-level names this raises NameError. Confirm what they were
    meant to be before relying on this function.
    '''
    # start from whatever is already known about both reactants
    species = st.Species()
    if reactant1 in translator:
        species.append(translator[reactant1])
    if reactant2 in translator:
        species.append(translator[reactant2])
    if outputFlag:
        print '-----------', species, reactant1, reactant2, reactant2 in translator
    # next bond label; max() fails if getBondNumbers() comes back empty
    rnd = max(species.getBondNumbers()) + 1
    # NOTE(review): binding1 / binding2 are undefined in this scope
    molecule1 = st.Molecule(binding1)
    molecule2 = st.Molecule(binding2)
    component1 = st.Component(r1)
    component2 = st.Component(r2)
    # both components get the same bond label
    component1.addBond(str(rnd))
    component2.addBond(str(rnd))
    molecule1.addComponent(component1)
    molecule2.addComponent(component2)
    species.addMolecule(molecule1, True, 1)
    counter = 2 if binding1 == binding2 else 1
    species.addMolecule(molecule2, True, counter)
    ####TODO: update the rawDAtabase with the m1m2 information
    # extend the existing translator entries with the new components
    if molecule1.name in translator:
        sp = st.Species()
        sp.addMolecule(deepcopy(molecule1))
        translator[molecule1.name].extend(sp)
        translator[molecule1.name].reset()
    if molecule2.name in translator:
        sp = st.Species()
        sp.addMolecule(deepcopy(molecule2))
        translator[molecule2.name].extend(sp)
        translator[molecule2.name].reset()
def createMolecule(molecule, bonds):
    '''
    Builds a Molecule from an XML molecule element.

    One Component is created for every child of the element's
    ListOfComponents (if present). A component gets a bond when its
    'numberOfBonds' attribute is '+' or '?' (the wildcard itself is used)
    or anything other than '0' (the bond is looked up through findBond).
    Its 'state' attribute, or '' when absent, becomes its only and active
    state.

    Returns: the molecule and a dictionary mapping the ids of the molecule
    and of its components to their names
    '''
    nameDict = {}
    mol = st.Molecule(molecule.get('name'), molecule.get('id'))
    nameDict[molecule.get('id')] = molecule.get('name')
    listOfComponents = molecule.find(
        './/{http://www.sbml.org/sbml/level3}ListOfComponents')
    if listOfComponents is not None:
        for element in listOfComponents:
            elementId = element.get('id')
            component = st.Component(element.get('name'), elementId)
            nameDict[elementId] = element.get('name')

            numberOfBonds = element.get('numberOfBonds')
            if numberOfBonds in ('+', '?'):
                component.addBond(numberOfBonds)
            elif numberOfBonds != '0':
                component.addBond(findBond(bonds, elementId))

            state = element.get('state')
            if state is None:
                state = ''
            component.states.append(state)
            component.activeState = state
            mol.addComponent(component)
    return mol, nameDict
def get_test_system(length_in_ang=6.0, number_per_side=3,
                    path_to_unit_cell="/fs/home/hch54/Grad-MCSMRFF/PbCl3Cs/unit_cell"):
    """
    Build a cubic test system by adding copies of one unit cell on an
    N x N x N grid.

    **Parameters**

        length_in_ang: *float, optional*
            Grid spacing between neighbouring copies.
        number_per_side: *int, optional*
            Number of copies along each axis.
        path_to_unit_cell: *str, optional*
            Path handed to structures.Molecule to read the unit cell.

    **Returns**

        test_system: *System*
            The system holding all the copies.
    """
    box_edge = length_in_ang * number_per_side + 0.5
    test_system = structures.System(box_size=[box_edge, box_edge, box_edge],
                                    name="test_run")
    unit_cell = structures.Molecule(path_to_unit_cell,
                                    extra_parameters=extra_Pb,
                                    test_charges=False)

    # Positions along one axis; the same values are reused for x, y and z
    offsets = [(i - 0.5) * length_in_ang for i in range(number_per_side)]
    for x in offsets:
        for y in offsets:
            for z in offsets:
                test_system.add(unit_cell, x, y, z)

    return test_system
def extractTransformations(rules):
    '''
    goes through the list of rules and extracts its reactioncenter,context and product
    atomic patterns per transformation action
    also resolves wildcard patterns to create additional context links and deleting the wildcard pattern - js

    <rules> is an iterable of (react, product, act, mapp, nameDict) tuples.

    Returns: atomicArray, transformationCenter, transformationContext,
    productElements, actionName, label. The last five are lists with one
    entry per action, in the order the actions were visited.
    '''
    atomicArray = {}
    transformationCenter = []
    transformationContext = []
    productElements = []
    actionName = []
    label = []

    for index, (react, product, act, mapp, nameDict) in enumerate(rules, 1):
        for action in act:
            atomic, reactionCenter, context = extractMolecules(
                action.action, action.site1, action.site2, react)
            atomicArray.update(atomic)
            # this method does not extract reaction centers for creation and deletion transformations
            # however it extracts context correctly
            # so generate the reactioncenter here
            if action.action in ('Delete', 'Add'):
                temp = st.Species()
                temp.addMolecule(st.Molecule(nameDict[action.site1], 1))
                atomicArray[str(temp)] = temp
                if action.action == 'Delete':
                    # the molecule exists on the reactant side only
                    transformationCenter.append({str(temp)})
                    productElements.append(set())
                else:
                    # the molecule exists on the product side only
                    transformationCenter.append(set())
                    productElements.append({str(temp)})
                transformationContext.append(context)
            else:
                transformationCenter.append(reactionCenter)
                transformationContext.append(context)
                productSites = [
                    getMapping(mapp, action.site1),
                    getMapping(mapp, action.site2)
                ]
                atomic, rc, _ = extractMolecules(action.action,
                                                 productSites[0],
                                                 productSites[1], product)
                productElements.append(rc)
                atomicArray.update(atomic)

            name = '%i-%s' % (index, action.action)
            actionName.append(name)
            r = '+'.join([str(x) for x in react])
            p = '+'.join([str(x) for x in product])
            label.append('->'.join([r, p, name]))

    # resolving bond wildcards
    wildcards = [x for x in atomicArray if '!+' in x]
    bondedpatterns = [
        x for x in atomicArray if '!' in x and x not in wildcards
    ]
    for item in wildcards:
        # str.find works on every Python version, unlike the module level
        # string.find() which only exists in Python 2
        loc = item.find('+')
        selected_bondedpatterns = [
            x for x in bondedpatterns if item[0:loc] in x
        ]
        # replace the wildcard, in place, by the concrete bonded patterns
        for set1 in transformationContext:
            if item in set1:
                set1.update(selected_bondedpatterns)
                set1.remove(item)
        del atomicArray[item]

    return atomicArray, transformationCenter, transformationContext, productElements, actionName, label
def catalysis(original, dictionary, rawDatabase, catalysisDatabase, translator,
              namingConvention, classification, reactionProperties):
    """
    This method is for reactions of the form A+ B -> A' + B

    For every reactant in both sides of <original> it builds a species in
    which the molecules named by the naming convention receive a
    component/state taken from the result of catalyze(), and stores or
    extends the matching entries of <translator>. Nothing is returned.

    catalysisDatabase is not used in this function.

    NOTE(review): the nesting below was reconstructed from a source whose
    indentation was lost; spots where the nesting is a guess are marked.
    """
    #if 'EGF_EGFRim2_GAP_Grb2_Sos_Ras_GDP' in original[0] or 'EGF_EGFRim2_GAP_Grb2_Sos_Ras_GDP' in original[1]:
    #    print original,'EGF_EGFRim2_GAP_Grb2_Sos_Ras_GDP' in translator
    result = catalyze(namingConvention[0], namingConvention[1], classification,
                      rawDatabase, translator, reactionProperties)
    # does the shortest name show up in the reactants / the longest in the products?
    k = [min(namingConvention, key=len) in x for x in original[0]]
    k2 = [max(namingConvention, key=len) in x for x in original[1]]
    # NOTE(review): 'and' between two lists is not element-wise; it yields
    # k2 whenever k is non-empty and k otherwise - confirm this is intended
    k = k and k2
    # pair results and conventions with the (reactants, products) order
    sortedResult = [result[0], result[1]] if any(k) else [result[1], result[0]]
    sortedConvention = [
        namingConvention[0], namingConvention[1]
    ] if any(k) else [namingConvention[1], namingConvention[0]]
    flag = False
    # NOTE(review): looks like leftover debugging output
    if 'EGF_EGFRm2' in original[1]:
        print 'hello'
    for reactantGroup, res, conv in zip(original, sortedResult, sortedConvention):
        for reactant in reactantGroup:
            # flag records whether any molecule of this reactant was modified
            flag = False
            species = st.Species()
            #if original[0][0] in translator:
            #   species = deepcopy(translator[original[0][0]])
            #make a copy of the original element we are going to modify
            if reactant in translator:
                species = deepcopy(translator[reactant])
            elif sortedConvention[0] in translator:
                species = deepcopy(translator[sortedConvention[0]])
            tmp = dictionary[reactant]
            for element in tmp:
                molecule = st.Molecule(element)
                #here it would be much more precise to have the molecule
                #that is going to be modified instead of just modifying the
                #first thing you find
                if element in conv:
                    #chunk = result[1] if reactant == max(namingConvention,key=len) else result[0]
                    component = st.Component(res[0])
                    component.addState(res[1])
                    molecule.addComponent(component, 1)
                    flag = True
                    # finalMolecule is only bound here, together with flag
                    finalMolecule = molecule
                '''
                else:
                    if conv in reactant:
                        component = st.Component(res[0])
                        component.addState(res[1])
                        molecule.addComponent(component,1)
                        print conv,molecule,element
                        flag = True
                        #continue
                '''
                #FIXME: the comparison should be done a lil more carefully
                #to avoid overlap
                species.addMolecule(molecule, True)
                if str(species) == '':
                    species.addMolecule(molecule)
                    # NOTE(review): nesting of this break is reconstructed - confirm
                    break
            if flag:
                if reactant not in translator:
                    translator[reactant] = species
                else:
                    translator[reactant].extend(species, False)
                if finalMolecule.name in translator:
                    if len(translator[finalMolecule.name].molecules) == 1:
                        sp = st.Species()
                        sp.addMolecule(deepcopy(finalMolecule))
                        translator[finalMolecule.name].extend(sp, False)
                        translator[finalMolecule.name].reset()
                # NOTE(review): which 'if' this else belongs to is reconstructed - confirm
                else:
                    sp = st.Species()
                    sp.addMolecule(molecule)
                    translator[molecule.name] = deepcopy(sp)
    if len(original[0]) < len(original[1]):
        rebalance(original, sortedConvention, translator)
def getIntersection(reactants, product, dictionary, rawDatabase, translator,
                    synthesisDatabase, originalProductName, outputFlag=False):
    '''
    this method goes through two complexes and tries to check how they
    get together to create a product (e.g. how their components link)
    either by using previous knowledge or by creating a new complex

    Always returns a 4-tuple:
      - (extended1, extended2, intersection, []) when findIntersection()
        finds an existing match,
      - (species, [], [], []) when a new complex had to be created,
      - (None, None, None, None) when no binding components could be
        determined, or the created species holds no molecules.
    When a new complex is created, the <translator> entries of both bound
    molecules are created or extended as a side effect.

    NOTE(review): the nesting below was reconstructed from a source whose
    indentation was lost; spots where the nesting is a guess are marked.
    '''
    #global log
    # work on copies so the dictionary entries are left untouched
    extended1 = (copy(dictionary[reactants[0]]))
    extended2 = (copy(dictionary[reactants[1]]))
    # a bare name is treated as a one-element list
    if isinstance(extended1, str):
        extended1 = [extended1]
    if isinstance(extended2, str):
        extended2 = [extended2]
    #if we can find an element in the database that is a subset of
    #union(extended1,extended2) we take it
    intersection = findIntersection(extended1, extended2, synthesisDatabase)
    #otherwise we create it from scratch
    if not intersection:
        r1 = getFreeRadical(extended1, extended2[0], rawDatabase, translator,
                            product, dictionary)
        r2 = getFreeRadical(extended2, extended1[0], rawDatabase, translator,
                            product, dictionary)
        binding1, binding2 = getBindingPoints(extended1, extended2, reactants,
                                              originalProductName[0])
        if not r1 or not r2:
            #prin t 'Cannot infer how',extended1,'binds to',extended2
            #log['reactions'].append((reactants,product))
            #return None,None,None
            #TODO this section should be activated by a flag instead
            #of being accessed by default
            #print extended1,extended2
            # create the missing binding information, then look it up again
            createIntersection((binding1, binding2), rawDatabase, translator,
                               dictionary)
            r1 = getFreeRadical((binding1, ), binding2, rawDatabase,
                                translator, product, dictionary)
            r2 = getFreeRadical((binding2, ), binding1, rawDatabase,
                                translator, product, dictionary)
            #print 'rrrrrrrrrrr',r1,r2
            if not r1 or not r2:
                return (None, None, None, None)
        ##todo: modify code to allow for carry over free radicals
        #FIXME: we can remove synthesisDatabase easily
        # start from whatever is already known about both reactants
        species = st.Species()
        if reactants[0] in translator:
            species.append(translator[reactants[0]])
        if reactants[1] in translator:
            species.append(translator[reactants[1]])
        if outputFlag:
            print '-----------', species, reactants[0], reactants[1], reactants[1] in translator
            # NOTE(review): nesting of this second print is reconstructed - confirm
            print '+++', binding1, binding2, r1, r2
        # next bond label; max() fails if getBondNumbers() comes back empty
        bondName = max(species.getBondNumbers()) + 1
        molecule1 = st.Molecule(binding1)
        molecule2 = st.Molecule(binding2)
        component1 = st.Component(r1)
        component2 = st.Component(r2)
        # both components get the same bond label
        component1.addBond(str(bondName))
        component2.addBond(str(bondName))
        molecule1.addComponent(component1)
        molecule2.addComponent(component2)
        if outputFlag:
            print '////////', molecule1, molecule2
        species.addMolecule(molecule1, True, 1)
        counter = 2 if binding1 == binding2 else 1
        species.addMolecule(molecule2, True, counter)
        if outputFlag:
            print '\\\\\\', species
        ####TODO: update the rawDAtabase with the m1m2 information
        # record the new components on the translator entry of each molecule
        sp = st.Species()
        sp.addMolecule(deepcopy(molecule1))
        if molecule1.name in translator:
            translator[molecule1.name].extend(sp)
        else:
            translator[molecule1.name] = sp
        # NOTE(review): reset() placed after the if/else - nesting reconstructed
        translator[molecule1.name].reset()
        sp = st.Species()
        sp.addMolecule(deepcopy(molecule2))
        if molecule2.name in translator:
            translator[molecule2.name].extend(sp)
        else:
            translator[molecule2.name] = sp
        translator[molecule2.name].reset()
        if outputFlag:
            print '||||||||||||||||||||||', translator[molecule2.name]
        #print reactants,product,str(species)
        #print name1,name2,extended1,extended2
        #print {x:str(translator[x]) for x in translator}, translator
        if len(species.molecules) == 0:
            return (None, None, None, None)
        return species, [], [], []
    return extended1, extended2, intersection, []
def synthesis(original, dictionary, rawDatabase, synthesisDatabase, translator,
              outputFlag=False):
    '''
    Goes through every name on both sides of the reaction <original> and
    makes sure <translator> holds a species for it, using
    findCorrespondence() to work out how it is built.

    Depending on what findCorrespondence() returns for a name:
      - (None, None): a single bare molecule is registered (unless the
        name is already known) and the reaction is appended to
        libsbml2bngl.log['reactions'],
      - a Species: it is stored under <tags> if that key is new,
      - anything else: the chunk is added to the (new or existing) species
        of that name, and <tags> is recorded in synthesisDatabase when it
        is in neither database.

    Always returns 0.

    NOTE(review): the nesting below was reconstructed from a source whose
    indentation was lost; spots where the nesting is a guess are marked.
    '''
    #reaction = []
    for elements in original:
        #temp = []
        for sbml_name in elements:
            ## If we have translated it before and its in mem   ory
            #            if molecule in translator:
            #                species.append(translator[molecule])
            #            else:
            if outputFlag:
                print '-', sbml_name
            #if 'EGF_EGFRim2_GAP_Grb2_Sos_Ras_GTP' in original[1]:
            #    print original
            #if 'P_KKK_KK' in translator:
            #    print 'hola'
            # NOTE(review): looks like leftover debugging output
            if 'EGF_EGFR2_PLCg' in original[1]:
                print original
            tags, molecules = findCorrespondence(original[0], original[1],
                                                 dictionary, sbml_name,
                                                 rawDatabase, synthesisDatabase,
                                                 translator, outputFlag)
            if (tags, molecules) == (None, None):
                # nothing could be inferred: fall back to a bare molecule
                tmp = st.Species()
                tmp.addMolecule(st.Molecule(sbml_name))
                if sbml_name not in translator:
                    translator[sbml_name] = tmp
                #raise InsufficientInformationError
                libsbml2bngl.log['reactions'].append(original)
            #TODO: probably we will need to add a check if there are several ways of defining a reaction
            elif isinstance(molecules, st.Species):
                #FIXME: there shouls be a better way to check whether i actually want to check or not
                if tags not in translator:
                    translator[tags] = molecules
            else:
                #tags = list(tags)
                #tags.sort()
                #tags = tuple(tags)
                precursors = []
                if sbml_name not in translator:
                    species = st.Species()
                #here we check if the new species is made of already existing molecules
                else:
                    species = translator[sbml_name]
                species.addChunk(tags, molecules, precursors)
                if sbml_name not in translator:
                    # the other side of the reaction
                    other = original[0] if original[0] != elements else original[1]
                    for tag in [x for x in other if x in translator]:
                        species.extend(translator[tag])
                    # NOTE(review): nesting of this assignment is reconstructed - confirm
                    translator[sbml_name] = species
                if tags not in synthesisDatabase and tags not in rawDatabase:
                    synthesisDatabase[tags] = tuple(molecules)
    return 0
def get_training_set(run_name, use_pickle=True, pickle_file_name=None):
    """
    Build the training-set system from the "training_sets" folder, or load
    it from a pickle file, then write the files for the run *run_name*
    into "lammps/<run_name>".

    **Parameters**

        run_name: *str*
            Name of the run; used for the output folder and, when the
            system is loaded from a pickle, as the system name.
        use_pickle: *bool, optional*
            Whether to load from / save to the pickle file.
        pickle_file_name: *str, optional*
            Pickle file to read.  When None,
            "training_sets/training_set.pickle" is used and is generated
            if it does not exist yet.

    **Returns**

        system: *System*
            The entire training set system.
        systems_by_composition: *dict, list, Molecule*
            The training molecules grouped by their sorted element list.

    **Raises**

        Exception
            If *pickle_file_name* was given but is not read (it does not
            exist, or *use_pickle* is False), or if the atoms of a cml file
            do not match the atoms of the corresponding orca output.
    """
    if pickle_file_name is None:
        pfile = "training_sets/training_set.pickle"
    else:
        pfile = pickle_file_name
    system = None

    # Build everything from the training_sets folder when there is no
    # pickle file to read, or when pickles are not wanted
    if not os.path.isfile(pfile) or not use_pickle:
        if pickle_file_name is not None:
            raise Exception("Requested file %s, but unable to read it in."
                            % pickle_file_name)

        # Box of 1000 x 100 x 100 to hold the training sets
        system = structures.System(box_size=[1e3, 100.0, 100.0],
                                   name="training_set")
        systems_by_composition = {}

        # Convert force from Ha/Bohr to kcal/mol-Ang.  The value has to be
        # returned, otherwise every force ends up being None.
        def convert(x):
            return units.convert_dist(
                "Ang", "Bohr", units.convert_energy("Ha", "kcal", x))

        for name in os.listdir("training_sets"):
            # Use any training subset that succeeded, warn on the others
            try:
                result = orca.read("training_sets/%s/%s.out" % (name, name))
            except IOError:
                print("Warning - Training Subset %s not included as results "
                      "not found..." % name)
                continue

            # Parse the forces; without forces this subset is not used
            try:
                forces = orca.engrad_read(
                    "training_sets/%s/%s.orca.engrad" % (name, name),
                    pos="Ang")[0]
                for a, b in zip(result.atoms, forces):
                    a.fx = convert(b.fx)
                    a.fy = convert(b.fy)
                    a.fz = convert(b.fz)
            except (IndexError, IOError):
                print("Warning - Training Subset %s not included as results "
                      "not found..." % name)
                continue

            # Get the bonding information
            with_bonds = structures.Molecule(
                "training_sets/%s/system.cml" % name,
                extra_parameters=extra_Pb,
                test_charges=False)

            # Copy the forces onto the molecule holding the bonds
            for a, b in zip(with_bonds.atoms, result.atoms):
                a.fx, a.fy, a.fz = b.fx, b.fy, b.fz
                # sanity check on atom positions
                if geometry.dist(a, b) > 1e-4:
                    raise Exception('Atoms are different:', (a.x, a.y, a.z),
                                    (b.x, b.y, b.z))

            with_bonds.energy = result.energy
            with_bonds.name = name

            # Group by composition (sorted, space separated element list)
            composition = ' '.join(sorted([a.element for a in result.atoms]))
            systems_by_composition.setdefault(composition, []).append(
                with_bonds)

        # Generate (1) the frames of the xyz file and (2) the system
        xyz_atoms = []
        to_delete = []
        for composition in systems_by_composition:
            subsets = systems_by_composition[composition]
            # Lowest energy training subset first
            subsets.sort(key=lambda s: s.energy)
            baseline_energy = subsets[0].energy
            for j, s in enumerate(subsets):
                # Energy relative to the lowest one, in kcal/mol
                s.energy -= baseline_energy
                s.energy = units.convert_energy("Ha", "kcal/mol", s.energy)
                # High-energy systems are unlikely to be sampled in MD
                if s.energy > 500.0:
                    to_delete.append([composition, j])
                    continue
                print("DEBUG: %s %s" % (s.name, s.energy))
                xyz_atoms.append(s.atoms)
                system.add(s, len(system.molecules) * 1000.0)

        # Drop the unused subsets, highest index first, so the remaining
        # indices stay valid while deleting
        to_delete = sorted(to_delete, key=lambda x: x[1])[::-1]
        for d1, d2 in to_delete:
            print("Warning - Training Subset %s not included as energy is "
                  "too high..." % systems_by_composition[d1][d2].name)
            del systems_by_composition[d1][d2]

        # Make the box just a little bigger (100) so everything fits
        system.xhi = len(system.molecules) * 1000.0 + 100.0

        # Write all of the states we are using to training_sets.xyz
        if not os.path.isdir("training_sets"):
            os.mkdir("training_sets")
        cwd = os.getcwd()
        os.chdir("training_sets")
        try:
            files.write_xyz(xyz_atoms, 'training_sets')
        finally:
            # always go back, even if writing failed
            os.chdir(cwd)

        if use_pickle:
            print("Saving pickle file %s..." % pfile)
            with open(pfile, "wb") as fptr:
                pickle.dump([system, systems_by_composition], fptr)

    # The pickle file exists and is wanted: just read it in
    if system is None and use_pickle:
        print("Reading pickle file %s..." % pfile)
        # NOTE: unpickling can run arbitrary code - only load trusted files
        with open(pfile, "rb") as fptr:
            system, systems_by_composition = pickle.load(fptr)
        system.name = run_name
    elif system is None:
        raise Exception("Requested file %s, but unable to read it in."
                        % pfile)

    # Save the data to files for this simulation of "run_name"
    run_dir = "lammps/%s" % run_name
    if not os.path.isdir("lammps"):
        os.mkdir("lammps")
    if not os.path.isdir(run_dir):
        os.mkdir(run_dir)
    cwd = os.getcwd()
    os.chdir(run_dir)
    try:
        mcsmrff_files.write_system_and_training_data(run_name, system,
                                                     systems_by_composition)
    finally:
        os.chdir(cwd)

    return system, systems_by_composition
def createBindingRBM(element, translator, dependencyGraph, bioGridFlag):
    '''
    Builds the species for the complex named element[0] and stores it in
    <translator> under that name.

    The species is assembled from the molecules listed in
    dependencyGraph[element[0]][0]. For every pair returned by
    getComplexationComponents2() both members get a bond (numbered with the
    index of the pair) on a component named after the lower-cased name of
    the other member; the component is created when no unbound one exists.
    Nothing is returned.

    NOTE(review): the nesting below was reconstructed from a source whose
    indentation was lost - confirm against the original file.
    '''
    species = st.Species()
    #go over the sct and reuse existing stuff
    for molecule in dependencyGraph[element[0]][0]:
        if molecule in translator:
            tmpSpecies = translator[molecule]
            # the molecule is an alias: update it from the entry of its true tag
            if molecule != getTrueTag(dependencyGraph, molecule):
                original = translator[getTrueTag(dependencyGraph, molecule)]
                updateSpecies(tmpSpecies, original.molecules[0])
            species.addMolecule(deepcopy(tmpSpecies.molecules[0]))
        else:
            mol = st.Molecule(molecule)
            # NOTE(review): this stores a Molecule in dependencyGraph, while
            # the loop above indexes dependencyGraph entries with [0] -
            # confirm this is intended
            dependencyGraph[molecule] = deepcopy(mol)
            species.addMolecule(mol)
    #how do things bind together?
    moleculePairsList = getComplexationComponents2(species, bioGridFlag)
    #TODO: update basic molecules with new components
    #translator[molecule[0].name].molecules[0].components.append(deepcopy(newComponent1))
    #translator[molecule[1].name].molecules[0].components.append(deepcopy(newComponent2))
    for idx, molecule in enumerate(moleculePairsList):
        flag = False
        #add bonds where binding components already exist
        for component in molecule[0].components:
            if component.name == molecule[1].name.lower() and \
                    len(component.bonds) == 0:
                component.bonds.append(idx)
                flag = True
                break
        if not flag:
            #create components if they dont exist already.
            #Add a bond afterwards
            newComponent1 = st.Component(molecule[1].name.lower())
            molecule[0].components.append(newComponent1)
            # also register the new component on the base molecule
            if newComponent1.name not in [x.name for x in translator[molecule[0].name].molecules[0].components]:
                translator[molecule[0].name].molecules[0].components.append(deepcopy(newComponent1))
            molecule[0].components[-1].bonds.append(idx)
        flag = False
        #same thing for the other member of the bond
        for component in molecule[1].components:
            if component.name == molecule[0].name.lower() and len(
                    component.bonds) == 0:
                component.bonds.append(idx)
                flag = True
                break
        if not flag:
            newComponent2 = st.Component(molecule[0].name.lower())
            molecule[1].components.append(newComponent2)
            # a homodimer already had its base molecule updated above
            if molecule[0].name != molecule[1].name:
                # NOTE(review): the membership test reads the components of
                # molecule[0] but the append targets molecule[1] - confirm
                # this is not a copy/paste slip
                if newComponent2.name not in [x.name for x in translator[molecule[0].name].molecules[0].components]:
                    translator[
                        molecule[1].name].molecules[0].components.append(
                            deepcopy(newComponent2))
            molecule[1].components[-1].bonds.append(idx)
    #update the translator
    translator[element[0]] = species
def pickle_training_set(run_name,
                        training_sets_folder="training_set",
                        pickle_file_name="training_set",
                        high_energy_cutoff=500.0,
                        system_x_offset=1000.0,
                        verbose=False,
                        extra_parameters=None):
    """
    A function to pickle together the training set in a manner that is
    readable for MCSMRFF.  This is a single LAMMPs data file with each
    training set offset along the x-axis by system_x_offset.  The pickle
    file, when read in later, holds a list of two objects.  The first is the
    entire system as described above.  The second is a dictionary of all
    molecules in the system, organized by composition.

    **Parameters**

        run_name: *str*
            Name of final training set.
        training_sets_folder: *str, optional*
            Path to the folder where all the training set data is.
        pickle_file_name: *str, optional*
            A name for the pickle file and training set system.  A trailing
            ".pickle" is ignored, and None means "training_set".
        high_energy_cutoff: *float, optional*
            A cutoff for systems that are too large in energy, as MD is
            likely never to sample them.
        system_x_offset: *float, optional*
            The x offset for the systems to be added by.
        verbose: *bool, optional*
            Whether to have additional stdout or not.
        extra_parameters: *dict, optional*
            A dictionary for additional parameters that do not exist in the
            default OPLSAA parameter file.  Defaults to a fresh empty dict.

    **Returns**

        system: *System*
            The entire training set system.
        systems_by_composition: *dict, list, Molecule*
            Each molecule organized in this hash table.

    **Raises**

        Exception
            If the pickle file already exists, or if the atoms of a cml file
            do not match the atoms of the corresponding orca output.
    """
    # A shared mutable default could leak state between calls
    if extra_parameters is None:
        extra_parameters = {}

    # Take care of pickle file I/O
    if training_sets_folder.endswith("/"):
        training_sets_folder = training_sets_folder[:-1]
    if pickle_file_name is None:
        pickle_file_name = "training_set"
    elif pickle_file_name.endswith(".pickle"):
        # Only strip the extension; splitting on ".pickle" would also cut
        # names that contain it in the middle
        pickle_file_name = pickle_file_name[:-len(".pickle")]
    pfile = training_sets_folder + "/" + pickle_file_name + ".pickle"
    sys_name = pickle_file_name
    if os.path.isfile(pfile):
        raise Exception("Pickled training set already exists!")

    # Generate empty system for your training set
    system = structures.System(box_size=[1e3, 100.0, 100.0], name=sys_name)
    systems_by_composition = {}

    # Convert force from Ha/Bohr to kcal/mol-Ang
    def convert(x):
        return units.convert_dist(
            "Ang", "Bohr", units.convert_energy("Ha", "kcal", x))

    for name in os.listdir(training_sets_folder):
        # Use any training subset that succeeded, warn on the others
        try:
            result = orca.read("%s/%s/%s.out"
                               % (training_sets_folder, name, name))
        except IOError:
            print("Warning - Training Subset %s not included as out file "
                  "not found..." % name)
            continue

        # Check for convergence
        if not result.converged:
            print("Warning - Results for %s have not converged." % name)
            continue

        # Parse the forces; without forces this subset is not used
        try:
            forces = orca.engrad_read(
                "%s/%s/%s.orca.engrad" % (training_sets_folder, name, name),
                pos="Ang")[0]
            for a, b in zip(result.atoms, forces):
                a.fx, a.fy, a.fz = convert(b.fx), convert(b.fy), convert(b.fz)
        except (IndexError, IOError):
            print("Warning - Training Subset %s not included as results "
                  "not found..." % name)
            continue

        # Get the bonding information
        with_bonds = structures.Molecule(
            "%s/%s/%s.cml" % (training_sets_folder, name, name),
            extra_parameters=extra_parameters,
            allow_errors=True,
            test_charges=False)

        # Copy the forces onto the molecule holding the bonds
        for a, b in zip(with_bonds.atoms, result.atoms):
            a.fx, a.fy, a.fz = b.fx, b.fy, b.fz
            # sanity check on atom positions
            if geometry.dist(a, b) > 1e-4:
                raise Exception('Atoms are different:', (a.x, a.y, a.z),
                                (b.x, b.y, b.z))

        # Rename and save energy
        with_bonds.energy = result.energy
        with_bonds.name = name

        # Group by composition (sorted, space separated element list)
        composition = ' '.join(sorted([a.element for a in result.atoms]))
        systems_by_composition.setdefault(composition, []).append(with_bonds)

    # Generate:
    #    (1) the frames of the xyz file
    #    (2) the system to simulate
    xyz_atoms = []
    to_delete = []
    for composition in systems_by_composition:
        subsets = systems_by_composition[composition]
        # Lowest energy training subset first
        subsets.sort(key=lambda s: s.energy)
        baseline_energy = subsets[0].energy
        for j, s in enumerate(subsets):
            # Energy relative to the lowest one, in kcal/mol
            s.energy -= baseline_energy
            s.energy = units.convert_energy("Ha", "kcal/mol", s.energy)
            # High-energy systems are unlikely to be sampled in MD
            if s.energy > high_energy_cutoff:
                to_delete.append([composition, j])
                continue
            if verbose:
                print("Using: %s %s" % (s.name, s.energy))
            xyz_atoms.append(s.atoms)
            system.add(s, len(system.molecules) * system_x_offset)

    # Drop the unused subsets, highest index first, so the remaining
    # indices stay valid while deleting
    to_delete = sorted(to_delete, key=lambda x: x[1])[::-1]
    for d1, d2 in to_delete:
        if verbose:
            print("Warning - Training Subset %s not included as energy is "
                  "too high..." % systems_by_composition[d1][d2].name)
        del systems_by_composition[d1][d2]

    # Make the box just a little bigger (100) so everything fits
    system.xhi = len(system.molecules) * system_x_offset + 100.0

    # Write all of the states we are using to an xyz file
    files.write_xyz(xyz_atoms, training_sets_folder + '/' + pickle_file_name)

    # Generate our pickle file
    print("Saving pickle file %s..." % pfile)
    with open(pfile, "wb") as fptr:
        pickle.dump([system, systems_by_composition], fptr)

    # Save the data to files for this simulation of "run_name"
    if not os.path.isdir(run_name):
        os.mkdir(run_name)
    cwd = os.getcwd()
    os.chdir(run_name)
    try:
        mcsmrff_files.write_system_and_training_data(run_name, system,
                                                     systems_by_composition)
    finally:
        # always go back to where we started, even on failure, so the
        # relative paths used below stay valid
        os.chdir(cwd)
    shutil.copyfile(pfile, "%s/%s.pickle" % (run_name, run_name))

    return system, systems_by_composition
def createEmptySpecies(name):
    '''Make a species holding one molecule that is built from ``name`` only.

    The molecule is created with no argument other than its name and is
    the only molecule added to the species that is returned.
    '''
    empty_pattern = st.Species()
    empty_pattern.addMolecule(st.Molecule(name))
    return empty_pattern
def generate_lead_halide_cation(halide, cation, ion="Pb", run_opt=True):
    """
    Build the ion + halide + cation molecule, or load it if already cached.

    The result is cached as "<name>.cml" inside fpl_constants.cml_dir, where
    <name> comes from reduce_to_name(ion, halide, cation).  When that file
    already exists it is read back and returned immediately; nothing else
    in this function runs in that case.

    **Parameters**

        halide:
            Halide description, forwarded unchanged to reduce_to_name and
            generate_lead_halide.
        cation:
            Name of the cation; "<cation>.cml" is read from the cml folder.
        ion: *str, optional*
            Central ion.  Used for the file name, the van der Waals offset
            and the atom typing.
        run_opt: *bool, optional*
            When true, an orca job named after the cache file is submitted
            with the first entry of fpl_constants.default_routes, waited on,
            and its atom positions are copied back onto the molecule.

    **Returns**

        system:
            A structures.Molecule.  On a fresh build its atoms are the
            cation atoms followed by the atoms of the lead-halide piece.
    """
    cml_dir = fpl_constants.cml_dir

    # Check if system exists
    fname = reduce_to_name(ion, halide, cation)
    if not cml_dir.endswith("/"):
        cml_dir = cml_dir + "/"
    cached_cml = cml_dir + fname + ".cml"

    if os.path.exists(cached_cml):
        print("Found system in cml folder, returning system")
        cached_atoms = files.read_cml(cached_cml,
                                      test_charges=False,
                                      allow_errors=True)[0]
        return structures.Molecule(cached_atoms)

    # Get the PbX3 system
    lead_halide = generate_lead_halide(halide, ion=ion)

    # Get the cation from the cml file; its bonds are reused when the
    # combined system is written out at the end.
    cation_atoms, bonds, _, _ = files.read_cml(cml_dir + cation + ".cml",
                                               test_charges=False,
                                               allow_errors=True)
    system = structures.Molecule(cation_atoms)

    # Align along X axis
    system.atoms = geometry.align_centroid(system.atoms)[0]

    # Rotate to Z axis
    # NOTE! In case of FA, we want flat so only translate to origin instead
    # NOTE! We have exactly 3 cations we observe: Cs, MA, FA. If 2 N, then FA
    elems = [atom.element for atom in system.atoms]
    if elems.count("N") != 2:
        system.rotate(geometry.rotation_matrix([0, 1, 0], 90, units="deg"))
    else:
        # NOTE(review): this shifts by +centre-of-mass, while the z offset
        # further down treats translate() as additive -- confirm this
        # really moves the cation onto the origin as the note above says.
        system.translate(system.get_center_of_mass())

    # If N and C in system, ensure N is below C (closer to Pb).  Only the
    # first N and the first C found in the atom list are compared.
    if "N" in elems and "C" in elems:
        first_n = [atom for atom in system.atoms if atom.element == "N"][0]
        first_c = [atom for atom in system.atoms if atom.element == "C"][0]
        if first_n.z > first_c.z:
            # Flip if needed
            system.rotate(
                geometry.rotation_matrix([0, 1, 0], 180, units="deg"))

    # Offset system so lowest point is at 0 in the z dir
    lowest_z = min([atom.z for atom in system.atoms])
    system.translate([0, 0, lowest_z * -1])

    # Add to the PbX3 system with an offset of vdw(Pb)
    ion_vdw_radius = PERIODIC_TABLE[units.elem_s2i(ion)]['vdw_r']
    system.translate([0, 0, ion_vdw_radius])
    system.atoms += lead_halide.atoms

    # Run a geometry optimization of this system
    if run_opt:
        opt_job = orca.job(fname,
                           fpl_constants.default_routes[0],
                           atoms=system.atoms,
                           extra_section=fpl_constants.extra_section,
                           queue="batch",
                           procs=2)
        opt_job.wait()
        optimised_atoms = orca.read(fname).atoms
        for atom, optimised in zip(system.atoms, optimised_atoms):
            atom.x, atom.y, atom.z = optimised.x, optimised.y, optimised.z

    # Set OPLS types (only the central ion and the halides are retyped)
    typed_elements = (ion, "Cl", "Br", "I")
    for atom in system.atoms:
        if atom.element not in typed_elements:
            continue
        atom.type = fpl_constants.atom_types[atom.element]
        atom.type_index = atom.type["index"]

    # Write cml file so we don't re-generate, and return system
    files.write_cml(system, bonds=bonds, name=cached_cml)
    return system
def job(fpl_obj, task_name):
    """
    Create the LAMMPS task named ``task_name`` for the system on ``fpl_obj``.

    A script template is filled in through fpl_utils.input_variable:

        $MOBILE$    number of atoms in the solute molecule (0 if no solute)
        $RUN_NAME$  task_name
        $SEED$      fpl_obj.seed
        $RUN_LEN$   fpl_obj.lmp_run_len
        $IMOBILE$   two extra commands acting on the "immobile" group when
                    a solute is set, otherwise an empty string

    Side effect: fpl_obj.system.name is overwritten with task_name.

    **Parameters**

        fpl_obj:
            Object supplying the system, the solute file name and the
            queueing / read options forwarded to the task.
        task_name: *str*
            Name of the task; also substituted into the script.

    **Returns**

        task:
            The object returned by lammps_job.lmp_task, after its
            parameters were set and its callback was pointed at
            callback_grab_final.
    """
    # NOTE(review): in this copy of the file the template below is a single
    # line.  The $IMOBILE$ replacement further down embeds "\n", which
    # suggests one command per line was intended -- confirm the literal's
    # line breaks against the upstream source.
    script = '''units real atom_style full pair_style lj/cut/coul/dsf 0.05 10.0 10.0 bond_style harmonic angle_style harmonic dihedral_style opls boundary p p p read_data $RUN_NAME$.data dump 1 all xyz 100 $RUN_NAME$.xyz fix av all ave/time 1 100 100 c_thermo_pe thermo_style custom step f_av pe temp press thermo 100 group mobile id > $MOBILE$ group immobile subtract all mobile $IMOBILE$ velocity mobile create 100.0 $SEED$ rot yes dist gaussian velocity immobile set 0.0 0.0 0.0 fix relax mobile nve/limit 0.1 run 10000 unfix relax fix motion mobile nvt temp 100.0 100.0 100.0 timestep 1.0 run $RUN_LEN$ write_restart $RUN_NAME$.restart'''

    # Setup input script
    solute_molecule = None
    if fpl_obj.solute is not None:
        solute_molecule = structures.Molecule(
            fpl_constants.cml_dir + fpl_obj.solute,
            test_charges=False,
            allow_errors=True)

    # "group mobile id > N": every atom id above the solute atom count
    solute_atom_count = len(solute_molecule.atoms) if solute_molecule else 0

    # Extra commands for the immobile group, only emitted with a solute
    if solute_molecule is None:
        immobile_cmds = ""
    else:
        immobile_cmds = ("velocity immobile zero linear\n"
                         "fix freeze immobile setforce 0.0 0.0 0.0")

    # Substitutions are applied in this order
    substitutions = (
        ("$MOBILE$", str(solute_atom_count)),
        ("$RUN_NAME$", task_name),
        ("$SEED$", fpl_obj.seed),
        ("$RUN_LEN$", fpl_obj.lmp_run_len),
        ("$IMOBILE$", immobile_cmds),
    )
    for placeholder, value in substitutions:
        script = fpl_utils.input_variable(placeholder, value, script)

    # Now we can generate the task
    # NOTE! Because the data file is written by the system name, we want to
    # overwrite the system name here
    fpl_obj.system.name = task_name
    task = lammps_job.lmp_task(task_name,
                               fpl_obj.system,
                               queue=fpl_obj.queue,
                               procs=fpl_obj.procs,
                               priority=fpl_obj.priority,
                               xhosts=fpl_obj.xhosts)

    # These options are forwarded under the same name they carry on fpl_obj
    forwarded = ("email", "pair_coeffs_included", "hybrid_pair",
                 "hybrid_angle", "trj_file", "xyz_file", "read_atoms",
                 "read_timesteps", "read_num_atoms", "read_box_bounds")
    task_options = dict((key, getattr(fpl_obj, key)) for key in forwarded)
    task.set_parameters(script, **task_options)

    task.callback = callback_grab_final
    return task