def enumerateDihs(f):
    """Return every torsion Open Babel enumerates for the molecule in ``f``.

    The input format is taken from the file extension of ``f``.

    Args:
        f: path of the molecule file to read.

    Returns:
        list: one entry per torsion yielded by ``ob.OBMolTorsionIter``.
        The list is empty when the file could not be read, because the
        iterator then runs over an empty ``OBMol``.
    """
    # splitext() keeps the leading dot (".pdb"), but Open Babel format IDs
    # are bare ("pdb"), so the dot has to be dropped before SetInFormat().
    f_ext = os.path.splitext(f)[1]
    obconversion = ob.OBConversion()
    obconversion.SetInFormat(f_ext[1:])
    obmol = ob.OBMol()
    obconversion.ReadFile(obmol, f)
    return list(ob.OBMolTorsionIter(obmol))
def _add_oop_angle_change_collection(self, index, threshold=0, CURVE_TYPE=4, zorder=50, **kwargs): """Compute and draw torsion changes on the axes.""" CURVE_TYPE_3, CURVE_TYPE_4 = 3, 4 col = [] edgecolors = [] if CURVE_TYPE is CURVE_TYPE_3: codes = [ Path.MOVETO, Path.CURVE3, Path.CURVE3, ] elif CURVE_TYPE is CURVE_TYPE_4: codes = [ Path.MOVETO, Path.CURVE4, Path.CURVE4, Path.CURVE4, ] for torsion in ob.OBMolTorsionIter(self.molecule): atoms = [self.molecule.GetAtom(idx + 1) for idx in torsion] atomsnc = [ self._to_normal_coordinates(atom, index) for atom in atoms ] teq = self.molecule.GetTorsion(*atoms) tnc = self.molecule.GetTorsion(*atomsnc) amplitude = (tnc - teq + 360.0) % 360.0 if amplitude > 180.0: amplitude -= 360.0 if abs(amplitude) <= threshold: continue intensity = abs(amplitude / 40) a, b, c, d = [self._2Dcoords(atom) for atom in atoms] p2 = 0.5 * (b + c) # middle p1 = intensity * (a - p2) + p2 p3 = intensity * (d - p2) + p2 color = self.oop_colors[0 if amplitude < 0.0 else 1] if CURVE_TYPE is CURVE_TYPE_3: verts = [p1, p2, p3] elif CURVE_TYPE is CURVE_TYPE_4: verts = [p1, b, c, p3] curve = Path(verts, codes) col.append(curve) edgecolors.append(color) kw = {'edgecolors': edgecolors, 'facecolors': 'none'} kwargs.update(kw) self._vib_oop = PathCollection(col, zorder=zorder, **kwargs) self.axes.add_collection(self._vib_oop)
def get_topology(molecule):
    """
    Return the molecular topology obtained via openbabel.

    Args:
        molecule (Molecule): pymatgen Molecule object whose sites carry an
            ``ff_map`` attribute.

    Returns:
        atoms (list): one ``(species string, ff_map)`` tuple per atom
        bonds (list): [[i, j, bond_type], ...] where bond_type is the tuple
            of the force field names of atoms i and j, in that order.
        angles (list): [[i, j, k, angle_type], ...] where angle_type is the
            tuple of the force field names of atoms i, j and k, in that
            order. The indices are kept exactly as the openbabel angle
            iterator yields them.
        dihedrals (list): [[i, j, k, l, dihedral_type], ...] where
            dihedral_type is the tuple of the force field names of atoms
            i, j, k and l, in that order.
    """
    obmol = BabelMolAdaptor(molecule).openbabel_mol

    def with_type(indices, count):
        # append the tuple of force field names of the first `count` indices
        names = tuple(molecule[indices[k]].ff_map for k in range(count))
        return indices + [names]

    # collect the raw 0-based indices first (openbabel atom/bond idx are 1-based)
    atom_ids = []
    for ob_atom in openbabel.OBMolAtomIter(obmol):
        atom_ids.append(ob_atom.GetIdx() - 1)
    bond_ids = []
    for ob_bond in openbabel.OBMolBondIter(obmol):
        bond_ids.append(
            [ob_bond.GetBeginAtomIdx() - 1, ob_bond.GetEndAtomIdx() - 1])
    angle_ids = [list(entry) for entry in openbabel.OBMolAngleIter(obmol)]
    dihedral_ids = [
        list(entry) for entry in openbabel.OBMolTorsionIter(obmol)
    ]

    # then decorate every entry with its force field mapping
    atoms = []
    for idx in atom_ids:
        site = molecule[idx]
        atoms.append((str(site.specie), site.ff_map))
    bonds = [with_type(ids, 2) for ids in bond_ids]
    angles = [with_type(ids, 3) for ids in angle_ids]
    dihedrals = [with_type(ids, 4) for ids in dihedral_ids]
    return atoms, bonds, angles, dihedrals
def read_parameters(dfrfile, txtfile):
    """Read the .dfr and .txt files and check the .dfr covers the molecule.

    Every bond, angle and dihedral that Open Babel finds in the molecule
    read from ``txtfile`` must be listed in the .dfr, in either atom order.
    The first missing entry is reported on stdout and the program exits.

    Args:
        dfrfile: path handed to ``read_dfr_dof``.
        txtfile: path handed to ``read_txt_to_mol``.

    Returns:
        tuple: ``(mol, q, eps, sig, dfrBonds, dfrAngles, dfrDihedrals,
        dfrImpDih)`` exactly as produced by the two readers.

    Raises:
        SystemExit: with a non-zero status when a degree of freedom of the
            molecule is not specified in the .dfr.
    """
    # read the degrees of freedom from dfr into dictionaries
    dfrBonds, dfrAngles, dfrDihedrals, dfrImpDih = read_dfr_dof(dfrfile)

    # read the txt to a pybel mol object
    mol, q, eps, sig = read_txt_to_mol(txtfile)

    def require(kind, atom_ids, table):
        # labels are 1-based atom numbers joined by spaces; both reading
        # directions are accepted
        forward = " ".join(atom_ids)
        backward = " ".join(reversed(atom_ids))
        if (forward not in table) and (backward not in table):
            print("The %s (%s) is not specified in your .dfr. Aborting..." %
                  (kind, forward))
            # this is an error path, so do not report success to the shell
            sys.exit(1)

    # check if all bonds are present in the .dfr
    for bond in openbabel.OBMolBondIter(mol.OBMol):
        require("bond", [
            str(bond.GetBeginAtom().GetId() + 1),
            str(bond.GetEndAtom().GetId() + 1)
        ], dfrBonds)

    # check if all angles are present in the .dfr
    for angle in openbabel.OBMolAngleIter(mol.OBMol):
        angidx = [str(x + 1) for x in angle]
        # the iterator's first index is written in the middle of the label
        require("angle", [angidx[1], angidx[0], angidx[2]], dfrAngles)

    # check if all dihedrals are present in the .dfr
    for torsional in openbabel.OBMolTorsionIter(mol.OBMol):
        require("dihedral", [str(x + 1) for x in torsional], dfrDihedrals)

    return mol, q, eps, sig, dfrBonds, dfrAngles, dfrDihedrals, dfrImpDih
def generate_fragfile(filename, outtype, ffparams=None, eqgeom=False):
    """Write the ``.txt``, ``.dfr`` and per-fragment xyz files for a molecule.

    The molecule in ``filename`` is read with Open Babel, split into
    fragments with ``split_mol_fragments_daylight`` and written out as:

    * ``<base>.txt``: one line per non-dummy atom (coordinates plus either
      the force field values or the placeholders ``q``/``epsilon``/``sigma``)
    * ``<base>.dfr``: fragments, fragment connections and, depending on
      ``outtype``, bonds, angles, dihedrals and improper dihedrals
    * ``<base>_fragments/``: one xyz file per fragment

    where ``<base>`` is ``filename`` without its extension.

    Args:
        filename: molecule file; its extension (without the dot) is used as
            the Open Babel input format.
        outtype: ``"flex"``, ``"header"`` or ``"min"``. ``"header"`` writes
            only fragments and connections, ``"min"`` adds bonds and the
            dihedrals that are not contained in a single fragment,
            ``"flex"`` writes bonds, angles, all dihedrals and impropers.
        ffparams: optional path of a parameter file with a non-bonded
            section followed by ``$bond``, ``$angle``, ``$dihedral`` and
            ``$improper`` sections. When omitted, placeholders are written.
        eqgeom: only used together with ``ffparams``; when true the bond
            length / angle measured on the input geometry is written in
            place of the trailing value from the parameter file.

    Raises:
        SystemExit: if ``outtype`` is not one of the accepted values.
        KeyError: if ``ffparams`` is given and a bond, angle, dihedral or
            improper of the molecule has no entry in it.
    """
    # check outtype
    if outtype not in ["flex", "header", "min"]:
        sys.exit(
            'Invalid argument indicating verbosity of .dfr (%s). Use "flex", "header" or "min".'
            % outtype)

    # get basename and file extension
    base, ext = os.path.splitext(filename)

    # set openbabel file format (ext[1:] drops the leading dot)
    obConversion = openbabel.OBConversion()
    obConversion.SetInAndOutFormats(ext[1:], "xyz")

    # read molecule to OBMol object
    mol = openbabel.OBMol()
    obConversion.ReadFile(mol, filename)

    if ffparams:
        # get atomic labels from pdb
        idToAtomicLabel = {}
        for res in openbabel.OBResidueIter(mol):
            for atom in openbabel.OBResidueAtomIter(res):
                idToAtomicLabel[atom.GetId()] = res.GetAtomID(atom).strip()

        # read force field parameters and store into dictionaries
        labelToSLabel = {}
        charges = {}
        epsilons = {}
        sigmas = {}
        bonds = {}
        angles = {}
        dihedrals = {}
        impropers = {}
        with open(ffparams, 'r') as f:
            line = f.readline()
            # read nb params: columns are label, charge, epsilon, sigma, short label
            # NOTE(review): readline() returns "" at end of file, which takes
            # the blank-line branch below, so a file missing a section
            # marker makes these while loops spin forever.
            while "$bond" not in line:
                if line.strip().startswith("#") or not line.strip():
                    line = f.readline()
                    continue

                lbl = line.split()[0]
                charges[lbl] = line.split()[1]
                epsilons[lbl] = line.split()[2]
                sigmas[lbl] = line.split()[3]
                labelToSLabel[lbl] = line.split()[4]

                line = f.readline()

            # read bond params
            line = f.readline()
            while "$angle" not in line:
                if line.strip().startswith(
                        "#") or "$end" in line or not line.strip():
                    line = f.readline()
                    continue

                # normalize en dashes to plain hyphens before splitting the key
                line = line.replace("–", "-")

                # store the constants for the order of the input and the inverse order
                consts = "\t".join(line.split()[1:])
                bonds[line.split()[0]] = consts
                bonds["-".join(line.split()[0].split("-")[::-1])] = consts

                line = f.readline()

            # read angle params
            line = f.readline()
            while "$dihedral" not in line:
                if line.strip().startswith(
                        "#") or "$end" in line or not line.strip():
                    line = f.readline()
                    continue

                line = line.replace("–", "-")

                # store the constants for the order of the input and the inverse order
                consts = "\t".join(line.split()[1:])
                angles[line.split()[0]] = consts
                angles["-".join(line.split()[0].split("-")[::-1])] = consts

                line = f.readline()

            # read dihedrals
            line = f.readline()
            while "$improper" not in line:
                if line.strip().startswith(
                        "#") or "$end" in line or not line.strip():
                    line = f.readline()
                    continue

                line = line.replace("–", "-")

                # store the constants for the order of the input and the inverse order
                consts = "\t".join(line.split()[1:])
                dihedrals[line.split()[0]] = consts
                dihedrals["-".join(line.split()[0].split("-")[::-1])] = consts

                line = f.readline()

            # read impropers (runs until end of file)
            line = f.readline()
            while line:
                if line.strip().startswith(
                        "#") or "$end" in line or not line.strip():
                    line = f.readline()
                    continue

                line = line.replace("–", "-")

                # store the constants for the order of the input and the inverse order
                consts = "\t".join(line.split()[1:])
                impropers[line.split()[0]] = consts
                impropers["-".join(line.split()[0].split("-")[::-1])] = consts

                line = f.readline()

        # check if there are unused labels
        for lbl in charges.keys():
            fnd = False
            for i in idToAtomicLabel:
                if lbl == idToAtomicLabel[i]:
                    fnd = True
                    break
            if not fnd:
                print(
                    "!!! WARNING: There are unused atoms in your parameter file (%s) !!!"
                    % lbl)

    # split the molecule
    fragments, fragConnection, dummyToAtom = split_mol_fragments_daylight(mol)

    # dummy atoms ids
    dummyAtoms = dummyToAtom.keys()

    # write molecule to .txt file (passed as ljname to DICE)
    with open(base + ".txt", "w") as f:
        f.write("*\n1\n")
        # collect the atoms of every fragment, leaving the dummy atoms out
        atomToPrint = []
        for frag in fragments:
            fragAtomIterator = openbabel.OBMolAtomIter(frag)
            for atom in fragAtomIterator:
                if atom.GetId() not in dummyAtoms:
                    atomToPrint.append(atom)

        # print number of atoms
        f.write(
            str(len(atomToPrint)) +
            " \t %s (generated with fragGen)\n" % os.path.basename(base))

        # dictionary associating Atomic number with rdf label
        rdfs = {}
        rdf_label = 1

        if ffparams:
            # sort atoms by index and print (this prints the atoms, e.g., in the same order of the xyz input)
            for atom in sorted(atomToPrint, key=lambda atom: atom.GetId()):
                # a new rdf label is handed out the first time an element is seen
                if atom.GetAtomicNum() not in rdfs.keys():
                    rdfs[atom.GetAtomicNum()] = str(rdf_label)
                    rdf_label += 1

                f.write(rdfs[atom.GetAtomicNum()] + " " +
                        str(atom.GetAtomicNum()) + " \t" + str(atom.GetX()) +
                        " \t" + str(atom.GetY()) + " \t" + str(atom.GetZ()) +
                        " \t" + charges[idToAtomicLabel[atom.GetId()]] + "\t" +
                        epsilons[idToAtomicLabel[atom.GetId()]] + "\t" +
                        sigmas[idToAtomicLabel[atom.GetId()]] + "\n")

            f.write("$end\n")
        else:
            # sort atoms by index and print (this prints the atoms, e.g., in the same order of the xyz input)
            for atom in sorted(atomToPrint, key=lambda atom: atom.GetId()):
                if atom.GetAtomicNum() not in rdfs.keys():
                    rdfs[atom.GetAtomicNum()] = str(rdf_label)
                    rdf_label += 1

                f.write(rdfs[atom.GetAtomicNum()] + " " +
                        str(atom.GetAtomicNum()) + " \t" + str(atom.GetX()) +
                        " \t" + str(atom.GetY()) + " \t" + str(atom.GetZ()) +
                        " \t" + "q" + "\t" + "epsilon" + "\t" + "sigma\n")

            f.write("$end\n")

    # write info to dfr file
    with open(base + ".dfr", "w") as f:
        # fragments and fragments connections are printed to every outtype
        f.write("$atoms fragments\n")

        fragslst = []
        for frag in fragments:
            f.write(frag.GetTitle() + "\t[ ")
            fragAtomIterator = openbabel.OBMolAtomIter(frag)
            # dummy atoms are listed under the id stored for them in
            # dummyToAtom; all ids are written 1-based
            atomlst = [
                str(dummyToAtom[x.GetId()] + 1)
                if x.GetId() in dummyAtoms else str(x.GetId() + 1)
                for x in fragAtomIterator
            ]
            fragslst.append(atomlst)
            for atom in atomlst:
                f.write(atom + "\t")
            # fragments are tagged R for "min"/"header" and F for "flex"
            if outtype == "min" or outtype == "header":
                f.write("] R\n")
            else:
                f.write("] F\n")

        f.write("$end atoms fragments\n")

        f.write("\n$fragment connection\n")
        for frag1, frag2 in fragConnection:
            f.write(frag1 + "\t" + frag2 + "\n")
        f.write("$end fragment connection\n")

        # bonds are printed to every outtype that is not header, since we need a connection matrix
        if outtype != "header":
            f.write("\n$bond\n")

            bondIterator = openbabel.OBMolBondIter(mol)
            if ffparams:
                for bond in bondIterator:
                    try:
                        if eqgeom:
                            # first parameter field, then the measured bond length
                            f.write(
                                str(bond.GetBeginAtom().GetId() + 1) + " " +
                                str(bond.GetEndAtom().GetId() + 1) + " \t" +
                                bonds[labelToSLabel[idToAtomicLabel[
                                    bond.GetBeginAtom().GetId()]] + "-" +
                                      labelToSLabel[idToAtomicLabel[
                                          bond.GetEndAtom().GetId()]]].split()
                                [0] + "\t" +
                                str("%.6f" % bond.GetLength()) + "\n")
                        else:
                            f.write(
                                str(bond.GetBeginAtom().GetId() + 1) + " " +
                                str(bond.GetEndAtom().GetId() + 1) + " \t" +
                                bonds[labelToSLabel[idToAtomicLabel[
                                    bond.GetBeginAtom().GetId()]] + "-" +
                                      labelToSLabel[idToAtomicLabel[
                                          bond.GetEndAtom().GetId()]]] + "\n")
                    except KeyError as e:
                        print(
                            "The parameters for atoms %d %d (%s) was not found in the bonds list\n"
                            % (bond.GetBeginAtom().GetId() + 1,
                               bond.GetEndAtom().GetId() + 1, e))
                        raise
            else:
                for bond in bondIterator:
                    f.write(
                        str(bond.GetBeginAtom().GetId() + 1) + " " +
                        str(bond.GetEndAtom().GetId() + 1) + " \t0.0\t" +
                        str("%.6f" % bond.GetLength()) + "\n")

            f.write("$end bond\n")

        # angles are only printed for outtype flex
        if outtype == "flex":
            f.write("\n$angle\n")

            angleIterator = openbabel.OBMolAngleIter(mol)
            if ffparams:
                for angle in angleIterator:
                    try:
                        if eqgeom:
                            # angle[0] is used as the middle atom throughout:
                            # it is written second and passed second to GetAngle
                            atom2 = mol.GetAtomById(angle[0])
                            atom1 = mol.GetAtomById(angle[1])
                            atom3 = mol.GetAtomById(angle[2])
                            aparams = angles[
                                labelToSLabel[idToAtomicLabel[angle[1]]] +
                                "-" +
                                labelToSLabel[idToAtomicLabel[angle[0]]] +
                                "-" + labelToSLabel[idToAtomicLabel[
                                    angle[2]]]].split()
                            f.write(
                                str(angle[1] + 1) + " " + str(angle[0] + 1) +
                                " " + str(angle[2] + 1) + " \t" + aparams[0] +
                                "\t" + aparams[1] + "\t" +
                                str("%.6f" %
                                    mol.GetAngle(atom1, atom2, atom3)) + "\n")
                        else:
                            f.write(
                                str(angle[1] + 1) + " " + str(angle[0] + 1) +
                                " " + str(angle[2] + 1) + " \t" + angles[
                                    labelToSLabel[idToAtomicLabel[angle[1]]] +
                                    "-" +
                                    labelToSLabel[idToAtomicLabel[angle[0]]] +
                                    "-" +
                                    labelToSLabel[idToAtomicLabel[angle[2]]]] +
                                "\n")
                    except KeyError as e:
                        print(
                            "The parameters for atoms %d %d %d (%s) was not found in the angles list\n"
                            % (angle[1] + 1, angle[0] + 1, angle[2] + 1, e))
                        raise
            else:
                for angle in angleIterator:
                    # carefully select the atoms to find the angle
                    atom2 = mol.GetAtomById(angle[0])
                    atom1 = mol.GetAtomById(angle[1])
                    atom3 = mol.GetAtomById(angle[2])
                    f.write(
                        str(angle[1] + 1) + " " + str(angle[0] + 1) + " " +
                        str(angle[2] + 1) + " \tharmonic\tK\t" +
                        str("%.6f" % mol.GetAngle(atom1, atom2, atom3)) + "\n")

            f.write("$end angle\n")

        # all the dihedrals are printed to outtype flex, but only connection between fragments are printed if outtype is min
        if outtype == "flex":
            f.write("\n$dihedral\n")

            torsionIterator = openbabel.OBMolTorsionIter(mol)
            if ffparams:
                for torsional in torsionIterator:
                    # Need to sum 1: http://forums.openbabel.org/Rotable-bonds-tp957795p957798.html
                    torsidx = [str(x + 1) for x in torsional]
                    try:
                        f.write(torsidx[0] + " " + torsidx[1] + " " +
                                torsidx[2] + " " + torsidx[3] + " \t" +
                                dihedrals["-".join([
                                    labelToSLabel[idToAtomicLabel[x]]
                                    for x in torsional
                                ])] + "\n")
                    except KeyError as e:
                        print(
                            "The parameters for atoms %s %s %s %s (%s) was not found in the dihedrals list\n"
                            % (torsidx[0], torsidx[1], torsidx[2], torsidx[3],
                               e))
                        raise
            else:
                for torsional in torsionIterator:
                    # Need to sum 1: http://forums.openbabel.org/Rotable-bonds-tp957795p957798.html
                    torsional = [str(x + 1) for x in torsional]
                    f.write(torsional[0] + " " + torsional[1] + " " +
                            torsional[2] + " " + torsional[3] +
                            " \tTYPE\tV1\tV2\tV3\tf1\tf2\tf3\n")

            f.write("$end dihedral\n")

            # improper dihedral = carbon with only 3 atoms connected to it (SP2 hybridization)
            # angle found following this definition --> http://cbio.bmt.tue.nl/pumma/index.php/Theory/Potentials
            f.write("\n$improper dihedral\n")

            atomIterator = openbabel.OBMolAtomIter(mol)
            for atom in atomIterator:
                # print(atom.GetHyb(), atom.GetAtomicNum(), atom.GetValence())
                # if atom.GetAtomicNum() == 6 and atom.GetValence() == 3:
                # the test below does not look at the element, only at
                # hybridization and number of connections
                if atom.GetHyb() == 2 and atom.GetValence() == 3:
                    # gather the three neighbours of the central atom
                    bondIterator = atom.BeginBonds()
                    nbrAtom = atom.BeginNbrAtom(bondIterator)
                    connectedAtoms = []
                    connectedAtoms.append(nbrAtom)
                    for i in range(2):
                        nbrAtom = atom.NextNbrAtom(bondIterator)
                        connectedAtoms.append(nbrAtom)

                    if ffparams:
                        torsional = [
                            atom.GetId(), connectedAtoms[0].GetId(),
                            connectedAtoms[1].GetId(),
                            connectedAtoms[2].GetId()
                        ]
                        # create all the permutations to check if one is found
                        perms = list(itertools.permutations(torsional[1:]))
                        nfound = 0
                        # NOTE(review): despite its name, nfound counts the
                        # permutations that FAILED. A line is written for
                        # every permutation whose key exists, always with
                        # the atoms in the original neighbour order.
                        for perm in perms:
                            try:
                                joined = "-".join([
                                    labelToSLabel[idToAtomicLabel[
                                        torsional[0]]]
                                ] + [
                                    labelToSLabel[idToAtomicLabel[x]]
                                    for x in perm
                                ])
                                f.write(
                                    str(torsional[0] + 1) + " " +
                                    str(torsional[1] + 1) + " " +
                                    str(torsional[2] + 1) + " " +
                                    str(torsional[3] + 1) + " \t" +
                                    impropers[joined] + "\n")
                            # NOTE(review): bare except, so any error (not
                            # only a missing key) is counted as "not found"
                            except:
                                nfound += 1
                        if nfound == len(perms):
                            joined = "-".join(
                                [labelToSLabel[idToAtomicLabel[torsional[0]]]
                                 ] + [
                                     labelToSLabel[idToAtomicLabel[x]]
                                     for x in perms[0]
                                 ])
                            raise KeyError(
                                "The key %s (or its permutations) were not found in the improper dihedrals list\n"
                                % (joined))
                    else:
                        # without parameters the measured torsion is written
                        torsional = [
                            atom.GetId() + 1, connectedAtoms[0].GetId() + 1,
                            connectedAtoms[1].GetId() + 1,
                            connectedAtoms[2].GetId() + 1
                        ]
                        torsionAngle = mol.GetTorsion(torsional[0],
                                                      torsional[1],
                                                      torsional[2],
                                                      torsional[3])
                        f.write(
                            str(torsional[0]) + " " + str(torsional[1]) +
                            " " + str(torsional[2]) + " " +
                            str(torsional[3]) + " \tV2\t" +
                            str("%.6f" % torsionAngle) + "\n")

            f.write("$end improper dihedral\n")
        elif outtype == "min":
            torsionIterator = openbabel.OBMolTorsionIter(mol)

            # tjf = torsionals that join fragments
            tjf = []
            # find the tjfs by checking if all the atoms of a torsional belong to the same fragment
            for tors in torsionIterator:
                tors = [str(x + 1) for x in tors]
                istjf = True
                for atomlst in fragslst:
                    if (tors[0] in atomlst) and (tors[1] in atomlst) and (
                            tors[2] in atomlst) and (tors[3] in atomlst):
                        istjf = False
                        break
                if istjf:
                    tjf.append(tors)

            f.write("\n$dihedral\n")

            if ffparams:
                for torsidx in tjf:
                    # back to 0-based ids to look the labels up
                    torsional = [int(x) - 1 for x in torsidx]
                    try:
                        f.write(torsidx[0] + " " + torsidx[1] + " " +
                                torsidx[2] + " " + torsidx[3] + " \t" +
                                dihedrals["-".join([
                                    labelToSLabel[idToAtomicLabel[x]]
                                    for x in torsional
                                ])] + "\n")
                    except KeyError as e:
                        print(
                            "The parameters for atoms %s %s %s %s (%s) was not found in the dihedrals list\n"
                            % (torsidx[0], torsidx[1], torsidx[2], torsidx[3],
                               e))
                        raise
            else:
                for torsional in tjf:
                    f.write(torsional[0] + " " + torsional[1] + " " +
                            torsional[2] + " " + torsional[3] +
                            " \tTYPE\tV1\tV2\tV3\tf1\tf2\tf3\n")

            f.write("$end dihedral\n")

    # create directory to store the fragments
    if not os.path.exists(base + "_fragments"):
        os.makedirs(base + "_fragments")

    # write fragments to xyz files (the output format set on obConversion)
    for frag in fragments:
        obConversion.WriteFile(
            frag,
            os.path.join(
                base + "_fragments",
                os.path.basename(filename).split(".")[0] + "_fragment" +
                frag.GetTitle() + ".xyz"))
def generate_patch_defs(H_atoms,
                        atom_dictionary,
                        squalane_mol2_file,
                        skeleton_file='squalane-patches_skeleton.txt'):
    """
    Print necessary patch definitions for EVB parameters

    The contents of ``skeleton_file`` are copied to
    ``squalane_patch_defs_all.txt`` and one ``PRES RH<n>`` patch is appended
    for every entry of ``H_atoms``. Processing stops at the first entry that
    is not a hydrogen in the mol2 file.

    :param H_atoms: List of H atoms that get within threshold distance of CN
        carbon (numbered from 1)
    :param atom_dictionary: maps a 0-based carbon index to the 0-based index
        (int) or indices (tuple) of the hydrogens connected to it
    :param squalane_mol2_file: mol2 file the atom types and torsions are
        read from
    :param skeleton_file: file whose contents start the output file
    :return:
    """

    def bonded_to(connected_H_atoms, H_atom):
        # the dictionary values are either one index or a tuple of indices
        if isinstance(connected_H_atoms, tuple):
            return H_atom in connected_H_atoms
        if isinstance(connected_H_atoms, int):
            return H_atom == connected_H_atoms
        return False

    obconversion = openbabel.OBConversion()
    obconversion.SetInAndOutFormats("mol2", "mol2")

    # A single context manager owns the output file so that it is flushed
    # and closed even when an error interrupts the writing.
    with open('squalane_patch_defs_all.txt', 'w') as outputfile:
        with open(skeleton_file) as skeletonfile:
            outputfile.writelines(skeletonfile)

        mol = openbabel.OBMol()
        obconversion.ReadFile(mol, squalane_mol2_file)

        atom_table = {}
        for atom in openbabel.OBMolAtomBFSIter(mol):
            atom_table[atom.GetIndex()] = atom.GetType()

        # the molecule never changes, so enumerate its torsions only once
        torsions = list(openbabel.OBMolTorsionIter(mol))

        for H_atom in H_atoms:
            # To get numbering to start at 0
            H_atom = H_atom - 1

            if atom_table[H_atom][0] != 'H':
                print("ERROR: Atom %s is not an H atom." % H_atom)
                break

            outputfile.write(
                "PRES RH%s \t ! PATCH CN + squalane -> HCN + squalyl (missing H%s)"
                % ((H_atom + 1), (H_atom + 1)))

            # the carbon losing the hydrogen becomes CSP2, the others stay CR
            for C_atom, connected_H_atoms in sorted(atom_dictionary.items()):
                if bonded_to(connected_H_atoms, H_atom):
                    outputfile.write("\nATOM 1C%s\t CSP2\t 0.00" %
                                     (C_atom + 1))
                else:
                    outputfile.write("\nATOM 1C%s\t CR\t 0.00" % (C_atom + 1))

            for index, atom_type in atom_table.items():
                if 'H' == atom_type:
                    outputfile.write("\nATOM 1H%s\t HC\t 0.00" %
                                     (int(index) + 1))

            outputfile.write("\nATOM 2C1\t CSP\t 0.00")
            outputfile.write("\nATOM 2N2\t NSP\t 0.00")

            # move the hydrogen from its carbon to the CN carbon
            for C_atom, connected_H_atoms in atom_dictionary.items():
                if bonded_to(connected_H_atoms, H_atom):
                    outputfile.write("\nDELETE BOND 1C%s 1H%s" %
                                     ((C_atom + 1), (H_atom + 1)))
                    outputfile.write("\nBOND 2C1 1H%s" % (H_atom + 1))

            # every torsion containing the hydrogen and a listed carbon is
            # deleted exactly once
            dihedrals_to_delete = []
            for C_atom in atom_dictionary:
                for obtorsion in torsions:
                    if (H_atom in obtorsion and C_atom in obtorsion
                            and obtorsion not in dihedrals_to_delete):
                        dihedrals_to_delete.append(obtorsion)
                        # each atom is written as 1<first type letter><1-based index>
                        outputfile.write(
                            "\nDELETE DIHE 1%s%s 1%s%s 1%s%s 1%s%s" %
                            (atom_table[obtorsion[0]][0], (obtorsion[0] + 1),
                             atom_table[obtorsion[1]][0], (obtorsion[1] + 1),
                             atom_table[obtorsion[2]][0], (obtorsion[2] + 1),
                             atom_table[obtorsion[3]][0], (obtorsion[3] + 1)))
            outputfile.write("\n\n")
def parse_mol_info(fname, fcharges, axis, buffa, buffo, pbcbonds, printdih,
                   ignorebonds, ignoreimproper):
    """Build the text of a LAMMPS topology for the structure in ``fname``.

    The structure is read with Open Babel and separated into molecules, and
    every atom gets a label (the PDB atom name or the element symbol, with
    a molecule-type suffix where needed). Bonds, angles and optionally
    dihedrals and impropers are enumerated and grouped into types by those
    labels.

    Args:
        fname: structure file; its extension (without the dot) is used as
            the Open Babel input format.
        fcharges: optional path of a file whose lines hold an atom label
            followed by its charge; when falsy a placeholder is written in
            the charge column.
        axis: ``"x"``, ``"y"`` or ``"z"``; the box direction that receives
            ``buffa``.
        buffa: length added to the box along ``axis``.
        buffo: length added to the box along the two other directions.
        pbcbonds: when true, replicated atoms close to the cell are added
            before bonds are perceived (this also writes
            ``replicated.xyz``).
        printdih: when true, dihedrals (and impropers) are listed.
        ignorebonds: when true, the first separated molecule is left out of
            the bond/angle/dihedral/improper enumeration.
        ignoreimproper: when true, impropers are not searched for.

    Returns:
        str: header, masses, coefficient placeholders, atoms, bonds, angles
        and, when requested and present, dihedrals and impropers.

    Raises:
        SystemExit: if ``axis`` is not a valid axis name.
    """
    iaxis = {"x": 0, "y": 1, "z": 2}
    if axis in iaxis:
        repaxis = iaxis[axis]
    else:
        print("Error: invalid axis")
        sys.exit(0)

    if fcharges:
        # label -> charge, read from the first two columns of each line
        chargesLabel = {}
        with open(fcharges, "r") as f:
            for line in f:
                chargesLabel[line.split()[0]] = float(line.split()[1])

    # set openbabel file format
    base, ext = os.path.splitext(fname)
    obConversion = openbabel.OBConversion()
    obConversion.SetInAndOutFormats(ext[1:], "xyz")
    # trick to disable ring perception and make the ReadFile waaaay faster
    # Source: https://sourceforge.net/p/openbabel/mailman/openbabel-discuss/thread/56e1812d-396a-db7c-096d-d378a077853f%40ipcms.unistra.fr/#msg36225392
    obConversion.AddOption("b", openbabel.OBConversion.INOPTIONS)

    # read molecule to OBMol object
    mol = openbabel.OBMol()
    obConversion.ReadFile(mol, fname)
    mol.ConnectTheDots()  # necessary because of the 'b' INOPTION

    # split the molecules
    molecules = mol.Separate()

    # detect the molecules types
    mTypes = {}  # type number -> list of atomic numbers of that molecule type
    mapmTypes = {}  # molecule number (from 1) -> type number
    atomIdToMol = {}  # atom id -> molecule number (from 1)
    nty = 0
    for i, submol in enumerate(molecules, start=1):
        atomiter = openbabel.OBMolAtomIter(submol)
        atlist = []
        for at in atomiter:
            atlist.append(at.GetAtomicNum())
            atomIdToMol[at.GetId()] = i
        foundType = None
        for ty in mTypes:
            # check if there's already a molecule of this type
            # (same sequence of atomic numbers)
            if atlist == mTypes[ty]:
                foundType = ty

        # if not, create a new type
        if not foundType:
            nty += 1
            foundType = nty
            mTypes[nty] = atlist

        mapmTypes[i] = foundType

    # get atomic labels from pdb
    # Atoms of every molecule after the first get the molecule type number
    # appended to their label when more than one molecule type exists.
    idToAtomicLabel = {}
    if ext[1:] == "pdb":
        for res in openbabel.OBResidueIter(mol):
            for atom in openbabel.OBResidueAtomIter(res):
                if (atomIdToMol[atom.GetId()] > 1) and (len(mTypes) > 1):
                    idToAtomicLabel[
                        atom.GetId()] = res.GetAtomID(atom).strip() + str(
                            mapmTypes[atomIdToMol[atom.GetId()]])
                else:
                    idToAtomicLabel[atom.GetId()] = res.GetAtomID(atom).strip()
    else:
        # other formats: use the element symbol; `ob3` (defined outside this
        # function) selects which of the two symbol lookups is called
        if not ob3:
            etab = openbabel.OBElementTable()
        for atom in openbabel.OBMolAtomIter(mol):
            if (atomIdToMol[atom.GetId()] > 1) and (len(mTypes) > 1):
                if ob3:
                    idToAtomicLabel[atom.GetId()] = openbabel.GetSymbol(
                        atom.GetAtomicNum()) + str(
                            mapmTypes[atomIdToMol[atom.GetId()]])
                else:
                    idToAtomicLabel[atom.GetId()] = etab.GetSymbol(
                        atom.GetAtomicNum()) + str(
                            mapmTypes[atomIdToMol[atom.GetId()]])
            else:
                if ob3:
                    idToAtomicLabel[atom.GetId()] = openbabel.GetSymbol(
                        atom.GetAtomicNum())
                else:
                    idToAtomicLabel[atom.GetId()] = etab.GetSymbol(
                        atom.GetAtomicNum())

    # print(idToAtomicLabel)

    # identify atom types and get masses
    outMasses = "Masses\n\n"
    massTypes = {}  # label -> atom type number
    mapTypes = {}  # atom type number -> label
    nmassTypes = 0
    atomIterator = openbabel.OBMolAtomIter(mol)
    for atom in atomIterator:
        i = atom.GetId()
        if idToAtomicLabel[i] not in massTypes:
            nmassTypes += 1
            mapTypes[nmassTypes] = idToAtomicLabel[i]
            massTypes[idToAtomicLabel[i]] = nmassTypes
            outMasses += "\t%d\t%.3f\t# %s\n" % (
                nmassTypes, atom.GetAtomicMass(), idToAtomicLabel[i])

    # create atoms list
    outAtoms = "Atoms # full\n\n"

    xmin = float("inf")
    xmax = float("-inf")
    ymin = float("inf")
    ymax = float("-inf")
    zmin = float("inf")
    zmax = float("-inf")
    natoms = 0
    acoords = []
    for mnum, imol in enumerate(molecules, start=1):
        atomIterator = openbabel.OBMolAtomIter(imol)
        for atom in sorted(atomIterator, key=lambda x: x.GetId()):
            natoms += 1
            i = atom.GetId()
            apos = (atom.GetX(), atom.GetY(), atom.GetZ())
            acoords.append(Atom(atom.GetAtomicNum(), apos))

            # look for the maximum and minimum x for the box (improve later with numpy and all coordinates)
            if apos[0] > xmax:
                xmax = apos[0]
            if apos[0] < xmin:
                xmin = apos[0]
            if apos[1] > ymax:
                ymax = apos[1]
            if apos[1] < ymin:
                ymin = apos[1]
            if apos[2] > zmax:
                zmax = apos[2]
            if apos[2] < zmin:
                zmin = apos[2]

            # columns: atom id (1-based), molecule number, atom type,
            # charge, x, y, z
            if fcharges:
                outAtoms += "\t%d\t%d\t%d\t%.6f\t%.4f\t%.4f\t%.4f\t# %s\n" % (
                    i + 1, mnum, massTypes[idToAtomicLabel[i]],
                    chargesLabel[idToAtomicLabel[i]], atom.GetX(),
                    atom.GetY(), atom.GetZ(), idToAtomicLabel[i])
            else:
                outAtoms += "\t%d\t%d\t%d\tX.XXXXXX\t%.4f\t%.4f\t%.4f\t# %s\n" % (
                    i + 1, mnum, massTypes[idToAtomicLabel[i]], atom.GetX(),
                    atom.GetY(), atom.GetZ(), idToAtomicLabel[i])

    # define box shape and size
    # Try the cell stored with the molecule first; if anything in the try
    # block fails (bare except), fall back to the bounding box of the atoms.
    try:
        fromBounds = False
        # NOTE(review): 12 is presumably the generic-data id of the unit
        # cell; confirm against the openbabel headers
        rcell = mol.GetData(12)
        cell = openbabel.toUnitCell(rcell)
        v1 = [
            cell.GetCellVectors()[0].GetX(),
            cell.GetCellVectors()[0].GetY(),
            cell.GetCellVectors()[0].GetZ()
        ]
        v2 = [
            cell.GetCellVectors()[1].GetX(),
            cell.GetCellVectors()[1].GetY(),
            cell.GetCellVectors()[1].GetZ()
        ]
        v3 = [
            cell.GetCellVectors()[2].GetX(),
            cell.GetCellVectors()[2].GetY(),
            cell.GetCellVectors()[2].GetZ()
        ]
        boxinfo = [v1, v2, v3]
        # the cell is orthogonal when every off-diagonal component is ~0
        orthogonal = True
        for i, array in enumerate(boxinfo):
            for j in range(3):
                if i == j:
                    continue
                if not math.isclose(0., array[j], abs_tol=1e-6):
                    orthogonal = False
    except:
        fromBounds = True
        v1 = [xmax - xmin, 0., 0.]
        v2 = [0., ymax - ymin, 0.]
        v3 = [0., 0., zmax - zmin]
        orthogonal = True

    # add buffer (only for orthogonal boxes): buffa on the diagonal element
    # of the chosen axis, buffo on the diagonal elements of the other two
    if orthogonal:
        buf = []
        boxinfo = [v1, v2, v3]
        for i, val in enumerate(boxinfo[repaxis]):
            if i == repaxis:
                buf.append(val + buffa)
            else:
                buf.append(val)
        boxinfo[repaxis] = buf
        for i in range(3):
            if i == repaxis:
                continue
            buf = []
            for j, val in enumerate(boxinfo[i]):
                if j == i:
                    buf.append(val + buffo)
                else:
                    buf.append(val)
            boxinfo[i] = buf

    # print(boxinfo)

    # Duplicate to get the bonds in the PBC. Taken from (method _crd2bond):
    # https://github.com/tongzhugroup/mddatasetbuilder/blob/66eb0f15e972be0f5534dcda27af253cd8891ff2/mddatasetbuilder/detect.py#L213
    if pbcbonds:
        acoords = Atoms(acoords, cell=boxinfo, pbc=True)
        repatoms = acoords.repeat(
            2
        )[natoms:]  # repeat the unit cell in each direction (len(repatoms) = 7*natoms)
        # keep the replicated atoms closer than 8. to some original atom
        tree = cKDTree(acoords.get_positions())
        d = tree.query(repatoms.get_positions(), k=1)[0]
        nearest = d < 8.
        ghost_atoms = repatoms[nearest]
        # realnumber[k] = index of the original atom that ghost k replicates
        realnumber = np.where(nearest)[0] % natoms
        acoords += ghost_atoms

        write("replicated.xyz",
              acoords)  # write the structure with the replicated atoms

        # write new mol with new bonds
        nmol = openbabel.OBMol()
        nmol.BeginModify()
        for idx, (num, position) in enumerate(
                zip(acoords.get_atomic_numbers(), acoords.positions)):
            a = nmol.NewAtom(idx)
            a.SetAtomicNum(int(num))
            a.SetVector(*position)
        nmol.ConnectTheDots()
        # nmol.PerceiveBondOrders() # super slow because it looks for rings
        nmol.EndModify()
    else:
        acoords = Atoms(acoords, cell=boxinfo, pbc=False)
        nmol = openbabel.OBMol()
        nmol.BeginModify()
        for idx, (num, position) in enumerate(
                zip(acoords.get_atomic_numbers(), acoords.positions)):
            a = nmol.NewAtom(idx)
            a.SetAtomicNum(int(num))
            a.SetVector(*position)
        nmol.ConnectTheDots()
        # nmol.PerceiveBondOrders() # super slow because it looks for rings
        nmol.EndModify()

    # identify bond types and create bond list
    outBonds = "Bonds # harmonic\n\n"

    bondTypes = {}  # "label - label" -> bond type number
    mapbTypes = {}  # bond type number -> "label - label"
    nbondTypes = 0
    nbonds = 0
    bondsToDelete = []
    bondIterators = []
    if ignorebonds:
        # the first separated molecule is skipped
        sepmols = nmol.Separate()
        for smol in sepmols[1:]:
            bondIterators.append(openbabel.OBMolBondIter(smol))
    else:
        bondIterators.append(openbabel.OBMolBondIter(nmol))

    lastidx = 1
    for iterator in bondIterators:
        for i, bond in enumerate(iterator, lastidx):
            b1 = bond.GetBeginAtom().GetId()
            b2 = bond.GetEndAtom().GetId()

            # check if its a bond of the replica only
            if (b1 >= natoms) and (b2 >= natoms):
                bondsToDelete.append(bond)
                continue
            # remap to a real atom if needed
            if b1 >= natoms:
                b1 = realnumber[b1 - natoms]
            if b2 >= natoms:
                b2 = realnumber[b2 - natoms]

            # identify bond type (either label order matches an existing type)
            btype1 = "%s - %s" % (idToAtomicLabel[b1], idToAtomicLabel[b2])
            btype2 = "%s - %s" % (idToAtomicLabel[b2], idToAtomicLabel[b1])

            if btype1 in bondTypes:
                bondid = bondTypes[btype1]
                bstring = btype1
            elif btype2 in bondTypes:
                bondid = bondTypes[btype2]
                bstring = btype2
            else:
                nbondTypes += 1
                mapbTypes[nbondTypes] = btype1
                bondid = nbondTypes
                bondTypes[btype1] = nbondTypes
                bstring = btype1

            nbonds += 1
            outBonds += "\t%d\t%d\t%d\t%d\t# %s\n" % (nbonds, bondid, b1 + 1,
                                                      b2 + 1, bstring)

        # NOTE(review): nesting reconstructed from a flattened source;
        # placed after the inner loop - confirm. `i` is unbound here if the
        # very first iterator yields nothing.
        lastidx = i

    # delete the bonds of atoms from other replicas
    for bond in bondsToDelete:
        nmol.DeleteBond(bond)

    # identify angle types and create angle list
    angleTypes = {}  # "label - label - label" -> angle type number
    mapaTypes = {}  # angle type number -> "label - label - label"
    nangleTypes = 0
    nangles = 0
    angleIterators = []

    if ignorebonds:
        sepmols = nmol.Separate()
        for smol in sepmols[1:]:
            smol.FindAngles()
            angleIterators.append(openbabel.OBMolAngleIter(smol))
        # offset that turns indices local to a separated molecule into
        # global ones; starts at the size of the skipped first molecule
        prevnumatoms = sepmols[0].NumAtoms()
    else:
        nmol.FindAngles()
        angleIterators.append(openbabel.OBMolAngleIter(nmol))

    outAngles = "Angles # harmonic\n\n"

    lastidx = 1
    for j, iterator in enumerate(angleIterators, 1):
        for i, angle in enumerate(iterator, lastidx):
            # angle[0] is written as the middle atom (a2)
            if ignorebonds:
                a1 = angle[1] + prevnumatoms
                a2 = angle[0] + prevnumatoms
                a3 = angle[2] + prevnumatoms
            else:
                a1 = angle[1]
                a2 = angle[0]
                a3 = angle[2]

            # remap to a real atom if needed
            if a1 >= natoms:
                a1 = realnumber[a1 - natoms]
            if a2 >= natoms:
                a2 = realnumber[a2 - natoms]
            if a3 >= natoms:
                a3 = realnumber[a3 - natoms]

            atype1 = "%s - %s - %s" % (
                idToAtomicLabel[a1], idToAtomicLabel[a2], idToAtomicLabel[a3])
            atype2 = "%s - %s - %s" % (
                idToAtomicLabel[a3], idToAtomicLabel[a2], idToAtomicLabel[a1])

            if atype1 in angleTypes:
                angleid = angleTypes[atype1]
                astring = atype1
            elif atype2 in angleTypes:
                angleid = angleTypes[atype2]
                astring = atype2
            else:
                nangleTypes += 1
                mapaTypes[nangleTypes] = atype1
                angleid = nangleTypes
                angleTypes[atype1] = nangleTypes
                astring = atype1

            nangles += 1
            outAngles += "\t%d\t%d\t%d\t%d\t%d\t# %s\n" % (
                nangles, angleid, a1 + 1, a2 + 1, a3 + 1, astring)

        # NOTE(review): nesting of the next three statements reconstructed
        # from a flattened source - confirm
        lastidx = i
        if ignorebonds:
            prevnumatoms += sepmols[j].NumAtoms()

    # identify dihedral types and create dihedral list
    if printdih:
        dihedralTypes = {}  # "l - l - l - l" -> dihedral type number
        mapdTypes = {}  # dihedral type number -> "l - l - l - l"
        ndihedralTypes = 0
        ndihedrals = 0
        dihedralIterators = []

        if ignorebonds:
            sepmols = nmol.Separate()
            for smol in sepmols[1:]:
                smol.FindTorsions()
                dihedralIterators.append(openbabel.OBMolTorsionIter(smol))
        else:
            nmol.FindTorsions()
            dihedralIterators.append(openbabel.OBMolTorsionIter(nmol))

        outDihedrals = "Dihedrals # charmmfsw\n\n"

        lastidx = 1
        for iterator in dihedralIterators:
            for i, dihedral in enumerate(iterator, lastidx):
                # NOTE(review): unlike the angle loop, no prevnumatoms
                # offset is applied here when ignorebonds is set - confirm
                a1 = dihedral[0]
                a2 = dihedral[1]
                a3 = dihedral[2]
                a4 = dihedral[3]

                # remap to a real atom if needed
                if a1 >= natoms:
                    a1 = realnumber[a1 - natoms]
                if a2 >= natoms:
                    a2 = realnumber[a2 - natoms]
                if a3 >= natoms:
                    a3 = realnumber[a3 - natoms]
                if a4 >= natoms:
                    a4 = realnumber[a4 - natoms]

                dtype1 = "%s - %s - %s - %s" % (
                    idToAtomicLabel[a1], idToAtomicLabel[a2],
                    idToAtomicLabel[a3], idToAtomicLabel[a4])
                dtype2 = "%s - %s - %s - %s" % (
                    idToAtomicLabel[a4], idToAtomicLabel[a3],
                    idToAtomicLabel[a2], idToAtomicLabel[a1])

                if dtype1 in dihedralTypes:
                    dihedralid = dihedralTypes[dtype1]
                    dstring = dtype1
                elif dtype2 in dihedralTypes:
                    dihedralid = dihedralTypes[dtype2]
                    dstring = dtype2
                else:
                    ndihedralTypes += 1
                    mapdTypes[ndihedralTypes] = dtype1
                    dihedralid = ndihedralTypes
                    dihedralTypes[dtype1] = ndihedralTypes
                    dstring = dtype1

                ndihedrals += 1
                outDihedrals += "\t%d\t%d\t%d\t%d\t%d\t%d\t# %s\n" % (
                    ndihedrals, dihedralid, a1 + 1, a2 + 1, a3 + 1, a4 + 1,
                    dstring)

            lastidx = i

        # NOTE(review): this block is placed inside `if printdih:` because
        # impropers are only ever emitted together with dihedrals below;
        # the nesting was reconstructed from a flattened source - confirm.
        if not ignoreimproper:
            # look for the improper dihedrals
            improperDihedralTypes = {}
            mapiDTypes = {}
            niDihedralTypes = 0
            niDihedrals = 0
            mollist = []

            if ignorebonds:
                sepmols = nmol.Separate()
                for smol in sepmols[1:]:
                    smol.PerceiveBondOrders()
                    mollist.append(smol)
            else:
                nmol.PerceiveBondOrders()
                mollist.append(nmol)

            outImpropers = "Impropers # harmonic\n\n"

            for imol in mollist:
                atomIterator = openbabel.OBMolAtomIter(imol)
                for atom in atomIterator:
                    # use GetValence() when the call succeeds and fall back
                    # to GetExplicitDegree() on any failure (bare except)
                    try:
                        # print(atom.GetHyb(), atom.GetAtomicNum(), atom.GetValence())
                        expDegree = atom.GetValence()
                    except:
                        # print(atom.GetHyb(), atom.GetAtomicNum(), atom.GetExplicitDegree())
                        expDegree = atom.GetExplicitDegree()

                    # returns impropers for atoms connected to 3 other atoms and with SP2 hybridization
                    if atom.GetHyb() == 2 and expDegree == 3:
                        # atoms reported at depth 2 by the BFS started on
                        # `atom` are taken as its direct neighbours
                        connectedAtoms = []
                        for atom2, depth in openbabel.OBMolAtomBFSIter(
                                imol,
                                atom.GetId() + 1):
                            if depth == 2:
                                connectedAtoms.append(atom2)

                        torsional = [
                            atom.GetId() + 1, connectedAtoms[0].GetId() + 1,
                            connectedAtoms[1].GetId() + 1,
                            connectedAtoms[2].GetId() + 1
                        ]

                        a1 = torsional[0] - 1
                        a2 = torsional[1] - 1
                        a3 = torsional[2] - 1
                        a4 = torsional[3] - 1

                        # remap to a real atom if needed
                        if a1 >= natoms:
                            a1 = realnumber[a1 - natoms]
                        if a2 >= natoms:
                            a2 = realnumber[a2 - natoms]
                        if a3 >= natoms:
                            a3 = realnumber[a3 - natoms]
                        if a4 >= natoms:
                            a4 = realnumber[a4 - natoms]

                        dtype1 = "%s - %s - %s - %s" % (
                            idToAtomicLabel[a1], idToAtomicLabel[a2],
                            idToAtomicLabel[a3], idToAtomicLabel[a4])
                        dtype2 = "%s - %s - %s - %s" % (
                            idToAtomicLabel[a4], idToAtomicLabel[a3],
                            idToAtomicLabel[a2], idToAtomicLabel[a1])

                        if dtype1 in improperDihedralTypes:
                            idihedralid = improperDihedralTypes[dtype1]
                            dstring = dtype1
                        elif dtype2 in improperDihedralTypes:
                            idihedralid = improperDihedralTypes[dtype2]
                            dstring = dtype2
                        else:
                            niDihedralTypes += 1
                            mapiDTypes[niDihedralTypes] = dtype1
                            idihedralid = niDihedralTypes
                            improperDihedralTypes[dtype1] = niDihedralTypes
                            dstring = dtype1

                        niDihedrals += 1
                        outImpropers += "\t%d\t%d\t%d\t%d\t%d\t%d\t# %s\n" % (
                            niDihedrals, idihedralid, a1 + 1, a2 + 1, a3 + 1,
                            a4 + 1, dstring)

    # print header
    # three layouts: with dihedrals only, with dihedrals and impropers, or
    # with neither
    if printdih and (ndihedrals > 0):
        if ignoreimproper or (niDihedrals == 0):
            header = "LAMMPS topology created from %s using pdb2lmp.py - By Henrique Musseli Cezar, 2020\n\n\t%d atoms\n\t%d bonds\n\t%d angles\n\t%d dihedrals\n\n\t%d atom types\n\t%d bond types\n\t%d angle types\n\t%d dihedral types\n\n" % (
                fname, natoms, nbonds, nangles, ndihedrals, nmassTypes,
                nbondTypes, nangleTypes, ndihedralTypes)
        else:
            header = "LAMMPS topology created from %s using pdb2lmp.py - By Henrique Musseli Cezar, 2020\n\n\t%d atoms\n\t%d bonds\n\t%d angles\n\t%d dihedrals\n\t%d impropers\n\n\t%d atom types\n\t%d bond types\n\t%d angle types\n\t%d dihedral types\n\t%d improper types\n\n" % (
                fname, natoms, nbonds, nangles, ndihedrals, niDihedrals,
                nmassTypes, nbondTypes, nangleTypes, ndihedralTypes,
                niDihedralTypes)
    else:
        header = "LAMMPS topology created from %s using pdb2lmp.py - By Henrique Musseli Cezar, 2020\n\n\t%d atoms\n\t%d bonds\n\t%d angles\n\n\t%d atom types\n\t%d bond types\n\t%d angle types\n\n" % (
            fname, natoms, nbonds, nangles, nmassTypes, nbondTypes,
            nangleTypes)

    # add box info
    if fromBounds:
        # bounding box of the atoms, widened by half a buffer on each side
        boxsize = [(xmin, xmax), (ymin, ymax), (zmin, zmax)]
        boxsize[repaxis] = (boxsize[repaxis][0] - buffa / 2.,
                            boxsize[repaxis][1] + buffa / 2.)
        for i in range(3):
            if i == repaxis:
                continue
            boxsize[i] = (boxsize[i][0] - buffo / 2.,
                          boxsize[i][1] + buffo / 2.)
        header += "\t%.8f\t%.8f\t xlo xhi\n\t%.8f\t%.8f\t ylo yhi\n\t%.8f\t%.8f\t zlo zhi\n" % (
            boxsize[0][0], boxsize[0][1], boxsize[1][0], boxsize[1][1],
            boxsize[2][0], boxsize[2][1])
    else:
        # cell read from the file: the box starts at the origin
        if orthogonal:
            header += "\t%.8f\t%.8f\t xlo xhi\n\t%.8f\t%.8f\t ylo yhi\n\t%.8f\t%.8f\t zlo zhi\n" % (
                0., boxinfo[0][0], 0., boxinfo[1][1], 0., boxinfo[2][2])
        else:
            header += "\t%.8f\t%.8f\t xlo xhi\n\t%.8f\t%.8f\t ylo yhi\n\t%.8f\t%.8f\t zlo zhi\n\t%.8f\t%.8f\t%.8f\t xy xz yz\n" % (
                0., boxinfo[0][0], 0., boxinfo[1][1], 0., boxinfo[2][2],
                boxinfo[1][0], boxinfo[2][0], boxinfo[2][1])

    # print Coeffs (placeholders to be filled in, one line per type)
    outCoeffs = "Pair Coeffs\n\n"

    for i in range(1, nmassTypes + 1):
        outCoeffs += "\t%d\teps\tsig\t# %s\n" % (i, mapTypes[i])

    outCoeffs += "\nBond Coeffs\n\n"

    for i in range(1, nbondTypes + 1):
        outCoeffs += "\t%d\tK\tr_0\t# %s\n" % (i, mapbTypes[i])

    outCoeffs += "\nAngle Coeffs\n\n"

    for i in range(1, nangleTypes + 1):
        outCoeffs += "\t%d\tK\ttetha_0 (deg)\t# %s\n" % (i, mapaTypes[i])

    if printdih and (ndihedrals > 0):
        outCoeffs += "\nDihedral Coeffs\n\n"

        for i in range(1, ndihedralTypes + 1):
            outCoeffs += "\t%d\tK\tn\tphi_0 (deg)\tw\t# %s\n" % (i,
                                                                 mapdTypes[i])

        # NOTE(review): nested under the dihedral check to match where the
        # improper counters are defined; nesting reconstructed - confirm
        if not ignoreimproper and (niDihedralTypes > 0):
            outCoeffs += "\nImproper Coeffs\n\n"

            for i in range(1, niDihedralTypes + 1):
                outCoeffs += "\t%d\tK\txi_0 (deg)\t# %s\n" % (i,
                                                              mapiDTypes[i])

    # assemble the sections in the same combinations used for the header
    if printdih and (ndihedrals > 0):
        if ignoreimproper or (niDihedralTypes == 0):
            return header + "\n" + outMasses + "\n" + outCoeffs + "\n" + outAtoms + "\n" + outBonds + "\n" + outAngles + "\n" + outDihedrals
        else:
            return header + "\n" + outMasses + "\n" + outCoeffs + "\n" + outAtoms + "\n" + outBonds + "\n" + outAngles + "\n" + outDihedrals + "\n" + outImpropers
    else:
        return header + "\n" + outMasses + "\n" + outCoeffs + "\n" + outAtoms + "\n" + outBonds + "\n" + outAngles
def binarify_molecules(inputfile, outputfile):
    """Pack the molecules listed in *inputfile* into one binary file.

    Each line of *inputfile* is stripped and handed to Open Babel as the
    path of an XYZ file.  For every such file one record is appended to
    *outputfile*; the zero-based line number is stored as the molecule id.

    Record layout (``struct`` prefix ``=``: native byte order, standard
    sizes, no padding):

    * ``I``        molecule id
    * ``B``        number of atoms
    * ``B`` x N    atomic number of every atom
    * ``d`` x 3(N-1)  x, y, z of atoms 2..N relative to the first atom
      (the first atom is implicitly at the origin and is not stored)
    * ``B``        number of bonds
    * ``B`` x 2M   zero-based begin/end atom index of every bond
    * ``B`` x M    bond orders
    * ``B``        number of retained dihedrals
    * ``B`` x 4K   atom indices of every retained dihedral

    A dihedral is retained only when both central atoms have an atomic
    number above 1 and at least one of the two terminal atoms does.

    Args:
        inputfile: path of a text file with one XYZ file path per line.
        outputfile: path of the binary file to create (opened with ``'wb'``).

    Raises:
        struct.error: when a value does not fit its field (every count and
            index is a single unsigned byte), or when a molecule has no
            atoms: the coordinate repeat count ``3 * (N - 1)`` is then
            negative and the format string is rejected.  The result of
            ``ReadFile`` is not checked, so an unreadable path ends up here.
    """
    # Context managers close both handles even if reading or packing raises;
    # the output is opened first, as before, so it is created/truncated
    # before the list file is touched.
    with open(outputfile, 'wb') as ofile, open(inputfile) as listing:
        obconv = ob.OBConversion()
        obconv.SetInFormat('xyz')

        for molecule_id, xyzfile in enumerate(listing):
            mol = ob.OBMol()
            obconv.ReadFile(mol, xyzfile.strip())
            number_of_atoms = mol.NumAtoms()

            # atom properties
            element_numbers = []
            nuclear_coordinates = []  # flat: x, y, z, x, y, z, ...
            origin = None
            for atom in ob.OBMolAtomIter(mol):
                element_numbers.append(atom.GetAtomicNum())
                position = (atom.GetX(), atom.GetY(), atom.GetZ())
                # Shift the molecule so the first atom sits at the origin;
                # that atom's own coordinates are therefore not stored.
                if origin is None:
                    origin = position
                else:
                    nuclear_coordinates.extend(
                        p - o for p, o in zip(position, origin))

            # bond properties
            bond_orders = []
            bonds = []  # flat: begin, end, begin, end, ...
            for bond in ob.OBMolBondIter(mol):
                bond_orders.append(bond.GetBO())
                bonds.extend(
                    (bond.GetBeginAtomIdx() - 1, bond.GetEndAtomIdx() - 1))
            number_of_bonds = len(bond_orders)

            # dihedral properties
            dihedrals = []  # flat: four atom indices per dihedral
            number_of_dihedrals = 0
            for dihedral in ob.OBMolTorsionIter(mol):
                heavy = [element_numbers[idx] > 1 for idx in dihedral]
                # both middle atoms must be heavy
                if not (heavy[1] and heavy[2]):
                    continue
                # skip if both terminal atoms are hydrogens
                if not heavy[0] and not heavy[3]:
                    continue
                dihedrals.extend(dihedral)
                number_of_dihedrals += 1

            # binary packing
            formatstring = '=IB%dB%ddB%dB%dBB%dB' % (
                number_of_atoms,  # element numbers
                3 * (number_of_atoms - 1),  # atomic coordinates
                2 * number_of_bonds,  # atom indices in bonds
                number_of_bonds,  # bond orders
                4 * number_of_dihedrals)  # atom indices in dihedrals
            ofile.write(
                struct.pack(formatstring, molecule_id, number_of_atoms,
                            *element_numbers, *nuclear_coordinates,
                            number_of_bonds, *bonds, *bond_orders,
                            number_of_dihedrals, *dihedrals))