def contactVecToMatrix(vector, atomIndexes):
    """Expand a flat per-pair vector into a symmetric square matrix.

    Parameters
    ----------
    vector : np.ndarray
        One value per entry of ``atomIndexes``. Its dtype is reused for the
        returned matrix.
    atomIndexes : iterable
        Pairs whose two members are each passed through ``ensurelist``
        (presumably a single atom index or a group of atom indexes - TODO
        confirm). The input is deep-copied, so the caller's object is left
        untouched.

    Returns
    -------
    matrix : np.ndarray
        Square array holding ``vector[i]`` at both ``[row, col]`` and
        ``[col, row]`` of pair ``i``. Cells without a pair stay 0.
    mapping : np.ndarray
        Integer array of the same shape giving, for every cell, the index into
        ``vector`` that filled it, or -1 when nothing was written there.
    uqAtomGroups : list
        The distinct atom groups sorted by their first element. The position
        of a group in this list is its row/column in the two arrays.
    """
    from copy import deepcopy

    pairs = deepcopy(list(atomIndexes))

    # Collect every distinct atom group, normalising both members of each pair
    # to lists on the way.
    groups = []
    for pair in pairs:
        for side in (0, 1):
            pair[side] = ensurelist(pair[side])
            if pair[side] not in groups:
                groups.append(pair[side])
    # Order the groups by the first atom of each group
    groups = sorted(groups, key=lambda grp: grp[0])

    size = len(groups)
    matrix = np.zeros((size, size), dtype=vector.dtype)
    mapping = np.full((size, size), -1, dtype=int)

    for k, value in enumerate(vector):
        r = groups.index(pairs[k][0])
        c = groups.index(pairs[k][1])
        # Mirror every entry so both outputs are symmetric
        for a, b in ((r, c), (c, r)):
            matrix[a, b] = value
            mapping[a, b] = k

    return matrix, mapping, groups
def parameterizeNonCanonicalResidues(cifs, outdir, method="gaff2", nnp=None, is_nterm=False, is_cterm=False):
    """Parameterize one or more non-canonical residues given as CIF files.

    Parameters
    ----------
    cifs : str or list of str
        One CIF file or several. Each is loaded with ``Molecule``.
    outdir : str
        Forwarded unchanged to ``_parameterize_non_canonical_residue``.
    method : str
        ``"gaff2"`` or ``"ani-2x"``, compared case-insensitively.
    nnp : object, optional
        NNP calculator. Required when ``method`` is ``"ani-2x"``.
    is_nterm : bool
        Forwarded unchanged to the per-residue routine.
    is_cterm : bool
        Forwarded unchanged to the per-residue routine.

    Raises
    ------
    RuntimeError
        If ANI-2x is requested without an ``nnp``.
    AttributeError
        If ``method`` is neither GAFF2 nor ANI-2x.
    """
    cif_files = ensurelist(cifs)
    method = method.lower()

    supported = ("gaff2", "ani-2x")
    if method not in supported:
        raise AttributeError(
            "Parameterization can only be performed with GAFF2 or ANI-2x methods"
        )
    if nnp is None and method == "ani-2x":
        raise RuntimeError(
            "The user must provide an NNP calculator to user ANI-2x parameterization"
        )

    # Options shared by every residue
    options = dict(nnp=nnp, is_nterm=is_nterm, is_cterm=is_cterm)
    for cif in cif_files:
        _parameterize_non_canonical_residue(Molecule(cif), outdir, method, **options)
def __init__(self, plumed_inp):
    """Build the metric from a PLUMED script or from PLUMED statement objects.

    Parameters
    ----------
    plumed_inp : str or object or list of objects
        A string (or str subclass) is stored verbatim as the PLUMED input.
        Anything else is wrapped with ``ensurelist``, attached as the
        prerequisites of a new ``PlumedStatement`` and rendered to text with
        ``_printDFS``, one statement per line.

    Raises
    ------
    Exception
        If ``_getPlumedRoot()`` fails. The original error is chained as the
        cause.
    """
    # NOTE: original author was unsure whether doing this much work in the
    # constructor is good style.
    self._plumed_exe = shutil.which("plumed")
    self.colvar = None
    self.cvnames = None
    self.stmt = None

    # Only the probe itself is guarded, so unrelated errors are not relabelled
    # as a missing PLUMED installation.
    try:
        pp = _getPlumedRoot()
    except Exception as e:
        raise Exception(
            "To use MetricPlumed2 please ensure PLUMED 2's executable is installed and in path"
        ) from e
    logger.info("Plumed path is %s", pp)

    # Sanitize if single element
    if isinstance(plumed_inp, str):
        self._plumed_inp = plumed_inp
    else:
        # Keeping the statement on self should keep the CVs etc in scope
        from moleculekit.util import ensurelist

        self.stmt = PlumedStatement()
        self.stmt.prereq = ensurelist(plumed_inp)

        stmts = _printDFS(self.stmt)
        self._plumed_inp = "\n".join(stmts)
def compareGraphs(G, H, fields=("element", ), tolerance=0.5, returnmatching=False):
    """Score the similarity of two graphs through their largest common clique.

    Comparison algorithm based on:
    "Chemoisosterism in the Proteome", X. Jalencas, J. Mestres, JCIM 2013
    http://pubs.acs.org/doi/full/10.1021/ci3002974

    Parameters
    ----------
    G, H : graph objects
        The graphs to compare. They must provide ``nodes()``, ``edges()`` and
        ``len()``.
    fields : str or sequence of str
        Forwarded (as a list) to ``createProductGraph``.
    tolerance : float
        Forwarded to ``createProductGraph``.
    returnmatching : bool
        If True also return the size of the best clique and the clique itself.

    Returns
    -------
    score : number
        Size of the largest maximal clique of the product graph divided by the
        node count of the larger input graph. 1 when ``G == H``. 0 when either
        graph has no edges or when the product graph has no clique.
    n : int
        Only if ``returnmatching``. Size of the best clique.
    matching : list
        Only if ``returnmatching``. The best clique (pairs ``(x, x)`` over the
        nodes of ``G`` in the ``G == H`` case).
    """
    fields = ensurelist(fields)

    if G == H:
        if returnmatching:
            return 1, len(G), [(x, x) for x in G.nodes()]
        return 1

    if len(G.edges()) == 0 or len(H.edges()) == 0:
        if returnmatching:
            return 0, 0, []
        return 0

    Gprod = createProductGraph(G, H, tolerance, fields)

    # Calculate the maximal cliques and return the length of the largest one
    maxcliques = list(nx.find_cliques(Gprod))
    if not maxcliques:
        # An empty product graph has no cliques. Taking max() of a zero-size
        # array would raise, so report "nothing in common" like the early
        # exit above does.
        if returnmatching:
            return 0, 0, []
        return 0

    cllen = np.array([len(x) for x in maxcliques])
    largest = cllen.max()
    score = largest / max(len(G.nodes()), len(H.nodes()))
    if returnmatching:
        return score, largest, maxcliques[cllen.argmax()]
    return score
def getSegIdx(m, mseg):
    """Return the indexes of the CA atoms belonging to the selected segments.

    Parameters
    ----------
    m : Molecule-like object
        Must provide ``atomselect``, ``numAtoms``, ``segid`` and ``name``.
    mseg : str or list of str
        The literal string ``'protein'`` selects via the atom selection
        ``'protein and name CA'``. Any other value is treated as one or more
        segment ids.

    Returns
    -------
    idx : np.ndarray
        Indexes of the selected atoms.
    """
    if isinstance(mseg, str) and mseg == "protein":
        return np.nonzero(m.atomselect("protein and name CA"))[0]

    # Accumulate the CA atoms of every requested segment into one mask
    selected = np.zeros(m.numAtoms, dtype=bool)
    for segment in ensurelist(mseg):
        selected |= (m.segid == segment) & (m.name == "CA")
    return np.nonzero(selected)[0]
def _calculateMolProp(self, mol, props='all'):
    """Resolve the atom indexes of the dihedrals this metric works on.

    Parameters
    ----------
    mol : Molecule-like object
        Molecule the selection and the dihedrals are resolved against.
    props : str
        Not used by this implementation.

    Returns
    -------
    res : dict
        A single key, ``'dihedrals'``, holding the result of
        ``Dihedral.dihedralsToIndexes``.
    """
    from moleculekit.util import ensurelist

    protein = mol.atomselect(self._protsel)

    configured = self._dihedrals
    if configured is not None:
        targets = ensurelist(configured)
    else:
        # Default phi psi dihedrals
        targets = Dihedral.proteinDihedrals(mol, protein)

    return {'dihedrals': Dihedral.dihedralsToIndexes(mol, targets, protein)}
def dihedralsToIndexes(mol, dihedrals, sel="all"):
    """Translate Dihedral objects into atom indexes of a Molecule

    Parameters
    ----------
    mol : :class:`Molecule <moleculekit.molecule.Molecule>` object
        The Molecule whose atoms are looked up
    dihedrals : list
        One Dihedral object or a list of them
    sel : str
        Atom selection string limiting which atoms may be matched.
        See more `here <http://www.ks.uiuc.edu/Research/vmd/vmd-1.9.2/ug/node89.html>`__

    Returns
    -------
    indexes : list of lists
        For every dihedral, the list of atom indexes that make it up.

    Raises
    ------
    RuntimeError
        If an atom description of a dihedral does not match exactly one atom.

    Examples
    --------
    >>> dihs = []
    >>> dihs.append(Dihedral.phi(mol, 1, 2))
    >>> dihs.append(Dihedral.psi(mol, 2, 3))
    >>> indexes = Dihedral.dihedralsToIndexes(mol, dihs)
    """
    allowed = mol.atomselect(sel)
    from moleculekit.util import ensurelist

    indexes = []
    for dihedral in ensurelist(dihedrals):
        atom_ids = []
        for atom in dihedral.atoms:
            # An atom is identified by the combination of these five fields
            match = mol.name == atom["name"]
            for field in ("resid", "insertion", "chain", "segid"):
                match = match & (getattr(mol, field) == atom[field])
            match = match & allowed

            nmatches = np.sum(match)
            if nmatches != 1:
                raise RuntimeError(
                    "Expected one atom from atomselection {}. Got {} instead.".format(
                        atom, nmatches
                    )
                )
            atom_ids.append(np.where(match)[0][0])
        indexes.append(atom_ids)
    return indexes
def elements_from_masses(masses):
    """Guess chemical elements from atomic masses by nearest reference mass.

    Parameters
    ----------
    masses : number or list of numbers
        One mass or several.

    Returns
    -------
    elements : element or list of elements
        The entry of ``_all_elements`` whose mass in ``_all_masses`` is closest
        to each input mass. A single input yields the bare element instead of
        a one-item list.
    """
    from moleculekit.util import ensurelist
    from scipy.spatial.distance import cdist

    query = np.array(ensurelist(masses))
    if np.any(query > 140):
        logger.warning(
            "Guessing element for atoms with mass > 140. This can lead to inaccurate element guesses."
        )

    # Distance of every query mass (rows) to every reference mass (columns)
    reference = np.array(_all_masses)
    distances = cdist(query[:, None], reference[:, None])
    nearest = np.argmin(distances, axis=1)

    guessed = list(_all_elements[nearest])
    return guessed[0] if len(guessed) == 1 else guessed
def PDBwrite(mol, filename, frames=None, writebonds=True, mode='pdb'):
    """Write a molecule to a PDB or PDBQT formatted text file.

    Parameters
    ----------
    mol : Molecule-like object
        Source of the per-atom fields (``record``, ``name``, ``altloc``,
        ``resname``, ``chain``, ``resid``, ``insertion``, ``occupancy``,
        ``beta``, ``segid``, ``element``; ``charge`` and ``atomtype`` for
        PDBQT), of ``coords``, ``box`` and ``bonds``.
    filename : str
        Path of the file to create or overwrite.
    frames : int or list of int, optional
        Frame(s) to write, one MODEL each. Defaults to ``mol.frame``. When the
        molecule has no frames, a single model with all-zero coordinates is
        written and this argument is ignored.
    writebonds : bool
        Write CONECT records for ``mol.bonds``.
    mode : str
        ``'pdb'`` ends each atom line with segid and element. ``'pdbqt'`` ends
        it with partial charge and atom type.

    Raises
    ------
    AssertionError
        If ``mode`` is neither ``'pdb'`` nor ``'pdbqt'``. This is checked
        before the file is opened, so no partial file is left behind.
    RuntimeError
        If coordinates, occupancy or beta do not fit their fixed-width field.
    """
    if mode not in ('pdb', 'pdbqt'):
        raise AssertionError('Invalid mode for PDB writer')

    # Honour an explicit ``frames`` argument. Only fall back to the current
    # frame when none was given, and to 0 when there are no frames at all.
    if mol.numFrames == 0:
        frames = 0
    elif frames is None:
        frames = mol.frame
    frames = ensurelist(frames)

    checkTruncations(mol)
    box = None
    if mol.numFrames != 0:
        coords = np.atleast_3d(mol.coords[:, :, frames])
        # An empty box array has no column to index
        if hasattr(mol, 'box') and mol.box.shape[1] != 0:
            box = mol.box[:, frames[0]]
    else:
        # If Molecule only contains topology, PDB requires some coordinates so give it zeros
        coords = np.zeros((mol.numAtoms, 3, 1), dtype=np.float32)

    numFrames = coords.shape[2]
    nAtoms = coords.shape[0]

    # Serials that do not fit the 5-column field are written as '*****'
    serial = np.arange(1, nAtoms + 1).astype(object)
    serial[serial > 99999] = '*****'
    serial = serial.astype('U5')

    if nAtoms > 0:
        if coords.max() >= 1E8 or coords.min() <= -1E7:
            raise RuntimeError(
                'Cannot write PDB coordinates with values smaller than -1E7 or larger than 1E8'
            )
        if mol.occupancy.max() >= 1E6 or mol.occupancy.min() <= -1E5:
            raise RuntimeError(
                'Cannot write PDB occupancy with values smaller than -1E5 or larger than 1E6'
            )
        if mol.beta.max() >= 1E6 or mol.beta.min() <= -1E5:
            raise RuntimeError(
                'Cannot write PDB beta/temperature with values smaller than -1E5 or larger than 1E6'
            )

    # The PDB format is column based, so the blank columns are spelled out
    # explicitly instead of being hidden inside one long literal.
    # Columns 1-27: record, serial, blank, name, altLoc, resName, chain, resSeq, iCode
    head = "{!s:6.6}{!s:>5.5} {}{!s:>1.1}{!s:4.4}{!s:>1.1}{!s:>4.4}{!s:>1.1}"
    # Columns 28-30 blank, 31-66: x, y, z, occupancy, tempFactor (pre-formatted)
    middle = " " * 3 + "{}{}{}{}{}"
    is_pdb = mode == 'pdb'
    if is_pdb:
        # Columns 67-72 blank, 73-76 segid, 77-78 element, 79-80 blank
        tail = " " * 6 + "{!s:4.4}{!s:>2.2}" + " " * 2 + "\n"
    else:
        # Columns 67-70 blank, 71-76 partial charge, 77 blank, 78-79 atom type
        tail = " " * 4 + "{:>6.3f} {!s:<2.2} \n"
    linefmt = head + middle + tail

    nrec = len(mol.record)
    # Atom names do not depend on the frame, so deduce them only once
    names = [_deduce_PDB_atom_name(mol.name[i], mol.resname[i]) for i in range(nrec)]

    # CONECT records do not depend on the frame either
    conect = []
    if writebonds and mol.bonds is not None and len(mol.bonds) != 0:
        bondedatoms = np.unique(mol.bonds)
        # Don't print bonds over 99999 as it overflows the field
        bondedatoms = bondedatoms[bondedatoms < 99998]
        for a in bondedatoms:
            partners = mol.bonds[mol.bonds[:, 0] == a, 1]
            partners = np.unique(
                np.append(partners, mol.bonds[mol.bonds[:, 1] == a, 0]))
            partners = partners[partners < 99998] + 1
            # At most three partners per line; further partners continue on
            # additional CONECT lines for the same atom.
            for start in range(0, len(partners), 3):
                chunk = "".join("%5d" % p for p in partners[start:start + 3])
                conect.append("CONECT%5d%s\n" % (a + 1, chunk))

    with open(filename, 'w') as fh:
        if box is not None and not np.all(mol.box == 0):
            # Columns 56-66 space group (left justified), 67-70 Z value
            fh.write("CRYST1%9.3f%9.3f%9.3f%7.2f%7.2f%7.2f %-11s%4d \n" %
                     (box[0], box[1], box[2], 90, 90, 90, "P 1", 1))

        for f in range(numFrames):
            # Model serial is right justified and ends in column 14
            fh.write("MODEL%9d\n" % (frames[f] + 1))
            for i in range(nrec):
                if is_pdb:
                    prelast = mol.segid[i]
                    last = mol.element[i].upper()
                else:
                    prelast = mol.charge[i]
                    last = mol.atomtype[i]
                fh.write(
                    linefmt.format(mol.record[i], serial[i], names[i],
                                   mol.altloc[i], mol.resname[i], mol.chain[i],
                                   mol.resid[i], mol.insertion[i],
                                   '{:8.3f}'.format(coords[i, 0, f])[:8],
                                   '{:8.3f}'.format(coords[i, 1, f])[:8],
                                   '{:8.3f}'.format(coords[i, 2, f])[:8],
                                   '{:6.2f}'.format(mol.occupancy[i])[:6],
                                   '{:6.2f}'.format(mol.beta[i])[:6],
                                   prelast, last))
                # TODO : convert charges to ints if we ever write them
                # A change of segid marks the end of a chain
                if i < nrec - 1 and mol.segid[i] != mol.segid[i + 1]:
                    fh.write("TER\n")

            fh.writelines(conect)
            fh.write("ENDMDL\n")
        fh.write("END\n")
def PDBwrite(mol, filename, frames=None, writebonds=True, mode="pdb"):
    """Write a molecule to a PDB or PDBQT formatted text file.

    Parameters
    ----------
    mol : Molecule-like object
        Source of the per-atom fields (``record``, ``name``, ``altloc``,
        ``resname``, ``chain``, ``resid``, ``insertion``, ``occupancy``,
        ``beta``, ``segid``, ``element``; ``charge`` and ``atomtype`` for
        PDBQT), of ``coords``, ``box`` and ``bonds``.
    filename : str
        Path of the file to create or overwrite.
    frames : int or list of int, optional
        Frame(s) to write, one MODEL each. Defaults to ``mol.frame``. When the
        molecule has no frames, a single model with all-zero coordinates is
        written and this argument is ignored.
    writebonds : bool
        Write CONECT records for ``mol.bonds``.
    mode : str
        ``"pdb"`` ends each atom line with segid and element. ``"pdbqt"`` ends
        it with partial charge and atom type.

    Raises
    ------
    AssertionError
        If ``mode`` is neither ``"pdb"`` nor ``"pdbqt"``. This is checked
        before the file is opened, so no partial file is left behind.
    RuntimeError
        If coordinates, occupancy or beta do not fit their fixed-width field.
    """
    if mode not in ("pdb", "pdbqt"):
        raise AssertionError("Invalid mode for PDB writer")

    # Honour an explicit ``frames`` argument. Only fall back to the current
    # frame when none was given, and to 0 when there are no frames at all.
    if mol.numFrames == 0:
        frames = 0
    elif frames is None:
        frames = mol.frame
    frames = ensurelist(frames)

    checkTruncations(mol)
    box = None
    if mol.numFrames != 0:
        coords = np.atleast_3d(mol.coords[:, :, frames])
        if hasattr(mol, "box") and mol.box.shape[1] != 0:
            box = mol.box[:, frames[0]]
    else:
        # If Molecule only contains topology, PDB requires some coordinates so give it zeros
        coords = np.zeros((mol.numAtoms, 3, 1), dtype=np.float32)

    numFrames = coords.shape[2]
    nAtoms = coords.shape[0]

    # Serials that do not fit the 5-column field are written as "*****"
    serial = np.arange(1, nAtoms + 1).astype(object)
    serial[serial > 99999] = "*****"
    serial = serial.astype("U5")

    if nAtoms > 0:
        if coords.max() >= 1e8 or coords.min() <= -1e7:
            raise RuntimeError(
                "Cannot write PDB coordinates with values smaller than -1E7 or larger than 1E8"
            )
        if mol.occupancy.max() >= 1e6 or mol.occupancy.min() <= -1e5:
            raise RuntimeError(
                "Cannot write PDB occupancy with values smaller than -1E5 or larger than 1E6"
            )
        if mol.beta.max() >= 1e6 or mol.beta.min() <= -1e5:
            raise RuntimeError(
                "Cannot write PDB beta/temperature with values smaller than -1E5 or larger than 1E6"
            )

    # The PDB format is column based, so the blank columns are spelled out
    # explicitly instead of being hidden inside one long literal.
    # Columns 1-27: record, serial, blank, name, altLoc, resName, chain, resSeq, iCode
    head = "{!s:6.6}{!s:>5.5} {}{!s:>1.1}{!s:4.4}{!s:>1.1}{!s:>4.4}{!s:>1.1}"
    # Columns 28-30 blank, 31-66: x, y, z, occupancy, tempFactor (pre-formatted)
    middle = " " * 3 + "{}{}{}{}{}"
    is_pdb = mode == "pdb"
    if is_pdb:
        # Columns 67-72 blank, 73-76 segid, 77-78 element, 79-80 blank
        tail = " " * 6 + "{!s:4.4}{!s:>2.2}" + " " * 2 + "\n"
    else:
        # Columns 67-70 blank, 71-76 partial charge, 77 blank, 78-79 atom type
        tail = " " * 4 + "{:>6.3f} {!s:<2.2} \n"
    linefmt = head + middle + tail

    nrec = len(mol.record)
    # Atom names do not depend on the frame, so deduce them only once
    names = [
        _deduce_PDB_atom_name(mol.name[i], mol.resname[i], mol.element[i])
        for i in range(nrec)
    ]

    # CONECT records do not depend on the frame either
    conect = []
    if writebonds and mol.bonds is not None and len(mol.bonds) != 0:
        # Bonds over 99999 cause issues with PDB fixedwidth format
        goodbonds = mol.bonds[np.all(mol.bonds < 99998, axis=1), :]
        bondgraph = nx.Graph()
        bondgraph.add_edges_from(goodbonds + 1)  # Add 1 for PDB 1-based indexing
        for atom, neighbours in sorted(bondgraph.adj.items()):
            neighbours = sorted(neighbours)
            # At most three partners per line; further partners continue on
            # additional CONECT lines for the same atom.
            for ni in range(0, len(neighbours), 3):
                neighstring = "".join("%5d" % sn for sn in neighbours[ni:ni + 3])
                conect.append("CONECT{:5d}{}\n".format(atom, neighstring))

    with open(filename, "w") as fh:
        if box is not None and not np.all(mol.box == 0):
            # Columns 56-66 space group (left justified), 67-70 Z value
            fh.write("CRYST1%9.3f%9.3f%9.3f%7.2f%7.2f%7.2f %-11s%4d \n" %
                     (box[0], box[1], box[2], 90, 90, 90, "P 1", 1))

        for f in range(numFrames):
            # Model serial is right justified and ends in column 14
            fh.write("MODEL%9d\n" % (frames[f] + 1))
            for i in range(nrec):
                if is_pdb:
                    prelast = mol.segid[i]
                    last = mol.element[i].upper()
                else:
                    prelast = mol.charge[i]
                    last = mol.atomtype[i]
                fh.write(
                    linefmt.format(
                        mol.record[i],
                        serial[i],
                        names[i],
                        mol.altloc[i],
                        mol.resname[i],
                        mol.chain[i],
                        mol.resid[i],
                        mol.insertion[i],
                        "{:8.3f}".format(coords[i, 0, f])[:8],
                        "{:8.3f}".format(coords[i, 1, f])[:8],
                        "{:8.3f}".format(coords[i, 2, f])[:8],
                        "{:6.2f}".format(mol.occupancy[i])[:6],
                        "{:6.2f}".format(mol.beta[i])[:6],
                        prelast,
                        last,
                    ))
                # TODO : convert charges to ints if we ever write them
                # A change of segid marks the end of a chain
                if i < nrec - 1 and mol.segid[i] != mol.segid[i + 1]:
                    fh.write("TER\n")

            fh.writelines(conect)
            fh.write("ENDMDL\n")
        fh.write("END\n")