def lindemann_per_frames(u: Universe, select_lang):
    """Calculate the per-atom Lindemann index after every frame.

    The running mean and variance accumulator of every interatomic distance
    are updated frame by frame with Welford's algorithm. After each frame
    the pairwise index ``sqrt(var / n_frames) / mean`` is formed and averaged
    over the partner atoms (NaNs, e.g. the diagonal, are ignored).

    Note that the variance accumulator is always divided by the *total*
    number of frames in the trajectory, not by the number of frames
    processed so far.

    Parameters
    ----------
    u : Universe
        MDAnalysis universe holding the trajectory.
    select_lang : str
        Selection string for the atoms of the cluster.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(n_frames, natoms)`` with the Lindemann index of
        each atom after each frame.
    """
    natoms = len(u.select_atoms(select_lang))
    nframes = len(u.trajectory)
    array_mean = np.zeros((natoms, natoms))
    array_var = np.zeros((natoms, natoms))
    # Only the per-atom averages are kept, so memory is O(frames * natoms)
    # instead of O(frames * natoms**2).
    lindex_array = np.zeros((nframes, natoms))
    cluster = u.select_atoms(select_lang, updating=True)

    for iframe, _ in enumerate(u.trajectory, start=1):
        coords = cluster.positions
        array_distance = distance.cdist(coords, coords)

        # Welford update applied to the whole (symmetric) matrix at once;
        # element-wise this is the same arithmetic as the pairwise loop.
        delta = array_distance - array_mean
        array_mean += delta / iframe
        array_var += delta * (array_distance - array_mean)

        # The diagonal is 0/0 -> NaN and is skipped by nanmean below.
        lindemann_indices = np.divide(
            np.sqrt(np.divide(array_var, nframes)), array_mean)
        lindex_array[iframe - 1] = np.nanmean(lindemann_indices, axis=1)

    return lindex_array
def classifyResiduesIntoTwo(apo_pdb, holo_pdb, ligname, cutoff=4.0):
    """Split aromatic residues into cryptic and non-cryptic sets.

    The ligand coordinates are taken from the holo structure and compared
    with the protein atoms of the apo structure. An aromatic residue
    (PHE, TRP, TYR, HIS) of the apo structure that has at least one atom
    within ``cutoff`` of any ligand atom (i.e. it would clash with the
    ligand) is labelled cryptic.

    Parameters
    ----------
    apo_pdb, holo_pdb
        Structure files passed to ``Universe``.
    ligname : str
        Residue name of the ligand in the holo structure.
    cutoff : float
        Distance threshold, in the coordinate units of the structures.

    Returns
    -------
    tuple of set
        ``(cryptic, not_cryptic)``; each element is a label of the form
        ``f'{resname}{resid}'``. ``not_cryptic`` contains the aromatic
        residues of the holo structure (ligand excluded) that are not in
        ``cryptic``.
    """
    S_aromatic_resname = {'PHE', 'TRP', 'TYR', 'HIS'}
    u_holo, u_apo = Universe(holo_pdb), Universe(apo_pdb)
    ligand = u_holo.select_atoms(f'resname {ligname}')
    holo = u_holo.select_atoms(f'not resname {ligname}')
    apo = u_apo.select_atoms('protein')

    # Only aromatic atoms can ever be labelled, so filter before measuring.
    aromatic = apo.select_atoms(
        'resname ' + ' '.join(sorted(S_aromatic_resname)))

    # (n_ligand, n_aromatic) distance matrix computed in one shot instead of
    # a Python loop over every atom pair.
    diff = ligand.positions[:, None, :] - aromatic.positions[None, :, :]
    is_close = (np.linalg.norm(diff, axis=-1) <= cutoff).any(axis=0)
    S_cryptic = {
        f'{resname}{resid}'
        for resname, resid in zip(aromatic.resnames[is_close],
                                  aromatic.resids[is_close])
    }

    # All aromatic residues of the holo structure; note that this function
    # is specialised for aromatic residues.
    S_all_aroma = {
        f'{residue.resname}{residue.resid}'
        for residue in holo.residues
        if residue.resname in S_aromatic_resname
    }
    S_not_cryptic = S_all_aroma - S_cryptic
    return set(S_cryptic), set(S_not_cryptic)
def test_write_selection(self):
    """A written ``name C*`` selection reloads with the same atom count."""
    source = Universe(mol2_molecule)
    written = source.select_atoms("name C*")
    written.write(self.outfile)

    reloaded = Universe(self.outfile).select_atoms("name C*")
    assert_equal(len(written), len(reloaded))
def main():
    """Score every model of the merged complex by C-alpha contacts.

    For each frame the C-alpha distances between segids A/B and segids
    C/D/E are computed. Distances below ``lower`` count as violations and
    distances in ``[lower, upper]`` count as contacts. One line per model is
    written to ``model_no.out`` with the score
    ``-0.59 * ln(nContacts / nViolations)`` (NaN when there are no
    violations).
    """
    u = Universe(
        '/Volumes/HD-siida/gtail_b1_sys/analysis/merged_aligned_complexes.pdb')
    print(u.atoms.segids)
    ca_integrinAB = u.select_atoms('segid A B and name CA')
    ca_lamininE8 = u.select_atoms('segid C D E and name CA')
    lower, upper = 6.0, 10.0
    header = (
        f'#MODEL NO, nViolations (if r<{lower}), nContacts ({lower}<=r<={upper}) \n'
    )

    with open('model_no.out', 'w') as fout:
        fout.write(header)
        # Model numbers are 1-based.
        for model_no, _ in enumerate(tqdm(u.trajectory), 1):
            pair_dist = distance.cdist(ca_integrinAB.positions,
                                       ca_lamininE8.positions,
                                       metric='euclidean')
            too_close = pair_dist < lower
            in_contact = (pair_dist <= upper) & (pair_dist >= lower)
            nViolations = len(pair_dist[too_close])
            nContacts = len(pair_dist[in_contact])
            if nViolations == 0:
                score = np.nan
            else:
                score = -0.59 * np.log(nContacts / nViolations)
            fout.write(f'{model_no}, {nViolations}, {nContacts}, {score}\n')
def distanceMatrix(trajFile, sele1, sele2, ref=None):
    """Collect pairwise distances between two selections for every frame.

    Parameters
    ----------
    trajFile
        Trajectory (or structure) file passed to ``Universe``.
    sele1, sele2 : str
        Selection strings. When they are the same string the result for a
        frame holds each unique pair once (strict upper triangle of the
        distance matrix, row-major order). Otherwise it is the flattened
        full ``len(sele1) x len(sele2)`` matrix.
    ref : optional
        Topology/reference file; when given the universe is built as
        ``Universe(ref, trajFile)``.

    Returns
    -------
    list of numpy.ndarray
        One 1-D distance array per frame.
    """
    if ref is None:
        u = Universe(trajFile)
    else:
        print("* Reference is given.")
        u = Universe(ref, trajFile)
    s1, s2 = u.select_atoms(sele1), u.select_atoms(sele2)
    print("* pair 1: \n ", s1)
    print("* pair 2: \n ", s2)

    self_pairs = sele1 == sele2
    # Select unique pairs by index rather than by "value != 0": filtering on
    # the value would also drop a genuine zero distance between two
    # different atoms and make the per-frame length data dependent.
    upper = np.triu_indices(len(s1), k=1) if self_pairs else None

    distances = []
    for _ in tqdm(u.trajectory):
        dist = distance.cdist(s1.positions, s2.positions, metric='euclidean')
        if self_pairs:
            dist = dist[upper]
        else:
            dist = dist.flatten()
        distances.append(dist)
    return distances
def calc_tilt_end_to_end(universe: mda.Universe, resid_up, resid_down, fname="TMD_tilt.dat"):
    '''
    Write the end-to-end tilt angle for every frame of the trajectory.

    The tilt is the angle between the z axis and the vector joining the
    centers of mass of ``resid_up`` and ``resid_down``. It is folded into
    [0, 90] degrees, so the direction of the vector does not matter.

    Parameters
    ----------
    universe : mda.Universe
        Universe with the trajectory to analyse.
    resid_up, resid_down
        Residue ids inserted into ``"resid {}"`` selections.
    fname : str
        Output file; receives a header and one ``time  tilt`` row per frame.
    '''
    fstr2 = '{: <15}{: <20}'
    fstr = '{: <15}{: <20.5f}'
    # Loop invariants: the axis and the (static) selections do not change
    # from frame to frame, only the coordinates behind them do.
    zaxis = np.array([0, 0, 1])
    sel_u = universe.select_atoms("resid {}".format(resid_up))
    sel_d = universe.select_atoms("resid {}".format(resid_down))
    with open(fname, "w") as outf:
        print(fstr2.format("time", "tilt"), file=outf)
        for t in range(universe.trajectory.n_frames):
            time = universe.trajectory[t].time
            LOGGER.info("At %s", time)
            end_to_end = sel_d.center_of_mass() - sel_u.center_of_mass()
            costilt = np.dot(end_to_end, zaxis) / np.linalg.norm(end_to_end)
            # Rounding can push the cosine marginally outside [-1, 1],
            # which would make arccos return NaN.
            costilt = np.clip(costilt, -1.0, 1.0)
            angle = np.arccos(costilt) * (180 / np.pi)
            if angle > 90:
                angle -= 180
            print(fstr.format(time, abs(angle)), file=outf)
def cluster_coordinates(  # TODO: rewrite the method
    nvt_run: Universe,
    select_dict: Dict[str, str],
    run_start: int,
    run_end: int,
    species: List[str],
    distance: float,
    basis_vectors: Optional[Union[List[np.ndarray], np.ndarray]] = None,
    cluster_center: str = "center",
) -> np.ndarray:
    """Calculates the average position of a cluster.

    Args:
        nvt_run: An MDAnalysis ``Universe`` containing wrapped trajectory.
        select_dict: A dictionary of atom species selection, where each atom
            species name is a key and the corresponding values are the
            selection language.
        run_start: Start frame of analysis.
        run_end: End frame of analysis.
        species: A list of selection strings for the species in the cluster;
            they are joined with ``or``.
        distance: The coordination cutoff distance.
        basis_vectors: Two or three basis vectors used to express the cluster
            coordinates in a new frame. With two vectors the third is built
            from cross products. ``None`` (or an empty sequence) returns the
            plain averaged coordinates.
        cluster_center: Key in ``select_dict`` of the cluster center species;
            the first atom of that selection is used as the center.

    Returns:
        An array with one averaged coordinate per cluster atom. When
        ``basis_vectors`` is given, the coordinates are expressed in the
        normalized basis and shifted so that their mean is at the origin.

    Raises:
        ValueError: If ``basis_vectors`` holds neither 2 nor 3 vectors.
    """
    trj_analysis = nvt_run.trajectory[run_start:run_end:]
    cluster_center_atom = nvt_run.select_atoms(select_dict.get(cluster_center), periodic=True)[0]
    selection = (
        "("
        + " or ".join(s for s in species)
        + ") and (around "
        + str(distance)
        + " index "
        + str(cluster_center_atom.index)
        + ")"
    )
    shell = nvt_run.select_atoms(selection, periodic=True)

    # Read the trajectory once and average all shell atoms together instead
    # of re-iterating over the whole trajectory for every atom.
    frames = [shell.positions for _ in trj_analysis]
    cluster_array = np.mean(np.array(frames), axis=0)

    # ``if basis_vectors:`` would raise for a multi-element ndarray, which
    # the signature explicitly allows.
    if basis_vectors is None or len(basis_vectors) == 0:
        return cluster_array

    if len(basis_vectors) == 2:
        vec1 = np.asarray(basis_vectors[0])
        vec2 = np.asarray(basis_vectors[1])
        vec3 = np.cross(vec1, vec2)
        vec2 = np.cross(vec1, vec3)
    elif len(basis_vectors) == 3:
        # asarray lets plain lists survive the normalization below.
        vec1 = np.asarray(basis_vectors[0])
        vec2 = np.asarray(basis_vectors[1])
        vec3 = np.asarray(basis_vectors[2])
    else:
        raise ValueError("incorrect vector format")
    vec1 = vec1 / np.linalg.norm(vec1)
    vec2 = vec2 / np.linalg.norm(vec2)
    vec3 = vec3 / np.linalg.norm(vec3)
    basis_xyz = np.transpose([vec1, vec2, vec3])
    cluster_norm = np.linalg.solve(basis_xyz, cluster_array.T).T
    cluster_norm = cluster_norm - np.mean(cluster_norm, axis=0)
    return cluster_norm
def res_dict_from_select_dict(u: Universe, select_dict: Dict[str, str]) -> Dict[str, str]:
    """
    Infer res_dict (residue selection) from select_dict (atom selection) in a MDAnalysis.universe object.

    Each atom selection is widened to ``same resid as (...)``. A key other
    than "cation"/"anion" is dropped when its residue group equals one that
    was already recorded. If the "cation" and "anion" selections resolve to
    the same group, both are replaced by a single "salt" entry.

    Args:
        u: The universe object the selections are evaluated on.
        select_dict: A dictionary of atom species, where each atom species name is a key
            and the corresponding values are the selection language.

    return:
        A dictionary mapping names to residue-level selection strings.
    """
    seen_groups = []
    res_dict = {}
    for name, atom_selection in select_dict.items():
        residue_selection = "same resid as (" + atom_selection + ")"
        group = u.select_atoms(residue_selection)
        is_ion = name in ["cation", "anion"]
        # Ions are always kept; anything else only when its group is new.
        if not is_ion and group in seen_groups:
            continue
        seen_groups.append(group)
        res_dict[name] = residue_selection

    if "cation" in res_dict and "anion" in res_dict:
        cation_group = u.select_atoms(res_dict.get("cation"))
        anion_group = u.select_atoms(res_dict.get("anion"))
        if cation_group == anion_group:
            res_dict.pop("anion")
            res_dict["salt"] = res_dict.pop("cation")
    return res_dict
def save_systems(flex: mda.Universe, protein: mda.Universe, crystal: mda.Universe, dir: str):
    """Write heavy atoms of the flexible residues from two structures as PDB.

    For every protein residue in ``flex`` the matching residue (same resid,
    icode, resname and segid, hydrogens excluded) is selected in ``protein``
    and ``crystal`` and written to ``pflex-*.pdb`` / ``cflex-*.pdb`` inside
    ``dir``. Finally the union of all flexible residues is written to
    ``pflex.pdb`` and ``cflex.pdb``. Atom counts of the two structures are
    asserted to match for every selection.
    """

    def sel(resnum, resname, segid, icode) -> str:
        return f"(resid {resnum}{icode} and resname {resname} and segid {segid})"

    no_hydrogens = " and not (type H or name H*)"
    flexres = flex.select_atoms("protein").residues

    residues = []
    for res in flexres:
        key = (res.resnum, res.resname, res.segid, res.icode)

        # Select single residue
        ressel = sel(*key) + no_hydrogens
        p_res = protein.select_atoms(ressel)
        c_res = crystal.select_atoms(ressel)
        assert p_res.n_atoms == c_res.n_atoms

        # Write out PDB files
        tag = f"{res.resname}-{res.segid}{res.resnum}{res.icode}"
        p_res.write(os.path.join(dir, f"pflex-{tag}.pdb"))
        c_res.write(os.path.join(dir, f"cflex-{tag}.pdb"))

        residues.append(key)

    # Check that all flexible residues are listed
    assert len(residues) == len(flexres)

    # TODO: Can be improved by using ressel
    selection = " or ".join(sel(*key) for key in residues)

    # Remove H atoms
    # TODO: Possibly need perception for atom name, when type is not present
    selection = f"({selection}) and not (type H or name H*)"

    p_atoms = protein.select_atoms(selection)
    c_atoms = crystal.select_atoms(selection)

    # Check that the number of atoms in the two selections is equal
    assert len(p_atoms) == len(c_atoms)

    p_atoms.write(os.path.join(dir, "pflex.pdb"))
    c_atoms.write(os.path.join(dir, "cflex.pdb"))
def test_atomgroups(self):
    """Altloc A and altloc B selections of segid B have matching sizes."""
    u = Universe(self.filename)

    without_b = len(u.select_atoms("segid B and (not altloc B)"))
    without_a = len(u.select_atoms("segid B and (not altloc A)"))
    assert_equal(without_b, without_a)

    only_a = len(u.select_atoms("segid B and (altloc A)"))
    only_b = len(u.select_atoms("segid B and (altloc B)"))
    assert_equal(only_a, only_b)

    # Everything in segid B is either "not altloc B" or "altloc A".
    total = len(u.select_atoms("segid B"))
    assert_equal(total, without_b + only_a)
def num_of_neighbor_simple(
    nvt_run: Universe,
    center_atom: Atom,
    distance_dict: Dict[str, float],
    select_dict: Dict[str, str],
    run_start: int,
    run_end: int,
) -> Dict[str, np.ndarray]:
    """Calculates solvation structure type (1 for SSIP, 2 for CIP and 3 for AGG)
    with respect to the ``center_atom`` in the specified frame range.

    Per frame: an empty shell gives 1. A shell with exactly one atom gives 2
    when the second selection built around that atom contains exactly one
    atom, otherwise 3. Larger shells give 3.

    Args:
        nvt_run: An MDAnalysis ``Universe`` containing wrapped trajectory.
        center_atom: The solvation shell center atom.
        distance_dict: A dict of coordination cutoff distance of the neighbor species.
            Must contain exactly one (counter-ion) species.
        select_dict: A dictionary of atom species selection, where each atom species name is a key
            and the corresponding values are the selection language.
        run_start: Start frame of analysis.
        run_end: End frame of analysis.

    Returns:
        A dict with "total" as the key and an array of the solvation structure type
        in the specified frame range as the value.
    """
    trj_analysis = nvt_run.trajectory[run_start:run_end:]
    center_selection = "same type as index " + str(center_atom.index)
    assert len(distance_dict) == 1, "Please only specify the counter-ion species in the distance_dict"
    species = list(distance_dict.keys())[0]
    cn_values = np.zeros(int(len(trj_analysis)))

    for frame_idx, _ in enumerate(trj_analysis):
        selection = select_shell(select_dict, distance_dict, center_atom, species)
        shell = nvt_run.select_atoms(selection, periodic=True)
        n_shell = len(shell)
        if n_shell == 0:
            structure = 1
        elif n_shell == 1:
            selection_species = select_shell(center_selection, distance_dict, shell.atoms[0], species)
            shell_species = nvt_run.select_atoms(selection_species, periodic=True)
            # One of the atoms found is discounted before deciding 2 vs 3.
            structure = 2 if len(shell_species) - 1 == 0 else 3
        else:
            structure = 3
        cn_values[frame_idx] = structure

    return {"total": cn_values}
def split_molecules(
    u: mda.Universe, keep_ions: bool = False
) -> Dict[str, Union[mda.AtomGroup, List[mda.AtomGroup]]]:
    """
    Split different molecules (protein, water, ligands, ...) within a structure
    into separate selections.

    Args:
        u (mda.Universe): MDAnalysis universe
        keep_ions (bool, optional): Flag to keep/ignore ions

    Returns:
        A dictionary with the name of the selection and the corresponding
        selection (or a list of selections if there are multiple molecules
        with the same residue name). "protein" and "water" map to atom
        selections; every other key is a residue name mapped to the residue
        object(s) yielded by ``other.residues``.
    """
    split = {}

    # Select protein
    protein = u.select_atoms("protein")
    if len(protein.atoms) != 0:  # Check if protein is present
        split["protein"] = protein

    # Select water molecules
    for water_name in ["WAT", "HOH"]:
        water = u.select_atoms(f"resname {water_name}")
        if len(water.atoms) != 0:
            break  # If selection is not empty, stop

    if len(water.atoms) != 0:  # Check if water is present
        split["water"] = water

    # Other molecules
    other = u.select_atoms("all") - protein - water
    for res in other.residues:  # Loop over all "other" residues
        name = res.resname
        if not keep_ions and re.search("[A-Z]?[+-]", name) is not None:
            # Skip this ion only (unless keep_ions=True). A ``break`` here
            # would silently drop every residue that follows the first ion.
            continue

        if name not in split:
            split[name] = res
        elif isinstance(split[name], list):
            split[name].append(res)
        else:
            # Second occurrence of this name: promote the entry to a list.
            split[name] = [split[name], res]

    return split
def select_flexres(flex: mda.Universe, prot: mda.Universe) -> mda.AtomGroup:
    """
    Given a protein and a series of flexible residues, selects the full flexible
    residues (including backbone atoms) from the protein structure.

    Args:
        flex (mda.Universe): flexible residues
        prot (mda.Universe): protein

    Returns:
        An `mda.AtomGroup` containing the atoms corresponding to flexible residues
        extracted from the protein (including backbone atoms). Residues named
        ALA, CYS, GLY, PRO, SER, THR or VAL are excluded.
    """
    residue_sel = " or ".join(
        f"(resid {res.resnum}{res.icode} and resname {res.resname} and segid {res.segid})"
        for res in flex.residues
    )

    # Remove residues without Janin dihedrals
    # Ignoring them explicitly removes a warning
    no_janin = (
        "resname ALA or resname CYS or resname GLY or resname PRO "
        "or resname SER or resname THR or resname VAL"
    )

    # Group the residue alternatives explicitly so that the exclusion applies
    # to all of them without relying on how the selection parser associates
    # ``or`` and ``and``.
    return prot.select_atoms(f"({residue_sel}) and not ({no_janin})")
def test_bonds(self):
    """First altloc A and altloc B atoms of segid B carry equally many bonds."""
    u = Universe(self.filename, guess_bonds=True)
    # need to force topology to load before querying individual atom bonds
    u.build_topology()

    first_altloc_a = u.select_atoms("segid B and (altloc A)")[0]
    bonds_a = first_altloc_a.bonds
    first_altloc_b = u.select_atoms("segid B and (altloc B)")[0]
    bonds_b = first_altloc_b.bonds

    assert_equal(len(bonds_a), len(bonds_b))
def select(system: mda.Universe, distance: float, removeHs: bool = False) -> Tuple[np.ndarray, np.ndarray]:
    """
    Select binding site.

    Parameters
    ---------
    system: mda.Universe
        Protein-ligand complex
    distance: float
        Ligand-residues distance
    removeHs: bool
        Remove hydrogen atoms

    Returns
    -------
    Tuple[np.ndarray, np.ndarray]
        Array of elements and array of cartesian coordinate for ligand and
        protein atoms within the binding site

    Notes
    -----
    The binding site is the ligand (``resname LIG``) together with every
    residue that has at least one atom within :code:`distance` of it.
    """
    binding_site = system.select_atoms(
        f"(byres (around {distance} (resname LIG))) or (resname LIG)")

    if not removeHs:
        return binding_site.elements, binding_site.positions

    # Elements from PDB file needs MDAnalysis@develop (see #2648)
    heavy = binding_site.elements != "H"
    return binding_site.elements[heavy], binding_site.positions[heavy]
def main():
    """Compute a contact-probability matrix and optionally a PyMOL view.

    Reads the command-line options, loads the PSF/DCD trajectory, runs
    ``GetContacts`` between the two segids and saves the matrix as text.
    When the PyMOL option is 'Y', a visualisation script is generated; if
    no PDB was supplied one is written from the two segids of the current
    frame.
    """
    # get options
    options = parse_options()
    psf, dcd = options.psf_file, options.dcd_file
    chain1, chain2 = options.segid1, options.segid2
    selection1, selection2 = options.selection1, options.selection2
    co = options.cutoff
    output = options.output_file
    visu = options.pymol
    pdbvisu = options.pymol_pdb

    # use MDAnalysis to read trajectory
    u = Universe(psf, dcd)

    # get contact probability
    contactprob, bio1, bio2 = GetContacts(u).run(
        chain1, chain2, selection1, selection2, co)
    np.savetxt(output, contactprob, fmt='%4.2f', delimiter=" ")

    # generate pymol scripts if needed
    if visu != 'Y':
        return

    # if no pdb file is supplied, write one from trajectory, first frame
    if pdbvisu is None:
        seleforpymol = u.select_atoms("segid %s or segid %s" % (chain1, chain2))
        seleforpymol.write('forpymol.pdb', remarks=None)
        pdbvisu = 'forpymol.pdb'

    # check pdb file format for weird encoding
    check_pdb(pdbvisu)
    pymol_contact_visu(contactprob, pdbvisu, chain1, chain2, bio1, bio2)
def analyze_radgyr(u: mda.Universe) -> List[float]:
    """Return the radius of gyration of the standard selection per frame."""
    atoms = u.select_atoms(STANDARD_SELECTION)
    # Iterating the trajectory advances the coordinates behind ``atoms``.
    return [atoms.radius_of_gyration() for _ in u.trajectory]
def select_dict_from_resname(u: Universe) -> Dict[str, str]:
    """
    Infer select_dict (possibly interested atom species selection) from resnames in a MDAnalysis.universe object.
    The resname must be pre-assigned already.

    For every distinct non-empty resname, the first residue carrying it is
    inspected. Charged residues are handed to ``extract_atom_from_ion``
    (cation when the charge is positive, anion otherwise). Neutral residues
    are handled per fragment when they consist of two or more fragments and
    as a single molecule otherwise.

    Args:
        u: The universe object to work with.

    return:
        A dictionary of atom species, filled in by the ``extract_atom_from_*``
        helpers.
    """
    select_dict: Dict[str, str] = {}
    resnames = np.unique(u.residues.resnames)
    for resname in resnames:
        # Residues without an assigned name cannot be selected by resname.
        if resname == "":
            continue
        # Only the first residue of each name is used as representative.
        residue = u.select_atoms("resname " + resname).residues[0]
        if np.isclose(residue.charge, 0, atol=1e-5):  # np.sum(residue.atoms.charges)
            # Neutral residue: look at its fragments.
            if len(residue.atoms.fragments) == 2:
                # Exactly two fragments: ions are registered without a
                # number, neutral fragments get their 1-based position.
                for i, frag in enumerate(residue.atoms.fragments):
                    charge = np.sum(frag.charges)
                    if charge > 0.001:
                        extract_atom_from_ion(True, frag, select_dict)
                    elif charge < -0.001:
                        extract_atom_from_ion(False, frag, select_dict)
                    else:
                        extract_atom_from_molecule(resname, frag, select_dict, number=i + 1)
            elif len(residue.atoms.fragments) >= 2:
                # Effectively "more than two" (the two-fragment case is
                # handled above): cations, anions and neutral fragments are
                # numbered with independent counters.
                cation_number = 1
                anion_number = 1
                molecule_number = 1
                for frag in residue.atoms.fragments:
                    charge = np.sum(frag.charges)
                    if charge > 0.001:
                        extract_atom_from_ion(True, frag, select_dict, cation_number)
                        cation_number += 1
                    elif charge < -0.001:
                        extract_atom_from_ion(False, frag, select_dict, anion_number)
                        anion_number += 1
                    else:
                        extract_atom_from_molecule(resname, frag, select_dict, molecule_number)
                        molecule_number += 1
            else:
                # Fewer than two fragments: treat the residue as one molecule.
                extract_atom_from_molecule(resname, residue, select_dict)
        elif residue.charge > 0:
            extract_atom_from_ion(True, residue, select_dict)
        else:
            extract_atom_from_ion(False, residue, select_dict)
    return select_dict
def output_pdb_w_index(self):
    """Write the reference protein as PDB with scaled sigma in the B-factor column.

    All temperature factors are reset to 0. For every C-alpha of an aromatic
    residue (PHE, TRP, TYR, HIS) whose ``|DF|`` is below the alpha threshold,
    the temperature factor is set to ``sigma * 100``.
    """
    # PDB files keep only a few significant digits, so a small sigma such as
    # 0.011 would be written as 0.01; scaling it up avoids that loss.
    scale_factor = 100.0
    aromatic_resnames = ['PHE', 'TRP', 'TYR', 'HIS']

    u = Universe(self.__ref)
    # initialize the b-factor column
    u.atoms.tempfactors = 0

    for calpha in u.atoms.select_atoms('name CA'):
        if calpha.resname not in aromatic_resnames:
            continue
        # Keys look like "<resname><resid><segid>", with segid 'SYSTEM'
        # mapped to 'A'.
        key = calpha.resname + str(calpha.resid) + calpha.segid.replace('SYSTEM', 'A')
        DF = self.cryptic_index[key][0]
        sigma = self.cryptic_index[key][1]
        print(key, DF, sigma)
        if np.abs(DF) < self.__alpha:
            calpha.tempfactor = sigma * scale_factor

    u.select_atoms('protein').write(f'index_{self.__out_suffix}.pdb')
def check_contiguous_steps(
    nvt_run: Universe,
    center_atom: Atom,
    distance_dict: Dict[str, float],
    select_dict: Dict[str, str],
    run_start: int,
    run_end: int,
    checkpoints: np.ndarray,
    lag: int = 20,
) -> Dict[str, np.ndarray]:
    """Averages the number of neighbor atoms around the center atom as a
    function of the frame offset from a checkpoint, within +/- ``lag`` frames.

    Args:
        nvt_run: An MDAnalysis ``Universe`` containing wrapped trajectory.
        center_atom: The center atom object.
        distance_dict: A dictionary of Cutoff distance of neighbor for each species.
        select_dict: A dictionary of atom species selection, where each atom species name is a key
            and the corresponding values are the selection language.
        run_start: Start frame of analysis.
        run_end: End frame of analysis.
        checkpoints: The frame numberings of interest to check for contiguous steps.
        lag: The range (+/- lag) of the contiguous steps. Default to 20.

    Returns:
        A dict mapping each species in ``distance_dict`` to an array of length
        ``2 * lag + 1`` holding the mean shell size at each offset. The dict
        is empty when no frame lies within ``lag`` of any checkpoint.
    """
    coord_num: Dict[str, Union[List[List[int]], np.ndarray]] = {
        species: [[] for _ in range(lag * 2 + 1)] for species in distance_dict
    }
    trj_analysis = nvt_run.trajectory[run_start:run_end:]
    any_logged = False

    for i, _ in enumerate(trj_analysis):
        nearby = [j for j in checkpoints if abs(i - j) <= lag]
        if not nearby:
            continue
        any_logged = True
        # When several checkpoints are in range, the last one listed wins.
        checkpoint = nearby[-1]
        for kw in distance_dict:
            selection = select_shell(select_dict, distance_dict, center_atom, kw)
            shell = nvt_run.select_atoms(selection, periodic=True)
            coord_num[kw][i - checkpoint + lag].append(len(shell))

    one_atom_ave = {}
    if not any_logged:
        return one_atom_ave
    for kw, per_offset in coord_num.items():
        one_atom_ave[kw] = np.array([np.array(counts).mean() for counts in per_offset])
    return one_atom_ave
def _list_types(coordinates_file):
    """Return the sorted unique residue names found in a ``.gro`` file."""
    # Check the extension
    _check_input_file(coordinates_file, extensions=[".gro"])

    # Load the system and list the residue names of all its atoms
    all_atoms = Universe(coordinates_file).select_atoms("all")
    return np.unique(all_atoms.resnames)
def analyze_sasa(u: mda.Universe) -> np.ndarray:
    """Extract the total SASA value for each trajectory frame.

    Coordinates of the standard selection are pulled for the whole
    trajectory at once. Each frame is then passed to ``freesasa.calcCoord``
    as a flat ``x1, y1, z1, x2, ...`` array together with one radius per
    atom.
    """
    atoms = u.select_atoms(STANDARD_SELECTION)
    # Request (frame, atom, coordinate) order explicitly. The default axis
    # order of ``timeseries`` is not the same for every reader/version, and
    # relying on it (plus a swapaxes) silently mixes up frames and atoms
    # when the default is not atom-major.
    positions = u.trajectory.timeseries(asel=atoms, order="fac")
    atom_radius = list(map(get_atom_radius, atoms))

    trajectory_sasa = [
        freesasa.calcCoord(frame.reshape(-1), atom_radius).totalArea()
        for frame in positions
    ]
    return np.array(trajectory_sasa)
def analyze_pca(u: mda.Universe, n_dimensions=40):
    """Run a backbone PCA and return its leading variance contributions.

    The cosine content of the first component (computed from a projection
    onto three components) is printed as a side effect.

    Returns a two-element list: the variance and the cumulated variance,
    each truncated to the first ``n_dimensions`` components.
    """
    space = pca.PCA(u, select='backbone').run()

    backbone_projection = space.transform(u.select_atoms('backbone'), 3)
    print(pca.cosine_content(backbone_projection, 0))

    variance = space.variance[:n_dimensions]
    cumulated = space.cumulated_variance[:n_dimensions]
    return [variance, cumulated]
def calc_neigh_corr(
    nvt_run: Universe,
    distance_dict: Dict[str, float],
    select_dict: Dict[str, str],
    time_step: float,
    run_start: int,
    run_end: int,
    center_atom: str = "cation",
) -> Tuple[np.ndarray, Dict[str, np.ndarray]]:
    """Calculates the neighbor auto-correlation function (ACF)
    of selected species around center atom.

    Args:
        nvt_run: An MDAnalysis ``Universe``.
        distance_dict: Maps each neighbor species to the cutoff distance
            passed on to ``neighbors_one_atom``.
        select_dict: Maps species names to selection strings; must contain
            ``center_atom``.
        time_step: Time between two analysed frames; frame ``i`` is assigned
            the time ``i * time_step``.
        run_start: Start frame of analysis.
        run_end: End frame of analysis.
        center_atom: The center atom to calculate the ACF for. Default to "cation".

    Returns:
        A tuple containing the time series, and a dict of acf of neighbor species
        (averaged over all ACFs of all center atoms).
    """
    center_atoms = nvt_run.select_atoms(select_dict[center_atom])

    # Set up times array. Only the number of frames is needed, so ask the
    # sliced trajectory for its length instead of reading every frame.
    n_frames = len(nvt_run.trajectory[run_start:run_end])
    times = np.arange(n_frames) * time_step

    acf_avg = {}
    for kw in distance_dict.keys():
        distance = distance_dict.get(kw)
        assert distance is not None
        acf_all = []
        for atom in tqdm(center_atoms[::]):
            adjacency_matrix = neighbors_one_atom(
                nvt_run,
                atom,
                kw,
                select_dict,
                distance,
                run_start,
                run_end,
            )
            acf_all.extend(calc_acf(adjacency_matrix))
        acf_avg[kw] = np.mean(acf_all, axis=0)
    return times, acf_avg
def generate_universe(topology, trajectory=None):
    """Build a Universe and report its box size and water count.

    The trajectory is only passed to ``Universe`` when it is neither
    ``None`` nor an empty string. Prints the first three box dimensions and
    the number of ``WAT`` residues, then returns the universe.
    """
    print('Generating Universe...')
    no_trajectory = trajectory is None or trajectory == ''
    u = Universe(topology) if no_trajectory else Universe(topology, trajectory)

    x, y, z = u.dimensions[:3]
    print(f'Universe with dimensions x: {x}, y: {y}, z: {z} loaded!')

    n_waters = u.select_atoms('resname WAT').n_residues
    print(f'{n_waters} water molecules detected!')
    return u
def distance_to_cnt(u: Universe, selection_cluster, cluster_size):
    """For carbon nanotube included trajectories, analyze cluster atoms.

    For every frame, the distance of each cluster atom from the line that
    runs parallel to z through the center of geometry of the ``name C``
    atoms is computed, i.e. the distance in the xy plane.

    Parameters
    ----------
    u : MDA trajectory instance.
    selection_cluster : str
        Selection string for the cluster atoms.
    cluster_size : int
        Number of columns of the result (size of the cluster).

    Returns
    -------
    numpy.ndarray
        Array of shape ``(n_frames, cluster_size)``. Columns beyond the
        number of selected cluster atoms stay 0.

    Raises
    ------
    IndexError
        If a frame selects more cluster atoms than ``cluster_size``.
    """
    distances = np.zeros((len(u.trajectory), cluster_size))
    cnt = u.select_atoms('name C', updating=True)
    pt = u.select_atoms(selection_cluster, updating=True)
    for q, _ in enumerate(u.trajectory):
        cg = cnt.center_of_geometry()
        # The reference point shares each atom's z, so only x and y
        # contribute; compute all atoms of the frame at once.
        offsets = pt.positions[:, :2] - cg[:2]
        radial = np.hypot(offsets[:, 0], offsets[:, 1])
        if len(radial) > cluster_size:
            raise IndexError(
                f'{len(radial)} cluster atoms selected, '
                f'but cluster_size is {cluster_size}')
        distances[q, :len(radial)] = radial
    return distances
def check_inputs(selection: list, start: int, stop: int, step: int, universe: mda.Universe):
    """Validate selections, pair definitions and the frame range.

    ``selection`` holds, in order: the selection strings, their names and
    the group pairs. Raises ``InputError`` on the first problem found and
    prints a confirmation message otherwise.
    """
    ag_sel = selection[0]
    ag_names = selection[1]
    ag_pair = selection[2]

    # Testing names and selections
    if len(ag_sel) > len(ag_names):
        raise InputError('Not all selections are named')
    if len(ag_sel) < len(ag_names):
        raise InputError('Too many selection names for number of selections')

    # Every selection string must be parseable by the universe.
    for sel in ag_sel:
        try:
            universe.select_atoms(sel)
        except mda.SelectionError:
            raise InputError('Error in selection: {}'.format(sel))

    # Each pair has four items; the first two must be known group names.
    for pair in ag_pair:
        if len(pair) != 4:
            raise InputError(
                'Pairs must be a python list of string with 4 items')
        for group_name in (pair[0], pair[1]):
            if group_name not in ag_names:
                raise InputError(
                    f'{group_name} in {pair} group_pair_selections is not in defined in atom_group_names'
                )

    # Frame range checks
    if start >= stop:
        raise InputError('Start is greater than or equal to stop')
    if step >= stop:
        raise InputError('Step is greater than or equal to stop')
    if step == 0:
        raise InputError('Step cannot be 0')

    n_frames = len(universe.trajectory)
    if n_frames < stop:
        raise InputError(
            f'Stop exceeds length of trajectory, trajectory is {len(universe.trajectory)} frames'
        )

    print('Input Parameters Accepted')
def main():
    """Collect inter-chain C-alpha distances over heterodimer PDB files.

    NOTE(review): as written, both branches of the first ``if`` inside the
    loop end in ``continue``, so the distance calculation below them never
    runs; the loop only prints the upper-cased PDB ids of the files that are
    not in the omit list. The original indentation was lost, so the
    placement of ``sys.exit`` after the loop is an assumption to confirm.
    """
    # 24.1.2020
    # These files were downloaded via the advanced search of the RCSB PDB,
    # but they contain more than 3 chains, which is out of scope here.
    omited_pdbs = ['4e7u.pdb', '4e7t.pdb', '3exx.pdb', '4fka.pdb', '5ep6.pdb',
                   '4gxv.pdb', '4uqp.pdb', '3uqy.pdb', '4dg4.pdb', '4urh.pdb',
                   '6f4j.pdb', '1xd3.pdb', '3bog.pdb', '6mee.pdb', '4pj2.pdb',
                   '5bpk.pdb', '3cjs.pdb', '4c2v.pdb', '1pid.pdb', '6fu9.pdb',
                   '2oxg.pdb', '1svf.pdb', '6fc1.pdb', '1q7l.pdb', '4kn9.pdb',
                   '4b2b.pdb', '6g6k.pdb', '4m4l.pdb', '4b2c.pdb', '1ben.pdb',
                   '3tt8.pdb', '3fq9.pdb', '5nwg.pdb', '4uql.pdb', '2xkn.pdb',
                   '5nwd.pdb']
    filenames = glob.glob('./interfaces/heterodimer/*.pdb')
    whole_distances = []
    for j, file in enumerate(filenames):
        #print(j, file, file.split('/')[-1] in omited_pdbs)
        if file.split('/')[-1] in omited_pdbs:
            continue
        else:
            # Print the PDB id (file stem, upper-cased) as a comma-separated list.
            print(file.split('/')[-1].split('.')[0].upper()+",",end = '')
            continue
        # Everything below in this loop body is unreachable (see docstring).
        u = Universe(file)
        nchains = len(set(u.segments.segids))
        if nchains != 2:
            sys.exit(f'The number of chains = {nchains}. that is out of scope for this program.')
        chain_objs = []
        for i, chain in enumerate(set(u.segments.segids)):
            # C-alpha atoms of each chain
            chain_objs.append(u.select_atoms(f'protein and segid {chain} and name CA'))
            print(f' *{chain}')
            # print(chain_objs[i].atoms)
        whole_distances.append(distances(chain_objs[0], chain_objs[1]))
    # Deliberate stop: the aggregation below is currently never executed.
    sys.exit('stop! you might have already done this. so i forced you to procced.')
    print(whole_distances)
    whole_distances = np.hstack(whole_distances)
    # NOTE(review): filtered_dist is computed but not used afterwards.
    filtered_dist = whole_distances[whole_distances<=50.0]
    with open('whole_distances.pkl','wb') as f:
        pickle.dump(whole_distances, f)
    mean = np.mean(whole_distances)
    std = np.std(whole_distances)
    print(f'mean: {mean}, std:{std}')
def assign_resname(u: Universe, res_dict: Dict[str, str]):
    """
    Assign resnames to residues in a MDAnalysis.universe object.

    Within this function only residues whose resname is the empty string
    are renamed, so a residue named by an earlier entry of ``res_dict`` is
    not overwritten by a later one.
    NOTE(review): ``add_TopologyAttr("resname")`` is called unconditionally
    first; confirm whether that resets resnames already present in ``u``.

    Args:
        u: The universe object to assign resnames to.
        res_dict: A dictionary of resnames, where each resname is a key
            and the corresponding values are the selection language.
    """
    u.add_TopologyAttr("resname")
    for new_name, selection in res_dict.items():
        selected = u.select_atoms(selection)
        current_names = selected.residues.resnames
        unnamed = current_names == ""
        current_names[unnamed] = new_name
        selected.residues.resnames = current_names
def neighbor_distance(
    nvt_run: Universe,
    center_atom: Atom,
    run_start: int,
    run_end: int,
    species: str,
    select_dict: Dict[str, str],
    distance: float,
) -> Dict[str, np.ndarray]:
    """
    Calculates a dictionary of distances between the ``center_atom`` and neighbor atoms.

    A first pass over the frames collects every atom of ``species`` that is
    ever within ``distance`` of the center atom. A second pass records the
    distance of each collected atom to the center atom in every frame.

    Args:
        nvt_run: An MDAnalysis ``Universe`` containing wrapped trajectory.
        center_atom: The center atom object.
        run_start: Start frame of analysis.
        run_end: End frame of analysis.
        species: The neighbor species in the select_dict.
        select_dict: A dictionary of atom species selection, where each atom species name is a key
            and the corresponding values are the selection language.
        distance: The neighbor cutoff distance.

    Returns:
        A dictionary of distance of neighbor atoms to the ``center_atom``.
        The keys are atom indexes in string type; each value is an array of
        length ``run_end - run_start`` pre-filled with 100.0.

    Raises:
        ValueError: If ``species`` is not a key of ``select_dict``.
    """
    dist_dict = {}
    trj_analysis = nvt_run.trajectory[run_start:run_end:]
    species_selection = select_dict.get(species)
    if species_selection is None:
        raise ValueError("Invalid species selection")

    shell_selection = (
        "(" + species_selection + ") and (around " + str(distance) + " index " + str(center_atom.index) + ")"
    )

    # Pass 1: register every atom that enters the shell in any frame.
    for _ in trj_analysis:
        shell = nvt_run.select_atoms(shell_selection, periodic=True)
        for atom in shell.atoms:
            key = str(atom.index)
            if key not in dist_dict:
                dist_dict[key] = np.full(run_end - run_start, 100.0)

    # Pass 2: fill in the distance of every registered atom, frame by frame.
    for frame_idx, ts in enumerate(trj_analysis):
        for atom_index, series in dist_dict.items():
            series[frame_idx] = distance_array(ts[center_atom.index], ts[int(atom_index)], ts.dimensions)
    return dist_dict
def test_write_read(self):
    """Writing all atoms and reloading the file preserves the atom count."""
    original = Universe(self.filename)
    original.select_atoms("all").write(self.outfile)

    reloaded = Universe(self.outfile)
    assert_equal(len(original.atoms), len(reloaded.atoms))
args = parser.parse_args()

# Header of the umbrella-potential input. The two variants differ only in
# the potential type (dynamic vs. static union of spherical shells).
if not args.static:
    header_string = "; Umbrella potential for a spherical shell cavity\n"\
    "; Name Type Group Kappa Nstar mu width cutoff outfile nstout\n"\
    "hydshell dyn_union_sph_sh OW 0.0 0 XXX 0.01 0.02 phiout.dat 50 \\\n"
else:
    header_string = "; Umbrella potential for a spherical shell cavity\n"\
    "; Name Type Group Kappa Nstar mu width cutoff outfile nstout\n"\
    "hydshell union_sph_sh OW 0.0 0 XXX 0.01 0.02 phiout.dat 50 \\\n"

# NOTE(review): the original indentation was lost; everything below is
# assumed to belong to the "no trajectory" case because ``u`` is only
# defined here. Confirm against the full script.
if args.traj is None:
    u = Universe(args.gro)

    if args.sspec is not None:
        prot_heavies = u.select_atoms(args.sspec)
    else:
        # Select peptide heavies - exclude water's and ions
        prot_heavies = u.select_atoms("not (name H* or type H or resname SOL) and not (name NA or name CL) and not (resname WAL) and not (resname DUM)")

    # Context manager guarantees the file is closed even if a write fails.
    with open(args.outfile, 'w') as fout:
        fout.write(header_string)

        # Radius and coordinates are divided by 10 before being written.
        if args.static:
            # Static potential: one shell per atom at its current position.
            for atm in prot_heavies:
                # ``position`` replaces the removed ``pos`` attribute.
                x, y, z = atm.position
                fout.write("{:<10.1f} {:<10.1f} {:<10.3f} {:<10.3f} {:<10.3f}\\\n".format(-0.5, args.rad/10.0, x/10.0, y/10.0, z/10.0))
        else:
            # Dynamic potential: shells are tied to 1-based atom indices.
            for atm in prot_heavies:
                fout.write("{:<10.1f} {:<10.1f} {:d} \\\n".format(-0.5, args.rad/10.0, atm.index+1))
At this time, I wanted to confirm if the com of s100b was canceled.
Caution: this program is specialized for s100b-CTD system.
Usage: python conform_com_cancel.py [ PDB file name ]
"""
# Python 2 script: the input structure/trajectory is the first CLI argument.
file_name = sys.argv[1]
print "Input file name : ", file_name
u = Universe(file_name)
# One line with the center-of-mass x, y, z is written per snapshot.
# NOTE(review): no close() for f_out is visible in this part of the file.
f_out = open(file_name+"_comTraj.dat", "w")
print "No of snapshots: ", len(u.trajectory)
for i, ts in enumerate(u.trajectory):
    # Select all atoms that constitute s100b (residues 1-94)
    selected_atoms = u.select_atoms("resid 1-94")
    print "atom ids: ", selected_atoms.ids
    com = selected_atoms.center_of_mass()
    # NOTE(review): cog is computed but not used in the visible code.
    cog = selected_atoms.center_of_geometry()
    f_out.write(str(com[0]) + " " + str(com[1]) + " " + str(com[2]) + " \n")
.. SeeAlso:: :mod:`MDAnalysis.analysis.psa`
"""
from MDAnalysis import Universe
from MDAnalysis.analysis.align import rotation_matrix
from MDAnalysis.analysis.psa import PSAnalysis

if __name__ == '__main__':
    print("Generating AdK CORE C-alpha reference coordinates and structure...")
    # Read in closed/open AdK structures; work with C-alphas only
    u_closed = Universe('structs/adk1AKE.pdb')
    u_open = Universe('structs/adk4AKE.pdb')
    ca_closed = u_closed.select_atoms('name CA')
    ca_open = u_open.select_atoms('name CA')

    # Move centers-of-mass of C-alphas of each structure's CORE domain to origin
    # (the translation is applied to all atoms of each universe)
    adkCORE_resids = "(resid 1:29 or resid 60:121 or resid 160:214)"
    u_closed.atoms.translate(-ca_closed.select_atoms(adkCORE_resids).center_of_mass())
    u_open.atoms.translate(-ca_open.select_atoms(adkCORE_resids).center_of_mass())

    # Get C-alpha CORE coordinates for each structure
    closed_ca_core_coords = ca_closed.select_atoms(adkCORE_resids).positions
    open_ca_core_coords = ca_open.select_atoms(adkCORE_resids).positions

    # Compute rotation matrix, R, that minimizes rmsd between the C-alpha COREs
    # (open coordinates are the mobile set, closed coordinates the reference)
    R, rmsd_value = rotation_matrix(open_ca_core_coords, closed_ca_core_coords)

    # Rotate open structure to align its C-alpha CORE to closed structure's
from MDAnalysis import Universe, collection, Timeseries
from MDAnalysis.tests.datafiles import PSF, DCD

# matplotlib is optional: remember whether it could be imported.
try:
    import matplotlib
    matplotlib.use('agg')  # no interactive plotting, only save figures
    from pylab import errorbar, legend, xlabel, ylabel, savefig, clf, gca, draw
    have_matplotlib = True
except ImportError:
    have_matplotlib = False

# Python 2 script (print statement) using the MDAnalysis test PSF/DCD files.
universe = Universe(PSF, DCD)
protein = universe.select_atoms("protein")
numresidues = protein.numberOfResidues()

# Start from an empty timeseries collection.
collection.clear()
# The first two and the last residue indices are skipped.
for res in range(2, numresidues - 1):
    print "Processing residue {0:d}".format(res)
    # selection of the atoms involved for the phi for resid '%d' %res
    ## select_atoms("atom 4AKE %d C"%(res-1), "atom 4AKE %d N"%res, "atom %d 4AKE CA"%res, "atom 4AKE %d C" % res)
    phi_sel = universe.residues[res].phi_selection()
    # selection of the atoms involved for the psi for resid '%d' %res
    psi_sel = universe.residues[res].psi_selection()
    # collect the timeseries of a dihedral
    collection.addTimeseries(Timeseries.Dihedral(phi_sel))
import MDAnalysis
from MDAnalysis import Universe
from MDAnalysis.analysis.contacts import calculate_contacts
import numpy as np
import pandas as pd

# Reference structure and the trajectory to analyse (same topology file).
ref = Universe("conf_protein.gro.bz2")
u = Universe("conf_protein.gro.bz2", "traj_protein_0.xtc")

# Number of protein atoms; used to split the system into two halves.
x = len(ref.select_atoms("protein"))
# Heavy atoms of residues 72-95 in the first / second half of the atoms.
# NOTE(review): both ranges include atom number x//2 — confirm whether the
# second range was meant to start at x//2 + 1.
selA = "not name H* and resid 72-95 and bynum {}:{}".format(1, x//2)
selB = "not name H* and resid 72-95 and bynum {}:{}".format(x//2, x)

# Contact analysis between the two groups, tabulated as time vs. Q.
data = calculate_contacts(ref, u, selA, selB)
df = pd.DataFrame(data, columns=["Time (ps)", "Q"])
print(df)