def write_custom_gridpdb(
    self, dest: Path, exclude_ss=True, exclude_id=None, exclude_resid=None
):
    """Write a custom grid.pdb file for a given set of indices.

    All atoms of ``self.link.u`` are written to ``dest``. Atoms that are
    kept (no ``"H"`` in the atom name and not excluded) get
    ``tempfactor = mass`` and ``occupancy = 1.0``; every other atom keeps the
    value the two attributes were initialised with.

    Parameters
    ----------
    dest : Path
        Output file; handed to the writer as ``str(dest)``.
    exclude_ss : bool
        When true, exclude every residue whose ``resindex`` is neither a key
        nor a value of ``self.Fbp``.
    exclude_id : iterable, optional
        Indices into ``u.atoms`` of atoms to exclude.
    exclude_resid : iterable, optional
        Indices into ``u.residues`` of residues to exclude.
    """
    if not hasattr(self, "link"):
        self.logger.debug("Creating Linkage")
        _ = self.create_linkage()  # initializes and returns link

    u: mda.Universe = self.link.u
    u.add_TopologyAttr("tempfactor")  # init as 0.
    u.add_TopologyAttr("occupancy")  # init as 0.

    atoms_exclude = AtomGroup([], u)

    if exclude_ss:
        # Build the lookup once: `x in dict.values()` is a linear scan and
        # was previously repeated for every residue.
        # assumes the values of self.Fbp are hashable residue indices -- TODO confirm
        paired_resindices = set(self.Fbp.keys()) | set(self.Fbp.values())
        for residue in u.residues:
            if residue.resindex not in paired_resindices:
                atoms_exclude += residue.atoms

    if exclude_id is not None:
        for atom_id in exclude_id:
            atoms_exclude += u.atoms[atom_id]

    if exclude_resid is not None:
        for res_id in exclude_resid:
            atoms_exclude += u.residues[res_id].atoms

    # Membership by atom index, resolved once into a set, replaces the
    # per-atom `atom in atoms_exclude` scan over the whole exclusion group.
    excluded_ix = set(atoms_exclude.ix.tolist())

    for atom in u.residues.atoms:
        if "H" in atom.name or atom.ix in excluded_ix:
            continue
        atom.tempfactor = atom.mass
        atom.occupancy = 1.0

    u.atoms.write(str(dest), bonds=None)
def setup(self, args):
    """Select the CA/CB atoms and allocate the pairwise work buffers.

    Sets ``self.calphas`` (all atoms named ``CA``), ``self.cbetas`` (one atom
    per residue of ``self.calphas``: its ``CB`` when the lookup succeeds,
    otherwise its ``CA``), two zero-filled arrays ``self.contacts`` and
    ``self.tempmat`` with one slot per unordered CA pair, and ``self.out``
    (copied from ``args.out``).

    Parameters
    ----------
    args : object
        Only ``args.out`` is read.
    """
    universe = self.processor.universe
    self.calphas = universe.select_atoms("name CA")

    indices = []
    for res in self.calphas.residues:
        try:
            indices.append(res["CB"].index)
        except Exception:
            # No CB could be looked up for this residue: fall back to CA.
            # `Exception` (rather than a bare except) lets KeyboardInterrupt
            # and SystemExit propagate.
            indices.append(res["CA"].index)
    self.cbetas = AtomGroup(indices, universe)

    n = len(self.calphas)
    # n * (n - 1) is always even, so floor division is exact; true division
    # would yield a float, which numpy rejects as an array shape.
    n_pairs = n * (n - 1) // 2
    self.contacts = np.zeros(n_pairs)
    self.tempmat = np.zeros(n_pairs)
    self.out = args.out
def _get_dh_pairs(self):
    """Finds donor-hydrogen pairs.

    Returns
    -------
    donors, hydrogens: AtomGroup, AtomGroup
        AtomGroups corresponding to all donors and all hydrogens. AtomGroups are ordered such that, if zipped, will
        produce a list of donor-hydrogen pairs.
    """
    universe = self.u

    # A donor selection was given: pair donors and hydrogens with d_h_cutoff
    # as a cutoff distance.
    if self.donors_sel:
        hydrogens = universe.select_atoms(self.hydrogens_sel)
        donors = universe.select_atoms(self.donors_sel)
        pair_indices = capped_distance(
            donors.positions,
            hydrogens.positions,
            max_cutoff=self.d_h_cutoff,
            box=universe.dimensions,
            return_distances=False,
        )
        donors_indices, hydrogen_indices = pair_indices.T
        return donors[donors_indices], hydrogens[hydrogen_indices]

    # No donor selection: use the topology to find d-h pairs.
    # We're using u._topology.bonds rather than u.bonds as it is a million times faster to access.
    # This is because u.bonds also calculates properties of each bond (e.g bond length).
    # See https://github.com/MDAnalysis/mdanalysis/issues/2396#issuecomment-596251787
    topology = universe._topology
    if not hasattr(topology, 'bonds') or len(topology.bonds.values) == 0:
        raise NoDataError(
            'Cannot assign donor-hydrogen pairs via topology as no bond information is present. '
            'Please either: load a topology file with bond information; use the guess_bonds() '
            'topology guesser; or set HydrogenBondAnalysis.donors_sel so that a distance cutoff '
            'can be used.')

    hydrogens = universe.select_atoms(self.hydrogens_sel)
    if hydrogens:
        # The first bonded atom of each hydrogen is taken as its donor.
        donors = sum(h.bonded_atoms[0] for h in hydrogens)
    else:
        donors = AtomGroup([], universe)
    return donors, hydrogens
def get_centers_by_residue(selection: AtomGroup,
                           centers: Optional[ArrayLike] = None,
                           box: Optional[ArrayLike] = None) -> ArrayLike:
    """
    Get center-of-geometry of residues, unwrapping over periodic boundaries

    Parameters
    ----------
    selection: AtomGroup
        Atoms to process. With a box, consecutive atoms sharing a resindex
        are treated as one residue.
    centers: optional
        One reference per residue, passed to ``mean_unwrap_around``.
        Defaults to the first atom position of each residue.
    box: optional
        Box passed to ``mean_unwrap_around``. When ``None`` no unwrapping
        is attempted and ``selection.center`` is used directly.

    Returns
    -------
    One entry per residue.
    """
    if box is None:
        return selection.center(None, compound='residues', pbc=False)

    # Positions at which the residue index changes mark residue boundaries.
    boundaries = np.where(np.ediff1d(selection.resindices))[0] + 1
    per_residue = np.split(selection.positions, boundaries)

    if centers is None:
        centers = [coords[0] for coords in per_residue]

    results = []
    for coords, reference in zip(per_residue, centers):
        results.append(mean_unwrap_around(coords, reference, box))
    return np.array(results)
def empty_atom_group(self) -> AtomGroup:
    """Return an MDAnalysis AtomGroup that holds no atoms, bound to ``self.u``."""
    universe = self.u
    return AtomGroup([], universe)
def test_emptyAG_ValueError(self, u_protein):
    """Merging two empty AtomGroups must raise ValueError."""
    empty_groups = [AtomGroup([], u_protein) for _ in range(2)]
    with pytest.raises(ValueError):
        Merge(*empty_groups)
def test_emptyAG_ValueError(self):
    """Merging two empty AtomGroups must raise ValueError."""
    universe = self.universes[0]
    empty_groups = (AtomGroup([], universe), AtomGroup([], universe))
    assert_raises(ValueError, Merge, *empty_groups)
def coarse_grain(universe, residue_list, simulation_name='simulation_name', export=False):
    """Build a coarse-grained universe from an atomistic one.

    For every non-water residue whose name is in ``residue_list``, each
    segment listed in ``mapping_dict`` becomes one bead. The first atom of the
    segment's selection is reused as the bead ("dummy"): it is renamed,
    retyped, recharged, moved to the segment's center of mass and given the
    segment's total mass. Beads are bonded when any of their member atoms are
    bonded to each other.

    Parameters
    ----------
    universe
        Input universe. It is modified in place: bonds are guessed, the bead
        atoms are edited as described above, and all bonds, angles and
        dihedrals are deleted before the new universe is built.
    residue_list
        Residue names to coarse grain. They are joined with spaces to form a
        ``resname`` selection and used for membership tests.
    simulation_name : str
        Stem of the output file names (only used when ``export`` is true).
    export : bool
        When true, write ``outputs/CoarseGrain/<simulation_name>_CG.pdb`` and
        ``outputs/CoarseGrain/<simulation_name>_CG.dcd``.

    Returns
    -------
    new_u
        Universe created with ``mda.Merge`` from the bead atoms, with the
        collected coordinates loaded through ``MemoryReader`` and with bonds,
        guessed angles and guessed dihedrals attached.

    Notes
    -----
    Reads ``src/mapping_dict.json`` and ``src/abrev_dict.json`` relative to
    the current working directory and reports progress via ``print`` and
    ``progress``.
    """
    # ============== Misc Initiation ==============  #

    with open('src/mapping_dict.json', "r") as f:
        mapping_dict = load(f)

    with open('src/abrev_dict.json', "r") as f:
        abrev_dict = load(f)

    u = universe

    # ================= Execution =================  #

    print('Calculating Bond connections...')
    resnames = ' '.join(residue_list)
    original_bond_count = len(u.bonds)
    u.select_atoms(f'resname {resnames}').guess_bonds(vdwradii=config.vdw_radi)

    print(
        f'Original file contained {original_bond_count} bonds. {len(u.bonds) - original_bond_count} additional bonds infered.'
    )

    print(f'Begining Coarse-Graining process...')

    bead_data = []  # (bead atom, member atoms) for every bead
    cg_beads = []  # bead atoms, in creation order; list position is the bead's index in new_u
    dummy_parents = {}  # member atom ix -> bead atom that absorbed it

    non_water_atoms = u.select_atoms('not resname WAT')
    for residue in non_water_atoms.residues:  # loops through each non-water residue
        resid = residue.resid  # store int id
        resname = residue.resname
        if resname in residue_list:
            # NOTE: a commented-out special case for "PHOSPHATE"/"RIBOSE"
            # residue names used to sit here.

            if len(resname) == 4 and resname[0] == 'D':  # for D-variants: drop the leading 'D'
                resname_key = resname[1:]
            else:
                resname_key = resname

            try:
                segments = mapping_dict[resname_key].keys()
                for segment in segments:  # loops through each segment of each residue
                    params = 'name ' + ' '.join(
                        mapping_dict[resname_key][segment]
                        ['atoms'])  # selection string naming the segment's atoms
                    # selects all atoms in a given residue segment
                    atms = residue.atoms.select_atoms(params)
                    dummy = atms[0]  # the first selected atom doubles as the bead
                    # names dummy atom in proper format: abbreviation + first
                    # character of the segment key + resid
                    dummy.name = str(abrev_dict[resname_key]) + str(
                        segment[0]) + str(resid)
                    dummy.type = mapping_dict[resname_key][segment]['name']
                    dummy.charge = mapping_dict[resname_key][segment]['charge']

                    bead_data.append((dummy, atms))
                    cg_beads.append(dummy)

                    for atm in atms:
                        dummy_parents[atm.ix] = dummy

            except KeyError:
                # NOTE(review): beads appended for earlier segments of this
                # residue before the KeyError stay in bead_data/cg_beads.
                print(
                    f'{resname_key} was not found in mapping/abrev_dict, skipping coarse grain. Please add its parameters to the dictionary. (See README section A3. for help)'
                )

    new_bonds = []
    # NOTE: a commented-out bond builder driven by mapping_dict[...]["Bonds"]
    # used to sit here; bonds are now derived from atomic connectivity below.

    for dummy, atms in bead_data:
        # connect all parents with connected children
        for atom in atms:
            for bond in atom.bonds:
                for bonded_atom in bond.atoms:
                    if bonded_atom not in atms:  # would be more efficient if atms were a set
                        # Any bonded_atom that reaches this point sits just
                        # outside the cluster of atoms assigned to this bead.
                        # Every crossing atomic bond is visited from both of
                        # its beads, so bead pairs can be appended more than
                        # once and in both orientations.
                        try:
                            new_bonds.append([
                                cg_beads.index(dummy),
                                cg_beads.index(dummy_parents[bonded_atom.ix])
                            ])  # bond between this bead and the neighbour's bead
                        except KeyError:  # raises if atom does not belong to a coarse grain bead
                            pass

    cg_beads = mda.AtomGroup(cg_beads)

    # TODO: EXPORT NEW_U INSTEAD OF OLD U
    # TODO: EXPORT NEW_U TO HAVE APPROPRIATE FRAMES
    # TODO: SHIFT THE DEFINITION OF CENTERS IN THE UNIVERSE EVEN IF NOT EXPORTING
    # TODO: AUTOTUNE THE CURVE TO FIND THE RIGHT STEP

    progress(0)
    number_of_frames = len(u.trajectory)
    for frame in u.trajectory:  # loops through each frame
        f = frame.frame
        # positions each dummy atom at its cluster's center of mass
        # NOTE(review): whether these per-frame position edits are still
        # present when the trajectory is re-read below depends on the
        # trajectory reader -- confirm.
        for dummy, atms in bead_data:
            dummy.position = AtomGroup(atms).center_of_mass()
        progress(f / number_of_frames)
    progress(1)
    print()

    for dummy, atms in bead_data:
        dummy.mass = AtomGroup(atms).masses.sum()

    # purge existing remnant bonds, angles and dihedrals
    u.delete_bonds(u.bonds)
    u.delete_angles(u.angles)
    u.delete_dihedrals(u.dihedrals)

    print(f'Building new coarse-grained universe...')
    # Collect the bead positions of every frame, then load them into the
    # merged universe through MemoryReader.
    coordinates = AnalysisFromFunction(lambda ag: ag.positions.copy(),
                                       cg_beads).run().results
    new_u = mda.Merge(cg_beads)
    new_u.load_new(coordinates, format=MemoryReader)
    new_u.add_TopologyAttr('bonds', new_bonds)
    new_u.add_TopologyAttr('angles', guess_angles(new_u.bonds))
    new_u.add_TopologyAttr('dihedrals', guess_dihedrals(new_u.angles))
    print(
        f'Built universe with {len(new_u.atoms)} coarse-grained beads, {len(new_u.bonds)} bonds, {len(new_u.angles)} angles, and {len(new_u.dihedrals)} dihedrals'
    )

    if export:
        print('Writing Output Files...')
        out_file = f'outputs/CoarseGrain/{simulation_name}_CG.pdb'
        # NOTE(review): the handle opened here is never used; the write below
        # is given the path, not the handle.
        with open(out_file, 'w+') as _:
            new_u.atoms.write(out_file, bonds='all')
        print(f'Topology written to {simulation_name}_CG.pdb!')

        is_multiframe = number_of_frames > 1
        with mda.Writer(f'outputs/CoarseGrain/{simulation_name}_CG.dcd',
                        new_u.atoms.n_atoms,
                        multiframe=is_multiframe,
                        bonds='all') as w:
            # NOTE(review): the slice starts at 1, so the first frame is not
            # written to the DCD -- confirm this is intended.
            for frame in new_u.trajectory[1:]:  # loops through each frame
                w.write(new_u.atoms)

        print('Generated All Coarse Grained Molecules!')
        print(f'Trajectory written to {simulation_name}_CG.dcd!')

    # NOTE: a commented-out loop that reset every dummy.type to '' used to sit here.

    print(f'Reduced {len(u.atoms)} atoms to {len(new_u.atoms)} beads!')
    print('Coarse Graining Task complete!')

    return new_u
def compute_simple_protein_features(u, key_res):
    """
    Compute the DFG-motif dihedrals and two DFG-Phe distances of a structure.

    Parameters
    ----------
    u : object
        A MDAnalysis.core.universe.Universe object of the input structure
        (a pdb file or a simulation trajectory).
    key_res : dict of int
        A dictionary (with keys 'group0' ... 'group4') of feature-related
        residue indices in five feature groups. Only
        ``key_res['group3'][0:4]`` and ``key_res['group2'][3]`` are read
        here; the values are used as ``resid`` in atom selections.

    Returns
    -------
    dihedrals : array
        ``Dihedral(...).run().angles`` for seven dihedrals, in the order
        xDFG_phi, xDFG_psi, dFG_phi, dFG_psi, DfG_phi, DfG_psi, DfG_chi1.
    distances : list
        A one-element list holding an array of two distances:
        CA of ``group3[3]`` to DFG-Phe CZ, and CA of ``group2[3]`` to
        DFG-Phe CZ. They are evaluated once with ``dist`` after the dihedral
        run, not once per trajectory frame.

    Raises
    ------
    IndexError
        Propagated from indexing an empty selection when a required atom is
        not found.

    .. todo ::

       Use kwargs with sensible defaults instead of relying only on positional arguments.
    """
    from MDAnalysis.core.groups import AtomGroup
    from MDAnalysis.analysis.dihedrals import Dihedral
    from MDAnalysis.analysis.distances import dist
    import numpy as np

    def _atom_ix(resid, atom_name):
        """Return the 0-based index of the first atom matching resid and name."""
        return int(u.select_atoms(f"resid {resid} and name {atom_name}")[0].ix)

    # residues of feature group 3 that define the dihedrals
    group3 = key_res["group3"]
    xdfg, asp, phe = group3[0], group3[1], group3[2]

    # parse the topology info (0-based atom indices)
    prev_c = _atom_ix(xdfg - 1, "C")  # xxDFG C
    xdfg_n = _atom_ix(xdfg, "N")  # xDFG N
    xdfg_ca = _atom_ix(xdfg, "CA")  # xDFG CA
    xdfg_c = _atom_ix(xdfg, "C")  # xDFG C
    asp_n = _atom_ix(asp, "N")  # DFG-Asp N
    asp_ca = _atom_ix(asp, "CA")  # DFG-Asp CA
    asp_c = _atom_ix(asp, "C")  # DFG-Asp C
    phe_n = _atom_ix(phe, "N")  # DFG-Phe N
    phe_ca = _atom_ix(phe, "CA")  # DFG-Phe CA
    phe_c = _atom_ix(phe, "C")  # DFG-Phe C
    gly_n = _atom_ix(phe + 1, "N")  # DFG-Gly N
    phe_cb = _atom_ix(phe, "CB")  # DFG-Phe CB
    phe_cg = _atom_ix(phe, "CG")  # DFG-Phe CG

    # seven dihedrals (feature group 3): each row holds the four atom indices
    dih = np.array(
        [
            [prev_c, xdfg_n, xdfg_ca, xdfg_c],  # X-DFG phi
            [xdfg_n, xdfg_ca, xdfg_c, asp_n],  # X-DFG psi
            [xdfg_c, asp_n, asp_ca, asp_c],  # DFG-Asp phi
            [asp_n, asp_ca, asp_c, phe_n],  # DFG-Asp psi
            [asp_c, phe_n, phe_ca, phe_c],  # DFG-Phe phi
            [phe_n, phe_ca, phe_c, gly_n],  # DFG-Phe psi
            [phe_n, phe_ca, phe_cb, phe_cg],  # DFG-Phe chi1
        ],
        dtype=int,
    )

    # two distances (Dunbrack D1, D2): each row holds the two atom indices
    glu_ca = _atom_ix(group3[3], "CA")  # ExxxX CA
    phe_cz = _atom_ix(phe, "CZ")  # DFG-Phe CZ
    lys_ca = _atom_ix(key_res["group2"][3], "CA")  # K in beta III CA
    dis = np.array([[glu_ca, phe_cz], [lys_ca, phe_cz]], dtype=int)

    # check if there is any missing coordinates; if so, skip dihedral/distance
    # calculation for those residues
    # NOTE(review): index 0 is used as the "missing" marker, but a missing
    # atom already raises IndexError above, while a real atom with index 0
    # would blank its row here -- confirm the intended behavior.
    all_present = True
    for row in dih:
        if 0 in row:
            row[:] = 0
            all_present = False
    for row in dis:
        if 0 in row:
            row[:] = 0
            all_present = False
    if all_present:
        print("There is no missing coordinates.  All dihedrals and distances will be computed.")

    # compute dihedrals and distances
    dih_ags = [AtomGroup(row, u) for row in dih]
    dihedrals = Dihedral(dih_ags).run().angles

    each_frame = []
    for first_ix, second_ix in dis:
        ag0 = AtomGroup([first_ix], u)  # first atom in each atom pair
        ag1 = AtomGroup([second_ix], u)  # second atom in each atom pair
        # dist() returns several rows; the last row holds the distances
        each_frame.append(dist(ag0, ag1)[-1][0])
    distances = [np.array(each_frame)]

    return dihedrals, distances
# Randomly thin out the 'Zn' atoms of one structure, plot per-species
# histograms of coordinate column 2, and write the surviving atoms.
# `f`, `w` and `znw` are defined outside this excerpt.
u = Universe(f)
i: Atom
# One random number per atom, used for the keep/drop draw below.
rnd = np.random.rand(len(u.atoms))
# Largest value in column 2 of the coordinate array
# (presumably the highest z in the structure -- confirm).
m = max(np.array(u.coord)[:, 2])
n = 0  # number of 'Zn' atoms dropped
ind = []  # indices that are written out: non-Zn atoms first, then kept Zn
oxy = []  # indices of every atom whose atomic_name is not 'Zn'
zinc = []  # indices of the 'Zn' atoms that are kept
for j, i in enumerate(u.atoms):
    if i.atomic_name == 'Zn':
        # Keep the atom when its random draw exceeds 0.3 * exp(0.5 * (z - m)):
        # the threshold is 0.3 at z == m and shrinks for smaller z.
        if rnd[j] > 0.3 * math.exp((i.position[2] - m) * 0.5):
            zinc.append(j)
        else:
            n += 1
    else:
        oxy.append(j)
ind.extend(oxy)
ind.extend(zinc)
a = AtomGroup(ind, u)  # everything that survives
zn = AtomGroup(zinc, u)  # surviving Zn only
o = AtomGroup(oxy, u)  # all non-Zn atoms
# 20-bin histograms of column 2 of the positions, one per group
hist, _ = np.histogram(zn.positions[:, 2], bins=20)
hist2, _ = np.histogram(o.positions[:, 2], bins=20)
plt.plot(hist, '+')
plt.plot(hist2, 'x')
plt.ylim((0, 70))
w.write(a)
znw.write(zn)
plt.show()
# znw.write(zn)