def get_torsion_matrix(file_list, cgmodel, frame_start, frame_stride, frame_end, backbone_torsion_type):
    """
    Internal function for reading trajectory files and computing torsions.

    Every file in file_list is loaded, the requested frames are selected from each,
    and the selections are joined (in file order) into a single trajectory on which
    the torsions of type backbone_torsion_type are evaluated.

    :param file_list: paths to the trajectory files, one per replica
    :type file_list: list(str)

    :param cgmodel: CGModel() object
    :type cgmodel: class

    :param frame_start: First frame to use from each trajectory. Passed directly to the frame slice.
    :type frame_start: int

    :param frame_stride: Advance by this many frames when selecting frames.
    :type frame_stride: int

    :param frame_end: End of the frame slice (exclusive). -1 means through the last frame of each trajectory.
    :type frame_end: int

    :param backbone_torsion_type: particle sequence of the torsion type to compute (e.g. "bb_bb_bb_bb")
    :type backbone_torsion_type: str

    :returns:
       - torsion_val_array - torsion values in degrees, as returned by md.compute_dihedrals on the joined trajectory
       - traj_all - the joined trajectory

    :raises ValueError: if file_list is empty or no torsion type matches backbone_torsion_type
    """

    if len(file_list) == 0:
        raise ValueError("file_list must contain at least one trajectory file")

    # Load every file and keep only the requested frames
    selected = []
    for traj_file in file_list:
        # The file type is checked per file, so mixed dcd/non-dcd lists load correctly
        if traj_file[-3:] == 'dcd':
            traj = md.load(traj_file, top=md.Topology.from_openmm(cgmodel.topology))
        else:
            traj = md.load(traj_file)

        last_frame = traj.n_frames if frame_end == -1 else frame_end
        selected.append(traj[frame_start:last_frame:frame_stride])

    # Combine all trajectories in file order
    traj_all = selected[0]
    for traj in selected[1:]:
        traj_all = traj_all.join(traj)

    # Get torsion list:
    torsion_list = CGModel.get_torsion_list(cgmodel)

    # Assign torsion types:
    _, _, torsion_sub_arrays, _, i_torsion_type, _, inv_torsion_dict = \
        assign_torsion_types(cgmodel, torsion_list)

    # Locate the requested torsion type (type keys are the strings "1".."i_torsion_type").
    # Scanning all types keeps the original "last match wins" behavior.
    matched_key = None
    for i in range(i_torsion_type):
        type_key = str(i + 1)
        if inv_torsion_dict[type_key] == backbone_torsion_type:
            matched_key = type_key

    if matched_key is None:
        raise ValueError(
            f"No torsion type matching '{backbone_torsion_type}' was found in cgmodel"
        )

    # Compute all torsion values in trajectory and convert to degrees
    torsion_val_array = (180 / np.pi) * md.compute_dihedrals(
        traj_all, torsion_sub_arrays[matched_key])

    return torsion_val_array, traj_all
def calc_ramachandran(
    cgmodel,
    file_list,
    nbin_theta=180,
    nbin_alpha=180,
    frame_start=0,
    frame_stride=1,
    frame_end=-1,
    plotfile="ramachandran.pdf",
    backbone_angle_type="bb_bb_bb",
    backbone_torsion_type="bb_bb_bb_bb",
    colormap="nipy_spectral",
    temperature_list=None,
):
    """
    Calculate and plot ramachandran plot for backbone bond bending-angle and torsion
    angle, given a CGModel object and pdb or dcd trajectory.

    :param cgmodel: CGModel() object
    :type cgmodel: class

    :param file_list: path to pdb or dcd trajectory file(s) - can be a list or single string
    :type file_list: str or list(str)

    :param nbin_theta: number of bins for bond-bending angle (spanning from 0 to 180 degrees)
    :type nbin_theta: int

    :param nbin_alpha: number of bins for torsion angle (spanning from -180 to +180 degrees)
    :type nbin_alpha: int

    :param frame_start: First frame in trajectory file to use for analysis.
    :type frame_start: int

    :param frame_stride: Advance by this many frames when reading trajectories.
    :type frame_stride: int

    :param frame_end: Last frame in trajectory file to use for analysis. -1 means through the end of each file.
    :type frame_end: int

    :param plotfile: Filename for saving the ramachandran pdf plot
    :type plotfile: str

    :param backbone_angle_type: particle sequence of the backbone angles (default="bb_bb_bb") - for now only single sequence permitted
    :type backbone_angle_type: str

    :param backbone_torsion_type: particle sequence of the backbone torsions (default="bb_bb_bb_bb") - for now only single sequence permitted
    :type backbone_torsion_type: str

    :param colormap: matplotlib pyplot colormap to use (default='nipy_spectral')
    :type colormap: str (case sensitive)

    :param temperature_list: list of temperatures corresponding to file_list. If None, no subplot labels will be used.
    :type temperature_list: list(Quantity())

    :returns:
       - hist_data ( dict )
       - xedges ( dict )
       - yedges ( dict )

    :raises ValueError: if no angle or torsion type matches the requested backbone type
    """

    # Convert file_list to list if a single string (split on whitespace, as before):
    if isinstance(file_list, str):
        file_list = file_list.split()

    # The type assignments and bin edges depend only on cgmodel and the bin counts,
    # so they are resolved once rather than once per file.
    angle_list = CGModel.get_bond_angle_list(cgmodel)
    _, _, ang_sub_arrays, _, i_angle_type, _, inv_ang_dict = \
        assign_angle_types(cgmodel, angle_list)

    torsion_list = CGModel.get_torsion_list(cgmodel)
    _, _, torsion_sub_arrays, _, i_torsion_type, _, inv_torsion_dict = \
        assign_torsion_types(cgmodel, torsion_list)

    # Type keys are the strings "1".."n_types"; the last matching type wins.
    angle_key = None
    for i in range(i_angle_type):
        if inv_ang_dict[str(i + 1)] == backbone_angle_type:
            angle_key = str(i + 1)
    if angle_key is None:
        raise ValueError(
            f"No angle type matching '{backbone_angle_type}' was found in cgmodel"
        )

    torsion_key = None
    for i in range(i_torsion_type):
        if inv_torsion_dict[str(i + 1)] == backbone_torsion_type:
            torsion_key = str(i + 1)
    if torsion_key is None:
        raise ValueError(
            f"No torsion type matching '{backbone_torsion_type}' was found in cgmodel"
        )

    # Set bin edges:
    angle_bin_edges = np.linspace(0, 180, nbin_theta + 1)
    torsion_bin_edges = np.linspace(-180, 180, nbin_alpha + 1)

    # Store angle, torsion values by filename for computing global colormap
    ang_val_array = {}
    torsion_val_array = {}

    for file in file_list:
        # Load in a trajectory file:
        if file[-3:] == 'dcd':
            traj = md.load(file, top=md.Topology.from_openmm(cgmodel.topology))
        else:
            traj = md.load(file)

        # Select frames for analysis. The end frame is resolved per file so that
        # the frame count of the first file is not imposed on later files.
        last_frame = traj.n_frames if frame_end == -1 else frame_end
        traj = traj[frame_start:last_frame:frame_stride]

        # [nframes x n_angles] array of bond-bending angles
        angles = md.compute_angles(traj, ang_sub_arrays[angle_key])

        # The number of angles and torsions differ; by convention the last angle
        # is omitted so that both flattened arrays have the same length.
        angles = (180 / np.pi) * angles[:, :-1]
        ang_val_array[file] = angles.reshape(-1, 1)

        # [nframes x n_torsions] array of torsion angles, converted to degrees
        torsions = md.compute_dihedrals(traj, torsion_sub_arrays[torsion_key])
        torsions *= (180 / np.pi)
        torsion_val_array[file] = torsions.reshape(-1, 1)

    # 2d histogram the data and plot:
    hist_data, xedges, yedges = plot_2d_distribution(
        file_list, torsion_val_array, ang_val_array, torsion_bin_edges, angle_bin_edges,
        plotfile, colormap, xlabel='Alpha (degrees)', ylabel='Theta (degrees)',
        temperature_list=temperature_list)

    return hist_data, xedges, yedges
def _reverse_bonded_name(name):
    """Return a bonded-type name with its particle order reversed ("a_b_c" -> "c_b_a")."""
    return "_".join(reversed(name.split("_")))


def _match_bonded_type(inv_type_dict, n_types, var_name):
    """Return the type key ("1".."n_types") whose name equals var_name or its reverse."""
    accepted = (var_name, _reverse_bonded_name(var_name))
    matched_key = None
    for i in range(n_types):
        type_key = str(i + 1)
        if inv_type_dict[type_key] in accepted:
            # Keep scanning: if several types match, the last one is used
            matched_key = type_key

    if matched_key is None:
        raise ValueError(f"No bonded type matching '{var_name}' was found in cgmodel")
    return matched_key


def _setup_bonded_variable(cgmodel, var_name, nbins):
    """
    Resolve how one bonded variable (bond, angle or torsion) is computed and binned.

    Returns (compute_func, particle_indices, scale, bin_edges, axis_label), where
    compute_func(traj, particle_indices) yields the raw values and scale is the
    factor converting them to the plotted unit (None when no conversion is applied).
    """
    # The number of particles in the name decides the variable type
    n_particles = var_name.count('_') + 1

    if n_particles == 2:
        # Bond
        bond_list = CGModel.get_bond_list(cgmodel)
        _, _, bond_sub_arrays, _, i_bond_type, _, inv_bond_dict = \
            assign_bond_types(cgmodel, bond_list)
        indices = bond_sub_arrays[_match_bonded_type(inv_bond_dict, i_bond_type, var_name)]

        # Bin range heuristic: 0.5x to 1.5x the equilibrium length of the first bond of this type
        b_eq = cgmodel.get_bond_length(indices[0]).value_in_unit(unit.nanometer)
        bin_edges = np.linspace(0.5 * b_eq, 1.5 * b_eq, nbins + 1)
        label = f'{var_name} distance ({unit.nanometer})'
        return md.compute_distances, indices, None, bin_edges, label

    if n_particles == 3:
        # Angle
        angle_list = CGModel.get_bond_angle_list(cgmodel)
        _, _, ang_sub_arrays, _, i_angle_type, _, inv_ang_dict = \
            assign_angle_types(cgmodel, angle_list)
        indices = ang_sub_arrays[_match_bonded_type(inv_ang_dict, i_angle_type, var_name)]
        bin_edges = np.linspace(0, 180, nbins + 1)
        label = f'{var_name} angle (degrees)'
        return md.compute_angles, indices, 180 / np.pi, bin_edges, label

    if n_particles == 4:
        # Torsion
        torsion_list = CGModel.get_torsion_list(cgmodel)
        _, _, torsion_sub_arrays, _, i_torsion_type, _, inv_torsion_dict = \
            assign_torsion_types(cgmodel, torsion_list)
        indices = torsion_sub_arrays[
            _match_bonded_type(inv_torsion_dict, i_torsion_type, var_name)]
        bin_edges = np.linspace(-180, 180, nbins + 1)
        label = f'{var_name} angle (degrees)'
        return md.compute_dihedrals, indices, 180 / np.pi, bin_edges, label

    raise ValueError(
        f"'{var_name}' names {n_particles} particles; only bonds (2), "
        "angles (3) and torsions (4) are supported"
    )


def calc_2d_distribution(
    cgmodel,
    file_list,
    nbin_xvar=180,
    nbin_yvar=180,
    frame_start=0,
    frame_stride=1,
    frame_end=-1,
    plotfile="2d_hist.pdf",
    xvar_name="bb_bb_bb",
    yvar_name="bb_bb_bb_bb",
    colormap="nipy_spectral",
    temperature_list=None,
):
    """
    Calculate and plot 2d histogram for any 2 bonded variables,
    given a CGModel object and pdb or dcd trajectory.

    :param cgmodel: CGModel() object
    :type cgmodel: class

    :param file_list: path to pdb or dcd trajectory file(s) - can be a list or single string
    :type file_list: str or list(str)

    :param nbin_xvar: number of bins for x bonded variable
    :type nbin_xvar: int

    :param nbin_yvar: number of bins for y bonded variable
    :type nbin_yvar: int

    :param frame_start: First frame in trajectory file to use for analysis.
    :type frame_start: int

    :param frame_stride: Advance by this many frames when reading trajectories.
    :type frame_stride: int

    :param frame_end: Last frame in trajectory file to use for analysis. -1 means through the end of each file.
    :type frame_end: int

    :param plotfile: Filename for saving the 2d histogram pdf plot
    :type plotfile: str

    :param xvar_name: particle sequence of the x bonded parameter (default="bb_bb_bb")
    :type xvar_name: str

    :param yvar_name: particle sequence of the y bonded parameter (default="bb_bb_bb_bb")
    :type yvar_name: str

    :param colormap: matplotlib pyplot colormap to use (default='nipy_spectral')
    :type colormap: str (case sensitive)

    :param temperature_list: list of temperatures corresponding to file_list. If None, no subplot labels will be used.
    :type temperature_list: list(Quantity())

    :returns:
       - hist_data ( dict )
       - xedges ( dict )
       - yedges ( dict )

    :raises ValueError: if a variable name is not a bond, angle or torsion, or matches no type in cgmodel
    """

    # Convert file_list to list if a single string (split on whitespace, as before):
    if isinstance(file_list, str):
        file_list = file_list.split()

    # How each variable is computed and binned depends only on cgmodel, so resolve it once.
    # Both the given particle order and its reverse are accepted when matching type names.
    x_compute, x_indices, x_scale, xvar_bin_edges, xlabel = \
        _setup_bonded_variable(cgmodel, xvar_name, nbin_xvar)
    y_compute, y_indices, y_scale, yvar_bin_edges, ylabel = \
        _setup_bonded_variable(cgmodel, yvar_name, nbin_yvar)

    # Flattened value pairs by filename for computing global colormap
    xvar_val_array_combo = {}
    yvar_val_array_combo = {}

    for file in file_list:
        # Load in a trajectory file:
        if file[-3:] == 'dcd':
            traj = md.load(file, top=md.Topology.from_openmm(cgmodel.topology))
        else:
            traj = md.load(file)

        # Select frames for analysis. The end frame is resolved per file so that
        # the frame count of the first file is not imposed on later files.
        last_frame = traj.n_frames if frame_end == -1 else frame_end
        traj = traj[frame_start:last_frame:frame_stride]

        # Each array of single observables is [n_frames x n_occurrences]
        x_vals = x_compute(traj, x_indices)
        if x_scale is not None:
            x_vals = x_scale * x_vals

        y_vals = y_compute(traj, y_indices)
        if y_scale is not None:
            y_vals = y_scale * y_vals

        # Since the bonded variables may have different numbers of observables, all
        # combinations of the 2 parameter observables are used to create the histograms.
        # Per frame, the column layout is:
        #   x: [x0, x1, ..., xn] repeated once for every y observable
        #   y: [y0, y0, ..., y0, y1, ..., yn], each y repeated once for every x observable
        n_occ_x = x_vals.shape[1]
        n_occ_y = y_vals.shape[1]
        x_combo = np.tile(x_vals, (1, n_occ_y)).astype(np.float64)
        y_combo = np.repeat(y_vals, n_occ_x, axis=1).astype(np.float64)

        # Reshape arrays for histogramming. Shapes come from this file's own data,
        # so files with different frame counts are handled correctly.
        xvar_val_array_combo[file] = x_combo.reshape(-1, 1)
        yvar_val_array_combo[file] = y_combo.reshape(-1, 1)

    # 2d histogram the data and plot:
    hist_data, xedges, yedges = plot_2d_distribution(
        file_list, xvar_val_array_combo, yvar_val_array_combo, xvar_bin_edges, yvar_bin_edges,
        plotfile, colormap, xlabel, ylabel, temperature_list=temperature_list)

    return hist_data, xedges, yedges
def calc_torsion_distribution(
    cgmodel, file_list, nbins=180, frame_start=0, frame_stride=1, frame_end=-1,
    plot_per_page=2, temperature_list=None, plotfile="torsion_hist.pdf"
):
    """
    Calculate and plot all torsion distributions from a CGModel object and pdb or dcd trajectory

    :param cgmodel: CGModel() object
    :type cgmodel: class

    :param file_list: path to pdb or dcd trajectory file(s)
    :type file_list: str or list(str)

    :param nbins: number of bins spanning the range of -180 to 180 degrees, default = 180
    :type nbins: int

    :param frame_start: First frame in trajectory file to use for analysis.
    :type frame_start: int

    :param frame_stride: Advance by this many frames when reading trajectories.
    :type frame_stride: int

    :param frame_end: Last frame in trajectory file to use for analysis. -1 means through the end of each file.
    :type frame_end: int

    :param plot_per_page: number of subplots to display on each page (default=2)
    :type plot_per_page: int

    :param temperature_list: list of temperatures corresponding to file_list. If None, file names will be the plot labels.
    :type temperature_list: list(Quantity())

    :param plotfile: Base filename for saving torsion distribution pdf plots
    :type plotfile: str

    :returns:
       - torsion_hist_data ( dict ) - keyed by temperature (or file name without its last 4 characters),
         then by "<torsion type>_density" and "<torsion type>_bin_centers"
    """

    # Convert file_list to list if a single string (split on whitespace, as before):
    if isinstance(file_list, str):
        file_list = file_list.split()

    # Get torsion list
    torsion_list = CGModel.get_torsion_list(cgmodel)

    # Assign torsion types
    _, _, torsion_sub_arrays, _, i_torsion_type, _, inv_torsion_dict = \
        assign_torsion_types(cgmodel, torsion_list)

    # Create dictionary for saving torsion histogram data:
    torsion_hist_data = {}

    # Set bin edges; centers are the midpoints, kept as an [nbins x 1] column
    torsion_bin_edges = np.linspace(-180, 180, nbins + 1)
    torsion_bin_centers = (
        (torsion_bin_edges[:-1] + torsion_bin_edges[1:]) / 2
    ).reshape(-1, 1)

    for file_index, file in enumerate(file_list):
        # Load in a trajectory file:
        if file[-3:] == 'dcd':
            traj = md.load(file, top=md.Topology.from_openmm(cgmodel.topology))
        else:
            traj = md.load(file)

        # Select frames for analysis. The end frame is resolved per file so that
        # the frame count of the first file is not imposed on later files.
        last_frame = traj.n_frames if frame_end == -1 else frame_end
        traj = traj[frame_start:last_frame:frame_stride]

        # Create inner dictionary for current file:
        if temperature_list is not None:
            file_key = f"{temperature_list[file_index].value_in_unit(unit.kelvin):.2f}"
        else:
            # Drop the last 4 characters (e.g. ".pdb" / ".dcd")
            file_key = file[:-4]

        file_hist = {}
        torsion_hist_data[file_key] = file_hist

        # Type keys are the strings "1".."i_torsion_type"
        for i in range(i_torsion_type):
            type_key = str(i + 1)
            type_name = inv_torsion_dict[type_key]

            # [nframes x n_torsions] array, flattened to a column and converted to degrees
            torsion_val_array = (180 / np.pi) * md.compute_dihedrals(
                traj, torsion_sub_arrays[type_key]).reshape(-1, 1)

            # Histogram results:
            n_out, _ = np.histogram(
                torsion_val_array, bins=torsion_bin_edges, density=True)

            file_hist[f"{type_name}_density"] = n_out
            file_hist[f"{type_name}_bin_centers"] = torsion_bin_centers

    plot_distribution(
        inv_torsion_dict,
        torsion_hist_data,
        xlabel="Torsion angle (degrees)",
        ylabel="Probability density",
        xlim=[-180, 180],
        figure_title="Torsion_distributions",
        file_name=f"{plotfile}",
        plot_per_page=plot_per_page,
    )

    return torsion_hist_data