def get_torsion_matrix(file_list, cgmodel, frame_start, frame_stride,
                       frame_end, backbone_torsion_type):
    """
    Internal function for reading trajectory files and computing torsions.

    Every file in ``file_list`` is loaded, the requested frames are selected
    from each one, the selections are joined into a single trajectory, and the
    torsions of type ``backbone_torsion_type`` are evaluated on it.

    :param file_list: paths to the pdb or dcd trajectory files, one per replica
    :type file_list: list(str)

    :param cgmodel: CGModel() object
    :type cgmodel: class

    :param frame_start: First frame of each trajectory to use. The value is
        forwarded to the slice unchanged, so -1 follows normal slice semantics
        and starts at the last frame.
    :type frame_start: int

    :param frame_stride: Advance by this many frames when selecting frames.
    :type frame_stride: int

    :param frame_end: Last frame to use. -1 means the number of frames of the
        first trajectory, and that same value is applied to every replica.
    :type frame_end: int

    :param backbone_torsion_type: particle sequence of the torsion type to compute
    :type backbone_torsion_type: str

    :raises ValueError: if ``file_list`` is empty or no torsion type in the
        model matches ``backbone_torsion_type``

    :returns:
       - torsion_val_array ( [nframes x n_torsions] array of torsions in degrees )
       - traj_all ( joined trajectory of the selected frames of all replicas )
    """

    if len(file_list) == 0:
        raise ValueError("file_list must contain at least one trajectory file")

    # Load files as {replica number: replica trajectory}
    rep_traj = {}
    for i, traj_file in enumerate(file_list):
        # Decide per file whether a topology must be supplied: dcd files do
        # not carry one, so it is taken from the cgmodel.
        if traj_file[-3:] == 'dcd':
            rep_traj[i] = md.load(
                traj_file,
                top=md.Topology.from_openmm(cgmodel.topology))
        else:
            rep_traj[i] = md.load(traj_file)

    # Combine all trajectories, selecting specified frames
    if frame_end == -1:
        frame_end = rep_traj[0].n_frames

    # NOTE: frame_start == -1 is intentionally left untouched. The previous
    # code contained a no-op comparison here; assigning frame_end instead
    # would select zero frames.

    traj_all = rep_traj[0][frame_start:frame_end:frame_stride]

    for i in range(1, len(file_list)):
        traj_all = traj_all.join(
            rep_traj[i][frame_start:frame_end:frame_stride])

    # Get torsion list:
    torsion_list = CGModel.get_torsion_list(cgmodel)

    # Assign torsion types (only the per-type index arrays, the number of
    # types and the index -> name mapping are needed here):
    _, _, torsion_sub_arrays, _, i_torsion_type, _, inv_torsion_dict = \
        assign_torsion_types(cgmodel, torsion_list)

    # Compute specified torsion angles over all frames:
    torsion_val_array = None
    for i in range(i_torsion_type):
        type_key = str(i + 1)
        if inv_torsion_dict[type_key] == backbone_torsion_type:
            # Compute all torsion values in trajectory
            # This returns an [nframes x n_torsions] array
            torsion_val_array = md.compute_dihedrals(
                traj_all, torsion_sub_arrays[type_key])

            # Convert to degrees:
            torsion_val_array = (180 / np.pi) * torsion_val_array

    if torsion_val_array is None:
        raise ValueError(
            f"No torsion type matching '{backbone_torsion_type}' was found in the cgmodel")

    return torsion_val_array, traj_all
def calc_ramachandran(
    cgmodel,
    file_list,
    nbin_theta=180,
    nbin_alpha=180,
    frame_start=0,
    frame_stride=1,
    frame_end=-1,
    plotfile="ramachandran.pdf",
    backbone_angle_type = "bb_bb_bb",
    backbone_torsion_type = "bb_bb_bb_bb",
    colormap="nipy_spectral",
    temperature_list=None,
):
    """
    Calculate and plot ramachandran plot for backbone bond bending-angle and torsion
    angle, given a CGModel object and pdb or dcd trajectory.

    :param cgmodel: CGModel() object
    :type cgmodel: class

    :param file_list: path to pdb or dcd trajectory file(s) - can be a list or single string
    :type file_list: str or list(str)

    :param nbin_theta: number of bins for bond-bending angle (spanning from 0 to 180 degrees)
    :type nbin_theta: int

    :param nbin_alpha: number of bins for torsion angle (spanning from -180 to +180 degrees)
    :type nbin_alpha: int

    :param frame_start: First frame in trajectory file to use for analysis.
    :type frame_start: int

    :param frame_stride: Advance by this many frames when reading trajectories.
    :type frame_stride: int

    :param frame_end: Last frame in trajectory file to use for analysis. -1 means "up to the end", resolved separately for each file.
    :type frame_end: int

    :param plotfile: Filename for saving the ramachandran pdf plot
    :type plotfile: str

    :param backbone_angle_type: particle sequence of the backbone angles (default="bb_bb_bb") - for now only single sequence permitted
    :type backbone_angle_type: str

    :param backbone_torsion_type: particle sequence of the backbone torsions (default="bb_bb_bb_bb") - for now only single sequence permitted
    :type backbone_torsion_type: str

    :param colormap: matplotlib pyplot colormap to use (default='nipy_spectral')
    :type colormap: str (case sensitive)

    :param temperature_list: list of temperatures corresponding to file_list. If None, no subplot labels will be used.
    :type temperature_list: list(Quantity())

    :raises ValueError: if the model has no angle type matching backbone_angle_type or no torsion type matching backbone_torsion_type

    :returns:
       - hist_data ( dict )
       - xedges ( dict )
       - yedges ( dict )
    """

    # Convert file_list to list if a single string:
    if isinstance(file_list, str):
        # Single file
        file_list = file_list.split()

    # Everything below depends only on the model, not on the trajectory,
    # so it is done once instead of once per file.

    # Get angle list and assign angle types:
    angle_list = CGModel.get_bond_angle_list(cgmodel)
    _, _, ang_sub_arrays, _, i_angle_type, _, inv_ang_dict = \
        assign_angle_types(cgmodel, angle_list)

    # Get torsion list and assign torsion types:
    torsion_list = CGModel.get_torsion_list(cgmodel)
    _, _, torsion_sub_arrays, _, i_torsion_type, _, inv_torsion_dict = \
        assign_torsion_types(cgmodel, torsion_list)

    # Locate the requested types. If several entries carry the same name the
    # last one is used, matching the previous overwrite-in-loop behaviour.
    angle_key = None
    for i in range(i_angle_type):
        if inv_ang_dict[str(i+1)] == backbone_angle_type:
            angle_key = str(i+1)
    if angle_key is None:
        raise ValueError(
            f"No angle type matching '{backbone_angle_type}' was found in the cgmodel")

    torsion_key = None
    for i in range(i_torsion_type):
        if inv_torsion_dict[str(i+1)] == backbone_torsion_type:
            torsion_key = str(i+1)
    if torsion_key is None:
        raise ValueError(
            f"No torsion type matching '{backbone_torsion_type}' was found in the cgmodel")

    # Set bin edges:
    angle_bin_edges = np.linspace(0,180,nbin_theta+1)
    torsion_bin_edges = np.linspace(-180,180,nbin_alpha+1)

    # Store angle, torsion values by filename for computing global colormap
    ang_val_array = {}
    torsion_val_array = {}

    for traj_file in file_list:

        # Load in a trajectory file:
        if traj_file[-3:] == 'dcd':
            traj = md.load(traj_file,top=md.Topology.from_openmm(cgmodel.topology))
        else:
            traj = md.load(traj_file)

        # Select frames for analysis. The end frame is resolved per file so
        # that the length of the first file does not truncate later ones.
        last_frame = traj.n_frames if frame_end == -1 else frame_end
        traj = traj[frame_start:last_frame:frame_stride]

        # Compute all angle values in trajectory
        # This returns an [nframes x n_angles] array
        angle_values = md.compute_angles(traj,ang_sub_arrays[angle_key])

        # We will have different numbers of bond-bending angle and torsion angle.
        # We will set a convention of omitting the last angle value.

        # Convert to degrees and exclude last angle:
        angle_values = (180/np.pi)*angle_values[:,:-1]

        # Flatten to a single column for histogramming:
        ang_val_array[traj_file] = np.reshape(angle_values, (-1,1))

        # Compute all torsion values in trajectory
        # This returns an [nframes x n_torsions] array
        torsion_values = md.compute_dihedrals(traj,torsion_sub_arrays[torsion_key])

        # Convert to degrees:
        torsion_values *= (180/np.pi)

        # Flatten to a single column for histogramming:
        torsion_val_array[traj_file] = np.reshape(torsion_values, (-1,1))

    # 2d histogram the data and plot:
    hist_data, xedges, yedges = plot_2d_distribution(
        file_list, torsion_val_array, ang_val_array, torsion_bin_edges, angle_bin_edges,
        plotfile, colormap, xlabel='Alpha (degrees)', ylabel='Theta (degrees)', temperature_list=temperature_list)

    return hist_data, xedges, yedges
def _reverse_bonded_name(name):
    """Return the particle sequence `name` in reverse order (e.g. 'bb_sc' -> 'sc_bb')."""
    return "_".join(reversed(name.split("_")))


def _resolve_bonded_variable(cgmodel, var_name, nbins):
    """
    Look up everything needed to evaluate one bonded variable on a trajectory.

    The number of particles in `var_name` selects the kind of variable:
    2 -> bond length, 3 -> bond angle, 4 -> torsion. A type matches when its
    name equals `var_name` or its reverse.

    :returns: tuple (compute_func, indices, in_radians, bin_edges, axis_label)
        where compute_func(traj, indices) gives the raw values and in_radians
        tells whether they still need converting to degrees.
    :raises ValueError: for an unsupported particle count or when no type matches
    """
    n_particles = var_name.count('_')+1
    accepted_names = (var_name, _reverse_bonded_name(var_name))

    if n_particles == 2:
        # Bond
        bond_list = CGModel.get_bond_list(cgmodel)
        _, _, sub_arrays, _, n_types, _, inv_dict = \
            assign_bond_types(cgmodel, bond_list)
        compute_func = md.compute_distances
    elif n_particles == 3:
        # Angle
        angle_list = CGModel.get_bond_angle_list(cgmodel)
        _, _, sub_arrays, _, n_types, _, inv_dict = \
            assign_angle_types(cgmodel, angle_list)
        compute_func = md.compute_angles
    elif n_particles == 4:
        # Torsion
        torsion_list = CGModel.get_torsion_list(cgmodel)
        _, _, sub_arrays, _, n_types, _, inv_dict = \
            assign_torsion_types(cgmodel, torsion_list)
        compute_func = md.compute_dihedrals
    else:
        raise ValueError(
            f"'{var_name}' has {n_particles} particles; only bonds (2), "
            f"angles (3) and torsions (4) are supported")

    # If several types match, the last one wins (same as overwriting in a loop).
    type_key = None
    for i in range(n_types):
        if inv_dict[str(i+1)] in accepted_names:
            type_key = str(i+1)
    if type_key is None:
        raise ValueError(
            f"No bonded type matching '{var_name}' was found in the cgmodel")

    indices = sub_arrays[type_key]

    if n_particles == 2:
        # Bin range is a heuristic around the equilibrium bond length, so
        # that it is the same across all files.
        b_eq = cgmodel.get_bond_length(indices[0])
        b_min = 0.5*b_eq.value_in_unit(unit.nanometer)
        b_max = 1.5*b_eq.value_in_unit(unit.nanometer)
        bin_edges = np.linspace(b_min,b_max,nbins+1)
        axis_label = f'{var_name} distance ({unit.nanometer})'
        in_radians = False
    elif n_particles == 3:
        bin_edges = np.linspace(0,180,nbins+1)
        axis_label = f'{var_name} angle (degrees)'
        in_radians = True
    else:
        bin_edges = np.linspace(-180,180,nbins+1)
        axis_label = f'{var_name} angle (degrees)'
        in_radians = True

    return compute_func, indices, in_radians, bin_edges, axis_label


def calc_2d_distribution(
    cgmodel,
    file_list,
    nbin_xvar=180,
    nbin_yvar=180,
    frame_start=0,
    frame_stride=1,
    frame_end=-1,
    plotfile="2d_hist.pdf",
    xvar_name = "bb_bb_bb",
    yvar_name = "bb_bb_bb_bb",
    colormap="nipy_spectral",
    temperature_list=None,
    ):

    """
    Calculate and plot 2d histogram for any 2 bonded variables,
    given a CGModel object and pdb or dcd trajectory.

    :param cgmodel: CGModel() object
    :type cgmodel: class

    :param file_list: path to pdb or dcd trajectory file(s) - can be a list or single string
    :type file_list: str or list(str)

    :param nbin_xvar: number of bins for x bonded variable
    :type nbin_xvar: int

    :param nbin_yvar: number of bins for y bonded variable
    :type nbin_yvar: int

    :param frame_start: First frame in trajectory file to use for analysis.
    :type frame_start: int

    :param frame_stride: Advance by this many frames when reading trajectories.
    :type frame_stride: int

    :param frame_end: Last frame in trajectory file to use for analysis. -1 means "up to the end", resolved separately for each file.
    :type frame_end: int

    :param plotfile: Filename for saving the 2d histogram pdf plot
    :type plotfile: str

    :param xvar_name: particle sequence of the x bonded parameter (default="bb_bb_bb")
    :type xvar_name: str

    :param yvar_name: particle sequence of the y bonded parameter (default="bb_bb_bb_bb")
    :type yvar_name: str

    :param colormap: matplotlib pyplot colormap to use (default='nipy_spectral')
    :type colormap: str (case sensitive)

    :param temperature_list: list of temperatures corresponding to file_list. If None, no subplot labels will be used.
    :type temperature_list: list(Quantity())

    :raises ValueError: if xvar_name or yvar_name is not a 2-, 3- or 4-particle sequence, or matches no bonded type of the model (in either direction)

    :returns:
       - hist_data ( dict )
       - xedges ( dict )
       - yedges ( dict )
    """

    # Convert file_list to list if a single string:
    if isinstance(file_list, str):
        # Single file
        file_list = file_list.split()

    # Resolve both variables once; this depends on the model only.
    x_compute, x_indices, x_in_radians, xvar_bin_edges, xlabel = \
        _resolve_bonded_variable(cgmodel, xvar_name, nbin_xvar)
    y_compute, y_indices, y_in_radians, yvar_bin_edges, ylabel = \
        _resolve_bonded_variable(cgmodel, yvar_name, nbin_yvar)

    # Since the bonded variables may have different numbers of observables, we can use all
    # combinations of the 2 parameter observables to create the histograms.
    xvar_val_array_combo = {}
    yvar_val_array_combo = {}

    for traj_file in file_list:

        # Load in a trajectory file:
        if traj_file[-3:] == 'dcd':
            traj = md.load(traj_file,top=md.Topology.from_openmm(cgmodel.topology))
        else:
            traj = md.load(traj_file)

        # Select frames for analysis. The end frame is resolved per file so
        # that the length of the first file does not truncate later ones.
        last_frame = traj.n_frames if frame_end == -1 else frame_end
        traj = traj[frame_start:last_frame:frame_stride]

        # Each array of single observables is [n_frames x n_occurances]
        x_values = x_compute(traj, x_indices)
        if x_in_radians:
            # Convert to degrees:
            x_values *= (180/np.pi)

        y_values = y_compute(traj, y_indices)
        if y_in_radians:
            # Convert to degrees:
            y_values *= (180/np.pi)

        n_occ_x = x_values.shape[1]
        n_occ_y = y_values.shape[1]

        # x value arrays should be [xval0_y0, xval1_y0, ...xvaln_y0, ... xval0_yn, xval1_yn, xvaln_yn]
        # y value arrays should be [yval0_x0, yval0_x1, ...yval0_xn, ... yvaln_x0, yvaln_x1, yvaln_xn]
        # The cast keeps the float64 dtype of the zero-initialised buffers
        # that were previously filled column by column.
        x_combo = np.tile(x_values, (1, n_occ_y)).astype(np.float64)
        y_combo = np.repeat(y_values, n_occ_x, axis=1).astype(np.float64)

        # Reshape arrays for histogramming (sized from this file's own data,
        # not from the frame count of whichever file was loaded last):
        xvar_val_array_combo[traj_file] = np.reshape(x_combo, (-1,1))
        yvar_val_array_combo[traj_file] = np.reshape(y_combo, (-1,1))

    # 2d histogram the data and plot:
    hist_data, xedges, yedges = plot_2d_distribution(
        file_list, xvar_val_array_combo, yvar_val_array_combo, xvar_bin_edges, yvar_bin_edges,
        plotfile, colormap, xlabel, ylabel, temperature_list=temperature_list)

    return hist_data, xedges, yedges
def calc_torsion_distribution(
    cgmodel, file_list, nbins=180, frame_start=0, frame_stride=1, frame_end=-1,
    plot_per_page=2, temperature_list=None, plotfile="torsion_hist.pdf"
    ):
    """
    Calculate and plot all torsion distributions from a CGModel object and pdb or dcd trajectory

    :param cgmodel: CGModel() object
    :type cgmodel: class

    :param file_list: path to pdb or dcd trajectory file(s)
    :type file_list: str or list(str)

    :param nbins: number of bins spanning the range of -180 to 180 degrees, default = 180
    :type nbins: int

    :param frame_start: First frame in trajectory file to use for analysis.
    :type frame_start: int

    :param frame_stride: Advance by this many frames when reading trajectories.
    :type frame_stride: int

    :param frame_end: Last frame in trajectory file to use for analysis. -1 means "up to the end", resolved separately for each file.
    :type frame_end: int

    :param plot_per_page: number of subplots to display on each page (default=2)
    :type plot_per_page: int

    :param temperature_list: list of temperatures corresponding to file_list. If None, file names will be the plot labels.
    :type temperature_list: list(Quantity())

    :param plotfile: Base filename for saving torsion distribution pdf plots
    :type plotfile: str

    :returns:
       - torsion_hist_data ( dict ) - keyed by temperature (kelvin, 2 decimals) or by
         file name without its last 4 characters; each inner dict holds
         "<torsion type>_density" and "<torsion type>_bin_centers" entries

    """

    # Convert file_list to list if a single string:
    if isinstance(file_list, str):
        # Single file
        file_list = file_list.split()

    # Get torsion list
    torsion_list = CGModel.get_torsion_list(cgmodel)

    # Assign torsion types (only the per-type index arrays, the number of
    # types and the index -> name mapping are needed here)
    _, _, torsion_sub_arrays, _, i_torsion_type, _, inv_torsion_dict = \
        assign_torsion_types(cgmodel, torsion_list)

    # Create dictionary for saving torsion histogram data:
    torsion_hist_data = {}

    # Set bin edges, and bin centers as an [nbins x 1] column:
    torsion_bin_edges = np.linspace(-180,180,nbins+1)
    torsion_bin_centers = np.reshape(
        (torsion_bin_edges[:-1]+torsion_bin_edges[1:])/2, (-1,1))

    for file_index, traj_file in enumerate(file_list):
        # Load in a trajectory file:
        if traj_file[-3:] == 'dcd':
            traj = md.load(traj_file,top=md.Topology.from_openmm(cgmodel.topology))
        else:
            traj = md.load(traj_file)

        # Select frames for analysis. The end frame is resolved per file so
        # that the length of the first file does not truncate later ones.
        last_frame = traj.n_frames if frame_end == -1 else frame_end
        traj = traj[frame_start:last_frame:frame_stride]

        # Create inner dictionary for current file:
        if temperature_list is not None:
            file_key = f"{temperature_list[file_index].value_in_unit(unit.kelvin):.2f}"
        else:
            # Drop the last 4 characters (a ".pdb"/".dcd" style extension)
            file_key = traj_file[:-4]

        file_hist_data = {}
        torsion_hist_data[file_key] = file_hist_data

        for i in range(i_torsion_type):
            type_key = str(i+1)
            type_name = inv_torsion_dict[type_key]

            # Compute all torsion values in trajectory
            # This returns an [nframes x n_torsions] array
            torsion_val_array = md.compute_dihedrals(
                traj,torsion_sub_arrays[type_key])

            # Flatten to a single column and convert to degrees:
            torsion_val_array = (180/np.pi)*np.reshape(torsion_val_array, (-1,1))

            # Histogram the results (the returned edges equal the input edges):
            n_out, _ = np.histogram(
                torsion_val_array, bins=torsion_bin_edges,density=True)

            file_hist_data[f"{type_name}_density"]=n_out
            file_hist_data[f"{type_name}_bin_centers"]=torsion_bin_centers

    plot_distribution(
        inv_torsion_dict,
        torsion_hist_data,
        xlabel="Torsion angle (degrees)",
        ylabel="Probability density",
        xlim=[-180,180],
        figure_title="Torsion_distributions",
        file_name=f"{plotfile}",
        plot_per_page=plot_per_page,
    )

    return torsion_hist_data