def _resolve_bonded_variable(cgmodel, var_name, nbins):
    """
    Look up the particle indices, histogram bin edges and axis label for one bonded variable.

    The number of particles in ``var_name`` (underscore-separated) selects the variable kind:
    2 = bond, 3 = angle, 4 = torsion. Both ``var_name`` and its reversed particle sequence
    are accepted as a match against the type names produced by the ``assign_*_types`` helpers.

    :param cgmodel: CGModel() object
    :type cgmodel: class

    :param var_name: particle sequence of the bonded parameter (e.g. "bb_bb_bb")
    :type var_name: str

    :param nbins: number of histogram bins for this variable
    :type nbins: int

    :raises ValueError: if var_name does not contain 2, 3 or 4 particles, or if no bonded
        type with that name (forward or reversed) exists in cgmodel.

    :returns:
       - n_particles ( int ) - number of particles defining the variable
       - indices - particle index groups for the matched type, as stored by assign_*_types
       - bin_edges ( np.ndarray ) - nbins+1 bin edges (nm for bonds, degrees otherwise)
       - label ( str ) - axis label for plotting
    """

    # The same bonded type may be stored under either particle ordering:
    reverse_name = "_".join(reversed(var_name.split("_")))
    n_particles = var_name.count('_')+1

    if n_particles == 2:
        kind = "bond"
        bond_list = CGModel.get_bond_list(cgmodel)
        _, _, sub_arrays, _, n_types, _, inv_dict = assign_bond_types(cgmodel, bond_list)
    elif n_particles == 3:
        kind = "angle"
        angle_list = CGModel.get_bond_angle_list(cgmodel)
        _, _, sub_arrays, _, n_types, _, inv_dict = assign_angle_types(cgmodel, angle_list)
    elif n_particles == 4:
        kind = "torsion"
        torsion_list = CGModel.get_torsion_list(cgmodel)
        _, _, sub_arrays, _, n_types, _, inv_dict = assign_torsion_types(cgmodel, torsion_list)
    else:
        raise ValueError(
            f"'{var_name}' has {n_particles} particle(s); expected 2 (bond), "
            f"3 (angle) or 4 (torsion) particles separated by '_'."
        )

    # Type keys are 1-based strings. If more than one type matches, the last one wins.
    indices = None
    for i in range(n_types):
        key = str(i+1)
        if inv_dict[key] in (var_name, reverse_name):
            indices = sub_arrays[key]

    if indices is None:
        raise ValueError(
            f"No {kind} type named '{var_name}' (or '{reverse_name}') was found in cgmodel."
        )

    if n_particles == 2:
        # Bin edges must be the same across all files - use heuristic from equilibrium bond length
        b_eq = cgmodel.get_bond_length(indices[0])
        b_min = 0.5*b_eq.value_in_unit(unit.nanometer)
        b_max = 1.5*b_eq.value_in_unit(unit.nanometer)
        bin_edges = np.linspace(b_min,b_max,nbins+1)
        label = f'{var_name} distance ({unit.nanometer})'
    elif n_particles == 3:
        bin_edges = np.linspace(0,180,nbins+1)
        label = f'{var_name} angle (degrees)'
    else:
        bin_edges = np.linspace(-180,180,nbins+1)
        label = f'{var_name} angle (degrees)'

    return n_particles, indices, bin_edges, label


def _compute_bonded_values(traj, n_particles, indices):
    """
    Evaluate one bonded variable for every frame of a trajectory.

    :param traj: trajectory object as returned by md.load
    :param n_particles: 2 (bond), 3 (angle) or 4 (torsion)
    :type n_particles: int
    :param indices: particle index groups defining each occurrence of the variable

    :returns:
       - values - array indexed as [frame, occurrence]; bond lengths are returned as given
         by md.compute_distances, angles and torsions are converted to degrees
    """

    if n_particles == 2:
        return md.compute_distances(traj,indices)

    if n_particles == 3:
        values = md.compute_angles(traj,indices)
    else:
        values = md.compute_dihedrals(traj,indices)

    # Convert to degrees:
    values *= (180/np.pi)
    return values


def calc_2d_distribution(
    cgmodel,
    file_list,
    nbin_xvar=180,
    nbin_yvar=180,
    frame_start=0,
    frame_stride=1,
    frame_end=-1,
    plotfile="2d_hist.pdf",
    xvar_name = "bb_bb_bb",
    yvar_name = "bb_bb_bb_bb",
    colormap="nipy_spectral",
    temperature_list=None,
    ):

    """
    Calculate and plot 2d histogram for any 2 bonded variables,
    given a CGModel object and pdb or dcd trajectory.

    Each variable can be a bond (2 particles), an angle (3 particles) or a torsion
    (4 particles), chosen by the number of underscore-separated particle names.
    Because the two variables may occur a different number of times in the molecule,
    every (x occurrence, y occurrence) pair within a frame contributes one sample.

    :param cgmodel: CGModel() object
    :type cgmodel: class

    :param file_list: path to pdb or dcd trajectory file(s) - can be a list or single string. A single string is split on whitespace.
    :type file_list: str or list(str)

    :param nbin_xvar: number of bins for x bonded variable
    :type nbin_xvar: int

    :param nbin_yvar: number of bins for y bonded variable
    :type nbin_yvar: int

    :param frame_start: First frame in trajectory file to use for analysis.
    :type frame_start: int

    :param frame_stride: Advance by this many frames when reading trajectories.
    :type frame_stride: int

    :param frame_end: Last frame in trajectory file to use for analysis. The default of -1 uses all frames of each file.
    :type frame_end: int

    :param plotfile: Filename for saving the 2d distribution pdf plots
    :type plotfile: str

    :param xvar_name: particle sequence of the x bonded parameter (default="bb_bb_bb")
    :type xvar_name: str

    :param yvar_name: particle sequence of the y bonded parameter (default="bb_bb_bb_bb")
    :type yvar_name: str

    :param colormap: matplotlib pyplot colormap to use (default='nipy_spectral')
    :type colormap: str (case sensitive)

    :param temperature_list: list of temperatures corresponding to file_list. If None, no subplot labels will be used.
    :type temperature_list: list(Quantity())

    :raises ValueError: if file_list is empty, or if xvar_name / yvar_name is not a
        2-, 3- or 4-particle name matching a bonded type in cgmodel.

    :returns:
       - hist_data ( dict )
       - xedges ( dict )
       - yedges ( dict )
    """

    # Convert file_list to list if a single string:
    if isinstance(file_list, str):
        file_list = file_list.split()

    if not file_list:
        raise ValueError("file_list is empty - at least one trajectory file is required.")

    # The bonded type lookup and bin edges depend only on cgmodel, so resolve them once
    # rather than once per trajectory file:
    n_particle_x, x_indices, xvar_bin_edges, xlabel = _resolve_bonded_variable(
        cgmodel, xvar_name, nbin_xvar)
    n_particle_y, y_indices, yvar_bin_edges, ylabel = _resolve_bonded_variable(
        cgmodel, yvar_name, nbin_yvar)

    xvar_val_array_combo = {}
    yvar_val_array_combo = {}

    for file in file_list:

        # Load in a trajectory file:
        if file[-3:] == 'dcd':
            traj = md.load(file,top=md.Topology.from_openmm(cgmodel.topology))
        else:
            traj = md.load(file)

        # Select frames for analysis. The end frame is resolved per file so that the
        # length of the first trajectory does not truncate the ones that follow:
        last_frame = traj.n_frames if frame_end == -1 else frame_end
        traj = traj[frame_start:last_frame:frame_stride]

        # Each array of single observables is [n_frames x n_occurrences]
        xvar_vals = _compute_bonded_values(traj, n_particle_x, x_indices)
        yvar_vals = _compute_bonded_values(traj, n_particle_y, y_indices)

        n_occ_x = xvar_vals.shape[1]
        n_occ_y = yvar_vals.shape[1]

        # Pair every x occurrence with every y occurrence, frame by frame:
        # x columns are [x0_y0, x1_y0, ... xn_y0, ... x0_yn, x1_yn, ... xn_yn]
        # y columns are [y0_x0, y0_x1, ... y0_xn, ... yn_x0, yn_x1, ... yn_xn]
        xvar_combo = np.tile(xvar_vals, (1, n_occ_y)).astype(np.float64)
        yvar_combo = np.repeat(yvar_vals, n_occ_x, axis=1).astype(np.float64)

        # Flatten to column vectors for histogramming:
        xvar_val_array_combo[file] = xvar_combo.reshape(-1, 1)
        yvar_val_array_combo[file] = yvar_combo.reshape(-1, 1)

    # 2d histogram the data and plot:
    hist_data, xedges, yedges = plot_2d_distribution(
        file_list, xvar_val_array_combo, yvar_val_array_combo, xvar_bin_edges, yvar_bin_edges,
        plotfile, colormap, xlabel, ylabel, temperature_list=temperature_list)

    return hist_data, xedges, yedges
def calc_bond_length_distribution(cgmodel,
                                  file_list,
                                  nbins=90,
                                  frame_start=0,
                                  frame_stride=1,
                                  frame_end=-1,
                                  plot_per_page=2,
                                  temperature_list=None,
                                  plotfile="bond_hist.pdf"):
    """
    Calculate and plot all bond length distributions from a CGModel object and trajectory

    For every trajectory file and every bond type in cgmodel, the bond lengths of all
    bonds of that type over all selected frames are pooled into one density-normalized
    histogram.

    :param cgmodel: CGModel() object
    :type cgmodel: class
    
    :param file_list: path to pdb or dcd trajectory file(s). A single string is split on whitespace.
    :type file_list: str or list(str)
    
    :param nbins: number of histogram bins
    :type nbins: int
    
    :param frame_start: First frame in trajectory file to use for analysis.
    :type frame_start: int

    :param frame_stride: Advance by this many frames when reading trajectories.
    :type frame_stride: int

    :param frame_end: Last frame in trajectory file to use for analysis. The default of -1 uses all frames of each file.
    :type frame_end: int
    
    :param plot_per_page: number of subplots to display on each page (default=2)
    :type plot_per_page: int
    
    :param temperature_list: list of temperatures corresponding to file_list. If None, file names will be the plot labels.
    :type temperature_list: list(Quantity())
    
    :param plotfile: filename for saving bond length distribution pdf plots
    :type plotfile: str
    
    :returns:
       - bond_hist_data ( dict ) - keyed by temperature (formatted in Kelvin with 2 decimals)
         or by file name without its last 4 characters; each entry maps
         "<bond type>_density" and "<bond type>_bin_centers" to the histogram arrays
    """

    # Convert file_list to list if a single string:
    if isinstance(file_list, str):
        file_list = file_list.split()

    # Create dictionary for saving bond histogram data:
    bond_hist_data = {}

    # Get bond list
    bond_list = CGModel.get_bond_list(cgmodel)

    # Assign bond types:
    bond_types, bond_array, bond_sub_arrays, n_i, i_bond_type, bond_dict, inv_bond_dict = \
        assign_bond_types(cgmodel, bond_list)

    for file_index, file in enumerate(file_list):
        # Load in a trajectory file:
        if file[-3:] == 'dcd':
            traj = md.load(file, top=md.Topology.from_openmm(cgmodel.topology))
        else:
            traj = md.load(file)

        # Select frames for analysis. The end frame is resolved per file so that the
        # length of the first trajectory does not truncate the ones that follow:
        last_frame = traj.n_frames if frame_end == -1 else frame_end
        traj = traj[frame_start:last_frame:frame_stride]

        # Create inner dictionary for current file:
        if temperature_list is not None:
            file_key = f"{temperature_list[file_index].value_in_unit(unit.kelvin):.2f}"
        else:
            # Drop the last 4 characters (a dot plus 3-letter extension such as ".dcd")
            file_key = file[:-4]

        bond_hist_data[file_key] = {}

        for i in range(i_bond_type):
            # Bond type keys are 1-based strings
            type_key = str(i + 1)
            type_name = inv_bond_dict[type_key]

            # Compute all bond distances in trajectory
            # This returns an [nframes x n_bonds] array
            bond_val_array = md.compute_distances(traj,
                                                  bond_sub_arrays[type_key])

            # Pool all bonds of this type over all frames into one column vector:
            bond_val_array = np.reshape(bond_val_array, (-1, 1))

            # Histogram results:
            n_out, bin_edges_out = np.histogram(bond_val_array,
                                                bins=nbins,
                                                density=True)

            # Midpoint of each bin, stored as an [nbins x 1] column:
            bond_bin_centers = (
                (bin_edges_out[:-1] + bin_edges_out[1:]) / 2).reshape(-1, 1)

            bond_hist_data[file_key][f"{type_name}_density"] = n_out
            bond_hist_data[file_key][f"{type_name}_bin_centers"] = bond_bin_centers

    plot_distribution(
        inv_bond_dict,
        bond_hist_data,
        xlabel="Bond length (nm)",
        ylabel="Probability density",
        figure_title="Bond distributions",
        file_name=f"{plotfile}",
        plot_per_page=plot_per_page,
    )

    return bond_hist_data