Пример #1
0
def sample_region(
    data,
    pt_dict,
    n_frames,
):
    """Find the frames closest to a point in a sub-space of the data.

    Only the dimensions named in ``pt_dict`` are considered; every other
    dimension of ``data`` is dropped before the nearest-neighbour search.

    Parameters
    ----------
    data : list of lists
        List of low dimensional data (output of tica). Each entry is
        indexed as ``entry[:, dims]``, so it must support 2-D indexing.
    pt_dict : dict
        Dictionary whose keys are dimension indices and whose values are
        the coordinate of the query point along that dimension,
        e.g. ``{0: 0.15, 4: 0.2}``.
    n_frames : int
        Number of frames required; forwarded as the second argument of
        the tree query.

    Returns
    -------
       The index part of the ``KDTree.query`` result. Documented upstream
       as a list of tuples where the first number is the trajectory index
       and the second is the frame index.
    """
    axes = list(pt_dict)
    # The query point must list its coordinates in the same order as the
    # columns selected from the data.
    target = [pt_dict[axis] for axis in axes]
    projected = [traj[:, axes] for traj in data]

    _, indices = KDTree(projected).query(target, n_frames)
    return indices
Пример #2
0
def sample_region(data, pt_dict, n_frames):
    """Sample the frames nearest to a given point of the data.

    Parameters
    ----------
    data : list of lists
        List of low dimensional data (output of tica); each entry is
        sliced as ``entry[:, dims]``.
    pt_dict : dict
        Maps a dimension index to the value of the query point along
        that dimension, e.g. ``{0: 0.15, 4: 0.2}``. Dimensions that are
        not listed are ignored.
    n_frames : int
        Number of frames required (passed straight to the tree query).

    Returns
    -------
       The indices returned by ``KDTree.query``; described upstream as a
       list of tuples where the first number is the trajectory index and
       the second is the frame index.
    """
    # Walk the dictionary once so the selected columns and the query
    # coordinates are guaranteed to share the same ordering.
    selected_dims = []
    query_point = []
    for dim, value in pt_dict.items():
        selected_dims.append(dim)
        query_point.append(value)

    reduced = [frames[:, selected_dims] for frames in data]
    kd_tree = KDTree(reduced)

    _distances, neighbours = kd_tree.query(query_point, n_frames)
    return neighbours
Пример #3
0
def sample_dimension(data, dimension, n_frames, scheme="linear"):
    """Sample frames along one dimension of the data.

    One of three schemes picks target values along the chosen dimension;
    for each target value the nearest frame is looked up in a KDTree built
    on that dimension only. All other dimensions are ignored.

    Parameters
    ----------
    data : list of lists
        List of low dimensional data (output of tica). Each entry is
        indexed as ``entry[:, dimension]``.
    dimension : int
        Dimension to sample on.
    n_frames : int
        Number of frames required.
    scheme : string
        One of "linear", "random" or "edge" ("edges" is accepted as an
        alias). Linear places ``n_frames`` evenly spaced targets between
        the minimum and maximum value, random draws ``n_frames`` observed
        values at random (thereby taking approximate free energies into
        account), and edge takes the ``n_frames / 2`` (rounded down)
        smallest and the same number of largest observed values, so an
        odd ``n_frames`` yields ``n_frames - 1`` targets.

    Returns
    -------
       List with one entry per target value, each being the index part of
       ``KDTree.query`` for that value. Documented upstream as tuples where
       the first number is the trajectory index and the second is the
       frame index.

    Raises
    ------
    ValueError
        If ``scheme`` is not one of the supported names.
    """
    # Keep every trajectory as a single-column 2-D array so the tree is
    # built on the requested dimension only.
    d_data = [i[:, dimension][:, np.newaxis] for i in data]

    # Pool and sort the values: all three sampling schemes rely on it.
    all_vals = []
    for i in d_data:
        all_vals.extend(i.flatten())
    all_vals = np.sort(all_vals)

    if scheme == "linear":
        # Linearly placed points between the extreme values.
        spaced_points = np.linspace(all_vals[0], all_vals[-1], n_frames)

    elif scheme == "random":
        spaced_points = np.sort(np.random.choice(all_vals, n_frames))

    elif scheme in ("edge", "edges"):
        # np.int was removed in NumPy 1.24; the builtin int is what it
        # aliased.
        _cut_point = int(n_frames / 2)
        # all_vals[-0:] would select the whole array, so an empty upper
        # tail has to be requested explicitly when the cut point is 0.
        upper_tail = all_vals[-_cut_point:] if _cut_point else all_vals[:0]
        spaced_points = np.hstack((all_vals[:_cut_point], upper_tail))
    else:
        raise ValueError("Scheme has be to one of linear, random or edge")

    tree = KDTree(d_data)

    # Only the index of the nearest frame is kept; the distance is dropped.
    return [tree.query([pt])[1] for pt in spaced_points]
Пример #4
0
def sample_dimension(data, dimension, n_frames, scheme="linear"):
    """Function to sample a dimension of the data
    using one of 3 schemes. All other dimensions are ignored.

    Parameters
    ----------
    data : list of lists
        List of low dimensional data (output of tica); every entry is
        sliced as ``entry[:, dimension]``.
    dimension : int
        Dimension to sample on.
    n_frames : int
        Number of frames required.
    scheme : string
        One of either "linear", "random" or "edge" (the spelling "edges"
        is accepted too). Linear samples the tic linearly between its
        minimum and maximum, random samples observed values randomly,
        thereby taking approximate free energies into account, and edge
        samples the edges of the tic only: ``n_frames / 2`` (rounded
        down) values from each end of the sorted data.

    Returns
    -------
       List holding, for every sampled value, the index part returned by
       ``KDTree.query``. Documented upstream as tuples where the first
       number is the trajectory index and the second is the frame index.

    Raises
    ------
    ValueError
        If ``scheme`` is not a recognised scheme name.
    """
    # Single-column view of each trajectory, restricted to `dimension`.
    d_data = [i[:, dimension][:, np.newaxis] for i in data]

    # Sort the pooled values because all three sampling schemes use them.
    all_vals = []
    for i in d_data:
        all_vals.extend(i.flatten())
    all_vals = np.sort(all_vals)

    if scheme == "linear":
        # Get linearly placed points spanning the observed range.
        min_val = all_vals[0]
        max_val = all_vals[-1]
        spaced_points = np.linspace(min_val, max_val, n_frames)

    elif scheme == "random":
        spaced_points = np.sort(np.random.choice(all_vals, n_frames))

    elif scheme in ("edge", "edges"):
        # The np.int alias no longer exists in NumPy >= 1.24; the builtin
        # int performs the same truncation.
        _cut_point = int(n_frames / 2)
        lower = all_vals[:_cut_point]
        # Guard the zero case: a slice starting at -0 is the full array,
        # not an empty tail.
        upper = all_vals[-_cut_point:] if _cut_point else all_vals[:0]
        spaced_points = np.hstack((lower, upper))
    else:
        raise ValueError("Scheme has be to one of linear, random or edge")

    tree = KDTree(d_data)

    return_vec = []
    for pt in spaced_points:
        # The distance to the nearest frame is not needed, only its index.
        _, ind = tree.query([pt])
        return_vec.append(ind)

    return return_vec
Пример #5
0
def kdtree_maker(prt, stride=1):
    """Build a KDTree from a strided selection of ``prt.tica_data``.

    Parameters
    ----------
    prt : object
        Object exposing a ``tica_data`` mapping.
        NOTE(review): presumably keyed by trajectory identifier - confirm
        against the caller.
    stride : int
        Step used when slicing the list of keys; 1 keeps every key.

    Returns
    -------
    key_list : list
        The selected keys, in the order their data was given to the tree.
    tree : KDTree
        Tree built on the data of the selected keys.
    """
    every_key = list(prt.tica_data.keys())
    key_list = every_key[::stride]

    # Gather the data in key order so tree indices line up with key_list.
    selected = []
    for key in key_list:
        selected.append(prt.tica_data[key])

    return key_list, KDTree(selected)
Пример #6
0
# Fit a continuous-time MSM on the state assignments.
msm_mdl = ContinuousTimeMSM(lag_time=10, ergodic_cutoff=1/10)
assignments.fit_with(msm_mdl)
# Bare expression: only displays a value in an interactive session and has
# no effect when this file is run as a script.
msm_mdl.percent_retained_

### CALCULATING 10 MOST POPULATED CONFORMATIONS
# Persist the listed model attributes as a plain dict. The context manager
# closes the file even if pickling fails.
with open("msm_mdl.pkl", 'wb') as f:
    pickle.dump({k: msm_mdl.__dict__[k] for k in ['lag_time','n_timescales','ergodic_cutoff','verbose','sliding_window','guess','theta_','ratemat_','transmat_','countsmat_','n_states_',
'mapping_','populations_','information_','loglikelihoods_','eigenvalues_','left_eigenvectors_','right_eigenvectors_','percent_retained_'] }, f)
q = msm_mdl.__dict__.get('populations_')
# argpartition isolates the 10 largest populations without a full sort;
# argsort then orders just those 10 in ascending population.
ind = np.argpartition(q, -10)[-10:]
p = ind[np.argsort(q[ind])]
print("The most populated conformations:", q[p])

### EXTRACTING REPRESENTATIVE CONFORMATIONS OF MACROSTATE
ktree = KDTree(tica_features)
trj_ds = dataset("*.nc", topology="s.pdb")
trj_list = []
for cind in p:
    # Look up the frame nearest to the cluster center of each selected state.
    pt = kmeans_mdl.cluster_centers_[cind]
    # NOTE: `f` is rebound here from the (already closed) pickle file handle
    # to the frame index returned by the query.
    _, (t, f) = ktree.query(pt)
    print("The trajectory and frame number of representative macrostate:",t,f)
    trj_list.append(trj_ds[t][f])

#CALCULATING COMMON FREE ENERGY LANDSCAPE FOR ALL PH
# Population of the state each frame is assigned to, over all trajectories.
pi_0 = msm_mdl.__dict__.get('populations_')[np.concatenate(assignments, axis=0)]
data = np.concatenate(tica_trajs, axis=0)
# Unbounded limits for two axes, and 21 evenly spaced levels from 0 to 2.
clip = [(-np.inf, np.inf), (-np.inf, np.inf)]
levels = np.linspace(0, 2, 21)
def _thermo_transform(Z, temperature):
    """Return ``-THERMO_CONSTANT * temperature * log(Z)``.

    ``Z`` is passed to ``np.log``, so it may be a scalar or an array.
    ``THERMO_CONSTANT`` is a module-level constant defined outside this
    function.
    NOTE(review): presumably this converts a probability/density into a
    free energy - confirm the constant's units against its definition.
    """
    log_z = np.log(Z)
    energy_scale = THERMO_CONSTANT * temperature
    return -energy_scale * log_z