def fit_bootstrap(yaml_file, pool=None):
    """Fit a bootstrapped Markov state model for each protein in the config.

    Entries of ``yaml_file["mdl_params"]`` whose key starts with ``msm__``
    are forwarded, prefix removed, as the ``msm_args`` dictionary. Entries
    whose key starts with ``bootstrap__`` are forwarded, prefix removed, as
    keyword arguments of ``BootStrapMarkovStateModel``. When the config does
    not set them, ``n_samples`` defaults to 100 and ``n_procs`` to 2.

    For every protein in ``yaml_file["protein_list"]`` the function enters
    ``enter_protein_mdl_dir(yaml_file, protein)``, loads ``assignments.pkl``,
    fits the model and dumps three files: ``bootstrap_msm_mdl.pkl`` (the
    fitted bootstrap model), ``msm_mdl.pkl`` (its ``mle_`` attribute) and
    ``fixed_assignments.pkl`` (each assignment passed through
    ``mle_.transform(..., mode='fill')``).

    Parameters
    ----------
    yaml_file : mapping
        Configuration providing the ``"mdl_params"`` and ``"protein_list"``
        entries.
    pool : optional
        Passed unchanged as ``pool=`` to the model's ``fit`` call.

    Returns
    -------
    None
    """
    mdl_params = yaml_file["mdl_params"]
    msm_prefix = "msm__"
    bootstrap_prefix = "bootstrap__"

    msm_args = {}
    # Defaults only: a ``bootstrap__n_samples`` / ``bootstrap__n_procs``
    # config entry replaces them instead of colliding with a hard-coded
    # keyword argument in the constructor call below.
    bootstrap_kwargs = {"n_samples": 100, "n_procs": 2}
    for key, value in mdl_params.items():
        # Slice the prefix off rather than ``split(prefix)[1]``, which would
        # cut the name short if the prefix text occurred again in the key.
        if key.startswith(msm_prefix):
            msm_args[key[len(msm_prefix):]] = value
        if key.startswith(bootstrap_prefix):
            bootstrap_kwargs[key[len(bootstrap_prefix):]] = value

    for protein in yaml_file["protein_list"]:
        with enter_protein_mdl_dir(yaml_file, protein):
            print(protein)
            # presumably a dict keyed by trajectory name -- TODO confirm
            assignments = verboseload("assignments.pkl")
            # ``msm_args`` stays an explicit keyword, so a conflicting
            # ``bootstrap__msm_args`` entry still raises TypeError.
            msm_mdl = BootStrapMarkovStateModel(msm_args=msm_args,
                                                **bootstrap_kwargs)
            msm_mdl.fit(list(assignments.values()), pool=pool)
            verbosedump(msm_mdl, "bootstrap_msm_mdl.pkl")
            verbosedump(msm_mdl.mle_, "msm_mdl.pkl")
            # ``transform`` is given one trajectory at a time and the first
            # element of what it returns is kept.
            fixed_assignments = {
                key: msm_mdl.mle_.transform(traj, mode='fill')[0]
                for key, traj in assignments.items()
            }
            verbosedump(fixed_assignments, 'fixed_assignments.pkl')
def fit_bootstrap(yaml_file, pool=None):
    """Fit a bootstrapped Markov state model for each protein in the config.

    Entries of ``yaml_file["mdl_params"]`` whose key starts with ``msm__``
    are forwarded, prefix removed, as the ``msm_args`` dictionary. The
    number of bootstrap samples is read from ``bootstrap__n_samples`` and
    defaults to 100.

    For every protein in ``yaml_file["protein_list"]`` the function enters
    ``enter_protein_mdl_dir(yaml_file, protein)``, loads ``assignments.pkl``,
    fits the model and dumps three files: ``bootstrap_msm_mdl.pkl`` (the
    fitted bootstrap model), ``msm_mdl.pkl`` (its ``mle_`` attribute) and
    ``fixed_assignments.pkl`` (each assignment passed through
    ``mle_.transform(..., mode='fill')``).

    Parameters
    ----------
    yaml_file : mapping
        Configuration providing the ``"mdl_params"`` and ``"protein_list"``
        entries.
    pool : optional
        Passed unchanged as ``pool=`` to the model's ``fit`` call.

    Returns
    -------
    None
    """
    mdl_params = yaml_file["mdl_params"]
    msm_prefix = "msm__"

    # Slice the prefix off rather than ``split(prefix)[1]``, which would cut
    # the name short if the prefix text occurred again inside the key.
    msm_args = {
        key[len(msm_prefix):]: value
        for key, value in mdl_params.items()
        if key.startswith(msm_prefix)
    }
    n_samples = mdl_params.get("bootstrap__n_samples", 100)

    for protein in yaml_file["protein_list"]:
        with enter_protein_mdl_dir(yaml_file, protein):
            print(protein)
            # presumably a dict keyed by trajectory name -- TODO confirm
            assignments = verboseload("assignments.pkl")
            msm_mdl = BootStrapMarkovStateModel(n_samples=n_samples,
                                                n_procs=2,
                                                msm_args=msm_args)
            msm_mdl.fit(list(assignments.values()), pool=pool)
            verbosedump(msm_mdl, "bootstrap_msm_mdl.pkl")
            verbosedump(msm_mdl.mle_, "msm_mdl.pkl")
            # ``transform`` is given one trajectory at a time and the first
            # element of what it returns is kept.
            fixed_assignments = {
                key: msm_mdl.mle_.transform(traj, mode='fill')[0]
                for key, traj in assignments.items()
            }
            verbosedump(fixed_assignments, 'fixed_assignments.pkl')
def test_mle_eq():
    """The bootstrap model's ``mle_`` must agree with a plain MSM fit."""
    lag_time = 1
    trajectories = [[0, 0, 0, 1, 1, 1, 0, 0, 0, 1, 0, 1, 1]]

    reference = MarkovStateModel(lag_time=lag_time)
    bootstrap = BootStrapMarkovStateModel(
        n_samples=10,
        n_procs=2,
        msm_args={'lag_time': lag_time},
    )

    reference.fit(trajectories)
    bootstrap.fit(trajectories)

    # Both were fitted on the same data with the same lag time, so the
    # populations and timescales are expected to match.
    mle = bootstrap.mle_
    eq(reference.populations_, mle.populations_)
    eq(reference.timescales_, mle.timescales_)
def test_resampler():
    """No bootstrap resample should contain every input trajectory."""
    n_trajectories = 100
    sequences = [
        np.random.randint(20, size=100) for _ in range(n_trajectories)
    ]
    mdl = BootStrapMarkovStateModel(
        n_samples=5,
        n_procs=2,
        msm_args={'lag_time': 10},
    )
    mdl.fit(sequences)

    # Presumably each resample draws trajectory indices with replacement
    # (TODO confirm). Re-picking all 100 distinct inputs by chance would
    # then be vanishingly unlikely, so none of the 5 resamples should hold
    # 100 unique trajectory ids.
    for resample_ids in mdl.resample_ind_:
        n_unique = len(np.unique(resample_ids))
        assert n_unique != n_trajectories
def test_score():
    """Every bootstrap sample gets a training score and a test score."""
    seq = [np.random.randint(20, size=100) for _ in range(3)]
    bmsm = BootStrapMarkovStateModel(
        n_samples=10,
        n_procs=2,
        msm_args={'lag_time': 1},
    )
    bmsm.fit(seq)

    # One training score per requested sample ...
    training_scores = np.array(bmsm.all_training_scores_)
    assert training_scores.shape[0] == 10

    # ... none of which may be NaN.
    nan_count = sum(np.isnan(bmsm.all_training_scores_))
    assert nan_count == 0

    # A test score must have been attempted for each sample; a NaN value
    # is acceptable here, only the count is checked.
    test_scores = np.array(bmsm.all_test_scores_)
    assert bmsm.n_samples == test_scores.shape[0]
def test_score():
    """Check the score bookkeeping of a fitted bootstrap MSM."""
    n_trajectories = 3
    seq = [np.random.randint(20, size=100) for _ in range(n_trajectories)]

    model_kwargs = dict(n_samples=10, n_procs=2, msm_args={'lag_time': 1})
    bmsm = BootStrapMarkovStateModel(**model_kwargs)
    bmsm.fit(seq)

    # Training scores: one entry per sample, and no NaN among them.
    n_training_scores = np.array(bmsm.all_training_scores_).shape[0]
    assert n_training_scores == 10
    assert sum(np.isnan(bmsm.all_training_scores_)) == 0

    # Test scores: only their number is checked, so NaN entries are fine.
    n_test_scores = np.array(bmsm.all_test_scores_).shape[0]
    assert bmsm.n_samples == n_test_scores