data_inverse_permutation_indices, num_rows, num_cols, num_views, num_clusters) X_L_gen, X_D_gen = ttu.get_generative_clustering(M_c, M_r, T, data_inverse_permutation_indices, num_clusters, num_views) T_test = ctu.create_test_set(M_c, T, X_L_gen, X_D_gen, n_test, seed_seed=0) # generative_mean_test_log_likelihood = ctu.calc_mean_test_log_likelihood(M_c, T, X_L_gen, X_D_gen, T_test) # run some tests engine = MultiprocessingEngine(seed=inf_seed) # single state test single_state_ARIs = [] single_state_mean_test_lls = [] X_L, X_D = engine.initialize(M_c, M_r, T, n_chains=1) single_state_ARIs.append(ctu.get_column_ARI(X_L, view_assignment_truth)) single_state_mean_test_lls.append( ctu.calc_mean_test_log_likelihood(M_c, T, X_L, X_D, T_test) ) for time_i in range(n_times): X_L, X_D = engine.analyze(M_c, T, X_L, X_D, n_steps=n_steps) single_state_ARIs.append(ctu.get_column_ARI(X_L, view_assignment_truth)) single_state_mean_test_lls.append( ctu.calc_mean_test_log_likelihood(M_c, T, X_L, X_D, T_test) ) # multistate test multi_state_ARIs = [] multi_state_mean_test_lls = [] X_L_list, X_D_list = engine.initialize(M_c, M_r, T, n_chains=n_chains) multi_state_ARIs.append(ctu.get_column_ARIs(X_L_list, view_assignment_truth)) multi_state_mean_test_lls.append(ctu.calc_mean_test_log_likelihoods(M_c, T,
def get_ari(p_State):
    """Return the column ARI of a state's latent structure.

    Reads ``X_L`` from ``p_State`` via ``get_X_L()`` and scores it with
    ``crosscat.utils.convergence_test_utils.get_column_ARI`` against the
    ``view_assignment_truth`` value found in the enclosing environment.
    """
    # requires environment: {view_assignment_truth}
    # requires import: {crosscat.utils.convergence_test_utils}
    latent_state = p_State.get_X_L()
    convergence_utils = crosscat.utils.convergence_test_utils
    column_ari = convergence_utils.get_column_ARI(
        latent_state, view_assignment_truth)
    return column_ari
def convergence_analyze_helper(table_data, data_dict, command_dict):
    """Run one synthetic convergence experiment and record its progression.

    Generates factorial synthetic data from the settings in ``data_dict``,
    initializes a ``LE.LocalEngine`` state from the prior, then calls
    ``engine.analyze`` in blocks of at most ``block_size`` transitions until
    ``n_steps`` transitions have been run in total.  After initialization and
    after every block it records the column ARI against the synthetic
    ground-truth view assignment, the mean test log likelihood on a generated
    test set, and the seconds reported by the timer around the engine call.

    Args:
        table_data: Not used by this function.
        data_dict: Mapping providing 'SEED', 'num_clusters', 'num_cols',
            'num_rows', 'num_views', 'max_mean', 'n_test', 'n_steps' (total
            number of transitions), 'block_size' and 'init_seed'.
        command_dict: Not used by this function.

    Returns:
        dict with keys 'num_rows', 'num_cols', 'num_views', 'num_clusters',
        'max_mean', 'column_ari_list', 'mean_test_ll_list',
        'generative_mean_test_log_likelihood', 'elapsed_seconds_list',
        'n_steps' and 'block_size'.  The three lists have one entry for the
        initial state plus one entry per block of transitions.

    Raises:
        ValueError: if transitions were requested ('n_steps' > 0) but
            'block_size' is not positive; the block loop could never finish.
    """
    gen_seed = data_dict['SEED']
    num_clusters = data_dict['num_clusters']
    num_cols = data_dict['num_cols']
    num_rows = data_dict['num_rows']
    num_views = data_dict['num_views']
    max_mean = data_dict['max_mean']
    n_test = data_dict['n_test']
    num_transitions = data_dict['n_steps']
    block_size = data_dict['block_size']
    init_seed = data_dict['init_seed']

    # Fail fast, before any expensive work: a non-positive block size would
    # otherwise make the transition loop below spin forever.
    if num_transitions > 0 and block_size <= 0:
        raise ValueError(
            'block_size must be positive when n_steps > 0, got %r'
            % (block_size,))

    # generate some data
    T, M_r, M_c, data_inverse_permutation_indices = \
        du.gen_factorial_data_objects(
            gen_seed, num_clusters, num_cols, num_rows, num_views,
            max_mean=max_mean, max_std=1,
            send_data_inverse_permutation_indices=True)
    view_assignment_ground_truth = \
        ctu.determine_synthetic_column_ground_truth_assignments(
            num_cols, num_views)
    X_L_gen, X_D_gen = ttu.get_generative_clustering(
        M_c, M_r, T, data_inverse_permutation_indices,
        num_clusters, num_views)
    T_test = ctu.create_test_set(M_c, T, X_L_gen, X_D_gen, n_test,
                                 seed_seed=0)
    generative_mean_test_log_likelihood = \
        ctu.calc_mean_test_log_likelihood(M_c, T, X_L_gen, X_D_gen, T_test)

    # additional set up
    engine = LE.LocalEngine(init_seed)
    column_ari_list = []
    mean_test_ll_list = []
    elapsed_seconds_list = []

    def record_progress(X_L, X_D, elapsed_secs):
        # Append one (ARI, test log likelihood, elapsed seconds) observation.
        column_ari_list.append(
            ctu.get_column_ARI(X_L, view_assignment_ground_truth))
        mean_test_ll_list.append(
            ctu.calc_mean_test_log_likelihood(M_c, T, X_L, X_D, T_test))
        elapsed_seconds_list.append(elapsed_secs)

    # get initial ARI, test_ll; only the engine call sits inside the timer
    with gu.Timer('initialize', verbose=False) as timer:
        X_L, X_D = engine.initialize(M_c, M_r, T,
                                     initialization='from_the_prior')
    record_progress(X_L, X_D, timer.elapsed_secs)

    # run blocks of transitions, recording ARI, test_ll progression
    completed_transitions = 0
    while completed_transitions < num_transitions:
        # Clamp the block to what is left so the total number of transitions
        # equals the reported 'n_steps' even when block_size does not evenly
        # divide it (previously the final block over-ran).
        n_steps = min(block_size, num_transitions - completed_transitions)
        # We won't be limiting by time in the convergence runs
        # NOTE(review): the timer label is still 'initialize', as in the
        # original; confirm whether it should read 'analyze'.
        with gu.Timer('initialize', verbose=False) as timer:
            X_L, X_D = engine.analyze(M_c, T, X_L, X_D, kernel_list=(),
                                      n_steps=n_steps, max_time=-1)
        # Advance by the transitions actually run, not by the nominal block.
        completed_transitions += n_steps
        record_progress(X_L, X_D, timer.elapsed_secs)

    ret_dict = dict(
        num_rows=num_rows,
        num_cols=num_cols,
        num_views=num_views,
        num_clusters=num_clusters,
        max_mean=max_mean,
        column_ari_list=column_ari_list,
        mean_test_ll_list=mean_test_ll_list,
        generative_mean_test_log_likelihood=generative_mean_test_log_likelihood,
        elapsed_seconds_list=elapsed_seconds_list,
        n_steps=num_transitions,
        block_size=block_size,
    )
    return ret_dict
def convergence_analyze_helper(table_data, data_dict, command_dict):
    """Measure convergence of a LocalEngine chain on synthetic data.

    Builds a factorial synthetic table from the settings in ``data_dict``,
    initializes one state from the prior with ``LE.LocalEngine``, and then
    repeatedly calls ``engine.analyze`` with at most ``block_size``
    transitions per call until ``n_steps`` transitions have been run.  The
    column ARI (versus the synthetic ground-truth view assignment), the mean
    test log likelihood and the timer's elapsed seconds are recorded once
    after initialization and once after each block.

    Args:
        table_data: Not used by this function.
        data_dict: Mapping providing 'SEED', 'num_clusters', 'num_cols',
            'num_rows', 'num_views', 'max_mean', 'n_test', 'n_steps' (total
            number of transitions), 'block_size' and 'init_seed'.
        command_dict: Not used by this function.

    Returns:
        dict with keys 'num_rows', 'num_cols', 'num_views', 'num_clusters',
        'max_mean', 'column_ari_list', 'mean_test_ll_list',
        'generative_mean_test_log_likelihood', 'elapsed_seconds_list',
        'n_steps' and 'block_size'.  Each of the three lists holds the
        initial observation followed by one observation per block.

    Raises:
        ValueError: if 'n_steps' > 0 while 'block_size' is not positive,
            because the block loop would never terminate.
    """
    gen_seed = data_dict['SEED']
    num_clusters = data_dict['num_clusters']
    num_cols = data_dict['num_cols']
    num_rows = data_dict['num_rows']
    num_views = data_dict['num_views']
    max_mean = data_dict['max_mean']
    n_test = data_dict['n_test']
    num_transitions = data_dict['n_steps']
    block_size = data_dict['block_size']
    init_seed = data_dict['init_seed']

    # Reject a block size that can never make progress before doing any
    # expensive data generation.
    if num_transitions > 0 and block_size <= 0:
        raise ValueError(
            'block_size must be positive when n_steps > 0, got %r'
            % (block_size,))

    # generate some data
    T, M_r, M_c, data_inverse_permutation_indices = \
        du.gen_factorial_data_objects(
            gen_seed, num_clusters, num_cols, num_rows, num_views,
            max_mean=max_mean, max_std=1,
            send_data_inverse_permutation_indices=True)
    view_assignment_ground_truth = \
        ctu.determine_synthetic_column_ground_truth_assignments(
            num_cols, num_views)
    X_L_gen, X_D_gen = ttu.get_generative_clustering(
        M_c, M_r, T, data_inverse_permutation_indices,
        num_clusters, num_views)
    T_test = ctu.create_test_set(M_c, T, X_L_gen, X_D_gen, n_test,
                                 seed_seed=0)
    generative_mean_test_log_likelihood = \
        ctu.calc_mean_test_log_likelihood(M_c, T, X_L_gen, X_D_gen, T_test)

    # additional set up
    engine = LE.LocalEngine(init_seed)
    column_ari_list = []
    mean_test_ll_list = []
    elapsed_seconds_list = []

    # get initial ARI, test_ll; the timer wraps only the engine call
    with gu.Timer('initialize', verbose=False) as timer:
        X_L, X_D = engine.initialize(M_c, M_r, T,
                                     initialization='from_the_prior')
    column_ari = ctu.get_column_ARI(X_L, view_assignment_ground_truth)
    column_ari_list.append(column_ari)
    mean_test_ll = ctu.calc_mean_test_log_likelihood(
        M_c, T, X_L, X_D, T_test)
    mean_test_ll_list.append(mean_test_ll)
    elapsed_seconds_list.append(timer.elapsed_secs)

    # run blocks of transitions, recording ARI, test_ll progression
    completed_transitions = 0
    while completed_transitions < num_transitions:
        # Size each block by the transitions still outstanding: the final
        # block shrinks when block_size does not evenly divide the total, so
        # exactly num_transitions are run (previously the last block ran a
        # full block_size and overshot the reported 'n_steps').
        remaining = num_transitions - completed_transitions
        n_steps = min(block_size, remaining)
        # We won't be limiting by time in the convergence runs
        # NOTE(review): the timer label is still 'initialize', as in the
        # original; confirm whether it should read 'analyze'.
        with gu.Timer('initialize', verbose=False) as timer:
            X_L, X_D = engine.analyze(M_c, T, X_L, X_D, kernel_list=(),
                                      n_steps=n_steps, max_time=-1)
        # Count what was actually run rather than the nominal block size.
        completed_transitions += n_steps

        column_ari = ctu.get_column_ARI(X_L, view_assignment_ground_truth)
        column_ari_list.append(column_ari)
        mean_test_ll = ctu.calc_mean_test_log_likelihood(
            M_c, T, X_L, X_D, T_test)
        mean_test_ll_list.append(mean_test_ll)
        elapsed_seconds_list.append(timer.elapsed_secs)

    ret_dict = dict(
        num_rows=num_rows,
        num_cols=num_cols,
        num_views=num_views,
        num_clusters=num_clusters,
        max_mean=max_mean,
        column_ari_list=column_ari_list,
        mean_test_ll_list=mean_test_ll_list,
        generative_mean_test_log_likelihood=generative_mean_test_log_likelihood,
        elapsed_seconds_list=elapsed_seconds_list,
        n_steps=num_transitions,
        block_size=block_size,
    )
    return ret_dict