示例#1
0
def generate_multinomial_data(next_seed, n_cols, n_rows, n_views):
    """Build synthetic multinomial data objects from seeded row partitions.

    Seeds the global ``random`` generator with ``next_seed`` and creates one
    shuffled ``eu.CRP(n_rows, 2.0)`` partition per view. Every column is
    assigned to view 0, so each cell is filled with the cluster label its
    row has in the first partition; the partitions of the other views are
    still generated and shuffled but never read.

    Returns:
        The tuple ``(T, M_r, M_c)``: ``T`` and ``M_c`` as returned by
        ``du.convert_columns_to_multinomial`` for all columns, and ``M_r``
        as returned by ``du.gen_M_r_from_T`` on the table before conversion.
    """
    random.seed(next_seed)

    # Every column maps to the first view.
    view_of_column = [0] * n_cols

    # One shuffled row partition per view.
    view_partitions = []
    for _ in range(n_views):
        row_clusters = eu.CRP(n_rows, 2.0)
        random.shuffle(row_clusters)
        view_partitions.append(row_clusters)

    # Each cell holds the cluster label of its row within the column's view.
    table = numpy.zeros((n_rows, n_cols), dtype=float)
    for col_idx, view_idx in enumerate(view_of_column):
        for row_idx in range(n_rows):
            table[row_idx, col_idx] = view_partitions[view_idx][row_idx]

    T = table.tolist()
    M_r = du.gen_M_r_from_T(T)
    M_c = du.gen_M_c_from_T(T)
    T, M_c = du.convert_columns_to_multinomial(T, M_c, list(range(n_cols)))
    return T, M_r, M_c
def generate_multinomial_data(next_seed, n_cols, n_rows, n_views):
    """Generate a synthetic multinomial table and its metadata.

    The global ``random`` generator is seeded with ``next_seed``. For each of
    the ``n_views`` views a partition is obtained from
    ``eu.CRP(n_rows, 2.0)`` and shuffled in place. All columns are mapped to
    view 0, so every column of the table holds the cluster labels of the
    first partition.

    Args:
        next_seed: Seed passed to ``random.seed``.
        n_cols: Number of columns in the generated table.
        n_rows: Number of rows in the generated table.
        n_views: Number of row partitions to generate.

    Returns:
        ``(T, M_r, M_c)`` where ``T`` and ``M_c`` come from
        ``du.convert_columns_to_multinomial`` applied to every column and
        ``M_r`` comes from ``du.gen_M_r_from_T`` on the unconverted table.
    """
    # generate the partitions
    random.seed(next_seed)

    cols_to_views = [0] * n_cols
    rows_in_views_to_cols = []
    for _ in range(n_views):
        partition = eu.CRP(n_rows, 2.0)
        random.shuffle(partition)
        rows_in_views_to_cols.append(partition)

    # generate the data
    data = numpy.zeros((n_rows, n_cols), dtype=float)
    for col, view in enumerate(cols_to_views):
        for row in range(n_rows):
            data[row, col] = rows_in_views_to_cols[view][row]

    T = data.tolist()
    M_r = du.gen_M_r_from_T(T)
    M_c = du.gen_M_c_from_T(T)

    # Pass a concrete list of column indices: in Python 3 ``range`` is a lazy
    # sequence object rather than a list, and the helper is handed a list in
    # the other variant of this function.
    T, M_c = du.convert_columns_to_multinomial(T, M_c, list(range(n_cols)))

    return T, M_r, M_c
示例#3
0
        num_cols,
        num_rows,
        num_splits,
        max_mean=max_mean,
        max_std=max_std,
    )
else:
    with open('SynData2.csv') as fh:
        import numpy
        import csv
        T = numpy.array([row for row in csv.reader(fh)], dtype=float).tolist()
        M_r = du.gen_M_r_from_T(T)
        M_c = du.gen_M_c_from_T(T)

# Run the columns listed in multinomial_column_indices through
# du.discretize_data, then through du.convert_columns_to_multinomial, which
# returns both an updated table T and updated column metadata M_c.
T = du.discretize_data(T, multinomial_column_indices)
T, M_c = du.convert_columns_to_multinomial(T, M_c, multinomial_column_indices)

# create the state
p_State = State.p_State(M_c, T, N_GRID=N_GRID, SEED=inf_seed)
# 'T' is passed as the filename argument of the plot call.
p_State.plot_T(filename='T')
# Print the column metadata, the table (as a numpy array), the state object
# and the multinomial column indices for inspection.
print(M_c)
print(numpy.array(T))
print(p_State)
print("multinomial_column_indices: %s" % str(multinomial_column_indices))


def summarize_p_State(p_State):
    # Collect the 'counts' entry of the row partition model of every view
    # found under the 'view_state' key of p_State.get_X_L().
    counts = [
        view_state['row_partition_model']['counts']
        for view_state in p_State.get_X_L()['view_state']
    ]
示例#4
0
# create the data
# NOTE: the condition is hard-coded to True, so the CSV branch below is never
# executed as written; flip it to load the table from SynData2.csv instead.
if True:
    # Build T, M_r and M_c from the generation seed and the size/shape
    # parameters via du.gen_factorial_data_objects.
    T, M_r, M_c = du.gen_factorial_data_objects(
        gen_seed, num_clusters, num_cols, num_rows, num_splits, max_mean=max_mean, max_std=max_std
    )
else:
    with open("SynData2.csv") as fh:
        import numpy
        import csv

        # Read every CSV row, convert the values to float, and keep the
        # table as nested lists; derive M_r and M_c from that table.
        T = numpy.array([row for row in csv.reader(fh)], dtype=float).tolist()
        M_r = du.gen_M_r_from_T(T)
        M_c = du.gen_M_c_from_T(T)

# Run the columns listed in multinomial_column_indices through
# du.discretize_data, then through du.convert_columns_to_multinomial, which
# returns both an updated table T and updated column metadata M_c.
T = du.discretize_data(T, multinomial_column_indices)
T, M_c = du.convert_columns_to_multinomial(T, M_c, multinomial_column_indices)

# create the state
p_State = State.p_State(M_c, T, N_GRID=N_GRID, SEED=inf_seed)
# "T" is passed as the filename argument of the plot call.
p_State.plot_T(filename="T")
# Print the column metadata, the table (as a numpy array), the state object
# and the multinomial column indices for inspection.
print(M_c)
print(numpy.array(T))
print(p_State)
print("multinomial_column_indices: %s" % str(multinomial_column_indices))


def summarize_p_State(p_State):
    # Collect the "counts" entry of the row partition model of every view
    # found under the "view_state" key of p_State.get_X_L().
    counts = [view_state["row_partition_model"]["counts"] for view_state in p_State.get_X_L()["view_state"]]
    # Build a single "; "-separated %-format template with five fields:
    # number of views, cluster counts, column CRP score, data score, score.
    format_list = "; ".join(
        ["s.num_views: %s", "cluster counts: %s", "s.column_crp_score: %.3f", "s.data_score: %.1f", "s.score:%.1f"]
    )