Пример #1
0
def forward_sample(X,
                   n_iters,
                   Zv=None,
                   Zrcv=None,
                   n_grid=30,
                   n_chains=1,
                   ct_kernel=0):
    """
    Build a fresh state for every iteration of every chain and resample data
    from it.

    Every column of X is treated as 'normal' with no distargs.  A percentage
    progress indicator is written to stdout after each iteration.

    Returns:
        (stats, forward_samples): ``stats`` is a flat list holding the
        ``get_data_stats`` result of every iteration, in the order the
        iterations ran; ``forward_samples`` maps each chain index to the list
        of data sets resampled for that chain.
    """
    total_iters = n_chains * n_iters
    n_cols = len(X)
    cctypes = ['normal'] * n_cols
    distargs = [None] * n_cols
    # keyword arguments shared by every state that gets constructed
    state_kwargs = dict(Zv=Zv, Zrcv=Zrcv, n_grid=n_grid, ct_kernel=ct_kernel)

    stats = []
    forward_samples = {}
    completed = 0
    for chain in range(n_chains):
        chain_samples = forward_samples[chain] = []
        for _ in range(n_iters):
            completed += 1
            state = cc_state.cc_state(X, cctypes, distargs, **state_kwargs)
            Y = su.resample_data(state)
            chain_samples.append(Y)
            stats.append(get_data_stats(Y, state))
            # carriage return keeps the progress figure on a single line
            sys.stdout.write(
                "\r%1.2f  " % (completed * 100.0 / float(total_iters)))
            sys.stdout.flush()

    return stats, forward_samples
Пример #2
0
def construct_state_from_legacy_metadata(T, M_c, X_L, X_D):
    """
    Generates a state from CrossCat-formatted data, T, and metadata.

    Args:
        T: table of data; converted with ``numpy.array`` and split into one
            array per column (second axis).
        M_c: metadata; ``M_c['column_metadata'][index]['modeltype']`` is read
            for every column.
        X_L: metadata holding the column partition (assignments and alpha),
            the per-view row-partition alpha and the per-column hypers.
        X_D: per-view row assignments; passed to the state as ``Zrcv``.

    Returns:
        The constructed ``cc_state`` with alphas and dim hypers overwritten
        from the metadata.
    """
    # ignores suffstats in the metadata; the state calculates them itself
    Zv = X_L['column_partition']['assignments']
    Zrcv = list(X_D)
    T_array = numpy.array(T)

    # The original passed the legacy integer order flag `1` (truthy, i.e.
    # Fortran order), which current NumPy rejects; 'F' is the supported
    # spelling of the same order.
    X = [T_array[:, col].flatten('F') for col in range(T_array.shape[1])]

    cctypes = ['normal'] * len(X)
    distargs = [None] * len(X)

    state = cc_state.cc_state(X, cctypes, distargs, Zv=Zv, Zrcv=Zrcv)

    # set Column alpha
    state.alpha = X_L['column_partition']['hypers']['alpha']

    for v in range(state.V):
        view_state = X_L['view_state'][v]
        state.views[v].alpha = view_state['row_partition_model']['hypers'][
            'alpha']
        # .items() works on both Python 2 and 3 (iteritems() is Python 2 only)
        for index, dim in state.views[v].dims.items():
            # column hypers and metadata are looked up by the key under
            # which the dim is stored in the view
            hypers = X_L['column_hypers'][index]
            model_type = M_c['column_metadata'][index]['modeltype']
            _set_dim_hypers_from_legacy(dim, hypers, model_type)

    return state
Пример #3
0
def construct_state_from_legacy_metadata(T, M_c, X_L, X_D):
    """
    Generates a state from CrossCat-formatted data, T, and metadata.

    T is converted with numpy.array and split column-wise; X_L supplies the
    column partition, the alphas and the column hypers; X_D supplies the row
    assignments of each view; M_c supplies each column's 'modeltype'.
    Returns the constructed cc_state.
    """
    # ignores suffstats, the state calculates them from the data
    Zv = X_L['column_partition']['assignments']
    Zrcv = list(X_D)
    T_array = numpy.array(T)

    # 'F' replaces the legacy integer order flag `1` (truthy, i.e. Fortran
    # order) that current NumPy no longer accepts.
    n_cols = T_array.shape[1]
    X = [T_array[:, col].flatten('F') for col in range(n_cols)]

    cctypes = ['normal']*len(X)
    distargs = [None]*len(X)

    state = cc_state.cc_state(X, cctypes, distargs, Zv=Zv, Zrcv=Zrcv)

    # set Column alpha
    state.alpha = X_L['column_partition']['hypers']['alpha']

    for v in range(state.V):
        view_state = X_L['view_state'][v]
        state.views[v].alpha = view_state['row_partition_model']['hypers']['alpha']
        # dict.iteritems() only exists on Python 2; items() works on both
        for index, dim in state.views[v].dims.items():
            # hypers and model type are looked up by the dim's key in the view
            hypers = X_L['column_hypers'][index]
            model_type = M_c['column_metadata'][index]['modeltype']
            _set_dim_hypers_from_legacy(dim, hypers, model_type)

    return state
Пример #4
0
def _do_intialize(args):
    """
    Build a state from a packed argument sequence and return its metadata.

    ``args`` is indexed positionally: args[0] is the data X, args[1] the
    cctypes, args[2] the distargs and args[3] the init mode.  The init mode
    is read but not passed on to the state.
    """
    X, cctypes, distargs = args[0], args[1], args[2]
    init_mode = args[3]  # read only; not used below

    state = cc_state.cc_state(X, cctypes, distargs=distargs)
    return state.get_metadata()
Пример #5
0
def run_test(argsin):
    """
    Plot the generated data for each shape above samples from fitted states.

    The top row of the figure holds a scatter plot of the data generated for
    each entry of the module-level ``shapes``.  Each following row belongs to
    one chain and holds predictive samples drawn from a state that was fitted
    to the corresponding data.  The function ends by calling ``pylab.show()``.

    Args:
        argsin: mapping providing "num_rows", "num_iters", "num_chains" and
            "ct_kernel".

    Relies on the module-level names ``shapes``, ``gen_function``,
    ``cctypes`` and ``distargs``.
    """
    # Read the settings from the argument that was passed in; previously the
    # parameter was ignored and a global named `args` was read instead.
    n_rows = argsin["num_rows"]
    n_iters = argsin["num_iters"]
    n_chains = argsin["num_chains"]
    ct_kernel = argsin["ct_kernel"]

    pylab.figure(num=None, facecolor='w', edgecolor='k', frameon=False, tight_layout=True)
    # the title does not depend on the shape, so set it once
    pylab.suptitle("Kernel %i" % ct_kernel)

    data = {}
    xlims = {}
    ylims = {}
    # Top row: the original data.  The grid is four columns wide, which
    # presumably matches the number of entries in `shapes` -- confirm.
    for plt, shape in enumerate(shapes, 1):
        data[shape] = gen_function[shape](n_rows)

        ax = pylab.subplot(n_chains+1, 4, plt)
        pylab.scatter(data[shape][0], data[shape][1], s=10, color='blue', edgecolor='none', alpha=.2)
        ax.set_xticks([])
        ax.set_yticks([])

        # remember the limits so the simulated panels share the same axes
        xlims[shape] = ax.get_xlim()
        ylims[shape] = ax.get_ylim()

    # One row of simulated panels per chain, below the original data.
    for chain in range(n_chains):
        print("chain %i of %i." % (chain+1, n_chains))
        for plt, shape in enumerate(shapes, 1):
            print("\tWorking on %s." % shape)
            T = data[shape]
            S = cc_state.cc_state(T, cctypes, ct_kernel=ct_kernel, distargs=distargs)
            S.transition(N=n_iters)
            T_chain = numpy.array(su.simple_predictive_sample(S, n_rows, [0,1], N=n_rows))

            ax = pylab.subplot(n_chains+1, 4, chain*4+4+plt)
            ax.set_xticks([])
            ax.set_yticks([])
            pylab.scatter(T_chain[:,0], T_chain[:,1], s=10, color='red', edgecolor='none', alpha=.2)
            pylab.xlim(xlims[shape])
            pylab.ylim(ylims[shape])

    print("Done.")
    pylab.show()
Пример #6
0
    def initialize(self,
                   M_c,
                   M_r,
                   T,
                   initialization='from_the_prior',
                   specified_s_grid=None,
                   specified_mu_grid=None,
                   row_initialization=-1,
                   n_chains=1):
        """
        Create ``n_chains`` fresh states from table T and return their legacy
        metadata.

        All columns are treated as 'normal'.  When ``specified_mu_grid`` or
        ``specified_s_grid`` is given and non-empty, the matching hyper grid
        ('m' or 's') of every dim is replaced by it, the hyper value itself
        is drawn from the grid with ``random.sample``, and the dim's hypers
        are pushed to each of its clusters.

        ``M_c``, ``M_r``, ``initialization`` and ``row_initialization`` are
        accepted but not used here.

        Returns:
            ``(X_L, X_D)`` for a single chain, otherwise
            ``(X_L_list, X_D_list)`` with one entry per chain.
        """
        # assumes all columns are normal data
        table = numpy.array(T)
        n_rows, n_cols = table.shape  # unpacking requires a 2-D table

        X = [table[:, c] for c in range(n_cols)]
        cctypes = ['normal'] * n_cols
        distargs = [None] * n_cols

        # (hyper name, grid) pairs in the order they are applied: 'm', then 's'
        grid_overrides = (('m', specified_mu_grid), ('s', specified_s_grid))

        X_L_list = []
        X_D_list = []
        for _ in range(n_chains):
            state = cc_state.cc_state(X, cctypes, distargs)

            for hyper_name, grid in grid_overrides:
                if grid is None or len(grid) == 0:
                    continue
                for dim in state.dims:
                    dim.hypers_grids[hyper_name] = numpy.array(grid)
                    dim.hypers[hyper_name] = random.sample(grid, 1)[0]
                    for cluster in dim.clusters:
                        cluster.set_hypers(dim.hypers)

            _, X_L, X_D = get_legacy_metadata(state)
            X_L_list.append(X_L)
            X_D_list.append(X_D)

        # a single chain is returned unwrapped rather than as 1-element lists
        if n_chains == 1:
            return X_L_list[0], X_D_list[0]

        return X_L_list, X_D_list
Пример #7
0
    def initialize(self, M_c, M_r, T, initialization='from_the_prior',
            specified_s_grid=None, specified_mu_grid=None,
            row_initialization=-1, n_chains=1):
        """
        Build one state per chain from table T and hand back legacy metadata.

        Every column is modelled as 'normal'.  A non-empty specified_mu_grid
        (respectively specified_s_grid) replaces the 'm' (respectively 's')
        hyper grid of each dim; the hyper value is then drawn from that grid
        via random.sample and the dim's hypers are set on all its clusters.

        M_c, M_r, initialization and row_initialization are not used.

        Returns (X_L, X_D) when n_chains == 1, else two lists with one entry
        per chain.
        """
        # assumes all columns are normal data
        data = numpy.array(T)
        n_rows, n_cols = data.shape  # also enforces that the table is 2-D

        X = [data[:, c] for c in range(n_cols)]
        cctypes = ['normal']*n_cols
        distargs = [None]*n_cols

        # mu grid is applied before the s grid
        overrides = [('m', specified_mu_grid), ('s', specified_s_grid)]

        X_L_list, X_D_list = [], []
        for _ in range(n_chains):
            state = cc_state.cc_state(X, cctypes, distargs)

            for key, grid in overrides:
                if grid is not None and len(grid) > 0:
                    grid_array = numpy.array(grid)
                    for dim in state.dims:
                        dim.hypers_grids[key] = numpy.array(grid_array)
                        dim.hypers[key] = random.sample(grid, 1)[0]
                        for cluster in dim.clusters:
                            cluster.set_hypers(dim.hypers)

            legacy = get_legacy_metadata(state)
            X_L_list.append(legacy[1])
            X_D_list.append(legacy[2])

        if n_chains != 1:
            return X_L_list, X_D_list

        # single chain: unwrap the one-element lists
        return X_L_list[0], X_D_list[0]
Пример #8
0
def forward_sample(X, n_iters, Zv=None, Zrcv=None, n_grid=30, n_chains=1, ct_kernel=0):
    """
    Construct a new state per iteration and resample data from it.

    All columns of X are treated as 'normal' with no distargs.  Progress is
    written to stdout as a percentage after every iteration.

    Returns (stats, forward_samples): stats is the flat list of
    get_data_stats results across all chains; forward_samples maps a chain
    index to the list of data sets resampled in that chain.
    """
    total_iters = n_chains*n_iters
    n_cols = len(X)
    cctypes = ['normal']*n_cols
    distargs = [None]*n_cols

    stats = []
    forward_samples = {}
    n_done = 0
    for chain in range(n_chains):
        samples = []
        forward_samples[chain] = samples
        for _ in range(n_iters):
            n_done += 1
            state = cc_state.cc_state(
                X, cctypes, distargs,
                Zv=Zv, Zrcv=Zrcv, n_grid=n_grid, ct_kernel=ct_kernel)
            Y = su.resample_data(state)
            samples.append(Y)
            stats.append(get_data_stats(Y, state))
            percent_done = n_done*100.0/float(total_iters)
            sys.stdout.write("\r%1.2f  " % percent_done)
            sys.stdout.flush()

    return stats, forward_samples
def run_test(argsin):
    """
    Compare generated data with samples from fitted states, shape by shape.

    For each entry of the module-level ``shapes`` the data is generated once,
    ``num_chains`` states are fitted to it, and the predictive samples of all
    chains are pooled.  The original data is plotted in the top row and the
    pooled samples below it, on the same axis limits.  The function ends by
    calling ``pylab.show()``.

    Args:
        argsin: mapping providing "num_rows", "num_iters" and "num_chains".

    Relies on the module-level names ``shapes``, ``gen_function``,
    ``cctypes`` and ``distargs``.
    """
    # Use the argument that was passed in; previously the parameter was
    # ignored and a global named `args` was read instead.
    n_rows = argsin["num_rows"]
    n_iters = argsin["num_iters"]
    n_chains = argsin["num_chains"]

    # each chain contributes an equal share of the simulated points
    n_per_chain = int(float(n_rows) / n_chains)

    for plt, shape in enumerate(shapes, 1):
        # single-argument print() behaves the same on Python 2 and 3
        print("Shape: %s" % shape)
        T_o = gen_function[shape](n_rows)
        T_i = []
        for chain in range(n_chains):
            print("chain %i of %i" % (chain + 1, n_chains))
            S = cc_state.cc_state(T_o, cctypes, ct_kernel=1, distargs=distargs)
            S.transition(N=n_iters)

            T_i.extend(
                su.simple_predictive_sample(S, n_rows, [0, 1], N=n_per_chain))

        T_i = numpy.array(T_i)

        ax = pylab.subplot(2, 4, plt)
        pylab.scatter(T_o[0], T_o[1], color='blue', edgecolor='none')
        # the x label was previously set through ylabel and then overwritten
        pylab.xlabel("X")
        pylab.ylabel("Y")
        pylab.title("%s original" % shape)

        pylab.subplot(2, 4, plt + 4)
        pylab.scatter(T_i[:, 0], T_i[:, 1], color='red', edgecolor='none')
        pylab.xlabel("X")
        pylab.ylabel("Y")
        # reuse the limits of the original panel so the two are comparable
        pylab.xlim(ax.get_xlim())
        pylab.ylim(ax.get_ylim())
        pylab.title("%s simulated" % shape)

    print("Done.")
    pylab.show()
def run_test(argsin):
    """
    Plot generated data against pooled samples from fitted states.

    For every entry of the module-level ``shapes``: generate the data, fit
    ``num_chains`` states to it, pool their predictive samples, then draw the
    original data in the top row and the samples in the bottom row using the
    same axis limits.  The function ends by calling ``pylab.show()``.

    Args:
        argsin: mapping providing "num_rows", "num_iters" and "num_chains".

    Relies on the module-level names ``shapes``, ``gen_function``,
    ``cctypes`` and ``distargs``.
    """
    # The settings come from the parameter; the earlier code read a global
    # named `args` and never touched `argsin`.
    n_rows = argsin["num_rows"]
    n_iters = argsin["num_iters"]
    n_chains = argsin["num_chains"]

    # number of simulated points each chain contributes
    n_per_chain = int(float(n_rows) / n_chains)

    for plt, shape in enumerate(shapes, 1):
        # print() with one argument is valid on both Python 2 and 3
        print("Shape: %s" % shape)
        T_o = gen_function[shape](n_rows)
        T_i = []
        for chain in range(n_chains):
            print("chain %i of %i" % (chain + 1, n_chains))
            S = cc_state.cc_state(T_o, cctypes, ct_kernel=1, distargs=distargs)
            S.transition(N=n_iters)

            T_i.extend(su.simple_predictive_sample(S, n_rows, [0, 1], N=n_per_chain))

        T_i = numpy.array(T_i)

        ax = pylab.subplot(2, 4, plt)
        pylab.scatter(T_o[0], T_o[1], color="blue", edgecolor="none")
        # label both axes; ylabel was called twice before, losing the "X"
        pylab.xlabel("X")
        pylab.ylabel("Y")
        pylab.title("%s original" % shape)

        pylab.subplot(2, 4, plt + 4)
        pylab.scatter(T_i[:, 0], T_i[:, 1], color="red", edgecolor="none")
        pylab.xlabel("X")
        pylab.ylabel("Y")
        # match the limits of the original panel
        pylab.xlim(ax.get_xlim())
        pylab.ylim(ax.get_ylim())
        pylab.title("%s simulated" % shape)

    print("Done.")
    pylab.show()
Пример #11
0
def posterior_sample(X, n_iters, kernels=_all_kernels, Zv=None, Zrcv=None, n_grid=30, n_chains=1, ct_kernel=0):
    """
    Run one state per chain, resampling data after every transition.

    All columns of X are treated as 'normal' with no distargs.  For each
    chain a state is constructed and its data resampled once; then, n_iters
    times, the state is transitioned with ``kernels`` and the data resampled
    again.  Progress is written to stdout as a percentage.

    Returns (stats, posterior_samples): stats is the flat list of
    get_data_stats results, one per transition (the initial resample of a
    chain is not included); posterior_samples maps a chain index to the list
    of resampled data sets, starting with the initial resample.
    """
    n_cols = len(X)
    cctypes = ['normal']*n_cols
    distargs = [None]*n_cols
    stats = []
    posterior_samples = dict()
    i = 0
    total_iters = n_chains*n_iters
    for chain in range(n_chains):
        state = cc_state.cc_state(X, cctypes, distargs, Zv=Zv, Zrcv=Zrcv, n_grid=n_grid, ct_kernel=ct_kernel)
        Y = su.resample_data(state)
        # Start a list of samples for this chain.  Storing Y itself here
        # meant the appends below were added to the first sample object
        # instead of to a list of samples (compare forward_sample).
        posterior_samples[chain] = [Y]
        for _ in range(n_iters):
            state.transition(kernel_list=kernels)
            Y = su.resample_data(state)
            stats.append(get_data_stats(Y, state))
            posterior_samples[chain].append(Y)
            i += 1
            # carriage return keeps the progress figure on one line
            string = "\r%1.2f  " % (i*100.0/float(total_iters))
            sys.stdout.write(string)
            sys.stdout.flush()

    return stats, posterior_samples
Пример #12
0
def posterior_sample(X,
                     n_iters,
                     kernels=_all_kernels,
                     Zv=None,
                     Zrcv=None,
                     n_grid=30,
                     n_chains=1,
                     ct_kernel=0):
    """
    Run one state per chain and resample data after every transition.

    Every column of X is treated as 'normal' with no distargs.  Each chain
    constructs a state, resamples its data once, and then repeats
    "transition with ``kernels``, resample" ``n_iters`` times.  A percentage
    progress indicator is written to stdout.

    Returns:
        (stats, posterior_samples): ``stats`` is the flat list of
        ``get_data_stats`` results, one per transition (a chain's initial
        resample is not included); ``posterior_samples`` maps each chain
        index to its list of resampled data sets, initial resample first.
    """
    n_cols = len(X)
    cctypes = ['normal'] * n_cols
    distargs = [None] * n_cols
    stats = []
    posterior_samples = dict()
    i = 0
    total_iters = n_chains * n_iters
    for chain in range(n_chains):
        state = cc_state.cc_state(X,
                                  cctypes,
                                  distargs,
                                  Zv=Zv,
                                  Zrcv=Zrcv,
                                  n_grid=n_grid,
                                  ct_kernel=ct_kernel)
        Y = su.resample_data(state)
        # Wrap the initial sample in a list.  Assigning Y directly made the
        # appends below extend the first sample object rather than a list of
        # samples (compare forward_sample).
        posterior_samples[chain] = [Y]
        for _ in range(n_iters):
            state.transition(kernel_list=kernels)
            Y = su.resample_data(state)
            stats.append(get_data_stats(Y, state))
            posterior_samples[chain].append(Y)
            i += 1
            # carriage return keeps the progress figure on one line
            string = "\r%1.2f  " % (i * 100.0 / float(total_iters))
            sys.stdout.write(string)
            sys.stdout.flush()

    return stats, posterior_samples
Пример #13
0
# For each ct_kernel value (0 and 1) and each w in W_list, fit states to data
# from _gen_ring(N, w) and record the mutual information between columns 0
# and 1.
# NOTE(review): `i`, `N`, `W_list`, `n_data_sets` and `n_samples` are defined
# outside this fragment.
for kernel in range(2):
    # one row per (data set, sample) pair, one column per entry of W_list
    MI = numpy.zeros((n_data_sets * n_samples, len(W_list)))
    c = 0  # column of MI for the current w
    for w in W_list:
        r = 0  # row of MI; advanced once per sample
        for ds in range(n_data_sets):
            # seed control so that data is always the same
            numpy.random.seed(r + ds)
            random.seed(r + ds)

            X = _gen_ring(N, w)

            for _ in range(n_samples):

                # a fresh state for every sample, run for 200 transitions
                S = cc_state.cc_state([X[:, 0], X[:, 1]], ["normal"] * 2, ct_kernel=kernel, distargs=[None] * 2)
                S.transition(N=200)

                mi = iu.mutual_information(S, 0, 1)
                # linfoot = iu.mutual_information_to_linfoot(MI)

                MI[r, c] = mi

                print("w: %1.2f, MI: %1.6f" % (w, mi))
                # progress counter; the total spans both kernel values
                print("%i of %i" % (i + 1, len(W_list) * n_data_sets * n_samples * 2))

                del S

                i += 1
                r += 1
        c += 1
Пример #14
0
# For each ct_kernel value (0 and 1) and each rho in rho_list, fit states to
# bivariate-normal data with correlation rho and record the Linfoot value
# derived from the estimated mutual information.
# NOTE(review): `i`, `mu`, `N`, `distargs`, `rho_list`, `n_data_sets` and
# `n_samples` are defined outside this fragment.
# NOTE(review): nothing visible here advances `r`, `c` or `i`, so as shown
# every sample is written to L[0, 0]; the fragment looks cut off -- confirm
# against the full script.
for kernel in range(2):
    # one row per (data set, sample) pair, one column per entry of rho_list
    L = numpy.zeros((n_data_sets*n_samples, len(rho_list)))
    c = 0
    for rho in rho_list:
        r = 0
        for ds in range(n_data_sets):
            # seed control so that data is always the same
            numpy.random.seed(r+ds)
            random.seed(r+ds)

            # unit variances with off-diagonal rho
            sigma = numpy.array([[1,rho],[rho,1]])
            X = numpy.random.multivariate_normal(mu,sigma,N)

            for _ in range(n_samples):
                # both columns start out in view 0 (Zv=[0,0])
                S = cc_state.cc_state([X[:,0], X[:,1]], ['normal']*2, Zv=[0,0],
                    ct_kernel=kernel, distargs=distargs)

                S.transition(N=100)

                MI = iu.mutual_information(S, 0, 1)
                linfoot = iu.mutual_information_to_linfoot(MI)

                # del S

                L[r,c] = linfoot

                print("rho: %1.2f, MI: %1.6f, Linfoot: %1.6f" %(rho, MI, linfoot))
                # progress counter; the total spans both kernel values
                print("%i of %i" % (i+1, len(rho_list)*n_data_sets*n_samples*2))

                del S
Пример #15
0
# Round trip: generate a table, fit a state, export legacy metadata, then
# rebuild a second state from the row-major table plus that metadata.
# NOTE(review): `n_rows` and `view_weights` are defined outside this fragment.

# one weight vector per view: three equal weights, then two equal weights
cluster_weights = [numpy.ones(3)/3.0, numpy.ones(2)/2.0]
cctypes = ['normal']*5
distargs = [None]*5
separation = [.7, .9]

T, Zv, Zc, dims = tu.gen_data_table(
                    n_rows, 
                    view_weights, 
                    cluster_weights, 
                    cctypes, 
                    distargs, 
                    separation, 
                    return_dims=True)


state = cc_state.cc_state(T, cctypes, distargs)
state.transition(N=10)

M_c, X_L, X_D = lu.get_legacy_metadata(state)

# Stack the entries of T as rows, then transpose so that entry i of T becomes
# column i of Tcc.
Tcc = T[0]
for i in range(1,len(T)):
    Tcc = numpy.vstack( (Tcc, T[i]) )
Tcc = numpy.transpose(Tcc) 

# make sure the data came out right
for i in range(len(T)):
    assert numpy.all(T[i] == Tcc[:,i])

state_b = lu.construct_state_from_legacy_metadata(Tcc, M_c, X_L, X_D)
Пример #16
0
# For each ct_kernel value (0 and 1) and each w in W_list, fit states to data
# from _gen_ring(N, w) and record the mutual information between columns 0
# and 1.
# NOTE(review): `i`, `N`, `W_list`, `n_data_sets` and `n_samples` are defined
# outside this fragment.
# NOTE(review): `r` and `c` are never advanced in the lines visible here, so
# as shown every sample is written to MI[0, 0]; the fragment looks cut off --
# confirm against the full script.
for kernel in range(2):
    # one row per (data set, sample) pair, one column per entry of W_list
    MI = numpy.zeros((n_data_sets * n_samples, len(W_list)))
    c = 0
    for w in W_list:
        r = 0
        for ds in range(n_data_sets):
            # seed control so that data is always the same
            numpy.random.seed(r + ds)
            random.seed(r + ds)

            X = _gen_ring(N, w)

            for _ in range(n_samples):

                # a fresh state for every sample, run for 200 transitions
                S = cc_state.cc_state([X[:, 0], X[:, 1]], ['normal'] * 2,
                                      ct_kernel=kernel,
                                      distargs=[None] * 2)
                S.transition(N=200)

                mi = iu.mutual_information(S, 0, 1)
                # linfoot = iu.mutual_information_to_linfoot(MI)

                MI[r, c] = mi

                print("w: %1.2f, MI: %1.6f" % (w, mi))
                # progress counter; the total spans both kernel values
                print("%i of %i" %
                      (i + 1, len(W_list) * n_data_sets * n_samples * 2))

                del S

                i += 1
Пример #17
0
def run_test(argsin):
    """
    Plot the generated data for each shape above samples from fitted states.

    The top row of the figure shows a scatter plot of the data generated for
    each entry of the module-level ``shapes``.  Every further row belongs to
    one chain and shows predictive samples drawn from a state fitted to the
    corresponding data, using the axis limits of the original panel.  The
    function ends by calling ``pylab.show()``.

    Args:
        argsin: mapping providing "num_rows", "num_iters", "num_chains" and
            "ct_kernel".

    Relies on the module-level names ``shapes``, ``gen_function``,
    ``cctypes`` and ``distargs``.
    """
    # Take the settings from the parameter; the earlier code never used
    # `argsin` and read a global named `args` instead.
    n_rows = argsin["num_rows"]
    n_iters = argsin["num_iters"]
    n_chains = argsin["num_chains"]
    ct_kernel = argsin["ct_kernel"]

    pylab.figure(num=None,
                 facecolor='w',
                 edgecolor='k',
                 frameon=False,
                 tight_layout=True)
    # the title is the same for every panel, so set it once
    pylab.suptitle("Kernel %i" % ct_kernel)

    data = {}
    xlims = {}
    ylims = {}
    # Top row: the original data.  The grid is four columns wide, which
    # presumably matches the number of entries in `shapes` -- confirm.
    for plt, shape in enumerate(shapes, 1):
        data[shape] = gen_function[shape](n_rows)

        ax = pylab.subplot(n_chains + 1, 4, plt)
        pylab.scatter(data[shape][0],
                      data[shape][1],
                      s=10,
                      color='blue',
                      edgecolor='none',
                      alpha=.2)
        ax.set_xticks([])
        ax.set_yticks([])

        # remember the limits so the simulated panels share the same axes
        xlims[shape] = ax.get_xlim()
        ylims[shape] = ax.get_ylim()

    # One row of simulated panels per chain, below the original data.
    for chain in range(n_chains):
        print("chain %i of %i." % (chain + 1, n_chains))
        for plt, shape in enumerate(shapes, 1):
            print("\tWorking on %s." % shape)
            T = data[shape]
            S = cc_state.cc_state(T,
                                  cctypes,
                                  ct_kernel=ct_kernel,
                                  distargs=distargs)
            S.transition(N=n_iters)
            T_chain = numpy.array(
                su.simple_predictive_sample(S, n_rows, [0, 1], N=n_rows))

            ax = pylab.subplot(n_chains + 1, 4, chain * 4 + 4 + plt)
            ax.set_xticks([])
            ax.set_yticks([])
            pylab.scatter(T_chain[:, 0],
                          T_chain[:, 1],
                          s=10,
                          color='red',
                          edgecolor='none',
                          alpha=.2)
            pylab.xlim(xlims[shape])
            pylab.ylim(ylims[shape])

    print("Done.")
    pylab.show()
Пример #18
0
# Generate one synthetic table and, for each ct_kernel value, record after
# every single transition how closely the state's partitions match the
# generating ones (adjusted Rand index).
# NOTE(review): `n_rows`, `n_cols`, `cctypes`, `distargs`, `n_kernels`,
# `n_data_sets`, `n_transitions`, `itr` and `total_itr` are defined outside
# this fragment.
# NOTE(review): the second and third arguments are presumably the view
# weights and the per-view cluster weights -- confirm against
# tu.gen_data_table.
Ts, Zv, Zc = tu.gen_data_table(n_rows,
            numpy.array([.5,.5]), 
            [numpy.array([1./2]*2),
            numpy.array([1./5]*5)], 
            cctypes, 
            distargs, 
            [1.0]*n_cols)

 
for kernel in range(n_kernels):
    # for a set number of chains
    # one row per run, one column per transition
    ARI_view = numpy.zeros((n_data_sets, n_transitions))
    ARI_cols = numpy.zeros((n_data_sets, n_transitions))

    for r in range(n_data_sets):
        # every run starts from a fresh state on the same table Ts
        S = cc_state.cc_state(Ts, cctypes, ct_kernel=kernel, distargs=distargs)
        for c in range(n_transitions):
            S.transition(N=1)

            # calculate ARI
            ari_view = adjusted_rand_score(Zv, S.Zv.tolist())
            ari_cols = tu.column_average_ari(Zv, Zc, S)

            ARI_view[r,c] = ari_view
            ARI_cols[r,c] = ari_cols

        itr += 1
        print("itr %i of %i." % (itr, total_itr))

    ###
    # NOTE(review): the plotting code for this subplot is not part of the
    # visible fragment.
    pylab.subplot(2,n_kernels,kernel+1)
Пример #19
0
# Append four columns to X; every value is a numpy.random.randn() draw
# shifted by 4 * Z[i].
for _ in range(4):
    x_clustered = [numpy.random.randn() + Z[i] * 4 for i in range(n_rows)]
    X.append(numpy.array(x_clustered))

# Build n_states states on the same data, each with its own random seed.
states = [
    cc_state.cc_state(X, cctypes, distargs, ct_kernel=1,
                      seed=random.randrange(200000))
    for s in range(n_states)
]

num_iters = 200

# Run every state for num_iters transitions, reporting progress as we go.
i = 0
for i, state in enumerate(states, 1):
    state.transition(N=num_iters)
    print("state %i of %i" % (i, n_states) )

# Collect the column partition (Zv) of every state as a plain list.
Zvs = [state.Zv.tolist() for state in states]
Пример #20
0
# Add four columns to X whose entries are numpy.random.randn() draws offset
# by 4 * Z[i].
for _ in range(4):
    x_clustered = [numpy.random.randn() + Z[i] * 4 for i in range(n_rows)]
    X.append(numpy.array(x_clustered))

# One state per requested chain; each gets a freshly drawn integer seed.
states = [
    cc_state.cc_state(X,
                      cctypes,
                      distargs,
                      ct_kernel=1,
                      seed=random.randrange(200000))
    for s in range(n_states)
]

num_iters = 200

# Transition each state in turn and print a running count.
i = 0
for i, state in enumerate(states, 1):
    state.transition(N=num_iters)
    print("state %i of %i" % (i, n_states))

# Gather each state's column partition (Zv) as a plain list.
Zvs = [state.Zv.tolist() for state in states]