Example #1
def run_synth_test():
    """ Run a test with synthetic data and MCMC inference
    """
    options, popn, data, client, popn_true, x_true = initialize_parallel_test_harness()

    # If x0 specified, load x0 from file
    x0 = None
    if options.x0_file is not None:
        with open(options.x0_file, 'r') as f:
            print "Initializing with state from: %s" % options.x0_file
            prev_x0 = cPickle.load(f)
            if isinstance(prev_x0, list):
                x0 = prev_x0[-1]
            else:
                mle_x0 = prev_x0
                # HACK: We're assuming x0 came from a standard GLM
                mle_model = make_model('standard_glm', N=data['N'])
                mle_popn = Population(mle_model)
                mle_popn.set_data(data)

                x0 = popn.sample(None)
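                # NOTE: convert_model is assumed to map the standard-GLM MLE
                # parameters onto this model's parameterization, using the
                # freshly sampled x0 as a template for variables the MLE does
                # not provide.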
                x0 = convert_model(mle_popn, mle_model, mle_x0, popn,
                                   popn.model, x0)

    use_existing = False

    fname = os.path.join(options.resultsDir,
                         '%s_marginal_lkhd.pkl' % options.model)
    if use_existing and os.path.exists(fname):
        print "Found existing results"
        with open(fname) as f:
            (marg_lkhd, log_weights) = cPickle.load(f)
    else:
        N_samples = 10
        popn_true.set_data(data)

        # Estimate the marginal log likelihood
        print "Performing parallel inference"
        marg_lkhd, log_weights = parallel_ais(client,
                                              data,
                                              x0=x0,
                                              N_samples=N_samples,
                                              steps_per_B=50,
                                              resdir=options.resultsDir)

        # Save results
        print "Saving results to %s" % fname
        with open(fname, 'w') as f:
            cPickle.dump((marg_lkhd, log_weights), f, protocol=-1)
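
# A typical entry point for this script (a sketch; command-line parsing and
# engine setup are assumed to happen inside initialize_parallel_test_harness):
#   if __name__ == '__main__':
#       run_synth_test()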
Example #2
def run_synth_test():
    """ Run a test with synthetic data and MCMC inference
    """
    options, popn, data, client, popn_true, x_true = initialize_parallel_test_harness()

    # If x0 specified, load x0 from file
    x0 = None
    if options.x0_file is not None:
        with open(options.x0_file, "r") as f:
            print "Initializing with state from: %s" % options.x0_file
            prev_x0 = cPickle.load(f)
            if isinstance(prev_x0, list):
                x0 = prev_x0[-1]
            else:
                mle_x0 = prev_x0
                # HACK: We're assuming x0 came from a standard GLM
                mle_model = make_model("standard_glm", N=data["N"])
                mle_popn = Population(mle_model)
                mle_popn.set_data(data)

                x0 = popn.sample(None)
                x0 = convert_model(mle_popn, mle_model, mle_x0, popn, popn.model, x0)

    use_existing = False

    fname = os.path.join(options.resultsDir, "%s_marginal_lkhd.pkl" % options.model)
    if use_existing and os.path.exists(fname):
        print "Found existing results"
        with open(fname) as f:
            (marg_lkhd, log_weights) = cPickle.load(f)
    else:
        N_samples = 10
        popn_true.set_data(data)

        # Estimate the marginal log likelihood
        print "Performing parallel inference"
        marg_lkhd, log_weights = parallel_ais(
            client, data, x0=x0, N_samples=N_samples, steps_per_B=50, resdir=options.resultsDir
        )

        # Save results
        print "Saving results to %s" % fname
        with open(fname, "w") as f:
            cPickle.dump((marg_lkhd, log_weights), f, protocol=-1)
Example #3
def run_parallel_map():
    """ Run a test with synthetic data and MCMC inference
    """
    # Parse command line args
    (options, args) = parse_cmd_line_args()

    # Load the data
    data = load_data(options)
    # Get a model for the data
    model_type = 'standard_glm'
    model = make_model(model_type, N=data['N'])

    # Get parallel clients
    rc = Client(profile="sge")
    dview = rc[:]
    # dview = get_engines(n_workers=8)

    # Load imports on the client
    load_imports_on_client(dview)

    # Initialize population objects on the clients
    dview.apply_sync(initialize_client, (model_type, data['N'], data))
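
    # From here the engines could carry out per-neuron inference, e.g. a sketch
    # reusing helpers that appear in the other examples (x0 is hypothetical and
    # would come from a Population built from `model`):
    #   ll0 = parallel_compute_ll(dview, x0, data['N'])
    #   x_inf = parallel_coord_descent(rc, data['N'], x0=x0, maxiter=1)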
Example #4
def load_set_of_results(N, T, graph_model='er', sample_frac=0.1):
    data_dir = os.path.join('/group', 'hips', 'scott', 'pyglm', 'data', 'synth', graph_model, 'N%dT%d' % (N, T))

    # Evaluate the state for each of the parameter settings
    s_infs_mcmc = []
    s_infs_map = []
    s_trues = []

    # Enumerate the subdirectories containing the data
    subdirs = os.listdir(data_dir)
    subdirs = [d for d in subdirs
               if os.path.isdir(os.path.join(data_dir, d))]

    # For each data subdirectory, load the true data, the MAP estimate, and the MCMC results
    print "WARNING: Make sure we sample all subdirs"
    # import pdb; pdb.set_trace()
    for d in subdirs:
        print "Loading data and results from %s" % d
        print "Loading true data"
        with open(os.path.join(data_dir, d, 'data.pkl'), 'r') as f:
            data = cPickle.load(f)

        print "Loading model"
        with open(os.path.join(data_dir, d, 'model.pkl'), 'r') as f:
            model_data = cPickle.load(f)
            # HACK: fill in defaults for fields that older saved models may lack
            if 'N_dims' not in model_data['network']['graph']:
                model_data['network']['graph']['N_dims'] = 1
            if 'location_prior' not in model_data['network']['graph']:
                model_data['network']['graph']['location_prior'] = {
                    'type': 'gaussian',
                    'mu': 0.0,
                    'sigma': 1.0,
                }
            if 'L' in data['vars']['net']['graph']:
                data['vars']['net']['graph']['L'] = data['vars']['net']['graph']['L'].ravel()
        popn_data = Population(model_data)
        popn_data.set_data(data)
        s_trues.append(popn_data.eval_state(data['vars']))

        try:
            print "Loading map estimate"
            with open(os.path.join(data_dir, d, 'map', 'results.pkl'), 'r') as f:
                x_map = cPickle.load(f)

            model_map = make_model('standard_glm', N=data['N'])
            popn_map = Population(model_map)
            popn_map.set_data(data)
            print "Evaluating MAP state"
            s_infs_map.append(popn_map.eval_state(x_map))

        except Exception as e:
            print "ERROR: Failed to load MAP estimate: %s" % e

        try:
            print "Loading mcmc estimate"
            with open(os.path.join(data_dir, d, 'mcmc', 'results.pkl'), 'r') as f:
                x_mcmc = cPickle.load(f)

            model_mcmc = make_model('sparse_weighted_model', N=data['N'])
            popn_mcmc = Population(model_mcmc)
            popn_mcmc.set_data(data)

            # Now compute the true and false positive rates for MCMC
            # For MCMC results, only consider the tail of the samples
            print "Evaluating MCMC states"
            N_samples = len(x_mcmc)
            start_smpl = int(np.floor(N_samples - sample_frac*N_samples))

            # Evaluate the state
            this_s_mcmc = []
            for i in range(start_smpl, N_samples):
                this_s_mcmc.append(popn_mcmc.eval_state(x_mcmc[i]))
            s_infs_mcmc.append(this_s_mcmc)
        except Exception as e:
            print "ERROR: Failed to load MCMC estimate: %s" % e

    return s_trues, s_infs_map, s_infs_mcmc
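
# Example usage (a sketch; N, T and the graph model are hypothetical values,
# and the directory layout is assumed to match the path built above):
#   s_trues, s_infs_map, s_infs_mcmc = load_set_of_results(16, 60, graph_model='er')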
Example #5
def run_synth_test():
    """ Run a test with synthetic data and MAP inference with cross validation
    """
    options, popn, data, popn_true, x_true = initialize_test_harness()

    # Get the list of models for cross validation
    base_model = make_model(options.model, N=data['N'], dt=0.001)
    models = get_xv_models(base_model)

    # Segment data into training and cross validation sets
    train_frac = 0.75
    T_split = data['T'] * train_frac
    train_data = segment_data(data, (0, T_split))
    xv_data = segment_data(data, (T_split, data['T']))

    # Preprocess the data sequences
    train_data = popn.preprocess_data(train_data)
    xv_data = popn.preprocess_data(xv_data)

    # Sample random initial state
    x0 = popn.sample()

    # Track the best model and parameters
    best_ind = -1
    best_xv_ll = -np.Inf
    best_x = x0
    best_model = None

    # Fit each model using the optimum of the previous models
    train_lls = np.zeros(len(models))
    xv_lls = np.zeros(len(models))
    total_lls = np.zeros(len(models))
    for (i, model) in enumerate(models):
        print "Training model %d" % i
        x0 = copy.deepcopy(best_x)
        popn.set_hyperparameters(model)
        popn.set_data(train_data)
        ll0 = popn.compute_log_p(x0)
        print "Training LL0: %f" % ll0

        # Perform inference
        x_inf = coord_descent(popn, x0=x0, maxiter=1)
        ll_train = popn.compute_log_p(x_inf)
        print "Training LP_inf: %f" % ll_train
        train_lls[i] = ll_train

        # Compute log lkhd on xv data
        popn.set_data(xv_data)
        ll_xv = popn.compute_ll(x_inf)
        print "Cross Validation LL: %f" % ll_xv
        xv_lls[i] = ll_xv

        # Compute log lkhd on total dataset
        popn.set_data(data)
        ll_total = popn.compute_ll(x_inf)
        print "Total LL: %f" % ll_total
        total_lls[i] = ll_total

        # Update best model
        if ll_xv > best_xv_ll:
            best_ind = i
            best_xv_ll = ll_xv
            best_x = copy.deepcopy(x_inf)
            best_model = copy.deepcopy(model)

    # Create a population with the best model
    popn.set_hyperparameters(best_model)
    popn.set_data(data)

    # Fit the best model on the full training data, warm-starting from the
    # best parameters found during cross validation
    best_x = coord_descent(popn,
                           data,
                           x0=best_x,
                           maxiter=1,
                           use_hessian=False,
                           use_rop=False)

    # Print results summary
    for i in np.arange(len(models)):
        print "Model %d:\tTrain LL: %.1f\tXV LL: %.1f\tTotal LL: %.1f" % (
            i, train_lls[i], xv_lls[i], total_lls[i])
    print "Best model: %d" % best_ind
    print "Best Total LL: %f" % popn.compute_ll(best_x)
    print "True LL: %f" % popn_true.compute_ll(x_true)

    # Save results
    results_file = os.path.join(options.resultsDir, 'results.pkl')
    print "Saving results to %s" % results_file
    with open(results_file, 'w') as f:
        cPickle.dump(best_x, f)

    # Plot results
    plot_results(popn, best_x, popn_true, x_true, resdir=options.resultsDir)
def test_latent_distance_network_sampler(N, N_samples=10000):
    """
    Generate a bunch of latent distance networks, run the sampler
    on them to see how well we mix over latent locations.

    :param N: Number of neurons in the network
    """
    true_model_type = 'latent_distance'
    if true_model_type == 'erdos_renyi':
        true_model = make_model('sparse_weighted_model', N)
    elif true_model_type == 'latent_distance':
        true_model = make_model('distance_weighted_model', N)

    distmodel = make_model('distance_weighted_model', N)
    D = distmodel['network']['graph']['N_dims']
    trials = 1
    for t in range(trials):
        # Generate a true random network
        popn_true, x_true, A_true = sample_network_from_prior(true_model)
        dist_popn, x_inf, _ = sample_network_from_prior(distmodel)

        # Seed the inference population with the true network
        x_inf['net']['graph']['A'] = A_true

        # Create a location sampler
        print "Initializing latent location sampler"
        loc_sampler = LatentLocationUpdate()
        loc_sampler.preprocess(dist_popn)

        # Run the sampler (note: this overrides the N_samples argument with a
        # smaller value for this test)
        N_samples = 1000
        smpls = fit_latent_network_given_A(x_inf, loc_sampler, N_samples=N_samples)

        if true_model_type == 'latent_distance':
            # Evaluate the state
            L_true = x_true['net']['graph']['L'].reshape((N,D))
            L_smpls = [x['net']['graph']['L'].reshape((N,D)) for x in smpls]

            # Visualize the results
            plot_latent_distance_samples(L_true, L_smpls, A_true=A_true)

            # Plot errors in relative distance over time
            compute_diff_of_dists(L_true, L_smpls)

        # Compute marginal likelihood of erdos renyi with the same sparsity
        nnz_A = float(A_true.sum())
        N_conns = A_true.size
        # Ignore the diagonal
        nnz_A -= N
        N_conns -= N
        # Now compute density
        er_rho = nnz_A / N_conns
        true_er_marg_lkhd = nnz_A * np.log(er_rho) + (N_conns-nnz_A)*np.log(1-er_rho)
        print "True ER Marg Lkhd: ", true_er_marg_lkhd

        # DEBUG: Make sure AIS gives the same answer as what we just computed
        # er_model = make_model('sparse_weighted_model', N)
        # er_model['network']['graph']['rho'] = er_rho
        # er_popn, x_inf, _ = sample_network_from_prior(er_model)
        # # Make a dummy update for the ER model
        # er_sampler = MetropolisHastingsUpdate()
        # er_x0 = er_popn.sample()
        # er_x0['net']['graph']['A'] = A_true
        # er_marg_lkhd = ais_latent_network_given_A(er_x0,
        #                                           er_popn.network.graph,
        #                                           er_sampler
        #                                           )
        #
        # print "AIS ER Marg Lkhd: ", er_marg_lkhd



        # Approximate the marginal log likelihood of the distance mode
        dist_x0 = dist_popn.sample()
        dist_x0['net']['graph']['A'] = A_true
        dist_marg_lkhd = ais_latent_network_given_A(dist_x0,
                                                    dist_popn.network.graph,
                                                    loc_sampler
                                                    )
        print "Dist Marg Lkhd: ", dist_marg_lkhd
def fit_latent_network_to_mle():
    """ Run a test with synthetic data and MCMC inference
    """
    options, popn, data, popn_true, x_true = initialize_test_harness()

    # import pdb; pdb.set_trace()
    # Load MLE parameters from the command line. They are required below,
    # where they seed the weighted adjacency matrix.
    assert options.x0_file is not None, "This test requires an x0 file with MLE parameters"
    mle_x = None
    if options.x0_file is not None:
        with open(options.x0_file, 'r') as f:
            print "Initializing with state from: %s" % options.x0_file
            mle_x = cPickle.load(f)

            mle_model = make_model('standard_glm', N=data['N'])
            mle_popn = Population(mle_model)
            mle_popn.set_data(data)

    # Create a location sampler
    print "Initializing latent location sampler"
    loc_sampler = LatentLocationUpdate()
    loc_sampler.preprocess(popn)

    # Convert the mle results into a weighted adjacency matrix
    x_aw = popn.sample(None)
    x_aw = convert_model(mle_popn, mle_model, mle_x, popn, popn.model, x_aw)

    # Get rid of unnecessary keys
    del x_aw['glms']

    # Fit the latent distance network to a thresholded adjacency matrix
    ws = np.sort(np.abs(x_aw['net']['weights']['W']))

    wperm = np.argsort(np.abs(x_aw['net']['weights']['W']))
    nthrsh = 20
    threshs = np.arange(ws.size, step=ws.size/nthrsh)
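
    # Each value in threshs is an index into the sorted |W| values; keeping only
    # the weights at or above that index (A[wperm[th:]] = 1 below) yields
    # progressively sparser adjacency matrices as th increases.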

    res = []

    N = popn.N
    for th in threshs:
        print "Fitting network for threshold index %d of %d" % (th, ws.size)
        A = np.zeros_like(ws, dtype=np.int8)
        A[wperm[th:]] = 1
        A = A.reshape((N,N))
        # A = (np.abs(x_aw['net']['weights']['W']) >= th).astype(np.int8).reshape((N,N))

        # Make sure the diag is still all 1s
        A[np.diag_indices(N)] = 1

        x = copy.deepcopy(x_aw)
        x['net']['graph']['A'] = A
        smpls = fit_latent_network_given_A(x, loc_sampler)

        # Index the results by the overall sparsity of A
        key = (np.sum(A)-N) / (np.float(np.size(A))-N)
        res.append((key, smpls))

    # Save results
    results_file = os.path.join(options.resultsDir, 'fit_latent_network_results.pkl')
    print "Saving results to %s" % results_file
    with open(results_file, 'w') as f:
        cPickle.dump(res, f)
def run_parallel_map():
    """ Run a test with synthetic data and MCMC inference
    """
    options, popn, data, client, popn_true, x_true = initialize_parallel_test_harness()

    # Get the list of models for cross validation
    base_model = make_model(options.model, N=data['N'])
    models = get_xv_models(base_model)

    # Segment data into training and cross validation sets
    train_frac = 0.75
    T_split = data['T'] * train_frac
    train_data = segment_data(data, (0,T_split))
    xv_data = segment_data(data, (T_split,data['T']))

    # Sample random initial state
    x0 = popn.sample(None)

    # Track the best model and parameters
    best_ind = -1
    best_xv_ll = -np.Inf
    best_x = x0
    best_model = None

    use_existing = False

    start_time = time.clock()

    # Fit each model using the optimum of the previous models
    train_lls = np.zeros(len(models))
    xv_lls = np.zeros(len(models))
    total_lls = np.zeros(len(models))
    for (i,model) in enumerate(models):
        print "Evaluating model %d" % i
        set_hyperparameters_on_engines(client[:], model)
        add_data_on_engines(client[:], train_data)

        if use_existing and  \
           os.path.exists(os.path.join(options.resultsDir, 'results.partial.%d.pkl' % i)):
            print "Found existing results for model %d" % i
            with open(os.path.join(options.resultsDir, 'results.partial.%d.pkl' % i)) as f:
                (x_inf, ll_train, ll_xv, ll_total) = cPickle.load(f)
                train_lls[i] = ll_train
                xv_lls[i] = ll_xv
                total_lls[i] = ll_total

        else:
            x0 = copy.deepcopy(best_x)
            # set_data_on_engines(client[:], train_data)
            ll0 = parallel_compute_ll(client[:], x0, data['N'])
            print "Training LL0: %f" % ll0

            # Perform inference
            x_inf = parallel_coord_descent(client, data['N'], x0=x0, maxiter=1,
                                           use_hessian=False,
                                           use_rop=False)

            ll_train = parallel_compute_ll(client[:], x_inf, data['N'])
            print "Training LL_inf: %f" % ll_train
            train_lls[i] = ll_train

            # Compute log lkhd on xv data
            add_data_on_engines(client[:], xv_data)
            ll_xv = parallel_compute_ll(client[:], x_inf, data['N'])
            print "Cross Validation LL: %f" % ll_xv
            xv_lls[i] = ll_xv

            # Compute log lkhd on total dataset
            add_data_on_engines(client[:], data)
            ll_total = parallel_compute_ll(client[:], x_inf, data['N'])
            print "Total LL: %f" % ll_total
            total_lls[i] = ll_total

            print "Saving partial results"
            with open(os.path.join(options.resultsDir, 'results.partial.%d.pkl' % i),'w') as f:
                cPickle.dump((x_inf, ll_train, ll_xv, ll_total), f, protocol=-1)

        # Update best model
        if ll_xv > best_xv_ll:
            best_ind = i
            best_xv_ll = ll_xv
            best_x = copy.deepcopy(x_inf)
            best_model = copy.deepcopy(model)

    print "Training the best model (%d) with the full dataset" % best_ind
    # Set the best hyperparameters
    set_hyperparameters_on_engines(client[:], best_model)
    add_data_on_engines(client[:], data)

    # Fit the best model on the full training data
    best_x = parallel_coord_descent(client, data['N'], x0=best_x, maxiter=1,
                                    use_hessian=False,
                                    use_rop=False)

    # Print results summary
    for i in np.arange(len(models)):
        print "Model %d:\tTrain LL: %.1f\tXV LL: %.1f\tTotal LL: %.1f" % (i, train_lls[i], xv_lls[i], total_lls[i])
    print "Best model: %d" % best_ind
    print "Best Total LL: %f" % parallel_compute_ll(client[:], best_x, data['N'])
    print "True LL: %f" % popn_true.compute_ll(x_true)


    stop_time = time.clock()

    # Save results
    with open(os.path.join(options.resultsDir, 'results.pkl'),'w') as f:
        cPickle.dump(best_x, f, protocol=-1)

    # Save runtime
    with open(os.path.join(options.resultsDir, 'runtime.pkl'),'w') as f:
        cPickle.dump(stop_time-start_time, f, protocol=-1)
Example #9
def load_set_of_results(N, T, graph_model='er', sample_frac=0.1):
    data_dir = os.path.join('/group', 'hips', 'scott', 'pyglm', 'data',
                            'synth', graph_model, 'N%dT%d' % (N, T))

    # Evaluate the state for each of the parameter settings
    s_infs_mcmc = []
    s_infs_map = []
    s_trues = []

    # Enumerate the subdirectories containing the data
    subdirs = os.listdir(data_dir)
    subdirs = [d for d in subdirs
               if os.path.isdir(os.path.join(data_dir, d))]

    # For each data subdirectory, load the true data, the MAP estimate, and the MCMC results
    print "WARNING: Make sure we sample all subdirs"
    # import pdb; pdb.set_trace()
    for d in subdirs:
        print "Loading data and results from %s" % d
        print "Loading true data"
        with open(os.path.join(data_dir, d, 'data.pkl'), 'r') as f:
            data = cPickle.load(f)

        print "Loading model"
        with open(os.path.join(data_dir, d, 'model.pkl'), 'r') as f:
            model_data = cPickle.load(f)
            # HACK: fill in defaults for fields that older saved models may lack
            if 'N_dims' not in model_data['network']['graph']:
                model_data['network']['graph']['N_dims'] = 1
            if 'location_prior' not in model_data['network']['graph']:
                model_data['network']['graph']['location_prior'] = {
                    'type': 'gaussian',
                    'mu': 0.0,
                    'sigma': 1.0,
                }
            if 'L' in data['vars']['net']['graph']:
                data['vars']['net']['graph']['L'] = data['vars']['net'][
                    'graph']['L'].ravel()
        popn_data = Population(model_data)
        popn_data.set_data(data)
        s_trues.append(popn_data.eval_state(data['vars']))

        try:
            print "Loading map estimate"
            with open(os.path.join(data_dir, d, 'map', 'results.pkl'),
                      'r') as f:
                x_map = cPickle.load(f)

            model_map = make_model('standard_glm', N=data['N'])
            popn_map = Population(model_map)
            popn_map.set_data(data)
            print "Evaluating MAP state"
            s_infs_map.append(popn_map.eval_state(x_map))

        except Exception as e:
            print "ERROR: Failed to load MAP estimate: %s" % e

        try:
            print "Loading mcmc estimate"
            with open(os.path.join(data_dir, d, 'mcmc', 'results.pkl'),
                      'r') as f:
                x_mcmc = cPickle.load(f)

            model_mcmc = make_model('sparse_weighted_model', N=data['N'])
            popn_mcmc = Population(model_mcmc)
            popn_mcmc.set_data(data)

            # Now compute the true and false positive rates for MCMC
            # For MCMC results, only consider the tail of the samples
            print "Evaluating MCMC states"
            N_samples = len(x_mcmc)
            start_smpl = int(np.floor(N_samples - sample_frac * N_samples))

            # Evaluate the state
            this_s_mcmc = []
            for i in range(start_smpl, N_samples):
                this_s_mcmc.append(popn_mcmc.eval_state(x_mcmc[i]))
            s_infs_mcmc.append(this_s_mcmc)
        except Exception as e:
            print "ERROR: Failed to load MCMC estimate: %s" % e

    return s_trues, s_infs_map, s_infs_mcmc
Example #10
def run_gen_synth_data():
    """ Run a test with synthetic data and MCMC inference
    """
    options, args = parse_cmd_line_args()

    # Create the model
    dt = 0.001
    model = make_model(options.model, N=options.N, dt=dt)
    # Set the sparsity level to minimize the risk of unstable networks
    stabilize_sparsity(model)

    print "Creating master population object"
    popn = Population(model)

    # Sample random parameters from the model
    x_true = popn.sample()

    # Check stability of matrix
    assert check_stability(model, x_true,
                           options.N), "ERROR: Sampled network is unstable!"

    # Save the model so it can be loaded alongside the data
    fname_model = os.path.join(options.resultsDir, 'model.pkl')
    print "Saving data to %s" % fname_model
    with open(fname_model, 'w') as f:
        cPickle.dump(model, f, protocol=-1)

    print "Generating synthetic data with %d neurons and %.2f seconds." % \
          (options.N, options.T_stop)

    # Set simulation parameters
    dt_stim = 0.1
    D_stim = (5, 5)
    # D_stim = model['bkgd']['D_stim'] if 'D_stim' in model['bkgd'] else 0
    if isinstance(D_stim, int):
        D_stim = [D_stim]
    stim = np.random.randn(int(options.T_stop / dt_stim), *D_stim)

    data = gen_synth_data(options.N, options.T_stop, popn, x_true, dt, dt_stim,
                          D_stim, stim)

    # Set the data so that the population state can be evaluated
    popn.add_data(data)

    # DEBUG Evaluate the firing rate and the simulated firing rate
    state = popn.eval_state(x_true)
    for n in np.arange(options.N):
        lam_true = state['glms'][n]['lam']
        lam_sim = popn.glm.nlin_model.f_nlin(data['X'][:, n])
        assert np.allclose(lam_true, lam_sim)

    # Pickle the data so we can open it more easily
    fname_pkl = os.path.join(options.resultsDir, 'data.pkl')
    print "Saving data to %s" % fname_pkl
    with open(fname_pkl, 'w') as f:
        cPickle.dump(data, f, protocol=-1)

    # Plot firing rates, stimulus responses, etc
    do_plot_imp_responses = int(options.N) <= 16
    plot_results(popn,
                 data['vars'],
                 resdir=options.resultsDir,
                 do_plot_stim_resp=True,
                 do_plot_imp_responses=do_plot_imp_responses)
Example #11
def run_gen_synth_data():
    """ Run a test with synthetic data and MCMC inference
    """
    options, args = parse_cmd_line_args()
    
    # Create the model
    dt = 0.001
    model = make_model(options.model, N=options.N, dt=dt)
    # Set the sparsity level to minimize the risk of unstable networks
    stabilize_sparsity(model)

    print "Creating master population object"
    popn = Population(model)

    # Sample random parameters from the model
    x_true = popn.sample()

    # Check stability of matrix
    assert check_stability(model, x_true, options.N), "ERROR: Sampled network is unstable!"


    # Save the model so it can be loaded alongside the data
    fname_model = os.path.join(options.resultsDir, 'model.pkl')
    print "Saving data to %s" % fname_model
    with open(fname_model,'w') as f:
        cPickle.dump(model, f, protocol=-1)

    print "Generating synthetic data with %d neurons and %.2f seconds." % \
          (options.N, options.T_stop)

    # Set simulation parameters
    dt_stim = 0.1
    D_stim = (5,5)
    # D_stim = model['bkgd']['D_stim'] if 'D_stim' in model['bkgd'] else 0
    if isinstance(D_stim, int):
        D_stim = [D_stim]
    stim = np.random.randn(int(options.T_stop / dt_stim), *D_stim)

    data = gen_synth_data(options.N, options.T_stop, popn, x_true, dt, dt_stim, D_stim, stim)

    # Set the data so that the population state can be evaluated
    popn.add_data(data)
    
    # DEBUG Evaluate the firing rate and the simulated firing rate
    state = popn.eval_state(x_true)
    for n in np.arange(options.N):
        lam_true = state['glms'][n]['lam']
        lam_sim =  popn.glm.nlin_model.f_nlin(data['X'][:,n])
        assert np.allclose(lam_true, lam_sim)

    # Pickle the data so we can open it more easily
    fname_pkl = os.path.join(options.resultsDir, 'data.pkl')
    print "Saving data to %s" % fname_pkl
    with open(fname_pkl,'w') as f:
        cPickle.dump(data, f, protocol=-1)

    # Plot firing rates, stimulus responses, etc
    do_plot_imp_responses = int(options.N) <= 16
    plot_results(popn, data['vars'],
                 resdir=options.resultsDir,
                 do_plot_stim_resp=True,
                 do_plot_imp_responses=do_plot_imp_responses)
Example #12
def run_synth_test():
    """ Run a test with synthetic data and MAP inference with cross validation
    """
    options, popn, data, popn_true, x_true = initialize_test_harness()
    
    # Get the list of models for cross validation
    base_model = make_model(options.model, N=data['N'], dt=0.001)
    models = get_xv_models(base_model)

    # Segment data into training and cross validation sets
    train_frac = 0.75
    T_split = data['T'] * train_frac
    train_data = segment_data(data, (0,T_split))
    xv_data = segment_data(data, (T_split,data['T']))

    # Preprocess the data sequences
    train_data = popn.preprocess_data(train_data)
    xv_data = popn.preprocess_data(xv_data)

    # Sample random initial state
    x0 = popn.sample()

    # Track the best model and parameters
    best_ind = -1
    best_xv_ll = -np.Inf
    best_x = x0
    best_model = None

    # Fit each model using the optimum of the previous models
    train_lls = np.zeros(len(models))
    xv_lls = np.zeros(len(models))
    total_lls = np.zeros(len(models))
    for (i,model) in enumerate(models):
        print "Training model %d" % i
        x0 = copy.deepcopy(best_x)
        popn.set_hyperparameters(model)
        popn.set_data(train_data)
        ll0 = popn.compute_log_p(x0)
        print "Training LL0: %f" % ll0

        # Perform inference
        x_inf = coord_descent(popn, x0=x0, maxiter=1)
        ll_train = popn.compute_log_p(x_inf)
        print "Training LP_inf: %f" % ll_train
        train_lls[i] = ll_train

        
        # Compute log lkhd on xv data
        popn.set_data(xv_data)
        ll_xv = popn.compute_ll(x_inf)
        print "Cross Validation LL: %f" % ll_xv
        xv_lls[i] = ll_xv

        # Compute log lkhd on total dataset
        popn.set_data(data)
        ll_total = popn.compute_ll(x_inf)
        print "Total LL: %f" % ll_total
        total_lls[i] = ll_total

        # Update best model
        if ll_xv > best_xv_ll:
            best_ind = i
            best_xv_ll = ll_xv
            best_x = copy.deepcopy(x_inf)
            best_model = copy.deepcopy(model)
        
    # Create a population with the best model
    popn.set_hyperparameters(best_model)
    popn.set_data(data)

    # Fit the best model on the full training data, warm-starting from the
    # best parameters found during cross validation
    best_x = coord_descent(popn, data, x0=best_x, maxiter=1,
                           use_hessian=False,
                           use_rop=False)

    # Print results summary
    for i in np.arange(len(models)):
        print "Model %d:\tTrain LL: %.1f\tXV LL: %.1f\tTotal LL: %.1f" % (i, train_lls[i], xv_lls[i], total_lls[i])
    print "Best model: %d" % best_ind
    print "Best Total LL: %f" % popn.compute_ll(best_x)
    print "True LL: %f" % popn_true.compute_ll(x_true)

    # Save results
    results_file = os.path.join(options.resultsDir, 'results.pkl')
    print "Saving results to %s" % results_file
    with open(results_file, 'w') as f:
        cPickle.dump(best_x, f)

    # Plot results
    plot_results(popn, best_x, popn_true, x_true, resdir=options.resultsDir)
Example #13
def run_parallel_map():
    """ Run a test with synthetic data and MCMC inference
    """
    options, popn, data, client, popn_true, x_true = initialize_parallel_test_harness()

    # Get the list of models for cross validation
    base_model = make_model(options.model, N=data['N'])
    models = get_xv_models(base_model)

    # Segment data into training and cross validation sets
    train_frac = 0.75
    T_split = data['T'] * train_frac
    train_data = segment_data(data, (0, T_split))
    xv_data = segment_data(data, (T_split, data['T']))

    # Sample random initial state
    x0 = popn.sample(None)

    # Track the best model and parameters
    best_ind = -1
    best_xv_ll = -np.Inf
    best_x = x0
    best_model = None

    use_existing = False

    start_time = time.clock()

    # Fit each model using the optimum of the previous models
    train_lls = np.zeros(len(models))
    xv_lls = np.zeros(len(models))
    total_lls = np.zeros(len(models))
    for (i, model) in enumerate(models):
        print "Evaluating model %d" % i
        set_hyperparameters_on_engines(client[:], model)
        add_data_on_engines(client[:], train_data)

        if use_existing and  \
           os.path.exists(os.path.join(options.resultsDir, 'results.partial.%d.pkl' % i)):
            print "Found existing results for model %d" % i
            with open(
                    os.path.join(options.resultsDir,
                                 'results.partial.%d.pkl' % i)) as f:
                (x_inf, ll_train, ll_xv, ll_total) = cPickle.load(f)
                train_lls[i] = ll_train
                xv_lls[i] = ll_xv
                total_lls[i] = ll_total

        else:
            x0 = copy.deepcopy(best_x)
            # set_data_on_engines(client[:], train_data)
            ll0 = parallel_compute_ll(client[:], x0, data['N'])
            print "Training LL0: %f" % ll0

            # Perform inference
            x_inf = parallel_coord_descent(client,
                                           data['N'],
                                           x0=x0,
                                           maxiter=1,
                                           use_hessian=False,
                                           use_rop=False)

            ll_train = parallel_compute_ll(client[:], x_inf, data['N'])
            print "Training LL_inf: %f" % ll_train
            train_lls[i] = ll_train

            # Compute log lkhd on xv data
            add_data_on_engines(client[:], xv_data)
            ll_xv = parallel_compute_ll(client[:], x_inf, data['N'])
            print "Cross Validation LL: %f" % ll_xv
            xv_lls[i] = ll_xv

            # Compute log lkhd on total dataset
            add_data_on_engines(client[:], data)
            ll_total = parallel_compute_ll(client[:], x_inf, data['N'])
            print "Total LL: %f" % ll_total
            total_lls[i] = ll_total

            print "Saving partial results"
            with open(
                    os.path.join(options.resultsDir,
                                 'results.partial.%d.pkl' % i), 'w') as f:
                cPickle.dump((x_inf, ll_train, ll_xv, ll_total),
                             f,
                             protocol=-1)

        # Update best model
        if ll_xv > best_xv_ll:
            best_ind = i
            best_xv_ll = ll_xv
            best_x = copy.deepcopy(x_inf)
            best_model = copy.deepcopy(model)

    print "Training the best model (%d) with the full dataset" % best_ind
    # Set the best hyperparameters
    set_hyperparameters_on_engines(client[:], best_model)
    add_data_on_engines(client[:], data)

    # Fit the best model on the full training data
    best_x = parallel_coord_descent(client,
                                    data['N'],
                                    x0=best_x,
                                    maxiter=1,
                                    use_hessian=False,
                                    use_rop=False)

    # Print results summary
    for i in np.arange(len(models)):
        print "Model %d:\tTrain LL: %.1f\tXV LL: %.1f\tTotal LL: %.1f" % (
            i, train_lls[i], xv_lls[i], total_lls[i])
    print "Best model: %d" % best_ind
    print "Best Total LL: %f" % parallel_compute_ll(client[:], best_x,
                                                    data['N'])
    print "True LL: %f" % popn_true.compute_ll(x_true)

    stop_time = time.clock()

    # Save results
    with open(os.path.join(options.resultsDir, 'results.pkl'), 'w') as f:
        cPickle.dump(best_x, f, protocol=-1)

    # Save runtime
    with open(os.path.join(options.resultsDir, 'runtime.pkl'), 'w') as f:
        cPickle.dump(stop_time - start_time, f, protocol=-1)