Example #1
    def compute_bootstrap(self, nb_bootstrap_samples=200):
        # nb_bootstrap_samples was undefined in the original snippet; passing it
        # in, with an assumed default of 200, makes the method self-contained.
        print "Computing bootstrap..."

        self.dataset['bootstrap_nitems_pval'] = np.nan*np.empty(self.dataset['n_items_size'])
        self.dataset['bootstrap_nitems'] = np.empty(self.dataset['n_items_size'], dtype=np.object)
        self.dataset['bootstrap_subject_nitems'] = np.empty((self.dataset['subject_size'], self.dataset['n_items_size']), dtype=np.object)
        self.dataset['bootstrap_subject_nitems_pval'] = np.nan*np.empty((self.dataset['subject_size'], self.dataset['n_items_size']))


        for n_items_i, n_items in enumerate(np.unique(self.dataset['n_items'])):
            if n_items > 1:
                for subject_i, subject in enumerate(np.unique(self.dataset['subject'])):
                    print "Nitems %d, subject %d" % (n_items, subject)

                    # Bootstrap per subject and nitems
                    ids_filtered = (self.dataset['subject']==subject).flatten() & (self.dataset['n_items'] == n_items).flatten()

                    # Compute bootstrap if required

                    bootstrap = em_circularmixture_allitems_uniquekappa.bootstrap_nontarget_stat(self.dataset['response'][ids_filtered, 0], self.dataset['item_angle'][ids_filtered, 0], self.dataset['item_angle'][ids_filtered, 1:n_items], nb_bootstrap_samples=nb_bootstrap_samples)
                    self.dataset['bootstrap_subject_nitems'][subject_i, n_items_i] = bootstrap
                    self.dataset['bootstrap_subject_nitems_pval'][subject_i, n_items_i] = bootstrap['p_value']

                    print self.dataset['bootstrap_subject_nitems_pval'][:, n_items_i]

                print "Nitems %d, all subjects" % (n_items)

                # Data collapsed across subjects
                ids_filtered = (self.dataset['n_items'] == n_items).flatten()

                bootstrap = em_circularmixture_allitems_uniquekappa.bootstrap_nontarget_stat(self.dataset['response'][ids_filtered, 0], self.dataset['item_angle'][ids_filtered, 0], self.dataset['item_angle'][ids_filtered, 1:n_items], nb_bootstrap_samples=nb_bootstrap_samples)
                self.dataset['bootstrap_nitems'][n_items_i] = bootstrap
                self.dataset['bootstrap_nitems_pval'][n_items_i] = bootstrap['p_value']

                print self.dataset['bootstrap_nitems_pval']
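
Every snippet in this section consumes the dictionary returned by em_circularmixture_allitems_uniquekappa.bootstrap_nontarget_stat. A hedged interface sketch, inferred purely from the call sites in this section (the actual module is not reproduced here):

# Interface sketch, inferred from usage below -- not the actual source.
#
#   bootstrap = em_circularmixture_allitems_uniquekappa.bootstrap_nontarget_stat(
#       responses,                    # (N,) response angles
#       targets,                      # (N,) target angles
#       nontargets,                   # (N, K) nontarget angles
#       nb_bootstrap_samples=200,
#       resample_targets=False)
#
#   bootstrap['p_value']                         # scalar bootstrap p-value
#   bootstrap['nontarget_bootstrap_samples']     # (nb_bootstrap_samples,) array
#   bootstrap['allnontarget_bootstrap_samples']  # per-nontarget samples, flattened
#   bootstrap['allnontarget_p_value']            # one p-value per nontarget
#
# The reloader snippets later also pass sumnontargets_bootstrap_ecdf /
# allnontargets_bootstrap_ecdf to reuse a precomputed ECDF instead of drawing
# fresh bootstrap samples.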
Example #2
def launcher_do_nontarget_bootstrap(args):
    '''
        Compute a bootstrap estimate, using outputs from the model run earlier
    '''

    print "Doing a piece of work for launcher_do_nontarget_bootstrap"

    try:
        # Convert Argparse.Namespace to dict
        all_parameters = vars(args)
    except TypeError:
        # Assume it's already done
        assert type(args) is dict, "args is neither Namespace nor dict, WHY?"
        all_parameters = args

    print all_parameters

    # Create DataIO
    #  (complete label with current variable state)
    dataio = DataIO.DataIO(output_folder=all_parameters['output_directory'], label=all_parameters['label'].format(**all_parameters))
    save_every = 1
    run_counter = 0

    # Load the data
    if all_parameters['subaction'] == 'mixed':
        # Mixed runs
        model_outputs = utils.load_npy( os.path.join(os.getenv("WORKDIR_DROP", None), 'Experiments', 'bootstrap_nontargets', 'global_plots_errors_distribution-plots_errors_distribution-d977e237-cfce-473b-a292-00695e725259.npy'))
    else:
        # Conjunctive runs
        model_outputs = utils.load_npy( os.path.join(os.getenv("WORKDIR_DROP", None), 'Experiments', 'bootstrap_nontargets', 'global_plots_errors_distribution-plots_errors_distribution-cc1a49b0-f5f0-4e82-9f0f-5a16a2bfd4e8.npy'))

    data_responses_all = model_outputs['result_responses_all'][..., 0]
    data_target_all = model_outputs['result_target_all'][..., 0]
    data_nontargets_all = model_outputs['result_nontargets_all'][..., 0]
    T_space = model_outputs['T_space']
    sigmax_space = model_outputs['sigmax_space']

    K = data_nontargets_all.shape[-1]

    # Result arrays
    result_bootstrap_samples_allitems = np.nan*np.ones((sigmax_space.size, T_space.size, all_parameters['num_repetitions']))
    result_bootstrap_samples = np.nan*np.ones((sigmax_space.size, T_space.size, all_parameters['num_repetitions']))
    result_bootstrap_samples_allitems_uniquekappa_sumnontarget = np.nan*np.ones((sigmax_space.size, T_space.size, all_parameters['num_repetitions']))
    result_bootstrap_samples_allitems_uniquekappa_allnontarget = np.nan*np.ones((sigmax_space.size, T_space.size, K*all_parameters['num_repetitions']))

    search_progress = progress.Progress(sigmax_space.size*(T_space.size-1))

    for sigmax_i, sigmax in enumerate(sigmax_space):
        for T_i, T in enumerate(T_space[1:]):
            T_i += 1
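            # enumerate() over T_space[1:] starts T_i at 0; shifting it by one
            # lets T_i index the full T axis of the result arrays (T_space[0]
            # has no nontargets to bootstrap).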

            print "%.2f%%, %s left - %s" % (search_progress.percentage(), search_progress.time_remaining_str(), search_progress.eta_str())

            print "Bootstrap for T=%d, sigmax=%.2f, %d bootstrap samples" % (T, sigmax, all_parameters['num_repetitions'])

            # Update parameter

            ### WORK WORK WORK work? ###

            # Get some bootstrap samples
            bootstrap_allitems_nontargets_allitems_uniquekappa = em_circularmixture_allitems_uniquekappa.bootstrap_nontarget_stat(
                    data_responses_all[sigmax_i, T_i],
                    data_target_all[sigmax_i, T_i],
                    data_nontargets_all[sigmax_i, T_i, :, :T_i],
                    nb_bootstrap_samples=all_parameters['num_repetitions'],
                    resample_targets=False)
            # bootstrap_allitems_nontargets_allitems = em_circularmixture_allitems.bootstrap_nontarget_stat(
            #         data_responses_all[sigmax_i, T_i],
            #         data_target_all[sigmax_i, T_i],
            #         data_nontargets_all[sigmax_i, T_i, :, :T_i],
            #         nb_bootstrap_samples=all_parameters['num_repetitions'],
            #         resample_targets=False)
            bootstrap_allitems_nontargets = em_circularmixture.bootstrap_nontarget_stat(
                    data_responses_all[sigmax_i, T_i],
                    data_target_all[sigmax_i, T_i],
                    data_nontargets_all[sigmax_i, T_i, :, :T_i],
                    nb_bootstrap_samples=all_parameters['num_repetitions'],
                    resample_targets=False)

            # Collect and store responses
            # result_bootstrap_samples_allitems[sigmax_i, T_i] = bootstrap_allitems_nontargets_allitems['nontarget_bootstrap_samples']
            result_bootstrap_samples[sigmax_i, T_i] = bootstrap_allitems_nontargets['nontarget_bootstrap_samples']

            result_bootstrap_samples_allitems_uniquekappa_sumnontarget[sigmax_i, T_i] = bootstrap_allitems_nontargets_allitems_uniquekappa['nontarget_bootstrap_samples']
            result_bootstrap_samples_allitems_uniquekappa_allnontarget[sigmax_i, T_i, :all_parameters['num_repetitions']*T_i] = bootstrap_allitems_nontargets_allitems_uniquekappa['allnontarget_bootstrap_samples']

            print result_bootstrap_samples_allitems[sigmax_i, T_i]
            print result_bootstrap_samples[sigmax_i, T_i]
            print result_bootstrap_samples_allitems_uniquekappa_sumnontarget[sigmax_i, T_i]
            print result_bootstrap_samples_allitems_uniquekappa_allnontarget[sigmax_i, T_i]

            ### /Work ###

            search_progress.increment()
            if run_counter % save_every == 0 or search_progress.done():
                dataio.save_variables_default(locals())
            run_counter += 1

    # Finished
    dataio.save_variables_default(locals())

    print "All finished"
    return locals()
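
Thanks to the vars()/dict fallback at the top, these launchers can be driven without argparse. A minimal driver sketch; the parameter names are the ones the launcher reads above, the values are placeholders, and it still needs WORKDIR_DROP and the model-output .npy files to exist:

# Hypothetical driver for the launcher above.
params = dict(
    output_directory='./Data',
    label='bootstrap_nontargets_{subaction}',
    subaction='mixed',
    num_repetitions=200,
)
outputs = launcher_do_nontarget_bootstrap(params)
print outputs['result_bootstrap_samples'].shape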
Example #3
def launcher_do_bootstrap_experimental(args):
    '''
        Compute a bootstrap estimate, using outputs from the experimental data
    '''

    print "Doing a piece of work for launcher_do_bootstrap_experimental"

    try:
        # Convert Argparse.Namespace to dict
        all_parameters = vars(args)
    except TypeError:
        # Assume it's already done
        assert type(args) is dict, "args is neither Namespace nor dict, WHY?"
        all_parameters = args

    print all_parameters

    # Create DataIO
    #  (complete label with current variable state)
    dataio = DataIO.DataIO(output_folder=all_parameters['output_directory'], label=all_parameters['label'].format(**all_parameters))
    save_every = 1
    run_counter = 0

    # Load the data
    if all_parameters['subaction'] == 'bays09' or all_parameters['subaction'] == '':
        # Bays2009 dataset
        dataset = load_experimental_data.load_data_bays09(fit_mixture_model=True)

    # Result arrays
    result_bootstrap_nitems_samples = np.nan*np.empty((dataset['n_items_size'], all_parameters['num_repetitions']))
    result_bootstrap_subject_nitems_samples = np.nan*np.empty((dataset['subject_size'], dataset['n_items_size'], all_parameters['num_repetitions']))

    search_progress = progress.Progress(dataset['subject_size']*(dataset['n_items_size']-1))

    for n_items_i, n_items in enumerate(np.unique(dataset['n_items'])):
        if n_items > 1:
            print "Nitems %d, all subjects" % (n_items)

            # Data collapsed across subjects
            ids_filtered = (dataset['n_items'] == n_items).flatten()

            bootstrap = em_circularmixture_allitems_uniquekappa.bootstrap_nontarget_stat(
                dataset['response'][ids_filtered, 0],
                dataset['item_angle'][ids_filtered, 0],
                dataset['item_angle'][ids_filtered, 1:n_items],
                nb_bootstrap_samples=all_parameters['num_repetitions'],
                resample_targets=False
                )

            result_bootstrap_nitems_samples[n_items_i] = bootstrap['nontarget_bootstrap_samples']

            print result_bootstrap_nitems_samples

            for subject_i, subject in enumerate(np.unique(dataset['subject'])):
                print "Nitems %d, subject %d" % (n_items, subject)

                # Bootstrap per subject and nitems
                ids_filtered = (dataset['subject']==subject).flatten() & (dataset['n_items'] == n_items).flatten()

                # Compute bootstrap if required

                bootstrap = em_circularmixture_allitems_uniquekappa.bootstrap_nontarget_stat(
                    dataset['response'][ids_filtered, 0],
                    dataset['item_angle'][ids_filtered, 0],
                    dataset['item_angle'][ids_filtered, 1:n_items],
                    nb_bootstrap_samples=all_parameters['num_repetitions'],
                    resample_targets=False)
                result_bootstrap_subject_nitems_samples[subject_i, n_items_i] = bootstrap['nontarget_bootstrap_samples']

                print result_bootstrap_subject_nitems_samples[:, n_items_i]

                search_progress.increment()
                if run_counter % save_every == 0 or search_progress.done():
                    dataio.save_variables_default(locals())
                run_counter += 1


    # Finished
    dataio.save_variables_default(locals())

    print "All finished"
    return locals()
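
The per-subject filtering above combines flattened boolean masks over the (N, 1) columns of the dataset dict. A standalone sketch of the pattern with toy arrays (names are illustrative):

import numpy as np

subject = np.array([[1], [1], [2], [2]])   # (N, 1) columns, as in the dataset dict
n_items = np.array([[2], [4], [2], [4]])

# flatten() turns the (N, 1) columns into (N,) masks that can be AND-ed together
ids_filtered = (subject == 1).flatten() & (n_items == 2).flatten()
print ids_filtered                          # [ True False False False]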
Example #4
def launcher_do_nontarget_bootstrap_misbindingruns(args):
    '''
        Compute a bootstrap estimate, using outputs from a Misbinding generator run.
    '''

    print "Doing a piece of work for launcher_do_nontarget_bootstrap"

    try:
        # Convert Argparse.Namespace to dict
        all_parameters = vars(args)
    except TypeError:
        # Assume it's already done
        assert type(args) is dict, "args is neither Namespace nor dict, WHY?"
        all_parameters = args

    print all_parameters

    # Create DataIO
    #  (complete label with current variable state)
    dataio = DataIO.DataIO(output_folder=all_parameters['output_directory'], label=all_parameters['label'].format(**all_parameters))
    save_every = 1
    run_counter = 0

    # Load the data
    if all_parameters['subaction'] == 'mixed' or all_parameters['subaction'] == '':
        # Mixed runs
        model_outputs = utils.load_npy( os.path.join(os.getenv("WORKDIR_DROP", None), 'Experiments', 'bootstrap_nontargets', 'SAVE_global_plots_misbinding_logposterior-plots_misbinding_logposterior-36eb41e9-6370-453e-995e-3876d5105388.npy'))

    data_responses_all = model_outputs['result_all_thetas']
    data_target = model_outputs['target_angle']
    data_nontargets = model_outputs['nontarget_angles']
    ratio_space = model_outputs['ratio_space']

    # Result arrays
    result_bootstrap_samples_allitems = np.nan*np.ones((ratio_space.size, all_parameters['num_repetitions']))
    result_bootstrap_samples = np.nan*np.ones((ratio_space.size, all_parameters['num_repetitions']))
    result_bootstrap_samples_allitems_uniquekappa_sumnontarget = np.nan*np.ones((ratio_space.size, all_parameters['num_repetitions']))
    result_bootstrap_samples_allitems_uniquekappa_allnontarget = np.nan*np.ones((ratio_space.size, all_parameters['num_repetitions']))

    search_progress = progress.Progress(ratio_space.size)

    for ratio_conj_i, ratio_conj in enumerate(ratio_space):
        print "%.2f%%, %s left - %s" % (search_progress.percentage(), search_progress.time_remaining_str(), search_progress.eta_str())

        print "Bootstrap for ratio=%.2f, %d bootstrap samples" % (ratio_conj, all_parameters['num_repetitions'])

        ### WORK WORK WORK work? ###

        # Get some bootstrap samples
        bootstrap_allitems_nontargets_allitems_uniquekappa = em_circularmixture_allitems_uniquekappa.bootstrap_nontarget_stat(
                data_responses_all[ratio_conj_i],
                data_target,
                data_nontargets,
                nb_bootstrap_samples=all_parameters['num_repetitions'],
                resample_targets=False)

        bootstrap_allitems_nontargets = em_circularmixture.bootstrap_nontarget_stat(
                data_responses_all[ratio_conj_i],
                data_target,
                data_nontargets,
                nb_bootstrap_samples=all_parameters['num_repetitions'],
                resample_targets=False)

        # Collect and store responses
        result_bootstrap_samples[ratio_conj_i] = bootstrap_allitems_nontargets['nontarget_bootstrap_samples']

        result_bootstrap_samples_allitems_uniquekappa_sumnontarget[ratio_conj_i] = bootstrap_allitems_nontargets_allitems_uniquekappa['nontarget_bootstrap_samples']
        result_bootstrap_samples_allitems_uniquekappa_allnontarget[ratio_conj_i] = bootstrap_allitems_nontargets_allitems_uniquekappa['allnontarget_bootstrap_samples']

        print result_bootstrap_samples_allitems[ratio_conj_i]
        print result_bootstrap_samples[ratio_conj_i]
        print result_bootstrap_samples_allitems_uniquekappa_sumnontarget[ratio_conj_i]
        print result_bootstrap_samples_allitems_uniquekappa_allnontarget[ratio_conj_i]

        ### /Work ###

        search_progress.increment()
        if run_counter % save_every == 0 or search_progress.done():
            dataio.save_variables_default(locals())
        run_counter += 1

    # Finished
    dataio.save_variables_default(locals())

    print "All finished"
    return locals()
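
A style note on the result arrays: every launcher preallocates them as np.nan*np.ones(...) so that unfilled cells stay NaN and can later be dropped with utils.dropnan. np.full (NumPy >= 1.8) is an equivalent, slightly more explicit spelling; a sketch, not a change to the code above:

import numpy as np

ratio_size, num_repetitions = 10, 200

# Equivalent to np.nan*np.ones((ratio_size, num_repetitions)):
result_bootstrap_samples = np.full((ratio_size, num_repetitions), np.nan)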
Example #5
def plots_boostrap(data_pbs, generator_module=None):
    '''
        Reload bootstrap samples, plot their histograms, fit empirical CDFs, and save them for quicker reuse later.
    '''

    #### SETUP
    #
    savefigs = True
    savedata = True

    load_fit_bootstrap = True
    plots_hist_cdf = True
    estimate_bootstrap = True

    should_fit_bootstrap = True
    # caching_bootstrap_filename = None
    caching_bootstrap_filename = os.path.join(generator_module.pbs_submission_infos['simul_out_dir'], 'outputs', 'cache_bootstrap.pickle')

    plt.rcParams['font.size'] = 16
    #
    #### /SETUP

    print "Order parameters: ", generator_module.dict_parameters_range.keys()

    result_bootstrap_samples_allitems_uniquekappa_sumnontarget = np.squeeze(data_pbs.dict_arrays['result_bootstrap_samples_allitems_uniquekappa_sumnontarget']['results'])
    result_bootstrap_samples = np.squeeze(data_pbs.dict_arrays['result_bootstrap_samples']['results'])
    result_bootstrap_samples_allitems_uniquekappa_allnontarget = np.squeeze(data_pbs.dict_arrays['result_bootstrap_samples_allitems_uniquekappa_allnontarget']['results'])


    sigmax_space = data_pbs.loaded_data['datasets_list'][0]['sigmax_space']
    T_space = data_pbs.loaded_data['datasets_list'][0]['T_space']

    print result_bootstrap_samples_allitems_uniquekappa_sumnontarget.shape
    print result_bootstrap_samples.shape
    print result_bootstrap_samples_allitems_uniquekappa_allnontarget.shape

    dataio = DataIO(output_folder=generator_module.pbs_submission_infos['simul_out_dir'] + '/outputs/', label='global_' + dataset_infos['save_output_filename'])

    if load_fit_bootstrap:
        if caching_bootstrap_filename is not None:

            if os.path.exists(caching_bootstrap_filename):
                # Got file, open it and try to use its contents
                try:
                    with open(caching_bootstrap_filename, 'rb') as file_in:
                        # Load and assign values
                        cached_data = pickle.load(file_in)
                        bootstrap_ecdf_bays_sigmax_T = cached_data['bootstrap_ecdf_bays_sigmax_T']
                        bootstrap_ecdf_allitems_sum_sigmax_T = cached_data['bootstrap_ecdf_allitems_sum_sigmax_T']
                        bootstrap_ecdf_allitems_all_sigmax_T = cached_data['bootstrap_ecdf_allitems_all_sigmax_T']
                        should_fit_bootstrap = False

                except IOError:
                    print "Error while loading ", caching_bootstrap_filename, "falling back to computing the EM fits"

        if should_fit_bootstrap:

            bootstrap_ecdf_bays_sigmax_T = dict()
            bootstrap_ecdf_allitems_sum_sigmax_T = dict()
            bootstrap_ecdf_allitems_all_sigmax_T = dict()

            # Fit bootstrap
            for sigmax_i, sigmax in enumerate(sigmax_space):
                for T_i, T in enumerate(T_space):
                    if T>1:
                        # One bootstrap CDF per condition
                        bootstrap_ecdf_bays = stmodsdist.empirical_distribution.ECDF(utils.dropnan(result_bootstrap_samples[sigmax_i, T_i]))
                        bootstrap_ecdf_allitems_sum = stmodsdist.empirical_distribution.ECDF(utils.dropnan(result_bootstrap_samples_allitems_uniquekappa_sumnontarget[sigmax_i, T_i]))
                        bootstrap_ecdf_allitems_all = stmodsdist.empirical_distribution.ECDF(utils.dropnan(result_bootstrap_samples_allitems_uniquekappa_allnontarget[sigmax_i, T_i]))


                        # Store in a dict(sigmax) -> dict(T) -> ECDF object
                        bootstrap_ecdf_bays_sigmax_T.setdefault(sigmax_i, dict())[T_i] = dict(ecdf=bootstrap_ecdf_bays, T=T, sigmax=sigmax)
                        bootstrap_ecdf_allitems_sum_sigmax_T.setdefault(sigmax_i, dict())[T_i] = dict(ecdf=bootstrap_ecdf_allitems_sum, T=T, sigmax=sigmax)
                        bootstrap_ecdf_allitems_all_sigmax_T.setdefault(sigmax_i, dict())[T_i] = dict(ecdf=bootstrap_ecdf_allitems_all, T=T, sigmax=sigmax)



            # Save everything to a file, for faster later plotting
            if caching_bootstrap_filename is not None:
                try:
                    with open(caching_bootstrap_filename, 'wb') as filecache_out:
                        data_bootstrap = dict(bootstrap_ecdf_allitems_sum_sigmax_T=bootstrap_ecdf_allitems_sum_sigmax_T, bootstrap_ecdf_allitems_all_sigmax_T=bootstrap_ecdf_allitems_all_sigmax_T, bootstrap_ecdf_bays_sigmax_T=bootstrap_ecdf_bays_sigmax_T)
                        pickle.dump(data_bootstrap, filecache_out, protocol=2)
                except IOError:
                    print "Error writing out to caching file ", caching_bootstrap_filename


    if plots_hist_cdf:
        ## Plots now
        for sigmax_i, sigmax in enumerate(sigmax_space):
            for T_i, T in enumerate(T_space):
                if T > 1:
                    # Histogram of samples
                    _, axes = plt.subplots(ncols=3, figsize=(18, 6))
                    axes[0].hist(utils.dropnan(result_bootstrap_samples[sigmax_i, T_i]), bins=100, normed='density')
                    axes[0].set_xlim([0.0, 1.0])
                    axes[1].hist(utils.dropnan(result_bootstrap_samples_allitems_uniquekappa_sumnontarget[sigmax_i, T_i]), bins=100, normed='density')
                    axes[1].set_xlim([0.0, 1.0])
                    axes[2].hist(utils.dropnan(result_bootstrap_samples_allitems_uniquekappa_allnontarget[sigmax_i, T_i]), bins=100, normed='density')
                    axes[2].set_xlim([0.0, 1.0])

                    if savefigs:
                        dataio.save_current_figure('hist_bootstrap_sigmax%.2f_T%d_{label}_{unique_id}.pdf' % (sigmax, T))

                    # ECDF now
                    _, axes = plt.subplots(ncols=3, sharey=True, figsize=(18, 6))
                    axes[0].plot(bootstrap_ecdf_bays_sigmax_T[sigmax_i][T_i]['ecdf'].x, bootstrap_ecdf_bays_sigmax_T[sigmax_i][T_i]['ecdf'].y, linewidth=2)
                    axes[0].set_xlim([0.0, 1.0])
                    axes[1].plot(bootstrap_ecdf_allitems_sum_sigmax_T[sigmax_i][T_i]['ecdf'].x, bootstrap_ecdf_allitems_sum_sigmax_T[sigmax_i][T_i]['ecdf'].y, linewidth=2)
                    axes[1].set_xlim([0.0, 1.0])
                    axes[2].plot(bootstrap_ecdf_allitems_all_sigmax_T[sigmax_i][T_i]['ecdf'].x, bootstrap_ecdf_allitems_all_sigmax_T[sigmax_i][T_i]['ecdf'].y, linewidth=2)
                    axes[2].set_xlim([0.0, 1.0])

                    if savefigs:
                        dataio.save_current_figure('ecdf_bootstrap_sigmax%.2f_T%d_{label}_{unique_id}.pdf' % (sigmax, T))

    if estimate_bootstrap:
        ## Should be in reloader_error_distribution_mixed_121113 instead
        model_outputs = utils.load_npy( os.path.join(os.getenv("WORKDIR_DROP", None), 'Experiments', 'bootstrap_nontargets', 'global_plots_errors_distribution-plots_errors_distribution-d977e237-cfce-473b-a292-00695e725259.npy'))

        data_responses_all = model_outputs['result_responses_all'][..., 0]
        data_target_all = model_outputs['result_target_all'][..., 0]
        data_nontargets_all = model_outputs['result_nontargets_all'][..., 0]

        # Compute bootstrap p-value
        result_pvalue_bootstrap_sum = np.empty((sigmax_space.size, T_space.size-1))*np.nan
        result_pvalue_bootstrap_all = np.empty((sigmax_space.size, T_space.size-1, T_space.size-1))*np.nan
        for sigmax_i, sigmax in enumerate(sigmax_space):
            for T in T_space[1:]:
                bootstrap_allitems_nontargets_allitems_uniquekappa = em_circularmixture_allitems_uniquekappa.bootstrap_nontarget_stat(
                    data_responses_all[sigmax_i, (T-1)],
                    data_target_all[sigmax_i, (T-1)],
                    data_nontargets_all[sigmax_i, (T-1), :, :(T-1)],
                    sumnontargets_bootstrap_ecdf=bootstrap_ecdf_allitems_sum_sigmax_T[sigmax_i][T-1]['ecdf'],
                    allnontargets_bootstrap_ecdf=bootstrap_ecdf_allitems_all_sigmax_T[sigmax_i][T-1]['ecdf'])

                result_pvalue_bootstrap_sum[sigmax_i, T-2] = bootstrap_allitems_nontargets_allitems_uniquekappa['p_value']
                result_pvalue_bootstrap_all[sigmax_i, T-2, :(T-1)] = bootstrap_allitems_nontargets_allitems_uniquekappa['allnontarget_p_value']

                print sigmax, T, result_pvalue_bootstrap_sum[sigmax_i, T-2], result_pvalue_bootstrap_all[sigmax_i, T-2, :(T-1)], np.sum(result_pvalue_bootstrap_all[sigmax_i, T-2, :(T-1)] < 0.05)

    all_args = data_pbs.loaded_data['args_list']
    variables_to_save = ['nb_repetitions']

    if savedata:
        dataio.save_variables_default(locals(), variables_to_save)

        dataio.make_link_output_to_dropbox(dropbox_current_experiment_folder='bootstrap_nontargets')


    plt.show()


    return locals()
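
The load-or-fit caching around the ECDFs above recurs in every reloader here. A condensed, self-contained version of the pattern; statsmodels' ECDF appears to be the class the stmodsdist alias points to, and the filename is a placeholder:

import os
import pickle
import numpy as np
from statsmodels.distributions.empirical_distribution import ECDF

cache_fn = 'cache_bootstrap_sketch.pickle'
samples = np.random.rand(1000)

if os.path.exists(cache_fn):
    with open(cache_fn, 'rb') as f:
        ecdf = pickle.load(f)['ecdf']
else:
    ecdf = ECDF(samples)          # step-function CDF; exposes .x and .y for plotting
    with open(cache_fn, 'wb') as f:
        pickle.dump(dict(ecdf=ecdf), f, protocol=2)

print ecdf(0.5)                   # fraction of samples <= 0.5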
Example #6
def plots_boostrap(data_pbs, generator_module=None):
    '''
        Reload bootstrap samples, plot their histograms, fit empirical CDFs, and save them for quicker reuse later.
    '''

    #### SETUP
    #
    savefigs = True
    savedata = True

    load_fit_bootstrap = True
    plots_hist_cdf = True
    estimate_bootstrap = True

    should_fit_bootstrap = True
    # caching_bootstrap_filename = None
    caching_bootstrap_filename = os.path.join(generator_module.pbs_submission_infos['simul_out_dir'], 'outputs', 'cache_bootstrap_bays09.pickle')

    plt.rcParams['font.size'] = 16
    #
    #### /SETUP

    print "Order parameters: ", generator_module.dict_parameters_range.keys()

    result_bootstrap_nitems_samples = np.squeeze(data_pbs.dict_arrays['result_bootstrap_nitems_samples']['results'])
    result_bootstrap_subject_nitems_samples = np.squeeze(data_pbs.dict_arrays['result_bootstrap_subject_nitems_samples']['results'])


    dataio = DataIO(output_folder=generator_module.pbs_submission_infos['simul_out_dir'] + '/outputs/', label='global_' + dataset_infos['save_output_filename'])

    dataset = load_experimental_data.load_data_bays09(fit_mixture_model=True)

    if load_fit_bootstrap:
        if caching_bootstrap_filename is not None:

            if os.path.exists(caching_bootstrap_filename):
                # Got file, open it and try to use its contents
                try:
                    with open(caching_bootstrap_filename, 'rb') as file_in:
                        # Load and assign values
                        cached_data = pickle.load(file_in)
                        bootstrap_nitems_samples = cached_data['bootstrap_nitems_samples']
                        bootstrap_subject_nitems_samples = cached_data['bootstrap_subject_nitems_samples']
                        should_fit_bootstrap = False

                except IOError:
                    print "Error while loading ", caching_bootstrap_filename, "falling back to computing the EM fits"

        if should_fit_bootstrap:

            bootstrap_nitems_samples = dict()
            bootstrap_subject_nitems_samples = dict()

            # Fit ECDF
            for n_items_i, n_items in enumerate(np.unique(dataset['n_items'])):
                if n_items > 1:
                    print "Nitems %d, all subjects" % (n_items)
                    current_ecdf_allitems = stmodsdist.empirical_distribution.ECDF(utils.dropnan(result_bootstrap_nitems_samples[n_items_i]))

                    # Store in a dict(n_items_i) -> {ECDF object, n_items}
                    bootstrap_nitems_samples[n_items_i] = dict(ecdf=current_ecdf_allitems, n_items=n_items)

                    for subject_i, subject in enumerate(np.unique(dataset['subject'])):
                        print "Nitems %d, subject %d" % (n_items, subject)

                        current_ecdf_subj_items = stmodsdist.empirical_distribution.ECDF(utils.dropnan(result_bootstrap_subject_nitems_samples[subject_i, n_items_i]))

                        if n_items_i not in bootstrap_subject_nitems_samples:
                            bootstrap_subject_nitems_samples[n_items_i] = dict()
                        bootstrap_subject_nitems_samples[n_items_i][subject_i] = dict(ecdf=current_ecdf_subj_items, n_items=n_items, subject=subject)

            # Save everything to a file, for faster later plotting
            if caching_bootstrap_filename is not None:
                try:
                    with open(caching_bootstrap_filename, 'wb') as filecache_out:
                        data_bootstrap = dict(bootstrap_nitems_samples=bootstrap_nitems_samples, bootstrap_subject_nitems_samples=bootstrap_subject_nitems_samples)
                        pickle.dump(data_bootstrap, filecache_out, protocol=2)
                except IOError:
                    print "Error writing out to caching file ", caching_bootstrap_filename


    if plots_hist_cdf:
        ## Plots now
        for n_items_i, n_items in enumerate(np.unique(dataset['n_items'])):
            if n_items > 1:
                for subject_i, subject in enumerate(np.unique(dataset['subject'])):

                    # Histogram of samples, for subject/nitems
                    _, axes = plt.subplots(ncols=2, figsize=(12, 6))
                    axes[0].hist(utils.dropnan(result_bootstrap_subject_nitems_samples[subject_i, n_items_i]), bins=100, normed='density')
                    axes[0].set_xlim([0.0, 1.0])
                    # ECDF now
                    axes[1].plot(bootstrap_subject_nitems_samples[n_items_i][subject_i]['ecdf'].x, bootstrap_subject_nitems_samples[n_items_i][subject_i]['ecdf'].y, linewidth=2)
                    axes[1].set_xlim([0.0, 1.0])

                    if savefigs:
                        dataio.save_current_figure('histecdf_bootstrap_nitems%d_subject%d_{label}_{unique_id}.pdf' % (n_items, subject))

                # Same for collapsed data across subjects
                # Histogram of samples, for subject/nitems
                _, axes = plt.subplots(ncols=2, figsize=(12, 6))
                axes[0].hist(utils.dropnan(result_bootstrap_nitems_samples[n_items_i]), bins=100, normed='density')
                axes[0].set_xlim([0.0, 1.0])
                # ECDF now
                axes[1].plot(bootstrap_nitems_samples[n_items_i]['ecdf'].x, bootstrap_nitems_samples[n_items_i]['ecdf'].y, linewidth=2)
                axes[1].set_xlim([0.0, 1.0])

                if savefigs:
                    dataio.save_current_figure('histecdf_bootstrap_nitems%d_{label}_{unique_id}.pdf' % (n_items))

    if estimate_bootstrap:
        # Compute bootstrap p-value
        result_pvalue_bootstrap_nitems = np.empty(dataset['n_items_size'])*np.nan
        result_pvalue_bootstrap_subject_nitems_samples = np.empty((dataset['n_items_size'], dataset['subject_size']))*np.nan


        for n_items_i, n_items in enumerate(np.unique(dataset['n_items'])):
            if n_items > 1:
                print "Nitems %d, all subjects" % (n_items)
                # Data collapsed across subjects
                ids_filtered = (dataset['n_items'] == n_items).flatten()

                bootstrap = em_circularmixture_allitems_uniquekappa.bootstrap_nontarget_stat(
                    dataset['response'][ids_filtered, 0],
                    dataset['item_angle'][ids_filtered, 0],
                    dataset['item_angle'][ids_filtered, 1:n_items],
                    sumnontargets_bootstrap_ecdf=bootstrap_nitems_samples[n_items_i]['ecdf']
                    )

                result_pvalue_bootstrap_nitems[n_items_i] = bootstrap['p_value']
                print "p_val:", result_pvalue_bootstrap_nitems

                for subject_i, subject in enumerate(np.unique(dataset['subject'])):
                    print "Nitems %d, subject %d" % (n_items, subject)

                    # Bootstrap per subject and nitems
                    ids_filtered = (dataset['subject']==subject).flatten() & (dataset['n_items'] == n_items).flatten()

                    # Get pvalue
                    bootstrap = em_circularmixture_allitems_uniquekappa.bootstrap_nontarget_stat(
                        dataset['response'][ids_filtered, 0],
                        dataset['item_angle'][ids_filtered, 0],
                        dataset['item_angle'][ids_filtered, 1:n_items],
                        sumnontargets_bootstrap_ecdf=bootstrap_subject_nitems_samples[n_items_i][subject_i]['ecdf'])
                    result_pvalue_bootstrap_subject_nitems_samples[n_items_i, subject_i] = bootstrap['p_value']

                    print "p_val:", result_pvalue_bootstrap_subject_nitems_samples[n_items_i, subject_i]

        signif_level = 0.05
        result_signif_nitems = result_pvalue_bootstrap_nitems < signif_level
        result_num_signif_subject_nitems = np.sum(result_pvalue_bootstrap_subject_nitems_samples < signif_level, axis=1)
        print "Summary:"
        print "Collapsed subjects:",result_signif_nitems
        print "Per subjects (%d total): %s" % (dataset['subject_size'], result_num_signif_subject_nitems)


    all_args = data_pbs.loaded_data['args_list']
    variables_to_save = ['nb_repetitions', 'signif_level']

    if savedata:
        dataio.save_variables_default(locals(), variables_to_save)

        dataio.make_link_output_to_dropbox(dropbox_current_experiment_folder='bootstrap_nontargets')


    plt.show()


    return locals()
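
When a precomputed ECDF is supplied (the sumnontargets_bootstrap_ecdf argument above), the bootstrap p-value reduces to one minus the ECDF evaluated at the observed statistic. A sketch of that computation, assuming the statistic is the fitted nontarget mixture weight:

import numpy as np
from statsmodels.distributions.empirical_distribution import ECDF

bootstrap_samples = np.random.beta(1.0, 10.0, size=1000)  # stand-in null distribution
ecdf = ECDF(bootstrap_samples)

observed_mixt_nontarget = 0.25
p_value = 1.0 - ecdf(observed_mixt_nontarget)   # P(sample > observed), up to ties
print p_value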
def plots_errors_distribution(data_pbs, generator_module=None):
    '''
        Reload responses and plot error distributions.
    '''

    #### SETUP
    #
    savefigs = True
    savedata = True

    plot_persigmax = True
    do_best_nontarget = False

    load_test_bootstrap = True
    caching_bootstrap_filename = os.path.join(generator_module.pbs_submission_infos['simul_out_dir'], 'outputs', 'cache_bootstrap_errordistrib_mixed_sigmaxT.pickle')

    colormap = None  # or 'cubehelix'
    plt.rcParams['font.size'] = 16

    angle_space = np.linspace(-np.pi, np.pi, 51)
    #
    #### /SETUP

    print "Order parameters: ", generator_module.dict_parameters_range.keys()

    result_responses_all = data_pbs.dict_arrays['result_responses']['results']
    result_target_all = data_pbs.dict_arrays['result_target']['results']
    result_nontargets_all = data_pbs.dict_arrays['result_nontargets']['results']
    result_em_fits_all = data_pbs.dict_arrays['result_em_fits']['results']

    T_space = data_pbs.loaded_data['parameters_uniques']['T']
    sigmax_space = data_pbs.loaded_data['parameters_uniques']['sigmax']
    nb_repetitions = result_responses_all.shape[-1]
    N = result_responses_all.shape[-2]

    result_pval_vtest_nontargets = np.empty((sigmax_space.size, T_space.size))*np.nan
    result_pvalue_bootstrap_sum = np.empty((sigmax_space.size, T_space.size-1))*np.nan
    result_pvalue_bootstrap_all = np.empty((sigmax_space.size, T_space.size-1, T_space.size-1))*np.nan

    print sigmax_space
    print T_space
    print result_responses_all.shape, result_target_all.shape, result_nontargets_all.shape, result_em_fits_all.shape

    dataio = DataIO.DataIO(output_folder=generator_module.pbs_submission_infos['simul_out_dir'] + '/outputs/', label='global_' + dataset_infos['save_output_filename'])

    if load_test_bootstrap:

        if caching_bootstrap_filename is not None:
            if os.path.exists(caching_bootstrap_filename):
                # Got file, open it and try to use its contents
                try:
                    with open(caching_bootstrap_filename, 'rb') as file_in:
                        # Load and assign values
                        cached_data = pickle.load(file_in)
                        bootstrap_ecdf_bays_sigmax_T = cached_data['bootstrap_ecdf_bays_sigmax_T']
                        bootstrap_ecdf_allitems_sum_sigmax_T = cached_data['bootstrap_ecdf_allitems_sum_sigmax_T']
                        bootstrap_ecdf_allitems_all_sigmax_T = cached_data['bootstrap_ecdf_allitems_all_sigmax_T']


                except IOError:
                    print "Error while loading ", caching_bootstrap_filename, "falling back to computing the EM fits"
                    load_test_bootstrap = False


        if load_test_bootstrap:
            # Now compute the pvalue for each sigmax/T,
            # using only the first repetition of the model outputs
            data_responses_all = result_responses_all[..., 0]
            data_target_all = result_target_all[..., 0]
            data_nontargets_all = result_nontargets_all[..., 0]

            # Compute bootstrap p-value
            for sigmax_i, sigmax in enumerate(sigmax_space):
                for T in T_space[1:]:
                    bootstrap_allitems_nontargets_allitems_uniquekappa = em_circularmixture_allitems_uniquekappa.bootstrap_nontarget_stat(
                        data_responses_all[sigmax_i, (T-1)],
                        data_target_all[sigmax_i, (T-1)],
                        data_nontargets_all[sigmax_i, (T-1), :, :(T-1)],
                        sumnontargets_bootstrap_ecdf=bootstrap_ecdf_allitems_sum_sigmax_T[sigmax_i][T-1]['ecdf'],
                        allnontargets_bootstrap_ecdf=bootstrap_ecdf_allitems_all_sigmax_T[sigmax_i][T-1]['ecdf'])

                    result_pvalue_bootstrap_sum[sigmax_i, T-2] = bootstrap_allitems_nontargets_allitems_uniquekappa['p_value']
                    result_pvalue_bootstrap_all[sigmax_i, T-2, :(T-1)] = bootstrap_allitems_nontargets_allitems_uniquekappa['allnontarget_p_value']

                    print sigmax, T, result_pvalue_bootstrap_sum[sigmax_i, T-2], result_pvalue_bootstrap_all[sigmax_i, T-2, :(T-1)], np.sum(result_pvalue_bootstrap_all[sigmax_i, T-2, :(T-1)] < 0.05)


    if plot_persigmax:

        T_space_filtered = np.array([1, 2, 4, 6])

        for sigmax_i, sigmax in enumerate(sigmax_space):
            print "sigmax: ", sigmax

            # Compute the error between the response and the target
            errors_targets = utils.wrap_angles(result_responses_all[sigmax_i] - result_target_all[sigmax_i])

            errors_nontargets = np.nan*np.empty(result_nontargets_all[sigmax_i].shape)
            if do_best_nontarget:
                errors_best_nontarget = np.empty(errors_targets.shape)
            for T_i in xrange(1, T_space.size):
                for repet_i in xrange(nb_repetitions):
                    # Could use fancier indexing here, but explicit loops are clearer

                    # Compute the error between the responses and nontargets.
                    errors_nontargets[T_i, :, :, repet_i] = utils.wrap_angles((result_responses_all[sigmax_i, T_i, :, repet_i, np.newaxis] - result_nontargets_all[sigmax_i, T_i, :, :, repet_i]))

                    # Errors between the response the best nontarget.
                    if do_best_nontarget:
                        errors_best_nontarget[T_i, :, repet_i] = errors_nontargets[T_i, np.arange(errors_nontargets.shape[1]), np.nanargmin(np.abs(errors_nontargets[T_i, ..., repet_i]), axis=1), repet_i]

            f1, axes1 = plt.subplots(ncols=T_space_filtered.size, figsize=(T_space_filtered.size*6, 6), sharey=True)
            f2, axes2 = plt.subplots(ncols=T_space_filtered.size-1, figsize=((T_space_filtered.size-1)*6, 6), sharey=True)
            for T_i, T in enumerate(T_space_filtered):
                print "T: ", T
                # Now, per T items, show the distribution of errors and of errors to non target

                # Error to target
                # hist_errors_targets = np.zeros((angle_space.size, nb_repetitions))
                # for repet_i in xrange(nb_repetitions):
                #     hist_errors_targets[:, repet_i], _, _ = utils_math.histogram_binspace(errors_targets[T_i, :, repet_i], bins=angle_space)

                # f, ax = plt.subplots()
                # ax.bar(angle_space, np.mean(hist_errors_targets, axis=-1), width=2.*np.pi/(angle_space.size-1), align='center')
                # ax.set_xlim([angle_space[0] - np.pi/(angle_space.size-1), angle_space[-1] + np.pi/(angle_space.size-1)])

                # utils.plot_mean_std_area(angle_space, np.mean(hist_errors_targets, axis=-1), np.std(hist_errors_targets, axis=-1))

                # utils.hist_samples_density_estimation(errors_targets[T_i].reshape(nb_repetitions*N), bins=angle_space, title='Errors between response and target, N=%d' % (T))

                utils.hist_angular_data(utils.dropnan(errors_targets[T_i]), bins=angle_space, norm='density', ax_handle=axes1[T_i], pretty_xticks=True)
                axes1[T_i].set_ylim([0., 2.0])


                if T > 1:
                    # Error to nontarget
                    # ax_handle = utils.hist_samples_density_estimation(errors_nontargets[T_i, :, :T_i].reshape(nb_repetitions*N*T_i), bins=angle_space, title='Errors between response and non targets, N=%d' % (T))
                    utils.hist_angular_data(utils.dropnan(errors_nontargets[T_i, :, :T_i]), bins=angle_space, title='N=%d' % (T), norm='density', ax_handle=axes2[T_i-1], pretty_xticks=True)
                    axes2[T_i-1].set_title('')

                    result_pval_vtest_nontargets[sigmax_i, T_i] = utils.V_test(utils.dropnan(errors_nontargets[T_i, :, :T_i]))['pvalue']

                    print result_pval_vtest_nontargets[sigmax_i, T_i]

                    # axes2[T_i-1].text(0.03, 0.96, "Vtest pval: %.2f" % (result_pval_vtest_nontargets[sigmax_i, T_i]), transform=axes2[T_i - 1].transAxes, horizontalalignment='left', fontsize=12)
                    axes2[T_i-1].text(0.03, 0.94, "$p=%.2f$" % (result_pvalue_bootstrap_sum[sigmax_i, T-2]), transform=axes2[T_i - 1].transAxes, horizontalalignment='left', fontsize=18)

                    axes2[T_i-1].set_ylim([0., 0.30])

                    # Error to best non target
                    if do_best_nontarget:
                        utils.hist_samples_density_estimation(errors_best_nontarget[T_i].reshape(nb_repetitions*N), bins=angle_space, title='N=%d' % (T))

                        if savefigs:
                            dataio.save_current_figure('error_bestnontarget_hist_sigmax%.2f_T%d_{label}_{unique_id}.pdf' % (sigmax, T))

            if savefigs:
                plt.figure(f1.number)
                plt.tight_layout()
                dataio.save_current_figure('error_target_hist_sigmax%.2f_Tall_{label}_{unique_id}.pdf' % (sigmax))

                plt.figure(f2.number)
                plt.tight_layout()
                dataio.save_current_figure('error_nontargets_hist_sigmax%.2f_Tall_{label}_{unique_id}.pdf' % (sigmax))





    all_args = data_pbs.loaded_data['args_list']

    if savedata:
        dataio.save_variables_default(locals())

        dataio.make_link_output_to_dropbox(dropbox_current_experiment_folder='error_distribution')

    plt.show()

    return locals()
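
utils.wrap_angles above maps response-target differences back onto the circle. A one-line equivalent in case that helper is unavailable (an assumption: the project helper presumably wraps into [-pi, pi) the same way):

import numpy as np

def wrap_angles_sketch(angles):
    # Map any angle into the interval [-pi, pi)
    return np.mod(angles + np.pi, 2.0*np.pi) - np.pi

print wrap_angles_sketch(np.array([3.5, -3.5, 0.0]))   # [-2.783  2.783  0.   ]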
def plots_misbinding_logposterior(data_pbs, generator_module=None):
    '''
        Reload 3D volume runs from PBS and plot them.
    '''


    #### SETUP
    #
    savedata = False
    savefigs = True

    plot_logpost = False
    plot_error = False
    plot_mixtmodel = True
    plot_hist_responses_fisherinfo = True
    compute_plot_bootstrap = False
    compute_fisher_info_perratioconj = True

    # mixturemodel_to_use = 'original'
    mixturemodel_to_use = 'allitems'
    # mixturemodel_to_use = 'allitems_kappafi'

    caching_fisherinfo_filename = os.path.join(generator_module.pbs_submission_infos['simul_out_dir'], 'cache_fisherinfo.pickle')


    #
    #### /SETUP

    print "Order parameters: ", generator_module.dict_parameters_range.keys()

    result_all_log_posterior = np.squeeze(data_pbs.dict_arrays['result_all_log_posterior']['results'])
    result_all_thetas = np.squeeze(data_pbs.dict_arrays['result_all_thetas']['results'])

    ratio_space = data_pbs.loaded_data['parameters_uniques']['ratio_conj']

    print ratio_space
    print result_all_log_posterior.shape

    N = result_all_thetas.shape[-1]

    result_prob_wrong = np.zeros((ratio_space.size, N))
    result_em_fits = np.empty((ratio_space.size, 6))*np.nan

    all_args = data_pbs.loaded_data['args_list']

    fixed_means = [-np.pi*0.6, np.pi*0.6]
    all_angles = np.linspace(-np.pi, np.pi, result_all_log_posterior.shape[-1])

    dataio = DataIO(output_folder=generator_module.pbs_submission_infos['simul_out_dir'] + '/outputs/', label='global_' + dataset_infos['save_output_filename'])


    plt.rcParams['font.size'] = 18


    if plot_hist_responses_fisherinfo:

        # From cache
        if caching_fisherinfo_filename is not None:
            if os.path.exists(caching_fisherinfo_filename):
                # Got file, open it and try to use its contents
                try:
                    with open(caching_fisherinfo_filename, 'rb') as file_in:
                        # Load and assign values
                        cached_data = pickle.load(file_in)
                        result_fisherinfo_ratio = cached_data['result_fisherinfo_ratio']
                        compute_fisher_info_perratioconj = False

                except IOError:
                    print "Error while loading ", caching_fisherinfo_filename, "falling back to computing the Fisher Info"

        if compute_fisher_info_perratioconj:
            # We did not save the Fisher info, but we need it to fit the mixture model with fixed kappa, so recompute it from the args_dicts

            result_fisherinfo_ratio = np.empty(ratio_space.shape)

            # Invert the all_args_i -> ratio_conj direction
            parameters_indirections = data_pbs.loaded_data['parameters_dataset_index']

            for ratio_conj_i, ratio_conj in enumerate(ratio_space):
                # Get index of first dataset with the current ratio_conj (no need for the others, I think)
                arg_index = parameters_indirections[(ratio_conj,)][0]

                # Now using this dataset, reconstruct a RandomFactorialNetwork and compute the fisher info
                curr_args = all_args[arg_index]

                curr_args['stimuli_generation'] = lambda T: np.linspace(-np.pi*0.6, np.pi*0.6, T)

                (random_network, data_gen, stat_meas, sampler) = launchers.init_everything(curr_args)

                # Theo Fisher info
                result_fisherinfo_ratio[ratio_conj_i] = sampler.estimate_fisher_info_theocov()

                del curr_args['stimuli_generation']

            # Save everything to a file, for faster later plotting
            if caching_fisherinfo_filename is not None:
                try:
                    with open(caching_fisherinfo_filename, 'wb') as filecache_out:
                        data_cache = dict(result_fisherinfo_ratio=result_fisherinfo_ratio)
                        pickle.dump(data_cache, filecache_out, protocol=2)
                except IOError:
                    print "Error writing out to caching file ", caching_fisherinfo_filename

        # Now plots. Do histograms of responses (around -pi/6 and pi/6), add Von Mises derived from Theo FI on top, and vertical lines for the correct target/nontarget angles.
        for ratio_conj_i, ratio_conj in enumerate(ratio_space):
            # Histogram
            ax = utils.hist_angular_data(result_all_thetas[ratio_conj_i], bins=100, title='ratio %.2f, fi %.0f' % (ratio_conj, result_fisherinfo_ratio[ratio_conj_i]))
            bar_heights, _, _ = utils.histogram_binspace(result_all_thetas[ratio_conj_i], bins=100, norm='density')

            # Add Fisher info prediction on top
            x = np.linspace(-np.pi, np.pi, 1000)
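            # Above kappa ~ 700 the von Mises normalisation I0(kappa) overflows
            # float64 (exp(709) is roughly the limit), and the density is then
            # essentially a Gaussian with std 1/sqrt(kappa), hence the switch below.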
            if result_fisherinfo_ratio[ratio_conj_i] < 700:
                # Von Mises PDF
                utils.plot_vonmises_pdf(x, utils.stddev_to_kappa(1./result_fisherinfo_ratio[ratio_conj_i]**0.5), mu=fixed_means[-1], ax_handle=ax, linewidth=3, color='r', scale=np.max(bar_heights), fmt='-')
            else:
                # Switch to Gaussian instead
                utils.plot_normal_pdf(x, mu=fixed_means[-1], std=1./result_fisherinfo_ratio[ratio_conj_i]**0.5, ax_handle=ax, linewidth=3, color='r', scale=np.max(bar_heights), fmt='-')

            # ax.set_xticks([])
            # ax.set_yticks([])

            # Add vertical line to correct target/nontarget
            ax.axvline(x=fixed_means[0], color='g', linewidth=2)
            ax.axvline(x=fixed_means[1], color='r', linewidth=2)

            ax.get_figure().canvas.draw()

            if savefigs:
                # plt.tight_layout()
                dataio.save_current_figure('results_misbinding_histresponses_vonmisespdf_ratioconj%.2f{label}_{unique_id}.pdf' % (ratio_conj))



    if plot_logpost:
        for ratio_conj_i, ratio_conj in enumerate(ratio_space):
            # ax = utils.plot_mean_std_area(all_angles, nanmean(result_all_log_posterior[ratio_conj_i], axis=0), nanstd(result_all_log_posterior[ratio_conj_i], axis=0))

            # ax.set_xlim((-np.pi, np.pi))
            # ax.set_xticks((-np.pi, -np.pi / 2, 0, np.pi / 2., np.pi))
            # ax.set_xticklabels((r'$-\pi$', r'$-\frac{\pi}{2}$', r'$0$', r'$\frac{\pi}{2}$', r'$\pi$'))
            # ax.set_yticks(())

            # ax.get_figure().canvas.draw()

            # if savefigs:
            #     dataio.save_current_figure('results_misbinding_logpost_ratioconj%.2f_{label}_global_{unique_id}.pdf' % ratio_conj)


            # Compute the probability of answering wrongly (from fitting mixture distrib onto posterior)
            for n in xrange(result_all_log_posterior.shape[1]):
                result_prob_wrong[ratio_conj_i, n], _, _ = utils.fit_gaussian_mixture_fixedmeans(all_angles, np.exp(result_all_log_posterior[ratio_conj_i, n]), fixed_means=fixed_means, normalise=True, return_fitted_data=False, should_plot=False)

        # ax = utils.plot_mean_std_area(ratio_space, nanmean(result_prob_wrong, axis=-1), nanstd(result_prob_wrong, axis=-1))
        plt.figure()
        plt.plot(ratio_space, utils.nanmean(result_prob_wrong, axis=-1))

        # ax.get_figure().canvas.draw()
        if savefigs:
            dataio.save_current_figure('results_misbinding_probwrongpost_allratioconj_{label}_global_{unique_id}.pdf')

    if plot_error:

        ## Compute Standard deviation/precision from samples and plot it as a function of ratio_conj
        stats = utils.compute_mean_std_circular_data(utils.wrap_angles(result_all_thetas - fixed_means[1]).T)

        f = plt.figure()
        plt.plot(ratio_space, stats['std'])
        plt.ylabel('Standard deviation [rad]')

        if savefigs:
            dataio.save_current_figure('results_misbinding_stddev_allratioconj_{label}_global_{unique_id}.pdf')

        f = plt.figure()
        plt.plot(ratio_space, utils.compute_angle_precision_from_std(stats['std'], square_precision=False), linewidth=2)
        plt.ylabel('Precision [$1/rad$]')
        plt.xlabel('Proportion of conjunctive units')
        plt.grid()

        if savefigs:
            dataio.save_current_figure('results_misbinding_precision_allratioconj_{label}_global_{unique_id}.pdf')

        ## Compute the probability of misbinding
        # 1) Just count the fraction of samples <= 1
        # 2) Fit a mixture model, average over mixture probabilities
        prob_smaller0 = np.sum(result_all_thetas <= 1, axis=1)/float(result_all_thetas.shape[1])

        em_centers = np.zeros((ratio_space.size, 2))
        em_covs = np.zeros((ratio_space.size, 2))
        em_pk = np.zeros((ratio_space.size, 2))
        em_ll = np.zeros(ratio_space.size)
        for ratio_conj_i, ratio_conj in enumerate(ratio_space):
            cen_lst, cov_lst, em_pk[ratio_conj_i], em_ll[ratio_conj_i] = pygmm.em(result_all_thetas[ratio_conj_i, np.newaxis].T, K = 2, max_iter = 400, init_kw={'cluster_init':'fixed', 'fixed_means': fixed_means})

            em_centers[ratio_conj_i] = np.array(cen_lst).flatten()
            em_covs[ratio_conj_i] = np.array(cov_lst).flatten()

        # print em_centers
        # print em_covs
        # print em_pk

        f = plt.figure()
        plt.plot(ratio_space, prob_smaller0)
        plt.ylabel('Misbound proportion')
        if savefigs:
            dataio.save_current_figure('results_misbinding_countsmaller0_allratioconj_{label}_global_{unique_id}.pdf')

        f = plt.figure()
        plt.plot(ratio_space, np.max(em_pk, axis=-1), 'g', linewidth=2)
        plt.ylabel('Mixture proportion, correct')
        plt.xlabel('Proportion of conjunctive units')
        plt.grid()
        if savefigs:
            dataio.save_current_figure('results_misbinding_emmixture_allratioconj_{label}_global_{unique_id}.pdf')


        # Put everything on one figure
        f = plt.figure(figsize=(10, 6))
        norm_for_plot = lambda x: (x - np.min(x))/np.max((x - np.min(x)))
        plt.plot(ratio_space, norm_for_plot(stats['std']), ratio_space, norm_for_plot(utils.compute_angle_precision_from_std(stats['std'], square_precision=False)), ratio_space, norm_for_plot(prob_smaller0), ratio_space, norm_for_plot(em_pk[:, 1]), ratio_space, norm_for_plot(em_pk[:, 0]))
        plt.legend(('Std dev', 'Precision', 'Prob smaller 1', 'Mixture proportion correct', 'Mixture proportion misbinding'))
        # plt.plot(ratio_space, norm_for_plot(compute_angle_precision_from_std(stats['std'], square_precision=False)), ratio_space, norm_for_plot(em_pk[:, 1]), linewidth=2)
        # plt.legend(('Precision', 'Mixture proportion correct'), loc='best')
        plt.grid()
        if savefigs:
            dataio.save_current_figure('results_misbinding_allmetrics_allratioconj_{label}_global_{unique_id}.pdf')


    if plot_mixtmodel:
        # Fit Paul's model
        target_angle = np.ones(N)*fixed_means[1]
        nontarget_angles = np.ones((N, 1))*fixed_means[0]

        for ratio_conj_i, ratio_conj in enumerate(ratio_space):
            print "Ratio: ", ratio_conj

            responses = result_all_thetas[ratio_conj_i]

            if mixturemodel_to_use == 'allitems_kappafi':
                curr_params_fit = em_circularmixture_allitems_kappafi.fit(responses, target_angle, nontarget_angles, kappa=result_fisherinfo_ratio[ratio_conj_i])
            elif mixturemodel_to_use == 'allitems':
                curr_params_fit = em_circularmixture_allitems_uniquekappa.fit(responses, target_angle, nontarget_angles)
            else:
                curr_params_fit = em_circularmixture.fit(responses, target_angle, nontarget_angles)

            result_em_fits[ratio_conj_i] = [curr_params_fit['kappa'], curr_params_fit['mixt_target']] + utils.arrnum_to_list(curr_params_fit['mixt_nontargets']) + [curr_params_fit[key] for key in ('mixt_random', 'train_LL', 'bic')]

            print curr_params_fit


        if False:
            f, ax = plt.subplots()
            ax2 = ax.twinx()

            # left axis, kappa
            ax = utils.plot_mean_std_area(ratio_space, result_em_fits[:, 0], 0*result_em_fits[:, 0], xlabel='Proportion of conjunctive units', ylabel="Inverse variance $[rad^{-2}]$", ax_handle=ax, linewidth=3, fmt='o-', markersize=8, label='Fitted kappa', color='k')

            # Right axis, mixture probabilities
            utils.plot_mean_std_area(ratio_space, result_em_fits[:, 1], 0*result_em_fits[:, 1], xlabel='Proportion of conjunctive units', ylabel="Mixture probabilities", ax_handle=ax2, linewidth=3, fmt='o-', markersize=8, label='Target')
            utils.plot_mean_std_area(ratio_space, result_em_fits[:, 2], 0*result_em_fits[:, 2], xlabel='Proportion of conjunctive units', ylabel="Mixture probabilities", ax_handle=ax2, linewidth=3, fmt='o-', markersize=8, label='Nontarget')
            utils.plot_mean_std_area(ratio_space, result_em_fits[:, 3], 0*result_em_fits[:, 3], xlabel='Proportion of conjunctive units', ylabel="Mixture probabilities", ax_handle=ax2, linewidth=3, fmt='o-', markersize=8, label='Random')

            lines, labels = ax.get_legend_handles_labels()
            lines2, labels2 = ax2.get_legend_handles_labels()
            ax.legend(lines + lines2, labels + labels2, fontsize=12, loc='right')

            # ax.set_xlim([0.9, 5.1])
            # ax.set_xticks(range(1, 6))
            # ax.set_xticklabels(range(1, 6))
            plt.grid()

            f.canvas.draw()

        if True:
            # Mixture probabilities
            ax = utils.plot_mean_std_area(ratio_space, result_em_fits[:, 1], 0*result_em_fits[:, 1], xlabel='Proportion of conjunctive units', ylabel="Mixture probabilities", linewidth=3, fmt='-', markersize=8, label='Target')
            utils.plot_mean_std_area(ratio_space, result_em_fits[:, 2], 0*result_em_fits[:, 2], xlabel='Proportion of conjunctive units', ylabel="Mixture probabilities", ax_handle=ax, linewidth=3, fmt='-', markersize=8, label='Nontarget')
            utils.plot_mean_std_area(ratio_space, result_em_fits[:, 3], 0*result_em_fits[:, 3], xlabel='Proportion of conjunctive units', ylabel="Mixture probabilities", ax_handle=ax, linewidth=3, fmt='-', markersize=8, label='Random')

            ax.legend(loc='right')

            # ax.set_xlim([0.9, 5.1])
            # ax.set_xticks(range(1, 6))
            # ax.set_xticklabels(range(1, 6))
            plt.grid()

            if savefigs:
                dataio.save_current_figure('results_misbinding_emmixture_allratioconj_{label}_global_{unique_id}.pdf')

        if True:
            # Kappa
            # ax = utils.plot_mean_std_area(ratio_space, result_em_fits[:, 0], 0*result_em_fits[:, 0], xlabel='Proportion of conjunctive units', ylabel="$\kappa [rad^{-2}]$", linewidth=3, fmt='-', markersize=8, label='Kappa')
            ax = utils.plot_mean_std_area(ratio_space, utils.kappa_to_stddev(result_em_fits[:, 0]), 0*result_em_fits[:, 2], xlabel='Proportion of conjunctive units', ylabel="Standard deviation [rad]", linewidth=3, fmt='-', markersize=8, label='Mixture model $\kappa$')

            # Add Fisher Info theo
            ax = utils.plot_mean_std_area(ratio_space, utils.kappa_to_stddev(result_fisherinfo_ratio), 0*result_em_fits[:, 2], xlabel='Proportion of conjunctive units', ylabel="Standard deviation [rad]", linewidth=3, fmt='-', markersize=8, label='Fisher Information', ax_handle=ax)

            ax.legend(loc='best')

            # ax.set_xlim([0.9, 5.1])
            # ax.set_xticks(range(1, 6))
            # ax.set_xticklabels(range(1, 6))
            plt.grid()

            if savefigs:
                dataio.save_current_figure('results_misbinding_kappa_allratioconj_{label}_global_{unique_id}.pdf')

    if compute_plot_bootstrap:
        ## Compute the bootstrap pvalue for each ratio
        #       use the bootstrap CDF from mixed runs, not the exact current ones, not sure if good idea.

        bootstrap_to_load = 1
        if bootstrap_to_load == 1:
            cache_bootstrap_fn = os.path.join(generator_module.pbs_submission_infos['simul_out_dir'], 'outputs', 'cache_bootstrap_mixed_from_bootstrapnontargets.pickle')
            bootstrap_ecdf_sum_label = 'bootstrap_ecdf_allitems_sum_sigmax_T'
            bootstrap_ecdf_all_label = 'bootstrap_ecdf_allitems_all_sigmax_T'
        elif bootstrap_to_load == 2:
            cache_bootstrap_fn = os.path.join(generator_module.pbs_submission_infos['simul_out_dir'], 'outputs', 'cache_bootstrap_misbinding_mixed.pickle')
            bootstrap_ecdf_sum_label = 'bootstrap_ecdf_allitems_sum_ratioconj'
            bootstrap_ecdf_all_label = 'bootstrap_ecdf_allitems_all_ratioconj'

        try:
            with open(cache_bootstrap_fn, 'rb') as file_in:
                # Load and assign values
                cached_data = pickle.load(file_in)
                assert bootstrap_ecdf_sum_label in cached_data
                assert bootstrap_ecdf_all_label in cached_data
                should_fit_bootstrap = False

        except IOError:
            print "Error while loading ", cache_bootstrap_fn

        # Select the ECDF to use
        if bootstrap_to_load == 1:
            sigmax_i = 3    # corresponds to sigmax = 2, input here.
            T_i = 1         # two possible targets here.
            bootstrap_ecdf_sum_used = cached_data[bootstrap_ecdf_sum_label][sigmax_i][T_i]['ecdf']
            bootstrap_ecdf_all_used = cached_data[bootstrap_ecdf_all_label][sigmax_i][T_i]['ecdf']
        elif bootstrap_to_load == 2:
            ratio_conj_i = 4
            bootstrap_ecdf_sum_used = cached_data[bootstrap_ecdf_sum_label][ratio_conj_i]['ecdf']
            bootstrap_ecdf_all_used = cached_data[bootstrap_ecdf_all_label][ratio_conj_i]['ecdf']


        result_pvalue_bootstrap_sum = np.empty(ratio_space.size)*np.nan
        result_pvalue_bootstrap_all = np.empty((ratio_space.size, nontarget_angles.shape[-1]))*np.nan

        for ratio_conj_i, ratio_conj in enumerate(ratio_space):
            print "Ratio: ", ratio_conj

            responses = result_all_thetas[ratio_conj_i]

            bootstrap_allitems_nontargets_allitems_uniquekappa = em_circularmixture_allitems_uniquekappa.bootstrap_nontarget_stat(responses, target_angle, nontarget_angles,
                sumnontargets_bootstrap_ecdf=bootstrap_ecdf_sum_used,
                allnontargets_bootstrap_ecdf=bootstrap_ecdf_all_used)

            result_pvalue_bootstrap_sum[ratio_conj_i] = bootstrap_allitems_nontargets_allitems_uniquekappa['p_value']
            result_pvalue_bootstrap_all[ratio_conj_i] = bootstrap_allitems_nontargets_allitems_uniquekappa['allnontarget_p_value']

        ## Plots
        # f, ax = plt.subplots()
        # ax.plot(ratio_space, result_pvalue_bootstrap_all, linewidth=2)

        # if savefigs:
        #     dataio.save_current_figure("pvalue_bootstrap_all_ratioconj_{label}_{unique_id}.pdf")

        f, ax = plt.subplots()
        ax.plot(ratio_space, result_pvalue_bootstrap_sum, linewidth=2)
        plt.grid()

        if savefigs:
            dataio.save_current_figure("pvalue_bootstrap_sum_ratioconj_{label}_{unique_id}.pdf")


    # plt.figure()
    # plt.plot(ratio_MMlower, results_filtered_smoothed/np.max(results_filtered_smoothed, axis=0), linewidth=2)
    # plt.plot(ratio_MMlower[np.argmax(results_filtered_smoothed, axis=0)], np.ones(results_filtered_smoothed.shape[-1]), 'ro', markersize=10)
    # plt.grid()
    # plt.ylim((0., 1.1))
    # plt.subplots_adjust(right=0.8)
    # plt.legend(['%d item' % i + 's'*(i>1) for i in xrange(1, T+1)], loc='center right', bbox_to_anchor=(1.3, 0.5))
    # plt.xticks(np.linspace(0, 1.0, 5))

    variables_to_save = ['target_angle', 'nontarget_angles']

    if savedata:
        dataio.save_variables_default(locals(), variables_to_save)
        dataio.make_link_output_to_dropbox(dropbox_current_experiment_folder='misbindings')


    plt.show()

    return locals()
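
utils.kappa_to_stddev, used in the kappa plot above, converts a von Mises concentration into a circular standard deviation. A sketch assuming the helper implements the textbook conversion sigma = sqrt(-2 ln R) with mean resultant length R = I1(kappa)/I0(kappa):

import numpy as np
from scipy.special import ive

def kappa_to_stddev_sketch(kappa):
    # Mean resultant length R = I1(kappa)/I0(kappa); exponentially scaled
    # Bessel functions (ive) keep the ratio stable for large kappa.
    R = ive(1, kappa) / ive(0, kappa)
    return np.sqrt(-2.0*np.log(R))

print kappa_to_stddev_sketch(10.0)   # ~0.32 rad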