def plots_fit_collapsedmixturemodels_random(data_pbs, generator_module=None):
    '''
        Reload runs from PBS

        Sequential data analysis.
    '''

    #### SETUP
    #
    plots_bestfits = True
    plots_scatter3d = False

    savefigs = True
    savedata = True
    savemovies = False  # rotating 3D movies; needs a movie-writer-capable matplotlib backend

    colormap = None  # or 'cubehelix'
    plt.rcParams['font.size'] = 16
    #
    #### /SETUP

    print "Order parameters: ", data_pbs.dataset_infos['parameters']
    # parameters: M, ratio_conj, sigmax, alpha

    # Extract data
    T_space = data_pbs.loaded_data['datasets_list'][0]['T_space']

    result_em_fits_collapsed_tr_flat = np.array(data_pbs.dict_arrays['result_em_fits_collapsed_tr']['results_flat'])
    result_em_fits_collapsed_summary_flat = np.array(data_pbs.dict_arrays['result_em_fits_collapsed_summary']['results_flat'])
    result_dist_gorgo11_sequ_collapsed_flat = np.array(data_pbs.dict_arrays['result_dist_gorgo11_sequ_collapsed']['results_flat'])
    result_dist_gorgo11_sequ_collapsed_emmixt_KL_flat = np.array(data_pbs.dict_arrays['result_dist_gorgo11_sequ_collapsed_emmixt_KL']['results_flat'])

    result_parameters_flat = np.array(data_pbs.dict_arrays['result_em_fits_collapsed_tr']['parameters_flat'])
    all_repeats_completed = data_pbs.dict_arrays['result_em_fits_collapsed_tr']['repeats_completed']
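    # repeats_completed appears to hold the 0-indexed last completed repetition,
    # so a value of num_repetitions - 1 means all repetitions finished.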

    all_args_arr = np.array(data_pbs.loaded_data['args_list'])

    M_space = data_pbs.loaded_data['parameters_uniques']['M']
    ratio_conj_space = data_pbs.loaded_data['parameters_uniques']['ratio_conj']
    sigmax_space = data_pbs.loaded_data['parameters_uniques']['sigmax']
    alpha_space = data_pbs.loaded_data['parameters_uniques']['alpha']

    num_repetitions = generator_module.num_repetitions
    parameter_names_sorted = data_pbs.dataset_infos['parameters']

    dataio = DataIO(output_folder=generator_module.pbs_submission_infos['simul_out_dir'] + '/outputs/', label='global_' + data_pbs.dataset_infos['save_output_filename'])

    # Load ground truth
    data_gorgo11_sequ = load_experimental_data.load_data_gorgo11_sequential(fit_mixture_model=True)
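    #  (fit_mixture_model=True attaches the EM mixture model fits, including the
    #   collapsed double-powerlaw fits used as comparison targets)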

    ## Filter everything: keep only runs that completed all repetitions
    filter_data = all_repeats_completed == num_repetitions - 1
    result_parameters_flat = result_parameters_flat[filter_data]

    result_em_fits_collapsed_tr_flat = result_em_fits_collapsed_tr_flat[filter_data]
    result_em_fits_collapsed_summary_flat = result_em_fits_collapsed_summary_flat[filter_data]
    result_dist_gorgo11_sequ_collapsed_flat = result_dist_gorgo11_sequ_collapsed_flat[filter_data]
    result_dist_gorgo11_sequ_collapsed_emmixt_KL_flat = result_dist_gorgo11_sequ_collapsed_emmixt_KL_flat[filter_data]

    all_args_arr = all_args_arr[filter_data]
    all_repeats_completed = all_repeats_completed[filter_data]

    print "Size post-filter: ", result_parameters_flat.shape[0]

    # Compute lots of averages over the repetitions
    result_em_fits_collapsed_tr_flat_avg = utils.nanmean(result_em_fits_collapsed_tr_flat, axis=-1)
    result_em_fits_collapsed_summary_flat_avg = utils.nanmean(result_em_fits_collapsed_summary_flat, axis=-1)
    result_dist_gorgo11_sequ_collapsed_flat_avg = utils.nanmean(result_dist_gorgo11_sequ_collapsed_flat, axis=-1)
    result_dist_gorgo11_sequ_collapsed_emmixt_KL_flat_avg = utils.nanmean(result_dist_gorgo11_sequ_collapsed_emmixt_KL_flat, axis=-1)

    # Sum the averaged distances over nitems, trecall and the mixture components,
    # giving one overall distance per parameter point.
    result_dist_gorgo11_sequ_collapsed_flat_avg_overall = np.nansum(np.nansum(np.nansum(result_dist_gorgo11_sequ_collapsed_flat_avg, axis=-1), axis=-1), axis=-1)

    # We will now grid some of the parameters, to get a 2D/3D surface back.
    # Let's fix ratio_conj, as we know that the other models need around
    # ratio_conj = 0.8 to fit the data well.
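    # A regridding sketch (hedged: only sensible when the parameters lie on a
    # modest grid, i.e. parameters_uniques stays small; under dense random
    # sampling the full grid would not fit in memory), kept commented out:
    #   param_grids = [data_pbs.loaded_data['parameters_uniques'][name] for name in parameter_names_sorted]
    #   result_dist_gridded = np.nan*np.empty(tuple(grid.size for grid in param_grids))
    #   for flat_i in xrange(result_parameters_flat.shape[0]):
    #       grid_idx = tuple(np.argmin(np.abs(grid - result_parameters_flat[flat_i, dim_i])) for dim_i, grid in enumerate(param_grids))
    #       result_dist_gridded[grid_idx] = result_dist_gorgo11_sequ_collapsed_flat_avg_overall[flat_i]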

    def str_best_params(best_i, result_dist_to_use):
        return ' '.join(["%s %.4f" % (parameter_names_sorted[param_i], result_parameters_flat[best_i, param_i]) for param_i in xrange(len(parameter_names_sorted))]) + ' >> %f' % result_dist_to_use[best_i]

    ###### Best fitting points
    if plots_bestfits:
        nb_best_points = 5

        def plot_collapsed_modelfits(T_space, curr_result_emfits_collapsed_tr, labelplot='', dataio=None):
            f, ax = plt.subplots()
            for nitems_i, nitems in enumerate(T_space):
                ax = plots_experimental_data.plot_kappa_mean_error(T_space[:nitems], curr_result_emfits_collapsed_tr[..., 0][nitems_i, :nitems], 0.0*curr_result_emfits_collapsed_tr[..., 0][nitems_i, :nitems], title='model fit fig7 %s' % labelplot, ax=ax, label='%d items' % nitems, xlabel='T_recall')

            if dataio is not None:
                dataio.save_current_figure('bestfit_doublepowerlaw_%s_kappa_{label}_{unique_id}.pdf' % labelplot)

            _, ax_target = plt.subplots()
            _, ax_nontarget = plt.subplots()
            _, ax_random = plt.subplots()
            for nitems_i, nitems in enumerate(T_space):
                ax_target = plots_experimental_data.plot_emmixture_mean_error(T_space[:nitems], curr_result_emfits_collapsed_tr[..., 1][nitems_i, :nitems], curr_result_emfits_collapsed_tr[..., 1][nitems_i, :nitems]*0.0, title='Target model fit %s' % labelplot, ax=ax_target, label='%d items' % nitems, xlabel='T_recall')
                ax_nontarget = plots_experimental_data.plot_emmixture_mean_error(T_space[:nitems], curr_result_emfits_collapsed_tr[..., 2][nitems_i, :nitems], curr_result_emfits_collapsed_tr[..., 2][nitems_i, :nitems]*0.0, title='Nontarget model fit %s' % labelplot, ax=ax_nontarget, label='%d items' % nitems, xlabel='T_recall')
                ax_random = plots_experimental_data.plot_emmixture_mean_error(T_space[:nitems], curr_result_emfits_collapsed_tr[..., 3][nitems_i, :nitems], curr_result_emfits_collapsed_tr[..., 3][nitems_i, :nitems]*0.0, title='Random model fit %s' % labelplot, ax=ax_random, label='%d items' % nitems, xlabel='T_recall')

            if dataio is not None:
                plt.figure(ax_target.get_figure().number)
                dataio.save_current_figure('bestfit_doublepowerlaw_%s_mixttarget_{label}_{unique_id}.pdf' % labelplot)

                plt.figure(ax_nontarget.get_figure().number)
                dataio.save_current_figure('bestfit_doublepowerlaw_%s_mixtnontarget_{label}_{unique_id}.pdf' % labelplot)

                plt.figure(ax_random.get_figure().number)
                dataio.save_current_figure('bestfit_doublepowerlaw_%s_mixtrandom_{label}_{unique_id}.pdf' % labelplot)


        # argsort is ascending: the smallest overall distances are the best fits
        best_points_result_dist_gorgo11seq_all = np.argsort(result_dist_gorgo11_sequ_collapsed_flat_avg_overall)[:nb_best_points]

        for best_point_i in best_points_result_dist_gorgo11seq_all:
            plot_collapsed_modelfits(T_space, result_em_fits_collapsed_tr_flat_avg[best_point_i], labelplot='%.1f' % result_dist_gorgo11_sequ_collapsed_flat_avg_overall[best_point_i], dataio=dataio)


    ###### 3D scatter plots
    if plots_scatter3d:
        nb_best_points = 30
        size_normal_points = 8
        size_best_points = 50

        def plot_scatter(all_vars, result_dist_to_use_name, title='', log_color=True, downsampling=1, label_file=''):

            result_dist_to_use = all_vars[result_dist_to_use_name]
            result_parameters_flat_3d = all_vars['result_parameters_flat_3d']

            # Filter if downsampling
            filter_downsampling = np.arange(0, result_dist_to_use.size, downsampling)
            result_dist_to_use = result_dist_to_use[filter_downsampling]
            result_parameters_flat_3d = result_parameters_flat_3d[filter_downsampling]

            best_points_result_dist_to_use = np.argsort(result_dist_to_use)[:nb_best_points]
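            # (ascending argsort: these are the nb_best_points smallest distances, i.e. the best fits)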

            # Construct (approximately all) combinations of 3 parameters for the 3D
            # scatters: 1000 random draws of 3 distinct indices, deduplicated via the set.
            params_permutations = set([tuple(np.sort(np.random.choice(result_parameters_flat_3d.shape[-1], 3, replace=False)).tolist()) for i in xrange(1000)])

            for param_permut in params_permutations:
                fig = plt.figure()
                ax = Axes3D(fig)

                # One plot per parameter permutation
                if log_color:
                    color_points = np.log(result_dist_to_use)
                else:
                    color_points = result_dist_to_use

                utils.scatter3d(result_parameters_flat_3d[:, param_permut[0]], result_parameters_flat_3d[:, param_permut[1]], result_parameters_flat_3d[:, param_permut[2]], s=size_normal_points, c=color_points, xlabel=parameter_names_sorted[param_permut[0]], ylabel=parameter_names_sorted[param_permut[1]], zlabel=parameter_names_sorted[param_permut[2]], title=title, ax_handle=ax)

                utils.scatter3d(result_parameters_flat_3d[best_points_result_dist_to_use, param_permut[0]], result_parameters_flat_3d[best_points_result_dist_to_use, param_permut[1]], result_parameters_flat_3d[best_points_result_dist_to_use, param_permut[2]], c='r', s=size_best_points, ax_handle=ax)

                if savefigs:
                    dataio.save_current_figure('scatter3d_%s_%s%s_{label}_{unique_id}.pdf' % (result_dist_to_use_name, '_'.join([parameter_names_sorted[i] for i in param_permut]), label_file))

                if savemovies:
                    try:
                        utils.rotate_plot3d(ax, dataio.create_formatted_filename('scatter3d_%s_%s%s_{label}_{unique_id}.mp4' % (result_dist_to_use_name, '_'.join([parameter_names_sorted[i] for i in param_permut]), label_file)), bitrate=8000, min_duration=8)
                        utils.rotate_plot3d(ax, dataio.create_formatted_filename('scatter3d_%s_%s%s_{label}_{unique_id}.gif' % (result_dist_to_use_name, '_'.join([parameter_names_sorted[i] for i in param_permut]), label_file)), nb_frames=30, min_duration=8)
                    except Exception:
                        # Most likely wrong aggregator...
                        print "failed when creating movies for ", result_dist_to_use_name


                if False and savefigs:  # disabled second viewpoint; flip to True to re-enable
                    ax.view_init(azim=90, elev=10)
                    dataio.save_current_figure('scatter3d_view2_%s_%s%s_{label}_{unique_id}.pdf' % (result_dist_to_use_name, '_'.join([parameter_names_sorted[i] for i in param_permut]), label_file))

                # plt.close('all')

            print "Parameters: %s" % ', '.join(parameter_names_sorted)
            print "Best points, %s:" % title
            print '\n'.join([str_best_params(best_i, result_dist_to_use) for best_i in best_points_result_dist_to_use])


    # all_args = data_pbs.loaded_data['args_list']
    variables_to_save = ['parameter_names_sorted', 'all_args_arr', 'all_repeats_completed', 'filter_data']

    if savedata:
        dataio.save_variables_default(locals(), variables_to_save)
        dataio.make_link_output_to_dropbox(dropbox_current_experiment_folder='gorgo11_sequential_fitmixturemodel')


    plt.show()

    return locals()


def launcher_do_fit_mixturemodels_sequential_alltrecall(args):
    '''
        Run the model for 1..T items sequentially, for all possible trecall/T.
        Compute:
        - Precision of samples
        - EM mixture model fits. Both independent and collapsed model.
        - Theoretical Fisher Information
        - EM Mixture model distances to set of currently working datasets.
    '''

    print "Doing a piece of work for launcher_do_fit_mixturemodels_sequential_alltrecall"

    all_parameters = utils.argparse_2_dict(args)
    print all_parameters

    if all_parameters['burn_samples'] + all_parameters['num_samples'] < 200:
        print "WARNING> you do not have enough samples I think!", all_parameters['burn_samples'] + all_parameters['num_samples']

    # Create DataIO
    #  (complete label with current variable state)
    dataio = DataIO.DataIO(output_folder=all_parameters['output_directory'], label=all_parameters['label'].format(**all_parameters))
    save_every = 1
    run_counter = 0

    # Load dataset to compare against
    data_gorgo11_sequ = load_experimental_data.load_data_gorgo11_sequential(data_dir=all_parameters['experiment_data_dir'], fit_mixture_model=True)
    gorgo11_sequ_T_space = np.unique(data_gorgo11_sequ['n_items'])


    # Parameters to vary
    T_max = all_parameters['T']
    T_space = np.arange(1, T_max+1)
    repetitions_axis = -1

    # Result arrays
    result_all_precisions = np.nan*np.empty((T_space.size, T_space.size, all_parameters['num_repetitions']))
    result_fi_theo = np.nan*np.empty((T_space.size, T_space.size, all_parameters['num_repetitions']))
    result_fi_theocov = np.nan*np.empty((T_space.size, T_space.size, all_parameters['num_repetitions']))
    result_em_fits = np.nan*np.empty((T_space.size, T_space.size, 6, all_parameters['num_repetitions']))  # kappa, mixt_target, mixt_nontarget, mixt_random, ll, bic
    result_em_fits_collapsed_tr = np.nan*np.empty((T_space.size, T_space.size, 4, all_parameters['num_repetitions']))  # kappa_tr, mixt_target_tr, mixt_nontarget_tr, mixt_random_tr
    result_em_fits_collapsed_summary = np.nan*np.empty((5, all_parameters['num_repetitions'])) # bic, ll, kappa_theta

    result_dist_gorgo11_sequ = np.nan*np.empty((T_space.size, T_space.size, 4, all_parameters['num_repetitions']))  # kappa, mixt_target, mixt_nontarget, mixt_random
    result_dist_gorgo11_sequ_emmixt_KL = np.nan*np.empty((T_space.size, T_space.size, all_parameters['num_repetitions']))

    result_dist_gorgo11_sequ_collapsed = np.nan*np.empty((T_space.size, T_space.size, 4, all_parameters['num_repetitions']))
    result_dist_gorgo11_sequ_collapsed_emmixt_KL = np.nan*np.empty((T_space.size, T_space.size, all_parameters['num_repetitions']))

    gorgo11_sequ_collapsed_mixtmod_mean = data_gorgo11_sequ['collapsed_em_fits_doublepowerlaw_array']


    # If desired, will automatically save all Model responses.
    # NOTE: the collapsed mixture model fit below refits on these raw responses,
    # so collect_responses must be enabled for this launcher.
    assert all_parameters['collect_responses'], 'launcher_do_fit_mixturemodels_sequential_alltrecall requires collect_responses'
    if all_parameters['collect_responses']:
        print "--- Collecting all responses..."
        result_responses = np.nan*np.empty((T_space.size, T_space.size, all_parameters['N'], all_parameters['num_repetitions']))
        result_target = np.nan*np.empty((T_space.size, T_space.size, all_parameters['N'], all_parameters['num_repetitions']))
        result_nontargets = np.nan*np.empty((T_space.size, T_space.size, all_parameters['N'], T_max-1, all_parameters['num_repetitions']))

    # Triangular number of (T, trecall) pairs per repetition: sum over T of T = T*(T+1)/2
    search_progress = progress.Progress(T_space.size*(T_space.size + 1)/2.*all_parameters['num_repetitions'])

    for repet_i in xrange(all_parameters['num_repetitions']):
        for T_i, T in enumerate(T_space):
            for trecall_i, trecall in enumerate(np.arange(T, 0, -1)):
                # Inverting indexing of trecall, to be consistent. trecall_i 0 == last item.
                # But trecall still means the actual time of recall!
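                # e.g. for T=3: trecall_i 0, 1, 2 <-> trecall 3, 2, 1 (recalled last, middle, first)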
                print "%.2f%%, %s left - %s" % (search_progress.percentage(), search_progress.time_remaining_str(), search_progress.eta_str())
                print "Fit for T=%d, tr=%d, %d/%d" % (T, trecall, repet_i+1, all_parameters['num_repetitions'])

                # Update parameter
                all_parameters['T'] = T
                all_parameters['fixed_cued_feature_time'] = trecall - 1
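                # fixed_cued_feature_time is 0-indexed, hence trecall - 1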

                ### WORK WORK WORK work? ###
                # Instantiate
                (_, _, _, sampler) = launchers.init_everything(all_parameters)

                # Sample
                sampler.run_inference(all_parameters)

                # Compute precision
                print "get precision..."
                result_all_precisions[T_i, trecall_i, repet_i] = sampler.get_precision()

                # Fit mixture model, independent
                print "fit mixture model..."
                curr_params_fit = sampler.fit_mixture_model(use_all_targets=False)
                result_em_fits[T_i, trecall_i, :, repet_i] = [curr_params_fit[key] for key in ['kappa', 'mixt_target', 'mixt_nontargets_sum', 'mixt_random', 'train_LL', 'bic']]

                # Compute fisher info
                print "compute fisher info"
                result_fi_theo[T_i, trecall_i, repet_i] = sampler.estimate_fisher_info_theocov(use_theoretical_cov=False)
                result_fi_theocov[T_i, trecall_i, repet_i] = sampler.estimate_fisher_info_theocov(use_theoretical_cov=True)

                # Compute distances to datasets (this is for the non-collapsed stuff, not the best)
                if T in gorgo11_sequ_T_space:
                    gorgo11_sequ_mixtures_mean = data_gorgo11_sequ['em_fits_nitems_trecall_arrays'][gorgo11_sequ_T_space==T, trecall_i, :4].flatten()

                    result_dist_gorgo11_sequ[T_i, trecall_i, :, repet_i] = (gorgo11_sequ_mixtures_mean - result_em_fits[T_i, trecall_i, :4, repet_i])**2.
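                    # KL divergence between the (target, nontarget, random) mixture proportions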
                    result_dist_gorgo11_sequ_emmixt_KL[T_i, trecall_i, repet_i] = utils.KL_div(result_em_fits[T_i, trecall_i, 1:4, repet_i], gorgo11_sequ_mixtures_mean[1:])


                # If needed, store responses
                if all_parameters['collect_responses']:
                    print "collect responses"
                    (responses, target, nontarget) = sampler.collect_responses()
                    result_responses[T_i, trecall_i, :, repet_i] = responses
                    result_target[T_i, trecall_i, :, repet_i] = target
                    result_nontargets[T_i, trecall_i, :, :T_i, repet_i] = nontarget


                print "CURRENT RESULTS:\n", result_all_precisions[T_i, trecall_i, repet_i], curr_params_fit, result_fi_theo[T_i, trecall_i, repet_i], result_fi_theocov[T_i, trecall_i, repet_i], np.sum(result_dist_gorgo11_sequ[T_i, trecall_i, :, repet_i]), np.sum(result_dist_gorgo11_sequ_emmixt_KL[T_i, trecall_i, repet_i]), "\n"
                ### /Work ###

                search_progress.increment()
                if run_counter % save_every == 0 or search_progress.done():
                    dataio.save_variables_default(locals())
                run_counter += 1

        # Fit Collapsed mixture model on the collected responses
        # TODO check dimensionality...
        print 'Fitting Collapsed double powerlaw mixture model...'
        params_fit = em_circularmixture_parametrickappa_doublepowerlaw.fit(T_space, result_responses[..., repet_i], result_target[..., repet_i], result_nontargets[..., repet_i], debug=False)
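        # params_fit holds per-(T, trecall) arrays 'kappa', 'mixt_target_tr',
        # 'mixt_nontargets_tr', 'mixt_random_tr', plus the scalar 'bic' and the
        # power-law parameters 'kappa_theta' used below.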

        # First store the parameters that depend on T/trecall
        for i, key in enumerate(['kappa', 'mixt_target_tr', 'mixt_nontargets_tr', 'mixt_random_tr']):
            result_em_fits_collapsed_tr[..., i, repet_i] = params_fit[key]

        # Then the ones that do not, only one per full collapsed fit.
        result_em_fits_collapsed_summary[0, repet_i] = params_fit['bic']
        # result_em_fits_collapsed_summary[1, repet_i] = params_fit['train_LL']
        result_em_fits_collapsed_summary[2:, repet_i] = params_fit['kappa_theta']
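        #  (index 1 stays NaN: the train_LL line above is deliberately commented out)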

        # Compute distances to dataset for collapsed model
        result_dist_gorgo11_sequ_collapsed[..., repet_i] = (gorgo11_sequ_collapsed_mixtmod_mean - result_em_fits_collapsed_tr[..., repet_i])**2.
        result_dist_gorgo11_sequ_collapsed_emmixt_KL[..., repet_i] = utils.KL_div(result_em_fits_collapsed_tr[..., 1:4, repet_i], gorgo11_sequ_collapsed_mixtmod_mean[..., 1:], axis=-1)


    # Finished
    dataio.save_variables_default(locals())

    print "All finished"
    return locals()


def plots_fit_mixturemodels_random(data_pbs, generator_module=None):
    """
        Reload runs from PBS
    """

    #### SETUP
    #
    savefigs = True
    savedata = True

    colormap = None  # or 'cubehelix'
    plt.rcParams["font.size"] = 16
    #
    #### /SETUP

    print "Order parameters: ", data_pbs.dataset_infos["parameters"]
    # parameters: fixed_cued_feature_time, M, ratio_conj, sigmax, alpha
    #  (fixed_cued_feature_time comes first, cf. the filter below)

    # Extract data
    T_space = data_pbs.loaded_data["datasets_list"][0]["T_space"]

    result_responses_flat = np.array(data_pbs.dict_arrays["result_responses"]["results_flat"])
    result_targets_flat = np.array(data_pbs.dict_arrays["result_target"]["results_flat"])
    result_nontargets_flat = np.array(data_pbs.dict_arrays["result_nontargets"]["results_flat"])

    result_parameters_flat = np.array(data_pbs.dict_arrays["result_responses"]["parameters_flat"])
    all_repeats_completed = data_pbs.dict_arrays["result_responses"]["repeats_completed"]

    all_args_arr = np.array(data_pbs.loaded_data["args_list"])

    M_space = data_pbs.loaded_data["parameters_uniques"]["M"]
    ratio_conj_space = data_pbs.loaded_data["parameters_uniques"]["ratio_conj"]
    sigmax_space = data_pbs.loaded_data["parameters_uniques"]["sigmax"]
    alpha_space = data_pbs.loaded_data["parameters_uniques"]["alpha"]
    trecall_space = data_pbs.loaded_data["parameters_uniques"]["fixed_cued_feature_time"]

    num_repetitions = generator_module.num_repetitions
    parameter_names_sorted = data_pbs.dataset_infos["parameters"]

    dataio = DataIO(
        output_folder=generator_module.pbs_submission_infos["simul_out_dir"] + "/outputs/",
        label="global_" + dataset_infos["save_output_filename"],
    )

    ##### Because of laziness, the responses are stored awkwardly.
    # Each run is for a given trecall, but we run N items = 1..Nmax anyway,
    # so whenever trecall > N the entries are np.nan.
    # => Need to reconstruct the arrays properly, filling the lower triangle of Nitems x Trecall.
    # Also, trecall is the actual recall time: re-index it (roughly Tmax - (trecall + 1)) so that index 0 means the last item.
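    # A hedged sketch of that reconstruction (assumes a run at
    # fixed_cued_feature_time == tr yields responses of shape
    # (T_space.size, N, num_repetitions), NaN whenever nitems <= tr):
    #   for nitems_i, nitems in enumerate(T_space):
    #       if nitems > tr:
    #           trecall_i = nitems - 1 - tr  # 0 == last item, matching the launcher convention
    #           responses_nitems_trecall[nitems_i, trecall_i] = responses_run[nitems_i]
    # which fills exactly the lower triangle of the (nitems x trecall) grid.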

    # Load ground truth
    data_gorgo11_sequ = load_experimental_data.load_data_gorgo11_sequential(fit_mixture_model=True)

    ## Keep only runs with all repetitions completed and trecall == last item
    #  (the first column of result_parameters_flat is fixed_cued_feature_time)
    filter_data = (result_parameters_flat[:, 0] == (T_space.max() - 1)) & (all_repeats_completed == num_repetitions - 1)
    result_parameters_flat = result_parameters_flat[filter_data]
    result_responses_flat = result_responses_flat[filter_data]
    result_targets_flat = result_targets_flat[filter_data]
    result_nontargets_flat = result_nontargets_flat[filter_data]
    all_args_arr = all_args_arr[filter_data]
    all_repeats_completed = all_repeats_completed[filter_data]

    print "Size post-filter: ", result_parameters_flat.shape[0]

    def str_best_params(best_i, result_dist_to_use):
        return (
            " ".join(
                [
                    "%s %.4f" % (parameter_names_sorted[param_i], result_parameters_flat[best_i, param_i])
                    for param_i in xrange(len(parameter_names_sorted))
                ]
            )
            + " >> %f" % result_dist_to_use[best_i]
        )

    # all_args = data_pbs.loaded_data['args_list']
    variables_to_save = ["parameter_names_sorted", "all_args_arr", "all_repeats_completed", "filter_data"]

    if savedata:
        dataio.save_variables_default(locals(), variables_to_save)
        dataio.make_link_output_to_dropbox(dropbox_current_experiment_folder="gorgo11_sequential_fitmixturemodel")

    plt.show()

    return locals()