Example #1
    def target(self, file_name):
        """ Display sampled points (noisy), read from file the input/target
        pairs and draw the resulting points.

        :param file_name - str, name of the file with the data.
        """
        x, t, e = DataIO.read_data(file_name)
        self.AX1.plot(x,
                      t,
                      marker='o',
                      markersize=7,
                      linestyle='none',
                      color=self.CLR_DOT,
                      zorder=2)
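For context, a minimal sketch of the DataIO.read_data contract assumed above: three parallel columns (inputs x, noisy targets t, and an error column e) read from a text file. The column layout and title line are assumptions, not confirmed by the source.

import numpy as np

def read_data_sketch(file_name):
    # Assumed format: one title line, then whitespace-separated x, t, e columns.
    x, t, e = np.loadtxt(file_name, skiprows=1, unpack=True)
    return x, t, e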
Example #2
    def download_data(self, pair, start, end):
        """Download trade data and store as .csv file.

        Args:
            pair (str): Currency pair.
            start (int): Start UNIX of trade data to download.
            end (int): End UNIX of trade data to download.
        """
        dataio = DataIO(savedir=self._savedir, fieldnames=self.FIELDNAMES)
        if dataio.csv_check(pair):
            last_row = dataio.csv_get_last(pair)
            newest_id = int(last_row['trade_id']) + 1
            newest_t = int(last_row['time'])
        else:
            newest_id = self.__find_start_trade_id(pair, start)
            newest_t = 0

        while newest_t < end:
            # new -> old
            r = self.__get_slice(pair, newest_id)

            # old -> new, add unix timestamp
            new_r = []
            for row in r:
                row['time'] = row['T'] // 1000
                row['date'] = timeutil.unix_to_iso(row['time'])
                row['price'] = row['p']
                row['size'] = row['q']
                row['side'] = 'sell' if row['m'] else 'buy'
                row['best_price_match'] = row['M']
                row['trade_id'] = row['a']
                for key in ('a', 'p', 'q', 'f', 'l', 'T', 'm', 'M'):
                    row.pop(key, None)
                new_r.append(row)

            # save to file
            dataio.csv_append(pair, new_r)

            # break condition
            if len(r) < self.__MAX_LIMIT:
                break

            # prepare next iteration
            newest_id = new_r[-1]['trade_id'] + 1
            newest_t = new_r[-1]['time']
            print('Binance\t| {} : {}'.format(timeutil.unix_to_iso(newest_t),
                                              pair))

        print('Binance\t| Download complete : {}'.format(pair))
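The per-row remapping in the loop above can be read as a standalone transform; a minimal sketch, assuming exactly the raw Binance aggTrades keys used there (the original additionally derives 'date' via timeutil.unix_to_iso):

def remap_binance_row(row):
    # Binance aggTrades keys -> common schema, as in the loop above.
    return {
        'trade_id': row['a'],
        'time': row['T'] // 1000,  # milliseconds -> seconds
        'price': row['p'],
        'size': row['q'],
        'side': 'sell' if row['m'] else 'buy',
        'best_price_match': row['M'],
    }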
Example #3
    def download_data(self, pair, start, end):
        """Download trade data and store as .csv file.

        Args:
            pair (str): Currency pair.
            start (int): Start UNIX of trade data to download.
            end (int): End UNIX of trade data to download.
        """
        dataio = DataIO(savedir=self._savedir, fieldnames=self.FIELDNAMES)
        if dataio.csv_check(pair):
            last_row = dataio.csv_get_last(pair)
            newest_id = int(last_row['trade_id']) + 1
            newest_t = int(last_row['time'])
        else:
            newest_id = self.__find_start_trade_id(pair, start)
            newest_t = 0

        last_trade_id = self.__find_last_trade_id(pair)

        while newest_t < end:
            # new -> old
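            # request one page ahead so the returned slice covers
            # (newest_id, newest_id + MAX_LIMIT]; rows at or below newest_id
            # are filtered out in the loop below (inferred from this code)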
            r = self.__get_slice(pair, newest_id + self.__MAX_LIMIT)

            # break condition
            to_break = False

            # old -> new, add unix timestamp
            new_r = []
            for row in reversed(r):
                if row['trade_id'] > newest_id:
                    row['date'] = row['time']
                    row['time'] = timeutil.iso_to_unix(row['time'])
                    new_r.append(row)
                if row['trade_id'] == last_trade_id:
                    to_break = True

            # save to file
            dataio.csv_append(pair, new_r)

            # break condition
            if to_break:
                break

            # prepare next iteration
            newest_id = new_r[-1]['trade_id']
            newest_t = new_r[-1]['time']
            print('GDAX\t| {} : {}'.format(timeutil.unix_to_iso(newest_t),
                                           pair))

        print('GDAX\t| Download complete : {}'.format(pair))
Example #4
    def download_data(self, pair, start, end):
        """Download trade data and store as .csv file.

        Args:
            pair (str): Currency pair.
            start (int): Start UNIX of trade data to download.
            end (int): End UNIX of trade data to download.
        """
        dataio = DataIO(savedir=self._savedir, fieldnames=self.FIELDNAMES)
        if dataio.csv_check(pair):
            last_row = dataio.csv_get_last(pair)
            newest_id = int(last_row['trade_id']) + 1
            newest_t = int(last_row['time'])
        else:
            newest_id = self.__find_start_trade_id(pair, start)
            newest_t = 0

        while newest_t < end:
            # new -> old
            r = self.__get_slice(pair, newest_id)

            # old -> new, add unix timestamp
            new_r = []
            for i, row in enumerate(r):
                row['time'] = timeutil.iso_to_unix(row['timestamp'])
                row['date'] = row['timestamp']
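                # no numeric trade id in this payload, so one is synthesized
                # from the running offset (inferred from this loop)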
                row['trade_id'] = newest_id + i
                row['side'] = row['side'].lower()
                row.pop('timestamp', None)
                row.pop('symbol', None)
                new_r.append(row)

            # save to file
            dataio.csv_append(pair, new_r)

            # break condition
            if len(r) < self.__MAX_LIMIT:
                break

            # prepare next iteration
            newest_id = new_r[-1]['trade_id'] + 1
            newest_t = new_r[-1]['time']
            print('Bitmex\t| {} : {}'.format(
                timeutil.unix_to_iso(newest_t), pair))

        print('Bitmex\t| Download complete : {}'.format(pair))
Example #5
    def download_data(self, pair, start, end):
        """Download trade data and store as .csv file.

        Args:
            pair (str): Currency pair.
            start (int): Start UNIX of trade data to download.
            end (int): End UNIX of trade data to download.
        """
        dataio = DataIO(savedir=self._savedir, fieldnames=self.FIELDNAMES)
        last_row = None
        if dataio.csv_check(pair):
            last_row = dataio.csv_get_last(pair)
            newest_t = int(last_row['time'])
        else:
            newest_t = self.__find_start_trade_time(pair, start) - 1

        # break condition
        last_trade_time = self.__find_last_trade_time(pair)

        while newest_t < end:
            # new -> old
            r = self.__get_slice(pair, newest_t)

            # old -> new; remove duplicate data by trade ID
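            # consecutive time windows can overlap, so the tradeID comparison
            # below drops rows already written in the previous slice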
            new_r = []
            for row in reversed(r):
                if last_row is not None:
                    if int(last_row['tradeID']) >= row['tradeID']:
                        continue  # remove duplicates
                last_row = row
                row['time'] = timeutil.iso_to_unix(row['date'])
                new_r.append(row)

            if newest_t > last_trade_time:
                break

            # save to file
            dataio.csv_append(pair, new_r)

            # prepare next iteration
            newest_t += self.__MAX_RANGE
            print('Poloniex| {} : {}'.format(
                timeutil.unix_to_iso(newest_t), pair))

        print('Poloniex| Download complete : {}'.format(pair))
Example #6
def data_preprocess(params):
    ### Record Concatenation
    dataio = DataIO(params['input_path'], params['map_path'], params['domain'])
    dataio.read_data()
    dataio.read_label()
    ctn = Concatenation(dataio, params['domain'])
    # patient_info maps patient id -> Patient, holding static and dynamic
    # features; dynamic features are stored as {time: feature_value}
    patient_info, n_feature, feature_list, feature_range = ctn.get_concatenation()
    ### Data Imputation
    imp_method = 'simple'
    imp = Imputation(patient_info, n_feature)
    patient_array, patient_time = imp.get_imputation(imp_method)

    ### Clinical Data with DTI Generation
    cli = CliGen(feature_list, feature_range, ctn.dti_time)
    subject_array = cli.get_data(patient_array, patient_time, params['time'])
    if params['binary']:  # only works for discrete clinical features
        subject_array = cli.get_binarization()
    subject_label = cli.get_label(patient_info, params['labels'], params['time'])
    return subject_array, subject_label
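A hypothetical invocation of data_preprocess; the keys are read off the function body above, while every value here is a placeholder:

params = {
    'input_path': 'input/records/',  # placeholder path
    'map_path': 'input/maps/',       # placeholder path
    'domain': 'clinical',            # placeholder domain name
    'time': 12,                      # horizon passed to cli.get_data / get_label
    'binary': False,                 # binarize discrete clinical features
    'labels': ['outcome'],           # placeholder label list
}
subject_array, subject_label = data_preprocess(params)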
Example #7
def plots_fitmixtmodel_rcscale_effect(data_pbs, generator_module=None):
    '''
        Reload runs from PBS
    '''

    #### SETUP
    #
    savefigs = True
    savedata = True

    plots_all_T = True
    plots_per_T = True

    # do_relaunch_bestparams_pbs = True

    colormap = None  # or 'cubehelix'
    plt.rcParams['font.size'] = 16
    #
    #### /SETUP

    print "Order parameters: ", data_pbs.dataset_infos['parameters']
    # parameters: M, ratio_conj, sigmax

    # Extract data
    T_space = data_pbs.loaded_data['datasets_list'][0]['T_space']

    result_em_fits_flat = np.array(data_pbs.dict_arrays['result_em_fits']['results_flat'])
    result_precisions_flat = np.array(data_pbs.dict_arrays['result_all_precisions']['results_flat'])
    result_dist_bays09_flat = np.array(data_pbs.dict_arrays['result_dist_bays09']['results_flat'])
    result_dist_gorgo11_flat = np.array(data_pbs.dict_arrays['result_dist_gorgo11']['results_flat'])
    result_dist_bays09_emmixt_KL = np.array(data_pbs.dict_arrays['result_dist_bays09_emmixt_KL']['results_flat'])
    result_dist_gorgo11_emmixt_KL = np.array(data_pbs.dict_arrays['result_dist_gorgo11_emmixt_KL']['results_flat'])
    result_parameters_flat = np.array(data_pbs.dict_arrays['result_em_fits']['parameters_flat'])

    rc_scale_space = data_pbs.loaded_data['parameters_uniques']['rc_scale']
    num_repetitions = generator_module.num_repetitions
    parameter_names_sorted = data_pbs.dataset_infos['parameters']

    dataio = DataIO(output_folder=generator_module.pbs_submission_infos['simul_out_dir'] + '/outputs/', label='global_' + dataset_infos['save_output_filename'])

    # Load bays09
    data_bays09 = load_experimental_data.load_data_bays09(fit_mixture_model=True)
    bays09_nitems = data_bays09['data_to_fit']['n_items']
    bays09_em_target = np.nan*np.empty((bays09_nitems.max(), 4))  #kappa, prob_target, prob_nontarget, prob_random
    bays09_em_target[bays09_nitems - 1] = data_bays09['em_fits_nitems_arrays']['mean'].T
    bays09_emmixt_target = bays09_em_target[:, 1:]


    ## Compute some stuff
    result_parameters_flat = result_parameters_flat.flatten()

    result_em_fits_all_avg = utils.nanmean(result_em_fits_flat, axis=-1)
    result_em_kappa_allT = result_em_fits_all_avg[..., 0]
    result_em_emmixt_allT = result_em_fits_all_avg[..., 1:4]

    result_precisions_all_avg = utils.nanmean(result_precisions_flat, axis=-1)

    # Square distance to kappa
    result_dist_bays09_allT_avg = utils.nanmean(result_dist_bays09_flat, axis=-1)
    result_dist_bays09_emmixt_KL_allT_avg = utils.nanmean(result_dist_bays09_emmixt_KL, axis=-1)

    result_dist_bays09_kappa_allT = result_dist_bays09_allT_avg[..., 0]

    # result_dist_bays09_allT_avg = utils.nanmean((result_em_fits_flat[:, :, :4] - bays09_em_target[np.newaxis, :, :, np.newaxis])**2, axis=-1)
    # result_dist_bays09_kappa_sum = np.nansum(result_dist_bays09_allT_avg[:, :, 0], axis=-1)

    # result_dist_bays09_kappa_T1_sum = result_dist_bays09_allT_avg[:, 0, 0]
    # result_dist_bays09_kappa_T25_sum = np.nansum(result_dist_bays09_allT_avg[:, 1:, 0], axis=-1)

    # # Square and KL distance for EM Mixtures
    # result_dist_bays09_emmixt_sum = np.nansum(np.nansum(result_dist_bays09_allT_avg[:, :, 1:], axis=-1), axis=-1)
    # result_dist_bays09_emmixt_T1_sum = np.nansum(result_dist_bays09_allT_avg[:, 0, 1:], axis=-1)
    # result_dist_bays09_emmixt_T25_sum = np.nansum(np.nansum(result_dist_bays09_allT_avg[:, 1:, 1:], axis=-1), axis=-1)


    # result_dist_bays09_emmixt_KL = utils.nanmean(utils.KL_div(result_em_fits_flat[:, :, 1:4], bays09_emmixt_target[np.newaxis, :, :, np.newaxis], axis=-2), axis=-1)   # KL over dimension of mixtures, then mean over repetitions
    # result_dist_bays09_emmixt_KL_sum = np.nansum(result_dist_bays09_emmixt_KL, axis=-1)  # sum over T
    # result_dist_bays09_emmixt_KL_T1_sum = result_dist_bays09_emmixt_KL[:, 0]
    # result_dist_bays09_emmixt_KL_T25_sum = np.nansum(result_dist_bays09_emmixt_KL[:, 1:], axis=-1)


    # result_dist_bays09_both_normalised = result_dist_bays09_emmixt_sum/np.max(result_dist_bays09_emmixt_sum) + result_dist_bays09_kappa_sum/np.max(result_dist_bays09_kappa_sum)

    # # Mask kappa for performance too bad
    # result_dist_bays09_kappa_sum_masked = np.ma.masked_greater(result_dist_bays09_kappa_sum, 2*np.median(result_dist_bays09_kappa_sum))
    # result_dist_bays09_emmixt_KL_sum_masked = np.ma.masked_greater(result_dist_bays09_emmixt_KL_sum, 2*np.median(result_dist_bays09_emmixt_KL_sum))
    # result_dist_bays09_both_normalised_mult_masked = 1-(1. - result_dist_bays09_emmixt_KL_sum/np.max(result_dist_bays09_emmixt_KL_sum))*(1. - result_dist_bays09_kappa_sum_masked/np.max(result_dist_bays09_kappa_sum_masked))

    # Compute optimal rc_scale
    all_args = data_pbs.loaded_data['args_list']
    specific_arg = all_args[0]
    specific_arg['autoset_parameters'] = True
    (_, _, _, sampler) = launchers.init_everything(specific_arg)
    optimal_rc_scale = sampler.random_network.rc_scale[0]

    if plots_all_T:
        # Show Kappa evolution wrt rc_scale
        f, ax = plt.subplots()
        # utils.plot_mean_std_from_samples(result_parameters_flat, np.nansum(result_em_kappa_allT, axis=-1), bins=60, bins_y=150, xlabel='rc_scale', ylabel='EM kappa', title='Kappa, summed T',  ax_handle=ax, show_scatter=False)
        utils.plot_mean_std_from_samples_rolling(result_parameters_flat, np.nansum(result_em_kappa_allT, axis=-1), window=35, xlabel='rc_scale', ylabel='EM kappa', title='Kappa, summed T',  ax_handle=ax, show_scatter=False)
        ax.axvline(x=optimal_rc_scale, color='g', linewidth=2)
        ax.axvline(x=2*optimal_rc_scale, color='r', linewidth=2)
        f.canvas.draw()

        if savefigs:
            dataio.save_current_figure('rcscaleeffect_kappa_summedT_{label}_{unique_id}.pdf')

        # Show Mixt proportions
        f, ax = plt.subplots()
        for i in xrange(3):
            # utils.plot_mean_std_from_samples(result_parameters_flat, np.nansum(result_em_emmixt_allT[..., i], axis=-1), bins=60, bins_y=100, xlabel='rc_scale', ylabel='EM mixt proportions', title='EM mixtures, summed T',  ax_handle=ax, show_scatter=False)
            utils.plot_mean_std_from_samples_rolling(result_parameters_flat, np.nansum(result_em_emmixt_allT[..., i], axis=-1), window=35, xlabel='rc_scale', ylabel='EM mixt proportions', title='EM mixtures, summed T',  ax_handle=ax, show_scatter=False)
        ax.axvline(x=optimal_rc_scale, color='g', linewidth=2)
        ax.axvline(x=2*optimal_rc_scale, color='r', linewidth=2)
        f.canvas.draw()

        if savefigs:
            dataio.save_current_figure('rcscaleeffect_mixtprop_summedT_{label}_{unique_id}.pdf')

        # Show Precision
        f, ax = plt.subplots()
        # utils.plot_mean_std_from_samples(result_parameters_flat, np.nansum(result_precisions_all_avg, axis=-1), bins=60, bins_y=150, xlabel='rc_scale', ylabel='Precision', title='Precision, summed T',  ax_handle=ax, show_scatter=False)
        utils.plot_mean_std_from_samples_rolling(result_parameters_flat, np.nansum(result_precisions_all_avg, axis=-1), window=35, xlabel='rc_scale', ylabel='Precision', title='Precision, summed T',  ax_handle=ax, show_scatter=False)
        ax.axvline(x=optimal_rc_scale, color='g', linewidth=2)
        ax.axvline(x=2*optimal_rc_scale, color='r', linewidth=2)
        f.canvas.draw()

        if savefigs:
            dataio.save_current_figure('rcscaleeffect_precision_summedT_{label}_{unique_id}.pdf')


        plt.close('all')


    if plots_per_T:
        for T_i, T in enumerate(T_space):
            # Show Kappa evolution wrt rc_scale
            f, ax = plt.subplots()
            # utils.plot_mean_std_from_samples(result_parameters_flat, result_em_kappa_allT[:, T_i], bins=40, bins_y=100, xlabel='rc_scale', ylabel='EM kappa', title='Kappa, T %d' % T,  ax_handle=ax, show_scatter=False)
            utils.plot_mean_std_from_samples_rolling(result_parameters_flat, result_em_kappa_allT[:, T_i], window=35, xlabel='rc_scale', ylabel='EM kappa', title='Kappa, T %d' % T,  ax_handle=ax, show_scatter=False)
            ax.axvline(x=optimal_rc_scale, color='g', linewidth=2)
            ax.axvline(x=2*optimal_rc_scale, color='r', linewidth=2)
            f.canvas.draw()

            if savefigs:
                dataio.save_current_figure('rcscaleeffect_kappa_T%d_{label}_{unique_id}.pdf' % T)

            # Show Mixt proportions
            f, ax = plt.subplots()
            for i in xrange(3):
                # utils.plot_mean_std_from_samples(result_parameters_flat, result_em_emmixt_allT[:, T_i, i], bins=40, bins_y=100, xlabel='rc_scale', ylabel='EM mixt proportions', title='EM mixtures, T %d' % T,  ax_handle=ax, show_scatter=False)
                utils.plot_mean_std_from_samples_rolling(result_parameters_flat, result_em_emmixt_allT[:, T_i, i], window=35, xlabel='rc_scale', ylabel='EM mixt proportions', title='EM mixtures, T %d' % T,  ax_handle=ax, show_scatter=False)
            ax.axvline(x=optimal_rc_scale, color='g', linewidth=2)
            ax.axvline(x=2*optimal_rc_scale, color='r', linewidth=2)
            f.canvas.draw()

            if savefigs:
                dataio.save_current_figure('rcscaleeffect_mixtprop_T%d_{label}_{unique_id}.pdf' % T)

            # Show Precision
            f, ax = plt.subplots()
            # utils.plot_mean_std_from_samples(result_parameters_flat, result_precisions_all_avg[:, T_i], bins=40, bins_y=100, xlabel='rc_scale', ylabel='Precision', title='Precision, T %d' % T,  ax_handle=ax, show_scatter=False)
            utils.plot_mean_std_from_samples_rolling(result_parameters_flat, result_precisions_all_avg[:, T_i], window=35, xlabel='rc_scale', ylabel='Precision', title='Precision, T %d' % T,  ax_handle=ax, show_scatter=False)
            ax.axvline(x=optimal_rc_scale, color='g', linewidth=2)
            ax.axvline(x=2*optimal_rc_scale, color='r', linewidth=2)
            f.canvas.draw()

            if savefigs:
                dataio.save_current_figure('rcscaleeffect_precision_T%d_{label}_{unique_id}.pdf' % T)

            plt.close('all')

    # # Interpolate
    # if plots_interpolate:

    #     sigmax_target = 0.9

    #     M_interp_space = np.arange(6, 625, 5)
    #     ratio_interp_space = np.linspace(0.01, 1.0, 50)
    #     # sigmax_interp_space = np.linspace(0.01, 1.0, 50)
    #     sigmax_interp_space = np.array([sigmax_target])
    #     params_crossspace = np.array(utils.cross(M_interp_space, ratio_interp_space, sigmax_interp_space))

    #     interpolated_data = rbf_interpolator(params_crossspace[:, 0], params_crossspace[:, 1], params_crossspace[:, 2]).reshape((M_interp_space.size, ratio_interp_space.size))

    #     utils.pcolor_2d_data(interpolated_data, M_interp_space, ratio_interp_space, 'M', 'ratio', 'interpolated, fixing sigmax= %.2f' % sigmax_target)

    #     points_closeby = ((result_parameters_flat[:, 2] - sigmax_target)**2)< 0.01
    #     plt.figure()
    #     # plt.imshow(interpolated_data, extent=(M_interp_space.min(), M_interp_space.max(), ratio_interp_space.min(), ratio_interp_space.max()))
    #     plt.imshow(interpolated_data)
    #     plt.scatter(result_parameters_flat[points_closeby, 0], result_parameters_flat[points_closeby, 1], s=100, c=result_fitexperiments_bic_avg[points_closeby], marker='o')


    # if plot_per_ratio:
    #     # Plot the evolution of loglike as a function of sigmax, with std shown
    #     for ratio_conj_i, ratio_conj in enumerate(ratio_space):
    #         ax = utils.plot_mean_std_area(sigmax_space, result_log_posterior_mean[ratio_conj_i], result_log_posterior_std[ratio_conj_i])

    #         ax.get_figure().canvas.draw()

    #         if savefigs:
    #             dataio.save_current_figure('results_fitexp_%s_loglike_ratioconj%.2f_{label}_global_{unique_id}.pdf' % (exp_dataset, ratio_conj))



    all_args = data_pbs.loaded_data['args_list']
    variables_to_save = ['parameter_names_sorted']

    if savedata:
        dataio.save_variables_default(locals(), variables_to_save)
        dataio.make_link_output_to_dropbox(dropbox_current_experiment_folder='rcscale_characterisation')


    plt.show()

    return locals()
def plots_fit_mixturemodels_random(data_pbs, generator_module=None):
    '''
        Reload runs from PBS
    '''

    #### SETUP
    #
    savefigs = True
    savedata = True
    savemovies = False

    plots_dist_bays09 = True

    do_scatters_3d = True
    do_best_points_extended_plots = True
    # do_relaunch_bestparams_pbs = True

    colormap = None  # or 'cubehelix'
    plt.rcParams['font.size'] = 16
    #
    #### /SETUP

    print "Order parameters: ", data_pbs.dataset_infos['parameters']
    # parameters: M, ratio_conj, sigmax

    # Extract data
    result_em_fits_flat = np.array(data_pbs.dict_arrays['result_em_fits']['results_flat'])
    result_dist_bays09_flat = np.array(data_pbs.dict_arrays['result_dist_bays09']['results_flat'])
    result_dist_gorgo11_flat = np.array(data_pbs.dict_arrays['result_dist_gorgo11']['results_flat'])
    result_parameters_flat = np.array(data_pbs.dict_arrays['result_em_fits']['parameters_flat'])

    M_space = data_pbs.loaded_data['parameters_uniques']['M']
    sigmax_space = data_pbs.loaded_data['parameters_uniques']['sigmax']
    ratio_space = data_pbs.loaded_data['parameters_uniques']['ratio_conj']
    num_repetitions = generator_module.num_repetitions
    parameter_names_sorted = data_pbs.dataset_infos['parameters']
    T_space = data_pbs.loaded_data['datasets_list'][0]['T_space']
    all_args = data_pbs.loaded_data['args_list']
    all_repeats_completed = data_pbs.dict_arrays['result_em_fits']['repeats_completed']

    dataio = DataIO(output_folder=generator_module.pbs_submission_infos['simul_out_dir'] + '/outputs/', label='global_' + dataset_infos['save_output_filename'])

    # Load bays09
    data_bays09 = load_experimental_data.load_data_bays09(fit_mixture_model=True)
    bays09_nitems = data_bays09['data_to_fit']['n_items']
    bays09_em_target = np.nan*np.empty((bays09_nitems.max(), 4))  #kappa, prob_target, prob_nontarget, prob_random
    bays09_em_target[bays09_nitems - 1] = data_bays09['em_fits_nitems_arrays']['mean'].T
    bays09_emmixt_target = bays09_em_target[:, 1:]

    # All parameters info
    plotting_parameters = launcher_memorycurve.load_prepare_datasets()

    ## Compute some stuff

    # result_dist_bays09_kappa_T1_avg = utils.nanmean(result_dist_bays09_flat[:, 0, 0], axis=-1)
    # result_dist_bays09_kappa_allT_avg = np.nansum(utils.nanmean(result_dist_bays09_flat[:, :, 0], axis=-1), axis=1)

    # Square distance to kappa
    result_dist_bays09_allT_avg = utils.nanmean((result_em_fits_flat[:, :, :4] - bays09_em_target[np.newaxis, :, :, np.newaxis])**2, axis=-1)
    result_dist_bays09_kappa_sum = np.nansum(result_dist_bays09_allT_avg[:, :, 0], axis=-1)
    result_dist_bays09_kappa_sum_masked = np.ma.masked_greater(result_dist_bays09_kappa_sum, 1e8)

    result_dist_bays09_kappa_T1_sum = result_dist_bays09_allT_avg[:, 0, 0]
    result_dist_bays09_kappa_T25_sum = np.nansum(result_dist_bays09_allT_avg[:, 1:, 0], axis=-1)

    # Square and KL distance for EM Mixtures
    result_dist_bays09_emmixt_sum = np.nansum(np.nansum(result_dist_bays09_allT_avg[:, :, 1:], axis=-1), axis=-1)
    result_dist_bays09_emmixt_T1_sum = np.nansum(result_dist_bays09_allT_avg[:, 0, 1:], axis=-1)
    result_dist_bays09_emmixt_T25_sum = np.nansum(np.nansum(result_dist_bays09_allT_avg[:, 1:, 1:], axis=-1), axis=-1)


    result_dist_bays09_emmixt_KL = utils.nanmean(utils.KL_div(result_em_fits_flat[:, :, 1:4], bays09_emmixt_target[np.newaxis, :, :, np.newaxis], axis=-2), axis=-1)   # KL over dimension of mixtures, then mean over repetitions
    result_dist_bays09_emmixt_KL_sum = np.nansum(result_dist_bays09_emmixt_KL, axis=-1)  # sum over T
    result_dist_bays09_emmixt_KL_T1_sum = result_dist_bays09_emmixt_KL[:, 0]
    result_dist_bays09_emmixt_KL_T25_sum = np.nansum(result_dist_bays09_emmixt_KL[:, 1:], axis=-1)

    result_dist_bays09_both_normalised = result_dist_bays09_emmixt_sum/np.max(result_dist_bays09_emmixt_sum) + result_dist_bays09_kappa_sum/np.max(result_dist_bays09_kappa_sum)
    result_dist_bays09_kappaKL_normalised_summed = result_dist_bays09_emmixt_KL_sum/np.max(result_dist_bays09_emmixt_KL_sum) + result_dist_bays09_kappa_sum/np.max(result_dist_bays09_kappa_sum)


    if plots_dist_bays09:
        nb_best_points = 30
        size_normal_points = 8
        size_best_points = 50
        nb_best_points_extended_plots = 3


        def plot_memorycurve(result_em_fits, args_used, suptitle=''):

            packed_data = dict(T_space=T_space, result_em_fits=result_em_fits, all_parameters=args_used)
            if suptitle:
                plotting_parameters['suptitle'] = suptitle
            if savefigs:
                packed_data['dataio'] = dataio

            plotting_parameters['reuse_axes'] = False

            launcher_memorycurve.do_memory_plots(packed_data, plotting_parameters)


        def plot_scatter(result_dist_to_use, best_points_result_dist_to_use, result_dist_to_use_name='', title=''):

            fig = plt.figure()
            ax = Axes3D(fig)

            utils.scatter3d(result_parameters_flat[:, 0], result_parameters_flat[:, 1], result_parameters_flat[:, 2], s=size_normal_points, c=np.log(result_dist_to_use), xlabel=parameter_names_sorted[0], ylabel=parameter_names_sorted[1], zlabel=parameter_names_sorted[2], title=title, ax_handle=ax)
            utils.scatter3d(result_parameters_flat[best_points_result_dist_to_use, 0], result_parameters_flat[best_points_result_dist_to_use, 1], result_parameters_flat[best_points_result_dist_to_use, 2], c='r', s=size_best_points, ax_handle=ax)
            print "Best points, %s:" % title
            print '\n'.join(['M %d, ratio %.2f, sigmax %.2f:  %f' % (result_parameters_flat[i, 0], result_parameters_flat[i, 1], result_parameters_flat[i, 2], result_dist_to_use[i]) for i in best_points_result_dist_to_use])

            if savefigs:
                dataio.save_current_figure('scatter3d_%s_{label}_{unique_id}.pdf' % result_dist_to_use_name)

                if savemovies:
                    try:
                        utils.rotate_plot3d(ax, dataio.create_formatted_filename('scatter3d_%s_{label}_{unique_id}.mp4' % result_dist_to_use_name), bitrate=8000, min_duration=8)
                        utils.rotate_plot3d(ax, dataio.create_formatted_filename('scatter3d_%s_{label}_{unique_id}.gif' % result_dist_to_use_name), nb_frames=30, min_duration=8)
                    except Exception:
                        # Most likely wrong aggregator...
                        print "failed when creating movies for ", result_dist_to_use_name

                ax.view_init(azim=90, elev=10)
                dataio.save_current_figure('scatter3d_%s_view2_{label}_{unique_id}.pdf' % result_dist_to_use_name)

            return ax

        def plots_redirects(all_vars, result_dist_to_use_name, log_color=True, title='', avoid_incomplete_repeats=True):

            result_dist_to_use = all_vars[result_dist_to_use_name]

            if avoid_incomplete_repeats:
                result_dist_to_use = np.ma.masked_where(~(all_repeats_completed == num_repetitions-1), result_dist_to_use)

            if not log_color:
                result_dist_to_use = np.exp(result_dist_to_use)
            best_points_result_dist_to_use = np.argsort(result_dist_to_use)[:nb_best_points]

            # Scatter
            if do_scatters_3d:
                plot_scatter(result_dist_to_use, best_points_result_dist_to_use, result_dist_to_use_name, title=title)

            # Now do the additional plots if required
            if do_best_points_extended_plots:
                for best_point_index in best_points_result_dist_to_use[:nb_best_points_extended_plots]:
                    print "extended plot for M %d, ratio %.2f, sigmax %.2f:  score %f" % (result_parameters_flat[best_point_index, 0], result_parameters_flat[best_point_index, 1], result_parameters_flat[best_point_index, 2], result_dist_to_use[best_point_index])

                    plot_memorycurve(result_em_fits_flat[best_point_index], all_args[best_point_index], suptitle=result_dist_to_use_name)

        # Distance for kappa, all T
        plots_redirects(locals(), 'result_dist_bays09_kappa_sum', title='kappa all T')

        # Distance for em fits, all T, Squared distance
        plots_redirects(locals(), 'result_dist_bays09_emmixt_sum', title='em fits, all T')

        # Distance for em fits, all T, KL distance
        plots_redirects(locals(), 'result_dist_bays09_emmixt_KL_sum', title='em fits, all T, KL')

        # Distance for sum of normalised em fits + normalised kappa, all T
        plots_redirects(locals(), 'result_dist_bays09_both_normalised', title='summed normalised em mixt + kappa')

        # Distance kappa T = 1
        plots_redirects(locals(), 'result_dist_bays09_kappa_T1_sum', title='Kappa T=1')

        # Distance kappa T = 2...5
        plots_redirects(locals(), 'result_dist_bays09_kappa_T25_sum', title='Kappa T=2/5')

        # Distance em fits T = 1
        plots_redirects(locals(), 'result_dist_bays09_emmixt_T1_sum', title='em fits T=1')

        # Distance em fits T = 2...5
        plots_redirects(locals(), 'result_dist_bays09_emmixt_T25_sum', title='em fits T=2/5')

        # Distance em fits T = 1, KL
        plots_redirects(locals(), 'result_dist_bays09_emmixt_KL_T1_sum', title='em fits T=1, KL')

        # Distance em fits T = 2...5, KL
        plots_redirects(locals(), 'result_dist_bays09_emmixt_KL_T25_sum', title='em fits T=2/5, KL')

    # if plots_per_T:
    #     for T in T_space:
    #         currT_indices = result_parameters_flat[:, 2] == T

    #         utils.contourf_interpolate_data_interactive_maxvalue(result_parameters_flat[currT_indices][..., :2], result_fitexperiments_bic_avg[currT_indices], xlabel='Ratio_conj', ylabel='sigma x', title='BIC, T %d' % T, interpolation_numpoints=200, interpolation_method='nearest', log_scale=False)

    # # Interpolate
    # if plots_interpolate:

    #     sigmax_target = 0.9

    #     M_interp_space = np.arange(6, 625, 5)
    #     ratio_interp_space = np.linspace(0.01, 1.0, 50)
    #     # sigmax_interp_space = np.linspace(0.01, 1.0, 50)
    #     sigmax_interp_space = np.array([sigmax_target])
    #     params_crossspace = np.array(utils.cross(M_interp_space, ratio_interp_space, sigmax_interp_space))

    #     interpolated_data = rbf_interpolator(params_crossspace[:, 0], params_crossspace[:, 1], params_crossspace[:, 2]).reshape((M_interp_space.size, ratio_interp_space.size))

    #     utils.pcolor_2d_data(interpolated_data, M_interp_space, ratio_interp_space, 'M', 'ratio', 'interpolated, fixing sigmax= %.2f' % sigmax_target)

    #     points_closeby = ((result_parameters_flat[:, 2] - sigmax_target)**2)< 0.01
    #     plt.figure()
    #     # plt.imshow(interpolated_data, extent=(M_interp_space.min(), M_interp_space.max(), ratio_interp_space.min(), ratio_interp_space.max()))
    #     plt.imshow(interpolated_data)
    #     plt.scatter(result_parameters_flat[points_closeby, 0], result_parameters_flat[points_closeby, 1], s=100, c=result_fitexperiments_bic_avg[points_closeby], marker='o')


    # if plot_per_ratio:
    #     # Plot the evolution of loglike as a function of sigmax, with std shown
    #     for ratio_conj_i, ratio_conj in enumerate(ratio_space):
    #         ax = utils.plot_mean_std_area(sigmax_space, result_log_posterior_mean[ratio_conj_i], result_log_posterior_std[ratio_conj_i])

    #         ax.get_figure().canvas.draw()

    #         if savefigs:
    #             dataio.save_current_figure('results_fitexp_%s_loglike_ratioconj%.2f_{label}_global_{unique_id}.pdf' % (exp_dataset, ratio_conj))


    variables_to_save = ['parameter_names_sorted', 'all_repeats_completed', 'T_space']

    if savedata:
        dataio.save_variables_default(locals(), variables_to_save)
        dataio.make_link_output_to_dropbox(dropbox_current_experiment_folder='fit_mixturemodels')


    plt.show()

    return locals()
Example #9
def main():

    result_path = 'results/'
    subtype_method = "Algorithm"
    K = 3  # number of subtypes (clusters)
    ############################## LOAD DATA ######################################
    print('patients loading...')
    dataio = DataIO(K)
    dataio.load_demographics('../ufm/patient.csv')
    dataio.load_feature('Motor', 'MDS UPDRS PartI')
    dataio.load_feature('Motor', 'MDS UPDRS PartII')
    dataio.load_feature('Motor', 'MDS UPDRS PartIII')
    dataio.load_feature('Motor', 'MDS UPDRS PartIV')

    dataio.load_feature('Non-Motor', 'BJLO')
    dataio.load_feature('Non-Motor', 'ESS')
    dataio.load_feature('Non-Motor', 'GDS')
    dataio.load_feature('Non-Motor', 'HVLT')
    dataio.load_feature('Non-Motor', 'LNS')
    dataio.load_feature('Non-Motor', 'MoCA')
    dataio.load_feature('Non-Motor', 'QUIP')
    dataio.load_feature('Non-Motor', 'RBD')
    dataio.load_feature('Non-Motor', 'SCOPA-AUT')
    dataio.load_feature('Non-Motor', 'SF')
    dataio.load_feature('Non-Motor', 'STAI')
    dataio.load_feature('Non-Motor', 'SDM')
    dataio.load_feature('Non-Motor', 'MCI')

    dataio.load_feature('Biospecimen', 'DNA')
    dataio.load_feature('Biospecimen', 'CSF', 'Total tau')
    dataio.load_feature('Biospecimen', 'CSF', 'Abeta 42')
    dataio.load_feature('Biospecimen', 'CSF', 'p-Tau181P')
    dataio.load_feature('Biospecimen', 'CSF', 'CSF Alpha-synuclein')

    dataio.load_feature('Image', 'DaTScan SBR')
    dataio.load_feature('Image', 'MRI')
    dataio.load_feature('Medication', 'MED USE')
    suffix = 'normalized_clusters_Deep'
    dataio.load_clustering_result('input/clustering_by_lstm.csv')

    ############################# STATISTICS ######################################
    print('-----------------------')
    print('statistics analyzing...')
    var = Variable(K)
    ftype = 'demographics'
    p = var.get_variables(dataio, ftype)
    var.p_value.extend(p)

    ftype = 'motor'
    _ = var.get_variables(dataio, ftype, 'MDS UPDRS PartI')
    _ = var.get_variables(dataio, ftype, 'MDS UPDRS PartII')
    _ = var.get_variables(dataio, ftype, 'MDS UPDRS PartIII', 'MDS-UPDRS')
    _ = var.get_variables(dataio, ftype, 'MDS UPDRS PartIII', 'H&Y')
    p = var.get_variables(dataio, ftype, 'MDS UPDRS PartIV')
    var.p_value.extend(p)

    ftype = 'nonmotor'
    _ = var.get_variables(dataio, ftype, 'BJLO')
    _ = var.get_variables(dataio, ftype, 'ESS')
    _ = var.get_variables(dataio, ftype, 'GDS')
    _ = var.get_variables(dataio, ftype, 'HVLT', 'Immediate Recall')
    _ = var.get_variables(dataio, ftype, 'HVLT', 'Discrimination Recognition')
    _ = var.get_variables(dataio, ftype, 'HVLT', 'Retention')
    _ = var.get_variables(dataio, ftype, 'LNS')
    print(var.pat_edu)
    _ = var.get_variables(dataio, ftype, 'MoCA', pat_edu=var.pat_edu)
    _ = var.get_variables(dataio, ftype, 'QUIP')
    _ = var.get_variables(dataio, ftype, 'RBD')
    _ = var.get_variables(dataio, ftype, 'SCOPA-AUT')
    _ = var.get_variables(dataio, ftype, 'SF')
    _ = var.get_variables(dataio, ftype, 'STAI')
    _ = var.get_variables(dataio, ftype, 'SDM')
    p = var.get_variables(dataio, ftype, 'MCI')
    var.p_value.extend(p)

    ftype = 'biospecimen'
    _ = var.get_variables(dataio, ftype, 'DNA')
    _ = var.get_variables(dataio, ftype, 'CSF', 'Total tau')
    _ = var.get_variables(dataio, ftype, 'CSF', 'Abeta 42')
    _ = var.get_variables(dataio, ftype, 'CSF', 'p-Tau181P')
    p = var.get_variables(dataio, ftype, 'CSF', 'CSF Alpha-synuclein')
    var.p_value.extend(p)

    ftype = 'image'
    _ = var.get_variables(dataio, ftype, 'DaTScan SBR', 'CAUDATE RIGHT')
    _ = var.get_variables(dataio, ftype, 'DaTScan SBR', 'CAUDATE LEFT')
    _ = var.get_variables(dataio, ftype, 'DaTScan SBR', 'PUTAMEN RIGHT')
    _ = var.get_variables(dataio, ftype, 'DaTScan SBR', 'PUTAMEN LEFT')
    p = var.get_variables(dataio, ftype, 'MRI')
    var.p_value.extend(p)

    ftype = 'medication'
    p = var.get_variables(dataio, ftype, 'MED USE')
    var.p_value.extend(p)

    ################################# DISPLAY ######################################
    print('-----------------------')
    print('value displaying...')
    ds = Display(var)
    print('heatmap of the final mean value')
    figurename = ('results/heatmap_clustering_by_' + subtype_method.lower()
                  + '_' + suffix + '.pdf')
    ds.heatmap(figurename, is_progress=False, is_rotate=False)
    print('heatmap of the first order difference mean value')
    figurename = ('results/heatmap_clustering_by_' + subtype_method.lower()
                  + '_progression_' + suffix + '.pdf')
    ds.heatmap(figurename, is_progress=True, is_rotate=False)

    ############################## SAVE RESULTS ####################################
    print('-----------------------')
    filename = (result_path + 'statistics_clustering_by_' + subtype_method.lower()
                + '_' + suffix + '.csv')
    dataio.save_result(var, filename)
    print('done!')
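# Note: as excerpted, nothing invokes main(); a conventional entry-point
# guard would be:
if __name__ == '__main__':
    main()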
def plots_fit_mixturemodels_random(data_pbs, generator_module=None):
    """
        Reload runs from PBS
    """

    #### SETUP
    #
    savefigs = True
    savedata = True

    colormap = None  # or 'cubehelix'
    plt.rcParams["font.size"] = 16
    #
    #### /SETUP

    print "Order parameters: ", data_pbs.dataset_infos["parameters"]
    # parameters: M, ratio_conj, sigmax

    # Extract data
    T_space = data_pbs.loaded_data["datasets_list"][0]["T_space"]

    result_responses_flat = np.array(data_pbs.dict_arrays["result_responses"]["results_flat"])
    result_targets_flat = np.array(data_pbs.dict_arrays["result_target"]["results_flat"])
    result_nontargets_flat = np.array(data_pbs.dict_arrays["result_nontargets"]["results_flat"])

    result_parameters_flat = np.array(data_pbs.dict_arrays["result_responses"]["parameters_flat"])
    all_repeats_completed = data_pbs.dict_arrays["result_responses"]["repeats_completed"]

    all_args_arr = np.array(data_pbs.loaded_data["args_list"])

    M_space = data_pbs.loaded_data["parameters_uniques"]["M"]
    ratio_conj_space = data_pbs.loaded_data["parameters_uniques"]["ratio_conj"]
    sigmax_space = data_pbs.loaded_data["parameters_uniques"]["sigmax"]
    alpha_space = data_pbs.loaded_data["parameters_uniques"]["alpha"]
    trecall_space = data_pbs.loaded_data["parameters_uniques"]["fixed_cued_feature_time"]

    num_repetitions = generator_module.num_repetitions
    parameter_names_sorted = data_pbs.dataset_infos["parameters"]

    dataio = DataIO(
        output_folder=generator_module.pbs_submission_infos["simul_out_dir"] + "/outputs/",
        label="global_" + dataset_infos["save_output_filename"],
    )

    ##### Because of laziness, the responses are stored awkwardly.
    # Each run is for a given trecall, but we run N items = 1 .. Nmax anyway,
    # so entries with trecall > N are np.nan.
    # => Need to reconstruct this properly, filling the lower triangle of the
    # (Nitems x Trecall) array. Also, trecall is the actual time index, so its
    # meaning must be changed to Tmax - (trecall + 1) or similar.
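    # Hypothetical illustration of that remapping (an assumption, not from the
    # original run): with Tmax = T_space.max(),
    #     trecall_relative = Tmax - (trecall + 1)
    # and entries with trecall >= n_items stay np.nan, so only the lower
    # triangle of the (n_items x trecall) array gets filled.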

    # Load ground truth
    data_gorgo11_sequ = load_experimental_data.load_data_gorgo11_sequential(fit_mixture_model=True)

    ## Filter everything with repeats_completed == num_repet and trecall=last
    filter_data = (result_parameters_flat[:, 0] == (T_space.max() - 1)) & (all_repeats_completed == num_repetitions - 1)
    result_parameters_flat = result_parameters_flat[filter_data]
    result_responses_flat = result_responses_flat[filter_data]
    result_targets_flat = result_targets_flat[filter_data]
    result_nontargets_flat = result_nontargets_flat[filter_data]
    all_args_arr = all_args_arr[filter_data]
    all_repeats_completed = all_repeats_completed[filter_data]

    print "Size post-filter: ", result_parameters_flat.shape[0]

    def str_best_params(best_i, result_dist_to_use):
        return (
            " ".join(
                [
                    "%s %.4f" % (parameter_names_sorted[param_i], result_parameters_flat[best_i, param_i])
                    for param_i in xrange(len(parameter_names_sorted))
                ]
            )
            + " >> %f" % result_dist_to_use[best_i]
        )

    # all_args = data_pbs.loaded_data['args_list']
    variables_to_save = ["parameter_names_sorted", "all_args_arr", "all_repeats_completed", "filter_data"]

    if savedata:
        dataio.save_variables_default(locals(), variables_to_save)
        dataio.make_link_output_to_dropbox(dropbox_current_experiment_folder="gorgo11_sequential_fitmixturemodel")

    plt.show()

    return locals()
def plots_fit_collapsedmixturemodels_random(data_pbs, generator_module=None):
    '''
        Reload runs from PBS

        Sequential data analysis.
    '''

    #### SETUP
    #
    plots_bestfits = True
    plots_scatter3d = False

    savefigs = True
    savedata = True
    savemovies = False  # plot_scatter below checks this flag when saving figures

    colormap = None  # or 'cubehelix'
    plt.rcParams['font.size'] = 16
    #
    #### /SETUP

    print "Order parameters: ", data_pbs.dataset_infos['parameters']
    # parameters: M, ratio_conj, sigmax

    # Extract data
    T_space = data_pbs.loaded_data['datasets_list'][0]['T_space']

    result_em_fits_collapsed_tr_flat = np.array(data_pbs.dict_arrays['result_em_fits_collapsed_tr']['results_flat'])
    result_em_fits_collapsed_summary_flat = np.array(data_pbs.dict_arrays['result_em_fits_collapsed_summary']['results_flat'])
    result_dist_gorgo11_sequ_collapsed_flat = np.array(data_pbs.dict_arrays['result_dist_gorgo11_sequ_collapsed']['results_flat'])
    result_dist_gorgo11_sequ_collapsed_emmixt_KL_flat = np.array(data_pbs.dict_arrays['result_dist_gorgo11_sequ_collapsed_emmixt_KL']['results_flat'])

    result_parameters_flat = np.array(data_pbs.dict_arrays['result_em_fits_collapsed_tr']['parameters_flat'])
    all_repeats_completed = data_pbs.dict_arrays['result_em_fits_collapsed_tr']['repeats_completed']

    all_args_arr = np.array(data_pbs.loaded_data['args_list'])

    M_space = data_pbs.loaded_data['parameters_uniques']['M']
    ratio_conj_space = data_pbs.loaded_data['parameters_uniques']['ratio_conj']
    sigmax_space = data_pbs.loaded_data['parameters_uniques']['sigmax']
    alpha_space = data_pbs.loaded_data['parameters_uniques']['alpha']

    num_repetitions = generator_module.num_repetitions
    parameter_names_sorted = data_pbs.dataset_infos['parameters']

    dataio = DataIO(output_folder=generator_module.pbs_submission_infos['simul_out_dir'] + '/outputs/', label='global_' + dataset_infos['save_output_filename'])

    # Load ground truth
    data_gorgo11_sequ = load_experimental_data.load_data_gorgo11_sequential(fit_mixture_model=True)

    ## Filter everything with repeats_completed == num_repet
    filter_data = all_repeats_completed == num_repetitions - 1
    result_parameters_flat = result_parameters_flat[filter_data]

    result_em_fits_collapsed_tr_flat = result_em_fits_collapsed_tr_flat[filter_data]
    result_em_fits_collapsed_summary_flat = result_em_fits_collapsed_summary_flat[filter_data]
    result_dist_gorgo11_sequ_collapsed_flat = result_dist_gorgo11_sequ_collapsed_flat[filter_data]
    result_dist_gorgo11_sequ_collapsed_emmixt_KL_flat = result_dist_gorgo11_sequ_collapsed_emmixt_KL_flat[filter_data]

    all_args_arr = all_args_arr[filter_data]
    all_repeats_completed = all_repeats_completed[filter_data]

    print "Size post-filter: ", result_parameters_flat.shape[0]

    # Compute lots of averages over the repetitions
    result_em_fits_collapsed_tr_flat_avg = utils.nanmean(result_em_fits_collapsed_tr_flat, axis=-1)
    result_em_fits_collapsed_summary_flat_avg = utils.nanmean(result_em_fits_collapsed_summary_flat, axis=-1)
    result_dist_gorgo11_sequ_collapsed_flat_avg = utils.nanmean(result_dist_gorgo11_sequ_collapsed_flat, axis=-1)
    result_dist_gorgo11_sequ_collapsed_emmixt_KL_flat_avg = utils.nanmean(result_dist_gorgo11_sequ_collapsed_emmixt_KL_flat, axis=-1)

    result_dist_gorgo11_sequ_collapsed_flat_avg_overall = np.nansum(np.nansum(np.nansum(result_dist_gorgo11_sequ_collapsed_flat_avg, axis=-1), axis=-1), axis=-1)
    # We will now grid some of the parameters, to have a 2D/3D surface back.
    # Let's fix ratio_conj, as we know the other models need around
    # ratio = 0.8 to fit the data well.
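    # Hypothetical sketch of that gridding step (not executed here):
    #     ratio_idx = parameter_names_sorted.index('ratio_conj')
    #     near_ratio = np.abs(result_parameters_flat[:, ratio_idx] - 0.8) < 0.05
    #     gridded_subset = result_parameters_flat[near_ratio]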

    def str_best_params(best_i, result_dist_to_use):
        return ' '.join(["%s %.4f" % (parameter_names_sorted[param_i], result_parameters_flat[best_i, param_i]) for param_i in xrange(len(parameter_names_sorted))]) + ' >> %f' % result_dist_to_use[best_i]

    ###### Best fitting points
    if plots_bestfits:
        nb_best_points = 5

        def plot_collapsed_modelfits(T_space, curr_result_emfits_collapsed_tr, labelplot='', dataio=None):
            f, ax = plt.subplots()
            for nitems_i, nitems in enumerate(T_space):
                ax = plots_experimental_data.plot_kappa_mean_error(T_space[:nitems], curr_result_emfits_collapsed_tr[..., 0][nitems_i, :nitems], 0.0*curr_result_emfits_collapsed_tr[..., 0][nitems_i, :nitems], title='model fit fig7 %s' % labelplot , ax=ax, label='%d items' % nitems, xlabel='T_recall')

            if dataio is not None:
                dataio.save_current_figure('bestfit_doublepowerlaw_%s_kappa_{label}_{unique_id}.pdf' % labelplot)

            _, ax_target = plt.subplots()
            _, ax_nontarget = plt.subplots()
            _, ax_random = plt.subplots()
            for nitems_i, nitems in enumerate(T_space):
                ax_target = plots_experimental_data.plot_emmixture_mean_error(T_space[:nitems], curr_result_emfits_collapsed_tr[..., 1][nitems_i, :nitems], curr_result_emfits_collapsed_tr[..., 1][nitems_i, :nitems]*0.0, title='Target model fit %s' % labelplot, ax=ax_target, label='%d items' % nitems, xlabel='T_recall')
                ax_nontarget = plots_experimental_data.plot_emmixture_mean_error(T_space[:nitems], curr_result_emfits_collapsed_tr[..., 2][nitems_i, :nitems], curr_result_emfits_collapsed_tr[..., 2][nitems_i, :nitems]*0.0, title='Nontarget model fit %s' % labelplot, ax=ax_nontarget, label='%d items' % nitems, xlabel='T_recall')
                ax_random = plots_experimental_data.plot_emmixture_mean_error(T_space[:nitems], curr_result_emfits_collapsed_tr[..., 3][nitems_i, :nitems], curr_result_emfits_collapsed_tr[..., 3][nitems_i, :nitems]*0.0, title='Random model fit %s' % labelplot, ax=ax_random, label='%d items' % nitems, xlabel='T_recall')

            if dataio is not None:
                plt.figure(ax_target.get_figure().number)
                dataio.save_current_figure('bestfit_doublepowerlaw_%s_mixttarget_{label}_{unique_id}.pdf' % labelplot)

                plt.figure(ax_nontarget.get_figure().number)
                dataio.save_current_figure('bestfit_doublepowerlaw_%s_mixtnontarget_{label}_{unique_id}.pdf' % labelplot)

                plt.figure(ax_random.get_figure().number)
                dataio.save_current_figure('bestfit_doublepowerlaw_%s_mixtrandom_{label}_{unique_id}.pdf' % labelplot)


        best_points_result_dist_gorgo11seq_all = np.argsort(result_dist_gorgo11_sequ_collapsed_flat_avg_overall)[:nb_best_points]

        for best_point_i in best_points_result_dist_gorgo11seq_all:
            plot_collapsed_modelfits(T_space, result_em_fits_collapsed_tr_flat_avg[best_point_i], labelplot='%.1f' % result_dist_gorgo11_sequ_collapsed_flat_avg_overall[best_point_i], dataio=dataio)


    ###### 3D scatter plots
    if plots_scatter3d:
        nb_best_points = 30
        size_normal_points = 8
        size_best_points = 50

        def plot_scatter(all_vars, result_dist_to_use_name, title='', log_color=True, downsampling=1, label_file=''):

            result_dist_to_use = all_vars[result_dist_to_use_name]
            result_parameters_flat_3d = all_vars['result_parameters_flat_3d']

            # Filter if downsampling
            filter_downsampling = np.arange(0, result_dist_to_use.size, downsampling)
            result_dist_to_use = result_dist_to_use[filter_downsampling]
            result_parameters_flat_3d = result_parameters_flat_3d[filter_downsampling]

            best_points_result_dist_to_use = np.argsort(result_dist_to_use)[:nb_best_points]

            # Construct all permutations of 3 parameters, for 3D scatters
            params_permutations = set([tuple(np.sort(np.random.choice(result_parameters_flat_3d.shape[-1], 3, replace=False)).tolist()) for i in xrange(1000)])

            for param_permut in params_permutations:
                fig = plt.figure()
                ax = Axes3D(fig)

                # One plot per parameter permutation
                if log_color:
                    color_points = np.log(result_dist_to_use)
                else:
                    color_points = result_dist_to_use

                utils.scatter3d(result_parameters_flat_3d[:, param_permut[0]], result_parameters_flat_3d[:, param_permut[1]], result_parameters_flat_3d[:, param_permut[2]], s=size_normal_points, c=color_points, xlabel=parameter_names_sorted[param_permut[0]], ylabel=parameter_names_sorted[param_permut[1]], zlabel=parameter_names_sorted[param_permut[2]], title=title, ax_handle=ax)

                utils.scatter3d(result_parameters_flat_3d[best_points_result_dist_to_use, param_permut[0]], result_parameters_flat_3d[best_points_result_dist_to_use, param_permut[1]], result_parameters_flat_3d[best_points_result_dist_to_use, param_permut[2]], c='r', s=size_best_points, ax_handle=ax)

                if savefigs:
                    dataio.save_current_figure('scatter3d_%s_%s%s_{label}_{unique_id}.pdf' % (result_dist_to_use_name, '_'.join([parameter_names_sorted[i] for i in param_permut]), label_file))

                if savemovies:
                    try:
                        utils.rotate_plot3d(ax, dataio.create_formatted_filename('scatter3d_%s_%s%s_{label}_{unique_id}.mp4' % (result_dist_to_use_name, '_'.join([parameter_names_sorted[i] for i in param_permut]), label_file)), bitrate=8000, min_duration=8)
                        utils.rotate_plot3d(ax, dataio.create_formatted_filename('scatter3d_%s_%s%s_{label}_{unique_id}.gif' % (result_dist_to_use_name, '_'.join([parameter_names_sorted[i] for i in param_permut]), label_file)), nb_frames=30, min_duration=8)
                    except Exception:
                        # Most likely wrong aggregator...
                        print "failed when creating movies for ", result_dist_to_use_name


                if False and savefigs:
                    ax.view_init(azim=90, elev=10)
                    dataio.save_current_figure('scatter3d_view2_%s_%s%s_{label}_{unique_id}.pdf' % (result_dist_to_use_name, '_'.join([parameter_names_sorted[i] for i in param_permut]), label_file))

                # plt.close('all')

            print "Parameters: %s" % ', '.join(parameter_names_sorted)
            print "Best points, %s:" % title
            print '\n'.join([str_best_params(best_i, result_dist_to_use) for best_i in best_points_result_dist_to_use])


    # all_args = data_pbs.loaded_data['args_list']
    variables_to_save = ['parameter_names_sorted', 'all_args_arr', 'all_repeats_completed', 'filter_data']

    if savedata:
        dataio.save_variables_default(locals(), variables_to_save)
        dataio.make_link_output_to_dropbox(dropbox_current_experiment_folder='gorgo11_sequential_fitmixturemodel')


    plt.show()

    return locals()
Example #12
import numpy
from dataio import DataIO

if __name__ == '__main__':

    filename = 'test'
    fieldnames = ['time', 'low', 'high']

    csvio = DataIO('test', fieldnames)
    csvio.csv_newfile(filename)

    test_row1 = {'time': 1.0, 'low': 100, 'high': 101}
    test_row2 = {'time': 2.0, 'low': 101, 'high': 102}
    test_rows = [{'time': 3.0, 'low': 102, 'high': 103},
                 {'time': 4.0, 'low': 103, 'high': 104}]

    exists = csvio.csv_check(filename)
    print(exists)

    print(len(numpy.shape(test_row1)))
    print(len(numpy.shape(test_rows)))
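    # 0 for a single row dict vs 1 for a list of rows; presumably the check
    # csv_append uses to dispatch between the two (an inference from this test)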

    csvio.csv_append(filename, test_row1)
    csvio.csv_append(filename, test_row2)
    csvio.csv_append(filename, test_rows)

    data = csvio.csv_get(filename)
    print(data)

    csvio.csv_rename(filename, 'test2')
Example #13
    def input(self, N, file_name):
        x = self.random_state.uniform(self.X_LBU, self.X_UBU, N)
        title = 'input'
        DataIO.write_data([x], file_name, title)
Example #14
    def function(self, file_name):
        x = np.linspace(self.X_LBU, self.X_UBU, self.NLARGE)
        f = np.sin(2 * np.pi * x)
        title = 'input\tsinus function'
        DataIO.write_data([x, f], file_name, title)
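For reference, a minimal sketch of the DataIO.write_data contract implied by Examples #13 and #14 (a list of column arrays plus a tab-separated title line); the on-disk format is an assumption, not confirmed by the source:

import numpy as np

def write_data_sketch(columns, file_name, title):
    # Assumed layout: the title line first, then tab-separated rows of samples.
    np.savetxt(file_name, np.column_stack(columns), header=title,
               delimiter='\t', comments='')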
def plots_fit_mixturemodels_random(data_pbs, generator_module=None):
    '''
        Reload runs from PBS
    '''

    #### SETUP
    #
    savefigs = True
    savedata = True
    savemovies = True

    plots_dist_bays09 = True
    plots_per_T = True
    plots_interpolate = False

    # do_relaunch_bestparams_pbs = True

    colormap = None  # or 'cubehelix'
    plt.rcParams['font.size'] = 16
    #
    #### /SETUP

    print "Order parameters: ", data_pbs.dataset_infos['parameters']
    # parameters: M, ratio_conj, sigmax

    # Extract data
    T_space = data_pbs.loaded_data['datasets_list'][0]['T_space']

    result_em_fits_flat = np.array(data_pbs.dict_arrays['result_em_fits']['results_flat'])
    result_dist_bays09_flat = np.array(data_pbs.dict_arrays['result_dist_bays09']['results_flat'])
    result_dist_gorgo11_flat = np.array(data_pbs.dict_arrays['result_dist_gorgo11']['results_flat'])
    result_parameters_flat = np.array(data_pbs.dict_arrays['result_em_fits']['parameters_flat'])

    sigmaoutput_space = data_pbs.loaded_data['parameters_uniques']['sigma_output']
    sigmax_space = data_pbs.loaded_data['parameters_uniques']['sigmax']
    ratio_space = data_pbs.loaded_data['parameters_uniques']['ratio_conj']
    num_repetitions = generator_module.num_repetitions
    parameter_names_sorted = data_pbs.dataset_infos['parameters']

    dataio = DataIO(output_folder=generator_module.pbs_submission_infos['simul_out_dir'] + '/outputs/', label='global_' + dataset_infos['save_output_filename'])

    # Load bays09
    data_bays09 = load_experimental_data.load_data_bays09(fit_mixture_model=True)
    bays09_nitems = data_bays09['data_to_fit']['n_items']
    bays09_em_target = np.nan*np.empty((bays09_nitems.max(), 4))  #kappa, prob_target, prob_nontarget, prob_random
    bays09_em_target[bays09_nitems - 1] = data_bays09['em_fits_nitems_arrays']['mean'].T
    bays09_emmixt_target = bays09_em_target[:, 1:]


    ## Compute some stuff

    # result_dist_bays09_kappa_T1_avg = utils.nanmean(result_dist_bays09_flat[:, 0, 0], axis=-1)
    # result_dist_bays09_kappa_allT_avg = np.nansum(utils.nanmean(result_dist_bays09_flat[:, :, 0], axis=-1), axis=1)

    # Square distance to kappa
    result_dist_bays09_allT_avg = utils.nanmean((result_em_fits_flat[:, :, :4] - bays09_em_target[np.newaxis, :, :, np.newaxis])**2, axis=-1)
    result_dist_bays09_kappa_sum = np.nansum(result_dist_bays09_allT_avg[:, :, 0], axis=-1)

    result_dist_bays09_kappa_T1_sum = result_dist_bays09_allT_avg[:, 0, 0]
    result_dist_bays09_kappa_T25_sum = np.nansum(result_dist_bays09_allT_avg[:, 1:, 0], axis=-1)

    # Square and KL distance for EM Mixtures
    result_dist_bays09_emmixt_sum = np.nansum(np.nansum(result_dist_bays09_allT_avg[:, :, 1:], axis=-1), axis=-1)
    result_dist_bays09_emmixt_T1_sum = np.nansum(result_dist_bays09_allT_avg[:, 0, 1:], axis=-1)
    result_dist_bays09_emmixt_T25_sum = np.nansum(np.nansum(result_dist_bays09_allT_avg[:, 1:, 1:], axis=-1), axis=-1)


    result_dist_bays09_emmixt_KL = utils.nanmean(utils.KL_div(result_em_fits_flat[:, :, 1:4], bays09_emmixt_target[np.newaxis, :, :, np.newaxis], axis=-2), axis=-1)   # KL over dimension of mixtures, then mean over repetitions
    result_dist_bays09_emmixt_KL_sum = np.nansum(result_dist_bays09_emmixt_KL, axis=-1)  # sum over T
    result_dist_bays09_emmixt_KL_T1_sum = result_dist_bays09_emmixt_KL[:, 0]
    result_dist_bays09_emmixt_KL_T25_sum = np.nansum(result_dist_bays09_emmixt_KL[:, 1:], axis=-1)


    result_dist_bays09_both_normalised = result_dist_bays09_emmixt_sum/np.max(result_dist_bays09_emmixt_sum) + result_dist_bays09_kappa_sum/np.max(result_dist_bays09_kappa_sum)
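    # Each summed distance is divided by its own maximum, putting the kappa and
    # mixture terms on comparable scales before adding them.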

    if plots_dist_bays09:
        nb_best_points = 30
        size_normal_points = 8
        size_best_points = 50

        def plot_scatter(all_vars, result_dist_to_use_name, title='', log_color=True, downsampling=1, label_file=''):

            fig = plt.figure()
            ax = Axes3D(fig)

            result_dist_to_use = all_vars[result_dist_to_use_name]
            if not log_color:
                result_dist_to_use = np.exp(result_dist_to_use)
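                # exp() here pre-cancels the np.log() applied to the colours
                # below, so log_color=False effectively plots raw (linear) distances.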

            utils.scatter3d(result_parameters_flat[:, 0], result_parameters_flat[:, 1], result_parameters_flat[:, 2], s=size_normal_points, c=np.log(result_dist_to_use), xlabel=parameter_names_sorted[0], ylabel=parameter_names_sorted[1], zlabel=parameter_names_sorted[2], title=title, ax_handle=ax)
            best_points_result_dist_to_use = np.argsort(result_dist_to_use)[:nb_best_points]
            utils.scatter3d(result_parameters_flat[best_points_result_dist_to_use, 0], result_parameters_flat[best_points_result_dist_to_use, 1], result_parameters_flat[best_points_result_dist_to_use, 2], c='r', s=size_best_points, ax_handle=ax)
            print "Best points, %s:" % title
            print '\n'.join(['sigma output %.2f, ratio %.2f, sigmax %.2f:  %f' % (result_parameters_flat[i, 0], result_parameters_flat[i, 1], result_parameters_flat[i, 2], result_dist_to_use[i]) for i in best_points_result_dist_to_use])

            if savefigs:
                dataio.save_current_figure('scatter3d_%s%s_{label}_{unique_id}.pdf' % (result_dist_to_use_name, label_file))

                if savemovies:
                    try:
                        utils.rotate_plot3d(ax, dataio.create_formatted_filename('scatter3d_%s%s_{label}_{unique_id}.mp4' % (result_dist_to_use_name, label_file)), bitrate=8000, min_duration=8)
                        utils.rotate_plot3d(ax, dataio.create_formatted_filename('scatter3d_%s%s_{label}_{unique_id}.gif' % (result_dist_to_use_name, label_file)), nb_frames=30, min_duration=8)
                    except Exception:
                        # Most likely wrong aggregator...
                        print "failed when creating movies for ", result_dist_to_use_name

                ax.view_init(azim=90, elev=10)
                dataio.save_current_figure('scatter3d_view2_%s%s_{label}_{unique_id}.pdf' % (result_dist_to_use_name, label_file))

            return ax

        # Distance for kappa, all T
        plot_scatter(locals(), 'result_dist_bays09_kappa_sum', 'kappa all T')

        # Distance for em fits, all T, Squared distance
        plot_scatter(locals(), 'result_dist_bays09_emmixt_sum', 'em fits, all T')

        # Distance for em fits, all T, KL distance
        plot_scatter(locals(), 'result_dist_bays09_emmixt_KL_sum', 'em fits, all T, KL')

        # Distance for sum of normalised em fits + normalised kappa, all T
        plot_scatter(locals(), 'result_dist_bays09_both_normalised', 'summed normalised em mixt + kappa')

        # Distance kappa T = 1
        plot_scatter(locals(), 'result_dist_bays09_kappa_T1_sum', 'Kappa T=1')

        # Distance kappa T = 2...5
        plot_scatter(locals(), 'result_dist_bays09_kappa_T25_sum', 'Kappa T=2/5')

        # Distance em fits T = 1
        plot_scatter(locals(), 'result_dist_bays09_emmixt_T1_sum', 'em fits T=1')

        # Distance em fits T = 2...5
        plot_scatter(locals(), 'result_dist_bays09_emmixt_T25_sum', 'em fits T=2/5')

        # Distance em fits T = 1, KL
        plot_scatter(locals(), 'result_dist_bays09_emmixt_KL_T1_sum', 'em fits T=1, KL')

        # Distance em fits T = 2...5, KL
        plot_scatter(locals(), 'result_dist_bays09_emmixt_KL_T25_sum', 'em fits T=2/5, KL')



    if plots_per_T:
        for T_i, T in enumerate(T_space):

            # Kappa per T, fit to Bays09
            result_dist_bays09_kappa_currT = result_dist_bays09_allT_avg[:, T_i, 0]
            result_dist_bays09_kappa_currT_masked = mask_outliers(result_dist_bays09_kappa_currT)

            plot_scatter(locals(), 'result_dist_bays09_kappa_currT_masked', 'kappa T %d masked' % T, label_file="T{}".format(T))

            # EM Mixt per T, fit to Bays09
            result_dist_bays09_emmixt_sum_currT = np.nansum(result_dist_bays09_allT_avg[:, T_i, 1:], axis=-1)
            result_dist_bays09_emmixt_sum_currT_masked = mask_outliers(result_dist_bays09_emmixt_sum_currT)

            plot_scatter(locals(), 'result_dist_bays09_emmixt_sum_currT_masked', 'EM mixt T %d masked' % T, label_file="T{}".format(T))

            # EM Mixt per T, fit to Bays09 KL divergence
            result_dist_bays09_emmixt_KL_sum_currT = result_dist_bays09_emmixt_KL[:, T_i]
            plot_scatter(locals(), 'result_dist_bays09_emmixt_KL_sum_currT', 'KL EM mixt T %d masked' % T, label_file="T{}".format(T))




    # # Interpolate
    # if plots_interpolate:

    #     sigmax_target = 0.9

    #     M_interp_space = np.arange(6, 625, 5)
    #     ratio_interp_space = np.linspace(0.01, 1.0, 50)
    #     # sigmax_interp_space = np.linspace(0.01, 1.0, 50)
    #     sigmax_interp_space = np.array([sigmax_target])
    #     params_crossspace = np.array(utils.cross(M_interp_space, ratio_interp_space, sigmax_interp_space))

    #     interpolated_data = rbf_interpolator(params_crossspace[:, 0], params_crossspace[:, 1], params_crossspace[:, 2]).reshape((M_interp_space.size, ratio_interp_space.size))

    #     utils.pcolor_2d_data(interpolated_data, M_interp_space, ratio_interp_space, 'M', 'ratio', 'interpolated, fixing sigmax= %.2f' % sigmax_target)

    #     points_closeby = ((result_parameters_flat[:, 2] - sigmax_target)**2)< 0.01
    #     plt.figure()
    #     # plt.imshow(interpolated_data, extent=(M_interp_space.min(), M_interp_space.max(), ratio_interp_space.min(), ratio_interp_space.max()))
    #     plt.imshow(interpolated_data)
    #     plt.scatter(result_parameters_flat[points_closeby, 0], result_parameters_flat[points_closeby, 1], s=100, c=result_fitexperiments_bic_avg[points_closeby], marker='o')


    # if plot_per_ratio:
    #     # Plot the evolution of loglike as a function of sigmax, with std shown
    #     for ratio_conj_i, ratio_conj in enumerate(ratio_space):
    #         ax = utils.plot_mean_std_area(sigmax_space, result_log_posterior_mean[ratio_conj_i], result_log_posterior_std[ratio_conj_i])

    #         ax.get_figure().canvas.draw()

    #         if savefigs:
    #             dataio.save_current_figure('results_fitexp_%s_loglike_ratioconj%.2f_{label}_global_{unique_id}.pdf' % (exp_dataset, ratio_conj))



    all_args = data_pbs.loaded_data['args_list']
    variables_to_save = ['parameter_names_sorted']

    if savedata:
        dataio.save_variables_default(locals(), variables_to_save)
        dataio.make_link_output_to_dropbox(dropbox_current_experiment_folder='output_noise')


    plt.show()

    return locals()
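
The distance pipeline in the function above repeatedly averages over the repetition axis and then sums over T. A minimal numpy-only sketch of that reduction on toy shapes (names and shapes here are hypothetical; np.nanmean stands in for utils.nanmean):

import numpy as np

# toy layout: (num_points, T, em_params, repetitions)
result_em_fits = np.random.rand(6, 5, 4, 3)
em_target = np.random.rand(5, 4)          # per-T target for the 4 EM params

# broadcast the target over points and repetitions, square the difference,
# then average over repetitions (last axis)
dist_allT = np.nanmean(
    (result_em_fits - em_target[np.newaxis, :, :, np.newaxis])**2, axis=-1)

dist_kappa_sum = np.nansum(dist_allT[:, :, 0], axis=-1)   # sum over T
print(dist_allT.shape, dist_kappa_sum.shape)              # (6, 5, 4) (6,)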


def plots_fitting_experiments_random(data_pbs, generator_module=None):
    '''
        Reload 2D volume runs from PBS and plot them

    '''

    #### SETUP
    #
    savefigs = True
    savedata = True
    savemovies = False

    do_bays09 = True
    do_gorgo11 = True

    scatter3d_sumT = False
    plots_flat_sorted_performance = False
    plots_memorycurves_fits_best = True

    nb_best_points = 20
    nb_best_points_per_T = nb_best_points/6
    size_normal_points = 8
    size_best_points = 50
    downsampling = 2


    # do_relaunch_bestparams_pbs = True

    colormap = None  # or 'cubehelix'
    plt.rcParams['font.size'] = 16
    #
    #### /SETUP

    print "Order parameters: ", generator_module.dict_parameters_range.keys()
    # parameters: ratio_conj, sigmax, T

    # Extract data
    result_fitexperiments_flat = np.array(data_pbs.dict_arrays['result_fitexperiments']['results_flat'])
    result_fitexperiments_all_flat = np.array(data_pbs.dict_arrays['result_fitexperiments_all']['results_flat'])
    result_fitexperiments_noiseconv_flat = np.array(data_pbs.dict_arrays['result_fitexperiments_noiseconv']['results_flat'])
    result_fitexperiments_noiseconv_all_flat = np.array(data_pbs.dict_arrays['result_fitexperiments_noiseconv_all']['results_flat'])
    result_parameters_flat = np.array(data_pbs.dict_arrays['result_fitexperiments']['parameters_flat'])

    all_repeats_completed = data_pbs.dict_arrays['result_fitexperiments']['repeats_completed']
    all_args = data_pbs.loaded_data['args_list']
    all_args_arr = np.array(all_args)
    num_repetitions = generator_module.num_repetitions

    # Extract order of datasets
    experiment_ids = data_pbs.loaded_data['datasets_list'][0]['fitexperiment_parameters']['experiment_ids']
    parameter_names_sorted = data_pbs.dataset_infos['parameters']

    T_space = data_pbs.loaded_data['datasets_list'][0]['T_space']

    dataio = DataIO(output_folder=generator_module.pbs_submission_infos['simul_out_dir'] + '/outputs/', label='global_' + dataset_infos['save_output_filename'])

    # filter_data = (result_parameters_flat[:, -1] < 1.0) & (all_repeats_completed == num_repetitions - 1)
    # filter_data = (all_repeats_completed == num_repetitions - 1)
    # result_fitexperiments_flat = result_fitexperiments_flat[filter_data]
    # result_fitexperiments_all_flat = result_fitexperiments_all_flat[filter_data]
    # result_fitexperiments_noiseconv_flat = result_fitexperiments_noiseconv_flat[filter_data]
    # result_fitexperiments_noiseconv_all_flat = result_fitexperiments_noiseconv_all_flat[filter_data]
    # result_parameters_flat = result_parameters_flat[filter_data]

    # Compute averaged fit measures across repetitions.
    # The _flat results are summed over all experiments and contain bic, ll and ll90;
    # the _all_flat results keep bic, ll and ll90 per experiment. Since Gorgo11 and
    # Bays09 are incompatible, the combined version should not be used directly.
    result_fitexperiments_noiseconv_bic_avg_allT = utils.nanmean(result_fitexperiments_noiseconv_flat, axis=-1)[..., 0]
    result_fitexperiments_noiseconv_allexp_bic_avg_allT = utils.nanmean(result_fitexperiments_noiseconv_all_flat, axis=-1)[:, :, 0]
    result_fitexperiments_noiseconv_allexp_ll90_avg_allT = -utils.nanmean(result_fitexperiments_noiseconv_all_flat, axis=-1)[:, :, -1]
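    # LL90 is negated so that, like BIC, smaller values mean a better fit and
    # the argsort-for-minimum machinery below treats both measures the same way.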

    ### BIC
    # result_fitexperiments_noiseconv_allexp_bic_avg_allT: N x T x exp
    result_fitexperiments_noiseconv_bays09_bic_avg_allT = result_fitexperiments_noiseconv_allexp_bic_avg_allT[..., 0]
    result_fitexperiments_noiseconv_gorgo11_bic_avg_allT = result_fitexperiments_noiseconv_allexp_bic_avg_allT[..., 1]
    result_fitexperiments_noiseconv_dualrecall_bic_avg_allT = result_fitexperiments_noiseconv_allexp_bic_avg_allT[..., 2]
    # Summed T
    result_fitexperiments_noiseconv_bays09_bic_avg_sumT = np.nansum(result_fitexperiments_noiseconv_bays09_bic_avg_allT, axis=-1)
    result_fitexperiments_noiseconv_gorgo11_bic_avg_sumT = np.nansum(result_fitexperiments_noiseconv_gorgo11_bic_avg_allT, axis=-1)
    result_fitexperiments_noiseconv_dualrecall_bic_avg_sumT = np.nansum(result_fitexperiments_noiseconv_dualrecall_bic_avg_allT, axis=-1)

    ### LL90
    # N x T x exp
    result_fitexperiments_noiseconv_bays09_ll90_avg_allT = result_fitexperiments_noiseconv_allexp_ll90_avg_allT[..., 0]
    result_fitexperiments_noiseconv_gorgo11_ll90_avg_allT = result_fitexperiments_noiseconv_allexp_ll90_avg_allT[..., 1]
    result_fitexperiments_noiseconv_dualrecall_ll90_avg_allT = result_fitexperiments_noiseconv_allexp_ll90_avg_allT[..., 2]
    # Summed T
    result_fitexperiments_noiseconv_bays09_ll90_avg_sumT = np.nansum(result_fitexperiments_noiseconv_bays09_ll90_avg_allT, axis=-1)
    result_fitexperiments_noiseconv_gorgo11_ll90_avg_sumT = np.nansum(result_fitexperiments_noiseconv_gorgo11_ll90_avg_allT, axis=-1)
    result_fitexperiments_noiseconv_dualrecall_ll90_avg_sumT = np.nansum(result_fitexperiments_noiseconv_dualrecall_ll90_avg_allT, axis=-1)

    def mask_outliers_array(result_dist_to_use, sigma_outlier=3):
        '''
            Mask outlier datapoints.
            Computes the mean of the results and treats points with
              result > mean + sigma_outlier*std
            as outliers.

            Since we look for minima, small values are never masked.
        '''
        return np.ma.masked_greater(result_dist_to_use, np.mean(result_dist_to_use) + sigma_outlier*np.std(result_dist_to_use))
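    # Minimal usage sketch (hypothetical values): with enough points a single
    # large value gets masked, e.g.
    #   mask_outliers_array(np.concatenate([np.ones(100), [10.]]))
    # masks the 10. entry, since it exceeds mean + 3*std of the array.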

    def best_points_allT(result_dist_to_use):
        '''
            Best points for all T
        '''
        return np.argsort(result_dist_to_use)[:nb_best_points]

    def str_best_params(best_i, result_dist_to_use):
        return ' '.join(["%s %.4f" % (parameter_names_sorted[param_i], result_parameters_flat[best_i, param_i]) for param_i in xrange(len(parameter_names_sorted))]) + ' >> %f' % result_dist_to_use[best_i]

    def plot_scatter(all_vars, result_dist_to_use_name, title='', log_color=True, downsampling=1, label_file='', mask_outliers=True):

        result_dist_to_use = all_vars[result_dist_to_use_name]
        result_parameters_flat = all_vars['result_parameters_flat']

        # Filter if downsampling
        filter_downsampling = np.arange(0, result_dist_to_use.size, downsampling)
        result_dist_to_use = result_dist_to_use[filter_downsampling]
        result_parameters_flat = result_parameters_flat[filter_downsampling]

        if mask_outliers:
            result_dist_to_use = mask_outliers_array(result_dist_to_use)

        best_points_result_dist_to_use = np.argsort(result_dist_to_use)[:nb_best_points]

        # Construct all permutations of 3 parameters, for 3D scatters
        params_permutations = set([tuple(np.sort(np.random.choice(result_parameters_flat.shape[-1], 3, replace=False)).tolist()) for i in xrange(1000)])
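        # 1000 random sorted index triples collected into a set: for a handful
        # of parameters this enumerates, with high probability, every distinct
        # 3-parameter combination to plot.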

        for param_permut in params_permutations:
            fig = plt.figure()
            ax = Axes3D(fig)

            # One plot per parameter permutation
            if log_color:
                color_points = np.log(result_dist_to_use)
            else:
                color_points = result_dist_to_use

            utils.scatter3d(result_parameters_flat[:, param_permut[0]], result_parameters_flat[:, param_permut[1]], result_parameters_flat[:, param_permut[2]], s=size_normal_points, c=color_points, xlabel=parameter_names_sorted[param_permut[0]], ylabel=parameter_names_sorted[param_permut[1]], zlabel=parameter_names_sorted[param_permut[2]], title=title, ax_handle=ax)

            utils.scatter3d(result_parameters_flat[best_points_result_dist_to_use, param_permut[0]], result_parameters_flat[best_points_result_dist_to_use, param_permut[1]], result_parameters_flat[best_points_result_dist_to_use, param_permut[2]], c='r', s=size_best_points, ax_handle=ax)

            if savefigs:
                dataio.save_current_figure('scatter3d_%s_%s%s_{label}_{unique_id}.pdf' % (result_dist_to_use_name, '_'.join([parameter_names_sorted[i] for i in param_permut]), label_file))

            if savemovies:
                try:
                    utils.rotate_plot3d(ax, dataio.create_formatted_filename('scatter3d_%s_%s%s_{label}_{unique_id}.mp4' % (result_dist_to_use_name, '_'.join([parameter_names_sorted[i] for i in param_permut]), label_file)), bitrate=8000, min_duration=8)
                    utils.rotate_plot3d(ax, dataio.create_formatted_filename('scatter3d_%s_%s%s_{label}_{unique_id}.gif' % (result_dist_to_use_name, '_'.join([parameter_names_sorted[i] for i in param_permut]), label_file)), nb_frames=30, min_duration=8)
                except Exception:
                    # Most likely wrong aggregator...
                    print "failed when creating movies for ", result_dist_to_use_name


            if False and savefigs:
                ax.view_init(azim=90, elev=10)
                dataio.save_current_figure('scatter3d_view2_%s_%s%s_{label}_{unique_id}.pdf' % (result_dist_to_use_name, '_'.join([parameter_names_sorted[i] for i in param_permut]), label_file))

            # plt.close('all')

        print "Parameters: %s" % ', '.join(parameter_names_sorted)
        print "Best points, %s:" % title
        print '\n'.join([str_best_params(best_i, result_dist_to_use) for best_i in best_points_result_dist_to_use])



    if scatter3d_sumT:

        plot_scatter(locals(), 'result_fitexperiments_noiseconv_bays09_bic_avg_sumT', 'BIC Bays09')
        plot_scatter(locals(), 'result_fitexperiments_noiseconv_bays09_ll90_avg_sumT', 'LL90 Bays09')

        plot_scatter(locals(), 'result_fitexperiments_noiseconv_gorgo11_bic_avg_sumT', 'BIC Gorgo11')
        plot_scatter(locals(), 'result_fitexperiments_noiseconv_gorgo11_ll90_avg_sumT', 'LL90 Gorgo11')

        plot_scatter(locals(), 'result_fitexperiments_noiseconv_dualrecall_bic_avg_sumT', 'BIC Dual recall')
        plot_scatter(locals(), 'result_fitexperiments_noiseconv_dualrecall_ll90_avg_sumT', 'LL90 Dual recall')


    if plots_flat_sorted_performance:
        result_dist_to_try = []
        if do_bays09:
            result_dist_to_try.extend(['result_fitexperiments_noiseconv_bays09_bic_avg_sumT', 'result_fitexperiments_noiseconv_bays09_ll90_avg_sumT'])
        if do_gorgo11:
            result_dist_to_try.extend(['result_fitexperiments_noiseconv_gorgo11_bic_avg_sumT', 'result_fitexperiments_noiseconv_gorgo11_ll90_avg_sumT'])

        for result_dist in result_dist_to_try:
            order_indices = np.argsort(locals()[result_dist])[::-1]

            f, axes = plt.subplots(2, 1)
            axes[0].plot(np.arange(4) + result_parameters_flat[order_indices]/np.max(result_parameters_flat[order_indices], axis=0))
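            # Each parameter column is normalised by its maximum and offset by
            # its column index (np.arange(4), assuming four parameters here)
            # so the traces stack without overlapping.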
            axes[0].legend(parameter_names_sorted, loc='upper left')
            axes[0].set_ylabel('Parameters')
            axes[1].plot(locals()[result_dist][order_indices])
            axes[1].set_ylabel(result_dist.split('result_dist_')[-1])
            axes[0].set_title('Distance ordered ' + result_dist.split('result_dist_')[-1])
            f.canvas.draw()

            if savefigs:
                dataio.save_current_figure('plot_sortedperf_full_%s_{label}_{unique_id}.pdf' % (result_dist))

    if plots_memorycurves_fits_best:
        # Reload the EM fits from another set of runs, and find the closest parameter set to the ones found here.
        data = utils.load_npy('normalisedsigmaxsigmaoutput_random_fitmixturemodels_sigmaxMratiosigmaoutput_repetitions3_280814/outputs/global_plots_fitmixtmodel_random_sigmaoutsigmaxnormMratio-plots_fit_mixturemodels_random-75eb9c74-72e0-4165-8014-92c1ef446f0a.npy')
        result_em_fits_flat_fitmixture = data['result_em_fits_flat']
        result_parameters_flat_fitmixture = data['result_parameters_flat']
        all_args_arr_fitmixture = data['all_args_arr']

        data_dir = None
        if not os.environ.get('WORKDIR_DROP'):
            data_dir = '../experimental_data/'

        plotting_parameters = launchers_memorycurves_marginal_fi.load_prepare_datasets(data_dir=data_dir)

        def plot_memorycurves_fits_fromexternal(all_vars, result_dist_to_use_name, nb_best_points=10):
            result_dist_to_use = all_vars[result_dist_to_use_name]

            result_em_fits_flat_fitmixture = all_vars['result_em_fits_flat_fitmixture']
            result_parameters_flat_fitmixture = all_vars['result_parameters_flat_fitmixture']
            all_args_arr_fitmixture = all_vars['all_args_arr_fitmixture']

            best_point_indices_result_dist = np.argsort(result_dist_to_use)[:nb_best_points]

            for best_point_index in best_point_indices_result_dist:
                print "extended plot desired for: " + str_best_params(best_point_index, result_dist_to_use)

                dist_best_points_fitmixture = np.abs(result_parameters_flat_fitmixture - result_parameters_flat[best_point_index])
                dist_best_points_fitmixture -= np.min(dist_best_points_fitmixture, axis=0)
                dist_best_points_fitmixture /= np.max(dist_best_points_fitmixture, axis=0)

                best_point_index_fitmixture = np.argmax(np.prod(1-dist_best_points_fitmixture, axis=-1))
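                # Per-dimension distances are min-max normalised to [0, 1]; the
                # product of (1 - distance) acts as a soft AND, picking a point
                # that is close to the target in every parameter simultaneously.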

                print "found closest: " + ' '.join(["%s %.4f" % (parameter_names_sorted[param_i], result_parameters_flat_fitmixture[best_point_index_fitmixture, param_i]) for param_i in xrange(len(parameter_names_sorted))])

                # Update arguments
                all_args_arr_fitmixture[best_point_index_fitmixture].update(dict(zip(parameter_names_sorted, result_parameters_flat_fitmixture[best_point_index_fitmixture])))
                packed_data = dict(T_space=T_space, result_em_fits=result_em_fits_flat_fitmixture[best_point_index_fitmixture], all_parameters=all_args_arr_fitmixture[best_point_index_fitmixture])

                plotting_parameters['suptitle'] = result_dist_to_use_name
                plotting_parameters['reuse_axes'] = False
                if savefigs:
                    packed_data['dataio'] = dataio

                launchers_memorycurves_marginal_fi.do_memory_plots(packed_data, plotting_parameters)


        plot_memorycurves_fits_fromexternal(locals(), 'result_fitexperiments_noiseconv_bays09_ll90_avg_sumT', nb_best_points=3)

        plot_memorycurves_fits_fromexternal(locals(), 'result_fitexperiments_noiseconv_gorgo11_ll90_avg_sumT', nb_best_points=3)

        plot_memorycurves_fits_fromexternal(locals(), 'result_fitexperiments_noiseconv_dualrecall_ll90_avg_sumT', nb_best_points=3)



    all_args = data_pbs.loaded_data['args_list']
    variables_to_save = ['experiment_ids', 'parameter_names_sorted', 'T_space', 'all_args_arr', 'all_repeats_completed']

    if savedata:
        dataio.save_variables_default(locals(), variables_to_save)
        dataio.make_link_output_to_dropbox(dropbox_current_experiment_folder='sigmaoutput_normalisedsigmax_random')


    plt.show()

    return locals()


def postprocess_dualrecall_fitmixturemodel(data_pbs, generator_module=None):
    '''
        Reload runs from PBS

        To be plotted in Ipython later
    '''

    #### SETUP
    #
    savedata = True

    colormap = None  # or 'cubehelix'
    plt.rcParams['font.size'] = 16
    #
    #### /SETUP

    print "Order parameters: ", data_pbs.dataset_infos['parameters']
    # parameters: M, ratio_conj, sigmax

    # Extract data
    result_em_fits = np.array(data_pbs.dict_arrays['result_em_fits']['results_flat'])
    result_dist_dualrecall_angle = np.array(data_pbs.dict_arrays['result_dist_dualrecall_angle']['results_flat'])
    result_dist_dualrecall_angle_emmixt_KL = np.array(data_pbs.dict_arrays['result_dist_dualrecall_angle_emmixt_KL']['results_flat'])
    result_dist_dualrecall_colour = np.array(data_pbs.dict_arrays['result_dist_dualrecall_colour']['results_flat'])
    result_dist_dualrecall_colour_emmixt_KL = np.array(data_pbs.dict_arrays['result_dist_dualrecall_colour_emmixt_KL']['results_flat'])


    result_parameters_flat = np.array(data_pbs.dict_arrays['result_em_fits']['parameters_flat'])
    all_repeats_completed = data_pbs.dict_arrays['result_em_fits']['repeats_completed']

    all_args_arr = np.array(data_pbs.loaded_data['args_list'])

    M_space = data_pbs.loaded_data['parameters_uniques']['M']
    ratio_conj_space = data_pbs.loaded_data['parameters_uniques']['ratio_conj']
    sigmax_space = data_pbs.loaded_data['parameters_uniques']['sigmax']

    num_repetitions = generator_module.num_repetitions
    parameter_names_sorted = data_pbs.dataset_infos['parameters']

    dataio = DataIO(output_folder=generator_module.pbs_submission_infos['simul_out_dir'] + '/outputs/', label='global_' + dataset_infos['save_output_filename'])

    # Load ground truth
    data_dualrecall = load_experimental_data.load_data_dualrecall(fit_mixture_model=True)

    ## Filter everything with repeats_completed == num_repet
    filter_data = all_repeats_completed == num_repetitions - 1
    result_parameters_flat = result_parameters_flat[filter_data]

    result_em_fits = result_em_fits[filter_data]
    result_dist_dualrecall_angle = result_dist_dualrecall_angle[filter_data]
    result_dist_dualrecall_angle_emmixt_KL = result_dist_dualrecall_angle_emmixt_KL[filter_data]
    result_dist_dualrecall_colour = result_dist_dualrecall_colour[filter_data]
    result_dist_dualrecall_colour_emmixt_KL = result_dist_dualrecall_colour_emmixt_KL[filter_data]

    all_args_arr = all_args_arr[filter_data]
    all_repeats_completed = all_repeats_completed[filter_data]

    print "Size post-filter: ", result_parameters_flat.shape[0]

    # Compute lots of averages over the repetitions
    result_em_fits_avg = utils.nanmean(result_em_fits, axis=-1)
    result_dist_dualrecall_angle_avg = utils.nanmean(result_dist_dualrecall_angle, axis=-1)
    result_dist_dualrecall_angle_emmixt_KL_avg = utils.nanmean(result_dist_dualrecall_angle_emmixt_KL, axis=-1)
    result_dist_dualrecall_colour_avg = utils.nanmean(result_dist_dualrecall_colour, axis=-1)
    result_dist_dualrecall_colour_emmixt_KL_avg = utils.nanmean(result_dist_dualrecall_colour_emmixt_KL, axis=-1)

    # all_args = data_pbs.loaded_data['args_list']
    variables_to_save = ['parameter_names_sorted', 'all_args_arr', 'all_repeats_completed', 'filter_data']

    if savedata:
        dataio.save_variables_default(locals(), variables_to_save)
        dataio.make_link_output_to_dropbox(dropbox_current_experiment_folder='dualrecall_fitmixturemodel')


    plt.show()

    return locals()
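
The post-filtering step above applies a single boolean mask across several parallel result arrays. A self-contained sketch of the same pattern on toy data (all names and values hypothetical):

import numpy as np

num_repetitions = 3
repeats_completed = np.array([2, 1, 2, 0, 2])  # index of last finished repeat
result_parameters = np.random.rand(5, 4)       # one parameter row per run
result_em_fits = np.random.rand(5, 6)

# keep only runs whose final repetition completed
filter_data = repeats_completed == num_repetitions - 1
result_parameters = result_parameters[filter_data]
result_em_fits = result_em_fits[filter_data]
print(result_parameters.shape, result_em_fits.shape)  # (3, 4) (3, 6)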
Example #18
from dataio import DataIO, ImageHandler
from unet_model import UNet
from tensorflow.compat.v1 import ConfigProto
from tensorflow.compat.v1 import InteractiveSession
import tensorflow as tf

# Work around cuDNN initialisation failures by letting GPU memory grow on demand
config = ConfigProto()
config.gpu_options.allow_growth = True
# config.gpu_options.per_process_gpu_memory_fraction = 0.9
session = InteractiveSession(config=config)
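# Note: the two compat imports above emulate TF1 behaviour; on TF 2.x the same
# memory-growth effect can be had natively (a minimal sketch, assuming a TF 2.x
# install):
#   for gpu in tf.config.list_physical_devices('GPU'):
#       tf.config.experimental.set_memory_growth(gpu, True)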

# create all instances here
dat = DataIO().load_matfile_images_first('retina_training_STARE.mat')
display = ImageHandler()

# Run U-Net Model
model = UNet('Test_Model')
model.create_UNet_retina()
model.fit_model(*dat, nepochs=2000)

# get plot for training accuracy and loss
plot1 = model.plot_accuracy()
plot1.plot()
plot1.show()
plot2 = model.plot_loss()
plot2.plot()
plot2.show()


def plots_fit_mixturemodels_random(data_pbs, generator_module=None):
    '''
        Reload runs from PBS

        !!!
        IMPORTANT LOOK AT ME
        !!!
    '''

    #### SETUP
    #
    savefigs = True
    savedata = True
    savemovies = False

    do_bays09 = True
    do_gorgo11 = True

    plots_scatter3d = True
    plots_scatter_per_T = False
    plots_flat_sorted_performance = False
    plots_memorycurves_fits_best = True

    # do_relaunch_bestparams_pbs = True

    colormap = None  # or 'cubehelix'
    plt.rcParams['font.size'] = 16
    #
    #### /SETUP

    print "Order parameters: ", data_pbs.dataset_infos['parameters']
    # parameters: M, ratio_conj, sigmax

    # Extract data
    T_space = data_pbs.loaded_data['datasets_list'][0]['T_space']

    result_em_fits_flat = np.array(data_pbs.dict_arrays['result_em_fits']['results_flat'])
    result_precisions_flat = np.array(data_pbs.dict_arrays['result_all_precisions']['results_flat'])
    result_dist_bays09_flat = np.array(data_pbs.dict_arrays['result_dist_bays09']['results_flat'])
    result_dist_gorgo11_flat = np.array(data_pbs.dict_arrays['result_dist_gorgo11']['results_flat'])
    result_dist_bays09_emmixt_KL = np.array(data_pbs.dict_arrays['result_dist_bays09_emmixt_KL']['results_flat'])
    result_dist_gorgo11_emmixt_KL = np.array(data_pbs.dict_arrays['result_dist_gorgo11_emmixt_KL']['results_flat'])
    result_parameters_flat = np.array(data_pbs.dict_arrays['result_em_fits']['parameters_flat'])
    all_repeats_completed = data_pbs.dict_arrays['result_em_fits']['repeats_completed']

    all_args = data_pbs.loaded_data['args_list']
    all_args_arr = np.array(all_args)

    sigmaoutput_space = data_pbs.loaded_data['parameters_uniques']['sigma_output']
    sigmax_space = data_pbs.loaded_data['parameters_uniques']['sigmax']
    ratio_space = data_pbs.loaded_data['parameters_uniques']['ratio_conj']
    num_repetitions = generator_module.num_repetitions
    parameter_names_sorted = data_pbs.dataset_infos['parameters']

    dataio = DataIO(output_folder=generator_module.pbs_submission_infos['simul_out_dir'] + '/outputs/', label='global_' + dataset_infos['save_output_filename'])

    # Load bays09
    # data_bays09 = load_experimental_data.load_data_bays09(fit_mixture_model=True)
    # bays09_nitems = data_bays09['data_to_fit']['n_items']
    # bays09_em_target = np.nan*np.empty((bays09_nitems.max(), 4))  #kappa, prob_target, prob_nontarget, prob_random
    # bays09_em_target[bays09_nitems - 1] = data_bays09['em_fits_nitems_arrays']['mean'].T
    # bays09_emmixt_target = bays09_em_target[:, 1:]

    ## Filter everything with sigma_output > 1.0 and repeats_completed == num_repet
    filter_data = (result_parameters_flat[:, -1] < 1.0) & (all_repeats_completed == num_repetitions - 1)
    result_em_fits_flat = result_em_fits_flat[filter_data]
    result_precisions_flat = result_precisions_flat[filter_data]
    result_dist_bays09_flat = result_dist_bays09_flat[filter_data]
    result_dist_gorgo11_flat = result_dist_gorgo11_flat[filter_data]
    result_dist_bays09_emmixt_KL = result_dist_bays09_emmixt_KL[filter_data]
    result_dist_gorgo11_emmixt_KL = result_dist_gorgo11_emmixt_KL[filter_data]
    result_parameters_flat = result_parameters_flat[filter_data]
    all_args_arr = all_args_arr[filter_data]
    all_repeats_completed = all_repeats_completed[filter_data]

    # for _result_dist in ['result_em_fits_flat', 'result_precisions_flat', 'result_dist_bays09_flat', 'result_dist_gorgo11_flat', 'result_dist_bays09_emmixt_KL', 'result_dist_gorgo11_emmixt_KL', 'result_parameters_flat']:
    #     locals()[key] = locals()[key][filter_sigmaout]
    #     # exec("%s = %s[%s]" % (_result_dist, _result_dist, 'filter_sigmaout'))

    ## Compute averaged distances and combined fit measures

    result_em_fits_all_avg = utils.nanmean(result_em_fits_flat, axis=-1)
    result_em_kappa_allT = result_em_fits_all_avg[..., 0]
    result_em_emmixt_allT = result_em_fits_all_avg[..., 1:4]

    result_precisions_all_avg = utils.nanmean(result_precisions_flat, axis=-1)

    ##### Distance to Bays09
    result_dist_bays09_allT_avg = utils.nanmean(result_dist_bays09_flat, axis=-1)
    result_dist_bays09_emmixt_KL_allT_avg = utils.nanmean(result_dist_bays09_emmixt_KL, axis=-1)
    result_dist_bays09_kappa_allT = result_dist_bays09_allT_avg[..., 0]

    result_dist_bays09_kappa_sumT = np.nansum(result_dist_bays09_kappa_allT, axis=-1)
    result_dist_bays09_logkappa_sumT = np.log(result_dist_bays09_kappa_sumT)
    result_dist_bays09_emmixt_KL_sumT = np.nansum(result_dist_bays09_emmixt_KL_allT_avg, axis=-1)

    # combined versions
    result_dist_bays09_both_normalised = result_dist_bays09_emmixt_KL_sumT/np.max(result_dist_bays09_emmixt_KL_sumT) + result_dist_bays09_kappa_sumT/np.max(result_dist_bays09_kappa_sumT)
    result_dist_bays09_logkappamixtKL = result_dist_bays09_logkappa_sumT + result_dist_bays09_emmixt_KL_sumT
    result_dist_bays09_logkappamixtKL_normalised = result_dist_bays09_logkappa_sumT/np.max(result_dist_bays09_logkappa_sumT) + result_dist_bays09_emmixt_KL_sumT/np.max(result_dist_bays09_emmixt_KL_sumT)

    result_dist_bays09_logkappa_sumT_forand = result_dist_bays09_logkappa_sumT - np.min(result_dist_bays09_logkappa_sumT)*np.sign(np.min(result_dist_bays09_logkappa_sumT))
    result_dist_bays09_logkappa_sumT_forand /= np.max(result_dist_bays09_logkappa_sumT_forand)

    result_dist_bays09_emmixt_KL_sumT_forand = result_dist_bays09_emmixt_KL_sumT - np.min(result_dist_bays09_emmixt_KL_sumT)*np.sign(np.min(result_dist_bays09_emmixt_KL_sumT))
    result_dist_bays09_emmixt_KL_sumT_forand /= np.max(result_dist_bays09_emmixt_KL_sumT_forand)

    result_dist_bays09_logkappamixtKL_AND = 1. - (1. - result_dist_bays09_logkappa_sumT_forand)*(1. - result_dist_bays09_emmixt_KL_sumT_forand)
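    # With both terms rescaled towards [0, 1] above, 1 - (1-a)(1-b) is small
    # only when *both* the log-kappa and mixture-KL distances are small: an
    # AND on goodness-of-fit.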

    # Mask kappa for bad performance
    # result_dist_bays09_kappa_sumT_masked = np.ma.masked_greater(result_dist_bays09_kappa_sumT, 2*np.median(result_dist_bays09_kappa_sumT))
    # result_dist_bays09_logkappa_sumT_masked = np.ma.masked_greater(result_dist_bays09_logkappa_sumT, 2*np.median(result_dist_bays09_logkappa_sumT))
    # result_dist_bays09_emmixt_KL_sumT_masked = np.ma.masked_greater(result_dist_bays09_emmixt_KL_sumT, 2*np.median(result_dist_bays09_emmixt_KL_sumT))
    # result_dist_bays09_both_normalised_mult_masked = 1-(1. - result_dist_bays09_emmixt_KL_sumT_masked/np.max(result_dist_bays09_emmixt_KL_sumT_masked))*(1. - result_dist_bays09_kappa_sumT_masked/np.max(result_dist_bays09_kappa_sumT_masked))

    ##### Distance to Gorgo11
    result_dist_gorgo11_allT_avg = utils.nanmean(result_dist_gorgo11_flat, axis=-1)
    result_dist_gorgo11_emmixt_KL_allT_avg = utils.nanmean(result_dist_gorgo11_emmixt_KL, axis=-1)
    result_dist_gorgo11_kappa_allT = result_dist_gorgo11_allT_avg[..., 0]

    result_dist_gorgo11_kappa_sumT = np.nansum(result_dist_gorgo11_kappa_allT, axis=-1)
    result_dist_gorgo11_logkappa_sumT = np.log(result_dist_gorgo11_kappa_sumT)
    result_dist_gorgo11_emmixt_KL_sumT = np.nansum(result_dist_gorgo11_emmixt_KL_allT_avg, axis=-1)
    result_dist_gorgo11_emmixt_KL_sumT25 = np.nansum(result_dist_gorgo11_emmixt_KL_allT_avg[:, 1:], axis=-1)
    result_dist_gorgo11_logkappa_sumT25 = np.log(np.nansum(result_dist_gorgo11_kappa_allT[..., 1:], axis=-1))

    # combined versions
    result_dist_gorgo11_both_normalised = result_dist_gorgo11_emmixt_KL_sumT/np.max(result_dist_gorgo11_emmixt_KL_sumT) + result_dist_gorgo11_kappa_sumT/np.max(result_dist_gorgo11_kappa_sumT)
    result_dist_gorgo11_logkappamixtKL = result_dist_gorgo11_logkappa_sumT + result_dist_gorgo11_emmixt_KL_sumT
    result_dist_gorgo11_logkappamixtKL_normalised = result_dist_gorgo11_logkappa_sumT/np.max(result_dist_gorgo11_logkappa_sumT) + result_dist_gorgo11_emmixt_KL_sumT/np.max(result_dist_gorgo11_emmixt_KL_sumT)

    result_dist_gorgo11_logkappa_sumT_forand = result_dist_gorgo11_logkappa_sumT - np.min(result_dist_gorgo11_logkappa_sumT)*np.sign(np.min(result_dist_gorgo11_logkappa_sumT))
    result_dist_gorgo11_logkappa_sumT_forand /= np.max(result_dist_gorgo11_logkappa_sumT_forand)


    result_dist_gorgo11_logkappa_sumT25_forand = result_dist_gorgo11_logkappa_sumT25 - np.min(result_dist_gorgo11_logkappa_sumT25)*np.sign(np.min(result_dist_gorgo11_logkappa_sumT25))
    result_dist_gorgo11_logkappa_sumT25_forand /= np.max(result_dist_gorgo11_logkappa_sumT25_forand)

    result_dist_gorgo11_emmixt_KL_sumT_forand = result_dist_gorgo11_emmixt_KL_sumT - np.min(result_dist_gorgo11_emmixt_KL_sumT)*np.sign(np.min(result_dist_gorgo11_emmixt_KL_sumT))
    result_dist_gorgo11_emmixt_KL_sumT_forand /= np.max(result_dist_gorgo11_emmixt_KL_sumT_forand)

    result_dist_gorgo11_emmixt_KL_sumT25_forand = result_dist_gorgo11_emmixt_KL_sumT25 - np.min(result_dist_gorgo11_emmixt_KL_sumT25)*np.sign(np.min(result_dist_gorgo11_emmixt_KL_sumT25))
    result_dist_gorgo11_emmixt_KL_sumT25_forand /= np.max(result_dist_gorgo11_emmixt_KL_sumT25_forand)

    result_dist_gorgo11_logkappamixtKL_AND = 1. - (1. - result_dist_gorgo11_logkappa_sumT_forand)*(1. - result_dist_gorgo11_emmixt_KL_sumT_forand)

    result_dist_gorgo11_logkappa25mixtKL_AND = 1. - (1. - result_dist_gorgo11_logkappa_sumT25_forand)*(1. - result_dist_gorgo11_emmixt_KL_sumT25_forand)

    def str_best_params(best_i, result_dist_to_use):
        return ' '.join(["%s %.4f" % (parameter_names_sorted[param_i], result_parameters_flat[best_i, param_i]) for param_i in xrange(len(parameter_names_sorted))]) + ' >> %f' % result_dist_to_use[best_i]

    if plots_scatter3d:
        nb_best_points = 30
        size_normal_points = 8
        size_best_points = 50

        def plot_scatter(all_vars, result_dist_to_use_name, title='', log_color=True, downsampling=1, label_file=''):

            result_dist_to_use = all_vars[result_dist_to_use_name]
            result_parameters_flat = all_vars['result_parameters_flat']

            # Filter if downsampling
            filter_downsampling = np.arange(0, result_dist_to_use.size, downsampling)
            result_dist_to_use = result_dist_to_use[filter_downsampling]
            result_parameters_flat = result_parameters_flat[filter_downsampling]

            best_points_result_dist_to_use = np.argsort(result_dist_to_use)[:nb_best_points]

            # Construct all permutations of 3 parameters, for 3D scatters
            params_permutations = set([tuple(np.sort(np.random.choice(result_parameters_flat.shape[-1], 3, replace=False)).tolist()) for i in xrange(1000)])

            for param_permut in params_permutations:
                fig = plt.figure()
                ax = Axes3D(fig)

                # One plot per parameter permutation
                if log_color:
                    color_points = np.log(result_dist_to_use)
                else:
                    color_points = result_dist_to_use

                utils.scatter3d(result_parameters_flat[:, param_permut[0]], result_parameters_flat[:, param_permut[1]], result_parameters_flat[:, param_permut[2]], s=size_normal_points, c=color_points, xlabel=parameter_names_sorted[param_permut[0]], ylabel=parameter_names_sorted[param_permut[1]], zlabel=parameter_names_sorted[param_permut[2]], title=title, ax_handle=ax)

                utils.scatter3d(result_parameters_flat[best_points_result_dist_to_use, param_permut[0]], result_parameters_flat[best_points_result_dist_to_use, param_permut[1]], result_parameters_flat[best_points_result_dist_to_use, param_permut[2]], c='r', s=size_best_points, ax_handle=ax)

                if savefigs:
                    dataio.save_current_figure('scatter3d_%s_%s%s_{label}_{unique_id}.pdf' % (result_dist_to_use_name, '_'.join([parameter_names_sorted[i] for i in param_permut]), label_file))

                if savemovies:
                    try:
                        utils.rotate_plot3d(ax, dataio.create_formatted_filename('scatter3d_%s_%s%s_{label}_{unique_id}.mp4' % (result_dist_to_use_name, '_'.join([parameter_names_sorted[i] for i in param_permut]), label_file)), bitrate=8000, min_duration=8)
                        utils.rotate_plot3d(ax, dataio.create_formatted_filename('scatter3d_%s_%s%s_{label}_{unique_id}.gif' % (result_dist_to_use_name, '_'.join([parameter_names_sorted[i] for i in param_permut]), label_file)), nb_frames=30, min_duration=8)
                    except Exception:
                        # Most likely wrong aggregator...
                        print "failed when creating movies for ", result_dist_to_use_name


                if False and savefigs:
                    ax.view_init(azim=90, elev=10)
                    dataio.save_current_figure('scatter3d_view2_%s_%s%s_{label}_{unique_id}.pdf' % (result_dist_to_use_name, '_'.join([parameter_names_sorted[i] for i in param_permut]), label_file))

                # plt.close('all')

            print "Parameters: %s" % ', '.join(parameter_names_sorted)
            print "Best points, %s:" % title
            print '\n'.join([str_best_params(best_i, result_dist_to_use) for best_i in best_points_result_dist_to_use])



        #### BAYS 09
        if do_bays09:
            # Distance for log kappa, all T
            plot_scatter(locals(), 'result_dist_bays09_logkappa_sumT', 'Bays09 kappa all T', log_color=False)

            # # Distance for em fits, all T, KL distance
            plot_scatter(locals(), 'result_dist_bays09_emmixt_KL_sumT', 'Bays09 em fits, sum T, KL', log_color=False)

            # Distance for product of normalised em fits KL + normalised log kappa, all T
            plot_scatter(locals(), 'result_dist_bays09_logkappamixtKL', 'Bays09 em fits KL, log kappa')

            # Distance for AND normalised em fits KL + log kappa
            plot_scatter(locals(), 'result_dist_bays09_logkappamixtKL_AND', 'Bays09 em fits KL AND log kappa')

        #### Gorgo 11
        if do_gorgo11:
            # Distance for product of normalised em fits KL + normalised log kappa, all T
            plot_scatter(locals(), 'result_dist_gorgo11_logkappamixtKL', 'Gorgo11 em fits KL, log kappa')

            # Distance for AND normalised em fits KL + log kappa
            plot_scatter(locals(), 'result_dist_gorgo11_logkappamixtKL_AND', 'Gorgo11 em fits KL AND log kappa')

            # Distance for logkappa
            plot_scatter(locals(), 'result_dist_gorgo11_logkappa_sumT', 'Gorgo11 log kappa all T', log_color=False)

            # Distance for EM mixture proportions
            plot_scatter(locals(), 'result_dist_gorgo11_emmixt_KL_sumT', 'Gorgo11 em fits, sum T, KL', log_color=False)



    if plots_flat_sorted_performance:
        result_dist_to_try = []
        if do_bays09:
            result_dist_to_try.extend(['result_dist_bays09_logkappamixtKL_AND', 'result_dist_bays09_logkappamixtKL'])
        if do_gorgo11:
            result_dist_to_try.extend(['result_dist_gorgo11_logkappamixtKL_AND', 'result_dist_gorgo11_logkappamixtKL'])

        for result_dist in result_dist_to_try:
            order_indices = np.argsort(locals()[result_dist])[::-1]

            f, axes = plt.subplots(2, 1)
            axes[0].plot(np.arange(4) + result_parameters_flat[order_indices]/np.max(result_parameters_flat[order_indices], axis=0))
            axes[0].legend(parameter_names_sorted, loc='upper left')
            axes[0].set_ylabel('Parameters')
            axes[1].plot(locals()[result_dist][order_indices])
            axes[1].set_ylabel(result_dist.split('result_dist_')[-1])
            axes[0].set_title('Distance ordered ' + result_dist.split('result_dist_')[-1])
            f.canvas.draw()

            if savefigs:
                dataio.save_current_figure('plot_sortedperf_full_%s_{label}_{unique_id}.pdf' % (result_dist))

        ## Extra plot for logkappamixtKL_AND, which seems well behaved

        def plot_flat_best(all_vars, result_name, order_indices_filter, filter_goodAND, ordering='fitness'):
            f = plt.figure()
            axp1 = plt.subplot2grid((3, 2), (0, 0))
            axp2 = plt.subplot2grid((3, 2), (0, 1))
            axp3 = plt.subplot2grid((3, 2), (1, 0))
            axp4 = plt.subplot2grid((3, 2), (1, 1))
            axfit = plt.subplot2grid((3, 2), (2, 0), colspan=2)

            axp1.plot(result_parameters_flat[filter_goodAND][order_indices_filter, 0])
            axp1.set_title(parameter_names_sorted[0])
            axp2.plot(result_parameters_flat[filter_goodAND][order_indices_filter, 1], 'g')
            axp2.set_title(parameter_names_sorted[1])
            axp3.plot(result_parameters_flat[filter_goodAND][order_indices_filter, 2], 'r')
            axp3.set_title(parameter_names_sorted[2])
            axp4.plot(result_parameters_flat[filter_goodAND][order_indices_filter, 3], 'k')
            axp4.set_title(parameter_names_sorted[3])

            axfit.plot(all_vars[result_name][filter_goodAND][order_indices_filter])
            axfit.set_ylabel(result_name)
            plt.suptitle('Distance ordered ' + result_name)

            if savefigs:
                dataio.save_current_figure('plot_sortedperf_best_%s_%s_{label}_{unique_id}.pdf' % (result_name, ordering))

        if do_bays09:
            filter_goodAND = result_dist_bays09_logkappamixtKL_AND < 0.2

            # First order them by fitness
            order_indices_filter = np.argsort(result_dist_bays09_logkappamixtKL_AND[filter_goodAND])[::-1]
            plot_flat_best(locals(), 'result_dist_bays09_logkappamixtKL_AND', order_indices_filter, filter_goodAND, 'fitness')

            # Then by M, to see if there is some structure
            order_indices_filter = np.argsort(result_parameters_flat[filter_goodAND, 0])
            plot_flat_best(locals(), 'result_dist_bays09_logkappamixtKL_AND', order_indices_filter, filter_goodAND, 'M')

        if do_gorgo11:
            filter_goodAND = result_dist_gorgo11_logkappamixtKL_AND < 0.5

            # First order them by fitness
            order_indices_filter = np.argsort(result_dist_gorgo11_logkappamixtKL_AND[filter_goodAND])[::-1]
            plot_flat_best(locals(), 'result_dist_gorgo11_logkappamixtKL_AND', order_indices_filter, filter_goodAND, 'fitness')

            # Then by M, to see if there is some structure
            order_indices_filter = np.argsort(result_parameters_flat[filter_goodAND, 0])
            plot_flat_best(locals(), 'result_dist_gorgo11_logkappamixtKL_AND', order_indices_filter, filter_goodAND, 'M')

            # dist_cmaes_result = np.sum((result_parameters_flat - np.array([75, 1.0, 0.1537, 0.2724]))**2., axis=-1)
            # filter_close_cmaes_result = np.argsort(dist_cmaes_result)[:20]
            # order_indices_filter = np.argsort(result_dist_gorgo11_logkappamixtKL_AND[filter_close_cmaes_result])[::-1]
            # plot_flat_best(locals(), 'result_dist_gorgo11_logkappamixtKL_AND', order_indices_filter, filter_close_cmaes_result, 'Like current CMA/ES run')


    if plots_scatter_per_T:
        for T_i, T in enumerate(T_space):

            # Kappa per T, fit to Bays09
            result_dist_bays09_kappa_currT = result_dist_bays09_kappa_allT[:, T_i]
            result_dist_bays09_kappa_currT_masked = mask_outliers(result_dist_bays09_kappa_currT)

            plot_scatter(locals(), 'result_dist_bays09_kappa_currT_masked', 'kappa T %d masked' % T, label_file="T{}".format(T))

            # EM Mixt per T, fit to Bays09
            result_dist_bays09_emmixt_KL_currT = result_dist_bays09_emmixt_KL_allT_avg[:, T_i]
            result_dist_bays09_emmixt_KL_currT_masked = mask_outliers(result_dist_bays09_emmixt_KL_currT)

            plot_scatter(locals(), 'result_dist_bays09_emmixt_KL_currT_masked', 'KL EM mixt T %d masked' % T, label_file="T{}".format(T), log_color=False)


    if plots_memorycurves_fits_best:

        data_dir = None
        if not os.environ.get('WORKDIR_DROP'):
            data_dir = '../experimental_data/'

        plotting_parameters = launchers_memorycurves_marginal_fi.load_prepare_datasets(data_dir=data_dir)

        def plot_memorycurves_fits(all_vars, result_dist_to_use_name, nb_best_points=10):
            result_dist_to_use = all_vars[result_dist_to_use_name]

            best_points_result_dist_to_use = np.argsort(result_dist_to_use)[:nb_best_points]

            for best_point_index in best_points_result_dist_to_use:
                print "extended plot for: " + str_best_params(best_point_index, result_dist_to_use)

                # Update arguments
                all_args_arr[best_point_index].update(dict(zip(parameter_names_sorted, result_parameters_flat[best_point_index])))
                packed_data = dict(T_space=T_space, result_em_fits=result_em_fits_flat[best_point_index], all_parameters=all_args_arr[best_point_index])

                plotting_parameters['suptitle'] = result_dist_to_use_name
                plotting_parameters['reuse_axes'] = False
                if savefigs:
                    packed_data['dataio'] = dataio

                launchers_memorycurves_marginal_fi.do_memory_plots(packed_data, plotting_parameters)


        plot_memorycurves_fits(locals(), 'result_dist_bays09_logkappamixtKL_AND', nb_best_points=3)

        plot_memorycurves_fits(locals(), 'result_dist_gorgo11_logkappamixtKL_AND', nb_best_points=3)
        # plot_memorycurves_fits(locals(), 'result_dist_gorgo11_logkappamixtKL', nb_best_points=3)

        plot_memorycurves_fits(locals(), 'result_dist_gorgo11_logkappa25mixtKL_AND', nb_best_points=3)

        # plot_memorycurves_fits(locals(), 'result_dist_gorgo11_logkappa_sumT', nb_best_points=3)




    # # Interpolate
    # if plots_interpolate:

    #     sigmax_target = 0.9

    #     M_interp_space = np.arange(6, 625, 5)
    #     ratio_interp_space = np.linspace(0.01, 1.0, 50)
    #     # sigmax_interp_space = np.linspace(0.01, 1.0, 50)
    #     sigmax_interp_space = np.array([sigmax_target])
    #     params_crossspace = np.array(utils.cross(M_interp_space, ratio_interp_space, sigmax_interp_space))

    #     interpolated_data = rbf_interpolator(params_crossspace[:, 0], params_crossspace[:, 1], params_crossspace[:, 2]).reshape((M_interp_space.size, ratio_interp_space.size))

    #     utils.pcolor_2d_data(interpolated_data, M_interp_space, ratio_interp_space, 'M', 'ratio', 'interpolated, fixing sigmax= %.2f' % sigmax_target)

    #     points_closeby = ((result_parameters_flat[:, 2] - sigmax_target)**2)< 0.01
    #     plt.figure()
    #     # plt.imshow(interpolated_data, extent=(M_interp_space.min(), M_interp_space.max(), ratio_interp_space.min(), ratio_interp_space.max()))
    #     plt.imshow(interpolated_data)
    #     plt.scatter(result_parameters_flat[points_closeby, 0], result_parameters_flat[points_closeby, 1], s=100, c=result_fitexperiments_bic_avg[points_closeby], marker='o')


    # if plot_per_ratio:
    #     # Plot the evolution of loglike as a function of sigmax, with std shown
    #     for ratio_conj_i, ratio_conj in enumerate(ratio_space):
    #         ax = utils.plot_mean_std_area(sigmax_space, result_log_posterior_mean[ratio_conj_i], result_log_posterior_std[ratio_conj_i])

    #         ax.get_figure().canvas.draw()

    #         if savefigs:
    #             dataio.save_current_figure('results_fitexp_%s_loglike_ratioconj%.2f_{label}_global_{unique_id}.pdf' % (exp_dataset, ratio_conj))



    # all_args = data_pbs.loaded_data['args_list']
    variables_to_save = ['parameter_names_sorted', 'all_args_arr', 'all_repeats_completed', 'filter_data']

    if savedata:
        dataio.save_variables_default(locals(), variables_to_save)
        dataio.make_link_output_to_dropbox(dropbox_current_experiment_folder='sigmaoutput_normalisedsigmax_random')


    plt.show()

    return locals()