def target(self, file_name):
    """Display the sampled (noisy) points: read the input/target pairs
    from file and draw the resulting points.

    :param file_name: str, name of the file with the data.
    """
    x, t, e = DataIO.read_data(file_name)
    self.AX1.plot(x, t, marker='o', markersize=7, linestyle='none',
                  color=self.CLR_DOT, zorder=2)
def download_data(self, pair, start, end):
    """Download trade data and store it as a .csv file.

    Args:
        pair (str): Currency pair.
        start (int): Start UNIX time of trade data to download.
        end (int): End UNIX time of trade data to download.
    """
    dataio = DataIO(savedir=self._savedir, fieldnames=self.FIELDNAMES)

    if dataio.csv_check(pair):
        last_row = dataio.csv_get_last(pair)
        newest_id = int(last_row['trade_id']) + 1
        newest_t = int(last_row['time'])
    else:
        newest_id = self.__find_start_trade_id(pair, start)
        newest_t = 0

    while newest_t < end:
        # new -> old
        r = self.__get_slice(pair, newest_id)

        # old -> new; rename Binance's short field names and add a unix timestamp
        new_r = []
        for row in r:
            row['time'] = row['T'] // 1000
            row['date'] = timeutil.unix_to_iso(row['time'])
            row['price'] = row['p']
            row['size'] = row['q']
            row['side'] = 'sell' if row['m'] else 'buy'
            row['best_price_match'] = row['M']
            row['trade_id'] = row['a']
            for key in ('a', 'p', 'q', 'f', 'l', 'T', 'm', 'M'):
                row.pop(key, None)
            new_r.append(row)

        # save to file
        dataio.csv_append(pair, new_r)

        # break condition
        if len(r) < self.__MAX_LIMIT:
            break

        # prepare next iteration
        newest_id = new_r[-1]['trade_id'] + 1
        newest_t = new_r[-1]['time']
        print('Binance\t| {} : {}'.format(timeutil.unix_to_iso(newest_t), pair))

    print('Binance\t| Download complete : {}'.format(pair))
def download_data(self, pair, start, end):
    """Download trade data and store it as a .csv file.

    Args:
        pair (str): Currency pair.
        start (int): Start UNIX time of trade data to download.
        end (int): End UNIX time of trade data to download.
    """
    dataio = DataIO(savedir=self._savedir, fieldnames=self.FIELDNAMES)

    if dataio.csv_check(pair):
        last_row = dataio.csv_get_last(pair)
        newest_id = int(last_row['trade_id']) + 1
        newest_t = int(last_row['time'])
    else:
        newest_id = self.__find_start_trade_id(pair, start)
        newest_t = 0
    last_trade_id = self.__find_last_trade_id(pair)

    while newest_t < end:
        # new -> old
        r = self.__get_slice(pair, newest_id + self.__MAX_LIMIT)

        # break condition
        to_break = False

        # old -> new; add unix timestamp
        new_r = []
        for row in reversed(r):
            if row['trade_id'] > newest_id:
                row['date'] = row['time']
                row['time'] = timeutil.iso_to_unix(row['time'])
                new_r.append(row)
            if row['trade_id'] == last_trade_id:
                to_break = True

        # save to file
        dataio.csv_append(pair, new_r)

        # break condition
        if to_break:
            break

        # prepare next iteration
        newest_id = new_r[-1]['trade_id']
        newest_t = new_r[-1]['time']
        print('GDAX\t| {} : {}'.format(timeutil.unix_to_iso(newest_t), pair))

    print('GDAX\t| Download complete : {}'.format(pair))
def download_data(self, pair, start, end):
    """Download trade data and store it as a .csv file.

    Args:
        pair (str): Currency pair.
        start (int): Start UNIX time of trade data to download.
        end (int): End UNIX time of trade data to download.
    """
    dataio = DataIO(savedir=self._savedir, fieldnames=self.FIELDNAMES)

    if dataio.csv_check(pair):
        last_row = dataio.csv_get_last(pair)
        newest_id = int(last_row['trade_id']) + 1
        newest_t = int(last_row['time'])
    else:
        newest_id = self.__find_start_trade_id(pair, start)
        newest_t = 0

    while newest_t < end:
        # new -> old
        r = self.__get_slice(pair, newest_id)

        # old -> new; add unix timestamp and a synthetic trade ID
        new_r = []
        for i, row in enumerate(r):
            row['time'] = timeutil.iso_to_unix(row['timestamp'])
            row['date'] = row['timestamp']
            row['trade_id'] = newest_id + i
            row['side'] = row['side'].lower()
            row.pop('timestamp', None)
            row.pop('symbol', None)
            new_r.append(row)

        # save to file
        dataio.csv_append(pair, new_r)

        # break condition
        if len(r) < self.__MAX_LIMIT:
            break

        # prepare next iteration
        newest_id = new_r[-1]['trade_id'] + 1
        newest_t = new_r[-1]['time']
        print('Bitmex\t| {} : {}'.format(timeutil.unix_to_iso(newest_t), pair))

    print('Bitmex\t| Download complete : {}'.format(pair))
def download_data(self, pair, start, end):
    """Download trade data and store it as a .csv file.

    Args:
        pair (str): Currency pair.
        start (int): Start UNIX time of trade data to download.
        end (int): End UNIX time of trade data to download.
    """
    dataio = DataIO(savedir=self._savedir, fieldnames=self.FIELDNAMES)

    last_row = None
    if dataio.csv_check(pair):
        last_row = dataio.csv_get_last(pair)
        newest_t = int(last_row['time'])
    else:
        newest_t = self.__find_start_trade_time(pair, start) - 1

    # break condition
    last_trade_time = self.__find_last_trade_time(pair)

    while newest_t < end:
        # new -> old
        r = self.__get_slice(pair, newest_t)

        # old -> new; remove duplicate data by trade ID
        new_r = []
        for row in reversed(r):
            if last_row is not None:
                if int(last_row['tradeID']) >= row['tradeID']:
                    continue  # remove duplicates
            last_row = row
            row['time'] = timeutil.iso_to_unix(row['date'])
            new_r.append(row)

        if newest_t > last_trade_time:
            break

        # save to file
        dataio.csv_append(pair, new_r)

        # prepare next iteration
        newest_t += self.__MAX_RANGE
        print('Poloniex| {} : {}'.format(timeutil.unix_to_iso(newest_t), pair))

    print('Poloniex| Download complete : {}'.format(pair))
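# The four exchange clients above (Binance, GDAX, Bitmex, Poloniex) share the
# same download_data(pair, start, end) interface and all resume from the last
# saved CSV row, so a driver only varies the client instance. A minimal usage
# sketch; the constructor arguments and client classes are assumptions based on
# the code above, not confirmed APIs.
import time


def download_all_sketch(clients, pair, days_back=30):
    """Resume-or-start a trade download for one pair on several exchanges."""
    end = int(time.time())
    start = end - days_back * 24 * 60 * 60
    for client in clients:
        # each call resumes from the last saved CSV row if one exists
        client.download_data(pair, start, end)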
def data_preprocess(params):
    ### Record Concatenation
    dataio = DataIO(params['input_path'], params['map_path'], params['domain'])
    dataio.read_data()
    dataio.read_label()
    ctn = Concatenation(dataio, params['domain'])
    # maps patient id -> Patient, with static features and dynamic features;
    # dynamic features are stored as {time: feature_value}
    patient_info, n_feature, feature_list, feature_range = ctn.get_concatenation()

    ### Data Imputation
    imp_method = 'simple'
    imp = Imputation(patient_info, n_feature)
    patient_array, patient_time = imp.get_imputation(imp_method)

    ### Clinical Data with DTI Generation
    cli = CliGen(feature_list, feature_range, ctn.dti_time)
    subject_array = cli.get_data(patient_array, patient_time, params['time'])
    if params['binary']:  # only works for discrete clinical features
        subject_array = cli.get_binarization()
    subject_label = cli.get_label(patient_info, params['labels'], params['time'])
    return subject_array, subject_label
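# A minimal sketch of how data_preprocess might be invoked. The dictionary keys
# match those read inside the function; the example paths, domain name, time
# horizon, and label list are placeholders, not values from the original project.
if __name__ == '__main__':
    params = {
        'input_path': 'input/records.csv',    # hypothetical path
        'map_path': 'input/feature_map.csv',  # hypothetical path
        'domain': 'clinical',                 # hypothetical domain name
        'time': 12,                           # hypothetical time horizon
        'binary': False,
        'labels': ['label'],                  # hypothetical label list
    }
    subject_array, subject_label = data_preprocess(params)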
def plots_fitmixtmodel_rcscale_effect(data_pbs, generator_module=None):
    '''
        Reload runs from PBS
    '''

    #### SETUP
    #
    savefigs = True
    savedata = True

    plots_all_T = True
    plots_per_T = True

    # do_relaunch_bestparams_pbs = True

    colormap = None  # or 'cubehelix'
    plt.rcParams['font.size'] = 16
    #
    #### /SETUP

    print "Order parameters: ", data_pbs.dataset_infos['parameters']
    # parameters: M, ratio_conj, sigmax

    # Extract data
    T_space = data_pbs.loaded_data['datasets_list'][0]['T_space']

    result_em_fits_flat = np.array(data_pbs.dict_arrays['result_em_fits']['results_flat'])
    result_precisions_flat = np.array(data_pbs.dict_arrays['result_all_precisions']['results_flat'])
    result_dist_bays09_flat = np.array(data_pbs.dict_arrays['result_dist_bays09']['results_flat'])
    result_dist_gorgo11_flat = np.array(data_pbs.dict_arrays['result_dist_gorgo11']['results_flat'])
    result_dist_bays09_emmixt_KL = np.array(data_pbs.dict_arrays['result_dist_bays09_emmixt_KL']['results_flat'])
    result_dist_gorgo11_emmixt_KL = np.array(data_pbs.dict_arrays['result_dist_gorgo11_emmixt_KL']['results_flat'])
    result_parameters_flat = np.array(data_pbs.dict_arrays['result_em_fits']['parameters_flat'])

    rc_scale_space = data_pbs.loaded_data['parameters_uniques']['rc_scale']

    num_repetitions = generator_module.num_repetitions
    parameter_names_sorted = data_pbs.dataset_infos['parameters']

    dataio = DataIO(output_folder=generator_module.pbs_submission_infos['simul_out_dir'] + '/outputs/', label='global_' + dataset_infos['save_output_filename'])

    # Load bays09
    data_bays09 = load_experimental_data.load_data_bays09(fit_mixture_model=True)
    bays09_nitems = data_bays09['data_to_fit']['n_items']
    bays09_em_target = np.nan*np.empty((bays09_nitems.max(), 4))  # kappa, prob_target, prob_nontarget, prob_random
    bays09_em_target[bays09_nitems - 1] = data_bays09['em_fits_nitems_arrays']['mean'].T
    bays09_emmixt_target = bays09_em_target[:, 1:]

    ## Compute some stuff
    result_parameters_flat = result_parameters_flat.flatten()

    result_em_fits_all_avg = utils.nanmean(result_em_fits_flat, axis=-1)
    result_em_kappa_allT = result_em_fits_all_avg[..., 0]
    result_em_emmixt_allT = result_em_fits_all_avg[..., 1:4]

    result_precisions_all_avg = utils.nanmean(result_precisions_flat, axis=-1)

    # Square distance to kappa
    result_dist_bays09_allT_avg = utils.nanmean(result_dist_bays09_flat, axis=-1)
    result_dist_bays09_emmixt_KL_allT_avg = utils.nanmean(result_dist_bays09_emmixt_KL, axis=-1)
    result_dist_bays09_kappa_allT = result_dist_bays09_allT_avg[..., 0]

    # result_dist_bays09_allT_avg = utils.nanmean((result_em_fits_flat[:, :, :4] - bays09_em_target[np.newaxis, :, :, np.newaxis])**2, axis=-1)
    # result_dist_bays09_kappa_sum = np.nansum(result_dist_bays09_allT_avg[:, :, 0], axis=-1)
    # result_dist_bays09_kappa_T1_sum = result_dist_bays09_allT_avg[:, 0, 0]
    # result_dist_bays09_kappa_T25_sum = np.nansum(result_dist_bays09_allT_avg[:, 1:, 0], axis=-1)

    # # Square and KL distance for EM Mixtures
    # result_dist_bays09_emmixt_sum = np.nansum(np.nansum(result_dist_bays09_allT_avg[:, :, 1:], axis=-1), axis=-1)
    # result_dist_bays09_emmixt_T1_sum = np.nansum(result_dist_bays09_allT_avg[:, 0, 1:], axis=-1)
    # result_dist_bays09_emmixt_T25_sum = np.nansum(np.nansum(result_dist_bays09_allT_avg[:, 1:, 1:], axis=-1), axis=-1)

    # result_dist_bays09_emmixt_KL = utils.nanmean(utils.KL_div(result_em_fits_flat[:, :, 1:4], bays09_emmixt_target[np.newaxis, :, :, np.newaxis], axis=-2), axis=-1)  # KL over dimension of mixtures, then mean over repetitions
    # result_dist_bays09_emmixt_KL_sum = np.nansum(result_dist_bays09_emmixt_KL, axis=-1)  # sum over T
    # result_dist_bays09_emmixt_KL_T1_sum = result_dist_bays09_emmixt_KL[:, 0]
    # result_dist_bays09_emmixt_KL_T25_sum = np.nansum(result_dist_bays09_emmixt_KL[:, 1:], axis=-1)

    # result_dist_bays09_both_normalised = result_dist_bays09_emmixt_sum/np.max(result_dist_bays09_emmixt_sum) + result_dist_bays09_kappa_sum/np.max(result_dist_bays09_kappa_sum)

    # # Mask kappa for performance too bad
    # result_dist_bays09_kappa_sum_masked = np.ma.masked_greater(result_dist_bays09_kappa_sum, 2*np.median(result_dist_bays09_kappa_sum))
    # result_dist_bays09_emmixt_KL_sum_masked = np.ma.masked_greater(result_dist_bays09_emmixt_KL_sum, 2*np.median(result_dist_bays09_emmixt_KL_sum))
    # result_dist_bays09_both_normalised_mult_masked = 1-(1. - result_dist_bays09_emmixt_KL_sum/np.max(result_dist_bays09_emmixt_KL_sum))*(1. - result_dist_bays09_kappa_sum_masked/np.max(result_dist_bays09_kappa_sum_masked))

    # Compute optimal rc_scale
    all_args = data_pbs.loaded_data['args_list']
    specific_arg = all_args[0]
    specific_arg['autoset_parameters'] = True
    (_, _, _, sampler) = launchers.init_everything(specific_arg)
    optimal_rc_scale = sampler.random_network.rc_scale[0]

    if plots_all_T:
        # Show Kappa evolution wrt rc_scale
        f, ax = plt.subplots()
        # utils.plot_mean_std_from_samples(result_parameters_flat, np.nansum(result_em_kappa_allT, axis=-1), bins=60, bins_y=150, xlabel='rc_scale', ylabel='EM kappa', title='Kappa, summed T', ax_handle=ax, show_scatter=False)
        utils.plot_mean_std_from_samples_rolling(result_parameters_flat, np.nansum(result_em_kappa_allT, axis=-1), window=35, xlabel='rc_scale', ylabel='EM kappa', title='Kappa, summed T', ax_handle=ax, show_scatter=False)
        ax.axvline(x=optimal_rc_scale, color='g', linewidth=2)
        ax.axvline(x=2*optimal_rc_scale, color='r', linewidth=2)
        f.canvas.draw()

        if savefigs:
            dataio.save_current_figure('rcscaleeffect_kappa_summedT_{label}_{unique_id}.pdf')

        # Show Mixt proportions
        f, ax = plt.subplots()
        for i in xrange(3):
            # utils.plot_mean_std_from_samples(result_parameters_flat, np.nansum(result_em_emmixt_allT[..., i], axis=-1), bins=60, bins_y=100, xlabel='rc_scale', ylabel='EM mixt proportions', title='EM mixtures, summed T', ax_handle=ax, show_scatter=False)
            utils.plot_mean_std_from_samples_rolling(result_parameters_flat, np.nansum(result_em_emmixt_allT[..., i], axis=-1), window=35, xlabel='rc_scale', ylabel='EM mixt proportions', title='EM mixtures, summed T', ax_handle=ax, show_scatter=False)
        ax.axvline(x=optimal_rc_scale, color='g', linewidth=2)
        ax.axvline(x=2*optimal_rc_scale, color='r', linewidth=2)
        f.canvas.draw()

        if savefigs:
            dataio.save_current_figure('rcscaleeffect_mixtprop_summedT_{label}_{unique_id}.pdf')

        # Show Precision
        f, ax = plt.subplots()
        # utils.plot_mean_std_from_samples(result_parameters_flat, np.nansum(result_precisions_all_avg, axis=-1), bins=60, bins_y=150, xlabel='rc_scale', ylabel='Precision', title='Precision, summed T', ax_handle=ax, show_scatter=False)
        utils.plot_mean_std_from_samples_rolling(result_parameters_flat, np.nansum(result_precisions_all_avg, axis=-1), window=35, xlabel='rc_scale', ylabel='Precision', title='Precision, summed T', ax_handle=ax, show_scatter=False)
        ax.axvline(x=optimal_rc_scale, color='g', linewidth=2)
        ax.axvline(x=2*optimal_rc_scale, color='r', linewidth=2)
        f.canvas.draw()

        if savefigs:
            dataio.save_current_figure('rcscaleeffect_precision_summedT_{label}_{unique_id}.pdf')

        plt.close('all')

    if plots_per_T:
        for T_i, T in enumerate(T_space):
            # Show Kappa evolution wrt rc_scale
            f, ax = plt.subplots()
            # utils.plot_mean_std_from_samples(result_parameters_flat, result_em_kappa_allT[:, T_i], bins=40, bins_y=100, xlabel='rc_scale', ylabel='EM kappa', title='Kappa, T %d' % T, ax_handle=ax, show_scatter=False)
            utils.plot_mean_std_from_samples_rolling(result_parameters_flat, result_em_kappa_allT[:, T_i], window=35, xlabel='rc_scale', ylabel='EM kappa', title='Kappa, T %d' % T, ax_handle=ax, show_scatter=False)
            ax.axvline(x=optimal_rc_scale, color='g', linewidth=2)
            ax.axvline(x=2*optimal_rc_scale, color='r', linewidth=2)
            f.canvas.draw()

            if savefigs:
                dataio.save_current_figure('rcscaleeffect_kappa_T%d_{label}_{unique_id}.pdf' % T)

            # Show Mixt proportions
            f, ax = plt.subplots()
            for i in xrange(3):
                # utils.plot_mean_std_from_samples(result_parameters_flat, result_em_emmixt_allT[:, T_i, i], bins=40, bins_y=100, xlabel='rc_scale', ylabel='EM mixt proportions', title='EM mixtures, T %d' % T, ax_handle=ax, show_scatter=False)
                utils.plot_mean_std_from_samples_rolling(result_parameters_flat, result_em_emmixt_allT[:, T_i, i], window=35, xlabel='rc_scale', ylabel='EM mixt proportions', title='EM mixtures, T %d' % T, ax_handle=ax, show_scatter=False)
            ax.axvline(x=optimal_rc_scale, color='g', linewidth=2)
            ax.axvline(x=2*optimal_rc_scale, color='r', linewidth=2)
            f.canvas.draw()

            if savefigs:
                dataio.save_current_figure('rcscaleeffect_mixtprop_T%d_{label}_{unique_id}.pdf' % T)

            # Show Precision
            f, ax = plt.subplots()
            # utils.plot_mean_std_from_samples(result_parameters_flat, result_precisions_all_avg[:, T_i], bins=40, bins_y=100, xlabel='rc_scale', ylabel='Precision', title='Precision, T %d' % T, ax_handle=ax, show_scatter=False)
            utils.plot_mean_std_from_samples_rolling(result_parameters_flat, result_precisions_all_avg[:, T_i], window=35, xlabel='rc_scale', ylabel='Precision', title='Precision, T %d' % T, ax_handle=ax, show_scatter=False)
            ax.axvline(x=optimal_rc_scale, color='g', linewidth=2)
            ax.axvline(x=2*optimal_rc_scale, color='r', linewidth=2)
            f.canvas.draw()

            if savefigs:
                dataio.save_current_figure('rcscaleeffect_precision_T%d_{label}_{unique_id}.pdf' % T)

            plt.close('all')

    # # Interpolate
    # if plots_interpolate:

    #     sigmax_target = 0.9

    #     M_interp_space = np.arange(6, 625, 5)
    #     ratio_interp_space = np.linspace(0.01, 1.0, 50)
    #     # sigmax_interp_space = np.linspace(0.01, 1.0, 50)
    #     sigmax_interp_space = np.array([sigmax_target])
    #     params_crossspace = np.array(utils.cross(M_interp_space, ratio_interp_space, sigmax_interp_space))

    #     interpolated_data = rbf_interpolator(params_crossspace[:, 0], params_crossspace[:, 1], params_crossspace[:, 2]).reshape((M_interp_space.size, ratio_interp_space.size))

    #     utils.pcolor_2d_data(interpolated_data, M_interp_space, ratio_interp_space, 'M', 'ratio', 'interpolated, fixing sigmax= %.2f' % sigmax_target)

    #     points_closeby = ((result_parameters_flat[:, 2] - sigmax_target)**2) < 0.01
    #     plt.figure()
    #     # plt.imshow(interpolated_data, extent=(M_interp_space.min(), M_interp_space.max(), ratio_interp_space.min(), ratio_interp_space.max()))
    #     plt.imshow(interpolated_data)
    #     plt.scatter(result_parameters_flat[points_closeby, 0], result_parameters_flat[points_closeby, 1], s=100, c=result_fitexperiments_bic_avg[points_closeby], marker='o')

    # if plot_per_ratio:
    #     # Plot the evolution of loglike as a function of sigmax, with std shown
    #     for ratio_conj_i, ratio_conj in enumerate(ratio_space):
    #         ax = utils.plot_mean_std_area(sigmax_space, result_log_posterior_mean[ratio_conj_i], result_log_posterior_std[ratio_conj_i])
    #         ax.get_figure().canvas.draw()
    #         if savefigs:
    #             dataio.save_current_figure('results_fitexp_%s_loglike_ratioconj%.2f_{label}_global_{unique_id}.pdf' % (exp_dataset, ratio_conj))

    all_args = data_pbs.loaded_data['args_list']
    variables_to_save = ['parameter_names_sorted']

    if savedata:
        dataio.save_variables_default(locals(), variables_to_save)
        dataio.make_link_output_to_dropbox(dropbox_current_experiment_folder='rcscale_characterisation')

    plt.show()

    return locals()
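# utils.plot_mean_std_from_samples_rolling above presumably sorts the scattered
# (x, y) samples by x, computes a rolling mean and standard deviation of y, and
# plots mean +/- std. A minimal numpy sketch of that aggregation step, written
# as an assumption about what the helper does rather than its actual code:
import numpy as np


def rolling_mean_std_sketch(x, y, window=35):
    """Sort samples by x and compute rolling mean/std of y over `window` points."""
    order = np.argsort(x)
    x_sorted, y_sorted = x[order], y[order]
    means, stds = [], []
    for i in range(y_sorted.size - window + 1):
        chunk = y_sorted[i:i + window]
        means.append(np.nanmean(chunk))
        stds.append(np.nanstd(chunk))
    # centre each window on its middle x value
    centers = x_sorted[window // 2: window // 2 + len(means)]
    return centers, np.array(means), np.array(stds)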
def plots_fit_mixturemodels_random(data_pbs, generator_module=None):
    '''
        Reload runs from PBS
    '''

    #### SETUP
    #
    savefigs = True
    savedata = True
    savemovies = False

    plots_dist_bays09 = True
    do_scatters_3d = True
    do_best_points_extended_plots = True

    # do_relaunch_bestparams_pbs = True

    colormap = None  # or 'cubehelix'
    plt.rcParams['font.size'] = 16
    #
    #### /SETUP

    print "Order parameters: ", data_pbs.dataset_infos['parameters']
    # parameters: M, ratio_conj, sigmax

    # Extract data
    result_em_fits_flat = np.array(data_pbs.dict_arrays['result_em_fits']['results_flat'])
    result_dist_bays09_flat = np.array(data_pbs.dict_arrays['result_dist_bays09']['results_flat'])
    result_dist_gorgo11_flat = np.array(data_pbs.dict_arrays['result_dist_gorgo11']['results_flat'])
    result_parameters_flat = np.array(data_pbs.dict_arrays['result_em_fits']['parameters_flat'])

    M_space = data_pbs.loaded_data['parameters_uniques']['M']
    sigmax_space = data_pbs.loaded_data['parameters_uniques']['sigmax']
    ratio_space = data_pbs.loaded_data['parameters_uniques']['ratio_conj']  # was keyed on 'sigmax', most likely a copy-paste slip

    num_repetitions = generator_module.num_repetitions
    parameter_names_sorted = data_pbs.dataset_infos['parameters']
    T_space = data_pbs.loaded_data['datasets_list'][0]['T_space']
    all_args = data_pbs.loaded_data['args_list']
    all_repeats_completed = data_pbs.dict_arrays['result_em_fits']['repeats_completed']

    dataio = DataIO(output_folder=generator_module.pbs_submission_infos['simul_out_dir'] + '/outputs/', label='global_' + dataset_infos['save_output_filename'])

    # Load bays09
    data_bays09 = load_experimental_data.load_data_bays09(fit_mixture_model=True)
    bays09_nitems = data_bays09['data_to_fit']['n_items']
    bays09_em_target = np.nan*np.empty((bays09_nitems.max(), 4))  # kappa, prob_target, prob_nontarget, prob_random
    bays09_em_target[bays09_nitems - 1] = data_bays09['em_fits_nitems_arrays']['mean'].T
    bays09_emmixt_target = bays09_em_target[:, 1:]

    # All parameters info
    plotting_parameters = launcher_memorycurve.load_prepare_datasets()

    ## Compute some stuff
    # result_dist_bays09_kappa_T1_avg = utils.nanmean(result_dist_bays09_flat[:, 0, 0], axis=-1)
    # result_dist_bays09_kappa_allT_avg = np.nansum(utils.nanmean(result_dist_bays09_flat[:, :, 0], axis=-1), axis=1)

    # Square distance to kappa
    result_dist_bays09_allT_avg = utils.nanmean((result_em_fits_flat[:, :, :4] - bays09_em_target[np.newaxis, :, :, np.newaxis])**2, axis=-1)
    result_dist_bays09_kappa_sum = np.nansum(result_dist_bays09_allT_avg[:, :, 0], axis=-1)
    result_dist_bays09_kappa_sum_masked = np.ma.masked_greater(result_dist_bays09_kappa_sum, 1e8)
    result_dist_bays09_kappa_T1_sum = result_dist_bays09_allT_avg[:, 0, 0]
    result_dist_bays09_kappa_T25_sum = np.nansum(result_dist_bays09_allT_avg[:, 1:, 0], axis=-1)

    # Square and KL distance for EM Mixtures
    result_dist_bays09_emmixt_sum = np.nansum(np.nansum(result_dist_bays09_allT_avg[:, :, 1:], axis=-1), axis=-1)
    result_dist_bays09_emmixt_T1_sum = np.nansum(result_dist_bays09_allT_avg[:, 0, 1:], axis=-1)
    result_dist_bays09_emmixt_T25_sum = np.nansum(np.nansum(result_dist_bays09_allT_avg[:, 1:, 1:], axis=-1), axis=-1)

    result_dist_bays09_emmixt_KL = utils.nanmean(utils.KL_div(result_em_fits_flat[:, :, 1:4], bays09_emmixt_target[np.newaxis, :, :, np.newaxis], axis=-2), axis=-1)  # KL over dimension of mixtures, then mean over repetitions
    result_dist_bays09_emmixt_KL_sum = np.nansum(result_dist_bays09_emmixt_KL, axis=-1)  # sum over T
    result_dist_bays09_emmixt_KL_T1_sum = result_dist_bays09_emmixt_KL[:, 0]
    result_dist_bays09_emmixt_KL_T25_sum = np.nansum(result_dist_bays09_emmixt_KL[:, 1:], axis=-1)

    result_dist_bays09_both_normalised = result_dist_bays09_emmixt_sum/np.max(result_dist_bays09_emmixt_sum) + result_dist_bays09_kappa_sum/np.max(result_dist_bays09_kappa_sum)
    result_dist_bays09_kappaKL_normalised_summed = result_dist_bays09_emmixt_KL_sum/np.max(result_dist_bays09_emmixt_KL_sum) + result_dist_bays09_kappa_sum/np.max(result_dist_bays09_kappa_sum)

    if plots_dist_bays09:
        nb_best_points = 30
        size_normal_points = 8
        size_best_points = 50
        nb_best_points_extended_plots = 3

        def plot_memorycurve(result_em_fits, args_used, suptitle=''):
            packed_data = dict(T_space=T_space, result_em_fits=result_em_fits, all_parameters=args_used)
            if suptitle:
                plotting_parameters['suptitle'] = suptitle
            if savefigs:
                packed_data['dataio'] = dataio

            plotting_parameters['reuse_axes'] = False

            launcher_memorycurve.do_memory_plots(packed_data, plotting_parameters)

        def plot_scatter(result_dist_to_use, best_points_result_dist_to_use, result_dist_to_use_name='', title=''):
            fig = plt.figure()
            ax = Axes3D(fig)

            utils.scatter3d(result_parameters_flat[:, 0], result_parameters_flat[:, 1], result_parameters_flat[:, 2], s=size_normal_points, c=np.log(result_dist_to_use), xlabel=parameter_names_sorted[0], ylabel=parameter_names_sorted[1], zlabel=parameter_names_sorted[2], title=title, ax_handle=ax)
            utils.scatter3d(result_parameters_flat[best_points_result_dist_to_use, 0], result_parameters_flat[best_points_result_dist_to_use, 1], result_parameters_flat[best_points_result_dist_to_use, 2], c='r', s=size_best_points, ax_handle=ax)

            print "Best points, %s:" % title
            print '\n'.join(['M %d, ratio %.2f, sigmax %.2f: %f' % (result_parameters_flat[i, 0], result_parameters_flat[i, 1], result_parameters_flat[i, 2], result_dist_to_use[i]) for i in best_points_result_dist_to_use])

            if savefigs:
                dataio.save_current_figure('scatter3d_%s_{label}_{unique_id}.pdf' % result_dist_to_use_name)

                if savemovies:
                    try:
                        utils.rotate_plot3d(ax, dataio.create_formatted_filename('scatter3d_%s_{label}_{unique_id}.mp4' % result_dist_to_use_name), bitrate=8000, min_duration=8)
                        utils.rotate_plot3d(ax, dataio.create_formatted_filename('scatter3d_%s_{label}_{unique_id}.gif' % result_dist_to_use_name), nb_frames=30, min_duration=8)
                    except Exception:
                        # Most likely wrong aggregator...
                        print "failed when creating movies for ", result_dist_to_use_name

                ax.view_init(azim=90, elev=10)
                dataio.save_current_figure('scatter3d_%s_view2_{label}_{unique_id}.pdf' % result_dist_to_use_name)

            return ax

        def plots_redirects(all_vars, result_dist_to_use_name, log_color=True, title='', avoid_incomplete_repeats=True):
            result_dist_to_use = all_vars[result_dist_to_use_name]
            if avoid_incomplete_repeats:
                result_dist_to_use = np.ma.masked_where(~(all_repeats_completed == num_repetitions-1), result_dist_to_use)
            if not log_color:
                result_dist_to_use = np.exp(result_dist_to_use)

            best_points_result_dist_to_use = np.argsort(result_dist_to_use)[:nb_best_points]

            # Scatter
            if do_scatters_3d:
                plot_scatter(result_dist_to_use, best_points_result_dist_to_use, result_dist_to_use_name, title=title)

            # Now do the additional plots if required
            if do_best_points_extended_plots:
                for best_point_index in best_points_result_dist_to_use[:nb_best_points_extended_plots]:
                    print "extended plot for M %d, ratio %.2f, sigmax %.2f: score %f" % (result_parameters_flat[best_point_index, 0], result_parameters_flat[best_point_index, 1], result_parameters_flat[best_point_index, 2], result_dist_to_use[best_point_index])

                    plot_memorycurve(result_em_fits_flat[best_point_index], all_args[best_point_index], suptitle=result_dist_to_use_name)

        # Distance for kappa, all T
        plots_redirects(locals(), 'result_dist_bays09_kappa_sum', title='kappa all T')

        # Distance for em fits, all T, Squared distance
        plots_redirects(locals(), 'result_dist_bays09_emmixt_sum', title='em fits, all T')

        # Distance for em fits, all T, KL distance
        plots_redirects(locals(), 'result_dist_bays09_emmixt_KL_sum', title='em fits, all T, KL')

        # Distance for sum of normalised em fits + normalised kappa, all T
        plots_redirects(locals(), 'result_dist_bays09_both_normalised', title='summed normalised em mixt + kappa')

        # Distance kappa T = 1
        plots_redirects(locals(), 'result_dist_bays09_kappa_T1_sum', title='Kappa T=1')

        # Distance kappa T = 2...5
        plots_redirects(locals(), 'result_dist_bays09_kappa_T25_sum', title='Kappa T=2/5')

        # Distance em fits T = 1
        plots_redirects(locals(), 'result_dist_bays09_emmixt_T1_sum', title='em fits T=1')

        # Distance em fits T = 2...5
        plots_redirects(locals(), 'result_dist_bays09_emmixt_T25_sum', title='em fits T=2/5')

        # Distance em fits T = 1, KL
        plots_redirects(locals(), 'result_dist_bays09_emmixt_KL_T1_sum', title='em fits T=1, KL')

        # Distance em fits T = 2...5, KL
        plots_redirects(locals(), 'result_dist_bays09_emmixt_KL_T25_sum', title='em fits T=2/5, KL')

    # if plots_per_T:
    #     for T in T_space:
    #         currT_indices = result_parameters_flat[:, 2] == T

    #         utils.contourf_interpolate_data_interactive_maxvalue(result_parameters_flat[currT_indices][..., :2], result_fitexperiments_bic_avg[currT_indices], xlabel='Ratio_conj', ylabel='sigma x', title='BIC, T %d' % T, interpolation_numpoints=200, interpolation_method='nearest', log_scale=False)

    # # Interpolate
    # if plots_interpolate:

    #     sigmax_target = 0.9

    #     M_interp_space = np.arange(6, 625, 5)
    #     ratio_interp_space = np.linspace(0.01, 1.0, 50)
    #     # sigmax_interp_space = np.linspace(0.01, 1.0, 50)
    #     sigmax_interp_space = np.array([sigmax_target])
    #     params_crossspace = np.array(utils.cross(M_interp_space, ratio_interp_space, sigmax_interp_space))

    #     interpolated_data = rbf_interpolator(params_crossspace[:, 0], params_crossspace[:, 1], params_crossspace[:, 2]).reshape((M_interp_space.size, ratio_interp_space.size))

    #     utils.pcolor_2d_data(interpolated_data, M_interp_space, ratio_interp_space, 'M', 'ratio', 'interpolated, fixing sigmax= %.2f' % sigmax_target)

    #     points_closeby = ((result_parameters_flat[:, 2] - sigmax_target)**2) < 0.01
    #     plt.figure()
    #     # plt.imshow(interpolated_data, extent=(M_interp_space.min(), M_interp_space.max(), ratio_interp_space.min(), ratio_interp_space.max()))
    #     plt.imshow(interpolated_data)
    #     plt.scatter(result_parameters_flat[points_closeby, 0], result_parameters_flat[points_closeby, 1], s=100, c=result_fitexperiments_bic_avg[points_closeby], marker='o')

    # if plot_per_ratio:
    #     # Plot the evolution of loglike as a function of sigmax, with std shown
    #     for ratio_conj_i, ratio_conj in enumerate(ratio_space):
    #         ax = utils.plot_mean_std_area(sigmax_space, result_log_posterior_mean[ratio_conj_i], result_log_posterior_std[ratio_conj_i])
    #         ax.get_figure().canvas.draw()
    #         if savefigs:
    #             dataio.save_current_figure('results_fitexp_%s_loglike_ratioconj%.2f_{label}_global_{unique_id}.pdf' % (exp_dataset, ratio_conj))

    variables_to_save = ['parameter_names_sorted', 'all_repeats_completed', 'T_space']

    if savedata:
        dataio.save_variables_default(locals(), variables_to_save)
        dataio.make_link_output_to_dropbox(dropbox_current_experiment_folder='fit_mixturemodels')

    plt.show()

    return locals()
def main():
    result_path = 'results/'
    subtype_method = "Algorithm"
    K = 3  # number of subtypes (clusters)

    ############################## LOAD DATA ######################################
    print('patients loading...')
    dataio = DataIO(K)
    dataio.load_demographics('../ufm/patient.csv')
    dataio.load_feature('Motor', 'MDS UPDRS PartI')
    dataio.load_feature('Motor', 'MDS UPDRS PartII')
    dataio.load_feature('Motor', 'MDS UPDRS PartIII')
    dataio.load_feature('Motor', 'MDS UPDRS PartIV')
    dataio.load_feature('Non-Motor', 'BJLO')
    dataio.load_feature('Non-Motor', 'ESS')
    dataio.load_feature('Non-Motor', 'GDS')
    dataio.load_feature('Non-Motor', 'HVLT')
    dataio.load_feature('Non-Motor', 'LNS')
    dataio.load_feature('Non-Motor', 'MoCA')
    dataio.load_feature('Non-Motor', 'QUIP')
    dataio.load_feature('Non-Motor', 'RBD')
    dataio.load_feature('Non-Motor', 'SCOPA-AUT')
    dataio.load_feature('Non-Motor', 'SF')
    dataio.load_feature('Non-Motor', 'STAI')
    dataio.load_feature('Non-Motor', 'SDM')
    dataio.load_feature('Non-Motor', 'MCI')
    dataio.load_feature('Biospecimen', 'DNA')
    dataio.load_feature('Biospecimen', 'CSF', 'Total tau')
    dataio.load_feature('Biospecimen', 'CSF', 'Abeta 42')
    dataio.load_feature('Biospecimen', 'CSF', 'p-Tau181P')
    dataio.load_feature('Biospecimen', 'CSF', 'CSF Alpha-synuclein')
    dataio.load_feature('Image', 'DaTScan SBR')
    dataio.load_feature('Image', 'MRI')
    dataio.load_feature('Medication', 'MED USE')

    suffix = 'normalized_clusters_Deep'
    dataio.load_clustering_result('input/clustering_by_lstm.csv')

    ############################# STATISTICS ######################################
    print('-----------------------')
    print('statistics analyzing...')
    var = Variable(K)

    ftype = 'demographics'
    p = var.get_variables(dataio, ftype)
    var.p_value.extend(p)

    ftype = 'motor'
    _ = var.get_variables(dataio, ftype, 'MDS UPDRS PartI')
    _ = var.get_variables(dataio, ftype, 'MDS UPDRS PartII')
    _ = var.get_variables(dataio, ftype, 'MDS UPDRS PartIII', 'MDS-UPDRS')
    _ = var.get_variables(dataio, ftype, 'MDS UPDRS PartIII', 'H&Y')
    p = var.get_variables(dataio, ftype, 'MDS UPDRS PartIV')
    var.p_value.extend(p)

    ftype = 'nonmotor'
    _ = var.get_variables(dataio, ftype, 'BJLO')
    _ = var.get_variables(dataio, ftype, 'ESS')
    _ = var.get_variables(dataio, ftype, 'GDS')
    _ = var.get_variables(dataio, ftype, 'HVLT', 'Immediate Recall')
    _ = var.get_variables(dataio, ftype, 'HVLT', 'Discrimination Recognition')
    _ = var.get_variables(dataio, ftype, 'HVLT', 'Retention')
    _ = var.get_variables(dataio, ftype, 'LNS')
    print(var.pat_edu)
    _ = var.get_variables(dataio, ftype, 'MoCA', pat_edu=var.pat_edu)
    _ = var.get_variables(dataio, ftype, 'QUIP')
    _ = var.get_variables(dataio, ftype, 'RBD')
    _ = var.get_variables(dataio, ftype, 'SCOPA-AUT')
    _ = var.get_variables(dataio, ftype, 'SF')
    _ = var.get_variables(dataio, ftype, 'STAI')
    _ = var.get_variables(dataio, ftype, 'SDM')
    p = var.get_variables(dataio, ftype, 'MCI')
    var.p_value.extend(p)

    ftype = 'biospecimen'
    var.get_variables(dataio, ftype, 'DNA')
    _ = var.get_variables(dataio, ftype, 'CSF', 'Total tau')
    _ = var.get_variables(dataio, ftype, 'CSF', 'Abeta 42')
    _ = var.get_variables(dataio, ftype, 'CSF', 'p-Tau181P')
    p = var.get_variables(dataio, ftype, 'CSF', 'CSF Alpha-synuclein')
    var.p_value.extend(p)

    ftype = 'image'
    _ = var.get_variables(dataio, ftype, 'DaTScan SBR', 'CAUDATE RIGHT')
    _ = var.get_variables(dataio, ftype, 'DaTScan SBR', 'CAUDATE LEFT')
    _ = var.get_variables(dataio, ftype, 'DaTScan SBR', 'PUTAMEN RIGHT')
    _ = var.get_variables(dataio, ftype, 'DaTScan SBR', 'PUTAMEN LEFT')
    p = var.get_variables(dataio, ftype, 'MRI')
    var.p_value.extend(p)

    ftype = 'medication'
    p = var.get_variables(dataio, ftype, 'MED USE')
    var.p_value.extend(p)

    ################################# DISPLAY ######################################
    print('-----------------------')
    print('value displaying...')
    ds = Display(var)
    print('heatmap of the final mean value')
    figurename = 'results/heatmap_clustering_by_' + subtype_method.lower() + '_' + suffix + '.pdf'
    ds.heatmap(figurename, is_progress=False, is_rotate=False)
    print('heatmap of the first order difference mean value')
    figurename = 'results/heatmap_clustering_by_' + subtype_method.lower() + '_progression_' + suffix + '.pdf'
    ds.heatmap(figurename, is_progress=True, is_rotate=False)

    ############################## SAVE RESULTS ####################################
    print('-----------------------')
    filename = result_path + 'statistics_clustering_by_' + subtype_method.lower() + '_' + suffix + '.csv'
    dataio.save_result(var, filename)
    print('done!')
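# Standard entry-point guard so the analysis runs when this script is executed
# directly (assumed; the original file may already include it elsewhere).
if __name__ == '__main__':
    main()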
def plots_fit_mixturemodels_random(data_pbs, generator_module=None):
    """
    Reload runs from PBS
    """

    #### SETUP
    #
    savefigs = True
    savedata = True

    colormap = None  # or 'cubehelix'
    plt.rcParams["font.size"] = 16
    #
    #### /SETUP

    print "Order parameters: ", data_pbs.dataset_infos["parameters"]
    # parameters: M, ratio_conj, sigmax

    # Extract data
    T_space = data_pbs.loaded_data["datasets_list"][0]["T_space"]

    result_responses_flat = np.array(data_pbs.dict_arrays["result_responses"]["results_flat"])
    result_targets_flat = np.array(data_pbs.dict_arrays["result_target"]["results_flat"])
    result_nontargets_flat = np.array(data_pbs.dict_arrays["result_nontargets"]["results_flat"])
    result_parameters_flat = np.array(data_pbs.dict_arrays["result_responses"]["parameters_flat"])
    all_repeats_completed = data_pbs.dict_arrays["result_responses"]["repeats_completed"]
    all_args_arr = np.array(data_pbs.loaded_data["args_list"])

    M_space = data_pbs.loaded_data["parameters_uniques"]["M"]
    ratio_conj_space = data_pbs.loaded_data["parameters_uniques"]["ratio_conj"]
    sigmax_space = data_pbs.loaded_data["parameters_uniques"]["sigmax"]
    alpha_space = data_pbs.loaded_data["parameters_uniques"]["alpha"]
    trecall_space = data_pbs.loaded_data["parameters_uniques"]["fixed_cued_feature_time"]

    num_repetitions = generator_module.num_repetitions
    parameter_names_sorted = data_pbs.dataset_infos["parameters"]

    dataio = DataIO(
        output_folder=generator_module.pbs_submission_infos["simul_out_dir"] + "/outputs/",
        label="global_" + dataset_infos["save_output_filename"],
    )

    ##### Because of laziness, the responses are stored in an awkward layout.
    # Each run is for a given trecall, but we run N items = 1 .. Nmax anyway,
    # so entries with trecall > N are np.nan.
    # => We need to reconstruct the data properly, filling the lower triangle of Nitem x Trecall.
    # Also, trecall is stored as the actual time, so its meaning has to be remapped to Tmax - (trecall + 1).

    # Load ground truth
    data_gorgo11_sequ = load_experimental_data.load_data_gorgo11_sequential(fit_mixture_model=True)

    ## Filter everything with repeats_completed == num_repet and trecall == last
    filter_data = (result_parameters_flat[:, 0] == (T_space.max() - 1)) & (all_repeats_completed == num_repetitions - 1)
    result_parameters_flat = result_parameters_flat[filter_data]
    result_responses_flat = result_responses_flat[filter_data]
    result_targets_flat = result_targets_flat[filter_data]
    result_nontargets_flat = result_nontargets_flat[filter_data]
    all_args_arr = all_args_arr[filter_data]
    all_repeats_completed = all_repeats_completed[filter_data]

    print "Size post-filter: ", result_parameters_flat.shape[0]

    def str_best_params(best_i, result_dist_to_use):
        return (
            " ".join(
                [
                    "%s %.4f" % (parameter_names_sorted[param_i], result_parameters_flat[best_i, param_i])
                    for param_i in xrange(len(parameter_names_sorted))
                ]
            )
            + " >> %f" % result_dist_to_use[best_i]
        )

    # all_args = data_pbs.loaded_data['args_list']
    variables_to_save = ["parameter_names_sorted", "all_args_arr", "all_repeats_completed", "filter_data"]

    if savedata:
        dataio.save_variables_default(locals(), variables_to_save)
        dataio.make_link_output_to_dropbox(dropbox_current_experiment_folder="gorgo11_sequential_fitmixturemodel")

    plt.show()

    return locals()
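# The comment in the function above describes rebuilding a lower-triangular
# Nitems x Trecall table from runs padded with NaN wherever trecall > N. A tiny
# self-contained illustration of that layout, independent of the actual result
# arrays (Tmax here is a hypothetical maximum item count):
import numpy as np

Tmax = 4
valid = np.tril(np.ones((Tmax, Tmax), dtype=bool))  # recall index <= item count
table = np.where(valid, 0.0, np.nan)  # NaN marks impossible (nitems, trecall) cells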
def plots_fit_collapsedmixturemodels_random(data_pbs, generator_module=None):
    '''
        Reload runs from PBS
        Sequential data analysis.
    '''

    #### SETUP
    #
    plots_bestfits = True
    plots_scatter3d = False

    savefigs = True
    savedata = True
    savemovies = False  # referenced below but never set in the original; defaulted to False

    colormap = None  # or 'cubehelix'
    plt.rcParams['font.size'] = 16
    #
    #### /SETUP

    print "Order parameters: ", data_pbs.dataset_infos['parameters']
    # parameters: M, ratio_conj, sigmax

    # Extract data
    T_space = data_pbs.loaded_data['datasets_list'][0]['T_space']

    result_em_fits_collapsed_tr_flat = np.array(data_pbs.dict_arrays['result_em_fits_collapsed_tr']['results_flat'])
    result_em_fits_collapsed_summary_flat = np.array(data_pbs.dict_arrays['result_em_fits_collapsed_summary']['results_flat'])
    result_dist_gorgo11_sequ_collapsed_flat = np.array(data_pbs.dict_arrays['result_dist_gorgo11_sequ_collapsed']['results_flat'])
    result_dist_gorgo11_sequ_collapsed_emmixt_KL_flat = np.array(data_pbs.dict_arrays['result_dist_gorgo11_sequ_collapsed_emmixt_KL']['results_flat'])
    result_parameters_flat = np.array(data_pbs.dict_arrays['result_em_fits_collapsed_tr']['parameters_flat'])
    all_repeats_completed = data_pbs.dict_arrays['result_em_fits_collapsed_tr']['repeats_completed']
    all_args_arr = np.array(data_pbs.loaded_data['args_list'])

    M_space = data_pbs.loaded_data['parameters_uniques']['M']
    ratio_conj_space = data_pbs.loaded_data['parameters_uniques']['ratio_conj']
    sigmax_space = data_pbs.loaded_data['parameters_uniques']['sigmax']
    alpha_space = data_pbs.loaded_data['parameters_uniques']['alpha']

    num_repetitions = generator_module.num_repetitions
    parameter_names_sorted = data_pbs.dataset_infos['parameters']

    dataio = DataIO(output_folder=generator_module.pbs_submission_infos['simul_out_dir'] + '/outputs/', label='global_' + dataset_infos['save_output_filename'])

    # Load ground truth
    data_gorgo11_sequ = load_experimental_data.load_data_gorgo11_sequential(fit_mixture_model=True)

    ## Filter everything with repeats_completed == num_repet
    filter_data = all_repeats_completed == num_repetitions - 1
    result_parameters_flat = result_parameters_flat[filter_data]
    result_em_fits_collapsed_tr_flat = result_em_fits_collapsed_tr_flat[filter_data]
    result_em_fits_collapsed_summary_flat = result_em_fits_collapsed_summary_flat[filter_data]
    result_dist_gorgo11_sequ_collapsed_flat = result_dist_gorgo11_sequ_collapsed_flat[filter_data]
    result_dist_gorgo11_sequ_collapsed_emmixt_KL_flat = result_dist_gorgo11_sequ_collapsed_emmixt_KL_flat[filter_data]
    all_args_arr = all_args_arr[filter_data]
    all_repeats_completed = all_repeats_completed[filter_data]

    print "Size post-filter: ", result_parameters_flat.shape[0]

    # Compute lots of averages over the repetitions
    result_em_fits_collapsed_tr_flat_avg = utils.nanmean(result_em_fits_collapsed_tr_flat, axis=-1)
    result_em_fits_collapsed_summary_flat_avg = utils.nanmean(result_em_fits_collapsed_summary_flat, axis=-1)
    result_dist_gorgo11_sequ_collapsed_flat_avg = utils.nanmean(result_dist_gorgo11_sequ_collapsed_flat, axis=-1)
    result_dist_gorgo11_sequ_collapsed_emmixt_KL_flat_avg = utils.nanmean(result_dist_gorgo11_sequ_collapsed_emmixt_KL_flat, axis=-1)

    result_dist_gorgo11_sequ_collapsed_flat_avg_overall = np.nansum(np.nansum(np.nansum(result_dist_gorgo11_sequ_collapsed_flat_avg, axis=-1), axis=-1), axis=-1)

    # We will now grid some of the parameters, to get a 2D/3D surface back.
    # Let's fix ratio_conj, as we know the other models need around ratio = 0.8 to fit the data well.

    def str_best_params(best_i, result_dist_to_use):
        return ' '.join(["%s %.4f" % (parameter_names_sorted[param_i], result_parameters_flat[best_i, param_i]) for param_i in xrange(len(parameter_names_sorted))]) + ' >> %f' % result_dist_to_use[best_i]

    ###### Best fitting points
    if plots_bestfits:
        nb_best_points = 5

        def plot_collapsed_modelfits(T_space, curr_result_emfits_collapsed_tr, labelplot='', dataio=None):
            f, ax = plt.subplots()

            for nitems_i, nitems in enumerate(T_space):
                ax = plots_experimental_data.plot_kappa_mean_error(T_space[:nitems], curr_result_emfits_collapsed_tr[..., 0][nitems_i, :nitems], 0.0*curr_result_emfits_collapsed_tr[..., 0][nitems_i, :nitems], title='model fit fig7 %s' % labelplot, ax=ax, label='%d items' % nitems, xlabel='T_recall')

            if dataio is not None:
                dataio.save_current_figure('bestfit_doublepowerlaw_%s_kappa_{label}_{unique_id}.pdf' % labelplot)

            _, ax_target = plt.subplots()
            _, ax_nontarget = plt.subplots()
            _, ax_random = plt.subplots()
            for nitems_i, nitems in enumerate(T_space):
                ax_target = plots_experimental_data.plot_emmixture_mean_error(T_space[:nitems], curr_result_emfits_collapsed_tr[..., 1][nitems_i, :nitems], curr_result_emfits_collapsed_tr[..., 1][nitems_i, :nitems]*0.0, title='Target model fit %s' % labelplot, ax=ax_target, label='%d items' % nitems, xlabel='T_recall')
                ax_nontarget = plots_experimental_data.plot_emmixture_mean_error(T_space[:nitems], curr_result_emfits_collapsed_tr[..., 2][nitems_i, :nitems], curr_result_emfits_collapsed_tr[..., 2][nitems_i, :nitems]*0.0, title='Nontarget model fit %s' % labelplot, ax=ax_nontarget, label='%d items' % nitems, xlabel='T_recall')
                ax_random = plots_experimental_data.plot_emmixture_mean_error(T_space[:nitems], curr_result_emfits_collapsed_tr[..., 3][nitems_i, :nitems], curr_result_emfits_collapsed_tr[..., 3][nitems_i, :nitems]*0.0, title='Random model fit %s' % labelplot, ax=ax_random, label='%d items' % nitems, xlabel='T_recall')

            if dataio is not None:
                plt.figure(ax_target.get_figure().number)
                dataio.save_current_figure('bestfit_doublepowerlaw_%s_mixttarget_{label}_{unique_id}.pdf' % labelplot)
                plt.figure(ax_nontarget.get_figure().number)
                dataio.save_current_figure('bestfit_doublepowerlaw_%s_mixtnontarget_{label}_{unique_id}.pdf' % labelplot)
                plt.figure(ax_random.get_figure().number)
                dataio.save_current_figure('bestfit_doublepowerlaw_%s_mixtrandom_{label}_{unique_id}.pdf' % labelplot)

        best_points_result_dist_gorgo11seq_all = np.argsort(result_dist_gorgo11_sequ_collapsed_flat_avg_overall)[:nb_best_points]

        for best_point_i in best_points_result_dist_gorgo11seq_all:
            plot_collapsed_modelfits(T_space, result_em_fits_collapsed_tr_flat_avg[best_point_i], labelplot='%.1f' % result_dist_gorgo11_sequ_collapsed_flat_avg_overall[best_point_i], dataio=dataio)

    ###### 3D scatter plots
    if plots_scatter3d:
        nb_best_points = 30
        size_normal_points = 8
        size_best_points = 50

        def plot_scatter(all_vars, result_dist_to_use_name, title='', log_color=True, downsampling=1, label_file=''):
            result_dist_to_use = all_vars[result_dist_to_use_name]
            result_parameters_flat_3d = all_vars['result_parameters_flat_3d']

            # Filter if downsampling
            filter_downsampling = np.arange(0, result_dist_to_use.size, downsampling)
            result_dist_to_use = result_dist_to_use[filter_downsampling]
            result_parameters_flat_3d = result_parameters_flat_3d[filter_downsampling]

            best_points_result_dist_to_use = np.argsort(result_dist_to_use)[:nb_best_points]

            # Construct all permutations of 3 parameters, for 3D scatters
            params_permutations = set([tuple(np.sort(np.random.choice(result_parameters_flat_3d.shape[-1], 3, replace=False)).tolist()) for i in xrange(1000)])

            for param_permut in params_permutations:
                fig = plt.figure()
                ax = Axes3D(fig)

                # One plot per parameter permutation
                if log_color:
                    color_points = np.log(result_dist_to_use)
                else:
                    color_points = result_dist_to_use

                utils.scatter3d(result_parameters_flat_3d[:, param_permut[0]], result_parameters_flat_3d[:, param_permut[1]], result_parameters_flat_3d[:, param_permut[2]], s=size_normal_points, c=color_points, xlabel=parameter_names_sorted[param_permut[0]], ylabel=parameter_names_sorted[param_permut[1]], zlabel=parameter_names_sorted[param_permut[2]], title=title, ax_handle=ax)
                utils.scatter3d(result_parameters_flat_3d[best_points_result_dist_to_use, param_permut[0]], result_parameters_flat_3d[best_points_result_dist_to_use, param_permut[1]], result_parameters_flat_3d[best_points_result_dist_to_use, param_permut[2]], c='r', s=size_best_points, ax_handle=ax)

                if savefigs:
                    dataio.save_current_figure('scatter3d_%s_%s%s_{label}_{unique_id}.pdf' % (result_dist_to_use_name, '_'.join([parameter_names_sorted[i] for i in param_permut]), label_file))

                    if savemovies:
                        try:
                            utils.rotate_plot3d(ax, dataio.create_formatted_filename('scatter3d_%s_%s%s_{label}_{unique_id}.mp4' % (result_dist_to_use_name, '_'.join([parameter_names_sorted[i] for i in param_permut]), label_file)), bitrate=8000, min_duration=8)
                            utils.rotate_plot3d(ax, dataio.create_formatted_filename('scatter3d_%s_%s%s_{label}_{unique_id}.gif' % (result_dist_to_use_name, '_'.join([parameter_names_sorted[i] for i in param_permut]), label_file)), nb_frames=30, min_duration=8)
                        except Exception:
                            # Most likely wrong aggregator...
                            print "failed when creating movies for ", result_dist_to_use_name

                if False and savefigs:
                    ax.view_init(azim=90, elev=10)
                    dataio.save_current_figure('scatter3d_view2_%s_%s%s_{label}_{unique_id}.pdf' % (result_dist_to_use_name, '_'.join([parameter_names_sorted[i] for i in param_permut]), label_file))

                # plt.close('all')

            print "Parameters: %s" % ', '.join(parameter_names_sorted)
            print "Best points, %s:" % title
            print '\n'.join([str_best_params(best_i, result_dist_to_use) for best_i in best_points_result_dist_to_use])

    # all_args = data_pbs.loaded_data['args_list']
    variables_to_save = ['parameter_names_sorted', 'all_args_arr', 'all_repeats_completed', 'filter_data']

    if savedata:
        dataio.save_variables_default(locals(), variables_to_save)
        dataio.make_link_output_to_dropbox(dropbox_current_experiment_folder='gorgo11_sequential_fitmixturemodel')

    plt.show()

    return locals()
import numpy

from dataio import DataIO

if __name__ == '__main__':
    filename = 'test'
    fieldnames = ['time', 'low', 'high']
    csvio = DataIO('test', fieldnames)
    csvio.csv_newfile(filename)

    test_row1 = {'time': 1.0, 'low': 100, 'high': 101}
    test_row2 = {'time': 2.0, 'low': 101, 'high': 102}
    test_rows = [{'time': 3.0, 'low': 102, 'high': 103},
                 {'time': 4.0, 'low': 103, 'high': 104}]

    exists = csvio.csv_check(filename)
    print(exists)

    print(len(numpy.shape(test_row1)))
    print(len(numpy.shape(test_rows)))

    csvio.csv_append(filename, test_row1)
    csvio.csv_append(filename, test_row2)
    csvio.csv_append(filename, test_rows)

    data = csvio.csv_get(filename)
    print(data)

    csvio.csv_rename(filename, 'test2')
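# The test above exercises a small CSV-backed DataIO API (csv_newfile, csv_check,
# csv_append, csv_get, csv_rename). A minimal sketch of how such a class could be
# implemented with the standard library; the real DataIO may differ in details
# such as file naming, validation, or the csv_get_last helper used elsewhere.
import csv
import os


class DataIOSketch:
    def __init__(self, savedir, fieldnames):
        self.savedir = savedir
        self.fieldnames = fieldnames

    def _path(self, name):
        return os.path.join(self.savedir, name + '.csv')

    def csv_newfile(self, name):
        with open(self._path(name), 'w', newline='') as f:
            csv.DictWriter(f, fieldnames=self.fieldnames).writeheader()

    def csv_check(self, name):
        return os.path.isfile(self._path(name))

    def csv_append(self, name, rows):
        if isinstance(rows, dict):  # accept a single row or a list of rows
            rows = [rows]
        with open(self._path(name), 'a', newline='') as f:
            csv.DictWriter(f, fieldnames=self.fieldnames).writerows(rows)

    def csv_get(self, name):
        with open(self._path(name), newline='') as f:
            return list(csv.DictReader(f))

    def csv_rename(self, old, new):
        os.rename(self._path(old), self._path(new))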
def input(self, N, file_name):
    """Draw N uniform input samples from [X_LBU, X_UBU] and write them to file."""
    x = self.random_state.uniform(self.X_LBU, self.X_UBU, N)
    title = 'input'
    DataIO.write_data([x], file_name, title)
def function(self, file_name):
    """Evaluate sin(2*pi*x) on a dense grid over [X_LBU, X_UBU] and write it to file."""
    x = np.linspace(self.X_LBU, self.X_UBU, self.NLARGE)
    f = np.sin(2 * np.pi * x)
    title = 'input\tsinus function'
    DataIO.write_data([x, f], file_name, title)
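# target(), input() and function() above rely on DataIO.write_data(columns,
# file_name, title) and DataIO.read_data(file_name). A minimal numpy sketch of a
# matching read/write pair, assuming whitespace-separated columns with the title
# as a '#' header line; the actual DataIO file format may differ.
import numpy as np


def write_data_sketch(columns, file_name, title):
    """Write equal-length 1-D arrays as columns with a commented header line."""
    np.savetxt(file_name, np.column_stack(columns), header=title)


def read_data_sketch(file_name):
    """Read back the columns written by write_data_sketch, one array per column."""
    data = np.loadtxt(file_name, ndmin=2)  # ndmin=2 keeps single-column files 2-D
    return tuple(data[:, i] for i in range(data.shape[1]))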
def plots_fit_mixturemodels_random(data_pbs, generator_module=None):
    '''
        Reload runs from PBS
    '''

    #### SETUP
    #
    savefigs = True
    savedata = True
    savemovies = True

    plots_dist_bays09 = True
    plots_per_T = True
    plots_interpolate = False

    # do_relaunch_bestparams_pbs = True

    colormap = None  # or 'cubehelix'
    plt.rcParams['font.size'] = 16
    #
    #### /SETUP

    print "Order parameters: ", data_pbs.dataset_infos['parameters']
    # parameters: M, ratio_conj, sigmax

    # Extract data
    T_space = data_pbs.loaded_data['datasets_list'][0]['T_space']

    result_em_fits_flat = np.array(data_pbs.dict_arrays['result_em_fits']['results_flat'])
    result_dist_bays09_flat = np.array(data_pbs.dict_arrays['result_dist_bays09']['results_flat'])
    result_dist_gorgo11_flat = np.array(data_pbs.dict_arrays['result_dist_gorgo11']['results_flat'])
    result_parameters_flat = np.array(data_pbs.dict_arrays['result_em_fits']['parameters_flat'])

    sigmaoutput_space = data_pbs.loaded_data['parameters_uniques']['sigma_output']
    sigmax_space = data_pbs.loaded_data['parameters_uniques']['sigmax']
    ratio_space = data_pbs.loaded_data['parameters_uniques']['ratio_conj']  # was keyed on 'sigmax', most likely a copy-paste slip

    num_repetitions = generator_module.num_repetitions
    parameter_names_sorted = data_pbs.dataset_infos['parameters']

    dataio = DataIO(output_folder=generator_module.pbs_submission_infos['simul_out_dir'] + '/outputs/', label='global_' + dataset_infos['save_output_filename'])

    # Load bays09
    data_bays09 = load_experimental_data.load_data_bays09(fit_mixture_model=True)
    bays09_nitems = data_bays09['data_to_fit']['n_items']
    bays09_em_target = np.nan*np.empty((bays09_nitems.max(), 4))  # kappa, prob_target, prob_nontarget, prob_random
    bays09_em_target[bays09_nitems - 1] = data_bays09['em_fits_nitems_arrays']['mean'].T
    bays09_emmixt_target = bays09_em_target[:, 1:]

    ## Compute some stuff
    # result_dist_bays09_kappa_T1_avg = utils.nanmean(result_dist_bays09_flat[:, 0, 0], axis=-1)
    # result_dist_bays09_kappa_allT_avg = np.nansum(utils.nanmean(result_dist_bays09_flat[:, :, 0], axis=-1), axis=1)

    # Square distance to kappa
    result_dist_bays09_allT_avg = utils.nanmean((result_em_fits_flat[:, :, :4] - bays09_em_target[np.newaxis, :, :, np.newaxis])**2, axis=-1)
    result_dist_bays09_kappa_sum = np.nansum(result_dist_bays09_allT_avg[:, :, 0], axis=-1)
    result_dist_bays09_kappa_T1_sum = result_dist_bays09_allT_avg[:, 0, 0]
    result_dist_bays09_kappa_T25_sum = np.nansum(result_dist_bays09_allT_avg[:, 1:, 0], axis=-1)

    # Square and KL distance for EM Mixtures
    result_dist_bays09_emmixt_sum = np.nansum(np.nansum(result_dist_bays09_allT_avg[:, :, 1:], axis=-1), axis=-1)
    result_dist_bays09_emmixt_T1_sum = np.nansum(result_dist_bays09_allT_avg[:, 0, 1:], axis=-1)
    result_dist_bays09_emmixt_T25_sum = np.nansum(np.nansum(result_dist_bays09_allT_avg[:, 1:, 1:], axis=-1), axis=-1)

    result_dist_bays09_emmixt_KL = utils.nanmean(utils.KL_div(result_em_fits_flat[:, :, 1:4], bays09_emmixt_target[np.newaxis, :, :, np.newaxis], axis=-2), axis=-1)  # KL over dimension of mixtures, then mean over repetitions
    result_dist_bays09_emmixt_KL_sum = np.nansum(result_dist_bays09_emmixt_KL, axis=-1)  # sum over T
    result_dist_bays09_emmixt_KL_T1_sum = result_dist_bays09_emmixt_KL[:, 0]
    result_dist_bays09_emmixt_KL_T25_sum = np.nansum(result_dist_bays09_emmixt_KL[:, 1:], axis=-1)

    result_dist_bays09_both_normalised = result_dist_bays09_emmixt_sum/np.max(result_dist_bays09_emmixt_sum) + result_dist_bays09_kappa_sum/np.max(result_dist_bays09_kappa_sum)

    if plots_dist_bays09:
        nb_best_points = 30
        size_normal_points = 8
        size_best_points = 50

        def plot_scatter(all_vars, result_dist_to_use_name, title='', log_color=True, downsampling=1, label_file=''):
            fig = plt.figure()
            ax = Axes3D(fig)

            result_dist_to_use = all_vars[result_dist_to_use_name]
            if not log_color:
                result_dist_to_use = np.exp(result_dist_to_use)

            utils.scatter3d(result_parameters_flat[:, 0], result_parameters_flat[:, 1], result_parameters_flat[:, 2], s=size_normal_points, c=np.log(result_dist_to_use), xlabel=parameter_names_sorted[0], ylabel=parameter_names_sorted[1], zlabel=parameter_names_sorted[2], title=title, ax_handle=ax)
            best_points_result_dist_to_use = np.argsort(result_dist_to_use)[:nb_best_points]
            utils.scatter3d(result_parameters_flat[best_points_result_dist_to_use, 0], result_parameters_flat[best_points_result_dist_to_use, 1], result_parameters_flat[best_points_result_dist_to_use, 2], c='r', s=size_best_points, ax_handle=ax)

            print "Best points, %s:" % title
            print '\n'.join(['sigma output %.2f, ratio %.2f, sigmax %.2f: %f' % (result_parameters_flat[i, 0], result_parameters_flat[i, 1], result_parameters_flat[i, 2], result_dist_to_use[i]) for i in best_points_result_dist_to_use])

            if savefigs:
                dataio.save_current_figure('scatter3d_%s%s_{label}_{unique_id}.pdf' % (result_dist_to_use_name, label_file))

                if savemovies:
                    try:
                        utils.rotate_plot3d(ax, dataio.create_formatted_filename('scatter3d_%s%s_{label}_{unique_id}.mp4' % (result_dist_to_use_name, label_file)), bitrate=8000, min_duration=8)
                        utils.rotate_plot3d(ax, dataio.create_formatted_filename('scatter3d_%s%s_{label}_{unique_id}.gif' % (result_dist_to_use_name, label_file)), nb_frames=30, min_duration=8)
                    except Exception:
                        # Most likely wrong aggregator...
                        print "failed when creating movies for ", result_dist_to_use_name

                ax.view_init(azim=90, elev=10)
                dataio.save_current_figure('scatter3d_view2_%s%s_{label}_{unique_id}.pdf' % (result_dist_to_use_name, label_file))

            return ax

        # Distance for kappa, all T
        plot_scatter(locals(), 'result_dist_bays09_kappa_sum', 'kappa all T')

        # Distance for em fits, all T, Squared distance
        plot_scatter(locals(), 'result_dist_bays09_emmixt_sum', 'em fits, all T')

        # Distance for em fits, all T, KL distance
        plot_scatter(locals(), 'result_dist_bays09_emmixt_KL_sum', 'em fits, all T, KL')

        # Distance for sum of normalised em fits + normalised kappa, all T
        plot_scatter(locals(), 'result_dist_bays09_both_normalised', 'summed normalised em mixt + kappa')

        # Distance kappa T = 1
        plot_scatter(locals(), 'result_dist_bays09_kappa_T1_sum', 'Kappa T=1')

        # Distance kappa T = 2...5
        plot_scatter(locals(), 'result_dist_bays09_kappa_T25_sum', 'Kappa T=2/5')

        # Distance em fits T = 1
        plot_scatter(locals(), 'result_dist_bays09_emmixt_T1_sum', 'em fits T=1')

        # Distance em fits T = 2...5
        plot_scatter(locals(), 'result_dist_bays09_emmixt_T25_sum', 'em fits T=2/5')

        # Distance em fits T = 1, KL
        plot_scatter(locals(), 'result_dist_bays09_emmixt_KL_T1_sum', 'em fits T=1, KL')

        # Distance em fits T = 2...5, KL
        plot_scatter(locals(), 'result_dist_bays09_emmixt_KL_T25_sum', 'em fits T=2/5, KL')

    if plots_per_T:
        for T_i, T in enumerate(T_space):
            # Kappa per T, fit to Bays09
            result_dist_bays09_kappa_currT = result_dist_bays09_allT_avg[:, T_i, 0]
            result_dist_bays09_kappa_currT_masked = mask_outliers(result_dist_bays09_kappa_currT)
            plot_scatter(locals(), 'result_dist_bays09_kappa_currT_masked', 'kappa T %d masked' % T, label_file="T{}".format(T))

            # EM Mixt per T, fit to Bays09
            result_dist_bays09_emmixt_sum_currT = np.nansum(result_dist_bays09_allT_avg[:, T_i, 1:], axis=-1)
            result_dist_bays09_emmixt_sum_currT_masked = mask_outliers(result_dist_bays09_emmixt_sum_currT)
            plot_scatter(locals(), 'result_dist_bays09_emmixt_sum_currT_masked', 'EM mixt T %d masked' % T, label_file="T{}".format(T))

            # EM Mixt per T, fit to Bays09, KL divergence
            result_dist_bays09_emmixt_KL_sum_currT = result_dist_bays09_emmixt_KL[:, T_i]
            plot_scatter(locals(), 'result_dist_bays09_emmixt_KL_sum_currT', 'KL EM mixt T %d masked' % T, label_file="T{}".format(T))

    # # Interpolate
    # if plots_interpolate:

    #     sigmax_target = 0.9

    #     M_interp_space = np.arange(6, 625, 5)
    #     ratio_interp_space = np.linspace(0.01, 1.0, 50)
    #     # sigmax_interp_space = np.linspace(0.01, 1.0, 50)
    #     sigmax_interp_space = np.array([sigmax_target])
    #     params_crossspace = np.array(utils.cross(M_interp_space, ratio_interp_space, sigmax_interp_space))

    #     interpolated_data = rbf_interpolator(params_crossspace[:, 0], params_crossspace[:, 1], params_crossspace[:, 2]).reshape((M_interp_space.size, ratio_interp_space.size))

    #     utils.pcolor_2d_data(interpolated_data, M_interp_space, ratio_interp_space, 'M', 'ratio', 'interpolated, fixing sigmax= %.2f' % sigmax_target)

    #     points_closeby = ((result_parameters_flat[:, 2] - sigmax_target)**2) < 0.01
    #     plt.figure()
    #     # plt.imshow(interpolated_data, extent=(M_interp_space.min(), M_interp_space.max(), ratio_interp_space.min(), ratio_interp_space.max()))
    #     plt.imshow(interpolated_data)
    #     plt.scatter(result_parameters_flat[points_closeby, 0], result_parameters_flat[points_closeby, 1], s=100, c=result_fitexperiments_bic_avg[points_closeby], marker='o')

    # if plot_per_ratio:
    #     # Plot the evolution of loglike as a function of sigmax, with std shown
    #     for ratio_conj_i, ratio_conj in enumerate(ratio_space):
    #         ax = utils.plot_mean_std_area(sigmax_space, result_log_posterior_mean[ratio_conj_i], result_log_posterior_std[ratio_conj_i])
    #         ax.get_figure().canvas.draw()
    #         if savefigs:
    #             dataio.save_current_figure('results_fitexp_%s_loglike_ratioconj%.2f_{label}_global_{unique_id}.pdf' % (exp_dataset, ratio_conj))

    all_args = data_pbs.loaded_data['args_list']
    variables_to_save = ['parameter_names_sorted']

    if savedata:
        dataio.save_variables_default(locals(), variables_to_save)
        dataio.make_link_output_to_dropbox(dropbox_current_experiment_folder='output_noise')

    plt.show()

    return locals()
def plots_fitting_experiments_random(data_pbs, generator_module=None): ''' Reload 2D volume runs from PBS and plot them ''' #### SETUP # savefigs = True savedata = True savemovies = False do_bays09 = True do_gorgo11 = True scatter3d_sumT = False plots_flat_sorted_performance = False plots_memorycurves_fits_best = True nb_best_points = 20 nb_best_points_per_T = nb_best_points/6 size_normal_points = 8 size_best_points = 50 downsampling = 2 # do_relaunch_bestparams_pbs = True colormap = None # or 'cubehelix' plt.rcParams['font.size'] = 16 # #### /SETUP print "Order parameters: ", generator_module.dict_parameters_range.keys() # parameters: ratio_conj, sigmax, T # Extract data result_fitexperiments_flat = np.array(data_pbs.dict_arrays['result_fitexperiments']['results_flat']) result_fitexperiments_all_flat = np.array(data_pbs.dict_arrays['result_fitexperiments_all']['results_flat']) result_fitexperiments_noiseconv_flat = np.array(data_pbs.dict_arrays['result_fitexperiments_noiseconv']['results_flat']) result_fitexperiments_noiseconv_all_flat = np.array(data_pbs.dict_arrays['result_fitexperiments_noiseconv_all']['results_flat']) result_parameters_flat = np.array(data_pbs.dict_arrays['result_fitexperiments']['parameters_flat']) all_repeats_completed = data_pbs.dict_arrays['result_fitexperiments']['repeats_completed'] all_args = data_pbs.loaded_data['args_list'] all_args_arr = np.array(all_args) num_repetitions = generator_module.num_repetitions # Extract order of datasets experiment_ids = data_pbs.loaded_data['datasets_list'][0]['fitexperiment_parameters']['experiment_ids'] parameter_names_sorted = data_pbs.dataset_infos['parameters'] T_space = data_pbs.loaded_data['datasets_list'][0]['T_space'] dataio = DataIO(output_folder=generator_module.pbs_submission_infos['simul_out_dir'] + '/outputs/', label='global_' + dataset_infos['save_output_filename']) # filter_data = (result_parameters_flat[:, -1] < 1.0) & (all_repeats_completed == num_repetitions - 1) # filter_data = (all_repeats_completed == num_repetitions - 1) # result_fitexperiments_flat = result_fitexperiments_flat[filter_data] # result_fitexperiments_all_flat = result_fitexperiments_all_flat[filter_data] # result_fitexperiments_noiseconv_flat = result_fitexperiments_noiseconv_flat[filter_data] # result_fitexperiments_noiseconv_all_flat = result_fitexperiments_noiseconv_all_flat[filter_data] # result_parameters_flat = result_parameters_flat[filter_data] # Compute some stuff # Data is summed over all experiments for _flat, contains bic, ll and ll90. # for _all_flat, contains bic, ll and ll90 per experiment. Given that Gorgo11 and Bays09 are incompatible, shouldn't really use the combined version directly! 
    result_fitexperiments_noiseconv_bic_avg_allT = utils.nanmean(result_fitexperiments_noiseconv_flat, axis=-1)[..., 0]
    result_fitexperiments_noiseconv_allexp_bic_avg_allT = utils.nanmean(result_fitexperiments_noiseconv_all_flat, axis=-1)[:, :, 0]
    # Negate LL90 so that, like BIC, smaller values are better
    result_fitexperiments_noiseconv_allexp_ll90_avg_allT = -utils.nanmean(result_fitexperiments_noiseconv_all_flat, axis=-1)[:, :, -1]

    ### BIC
    # result_fitexperiments_noiseconv_allexp_bic_avg_allT: N x T x exp
    result_fitexperiments_noiseconv_bays09_bic_avg_allT = result_fitexperiments_noiseconv_allexp_bic_avg_allT[..., 0]
    result_fitexperiments_noiseconv_gorgo11_bic_avg_allT = result_fitexperiments_noiseconv_allexp_bic_avg_allT[..., 1]
    result_fitexperiments_noiseconv_dualrecall_bic_avg_allT = result_fitexperiments_noiseconv_allexp_bic_avg_allT[..., 2]

    # Summed over T
    result_fitexperiments_noiseconv_bays09_bic_avg_sumT = np.nansum(result_fitexperiments_noiseconv_bays09_bic_avg_allT, axis=-1)
    result_fitexperiments_noiseconv_gorgo11_bic_avg_sumT = np.nansum(result_fitexperiments_noiseconv_gorgo11_bic_avg_allT, axis=-1)
    result_fitexperiments_noiseconv_dualrecall_bic_avg_sumT = np.nansum(result_fitexperiments_noiseconv_dualrecall_bic_avg_allT, axis=-1)

    ### LL90
    # N x T x exp
    result_fitexperiments_noiseconv_bays09_ll90_avg_allT = result_fitexperiments_noiseconv_allexp_ll90_avg_allT[..., 0]
    result_fitexperiments_noiseconv_gorgo11_ll90_avg_allT = result_fitexperiments_noiseconv_allexp_ll90_avg_allT[..., 1]
    result_fitexperiments_noiseconv_dualrecall_ll90_avg_allT = result_fitexperiments_noiseconv_allexp_ll90_avg_allT[..., 2]

    # Summed over T
    result_fitexperiments_noiseconv_bays09_ll90_avg_sumT = np.nansum(result_fitexperiments_noiseconv_bays09_ll90_avg_allT, axis=-1)
    result_fitexperiments_noiseconv_gorgo11_ll90_avg_sumT = np.nansum(result_fitexperiments_noiseconv_gorgo11_ll90_avg_allT, axis=-1)
    result_fitexperiments_noiseconv_dualrecall_ll90_avg_sumT = np.nansum(result_fitexperiments_noiseconv_dualrecall_ll90_avg_allT, axis=-1)

    def mask_outliers_array(result_dist_to_use, sigma_outlier=3):
        '''
            Mask outlier datapoints.
            Compute the mean of the results and assume that points with:
                result > mean + sigma_outlier*std
            are outliers.
            As we want the minimum values, do not mask small values.
        '''
        return np.ma.masked_greater(result_dist_to_use, np.mean(result_dist_to_use) + sigma_outlier*np.std(result_dist_to_use))

    def best_points_allT(result_dist_to_use):
        '''
            Best points for all T
        '''
        return np.argsort(result_dist_to_use)[:nb_best_points]

    def str_best_params(best_i, result_dist_to_use):
        return ' '.join(["%s %.4f" % (parameter_names_sorted[param_i], result_parameters_flat[best_i, param_i]) for param_i in xrange(len(parameter_names_sorted))]) + ' >> %f' % result_dist_to_use[best_i]

    def plot_scatter(all_vars, result_dist_to_use_name, title='', log_color=True, downsampling=1, label_file='', mask_outliers=True):
        result_dist_to_use = all_vars[result_dist_to_use_name]
        result_parameters_flat = all_vars['result_parameters_flat']

        # Filter if downsampling
        filter_downsampling = np.arange(0, result_dist_to_use.size, downsampling)
        result_dist_to_use = result_dist_to_use[filter_downsampling]
        result_parameters_flat = result_parameters_flat[filter_downsampling]

        if mask_outliers:
            result_dist_to_use = mask_outliers_array(result_dist_to_use)

        best_points_result_dist_to_use = np.argsort(result_dist_to_use)[:nb_best_points]

        # Construct all permutations of 3 parameters, for 3D scatters
        params_permutations = set([tuple(np.sort(np.random.choice(result_parameters_flat.shape[-1], 3, replace=False)).tolist()) for i in xrange(1000)])

        for param_permut in params_permutations:
            fig = plt.figure()
            ax = Axes3D(fig)

            # One plot per parameter permutation
            if log_color:
                color_points = np.log(result_dist_to_use)
            else:
                color_points = result_dist_to_use

            utils.scatter3d(result_parameters_flat[:, param_permut[0]], result_parameters_flat[:, param_permut[1]], result_parameters_flat[:, param_permut[2]], s=size_normal_points, c=color_points, xlabel=parameter_names_sorted[param_permut[0]], ylabel=parameter_names_sorted[param_permut[1]], zlabel=parameter_names_sorted[param_permut[2]], title=title, ax_handle=ax)

            utils.scatter3d(result_parameters_flat[best_points_result_dist_to_use, param_permut[0]], result_parameters_flat[best_points_result_dist_to_use, param_permut[1]], result_parameters_flat[best_points_result_dist_to_use, param_permut[2]], c='r', s=size_best_points, ax_handle=ax)

            if savefigs:
                dataio.save_current_figure('scatter3d_%s_%s%s_{label}_{unique_id}.pdf' % (result_dist_to_use_name, '_'.join([parameter_names_sorted[i] for i in param_permut]), label_file))

            if savemovies:
                try:
                    utils.rotate_plot3d(ax, dataio.create_formatted_filename('scatter3d_%s_%s%s_{label}_{unique_id}.mp4' % (result_dist_to_use_name, '_'.join([parameter_names_sorted[i] for i in param_permut]), label_file)), bitrate=8000, min_duration=8)
                    utils.rotate_plot3d(ax, dataio.create_formatted_filename('scatter3d_%s_%s%s_{label}_{unique_id}.gif' % (result_dist_to_use_name, '_'.join([parameter_names_sorted[i] for i in param_permut]), label_file)), nb_frames=30, min_duration=8)
                except Exception:
                    # Most likely wrong aggregator...
print "failed when creating movies for ", result_dist_to_use_name if False and savefigs: ax.view_init(azim=90, elev=10) dataio.save_current_figure('scatter3d_view2_%s_%s%s_{label}_{unique_id}.pdf' % (result_dist_to_use_name, '_'.join([parameter_names_sorted[i] for i in param_permut]), label_file)) # plt.close('all') print "Parameters: %s" % ', '.join(parameter_names_sorted) print "Best points, %s:" % title print '\n'.join([str_best_params(best_i, result_dist_to_use) for best_i in best_points_result_dist_to_use]) if scatter3d_sumT: plot_scatter(locals(), 'result_fitexperiments_noiseconv_bays09_bic_avg_sumT', 'BIC Bays09') plot_scatter(locals(), 'result_fitexperiments_noiseconv_bays09_ll90_avg_sumT', 'LL90 Bays09') plot_scatter(locals(), 'result_fitexperiments_noiseconv_gorgo11_bic_avg_sumT', 'BIC Gorgo11') plot_scatter(locals(), 'result_fitexperiments_noiseconv_gorgo11_ll90_avg_sumT', 'LL90 Gorgo11') plot_scatter(locals(), 'result_fitexperiments_noiseconv_dualrecall_bic_avg_sumT', 'BIC Dual recall') plot_scatter(locals(), 'result_fitexperiments_noiseconv_dualrecall_ll90_avg_sumT', 'LL90 Dual recall') if plots_flat_sorted_performance: result_dist_to_try = [] if do_bays09: result_dist_to_try.extend(['result_fitexperiments_noiseconv_bays09_bic_avg_sumT', 'result_fitexperiments_noiseconv_bays09_ll90_avg_sumT']) if do_gorgo11: result_dist_to_try.extend(['result_fitexperiments_noiseconv_gorgo11_bic_avg_sumT', 'result_fitexperiments_noiseconv_gorgo11_ll90_avg_sumT']) for result_dist in result_dist_to_try: order_indices = np.argsort(locals()[result_dist])[::-1] f, axes = plt.subplots(2, 1) axes[0].plot(np.arange(4) + result_parameters_flat[order_indices]/np.max(result_parameters_flat[order_indices], axis=0)) axes[0].legend(parameter_names_sorted, loc='upper left') axes[0].set_ylabel('Parameters') axes[1].plot(locals()[result_dist][order_indices]) axes[1].set_ylabel(result_dist.split('result_dist_')[-1]) axes[0].set_title('Distance ordered ' + result_dist.split('result_dist_')[-1]) f.canvas.draw() if savefigs: dataio.save_current_figure('plot_sortedperf_full_%s_{label}_{unique_id}.pdf' % (result_dist)) if plots_memorycurves_fits_best: # Alright, will actually reload the data from another set of runs, and find the closest parameter set to the ones found here. 
        data = utils.load_npy('normalisedsigmaxsigmaoutput_random_fitmixturemodels_sigmaxMratiosigmaoutput_repetitions3_280814/outputs/global_plots_fitmixtmodel_random_sigmaoutsigmaxnormMratio-plots_fit_mixturemodels_random-75eb9c74-72e0-4165-8014-92c1ef446f0a.npy')
        result_em_fits_flat_fitmixture = data['result_em_fits_flat']
        result_parameters_flat_fitmixture = data['result_parameters_flat']
        all_args_arr_fitmixture = data['all_args_arr']

        data_dir = None
        if not os.environ.get('WORKDIR_DROP'):
            data_dir = '../experimental_data/'
        plotting_parameters = launchers_memorycurves_marginal_fi.load_prepare_datasets(data_dir=data_dir)

        def plot_memorycurves_fits_fromexternal(all_vars, result_dist_to_use_name, nb_best_points=10):
            result_dist_to_use = all_vars[result_dist_to_use_name]
            result_em_fits_flat_fitmixture = all_vars['result_em_fits_flat_fitmixture']
            result_parameters_flat_fitmixture = all_vars['result_parameters_flat_fitmixture']
            all_args_arr_fitmixture = all_vars['all_args_arr_fitmixture']

            best_point_indices_result_dist = np.argsort(result_dist_to_use)[:nb_best_points]

            for best_point_index in best_point_indices_result_dist:
                print "extended plot desired for: " + str_best_params(best_point_index, result_dist_to_use)

                # Find the closest parameter set in the external runs: rescale per-dimension distances to [0, 1], then maximise the product of their complements
                dist_best_points_fitmixture = np.abs(result_parameters_flat_fitmixture - result_parameters_flat[best_point_index])
                dist_best_points_fitmixture -= np.min(dist_best_points_fitmixture, axis=0)
                dist_best_points_fitmixture /= np.max(dist_best_points_fitmixture, axis=0)

                best_point_index_fitmixture = np.argmax(np.prod(1 - dist_best_points_fitmixture, axis=-1))

                print "found closest: " + ' '.join(["%s %.4f" % (parameter_names_sorted[param_i], result_parameters_flat_fitmixture[best_point_index_fitmixture, param_i]) for param_i in xrange(len(parameter_names_sorted))])

                # Update arguments
                all_args_arr_fitmixture[best_point_index_fitmixture].update(dict(zip(parameter_names_sorted, result_parameters_flat_fitmixture[best_point_index_fitmixture])))
                packed_data = dict(T_space=T_space, result_em_fits=result_em_fits_flat_fitmixture[best_point_index_fitmixture], all_parameters=all_args_arr_fitmixture[best_point_index_fitmixture])

                plotting_parameters['suptitle'] = result_dist_to_use_name
                plotting_parameters['reuse_axes'] = False
                if savefigs:
                    packed_data['dataio'] = dataio

                launchers_memorycurves_marginal_fi.do_memory_plots(packed_data, plotting_parameters)

        plot_memorycurves_fits_fromexternal(locals(), 'result_fitexperiments_noiseconv_bays09_ll90_avg_sumT', nb_best_points=3)
        plot_memorycurves_fits_fromexternal(locals(), 'result_fitexperiments_noiseconv_gorgo11_ll90_avg_sumT', nb_best_points=3)
        plot_memorycurves_fits_fromexternal(locals(), 'result_fitexperiments_noiseconv_dualrecall_ll90_avg_sumT', nb_best_points=3)

    all_args = data_pbs.loaded_data['args_list']

    variables_to_save = ['experiment_ids', 'parameter_names_sorted', 'T_space', 'all_args_arr', 'all_repeats_completed', 'filter_data']

    if savedata:
        dataio.save_variables_default(locals(), variables_to_save)
        dataio.make_link_output_to_dropbox(dropbox_current_experiment_folder='sigmaoutput_normalisedsigmax_random')

    plt.show()

    return locals()
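# The nearest-neighbour lookup inside plot_memorycurves_fits_fromexternal
# above can be isolated into a small helper. A minimal sketch (the helper name
# is ours, not part of the original code): per-dimension absolute distances
# are rescaled to [0, 1] and the row maximising the product of their
# complements is picked, i.e. a soft AND across parameter dimensions. Note it
# assumes no candidate column is constant (division by zero otherwise).
def closest_point_normalised_product(candidates, target):
    '''Index of the row of `candidates` (N x D) closest to `target` (D,).'''
    dist = np.abs(candidates - target)
    dist -= np.min(dist, axis=0)
    dist /= np.max(dist, axis=0)
    return np.argmax(np.prod(1. - dist, axis=-1))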
def postprocess_dualrecall_fitmixturemodel(data_pbs, generator_module=None):
    '''
        Reload runs from PBS. To be plotted in Ipython later.
    '''

    #### SETUP
    #
    savedata = True

    colormap = None  # or 'cubehelix'
    plt.rcParams['font.size'] = 16
    #
    #### /SETUP

    print "Order parameters: ", data_pbs.dataset_infos['parameters']
    # parameters: M, ratio_conj, sigmax

    # Extract data
    result_em_fits = np.array(data_pbs.dict_arrays['result_em_fits']['results_flat'])
    result_dist_dualrecall_angle = np.array(data_pbs.dict_arrays['result_dist_dualrecall_angle']['results_flat'])
    result_dist_dualrecall_angle_emmixt_KL = np.array(data_pbs.dict_arrays['result_dist_dualrecall_angle_emmixt_KL']['results_flat'])
    result_dist_dualrecall_colour = np.array(data_pbs.dict_arrays['result_dist_dualrecall_colour']['results_flat'])
    result_dist_dualrecall_colour_emmixt_KL = np.array(data_pbs.dict_arrays['result_dist_dualrecall_colour_emmixt_KL']['results_flat'])
    result_parameters_flat = np.array(data_pbs.dict_arrays['result_em_fits']['parameters_flat'])
    all_repeats_completed = data_pbs.dict_arrays['result_em_fits']['repeats_completed']
    all_args_arr = np.array(data_pbs.loaded_data['args_list'])

    M_space = data_pbs.loaded_data['parameters_uniques']['M']
    ratio_conj_space = data_pbs.loaded_data['parameters_uniques']['ratio_conj']
    sigmax_space = data_pbs.loaded_data['parameters_uniques']['sigmax']
    num_repetitions = generator_module.num_repetitions

    parameter_names_sorted = data_pbs.dataset_infos['parameters']

    dataio = DataIO(output_folder=generator_module.pbs_submission_infos['simul_out_dir'] + '/outputs/', label='global_' + dataset_infos['save_output_filename'])

    # Load ground truth
    data_dualrecall = load_experimental_data.load_data_dualrecall(fit_mixture_model=True)

    ## Filter everything with repeats_completed == num_repet
    filter_data = all_repeats_completed == num_repetitions - 1
    result_parameters_flat = result_parameters_flat[filter_data]
    result_em_fits = result_em_fits[filter_data]
    result_dist_dualrecall_angle = result_dist_dualrecall_angle[filter_data]
    result_dist_dualrecall_angle_emmixt_KL = result_dist_dualrecall_angle_emmixt_KL[filter_data]
    result_dist_dualrecall_colour = result_dist_dualrecall_colour[filter_data]
    result_dist_dualrecall_colour_emmixt_KL = result_dist_dualrecall_colour_emmixt_KL[filter_data]
    all_args_arr = all_args_arr[filter_data]
    all_repeats_completed = all_repeats_completed[filter_data]

    print "Size post-filter: ", result_parameters_flat.shape[0]

    # Compute lots of averages over the repetitions
    result_em_fits_avg = utils.nanmean(result_em_fits, axis=-1)
    result_dist_dualrecall_angle_avg = utils.nanmean(result_dist_dualrecall_angle, axis=-1)
    result_dist_dualrecall_angle_emmixt_KL_avg = utils.nanmean(result_dist_dualrecall_angle_emmixt_KL, axis=-1)
    result_dist_dualrecall_colour_avg = utils.nanmean(result_dist_dualrecall_colour, axis=-1)
    result_dist_dualrecall_colour_emmixt_KL_avg = utils.nanmean(result_dist_dualrecall_colour_emmixt_KL, axis=-1)

    # all_args = data_pbs.loaded_data['args_list']
    variables_to_save = ['parameter_names_sorted', 'all_args_arr', 'all_repeats_completed', 'filter_data']

    if savedata:
        dataio.save_variables_default(locals(), variables_to_save)
        dataio.make_link_output_to_dropbox(dropbox_current_experiment_folder='dualrecall_fitmixturemodel')

    plt.show()

    return locals()
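# The repetitive filtering block in postprocess_dualrecall_fitmixturemodel
# above (one line per result array) can be expressed once over a dict of
# arrays. A minimal sketch, with a helper name of our own choosing; it assumes
# every array shares its first axis with the boolean mask:
def filter_arrays(arrays, mask):
    '''Apply boolean `mask` along the first axis of every array in `arrays`.

    arrays: dict mapping names to numpy arrays; returns a new dict.
    '''
    return dict((name, arr[mask]) for name, arr in arrays.items())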
from dataio import DataIO, ImageHandler
from unet_model import UNet
from tensorflow.compat.v1 import ConfigProto
from tensorflow.compat.v1 import InteractiveSession
import tensorflow as tf

# Work around cuDNN initialisation issues: let GPU memory grow on demand
config = ConfigProto()
config.gpu_options.allow_growth = True
# config.gpu_options.per_process_gpu_memory_fraction = 0.9
session = InteractiveSession(config=config)

# Create all instances here
dat = DataIO().load_matfile_images_first('retina_training_STARE.mat')
display = ImageHandler()

# Run U-Net model
model = UNet('Test_Model')
model.create_UNet_retina()
model.fit_model(*dat, nepochs=2000)

# Get plots for training accuracy and loss
plot1 = model.plot_accuracy()
plot1.plot()
plot1.show()

plot2 = model.plot_loss()
plot2.plot()
plot2.show()
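# A TF2-native alternative to the compat.v1 InteractiveSession workaround
# above, using APIs available since TensorFlow 2.1 (a sketch, not part of the
# original script; memory growth must be set before any op touches the GPU):
#
# for gpu in tf.config.list_physical_devices('GPU'):
#     tf.config.experimental.set_memory_growth(gpu, True)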
def plots_fit_mixturemodels_random(data_pbs, generator_module=None):
    '''
        Reload runs from PBS

        !!! IMPORTANT LOOK AT ME !!!
    '''

    #### SETUP
    #
    savefigs = True
    savedata = True
    savemovies = False

    do_bays09 = True
    do_gorgo11 = True

    plots_scatter3d = True
    plots_scatter_per_T = False
    plots_flat_sorted_performance = False
    plots_memorycurves_fits_best = True

    # do_relaunch_bestparams_pbs = True

    colormap = None  # or 'cubehelix'
    plt.rcParams['font.size'] = 16
    #
    #### /SETUP

    print "Order parameters: ", data_pbs.dataset_infos['parameters']
    # parameters: M, ratio_conj, sigmax

    # Extract data
    T_space = data_pbs.loaded_data['datasets_list'][0]['T_space']

    result_em_fits_flat = np.array(data_pbs.dict_arrays['result_em_fits']['results_flat'])
    result_precisions_flat = np.array(data_pbs.dict_arrays['result_all_precisions']['results_flat'])
    result_dist_bays09_flat = np.array(data_pbs.dict_arrays['result_dist_bays09']['results_flat'])
    result_dist_gorgo11_flat = np.array(data_pbs.dict_arrays['result_dist_gorgo11']['results_flat'])
    result_dist_bays09_emmixt_KL = np.array(data_pbs.dict_arrays['result_dist_bays09_emmixt_KL']['results_flat'])
    result_dist_gorgo11_emmixt_KL = np.array(data_pbs.dict_arrays['result_dist_gorgo11_emmixt_KL']['results_flat'])
    result_parameters_flat = np.array(data_pbs.dict_arrays['result_em_fits']['parameters_flat'])
    all_repeats_completed = data_pbs.dict_arrays['result_em_fits']['repeats_completed']

    all_args = data_pbs.loaded_data['args_list']
    all_args_arr = np.array(all_args)

    sigmaoutput_space = data_pbs.loaded_data['parameters_uniques']['sigma_output']
    sigmax_space = data_pbs.loaded_data['parameters_uniques']['sigmax']
    ratio_space = data_pbs.loaded_data['parameters_uniques']['ratio_conj']  # was 'sigmax', a copy-paste slip
    num_repetitions = generator_module.num_repetitions

    parameter_names_sorted = data_pbs.dataset_infos['parameters']

    dataio = DataIO(output_folder=generator_module.pbs_submission_infos['simul_out_dir'] + '/outputs/', label='global_' + dataset_infos['save_output_filename'])

    # Load bays09
    # data_bays09 = load_experimental_data.load_data_bays09(fit_mixture_model=True)
    # bays09_nitems = data_bays09['data_to_fit']['n_items']
    # bays09_em_target = np.nan*np.empty((bays09_nitems.max(), 4))  # kappa, prob_target, prob_nontarget, prob_random
    # bays09_em_target[bays09_nitems - 1] = data_bays09['em_fits_nitems_arrays']['mean'].T
    # bays09_emmixt_target = bays09_em_target[:, 1:]

    ## Filter everything with sigma_output > 1.0 and repeats_completed == num_repet
    filter_data = (result_parameters_flat[:, -1] < 1.0) & (all_repeats_completed == num_repetitions - 1)
    result_em_fits_flat = result_em_fits_flat[filter_data]
    result_precisions_flat = result_precisions_flat[filter_data]
    result_dist_bays09_flat = result_dist_bays09_flat[filter_data]
    result_dist_gorgo11_flat = result_dist_gorgo11_flat[filter_data]
    result_dist_bays09_emmixt_KL = result_dist_bays09_emmixt_KL[filter_data]
    result_dist_gorgo11_emmixt_KL = result_dist_gorgo11_emmixt_KL[filter_data]
    result_parameters_flat = result_parameters_flat[filter_data]
    all_args_arr = all_args_arr[filter_data]
    all_repeats_completed = all_repeats_completed[filter_data]

    # for _result_dist in ['result_em_fits_flat', 'result_precisions_flat', 'result_dist_bays09_flat', 'result_dist_gorgo11_flat', 'result_dist_bays09_emmixt_KL', 'result_dist_gorgo11_emmixt_KL', 'result_parameters_flat']:
    #     locals()[key] = locals()[key][filter_sigmaout]
    #     # exec("%s = %s[%s]" % (_result_dist, _result_dist, 'filter_sigmaout'))

    ## Compute some stuff
    result_em_fits_all_avg = utils.nanmean(result_em_fits_flat, axis=-1)
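    # result_em_fits_all_avg: N x T x fit-values, averaged over repetitions.
    # Judging by the slicing below, index 0 holds kappa and indices 1:4 the
    # mixture proportions (target, non-target, random) -- this layout is
    # inferred from how the array is used, not documented in the original.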
    result_em_kappa_allT = result_em_fits_all_avg[..., 0]
    result_em_emmixt_allT = result_em_fits_all_avg[..., 1:4]

    result_precisions_all_avg = utils.nanmean(result_precisions_flat, axis=-1)

    ##### Distance to Bays09
    result_dist_bays09_allT_avg = utils.nanmean(result_dist_bays09_flat, axis=-1)
    result_dist_bays09_emmixt_KL_allT_avg = utils.nanmean(result_dist_bays09_emmixt_KL, axis=-1)

    result_dist_bays09_kappa_allT = result_dist_bays09_allT_avg[..., 0]
    result_dist_bays09_kappa_sumT = np.nansum(result_dist_bays09_kappa_allT, axis=-1)
    result_dist_bays09_logkappa_sumT = np.log(result_dist_bays09_kappa_sumT)
    result_dist_bays09_emmixt_KL_sumT = np.nansum(result_dist_bays09_emmixt_KL_allT_avg, axis=-1)

    # Combined versions
    result_dist_bays09_both_normalised = result_dist_bays09_emmixt_KL_sumT/np.max(result_dist_bays09_emmixt_KL_sumT) + result_dist_bays09_kappa_sumT/np.max(result_dist_bays09_kappa_sumT)
    result_dist_bays09_logkappamixtKL = result_dist_bays09_logkappa_sumT + result_dist_bays09_emmixt_KL_sumT
    result_dist_bays09_logkappamixtKL_normalised = result_dist_bays09_logkappa_sumT/np.max(result_dist_bays09_logkappa_sumT) + result_dist_bays09_emmixt_KL_sumT/np.max(result_dist_bays09_emmixt_KL_sumT)

    # Rescale each distance to [0, 1] before the fuzzy-AND combination below
    result_dist_bays09_logkappa_sumT_forand = result_dist_bays09_logkappa_sumT - np.min(result_dist_bays09_logkappa_sumT)*np.sign(np.min(result_dist_bays09_logkappa_sumT))
    result_dist_bays09_logkappa_sumT_forand /= np.max(result_dist_bays09_logkappa_sumT_forand)

    result_dist_bays09_emmixt_KL_sumT_forand = result_dist_bays09_emmixt_KL_sumT - np.min(result_dist_bays09_emmixt_KL_sumT)*np.sign(np.min(result_dist_bays09_emmixt_KL_sumT))
    result_dist_bays09_emmixt_KL_sumT_forand /= np.max(result_dist_bays09_emmixt_KL_sumT_forand)

    result_dist_bays09_logkappamixtKL_AND = 1. - (1. - result_dist_bays09_logkappa_sumT_forand)*(1. - result_dist_bays09_emmixt_KL_sumT_forand)

    # Mask kappa for bad performance
    # result_dist_bays09_kappa_sumT_masked = np.ma.masked_greater(result_dist_bays09_kappa_sumT, 2*np.median(result_dist_bays09_kappa_sumT))
    # result_dist_bays09_logkappa_sumT_masked = np.ma.masked_greater(result_dist_bays09_logkappa_sumT, 2*np.median(result_dist_bays09_logkappa_sumT))
    # result_dist_bays09_emmixt_KL_sumT_masked = np.ma.masked_greater(result_dist_bays09_emmixt_KL_sumT, 2*np.median(result_dist_bays09_emmixt_KL_sumT))
    # result_dist_bays09_both_normalised_mult_masked = 1 - (1. - result_dist_bays09_emmixt_KL_sumT_masked/np.max(result_dist_bays09_emmixt_KL_sumT_masked))*(1. - result_dist_bays09_kappa_sumT_masked/np.max(result_dist_bays09_kappa_sumT_masked))
    ##### Distance to Gorgo11
    result_dist_gorgo11_allT_avg = utils.nanmean(result_dist_gorgo11_flat, axis=-1)
    result_dist_gorgo11_emmixt_KL_allT_avg = utils.nanmean(result_dist_gorgo11_emmixt_KL, axis=-1)

    result_dist_gorgo11_kappa_allT = result_dist_gorgo11_allT_avg[..., 0]
    result_dist_gorgo11_kappa_sumT = np.nansum(result_dist_gorgo11_kappa_allT, axis=-1)
    result_dist_gorgo11_logkappa_sumT = np.log(result_dist_gorgo11_kappa_sumT)
    result_dist_gorgo11_emmixt_KL_sumT = np.nansum(result_dist_gorgo11_emmixt_KL_allT_avg, axis=-1)

    # 'sumT25' variants: same quantities with the first T dropped
    result_dist_gorgo11_emmixt_KL_sumT25 = np.nansum(result_dist_gorgo11_emmixt_KL_allT_avg[:, 1:], axis=-1)
    result_dist_gorgo11_logkappa_sumT25 = np.log(np.nansum(result_dist_gorgo11_kappa_allT[..., 1:], axis=-1))

    # Combined versions
    result_dist_gorgo11_both_normalised = result_dist_gorgo11_emmixt_KL_sumT/np.max(result_dist_gorgo11_emmixt_KL_sumT) + result_dist_gorgo11_kappa_sumT/np.max(result_dist_gorgo11_kappa_sumT)
    result_dist_gorgo11_logkappamixtKL = result_dist_gorgo11_logkappa_sumT + result_dist_gorgo11_emmixt_KL_sumT
    result_dist_gorgo11_logkappamixtKL_normalised = result_dist_gorgo11_logkappa_sumT/np.max(result_dist_gorgo11_logkappa_sumT) + result_dist_gorgo11_emmixt_KL_sumT/np.max(result_dist_gorgo11_emmixt_KL_sumT)

    result_dist_gorgo11_logkappa_sumT_forand = result_dist_gorgo11_logkappa_sumT - np.min(result_dist_gorgo11_logkappa_sumT)*np.sign(np.min(result_dist_gorgo11_logkappa_sumT))
    result_dist_gorgo11_logkappa_sumT_forand /= np.max(result_dist_gorgo11_logkappa_sumT_forand)

    result_dist_gorgo11_logkappa_sumT25_forand = result_dist_gorgo11_logkappa_sumT25 - np.min(result_dist_gorgo11_logkappa_sumT25)*np.sign(np.min(result_dist_gorgo11_logkappa_sumT25))
    result_dist_gorgo11_logkappa_sumT25_forand /= np.max(result_dist_gorgo11_logkappa_sumT25_forand)

    result_dist_gorgo11_emmixt_KL_sumT_forand = result_dist_gorgo11_emmixt_KL_sumT - np.min(result_dist_gorgo11_emmixt_KL_sumT)*np.sign(np.min(result_dist_gorgo11_emmixt_KL_sumT))
    result_dist_gorgo11_emmixt_KL_sumT_forand /= np.max(result_dist_gorgo11_emmixt_KL_sumT_forand)

    result_dist_gorgo11_emmixt_KL_sumT25_forand = result_dist_gorgo11_emmixt_KL_sumT25 - np.min(result_dist_gorgo11_emmixt_KL_sumT25)*np.sign(np.min(result_dist_gorgo11_emmixt_KL_sumT25))
    result_dist_gorgo11_emmixt_KL_sumT25_forand /= np.max(result_dist_gorgo11_emmixt_KL_sumT25_forand)

    result_dist_gorgo11_logkappamixtKL_AND = 1. - (1. - result_dist_gorgo11_logkappa_sumT_forand)*(1. - result_dist_gorgo11_emmixt_KL_sumT_forand)
    result_dist_gorgo11_logkappa25mixtKL_AND = 1. - (1. - result_dist_gorgo11_logkappa_sumT25_forand)*(1. - result_dist_gorgo11_emmixt_KL_sumT25_forand)
    def str_best_params(best_i, result_dist_to_use):
        return ' '.join(["%s %.4f" % (parameter_names_sorted[param_i], result_parameters_flat[best_i, param_i]) for param_i in xrange(len(parameter_names_sorted))]) + ' >> %f' % result_dist_to_use[best_i]

    if plots_scatter3d:
        nb_best_points = 30
        size_normal_points = 8
        size_best_points = 50

        def plot_scatter(all_vars, result_dist_to_use_name, title='', log_color=True, downsampling=1, label_file=''):
            result_dist_to_use = all_vars[result_dist_to_use_name]
            result_parameters_flat = all_vars['result_parameters_flat']

            # Filter if downsampling
            filter_downsampling = np.arange(0, result_dist_to_use.size, downsampling)
            result_dist_to_use = result_dist_to_use[filter_downsampling]
            result_parameters_flat = result_parameters_flat[filter_downsampling]

            best_points_result_dist_to_use = np.argsort(result_dist_to_use)[:nb_best_points]

            # Construct all permutations of 3 parameters, for 3D scatters
            params_permutations = set([tuple(np.sort(np.random.choice(result_parameters_flat.shape[-1], 3, replace=False)).tolist()) for i in xrange(1000)])

            for param_permut in params_permutations:
                fig = plt.figure()
                ax = Axes3D(fig)

                # One plot per parameter permutation
                if log_color:
                    color_points = np.log(result_dist_to_use)
                else:
                    color_points = result_dist_to_use

                utils.scatter3d(result_parameters_flat[:, param_permut[0]], result_parameters_flat[:, param_permut[1]], result_parameters_flat[:, param_permut[2]], s=size_normal_points, c=color_points, xlabel=parameter_names_sorted[param_permut[0]], ylabel=parameter_names_sorted[param_permut[1]], zlabel=parameter_names_sorted[param_permut[2]], title=title, ax_handle=ax)

                utils.scatter3d(result_parameters_flat[best_points_result_dist_to_use, param_permut[0]], result_parameters_flat[best_points_result_dist_to_use, param_permut[1]], result_parameters_flat[best_points_result_dist_to_use, param_permut[2]], c='r', s=size_best_points, ax_handle=ax)

                if savefigs:
                    dataio.save_current_figure('scatter3d_%s_%s%s_{label}_{unique_id}.pdf' % (result_dist_to_use_name, '_'.join([parameter_names_sorted[i] for i in param_permut]), label_file))

                if savemovies:
                    try:
                        utils.rotate_plot3d(ax, dataio.create_formatted_filename('scatter3d_%s_%s%s_{label}_{unique_id}.mp4' % (result_dist_to_use_name, '_'.join([parameter_names_sorted[i] for i in param_permut]), label_file)), bitrate=8000, min_duration=8)
                        utils.rotate_plot3d(ax, dataio.create_formatted_filename('scatter3d_%s_%s%s_{label}_{unique_id}.gif' % (result_dist_to_use_name, '_'.join([parameter_names_sorted[i] for i in param_permut]), label_file)), nb_frames=30, min_duration=8)
                    except Exception:
                        # Most likely wrong aggregator...
print "failed when creating movies for ", result_dist_to_use_name if False and savefigs: ax.view_init(azim=90, elev=10) dataio.save_current_figure('scatter3d_view2_%s_%s%s_{label}_{unique_id}.pdf' % (result_dist_to_use_name, '_'.join([parameter_names_sorted[i] for i in param_permut]), label_file)) # plt.close('all') print "Parameters: %s" % ', '.join(parameter_names_sorted) print "Best points, %s:" % title print '\n'.join([str_best_params(best_i, result_dist_to_use) for best_i in best_points_result_dist_to_use]) #### BAYS 09 if do_bays09: # Distance for log kappa, all T plot_scatter(locals(), 'result_dist_bays09_logkappa_sumT', 'Bays09 kappa all T', log_color=False) # # Distance for em fits, all T, KL distance plot_scatter(locals(), 'result_dist_bays09_emmixt_KL_sumT', 'Bays09 em fits, sum T, KL', log_color=False) # Distance for product of normalised em fits KL + normalised log kappa, all T plot_scatter(locals(), 'result_dist_bays09_logkappamixtKL', 'Bays09 em fits KL, log kappa') # Distance for AND normalised em fits KL + log kappa plot_scatter(locals(), 'result_dist_bays09_logkappamixtKL_AND', 'Bays09 em fits KL AND log kappa') #### Gorgo 11 if do_gorgo11: # Distance for product of normalised em fits KL + normalised log kappa, all T plot_scatter(locals(), 'result_dist_gorgo11_logkappamixtKL', 'Gorgo11 em fits KL, log kappa') # Distance for AND normalised em fits KL + log kappa plot_scatter(locals(), 'result_dist_gorgo11_logkappamixtKL_AND', 'Gorgo11 em fits KL AND log kappa') # Distance for logkappa plot_scatter(locals(), 'result_dist_gorgo11_logkappa_sumT', 'Gorgo11 log kappa all T', log_color=False) # Distance for EM mixture proportions plot_scatter(locals(), 'result_dist_gorgo11_emmixt_KL_sumT', 'Gorgo11 em fits, sum T, KL', log_color=False) if plots_flat_sorted_performance: result_dist_to_try = [] if do_bays09: result_dist_to_try.extend(['result_dist_bays09_logkappamixtKL_AND', 'result_dist_bays09_logkappamixtKL']) if do_gorgo11: result_dist_to_try.extend(['result_dist_gorgo11_logkappamixtKL_AND', 'result_dist_gorgo11_logkappamixtKL']) for result_dist in result_dist_to_try: order_indices = np.argsort(locals()[result_dist])[::-1] f, axes = plt.subplots(2, 1) axes[0].plot(np.arange(4) + result_parameters_flat[order_indices]/np.max(result_parameters_flat[order_indices], axis=0)) axes[0].legend(parameter_names_sorted, loc='upper left') axes[0].set_ylabel('Parameters') axes[1].plot(locals()[result_dist][order_indices]) axes[1].set_ylabel(result_dist.split('result_dist_')[-1]) axes[0].set_title('Distance ordered ' + result_dist.split('result_dist_')[-1]) f.canvas.draw() if savefigs: dataio.save_current_figure('plot_sortedperf_full_%s_{label}_{unique_id}.pdf' % (result_dist)) ## Extra plot for logkappamixtKL_AND, it seems well behaved def plot_flat_best(all_vars, result_name, order_indices_filter, filter_goodAND, ordering='fitness'): f = plt.figure() axp1 = plt.subplot2grid((3, 2), (0, 0)) axp2 = plt.subplot2grid((3, 2), (0, 1)) axp3 = plt.subplot2grid((3, 2), (1, 0)) axp4 = plt.subplot2grid((3, 2), (1, 1)) axfit = plt.subplot2grid((3, 2), (2, 0), colspan=2) axp1.plot(result_parameters_flat[filter_goodAND][order_indices_filter, 0]) axp1.set_title(parameter_names_sorted[0]) axp2.plot(result_parameters_flat[filter_goodAND][order_indices_filter, 1], 'g') axp2.set_title(parameter_names_sorted[1]) axp3.plot(result_parameters_flat[filter_goodAND][order_indices_filter, 2], 'r') axp3.set_title(parameter_names_sorted[2]) axp4.plot(result_parameters_flat[filter_goodAND][order_indices_filter, 3], 'k') 
            axp4.set_title(parameter_names_sorted[3])
            axfit.plot(all_vars[result_name][filter_goodAND][order_indices_filter])
            # Label with the actual result name (the original hardcoded the bays09 name, mislabelling the gorgo11 plots)
            axfit.set_ylabel(result_name)

            plt.suptitle('Distance ordered ' + result_name)

            if savefigs:
                dataio.save_current_figure('plot_sortedperf_best_%s_%s_{label}_{unique_id}.pdf' % (result_name, ordering))

        if do_bays09:
            filter_goodAND = result_dist_bays09_logkappamixtKL_AND < 0.2

            # First order them by fitness
            order_indices_filter = np.argsort(result_dist_bays09_logkappamixtKL_AND[filter_goodAND])[::-1]
            plot_flat_best(locals(), 'result_dist_bays09_logkappamixtKL_AND', order_indices_filter, filter_goodAND, 'fitness')

            # Then by M, to see if there is some structure
            order_indices_filter = np.argsort(result_parameters_flat[filter_goodAND, 0])
            plot_flat_best(locals(), 'result_dist_bays09_logkappamixtKL_AND', order_indices_filter, filter_goodAND, 'M')

        if do_gorgo11:
            filter_goodAND = result_dist_gorgo11_logkappamixtKL_AND < 0.5

            # First order them by fitness
            order_indices_filter = np.argsort(result_dist_gorgo11_logkappamixtKL_AND[filter_goodAND])[::-1]
            plot_flat_best(locals(), 'result_dist_gorgo11_logkappamixtKL_AND', order_indices_filter, filter_goodAND, 'fitness')

            # Then by M, to see if there is some structure
            order_indices_filter = np.argsort(result_parameters_flat[filter_goodAND, 0])
            plot_flat_best(locals(), 'result_dist_gorgo11_logkappamixtKL_AND', order_indices_filter, filter_goodAND, 'M')

            # dist_cmaes_result = np.sum((result_parameters_flat - np.array([75, 1.0, 0.1537, 0.2724]))**2., axis=-1)
            # filter_close_cmaes_result = np.argsort(dist_cmaes_result)[:20]
            # order_indices_filter = np.argsort(result_dist_gorgo11_logkappamixtKL_AND[filter_close_cmaes_result])[::-1]
            # plot_flat_best(locals(), 'result_dist_gorgo11_logkappamixtKL_AND', order_indices_filter, filter_close_cmaes_result, 'Like current CMA/ES run')

    if plots_scatter_per_T:
        for T_i, T in enumerate(T_space):
            # Kappa per T, fit to Bays09
            result_dist_bays09_kappa_currT = result_dist_bays09_kappa_allT[:, T_i]
            result_dist_bays09_kappa_currT_masked = mask_outliers(result_dist_bays09_kappa_currT)
            plot_scatter(locals(), 'result_dist_bays09_kappa_currT_masked', 'kappa T %d masked' % T, label_file="T{}".format(T))

            # EM Mixt per T, fit to Bays09
            result_dist_bays09_emmixt_KL_currT = result_dist_bays09_emmixt_KL_allT_avg[:, T_i]
            result_dist_bays09_emmixt_KL_currT_masked = mask_outliers(result_dist_bays09_emmixt_KL_currT)
            plot_scatter(locals(), 'result_dist_bays09_emmixt_KL_currT_masked', 'KL EM mixt T %d masked' % T, label_file="T{}".format(T), log_color=False)

    if plots_memorycurves_fits_best:
        data_dir = None
        if not os.environ.get('WORKDIR_DROP'):
            data_dir = '../experimental_data/'
        plotting_parameters = launchers_memorycurves_marginal_fi.load_prepare_datasets(data_dir=data_dir)

        def plot_memorycurves_fits(all_vars, result_dist_to_use_name, nb_best_points=10):
            result_dist_to_use = all_vars[result_dist_to_use_name]

            best_points_result_dist_to_use = np.argsort(result_dist_to_use)[:nb_best_points]

            for best_point_index in best_points_result_dist_to_use:
                print "extended plot for: " + str_best_params(best_point_index, result_dist_to_use)

                # Update arguments
                all_args_arr[best_point_index].update(dict(zip(parameter_names_sorted, result_parameters_flat[best_point_index])))
                packed_data = dict(T_space=T_space, result_em_fits=result_em_fits_flat[best_point_index], all_parameters=all_args_arr[best_point_index])

                plotting_parameters['suptitle'] = result_dist_to_use_name
                plotting_parameters['reuse_axes'] = False
                if savefigs:
                    packed_data['dataio'] = dataio

                launchers_memorycurves_marginal_fi.do_memory_plots(packed_data, plotting_parameters)

        plot_memorycurves_fits(locals(), 'result_dist_bays09_logkappamixtKL_AND', nb_best_points=3)
        plot_memorycurves_fits(locals(), 'result_dist_gorgo11_logkappamixtKL_AND', nb_best_points=3)
        # plot_memorycurves_fits(locals(), 'result_dist_gorgo11_logkappamixtKL', nb_best_points=3)
        plot_memorycurves_fits(locals(), 'result_dist_gorgo11_logkappa25mixtKL_AND', nb_best_points=3)
        # plot_memorycurves_fits(locals(), 'result_dist_gorgo11_logkappa_sumT', nb_best_points=3)

    # # Interpolate
    # if plots_interpolate:
    #     sigmax_target = 0.9
    #     M_interp_space = np.arange(6, 625, 5)
    #     ratio_interp_space = np.linspace(0.01, 1.0, 50)
    #     # sigmax_interp_space = np.linspace(0.01, 1.0, 50)
    #     sigmax_interp_space = np.array([sigmax_target])
    #     params_crossspace = np.array(utils.cross(M_interp_space, ratio_interp_space, sigmax_interp_space))
    #     interpolated_data = rbf_interpolator(params_crossspace[:, 0], params_crossspace[:, 1], params_crossspace[:, 2]).reshape((M_interp_space.size, ratio_interp_space.size))
    #     utils.pcolor_2d_data(interpolated_data, M_interp_space, ratio_interp_space, 'M', 'ratio', 'interpolated, fixing sigmax= %.2f' % sigmax_target)
    #     points_closeby = ((result_parameters_flat[:, 2] - sigmax_target)**2) < 0.01
    #     plt.figure()
    #     # plt.imshow(interpolated_data, extent=(M_interp_space.min(), M_interp_space.max(), ratio_interp_space.min(), ratio_interp_space.max()))
    #     plt.imshow(interpolated_data)
    #     plt.scatter(result_parameters_flat[points_closeby, 0], result_parameters_flat[points_closeby, 1], s=100, c=result_fitexperiments_bic_avg[points_closeby], marker='o')

    # if plot_per_ratio:
    #     # Plot the evolution of loglike as a function of sigmax, with std shown
    #     for ratio_conj_i, ratio_conj in enumerate(ratio_space):
    #         ax = utils.plot_mean_std_area(sigmax_space, result_log_posterior_mean[ratio_conj_i], result_log_posterior_std[ratio_conj_i])
    #         ax.get_figure().canvas.draw()
    #         if savefigs:
    #             dataio.save_current_figure('results_fitexp_%s_loglike_ratioconj%.2f_{label}_global_{unique_id}.pdf' % (exp_dataset, ratio_conj))

    # all_args = data_pbs.loaded_data['args_list']

    variables_to_save = ['parameter_names_sorted', 'all_args_arr', 'all_repeats_completed', 'filter_data']

    if savedata:
        dataio.save_variables_default(locals(), variables_to_save)
        dataio.make_link_output_to_dropbox(dropbox_current_experiment_folder='sigmaoutput_normalisedsigmax_random')

    plt.show()

    return locals()
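# The *_forand / *_AND combination used in plots_fit_mixturemodels_random
# above can be summarised in isolation. A minimal sketch (the helper name is
# ours): each score is shifted to be non-negative, rescaled to [0, 1], and two
# scores a, b are combined as 1 - (1 - a)*(1 - b), a fuzzy AND that is small
# only when both scores are small. Note the original shift,
# x - min(x)*sign(min(x)), only zeroes the minimum when it is positive; here
# we simply subtract the minimum, which handles negative values (e.g. log
# kappa) as well.
def combine_scores_fuzzy_and(score_a, score_b):
    '''Rescale two score arrays to [0, 1] and combine them with a fuzzy AND.'''
    def rescale01(x):
        x = x - np.min(x)  # shift so the minimum is exactly 0
        return x / np.max(x)
    a = rescale01(score_a)
    b = rescale01(score_b)
    return 1. - (1. - a)*(1. - b)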