def launcher_do_fit_mixturemodels_sequential_alltrecall(args):
    '''
        Run the model for 1..T items sequentially, for all possible trecall/T.

        Compute:
        - Precision of samples
        - EM mixture model fits. Both independent and collapsed model.
        - Theoretical Fisher Information
        - EM Mixture model distances to set of currently working datasets.

        The collapsed double-powerlaw fit is run once per repetition on the
        responses gathered for every (T, trecall) pair. Those arrays only
        exist when all_parameters['collect_responses'] is set, so without
        it the collapsed fit is skipped (with a warning) and the
        result_*collapsed* arrays stay NaN.

        Parameters:
            args: parsed arguments, converted to a dict through
                utils.argparse_2_dict. Keys read here: 'burn_samples',
                'num_samples', 'output_directory', 'label',
                'experiment_data_dir', 'T', 'num_repetitions',
                'collect_responses' and, when collecting, 'N'.
                'T' and 'fixed_cued_feature_time' are overwritten in the
                resulting dict for every (T, trecall) pair.

        Returns:
            dict: locals() of this function. Local variable names are hence
            part of the output (they are also handed to
            dataio.save_variables_default), do not rename them lightly.
    '''

    print("Doing a piece of work for launcher_do_fit_mixturemodels_sequential_alltrecall")

    all_parameters = utils.argparse_2_dict(args)
    print(all_parameters)

    if all_parameters['burn_samples'] + all_parameters['num_samples'] < 200:
        print("WARNING> you do not have enough samples I think!" + " " + str(all_parameters['burn_samples'] + all_parameters['num_samples']))

    # Create DataIO
    #  (complete label with current variable state)
    dataio = DataIO.DataIO(output_folder=all_parameters['output_directory'], label=all_parameters['label'].format(**all_parameters))
    save_every = 1
    run_counter = 0

    # Load dataset to compare against
    data_gorgo11_sequ = load_experimental_data.load_data_gorgo11_sequential(data_dir=all_parameters['experiment_data_dir'], fit_mixture_model=True)
    gorgo11_sequ_T_space = np.unique(data_gorgo11_sequ['n_items'])

    # Parameters to vary
    T_max = all_parameters['T']
    T_space = np.arange(1, T_max+1)
    repetitions_axis = -1

    # Result arrays, indexed [T_i, trecall_i, ..., repet_i] and NaN until filled
    result_all_precisions = np.nan*np.empty((T_space.size, T_space.size, all_parameters['num_repetitions']))
    result_fi_theo = np.nan*np.empty((T_space.size, T_space.size, all_parameters['num_repetitions']))
    result_fi_theocov = np.nan*np.empty((T_space.size, T_space.size, all_parameters['num_repetitions']))
    # kappa, mixt_target, mixt_nontarget, mixt_random, ll, bic
    result_em_fits = np.nan*np.empty((T_space.size, T_space.size, 6, all_parameters['num_repetitions']))
    # kappa_tr, mixt_target_tr, mixt_nontarget_tr, mixt_random_tr
    result_em_fits_collapsed_tr = np.nan*np.empty((T_space.size, T_space.size, 4, all_parameters['num_repetitions']))
    # bic, ll, kappa_theta
    result_em_fits_collapsed_summary = np.nan*np.empty((5, all_parameters['num_repetitions']))

    # kappa, mixt_target, mixt_nontarget, mixt_random
    result_dist_gorgo11_sequ = np.nan*np.empty((T_space.size, T_space.size, 4, all_parameters['num_repetitions']))
    result_dist_gorgo11_sequ_emmixt_KL = np.nan*np.empty((T_space.size, T_space.size, all_parameters['num_repetitions']))

    result_dist_gorgo11_sequ_collapsed = np.nan*np.empty((T_space.size, T_space.size, 4, all_parameters['num_repetitions']))
    result_dist_gorgo11_sequ_collapsed_emmixt_KL = np.nan*np.empty((T_space.size, T_space.size, all_parameters['num_repetitions']))

    gorgo11_sequ_collapsed_mixtmod_mean = data_gorgo11_sequ['collapsed_em_fits_doublepowerlaw_array']

    # If desired, will automatically save all Model responses.
    if all_parameters['collect_responses']:
        print("--- Collecting all responses...")
        result_responses = np.nan*np.empty((T_space.size, T_space.size, all_parameters['N'], all_parameters['num_repetitions']))
        result_target = np.nan*np.empty((T_space.size, T_space.size, all_parameters['N'], all_parameters['num_repetitions']))
        result_nontargets = np.nan*np.empty((T_space.size, T_space.size, all_parameters['N'], T_max-1, all_parameters['num_repetitions']))

    # Only trecall <= T is run, hence T*(T+1)/2 pairs per repetition
    search_progress = progress.Progress(T_space.size*(T_space.size + 1)/2.*all_parameters['num_repetitions'])

    for repet_i in xrange(all_parameters['num_repetitions']):
        for T_i, T in enumerate(T_space):
            for trecall_i, trecall in enumerate(np.arange(T, 0, -1)):
                # Inverting indexing of trecall, to be consistent. trecall_i 0 == last item.
                # But trecall still means the actual time of recall!
                print("%.2f%%, %s left - %s" % (search_progress.percentage(), search_progress.time_remaining_str(), search_progress.eta_str()))
                print("Fit for T=%d, tr=%d, %d/%d" % (T, trecall, repet_i+1, all_parameters['num_repetitions']))

                # Update parameter
                all_parameters['T'] = T
                all_parameters['fixed_cued_feature_time'] = trecall - 1

                ### WORK WORK WORK work? ###
                # Instantiate
                (_, _, _, sampler) = launchers.init_everything(all_parameters)

                # Sample
                sampler.run_inference(all_parameters)

                # Compute precision
                print("get precision...")
                result_all_precisions[T_i, trecall_i, repet_i] = sampler.get_precision()

                # Fit mixture model, independent
                print("fit mixture model...")
                curr_params_fit = sampler.fit_mixture_model(use_all_targets=False)
                result_em_fits[T_i, trecall_i, :, repet_i] = [curr_params_fit[key] for key in ['kappa', 'mixt_target', 'mixt_nontargets_sum', 'mixt_random', 'train_LL', 'bic']]

                # Compute fisher info
                print("compute fisher info")
                result_fi_theo[T_i, trecall_i, repet_i] = sampler.estimate_fisher_info_theocov(use_theoretical_cov=False)
                result_fi_theocov[T_i, trecall_i, repet_i] = sampler.estimate_fisher_info_theocov(use_theoretical_cov=True)

                # Compute distances to datasets (this is for the non-collapsed stuff, not the best)
                if T in gorgo11_sequ_T_space:
                    gorgo11_sequ_mixtures_mean = data_gorgo11_sequ['em_fits_nitems_trecall_arrays'][gorgo11_sequ_T_space==T, trecall_i, :4].flatten()

                    result_dist_gorgo11_sequ[T_i, trecall_i, :, repet_i] = (gorgo11_sequ_mixtures_mean - result_em_fits[T_i, trecall_i, :4, repet_i])**2.
                    # KL only on the mixture proportions, kappa (index 0) is left out
                    result_dist_gorgo11_sequ_emmixt_KL[T_i, trecall_i, repet_i] = utils.KL_div(result_em_fits[T_i, trecall_i, 1:4, repet_i], gorgo11_sequ_mixtures_mean[1:])

                # If needed, store responses
                if all_parameters['collect_responses']:
                    print("collect responses")
                    (responses, target, nontarget) = sampler.collect_responses()
                    result_responses[T_i, trecall_i, :, repet_i] = responses
                    result_target[T_i, trecall_i, :, repet_i] = target
                    # T items => T_i == T - 1 nontargets, rest of the axis stays NaN
                    result_nontargets[T_i, trecall_i, :, :T_i, repet_i] = nontarget

                # Same output as the former multi-item print statement
                print("CURRENT RESULTS:\n" + " ".join(map(str, (
                    result_all_precisions[T_i, trecall_i, repet_i],
                    curr_params_fit,
                    result_fi_theo[T_i, trecall_i, repet_i],
                    result_fi_theocov[T_i, trecall_i, repet_i],
                    np.sum(result_dist_gorgo11_sequ[T_i, trecall_i, :, repet_i]),
                    np.sum(result_dist_gorgo11_sequ_emmixt_KL[T_i, trecall_i, repet_i]),
                    "\n"))))
                ### /Work ###

                search_progress.increment()
                if run_counter % save_every == 0 or search_progress.done():
                    dataio.save_variables_default(locals())
                run_counter += 1

        if all_parameters['collect_responses']:
            # Fit Collapsed mixture model
            # TODO check dimensionality...
            print('Fitting Collapsed double powerlaw mixture model...')
            params_fit = em_circularmixture_parametrickappa_doublepowerlaw.fit(T_space, result_responses[..., repet_i], result_target[..., repet_i], result_nontargets[..., repet_i], debug=False)

            # First store the parameters that depend on T/trecall
            for i, key in enumerate(['kappa', 'mixt_target_tr', 'mixt_nontargets_tr', 'mixt_random_tr']):
                result_em_fits_collapsed_tr[..., i, repet_i] = params_fit[key]

            # Then the ones that do not, only one per full collapsed fit.
            # Row 1 (ll) is not filled: the train_LL assignment was already
            # disabled in the original code, so it stays NaN.
            result_em_fits_collapsed_summary[0, repet_i] = params_fit['bic']
            result_em_fits_collapsed_summary[2:, repet_i] = params_fit['kappa_theta']

            # Compute distances to dataset for collapsed model
            result_dist_gorgo11_sequ_collapsed[..., repet_i] = (gorgo11_sequ_collapsed_mixtmod_mean - result_em_fits_collapsed_tr[..., repet_i])**2.
            result_dist_gorgo11_sequ_collapsed_emmixt_KL[..., repet_i] = utils.KL_div(result_em_fits_collapsed_tr[..., 1:4, repet_i], gorgo11_sequ_collapsed_mixtmod_mean[..., 1:], axis=-1)
        else:
            # The collapsed fit needs result_responses/result_target/
            # result_nontargets, which are only allocated when collecting
            # responses. Reading them here used to raise a NameError.
            print("WARNING> collect_responses is off, skipping the Collapsed double powerlaw mixture model fit")

    # Finished
    dataio.save_variables_default(locals())

    print("All finished")
    return locals()
def get_model_em_fits(self, num_repetitions=1, use_cache=True):
    '''Sample from the model for every (n_items, trecall) pair with
    trecall <= n_items and fit a collapsed double-powerlaw mixture model
    on the outcome, once per repetition.

    Parameters:
        num_repetitions: number of sample-then-fit rounds to aggregate.
        use_cache: when True and self.model_em_fits is already set, it is
            returned as is (whatever num_repetitions it was built with).

    Returns:
        self.model_em_fits: a defaultdict holding 'mean', 'std' and 'sem'
        dicts, each keyed by the fit parameter names.
    '''
    if self.model_em_fits is not None and use_cache:
        return self.model_em_fits

    # Buffers for all the data to fit, reused from one repetition to the
    # next. Cells with trecall > n_items are never written and stay NaN.
    num_T = self.T_space.size
    base_shape = (num_T, num_T, self.num_datapoints)
    collected = {
        'responses': np.nan*np.empty(base_shape),
        'targets': np.nan*np.empty(base_shape),
        'nontargets': np.nan*np.empty(base_shape + (num_T - 1,)),
    }

    search_progress = progress.Progress(num_T*(num_T + 1)/2.*num_repetitions)
    status_template = ("{:.2f}%, {} left - {} "
                       "== Data, N={}, trecall={}. {}/{}. ")

    fits_all_repetitions = []
    for repet_i in xrange(num_repetitions):
        for n_items_i, n_items in enumerate(self.T_space):
            for trecall_i, trecall in enumerate(self.T_space):
                if not trecall <= n_items:
                    continue

                self.setup_experimental_stimuli(n_items, trecall)

                print(status_template.format(
                    search_progress.percentage(),
                    search_progress.time_remaining_str(),
                    search_progress.eta_str(),
                    n_items, trecall, repet_i+1, num_repetitions))

                # Stored samples are only reused on the first repetition,
                # later repetitions always draw (and store) fresh ones.
                reuse_stored = (
                    'samples' in self.get_names_stored_responses()
                    and repet_i < 1)
                if reuse_stored:
                    self.restore_responses('samples')
                else:
                    self.sampler.force_sampling_round()
                    self.store_responses('samples')

                sampled = self.sampler.collect_responses()
                responses, targets, nontargets = sampled

                collected['responses'][n_items_i, trecall_i] = responses
                collected['targets'][n_items_i, trecall_i] = targets
                # Only the first n_items_i nontarget slots are written
                collected['nontargets'][
                    n_items_i, trecall_i, :, :n_items_i] = nontargets

                search_progress.increment()

        # One collapsed fit per repetition, on everything just collected
        params_fit_double = (
            em_circularmixture_parametrickappa_doublepowerlaw.fit(
                self.T_space,
                collected['responses'],
                collected['targets'],
                collected['nontargets']))
        fits_all_repetitions.append(params_fit_double)

    # Statistics of the powerlaw fits across repetitions. The key names
    # are taken from the last fit.
    self.model_em_fits = collections.defaultdict(dict)
    for key in params_fit_double.keys():
        values_across_repetitions = [
            one_fit[key] for one_fit in fits_all_repetitions]

        self.model_em_fits['mean'][key] = np.mean(
            values_across_repetitions, axis=0)
        self.model_em_fits['std'][key] = np.std(
            values_across_repetitions, axis=0)
        self.model_em_fits['sem'][key] = (
            self.model_em_fits['std'][key] / np.sqrt(num_repetitions))

    return self.model_em_fits
def fit_collapsed_mixture_model(self):
    '''
        Fit the new Collapsed Mixture Model, using data created
        in generate_data_subject_split.

        Do:
         * One fit per subject/nitems, using trecall as T_space
         * One fit per subject/trecall, using nitems as T_space
         * One fit per subject, using the double-powerlaw on nitems/trecall

        Everything is written into self.dataset:
         * 'collapsed_em_fits_subjects_nitems'[subject][n_items]
         * 'collapsed_em_fits_subjects_trecall'[subject][trecall]
         * 'collapsed_em_fits_doublepowerlaw_subjects'[subject]
         * 'collapsed_em_fits_nitems' / 'collapsed_em_fits_trecall' /
           'collapsed_em_fits_doublepowerlaw': 'mean', 'std', 'sem' and
           'values' across subjects
         * 'collapsed_em_fits_doublepowerlaw_array': (Tnum, Tnum, 4) array
           of the mean double-powerlaw kappa, mixt_target_tr,
           mixt_nontargets_tr and mixt_random_tr.
    '''

    split = self.dataset['data_subject_split']
    nitems_space = split['nitems_space']
    Tmax = nitems_space.max()
    Tnum = nitems_space.size

    # Start from empty containers
    for container_name in ('collapsed_em_fits_subjects_nitems',
                           'collapsed_em_fits_nitems',
                           'collapsed_em_fits_subjects_trecall',
                           'collapsed_em_fits_trecall',
                           'collapsed_em_fits_doublepowerlaw_subjects',
                           'collapsed_em_fits_doublepowerlaw'):
        self.dataset[container_name] = dict()
    self.dataset['collapsed_em_fits_doublepowerlaw_array'] = np.nan*np.empty((Tnum, Tnum, 4))

    ## Per-subject fits
    for subject, subject_data_dict in split['data_subject'].iteritems():
        print('Fitting Collapsed Mixture model for subject %d' % subject)

        responses = subject_data_dict['responses']
        targets = subject_data_dict['targets']
        nontargets = subject_data_dict['nontargets']

        # Use trecall as T_space, bit weird
        for n_items_i, n_items in enumerate(nitems_space):
            print('%d nitems, using trecall as T_space' % n_items)

            params_fit = em_circularmixture_parametrickappa.fit(
                np.arange(1, n_items+1),
                responses[n_items_i, :n_items],
                targets[n_items_i, :n_items],
                nontargets[n_items_i, :n_items, :, :(n_items - 1)],
                debug=False)

            self.dataset['collapsed_em_fits_subjects_nitems'].setdefault(subject, dict())[n_items] = params_fit

        # Use nitems as T_space, as a function of trecall (be careful)
        for trecall_i, trecall in enumerate(nitems_space):
            print('trecall %d, using n_items as T_space' % trecall)

            params_fit = em_circularmixture_parametrickappa.fit(
                np.arange(trecall, Tmax+1),
                responses[trecall_i:, trecall_i],
                targets[trecall_i:, trecall_i],
                nontargets[trecall_i:, trecall_i],
                debug=False)

            self.dataset['collapsed_em_fits_subjects_trecall'].setdefault(subject, dict())[trecall] = params_fit

        # Now do the correct fit, with double powerlaw on nitems+trecall
        print('Double powerlaw fit')
        params_fit_double = em_circularmixture_parametrickappa_doublepowerlaw.fit(
            nitems_space, responses, targets, nontargets, debug=False)
        self.dataset['collapsed_em_fits_doublepowerlaw_subjects'][subject] = params_fit_double

    ## Statistics across subjects
    subjects_space = split['subjects_space']
    stat_names = ('mean', 'std', 'sem', 'values')

    def _stats_across_subjects(values_allsubjects):
        # Yield (statistic name, value) pairs, in the order they are stored
        yield 'mean', np.mean(values_allsubjects, axis=0)
        std_allsubjects = np.std(values_allsubjects, axis=0)
        yield 'std', std_allsubjects
        yield 'sem', std_allsubjects/np.sqrt(subjects_space.size)
        yield 'values', values_allsubjects

    # Now compute mean/std collapsed_em_fits_nitems
    summary_nitems = self.dataset['collapsed_em_fits_nitems']
    for stat_name in stat_names:
        summary_nitems[stat_name] = dict()

    # Need to extract the values for a subject/nitems pair, for all keys of
    # em_fits. The key names come from the last single-powerlaw fit done.
    emfits_keys = params_fit.keys()
    fits_subjects_nitems = self.dataset['collapsed_em_fits_subjects_nitems']
    for n_items in nitems_space:
        for key in emfits_keys:
            values_allsubjects = [fits_subjects_nitems[subject][n_items][key]
                                  for subject in subjects_space]
            for stat_name, stat_value in _stats_across_subjects(values_allsubjects):
                summary_nitems[stat_name].setdefault(n_items, dict())[key] = stat_value

    # Same for the other ones
    summary_trecall = self.dataset['collapsed_em_fits_trecall']
    for stat_name in stat_names:
        summary_trecall[stat_name] = dict()

    fits_subjects_trecall = self.dataset['collapsed_em_fits_subjects_trecall']
    for trecall in nitems_space:
        for key in emfits_keys:
            values_allsubjects = [fits_subjects_trecall[subject][trecall][key]
                                  for subject in subjects_space]
            for stat_name, stat_value in _stats_across_subjects(values_allsubjects):
                summary_trecall[stat_name].setdefault(trecall, dict())[key] = stat_value

    # Collapsed full double powerlaw model across subjects
    summary_double = self.dataset['collapsed_em_fits_doublepowerlaw']
    for stat_name in stat_names:
        summary_double[stat_name] = dict()

    fits_subjects_double = self.dataset['collapsed_em_fits_doublepowerlaw_subjects']
    for key in params_fit_double.keys():
        values_allsubjects = [fits_subjects_double[subject][key]
                              for subject in subjects_space]
        for stat_name, stat_value in _stats_across_subjects(values_allsubjects):
            summary_double[stat_name][key] = stat_value

    # Construct some easy arrays to compare the fit to the dataset
    for column, key in enumerate(['kappa', 'mixt_target_tr', 'mixt_nontargets_tr', 'mixt_random_tr']):
        self.dataset['collapsed_em_fits_doublepowerlaw_array'][..., column] = self.dataset['collapsed_em_fits_doublepowerlaw']['mean'][key]