def scan_allsyst(template_settings, steps, hypo_param_selections, outdir,
                 minimizer_settings, metric, debug_mode):
    """Scan (separately) all systematics (i.e., non-fixed params).

    Parameters
    ----------
    template_settings
    steps
    hypo_param_selections
    outdir
    minimizer_settings
    metric
    debug_mode

    Returns
    -------
    results : dict
        Keys are param names, values are the scan results

    """
    outdir = expanduser(expandvars(outdir))
    mkdir(outdir, warn=False)

    hypo_maker = DistributionMaker(template_settings)
    hypo_maker.select_params(hypo_param_selections)
    data_dist = hypo_maker.get_outputs(return_sum=True)

    minimizer_settings = from_file(minimizer_settings)

    analysis = Analysis()

    results = OrderedDict()  # pylint: disable=redefined-outer-name
    for param in hypo_maker.params:
        if param.is_fixed:
            continue

        logging.info('Scanning %s', param.name)
        nominal_value = param.value

        outfile = join(
            outdir,
            '{:s}_{:d}_steps_{:s}_scan.json'.format(param.name, steps, metric)
        )
        if isfile(outfile):
            raise IOError(
                '`outfile` "{}" exists, not overwriting.'.format(outfile)
            )

        results[param.name] = analysis.scan(
            data_dist=data_dist,
            hypo_maker=hypo_maker,
            hypo_param_selections=hypo_param_selections,
            metric=metric,
            param_names=param.name,
            steps=steps,
            only_points=None,
            outer=True,
            profile=False,
            minimizer_settings=minimizer_settings,
            outfile=outfile,
            debug_mode=debug_mode,
        )

        to_file(results[param.name], outfile)
        param.value = nominal_value
        logging.info('Done scanning param "%s"', param.name)

    logging.info('Done.')

    return results
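# Illustrative usage sketch (not part of the original script): the settings
# paths, param selection, metric, and step count below are hypothetical
# placeholders; only the documented `scan_allsyst` signature above is assumed.
def _example_scan_allsyst():
    """Minimal sketch of calling `scan_allsyst` with placeholder inputs."""
    return scan_allsyst(
        template_settings='settings/pipeline/example.cfg',    # hypothetical path
        steps=11,                                              # points per param scan
        hypo_param_selections=['nh'],                          # hypothetical selection
        outdir='./scan_output',
        minimizer_settings='settings/minimizer/example.json',  # hypothetical path
        metric='chi2',
        debug_mode=1,
    )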
def profile_scan(data_settings, template_settings, param_names, steps,
                 only_points, no_outer, data_param_selections,
                 hypo_param_selections, profile, outfile, minimizer_settings,
                 metric, debug_mode):
    """Perform a profile scan.

    Parameters
    ----------
    data_settings
    template_settings
    param_names
    steps
    only_points
    no_outer
    data_param_selections
    hypo_param_selections
    profile
    outfile
    minimizer_settings
    metric
    debug_mode

    Returns
    -------
    results
    analysis

    """
    outfile = expanduser(expandvars(outfile))
    if isfile(outfile):
        raise IOError('`outfile` "{}" already exists!'.format(outfile))

    minimizer_settings = from_file(minimizer_settings)

    hypo_maker = DistributionMaker(template_settings)

    if data_settings is None:
        if (data_param_selections is None
                or data_param_selections == hypo_param_selections):
            data_maker = hypo_maker
        else:
            data_maker = deepcopy(hypo_maker)
            data_maker.select_params(data_param_selections)
    else:
        data_maker = DistributionMaker(data_settings)
        data_maker.select_params(data_param_selections)

    data_dist = data_maker.get_outputs(return_sum=True)

    analysis = Analysis()
    results = analysis.scan(
        data_dist=data_dist,
        hypo_maker=hypo_maker,
        hypo_param_selections=hypo_param_selections,
        metric=metric,
        param_names=param_names,
        steps=steps,
        only_points=only_points,
        outer=not no_outer,
        profile=profile,
        minimizer_settings=minimizer_settings,
        outfile=outfile,
        debug_mode=debug_mode,
    )
    to_file(results, outfile)
    logging.info("Done.")

    return results, analysis
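# Illustrative usage sketch (not part of the original script): performs a
# hypothetical two-parameter profile scan. File paths, parameter names, and
# scan settings are placeholders; only the `profile_scan` signature above is
# assumed.
def _example_profile_scan():
    """Minimal sketch of calling `profile_scan` with placeholder inputs."""
    return profile_scan(
        data_settings=None,                                    # reuse hypo_maker as data
        template_settings='settings/pipeline/example.cfg',     # hypothetical path
        param_names=['theta23', 'deltam31'],                   # hypothetical params
        steps=10,
        only_points=None,
        no_outer=False,                                        # i.e., scan the outer product
        data_param_selections=None,
        hypo_param_selections=['nh'],                          # hypothetical selection
        profile=True,
        outfile='./profile_scan.json',
        minimizer_settings='settings/minimizer/example.json',  # hypothetical path
        metric='llh',
        debug_mode=1,
    )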
def compare(outdir, ref, ref_label, test, test_label, asymm_max=None,
            asymm_min=None, combine=None, diff_max=None, diff_min=None,
            fract_diff_max=None, fract_diff_min=None, json=False, pdf=False,
            png=False, ref_abs=False, ref_param_selections=None, sum=None,
            test_abs=False, test_param_selections=None):
    """Compare two entities. The result of each entity specification is
    formatted into a MapSet and stored to disk, so that e.g. re-running a
    DistributionMaker is unnecessary to reproduce the results.

    Parameters
    ----------
    outdir : string
        Store output plots to this directory

    ref : string or array of strings
        Pipeline settings config file that generates reference output, or a
        stored map or map set. Multiple pipelines, maps, or map sets are
        supported

    ref_abs : bool
        Use the absolute value of the reference plot for comparisons

    ref_label : string
        Label for reference

    ref_param_selections : string
        Param selections to apply to ref pipeline config(s). Not applicable if
        ref specifies stored map or map sets

    test : string or array of strings
        Pipeline settings config file that generates test output, or a stored
        map or map set. Multiple pipelines, maps, or map sets are supported

    test_abs : bool
        Use the absolute value of the test plot for comparisons

    test_label : string
        Label for test

    test_param_selections : None or string
        Param selections to apply to test pipeline config(s). Not applicable
        if test specifies stored map or map sets

    combine : None or string or array of strings
        Combine by wildcard string, where string globbing (a la command line)
        uses asterisk for any number of wildcard characters. Use single quotes
        such that asterisks do not get expanded by the shell. Multiple combine
        strings supported

    sum : None or int
        Sum over (and hence remove) the specified axis or axes. I.e., project
        the map onto remaining (unspecified) axis or axes

    json : bool
        Save output maps in compressed json (json.bz2) format

    pdf : bool
        Save plots in PDF format. If neither this nor png is specified, no
        plots are produced

    png : bool
        Save plots in PNG format. If neither this nor pdf is specified, no
        plots are produced

    diff_min : None or float
        Difference plot vmin; if you specify only one of diff_min or diff_max,
        symmetric limits are automatically used (min = -max)

    diff_max : None or float
        Difference plot vmax; if you specify only one of diff_min or diff_max,
        symmetric limits are automatically used (min = -max)

    fract_diff_min : None or float
        Fractional difference plot vmin; if you specify only one of
        fract_diff_min or fract_diff_max, symmetric limits are automatically
        used (min = -max)

    fract_diff_max : None or float
        Fractional difference plot vmax; if you specify only one of
        fract_diff_min or fract_diff_max, symmetric limits are automatically
        used (min = -max)

    asymm_min : None or float
        Asymmetry plot vmin; if you specify only one of asymm_min or
        asymm_max, symmetric limits are automatically used (min = -max)

    asymm_max : None or float
        Asymmetry plot vmax; if you specify only one of asymm_min or
        asymm_max, symmetric limits are automatically used (min = -max)

    Returns
    -------
    summary_stats : dict
        Dictionary containing a summary for each Map processed

    diff : MapSet
        MapSet of the difference, (Test - Ref)

    fract_diff : MapSet
        MapSet of the fractional difference, (Test - Ref) / Ref

    asymm : MapSet
        MapSet of the asymmetric fractional difference or pull,
        (Test - Ref) / sqrt(Ref)

    """
    ref_plot_label = ref_label
    if ref_abs and not ref_label.startswith('abs'):
        ref_plot_label = 'abs(%s)' % ref_plot_label
    test_plot_label = test_label
    if test_abs and not test_label.startswith('abs'):
        test_plot_label = 'abs(%s)' % test_plot_label

    plot_formats = []
    if pdf:
        plot_formats.append('pdf')
    if png:
        plot_formats.append('png')

    diff_symm = True
    if diff_min is not None and diff_max is None:
        diff_max = -diff_min
        diff_symm = False
    if diff_max is not None and diff_min is None:
        diff_min = -diff_max
        diff_symm = False

    fract_diff_symm = True
    if fract_diff_min is not None and fract_diff_max is None:
        fract_diff_max = -fract_diff_min
        fract_diff_symm = False
    if fract_diff_max is not None and fract_diff_min is None:
        fract_diff_min = -fract_diff_max
        fract_diff_symm = False

    asymm_symm = True
    if asymm_max is not None and asymm_min is None:
        asymm_min = -asymm_max
        asymm_symm = False
    if asymm_min is not None and asymm_max is None:
        asymm_max = -asymm_min
        asymm_symm = False

    outdir = os.path.expanduser(os.path.expandvars(outdir))
    mkdir(outdir)

    # Get the reference distribution(s) into the form of a test MapSet
    p_ref = None
    ref_source = None
    if isinstance(ref, Map):
        p_ref = MapSet(ref)
        ref_source = MAP_SOURCE_STR
    elif isinstance(ref, MapSet):
        p_ref = ref
        ref_source = MAPSET_SOURCE_STR
    elif isinstance(ref, Pipeline):
        if ref_param_selections is not None:
            ref.select_params(ref_param_selections)
        p_ref = ref.get_outputs()
        ref_source = PIPELINE_SOURCE_STR
    elif isinstance(ref, DistributionMaker):
        if ref_param_selections is not None:
            ref.select_params(ref_param_selections)
        p_ref = ref.get_outputs()
        ref_source = DISTRIBUTIONMAKER_SOURCE_STR
    else:
        if len(ref) == 1:
            try:
                ref_pipeline = Pipeline(config=ref[0])
            except Exception:
                pass
            else:
                ref_source = PIPELINE_SOURCE_STR
                if ref_param_selections is not None:
                    ref_pipeline.select_params(ref_param_selections)
                p_ref = ref_pipeline.get_outputs()
        else:
            try:
                ref_dmaker = DistributionMaker(pipelines=ref)
            except Exception:
                pass
            else:
                ref_source = DISTRIBUTIONMAKER_SOURCE_STR
                if ref_param_selections is not None:
                    ref_dmaker.select_params(ref_param_selections)
                p_ref = ref_dmaker.get_outputs()

    if p_ref is None:
        try:
            p_ref = [Map.from_json(f) for f in ref]
        except Exception:
            pass
        else:
            ref_source = MAP_SOURCE_STR
            p_ref = MapSet(p_ref)

    if p_ref is None:
        assert ref_param_selections is None
        assert len(ref) == 1, 'Can only handle one MapSet'
        try:
            p_ref = MapSet.from_json(ref[0])
        except Exception:
            pass
        else:
            ref_source = MAPSET_SOURCE_STR

    if p_ref is None:
        raise ValueError(
            'Could not instantiate the reference Pipeline, DistributionMaker,'
            ' Map, or MapSet from ref value(s) %s' % ref
        )
    ref = p_ref

    logging.info('Reference map(s) derived from a ' + ref_source)

    # Get the test distribution(s) into the form of a test MapSet
    p_test = None
    test_source = None
    if isinstance(test, Map):
        p_test = MapSet(test)
        test_source = MAP_SOURCE_STR
    elif isinstance(test, MapSet):
        p_test = test
        test_source = MAPSET_SOURCE_STR
    elif isinstance(test, Pipeline):
        if test_param_selections is not None:
            test.select_params(test_param_selections)
        p_test = test.get_outputs()
        test_source = PIPELINE_SOURCE_STR
    elif isinstance(test, DistributionMaker):
        if test_param_selections is not None:
            test.select_params(test_param_selections)
        p_test = test.get_outputs()
        test_source = DISTRIBUTIONMAKER_SOURCE_STR
    else:
        if len(test) == 1:
            try:
                test_pipeline = Pipeline(config=test[0])
            except Exception:
                pass
            else:
                test_source = PIPELINE_SOURCE_STR
                if test_param_selections is not None:
                    test_pipeline.select_params(test_param_selections)
                p_test = test_pipeline.get_outputs()
        else:
            try:
                test_dmaker = DistributionMaker(pipelines=test)
            except Exception:
                pass
            else:
                test_source = DISTRIBUTIONMAKER_SOURCE_STR
                if test_param_selections is not None:
                    test_dmaker.select_params(test_param_selections)
                p_test = test_dmaker.get_outputs()

    if p_test is None:
        try:
            p_test = [Map.from_json(f) for f in test]
        except Exception:
            pass
        else:
            test_source = MAP_SOURCE_STR
            p_test = MapSet(p_test)

    if p_test is None:
        assert test_param_selections is None
        assert len(test) == 1, 'Can only handle one MapSet'
        try:
            p_test = MapSet.from_json(test[0])
        except Exception:
            pass
        else:
            test_source = MAPSET_SOURCE_STR

    if p_test is None:
        raise ValueError(
            'Could not instantiate the test Pipeline, DistributionMaker, Map,'
            ' or MapSet from test value(s) %s' % test
        )
    test = p_test

    logging.info('Test map(s) derived from a ' + test_source)

    if combine is not None:
        ref = ref.combine_wildcard(combine)
        test = test.combine_wildcard(combine)
        if isinstance(ref, Map):
            ref = MapSet([ref])
        if isinstance(test, Map):
            test = MapSet([test])

    if sum is not None:
        ref = ref.sum(sum)
        test = test.sum(sum)

    # Set the MapSet names according to args passed by user
    ref.name = ref_label
    test.name = test_label

    # Save to disk the maps being plotted (excluding optional absolute-value
    # operations)
    if json:
        refmaps_path = os.path.join(outdir, 'maps__%s.json.bz2' % ref_label)
        to_file(ref, refmaps_path)
        testmaps_path = os.path.join(outdir, 'maps__%s.json.bz2' % test_label)
        to_file(test, testmaps_path)

    if set(test.names) != set(ref.names):
        raise ValueError('Test map names %s do not match ref map names %s.'
                         % (sorted(test.names), sorted(ref.names)))

    # Aliases to save keystrokes
    def masked(x):
        return np.ma.masked_invalid(x.nominal_values)

    def zero_to_nan(orig_map):
        newmap = deepcopy(orig_map)
        mask = np.isclose(newmap.nominal_values, 0, rtol=0, atol=EPSILON)
        newmap.hist[mask] = np.nan
        return newmap

    reordered_test = []
    new_ref = []
    diff_maps = []
    fract_diff_maps = []
    asymm_maps = []
    summary_stats = {}
    for ref_map in ref:
        test_map = test[ref_map.name].reorder_dimensions(ref_map.binning)
        if ref_abs:
            ref_map = abs(ref_map)
        if test_abs:
            test_map = abs(test_map)

        diff_map = test_map - ref_map
        fract_diff_map = (test_map - ref_map) / zero_to_nan(ref_map)
        asymm_map = (test_map - ref_map) / zero_to_nan(ref_map**0.5)
        abs_fract_diff_map = np.abs(fract_diff_map)

        new_ref.append(ref_map)
        reordered_test.append(test_map)
        diff_maps.append(diff_map)
        fract_diff_maps.append(fract_diff_map)
        asymm_maps.append(asymm_map)

        min_ref = np.min(masked(ref_map))
        max_ref = np.max(masked(ref_map))
        min_test = np.min(masked(test_map))
        max_test = np.max(masked(test_map))
        total_ref = np.sum(masked(ref_map))
        total_test = np.sum(masked(test_map))
        mean_ref = np.mean(masked(ref_map))
        mean_test = np.mean(masked(test_map))

        max_abs_fract_diff = np.max(masked(abs_fract_diff_map))
        mean_abs_fract_diff = np.mean(masked(abs_fract_diff_map))
        median_abs_fract_diff = np.median(masked(abs_fract_diff_map))

        mean_fract_diff = np.mean(masked(fract_diff_map))
        min_fract_diff = np.min(masked(fract_diff_map))
        max_fract_diff = np.max(masked(fract_diff_map))
        std_fract_diff = np.std(masked(fract_diff_map))

        mean_diff = np.mean(masked(diff_map))
        min_diff = np.min(masked(diff_map))
        max_diff = np.max(masked(diff_map))
        std_diff = np.std(masked(diff_map))

        median_diff = np.nanmedian(masked(diff_map))
        mad_diff = np.nanmedian(masked(np.abs(diff_map)))
        median_fract_diff = np.nanmedian(masked(fract_diff_map))
        mad_fract_diff = np.nanmedian(masked(np.abs(fract_diff_map)))

        min_asymm = np.min(masked(asymm_map))
        max_asymm = np.max(masked(asymm_map))
        total_asymm = np.sqrt(np.sum(masked(asymm_map)**2))

        summary_stats[test_map.name] = OrderedDict([
            ('min_ref', min_ref),
            ('max_ref', max_ref),
            ('total_ref', total_ref),
            ('mean_ref', mean_ref),
            ('min_test', min_test),
            ('max_test', max_test),
            ('total_test', total_test),
            ('mean_test', mean_test),
            ('max_abs_fract_diff', max_abs_fract_diff),
            ('mean_abs_fract_diff', mean_abs_fract_diff),
            ('median_abs_fract_diff', median_abs_fract_diff),
            ('min_fract_diff', min_fract_diff),
            ('max_fract_diff', max_fract_diff),
            ('mean_fract_diff', mean_fract_diff),
            ('std_fract_diff', std_fract_diff),
            ('median_fract_diff', median_fract_diff),
            ('mad_fract_diff', mad_fract_diff),
            ('min_diff', min_diff),
            ('max_diff', max_diff),
            ('mean_diff', mean_diff),
            ('std_diff', std_diff),
            ('median_diff', median_diff),
            ('mad_diff', mad_diff),
            ('min_asymm', min_asymm),
            ('max_asymm', max_asymm),
            ('total_asymm', total_asymm),
        ])

        logging.info('Map %s...', ref_map.name)
        logging.info('  Ref map(s):')
        logging.info('    min   :' + ('%.2f' % min_ref).rjust(12))
        logging.info('    max   :' + ('%.2f' % max_ref).rjust(12))
        logging.info('    total :' + ('%.2f' % total_ref).rjust(12))
        logging.info('    mean  :' + ('%.2f' % mean_ref).rjust(12))
        logging.info('  Test map(s):')
        logging.info('    min   :' + ('%.2f' % min_test).rjust(12))
        logging.info('    max   :' + ('%.2f' % max_test).rjust(12))
        logging.info('    total :' + ('%.2f' % total_test).rjust(12))
        logging.info('    mean  :' + ('%.2f' % mean_test).rjust(12))
        logging.info('  Absolute fract. diff., abs((Test - Ref) / Ref):')
        logging.info('    max   : %.4e', max_abs_fract_diff)
        logging.info('    mean  : %.4e', mean_abs_fract_diff)
        logging.info('    median: %.4e', median_abs_fract_diff)
        logging.info('  Fractional difference, (Test - Ref) / Ref:')
        logging.info('    min   : %.4e', min_fract_diff)
        logging.info('    max   : %.4e', max_fract_diff)
        logging.info('    mean  : %.4e +/- %.4e', mean_fract_diff,
                     std_fract_diff)
        logging.info('    median: %.4e +/- %.4e', median_fract_diff,
                     mad_fract_diff)
        logging.info('  Difference, Test - Ref:')
        logging.info('    min   : %.4e', min_diff)
        logging.info('    max   : %.4e', max_diff)
        logging.info('    mean  : %.4e +/- %.4e', mean_diff, std_diff)
        logging.info('    median: %.4e +/- %.4e', median_diff, mad_diff)
        logging.info('  Asymmetry, (Test - Ref) / sqrt(Ref):')
        logging.info('    min   : %.4e', min_asymm)
        logging.info('    max   : %.4e', max_asymm)
        logging.info('    total : %.4e (sum in quadrature)', total_asymm)
        logging.info('')

    ref = MapSet(new_ref)
    test = MapSet(reordered_test)
    diff = MapSet(diff_maps)
    fract_diff = MapSet(fract_diff_maps)
    asymm = MapSet(asymm_maps)

    if json:
        diff.to_json(os.path.join(
            outdir,
            'diff__%s__%s.json.bz2' % (test_plot_label, ref_plot_label)
        ))
        fract_diff.to_json(os.path.join(
            outdir,
            'fract_diff__%s___%s.json.bz2' % (test_plot_label, ref_plot_label)
        ))
        asymm.to_json(os.path.join(
            outdir,
            'asymm__%s___%s.json.bz2' % (test_plot_label, ref_plot_label)
        ))
        to_file(summary_stats, os.path.join(
            outdir,
            'stats__%s__%s.json.bz2' % (test_plot_label, ref_plot_label)
        ))

    for plot_format in plot_formats:
        # Plot the raw distributions
        plotter = Plotter(stamp='', outdir=outdir, fmt=plot_format, log=False,
                          annotate=False, symmetric=False, ratio=False)
        plotter.plot_2d_array(ref, fname='distr__%s' % ref_plot_label)
        plotter.plot_2d_array(test, fname='distr__%s' % test_plot_label)

        # Plot the difference (test - ref)
        plotter = Plotter(stamp='', outdir=outdir, fmt=plot_format, log=False,
                          annotate=False, symmetric=diff_symm, ratio=False)
        plotter.label = '%s - %s' % (test_plot_label, ref_plot_label)
        plotter.plot_2d_array(
            test - ref,
            fname='diff__%s__%s' % (test_plot_label, ref_plot_label),
            #vmin=diff_min, vmax=diff_max
        )

        # Plot the fractional difference (test - ref)/ref
        plotter = Plotter(stamp='', outdir=outdir, fmt=plot_format, log=False,
                          annotate=False, symmetric=fract_diff_symm,
                          ratio=True)
        plotter.label = ('(%s-%s)/%s'
                         % (test_plot_label, ref_plot_label, ref_plot_label))
        plotter.plot_2d_array(
            (test - ref) / MapSet([zero_to_nan(r) for r in ref]),
            fname='fract_diff__%s__%s' % (test_plot_label, ref_plot_label),
            #vmin=fract_diff_min, vmax=fract_diff_max
        )

        # Plot the asymmetry (test - ref)/sqrt(ref)
        plotter = Plotter(stamp='', outdir=outdir, fmt=plot_format, log=False,
                          annotate=False, symmetric=asymm_symm, ratio=True)
        plotter.label = (r'$(%s - %s)/\sqrt{%s}$'
                         % (test_plot_label, ref_plot_label, ref_plot_label))
        plotter.plot_2d_array(
            (test - ref) / MapSet([zero_to_nan(r**0.5) for r in ref]),
            fname='asymm__%s__%s' % (test_plot_label, ref_plot_label),
            #vmin=asymm_min, vmax=asymm_max
        )

    return summary_stats, diff, fract_diff, asymm
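# Illustrative usage sketch (not part of the original script): compares two
# stored map sets. The .json.bz2 file names and the combine patterns are
# hypothetical placeholders; only the `compare` signature above is assumed.
def _example_compare():
    """Minimal sketch of calling `compare` on two stored map sets."""
    return compare(
        outdir='./comparison',
        ref=['reference_maps.json.bz2'],   # hypothetical stored Map/MapSet file
        ref_label='ref',
        test=['test_maps.json.bz2'],       # hypothetical stored Map/MapSet file
        test_label='test',
        combine=['nue*', 'numu*'],         # hypothetical wildcard groupings
        json=True,                         # store the compared MapSets to disk
        pdf=True,                          # produce PDF plots
    )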
def main():
    args = parse_args()
    init_args_d = vars(args)

    # NOTE: Removing extraneous args that won't get passed to instantiate the
    # HypoTesting object via dictionary's `pop()` method.
    set_verbosity(init_args_d.pop('v'))
    detector = init_args_d.pop('detector')
    selection = init_args_d.pop('selection')

    # Normalize and convert `*_pipeline` filenames; store to `*_maker`
    # (which is the argument naming convention that HypoTesting init accepts).
    filenames = init_args_d.pop('pipeline')
    if filenames is not None:
        filenames = sorted([normcheckpath(fname) for fname in filenames])
    ps_str = init_args_d['param_selections']
    if ps_str is None:
        ps_list = None
    else:
        ps_list = [x.strip().lower() for x in ps_str.split(',')]

    data_maker = DistributionMaker(filenames)
    data_maker.select_params(ps_list)

    for data_pipeline in data_maker.pipelines:
        # Need a special case where PID is a separate stage
        if 'pid' in data_pipeline.stage_names:
            raise ValueError("Special case for separate PID stage currently "
                             "not implemented.")
        else:
            return_sum = True
    baseline_maps = data_maker.get_outputs(return_sum=return_sum)

    det_sel = []
    if detector.strip() != '':
        det_sel.append(detector.strip())
    if selection.strip() != '':
        det_sel.append(selection.strip())
    det_sel_label = ' '.join(det_sel)

    det_sel_plot_label = det_sel_label
    if det_sel_plot_label != '':
        det_sel_plot_label += ', '

    det_sel_file_label = det_sel_label
    if det_sel_file_label != '':
        det_sel_file_label += '_'
    det_sel_file_label = det_sel_file_label.replace(' ', '_')

    for data_param in data_maker.params.free:
        # Calculate a shifted value based on the prior if possible
        if hasattr(data_param, 'prior') and (data_param.prior is not None):
            # Gaussian priors are easy - just do 1 sigma
            if data_param.prior.kind == 'gaussian':
                data_param.value = \
                    data_param.value + data_param.prior.stddev
                shift_label = r"$1\sigma$"
            # Else do 10%, or +/- 1 if the baseline is zero
            else:
                if data_param.value != 0.0:
                    data_param.value = 1.1 * data_param.value
                    shift_label = r"10%"
                else:
                    data_param.value = 1.0
                    shift_label = r"1"
        # For no prior also do 10%, or +/- 1 if the baseline is zero
        else:
            if data_param.value != 0.0:
                data_param.value = 1.1 * data_param.value
                shift_label = r"10%"
            else:
                data_param.value = 1.0
                shift_label = r"1"
        up_maps = data_maker.get_outputs(return_sum=return_sum)
        data_maker.params.reset_free()

        if hasattr(data_param, 'prior') and (data_param.prior is not None):
            if data_param.prior.kind == 'gaussian':
                data_param.value = \
                    data_param.value - data_param.prior.stddev
            else:
                if data_param.value != 0.0:
                    data_param.value = 0.9 * data_param.value
                else:
                    data_param.value = -1.0
        else:
            if data_param.value != 0.0:
                data_param.value = 0.9 * data_param.value
            else:
                data_param.value = -1.0
        down_maps = data_maker.get_outputs(return_sum=return_sum)
        data_maker.params.reset_free()

        baseline_map = baseline_maps['total']
        baseline_map.set_errors(error_hist=None)
        up_map = up_maps['total']
        up_map.set_errors(error_hist=None)
        down_map = down_maps['total']
        down_map.set_errors(error_hist=None)

        pid_names = baseline_map.binning['pid'].bin_names
        if pid_names is None:
            logging.warning('There are no names given for the PID bins, thus '
                            'they will just be numbered in both the plot '
                            'save names and titles.')
            pid_names = list(range(baseline_map.binning['pid'].num_bins))

        gridspec_kw = dict(left=0.04, right=0.966, wspace=0.32)
        fig, axes = plt.subplots(nrows=2, ncols=len(pid_names),
                                 gridspec_kw=gridspec_kw,
                                 sharex=False, sharey=False,
                                 figsize=(7 * len(pid_names), 14))

        for i, pid_name in enumerate(pid_names):
            baseline = baseline_map.split(dim='pid', bin=pid_name)
            up_to_plot = up_map.split(dim='pid', bin=pid_name)
            up_to_plot = (up_to_plot - baseline) / baseline * 100.0
            down_to_plot = down_map.split(dim='pid', bin=pid_name)
            down_to_plot = (down_to_plot - baseline) / baseline * 100.0
            if isinstance(pid_name, int):
                pid_name = 'PID Bin %i' % (pid_name)
            else:
                pid_name += ' Channel'
            up_to_plot.plot(
                fig=fig,
                ax=axes[0][i],
                title="%s " % (pid_name) + "\n" + " %s + %s"
                      % (tex_axis_label(data_param.name), shift_label),
                titlesize=30,
                cmap=plt.cm.seismic,
                clabel='% Change from Baseline',
                clabelsize=30,
                xlabelsize=24,
                ylabelsize=24,
                symm=True
            )
            down_to_plot.plot(
                fig=fig,
                ax=axes[1][i],
                title="%s " % (pid_name) + "\n" + " %s - %s"
                      % (tex_axis_label(data_param.name), shift_label),
                titlesize=30,
                cmap=plt.cm.seismic,
                clabel='% Change from Baseline',
                clabelsize=30,
                xlabelsize=24,
                ylabelsize=24,
                symm=True
            )

        fig.subplots_adjust(hspace=0.4)
        savename = det_sel_file_label
        if savename != '' and savename[-1] != '_':
            savename += '_'
        savename += '%s_variation.png' % (data_param.name)
        mkdir(args.logdir, warn=False)
        fig.savefig(os.path.join(args.logdir, savename), bbox_inches='tight')
        plt.close(fig.number)
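# Illustrative sketch (not part of the original script): the up/down shift rule
# applied in `main` above, factored into a standalone helper for clarity. The
# `param` argument is assumed to behave like a param in `data_maker.params.free`
# (optionally carrying a `prior` with a `kind` and, for Gaussian priors, a
# `stddev`); this is a sketch of the logic, not a replacement for it.
def _shifted_value(param, direction=+1):
    """Return a shifted value: +/- 1 sigma for Gaussian priors, otherwise
    +/- 10% of a nonzero baseline, or +/- 1 if the baseline is zero."""
    prior = getattr(param, 'prior', None)
    if prior is not None and prior.kind == 'gaussian':
        # Gaussian priors are easy - just do 1 sigma
        return param.value + direction * prior.stddev
    if param.value != 0.0:
        # 10% shift of a nonzero baseline (1.1x up, 0.9x down)
        return (1.0 + direction * 0.1) * param.value
    # Zero baseline: shift by +/- 1
    return direction * 1.0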