Example #1
    def instantiate_disk_cache(self):
        """Instantiate a disk cache for use by the stage."""
        if isinstance(self.disk_cache, DiskCache):
            self.disk_cache_path = self.disk_cache.path
            return

        if self.disk_cache is False or self.disk_cache is None:
            self.disk_cache = None
            self.disk_cache_path = None
            return

        if isinstance(self.disk_cache, str):
            dirpath, filename = os.path.split(
                os.path.expandvars(os.path.expanduser(self.disk_cache))
            )
            if os.path.isabs(dirpath):
                self.disk_cache_path = os.path.join(dirpath, filename)
            else:
                self.disk_cache_path = os.path.join(CACHE_DIR, dirpath, filename)
        elif self.disk_cache is True:
            dirs = [CACHE_DIR, self.stage_name]
            dirpath = os.path.expandvars(os.path.expanduser(os.path.join(*dirs)))
            if self.service_name is not None and self.service_name != "":
                filename = self.service_name + ".sqlite"
            else:
                filename = "generic.sqlite"
            mkdir(dirpath, warn=False)
            self.disk_cache_path = os.path.join(dirpath, filename)
        else:
            raise ValueError("Don't know what to do with a %s." % type(self.disk_cache))

        self.disk_cache = DiskCache(self.disk_cache_path, max_depth=10, is_lru=False)
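The cache spec resolves one of four ways above: an existing DiskCache is reused, falsy values disable caching, a string is treated as a path, and True builds a default path. A minimal standalone sketch of the string-path branch (CACHE_DIR and the function name here are illustrative, not PISA's own):

import os

CACHE_DIR = "/tmp/pisa_cache"  # hypothetical cache root

def resolve_cache_path(spec, cache_dir=CACHE_DIR):
    """Resolve a user-supplied cache path spec to an absolute path."""
    dirpath, filename = os.path.split(
        os.path.expandvars(os.path.expanduser(spec))
    )
    # Absolute paths are honored; relative ones are rooted under cache_dir
    if os.path.isabs(dirpath):
        return os.path.join(dirpath, filename)
    return os.path.join(cache_dir, dirpath, filename)

print(resolve_cache_path("~/caches/aeff.sqlite"))  # absolute after expansion
print(resolve_cache_path("aeff/service.sqlite"))   # lands under CACHE_DIR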
Example #2
def main(return_outputs=False):
    """Main; call as script with `return_outputs=False` or interactively with
    `return_outputs=True`"""
    from pisa.utils.plotter import Plotter
    args = parse_args()
    set_verbosity(args.v)
    plot_formats = []
    if args.pdf:
        plot_formats.append('pdf')
    if args.png:
        plot_formats.append('png')

    distribution_maker = DistributionMaker(pipelines=args.pipeline)  # pylint: disable=redefined-outer-name
    if args.select is not None:
        distribution_maker.select_params(args.select)

    outputs = distribution_maker.get_outputs(return_sum=args.return_sum)  # pylint: disable=redefined-outer-name
    if args.outdir:
        # TODO: unique filename: append hash (or hash per pipeline config)
        fname = 'distribution_maker_outputs.json.bz2'
        mkdir(args.outdir)
        fpath = expand(os.path.join(args.outdir, fname))
        to_file(outputs, fpath)

    if args.outdir and plot_formats:
        my_plotter = Plotter(outdir=args.outdir,
                             fmt=plot_formats,
                             log=False,
                             annotate=False)
        for num, output in enumerate(outputs):
            my_plotter.plot_2d_array(output, fname='dist_output_%d' % num)

    if return_outputs:
        return distribution_maker, outputs
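As the docstring notes, the same entry point works interactively. A hedged sketch of that use; since `parse_args()` reads `sys.argv`, it is set explicitly first (the `--pipeline` flag name and config path are assumptions, not confirmed against the actual parser):

import sys

sys.argv = ['distribution_maker', '--pipeline',
            'settings/pipeline/example.cfg']  # hypothetical flag and path
distribution_maker, outputs = main(return_outputs=True)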
Example #3
def save_hyperplane_fits(input_data, fit_results, outdir, tag):
    """Store discrete systematics fits and chi-square values to a specified
    output location, with results identified by a tag.

    Parameters
    ----------
    input_data : mapping
        input data container returned by `hyperplane` function
    fit_results : dict
        fit results data container returned by `hyperplane` function
    outdir : string
        output directory
    tag : string
        identifier for filenames holding fit results

    """
    # Get some strings to use when naming
    dim = len(input_data["param_names"])
    param_str = "_".join(input_data["param_names"])

    # Store as JSON
    mkdir(outdir)
    res_path = join(
        outdir, "%s__%dd__%s__hyperplane_fits.json" % (tag, dim, param_str))
    to_file(fit_results, res_path)
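The output filename encodes the tag, the fit dimensionality, and the joined parameter names. A quick standalone check of that pattern with example values:

from os.path import join

param_names = ['dom_eff', 'hole_ice']  # example values
tag, outdir = 'nominal', '/tmp/fits'   # example values
dim = len(param_names)
param_str = '_'.join(param_names)
print(join(outdir, '%s__%dd__%s__hyperplane_fits.json' % (tag, dim, param_str)))
# -> /tmp/fits/nominal__2d__dom_eff_hole_ice__hyperplane_fits.json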
Example #4
def postproc_profile_scan(return_outputs=False):
    """Process the output files of profile_scan"""

    init_args_d = parse_args(
        description=postproc_profile_scan.__doc__,
        command='profile_scan'
    )

    if init_args_d['pseudo_experiments'] is not None:
        fluctuate_fid = True
        fluctuate_data = False
    else:
        fluctuate_fid = None
        fluctuate_data = None

    mkdir(init_args_d['outdir'])

    postprocessor = Postprocessor(
        analysis_type='profile_scan',
        detector=init_args_d['detector'],
        selection=init_args_d['selection'],
        outdir=init_args_d['outdir'],
        formats=init_args_d['formats'],
        scan_file=init_args_d['infile'],
        best_fit_file=init_args_d['best_fit_infile'],
        projection_files=init_args_d['projection_infile'],
        other_contours=init_args_d['other_contour'],
        pseudo_experiments=init_args_d['pseudo_experiments'],
        fluctuate_fid=fluctuate_fid,
        fluctuate_data=fluctuate_data
    )

    # 1D profile scans
    if len(postprocessor.all_bin_cens) == 1:
        postprocessor.plot_1d_scans()

    # 2D profile scans
    elif len(postprocessor.all_bin_cens) == 2:
        postprocessor.plot_2d_scans()

        if (postprocessor.all_bin_names[0] == 'theta23'
                and postprocessor.all_bin_names[1] == 'deltam31'):

            postprocessor.add_deltam32_sin2theta23()
            postprocessor.plot_2d_scans(
                xlabel='sin2theta23',
                xunits='dimensionless',
                ylabel='deltam32'
            )

    else:
        raise NotImplementedError(
            'Postprocessing of profile scans in anything other than 1D or'
            ' 2D not implemented in this script.'
        )

    if return_outputs:
        return postprocessor
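The plotting branch above keys purely on the number of binning dimensions. A minimal restatement of that dispatch, with a plain list standing in for `all_bin_cens`:

def scan_dimensionality(all_bin_cens):
    """Mirror the 1D/2D dispatch above; anything else is unsupported."""
    if len(all_bin_cens) == 1:
        return '1d'
    elif len(all_bin_cens) == 2:
        return '2d'
    raise NotImplementedError('Only 1D and 2D profile scans are supported.')

print(scan_dimensionality([[0.1, 0.2, 0.3]]))         # -> 1d
print(scan_dimensionality([[0.1, 0.2], [2.4, 2.5]]))  # -> 2d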
Example #5
def plot_variation(h0_map,
                   h1_map,
                   h0_name,
                   h1_name,
                   fulltitle,
                   savename,
                   outdir,
                   ftype='pdf'):
    matplotlib.rcParams['font.family'] = 'sans-serif'
    matplotlib.rcParams['mathtext.fontset'] = 'stixsans'

    gridspec_kw = dict(left=0.04, right=0.966, wspace=0.32)
    fig, axes = plt.subplots(nrows=1,
                             ncols=3,
                             gridspec_kw=gridspec_kw,
                             sharex=False,
                             sharey=False,
                             figsize=(15, 5))

    asymmetry_hist = (h1_map.hist - h0_map.hist) / np.sqrt(h0_map.hist)
    asymmetry_to_plot = Map(name='asymmetry',
                            hist=asymmetry_hist,
                            binning=h0_map.binning)

    asymmetrylabel = (
        r'$\left(N_{%s}-N_{%s}\right)'
        r'/\sqrt{N_{%s}}$' %
        (text2tex(h1_name), text2tex(h0_name), text2tex(h0_name)))

    vmax = max(np.nanmax(h0_map.hist), np.nanmax(h1_map.hist))

    h0_map.plot(fig=fig,
                ax=axes[0],
                title='Hypothesis 0: $%s$' % text2tex(h0_name),
                cmap=plt.cm.afmhot,
                vmax=vmax)

    h1_map.plot(fig=fig,
                ax=axes[1],
                title='Hypothesis 1: $%s$' % text2tex(h1_name),
                cmap=plt.cm.afmhot,
                vmax=vmax)

    asymmetry_to_plot.plot(fig=fig,
                           ax=axes[2],
                           title='Asymmetry',
                           symm=True,
                           cmap=plt.cm.seismic)

    plt.subplots_adjust(bottom=0.12, top=0.8)
    plt.suptitle(fulltitle, size='xx-large')
    if savename != '' and savename[-1] != '_':
        savename += '_'
    fname = '%s%s_%s_asymmetry.%s' % (savename, h0_name, h1_name, ftype)
    fname = fname.replace(' ', '_')
    mkdir(outdir, warn=False)
    fig.savefig(os.path.join(outdir, fname))
    plt.close(fig.number)
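The third panel is the per-bin pull, (N_h1 - N_h0) / sqrt(N_h0). A self-contained numpy sketch of the same quantity on toy count histograms:

import numpy as np

h0_hist = np.array([[100., 50.], [25., 10.]])  # toy hypothesis-0 counts
h1_hist = np.array([[110., 45.], [30., 10.]])  # toy hypothesis-1 counts
pull = (h1_hist - h0_hist) / np.sqrt(h0_hist)
print(pull)  # e.g. pull[0, 0] = 10 / 10 = 1.0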
Example #6
def main(return_outputs=False):
    """Main; call as script with `return_outputs=False` or interactively with
    `return_outputs=True`"""
    from pisa.utils.plotter import Plotter
    args = parse_args()
    set_verbosity(args.v)
    plot_formats = []
    if args.pdf:
        plot_formats.append('pdf')
    if args.png:
        plot_formats.append('png')
        
    detectors = Detectors(args.pipeline, shared_params=args.shared_params)
    det_names = detectors.det_names
    if args.select is not None:
        detectors.select_params(args.select)

    outputs = detectors.get_outputs(return_sum=args.return_sum)

    # outputs = outputs[0].fluctuate(
    #     method='poisson', random_state=get_random_state([0, 0, 0]))

    if args.outdir:
        # TODO: unique filename: append hash (or hash per pipeline config)
        fname = 'detectors_outputs.json.bz2'
        mkdir(args.outdir)
        fpath = expand(os.path.join(args.outdir, fname))
        to_file(outputs, fpath)

    if args.outdir and plot_formats:
        my_plotter = Plotter(
            outdir=args.outdir,
            fmt=plot_formats, log=False,
            annotate=False
        )
        for num, output in enumerate(outputs):
            if args.return_sum:
                my_plotter.plot_2d_array(
                    output,
                    fname=det_names[num]
                )
            else:
                for out in output:
                    my_plotter.plot_2d_array(
                        out,
                        fname=det_names[num]
                    )

    if return_outputs:
        return detectors, outputs
Example #7
def main():
    """Perform a hypersurface fit to discrete systematics sets."""

    # Get args
    args = parse_args()
    set_verbosity(args.v)

    # Read in data and fit hypersurfaces to it
    hypersurfaces = create_hypersurfaces(fit_cfg=args.fit_cfg)

    # Store as JSON
    mkdir(args.outdir)
    arbitrary_hypersurface = list(hypersurfaces.values())[0]
    output_path = join(args.outdir, get_hypersurface_file_name(arbitrary_hypersurface, args.tag))
    to_file(hypersurfaces, output_path)
Example #8
    def plot_xsec(self, map_set, ylim=None, logx=True):
        from pisa.utils import fileio

        zero_np_element = np.array([0])
        for map in map_set:
            binning = map.binning
            if 'true_energy' in binning.names:
                energy_binning = binning.true_energy
            elif 'reco_energy' in binning.names:
                energy_binning = binning.reco_energy
            else:
                dim_idx = binning.index('energy', use_basenames=True)
                energy_binning = binning.dims[dim_idx]

            fig = plt.figure(figsize=self.size)
            fig.suptitle(map.name, y=0.95)
            ax = fig.add_subplot(111)
            ax.grid(b=True, which='major')
            ax.grid(b=True, which='minor', linestyle=':')
            plt.xlabel(tex_dollars(energy_binning.label), size=18)
            plt.ylabel(tex_dollars(text2tex(self.label)), size=18)
            if self.log:
                ax.set_yscale('log')
            if logx:
                ax.set_xscale('log')
            if ylim:
                ax.set_ylim(ylim)
            ax.set_xlim(np.min(energy_binning.bin_edges.m),
                        np.max(energy_binning.bin_edges.m))

            hist = map.hist
            array_element = np.hstack((hist, zero_np_element))
            ax.step(energy_binning.bin_edges.m, array_element, where='post')

            fileio.mkdir(self.outdir)
            fig.savefig(self.outdir + '/' + map.name + '.png',
                        bbox_inches='tight',
                        dpi=150)
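Note the trailing zero appended to the histogram: `step(..., where='post')` needs one value per bin edge (N+1 edges for N bins), and the pad closes the final bin. A minimal matplotlib sketch of the same trick:

import numpy as np
import matplotlib.pyplot as plt

edges = np.array([1., 10., 100., 1000.])  # N + 1 = 4 bin edges
hist = np.array([5., 3., 1.])             # N = 3 bin contents
padded = np.hstack((hist, [0.]))          # one value per edge
fig, ax = plt.subplots()
ax.set_xscale('log')
ax.step(edges, padded, where='post')      # each bin drawn at its left edge
fig.savefig('step_demo.png', dpi=150)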
Example #9
def scan_allsyst(template_settings, steps, hypo_param_selections, outdir,
                 minimizer_settings, metric, debug_mode):
    """Scan (separately) all systematics (i.e., non-fixed params).

    Parameters
    ----------
    template_settings
    steps
    hypo_param_selections
    outdir
    minimizer_settings
    metric
    debug_mode

    Returns
    -------
    results : dict
        Keys are param names, values are the scan results

    """
    outdir = expanduser(expandvars(outdir))
    mkdir(outdir, warn=False)

    hypo_maker = DistributionMaker(template_settings)

    hypo_maker.select_params(hypo_param_selections)
    data_dist = hypo_maker.get_outputs(return_sum=True)

    minimizer_settings = from_file(minimizer_settings)

    analysis = Analysis()

    results = OrderedDict()  # pylint: disable=redefined-outer-name
    for param in hypo_maker.params:
        if param.is_fixed:
            continue

        logging.info('Scanning %s', param.name)
        nominal_value = param.value

        outfile = join(
            outdir,
            '{:s}_{:d}_steps_{:s}_scan.json'.format(param.name, steps, metric))
        if isfile(outfile):
            raise IOError(
                '`outfile` "{}" exists, not overwriting.'.format(outfile))

        results[param.name] = analysis.scan(
            data_dist=data_dist,
            hypo_maker=hypo_maker,
            hypo_param_selections=hypo_param_selections,
            metric=metric,
            param_names=param.name,
            steps=steps,
            only_points=None,
            outer=True,
            profile=False,
            minimizer_settings=minimizer_settings,
            outfile=outfile,
            debug_mode=debug_mode)

        to_file(results[param.name], outfile)
        param.value = nominal_value

        logging.info('Done scanning param "%s"', param.name)

    logging.info('Done.')

    return results
Example #10
def make_toy_events(outdir, num_events, energy_range, spectral_index,
                    coszen_range, num_sets, first_set, aeff_energy_param,
                    aeff_coszen_param, reco_param, pid_param, pid_dist):
    """Make toy events and store to a file.

    Parameters
    ----------
    outdir : string
    num_events : int
    energy_range : 2-tuple of floats
    spectral_index : float
    coszen_range : 2-tuple of floats
    num_sets : int
    first_set : int
    aeff_energy_param : string
    aeff_coszen_param : string
    reco_param : string
    pid_param : string
    pid_dist : string

    Returns
    -------
    events : :class:`pisa.core.events.Events`

    """
    energy_range = sorted(energy_range)
    coszen_range = sorted(coszen_range)

    # Validation of args
    assert energy_range[0] > 0 and energy_range[1] < 1e9
    assert coszen_range[0] >= -1 and coszen_range[1] <= 1
    assert np.diff(energy_range)[0] > 0, str(energy_range)
    assert np.diff(coszen_range)[0] > 0, str(coszen_range)
    assert spectral_index >= 0, str(spectral_index)
    assert first_set >= 0, str(first_set)
    assert num_sets >= 1, str(num_sets)

    # Make sure resources specified actually exist
    for arg in [aeff_energy_param, aeff_coszen_param, reco_param, pid_param]:
        find_resource(arg)

    mkdir(outdir, warn=False)

    set_indices = list(range(first_set, first_set + num_sets))

    # The following loop is for validation only
    for num, index in product(num_events, set_indices):
        mcgen_random_state(num_events=num, set_index=index)

    for num, set_index in product(num_events, set_indices):
        mcevts_fname = FNAME_TEMPLATE.format(
            file_type='events',
            detector='vlvnt',
            e_min=format_num(energy_range[0]),
            e_max=format_num(energy_range[1]),
            spectral_index=format_num(spectral_index,
                                      sigfigs=2,
                                      trailing_zeros=True),
            cz_min=format_num(coszen_range[0]),
            cz_max=format_num(coszen_range[1]),
            num_events=format_num(num, sigfigs=3, sci_thresh=(1, -1)),
            set_index=format_num(set_index, sci_thresh=(10, -10)),
            extension='hdf5')
        mcevts_fpath = os.path.join(outdir, mcevts_fname)
        if os.path.isfile(mcevts_fpath):
            logging.warning('File already exists, skipping: "%s"', mcevts_fpath)
            continue

        logging.info('Working on set "%s"', mcevts_fname)

        # TODO: pass filepaths / resource locations via command line args

        # Create a single random state object to pass from function to function
        random_state = mcgen_random_state(num_events=num, set_index=set_index)

        mc_events = generate_mc_events(
            num_events=num,
            energy_range=energy_range,
            coszen_range=coszen_range,
            spec_ind=spectral_index,
            aeff_energy_param_source=aeff_energy_param,
            aeff_coszen_param_source=aeff_coszen_param,
            random_state=random_state)
        populate_reco_observables(mc_events=mc_events,
                                  param_source=reco_param,
                                  random_state=random_state)
        populate_pid(mc_events=mc_events,
                     param_source=pid_param,
                     random_state=random_state,
                     dist=pid_dist)

        to_file(mc_events, mcevts_fpath)

    return mc_events
Example #11
def main(return_outputs=False):
    """Run unit tests if `pipeline.py` is called as a script."""
    from pisa.utils.plotter import Plotter

    args = parse_args()
    set_verbosity(args.v)

    # Even if user specifies an integer on command line, it comes in as a
    # string. Try to convert to int (e.g. if `'1'` is passed to indicate the
    # second stage), and -- if successful -- use this as `args.only_stage`.
    # Otherwise, the string value passed will be used (e.g. `'osc'` could be
    # passed).
    try:
        only_stage_int = int(args.only_stage)
    except (ValueError, TypeError):
        pass
    else:
        args.only_stage = only_stage_int

    if args.outdir:
        mkdir(args.outdir)
    else:
        if args.pdf or args.png:
            raise ValueError("No --outdir provided, so cannot save images.")

    # Most basic parsing of the pipeline config (parsing only to this level
    # allows for simple strings to be specified as args for updating)
    bcp = PISAConfigParser()
    bcp.read(args.pipeline)

    # Update the config with any args specified on command line
    if args.arg is not None:
        for arg_list in args.arg:
            if len(arg_list) < 2:
                raise ValueError(
                    'Args must be formatted as: "section arg=val". Got "%s"'
                    " instead." % " ".join(arg_list))
            section = arg_list[0]
            remainder = " ".join(arg_list[1:])
            eq_split = remainder.split("=")
            newarg = eq_split[0].strip()
            value = ("=".join(eq_split[1:])).strip()
            logging.debug('Setting config section "%s" arg "%s" = "%s"',
                          section, newarg, value)
            try:
                bcp.set(section, newarg, value)
            except NoSectionError:
                logging.error(
                    'Invalid section "%s" specified. Must be one of %s',
                    section,
                    bcp.sections(),
                )
                raise

    # Instantiate the pipeline
    pipeline = Pipeline(bcp)  # pylint: disable=redefined-outer-name

    if args.select is not None:
        pipeline.select_params(args.select, error_on_missing=True)

    if args.only_stage is None:
        stop_idx = args.stop_after_stage
        try:
            stop_idx = int(stop_idx)
        except (TypeError, ValueError):
            pass
        if isinstance(stop_idx, str):
            stop_idx = pipeline.index(stop_idx)
        outputs = pipeline.get_outputs(idx=stop_idx)  # pylint: disable=redefined-outer-name
        if stop_idx is not None:
            stop_idx += 1
        indices = slice(0, stop_idx)
    else:
        assert args.stop_after_stage is None
        idx = pipeline.index(args.only_stage)
        stage = pipeline[idx]
        indices = slice(idx, idx + 1)

        # Create dummy inputs if necessary
        inputs = None
        if hasattr(stage, "input_binning"):
            logging.warning(
                "Stage requires input, so building dummy"
                " inputs of random numbers, with random state set to the input"
                " index according to alphabetical ordering of input names and"
                " filled in alphabetical ordering of dimension names.")
            input_maps = []
            tmp = deepcopy(stage.input_binning)
            alphabetical_binning = tmp.reorder_dimensions(sorted(tmp.names))
            for input_num, input_name in enumerate(sorted(stage.input_names)):
                # Create a new map with all 3's; name according to the input
                hist = np.full(shape=alphabetical_binning.shape,
                               fill_value=3.0)
                input_map = Map(name=input_name,
                                binning=alphabetical_binning,
                                hist=hist)

                # Apply Poisson fluctuations to randomize the values in the map
                input_map.fluctuate(method="poisson", random_state=input_num)

                # Reorder dimensions according to user's original binning spec
                input_map.reorder_dimensions(stage.input_binning)
                input_maps.append(input_map)
            inputs = MapSet(maps=input_maps, name="ones", hash=1)

        outputs = stage.run(inputs=inputs)

    for stage in pipeline[indices]:
        if not args.outdir:
            break
        stg_svc = stage.stage_name + "__" + stage.service_name
        fbase = os.path.join(args.outdir, stg_svc)
        if args.intermediate or stage == pipeline[indices][-1]:
            stage.outputs.to_json(fbase + "__output.json.bz2")

        # also only plot if args intermediate or last stage
        if args.intermediate or stage == pipeline[indices][-1]:
            formats = OrderedDict(png=args.png, pdf=args.pdf)
            if isinstance(stage.outputs, Data):
                # TODO(shivesh): plots made here will use the most recent
                # "pisa_weight" column and so all stages will have identical plots
                # (one workaround is to turn on "memcache_deepcopy")
                # TODO(shivesh): intermediate stages have no output binning
                if stage.output_binning is None:
                    logging.debug("Skipping plot of intermediate stage %s",
                                  stage)
                    continue
                outputs = stage.outputs.histogram_set(
                    binning=stage.output_binning,
                    nu_weights_col="pisa_weight",
                    mu_weights_col="pisa_weight",
                    noise_weights_col="pisa_weight",
                    mapset_name=stg_svc,
                    errors=True,
                )

            try:
                for fmt, enabled in formats.items():
                    if not enabled:
                        continue
                    my_plotter = Plotter(
                        stamp="Event rate",
                        outdir=args.outdir,
                        fmt=fmt,
                        log=False,
                        annotate=args.annotate,
                    )
                    my_plotter.ratio = True
                    my_plotter.plot_2d_array(outputs,
                                             fname=stg_svc + "__output",
                                             cmap="RdBu")
            except ValueError as exc:
                logging.error(
                    "Failed to save plot to format %s. See exception"
                    " message below",
                    fmt,
                )
                logging.exception(exc)
                logging.warning("I can't go on, I'll go on.")

    if return_outputs:
        return pipeline, outputs
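The dummy-input trick above fills each map with a constant expectation and Poisson-fluctuates it using the input's index as the seed, so reruns produce identical pseudo-inputs. A numpy-only sketch of that idea (PISA's Map.fluctuate is replaced here by a direct call on a seeded RandomState):

import numpy as np

shape = (4, 3)  # stand-in for the stage's input binning shape
for input_num, input_name in enumerate(sorted(['nue', 'numu'])):
    rng = np.random.RandomState(input_num)  # seed = alphabetical input index
    hist = np.full(shape, 3.0)              # constant dummy expectation
    fluctuated = rng.poisson(hist)          # reproducible random counts
    print(input_name, fluctuated.sum())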
Example #12
                       prop=dict(size=12))
    plt.setp(legend.get_title(), fontsize=18)
    at = AnchoredText(r'$%s$' % map.tex,
                      prop=dict(size=20),
                      frameon=True,
                      loc=2)
    at.patch.set_boxstyle("round,pad=0.,rounding_size=0.5")
    ax.add_artist(at)
    fig.savefig(outfile, bbox_inches='tight', dpi=150)


if __name__ == "__main__":
    args = parse_args()
    set_verbosity(args.verbose)

    logging.info('Loading Map from file {0}'.format(args.infile))
    input_MapSet = MapSet.from_json(args.infile)
    if len(input_MapSet) > 1:
        input_Map = input_MapSet[args.name]
    else:
        input_Map = input_MapSet.pop()

    fileio.mkdir(args.outdir, mode=0o755)
    outfile = args.outdir + '/' + args.outname
    logging.info('outfile {0}'.format(outfile))
    plot_CFX_one(map=input_Map,
                 outfile=outfile,
                 logy=args.logy,
                 ylim=args.ylim,
                 ylabel=args.ylabel)
Example #13
    def _compute_nominal_transforms(self):
        self.load_events(self.params.aeff_events)
        self.cut_events(self.params.transform_events_keep_criteria)

        # Units must be the following for correctly converting a sum-of-
        # OneWeights-in-bin to an average effective area across the bin.
        comp_units = dict(true_energy='GeV', true_coszen=None,
                          true_azimuth='rad')

        # Select only the units in the input/output binning for conversion
        # (can't pass more than what's actually there)
        in_units = {dim: unit for dim, unit in comp_units.items()
                    if dim in self.input_binning}

        # TODO: use out_units for some kind of conversion?
        #out_units = {dim: unit for dim, unit in comp_units.items()
        #             if dim in self.output_binning}

        # These will be in the computational units
        input_binning = self.input_binning.to(**in_units)

        # Account for "missing" dimension(s) (dimensions OneWeight expects for
        # computation of bin volume), and accommodate with a factor equal to
        # the full range. See IceCube wiki/documentation for OneWeight for
        # more info.
        missing_dims_vol = 1
        if 'true_azimuth' not in input_binning:
            missing_dims_vol *= 2*np.pi
        if 'true_coszen' not in input_binning:
            missing_dims_vol *= 2

        if bool(self.debug_mode):
            outdir = os.path.join(find_resource('debug'),
                                  self.stage_name,
                                  self.service_name)
            mkdir(outdir)
            #hex_hash = hash2hex(kde_hash)

        bin_volumes = input_binning.bin_volumes(attach_units=False)
        norm_volumes = bin_volumes * missing_dims_vol

        nominal_transforms = []
        for xform_flavints in self.transform_groups:
            logging.debug('Working on %s effective areas xform',
                          xform_flavints)

            aeff_transform = self.events.histogram(
                kinds=xform_flavints,
                binning=input_binning,
                weights_col='weighted_aeff',
                errors=(self.error_method not in [None, False])
            )
            aeff_transform = aeff_transform.hist

            # Divide histogram by
            #   (energy bin width x coszen bin width x azimuth bin width)
            # volumes to convert from sums-of-OneWeights-in-bins to
            # effective areas. Note that volume correction factor for
            # missing dimensions is applied here.
            aeff_transform /= norm_volumes

            if self.debug_mode:
                outfile = os.path.join(
                    outdir, 'aeff_' + str(xform_flavints) + '.pkl'
                )
                to_file(aeff_transform, outfile)

            nominal_transforms.extend(
                populate_transforms(
                    service=self,
                    xform_flavints=xform_flavints,
                    xform_array=aeff_transform
                )
            )

        return TransformSet(transforms=nominal_transforms)
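The missing-dimension factor restores the ranges OneWeight assumes but that are absent from the binning: 2*pi for azimuth and 2 for cos(zenith). A toy numpy version of the normalization for a 2D energy-coszen binning (azimuth unbinned, so its factor applies):

import numpy as np

e_edges = np.array([1., 10., 100.])   # GeV
cz_edges = np.array([-1., 0., 1.])
bin_volumes = np.outer(np.diff(e_edges), np.diff(cz_edges))
missing_dims_vol = 2 * np.pi          # true_azimuth not in the binning
norm_volumes = bin_volumes * missing_dims_vol
sum_oneweights = np.array([[4e4, 2e4], [1e4, 5e3]])  # toy per-bin sums
aeff = sum_oneweights / norm_volumes  # average effective area per bin
print(aeff)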
Example #14
def plot_cmp(new,
             ref,
             new_label,
             ref_label,
             plot_label,
             file_label,
             outdir,
             ftype='png'):
    """Plot comparisons between two (identically-binned) maps or map sets.

    Parameters
    ----------
    new : Map or MapSet
    ref : Map or MapSet
    new_label : str
    ref_label : str
    plot_label : str
    file_label : str
    outdir : str
    ftype : str

    """
    path = [outdir]

    if isinstance(ref, Map):
        assert isinstance(new, Map)
        ref_maps = [ref]
        new_maps = [new]
    else:
        # Assume iterables of maps (e.g. MapSet) of equal length
        ref_maps = ref
        new_maps = new

    if outdir is not None:
        mkdir(os.path.join(*path), warn=False)

    for ref, new in zip(ref_maps, new_maps):
        assert ref.binning == new.binning
        fname = get_valid_filename('__'.join([
            get_valid_filename(file_label),
            '%s_vs_%s' % (get_valid_filename(new_label.lower()),
                          get_valid_filename(ref_label.lower()))
        ]) + '.' + ftype)
        path.append(fname)

        ratio = new / ref
        diff = new - ref
        fract_diff = diff / ref

        finite_ratio = ratio.hist[np.isfinite(ratio.hist)]
        ratio_mean = np.mean(finite_ratio)
        ratio_median = np.median(finite_ratio)

        finite_diff = diff.hist[np.isfinite(diff.hist)]
        diff_mean = np.mean(finite_diff)
        diff_median = np.median(finite_diff)

        finite_fract_diff = fract_diff.hist[np.isfinite(fract_diff.hist)]
        fract_diff_mean = np.mean(finite_fract_diff)
        fract_diff_median = np.median(finite_fract_diff)

        max_diff_ratio = np.nanmax(fract_diff.hist)

        # Handle cases where ratio returns infinite
        # This isn't necessarily a fail, since all it means is the reference
        # was zero. If the new value is sufficiently close to zero then it's
        # still fine.
        if max_diff_ratio == np.inf:
            logging.warning(
                'Infinite value found in ratio tests. Difference tests'
                ' now also being calculated')
            # First find all the finite elements
            finite_mask = np.isfinite(fract_diff.hist)
            # Then find the nanmax of this, will be our new test value
            max_diff_ratio = np.nanmax(fract_diff.hist[finite_mask])
            # Also find all the infinite elements; compute a second test value
            max_diff = np.nanmax(diff.hist[~finite_mask])
        else:
            # Without any infinite elements we can ignore this second test
            max_diff = 0.0

        if outdir is not None:
            if new.binning.num_dims == 2:
                n_dims = 2
                n_third_dim_bins = 1
            elif new.binning.num_dims == 3:
                n_dims = 3
                odd_dim_idx = new.binning.shape.index(np.min(
                    new.binning.shape))
                logging.debug('odd_dim_idx: %s', odd_dim_idx)
                n_third_dim_bins = new.binning.shape[odd_dim_idx]

            gridspec_kw = dict(left=0.03, right=0.968, wspace=0.32)
            fig, axes = plt.subplots(nrows=n_third_dim_bins,
                                     ncols=5,
                                     gridspec_kw=gridspec_kw,
                                     squeeze=False,
                                     sharex=False,
                                     sharey=False,
                                     figsize=(20, 5))

            refslice = ref
            newslice = new
            bin_names = None
            if n_dims == 3:
                if odd_dim_idx != 0:
                    refslice = np.moveaxis(ref,
                                           source=odd_dim_idx,
                                           destination=0)
                    newslice = np.moveaxis(new,
                                           source=odd_dim_idx,
                                           destination=0)
                bin_names = new.binning.dims[odd_dim_idx].bin_names

            for odd_bin_idx in range(n_third_dim_bins):
                if n_dims == 2:
                    thisbin_ref = refslice
                    thisbin_new = newslice
                    tmp_ref_label = ref_label
                    tmp_new_label = new_label

                elif n_dims == 3:
                    thisbin_ref = refslice[odd_bin_idx, ...].squeeze()
                    thisbin_new = newslice[odd_bin_idx, ...].squeeze()

                    if bin_names is not None:
                        suffix = bin_names[odd_bin_idx]
                    else:
                        suffix = format(odd_bin_idx, 'd')
                    tmp_new_label = new_label + ' ' + suffix
                    tmp_ref_label = ref_label + ' ' + suffix

                    ratio = thisbin_new / thisbin_ref
                    diff = thisbin_new - thisbin_ref
                    fract_diff = diff / thisbin_ref

                refmax = np.nanmax(thisbin_ref.hist)
                newmax = np.nanmax(thisbin_new.hist)
                vmax = refmax if refmax > newmax else newmax

                baseplot2(map=thisbin_new,
                          title=tmp_new_label,
                          vmax=vmax,
                          evtrate=True,
                          ax=axes[odd_bin_idx][0])

                baseplot2(map=thisbin_ref,
                          title=tmp_ref_label,
                          vmax=vmax,
                          evtrate=True,
                          ax=axes[odd_bin_idx][1])

                ax, _, _ = baseplot2(map=ratio,
                                     title='%s/%s' %
                                     (tmp_new_label, tmp_ref_label),
                                     ax=axes[odd_bin_idx][2])
                ax.text(0.95,
                        0.95,
                        "Mean: %.6f" % ratio_mean,
                        horizontalalignment='right',
                        transform=ax.transAxes,
                        color=(0, 0.8, 0.8))
                ax.text(0.95,
                        0.91,
                        "Median: %.6f" % ratio_median,
                        horizontalalignment='right',
                        transform=ax.transAxes,
                        color=(0, 0.8, 0.8))

                ax, _, _ = baseplot2(map=diff,
                                     title='%s-%s' %
                                     (tmp_new_label, tmp_ref_label),
                                     symm=True,
                                     ax=axes[odd_bin_idx][3])
                ax.text(0.95,
                        0.95,
                        "Mean: %.6f" % diff_mean,
                        horizontalalignment='right',
                        transform=ax.transAxes)
                ax.text(0.95,
                        0.91,
                        "Median: %.6f" % diff_median,
                        horizontalalignment='right',
                        transform=ax.transAxes)

                ax, _, _ = baseplot2(
                    map=fract_diff,
                    title='(%s-%s)/%s' %
                    (tmp_new_label, tmp_ref_label, tmp_ref_label),
                    symm=True,
                    ax=axes[odd_bin_idx][4])
                ax.text(0.95,
                        0.95,
                        "Mean: %.6f" % fract_diff_mean,
                        horizontalalignment='right',
                        transform=ax.transAxes)
                ax.text(0.95,
                        0.91,
                        "Median: %.6f" % fract_diff_median,
                        horizontalalignment='right',
                        transform=ax.transAxes)

            logging.debug('>>>> Plot for inspection saved at %s' %
                          os.path.join(*path))
            fig.savefig(os.path.join(*path))
            plt.close(fig.number)

    return max_diff_ratio, max_diff
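The infinite-ratio fallback reduces to masking non-finite entries before taking the maximum, then testing the absolute difference where the reference was zero. A compact numpy sketch:

import numpy as np

fract_diff_hist = np.array([0.01, np.inf, 0.2, np.nan])
diff_hist = np.array([0.5, 1e-9, 2.0, 0.0])
finite_mask = np.isfinite(fract_diff_hist)
max_diff_ratio = np.nanmax(fract_diff_hist[finite_mask])  # 0.2
max_diff = np.nanmax(diff_hist[~finite_mask])             # where ref was zero
print(max_diff_ratio, max_diff)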
Example #15
def plot_map_comparisons(ref_map,
                         new_map,
                         ref_abv,
                         new_abv,
                         outdir,
                         subdir,
                         name,
                         texname,
                         stagename,
                         servicename,
                         shorttitles=False,
                         ftype='png'):
    """Plot comparisons between two identically-binned PISA 3 style maps"""
    path = [outdir]

    if subdir is None:
        subdir = stagename.lower()
    path.append(subdir)

    if outdir is not None:
        mkdir(os.path.join(*path), warn=False)

    if stagename is not None:
        fname = [
            '%s_%s_comparisons' % (ref_abv.lower(), new_abv.lower()),
            'stage_' + stagename
        ]
    else:
        fname = ['%s_%s_comparisons' % (ref_abv.lower(), new_abv.lower())]
    if servicename is not None:
        fname.append('service_' + servicename)
    if name is not None:
        fname.append(name.lower())
    fname = '__'.join(fname) + '.' + ftype

    path.append(fname)

    basetitle = []
    if stagename is not None:
        basetitle.append('%s' % stagename)
    if texname is not None:
        basetitle.append(r'$%s$' % texname)
    basetitle = ' '.join(basetitle)

    validate_map_objs(new_map, ref_map)
    with np.errstate(divide='ignore', invalid='ignore'):
        ratio_map = new_map / ref_map
    diff_map = new_map - ref_map
    with np.errstate(divide='ignore', invalid='ignore'):
        diff_ratio_map = diff_map / ref_map

    max_diff_ratio = np.nanmax(np.abs(diff_ratio_map.hist))

    # Handle cases where ratio returns infinite
    # This isn't necessarily a fail, since all it means is the reference was
    # zero. If the new value is sufficiently close to zero then it's still
    # fine.
    if max_diff_ratio == float('inf'):
        logging.warning('Infinite value found in ratio tests. Difference '
                        'tests now also being calculated')
        # First find all the finite elements
        finite_map = np.isfinite(diff_ratio_map.hist)
        # Then find the nanmax of this, will be our new test value
        max_diff_ratio = np.nanmax(np.abs(diff_ratio_map.hist[finite_map]))
        # Also find all the infinite elements
        infinite_map = np.logical_not(finite_map)
        # This will be a second test value
        max_diff = np.nanmax(np.abs(diff_map.hist[infinite_map]))
    else:
        # Without any infinite elements we can ignore this second test
        max_diff = 0.0

    if outdir is not None:
        gridspec_kw = dict(left=0.03, right=0.968, wspace=0.32)
        fig, axes = plt.subplots(nrows=1,
                                 ncols=5,
                                 gridspec_kw=gridspec_kw,
                                 sharex=False,
                                 sharey=False,
                                 figsize=(20, 5))
        if shorttitles:
            ref_map.plot(fig=fig,
                         ax=axes[0],
                         title=basetitle + ' ' + ref_abv + ' (A)',
                         cmap=plt.cm.afmhot)
            new_map.plot(fig=fig,
                         ax=axes[1],
                         title=basetitle + ' ' + new_abv + ' (B)',
                         cmap=plt.cm.afmhot)
            ratio_map.plot(fig=fig,
                           ax=axes[2],
                           title='A/B',
                           cmap=plt.cm.afmhot)
            diff_map.plot(fig=fig,
                          ax=axes[3],
                          title='A-B',
                          symm=True,
                          cmap=plt.cm.seismic)
            diff_ratio_map.plot(fig=fig,
                                ax=axes[4],
                                title='(A-B)/A',
                                symm=True,
                                cmap=plt.cm.seismic)
        else:
            ref_map.plot(fig=fig,
                         ax=axes[0],
                         title=basetitle + ' ' + ref_abv,
                         cmap=plt.cm.afmhot)
            new_map.plot(fig=fig,
                         ax=axes[1],
                         title=basetitle + ' ' + new_abv,
                         cmap=plt.cm.afmhot)
            ratio_map.plot(fig=fig,
                           ax=axes[2],
                           title=basetitle + ' %s/%s' % (new_abv, ref_abv),
                           cmap=plt.cm.afmhot)
            diff_map.plot(fig=fig,
                          ax=axes[3],
                          title=basetitle + ' %s-%s' % (new_abv, ref_abv),
                          symm=True,
                          cmap=plt.cm.seismic)
            diff_ratio_map.plot(fig=fig,
                                ax=axes[4],
                                title=basetitle + ' (%s-%s)/%s' %
                                (new_abv, ref_abv, ref_abv),
                                symm=True,
                                cmap=plt.cm.seismic)
        logging.debug('>>>> Plot for inspection saved at %s' %
                      os.path.join(*path))
        fig.savefig(os.path.join(*path))
        plt.close(fig.number)

    return max_diff_ratio, max_diff
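Because the function returns (max_diff_ratio, max_diff), it doubles as a numerical regression check. A hedged usage sketch, assuming `ref_map` and `new_map` are already-built PISA Map objects and the tolerances are the caller's choice:

max_diff_ratio, max_diff = plot_map_comparisons(
    ref_map=ref_map, new_map=new_map,  # assumed pre-built Maps
    ref_abv='v1', new_abv='v2',
    outdir='/tmp/cmp', subdir=None,
    name='aeff', texname=r'A_{\rm eff}',
    stagename='aeff', servicename='hist',
)
assert max_diff_ratio < 1e-6 or max_diff < 1e-8, 'maps diverged'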
Example #16
def main():
    global SIGMA
    args = vars(parse_args())
    set_verbosity(args.pop('v'))
    center_zero = args.pop('center_zero')

    make_pdf = False
    if args['pdf']:
        make_pdf = True
        args['pdf'] = False

    outdir = args.pop('outdir')
    fileio.mkdir(outdir, mode=0o755)
    SIGMA *= args.pop('sigma')

    cfx_pipe = Pipeline(args.pop('cfx_pipeline'))

    signal = args.pop('signal').replace(' ', '').split(',')
    output_str = []
    for name in signal:
        if 'muons' in name or 'noise' in name:
            raise AssertionError('Are you trying to unfold muons/noise?')
        elif 'all_nu' in name:
            output_str = [str(NuFlavIntGroup(f)) for f in ALL_NUFLAVINTS]
        else:
            output_str.append(NuFlavIntGroup(name))
    output_str = [str(f) for f in output_str]
    cfx_pipe._output_names = output_str

    # Turn off stat fluctuations
    stat_param = cfx_pipe.params['stat_fluctuations']
    stat_param.value = 0 * ureg.dimensionless
    cfx_pipe.update_params(stat_param)

    # Get nominal Map
    re_param = cfx_pipe.params['regularisation']
    re_param.value = 0 * ureg.dimensionless
    cfx_pipe.update_params(re_param)
    nom_out = cfx_pipe.get_outputs()

    re_param.reset()
    cfx_pipe.update_params(re_param)

    params = ParamSet()
    for param in cfx_pipe.params:
        if param.name != 'dataset':
            params.extend(param)

    free = params.free
    logging.info('Free params = {0}'.format(free))
    contin = True
    for f in free:
        if 'hole_ice' not in f.name and 'dom_eff' not in f.name:
            continue
        # if 'atm_muon_scale' in f.name:
        #     contin = False
        # if contin:
        #     continue

        logging.info('Working on parameter {0}'.format(f.name))
        if f.prior.kind != 'uniform':
            # Use deltaLLH = SIGMA to define +/- sigma for non-uniform
            scan_over = np.linspace(*f.range, num=1000) * f.range[0].u
            llh = f.prior.llh(scan_over)
            dllh = np.max(llh) - llh  # deltaLLH relative to the best-fit point

            mllh_idx = np.argmin(-llh)
            if mllh_idx == 0:
                l_sig_idx = 0
            else:
                l_sig_idx = np.argmin(np.abs(dllh[:mllh_idx] - SIGMA))
            u_sig_idx = np.argmin(np.abs(dllh[mllh_idx:] - SIGMA)) + mllh_idx

            l_sigma = scan_over[l_sig_idx]
            u_sigma = scan_over[u_sig_idx]
        else:
            l_sigma = f.range[0]
            u_sigma = f.range[1]

        logging.info('Setting {0} lower sigma bound to '
                     '{1}'.format(f.name, l_sigma))
        f.value = l_sigma
        cfx_pipe.update_params(f)
        l_out = cfx_pipe.get_outputs()

        logging.info('Setting {0} upper sigma bound to '
                     '{1}'.format(f.name, u_sigma))
        f.value = u_sigma
        cfx_pipe.update_params(f)
        u_out = cfx_pipe.get_outputs()

        f.reset()
        cfx_pipe.update_params(f)

        f_outdir = outdir + '/' + f.name
        l_outdir = f_outdir + '/' + 'lower'
        u_outdir = f_outdir + '/' + 'upper'
        fileio.mkdir(f_outdir)
        fileio.mkdir(l_outdir)
        fileio.mkdir(u_outdir)

        compare(outdir=l_outdir,
                ref=MapSet([nom_out]),
                ref_label='baseline',
                test=MapSet([l_out]),
                test_label=r'-sigma',
                **args)
        compare(outdir=u_outdir,
                ref=MapSet([nom_out]),
                ref_label='baseline',
                test=MapSet([u_out]),
                test_label=r'+sigma',
                **args)

        l_in_mapset = l_outdir + '/' + 'fract_diff__-sigma___baseline.json.bz2'
        u_in_mapset = u_outdir + '/' + 'fract_diff__+sigma___baseline.json.bz2'
        l_in_map = MapSet.from_json(l_in_mapset).pop() * 100.
        u_in_map = MapSet.from_json(u_in_mapset).pop() * 100.

        if make_pdf:
            outfile = f_outdir + '/systematic_effect.pdf'
        else:
            outfile = f_outdir + '/systematic_effect.png'
        title = r'% effect on ' + r'${0}$'.format(l_in_map.tex) + \
                ' event counts for {0} parameter'.format(f.name)
        sub_titles = (r'(-\sigma - {\rm baseline}) \:/\: {\rm baseline}',
                      r'(+\sigma - {\rm baseline}) \:/\: {\rm baseline}')
        make_plot(
            maps=(l_in_map, u_in_map),
            outfile=outfile,
            logv=False,
            center_zero=center_zero,
            vlabel=r'({\rm change} - {\rm baseline}) \:/\: {\rm baseline} (%)',
            title=title,
            sub_titles=sub_titles)
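The +/- sigma extraction locates, on each side of the likelihood maximum, the scan point whose deltaLLH is closest to SIGMA. A standalone numpy sketch of the same search on a Gaussian log-likelihood, where the 1-sigma points sit at deltaLLH = 0.5:

import numpy as np

SIGMA = 0.5  # deltaLLH at 1 sigma for a 1D Gaussian
scan_over = np.linspace(-3.0, 3.0, 1000)
llh = -0.5 * scan_over**2  # Gaussian log-likelihood, max at 0
dllh = np.max(llh) - llh   # deltaLLH relative to the best fit
mllh_idx = np.argmax(llh)
l_sig_idx = np.argmin(np.abs(dllh[:mllh_idx] - SIGMA))
u_sig_idx = np.argmin(np.abs(dllh[mllh_idx:] - SIGMA)) + mllh_idx
print(scan_over[l_sig_idx], scan_over[u_sig_idx])  # ~ -1.0 and +1.0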
Example #17
def compare(outdir,
            ref,
            ref_label,
            test,
            test_label,
            asymm_max=None,
            asymm_min=None,
            combine=None,
            diff_max=None,
            diff_min=None,
            fract_diff_max=None,
            fract_diff_min=None,
            json=False,
            pdf=False,
            png=False,
            ref_abs=False,
            ref_param_selections=None,
            sum=None,
            test_abs=False,
            test_param_selections=None):
    """Compare two entities. The result each entity specification is
    formatted into a MapSet and stored to disk, so that e.g. re-running
    a DistributionMaker is unnecessary to reproduce the results.

    Parameters
    ----------
    outdir : string
        Store output plots to this directory

    ref : string or array of strings
        Pipeline settings config file that generates reference output,
        or a stored map or map set. Multiple pipelines, maps, or map sets are
        supported

    ref_abs : bool
        Use the absolute value of the reference plot for comparisons

    ref_label : string
        Label for reference

    ref_param_selections : string
        Param selections to apply to ref pipeline config(s). Not
        applicable if ref specifies stored map or map sets

    test : string or array of strings
        Pipeline settings config file that generates test output, or a
        stored map or map set. Multiple pipelines, maps, or map sets are
        supported

    test_abs : bool
        Use the absolute value of the test plot for comparisons

    test_label : string
        Label for test

    test_param_selections : None or string
        Param selections to apply to test pipeline config(s). Not
        applicable if test specifies stored map or map sets

    combine : None or string or array of strings
        Combine by wildcard string, where string globbing (a la command
        line) uses asterisk for any number of wildcard characters. Use
        single quotes such that asterisks do not get expanded by the
        shell. Multiple combine strings supported

    sum : None or int
        Sum over (and hence remove) the specified axis or axes. I.e.,
        project the map onto remaining (unspecified) axis or axes

    json : bool
        Save output maps in compressed json (json.bz2) format

    pdf : bool
        Save plots in PDF format. If neither this nor png is
        specified, no plots are produced

    png : bool
        Save plots in PNG format. If neither this nor pdf is specified,
        no plots are produced

    diff_min : None or float
        Difference plot vmin; if you specify only one of diff_min or
        diff_max, symmetric limits are automatically used (min = -max)

    diff_max : None or float
        Difference plot vmax; if you specify only one of diff_min or
        diff_max, symmetric limits are automatically used (min = -max)

    fract_diff_min : None or float
        Fractional difference plot vmin; if you specify only one of
        fract_diff_min or fract_diff_max, symmetric limits are
        automatically used (min = -max)

    fract_diff_max : None or float
        Fractional difference plot vmax; if you specify only one of
        fract_diff_min or fract_diff_max, symmetric limits are
        automatically used (min = -max)

    asymm_min : None or float
        Asymmetry plot vmin; if you specify only one of asymm_min or
        asymm_max, symmetric limits are automatically used (min = -max)

    asymm_max : None or float
        Asymmetry plot vmax; if you specify only one of
        asymm_min or asymm_max, symmetric limits are automatically used
        (min = -max)

    Returns
    -------
    summary_stats : dict
        Dictionary containing a summary for each Map processed

    diff : MapSet
        MapSet of the difference
        - (Test - Ref)

    fract_diff : MapSet
        MapSet of the fractional difference
        - (Test - Ref) / Ref

    asymm : MapSet
        MapSet of the asymmetric fractional difference, or pull
        - (Test - Ref) / sqrt(Ref)

    """
    ref_plot_label = ref_label
    if ref_abs and not ref_label.startswith('abs'):
        ref_plot_label = 'abs(%s)' % ref_plot_label
    test_plot_label = test_label
    if test_abs and not test_label.startswith('abs'):
        test_plot_label = 'abs(%s)' % test_plot_label

    plot_formats = []
    if pdf:
        plot_formats.append('pdf')
    if png:
        plot_formats.append('png')

    diff_symm = True
    if diff_min is not None and diff_max is None:
        diff_max = -diff_min
        diff_symm = False
    if diff_max is not None and diff_min is None:
        diff_min = -diff_max
        diff_symm = False

    fract_diff_symm = True
    if fract_diff_min is not None and fract_diff_max is None:
        fract_diff_max = -fract_diff_min
        fract_diff_symm = False
    if fract_diff_max is not None and fract_diff_min is None:
        fract_diff_min = -fract_diff_max
        fract_diff_symm = False

    asymm_symm = True
    if asymm_max is not None and asymm_min is None:
        asymm_min = -asymm_max
        asymm_symm = False
    if asymm_min is not None and asymm_max is None:
        asymm_max = -asymm_min
        asymm_symm = False

    outdir = os.path.expanduser(os.path.expandvars(outdir))
    mkdir(outdir)

    # Get the reference distribution(s) into the form of a test MapSet
    p_ref = None
    ref_source = None
    if isinstance(ref, Map):
        p_ref = MapSet(ref)
        ref_source = MAP_SOURCE_STR
    elif isinstance(ref, MapSet):
        p_ref = ref
        ref_source = MAPSET_SOURCE_STR
    elif isinstance(ref, Pipeline):
        if ref_param_selections is not None:
            ref.select_params(ref_param_selections)
        p_ref = ref.get_outputs()
        ref_source = PIPELINE_SOURCE_STR
    elif isinstance(ref, DistributionMaker):
        if ref_param_selections is not None:
            ref.select_params(ref_param_selections)
        p_ref = ref.get_outputs()
        ref_source = DISTRIBUTIONMAKER_SOURCE_STR
    else:
        if len(ref) == 1:
            try:
                ref_pipeline = Pipeline(config=ref[0])
            except Exception:
                pass
            else:
                ref_source = PIPELINE_SOURCE_STR
                if ref_param_selections is not None:
                    ref_pipeline.select_params(ref_param_selections)
                p_ref = ref_pipeline.get_outputs()
        else:
            try:
                ref_dmaker = DistributionMaker(pipelines=ref)
            except Exception:
                pass
            else:
                ref_source = DISTRIBUTIONMAKER_SOURCE_STR
                if ref_param_selections is not None:
                    ref_dmaker.select_params(ref_param_selections)
                p_ref = ref_dmaker.get_outputs()

    if p_ref is None:
        try:
            p_ref = [Map.from_json(f) for f in ref]
        except Exception:
            pass
        else:
            ref_source = MAP_SOURCE_STR
            p_ref = MapSet(p_ref)

    if p_ref is None:
        assert ref_param_selections is None
        assert len(ref) == 1, 'Can only handle one MapSet'
        try:
            p_ref = MapSet.from_json(ref[0])
        except Exception:
            pass
        else:
            ref_source = MAPSET_SOURCE_STR

    if p_ref is None:
        raise ValueError(
            'Could not instantiate the reference Pipeline, DistributionMaker,'
            ' Map, or MapSet from ref value(s) %s' % ref)
    ref = p_ref

    logging.info('Reference map(s) derived from a ' + ref_source)

    # Get the test distribution(s) into the form of a test MapSet
    p_test = None
    test_source = None
    if isinstance(test, Map):
        p_test = MapSet(test)
        test_source = MAP_SOURCE_STR
    elif isinstance(test, MapSet):
        p_test = test
        test_source = MAPSET_SOURCE_STR
    elif isinstance(test, Pipeline):
        if test_param_selections is not None:
            test.select_params(test_param_selections)
        p_test = test.get_outputs()
        test_source = PIPELINE_SOURCE_STR
    elif isinstance(test, DistributionMaker):
        if test_param_selections is not None:
            test.select_params(test_param_selections)
        p_test = test.get_outputs()
        test_source = DISTRIBUTIONMAKER_SOURCE_STR
    else:
        if len(test) == 1:
            try:
                test_pipeline = Pipeline(config=test[0])
            except Exception:
                pass
            else:
                test_source = PIPELINE_SOURCE_STR
                if test_param_selections is not None:
                    test_pipeline.select_params(test_param_selections)
                p_test = test_pipeline.get_outputs()
        else:
            try:
                test_dmaker = DistributionMaker(pipelines=test)
            except Exception:
                pass
            else:
                test_source = DISTRIBUTIONMAKER_SOURCE_STR
                if test_param_selections is not None:
                    test_dmaker.select_params(test_param_selections)
                p_test = test_dmaker.get_outputs()

    if p_test is None:
        try:
            p_test = [Map.from_json(f) for f in test]
        except Exception:
            pass
        else:
            test_source = MAP_SOURCE_STR
            p_test = MapSet(p_test)

    if p_test is None:
        assert test_param_selections is None
        assert len(test) == 1, 'Can only handle one MapSet'
        try:
            p_test = MapSet.from_json(test[0])
        except Exception:
            pass
        else:
            test_source = MAPSET_SOURCE_STR

    if p_test is None:
        raise ValueError(
            'Could not instantiate the test Pipeline, DistributionMaker, Map,'
            ' or MapSet from test value(s) %s' % test)
    test = p_test

    logging.info('Test map(s) derived from a ' + test_source)

    if combine is not None:
        ref = ref.combine_wildcard(combine)
        test = test.combine_wildcard(combine)
        if isinstance(ref, Map):
            ref = MapSet([ref])
        if isinstance(test, Map):
            test = MapSet([test])

    if sum is not None:
        ref = ref.sum(sum)
        test = test.sum(sum)

    # Set the MapSet names according to args passed by user
    ref.name = ref_label
    test.name = test_label

    # Save to disk the maps being plotted (excluding optional absolute-value
    # operations)
    if json:
        refmaps_path = os.path.join(outdir, 'maps__%s.json.bz2' % ref_label)
        to_file(ref, refmaps_path)

        testmaps_path = os.path.join(outdir, 'maps__%s.json.bz2' % test_label)
        to_file(test, testmaps_path)

    if set(test.names) != set(ref.names):
        raise ValueError('Test map names %s do not match ref map names %s.' %
                         (sorted(test.names), sorted(ref.names)))

    # Aliases to save keystrokes
    def masked(x):
        return np.ma.masked_invalid(x.nominal_values)

    def zero_to_nan(map):
        newmap = deepcopy(map)
        mask = np.isclose(newmap.nominal_values, 0, rtol=0, atol=EPSILON)
        newmap.hist[mask] = np.nan
        return newmap

    reordered_test = []
    new_ref = []
    diff_maps = []
    fract_diff_maps = []
    asymm_maps = []
    summary_stats = {}
    for ref_map in ref:
        test_map = test[ref_map.name].reorder_dimensions(ref_map.binning)
        if ref_abs:
            ref_map = abs(ref_map)
        if test_abs:
            test_map = abs(test_map)

        diff_map = test_map - ref_map
        fract_diff_map = (test_map - ref_map) / zero_to_nan(ref_map)
        asymm_map = (test_map - ref_map) / zero_to_nan(ref_map**0.5)
        abs_fract_diff_map = np.abs(fract_diff_map)

        new_ref.append(ref_map)
        reordered_test.append(test_map)
        diff_maps.append(diff_map)
        fract_diff_maps.append(fract_diff_map)
        asymm_maps.append(asymm_map)

        min_ref = np.min(masked(ref_map))
        max_ref = np.max(masked(ref_map))

        min_test = np.min(masked(test_map))
        max_test = np.max(masked(test_map))

        total_ref = np.sum(masked(ref_map))
        total_test = np.sum(masked(test_map))

        mean_ref = np.mean(masked(ref_map))
        mean_test = np.mean(masked(test_map))

        max_abs_fract_diff = np.max(masked(abs_fract_diff_map))
        mean_abs_fract_diff = np.mean(masked(abs_fract_diff_map))
        median_abs_fract_diff = np.median(masked(abs_fract_diff_map))

        mean_fract_diff = np.mean(masked(fract_diff_map))
        min_fract_diff = np.min(masked(fract_diff_map))
        max_fract_diff = np.max(masked(fract_diff_map))
        std_fract_diff = np.std(masked(fract_diff_map))

        mean_diff = np.mean(masked(diff_map))
        min_diff = np.min(masked(diff_map))
        max_diff = np.max(masked(diff_map))
        std_diff = np.std(masked(diff_map))

        median_diff = np.nanmedian(masked(diff_map))
        mad_diff = np.nanmedian(masked(np.abs(diff_map)))
        median_fract_diff = np.nanmedian(masked(fract_diff_map))
        mad_fract_diff = np.nanmedian(masked(np.abs(fract_diff_map)))

        min_asymm = np.min(masked(asymm_map))
        max_asymm = np.max(masked(asymm_map))

        total_asymm = np.sqrt(np.sum(masked(asymm_map)**2))

        summary_stats[test_map.name] = OrderedDict([
            ('min_ref', min_ref),
            ('max_ref', max_ref),
            ('total_ref', total_ref),
            ('mean_ref', mean_ref),
            ('min_test', min_test),
            ('max_test', max_test),
            ('total_test', total_test),
            ('mean_test', mean_test),
            ('max_abs_fract_diff', max_abs_fract_diff),
            ('mean_abs_fract_diff', mean_abs_fract_diff),
            ('median_abs_fract_diff', median_abs_fract_diff),
            ('min_fract_diff', min_fract_diff),
            ('max_fract_diff', max_fract_diff),
            ('mean_fract_diff', mean_fract_diff),
            ('std_fract_diff', std_fract_diff),
            ('median_fract_diff', median_fract_diff),
            ('mad_fract_diff', mad_fract_diff),
            ('min_diff', min_diff),
            ('max_diff', max_diff),
            ('mean_diff', mean_diff),
            ('std_diff', std_diff),
            ('median_diff', median_diff),
            ('mad_diff', mad_diff),
            ('min_asymm', min_asymm),
            ('max_asymm', max_asymm),
            ('total_asymm', total_asymm),
        ])

        logging.info('Map %s...', ref_map.name)
        logging.info('  Ref map(s):')
        logging.info('    min   :' + ('%.2f' % min_ref).rjust(12))
        logging.info('    max   :' + ('%.2f' % max_ref).rjust(12))
        logging.info('    total :' + ('%.2f' % total_ref).rjust(12))
        logging.info('    mean  :' + ('%.2f' % mean_ref).rjust(12))
        logging.info('  Test map(s):')
        logging.info('    min   :' + ('%.2f' % min_test).rjust(12))
        logging.info('    max   :' + ('%.2f' % max_test).rjust(12))
        logging.info('    total :' + ('%.2f' % total_test).rjust(12))
        logging.info('    mean  :' + ('%.2f' % mean_test).rjust(12))
        logging.info('  Absolute fract. diff., abs((Test - Ref) / Ref):')
        logging.info('    max   : %.4e', max_abs_fract_diff)
        logging.info('    mean  : %.4e', mean_abs_fract_diff)
        logging.info('    median: %.4e', median_abs_fract_diff)
        logging.info('  Fractional difference, (Test - Ref) / Ref:')
        logging.info('    min   : %.4e', min_fract_diff)
        logging.info('    max   : %.4e', max_fract_diff)
        logging.info('    mean  : %.4e +/- %.4e', mean_fract_diff,
                     std_fract_diff)
        logging.info('    median: %.4e +/- %.4e', median_fract_diff,
                     mad_fract_diff)
        logging.info('  Difference, Test - Ref:')
        logging.info('    min   : %.4e', min_diff)
        logging.info('    max   : %.4e', max_diff)
        logging.info('    mean  : %.4e +/- %.4e', mean_diff, std_diff)
        logging.info('    median: %.4e +/- %.4e', median_diff, mad_diff)
        logging.info('  Asymmetry, (Test - Ref) / sqrt(Ref):')
        logging.info('    min   : %.4e', min_asymm)
        logging.info('    max   : %.4e', max_asymm)
        logging.info('    total : %.4e (sum in quadrature)', total_asymm)
        logging.info('')

    ref = MapSet(new_ref)
    test = MapSet(reordered_test)
    diff = MapSet(diff_maps)
    fract_diff = MapSet(fract_diff_maps)
    asymm = MapSet(asymm_maps)

    if json:
        diff.to_json(
            os.path.join(
                outdir,
                'diff__%s__%s.json.bz2' % (test_plot_label, ref_plot_label)))
        fract_diff.to_json(
            os.path.join(
                outdir, 'fract_diff__%s__%s.json.bz2' %
                (test_plot_label, ref_plot_label)))
        asymm.to_json(
            os.path.join(
                outdir,
                'asymm__%s__%s.json.bz2' % (test_plot_label, ref_plot_label)))
        to_file(
            summary_stats,
            os.path.join(
                outdir,
                'stats__%s__%s.json.bz2' % (test_plot_label, ref_plot_label)))

    for plot_format in plot_formats:
        # Plot the raw distributions
        plotter = Plotter(stamp='',
                          outdir=outdir,
                          fmt=plot_format,
                          log=False,
                          annotate=False,
                          symmetric=False,
                          ratio=False)
        plotter.plot_2d_array(ref, fname='distr__%s' % ref_plot_label)
        plotter.plot_2d_array(test, fname='distr__%s' % test_plot_label)

        # Plot the difference (test - ref)
        plotter = Plotter(stamp='',
                          outdir=outdir,
                          fmt=plot_format,
                          log=False,
                          annotate=False,
                          symmetric=diff_symm,
                          ratio=False)
        plotter.label = '%s - %s' % (test_plot_label, ref_plot_label)
        plotter.plot_2d_array(
            test - ref,
            fname='diff__%s__%s' % (test_plot_label, ref_plot_label),
            #vmin=diff_min, vmax=diff_max
        )

        # Plot the fractional difference (test - ref)/ref
        plotter = Plotter(stamp='',
                          outdir=outdir,
                          fmt=plot_format,
                          log=False,
                          annotate=False,
                          symmetric=fract_diff_symm,
                          ratio=True)
        plotter.label = ('(%s-%s)/%s' %
                         (test_plot_label, ref_plot_label, ref_plot_label))
        plotter.plot_2d_array(
            (test - ref) / MapSet([zero_to_nan(r) for r in ref]),
            fname='fract_diff__%s__%s' % (test_plot_label, ref_plot_label),
            #vmin=fract_diff_min, vmax=fract_diff_max
        )

        # Plot the asymmetry (test - ref)/sqrt(ref)
        plotter = Plotter(stamp='',
                          outdir=outdir,
                          fmt=plot_format,
                          log=False,
                          annotate=False,
                          symmetric=asymm_symm,
                          ratio=True)
        plotter.label = (r'$(%s - %s)/\sqrt{%s}$' %
                         (test_plot_label, ref_plot_label, ref_plot_label))
        plotter.plot_2d_array(
            (test - ref) / MapSet([zero_to_nan(r**0.5) for r in ref]),
            fname='asymm__%s__%s' % (test_plot_label, ref_plot_label),
            #vmin=asymm_min, vmax=asymm_max
        )

    return summary_stats, diff, fract_diff, asymm
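
# A minimal, self-contained numpy sketch of the three bin-wise comparison
# metrics computed in the loop above, on made-up toy arrays (all names below
# are illustrative only, not part of PISA):
import numpy as np

ref_hist = np.array([[4.0, 9.0], [0.0, 16.0]])
test_hist = np.array([[5.0, 8.5], [0.1, 16.0]])

# Mirror zero_to_nan(): zeros in the denominator become NaN so they are
# excluded from summary statistics (as masked() does via masked_invalid)
safe_ref = np.where(np.isclose(ref_hist, 0.0), np.nan, ref_hist)

diff = test_hist - ref_hist            # Test - Ref
fract_diff = diff / safe_ref           # (Test - Ref) / Ref
asymm = diff / np.sqrt(safe_ref)       # (Test - Ref) / sqrt(Ref)

# Total asymmetry: sum in quadrature over valid bins
total_asymm = np.sqrt(np.nansum(asymm**2))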
Пример #18
0
def makeEventsFile(data_files,
                   detector,
                   proc_ver,
                   cut,
                   outdir,
                   run_settings=None,
                   data_proc_params=None,
                   join=None,
                   cust_cuts=None,
                   extract_fields=EXTRACT_FIELDS,
                   output_fields=OUTPUT_FIELDS):
    r"""Take the simulated and reconstructed HDF5 file(s) (as converted from I3
    by icecube.hdfwriter.I3HDFTableService) as input and write out a simplified
    PISA-standard-format HDF5 file for use in aeff, reco, and/or PID stages.

    Parameters
    ----------
    data_files : dict
        File paths for finding data files for each run, formatted as:
            {
                <string run>: <list of file paths>,
                <string run>: <list of file paths>,
                ...
                <string run>: <list of file paths>,
            }

    detector : string
        Name of the detector (e.g. IceCube, DeepCore, PINGU, etc.) as found in
        e.g. mc_sim_run_settings.json and data_proc_params.json files.

    proc_ver
        Version of processing applied to the events, as found in e.g.
        data_proc_params.json.

    cut
        Name of a standard cut to use; must be specified in the relevant
        detector/processing version node of the data processing parameters
        (file from which the data_proc_params object was instantiated)

    outdir
        Directory path in which to store resulting files; will be generated if
        it does not already exist (including any parent directories that do not
        exist)

    run_settings : string or MCSimRunSettings
        Resource location of mc_sim_run_settings.json or an MCSimRunSettings
        object instantiated therefrom.

    data_proc_params : string or DataProcParams
        Resource location of data_proc_params.json or a DataProcParams object
        instantiated therefrom.

    join
        String specifying any flavor/interaction types (flavInts) to join
        together. Separate flavInts with commas (',') and separate groups
        with semicolons (';'). E.g. an acceptable string is:
            'numucc+numubarcc; nuall bar NC, nuall NC'

    cust_cuts
        dict with a single DataProcParams cut specification or list of same
        (see help for DataProcParams for detailed description of cut spec)

    extract_fields : None or iterable of strings
        Field names to extract from source HDF5 file. If None, extract all
        fields.

    output_fields : None or iterable of strings
        Fields to include in the generated PISA-standard-format events HDF5
        file; note that if 'weighted_aeff' is not present, effective area will
        not be computed. If None, all fields will be written.

    Notes
    -----
    Compute "weighted_aeff" field:

    Within each int type (CC or NC), ngen should be added together;
    events recorded of that int type then get their one_weight divided by the
    total *for that int type only* to obtain the "weighted_aeff" for that
    event (even if int types are being grouped/joined together).

    This has the effect that, within a group:
      ... within an interaction type, effective area is a weighted
      average of that of the flavors being combined. E.g. for CC,

                     \sum_{run x}\sum_{flav y} (Aeff_{x,y} * ngen_{x,y})
          Aeff_CC = ----------------------------------------------------- ,
                          \sum_{run x}\sum_{flav y} (ngen_{x,y})

      ... and then across interaction types, the results of the above for
      each int type need to be summed together, i.e.:

          Aeff_total = Aeff_CC + Aeff_NC

    Note that each grouping of flavors is calculated with the above math
    completely independently from other flavor groupings specified.

    See Justin Lanfranchi's presentation on the PINGU Analysis call,
    2015-10-21, for more details:
      https://wikispaces.psu.edu/download/attachments/282040606/meff_report_jllanfranchi_v05_2015-10-21.pdf

    """
    if isinstance(run_settings, str):
        run_settings = DetMCSimRunsSettings(find_resource(run_settings),
                                            detector=detector)
    assert isinstance(run_settings, DetMCSimRunsSettings)
    assert run_settings.detector == detector

    if isinstance(data_proc_params, str):
        data_proc_params = DataProcParams(
            detector=detector,
            proc_ver=proc_ver,
            data_proc_params=find_resource(data_proc_params))
    assert data_proc_params.detector == detector
    assert data_proc_params.proc_ver == proc_ver

    runs = sorted(data_files.keys())

    all_flavs = []
    flavs_by_run = {}
    run_norm_factors = {}
    bin_edges = set()

    runs_by_flavint = FlavIntData()
    for flavint in runs_by_flavint.flavints:
        runs_by_flavint[flavint] = []

    #ngen_flavint_by_run = {run:FlavIntData() for run in runs}
    ##ngen_per_flav_by_run = {run:FlavIntData() for run in runs}
    #eint_per_flav_by_run = {run:FlavIntData() for run in runs}
    #for run in runs:
    #    flavints_in_run = run_settings.get_flavints(run=run)
    #    e_range = run_settings.get_energy_range(run)
    #    gamma = run_settings.get_spectral_index(run)
    #    for flavint in flavints_in_run:
    #        runs_by_flavint[flavint].append(run)
    #        ngen_flav = run_settings.get_num_gen(
    #            run=run, flav_or_flavint=flavint, include_physical_fract=True
    #        )
    #        #runs_by_flavint[flavint].append(run)
    #        #this_flav = flavint.
    #        #xsec_fract_en_wtd_avg[run][flavint] = \
    #        ngen_flavint_by_run[run][flavint] = \
    #                xsec.get_xs_ratio_integral(
    #                    flavintgrp0=flavint,
    #                    flavintgrp1=flavint.flav,
    #                    e_range=e_range,
    #                    gamma=gamma,
    #                    average=True
    #                )
    #    xsec_ver = run_settings.get_xsec_version(run=run)
    #    if xsec_ver_ref is None:
    #        xsec_ver_ref = xsec_ver
    #    # An assumption of below logic is that all MC is generated using the
    #    # same cross sections version.
    #    #
    #    # TODO / NOTE:
    #    # It would be possible to combine runs with different cross sections so
    #    # long as each (flavor, interaction type) cross sections are
    #    # weighted-averaged together using weights
    #    #   N_gen_{n,flav+inttype} * E_x^{-gamma_n} /
    #    #       ( \int_{E_min_n}^{E_max_n} E^{-\gamma_n} dE )
    #    # where E_x are the energy sample points specified in the cross
    #    # sections (and hence these must also be identical across all cross
    #    # sections that get combined, unless interpolation is performed).
    #    assert xsec_ver == xsec_ver_ref
    #    #ngen_weighted_energy_integral[str(run)] = powerLawIntegral(
    #    #flavs_by_run[run] = run_settings.flavs(run)
    ##flavs_present =

    detector_geom = run_settings[runs[0]]['geom']

    # Create Events object to store data
    evts = Events()
    evts.metadata.update({
        'detector': run_settings.detector,
        'proc_ver': data_proc_params.proc_ver,
        'geom': detector_geom,
        'runs': runs,
    })

    cuts = []
    if isinstance(cust_cuts, dict):
        cust_cuts = [cust_cuts]
    if cut is not None:
        evts.metadata['cuts'].append(cut)
        cuts.append(cut)
    if cust_cuts is not None:
        for ccut in cust_cuts:
            evts.metadata['cuts'].append('custom: ' + ccut['pass_if'])
            cuts.append(ccut)

    orig_outdir = outdir
    outdir = expand(outdir)
    logging.info('Output dir spec\'d: %s', orig_outdir)
    if outdir != orig_outdir:
        logging.info('Output dir expands to: %s', outdir)
    mkdir(outdir)

    detector_label = str(data_proc_params.detector)
    proc_label = 'proc_' + str(data_proc_params.proc_ver)

    # What flavints to group together
    if join is None or join == '':
        grouped = []
        ungrouped = [NuFlavIntGroup(k) for k in ALL_NUFLAVINTS]
        groups_label = 'unjoined'
        logging.info('Events in the following groups will be joined together:'
                     ' (none)')
    else:
        grouped, ungrouped = xlateGroupsStr(join)
        evts.metadata['flavints_joined'] = [str(g) for g in grouped]
        groups_label = 'joined_G_' + '_G_'.join([str(g) for g in grouped])
        logging.info(
            'Events in the following groups will be joined together: ' +
            '; '.join([str(g) for g in grouped]))

    # Find any flavints not included in the above groupings
    flavint_groupings = grouped + ungrouped
    if len(ungrouped) == 0:
        ungrouped = ['(none)']
    logging.info('Events of the following flavints will NOT be joined'
                 ' together: ' + '; '.join([str(k) for k in ungrouped]))

    # Enforce that flavints composing groups are mutually exclusive
    for grp_n, flavintgrp0 in enumerate(flavint_groupings[:-1]):
        for flavintgrp1 in flavint_groupings[grp_n + 1:]:
            assert len(set(flavintgrp0).intersection(set(flavintgrp1))) == 0

    flavintgrp_names = [str(flavintgrp) for flavintgrp in flavint_groupings]

    # Instantiate storage for all intermediate destination fields;
    # The data structure looks like:
    #   extracted_data[group #][interaction type][field name] = list of data
    if extract_fields is None:
        extracted_data = [{inttype: {}
                           for inttype in ALL_NUINT_TYPES}
                          for _ in flavintgrp_names]
    else:
        extracted_data = [{
            inttype: {field: []
                      for field in extract_fields}
            for inttype in ALL_NUINT_TYPES
        } for _ in flavintgrp_names]

    # Instantiate generated-event counts for destination fields; count CC
    # separately from NC because aeff's for CC & NC add, whereas aeffs
    # intra-CC should be weighted-averaged (as should intra-NC aeffs)
    ngen = [{inttype: {}
             for inttype in ALL_NUINT_TYPES} for _ in flavintgrp_names]

    # Loop through all of the files, retrieving the events, filtering,
    # and recording the number of generated events pertinent to
    # calculating aeff
    filecount = {}
    detector_geom = None
    bad_files = []
    for run, fnames in data_files.items():
        file_count = 0
        for fname in fnames:
            # Retrieve data from all nodes specified in the processing
            # settings file
            logging.trace('Trying to get data from file %s', fname)
            try:
                data = data_proc_params.get_data(fname,
                                                 run_settings=run_settings)
            except (ValueError, KeyError, IOError):
                logging.warning('Bad file encountered: %s', fname)
                bad_files.append(fname)
                continue

            file_count += 1

            # Check to make sure only one run is present in the data
            runs_in_data = set(data['run'])
            assert len(runs_in_data) == 1, 'Must be just one run in data'

            #run = int(data['run'][0])
            if run not in filecount:
                filecount[run] = 0
            filecount[run] += 1
            rs_run = run_settings[run]

            # Record geom; check that geom is consistent with other runs
            if detector_geom is None:
                detector_geom = rs_run['geom']
            assert rs_run['geom'] == detector_geom, \
                    'All runs\' geometries must match!'

            # Loop through all flavints spec'd for run
            for run_flavint in rs_run['flavints']:
                barnobar = run_flavint.bar_code
                int_type = run_flavint.intType

                # Retrieve this-interaction-type- & this-barnobar-only events
                # that also pass cuts. (note that cut names are strings)
                intonly_cut_data = data_proc_params.apply_cuts(
                    data,
                    cuts=cuts + [str(int_type), str(barnobar)],
                    return_fields=extract_fields)

                # Record the generated count and data for this run/flavor for
                # each group to which it's applicable
                for grp_n, flavint_group in enumerate(flavint_groupings):
                    if run_flavint not in flavint_group:
                        continue

                    # Instantiate a field for particles and antiparticles,
                    # keyed by the output of the bar_code property for each
                    if run not in ngen[grp_n][int_type]:
                        ngen[grp_n][int_type][run] = {
                            NuFlav(12).bar_code: 0,
                            NuFlav(-12).bar_code: 0,
                        }

                    # Record count only if it hasn't already been recorded
                    if ngen[grp_n][int_type][run][barnobar] == 0:
                        # Note that one_weight already includes the
                        # CC/NC : total fraction, so DO NOT specify the full
                        # flavint here, only the flav. (Since one_weight does
                        # NOT include the nu/nubar fraction, that fraction
                        # must be accounted for here, in the ngen computation.)
                        flav_ngen = run_settings.get_num_gen(run=run,
                                                             barnobar=barnobar)
                        ngen[grp_n][int_type][run][barnobar] = flav_ngen

                    # Append the data. Note that extracted_data is:
                    # extracted_data[group n][int_type][extract field name] =
                    #   list
                    if extract_fields is None:
                        for f in intonly_cut_data.keys():
                            if f not in extracted_data[grp_n][int_type]:
                                extracted_data[grp_n][int_type][f] = []
                            extracted_data[grp_n][int_type][f].extend(
                                intonly_cut_data[f])
                    else:
                        for f in extract_fields:
                            extracted_data[grp_n][int_type][f].extend(
                                intonly_cut_data[f])
        logging.info('File count for run %s: %d', run, file_count)
    # Record any files that could not be read, for later inspection
    to_file(bad_files, '/tmp/bad_files.json')

    if ((output_fields is None
         and (extract_fields is None or 'one_weight' in extract_fields))
            or (output_fields is not None
                and 'weighted_aeff' in output_fields)):
        fmtfields = (' ' * 12 + 'flavint_group', 'int type', '     run',
                     'part/anti', 'part/anti count', 'aggregate count')
        fmt_n = [len(f) for f in fmtfields]
        fmt = '  '.join([r'%' + str(n) + r's' for n in fmt_n])
        lines = '  '.join(['-' * n for n in fmt_n])
        logging.info(fmt, *fmtfields)
        logging.info(lines)
        for grp_n, flavint_group in enumerate(flavint_groupings):
            for int_type in set([fi.intType for fi in flavint_group.flavints]):
                ngen_it_tot = 0
                for run, run_counts in ngen[grp_n][int_type].items():
                    for barnobar, barnobar_counts in run_counts.items():
                        ngen_it_tot += barnobar_counts
                        logging.info(fmt, flavint_group.simple_str(), int_type,
                                     str(run), barnobar, int(barnobar_counts),
                                     int(ngen_it_tot))
                # Convert data to numpy array
                if extract_fields is None:
                    for field in extracted_data[grp_n][int_type].keys():
                        extracted_data[grp_n][int_type][field] = \
                                np.array(extracted_data[grp_n][int_type][field])
                else:
                    for field in extract_fields:
                        extracted_data[grp_n][int_type][field] = \
                                np.array(extracted_data[grp_n][int_type][field])
                # Generate weighted_aeff field for this group / int type's data
                extracted_data[grp_n][int_type]['weighted_aeff'] = \
                        extracted_data[grp_n][int_type]['one_weight'] \
                        / ngen_it_tot * CMSQ_TO_MSQ

    # Report file count per run
    for run, count in filecount.items():
        logging.info('Files read, run %s: %d', run, count)
        ref_num_i3_files = run_settings[run]['num_i3_files']
        if count != ref_num_i3_files:
            logging.warning(
                'Run %s, Number of files read (%d) != number of '
                'source I3 files (%d), which may indicate an error.', run,
                count, ref_num_i3_files)

    # Generate output data
    for flavint in ALL_NUFLAVINTS:
        int_type = flavint.intType
        for grp_n, flavint_group in enumerate(flavint_groupings):
            if flavint not in flavint_group:
                logging.trace('flavint %s not in flavint_group %s, passing.',
                              flavint, flavint_group)
                continue
            else:
                logging.trace(
                    'flavint %s **IS** in flavint_group %s, storing.', flavint,
                    flavint_group)
            if output_fields is None:
                evts[flavint] = extracted_data[grp_n][int_type]
            else:
                evts[flavint] = {
                    f: extracted_data[grp_n][int_type][f]
                    for f in output_fields
                }

    # Generate file name
    numerical_runs = []
    alphanumerical_runs = []
    for run in runs:
        try:
            int(run)
            numerical_runs.append(int(run))
        except ValueError:
            alphanumerical_runs.append(str(run))
    run_labels = []
    if len(numerical_runs) > 0:
        run_labels.append(list2hrlist(numerical_runs))
    if len(alphanumerical_runs) > 0:
        run_labels += sorted(alphanumerical_runs)
    run_label = 'runs_' + ','.join(run_labels)
    geom_label = str(detector_geom)
    fname = 'events__' + '__'.join([
        detector_label,
        geom_label,
        run_label,
        proc_label,
        groups_label,
    ]) + '.hdf5'

    outfpath = os.path.join(outdir, fname)
    logging.info('Writing events to %s', outfpath)

    # Save data to output file
    evts.save(outfpath)
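
# The "weighted_aeff" computation described in the Notes section above
# reduces, per (flavor group, interaction type), to a single division. A
# hedged numerical sketch with made-up counts (CMSQ_TO_MSQ converts cm^2 to
# m^2):
import numpy as np

CMSQ_TO_MSQ = 1.0e-4  # 1 cm^2 = 1e-4 m^2

# Toy one_weight values for events of one int type (e.g. CC) within a group
one_weight = np.array([1.2e5, 3.4e5, 2.2e5])

# ngen summed over runs and over nu/nubar within this int type, analogous to
# the ngen[grp_n][int_type] accumulation above
ngen_it_tot = 600000

weighted_aeff = one_weight / ngen_it_tot * CMSQ_TO_MSQ

# Summing weighted_aeff over events gives that int type's effective area;
# per the Notes above, Aeff_total = Aeff_CC + Aeff_NC.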
Пример #19
0
def add_fluxes_to_file(data_file_path,
                       flux_table,
                       flux_name,
                       outdir=None,
                       label=None,
                       overwrite=False):
    """Add fluxes to PISA events file (e.g. for use by an mc stage)
    
    Parameters
    -----------
    data_file_path : string
    flux_table
    flux_name
    outdir : string or None
        If None, output is to the same directory as `data_file_path`
    overwrite : bool, optional
    """
    data, attrs = from_file(find_resource(data_file_path), return_attrs=True)
    bname, ext = splitext(basename(data_file_path))
    assert ext.lstrip('.') in HDF5_EXTS

    if outdir is None:
        outdir = dirname(data_file_path)

    if label is None:
        label = ''
    else:
        assert isinstance(label, basestring)
        label = '_' + label

    outpath = join(outdir, '{}__with_fluxes{}{}'.format(bname, label, ext))

    if not overwrite and isfile(outpath):
        logging.warning('Output path "%s" already exists, not regenerating',
                        outpath)
        return

    mkdir(outdir, warn=False)

    # Loop over the top-level keys
    for primary, primary_node in data.items():

        # Only handling neutrino fluxes here; skip past e.g. muon or noise MC events
        if primary.startswith("nu"):

            logging.info('Adding fluxes to "%s" events', primary)

            # Input data may have one layer of hierarchy before the event
            # variables (e.g. [numu_cc]), or for older files there may be a
            # second layer (e.g. [numu][cc]). Handle either case here...
            if "true_energy" in primary_node:
                secondary_nodes = [primary_node]
            else:
                secondary_nodes = primary_node.values()

            for secondary_node in secondary_nodes:

                true_e = secondary_node['true_energy']
                true_cz = secondary_node['true_coszen']

                # calculate all 4 fluxes (nue, nuebar, numu and numubar)
                for table in ['nue', 'nuebar', 'numu', 'numubar']:
                    flux = calculate_2d_flux_weights(
                        true_energies=true_e,
                        true_coszens=true_cz,
                        en_splines=flux_table[table])
                    keyname = flux_name + '_' + table + '_flux'
                    secondary_node[keyname] = flux

    to_file(data, outpath, attrs=attrs, overwrite=overwrite)
    logging.info('--> Wrote file including fluxes to "%s"', outpath)
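
# A hedged usage sketch for add_fluxes_to_file. `load_2d_table` is assumed to
# live in pisa.utils.flux_weights alongside calculate_2d_flux_weights; both
# file paths below are placeholders, not guaranteed resources.
from pisa.utils.flux_weights import load_2d_table

flux_table = load_2d_table('flux/honda-2015-spl-solmax-aa.d')  # placeholder
add_fluxes_to_file(
    data_file_path='events/my_events.hdf5',  # placeholder events file
    flux_table=flux_table,
    flux_name='nominal',
    outdir=None,       # None: write next to the input file
    overwrite=False,   # skip silently if the output file already exists
)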
Пример #20
0
def main():
    args = parse_args()
    init_args_d = vars(args)

    # NOTE: Use the dict's `pop()` method to remove extraneous args that
    # should not be passed when instantiating the HypoTesting object.

    set_verbosity(init_args_d.pop('v'))

    detector = init_args_d.pop('detector')
    selection = init_args_d.pop('selection')

    # Normalize and convert `*_pipeline` filenames; store to `*_maker`
    # (which is argument naming convention that HypoTesting init accepts).
    filenames = init_args_d.pop('pipeline')
    if filenames is not None:
        filenames = sorted([normcheckpath(fname) for fname in filenames])
    ps_str = init_args_d['param_selections']
    if ps_str is None:
        ps_list = None
    else:
        ps_list = [x.strip().lower() for x in ps_str.split(',')]

    data_maker = DistributionMaker(filenames)
    data_maker.select_params(ps_list)

    # A separate PID stage would require special handling; bail out if found
    return_sum = True
    for data_pipeline in data_maker.pipelines:
        if 'pid' in data_pipeline.stage_names:
            raise ValueError("Special case for separate PID stage currently "
                             "not implemented.")
    baseline_maps = data_maker.get_outputs(return_sum=return_sum)

    det_sel = []
    if detector.strip() != '':
        det_sel.append(detector.strip())
    if selection.strip() != '':
        det_sel.append(selection.strip())
    det_sel_label = ' '.join(det_sel)

    det_sel_plot_label = det_sel_label
    if det_sel_plot_label != '':
        det_sel_plot_label += ', '

    det_sel_file_label = det_sel_label
    if det_sel_file_label != '':
        det_sel_file_label += '_'
    det_sel_file_label = det_sel_file_label.replace(' ', '_')

    for data_param in data_maker.params.free:
        # Calculate a shifted value based on the prior if possible
        if hasattr(data_param, 'prior') and (data_param.prior is not None):
            # Gaussian priors are easy - just do 1 sigma
            if data_param.prior.kind == 'gaussian':
                data_param.value = \
                    data_param.value + data_param.prior.stddev
                shift_label = r"$1\sigma$"
            # Else do 10%, or +/- 1 if the baseline is zero
            else:
                if data_param.value != 0.0:
                    data_param.value = 1.1 * data_param.value
                    shift_label = r"10%"
                else:
                    data_param.value = 1.0
                    shift_label = r"1"
        # For no prior also do 10%, or +/- 1 if the baseline is zero
        else:
            if data_param.value != 0.0:
                data_param.value = 1.1 * data_param.value
                shift_label = r"10%"
            else:
                data_param.value = 1.0
                shift_label = r"1"
        up_maps = data_maker.get_outputs(return_sum=return_sum)
        data_maker.params.reset_free()

        if hasattr(data_param, 'prior') and (data_param.prior is not None):
            if data_param.prior.kind == 'gaussian':
                data_param.value = \
                    data_param.value - data_param.prior.stddev
            else:
                if data_param.value != 0.0:
                    data_param.value = 0.9 * data_param.value
                else:
                    data_param.value = -1.0
        else:
            if data_param.value != 0.0:
                data_param.value = 0.9 * data_param.value
            else:
                data_param.value = -1.0
        down_maps = data_maker.get_outputs(return_sum=return_sum)
        data_maker.params.reset_free()

        baseline_map = baseline_maps['total']
        baseline_map.set_errors(error_hist=None)
        up_map = up_maps['total']
        up_map.set_errors(error_hist=None)
        down_map = down_maps['total']
        down_map.set_errors(error_hist=None)

        pid_names = baseline_map.binning['pid'].bin_names
        if pid_names is None:
            logging.warning('There are no names given for the PID bins, so '
                            'they will just be numbered in both the plot '
                            'save names and titles.')
            pid_names = list(range(baseline_map.binning['pid'].num_bins))

        gridspec_kw = dict(left=0.04, right=0.966, wspace=0.32)
        fig, axes = plt.subplots(nrows=2,
                                 ncols=len(pid_names),
                                 gridspec_kw=gridspec_kw,
                                 sharex=False,
                                 sharey=False,
                                 figsize=(7 * len(pid_names), 14))

        for i, pid_name in enumerate(pid_names):

            baseline = baseline_map.split(dim='pid', bin=pid_name)
            up_to_plot = up_map.split(dim='pid', bin=pid_name)
            up_to_plot = (up_to_plot - baseline) / baseline * 100.0
            down_to_plot = down_map.split(dim='pid', bin=pid_name)
            down_to_plot = (down_to_plot - baseline) / baseline * 100.0

            if isinstance(pid_name, int):
                pid_name = 'PID Bin %i' % (pid_name)
            else:
                pid_name += ' Channel'

            up_to_plot.plot(fig=fig,
                            ax=axes[0][i],
                            title="%s " % (pid_name) + "\n" + " %s + %s" %
                            (tex_axis_label(data_param.name), shift_label),
                            titlesize=30,
                            cmap=plt.cm.seismic,
                            clabel='% Change from Baseline',
                            clabelsize=30,
                            xlabelsize=24,
                            ylabelsize=24,
                            symm=True)
            down_to_plot.plot(fig=fig,
                              ax=axes[1][i],
                              title="%s " % (pid_name) + "\n" + " %s - %s" %
                              (tex_axis_label(data_param.name), shift_label),
                              titlesize=30,
                              cmap=plt.cm.seismic,
                              clabel='% Change from Baseline',
                              clabelsize=30,
                              xlabelsize=24,
                              ylabelsize=24,
                              symm=True)

        fig.subplots_adjust(hspace=0.4)
        savename = det_sel_file_label
        if savename != '' and savename[-1] != '_':
            savename += '_'
        savename += '%s_variation.png' % (data_param.name)
        mkdir(args.logdir, warn=False)
        fig.savefig(os.path.join(args.logdir, savename), bbox_inches='tight')
        plt.close(fig.number)
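
# For reference, the quantity plotted in both rows above is the bin-wise
# percent change of each shifted map relative to the baseline; a toy sketch:
import numpy as np

baseline = np.array([100.0, 250.0, 75.0])
shifted = np.array([110.0, 240.0, 75.0])  # e.g. parameter moved up 1 sigma

pct_change = (shifted - baseline) / baseline * 100.0  # -> [10., -4., 0.]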