Example #1
def sampleHypercube(n_dim, n_samp, rand_set_id=0, crit='m', iterations=5,
                    rdata_dir='~/cowen/data/random'):
    """Load (if file exists) or generate samples from within hypercube using
    Latin hypercube sampling

    Requires pyDOE to generate new samples.
    """
    fname = samplesFilename(n_dim=n_dim,
                            n_samp=n_samp,
                            rand_set_id=rand_set_id,
                            crit=crit,
                            iterations=iterations)
    rdata_dir = os.path.expandvars(os.path.expanduser(rdata_dir))
    fpath = os.path.join(rdata_dir, fname)

    if os.path.exists(fpath):
        samps = fileio.from_file(fpath)
    else:
        logging.info('File not found. Generating new set of samples & saving'
                     ' result to "%s"', fpath)
        import pyDOE
        mkdir(rdata_dir)
        # Seed the random state deterministically based upon the hypercube
        # sampling parameters specified
        n_bad_seeds(n_dim, n_samp, rand_set_id)
        samps = pyDOE.lhs(n=n_dim, samples=n_samp, criterion=crit,
                          iterations=iterations)
        fileio.to_file(samps, fpath)
    return samps
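
The core of the generation branch is pyDOE's `lhs` call. A minimal standalone sketch of that call alone (assuming only `numpy` and `pyDOE` are installed; the file caching above is omitted):

import numpy as np
import pyDOE

# Seed numpy's global RNG so the design is reproducible; pyDOE draws its
# randomness from numpy.random, which is presumably what the deterministic
# seeding above is for.
np.random.seed(0)

# 10 samples in 3 dimensions; criterion 'm' (maximin) spreads the points
# apart, refined over 5 iterations. Every coordinate lies in [0, 1).
samps = pyDOE.lhs(n=3, samples=10, criterion='m', iterations=5)
print(samps.shape)  # -> (10, 3)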
Example #2
def main(return_outputs=False):
    """Main; call as script with `return_outputs=False` or interactively with
    `return_outputs=True`"""
    from pisa.utils.plotter import Plotter
    args = parse_args()
    set_verbosity(args.v)
    plot_formats = []
    if args.pdf:
        plot_formats.append('pdf')
    if args.png:
        plot_formats.append('png')

    distribution_maker = DistributionMaker(pipelines=args.pipeline)  # pylint: disable=redefined-outer-name
    if args.select is not None:
        distribution_maker.select_params(args.select)

    outputs = distribution_maker.get_outputs(return_sum=args.return_sum)  # pylint: disable=redefined-outer-name
    if args.outdir:
        # TODO: unique filename: append hash (or hash per pipeline config)
        fname = 'distribution_maker_outputs.json.bz2'
        mkdir(args.outdir)
        fpath = expand(os.path.join(args.outdir, fname))
        to_file(outputs, fpath)

    if args.outdir and plot_formats:
        my_plotter = Plotter(outdir=args.outdir,
                             fmt=plot_formats,
                             log=False,
                             annotate=False)
        for num, output in enumerate(outputs):
            my_plotter.plot_2d_array(output, fname='dist_output_%d' % num)

    if return_outputs:
        return distribution_maker, outputs
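
To read back what the block above stored, PISA's fileio helpers can be used; a hedged sketch, assuming the script was run with an `--outdir` of `outdir` (the path here is hypothetical):

from pisa.utils.fileio import from_file

# Loads the distributions the script serialized to compressed JSON
outputs = from_file('outdir/distribution_maker_outputs.json.bz2')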
Example #3
def save_hyperplane_fits(input_data, fit_results, outdir, tag):
    """Store discrete systematics fits and chi-square values to a specified
    output location, with results identified by a tag.

    Parameters
    ----------
    input_data : mapping
        input data container returned by `hyperplane` function
    fit_results : dict
        fit results data container returned by `hyperplane` function
    outdir : string
        output directory
    tag : string
        identifier for filenames holding fit results

    """
    # Get some strings to use when naming
    dim = len(input_data["param_names"])
    param_str = "_".join(input_data["param_names"])

    # Store as JSON
    mkdir(outdir)
    res_path = join(
        outdir, "%s__%dd__%s__hyperplane_fits.json" % (tag, dim, param_str))
    to_file(fit_results, res_path)
Example #4
    def save(self, fpath, ver=None, **kwargs):
        """Save cross sections (and the energy specification) to a file at
        `fpath`."""
        if ver is None:
            if self._ver is None:
                raise ValueError(
                    'Either a ver must be specified in call to `save` or it '
                    'must have been set prior to the invocation of `save`.'
                )
            ver = self._ver
        else:
            assert ver == self._ver

        try:
            fpath = find_resource(fpath)
        except IOError:
            pass
        fpath = os.path.expandvars(os.path.expanduser(fpath))
        all_xs = {}
        # Get any existing data from file
        if os.path.exists(fpath):
            all_xs = from_file(fpath)
        # Validate existing data by instantiating objects from each
        for v, d in all_xs.items():
            CrossSections(ver=v, energy=d['energy'], xsec=d['xsec'])
        if ver in all_xs:
            logging.warning('Overwriting existing version "%s" in file %s',
                            ver, fpath)
        all_xs[ver] = {'xsec': self, 'energy': self.energy}
        to_file(all_xs, fpath, **kwargs)
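
The read-validate-merge-write pattern above (load any existing file, sanity-check each entry, warn before overwriting, write everything back) is generic; a minimal sketch of the same flow using only the standard library, with hypothetical names:

import json
import os

def save_versioned(obj, ver, fpath):
    """Merge `obj` under key `ver` into the JSON file at `fpath`."""
    all_entries = {}
    # Get any existing data from the file
    if os.path.exists(fpath):
        with open(fpath) as f:
            all_entries = json.load(f)
    if ver in all_entries:
        print('Overwriting existing version "%s" in file %s' % (ver, fpath))
    all_entries[ver] = obj
    with open(fpath, 'w') as f:
        json.dump(all_entries, f)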
Example #5
    def saveFile(self, filename):
        """
        Write Fisher matrix to json file
        """

        dict_to_write = {
            'matrix': self.matrix,
            'parameters': self.parameters,
            'best_fits': self.best_fits,
            'labels': self.labels,
            'priors': self.priors,
        }

        to_file(dict_to_write, filename)
Example #6
def main(return_outputs=False):
    """Main; call as script with `return_outputs=False` or interactively with
    `return_outputs=True`"""
    from pisa.utils.plotter import Plotter
    args = parse_args()
    set_verbosity(args.v)
    plot_formats = []
    if args.pdf:
        plot_formats.append('pdf')
    if args.png:
        plot_formats.append('png')
        
    detectors = Detectors(args.pipeline, shared_params=args.shared_params)
    det_names = detectors.det_names
    if args.select is not None:
        detectors.select_params(args.select)

    outputs = detectors.get_outputs(return_sum=args.return_sum)

    # outputs = outputs[0].fluctuate(
    #     method='poisson', random_state=get_random_state([0, 0, 0]))

    if args.outdir:
        # TODO: unique filename: append hash (or hash per pipeline config)
        fname = 'detectors_outputs.json.bz2'
        mkdir(args.outdir)
        fpath = expand(os.path.join(args.outdir, fname))
        to_file(outputs, fpath)

    if args.outdir and plot_formats:
        my_plotter = Plotter(
            outdir=args.outdir,
            fmt=plot_formats, log=False,
            annotate=False
        )
        for num, output in enumerate(outputs):
            if args.return_sum:
                my_plotter.plot_2d_array(output, fname=det_names[num])
            else:
                for out in output:
                    my_plotter.plot_2d_array(out, fname=det_names[num])

    if return_outputs:
        return detectors, outputs
Example #7
def main():
    """Perform a hypersurface fit to discrete systematics sets."""

    # Get args
    args = parse_args()
    set_verbosity(args.v)

    # Read in data and fit hypersurfaces to it
    hypersurfaces = create_hypersurfaces(fit_cfg=args.fit_cfg)

    # Store as JSON
    mkdir(args.outdir)
    arbitrary_hypersurface = list(hypersurfaces.values())[0]
    output_path = join(args.outdir,
                       get_hypersurface_file_name(arbitrary_hypersurface,
                                                  args.tag))
    to_file(hypersurfaces, output_path)
Example #8
def assemble_interpolated_fits(fit_directory,
                               output_file,
                               drop_fit_maps=False):
    """After all of the fits on the cluster are done, assemble the results to one JSON.

    The JSON produced by this function is what `load_interpolated_hypersurfaces`
    expects.
    """
    assert os.path.isdir(fit_directory), "fit directory does not exist"
    metadata = from_json(os.path.join(fit_directory, "metadata.json"))

    combined_data = collections.OrderedDict()
    combined_data["interpolation_param_spec"] = metadata[
        "interpolation_param_spec"]

    # Loop over grid points
    hs_fits = []
    grid_shape = tuple(metadata["grid_shape"])
    for job_idx, grid_idx in enumerate(np.ndindex(grid_shape)):

        # Load grid point data
        gridpoint_json = os.path.join(fit_directory,
                                      f"gridpoint_{job_idx:06d}.json.bz2")
        logging.info(f"Reading {gridpoint_json}")
        gridpoint_data = from_json(gridpoint_json)

        # Check the loaded data
        assert job_idx == gridpoint_data["job_idx"]
        assert np.all(grid_idx == gridpoint_data["grid_idx"])
        # TODO: Offer to run incomplete fits locally
        assert gridpoint_data[
            "fit_successful"], f"job no. {job_idx} not finished"

        # Drop fit maps if requested (can significantly reduce file size)
        if drop_fit_maps:
            for key, hs_state in gridpoint_data["hs_fit"].items():
                hs_state["fit_maps_raw"] = None
                hs_state["fit_maps_norm"] = None

        # Add grid point data to output file
        hs_fits.append(
            collections.OrderedDict(
                param_values=gridpoint_data["param_values"],
                hs_fit=gridpoint_data["hs_fit"]))

    # Write the output file
    combined_data["hs_fits"] = hs_fits
    to_file(combined_data, output_file)
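
The job-index-to-grid-index bookkeeping relies on `np.ndindex` enumerating grid points in a fixed (row-major) order, so the fitting jobs and this assembly loop agree on numbering. A standalone illustration:

import numpy as np

grid_shape = (2, 3)
for job_idx, grid_idx in enumerate(np.ndindex(grid_shape)):
    # Job files would be named gridpoint_000000.json.bz2, etc.
    print(f"gridpoint_{job_idx:06d}.json.bz2", "<->", grid_idx)
# job 0 <-> (0, 0), job 1 <-> (0, 1), ..., job 5 <-> (1, 2)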
Example #9
def stability_test(func, func_kw, ref_path, ignore_fails=False, define_as_ref=False):
    """basic stability test of a Numba CPUDispatcher function (i.e., function
    compiled via @jit / @njit)"""
    func_name = func.py_func.__name__
    logging.info("stability testing `%s`", func_name)
    ref_path = expand(ref_path)

    test = execute_func(func=func, func_kw=func_kw)

    if define_as_ref:
        to_file(test, ref_path)

    # Even when we define the test case as ref, round-trip to/from file to
    # ensure that doesn't corrupt the values
    ref = from_file(ref_path)

    check(test=test, ref=ref, label=func_name, ignore_fails=ignore_fails)

    return test, ref
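
A hedged usage sketch: the first run (with `define_as_ref=True`) writes the reference file, and later runs compare against it. This assumes a PISA checkout where `stability_test` and its helpers are importable; `add` and the reference path are hypothetical:

from numba import njit

@njit
def add(x, y):
    return x + y

# First call defines the reference; later calls drop define_as_ref to compare:
# test, ref = stability_test(func=add, func_kw=dict(x=1.0, y=2.0),
#                            ref_path='/tmp/add_ref.json',
#                            define_as_ref=True)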
Example #10
    def store_kernels(self, filename, fmt=None):
        """Store reconstruction kernels to file"""
        fileio.to_file(self.kernels, filename, fmt=fmt)
Example #11
def main():
    args = parse_args()
    set_verbosity(args.v)

    if args.plot:
        import matplotlib as mpl
        mpl.use('pdf')
        import matplotlib.pyplot as plt
        from pisa.utils.plotter import Plotter

    cfg = from_file(args.fit_settings)
    sys_list = cfg.get('general', 'sys_list').replace(' ', '').split(',')
    stop_idx = cfg.getint('general', 'stop_after_stage')

    for sys in sys_list:
        # Parse info for given systematic
        nominal = cfg.getfloat(sys, 'nominal')
        degree = cfg.getint(sys, 'degree')
        force_through_nominal = cfg.getboolean(sys, 'force_through_nominal')
        runs = eval(cfg.get(sys, 'runs'))
        smooth = cfg.get(sys, 'smooth')

        x_values = np.array(sorted(runs))

        # Build fit function
        if force_through_nominal:
            function = "lambda x, *p: np.polynomial.polynomial.polyval(x, [1.] + list(p))"
        else:
            function = "lambda x, *p: np.polynomial.polynomial.polyval(x, list(p))"
            # Add free parameter for constant term
            degree += 1
        fit_fun = eval(function)

        # Instantiate template maker
        template_maker = Pipeline(args.template_settings)

        if args.set_param != '':
            for one_set_param in args.set_param:
                p_name, value = one_set_param.split("=")
                value = parse_quantity(value)
                value = value.n * value.units
                param = template_maker.params[p_name]
                param.value = value
                template_maker.update_params(param)

        inputs = {}
        map_names = None
        # Get sys templates
        for run in runs:
            for key, val in cfg.items('%s:%s'%(sys, run)):
                if key.startswith('param.'):
                    _, pname = key.split('.')
                    param = template_maker.params[pname]
                    try:
                        value = parse_quantity(val)
                        param.value = value.n * value.units
                    except ValueError:
                        value = parse_string_literal(val)
                        param.value = value
                    param.set_nominal_to_current_value()
                    template_maker.update_params(param)
            # Retrieve maps
            template = template_maker.get_outputs(idx=stop_idx)
            if map_names is None:
                map_names = [m.name for m in template]
            inputs[run] = {}
            for m in template:
                inputs[run][m.name] = m.hist

        # Numpy acrobatics:
        arrays = {}
        for name in map_names:
            arrays[name] = []
            for x in x_values:
                arrays[name].append(
                    inputs[x][name] / unp.nominal_values(inputs[nominal][name])
                )
            a = np.array(arrays[name])
            arrays[name] = np.rollaxis(a, 0, len(a.shape))

        # Shift to get deltas
        x_values -= nominal

        # Binning object (assuming they're all the same)
        binning = template.maps[0].binning

        shape = [d.num_bins for d in binning] + [degree]
        shape_small = [d.num_bins for d in binning]

        outputs = {}
        errors = {}
        for name in map_names:
            # Now actually perform some fits
            outputs[name] = np.ones(shape)
            errors[name] = np.ones(shape)

            for idx in np.ndindex(*shape_small):
                y_values = unp.nominal_values(arrays[name][idx])
                y_sigma = unp.std_devs(arrays[name][idx])
                if np.any(y_sigma):
                    popt, pcov = curve_fit(fit_fun, x_values, y_values,
                                           sigma=y_sigma, p0=np.ones(degree))
                else:
                    popt, pcov = curve_fit(fit_fun, x_values, y_values,
                                           p0=np.ones(degree))
                perr = np.sqrt(np.diag(pcov))
                for k, p in enumerate(popt):
                    outputs[name][idx][k] = p
                    errors[name][idx][k] = perr[k]

                # TODO(philippeller): the below block of code will fail

                # Maybe plot
                #if args.plot:
                #    fig_num = i + nx * j
                #    if fig_num == 0:
                #        fig = plt.figure(num=1, figsize=( 4*nx, 4*ny))
                #    subplot_idx = nx*(ny-1-j)+ i + 1
                #    plt.subplot(ny, nx, subplot_idx)
                #    #plt.scatter(x_values, y_values, color=plt_colors[name])
                #    plt.gca().errorbar(x_values, y_values, yerr=y_sigma,
                #                       fmt='o', color=plt_colors[name],
                #                       ecolor=plt_colors[name],
                #                       mec=plt_colors[name])
                #    # Plot nominal point again in black
                #    plt.scatter([0.0], [1.0], color='k')
                #    f_values = fit_fun(x_values, *popt)
                #    fun_plot, = plt.plot(x_values, f_values,
                #            color=plt_colors[name])
                #    plt.ylim(np.min(unp.nominal_values(arrays[name]))*0.9,
                #             np.max(unp.nominal_values(arrays[name]))*1.1)
                #    if i > 0:
                #        plt.setp(plt.gca().get_yticklabels(), visible=False)
                #    if j > 0:
                #        plt.setp(plt.gca().get_xticklabels(), visible=False)

        if smooth == 'gauss':
            for name in map_names:
                for d in range(degree):
                    outputs[name][..., d] = gaussian_filter(
                        outputs[name][..., d], sigma=1)

        if smooth == 'gauss_pid':
            for name in map_names:
                split_idx = binning.names.index('pid')
                tot = len(binning)-1
                for d in range(degree):
                    for p in range(len(binning['pid'])):
                        outputs[name][...,p,d] = gaussian_filter(
                            np.swapaxes(outputs[name], split_idx, tot)[...,p,d],
                            sigma=1
                        )
                outputs[name] = np.swapaxes(outputs[name], split_idx, tot)

        # Save the raw ones anyway
        outputs['pname'] = sys
        outputs['nominal'] = nominal
        outputs['function'] = function
        outputs['map_names'] = map_names
        outputs['binning_hash'] = binning.hash
        to_file(outputs, '%s/%s_sysfits_%s_%s.json'%(args.out_dir, sys,
                                                     args.tag, smooth))

        if args.plot:
            for d in range(degree):
                maps = []
                for name in map_names:
                    maps.append(Map(name='%s_raw'%name, hist=outputs[name][...,d],
                                    binning=binning))
                maps = MapSet(maps)
                my_plotter = Plotter(
                    stamp='',
                    outdir=args.out_dir,
                    fmt='pdf',
                    log=False,
                    label=''
                )
                my_plotter.plot_2d_array(
                    maps,
                    fname='%s_%s_%s_%s'%(sys, args.tag, d, smooth),
                )
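
The fit at the heart of this script is scipy's `curve_fit` applied to a variable-degree polynomial, where the length of `p0` fixes the number of free coefficients. A standalone sketch of the `force_through_nominal` branch (only `numpy` and `scipy` assumed; the data here are synthetic):

import numpy as np
from scipy.optimize import curve_fit

# Constant term pinned to 1, so the curve passes through the nominal point
fit_fun = lambda x, *p: np.polynomial.polynomial.polyval(x, [1.] + list(p))

x_values = np.linspace(-2., 2., 9)
y_values = 1. + 0.5 * x_values - 0.1 * x_values**2

degree = 2  # number of free coefficients; len(p0) tells curve_fit how many
popt, pcov = curve_fit(fit_fun, x_values, y_values, p0=np.ones(degree))
perr = np.sqrt(np.diag(pcov))  # 1-sigma uncertainties from the covariance
print(popt)  # ~ [0.5, -0.1]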
Example #12
    def _compute_nominal_transforms(self):
        self.load_events(self.params.aeff_events)
        self.cut_events(self.params.transform_events_keep_criteria)

        # Units must be the following for correctly converting a sum-of-
        # OneWeights-in-bin to an average effective area across the bin.
        comp_units = dict(true_energy='GeV', true_coszen=None,
                          true_azimuth='rad')

        # Select only the units in the input/output binning for conversion
        # (can't pass more than what's actually there)
        in_units = {dim: unit for dim, unit in comp_units.items()
                    if dim in self.input_binning}

        # TODO: use out_units for some kind of conversion?
        #out_units = {dim: unit for dim, unit in comp_units.items()
        #             if dim in self.output_binning}

        # These will be in the computational units
        input_binning = self.input_binning.to(**in_units)

        # Account for "missing" dimension(s) (dimensions OneWeight expects for
        # computation of bin volume), and accommodate with a factor equal to
        # the full range. See IceCube wiki/documentation for OneWeight for
        # more info.
        missing_dims_vol = 1
        if 'true_azimuth' not in input_binning:
            missing_dims_vol *= 2*np.pi
        if 'true_coszen' not in input_binning:
            missing_dims_vol *= 2

        if self.debug_mode:
            outdir = os.path.join(find_resource('debug'),
                                  self.stage_name,
                                  self.service_name)
            mkdir(outdir)

        bin_volumes = input_binning.bin_volumes(attach_units=False)
        norm_volumes = bin_volumes * missing_dims_vol

        nominal_transforms = []
        for xform_flavints in self.transform_groups:
            logging.debug('Working on %s effective areas xform',
                          xform_flavints)

            aeff_transform = self.events.histogram(
                kinds=xform_flavints,
                binning=input_binning,
                weights_col='weighted_aeff',
                errors=(self.error_method not in [None, False])
            )
            aeff_transform = aeff_transform.hist

            # Divide histogram by
            #   (energy bin width x coszen bin width x azimuth bin width)
            # volumes to convert from sums-of-OneWeights-in-bins to
            # effective areas. Note that volume correction factor for
            # missing dimensions is applied here.
            aeff_transform /= norm_volumes

            if self.debug_mode:
                outfile = os.path.join(
                    outdir, 'aeff_' + str(xform_flavints) + '.pkl'
                )
                to_file(aeff_transform, outfile)

            nominal_transforms.extend(
                populate_transforms(
                    service=self,
                    xform_flavints=xform_flavints,
                    xform_array=aeff_transform
                )
            )

        return TransformSet(transforms=nominal_transforms)
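
The volume normalization above can be shown in isolation: a histogram of summed OneWeights is divided by bin volumes times the full range of any dimension that is not binned. A toy sketch with hypothetical 2D binning (energy x cos-zenith, azimuth missing):

import numpy as np

e_edges = np.array([1., 10., 100.])    # GeV
cz_edges = np.array([-1., 0., 1.])
summed_one_weights = np.ones((2, 2))   # stand-in for the histogram above

bin_volumes = np.outer(np.diff(e_edges), np.diff(cz_edges))
missing_dims_vol = 2 * np.pi           # true_azimuth not in the binning
aeff = summed_one_weights / (bin_volumes * missing_dims_vol)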
Example #13
def make_toy_events(outdir, num_events, energy_range, spectral_index,
                    coszen_range, num_sets, first_set, aeff_energy_param,
                    aeff_coszen_param, reco_param, pid_param, pid_dist):
    """Make toy events and store to a file.

    Parameters
    ----------
    outdir : string
    num_events : iterable of int
    energy_range : 2-tuple of floats
    spectral_index : float
    coszen_range : 2-tuple of floats
    num_sets : int
    first_set : int
    aeff_energy_param : string
    aeff_coszen_param : string
    reco_param : string
    pid_param : string
    pid_dist : string

    Returns
    -------
    events : :class:`pisa.core.events.Events`

    """
    energy_range = sorted(energy_range)
    coszen_range = sorted(coszen_range)

    # Validation of args
    assert energy_range[0] > 0 and energy_range[1] < 1e9
    assert coszen_range[0] >= -1 and coszen_range[1] <= 1
    assert np.diff(energy_range)[0] > 0, str(energy_range)
    assert np.diff(coszen_range)[0] > 0, str(coszen_range)
    assert spectral_index >= 0, str(spectral_index)
    assert first_set >= 0, str(first_set)
    assert num_sets >= 1, str(num_sets)

    # Make sure resources specified actually exist
    for arg in [aeff_energy_param, aeff_coszen_param, reco_param, pid_param]:
        find_resource(arg)

    mkdir(outdir, warn=False)

    set_indices = list(range(first_set, first_set + num_sets))

    # The following loop is for validation only
    for num, index in product(num_events, set_indices):
        mcgen_random_state(num_events=num, set_index=index)

    for num, set_index in product(num_events, set_indices):
        mcevts_fname = FNAME_TEMPLATE.format(
            file_type='events',
            detector='vlvnt',
            e_min=format_num(energy_range[0]),
            e_max=format_num(energy_range[1]),
            spectral_index=format_num(spectral_index,
                                      sigfigs=2,
                                      trailing_zeros=True),
            cz_min=format_num(coszen_range[0]),
            cz_max=format_num(coszen_range[1]),
            num_events=format_num(num, sigfigs=3, sci_thresh=(1, -1)),
            set_index=format_num(set_index, sci_thresh=(10, -10)),
            extension='hdf5')
        mcevts_fpath = os.path.join(outdir, mcevts_fname)
        if os.path.isfile(mcevts_fpath):
            logging.warning('File already exists, skipping: "%s"', mcevts_fpath)
            continue

        logging.info('Working on set "%s"', mcevts_fname)

        # TODO: pass filepaths / resource locations via command line args

        # Create a single random state object to pass from function to function
        random_state = mcgen_random_state(num_events=num, set_index=set_index)

        mc_events = generate_mc_events(
            num_events=num,
            energy_range=energy_range,
            coszen_range=coszen_range,
            spec_ind=spectral_index,
            aeff_energy_param_source=aeff_energy_param,
            aeff_coszen_param_source=aeff_coszen_param,
            random_state=random_state)
        populate_reco_observables(mc_events=mc_events,
                                  param_source=reco_param,
                                  random_state=random_state)
        populate_pid(mc_events=mc_events,
                     param_source=pid_param,
                     random_state=random_state,
                     dist=pid_dist)

        to_file(mc_events, mcevts_fpath)

    return mc_events
Example #14
def compare(outdir,
            ref,
            ref_label,
            test,
            test_label,
            asymm_max=None,
            asymm_min=None,
            combine=None,
            diff_max=None,
            diff_min=None,
            fract_diff_max=None,
            fract_diff_min=None,
            json=False,
            pdf=False,
            png=False,
            ref_abs=False,
            ref_param_selections=None,
            sum=None,
            test_abs=False,
            test_param_selections=None):
    """Compare two entities. The result each entity specification is
    formatted into a MapSet and stored to disk, so that e.g. re-running
    a DistributionMaker is unnecessary to reproduce the results.

    Parameters
    ----------
    outdir : string
        Store output plots to this directory

    ref : string or array of strings
        Pipeline settings config file that generates reference output,
        or a stored map or map set. Multiple pipelines, maps, or map sets are
        supported

    ref_abs : bool
        Use the absolute value of the reference plot for comparisons

    ref_label : string
        Label for reference

    ref_param_selections : None or string
        Param selections to apply to ref pipeline config(s). Not
        applicable if ref specifies stored map or map sets

    test : string or array of strings
        Pipeline settings config file that generates test output, or a
        stored map or map set. Multiple pipelines, maps, or map sets are
        supported

    test_abs : bool
        Use the absolute value of the test plot for comparisons

    test_label : string
        Label for test

    test_param_selections : None or string
        Param selections to apply to test pipeline config(s). Not
        applicable if test specifies stored map or map sets

    combine : None or string or array of strings
        Combine by wildcard string, where string globbing (a la command
        line) uses asterisk for any number of wildcard characters. Use
        single quotes such that asterisks do not get expanded by the
        shell. Multiple combine strings supported

    sum : None or int
        Sum over (and hence remove) the specified axis or axes. I.e.,
        project the map onto remaining (unspecified) axis or axes

    json : bool
        Save output maps in compressed json (json.bz2) format

    pdf : bool
        Save plots in PDF format. If neither this nor png is
        specified, no plots are produced

    png : bool
        Save plots in PNG format. If neither this nor pdf is specified,
        no plots are produced

    diff_min : None or float
        Difference plot vmin; if you specify only one of diff_min or
        diff_max, symmetric limits are automatically used (min = -max)

    diff_max : None or float
        Difference plot vmax; if you specify only one of diff_min or
        diff_max, symmetric limits are automatically used (min = -max)

    fract_diff_min : None or float
        Fractional difference plot vmin; if you specify only one of
        fract_diff_min or fract_diff_max, symmetric limits are
        automatically used (min = -max)

    fract_diff_max : None or float
        Fractional difference plot vmax; if you specify only one of
        fract_diff_min or fract_diff_max, symmetric limits are
        automatically used (min = -max)

    asymm_min : None or float
        Asymmetry plot vmin; if you specify only one of asymm_min or
        asymm_max, symmetric limits are automatically used (min = -max)

    asymm_max : None or float
        Asymmetry plot vmax; if you specify only one of
        asymm_min or asymm_max, symmetric limits are automatically used
        (min = -max)

    Returns
    -------
    summary_stats : dict
        Dictionary containing a summary for each Map processed

    diff : MapSet
        MapSet of the difference
        - (Test - Ref)

    fract_diff : MapSet
        MapSet of the fractional difference
        - (Test - Ref) / Ref

    asymm : MapSet
        MapSet of the asymmetric fraction difference or pull
        - (Test - Ref) / sqrt(Ref)

    """
    ref_plot_label = ref_label
    if ref_abs and not ref_label.startswith('abs'):
        ref_plot_label = 'abs(%s)' % ref_plot_label
    test_plot_label = test_label
    if test_abs and not test_label.startswith('abs'):
        test_plot_label = 'abs(%s)' % test_plot_label

    plot_formats = []
    if pdf:
        plot_formats.append('pdf')
    if png:
        plot_formats.append('png')

    diff_symm = True
    if diff_min is not None and diff_max is None:
        diff_max = -diff_min
        diff_symm = False
    if diff_max is not None and diff_min is None:
        diff_min = -diff_max
        diff_symm = False

    fract_diff_symm = True
    if fract_diff_min is not None and fract_diff_max is None:
        fract_diff_max = -fract_diff_min
        fract_diff_symm = False
    if fract_diff_max is not None and fract_diff_min is None:
        fract_diff_min = -fract_diff_max
        fract_diff_symm = False

    asymm_symm = True
    if asymm_max is not None and asymm_min is None:
        asymm_min = -asymm_max
        asymm_symm = False
    if asymm_min is not None and asymm_max is None:
        asymm_max = -asymm_min
        asymm_symm = False

    outdir = os.path.expanduser(os.path.expandvars(outdir))
    mkdir(outdir)

    # Get the reference distribution(s) into the form of a test MapSet
    p_ref = None
    ref_source = None
    if isinstance(ref, Map):
        p_ref = MapSet(ref)
        ref_source = MAP_SOURCE_STR
    elif isinstance(ref, MapSet):
        p_ref = ref
        ref_source = MAPSET_SOURCE_STR
    elif isinstance(ref, Pipeline):
        if ref_param_selections is not None:
            ref.select_params(ref_param_selections)
        p_ref = ref.get_outputs()
        ref_source = PIPELINE_SOURCE_STR
    elif isinstance(ref, DistributionMaker):
        if ref_param_selections is not None:
            ref.select_params(ref_param_selections)
        p_ref = ref.get_outputs()
        ref_source = DISTRIBUTIONMAKER_SOURCE_STR
    else:
        if len(ref) == 1:
            try:
                ref_pipeline = Pipeline(config=ref[0])
            except Exception:
                pass
            else:
                ref_source = PIPELINE_SOURCE_STR
                if ref_param_selections is not None:
                    ref_pipeline.select_params(ref_param_selections)
                p_ref = ref_pipeline.get_outputs()
        else:
            try:
                ref_dmaker = DistributionMaker(pipelines=ref)
            except Exception:
                pass
            else:
                ref_source = DISTRIBUTIONMAKER_SOURCE_STR
                if ref_param_selections is not None:
                    ref_dmaker.select_params(ref_param_selections)
                p_ref = ref_dmaker.get_outputs()

    if p_ref is None:
        try:
            p_ref = [Map.from_json(f) for f in ref]
        except Exception:
            pass
        else:
            ref_source = MAP_SOURCE_STR
            p_ref = MapSet(p_ref)

    if p_ref is None:
        assert ref_param_selections is None
        assert len(ref) == 1, 'Can only handle one MapSet'
        try:
            p_ref = MapSet.from_json(ref[0])
        except Exception:
            pass
        else:
            ref_source = MAPSET_SOURCE_STR

    if p_ref is None:
        raise ValueError(
            'Could not instantiate the reference Pipeline, DistributionMaker,'
            ' Map, or MapSet from ref value(s) %s' % ref)
    ref = p_ref

    logging.info('Reference map(s) derived from a ' + ref_source)

    # Get the test distribution(s) into the form of a test MapSet
    p_test = None
    test_source = None
    if isinstance(test, Map):
        p_test = MapSet(test)
        test_source = MAP_SOURCE_STR
    elif isinstance(test, MapSet):
        p_test = test
        test_source = MAPSET_SOURCE_STR
    elif isinstance(test, Pipeline):
        if test_param_selections is not None:
            test.select_params(test_param_selections)
        p_test = test.get_outputs()
        test_source = PIPELINE_SOURCE_STR
    elif isinstance(test, DistributionMaker):
        if test_param_selections is not None:
            test.select_params(test_param_selections)
        p_test = test.get_outputs()
        test_source = DISTRIBUTIONMAKER_SOURCE_STR
    else:
        if len(test) == 1:
            try:
                test_pipeline = Pipeline(config=test[0])
            except Exception:
                pass
            else:
                test_source = PIPELINE_SOURCE_STR
                if test_param_selections is not None:
                    test_pipeline.select_params(test_param_selections)
                p_test = test_pipeline.get_outputs()
        else:
            try:
                test_dmaker = DistributionMaker(pipelines=test)
            except Exception:
                pass
            else:
                test_source = DISTRIBUTIONMAKER_SOURCE_STR
                if test_param_selections is not None:
                    test_dmaker.select_params(test_param_selections)
                p_test = test_dmaker.get_outputs()

    if p_test is None:
        try:
            p_test = [Map.from_json(f) for f in test]
        except Exception:
            pass
        else:
            test_source = MAP_SOURCE_STR
            p_test = MapSet(p_test)

    if p_test is None:
        assert test_param_selections is None
        assert len(test) == 1, 'Can only handle one MapSet'
        try:
            p_test = MapSet.from_json(test[0])
        except Exception:
            pass
        else:
            test_source = MAPSET_SOURCE_STR

    if p_test is None:
        raise ValueError(
            'Could not instantiate the test Pipeline, DistributionMaker, Map,'
            ' or MapSet from test value(s) %s' % test)
    test = p_test

    logging.info('Test map(s) derived from a ' + test_source)

    if combine is not None:
        ref = ref.combine_wildcard(combine)
        test = test.combine_wildcard(combine)
        if isinstance(ref, Map):
            ref = MapSet([ref])
        if isinstance(test, Map):
            test = MapSet([test])

    if sum is not None:
        ref = ref.sum(sum)
        test = test.sum(sum)

    # Set the MapSet names according to args passed by user
    ref.name = ref_label
    test.name = test_label

    # Save to disk the maps being plotted (excluding optional absolute-value
    # operations)
    if json:
        refmaps_path = os.path.join(outdir, 'maps__%s.json.bz2' % ref_label)
        to_file(ref, refmaps_path)

        testmaps_path = os.path.join(outdir, 'maps__%s.json.bz2' % test_label)
        to_file(test, testmaps_path)

    if set(test.names) != set(ref.names):
        raise ValueError('Test map names %s do not match ref map names %s.' %
                         (sorted(test.names), sorted(ref.names)))

    # Aliases to save keystrokes
    def masked(x):
        return np.ma.masked_invalid(x.nominal_values)

    def zero_to_nan(m):
        newmap = deepcopy(m)
        mask = np.isclose(newmap.nominal_values, 0, rtol=0, atol=EPSILON)
        newmap.hist[mask] = np.nan
        return newmap

    reordered_test = []
    new_ref = []
    diff_maps = []
    fract_diff_maps = []
    asymm_maps = []
    summary_stats = {}
    for ref_map in ref:
        test_map = test[ref_map.name].reorder_dimensions(ref_map.binning)
        if ref_abs:
            ref_map = abs(ref_map)
        if test_abs:
            test_map = abs(test_map)

        diff_map = test_map - ref_map
        fract_diff_map = (test_map - ref_map) / zero_to_nan(ref_map)
        asymm_map = (test_map - ref_map) / zero_to_nan(ref_map**0.5)
        abs_fract_diff_map = np.abs(fract_diff_map)

        new_ref.append(ref_map)
        reordered_test.append(test_map)
        diff_maps.append(diff_map)
        fract_diff_maps.append(fract_diff_map)
        asymm_maps.append(asymm_map)

        min_ref = np.min(masked(ref_map))
        max_ref = np.max(masked(ref_map))

        min_test = np.min(masked(test_map))
        max_test = np.max(masked(test_map))

        total_ref = np.sum(masked(ref_map))
        total_test = np.sum(masked(test_map))

        mean_ref = np.mean(masked(ref_map))
        mean_test = np.mean(masked(test_map))

        max_abs_fract_diff = np.max(masked(abs_fract_diff_map))
        mean_abs_fract_diff = np.mean(masked(abs_fract_diff_map))
        median_abs_fract_diff = np.median(masked(abs_fract_diff_map))

        mean_fract_diff = np.mean(masked(fract_diff_map))
        min_fract_diff = np.min(masked(fract_diff_map))
        max_fract_diff = np.max(masked(fract_diff_map))
        std_fract_diff = np.std(masked(fract_diff_map))

        mean_diff = np.mean(masked(diff_map))
        min_diff = np.min(masked(diff_map))
        max_diff = np.max(masked(diff_map))
        std_diff = np.std(masked(diff_map))

        median_diff = np.nanmedian(masked(diff_map))
        mad_diff = np.nanmedian(masked(np.abs(diff_map)))
        median_fract_diff = np.nanmedian(masked(fract_diff_map))
        mad_fract_diff = np.nanmedian(masked(np.abs(fract_diff_map)))

        min_asymm = np.min(masked(asymm_map))
        max_asymm = np.max(masked(asymm_map))

        total_asymm = np.sqrt(np.sum(masked(asymm_map)**2))

        summary_stats[test_map.name] = OrderedDict([
            ('min_ref', min_ref),
            ('max_ref', max_ref),
            ('total_ref', total_ref),
            ('mean_ref', mean_ref),
            ('min_test', min_test),
            ('max_test', max_test),
            ('total_test', total_test),
            ('mean_test', mean_test),
            ('max_abs_fract_diff', max_abs_fract_diff),
            ('mean_abs_fract_diff', mean_abs_fract_diff),
            ('median_abs_fract_diff', median_abs_fract_diff),
            ('min_fract_diff', min_fract_diff),
            ('max_fract_diff', max_fract_diff),
            ('mean_fract_diff', mean_fract_diff),
            ('std_fract_diff', std_fract_diff),
            ('median_fract_diff', median_fract_diff),
            ('mad_fract_diff', mad_fract_diff),
            ('min_diff', min_diff),
            ('max_diff', max_diff),
            ('mean_diff', mean_diff),
            ('std_diff', std_diff),
            ('median_diff', median_diff),
            ('mad_diff', mad_diff),
            ('min_asymm', min_asymm),
            ('max_asymm', max_asymm),
            ('total_asymm', total_asymm),
        ])

        logging.info('Map %s...', ref_map.name)
        logging.info('  Ref map(s):')
        logging.info('    min   :' + ('%.2f' % min_ref).rjust(12))
        logging.info('    max   :' + ('%.2f' % max_ref).rjust(12))
        logging.info('    total :' + ('%.2f' % total_ref).rjust(12))
        logging.info('    mean  :' + ('%.2f' % mean_ref).rjust(12))
        logging.info('  Test map(s):')
        logging.info('    min   :' + ('%.2f' % min_test).rjust(12))
        logging.info('    max   :' + ('%.2f' % max_test).rjust(12))
        logging.info('    total :' + ('%.2f' % total_test).rjust(12))
        logging.info('    mean  :' + ('%.2f' % mean_test).rjust(12))
        logging.info('  Absolute fract. diff., abs((Test - Ref) / Ref):')
        logging.info('    max   : %.4e', max_abs_fract_diff)
        logging.info('    mean  : %.4e', mean_abs_fract_diff)
        logging.info('    median: %.4e', median_abs_fract_diff)
        logging.info('  Fractional difference, (Test - Ref) / Ref:')
        logging.info('    min   : %.4e', min_fract_diff)
        logging.info('    max   : %.4e', max_fract_diff)
        logging.info('    mean  : %.4e +/- %.4e', mean_fract_diff,
                     std_fract_diff)
        logging.info('    median: %.4e +/- %.4e', median_fract_diff,
                     mad_fract_diff)
        logging.info('  Difference, Test - Ref:')
        logging.info('    min   : %.4e', min_diff)
        logging.info('    max   : %.4e', max_diff)
        logging.info('    mean  : %.4e +/- %.4e', mean_diff, std_diff)
        logging.info('    median: %.4e +/- %.4e', median_diff, mad_diff)
        logging.info('  Asymmetry, (Test - Ref) / sqrt(Ref)')
        logging.info('    min   : %.4e', min_asymm)
        logging.info('    max   : %.4e', max_asymm)
        logging.info('    total : %.4e (sum in quadrature)', total_asymm)
        logging.info('')

    ref = MapSet(new_ref)
    test = MapSet(reordered_test)
    diff = MapSet(diff_maps)
    fract_diff = MapSet(fract_diff_maps)
    asymm = MapSet(asymm_maps)

    if json:
        diff.to_json(
            os.path.join(
                outdir,
                'diff__%s__%s.json.bz2' % (test_plot_label, ref_plot_label)))
        fract_diff.to_json(
            os.path.join(
                outdir, 'fract_diff__%s___%s.json.bz2' %
                (test_plot_label, ref_plot_label)))
        asymm.to_json(
            os.path.join(
                outdir,
                'asymm__%s___%s.json.bz2' % (test_plot_label, ref_plot_label)))
        to_file(
            summary_stats,
            os.path.join(
                outdir,
                'stats__%s__%s.json.bz2' % (test_plot_label, ref_plot_label)))

    for plot_format in plot_formats:
        # Plot the raw distributions
        plotter = Plotter(stamp='',
                          outdir=outdir,
                          fmt=plot_format,
                          log=False,
                          annotate=False,
                          symmetric=False,
                          ratio=False)
        plotter.plot_2d_array(ref, fname='distr__%s' % ref_plot_label)
        plotter.plot_2d_array(test, fname='distr__%s' % test_plot_label)

        # Plot the difference (test - ref)
        plotter = Plotter(stamp='',
                          outdir=outdir,
                          fmt=plot_format,
                          log=False,
                          annotate=False,
                          symmetric=diff_symm,
                          ratio=False)
        plotter.label = '%s - %s' % (test_plot_label, ref_plot_label)
        plotter.plot_2d_array(
            test - ref,
            fname='diff__%s__%s' % (test_plot_label, ref_plot_label),
            #vmin=diff_min, vmax=diff_max
        )

        # Plot the fractional difference (test - ref)/ref
        plotter = Plotter(stamp='',
                          outdir=outdir,
                          fmt=plot_format,
                          log=False,
                          annotate=False,
                          symmetric=fract_diff_symm,
                          ratio=True)
        plotter.label = ('(%s-%s)/%s' %
                         (test_plot_label, ref_plot_label, ref_plot_label))
        plotter.plot_2d_array(
            (test - ref) / MapSet([zero_to_nan(r) for r in ref]),
            fname='fract_diff__%s__%s' % (test_plot_label, ref_plot_label),
            #vmin=fract_diff_min, vmax=fract_diff_max
        )

        # Plot the asymmetry (test - ref)/sqrt(ref)
        plotter = Plotter(stamp='',
                          outdir=outdir,
                          fmt=plot_format,
                          log=False,
                          annotate=False,
                          symmetric=asymm_symm,
                          ratio=True)
        plotter.label = (r'$(%s - %s)/\sqrt{%s}$' %
                         (test_plot_label, ref_plot_label, ref_plot_label))
        plotter.plot_2d_array(
            (test - ref) / MapSet([zero_to_nan(r**0.5) for r in ref]),
            fname='asymm__%s__%s' % (test_plot_label, ref_plot_label),
            #vmin=asymm_min, vmax=asymm_max
        )

    return summary_stats, diff, fract_diff, asymm
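
The summary statistics above rely on masking non-finite bins, so that e.g. divisions by a zero-valued reference bin (turned into NaN by `zero_to_nan`) don't poison the aggregates. A toy sketch of that masking pattern in isolation:

import numpy as np

hist = np.array([[1.0, 2.0, np.nan],
                 [4.0, np.inf, 6.0]])
masked = np.ma.masked_invalid(hist)  # NaN/inf bins are excluded
print(masked.min(), masked.max(), masked.sum())  # 1.0 6.0 13.0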
Example #15
def makeEventsFile(data_files,
                   detector,
                   proc_ver,
                   cut,
                   outdir,
                   run_settings=None,
                   data_proc_params=None,
                   join=None,
                   cust_cuts=None,
                   extract_fields=EXTRACT_FIELDS,
                   output_fields=OUTPUT_FIELDS):
    r"""Take the simulated and reconstructed HDF5 file(s) (as converted from I3
    by icecube.hdfwriter.I3HDFTableService) as input and write out a simplified
    PISA-standard-format HDF5 file for use in aeff, reco, and/or PID stages.

    Parameters
    ----------
    data_files : dict
        File paths for finding data files for each run, formatted as:
            {
                <string run>: <list of file paths>,
                <string run>: <list of file paths>,
                ...
                <string run>: <list of file paths>,
            }

    detector : string
        Name of the detector (e.g. IceCube, DeepCore, PINGU, etc.) as found in
        e.g. mc_sim_run_settings.json and data_proc_params.json files.

    proc_ver
        Version of processing applied to the events, as found in e.g.
        data_proc_params.json.

    cut
        Name of a standard cut to use; must be specified in the relevant
        detector/processing version node of the data processing parameters
        (file from which the data_proc_params object was instantiated)

    outdir
        Directory path in which to store resulting files; will be generated if
        it does not already exist (including any parent directories that do not
        exist)

    run_settings : string or MCSimRunSettings
        Resource location of mc_sim_run_settings.json or an MCSimRunSettings
        object instantiated therefrom.

    data_proc_params : string or DataProcParams
        Resource location of data_proc_params.json or a DataProcParams object
        instantiated therefrom.

    join
        String specifying any flavor/interaction types (flavInts) to join
        together. Separate flavInts with commas (',') and separate groups
        with semicolons (';'). E.g. an acceptable string is:
            'numucc+numubarcc; nuall bar NC, nuall NC'

    cust_cuts
        dict with a single DataProcParams cut specification or list of same
        (see help for DataProcParams for detailed description of cut spec)

    extract_fields : None or iterable of strings
        Field names to extract from source HDF5 file. If None, extract all
        fields.

    output_fields : None or iterable of strings
        Fields to include in the generated PISA-standard-format events HDF5
        file; note that if 'weighted_aeff' is not present, effective area will
        not be computed. If None, all fields will be written.

    Notes
    -----
    Compute "weighted_aeff" field:

    Within each int type (CC or NC), ngen should be added together;
    events recorded of that int type then get their one_weight divided by the
    total *for that int type only* to obtain the "weighted_aeff" for that
    event (even if int types are being grouped/joined together).

    This has the effect that within a group, ...
      ... and within an interaction type, effective area is a weighted
      average of that of the flavors being combined. E.g. for CC,

                     \sum_{run x}\sum_{flav y} (Aeff_{x,y} * ngen_{x,y})
          Aeff_CC = ----------------------------------------------------- ,
                          \sum_{run x}\sum_{flav y} (ngen_{x,y})

      ... and then across interaction types, the results of the above for
      each int type need to be summed together, i.e.:

          Aeff_total = Aeff_CC + Aeff_NC

    Note that each grouping of flavors is calculated with the above math
    completely independently from other flavor groupings specified.

    See Justin Lanfranchi's presentation on the PINGU Analysis call,
    2015-10-21, for more details:
      https://wikispaces.psu.edu/download/attachments/282040606/meff_report_jllanfranchi_v05_2015-10-21.pdf

    """
    if isinstance(run_settings, str):
        run_settings = DetMCSimRunsSettings(find_resource(run_settings),
                                            detector=detector)
    assert isinstance(run_settings, DetMCSimRunsSettings)
    assert run_settings.detector == detector

    if isinstance(data_proc_params, str):
        data_proc_params = DataProcParams(
            detector=detector,
            proc_ver=proc_ver,
            data_proc_params=find_resource(data_proc_params))
    assert data_proc_params.detector == detector
    assert data_proc_params.proc_ver == proc_ver

    runs = sorted(data_files.keys())

    all_flavs = []
    flavs_by_run = {}
    run_norm_factors = {}
    bin_edges = set()

    runs_by_flavint = FlavIntData()
    for flavint in runs_by_flavint.flavints:
        runs_by_flavint[flavint] = []

    #ngen_flavint_by_run = {run:FlavIntData() for run in runs}
    ##ngen_per_flav_by_run = {run:FlavIntData() for run in runs}
    #eint_per_flav_by_run = {run:FlavIntData() for run in runs}
    #for run in runs:
    #    flavints_in_run = run_settings.get_flavints(run=run)
    #    e_range = run_settings.get_energy_range(run)
    #    gamma = run_settings.get_spectral_index(run)
    #    for flavint in flavints_in_run:
    #        runs_by_flavint[flavint].append(run)
    #        ngen_flav = run_settings.get_num_gen(
    #            run=run, flav_or_flavint=flavint, include_physical_fract=True
    #        )
    #        #runs_by_flavint[flavint].append(run)
    #        #this_flav = flavint.
    #        #xsec_fract_en_wtd_avg[run][flavint] = \
    #        ngen_flavint_by_run[run][flavint] = \
    #                xsec.get_xs_ratio_integral(
    #                    flavintgrp0=flavint,
    #                    flavintgrp1=flavint.flav,
    #                    e_range=e_range,
    #                    gamma=gamma,
    #                    average=True
    #                )
    #    xsec_ver = run_settings.get_xsec_version(run=run)
    #    if xsec_ver_ref is None:
    #        xsec_ver_ref = xsec_ver
    #    # An assumption of below logic is that all MC is generated using the
    #    # same cross sections version.
    #    #
    #    # TODO / NOTE:
    #    # It would be possible to combine runs with different cross sections so
    #    # long as each (flavor, interaction type) cross sections are
    #    # weighted-averaged together using weights
    #    #   N_gen_{n,flav+inttype} * E_x^{-gamma_n} /
    #    #       ( \int_{E_min_n}^{E_max_n} E^{-\gamma_n} dE )
    #    # where E_x are the energy sample points specified in the cross
    #    # sections (and hence these must also be identical across all cross
    #    # sections that get combined, unless interpolation is performed).
    #    assert xsec_ver == xsec_ver_ref
    #    #ngen_weighted_energy_integral[str(run)] = powerLawIntegral(
    #    #flavs_by_run[run] = run_settings.flavs(run)
    ##flavs_present =

    detector_geom = run_settings[runs[0]]['geom']

    # Create Events object to store data
    evts = Events()
    evts.metadata.update({
        'detector': run_settings.detector,
        'proc_ver': data_proc_params.proc_ver,
        'geom': detector_geom,
        'runs': runs,
    })

    cuts = []
    if isinstance(cust_cuts, dict):
        cust_cuts = [cust_cuts]
    if cut is not None:
        evts.metadata['cuts'].append(cut)
        cuts.append(cut)
    if cust_cuts is not None:
        for ccut in cust_cuts:
            evts.metadata['cuts'].append('custom: ' + ccut['pass_if'])
            cuts.append(ccut)

    orig_outdir = outdir
    outdir = expand(outdir)
    logging.info('Output dir spec\'d: %s', orig_outdir)
    if outdir != orig_outdir:
        logging.info('Output dir expands to: %s', outdir)
    mkdir(outdir)

    detector_label = str(data_proc_params.detector)
    proc_label = 'proc_' + str(data_proc_params.proc_ver)

    # What flavints to group together
    if join is None or join == '':
        grouped = []
        ungrouped = [NuFlavIntGroup(k) for k in ALL_NUFLAVINTS]
        groups_label = 'unjoined'
        logging.info('Events in the following groups will be joined together:'
                     ' (none)')
    else:
        grouped, ungrouped = xlateGroupsStr(join)
        evts.metadata['flavints_joined'] = [str(g) for g in grouped]
        groups_label = 'joined_G_' + '_G_'.join([str(g) for g in grouped])
        logging.info(
            'Events in the following groups will be joined together: ' +
            '; '.join([str(g) for g in grouped]))

    # Find any flavints not included in the above groupings
    flavint_groupings = grouped + ungrouped
    if len(ungrouped) == 0:
        ungrouped = ['(none)']
    logging.info('Events of the following flavints will NOT be joined'
                 ' together: ' + '; '.join([str(k) for k in ungrouped]))

    # Enforce that flavints composing groups are mutually exclusive
    for grp_n, flavintgrp0 in enumerate(flavint_groupings[:-1]):
        for flavintgrp1 in flavint_groupings[grp_n + 1:]:
            assert len(set(flavintgrp0).intersection(set(flavintgrp1))) == 0

    flavintgrp_names = [str(flavintgrp) for flavintgrp in flavint_groupings]

    # Instantiate storage for all intermediate destination fields;
    # The data structure looks like:
    #   extracted_data[group #][interaction type][field name] = list of data
    if extract_fields is None:
        extracted_data = [{inttype: {}
                           for inttype in ALL_NUINT_TYPES}
                          for _ in flavintgrp_names]
    else:
        extracted_data = [{
            inttype: {field: []
                      for field in extract_fields}
            for inttype in ALL_NUINT_TYPES
        } for _ in flavintgrp_names]

    # Instantiate generated-event counts for destination fields; count
    # CC separately from NC because aeff's for CC & NC add, whereas
    # aeffs intra-CC should be weighted-averaged (as for intra-NC)
    ngen = [{inttype: {}
             for inttype in ALL_NUINT_TYPES} for _ in flavintgrp_names]

    # Loop through all of the files, retrieving the events, filtering,
    # and recording the number of generated events pertinent to
    # calculating aeff
    filecount = {}
    detector_geom = None
    bad_files = []
    for run, fnames in data_files.items():
        file_count = 0
        for fname in fnames:
            # Retrieve data from all nodes specified in the processing
            # settings file
            logging.trace('Trying to get data from file %s', fname)
            try:
                data = data_proc_params.get_data(fname,
                                                 run_settings=run_settings)
            except (ValueError, KeyError, IOError):
                logging.warning('Bad file encountered: %s', fname)
                bad_files.append(fname)
                continue

            file_count += 1

            # Check to make sure only one run is present in the data
            runs_in_data = set(data['run'])
            assert len(runs_in_data) == 1, 'Must be just one run in data'

            #run = int(data['run'][0])
            if run not in filecount:
                filecount[run] = 0
            filecount[run] += 1
            rs_run = run_settings[run]

            # Record geom; check that geom is consistent with other runs
            if detector_geom is None:
                detector_geom = rs_run['geom']
            assert rs_run['geom'] == detector_geom, \
                    'All runs\' geometries must match!'

            # Loop through all flavints spec'd for run
            for run_flavint in rs_run['flavints']:
                barnobar = run_flavint.bar_code
                int_type = run_flavint.intType

                # Retrieve this-interaction-type- & this-barnobar-only events
                # that also pass cuts. (note that cut names are strings)
                intonly_cut_data = data_proc_params.apply_cuts(
                    data,
                    cuts=cuts + [str(int_type), str(barnobar)],
                    return_fields=extract_fields)

                # Record the generated count and data for this run/flavor for
                # each group to which it's applicable
                for grp_n, flavint_group in enumerate(flavint_groupings):
                    if run_flavint not in flavint_group:
                        continue

                    # Instantiate a field for particles and antiparticles,
                    # keyed by the output of the bar_code property for each
                    if run not in ngen[grp_n][int_type]:
                        ngen[grp_n][int_type][run] = {
                            NuFlav(12).bar_code: 0,
                            NuFlav(-12).bar_code: 0,
                        }

                    # Record count only if it hasn't already been recorded
                    if ngen[grp_n][int_type][run][barnobar] == 0:
                        # Note that one_weight includes cc/nc:total fraction,
                        # so DO NOT specify the full flavint here, only flav
                        # (since one_weight does NOT take bar/nobar fraction,
                        # it must be included here in the ngen computation)
                        flav_ngen = run_settings.get_num_gen(run=run,
                                                             barnobar=barnobar)
                        ngen[grp_n][int_type][run][barnobar] = flav_ngen

                    # Append the data. Note that extracted_data is:
                    # extracted_data[group n][int_type][extract field name] =
                    #   list
                    if extract_fields is None:
                        for f in intonly_cut_data.keys():
                            if f not in extracted_data[grp_n][int_type]:
                                extracted_data[grp_n][int_type][f] = []
                            extracted_data[grp_n][int_type][f].extend(
                                intonly_cut_data[f])
                    else:
                        for f in extract_fields:
                            extracted_data[grp_n][int_type][f].extend(
                                intonly_cut_data[f])
        logging.info('File count for run %s: %d', run, file_count)
    to_file(bad_files, '/tmp/bad_files.json')

    if ((output_fields is None and
         (extract_fields is None or 'one_weight' in extract_fields))
            or (output_fields is not None
                and 'weighted_aeff' in output_fields)):
        fmtfields = (' ' * 12 + 'flavint_group', 'int type', '     run',
                     'part/anti', 'part/anti count', 'aggregate count')
        fmt_n = [len(f) for f in fmtfields]
        fmt = '  '.join([r'%' + str(n) + r's' for n in fmt_n])
        lines = '  '.join(['-' * n for n in fmt_n])
        logging.info(fmt, *fmtfields)
        logging.info(lines)
        for grp_n, flavint_group in enumerate(flavint_groupings):
            for int_type in set([fi.intType for fi in flavint_group.flavints]):
                ngen_it_tot = 0
                for run, run_counts in ngen[grp_n][int_type].items():
                    for barnobar, barnobar_counts in run_counts.items():
                        ngen_it_tot += barnobar_counts
                        logging.info(fmt, flavint_group.simple_str(), int_type,
                                     str(run), barnobar, int(barnobar_counts),
                                     int(ngen_it_tot))
                # Convert data to numpy array
                if extract_fields is None:
                    for field in extracted_data[grp_n][int_type].keys():
                        extracted_data[grp_n][int_type][field] = \
                                np.array(extracted_data[grp_n][int_type][field])
                else:
                    for field in extract_fields:
                        extracted_data[grp_n][int_type][field] = \
                                np.array(extracted_data[grp_n][int_type][field])
                # Generate weighted_aeff field for this group / int type's data
                extracted_data[grp_n][int_type]['weighted_aeff'] = \
                        extracted_data[grp_n][int_type]['one_weight'] \
                        / ngen_it_tot * CMSQ_TO_MSQ

    # Report file count per run
    for run, count in filecount.items():
        logging.info('Files read, run %s: %d', run, count)
        ref_num_i3_files = run_settings[run]['num_i3_files']
        if count != ref_num_i3_files:
            logging.warning(
                'Run %s, Number of files read (%d) != number of '
                'source I3 files (%d), which may indicate an error.', run,
                count, ref_num_i3_files)

    # Generate output data
    for flavint in ALL_NUFLAVINTS:
        int_type = flavint.intType
        for grp_n, flavint_group in enumerate(flavint_groupings):
            if flavint not in flavint_group:
                logging.trace('flavint %s not in flavint_group %s, passing.',
                              flavint, flavint_group)
                continue
            else:
                logging.trace(
                    'flavint %s **IS** in flavint_group %s, storing.', flavint,
                    flavint_group)
            if output_fields is None:
                evts[flavint] = extracted_data[grp_n][int_type]
            else:
                evts[flavint] = {
                    f: extracted_data[grp_n][int_type][f]
                    for f in output_fields
                }

    # Generate file name
    numerical_runs = []
    alphanumerical_runs = []
    for run in runs:
        try:
            int(run)
            numerical_runs.append(int(run))
        except ValueError:
            alphanumerical_runs.append(str(run))
    run_labels = []
    if len(numerical_runs) > 0:
        run_labels.append(list2hrlist(numerical_runs))
    if len(alphanumerical_runs) > 0:
        run_labels += sorted(alphanumerical_runs)
    run_label = 'runs_' + ','.join(run_labels)
    geom_label = detector_geom
    fname = 'events__' + '__'.join([
        detector_label,
        geom_label,
        run_label,
        proc_label,
        groups_label,
    ]) + '.hdf5'

    outfpath = os.path.join(outdir, fname)
    logging.info('Writing events to %s', outfpath)

    # Save data to output file
    evts.save(outfpath)
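
A minimal sketch of the per-event effective-area weighting computed above, assuming `one_weight` is IceCube's per-event OneWeight (GeV cm^2 sr) and that `CMSQ_TO_MSQ` is the cm^2-to-m^2 conversion; the function name here is illustrative, not part of the original code:

import numpy as np

CMSQ_TO_MSQ = 1.0e-4  # convert cm^2 to m^2

def weighted_aeff(one_weight, ngen_total):
    # Per-event effective-area weight: OneWeight divided by the total
    # number of generated events, converted from cm^2 to m^2
    return np.asarray(one_weight) / ngen_total * CMSQ_TO_MSQ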
Example #16
def test_Prior():
    """Unit tests for Prior class"""
    uniform = Prior(kind='uniform', llh_offset=1.5)
    jeffreys = Prior(kind='jeffreys', A=2 * ureg.s, B=3 * ureg.ns)
    gaussian = Prior(kind='gaussian', mean=10, stddev=1)
    x = np.linspace(-10, 10, 100)
    y = x**2
    linterp = Prior(kind='linterp',
                    param_vals=x * ureg.meter / ureg.s,
                    llh_vals=y)
    param_vals = np.linspace(-10, 10, 100)
    llh_vals = param_vals**2
    knots, coeffs, deg = splrep(param_vals, llh_vals)
    spline = Prior(kind='spline',
                   knots=knots * ureg.foot,
                   coeffs=coeffs,
                   deg=deg)
    param_upsamp = np.linspace(-10, 10, 1000) * ureg.foot
    llh_upsamp = splev(param_upsamp.magnitude, tck=(knots, coeffs, deg), ext=2)
    assert all(spline.llh(param_upsamp) == llh_upsamp)

    # Asking for param value outside of range should fail
    try:
        linterp.llh(-1000 * ureg.mile / ureg.s)
    except ValueError:
        pass
    else:
        assert False

    # Asking for value at quantity with invalid units
    try:
        linterp.chi2(-1000 * ureg.km)
    except pint.DimensionalityError:
        pass
    else:
        assert False

    try:
        spline.llh(-1000 * ureg.meter)
    except ValueError:
        pass
    else:
        assert False

    try:
        spline.chi2(+1000 * ureg.meter)
    except ValueError:
        pass
    else:
        assert False

    # Asking for param value when units were used should fail
    try:
        spline.llh(10)
    except TypeError:
        pass
    else:
        assert False

    # ... or vice versa
    try:
        gaussian.llh(10 * ureg.meter)
    except pint.DimensionalityError:
        pass
    else:
        assert False

    # -- Test writing to and reading from JSON files -- #

    with tempfile.TemporaryDirectory() as temp_dir:
        for pri in [uniform, jeffreys, gaussian, linterp, spline]:
            fpath = join(temp_dir, pri.kind + '.json')
            try:
                to_file(pri, fpath)
                loaded = from_file(fpath, cls=Prior)
                assert loaded == pri
            except Exception:
                logging.error('prior %s failed', pri.kind)
                if isfile(fpath):
                    with open(fpath, 'r') as f:
                        logging.error('contents of %s:\n%s', fpath, f.read())
                raise

    logging.info('<< PASS : test_Prior >>')
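
A short usage sketch of the Prior API exercised by the test above; the import path is an assumption, and the printed values follow from the usual Gaussian definitions (llh = -(x - mean)^2 / (2 stddev^2), chi2 = (x - mean)^2 / stddev^2):

from pisa.core.prior import Prior  # assumed import path

gaussian = Prior(kind='gaussian', mean=10, stddev=1)
print(gaussian.llh(10))   # 0.0 at the mean
print(gaussian.chi2(12))  # (12 - 10)**2 / 1**2 = 4.0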
Example #17
def profile_scan(data_settings, template_settings, param_names, steps,
                 only_points, no_outer, data_param_selections,
                 hypo_param_selections, profile, outfile, minimizer_settings,
                 metric, debug_mode):
    """Perform a profile scan.

    Parameters
    ----------
    data_settings
    template_settings
    param_names
    steps
    only_points
    no_outer
    data_param_selections
    hypo_param_selections
    profile
    outfile
    minimizer_settings
    metric
    debug_mode

    Returns
    -------
    results
    analysis

    """
    outfile = expanduser(expandvars(outfile))
    if isfile(outfile):
        raise IOError('`outfile` "{}" already exists!'.format(outfile))

    minimizer_settings = from_file(minimizer_settings)

    hypo_maker = DistributionMaker(template_settings)

    if data_settings is None:
        if (data_param_selections is None
                or data_param_selections == hypo_param_selections):
            data_maker = hypo_maker
        else:
            data_maker = deepcopy(hypo_maker)
            data_maker.select_params(data_param_selections)
    else:
        data_maker = DistributionMaker(data_settings)
        data_maker.select_params(data_param_selections)

    data_dist = data_maker.get_outputs(return_sum=True)

    analysis = Analysis()
    results = analysis.scan(data_dist=data_dist,
                            hypo_maker=hypo_maker,
                            hypo_param_selections=hypo_param_selections,
                            metric=metric,
                            param_names=param_names,
                            steps=steps,
                            only_points=only_points,
                            outer=not no_outer,
                            profile=profile,
                            minimizer_settings=minimizer_settings,
                            outfile=outfile,
                            debug_mode=debug_mode)
    to_file(results, outfile)
    logging.info("Done.")

    return results, analysis
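
A hypothetical invocation of `profile_scan`; every path and parameter value below is a placeholder, not taken from the original code:

results, analysis = profile_scan(
    data_settings=None,            # reuse the hypothesis maker's output as data
    template_settings='settings/pipeline/example.cfg',  # placeholder config
    param_names=['theta23'],
    steps=[20],
    only_points=None,
    no_outer=False,
    data_param_selections=None,
    hypo_param_selections=None,
    profile=True,
    outfile='theta23_profile_scan.json',
    minimizer_settings='settings/minimizer/slsqp_settings.json',  # placeholder
    metric='chi2',
    debug_mode=None,
)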
Example #18
def add_fluxes_to_file(data_file_path,
                       flux_table,
                       flux_name,
                       outdir=None,
                       label=None,
                       overwrite=False):
    """Add fluxes to PISA events file (e.g. for use by an mc stage)
    
    Parameters
    ----------
    data_file_path : string
    flux_table : mapping
        Spline tables keyed by neutrino flavor ('nue', 'nuebar', 'numu',
        'numubar')
    flux_name : string
        Prefix for the flux field names added to the file
    outdir : string or None
        If None, output is to the same directory as `data_file_path`
    label : string or None
        If provided, included in the output filename
    overwrite : bool, optional
    """
    data, attrs = from_file(find_resource(data_file_path), return_attrs=True)
    bname, ext = splitext(basename(data_file_path))
    assert ext.lstrip('.') in HDF5_EXTS

    if outdir is None:
        outdir = dirname(data_file_path)

    if label is None:
        label = ''
    else:
        assert isinstance(label, str)
        label = '_' + label

    outpath = join(outdir, '{}__with_fluxes{}{}'.format(bname, label, ext))

    if not overwrite and isfile(outpath):
        logging.warning('Output path "%s" already exists, not regenerating',
                        outpath)
        return

    mkdir(outdir, warn=False)

    # Loop over the top-level keys
    for primary, primary_node in data.items():

        # Only handling neutrino fluxes here; skip past e.g. muon or noise MC events
        if primary.startswith("nu"):

            logging.info('Adding fluxes to "%s" events', primary)

            # Input data may have one layer of hierarchy before the event
            # variables (e.g. [numu_cc]), or for older files there may be a
            # second layer (e.g. [numu][cc]). Handle either case here.
            if "true_energy" in primary_node:
                secondary_nodes = [primary_node]
            else:
                secondary_nodes = primary_node.values()

            for secondary_node in secondary_nodes:

                true_e = secondary_node['true_energy']
                true_cz = secondary_node['true_coszen']

                # calculate all 4 fluxes (nue, nuebar, numu and numubar)
                for table in ['nue', 'nuebar', 'numu', 'numubar']:
                    flux = calculate_2d_flux_weights(
                        true_energies=true_e,
                        true_coszens=true_cz,
                        en_splines=flux_table[table])
                    keyname = flux_name + '_' + table + '_flux'
                    secondary_node[keyname] = flux

    to_file(data, outpath, attrs=attrs, overwrite=overwrite)
    logging.info('--> Wrote file including fluxes to "%s"', outpath)
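
A hypothetical call to `add_fluxes_to_file`; the flux-table loader and all file names are placeholders (a 2D table loader such as `load_2d_table` is assumed to live alongside `calculate_2d_flux_weights`):

from pisa.utils.flux_weights import load_2d_table  # assumed loader

flux_table = load_2d_table('flux/honda-2015-spl-solmax-aa.d')  # placeholder table
add_fluxes_to_file(
    data_file_path='events/my_events.hdf5',  # placeholder events file
    flux_table=flux_table,
    flux_name='nominal',
    outdir='events_with_fluxes',
)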
Example #19
                        analysis.profile(args.var,
                                         eval(args.range),
                                         check_octant=not args.no_check_octant,
                                         pprint=not args.quiet))
                elif args.mode == 'feldman_cousins':
                    assert data_fixed_param is not None
                    p_name, value = list(data_fixed_param.items())[0]
                    print("save the fixed_param_data to output: ", p_name, " ",
                          value)
                    return_result = analysis.profile(
                        p_name, [value],
                        check_octant=not args.no_check_octant,
                        pprint=not args.quiet)
                    return_result.append({'data_%s' % p_name: value})
                    results.append(return_result)
            elif args.function == 'fit':
                best_fit_result = analysis.find_best_fit(
                    check_octant=not args.no_check_octant,
                    pprint=not args.quiet)
                if data_fixed_param is not None:
                    p_name, value = list(data_fixed_param.items())[0]
                    best_fit_result['data_' + p_name] = value
                    print("save the fixed_param_data to output: ", p_name, " ",
                          value)
                results.append(best_fit_result)

        to_file(results, args.outfile)
        logging.info('Done.')
Example #20
def scan_allsyst(template_settings, steps, hypo_param_selections, outdir,
                 minimizer_settings, metric, debug_mode):
    """Scan (separately) all systematics (i.e., non-fixed params).

    Parameters
    ----------
    template_settings
    steps
    hypo_param_selections
    outdir
    minimizer_settings
    metric
    debug_mode

    Returns
    -------
    results : dict
        Keys are param names, values are the scan results

    """
    outdir = expanduser(expandvars(outdir))
    mkdir(outdir, warn=False)

    hypo_maker = DistributionMaker(template_settings)

    hypo_maker.select_params(hypo_param_selections)
    data_dist = hypo_maker.get_outputs(return_sum=True)

    minimizer_settings = from_file(minimizer_settings)

    analysis = Analysis()

    results = OrderedDict()  # pylint: disable=redefined-outer-name
    for param in hypo_maker.params:
        if param.is_fixed:
            continue

        logging.info('Scanning %s', param.name)
        nominal_value = param.value

        outfile = join(
            outdir,
            '{:s}_{:d}_steps_{:s}_scan.json'.format(param.name, steps, metric))
        if isfile(outfile):
            raise IOError(
                '`outfile` "{}" exists, not overwriting.'.format(outfile))

        results[param.name] = analysis.scan(
            data_dist=data_dist,
            hypo_maker=hypo_maker,
            hypo_param_selections=hypo_param_selections,
            metric=metric,
            param_names=param.name,
            steps=steps,
            only_points=None,
            outer=True,
            profile=False,
            minimizer_settings=minimizer_settings,
            outfile=outfile,
            debug_mode=debug_mode)

        to_file(results[param.name], outfile)
        param.value = nominal_value

        logging.info('Done scanning param "%s"', param.name)

    logging.info('Done.')

    return results
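
A hypothetical invocation of `scan_allsyst`; all paths below are placeholders:

all_results = scan_allsyst(
    template_settings='settings/pipeline/example.cfg',  # placeholder config
    steps=11,
    hypo_param_selections=None,
    outdir='syst_scans',
    minimizer_settings='settings/minimizer/slsqp_settings.json',  # placeholder
    metric='llh',
    debug_mode=None,
)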
Example #21
def main():
    parser = ArgumentParser(description=__doc__,
                            formatter_class=ArgumentDefaultsHelpFormatter)
    parser.add_argument('-io',
                        '--io_chi2_file',
                        type=str,
                        required=True,
                        help="Inverted Ordering Chi2 file from NuFit")
    parser.add_argument('-no',
                        '--no_chi2_file',
                        type=str,
                        required=True,
                        help="Inverted Ordering Chi2 file from NuFit")
    parser.add_argument('--shifted',
                        action='store_true',
                        default=False,
                        help='''Flag if wanting prior which attempts to remove
                        the ordering prior by subtracting the delta chi2.''')
    parser.add_argument('--minimised',
                        action='store_true',
                        default=False,
                        help='''Flag if wanting prior which attempts to remove
                        the ordering prior by minimising over both surfaces.'''
                        )
    parser.add_argument('--outdir',
                        metavar='DIR',
                        type=str,
                        required=True,
                        help='''Store all output files to this directory. It
                        is recommended you save them in the priors directory
                        in the PISA resources.''')

    args = parser.parse_args()

    io_filename, io_fileext = os.path.splitext(args.io_chi2_file)
    no_filename, no_fileext = os.path.splitext(args.no_chi2_file)

    if io_fileext != '.gz':
        raise ValueError('%s file extension not expected. Please use the file '
                         'as downloaded from the Nu-Fit website.' % io_fileext)
    if no_fileext != '.gz':
        raise ValueError(
            '%s file extension not expected. Please use the file as '
            'downloaded directly from the Nu-Fit website.' % no_fileext)

    # Get Nu-Fit version from filenames
    NuFitVersion = io_filename.split('/')[-1].split('.')[0]
    if NuFitVersion[0].lower() != 'v':
        raise ValueError('%s%s input file does not allow for discerning the '
                         'Nu-Fit version directly from the filename. Please '
                         'use the file as downloaded directly from the Nu-Fit '
                         'website.' % (io_filename, io_fileext))
    NO_NuFitVersion = no_filename.split('/')[-1].split('.')[0]
    if NuFitVersion != NO_NuFitVersion:
        raise ValueError(
            'The NuFit version extracted from the NO and IO files '
            'do not match. i.e. %s is not the same as %s. Please '
            'use the same NuFit version for each of the NO and IO '
            'chi2 surfaces.' % (NuFitVersion, NO_NuFitVersion))

    # Add special treatment for NuFit 2.1 since it has two releases
    if NuFitVersion == 'v21':
        NuFitVersion += io_filename.split('/')[-1].split('-')[1]

    io_infile = gzip.open(args.io_chi2_file)
    no_infile = gzip.open(args.no_chi2_file)

    io_s2th23, io_dchi2 = extract_vals(
        infile=io_infile,
        string_of_interest='# T23 projection: sin^2(theta23) Delta_chi^2')
    no_s2th23, no_dchi2 = extract_vals(
        infile=no_infile,
        string_of_interest='# T23 projection: sin^2(theta23) Delta_chi^2')

    io_th23 = np.arcsin(np.sqrt(np.array(io_s2th23)))
    no_th23 = np.arcsin(np.sqrt(np.array(no_s2th23)))

    io_dchi2 = np.array(io_dchi2)
    no_dchi2 = np.array(no_dchi2)

    f_io = scipy.interpolate.splrep(io_th23, -io_dchi2 / 2.0, s=0)
    f_no = scipy.interpolate.splrep(no_th23, -no_dchi2 / 2.0, s=0)

    priors = make_prior_dict(f_io=f_io, f_no=f_no)

    to_file(
        priors,
        os.path.join(args.outdir,
                     'nufit%sstandardtheta23splines.json' % NuFitVersion))

    if args.shifted:
        # Make priors where the delta chi2 between the orderings is removed.
        # The idea is to remove the prior on the ordering.

        io_shifteddchi2 = io_dchi2 - min(io_dchi2)
        no_shifteddchi2 = no_dchi2 - min(no_dchi2)

        f_shiftedio = scipy.interpolate.splrep(io_th23,
                                               -io_shifteddchi2 / 2.0,
                                               s=0)
        f_shiftedno = scipy.interpolate.splrep(no_th23,
                                               -no_shifteddchi2 / 2.0,
                                               s=0)

        shiftedpriors = make_prior_dict(f_io=f_shiftedio, f_no=f_shiftedno)

        to_file(
            shiftedpriors,
            os.path.join(args.outdir,
                         'nufit%sshiftedtheta23splines.json' % NuFitVersion))

    if args.minimised:
        # Make one prior that is the minimum of both of the original chi2
        # surfaces. The idea is to remove the prior on the ordering.

        minchi2 = np.minimum(io_dchi2, no_dchi2)

        # Now just one prior. X values should be the same for both.
        f_minimised = scipy.interpolate.splrep(io_th23, -minchi2 / 2.0, s=0)

        minimisedprior = make_prior_dict(f=f_minimised)

        to_file(
            minimisedprior,
            os.path.join(args.outdir,
                         'nufit%sminimisedtheta23spline.json' % NuFitVersion))
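
A self-contained sketch of the chi2-to-log-likelihood conversion used above: since chi2 = -2 llh (up to a constant), the splines are fit to -dchi2 / 2. The curve below is synthetic, for illustration only:

import numpy as np
import scipy.interpolate

th23 = np.linspace(0.6, 0.95, 50)   # synthetic theta23 values (radians)
dchi2 = 25.0 * (th23 - 0.78)**2     # synthetic delta-chi2 curve
spline = scipy.interpolate.splrep(th23, -dchi2 / 2.0, s=0)
print(scipy.interpolate.splev(0.78, spline))  # ~0 at the chi2 minimum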
Example #22
from argparse import ArgumentParser

parser = ArgumentParser()
parser.add_argument(
    '--infile',
    type=str,
    required=True
)
parser.add_argument(
    '--outfile',
    type=str,
    required=True
)
args = parser.parse_args()

import sys, os, re, traceback, time, warnings, itertools
import copy
#from pisa.utils import utils as putils
from pisa.utils.fileio import from_file, to_file
from pisa.utils import params as ppars
from pisa.utils import utils as putils

ts0 = from_file(args.infile)
ts1 = copy.deepcopy(ts0)
for paramname, param in sorted(ts0['params'].items()):
    new_prior = ppars.Prior.from_param(param)
    if new_prior is None:
        continue
    print('Converting prior for param `' + paramname + '`')
    new_param = copy.deepcopy(param)
    new_param.update(new_prior.build_dict())
    ts1['params'][paramname] = new_param

to_file(ts1, args.outfile)
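
Finally, a minimal round-trip sketch for the `to_file`/`from_file` pair that all of the examples above rely on; `pisa.utils.fileio` infers the serialization format from the file extension:

from pisa.utils.fileio import from_file, to_file

obj = {'a': [1, 2, 3]}
to_file(obj, '/tmp/example_roundtrip.json')   # placeholder path
assert from_file('/tmp/example_roundtrip.json') == obj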