Пример #1
0
    def split_data(self):
        """Split the data into signal, background and combined samples.

        The result is cached on the instance and returned as-is for as long
        as the hash of ``fit_hash``, ``output_str`` and the muon/noise flags
        of ``self._data`` matches the hash stored by the previous split.

        Returns
        -------
        tuple
            ``(signal_data, bg_data, all_data)``. When the ``real_data``
            parameter is set, ``(self._data, None, self._data)`` is returned
            instead and nothing is cached.

        """
        this_hash = hash_obj(
            [
                self.fit_hash,
                self.output_str,
                self._data.contains_muons,
                self._data.contains_noise,
            ],
            full_hash=self.full_hash,
        )

        # Nothing relevant changed since the last split: reuse cached result.
        if self.split_data_hash == this_hash:
            return self._signal_data, self._bg_data, self._all_data

        # Real data is not split into signal and background.
        if self.params['real_data'].value:
            return self._data, None, self._data

        transformed = self._data.transform_groups(self.output_str)

        # Drop 'sample_weight' from every group yielded by iteration.
        for group in transformed:
            transformed[group].pop('sample_weight')

        # Everything that is not the signal group counts as background.
        background_names = [
            group for group in transformed if group != self.output_str
        ]

        # Muons and noise are handled separately from the iterated groups;
        # when present they are stripped of 'sample_weight' and added to the
        # background as well.
        for extra in ('muons', 'noise'):
            if getattr(transformed, 'contains_' + extra):
                transformed[extra].pop('sample_weight')
                background_names.append(extra)

        signal = transformed[self.output_str]
        background = reduce(
            Data._merge, [transformed[name] for name in background_names]
        )
        # Merge into a copy so that the background sample itself is not
        # touched by the merge.
        combined = Data._merge(deepcopy(background), signal)

        self._signal_data = signal
        self._bg_data = background
        self._all_data = combined
        self.split_data_hash = this_hash
        return signal, background, combined
Пример #2
0
    def _compute_outputs(self, inputs=None):
        """Load events from file into a `Data` object and apply basic cuts.

        The events file named by ``self.events_file`` is read into an
        `Events` instance, every flavour/interaction type present in it is
        copied into a `FlavIntDataGroup`, and the result is wrapped in a
        `Data` object carrying a deep copy of the events' metadata. If the
        ``keep_criteria`` parameter is set, the cut is applied to that new
        object before it is returned.

        Parameters
        ----------
        inputs : optional
            Not used by this method.

        Returns
        -------
        Data
            The loaded (and possibly cut) data, with ``sample_hash`` stored
            in its metadata.

        """
        logging.debug('Entering events_to_data._compute_outputs')

        # Hashing
        # TODO: decide what should be hashed
        hash_property = [
            self.events_file, self.params['dataset'].value, self.output_names
        ]
        this_hash = hash_obj(hash_property, full_hash=self.full_hash)
        # TODO: re-enable an early return when this_hash == self.sample_hash

        # TODO: check there are no inputs

        # Fill an events instance from a file
        events = Events(self.events_file)

        # TODO: handle nominal, etc. datasets?

        # Extract the neutrino data from the 'Events' instance
        flav_fidg = FlavIntDataGroup(flavint_groups=events.flavints)
        for flavint in events.present_flavints:
            flav_events = events[flavint]
            flav_fidg[flavint] = {
                var: flav_events[var] for var in flav_events.keys()
            }

        # Create the data instance, including the metadata.
        # Note that there is no muon or noise data in the 'Events'.
        data = Data(flav_fidg, metadata=deepcopy(events.metadata))

        # Make cuts. The cut must act on the freshly built `data` that is
        # returned below, not on `self._data`, which this method never sets.
        keep_criteria = self.params['keep_criteria'].value
        if keep_criteria is not None:
            # TODO: applyCut needs testing.
            # NOTE(review): the return value is discarded, as before; this
            # assumes applyCut modifies the object in place -- confirm.
            data.applyCut(keep_criteria)
            data.update_hash()

        # Update hashes
        self.sample_hash = this_hash
        data.metadata['sample_hash'] = this_hash
        data.update_hash()

        return data
Пример #3
0
    def load_noise_events(config, dataset):
        """Load the noise events of one dataset into a `Data` object.

        Parameters
        ----------
        config : config-parser-like object
            Must provide ``get(section, option)`` and ``items(section)``.
            The sections read are 'general', 'noise', the noise aliases
            section and, depending on `dataset`, the per-systematic sections.
        dataset : str
            'nominal' to pick the file shared by the nominal value of every
            systematic in the noise ``sys_list``; otherwise the name of a
            config section that has a ``file_path`` option.

        Returns
        -------
        Data
            Holds the loaded events under the key 'noise', with metadata
            ``{'name': <sample name>, 'noise_sample': dataset}``.

        Raises
        ------
        ValueError
            If `dataset` is 'nominal' but no systematics are listed, so no
            nominal file can be determined.
        AssertionError
            If the nominal choices of the systematics point to more than
            one file.

        """
        name = config.get('general', 'name')
        weight = config.get('noise', 'weight')
        weight_units = config.get('noise', 'weight_units')
        sys_list = split(config.get('noise', 'sys_list'))
        # 'baseprefix' is still read (a missing option keeps raising), but
        # its value is not used anywhere else in this function.
        base_prefix = config.get('noise', 'baseprefix')
        keep_keys = split(config.get('noise', 'keep_keys'))
        aliases = config.items('noise%saliases' % SEP)
        if base_prefix == 'None':
            base_prefix = ''

        if dataset == 'nominal':
            # Collect the file that each systematic names for its nominal
            # value; all of them have to agree on a single file.
            paths = []
            for sys_name in sys_list:
                ev_sys = 'noise%s%s' % (SEP, sys_name)
                nominal = config.get(ev_sys, 'nominal')
                ev_sys_nom = ev_sys + SEP + nominal
                paths.append(config.get(ev_sys_nom, 'file_path'))
            if not paths:
                raise ValueError(
                    'Cannot determine the nominal noise file: no '
                    'systematics are listed in the noise "sys_list".')
            if len(set(paths)) > 1:
                raise AssertionError(
                    'Choice of nominal file is ambigous. Nominal '
                    'choice of systematic parameters must coincide '
                    'with one and only one file. Options found are: '
                    '{0}'.format(paths))
            file_path = paths[0]
        else:
            file_path = config.get(dataset, 'file_path')
        logging.info('Extracting noise dataset "{0}" from sample '
                     '"{1}"'.format(dataset, name))

        noise = from_file(file_path)
        sample.strip_keys(keep_keys, noise)

        # NOTE(review): the constant-weight branches size their array from
        # noise['weights'], so that key has to survive strip_keys -- confirm
        # it is always part of keep_keys.
        if weight in ('None', '1'):
            noise['sample_weight'] = np.ones(noise['weights'].shape)
        elif weight == '0':
            noise['sample_weight'] = np.zeros(noise['weights'].shape)
        else:
            noise['sample_weight'] = noise[weight] * ureg(weight_units)
        noise['pisa_weight'] = deepcopy(noise['sample_weight'])

        for alias, expr in aliases:
            if alias in noise:
                logging.warning(
                    'Overwriting Data key {0} with aliased expression '
                    '{1}'.format(alias, expr))
            # Each '<key>' in the expression is rewritten to noise['key']
            # before evaluation, so the local name `noise` must not change.
            # SECURITY NOTE: eval() executes arbitrary code taken from the
            # config file; only use configs from a trusted source.
            noise[alias] = eval(re.sub(r'\<(.*?)\>', r"noise['\1']", expr))

        noise_dict = {'noise': noise}
        return Data(noise_dict,
                    metadata={
                        'name': name,
                        'noise_sample': dataset
                    })
Пример #4
0
    def load_neutrino_events(config, dataset):
        """Load the neutrino events of one dataset into a `Data` object.

        Two kinds of dataset are handled:

        * the generator-level dataset (``'neutrinos' + SEP + 'gen_lvl'``),
          described by a separate config file whose location is given by
          the ``gen_cfg_file`` option of the `dataset` section;
        * any other dataset, where one file per flavour is located from the
          data directory, the base prefix, the flavour and the dataset's
          ``file_prefix``. For 'nominal', the prefix is the one shared by
          the nominal value of every systematic in ``sys_list``.

        Parameters
        ----------
        config : config-parser-like object
            Must provide ``get(section, option)`` and ``items(section)``.
        dataset : str
            Name of the dataset to load (see above).

        Returns
        -------
        Data
            Sum of the per-flavour event groups, with metadata
            ``{'name': <sample name>, 'sample': dataset}``.

        Raises
        ------
        ValueError
            If `dataset` is 'nominal' but no systematics are listed, or if
            no event types / flavours are configured at all.
        AssertionError
            If the nominal choices of the systematics point to more than
            one file prefix.

        """
        nu_data = []
        if dataset == 'neutrinos%sgen_lvl' % SEP:
            gen_cfg = from_file(config.get(dataset, 'gen_cfg_file'))
            name = gen_cfg.get('general', 'name')
            datadir = gen_cfg.get('general', 'datadir')
            event_types = split(gen_cfg.get('general', 'event_type'))
            weights = split(gen_cfg.get('general', 'weights'))
            weight_units = gen_cfg.get('general', 'weight_units')
            keep_keys = split(gen_cfg.get('general', 'keep_keys'))
            aliases = gen_cfg.items('aliases')
            logging.info('Extracting neutrino dataset "{0}" from generator '
                         'level sample "{1}"'.format(dataset, name))

            for idx, flav in enumerate(event_types):
                all_flavints = NuFlavIntGroup(flav).flavints
                # NOTE(review): plain string concatenation, unlike the
                # path.join used for regular datasets below -- this
                # presumably relies on `datadir` ending with a path
                # separator; confirm.
                events_file = datadir + gen_cfg.get(flav, 'filename')

                nu_data.append(
                    sample.load_from_nu_file(
                        events_file, all_flavints, weights[idx],
                        weight_units, keep_keys, aliases
                    )
                )
        else:
            name = config.get('general', 'name')
            flavours = split(config.get('neutrinos', 'flavours'))
            weights = split(config.get('neutrinos', 'weights'))
            weight_units = config.get('neutrinos', 'weight_units')
            sys_list = split(config.get('neutrinos', 'sys_list'))
            base_prefix = config.get('neutrinos', 'baseprefix')
            keep_keys = split(config.get('neutrinos', 'keep_keys'))
            aliases = config.items('neutrinos%saliases' % SEP)
            logging.info('Extracting neutrino dataset "{0}" from sample '
                         '"{1}"'.format(dataset, name))
            if base_prefix == 'None':
                base_prefix = ''

            # The dataset's file prefix does not depend on the flavour, so
            # it is resolved once here and not once per flavour.
            if dataset == 'nominal':
                prefixes = []
                for sys_name in sys_list:
                    ev_sys = 'neutrinos%s%s' % (SEP, sys_name)
                    nominal = config.get(ev_sys, 'nominal')
                    ev_sys_nom = ev_sys + SEP + nominal
                    prefixes.append(config.get(ev_sys_nom, 'file_prefix'))
                if not prefixes:
                    raise ValueError(
                        'Cannot determine the nominal neutrino files: no '
                        'systematics are listed in the neutrinos '
                        '"sys_list".'
                    )
                if len(set(prefixes)) > 1:
                    raise AssertionError(
                        'Choice of nominal file is ambigous. Nominal '
                        'choice of systematic parameters must coincide '
                        'with one and only one file. Options found are: '
                        '{0}'.format(prefixes)
                    )
                dataset_prefix = prefixes[0]
            else:
                dataset_prefix = config.get(dataset, 'file_prefix')
            datadir = config.get('general', 'datadir')

            for idx, flav in enumerate(flavours):
                # Each flavour entry is an integer code; the group is built
                # from the code together with its negative.
                f = int(flav)
                all_flavints = NuFlavIntGroup(f, -f).flavints
                events_file = path.join(
                    datadir, base_prefix + flav + dataset_prefix
                )

                nu_data.append(
                    sample.load_from_nu_file(
                        events_file, all_flavints, weights[idx],
                        weight_units, keep_keys, aliases
                    )
                )

        # reduce() without an initial value fails with an opaque TypeError
        # on an empty list, so report the actual problem instead.
        if not nu_data:
            raise ValueError(
                'No neutrino event types/flavours are configured for '
                'dataset "{0}".'.format(dataset)
            )

        return Data(
            reduce(add, nu_data),
            metadata={'name': name, 'sample': dataset}
        )