Example #1
    def __init__(self, data_dir, tag):
        '''
        Args:
            data_dir (str) : Data directory for the classifications data
            tag (str) : Identifying tag for the data to load.
        '''

        # Open the file
        classifications_filepath = os.path.join(
            data_dir, 'classifications_{}.hdf5'.format(tag))
        with h5py.File(classifications_filepath, 'r') as f:

            # Store the data
            self.data = {}
            for key in f.keys():
                if key != 'parameters':
                    self.data[key] = f[key][...]

            # Store the data attributes
            self.data_attrs = {}
            for key in f.attrs.keys():
                self.data_attrs[key] = utilities.check_and_decode_bytes(
                    f.attrs[key])

            # Store the parameters
            self.parameters = {}
            param_grp = f['parameters']
            for key in param_grp.attrs.keys():
                self.parameters[key] = utilities.check_and_decode_bytes(
                    param_grp.attrs[key])
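All of these examples lean on utilities.check_and_decode_bytes to normalize HDF5 attribute values, which h5py may hand back as raw bytes. The helper itself is not shown on this page; a minimal sketch of the behavior the examples assume (a hypothetical implementation, not the package's actual code) is:

    # Hypothetical sketch of the helper the examples assume; the real
    # implementation lives in the utilities module.
    def check_and_decode_bytes(value):
        '''Decode bytes attributes read from HDF5 into str, and pass
        any other type through unchanged.'''
        if isinstance(value, bytes):
            return value.decode('utf-8')
        return value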
Example #2
    def test_select_ids_jug(self):

        kwargs = copy.copy(default_kwargs)

        # Modifications to the output dirs to account for shifting up one level
        kwargs['out_dir'] = self.out_dir
        kwargs['snapshot_kwargs'][
            'sdir'] = './linefinder/tests/data/stars_included_test_data'
        kwargs['snapshot_kwargs'][
            'halo_data_dir'] = './linefinder/tests/data/ahf_test_data'

        data_filters = {
            'radial_cut': {
                'data_key': 'Rf',
                'data_min': 0.,
                'data_max': 1.,
            },
        }

        id_selector = select.IDSelector(**kwargs)
        id_selector.select_ids(data_filters)

        # Run the jug version, launching two jug workers on the same
        # jugfile (one in the background, one in the foreground)
        os.system("jug execute ./linefinder/tests/select_jugfile.py &")
        os.system("jug execute ./linefinder/tests/select_jugfile.py")

        files = []
        for filepath in self.filepaths:
            files.append(h5py.File(filepath, 'r'))

        for key in ['target_ids', 'target_child_ids']:

            npt.assert_allclose(files[0][key][...], files[1][key][...])

        for key in files[0]['parameters'].attrs.keys():

            # These shouldn't match
            if key == 'tag':
                continue

            try:
                self.assertEqual(
                    utilities.check_and_decode_bytes(
                        files[0]['parameters'].attrs[key], ),
                    utilities.check_and_decode_bytes(
                        files[1]['parameters'].attrs[key], ),
                )
            # In case it's an array
            except ValueError:
                npt.assert_allclose(
                    files[0]['parameters'].attrs[key],
                    files[1]['parameters'].attrs[key],
                )

        # Close the files now that the comparisons are done
        for f in files:
            f.close()
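The try/except in the comparison loop exists because assertEqual effectively truth-tests first == second, and for numpy arrays that truth value is ambiguous and raises ValueError; the except branch then falls back to an element-wise comparison. The same pattern in isolation (illustrative values only):

    # Comparing scalars with a plain equality assertion works, but
    # arrays raise ValueError on truth-testing, so fall back to
    # npt.assert_allclose for element-wise comparison.
    import numpy as np
    import numpy.testing as npt

    a = np.array([1., 2.])
    b = np.array([1., 2.])
    try:
        assert a == b  # raises ValueError: truth value is ambiguous
    except ValueError:
        npt.assert_allclose(a, b)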
Example #3
    def __init__(self, data_dir, tag):
        '''
        Args:
            data_dir (str) : Data directory for the classified data
            tag (str) : Identifying tag for the data to load.
        '''

        # Open the file
        ids_filepath = os.path.join(data_dir, 'ids_{}.hdf5'.format(tag))
        with h5py.File(ids_filepath, 'r') as f:

            # Store the data
            self.data = {}
            for key in f.keys():
                if key != 'parameters':
                    self.data[key] = f[key][...]

            # Store the data attributes
            self.data_attrs = {}
            for key in f.attrs.keys():
                self.data_attrs[key] = f.attrs[key]

            # Store the parameters
            self.parameters = {}
            param_grp = f['parameters']
            for key in param_grp.attrs.keys():
                self.parameters[key] = utilities.check_and_decode_bytes(
                    param_grp.attrs[key])

            # Store the snapshot parameters
            self.snapshot_parameters = {}
            try:
                snap_param_grp = f['parameters/snapshot_parameters']
                for key in snap_param_grp.attrs.keys():
                    self.snapshot_parameters[key] = \
                        utilities.check_and_decode_bytes(
                            snap_param_grp.attrs[key])
            except KeyError:
                print('No snapshot parameters stored. Not loading.')

            # Store the used data filters
            self.data_filters = {}
            try:
                filters_grp = f['parameters/data_filters']
                for filters_subset in filters_grp.keys():
                    subgroup = filters_grp[filters_subset]
                    self.data_filters[filters_subset] = {}
                    for key in subgroup.attrs.keys():
                        self.data_filters[filters_subset][key] = \
                            subgroup.attrs[key]
            except KeyError:
                print("Failed to load data_filters. Using older data?")
Example #4
        def load_data_into_ptrack(filename, store_parameters=False):

            filepath = os.path.join(self.out_dir, filename)
            f = h5py.File(filepath, 'r')

            # Store the particle track data in a dictionary
            for key in f.keys():
                if key != 'parameters':
                    self.ptrack[key] = f[key][...]

            # Store the ptrack attributes
            for key in f.attrs.keys():
                self.ptrack_attrs[key] = f.attrs[key]

            if store_parameters:
                default_attrs_to_replace = [
                    'halo_data_dir',
                    'mtree_halos_index',
                    'halo_file_tag',
                ]
                for attr in default_attrs_to_replace:
                    if getattr(self, attr) is default:
                        attr_value = utilities.check_and_decode_bytes(
                            f['parameters'].attrs[attr])
                        setattr(self, attr, attr_value)
                for parameter_key in [
                        'galaxy_cut',
                        'length_scale',
                ]:
                    self.ptrack_attrs[parameter_key] = \
                        f['parameters'].attrs[parameter_key]

            f.close()
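The getattr(self, attr) is default comparison implies a module-level sentinel object that marks constructor arguments as "not supplied". A minimal sketch of that pattern (an assumption; the actual sentinel is defined elsewhere in the package):

    # Hypothetical sketch of the sentinel pattern behind the
    # `is default` check; the real `default` object is defined
    # elsewhere in the package.
    default = object()

    class Loader(object):
        def __init__(self, halo_data_dir=default):
            self.halo_data_dir = halo_data_dir

        def attr_is_unset(self, attr):
            # Identity comparison distinguishes "not supplied" from
            # legitimate falsy values like '' or None.
            return getattr(self, attr) is default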
Example #5
    def replace_default_attr(attr_name):
        attr = getattr(self, attr_name)
        if attr is default:
            try:
                if attr_name == 'sdir':
                    attr = utilities.check_and_decode_bytes(
                        f['parameters/snapshot_parameters']
                        .attrs[attr_name])
                else:
                    attr = utilities.check_and_decode_bytes(
                        f['parameters'].attrs[attr_name])
                setattr(self, attr_name, attr)
            except KeyError:
                raise LookupError(
                    'Cannot fall back to default parameters because '
                    '{} does not include default parameters.'.format(
                        id_filename,
                    )
                )
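This closure references self, f, and id_filename without defining them, so it is presumably nested inside a loader method that already has the HDF5 file open. A hypothetical call site:

    # Hypothetical surrounding context; `id_filename` and the attribute
    # names are assumptions based on the snippet above.
    with h5py.File(id_filename, 'r') as f:
        for attr_name in ['sdir', 'halo_data_dir']:
            replace_default_attr(attr_name)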
Example #6
    def __init__(self, data_dir, tag, ahf_index=None, *args, **kwargs):
        '''
        Args:
            data_dir (str) : Data directory for the classified data
            tag (str) : Identifying tag for the data to load.
            ahf_index (str or int) : Index to use for AHF data.
        '''

        # Open the file
        ptracks_filepath = os.path.join(data_dir,
                                        'ptracks_{}.hdf5'.format(tag))
        with h5py.File(ptracks_filepath, 'r') as f:

            # Store the data
            self.data = {}
            for key in f.keys():
                if key != 'parameters':
                    self.data[key] = f[key][...]

            # Store the data attributes
            self.data_attrs = {}
            for key in f.attrs.keys():
                self.data_attrs[key] = utilities.check_and_decode_bytes(
                    f.attrs[key])

            # Store the parameters
            self.parameters = {}
            param_grp = f['parameters']
            for key in param_grp.attrs.keys():
                self.parameters[key] = utilities.check_and_decode_bytes(
                    param_grp.attrs[key])

        # Reorganize data to match with formatting in TimeData
        self.data['P'] = np.rollaxis(self.data['P'], 2)
        self.data['V'] = np.rollaxis(self.data['V'], 2)

        super(PTracks, self).__init__(data_dir=data_dir,
                                      snum=self.data['snum'],
                                      ahf_index=ahf_index,
                                      *args,
                                      **kwargs)
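np.rollaxis(a, 2) moves axis 2 to the front, so arrays presumably stored as (n_particles, 3, n_snapshots) become (n_snapshots, n_particles, 3), matching the formatting TimeData expects. A quick shape check (illustrative sizes only; the stored axis order is an assumption):

    # Illustrative shape check; np.moveaxis(positions, 2, 0) is the
    # modern equivalent of np.rollaxis(positions, 2).
    import numpy as np

    positions = np.zeros((100, 3, 600))
    print(np.rollaxis(positions, 2).shape)  # (600, 100, 3)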
Example #7
    def identify_duplicate_ids(self):
        '''Get all IDs that have duplicates at the latest snapshot.
        This draws data from the same sample as the original.

        Returns:
            duplicate_ids (set) : A set of all IDs that have duplicates at snum_end.
        '''

        assert 'target_child_ids' not in self.f.keys(), \
            "Identifying duplicate IDs does not work with new ID system."

        if self.p_types is None:
            self.p_types = self.f['parameters'].attrs['p_types']

        if self.snapshot_kwargs is None:
            self.snapshot_kwargs = dict(
                self.f['parameters/snapshot_parameters'].attrs)

        duplicate_ids = set()
        id_sets = []
        for ptype in self.p_types:

            # Check for bytes data and decode
            for key, item in copy.deepcopy(self.snapshot_kwargs).items():
                self.snapshot_kwargs[key] = \
                    utilities.check_and_decode_bytes(item)

            self.snapshot_kwargs['snum'] = self.reference_snum_for_duplicates
            self.snapshot_kwargs['ptype'] = ptype
            p_data = particle_data.ParticleData(**self.snapshot_kwargs)

            duplicate_ids = duplicate_ids | set(p_data.find_duplicate_ids())

            # Get the IDs out, so that we can find duplicates where the
            # particles are of different types
            id_sets.append(set(p_data.get_data('ID')))

        duplicate_ids = duplicate_ids | set.intersection(*id_sets)

        return duplicate_ids
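The returned set combines IDs duplicated within any single particle type with IDs that appear in every particle type, which is plain set algebra. In isolation (illustrative values only):

    # within_type stands in for the union of find_duplicate_ids()
    # results; id_sets holds the full ID set of each particle type.
    within_type = {7}
    id_sets = [{1, 2, 3, 7}, {3, 4, 5, 7}]
    duplicates = within_type | set.intersection(*id_sets)
    print(duplicates)  # {3, 7}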