Python Task примеры, jug.Task Python примеры использования

Пример #1

0

Показать файл

Файл: select.py Проект: zhafen/linefinder

    def select_ids_jug(self, data_filters=None):
        '''Select all ids that match a set of data filters and save them to a
        file, scheduling the work as Jug tasks.

        The selection, formatting, and saving steps are each wrapped in a
        jug.Task, followed by a jug.barrier().

        Args:
            data_filters (dict of dicts, optional):
                The data filters to apply. If omitted, an empty dict (no
                filters) is used.
        '''

        # Build a fresh dict per call rather than sharing one mutable default
        # object between every call of this method.
        if data_filters is None:
            data_filters = {}

        banner = (
            "########################################################################"
        )

        print(banner)
        print("Selecting IDs")
        print(banner)
        sys.stdout.flush()

        selected_ids = self.get_selected_ids_jug(data_filters)

        selected_ids_formatted = jug.Task(self.format_selected_ids,
                                          selected_ids)

        jug.Task(self.save_selected_ids, selected_ids_formatted, data_filters)

        # Nothing after this point should run until the tasks above are done.
        jug.barrier()

        print(banner)
        print("Done selecting IDs!")
        sys.stdout.flush()

Пример #2

0

Показать файл

Файл: select.py Проект: zhafen/linefinder

    def get_selected_ids_jug(self, data_filters):
        '''Parallel version of self.get_selected_ids(). Requires a lot of
        memory, because it will have multiple snapshots open at once.

        One Jug task is created for every combination of snapshot number in
        self.snums and particle type in self.p_types, and one final Jug task
        merges their results.

        Args:
            data_filters:
                The data filters to apply. Forwarded unchanged to
                self.get_selected_ids_snapshot for every snapshot.

        Returns:
            selected_ids (jug.Task):
                Task whose result is the union of the ids selected in all
                snapshots (see unify_results below for the result type).
        '''

        results = []
        for snum in self.snums:
            for ptype in self.p_types:

                # Copy so each task gets its own snapshot arguments.
                kwargs = dict(self.snapshot_kwargs)
                kwargs['snum'] = snum
                kwargs['ptype'] = ptype

                # The filters and kwargs are passed as one tuple argument.
                result = jug.Task(
                    self.get_selected_ids_snapshot,
                    (data_filters, kwargs),
                )

                results.append(result)

        def unify_results(given_results):
            '''Helper function to get around jug formatting.

            Args:
                given_results (list): Per-snapshot collections of ids.

            Returns:
                An empty set if given_results is empty. A set if any result
                converts to a 2D array (e.g. ids paired with child ids).
                Otherwise a numpy array holding the union of all results.
            '''

            combined = None
            for i, data in enumerate(given_results):

                data = np.array(list(data))

                # The np method (which is supposedly faster) doesn't work for
                # results that contain child IDs too
                if len(data.shape) == 2:
                    return set.union(*given_results)

                print("Adding data set {}, consisting of {} ids".format(
                    i,
                    data.shape,
                ))

                # NOTE(review): an empty result becomes a float64 array, so
                # np.union1d may upcast integer ids to float - confirm that
                # empty per-snapshot results cannot occur or are harmless.
                if combined is None:
                    combined = data
                else:
                    combined = np.union1d(combined, data)

            # No results at all: return an empty selection instead of failing
            # on an unassigned variable.
            if combined is None:
                return set()

            return combined

        return jug.Task(unify_results, results)

Пример #3

0

Показать файл

    def get_tracked_data_jug(self):
        '''Loop over all snapshots and schedule the retrieval of the tracked
        particle data. This is the parallelized version that uses Jug.

        Sets self.snaps (snapshot numbers from self.snum_end down to
        self.snum_start, in steps of self.snum_step) and self.ntrack (the
        number of target ids).

        Returns:
            tracked_data_snapshots (list of jug.Task):
                One task per snapshot. Each task's result is the tuple
                ( i, dfid, redshift, attrs, snum ), where i is the position
                of the snapshot in self.snaps.
        '''

        self.snaps = np.arange(
            self.snum_end,
            self.snum_start - 1,
            -self.snum_step,
        )

        self.ntrack = self.target_ids.size
        print("Tracking {} particles...".format(self.ntrack))
        sys.stdout.flush()

        def get_tracked_data_snapshot(args):
            '''Find the target ids in one snapshot; args is ( i, snum ).'''

            i, snum = args

            t_start = time.time()

            finder = IDFinder()
            dfid, redshift, attrs = finder.find_ids(
                self.sdir,
                snum,
                self.p_types,
                self.target_ids,
                target_child_ids=self.target_child_ids,
                custom_fns=self.custom_fns,
            )

            # Maybe helps stop leaking memory
            del finder
            gc.collect()

            elapsed = time.time() - t_start

            # Report progress for this snapshot.
            message = 'Snapshot {:>3} | redshift {:>7.3g} | done in {:.3g} seconds'
            print(message.format(snum, redshift, elapsed))
            sys.stdout.flush()

            return i, dfid, redshift, attrs, snum

        # One task per ( index, snapshot number ) pair.
        return [
            jug.Task(get_tracked_data_snapshot, index_and_snum)
            for index_and_snum in enumerate(self.snaps)
        ]

Пример #4

0

Показать файл

Файл: galaxy_link.py Проект: zhafen/linefinder

    def find_galaxies_for_particle_tracks_jug(self):
        '''Main function when using jug.

        Reads the data, schedules the galaxy identification for all
        snapshots, schedules a task that writes the identifications, and then
        places a Jug barrier.
        '''

        self.read_data()

        # This is a Jug task; the write task below takes it as its input.
        gal_ids_task = self.get_galaxy_identification_loop_jug()

        jug.Task(
            self.write_galaxy_identifications,
            gal_ids_task,
        )

        jug.barrier()

Пример #5

0

Показать файл

    def save_particle_tracks_jug(self):
        '''Loop over all redshifts, get the data, and save the particle
        tracks, with the work scheduled as Jug tasks.
        '''

        rule = "#" * 80
        print(rule)
        print("Starting Tracking!")
        print(rule)

        # The ids of the particles to track must be available before the
        # per-snapshot tasks are created.
        self.get_target_ids()

        # Per-snapshot tasks feed a formatting task, which feeds the writer.
        snapshot_tasks = self.get_tracked_data_jug()
        formatting_task = jug.Task(
            self.format_tracked_data,
            snapshot_tasks,
        )
        jug.Task(self.write_tracked_data, formatting_task)

        jug.barrier()

Пример #6

0

Показать файл

Файл: galaxy_link.py Проект: zhafen/linefinder

    def get_galaxy_identification_loop_jug(self):
        '''Loop over all snapshots and identify the galaxy in each.
        Use Jug for parallelism.

        One Jug task is created per snapshot stored in self.ptrack, and a
        final Jug task gathers the per-snapshot results into arrays.

        Modifies:
            self.ptrack_gal_ids (dict) : Where the galaxy IDs are stored.
                This is filled in when the final task (store_results) runs,
                not when this method is called.

        Returns:
            jug.Task: Task whose result is self.ptrack_gal_ids, a dict of
                arrays with shape ( n_particles, n_snaps ).
        '''
        def get_galaxy_and_halo_ids(i):
            '''Get the galaxy and halo ids for a single snapshot.

            Args:
                i (int): Index of the snapshot along the snapshot axis of the
                    arrays in self.ptrack.

            Returns:
                The result of GalaxyLinker.find_ids() for this snapshot.
            '''

            # Get the particle positions
            # NOTE(review): [...] reads the whole 'P' dataset before column i
            # is selected; presumably self.ptrack is an HDF5 file - confirm.
            particle_positions = self.ptrack['P'][...][:, i]

            # Get the data parameters to pass to GalaxyLinker
            kwargs = {
                'halo_data': None,
                'galaxy_cut': self.galaxy_cut,
                'length_scale': self.length_scale,
                'mt_length_scale': self.mt_length_scale,
                'ids_to_return': self.ids_to_return,
                'minimum_criteria': self.minimum_criteria,
                'minimum_value': self.minimum_value,
                'redshift': self.ptrack['redshift'][...][i],
                'snum': self.ptrack['snum'][...][i],
                'hubble': self.ptrack.attrs['hubble'],
                'halo_data_dir': self.halo_data_dir,
                'mtree_halos_index': self.mtree_halos_index,
                'main_mt_halo_id': self.main_mt_halo_id,
                'halo_file_tag': self.halo_file_tag,
            }

            time_start = time.time()

            # Find the galaxy for a given snapshot
            gal_linker = galaxy_linker.GalaxyLinker(particle_positions,
                                                    **kwargs)
            galaxy_and_halo_ids = gal_linker.find_ids()

            time_end = time.time()

            # Report progress for this snapshot
            print( 'Snapshot {:>3} | redshift {:>7.3g} | done in {:.3g} seconds'\
                .format(
                    kwargs['snum'],
                    kwargs['redshift'],
                    time_end - time_start
                )
            )
            sys.stdout.flush()

            # Try to avoid memory leaks
            del kwargs
            del gal_linker
            gc.collect()

            return galaxy_and_halo_ids

        n_snaps = self.ptrack['snum'][...].size
        n_particles = self.ptrack['P'][...].shape[0]

        # Loop over each included snapshot and submit Jug Tasks
        galaxy_and_halo_ids_all = []
        for i in range(n_snaps):

            galaxy_and_halo_ids = jug.Task(
                get_galaxy_and_halo_ids,
                i,
            )

            galaxy_and_halo_ids_all.append(galaxy_and_halo_ids)

        # Holds by construction: one task is appended per loop iteration.
        assert len(galaxy_and_halo_ids_all) == n_snaps

        # Store the results
        def store_results(galaxy_and_halo_ids_all):
            '''Collect the per-snapshot results into self.ptrack_gal_ids.

            Args:
                galaxy_and_halo_ids_all (list): Per-snapshot results, in
                    snapshot order. Each is indexed by key below.

            Returns:
                dict: self.ptrack_gal_ids, with column i of every array
                    holding the result for snapshot i.
            '''
            for i, galaxy_and_halo_ids in enumerate(galaxy_and_halo_ids_all):

                # Make the arrays to store the data in
                # This only happens if self.ptrack_gal_ids does not exist
                # yet; the dtype is taken from the first element of each
                # result.
                # NOTE(review): a pre-existing self.ptrack_gal_ids is reused
                # as is - confirm its keys and shapes always match.
                if not hasattr(self, 'ptrack_gal_ids'):
                    self.ptrack_gal_ids = {}
                    for key in galaxy_and_halo_ids.keys():
                        dtype = type(galaxy_and_halo_ids[key][0])
                        self.ptrack_gal_ids[key] = np.empty(
                            (n_particles, n_snaps), dtype=dtype)

                # Store the data in the primary array
                for key in galaxy_and_halo_ids.keys():
                    self.ptrack_gal_ids[key][:, i] = galaxy_and_halo_ids[key]

                # Try clearing up memory again, in case gal_linker
                # is hanging around
                del galaxy_and_halo_ids
                gc.collect()

            # NOTE(review): if there are no snapshots and the attribute was
            # never set, this line raises AttributeError - confirm n_snaps
            # is always greater than zero.
            return self.ptrack_gal_ids

        return jug.Task(store_results, galaxy_and_halo_ids_all)

Пример #7

0

Показать файл

Файл: linefinder.py Проект: agurvich/linefinder

def run_linefinder_jug(
    tag,
    out_dir = None,
    sim_data_dir = None,
    halo_data_dir = None,
    main_mt_halo_id = None,
    sim_name = None,
    galdef = None,
    selector_data_filters = None,
    selector_kwargs = None,
    sampler_kwargs = None,
    tracker_kwargs = None,
    gal_linker_kwargs = None,
    classifier_kwargs = None,
    visualization_kwargs = None,
    run_id_selecting = True,
    run_id_sampling = True,
    run_tracking = True,
    run_galaxy_linking = True,
    run_classifying = True,
    run_visualization = True,
):
    '''Main function for running linefinder.

    All dict arguments default to None, which is treated as an empty dict.
    The dicts passed in by the caller are not modified in place by this
    function itself.

    Args:
        tag (str):
            Filename identifier for data products.

        out_dir (str):
            Output directory to store the data in. If not given and sim_name
            is given, the linefinder directory for that simulation is used.

        sim_data_dir (str):
            Directory the simulation data is stored in.

        halo_data_dir (str):
            Directory the halo data (e.g. AHF output) is stored in.
            Halo data is necessary for linking particles to galaxies.

        main_mt_halo_id (int):
            Halo ID for the main merger tree halo that's being tracked.
            Used for the galaxy linking step, unless gal_linker_kwargs
            already contains 'main_mt_halo_id'.
            If not provided defaults to 0 (or whatever value is cataloged for
            the sim name).

        sim_name (str):
            Name of the simulation this is being run for.
            If provided then linefinder will automatically choose the location
            of the simulation and halo data, according to the linefinder.config
            file. The sim_data_dir and halo_data_dir arguments directly
            overwrite this.

        galdef (str):
            Which set of parameters to use for the galaxy_linking and
            classification steps? Defaults to the parameters in
            linefinder.config

        selector_data_filters (dict):
            Data filters to pass to select.IDSelector.select_ids_jug()

        selector_kwargs (dict):
            Arguments to use when selecting what particles to track.
            Arguments will be passed to select.IDSelector

        sampler_kwargs (dict):
            Arguments to use when sampling from the selected particles.
            Arguments will be passed to select.IDSampler

        tracker_kwargs (dict):
            Arguments to use when tracking particles.
            Arguments will be passed to track.ParticleTracker

        gal_linker_kwargs (dict):
            Arguments to use when associating particles with galaxies.
            Arguments will be passed to galaxy_link.ParticleTrackGalaxyLinker

        classifier_kwargs (dict):
            Arguments to use when classifying particles.
            Arguments will be passed to classify.Classifier

        visualization_kwargs (dict):
            Arguments to use when visualizing the data.
            Arguments will be passed to visualize.export_to_firefly

        run_id_selecting (bool):
            If True, then run routines for selecting particles.

        run_id_sampling (bool):
            If True, then run routines for sampling from the full list of
            selected particles.

        run_tracking (bool):
            If True, then run routines for tracking particles.

        run_galaxy_linking (bool):
            If True, then run routines for associating particles with galaxies.

        run_classifying (bool):
            If True, then run routines for classifying particles.

        run_visualization (bool):
            If True, then run routines for exporting the data for
            visualization.
    '''

    # Use fresh dicts per call instead of shared mutable default arguments.
    if selector_data_filters is None:
        selector_data_filters = {}
    if selector_kwargs is None:
        selector_kwargs = {}
    if sampler_kwargs is None:
        sampler_kwargs = {}
    if tracker_kwargs is None:
        tracker_kwargs = {}
    if gal_linker_kwargs is None:
        gal_linker_kwargs = {}
    if classifier_kwargs is None:
        classifier_kwargs = {}
    # Copied because this dict is modified in place further down, and the
    # caller's dict should not change as a side effect.
    visualization_kwargs = dict( visualization_kwargs or {} )

    # Expand data dirs, if possible
    if out_dir is not None:
        out_dir = os.path.expandvars( out_dir )
    if sim_data_dir is not None:
        sim_data_dir = os.path.expandvars( sim_data_dir )
    if halo_data_dir is not None:
        halo_data_dir = os.path.expandvars( halo_data_dir )

    # Set up for auto-retrieval, if chosen
    if sim_name is not None:
        file_manager = file_management.FileManager()
        if out_dir is None:
            out_dir = file_manager.get_linefinder_dir( sim_name )

    # Setup for galaxy definitions, if chosen
    if galdef is not None:
        galdef_dict = linefinder_config.GALAXY_DEFINITIONS[galdef]

    # Setup jugdata
    jugdir_tail = '{}.jugdata'.format( tag )
    jug.set_jugdir( os.path.join( out_dir, jugdir_tail ) )

    print( "Starting jug thread..." )

    # These are kwargs that could be used at any stage of running linefinder.
    general_kwargs = {
        'out_dir': out_dir,
        'tag': tag,
    }

    def _fill_snapshot_kwargs( stage_kwargs ):
        '''Set stage_kwargs['snapshot_kwargs'] from the explicit directories
        and the sim_name defaults, then return stage_kwargs.'''

        # Work on a copy so the caller's nested dict is left untouched.
        snapshot_kwargs = dict( stage_kwargs.get( 'snapshot_kwargs', {} ) )

        # Explicitly given directories always win.
        if sim_data_dir is not None:
            snapshot_kwargs['sdir'] = sim_data_dir
        if halo_data_dir is not None:
            snapshot_kwargs['halo_data_dir'] = halo_data_dir

        # Use sim name to find defaults for anything still missing
        if sim_name is not None:

            if 'sdir' not in snapshot_kwargs:
                snapshot_kwargs['sdir'] = file_manager.get_sim_dir( sim_name )

            if 'halo_data_dir' not in snapshot_kwargs:
                snapshot_kwargs['halo_data_dir'] = file_manager.get_halo_dir( sim_name )

            if 'main_halo_id' not in snapshot_kwargs:
                snapshot_kwargs['main_halo_id'] = linefinder_config.MAIN_MT_HALO_ID[sim_name]

        stage_kwargs['snapshot_kwargs'] = snapshot_kwargs

        return stage_kwargs

    # Run the ID Selecting
    if run_id_selecting:

        # Update arguments
        selector_kwargs = utilities.merge_two_dicts(
            selector_kwargs, general_kwargs )
        selector_kwargs = _fill_snapshot_kwargs( selector_kwargs )

        id_selector = select.IDSelector( **selector_kwargs )
        id_selector.select_ids_jug( selector_data_filters )

    # Run the ID Sampling
    if run_id_sampling:

        # Update arguments
        sampler_kwargs = utilities.merge_two_dicts(
            sampler_kwargs, general_kwargs )
        sampler_kwargs = _fill_snapshot_kwargs( sampler_kwargs )

        id_sampler = select.IDSampler( **sampler_kwargs )

        jug.Task( id_sampler.sample_ids )

        jug.barrier()

    # Run the Particle Tracking
    if run_tracking:

        # Update arguments
        tracker_kwargs = utilities.merge_two_dicts(
            tracker_kwargs, general_kwargs )

        # Add in sim data dir if given
        if sim_data_dir is not None:
            tracker_kwargs['sdir'] = sim_data_dir

        # Choose the sdir automatically, if possible
        if 'sdir' not in tracker_kwargs:
            # Try and load the default values if using the file manager.
            if sim_name is not None:
                tracker_kwargs['sdir'] = file_manager.get_sim_dir( sim_name )
            # Try to use the sdir passed to the selector kwargs.
            # The lookup must be in the snapshot kwargs dict itself; testing
            # against the string 'snapshot_kwargs' could never succeed.
            elif 'sdir' in selector_kwargs.get( 'snapshot_kwargs', {} ):
                tracker_kwargs['sdir'] = \
                    selector_kwargs['snapshot_kwargs']['sdir']

        particle_tracker = track.ParticleTracker( **tracker_kwargs )
        particle_tracker.save_particle_tracks_jug()

    # Run the Galaxy Finding
    if run_galaxy_linking:

        # Update arguments
        gal_linker_kwargs = utilities.merge_two_dicts(
            gal_linker_kwargs, general_kwargs )

        # Add in halo data dir if given
        if halo_data_dir is not None:
            gal_linker_kwargs['halo_data_dir'] = halo_data_dir

        # Honor the main_mt_halo_id argument, without overriding a value
        # given directly in gal_linker_kwargs.
        if main_mt_halo_id is not None:
            if 'main_mt_halo_id' not in gal_linker_kwargs:
                gal_linker_kwargs['main_mt_halo_id'] = main_mt_halo_id

        if sim_name is not None:

            if 'halo_data_dir' not in gal_linker_kwargs:
                gal_linker_kwargs['halo_data_dir'] = file_manager.get_halo_dir( sim_name )

            if 'main_mt_halo_id' not in gal_linker_kwargs:
                gal_linker_kwargs['main_mt_halo_id'] = linefinder_config.MAIN_MT_HALO_ID[sim_name]

        # Default to halo 0 if MT halo ID not given
        if 'main_mt_halo_id' not in gal_linker_kwargs:
            gal_linker_kwargs['main_mt_halo_id'] = 0

        if galdef is not None:
            for key in [ 'galaxy_cut', 'length_scale', 'mt_length_scale' ]:
                gal_linker_kwargs[key] = galdef_dict[key]

        particle_track_gal_linker = galaxy_link.ParticleTrackGalaxyLinker(
            **gal_linker_kwargs
        )
        particle_track_gal_linker.find_galaxies_for_particle_tracks_jug()

    # Run the Classification
    if run_classifying:

        # Update arguments
        classifier_kwargs = utilities.merge_two_dicts(
            classifier_kwargs, general_kwargs )

        # Add in halo data dir if given
        if halo_data_dir is not None:
            classifier_kwargs['halo_data_dir'] = halo_data_dir

        if sim_name is not None:

            if 'halo_data_dir' not in classifier_kwargs:
                classifier_kwargs['halo_data_dir'] = file_manager.get_halo_dir( sim_name )

        if galdef is not None:
            for key in [ 't_pro', 't_m', ]:
                classifier_kwargs[key] = galdef_dict[key]

        classifier = classify.Classifier( **classifier_kwargs )
        jug.Task( classifier.classify_particles )

    # Run Visualizing
    if run_visualization:

        # Add in halo data dir if given
        if halo_data_dir is not None:
            visualization_kwargs['halo_data_dir'] = halo_data_dir

        if sim_name is not None:

            if 'halo_data_dir' not in visualization_kwargs:
                visualization_kwargs['halo_data_dir'] = file_manager.get_halo_dir( sim_name )

            # NOTE(review): the key that is checked ('main_mt_halo_id')
            # differs from the key that is set ('main_halo_id'), so a
            # caller-supplied 'main_halo_id' is overwritten here. Confirm
            # which name visualize.export_to_firefly expects.
            if 'main_mt_halo_id' not in visualization_kwargs:
                visualization_kwargs['main_halo_id'] = linefinder_config.MAIN_MT_HALO_ID[sim_name]

        jug.Task(
            visualize.export_to_firefly,
            tag = tag,
            data_dir = out_dir,
            **visualization_kwargs
        )

        # Make a file indicating that the visualization completed.
        # NOTE(review): there is no jug.barrier() before this point, so the
        # marker may be written before the visualization task has actually
        # run - confirm this is intended.
        f = os.path.join( out_dir, 'visualized_{}'.format(tag ) )
        with open( f, 'a' ):
            pass

Python Task примеры использования