Exemplo n.º 1
0
    def start_tasks(self, **kwargs):
        """Build one BaseForcer per particle and submit them all to the pool.

        :param kwargs: accepted for interface compatibility; not read here.
        :returns: the object returned by ``self.pool.map_async(Runner(), tasks)``.
        :raises Exception: anything raised while building or submitting the
            tasks is logged and then re-raised unchanged.
        """

        def build_forcer(particle):
            # Only the particle differs between forcers; every other setting
            # is read from this controller.
            return BaseForcer(
                self.hydrodataset,
                particle=particle,
                common_variables=self.common_variables,
                timevar=self.timevar,
                times=self.times,
                start_time=self.start,
                models=self._models,
                release_location_centroid=self.reference_location.point,
                usebathy=self._use_bathymetry,
                useshore=self._use_shoreline,
                usesurface=self._use_seasurface,
                reverse_distance=self.reverse_distance,
                bathy_path=self.bathy_path,
                shoreline_path=self.shoreline_path,
                shoreline_feature=self.shoreline_feature,
                time_method=self.time_method,
                shoreline_index_buffer=self.shoreline_index_buffer,
            )

        try:
            logger.info('Adding %i particles as tasks' % self.total_particle_count())
            forcers = [build_forcer(p) for p in self.particles]

            logger.progress((5, 'Running model'))
            return self.pool.map_async(Runner(), forcers)
        except Exception:
            logger.exception("Something didn't start correctly!")
            raise
    def run(self):
        """Consume tasks from ``self.task_queue`` until none arrive in time.

        Each task is called with ``self.active`` and a ``(code, payload)``
        tuple is put on ``self.result_queue``:

        * ``(1, <task return value>)`` when the task succeeded
        * ``(-2, "CachingDataController")`` when a CachingDataController raised
        * ``(-1, <particle>)`` when a BaseForcer raised
        * ``(None, None)`` when any other kind of task raised

        After every task the shared run counter is decremented under
        ``self.nproc_lock`` and the task is marked done on the queue. The loop
        ends once ``task_queue.get(True, 10)`` raises ``queue.Empty``.
        """
        while True:
            try:
                next_task = self.task_queue.get(True, 10)
            except queue.Empty:
                logger.info("No tasks left to complete, closing %s" % self.name)
                break

            answer = (None, None)
            try:
                answer = (1, next_task(self.active))
            except Exception:
                logger.exception("Disabling Error")
                if isinstance(next_task, CachingDataController):
                    answer = (-2, "CachingDataController")
                    # Tell the particles that the CachingDataController is releasing file
                    self.get_data.value = False
                    # The data controller has died, so don't process any more tasks
                    self.active.value = False
                elif isinstance(next_task, BaseForcer):
                    answer = (-1, next_task.particle)
                else:
                    logger.warn("Strange task raised an exception: %s" % str(next_task.__class__))
                    answer = (None, None)
            finally:
                self.result_queue.put(answer)

                # Use the lock as a context manager so it is released even
                # if updating the counter raises.
                with self.nproc_lock:
                    self.n_run.value -= 1

                self.task_queue.task_done()
Exemplo n.º 3
0
    def listen_for_results(self, output_h5_file, total_particles):
        """Collect particle results from ``self.result`` and write them to HDF5.

        Results are pulled one at a time with ``self.result.next(timeout)``
        (timeout of 200) until ``total_particles`` have been retrieved or the
        iterator is exhausted. Progress is reported on a 0-90 scale after each
        retrieved particle. Every timestep of every retrieved particle is then
        written through ``ex.ResultsPyTable``.

        A failed ``next()`` call is logged and the loop keeps waiting. If the
        iterator ends before all results arrived, a warning is logged and the
        particles retrieved so far are still written.

        :param output_h5_file: path handed to ``ex.ResultsPyTable``.
        :param total_particles: number of results to wait for.
        """
        logger.info("Waiting for %i particle results" % total_particles)

        particles = []
        retrieved = 0
        timeout = 200

        while retrieved < total_particles:
            try:
                # self.result is an iterator that can timeout on next()
                particle = self.result.next(timeout)
            except StopIteration:
                # Reaching this point means fewer results than expected were
                # produced. Report it explicitly instead of relying on an
                # assert, which is stripped when Python runs with -O.
                logger.warn("Only %i of %i particle results were retrieved" % (retrieved, total_particles))
                break
            except Exception:
                # Exception (not a bare except) so that KeyboardInterrupt and
                # SystemExit can still stop this wait loop.
                logger.exception("Particle has FAILED!!")
                continue

            retrieved += 1
            particles.append(particle)

            # We multiply by 90 here to save 10% for the exporting
            logger.progress((round((float(retrieved) / total_particles) * 90.,
                                   1), "%s Particle(s) complete" % retrieved))

        logger.info(particles)
        results = ex.ResultsPyTable(output_h5_file)
        try:
            for p in particles:
                for x in range(len(p.locations)):
                    results.write(p.timestep_index_dump(x))
            results.compute()
        finally:
            # Always release the output file, even if a write fails.
            results.close()
Exemplo n.º 4
0
    def run(self, **kwargs):
        """Start all tasks, wait for their results and export them.

        :param kwargs: forwarded to ``start_tasks``. Two keys are also read
            here: ``output_path`` (directory that receives ``results.h5`` and
            is passed to each exporter) and ``output_formats`` (a list of
            objects exposing ``export(output_path, h5_file)``).
        :raises BaseDataControllerError: if ``start_tasks`` reports failure by
            returning ``None`` or ``False``.
        """
        logger.progress((4, "Starting tasks"))
        self.result = self.start_tasks(**kwargs)
        # Some start_tasks implementations signal failure with False rather
        # than None, so treat both as "not started".
        if self.result is None or self.result is False:
            raise BaseDataControllerError("Not all tasks started! Exiting.")

        # Store results in hdf5 file for processing later
        output_path = kwargs.get('output_path')
        output_h5_file = None
        if output_path is not None:
            output_h5_file = os.path.join(output_path, 'results.h5')

        if self.thread_result_listener is True:
            rl = threading.Thread(name="ResultListener",
                                  target=self.listen_for_results,
                                  args=(output_h5_file,
                                        self.total_particle_count()))
            rl.daemon = True
            rl.start()
            rl.join()  # This blocks until the tasks are all done.
        else:
            # This blocks until the tasks are all done.
            self.listen_for_results(output_h5_file, self.total_particle_count())

        logger.info('Tasks are all finished... Cleaning up!!')
        self.cleanup()

        # If output_formats and path specified,
        # output particle run data to disk when completed
        if "output_formats" not in kwargs:
            logger.warn(
                'No output_formats parameter was defined, not saving any output!'
            )
        else:
            logger.progress((96, "Exporting results"))
            formats = kwargs.get("output_formats")

            # Make sure output_path is also included
            if output_path is None:
                logger.warn('No output path defined, not saving any output!')
            elif not isinstance(formats, list):
                logger.warn(
                    'The output_formats parameter should be a list, not saving any output!'
                )
            else:
                for fmt in formats:
                    logger.info("Exporting to: %s" % fmt)
                    try:
                        # Calls the export function
                        fmt.export(output_path, output_h5_file)
                    except Exception:
                        # One failing exporter must not stop the others, but
                        # KeyboardInterrupt/SystemExit should still propagate.
                        logger.exception("Failed to export to: %s" % fmt)

        logger.progress((97, "Model Run Complete"))
Exemplo n.º 5
0
    def listen_for_results(self, output_h5_file, total_particles):
        """Gather particle results from ``self.result`` and store them as HDF5.

        Calls ``self.result.next(timeout)`` (timeout of 200) repeatedly until
        ``total_particles`` results were retrieved or the iterator runs out,
        reporting progress on a 0-90 scale after each one. All timesteps of
        the retrieved particles are then written via ``ex.ResultsPyTable``.

        Errors raised by ``next()`` are logged and the loop continues. An
        iterator that ends early produces a warning, and the results gathered
        up to that point are still written.

        :param output_h5_file: path handed to ``ex.ResultsPyTable``.
        :param total_particles: number of results to wait for.
        """
        logger.info("Waiting for %i particle results" % total_particles)

        particles = []
        retrieved = 0
        timeout = 200

        while retrieved < total_particles:
            try:
                # self.result is an iterator that can timeout on next()
                particle = self.result.next(timeout)
            except StopIteration:
                # The iterator ran dry before every result arrived. Log it
                # explicitly; an assert here would vanish under -O.
                logger.warn("Only %i of %i particle results were retrieved" % (retrieved, total_particles))
                break
            except Exception:
                # Not a bare except: Ctrl-C and SystemExit must be able to
                # interrupt this loop.
                logger.exception("Particle has FAILED!!")
                continue

            retrieved += 1
            particles.append(particle)

            # We multiply by 90 here to save 10% for the exporting
            logger.progress((round((float(retrieved) / total_particles) * 90., 1), "%s Particle(s) complete" % retrieved))

        logger.info(particles)
        results = ex.ResultsPyTable(output_h5_file)
        try:
            for p in particles:
                for x in range(len(p.locations)):
                    results.write(p.timestep_index_dump(x))
            results.compute()
        finally:
            # Close the output file whether or not writing succeeded.
            results.close()
Exemplo n.º 6
0
    def start_tasks(self):
        """Queue the data controller and all particle forcers, then start workers.

        A ``CachingDataController`` is queued first and given its own
        ``Consumer`` process so that it can fetch the initial data without
        being blocked. One ``CachingForcer`` per particle is queued afterwards
        and ``self.nproc - 1`` further ``Consumer`` processes are started.

        :returns: ``True`` when everything was queued and started, ``False``
            when any step raised (the error is logged, not propagated).
        """
        try:
            logger.info('Starting CachingDataController')

            # Add data controller to the queue first so that it
            # can get the initial data and is not blocked
            data_controller = CachingDataController(
                self.hydrodataset, self.common_variables, self.n_run,
                self.get_data, self.write_lock, self.has_write_lock,
                self.read_lock, self.read_count, self.time_chunk,
                self.horiz_chunk, self.times, self.start, self.point_get,
                self.reference_location, cache_path=self.cache_path)
            self.tasks.put(data_controller)
            # Create CachingDataController worker
            self.data_controller_process = Consumer(
                self.tasks, self.results, self.n_run, self.nproc_lock,
                self.active, self.get_data, name="CachingDataController")
            self.data_controller_process.start()

            logger.info('Adding %i particles as tasks' % len(self.particles))

            for part in self.particles:
                forcer = CachingForcer(self.cache_path,
                                       particle=part,
                                       common_variables=self.common_variables,
                                       timevar=self.timevar,
                                       times=self.times,
                                       start_time=self.start,
                                       models=self._models,
                                       release_location_centroid=self.reference_location.point,
                                       usebathy=self._use_bathymetry,
                                       useshore=self._use_shoreline,
                                       usesurface=self._use_seasurface,
                                       reverse_distance=self.reverse_distance,
                                       bathy_path=self.bathy_path,
                                       shoreline_path=self.shoreline_path,
                                       shoreline_feature=self.shoreline_feature,
                                       time_method=self.time_method,
                                       redis_url=self.redis_url,
                                       redis_results_channel=self.redis_results_channel,
                                       shoreline_index_buffer=self.shoreline_index_buffer,
                                       get_data=self.get_data,
                                       read_lock=self.read_lock,
                                       has_read_lock=self.has_read_lock,
                                       read_count=self.read_count,
                                       point_get=self.point_get,
                                       data_request_lock=self.data_request_lock,
                                       has_data_request_lock=self.has_data_request_lock)
                self.tasks.put(forcer)

            # Create workers for the particles. range() is used because it
            # exists on both Python 2 and 3; on Python 3 xrange would raise a
            # NameError that the handler below would swallow.
            self.procs = [
                Consumer(self.tasks, self.results, self.n_run, self.nproc_lock,
                         self.active, self.get_data, name="CachingForcer-%d" % i)
                for i in range(self.nproc - 1)
            ]
            for w in self.procs:
                w.start()
                logger.info('Started %s' % w.name)

            return True

        except Exception:
            logger.exception("Something didn't start correctly!")
            return False
Exemplo n.º 7
0
    def run(self, hydrodataset, **kwargs):
        """Run the model against ``hydrodataset`` and optionally export results.

        :param hydrodataset: stored on ``self.hydrodataset`` before setup.
        :param kwargs: forwarded to ``setup_run``. ``output_formats`` (a list)
            and ``output_path`` are also read here to drive ``self.export``.
        :returns: the particles returned by ``listen_for_results``.
        :raises BaseDataControllerError: if ``start_tasks`` returns ``None`` or
            ``False``, or if no particles came back and ``self.error_code`` is
            ``-2``.
        :raises ModelError: if no particles came back for any other reason.
        """
        self.hydrodataset = hydrodataset

        self.setup_run(**kwargs)

        logger.progress((4, "Starting tasks"))
        self.result = self.start_tasks()
        # Some start_tasks implementations signal failure with False rather
        # than None, so treat both as "not started".
        if self.result is None or self.result is False:
            raise BaseDataControllerError("Not all tasks started! Exiting.")

        # This blocks until the tasks are all done.
        self.particles = self.listen_for_results()

        logger.info('Consumers are all finished!')

        logger.info('Cleaning up')
        self.cleanup()

        if len(self.particles) == 0:
            logger.warn("Model didn't actually do anything, check the log.")
            if self.error_code == -2:
                raise BaseDataControllerError("Error in the BaseDataController")
            raise ModelError("Error in the model")

        # If output_formats and path specified,
        # output particle run data to disk when completed
        if "output_formats" not in kwargs:
            logger.warn('No output format defined, not saving any output!')
        else:
            logger.progress((96, "Exporting results"))
            formats = kwargs.get("output_formats")
            output_path = kwargs.get("output_path", None)

            # Make sure output_path is also included
            if output_path is None:
                logger.warn('No output path defined, not saving any output!')
            elif not isinstance(formats, list):
                logger.warn('The output_formats parameter should be a list, not saving any output!')
            else:
                # Local is named fmt so the builtin format() is not shadowed;
                # the keyword passed to self.export is unchanged.
                for fmt in formats:
                    logger.info("Exporting to: %s" % fmt)
                    try:
                        self.export(output_path, format=fmt)
                    except Exception:
                        # One failing format must not stop the others, but
                        # KeyboardInterrupt/SystemExit should still propagate.
                        logger.exception("Failed to export to: %s" % fmt)

        logger.progress((97, "Model Run Complete"))
        return self.particles
Exemplo n.º 8
0
    def run(self, **kwargs):
        """Start the tasks, block until results are stored, then export them.

        :param kwargs: forwarded to ``start_tasks``. ``output_path`` (directory
            that receives ``results.h5`` and is passed to each exporter) and
            ``output_formats`` (a list of objects exposing
            ``export(output_path, h5_file)``) are also read here.
        :raises BaseDataControllerError: if ``start_tasks`` reports failure by
            returning ``None`` or ``False``.
        """
        logger.progress((4, "Starting tasks"))
        self.result = self.start_tasks(**kwargs)
        # Treat both None and False as failure: some start_tasks
        # implementations return False when start-up goes wrong.
        if self.result is None or self.result is False:
            raise BaseDataControllerError("Not all tasks started! Exiting.")

        # Store results in hdf5 file for processing later
        output_path = kwargs.get('output_path')
        output_h5_file = None
        if output_path is not None:
            output_h5_file = os.path.join(output_path, 'results.h5')

        if self.thread_result_listener is True:
            rl = threading.Thread(name="ResultListener", target=self.listen_for_results, args=(output_h5_file, self.total_particle_count()))
            rl.daemon = True
            rl.start()
            rl.join()  # This blocks until the tasks are all done.
        else:
            self.listen_for_results(output_h5_file, self.total_particle_count())    # This blocks until the tasks are all done.

        logger.info('Tasks are all finished... Cleaning up!!')
        self.cleanup()

        # If output_formats and path specified,
        # output particle run data to disk when completed
        if "output_formats" not in kwargs:
            logger.warn('No output_formats parameter was defined, not saving any output!')
        else:
            logger.progress((96, "Exporting results"))
            formats = kwargs.get("output_formats")

            # Make sure output_path is also included
            if output_path is None:
                logger.warn('No output path defined, not saving any output!')
            elif not isinstance(formats, list):
                logger.warn('The output_formats parameter should be a list, not saving any output!')
            else:
                for fmt in formats:
                    logger.info("Exporting to: %s" % fmt)
                    try:
                        # Calls the export function
                        fmt.export(output_path, output_h5_file)
                    except Exception:
                        # Keep exporting the remaining formats, but let
                        # KeyboardInterrupt/SystemExit through.
                        logger.exception("Failed to export to: %s" % fmt)

        logger.progress((97, "Model Run Complete"))
    def get_nearest_data(self, i):
        """Return ``(u, v, w, temp, salt)`` at time index ``i`` for the particle.

        Note: self.dataset.opennc() must be called before calling this function.
        This is because the caching forcer must close it everytime, while a non caching
        forcer can leave the dataset open.

        Values are read with ``self.dataset.get_values`` at
        ``self.particle.location`` and averaged. If ``u``, ``v`` or ``w`` is
        NaN, every value is re-read with ``num=2`` and averaged again.

        :param i: time index to read.
        :returns: tuple ``(u, v, w, temp, salt)``. ``w`` is ``0.0`` when the
            dataset has no ``'w'`` variable. ``temp`` and ``salt`` are NaN
            unless both ``self.temp_name`` and ``self.salt_name`` are set.
        :raises Exception: any retrieval error is logged and re-raised.
        """

        def fetch(name, **extra):
            # A fresh index list is built for every call, as before.
            return self.dataset.get_values(name,
                                           timeinds=[np.asarray([i])],
                                           point=self.particle.location,
                                           **extra)

        try:
            has_w = 'w' in self.dataset.nc.variables
            has_temp_salt = self.temp_name is not None and self.salt_name is not None

            # Default to NaN so both names are always bound at return time,
            # including when only one of temp_name/salt_name is configured.
            temp = np.nan
            salt = np.nan

            # Grab data at time index closest to particle location
            u = np.mean(np.mean(fetch('u')))
            v = np.mean(np.mean(fetch('v')))
            # If there is vertical velocity in the dataset, get it
            w = np.mean(np.mean(fetch('w'))) if has_w else 0.0
            # If there is salt and temp in the dataset, get it
            if has_temp_salt:
                temp = np.mean(np.mean(fetch('temp')))
                salt = np.mean(np.mean(fetch('salt')))

            # Check for nans that occur in the ocean (happens because
            # of model and coastline resolution mismatches)
            if np.isnan(u).any() or np.isnan(v).any() or np.isnan(w).any():
                # Re-read with num=2 and average (original note: "the mean of
                # the closest 4 points"); a NaN among them keeps the mean NaN.
                uarray1 = fetch('u', num=2)
                varray1 = fetch('v', num=2)
                w = fetch('w', num=2).mean() if has_w else 0.0

                if has_temp_salt:
                    temp = fetch('temp', num=2).mean()
                    salt = fetch('salt', num=2).mean()
                u = uarray1.mean()
                v = varray1.mean()

        except Exception:
            logger.exception("Could not retrieve data.")
            raise

        return u, v, w, temp, salt
Exemplo n.º 10
0
def particle_runner(part, model):
    """Run one particle through a ``BaseForcer`` and publish the outcome.

    A ``RedisHandler`` is attached to the paegan logger, a ``BaseForcer`` is
    built from the settings on ``model`` and run, and a JSON message
    ``{"status": "COMPLETED" | "FAILED", "uid": part.uid}`` is published on
    ``model.redis_results_channel``.

    :param part: the particle to run; its ``uid`` is included in the message.
    :param model: object carrying the run configuration and redis settings.
    :raises Exception: if the redis connection itself cannot be created (the
        error is logged first). Errors from the forcer are logged and
        reported as ``FAILED`` rather than raised.
    """
    from paegan.logger import logger
    logger.setLevel(logging.PROGRESS)

    from paegan.logger.redis_handler import RedisHandler
    rhandler = RedisHandler(model.redis_log_channel, model.redis_url)
    rhandler.setLevel(logging.PROGRESS)
    logger.addHandler(rhandler)

    # The connection is needed on both the success and the failure path, so
    # create it before the forcer's try block. Created inside that block, a
    # failure here would surface from the handler as an UnboundLocalError.
    try:
        redis_connection = redis.from_url(model.redis_url)
    except Exception:
        logger.exception("Could not create a redis connection")
        raise

    try:
        forcer = BaseForcer(
            model.hydrodataset,
            particle=part,
            common_variables=model.common_variables,
            times=model.times,
            start_time=model.start,
            models=model._models,
            release_location_centroid=model.reference_location.point,
            usebathy=model._use_bathymetry,
            useshore=model._use_shoreline,
            usesurface=model._use_seasurface,
            reverse_distance=model.reverse_distance,
            bathy_path=model.bathy_path,
            shoreline_path=model.shoreline_path,
            shoreline_feature=model.shoreline_feature,
            time_method=model.time_method,
            redis_url=model.redis_url,
            redis_results_channel=model.redis_results_channel,
            shoreline_index_buffer=model.shoreline_index_buffer)
        forcer.run()
    except Exception:
        logger.exception(traceback.format_exc())
        status = "FAILED"
    else:
        status = "COMPLETED"

    redis_connection.publish(
        model.redis_results_channel,
        json.dumps({
            "status": status,
            "uid": part.uid
        }))
Exemplo n.º 11
0
    def get_linterp_data(self, i, currenttime):
        """Fill the cache for this step, then read the interpolated data from it.

        While reading, this process is registered as a reader: the shared
        read count is incremented and its pid recorded, both under
        ``self.read_lock``. The dataset is always closed and the registration
        undone afterwards.

        :returns: the parent class's ``get_linterp_data`` result, or ``None``
            when the read raised (the error is logged, not propagated).
        """
        self.fill_cache_with_linterp_data(i, currenttime)

        # Now that the cache is filled, register as a reader before touching it
        with self.read_lock:
            self.read_count.value += 1
            self.has_read_lock.append(os.getpid())

        data = None
        try:
            self.dataset.opennc()
            data = super(CachingForcer, self).get_linterp_data(i, currenttime)
        except Exception:
            logger.exception("Could not retrieve data")
        finally:
            self.dataset.closenc()
            with self.read_lock:
                self.read_count.value -= 1
                self.has_read_lock.remove(os.getpid())

        return data
Exemplo n.º 12
0
    def get_linterp_data(self, i, currenttime):
        """Populate the cache, then return the parent's interpolated data.

        The read is bracketed by reader bookkeeping under ``self.read_lock``:
        the read count goes up and this pid is appended before the read, and
        both are undone afterwards. The dataset is closed in every case.

        :returns: what the parent ``get_linterp_data`` returns, or ``None`` if
            retrieving the data raised (logged, not re-raised).
        """
        self.fill_cache_with_linterp_data(i, currenttime)

        # The cache is filled; announce this process as an active reader
        with self.read_lock:
            self.read_count.value += 1
            self.has_read_lock.append(os.getpid())

        interpolated = None
        try:
            self.dataset.opennc()
            parent = super(CachingForcer, self)
            interpolated = parent.get_linterp_data(i, currenttime)
        except Exception:
            logger.exception("Could not retrieve data")
        finally:
            self.dataset.closenc()
            with self.read_lock:
                self.read_count.value -= 1
                self.has_read_lock.remove(os.getpid())

        return interpolated
Exemplo n.º 13
0
    def start_tasks(self, **kwargs):
        """Dispatch every particle through a task queue or a local pool.

        :param kwargs: if ``task_queue_call`` is present and truthy, it is
            called once per particle as
            ``task_queue_call(func=particle_runner, args=(particle, self))``.
            A failing call is logged and a ``FAILED`` status for that particle
            is published on ``self.redis_results_channel``.
        :returns: ``True`` when the task queue was used; otherwise the
            iterator returned by ``multiprocessing.Pool().imap`` over one
            ``BaseForcer`` per particle.
        """
        logger.progress((5, 'Running model'))
        rc = redis.from_url(self.redis_url)

        enqueue = kwargs.get('task_queue_call')
        if enqueue:
            for particle in self.particles:
                try:
                    enqueue(func=particle_runner, args=(particle, self))
                except Exception:
                    logger.exception(traceback.format_exc())
                    failure = json.dumps({
                        "status": "FAILED",
                        "uid": particle.uid
                    })
                    rc.publish(self.redis_results_channel, failure)
            return True

        def build_forcer(particle):
            # Only the particle differs between forcers.
            return BaseForcer(
                self.hydrodataset,
                particle=particle,
                common_variables=self.common_variables,
                times=self.times,
                start_time=self.start,
                models=self._models,
                release_location_centroid=self.reference_location.point,
                usebathy=self._use_bathymetry,
                useshore=self._use_shoreline,
                usesurface=self._use_seasurface,
                reverse_distance=self.reverse_distance,
                bathy_path=self.bathy_path,
                shoreline_path=self.shoreline_path,
                shoreline_feature=self.shoreline_feature,
                time_method=self.time_method,
                redis_url=self.redis_url,
                redis_results_channel=self.redis_results_channel,
                shoreline_index_buffer=self.shoreline_index_buffer,
            )

        forcers = [build_forcer(p) for p in self.particles]
        self.pool = multiprocessing.Pool()
        return self.pool.imap(Runner(), forcers)
Exemplo n.º 14
0
    def start_tasks(self):
        """Create a BaseForcer per particle and submit them to ``self.pool``.

        A ``multiprocessing.Pool`` is created when ``self.pool`` is ``None``.
        For a multiprocessing pool the forcers are submitted with ``imap``;
        any other pool object is submitted to with ``map_async``.

        :returns: the object returned by ``imap`` / ``map_async``.
        :raises Exception: anything raised while building or submitting the
            tasks is logged and re-raised unchanged.
        """
        # @TODO: this is more initialization, but need to prevent derived classes from doing this
        if self.pool is None:
            self.pool = multiprocessing.Pool()

        def build_forcer(particle):
            # Only the particle differs between forcers.
            return BaseForcer(
                self.hydrodataset,
                particle=particle,
                common_variables=self.common_variables,
                timevar=self.timevar,
                times=self.times,
                start_time=self.start,
                models=self._models,
                release_location_centroid=self.reference_location.point,
                usebathy=self._use_bathymetry,
                useshore=self._use_shoreline,
                usesurface=self._use_seasurface,
                reverse_distance=self.reverse_distance,
                bathy_path=self.bathy_path,
                shoreline_path=self.shoreline_path,
                shoreline_feature=self.shoreline_feature,
                time_method=self.time_method,
                redis_url=self.redis_url,
                redis_results_channel=self.redis_results_channel,
                shoreline_index_buffer=self.shoreline_index_buffer,
            )

        try:
            logger.info('Adding %i particles as tasks' % len(self.particles))
            forcers = [build_forcer(p) for p in self.particles]

            # @TODO: better mechanism than switching on type
            if isinstance(self.pool, multiprocessing.pool.Pool):
                submit = self.pool.imap
            else:
                # IPython parallel View
                submit = self.pool.map_async

            return submit(Runner(), forcers)

        except Exception:
            logger.exception("Something didn't start correctly!")
            raise
Exemplo n.º 15
0
    def listen_for_results(self):
        """Wait for the particle results and return them.

        With a ``multiprocessing`` pool, results are pulled one at a time via
        ``self.result.next(timeout)`` (timeout of 200). With any other pool
        (an IPython parallel view, per the inline comments) ``self.result`` is
        polled with ``get(timeout=1)`` and its ``progress`` attribute is used
        as the retrieved count. Progress is reported on a 0-90 scale.

        A failed retrieval is logged and the loop keeps waiting. If the result
        iterator ends early, a warning is logged and the particles gathered so
        far are returned.

        :returns: the retrieved particles.
        """
        logger.info("Waiting for %i particle results" % len(self.particles))
        logger.progress((5, "Running model"))

        particles = []
        retrieved = 0
        timeout = 200

        while retrieved < len(self.particles):
            try:
                # @TODO: better mechanism than switching on type
                if isinstance(self.pool, multiprocessing.pool.Pool):
                    # self.result is an iterator that can timeout on next()
                    particle = self.result.next(timeout)
                    retrieved += 1
                    particles.append(particle)
                else:
                    # IPython parallel View
                    # self.result is an AsyncMapResult
                    from IPython.parallel import TimeoutError
                    try:
                        new_particles = self.result.get(timeout=1)
                    except TimeoutError:
                        pass    # this is fine, get incremental progress below
                    else:
                        particles = new_particles

                    # progress is absolute, not incremental
                    retrieved = self.result.progress

            except StopIteration:
                # Fewer results than expected were produced. Report it
                # explicitly; an assert here would be stripped under -O.
                logger.warn("Only %i of %i particle results were retrieved" % (retrieved, len(self.particles)))
                break
            except Exception:
                # Not a bare except: KeyboardInterrupt and SystemExit must be
                # able to stop this wait loop.
                logger.exception("Particle has FAILED!!")
                continue

            # We multiply by 90 here to save 10% for the exporting
            logger.progress((round((float(retrieved) / self.number_of_tasks) * 90., 1), "%s Particle(s) complete" % retrieved))

        return particles
Exemplo n.º 16
0
    def nc_object(ncfile, tname='time'):
        """Return a netCDF4 dataset object for ``ncfile``.

        NOTE(review): there is no ``self``/``cls`` parameter, so this is
        presumably a staticmethod whose decorator is outside this view.

        :param ncfile: a ``str`` to open, a ``Dataset`` instance (its ``nc``
            attribute is returned), or an existing ``netCDF4.Dataset`` /
            ``netCDF4.MFDataset`` (returned unchanged).
        :param tname: dimension name passed as ``aggdim`` to the later
            ``netCDF4.MFDataset`` attempts.
        :returns: the opened or unwrapped dataset. Any other input type falls
            through every branch, so ``None`` is returned implicitly.
        :raises Exception: a failed open attempt that is not retried is
            re-raised. Most are logged first, but not all: an exception raised
            by an ``MFDataset`` fallback that matches none of that attempt's
            handlers propagates without a log entry.
        """

        if isinstance(ncfile, str):
            # Four attempts, each tried only when the previous one failed with
            # IOError, RuntimeError or IndexError:
            #   1. netCDF4.Dataset(ncfile)
            #   2. netCDF4.MFDataset(ncfile)
            #   3. netCDF4.MFDataset(ncfile, aggdim=tname)
            #   4. netCDF4.MFDataset(str(ncfile), aggdim=tname)
            try:
                return netCDF4.Dataset(ncfile)
            except (IOError, RuntimeError, IndexError):
                # Are we a set of files?
                try:
                    return netCDF4.MFDataset(ncfile)
                except (IOError, RuntimeError, IndexError):
                    try:
                        return netCDF4.MFDataset(ncfile, aggdim=tname)
                    except (IOError, RuntimeError, IndexError):
                        try:
                            # Unicode isn't working sometimes?
                            return netCDF4.MFDataset(str(ncfile), aggdim=tname)
                        except Exception:
                            # Last attempt: log and give up.
                            logger.exception("Can not open %s" % ncfile)
                            raise
                except ValueError:
                    # Only catches a ValueError from attempt 2 itself, not
                    # from the handlers nested above.
                    # Probably a DAP endpoint
                    logger.exception("Can not open %s" % ncfile)
                    raise
            except Exception:
                # Any other failure of the first netCDF4.Dataset call.
                logger.exception("Can not open %s" % ncfile)
                raise
        elif isinstance(ncfile, Dataset):
            # Passed in paegan Dataset object
            return ncfile.nc
        elif isinstance(ncfile, netCDF4.Dataset) or isinstance(ncfile, netCDF4.MFDataset):
            # Passed in a netCDF4 Dataset object
            return ncfile
Exemplo n.º 17
0
    def start_tasks(self):
        """Queue one BaseForcer per particle and start the worker processes.

        Every forcer is put on ``self.tasks``, then ``self.nproc - 1``
        ``Consumer`` processes are created (with ``None`` in the get_data
        position) and started.

        :returns: ``True`` when everything was queued and started, ``False``
            when any step raised (the error is logged, not propagated).
        """
        try:
            logger.info('Adding %i particles as tasks' % len(self.particles))
            for part in self.particles:
                forcer = BaseForcer(self.hydrodataset,
                                    particle=part,
                                    common_variables=self.common_variables,
                                    timevar=self.timevar,
                                    times=self.times,
                                    start_time=self.start,
                                    models=self._models,
                                    release_location_centroid=self.reference_location.point,
                                    usebathy=self._use_bathymetry,
                                    useshore=self._use_shoreline,
                                    usesurface=self._use_seasurface,
                                    reverse_distance=self.reverse_distance,
                                    bathy_path=self.bathy_path,
                                    shoreline_path=self.shoreline_path,
                                    shoreline_feature=self.shoreline_feature,
                                    time_method=self.time_method,
                                    redis_url=self.redis_url,
                                    redis_results_channel=self.redis_results_channel,
                                    shoreline_index_buffer=self.shoreline_index_buffer)
                self.tasks.put(forcer)

            # Create workers for the particles. range() is used because it
            # exists on both Python 2 and 3; on Python 3 xrange would raise a
            # NameError that the handler below would swallow.
            self.procs = [
                Consumer(self.tasks, self.results, self.n_run, self.nproc_lock,
                         self.active, None, name="BaseForcer-%d" % i)
                for i in range(self.nproc - 1)
            ]
            for w in self.procs:
                w.start()
                logger.info('Started %s' % w.name)

            return True

        except Exception:
            logger.exception("Something didn't start correctly!")
            return False
Exemplo n.º 18
0
    def listen_for_results(self, output_h5_file, total_particles):
        """Poll ``self.result`` until all particles are done, then write HDF5.

        ``self.result.get(timeout=1)`` is polled repeatedly; a timeout is
        expected and ignored. After each poll, ``self.result.progress`` is
        taken as the number of completed particles and reported on a 0-90
        scale. Once ``total_particles`` is reached, every timestep of every
        returned particle is written through ``ex.ResultsPyTable``.

        :param output_h5_file: path handed to ``ex.ResultsPyTable``.
        :param total_particles: number of results to wait for.
        :raises ImportError: if ``IPython.parallel`` cannot be imported.
        """
        logger.info("Waiting for %i particle results" % total_particles)

        # IPython parallel View
        # self.result is an AsyncMapResult
        # Imported once, outside the retry loop, so a missing IPython.parallel
        # fails immediately instead of being logged and retried forever.
        from IPython.parallel import TimeoutError

        particles = []
        retrieved = 0

        while retrieved < total_particles:
            try:
                try:
                    new_particles = self.result.get(timeout=1)
                except TimeoutError:
                    pass    # this is fine, get incremental progress below
                else:
                    particles = new_particles

                # progress is absolute, not incremental
                retrieved = self.result.progress
            except StopIteration:
                # Report the shortfall explicitly; an assert here would be
                # stripped under -O.
                logger.warn("Only %i of %i particle results were retrieved" % (retrieved, total_particles))
                break
            except Exception:
                # Not a bare except: KeyboardInterrupt and SystemExit must be
                # able to stop this polling loop.
                # NOTE(review): if get() keeps raising the same error the loop
                # never ends, because progress is not refreshed on this path.
                logger.exception("Particle has FAILED!!")
                continue

            # We multiply by 90 here to save 10% for the exporting
            logger.progress((round((float(retrieved) / total_particles) * 90., 1), "%s Particle(s) complete" % retrieved))

        results = ex.ResultsPyTable(output_h5_file)
        try:
            for p in particles:
                for x in range(len(p.locations)):
                    results.write(p.timestep_index_dump(x))
            results.compute()
        finally:
            # Always release the output file, even if a write fails.
            results.close()
Exemplo n.º 19
0
    def run(self):
        """Consume tasks from ``self.task_queue`` until it stays empty.

        Each task is called with ``self.active`` and its outcome is put on
        ``self.result_queue`` as a ``(code, payload)`` tuple:

        * ``(1, return_value)`` when the task succeeded,
        * ``(-2, "CachingDataController")`` when a CachingDataController
          task raised (``get_data`` and ``active`` are also set to False),
        * ``(-1, particle)`` when a BaseForcer task raised,
        * ``(None, None)`` for any other failing task.

        The loop ends once no task arrives within 10 seconds.
        """
        while True:
            try:
                next_task = self.task_queue.get(True, 10)
            except queue.Empty:
                logger.info("No tasks left to complete, closing %s" %
                            self.name)
                break

            answer = (None, None)
            try:
                answer = (1, next_task(self.active))
            except Exception:
                logger.exception("Disabling Error")
                if isinstance(next_task, CachingDataController):
                    answer = (-2, "CachingDataController")
                    # Tell the particles that the CachingDataController is releasing file
                    self.get_data.value = False
                    # The data controller has died, so don't process any more tasks
                    self.active.value = False
                elif isinstance(next_task, BaseForcer):
                    answer = (-1, next_task.particle)
                else:
                    logger.warning("Strange task raised an exception: %s" %
                                   str(next_task.__class__))
                    answer = (None, None)
            finally:
                try:
                    self.result_queue.put(answer)
                finally:
                    # The bookkeeping must run even if publishing the answer
                    # fails, otherwise the running-task counter and the task
                    # queue's unfinished count are never brought back down.
                    with self.nproc_lock:
                        self.n_run.value -= 1
                    self.task_queue.task_done()
Exemplo n.º 20
0
    def _finish_cache_update(self):
        """Close the local cache file (if one is open) and release the locks.

        The lock/flag reset runs even when syncing or closing the file fails,
        so a failed update cannot leave the write and read locks held.
        """
        try:
            if self.local is not None:
                self.local.sync()
                self.local.close()
        finally:
            self.has_write_lock.value = -1
            self.write_lock.release()
            self.get_data.value = False
            self.read_lock.release()
            logger.debug(
                "Done updating cache file, closing file, and releasing locks"
            )

    def __call__(self, active):
        """Maintain the local netCDF cache while particles are running.

        Opens ``self.hydrodataset``, works out the unique time indices the
        run needs (plus one extra index for linear interpolation) and then
        loops while ``self.n_run.value > 1``.  Whenever ``self.get_data.value``
        is True it waits until ``self.read_count.value`` is 0, takes the write
        lock, and either creates the cache file at ``self.cache_path`` (first
        request) or appends to it (later requests), copying data with
        ``self.get_remote_data``.

        :param active: accepted so the object can be called like the other
            tasks; it is not read in this method.
        :returns: the string ``"CachingDataController"``.
        :raises AssertionError: when the first download still fails after
            20 attempts.
        :raises ValueError: when the remote u variable is neither 3-d nor 4-d.
        """
        c = 0

        self.dataset = CommonDataset.open(self.hydrodataset)
        self.remote = self.dataset.nc

        # Calculate the datetimes of the model timesteps like
        # the particle objects do, so we can figure out unique
        # time indices
        modelTimestep, newtimes = AsaTransport.get_time_objects_from_model_timesteps(
            self.times, start=self.start_time)

        timevar = self.dataset.gettimevar(self.uname)

        # Don't need to grab the last datetime, as it is not needed for forcing, only
        # for setting the time of the final particle forcing
        time_indexs = timevar.nearest_index(newtimes[0:-1], select='before')

        # Have to make sure that we get the plus 1 for the
        # linear interpolation of u,v,w,temp,salt
        self.inds = np.unique(time_indexs)
        self.inds = np.append(self.inds, self.inds.max() + 1)

        # While there is at least 1 particle still running,
        # stay alive, if not break
        while self.n_run.value > 1:

            if self.caching is False:
                logger.debug(
                    "Caching is False, not doing much.  Just hanging out until all of the particles finish."
                )
                timer.sleep(10)
                continue

            # If particle asks for data, do the following
            if self.get_data.value is True:
                logger.debug("Particle asked for data!")

                # Wait for particles to get out.  The loop is left with the
                # read lock still held; it is released once the update is done.
                while True:
                    self.read_lock.acquire()

                    logger.debug("Read count: %d" % self.read_count.value)
                    if self.read_count.value > 0:
                        logger.debug(
                            "Waiting for write lock on cache file (particles must stop reading)..."
                        )
                        self.read_lock.release()
                        timer.sleep(2)
                    else:
                        break

                # Get write lock on the file.  Already have read lock.
                self.write_lock.acquire()
                self.has_write_lock.value = os.getpid()

                # Reset the handle so the cleanup never touches a stale or
                # already-closed file when opening the cache fails.
                self.local = None

                if c == 0:
                    logger.debug("Creating cache file")
                    try:
                        # Open local cache for writing, overwrites
                        # existing file with same name
                        self.local = netCDF4.Dataset(self.cache_path, 'w')

                        indices = self.dataset.get_indices(
                            self.uname,
                            timeinds=[np.asarray([0])],
                            point=self.start)
                        self.point_get.value = [
                            self.inds[0], indices[-2], indices[-1]
                        ]

                        # Create dimensions for u and v variables
                        self.local.createDimension('time', None)
                        self.local.createDimension('level', None)
                        self.local.createDimension('x', None)
                        self.local.createDimension('y', None)

                        # Create 3d or 4d u and v variables
                        remote_ndim = self.remote.variables[self.uname].ndim
                        if remote_ndim == 4:
                            self.ndim = 4
                            dimensions = ('time', 'level', 'y', 'x')
                            coordinates = "time z lon lat"
                        elif remote_ndim == 3:
                            self.ndim = 3
                            dimensions = ('time', 'y', 'x')
                            coordinates = "time lon lat"
                        else:
                            # Fail with a clear message instead of a NameError
                            # on ``dimensions`` further down.
                            raise ValueError(
                                "Variable %s has %s dimensions; only 3 or 4 are supported"
                                % (self.uname, remote_ndim))
                        shape = self.remote.variables[self.uname].shape

                        # If there is no FillValue defined in the dataset, use np.nan.
                        # Sometimes it will work out correctly and other times we will
                        # have a huge cache file.
                        try:
                            fill = self.remote.variables[
                                self.uname].missing_value
                        except Exception:
                            fill = np.nan

                        # Create domain variable that specifies
                        # where there is data geographically/by time
                        # and where there is not data,
                        #   Used for testing if particle needs to
                        #   ask cache to update
                        domain = self.local.createVariable('domain',
                                                           'i',
                                                           dimensions,
                                                           zlib=False,
                                                           fill_value=0)
                        domain.coordinates = coordinates

                        # Create local u and v variables
                        u = self.local.createVariable('u',
                                                      'f',
                                                      dimensions,
                                                      zlib=False,
                                                      fill_value=fill)
                        v = self.local.createVariable('v',
                                                      'f',
                                                      dimensions,
                                                      zlib=False,
                                                      fill_value=fill)

                        v.coordinates = coordinates
                        u.coordinates = coordinates

                        localvars = [
                            u,
                            v,
                        ]
                        remotevars = [
                            self.remote.variables[self.uname],
                            self.remote.variables[self.vname]
                        ]

                        # Create local w variable
                        if self.wname is not None:
                            w = self.local.createVariable('w',
                                                          'f',
                                                          dimensions,
                                                          zlib=False,
                                                          fill_value=fill)
                            w.coordinates = coordinates
                            localvars.append(w)
                            remotevars.append(
                                self.remote.variables[self.wname])

                        if self.temp_name is not None and self.salt_name is not None:
                            # Create local temp and salt vars
                            temp = self.local.createVariable('temp',
                                                             'f',
                                                             dimensions,
                                                             zlib=False,
                                                             fill_value=fill)
                            salt = self.local.createVariable('salt',
                                                             'f',
                                                             dimensions,
                                                             zlib=False,
                                                             fill_value=fill)
                            temp.coordinates = coordinates
                            salt.coordinates = coordinates
                            localvars.append(temp)
                            localvars.append(salt)
                            remotevars.append(
                                self.remote.variables[self.temp_name])
                            remotevars.append(
                                self.remote.variables[self.salt_name])

                        # Create local lat/lon coordinate variables
                        if self.remote.variables[self.xname].ndim == 2:
                            lon = self.local.createVariable('lon',
                                                            'f', ("y", "x"),
                                                            zlib=False)
                            lon[:] = self.remote.variables[self.xname][:, :]
                            lat = self.local.createVariable('lat',
                                                            'f', ("y", "x"),
                                                            zlib=False)
                            lat[:] = self.remote.variables[self.yname][:, :]
                        if self.remote.variables[self.xname].ndim == 1:
                            lon = self.local.createVariable('lon',
                                                            'f', ("x"),
                                                            zlib=False)
                            lon[:] = self.remote.variables[self.xname][:]
                            lat = self.local.createVariable('lat',
                                                            'f', ("y"),
                                                            zlib=False)
                            lat[:] = self.remote.variables[self.yname][:]

                        # Create local z variable
                        if self.zname is not None:
                            if self.remote.variables[self.zname].ndim == 4:
                                z = self.local.createVariable(
                                    'z',
                                    'f', ("time", "level", "y", "x"),
                                    zlib=False)
                                remotez = self.remote.variables[self.zname]
                                localvars.append(z)
                                remotevars.append(remotez)
                            elif self.remote.variables[self.zname].ndim == 3:
                                z = self.local.createVariable(
                                    'z', 'f', ("level", "y", "x"), zlib=False)
                                z[:] = self.remote.variables[
                                    self.zname][:, :, :]
                            elif self.remote.variables[self.zname].ndim == 1:
                                z = self.local.createVariable('z',
                                                              'f', ("level", ),
                                                              zlib=False)
                                z[:] = self.remote.variables[self.zname][:]

                        # Create local time variable
                        time = self.local.createVariable('time',
                                                         'f8', ("time", ),
                                                         zlib=False)
                        if self.tname is not None:
                            time[:] = self.remote.variables[self.tname][
                                self.inds]

                        # Request time_size indices starting at the requested
                        # index, clipped to the last index that is needed
                        if self.point_get.value[0] + self.time_size > np.max(
                                self.inds):
                            current_inds = np.arange(self.point_get.value[0],
                                                     np.max(self.inds) + 1)
                        else:
                            current_inds = np.arange(
                                self.point_get.value[0],
                                self.point_get.value[0] + self.time_size)

                        # Get data from remote dataset and add
                        # to local cache.
                        # Try 20 times on the first attempt
                        current_attempt = 1
                        max_attempts = 20
                        while True:
                            # Explicit check instead of ``assert`` so the
                            # retry limit still applies under ``python -O``.
                            if current_attempt > max_attempts:
                                raise AssertionError(
                                    "CachingDataController could not get remote data in %d attempts"
                                    % max_attempts)
                            try:
                                self.get_remote_data(localvars, remotevars,
                                                     current_inds, shape)
                            except AssertionError:
                                raise
                            except Exception:
                                logger.warning(
                                    "CachingDataController failed to get remote data.  Trying again in 20 seconds. %s attempts left."
                                    % str(max_attempts - current_attempt))
                                logger.exception("Data Access Error")
                                timer.sleep(20)
                                current_attempt += 1
                            else:
                                break

                        c += 1
                    except Exception:
                        logger.error(
                            "CachingDataController failed to get data (first request)"
                        )
                        raise
                    finally:
                        self._finish_cache_update()
                else:
                    logger.debug("Updating cache file")
                    try:
                        # Open local cache dataset for appending
                        self.local = netCDF4.Dataset(self.cache_path, 'a')

                        # Create local and remote variable objects
                        # for the variables of interest
                        u = self.local.variables['u']
                        v = self.local.variables['v']
                        time = self.local.variables['time']
                        remoteu = self.remote.variables[self.uname]
                        remotev = self.remote.variables[self.vname]

                        # Create lists of variable objects for
                        # the data updater
                        localvars = [
                            u,
                            v,
                        ]
                        remotevars = [
                            remoteu,
                            remotev,
                        ]
                        if self.salt_name is not None and self.temp_name is not None:
                            salt = self.local.variables['salt']
                            temp = self.local.variables['temp']
                            remotesalt = self.remote.variables[self.salt_name]
                            remotetemp = self.remote.variables[self.temp_name]
                            localvars.append(salt)
                            localvars.append(temp)
                            remotevars.append(remotesalt)
                            remotevars.append(remotetemp)
                        if self.wname is not None:
                            w = self.local.variables['w']
                            remotew = self.remote.variables[self.wname]
                            localvars.append(w)
                            remotevars.append(remotew)
                        if self.zname is not None:
                            remotez = self.remote.variables[self.zname]
                            if remotez.ndim == 4:
                                z = self.local.variables['z']
                                localvars.append(z)
                                remotevars.append(remotez)
                        if self.tname is not None:
                            # Read the values from the remote *time variable*;
                            # the variables mapping itself cannot be indexed
                            # with the index array.
                            remotetime = self.remote.variables[self.tname]
                            time[self.inds] = remotetime[self.inds]

                        if self.point_get.value[0] + self.time_size > np.max(
                                self.inds):
                            current_inds = np.arange(self.point_get.value[0],
                                                     np.max(self.inds) + 1)
                        else:
                            current_inds = np.arange(
                                self.point_get.value[0],
                                self.point_get.value[0] + self.time_size)

                        # Get data from remote dataset and add
                        # to local cache.  ``shape`` was set when the cache
                        # file was created on the first request.
                        while True:
                            try:
                                self.get_remote_data(localvars, remotevars,
                                                     current_inds, shape)
                            except Exception:
                                # Retried without limit, but ordinary errors
                                # only: KeyboardInterrupt/SystemExit still
                                # stop the controller.
                                logger.warning(
                                    "CachingDataController failed to get remote data.  Trying again in 30 seconds"
                                )
                                logger.exception("Data Access Error")
                                timer.sleep(30)
                            else:
                                break

                        c += 1
                    except Exception:
                        logger.error(
                            "CachingDataController failed to get data (not first request)"
                        )
                        raise
                    finally:
                        self._finish_cache_update()
            else:
                logger.debug(
                    "Particles are still running, waiting for them to request data..."
                )
                timer.sleep(2)

        self.dataset.closenc()

        return "CachingDataController"
Exemplo n.º 21
0
    def get_linterp_data(self, i, currenttime):
        """Return u, v, w, temp and salt linearly interpolated to ``currenttime``.

        Values are read from ``self.dataset`` at time indices ``i`` and
        ``i + 1`` for the particle's current location and interpolated with
        ``self.linterp``.  If u, v or w contains NaN, all variables are
        re-read with ``num=2`` and averaged before interpolating.

        Note: self.dataset.opennc() must be called before calling this function.
        This is because the caching forcer must close it everytime, while a non caching
        forcer can leave the dataset open.

        :param i: time index of the timestep at or before ``currenttime``.
        :param currenttime: time to interpolate to; converted with ``date2num``.
        :returns: tuple ``(u, v, w, temp, salt)``.  ``w`` is 0.0-based when the
            dataset has no ``'w'`` variable; ``temp`` and ``salt`` are
            ``np.nan`` unless both ``self.temp_name`` and ``self.salt_name``
            are set.
        :raises Exception: any error while reading or interpolating is logged
            and re-raised.
        """
        # temp and salt are only read when *both* names are configured, so
        # default them up front; otherwise configuring just one of the two
        # left the other unbound at the return statement.
        temp = np.nan
        salt = np.nan

        def point_pair(name):
            # Value at the particle location for time indices i and i + 1
            return [
                np.mean(
                    np.mean(
                        self.dataset.get_values(
                            name,
                            timeinds=[np.asarray([t])],
                            point=self.particle.location)))
                for t in (i, i + 1)
            ]

        def neighbor_pair(name):
            # Same as point_pair, but averaged over the num=2 neighborhood
            return [
                self.dataset.get_values(name,
                                        timeinds=[np.asarray([t])],
                                        point=self.particle.location,
                                        num=2).mean()
                for t in (i, i + 1)
            ]

        try:
            has_w = 'w' in self.dataset.nc.variables
            has_temp_salt = (self.temp_name is not None
                             and self.salt_name is not None)

            # Grab data at time index closest to particle location
            u = point_pair('u')
            v = point_pair('v')
            # if there is vertical velocity in the dataset, get it
            w = point_pair('w') if has_w else [0.0, 0.0]
            # If there is salt and temp in the dataset, get it
            if has_temp_salt:
                temp = point_pair('temp')
                salt = point_pair('salt')

            # Check for nans that occur in the ocean (happens because
            # of model and coastline resolution mismatches)
            if np.isnan(u).any() or np.isnan(v).any() or np.isnan(w).any():
                # Take the mean of the closest 4 points
                # If this includes nan which it will, result is nan
                u = neighbor_pair('u')
                v = neighbor_pair('v')
                w = neighbor_pair('w') if has_w else [0.0, 0.0]
                if has_temp_salt:
                    temp = neighbor_pair('temp')
                    salt = neighbor_pair('salt')

            # Linear interp of data between timesteps
            currenttime = date2num(currenttime)
            bracket = self.timevar.datenum[i:i + 2]
            u = self.linterp(bracket, u, currenttime)
            v = self.linterp(bracket, v, currenttime)
            w = self.linterp(bracket, w, currenttime)
            if has_temp_salt:
                temp = self.linterp(bracket, temp, currenttime)
                salt = self.linterp(bracket, salt, currenttime)

        except Exception:
            logger.exception("Could not retrieve data.")
            raise

        return u, v, w, temp, salt
Exemplo n.º 22
0
    def run(self):

        self.load_initial_dataset()

        redis_connection = None
        if self.redis_url is not None and self.redis_results_channel is not None:
            import redis
            redis_connection = redis.from_url(self.redis_url)

        # Setup shoreline
        self._shoreline = None
        if self.useshore is True:
            self._shoreline = Shoreline(path=self.shoreline_path, feature_name=self.shoreline_feature, point=self.release_location_centroid, spatialbuffer=self.shoreline_index_buffer)
            # Make sure we are not starting on land.  Raises exception if we are.
            self._shoreline.intersect(start_point=self.release_location_centroid, end_point=self.release_location_centroid)

        # Setup Bathymetry
        if self.usebathy is True:
            try:
                self._bathymetry = Bathymetry(file=self.bathy_path)
            except Exception:
                logger.exception("Could not load Bathymetry file: %s, using no Bathymetry for this run!" % self.bathy_path)
                self.usebathy = False

        # Calculate datetime at every timestep
        modelTimestep, newtimes = AsaTransport.get_time_objects_from_model_timesteps(self.times, start=self.start_time)

        if self.time_method == 'interp':
            time_indexs = self.timevar.nearest_index(newtimes, select='before')
        elif self.time_method == 'nearest':
            time_indexs = self.timevar.nearest_index(newtimes)
        else:
            logger.warn("Method for computing u,v,w,temp,salt not supported!")
        try:
            assert len(newtimes) == len(time_indexs)
        except AssertionError:
            logger.exception("Time indexes are messed up. Need to have equal datetime and time indexes")
            raise

        # Keep track of how much time we spend in each area.
        tot_boundary_time = 0.
        tot_model_time    = {}
        tot_read_data     = 0.
        for m in self.models:
            tot_model_time[m.name] = 0.

        # Set the base conditions
        # If using Redis, send the results
        if redis_connection is not None:
            redis_connection.publish(self.redis_results_channel, json.dumps(self.particle.timestep_dump()))

        # loop over timesteps
        # We don't loop over the last time_index because
        # we need to query in the time_index and set the particle's
        # location as the 'newtime' object.
        for loop_i, i in enumerate(time_indexs[0:-1]):

            if self.active and self.active.value is False:
                raise ValueError("Particle exiting due to Failure.")

            newloc = None

            st = time.clock()
            # Get the variable data required by the models
            if self.time_method == 'nearest':
                u, v, w, temp, salt = self.get_nearest_data(i)
            elif self.time_method == 'interp':
                u, v, w, temp, salt = self.get_linterp_data(i, newtimes[loop_i])
            else:
                logger.warn("Method for computing u,v,w,temp,salt is unknown. Only 'nearest' and 'interp' are supported.")
            tot_read_data += (time.clock() - st)

            # Get the bathy value at the particles location
            if self.usebathy is True:
                bathymetry_value = self._bathymetry.get_depth(self.particle.location)
            else:
                bathymetry_value = -999999999999999

            # Age the particle by the modelTimestep (seconds)
            # 'Age' meaning the amount of time it has been forced.
            self.particle.age(seconds=modelTimestep[loop_i])

            # loop over models - sort these in the order you want them to run
            for model in self.models:
                st = time.clock()
                movement = model.move(self.particle, u, v, w, modelTimestep[loop_i], temperature=temp, salinity=salt, bathymetry_value=bathymetry_value)
                newloc = Location4D(latitude=movement['latitude'], longitude=movement['longitude'], depth=movement['depth'], time=newtimes[loop_i+1])
                tot_model_time[m.name] += (time.clock() - st)
                if logger.isEnabledFor(logging.DEBUG):
                    logger.debug("%s - moved %.3f meters (horizontally) and %.3f meters (vertically) by %s with data from %s" % (self.particle.logstring(), movement['distance'], movement['vertical_distance'], model.__class__.__name__, newtimes[loop_i].isoformat()))
                if newloc:
                    st = time.clock()
                    self.boundary_interaction(particle=self.particle, starting=self.particle.location, ending=newloc,
                                              distance=movement['distance'], angle=movement['angle'],
                                              azimuth=movement['azimuth'], reverse_azimuth=movement['reverse_azimuth'],
                                              vertical_distance=movement['vertical_distance'], vertical_angle=movement['vertical_angle'])
                    tot_boundary_time += (time.clock() - st)
                if logger.isEnabledFor(logging.DEBUG):
                    logger.debug("%s - was forced by %s and is now at %s" % (self.particle.logstring(), model.__class__.__name__, self.particle.location.logstring()))

            self.particle.note = self.particle.outputstring()
            # Each timestep, save the particles status and environmental variables.
            # This keep fields such as temp, salt, halted, settled, and dead matched up with the number of timesteps
            self.particle.save()

            # If using Redis, send the results
            if redis_connection is not None:
                redis_connection.publish(self.redis_results_channel, json.dumps(self.particle.timestep_dump()))

        self.dataset.closenc()

        # We won't pull data for the last entry in locations, but we need to populate it with fill data.
        self.particle.fill_gap()

        if self.usebathy is True:
            self._bathymetry.close()

        if self.useshore is True:
            self._shoreline.close()

        logger.info(textwrap.dedent('''Particle %i Stats:
                          Data read: %f seconds
                          Model forcing: %s seconds
                          Boundary intersection: %f seconds''' % (self.particle.uid, tot_read_data, { s: '{:g} seconds'.format(f) for s, f in list(tot_model_time.items()) }, tot_boundary_time)))

        return self.particle
Exemplo n.º 23
0
    def get_linterp_data(self, i, currenttime):
        """ Return u, v, w, temp and salt linearly interpolated to ``currenttime``.

            Values are read from ``self.dataset`` at the particle's location for
            time indexes ``i`` and ``i + 1`` and then passed through
            ``self.linterp`` together with ``self.timevar.datenum[i:i+2]``.

            Note: self.dataset.opennc() must be called before calling this function.
            This is because the caching forcer must close it everytime, while a non caching
            forcer can leave the dataset open.

            :param i: time index of the first of the two bracketing timesteps
            :param currenttime: time to interpolate to (converted with ``date2num``)
            :returns: tuple ``(u, v, w, temp, salt)``.  ``w`` is interpolated from
                      zeros when the dataset has no 'w' variable.  ``temp`` and
                      ``salt`` are ``np.nan`` unless both ``self.temp_name`` and
                      ``self.salt_name`` are set.
            :raises Exception: anything raised while reading or interpolating is
                      logged and re-raised unchanged.
        """

        def point_means(name):
            # Value at the particle's location for each of the two timesteps.
            return [np.mean(np.mean(self.dataset.get_values(name, timeinds=[np.asarray([t])], point=self.particle.location)))
                    for t in (i, i + 1)]

        def neighbor_means(name):
            # Mean over the surrounding points (num=2) for each of the two timesteps.
            # If the neighborhood itself contains nan, the result may still be nan.
            return [self.dataset.get_values(name, timeinds=[np.asarray([t])], point=self.particle.location, num=2).mean()
                    for t in (i, i + 1)]

        # Defaults for when temperature/salinity are not read.  Previously only
        # the variable whose name was None got a default, so configuring exactly
        # one of temp_name/salt_name left the other local unbound and the
        # return statement raised UnboundLocalError.
        temp = np.nan
        salt = np.nan

        try:
            # If there is vertical velocity in the dataset, use it
            has_w = 'w' in self.dataset.nc.variables
            # Temperature and salinity are only read when both are available
            has_temp_salt = self.temp_name is not None and self.salt_name is not None

            # Grab data at time index closest to particle location
            u = point_means('u')
            v = point_means('v')
            w = point_means('w') if has_w else [0.0, 0.0]
            if has_temp_salt:
                temp = point_means('temp')
                salt = point_means('salt')

            # Check for nans that occur in the ocean (happens because
            # of model and coastline resolution mismatches)
            if np.isnan(u).any() or np.isnan(v).any() or np.isnan(w).any():
                # Fall back to the mean of the closest points
                u = neighbor_means('u')
                v = neighbor_means('v')
                if has_w:
                    w = neighbor_means('w')
                if has_temp_salt:
                    temp = neighbor_means('temp')
                    salt = neighbor_means('salt')

            # Linear interp of data between timesteps
            currenttime = date2num(currenttime)
            time_window = self.timevar.datenum[i:i+2]
            u = self.linterp(time_window, u, currenttime)
            v = self.linterp(time_window, v, currenttime)
            w = self.linterp(time_window, w, currenttime)
            if has_temp_salt:
                temp = self.linterp(time_window, temp, currenttime)
                salt = self.linterp(time_window, salt, currenttime)

        except Exception:
            logger.exception("Could not retrieve data.")
            raise

        return u, v, w, temp, salt
Exemplo n.º 24
0
    def run(self):
        """Force ``self.particle`` through every model timestep and return it.

        Steps, in order:
          1. load the initial dataset and, when ``redis_url`` and
             ``redis_results_channel`` are both set, connect to Redis;
          2. build the shoreline and bathymetry helpers when enabled
             (a bathymetry load failure is logged and bathymetry is disabled);
          3. for every timestep except the last: read u/v/w/temp/salt, age the
             particle, apply each model in ``self.models`` in order, hand every
             movement to ``self.boundary_interaction``, save the particle and
             publish its timestep dump to Redis when connected;
          4. close the dataset and helpers and log timing statistics.

        :returns: ``self.particle``
        :raises ValueError: if ``self.time_method`` is neither 'nearest' nor
            'interp', or if ``self.active.value`` is False during the loop.
        :raises AssertionError: if the number of time indexes does not match
            the number of computed datetimes.
        """

        # time.clock() was removed in Python 3.8.  Use perf_counter when it is
        # available and fall back to time.clock on interpreters that lack it.
        timer = getattr(time, 'perf_counter', None) or time.clock

        self.load_initial_dataset()

        redis_connection = None
        if self.redis_url is not None and self.redis_results_channel is not None:
            import redis
            redis_connection = redis.from_url(self.redis_url)

        # Setup shoreline
        self._shoreline = None
        if self.useshore is True:
            self._shoreline = Shoreline(
                path=self.shoreline_path,
                feature_name=self.shoreline_feature,
                point=self.release_location_centroid,
                spatialbuffer=self.shoreline_index_buffer)
            # Make sure we are not starting on land.  Raises exception if we are.
            self._shoreline.intersect(
                start_point=self.release_location_centroid,
                end_point=self.release_location_centroid)

        # Setup Bathymetry
        if self.usebathy is True:
            try:
                self._bathymetry = Bathymetry(file=self.bathy_path)
            except Exception:
                logger.exception(
                    "Could not load Bathymetry file: %s, using no Bathymetry for this run!"
                    % self.bathy_path)
                self.usebathy = False

        # Calculate datetime at every timestep
        modelTimestep, newtimes = AsaTransport.get_time_objects_from_model_timesteps(
            self.times, start=self.start_time)

        if self.time_method == 'interp':
            time_indexs = self.timevar.nearest_index(newtimes, select='before')
        elif self.time_method == 'nearest':
            time_indexs = self.timevar.nearest_index(newtimes)
        else:
            # Previously this only warned and then failed with a NameError on
            # the undefined 'time_indexs'; fail with a clear error instead.
            logger.warning("Method for computing u,v,w,temp,salt not supported!")
            raise ValueError(
                "Unsupported time_method %r. Only 'nearest' and 'interp' are supported."
                % (self.time_method,))

        # Explicit check rather than 'assert' so it still runs under 'python -O'.
        # AssertionError is kept as the exception type.
        if len(newtimes) != len(time_indexs):
            message = "Time indexes are messed up. Need to have equal datetime and time indexes"
            logger.error(message)
            raise AssertionError(message)

        # Keep track of how much time we spend in each area.
        tot_boundary_time = 0.
        tot_read_data = 0.
        tot_model_time = {m.name: 0. for m in self.models}

        # Set the base conditions
        # If using Redis, send the results
        if redis_connection is not None:
            redis_connection.publish(self.redis_results_channel,
                                     json.dumps(self.particle.timestep_dump()))

        # loop over timesteps
        # We don't loop over the last time_index because
        # we need to query in the time_index and set the particle's
        # location as the 'newtime' object.
        for loop_i, i in enumerate(time_indexs[0:-1]):

            if self.active and self.active.value is False:
                raise ValueError("Particle exiting due to Failure.")

            newloc = None

            st = timer()
            # Get the variable data required by the models
            if self.time_method == 'nearest':
                u, v, w, temp, salt = self.get_nearest_data(i)
            elif self.time_method == 'interp':
                u, v, w, temp, salt = self.get_linterp_data(i, newtimes[loop_i])
            else:
                # Only reachable if time_method is changed while running.
                logger.warning(
                    "Method for computing u,v,w,temp,salt is unknown. Only 'nearest' and 'interp' are supported."
                )
                raise ValueError(
                    "Unsupported time_method %r. Only 'nearest' and 'interp' are supported."
                    % (self.time_method,))
            tot_read_data += (timer() - st)

            # Get the bathy value at the particles location
            if self.usebathy is True:
                bathymetry_value = self._bathymetry.get_depth(self.particle.location)
            else:
                # Sentinel passed to the models when bathymetry is disabled
                bathymetry_value = -999999999999999

            # Age the particle by the modelTimestep (seconds)
            # 'Age' meaning the amount of time it has been forced.
            self.particle.age(seconds=modelTimestep[loop_i])

            # loop over models - sort these in the order you want them to run
            for model in self.models:
                st = timer()
                movement = model.move(self.particle,
                                      u,
                                      v,
                                      w,
                                      modelTimestep[loop_i],
                                      temperature=temp,
                                      salinity=salt,
                                      bathymetry_value=bathymetry_value)
                newloc = Location4D(latitude=movement['latitude'],
                                    longitude=movement['longitude'],
                                    depth=movement['depth'],
                                    time=newtimes[loop_i + 1])
                # Attribute the elapsed time to the model that just ran.  This
                # used the stale variable 'm' from the initialisation loop, so
                # every model's time was added to the last model's entry.
                tot_model_time[model.name] += (timer() - st)
                if logger.isEnabledFor(logging.DEBUG):
                    logger.debug(
                        "%s - moved %.3f meters (horizontally) and %.3f meters (vertically) by %s with data from %s"
                        % (self.particle.logstring(), movement['distance'],
                           movement['vertical_distance'],
                           model.__class__.__name__,
                           newtimes[loop_i].isoformat()))
                if newloc:
                    st = timer()
                    self.boundary_interaction(
                        particle=self.particle,
                        starting=self.particle.location,
                        ending=newloc,
                        distance=movement['distance'],
                        angle=movement['angle'],
                        azimuth=movement['azimuth'],
                        reverse_azimuth=movement['reverse_azimuth'],
                        vertical_distance=movement['vertical_distance'],
                        vertical_angle=movement['vertical_angle'])
                    tot_boundary_time += (timer() - st)
                if logger.isEnabledFor(logging.DEBUG):
                    logger.debug(
                        "%s - was forced by %s and is now at %s" %
                        (self.particle.logstring(), model.__class__.__name__,
                         self.particle.location.logstring()))

            self.particle.note = self.particle.outputstring()
            # Each timestep, save the particles status and environmental variables.
            # This keep fields such as temp, salt, halted, settled, and dead matched up with the number of timesteps
            self.particle.save()

            # If using Redis, send the results
            if redis_connection is not None:
                redis_connection.publish(
                    self.redis_results_channel,
                    json.dumps(self.particle.timestep_dump()))

        self.dataset.closenc()

        # We won't pull data for the last entry in locations, but we need to populate it with fill data.
        self.particle.fill_gap()

        if self.usebathy is True:
            self._bathymetry.close()

        if self.useshore is True:
            self._shoreline.close()

        model_times = {name: '{:g} seconds'.format(secs) for name, secs in tot_model_time.items()}
        logger.info(textwrap.dedent('''Particle %i Stats:
                          Data read: %f seconds
                          Model forcing: %s seconds
                          Boundary intersection: %f seconds''' % (self.particle.uid, tot_read_data, model_times, tot_boundary_time)))

        return self.particle
Exemplo n.º 25
0
    def setup_run(self, **kwargs):
        """Prepare the controller for a run.

        Configures logging (optionally attaching a Redis log handler),
        computes the model timesteps, creates ``self._npart`` particles at
        their start locations, optionally creates the multiprocessing shared
        state, and validates the hydro dataset.

        Keyword arguments read here:
        * output_formats - when it contains "redis", a RedisHandler is added
          to the logger and the redis_* values below are stored on self
        * redis_url, redis_log_channel, redis_results_channel
        * manager (default True) - when truthy, create the multiprocessing
          manager, shared values, lock and queues

        Raises BaseDataControllerError when the dataset cannot be opened, when
        a required variable (u, v, x, y) is missing from it, or when the run's
        start or end time is not strictly inside the dataset's time range.
        """

        logger.setLevel(logging.PROGRESS)

        self.redis_url             = None
        self.redis_log_channel     = None
        self.redis_results_channel = None
        if "redis" in kwargs.get("output_formats", []):
            from paegan.logger.redis_handler import RedisHandler
            self.redis_url             = kwargs.get("redis_url")
            self.redis_log_channel     = kwargs.get("redis_log_channel")
            self.redis_results_channel = kwargs.get("redis_results_channel")
            rhandler = RedisHandler(self.redis_log_channel, self.redis_url)
            rhandler.setLevel(logging.PROGRESS)
            logger.addHandler(rhandler)

        # Relax.
        time.sleep(0.5)

        # Add ModelController description to logfile
        logger.info(unicode(self))

        # Add the model descriptions to logfile
        for m in self._models:
            logger.info(unicode(m))

        # Calculate the model timesteps
        # We need times = len(self._nstep) + 1 since data is stored one timestep
        # after a particle is forced with the final timestep's data.
        self.times = range(0, (self._step*self._nstep)+1, self._step)
        # Calculate a datetime object for each model timestep
        # This method is duplicated in CachingDataController and CachingForcer
        # using the 'times' variables above.  Will be useful in those other
        # locations for particles released at different times
        # i.e. released over a few days
        self.modelTimestep, self.datetimes = AsaTransport.get_time_objects_from_model_timesteps(self.times, start=self.start)

        logger.progress((1, "Setting up particle start locations"))
        point_locations = []
        if isinstance(self.geometry, Point):
            point_locations = [self.reference_location] * self._npart
        elif isinstance(self.geometry, (Polygon, MultiPolygon)):
            point_locations = [Location4D(latitude=loc.y, longitude=loc.x, depth=self._depth, time=self.start) for loc in AsaTransport.fill_polygon_with_points(goal=self._npart, polygon=self.geometry)]

        # Initialize the particles
        logger.progress((2, "Initializing particles"))
        for x in xrange(0, self._npart):
            p = LarvaParticle(id=x)
            p.location = point_locations[x]
            # We don't need to fill the location gaps here for environment variables
            # because the first data collected actually relates to this original
            # position.
            # We do need to fill in fields such as settled, halted, etc.
            p.fill_status_gap()
            # Set the inital note
            p.note = p.outputstring()
            p.notes.append(p.note)
            self.particles.append(p)

        if kwargs.get("manager", True):
            # Get the number of cores (may take some tuning) and create that
            # many workers then pass particles into the queue for the workers
            self.mgr = multiprocessing.Manager()

            # This tracks if the system is 'alive'.  Most looping whiles will check this
            # and break out if it is False.  This is True until something goes very wrong.
            self.active = self.mgr.Value('bool', True)

            # Each particle is a task, plus the CachingDataController
            self.number_of_tasks = self.get_number_of_tasks()

            # Either spin up the number of cores, or the number of tasks
            # NOTE(review): this evaluates to 0 on a single-core machine - confirm
            # that whatever consumes self.nproc copes with that.
            self.nproc = min(multiprocessing.cpu_count() - 1, self.number_of_tasks)

            # Number of tasks that we need to run.  This is decremented everytime something exits.
            self.n_run = self.mgr.Value('int', self.number_of_tasks)
            # The lock that controls access to the 'n_run' variable
            self.nproc_lock = self.mgr.Lock()

            # Create the task queue for all of the particles and the CachingDataController
            self.tasks = multiprocessing.JoinableQueue(self.number_of_tasks)
            # Create the result queue for all of the particles and the CachingDataController
            self.results = self.mgr.Queue(self.number_of_tasks)

        logger.progress((3, "Initializing and caching hydro model's grid"))
        try:
            ds = CommonDataset.open(self.hydrodataset)
        except Exception:
            logger.exception("Failed to access dataset %s" % self.hydrodataset)
            raise BaseDataControllerError("Inaccessible Dataset: %s" % self.hydrodataset)

        # From here on the dataset is open: close it on every exit path, not
        # only when validation succeeds.
        try:
            # Query the dataset for common variable names
            # and the time variable.
            logger.debug("Retrieving variable information from dataset")
            self.common_variables = self.get_common_variables_from_dataset(ds)

            self.timevar = None
            # Explicit checks rather than 'assert' so that validation is not
            # silently skipped when Python runs with -O.
            required = [self.common_variables.get(key) for key in ("u", "v", "x", "y")]
            if any(name not in ds._current_variables for name in required):
                logger.error("Could not locate variables needed to run model: %s" % unicode(self.common_variables))
                raise BaseDataControllerError("A required data variable was not found in %s" % self.hydrodataset)

            self.timevar = ds.gettimevar(self.common_variables.get("u"))

            model_dates = self.timevar.get_dates()
            model_start = model_dates[0]
            model_end   = model_dates[-1]

            # Both the first and the last timestep must fall strictly inside
            # the time range covered by the dataset.
            if not (self.start > model_start and self.start < model_end):
                raise BaseDataControllerError("Start time for model (%s) is not available in source dataset (%s/%s)" % (self.datetimes[0], model_start, model_end))

            if not (self.datetimes[-1] > model_start and self.datetimes[-1] < model_end):
                raise BaseDataControllerError("End time for model (%s) is not available in source dataset (%s/%s)" % (self.datetimes[-1], model_start, model_end))
        finally:
            ds.closenc()
Exemplo n.º 26
0
    def __init__(self, **kwargs):

        """
            Mandatory named arguments:
            * geometry (Shapely Geometry Object) no default
            * depth (meters) default 0
            * start (DateTime Object) none
            * step (seconds) default 3600
            * npart (number of particles) default 1
            * nstep (number of steps) no default
            * models (list object) no default, so far there is a transport model and a behavior model
            geometry is interchangeable (if it is a point release) with:
            * latitude (DD) no default
            * longitude (DD) no default
            * depth (meters) default 0

            Optional named arguments:
            * use_shoreline default True
            * shoreline_path default None
            * shoreline_feature default None
            * shoreline_index_buffer default 0.1
            * reverse_distance default 100
            * use_bathymetry default True
            * bathy_path default None
            * use_seasurface default True
            * time_method ('interp' or 'nearest', case-insensitive) default 'interp'

            Raises:
            * TypeError if start is missing, time_method is not recognized,
              geometry is not a Point/Polygon/MultiPolygon, or neither
              geometry nor both latitude and longitude are given
            * KeyError if nstep is missing
        """

        # Shoreline
        self._use_shoreline         = kwargs.pop('use_shoreline', True)
        self.shoreline_path         = kwargs.get("shoreline_path", None)
        self.shoreline_feature      = kwargs.get("shoreline_feature", None)
        self.shoreline_index_buffer = kwargs.get("shoreline_index_buffer", 0.1)
        self.reverse_distance       = kwargs.get("reverse_distance", 100)

        # Bathy
        self._use_bathymetry = kwargs.pop('use_bathymetry', True)
        self.bathy_path      = kwargs.get("bathy_path", None)

        # SeaSurface
        self._use_seasurface = kwargs.pop('use_seasurface', True)

        self._depth          = kwargs.pop('depth', 0)
        self._npart          = kwargs.pop('npart', 1)
        self._step           = kwargs.pop('step', 3600)
        self.start           = kwargs.get('start', None)
        if self.start is None:
            raise TypeError("must provide a start time to run the model")

        # Always convert to UTC
        # A naive datetime is taken to already be in UTC
        if self.start.tzinfo is None:
            self.start = self.start.replace(tzinfo=pytz.utc)
        self.start = self.start.astimezone(pytz.utc)

        self._models = kwargs.pop('models', None)
        self._dirty  = True

        self.particles              = []
        self.time_method            = kwargs.get('time_method', 'interp').lower()
        # Explicit membership test: the previous assert/bare-except pair was
        # skipped entirely under 'python -O'.
        if self.time_method not in ("interp", "nearest"):
            raise TypeError("Not a recognized 'time_method' parameter.  Only 'nearest' or 'interp' are allowed.")

        # The model timesteps in datetime objects
        self.datetimes = []

        # Interchangeables
        if "geometry" in kwargs:
            self.geometry = kwargs.pop('geometry')
            if not isinstance(self.geometry, (Point, Polygon, MultiPolygon)):
                raise TypeError("The geometry attribute must be a shapely Point or Polygon")
        elif "latitude" in kwargs and "longitude" in kwargs:
            # Both keys must be present.  The old test
            # ('"latitude" and "longitude" in kwargs') only checked for
            # longitude, so a missing latitude surfaced as a KeyError.
            self.geometry = Point(kwargs.pop('longitude'), kwargs.pop('latitude'))
        else:
            raise TypeError("must provide a shapely geometry object (point or polygon) or a latitude and a longitude")

        # Errors
        try:
            self._nstep = kwargs.pop('nstep')
        except KeyError:
            # dict.pop without a default raises KeyError when the key is missing;
            # StandardError does not exist on Python 3.
            logger.exception("Must provide the number of timesteps to the ModelController")
            raise
    def run(self, hydrodataset, **kwargs):

        # Relax.
        time.sleep(2)

        # Add ModelController description to logfile
        logger.info(self)

        # Add the model descriptions to logfile
        for m in self._models:
            logger.info(m)

        # Calculate the model timesteps
        # We need times = len(self._nstep) + 1 since data is stored one timestep
        # after a particle is forced with the final timestep's data.
        times = range(0, (self._step*self._nstep)+1, self._step)
        # Calculate a datetime object for each model timestep
        # This method is duplicated in DataController and ForceParticle
        # using the 'times' variables above.  Will be useful in those other
        # locations for particles released at different times
        # i.e. released over a few days
        modelTimestep, self.datetimes = AsaTransport.get_time_objects_from_model_timesteps(times, start=self.start)

        time_chunk = self._time_chunk
        horiz_chunk = self._horiz_chunk

        caching = kwargs.get("caching", True)
        if caching is True:
            # Should we remove the cache file at the end of the run?
            remove_cache = kwargs.get("remove_cache", True)
            self.cache_path = kwargs.get("cache", None)

            # Create a temp file for the cache if nothing was passed in
            if self.cache_path is None:
                default_cache_dir = os.path.join(os.path.dirname(__file__), "_cache")
                temp_name = AsaRandom.filename(prefix=str(datetime.now().microsecond), suffix=".nc")
                self.cache_path = os.path.join(default_cache_dir, temp_name)

            # Be sure the cache directory exists
            if not os.path.exists(os.path.dirname(self.cache_path)):
                logger.info("Creating cache directory: %s" % self.cache_path)
                os.makedirs(os.path.dirname(self.cache_path))
        else:
            # Don't remove cache if we are not caching, because the cache path is set to the dataset path!
            # DONT SET THIS TO TRUE
            remove_cache = False
            # Use the hydrodataset as the cache
            self.cache_path = hydrodataset

        self.bathy_path = kwargs.get("bathy", None)

        logger.progress((1, "Setting up particle start locations"))
        point_locations = []
        if isinstance(self.geometry, Point):
            point_locations = [self.reference_location] * self._npart
        elif isinstance(self.geometry, Polygon) or isinstance(self.geometry, MultiPolygon):
            point_locations = [Location4D(latitude=loc.y, longitude=loc.x, depth=self._depth, time=self.start) for loc in AsaTransport.fill_polygon_with_points(goal=self._npart, polygon=self.geometry)]

        # Initialize the particles
        logger.progress((2, "Initializing particles"))
        for x in xrange(0, self._npart):
            p = LarvaParticle(id=x)
            p.location = point_locations[x]
            # We don't need to fill the location gaps here for environment variables
            # because the first data collected actually relates to this original
            # position.
            # We do need to fill in fields such as settled, halted, etc.
            p.fill_status_gap()
            # Set the inital note
            p.note = p.outputstring()
            p.notes.append(p.note)
            self.particles.append(p)

        # This is where it makes sense to implement the multiprocessing
        # looping for particles and models. Can handle each particle in
        # parallel probably.
        #
        # Get the number of cores (may take some tuning) and create that
        # many workers then pass particles into the queue for the workers
        mgr = multiprocessing.Manager()
        nproc = multiprocessing.cpu_count() - 1
        if nproc <= 0:
            raise ValueError("Model does not run using less than two CPU cores")

        # Each particle is a task, plus the DataController
        number_of_tasks = len(self.particles) + 1

        # We need a process for each particle and one for the data controller
        nproc = min(number_of_tasks, nproc)

        # Create the task queue for all of the particles and the DataController
        tasks = multiprocessing.JoinableQueue(number_of_tasks)
        # Create the result queue for all of the particles and the DataController
        results = mgr.Queue(number_of_tasks)

        # Number of tasks that we need to run.  This is decremented everytime something
        # completes.
        n_run = mgr.Value('int', number_of_tasks)
        # The lock that controls access to the 'n_run' variable
        nproc_lock = mgr.Lock()

        # Create the shared state objects

        # This tracks if the system is 'alive'.  Most looping whiles will check this
        # and break out if it is False.  This is True until something goes very wrong.
        active = mgr.Value('bool', True)

        # Particles use this to tell the Data Controller to "get_data".
        # The DataController sets this to False when it is done writing to the cache file.
        # Particles will wait for this to be False before reading from the cache file.
        # If we are caching, this starts as True so the Particles don't take off.  If we
        # are not caching, this is False so the Particles can start immediatly.
        get_data = mgr.Value('bool', caching)
        # Particles use this to tell the DataContoller which indices to 'get_data' for
        point_get = mgr.Value('list', [0, 0, 0])

        # This locks access to the 'has_data_request_lock' value
        data_request_lock = mgr.Lock()
        # This tracks which Particle PID is asking the DataController for data
        has_data_request_lock = mgr.Value('int', -1)

        # The lock that controls access to modifying 'has_read_lock' and 'read_count'
        read_lock = mgr.Lock()
        # List of Particle PIDs that are reading from the cache
        has_read_lock = mgr.list()
        # The number of Particles that are reading from the cache
        read_count = mgr.Value('int', 0)

        # When something is writing to the cache file
        write_lock = mgr.Lock()
        # PID of process with lock
        has_write_lock = mgr.Value('int', -1)

        logger.progress((3, "Initializing and caching hydro model's grid"))
        try:
            ds = CommonDataset.open(hydrodataset)
        except Exception:
            logger.exception("Failed to access dataset %s" % hydrodataset)
            raise DataControllerError("Inaccessible Dataset: %s" % hydrodataset)

        # Query the dataset for common variable names
        # and the time variable.
        logger.debug("Retrieving variable information from dataset")
        common_variables = self.get_common_variables_from_dataset(ds)

        timevar = None
        try:
            assert common_variables.get("u") in ds._current_variables
            assert common_variables.get("v") in ds._current_variables
            assert common_variables.get("x") in ds._current_variables
            assert common_variables.get("y") in ds._current_variables

            timevar = ds.gettimevar(common_variables.get("u"))
        except AssertionError:
            logger.exception("Could not locate variables needed to run model: %s" % unicode(common_variables))
            raise DataControllerError("A required data variable was not found in %s" % hydrodataset)

        # Add data controller to the queue first so that it
        # can get the initial data and is not blocked

        logger.debug('Starting DataController')
        logger.progress((4, "Starting processes"))
        data_controller = parallel.DataController(hydrodataset, common_variables, n_run, get_data, write_lock, has_write_lock, read_lock, read_count,
                                                  time_chunk, horiz_chunk, times,
                                                  self.start, point_get, self.reference_location,
                                                  caching=caching, cache_path=self.cache_path)
        tasks.put(data_controller)
        # Create DataController worker
        data_controller_process = parallel.Consumer(tasks, results, n_run, nproc_lock, active, get_data, name="DataController")
        data_controller_process.start()

        logger.debug('Adding %i particles as tasks' % len(self.particles))
        for part in self.particles:
            forcing = parallel.ForceParticle(self.cache_path,
                                             part,
                                             common_variables,
                                             timevar,
                                             times,
                                             self.start,
                                             self._models,
                                             self.reference_location.point,
                                             self._use_bathymetry,
                                             self._use_shoreline,
                                             self._use_seasurface,
                                             get_data,
                                             n_run,
                                             read_lock,
                                             has_read_lock,
                                             read_count,
                                             point_get,
                                             data_request_lock,
                                             has_data_request_lock,
                                             reverse_distance=self.reverse_distance,
                                             bathy=self.bathy_path,
                                             shoreline_path=self.shoreline_path,
                                             shoreline_feature=self.shoreline_feature,
                                             time_method=self.time_method,
                                             caching=caching)
            tasks.put(forcing)

        # Create workers for the particles.
        procs = [ parallel.Consumer(tasks, results, n_run, nproc_lock, active, get_data, name="ForceParticle-%d" % i)
                  for i in xrange(nproc - 1) ]
        for w in procs:
            w.start()
            logger.debug('Started %s' % w.name)

        # Get results back from queue, test for failed particles
        return_particles = []
        retrieved = 0.
        error_code = 0

        logger.info("Waiting for %i particle results" % len(self.particles))
        logger.progress((5, "Running model"))
        while retrieved < number_of_tasks:
            try:
                # Returns a tuple of code, result
                code, tempres = results.get(timeout=240)
            except Queue.Empty:
                # Poll the active processes to make sure they are all alive and then continue with loop
                if not data_controller_process.is_alive() and data_controller_process.exitcode != 0:
                    # Data controller is zombied, kill off other processes.
                    get_data.value is False
                    results.put((-2, "DataController"))

                new_procs = []
                old_procs = []
                for p in procs:
                    if not p.is_alive() and p.exitcode != 0:
                        # Do what the Consumer would do if something finished.
                        # Add something to results queue
                        results.put((-3, "ZombieParticle"))
                        # Decrement nproc (DataController exits when this is 0)
                        with nproc_lock:
                            n_run.value = n_run.value - 1

                        # Remove task from queue (so they can be joined later on)
                        tasks.task_done()

                        # Start a new Consumer.  It will exit if there are no tasks available.
                        np = parallel.Consumer(tasks, results, n_run, nproc_lock, active, get_data, name=p.name)
                        new_procs.append(np)
                        old_procs.append(p)

                        # Release any locks the PID had
                        if p.pid in has_read_lock:
                            with read_lock:
                                read_count.value -= 1
                                has_read_lock.remove(p.pid)

                        if has_data_request_lock.value == p.pid:
                            has_data_request_lock.value = -1
                            try:
                                data_request_lock.release()
                            except:
                                pass

                        if has_write_lock.value == p.pid:
                            has_write_lock.value = -1
                            try:
                                write_lock.release()
                            except:
                                pass

                for p in old_procs:
                    try:
                        procs.remove(p)
                    except ValueError:
                        logger.warn("Did not find %s in the list of processes.  Continuing on." % p.name)

                for p in new_procs:
                    procs.append(p)
                    logger.warn("Started a new consumer (%s) to replace a zombie consumer" % p.name)
                    p.start()

            else:
                # We got one.
                retrieved += 1
                if code is None:
                    logger.warn("Got an unrecognized response from a task.")
                elif code == -1:
                    logger.warn("Particle %s has FAILED!!" % tempres.uid)
                elif code == -2:
                    error_code = code
                    logger.warn("DataController has FAILED!!  Removing cache file so the particles fail.")
                    try:
                        os.remove(self.cache_path)
                    except OSError:
                        logger.debug("Could not remove cache file, it probably never existed")
                        pass
                elif code == -3:
                    error_code = code
                    logger.info("A zombie process was caught and task was removed from queue")
                elif isinstance(tempres, Particle):
                    logger.info("Particle %d finished" % tempres.uid)
                    return_particles.append(tempres)
                    # We mulitply by 95 here to save 5% for the exporting
                    logger.progress((round((retrieved / number_of_tasks) * 90., 1), "Particle %d finished" % tempres.uid))
                elif tempres == "DataController":
                    logger.info("DataController finished")
                    logger.progress((round((retrieved / number_of_tasks) * 90., 1), "DataController finished"))
                else:
                    logger.info("Got a strange result on results queue")
                    logger.info(str(tempres))

                logger.info("Retrieved %i/%i results" % (int(retrieved), number_of_tasks))

        if len(return_particles) != len(self.particles):
            logger.warn("Some particles failed and are not included in the output")

        # The results queue should be empty at this point
        assert results.empty() is True

        # Should be good to join on the tasks now that the queue is empty
        logger.info("Joining the task queue")
        tasks.join()

        # Join all processes
        logger.info("Joining the processes")
        for w in procs + [data_controller_process]:
                # Wait 10 seconds
                w.join(10.)
                if w.is_alive():
                    # Process is hanging, kill it.
                    logger.info("Terminating %s forcefully.  This should have exited itself." % w.name)
                    w.terminate()

        logger.info('Workers complete')

        self.particles = return_particles

        # Remove Manager so it shuts down
        del mgr

        # Remove timevar
        del timevar

        # Remove the cache file
        if remove_cache is True:
            try:
                os.remove(self.cache_path)
            except OSError:
                logger.debug("Could not remove cache file, it probably never existed")

        logger.progress((96, "Exporting results"))

        if len(self.particles) > 0:
            # If output_formats and path specified,
            # output particle run data to disk when completed
            if "output_formats" in kwargs:
                # Make sure output_path is also included
                if kwargs.get("output_path", None) is not None:
                    formats = kwargs.get("output_formats")
                    output_path = kwargs.get("output_path")
                    if isinstance(formats, list):
                        for format in formats:
                            logger.info("Exporting to: %s" % format)
                            try:
                                self.export(output_path, format=format)
                            except:
                                logger.exception("Failed to export to: %s" % format)
                    else:
                        logger.warn('The output_formats parameter should be a list, not saving any output!')
                else:
                    logger.warn('No output path defined, not saving any output!')
            else:
                logger.warn('No output format defined, not saving any output!')
        else:
            logger.warn("Model didn't actually do anything, check the log.")
            if error_code == -2:
                raise DataControllerError("Error in the DataController")
            else:
                raise ModelError("Error in the model")

        logger.progress((99, "Model Run Complete"))
        return
Exemplo n.º 28
0
    def __init__(self, **kwargs):
        """
            Configure a model run from keyword arguments.

            Mandatory named arguments:
            * geometry (Shapely Point, Polygon or MultiPolygon) no default
            * start (DateTime Object) no default; naive values are treated as UTC
            * nstep (number of steps) no default
            * models (list object) default None, so far there is a transport model and a behavior model
            geometry is interchangeable (if it is a point release) with:
            * latitude (DD) no default
            * longitude (DD) no default

            Non-mandatory named arguments:
            * depth (meters) default 0
            * step (seconds) default 3600
            * npart (number of particles) default 1
            * use_shoreline, use_bathymetry, use_seasurface (bool) default True
            * shoreline_path, shoreline_feature, bathy_path default None
            * shoreline_index_buffer default 0.1
            * reverse_distance default 100
            * time_method ('interp' or 'nearest', case-insensitive) default 'interp'
            * pool (task pool) default None; this method only stores what it is given

            Raises:
            * TypeError if start is missing, time_method is not recognized, or
              neither a valid geometry nor a latitude/longitude pair is supplied
            * KeyError if nstep is missing (logged, then re-raised)
        """

        # Should we thread the result listener?  Defined in subclasses that need it.
        self.thread_result_listener = False

        # Shoreline
        self._use_shoreline = kwargs.pop('use_shoreline', True)
        self.shoreline_path = kwargs.get("shoreline_path", None)
        self.shoreline_feature = kwargs.get("shoreline_feature", None)
        self.shoreline_index_buffer = kwargs.get("shoreline_index_buffer", 0.1)
        self.reverse_distance = kwargs.get("reverse_distance", 100)

        # Bathy
        self._use_bathymetry = kwargs.pop('use_bathymetry', True)
        self.bathy_path = kwargs.get("bathy_path", None)

        # SeaSurface
        self._use_seasurface = kwargs.pop('use_seasurface', True)

        self._depth = kwargs.pop('depth', 0)
        self._npart = kwargs.pop('npart', 1)
        self._step = kwargs.pop('step', 3600)
        self.start = kwargs.get('start', None)
        if self.start is None:
            raise TypeError("must provide a start time to run the model")

        # Always convert to UTC.  A naive datetime is assumed to already be UTC.
        if self.start.tzinfo is None:
            self.start = self.start.replace(tzinfo=pytz.utc)
        self.start = self.start.astimezone(pytz.utc)

        self._models = kwargs.pop('models', None)
        self._dirty = True

        self.particles = []
        self.time_method = kwargs.get('time_method', 'interp').lower()
        # Explicit check rather than `assert`, which is removed under `python -O`.
        if self.time_method not in ("interp", "nearest"):
            raise TypeError(
                "Not a recognized 'time_method' parameter.  Only 'nearest' or 'interp' are allowed."
            )

        # The model timesteps in datetime objects
        self.datetimes = []

        # Interchangeables
        if "geometry" in kwargs:
            self.geometry = kwargs.pop('geometry')
            if not isinstance(self.geometry, (Point, Polygon, MultiPolygon)):
                raise TypeError(
                    "The geometry attribute must be a shapely Point or Polygon"
                )
        elif "latitude" in kwargs and "longitude" in kwargs:
            # Both keys must be present.  `"latitude" and "longitude" in kwargs`
            # would only test for longitude, since a non-empty string is truthy.
            self.geometry = Point(kwargs.pop('longitude'),
                                  kwargs.pop('latitude'))
        else:
            raise TypeError(
                "must provide a shapely geometry object (point or polygon) or a latitude and a longitude"
            )

        # Errors
        try:
            self._nstep = kwargs.pop('nstep')
        except KeyError:
            logger.exception(
                "Must provide the number of timesteps to the ModelController")
            raise

        self.pool = kwargs.get('pool', None)
Exemplo n.º 29
0
    def setup_run(self, hydrodataset, **kwargs):
        """
            Prepare a run against `hydrodataset`: compute the model timesteps,
            create the particles, and check that the dataset has the required
            variables and covers the requested time window.

            Sets self.hydrodataset, self.times, self.modelTimestep,
            self.datetimes, self.common_variables and self.timevar, and
            appends one LarvaParticle per requested particle to self.particles.
            `kwargs` is accepted but not used by this method.

            Raises BaseDataControllerError if the dataset cannot be opened or
            inspected, if any of the u/v/x/y variables is missing, or if the
            model start or end time is not strictly inside the dataset's
            time range.
        """

        self.hydrodataset = hydrodataset

        logger.setLevel(logging.PROGRESS)

        # Relax.
        time.sleep(0.5)

        # Add ModelController description to logfile
        logger.info(str(self))

        # Add the model descriptions to logfile
        for m in self._models:
            logger.info(str(m))

        # Calculate the model timesteps
        # We need nstep + 1 entries since data is stored one timestep
        # after a particle is forced with the final timestep's data.
        self.times = list(range(0, (self._step * self._nstep) + 1, self._step))
        # Calculate a datetime object for each model timestep
        # This method is duplicated in CachingDataController and CachingForcer
        # using the 'times' variables above.  Will be useful in those other
        # locations for particles released at different times
        # i.e. released over a few days
        self.modelTimestep, self.datetimes = AsaTransport.get_time_objects_from_model_timesteps(
            self.times, start=self.start)

        logger.progress((1, "Setting up particle start locations"))
        point_locations = []
        if isinstance(self.geometry, Point):
            # Every particle of a point release starts at the same location object.
            point_locations = [self.reference_location] * self._npart
        elif isinstance(self.geometry, (Polygon, MultiPolygon)):
            point_locations = [
                Location4D(latitude=loc.y,
                           longitude=loc.x,
                           depth=self._depth,
                           time=self.start)
                for loc in AsaTransport.fill_polygon_with_points(
                    goal=self._npart, polygon=self.geometry)
            ]

        # Initialize the particles
        logger.progress((2, "Initializing particles"))
        for x in range(0, self._npart):
            p = LarvaParticle(id=x)
            p.location = point_locations[x]
            # We don't need to fill the location gaps here for environment variables
            # because the first data collected actually relates to this original
            # position.
            # We do need to fill in fields such as settled, halted, etc.
            p.fill_status_gap()
            # Set the initial note
            p.note = p.outputstring()
            p.notes.append(p.note)
            self.particles.append(p)

        logger.progress((3, "Initializing and caching hydro model's grid %s" %
                         self.hydrodataset))
        try:
            ds = CommonDataset.open(self.hydrodataset)
        except Exception:
            logger.exception("Failed to access dataset %s" % self.hydrodataset)
            raise BaseDataControllerError("Inaccessible Dataset: %s" %
                                          self.hydrodataset)

        # From here on the dataset is open; the `finally` below guarantees it
        # is closed on every exit path, including a failed variable lookup.
        try:
            try:
                # Query the dataset for common variable names
                # and the time variable.
                logger.debug("Retrieving variable information from dataset")
                self.common_variables = self.get_common_variables_from_dataset(ds)
            except Exception:
                logger.exception("Failed to access dataset %s" %
                                 self.hydrodataset)
                raise BaseDataControllerError("Inaccessible Dataset: %s" %
                                              self.hydrodataset)

            self.timevar = None
            # Explicit checks instead of `assert`, which is removed under
            # `python -O` and would let an unusable dataset through.
            required = ("u", "v", "x", "y")
            if any(self.common_variables.get(name) not in ds._current_variables
                   for name in required):
                logger.error(
                    "Could not locate variables needed to run model: %s" %
                    str(self.common_variables))
                raise BaseDataControllerError(
                    "A required data variable was not found in %s" %
                    self.hydrodataset)

            self.timevar = ds.gettimevar(self.common_variables.get("u"))
            model_dates = self.timevar.get_dates()
            model_start = model_dates[0]
            model_end = model_dates[-1]
        finally:
            ds.closenc()

        # Both ends of the run must fall strictly inside the dataset's range.
        if not (model_start < self.start < model_end):
            raise BaseDataControllerError(
                "Start time for model (%s) is not available in source dataset (%s/%s)"
                % (self.datetimes[0], model_start, model_end))

        if not (model_start < self.datetimes[-1] < model_end):
            raise BaseDataControllerError(
                "End time for model (%s) is not available in source dataset (%s/%s)"
                % (self.datetimes[-1], model_start, model_end))
Exemplo n.º 30
0
    def setup_run(self, hydrodataset, **kwargs):
        """
            Prepare a run against `hydrodataset`: compute the model timesteps,
            create the particles, and check that the dataset has the required
            variables and covers the requested time window.

            Sets self.hydrodataset, self.times, self.modelTimestep,
            self.datetimes, self.common_variables and self.timevar, and
            appends one LarvaParticle per requested particle to self.particles.
            `kwargs` is accepted but not used by this method.

            Raises BaseDataControllerError if the dataset cannot be opened or
            inspected, if any of the u/v/x/y variables is missing, or if the
            model start or end time is not strictly inside the dataset's
            time range.
        """

        self.hydrodataset = hydrodataset

        logger.setLevel(logging.PROGRESS)

        # Relax.
        time.sleep(0.5)

        # Add ModelController description to logfile
        logger.info(str(self))

        # Add the model descriptions to logfile
        for m in self._models:
            logger.info(str(m))

        # Calculate the model timesteps
        # We need nstep + 1 entries since data is stored one timestep
        # after a particle is forced with the final timestep's data.
        self.times = list(range(0, (self._step*self._nstep)+1, self._step))
        # Calculate a datetime object for each model timestep
        # This method is duplicated in CachingDataController and CachingForcer
        # using the 'times' variables above.  Will be useful in those other
        # locations for particles released at different times
        # i.e. released over a few days
        self.modelTimestep, self.datetimes = AsaTransport.get_time_objects_from_model_timesteps(self.times, start=self.start)

        logger.progress((1, "Setting up particle start locations"))
        point_locations = []
        if isinstance(self.geometry, Point):
            # Every particle of a point release starts at the same location object.
            point_locations = [self.reference_location] * self._npart
        elif isinstance(self.geometry, (Polygon, MultiPolygon)):
            point_locations = [Location4D(latitude=loc.y, longitude=loc.x, depth=self._depth, time=self.start) for loc in AsaTransport.fill_polygon_with_points(goal=self._npart, polygon=self.geometry)]

        # Initialize the particles
        logger.progress((2, "Initializing particles"))
        for x in range(0, self._npart):
            p = LarvaParticle(id=x)
            p.location = point_locations[x]
            # We don't need to fill the location gaps here for environment variables
            # because the first data collected actually relates to this original
            # position.
            # We do need to fill in fields such as settled, halted, etc.
            p.fill_status_gap()
            # Set the initial note
            p.note = p.outputstring()
            p.notes.append(p.note)
            self.particles.append(p)

        logger.progress((3, "Initializing and caching hydro model's grid %s" % self.hydrodataset))
        try:
            ds = CommonDataset.open(self.hydrodataset)
        except Exception:
            logger.exception("Failed to access dataset %s" % self.hydrodataset)
            raise BaseDataControllerError("Inaccessible Dataset: %s" % self.hydrodataset)

        # From here on the dataset is open; the `finally` below guarantees it
        # is closed on every exit path, including a failed variable lookup.
        try:
            try:
                # Query the dataset for common variable names
                # and the time variable.
                logger.debug("Retrieving variable information from dataset")
                self.common_variables = self.get_common_variables_from_dataset(ds)
            except Exception:
                logger.exception("Failed to access dataset %s" % self.hydrodataset)
                raise BaseDataControllerError("Inaccessible Dataset: %s" % self.hydrodataset)

            self.timevar = None
            # Explicit checks instead of `assert`, which is removed under
            # `python -O` and would let an unusable dataset through.
            required = ("u", "v", "x", "y")
            if any(self.common_variables.get(name) not in ds._current_variables for name in required):
                logger.error("Could not locate variables needed to run model: %s" % str(self.common_variables))
                raise BaseDataControllerError("A required data variable was not found in %s" % self.hydrodataset)

            self.timevar = ds.gettimevar(self.common_variables.get("u"))
            model_dates = self.timevar.get_dates()
            model_start = model_dates[0]
            model_end = model_dates[-1]
        finally:
            ds.closenc()

        # Both ends of the run must fall strictly inside the dataset's range.
        if not (model_start < self.start < model_end):
            raise BaseDataControllerError("Start time for model (%s) is not available in source dataset (%s/%s)" % (self.datetimes[0], model_start, model_end))

        if not (model_start < self.datetimes[-1] < model_end):
            raise BaseDataControllerError("End time for model (%s) is not available in source dataset (%s/%s)" % (self.datetimes[-1], model_start, model_end))
Exemplo n.º 31
0
    def get_nearest_data(self, i):
        """ Return the tuple (u, v, w, temp, salt) read at time index `i` for
            the particle's current location.

            Note: self.dataset.opennc() must be called before calling this function.
            This is because the caching forcer must close it everytime, while a non caching
            forcer can leave the dataset open.

            * w is 0.0 when the dataset has no 'w' variable.
            * temp and salt are only read when both self.temp_name and
              self.salt_name are set; otherwise both are returned as NaN.
            * If u, v or w comes back as NaN, every value is re-read with
              num=2 and averaged.

            Any exception raised while reading is logged and re-raised.
        """

        def values_at(name, **extra):
            # Single place that builds the lookup, so every variable is
            # queried with the same time index and location.
            return self.dataset.get_values(name,
                                           timeinds=[np.asarray([i])],
                                           point=self.particle.location,
                                           **extra)

        # Defaults used whenever temp/salt are not both available, so the
        # return statement can never hit an unbound name.
        temp = np.nan
        salt = np.nan

        try:
            has_w = 'w' in self.dataset.nc.variables
            has_temp_salt = self.temp_name is not None and self.salt_name is not None

            # Grab data at time index closest to particle location
            u = np.mean(np.mean(values_at('u')))
            v = np.mean(np.mean(values_at('v')))
            # if there is vertical velocity in the dataset, get it
            w = np.mean(np.mean(values_at('w'))) if has_w else 0.0
            # If there is salt and temp in the dataset, get it
            if has_temp_salt:
                temp = np.mean(np.mean(values_at('temp')))
                salt = np.mean(np.mean(values_at('salt')))

            # Check for nans that occur in the ocean (happens because
            # of model and coastline resolution mismatches)
            if np.isnan(u).any() or np.isnan(v).any() or np.isnan(w).any():
                # Take the mean of the closest 4 points
                # If this includes nan which it will, result is nan
                u = values_at('u', num=2).mean()
                v = values_at('v', num=2).mean()
                w = values_at('w', num=2).mean() if has_w else 0.0
                if has_temp_salt:
                    temp = values_at('temp', num=2).mean()
                    salt = values_at('salt', num=2).mean()

        except Exception:
            logger.exception("Could not retrieve data.")
            raise

        return u, v, w, temp, salt
Exemplo n.º 32
0
    def start_tasks(self, **kwargs):
        """
            Queue the CachingDataController and one CachingForcer per particle,
            then start the worker processes that consume the task queue.

            The data controller task is queued and its dedicated Consumer is
            started before any particle task is added.  self.nproc - 1
            additional Consumers are then created and started for the
            particle tasks.

            Returns True when everything was started, or False if any
            exception occurred (the exception is logged, not re-raised).
            `kwargs` is accepted but not used by this method.
        """

        def make_consumer(label):
            # All workers share the same queues, counters and flags and
            # differ only by name.
            return Consumer(self.tasks,
                            self.results,
                            self.n_run,
                            self.nproc_lock,
                            self.active,
                            self.get_data,
                            name=label)

        def make_forcer(particle):
            # One forcing task per particle, all configured from this controller.
            return CachingForcer(
                self.cache_path,
                particle=particle,
                common_variables=self.common_variables,
                timevar=self.timevar,
                times=self.times,
                start_time=self.start,
                models=self._models,
                release_location_centroid=self.reference_location.point,
                usebathy=self._use_bathymetry,
                useshore=self._use_shoreline,
                usesurface=self._use_seasurface,
                reverse_distance=self.reverse_distance,
                bathy_path=self.bathy_path,
                shoreline_path=self.shoreline_path,
                shoreline_feature=self.shoreline_feature,
                time_method=self.time_method,
                shoreline_index_buffer=self.shoreline_index_buffer,
                get_data=self.get_data,
                read_lock=self.read_lock,
                has_read_lock=self.has_read_lock,
                read_count=self.read_count,
                point_get=self.point_get,
                data_request_lock=self.data_request_lock,
                has_data_request_lock=self.has_data_request_lock)

        try:
            logger.info('Starting CachingDataController')

            # The data controller goes onto the queue first so that it
            # can get the initial data and is not blocked
            controller_task = CachingDataController(
                self.hydrodataset, self.common_variables, self.n_run,
                self.get_data, self.write_lock, self.has_write_lock,
                self.read_lock, self.read_count, self.time_chunk,
                self.horiz_chunk, self.times, self.start, self.point_get,
                self.reference_location, cache_path=self.cache_path)
            self.tasks.put(controller_task)

            # Dedicated worker for the CachingDataController
            self.data_controller_process = make_consumer("CachingDataController")
            self.data_controller_process.start()

            logger.info('Adding %i particles as tasks' %
                        self.total_particle_count())
            for particle in self.particles:
                self.tasks.put(make_forcer(particle))

            # Workers for the particles.
            self.procs = [
                make_consumer("CachingForcer-%d" % idx)
                for idx in range(self.nproc - 1)
            ]
            logger.progress((5, 'Running model'))
            for worker in self.procs:
                worker.start()
                logger.info('Started %s' % worker.name)
        except Exception:
            logger.exception("Something didn't start correctly!")
            return False
        else:
            return True
    def __call__(self, active):
        """
        Run the caching loop that mirrors remote model data into a local
        netCDF cache file on behalf of the running particles.

        The remote dataset (``self.hydrodataset``) is opened once and the
        remote time indices needed for the run are computed.  The method then
        loops for as long as ``self.n_run.value > 1``.  Whenever
        ``self.get_data.value`` is set it waits until ``self.read_count``
        drops to zero, takes the write lock, and either creates the cache
        file (first request) or appends to it (later requests) through
        ``self.get_remote_data``.  The locks are released and
        ``self.get_data`` is reset after every request, whether or not it
        succeeded.

        ``active`` is accepted for interface compatibility; it is not
        referenced in this method.

        Returns the string "CachingDataController".

        Raises whatever exception stopped a cache create/update after it has
        been logged; on the first request an AssertionError is raised once
        the 20 download attempts are exhausted.
        """
        # Number of cache requests served so far; 0 means the cache file
        # has not been created yet.
        c = 0

        self.dataset = CommonDataset.open(self.hydrodataset)
        self.remote = self.dataset.nc

        # Calculate the datetimes of the model timesteps like
        # the particle objects do, so we can figure out unique
        # time indices
        modelTimestep, newtimes = AsaTransport.get_time_objects_from_model_timesteps(self.times, start=self.start_time)

        timevar = self.dataset.gettimevar(self.uname)

        # Don't need to grab the last datetime, as it is not needed for forcing, only
        # for setting the time of the final particle forcing
        time_indexs = timevar.nearest_index(newtimes[0:-1], select='before')

        # Have to make sure that we get the plus 1 for the
        # linear interpolation of u,v,w,temp,salt
        self.inds = np.unique(time_indexs)
        self.inds = np.append(self.inds, self.inds.max()+1)

        # While there is at least 1 particle still running,
        # stay alive, if not break
        while self.n_run.value > 1:

            if self.caching is False:
                logger.debug("Caching is False, not doing much.  Just hanging out until all of the particles finish.")
                timer.sleep(10)
                continue

            # If particle asks for data, do the following
            if self.get_data.value is True:
                logger.debug("Particle asked for data!")

                # Wait for particles to get out.  The loop exits still
                # holding read_lock; it is released in the finally blocks below.
                while True:
                    self.read_lock.acquire()

                    logger.debug("Read count: %d" % self.read_count.value)
                    if self.read_count.value > 0:
                        logger.debug("Waiting for write lock on cache file (particles must stop reading)...")
                        self.read_lock.release()
                        timer.sleep(2)
                    else:
                        break

                # Get write lock on the file.  Already have read lock.
                self.write_lock.acquire()
                self.has_write_lock.value = os.getpid()

                if c == 0:
                    logger.debug("Creating cache file")
                    try:
                        # Open local cache for writing, overwrites
                        # existing file with same name
                        self.local = netCDF4.Dataset(self.cache_path, 'w')

                        # Seed the requested point with the first needed time
                        # index and the last two indices returned for the start
                        # location (presumably the y/x grid indices -- confirm
                        # against CommonDataset.get_indices).
                        indices = self.dataset.get_indices(self.uname, timeinds=[np.asarray([0])], point=self.start)
                        self.point_get.value = [self.inds[0], indices[-2], indices[-1]]

                        # Create dimensions for u and v variables
                        self.local.createDimension('time', None)
                        self.local.createDimension('level', None)
                        self.local.createDimension('x', None)
                        self.local.createDimension('y', None)

                        # Create 3d or 4d u and v variables
                        if self.remote.variables[self.uname].ndim == 4:
                            self.ndim = 4
                            dimensions = ('time', 'level', 'y', 'x')
                            coordinates = "time z lon lat"
                        elif self.remote.variables[self.uname].ndim == 3:
                            self.ndim = 3
                            dimensions = ('time', 'y', 'x')
                            coordinates = "time lon lat"
                        # 'shape' stays bound for the rest of this call and is
                        # reused by the update branch on later requests.
                        shape = self.remote.variables[self.uname].shape

                        # If there is no FillValue defined in the dataset, use np.nan.
                        # Sometimes it will work out correctly and other times we will
                        # have a huge cache file.
                        try:
                            fill = self.remote.variables[self.uname].missing_value
                        except Exception:
                            fill = np.nan

                        # Create domain variable that specifies
                        # where there is data geographically/by time
                        # and where there is not data,
                        #   Used for testing if particle needs to
                        #   ask cache to update
                        domain = self.local.createVariable('domain', 'i', dimensions, zlib=False, fill_value=0)
                        domain.coordinates = coordinates

                        # Create local u and v variables
                        u = self.local.createVariable('u', 'f', dimensions, zlib=False, fill_value=fill)
                        v = self.local.createVariable('v', 'f', dimensions, zlib=False, fill_value=fill)

                        v.coordinates = coordinates
                        u.coordinates = coordinates

                        # localvars[i] is filled from remotevars[i], so the
                        # two lists must be appended to in lockstep.
                        localvars = [u, v, ]
                        remotevars = [self.remote.variables[self.uname], self.remote.variables[self.vname]]

                        # Create local w variable
                        if self.wname is not None:
                            w = self.local.createVariable('w', 'f', dimensions, zlib=False, fill_value=fill)
                            w.coordinates = coordinates
                            localvars.append(w)
                            remotevars.append(self.remote.variables[self.wname])

                        if self.temp_name is not None and self.salt_name is not None:
                            # Create local temp and salt vars
                            temp = self.local.createVariable('temp', 'f', dimensions, zlib=False, fill_value=fill)
                            salt = self.local.createVariable('salt', 'f', dimensions, zlib=False, fill_value=fill)
                            temp.coordinates = coordinates
                            salt.coordinates = coordinates
                            localvars.append(temp)
                            localvars.append(salt)
                            remotevars.append(self.remote.variables[self.temp_name])
                            remotevars.append(self.remote.variables[self.salt_name])

                        # Create local lat/lon coordinate variables
                        if self.remote.variables[self.xname].ndim == 2:
                            lon = self.local.createVariable('lon', 'f', ("y", "x"), zlib=False)
                            lon[:] = self.remote.variables[self.xname][:, :]
                            lat = self.local.createVariable('lat', 'f', ("y", "x"), zlib=False)
                            lat[:] = self.remote.variables[self.yname][:, :]
                        if self.remote.variables[self.xname].ndim == 1:
                            lon = self.local.createVariable('lon', 'f', ("x"), zlib=False)
                            lon[:] = self.remote.variables[self.xname][:]
                            lat = self.local.createVariable('lat', 'f', ("y"), zlib=False)
                            lat[:] = self.remote.variables[self.yname][:]

                        # Create local z variable.  Only a 4d (time-varying) z
                        # joins the incremental download lists; 3d and 1d z are
                        # copied in full right away.
                        if self.zname is not None:
                            if self.remote.variables[self.zname].ndim == 4:
                                z = self.local.createVariable('z', 'f', ("time", "level", "y", "x"), zlib=False)
                                remotez = self.remote.variables[self.zname]
                                localvars.append(z)
                                remotevars.append(remotez)
                            elif self.remote.variables[self.zname].ndim == 3:
                                z = self.local.createVariable('z', 'f', ("level", "y", "x"), zlib=False)
                                z[:] = self.remote.variables[self.zname][:, :, :]
                            elif self.remote.variables[self.zname].ndim == 1:
                                z = self.local.createVariable('z', 'f', ("level",), zlib=False)
                                z[:] = self.remote.variables[self.zname][:]

                        # Create local time variable
                        time = self.local.createVariable('time', 'f8', ("time",), zlib=False)
                        if self.tname is not None:
                            time[:] = self.remote.variables[self.tname][self.inds]

                        # Fetch time_size steps starting at the requested time
                        # index, clipped to the last index the run needs.
                        if self.point_get.value[0]+self.time_size > np.max(self.inds):
                            current_inds = np.arange(self.point_get.value[0], np.max(self.inds)+1)
                        else:
                            current_inds = np.arange(self.point_get.value[0], self.point_get.value[0] + self.time_size)

                        # Get data from remote dataset and add
                        # to local cache.
                        # Try 20 times on the first attempt
                        current_attempt = 1
                        max_attempts = 20
                        while True:
                            # Explicit check instead of ``assert`` so the cap
                            # on attempts survives running under ``python -O``.
                            if current_attempt > max_attempts:
                                raise AssertionError("Exceeded %d attempts to get remote data" % max_attempts)
                            try:
                                self.get_remote_data(localvars, remotevars, current_inds, shape)
                            except AssertionError:
                                raise
                            except Exception:
                                # Not a bare ``except`` so KeyboardInterrupt and
                                # SystemExit can still stop the process.
                                logger.warn("CachingDataController failed to get remote data.  Trying again in 20 seconds. %s attempts left." % str(max_attempts-current_attempt))
                                logger.exception("Data Access Error")
                                timer.sleep(20)
                                current_attempt += 1
                            else:
                                break

                        c += 1
                    except Exception:
                        logger.error("CachingDataController failed to get data (first request)")
                        raise
                    finally:
                        try:
                            self.local.sync()
                            self.local.close()
                        finally:
                            # Hand the locks back even if closing the cache
                            # file fails, so waiting particles are not left
                            # blocked forever.
                            self.has_write_lock.value = -1
                            self.write_lock.release()
                            self.get_data.value = False
                            self.read_lock.release()
                            logger.debug("Done updating cache file, closing file, and releasing locks")
                else:
                    logger.debug("Updating cache file")
                    try:
                        # Open local cache dataset for appending
                        self.local = netCDF4.Dataset(self.cache_path, 'a')

                        # Create local and remote variable objects
                        # for the variables of interest
                        u = self.local.variables['u']
                        v = self.local.variables['v']
                        time = self.local.variables['time']
                        remoteu = self.remote.variables[self.uname]
                        remotev = self.remote.variables[self.vname]

                        # Create lists of variable objects for
                        # the data updater
                        localvars = [u, v, ]
                        remotevars = [remoteu, remotev, ]
                        if self.salt_name is not None and self.temp_name is not None:
                            salt = self.local.variables['salt']
                            temp = self.local.variables['temp']
                            remotesalt = self.remote.variables[self.salt_name]
                            remotetemp = self.remote.variables[self.temp_name]
                            localvars.append(salt)
                            localvars.append(temp)
                            remotevars.append(remotesalt)
                            remotevars.append(remotetemp)
                        if self.wname is not None:
                            w = self.local.variables['w']
                            remotew = self.remote.variables[self.wname]
                            localvars.append(w)
                            remotevars.append(remotew)
                        if self.zname is not None:
                            remotez = self.remote.variables[self.zname]
                            if remotez.ndim == 4:
                                z = self.local.variables['z']
                                localvars.append(z)
                                remotevars.append(remotez)
                        if self.tname is not None:
                            # Index the remote *time variable*; indexing the
                            # variables mapping itself with an index array
                            # raises TypeError.
                            time[self.inds] = self.remote.variables[self.tname][self.inds]

                        if self.point_get.value[0]+self.time_size > np.max(self.inds):
                            current_inds = np.arange(self.point_get.value[0], np.max(self.inds)+1)
                        else:
                            current_inds = np.arange(self.point_get.value[0], self.point_get.value[0] + self.time_size)

                        # Get data from remote dataset and add
                        # to local cache.  Retries without a cap; 'shape' was
                        # bound when the cache file was created.
                        while True:
                            try:
                                self.get_remote_data(localvars, remotevars, current_inds, shape)
                            except Exception:
                                # Not a bare ``except``: this loop has no retry
                                # limit, so KeyboardInterrupt/SystemExit must
                                # be allowed to break out of it.
                                logger.warn("CachingDataController failed to get remote data.  Trying again in 30 seconds")
                                timer.sleep(30)
                            else:
                                break

                        c += 1
                    except Exception:
                        logger.error("CachingDataController failed to get data (not first request)")
                        raise
                    finally:
                        try:
                            self.local.sync()
                            self.local.close()
                        finally:
                            # Hand the locks back even if closing the cache
                            # file fails, so waiting particles are not left
                            # blocked forever.
                            self.has_write_lock.value = -1
                            self.write_lock.release()
                            self.get_data.value = False
                            self.read_lock.release()
                            logger.debug("Done updating cache file, closing file, and releasing locks")
            else:
                logger.debug("Particles are still running, waiting for them to request data...")
                timer.sleep(2)

        self.dataset.closenc()

        return "CachingDataController"
    def __call__(self, proc, active):
        """
        Force a single particle (``self.part``) through every model timestep.

        Optionally loads the bathymetry and shoreline, waits while
        ``self.get_data`` is set, checks that the source dataset can be
        opened, then for each timestep fetches u, v, w, temp and salt
        (``data_nearest`` or ``data_interp`` depending on
        ``self.time_method``), ages the particle, runs every model in
        ``self.models`` and applies ``boundary_interaction`` to the result.

        ``proc`` is accepted for interface compatibility; it is not
        referenced in this method.  ``active`` is stored on ``self.active``
        and its ``.value`` is checked before every timestep.

        Returns the forced particle, ``self.part``.

        Raises ValueError if ``self.time_method`` is not 'interp' or
        'nearest', or if ``self.active.value`` turns False during the run;
        AssertionError if the number of time indices does not match the
        number of datetimes; and re-raises any error hit while opening the
        source dataset.
        """

        self.active = active

        if self.usebathy is True:
            try:
                self._bathymetry = Bathymetry(file=self.bathy)
            except Exception:
                # Best effort: carry on without bathymetry rather than fail the particle.
                logger.exception("Could not load Bathymetry file: %s, using no Bathymetry for this run!" % self.bathy)
                self.usebathy = False

        self._shoreline = None
        if self.useshore is True:
            self._shoreline = Shoreline(path=self.shoreline_path, feature_name=self.shoreline_feature, point=self.release_location_centroid, spatialbuffer=0.25)
            # Make sure we are not starting on land.  Raises exception if we are.
            self._shoreline.intersect(start_point=self.release_location_centroid, end_point=self.release_location_centroid)

        if self.active.value is True:
            while self.get_data.value is True:
                logger.info("Waiting for DataController to start...")
                timer.sleep(5)

        # Initialize commondataset of local cache, then
        # close the related netcdf file
        try:
            if self.caching is True:
                with self.read_lock:
                    self.read_count.value += 1
                    self.has_read_lock.append(os.getpid())
            self.dataset = CommonDataset.open(self.hydrodataset)
            self.dataset.closenc()
        except Exception:
            # ``StandardError`` no longer exists in Python 3; naming it here
            # would turn any failure into a NameError and skip this warning.
            logger.warn("No source dataset: %s.  Particle exiting" % self.hydrodataset)
            raise
        finally:
            if self.caching is True:
                with self.read_lock:
                    self.read_count.value -= 1
                    self.has_read_lock.remove(os.getpid())

        # Calculate datetime at every timestep
        modelTimestep, newtimes = AsaTransport.get_time_objects_from_model_timesteps(self.times, start=self.start_time)

        # Resolve the time indices and the matching data getter once, and
        # fail loudly on an unsupported method instead of hitting an unbound
        # local variable further down.
        if self.time_method == 'interp':
            time_indexs = self.timevar.nearest_index(newtimes, select='before')
            fetch_forcing = self.data_interp
        elif self.time_method == 'nearest':
            time_indexs = self.timevar.nearest_index(newtimes)
            fetch_forcing = self.data_nearest
        else:
            logger.warn("Method for computing u,v,w,temp,salt not supported!")
            raise ValueError("Unsupported time_method %r. Only 'nearest' and 'interp' are supported." % (self.time_method,))

        # Explicit check instead of ``assert`` so it still runs under ``python -O``.
        if len(newtimes) != len(time_indexs):
            logger.error("Time indexes are messed up. Need to have equal datetime and time indexes")
            raise AssertionError("Got %d datetimes but %d time indexes" % (len(newtimes), len(time_indexs)))

        # loop over timesteps
        # We don't loop over the last time_index because
        # we need to query in the time_index and set the particle's
        # location as the 'newtime' object.
        for loop_i, i in enumerate(time_indexs[0:-1]):

            if self.active.value is False:
                raise ValueError("Particle exiting due to Failure.")

            newloc = None

            # Get the variable data required by the models
            u, v, w, temp, salt = fetch_forcing(i, newtimes[loop_i])

            # Get the bathy value at the particles location
            if self.usebathy is True:
                bathymetry_value = self._bathymetry.get_depth(self.part.location)
            else:
                # Sentinel depth passed to the models when bathymetry is disabled.
                bathymetry_value = -999999999999999

            # Age the particle by the modelTimestep (seconds)
            # 'Age' meaning the amount of time it has been forced.
            self.part.age(seconds=modelTimestep[loop_i])

            # loop over models - sort these in the order you want them to run
            for model in self.models:
                movement = model.move(self.part, u, v, w, modelTimestep[loop_i], temperature=temp, salinity=salt, bathymetry_value=bathymetry_value)
                newloc = Location4D(latitude=movement['latitude'], longitude=movement['longitude'], depth=movement['depth'], time=newtimes[loop_i+1])
                logger.debug("%s - moved %.3f meters (horizontally) and %.3f meters (vertically) by %s with data from %s" % (self.part.logstring(), movement['distance'], movement['vertical_distance'], model.__class__.__name__, newtimes[loop_i].isoformat()))
                if newloc:
                    self.boundary_interaction(particle=self.part, starting=self.part.location, ending=newloc,
                                              distance=movement['distance'], angle=movement['angle'],
                                              azimuth=movement['azimuth'], reverse_azimuth=movement['reverse_azimuth'],
                                              vertical_distance=movement['vertical_distance'], vertical_angle=movement['vertical_angle'])
                logger.debug("%s - was forced by %s and is now at %s" % (self.part.logstring(), model.__class__.__name__, self.part.location.logstring()))

            self.part.note = self.part.outputstring()
            # Each timestep, save the particles status and environmental variables.
            # This keep fields such as temp, salt, halted, settled, and dead matched up with the number of timesteps
            self.part.save()

        self.dataset.closenc()

        # We won't pull data for the last entry in locations, but we need to populate it with fill data.
        self.part.fill_environment_gap()

        if self.usebathy is True:
            self._bathymetry.close()

        if self.useshore is True:
            self._shoreline.close()

        return self.part