Example #1
class Worker(object):
    def __init__(self, config: configs.Config, loglevel: str, logfile: str):

        logging.basicConfig(format='%(levelname)s:%(message)s', level=loglevel, filename=logfile)
        self.config = config
        self.fm = ForwardModel(self.config)

        if self.config.implementation.mode == 'mcmc_inversion':
            self.iv = MCMCInversion(self.config, self.fm)
        elif self.config.implementation.mode in ['inversion', 'simulation']:
            self.iv = Inversion(self.config, self.fm)
        else:
            # This should never be reached due to configuration checking
            raise AttributeError('Config implementation mode not valid')

        self.io = IO(self.config, self.fm)


    def run_set_of_spectra(self, indices: np.ndarray):

        for index in range(indices.shape[0]):

            logging.debug("Read chunk of spectra")
            row, col = indices[index,0], indices[index,1]

            input_data = self.io.get_components_at_index(row, col)

            if input_data is not None:
                logging.debug("Run model")
                # The inversion returns a list of states, which are
                # interpreted either as samples from the posterior (MCMC case)
                # or as a gradient descent trajectory (standard case). For
                # a trajectory, the last spectrum is the converged solution.
                states = self.iv.invert(input_data.meas, input_data.geom)

                logging.debug("Write chunk of spectra")
                # Write the spectra to disk
                try:
                    self.io.write_spectrum(row, col, states, self.fm, self.iv)
                except ValueError as err:
                    logging.info(
                        f"""
                        Encountered the following ValueError in (row,col) ({row},{col}).
                        Results for this pixel will be all zeros.
                        """
                    )
                    logging.error(err)
                if index % 100 == 0:
                    logging.info(f'Core at start location ({row},{col}) completed {index}/{indices.shape[0]}')

        self.io.flush_buffers()
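
A minimal usage sketch for the Worker class above. It assumes `config` is an already-loaded isofit `configs.Config`, and the 10x10 grid of (row, col) indices is a placeholder for the real scene extent:

import itertools
import numpy as np

# Build an (N, 2) integer array of (row, col) pairs, as expected by
# run_set_of_spectra, then process them with a single worker.
indices = np.array(list(itertools.product(range(10), range(10))), dtype=int)
worker = Worker(config, loglevel='INFO', logfile=None)
worker.run_set_of_spectra(indices)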
Example #2
def run_forward():
    """Simulate the remote measurement of a spectrally uniform surface."""

    # Configure the surface/atmosphere/instrument model
    testdir, fname = os.path.split(os.path.abspath(__file__))
    datadir = os.path.join(testdir, 'data')
    config = create_new_config(os.path.join(datadir, 'config_forward.json'))
    fm = ForwardModel(config)
    iv = Inversion(config, fm)
    io = IO(config, fm, iv, [0], [0])

    # Simulate a measurement and write result
    for row, col, meas, geom, configs in io:
        states = iv.invert(meas, geom)
        io.write_spectrum(row, col, states, meas, geom)

    assert True
    return states[0]
Example #3
def run_inverse():
    """Invert the remote measurement."""

    # Configure the surface/atmosphere/instrument model
    testdir, fname = os.path.split(os.path.abspath(__file__))
    datadir = os.path.join(testdir, 'data')
    config = create_new_config(os.path.join(datadir, 'config_forward.json'))
    fm = ForwardModel(config)
    iv = Inversion(config, fm)
    io = IO(config, fm, iv, [0], [0])
    geom = None

    # Get our measurement from the simulation results, and invert.
    # Calculate uncertainties at the solution state, write result
    for row, col, meas, geom, configs in io:
        states = iv.invert(meas, geom)
        io.write_spectrum(row, col, states, meas, geom)

    assert True
    return states[-1]
Example #4
def run_forward():
    """Simulate the remote measurement of a spectrally uniform surface."""

    # Configure the surface/atmosphere/instrument model
    testdir, fname = os.path.split(os.path.abspath(__file__))
    datadir = os.path.join(testdir, 'data')
    config = create_new_config(os.path.join(datadir, 'config_forward.json'))
    fm = ForwardModel(config)
    iv = Inversion(config, fm)
    io = IO(config, fm)

    # Simulate a measurement and write result
    for row in range(io.n_rows):
        for col in range(io.n_cols):
            input_data = io.get_components_at_index(row, col)
            if input_data is not None:
                states = iv.invert(input_data.meas, input_data.geom)
                io.write_spectrum(row, col, states, fm, iv)

    assert True
    return states[0]
Example #5
def run_inverse():
    """Invert the remote measurement."""

    # Configure the surface/atmosphere/instrument model
    testdir, fname = os.path.split(os.path.abspath(__file__))
    datadir = os.path.join(testdir, 'data')
    config = create_new_config(os.path.join(datadir, 'config_forward.json'))
    fm = ForwardModel(config)
    iv = Inversion(config, fm)
    io = IO(config, fm)

    # Get our measurement from the simulation results, and invert.
    # Calculate uncertainties at the solution state, write result
    for row in range(io.n_rows):
        for col in range(io.n_cols):
            input_data = io.get_components_at_index(row, col)
            if input_data is not None:
                states = iv.invert(input_data.meas, input_data.geom)
                io.write_spectrum(row, col, states, fm, iv)

    assert True
    return states[-1]
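
Reading the paired tests above together with the comment in Example #1 (a sketch, not part of the original tests): `invert` returns a list of state vectors, so the forward simulation uses the first entry while the iterative inversion takes the last entry of the trajectory as the converged solution.

# Hypothetical follow-up, reusing the names from the loops above:
states = iv.invert(meas, geom)
simulated_state = states[0]     # what run_forward returns
solution_state = states[-1]     # what run_inverse returns (converged solution)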
Example #6
class Isofit:
    """Initialize the Isofit class.

    Args:
        config_file: isofit configuration file in JSON or YAML format
        row_column: The user can specify

                    * a single number, in which case it is interpreted as a row
                    * a comma-separated pair, in which case it is interpreted as a
                      row/column tuple (i.e. a single spectrum)
                    * a comma-separated quartet, in which case it is interpreted as
                      a row, column range in the order (line_start, line_end, sample_start,
                      sample_end); all values are inclusive.

                    If none of the above, the whole cube will be analyzed.
        level: logging level (ERROR, WARNING, INFO, DEBUG)
        logfile: file to write output logs to
    """

    def __init__(self, config_file, row_column='', level='INFO', logfile=None):

        # Explicitly set the number of threads to 1, so we parallelize more
        # effectively across worker processes
        os.environ["MKL_NUM_THREADS"] = "1"

        # Set logging level
        self.loglevel = level
        self.logfile = logfile
        logging.basicConfig(format='%(levelname)s:%(message)s', level=self.loglevel, filename=self.logfile)

        self.rows = None
        self.cols = None
        self.config = None
        self.fm = None
        self.iv = None
        self.io = None
        self.states = None

        # Load configuration file
        self.config = configs.create_new_config(config_file)
        self.config.get_config_errors()

        # Initialize ray for parallel execution
        rayargs = {'address': self.config.implementation.ip_head,
                   'redis_password': self.config.implementation.redis_password,
                   'ignore_reinit_error':True,
                   'local_mode': self.config.implementation.n_cores == 1}

        # only specify a temporary directory if we are not connecting to 
        # a ray cluster
        if rayargs['local_mode']:
            rayargs['temp_dir'] = self.config.implementation.ray_temp_dir
            # Used to run on a VPN
            ray.services.get_node_ip_address = lambda: '127.0.0.1'

        # We can only set the num_cpus if running on a single-node
        if self.config.implementation.ip_head is None and self.config.implementation.redis_password is None:
            rayargs['num_cpus'] = self.config.implementation.n_cores
        ray.init(**rayargs)

        if len(row_column) > 0:
            ranges = row_column.split(',')
            if len(ranges) == 1:
                self.rows, self.cols = [int(ranges[0])], None
            elif len(ranges) == 2:
                row_start, row_end = ranges
                self.rows, self.cols = range(
                    int(row_start), int(row_end)), None
            elif len(ranges) == 4:
                row_start, row_end, col_start, col_end = ranges
                self.rows = range(int(row_start), int(row_end))
                self.cols = range(int(col_start), int(col_end))

        # Build the forward model and inversion objects
        self._init_nonpicklable_objects()
        self.io = IO(self.config, self.fm, self.iv, self.rows, self.cols)
    

    def __del__(self):
        ray.shutdown()

    def _init_nonpicklable_objects(self) -> None:
        """ Internal function to initialize objects that cannot be pickled
        """
        self.fm = ForwardModel(self.config)

        if self.config.implementation.mode == 'mcmc_inversion':
            self.iv = MCMCInversion(self.config, self.fm)
        elif self.config.implementation.mode in ['inversion', 'simulation']:
            self.iv = Inversion(self.config, self.fm)
        else:
            # This should never be reached due to configuration checking
            raise AttributeError('Config implementation mode not valid')

    def _clear_nonpicklable_objects(self):
        """ Internal function to clean objects that cannot be pickled
        """
        self.fm = None
        self.iv = None

    @ray.remote
    def _run_set_of_spectra(self, index_start: int, index_stop: int) -> None:
        """Internal function to run a chunk of spectra

        Args:
            index_start: spectral index to start execution at
            index_stop: spectral index to stop execution at

        """
        logging.basicConfig(format='%(levelname)s:%(message)s', level=self.loglevel, filename=self.logfile)
        self._init_nonpicklable_objects()
        io = IO(self.config, self.fm, self.iv, self.rows, self.cols)
        for index in range(index_start, index_stop):
            success, row, col, meas, geom = io.get_components_at_index(
                index)
            # Only run through the inversion if we got some data
            if success:
                if meas is not None and all(meas < -49.0):
                    # Bad data flags
                    self.states = []
                else:
                    # The inversion returns a list of states, which are
                    # interpreted either as samples from the posterior (MCMC case)
                    # or as a gradient descent trajectory (standard case). For
                    # a trajectory, the last spectrum is the converged solution.
                    self.states = self.iv.invert(meas, geom)

                # Write the spectra to disk
                io.write_spectrum(row, col, self.states, meas,
                                  geom, flush_immediately=True)
                if (index - index_start) % 100 == 0:
                    logging.info(
                        'Core at start index {} completed inversion {}/{}'.format(index_start, index-index_start,
                                                                                  index_stop-index_start))

    def run(self):
        """
        Iterate over all spectra, reading and writing through the IO
        object to handle formatting, buffering, and deferred write-to-file.
        The idea is to avoid reading the entire file into memory, or hitting
        the physical disk too often. These are our main class variables.
        """

        n_iter = len(self.io.iter_inds)
        self._clear_nonpicklable_objects()
        self.io = None

        if self.config.implementation.n_cores is None:
            n_workers = min(multiprocessing.cpu_count(), n_iter)
        else:
            n_workers = min(self.config.implementation.n_cores, n_iter)

        start_time = time.time()
        logging.info('Beginning inversions using {} cores'.format(n_workers))

        # Divide up spectra to run into chunks
        index_sets = np.linspace(0, n_iter, num=n_workers+1, dtype=int)

        # Run spectra, in either serial or parallel depending on n_workers
        results = ray.get([self._run_set_of_spectra.remote(self, index_sets[l], index_sets[l + 1])
                           for l in range(len(index_sets)-1)])

        total_time = time.time() - start_time
        logging.info('Inversions complete.  {} s total, {} spectra/s, {} spectra/s/core'.format(
            round(total_time,2), round(n_iter/total_time,4), round(n_iter/total_time/n_workers,4)))
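
A minimal driver sketch for the Ray-based Isofit class above; the configuration path and log file are placeholders and assume a valid isofit JSON/YAML config:

model = Isofit('my_config.json', level='INFO', logfile='isofit.log')
model.run()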
Example #7
class Worker(object):
    def __init__(self, config: configs.Config, loglevel: str, logfile: str, worker_id: int = None, total_workers: int = None):
        """
        Worker class to help run a subset of spectra.

        Args:
            config: isofit configuration
            loglevel: output logging level
            logfile: output logging file
            worker_id: worker ID for logging reference
            total_workers: the total number of workers running, for logging reference
        """

        logging.basicConfig(format='%(levelname)s:%(message)s', level=loglevel, filename=logfile)
        self.config = config
        self.fm = ForwardModel(self.config)

        if self.config.implementation.mode == 'mcmc_inversion':
            self.iv = MCMCInversion(self.config, self.fm)
        elif self.config.implementation.mode in ['inversion', 'simulation']:
            self.iv = Inversion(self.config, self.fm)
        else:
            # This should never be reached due to configuration checking
            raise AttributeError('Config implementation mode not valid')

        self.io = IO(self.config, self.fm)

        self.approximate_total_spectra = None
        if total_workers is not None:
            self.approximate_total_spectra = self.io.n_cols * self.io.n_rows / total_workers
        self.worker_id = worker_id
        self.completed_spectra = 0


    def run_set_of_spectra(self, indices: np.ndarray):

        for index in range(indices.shape[0]):

            logging.debug("Read chunk of spectra")
            row, col = indices[index,0], indices[index,1]

            input_data = self.io.get_components_at_index(row, col)

            self.completed_spectra += 1
            if input_data is not None:
                logging.debug("Run model")
                # The inversion returns a list of states, which are
                # interpreted either as samples from the posterior (MCMC case)
                # or as a gradient descent trajectory (standard case). For
                # a trajectory, the last spectrum is the converged solution.
                states = self.iv.invert(input_data.meas, input_data.geom)

                logging.debug("Write chunk of spectra")
                # Write the spectra to disk
                try:
                    self.io.write_spectrum(row, col, states, self.fm, self.iv)
                except ValueError as err:
                    logging.info(
                        f"""
                        Encountered the following ValueError in (row,col) ({row},{col}).
                        Results for this pixel will be all zeros.
                        """
                    )
                    logging.error(err)

                if index % 100 == 0:
                    if self.worker_id is not None and self.approximate_total_spectra is not None:
                        percent = np.round(self.completed_spectra / self.approximate_total_spectra * 100, 2)
                        logging.info(f'Worker {self.worker_id} completed {self.completed_spectra}/~{self.approximate_total_spectra}: {percent}% complete')
                    else:
                        logging.info(f'Worker at start location ({row},{col}) completed {index}/{indices.shape[0]}')

        self.io.flush_buffers()
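
One hypothetical way to spread work across several Worker instances; the chunking via np.array_split and the serial dispatch loop are assumptions for illustration (a real run would dispatch the chunks in parallel, e.g. with ray as in Example #6):

import itertools
import numpy as np

# Placeholder scene extent; in practice n_rows/n_cols come from the IO object.
all_indices = np.array(list(itertools.product(range(100), range(100))), dtype=int)
n_workers = 4
chunks = np.array_split(all_indices, n_workers)

workers = [Worker(config, 'INFO', None, worker_id=i, total_workers=n_workers)
           for i in range(n_workers)]
for worker, chunk in zip(workers, chunks):
    worker.run_set_of_spectra(chunk)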
Example #8
class Isofit:
    """Spectroscopic Surface and Atmosphere Fitting."""
    def __init__(self,
                 config_file,
                 row_column='',
                 profile=False,
                 level='INFO'):
        """Initialize the Isofit class."""

        # Explicitly set the number of threads to be 1, so we can make better
        # use of multiprocessing
        os.environ["MKL_NUM_THREADS"] = "1"

        # Set logging level
        logging.basicConfig(format='%(message)s', level=level)

        self.rows = None
        self.cols = None
        self.config = None
        self.profile = profile
        self.fm = None
        self.iv = None
        self.io = None
        self.states = None

        # Load configuration file
        self.config = configs.create_new_config(config_file)
        self.config.get_config_errors()

        # Build the forward model and inversion objects
        self._init_nonpicklable_objects()

        # We set the row and column range of our analysis. The user can
        # specify: a single number, in which case it is interpreted as a row;
        # a comma-separated pair, in which case it is interpreted as a
        # row/column tuple (i.e. a single spectrum); or a comma-separated
        # quartet, in which case it is interpreted as a row, column range in the
        # order (line_start, line_end, sample_start, sample_end) - all values are
        # inclusive. If none of the above, we will analyze the whole cube.
        if len(row_column) > 0:
            ranges = row_column.split(',')
            if len(ranges) == 1:
                self.rows, self.cols = [int(ranges[0])], None
            elif len(ranges) == 2:
                row_start, row_end = ranges
                self.rows, self.cols = range(int(row_start),
                                             int(row_end)), None
            elif len(ranges) == 4:
                row_start, row_end, col_start, col_end = ranges
                self.rows = range(int(row_start), int(row_end))
                self.cols = range(int(col_start), int(col_end))

    def _init_nonpicklable_objects(self):
        self.fm = ForwardModel(self.config)

        if self.config.implementation.mode == 'mcmc_inversion':
            self.iv = MCMCInversion(self.config, self.fm)
        elif self.config.implementation.mode in ['inversion', 'simulation']:
            self.iv = Inversion(self.config, self.fm)
        else:
            # This should never be reached due to configuration checking
            raise AttributeError('Config implementation mode not valid')

    def _clear_nonpicklable_objects(self):
        self.fm = None
        self.iv = None

    def _run_set_of_spectra(self, index_start, index_stop):
        self._init_nonpicklable_objects()
        io = IO(self.config, self.fm, self.iv, self.rows, self.cols)
        for index in range(index_start, index_stop):
            success, row, col, meas, geom, configs = io.get_components_at_index(
                index)
            # Only run through the inversion if we got some data
            if success:
                if meas is not None and all(meas < -49.0):
                    # Bad data flags
                    self.states = []
                else:
                    # The inversion returns a list of states, which are
                    # interpreted either as samples from the posterior (MCMC case)
                    # or as a gradient descent trajectory (standard case). For
                    # a trajectory, the last spectrum is the converged solution.
                    self.states = self.iv.invert(meas, geom)

                # Write the spectra to disk
                io.write_spectrum(row,
                                  col,
                                  self.states,
                                  meas,
                                  geom,
                                  flush_immediately=True)
                if index % 1000 == 0:
                    logging.info('Completed inversion {}/{}'.format(
                        index, len(io.iter_inds)))

    def run(self, profile=False):
        """
        Iterate over all spectra, reading and writing through the IO
        object to handle formatting, buffering, and deferred write-to-file.
        The idea is to avoid reading the entire file into memory, or hitting
        the physical disk too often. These are our main class variables.
        """

        io = IO(self.config, self.fm, self.iv, self.rows, self.cols)
        if profile:
            for row, col, meas, geom, configs in io:
                if meas is not None and all(meas < -49.0):
                    # Bad data flags
                    self.states = []
                else:
                    # Profile output
                    gbl, lcl = globals(), locals()
                    cProfile.runctx('self.iv.invert(meas, geom)', gbl, lcl)

                # Write the spectra to disk
                io.write_spectrum(row, col, self.states, meas, geom)
        else:
            n_iter = len(io.iter_inds)
            io = None
            self._clear_nonpicklable_objects()

            if self.config.implementation.n_cores is None:
                n_cores = multiprocessing.cpu_count()
            else:
                n_cores = self.config.implementation.n_cores

            # Don't use more cores than needed
            n_cores = min(n_cores, n_iter)

            if self.config.implementation.runtime_nice_level is None:
                pool = multiprocessing.Pool(processes=n_cores)
            else:
                pool = multiprocessing.Pool(
                    processes=n_cores,
                    initializer=common.nice_me(
                        self.config.implementation.runtime_nice_level))

            start_time = time.time()
            logging.info('Beginning inversions using {} cores'.format(n_cores))

            results = []
            index_sets = np.linspace(0, n_iter, num=n_cores + 1, dtype=int)
            for l in range(len(index_sets) - 1):
                if n_cores == 1:
                    self._run_set_of_spectra(index_sets[0], index_sets[-1])
                else:
                    results.append(
                        pool.apply_async(self._run_set_of_spectra,
                                         args=(index_sets[l],
                                               index_sets[l + 1])))
            results = [p.get() for p in results]
            pool.close()
            pool.join()

            total_time = time.time() - start_time
            logging.info(
                'Parallel inversions complete.  {} s total, {} spectra/s, {} spectra/s/core'
                .format(total_time, n_iter / total_time,
                        n_iter / total_time / n_cores))
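
A small standalone illustration of the index_sets chunking used in run() above; the counts are arbitrary:

import numpy as np

n_iter, n_cores = 10, 3
index_sets = np.linspace(0, n_iter, num=n_cores + 1, dtype=int)
# index_sets is [0, 3, 6, 10]; each worker handles the half-open
# range [index_sets[l], index_sets[l + 1])
for l in range(len(index_sets) - 1):
    print(index_sets[l], index_sets[l + 1])   # -> (0, 3), (3, 6), (6, 10)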
Example #9
    inverse_config = Config(inversion_settings)
    iv = Inversion(inverse_config, fm)

    # Refine water vapor LUT
    water = []
    window = 5
    for line in range(10, 990, 100):
        print(line)
        obs_mean = observables.get_chunk(2, 990, line - window,
                                         line + window).mean(axis=(0, 1))
        loc_mean = location.get_chunk(2, 990, line - window,
                                      line + window).mean(axis=(0, 1))
        rad_mean = radiance.get_chunk(2, 990, line - window,
                                      line + window).mean(axis=(0, 1))
        geom = Geometry(obs=obs_mean, loc=loc_mean)
        state_trajectory = iv.invert(rad_mean, geom)
        water.append(state_trajectory[-1][-1])

    # Create new water vapor table
    water_min = np.min(water) * .9
    water_max = np.max(water) * 1.1
    water_vals = np.linspace(water_min, water_max, 5).tolist()

    # Recreate configs with refined water vapor
    for file in glob.glob(cal_dir + "/lut_full/*"):
        os.remove(file)

    rtm_config, surface_config, instrument_config = fw_configs(
        iso_base, cal_dir, template_file, wavelength_file, surface_file, 1,
        "multicomponent_surface", engine, observables, water_vals)
Example #10
    inverse_config = Config(inversion_settings)
    iv = Inversion(inverse_config, fm)

    # Refine water vapor LUT
    water = []
    window = 5
    for line in range(10, 990, 100):
        print(line)
        obs_mean = observables.get_chunk(2, 990, line - window,
                                         line + window).mean(axis=(0, 1))
        loc_mean = location.get_chunk(2, 990, line - window,
                                      line + window).mean(axis=(0, 1))
        rad_mean = radiance.get_chunk(2, 990, line - window,
                                      line + window).mean(axis=(0, 1))
        geom = Geometry(obs=obs_mean, loc=loc_mean)
        state_trajectory = iv.invert(rad_mean, geom)
        water.append(state_trajectory[-1][-1])

    # Create new water vapor table
    water_min = np.min(water) * .9
    water_max = np.max(water) * 1.1
    water_vals = np.linspace(water_min, water_max, 5).tolist()

    # Recreate configs with refined water vapor
    for file in glob.glob(cal_dir + "/lut_full/*"):
        os.remove(file)

    rtm_config, surface_config, instrument_config = fw_configs(
        iso_base, cal_dir, template_file, wavelength_file, surface_file, 1,
        "multicomponent_surface", engine, observables, water_vals)