Example #1
    def load(self, key, inputcheckpoint, xslice, yslice, scenes_dates,
             dataloc):
        # save dates for debug purposes
        self.scenes_dates = scenes_dates

        # initialise empty matrix of the right size, with nan values (this assumes that xslice.step = 1 or None)
        shape = (xslice.stop - xslice.start, yslice.stop - yslice.start)
        self.values = np.full(shape, np.nan, order='F', dtype='<f8')

        if len(scenes_dates) == 0:
            # hack : if no scenes, we still need to get the latitude to compute
            # theta_sol_midi in albedo_angular_integration.f90
            filename = inputcheckpoint['filename']
            firstdate = robust_date_parse(inputcheckpoint['firstdate'])
            filename = instanciate_datetime(deepcopy(filename), firstdate)
            logging.warning('No data. Using latitude from file ' + filename)
            try:
                with AugmentedNetcdfDataset(filename, 'r') as f:
                    self.values[:, :] = f['latitude'][xslice, yslice]
                    return self
            except FileNotFoundError:
                logging.error(
                    'Apparently there are no input data scenes for this date and no BRDF checkpoint file either. The algorithm cannot be initialized without input data'
                )
                exit_status('UNABLE_TO_CONFIG')
            return

        # loop through each input scene date
        # note that we loop until one read is successful because we expect the
        # latitude to be the same for each scene date
        # in order to ensure this, we could add a security check (read each
        # date and compare to the latest one).
        for idate, d in enumerate(scenes_dates):
            filename = dataloc[d]['filename']

            # save filename for debug purposes
            self.filenames = {d: filename}
            logging.debug(str(d) + ' ' + filename)

            try:
                # actual reading of the data
                # TODO : honor the missing values and set to np.nan
                with AugmentedNetcdfDataset(filename, 'r') as f:
                    self.values[:, :] = f[key][xslice, yslice]
                    self.show_info(self.name, f[key])
                logging.debug('extract of ' + key + ' data = ' +
                              str(self.values[0, 0]))
                # return as soon as a file has been successfully read
                return self
            except KeyError as e:
                # if anything bad happened when reading the data
                logging.info('Problem reading ' + filename + '/' + key +
                             ' to get the ' + self.name + ' ' + str(e))
                # just log the problem and skip to the next date

        # if no files could be loaded successfully, show an error message
        logging.error('Cannot read files for "' + str(key) +
                      '" : input file locations are : ' + str(dataloc))
        return self
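For clarity, a stand-alone sketch of the allocation pattern used at the top of load above: the grid shape is derived from the slices (assuming a unit step, as noted in the comment), and the array is pre-filled with NaN in Fortran order. The slice bounds below are illustrative values, not from the original code.

import numpy as np

xslice, yslice = slice(0, 50), slice(100, 300)

# this assumes xslice.step and yslice.step are 1 or None, as noted in the comment above
shape = (xslice.stop - xslice.start, yslice.stop - yslice.start)
values = np.full(shape, np.nan, order='F', dtype='<f8')

print(values.shape)                  # (50, 200)
print(values.flags['F_CONTIGUOUS'])  # True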
Example #2
def stack_it(one_band_list, sensorname, check_sizes=False, stack_axis=2):
    """ Reflectance object is created from several ReflectanceOneBand objects. Stacking data for all bands along the merge_axis dimension. """

    # sanity check in case there is nothing to merge
    if len(one_band_list) == 0:
        logging.error(f"No band available for the sensor {sensorname}")
        raise Exception('Input error')

    one_band_type_name = type(one_band_list[0]).__name__
    if one_band_type_name == 'Angle':
        out = AngleMultiBand()
    elif one_band_type_name == 'LWCS_maskOneBand':
        out = LWCS_mask()
    elif one_band_type_name == 'ReflectanceOneBand':
        out = Reflectance()
    elif one_band_type_name == 'ReflectanceOneBandCov':
        out = ReflectanceCov()
    else:
        logging.error(f'Cannot stack object type {one_band_type_name}')
        exit_status('UNABLE_TO_PROCESS')

    # This is the actual code performing the merge. The rest of this function is only sanity checks.
    out.values = np.stack([x.values for x in one_band_list], axis=stack_axis)

    # keep the metadata
    out.sensorname = sensorname

    # propagate (and check) the scenes_dates
    out.scenes_dates = one_band_list[0].scenes_dates
    for dataset in one_band_list:
        if dataset.scenes_dates != out.scenes_dates:
            logging.error(
                f'ERROR : Mismatch in scenes dates : {dataset.scenes_dates} != {out.scenes_dates}'
            )

    # propagate (and check) the missing data value
    out.missing = one_band_list[0].missing
    for dataset in one_band_list:
        if dataset.missing != out.missing:
            logging.error(
                f'ERROR : Mismatch in missing data values : {dataset.missing} != {out.missing}'
            )

    # TODO : propagate also the filenames, it would be useful for debugging
    #attrs = { 'filenames': out.filenames })

    if check_sizes:
        shape_one_band = one_band_list[0].values.shape
        # shape_one_band is (xsize, ysize, n_scenes)
        shape = (shape_one_band[0], shape_one_band[1], len(one_band_list),
                 shape_one_band[2])
        if out.values.shape != shape:
            logging.error(str(shape_one_band))
            logging.error(str(shape))
            logging.error(str(out.values.shape))
            raise Exception('Merging error')
    return out
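A stand-alone sketch of the shape logic behind stack_it's core np.stack call; the random arrays below are illustrative stand-ins for the .values attributes of the one-band objects.

import numpy as np

# three single-band arrays, each shaped (xsize, ysize, n_scenes) = (10, 20, 5)
bands = [np.random.rand(10, 20, 5) for _ in range(3)]

# same core operation as stack_it: the bands are stacked along a new axis 2
stacked = np.stack(bands, axis=2)
print(stacked.shape)   # (10, 20, 3, 5), i.e. (xsize, ysize, n_bands, n_scenes)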
Example #3
def main():
    args = parse_args()

    infilename = args.infile
    reader = BrdfReader()
    logging.info(f'Loading brdf from {infilename}')
    reader.load_brdf(infilename,
                     model_len=None,
                     n_channels_ref=None,
                     xslice=None,
                     yslice=None)

    xsize = reader.brdf.values.shape[0]
    ysize = reader.brdf.values.shape[1]
    date = reader.previous_date
    tocr = np.full((xsize, ysize, reader.n_channels_ref),
                   np.nan,
                   order='F',
                   dtype='<f4')
    solzenith = np.full((xsize, ysize), np.nan, order='F', dtype='<f4')
    tocr_cov = np.full((xsize, ysize, reader.n_channels_ref),
                       np.nan,
                       order='F',
                       dtype='<f4')

    errcode, errmsg = pyal2.lib.toc_r.toc_r(
        debuglevel=args.debuglevel,
        model=reader.model_id,
        day_of_year=date.timetuple().tm_yday,
        latitude=np.asfortranarray(reader.latitude.values, '<f4'),
        k_array=np.asfortranarray(reader.brdf.values, '<f4'),
        ck_array=np.asfortranarray(reader.covariance.values, '<f4'),
        toc_min=-0.1,
        toc_max=2.0,
        sig_min=0.0,
        sig_max=10.0,
        solzenith_out=solzenith,
        tocr=tocr,
        tocr_cov=tocr_cov)

    writer = TocrIO(outfilename=args.outfile)
    writer.xoutputsize = xsize
    writer.youtputsize = ysize
    writer.n_channels_ref = reader.n_channels_ref
    writer.model_id = reader.model_id
    writer.write_tocr(tocr, slice(0, xsize), slice(0, ysize), date, 'TOC-R',
                      'TOC-R')
    writer.write_tocr(tocr_cov, slice(0, xsize), slice(0, ysize), date,
                      'TOC-R-ERR', 'TOC-R-ERR')

    # hack introduced by VITO
    missing = np.isnan(tocr)
    reader.quality.values[missing] = 128
    # end-of hack introduced by VITO

    writer.write_tocr(reader.quality.values, slice(0, xsize), slice(0, ysize),
                      date, 'Z-QFLAG', 'Z-QFLAG')
    writer.write_tocr(reader.n_valid_obs.values, slice(0, xsize),
                      slice(0, ysize), date, 'Z-NMOD', 'n_valid_obs')
    writer.write_tocr(solzenith, slice(0, xsize), slice(0, ysize), date,
                      'SOLZENITH', 'solzenith')
    writer.write_tocr(reader.latitude.values, slice(0, xsize), slice(0, ysize),
                      date, 'latitude', 'latitude')
    writer.write_tocr(reader.longitude.values, slice(0, xsize),
                      slice(0, ysize), date, 'longitude', 'longitude')
    print('Processed ' + str(date))

    exit_status("PROCESS_OK")
Example #4
    writer.write_tocr(tocr, slice(0, xsize), slice(0, ysize), date, 'TOC-R',
                      'TOC-R')
    writer.write_tocr(tocr_cov, slice(0, xsize), slice(0, ysize), date,
                      'TOC-R-ERR', 'TOC-R-ERR')

    # hack introduced by VITO
    missing = np.isnan(tocr)
    reader.quality.values[missing] = 128
    # end-of hack introduced by VITO

    writer.write_tocr(reader.quality.values, slice(0, xsize), slice(0, ysize),
                      date, 'Z-QFLAG', 'Z-QFLAG')
    writer.write_tocr(reader.n_valid_obs.values, slice(0, xsize),
                      slice(0, ysize), date, 'Z-NMOD', 'n_valid_obs')
    writer.write_tocr(solzenith, slice(0, xsize), slice(0, ysize), date,
                      'SOLZENITH', 'solzenith')
    writer.write_tocr(reader.latitude.values, slice(0, xsize), slice(0, ysize),
                      date, 'latitude', 'latitude')
    writer.write_tocr(reader.longitude.values, slice(0, xsize),
                      slice(0, ysize), date, 'longitude', 'longitude')
    print('Processed ' + str(date))

    exit_status("PROCESS_OK")


if __name__ == "__main__":
    try:
        main()
    except Exception:
        # log the full traceback before reporting the failure status
        logging.exception('Unhandled exception in main()')
        exit_status("UNABLE_TO_PROCESS")
Example #5
def main():
    """ This is the main function. It selects the right runner according 
        the the sensor and run it (run by chunks if required). """
    # TODO : The whole process of chunking could be done using a combination 
    # of xarray and dask. This would be more reliable and cleaner.

    # parse command line arguments
    args = parse_args()

    # log information about the instruments to process
    logging.info(f'Instruments to process : {args.instruments}')
    instr_list = args.instruments
    chunk_list = args.chunksize

    # parse config files and mix it with command lines arguments
    dstore = DataStore()
    # TODO : check if we could write this part better
    if args.instruments:
        dstore.load_config(args.acf, args.pcf,
                           startseries=args.startseries,
                           dates=args.outputdates,
                           debuglevel=args.debuglevel,
                           window_predefined=args.window_predefined,
                           window_lat=args.window_lat,
                           window_lon=args.window_lon,
                           keywords=args.keywords,
                           config_format=args.config_format, 
                           instruments=args.instruments)
    else:
        dstore.load_config(args.acf, args.pcf,
                           startseries=args.startseries,
                           dates=args.outputdates,
                           debuglevel=args.debuglevel,
                           window_predefined=args.window_predefined,
                           window_lat=args.window_lat,
                           window_lon=args.window_lon,
                           keywords=args.keywords,
                           config_format=args.config_format)

    logdir = dstore.get('globalconfig', {}).get('logdir','.')
    # save DataStore for logging purposes
    dstore.to_yaml(f'{logdir}/dstore.yaml')
    # reload to double check it is well saved
    other_dstore = DataStore()
    other_dstore.load_full_yaml(f'{logdir}/dstore.yaml')
    other_dstore.to_yaml(f'{logdir}/dstore-copy.yaml')
    dstore = other_dstore

    for k, instrument in enumerate(instr_list):

        # Create chunks of data to process
        if chunk_list is not None:
            chunks = chunk_2D(dstore['xfullslice'], dstore['yfullslice'], chunk_list[k], chunk_list[k])
        else:
            chunks = chunk_2D(dstore['xfullslice'], dstore['yfullslice'], None, None)
        # Run the code, in parallel if required
        if args.cpu > 1:
            # if parallel processing is required, use a Pool of processes to run
            # several runners in parallel, calling "process_one_chunk"

            global write_lock
            write_lock = Lock()
            with Pool(args.cpu, initializer=chunk_init, initargs=(write_lock,)) as p:
                results = p.starmap(process_one_chunk, [(instrument, i, c, args, dstore) for i,c in enumerate(chunks)])
            for result in results:
                if isinstance(result, ExceptionInSubprocessWrapper):
                    logging.error('Exception occurred in chunk "' + str(result.info) + '"')
                    result.log_error()
                    exit_status('UNABLE_TO_PROCESS')
        else:
            # if no parallelization is requested (or if there is no chunking), run
            # sequentially, using "process_one_chunk_no_parallel"

            logging.info('                                          ')
            logging.info('Now treating instrument ' + str(instrument))

            for c in chunks:
                result = process_one_chunk_no_parallel(instrument, c, args, dstore)
                if isinstance(result, ExceptionInSubprocessWrapper):
                    logging.warning('Exception occurred in chunk "' + str(result.info) + '"')
                    result.log_error()
                    exit_status('UNABLE_TO_PROCESS')

    exit_status("PROCESS_OK")
Example #6
    def get_data_sparse(self, sensor, internalkey):
        """ This function uses the config dictionnary to locate the appropriate code that
        must be used to read the data. The piece of code to run is located in the folder
        'readers'. The function 'get_data_reader()' (which is in readers/__init__.py) is
        responsible to transform the string "readername" into data_reader_class (code
        that can read the data) and data_params (list params required by
        data_reader_class). Then actual data is read and the return value of the get_data
        function is an object containing the data (along with some logging information).
        This function is used when each band needs to be read separately. Then it
        aggregate them together """

        # there are multiple values for this layer, one for each band.
        # Find the paths to the config for each band
        #~ band_keys = get_param_in_tree(self, ['input', sensor, internalkey], 'band_keys')
        try:
            band_indices = get_param_in_tree(self,
                                             ['input', sensor, internalkey],
                                             'band_indexing_list')
        except Exception as e:
            logging.error(f'Error in get_data_sparse : could not get band_indexing_list ({e})')
            exit_status('UNABLE_TO_PROCESS')

        paths = [['input', sensor, internalkey, band_key]
                 for band_key in band_indices]

        data_objects = []
        for path in paths:
            # get the reader from the config
            readername = get_param_in_tree(self, path, 'data_reader_name')
            data_reader_class, data_params = get_data_reader(readername)

            # get the actual value of the parameters required by this reader
            data_params_dict = {
                p: get_param_in_tree(self, path, p)
                for p in data_params
            }
            data_object = data_reader_class(name=internalkey)
            # read the data
            logging.debug(
                f' using {readername} {path}, and parameters {data_params_dict.keys()}'
            )
            data_object = data_object.load(
                **{
                    'scenes_dates': get_param_in_tree(self, path,
                                                      '_scenes_dates'),
                    'xslice': get_param_in_tree(self, [], 'xslice'),
                    'yslice': get_param_in_tree(self, [], 'yslice'),
                    'dataloc': get_param_in_tree(self, path, '_dataloc'),
                    **data_params_dict
                })
            data_objects.append(data_object)

        # now the list "data_objects" contains a list of matrices; let us merge it into one unique matrix.
        # a drawback is that moving the data around like this is slightly slower than loading
        # directly into the final matrix, but this is not a real problem considering
        # that the moves performed in memory are very fast compared to reading from disk.
        # the main advantage is to simplify the I/O code (and to easily allow a different
        # configuration for each band if needed)
        data_object = stack_it(data_objects, sensorname=sensor)

        logging.info(
            f'Data loaded {internalkey} : {data_object.values.shape} matrix for sensor {sensor}'
        )
        return data_object
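A small self-contained sketch of the mechanism the docstring above describes: a reader name string is mapped to a reader class plus the list of parameter names that class needs. Everything here (DemoLatitudeReader, demo_get_data_reader, 'filename_pattern') is an illustrative stand-in for readers/__init__.py, not the real API.

# hypothetical registry mapping reader names to (class, required parameter names)
class DemoLatitudeReader:
    def __init__(self, name):
        self.name = name

    def load(self, scenes_dates, xslice, yslice, dataloc, filename_pattern):
        # a real reader would open the files listed in dataloc here
        return self

_DEMO_READERS = {'latitude_netcdf': (DemoLatitudeReader, ['filename_pattern'])}

def demo_get_data_reader(readername):
    return _DEMO_READERS[readername]

data_reader_class, data_params = demo_get_data_reader('latitude_netcdf')
print(data_reader_class.__name__, data_params)   # DemoLatitudeReader ['filename_pattern']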
Example #7
    def load_config(self, acf, pcf, dates=None, startseries=None,
                    window_predefined=None, window_lat=None, window_lon=None,
                    debuglevel=0, keywords=None, config_format=None,
                    instruments=None):
        """
        This is the main function to initialize DataStore object.
        It reads the config files to get the relevant parameters, it also
        parses the inputfiles to get their dates, and creates one DataBox
        for each step (date) to process.

        Required parameters are the algorithm config file (acf) and the product config file (pcf).
        Optional parameters to overwrite the values from the config files :

        :param dates: Start and end dates (as strings)
        :type dates: [string, string]
        :param startseries: For the first time step of the algorithm, a
        value set to 'False' will attempt to load an a priori BRDF
        checkpoint file, which must be specified in the pcf file. 'True' will
        not attempt to load a previous brdf file and will use default values
        with high variance. Note that this parameter impacts **only the first
        date to be processed**: if several output dates are computed with the
        same command, the BRDF checkpoint for the next steps (other than the
        first one) **will** be used, even if the parameter startseries is set to 'True'.
        :type startseries: boolean
        :param window_predefined: not implemented.
        :param window_lat: list of two integers. Defines a rectangular region (a subset) to process.
        Only these pixels will be processed.
        :param window_lon: see window_lat
        :param debuglevel: the higher the value, the more verbose the output.
        Affects the fortran code only.
        :param keywords: a dictionary of {key:string}. BEFORE processing
        the yaml config files, all occurrences of "{key}" will be replaced
        by the string. Useful for batch processing on several regions using
        the same config file.
        :param config_format: for backward compatibility, if set to 'f90nml_c3s',
        the acf and pcf files will be transformed from an f90 namelist into standard
        yaml before being used.
        :type config_format: 'f90nml_c3s' or 'f90nml_msg'

        >>> to,lnk = '../test/testc3s_full/data_c3s_alldata','data_c3s_alldata'
        >>> if not os.path.realpath(lnk) == os.path.realpath(to): os.symlink(to, lnk)
        >>> dstore = DataStore();

        >>> dstore.load_config('../config/acf.c3s.multi-sensor.true.yaml', '../config/pcf.c3s.VGT.yaml', dates=[['2002-06-20','2002-07-20']], startseries=None, window_predefined=None, window_lat=[25,25], window_lon=[25,25], debuglevel=0, keywords={'year':'1998','name':'VGT','sensorname':'VGT','site':'Avignon'}, config_format='yaml', instruments=['VGT'])
        >>> dstore['n_channels_meas']
        [4]

        >>> dstore = DataStore();
        >>> dstore.load_config('../config/acf.c3s.multi-sensor.true.yaml', '../config/pcf.c3s.AVHRR_NOAA11.yaml', dates=[['2002-06-20','2002-07-20']], startseries=None, window_predefined=None, window_lat=[25,25], window_lon=[25,25], debuglevel=0, keywords={'year':'1988','name':'AVHRR11','sensorname':'AVHRR_NOAA11','site':'Avignon'}, config_format='yaml', instruments=['AVHRR_NOAA11'])
        >>> dstore['n_channels_meas']
        [3]

        #>>> dstore = DataStore();
        #>>> dstore.load_config('../config/acf.c3s.multi-sensor.true.yaml', '../config/pcf.c3s.AVHRR_NOAA7.yaml', dates=[['2002-06-20','2002-07-20']], startseries=None, window_predefined=None, window_lat=[25,25], window_lon=[25,25], debuglevel=0, keywords={'name':'AVHRR7','sensorname':'AVHRR_NOAA7','site':'Avignon'}, config_format='yaml', instruments=['AVHRR_NOAA7'])
        #>>> dstore['n_channels_meas']
        #[2]

        """

        if keywords is None: keywords = {}

        # parse the config files, transforming them from f90 namelist into standard yaml if required
        if config_format == 'yaml':
            with open(acf,'r') as f: self.acf = yaml.unsafe_load(f)
            with open(pcf,'r') as f: self.pcf = yaml.unsafe_load(f)
        elif config_format == 'f90nml_c3s' or config_format == 'f90nmlc3s':
            self.acf, self.pcf = f90nml_to_yaml_c3s(acf, pcf)
        elif config_format == 'f90nml_msg' or config_format == 'f90nmlmsg':
            self.acf, self.pcf = f90nml_to_yaml_msg(acf, pcf)
        else:
            logging.critical(f'Unknown config format {config_format}')
            exit_status("UNABLE_TO_CONFIG")


        if config_format != 'yaml':
            # write config file for logging purposes if required
            save_yaml(self.acf, f'log/{keywords.get("name","")}/{os.path.basename(acf)}')
            save_yaml(self.pcf, f'log/{keywords.get("name","")}/{os.path.basename(pcf)}')

        self['acf_file'] = acf
        self['pcf_file'] = pcf

        # all occurrences of "{key}" will be replaced by its value from the dictionary "keywords"
        self.acf = instanciate_keywords(self.acf, keywords)
        self.pcf = instanciate_keywords(self.pcf, keywords)

        if instruments is None:
            instruments = self.pcf['input'].keys()
            logging.debug(f'No instrument defined in parameters, using pcf file : instruments = {instruments}')

        logging.debug('Parsing acf and pcf files is OK')
        # global config hook : read the "globalconfig" parameters in the acf and pcf and make them available everywhere.
        # useful to debug and trace some issues. But DO NOT use it too much, it will break the logic of the code,
        # create confusion, hurt maintainability and give bad karma.
        self['globalconfig'] = {}
        self['globalconfig'].update(self.acf.get('globalconfig',{}))
        self['globalconfig'].update(self.pcf.get('globalconfig',{}))
        self['globalconfig']['debuglevel'] = debuglevel

        try:
            self['n_channels_ref'] = self['globalconfig']['n_channels_ref']
        except KeyError:
            self['n_channels_ref'] = 4
            logging.warning(f'No n_channels_ref provided, using default value {self["n_channels_ref"]}')

        # initialisation of the 'input' dictionary
        self['input'] = dict()

        for j,instr in enumerate(instruments):

            # read relevant parameters, from acf and pcf file into the "self" config dict
            self[instr]= dict()
            self[instr]['output_dates'] = self._read_dates(args_dates=dates[j],
                    filename = self.pcf['input'][instr]['output']['albedo'].get('filename', None) 
                            or self.pcf['input'][instr]['output']['albedo']['band1']['filename'],
                    configfile_dates = self.pcf['dates']['output_date'],
                    date_filter=self.pcf['input'][instr]['output'].get('date_pattern', None))

            logging.info('Finished _read_dates in load_config, instr ' + str(instr))

            # read the checkpoint parameters
            self[instr]['inputcheckpoint'] = self.pcf['input'][instr]['inputcheckpoint']

            # use cache ?
            self.pcf['input'][instr]['use_cache'] = self.pcf['globalconfig']['use_cache']

            self[instr]['output'] = self.pcf['input'][instr]['output']

            self[instr]['startseries'] = (parse_boolean(startseries)
                                or self.pcf.get('startseries',None)
                                or self.acf.get('startseries',None)
                                or self.acf.get('nam_inv',{}).get('startseries',None)
                                )

            # Change the firstdate to the one read from sensors_constants => first date
            #self[instr]['inputcheckpoint']['firstdate'] = self[instr]['output_dates'][0].strftime('%Y/%m/%d')
            # parse input filenames to get their dates
            # this should populate self['input'][instr] and some metadata (xsize, ysize, etc)
            self.setup_input_one_sensor(self.pcf['input'][instr], instr)

            # if cropping is required, restrict the size of the zone to process
            # note that these xfullslice, yfullslice (zone to process) are not
            # related to xslice,yslice (chunk to process)
            if window_lat:
                self['xfullslice'] = slice(window_lat[0] - 1, window_lat[1] )
                self['xfullsize'] = self['xfullslice'].stop - self['xfullslice'].start
                logging.warning('Restricting to x slice : ' + str(self['xfullslice']))
            if window_lon:
                self['yfullslice'] = slice(window_lon[0] - 1, window_lon[1] )
                self['yfullsize'] = self['yfullslice'].stop - self['yfullslice'].start
                logging.warning('Restricting to y slice : ' + str(self['yfullslice']))


        # TODO : n_channels_meas is duplicated in some places, need to clean this up when doing multisensor
        #~ firstsensor = self['input']['sensors'][0]
        #~ self['sensorname'] = firstsensor
        #~ if len(self['input']['sensors']) != 1:
            #~ raise Exception('Multiple sensors detected in input {str(self["input"]["sensors"])}. Not implemented')
        #~ self['n_channels_meas'] = self['input'][firstsensor]['n_channels_meas']
        self['sensorname'] = instruments  # self['input']['sensors'][:]
        self['n_channels_meas'] = list()
        list_to_del = list()

        for sensor in self.acf['spectral'].keys():
            if sensor in self['sensorname']:
                self['n_channels_meas'].append(self.acf['spectral'][sensor]['nbands'])
            else:
                # Deleting sensor in acf['spectral'] tree
                list_to_del.append(sensor)
        for sensor in list_to_del:
            del self.acf['spectral'][sensor]

        logging.warning(f'We will work on these sensors : {self["sensorname"]}')

        self.load_acf(self.acf, sensorlist=self['sensorname'], 
            n_channel_meas=self['n_channels_meas'], startseries=self[instr]['startseries'])
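The keyword substitution described in the docstring of load_config ('all occurrences of "{key}" will be replaced by the string') boils down to recursive string replacement. Below is a stand-in sketch of that behaviour, not the real instanciate_keywords; the nested pcf dict is illustrative.

def substitute_keywords(node, keywords):
    # recursively replace "{key}" placeholders in every string of a nested structure
    if isinstance(node, str):
        for key, value in keywords.items():
            node = node.replace('{' + key + '}', value)
        return node
    if isinstance(node, dict):
        return {k: substitute_keywords(v, keywords) for k, v in node.items()}
    if isinstance(node, list):
        return [substitute_keywords(v, keywords) for v in node]
    return node

pcf = {'input': {'VGT': {'filename': '/data/{site}/{year}/c3s_{sensorname}.nc'}}}
print(substitute_keywords(pcf, {'site': 'Avignon', 'year': '1998', 'sensorname': 'VGT'}))
# {'input': {'VGT': {'filename': '/data/Avignon/1998/c3s_VGT.nc'}}}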