def load(self, key, inputcheckpoint, xslice, yslice, scenes_dates, dataloc):
    # save dates for debug purposes
    self.scenes_dates = scenes_dates
    # initialise an empty matrix of the right size, filled with nan values
    # (this assumes that xslice.step is 1 or None)
    shape = (xslice.stop - xslice.start, yslice.stop - yslice.start)
    self.values = np.full(shape, np.nan, order='F', dtype='<f8')

    if len(scenes_dates) == 0:
        # hack : if no scenes, we still need to get the latitude to compute
        # theta_sol_midi in albedo_angular_integration.f90
        filename = inputcheckpoint['filename']
        firstdate = robust_date_parse(inputcheckpoint['firstdate'])
        filename = instanciate_datetime(deepcopy(filename), firstdate)
        logging.warning('No data. Using latitude from file ' + filename)
        try:
            with AugmentedNetcdfDataset(filename, 'r') as f:
                self.values[:, :] = f['latitude'][xslice, yslice]
            return self
        except FileNotFoundError:
            logging.error('There are no input data scenes for this date and no BRDF checkpoint file either. '
                          'The algorithm cannot be initialized without input data.')
            exit_status('UNABLE_TO_CONFIG')
            return

    # loop through each input scene date
    # note that we loop until one read is successful because we expect the
    # latitude to be the same for each scene date
    # in order to ensure this, we could add a security check (read each
    # date and compare to the latest one).
    for idate, d in enumerate(scenes_dates):
        filename = dataloc[d]['filename']
        # save filename for debug purposes
        self.filenames = {d: filename}
        logging.debug(str(d) + ' ' + filename)
        try:
            # actual reading of the data
            # TODO : honor the missing values and set them to np.nan
            with AugmentedNetcdfDataset(filename, 'r') as f:
                self.values[:, :] = f[key][xslice, yslice]
                self.show_info(self.name, f[key])
                logging.debug('extract of ' + key + ' data = ' + str(self.values[0, 0]))
            # return as soon as a file has been successfully read
            return self
        except KeyError as e:
            # if anything bad happened when reading the data,
            # just log the problem and skip to the next date
            logging.info('Problem reading ' + filename + '/' + key
                         + ' to get the ' + self.name + ' ' + str(e))

    # if no file could be loaded successfully, show an error message
    logging.error('Cannot read files for "' + str(key) + '" : input file locations are : ' + str(dataloc))
    return self
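# A minimal, self-contained sketch (illustrative only, not part of pyal2) of the allocation
# pattern used in load() above : a Fortran-ordered, NaN-filled array whose shape is derived
# from python slices, then filled from a source array restricted to those same slices.
# The source array here is synthetic; in load() it comes from an AugmentedNetcdfDataset variable.
def _sketch_slice_allocation():
    import numpy as np
    xslice, yslice = slice(10, 14), slice(0, 3)
    shape = (xslice.stop - xslice.start, yslice.stop - yslice.start)
    values = np.full(shape, np.nan, order='F', dtype='<f8')   # (4, 3) array of NaN
    source = np.arange(20 * 5, dtype='<f8').reshape(20, 5)    # stand-in for f[key]
    values[:, :] = source[xslice, yslice]
    assert values.flags['F_CONTIGUOUS'] and values.shape == (4, 3)
    return values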
def stack_it(one_band_list, sensorname, check_sizes=False, stack_axis=2):
    """ Reflectance object is created from several ReflectanceOneBand objects.
    Stacks the data for all bands along the stack_axis dimension. """
    # security check in case there is nothing to merge
    if len(one_band_list) == 0:
        logging.error(f"No band available for the sensor {sensorname}")
        raise Exception('Input error')

    one_band_type_name = type(one_band_list[0]).__name__
    if one_band_type_name == 'Angle':
        out = AngleMultiBand()
    elif one_band_type_name == 'LWCS_maskOneBand':
        out = LWCS_mask()
    elif one_band_type_name == 'ReflectanceOneBand':
        out = Reflectance()
    elif one_band_type_name == 'ReflectanceOneBandCov':
        out = ReflectanceCov()
    else:
        logging.error(f'Cannot stack object type {one_band_type_name}')
        exit_status('UNABLE_TO_PROCESS')

    # This is the actual code performing the merge. The rest of this function is only security checks.
    out.values = np.stack([x.values for x in one_band_list], axis=stack_axis)

    # keep the metadata
    out.sensorname = sensorname

    # propagate (and check) the scenes_dates
    out.scenes_dates = one_band_list[0].scenes_dates
    for dataset in one_band_list:
        if dataset.scenes_dates != out.scenes_dates:
            logging.error(f'ERROR : Mismatch in scenes dates : {dataset.scenes_dates} != {out.scenes_dates}')

    # propagate (and check) the missing data value
    out.missing = one_band_list[0].missing
    for dataset in one_band_list:
        if dataset.missing != out.missing:
            logging.error(f'ERROR : Mismatch in missing data values : {dataset.missing} != {out.missing}')

    # TODO : propagate also the filenames, it would be useful for debugging
    #attrs = { 'filenames': out.filenames })

    if check_sizes:
        shape_one_band = one_band_list[0].values.shape
        # shape_one_band is : (xsize, ysize, n_scenes)
        shape = (shape_one_band[0], shape_one_band[1], len(one_band_list), shape_one_band[2])
        if out.values.shape != shape:
            logging.error(str(shape_one_band))
            logging.error(str(shape))
            logging.error(str(out.values.shape))
            raise Exception('Merging error')
    return out
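# A minimal sketch (illustrative, not part of pyal2) of the reshaping that stack_it relies on :
# stacking per-band arrays of shape (xsize, ysize, n_scenes) along axis 2 yields a single
# (xsize, ysize, n_bands, n_scenes) array, which is the shape verified when check_sizes=True.
def _sketch_stack_shapes():
    import numpy as np
    n_bands = 2
    bands = [np.zeros((5, 4, 3)) for _ in range(n_bands)]  # each band is (xsize=5, ysize=4, n_scenes=3)
    stacked = np.stack(bands, axis=2)
    assert stacked.shape == (5, 4, n_bands, 3)
    return stacked.shape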
def main():
    args = parse_args()
    infilename = args.infile

    reader = BrdfReader()
    logging.info(f'Loading brdf from {infilename}')
    reader.load_brdf(infilename, model_len=None, n_channels_ref=None, xslice=None, yslice=None)

    xsize = reader.brdf.values.shape[0]
    ysize = reader.brdf.values.shape[1]
    date = reader.previous_date

    tocr = np.full((xsize, ysize, reader.n_channels_ref), np.nan, order='F', dtype='<f4')
    solzenith = np.full((xsize, ysize), np.nan, order='F', dtype='<f4')
    tocr_cov = np.full((xsize, ysize, reader.n_channels_ref), np.nan, order='F', dtype='<f4')

    errcode, errmsg = pyal2.lib.toc_r.toc_r(
        debuglevel=args.debuglevel,
        model=reader.model_id,
        day_of_year=date.timetuple().tm_yday,
        latitude=np.asfortranarray(reader.latitude.values, '<f4'),
        k_array=np.asfortranarray(reader.brdf.values, '<f4'),
        ck_array=np.asfortranarray(reader.covariance.values, '<f4'),
        toc_min=-0.1, toc_max=2.0,
        sig_min=0.0, sig_max=10.0,
        solzenith_out=solzenith,
        tocr=tocr,
        tocr_cov=tocr_cov)

    writer = TocrIO(outfilename=args.outfile)
    writer.xoutputsize = xsize
    writer.youtputsize = ysize
    writer.n_channels_ref = reader.n_channels_ref
    writer.model_id = reader.model_id

    writer.write_tocr(tocr, slice(0, xsize), slice(0, ysize), date, 'TOC-R', 'TOC-R')
    writer.write_tocr(tocr_cov, slice(0, xsize), slice(0, ysize), date, 'TOC-R-ERR', 'TOC-R-ERR')

    # hack introduced by VITO
    missing = np.isnan(tocr)
    reader.quality.values[missing] = 128
    # end of hack introduced by VITO

    writer.write_tocr(reader.quality.values, slice(0, xsize), slice(0, ysize), date, 'Z-QFLAG', 'Z-QFLAG')
    writer.write_tocr(reader.n_valid_obs.values, slice(0, xsize), slice(0, ysize), date, 'Z-NMOD', 'n_valid_obs')
    writer.write_tocr(solzenith, slice(0, xsize), slice(0, ysize), date, 'SOLZENITH', 'solzenith')
    writer.write_tocr(reader.latitude.values, slice(0, xsize), slice(0, ysize), date, 'latitude', 'latitude')
    writer.write_tocr(reader.longitude.values, slice(0, xsize), slice(0, ysize), date, 'longitude', 'longitude')

    print('Processed ' + str(date))
    exit_status("PROCESS_OK")
if __name__ == "__main__":
    try:
        main()
    except Exception as e:
        # log the failure before reporting the error exit status
        logging.exception(e)
        exit_status("UNABLE_TO_PROCESS")
def main(): """ This is the main function. It selects the right runner according the the sensor and run it (run by chunks if required). """ # TODO : The whole process of chunking could be done using a combination # of xarray and dask. This would be more reliable and cleaner. # parse command line arguments args = parse_args() # check informations about instruments we will treat: logging.info(f'Instruments to process : We gave a instrument list to process :{args.instruments}') instr_list = args.instruments chunk_list = args.chunksize # parse config files and mix it with command lines arguments dstore = DataStore() # TODO : check if we could write this part better if args.instruments: dstore.load_config(args.acf, args.pcf, startseries=args.startseries, dates=args.outputdates, debuglevel=args.debuglevel, window_predefined=args.window_predefined, window_lat=args.window_lat, window_lon=args.window_lon, keywords=args.keywords, config_format=args.config_format, instruments=args.instruments) else: dstore.load_config(args.acf, args.pcf, startseries=args.startseries, dates=args.outputdates, debuglevel=args.debuglevel, window_predefined=args.window_predefined, window_lat=args.window_lat, window_lon=args.window_lon, keywords=args.keywords, config_format=args.config_format) logdir = dstore.get('globalconfig', {}).get('logdir','.') # save DataStore for logging purposes dstore.to_yaml(f'{logdir}/dstore.yaml') # reload to double check it is well saved other_dstore = DataStore() other_dstore.load_full_yaml(f'{logdir}/dstore.yaml') other_dstore.to_yaml(f'{logdir}/dstore-copy.yaml') dstore = other_dstore for k, instrument in enumerate(instr_list): # Create chunks of data to process if chunk_list is not None: chunks = chunk_2D(dstore['xfullslice'], dstore['yfullslice'], chunk_list[k], chunk_list[k]) else: chunks = chunk_2D(dstore['xfullslice'], dstore['yfullslice'], None, None) # Run the code, in parallel if required if args.cpu > 1: # if parallel processing is required, use a Pool of process to run several runner in parallel, calling "process_one_chunk" global write_lock write_lock = Lock() with Pool(args.cpu, initializer=chunk_init, initargs=(write_lock,)) as p: results = p.starmap(process_one_chunk, [(instrument, i, c, args, dstore) for i,c in enumerate(chunks)]) for result in results: if isinstance(result, ExceptionInSubprocessWrapper): logging.error('Exception occured in chunk ' + str(result.info) + '"') result.log_error() exit_status('UNABLE_TO_PROCESS') else: # if no parallelization is requested (or if there is no chunking), run sequentially, using "process_one_chunk_no_parallel" logging.info(' ') logging.info('Now treating instrument ' + str(instrument)) for c in chunks: result = process_one_chunk_no_parallel(instrument, c, args, dstore) if isinstance(result, ExceptionInSubprocessWrapper): logging.warn('Exception occured in the chunk ' + str(result.info) + '"') result.log_error() exit_status('UNABLE_TO_PROCESS') exit_status("PROCESS_OK")
def get_data_sparse(self, sensor, internalkey):
    """ This function uses the config dictionary to locate the appropriate code that must be
    used to read the data. The piece of code to run is located in the folder 'readers'.
    The function 'get_data_reader()' (which is in readers/__init__.py) is responsible for
    transforming the string "readername" into data_reader_class (code that can read the data)
    and data_params (the list of parameters required by data_reader_class). Then the actual
    data is read and the return value of the get_data function is an object containing the
    data (along with some logging information).
    This function is used when each band needs to be read separately. It then aggregates
    the bands together. """
    # there are multiple values for this layer, one for each band.
    # Find the paths to the config for each band
    #~ band_keys = get_param_in_tree(self, ['input', sensor, internalkey], 'band_keys')
    try:
        band_indices = get_param_in_tree(self, ['input', sensor, internalkey], 'band_indexing_list')
    except Exception:
        logging.error(' Error in get_data_sparse ')
        exit_status('UNABLE_TO_PROCESS')
    paths = [['input', sensor, internalkey, band_key] for band_key in band_indices]

    data_objects = []
    for path in paths:
        # get the reader from the config
        readername = get_param_in_tree(self, path, 'data_reader_name')
        data_reader_class, data_params = get_data_reader(readername)
        # get the actual value of the parameters required by this reader
        data_params_dict = {p: get_param_in_tree(self, path, p) for p in data_params}
        data_object = data_reader_class(name=internalkey)
        # read the data
        logging.debug(f' using {readername} {path}, and parameters {data_params_dict.keys()}')
        data_object = data_object.load(**{
            'scenes_dates': get_param_in_tree(self, path, '_scenes_dates'),
            'xslice': get_param_in_tree(self, [], 'xslice'),
            'yslice': get_param_in_tree(self, [], 'yslice'),
            'dataloc': get_param_in_tree(self, path, '_dataloc'),
            **data_params_dict})
        data_objects.append(data_object)

    # now the list "data_objects" contains one matrix per band, let us merge them into one unique matrix.
    # a drawback of moving the data around like this is that it is slightly slower than loading
    # directly into the final matrix, but this is not a real problem considering
    # that the moves performed in memory are very fast compared to reading from disk.
    # the main advantage is to simplify the I/O code (and to easily allow a different
    # configuration for each band if needed)
    data_object = stack_it(data_objects, sensorname=sensor)
    logging.info(f'Data loaded {internalkey} : {data_object.values.shape} matrix for sensor {sensor}')
    return data_object
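# A minimal sketch (purely illustrative; the real lookup lives in readers/__init__.py via
# get_data_reader()) of the registry idea get_data_sparse() relies on : a reader name taken
# from the config is mapped to a reader class plus the list of config parameters that class
# needs, so the caller can collect those parameters generically before instantiating the reader.
def _sketch_reader_registry(readername):
    class _DummyReader:                       # stand-in for a real reader class
        def __init__(self, name):
            self.name = name

    _registry = {
        'dummy_netcdf_reader': (_DummyReader, ['layername', 'scale_factor']),
    }
    reader_class, required_params = _registry[readername]
    return reader_class, required_params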
def load_config(self, acf, pcf, dates=None, startseries=None, window_predefined=None,
                window_lat=None, window_lon=None, debuglevel=0, keywords=None,
                config_format=None, instruments=None):
    """ This is the main function to initialize a DataStore object.

    It reads the config files to get the relevant parameters, parses the input files to get
    their dates, and creates one DataBox for each step (date) to process.

    Required parameters are the algorithm config file (acf) and the product config file (pcf).
    Optional parameters to overwrite the values from the config files :

    :param dates: Start and end dates (as strings)
    :type dates: [string, string]
    :param startseries: For the first time step of the algorithm, a value set to 'False' will
        attempt to load an a priori BRDF checkpoint file, which must be specified in the pcf
        file. 'True' will not attempt to load a previous BRDF file and will use default values
        with high variance. Note that this parameter impacts **only the first date to be
        processed** : if several output dates are computed with the same command, the BRDF
        checkpoint for the next steps (other than the first one) **will** be used, even if
        the parameter startseries is set to 'True'.
    :type startseries: boolean
    :param window_predefined: not implemented.
    :param window_lat: list of two integers. Defines a rectangular region (a subset) to
        process. Only these pixels will be processed.
    :param window_lon: see window_lat
    :param debuglevel: the higher the value, the more verbose the output will be. Affects
        the fortran code only.
    :param keywords: a dictionary of {key: string}. BEFORE processing the yaml config files,
        all occurrences of "{key}" will be replaced by the string. Useful for batch processing
        on several regions using the same config file.
    :param config_format: for backward compatibility, if set to 'f90nml_c3s', the acf and pcf
        files will be transformed from a f90 namelist into standard yaml before being used.
    :type config_format: 'f90nml_c3s' or 'f90nml_msg'

    >>> to,lnk = '../test/testc3s_full/data_c3s_alldata','data_c3s_alldata'
    >>> if not os.path.realpath(lnk) == os.path.realpath(to): os.symlink(to, lnk)
    >>> dstore = DataStore();
    >>> dstore.load_config('../config/acf.c3s.multi-sensor.true.yaml', '../config/pcf.c3s.VGT.yaml', dates=[['2002-06-20','2002-07-20']], startseries=None, window_predefined=None, window_lat=[25,25], window_lon=[25,25], debuglevel=0, keywords={'year':'1998','name':'VGT','sensorname':'VGT','site':'Avignon'}, config_format='yaml', instruments=['VGT'])
    >>> dstore['n_channels_meas']
    [4]
    >>> dstore = DataStore();
    >>> dstore.load_config('../config/acf.c3s.multi-sensor.true.yaml', '../config/pcf.c3s.AVHRR_NOAA11.yaml', dates=[['2002-06-20','2002-07-20']], startseries=None, window_predefined=None, window_lat=[25,25], window_lon=[25,25], debuglevel=0, keywords={'year':'1988','name':'AVHRR11','sensorname':'AVHRR_NOAA11','site':'Avignon'}, config_format='yaml', instruments=['AVHRR_NOAA11'])
    >>> dstore['n_channels_meas']
    [3]

    #>>> dstore = DataStore();
    #>>> dstore.load_config('../config/acf.c3s.multi-sensor.true.yaml', '../config/pcf.c3s.AVHRR_NOAA7.yaml', dates=[['2002-06-20','2002-07-20']], startseries=None, window_predefined=None, window_lat=[25,25], window_lon=[25,25], debuglevel=0, keywords={'name':'AVHRR7','sensorname':'AVHRR_NOAA7','site':'Avignon'}, config_format='yaml', instruments=['AVHRR_NOAA7'])
    #>>> dstore['n_channels_meas']
    #[2]
    """
    if keywords is None:
        keywords = {}

    # if required, transform the config files from a f90 namelist into standard yaml
    if config_format == 'yaml':
        with open(acf, 'r') as f:
            self.acf = yaml.unsafe_load(f)
        with open(pcf, 'r') as f:
            self.pcf = yaml.unsafe_load(f)
    elif config_format == 'f90nml_c3s' or config_format == 'f90nmlc3s':
        self.acf, self.pcf = f90nml_to_yaml_c3s(acf, pcf)
    elif config_format == 'f90nml_msg' or config_format == 'f90nmlmsg':
        self.acf, self.pcf = f90nml_to_yaml_msg(acf, pcf)
    else:
        logging.critical(f'Unknown config format {config_format}')
        exit_status("UNABLE_TO_CONFIG")

    if config_format != 'yaml':
        # write the converted config files for logging purposes if required
        save_yaml(self.acf, f'log/{keywords.get("name","")}/{os.path.basename(acf)}')
        save_yaml(self.pcf, f'log/{keywords.get("name","")}/{os.path.basename(pcf)}')

    self['acf_file'] = acf
    self['pcf_file'] = pcf

    # all occurrences of "{key}" will be replaced by their value from the dictionary "keywords"
    self.acf = instanciate_keywords(self.acf, keywords)
    self.pcf = instanciate_keywords(self.pcf, keywords)

    if instruments is None:
        instruments = self.pcf['input'].keys()
        logging.debug(f'No instrument defined in parameters, using pcf file : instruments = {instruments}')
    logging.debug('Parsing acf and pcf files is OK')

    # global config hook : read the "globalconfig" parameters from the acf and pcf and make them
    # available everywhere. Useful to debug and trace some issues. But DO NOT use it too much,
    # it will break the logic of the code, create confusion, prevent maintainability and give bad karma.
    self['globalconfig'] = {}
    self['globalconfig'].update(self.acf.get('globalconfig', {}))
    self['globalconfig'].update(self.pcf.get('globalconfig', {}))
    self['globalconfig']['debuglevel'] = debuglevel

    try:
        self['n_channels_ref'] = self['globalconfig']['n_channels_ref']
    except KeyError:
        self['n_channels_ref'] = 4
        logging.warning(f'No n_channels_ref provided, using default value {self["n_channels_ref"]}')

    # initialisation of the 'input' dictionary
    self['input'] = dict()

    for j, instr in enumerate(instruments):
        # read relevant parameters from the acf and pcf files into the "self" config dict
        self[instr] = dict()
        self[instr]['output_dates'] = self._read_dates(
            args_dates=dates[j],
            filename=self.pcf['input'][instr]['output']['albedo'].get('filename', None)
                     or self.pcf['input'][instr]['output']['albedo']['band1']['filename'],
            configfile_dates=self.pcf['dates']['output_date'],
            date_filter=self.pcf['input'][instr]['output'].get('date_pattern', None))
        logging.info('We finished _read_dates in load_config, instr ' + str(instr))

        # read the checkpoint parameters
        self[instr]['inputcheckpoint'] = self.pcf['input'][instr]['inputcheckpoint']
        # use cache ?
        self.pcf['input'][instr]['use_cache'] = self.pcf['globalconfig']['use_cache']
        self[instr]['output'] = self.pcf['input'][instr]['output']
        self[instr]['startseries'] = (parse_boolean(startseries)
                                      or self.pcf.get('startseries', None)
                                      or self.acf.get('startseries', None)
                                      or self.acf.get('nam_inv', {}).get('startseries', None))

        # Change the firstdate to the one read in sensors_constants => first date
        #self[instr]['inputcheckpoint']['firstdate'] = self[instr]['output_dates'][0].strftime('%Y/%m/%d')

        # parse input filenames to get their dates
        # this should populate self['input'][instr] and some metadata (xsize, ysize, etc)
        self.setup_input_one_sensor(self.pcf['input'][instr], instr)

    # if cropping is required, restrict the size of the zone to process
    # note that these xfullslice, yfullslice (zone to process) are not
    # related to xslice, yslice (chunk to process)
    if window_lat:
        self['xfullslice'] = slice(window_lat[0] - 1, window_lat[1])
        self['xfullsize'] = self['xfullslice'].stop - self['xfullslice'].start
        logging.warning('Restricting to x slice : ' + str(self['xfullslice']))
    if window_lon:
        self['yfullslice'] = slice(window_lon[0] - 1, window_lon[1])
        self['yfullsize'] = self['yfullslice'].stop - self['yfullslice'].start
        logging.warning('Restricting to y slice : ' + str(self['yfullslice']))

    # TODO : n_channels_meas is duplicated in some places, need to clean this up when doing multisensor
    #~ firstsensor = self['input']['sensors'][0]
    #~ self['sensorname'] = firstsensor
    #~ if len(self['input']['sensors']) != 1:
    #~     raise Exception('Multiple sensors detected in input {str(self["input"]["sensors"])}. Not implemented')
    #~ self['n_channels_meas'] = self['input'][firstsensor]['n_channels_meas']
    self['sensorname'] = instruments  # self['input']['sensors'][:]
    self['n_channels_meas'] = list()
    list_to_del = list()
    for sensor in self.acf['spectral'].keys():
        if sensor in self['sensorname']:
            self['n_channels_meas'].append(self.acf['spectral'][sensor]['nbands'])
        else:
            # Deleting sensor in the acf['spectral'] tree
            list_to_del.append(sensor)
    for sensor in list_to_del:
        del self.acf['spectral'][sensor]
    logging.warning('we will work on these sensors : ' + str(self['sensorname']))
    self.load_acf(self.acf, sensorlist=self['sensorname'],
                  n_channel_meas=self['n_channels_meas'],
                  startseries=self[instr]['startseries'])
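# A minimal sketch (illustrative only) of the cropping arithmetic used in load_config above :
# window_lat and window_lon are 1-based inclusive bounds from the command line, so converting
# them to a python slice means shifting the start down by one and keeping the stop as-is.
def _sketch_window_to_slice(window=(25, 25)):
    xfullslice = slice(window[0] - 1, window[1])        # [25, 25] -> slice(24, 25)
    xfullsize = xfullslice.stop - xfullslice.start      # a 1-pixel window
    assert xfullsize == 1
    return xfullslice, xfullsize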