def get_preproc(type, input_adapter, output_handler, cfg):
    """
    Look up the Level-1 pre-processor class registered for `type` and
    return an initialized instance of it.

    :param type: identifier of the pre-processor class
        (`custom_orbit_segment`, `half_orbit` or `full_orbit`)
    :param input_adapter: A class that returns a L1bData object for a given input product file
    :param output_handler: A class that creates a pysiral l1p product from the merged L1bData object
    :param cfg: a treedict of options for the pre-processor
    :return: Initialized pre-processor class
    """

    # Registry of all known pre-processor classes
    known_classes = {
        "custom_orbit_segment": L1PreProcCustomOrbitSegment,
        "half_orbit": L1PreProcHalfOrbit,
        "full_orbit": L1PreProcFullOrbit,
    }
    preproc_cls = known_classes.get(type)

    # Unknown identifier: report it together with the list of valid ones
    if preproc_cls is None:
        msg_parts = ["Unrecognized Level-1 Pre-Processor class type: %s" % (str(type)),
                     "\nKnown types:"]
        msg_parts.extend("\n - %s" % key for key in known_classes)
        error = ErrorStatus(caller_id="Level1PreProcessor")
        error.add_error("invalid-l1preproc-class", "".join(msg_parts))
        error.raise_on_error()

    # Hand all arguments over to the selected class
    return preproc_cls(input_adapter, output_handler, cfg)
class Level2ProductDefinition(DefaultLoggingClass):
    """
    Main configuration class for the Level-2 Processor.

    Stores the run tag, the parsed Level-2 settings file and the list of
    output handlers registered via `add_output_definition`.
    """

    def __init__(self, run_tag, l2_settings_file):
        """
        :param run_tag: tag of the processor run (used as subdirectory of the output handlers)
        :param l2_settings_file: path of the Level-2 settings file (read with `get_yaml_config`)
        """
        super(Level2ProductDefinition, self).__init__(self.__class__.__name__)
        self.error = ErrorStatus(self.__class__.__name__)

        # Mandatory parameter
        self._run_tag = run_tag
        self._l2_settings_file = l2_settings_file
        self._parse_l2_settings()

        # Optional parameters (may be set to default values if not specified)
        self._output_handler = []

    def add_output_definition(self, output_def_file, period="default", overwrite_protection=True):
        """
        Register an additional output handler for this product definition.

        :param output_def_file: output definition passed to `DefaultLevel2OutputHandler`
        :param period: period identifier passed to the output handler
        :param overwrite_protection: flag passed to the output handler
        :return: None
        """
        # Set given or default output handler
        self._output_handler.append(DefaultLevel2OutputHandler(
            output_def=output_def_file,
            subdirectory=self.run_tag,
            period=period,
            overwrite_protection=overwrite_protection))

    def _parse_l2_settings(self):
        """
        Read the Level-2 settings file into `self._l2def`. Any exception
        while reading is registered as `invalid-l2-settings` error.
        """
        try:
            self._l2def = get_yaml_config(self._l2_settings_file)
        except Exception as ex:
            # The exception instance itself is handed over as the error message
            self.error.add_error("invalid-l2-settings", ex)
            self.error.raise_on_error()

    @property
    def run_tag(self):
        """ The run tag given at initialization (read by `add_output_definition`) """
        return self._run_tag
def get_files_from_time_range(self, time_range: DatePeriod) -> List[str]:
    """
    Query l1p files for a given time range.

    The search is done recursively in the folder
    `<l1p_base_dir>/[<file_version>/]<hemisphere>/<yyyy>/<mm>`, with year and
    month taken from `time_range.tcs`. The folder is remembered in
    `self._last_directory`.

    :param time_range: a dateperiods.DatePeriod instance
    :return: sorted list of the `*.nc` files (as returned by `Path.rglob`)
        for which `self.l1p_in_trange` is true
    """

    # Validate time_range (needs to be of type DatePeriod)
    if not isinstance(time_range, DatePeriod):
        error = ErrorStatus()
        msg = "Invalid type of time_range, required: dateperiods.DatePeriod, was %s" % (type(time_range))
        error.add_error("invalid-timerange-type", msg)
        error.raise_on_error()

    # 1) Assemble the monthly folder and collect all netCDF files in it
    year_str = "%04g" % time_range.tcs.year
    month_str = "%02g" % time_range.tcs.month
    search_dir = Path(self.l1p_base_dir)
    if self._file_version is not None:
        search_dir = search_dir / self._file_version
    search_dir = search_dir / self._hemisphere / year_str / month_str
    candidates = sorted(search_dir.rglob("*.nc"))

    # 2) Keep only files in the requested time range
    #    This serves two purposes: a) filter out files with timestamps that do
    #    not belong in the directory. b) get a subset if required
    matching_files = []
    for candidate in candidates:
        if self.l1p_in_trange(candidate, time_range):
            matching_files.append(candidate)

    # Save last search directory
    self._last_directory = search_dir

    return matching_files
def get_local_l1bdata_files(mission_id, time_range, hemisphere, config=None,
                            version="default", allow_multiple_baselines=True):
    """
    Return the l1bdata files for a given mission, hemisphere, version and
    time range, together with the directory that was searched.

    XXX: Note: this function will slowly replace `get_l1bdata_files`, which
         is limited to full month

    :param mission_id: key of the mission in the l1b repository definition
    :param time_range: time range object with `base_period == "monthly"`
        (TimeRangeIteration)
    :param hemisphere: name of the hemisphere sub folder
    :param config: a ConfigInfo instance (a new one is created if None or
        of any other type)
    :param version: key of the l1b repository version
    :param allow_multiple_baselines: if False, an error is raised when the
        files in the directory have more than one algorithm baseline
    :return: tuple (list of l1bdata files in the time range, search directory)
    """

    # parse config data (if not provided)
    if config is None or not isinstance(config, ConfigInfo):
        config = ConfigInfo()

    # Validate time_range (needs to be of type TimeRangeIteration)
    # Any failure to read `base_period` marks the object as invalid, but
    # KeyboardInterrupt/SystemExit are no longer swallowed.
    try:
        time_range_is_correct_object = time_range.base_period == "monthly"
    except Exception:
        time_range_is_correct_object = False
    if not time_range_is_correct_object:
        error = ErrorStatus()
        # required type first, actual type second
        msg = "Invalid type of time_range, required: %s, was %s" % (
            TimeRangeIteration, type(time_range))
        error.add_error("invalid-timerange-type", msg)
        error.raise_on_error()

    # 1) get list of all files for monthly folders
    yyyy, mm = "%04g" % time_range.start.year, "%02g" % time_range.start.month
    l1b_repo = config.local_machine.l1b_repository[mission_id][version].l1bdata
    directory = os.path.join(l1b_repo, hemisphere, yyyy, mm)
    all_l1bdata_files = sorted(glob.glob(os.path.join(directory, "*.nc")))

    # 2) First filtering step: Check if different algorithm baseline values
    #    exist in the list of l1bdata files
    algorithm_baselines = [l1bdata_get_baseline(f) for f in all_l1bdata_files]
    baselines = np.unique(np.array(algorithm_baselines))
    n_baselines = len(baselines)
    if not allow_multiple_baselines and n_baselines > 1:
        error = ErrorStatus()
        baseline_str_list = ", ".join(baselines)
        msg = "Multiple l1bdata baselines (%g) [%s] found in directory: %s" % (
            n_baselines, baseline_str_list, directory)
        error.add_error("multiple-l1b-baselines", msg)
        error.raise_on_error()

    # 3) Check if files are in requested time range
    #    This serves two purposes: a) filter out files with timestamps that do
    #    not belong in the directory. b) get a subset if required
    l1bdata_files_checked = [l1bdata_file for l1bdata_file in all_l1bdata_files
                             if l1bdata_in_trange(l1bdata_file, time_range)]

    # Done return list (empty or not) and the search directory
    return l1bdata_files_checked, directory
class Level2PreProcessor(DefaultLoggingClass):
    """
    Merges l2i files into a single Level-2 data object and writes it with
    the output handler of the product definition.
    """

    def __init__(self, product_def):
        """
        :param product_def: a Level2PreProcProductDefinition instance
        """
        super(Level2PreProcessor, self).__init__(self.__class__.__name__)
        self.error = ErrorStatus()

        # The product definition must be of the dedicated type
        definition_is_valid = isinstance(product_def, Level2PreProcProductDefinition)
        if not definition_is_valid:
            self.error.add_error(
                "invalid-l2preproc-def",
                "Invalid Level-2 PreProcessor product definition: %s" % type(product_def))
            self.error.raise_on_error()
        self._job = product_def

    @property
    def job(self):
        """ The product definition given at initialization """
        return self._job

    def process_l2i_files(self, l2i_files, period):
        """
        Reads all l2i files and merges the valid data into a l2p summary file

        :param l2i_files: iterable of l2i file paths
        :param period: period object passed to the l2p container
        :return: None
        """

        # Collect the content of all readable l2i files in one container
        # (files that cannot be read are logged and skipped).
        # NOTE: Only memory is the limit
        container = Level2PContainer(period)
        for l2i_file in l2i_files:
            try:
                l2i = L2iNCFileImport(l2i_file)
            except Exception as ex:
                self.log.error("Error (%s) in l2i file: %s" % (ex, Path(l2i_file).name))
            else:
                container.append_l2i(l2i)

        # Merge the l2i objects to a single L2Data object
        merged = container.get_merged_l2()
        if merged is None:
            self.log.warning("- No valid freeboard data found for, skip day")
            return

        # Write output
        output = Level2Output(merged, self.job.output_handler)
        log_args = (self.job.output_handler.id, output.export_filename)
        self.log.info("- Wrote %s data file: %s" % log_args)
def get_local_l1bdata_files(mission_id, time_range, hemisphere, config=None,
                            version="default", allow_multiple_baselines=True):
    """
    Return the l1bdata files for a given mission, hemisphere, version and
    time range, together with the directory that was searched.

    XXX: Note: this function will slowly replace `get_l1bdata_files`, which
         is limited to full month

    :param mission_id: key of the mission in the l1b repository definition
    :param time_range: a dateperiods.DatePeriod instance
    :param hemisphere: name of the hemisphere sub folder
    :param config: configuration object (the package configuration
        `psrlcfg` is used if None or of a different type)
    :param version: key of the l1b repository version
    :param allow_multiple_baselines: not evaluated by this implementation
    :return: tuple (list of l1bdata files in the time range, search directory)
    """

    # parse config data (if not provided)
    # `isinstance` requires a class as second argument, while `psrlcfg` is
    # used as a configuration object below: compare against its type
    # (unless `psrlcfg` already is a class).
    config_type = psrlcfg if isinstance(psrlcfg, type) else type(psrlcfg)
    if config is None or not isinstance(config, config_type):
        config = psrlcfg

    # Validate time_range (needs to be of type dateperiods.DatePeriod)
    if not isinstance(time_range, DatePeriod):
        error = ErrorStatus()
        msg = "Invalid type of time_range, required: dateperiods.DatePeriod, was %s" % (
            type(time_range))
        error.add_error("invalid-timerange-type", msg)
        error.raise_on_error()

    # 1) get list of all files for monthly folders
    yyyy, mm = "%04g" % time_range.tcs.year, "%02g" % time_range.tcs.month
    repo_branch = config.local_machine.l1b_repository[mission_id][version]
    directory = Path(repo_branch["l1p"]) / hemisphere / yyyy / mm
    all_l1bdata_files = sorted(directory.glob("*.nc"))

    # 2) Check if files are in requested time range
    #    This serves two purposes: a) filter out files with timestamps that do
    #    not belong in the directory. b) get a subset if required
    l1bdata_files_checked = [
        l1bdata_file for l1bdata_file in all_l1bdata_files
        if l1bdata_in_trange(l1bdata_file, time_range)
    ]

    # Done return list (empty or not) and the search directory
    return l1bdata_files_checked, directory
def MaskSourceFile(mask_name, mask_cfg):
    """
    Wrapper method for different mask source file classes.

    The class named `mask_cfg.pyclass_name` is looked up in the globals of
    this module and initialized with the mask directory from the local
    machine definition.

    :param mask_name: key of the mask in `auxdata_repository.mask` of the
        local machine definition
    :param mask_cfg: mask configuration (must provide `pyclass_name`)
    :return: the initialized mask class (None if the class is unknown and
        `raise_on_error` returns)
    """

    error = ErrorStatus(caller_id="MaskSourceFile")

    try:
        mask_dir = psrlcfg.local_machine.auxdata_repository.mask[mask_name]
    except KeyError:
        mask_dir = None
        msg = "path to mask %s not in local_machine_def.yaml" % mask_name
        error.add_error("missing-lmd-def", msg)
        error.raise_on_error()

    # Resolve the class first: only a failed lookup means "not implemented".
    # A KeyError raised inside the class constructor must not be reported
    # as a missing class, therefore the instantiation is outside the try.
    try:
        mask_class = globals()[mask_cfg.pyclass_name]
    except KeyError:
        msg = "pysiral.mask.%s not implemented" % str(mask_cfg.pyclass_name)
        error.add_error("missing-mask-class", msg)
        error.raise_on_error()
        return None

    # Return the Dataset class
    return mask_class(mask_dir, mask_name, mask_cfg)
class ICESatGLAH13Repository(DefaultLoggingClass):
    """
    Lookup of GLAH13 files in a local repository with `yyyy/mm` sub folders
    """

    # Search pattern for GLAH13 files in a monthly folder
    _GLAH13_SEARCH = r"GLAH13_*.H5"

    def __init__(self, local_repository_path):
        """
        :param local_repository_path: main directory of the local GLAH13 repository
        """

        # Init class and error handler
        class_name = self.__class__.__name__
        super(ICESatGLAH13Repository, self).__init__(class_name)
        self.error = ErrorStatus(caller_id=class_name)

        # The repository path is only stored if it is an existing directory
        if not os.path.isdir(str(local_repository_path)):
            msg = "Invalid GLAH13 directory: %s" % str(local_repository_path)
            self.error.add_error("invalid-dir", msg)
            self.error.raise_on_error()
            return
        self._local_repository_path = local_repository_path

    @property
    def local_repository_path(self):
        """ Main directory of the local repository """
        return self._local_repository_path

    def get_glah13_hdfs(self, time_range):
        """
        Return the sorted list of GLAH13 files in the folder of the month
        in which the time range starts.

        :param time_range: object with a `start` datetime attribute
        :return: sorted list of file paths
        """
        monthly_folder = self._get_full_path(time_range)
        pattern = os.path.join(monthly_folder, self._GLAH13_SEARCH)
        matches = glob(pattern)
        return sorted(matches)

    def _get_full_path(self, time_range):
        """ Monthly folder for the start of the time range (assuming the time range is monthly) """
        return os.path.join(
            self.local_repository_path,
            "%04g" % time_range.start.year,
            "%02g" % time_range.start.month)
class Warren99AMSR2ClimDataContainer(object):
    """
    A dedicated data container for the merged W99/AMSR2 snow climatology.
    This class has been introduced with the use of daily scaling that
    requires data to be loaded also from months adjacent to the month of the
    current Level-2 data object
    """

    def __init__(self, cfg, use_daily_scaling):
        """
        Init the class
        :param cfg: A copy of the auxdata class configuration
        :param use_daily_scaling: flag, if True fields are interpolated
            between the reference dates of adjacent months
        """

        # Properties
        self.cfg = cfg
        self.use_daily_scaling = use_daily_scaling
        self.data = None
        self.filepaths = []
        self.error = ErrorStatus()

    def load(self):
        """
        Load the data of all winter months (see `month_nums`) into memory.
        Does nothing if the data has already been loaded. A missing file is
        registered as `invalid-filepath` error.
        :return: None
        """

        # Check if data is already loaded
        if self.has_data_loaded:
            return

        # Load the data of all months
        self.data = []
        for month_num in self.month_nums:

            # Get the target file path
            filepath = self.get_filepath(month_num)

            # Read the data set (and raise hard error if input is missing)
            try:
                nc = open_dataset(filepath)
            except FileNotFoundError:
                msg = "Could not locate file: {}".format(filepath)
                self.error.add_error("invalid-filepath", msg)
                self.error.raise_on_error()
            else:
                self.data.append(nc)
                self.filepaths.append(filepath)

    def get_lonlat(self):
        """
        Return longitude and latitude variables
        :return: tuple (longitude values, latitude values)
        """
        # The grid is the same for all months, therefore we can just retrieve
        # the fields from the first data set
        dset = self.data[0]
        return dset.longitude.values, dset.latitude.values

    def get_var(self, parameter_name, date_tuple):
        """
        Get a geophysical variable from the netCDF(s). If daily scaling is
        activated, the date information given by date tuple will be used to
        create output fields that are interpolated between adjacent months.
        :param parameter_name: name of the variable in the data sets
        :param date_tuple: (year, month, day) as integers (tuple or list)
        :return: the values of the (monthly or interpolated) field
        """

        month_num = date_tuple[1]

        # 1. daily scaling is off
        #    -> return the field of the data set for the corresponding month
        if not self.use_daily_scaling:
            return self.get_monthly_field(month_num, parameter_name)

        # 2. daily scaling is on and requested date is a reference date
        #    -> return the field of the data set for the reference date
        #    The reference dates are lists, and a tuple never compares equal
        #    to a list: convert the slice to support tuples and lists alike.
        if list(date_tuple[1:]) in self.reference_dates:
            return self.get_monthly_field(month_num, parameter_name)

        # 3. daily scaling is on and requested date is between reference dates
        #    -> return a linear interpolated field based on the distance to
        #       the two enclosing reference dates
        return self.get_weighted_variable(date_tuple, parameter_name)

    def get_filepath(self, month_num):
        """
        Return the file path for a given month
        :param month_num: Number of month (1-12)
        :return: pathlib.Path of the file
        """

        # Create a dictionary for automatic filepath completion
        date_dict = dict(month="{:02g}".format(month_num))

        # Main directory
        path = Path(self.cfg.local_repository)

        # Add the subfolders
        for subfolder_tag in self.cfg.subfolders:
            path = path / date_dict[subfolder_tag]

        # Fill the placeholders of the file naming convention
        # (all placeholders must be keys of the date dictionary)
        placeholders = re.findall(r"{.*?}", self.cfg.filenaming)
        period_dict = {}
        for placeholder in placeholders:
            attr_name = placeholder[1:-1]
            period_dict[attr_name] = date_dict[attr_name]
        filename = self.cfg.filenaming.format(**period_dict)

        return path / filename

    def get_monthly_field(self, month_num, parameter_name):
        """
        Return the monthly field for given parameter name
        :param month_num: number of the month (must be in `month_nums`)
        :param parameter_name: name of the variable in the data set
        :return: the values of the variable
        """
        index = self.month_nums.index(month_num)
        variable = getattr(self.data[index], parameter_name, None)
        if variable is None:
            msg = "Dataset has no variable: {}".format(parameter_name)
            self.error.add_error("invalid-variable", msg)
            self.error.raise_on_error()
        return variable.values

    def get_reference_month_nums(self, date_tuple):
        """
        Return the two months required for the interpolation.
        :param date_tuple: [year, month, day] as integer
        :return: month_left, month_right, weight_factor
            (10, 11, NaN if no reference date pair encloses the date)
        """

        # Compute the difference in days between the requested day and
        # all reference dates of the winter
        requested_date_dt = datetime(*date_tuple)
        ref_datetimes = self.get_reference_datetimes(date_tuple)
        ref_date_offset = [(requested_date_dt - dt).days for dt in ref_datetimes]

        # Find the index of the first month where the difference in days is
        # negative (right boundary). NOTE: argmax also returns 0 if no
        # difference is negative, which results in a left index of -1.
        month_right_index = int(np.argmax(np.array(ref_date_offset) < 0))
        month_left_index = month_right_index - 1

        # Check solution
        if month_left_index < 0:
            logger.warning(
                "Target month is outside data coverage, weighting factor -> NaN"
            )
            return 10, 11, np.nan

        month_left = self.month_nums[month_left_index]
        month_right = self.month_nums[month_right_index]

        # Compute the weighting factor (0: left reference date, 1: right reference date)
        period_n_days = (ref_datetimes[month_right_index] - ref_datetimes[month_left_index]).days
        weight_factor = float(ref_date_offset[month_left_index]) / float(period_n_days)

        # All done
        return month_left, month_right, weight_factor

    def get_reference_datetimes(self, date_tuple):
        """
        Creates datetime objects for the reference dates for the actual winter season
        :param date_tuple: [year, month, day] as integer
        :return: list of datetimes (one per reference date)
        """

        # Get the winter id (year of October for October - April winter)
        winter_id = date_tuple[0] - int(date_tuple[1] < 10)

        # October - December belong to the winter id year, January - April to the following
        year_vals = [winter_id] * 3 + [winter_id + 1] * 4
        return [datetime(yyyy, mm, dd) for yyyy, (mm, dd) in zip(year_vals, self.reference_dates)]

    def get_weighted_variable(self, date_tuple, parameter_name):
        """
        Compute the weighted variable between two reference dates
        :param date_tuple: [year, month, day] as integer
        :param parameter_name: name of the variable in the data sets
        :return: the linearly interpolated field
        """

        # Get the fields of both reference months
        month_num_left, month_num_right, weight_factor = self.get_reference_month_nums(date_tuple)
        var_left = self.get_monthly_field(month_num_left, parameter_name)
        var_right = self.get_monthly_field(month_num_right, parameter_name)

        # Apply the relative distance (0: var_left, 1: var_right)
        return var_left + weight_factor * (var_right - var_left)

    @property
    def w99_weight(self):
        """
        Return the static regional mask for the merged climatology
        (taken from the first data set)
        """
        return self.data[0].w99_weight.values

    @property
    def has_data_loaded(self):
        """ Status flag if data is present for the current data period """
        return self.data is not None

    @property
    def month_nums(self):
        """ Month numbers of the winter season in the order of the data sets """
        return [10, 11, 12, 1, 2, 3, 4]

    @property
    def reference_dates(self):
        """
        Return the reference dates [month, day] of the monthly fields
        (same order as `month_nums`)
        """
        return [
            [10, 1],    # October 1st (to get full coverage of October)
            [11, 15],
            [12, 15],
            [1, 15],
            [2, 15],
            [3, 15],
            [4, 30],    # April 30th (to get full coverage of April)
        ]
class ESACryoSat2PDSBaselineD(DefaultLoggingClass):
    """
    Input adapter that converts a CryoSat-2 Baseline-D netCDF file
    into a Level-1 data object (see `get_l1`).
    """

    def __init__(self, cfg, raise_on_error=False):
        """
        :param cfg: configuration with the `range_correction_targets` and
            `classifier_targets` mappings (target name -> netCDF variable name)
        :param raise_on_error: flag, stored as attribute (not evaluated in this class)
        """

        cls_name = self.__class__.__name__
        super(ESACryoSat2PDSBaselineD, self).__init__(cls_name)
        self.error = ErrorStatus(caller_id=cls_name)

        # Store arguments
        self.raise_on_error = raise_on_error
        self.cfg = cfg

        # Init main class variables
        self.nc = None

    @staticmethod
    def translate_opmode2radar_mode(op_mode):
        """
        Converts the ESA operation mode str in the pysiral compliant version
        :param op_mode: ESA operation mode (`sar`, `lrm`, `sarin`)
        :return: the radar mode (`sar`, `lrm`, `sin`) or None for unknown modes
        """
        translate_dict = {"sar": "sar", "lrm": "lrm", "sarin": "sin"}
        return translate_dict.get(op_mode, None)

    def get_l1(self, filepath, polar_ocean_check=None):
        """
        Main entry point to the CryoSat-2 Baseline-D Input Adapter
        :param filepath: path of the input netCDF file
        :param polar_ocean_check: optional object with a
            `has_polar_ocean_segments(info)` method; the file is skipped
            if the method returns False for the file metadata
        :return: the Level-1 data object or `self.empty` (None) if the file
            is invalid, could not be parsed or failed the polar ocean check
        """

        timer = StopWatch()
        timer.start()

        # Save filepath
        self.filepath = filepath

        # Create an empty Level-1 data object
        self.l1 = Level1bData()

        # Input Validation
        if not os.path.isfile(filepath):
            msg = "Not a valid file: %s" % filepath
            self.log.warning(msg)
            self.error.add_error("invalid-filepath", msg)
            return self.empty

        # Parse the input file
        # NOTE(review): `self.error` is not reset between calls. If
        # `ErrorStatus.status` stays set after `add_error`, one failing file
        # would make all later calls return empty -- TODO confirm
        self._read_input_netcdf(filepath, attributes_only=True)
        if self.error.status:
            return self.empty

        # Get metadata
        self._set_input_file_metadata()
        if polar_ocean_check is not None:
            has_polar_ocean_data = polar_ocean_check.has_polar_ocean_segments(self.l1.info)
            if not has_polar_ocean_data:
                timer.stop()
                return self.empty

        # Polar ocean check passed, now fill the rest of the l1 data groups
        self._set_l1_data_groups()

        timer.stop()
        self.log.info("- Created L1 object in %.3f seconds" % timer.get_seconds())

        # Return the l1 object
        return self.l1

    @staticmethod
    def get_wfm_range(window_delay, n_range_bins):
        """
        Returns the range for each waveform bin based on the window delay and the number of range bins
        :param window_delay: The two-way delay to the center of the range window in seconds
        :param n_range_bins: The number of range bins (256: sar, 512: sin)
        :return: The range for each waveform bin as array (time, ns)
        """
        lightspeed = 299792458.0
        bandwidth = 320000000.0

        # The two way delay time give the distance to the central bin
        central_window_range = window_delay * lightspeed / 2.0

        # Calculate the offset from the center to the first range bin
        window_size = (n_range_bins * lightspeed) / (4.0 * bandwidth)
        first_bin_offset = window_size / 2.0

        # Calculate the range increment for each bin
        range_increment = np.arange(n_range_bins) * lightspeed / (4.0 * bandwidth)

        # Reshape the arrays
        range_offset = np.tile(range_increment, (window_delay.shape[0], 1)) - first_bin_offset
        window_range = np.tile(central_window_range, (n_range_bins, 1)).transpose()

        # Compute the range for each bin and return
        wfm_range = window_range + range_offset
        return wfm_range

    @staticmethod
    def interp_1Hz_to_20Hz(variable_1Hz, time_1Hz, time_20Hz, **kwargs):
        """
        Computes a simple linear interpolation to transform a 1Hz into a 20Hz variable
        :param variable_1Hz: an 1Hz variable array
        :param time_1Hz: 1Hz reference time
        :param time_20Hz: 20 Hz reference time
        :param kwargs: additional keywords for `interpolate.interp1d` (e.g. `kind`)
        :return: tuple (the interpolated 20Hz variable, error flag). If the
            interpolation raises a ValueError, the variable is filled with
            NaN and the error flag is True
        """
        error_status = False
        try:
            f = interpolate.interp1d(time_1Hz, variable_1Hz, bounds_error=False, **kwargs)
            variable_20Hz = f(time_20Hz)
        except ValueError:
            fill_value = np.nan
            variable_20Hz = np.full(time_20Hz.shape, fill_value)
            error_status = True
        return variable_20Hz, error_status

    def _read_input_netcdf(self, filepath, attributes_only=False):
        """
        Read the netCDF file via xarray. Failures are registered as
        `xarray-parse-error` in `self.error`.
        :param filepath: path of the input netCDF file
        :param attributes_only: currently not evaluated
        :return: None
        """
        try:
            self.nc = xarray.open_dataset(filepath, decode_times=False, mask_and_scale=True)
        except Exception:
            # Any parsing problem marks the file as unusable, but
            # KeyboardInterrupt/SystemExit must not be swallowed here
            msg = "Error encountered by xarray parsing: %s" % filepath
            self.error.add_error("xarray-parse-error", msg)
            self.log.warning(msg)
            return

    def _set_input_file_metadata(self):
        """ Fill the product info with the global attributes of the netCDF file """

        # Short cuts
        metadata = self.nc.attrs
        info = self.l1.info

        # Processing environment metadata
        info.set_attribute("pysiral_version", pysiral_version)

        # General product metadata
        info.set_attribute("mission", "cryosat2")
        info.set_attribute("mission_sensor", "siral")
        info.set_attribute("mission_data_version", "D")
        info.set_attribute("orbit", metadata["abs_orbit_start"])
        info.set_attribute("cycle", metadata["cycle_number"])
        info.set_attribute("mission_data_source", filename_from_path(self.filepath))
        info.set_attribute("timeliness", cs2_procstage2timeliness(metadata["processing_stage"]))

        # Time-Orbit Metadata (the attribute values are scaled with 1e-6)
        lats = [float(metadata["first_record_lat"]) * 1e-6, float(metadata["last_record_lat"]) * 1e-6]
        lons = [float(metadata["first_record_lon"]) * 1e-6, float(metadata["last_record_lon"]) * 1e-6]
        info.set_attribute("start_time", parse_datetime_str(metadata["first_record_time"][4:]))  # TAI=....
        info.set_attribute("stop_time", parse_datetime_str(metadata["last_record_time"][4:]))  # TAI=....
        info.set_attribute("lat_min", np.amin(lats))
        info.set_attribute("lat_max", np.amax(lats))
        info.set_attribute("lon_min", np.amin(lons))
        info.set_attribute("lon_max", np.amax(lons))

        # Product Content Metadata
        # (100 percent for the mode matching `sir_op_mode`, 0 for all others)
        for mode in ["sar", "sin", "lrm"]:
            percent_value = 0.0
            if metadata["sir_op_mode"].strip().lower() == mode:
                percent_value = 100.
            info.set_attribute("{}_mode_percent".format(mode), percent_value)
        info.set_attribute("open_ocean_percent", float(metadata["open_ocean_percent"]) * 0.01)

    def _set_l1_data_groups(self):
        """
        Fill all data groups of the Level-1 data object with the content of the netCDF file. This is just the
        overview method, see specific sub-methods below
        :return: None
        """
        self._set_time_orbit_data_group()
        self._set_waveform_data_group()
        self._set_range_correction_group()
        self._set_surface_type_group()
        self._set_classifier_group()

    def _set_time_orbit_data_group(self):
        """
        Transfer the time orbit parameter from the netcdf to l1 data object
        :return: None
        """

        # Transfer the timestamp
        # NOTE: Here it is critical that the xarray does not automatically decodes time since it is
        #       difficult to work with the numpy datetime64 date format. Better to compute datetimes using
        #       a know num2date conversion
        tai_datetime = num2date(self.nc.time_20_ku.values, units=self.nc.time_20_ku.units)
        converter = UTCTAIConverter()
        utc_timestamp = converter.tai2utc(tai_datetime, check_all=False)
        self.l1.time_orbit.timestamp = utc_timestamp

        # Set the geolocation
        self.l1.time_orbit.set_position(
            self.nc.lon_20_ku.values,
            self.nc.lat_20_ku.values,
            self.nc.alt_20_ku.values,
            self.nc.orb_alt_rate_20_ku.values)

        # Set antenna attitude
        self.l1.time_orbit.set_antenna_attitude(
            self.nc.off_nadir_pitch_angle_str_20_ku.values,
            self.nc.off_nadir_roll_angle_str_20_ku.values,
            self.nc.off_nadir_yaw_angle_str_20_ku.values)

    def _set_waveform_data_group(self):
        """
        Transfer of the waveform group to the Level-1 object. This includes
          1. the computation of waveform power in Watts
          2. the computation of the window delay in meter for each waveform bin
          3. extraction of the waveform valid flag
        :return: None
        """

        # Get the waveform
        # NOTE: Convert the waveform units to Watts. From the documentation the scaling is applied as follows:
        #       pwr_waveform_20_ku(time, ns) * echo_scale_factor_20_ku(time, ns) * 2 ^ echo_scale_pwr_20_ku(time)
        wfm_linear = self.nc.pwr_waveform_20_ku.values

        # Get the shape of the waveform array
        dim_time, dim_ns = wfm_linear.shape

        # Scaling parameter are 1D -> Replicate to same shape as waveform array
        echo_scale_factor = self.nc.echo_scale_factor_20_ku.values
        echo_scale_pwr = self.nc.echo_scale_pwr_20_ku.values
        echo_scale_factor = np.tile(echo_scale_factor, (dim_ns, 1)).transpose()
        echo_scale_pwr = np.tile(echo_scale_pwr, (dim_ns, 1)).transpose()

        # Convert the waveform from linear counts to Watts
        wfm_power = wfm_linear * echo_scale_factor * 2.0**echo_scale_pwr

        # Get the window delay
        # From the documentation:
        #   Calibrated 2-way window delay: distance from CoM to middle range window (at sample ns/2 from 0).
        #   It includes all the range corrections given in the variable instr_cor_range and in the
        #   variable uso_cor_20_ku. This is a 2-way time and 2-way corrections are applied.
        window_delay = self.nc.window_del_20_ku.values

        # Convert window delay to range for each waveform range bin
        wfm_range = self.get_wfm_range(window_delay, dim_ns)

        # Make sure that parameter are float and not double
        # -> Import for cythonized algorithm parts (ctfrma specifically uses floats)
        wfm_power = wfm_power.astype(np.float32)
        wfm_range = wfm_range.astype(np.float32)

        # Set the waveform
        op_mode = str(self.nc.attrs["sir_op_mode"].strip().lower())
        radar_mode = self.translate_opmode2radar_mode(op_mode)
        self.l1.waveform.set_waveform_data(wfm_power, wfm_range, radar_mode)

        # Get the valid flags (valid: measurement confidence flag is 0)
        measurement_confident_flag = self.nc.flag_mcd_20_ku.values
        valid_flag = measurement_confident_flag == 0
        self.l1.waveform.set_valid_flag(valid_flag)

    def _set_range_correction_group(self):
        """
        Transfer the range corrections defined in the l1p config file to the Level-1 object
        NOTE: The range corrections are all in 1 Hz and must be interpolated to 20Hz
        :return: None
        """

        # Get the reference times for interpolating the range corrections from 1Hz -> 20Hz
        time_1Hz = self.nc.time_cor_01.values
        time_20Hz = self.nc.time_20_ku.values

        # Loop over all range correction variables defined in the processor definition file
        for key in self.cfg.range_correction_targets.keys():
            pds_var_name = self.cfg.range_correction_targets[key]
            variable_1Hz = getattr(self.nc, pds_var_name)
            variable_20Hz, error_status = self.interp_1Hz_to_20Hz(variable_1Hz.values, time_1Hz, time_20Hz)
            if error_status:
                msg = "- Error in 20Hz interpolation for variable `%s` -> set only dummy" % pds_var_name
                self.log.warning(msg)
            self.l1.correction.set_parameter(key, variable_20Hz)

    def _set_surface_type_group(self):
        """
        Transfer of the surface type flag to the Level-1 object
        NOTE: In the current state (TEST dataset), the surface type flag is only 1 Hz. A nearest neighbour
              interpolation is used to get the 20Hz surface type flag.
        :return: None
        """

        # Get the reference times for interpolating the flag from 1Hz -> 20Hz
        time_1Hz = self.nc.time_cor_01.values
        time_20Hz = self.nc.time_20_ku.values

        # Interpolate 1Hz surface type flag to 20 Hz
        surface_type_1Hz = self.nc.surf_type_01.values
        surface_type_20Hz, error_status = self.interp_1Hz_to_20Hz(
            surface_type_1Hz, time_1Hz, time_20Hz, kind="nearest")
        if error_status:
            msg = "- Error in 20Hz interpolation for variable `surf_type_01` -> set only dummy"
            self.log.warning(msg)

        # Set the flag
        for key in ESA_SURFACE_TYPE_DICT.keys():
            flag = surface_type_20Hz == ESA_SURFACE_TYPE_DICT[key]
            self.l1.surface_type.add_flag(flag, key)

    def _set_classifier_group(self):
        """
        Transfer the classifiers defined in the l1p config file to the Level-1 object.
        NOTE: It is assumed that all classifiers are 20Hz

        In addition, a few legacy parameter are computed based on the waveform counts that is only available at
        this stage. Computation of other parameter such as sigma_0, leading_edge_width, ... are moved to the
        post-processing
        :return: None
        """

        # Loop over all classifier variables defined in the processor definition file
        for key in self.cfg.classifier_targets.keys():
            variable_20Hz = getattr(self.nc, self.cfg.classifier_targets[key])
            self.l1.classifier.add(variable_20Hz, key)

        # Calculate Parameters from waveform counts
        # XXX: This is a legacy of the CS2AWI IDL processor
        #      Threshold defined for waveform counts not power in dB
        wfm_counts = self.nc.pwr_waveform_20_ku.values

        # Calculate the OCOG Parameter (CryoSat-2 notation)
        ocog = CS2OCOGParameter(wfm_counts)
        self.l1.classifier.add(ocog.width, "ocog_width")
        self.l1.classifier.add(ocog.amplitude, "ocog_amplitude")

        # Calculate the Peakiness (CryoSat-2 notation)
        pulse = CS2PulsePeakiness(wfm_counts)
        self.l1.classifier.add(pulse.peakiness, "peakiness")
        self.l1.classifier.add(pulse.peakiness_r, "peakiness_r")
        self.l1.classifier.add(pulse.peakiness_l, "peakiness_l")

        # fmi version: Calculate the LTPP
        ltpp = CS2LTPP(wfm_counts)
        self.l1.classifier.add(ltpp.ltpp, "late_tail_to_peak_power")

        # Get satellite velocity vector (classifier needs to be vector -> manual extraction needed)
        satellite_velocity_vector = self.nc.sat_vel_vec_20_ku.values
        self.l1.classifier.add(satellite_velocity_vector[:, 0], "satellite_velocity_x")
        self.l1.classifier.add(satellite_velocity_vector[:, 1], "satellite_velocity_y")
        self.l1.classifier.add(satellite_velocity_vector[:, 2], "satellite_velocity_z")

    @property
    def empty(self):
        """ Return value of `get_l1` if no Level-1 object can be created """
        return None
class DefaultAuxdataClassHandler(DefaultLoggingClass):
    """
    Class for retrieving handler classes for auxiliary data
    (mss, sic, sitype, snow). The classes are initialized with directory
    information from the local machine definition and the auxdata information
    from `auxdata.yaml` configuration file.
    """

    def __init__(self):
        super(DefaultAuxdataClassHandler, self).__init__(self.__class__.__name__)
        self.pysiral_config = ConfigInfo()
        self.error = ErrorStatus(caller_id=self.__class__.__name__)

    def get_pyclass(self, auxdata_class, auxdata_id, l2_procdef_opt):
        """
        Returns a class for handling auxiliary data files, that is initialized
        with auxdata settings in `config/auxdata_def.yaml` and with the
        directory specified in `local_machine_def.yaml`

        Args:
            auxdata_class (str): Auxdata class (e.g. mss, sic, sitype, snow)
            auxdata_id (str): Auxdata class identifier (e.g. osisaf)
            l2_procdef_opt (dict or None): options from the l2 processor
                settings, applied after (and thus overriding) the default
                options of the auxdata definition

        Returns:
            class: The initialized auxdata handler class
        """

        # Clear errors
        self.error.reset()

        # Initialize the class with information from auxdata_def.yaml
        auxdata_def = self.get_auxdata_def(auxdata_class, auxdata_id)
        if auxdata_def is None:
            error_id = "auxdata_missing_definition"
            error_message = PYSIRAL_ERROR_CODES[error_id] % (auxdata_class, auxdata_id)
            self.error.add_error(error_id, error_message)
            self.error.raise_on_error()

        # Set the auxdata config
        cfg = AuxClassConfig()

        # connect to repository on local machine
        if "local_repository" in auxdata_def:
            local_repository_id = auxdata_def.local_repository
            local_repo = self.get_local_repository(auxdata_class, local_repository_id)
            if local_repo is None and local_repository_id is not None:
                error_id = "auxdata_missing_localrepo_def"
                error_message = PYSIRAL_ERROR_CODES[error_id] % (auxdata_class, auxdata_id)
                self.error.add_error(error_id, error_message)
                self.error.raise_on_error()
            cfg.set_local_repository(local_repo)

        # set doc str (should be mandatory for all auxdata handlers)
        if "long_name" in auxdata_def:
            cfg.set_long_name(auxdata_def.long_name)

        # set filename (e.g. for mss)
        if "filename" in auxdata_def:
            local_repository_id = auxdata_def.local_repository
            local_repo = self.get_local_repository(auxdata_class, local_repository_id)
            filename = os.path.join(local_repo, auxdata_def.filename)
            cfg.set_filename(filename)

        # set filenaming (e.g. for sic, sitype, snow)
        if "filenaming" in auxdata_def:
            cfg.set_filenaming(auxdata_def.filenaming)

        # set subfolders (e.g. for sic, sitype, snow)
        if "subfolders" in auxdata_def:
            cfg.set_subfolder(auxdata_def.subfolders)

        # Set the default options from the auxiliary definition file
        if "options" in auxdata_def:
            options = auxdata_def.get("options", None)
            if options is not None:
                cfg.set_options(**options)

        # Override option with definition from the l2 processor settings
        if l2_procdef_opt is not None:
            cfg.set_options(**l2_procdef_opt)

        # Get the auxiliary data class
        module_name = "pysiral.auxdata.%s" % (auxdata_class)
        class_name = auxdata_def["pyclass"]
        auxclass = get_cls(module_name, class_name)
        if auxclass is None:
            error_id = "auxdata_invalid_class_name"
            msg = "Invalid Auxdata class: %s.%s" % (module_name, class_name)
            # Register under the error id (as the other errors of this
            # method), not under the message template of the error code
            self.error.add_error(error_id, msg)
            self.error.raise_on_error()

        # Init the auxiliary class
        # Note: This will trigger any action defined in the subclasses, such as reading static background files
        auxdata_handler = auxclass(cfg)

        # All done, return
        return auxdata_handler

    def get_local_repository(self, auxdata_class, auxdata_id):
        """
        Get the local repository for the auxdata type and id

        Args:
            auxdata_class (str): Auxdata class (key in `auxdata_repository`
                of the local machine definition)
            auxdata_id (str or None): identifier of the repository

        Returns:
            The repository entry, or None if `auxdata_id` is None or has no
            entry for the auxdata class
        """
        if auxdata_id is None:
            return None
        aux_repo_defs = self.pysiral_config.local_machine.auxdata_repository
        try:
            local_repo_auxclass = aux_repo_defs[auxdata_class]
        except KeyError:
            msg = "Missing auxdata definition in local_machine_def.yaml: auxdata_repository.%s" % auxdata_class
            self.error.add_error("missing-localmachinedef-tag", msg)
            self.error.raise_on_error()
        return local_repo_auxclass.get(auxdata_id, None)

    def get_auxdata_def(self, auxdata_class, auxdata_id):
        """
        Returns the definition in `config/auxdata_def.yaml` for
        specified auxdata class and id (None if the id is not defined)
        """
        try:
            auxdata_class_def = self.pysiral_config.auxdata[auxdata_class]
        except KeyError:
            msg = "Invalid auxdata class [%s] in auxdata_def.yaml" % auxdata_class
            self.error.add_error("invalid-auxdata-class", msg)
            self.error.raise_on_error()
        return auxdata_class_def.get(auxdata_id, None)
class L2iDataHandler(DefaultLoggingClass):
    """
    Class for retrieving l2i product files from a product directory
    that is organized in `yyyy/mm` subfolders
    """

    def __init__(self, base_directory, force_l2i_subfolder=True):
        """
        :param base_directory: Root directory of the l2i product
            (the directory containing the year subfolders)
        :param force_l2i_subfolder: Stored on the instance; not evaluated
            by any method of this class
        """
        super(L2iDataHandler, self).__init__(self.__class__.__name__)
        self.error = ErrorStatus(caller_id=self.__class__.__name__)
        self._base_directory = base_directory
        self._force_l2i_subfolder = force_l2i_subfolder
        # The subdirectory scan runs before the validation: an unreadable
        # base directory results in an empty list here and is reported
        # as an error by the validation below
        self._subdirectory_list = self.get_subdirectory_list()
        self._validate_base_directory()

    def get_files_from_time_range(self, time_range):
        """
        Get all files that fall into the time range (may be spread over
        different year/month subfolders)

        :param time_range: Object with a `days_list` attribute that
            yields (year, month, day) entries
        :return: List of file paths (sorted per day)
        """
        l2i_files = []
        for year, month, day in time_range.days_list:
            lookup_directory = self.get_lookup_directory(year, month)
            # Months without data simply do not contribute any files
            if not os.path.isdir(lookup_directory):
                continue
            l2i_pattern = self.get_l2i_search_str(year=year, month=month, day=day)
            result = glob.glob(os.path.join(lookup_directory, l2i_pattern))
            l2i_files.extend(sorted(result))
        return l2i_files

    def get_files_for_day(self, day_dt):
        """
        Retrieve a list of l2i files with data points for a given day.
        Also specifically looks for files which had a start time on the
        previous day

        :param day_dt: datetime object of the target day
        :return: Sorted list of file paths
        """

        # Get the lookup directory
        lookup_directory = self.get_lookup_directory(day_dt.year, day_dt.month)

        # XXX: We are not evaluating the netCDF attributes at this point
        #      but assuming that the filename contains start and stop
        #      time. This is a pretty safe assumption, but this approach
        #      should be replaced as soon as a proper inspection tool is
        #      available
        day_search = self.get_l2i_search_str(year=day_dt.year, month=day_dt.month, day=day_dt.day)
        search_str = os.path.join(lookup_directory, day_search)
        l2i_files = glob.glob(search_str)

        # Check if day is the first day of the month
        # yes -> check last file of previous month which might have data
        #        for the target day
        if day_dt.day == 1:
            previous_day = day_dt - timedelta(days=1)
            lookup_directory = self.get_lookup_directory(previous_day.year, previous_day.month)
            search_str = os.path.join(lookup_directory, day_search)
            additional_l2i_files = glob.glob(search_str)
            l2i_files.extend(additional_l2i_files)

        # All done, return sorted output
        return sorted(l2i_files)

    def _validate_base_directory(self):
        """ Performs sanity checks on the base directory (must exist) """
        # 1. Path must exist
        if not os.path.isdir(self._base_directory):
            msg = "Invalid l2i product directory: %s"
            msg = msg % str(self._base_directory)
            self.error.add_error("invalid-l2i-productdir", msg)
            self.error.raise_on_error()

    def get_lookup_directory(self, year, month):
        """
        Return the `<base>/yyyy/mm` directory for a given year and month

        :param year: Year as number
        :param month: Month as number
        :return: The directory path (existence is not checked)
        """
        subfolders = ["%4g" % year, "%02g" % month]
        lookup_directory = os.path.join(self.product_basedir, *subfolders)
        return lookup_directory

    def get_subdirectory_list(self):
        """
        Returns a list of all subdirectories of type yyyy/mm

        :return: List of [year, month] string pairs in ascending order;
            empty list if the base directory cannot be walked
        """
        subdirectory_list = list()
        try:
            years = sorted(next(os.walk(self.product_basedir))[1])
        except StopIteration:
            self.log.warning("No subdirectories in %s" % self.product_basedir)
            return []
        # filter any invalid directories
        years = [y for y in years if re.match(r'[1-3][0-9]{3}', y)]
        for year in years:
            subdir_year = os.path.join(self.product_basedir, year)
            months = sorted(next(os.walk(subdir_year))[1])
            # filter any invalid directories
            months = [m for m in months if re.match(r'[0-1][0-9]', m)]
            subdirectory_list.extend([[year, m] for m in months])
        return subdirectory_list

    def get_l2i_search_str(self, year=None, month=None, day=None):
        """
        Returns a search pattern for l2i files with optional refined
        search for year, month, day.

        Note: month & day can only be set, if the year & year + month
              respectively is set

        Examples:
            l2i*.nc
            l2i*2017*.nc
            l2i*201704*.nc
            l2i*20170401*.nc

        :raises ValueError: if month is given without year, or day is
            given without year and month
        """
        # Only reject inconsistent input. Omitting month and/or day is a
        # valid request for a coarser search pattern.
        if month is not None and year is None:
            raise ValueError("year must be set if month is set")
        if day is not None and (year is None or month is None):
            raise ValueError("year & month must be set if day is set")

        date_str = "*"
        if year is not None:
            date_str += "%04g" % year
        if month is not None:
            date_str += "%02g" % month
        if day is not None:
            date_str += "%02g" % day
        # Close the date part with a wildcard only if a date was added
        if len(date_str) > 1:
            date_str += "*"
        l2i_file_pattern = "l2i%s.nc" % date_str
        return l2i_file_pattern

    @property
    def product_basedir(self):
        return self._base_directory

    @property
    def subdirectory_list(self):
        return self._subdirectory_list

    @property
    def start_month(self):
        """
        Returns a date time object for the first month of the l2i
        product repository (raises IndexError for an empty repository)
        """
        first_month = self.subdirectory_list[0]
        return datetime(int(first_month[0]), int(first_month[1]), 1)

    @property
    def stop_month(self):
        """
        Returns a date time object for the last month of the l2i
        product repository (last microsecond of that month; raises
        IndexError for an empty repository)
        """
        last_month = self.subdirectory_list[-1]
        return datetime(int(last_month[0]), int(last_month[1]), 1) + relativedelta(months=1, microseconds=-1)
class DefaultAuxdataClassHandler(DefaultLoggingClass):
    """
    Class for retrieving handler classes for auxiliary data
    (mss, sic, sitype, snow). The classes are initialized with directory
    information from the local machine definition and the auxdata
    information from `auxdata.yaml` configuration file.
    """

    def __init__(self):
        super(DefaultAuxdataClassHandler, self).__init__(self.__class__.__name__)
        self.error = ErrorStatus(caller_id=self.__class__.__name__)

    def get_pyclass(self, auxdata_class, auxdata_id, l2_procdef_opt):
        """
        Returns a class for handling auxiliary data files, that is
        initialized with auxdata settings in `config/auxdata_def.yaml` and
        with the directory specified in `local_machine_def.yaml`

        Args:
            auxdata_class (str): Auxdata class (e.g. mss, sic, sitype, snow)
            auxdata_id (str): Auxdata class identifier (e.g. osisaf)
            l2_procdef_opt (dict or None): Options from the l2 processor
                settings. Applied after (and therefore overriding) the
                default options of the auxdata definition

        Returns:
            class: The initialized auxdata handler class
        """

        # Clear errors
        self.error.reset()

        # Initialize the class with information from auxdata_def.yaml
        auxdata_def = self.get_auxdata_def(auxdata_class, auxdata_id)
        if auxdata_def is None:
            error_id = "auxdata_missing_definition"
            error_message = PYSIRAL_ERROR_CODES[error_id] % (auxdata_class, auxdata_id)
            self.error.add_error(error_id, error_message)
            self.error.raise_on_error()

        # Set the auxdata config
        cfg = AuxClassConfig()

        # connect to repository on local machine
        if "local_repository" in auxdata_def:
            local_repository_id = auxdata_def.local_repository
            local_repo = self.get_local_repository(auxdata_class, local_repository_id)
            # NOTE: The entry in local_machine_def is keyed by the local
            #       repository id (which may differ from the auxdata id),
            #       thus the messages below report the repository id
            if local_repo is None and local_repository_id is not None:
                error_id = "auxdata_missing_localrepo_def"
                error_message = f"Missing entry `auxdata_repository.{auxdata_class}.{local_repository_id}` in " + \
                                f"local_machine_def ({psrlcfg.local_machine_def_filepath})"
                self.error.add_error(error_id, error_message)
                self.error.raise_on_error()
            empty_str = len(local_repo) == 0 if local_repo is not None else False
            if empty_str:
                msg = "Path definition for {}.{} exists in local_machine_def.yaml, but is empty string"
                msg = msg.format(auxdata_class, local_repository_id)
                logger.warning(msg)
            cfg.set_local_repository(local_repo)

        # set doc str (should be mandatory for all auxdata handlers)
        if "long_name" in auxdata_def:
            cfg.set_long_name(auxdata_def.long_name)

        # set filename (e.g. for mss)
        # NOTE(review): This branch requires `local_repository` to be
        #               defined and resolvable, `Path(None)` raises a
        #               TypeError otherwise
        if "filename" in auxdata_def:
            local_repository_id = auxdata_def.local_repository
            local_repo = self.get_local_repository(auxdata_class, local_repository_id)
            filename = Path(local_repo) / auxdata_def.filename
            cfg.set_filename(filename)

        # set filenaming (e.g. for sic, sitype, snow)
        if "filenaming" in auxdata_def:
            cfg.set_filenaming(auxdata_def.filenaming)

        # set subfolders (e.g. for sic, sitype, snow)
        if "subfolders" in auxdata_def:
            cfg.set_subfolder(auxdata_def.subfolders)

        # Set the default options from the auxiliary definition file
        if "options" in auxdata_def:
            options = auxdata_def.get("options", None)
            if options is not None:
                cfg.set_options(**options)

        # Override option with definition from the l2 processor settings
        if l2_procdef_opt is not None:
            cfg.set_options(**l2_procdef_opt)

        # Get the auxiliary data class
        module_name, class_name = f"pysiral.auxdata.{auxdata_class}", auxdata_def["pyclass"]
        auxclass = get_cls(module_name, class_name)
        if auxclass is None:
            error_id = "auxdata_invalid_class_name"
            msg = "Invalid Auxdata class: %s.%s" % (module_name, class_name)
            # Register the error under its id (not under the message
            # template), consistent with the other errors of this method
            self.error.add_error(error_id, msg)
            self.error.raise_on_error()

        # Init the auxiliary class
        # Note: This will trigger any action defined in the subclasses, such as reading static background files
        auxdata_handler = auxclass(cfg)

        # All done, return
        return auxdata_handler

    def get_local_repository(self, auxdata_class, auxdata_id):
        """
        Get the local repository for the auxdata type and id

        :param auxdata_class: The code for the auxiliary data type
        :param auxdata_id: The key below `auxdata_repository.<auxdata_class>`
            in the local machine definition
        :return: The repository entry or None (if `auxdata_id` is None
            or no entry exists for the id)
        """
        if auxdata_id is None:
            return None
        aux_repo_defs = psrlcfg.local_machine.auxdata_repository
        try:
            local_repo_auxclass = aux_repo_defs[auxdata_class]
        except KeyError:
            # Fallback value keeps the return statement valid in case
            # raise_on_error() does not terminate
            local_repo_auxclass = {}
            msg = "Missing auxdata definition in local_machine_def.yaml: auxdata_repository.%s" % auxdata_class
            self.error.add_error("missing-localmachinedef-tag", msg)
            self.error.raise_on_error()
        return local_repo_auxclass.get(auxdata_id, None)

    def get_auxdata_def(self, auxdata_class: str, auxdata_id: str) -> "AttrDict":
        """
        Returns the definition in `config/auxdata_def.yaml` for
        specified auxdata class and id. Raises an error if the entry is not found.

        :param auxdata_class: The code for auxiliary data type (sic, mss, sitype, snow, ...)
        :param auxdata_id: The id of a specific data set for the auxiliary data class (e.g. sic:osisaf-operational)
        :return: The configuration dictionary
        """

        auxdata_def = psrlcfg.auxdef.get_definition(auxdata_class, auxdata_id)
        if auxdata_def is None:
            msg = f"Cannot find entry for auxiliary data set {auxdata_class}:{auxdata_id} in auxdata_def.yaml"
            self.error.add_error("invalid-auxdata-class", msg)
            self.error.raise_on_error()
        return auxdata_def.attrdict
class NCDataFile(DefaultLoggingClass):
    """
    Parent class for writing data containers to netCDF files. The
    content of the file (variables, attributes, filename, directory) is
    controlled by the output handler.
    """

    def __init__(self, output_handler):
        """
        Init the netCDF output parent class.
        NOTE: This class should only be used as a parent class.
        :param output_handler: An output handler class for the different processing level
        """

        # Init parent
        class_name = self.__class__.__name__
        super(NCDataFile, self).__init__(class_name)
        self.error = ErrorStatus(caller_id=class_name)

        # Output handler property
        self.output_handler = output_handler

        # Class attributes
        self.data = None
        self.filename = None
        self.base_export_path = None
        self.parameter_attributes = None

        self.time_def = NCDateNumDef()

        # TODO: Make this an option?
        self.zlib = True
        self._rootgrp = None
        self._options = None
        self._proc_settings = None
        self.verbose = False

    def set_options(self, **opt_dict):
        self._options = AttrDict(**opt_dict)

    def set_processor_settings(self, proc_settings):
        self._proc_settings = proc_settings

    def set_base_export_path(self, path):
        self.base_export_path = path

    def _set_doi(self):
        if self.output_handler.has_doi:
            self.data.set_doi(self.output_handler.doi)

    def _set_data_record_type(self):
        # NOTE(review): The data record type is gated by `has_doi`.
        #               Confirm that this is intended and not a
        #               copy of the condition in `_set_doi`
        if self.output_handler.has_doi:
            self.data.set_data_record_type(self.output_handler.data_record_type)

    def _write_global_attributes(self):
        attr_dict = self.output_handler.get_global_attribute_dict(self.data)
        self._set_global_attributes(attr_dict)

    def _populate_data_groups(self, level3=False, flip_yc=False):
        """
        Create the dimensions and write all variables (including their
        attributes) defined in the output handler to the root group

        :param level3: Flag for level-3 products: all variables except
            lon/lat get an additional leading dimension
        :param flip_yc: Flag if the data shall be flipped upside-down
            (evaluated for level-3 products only)
        """

        lonlat_parameter_names = ["lon", "lat", "longitude", "latitude"]

        dimdict = self.data.dimdict
        dims = dimdict.keys()

        for key in dims:
            self._rootgrp.createDimension(key, dimdict[key])

        for parameter_name, attribute_dict in self.output_handler.variable_def:

            # Check if parameter name is also the name of the source
            # parameter (work on a copy to keep the definition intact)
            if "var_source_name" in attribute_dict.keys():
                attribute_dict = dict(attribute_dict)
                var_source_name = attribute_dict.pop("var_source_name")
            else:
                var_source_name = parameter_name

            # Get the data container
            data = self.data.get_parameter_by_name(var_source_name, raise_on_error=False)

            # Check if the data exists
            if data is None:
                msg = "Invalid parameter name for data object: %s"
                msg = msg % parameter_name
                logger.error(msg)
                self.error.add_error("invalid-paramater", msg)
                self.error.raise_on_error()

            # Convert datetime objects to number
            if isinstance(data[0], (datetime, cftime.datetime, cftime.real_datetime)):
                data = date2num(data, self.time_def.units, self.time_def.calendar)

            # Convert bool objects to integer
            if data.dtype.str == "|b1":
                data = np.int8(data)

            # Set dimensions (dependent on product level)
            if level3:
                if flip_yc:
                    data = np.flipud(data)
                if parameter_name not in lonlat_parameter_names:
                    data = np.array([data])
                    dimensions = tuple(list(dims)[0:len(data.shape)])
                else:
                    dimensions = tuple(list(dims)[1:len(data.shape)+1])
            else:
                if len(data.shape) == 1:
                    dimensions = tuple(list(dims)[0:len(data.shape)])
                else:
                    # Register the additional dimension
                    aux_dimdict = self.data.get_multidim_auxdata_dimdict(parameter_name)
                    for dim_name, dim_value in aux_dimdict["new_dims"]:
                        self._rootgrp.createDimension(dim_name, dim_value)
                    # Add the dimension variable
                    for name, dim_data in aux_dimdict["add_dims"]:
                        dimvar = self._rootgrp.createVariable(name, dim_data.dtype.str, name, zlib=self.zlib)
                        dimvar[:] = dim_data
                    # The full dimension
                    dimensions = aux_dimdict["dimensions"]

            dtype = np.byte
            flag_mask_vals = []
            # flag_mask attributes need special handling
            if 'flag_masks' in attribute_dict.keys():
                # Check to see if data is currently using less bits than the flag allows
                flag_mask_vals = [int(x) for x in str(attribute_dict['flag_masks']).split(sep=',')]
                # Widen the (signed) data type until the largest mask
                # value fits: byte -> max 127, short -> max 32767
                max_flag_mask = max(flag_mask_vals)
                if max_flag_mask > np.iinfo(np.byte).max:
                    dtype = np.short
                if max_flag_mask > np.iinfo(np.short).max:
                    dtype = np.int32
                # Create and set the variable with the wider type
                var = self._rootgrp.createVariable(parameter_name, dtype, dimensions, zlib=self.zlib)
                var[:] = data.astype(dtype)
            else:
                # Create and set the variable
                var = self._rootgrp.createVariable(parameter_name, data.dtype.str, dimensions, zlib=self.zlib)
                var[:] = data

            # Add Parameter Attributes
            # NOTE: The parameter attributes may be template strings and there are special cases with
            #       flags when the data type of the attribute is not a string
            for key in sorted(attribute_dict.keys()):
                attribute = attribute_dict[key]
                attribute = self.output_handler.fill_template_string(attribute, self.data)
                if key == 'flag_masks':
                    # Use values pre-computed above
                    attribute = np.asarray(flag_mask_vals, dtype=dtype)
                elif key == 'flag_values':
                    # The flag_values attribute also needs to be converted to a list of the correct datatype
                    flag_values = [int(x) for x in attribute.split(sep=',')]
                    attribute = np.asarray(flag_values, dtype=data.dtype)
                setattr(var, key, attribute)

    def _create_root_group(self, attdict, **global_attr_keyw):
        """
        Convert the attribute values to netCDF compatible types (in-place)
        and add them as global attributes to the root group
        """
        self._convert_datetime_attributes(attdict)
        self._convert_bool_attributes(attdict)
        self._convert_nonetype_attributes(attdict)
        self._set_global_attributes(attdict, **global_attr_keyw)

    def _convert_datetime_attributes(self, attdict):
        """
        Replace l1b info parameters of type datetime.datetime by a double
        representation to match requirements for netCDF attribute data
        type rules (in-place)
        """
        for key, content in list(attdict.items()):
            if isinstance(content, (datetime, cftime.datetime, cftime.real_datetime)):
                attdict[key] = date2num(content, self.time_def.units, self.time_def.calendar)

    @staticmethod
    def _convert_bool_attributes(attdict):
        """
        Replace l1b info parameters of type bool ['b1'] by a integer
        representation to match requirements for netCDF attribute data
        type rules (in-place)
        """
        for key, content in list(attdict.items()):
            if isinstance(content, bool):
                attdict[key] = int(content)

    @staticmethod
    def _convert_nonetype_attributes(attdict):
        """
        Replace l1b info parameters that are None by an empty string
        to match requirements for netCDF attribute data type rules
        (in-place)
        """
        for key, content in list(attdict.items()):
            if content is None:
                attdict[key] = ""

    def _set_global_attributes(self, attdict, prefix=""):
        """ Save l1b.info dictionary as global attributes """
        for key in attdict.keys():
            self._rootgrp.setncattr(prefix+key, attdict[key])

    def _get_variable_attr_dict(self, parameter):
        """
        Retrieve the parameter attributes (default attributes are
        returned for parameters without attribute definition)
        """
        default_attrs = {
            "long_name": parameter,
            "standard_name": parameter,
            "scale_factor": 1.0,
            "add_offset": 0.0}
        if parameter not in self.parameter_attributes:
            # self._missing_parameters.append(parameter)
            return default_attrs
        else:
            return dict(self.parameter_attributes[parameter])

    def _write_processor_settings(self):
        """
        Write the processor settings as global attributes (string
        representation). Nothing is written if no settings have been set.
        """
        settings = self._proc_settings
        if settings is None:
            return
        for item in settings.keys():
            self._rootgrp.setncattr(item, str(settings[item]))

    def _open_file(self):
        """ Create the netCDF file (an existing file is opened in write mode) """
        try:
            self._rootgrp = Dataset(self.full_path, "w")
        except RuntimeError:
            msg = "Unable to create netCDF file: %s" % self.full_path
            self.error.add_error("nc-runtime-error", msg)
            self.error.raise_on_error()

    def _write_to_file(self):
        self._rootgrp.close()

    @property
    def export_path(self):
        """ Evoking this property will also create the directory if it
        does not already exists """
        return self.output_handler.get_directory_from_data(self.data, create=True)

    @property
    def export_filename(self):
        """ Returns the filename for the level2 output file """
        return self.output_handler.get_filename_from_data(self.data)

    @property
    def full_path(self):
        return Path(self.export_path) / self.export_filename
class TimeRangeRequest(DefaultLoggingClass):
    """
    A time range (start, stop) that can be split into a list of
    iterations with a given base period
    """

    # Definition of periods:
    #
    # monthly:
    #    from 00:00:00.000000 of first day in month to 23:59:59.99999 of
    #    last day per month for each month in time range
    #
    # weekly:
    #    from Monday 00:00:00.00000 to Sunday 23:59:59.99999 for all weeks
    #    in the time range including partially covered weeks
    #
    # daily:
    #    from 00:00:00.000000 to 23:59:59.99999 for each day in time range
    #
    # custom:
    #    from 00:00:00.000000 of first day to 23:59:59.99999 of last day
    #    in time range

    _PERIODS = ["monthly", "weekly", "daily", "custom"]

    # TODO: Future planned option (weekly: 7 days from start day) and
    #       (week_of_year: self explanatory)

    def __init__(self, start_dt, stop_dt, period="monthly", exclude_month=None, raise_if_empty=False):
        """
        :param start_dt: Start of the time range (datetime or integer
            list [yyyy, mm, [dd]])
        :param stop_dt: Stop of the time range (datetime or integer
            list [yyyy, mm, [dd]])
        :param period: One of `monthly`, `weekly`, `daily`, `custom`
        :param exclude_month: List of months that are ignored for the
            generation of the iterations (None: no month is excluded)
        :param raise_if_empty: Flag if an empty time range shall be
            reported as error
        """
        super(TimeRangeRequest, self).__init__(self.__class__.__name__)
        self.pysiral_config = ConfigInfo()
        self.error = ErrorStatus()
        self.set_range(start_dt, stop_dt)
        self.set_period(period)
        # None (instead of a mutable default list) prevents sharing one
        # list instance between all objects created with the default
        self.set_exclude_month(exclude_month)
        if raise_if_empty:
            self.raise_if_empty()

    def __repr__(self):
        output = "TimeRangeRequest object:\n"
        for field in ["_start_dt", "_stop_dt", "_period", "_exclude_month"]:
            output += "%12s: %s" % (field, getattr(self, field))
            output += "\n"
        return output

    def clip_to_mission(self, mission_id):
        """ Clip the time range to the data period of a mission """
        mission_info = self.pysiral_config.get_mission_info(mission_id)
        start = mission_info.data_period.start
        stop = mission_info.data_period.stop
        is_clipped = self.clip_to_range(start, stop)
        if is_clipped:
            self.log.info("Clipped to mission time range: %s till %s" % (
                mission_info.data_period.start, mission_info.data_period.stop))

    def raise_if_empty(self):
        """ Report an error if start and/or stop time are not set """
        problems = []
        if self._start_dt is None:
            problems.append("start time is invalid")
        if self._stop_dt is None:
            problems.append("stop time is invalid")
        if problems:
            self.error.add_error("empty-time-range", "; ".join(problems))
            self.error.raise_on_error()

    def set_range(self, start_date, stop_date):
        """
        Set the range of the request, start_date and stop_data can
        be either int lists (year, month, [day]) or datetime objects
        """
        # 1. Decode both time definitions. Invalid definitions are
        #    registered as errors (and not silently ignored)
        self._start_dt = self._decode_time_definition(start_date, "start")
        self._stop_dt = self._decode_time_definition(stop_date, "stop")

        # 2. Raise on parsing errors
        self.error.raise_on_error()

        # 3. Check range
        self._validate_range()

    def _decode_time_definition(self, time_def, start_or_stop):
        """ Return the datetime for a datetime or integer list input
        (None and a registered error for any other input) """
        if isinstance(time_def, datetime):
            return time_def
        if isinstance(time_def, list) and all(isinstance(item, int) for item in time_def):
            return self._decode_int_list(time_def, start_or_stop)
        error_message = "invalid %s time (not integer list or datetime)" % start_or_stop
        self.error.add_error("invalid-timedef", error_message)
        return None

    def clip_to_range(self, range_start, range_stop):
        """
        Clip the current time range to an defined time range. The time
        range is emptied (start and stop set to None) if it lies
        completely outside the given range.

        :return: Flag if the time range has been modified
        """

        # An empty time range cannot be clipped any further
        if self._start_dt is None or self._stop_dt is None:
            return False

        is_clipped = False

        if self._start_dt < range_start and self._stop_dt > range_start:
            is_clipped = True
            self._start_dt = range_start
        elif self._start_dt < range_start and self._stop_dt < range_start:
            # Completely before the range: return right away, the
            # comparisons below are not defined for None
            self._start_dt = None
            self._stop_dt = None
            return True

        if self._stop_dt > range_stop and self._start_dt < range_stop:
            is_clipped = True
            self._stop_dt = range_stop
        elif self._stop_dt > range_stop and self._start_dt > range_stop:
            is_clipped = True
            self._start_dt = None
            self._stop_dt = None

        return is_clipped

    def set_period(self, period):
        """
        Set the period (monthly, weekly, etc) for the generation of
        iterations for the time range

        :raises ValueError: for unknown periods
        """
        if period in self._PERIODS:
            self._period = period
        else:
            raise ValueError("Invalid TimeRangeRequest period: %s" % period)

    def set_exclude_month(self, exclude_month_list):
        """ Set a list of month, that shall be ignored during the generation
        of iterations for the time range """
        if exclude_month_list is None:
            exclude_month_list = []
        self._exclude_month = exclude_month_list

    def get_id(self, dt_fmt="%Y%m%dT%H%M%S"):
        return self.start_dt.strftime(dt_fmt) + "_" + self.stop_dt.strftime(dt_fmt)

    def _get_iterations(self):
        """ Return a list of iterations for the number of periods in the
        time range """

        # Return empty list if no start/stop are set
        if self._start_dt is None or self._stop_dt is None:
            return []

        # monthly periods: return a list of time ranges that cover the full
        # month from the first to the last month
        if self._period == "monthly":
            iterations = self._get_monthly_iterations()

        # default week periods: return a list of time ranges for each default
        # week definition (from Monday to Sunday)
        elif self._period == "weekly":
            iterations = self._get_weekly_iterations()

        # daily periods: return a list of time ranges for each day
        # in the requested period (exclude_month still applies)
        elif self._period == "daily":
            iterations = self._get_daily_iterations()

        # Just return one iteration with custom time range
        elif self._period == "custom":
            time_range = TimeRangeIteration(base_period="custom")
            time_range.set_range(self.start_dt, self.stop_dt)
            time_range.set_indices(1, 1)
            iterations = [time_range]

        # This should be caught before, but always terminate an
        # an if-elif-else
        else:
            iterations = []
            msg = "Invalid period: %s" % str(self._period)
            self.error.add_error("invalid-period", msg)
            self.error.raise_on_error()

        return iterations

    def _decode_int_list(self, int_list, start_or_stop):
        """
        Convert an integer list [yyyy, mm, [dd]] to a datetime object

        :param int_list: The list of integers
        :param start_or_stop: `start` or `stop`. The stop time is set to
            the last microsecond of the month/day
        :return: datetime object (None and a registered error if the
            list cannot be converted)
        """

        # XXX: Currently only yyyy mm [dd] (day is optional) are allowed
        n_entries = len(int_list)
        if n_entries < 2 or n_entries > 3:
            error_message = "%s date integer list must be yyyy mm [dd]" % start_or_stop
            self.error.add_error("invalid-date-int-list", error_message)
            return None

        # Set the day
        day = 1 if n_entries == 2 else int_list[2]

        # Set the datetime object (as if would be start date)
        # Raise error and return none if unsuccessful
        try:
            dt = datetime(int_list[0], int_list[1], day)
        except (TypeError, ValueError, OverflowError):
            error_message = "cannot convert integer list to datetime: %s" % (str(int_list))
            self.error.add_error("invalid-date-int-list", error_message)
            return None

        # if stop time: add one period
        if start_or_stop == "stop":
            if n_entries == 2:
                extra_period = relativedelta(months=1, microseconds=-1)
            else:
                extra_period = relativedelta(days=1, microseconds=-1)
            dt = dt + extra_period

        return dt

    def _validate_range(self):
        # Check if start and stop are in the right order
        if self.stop_dt <= self.start_dt:
            msg = "stop [%s] before start [%s]"
            msg = msg % (str(self.stop_dt), str(self.start_dt))
            self.error.add_error("invalid-period", msg)
            self.error.raise_on_error()

    def _get_monthly_iterations(self):
        """ Create iterator with monthly period """
        # Create Iterations
        iterations = []
        months = self.month_list
        n_iterations = len(months)
        for index, (year, month) in enumerate(months, start=1):

            # Per default get the full month
            period_start, period_stop = get_month_time_range(year, month)

            # Clip time range to actual days for first and last iteration
            # (only if the first and the last month are not in the
            #  exclude_month list)
            first_month = self._start_dt.month
            first_month_excluded = first_month in self._exclude_month
            if index == 1 and not first_month_excluded:
                period_start = self.start_dt

            last_month = self._stop_dt.month
            last_month_excluded = last_month in self._exclude_month
            if index == n_iterations and not last_month_excluded:
                period_stop = self.stop_dt

            # set final time range
            # iteration will be a of type TimeRangeIteration
            time_range = TimeRangeIteration(base_period=self.base_period)
            time_range.set_range(period_start, period_stop)
            time_range.set_indices(index, n_iterations)
            iterations.append(time_range)

        return iterations

    def _get_weekly_iterations(self):
        """ Create iterator with weekly (Monday through Sunday) period """

        # Start with empty iteration
        iterations = []

        # Get the start date: period start date (if is monday) or previous
        # monday. If the day is not monday we can use the isoweekday
        # (monday=1, sunday=7) to compute the number days we have to subtract
        # from the start day of the period
        start_offset_days = self.start_dt.isoweekday() - 1
        week_start_day = self.start_dt - relativedelta(days=start_offset_days)

        # Same for the stop date: Make sure the end date either a Sunday
        # already or a Sunday after the stop date of the period
        stop_offset_days = 7 - self.stop_dt.isoweekday()
        week_stop_day = self.stop_dt + relativedelta(days=stop_offset_days)

        # Get the list of weeks
        weeks = weeks_list(week_start_day, week_stop_day, self._exclude_month)
        n_iterations = len(weeks)
        for index, (start_day, stop_day) in enumerate(weeks, start=1):

            # Only the start day of the weeks list is used, the stop
            # is the last microsecond of the 7 day period
            start = datetime(start_day[0], start_day[1], start_day[2])
            stop = start + relativedelta(days=7, microseconds=-1)

            # set final time range
            # iteration will be a of type TimeRangeIteration
            time_range = TimeRangeIteration(base_period=self.base_period)
            time_range.set_range(start, stop)
            time_range.set_indices(index, n_iterations)
            iterations.append(time_range)

        return iterations

    def _get_daily_iterations(self):
        """ Create iterator with daily period """

        # Get list of days
        day_list = self.days_list
        iterations = []
        n_iterations = len(day_list)

        # Loop over days
        for index, (year, month, day) in enumerate(day_list, start=1):

            # Start and stop are beginning/end of day
            start = datetime(year, month, day)
            stop = start + relativedelta(days=1, microseconds=-1)

            # Create the iteration
            time_range = TimeRangeIteration(base_period=self.base_period)
            time_range.set_range(start, stop)
            time_range.set_indices(index, n_iterations)
            iterations.append(time_range)

        return iterations

    @property
    def month_list(self):
        return month_list(self.start_dt, self.stop_dt, self._exclude_month)

    @property
    def days_list(self):
        return days_list(self.start_dt, self.stop_dt, self._exclude_month)

    @property
    def _default_period(self):
        return self._PERIODS[0]

    @property
    def start_dt(self):
        return self._start_dt

    @property
    def stop_dt(self):
        return self._stop_dt

    @property
    def label(self):
        return str(self.start_dt) + " till " + str(self.stop_dt)

    @property
    def iterations(self):
        return self._get_iterations()

    @property
    def base_period(self):
        return self._period

    @property
    def base_duration(self):
        """
        Return a duration object: one month/day for the monthly/daily
        base period, the duration between start and stop time for all
        other periods
        """
        if self.base_period == "monthly":
            return Duration(months=1)
        elif self.base_period == "daily":
            return Duration(days=1)
        else:
            # relativedelta(dt1, dt2) is dt1 - dt2: stop comes first to
            # get a positive duration. The delta is normalized into
            # years and months, thus the years need to be passed on too
            delta = relativedelta(dt1=self.stop_dt, dt2=self.start_dt)
            return Duration(years=delta.years, months=delta.months, days=delta.days,
                            hours=delta.hours, minutes=delta.minutes, seconds=delta.seconds)

    @property
    def base_duration_isoformat(self):
        return duration_isoformat(self.base_duration)
class Sentinel3CODAL2Wat(DefaultLoggingClass):
    """
    Input handler that reads a Sentinel-3 L2WAT netCDF file (CODA) together with its
    xml manifest and transfers the content into a Level-1 data object.
    """

    def __init__(self, cfg, raise_on_error=False):
        """
        Input handler for Sentinel-3 L2WAT netCDF files from the CODA.
        :param cfg: A treedict object (root.input_handler.options) from the corresponding Level-1 pre-processor
                    config file
        :param raise_on_error: Boolean value if the class should raise an exception upon an error (default: False)
        """

        cls_name = self.__class__.__name__
        super(Sentinel3CODAL2Wat, self).__init__(cls_name)
        self.error = ErrorStatus(caller_id=cls_name)

        # Store arguments
        self.raise_on_error = raise_on_error
        self.cfg = cfg

        # Init main class variables
        self.nc = None

        # Per-file state, (re-)assigned at the beginning of each get_l1() call
        self.filepath = None
        self.l1 = None

        # Debug variables
        self.timer = None

    def get_l1(self, filepath, polar_ocean_check=None):
        """
        Create a Level-1 data container from Sentinel-3 CODA L2WAT files
        :param filepath: The full file path to the netCDF file
        :param polar_ocean_check: Optional object with a `has_polar_ocean_segments(info)` method. If given
                                  and the method returns a false value, the file is skipped
        :return: The parsed Level-1 data container or `self.empty` (None) if the file is invalid, could not
                 be parsed or was rejected by the polar ocean check
        """

        # for debug purposes
        self.timer = StopWatch()
        self.timer.start()

        # Save filepath
        self.filepath = filepath

        # Create an empty Level-1 data object
        self.l1 = Level1bData()

        # Input Validation
        if not os.path.isfile(filepath):
            msg = "Not a valid file: %s" % filepath
            self.log.warning(msg)
            self.error.add_error("invalid-filepath", msg)
            return self.empty

        # Parse xml header file
        self._parse_xml_manifest(filepath)

        # Parse the input netCDF file
        self._read_input_netcdf(filepath)
        if self.error.status:
            return self.empty

        # Get metadata
        self._set_input_file_metadata()

        # Test if input file contains data over polar oceans (optional)
        if polar_ocean_check is not None:
            has_polar_ocean_data = polar_ocean_check.has_polar_ocean_segments(self.l1.info)
            if not has_polar_ocean_data:
                self.timer.stop()
                return self.empty

        # Polar ocean check passed, now fill the rest of the l1 data groups
        self._set_l1_data_groups()

        self.timer.stop()
        self.log.info("- Created L1 object in %.3f seconds" % self.timer.get_seconds())

        # Return the l1 object
        return self.l1

    @staticmethod
    def interp_1Hz_to_20Hz(variable_1Hz, time_1Hz, time_20Hz, **kwargs):
        """
        Computes a simple linear interpolation to transform a 1Hz into a 20Hz variable
        :param variable_1Hz: an 1Hz variable array
        :param time_1Hz: 1Hz reference time
        :param time_20Hz: 20 Hz reference time
        :param kwargs: additional keywords passed to scipy's `interp1d`
        :return: tuple (the interpolated 20Hz variable, error flag). If the interpolation raises a
                 ValueError, the variable is an all-NaN array in the shape of `time_20Hz` and the
                 error flag is True
        """
        error_status = False
        try:
            f = interpolate.interp1d(time_1Hz, variable_1Hz, bounds_error=False, **kwargs)
            variable_20Hz = f(time_20Hz)
        except ValueError:
            fill_value = np.nan
            variable_20Hz = np.full(time_20Hz.shape, fill_value)
            error_status = True
        return variable_20Hz, error_status

    @staticmethod
    def parse_sentinel3_l1b_xml_header(filename):
        """
        Reads the XML header file of a Sentinel 3 L1b Data set
        and returns the contents as an OrderedDict
        :param filename: full path of the xml header file
        :return: the content below the `xfdu:XFDU` root element
        """
        with open(filename) as fd:
            content_odereddict = xmltodict.parse(fd.read())
        return content_odereddict[u'xfdu:XFDU']

    def _parse_xml_manifest(self, filepath):
        """
        Parse the Sentinel-3 XML header file and extract key attributes for filtering.
        The header file is expected in the same folder as the netCDF file, its name is
        taken from the configuration (`xml_manifest`).
        :param filepath: the filepath for the netcdf
        :return: None
        """
        # Retrieve header information from mission settings
        xml_header_file = self.cfg.xml_manifest
        dataset_folder = folder_from_filename(filepath)
        filename_header = os.path.join(dataset_folder, xml_header_file)
        self._xmlh = self.parse_sentinel3_l1b_xml_header(filename_header)

    def _get_xml_content(self, section_name, tag):
        """
        Returns the content of one tag of a metadata object of the xml manifest
        :param section_name: key of the configuration entry `xml_metadata_object_index` that
                             provides the index of the metadata object
        :param tag: name of the tag inside the `xmlData` element of that metadata object
        :return: the content of the tag
        """

        # Extract Metadata
        metadata = self._xmlh["metadataSection"]["metadataObject"]

        # Extract General Product Info
        index = self.cfg.xml_metadata_object_index[section_name]
        product_info = metadata[index]["metadataWrap"]["xmlData"]
        product_info = product_info[tag]

        return product_info

    def _read_input_netcdf(self, filepath):
        """
        Read the netCDF file via xarray. A failure is registered in the error handler
        and logged as warning, but not re-raised.
        :param filepath: The full filepath to the netCDF file
        :return: none
        """
        try:
            self.nc = xarray.open_dataset(filepath, decode_times=False, mask_and_scale=True)
        # Any parsing problem is reported via the error status. `Exception` (instead of a
        # bare except) keeps KeyboardInterrupt and SystemExit from being swallowed here.
        except Exception:
            msg = "Error encountered by xarray parsing: %s" % filepath
            self.error.add_error("xarray-parse-error", msg)
            self.log.warning(msg)
            return

    def _set_input_file_metadata(self):
        """
        Populates the product info segment of the Level1Data object with information from
        the global attributes of the netCDF and content of the xml manifest
        :return: None
        """

        # Short cuts
        metadata = self.nc.attrs
        info = self.l1.info

        # Get xml manifest content
        product_info = self._get_xml_content("generalProductInformation", "sentinel3:generalProductInformation")
        sral_info = self._get_xml_content("sralProductInformation", "sralProductInformation")

        # Processing environment metadata
        info.set_attribute("pysiral_version", pysiral_version)

        # General product metadata
        mission = metadata["mission_name"].lower().replace(" ", "")
        info.set_attribute("mission", str(mission))
        info.set_attribute("mission_sensor", "sral")
        info.set_attribute("mission_data_version", metadata["source"])
        info.set_attribute("orbit", metadata["absolute_rev_number"])
        info.set_attribute("cycle", metadata["cycle_number"])
        info.set_attribute("mission_data_source", metadata["product_name"])
        info.set_attribute("timeliness", self.cfg.timeliness_dict[str(product_info["sentinel3:timeliness"])])

        # Time-Orbit Metadata
        lats = [float(metadata["first_meas_lat"]), float(metadata["last_meas_lat"])]
        lons = [float(metadata["first_meas_lon"]), float(metadata["last_meas_lon"])]
        # The first 4 characters of the time strings are cut off before parsing
        info.set_attribute("start_time", parse_datetime_str(metadata["first_meas_time"][4:]))
        info.set_attribute("stop_time", parse_datetime_str(metadata["last_meas_time"][4:]))
        info.set_attribute("lat_min", np.amin(lats))
        info.set_attribute("lat_max", np.amax(lats))
        info.set_attribute("lon_min", np.amin(lons))
        info.set_attribute("lon_max", np.amax(lons))

        # Product Content Metadata
        # (the radar mode percentage is hard-coded to 100% sar)
        for mode in ["sar", "sin", "lrm"]:
            percent_value = 100. if mode == "sar" else 0.0
            info.set_attribute("{}_mode_percent".format(mode), percent_value)
        info.set_attribute("open_ocean_percent", float(sral_info["sral:openOceanPercentage"]))

    def _set_l1_data_groups(self):
        """
        Fill all data groups of the Level-1 data object with the content of the netCDF file. This is just the
        overview method, see specific sub-methods below
        :return: None
        """
        self._set_time_orbit_data_group()
        self._set_waveform_data_group()
        self._set_range_correction_group()
        self._set_surface_type_group()
        self._set_classifier_group()

    def _set_time_orbit_data_group(self):
        """
        Transfer the time orbit parameter from the netcdf to l1 data object
        :return: None
        """

        # Transfer the timestamp
        # NOTE: Here it is critical that the xarray does not automatically decodes time since it is
        #       difficult to work with the numpy datetime64 date format. Better to compute datetimes using
        #       a know num2date conversion
        utc_timestamp = num2date(self.nc.time_20_ku.values, units=self.nc.time_20_ku.units)
        self.l1.time_orbit.timestamp = utc_timestamp

        # Set the geolocation
        self.l1.time_orbit.set_position(
            self.nc.lon_20_ku.values,
            self.nc.lat_20_ku.values,
            self.nc.alt_20_ku.values,
            self.nc.orb_alt_rate_20_ku.values)

        # Set antenna attitude
        # NOTE: This are only available in 1Hz and need to be interpolated
        #       (the error flag of the interpolation is not evaluated here)
        time_01, time_20 = self.nc.time_01.values, self.nc.time_20_ku.values
        pitch_angle_20, stat = self.interp_1Hz_to_20Hz(self.nc.off_nadir_pitch_angle_pf_01.values, time_01, time_20)
        roll_angle_20, stat = self.interp_1Hz_to_20Hz(self.nc.off_nadir_roll_angle_pf_01.values, time_01, time_20)
        yaw_angle_20, stat = self.interp_1Hz_to_20Hz(self.nc.off_nadir_yaw_angle_pf_01.values, time_01, time_20)
        self.l1.time_orbit.set_antenna_attitude(pitch_angle_20, roll_angle_20, yaw_angle_20)

    def _set_waveform_data_group(self):
        """
        Transfer of the waveform group to the Level-1 object. This includes
          1. the computation of waveform power in Watts
          2. the computation of the window delay in meter for each waveform bin
          3. extraction of the waveform valid flag (not yet implemented, see TODO below)
        :return: None
        """

        # Get the waveform
        # NOTE: The waveform is given in counts
        wfm_counts = self.nc.waveform_20_ku.values
        _, n_range_bins = wfm_counts.shape

        # Convert the waveform to power
        # TODO: This needs to be verified. Currently using the scale factor and documentation in netcdf unclear
        # From the documentation:
        # "This scaling factor represents the backscatter coefficient for a waveform amplitude equal to 1.
        #  It is corrected for AGC instrumental errors (agc_cor_20_ku) and internal calibration (sig0_cal_20_ku)"
        # NOTE: Make sure type of waveform is float and not double
        #       (double will cause issues with cythonized retrackers)
        # The per-record scale factor is broadcast along the range bin axis
        waveform_scale_factor = self.nc.scale_factor_20_ku.values
        wfm_power = (waveform_scale_factor[:, np.newaxis] * wfm_counts.astype(float)).astype(np.float32)

        # Get the window delay
        # "The tracker_range_20hz is the range measured by the onboard tracker
        #  as the window delay, corrected for instrumental effects and
        #  CoG offset"
        # The range of each bin is the tracker range plus the offset of the bin
        # with respect to the nominal tracking bin (broadcast over all records)
        tracker_range_20hz = self.nc.tracker_range_20_ku.values
        range_bin_index = np.arange(n_range_bins)
        wfm_range = (tracker_range_20hz[:, np.newaxis] +
                     (range_bin_index*self.cfg.range_bin_width) -
                     (self.cfg.nominal_tracking_bin*self.cfg.range_bin_width)).astype(np.float32)

        # Set the operation mode
        op_mode = self.nc.instr_op_mode_20_ku.values
        op_mode_translator = self.cfg.instr_op_mode_list
        radar_mode = np.array([op_mode_translator[int(val)] for val in op_mode]).astype("int8")

        # Set the waveform
        self.l1.waveform.set_waveform_data(wfm_power, wfm_range, radar_mode)

        # Get the valid flags
        # TODO: Find a way to get a valid flag
        # measurement_confident_flag = self.nc.flag_mcd_20_ku.values
        # valid_flag = measurement_confident_flag == 0
        # self.l1.waveform.set_valid_flag(valid_flag)

    def _set_range_correction_group(self):
        """
        Transfer the range corrections defined in the l1p config file to the Level-1 object
        NOTE: The range corrections are all in 1 Hz and must be interpolated to 20Hz
        :return: None
        """

        # Get the reference times for interpolating the range corrections from 1Hz -> 20Hz
        time_1Hz = self.nc.time_01.values
        time_20Hz = self.nc.time_20_ku.values

        # Loop over all range correction variables defined in the processor definition file
        for key in self.cfg.range_correction_targets.keys():
            var_name = self.cfg.range_correction_targets[key]
            variable_1Hz = getattr(self.nc, var_name)
            variable_20Hz, error_status = self.interp_1Hz_to_20Hz(variable_1Hz.values, time_1Hz, time_20Hz)
            # A failed interpolation yields an all-NaN array, which is still registered
            if error_status:
                msg = "- Error in 20Hz interpolation for variable `%s` -> set only dummy" % var_name
                self.log.warning(msg)
            self.l1.correction.set_parameter(key, variable_20Hz)

    def _set_surface_type_group(self):
        """
        Transfer of the surface type flag to the Level-1 object. The 20Hz surface type variable
        (`surf_type_20_ku`) is compared to each value in ESA_SURFACE_TYPE_DICT and the resulting
        boolean flag is added under the corresponding key.
        :return: None
        """
        # Set the flag
        surface_type = self.nc.surf_type_20_ku.values
        for key in ESA_SURFACE_TYPE_DICT.keys():
            flag = surface_type == ESA_SURFACE_TYPE_DICT[key]
            self.l1.surface_type.add_flag(flag, key)

    def _set_classifier_group(self):
        """
        Transfer the classifiers defined in the l1p config file to the Level-1 object.
        NOTE: It is assumed that all classifiers are 20Hz

        In addition, a few legacy parameter are computed based on the waveform counts that is only available at
        this stage. Computation of other parameter such as sigma_0, leading_edge_width, ... are moved to the
        post-processing
        :return: None
        """
        # Loop over all classifier variables defined in the processor definition file
        for key in self.cfg.classifier_targets.keys():
            variable_20Hz = getattr(self.nc, self.cfg.classifier_targets[key])
            self.l1.classifier.add(variable_20Hz, key)

    @property
    def empty(self):
        """
        Default return object, if nodata should be returned
        :return: Representation of an empty object (None)
        """
        return None
class Level2Data(object):
    """
    Data container for the Level-2 parameters of one track. Each entry of `_L2_DATA_ITEMS`
    is created as L2ElevationArray attribute, auxiliary parameters can be registered at
    runtime, and `get_attribute` resolves attribute names via the `_get_attr_<name>` methods
    (required for the output data handler).
    """

    # Names of the standard data items (created as attributes in _create_l2_data_items)
    _L2_DATA_ITEMS = ["range", "ssa", "elev", "afrb", "frb", "sit", "radar_mode"]

    _HEMISPHERE_CODES = {"north": "nh", "south": "sh"}

    # These are only the standard Level-2 parameters
    # NOTE: Auxiliary parameter are handled differently
    _PARAMETER_CATALOG = {
        "time": "time",
        "longitude": "longitude",
        "latitude": "latitude",
        "surface_type": "surface_type_flag",
        "radar_mode": "radar_mode",
        "elevation": "elev",
        "sea_surface_anomaly": "ssa",
        "radar_freeboard": "afrb",
        "freeboard": "frb",
        "sea_ice_thickness": "sit",
    }

    _PROPERTY_CATALOG = {"sea_surface_height": "ssh"}

    def __init__(self, metadata, time_orbit, period=None):
        """
        :param metadata: metadata object of the source data (must provide `n_records`)
        :param time_orbit: time/orbit data group (must provide `is_evenly_spaced`)
        :param period: optional period object, used for the time coverage attributes
        """

        # Copy necessary fields form l1b
        self.error = ErrorStatus()
        self._n_records = metadata.n_records
        self.info = metadata
        self.track = time_orbit
        self.period = period

        self._auto_auxvar_num = 0

        # A dictionary similar to the parameter catalog
        # To be filled during the set auxdata method
        self._auxiliary_catalog = {}

        # Metadata
        self._auxdata_source_dict = {}
        self._source_primary_filename = "unkown"
        self._l2_algorithm_id = "unkown"
        self._l2_version_tag = "unkown"
        self._doi = ""

        # Define time of dataset creation as the time of object initialization
        # to avoid slightly different timestamps for repated calls of datetime.now()
        self._creation_time = datetime.now()

        # Other Class properties
        self._is_evenly_spaced = time_orbit.is_evenly_spaced

        # Create Level2 Data Groups
        self._create_l2_data_items()

    def set_surface_type(self, surface_type):
        """ Store the surface type object (source of the `surface_type_flag` property) """
        self.surface_type = surface_type

    def set_radar_mode(self, radar_mode):
        """ Store the radar mode (replaces the `radar_mode` data item) """
        self.radar_mode = radar_mode

    def set_parameter(self, target, value, uncertainty=None, bias=None):
        """
        Convienience method to safely add a parameter with optional
        uncertainty and/or bias to the level-2 data structure
        :param target: parameter name
        :param value: parameter value (array of length n_records)
        :param uncertainty: optional uncertainty (scalar or array of length n_records)
        :param bias: optional bias (scalar or array of length n_records)
        :return: None
        """

        # Sanity checks
        is_l2_default = self._check_if_valid_parameter(target)

        # Check if the full name has been passed
        # NOTE(review): only long names of the parameter catalog pass this test. Short names
        #               listed in _L2_DATA_ITEMS take the auxiliary route below as well
        #               -> confirm that this is intended
        if not is_l2_default and target in self.parameter_catalog.keys():
            target = self.parameter_catalog[target]
        else:
            # TODO: Need to figure something out for the auxvar id (not known if reinstated from l2i)
            par_name = self.auto_auxvar_id
            self.set_auxiliary_parameter(par_name, target, value, uncertainty)
            return

        # Next check: Needs to be of correct shape
        is_correct_size = self._check_valid_size(value)
        if not is_correct_size:
            msg = "Invalid parameter dimension: %s (See self._L2_DATA_ITEMS)"
            msg = msg % str(target)
            self.error.add_error("l2-invalid-parameter_name", msg)
            self.error.raise_on_error()

        # Test if parameter exists
        # (older l2i files might not have all parameters)
        try:
            parameter = getattr(self, target)
        except AttributeError:
            return

        # Set values, uncertainty bias
        parameter.set_value(value)
        if uncertainty is not None:
            uncertainty_value = self._get_as_array(uncertainty)
            parameter.set_uncertainty(uncertainty_value)
        if bias is not None:
            bias_value = self._get_as_array(bias)
            # Only the array of length n_records is passed, same as for the uncertainty
            # NOTE(review): assumes that set_bias accepts a single array argument like
            #               set_uncertainty -> confirm against L2ElevationArray
            parameter.set_bias(bias_value)
        setattr(self, target, parameter)

    def set_auxiliary_parameter(self, var_id, var_name, value, uncertainty=None):
        """
        Adds an auxiliary parameter to the data object
        :param var_id: attribute name under which the parameter is stored
        :param var_name: (long) name registered in the auxiliary catalog
        :param value: array of values, None creates an all-NaN array
        :param uncertainty: optional uncertainty
        :return: None
        """

        # Use L2Elevation Array
        # TODO: This is to cumbersome, replace by xarray at due time
        param = L2ElevationArray(shape=(self.n_records))
        # Allow value to be None
        # NOTE: In this case an empty value will be generated
        if value is None:
            value = np.full((self.n_records), np.nan)
        param.set_value(value)
        if uncertainty is not None:
            param.set_uncertainty(uncertainty)
        setattr(self, var_id, param)

        # Register auxiliary parameter (this allows to find the parameter
        # by its long name
        self._auxiliary_catalog[var_name] = var_id

    def set_data_record_type(self, data_record_type):
        """ Store the data record type """
        self._data_record_type = data_record_type

    def update_retracked_range(self, retracker):
        """
        Transfer range, elevation and potential auxiliary output of a retracker
        for the indices supplied by the retracker
        :param retracker: object providing `indices`, `range`, `uncertainty` and `auxdata_output`
        :return: None
        """

        # Update only for indices (surface type) supplied by retracker class
        # XXX: should get an overhaul
        ii = retracker.indices
        self.range[ii] = retracker.range[ii]
        self.range.uncertainty[ii] = retracker.uncertainty[ii]
        self.elev[ii] = self.altitude[ii] - retracker.range[ii]
        self.elev.uncertainty[ii] = retracker.uncertainty[ii]

        # Register potential auxiliary data
        for var_id, var_name, value, uncertainty in retracker.auxdata_output:

            # --- Check if output variable already exists ---

            # Create if new
            if var_name not in self.auxvar_names:
                self.set_auxiliary_parameter(var_id, var_name, value, uncertainty)

            # Transfer values for indices if already exists
            else:
                auxdata = getattr(self, var_id)
                auxdata[ii] = value[ii]
                if uncertainty is not None:
                    auxdata.uncertainty[ii] = uncertainty[ii]
                setattr(self, var_id, auxdata)

    def set_metadata(self, auxdata_source_dict=None, source_primary_filename=None,
                     l2_algorithm_id=None, l2_version_tag=None):
        """ Update metadata entries (only arguments that are not None are applied) """
        if auxdata_source_dict is not None:
            self._auxdata_source_dict = auxdata_source_dict
        if source_primary_filename is not None:
            self._source_primary_filename = source_primary_filename
        if l2_algorithm_id is not None:
            self._l2_algorithm_id = l2_algorithm_id
        if l2_version_tag is not None:
            self._l2_version_tag = l2_version_tag

    def set_doi(self, doi):
        """ Store the digital object identifier """
        self._doi = doi

    def get_parameter_by_name(self, parameter_name):
        """
        Method to retrieve a level-2 parameter
        :param parameter_name: name in the parameter, property or auxiliary catalog. Names that
                               contain `_uncertainty` or `_bias` return the uncertainty or
                               bias of the corresponding parameter
        :return: the parameter (or its uncertainty/bias)
        """

        # Combine parameter and property catalogs
        # (parameter_catalog returns a copy, the class level catalog is not modified)
        catalog = self.parameter_catalog
        catalog.update(self.property_catalog)
        catalog.update(self._auxiliary_catalog)

        if "_uncertainty" in parameter_name:
            parameter_name = parameter_name.replace("_uncertainty", "")
            source = catalog[parameter_name]
            parameter = getattr(self, source)
            return parameter.uncertainty

        if "_bias" in parameter_name:
            parameter_name = parameter_name.replace("_bias", "")
            source = catalog[parameter_name]
            parameter = getattr(self, source)
            return parameter.bias

        try:
            source = catalog[parameter_name]
        except KeyError:
            msg = "Variable name `%s` is not in the catalog of this l2 object" % parameter_name
            self.error.add_error("l2data-missing-variable", msg)
            self.error.raise_on_error()
        parameter = getattr(self, source)
        return parameter

    def get_attribute(self, attribute_name, *args):
        """
        Return a string for a given attribute name. This method is
        required for the output data handler
        :param attribute_name: name of the attribute (resolved to the method `_get_attr_<attribute_name>`)
        :param args: options passed to the attribute method
        :return: the attribute or "unkown" if an AttributeError is raised (unknown attribute
                 name or missing attribute inside the attribute method)
        """
        try:
            attr_getter = getattr(self, "_get_attr_" + attribute_name)
            attribute = attr_getter(*args)
            return attribute
        except AttributeError:
            return "unkown"

    def _create_l2_data_items(self):
        """ Create an empty L2ElevationArray attribute for each standard data item """
        for item in self._L2_DATA_ITEMS:
            setattr(self, item, L2ElevationArray(shape=(self.n_records)))

    def _check_if_valid_parameter(self, parameter_name):
        """
        Performs a test if parameter name is a valid level-2 parameter name
        (an entry of `_L2_DATA_ITEMS`) and returns flag (valid: True, invalid: False)
        """
        return parameter_name in self._L2_DATA_ITEMS

    def _check_valid_size(self, array, name=""):
        """
        Test if array has the correct size shape=(n_records). Adds error
        if not and returns flag (valid: True, invalid: False)
        :param array: the array to test
        :param name: currently not used
        """
        # np.ndim is also valid for input without `ndim` attribute (e.g. lists)
        condition = np.ndim(array) == 1 and len(array) == self._n_records
        if condition:
            return True
        # add_error requires an error code and a message
        self.error.add_error("l2data-invalid-array-size", "Invalid array added to level-2 class")
        return False

    def _get_as_array(self, value, dtype=np.float32):
        """
        Create an output array from values that is of length n_records.
        Value can be scalar or array of length n_records. If value is any other length or dimension,
        an error will be added and a nan array of length n_records will be returned

        Arguments:
            value (integer, float or array-like of integer/float values)
            dtype: data type for arrays created by this method. NOTE: numpy arrays that are
                   passed as value are returned as they are

        Note: This method is mostly used to allow scalar uncertainty and bias values. It also makes sure
              that uncertainty and bias are of the same shape than the value, which is not guaranteed in
              L2ElevationArray. If a wrong uncertainty, bias shape is passed, the result will be nan
              uncertainties/biases throughout the processing chain and the start of NaN occurences can be used
              to trace the origin of the error.
        """

        # Check if value is either float or integer
        is_numeric = np.asarray(value).dtype.kind in "if"
        if not is_numeric:
            return np.full(self.arrshape, np.nan)

        # Check if value is scalar or array
        if np.isscalar(value):
            return np.full(self.arrshape, value).astype(dtype)

        # if array, check if correct size
        # (np.array is a function and must not be used in isinstance)
        if isinstance(value, np.ndarray):
            candidate = value
        else:
            candidate = np.asarray(value, dtype=dtype)
        if self._check_valid_size(candidate):
            return candidate
        return np.full(self.arrshape, np.nan)

    @staticmethod
    def _first_option(args):
        """ Return the first optional attribute argument or None if there is none """
        return args[0] if args else None

    def _format_datetime(self, dt, args):
        """ Return dt as strftime string if the first option starts with `%`, else in isoformat """
        dtfmt = self._first_option(args)
        if dtfmt is not None and re.match("%", dtfmt):
            return dt.strftime(dtfmt)
        return dt.isoformat()

    def _get_auxdata_source(self, auxdata_id):
        """ Return the auxdata source from the source dict or (if unknown) from the metadata """
        value = self._auxdata_source_dict.get(auxdata_id, "unkown")
        if value == "unkown":
            value = getattr(self.info, "source_auxdata_" + auxdata_id)
        return value

    def _get_attr_pysiral_version(self, *args):
        return psrlcfg.version

    def _get_attr_mission_id(self, *args):
        # XXX: Deprecated
        return self.info.mission

    def _get_attr_source_mission_id(self, *args):
        """
        Return the mission id. Options: `uppercase` or `select` followed by
        `<mission_id>:<label>` entries (returns the label of the matching mission id)
        """
        mission_id = self.info.mission
        option = self._first_option(args)
        if option == "uppercase":
            mission_id = mission_id.upper()
        if option == "select":
            for entry in args[1:]:
                mission_id_code, label = entry.split(":")
                if mission_id == mission_id_code:
                    return label
            return "Error (mission id %s not in select statement)" % mission_id
        return mission_id

    def _get_attr_source_mission_name(self, *args):
        mission_name = psrlcfg.platforms.get_name(self.info.mission)
        if self._first_option(args) == "uppercase":
            mission_name = mission_name.upper()
        return mission_name

    def _get_attr_source_mission_sensor(self, *args):
        mission_sensor = psrlcfg.platforms.get_sensor(self.info.mission)
        if self._first_option(args) == "uppercase":
            mission_sensor = mission_sensor.upper()
        return mission_sensor

    def _get_attr_source_mission_sensor_fn(self, *args):
        """ Same as source mission sensor, only a sanitized version for filenames """
        mission_sensor = psrlcfg.platforms.get_sensor(self.info.mission)
        for character in ["-"]:
            mission_sensor = mission_sensor.replace(character, "")
        if self._first_option(args) == "uppercase":
            mission_sensor = mission_sensor.upper()
        return mission_sensor

    def _get_attr_source_hemisphere(self, *args):
        """
        Return the hemisphere. Option `select`: the following two arguments
        are the return values for `north` and `south`
        """
        if self._first_option(args) == "select":
            choices = {"north": args[1], "south": args[2]}
            return choices.get(self.hemisphere, "n/a")
        return self.hemisphere

    def _get_attr_hemisphere(self, *args):
        # XXX: Deprecated
        return self.hemisphere

    def _get_attr_hemisphere_code(self, *args):
        hemisphere_code = self.hemisphere_code
        if self._first_option(args) == "uppercase":
            hemisphere_code = hemisphere_code.upper()
        return hemisphere_code

    def _get_attr_startdt(self, dtfmt):
        # XXX: Deprecated
        return self.info.start_time.strftime(dtfmt)

    def _get_attr_stopdt(self, dtfmt):
        # XXX: Deprecated
        return self.info.stop_time.strftime(dtfmt)

    def _get_attr_geospatial_lat_min(self, *args):
        return self._gett_attr_geospatial_str(np.nanmin(self.latitude))

    def _get_attr_geospatial_lat_max(self, *args):
        return self._gett_attr_geospatial_str(np.nanmax(self.latitude))

    def _get_attr_geospatial_lon_min(self, *args):
        return self._gett_attr_geospatial_str(np.nanmin(self.longitude))

    def _get_attr_geospatial_lon_max(self, *args):
        return self._gett_attr_geospatial_str(np.nanmax(self.longitude))

    def _gett_attr_geospatial_str(self, value):
        return "%.4f" % value

    def _get_attr_source_auxdata_sic(self, *args):
        return self._get_auxdata_source("sic")

    def _get_attr_source_auxdata_sitype(self, *args):
        return self._get_auxdata_source("sitype")

    def _get_attr_source_auxdata_mss(self, *args):
        return self._get_auxdata_source("mss")

    def _get_attr_source_auxdata_snow(self, *args):
        return self._get_auxdata_source("snow")

    def _get_attr_source_sic(self, *args):
        # XXX: Deprecated
        return self._auxdata_source_dict.get("sic", "unkown")

    def _get_attr_source_sitype(self, *args):
        # XXX: Deprecated
        return self._auxdata_source_dict.get("sitype", "unkown")

    def _get_attr_source_mss(self, *args):
        # XXX: Deprecated
        return self._auxdata_source_dict.get("mss", "unkown")

    def _get_attr_source_snow(self, *args):
        # XXX: Deprecated
        return self._auxdata_source_dict.get("snow", "unkown")

    def _get_attr_source_primary(self, *args):
        return self._source_primary_filename

    def _get_attr_l2_algorithm_id(self, *args):
        return self._l2_algorithm_id

    def _get_attr_l2_version_tag(self, *args):
        return self._l2_version_tag

    def _get_attr_utcnow(self, *args):
        return self._format_datetime(self._creation_time, args)

    def _get_attr_time_coverage_start(self, *args):
        return self._format_datetime(self.period.start, args)

    def _get_attr_time_coverage_end(self, *args):
        return self._format_datetime(self.period.stop, args)

    def _get_attr_time_coverage_duration(self, *args):
        return self.period.duration_isoformat

    def _get_attr_time_resolution(self, *args):
        """ Return the time between first and last record divided by the number of records """
        tdelta = self.time[-1] - self.time[0]
        # total_seconds() already contains the microseconds fraction
        seconds = tdelta.total_seconds()
        resolution = seconds / self.n_records
        return "%.2f seconds" % resolution

    def _get_attr_source_timeliness(self, *args):
        """ Return the timeliness of the l1b source data. Set default to
        NTC for backwark compability """
        try:
            timeliness = self.info.timeliness
        except AttributeError:
            timeliness = "NTC"
        if timeliness is None:
            timeliness = "NTC"
        if self._first_option(args) == "lowercase":
            timeliness = timeliness.lower()
        return timeliness

    def _get_attr_uuid(self, *args):
        """ Provide an uuid code (for tracking id's) """
        return str(uuid.uuid4())

    def _get_attr_doi(self, *args):
        return self._doi

    @property
    def parameter_catalog(self):
        """ A copy of the parameter catalog """
        return dict(self._PARAMETER_CATALOG)

    @property
    def property_catalog(self):
        """ A copy of the property catalog """
        return dict(self._PROPERTY_CATALOG)

    @property
    def auxvar_names(self):
        """ Sorted list of the registered auxiliary variable names """
        return sorted(self._auxiliary_catalog.keys())

    @property
    def auto_auxvar_id(self):
        """ A new auxiliary variable id. NOTE: Each access increases the internal counter """
        name = "auxvar%02g" % self._auto_auxvar_num
        self._auto_auxvar_num += 1
        return name

    @property
    def arrshape(self):
        # NOTE: This is the plain number of records (the parentheses do not create a tuple)
        return (self.n_records)

    @property
    def n_records(self):
        return self._n_records

    @property
    def hemisphere(self):
        return self.info.subset_region_name

    @property
    def hemisphere_code(self):
        return self._HEMISPHERE_CODES[self.hemisphere]

    @property
    def footprint_spacing(self):
        """
        Distance in meter between the first two positions. If this distance is close
        to zero, the distance between the last two positions is used instead
        """
        spacing = great_circle(
            (self.latitude[1], self.longitude[1]),
            (self.latitude[0], self.longitude[0])).meters

        if np.isclose(spacing, 0.0):
            spacing = great_circle(
                (self.latitude[-2], self.longitude[-2]),
                (self.latitude[-1], self.longitude[-1])).meters

        return spacing

    @property
    def dimdict(self):
        """ Returns dictionary with dimensions"""
        dimdict = OrderedDict([("time", self.n_records)])
        return dimdict

    @property
    def time(self):
        # The track object provides the time either as `time` or as `timestamp`
        try:
            time = self.track.time
        except AttributeError:
            time = self.track.timestamp
        return time

    @property
    def longitude(self):
        return self.track.longitude

    @property
    def latitude(self):
        return self.track.latitude

    @property
    def altitude(self):
        return self.track.altitude

    @property
    def surface_type_flag(self):
        return self.surface_type.flag

    @property
    def ssh(self):
        """ Sum of the attributes `mss` and `ssa` with the uncertainty of `ssa` """
        ssh = L2ElevationArray(shape=self._n_records)
        ssh.set_value(self.mss + self.ssa)
        ssh.set_uncertainty(self.ssa.uncertainty)
        return ssh
class Level1PreProcJobDef(DefaultLoggingClass):
    """
    A class that contains the information for the Level-1 pre-processor JOB (not the pre-processor class!)
    """

    def __init__(self, l1p_settings_id_or_file, tcs, tce, exclude_month=None, hemisphere="global", platform=None,
                 output_handler_cfg=None, source_repo_id=None):
        """
        The settings for the Level-1 pre-processor job
        :param l1p_settings_id_or_file: An id of an proc/l1 processor config file (filename excluding the .yaml
                                        extension) or an full filepath to a yaml config file
        :param tcs: [int list] Time coverage start (YYYY MM [DD])
        :param tce: [int list] Time coverage end (YYYY MM [DD]) [int list]
        :param exclude_month: [int list] A list of month that will be ignored
                              NOTE(review): this argument is currently not evaluated in this class
        :param hemisphere: [str] The target hemisphere (`north`, `south`, `global`:default).
        :param platform: [str] The target platform (pysiral id). Required if l1p settings files is valid for
                               multiple platforms (e.g. ERS-1/2, ...)
        :param output_handler_cfg: [dict] An optional dictionary with options of the output handler
                                   (`overwrite_protection`: [True, False], `remove_old`: [True, False])
        :param source_repo_id: [str] The tag in local_machine_def.yaml (l1b_repository.<platform>.<source_repo_id>)
                                  -> Overwrites the default source repo in the l1p settings
                                     (input_handler.options.local_machine_def_tag &
                                      output_handler.options.local_machine_def_tag)
        """

        super(Level1PreProcJobDef, self).__init__(self.__class__.__name__)
        self.error = ErrorStatus()

        # Get pysiral configuration
        # TODO: Move to global
        self._cfg = psrlcfg

        # Store command line options
        # (these must be set before the l1p settings are parsed, since the
        #  parsing steps read hemisphere, platform and source repo id)
        self._hemisphere = hemisphere
        self._platform = platform
        self._source_repo_id = source_repo_id

        # Parse the l1p settings file
        self.set_l1p_processor_def(l1p_settings_id_or_file)

        # Get full requested time range
        self._time_range = DatePeriod(tcs, tce)
        logger.info("Requested time range is %s" % self.time_range.label)

        # Store the data handler options
        if output_handler_cfg is None:
            output_handler_cfg = {}
        self._output_handler_cfg = output_handler_cfg

        # Measure execution time
        self.stopwatch = StopWatch()

    @classmethod
    def from_args(cls, args):
        """
        Init the Processor Definition from the pysiral-l1preproc command line argument object
        :param args: the command line argument object
        :return: the initialized class
        """

        # Optional Keywords
        kwargs = {}
        if args.exclude_month is not None:
            kwargs["exclude_month"] = args.exclude_month

        data_handler_cfg = dict()
        data_handler_cfg["overwrite_protection"] = args.overwrite_protection
        data_handler_cfg["remove_old"] = args.remove_old
        if args.source_repo_id is not None:
            data_handler_cfg["local_machine_def_tag"] = args.source_repo_id
        kwargs["output_handler_cfg"] = data_handler_cfg

        kwargs["hemisphere"] = args.hemisphere
        kwargs["platform"] = args.platform
        kwargs["source_repo_id"] = args.source_repo_id

        # Return the initialized class
        return cls(args.l1p_settings, args.start_date, args.stop_date, **kwargs)

    def set_l1p_processor_def(self, l1p_settings_id_or_file):
        """
        Parse the content of the processor definition file
        :param l1p_settings_id_or_file: id or full file path of the processor definition
        :return: None
        """

        # 1. Resolve the absolute file path
        procdef_file_path = self.get_l1p_proc_def_filename(l1p_settings_id_or_file)

        # 2. Read the content
        logger.info("Parsing L1P processor definition file: %s" % procdef_file_path)
        self._l1pprocdef = get_yaml_config(procdef_file_path)
        self._check_if_unambiguous_platform()

        # 3. Expand info (input data lookup directories)
        self._get_local_input_directory()

        # 4. update hemisphere for input adapter
        self._l1pprocdef.level1_preprocessor.options.polar_ocean.target_hemisphere = self.target_hemisphere

    def get_l1p_proc_def_filename(self, l1p_settings_id_or_file):
        """
        Query pysiral config to obtain filename for processor definition file
        :param l1p_settings_id_or_file: id or full file path of the processor definition
        :return: The argument itself if it is an existing file, else the filename
                 known to the pysiral config for the id
        """

        # A. Check if already filename
        if Path(l1p_settings_id_or_file).is_file():
            return l1p_settings_id_or_file

        # B. Not a file, try to resolve filename via pysiral config
        filename = self.pysiral_cfg.get_settings_file("proc", "l1", l1p_settings_id_or_file)
        if filename is None:
            msg = "Invalid Level-1 pre-processor definition filename or id: %s\n" % l1p_settings_id_or_file
            msg = msg + " \nRecognized Level-1 pre-processor definitions ids:\n"
            ids = self.pysiral_cfg.get_setting_ids("proc", "l1")
            msg = msg + "".join(" - " + settings_id + "\n" for settings_id in ids)
            self.error.add_error("invalid-l1p-outputdef", msg)
            self.error.raise_on_error()
        return filename

    def _get_local_input_directory(self):
        """
        Replace the tag for local machine def with the actual path info
        (sets `input_handler.options.lookup_dir` of the processor definition)
        :return: None
        """

        input_handler_cfg = self.l1pprocdef.input_handler.options
        local_machine_def_tag = input_handler_cfg.local_machine_def_tag
        primary_input_def = self.pysiral_cfg.local_machine.l1b_repository
        platform, tag = self.platform, local_machine_def_tag

        # Overwrite the tag if specifically supplied
        if self._source_repo_id is not None:
            tag = self._source_repo_id

        # Get the value
        # (a missing and an empty entry are reported with the same error. The test for None
        #  must be applied to the raw value, since AttrDict(...) is never None)
        expected_branch_name = "root.l1b_repository.%s.%s" % (platform, tag)
        try:
            branch_content = primary_input_def[platform][tag]
        except KeyError:
            branch_content = None
        if branch_content is None:
            msg = "Missing definition in `local_machine_def.yaml`. Expected branch: %s"
            msg = msg % expected_branch_name
            self.error.add_error("local-machine-def-missing-tag", msg)
            self.error.raise_on_error()
        branch = AttrDict(branch_content)

        # Validity checks
        # TODO: These checks are probably better located in a separate method?
        for key in ["source", "l1p"]:

            # 1. Branch must have specific keys for input and output
            if key not in branch:
                msg = "Missing definition in `local_machine_def.yaml`. Expected value: %s.%s"
                msg = msg % (expected_branch_name, key)
                self.error.add_error("local-machine-def-missing-tag", msg)
                self.error.raise_on_error()

            # 2. The value of each branch must be a valid directory or a
            #    attr (e.g. for different radar modes) with a list of directories
            directory_or_attrdict = branch[key]
            try:
                directories = directory_or_attrdict.values()
            except AttributeError:
                directories = [directory_or_attrdict]

            for directory in directories:
                if not Path(directory).is_dir():
                    msg = "Invalid directory in `local_machine_def.yaml`: %s is not a valid directory"
                    msg = msg % directory
                    self.error.add_error("local-machine-def-invalid-dir", msg)
                    self.error.raise_on_error()

        # Update the lookup dir parameter
        self.l1pprocdef.input_handler["options"]["lookup_dir"] = branch.source

    def _check_if_unambiguous_platform(self):
        """
        Checks if the platform is unique, since some l1 processor definitions are valid for a series of
        platforms, such as ERS-1/2, Sentinel-3A/B, etc. The indicator is that the platform tag in the
        l1 preprocessor settings is comma separated list.

        For the location of the source data, it is however necessary that the exact platform is known. It must
        therefore be specified explicitly by the -platform argument.

        NOTE: The program is terminated via sys.exit if the platform cannot be determined or does not match
        """

        settings_is_ambigous = "," in self._l1pprocdef.platform
        platform_is_known = self.platform is not None

        # Test if platform is given if the settings file is valid for more than 1 platform
        if settings_is_ambigous and not platform_is_known:
            msg = "Error: platform in l1p settings is ambiguous (%s), but no platform has been given (-platform)"
            msg = msg % self._l1pprocdef.platform
            sys.exit(msg)

        # Test if platform provided matches the platform list in the settings file
        # (the test is against the entries of the comma separated list, since a substring
        #  test would also accept a partial platform id)
        if settings_is_ambigous and platform_is_known:
            valid_platforms = [entry.strip() for entry in str(self._l1pprocdef.platform).split(",")]
            if self.platform not in valid_platforms:
                msg = "Error: platform in l1p settings (%s) and given platform (%s) do not match"
                msg = msg % (self._l1pprocdef.platform, self.platform)
                sys.exit(msg)

        # If platform in settings is unambigous, but not provided -> get platform from settings
        if not settings_is_ambigous and not platform_is_known:
            self._platform = self._l1pprocdef.platform
            logger.info("- get platform from l1p settings -> %s" % self.platform)

    @property
    def hemisphere(self):
        return self._hemisphere

    @property
    def target_hemisphere(self):
        """ The hemisphere option as list of hemisphere names (`global` -> both hemispheres) """
        values = {"north": ["north"], "south": ["south"], "global": ["north", "south"]}
        return values[self.hemisphere]

    @property
    def pysiral_cfg(self):
        return self._cfg

    @property
    def l1pprocdef(self):
        return self._l1pprocdef

    @property
    def time_range(self):
        return self._time_range

    @property
    def period_segments(self):
        """ The monthly segments of the requested time range (cropped to the period) """
        segments = self._time_range.get_segments("month", crop_to_period=True)
        return segments

    @property
    def output_handler_cfg(self):
        return self._output_handler_cfg

    @property
    def platform(self):
        return self._platform
class L1PreProcBase(DefaultLoggingClass):
    """
    Base class for the Level-1 pre-processors. It reads the input files through the input adapter,
    applies the configured post-processing items to the polar ocean segments, merges connected orbit
    segments in a stack and exports the merged segments through the output handler.

    NOTE: `process_input_files` calls `self.extract_polar_ocean_segments(l1)`. This method is not defined
          in this class and therefore needs to be supplied by the child classes.
    """

    def __init__(self, cls_name, input_adapter, output_handler, cfg):
        """
        :param cls_name: Name used for the logger and the error handler
        :param input_adapter: Object with a `get_l1(filepath, polar_ocean_check)` method
        :param output_handler: Object with an `export_to_netcdf(l1)` method
        :param cfg: The configuration (options) of the pre-processor
        """

        # Make sure the logger/error handler has the name of the parent class
        super(L1PreProcBase, self).__init__(cls_name)
        self.error = ErrorStatus(caller_id=cls_name)

        # The class that translates a given input file into an L1BData object
        self.input_adapter = input_adapter

        # Output data handler that creates l1p netCDF files from l1 data objects
        self.output_handler = output_handler

        # The configuration for the pre-processor
        self.cfg = cfg

        # The stack of Level-1 objects is a simple list
        self.l1_stack = []

    def process_input_files(self, input_file_list):
        """
        Main entry point for the Level-Preprocessor.
        :param input_file_list: A list full filepath for the pre-processor
        :return: None
        """

        # Validity Check
        n_input_files = len(input_file_list)
        if n_input_files == 0:
            logger.warning("Passed empty input file list to process_input_files()")
            return

        # Init helpers
        prgs = ProgressIndicator(n_input_files)

        # A class that is passed to the input adapter to check if the pre-processsor wants the
        # content of the current file
        polar_ocean_check = L1PreProcPolarOceanCheck(self.__class__.__name__, self.polar_ocean_props)

        # orbit segments may or may not be connected, therefore the list of input file
        # needs to be processed sequentially.
        for i, input_file in enumerate(input_file_list):

            # Step 1: Read Input
            # Map the entire orbit segment into on Level-1 data object. This is the task
            # of the input adaptor. The input handler gets only the filename and the target
            # region to assess whether it is necessary to parse and transform the file content
            # for the sake of computational efficiency.
            logger.info("+ Process input file %s" % prgs.get_status_report(i))
            l1 = self.input_adapter.get_l1(input_file, polar_ocean_check)
            if l1 is None:
                logger.info("- No polar ocean data for curent job -> skip file")
                continue

            # Step 2: Extract and subset
            # The input files may contain unwanted data (low latitude/land segments). It is the job of the
            # L1PReProc children class to return only the relevant segments over polar ocean as a list
            # of l1 objects.
            l1_segments = self.extract_polar_ocean_segments(l1)

            # Step 3: Post-processing
            # Computational expensive post-processing (e.g. computation of waveform shape parameters) can
            # now be executed as the the Level-1 segments are cropped to the minimal length.
            self.l1_post_processing(l1_segments)

            # Step 4: Merge orbit segments
            # Add the list of orbit segments to the l1 data stack and merge those that are connected
            # (e.g. two half orbits connected at the pole) into a single l1 object. Orbit segments that
            # are unconnected from other segments in the stack will be exported to netCDF files.
            self.l1_stack_merge_and_export(l1_segments)

        # Step 5: Export the last item in the stack
        # NOTE: The stack is empty if no input file contributed a segment (e.g. all files were skipped).
        #       In this case there is nothing to merge and export.
        if self.stack_len == 0:
            return
        l1_merged = self.l1_get_merged_stack()
        self.l1_export_to_netcdf(l1_merged)

        # The content of the stack has been exported -> start the next call with a clean stack to
        # prevent that the same data is exported a second time
        self.l1_stack = []

    def l1_post_processing(self, l1_segments):
        """
        Apply the post-processing procedures defined in the l1p processor definition file.
        :param l1_segments: A list of Level-1 data objects
        :return: None, the l1_segments are changed in place
        """

        # Get the post processing options
        pre_processing_items = self.cfg.get("pre_processing_items", None)
        if pre_processing_items is None:
            logger.info("No pre processing items defined")
            return

        # Measure time for the different post processors
        timer = StopWatch()

        # Get the list of post-processing items
        for pp_item in pre_processing_items:
            timer.start()
            pp_class = get_cls(pp_item["module_name"], pp_item["class_name"], relaxed=False)
            post_processor = pp_class(**pp_item["options"])
            for l1 in l1_segments:
                post_processor.apply(l1)
            timer.stop()
            msg = "- L1 pre-processing item `%s` applied in %.3f seconds" % (pp_item["label"], timer.get_seconds())
            logger.info(msg)

    def l1_stack_merge_and_export(self, l1_segments):
        """
        Add the input Level-1 segments to the l1 stack and export the unconnected ones as l1p netCDF products
        :param l1_segments: A list of Level-1 data objects
        :return: None
        """

        # Loop over all input segments
        for l1 in l1_segments:

            # Test if l1 segment is connected to stack
            is_connected = self.l1_is_connected_to_stack(l1)

            # Case 1: Segment is connected
            # -> Add the l1 segment to the stack and check the next segment.
            if is_connected:
                logger.info("- L1 segment connected -> add to stack")
                self.l1_stack.append(l1)

            # Case 2: Segment is not connected
            # -> In this case all items in the l1 stack will be merged and the merged l1 object will be
            #    exported to a l1p netCDF product. The current l1 segment that was unconnected to the stack
            #    will become the next stack
            else:
                logger.info("- L1 segment unconnected -> exporting current stack")
                l1_merged = self.l1_get_merged_stack()
                self.l1_export_to_netcdf(l1_merged)
                self.l1_stack = [l1]

    def l1_is_connected_to_stack(self, l1):
        """
        Check if the start time of file i and the stop time if file i-1 indicate neighbouring orbit segments
        (e.g. due to radar mode change, or two half-orbits)
        :param l1: The Level-1 object to be tested against the last item of the stack
        :return: Flag if l1 is connected (True of False)
        """

        # Stack is empty (return True -> create a new stack)
        if self.stack_len == 0:
            return True

        # Test if segments are adjacent based on time gap between them
        # NOTE: The full duration of the gap must be evaluated with `total_seconds()`. The `seconds`
        #       attribute of a timedelta is only the seconds component and ignores full days, thus a gap
        #       of 1 day and 5 seconds would be evaluated as a gap of 5 seconds.
        #       Negative gaps (segment starts before the end of the stack) are regarded as unconnected.
        time_gap = l1.info.start_time - self.last_stack_item.info.stop_time
        time_gap_seconds = time_gap.total_seconds()
        threshold = self.cfg.orbit_segment_connectivity.max_connected_segment_timedelta_seconds
        is_connected = 0.0 <= time_gap_seconds <= threshold
        return is_connected

    def l1_get_merged_stack(self):
        """
        Concatenates all items in the l1 stack and returns the merged Level-1 data object.
        Note: This operation does not add or remove items of the Level-1 stack list. The merge target
              is however the first item of the stack, which receives all other items via its `append`
              method.
        :return: Level-1 data object
        """
        l1_merged = self.l1_stack[0]
        for l1 in self.l1_stack[1:]:
            l1_merged.append(l1)
        return l1_merged

    def l1_export_to_netcdf(self, l1):
        """
        Exports the Level-1 object as as l1p netCDF. The export is skipped if the number of records is
        below the value of the optional configuration key `export_minimum_n_records`
        :param l1: The Level-1 object to exported
        :return: None
        """

        if "export_minimum_n_records" in self.cfg:
            minimum_n_records = self.cfg.export_minimum_n_records
        else:
            minimum_n_records = 0

        if l1.n_records >= minimum_n_records:
            self.output_handler.export_to_netcdf(l1)
            logger.info("- Written l1p product: %s" % self.output_handler.last_written_file)
        else:
            logger.info("- Orbit segment below minimum size (%g), skipping" % l1.n_records)

    def trim_single_hemisphere_segment_to_polar_region(self, l1):
        """
        Extract polar region of interest from a segment that is either north or south (not global)
        :param l1: Input Level-1 object
        :return: Trimmed Input Level-1 object
        """
        polar_threshold = self.cfg.polar_ocean.polar_latitude_threshold
        is_polar = np.abs(l1.time_orbit.latitude) >= polar_threshold
        polar_subset = np.where(is_polar)[0]
        if len(polar_subset) != l1.n_records:
            l1.trim_to_subset(polar_subset)
        return l1

    def trim_two_hemisphere_segment_to_polar_regions(self, l1):
        """
        Extract polar regions of interest from a segment that is either north, south or both. The method
        will preserve the order of the hemispheres
        :param l1: Input Level-1 object
        :return: List of Trimmed Input Level-1 objects
        """

        polar_threshold = self.cfg.polar_ocean.polar_latitude_threshold
        l1_list = []

        # Loop over the two hemispheres
        for hemisphere in self.cfg.polar_ocean.target_hemisphere:

            if hemisphere == "north":
                is_polar = l1.time_orbit.latitude >= polar_threshold

            elif hemisphere == "south":
                is_polar = l1.time_orbit.latitude <= (-1.0 * polar_threshold)

            else:
                msg = "Unknown hemisphere: %s [north|south]" % hemisphere
                self.error.add_error("invalid-hemisphere", msg)
                self.error.raise_on_error()

            # Extract the subset (if applicable)
            polar_subset = np.where(is_polar)[0]
            n_records_subset = len(polar_subset)

            # is true subset -> add subset to output list
            if n_records_subset != l1.n_records and n_records_subset > 0:
                l1_segment = l1.extract_subset(polar_subset)
                l1_list.append(l1_segment)

            # entire segment in polar region -> add full segment to output list
            elif n_records_subset == l1.n_records:
                l1_list.append(l1)

            # no coverage in target hemisphere -> remove segment from list
            else:
                pass

        # Last step: Sort the list to maintain temporal order
        # (only if more than 1 segment)
        if len(l1_list) > 1:
            l1_list = sorted(l1_list, key=attrgetter("tcs"))

        return l1_list

    def trim_full_orbit_segment_to_polar_regions(self, l1):
        """
        Extract polar regions of interest from a segment that is either north, south or both. The method
        will preserve the order of the hemispheres
        :param l1: Input Level-1 object
        :return: List of Trimmed Input Level-1 objects
        """

        polar_threshold = self.cfg.polar_ocean.polar_latitude_threshold
        l1_list = []

        # Loop over the two hemispheres
        for hemisphere in self.cfg.polar_ocean.target_hemisphere:

            # Compute full polar subset range
            if hemisphere == "north":
                is_polar = l1.time_orbit.latitude >= polar_threshold

            elif hemisphere == "south":
                is_polar = l1.time_orbit.latitude <= (-1.0 * polar_threshold)

            else:
                msg = "Unknown hemisphere: %s [north|south]" % hemisphere
                self.error.add_error("invalid-hemisphere", msg)
                self.error.raise_on_error()

            # Step: Extract the polar ocean segment for the given hemisphere
            polar_subset = np.where(is_polar)[0]
            n_records_subset = len(polar_subset)

            # Safety check
            if n_records_subset == 0:
                continue
            l1_segment = l1.extract_subset(polar_subset)

            # Step: Trim non-ocean segments
            # NOTE: `trim_non_ocean_data` returns None if the segment does not contain any ocean data.
            #       Such a segment must not be passed on to the splitting step.
            l1_segment = self.trim_non_ocean_data(l1_segment)
            if l1_segment is None:
                continue

            # Step: Split the polar subset to its marine regions
            l1_segment_list = self.split_at_large_non_ocean_segments(l1_segment)

            # Step: append the ocean segments
            l1_list.extend(l1_segment_list)

        # Last step: Sort the list to maintain temporal order
        # (only if more than 1 segment)
        if len(l1_list) > 1:
            l1_list = sorted(l1_list, key=attrgetter("tcs"))

        return l1_list

    def filter_small_ocean_segments(self, l1):
        """
        This method sets the surface type flag of very small ocean segments to land. This action should
        prevent large portions of land staying in the l1 segment is a small fjord etc is crossed. It should
        also filter out smaller ocean segments that do not have a realistic chance of freeboard retrieval.

        :param l1: A pysiral.l1bdata.Level1bData instance
        :return: filtered l1 object
        """

        # Minimum size for valid ocean segments
        ocean_mininum_size_nrecords = self.cfg.polar_ocean.ocean_mininum_size_nrecords

        # Get the clusters of ocean parts in the l1 object
        # NOTE: The third return value of the run length encoding is used as the value of each run
        #       (True for ocean runs), consistent with its usage in `split_at_large_non_ocean_segments`
        ocean_flag = l1.surface_type.get_by_name("ocean").flag
        land_flag = l1.surface_type.get_by_name("land").flag
        segments_len, segments_start, is_ocean_segment = rle(ocean_flag)

        # Find smaller than threshold ocean segments
        # (the non-ocean runs between the ocean segments must not be modified)
        is_small_segment = segments_len < ocean_mininum_size_nrecords
        small_cluster_indices = np.where(np.logical_and(is_ocean_segment, is_small_segment))[0]

        # Do not mess with the l1 object if not necessary
        if len(small_cluster_indices) == 0:
            return l1

        # Set land flag -> True for small ocean segments
        for small_cluster_index in small_cluster_indices:
            i0 = segments_start[small_cluster_index]
            i1 = i0 + segments_len[small_cluster_index]
            land_flag[i0:i1] = True

        # Update the l1 surface type flag by re-setting the land flag
        l1.surface_type.add_flag(land_flag, "land")

        # All done
        return l1

    def trim_non_ocean_data(self, l1):
        """
        Remove leading and trailing data that is not if type ocean.
        :param l1: The input Level-1 object
        :return: The subsetted Level-1 object or None if the input does not contain any ocean data
        """

        ocean = l1.surface_type.get_by_name("ocean")
        first_ocean_index = get_first_array_index(ocean.flag, True)
        last_ocean_index = get_last_array_index(ocean.flag, True)
        if first_ocean_index is None or last_ocean_index is None:
            return None
        n = l1.info.n_records - 1
        is_full_ocean = first_ocean_index == 0 and last_ocean_index == n
        if not is_full_ocean:
            ocean_subset = np.arange(first_ocean_index, last_ocean_index + 1)
            l1.trim_to_subset(ocean_subset)
        return l1

    def split_at_large_non_ocean_segments(self, l1):
        """
        Identify larger segments that are not ocean (land, land ice) and split the segments if necessary.
        The return value will always be a list of Level-1 object instances, even if no non-ocean data
        segment is present in the input data file
        :param l1: Input Level-1 object
        :return: a list of Level-1 objects.
        """

        # Identify connected non-ocean segments within the orbit
        ocean = l1.surface_type.get_by_name("ocean")
        not_ocean_flag = np.logical_not(ocean.flag)
        segments_len, segments_start, not_ocean = rle(not_ocean_flag)
        landseg_index = np.where(not_ocean)[0]

        # no non-ocean segments, return full segment
        if len(landseg_index) == 0:
            return [l1]

        # Test if non-ocean segments above the size threshold that will require a split of the segment.
        # The motivation behind this step to keep l1p data files as small as possible, while tolerating
        # smaller non-ocean sections
        treshold = self.cfg.polar_ocean.allow_nonocean_segment_nrecords
        large_landsegs_index = np.where(segments_len[landseg_index] > treshold)[0]
        large_landsegs_index = landseg_index[large_landsegs_index]

        # no segment split necessary, return full segment
        if len(large_landsegs_index) == 0:
            return [l1]

        # Split of orbit segment required, generate individual Level-1 segments from the ocean segments
        # NOTE: The end of a non-ocean run is computed from its own start and length. Looking up the start
        #       of the following run fails if the non-ocean run is the last run of the segment.
        #       Empty subsets (non-ocean run at the very start or end) are not extracted.
        l1_segments = []
        n_records = len(ocean.flag)
        start_index = 0
        for index in large_landsegs_index:
            stop_index = segments_start[index]
            if stop_index > start_index:
                subset_list = np.arange(start_index, stop_index)
                l1_segments.append(l1.extract_subset(subset_list))
            start_index = segments_start[index] + segments_len[index]

        # Extract the last subset
        if start_index < n_records:
            last_subset_list = np.arange(start_index, n_records)
            l1_segments.append(l1.extract_subset(last_subset_list))

        # Return a list of segments
        return l1_segments

    def split_at_time_discontinuities(self, l1_list):
        """
        Split l1 object(s) at discontinuities of the timestamp value and return the expanded list with
        l1 segments.
        :param l1_list: [list] a list of l1b_files
        :return: expanded list
        """

        # Prepare input (should always be list)
        seconds_threshold = self.cfg.timestamp_discontinuities.split_at_time_gap_seconds
        dt_threshold = timedelta(seconds=seconds_threshold)

        # Output (list with l1b segments)
        l1_segments = []

        for l1 in l1_list:

            # Get timestamp discontinuities (if any)
            time = l1.time_orbit.timestamp

            # Get start start/stop indices pairs
            segments_start = np.array([0])
            segments_start_indices = np.where(np.ediff1d(time) > dt_threshold)[0] + 1
            segments_start = np.append(segments_start, segments_start_indices)

            segments_stop = segments_start[1:] - 1
            segments_stop = np.append(segments_stop, len(time) - 1)

            # Check if only one segment found
            if len(segments_start) == 1:
                l1_segments.append(l1)
                continue

            # Extract subsets
            segment_indices = zip(segments_start, segments_stop)
            for start_index, stop_index in segment_indices:
                subset_indices = np.arange(start_index, stop_index + 1)
                l1_segment = l1.extract_subset(subset_indices)
                l1_segments.append(l1_segment)

        return l1_segments

    @property
    def target_region_def(self):
        """ The `polar_ocean` configuration branch (same content as `polar_ocean_props`) """
        return self.polar_ocean_props

    @property
    def polar_ocean_props(self):
        """ The `polar_ocean` configuration branch (an error is raised if the key is missing) """
        if "polar_ocean" not in self.cfg:
            msg = "Missing configuration key `polar_ocean` in Level-1 Pre-Processor Options"
            self.error.add_error("l1preproc-missing-option", msg)
            self.error.raise_on_error()
        return self.cfg.polar_ocean

    @property
    def orbit_segment_connectivity_props(self):
        """ The `orbit_segment_connectivity` configuration branch (an error is raised if the key is missing) """
        if "orbit_segment_connectivity" not in self.cfg:
            msg = "Missing configuration key `orbit_segment_connectivity` in Level-1 Pre-Processor Options"
            self.error.add_error("l1preproc-missing-option", msg)
            self.error.raise_on_error()
        return self.cfg.orbit_segment_connectivity

    @property
    def stack_len(self):
        """ The number of Level-1 objects in the stack """
        return len(self.l1_stack)

    @property
    def last_stack_item(self):
        """ The most recently added Level-1 object of the stack """
        return self.l1_stack[-1]
class ESACryoSat2PDSBaselineD(DefaultLoggingClass):
    """
    Input adapter that converts an ESA CryoSat-2 baseline-D L1b netCDF file into a Level-1 data object.
    The main entry point is `get_l1`.
    """

    def __init__(self, cfg, raise_on_error=False):
        """
        :param cfg: The options of the input handler (read by this class: `exclude_predicted_orbits`,
            `range_correction_targets`, `classifier_targets`)
        :param raise_on_error: Flag stored as `self.raise_on_error` (not evaluated within this class)
        """

        cls_name = self.__class__.__name__
        super(ESACryoSat2PDSBaselineD, self).__init__(cls_name)
        self.error = ErrorStatus(caller_id=cls_name)

        # Store arguments
        self.raise_on_error = raise_on_error
        self.cfg = cfg

        # Init main class variables
        self.nc = None
        self.filepath = None
        self.l1 = None

    @staticmethod
    def translate_opmode2radar_mode(op_mode):
        """
        Converts the ESA operation mode str in the pysiral compliant version
        :param op_mode: The ESA operation mode (`sar`, `lrm`, `sarin`)
        :return: The radar mode name (`sar`, `lrm`, `sin`) or None for an unknown operation mode
        """
        translate_dict = {"sar": "sar", "lrm": "lrm", "sarin": "sin"}
        return translate_dict.get(op_mode, None)

    def get_l1(self, filepath, polar_ocean_check=None):
        """
        Main entry point to the CryoSat-2 Baseline-D Input Adapter
        :param filepath: The path to the L1b netCDF file
        :param polar_ocean_check: (Optional) object with a `has_polar_ocean_segments(info)` method.
            If this method returns False, the file content is not transferred to the Level-1 object
        :return: The Level-1 object or `self.empty` (None) if the file is invalid, could not be parsed,
            is excluded because of a predicted orbit solution or does not pass the polar ocean check
        """

        timer = StopWatch()
        timer.start()

        # Save filepath
        self.filepath = filepath

        # Create an empty Level-1 data object
        self.l1 = Level1bData()

        # Input Validation
        if not Path(filepath).is_file():
            msg = "Not a valid file: %s" % filepath
            logger.warning(msg)
            self.error.add_error("invalid-filepath", msg)
            return self.empty

        # Parse the input file
        self._read_input_netcdf(filepath, attributes_only=True)
        if self.nc is None:
            return self.empty

        # CAVEAT: An issue has been identified with baseline-D L1b data when the orbit solution
        #         is based on predicted orbits and not the DORIS solution (Nov 2020).
        #         The source of the orbit data can be identified by the `vector_source` global attribute
        #         in the L1b source files. This can take/should take the following values:
        #
        #             nrt:  "fos predicted" (predicted orbit)
        #                   "doris_navigator" (DORIS Nav solution)
        #
        #             rep:  "doris_precise" (final and precise DORIS solution)
        #
        #         To prevent l1 data with erroneous orbit solution entering the processing chain, l1 data
        #         with the predicted orbit can be excluded here. The process of exclusion requires to set
        #         a flag in the l1 processor definition for the input handler:
        #
        #             exclude_predicted_orbits: True
        #
        exclude_predicted_orbits = self.cfg.get("exclude_predicted_orbits", False)
        vector_source = self.nc.vector_source.lower().strip()
        is_predicted_orbit = vector_source == "fos predicted"
        logger.debug(vector_source)
        if is_predicted_orbit and exclude_predicted_orbits:
            logger.warning("Predicted orbit solution detected -> skip file")
            return self.empty

        # Get metadata
        self._set_input_file_metadata()
        if polar_ocean_check is not None:
            has_polar_ocean_data = polar_ocean_check.has_polar_ocean_segments(self.l1.info)
            if not has_polar_ocean_data:
                timer.stop()
                return self.empty

        # Polar ocean check passed, now fill the rest of the l1 data groups
        self._set_l1_data_groups()

        timer.stop()
        logger.info("- Created L1 object in %.3f seconds" % timer.get_seconds())

        # Return the l1 object
        return self.l1

    @staticmethod
    def get_wfm_range(window_delay, n_range_bins):
        """
        Returns the range for each waveform bin based on the window delay and the number of range bins
        :param window_delay: The two-way delay to the center of the range window in seconds
        :param n_range_bins: The number of range bins (256: sar, 512: sin)
        :return: The range for each waveform bin as array (time, ns)
        """
        lightspeed = 299792458.0
        bandwidth = 320000000.0

        # The two way delay time give the distance to the central bin
        central_window_range = window_delay * lightspeed / 2.0

        # Calculate the offset from the center to the first range bin
        window_size = (n_range_bins * lightspeed) / (4.0 * bandwidth)
        first_bin_offset = window_size / 2.0

        # Calculate the range increment for each bin
        range_increment = np.arange(n_range_bins) * lightspeed / (4.0 * bandwidth)

        # Range of each bin relative to the central bin (1D array with the length `n_range_bins`)
        range_offset = range_increment - first_bin_offset

        # Compute the range for each bin and return
        # (broadcasting of the column with the central ranges against the row of bin offsets)
        wfm_range = central_window_range[:, np.newaxis] + range_offset[np.newaxis, :]
        return wfm_range

    @staticmethod
    def interp_1hz_to_20hz(variable_1hz, time_1hz, time_20hz, **kwargs):
        """
        Computes a simple linear interpolation to transform a 1Hz into a 20Hz variable
        :param variable_1hz: an 1Hz variable array
        :param time_1hz: 1Hz reference time
        :param time_20hz: 20 Hz reference time
        :param kwargs: Additional keywords for `scipy.interpolate.interp1d` (e.g. `kind`)
        :return: the interpolated 20Hz variable and an error flag. If the interpolation raises a
            ValueError, the variable is filled with NaN values and the error flag is True
        """
        error_status = False
        try:
            f = interpolate.interp1d(time_1hz, variable_1hz, bounds_error=False, **kwargs)
            variable_20hz = f(time_20hz)
        except ValueError:
            fill_value = np.nan
            variable_20hz = np.full(time_20hz.shape, fill_value)
            error_status = True
        return variable_20hz, error_status

    def _read_input_netcdf(self, filepath, **kwargs):
        """
        Read the netCDF file via xarray. The dataset is stored in `self.nc`, which is set to None if
        the file could not be parsed.
        :param filepath: The path to the netCDF file
        :param kwargs: Accepted for compatibility, but not evaluated
        :return: None
        """
        # NOTE: Only regular errors are caught here. A bare `except` would also swallow
        #       KeyboardInterrupt and SystemExit and thus prevent stopping the processor.
        try:
            self.nc = xarray.open_dataset(filepath, decode_times=False, mask_and_scale=True)
        except Exception:
            msg = "Error encountered by xarray parsing: %s" % filepath
            self.error.add_error("xarray-parse-error", msg)
            self.nc = None
            logger.warning(msg)

    def _set_input_file_metadata(self):
        """
        Fill the product info of the Level-1 object with the global attributes of the netCDF file
        :return: None
        """

        # Short cuts
        metadata = self.nc.attrs
        info = self.l1.info

        # Processing environment metadata
        info.set_attribute("pysiral_version", pysiral_version)

        # General product metadata
        info.set_attribute("mission", "cryosat2")
        info.set_attribute("mission_sensor", "siral")
        info.set_attribute("mission_data_version", "D")
        info.set_attribute("orbit", metadata["abs_orbit_start"])
        info.set_attribute("cycle", metadata["cycle_number"])
        info.set_attribute("mission_data_source", Path(self.filepath).name)
        info.set_attribute("timeliness", cs2_procstage2timeliness(metadata["processing_stage"]))

        # Time-Orbit Metadata
        lats = [float(metadata["first_record_lat"]) * 1e-6, float(metadata["last_record_lat"]) * 1e-6]
        lons = [float(metadata["first_record_lon"]) * 1e-6, float(metadata["last_record_lon"]) * 1e-6]
        info.set_attribute("start_time", parse_datetime_str(metadata["first_record_time"][4:]))  # TAI=....
        info.set_attribute("stop_time", parse_datetime_str(metadata["last_record_time"][4:]))  # TAI=....
        info.set_attribute("lat_min", np.amin(lats))
        info.set_attribute("lat_max", np.amax(lats))
        info.set_attribute("lon_min", np.amin(lons))
        info.set_attribute("lon_max", np.amax(lons))

        # Product Content Metadata
        # (the radar mode of the file gets 100 percent, all other modes 0 percent)
        for mode in ["sar", "sin", "lrm"]:
            percent_value = 0.0
            if metadata["sir_op_mode"].strip().lower() == mode:
                percent_value = 100.
            info.set_attribute("{}_mode_percent".format(mode), percent_value)
        info.set_attribute("open_ocean_percent", float(metadata["open_ocean_percent"]) * 0.01)

    def _set_l1_data_groups(self):
        """
        Fill all data groups of the Level-1 data object with the content of the netCDF file. This is just the
        overview method, see specific sub-methods below
        :return: None
        """
        self._set_time_orbit_data_group()
        self._set_waveform_data_group()
        self._set_range_correction_group()
        self._set_surface_type_group()
        self._set_classifier_group()

    def _set_time_orbit_data_group(self):
        """
        Transfer the time orbit parameter from the netcdf to l1 data object
        :return: None
        """

        # Transfer the timestamp
        # NOTE: Here it is critical that the xarray does not automatically decodes time since it is
        #       difficult to work with the numpy datetime64 date format. Better to compute datetimes using
        #       a know num2pydate conversion
        tai_datetime = num2pydate(self.nc.time_20_ku.values, units=self.nc.time_20_ku.units)
        converter = UTCTAIConverter()
        utc_timestamp = converter.tai2utc(tai_datetime, check_all=False)
        self.l1.time_orbit.timestamp = utc_timestamp

        # Set the geolocation
        self.l1.time_orbit.set_position(
            self.nc.lon_20_ku.values,
            self.nc.lat_20_ku.values,
            self.nc.alt_20_ku.values,
            self.nc.orb_alt_rate_20_ku.values)

        # Set antenna attitude
        self.l1.time_orbit.set_antenna_attitude(
            self.nc.off_nadir_pitch_angle_str_20_ku.values,
            self.nc.off_nadir_roll_angle_str_20_ku.values,
            self.nc.off_nadir_yaw_angle_str_20_ku.values)

    def _set_waveform_data_group(self):
        """
        Transfer of the waveform group to the Level-1 object. This includes
          1. the computation of waveform power in Watts
          2. the computation of the window delay in meter for each waveform bin
          3. extraction of the waveform valid flag
        :return: None
        """

        # Get the waveform
        # NOTE: Convert the waveform units to Watts. From the documentation:is applied as follows:
        #       pwr_waveform_20_ku(time, ns) * echo_scale_factor_20_ku(time, ns) * 2 ^ echo_scale_pwr_20_ku(time)
        wfm_linear = self.nc.pwr_waveform_20_ku.values

        # Get the shape of the waveform array
        dim_time, dim_ns = wfm_linear.shape

        # Scaling parameter are 1D -> add a second axis for broadcasting against the waveform array
        echo_scale_factor = self.nc.echo_scale_factor_20_ku.values[:, np.newaxis]
        echo_scale_pwr = self.nc.echo_scale_pwr_20_ku.values[:, np.newaxis]

        # Convert the waveform from linear counts to Watts
        wfm_power = wfm_linear * echo_scale_factor * 2.0**echo_scale_pwr

        # Get the window delay
        # From the documentation:
        #   Calibrated 2-way window delay: distance from CoM to middle range window (at sample ns/2 from 0).
        #   It includes all the range corrections given in the variable instr_cor_range and in the
        #   variable uso_cor_20_ku. This is a 2-way time and 2-way corrections are applied.
        window_delay = self.nc.window_del_20_ku.values

        # Convert window delay to range for each waveform range bin
        wfm_range = self.get_wfm_range(window_delay, dim_ns)

        # Make sure that parameter are float and not double
        # -> Important for cythonized algorithm parts (ctfrma specifically uses floats)
        wfm_power = wfm_power.astype(np.float32)
        wfm_range = wfm_range.astype(np.float32)

        # Set the waveform
        op_mode = str(self.nc.attrs["sir_op_mode"].strip().lower())
        radar_mode = self.translate_opmode2radar_mode(op_mode)
        self.l1.waveform.set_waveform_data(wfm_power, wfm_range, radar_mode)

        # --- Get the valid flag ---
        #
        # From the documentation
        # :comment = "Measurement confidence flags. Generally the MCD flags indicate problems when set.
        #             If the whole MCD is 0 then no problems or non-nominal conditions were detected.
        #             Serious errors are indicated by setting the most significant bit, i.e. block_degraded,
        #             in which case the block must not be processed. Other error settings can be regarded
        #             as warnings.";
        #
        # :flag_masks = -2147483648,    block_degraded        <- most severe error
        #                1073741824,    blank_block
        #                536870912,     datation_degraded
        #                268435456,     orbit_prop_error
        #                134217728,     orbit_file_change
        #                67108864,      orbit_gap
        #                33554432,      echo_saturated
        #                16777216,      other_echo_error
        #                8388608,       sarin_rx1_error
        #                4194304,       sarin_rx2_error
        #                2097152,       window_delay_error
        #                1048576,       agc_error
        #                524288,        cal1_missing
        #                262144,        cal1_default
        #                131072,        doris_uso_missing
        #                65536,         ccal1_default
        #                32768,         trk_echo_error
        #                16384,         echo_rx1_error
        #                8192,          echo_rx2_error
        #                4096,          npm_error             <- Defined as maximum permissible error level
        #                2048,          cal1_pwr_corr_type
        #                128,           phase_pert_cor_missing  <- Seems to be always set for SARin
        #                64,            cal2_missing
        #                32,            cal2_default
        #                16,            power_scale_error
        #                8,             attitude_cor_missing
        #                1,             phase_pert_cor_default
        measurement_confident_flag = self.nc.flag_mcd_20_ku.values
        valid_flag = (measurement_confident_flag >= 0) & (measurement_confident_flag <= 4096)
        self.l1.waveform.set_valid_flag(valid_flag)

    def _set_range_correction_group(self):
        """
        Transfer the range corrections defined in the l1p config file to the Level-1 object
        NOTE: The range corrections are all in 1 Hz and must be interpolated to 20Hz
        :return: None
        """

        # Get the reference times for interpolating the range corrections from 1Hz -> 20Hz
        time_1hz = self.nc.time_cor_01.values
        time_20hz = self.nc.time_20_ku.values

        # Loop over all range correction variables defined in the processor definition file
        for key in self.cfg.range_correction_targets.keys():
            pds_var_name = self.cfg.range_correction_targets[key]
            variable_1hz = getattr(self.nc, pds_var_name)
            variable_20hz, error_status = self.interp_1hz_to_20hz(variable_1hz.values, time_1hz, time_20hz)
            if error_status:
                msg = "- Error in 20Hz interpolation for variable `%s` -> set only dummy" % pds_var_name
                logger.warning(msg)
            self.l1.correction.set_parameter(key, variable_20hz)

    def _set_surface_type_group(self):
        """
        Transfer of the surface type flag to the Level-1 object
        NOTE: In the current state (TEST dataset), the surface type flag is only 1 Hz. A nearest neighbour
              interpolation is used to get the 20Hz surface type flag.
        :return: None
        """

        # Get the reference times for interpolating the flag from 1Hz -> 20Hz
        time_1hz = self.nc.time_cor_01.values
        time_20hz = self.nc.time_20_ku.values

        # Interpolate 1Hz surface type flag to 20 Hz
        surface_type_1hz = self.nc.surf_type_01.values
        surface_type_20hz, error_status = self.interp_1hz_to_20hz(
            surface_type_1hz, time_1hz, time_20hz, kind="nearest")
        if error_status:
            msg = "- Error in 20Hz interpolation for variable `surf_type_01` -> set only dummy"
            logger.warning(msg)

        # Set the flag
        for key in ESA_SURFACE_TYPE_DICT.keys():
            flag = surface_type_20hz == ESA_SURFACE_TYPE_DICT[key]
            self.l1.surface_type.add_flag(flag, key)

    def _set_classifier_group(self):
        """
        Transfer the classifiers defined in the l1p config file to the Level-1 object.
        NOTE: It is assumed that all classifiers are 20Hz

        In addition, a few legacy parameter are computed based on the waveform counts that is only available at
        this stage. Computation of other parameter such as sigma_0, leading_edge_width, ... are moved to the
        post-processing

        :return: None
        """
        # Loop over all classifier variables defined in the processor definition file
        for key in self.cfg.classifier_targets.keys():
            variable_20hz = getattr(self.nc, self.cfg.classifier_targets[key])
            self.l1.classifier.add(variable_20hz, key)

        # Calculate Parameters from waveform counts
        # XXX: This is a legacy of the CS2AWI IDL processor
        #      Threshold defined for waveform counts not power in dB
        wfm_counts = self.nc.pwr_waveform_20_ku.values

        # Calculate the OCOG Parameter (CryoSat-2 notation)
        ocog = CS2OCOGParameter(wfm_counts)
        self.l1.classifier.add(ocog.width, "ocog_width")
        self.l1.classifier.add(ocog.amplitude, "ocog_amplitude")

        # Calculate the Peakiness (CryoSat-2 notation)
        pulse = CS2PulsePeakiness(wfm_counts)
        self.l1.classifier.add(pulse.peakiness, "peakiness")
        self.l1.classifier.add(pulse.peakiness_r, "peakiness_r")
        self.l1.classifier.add(pulse.peakiness_l, "peakiness_l")

        # fmi version: Calculate the LTPP
        ltpp = CS2LTPP(wfm_counts)
        self.l1.classifier.add(ltpp.ltpp, "late_tail_to_peak_power")

        # Get satellite velocity vector (classifier needs to be vector -> manual extraction needed)
        satellite_velocity_vector = self.nc.sat_vel_vec_20_ku.values
        self.l1.classifier.add(satellite_velocity_vector[:, 0], "satellite_velocity_x")
        self.l1.classifier.add(satellite_velocity_vector[:, 1], "satellite_velocity_y")
        self.l1.classifier.add(satellite_velocity_vector[:, 2], "satellite_velocity_z")

    @property
    def empty(self):
        """ The return value of `get_l1` if no Level-1 object can or should be created """
        return None
class Level1POutputHandler(DefaultLoggingClass):
    """
    Output handler for pysiral l1p product files: derives the export folder and
    filename for a Level-1 data object and writes the object to netCDF.

    NOTE: This is intentionally not a subclass of OutputHandlerBase due to the
          special nature of pysiral l1p products
    """

    def __init__(self, cfg):
        """
        :param cfg: Options of the output handler. The methods of this class read
            `local_machine_def_tag` (via `cfg.get`), `version.source_file_tag` and
            `version.version_file_tag` from it.
        """
        handler_name = self.__class__.__name__
        super(Level1POutputHandler, self).__init__(handler_name)
        self.error = ErrorStatus(caller_id=handler_name)
        self.cfg = cfg
        self.pysiral_cfg = psrlcfg

        # Export target, populated by `set_output_filepath`
        self._path = None
        self._filename = None

    def remove_old_if_applicable(self, period):
        """
        Placeholder: only logs a warning that the removal of old files is not implemented.

        :param period: (unused)
        :return: None
        """
        logger.warning("Not implemented: self.remove_old_if_applicable")

    def export_to_netcdf(self, l1):
        """
        Write a Level-1 object to a l1p netCDF file. The output folder is
        created if it does not exist yet.

        :param l1: The Level-1 object to be exported
        :return: None
        """
        # Resolve folder and filename for this data object
        self.set_output_filepath(l1)

        # Make sure the target folder exists
        output_folder = self.path
        output_folder.mkdir(exist_ok=True, parents=True)

        # Hand over to the netCDF writer
        writer = L1bDataNC()
        writer.l1b = l1
        writer.output_folder = output_folder
        writer.filename = self.filename
        writer.export()

    def set_output_filepath(self, l1):
        """
        Compute `self._filename` and `self._path` for the given Level-1 object.

        An error is registered (and raised via the error handler) if the option
        `local_machine_def_tag` is missing from the handler configuration.

        :param l1: The Level-1 object
        :return: None
        """
        repo_tag = self.cfg.get("local_machine_def_tag", None)
        if repo_tag is None:
            option_name = "root.output_handler.options.local_machine_def_tag"
            msg = "Missing mandatory option %s in l1p processor definition file -> aborting" % option_name
            msg += "\nOptions: \n" + self.cfg.makeReport()
            self.error.add_error("missing-option", msg)
            self.error.raise_on_error()

        # TODO: This is work in progress
        time_fmt = "%Y%m%dT%H%M%S"
        filename_template = "pysiral-l1p-{platform}-{source}-{timeliness}-{hemisphere}-{tcs}-{tce}-{file_version}.nc"
        self._filename = filename_template.format(
            platform=l1.info.mission,
            source=self.cfg.version.source_file_tag,
            timeliness=l1.info.timeliness,
            hemisphere=l1.info.hemisphere,
            tcs=l1.time_orbit.timestamp[0].strftime(time_fmt),
            tce=l1.time_orbit.timestamp[-1].strftime(time_fmt),
            file_version=self.cfg.version.version_file_tag)

        # Folder: <l1p repository>/<hemisphere>/<yyyy>/<mm> (date of the first timestamp)
        repositories = self.pysiral_cfg.local_machine.l1b_repository
        l1p_root = Path(repositories[l1.info.mission][repo_tag]["l1p"])
        first_timestamp = l1.time_orbit.timestamp[0]
        year_folder = "%04g" % first_timestamp.year
        month_folder = "%02g" % l1.time_orbit.timestamp[0].month
        self._path = l1p_root / l1.info.hemisphere / year_folder / month_folder

    @property
    def path(self):
        """ Export folder as pathlib.Path (requires a previous call of `set_output_filepath`) """
        return Path(self._path)

    @property
    def filename(self):
        """ Filename of the l1p product (None before `set_output_filepath` was called) """
        return self._filename

    @property
    def last_written_file(self):
        """ Full path (folder / filename) of the current export target """
        return self.path / self.filename
class Level2ProcArgParser(DefaultLoggingClass):
    """ Command line argument parser for the Level-2 processor """

    def __init__(self):
        super(Level2ProcArgParser, self).__init__(self.__class__.__name__)
        self.error = ErrorStatus()
        self.pysiral_config = psrlcfg
        # Namespace with the parsed arguments (set by `parse_command_line_arguments`)
        self._args = None

    def parse_command_line_arguments(self):
        """
        Parse the command line with argparse and verify that exactly one of the
        two input selections (`-l1b-files` or `-start` & `-stop`) is given.
        """
        # first validation of required options and data types by argparse
        args = self.parser.parse_args()
        self._args = args

        # Either a file preset or a complete time range must be set (but not both)
        has_file_preset = args.l1b_files_preset is not None
        has_time_range = not (args.start_date is None or args.stop_date is None)

        if has_file_preset and has_time_range:
            self.parser.error("-start & -stop and -l1b-files are exclusive")

        if not (has_file_preset or has_time_range):
            self.parser.error("either -start & -stop or -l1b-files required")

    def critical_prompt_confirmation(self):
        """
        Ask for an explicit "YES" before old l2 files are removed. The process
        exits with code 1 on any other answer. The prompt is skipped if
        --no-critical-prompt is set or --remove-old is not set.
        """
        skip_prompt = self._args.no_critical_prompt
        if not self._args.remove_old or skip_prompt:
            return

        message = ("You have selected to remove all previous "
                   "l2 files for the requested period\n"
                   "(Note: use --no-critical-prompt to skip confirmation)\n"
                   "Enter \"YES\" to confirm and continue: ")
        if input(message) != "YES":
            sys.exit(1)

    @property
    def parser(self):
        """ A new argparse.ArgumentParser with all Level-2 processor options (built on each access) """
        # XXX: Move back to caller

        # The argparse keywords of each option are taken from the default settings
        # -> see config module for data types, destination variables, etc.
        clargs = DefaultCommandLineArguments()

        # (argname, argtype (see config module), destination, required flag)
        options = [
            ("-l2-settings", "l2-settings", "l2_settings", True),
            ("-run-tag", "run-tag", "run_tag", False),
            ("-start", "date", "start_date", False),
            ("-stop", "date", "stop_date", False),
            ("-l1b-files", "l1b_files", "l1b_files_preset", False),
            ("-exclude-month", "exclude-month", "exclude_month", False),
            ("-input-version", "input-version", "input_version", False),
            ("-l2-output", "l2-output", "l2_output", False),
            ("--remove-old", "remove-old", "remove_old", False),
            ("--no-critical-prompt", "no-critical-prompt", "no_critical_prompt", False),
            ("--no-overwrite-protection", "no-overwrite-protection", "overwrite_protection", False),
            ("--overwrite-protection", "overwrite-protection", "overwrite_protection", False)]

        parser = argparse.ArgumentParser()
        for argname, argtype, destination, required in options:
            keywords = clargs.get_argparse_dict(argtype, destination, required)
            parser.add_argument(argname, **keywords)
        parser.set_defaults(overwrite_protection=True)
        return parser

    @property
    def arg_dict(self):
        """ The parsed arguments as dictionary """
        return self._args.__dict__

    @property
    def start(self):
        """ Value of the -start option """
        return self._args.start_date

    @property
    def stop(self):
        """ Value of the -stop option """
        return self._args.stop_date

    @property
    def run_tag(self):
        r"""
        The run tag as list of path elements.

        The value of -run-tag is used if given. Otherwise the value of
        -l2-settings is used, reduced to the file stem if it points to an
        existing file. The string is then split into its elements at `\` or `/`.

        Examples:
            -run-tag cs2awi_v2p0_nrt
            -run-tag c3s/cdr/cryosat2/v1p0/nh
        """
        tag = self._args.run_tag

        # Fall back to the l2 settings (might be a full file path and not just the id)
        if tag is None:
            tag = self._args.l2_settings
            settings_path = Path(tag)
            if settings_path.is_file():
                tag = Path(tag).stem

        # NOTE(review): inside the character class `|` is a literal, thus the tag
        #               is also split at `|` -> confirm whether this is intended
        return re.split(r'[\\|/]', tag)

    @property
    def exclude_month(self):
        """ Value of the -exclude-month option """
        return self._args.exclude_month

    @property
    def overwrite_protection(self):
        """ Overwrite protection flag (parser default: True) """
        return self._args.overwrite_protection

    @property
    def l2_settings_file(self):
        """
        The settings file resolved from -l2-settings by the pysiral configuration.
        An error listing the known ids is registered and raised if the lookup fails.
        """
        l2_settings = self._args.l2_settings
        filename = self.pysiral_config.get_settings_file("proc", "l2", l2_settings)
        if filename is not None:
            return filename

        known_ids = self.pysiral_config.get_setting_ids("proc", "l2")
        msg = "Invalid l2 settings filename or id: %s\n" % l2_settings
        msg = msg + " \nRecognized Level-2 processor setting ids:\n"
        msg = msg + "".join(" " + known_id + "\n" for known_id in known_ids)
        self.error.add_error("invalid-l2-settings", msg)
        self.error.raise_on_error()

    @property
    def l1b_version(self):
        """ Value of the -input-version option """
        return self._args.input_version

    @property
    def l1b_predef_files(self):
        """ List of files matching the glob pattern given with -l1b-files """
        return glob.glob(self._args.l1b_files_preset)

    @property
    def l2_output(self):
        """
        The l2i output definition file resolved from -l2-output by the pysiral
        configuration. An error listing the known ids is registered and raised
        if the lookup fails.
        """
        l2_output = self._args.l2_output
        filename = self.pysiral_config.get_settings_file("output", "l2i", l2_output)
        if filename is not None:
            return filename

        msg = "Invalid l2 outputdef filename or id: %s\n" % l2_output
        msg = msg + " \nRecognized Level-2 output definitions ids:\n"
        known_ids = self.pysiral_config.get_setting_ids("output", "l2i")
        msg = msg + "".join(" - " + known_id + "\n" for known_id in known_ids)
        self.error.add_error("invalid-l2-outputdef", msg)
        self.error.raise_on_error()

    @property
    def is_time_range_request(self):
        """ True if no -l1b-files preset was given """
        return self._args.l1b_files_preset is None

    @property
    def remove_old(self):
        """ Truthy only if --remove-old is set and the overwrite protection is off """
        args = self._args
        return args.remove_old and not args.overwrite_protection
class OutputHandlerBase(DefaultLoggingClass):
    """
    A class that defines properties of output files (content, location, format)
    based on the output definition, data container and other processor settings
    """

    # printf-style formats for the datetime attributes usable as sub-folder
    subfolder_format = {"month": "%02g", "year": "%04g", "day": "%02g"}

    def __init__(self, output_def, applicable_data_level=None, subfolder_tags=None, default_file_location=None):
        """
        Init the output handler with the content of the output definition file and
        keywords specific for the data processing levels. These keywords have to be
        set during the initialization of the parent (this) class.

        TODO: Move applicable data level and subfolder tags to output definition file
        TODO: Remove default_file_location

        :param output_def: (str or pathlib.Path) The full file path to the output
            definition file, or an already parsed output definition
        :param applicable_data_level: (int) Intended processing level for validation of
            output definition
        :param subfolder_tags: (str list) A list of intended sub-folders and their meaning
            (example ["year", "month"])
        :param default_file_location: A list relative to the pysiral resource directory of
            sub-directories and filenames that links to the default output definition for
            the respective data processing level (deprecated)
        """
        super(OutputHandlerBase, self).__init__(self.__class__.__name__)
        self.pysiral_config = psrlcfg
        self.error = ErrorStatus()
        self._basedir = "n/a"

        # Attributes (need to be set before the output definition is validated)
        self._doi = None
        self.subfolders = None
        self.subfolder_tags = subfolder_tags
        self.applicable_data_level = applicable_data_level
        self.default_file_location = default_file_location
        self._init_from_output_def(output_def)
        self.output_def_filename = output_def

    def fill_template_string(self, template, dataset):
        """
        Fill a template string with information of a dataset object
        (in this case Level2Data)

        :param template: String with `{attribute}` or `{attribute:opt1;opt2}` placeholders
        :param dataset: Object with a `get_attribute(name, *options)` method
        :return: The template with all placeholders replaced. Attributes for which
            the dataset returns None are replaced by "unknown"
        """
        result = str(template)
        for attribute_name, option, placeholder in self.get_template_attrs(template):
            attribute = dataset.get_attribute(attribute_name, *option)
            if attribute is None:
                attribute = "unknown"
            result = result.replace(placeholder, attribute)
        return result

    def get_dt_subfolders(self, dt, subfolder_tags):
        """
        Returns a list of subdirectories based on a datetime object
        (usually the start time of data collection)

        :param dt: Object with the attributes named in `subfolder_tags`
        :param subfolder_tags: Keys of `subfolder_format` (e.g. ["year", "month"])
        :return: List of formatted sub-folder names (same order as the tags)
        """
        return [self.subfolder_format[tag] % getattr(dt, tag) for tag in subfolder_tags]

    @staticmethod
    def get_template_attrs(template):
        """
        Extract attribute names and options (if defined) for a given template string

        :param template: The template string
        :return: Iterator of (attribute name, option list, placeholder) tuples,
            where placeholder is the `{...}` expression as it appears in the template
        """
        # The template is searched as text. (The former utf-8 encoding step caused
        # the search to run on the `b'...'` representation under Python 3, which
        # yields placeholders that do not match the template for non-ASCII content)
        template = str(template)
        attr_defs = re.findall("{.*?}", template)
        attrs, options = [], []
        for attr_def in attr_defs:
            # Strip the curly brackets, options follow the first colon
            attr_name, _, optstr = attr_def[1:-1].partition(":")
            attrs.append(attr_name)
            options.append(optstr.split(";"))
        return zip(attrs, options, attr_defs)

    def _init_from_output_def(self, output_def):
        """
        Adds the information for the output def yaml files (either
        full filename or treedict structure) and validates the result.
        """
        # An already parsed output definition is not a valid pathlib.Path argument
        try:
            is_file = Path(output_def).is_file()
        except TypeError:
            is_file = False

        if is_file:
            try:
                self._output_def = get_yaml_config(output_def)
            except Exception as ex:
                self.error.add_error("outputdef-parser-error", ex)
                self.error.raise_on_error()
        else:
            self._output_def = output_def
        self._validate_outputdef()

    def _set_basedir(self, basedir, create=True):
        """ Sets and (per default) creates the main output directory """
        self._basedir = basedir
        if create:
            self._create_directory(self._basedir)

    def _create_directory(self, directory):
        """ Convenience method to create a directory and add an error when failed """
        Path(directory).mkdir(exist_ok=True, parents=True)
        if not Path(directory).is_dir():
            msg = "Unable to create directory: %s" % str(directory)
            self.error.add_error("directory-error", msg)

    def _get_subdirectories(self, dt):
        """
        Returns the base directory extended by one sub-folder per entry
        in `self.subfolders` (formatted from the attributes of `dt`)
        """
        directory = Path(self.basedir)
        for subfolder_tag in self.subfolders:
            parameter = getattr(dt, subfolder_tag)
            directory = directory / (self.subfolder_format[subfolder_tag] % parameter)
        # The assembled path was previously built but never handed back
        return directory

    def _get_directory_from_dt(self, dt):
        """ Returns the base directory extended by the sub-folders of `self.subfolder_tags` """
        subfolders = self.get_dt_subfolders(dt, self.subfolder_tags)
        return Path(self.basedir) / Path(*subfolders)

    def _validate_outputdef(self):
        """
        Run a series of tests to check if a valid output definition has been
        passed. Note: these tests will only check existing items of the
        output definition. If the requested item is missing a separate
        exception will be evoked
        """
        # Test 1: The data level of the output definition needs to match
        #         the data level this class has been initialized with
        if self.applicable_data_level != self.data_level:
            # %s instead of %g: applicable_data_level is None unless specified,
            # which cannot be formatted as number
            msg = "outputdef data level (%s) does not match %s reqirement (%s)"
            msg = msg % (self.data_level, self.__class__.__name__, self.applicable_data_level)
            self.error.add_error("datalevel-mismatch", msg)
            self.error.raise_on_error()

    @property
    def has_doi(self):
        """ True if a DOI is set """
        try:
            return self._doi is not None
        except AttributeError:
            return False

    @property
    def id(self):
        """ The output id from the metadata of the output definition (None if missing) """
        try:
            return self._output_def.metadata.output_id
        except (AttributeError, KeyError):
            return None

    @property
    def product_level_subfolder(self):
        """ The `product_level_subfolder` entry of the output definition (needs to be a str) """
        subfolder = self._output_def.product_level_subfolder
        if not isinstance(subfolder, str):
            msg = "root.product_level_subfolder (str) missing or wrong dtype"
            self.error.add_error("outputdef-invalid", msg)
            self.error.raise_on_error()
        return subfolder

    @property
    def data_level(self):
        """ The `metadata.data_level` entry of the output definition (needs to be an int) """
        data_level = self._output_def.metadata.data_level
        # Exact type test on purpose: a bool must not pass as data level
        if type(data_level) is not int:
            msg = "root.metadata.data_level (int) missing or wrong dtype"
            self.error.add_error("outputdef-invalid", msg)
            self.error.raise_on_error()
        return data_level

    @property
    def basedir(self):
        """ The main output directory ("n/a" until set via `_set_basedir`) """
        return self._basedir

    @property
    def output_def(self):
        """ The parsed output definition """
        return self._output_def

    @property
    def now_directory(self):
        """ Returns a directory suitable string with the current time """
        return datetime.now().strftime("%Y%m%dT%H%M%S")

    @property
    def variable_def(self):
        """ Iterator of (variable name, attribute dict) tuples, sorted by variable name """
        variables = sorted(self.output_def.variables.keys())
        attribute_dicts = [self.output_def.variables[a] for a in variables]
        return zip(variables, attribute_dicts)
class Level2PreProcArgParser(DefaultLoggingClass):
    """ Command line argument parser for the Level-2 pre-processor (l2i -> l2p) """

    def __init__(self):
        super(Level2PreProcArgParser, self).__init__(self.__class__.__name__)
        self.error = ErrorStatus()
        # Namespace with the parsed arguments (set by `parse_command_line_arguments`)
        self._args = None

    def parse_command_line_arguments(self):
        """ Parse the command line (argparse validates required options and data types) """
        self._args = self.parser.parse_args()

    def critical_prompt_confirmation(self):
        """
        Ask for an explicit "YES" before old l2p files are removed. The process
        exits with code 1 on any other answer. The prompt is skipped if
        --no-critical-prompt is set or --remove-old is not set.
        """
        skip_prompt = self._args.no_critical_prompt
        if not self._args.remove_old or skip_prompt:
            return

        message = ("You have selected to remove all previous "
                   "l2p files for the requested period\n"
                   "(Note: use --no-critical-prompt to skip confirmation)\n"
                   "Enter \"YES\" to confirm and continue: ")
        if input(message) != "YES":
            sys.exit(1)

    @property
    def parser(self):
        """ A new argparse.ArgumentParser with all pre-processor options (built on each access) """
        # XXX: Move back to caller

        # The argparse keywords of each option are taken from the default settings
        # -> see config module for data types, destination variables, etc.
        clargs = DefaultCommandLineArguments()

        # (argname, argtype (see config module), destination, required flag)
        options = [
            ("-start", "date", "start_date", False),
            ("-stop", "date", "stop_date", False),
            ("-l2i-product-dir", "l2i-product-dir", "l2i_product_dir", True),
            ("-l2p-output", "l2p-output", "l2p_output", False),
            ("-exclude-month", "exclude-month", "exclude_month", False),
            ("-doi", "doi", "doi", False),
            ("--remove-old", "remove-old", "remove_old", False),
            ("--no-critical-prompt", "no-critical-prompt", "no_critical_prompt", False),
            ("--no-overwrite-protection", "no-overwrite-protection", "overwrite_protection", False),
            ("--overwrite-protection", "overwrite-protection", "overwrite_protection", False)]

        parser = argparse.ArgumentParser()
        for argname, argtype, destination, required in options:
            keywords = clargs.get_argparse_dict(argtype, destination, required)
            parser.add_argument(argname, **keywords)
        parser.set_defaults(overwrite_protection=False)
        return parser

    @property
    def arg_dict(self):
        """ The parsed arguments as dictionary """
        return self._args.__dict__

    @property
    def start(self):
        """ Value of the -start option """
        return self._args.start_date

    @property
    def stop(self):
        """ Value of the -stop option """
        return self._args.stop_date

    @property
    def exclude_month(self):
        """ Value of the -exclude-month option """
        return self._args.exclude_month

    @property
    def doi(self):
        """ Value of the -doi option """
        return self._args.doi

    @property
    def overwrite_protection(self):
        """ Overwrite protection flag (parser default: False) """
        return self._args.overwrite_protection

    @property
    def l2i_product_dir(self):
        """
        The resolved l2i product directory. An error is registered and raised
        if the value of -l2i-product-dir is not an existing directory.
        """
        product_dir = self._args.l2i_product_dir
        if not Path(product_dir).is_dir():
            msg = "Invalid l2i product dir: %s" % str(product_dir)
            self.error.add_error("invalid-l2i-product-dir", msg)
            self.error.raise_on_error()
            return None
        return Path(product_dir).resolve(strict=False)

    @property
    def l2p_output(self):
        """
        The l2p output definition file resolved from -l2p-output by the pysiral
        configuration. An error listing the known ids is registered and raised
        if the lookup fails.
        """
        l2p_output = self._args.l2p_output
        filename = psrlcfg.get_settings_file("output", "l2p", l2p_output)
        if filename is not None:
            return filename

        msg = "Invalid l2p outputdef filename or id: %s\n" % l2p_output
        msg = msg + " \nRecognized Level-2 output definitions ids:\n"
        known_ids = psrlcfg.get_setting_ids("output", "l2p")
        msg = msg + "".join(" - " + known_id + "\n" for known_id in known_ids)
        self.error.add_error("invalid-l2p-outputdef", msg)
        self.error.raise_on_error()

    @property
    def remove_old(self):
        """ Truthy only if --remove-old is set and the overwrite protection is off """
        args = self._args
        return args.remove_old and not args.overwrite_protection
class Level3ProcArgParser(DefaultLoggingClass):
    """ Command line argument parser for the Level-3 processor """

    def __init__(self):
        super(Level3ProcArgParser, self).__init__(self.__class__.__name__)
        self.error = ErrorStatus()
        self.pysiral_config = ConfigInfo()
        # Namespace with the parsed arguments (set by `parse_command_line_arguments`)
        self._args = None

    def parse_command_line_arguments(self):
        """ Parse the command line (argparse validates required options and data types) """
        self._args = self.parser.parse_args()

    def critical_prompt_confirmation(self):
        """
        Ask for an explicit "YES" before old l3 files are removed. The process
        exits with code 1 on any other answer. The prompt is skipped if
        --no-critical-prompt is set or --remove-old is not set.
        """
        # Any confirmation prompts can be overridden by --no-critical-prompt
        no_prompt = self._args.no_critical_prompt

        if self._args.remove_old and not no_prompt:
            message = "You have selected to remove all previous " + \
                "l3 files for the requested period\n" + \
                "(Note: use --no-critical-prompt to skip confirmation)\n" + \
                "Enter \"YES\" to confirm and continue: "
            # `input` (as in the other argument parsers): `raw_input` does not
            # exist in Python 3
            result = input(message)

            if result != "YES":
                sys.exit(1)

    @property
    def parser(self):
        """ A new argparse.ArgumentParser with all Level-3 processor options (built on each access) """
        # XXX: Move back to caller

        # Take the command line options from default settings
        # -> see config module for data types, destination variables, etc.
        clargs = DefaultCommandLineArguments()

        # List of command line option required for the processor
        # (argname, argtype (see config module), destination, required flag)
        options = [
            ("-l2i-product-dir", "l2i-product-dir", "l2i_basedir", True),
            ("-l3-settings", "l3-settings", "l3_settings", False),
            ("-l3-griddef", "l3-griddef", "l3_griddef", True),
            ("-l3-output", "l3-output", "l3_output", True),
            ("-start", "date", "start_date", True),
            ("-stop", "date", "stop_date", True),
            ("-period", "period", "period", False),
            ("-doi", "doi", "doi", False),
            ("-data-record-type", "data_record_type", "data_record_type", False),
            ("--remove-old", "remove-old", "remove_old", False),
            ("--no-critical-prompt", "no-critical-prompt", "no_critical_prompt", False)]

        # create the parser
        parser = argparse.ArgumentParser()
        for argname, argtype, destination, required in options:
            argparse_dict = clargs.get_argparse_dict(argtype, destination, required)
            parser.add_argument(argname, **argparse_dict)

        return parser

    @property
    def arg_dict(self):
        """ Return the arguments as dictionary """
        return self._args.__dict__

    @property
    def start(self):
        """ Value of the -start option """
        return self._args.start_date

    @property
    def stop(self):
        """ Value of the -stop option """
        return self._args.stop_date

    @property
    def period(self):
        """ Value of the -period option """
        return self._args.period

    @property
    def doi(self):
        """ Value of the -doi option """
        return self._args.doi

    @property
    def data_record_type(self):
        """ Value of the -data-record-type option """
        return self._args.data_record_type

    @property
    def l2i_product_directory(self):
        """ The `l2i` sub-folder of the product base directory """
        return os.path.join(self.l3_product_basedir, "l2i")

    @property
    def l3_settings_file(self):
        """
        The settings file resolved from -l3-settings by the pysiral configuration.
        An error listing the known ids is registered and raised if the lookup fails.
        """
        l3_settings = self._args.l3_settings
        filename = self.pysiral_config.get_settings_file("proc", "l3", l3_settings)
        if filename is None:
            msg = "Invalid l3 settings filename or id: %s\n" % l3_settings
            msg = msg + " \nRecognized Level-3 processor setting ids:\n"
            for l3_settings_id in self.pysiral_config.get_setting_ids("proc", "l3"):
                msg = msg + " " + l3_settings_id + "\n"
            self.error.add_error("invalid-l3-settings", msg)
            self.error.raise_on_error()
        else:
            return filename

    @property
    def l3_griddef(self):
        """
        The grid definition file resolved from -l3-griddef by the pysiral configuration.
        An error listing the known ids is registered and raised if the lookup fails.
        """
        l3_griddef = self._args.l3_griddef
        filename = self.pysiral_config.get_settings_file("grid", None, l3_griddef)
        if filename is None:
            msg = "Invalid griddef filename or id: %s\n" % l3_griddef
            msg = msg + " Recognized grid definition ids:\n"
            # NOTE(review): the file lookup above uses the setting type "grid", the
            #               id lookup uses "griddef" with one argument -> confirm
            #               against the signature of `get_setting_ids`
            for griddef_id in self.pysiral_config.get_setting_ids("griddef"):
                msg = msg + " - " + griddef_id + "\n"
            self.error.add_error("invalid-griddef", msg)
            self.error.raise_on_error()
        else:
            return filename

    @property
    def l3_output_file(self):
        """
        The l3 output definition file resolved from -l3-output by the pysiral
        configuration. An error listing the known ids is registered and raised
        if the lookup fails.
        """
        l3_output = self._args.l3_output
        filename = self.pysiral_config.get_settings_file("output", "l3", l3_output)
        if filename is None:
            msg = "Invalid output definition filename or id: %s\n" % l3_output
            msg = msg + " Recognized output definition ids:\n"
            for output_id in self.pysiral_config.get_setting_ids("output", "l3"):
                msg = msg + " - " + output_id + "\n"
            self.error.add_error("invalid-outputdef", msg)
            self.error.raise_on_error()
        else:
            return filename

    @property
    def l3_product_basedir(self):
        """ Returns the base directory (one level below l2i) """
        # Use the absolute path and drop a trailing `l2i` folder
        product_basedir = os.path.abspath(self._args.l2i_basedir)
        parent, last_folder = os.path.split(product_basedir)
        if last_folder == "l2i":
            return parent
        return product_basedir

    @property
    def remove_old(self):
        """ Truthy only if --remove-old is set and no overwrite protection is active """
        # This parser defines no overwrite protection option, the attribute is
        # therefore only present on the namespace if it was added elsewhere
        # (unconditional access raised an AttributeError)
        overwrite_protection = getattr(self._args, "overwrite_protection", False)
        return self._args.remove_old and not overwrite_protection
class OutputHandlerBase(DefaultLoggingClass): subfolder_format = {"month": "%02g", "year": "%04g", "day": "%02g"} def __init__(self, output_def): super(OutputHandlerBase, self).__init__(self.__class__.__name__) self.pysiral_config = ConfigInfo() self.error = ErrorStatus() self._basedir = "n/a" self._init_from_output_def(output_def) self.output_def_filename = output_def def fill_template_string(self, template, dataset): """ Fill an template string with information of a dataset object (in this case Level2Data) """ attributes = self.get_template_attrs(template) try: result = template.encode("utf-8") except AttributeError: result = str(template) for attribute in attributes: attribute_name, option, placeholder = attribute attribute = dataset.get_attribute(attribute_name, *option) if attribute is None: attribute = "unknown" result = result.replace(placeholder, attribute) return result def get_dt_subfolders(self, dt, subfolder_tags): """ Returns a list of subdirectories based on a datetime object (usually the start time of data collection) """ subfolders = [] for subfolder_tag in subfolder_tags: parameter = getattr(dt, subfolder_tag) subfolder = self.subfolder_format[subfolder_tag] % parameter subfolders.append(subfolder) return subfolders def get_template_attrs(self, template): """ Extract attribute names and options (if defined) for a give template string """ try: template = template.encode('utf-8').strip() except AttributeError: template = str(template) attr_defs = re.findall("{.*?}", template) attrs, options = [], [] for attr_def in attr_defs: attr_name, _, optstr = attr_def[1:-1].partition(":") attrs.append(attr_name) options.append(optstr.split(";")) return zip(attrs, options, attr_defs) def _init_from_output_def(self, output_def): """ Adds the information for the output def yaml files (either full filename or treedict structure) """ if os.path.isfile(output_def): try: self._output_def = get_yaml_config(output_def) except Exception, msg: 
self.error.add_error("outputdef-parser-error", msg) self.error.raise_on_error() else:
class AuxdataBaseClass(object):
    """
    Common parent of all sub-type auxdata base classes (e.g. SICBaseClass).
    It provides the date bookkeeping, the registration of auxiliary variables
    and their transfer to a Level-2 data object. Child classes need to implement
    `get_l2_track_vars` (and `load_requested_auxdata` if `update_external_data` is used).
    """

    def __init__(self, auxclass_cfg):
        """
        Not to be instanced directly, only via its subclasses.

        :param auxclass_cfg: Needs to be of type AuxClassConfig (an error is
            registered and raised via the error handler otherwise)
        """
        # Error handler
        self.error = ErrorStatus(self.pyclass)

        # Auxiliary class options
        if not isinstance(auxclass_cfg, AuxClassConfig):
            template = "Invalid config object: %s (needs to be of type pysiral.auxdata.AuxClassConfig"
            self.error.add_error("invalid-auxclasscfg-type", template % str(auxclass_cfg))
            self.error.raise_on_error()
        self._cfg = auxclass_cfg

        # Container for external data
        self._data = None
        # Auxiliary variables registered by the child class
        self._auxvars = []
        # General messages
        self.msgs = []

        # --- Class internals ---
        # Only one external product is kept in memory at a time. Its date
        # (list: yyyy, mm, dd) is stored as `_current_date` and compared to
        # `_requested_date` in `update_external_data`. The initial values differ,
        # thus the first call always invokes the data loader.
        # NOTE: This will be bypassed by static auxiliary data classes
        # TODO: Load all auxiliary products for processing period in memory (allow parallel processing)
        self._current_date = [0, 0, 0]
        self._requested_date = [-1, -1, -1]

    def set_requested_date(self, year, month, day):
        """ Set the requested date from year, month and day """
        self._requested_date = [year, month, day]

    def set_requested_date_from_l2(self, l2):
        """ Convenience method: set the requested date from the first timestamp of the l2 track """
        first = l2.track.timestamp[0]
        year, month = first.year, l2.track.timestamp[0].month
        day = l2.track.timestamp[0].day
        self.set_requested_date(year, month, day)

    def check_data_availability(self, data_container_name="_data"):
        """
        Register a handler message and the error `auxdata_missing` if the data
        container is None (or does not exist). The error is not raised here.

        :param data_container_name: Name of the attribute that holds the data
        :return: None
        """
        if getattr(self, data_container_name, None) is not None:
            return
        date_label = self.year + "-" + self.month + "-" + self.day
        msg = "%s: Data not loaded [%s]" % (self.__class__.__name__, date_label)
        self.add_handler_message(msg)
        self.error.add_error("auxdata_missing", msg)

    def reset_auxvars(self):
        """
        Clear the list of registered auxiliary variables (the Level-2 processor
        uses the same instance repeatedly)
        """
        self._auxvars = []

    def reset_handler_messages(self):
        """
        Clear the message list (the Level-2 processor uses the same instance repeatedly)
        """
        self.msgs = []

    def add_variables_to_l2(self, l2):
        """
        Main access point for the Level-2 processor: resets variables and messages,
        calls `get_l2_track_vars` of the child class and adds all registered
        variables to the Level-2 data object.
        """
        # The track extraction method is mandatory for all auxiliary subclasses
        if not self.has_mandatory_track_method:
            msg = "Mandatory subclass method `get_l2_track_vars` not implemented for %s " % self.pyclass
            self.error.add_error("not-implemented", msg)
            self.error.raise_on_error()

        # Remove everything that is left from a potential previous execution
        self.reset_auxvars()
        self.reset_handler_messages()

        # The child class is expected to register its output via `register_auxvar`
        self.get_l2_track_vars(l2)

        # Errors are only raised if configured (see `exception_on_error`)
        if self.error.status and self.exception_on_error:
            self.error.raise_on_error()

        # Transfer the registered variables
        try:
            self.update_l2(l2)
        except KeyError:
            msg = "Invalid auxiliary parameter return from class %s" % self.pyclass
            self.error.add_error("invalid-auxvar-return", msg)
            self.error.raise_on_error()

    def register_auxvar(self, var_id, var_name, value, uncertainty=None):
        """
        Register an auxiliary variable for the transfer to the Level-2 data object
        in `update_l2`, where id, name, value and uncertainty are passed on to
        `l2.set_auxiliary_parameter`.
        """
        self._auxvars.append({"id": var_id, "name": var_name, "value": value, "uncertainty": uncertainty})

    def add_handler_message(self, msg):
        """ Append a message to the message list """
        self.msgs.append(msg)

    @staticmethod
    def get_empty_array(l2, empty_val=np.nan):
        """ Array of length `l2.n_records` filled with `empty_val` """
        n_records = l2.n_records
        return np.full(n_records, empty_val)

    def update_external_data(self):
        """
        Call the data loader of the child class if the requested date differs from
        the date of the current data. A handler message describes the outcome.
        """
        handler = self.__class__.__name__

        # Data for the requested date is already (attempted to be) loaded
        if self._requested_date == self._current_date:
            if self.has_data_loaded:
                self.add_handler_message(handler + ": Data already present")
            else:
                self.add_handler_message(handler + ": No Data: Loading failed in an earlier attempt")
            return

        # NOTE: The implementation of the loader needs to be in the subclass
        self.load_requested_auxdata()
        self._current_date = self._requested_date
        if self.has_data_loaded:
            self.add_handler_message(handler + ": Load " + str(self.requested_filepath))

    def load_requested_auxdata(self):
        """
        Needs to be overwritten by the child class

        :raises NotImplementedError: always
        """
        msg = (""" This Exception is caused because the auxiliary data class ({}) is missing the method """
               """`load_requested_auxdata` or AuxdataBaseClass was called directly (which it should not)""")
        raise NotImplementedError(msg.format(self.__class__.__name__))

    def get_l2_track_vars(self, *args):
        """
        Needs to be overwritten by the child class

        :raises NotImplementedError: always
        """
        msg = (""" This Exception is caused because the auxiliary data class ({}) is missing the method """
               """`get_l2_track_vars` or AuxdataBaseClass was called directly (which it should not)""")
        raise NotImplementedError(msg.format(self.__class__.__name__))

    def update_l2(self, l2):
        """ Add all registered auxiliary variables to a Level-2 data object """
        for entry in self._auxvars:
            l2.set_auxiliary_parameter(
                entry["id"], entry["name"], entry["value"], entry.get("uncertainty", None))

    @property
    def pyclass(self):
        """ Name of the (child) class """
        return self.__class__.__name__

    @property
    def cfg(self):
        """ The configuration object passed at initialization """
        return self._cfg

    @property
    def has_data_loaded(self):
        """ True if the `_data` attribute exists and is not None """
        return getattr(self, "_data", None) is not None

    @property
    def exception_on_error(self):
        """ Value of the option `exception_on_error` (False if the option is not set) """
        if "exception_on_error" not in self.cfg.options:
            return False
        return self.cfg.options.exception_on_error

    @property
    def requested_filepath(self):
        """ Returns the local file path for the requested date """
        # Main directory and subfolders (the tags are names of attributes of this class)
        path = Path(self.cfg.local_repository)
        for tag in self.cfg.subfolders:
            path = path / getattr(self, tag)

        # The placeholders of the file naming are names of attributes of this class as well
        placeholders = re.findall("{.*?}", self.cfg.filenaming)
        period = {placeholder[1:-1]: getattr(self, placeholder[1:-1]) for placeholder in placeholders}
        return path / self.cfg.filenaming.format(**period)

    @property
    def year(self):
        """ Requested year as string """
        year_value = self._requested_date[0]
        return "%04g" % year_value

    @property
    def month(self):
        """ Requested month as zero-padded string """
        month_value = self._requested_date[1]
        return "%02g" % month_value

    @property
    def day(self):
        """ Requested day as zero-padded string """
        day_value = self._requested_date[2]
        return "%02g" % day_value

    @property
    def has_mandatory_track_method(self):
        """
        Test if this object instance has a callable attribute `get_l2_track_vars`.

        NOTE(review): this class defines `get_l2_track_vars` itself, the test is
                      therefore also passed by child classes that do not overwrite it
        """
        return bool(callable(getattr(self, "get_l2_track_vars", None)))

    @property
    def auxvar_names(self):
        """ Names of all registered auxiliary variables """
        return [entry["name"] for entry in self._auxvars]
class ReadNC(object):
    """
    Quick & dirty method to parse content of netCDF file into a python object
    with attributes from file variables

    On construction the file is opened once: every global attribute and
    (unless `global_attrs_only` is set) every variable is copied onto the
    instance via `setattr`, using the netCDF name as attribute name.

    :param filename: path of the netCDF file (passed to `Dataset`)
    :param verbose: if True, print the name of each variable that is read
    :param autoscale: flag passed to `Dataset.set_auto_scale`
    :param nan_fill_value: if True, masked entries of float32/float64
        variables are replaced with NaN
    :param global_attrs_only: if True, variable data is not read
    """

    def __init__(self, filename, verbose=False, autoscale=True,
                 nan_fill_value=False, global_attrs_only=False):
        self.error = ErrorStatus()
        self.time_def = NCDateNumDef()
        self.parameters = []
        self.attributes = []
        self.verbose = verbose
        self.autoscale = autoscale
        self.global_attrs_only = global_attrs_only
        self.nan_fill_value = nan_fill_value
        self.filename = filename
        self.read_globals()
        self.read_content()

    def read_globals(self):
        """ Placeholder (no-op): global attributes are read in `read_content` """
        pass

    def read_content(self):
        """
        Read global attributes and variables of the netCDF file and store
        them as attributes of this instance.

        Populates `self.attributes` (global attribute names), `self.keys`
        (names of the variables that were read) and `self.parameters`
        (names of all variables in the file, also if `global_attrs_only`
        is set).
        """

        self.keys = []

        # Open the file
        try:
            f = Dataset(self.filename)
        except RuntimeError:
            msg = "Cannot read netCDF file: %s" % self.filename
            self.error.add_error("nc-runtime-error", msg)
            self.error.raise_on_error()

        # Make sure the file handle is released even if reading fails
        try:
            f.set_auto_scale(self.autoscale)

            # Get the global attributes
            for attribute_name in f.ncattrs():

                self.attributes.append(attribute_name)
                attribute_value = getattr(f, attribute_name)

                # Convert timestamps back to datetime objects
                # TODO: This needs to be handled better
                if attribute_name in ["start_time", "stop_time"]:
                    attribute_value = num2date(
                        attribute_value, self.time_def.units,
                        calendar=self.time_def.calendar)
                setattr(self, attribute_name, attribute_value)

            # Get the variables
            if not self.global_attrs_only:
                for key in f.variables.keys():

                    variable = f.variables[key][:]

                    # Objects without a dtype (or with a dtype that cannot
                    # be compared) are never treated as masked floats
                    try:
                        is_float = variable.dtype in ["float32", "float64"]
                        has_mask = hasattr(variable, "mask")
                    except (AttributeError, TypeError):
                        is_float, has_mask = False, False

                    if self.nan_fill_value and has_mask and is_float:
                        is_fill_value = np.where(variable.mask)
                        variable[is_fill_value] = np.nan

                    setattr(self, key, variable)
                    self.keys.append(key)
                    if self.verbose:
                        # Function-call form works under Python 2 and 3
                        print(key)

            # Always lists all variables of the file; materialized as a
            # list so it does not reference the dataset after closing
            self.parameters = list(f.variables.keys())
        finally:
            f.close()
class ConfigInfo(DefaultLoggingClass):
    """
    Container for the content of the pysiral definition files
    (in pysiral/configuration) and the local machine definition file
    (local_machine_def.yaml)

    Each entry of `_DEFINITION_FILES` is read from `USER_CONFIG_PATH` and
    stored as an attribute named after its key (`mission`, `auxdata`); the
    local machine definition is stored as `local_machine`.
    """

    # Global variables
    _DEFINITION_FILES = {
        "mission": "mission_def.yaml",
        "auxdata": "auxdata_def.yaml",
    }

    # FIXME: This is only a quick fix for a bug that was caused by the
    #        removal of `parameter_def.yaml` in v0.6.1
    #        (This list was implemented to ensure consistent naming of
    #        geophysical range corrections through all platform
    #        pre-processors.)
    CORRECTION_LIST = [
        "dry_troposphere",
        "wet_troposphere",
        "inverse_barometric",
        "dynamic_atmosphere",
        "ionospheric",
        "ocean_tide_elastic",
        "ocean_tide_long_period",
        "ocean_loading_tide",
        "solid_earth_tide",
        "geocentric_polar_tide"
    ]

    _LOCAL_MACHINE_DEF_FILE = "local_machine_def.yaml"

    VALID_SETTING_TYPES = ["proc", "output", "grid"]
    VALID_DATA_LEVEL_IDS = ["l1", "l2", "l2i", "l2p", "l3", None]

    def __init__(self):
        """ Read all definition files """
        super(ConfigInfo, self).__init__(self.__class__.__name__)

        self.error = ErrorStatus(self.__class__.__name__)

        # read the definition files in the config folder
        self._read_config_files()

        # read the local machine definition file
        self._read_local_machine_file()

    @property
    def mission_ids(self):
        """ The `missions` entry of the mission definition file """
        return self.mission.missions

    def get_mission_defaults(self, mission):
        """
        Return a dictionary {option name: default value} for all option
        branches of the given mission
        """
        mission_options = self.mission[mission].options
        names, options = td_branches(mission_options)
        defaults = {}
        for name, option in zip(names, options):
            defaults[name] = option.default
        return defaults

    def get_mission_options(self, mission):
        """ Return the `options` branch of the given mission definition """
        return self.mission[mission].options

    def get_mission_settings(self, mission):
        """ Return the `settings` branch of the given mission definition """
        return self.mission[mission].settings

    def get_mission_info(self, mission):
        """
        Return the definition of the given mission. An undefined (None)
        start or stop of the data period is replaced with the current UTC
        time. Note: the replacement is written into the stored mission
        definition, not into a copy.
        """
        mission_info = self.mission[mission]
        if mission_info.data_period.start is None:
            mission_info.data_period.start = datetime.utcnow()
        if mission_info.data_period.stop is None:
            mission_info.data_period.stop = datetime.utcnow()
        return mission_info

    def get_setting_ids(self, type, data_level=None):
        """
        Return the ids (yaml filenames without extension) of all setting
        files found for the given setting type and data level
        """
        lookup_directory = self.get_local_setting_path(type, data_level)
        ids, _ = self.get_yaml_setting_filelist(lookup_directory)
        return ids

    def get_settings_file(self, type, data_level, setting_id_or_filename):
        """
        Returns a processor settings file for a given data level.
        (data level: l2 or l3). The third argument can either be an
        direct filename (which validity will be checked) or an id, for
        which the corresponding file (id.yaml) will be looked up in
        the default directory

        :param type: setting type (one of `VALID_SETTING_TYPES`)
        :param data_level: data level id (one of `VALID_DATA_LEVEL_IDS`)
        :param setting_id_or_filename: path of an existing file or
            a setting id
        :return: The filename, or None if type or data level are invalid
            or no file matches the id
        """

        if type not in self.VALID_SETTING_TYPES:
            return None

        if data_level not in self.VALID_DATA_LEVEL_IDS:
            return None

        # Check if filename
        if os.path.isfile(setting_id_or_filename):
            return setting_id_or_filename

        # Get all settings files in settings/{data_level} and its
        # subdirectories
        lookup_directory = self.get_local_setting_path(type, data_level)
        ids, files = self.get_yaml_setting_filelist(lookup_directory)

        # Test if ids are unique and return error for the moment
        if len(set(ids)) != len(ids):
            # (the former format string "%-%s" raised a TypeError instead
            #  of reporting the actual problem)
            msg = "Non-unique %s-%s setting filename" % (type, str(data_level))
            self.error.add_error("ambiguous-setting-files", msg)
            self.error.raise_on_error()

        # Find filename to setting_id
        try:
            index = ids.index(setting_id_or_filename)
        except ValueError:
            # id not in the list of known setting files
            return None
        return files[index]

    def get_yaml_setting_filelist(self, directory, ignore_obsolete=True):
        """
        Retrieve all yaml files from a given directory (including
        subdirectories). Directories named "obsolete" are ignored if
        ignore_obsolete=True (default)

        :return: Two lists of equal length: the setting ids (filename
            without the .yaml extension) and the corresponding full paths
        """
        setting_ids = []
        setting_files = []
        for root, _, files in os.walk(directory):
            if os.path.split(root)[-1] == "obsolete" and ignore_obsolete:
                continue
            for filename in files:
                # Only strip the actual extension, not every occurrence
                # of ".yaml" in the filename
                setting_id, extension = os.path.splitext(filename)
                if extension != ".yaml":
                    continue
                setting_ids.append(setting_id)
                setting_files.append(os.path.join(root, filename))
        return setting_ids, setting_files

    def get_local_setting_path(self, type, data_level):
        """
        Return the directory `USER_CONFIG_PATH/type[/data_level]` or None
        if either the type or the data level is not valid
        """
        if type not in self.VALID_SETTING_TYPES:
            return None
        if data_level not in self.VALID_DATA_LEVEL_IDS:
            return None
        args = [type]
        # data level None is valid and means: no data level subfolder
        if data_level is not None:
            args.append(data_level)
        return os.path.join(USER_CONFIG_PATH, *args)

    def _read_config_files(self):
        """ Store the content of each definition file as attribute """
        for key, def_filename in self._DEFINITION_FILES.items():
            filename = os.path.join(USER_CONFIG_PATH, def_filename)
            setattr(self, key, get_yaml_config(filename))

    def _read_local_machine_file(self):
        """
        Store the content of the local machine definition file as
        attribute `local_machine`; a missing file (IOError) is reported
        via the error status
        """
        filename = os.path.join(USER_CONFIG_PATH, self._LOCAL_MACHINE_DEF_FILE)
        try:
            local_machine_def = get_yaml_config(filename)
        except IOError:
            msg = "local_machine_def.yaml not found (expected: %s)" % filename
            self.error.add_error("local-machine-def-missing", msg)
            self.error.raise_on_error()
        self.local_machine = local_machine_def

    def _return_path(self, subfolder):
        """ Return the path of a subfolder of `USER_CONFIG_PATH` """
        return os.path.join(USER_CONFIG_PATH, subfolder)
class NCDataFile(DefaultLoggingClass):
    """
    Base functionality for writing a data object to a netCDF file.

    NOTE(review): several methods rely on attributes that are not set in
    `__init__` (`self.data`, `self.output_handler`,
    `self.parameter_attributes`) and on `self._get_full_export_path`, which
    is not defined in this class. Presumably these are provided by
    subclasses -- confirm.
    """

    def __init__(self):

        class_name = self.__class__.__name__
        super(NCDataFile, self).__init__(class_name)
        self.error = ErrorStatus(caller_id=class_name)

        self.filename = None
        self.time_def = NCDateNumDef()
        self.zlib = True
        self._rootgrp = None
        self._options = None
        self._proc_settings = None
        self.verbose = False

    def set_options(self, **opt_dict):
        """ Store the keyword arguments as attribute dictionary """
        self._options = AttrDict(**opt_dict)

    def set_processor_settings(self, proc_settings):
        """ Set the settings written by `_write_processor_settings` """
        self._proc_settings = proc_settings

    def set_base_export_path(self, path):
        """ Set the base export path """
        self.base_export_path = path

    def get_full_export_path(self, startdt):
        """ Return the export path (see property `export_path`) """
        self._get_full_export_path(startdt)
        return self.export_path

    def _set_doi(self):
        """ Pass the doi of the output handler (if any) to the data object """
        if self.output_handler.has_doi:
            self.data.set_doi(self.output_handler.doi)

    def _set_data_record_type(self):
        """ Pass the data record type of the output handler to the data """
        # NOTE(review): this is gated on `has_doi`, same as `_set_doi` --
        # confirm this is intended for the data record type
        if self.output_handler.has_doi:
            self.data.set_data_record_type(
                self.output_handler.data_record_type)

    def _write_global_attributes(self):
        """ Write the global attributes defined by the output handler """
        attr_dict = self.output_handler.get_global_attribute_dict(self.data)
        self._set_global_attributes(attr_dict)

    def _populate_data_groups(self, level3=False, flip_yc=False):
        """
        Create the netCDF dimensions and write all variables defined in
        the output handler (data and variable attributes)

        :param level3: if True, apply the Level-3 dimension handling (all
            variables except lon/lat variables get an additional leading
            dimension)
        :param flip_yc: Level-3 only: flip the data with `np.flipud`
        """

        lonlat_parameter_names = ["lon", "lat", "longitude", "latitude"]

        # Dimensions are assigned to variables by position, thus the order
        # of the dimension dictionary matters
        dimdict = self.data.dimdict
        dims = list(dimdict.keys())

        for key in dims:
            self._rootgrp.createDimension(key, dimdict[key])

        for parameter_name, attribute_dict in self.output_handler.variable_def:

            # Check if parameter name is also the the name or the source
            # parameter
            if "var_source_name" in attribute_dict.keys():
                # work on a copy: the source name must not become a
                # variable attribute
                attribute_dict = dict(attribute_dict)
                var_source_name = attribute_dict.pop("var_source_name")
            else:
                var_source_name = parameter_name

            data = self.data.get_parameter_by_name(var_source_name)

            if data is None:
                msg = "Invalid parameter name for data object: %s"
                msg = msg % parameter_name
                self.log.error(msg)
                self.error.add_error("invalid-paramater", msg)
                self.error.raise_on_error()

            # Convert datetime objects to number
            if isinstance(data[0], datetime):
                data = date2num(data, self.time_def.units,
                                self.time_def.calendar)

            # Convert bool objects to integer
            if data.dtype.str == "|b1":
                data = np.int8(data)

            # Set dimensions (dependend on product level)
            if level3:
                if flip_yc:
                    data = np.flipud(data)
                if parameter_name not in lonlat_parameter_names:
                    # add a leading dimension of length 1
                    data = np.array([data])
                    dimensions = tuple(dims[0:len(data.shape)])
                else:
                    # lon/lat: skip the first dimension
                    dimensions = tuple(dims[1:len(data.shape) + 1])
            else:
                dimensions = tuple(dims[0:len(data.shape)])

            # Create and set the variable
            var = self._rootgrp.createVariable(parameter_name, data.dtype.str,
                                               dimensions, zlib=self.zlib)
            var[:] = data

            # Add Parameter Attributes
            for key in sorted(attribute_dict.keys()):
                attribute = attribute_dict[key]
                attribute = self.output_handler.fill_template_string(
                    attribute, self.data)
                setattr(var, key, attribute)

    def _create_root_group(self, attdict, **global_attr_keyw):
        """
        Create the root group and add l1b metadata as
        global attributes
        """
        self._convert_datetime_attributes(attdict)
        self._convert_bool_attributes(attdict)
        self._convert_nonetype_attributes(attdict)
        self._set_global_attributes(attdict, **global_attr_keyw)

    def _convert_datetime_attributes(self, attdict):
        """
        Replace l1b info parameters of type datetime.datetime by a double
        representation to match requirements for netCDF attribute data
        type rules (in-place)
        """
        for key in attdict.keys():
            content = attdict[key]
            if isinstance(content, datetime):
                attdict[key] = date2num(content, self.time_def.units,
                                        self.time_def.calendar)

    def _convert_bool_attributes(self, attdict):
        """
        Replace l1b info parameters of type bool ['b1'] by a integer
        representation to match requirements for netCDF attribute data
        type rules (in-place)
        """
        for key in attdict.keys():
            content = attdict[key]
            if isinstance(content, bool):
                attdict[key] = int(content)

    def _convert_nonetype_attributes(self, attdict):
        """
        Replace l1b info parameters that are None by an empty string
        to match requirements for netCDF attribute data
        type rules (in-place)
        """
        for key in attdict.keys():
            content = attdict[key]
            if content is None:
                attdict[key] = ""

    def _set_global_attributes(self, attdict, prefix=""):
        """ Save l1b.info dictionary as global attributes """
        for key in attdict.keys():
            self._rootgrp.setncattr(prefix + key, attdict[key])

    def _get_variable_attr_dict(self, parameter):
        """
        Retrieve the parameter attributes (a copy of the entry in
        `self.parameter_attributes` or a set of default attributes if the
        parameter is not listed)
        """
        if parameter in self.parameter_attributes:
            return dict(self.parameter_attributes[parameter])
        return {
            "long_name": parameter,
            "standard_name": parameter,
            "scale_factor": 1.0,
            "add_offset": 0.0
        }

    def _write_processor_settings(self):
        """
        Write each processor setting as global attribute (string
        representation). Nothing is written if no settings have been set.
        """
        settings = self._proc_settings
        # (this was a `pass`, causing an AttributeError for unset settings)
        if settings is None:
            return
        # NOTE(review): `iterkeys` is kept as is, since the type of the
        # settings object is defined by the caller (not available for
        # standard dictionaries in Python 3)
        for item in settings.iterkeys():
            self._rootgrp.setncattr(item, str(settings[item]))

    def _open_file(self):
        """ Open the output file (property `full_path`) in write mode """
        try:
            self._rootgrp = Dataset(self.full_path, "w")
        except RuntimeError:
            msg = "Unable to create netCDF file: %s" % self.full_path
            self.error.add_error("nc-runtime-error", msg)
            self.error.raise_on_error()

    def _write_to_file(self):
        """ Close the netCDF file """
        self._rootgrp.close()

    @property
    def export_path(self):
        """ Evoking this property will also create the directory if it
        does not already exists """
        return self.output_handler.get_directory_from_data(self.data,
                                                           create=True)

    @property
    def export_filename(self):
        """ Returns the filename for the level2 output file """
        return self.output_handler.get_filename_from_data(self.data)

    @property
    def full_path(self):
        """ Export path and export filename joined as `Path` object """
        return Path(self.export_path) / self.export_filename