def _insert_slice(self, current):
    """
    Inserts one slice
    :param Slice current: the slice to be imported
    """
    current_exception = None
    current_str = ""
    for attempt in range(0, ConfigManager.retries):
        try:
            current_str = str(current)
            file = self._generate_gml_slice(current)
            subsets = self._get_update_subsets_for_slice(current)
            request = WCSTUpdateRequest(self.coverage.coverage_id, file.get_url(), subsets, ConfigManager.insitu)
            executor = ConfigManager.executor
            executor.execute(request, mock=ConfigManager.mock)
            file.release()
            self.resumer.add_imported_data(current.data_provider)
        except Exception as e:
            log.warn("\nException thrown when trying to insert slice: \n" + current_str +
                     "Retrying, you can safely ignore the warning for now. Tried " + str(attempt + 1) + " times.\n")
            current_exception = e
            sleep(ConfigManager.retry_sleep)
        else:
            break
    else:
        log.warn("\nFailed to insert slice. Attempted " + str(ConfigManager.retries) + " times.")
        raise current_exception

def __load_imported_data_from_resume_file(self, coverage_id):
    """
    Try to load a resume file coverage_id.resume.json from the input data folder.
    :param str coverage_id: coverage id of the current importer, used to find the resume file
    """
    if coverage_id not in Resumer.__IMPORTED_DATA_DICT:
        resume_file_path = ConfigManager.resumer_dir_path + coverage_id + Resumer.__RESUMER_FILE_SUFFIX
        Resumer.__RESUMER_FILE_NAME_DICT[coverage_id] = resume_file_path
        try:
            if os.path.isfile(resume_file_path) and os.access(resume_file_path, os.R_OK):
                log.info("We found a resumer file in the ingredients folder. The slices listed in '"
                         + resume_file_path + "' will not be imported.")
                file = open(Resumer.__RESUMER_FILE_NAME_DICT[coverage_id])
                data = json.loads(file.read())
                Resumer.__IMPORTED_DATA_DICT[coverage_id] = data
                file.close()
        except IOError as e:
            raise RuntimeException("Could not read the resume file, full error message: " + str(e))
        except ValueError as e:
            log.warn("The resumer JSON file could not be parsed. A new one will be created.")

def _insert_slice(self, current):
    """
    Inserts one slice
    :param Slice current: the slice to be imported
    """
    current_exception = None
    current_str = ""
    for attempt in range(0, ConfigManager.retries):
        try:
            current_str = str(current)
            file = self._generate_gml_slice(current)
            subsets = self._get_update_subsets_for_slice(current)
            request = WCSTUpdateRequest(self.coverage.coverage_id, file.get_url(), subsets, ConfigManager.insitu)
            executor = ConfigManager.executor
            executor.execute(request)
            file.release()
            self.resumer.add_imported_data(current.data_provider)
        except Exception as e:
            log.warn("\nException thrown when trying to insert slice: \n" + current_str +
                     "Retrying, you can safely ignore the warning for now. Tried " + str(attempt + 1) + " times.\n")
            current_exception = e
            sleep(ConfigManager.retry_sleep)
        else:
            break
    else:
        log.warn("\nFailed to insert slice. Attempted " + str(ConfigManager.retries) + " times.")
        raise current_exception

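# A minimal, self-contained sketch of the for/else retry pattern used in _insert_slice above;
# retry(), retries and retry_sleep are hypothetical placeholders, not importer API.
from time import sleep

def retry(operation, retries=5, retry_sleep=1):
    last_exception = None
    for attempt in range(retries):
        try:
            result = operation()
        except Exception as e:
            last_exception = e
            sleep(retry_sleep)
        else:
            break                  # success: leave the loop, skipping the for-else below
    else:
        # runs only when the loop finished without break, i.e. every attempt failed
        raise last_exception
    return result
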
def __init__(self, coverage_id):
    """
    The resumer keeps track of data providers that have been imported, so that a record is kept
    if several runs are performed.
    :param str coverage_id: the id of the coverage that is imported
    """
    if ConfigManager.track_files:
        self.__RESUMER_FILE_NAME__ = ConfigManager.resumer_dir_path + coverage_id + self.__RESUMER_FILE_SUFFIX__
        self.imported_data = []
        try:
            if os.path.isfile(self.__RESUMER_FILE_NAME__) and os.access(self.__RESUMER_FILE_NAME__, os.R_OK):
                log.info("We found a resumer file in the ingredients folder. The slices listed in "
                         + str(self.__RESUMER_FILE_NAME__) + " will not be imported.")
                self.resume_fp = open(self.__RESUMER_FILE_NAME__)
                self.imported_data = json.loads(self.resume_fp.read())
                self.resume_fp.close()
        except IOError as e:
            raise RuntimeException("Could not read the resume file, full error message: " + str(e))
        except ValueError as e:
            log.warn("The resumer JSON file could not be parsed. A new one will be created.")

def __parse_netcdf_bands_metadata(self, user_bands):
    """
    Parse the netCDF file to extract the bands' metadata for the coverage.
    :return: dict
    """
    netcdf_files = self.session.get_files()
    file_path = netcdf_files[0].filepath
    # NOTE: all files should have the same bands' metadata
    netCDF4 = import_netcdf4()
    dataset = netCDF4.Dataset(file_path, 'r')
    bands_metadata = {}
    for user_band in user_bands:
        band_id = user_band.identifier
        attrs_list = dataset.variables[band_id].ncattrs()
        bands_metadata[band_id] = {}
        for attr in attrs_list:
            try:
                bands_metadata[band_id][attr] = str(getattr(dataset.variables[band_id], attr))
            except:
                log.warn("Attribute '" + attr + "' of band '" + band_id + "' cannot be parsed as string, ignored.")
    return bands_metadata

def parse_netcdf_bands_metadata(file_path, user_bands):
    """
    Parse the netCDF file to extract the bands' metadata for the coverage's global metadata.
    :param str file_path: path to the first input netCDF file
    :param list[UserBand] user_bands: list of configured bands in the ingredient file
    :return: dict
    """
    # NOTE: all files should have the same bands' metadata
    netCDF4 = import_netcdf4()
    dataset = netCDF4.Dataset(file_path, 'r')
    bands_metadata = {}
    for user_band in user_bands:
        band_id = user_band.identifier
        attrs_list = dataset.variables[band_id].ncattrs()
        bands_metadata[band_id] = {}
        for attr in attrs_list:
            try:
                bands_metadata[band_id][attr] = str(getattr(dataset.variables[band_id], attr))
            except:
                log.warn("Attribute '" + attr + "' of band '" + band_id + "' cannot be parsed as string, ignored.")
    return bands_metadata

def _get_level(self, file_path):
    if '_MSIL1C_' in file_path:
        return self.LVL_L1C
    elif '_MSIL2A_' in file_path:
        return self.LVL_L2A
    else:
        log.warn("Cannot determine level from collected file: " + file_path + "; assuming L1C.")
        return self.LVL_L1C

def diff_files(out, exp):
    """
    Print diff of out and exp on stderr.
    """
    outstr = read_lines(out)
    expstr = read_lines(exp)
    for line in difflib.unified_diff(outstr, expstr, fromfile=exp, tofile=out):
        log.warn(line.strip())

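# A short, self-contained illustration of difflib.unified_diff as used by diff_files above;
# the two line lists and labels are made up for the example.
import difflib

output_lines = ["a\n", "b\n", "c\n"]
expected_lines = ["a\n", "B\n", "c\n"]
for diff_line in difflib.unified_diff(output_lines, expected_lines, fromfile="expected", tofile="output"):
    print(diff_line.rstrip())
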
def get_progress(self):
    """
    Returns the progress of the import
    :rtype: tuple
    """
    if self.total == 0:
        log.warn("No slices to import.")
        return -1, -1
    return self.processed, self.total

def validate_input_file_paths(file_paths):
    """
    If no input file paths are available to analyze, exit the wcst_import process and log a warning.
    :param list[str] file_paths: list of input file paths
    """
    if len(file_paths) == 0:
        log.warn("No files provided. Check that the paths you provided are correct. Done.")
        exit(0)

def validate(self):
    """
    Check that the test case is valid, e.g. it must have an id.
    Return True if valid, False otherwise.
    """
    ret = True
    if not self.testid:
        log.warn("Testcase missing an id.")
        ret = False
    return ret

def log_crs_replacement_epsg_version_0_by_version_85():
    """
    Just log a warning when wcst_import needs to replace EPSG version 0 with version 8.5.
    """
    log.warn("EPSG/0/NNNN points to the latest EPSG dictionary version, "
             "so CRS definitions may change with new releases of the EPSG dataset. \n"
             "In particular, coverage was created when latest EPSG "
             "version was 8.5, which for longitude axis is now incompatible with the current "
             "EPSG version ('Long' axis label changed to 'Lon').\n Therefore wcst_import will change "
             "longitude axis label to 'Long' for EPSG/0/NNNN.")

def add_config_line(self, line):
    if line.strip() == "" or line.startswith("#"):
        return False
    if ':' not in line:
        log.warn("Invalid test case, expected line of "
                 "format 'key: value', but no ':' is found in:\n%s", line)
        return False
    kv = line.split(":", 1)
    self.add_config(kv[0].strip(), kv[1].strip())
    return True

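# The "key: value" parsing above splits only on the first colon, so values may themselves
# contain colons; a one-line illustration with a made-up config line:
key, value = "query: select c from rgb as c".split(":", 1)
# key.strip() == "query", value.strip() == "select c from rgb as c"
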
def uncompress_directory(d):
    """
    If directory d is not found, check for d.tar.gz and uncompress it if it exists.
    """
    if os.path.exists(d):
        return True
    dtargz = remove_slash(d) + ".tar.gz"
    if not os.path.exists(dtargz):
        log.warn("Directory '%s' or corresponding archive '%s' not found.", d, dtargz)
        return False
    with tarfile.open(dtargz, "r:gz") as tar:
        tar.extractall()

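# A minimal sketch of the tar.gz fallback performed by uncompress_directory above;
# "queries" and "queries.tar.gz" are hypothetical paths relative to the current directory.
import os
import tarfile

directory = "queries"
archive = directory + ".tar.gz"
if not os.path.exists(directory) and os.path.exists(archive):
    with tarfile.open(archive, "r:gz") as tar:
        tar.extractall()  # extracts into the current working directory
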
def _insert_slices(self):
    """
    Insert the slices of the coverage
    """
    for i in range(self.processed, self.total):
        try:
            self._insert_slice(self.coverage.slices[i])
        except Exception as e:
            if ConfigManager.skip:
                log.warn("Skipped slice " + str(self.coverage.slices[i]))
            else:
                raise e
        self.processed += 1

def kill_pids(pids, sig):
    """
    Kill the list of pids with the given signal.
    """
    pids = pgrep("rasserver")
    failed_pids = []
    for pid in pids:
        try:
            os.kill(int(pid), sig)
        except OSError as err:
            log.warn("Failed killing rasserver process with pid %s: %s.", pid, err.strerror)
            failed_pids.append(pid)
    return failed_pids

def parse_netcdf_axes_metadata(file_path, crs_axes_configured_dict):
    """
    Parse the netCDF file to extract the axes' metadata for the coverage's global metadata.
    :param str file_path: path to the first input netCDF file
    :param dict crs_axes_configured_dict: dictionary of CRS axis labels and their configuration
                                          in the ingredient file under the "slicer"/"axes" section
    :return: dict
    """
    netCDF4 = import_netcdf4()
    # NOTE: all files should have the same axes' metadata
    dataset = netCDF4.Dataset(file_path, 'r')
    axes_metadata = {}
    # Iterate over all slicer/axes configured in the ingredient file
    for crs_axis_label, axis_configured_dict in crs_axes_configured_dict.items():
        min = axis_configured_dict["min"]
        # Get the axis variable name in the netCDF file from the min configuration,
        # e.g: "Long": { "min": "${netcdf:variable:lon:min}" } -> axis variable name is "lon"
        variable_axis_label = None
        # Find the variable axis label from the netCDF expression for this axis
        for key, value in axis_configured_dict.items():
            tmp = re.search("variable:(.*):.*}", str(value))
            if tmp is not None:
                variable_axis_label = tmp.group(1)
                break
        if variable_axis_label is not None:
            if variable_axis_label in dataset.variables:
                axes_metadata[crs_axis_label] = {}
                attrs_list = dataset.variables[variable_axis_label].ncattrs()
                for attr in attrs_list:
                    try:
                        # crs axis (e.g: Long) -> variable axis (e.g: lon)
                        axes_metadata[crs_axis_label][attr] = str(getattr(dataset.variables[variable_axis_label], attr))
                    except:
                        log.warn("Attribute '" + attr + "' of axis '" + variable_axis_label
                                 + "' cannot be parsed as string, ignored.")
    return axes_metadata

def validate(self):
    super(Recipe, self).validate()
    valid_files = []
    # Local validation of input files
    for file in self.session.get_files():
        file_name = os.path.basename(file.get_filepath())
        if not bool(re.match(self.GRD_FILE_PATTERN, file_name)):
            log.warn("File '" + file.get_filepath()
                     + "' is not a valid GRD TIFF file, ignored for further processing.")
        else:
            valid_files.append(file)
    self.session.files = valid_files

def __run_shell_command(self, command, abort_on_error=False):
    """
    Run a shell command and exit wcst_import if needed
    :param str command: shell command to run
    """
    try:
        log.info("Executing shell command '{}'...".format(command))
        output = subprocess.check_output(command, stderr=subprocess.STDOUT, shell=True)
        output = decode_res(output)
        if output != "":
            log.info("Output result '{}'".format(output))
    except subprocess.CalledProcessError as exc:
        log.warn("Failed, status code '{}', error message '{}'.".format(exc.returncode, str(exc.output).strip()))
        if abort_on_error:
            log.error("wcst_import terminated on running hook command.")
            exit(1)

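# A self-contained sketch of running a shell hook and inspecting a non-zero exit status,
# mirroring __run_shell_command above; the command string is an arbitrary example.
import subprocess

try:
    output = subprocess.check_output("ls /nonexistent-path", stderr=subprocess.STDOUT, shell=True)
    print(output.decode("utf-8").strip())
except subprocess.CalledProcessError as exc:
    # exc.returncode holds the exit status, exc.output the combined stdout/stderr
    print("hook failed with status", exc.returncode, exc.output)
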
def parse_netcdf_global_metadata(file_path):
    """
    Parse the first of the imported netCDF files to extract the global metadata for the coverage.
    :param str file_path: path to the first netCDF input file
    :return: dict global_metadata
    """
    # NOTE: all files should have the same global metadata
    netCDF4 = import_netcdf4()
    dataset = netCDF4.Dataset(file_path, 'r')
    global_metadata = {}
    for attr in dataset.ncattrs():
        try:
            global_metadata[attr] = str(getattr(dataset, attr))
        except:
            log.warn("Attribute '" + attr + "' of global metadata cannot be parsed as string, ignored.")
    return global_metadata

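# A hedged sketch of reading netCDF global attributes with the netCDF4 package directly,
# assuming a hypothetical file "input.nc" and that netCDF4 is installed (the helpers above
# go through import_netcdf4() instead).
from netCDF4 import Dataset

dataset = Dataset("input.nc", "r")
global_metadata = {attr: str(getattr(dataset, attr)) for attr in dataset.ncattrs()}
dataset.close()
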
def cmp_files(out, exp, show_diff=False):
    """
    Compare output file (out) to expected file (exp).
    """
    if not os.path.isfile(exp):
        log.warn("Expected file '%s' not found, will be copied from output '%s'.", exp, out)
        shutil.copyfile(out, exp)
        return False
    ret = filecmp.cmp(out, exp)
    if not ret:
        log.error("Output file '%s' does not match expected file '%s'.%s",
                  out, exp, " Diff:" if show_diff else "")
        if show_diff:
            diff_files(out, exp)
    return ret

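# A minimal sketch of the compare-or-seed behaviour of cmp_files above: a missing expected
# file is seeded from the output on the first run; the file names are hypothetical.
import os
import filecmp
import shutil

out, exp = "test.out", "test.exp"
if not os.path.isfile(exp):
    shutil.copyfile(out, exp)    # first run: record the current output as the new oracle
    matches = False
else:
    matches = filecmp.cmp(out, exp)
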
def main():
    args = parse_cmdline()
    global RMANHOME, RMANHOME_BIN
    RMANHOME = args.rmanhome
    if not RMANHOME:
        log.warn("RMANHOME has not been set; consider specifying it with the --rmanhome option.")
    else:
        RMANHOME = append_slash(RMANHOME)
        RMANHOME_BIN = RMANHOME + "bin/"
        RASQL[0] = RMANHOME_BIN + RASQL[0]
        log.info("rasql: %s", RASQL)

    log.title("Running rasql systemtest.")

    # The queries and oracles directories are quite big, so they are distributed as
    # compressed archives, which potentially need to be uncompressed (for a first run).
    for d in [QUERIES_DIR, ORACLES_DIR]:
        uncompress_directory(d)

    all_tests = None
    if args.testsfile:
        all_tests = [SETUP_TESTS, args.testsfile]
    elif args.drop:
        all_tests = []
    else:
        all_tests = [SETUP_TESTS] + sorted([QUERIES_DIR + f for f in os.listdir(QUERIES_DIR)
                                            if f not in SPECIAL_FILES and not f.endswith('.bak')])
    all_tests = all_tests + [TEARDOWN_TESTS]
    log.info("Tests files to execute: %s", all_tests)

    # run tests
    ret = True
    start_time = timer()
    for tests_file in all_tests:
        ret = evaluate_existing_tests(tests_file) and ret
    end_time = timer()
    print_stats(end_time - start_time)
    return ret

def parse_input(self, paths):
    """
    Parses the list of paths and returns an ordered list of complete file paths
    :param list[str] paths: the list of paths
    :rtype: list[File]
    """
    file_paths = self.__get_file_paths(paths)
    if len(file_paths) < len(paths):
        log.warn("WARNING: The materialized paths contain less files than the initial paths. This can be normal if "
                 "a directory provided in the paths is empty or if a path regex returns no results. If this is not "
                 "the case, make sure the paths are correct and readable by the importer.")
    file_paths.sort()
    file_obs = [File(f) for f in file_paths]
    return file_obs

def get_valid_files(self):
    """
    Return the file paths that can be opened by GDAL; self.files is the list of files to validate.
    """
    # Validate input files with GDAL. If GDAL cannot decode a file, a warning is logged.
    # GDAL needs the file name encoded in 'utf8', otherwise file names with spaces cannot be opened.
    file_paths = []
    for file in self.files:
        fileName = str(file).encode('utf8')
        check = gdal.Open(fileName)
        if check is not None:
            file_paths = file_paths + [file]
        else:
            log.warn("WARNING: File " + str(file) + " is not a valid GDAL decodable file. "
                     "The import process will ignore this file.\n")
    return file_paths

def ignore_coverage_slice_from_file_if_possible(file_path, exception):
    """
    In case wcst_import cannot process one file due to some problem with it and "skip" is set to true,
    wcst_import should not throw an exception but log a warning to the user.
    :param str file_path: path to the problematic file
    :param Exception exception: exception thrown from previous statements
    """
    if ConfigManager.skip:
        log.warn("WARNING: input file '" + file_path + "' cannot be processed,\n"
                 "wcst_import will ignore this file as \"skip\" is set to true in the ingredient file. Reason: "
                 + str(exception))
    else:
        # Re-raise the original exception, preserving its type and traceback
        exc_type, exc_value, exc_traceback = sys.exc_info()
        raise exc_value.with_traceback(exc_traceback)

def restart_rasdaman_if_rasmgr_down():
    """
    If rasmgr is down: kill all rasservers, and run start_rasdaman.sh again.
    """
    if len(pgrep("rasmgr")) == 0:
        log.warn("rasmgr is down. Killing all rasservers, and starting rasdaman again.")
        pids = pgrep("rasserver")
        not_killed = kill_pids(pids, signal.SIGTERM)
        not_killed = kill_pids(not_killed, signal.SIGKILL)
        if len(not_killed) == 0:
            out, err, rc = execute(["start_rasdaman.sh"])
            log.info("Started rasdaman\nexit code: %d\nstdout: %s\nstderr: %s\n", rc, out, err)
            return True
        else:
            log.error("Failed killing rasservers, cannot start rasdaman.")
            return False

def parse_input(self, paths):
    """
    Parses the list of paths and returns an ordered list of complete file paths
    :param list[str] paths: the list of paths
    :rtype: list[File]
    """
    file_paths = []
    for path in paths:
        path = path.strip()
        if not path.startswith("/"):
            path = self.ingredients_dir_path + path
        file_paths = file_paths + glob.glob(path)
    if len(file_paths) < len(paths):
        log.warn("WARNING: The materialized paths contain less files than the initial paths. This can be normal if "
                 "a directory provided in the paths is empty or if a path regex returns no results. If this is not "
                 "the case, make sure the paths are correct and readable by the importer.")
    file_paths.sort()
    file_obs = [File(f) for f in file_paths]
    return file_obs

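# A minimal sketch of the path materialization in parse_input above: relative patterns are
# resolved against the ingredients directory before globbing; the base path and patterns
# below are made up.
import glob

base_dir = "/tmp/ingredients/"
patterns = ["*.tif", "/data/extra/*.tif"]
file_paths = []
for p in patterns:
    p = p.strip()
    if not p.startswith("/"):
        p = base_dir + p
    file_paths += glob.glob(p)
file_paths.sort()
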
def interpret_result(ret, test, separator):
    """
    Collect statistics, and log failed / known fails tests.
    """
    global TOTAL_TESTS_COUNT, TOTAL_TESTS_IGNORED, TOTAL_QUERIES_COUNT, FAILED_TESTS
    TOTAL_TESTS_COUNT += 1
    TOTAL_QUERIES_COUNT += len(test.queries)
    if test.skip:
        log.warn("Test evaluation skipped, reason: %s", test.skip)
        IGNORED_TESTS.append("{} (evaluation skipped, reason: {})".format(test.testid, test.skip))
        ret = True
    elif not ret and test.knownfail:
        log.warn("Test result ignored, reason: %s", test.knownfail)
        IGNORED_TESTS.append("{} (result ignored, reason: {})".format(test.testid, test.knownfail))
        ret = True
    elif ret and test.knownfail:
        log.warn("Test marked as known fail has been fixed (%s)", test.knownfail)
    elif not ret:
        FAILED_TESTS.append(test.testid)
    log.info(separator)
    return ret

def validate_file_path(file_path):
    """
    Check if a file exists; if not, just log it and continue.
    :param file_path: path to an input file
    :return: boolean
    """
    # For a GDAL virtual file path, for example:
    # SENTINEL2_L1C:/vsizip//*_20181204T111726.zip/*_20181204T111726.SAFE/MTD_MSIL1C.xml:TCI:EPSG_32632
    pattern = re.compile(".*/vsi[a-z]+/.*")
    if pattern.match(file_path) or ":" in file_path:
        # It is a GDAL virtual file system path or a subdataset (e.g: NETCDF:file_path:variable),
        # so skip the accessibility check
        return True
    elif not os.access(file_path, os.R_OK):
        log.warn("File '" + file_path + "' is not accessible, will be skipped from further processing.")
        return False
    return True

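# A tiny check of the /vsi.../ pattern used by validate_file_path above, with made-up paths.
import re

pattern = re.compile(".*/vsi[a-z]+/.*")
print(bool(pattern.match("SENTINEL2_L1C:/vsizip//granule.zip/MTD_MSIL1C.xml")))  # True
print(bool(pattern.match("/data/plain/file.tif")))                               # False
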
def __filter_invalid_geo_bounds(self, slices_dict):
    """
    Filter any coverage slices (scenes) which have invalid lat and long bounds in EPSG:4326
    """
    results = OrderedDict()
    for key, slices in slices_dict.items():
        results[key] = []
        for slice in slices:
            input_file = slice.data_provider.file.filepath
            axis_subsets = slice.axis_subsets
            is_valid = True
            for axis_subset in axis_subsets:
                axis = axis_subset.coverage_axis.axis
                geo_lower_bound = axis.low
                geo_upper_bound = axis.high
                axis_label = axis.label
                if axis.crs_axis.uri.endswith(self.EPSG_4326):
                    if CRSUtil.is_latitude_axis(axis_label):
                        is_valid = geo_lower_bound >= -90 and geo_upper_bound <= 90
                    elif CRSUtil.is_longitude_axis(axis_label):
                        is_valid = geo_lower_bound >= -180 and geo_upper_bound <= 180
                if not is_valid:
                    log.warn("File '" + input_file + "' has invalid lat or long axes geo bounds "
                             "in EPSG:4326 CRS, ignored for further processing.")
                    break
            if is_valid:
                results[key].append(slice)
    return results

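# A standalone version of the EPSG:4326 bounds check applied per axis above; the helper
# name is hypothetical.
def bounds_valid(lower, upper, is_latitude):
    limit = 90 if is_latitude else 180
    return lower >= -limit and upper <= limit
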
def get_progress(self):
    if self.total == 0:
        log.warn("No slices to import.")
        return -1, -1
    else:
        return self.processed, self.total

def _apply_operation(self, nc_dataset, nc_obj_name, operation):
    """
    Applies an operation on a given variable which contains a list of values (e.g: lat = [0, 1, 2, 3,...]),
    e.g: find the min of the time variable ${netcdf:variable:time:min}.
    :param netCDF4 nc_dataset: the netCDF dataset
    :param str nc_obj_name: name of a netCDF variable or netCDF dimension
    :param str operation: the operation to apply
    :return: str value: the value from the applied operation, with full precision
    """
    """
    NOTE: min or max of list(variable) with values like [148654.08425925925,...] will return 148654.084259
    as float, which will cause a problem when calculating coefficients: the first coefficient should be 0,
    but due to this change of the min/max value the coefficient becomes something like 0.00000000001 (case: PML).
    "min": "${netcdf:variable:ansi:min} * 24 * 3600 - 11644560000.0" -> returns: 1199152879.98
    "directPositions": "[float(x) * 24 * 3600 - 11644560000.0 for x in ${netcdf:variable:ansi}]" -> returns 1199152880.0
    So we must use the values in the list as strings and split them into a list to get the same values.
    """
    MAX = "max"
    MIN = "min"
    LAST = "last"
    FIRST = "first"
    RESOLUTION = "resolution"
    METADATA = "metadata"
    # List of supported operations on a netCDF variable
    supported_operations = [MAX, MIN, LAST, FIRST, RESOLUTION, METADATA]

    import_util.import_numpy()
    import numpy as np

    if nc_obj_name in nc_dataset.variables:
        nc_obj = nc_dataset.variables[nc_obj_name]
        if operation not in supported_operations:
            # it must be an attribute of the variable
            return nc_obj.__getattribute__(operation)
        # It must be an operation that can be applied on a netCDF variable:
        # convert the list of string values to a list of decimal values
        values = nc_obj[:].flatten()
    elif nc_obj_name in nc_dataset.dimensions:
        nc_obj = nc_dataset.dimensions[nc_obj_name]
        # Cannot determine a list of values from a variable, only from the dimension (e.g: station = 758)
        values = np.arange(0, nc_obj.size)
    else:
        raise Exception("Cannot find '" + nc_obj_name + "' from the list of netCDF variables and dimensions.")

    if operation == MAX:
        return to_decimal(np.amax(values))
    elif operation == MIN:
        return to_decimal(np.amin(values))
    elif operation == LAST:
        last_index = len(nc_obj) - 1
        return to_decimal(values[last_index])
    elif operation == FIRST:
        return to_decimal(values[0])
    elif operation == RESOLUTION:
        # NOTE: only netCDF needs this expression to calculate the resolution automatically;
        # GDAL uses ${gdal:resolutionX} and GRIB uses ${grib:jDirectionIncrementInDegrees} respectively
        return self.__calculate_netcdf_resolution(values)
    elif operation == METADATA:
        # return a dict of variable (axis) metadata with keys and values as strings
        tmp_dict = {}
        for attr in nc_obj.ncattrs():
            try:
                tmp_dict[attr] = escape(getattr(nc_obj, attr))
            except:
                log.warn("Attribute '" + attr + "' of variable '" + nc_obj._getname()
                         + "' cannot be parsed as string, ignored.")
        return tmp_dict

    # Not a supported operation and not a valid attribute of the netCDF variable
    raise RuntimeException("Invalid operation on netcdf variable: " + operation
                           + ". Currently supported: " + ', '.join(supported_operations)
                           + " or any metadata entry of the variable.")

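# A self-contained sketch of the numeric operations _apply_operation supports (min, max,
# first, last) on a flattened numpy array; the values are arbitrary and numpy is assumed
# to be installed.
import numpy as np

values = np.array([[3.5, 1.0], [7.25, 2.0]]).flatten()
print(np.amin(values), np.amax(values))        # min and max over all values
print(values[0], values[len(values) - 1])      # "first" and "last" operations
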