def find_local_raw_file(year, directory_name=None,
                        raise_error_if_missing=True):
    """Locates the raw Storm Events file for one year on the local machine.

    The path is built from the directory name, `PATHLESS_RAW_FILE_PREFIX`, the
    year string and `RAW_FILE_EXTENSION`.

    :param year: Year (integer) for which the file is sought.
    :param directory_name: Name of directory with Storm Events files.  Must be
        a string (the default of None does not pass the type check).
    :param raise_error_if_missing: Boolean flag.  If True and the file is
        missing, this method raises an error.
    :return: raw_file_name: File path.  If the file is missing and
        `raise_error_if_missing = False`, this is the *expected* path.
    :raises: ValueError: if the file is missing and
        `raise_error_if_missing = True`.
    """

    error_checking.assert_is_integer(year)
    error_checking.assert_is_string(directory_name)
    error_checking.assert_is_boolean(raise_error_if_missing)

    path_components = (
        directory_name, PATHLESS_RAW_FILE_PREFIX,
        _year_number_to_string(year), RAW_FILE_EXTENSION
    )
    raw_file_name = '{0:s}/{1:s}{2:s}{3:s}'.format(*path_components)

    # Guard clause: nothing more to do unless the caller wants a hard failure
    # and the file really is absent.
    if not raise_error_if_missing or os.path.isfile(raw_file_name):
        return raw_file_name

    raise ValueError(
        'Cannot find raw file. Expected at location: ' + raw_file_name)
def gzip_file(input_file_name, output_file_name=None, delete_input_file=True):
    """Creates gzip archive with one file.

    The archive is produced by the external `gzip` program.  The command is
    run from an argument list (no shell), so file names containing quotes,
    spaces, "$" or other shell metacharacters are handled safely.

    :param input_file_name: Path to input file (will be gzipped).
    :param output_file_name: Path to output file (extension must be ".gz").
        If `output_file_name is None`, will simply append ".gz" to name of
        input file.
    :param delete_input_file: Boolean flag.  If True, will delete input file
        after gzipping.
    :raises: ValueError: if `output_file_name` does not end with ".gz".
    :raises: ValueError: if the Unix command fails (including the cases where
        `gzip` cannot be launched or the output file cannot be opened).
    """

    # Function-scope import, so this block does not depend on the file's
    # top-level import list.
    import subprocess

    error_checking.assert_file_exists(input_file_name)
    error_checking.assert_is_boolean(delete_input_file)

    if output_file_name is None:
        output_file_name = '{0:s}.gz'.format(input_file_name)

    if not output_file_name.endswith('.gz'):
        error_string = (
            'Output file ("{0:s}") should have extension ".gz".'
        ).format(output_file_name)

        raise ValueError(error_string)

    # "--" ends option parsing, so an input name starting with "-" is still
    # treated as a file.  gzip writes the archive to stdout (-c), which is
    # redirected to the output file here rather than by a shell.
    gzip_arguments = ['gzip', '-v', '-c', '--', input_file_name]

    try:
        with open(output_file_name, 'wb') as output_file_handle:
            exit_code = subprocess.call(
                gzip_arguments, stdout=output_file_handle)
    except (IOError, OSError) as this_error:
        # Keep the documented contract: every failure surfaces as ValueError.
        raise ValueError(
            '\nUnix command failed: {0:s}'.format(str(this_error))
        )

    if exit_code != 0:
        raise ValueError('\nUnix command failed (log messages shown above '
                         'should explain why).')

    if delete_input_file:
        os.remove(input_file_name)
def find_file(year, directory_name, raise_error_if_missing=True):
    """Locates the Storm Events file holding all storm reports for one year.

    :param year: Year (integer).
    :param directory_name: Name of directory with Storm Events files.
    :param raise_error_if_missing: Boolean flag.  If the file is missing and
        `raise_error_if_missing = True`, this method errors out.
    :return: storm_event_file_name: Path to Storm Events file.  If the file is
        missing and `raise_error_if_missing = False`, this is the *expected*
        path.
    :raises: ValueError: if the file is missing and
        `raise_error_if_missing = True`.
    """

    error_checking.assert_is_integer(year)
    error_checking.assert_is_string(directory_name)
    error_checking.assert_is_boolean(raise_error_if_missing)

    path_components = (
        directory_name, PATHLESS_FILE_PREFIX,
        _year_number_to_string(year), FILE_EXTENSION
    )
    storm_event_file_name = '{0:s}/{1:s}{2:s}{3:s}'.format(*path_components)

    # Return early unless a missing file must be reported as an error.
    if not raise_error_if_missing or os.path.isfile(storm_event_file_name):
        return storm_event_file_name

    raise ValueError(
        'Cannot find Storm Events file. Expected at: {0:s}'.format(
            storm_event_file_name)
    )
def find_metafile(model_file_name, raise_error_if_missing=True):
    """Locates the metafile that sits in the same directory as a CNN file.

    The metafile is expected at "<model directory>/model_metadata.p".  If it
    is not found there, the same path is retried with the prefix
    "/glade/work/ryanlage" swapped for "/condo/swatwork/ralager".

    :param model_file_name: Path to model itself (see doc for `read_model`).
    :param raise_error_if_missing: Boolean flag.  If the file is missing and
        `raise_error_if_missing = True`, this method errors out.
    :return: metafile_name: Path to metafile.  If the file is missing and
        `raise_error_if_missing = False`, this is the expected path (after the
        prefix swap described above).
    :raises: ValueError: if the file is missing and
        `raise_error_if_missing = True`.
    """

    error_checking.assert_is_string(model_file_name)
    error_checking.assert_is_boolean(raise_error_if_missing)

    model_directory_name = os.path.split(model_file_name)[0]
    metafile_name = '{0:s}/model_metadata.p'.format(model_directory_name)

    # Fall back to the alternate file system when the first guess is absent.
    if not os.path.isfile(metafile_name):
        metafile_name = metafile_name.replace(
            '/glade/work/ryanlage', '/condo/swatwork/ralager'
        )

    if os.path.isfile(metafile_name) or not raise_error_if_missing:
        return metafile_name

    raise ValueError(
        'Cannot find file. Expected at: "{0:s}"'.format(metafile_name)
    )
def find_file(directory_name, year, raise_error_if_missing=True):
    """Locates the NetCDF file with RRTM data for one year.

    The expected path is "<directory>/rrtm_output_<yyyy>.nc".

    :param directory_name: Name of directory where file is expected.
    :param year: Year (integer).
    :param raise_error_if_missing: Boolean flag.  Governs what happens when
        the file is missing: if True, an error is thrown; if False, the
        *expected* file path is returned.
    :return: rrtm_file_name: File path.
    :raises: ValueError: if the file is missing and
        `raise_error_if_missing == True`.
    """

    error_checking.assert_is_string(directory_name)
    error_checking.assert_is_integer(year)
    error_checking.assert_is_boolean(raise_error_if_missing)

    rrtm_file_name = '{0:s}/rrtm_output_{1:04d}.nc'.format(
        directory_name, year)

    # Return early unless a missing file must be reported as an error.
    if not raise_error_if_missing or os.path.isfile(rrtm_file_name):
        return rrtm_file_name

    raise ValueError(
        'Cannot find file. Expected at: "{0:s}"'.format(rrtm_file_name)
    )
def find_file(valid_time_unix_sec, top_directory_name,
              raise_error_if_missing=True):
    """Locates the WPC-format text bulletin for one valid time.

    The file should contain positions of cyclones, anticyclones, fronts, etc.
    for a single valid time.  It is expected in a subdirectory named after the
    first four characters of the formatted valid time.

    :param valid_time_unix_sec: Valid time.
    :param top_directory_name: Name of top-level directory with WPC bulletins.
    :param raise_error_if_missing: Boolean flag.  If the file is missing and
        `raise_error_if_missing = True`, this method errors out.
    :return: bulletin_file_name: Path to file.  If the file is missing and
        `raise_error_if_missing = False`, this is the *expected* path.
    :raises: ValueError: if the file is missing and
        `raise_error_if_missing = True`.
    """

    error_checking.assert_is_string(top_directory_name)
    error_checking.assert_is_boolean(raise_error_if_missing)

    valid_time_string = time_conversion.unix_sec_to_string(
        valid_time_unix_sec, TIME_FORMAT_IN_FILE_NAME)
    subdirectory_name = valid_time_string[:4]

    bulletin_file_name = '{0:s}/{1:s}/{2:s}_{3:s}'.format(
        top_directory_name, subdirectory_name, PATHLESS_FILE_NAME_PREFIX,
        valid_time_string)

    # Return early unless a missing file must be reported as an error.
    if not raise_error_if_missing or os.path.isfile(bulletin_file_name):
        return bulletin_file_name

    raise ValueError(
        'Cannot find file. Expected at location: "{0:s}"'.format(
            bulletin_file_name)
    )
def _check_training_args(
        model_file_name, history_file_name, tensorboard_dir_name, num_epochs,
        num_training_batches_per_epoch, num_validation_batches_per_epoch,
        training_option_dict, weight_loss_function):
    """Error-checks input arguments for training.

    Besides validating the arguments, this method creates the directories
    needed for the three output locations and, if requested, converts the
    sampling fractions into class weights for the loss function.

    :param model_file_name: Path to output file (HDF5 format).  The model will
        be saved here after each epoch.
    :param history_file_name: Path to output file (CSV format).  Training
        history (performance metrics) will be saved here after each epoch.
    :param tensorboard_dir_name: Path to output directory for TensorBoard log
        files.
    :param num_epochs: Number of epochs (integer >= 1).
    :param num_training_batches_per_epoch: Number of training batches in each
        epoch (integer >= 1).
    :param num_validation_batches_per_epoch: Number of validation batches in
        each epoch (integer >= 0).
    :param training_option_dict: See doc for
        `training_validation_io.example_generator_2d_or_3d`.  Missing keys are
        filled from `trainval_io.DEFAULT_OPTION_DICT`; the caller's dictionary
        is not modified.
    :param weight_loss_function: Boolean flag.  If False, classes will be
        weighted equally in the loss function.  If True, classes will be
        weighted differently (inversely proportional to their sampling
        fractions).
    :return: class_to_weight_dict: Dictionary, where each key is the integer
        ID for a target class (-2 for "dead storm") and each value is the
        weight for the loss function.  If None, classes will be equally
        weighted in the loss function.
    """

    # Overlay the caller's options on a copy of the defaults.
    user_option_dict = training_option_dict.copy()
    full_option_dict = trainval_io.DEFAULT_OPTION_DICT.copy()
    full_option_dict.update(user_option_dict)

    file_system_utils.mkdir_recursive_if_necessary(file_name=model_file_name)
    file_system_utils.mkdir_recursive_if_necessary(
        file_name=history_file_name)
    file_system_utils.mkdir_recursive_if_necessary(
        directory_name=tensorboard_dir_name)

    error_checking.assert_is_integer(num_epochs)
    error_checking.assert_is_geq(num_epochs, 1)
    error_checking.assert_is_integer(num_training_batches_per_epoch)
    error_checking.assert_is_geq(num_training_batches_per_epoch, 1)
    error_checking.assert_is_integer(num_validation_batches_per_epoch)
    error_checking.assert_is_geq(num_validation_batches_per_epoch, 0)
    error_checking.assert_is_boolean(weight_loss_function)

    # The sampling fractions are consulted only when weighting is requested.
    sampling_fraction_dict = None
    if weight_loss_function:
        sampling_fraction_dict = full_option_dict[
            trainval_io.SAMPLING_FRACTIONS_KEY
        ]

    if sampling_fraction_dict is None:
        return None

    return dl_utils.class_fractions_to_weights(
        sampling_fraction_by_class_dict=sampling_fraction_dict,
        target_name=full_option_dict[trainval_io.TARGET_NAME_KEY],
        binarize_target=full_option_dict[trainval_io.BINARIZE_TARGET_KEY]
    )
def check_field_name(field_name, require_standard=False):
    """Verifies that the name of a model field is recognized.

    :param field_name: Field name in GewitterGefahr format (not the original
        NetCDF format).
    :param require_standard: Boolean flag.  If True, `field_name` must be in
        `STANDARD_FIELD_NAMES`.  If False, `field_name` must be in
        `FIELD_NAMES`.
    :raises: ValueError: if field name is unrecognized.
    """

    error_checking.assert_is_string(field_name)
    error_checking.assert_is_boolean(require_standard)

    valid_field_names = (
        STANDARD_FIELD_NAMES if require_standard else FIELD_NAMES
    )

    if field_name in valid_field_names:
        return

    # The message lists every valid name first, then the offending one.
    error_string = ''.join([
        '\n\n', str(valid_field_names),
        '\n\nValid field names (listed above) do not include "',
        field_name, '".'
    ])

    raise ValueError(error_string)
def _check_args_one_step(predictor_matrix, permuted_flag_matrix,
                         scalar_channel_flags, shuffle_profiles_together,
                         num_bootstrap_reps):
    """Checks input args for `run_*_test_one_step`.

    The predictor matrix must be 3-D and free of NaN.  The permuted-flag
    matrix must match all predictor dimensions but the first, and the
    scalar-channel flags must have one entry per element of the last
    predictor dimension.

    :param predictor_matrix: See doc for `run_forward_test_one_step` or
        `run_backwards_test_one_step`.
    :param permuted_flag_matrix: Same.
    :param scalar_channel_flags: Same.
    :param shuffle_profiles_together: Same.
    :param num_bootstrap_reps: Same.
    :return: num_bootstrap_reps: Same as input but maxxed with 1.
    """

    error_checking.assert_is_numpy_array_without_nan(predictor_matrix)

    # Lower and upper bounds together force exactly three dimensions.
    num_predictor_dim = len(predictor_matrix.shape)
    error_checking.assert_is_geq(num_predictor_dim, 3)
    error_checking.assert_is_leq(num_predictor_dim, 3)

    error_checking.assert_is_boolean_numpy_array(permuted_flag_matrix)
    expected_flag_matrix_dim = numpy.array(
        predictor_matrix.shape[1:], dtype=int
    )
    error_checking.assert_is_numpy_array(
        permuted_flag_matrix, exact_dimensions=expected_flag_matrix_dim)

    error_checking.assert_is_boolean_numpy_array(scalar_channel_flags)
    expected_channel_flag_dim = numpy.array(
        [predictor_matrix.shape[-1]], dtype=int
    )
    error_checking.assert_is_numpy_array(
        scalar_channel_flags, exact_dimensions=expected_channel_flag_dim)

    error_checking.assert_is_boolean(shuffle_profiles_together)
    error_checking.assert_is_integer(num_bootstrap_reps)

    return numpy.maximum(num_bootstrap_reps, 1)
def find_rap_file_any_grid(top_directory_name, init_time_unix_sec,
                           lead_time_hours, raise_error_if_missing=True):
    """Finds RAP (Rapid Refresh) file on any grid.

    Grids in `nwp_model_utils.RAP_GRID_IDS` are tried in order and the first
    existing file wins.

    :param top_directory_name: See doc for `find_ruc_file_any_grid`.
    :param init_time_unix_sec: Same.
    :param lead_time_hours: Same.
    :param raise_error_if_missing: Same.
    :return: grib_file_name: Same.
    """

    error_checking.assert_is_boolean(raise_error_if_missing)

    grid_ids = nwp_model_utils.RAP_GRID_IDS
    last_index = len(grid_ids) - 1

    for this_index, this_grid_id in enumerate(grid_ids):
        # Only the final attempt is allowed to raise; earlier misses just
        # move on to the next grid.
        raise_on_this_attempt = (
            raise_error_if_missing and this_index == last_index
        )

        grib_file_name = find_grib_file(
            top_directory_name=top_directory_name,
            init_time_unix_sec=init_time_unix_sec,
            model_name=nwp_model_utils.RAP_MODEL_NAME,
            grid_id=this_grid_id, lead_time_hours=lead_time_hours,
            raise_error_if_missing=raise_on_this_attempt)

        if os.path.isfile(grib_file_name):
            return grib_file_name

    return None
def download_rap_file_any_grid(top_local_directory_name, init_time_unix_sec,
                               lead_time_hours, raise_error_if_fails=True):
    """Downloads RAP (Rapid Refresh) file on any grid.

    The 130 grid is tried first, then the 252 grid; the first successful
    download wins.

    :param top_local_directory_name: Name of top-level directory for grib
        files on local machine.
    :param init_time_unix_sec: Model-initialization time.
    :param lead_time_hours: Lead time.
    :param raise_error_if_fails: See doc for `download_grib_file`.
    :return: local_file_name: See doc for `download_grib_file`.
    """

    error_checking.assert_is_boolean(raise_error_if_fails)

    # Only these two grids are attempted here, rather than everything in
    # `nwp_model_utils.RAP_GRID_IDS`.
    grid_ids = [
        nwp_model_utils.ID_FOR_130GRID, nwp_model_utils.ID_FOR_252GRID
    ]
    last_index = len(grid_ids) - 1

    for this_index, this_grid_id in enumerate(grid_ids):
        # Only the final attempt is allowed to raise.
        raise_on_this_attempt = (
            raise_error_if_fails and this_index == last_index
        )

        local_file_name = download_grib_file(
            top_local_directory_name=top_local_directory_name,
            init_time_unix_sec=init_time_unix_sec,
            model_name=nwp_model_utils.RAP_MODEL_NAME,
            grid_id=this_grid_id, lead_time_hours=lead_time_hours,
            raise_error_if_fails=raise_on_this_attempt)

        if local_file_name is not None:
            return local_file_name

    # Every attempt returned None.
    return None
def find_ruc_file_any_grid(top_directory_name, init_time_unix_sec,
                           lead_time_hours, raise_error_if_missing=True):
    """Finds RUC (Rapid Update Cycle) file on any grid.

    Grids in `nwp_model_utils.RUC_GRID_IDS` are tried in order and the first
    existing file wins.

    :param top_directory_name: Name of top-level directory with grib files.
    :param init_time_unix_sec: Model-initialization time.
    :param lead_time_hours: Lead time.
    :param raise_error_if_missing: Boolean flag.  If no file is found and
        `raise_error_if_missing = True`, this method errors out.
    :return: grib_file_name: Path to grib file.  If no file is found and
        `raise_error_if_missing = False`, this is None.
    """

    error_checking.assert_is_boolean(raise_error_if_missing)

    grid_ids = nwp_model_utils.RUC_GRID_IDS
    last_index = len(grid_ids) - 1

    for this_index, this_grid_id in enumerate(grid_ids):
        # Only the final attempt is allowed to raise; earlier misses just
        # move on to the next grid.
        raise_on_this_attempt = (
            raise_error_if_missing and this_index == last_index
        )

        grib_file_name = find_grib_file(
            top_directory_name=top_directory_name,
            init_time_unix_sec=init_time_unix_sec,
            model_name=nwp_model_utils.RUC_MODEL_NAME,
            grid_id=this_grid_id, lead_time_hours=lead_time_hours,
            raise_error_if_missing=raise_on_this_attempt)

        if os.path.isfile(grib_file_name):
            return grib_file_name

    return None
def cross_entropy_function(target_values, class_probability_matrix,
                           test_mode=False):
    """Cross-entropy cost function.

    This function works for binary or multi-class classification.

    Probabilities are clipped to [`MIN_PROBABILITY`, `MAX_PROBABILITY`] before
    the logarithm is taken.  The clipping is done on a copy, so the caller's
    `class_probability_matrix` is left unchanged.

    :param target_values: See doc for `run_permutation_test`.
    :param class_probability_matrix: Same.  The first axis is treated as
        examples and the second as classes.
    :param test_mode: Never mind.  Leave this alone.  (If True, the natural
        logarithm is used instead of the base-2 logarithm.)
    :return: cross_entropy: Scalar.
    """

    error_checking.assert_is_boolean(test_mode)

    num_examples = class_probability_matrix.shape[0]
    num_classes = class_probability_matrix.shape[1]

    # numpy.clip returns a new array, which avoids the side effect of
    # overwriting values in the array owned by the caller.
    clipped_probability_matrix = numpy.clip(
        class_probability_matrix, MIN_PROBABILITY, MAX_PROBABILITY)

    target_matrix = keras.utils.to_categorical(
        target_values, num_classes
    ).astype(int)

    log_function = numpy.log if test_mode else numpy.log2

    return -1 * numpy.sum(
        target_matrix * log_function(clipped_probability_matrix)
    ) / num_examples
def find_prediction_file(top_directory_name, spc_date_string,
                         raise_error_if_missing=False):
    """Locates file with upconvnet predictions (reconstructed radar images).

    The file is expected in a subdirectory named after the first four
    characters of the SPC date.

    :param top_directory_name: Name of top-level directory with upconvnet
        predictions.
    :param spc_date_string: SPC date (format "yyyymmdd").
    :param raise_error_if_missing: Boolean flag.  If the file is missing and
        `raise_error_if_missing = True`, this method errors out.
    :return: prediction_file_name: Path to prediction file.  If the file is
        missing and `raise_error_if_missing = False`, this is the expected
        path.
    :raises: ValueError: if the file is missing and
        `raise_error_if_missing = True`.
    """

    error_checking.assert_is_string(top_directory_name)
    error_checking.assert_is_boolean(raise_error_if_missing)

    # Result is discarded; presumably called only to verify the date format.
    time_conversion.spc_date_string_to_unix_sec(spc_date_string)

    year_string = spc_date_string[:4]
    prediction_file_name = '{0:s}/{1:s}/{2:s}_{3:s}.p'.format(
        top_directory_name, year_string, PATHLESS_FILE_NAME_PREFIX,
        spc_date_string
    )

    # Return early unless a missing file must be reported as an error.
    if not raise_error_if_missing or os.path.isfile(prediction_file_name):
        return prediction_file_name

    raise ValueError(
        'Cannot find file. Expected at: "{0:s}"'.format(prediction_file_name)
    )
def find_local_raw_5minute_file(station_id=None, month_unix_sec=None,
                                top_directory_name=None,
                                raise_error_if_missing=True):
    """Locates raw 5-minute file on the local machine.

    This file should contain 5-minute METARs for one station-month.  It is
    expected at "<top directory>/<station ID>/<pathless file name>".

    :param station_id: String ID for station.
    :param month_unix_sec: Month in Unix format.
    :param top_directory_name: Top-level directory for raw 5-minute files.
    :param raise_error_if_missing: Boolean flag.  If True and the file is
        missing, this method raises an error.
    :return: raw_5minute_file_name: File path.  If the file is missing and
        `raise_error_if_missing = False`, this is the *expected* path.
    :raises: ValueError: if the file is missing and
        `raise_error_if_missing = True`.
    """

    error_checking.assert_is_string(station_id)
    error_checking.assert_is_string(top_directory_name)
    error_checking.assert_is_boolean(raise_error_if_missing)

    path_components = (
        top_directory_name, station_id,
        _get_pathless_raw_5minute_file_name(station_id, month_unix_sec)
    )
    raw_5minute_file_name = '{0:s}/{1:s}/{2:s}'.format(*path_components)

    # Return early unless a missing file must be reported as an error.
    if not raise_error_if_missing or os.path.isfile(raw_5minute_file_name):
        return raw_5minute_file_name

    raise ValueError(
        'Cannot find raw 5-minute file. Expected at location: ' +
        raw_5minute_file_name)
def find_processed_file(directory_name, year, raise_error_if_missing=True):
    """Locates processed file with tornado reports for one year.

    See `write_processed_file` for the definition of a "processed file".  The
    expected path is "<directory>/tornado_reports_<yyyy>.csv".

    :param directory_name: Name of directory.
    :param year: Year (integer).
    :param raise_error_if_missing: Boolean flag.  If the file is missing and
        `raise_error_if_missing = True`, this method errors out.
    :return: processed_file_name: Path to file.  If the file is missing and
        `raise_error_if_missing = False`, this is the *expected* path.
    :raises: ValueError: if the file is missing and
        `raise_error_if_missing = True`.
    """

    error_checking.assert_is_string(directory_name)
    error_checking.assert_is_integer(year)
    error_checking.assert_is_boolean(raise_error_if_missing)

    processed_file_name = '{0:s}/tornado_reports_{1:04d}.csv'.format(
        directory_name, year)

    # Return early unless a missing file must be reported as an error.
    if not raise_error_if_missing or os.path.isfile(processed_file_name):
        return processed_file_name

    raise ValueError(
        (
            'Cannot find processed file with tornado reports. Expected at: '
            '{0:s}'
        ).format(processed_file_name)
    )
def do_2d_upsampling(feature_matrix, upsampling_factor=2,
                     use_linear_interp=True):
    """Upsamples 2-D feature maps.

    m = number of rows after upsampling
    n = number of columns after upsampling

    NOTE(review): the body visible here only validates the inputs and adds a
    leading axis to 3-D input.  It performs no upsampling and has no `return`
    statement, so the documented output is not produced.  The rest of the
    implementation appears to be missing -- confirm against the original
    module.

    :param feature_matrix: Input feature maps (numpy array).  Dimensions must
        be M x N x C or 1 x M x N x C.
    :param upsampling_factor: Upsampling factor (integer > 1).
    :param use_linear_interp: Boolean flag.  If True (False), will use linear
        (nearest-neighbour) interpolation.
    :return: feature_matrix: Output feature maps (numpy array).  Dimensions
        will be 1 x m x n x C.
    """

    error_checking.assert_is_numpy_array_without_nan(feature_matrix)
    error_checking.assert_is_integer(upsampling_factor)
    error_checking.assert_is_geq(upsampling_factor, 2)
    error_checking.assert_is_boolean(use_linear_interp)

    # Promote 3-D input to 4-D by adding a leading axis of length 1.
    if len(feature_matrix.shape) == 3:
        feature_matrix = numpy.expand_dims(feature_matrix, axis=0)

    # After the optional promotion, the array must be exactly 4-D.
    error_checking.assert_is_numpy_array(feature_matrix, num_dimensions=4)
def find_match_file(top_directory_name, valid_time_unix_sec,
                    raise_error_if_missing=False):
    """Locates match file for one time step.

    A "match file" matches storm objects in one dataset (e.g., MYRORSS or
    GridRad) to those in another dataset, at one time step.  The file is
    expected in nested subdirectories named after the first four characters
    of the SPC date and the full SPC date.

    :param top_directory_name: Name of top-level directory.
    :param valid_time_unix_sec: Valid time.
    :param raise_error_if_missing: See doc for `find_file`.
    :return: match_file_name: Path to match file.  If the file is missing and
        `raise_error_if_missing = False`, this is the *expected* path.
    :raises: ValueError: if the file is missing and
        `raise_error_if_missing = True`.
    """

    error_checking.assert_is_string(top_directory_name)
    error_checking.assert_is_boolean(raise_error_if_missing)

    spc_date_string = time_conversion.time_to_spc_date_string(
        valid_time_unix_sec)
    valid_time_string = time_conversion.unix_sec_to_string(
        valid_time_unix_sec, FILE_NAME_TIME_FORMAT)

    match_file_name = '{0:s}/{1:s}/{2:s}/storm-matches_{3:s}.p'.format(
        top_directory_name, spc_date_string[:4], spc_date_string,
        valid_time_string
    )

    # Return early unless a missing file must be reported as an error.
    if not raise_error_if_missing or os.path.isfile(match_file_name):
        return match_file_name

    raise ValueError(
        'Cannot find file. Expected at: "{0:s}"'.format(match_file_name)
    )
def check_wind_speeds(wind_speeds_m_s01, one_component=False):
    """Finds invalid wind speeds.

    N = number of observations.

    A speed is valid only if it lies between the applicable minimum and
    `MAX_WIND_SPEED_M_S01`, inclusive.

    :param wind_speeds_m_s01: length-N numpy array of wind speeds (m/s).
    :param one_component: Boolean flag.  If True, wind speeds are only one
        component (either u or v), which means that they can be negative.  If
        False, wind speeds are absolute (vector magnitudes), so they cannot be
        negative.
    :return: invalid_indices: 1-D numpy array with indices of invalid speeds.
    """

    error_checking.assert_is_real_numpy_array(wind_speeds_m_s01)
    error_checking.assert_is_numpy_array(wind_speeds_m_s01, num_dimensions=1)
    error_checking.assert_is_boolean(one_component)

    min_allowed_speed_m_s01 = (
        MIN_SIGNED_WIND_SPEED_M_S01 if one_component
        else MIN_ABSOLUTE_WIND_SPEED_M_S01
    )

    not_too_low_flags = wind_speeds_m_s01 >= min_allowed_speed_m_s01
    not_too_high_flags = wind_speeds_m_s01 <= MAX_WIND_SPEED_M_S01
    valid_flags = numpy.logical_and(not_too_low_flags, not_too_high_flags)

    # Negating the "valid" mask (rather than testing "< min or > max") means
    # that NaN, which fails both comparisons, is reported as invalid.
    return numpy.where(numpy.logical_not(valid_flags))[0]
def find_model_file(base_model_file_name, raise_error_if_missing=True):
    """Locates file containing isotonic-regression model(s).

    This file should be written by `write_model_for_each_class`.  It is
    expected in the same directory as the base model, under the name
    "isotonic_regression_models.p".

    :param base_model_file_name: Path to file containing base model (e.g.,
        CNN).
    :param raise_error_if_missing: Boolean flag.  If the isotonic-regression
        file is missing and `raise_error_if_missing = True`, this method
        errors out.
    :return: isotonic_file_name: Path to isotonic-regression file.  If the
        file is missing and `raise_error_if_missing = False`, this is the
        *expected* path.
    :raises: ValueError: if the isotonic-regression file is missing and
        `raise_error_if_missing = True`.
    """

    error_checking.assert_is_string(base_model_file_name)
    error_checking.assert_is_boolean(raise_error_if_missing)

    base_directory_name = os.path.split(base_model_file_name)[0]
    isotonic_file_name = '{0:s}/isotonic_regression_models.p'.format(
        base_directory_name)

    if os.path.isfile(isotonic_file_name) or not raise_error_if_missing:
        return isotonic_file_name

    raise ValueError(
        'Cannot find file. Expected at: "{0:s}"'.format(isotonic_file_name)
    )
def target_name_to_num_classes(target_name, include_dead_storms=False):
    """Parses number of classes from name of (classifn-based) target variable.

    Tornado and tornadogenesis targets always have 2 classes.  For other
    targets, the number of classes is one more than the number of wind-speed
    cutoffs, plus one for "dead storms" when these are requested and the
    minimum lead time is positive.

    :param target_name: Name of target variable.
    :param include_dead_storms: Boolean flag.  If True, number of classes will
        include "dead storms" (defined in documentation for
        `_find_dead_storms`).  Not checked for tornado-based targets, where it
        has no effect.
    :return: num_classes: Number of classes.  If target variable is
        regression-based, will return None.
    """

    target_param_dict = target_name_to_params(target_name)

    if target_param_dict[EVENT_TYPE_KEY] in (
            linkage.TORNADO_EVENT_STRING,
            linkage.TORNADOGENESIS_EVENT_STRING
    ):
        return 2

    error_checking.assert_is_boolean(include_dead_storms)

    wind_speed_cutoffs_kt = target_param_dict[WIND_SPEED_CUTOFFS_KEY]
    if wind_speed_cutoffs_kt is None:
        return None

    num_wind_classes = len(wind_speed_cutoffs_kt) + 1

    # With a non-positive minimum lead time, the dead-storm class is never
    # added.
    if target_param_dict[MIN_LEAD_TIME_KEY] <= 0:
        return num_wind_classes

    return num_wind_classes + int(include_dead_storms)
def plot_multipass_test(permutation_dict, axes_object=None,
                        num_predictors_to_plot=None,
                        plot_percent_increase=False,
                        confidence_level=DEFAULT_CONFIDENCE_LEVEL,
                        bar_face_colour=None):
    """Plots results of multi-pass (Lakshmanan) permutation test.

    The bar plot has one row for the original cost, followed by one row per
    plotted predictor.

    :param permutation_dict: See doc for `plot_single_pass_test`.
    :param axes_object: Same.
    :param num_predictors_to_plot: Same.  If None, all predictors are plotted;
        values larger than the number of predictors are capped.
    :param plot_percent_increase: Same.
    :param confidence_level: Same.
    :param bar_face_colour: Same.
    """

    # Check input args.
    predictor_names = permutation_dict[permutation_utils.BEST_PREDICTORS_KEY]
    num_predictors_available = len(predictor_names)

    if num_predictors_to_plot is None:
        num_predictors_to_plot = num_predictors_available

    error_checking.assert_is_integer(num_predictors_to_plot)
    error_checking.assert_is_greater(num_predictors_to_plot, 0)
    num_predictors_to_plot = min(
        num_predictors_to_plot, num_predictors_available)

    error_checking.assert_is_boolean(plot_percent_increase)

    # Set up plotting args.
    backwards_flag = permutation_dict[permutation_utils.BACKWARDS_FLAG]

    best_cost_matrix = permutation_dict[
        permutation_utils.BEST_COST_MATRIX_KEY]
    perturbed_cost_matrix = best_cost_matrix[:num_predictors_to_plot, :]
    predictor_names = predictor_names[:num_predictors_to_plot]

    original_cost_array = permutation_dict[
        permutation_utils.ORIGINAL_COST_ARRAY_KEY]

    # The original cost becomes the first row of the plotted matrix.
    first_row_matrix = numpy.reshape(
        original_cost_array, (1, original_cost_array.size)
    )
    cost_matrix = numpy.concatenate(
        (first_row_matrix, perturbed_cost_matrix), axis=0)

    # Do plotting.  For the backwards test, the "clean" cost is taken from the
    # last row of the full (untruncated) best-cost matrix.
    clean_cost_array = (
        best_cost_matrix[-1, :] if backwards_flag else original_cost_array
    )

    _plot_bars(
        cost_matrix=cost_matrix, clean_cost_array=clean_cost_array,
        predictor_names=predictor_names,
        plot_percent_increase=plot_percent_increase,
        backwards_flag=backwards_flag, multipass_flag=True,
        confidence_level=confidence_level, axes_object=axes_object,
        bar_face_colour=bar_face_colour)
def find_raw_files_one_day(top_directory_name, unix_time_sec, file_extension,
                           raise_error_if_all_missing=True):
    """Finds all raw (ASCII or JSON) files for one day.

    Files are searched for first with the standard name prefix
    (`RAW_FILE_NAME_PREFIX`), then with the alternative prefix
    (`ALT_RAW_FILE_NAME_PREFIX`).  The first search that finds anything wins.

    :param top_directory_name: Name of top-level directory with raw
        probSevere files.
    :param unix_time_sec: Valid time (any time on the given day).
    :param file_extension: File type (either ".json" or ".ascii").
    :param raise_error_if_all_missing: Boolean flag.  If no files are found
        and `raise_error_if_all_missing = True`, this method errors out.  If
        no files are found and `raise_error_if_all_missing = False`, this
        method returns None.
    :return: raw_file_names: [may be None] 1-D list of paths to raw files.
    :raises: ValueError: if no files are found and
        `raise_error_if_all_missing = True`.
    """

    error_checking.assert_is_string(top_directory_name)
    error_checking.assert_is_boolean(raise_error_if_all_missing)

    # Build the file name for the given time, then swap the time stamp for a
    # wildcard pattern so that every time on the day matches.
    dummy_pathless_file_name = _get_pathless_raw_file_name(
        unix_time_sec=unix_time_sec, file_extension=file_extension)
    time_string = time_conversion.unix_sec_to_string(
        unix_time_sec, RAW_FILE_TIME_FORMAT)

    standard_pathless_pattern = dummy_pathless_file_name.replace(
        time_string, RAW_FILE_TIME_FORMAT_REGEX)
    alt_pathless_pattern = standard_pathless_pattern.replace(
        RAW_FILE_NAME_PREFIX, ALT_RAW_FILE_NAME_PREFIX)

    month_string = time_conversion.unix_sec_to_string(
        unix_time_sec, MONTH_FORMAT)
    date_string = time_conversion.unix_sec_to_string(
        unix_time_sec, DATE_FORMAT)

    for this_pathless_pattern in (
            standard_pathless_pattern, alt_pathless_pattern
    ):
        raw_file_pattern = '{0:s}/{1:s}/{2:s}/{3:s}'.format(
            top_directory_name, month_string, date_string,
            this_pathless_pattern)

        raw_file_names = glob.glob(raw_file_pattern)
        if raw_file_names:
            return raw_file_names

    if not raise_error_if_all_missing:
        return None

    # The message reports the last pattern tried (alternative prefix).
    raise ValueError(
        'Cannot find any files with pattern: "{0:s}"'.format(
            raw_file_pattern)
    )
def plot_many_2d_grids(class_activation_matrix_3d, axes_object_matrix,
                       colour_map_object, min_contour_level,
                       max_contour_level, contour_interval,
                       line_width=DEFAULT_CONTOUR_WIDTH, row_major=True,
                       line_style=DEFAULT_CONTOUR_STYLE):
    """Plots one 2-D class-activation map per predictor, one per panel.

    M = number of rows in spatial grid
    N = number of columns in spatial grid
    P = number of predictors

    :param class_activation_matrix_3d: M-by-N-by-P numpy array of class
        activations.
    :param axes_object_matrix: See doc for `plotting_utils.init_panels`.
    :param colour_map_object: See doc for `plot_2d_grid`.
    :param min_contour_level: Same.
    :param max_contour_level: Same.
    :param contour_interval: Same.
    :param line_width: Same.
    :param row_major: Boolean flag.  If True, panels will be filled along rows
        first, then down columns.  If False, down columns first, then along
        rows.
    :param line_style: Style (e.g., "solid") for contour lines.
    """

    error_checking.assert_is_numpy_array_without_nan(
        class_activation_matrix_3d)
    error_checking.assert_is_numpy_array(
        class_activation_matrix_3d, num_dimensions=3)
    error_checking.assert_is_boolean(row_major)

    # 'C' order walks along rows first; 'F' order walks down columns first.
    order_string = 'C' if row_major else 'F'

    panel_grid_shape = (
        axes_object_matrix.shape[0], axes_object_matrix.shape[1]
    )
    num_predictors = class_activation_matrix_3d.shape[-1]

    for k in range(num_predictors):
        # Convert the linear predictor index to a (row, column) panel index.
        this_panel_row, this_panel_column = numpy.unravel_index(
            k, panel_grid_shape, order=order_string)
        this_axes_object = axes_object_matrix[
            this_panel_row, this_panel_column]

        plot_2d_grid(
            class_activation_matrix_2d=class_activation_matrix_3d[..., k],
            axes_object=this_axes_object,
            colour_map_object=colour_map_object,
            min_contour_level=min_contour_level,
            max_contour_level=max_contour_level,
            contour_interval=contour_interval, line_width=line_width,
            line_style=line_style)
def find_single_field_file(init_time_unix_sec, lead_time_hours=None,
                           model_name=None, grid_id=None,
                           grib1_field_name=None, top_directory_name=None,
                           raise_error_if_missing=True):
    """Locates file with a single field on the local machine.

    "Single field" = one variable at one time step and all grid cells.  The
    file is expected in a subdirectory named after the month of the
    initialization time.

    :param init_time_unix_sec: Model-initialization time (Unix format).
    :param lead_time_hours: Lead time (valid time minus init time).  If model
        is a reanalysis, you can leave this as None (always zero).  In the
        code, this override to zero is applied for the NARR model only.
    :param model_name: Name of model.
    :param grid_id: String ID for model grid.
    :param grib1_field_name: Field name in grib1 format.
    :param top_directory_name: Name of top-level directory with single-field
        files for the given model/grib combo.
    :param raise_error_if_missing: Boolean flag.  If True and the file is
        missing, this method raises an error.
    :return: single_field_file_name: Path to single-field file.  If the file
        is missing but `raise_error_if_missing = False`, this is the
        *expected* path.
    :raises: ValueError: if the file is missing and
        `raise_error_if_missing = True`.
    """

    error_checking.assert_is_string(grib1_field_name)
    error_checking.assert_is_string(top_directory_name)
    error_checking.assert_is_boolean(raise_error_if_missing)
    nwp_model_utils.check_model_name(model_name)

    # NARR lead time is forced to zero, whatever the caller passed.
    if model_name == nwp_model_utils.NARR_MODEL_NAME:
        lead_time_hours = 0

    error_checking.assert_is_integer(lead_time_hours)
    error_checking.assert_is_geq(lead_time_hours, 0)

    pathless_file_name = _get_pathless_single_field_file_name(
        init_time_unix_sec, lead_time_hours=lead_time_hours,
        model_name=model_name, grid_id=grid_id,
        grib1_field_name=grib1_field_name)
    month_string = time_conversion.unix_sec_to_string(
        init_time_unix_sec, TIME_FORMAT_MONTH)

    single_field_file_name = '{0:s}/{1:s}/{2:s}'.format(
        top_directory_name, month_string, pathless_file_name)

    # Return early unless a missing file must be reported as an error.
    if not raise_error_if_missing or os.path.isfile(single_field_file_name):
        return single_field_file_name

    raise ValueError(
        'Cannot find single-field file. Expected at: ' +
        single_field_file_name)
def download_file_via_passwordless_ssh(host_name=None, user_name=None,
                                       remote_file_name=None,
                                       local_file_name=None,
                                       raise_error_if_fails=True):
    """Downloads file via passwordless SSH.

    For this to work, the remote machine (from which you are downloading) must
    have the RSA key of the local machine.  See the following page for
    instructions on sharing RSA keys: http://www.linuxproblem.org/art_9.html

    The download is done by running `rsync` through the shell, with both
    stdout and stderr discarded.  Success is judged only by whether
    `local_file_name` exists afterwards; the exit code of `rsync` is not
    inspected.  Consequently, a file that already existed at
    `local_file_name` before the call is reported as a successful download.

    :param host_name: Name of remote machine (example: "thunderhoser.ou.edu").
    :param user_name: User name on remote machine (example: "thunderhoser").
    :param remote_file_name: File path on remote machine (where the file will
        be downloaded from).
    :param local_file_name: File path on local machine (where the file will be
        stored).
    :param raise_error_if_fails: Boolean flag.  If raise_error_if_fails = True
        and download fails, will raise an error.
    :return: local_file_name: If raise_error_if_fails = False and download
        failed, this will be None (and a warning is issued).  Otherwise, this
        will be the same as input.
    :raises: ValueError: if download failed and raise_error_if_fails = True.
    """

    # TODO(thunderhoser): Handle exceptions more intelligently.  Currently, if
    # the download fails, this method does not know why it failed.  If the
    # download failed because the file does not exist, this is less severe than
    # if it failed because we can't login to the remote machine.

    error_checking.assert_is_string(host_name)
    error_checking.assert_is_string(user_name)
    error_checking.assert_is_string(remote_file_name)
    error_checking.assert_is_string(local_file_name)
    error_checking.assert_is_boolean(raise_error_if_fails)

    file_system_utils.mkdir_recursive_if_necessary(file_name=local_file_name)

    # NOTE: the arguments are interpolated into a shell command (shell=True
    # below), so they must come from a trusted source.
    unix_command_string = (
        'LD_LIBRARY_PATH= rsync -rv -e "{0:s}" {1:s}@{2:s}:"{3:s}" "{4:s}"'
    ).format(SSH_ARG_STRING, user_name, host_name, remote_file_name,
             local_file_name)

    # The context manager closes the handle to os.devnull even if the
    # subprocess call raises; previously the handle was never closed.
    with open(os.devnull, 'w') as devnull_handle:
        subprocess.call(
            unix_command_string, shell=True, stdout=devnull_handle,
            stderr=devnull_handle)

    if os.path.isfile(local_file_name):
        return local_file_name

    info_string = (
        'Download failed. Local file expected at: ' + local_file_name)

    if raise_error_if_fails:
        raise ValueError(info_string)

    warnings.warn(info_string)
    return None
def find_raw_file(unix_time_sec, spc_date_string, field_name, data_source,
                  top_directory_name, height_m_asl=None,
                  raise_error_if_missing=True):
    """Finds raw file.

    File should contain one field at one time step (e.g., MESH at 123502 UTC,
    reflectivity at 500 m above sea level and 123502 UTC).

    The directory is "<top_directory_name>/<yyyy>/<yyyymmdd>/<relative dir>",
    where the relative directory comes from `get_relative_dir_for_raw_files`.

    :param unix_time_sec: Valid time.
    :param spc_date_string: SPC date (format "yyyymmdd").
    :param field_name: Name of radar field in GewitterGefahr format.
    :param data_source: Data source (string).
    :param top_directory_name: Name of top-level directory with raw files.
    :param height_m_asl: Radar height (metres above sea level).
    :param raise_error_if_missing: Boolean flag.  If True, this method looks
        for the zipped file, then for the unzipped file, and raises an error
        if neither exists.  If False, this method does not touch the disk and
        returns the *expected* path to the zipped file.
    :return: raw_file_name: Path to raw file.
    :raises: ValueError: if raise_error_if_missing = True and file is missing.
    """

    # This call is made only to validate the SPC date; the result is unused.
    time_conversion.spc_date_string_to_unix_sec(spc_date_string)
    error_checking.assert_is_string(top_directory_name)
    error_checking.assert_is_boolean(raise_error_if_missing)

    relative_directory_name = get_relative_dir_for_raw_files(
        field_name=field_name, height_m_asl=height_m_asl,
        data_source=data_source)

    year_string = spc_date_string[:4]
    directory_name = '{0:s}/{1:s}/{2:s}/{3:s}'.format(
        top_directory_name, year_string, spc_date_string,
        relative_directory_name)

    zipped_file_name = '{0:s}/{1:s}'.format(
        directory_name,
        _get_pathless_raw_file_name(unix_time_sec, zipped=True)
    )

    if not raise_error_if_missing or os.path.isfile(zipped_file_name):
        return zipped_file_name

    # Zipped file is missing and the caller wants an error, so give the
    # unzipped version a chance first.
    unzipped_file_name = '{0:s}/{1:s}'.format(
        directory_name,
        _get_pathless_raw_file_name(unix_time_sec, zipped=False)
    )

    if os.path.isfile(unzipped_file_name):
        return unzipped_file_name

    raise ValueError(
        'Cannot find raw file. Expected at: "{0:s}"'.format(
            unzipped_file_name)
    )
def find_target_file(top_directory_name, event_type_string, spc_date_string,
                     raise_error_if_missing=True, unix_time_sec=None):
    """Locates file with target values for either one time or one SPC date.

    If `unix_time_sec is None`, the file is for one SPC date and the path is
    "<top_directory_name>/<yyyy>/<prefix>_<yyyymmdd>.nc".  Otherwise the file
    is for one time, the input `spc_date_string` is ignored (the SPC date is
    derived from `unix_time_sec`) and the path is
    "<top_directory_name>/<yyyy>/<yyyymmdd>/<prefix>_<valid time>.nc".  The
    prefix is "wind_labels" for wind events and "tornado_labels" otherwise.

    :param top_directory_name: Name of top-level directory with target files.
    :param event_type_string: Event type (must be accepted by
        `linkage.check_event_type`).
    :param spc_date_string: SPC date (format "yyyymmdd").
    :param raise_error_if_missing: Boolean flag.  If file is missing and
        `raise_error_if_missing = True`, this method will error out.
    :param unix_time_sec: Valid time.
    :return: target_file_name: Path to target file.  If file is missing and
        `raise_error_if_missing = False`, this will be the *expected* path.
    :raises: ValueError: if file is missing and
        `raise_error_if_missing = True`.
    """

    error_checking.assert_is_string(top_directory_name)
    linkage.check_event_type(event_type_string)
    error_checking.assert_is_boolean(raise_error_if_missing)

    if unix_time_sec is None:
        # Called for validation only; the result is unused.
        time_conversion.spc_date_string_to_unix_sec(spc_date_string)

        wind_template = '{0:s}/{1:s}/wind_labels_{2:s}.nc'
        tornado_template = '{0:s}/{1:s}/tornado_labels_{2:s}.nc'
        path_pieces = (
            top_directory_name, spc_date_string[:4], spc_date_string
        )
    else:
        spc_date_string = time_conversion.time_to_spc_date_string(
            unix_time_sec)
        valid_time_string = time_conversion.unix_sec_to_string(
            unix_time_sec, TIME_FORMAT)

        wind_template = '{0:s}/{1:s}/{2:s}/wind_labels_{3:s}.nc'
        tornado_template = '{0:s}/{1:s}/{2:s}/tornado_labels_{3:s}.nc'
        path_pieces = (
            top_directory_name, spc_date_string[:4], spc_date_string,
            valid_time_string
        )

    # Any event type other than wind is treated as tornado.
    if event_type_string == linkage.WIND_EVENT_STRING:
        target_file_name = wind_template.format(*path_pieces)
    else:
        target_file_name = tornado_template.format(*path_pieces)

    if not raise_error_if_missing or os.path.isfile(target_file_name):
        return target_file_name

    raise ValueError(
        'Cannot find file. Expected at: "{0:s}"'.format(target_file_name)
    )
def class_fractions_to_weights(
        sampling_fraction_by_class_dict, target_name, binarize_target):
    """For each target class, converts sampling fraction to loss-fctn weight.

    Each weight is the reciprocal of the class's sampling fraction, and the
    weights are then rescaled so that they sum to 1.

    :param sampling_fraction_by_class_dict: See doc for
        `check_class_fractions`.
    :param target_name: Same.
    :param binarize_target: Boolean flag.  If True, the target variable will
        be binarized, so that the highest class = 1 and all other classes = 0.
        Otherwise, the original number of classes will be retained, except
        that `target_val_utils.DEAD_STORM_INTEGER` ("dead storm") will be
        merged into class 0.
    :return: lf_weight_by_class_dict: Dictionary, where each key is the
        integer representing a class and each value is the corresponding
        loss-function weight.
    """

    check_class_fractions(
        sampling_fraction_by_class_dict=sampling_fraction_by_class_dict,
        target_name=target_name)
    error_checking.assert_is_boolean(binarize_target)

    orig_class_keys = list(sampling_fraction_by_class_dict.keys())
    dead_storm_key = target_val_utils.DEAD_STORM_INTEGER

    if binarize_target:
        # Highest class becomes the positive class; everything else is
        # lumped into the negative class.
        highest_key = numpy.max(numpy.array(orig_class_keys))
        positive_fraction = sampling_fraction_by_class_dict[highest_key]

        fraction_by_class_dict = {
            0: 1. - positive_fraction,
            1: positive_fraction
        }
    else:
        # Work on a copy, so that the caller's dictionary is not modified.
        fraction_by_class_dict = copy.deepcopy(
            sampling_fraction_by_class_dict)

        if dead_storm_key in orig_class_keys:
            fraction_by_class_dict[0] = (
                fraction_by_class_dict[0] +
                fraction_by_class_dict[dead_storm_key]
            )
            del fraction_by_class_dict[dead_storm_key]

    inverse_fractions = 1. / numpy.array(
        list(fraction_by_class_dict.values())
    )
    normalized_weights = inverse_fractions / numpy.sum(inverse_fractions)

    # Keys and values of the same dictionary iterate in the same order, so
    # each weight is paired with the class it was computed from.
    return dict(zip(list(fraction_by_class_dict.keys()), normalized_weights))
def find_file(
        top_prediction_dir_name, first_init_time_unix_sec,
        last_init_time_unix_sec, gridded, raise_error_if_missing=False):
    """Finds gridded or ungridded prediction file.

    The path is
    "<top dir>/<yyyy>/<yyyymmdd>/<type>_predictions_<first>_<last><ext>",
    where the SPC date is derived from the first initial time, <type> is
    "gridded" or "ungridded", the two times are formatted with
    `FILE_NAME_TIME_FORMAT` and <ext> is ".p" for gridded files or ".nc" for
    ungridded files.

    :param top_prediction_dir_name: Name of top-level directory with
        prediction files.
    :param first_init_time_unix_sec: First initial time in file.  The
        "initial time" is the time of the storm object for which the
        prediction is being made.  This is different than the valid-time
        window (time range for which the prediction is valid).
    :param last_init_time_unix_sec: Last initial time in file.  Must be >=
        `first_init_time_unix_sec`.
    :param gridded: Boolean flag.  If True, will look for gridded file.  If
        False, will look for ungridded file.
    :param raise_error_if_missing: Boolean flag.  If file is missing and
        `raise_error_if_missing = True`, this method will error out.
    :return: prediction_file_name: Path to prediction file.  If file is
        missing and `raise_error_if_missing = False`, this will be the
        expected path.
    :raises: ValueError: if file is missing and
        `raise_error_if_missing = True`.
    """

    # TODO(thunderhoser): Put lead time in file names.

    error_checking.assert_is_string(top_prediction_dir_name)
    error_checking.assert_is_integer(first_init_time_unix_sec)
    error_checking.assert_is_integer(last_init_time_unix_sec)
    error_checking.assert_is_geq(
        last_init_time_unix_sec, first_init_time_unix_sec)
    error_checking.assert_is_boolean(gridded)
    error_checking.assert_is_boolean(raise_error_if_missing)

    spc_date_string = time_conversion.time_to_spc_date_string(
        first_init_time_unix_sec)

    if gridded:
        type_string = 'gridded'
        extension_string = '.p'
    else:
        type_string = 'ungridded'
        extension_string = '.nc'

    first_time_string = time_conversion.unix_sec_to_string(
        first_init_time_unix_sec, FILE_NAME_TIME_FORMAT)
    last_time_string = time_conversion.unix_sec_to_string(
        last_init_time_unix_sec, FILE_NAME_TIME_FORMAT)

    prediction_file_name = (
        '{0:s}/{1:s}/{2:s}/{3:s}_predictions_{4:s}_{5:s}{6:s}'
    ).format(
        top_prediction_dir_name, spc_date_string[:4], spc_date_string,
        type_string, first_time_string, last_time_string, extension_string
    )

    if not raise_error_if_missing or os.path.isfile(prediction_file_name):
        return prediction_file_name

    raise ValueError(
        'Cannot find file. Expected at: "{0:s}"'.format(prediction_file_name)
    )