def find_local_raw_file(year, directory_name=None,
                        raise_error_if_missing=True):
    """Locates the raw Storm Events file for one year on the local machine.

    The file is expected to hold all storm reports for the given year.

    :param year: Year (integer) for which the file is sought.
    :param directory_name: Path to directory with Storm Events files.  Although
        the default is None, this must be a string (it is validated below).
    :param raise_error_if_missing: Boolean flag.  If True and the file does not
        exist, this method raises an error.
    :return: raw_file_name: Path to file.  If the file is missing and
        `raise_error_if_missing = False`, this is the *expected* path.
    :raises: ValueError: if the file is missing and
        `raise_error_if_missing = True`.
    """

    error_checking.assert_is_integer(year)
    error_checking.assert_is_string(directory_name)
    error_checking.assert_is_boolean(raise_error_if_missing)

    # File name without directory: prefix + year string + extension.
    pathless_file_name = '{0:s}{1:s}{2:s}'.format(
        PATHLESS_RAW_FILE_PREFIX, _year_number_to_string(year),
        RAW_FILE_EXTENSION)
    raw_file_name = '{0:s}/{1:s}'.format(directory_name, pathless_file_name)

    # The file system is consulted only when the caller wants an error.
    if not raise_error_if_missing or os.path.isfile(raw_file_name):
        return raw_file_name

    raise ValueError(
        'Cannot find raw file.  Expected at location: ' + raw_file_name)
def gzip_file(input_file_name, output_file_name=None, delete_input_file=True):
    """Creates gzip archive with one file.

    The archive is produced by the external `gzip` program.  The program is
    invoked with an argument list (no shell), so file names containing quotes,
    dollar signs, spaces, etc. are passed through literally.

    :param input_file_name: Path to input file (will be gzipped).
    :param output_file_name: Path to output file (extension must be ".gz").  If
        `output_file_name is None`, will simply append ".gz" to name of input
        file.
    :param delete_input_file: Boolean flag.  If True, will delete input file
        after gzipping.
    :raises: ValueError: if `output_file_name` does not end with ".gz".
    :raises: ValueError: if the `gzip` command fails or cannot be run, or if
        the output file cannot be opened for writing.
    """

    # Imported locally so that this function does not depend on the module's
    # top-level import list.
    import subprocess

    error_checking.assert_file_exists(input_file_name)
    error_checking.assert_is_boolean(delete_input_file)
    if output_file_name is None:
        output_file_name = '{0:s}.gz'.format(input_file_name)

    if not output_file_name.endswith('.gz'):
        error_string = (
            'Output file ("{0:s}") should have extension ".gz".'
        ).format(output_file_name)
        raise ValueError(error_string)

    failure_string = (
        '\nUnix command failed (log messages shown above '
        'should explain why).'
    )

    try:
        # Equivalent to `gzip -v -c INPUT > OUTPUT`, but without a shell.  The
        # "--" ensures that an input name starting with "-" is not parsed as an
        # option.
        with open(output_file_name, 'wb') as output_file_handle:
            exit_code = subprocess.call(
                ['gzip', '-v', '-c', '--', input_file_name],
                stdout=output_file_handle)
    except (IOError, OSError) as this_error:
        # Covers a missing `gzip` executable and an unwritable output path;
        # both used to surface as a non-zero exit code from the shell.
        raise ValueError('{0:s}  Details: {1:s}'.format(
            failure_string, str(this_error)))

    if exit_code != 0:
        raise ValueError(failure_string)

    # The input is removed only after gzip has reported success.
    if delete_input_file:
        os.remove(input_file_name)
def find_file(year, directory_name, raise_error_if_missing=True):
    """Locates the Storm Events file for one year.

    The file is expected to hold all storm reports for the given year.

    :param year: Year (integer).
    :param directory_name: Path to directory with Storm Events files.
    :param raise_error_if_missing: Boolean flag.  If True and the file does not
        exist, this method raises an error.
    :return: storm_event_file_name: Path to Storm Events file.  If the file is
        missing and `raise_error_if_missing = False`, this is the *expected*
        path.
    :raises: ValueError: if the file is missing and
        `raise_error_if_missing = True`.
    """

    error_checking.assert_is_integer(year)
    error_checking.assert_is_string(directory_name)
    error_checking.assert_is_boolean(raise_error_if_missing)

    year_string = _year_number_to_string(year)
    storm_event_file_name = '{0:s}/{1:s}{2:s}{3:s}'.format(
        directory_name, PATHLESS_FILE_PREFIX, year_string, FILE_EXTENSION)

    # Nothing else to do if the caller tolerates a missing file or the file is
    # actually there.
    if not raise_error_if_missing or os.path.isfile(storm_event_file_name):
        return storm_event_file_name

    raise ValueError(
        'Cannot find Storm Events file.  Expected at: {0:s}'.format(
            storm_event_file_name)
    )
示例#4
0
def find_metafile(model_file_name, raise_error_if_missing=True):
    """Locates the metafile that accompanies a CNN.

    The metafile is expected in the same directory as the model file, under the
    name "model_metadata.p".  If it is not there, a second location is tried,
    obtained by swapping one hard-coded directory prefix for another.

    :param model_file_name: Path to model itself (see doc for `read_model`).
    :param raise_error_if_missing: Boolean flag.  If True and the metafile is
        not found, this method raises an error.
    :return: metafile_name: Path to metafile.  If the file is missing and
        `raise_error_if_missing = False`, this is the expected path (after the
        prefix swap, when the swap applies).
    :raises: ValueError: if the file is missing and
        `raise_error_if_missing = True`.
    """

    error_checking.assert_is_string(model_file_name)
    error_checking.assert_is_boolean(raise_error_if_missing)

    model_directory_name = os.path.dirname(model_file_name)
    metafile_name = '{0:s}/model_metadata.p'.format(model_directory_name)

    if os.path.isfile(metafile_name):
        return metafile_name

    # Fallback: same path under the alternative directory prefix.
    metafile_name = metafile_name.replace(
        '/glade/work/ryanlage', '/condo/swatwork/ralager'
    )

    if raise_error_if_missing and not os.path.isfile(metafile_name):
        raise ValueError(
            'Cannot find file.  Expected at: "{0:s}"'.format(metafile_name)
        )

    return metafile_name
示例#5
0
def find_file(directory_name, year, raise_error_if_missing=True):
    """Locates the NetCDF file with RRTM data for one year.

    :param directory_name: Path to directory where the file is expected.
    :param year: Year (integer).
    :param raise_error_if_missing: Boolean flag.  If True and the file does not
        exist, this method raises an error.  If False and the file does not
        exist, this method returns the *expected* file path.
    :return: rrtm_file_name: Path to file.
    :raises: ValueError: if the file is missing and
        `raise_error_if_missing == True`.
    """

    error_checking.assert_is_string(directory_name)
    error_checking.assert_is_integer(year)
    error_checking.assert_is_boolean(raise_error_if_missing)

    # Year is zero-padded to four digits in the file name.
    pathless_file_name = 'rrtm_output_{0:04d}.nc'.format(year)
    rrtm_file_name = '{0:s}/{1:s}'.format(directory_name, pathless_file_name)

    if not raise_error_if_missing or os.path.isfile(rrtm_file_name):
        return rrtm_file_name

    raise ValueError(
        'Cannot find file.  Expected at: "{0:s}"'.format(rrtm_file_name)
    )
def find_file(valid_time_unix_sec, top_directory_name,
              raise_error_if_missing=True):
    """Locates a WPC-format text file (bulletin) on the local machine.

    The file is expected to contain positions of cyclones, anticyclones,
    fronts, etc. for a single valid time.

    :param valid_time_unix_sec: Valid time.
    :param top_directory_name: Path to top-level directory with WPC bulletins.
    :param raise_error_if_missing: Boolean flag.  If True and the file does not
        exist, this method raises an error.  If False and the file does not
        exist, this method returns the *expected* path.
    :return: bulletin_file_name: Path to file.  If the file is missing and
        `raise_error_if_missing = False`, this is the *expected* path.
    :raises: ValueError: if the file is missing and
        `raise_error_if_missing = True`.
    """

    error_checking.assert_is_string(top_directory_name)
    error_checking.assert_is_boolean(raise_error_if_missing)

    valid_time_string = time_conversion.unix_sec_to_string(
        valid_time_unix_sec, TIME_FORMAT_IN_FILE_NAME)

    # Subdirectory is named after the first four characters of the time string
    # (presumably the year -- depends on TIME_FORMAT_IN_FILE_NAME).
    subdirectory_name = valid_time_string[:4]
    pathless_file_name = '{0:s}_{1:s}'.format(
        PATHLESS_FILE_NAME_PREFIX, valid_time_string)
    bulletin_file_name = '{0:s}/{1:s}/{2:s}'.format(
        top_directory_name, subdirectory_name, pathless_file_name)

    if not raise_error_if_missing or os.path.isfile(bulletin_file_name):
        return bulletin_file_name

    raise ValueError(
        'Cannot find file.  Expected at location: "{0:s}"'.format(
            bulletin_file_name)
    )
示例#7
0
def _check_training_args(
        model_file_name, history_file_name, tensorboard_dir_name, num_epochs,
        num_training_batches_per_epoch, num_validation_batches_per_epoch,
        training_option_dict, weight_loss_function):
    """Validates input arguments for training and derives class weights.

    Besides validation, this method creates the output directories (for the
    model file, history file, and TensorBoard logs) if necessary.

    :param model_file_name: Path to output file (HDF5 format).  The model will
        be saved here after each epoch.
    :param history_file_name: Path to output file (CSV format).  Training
        history (performance metrics) will be saved here after each epoch.
    :param tensorboard_dir_name: Path to output directory for TensorBoard log
        files.
    :param num_epochs: Number of epochs (integer >= 1).
    :param num_training_batches_per_epoch: Number of training batches in each
        epoch (integer >= 1).
    :param num_validation_batches_per_epoch: Number of validation batches in
        each epoch (integer >= 0).
    :param training_option_dict: See doc for
        `training_validation_io.example_generator_2d_or_3d`.  Missing keys are
        filled from `trainval_io.DEFAULT_OPTION_DICT`; the caller's dictionary
        is not modified.
    :param weight_loss_function: Boolean flag.  If False, classes will be
        weighted equally in the loss function.  If True, classes will be
        weighted differently (inversely proportional to their sampling
        fractions).
    :return: class_to_weight_dict: Dictionary, where each key is the integer ID
        for a target class (-2 for "dead storm") and each value is the weight
        for the loss function.  If None, classes will be equally weighted in the
        loss function.
    """

    # Overlay user-supplied options on a copy of the defaults.
    user_option_dict = training_option_dict.copy()
    training_option_dict = trainval_io.DEFAULT_OPTION_DICT.copy()
    training_option_dict.update(user_option_dict)

    for this_file_name in [model_file_name, history_file_name]:
        file_system_utils.mkdir_recursive_if_necessary(
            file_name=this_file_name)

    file_system_utils.mkdir_recursive_if_necessary(
        directory_name=tensorboard_dir_name)

    # Each count must be an integer no smaller than the paired minimum.
    counts_and_minima = [
        (num_epochs, 1),
        (num_training_batches_per_epoch, 1),
        (num_validation_batches_per_epoch, 0)
    ]

    for this_count, this_minimum in counts_and_minima:
        error_checking.assert_is_integer(this_count)
        error_checking.assert_is_geq(this_count, this_minimum)

    error_checking.assert_is_boolean(weight_loss_function)
    if not weight_loss_function:
        return None

    sampling_fraction_dict = training_option_dict[
        trainval_io.SAMPLING_FRACTIONS_KEY
    ]

    # Without sampling fractions there is nothing to base the weights on.
    if sampling_fraction_dict is None:
        return None

    target_name = training_option_dict[trainval_io.TARGET_NAME_KEY]
    binarize_target = training_option_dict[trainval_io.BINARIZE_TARGET_KEY]

    return dl_utils.class_fractions_to_weights(
        sampling_fraction_by_class_dict=sampling_fraction_dict,
        target_name=target_name,
        binarize_target=binarize_target
    )
def check_field_name(field_name, require_standard=False):
    """Verifies that the name of a model field is recognized.

    :param field_name: Field name in GewitterGefahr format (not the original
        NetCDF format).
    :param require_standard: Boolean flag.  If True, `field_name` must be in
        `STANDARD_FIELD_NAMES`.  If False, `field_name` must be in
        `FIELD_NAMES`.
    :raises: ValueError: if field name is unrecognized.
    """

    error_checking.assert_is_string(field_name)
    error_checking.assert_is_boolean(require_standard)

    valid_field_names = (
        STANDARD_FIELD_NAMES if require_standard else FIELD_NAMES
    )

    if field_name in valid_field_names:
        return

    # Error message lists all valid names first, then the offending name.
    error_string = (
        '\n\n{0:s}\n\nValid field names (listed above) do not include "{1:s}".'
    ).format(str(valid_field_names), field_name)

    raise ValueError(error_string)
示例#9
0
def _check_args_one_step(predictor_matrix, permuted_flag_matrix,
                         scalar_channel_flags, shuffle_profiles_together,
                         num_bootstrap_reps):
    """Validates input args for `run_*_test_one_step`.

    :param predictor_matrix: See doc for `run_forward_test_one_step` or
        `run_backwards_test_one_step`.  Must be a 3-D numpy array without NaN.
    :param permuted_flag_matrix: Same.  Must be a Boolean numpy array whose
        shape equals that of `predictor_matrix` without the first axis.
    :param scalar_channel_flags: Same.  Must be a 1-D Boolean numpy array with
        one element per entry along the last axis of `predictor_matrix`.
    :param shuffle_profiles_together: Same.
    :param num_bootstrap_reps: Same.
    :return: num_bootstrap_reps: Same as input but clamped to at least 1.
    """

    error_checking.assert_is_numpy_array_without_nan(predictor_matrix)

    # The two bounds together require exactly three dimensions.
    num_predictor_dim = predictor_matrix.ndim
    error_checking.assert_is_geq(num_predictor_dim, 3)
    error_checking.assert_is_leq(num_predictor_dim, 3)

    error_checking.assert_is_boolean_numpy_array(permuted_flag_matrix)
    expected_flag_matrix_dim = numpy.array(
        predictor_matrix.shape[1:], dtype=int)
    error_checking.assert_is_numpy_array(
        permuted_flag_matrix, exact_dimensions=expected_flag_matrix_dim)

    error_checking.assert_is_boolean_numpy_array(scalar_channel_flags)
    expected_channel_flag_dim = numpy.array(
        [predictor_matrix.shape[-1]], dtype=int)
    error_checking.assert_is_numpy_array(
        scalar_channel_flags, exact_dimensions=expected_channel_flag_dim)

    error_checking.assert_is_boolean(shuffle_profiles_together)
    error_checking.assert_is_integer(num_bootstrap_reps)

    return numpy.maximum(num_bootstrap_reps, 1)
示例#10
0
def find_rap_file_any_grid(top_directory_name, init_time_unix_sec,
                           lead_time_hours, raise_error_if_missing=True):
    """Locates a RAP (Rapid Refresh) file on any grid.

    Grids in `nwp_model_utils.RAP_GRID_IDS` are tried in order and the first
    existing file is returned.

    :param top_directory_name: See doc for `find_ruc_file_any_grid`.
    :param init_time_unix_sec: Same.
    :param lead_time_hours: Same.
    :param raise_error_if_missing: Same.
    :return: grib_file_name: Same.
    """

    error_checking.assert_is_boolean(raise_error_if_missing)

    grid_ids = nwp_model_utils.RAP_GRID_IDS
    last_index = len(grid_ids) - 1

    for this_index, this_grid_id in enumerate(grid_ids):
        # Only the final attempt is allowed to raise; earlier misses just move
        # on to the next grid.
        raise_error_now = raise_error_if_missing and this_index == last_index

        grib_file_name = find_grib_file(
            top_directory_name=top_directory_name,
            init_time_unix_sec=init_time_unix_sec,
            model_name=nwp_model_utils.RAP_MODEL_NAME,
            grid_id=this_grid_id,
            lead_time_hours=lead_time_hours,
            raise_error_if_missing=raise_error_now)

        if os.path.isfile(grib_file_name):
            return grib_file_name

    return None
示例#11
0
def download_rap_file_any_grid(top_local_directory_name, init_time_unix_sec,
                               lead_time_hours, raise_error_if_fails=True):
    """Downloads a RAP (Rapid Refresh) file on any grid.

    Grids are tried in a fixed order and the first successful download is
    returned.

    :param top_local_directory_name: Name of top-level directory for grib files
        on local machine.
    :param init_time_unix_sec: Model-initialization time.
    :param lead_time_hours: Lead time.
    :param raise_error_if_fails: See doc for `download_grib_file`.
    :return: local_file_name: See doc for `download_grib_file`.
    """

    error_checking.assert_is_boolean(raise_error_if_fails)

    # Only these two grids are attempted (not the full list in
    # nwp_model_utils.RAP_GRID_IDS).
    grid_ids = [nwp_model_utils.ID_FOR_130GRID, nwp_model_utils.ID_FOR_252GRID]
    last_index = len(grid_ids) - 1

    for this_index, this_grid_id in enumerate(grid_ids):
        # Only the final attempt is allowed to raise on failure.
        raise_error_now = raise_error_if_fails and this_index == last_index

        local_file_name = download_grib_file(
            top_local_directory_name=top_local_directory_name,
            init_time_unix_sec=init_time_unix_sec,
            model_name=nwp_model_utils.RAP_MODEL_NAME,
            grid_id=this_grid_id,
            lead_time_hours=lead_time_hours,
            raise_error_if_fails=raise_error_now)

        if local_file_name is not None:
            return local_file_name

    # Every attempt returned None.
    return None
示例#12
0
def find_ruc_file_any_grid(top_directory_name, init_time_unix_sec,
                           lead_time_hours, raise_error_if_missing=True):
    """Locates a RUC (Rapid Update Cycle) file on any grid.

    Grids in `nwp_model_utils.RUC_GRID_IDS` are tried in order and the first
    existing file is returned.

    :param top_directory_name: Name of top-level directory with grib files.
    :param init_time_unix_sec: Model-initialization time.
    :param lead_time_hours: Lead time.
    :param raise_error_if_missing: Boolean flag.  If True, the search on the
        last grid is run with `raise_error_if_missing = True`, so that
        `find_grib_file` can error out when no file has been found.
    :return: grib_file_name: Path to grib file.  If no file is found and
        `raise_error_if_missing = False`, this will be None.
    """

    error_checking.assert_is_boolean(raise_error_if_missing)

    grid_ids = nwp_model_utils.RUC_GRID_IDS
    num_grids = len(grid_ids)

    for k, this_grid_id in enumerate(grid_ids):
        is_last_grid = k == num_grids - 1

        grib_file_name = find_grib_file(
            top_directory_name=top_directory_name,
            init_time_unix_sec=init_time_unix_sec,
            model_name=nwp_model_utils.RUC_MODEL_NAME,
            grid_id=this_grid_id,
            lead_time_hours=lead_time_hours,
            raise_error_if_missing=raise_error_if_missing and is_last_grid)

        if os.path.isfile(grib_file_name):
            return grib_file_name

    return None
示例#13
0
def cross_entropy_function(target_values,
                           class_probability_matrix,
                           test_mode=False):
    """Cross-entropy cost function.

    This function works for binary or multi-class classification.

    Probabilities are clipped to [MIN_PROBABILITY, MAX_PROBABILITY] before
    taking logarithms.  The clipping is done on a copy, so the caller's array
    is left unchanged.

    :param target_values: See doc for `run_permutation_test`.
    :param class_probability_matrix: Same.  Read as a 2-D numpy array whose
        first axis is examples and second axis is classes.
    :param test_mode: Boolean flag.  If True, the natural logarithm is used;
        if False (the default), the base-2 logarithm is used.  Leave this
        alone outside of testing.
    :return: cross_entropy: Scalar.
    """

    error_checking.assert_is_boolean(test_mode)

    num_examples = class_probability_matrix.shape[0]
    num_classes = class_probability_matrix.shape[1]

    # numpy.clip returns a new array; in-place clipping would silently alter
    # the probabilities held by the caller.
    clipped_probability_matrix = numpy.clip(
        class_probability_matrix, MIN_PROBABILITY, MAX_PROBABILITY)

    # One-hot encoding of the targets (one row per example).
    target_matrix = keras.utils.to_categorical(
        target_values, num_classes
    ).astype(int)

    log_function = numpy.log if test_mode else numpy.log2

    return -1 * numpy.sum(
        target_matrix * log_function(clipped_probability_matrix)
    ) / num_examples
示例#14
0
def find_prediction_file(top_directory_name, spc_date_string,
                         raise_error_if_missing=False):
    """Locates the file with upconvnet predictions for one SPC date.

    Upconvnet predictions are reconstructed radar images.

    :param top_directory_name: Path to top-level directory with upconvnet
        predictions.
    :param spc_date_string: SPC date (format "yyyymmdd").
    :param raise_error_if_missing: Boolean flag.  If True and the file does not
        exist, this method raises an error.
    :return: prediction_file_name: Path to prediction file.  If the file is
        missing and `raise_error_if_missing = False`, this is the expected
        path.
    :raises: ValueError: if the file is missing and
        `raise_error_if_missing = True`.
    """

    error_checking.assert_is_string(top_directory_name)
    error_checking.assert_is_boolean(raise_error_if_missing)

    # Return value is discarded; the call is presumably here only to reject a
    # badly formatted date string.
    time_conversion.spc_date_string_to_unix_sec(spc_date_string)

    year_string = spc_date_string[:4]
    pathless_file_name = '{0:s}_{1:s}.p'.format(
        PATHLESS_FILE_NAME_PREFIX, spc_date_string)
    prediction_file_name = '{0:s}/{1:s}/{2:s}'.format(
        top_directory_name, year_string, pathless_file_name)

    if not raise_error_if_missing or os.path.isfile(prediction_file_name):
        return prediction_file_name

    raise ValueError(
        'Cannot find file.  Expected at: "{0:s}"'.format(prediction_file_name)
    )
def find_local_raw_5minute_file(station_id=None, month_unix_sec=None,
                                top_directory_name=None,
                                raise_error_if_missing=True):
    """Locates a raw 5-minute file on the local machine.

    The file is expected to contain 5-minute METARs for one station-month.

    :param station_id: String ID for station.
    :param month_unix_sec: Month in Unix format.
    :param top_directory_name: Path to top-level directory for raw 5-minute
        files.  The file is sought in the subdirectory named after the station.
    :param raise_error_if_missing: Boolean flag.  If True and the file does not
        exist, this method raises an error.
    :return: raw_5minute_file_name: Path to file.  If the file is missing and
        `raise_error_if_missing = False`, this is the *expected* path.
    :raises: ValueError: if the file is missing and
        `raise_error_if_missing = True`.
    """

    error_checking.assert_is_string(station_id)
    error_checking.assert_is_string(top_directory_name)
    error_checking.assert_is_boolean(raise_error_if_missing)

    station_directory_name = '{0:s}/{1:s}'.format(
        top_directory_name, station_id)
    pathless_file_name = _get_pathless_raw_5minute_file_name(
        station_id, month_unix_sec)
    raw_5minute_file_name = '{0:s}/{1:s}'.format(
        station_directory_name, pathless_file_name)

    if not raise_error_if_missing or os.path.isfile(raw_5minute_file_name):
        return raw_5minute_file_name

    raise ValueError(
        'Cannot find raw 5-minute file.  Expected at location: ' +
        raw_5minute_file_name)
示例#16
0
def find_processed_file(directory_name, year, raise_error_if_missing=True):
    """Locates the processed file with tornado reports for one year.

    See `write_processed_file` for the definition of a "processed file".

    :param directory_name: Path to directory.
    :param year: Year (integer).
    :param raise_error_if_missing: Boolean flag.  If True and the file does not
        exist, this method raises an error.
    :return: processed_file_name: Path to file.  If the file is missing and
        `raise_error_if_missing = False`, this is the *expected* path.
    :raises: ValueError: if the file is missing and
        `raise_error_if_missing = True`.
    """

    error_checking.assert_is_string(directory_name)
    error_checking.assert_is_integer(year)
    error_checking.assert_is_boolean(raise_error_if_missing)

    # Year is zero-padded to four digits in the file name.
    pathless_file_name = 'tornado_reports_{0:04d}.csv'.format(year)
    processed_file_name = '{0:s}/{1:s}'.format(
        directory_name, pathless_file_name)

    if not raise_error_if_missing or os.path.isfile(processed_file_name):
        return processed_file_name

    raise ValueError(
        'Cannot find processed file with tornado reports.  Expected at: '
        '{0:s}'.format(processed_file_name)
    )
示例#17
0
def do_2d_upsampling(feature_matrix, upsampling_factor=2,
                     use_linear_interp=True):
    """Upsamples 2-D feature maps.

    NOTE(review): the body visible here only validates the inputs and adds a
    leading axis to 3-D input.  No upsampling step and no `return` statement
    appear, although a return value is documented below -- the rest of the
    function seems to be missing from this copy.  Confirm against the original
    source.

    m = number of rows after upsampling
    n = number of columns after upsampling

    :param feature_matrix: Input feature maps (numpy array).  Dimensions must be
        M x N x C or 1 x M x N x C.
    :param upsampling_factor: Upsampling factor (integer > 1).
    :param use_linear_interp: Boolean flag.  If True (False), will use linear
        (nearest-neighbour) interpolation.
    :return: feature_matrix: Output feature maps (numpy array).  Dimensions will
        be 1 x m x n x C.
    """

    error_checking.assert_is_numpy_array_without_nan(feature_matrix)
    error_checking.assert_is_integer(upsampling_factor)
    error_checking.assert_is_geq(upsampling_factor, 2)
    error_checking.assert_is_boolean(use_linear_interp)

    # A 3-D input gets a leading axis of length 1, so that the 4-D check below
    # applies to both accepted input shapes.
    if len(feature_matrix.shape) == 3:
        feature_matrix = numpy.expand_dims(feature_matrix, axis=0)

    error_checking.assert_is_numpy_array(feature_matrix, num_dimensions=4)
示例#18
0
def find_match_file(top_directory_name, valid_time_unix_sec,
                    raise_error_if_missing=False):
    """Locates the match file for one valid time.

    A "match file" matches storm objects in one dataset (e.g., MYRORSS or
    GridRad) to those in another dataset, at one time step.

    :param top_directory_name: Path to top-level directory.
    :param valid_time_unix_sec: Valid time.
    :param raise_error_if_missing: Boolean flag.  If True and the file does not
        exist, this method raises an error.
    :return: match_file_name: Path to match file.  If the file is missing and
        `raise_error_if_missing = False`, this is the *expected* path.
    :raises: ValueError: if the file is missing and
        `raise_error_if_missing = True`.
    """

    error_checking.assert_is_string(top_directory_name)
    error_checking.assert_is_boolean(raise_error_if_missing)

    spc_date_string = time_conversion.time_to_spc_date_string(
        valid_time_unix_sec)
    valid_time_string = time_conversion.unix_sec_to_string(
        valid_time_unix_sec, FILE_NAME_TIME_FORMAT)

    # Directory layout: top level / first 4 characters of SPC date / SPC date.
    match_file_name = '{0:s}/{1:s}/{2:s}/storm-matches_{3:s}.p'.format(
        top_directory_name, spc_date_string[:4], spc_date_string,
        valid_time_string)

    if not raise_error_if_missing or os.path.isfile(match_file_name):
        return match_file_name

    raise ValueError(
        'Cannot find file.  Expected at: "{0:s}"'.format(match_file_name)
    )
def check_wind_speeds(wind_speeds_m_s01, one_component=False):
    """Returns indices of invalid wind speeds.

    N = number of observations.

    :param wind_speeds_m_s01: length-N numpy array of wind speeds (m/s).
    :param one_component: Boolean flag.  If True, wind speeds are only one
        component (either u or v), which means that they can be negative.  If
        False, wind speeds are absolute (vector magnitudes), so they cannot be
        negative.
    :return: invalid_indices: 1-D numpy array with indices of invalid speeds.
    """

    error_checking.assert_is_real_numpy_array(wind_speeds_m_s01)
    error_checking.assert_is_numpy_array(wind_speeds_m_s01, num_dimensions=1)
    error_checking.assert_is_boolean(one_component)

    # The lower bound depends on whether speeds are signed components.
    min_wind_speed_m_s01 = (
        MIN_SIGNED_WIND_SPEED_M_S01 if one_component
        else MIN_ABSOLUTE_WIND_SPEED_M_S01
    )

    # Validity is tested first and then negated, so that NaN (which fails both
    # comparisons) is reported as invalid.
    not_too_low_flags = wind_speeds_m_s01 >= min_wind_speed_m_s01
    not_too_high_flags = wind_speeds_m_s01 <= MAX_WIND_SPEED_M_S01
    valid_flags = numpy.logical_and(not_too_low_flags, not_too_high_flags)

    return numpy.flatnonzero(numpy.logical_not(valid_flags))
def find_model_file(base_model_file_name, raise_error_if_missing=True):
    """Locates the file containing isotonic-regression model(s).

    This file should be written by `write_model_for_each_class`.  It is
    expected in the same directory as the base model, under the name
    "isotonic_regression_models.p".

    :param base_model_file_name: Path to file containing base model (e.g., CNN).
    :param raise_error_if_missing: Boolean flag.  If True and the
        isotonic-regression file does not exist, this method raises an error.
    :return: isotonic_file_name: Path to isotonic-regression file.  If the file
        is missing and `raise_error_if_missing = False`, this is the *expected*
        path.
    :raises: ValueError: if isotonic-regression file is missing and
        `raise_error_if_missing = True`.
    """

    error_checking.assert_is_string(base_model_file_name)
    error_checking.assert_is_boolean(raise_error_if_missing)

    base_model_dir_name = os.path.dirname(base_model_file_name)
    isotonic_file_name = '{0:s}/isotonic_regression_models.p'.format(
        base_model_dir_name)

    if not raise_error_if_missing or os.path.isfile(isotonic_file_name):
        return isotonic_file_name

    raise ValueError(
        'Cannot find file.  Expected at: "{0:s}"'.format(isotonic_file_name)
    )
def target_name_to_num_classes(target_name, include_dead_storms=False):
    """Determines number of classes from name of target variable.

    :param target_name: Name of target variable.
    :param include_dead_storms: Boolean flag.  If True, number of classes will
        include "dead storms" (defined in documentation for
        `_find_dead_storms`).  This is ignored (and not validated) for tornado
        and tornadogenesis targets, and has no effect when the minimum lead
        time is <= 0.
    :return: num_classes: Number of classes.  If target variable is regression-
        based, will return None.
    """

    target_param_dict = target_name_to_params(target_name)

    # Tornado-type targets are always binary.
    event_type_string = target_param_dict[EVENT_TYPE_KEY]
    tornado_event_strings = [
        linkage.TORNADO_EVENT_STRING, linkage.TORNADOGENESIS_EVENT_STRING
    ]
    if event_type_string in tornado_event_strings:
        return 2

    error_checking.assert_is_boolean(include_dead_storms)

    # No cutoffs means that the target is not classification-based.
    wind_speed_cutoffs_kt = target_param_dict[WIND_SPEED_CUTOFFS_KEY]
    if wind_speed_cutoffs_kt is None:
        return None

    # K cutoffs split the wind-speed range into K + 1 classes.
    num_wind_classes = len(wind_speed_cutoffs_kt) + 1

    if target_param_dict[MIN_LEAD_TIME_KEY] <= 0:
        return num_wind_classes

    return num_wind_classes + int(include_dead_storms)
示例#22
0
def plot_multipass_test(permutation_dict,
                        axes_object=None,
                        num_predictors_to_plot=None,
                        plot_percent_increase=False,
                        confidence_level=DEFAULT_CONFIDENCE_LEVEL,
                        bar_face_colour=None):
    """Plots results of multi-pass (Lakshmanan) permutation test.

    :param permutation_dict: See doc for `plot_single_pass_test`.
    :param axes_object: Same.
    :param num_predictors_to_plot: Same.  If None, all predictors are plotted.
        Values larger than the number of predictors are reduced to that number.
    :param plot_percent_increase: Same.
    :param confidence_level: Same.
    :param bar_face_colour: Same.
    """

    # Validate inputs and settle on the number of predictors to plot.
    predictor_names = permutation_dict[permutation_utils.BEST_PREDICTORS_KEY]
    num_predictors_available = len(predictor_names)

    if num_predictors_to_plot is None:
        num_predictors_to_plot = num_predictors_available

    error_checking.assert_is_integer(num_predictors_to_plot)
    error_checking.assert_is_greater(num_predictors_to_plot, 0)
    num_predictors_to_plot = min(
        num_predictors_to_plot, num_predictors_available)

    error_checking.assert_is_boolean(plot_percent_increase)

    # Gather the pieces needed for plotting.
    backwards_flag = permutation_dict[permutation_utils.BACKWARDS_FLAG]
    best_cost_matrix = permutation_dict[
        permutation_utils.BEST_COST_MATRIX_KEY]
    original_cost_array = permutation_dict[
        permutation_utils.ORIGINAL_COST_ARRAY_KEY]

    predictor_names = predictor_names[:num_predictors_to_plot]

    # First row = original costs; remaining rows = costs for the predictors
    # being plotted.
    cost_matrix = numpy.concatenate((
        numpy.reshape(original_cost_array, (1, original_cost_array.size)),
        best_cost_matrix[:num_predictors_to_plot, :]
    ), axis=0)

    # For the backwards test, the "clean" costs come from the last row of the
    # full (untruncated) cost matrix; otherwise they are the original costs.
    clean_cost_array = (
        best_cost_matrix[-1, :] if backwards_flag else original_cost_array
    )

    _plot_bars(
        cost_matrix=cost_matrix,
        clean_cost_array=clean_cost_array,
        predictor_names=predictor_names,
        plot_percent_increase=plot_percent_increase,
        backwards_flag=backwards_flag,
        multipass_flag=True,
        confidence_level=confidence_level,
        axes_object=axes_object,
        bar_face_colour=bar_face_colour
    )
示例#23
0
def find_raw_files_one_day(top_directory_name, unix_time_sec, file_extension,
                           raise_error_if_all_missing=True):
    """Finds all raw (ASCII or JSON) files for one day.

    Files are first sought under the primary file-name prefix
    (`RAW_FILE_NAME_PREFIX`).  If none are found, the search is repeated with
    the alternative prefix (`ALT_RAW_FILE_NAME_PREFIX`).

    :param top_directory_name: Path to top-level directory with raw probSevere
        files.
    :param unix_time_sec: Valid time (any time on the given day).
    :param file_extension: File type (either ".json" or ".ascii").
    :param raise_error_if_all_missing: Boolean flag.  If True and no files are
        found, this method raises an error.  If False and no files are found,
        this method returns None.
    :return: raw_file_names: [may be None] 1-D list of paths to raw files.
    :raises: ValueError: if no files are found and
        `raise_error_if_all_missing = True`.
    """

    error_checking.assert_is_string(top_directory_name)
    error_checking.assert_is_boolean(raise_error_if_all_missing)

    # Build the name of a single file, then swap its time stamp for a pattern
    # matching any time stamp.
    dummy_pathless_file_name = _get_pathless_raw_file_name(
        unix_time_sec=unix_time_sec, file_extension=file_extension)
    time_string = time_conversion.unix_sec_to_string(
        unix_time_sec, RAW_FILE_TIME_FORMAT)
    primary_pathless_pattern = dummy_pathless_file_name.replace(
        time_string, RAW_FILE_TIME_FORMAT_REGEX)

    alt_pathless_pattern = primary_pathless_pattern.replace(
        RAW_FILE_NAME_PREFIX, ALT_RAW_FILE_NAME_PREFIX)

    # Both searches look in the same month/date subdirectory.
    directory_name = '{0:s}/{1:s}/{2:s}'.format(
        top_directory_name,
        time_conversion.unix_sec_to_string(unix_time_sec, MONTH_FORMAT),
        time_conversion.unix_sec_to_string(unix_time_sec, DATE_FORMAT)
    )

    for this_pathless_pattern in [
            primary_pathless_pattern, alt_pathless_pattern
    ]:
        raw_file_pattern = '{0:s}/{1:s}'.format(
            directory_name, this_pathless_pattern)
        raw_file_names = glob.glob(raw_file_pattern)

        if raw_file_names:
            return raw_file_names

    if not raise_error_if_all_missing:
        return None

    # The error message reports the last pattern tried (alternative prefix).
    raise ValueError(
        'Cannot find any files with pattern: "{0:s}"'.format(raw_file_pattern)
    )
def plot_many_2d_grids(class_activation_matrix_3d,
                       axes_object_matrix,
                       colour_map_object,
                       min_contour_level,
                       max_contour_level,
                       contour_interval,
                       line_width=DEFAULT_CONTOUR_WIDTH,
                       row_major=True,
                       line_style=DEFAULT_CONTOUR_STYLE):
    """Draws one 2-D class-activation map per predictor, one panel each.

    M = number of rows in spatial grid
    N = number of columns in spatial grid
    P = number of predictors

    The [k]th slice along the last axis of the activation matrix is drawn in
    the [k]th panel, where panels are counted in the order given by
    `row_major`.

    :param class_activation_matrix_3d: M-by-N-by-P numpy array of class
        activations (must not contain NaN).
    :param axes_object_matrix: Panel axes, indexed as [panel row, panel
        column].  See doc for `plotting_utils.init_panels`.
    :param colour_map_object: See doc for `plot_2d_grid`.
    :param min_contour_level: Same.
    :param max_contour_level: Same.
    :param contour_interval: Same.
    :param line_width: Same.
    :param row_major: Boolean flag.  If True, panels are filled along rows
        first, then down columns.  If False, down columns first, then along
        rows.
    :param line_style: Style (e.g., "solid") for contour lines.
    """

    error_checking.assert_is_numpy_array_without_nan(
        class_activation_matrix_3d)
    error_checking.assert_is_numpy_array(
        class_activation_matrix_3d, num_dimensions=3)
    error_checking.assert_is_boolean(row_major)

    # numpy index-order codes: 'C' = row-major, 'F' = column-major.
    panel_order_string = 'C' if row_major else 'F'

    predictor_count = class_activation_matrix_3d.shape[-1]
    panel_grid_dimensions = (
        axes_object_matrix.shape[0], axes_object_matrix.shape[1]
    )

    for predictor_index in range(predictor_count):
        # Convert linear panel number to (row, column) in the panel grid.
        panel_row, panel_column = numpy.unravel_index(
            predictor_index, panel_grid_dimensions, order=panel_order_string)

        plot_2d_grid(
            class_activation_matrix_2d=class_activation_matrix_3d[
                ..., predictor_index],
            axes_object=axes_object_matrix[panel_row, panel_column],
            colour_map_object=colour_map_object,
            min_contour_level=min_contour_level,
            max_contour_level=max_contour_level,
            contour_interval=contour_interval,
            line_width=line_width,
            line_style=line_style)
示例#25
0
def find_single_field_file(init_time_unix_sec,
                           lead_time_hours=None,
                           model_name=None,
                           grid_id=None,
                           grib1_field_name=None,
                           top_directory_name=None,
                           raise_error_if_missing=True):
    """Finds file with a single field on local machine.

    "Single field" = one variable at one time step and all grid cells.

    :param init_time_unix_sec: Model-initialization time (Unix format).
    :param lead_time_hours: Lead time (valid time minus init time).  If model is
        a reanalysis, you can leave this as None (always zero).
    :param model_name: Name of model.
    :param grid_id: String ID for model grid.
    :param grib1_field_name: Field name in grib1 format.
    :param top_directory_name: Name of top-level directory with single-field
        files for the given model/grib combo.
    :param raise_error_if_missing: Boolean flag.  If True and file is missing,
        will raise an error.
    :return: single_field_file_name: Path to single-field file.  If file is
        missing but raise_error_if_missing = False, this will be the *expected*
        path.
    :raises: ValueError: if raise_error_if_missing = True and file is missing.
    """

    error_checking.assert_is_string(grib1_field_name)
    error_checking.assert_is_string(top_directory_name)
    error_checking.assert_is_boolean(raise_error_if_missing)

    nwp_model_utils.check_model_name(model_name)

    # For the NARR, any lead time supplied by the caller is overridden with 0.
    is_narr = model_name == nwp_model_utils.NARR_MODEL_NAME
    lead_time_hours = 0 if is_narr else lead_time_hours

    error_checking.assert_is_integer(lead_time_hours)
    error_checking.assert_is_geq(lead_time_hours, 0)

    pathless_file_name = _get_pathless_single_field_file_name(
        init_time_unix_sec,
        lead_time_hours=lead_time_hours,
        model_name=model_name,
        grid_id=grid_id,
        grib1_field_name=grib1_field_name)

    # Files are grouped into one subdirectory per initialization month.
    month_string = time_conversion.unix_sec_to_string(
        init_time_unix_sec, TIME_FORMAT_MONTH)

    single_field_file_name = '{0:s}/{1:s}/{2:s}'.format(
        top_directory_name, month_string, pathless_file_name)

    if not raise_error_if_missing or os.path.isfile(single_field_file_name):
        return single_field_file_name

    raise ValueError('Cannot find single-field file.  Expected at: ' +
                     single_field_file_name)
示例#26
0
def download_file_via_passwordless_ssh(host_name=None,
                                       user_name=None,
                                       remote_file_name=None,
                                       local_file_name=None,
                                       raise_error_if_fails=True):
    """Downloads file via passwordless SSH.

    For this to work, the remote machine (from which you are downloading) must
    have the RSA key of the local machine.  See the following page for
    instructions on sharing RSA keys: http://www.linuxproblem.org/art_9.html

    Success is judged only by whether `local_file_name` exists after the rsync
    command has run; the command's exit code and output are discarded.

    :param host_name: Name of remote machine (example: "thunderhoser.ou.edu").
    :param user_name: User name on remote machine (example: "thunderhoser").
    :param remote_file_name: File path on remote machine (where the file will be
        downloaded from).
    :param local_file_name: File path on local machine (where the file will be
        stored).
    :param raise_error_if_fails: Boolean flag.  If raise_error_if_fails = True
        and download fails, will raise an error.
    :return: local_file_name: If raise_error_if_fails = False and download
        failed, this will be None.  Otherwise, this will be the same as input.
    :raises: ValueError: if download failed and raise_error_if_fails = True.
    """

    # TODO(thunderhoser): Handle exceptions more intelligently.  Currently, if
    # the download fails, this method does not know why it failed.  If the
    # download failed because the file does not exist, this is less severe than
    # if it failed because we can't login to the remote machine.

    error_checking.assert_is_string(host_name)
    error_checking.assert_is_string(user_name)
    error_checking.assert_is_string(remote_file_name)
    error_checking.assert_is_string(local_file_name)
    error_checking.assert_is_boolean(raise_error_if_fails)

    file_system_utils.mkdir_recursive_if_necessary(file_name=local_file_name)

    # NOTE: the command is run through the shell and the arguments are
    # interpolated without escaping, so they must come from trusted sources.
    unix_command_string = (
        'LD_LIBRARY_PATH= rsync -rv -e "{0:s}" {1:s}@{2:s}:"{3:s}" "{4:s}"'
    ).format(SSH_ARG_STRING, user_name, host_name, remote_file_name,
             local_file_name)

    # Context manager guarantees that the handle to the null device is closed,
    # even if the subprocess call raises (otherwise one descriptor would leak
    # per call).
    with open(os.devnull, 'w') as devnull_handle:
        subprocess.call(unix_command_string,
                        shell=True,
                        stdout=devnull_handle,
                        stderr=devnull_handle)

    if os.path.isfile(local_file_name):
        return local_file_name

    info_string = ('Download failed.  Local file expected at: ' +
                   local_file_name)
    if raise_error_if_fails:
        raise ValueError(info_string)

    warnings.warn(info_string)
    return None
示例#27
0
def find_raw_file(unix_time_sec,
                  spc_date_string,
                  field_name,
                  data_source,
                  top_directory_name,
                  height_m_asl=None,
                  raise_error_if_missing=True):
    """Finds raw file.

    File should contain one field at one time step (e.g., MESH at 123502 UTC,
    reflectivity at 500 m above sea level and 123502 UTC).

    This method looks for the zipped file first, then for the unzipped file.
    Both are looked for regardless of `raise_error_if_missing`, so that an
    existing unzipped file is found even when the flag is False.

    :param unix_time_sec: Valid time.
    :param spc_date_string: SPC date (format "yyyymmdd").
    :param field_name: Name of radar field in GewitterGefahr format.
    :param data_source: Data source (string).
    :param top_directory_name: Name of top-level directory with raw files.
    :param height_m_asl: Radar height (metres above sea level).
    :param raise_error_if_missing: Boolean flag.  If True and neither the
        zipped nor the unzipped file exists, this method will raise an error.
        If False and neither exists, will return *expected* path to the zipped
        file.
    :return: raw_file_name: Path to raw file.
    :raises: ValueError: if raise_error_if_missing = True and file is missing.
    """

    # Error-checking.
    _ = time_conversion.spc_date_string_to_unix_sec(spc_date_string)
    error_checking.assert_is_string(top_directory_name)
    error_checking.assert_is_boolean(raise_error_if_missing)

    relative_directory_name = get_relative_dir_for_raw_files(
        field_name=field_name,
        height_m_asl=height_m_asl,
        data_source=data_source)

    directory_name = '{0:s}/{1:s}/{2:s}/{3:s}'.format(top_directory_name,
                                                      spc_date_string[:4],
                                                      spc_date_string,
                                                      relative_directory_name)

    # First choice: zipped file.
    zipped_file_name = '{0:s}/{1:s}'.format(
        directory_name,
        _get_pathless_raw_file_name(unix_time_sec, zipped=True))

    if os.path.isfile(zipped_file_name):
        return zipped_file_name

    # Second choice: unzipped file.
    unzipped_file_name = '{0:s}/{1:s}'.format(
        directory_name,
        _get_pathless_raw_file_name(unix_time_sec, zipped=False))

    if os.path.isfile(unzipped_file_name):
        return unzipped_file_name

    if raise_error_if_missing:
        raise ValueError('Cannot find raw file.  Expected at: "{0:s}"'.format(
            unzipped_file_name))

    # Neither file exists, so report where the zipped file is expected.
    return zipped_file_name
示例#28
0
def find_target_file(top_directory_name,
                     event_type_string,
                     spc_date_string,
                     raise_error_if_missing=True,
                     unix_time_sec=None):
    """Locates file with target values for either one time or one SPC date.

    If `unix_time_sec` is given, this method looks for a one-time file and the
    input `spc_date_string` is replaced by the SPC date derived from
    `unix_time_sec`.  Otherwise, this method looks for a one-SPC-date file.

    :param top_directory_name: Name of top-level directory with target files.
    :param event_type_string: Event type (must be accepted by
        `linkage.check_event_type`).
    :param spc_date_string: SPC date (format "yyyymmdd").
    :param raise_error_if_missing: Boolean flag.  If file is missing and
        `raise_error_if_missing = True`, this method will error out.
    :param unix_time_sec: Valid time.
    :return: target_file_name: Path to target file.  If file is missing and
        `raise_error_if_missing = False`, this will be the *expected* path.
    :raises: ValueError: if file is missing and `raise_error_if_missing = True`.
    """

    error_checking.assert_is_string(top_directory_name)
    linkage.check_event_type(event_type_string)
    error_checking.assert_is_boolean(raise_error_if_missing)

    if unix_time_sec is not None:
        # One file per valid time, stored in a subdirectory for the SPC date.
        spc_date_string = time_conversion.time_to_spc_date_string(
            unix_time_sec)
        valid_time_string = time_conversion.unix_sec_to_string(
            unix_time_sec, TIME_FORMAT)

        file_name_template = (
            '{0:s}/{1:s}/{2:s}/wind_labels_{3:s}.nc'
            if event_type_string == linkage.WIND_EVENT_STRING
            else '{0:s}/{1:s}/{2:s}/tornado_labels_{3:s}.nc'
        )

        target_file_name = file_name_template.format(
            top_directory_name, spc_date_string[:4], spc_date_string,
            valid_time_string)
    else:
        # One file per SPC date.  The call below is made only to verify the
        # format of the SPC date (return value is discarded).
        time_conversion.spc_date_string_to_unix_sec(spc_date_string)

        file_name_template = (
            '{0:s}/{1:s}/wind_labels_{2:s}.nc'
            if event_type_string == linkage.WIND_EVENT_STRING
            else '{0:s}/{1:s}/tornado_labels_{2:s}.nc'
        )

        target_file_name = file_name_template.format(
            top_directory_name, spc_date_string[:4], spc_date_string)

    if not raise_error_if_missing or os.path.isfile(target_file_name):
        return target_file_name

    error_string = 'Cannot find file.  Expected at: "{0:s}"'.format(
        target_file_name)
    raise ValueError(error_string)
def class_fractions_to_weights(
        sampling_fraction_by_class_dict, target_name, binarize_target):
    """Turns per-class sampling fractions into loss-function weights.

    Each weight is proportional to the inverse of the class's sampling
    fraction, and the weights are scaled to sum to 1.

    :param sampling_fraction_by_class_dict: See doc for `check_class_fractions`.
    :param target_name: Same.
    :param binarize_target: Boolean flag.  If True, the target variable will be
        binarized, so that the highest class = 1 and all other classes = 0.
        Otherwise, the original number of classes will be retained, except that
        -2 ("dead storm") will be mapped to 0 (the lowest class).
    :return: lf_weight_by_class_dict: Dictionary, where each key is the integer
        representing a class and each value is the corresponding loss-function
        weight.
    """

    check_class_fractions(
        sampling_fraction_by_class_dict=sampling_fraction_by_class_dict,
        target_name=target_name)
    error_checking.assert_is_boolean(binarize_target)

    original_keys = list(sampling_fraction_by_class_dict.keys())
    dead_storm_key = target_val_utils.DEAD_STORM_INTEGER

    if binarize_target:
        # Highest class becomes the positive class; everything else is lumped
        # into the negative class.
        highest_key = numpy.max(numpy.array(original_keys))
        positive_fraction = sampling_fraction_by_class_dict[highest_key]

        fraction_by_class_dict = {
            0: 1. - positive_fraction,
            1: positive_fraction
        }
    else:
        fraction_by_class_dict = copy.deepcopy(
            sampling_fraction_by_class_dict)

        if dead_storm_key in original_keys:
            # Fold the "dead storm" class into class 0.
            fraction_by_class_dict[0] = (
                fraction_by_class_dict[0] +
                fraction_by_class_dict.pop(dead_storm_key)
            )

    final_keys = list(fraction_by_class_dict.keys())
    final_fractions = numpy.array(list(fraction_by_class_dict.values()))

    # Inverse-frequency weights, normalized to sum to 1.
    weights = 1. / final_fractions
    weights = weights / numpy.sum(weights)

    return dict(zip(final_keys, weights))
示例#30
0
def find_file(
        top_prediction_dir_name, first_init_time_unix_sec,
        last_init_time_unix_sec, gridded, raise_error_if_missing=False):
    """Locates file with gridded or ungridded predictions.

    The file is expected in a subdirectory named for the SPC date of the first
    initial time.

    :param top_prediction_dir_name: Name of top-level directory with prediction
        files.
    :param first_init_time_unix_sec: First initial time in file.  The "initial
        time" is the time of the storm object for which the prediction is being
        made.  This is different than the valid-time window (time range for
        which the prediction is valid).
    :param last_init_time_unix_sec: Last initial time in file.
    :param gridded: Boolean flag.  If True, will look for gridded file.  If
        False, will look for ungridded file.
    :param raise_error_if_missing: Boolean flag.  If file is missing and
        `raise_error_if_missing = True`, this method will error out.
    :return: prediction_file_name: Path to prediction file.  If file is missing
        and `raise_error_if_missing = False`, this will be the expected path.
    :raises: ValueError: if file is missing and `raise_error_if_missing = True`.
    """

    # TODO(thunderhoser): Put lead time in file names.

    error_checking.assert_is_string(top_prediction_dir_name)
    error_checking.assert_is_integer(first_init_time_unix_sec)
    error_checking.assert_is_integer(last_init_time_unix_sec)
    error_checking.assert_is_geq(
        last_init_time_unix_sec, first_init_time_unix_sec)

    error_checking.assert_is_boolean(gridded)
    error_checking.assert_is_boolean(raise_error_if_missing)

    spc_date_string = time_conversion.time_to_spc_date_string(
        first_init_time_unix_sec)

    # Gridded and ungridded files differ in both name prefix and extension.
    if gridded:
        grid_type_string, file_extension = 'gridded', '.p'
    else:
        grid_type_string, file_extension = 'ungridded', '.nc'

    first_time_string = time_conversion.unix_sec_to_string(
        first_init_time_unix_sec, FILE_NAME_TIME_FORMAT)
    last_time_string = time_conversion.unix_sec_to_string(
        last_init_time_unix_sec, FILE_NAME_TIME_FORMAT)

    prediction_file_name = (
        '{0:s}/{1:s}/{2:s}/{3:s}_predictions_{4:s}_{5:s}{6:s}'
    ).format(
        top_prediction_dir_name, spc_date_string[:4], spc_date_string,
        grid_type_string, first_time_string, last_time_string, file_extension
    )

    if not raise_error_if_missing or os.path.isfile(prediction_file_name):
        return prediction_file_name

    error_string = 'Cannot find file.  Expected at: "{0:s}"'.format(
        prediction_file_name)
    raise ValueError(error_string)