示例#1
0
    def test_target_params_to_name_tornado(self):
        """Ensures correct output from target_params_to_name.

        In this case, target variable is based on tornado occurrence.  Only the
        lead-time window and linkage-distance buffer are passed.
        """

        this_target_name = target_val_utils.target_params_to_name(
            min_lead_time_sec=MIN_LEAD_TIME_SEC,
            max_lead_time_sec=MAX_LEAD_TIME_SEC,
            min_link_distance_metres=MIN_LINK_DISTANCE_METRES,
            max_link_distance_metres=MAX_LINK_DISTANCE_METRES)

        # assertEqual (unlike assertTrue on an `==` expression) reports both
        # the actual and the expected name when the check fails.
        self.assertEqual(this_target_name, TORNADO_TARGET_NAME)
示例#2
0
    def test_target_params_to_name_wind_regression(self):
        """Ensures correct output from target_params_to_name.

        In this case, target variable is based on wind-speed regression (a
        percentile level is passed, but no class cutoffs).
        """

        this_target_name = target_val_utils.target_params_to_name(
            min_lead_time_sec=MIN_LEAD_TIME_SEC,
            max_lead_time_sec=MAX_LEAD_TIME_SEC,
            min_link_distance_metres=MIN_LINK_DISTANCE_METRES,
            max_link_distance_metres=MAX_LINK_DISTANCE_METRES,
            wind_speed_percentile_level=WIND_SPEED_PERCENTILE_LEVEL)

        # assertEqual (unlike assertTrue on an `==` expression) reports both
        # the actual and the expected name when the check fails.
        self.assertEqual(this_target_name, WIND_REGRESSION_NAME)
示例#3
0
    def test_target_params_to_name_tornadogenesis(self):
        """Ensures correct output from target_params_to_name.

        In this case, target variable is based on tornadogenesis
        (`tornadogenesis_only=True`) with a minimum Fujita rating.
        """

        this_target_name = target_val_utils.target_params_to_name(
            min_lead_time_sec=MIN_LEAD_TIME_SEC,
            max_lead_time_sec=MAX_LEAD_TIME_SEC,
            min_link_distance_metres=MIN_LINK_DISTANCE_METRES,
            max_link_distance_metres=MAX_LINK_DISTANCE_METRES,
            tornadogenesis_only=True,
            min_fujita_rating=MIN_FUJITA_FOR_GENESIS)

        # assertEqual (unlike assertTrue on an `==` expression) reports both
        # the actual and the expected name when the check fails.
        self.assertEqual(this_target_name, TORNADOGENESIS_TARGET_NAME)
示例#4
0
    def test_target_params_to_name_wind_classifn_0lead(self):
        """Ensures correct output from target_params_to_name.

        In this case, target variable is based on wind-speed classification
        (percentile level and class cutoffs are both passed) and minimum lead
        time is zero.
        """

        this_target_name = target_val_utils.target_params_to_name(
            min_lead_time_sec=0,
            max_lead_time_sec=MAX_LEAD_TIME_SEC,
            min_link_distance_metres=MIN_LINK_DISTANCE_METRES,
            max_link_distance_metres=MAX_LINK_DISTANCE_METRES,
            wind_speed_percentile_level=WIND_SPEED_PERCENTILE_LEVEL,
            wind_speed_cutoffs_kt=WIND_SPEED_CUTOFFS_KT)

        # assertEqual (unlike assertTrue on an `==` expression) reports both
        # the actual and the expected name when the check fails.
        self.assertEqual(this_target_name, WIND_CLASSIFICATION_NAME_0LEAD)
示例#5
0
def _run(model_file_name, example_file_name, first_time_string,
         last_time_string, top_output_dir_name):
    """Runs a trained CNN on one example file and writes the predictions.

    This is effectively the main method.

    If the example generator yields nothing, a prediction file containing zero
    storm objects is still written.

    :param model_file_name: See documentation at top of file.
    :param example_file_name: Same.
    :param first_time_string: Same.
    :param last_time_string: Same.
    :param top_output_dir_name: Same.
    """

    print('Reading model from: "{0:s}"...'.format(model_file_name))
    model_object = cnn.read_model(model_file_name)

    # Metadata file is looked for in the same directory as the model file.
    metafile_name = '{0:s}/model_metadata.p'.format(
        os.path.split(model_file_name)[0])

    print('Reading metadata from: "{0:s}"...'.format(metafile_name))
    metadata_dict = cnn.read_model_metadata(metafile_name)
    option_dict = metadata_dict[cnn.TRAINING_OPTION_DICT_KEY]

    first_time_unix_sec = time_conversion.string_to_unix_sec(
        first_time_string, INPUT_TIME_FORMAT)
    last_time_unix_sec = time_conversion.string_to_unix_sec(
        last_time_string, INPUT_TIME_FORMAT)

    # Re-purpose the training options for inference on this one file and
    # time period.
    option_dict[trainval_io.SAMPLING_FRACTIONS_KEY] = None
    option_dict[trainval_io.EXAMPLE_FILES_KEY] = [example_file_name]
    option_dict[trainval_io.FIRST_STORM_TIME_KEY] = first_time_unix_sec
    option_dict[trainval_io.LAST_STORM_TIME_KEY] = last_time_unix_sec

    # Pick the generator that matches the model's input format.
    if metadata_dict[cnn.LAYER_OPERATIONS_KEY] is not None:
        example_generator = testing_io.gridrad_generator_2d_reduced(
            option_dict=option_dict,
            list_of_operation_dicts=metadata_dict[cnn.LAYER_OPERATIONS_KEY],
            num_examples_total=LARGE_INTEGER)
    elif metadata_dict[cnn.CONV_2D3D_KEY]:
        example_generator = testing_io.myrorss_generator_2d3d(
            option_dict=option_dict, num_examples_total=LARGE_INTEGER)
    else:
        example_generator = testing_io.generator_2d_or_3d(
            option_dict=option_dict, num_examples_total=LARGE_INTEGER)

    include_soundings = (
        option_dict[trainval_io.SOUNDING_FIELDS_KEY] is not None
    )

    # None means that the generator produced no examples at all.
    storm_object_dict = next(example_generator, None)
    print(SEPARATOR_STRING)

    if storm_object_dict is not None:
        observed_labels = storm_object_dict[testing_io.TARGET_ARRAY_KEY]
        predictor_matrices = storm_object_dict[testing_io.INPUT_MATRICES_KEY]

        # When soundings are used, they are the last predictor matrix.
        sounding_matrix = predictor_matrices[-1] if include_soundings else None

        # The dedicated 2D/3D routine is used only for 2D/3D models without
        # upsampled reflectivity; everything else goes through the generic one.
        use_2d3d_routine = (
            metadata_dict[cnn.CONV_2D3D_KEY] and
            not option_dict[trainval_io.UPSAMPLE_REFLECTIVITY_KEY]
        )

        if use_2d3d_routine:
            class_probability_matrix = cnn.apply_2d3d_cnn(
                model_object=model_object,
                reflectivity_matrix_dbz=predictor_matrices[0],
                azimuthal_shear_matrix_s01=predictor_matrices[1],
                sounding_matrix=sounding_matrix,
                verbose=True)
        else:
            class_probability_matrix = cnn.apply_2d_or_3d_cnn(
                model_object=model_object,
                radar_image_matrix=predictor_matrices[0],
                sounding_matrix=sounding_matrix,
                verbose=True)

        print(SEPARATOR_STRING)
        num_examples = class_probability_matrix.shape[0]

        # Summarize the distribution of probabilities in column 1.
        for this_level in range(0, 101, 10):
            this_percentile = numpy.percentile(
                class_probability_matrix[:, 1], this_level)

            print(
                '{0:d}th percentile of {1:d} forecast probs = {2:.4f}'.format(
                    this_level, num_examples, this_percentile))

        print('\n')

    target_param_dict = target_val_utils.target_name_to_params(
        option_dict[trainval_io.TARGET_NAME_KEY])

    # False for tornado occurrence, True for tornadogenesis, None otherwise.
    genesis_only = {
        linkage.TORNADO_EVENT_STRING: False,
        linkage.TORNADOGENESIS_EVENT_STRING: True
    }.get(target_param_dict[target_val_utils.EVENT_TYPE_KEY])

    # NOTE(review): max linkage distance is hard-coded to 10000 rather than
    # taken from `target_param_dict` -- confirm that this is intentional.
    target_name = target_val_utils.target_params_to_name(
        min_lead_time_sec=target_param_dict[target_val_utils.MIN_LEAD_TIME_KEY],
        max_lead_time_sec=target_param_dict[target_val_utils.MAX_LEAD_TIME_KEY],
        min_link_distance_metres=target_param_dict[
            target_val_utils.MIN_LINKAGE_DISTANCE_KEY],
        max_link_distance_metres=10000.,
        genesis_only=genesis_only)

    output_file_name = prediction_io.find_file(
        top_prediction_dir_name=top_output_dir_name,
        first_init_time_unix_sec=first_time_unix_sec,
        last_init_time_unix_sec=last_time_unix_sec,
        gridded=False,
        raise_error_if_missing=False)

    print('Writing "{0:s}" predictions to: "{1:s}"...'.format(
        target_name, output_file_name))

    if storm_object_dict is None:
        # No examples: write empty arrays, with at least two probability
        # columns (more if the model's last layer has more outputs).
        num_output_neurons = (
            model_object.layers[-1].output.get_shape().as_list()[-1])
        num_classes = max([num_output_neurons, 2])

        class_probability_matrix = numpy.full((0, num_classes), numpy.nan)
        full_id_strings = []
        storm_times_unix_sec = numpy.array([], dtype=int)
        observed_labels = numpy.array([], dtype=int)
    else:
        full_id_strings = storm_object_dict[testing_io.FULL_IDS_KEY]
        storm_times_unix_sec = storm_object_dict[testing_io.STORM_TIMES_KEY]

    prediction_io.write_ungridded_predictions(
        netcdf_file_name=output_file_name,
        class_probability_matrix=class_probability_matrix,
        storm_ids=full_id_strings,
        storm_times_unix_sec=storm_times_unix_sec,
        target_name=target_name,
        observed_labels=observed_labels)
示例#6
0
    tracking_utils.VALID_TIME_COLUMN: THESE_TIMES_UNIX_SEC,
    tracking_utils.TRACKING_END_TIME_COLUMN: THESE_END_TIMES_UNIX_SEC,
    linkage.MERGING_PRED_FLAG_COLUMN: THESE_MERGING_FLAGS,
    linkage.EVENT_LATITUDES_COLUMN: THESE_EVENT_LATITUDES_DEG,
    linkage.EVENT_LONGITUDES_COLUMN: THESE_EVENT_LONGITUDES_DEG,
    linkage.LINKAGE_DISTANCES_COLUMN: THESE_LINK_DIST_METRES,
    linkage.RELATIVE_EVENT_TIMES_COLUMN: THESE_RELATIVE_TIMES_UNIX_SEC,
    linkage.FUJITA_RATINGS_COLUMN: THESE_FUJITA_RATINGS
}

# Storm-to-tornado linkage table, built from the column dictionary above.
STORM_TO_TORNADOES_TABLE = pandas.DataFrame.from_dict(THIS_DICT)

# Target name for tornado occurrence (`tornadogenesis_only=False`), with
# minimum Fujita rating `MIN_FUJITA_FOR_OCCURRENCE`.
MAIN_TORNADO_TARGET_NAME = target_val_utils.target_params_to_name(
    min_lead_time_sec=MIN_LEAD_TIME_SEC,
    max_lead_time_sec=MAX_LEAD_TIME_SEC,
    min_link_distance_metres=MIN_LINK_DISTANCE_METRES,
    max_link_distance_metres=MAX_LINK_DISTANCE_METRES,
    min_fujita_rating=MIN_FUJITA_FOR_OCCURRENCE,
    tornadogenesis_only=False)

# Local alias for the sentinel defined in `target_val_utils`.
INVALID_STORM_INTEGER = target_val_utils.INVALID_STORM_INTEGER

# Presumably the expected value of the target above for each row of
# STORM_TO_TORNADOES_TABLE -- TODO confirm against the test that uses it.
MAIN_TORNADO_TARGET_VALUES = numpy.array(
    [1, 0, INVALID_STORM_INTEGER, 1, INVALID_STORM_INTEGER], dtype=int)

# Same lead-time window and distance buffer as above, but for tornadogenesis
# (`tornadogenesis_only=True`) with minimum Fujita rating
# `MIN_FUJITA_FOR_GENESIS`.
MAIN_TORNADOGENESIS_TARGET_NAME = target_val_utils.target_params_to_name(
    min_lead_time_sec=MIN_LEAD_TIME_SEC,
    max_lead_time_sec=MAX_LEAD_TIME_SEC,
    min_link_distance_metres=MIN_LINK_DISTANCE_METRES,
    max_link_distance_metres=MAX_LINK_DISTANCE_METRES,
    min_fujita_rating=MIN_FUJITA_FOR_GENESIS,
    tornadogenesis_only=True)
def _run(prediction_file_name, best_prob_threshold, upgraded_min_ef_rating,
         top_target_dir_name, num_bootstrap_reps, downsampling_fractions,
         output_dir_name):
    """Evaluates binary CNN predictions stored in one prediction file.

    This is effectively the main method.

    :param prediction_file_name: See documentation at top of file.
    :param best_prob_threshold: Same.
    :param upgraded_min_ef_rating: Same.
    :param top_target_dir_name: Same.
    :param num_bootstrap_reps: Same.
    :param downsampling_fractions: Same.
    :param output_dir_name: Same.
    :raises: ValueError: if file contains no examples (storm objects).
    :raises: ValueError: if file contains multi-class predictions.
    :raises: ValueError: if you try to upgrade minimum EF rating but the
        original is non-zero.
    """

    # Normalize input args.  A non-positive EF rating and a negative
    # probability threshold are both turned into None; the number of bootstrap
    # replicates is forced to be at least 1.
    if upgraded_min_ef_rating <= 0:
        upgraded_min_ef_rating = None

    num_bootstrap_reps = max([num_bootstrap_reps, 1])

    if best_prob_threshold < 0:
        best_prob_threshold = None

    # Read predictions.
    print('Reading data from: "{0:s}"...'.format(prediction_file_name))
    prediction_dict = prediction_io.read_ungridded_predictions(
        prediction_file_name)

    observed_labels = prediction_dict[prediction_io.OBSERVED_LABELS_KEY]
    probability_matrix = prediction_dict[prediction_io.PROBABILITY_MATRIX_KEY]

    num_examples = len(observed_labels)
    num_classes = probability_matrix.shape[1]

    if num_examples == 0:
        raise ValueError('File contains no examples (storm objects).')

    if num_classes > 2:
        raise ValueError((
            'This script handles only binary, not {0:d}-class, classification.'
        ).format(num_classes))

    # Last column of the probability matrix is used as the forecast.
    forecast_probabilities = probability_matrix[:, -1]

    # If necessary, upgrade minimum EF rating.
    if upgraded_min_ef_rating is not None:
        param_dict = target_val_utils.target_name_to_params(
            prediction_dict[prediction_io.TARGET_NAME_KEY])
        orig_min_ef_rating = param_dict[target_val_utils.MIN_FUJITA_RATING_KEY]

        if orig_min_ef_rating != 0:
            raise ValueError((
                'Cannot upgrade minimum EF rating when original min rating is '
                'non-zero (in this case it is {0:d}).'
            ).format(orig_min_ef_rating))

        # New target differs from the original only in minimum EF rating.
        min_lead_time_sec = param_dict[target_val_utils.MIN_LEAD_TIME_KEY]
        max_lead_time_sec = param_dict[target_val_utils.MAX_LEAD_TIME_KEY]
        min_link_distance_metres = param_dict[
            target_val_utils.MIN_LINKAGE_DISTANCE_KEY]
        max_link_distance_metres = param_dict[
            target_val_utils.MAX_LINKAGE_DISTANCE_KEY]
        genesis_flag = (
            param_dict[target_val_utils.EVENT_TYPE_KEY] ==
            linkage.TORNADOGENESIS_EVENT_STRING
        )

        new_target_name = target_val_utils.target_params_to_name(
            min_lead_time_sec=min_lead_time_sec,
            max_lead_time_sec=max_lead_time_sec,
            min_link_distance_metres=min_link_distance_metres,
            max_link_distance_metres=max_link_distance_metres,
            tornadogenesis_only=genesis_flag,
            min_fujita_rating=upgraded_min_ef_rating)

        print(SEPARATOR_STRING)

        observed_labels = _read_new_target_values(
            top_target_dir_name=top_target_dir_name,
            new_target_name=new_target_name,
            full_storm_id_strings=prediction_dict[prediction_io.STORM_IDS_KEY],
            storm_times_unix_sec=prediction_dict[prediction_io.STORM_TIMES_KEY],
            orig_target_values=observed_labels)

        print(SEPARATOR_STRING)

        # Keep only examples whose new label is non-negative.
        keep_indices = numpy.where(observed_labels >= 0)[0]
        observed_labels = observed_labels[keep_indices]
        forecast_probabilities = forecast_probabilities[keep_indices]

    # Do calculations.
    output_file_name = model_eval.find_file_from_prediction_file(
        input_prediction_file_name=prediction_file_name,
        output_dir_name=output_dir_name,
        raise_error_if_missing=False)
    file_system_utils.mkdir_recursive_if_necessary(file_name=output_file_name)

    # Any non-positive fraction disables downsampling altogether; otherwise
    # the first two fractions are keyed by class (0 and 1).
    downsampling_dict = None
    if not numpy.any(downsampling_fractions <= 0):
        downsampling_dict = {
            0: downsampling_fractions[0],
            1: downsampling_fractions[1]
        }

    _compute_scores(
        forecast_probabilities=forecast_probabilities,
        observed_labels=observed_labels,
        num_bootstrap_reps=num_bootstrap_reps,
        best_prob_threshold=best_prob_threshold,
        downsampling_dict=downsampling_dict,
        output_file_name=output_file_name)
示例#8
0
# Columns of the storm-to-tornado linkage table built below.
THIS_DICT = {
    tracking_utils.STORM_ID_COLUMN: THESE_STORM_IDS,
    tracking_utils.TIME_COLUMN: THESE_TIMES_UNIX_SEC,
    tracking_utils.TRACKING_END_TIME_COLUMN: THESE_END_TIMES_UNIX_SEC,
    linkage.EVENT_LATITUDES_COLUMN: THESE_EVENT_LATITUDES_DEG,
    linkage.EVENT_LONGITUDES_COLUMN: THESE_EVENT_LONGITUDES_DEG,
    linkage.LINKAGE_DISTANCES_COLUMN: THESE_LINK_DIST_METRES,
    linkage.RELATIVE_EVENT_TIMES_COLUMN: THESE_RELATIVE_TIMES_UNIX_SEC,
    linkage.FUJITA_RATINGS_COLUMN: THESE_FUJITA_RATINGS
}

STORM_TO_TORNADOES_TABLE = pandas.DataFrame.from_dict(THIS_DICT)

# Target name built from the lead-time window and distance buffer only (no
# wind-speed arguments are passed).
TARGET_NAME = target_val_utils.target_params_to_name(
    min_lead_time_sec=MIN_LEAD_TIME_SEC,
    max_lead_time_sec=MAX_LEAD_TIME_SEC,
    min_link_distance_metres=MIN_LINK_DISTANCE_METRES,
    max_link_distance_metres=MAX_LINK_DISTANCE_METRES)

# Local alias for the sentinel defined in `target_val_utils`.
INVALID_STORM_INTEGER = target_val_utils.INVALID_STORM_INTEGER

# Presumably the expected value of TARGET_NAME for each row of
# STORM_TO_TORNADOES_TABLE -- TODO confirm against the test that uses it.
TARGET_VALUES = numpy.array(
    [1, 0, INVALID_STORM_INTEGER, 1, INVALID_STORM_INTEGER], dtype=int)

# The following constants are used to test find_target_file.
TOP_DIRECTORY_NAME = 'target_values'
FILE_TIME_UNIX_SEC = 1517523991  # 22:26:31 UTC 1 Feb 2018
FILE_SPC_DATE_STRING = '20180201'

# Expected paths for a file holding one time step and a file holding one day.
WIND_FILE_NAME_ONE_TIME = (
    'target_values/2018/20180201/wind_labels_2018-02-01-222631.nc')
WIND_FILE_NAME_ONE_DAY = 'target_values/2018/wind_labels_20180201.nc'
示例#9
0
def _run(top_linkage_dir_name, spc_date_string, min_lead_times_sec,
         max_lead_times_sec, min_link_distances_metres,
         max_link_distances_metres, event_type_string,
         wind_speed_percentile_level, wind_speed_cutoffs_kt,
         top_output_dir_name):
    """Computes target value for each storm object, time window, and buffer.

    This is effectively the main method.

    One target variable is created for every combination of lead-time window,
    linkage-distance buffer, and (for wind events only) set of wind-speed
    cutoffs.  All target variables are written to a single file.

    :param top_linkage_dir_name: See documentation at top of file.
    :param spc_date_string: Same.
    :param min_lead_times_sec: Same.
    :param max_lead_times_sec: Same.
    :param min_link_distances_metres: Same.
    :param max_link_distances_metres: Same.
    :param event_type_string: Same.
    :param wind_speed_percentile_level: Same.
    :param wind_speed_cutoffs_kt: Same.
    :param top_output_dir_name: Same.
    """

    # Min and max lead times must pair up one-to-one.
    num_lead_time_windows = len(min_lead_times_sec)
    error_checking.assert_is_numpy_array(
        max_lead_times_sec,
        exact_dimensions=numpy.array([num_lead_time_windows])
    )

    # Same for min and max linkage distances.
    num_distance_buffers = len(min_link_distances_metres)
    error_checking.assert_is_numpy_array(
        max_link_distances_metres,
        exact_dimensions=numpy.array([num_distance_buffers])
    )

    linkage_file_name = linkage.find_linkage_file(
        top_directory_name=top_linkage_dir_name,
        event_type_string=event_type_string, spc_date_string=spc_date_string)

    # Parenthesized single-argument print works as both a Python 2 statement
    # and a Python 3 function call.
    print('Reading data from: "{0:s}"...'.format(linkage_file_name))
    storm_to_events_table = linkage.read_linkage_file(linkage_file_name)

    if event_type_string == linkage.WIND_EVENT_STRING:
        list_of_cutoff_arrays_kt = general_utils.split_array_by_nan(
            wind_speed_cutoffs_kt)

        # Count the cutoff *sets* (the list indexed by k below), not the
        # elements of the unsplit input array.
        num_cutoff_sets = len(list_of_cutoff_arrays_kt)
    else:
        list_of_cutoff_arrays_kt = None
        num_cutoff_sets = 1

    target_names = []

    for i in range(num_lead_time_windows):
        for j in range(num_distance_buffers):
            for k in range(num_cutoff_sets):
                if event_type_string == linkage.WIND_EVENT_STRING:
                    this_target_name = target_val_utils.target_params_to_name(
                        min_lead_time_sec=min_lead_times_sec[i],
                        max_lead_time_sec=max_lead_times_sec[i],
                        min_link_distance_metres=min_link_distances_metres[j],
                        max_link_distance_metres=max_link_distances_metres[j],
                        wind_speed_percentile_level=wind_speed_percentile_level,
                        wind_speed_cutoffs_kt=list_of_cutoff_arrays_kt[k])

                    target_names.append(this_target_name)
                    print('Computing values for "{0:s}"...'.format(
                        target_names[-1]))

                    # Table is re-assigned on every iteration, so it carries
                    # the results of all previous iterations forward.
                    storm_to_events_table = (
                        target_val_utils.create_wind_classification_targets(
                            storm_to_winds_table=storm_to_events_table,
                            min_lead_time_sec=min_lead_times_sec[i],
                            max_lead_time_sec=max_lead_times_sec[i],
                            min_link_distance_metres=min_link_distances_metres[
                                j],
                            max_link_distance_metres=max_link_distances_metres[
                                j],
                            percentile_level=wind_speed_percentile_level,
                            class_cutoffs_kt=list_of_cutoff_arrays_kt[k])
                    )
                else:
                    this_target_name = target_val_utils.target_params_to_name(
                        min_lead_time_sec=min_lead_times_sec[i],
                        max_lead_time_sec=max_lead_times_sec[i],
                        min_link_distance_metres=min_link_distances_metres[j],
                        max_link_distance_metres=max_link_distances_metres[j])

                    target_names.append(this_target_name)
                    print('Computing values for "{0:s}"...'.format(
                        target_names[-1]))

                    storm_to_events_table = (
                        target_val_utils.create_tornado_targets(
                            storm_to_tornadoes_table=storm_to_events_table,
                            min_lead_time_sec=min_lead_times_sec[i],
                            max_lead_time_sec=max_lead_times_sec[i],
                            min_link_distance_metres=min_link_distances_metres[
                                j],
                            max_link_distance_metres=max_link_distances_metres[
                                j]
                        )
                    )

    target_file_name = target_val_utils.find_target_file(
        top_directory_name=top_output_dir_name,
        event_type_string=event_type_string, spc_date_string=spc_date_string,
        raise_error_if_missing=False)

    print('Writing target values to: "{0:s}"...'.format(target_file_name))
    target_val_utils.write_target_values(
        storm_to_events_table=storm_to_events_table, target_names=target_names,
        netcdf_file_name=target_file_name)
# Columns of the storm-to-tornado linkage table built below.
THIS_DICT = {
    tracking_utils.FULL_ID_COLUMN: THESE_FULL_ID_STRINGS,
    tracking_utils.VALID_TIME_COLUMN: THESE_TIMES_UNIX_SEC,
    tracking_utils.TRACKING_END_TIME_COLUMN: THESE_END_TIMES_UNIX_SEC,
    linkage.EVENT_LATITUDES_COLUMN: THESE_EVENT_LATITUDES_DEG,
    linkage.EVENT_LONGITUDES_COLUMN: THESE_EVENT_LONGITUDES_DEG,
    linkage.LINKAGE_DISTANCES_COLUMN: THESE_LINK_DIST_METRES,
    linkage.RELATIVE_EVENT_TIMES_COLUMN: THESE_RELATIVE_TIMES_UNIX_SEC,
    linkage.FUJITA_RATINGS_COLUMN: THESE_FUJITA_RATINGS
}

STORM_TO_TORNADOES_TABLE = pandas.DataFrame.from_dict(THIS_DICT)

# Target name for tornado occurrence (`genesis_only=False`).
MAIN_TORNADO_TARGET_NAME = target_val_utils.target_params_to_name(
    min_lead_time_sec=MIN_LEAD_TIME_SEC,
    max_lead_time_sec=MAX_LEAD_TIME_SEC,
    min_link_distance_metres=MIN_LINK_DISTANCE_METRES,
    max_link_distance_metres=MAX_LINK_DISTANCE_METRES,
    genesis_only=False)

# Local alias for the sentinel defined in `target_val_utils`.
INVALID_STORM_INTEGER = target_val_utils.INVALID_STORM_INTEGER

# Presumably the expected value of the target above for each row of
# STORM_TO_TORNADOES_TABLE -- TODO confirm against the test that uses it.
MAIN_TORNADO_TARGET_VALUES = numpy.array(
    [1, 0, INVALID_STORM_INTEGER, 1, INVALID_STORM_INTEGER], dtype=int)

# Same lead-time window and distance buffer as above, but for tornadogenesis
# (`genesis_only=True`).
MAIN_TORNADOGENESIS_TARGET_NAME = target_val_utils.target_params_to_name(
    min_lead_time_sec=MIN_LEAD_TIME_SEC,
    max_lead_time_sec=MAX_LEAD_TIME_SEC,
    min_link_distance_metres=MIN_LINK_DISTANCE_METRES,
    max_link_distance_metres=MAX_LINK_DISTANCE_METRES,
    genesis_only=True)

MAIN_TORNADOGENESIS_TARGET_VALUES = numpy.array(
def _compute_targets_one_day(storm_to_events_table, spc_date_string,
                             min_lead_times_sec, max_lead_times_sec,
                             min_link_distances_metres,
                             max_link_distances_metres, event_type_string,
                             wind_speed_percentile_level,
                             wind_speed_cutoffs_kt, top_output_dir_name):
    """Computes target values for one SPC date.

    One target variable is created for every combination of lead-time window,
    linkage-distance buffer, and (for wind events only) set of wind-speed
    cutoffs.  All target variables are written to a single file.

    :param storm_to_events_table: pandas DataFrame returned by
        `linkage.read_linkage_file`.
    :param spc_date_string: SPC date (format "yyyymmdd").
    :param min_lead_times_sec: See documentation at top of file.
    :param max_lead_times_sec: Same.
    :param min_link_distances_metres: Same.
    :param max_link_distances_metres: Same.
    :param event_type_string: Same.
    :param wind_speed_percentile_level: Same.
    :param wind_speed_cutoffs_kt: Same.
    :param top_output_dir_name: Same.
    """

    num_lead_time_windows = len(min_lead_times_sec)
    num_distance_buffers = len(min_link_distances_metres)

    is_wind_event = event_type_string == linkage.WIND_EVENT_STRING

    if is_wind_event:
        list_of_cutoff_arrays_kt = general_utils.split_array_by_nan(
            wind_speed_cutoffs_kt)

        # Count the cutoff *sets* (the list indexed by k below), not the
        # elements of the unsplit input array.
        num_cutoff_sets = len(list_of_cutoff_arrays_kt)
        genesis_only = None
    else:
        list_of_cutoff_arrays_kt = None
        num_cutoff_sets = 1

        # Loop-invariant, so computed once here.  Used only for non-wind
        # events.
        genesis_only = (
            event_type_string == linkage.TORNADOGENESIS_EVENT_STRING
        )

    target_names = []

    for i in range(num_lead_time_windows):
        for j in range(num_distance_buffers):
            for k in range(num_cutoff_sets):
                if is_wind_event:
                    this_target_name = target_val_utils.target_params_to_name(
                        min_lead_time_sec=min_lead_times_sec[i],
                        max_lead_time_sec=max_lead_times_sec[i],
                        min_link_distance_metres=min_link_distances_metres[j],
                        max_link_distance_metres=max_link_distances_metres[j],
                        wind_speed_percentile_level=wind_speed_percentile_level,
                        wind_speed_cutoffs_kt=list_of_cutoff_arrays_kt[k])

                    target_names.append(this_target_name)

                    print(('Computing labels for "{0:s}" on SPC date {1:s}...'
                           ).format(this_target_name, spc_date_string))

                    # Table is re-assigned on every iteration, so it carries
                    # the results of all previous iterations forward.
                    storm_to_events_table = (
                        target_val_utils.create_wind_classification_targets(
                            storm_to_winds_table=storm_to_events_table,
                            min_lead_time_sec=min_lead_times_sec[i],
                            max_lead_time_sec=max_lead_times_sec[i],
                            min_link_distance_metres=min_link_distances_metres[
                                j],
                            max_link_distance_metres=max_link_distances_metres[
                                j],
                            percentile_level=wind_speed_percentile_level,
                            class_cutoffs_kt=list_of_cutoff_arrays_kt[k]))
                else:
                    this_target_name = target_val_utils.target_params_to_name(
                        min_lead_time_sec=min_lead_times_sec[i],
                        max_lead_time_sec=max_lead_times_sec[i],
                        min_link_distance_metres=min_link_distances_metres[j],
                        max_link_distance_metres=max_link_distances_metres[j],
                        genesis_only=genesis_only)

                    target_names.append(this_target_name)

                    print(('Computing labels for "{0:s}" on SPC date {1:s}...'
                           ).format(this_target_name, spc_date_string))

                    storm_to_events_table = (
                        target_val_utils.create_tornado_targets(
                            storm_to_tornadoes_table=storm_to_events_table,
                            min_lead_time_sec=min_lead_times_sec[i],
                            max_lead_time_sec=max_lead_times_sec[i],
                            min_link_distance_metres=min_link_distances_metres[
                                j],
                            max_link_distance_metres=max_link_distances_metres[
                                j],
                            genesis_only=genesis_only))

    target_file_name = target_val_utils.find_target_file(
        top_directory_name=top_output_dir_name,
        event_type_string=event_type_string,
        spc_date_string=spc_date_string,
        raise_error_if_missing=False)

    print('Writing target values to: "{0:s}"...'.format(target_file_name))
    target_val_utils.write_target_values(
        storm_to_events_table=storm_to_events_table,
        target_names=target_names,
        netcdf_file_name=target_file_name)
示例#12
0
# _file_name_to_batch_number.
TOP_DIRECTORY_NAME = 'foo'
BATCH_NUMBER = 1967
SPC_DATE_STRING = '19670502'

# Example-file paths for the shuffled layout (one file per batch number) and
# the unshuffled layout (one file per SPC date).
EXAMPLE_FILE_NAME_SHUFFLED = (
    'foo/batches0001000-0001999/input_examples_batch0001967.nc')
EXAMPLE_FILE_NAME_UNSHUFFLED = 'foo/1967/input_examples_19670502.nc'

# The following constants are used to test _check_target_vars.
TORNADO_MEAN_LEAD_TIME_SEC = 1800
WIND_MEAN_LEAD_TIME_SEC = 2700

# Lead-time window 0-3600 s (midpoint 1800 s), distance buffer 0-10000 m.
NEAR_TORNADO_TARGET_NAME = target_val_utils.target_params_to_name(
    min_lead_time_sec=0,
    max_lead_time_sec=3600,
    min_link_distance_metres=0,
    max_link_distance_metres=10000,
    tornadogenesis_only=False)

# Lead-time window 450-3150 s (midpoint 1800 s), distance buffer 0-20000 m.
MEDIUM_TORNADO_TARGET_NAME = target_val_utils.target_params_to_name(
    min_lead_time_sec=450,
    max_lead_time_sec=3150,
    min_link_distance_metres=0,
    max_link_distance_metres=20000,
    tornadogenesis_only=False)

FAR_TORNADO_TARGET_NAME = target_val_utils.target_params_to_name(
    min_lead_time_sec=900,
    max_lead_time_sec=2700,
    min_link_distance_metres=0,
    max_link_distance_metres=30000,
示例#13
0
def _compute_tornado_targets_one_day(storm_to_tornadoes_table, spc_date_string,
                                     min_lead_times_sec, max_lead_times_sec,
                                     min_link_distances_metres,
                                     max_link_distances_metres, genesis_only,
                                     min_fujita_ratings, top_output_dir_name):
    """Creates and writes tornado-related labels for one SPC date.

    One target variable is created for every combination of lead-time window,
    linkage-distance buffer, and minimum Fujita rating.  All of them are
    written to a single file.

    :param storm_to_tornadoes_table: pandas DataFrame returned by
        `linkage.read_linkage_file`.
    :param spc_date_string: SPC date (format "yyyymmdd").
    :param min_lead_times_sec: See documentation at top of file.
    :param max_lead_times_sec: Same.
    :param min_link_distances_metres: Same.
    :param max_link_distances_metres: Same.
    :param genesis_only: Boolean flag.  If True, will create labels for
        tornadogenesis only.  If False, will create labels for occurrence, which
        includes pre-existing tornadoes (at forecast time).
    :param min_fujita_ratings: See documentation at top of file.
    :param top_output_dir_name: Same.
    """

    num_time_windows = len(min_lead_times_sec)
    num_distance_buffers = len(min_link_distances_metres)
    num_strength_thresholds = len(min_fujita_ratings)

    target_names = []

    for window_index in range(num_time_windows):
        for buffer_index in range(num_distance_buffers):
            for rating_index in range(num_strength_thresholds):

                # Keyword args shared by the naming and labelling functions.
                shared_kwargs = {
                    'min_lead_time_sec': min_lead_times_sec[window_index],
                    'max_lead_time_sec': max_lead_times_sec[window_index],
                    'min_link_distance_metres':
                        min_link_distances_metres[buffer_index],
                    'max_link_distance_metres':
                        max_link_distances_metres[buffer_index],
                    'min_fujita_rating': min_fujita_ratings[rating_index]
                }

                new_target_name = target_val_utils.target_params_to_name(
                    tornadogenesis_only=genesis_only, **shared_kwargs)
                target_names.append(new_target_name)

                message_string = (
                    'Creating labels for "{0:s}" on SPC date "{1:s}"...'
                ).format(new_target_name, spc_date_string)
                print(message_string)

                # Table is re-assigned on every iteration, so it carries the
                # results of all previous iterations forward.
                storm_to_tornadoes_table = (
                    target_val_utils.create_tornado_targets(
                        storm_to_tornadoes_table=storm_to_tornadoes_table,
                        genesis_only=genesis_only, **shared_kwargs)
                )

                print(SEPARATOR_STRING)

    # Event type determines which target file is written.
    if genesis_only:
        event_type_string = linkage.TORNADOGENESIS_EVENT_STRING
    else:
        event_type_string = linkage.TORNADO_EVENT_STRING

    target_file_name = target_val_utils.find_target_file(
        top_directory_name=top_output_dir_name,
        event_type_string=event_type_string,
        spc_date_string=spc_date_string,
        raise_error_if_missing=False)

    print('Writing target values to: "{0:s}"...'.format(target_file_name))
    target_val_utils.write_target_values(
        storm_to_events_table=storm_to_tornadoes_table,
        target_names=target_names,
        netcdf_file_name=target_file_name)