def test_target_params_to_name_tornado(self):
    """Ensures correct output from target_params_to_name.

    In this case, target variable is based on tornado occurrence.
    """

    this_target_name = target_val_utils.target_params_to_name(
        min_lead_time_sec=MIN_LEAD_TIME_SEC,
        max_lead_time_sec=MAX_LEAD_TIME_SEC,
        min_link_distance_metres=MIN_LINK_DISTANCE_METRES,
        max_link_distance_metres=MAX_LINK_DISTANCE_METRES)

    # assertEqual reports both values on failure, unlike assertTrue(a == b).
    self.assertEqual(this_target_name, TORNADO_TARGET_NAME)
def test_target_params_to_name_wind_regression(self):
    """Ensures correct output from target_params_to_name.

    In this case, target variable is based on wind-speed regression.
    """

    this_target_name = target_val_utils.target_params_to_name(
        min_lead_time_sec=MIN_LEAD_TIME_SEC,
        max_lead_time_sec=MAX_LEAD_TIME_SEC,
        min_link_distance_metres=MIN_LINK_DISTANCE_METRES,
        max_link_distance_metres=MAX_LINK_DISTANCE_METRES,
        wind_speed_percentile_level=WIND_SPEED_PERCENTILE_LEVEL)

    # assertEqual reports both values on failure, unlike assertTrue(a == b).
    self.assertEqual(this_target_name, WIND_REGRESSION_NAME)
def test_target_params_to_name_tornadogenesis(self):
    """Ensures correct output from target_params_to_name.

    In this case, target variable is based on tornadogenesis.
    """

    this_target_name = target_val_utils.target_params_to_name(
        min_lead_time_sec=MIN_LEAD_TIME_SEC,
        max_lead_time_sec=MAX_LEAD_TIME_SEC,
        min_link_distance_metres=MIN_LINK_DISTANCE_METRES,
        max_link_distance_metres=MAX_LINK_DISTANCE_METRES,
        tornadogenesis_only=True, min_fujita_rating=MIN_FUJITA_FOR_GENESIS)

    # assertEqual reports both values on failure, unlike assertTrue(a == b).
    self.assertEqual(this_target_name, TORNADOGENESIS_TARGET_NAME)
def test_target_params_to_name_wind_classifn_0lead(self):
    """Ensures correct output from target_params_to_name.

    In this case, target variable is based on wind-speed classification and
    minimum lead time is zero.
    """

    this_target_name = target_val_utils.target_params_to_name(
        min_lead_time_sec=0, max_lead_time_sec=MAX_LEAD_TIME_SEC,
        min_link_distance_metres=MIN_LINK_DISTANCE_METRES,
        max_link_distance_metres=MAX_LINK_DISTANCE_METRES,
        wind_speed_percentile_level=WIND_SPEED_PERCENTILE_LEVEL,
        wind_speed_cutoffs_kt=WIND_SPEED_CUTOFFS_KT)

    # assertEqual reports both values on failure, unlike assertTrue(a == b).
    self.assertEqual(this_target_name, WIND_CLASSIFICATION_NAME_0LEAD)
def _run(model_file_name, example_file_name, first_time_string,
         last_time_string, top_output_dir_name):
    """Applies CNN to one example file.

    This is effectively the main method.

    :param model_file_name: See documentation at top of file.
    :param example_file_name: Same.
    :param first_time_string: Same.
    :param last_time_string: Same.
    :param top_output_dir_name: Same.
    """

    print('Reading model from: "{0:s}"...'.format(model_file_name))
    model_object = cnn.read_model(model_file_name)

    # Model metadata lives in a fixed-name pickle next to the model file.
    model_directory_name, _ = os.path.split(model_file_name)
    model_metafile_name = '{0:s}/model_metadata.p'.format(model_directory_name)

    print('Reading metadata from: "{0:s}"...'.format(model_metafile_name))
    model_metadata_dict = cnn.read_model_metadata(model_metafile_name)
    training_option_dict = model_metadata_dict[cnn.TRAINING_OPTION_DICT_KEY]

    first_time_unix_sec = time_conversion.string_to_unix_sec(
        first_time_string, INPUT_TIME_FORMAT)
    last_time_unix_sec = time_conversion.string_to_unix_sec(
        last_time_string, INPUT_TIME_FORMAT)

    # Reuse the training generator options, but point them at the one example
    # file and time window requested here, with no class-based downsampling.
    training_option_dict[trainval_io.SAMPLING_FRACTIONS_KEY] = None
    training_option_dict[trainval_io.EXAMPLE_FILES_KEY] = [example_file_name]
    training_option_dict[
        trainval_io.FIRST_STORM_TIME_KEY] = first_time_unix_sec
    training_option_dict[trainval_io.LAST_STORM_TIME_KEY] = last_time_unix_sec

    # Pick the generator that matches the model's input format.
    if model_metadata_dict[cnn.LAYER_OPERATIONS_KEY] is not None:
        generator_object = testing_io.gridrad_generator_2d_reduced(
            option_dict=training_option_dict,
            list_of_operation_dicts=model_metadata_dict[
                cnn.LAYER_OPERATIONS_KEY],
            num_examples_total=LARGE_INTEGER)
    elif model_metadata_dict[cnn.CONV_2D3D_KEY]:
        generator_object = testing_io.myrorss_generator_2d3d(
            option_dict=training_option_dict, num_examples_total=LARGE_INTEGER)
    else:
        generator_object = testing_io.generator_2d_or_3d(
            option_dict=training_option_dict, num_examples_total=LARGE_INTEGER)

    include_soundings = (
        training_option_dict[trainval_io.SOUNDING_FIELDS_KEY] is not None
    )

    # The generator raises StopIteration when the file yields no examples in
    # the requested window; treat that as "no storm objects".
    try:
        storm_object_dict = next(generator_object)
    except StopIteration:
        storm_object_dict = None

    print(SEPARATOR_STRING)

    if storm_object_dict is not None:
        observed_labels = storm_object_dict[testing_io.TARGET_ARRAY_KEY]
        list_of_predictor_matrices = storm_object_dict[
            testing_io.INPUT_MATRICES_KEY]

        # By convention the sounding matrix, if present, is last in the list.
        if include_soundings:
            sounding_matrix = list_of_predictor_matrices[-1]
        else:
            sounding_matrix = None

        if model_metadata_dict[cnn.CONV_2D3D_KEY]:
            if training_option_dict[trainval_io.UPSAMPLE_REFLECTIVITY_KEY]:
                class_probability_matrix = cnn.apply_2d_or_3d_cnn(
                    model_object=model_object,
                    radar_image_matrix=list_of_predictor_matrices[0],
                    sounding_matrix=sounding_matrix, verbose=True)
            else:
                class_probability_matrix = cnn.apply_2d3d_cnn(
                    model_object=model_object,
                    reflectivity_matrix_dbz=list_of_predictor_matrices[0],
                    azimuthal_shear_matrix_s01=list_of_predictor_matrices[1],
                    sounding_matrix=sounding_matrix, verbose=True)
        else:
            class_probability_matrix = cnn.apply_2d_or_3d_cnn(
                model_object=model_object,
                radar_image_matrix=list_of_predictor_matrices[0],
                sounding_matrix=sounding_matrix, verbose=True)

        print(SEPARATOR_STRING)

        # Print the forecast-probability distribution (column 1 = positive
        # class) as a quick sanity check.
        num_examples = class_probability_matrix.shape[0]

        for k in [0, 10, 20, 30, 40, 50, 60, 70, 80, 90, 100]:
            print(
                '{0:d}th percentile of {1:d} forecast probs = {2:.4f}'.format(
                    k, num_examples,
                    numpy.percentile(class_probability_matrix[:, 1], k)
                )
            )

        print('\n')

    # Recover the target variable's parameters from its name, then rebuild the
    # canonical name (needed even when there are no examples, since the empty
    # output file still records the target name).
    target_param_dict = target_val_utils.target_name_to_params(
        training_option_dict[trainval_io.TARGET_NAME_KEY])

    event_type_string = target_param_dict[target_val_utils.EVENT_TYPE_KEY]

    if event_type_string == linkage.TORNADO_EVENT_STRING:
        genesis_only = False
    elif event_type_string == linkage.TORNADOGENESIS_EVENT_STRING:
        genesis_only = True
    else:
        genesis_only = None

    # BUG FIX: max_link_distance_metres was hard-coded to 10000., unlike the
    # other three parameters, which are read from target_param_dict.  Read it
    # from the dict as well (same pattern as the analogous call in the
    # evaluation script).
    target_name = target_val_utils.target_params_to_name(
        min_lead_time_sec=target_param_dict[
            target_val_utils.MIN_LEAD_TIME_KEY],
        max_lead_time_sec=target_param_dict[
            target_val_utils.MAX_LEAD_TIME_KEY],
        min_link_distance_metres=target_param_dict[
            target_val_utils.MIN_LINKAGE_DISTANCE_KEY],
        max_link_distance_metres=target_param_dict[
            target_val_utils.MAX_LINKAGE_DISTANCE_KEY],
        genesis_only=genesis_only)

    output_file_name = prediction_io.find_file(
        top_prediction_dir_name=top_output_dir_name,
        first_init_time_unix_sec=first_time_unix_sec,
        last_init_time_unix_sec=last_time_unix_sec, gridded=False,
        raise_error_if_missing=False)

    print('Writing "{0:s}" predictions to: "{1:s}"...'.format(
        target_name, output_file_name))

    if storm_object_dict is None:
        # No examples: write an empty prediction file with the correct number
        # of classes (inferred from the model's output layer, minimum 2).
        num_output_neurons = (
            model_object.layers[-1].output.get_shape().as_list()[-1]
        )
        num_classes = max([num_output_neurons, 2])
        class_probability_matrix = numpy.full((0, num_classes), numpy.nan)

        prediction_io.write_ungridded_predictions(
            netcdf_file_name=output_file_name,
            class_probability_matrix=class_probability_matrix,
            storm_ids=[], storm_times_unix_sec=numpy.array([], dtype=int),
            target_name=target_name,
            observed_labels=numpy.array([], dtype=int)
        )

        return

    prediction_io.write_ungridded_predictions(
        netcdf_file_name=output_file_name,
        class_probability_matrix=class_probability_matrix,
        storm_ids=storm_object_dict[testing_io.FULL_IDS_KEY],
        storm_times_unix_sec=storm_object_dict[testing_io.STORM_TIMES_KEY],
        target_name=target_name, observed_labels=observed_labels)
# NOTE(review): the dict literal below is cut off at the top of this chunk —
# the opening "{" and earlier keys are outside the visible range.
    tracking_utils.VALID_TIME_COLUMN: THESE_TIMES_UNIX_SEC,
    tracking_utils.TRACKING_END_TIME_COLUMN: THESE_END_TIMES_UNIX_SEC,
    linkage.MERGING_PRED_FLAG_COLUMN: THESE_MERGING_FLAGS,
    linkage.EVENT_LATITUDES_COLUMN: THESE_EVENT_LATITUDES_DEG,
    linkage.EVENT_LONGITUDES_COLUMN: THESE_EVENT_LONGITUDES_DEG,
    linkage.LINKAGE_DISTANCES_COLUMN: THESE_LINK_DIST_METRES,
    linkage.RELATIVE_EVENT_TIMES_COLUMN: THESE_RELATIVE_TIMES_UNIX_SEC,
    linkage.FUJITA_RATINGS_COLUMN: THESE_FUJITA_RATINGS
}

STORM_TO_TORNADOES_TABLE = pandas.DataFrame.from_dict(THIS_DICT)

# Name of target variable for tornado *occurrence* (pre-existing tornadoes
# count as positive events).
MAIN_TORNADO_TARGET_NAME = target_val_utils.target_params_to_name(
    min_lead_time_sec=MIN_LEAD_TIME_SEC, max_lead_time_sec=MAX_LEAD_TIME_SEC,
    min_link_distance_metres=MIN_LINK_DISTANCE_METRES,
    max_link_distance_metres=MAX_LINK_DISTANCE_METRES,
    min_fujita_rating=MIN_FUJITA_FOR_OCCURRENCE, tornadogenesis_only=False)

# Sentinel used for storm objects that cannot be assigned a valid label.
INVALID_STORM_INTEGER = target_val_utils.INVALID_STORM_INTEGER

MAIN_TORNADO_TARGET_VALUES = numpy.array(
    [1, 0, INVALID_STORM_INTEGER, 1, INVALID_STORM_INTEGER], dtype=int)

# Name of target variable for tornadogenesis only.
MAIN_TORNADOGENESIS_TARGET_NAME = target_val_utils.target_params_to_name(
    min_lead_time_sec=MIN_LEAD_TIME_SEC, max_lead_time_sec=MAX_LEAD_TIME_SEC,
    min_link_distance_metres=MIN_LINK_DISTANCE_METRES,
    max_link_distance_metres=MAX_LINK_DISTANCE_METRES,
    min_fujita_rating=MIN_FUJITA_FOR_GENESIS, tornadogenesis_only=True)
def _run(prediction_file_name, best_prob_threshold, upgraded_min_ef_rating,
         top_target_dir_name, num_bootstrap_reps, downsampling_fractions,
         output_dir_name):
    """Evaluates CNN predictions.

    This is effectively the main method.

    :param prediction_file_name: See documentation at top of file.
    :param best_prob_threshold: Same.
    :param upgraded_min_ef_rating: Same.
    :param top_target_dir_name: Same.
    :param num_bootstrap_reps: Same.
    :param downsampling_fractions: Same.
    :param output_dir_name: Same.
    :raises: ValueError: if file contains no examples (storm objects).
    :raises: ValueError: if file contains multi-class predictions.
    :raises: ValueError: if you try to upgrade minimum EF rating but the
        original is non-zero.
    """

    # Verify and process input args.  Non-positive sentinel values mean
    # "option not used".
    if upgraded_min_ef_rating <= 0:
        upgraded_min_ef_rating = None

    num_bootstrap_reps = max([num_bootstrap_reps, 1])

    if best_prob_threshold < 0:
        best_prob_threshold = None

    # Read predictions.
    print('Reading data from: "{0:s}"...'.format(prediction_file_name))
    prediction_dict = prediction_io.read_ungridded_predictions(
        prediction_file_name)

    observed_labels = prediction_dict[prediction_io.OBSERVED_LABELS_KEY]
    class_probability_matrix = (
        prediction_dict[prediction_io.PROBABILITY_MATRIX_KEY]
    )

    num_examples = len(observed_labels)
    num_classes = class_probability_matrix.shape[1]

    if num_examples == 0:
        raise ValueError('File contains no examples (storm objects).')

    if num_classes > 2:
        error_string = (
            'This script handles only binary, not {0:d}-class, classification.'
        ).format(num_classes)

        raise ValueError(error_string)

    # Binary classification was enforced above, so the last column is the
    # positive-class probability.
    forecast_probabilities = class_probability_matrix[:, -1]

    # If necessary, upgrade minimum EF rating.
    if upgraded_min_ef_rating is not None:
        target_param_dict = target_val_utils.target_name_to_params(
            prediction_dict[prediction_io.TARGET_NAME_KEY])
        orig_min_ef_rating = (
            target_param_dict[target_val_utils.MIN_FUJITA_RATING_KEY]
        )

        # Only an original threshold of EF0 may be upgraded.
        if orig_min_ef_rating != 0:
            error_string = (
                'Cannot upgrade minimum EF rating when original min rating is '
                'non-zero (in this case it is {0:d}).'
            ).format(orig_min_ef_rating)

            raise ValueError(error_string)

        # Build name of the new target variable: same lead-time window and
        # distance buffer, but with the upgraded minimum EF rating.
        new_target_name = target_val_utils.target_params_to_name(
            min_lead_time_sec=target_param_dict[
                target_val_utils.MIN_LEAD_TIME_KEY],
            max_lead_time_sec=target_param_dict[
                target_val_utils.MAX_LEAD_TIME_KEY],
            min_link_distance_metres=target_param_dict[
                target_val_utils.MIN_LINKAGE_DISTANCE_KEY],
            max_link_distance_metres=target_param_dict[
                target_val_utils.MAX_LINKAGE_DISTANCE_KEY],
            tornadogenesis_only=(
                target_param_dict[target_val_utils.EVENT_TYPE_KEY] ==
                linkage.TORNADOGENESIS_EVENT_STRING
            ),
            min_fujita_rating=upgraded_min_ef_rating)

        print(SEPARATOR_STRING)

        # Replace observed labels with values for the new target variable.
        observed_labels = _read_new_target_values(
            top_target_dir_name=top_target_dir_name,
            new_target_name=new_target_name,
            full_storm_id_strings=prediction_dict[prediction_io.STORM_IDS_KEY],
            storm_times_unix_sec=prediction_dict[
                prediction_io.STORM_TIMES_KEY],
            orig_target_values=observed_labels)

        print(SEPARATOR_STRING)

        # Keep only storm objects with valid (non-negative) new labels.
        good_indices = numpy.where(observed_labels >= 0)[0]
        observed_labels = observed_labels[good_indices]
        forecast_probabilities = forecast_probabilities[good_indices]

    # Do calculations.
    output_file_name = model_eval.find_file_from_prediction_file(
        input_prediction_file_name=prediction_file_name,
        output_dir_name=output_dir_name, raise_error_if_missing=False)
    file_system_utils.mkdir_recursive_if_necessary(file_name=output_file_name)

    # Any non-positive fraction disables downsampling entirely.
    if numpy.any(downsampling_fractions <= 0):
        downsampling_dict = None
    else:
        downsampling_dict = {
            0: downsampling_fractions[0],
            1: downsampling_fractions[1]
        }

    _compute_scores(
        forecast_probabilities=forecast_probabilities,
        observed_labels=observed_labels,
        num_bootstrap_reps=num_bootstrap_reps,
        best_prob_threshold=best_prob_threshold,
        downsampling_dict=downsampling_dict,
        output_file_name=output_file_name)
# Synthetic storm-to-tornado linkage table used as a test fixture.
THIS_DICT = {
    tracking_utils.STORM_ID_COLUMN: THESE_STORM_IDS,
    tracking_utils.TIME_COLUMN: THESE_TIMES_UNIX_SEC,
    tracking_utils.TRACKING_END_TIME_COLUMN: THESE_END_TIMES_UNIX_SEC,
    linkage.EVENT_LATITUDES_COLUMN: THESE_EVENT_LATITUDES_DEG,
    linkage.EVENT_LONGITUDES_COLUMN: THESE_EVENT_LONGITUDES_DEG,
    linkage.LINKAGE_DISTANCES_COLUMN: THESE_LINK_DIST_METRES,
    linkage.RELATIVE_EVENT_TIMES_COLUMN: THESE_RELATIVE_TIMES_UNIX_SEC,
    linkage.FUJITA_RATINGS_COLUMN: THESE_FUJITA_RATINGS
}

STORM_TO_TORNADOES_TABLE = pandas.DataFrame.from_dict(THIS_DICT)

TARGET_NAME = target_val_utils.target_params_to_name(
    min_lead_time_sec=MIN_LEAD_TIME_SEC, max_lead_time_sec=MAX_LEAD_TIME_SEC,
    min_link_distance_metres=MIN_LINK_DISTANCE_METRES,
    max_link_distance_metres=MAX_LINK_DISTANCE_METRES)

# Sentinel used for storm objects that cannot be assigned a valid label.
INVALID_STORM_INTEGER = target_val_utils.INVALID_STORM_INTEGER

# Expected labels for the five storm objects in the fixture table.
TARGET_VALUES = numpy.array(
    [1, 0, INVALID_STORM_INTEGER, 1, INVALID_STORM_INTEGER], dtype=int)

# The following constants are used to test find_target_file.
TOP_DIRECTORY_NAME = 'target_values'
FILE_TIME_UNIX_SEC = 1517523991  # 222631 1 Feb 2018
FILE_SPC_DATE_STRING = '20180201'

WIND_FILE_NAME_ONE_TIME = (
    'target_values/2018/20180201/wind_labels_2018-02-01-222631.nc')
WIND_FILE_NAME_ONE_DAY = 'target_values/2018/wind_labels_20180201.nc'
def _run(top_linkage_dir_name, spc_date_string, min_lead_times_sec,
         max_lead_times_sec, min_link_distances_metres,
         max_link_distances_metres, event_type_string,
         wind_speed_percentile_level, wind_speed_cutoffs_kt,
         top_output_dir_name):
    """Computes target value for ea storm object, lead-time window, and buffer.

    This is effectively the main method.

    :param top_linkage_dir_name: See documentation at top of file.
    :param spc_date_string: Same.
    :param min_lead_times_sec: Same.
    :param max_lead_times_sec: Same.
    :param min_link_distances_metres: Same.
    :param max_link_distances_metres: Same.
    :param event_type_string: Same.
    :param wind_speed_percentile_level: Same.
    :param wind_speed_cutoffs_kt: Same.
    :param top_output_dir_name: Same.
    """

    # Min/max lead-time and distance arrays must pair up element-wise.
    num_lead_time_windows = len(min_lead_times_sec)
    error_checking.assert_is_numpy_array(
        max_lead_times_sec,
        exact_dimensions=numpy.array([num_lead_time_windows])
    )

    num_distance_buffers = len(min_link_distances_metres)
    error_checking.assert_is_numpy_array(
        max_link_distances_metres,
        exact_dimensions=numpy.array([num_distance_buffers])
    )

    linkage_file_name = linkage.find_linkage_file(
        top_directory_name=top_linkage_dir_name,
        event_type_string=event_type_string, spc_date_string=spc_date_string)

    # print(...) with one argument behaves identically under Python 2 and 3;
    # the bare "print ..." statement form was Python-2-only.
    print('Reading data from: "{0:s}"...'.format(linkage_file_name))
    storm_to_events_table = linkage.read_linkage_file(linkage_file_name)

    if event_type_string == linkage.WIND_EVENT_STRING:
        list_of_cutoff_arrays_kt = general_utils.split_array_by_nan(
            wind_speed_cutoffs_kt)

        # BUG FIX: the number of cutoff *sets* is the number of NaN-separated
        # sub-arrays, not the number of individual cutoff values.  Using
        # len(wind_speed_cutoffs_kt) over-counts and causes an IndexError at
        # list_of_cutoff_arrays_kt[k] below.
        num_cutoff_sets = len(list_of_cutoff_arrays_kt)
    else:
        list_of_cutoff_arrays_kt = None
        num_cutoff_sets = 1

    target_names = []

    # One target variable per (lead-time window, distance buffer, cutoff set).
    for i in range(num_lead_time_windows):
        for j in range(num_distance_buffers):
            for k in range(num_cutoff_sets):
                if event_type_string == linkage.WIND_EVENT_STRING:
                    this_target_name = target_val_utils.target_params_to_name(
                        min_lead_time_sec=min_lead_times_sec[i],
                        max_lead_time_sec=max_lead_times_sec[i],
                        min_link_distance_metres=min_link_distances_metres[j],
                        max_link_distance_metres=max_link_distances_metres[j],
                        wind_speed_percentile_level=wind_speed_percentile_level,
                        wind_speed_cutoffs_kt=list_of_cutoff_arrays_kt[k])

                    target_names.append(this_target_name)
                    print('Computing values for "{0:s}"...'.format(
                        target_names[-1]))

                    storm_to_events_table = (
                        target_val_utils.create_wind_classification_targets(
                            storm_to_winds_table=storm_to_events_table,
                            min_lead_time_sec=min_lead_times_sec[i],
                            max_lead_time_sec=max_lead_times_sec[i],
                            min_link_distance_metres=min_link_distances_metres[
                                j],
                            max_link_distance_metres=max_link_distances_metres[
                                j],
                            percentile_level=wind_speed_percentile_level,
                            class_cutoffs_kt=list_of_cutoff_arrays_kt[k])
                    )
                else:
                    this_target_name = target_val_utils.target_params_to_name(
                        min_lead_time_sec=min_lead_times_sec[i],
                        max_lead_time_sec=max_lead_times_sec[i],
                        min_link_distance_metres=min_link_distances_metres[j],
                        max_link_distance_metres=max_link_distances_metres[j])

                    target_names.append(this_target_name)
                    print('Computing values for "{0:s}"...'.format(
                        target_names[-1]))

                    storm_to_events_table = (
                        target_val_utils.create_tornado_targets(
                            storm_to_tornadoes_table=storm_to_events_table,
                            min_lead_time_sec=min_lead_times_sec[i],
                            max_lead_time_sec=max_lead_times_sec[i],
                            min_link_distance_metres=min_link_distances_metres[
                                j],
                            max_link_distance_metres=max_link_distances_metres[
                                j]
                        )
                    )

    target_file_name = target_val_utils.find_target_file(
        top_directory_name=top_output_dir_name,
        event_type_string=event_type_string, spc_date_string=spc_date_string,
        raise_error_if_missing=False)

    print('Writing target values to: "{0:s}"...'.format(target_file_name))
    target_val_utils.write_target_values(
        storm_to_events_table=storm_to_events_table, target_names=target_names,
        netcdf_file_name=target_file_name)
# Synthetic storm-to-tornado linkage table used as a test fixture.
THIS_DICT = {
    tracking_utils.FULL_ID_COLUMN: THESE_FULL_ID_STRINGS,
    tracking_utils.VALID_TIME_COLUMN: THESE_TIMES_UNIX_SEC,
    tracking_utils.TRACKING_END_TIME_COLUMN: THESE_END_TIMES_UNIX_SEC,
    linkage.EVENT_LATITUDES_COLUMN: THESE_EVENT_LATITUDES_DEG,
    linkage.EVENT_LONGITUDES_COLUMN: THESE_EVENT_LONGITUDES_DEG,
    linkage.LINKAGE_DISTANCES_COLUMN: THESE_LINK_DIST_METRES,
    linkage.RELATIVE_EVENT_TIMES_COLUMN: THESE_RELATIVE_TIMES_UNIX_SEC,
    linkage.FUJITA_RATINGS_COLUMN: THESE_FUJITA_RATINGS
}

STORM_TO_TORNADOES_TABLE = pandas.DataFrame.from_dict(THIS_DICT)

# NOTE(review): sibling chunks in this source call target_params_to_name with
# the keyword "tornadogenesis_only" instead of "genesis_only" — confirm which
# keyword the installed target_val_utils version expects.
MAIN_TORNADO_TARGET_NAME = target_val_utils.target_params_to_name(
    min_lead_time_sec=MIN_LEAD_TIME_SEC, max_lead_time_sec=MAX_LEAD_TIME_SEC,
    min_link_distance_metres=MIN_LINK_DISTANCE_METRES,
    max_link_distance_metres=MAX_LINK_DISTANCE_METRES, genesis_only=False)

# Sentinel used for storm objects that cannot be assigned a valid label.
INVALID_STORM_INTEGER = target_val_utils.INVALID_STORM_INTEGER

MAIN_TORNADO_TARGET_VALUES = numpy.array(
    [1, 0, INVALID_STORM_INTEGER, 1, INVALID_STORM_INTEGER], dtype=int)

MAIN_TORNADOGENESIS_TARGET_NAME = target_val_utils.target_params_to_name(
    min_lead_time_sec=MIN_LEAD_TIME_SEC, max_lead_time_sec=MAX_LEAD_TIME_SEC,
    min_link_distance_metres=MIN_LINK_DISTANCE_METRES,
    max_link_distance_metres=MAX_LINK_DISTANCE_METRES, genesis_only=True)

# NOTE(review): the next statement is cut off at the bottom of this chunk.
MAIN_TORNADOGENESIS_TARGET_VALUES = numpy.array(
def _compute_targets_one_day(
        storm_to_events_table, spc_date_string, min_lead_times_sec,
        max_lead_times_sec, min_link_distances_metres,
        max_link_distances_metres, event_type_string,
        wind_speed_percentile_level, wind_speed_cutoffs_kt,
        top_output_dir_name):
    """Computes target values for one SPC date.

    :param storm_to_events_table: pandas DataFrame returned by
        `linkage.read_linkage_file`.
    :param spc_date_string: SPC date (format "yyyymmdd").
    :param min_lead_times_sec: See documentation at top of file.
    :param max_lead_times_sec: Same.
    :param min_link_distances_metres: Same.
    :param max_link_distances_metres: Same.
    :param event_type_string: Same.
    :param wind_speed_percentile_level: Same.
    :param wind_speed_cutoffs_kt: Same.
    :param top_output_dir_name: Same.
    """

    num_lead_time_windows = len(min_lead_times_sec)
    num_distance_buffers = len(min_link_distances_metres)

    if event_type_string == linkage.WIND_EVENT_STRING:
        list_of_cutoff_arrays_kt = general_utils.split_array_by_nan(
            wind_speed_cutoffs_kt)

        # BUG FIX: the number of cutoff *sets* is the number of NaN-separated
        # sub-arrays, not the number of individual cutoff values.  Using
        # len(wind_speed_cutoffs_kt) over-counts and causes an IndexError at
        # list_of_cutoff_arrays_kt[k] below.
        num_cutoff_sets = len(list_of_cutoff_arrays_kt)
    else:
        list_of_cutoff_arrays_kt = None
        num_cutoff_sets = 1

    target_names = []

    # One target variable per (lead-time window, distance buffer, cutoff set).
    for i in range(num_lead_time_windows):
        for j in range(num_distance_buffers):
            for k in range(num_cutoff_sets):
                if event_type_string == linkage.WIND_EVENT_STRING:
                    this_target_name = target_val_utils.target_params_to_name(
                        min_lead_time_sec=min_lead_times_sec[i],
                        max_lead_time_sec=max_lead_times_sec[i],
                        min_link_distance_metres=min_link_distances_metres[j],
                        max_link_distance_metres=max_link_distances_metres[j],
                        wind_speed_percentile_level=wind_speed_percentile_level,
                        wind_speed_cutoffs_kt=list_of_cutoff_arrays_kt[k])

                    target_names.append(this_target_name)
                    print(('Computing labels for "{0:s}" on SPC date {1:s}...'
                           ).format(this_target_name, spc_date_string))

                    storm_to_events_table = (
                        target_val_utils.create_wind_classification_targets(
                            storm_to_winds_table=storm_to_events_table,
                            min_lead_time_sec=min_lead_times_sec[i],
                            max_lead_time_sec=max_lead_times_sec[i],
                            min_link_distance_metres=min_link_distances_metres[
                                j],
                            max_link_distance_metres=max_link_distances_metres[
                                j],
                            percentile_level=wind_speed_percentile_level,
                            class_cutoffs_kt=list_of_cutoff_arrays_kt[k]))
                else:
                    genesis_only = (
                        event_type_string ==
                        linkage.TORNADOGENESIS_EVENT_STRING
                    )

                    this_target_name = target_val_utils.target_params_to_name(
                        min_lead_time_sec=min_lead_times_sec[i],
                        max_lead_time_sec=max_lead_times_sec[i],
                        min_link_distance_metres=min_link_distances_metres[j],
                        max_link_distance_metres=max_link_distances_metres[j],
                        genesis_only=genesis_only)

                    target_names.append(this_target_name)
                    print(('Computing labels for "{0:s}" on SPC date {1:s}...'
                           ).format(this_target_name, spc_date_string))

                    storm_to_events_table = (
                        target_val_utils.create_tornado_targets(
                            storm_to_tornadoes_table=storm_to_events_table,
                            min_lead_time_sec=min_lead_times_sec[i],
                            max_lead_time_sec=max_lead_times_sec[i],
                            min_link_distance_metres=min_link_distances_metres[
                                j],
                            max_link_distance_metres=max_link_distances_metres[
                                j],
                            genesis_only=genesis_only))

    target_file_name = target_val_utils.find_target_file(
        top_directory_name=top_output_dir_name,
        event_type_string=event_type_string, spc_date_string=spc_date_string,
        raise_error_if_missing=False)

    print('Writing target values to: "{0:s}"...'.format(target_file_name))
    target_val_utils.write_target_values(
        storm_to_events_table=storm_to_events_table, target_names=target_names,
        netcdf_file_name=target_file_name)
# _file_name_to_batch_number. TOP_DIRECTORY_NAME = 'foo' BATCH_NUMBER = 1967 SPC_DATE_STRING = '19670502' EXAMPLE_FILE_NAME_SHUFFLED = ( 'foo/batches0001000-0001999/input_examples_batch0001967.nc') EXAMPLE_FILE_NAME_UNSHUFFLED = 'foo/1967/input_examples_19670502.nc' # The following constants are used to test _check_target_vars. TORNADO_MEAN_LEAD_TIME_SEC = 1800 WIND_MEAN_LEAD_TIME_SEC = 2700 NEAR_TORNADO_TARGET_NAME = target_val_utils.target_params_to_name( min_lead_time_sec=0, max_lead_time_sec=3600, min_link_distance_metres=0, max_link_distance_metres=10000, tornadogenesis_only=False) MEDIUM_TORNADO_TARGET_NAME = target_val_utils.target_params_to_name( min_lead_time_sec=450, max_lead_time_sec=3150, min_link_distance_metres=0, max_link_distance_metres=20000, tornadogenesis_only=False) FAR_TORNADO_TARGET_NAME = target_val_utils.target_params_to_name( min_lead_time_sec=900, max_lead_time_sec=2700, min_link_distance_metres=0, max_link_distance_metres=30000,
def _compute_tornado_targets_one_day(
        storm_to_tornadoes_table, spc_date_string, min_lead_times_sec,
        max_lead_times_sec, min_link_distances_metres,
        max_link_distances_metres, genesis_only, min_fujita_ratings,
        top_output_dir_name):
    """Computes tornado-related target values for one SPC date.

    :param storm_to_tornadoes_table: pandas DataFrame returned by
        `linkage.read_linkage_file`.
    :param spc_date_string: SPC date (format "yyyymmdd").
    :param min_lead_times_sec: See documentation at top of file.
    :param max_lead_times_sec: Same.
    :param min_link_distances_metres: Same.
    :param max_link_distances_metres: Same.
    :param genesis_only: Boolean flag.  If True, will create labels for
        tornadogenesis only.  If False, will create labels for occurrence,
        which includes pre-existing tornadoes (at forecast time).
    :param min_fujita_ratings: Same.
    :param top_output_dir_name: Same.
    """

    target_names = []

    # One target variable per combination of lead-time window, distance
    # buffer, and minimum Fujita rating.
    for window_index in range(len(min_lead_times_sec)):
        for buffer_index in range(len(min_link_distances_metres)):
            for rating_index in range(len(min_fujita_ratings)):
                new_target_name = target_val_utils.target_params_to_name(
                    min_lead_time_sec=min_lead_times_sec[window_index],
                    max_lead_time_sec=max_lead_times_sec[window_index],
                    min_link_distance_metres=min_link_distances_metres[
                        buffer_index],
                    max_link_distance_metres=max_link_distances_metres[
                        buffer_index],
                    tornadogenesis_only=genesis_only,
                    min_fujita_rating=min_fujita_ratings[rating_index])

                target_names.append(new_target_name)
                print(('Creating labels for "{0:s}" on SPC date "{1:s}"...'
                       ).format(new_target_name, spc_date_string))

                storm_to_tornadoes_table = (
                    target_val_utils.create_tornado_targets(
                        storm_to_tornadoes_table=storm_to_tornadoes_table,
                        min_lead_time_sec=min_lead_times_sec[window_index],
                        max_lead_time_sec=max_lead_times_sec[window_index],
                        min_link_distance_metres=min_link_distances_metres[
                            buffer_index],
                        max_link_distance_metres=max_link_distances_metres[
                            buffer_index],
                        genesis_only=genesis_only,
                        min_fujita_rating=min_fujita_ratings[rating_index]))

                print(SEPARATOR_STRING)

    # Genesis vs. occurrence determines which event type (and hence which
    # output file) the labels belong to.
    if genesis_only:
        event_type_string = linkage.TORNADOGENESIS_EVENT_STRING
    else:
        event_type_string = linkage.TORNADO_EVENT_STRING

    target_file_name = target_val_utils.find_target_file(
        top_directory_name=top_output_dir_name,
        event_type_string=event_type_string, spc_date_string=spc_date_string,
        raise_error_if_missing=False)

    print('Writing target values to: "{0:s}"...'.format(target_file_name))
    target_val_utils.write_target_values(
        storm_to_events_table=storm_to_tornadoes_table,
        target_names=target_names, netcdf_file_name=target_file_name)