def _shuffle_one_input_file(input_example_file_name, radar_field_names, num_examples_per_out_chunk, output_example_file_names): """Shuffles examples from one input file to many output files. :param input_example_file_name: Path to input file. :param radar_field_names: See documentation at top of file. :param num_examples_per_out_chunk: Same. :param output_example_file_names: 1-D list of paths to output files. """ print('Reading data from: "{0:s}"...'.format(input_example_file_name)) example_dict = input_examples.read_example_file( netcdf_file_name=input_example_file_name, read_all_target_vars=True, radar_field_names_to_keep=radar_field_names) num_examples = len(example_dict[input_examples.FULL_IDS_KEY]) shuffled_indices = numpy.linspace(0, num_examples - 1, num=num_examples, dtype=int) numpy.random.shuffle(shuffled_indices) example_dict = input_examples.subset_examples( example_dict=example_dict, indices_to_keep=shuffled_indices) for j in range(0, num_examples, num_examples_per_out_chunk): this_first_index = j this_last_index = min( [j + num_examples_per_out_chunk - 1, num_examples - 1]) these_indices = numpy.linspace(this_first_index, this_last_index, num=this_last_index - this_first_index + 1, dtype=int) this_example_dict = input_examples.subset_examples( example_dict=example_dict, indices_to_keep=these_indices, create_new_dict=True) this_output_file_name = random.choice(output_example_file_names) print('Writing shuffled examples to: "{0:s}"...'.format( this_output_file_name)) input_examples.write_example_file( netcdf_file_name=this_output_file_name, example_dict=this_example_dict, append_to_file=os.path.isfile(this_output_file_name))
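
# Hedged usage sketch (not part of the original script): driving
# `_shuffle_one_input_file` over several SPC dates.  Directory names, dates,
# the radar-field list, and the chunk size are all assumptions; the call to
# `find_example_file` mirrors the one used elsewhere in this codebase.
def _demo_shuffle_many_files():
    """Minimal sketch: shuffle two days of examples into a shared pool."""

    top_input_dir_name = '/data/examples'  # hypothetical
    output_example_file_names = [
        '/data/examples_shuffled/shuffled_examples_{0:03d}.nc'.format(k)
        for k in range(10)  # hypothetical
    ]

    for this_spc_date_string in ['20110519', '20110520']:  # hypothetical
        this_input_file_name = input_examples.find_example_file(
            top_directory_name=top_input_dir_name, shuffled=False,
            spc_date_string=this_spc_date_string,
            raise_error_if_missing=True)

        # Each chunk of 50 examples lands in a randomly chosen output file,
        # so every output file ends up with a mix of storm days.
        _shuffle_one_input_file(
            input_example_file_name=this_input_file_name,
            radar_field_names=['reflectivity_dbz'],  # hypothetical
            num_examples_per_out_chunk=50,
            output_example_file_names=output_example_file_names)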
def test_subset_examples_2d3d(self): """Ensures correct output from subset_examples. In this case examples contain both 2-D and 3-D radar images. """ this_example_dict = input_examples.subset_examples( example_dict=EXAMPLE_DICT_2D3D_ORIG, indices_to_keep=INDICES_TO_KEEP, create_new_dict=True) self.assertTrue(_compare_example_dicts( this_example_dict, EXAMPLE_DICT_2D3D_SUBSET))
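
    # The following sketch is not an original test; it is a hedged,
    # self-contained illustration of the axis-0 fancy indexing that
    # `input_examples.subset_examples` applies to every per-example array.
    # All shapes are made up.
    def test_subset_examples_indexing_sketch(self):
        """Illustrative sketch of per-example subsetting (made-up shapes)."""

        # 4 examples, 32 x 32 grid, 12 heights, 1 radar field.
        radar_matrix = numpy.random.rand(4, 32, 32, 12, 1)
        indices_to_keep = numpy.array([0, 2], dtype=int)

        # Keeping examples 0 and 2 shrinks only the first (example) axis.
        subset_matrix = radar_matrix[indices_to_keep, ...]
        self.assertTrue(subset_matrix.shape == (2, 32, 32, 12, 1))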
def gridrad_generator_2d_reduced(option_dict, list_of_operation_dicts, num_examples_total): """Generates examples with 2-D GridRad images. These 2-D images are produced by applying layer operations to the native 3-D images. The layer operations are specified by `list_of_operation_dicts`. Each example (storm object) consists of the following: - Storm-centered radar images (one 2-D image for each layer operation) - Storm-centered sounding (optional) - Target value (class) :param option_dict: Dictionary with the following keys. option_dict['example_file_names']: See doc for `training_validation_io.gridrad_generator_2d_reduced`. option_dict['binarize_target']: Same. option_dict['sounding_field_names']: Same. option_dict['sounding_heights_m_agl']: Same. option_dict['first_storm_time_unix_sec']: Same. option_dict['last_storm_time_unix_sec']: Same. option_dict['num_grid_rows']: Same. option_dict['num_grid_columns']: Same. option_dict['normalization_type_string']: Same. option_dict['normalization_param_file_name']: Same. option_dict['min_normalized_value']: Same. option_dict['max_normalized_value']: Same. option_dict['class_to_sampling_fraction_dict']: Same. :param list_of_operation_dicts: See doc for `input_examples.reduce_examples_3d_to_2d`. :param num_examples_total: Number of examples to generate. :return: storm_object_dict: Dictionary with the following keys. storm_object_dict['list_of_input_matrices']: length-T list of numpy arrays, where T = number of input tensors to model. The first axis of each array has length E. storm_object_dict['storm_ids']: length-E list of storm IDs. storm_object_dict['storm_times_unix_sec']: length-E numpy array of storm times. storm_object_dict['target_array']: See output doc for `training_validation_io.gridrad_generator_2d_reduced`. storm_object_dict['sounding_pressure_matrix_pascals']: numpy array (E x H_s) of pressures. If soundings were not read, this is None. storm_object_dict['radar_field_names']: length-C list of field names, where the [j]th item corresponds to the [j]th channel of the 2-D radar images returned in "list_of_input_matrices". storm_object_dict['min_radar_heights_m_agl']: length-C numpy array with minimum height for each layer operation (used to reduce 3-D radar images to 2-D). storm_object_dict['max_radar_heights_m_agl']: Same but with max heights. storm_object_dict['radar_layer_operation_names']: length-C list with names of layer operations. Each name must be accepted by `input_examples._check_layer_operation`. 
""" unique_radar_field_names, unique_radar_heights_m_agl = ( trainval_io.layer_ops_to_field_height_pairs(list_of_operation_dicts) ) option_dict[trainval_io.RADAR_FIELDS_KEY] = unique_radar_field_names option_dict[trainval_io.RADAR_HEIGHTS_KEY] = unique_radar_heights_m_agl storm_ids, storm_times_unix_sec = _find_examples_to_read( option_dict=option_dict, num_examples_total=num_examples_total) print '\n' example_file_names = option_dict[trainval_io.EXAMPLE_FILES_KEY] first_storm_time_unix_sec = option_dict[trainval_io.FIRST_STORM_TIME_KEY] last_storm_time_unix_sec = option_dict[trainval_io.LAST_STORM_TIME_KEY] num_grid_rows = option_dict[trainval_io.NUM_ROWS_KEY] num_grid_columns = option_dict[trainval_io.NUM_COLUMNS_KEY] sounding_field_names = option_dict[trainval_io.SOUNDING_FIELDS_KEY] sounding_heights_m_agl = option_dict[trainval_io.SOUNDING_HEIGHTS_KEY] normalization_type_string = option_dict[trainval_io.NORMALIZATION_TYPE_KEY] normalization_param_file_name = option_dict[ trainval_io.NORMALIZATION_FILE_KEY] min_normalized_value = option_dict[trainval_io.MIN_NORMALIZED_VALUE_KEY] max_normalized_value = option_dict[trainval_io.MAX_NORMALIZED_VALUE_KEY] binarize_target = option_dict[trainval_io.BINARIZE_TARGET_KEY] this_example_dict = input_examples.read_example_file( netcdf_file_name=example_file_names[0], metadata_only=True) target_name = this_example_dict[input_examples.TARGET_NAME_KEY] num_classes = target_val_utils.target_name_to_num_classes( target_name=target_name, include_dead_storms=False) if sounding_field_names is None: sounding_field_names_to_read = None else: if soundings.PRESSURE_NAME in sounding_field_names: sounding_field_names_to_read = sounding_field_names + [] else: sounding_field_names_to_read = ( sounding_field_names + [soundings.PRESSURE_NAME] ) radar_image_matrix = None sounding_matrix = None target_values = None sounding_pressure_matrix_pascals = None reduction_metadata_dict = {} file_index = 0 while True: if file_index >= len(example_file_names): raise StopIteration print 'Reading data from: "{0:s}"...'.format( example_file_names[file_index]) this_example_dict = input_examples.read_example_file( netcdf_file_name=example_file_names[file_index], include_soundings=sounding_field_names is not None, radar_field_names_to_keep=unique_radar_field_names, radar_heights_to_keep_m_agl=unique_radar_heights_m_agl, sounding_field_names_to_keep=sounding_field_names_to_read, sounding_heights_to_keep_m_agl=sounding_heights_m_agl, first_time_to_keep_unix_sec=first_storm_time_unix_sec, last_time_to_keep_unix_sec=last_storm_time_unix_sec, num_rows_to_keep=num_grid_rows, num_columns_to_keep=num_grid_columns) file_index += 1 if this_example_dict is None: continue indices_to_keep = tracking_utils.find_storm_objects( all_storm_ids=this_example_dict[input_examples.STORM_IDS_KEY], all_times_unix_sec=this_example_dict[ input_examples.STORM_TIMES_KEY], storm_ids_to_keep=storm_ids, times_to_keep_unix_sec=storm_times_unix_sec, allow_missing=True) indices_to_keep = indices_to_keep[indices_to_keep >= 0] if len(indices_to_keep) == 0: continue this_example_dict = input_examples.subset_examples( example_dict=this_example_dict, indices_to_keep=indices_to_keep) this_example_dict = input_examples.reduce_examples_3d_to_2d( example_dict=this_example_dict, list_of_operation_dicts=list_of_operation_dicts) radar_field_names_2d = this_example_dict[ input_examples.RADAR_FIELDS_KEY] for this_key in REDUCTION_METADATA_KEYS: reduction_metadata_dict[this_key] = this_example_dict[this_key] include_soundings = ( 
input_examples.SOUNDING_MATRIX_KEY in this_example_dict) if include_soundings: pressure_index = this_example_dict[ input_examples.SOUNDING_FIELDS_KEY ].index(soundings.PRESSURE_NAME) this_pressure_matrix_pascals = this_example_dict[ input_examples.SOUNDING_MATRIX_KEY][..., pressure_index] this_sounding_matrix = this_example_dict[ input_examples.SOUNDING_MATRIX_KEY] if soundings.PRESSURE_NAME not in sounding_field_names: this_sounding_matrix = this_sounding_matrix[..., :-1] if target_values is None: radar_image_matrix = ( this_example_dict[input_examples.RADAR_IMAGE_MATRIX_KEY] + 0. ) target_values = ( this_example_dict[input_examples.TARGET_VALUES_KEY] + 0) if include_soundings: sounding_matrix = this_sounding_matrix + 0. sounding_pressure_matrix_pascals = ( this_pressure_matrix_pascals + 0.) else: radar_image_matrix = numpy.concatenate( (radar_image_matrix, this_example_dict[input_examples.RADAR_IMAGE_MATRIX_KEY]), axis=0) target_values = numpy.concatenate(( target_values, this_example_dict[input_examples.TARGET_VALUES_KEY] )) if include_soundings: sounding_matrix = numpy.concatenate( (sounding_matrix, this_sounding_matrix), axis=0) sounding_pressure_matrix_pascals = numpy.concatenate( (sounding_pressure_matrix_pascals, this_pressure_matrix_pascals), axis=0) if normalization_type_string is not None: radar_image_matrix = dl_utils.normalize_radar_images( radar_image_matrix=radar_image_matrix, field_names=radar_field_names_2d, normalization_type_string=normalization_type_string, normalization_param_file_name=normalization_param_file_name, min_normalized_value=min_normalized_value, max_normalized_value=max_normalized_value).astype('float32') if include_soundings: sounding_matrix = dl_utils.normalize_soundings( sounding_matrix=sounding_matrix, field_names=sounding_field_names, normalization_type_string=normalization_type_string, normalization_param_file_name=normalization_param_file_name, min_normalized_value=min_normalized_value, max_normalized_value=max_normalized_value).astype('float32') list_of_predictor_matrices = [radar_image_matrix] if include_soundings: list_of_predictor_matrices.append(sounding_matrix) target_array = _finalize_targets( target_values=target_values, binarize_target=binarize_target, num_classes=num_classes) storm_object_dict = { INPUT_MATRICES_KEY: list_of_predictor_matrices, TARGET_ARRAY_KEY: target_array, STORM_IDS_KEY: this_example_dict[input_examples.STORM_IDS_KEY], STORM_TIMES_KEY: this_example_dict[input_examples.STORM_TIMES_KEY], SOUNDING_PRESSURES_KEY: copy.deepcopy(sounding_pressure_matrix_pascals) } for this_key in REDUCTION_METADATA_KEYS: storm_object_dict[this_key] = reduction_metadata_dict[this_key] radar_image_matrix = None sounding_matrix = None target_values = None sounding_pressure_matrix_pascals = None yield storm_object_dict
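
# Hedged usage sketch (not part of the original module): building layer
# operations and pulling one batch from `gridrad_generator_2d_reduced`.
# The dictionary keys and the field/height values below are assumptions,
# not verified against `input_examples.reduce_examples_3d_to_2d`.
def _demo_gridrad_generator_2d_reduced(option_dict):
    """Minimal sketch: column-max reflectivity over two height ranges."""

    list_of_operation_dicts = [
        {
            'radar_field_name': 'reflectivity_dbz',  # assumed key and value
            'operation_name': 'max',                 # assumed key and value
            'min_height_m_agl': 1000,                # assumed key
            'max_height_m_agl': 3000                 # assumed key
        },
        {
            'radar_field_name': 'reflectivity_dbz',
            'operation_name': 'max',
            'min_height_m_agl': 3000,
            'max_height_m_agl': 6000
        }
    ]

    generator = gridrad_generator_2d_reduced(
        option_dict=option_dict,
        list_of_operation_dicts=list_of_operation_dicts,
        num_examples_total=1000)

    storm_object_dict = next(generator)

    # The first input matrix holds the 2-D radar images, with one channel
    # per layer operation; the optional second matrix holds soundings.
    radar_image_matrix = storm_object_dict[INPUT_MATRICES_KEY][0]
    print(radar_image_matrix.shape)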
def myrorss_generator_2d3d(option_dict, num_examples_total): """Generates examples with both 2-D and 3-D radar images. Each example (storm object) consists of the following: - Storm-centered azimuthal shear (one 2-D image for each field) - Storm-centered reflectivity (one 3-D image) - Storm-centered sounding (optional) - Target value (class) :param option_dict: Dictionary with the following keys. option_dict['example_file_names']: See doc for `training_validation_io.myrorss_generator_2d3d`. option_dict['binarize_target']: Same. option_dict['radar_field_names']: Same. option_dict['radar_heights_m_agl']: Same. option_dict['sounding_field_names']: Same. option_dict['sounding_heights_m_agl']: Same. option_dict['first_storm_time_unix_sec']: Same. option_dict['last_storm_time_unix_sec']: Same. option_dict['num_grid_rows']: Same. option_dict['num_grid_columns']: Same. option_dict['normalization_type_string']: See doc for `generator_2d_or_3d`. option_dict['normalization_param_file_name']: Same. option_dict['min_normalized_value']: Same. option_dict['max_normalized_value']: Same. option_dict['class_to_sampling_fraction_dict']: Same. :param num_examples_total: Total number of examples to generate. :return: storm_object_dict: Dictionary with the following keys. storm_object_dict['list_of_input_matrices']: length-T list of numpy arrays, where T = number of input tensors to model. The first axis of each array has length E. storm_object_dict['storm_ids']: length-E list of storm IDs. storm_object_dict['storm_times_unix_sec']: length-E numpy array of storm times. storm_object_dict['target_array']: See output doc for `training_validation_io.myrorss_generator_2d3d`. storm_object_dict['sounding_pressure_matrix_pascals']: numpy array (E x H_s) of pressures. If soundings were not read, this is None. 
""" storm_ids, storm_times_unix_sec = _find_examples_to_read( option_dict=option_dict, num_examples_total=num_examples_total) print '\n' example_file_names = option_dict[trainval_io.EXAMPLE_FILES_KEY] first_storm_time_unix_sec = option_dict[trainval_io.FIRST_STORM_TIME_KEY] last_storm_time_unix_sec = option_dict[trainval_io.LAST_STORM_TIME_KEY] num_grid_rows = option_dict[trainval_io.NUM_ROWS_KEY] num_grid_columns = option_dict[trainval_io.NUM_COLUMNS_KEY] azimuthal_shear_field_names = option_dict[trainval_io.RADAR_FIELDS_KEY] reflectivity_heights_m_agl = option_dict[trainval_io.RADAR_HEIGHTS_KEY] sounding_field_names = option_dict[trainval_io.SOUNDING_FIELDS_KEY] sounding_heights_m_agl = option_dict[trainval_io.SOUNDING_HEIGHTS_KEY] normalization_type_string = option_dict[trainval_io.NORMALIZATION_TYPE_KEY] normalization_param_file_name = option_dict[ trainval_io.NORMALIZATION_FILE_KEY] min_normalized_value = option_dict[trainval_io.MIN_NORMALIZED_VALUE_KEY] max_normalized_value = option_dict[trainval_io.MAX_NORMALIZED_VALUE_KEY] binarize_target = option_dict[trainval_io.BINARIZE_TARGET_KEY] this_example_dict = input_examples.read_example_file( netcdf_file_name=example_file_names[0], metadata_only=True) target_name = this_example_dict[input_examples.TARGET_NAME_KEY] num_classes = target_val_utils.target_name_to_num_classes( target_name=target_name, include_dead_storms=False) if sounding_field_names is None: sounding_field_names_to_read = None else: if soundings.PRESSURE_NAME in sounding_field_names: sounding_field_names_to_read = sounding_field_names + [] else: sounding_field_names_to_read = ( sounding_field_names + [soundings.PRESSURE_NAME] ) reflectivity_image_matrix_dbz = None az_shear_image_matrix_s01 = None sounding_matrix = None target_values = None sounding_pressure_matrix_pascals = None file_index = 0 while True: if file_index >= len(example_file_names): raise StopIteration print 'Reading data from: "{0:s}"...'.format( example_file_names[file_index]) this_example_dict = input_examples.read_example_file( netcdf_file_name=example_file_names[file_index], include_soundings=sounding_field_names is not None, radar_field_names_to_keep=azimuthal_shear_field_names, radar_heights_to_keep_m_agl=reflectivity_heights_m_agl, sounding_field_names_to_keep=sounding_field_names_to_read, sounding_heights_to_keep_m_agl=sounding_heights_m_agl, first_time_to_keep_unix_sec=first_storm_time_unix_sec, last_time_to_keep_unix_sec=last_storm_time_unix_sec, num_rows_to_keep=num_grid_rows, num_columns_to_keep=num_grid_columns) file_index += 1 if this_example_dict is None: continue indices_to_keep = tracking_utils.find_storm_objects( all_storm_ids=this_example_dict[input_examples.STORM_IDS_KEY], all_times_unix_sec=this_example_dict[ input_examples.STORM_TIMES_KEY], storm_ids_to_keep=storm_ids, times_to_keep_unix_sec=storm_times_unix_sec, allow_missing=True) indices_to_keep = indices_to_keep[indices_to_keep >= 0] if len(indices_to_keep) == 0: continue this_example_dict = input_examples.subset_examples( example_dict=this_example_dict, indices_to_keep=indices_to_keep) include_soundings = ( input_examples.SOUNDING_MATRIX_KEY in this_example_dict) if include_soundings: pressure_index = this_example_dict[ input_examples.SOUNDING_FIELDS_KEY ].index(soundings.PRESSURE_NAME) this_pressure_matrix_pascals = this_example_dict[ input_examples.SOUNDING_MATRIX_KEY][..., pressure_index] this_sounding_matrix = this_example_dict[ input_examples.SOUNDING_MATRIX_KEY] if soundings.PRESSURE_NAME not in sounding_field_names: 
                # Drop the trailing pressure channel, which was read only to
                # populate `sounding_pressure_matrix_pascals`.  (The original
                # indexed `[..., -1]`, which would instead *keep* only the
                # pressure channel and lose a dimension.)
                this_sounding_matrix = this_sounding_matrix[..., :-1]

        if target_values is None:
            reflectivity_image_matrix_dbz = (
                this_example_dict[input_examples.REFL_IMAGE_MATRIX_KEY] + 0.
            )
            az_shear_image_matrix_s01 = (
                this_example_dict[input_examples.AZ_SHEAR_IMAGE_MATRIX_KEY]
                + 0.
            )
            target_values = (
                this_example_dict[input_examples.TARGET_VALUES_KEY] + 0)

            if include_soundings:
                sounding_matrix = this_sounding_matrix + 0.
                sounding_pressure_matrix_pascals = (
                    this_pressure_matrix_pascals + 0.)
        else:
            reflectivity_image_matrix_dbz = numpy.concatenate(
                (reflectivity_image_matrix_dbz,
                 this_example_dict[input_examples.REFL_IMAGE_MATRIX_KEY]),
                axis=0)

            az_shear_image_matrix_s01 = numpy.concatenate((
                az_shear_image_matrix_s01,
                this_example_dict[input_examples.AZ_SHEAR_IMAGE_MATRIX_KEY]
            ), axis=0)

            target_values = numpy.concatenate((
                target_values,
                this_example_dict[input_examples.TARGET_VALUES_KEY]
            ))

            if include_soundings:
                sounding_matrix = numpy.concatenate(
                    (sounding_matrix, this_sounding_matrix), axis=0)

                sounding_pressure_matrix_pascals = numpy.concatenate(
                    (sounding_pressure_matrix_pascals,
                     this_pressure_matrix_pascals), axis=0)

        if normalization_type_string is not None:
            reflectivity_image_matrix_dbz = dl_utils.normalize_radar_images(
                radar_image_matrix=reflectivity_image_matrix_dbz,
                field_names=[radar_utils.REFL_NAME],
                normalization_type_string=normalization_type_string,
                normalization_param_file_name=normalization_param_file_name,
                min_normalized_value=min_normalized_value,
                max_normalized_value=max_normalized_value).astype('float32')

            az_shear_image_matrix_s01 = dl_utils.normalize_radar_images(
                radar_image_matrix=az_shear_image_matrix_s01,
                field_names=azimuthal_shear_field_names,
                normalization_type_string=normalization_type_string,
                normalization_param_file_name=normalization_param_file_name,
                min_normalized_value=min_normalized_value,
                max_normalized_value=max_normalized_value).astype('float32')

            if include_soundings:
                sounding_matrix = dl_utils.normalize_soundings(
                    sounding_matrix=sounding_matrix,
                    field_names=sounding_field_names,
                    normalization_type_string=normalization_type_string,
                    normalization_param_file_name=(
                        normalization_param_file_name),
                    min_normalized_value=min_normalized_value,
                    max_normalized_value=max_normalized_value
                ).astype('float32')

        list_of_predictor_matrices = [
            reflectivity_image_matrix_dbz, az_shear_image_matrix_s01
        ]
        if include_soundings:
            list_of_predictor_matrices.append(sounding_matrix)

        target_array = _finalize_targets(
            target_values=target_values, binarize_target=binarize_target,
            num_classes=num_classes)

        storm_object_dict = {
            INPUT_MATRICES_KEY: list_of_predictor_matrices,
            TARGET_ARRAY_KEY: target_array,
            STORM_IDS_KEY: this_example_dict[input_examples.STORM_IDS_KEY],
            STORM_TIMES_KEY:
                this_example_dict[input_examples.STORM_TIMES_KEY],

            # `copy.deepcopy` (rather than `+ 0.`) handles the case where
            # soundings were not read and the pressure matrix is None, as in
            # `gridrad_generator_2d_reduced` above.
            SOUNDING_PRESSURES_KEY:
                copy.deepcopy(sounding_pressure_matrix_pascals)
        }

        reflectivity_image_matrix_dbz = None
        az_shear_image_matrix_s01 = None
        sounding_matrix = None
        target_values = None
        sounding_pressure_matrix_pascals = None

        yield storm_object_dict
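
# Hedged usage sketch (not part of the original module): consuming
# `myrorss_generator_2d3d`.  The model object is hypothetical; `option_dict`
# is assumed to be filled in as documented above.
def _demo_myrorss_generator_2d3d(option_dict, model_object):
    """Minimal sketch: run a Keras-style multi-input model over examples."""

    generator = myrorss_generator_2d3d(
        option_dict=option_dict, num_examples_total=500)

    for storm_object_dict in generator:
        # Input order matches `list_of_predictor_matrices` above:
        # [reflectivity (3-D), azimuthal shear (2-D), soundings (optional)].
        these_input_matrices = storm_object_dict[INPUT_MATRICES_KEY]
        these_probabilities = model_object.predict(
            these_input_matrices, batch_size=32)
        print(these_probabilities.shape)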
def _run(input_example_dir_name, storm_metafile_name, num_examples_in_subset, subset_randomly, output_example_file_name): """Extracts desired examples and writes them to one file. This is effectively the main method. :param input_example_dir_name: See documentation at top of file. :param storm_metafile_name: Same. :param num_examples_in_subset: Same. :param subset_randomly: Same. :param output_example_file_name: Same. """ print( 'Reading storm metadata from: "{0:s}"...'.format(storm_metafile_name)) example_id_strings, example_times_unix_sec = ( tracking_io.read_ids_and_times(storm_metafile_name)) if not 0 < num_examples_in_subset < len(example_id_strings): num_examples_in_subset = None if num_examples_in_subset is not None: if subset_randomly: these_indices = numpy.linspace(0, len(example_id_strings) - 1, num=len(example_id_strings), dtype=int) these_indices = numpy.random.choice(these_indices, size=num_examples_in_subset, replace=False) example_id_strings = [example_id_strings[k] for k in these_indices] example_times_unix_sec = example_times_unix_sec[these_indices] else: example_id_strings = example_id_strings[:num_examples_in_subset] example_times_unix_sec = ( example_times_unix_sec[:num_examples_in_subset]) example_spc_date_strings = numpy.array([ time_conversion.time_to_spc_date_string(t) for t in example_times_unix_sec ]) spc_date_strings = numpy.unique(example_spc_date_strings) example_file_name_by_day = [ input_examples.find_example_file( top_directory_name=input_example_dir_name, shuffled=False, spc_date_string=d, raise_error_if_missing=True) for d in spc_date_strings ] num_days = len(spc_date_strings) for i in range(num_days): print('Reading data from: "{0:s}"...'.format( example_file_name_by_day[i])) all_example_dict = input_examples.read_example_file( netcdf_file_name=example_file_name_by_day[i], read_all_target_vars=True) these_indices = numpy.where( example_spc_date_strings == spc_date_strings[i])[0] desired_indices = tracking_utils.find_storm_objects( all_id_strings=all_example_dict[input_examples.FULL_IDS_KEY], all_times_unix_sec=all_example_dict[ input_examples.STORM_TIMES_KEY], id_strings_to_keep=[example_id_strings[k] for k in these_indices], times_to_keep_unix_sec=example_times_unix_sec[these_indices], allow_missing=False) desired_example_dict = input_examples.subset_examples( example_dict=all_example_dict, indices_to_keep=desired_indices) print('Writing {0:d} desired examples to: "{1:s}"...'.format( len(desired_indices), output_example_file_name)) input_examples.write_example_file( netcdf_file_name=output_example_file_name, example_dict=desired_example_dict, append_to_file=i > 0)
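
# Hedged usage sketch (not part of the original script): calling `_run`
# directly with made-up paths.  In the real script these arguments would
# come from the input-argument parser documented at the top of the file;
# every literal below is an assumption.
def _demo_extract_examples():
    """Minimal sketch for `_run`, extracting 200 random examples."""

    _run(
        input_example_dir_name='/data/examples',            # hypothetical
        storm_metafile_name='/data/storm_metadata.p',       # hypothetical
        num_examples_in_subset=200,
        subset_randomly=True,
        output_example_file_name='/data/subset_examples.nc'  # hypothetical
    )

    # `_run` groups the desired storm objects by SPC date, reads one example
    # file per date, and appends each date's matches to the single output
    # file.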