def _run(top_input_dir_name, first_spc_date_string, last_spc_date_string,
         resolution_factor, top_output_dir_name):
    """Converts examples from GridRad to MYRORSS format.

    This is effectively the main method.

    :param top_input_dir_name: See documentation at top of file.
    :param first_spc_date_string: Same.
    :param last_spc_date_string: Same.
    :param resolution_factor: Same.
    :param top_output_dir_name: Same.
    """

    spc_date_strings = time_conversion.get_spc_dates_in_range(
        first_spc_date_string=first_spc_date_string,
        last_spc_date_string=last_spc_date_string)

    input_file_names = [
        input_examples.find_example_file(
            top_directory_name=top_input_dir_name, shuffled=False,
            spc_date_string=d, raise_error_if_missing=False
        ) for d in spc_date_strings
    ]

    output_file_names = [
        input_examples.find_example_file(
            top_directory_name=top_output_dir_name, shuffled=False,
            spc_date_string=d, raise_error_if_missing=False
        ) for d in spc_date_strings
    ]

    num_spc_dates = len(spc_date_strings)

    for i in range(num_spc_dates):
        if not os.path.isfile(input_file_names[i]):
            continue

        _convert_one_file(
            input_file_name=input_file_names[i],
            resolution_factor=resolution_factor,
            output_file_name=output_file_names[i]
        )
        print('\n')
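# Minimal usage sketch for the converter above.  The directory names, dates,
# and resolution factor below are hypothetical placeholders, not values from
# this repository.
_run(
    top_input_dir_name='/data/gridrad_examples',
    first_spc_date_string='20110401',
    last_spc_date_string='20110403',
    resolution_factor=2,
    top_output_dir_name='/data/myrorss_examples'
)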
def test_find_example_file_unshuffled(self):
    """Ensures correct output from find_example_file.

    In this case the hypothetical file is *not* temporally shuffled.
    """

    this_file_name = input_examples.find_example_file(
        top_directory_name=TOP_DIRECTORY_NAME, shuffled=False,
        spc_date_string=SPC_DATE_STRING, raise_error_if_missing=False)

    self.assertTrue(this_file_name == EXAMPLE_FILE_NAME_UNSHUFFLED)
def test_find_example_file_shuffled(self):
    """Ensures correct output from find_example_file.

    In this case the hypothetical file is temporally shuffled.
    """

    this_file_name = input_examples.find_example_file(
        top_directory_name=TOP_DIRECTORY_NAME, shuffled=True,
        batch_number=BATCH_NUMBER, raise_error_if_missing=False)

    self.assertTrue(this_file_name == EXAMPLE_FILE_NAME_SHUFFLED)
def _set_output_locations(top_output_dir_name, num_input_examples,
                          num_examples_per_out_file,
                          first_output_batch_number):
    """Sets locations of output files.

    :param top_output_dir_name: See documentation at top of file.
    :param num_input_examples: Total number of examples in input files.
    :param num_examples_per_out_file: See documentation at top of file.
    :param first_output_batch_number: Same.
    :return: output_example_file_names: 1-D list of paths to output files.
    """

    num_output_files = int(
        numpy.ceil(float(num_input_examples) / num_examples_per_out_file)
    )

    print((
        'Num input examples = {0:d} ... num examples per output file = {1:d} '
        '... num output files = {2:d}'
    ).format(num_input_examples, num_examples_per_out_file, num_output_files))

    output_example_file_names = [
        input_examples.find_example_file(
            top_directory_name=top_output_dir_name, shuffled=True,
            batch_number=first_output_batch_number + i,
            raise_error_if_missing=False)
        for i in range(num_output_files)
    ]

    # Delete any output file that already exists, since new examples will be
    # written to these locations.
    for this_file_name in output_example_file_names:
        if not os.path.isfile(this_file_name):
            continue

        print('Deleting output file: "{0:s}"...'.format(this_file_name))
        os.remove(this_file_name)

    return output_example_file_names
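# Quick sanity check on the file-count arithmetic in _set_output_locations,
# with hypothetical numbers.  The ceiling division guarantees that a partially
# full final batch still gets its own file.
import numpy

num_input_examples = 250000        # hypothetical total
num_examples_per_out_file = 1024   # hypothetical examples per output file

num_output_files = int(
    numpy.ceil(float(num_input_examples) / num_examples_per_out_file)
)
print(num_output_files)  # 245, since 244 * 1024 = 249856 < 250000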
def _run(model_file_name, top_example_dir_name, storm_metafile_name,
         output_dir_name):
    """Uses trained CNN to make predictions for specific examples.

    This is effectively the main method.

    :param model_file_name: See documentation at top of file.
    :param top_example_dir_name: Same.
    :param storm_metafile_name: Same.
    :param output_dir_name: Same.
    :raises: ValueError: if the model does multi-class classification.
    """

    print('Reading CNN from: "{0:s}"...'.format(model_file_name))
    model_object = cnn.read_model(model_file_name)

    num_output_neurons = (
        model_object.layers[-1].output.get_shape().as_list()[-1]
    )

    if num_output_neurons > 2:
        error_string = (
            'The model has {0:d} output neurons, which suggests {0:d}-class '
            'classification.  This script handles only binary classification.'
        ).format(num_output_neurons)

        raise ValueError(error_string)

    soundings_only = False

    if isinstance(model_object.input, list):
        list_of_input_tensors = model_object.input
    else:
        list_of_input_tensors = [model_object.input]

    if len(list_of_input_tensors) == 1:
        these_spatial_dim = numpy.array(
            list_of_input_tensors[0].get_shape().as_list()[1:-1], dtype=int
        )
        soundings_only = len(these_spatial_dim) == 1

    cnn_metafile_name = cnn.find_metafile(
        model_file_name=model_file_name, raise_error_if_missing=True
    )

    print('Reading CNN metadata from: "{0:s}"...'.format(cnn_metafile_name))
    cnn_metadata_dict = cnn.read_model_metadata(cnn_metafile_name)

    print('Reading storm metadata from: "{0:s}"...'.format(
        storm_metafile_name))
    desired_full_id_strings, desired_times_unix_sec = (
        tracking_io.read_ids_and_times(storm_metafile_name)
    )

    unique_spc_date_strings = list(set([
        time_conversion.time_to_spc_date_string(t)
        for t in desired_times_unix_sec
    ]))

    example_file_names = [
        input_examples.find_example_file(
            top_directory_name=top_example_dir_name, shuffled=False,
            spc_date_string=d, raise_error_if_missing=True
        ) for d in unique_spc_date_strings
    ]

    first_spc_date_string = time_conversion.time_to_spc_date_string(
        numpy.min(desired_times_unix_sec)
    )
    last_spc_date_string = time_conversion.time_to_spc_date_string(
        numpy.max(desired_times_unix_sec)
    )

    training_option_dict = cnn_metadata_dict[cnn.TRAINING_OPTION_DICT_KEY]
    training_option_dict[trainval_io.EXAMPLE_FILES_KEY] = example_file_names
    training_option_dict[trainval_io.FIRST_STORM_TIME_KEY] = (
        time_conversion.get_start_of_spc_date(first_spc_date_string)
    )
    training_option_dict[trainval_io.LAST_STORM_TIME_KEY] = (
        time_conversion.get_end_of_spc_date(last_spc_date_string)
    )
    training_option_dict[trainval_io.NUM_EXAMPLES_PER_BATCH_KEY] = (
        NUM_EXAMPLES_PER_BATCH
    )

    if soundings_only:
        generator_object = testing_io.sounding_generator(
            option_dict=training_option_dict,
            desired_full_id_strings=desired_full_id_strings,
            desired_times_unix_sec=desired_times_unix_sec)
    elif cnn_metadata_dict[cnn.LAYER_OPERATIONS_KEY] is not None:
        generator_object = testing_io.gridrad_generator_2d_reduced(
            option_dict=training_option_dict,
            desired_full_id_strings=desired_full_id_strings,
            desired_times_unix_sec=desired_times_unix_sec,
            list_of_operation_dicts=cnn_metadata_dict[
                cnn.LAYER_OPERATIONS_KEY]
        )
    elif cnn_metadata_dict[cnn.CONV_2D3D_KEY]:
        generator_object = testing_io.myrorss_generator_2d3d(
            option_dict=training_option_dict,
            desired_full_id_strings=desired_full_id_strings,
            desired_times_unix_sec=desired_times_unix_sec)
    else:
        generator_object = testing_io.generator_2d_or_3d(
            option_dict=training_option_dict,
            desired_full_id_strings=desired_full_id_strings,
            desired_times_unix_sec=desired_times_unix_sec)

    include_soundings = (
        training_option_dict[trainval_io.SOUNDING_FIELDS_KEY] is not None
    )

    full_storm_id_strings = []
    storm_times_unix_sec = numpy.array([], dtype=int)
    observed_labels = numpy.array([], dtype=int)
    class_probability_matrix = None

    while True:
        try:
            this_storm_object_dict = next(generator_object)
            print(SEPARATOR_STRING)
        except StopIteration:
            break

        full_storm_id_strings += this_storm_object_dict[
            testing_io.FULL_IDS_KEY]
        storm_times_unix_sec = numpy.concatenate((
            storm_times_unix_sec,
            this_storm_object_dict[testing_io.STORM_TIMES_KEY]
        ))
        observed_labels = numpy.concatenate((
            observed_labels,
            this_storm_object_dict[testing_io.TARGET_ARRAY_KEY]
        ))

        if soundings_only:
            these_predictor_matrices = [
                this_storm_object_dict[testing_io.SOUNDING_MATRIX_KEY]
            ]
        else:
            these_predictor_matrices = this_storm_object_dict[
                testing_io.INPUT_MATRICES_KEY]

        if include_soundings:
            this_sounding_matrix = these_predictor_matrices[-1]
        else:
            this_sounding_matrix = None

        if soundings_only:
            this_probability_matrix = cnn.apply_cnn_soundings_only(
                model_object=model_object,
                sounding_matrix=this_sounding_matrix, verbose=True)
        elif cnn_metadata_dict[cnn.CONV_2D3D_KEY]:
            if training_option_dict[trainval_io.UPSAMPLE_REFLECTIVITY_KEY]:
                this_probability_matrix = cnn.apply_2d_or_3d_cnn(
                    model_object=model_object,
                    radar_image_matrix=these_predictor_matrices[0],
                    sounding_matrix=this_sounding_matrix, verbose=True)
            else:
                this_probability_matrix = cnn.apply_2d3d_cnn(
                    model_object=model_object,
                    reflectivity_matrix_dbz=these_predictor_matrices[0],
                    azimuthal_shear_matrix_s01=these_predictor_matrices[1],
                    sounding_matrix=this_sounding_matrix, verbose=True)
        else:
            this_probability_matrix = cnn.apply_2d_or_3d_cnn(
                model_object=model_object,
                radar_image_matrix=these_predictor_matrices[0],
                sounding_matrix=this_sounding_matrix, verbose=True)

        print(SEPARATOR_STRING)

        if class_probability_matrix is None:
            class_probability_matrix = this_probability_matrix + 0.
        else:
            class_probability_matrix = numpy.concatenate(
                (class_probability_matrix, this_probability_matrix), axis=0
            )

    output_file_name = prediction_io.find_ungridded_file(
        directory_name=output_dir_name, raise_error_if_missing=False)

    print('Writing results to: "{0:s}"...'.format(output_file_name))
    prediction_io.write_ungridded_predictions(
        netcdf_file_name=output_file_name,
        class_probability_matrix=class_probability_matrix,
        observed_labels=observed_labels, storm_ids=full_storm_id_strings,
        storm_times_unix_sec=storm_times_unix_sec,
        target_name=training_option_dict[trainval_io.TARGET_NAME_KEY],
        model_file_name=model_file_name
    )
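# Self-contained sketch of the generator-accumulation pattern used in the
# while-loop above.  The dummy generator and shapes are hypothetical; in the
# real script the batches come from testing_io and the CNN.  Note that
# `matrix + 0.` forces a copy of the first batch rather than aliasing it.
import numpy


def _dummy_generator():
    """Yields three hypothetical batches of 8 examples x 2 class probs."""
    for _ in range(3):
        yield numpy.random.uniform(size=(8, 2))


generator_object = _dummy_generator()
class_probability_matrix = None

while True:
    try:
        this_probability_matrix = next(generator_object)
    except StopIteration:
        break

    if class_probability_matrix is None:
        class_probability_matrix = this_probability_matrix + 0.
    else:
        class_probability_matrix = numpy.concatenate(
            (class_probability_matrix, this_probability_matrix), axis=0
        )

print(class_probability_matrix.shape)  # (24, 2)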
def read_specific_examples(
        top_example_dir_name, desired_storm_ids, desired_times_unix_sec,
        option_dict, list_of_layer_operation_dicts=None):
    """Reads predictors for specific examples (storm objects).

    E = number of desired examples

    :param top_example_dir_name: Name of top-level directory with
        pre-processed examples.  Files therein will be found by
        `input_examples.find_example_file`.
    :param desired_storm_ids: length-E list of storm IDs (strings).
    :param desired_times_unix_sec: length-E numpy array of storm times.
    :param option_dict: See doc for any generator in this file.
    :param list_of_layer_operation_dicts: See doc for
        `gridrad_generator_2d_reduced`.  If you do not want to reduce radar
        images from 3-D to 2-D, leave this as None.
    :return: list_of_predictor_matrices: length-T list of numpy arrays, where
        T = number of input tensors to the model.  The first dimension of
        each numpy array has length E.
    :return: sounding_pressure_matrix_pascals: numpy array (E x H_s) of
        pressures.  If soundings were not read, this is None.
    """

    option_dict[trainval_io.SAMPLING_FRACTIONS_KEY] = None

    desired_spc_date_strings = [
        time_conversion.time_to_spc_date_string(t)
        for t in desired_times_unix_sec
    ]
    unique_spc_date_strings = numpy.unique(
        numpy.array(desired_spc_date_strings)
    ).tolist()

    myrorss_2d3d = None
    storm_ids = []
    storm_times_unix_sec = numpy.array([], dtype=int)
    list_of_predictor_matrices = None
    sounding_pressure_matrix_pascals = None

    for this_spc_date_string in unique_spc_date_strings:
        this_start_time_unix_sec = time_conversion.get_start_of_spc_date(
            this_spc_date_string)
        this_end_time_unix_sec = time_conversion.get_end_of_spc_date(
            this_spc_date_string)

        this_example_file_name = input_examples.find_example_file(
            top_directory_name=top_example_dir_name, shuffled=False,
            spc_date_string=this_spc_date_string)

        option_dict[trainval_io.EXAMPLE_FILES_KEY] = [this_example_file_name]
        option_dict[trainval_io.FIRST_STORM_TIME_KEY] = (
            this_start_time_unix_sec
        )
        option_dict[trainval_io.LAST_STORM_TIME_KEY] = this_end_time_unix_sec

        # Determine the data format (MYRORSS 2-D/3-D or not) from the first
        # example file read.
        if myrorss_2d3d is None:
            netcdf_dataset = netCDF4.Dataset(this_example_file_name)
            myrorss_2d3d = (
                input_examples.REFL_IMAGE_MATRIX_KEY in
                netcdf_dataset.variables
            )
            netcdf_dataset.close()

        if list_of_layer_operation_dicts is not None:
            this_generator = gridrad_generator_2d_reduced(
                option_dict=option_dict,
                list_of_operation_dicts=list_of_layer_operation_dicts,
                num_examples_total=LARGE_INTEGER)
        elif myrorss_2d3d:
            this_generator = myrorss_generator_2d3d(
                option_dict=option_dict, num_examples_total=LARGE_INTEGER)
        else:
            this_generator = generator_2d_or_3d(
                option_dict=option_dict, num_examples_total=LARGE_INTEGER)

        this_storm_object_dict = next(this_generator)

        these_desired_indices = numpy.where(numpy.logical_and(
            desired_times_unix_sec >= this_start_time_unix_sec,
            desired_times_unix_sec <= this_end_time_unix_sec
        ))[0]

        these_indices = tracking_utils.find_storm_objects(
            all_storm_ids=this_storm_object_dict[STORM_IDS_KEY],
            all_times_unix_sec=this_storm_object_dict[STORM_TIMES_KEY],
            storm_ids_to_keep=[
                desired_storm_ids[k] for k in these_desired_indices
            ],
            times_to_keep_unix_sec=desired_times_unix_sec[
                these_desired_indices],
            allow_missing=False
        )

        storm_ids += [
            this_storm_object_dict[STORM_IDS_KEY][k] for k in these_indices
        ]
        storm_times_unix_sec = numpy.concatenate((
            storm_times_unix_sec,
            this_storm_object_dict[STORM_TIMES_KEY][these_indices]
        ))

        this_pressure_matrix_pascals = this_storm_object_dict[
            SOUNDING_PRESSURES_KEY]

        if this_pressure_matrix_pascals is not None:
            this_pressure_matrix_pascals = this_pressure_matrix_pascals[
                these_indices, ...]

            if sounding_pressure_matrix_pascals is None:
                sounding_pressure_matrix_pascals = (
                    this_pressure_matrix_pascals + 0.
                )
            else:
                sounding_pressure_matrix_pascals = numpy.concatenate(
                    (sounding_pressure_matrix_pascals,
                     this_pressure_matrix_pascals),
                    axis=0
                )

        if list_of_predictor_matrices is None:
            num_matrices = len(this_storm_object_dict[INPUT_MATRICES_KEY])
            list_of_predictor_matrices = [None] * num_matrices

        for k in range(len(list_of_predictor_matrices)):
            this_new_matrix = this_storm_object_dict[INPUT_MATRICES_KEY][k][
                these_indices, ...]

            if list_of_predictor_matrices[k] is None:
                list_of_predictor_matrices[k] = this_new_matrix + 0.
            else:
                list_of_predictor_matrices[k] = numpy.concatenate(
                    (list_of_predictor_matrices[k], this_new_matrix), axis=0
                )

    # Sort examples into the order given by `desired_storm_ids` and
    # `desired_times_unix_sec`.
    sort_indices = tracking_utils.find_storm_objects(
        all_storm_ids=storm_ids, all_times_unix_sec=storm_times_unix_sec,
        storm_ids_to_keep=desired_storm_ids,
        times_to_keep_unix_sec=desired_times_unix_sec, allow_missing=False)

    for k in range(len(list_of_predictor_matrices)):
        list_of_predictor_matrices[k] = list_of_predictor_matrices[k][
            sort_indices, ...]

    if sounding_pressure_matrix_pascals is not None:
        sounding_pressure_matrix_pascals = sounding_pressure_matrix_pascals[
            sort_indices, ...]

    return list_of_predictor_matrices, sounding_pressure_matrix_pascals
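# Hypothetical call to read_specific_examples.  The directory, storm IDs, and
# times below are placeholders, and `option_dict` is assumed to have been
# built the same way as for any generator in this module.
import numpy

predictor_matrices, pressure_matrix_pascals = read_specific_examples(
    top_example_dir_name='/data/examples',
    desired_storm_ids=['storm_0001', 'storm_0002'],
    desired_times_unix_sec=numpy.array([1538000000, 1538003600], dtype=int),
    option_dict=option_dict  # built as documented for the generators
)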
def _run(input_example_dir_name, storm_metafile_name, num_examples_in_subset,
         subset_randomly, output_example_file_name):
    """Extracts desired examples and writes them to one file.

    This is effectively the main method.

    :param input_example_dir_name: See documentation at top of file.
    :param storm_metafile_name: Same.
    :param num_examples_in_subset: Same.
    :param subset_randomly: Same.
    :param output_example_file_name: Same.
    """

    print('Reading storm metadata from: "{0:s}"...'.format(
        storm_metafile_name))
    example_id_strings, example_times_unix_sec = (
        tracking_io.read_ids_and_times(storm_metafile_name)
    )

    if not 0 < num_examples_in_subset < len(example_id_strings):
        num_examples_in_subset = None

    if num_examples_in_subset is not None:
        if subset_randomly:
            these_indices = numpy.linspace(
                0, len(example_id_strings) - 1, num=len(example_id_strings),
                dtype=int)
            these_indices = numpy.random.choice(
                these_indices, size=num_examples_in_subset, replace=False)

            example_id_strings = [
                example_id_strings[k] for k in these_indices
            ]
            example_times_unix_sec = example_times_unix_sec[these_indices]
        else:
            example_id_strings = example_id_strings[:num_examples_in_subset]
            example_times_unix_sec = (
                example_times_unix_sec[:num_examples_in_subset]
            )

    example_spc_date_strings = numpy.array([
        time_conversion.time_to_spc_date_string(t)
        for t in example_times_unix_sec
    ])
    spc_date_strings = numpy.unique(example_spc_date_strings)

    example_file_name_by_day = [
        input_examples.find_example_file(
            top_directory_name=input_example_dir_name, shuffled=False,
            spc_date_string=d, raise_error_if_missing=True)
        for d in spc_date_strings
    ]

    num_days = len(spc_date_strings)

    for i in range(num_days):
        print('Reading data from: "{0:s}"...'.format(
            example_file_name_by_day[i]))
        all_example_dict = input_examples.read_example_file(
            netcdf_file_name=example_file_name_by_day[i],
            read_all_target_vars=True)

        these_indices = numpy.where(
            example_spc_date_strings == spc_date_strings[i]
        )[0]

        desired_indices = tracking_utils.find_storm_objects(
            all_id_strings=all_example_dict[input_examples.FULL_IDS_KEY],
            all_times_unix_sec=all_example_dict[
                input_examples.STORM_TIMES_KEY],
            id_strings_to_keep=[
                example_id_strings[k] for k in these_indices
            ],
            times_to_keep_unix_sec=example_times_unix_sec[these_indices],
            allow_missing=False)

        desired_example_dict = input_examples.subset_examples(
            example_dict=all_example_dict, indices_to_keep=desired_indices)

        print('Writing {0:d} desired examples to: "{1:s}"...'.format(
            len(desired_indices), output_example_file_name))
        input_examples.write_example_file(
            netcdf_file_name=output_example_file_name,
            example_dict=desired_example_dict, append_to_file=i > 0)
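# Aside: the linspace-then-choice construction above is equivalent to sampling
# indices directly, since numpy.random.choice(n, ...) draws from arange(n).
# The counts below are hypothetical.
import numpy

num_examples = 1000           # hypothetical number of storm objects
num_examples_in_subset = 100  # hypothetical subset size

these_indices = numpy.random.choice(
    num_examples, size=num_examples_in_subset, replace=False
)
print(len(these_indices))  # 100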