def aggregate_cmd(main_arguments):
    """
    Main routine for handling calls to the aggregation command.

    :param main_arguments: The command line arguments (minus the aggregate command)
    """
    import cis.exceptions as ex
    from cis.data_io.gridded_data import GriddedDataList

    # Aggregation only makes sense over a single datagroup
    if len(main_arguments.datagroups) > 1:
        __error_occurred("Aggregation can only be performed on one data group")

    group = main_arguments.datagroups[0]
    data = DataReader().read_single_datagroup(group)
    kernel_name = group.get("kernel", '')

    if not isinstance(data, GriddedDataList):
        # Ungridded data: aggregate onto the user-specified grid
        result = data.aggregate(how=kernel_name, **main_arguments.grid)
    else:
        # Deprecated gridded path: only a plain collapse over the named dimensions is supported
        logging.warning("The aggregate command is deprecated for GriddedData and will not be supported in future "
                        "versions of CIS. Please use 'collapse' instead.")
        if any(v is not None for v in main_arguments.grid.values()):
            raise ex.InvalidCommandLineOptionError("Grid specifications are not supported for Gridded aggregation.")
        result = data.collapsed(list(main_arguments.grid.keys()), how=kernel_name)

    result.save_data(main_arguments.output)
def test_GIVEN_multiple_variable_gridded_WHEN_read_data_THEN_GriddedDataList_returned(
        self):
    """Reading two gridded variables should return a GriddedDataList with one entry per variable."""
    variables = ['var1', 'var2']
    filenames = 'filename1'
    product = None
    gridded_data = make_square_5x3_2d_cube()
    # Force the iris cube to be treated as CIS GriddedData without a conversion call
    gridded_data.__class__ = GriddedData
    get_data_func = MagicMock(return_value=gridded_data)
    reader = DataReader(get_data_func=get_data_func)
    data = reader.read_data_list(filenames, variables, product)
    # Check the data read function is called correctly
    assert_that(get_data_func.call_count, is_(2))
    first_call_args = get_data_func.call_args_list[0][0]
    second_call_args = get_data_func.call_args_list[1][0]
    assert_that(first_call_args[0], is_([filenames]))
    assert_that(first_call_args[1], is_(variables[0]))
    assert_that(second_call_args[1], is_(variables[1]))
    assert_that(first_call_args[2], is_(product))
    # Check the data we got back is as expected
    assert_that(data, instance_of(GriddedDataList))
    assert_that(data[0].data.tolist(),
                is_(make_square_5x3_2d_cube().data.tolist()))
    assert_that(data[1].data.tolist(), is_(data[0].data.tolist()))
def col_cmd(main_arguments):
    """
    Main routine for handling calls to the collocate ('col') command.

    :param main_arguments: The command line arguments (minus the col command)
    """
    from cis.collocation.col_framework import get_kernel
    from cis.parse import check_boolean

    # Read the sample data
    missing_data_for_missing_sample = False
    if main_arguments.samplevariable is not None:
        sample_data = DataReader().read_data_list(main_arguments.samplefiles, main_arguments.samplevariable,
                                                  main_arguments.sampleproduct)[0]
        # With an explicit sample variable, default to masking output where the sample is missing
        missing_data_for_missing_sample = True
    else:
        sample_data = DataReader().read_coordinates(main_arguments.samplefiles, main_arguments.sampleproduct)

    # Unpack the sample options
    col_name, col_options = main_arguments.samplegroup.get('collocator', ('', {}))
    kern_name, kern_options = main_arguments.samplegroup.get('kernel', ('', {}))

    # A user-supplied option overrides the default chosen above; pop it so it is not
    # forwarded to the collocator as an unexpected keyword argument.
    missing_data_for_missing_sample = check_boolean(col_options.pop('missing_data_for_missing_sample',
                                                                    str(missing_data_for_missing_sample)), logging)

    kernel = get_kernel(kern_name)(**kern_options) if kern_name else None

    for input_group in main_arguments.datagroups:
        # Then collocate each datagroup
        data = DataReader().read_single_datagroup(input_group)
        output = data.collocated_onto(sample_data, how=col_name, kernel=kernel,
                                      missing_data_for_missing_sample=missing_data_for_missing_sample,
                                      **col_options)
        output.save_data(main_arguments.output)
def stats_cmd(main_arguments):
    """
    Main routine for handling calls to the statistics command.

    :param main_arguments: The command line arguments (minus the stats command)
    """
    from cis.stats import StatsAnalyzer
    from cis.data_io.gridded_data import GriddedDataList

    data_list = DataReader().read_datagroups(main_arguments.datagroups)
    results = StatsAnalyzer(*data_list).analyze()

    # Print a banner sized to the longest line, then each statistical result
    header = "RESULTS OF STATISTICAL COMPARISON:"
    note = "Compared all points which have non-missing values in both variables"
    width = max(len(header), len(note))
    for banner_line in (width * '=', header, width * '-', note, width * '='):
        print(banner_line)
    for result in results:
        print(result.pprint())

    if main_arguments.output:
        # Persist the results as gridded 'cubes', recording provenance in the history
        cubes = GriddedDataList([result.as_cube() for result in results])
        variables = []
        filenames = []
        for datagroup in main_arguments.datagroups:
            variables.extend(datagroup['variables'])
            filenames.extend(datagroup['filenames'])
        history = "Statistical comparison performed using CIS version " + __version__ + \
                  "\n variables: " + str(variables) + \
                  "\n from files: " + str(set(filenames))
        cubes.add_history(history)
        cubes.save_data(main_arguments.output)
def test_GIVEN_multiple_datagroups_WHEN_read_datagroups_THEN_get_data_called_correctly(
        self):
    """Reading two datagroups should call the read function once per variable, using
    each group's own filenames and product."""
    datagroup_1 = {
        'variables': ['var1', 'var2'],
        'filenames': ['filename1.nc'],
        'product': None
    }
    datagroup_2 = {
        'variables': ['var3', 'var4'],
        'filenames': ['filename2.nc'],
        'product': 'cis'
    }
    get_data_func = MagicMock(
        return_value=make_regular_2d_ungridded_data())
    # Map each filename to the variables it 'contains'
    get_var_func = MagicMock(side_effect=lambda f: {
        'filename1.nc': ['var1', 'var2'],
        'filename2.nc': ['var3', 'var4']
    }[f])
    reader = DataReader(get_data_func=get_data_func,
                        get_variables_func=get_var_func)
    data = reader.read_datagroups([datagroup_1, datagroup_2])
    assert_that(get_data_func.call_count, is_(4))
    assert_that(get_data_func.call_args_list[0][0],
                is_((['filename1.nc'], 'var1', None)))
    assert_that(get_data_func.call_args_list[1][0],
                is_((['filename1.nc'], 'var2', None)))
    assert_that(get_data_func.call_args_list[2][0],
                is_((['filename2.nc'], 'var3', 'cis')))
    assert_that(get_data_func.call_args_list[3][0],
                is_((['filename2.nc'], 'var4', 'cis')))
def test_GIVEN_gridded_datagroups_WHEN_read_datagroups_THEN_data_returned_in_list(
        self):
    """Reading gridded datagroups should return all variables flattened into one list,
    in datagroup order."""
    datagroup_1 = {
        'variables': ['var1', 'var2'],
        'filenames': ['filename1.nc'],
        'product': None
    }
    datagroup_2 = {
        'variables': ['var3'],
        'filenames': ['filename2.nc'],
        'product': 'cis'
    }
    var1 = make_from_cube(make_square_5x3_2d_cube())
    var2 = make_from_cube(make_square_5x3_2d_cube())
    var3 = make_from_cube(make_square_5x3_2d_cube())
    # Successive reads return the three distinct variables in order
    get_data_func = MagicMock(side_effect=[var1, var2, var3])
    get_var_func = MagicMock(side_effect=lambda f: {
        'filename1.nc': ['var1', 'var2'],
        'filename2.nc': ['var3']
    }[f])
    reader = DataReader(get_data_func=get_data_func,
                        get_variables_func=get_var_func)
    data = reader.read_datagroups([datagroup_1, datagroup_2])
    assert_that(len(data), is_(3))
    assert_that(data[0], is_(var1))
    assert_that(data[1], is_(var2))
    assert_that(data[2], is_(var3))
def test_GIVEN_single_variable_WHEN_aggregate_THEN_DataWriter_called_correctly(self):
    """Aggregating one variable should write the aggregator's output to the given file."""
    variables = 'var_name'
    filenames = 'filename'
    output_file = 'output.hdf'
    kernel = 'mean'
    grid = None
    input_data = GriddedDataList([make_from_cube(make_square_5x3_2d_cube())])
    # Output differs from input (+1) so we can tell which one was written out
    output_data = make_from_cube(make_square_5x3_2d_cube() + 1)
    mock_data_reader = DataReader()
    mock_data_reader.read_data_list = MagicMock(return_value=input_data)
    mock_data_writer = DataWriter()
    mock_data_writer.write_data = Mock()
    mock_aggregator = Aggregator(None, None)
    mock_aggregator.aggregate_gridded = MagicMock(return_value=output_data)  # Return the modified data array
    aggregate = Aggregate(grid, output_file, data_reader=mock_data_reader, data_writer=mock_data_writer)
    aggregate._create_aggregator = MagicMock(return_value=mock_aggregator)
    aggregate.aggregate(variables, filenames, None, kernel)
    assert_that(mock_data_writer.write_data.call_count, is_(1))
    written_data = mock_data_writer.write_data.call_args[0][0]
    written_filename = mock_data_writer.write_data.call_args[0][1]
    assert_that(written_data.data.tolist(),
                is_([[2, 3, 4], [5, 6, 7], [8, 9, 10], [11, 12, 13], [14, 15, 16]]))
    assert_that(written_filename, is_(output_file))
def test_GIVEN_single_variable_WHEN_subset_THEN_DataWriter_called_correctly(self):
    """Subsetting one ungridded variable should write the subsetter's output to the given file."""
    variable = 'var_name'
    filename = 'filename'
    xmin, xmax = -10, 10
    ymin, ymax = 40, 60
    limits = {'x': SubsetLimits(xmin, xmax, False),
              'y': SubsetLimits(ymin, ymax, False)}
    output_file = 'output.hdf'

    def _mock_subset(data, constraint):
        data.data += 1  # Modify the data slightly so we can be sure it's passed in correctly
        return data

    mock_subsetter = Subsetter()
    mock_subsetter.subset = _mock_subset
    mock_data_reader = DataReader()
    mock_data_reader.read_data_list = MagicMock(return_value=make_regular_2d_ungridded_data())
    mock_data_writer = DataWriter()
    mock_data_writer.write_data = MagicMock()
    subset = Subset(limits, output_file, subsetter=mock_subsetter,
                    data_reader=mock_data_reader, data_writer=mock_data_writer)
    subset.subset(variable, filename, product=None)
    assert_that(mock_data_writer.write_data.call_count, is_(1))
    written_data = mock_data_writer.write_data.call_args[0][0]
    written_filename = mock_data_writer.write_data.call_args[0][1]
    # Original data was 1..15, so the +1 applied by the mock subsetter yields 2..16
    assert_that(written_data.data_flattened.tolist(),
                is_([2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]))
    assert_that(written_filename, is_(output_file))
def test_GIVEN_single_variable_WHEN_subset_THEN_Subsetter_called_correctly(self):
    """Subsetting should pass the read data and an ungridded constraint with the given limits."""
    variable = 'var_name'
    filename = 'filename'
    xmin, xmax = -10, 10
    ymin, ymax = 40, 60
    limits = {'x': SubsetLimits(xmin, xmax, False),
              'y': SubsetLimits(ymin, ymax, False)}
    output_file = 'output.hdf'
    mock_data_reader = DataReader()
    mock_data_reader.read_data_list = MagicMock(return_value=make_regular_2d_ungridded_data())
    mock_data_writer = DataWriter()
    mock_data_writer.write_data = Mock()
    mock_subsetter = Subsetter()
    mock_subsetter.subset = MagicMock(side_effect=lambda *args: args[0])  # Return the data array unmodified
    subset = Subset(limits, output_file, subsetter=mock_subsetter,
                    data_reader=mock_data_reader, data_writer=mock_data_writer)
    subset.subset(variable, filename, product=None)
    assert_that(mock_subsetter.subset.call_count, is_(1))
    called_data = mock_subsetter.subset.call_args[0][0]
    called_constraint = mock_subsetter.subset.call_args[0][1]
    assert_that(called_data.data_flattened.tolist(),
                is_([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]))
    assert_that(called_constraint, instance_of(UngriddedSubsetConstraint))
    # _limits maps coordinate name -> tuple whose [1:3] slice holds (min, max)
    assert_that(called_constraint._limits['lat'][1:3], is_((ymin, ymax)))
    assert_that(called_constraint._limits['lon'][1:3], is_((xmin, xmax)))
def test_GIVEN_multiple_variables_WHEN_subset_THEN_DataWriter_called_correctly(self):
    """Subsetting multiple gridded variables should write all of them, modified, in one call."""
    variables = ['var_name1', 'var_name2']
    filename = 'filename'
    xmin, xmax = 0, 5
    ymin, ymax = -5, 5
    limits = {'x': SubsetLimits(xmin, xmax, False),
              'y': SubsetLimits(ymin, ymax, False)}
    output_file = 'output.hdf'

    def _mock_subset(data, constraint):
        # Modify the data slightly so we can be sure it's passed in correctly
        for var in data:
            var.data += 1
        return data

    mock_subsetter = Subsetter()
    mock_subsetter.subset = _mock_subset
    mock_data_reader = DataReader()
    mock_data_reader.read_data_list = MagicMock(return_value=GriddedDataList([make_square_5x3_2d_cube(),
                                                                             make_square_5x3_2d_cube()]))
    mock_data_writer = DataWriter()
    mock_data_writer.write_data = MagicMock()
    subset = Subset(limits, output_file, subsetter=mock_subsetter,
                    data_reader=mock_data_reader, data_writer=mock_data_writer)
    subset.subset(variables, filename, product=None)
    assert_that(mock_data_writer.write_data.call_count, is_(1))
    written_data = mock_data_writer.write_data.call_args[0][0]
    written_filename = mock_data_writer.write_data.call_args[0][1]
    assert_that(written_data[0].data.tolist(),
                is_([[2, 3, 4], [5, 6, 7], [8, 9, 10], [11, 12, 13], [14, 15, 16]]))
    # BUG FIX: assert_that's second argument must be a matcher. The original passed a
    # plain list, which makes PyHamcrest treat the call as a bool-assertion of the first
    # argument only — the two variables were never actually compared.
    assert_that(written_data[1].data.tolist(), is_(written_data[0].data.tolist()))
    assert_that(written_filename, is_(output_file))
def test_GIVEN_multiple_variables_WHEN_subset_THEN_Subsetter_called_correctly(self):
    """Subsetting gridded data should pass a gridded constraint built from the given limits."""
    variables = ['var_name1', 'var_name2']
    filename = 'filename'
    xmin, xmax = 0, 5
    ymin, ymax = -5, 5
    limits = {'x': SubsetLimits(xmin, xmax, False),
              'y': SubsetLimits(ymin, ymax, False)}
    output_file = 'output.hdf'
    mock_data_reader = DataReader()
    mock_data_reader.read_data_list = MagicMock(return_value=GriddedDataList(2 * [make_square_5x3_2d_cube()]))
    mock_data_writer = DataWriter()
    mock_data_writer.write_data = Mock()
    mock_subsetter = Subsetter()
    mock_subsetter.subset = MagicMock(side_effect=lambda *args: args[0])  # Return the data list unmodified
    subset = Subset(limits, output_file, subsetter=mock_subsetter,
                    data_reader=mock_data_reader, data_writer=mock_data_writer)
    subset.subset(variables, filename, product=None)
    assert_that(mock_subsetter.subset.call_count, is_(1))
    called_data = mock_subsetter.subset.call_args[0][0]
    called_constraint = mock_subsetter.subset.call_args[0][1]
    assert_that(called_data[0].data.tolist(),
                is_([[1, 2, 3], [4, 5, 6], [7, 8, 9], [10, 11, 12], [13, 14, 15]]))
    assert_that(called_data[1].data.tolist(), is_(called_data[0].data.tolist()))
    assert_that(called_constraint, instance_of(GriddedSubsetConstraint))
    # Gridded constraints use full coordinate names, unlike the ungridded 'lat'/'lon'
    assert_that(called_constraint._limits['latitude'][1:3], is_((ymin, ymax)))
    assert_that(called_constraint._limits['longitude'][1:3], is_((xmin, xmax)))
def aggregate_cmd(main_arguments):
    """
    Main routine for handling calls to the aggregation command.

    :param main_arguments: The command line arguments (minus the aggregate command)
    """
    import cis.exceptions as ex
    from cis.data_io.gridded_data import GriddedDataList

    # Aggregation only makes sense over a single datagroup
    if len(main_arguments.datagroups) > 1:
        __error_occurred("Aggregation can only be performed on one data group")
    input_group = main_arguments.datagroups[0]
    data = DataReader().read_single_datagroup(input_group)
    if isinstance(data, GriddedDataList):
        # Deprecated gridded path: only a plain collapse is supported, not re-gridding
        logging.warning("The aggregate command is deprecated for GriddedData and will not be supported in future "
                        "versions of CIS. Please use 'collapse' instead.")
        if any(v is not None for v in main_arguments.grid.values()):
            raise ex.InvalidCommandLineOptionError("Grid specifications are not supported for Gridded aggregation.")
        output = data.collapsed(list(main_arguments.grid.keys()), how=input_group.get("kernel", ''))
    else:
        # Ungridded data: aggregate onto the user-specified grid
        output = data.aggregate(how=input_group.get("kernel", ''), **main_arguments.grid)
    output.save_data(main_arguments.output)
def test_GIVEN_no_matching_variables_found_overall_WHEN_read_data_THEN_raises_Error(self):
    """If none of the wildcard patterns match any file variable, reading should raise ValueError."""
    variables = ['test?.hdf', '*.nc']
    file_vars = ['sample_file.hdf', 'aeronet.lev20']
    filenames = 'filename1'
    get_data_func = MagicMock()
    get_var_func = MagicMock(return_value=file_vars)
    reader = DataReader(get_data_func=get_data_func, get_variables_func=get_var_func)
    with self.assertRaises(ValueError):
        reader.read_data_list(filenames, variables)[0]
def test_GIVEN_no_matching_variables_for_wildcards_WHEN_read_data_THEN_no_Error(self):
    """Unmatched wildcards are ignored as long as at least one variable matches."""
    variables = ['aeronet.lev20', '*.nc', 'test?.hdf']
    file_vars = ['aeronet.lev20', 'var2.hdf']
    filenames = 'filename1'
    get_data_func = MagicMock(return_value=make_regular_2d_ungridded_data())
    get_var_func = MagicMock(return_value=file_vars)
    reader = DataReader(get_data_func=get_data_func, get_variables_func=get_var_func)
    reader.read_data_list(filenames, variables)[0]
    # Only the literal variable name matched; the wildcards matched nothing
    assert_that(reader._get_data_func.call_count, is_(1))
    assert_that(reader._get_data_func.call_args_list[0][0][1], is_('aeronet.lev20'))
def test_GIVEN_aliases_missing_WHEN_read_datagroups_THEN_read_OK_aliases_default_to_var_names(self):
    """Without explicit aliases the read succeeds and the data keeps its own variable name."""
    datagroup = {'variables': ['var1'], 'filenames': ['filename1.nc'], 'product': None}
    var1 = make_from_cube(make_square_5x3_2d_cube())
    get_data_func = MagicMock(side_effect=[var1])
    get_var_func = MagicMock(side_effect=['var1'])
    reader = DataReader(get_data_func=get_data_func, get_variables_func=get_var_func)
    data = reader.read_datagroups([datagroup])
    # 'dummy' is the var_name carried by the test cube fixture
    assert_that(data[0].var_name, is_('dummy'))
def test_GIVEN_aliases_WHEN_read_datagroups_THEN_output_data_has_aliases(self):
    """An 'aliases' entry in the datagroup should be attached to the returned data."""
    datagroup = {'variables': ['var1'], 'filenames': ['filename1.nc'], 'product': None,
                 'aliases': ['alias1']}
    get_data_func = MagicMock(return_value=make_from_cube(make_square_5x3_2d_cube()))
    get_var_func = MagicMock(return_value=['var1'])
    reader = DataReader(get_data_func=get_data_func, get_variables_func=get_var_func)
    data = reader.read_datagroups([datagroup])
    assert_that(data[0].alias, is_('alias1'))
def test_GIVEN_no_matching_variables_found_overall_WHEN_read_data_THEN_raises_Error(
        self):
    """If none of the wildcard patterns match any file variable, reading should raise ValueError."""
    variables = ['test?.hdf', '*.nc']
    file_vars = ['sample_file.hdf', 'aeronet.lev20']
    filenames = 'filename1'
    get_data_func = MagicMock()
    get_var_func = MagicMock(return_value=file_vars)
    reader = DataReader(get_data_func=get_data_func,
                        get_variables_func=get_var_func)
    with self.assertRaises(ValueError):
        reader.read_data_list(filenames, variables)[0]
def test_GIVEN_multiple_variable_mix_of_gridded_ungridded_WHEN_read_data_THEN_raises_TypeError(self):
    """Mixing gridded and ungridded variables in one read should raise TypeError."""
    variables = ['var1', 'var2']
    filenames = 'filename1'
    product = None
    gridded_data = make_square_5x3_2d_cube()
    # Force the iris cube to be treated as CIS GriddedData without a conversion call
    gridded_data.__class__ = GriddedData
    ungridded_data = make_regular_2d_ungridded_data()
    get_data_func = MagicMock(side_effect=[gridded_data, ungridded_data])
    reader = DataReader(get_data_func=get_data_func)
    with self.assertRaises(TypeError):
        data = reader.read_data_list(filenames, variables, product)[0]
def test_GIVEN_wildcards_WHEN_read_data_THEN_matching_variables_identified(self):
    """Wildcard patterns should expand to all matching file variables, in file order."""
    variables = ['*.nc', 'test?.hdf']
    file_vars = ['aeronet.lev20', 'var2.hdf', 'netcdf1.nc', 'netcdf3.nc', 'test.hdf', 'test1.hdf']
    # 'test?.hdf' requires exactly one character before '.hdf', so 'test.hdf' is excluded
    should_match = ['netcdf1.nc', 'netcdf3.nc', 'test1.hdf']
    filenames = 'filename1'
    get_data_func = MagicMock(return_value=make_regular_2d_ungridded_data())
    get_var_func = MagicMock(return_value=file_vars)
    reader = DataReader(get_data_func=get_data_func, get_variables_func=get_var_func)
    reader.read_data_list(filenames, variables)[0]
    assert_that(reader._get_data_func.call_count, is_(len(should_match)))
    for i in range(len(should_match)):
        assert_that(reader._get_data_func.call_args_list[i][0][1], is_(should_match[i]))
def test_GIVEN_multiple_variable_mix_of_gridded_ungridded_WHEN_read_data_THEN_raises_TypeError(
        self):
    """Mixing gridded and ungridded variables in one read should raise TypeError."""
    variables = ['var1', 'var2']
    filenames = 'filename1'
    product = None
    gridded_data = make_square_5x3_2d_cube()
    # Force the iris cube to be treated as CIS GriddedData without a conversion call
    gridded_data.__class__ = GriddedData
    ungridded_data = make_regular_2d_ungridded_data()
    get_data_func = MagicMock(side_effect=[gridded_data, ungridded_data])
    reader = DataReader(get_data_func=get_data_func)
    with self.assertRaises(TypeError):
        data = reader.read_data_list(filenames, variables, product)[0]
def test_GIVEN_not_enough_aliases_WHEN_read_datagroups_THEN_raises_ValueError(self):
    """Supplying fewer aliases than variables should raise ValueError."""
    datagroup = {'variables': ['var1', 'var2'], 'filenames': ['filename1.nc'], 'product': None,
                 'aliases': ['alias1']}
    var1 = make_from_cube(make_square_5x3_2d_cube())
    var2 = make_from_cube(make_square_5x3_2d_cube())
    get_data_func = MagicMock(side_effect=[var1, var2])
    get_var_func = MagicMock(side_effect=['var1', 'var2'])
    reader = DataReader(get_data_func=get_data_func, get_variables_func=get_var_func)
    with self.assertRaises(ValueError):
        data = reader.read_datagroups([datagroup])
def evaluate_cmd(main_arguments):
    """
    Main routine for handling calls to the evaluation command.

    :param main_arguments: The command line arguments (minus the eval command)
    """
    from cis.evaluate import Calculator

    # Read every datagroup up front; the expression may reference any of them
    data_list = DataReader().read_datagroups(main_arguments.datagroups)
    result = Calculator().evaluate(data_list, main_arguments.expr, main_arguments.output_var,
                                   main_arguments.units, main_arguments.attributes)
    result.save_data(main_arguments.output)
def test_GIVEN_aliases_missing_WHEN_read_datagroups_THEN_read_OK_aliases_default_to_var_names(
        self):
    """Without explicit aliases the read succeeds and the data keeps its own variable name."""
    datagroup = {
        'variables': ['var1'],
        'filenames': ['filename1.nc'],
        'product': None
    }
    var1 = make_from_cube(make_square_5x3_2d_cube())
    get_data_func = MagicMock(side_effect=[var1])
    get_var_func = MagicMock(side_effect=['var1'])
    reader = DataReader(get_data_func=get_data_func,
                        get_variables_func=get_var_func)
    data = reader.read_datagroups([datagroup])
    # 'dummy' is the var_name carried by the test cube fixture
    assert_that(data[0].var_name, is_('dummy'))
def test_GIVEN_no_matching_variables_for_wildcards_WHEN_read_data_THEN_no_Error(
        self):
    """Unmatched wildcards are ignored as long as at least one variable matches."""
    variables = ['aeronet.lev20', '*.nc', 'test?.hdf']
    file_vars = ['aeronet.lev20', 'var2.hdf']
    filenames = 'filename1'
    get_data_func = MagicMock(
        return_value=make_regular_2d_ungridded_data())
    get_var_func = MagicMock(return_value=file_vars)
    reader = DataReader(get_data_func=get_data_func,
                        get_variables_func=get_var_func)
    reader.read_data_list(filenames, variables)[0]
    # Only the literal variable name matched; the wildcards matched nothing
    assert_that(reader._get_data_func.call_count, is_(1))
    assert_that(reader._get_data_func.call_args_list[0][0][1],
                is_('aeronet.lev20'))
def test_GIVEN_gridded_and_ungridded_datagroups_WHEN_read_datagroups_THEN_raises_TypeError(self):
    """Mixing a gridded and an ungridded datagroup should raise TypeError."""
    datagroup_1 = {'variables': ['var1'], 'filenames': ['filename1.nc'], 'product': None}
    datagroup_2 = {'variables': ['var3'], 'filenames': ['filename2.nc'], 'product': 'cis'}
    var1 = make_from_cube(make_square_5x3_2d_cube())
    var2 = make_regular_2d_ungridded_data()
    get_data_func = MagicMock(side_effect=[var1, var2])
    get_var_func = MagicMock(side_effect=lambda f: {'filename1.nc': ['var1'],
                                                    'filename2.nc': ['var3']}[f])
    reader = DataReader(get_data_func=get_data_func, get_variables_func=get_var_func)
    with self.assertRaises(TypeError):
        reader.read_datagroups([datagroup_1, datagroup_2])
def test_GIVEN_aliases_WHEN_read_datagroups_THEN_output_data_has_aliases(
        self):
    """An 'aliases' entry in the datagroup should be attached to the returned data."""
    datagroup = {
        'variables': ['var1'],
        'filenames': ['filename1.nc'],
        'product': None,
        'aliases': ['alias1']
    }
    get_data_func = MagicMock(
        return_value=make_from_cube(make_square_5x3_2d_cube()))
    get_var_func = MagicMock(return_value=['var1'])
    reader = DataReader(get_data_func=get_data_func,
                        get_variables_func=get_var_func)
    data = reader.read_datagroups([datagroup])
    assert_that(data[0].alias, is_('alias1'))
def plot_cmd(main_arguments):
    """
    Main routine for handling calls to the 'plot' command.
    Reads in the data files specified and passes the rest of the arguments to the plot function.

    :param main_arguments: The command line arguments
    """
    from cis.plotting.formatted_plot import Plotter
    from cis.data_io.data_reader import DataReader

    data = DataReader().read_datagroups(main_arguments.datagroups)

    # We have to pop off the arguments which plot isn't expecting so that it treats everything else as an mpl kwarg
    main_arguments = vars(main_arguments)
    _ = main_arguments.pop('command')
    _ = main_arguments.pop("quiet")
    _ = main_arguments.pop("verbose")
    _ = main_arguments.pop("force_overwrite")
    _ = main_arguments.pop("output_var", None)

    # Per-layer options are everything in a datagroup apart from the read-related keys
    layer_opts = [{
        k: v
        for k, v in d.items() if k not in ['variables', 'filenames', 'product']
    } for d in main_arguments.pop('datagroups')]
    Plotter(data, layer_opts=layer_opts, **main_arguments)
def test_GIVEN_not_enough_aliases_WHEN_read_datagroups_THEN_raises_ValueError(
        self):
    """Supplying fewer aliases than variables should raise ValueError."""
    datagroup = {
        'variables': ['var1', 'var2'],
        'filenames': ['filename1.nc'],
        'product': None,
        'aliases': ['alias1']
    }
    var1 = make_from_cube(make_square_5x3_2d_cube())
    var2 = make_from_cube(make_square_5x3_2d_cube())
    get_data_func = MagicMock(side_effect=[var1, var2])
    get_var_func = MagicMock(side_effect=['var1', 'var2'])
    reader = DataReader(get_data_func=get_data_func,
                        get_variables_func=get_var_func)
    with self.assertRaises(ValueError):
        data = reader.read_datagroups([datagroup])
def test_GIVEN_multiple_datagroups_WHEN_read_datagroups_THEN_get_data_called_correctly(self):
    """Reading two datagroups should call the read function once per variable, using
    each group's own filenames and product."""
    datagroup_1 = {'variables': ['var1', 'var2'], 'filenames': ['filename1.nc'], 'product': None}
    datagroup_2 = {'variables': ['var3', 'var4'], 'filenames': ['filename2.nc'], 'product': 'cis'}
    get_data_func = MagicMock(return_value=make_regular_2d_ungridded_data())
    # Map each filename to the variables it 'contains'
    get_var_func = MagicMock(side_effect=lambda f: {'filename1.nc': ['var1', 'var2'],
                                                    'filename2.nc': ['var3', 'var4']}[f])
    reader = DataReader(get_data_func=get_data_func, get_variables_func=get_var_func)
    data = reader.read_datagroups([datagroup_1, datagroup_2])
    assert_that(get_data_func.call_count, is_(4))
    assert_that(get_data_func.call_args_list[0][0], is_((['filename1.nc'], 'var1', None)))
    assert_that(get_data_func.call_args_list[1][0], is_((['filename1.nc'], 'var2', None)))
    assert_that(get_data_func.call_args_list[2][0], is_((['filename2.nc'], 'var3', 'cis')))
    assert_that(get_data_func.call_args_list[3][0], is_((['filename2.nc'], 'var4', 'cis')))
def col_cmd(main_arguments):
    """
    Main routine for handling calls to the collocate ('col') command.

    :param main_arguments: The command line arguments (minus the col command)
    """
    from cis.exceptions import ClassNotFoundError, CISError
    from cis.collocation.col import Collocate

    output_file = main_arguments.output
    data_reader = DataReader()
    missing_data_for_missing_samples = False
    if main_arguments.samplevariable is not None:
        sample_data = data_reader.read_data_list(main_arguments.samplefiles, main_arguments.samplevariable,
                                                 main_arguments.sampleproduct)[0]
    else:
        sample_data = data_reader.read_coordinates(main_arguments.samplefiles, main_arguments.sampleproduct)
        # Coordinate-only samples have no values to be 'missing', so mask the output there
        missing_data_for_missing_samples = True

    try:
        col = Collocate(sample_data, missing_data_for_missing_samples)
    except IOError as e:
        __error_occurred("There was an error reading one of the files: \n" + str(e))

    # Unpack the optional (name, options) pairs; each may be absent (None) in the samplegroup.
    # NOTE(review): kern_options falls back to None while col_options falls back to {} —
    # presumably Collocate.collocate accepts both; confirm before unifying.
    col_name = main_arguments.samplegroup['collocator'][0] if main_arguments.samplegroup[
        'collocator'] is not None else None
    col_options = main_arguments.samplegroup['collocator'][1] if main_arguments.samplegroup[
        'collocator'] is not None else {}
    kern_name = main_arguments.samplegroup['kernel'][0] if main_arguments.samplegroup['kernel'] is not None else None
    kern_options = main_arguments.samplegroup['kernel'][1] if main_arguments.samplegroup['kernel'] is not None else None

    for input_group in main_arguments.datagroups:
        variables = input_group['variables']
        filenames = input_group['filenames']
        product = input_group["product"] if input_group["product"] is not None else None
        data = data_reader.read_data_list(filenames, variables, product)
        data_writer = DataWriter()
        try:
            output = col.collocate(data, col_name, col_options, kern_name, kern_options)
            data_writer.write_data(output, output_file)
        except ClassNotFoundError as e:
            __error_occurred(str(e) + "\nInvalid collocation option.")
        except (CISError, IOError) as e:
            __error_occurred(e)
def test_GIVEN_single_variable_ungridded_WHEN_read_data_THEN_GriddedData_returned(self):
    """Reading a single ungridded variable should return UngriddedData.

    NOTE(review): the test name says 'GriddedData' but the body asserts UngriddedData —
    the name appears to be a copy-paste slip; the assertions are the authority here.
    """
    variables = 'var1'
    filenames = 'filename1'
    product = None
    get_data_func = MagicMock(return_value=make_regular_2d_ungridded_data())
    reader = DataReader(get_data_func=get_data_func)
    data = reader.read_data_list(filenames, variables, product)[0]
    # Check the data read function is called correctly
    assert_that(get_data_func.call_count, is_(1))
    call_args = get_data_func.call_args_list[0][0]
    assert_that(call_args[0], is_([filenames]))
    assert_that(call_args[1], is_(variables))
    assert_that(call_args[2], is_(product))
    # Check the data we got back is as expected
    assert_that(data, instance_of(UngriddedData))
    assert_that(data.data.tolist(), is_(make_regular_2d_ungridded_data().data.tolist()))
def test_GIVEN_wildcards_WHEN_read_data_THEN_matching_variables_identified(
        self):
    """Wildcard patterns should expand to all matching file variables, in file order."""
    variables = ['*.nc', 'test?.hdf']
    file_vars = [
        'aeronet.lev20', 'var2.hdf', 'netcdf1.nc', 'netcdf3.nc', 'test.hdf',
        'test1.hdf'
    ]
    # 'test?.hdf' requires exactly one character before '.hdf', so 'test.hdf' is excluded
    should_match = ['netcdf1.nc', 'netcdf3.nc', 'test1.hdf']
    filenames = 'filename1'
    get_data_func = MagicMock(
        return_value=make_regular_2d_ungridded_data())
    get_var_func = MagicMock(return_value=file_vars)
    reader = DataReader(get_data_func=get_data_func,
                        get_variables_func=get_var_func)
    reader.read_data_list(filenames, variables)[0]
    assert_that(reader._get_data_func.call_count, is_(len(should_match)))
    for i in range(len(should_match)):
        assert_that(reader._get_data_func.call_args_list[i][0][1],
                    is_(should_match[i]))
def col_cmd(main_arguments):
    """
    Main routine for handling calls to the collocate ('col') command.

    :param main_arguments: The command line arguments (minus the col command)
    """
    from cis.collocation.col_framework import get_kernel
    from cis.parse import check_boolean

    # Read the sample data
    missing_data_for_missing_sample = False
    if main_arguments.samplevariable is not None:
        sample_data = DataReader().read_data_list(
            main_arguments.samplefiles, main_arguments.samplevariable,
            main_arguments.sampleproduct)[0]
        # With an explicit sample variable, default to masking output where the sample is missing
        missing_data_for_missing_sample = True
    else:
        sample_data = DataReader().read_coordinates(
            main_arguments.samplefiles, main_arguments.sampleproduct)

    # Unpack the sample options
    col_name, col_options = main_arguments.samplegroup.get(
        'collocator', ('', {}))
    kern_name, kern_options = main_arguments.samplegroup.get(
        'kernel', ('', {}))

    # A user-supplied option overrides the default chosen above; pop it so it is not
    # forwarded to the collocator as an unexpected keyword argument.
    missing_data_for_missing_sample = check_boolean(
        col_options.pop('missing_data_for_missing_sample',
                        str(missing_data_for_missing_sample)), logging)

    kernel = get_kernel(kern_name)(**kern_options) if kern_name else None

    for input_group in main_arguments.datagroups:
        # Then collocate each datagroup
        data = DataReader().read_single_datagroup(input_group)
        output = data.collocated_onto(
            sample_data,
            how=col_name,
            kernel=kernel,
            missing_data_for_missing_sample=missing_data_for_missing_sample,
            **col_options)
        output.save_data(main_arguments.output)
def test_GIVEN_gridded_datagroups_WHEN_read_datagroups_THEN_data_returned_in_list(self):
    """Reading gridded datagroups should return a GriddedDataList with all variables in order."""
    datagroup_1 = {'variables': ['var1', 'var2'], 'filenames': ['filename1.nc'], 'product': None}
    datagroup_2 = {'variables': ['var3'], 'filenames': ['filename2.nc'], 'product': 'cis'}
    var1 = make_from_cube(make_square_5x3_2d_cube())
    var2 = make_from_cube(make_square_5x3_2d_cube())
    var3 = make_from_cube(make_square_5x3_2d_cube())
    # Successive reads return the three distinct variables in order
    get_data_func = MagicMock(side_effect=[var1, var2, var3])
    get_var_func = MagicMock(side_effect=lambda f: {'filename1.nc': ['var1', 'var2'],
                                                    'filename2.nc': ['var3']}[f])
    reader = DataReader(get_data_func=get_data_func, get_variables_func=get_var_func)
    data = reader.read_datagroups([datagroup_1, datagroup_2])
    # Single-argument assert_that is PyHamcrest's boolean-assertion form
    assert_that(isinstance(data, GriddedDataList))
    assert_that(len(data), is_(3))
    assert_that(data[0], is_(var1))
    assert_that(data[1], is_(var2))
    assert_that(data[2], is_(var3))
def subset_cmd(main_arguments):
    """
    Main routine for handling calls to the subset command.

    :param main_arguments: The command line arguments (minus the subset command)
    """
    import cis.exceptions as ex

    # Subsetting only makes sense over a single datagroup
    if len(main_arguments.datagroups) > 1:
        __error_occurred("Subsetting can only be performed on one data group")

    source = DataReader().read_single_datagroup(main_arguments.datagroups[0])
    result = source.subset(**main_arguments.limits)

    if result is None:
        # Constraints exclude all data.
        raise ex.NoDataInSubsetError("No output created - constraints exclude all data")

    result.save_data(main_arguments.output)
def subset_cmd(main_arguments):
    """
    Main routine for handling calls to the subset command.

    :param main_arguments: The command line arguments (minus the subset command)
    """
    import cis.exceptions as ex

    # Subsetting only makes sense over a single datagroup
    if len(main_arguments.datagroups) > 1:
        __error_occurred("Subsetting can only be performed on one data group")
    data = DataReader().read_single_datagroup(main_arguments.datagroups[0])
    subset = data.subset(**main_arguments.limits)
    if subset is None:
        # Constraints exclude all data.
        raise ex.NoDataInSubsetError(
            "No output created - constraints exclude all data")
    subset.save_data(main_arguments.output)
def test_GIVEN_single_variable_ungridded_WHEN_read_data_THEN_GriddedData_returned(
        self):
    """Reading a single ungridded variable should return UngriddedData.

    NOTE(review): the test name says 'GriddedData' but the body asserts UngriddedData —
    the name appears to be a copy-paste slip; the assertions are the authority here.
    """
    variables = 'var1'
    filenames = 'filename1'
    product = None
    get_data_func = MagicMock(
        return_value=make_regular_2d_ungridded_data())
    reader = DataReader(get_data_func=get_data_func)
    data = reader.read_data_list(filenames, variables, product)[0]
    # Check the data read function is called correctly
    assert_that(get_data_func.call_count, is_(1))
    call_args = get_data_func.call_args_list[0][0]
    assert_that(call_args[0], is_([filenames]))
    assert_that(call_args[1], is_(variables))
    assert_that(call_args[2], is_(product))
    # Check the data we got back is as expected
    assert_that(data, instance_of(UngriddedData))
    assert_that(data.data.tolist(),
                is_(make_regular_2d_ungridded_data().data.tolist()))
def collapse_cmd(main_arguments):
    """
    Main routine for handling calls to the collapse command.

    :param main_arguments: The command line arguments (minus the collapse command)
    """
    from cis.data_io.ungridded_data import UngriddedDataList

    # Collapse only makes sense over a single datagroup
    if len(main_arguments.datagroups) > 1:
        __error_occurred("Collapse can only be performed on one data group")
    input_group = main_arguments.datagroups[0]
    data = DataReader().read_single_datagroup(input_group)
    if isinstance(data, UngriddedDataList):
        # NOTE(review): this only logs and then still calls data.collapsed below —
        # unlike the sibling commands which call __error_occurred to abort; confirm
        # whether falling through is intentional.
        logging.error("The collapse command can only be performed on gridded data. "
                      "Please use 'aggregate' instead.")
    output = data.collapsed(main_arguments.dimensions, how=input_group.get("kernel", ''))
    output.save_data(main_arguments.output)
def test_GIVEN_multiple_variables_and_filenames_WHEN_aggregate_THEN_Aggregate_called_correctly(self):
    """Aggregating several gridded variables delegates exactly once to the gridded aggregator."""
    variables = ['var_name1', 'var_name2']
    filenames = ['filename1', 'filename2']
    output_file = 'output.hdf'
    kernel = 'mean'
    grid = 'grid'
    cube_pair = GriddedDataList(2 * [make_from_cube(make_square_5x3_2d_cube())])

    # Stub out reading and writing so only the aggregation wiring is exercised.
    stub_reader = DataReader()
    stub_reader.read_data_list = MagicMock(return_value=cube_pair)
    stub_writer = DataWriter()
    stub_writer.write_data = Mock()

    # Replace the aggregator so we can inspect how it gets invoked; it hands
    # the data back unmodified.
    stub_aggregator = Aggregator(None, None)
    stub_aggregator.aggregate_gridded = MagicMock(return_value=cube_pair)

    aggregate = Aggregate(grid, output_file, data_reader=stub_reader, data_writer=stub_writer)
    aggregate._create_aggregator = MagicMock(return_value=stub_aggregator)
    aggregate.aggregate(variables, filenames, None, kernel)

    assert_that(stub_aggregator.aggregate_gridded.call_count, is_(1))
    assert_that(stub_aggregator.aggregate_gridded.call_args[0][0], kernel)
def test_GIVEN_single_variable_WHEN_subset_THEN_DataReader_called_correctly(self):
    """Subsetting one variable forwards the filename and variable to the reader unchanged."""
    variable = 'var_name'
    filename = 'filename'
    output_file = 'output.hdf'
    limits = {'x': SubsetLimits(-10, 10, False),
              'y': SubsetLimits(40, 60, False)}

    # Stub out reading and writing so only the call wiring is exercised.
    stub_reader = DataReader()
    stub_reader.read_data_list = MagicMock(return_value=make_regular_2d_ungridded_data())
    stub_writer = DataWriter()
    stub_writer.write_data = Mock()

    # The subsetter hands its input straight back, so no real subsetting runs.
    stub_subsetter = Subsetter()
    stub_subsetter.subset = lambda *args: args[0]

    subset = Subset(limits, output_file, subsetter=stub_subsetter,
                    data_reader=stub_reader, data_writer=stub_writer)
    subset.subset(variable, filename, product=None)

    assert_that(stub_reader.read_data_list.call_count, is_(1))
    assert_that(stub_reader.read_data_list.call_args[0][0], filename)
    assert_that(stub_reader.read_data_list.call_args[0][1], variable)
def test_GIVEN_multiple_variables_and_filenames_WHEN_subset_THEN_DataReader_called_correctly(self):
    """Subsetting many variables/files forwards both lists to the reader in one call."""
    variables = ['var_name1', 'var_name2']
    filenames = ['filename1', 'filename2']
    output_file = 'output.hdf'
    limits = {'x': SubsetLimits(0, 5, False),
              'y': SubsetLimits(-5, 5, False)}

    # Stub out reading and writing so only the call wiring is exercised.
    stub_reader = DataReader()
    stub_reader.read_data_list = MagicMock(return_value=GriddedDataList(2 * [make_square_5x3_2d_cube()]))
    stub_writer = DataWriter()
    stub_writer.write_data = Mock()

    # The subsetter hands its input straight back, so no real subsetting runs.
    stub_subsetter = Subsetter()
    stub_subsetter.subset = lambda *args: args[0]

    subset = Subset(limits, output_file, subsetter=stub_subsetter,
                    data_reader=stub_reader, data_writer=stub_writer)
    subset.subset(variables, filenames, product=None)

    assert_that(stub_reader.read_data_list.call_count, is_(1))
    assert_that(stub_reader.read_data_list.call_args[0][0], filenames)
    assert_that(stub_reader.read_data_list.call_args[0][1], variables)
def collapse_cmd(main_arguments):
    """
    Main routine for handling calls to the collapse command.

    :param main_arguments: The command line arguments (minus the collapse command)
    :raises InvalidCommandLineOptionError: if the input data is ungridded,
        since collapse is a gridded-only operation.
    """
    import cis.exceptions as ex
    from cis.data_io.ungridded_data import UngriddedDataList

    # Collapse is a single-datagroup operation; reject anything more.
    if len(main_arguments.datagroups) > 1:
        __error_occurred("Collapse can only be performed on one data group")

    input_group = main_arguments.datagroups[0]
    data = DataReader().read_single_datagroup(input_group)

    if isinstance(data, UngriddedDataList):
        # Fix: the original only logged an error here and then fell through to
        # data.collapsed(...), which an UngriddedDataList cannot service.
        # Raise instead, matching the way aggregate_cmd rejects invalid input.
        logging.error("The collapse command can only be performed on gridded data. "
                      "Please use 'aggregate' instead.")
        raise ex.InvalidCommandLineOptionError(
            "The collapse command can only be performed on gridded data. "
            "Please use 'aggregate' instead.")

    output = data.collapsed(main_arguments.dimensions, how=input_group.get("kernel", ''))
    output.save_data(main_arguments.output)
def test_GIVEN_multiple_variable_gridded_WHEN_read_data_THEN_GriddedDataList_returned(self):
    """Reading two gridded variables calls the read function once per variable and returns a GriddedDataList."""
    variables = ['var1', 'var2']
    filenames = 'filename1'
    product = None

    cube = make_square_5x3_2d_cube()
    cube.__class__ = GriddedData  # present the plain iris cube as CIS GriddedData
    get_data_func = MagicMock(return_value=cube)
    reader = DataReader(get_data_func=get_data_func)

    result = reader.read_data_list(filenames, variables, product)

    # One read per requested variable, each with the filename wrapped in a list.
    assert_that(get_data_func.call_count, is_(2))
    first_call = get_data_func.call_args_list[0][0]
    second_call = get_data_func.call_args_list[1][0]
    assert_that(first_call[0], is_([filenames]))
    assert_that(first_call[1], is_(variables[0]))
    assert_that(second_call[1], is_(variables[1]))
    assert_that(first_call[2], is_(product))

    # Both list entries carry the same underlying cube data.
    assert_that(result, instance_of(GriddedDataList))
    assert_that(result[0].data.tolist(), is_(make_square_5x3_2d_cube().data.tolist()))
    assert_that(result[1].data.tolist(), is_(result[0].data.tolist()))
def read_data_list(filenames, variables, product=None, aliases=None):
    """
    Read multiple data objects from a list of files. Files can be either gridded or ungridded but not a mix of both.

    :param filenames: The filenames of the files to read. This can be either a single filename as a string, a comma
     separated list, or a :class:`list` of string filenames. Filenames can include directories which will be expanded
     to include all files in that directory, or wildcards such as ``*`` or ``?``.
    :type filenames: string or list
    :param variables: One or more variables to read from the files
    :type variables: string or list
    :param str product: The name of the data reading plugin to use to read the data (e.g. ``Cloud_CCI``).
    :param aliases: List of aliases to put on each variable's data object as an alternative means of identifying them.
    :type aliases: string or list
    :return: A list of the data read out (either a :class:`GriddedDataList` or :class:`UngriddedDataList` depending on
     the type of data contained in the files)
    :raises IOError: if the file list cannot be expanded or matches no files.
    """
    from cis.data_io.data_reader import DataReader, expand_filelist

    # Expansion handles directories, wildcards and comma-separated strings;
    # surface any expansion problem to the caller as an IOError.
    try:
        expanded_files = expand_filelist(filenames)
    except ValueError as e:
        raise IOError(e)

    if not expanded_files:
        raise IOError("No files found which match: {}".format(filenames))

    return DataReader().read_data_list(expanded_files, variables, product, aliases)