def test_collapse_second_dim(self):
    config = Config.from_nc(self.inputs[0])
    config.dims["b"].update({"flatten": True, "index_by": "b"})
    agg_list = generate_aggregation_list(config, self.inputs)
    evaluate_aggregation_list(config, agg_list, self.filename)
    with nc.Dataset(self.filename) as nc_out:  # type: nc.Dataset
        # This is the more practically useful method of aggregation,
        # where, for example, dimension "a" might represent time and
        # dimension "b" might be satellite, event, etc. (something of
        # which there could be an arbitrary number at any point in time).
        # Flattening the b dimension should turn out like:
        # [[0 -- --]
        #  [1 -- --]
        #  [2 -- --]
        #  [3 3 --]
        #  [4 4 --]
        #  [5 5 --]
        #  [6 6 6]
        #  [7 7 7]
        #  [8 8 8]]
        c = nc_out.variables["c"][:]
        self.assertEqual(c.shape, (9, 3))
        self.assertEqual(np.sum(c), 90)
        self.assertEqual(np.ma.count_masked(c), 9)
        for i, a in enumerate(["a", "b", "c"]):
            self.assertEqual(nc_out.variables["b"][i], a)
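# A hedged sketch (not part of the original suite) of how the flattened
# layout pictured above could be pinned down exactly; it uses only
# numpy.ma, with 9 as a stand-in sentinel for the masked cells.
def _expected_flattened_sketch(self):
    data = np.array([
        [0, 9, 9], [1, 9, 9], [2, 9, 9],
        [3, 3, 9], [4, 4, 9], [5, 5, 9],
        [6, 6, 6], [7, 7, 7], [8, 8, 8],
    ])
    # masking the sentinel reproduces the diagram: sum 90, 9 masked cells
    return np.ma.masked_equal(data, 9)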
def setUp(self):
    # tmp file to aggregate to
    _, self.nc_out_filename = tempfile.mkstemp()
    pwd = os.path.dirname(__file__)
    self.files = sorted(glob.glob(os.path.join(pwd, "data", "*.nc")))
    self.config = Config.from_nc(self.files[0])
def test_missing_dim(self):
    """The variable t depends on a dimension c that has not been
    configured. Make sure a ValueError is raised because of this."""
    dims = DimensionConfig([
        {"name": "a", "size": 2},
        {"name": "b", "size": None},
    ])
    vars = VariableConfig([
        {"name": "t", "dimensions": ["c"], "datatype": "float32"},
        {"name": "x", "dimensions": ["b", "a"], "datatype": "float32"},
    ])
    attrs = GlobalAttributeConfig([])
    with self.assertRaises(ValueError):
        Config(dims, vars, attrs)
@classmethod
def setUpClass(cls):
    super(TestEvaluateAggregationList, cls).setUpClass()
    pwd = os.path.dirname(__file__)
    cls.start_time = datetime(2017, 3, 16, 15, 27)
    cls.end_time = datetime(2017, 3, 16, 15, 28)
    cls.files = glob.glob(os.path.join(pwd, "data", "*.nc"))
    cls.config = Config.from_nc(cls.files[0])
    cls.config.dims["report_number"].update({
        "index_by": "OB_time",
        # for convenience, min/max are converted according to the
        # index_by variable's units if given as datetime
        "min": cls.start_time,
        "max": cls.end_time,
        "other_dim_indicies": {"samples_per_record": 0},
        "expected_cadence": {
            "report_number": 1,
            "number_samples_per_report": 10,
        },
    })
    _, cls.filename = tempfile.mkstemp()
    agg_list = generate_aggregation_list(cls.config, cls.files)
    logger.info(agg_list)
    evaluate_aggregation_list(cls.config, agg_list, cls.filename)
    cls.output = nc.Dataset(cls.filename, "r")
def test_extra_dim(self):
    """We have configured an extra dimension z that isn't used by any
    variables. Make sure a ValueError is raised."""
    dims = DimensionConfig([
        {"name": "a", "size": 2},
        {"name": "b", "size": None},
        {"name": "z", "size": None},
    ])
    vars = VariableConfig([
        {"name": "t", "dimensions": ["a"], "datatype": "float32"},
        {"name": "x", "dimensions": ["b", "a"], "datatype": "float32"},
    ])
    attrs = GlobalAttributeConfig([])
    with self.assertRaises(ValueError):
        Config(dims, vars, attrs)
def test_initialize_several_variables(self):
    """Ensure the aggregation file is created correctly according to the
    variable config."""
    config = Config.from_dict({
        "dimensions": [{"name": "x", "size": None}, {"name": "y", "size": 10}],
        "variables": [
            {
                "name": "foo",
                "dimensions": ["x", "y"],
                "datatype": "float32",
                "attributes": {"units": "seconds"},
            },
            {
                "name": "foo_x",
                "dimensions": ["x"],
                "datatype": "float64",
                "attributes": {"units": "floops", "created_by": "the flooper"},
            },
        ],
        "global attributes": [],
    })
    initialize_aggregation_file(config, self.filename)
    with nc.Dataset(self.filename) as nc_check:
        self.assertEqual(len(nc_check.variables), 2)
        self.assertEqual(nc_check.variables["foo"].dimensions, ("x", "y"))
        self.assertEqual(nc_check.variables["foo"].datatype, np.dtype(np.float32))
        self.assertEqual(nc_check.variables["foo"].units, "seconds")
        self.assertEqual(nc_check.variables["foo_x"].dimensions, ("x",))
        self.assertEqual(nc_check.variables["foo_x"].datatype, np.dtype(np.float64))
        self.assertEqual(nc_check.variables["foo_x"].units, "floops")
        self.assertEqual(
            nc_check.variables["foo_x"].getncattr("created_by"), "the flooper"
        )
def test_basic_with_var_attrs(self):
    """Make sure a valid configuration with variable attributes is accepted."""
    dims = DimensionConfig([
        {"name": "a", "size": 2},
        {"name": "b", "size": None},
    ])
    vars = VariableConfig([
        {
            "name": "t",
            "dimensions": ["b"],
            "datatype": "float32",
            "attributes": {"_FillValue": 0},
        },
        {"name": "x", "dimensions": ["b", "a"], "datatype": "float32"},
    ])
    attrs = GlobalAttributeConfig([])
    Config(dims, vars, attrs)
def setUp(self):
    _, self.file = tempfile.mkstemp()
    pwd = os.path.dirname(__file__)
    self.files = glob.glob(os.path.join(pwd, "data", "*.nc"))
    self.config = Config.from_nc(self.files[0])
    self.config.dims["time"].update(
        {"index_by": "time", "expected_cadence": {"time": 10}}
    )
def setUp(self):
    # tmp file to aggregate to
    _, self.nc_out_filename = tempfile.mkstemp()
    pwd = os.path.dirname(__file__)
    self.files = sorted(glob.glob(os.path.join(pwd, "data", "*.nc")))
    with open(os.path.join(pwd, "new_dim_config.json")) as config_in:
        self.config = Config.from_dict(json.load(config_in))
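# A hedged sketch of the shape new_dim_config.json is expected to have,
# inferred from the Config.from_dict calls elsewhere in these tests; the
# actual file contents may differ.
#
# {
#     "dimensions": [{"name": "x", "size": null}, ...],
#     "variables": [{"name": "c", "dimensions": ["x"], "datatype": "float32"}, ...],
#     "global attributes": []
# }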
def setUp(self):
    _, self.file = tempfile.mkstemp()
    pwd = os.path.dirname(__file__)
    self.files = glob.glob(os.path.join(pwd, "data", "*.nc"))
    self.config = Config.from_nc(self.files[0])
    self.config.dims["report_number"].update({
        "index_by": "OB_time",
        "other_dim_indicies": {"samples_per_record": 0},
        "expected_cadence": {
            "report_number": 1,
            "number_samples_per_report": 10,
        },
    })
def test_initialize_basic(self):
    """Ensure the aggregation file is created with proper dimensions
    according to the config."""
    config = Config.from_dict({
        "dimensions": [{"name": "x", "size": None}, {"name": "y", "size": 10}],
        "variables": [
            {"name": "x", "dimensions": ["x", "y"], "datatype": "int8"}
        ],
        "global attributes": [],
    })
    initialize_aggregation_file(config, self.filename)
    with nc.Dataset(self.filename) as nc_check:
        self.assertEqual(len(nc_check.dimensions), 2)
        self.assertEqual(nc_check.dimensions["y"].size, 10)
        self.assertFalse(nc_check.dimensions["y"].isunlimited())
        self.assertTrue(nc_check.dimensions["x"].isunlimited())
@classmethod
def setUpClass(cls):
    super(TestEvaluateAggregationList, cls).setUpClass()
    pwd = os.path.dirname(__file__)
    cls.start_time = datetime(2017, 6, 8, 16, 45)
    cls.end_time = datetime(2017, 6, 8, 16, 50)
    cls.files = glob.glob(os.path.join(pwd, "data", "*.nc"))
    cls.config = Config.from_nc(cls.files[0])
    cls.config.dims["report_number"].update({
        "index_by": "L1a_SciData_TimeStamp",
        # for convenience, min/max are converted according to the
        # index_by variable's units if given as datetime
        "min": cls.start_time,
        "max": cls.end_time,
        "expected_cadence": {"report_number": 1, "sensor_unit": 0},
    })
    _, cls.filename = tempfile.mkstemp()
    agg_list = generate_aggregation_list(cls.config, cls.files)
    evaluate_aggregation_list(cls.config, agg_list, cls.filename)
    cls.output = nc.Dataset(cls.filename, "r")
@classmethod
def setUpClass(cls):
    super(TestEvaluateAggregationList, cls).setUpClass()
    pwd = os.path.dirname(__file__)
    cls.start_time = datetime(2017, 4, 14, 19, 23)
    cls.end_time = datetime(2017, 4, 14, 20, 30)
    cls.files = glob.glob(os.path.join(pwd, "data", "*.nc"))
    cls.config = Config.from_nc(cls.files[0])
    cls.config.dims["time"].update({
        "index_by": "time",
        # for convenience, min/max are converted according to the
        # index_by variable's units if given as datetime
        "min": cls.start_time,
        "max": cls.end_time,
        "expected_cadence": {"time": 10},
    })
    _, cls.filename = tempfile.mkstemp()
    agg_list = generate_aggregation_list(cls.config, cls.files)
    evaluate_aggregation_list(cls.config, agg_list, cls.filename)
    cls.output = nc.Dataset(cls.filename, "r")
def test_initialize_with_list_attribute(self):
    """Ensure a list-valued variable attribute (valid_range) is written
    to the aggregation file correctly."""
    config = Config.from_dict({
        "dimensions": [{"name": "x", "size": None}, {"name": "y", "size": 10}],
        "variables": [
            {
                "name": "x",
                "dimensions": ["x", "y"],
                "datatype": "int8",
                "attributes": {"valid_range": [0, 10]},
            }
        ],
        "global attributes": [],
    })
    initialize_aggregation_file(config, self.filename)
    with nc.Dataset(self.filename) as nc_check:
        self.assertEqual(len(nc_check.dimensions), 2)
        self.assertEqual(nc_check.variables["x"].valid_range[0], 0)
        self.assertEqual(nc_check.variables["x"].valid_range[1], 10)
def test_to_json(self):
    dims = DimensionConfig([
        {"name": "a", "size": 2},
        {"name": "b", "size": None},
    ])
    vars = VariableConfig([
        {"name": "t", "dimensions": ["b"], "datatype": "float32"},
        {"name": "x", "dimensions": ["b", "a"], "datatype": "float32"},
    ])
    attrs = GlobalAttributeConfig([])
    # named to avoid shadowing the json module used elsewhere in this file
    config_dict = Config(dims, vars, attrs).to_dict()
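# A hedged follow-up: since these tests exercise both Config.from_dict and
# Config.to_dict, a natural extension would be a round-trip check. Whether
# to_dict output is accepted verbatim by from_dict is an assumption here:
#
#     rebuilt = Config.from_dict(Config(dims, vars, attrs).to_dict())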
def test_default_multi_dim(self):
    config = Config.from_nc(self.inputs[0])
    agg_list = generate_aggregation_list(config, self.inputs)
    evaluate_aggregation_list(config, agg_list, self.filename)
    with nc.Dataset(self.filename) as nc_out:  # type: nc.Dataset
        # This is the default aggregation produced by aggregating along
        # both unlimited dimensions. It isn't really practically useful,
        # but it is correct by our "basic" definition of aggregation along
        # unlimited dimensions. Need to make sure we get what's expected:
        # [[0 -- -- -- -- --]
        #  [1 -- -- -- -- --]
        #  [2 -- -- -- -- --]
        #  [-- 3 3 -- -- --]
        #  [-- 4 4 -- -- --]
        #  [-- 5 5 -- -- --]
        #  [-- -- -- 6 6 6]
        #  [-- -- -- 7 7 7]
        #  [-- -- -- 8 8 8]]
        c = nc_out.variables["c"][:]
        self.assertEqual(c.shape, (9, 6))
        self.assertEqual(np.sum(c), 90)
        self.assertEqual(np.ma.count_masked(c), 36)
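# A hedged sketch (not part of the original suite) of a structural check
# for the block-diagonal layout pictured above: the i-th group of three
# rows comes from the input file whose "b" dimension had size i + 1.
def _check_default_layout_sketch(self, c):
    for i in range(9):
        # np.ma.count gives the number of unmasked entries in the row
        self.assertEqual(np.ma.count(c[i]), i // 3 + 1)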
@classmethod
def setUpClass(cls):
    super(TestEvaluateAggregationList, cls).setUpClass()
    pwd = os.path.dirname(__file__)
    cls.start_time = datetime(2018, 1, 17, 15, 5)
    cls.end_time = datetime(2018, 1, 17, 15, 56)
    cls.files = glob.glob(os.path.join(pwd, "data", "*.nc"))
    cls.config = Config.from_nc(cls.files[0])
    cls.config.dims["report_number"].update({
        "index_by": "ELF_StartStopTime",
        # for convenience, min/max are converted according to the
        # index_by variable's units if given as datetime
        "min": cls.start_time,
        "max": cls.end_time,
        "expected_cadence": {
            "report_number": 1.0 / (5.0 * 60.0),
            "number_of_time_bounds": 1.0 / ((5.0 * 60.0) - 1),
        },
        "size": None,
    })
    _, cls.filename = tempfile.mkstemp()
    agg_list = generate_aggregation_list(cls.config, cls.files)
    evaluate_aggregation_list(cls.config, agg_list, cls.filename)
    cls.output = nc.Dataset(cls.filename, "r")
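# Worked arithmetic for the cadences above (an interpretive note, assuming
# the index variable is in seconds): 1.0 / (5.0 * 60.0) = 1/300, i.e. one
# report_number step every 300 s (five minutes); the time-bounds cadence
# uses a 299 s denominator per the original configuration.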
def setUp(self):
    self.config = Config.from_nc(test_input_file)
def setUp(self):
    _, self.file = tempfile.mkstemp()
    pwd = os.path.dirname(__file__)
    self.files = glob.glob(os.path.join(pwd, "data", "*.nc"))[:2]
    self.config = Config.from_nc(self.files[0])
def setUp(self):
    self.config = Config.from_nc(another_input_file)