示例#1
0
 def test_unknown_output_type(self):
     """Test that an unrecognised output type raises a ValueError."""
     plugin = SpotDatabase("kitten", self.data_directory + "/test.csv",
                           "improver", "time", "index")
     message = ("Unrecognised output type. Current options are 'sqlite'"
                " or 'csv', 'kitten' given.")
     # assertRaisesRegexp is the deprecated Python 2 alias; use
     # assertRaisesRegex, consistent with the other tests in this file.
     with self.assertRaisesRegex(ValueError, message):
         plugin.process(self.cubes)
示例#2
0
 def setUp(self):
     """Create the plugin and the input dataframe used by these tests."""
     self.cube = set_up_spot_cube(280, number_of_sites=1)
     self.plugin = SpotDatabase(
         "csv", "output", "improver", "time", "index")
     self.input_df = pd.DataFrame([[1487311200, 280.]],
                                  columns=["time", "values"])
示例#3
0
 def setUp(self):
     """Create the plugin and a concatenated two-percentile cube."""
     self.cube = set_up_spot_cube(280, number_of_sites=1)
     duplicate = self.cube.copy()
     duplicate.coord("percentile").points = np.array([60.0])
     self.cubes = CubeList([self.cube, duplicate]).concatenate()
     self.plugin = SpotDatabase("csv", "output", "improver", "time",
                                "IMPRO", 0)
示例#4
0
 def test_no_exception_if_pivot_dim_set(self):
     """Test that no exception is raised for a 2D input cube when
        pivot_dim is set: check_input_dimensions returns a falsy value
        instead of raising."""
     plugin = SpotDatabase("csv",
                           "output",
                           "improver",
                           "time",
                           "IMPRO",
                           0,
                           pivot_dim="percentile")
     self.assertFalse(plugin.check_input_dimensions(self.cubes[0]))
示例#5
0
class Test_map_primary_index(IrisTest):
    """Test the map_primary_index method"""

    def setUp(self):
        """Create the plugin and a time-indexed input dataframe."""
        self.cube = set_up_spot_cube(280, number_of_sites=1)
        self.plugin = SpotDatabase(
            "csv", "output", "improver", "time", "index",
            primary_map=['validity_date', 'validity_time'],
            primary_func=[
                lambda x: dt.utcfromtimestamp(x).date(),
                lambda x: dt.utcfromtimestamp(x).hour * 100])
        self.input_df = pd.DataFrame(
            [[1487311200, 280.]],
            columns=["time", "values"]).set_index(["time"])

    def test_single_cube(self):
        """Basic test using one input cube."""
        # Build the dataframe expected after the index mapping.
        date_index = dt.utcfromtimestamp(1487311200).date()
        expected_df = pd.DataFrame(
            [[date_index, 600, 280.]],
            columns=['validity_date', 'validity_time', "values"])
        expected_df = expected_df.set_index(
            ["validity_date", "validity_time"])
        # The method mutates the dataframe in place.
        self.plugin.map_primary_index(self.input_df)
        assert_frame_equal(expected_df, self.input_df)

    def test_multiple_times_cube(self):
        """Test using one input cube, with one site and multiple times."""
        # Build the dataframe expected after the index mapping.
        date_index = dt.utcfromtimestamp(1487311200).date()
        expected_df = pd.DataFrame(
            [[date_index, 600, 280.], [date_index, 700, 281.]],
            columns=['validity_date', 'validity_time', "values"])
        expected_df = expected_df.set_index(
            ["validity_date", "validity_time"])

        # Build an input dataframe holding two validity times.
        input_df = pd.DataFrame(
            [[1487311200, 280.], [1487311200 + 3600, 281.]],
            columns=["time", "values"]).set_index(["time"])
        self.plugin.map_primary_index(input_df)
        assert_frame_equal(expected_df, input_df)
示例#6
0
 def test_repr_after_dataframe_created(self):
     """Check the string representation is unchanged once a dataframe
        has been created and attached to the plugin."""
     plugin = SpotDatabase("csv", "output", "improver", "time", "index")
     plugin.df = pd.DataFrame()
     expected = ('<SpotDatabase: csv, output, improver, time, index,'
                 ' primary_map=None, '
                 'primary_func=None, '
                 'pivot_dim=None, '
                 'pivot_map=None, '
                 'column_dims=None, '
                 'column_maps=None>')
     self.assertEqual(expected, str(plugin))
示例#7
0
 def setUp(self):
     """Create a pivoting plugin and the input dataframe for tests."""
     self.cube = set_up_spot_cube(280, number_of_sites=1)
     # pivot_map turns a forecast_period in seconds into a 'T+HHH' label.
     self.plugin = SpotDatabase(
         "csv",
         "output",
         "improver",
         "time",
         "index",
         pivot_map=lambda x: 'T+{:03d}'.format(int(x / 3600)),
         pivot_dim="forecast_period")
     self.input_df = pd.DataFrame([[1487311200, 280.]],
                                  columns=["time", "values"])
class Test_determine_schema(IrisTest):
    """A set of tests for the determine_schema method"""

    def setUp(self):
        """Build the plugin and the dataframe used by this test."""
        self.plugin = SpotDatabase("csv", "output", "improver", "time",
                                   "index")
        self.dataframe = self.plugin.to_dataframe(
            CubeList([set_up_spot_cube(280)]))

    def test_full_schema(self):
        """Basic test using a basic dataframe as input"""
        expected_schema = ('CREATE TABLE "improver" (\n"index" INTEGER,\n  '
                           '"values" REAL,\n  CONSTRAINT improver_pk '
                           'PRIMARY KEY ("index")\n)')
        self.assertEqual(self.plugin.determine_schema("improver"),
                         expected_schema)
示例#9
0
 def setUp(self):
     """Create the plugin, cubes and input dataframe for these tests."""
     self.cube = set_up_spot_cube(280, number_of_sites=1)
     self.cube2 = set_up_spot_cube(
         281, number_of_sites=1, validity_time=1487311200 + 3600,
         forecast_period=1)
     self.cube3 = set_up_spot_cube(
         282, number_of_sites=1, validity_time=1487311200 + 7200,
         forecast_period=2)
     self.cubelist = CubeList([self.cube])
     self.cubelist_multiple = CubeList([self.cube, self.cube2, self.cube3])
     self.plugin = SpotDatabase("csv", "output", "improver", "time",
                                "index")
     self.input_df = pd.DataFrame([[1487311200, 280.]],
                                  columns=["time", "values"])
示例#10
0
 def setUp(self):
     """Create the plugin and a time-indexed input dataframe."""
     self.cube = set_up_spot_cube(280, number_of_sites=1)
     # primary_func maps the epoch time index to a date and an HHMM int.
     self.plugin = SpotDatabase(
         "csv",
         "output",
         "improver",
         "time",
         "index",
         primary_map=['validity_date', 'validity_time'],
         primary_func=[
             lambda x: dt.utcfromtimestamp(x).date(),
             lambda x: dt.utcfromtimestamp(x).hour * 100
         ])
     self.input_df = pd.DataFrame(
         [[1487311200, 280.]],
         columns=["time", "values"]).set_index(["time"])
示例#11
0
 def test_basic_repr(self):
     """Basic test of string representation"""
     plugin = SpotDatabase("csv", "output", "improver", "time", "index")
     expected = ('<SpotDatabase: csv, output, improver, time, index,'
                 ' primary_map=None, '
                 'primary_func=None, '
                 'pivot_dim=None, '
                 'pivot_map=None, '
                 'column_dims=None, '
                 'column_maps=None>')
     self.assertEqual(expected, str(plugin))
示例#12
0
 def test_exception_if_pivot_dim_set(self):
     """Test an exception is raised when pivot_dim is set but the input
        cube carries an extra dimension (three sites here) that is not
        described by pivot_dim or coord_to_slice_over."""
     plugin = SpotDatabase("csv",
                           "output",
                           "improver",
                           "time",
                           "IMPRO",
                           0,
                           pivot_dim="percentile")
     cube = set_up_spot_cube(280, number_of_sites=3)
     second_cube = cube.copy()
     second_cube.coord("percentile").points = np.array([60.0])
     cubelist = CubeList([cube, second_cube])
     cubes = cubelist.concatenate()
     message = "Dimensions that are not described by the pivot_dim or "\
               "coord_to_slice_over must only have one point in. "\
               "Dimension '2' has length '3' and is associated with the "\
               "'index' coordinate."
     with self.assertRaisesRegex(ValueError, message):
         plugin.check_input_dimensions(cubes[0])
示例#13
0
 def test_all_optional_args(self):
     """Test we create a dataframe even when we have all optional
        arguments set"""
     # Set up expected dataframe: a (validity_time, cf_name) multi-index
     # with one pivoted 'T+000' forecast_period column.
     data = [[600, "air_temperature", 280.]]
     columns = ["validity_time", "cf_name", "T+000"]
     expected_df = pd.DataFrame(data, columns=columns)
     expected_df.set_index(["validity_time", "cf_name"], inplace=True)
     expected_df.columns.name = "forecast_period"
     plugin = SpotDatabase(
         "csv",
         "output",
         "improver",
         "time",
         primary_map=['validity_time'],
         primary_func=[lambda x: dt.utcfromtimestamp(x).hour * 100],
         pivot_dim='forecast_period',
         pivot_map=lambda x: 'T+{:03d}'.format(int(x / 3600)),
         column_dims=['name'],
         column_maps=['cf_name'],
         coord_to_slice_over="index")
     # Call the method.
     plugin.to_dataframe(self.cubelist)
     assert_frame_equal(plugin.df, expected_df)
示例#14
0
 def setUp(self):
     """Create a temporary output directory and the plugin for this test."""
     self.data_directory = mkdtemp()
     self.cubes = CubeList([set_up_spot_cube(280)])
     self.plugin = SpotDatabase(
         "csv", self.data_directory + "/test.csv", "improver", "time",
         "index")
示例#15
0
 def setUp(self):
     """Create the plugin and the dataframe used by this test."""
     self.plugin = SpotDatabase("csv", "output", "improver", "time",
                                "index")
     self.dataframe = self.plugin.to_dataframe(
         CubeList([set_up_spot_cube(280)]))
示例#16
0
class Test_to_dataframe(IrisTest):
    """Test the to_dataframe method"""
    def setUp(self):
        """Set up the plugin and dataframe needed for these tests"""
        self.cube = set_up_spot_cube(280, number_of_sites=1)
        self.cube2 = set_up_spot_cube(281,
                                      number_of_sites=1,
                                      validity_time=1487311200 + 3600,
                                      forecast_period=1)
        self.cube3 = set_up_spot_cube(282,
                                      number_of_sites=1,
                                      validity_time=1487311200 + 7200,
                                      forecast_period=2)
        self.cubelist = CubeList([self.cube])
        self.cubelist_multiple = CubeList([self.cube, self.cube2, self.cube3])
        self.plugin = SpotDatabase("csv", "output", "improver", "time",
                                   "index")
        data = [[1487311200, 280.]]
        columns = ["time", "values"]
        self.input_df = pd.DataFrame(data, columns=columns)

    @ManageWarnings(ignored_messages=IGNORED_MESSAGES,
                    warning_types=WARNING_TYPES)
    def test_no_optional_args(self):
        """Test we create a dataframe even when we have no optional
           arguments set"""
        # Set up expected dataframe.
        data = [[280.]]
        columns = ["values"]
        expected_df = pd.DataFrame(data, index=[1487311200], columns=columns)

        # Call the method.
        self.plugin.to_dataframe(self.cubelist)

        assert_frame_equal(self.plugin.df, expected_df)

    @ManageWarnings(ignored_messages=IGNORED_MESSAGES,
                    warning_types=WARNING_TYPES)
    def test_all_optional_args(self):
        """Test we create a dataframe even when we have all optional
           arguments set"""
        # Set up expected dataframe.
        data = [[600, "air_temperature", 280.]]
        columns = ["validity_time", "cf_name", "T+000"]
        expected_df = pd.DataFrame(data, columns=columns)
        expected_df.set_index(["validity_time", "cf_name"], inplace=True)
        expected_df.columns.name = "forecast_period"
        plugin = SpotDatabase(
            "csv",
            "output",
            "improver",
            "time",
            primary_map=['validity_time'],
            primary_func=[lambda x: dt.utcfromtimestamp(x).hour * 100],
            pivot_dim='forecast_period',
            pivot_map=lambda x: 'T+{:03d}'.format(int(x / 3600)),
            column_dims=['name'],
            column_maps=['cf_name'],
            coord_to_slice_over="index")
        # Call the method.
        plugin.to_dataframe(self.cubelist)
        assert_frame_equal(plugin.df, expected_df)

    @ManageWarnings(ignored_messages=IGNORED_MESSAGES,
                    warning_types=WARNING_TYPES)
    def test_all_optional_args_multiple_input_cubes(self):
        """Test we create a dataframe when we have all optional
           arguments set and multiple input cubes"""
        # Set up expected dataframe: each lead time pivots into its own
        # column, with NaN where a cube has no value for that column.
        data = [[600, "air_temperature", 280., np.nan, np.nan],
                [700, "air_temperature", np.nan, 281., np.nan],
                [800, "air_temperature", np.nan, np.nan, 282.]]
        columns = ["validity_time", "cf_name", "T+000", "T+001", "T+002"]
        expected_df = pd.DataFrame(data, columns=columns)
        expected_df.set_index(["validity_time", "cf_name"], inplace=True)
        expected_df.columns.name = "forecast_period"
        plugin = SpotDatabase(
            "csv",
            "output",
            "improver",
            "time",
            primary_map=['validity_time'],
            primary_func=[lambda x: dt.utcfromtimestamp(x).hour * 100],
            pivot_dim='forecast_period',
            pivot_map=lambda x: 'T+{:03d}'.format(int(x / 3600)),
            column_dims=['name'],
            column_maps=['cf_name'],
            coord_to_slice_over="index")
        # Call the method.
        plugin.to_dataframe(self.cubelist_multiple)
        assert_frame_equal(plugin.df, expected_df)

    @staticmethod
    @ManageWarnings(ignored_messages=IGNORED_MESSAGES,
                    warning_types=WARNING_TYPES)
    def test_all_optional_args_multiple_sites():
        """Test we create a dataframe even when we have all optional
           arguments set and multiple spots"""
        # Set up expected dataframe: one row per site, all sharing the
        # same validity time and value.
        data = [[600, "air_temperature", 0, 280.],
                [600, "air_temperature", 1, 280.],
                [600, "air_temperature", 2, 280.]]
        columns = ["validity_time", "cf_name", "site", "T+000"]
        expected_df = pd.DataFrame(data, columns=columns)
        expected_df.set_index(["validity_time", "cf_name", "site"],
                              inplace=True)
        expected_df.columns.name = "forecast_period"
        plugin = SpotDatabase(
            "csv",
            "output",
            "improver",
            "time",
            primary_map=['validity_time'],
            primary_func=[lambda x: dt.utcfromtimestamp(x).hour * 100],
            pivot_dim='forecast_period',
            pivot_map=lambda x: 'T+{:03d}'.format(int(x / 3600)),
            column_dims=['name', "index"],
            column_maps=['cf_name', "site"],
            coord_to_slice_over="index")
        # Call the method.
        cube = set_up_spot_cube(
            280,
            number_of_sites=3,
        )
        plugin.to_dataframe(CubeList([cube]))
        assert_frame_equal(plugin.df, expected_df)
示例#17
0
class Test_insert_extra_mapped_column(IrisTest):
    """Test the insert_extra_mapped_column method"""
    def setUp(self):
        """Set up the plugin and dataframe needed for these tests"""
        self.cube = set_up_spot_cube(280, number_of_sites=1)
        self.plugin = SpotDatabase("csv", "output", "improver", "time",
                                   "index")
        data = [[1487311200, 280.]]
        columns = ["time", "values"]
        self.input_df = pd.DataFrame(data, columns=columns)

    def test_extra_column_in_df(self):
        """Basic test using a column and dim for something that already exist.
           In this case the function does nothing to the dataframe."""
        # Set up expected dataframe.
        data = [[1487311200, 280.]]
        columns = ["time", "values"]
        expected_df = pd.DataFrame(data, columns=columns)
        # Call the method.
        self.plugin.insert_extra_mapped_column(self.input_df, self.cube,
                                               "values", [380.])
        assert_frame_equal(self.input_df, expected_df)

    def test_extra_column_from_coord(self):
        """Test for when we are taking data from a coordinate in the cube;
           the new column is appended to the dataframe's index."""
        data = [[1487311200, 100, 280.]]
        columns = ["time", "altitude_of_site", "values"]
        expected_df = pd.DataFrame(data, columns=columns)
        expected_df.set_index(["altitude_of_site"], append=True, inplace=True)
        # Call the method.
        self.plugin.insert_extra_mapped_column(self.input_df, self.cube,
                                               "altitude", "altitude_of_site")
        assert_frame_equal(self.input_df, expected_df)

    def test_extra_column_from_coord_multiple_points(self):
        """Test for when we are taking data from a coordinate in the cube.
           This test has multiple sites in the cube."""
        data = [[1487311200, 100, 280.], [1487311200, 101, 280.],
                [1487311200, 102, 280.]]
        columns = ["time", "altitude_of_site", "values"]
        expected_df = pd.DataFrame(data, columns=columns)
        expected_df.set_index(["altitude_of_site"], append=True, inplace=True)
        # Call the method with a three-site cube and a matching
        # three-row input dataframe.
        cube = set_up_spot_cube(280, number_of_sites=3)
        data = [[1487311200, 280.], [1487311200, 280.], [1487311200, 280.]]
        columns = ["time", "values"]
        self.input_df = pd.DataFrame(data, columns=columns)
        self.plugin.insert_extra_mapped_column(self.input_df, cube, "altitude",
                                               "altitude_of_site")
        assert_frame_equal(self.input_df, expected_df)

    def test_extra_column_from_cube_name(self):
        """Test for when we are taking data from the cube name."""
        data = [[1487311200, "air_temperature", 280.]]
        columns = ["time", "name_of_cube", "values"]
        expected_df = pd.DataFrame(data, columns=columns)
        expected_df.set_index(["name_of_cube"], append=True, inplace=True)
        # Call the method.
        self.plugin.insert_extra_mapped_column(self.input_df, self.cube,
                                               "name", "name_of_cube")
        assert_frame_equal(self.input_df, expected_df)

    def test_extra_column_from_cube_attribute(self):
        """Test for when we are taking data from the cube units."""
        data = [[1487311200, "K", 280.]]
        columns = ["time", "cube_units", "values"]
        expected_df = pd.DataFrame(data, columns=columns)
        expected_df.set_index(["cube_units"], append=True, inplace=True)
        # Call the method.
        self.plugin.insert_extra_mapped_column(self.input_df, self.cube,
                                               "units", "cube_units")
        assert_frame_equal(self.input_df, expected_df)

    def test_static_extra_column(self):
        """Test for when we add a new column that does not come from the cube.
           """
        data = [[1487311200, "IMPRO_nbhood", 280.]]
        columns = ["time", "experiment_id", "values"]
        expected_df = pd.DataFrame(data, columns=columns)
        expected_df.set_index(["experiment_id"], append=True, inplace=True)
        # NOTE(review): the source_grid attribute set below is not checked
        # by the assertion — confirm it is actually needed for this test.
        self.cube.attributes["source_grid"] = "ukvx"
        self.plugin.insert_extra_mapped_column(self.input_df, self.cube,
                                               "IMPRO_nbhood", "experiment_id")
        assert_frame_equal(self.input_df, expected_df)
示例#18
0
class Test_pivot_table(IrisTest):
    """Test the pivot_table method"""
    def setUp(self):
        """Set up the plugin and dataframe needed for these tests"""
        self.cube = set_up_spot_cube(280, number_of_sites=1)
        # pivot_map converts a forecast_period in seconds to a 'T+HHH' label.
        self.plugin = SpotDatabase(
            "csv",
            "output",
            "improver",
            "time",
            "index",
            pivot_map=lambda x: 'T+{:03d}'.format(int(x / 3600)),
            pivot_dim="forecast_period")
        data = [[1487311200, 280.]]
        columns = ["time", "values"]
        self.input_df = pd.DataFrame(data, columns=columns)

    def test_single_cube(self):
        """Basic test using one input cube."""
        # Set up expected dataframe: a single 'T+000' column named after
        # the pivot dimension.
        expected_data = [[280.]]
        expected_df = pd.DataFrame(expected_data, columns=["T+000"])
        expected_df.columns.name = "forecast_period"
        # Call the method.
        result = self.plugin.pivot_table(self.cube, self.input_df)
        assert_frame_equal(expected_df, result)

    def test_multiple_times_cube(self):
        """Test using one input cube, with one site and multiple times."""
        # Set up expected dataframe: one column per lead time, NaN where a
        # row has no value for that lead time.
        expected_data = [[280., np.nan], [np.nan, 281.]]
        expected_df = pd.DataFrame(expected_data, columns=["T+000", "T+001"])
        expected_df.columns.name = "forecast_period"

        # Set up cube with multiple lead times in.
        second_cube = set_up_spot_cube(
            281,
            number_of_sites=1,
            validity_time=1487311200 + 3600,
            forecast_period=1,
        )

        merged_cube = CubeList([self.cube, second_cube])
        merged_cube = merged_cube.concatenate()
        # Set up input dataframe
        data = [[1487311200, 280.], [1487311200 + 3600, 281.]]
        columns = ["time", "values"]
        input_df = pd.DataFrame(data, columns=columns)
        result = self.plugin.pivot_table(merged_cube[0], input_df)
        assert_frame_equal(expected_df, result)

    def test_extra_columns(self):
        """Test with an extra column in the input dataframe.
           In this case the extra column gets removed when the table
           is pivoted.
        """
        # Set up expected dataframe.
        expected_data = [[280., np.nan], [np.nan, 281.]]
        expected_df = pd.DataFrame(expected_data, columns=["T+000", "T+001"])
        expected_df.columns.name = "forecast_period"

        # Set up cube with multiple lead times in.
        second_cube = set_up_spot_cube(
            281,
            number_of_sites=1,
            validity_time=1487311200 + 3600,
            forecast_period=1,
        )

        merged_cube = CubeList([self.cube, second_cube])
        merged_cube = merged_cube.concatenate()
        # Set up input dataframe with an extra 'wmo_site' column, which
        # should not survive the pivot.
        data = [[1487311200, 3001, 280.], [1487311200 + 3600, 3002, 281.]]
        columns = ["time", "wmo_site", "values"]
        input_df = pd.DataFrame(data, columns=columns)
        result = self.plugin.pivot_table(merged_cube[0], input_df)
        assert_frame_equal(expected_df, result)