Exemplo n.º 1
0
    def test_should_replace_two_values_from_one_column(self):
        """Check replace_content with two mappings on the "status" column.

        Maps "Automatic" to "auto" and "Reviewed" to "rev", then compares the
        collected rows of the transformed dataframe with a dataframe built
        from the hand-written expected rows below.
        """
        replacements = {"Automatic": "auto", "Reviewed": "rev"}
        subject = Transformation(self.test_data)
        subject.replace_content("status", replacements)

        current_result = subject.dataframe.collect()

        column_names = [
            "date", "place", "mag", "status", "coordinates", "alert",
        ]
        expected_rows = [
            (
                1704567252, "California", 0.82, "auto",
                [-116.8, 33.3333333, 12.04], None,
            ),
            (
                1391707828, "Alaska", 1.1, None,
                [-148.942, 64.9081, 10.6], "green",
            ),
            (
                1435498694, "Chile", 4.9, "rev",
                [-70.6202, -21.4265, 52.24], None,
            ),
            (
                1609879110, "Hawaii", 2.0099, "auto",
                [-155.429000854492, 19.2180004119873, 33.2999992370605],
                "yellow",
            ),
            (
                1224994646, "Indonesia", 4.8, "rev",
                [126.419, 0.2661, 10], "green",
            ),
            (
                1801059964, "Nevada", 0.5, "auto",
                [-116.242, 36.7564, 0.8], None,
            ),
            (
                1262739669, "Arkansas", 1.9, "rev",
                [-91.4295, 35.863, 16.41], "green",
            ),
            (
                1890118874, "Montana", 1.33, "rev",
                [-110.434, 44.4718333, 2.21], None,
            ),
            (
                1025727100, "Oklahoma", 1.58, "rev",
                [-98.53233333, 36.57083333, 6.31], None,
            ),
            (
                1834567116, "Idaho", 2.6, "rev",
                [-115.186, 44.2666, 10], "green",
            ),
        ]
        expected_frame = self.spark.createDataFrame(
            expected_rows, column_names
        )
        expected_result = expected_frame.collect()

        self.assertEqual(current_result, expected_result)
Exemplo n.º 2
0
    def test_should_return_same_df_when_column_not_exists(self):
        """Check that replace_content on column "xpto" changes nothing.

        The test name treats "xpto" as a column that does not exist; after
        the call, the collected rows must equal those of self.test_data.
        """
        subject = Transformation(self.test_data)
        mapping = {"value1": "value2"}
        subject.replace_content("xpto", mapping)

        # Arguments are evaluated left to right: transformed rows first,
        # then the untouched fixture rows.
        self.assertEqual(
            subject.dataframe.collect(),
            self.test_data.collect(),
        )
Exemplo n.º 3
0
    def test_should_return_same_df_when_columns_param_is_empty(self):
        """Check that an empty replacement mapping leaves the rows unchanged.

        Calls replace_content on the "mag" column with an empty dict and
        compares the collected rows with those of self.test_data.
        """
        no_replacements = {}
        subject = Transformation(self.test_data)
        subject.replace_content("mag", no_replacements)

        rows_after = subject.dataframe.collect()
        rows_before = self.test_data.collect()

        self.assertEqual(rows_after, rows_before)