Пример #1
0
    def test_file_read_nullable(self):
        """Read the nullable test resource and compare it with the fixture.

        The test fails right away when the resource file does not exist.
        """
        resource = TestDataFramesIO.FILE_NULLABLE
        resource_present = os.path.exists(resource)
        if not resource_present:
            self.fail("Test resource '{}' was not found".format(resource))

        loaded = DataFrame.read(resource)
        same_content = loaded.equals(TestDataFramesIO.df_nullable)
        self.assertTrue(same_content, "DataFrames do not match")
Пример #2
0
    def test_file_read_default(self):
        """Read the default test resource and compare it with the fixture.

        The test fails right away when the resource file does not exist.
        """
        resource = TestDataFramesIO.FILE_DEFAULT
        resource_present = os.path.exists(resource)
        if not resource_present:
            self.fail("Test resource '{}' was not found".format(resource))

        loaded = DataFrame.read(resource)
        same_content = loaded.equals(TestDataFramesIO.df_default)
        self.assertTrue(same_content, "DataFrames do not match")
Пример #3
0
    def test_file_read_multiple_files_in_dir(self):
        """Read a whole directory and verify the returned mapping of frames.

        ``DataFrame.read`` is called with the test resource directory. The
        result is expected to be a dict with exactly two entries, stored
        under the keys "test_default" and "test_nullable", each equal to the
        DataFrame obtained by reading the corresponding file on its own.
        The test fails right away when the directory does not exist.
        """
        directory = TestDataFramesIO.DIR_TEST_RESOURCES
        if not os.path.exists(directory):
            self.fail(("Test resource directory '{}' was not found").format(
                directory))

        files = DataFrame.read(directory)
        # Dedicated assert methods include the offending value in the
        # failure report, unlike assertTrue on a precomputed boolean.
        self.assertIsInstance(files, dict,
                              "Returned object should be of type dict")
        self.assertEqual(len(files), 2,
                         "Returned dict should have 2 elements")

        expected_entries = (
            ("test_default", TestDataFramesIO.FILE_DEFAULT),
            ("test_nullable", TestDataFramesIO.FILE_NULLABLE),
        )
        for key, path in expected_entries:
            self.assertTrue(files[key].equals(DataFrame.read(path)),
                            "DataFrames do not match")
Пример #4
0
    def test_deserialization_nullable_compressed(self):
        """Deserialize the compressed nullable byte fixture and inspect it.

        Checks that the result is a non-empty NullableDataFrame with 5 rows,
        10 columns and column names set, and that it equals the
        ``df_nullable`` fixture.
        """
        res = DataFrame.deserialize(TestDataFramesIO.truth_nullable_compressed)
        self.assertFalse(res.is_empty(), "DataFrame should not be empty")
        # assertEqual/assertIsInstance report the actual value on failure,
        # which assertTrue on a comparison result cannot do.
        self.assertEqual(res.rows(), 5, "DataFrame row count should be 5")
        self.assertEqual(res.columns(), 10,
                         "DataFrame column count should be 10")
        self.assertTrue(res.has_column_names(),
                        "DataFrame should have column names set")
        self.assertIsInstance(res, NullableDataFrame,
                              "DataFrame should be of type NullableDataFrame")

        self.assertTrue(res.equals(TestDataFramesIO.df_nullable),
                        "DataFrame differs in content")
Пример #5
0
    def test_from_base64_string_nullable(self):
        """Decode the nullable Base64 fixture and inspect the result.

        Checks that the result is a non-empty NullableDataFrame with 5 rows,
        10 columns and column names set, and that it equals the
        ``df_nullable`` fixture.
        """
        res = DataFrame.from_base64(TestDataFramesIO.truth_nullable_base64)
        self.assertFalse(res.is_empty(), "DataFrame should not be empty")
        # assertEqual/assertIsInstance report the actual value on failure,
        # which assertTrue on a comparison result cannot do.
        self.assertEqual(res.rows(), 5, "DataFrame row count should be 5")
        self.assertEqual(res.columns(), 10,
                         "DataFrame column count should be 10")
        self.assertTrue(res.has_column_names(),
                        "DataFrame should have column names set")
        self.assertIsInstance(res, NullableDataFrame,
                              "DataFrame should be of type NullableDataFrame")

        self.assertTrue(res.equals(TestDataFramesIO.df_nullable),
                        "DataFrame differs in content")
Пример #6
0
    def stress_test_nullable(self):
        """Repeatedly mutate and round-trip a copy of the nullable fixture.

        Phase one: once per column, column 0 is taken out and handed back to
        ``add_column``, followed by a serialize/deserialize round trip. The
        final frame must equal the original fixture.

        Phase two: once per row, row 0 is removed from both a fresh copy and
        the round-tripped frame; after each further round trip both frames
        must be equal.
        """
        # NOTE(review): the method name does not start with "test", so the
        # default unittest loader will not collect it -- confirm intentional.
        frame = DataFrame.copy(TestDataFramesIO.df_nullable)
        column_count = frame.columns()
        for _ in range(column_count):
            first_column = frame.get_column(0)
            frame.remove_column(0)
            frame.add_column(first_column)
            frame = DataFrame.deserialize(DataFrame.serialize(frame))

        self.assertTrue(frame.equals(TestDataFramesIO.df_nullable),
                        "DataFrame does not match original")

        reference = DataFrame.copy(TestDataFramesIO.df_nullable)
        row_count = reference.rows()
        for _ in range(row_count):
            reference.remove_row(0)
            frame.remove_row(0)
            frame = DataFrame.deserialize(DataFrame.serialize(frame))
            self.assertTrue(frame.equals(reference),
                            "DataFrame does not match changed object")
Пример #7
0
    def setUp(self):
        """Rebuild the expected DataFrames stored as ``TestCSV`` attributes.

        Each fixture comes in two flavours: one with typed columns and an
        ``*_AS_STRING`` twin holding the same values in string columns.
        """
        default_columns = [
            DataFrame.IntColumn("AttrA", [1, 2, 3]),
            DataFrame.DoubleColumn("AttrB", [1.1, 2.2, 3.3]),
            DataFrame.StringColumn("AttrC", ["C1", "C2", "C,3"]),
        ]
        TestCSV.DF_DEFAULT = DefaultDataFrame(*default_columns)

        default_string_columns = [
            DataFrame.StringColumn("AttrA", ["1", "2", "3"]),
            DataFrame.StringColumn("AttrB", ["1.1", "2.2", "3.3"]),
            DataFrame.StringColumn("AttrC", ["C1", "C2", "C,3"]),
        ]
        TestCSV.DF_DEFAULT_AS_STRING = DefaultDataFrame(
            *default_string_columns)

        nullable_columns = [
            DataFrame.NullableIntColumn("AttrA", [None, 2, 3]),
            DataFrame.NullableDoubleColumn("AttrB", [1.1, None, 3.3]),
            DataFrame.NullableStringColumn("AttrC", ["C1", "C2", None]),
        ]
        TestCSV.DF_NULLABLE = NullableDataFrame(*nullable_columns)

        nullable_string_columns = [
            DataFrame.NullableStringColumn("AttrA", [None, "2", "3"]),
            DataFrame.NullableStringColumn("AttrB", ["1.1", None, "3.3"]),
            DataFrame.NullableStringColumn("AttrC", ["C1", "C2", None]),
        ]
        TestCSV.DF_NULLABLE_AS_STRING = NullableDataFrame(
            *nullable_string_columns)

        malformed_columns = [
            DataFrame.NullableIntColumn("AttrA", [11, 22, 33, None]),
            DataFrame.NullableDoubleColumn("AttrB", [None, None, 3.3, None]),
            DataFrame.NullableDoubleColumn("AttrC", [None, 2.2, None, 4.4]),
            DataFrame.NullableStringColumn("AttrD", [None, None, None, None]),
        ]
        TestCSV.DF_MALFORMED = NullableDataFrame(*malformed_columns)

        malformed_string_columns = [
            DataFrame.NullableStringColumn("AttrA", ["11", "22", "33", None]),
            DataFrame.NullableStringColumn("AttrB", [None, None, "3.3", None]),
            DataFrame.NullableStringColumn(
                "AttrC", [None, "2.2", None, "4.4"]),
            DataFrame.NullableStringColumn("AttrD", [None, None, None, None]),
        ]
        TestCSV.DF_MALFORMED_AS_STRING = NullableDataFrame(
            *malformed_string_columns)

        empty_lines_columns = [
            DataFrame.IntColumn("AttrA", [1, 2, 3, 4, 5, 6, 7]),
            DataFrame.DoubleColumn(
                "AttrB", [1.1, 2.2, 3.3, 4.4, 5.5, 6.6, 7.7]),
            DataFrame.StringColumn(
                "AttrC", ["C1", "C2", "C,3", "C4", "C5", "C6", "C7"]),
        ]
        TestCSV.DF_EMPTY_LINES = DefaultDataFrame(*empty_lines_columns)
Пример #8
0
    def setUp(self):
        """Rebuild the fixtures stored as ``TestDataFramesIO`` attributes.

        Sets, in this order: the shared column names, the DefaultDataFrame
        fixture with its expected serialized forms (raw bytes, compressed
        bytes, Base64 string), the NullableDataFrame fixture with its three
        expected serialized forms, and finally applies the column names to
        both DataFrames.
        """
        # Names applied to both fixtures at the end of this method; the
        # trailing numbers are the column indices.
        TestDataFramesIO.column_names = [
            "byteCol",  # 0
            "shortCol",  # 1
            "intCol",  # 2
            "longCol",  # 3
            "stringCol",  # 4
            "charCol",  # 5
            "floatCol",  # 6
            "doubleCol",  # 7
            "booleanCol",  # 8
            "binaryCol"  # 9
        ]

        # DefaultDataFrame fixture: ten columns with five values each
        TestDataFramesIO.df_default = DefaultDataFrame(
            DataFrame.ByteColumn(values=[10, 20, 30, 40, 50]),
            DataFrame.ShortColumn(values=[11, 21, 31, 41, 51]),
            DataFrame.IntColumn(values=[12, 22, 32, 42, 52]),
            DataFrame.LongColumn(values=[13, 23, 33, 43, 53]),
            DataFrame.StringColumn(values=["10", "20", "30", "40", "50"]),
            DataFrame.CharColumn(values=["a", "b", "c", "d", "e"]),
            DataFrame.FloatColumn(values=[10.1, 20.2, 30.3, 40.4, 50.5]),
            DataFrame.DoubleColumn(values=[11.1, 21.2, 31.3, 41.4, 51.5]),
            DataFrame.BooleanColumn(values=[True, False, True, False, True]),
            DataFrame.BinaryColumn(values=[
                bytearray.fromhex("0102030405"),
                bytearray.fromhex("0504030201"),
                bytearray.fromhex("0502010203"),
                bytearray.fromhex("0201040503"),
                bytearray.fromhex("0301020504")
            ]))

        # Expected uncompressed serialization of df_default (hex-encoded)
        TestDataFramesIO.truth = bytearray.fromhex((
            "7b763a323b64000000050000000a62797465436f6c0073686f7274436f6c00696e74436f6"
            "c006c6f6e67436f6c00737472696e67436f6c0063686172436f6c00666c6f6174436f6c00"
            "646f75626c65436f6c00626f6f6c65616e436f6c0062696e617279436f6c0001020304050"
            "8060709137d0a141e2832000b0015001f002900330000000c00000016000000200000002a"
            "00000034000000000000000d00000000000000170000000000000021000000000000002b0"
            "00000000000003531300032300033300034300035300061626364654121999a41a1999a41"
            "f266664221999a424a000040263333333333334035333333333333403f4ccccccccccd404"
            "4b333333333334049c00000000000a8000000050102030405000000050504030201000000"
            "050502010203000000050201040503000000050301020504"))

        # Expected compressed serialization of df_default (hex-encoded)
        TestDataFramesIO.truth_compressed = bytearray.fromhex(
            ("6466ab2eb332b24e616060600562aea4ca9254e7fc1c86e28"
             "cfca2121023330f4ce5e4e7a583c54b8a3221ace48cc42210"
             "9d96939f085692925f9a9403d69c949f9f939a98076666e62"
             "5165582588c4ccc2cac1c6cec9cc2b55c22721a460cdc0ca2"
             "0cf20c9a0cc6407b7980580c881580580b884d182080174a8"
             "b43694528ad0da54d0d0d188c0c188c0d184c0c184c0d1812"
             "939253521d1567ce725c08c49fd2d29c806c272f060607356"
             "330703085d2f63e6780e0ac83cb6608dff300d8c015a08000"
             "3b15c460656166620433988062200613230b2b98c1ccc8c4c"
             "a020011103dae"))

        # Expected Base64 string for df_default
        TestDataFramesIO.truth_base64 = (
            "ZGarLrMysk5hYGBgBWKupMqSVOf8HIbijPyiEhAjMw9M5eTnpYPFS4oyIaz"
            "kjMQiEJ2Wk58IVpKSX5qUA9aclJ+fk5qYB2Zm5iUWVYJYjEzMLKwcbOycwr"
            "VcInIaRgzcDKIM8gyaDMZAe3mAWAyIFYBYC4hNGCCAF0qLQ2lFKK0NpU0ND"
            "RiMDBiMDRhMDBhMDRgSk5JTUh0VZ85yXAjEn9LSnIBsJy8GBgc1YzBwMIXS"
            "9j5ngOCsg8tmCN/zANjAFaCAADsVxGBlYWZiBDOYgGIgBhMjCyuYwczIxMo"
            "CABEQPa4=")

        #*************************************************************#
        #                                                             #
        #                 Data for NullableDataFrame                  #
        #                                                             #
        #*************************************************************#

        # NullableDataFrame fixture: ten columns with five values each,
        # mixing None entries with regular values
        TestDataFramesIO.df_nullable = NullableDataFrame(
            DataFrame.NullableByteColumn(values=[10, None, None, 0, 50]),
            DataFrame.NullableShortColumn(values=[11, 21, None, 0, None]),
            DataFrame.NullableIntColumn(values=[12, None, 32, 0, None]),
            DataFrame.NullableLongColumn(values=[None, None, 33, 0, 53]),
            DataFrame.NullableStringColumn(
                values=["ABCD", "2!\"0,.", None, "", "#5{=0>}"]),
            DataFrame.NullableCharColumn(values=[",", "b", None, "d", "?"]),
            DataFrame.NullableFloatColumn(
                values=[10.1, None, 0.0, None, 50.5]),
            DataFrame.NullableDoubleColumn(
                values=[None, 0.0, 0.0, None, 51.5]),
            DataFrame.NullableBooleanColumn(
                values=[True, None, False, None, True]),
            DataFrame.NullableBinaryColumn(values=[
                bytearray.fromhex("00"),
                bytearray.fromhex("0504030201"), None,
                bytearray.fromhex("020104054a0503"), None
            ]))

        # Expected serialization of df_nullable (hex-encoded); presumably
        # the uncompressed form, by analogy with `truth` above
        TestDataFramesIO.truth_nullable = bytearray.fromhex(
            ("7b763a323b6e000000050000000a62797465436f6c0073686"
             "f7274436f6c00696e74436f6c006c6f6e67436f6c00737472"
             "696e67436f6c0063686172436f6c00666c6f6174436f6c006"
             "46f75626c65436f6c00626f6f6c65616e436f6c0062696e61"
             "7279436f6c000a0b0c0d0e110f10121400000003d6eacd7d0"
             "a00000032000b00150000000000000000000c000000000000"
             "0020000000000000000000000000000000000000000000000"
             "0000000000000000021000000000000000000000000000000"
             "354142434400322122302c2e00000023357b3d303e7d002c6"
             "200643f4121999a000000000000000000000000424a000000"
             "0000000000000000000000000000000000000000000000000"
             "00000000000004049c0000000000088000000010000000005"
             "05040302010000000000000007020104054a050300000000"))

        # Expected compressed serialization of df_nullable (hex-encoded)
        TestDataFramesIO.truth_nullable_compressed = bytearray.fromhex(
            ("6466ab2eb332b2ce636060600562aea4ca9254e7fc1"
             "c86e28cfca2121023330f4ce5e4e7a583c54b8a3221"
             "ace48cc422109d96939f085692925f9a9403d69c949"
             "f9f939a98076666e625165582585cdc3cbc7c82fc02"
             "4222407b98afbd3a5bcb0564183170338832c0000f9"
             "45660c00f14d1f8a68e4ece2e0c468a4a063a7a40ae"
             "b269b5ad815d2d834e12438abda3e2cc59c86a9dbc0"
             "8180e040e9e07c0740710338218acac2ccc4c8c5059"
             "76264616562f5666101b00df48306a"))

        # Expected Base64 string for df_nullable
        TestDataFramesIO.truth_nullable_base64 = ((
            "ZGarLrMyss5jYGBgBWKupMqSVOf8HIbijPyiEhAjMw9M5e"
            "TnpYPFS4oyIazkjMQiEJ2Wk58IVpKSX5qUA9aclJ+fk5qY"
            "B2Zm5iUWVYJYXNw8vHyC/AJCIkB7mK+9OlvLBWQYMXAziD"
            "LAAA+UVmDADxTR+KaOTs4uDEaKSgY6ekCusmm1rYFdLYNO"
            "EkOKvaPizFnIap28CBgOBA6eB8B0BxAzghisrCzMTIxQWX"
            "YmRhZWL1ZmEBsA30gwag=="))

        # Name the columns last, once both DataFrames have been constructed
        TestDataFramesIO.df_default.set_column_names(
            TestDataFramesIO.column_names)
        TestDataFramesIO.df_nullable.set_column_names(
            TestDataFramesIO.column_names)
Пример #9
0
 def test_serial_deserial_nullable_compressed(self):
     """Round-trip the nullable fixture through compressed serialization.

     The deserialized frame must equal the fixture it was produced from.
     """
     payload = DataFrame.serialize(
         TestDataFramesIO.df_nullable, compress=True)
     restored = DataFrame.deserialize(payload)
     round_trip_ok = restored.equals(TestDataFramesIO.df_nullable)
     self.assertTrue(round_trip_ok, "DataFrames are not equal")
Пример #10
0
 def test_serial_deserial_default(self):
     """Round-trip the default fixture through uncompressed serialization.

     The deserialized frame must equal the fixture it was produced from.
     """
     payload = DataFrame.serialize(
         TestDataFramesIO.df_default, compress=False)
     restored = DataFrame.deserialize(payload)
     round_trip_ok = restored.equals(TestDataFramesIO.df_default)
     self.assertTrue(round_trip_ok, "DataFrames are not equal")
Пример #11
0
 def test_to_base64_string_nullable(self):
     """Encode the nullable fixture to Base64 and compare with the truth.

     The string returned by ``DataFrame.to_base64`` must be identical to
     the ``truth_nullable_base64`` fixture.
     """
     s = DataFrame.to_base64(TestDataFramesIO.df_nullable)
     # assertEqual shows both values (and a diff for strings) on failure,
     # which assertTrue on a precomputed comparison cannot do.
     self.assertEqual(
         TestDataFramesIO.truth_nullable_base64, s,
         "Serialized Dataframe does not match expected Base64 string")
Пример #12
0
 def test_to_base64_nullable(self):
     """Round-trip the nullable fixture through its Base64 representation.

     The frame decoded from the Base64 string must compare equal to the
     fixture it was encoded from.
     """
     # NOTE(review): compares with `==` while sibling tests call equals();
     # presumably DataFrame defines __eq__ accordingly -- confirm.
     encoded = DataFrame.to_base64(TestDataFramesIO.df_nullable)
     decoded = DataFrame.from_base64(encoded)
     unchanged = decoded == TestDataFramesIO.df_nullable
     self.assertTrue(unchanged, "Dataframe does not match original")
Пример #13
0
 def test_to_base64_default(self):
     """Round-trip the default fixture through its Base64 representation.

     The frame decoded from the Base64 string must compare equal to the
     fixture it was encoded from.
     """
     # NOTE(review): compares with `==` while sibling tests call equals();
     # presumably DataFrame defines __eq__ accordingly -- confirm.
     encoded = DataFrame.to_base64(TestDataFramesIO.df_default)
     decoded = DataFrame.from_base64(encoded)
     unchanged = decoded == TestDataFramesIO.df_default
     self.assertTrue(unchanged, "Dataframe does not match original")
Пример #14
0
 def test_serialization_nullable_compressed(self):
     """Serialize the nullable fixture with compression and check the bytes.

     The output of ``DataFrame.serialize(..., compress=True)`` must equal
     the ``truth_nullable_compressed`` fixture.
     """
     b = DataFrame.serialize(TestDataFramesIO.df_nullable, compress=True)
     # assertEqual reports both byte sequences on failure, which
     # assertTrue on a precomputed comparison cannot do.
     self.assertEqual(TestDataFramesIO.truth_nullable_compressed, b,
                      "Serialized Dataframe does not match expected bytes")
Пример #15
0
 def test_serialization_default(self):
     """Serialize the default fixture and check the resulting bytes.

     The output of ``DataFrame.serialize`` called without further arguments
     must equal the ``truth`` fixture.
     """
     b = DataFrame.serialize(TestDataFramesIO.df_default)
     # assertEqual reports both byte sequences on failure, which
     # assertTrue on a precomputed comparison cannot do.
     self.assertEqual(TestDataFramesIO.truth, b,
                      "Serialized Dataframe does not match expected bytes")