Пример #1
0
    def test_field(self):
        """Check the properties produced by a Schema with eleven typed fields.

        Fields are registered one by one and ``to_properties()`` is then
        compared against the expected ``schema.<index>.name`` /
        ``schema.<index>.data-type`` entries.
        """
        schema = Schema()
        columns = [
            ("int_field", DataTypes.INT()),
            ("long_field", DataTypes.BIGINT()),
            ("string_field", DataTypes.STRING()),
            ("timestamp_field", DataTypes.TIMESTAMP(3)),
            ("time_field", DataTypes.TIME()),
            ("date_field", DataTypes.DATE()),
            ("double_field", DataTypes.DOUBLE()),
            ("float_field", DataTypes.FLOAT()),
            ("byte_field", DataTypes.TINYINT()),
            ("short_field", DataTypes.SMALLINT()),
            ("boolean_field", DataTypes.BOOLEAN()),
        ]
        for column_name, column_type in columns:
            # ``field`` is a fluent call; rebind so the loop mirrors a chained call.
            schema = schema.field(column_name, column_type)

        actual = schema.to_properties()

        # One name/data-type pair per field, indexed in registration order.
        wanted = {
            'schema.0.name': 'int_field', 'schema.0.data-type': 'INT',
            'schema.1.name': 'long_field', 'schema.1.data-type': 'BIGINT',
            'schema.2.name': 'string_field', 'schema.2.data-type': 'VARCHAR(2147483647)',
            'schema.3.name': 'timestamp_field', 'schema.3.data-type': 'TIMESTAMP(3)',
            'schema.4.name': 'time_field', 'schema.4.data-type': 'TIME(0)',
            'schema.5.name': 'date_field', 'schema.5.data-type': 'DATE',
            'schema.6.name': 'double_field', 'schema.6.data-type': 'DOUBLE',
            'schema.7.name': 'float_field', 'schema.7.data-type': 'FLOAT',
            'schema.8.name': 'byte_field', 'schema.8.data-type': 'TINYINT',
            'schema.9.name': 'short_field', 'schema.9.data-type': 'SMALLINT',
            'schema.10.name': 'boolean_field', 'schema.10.data-type': 'BOOLEAN',
        }
        self.assertEqual(wanted, actual)
Пример #2
0
    def test_basic_type(self):
        """Round-trip basic data types through their Java representation.

        Each Python data type is converted with ``_to_java_data_type`` and the
        result converted back with ``_from_java_data_type``; the final list
        must equal the original one.
        """
        python_types = [
            DataTypes.STRING(),
            DataTypes.BOOLEAN(),
            DataTypes.BYTES(),
            DataTypes.TINYINT(),
            DataTypes.SMALLINT(),
            DataTypes.INT(),
            DataTypes.BIGINT(),
            DataTypes.FLOAT(),
            DataTypes.DOUBLE(),
            DataTypes.DATE(),
            DataTypes.TIME(),
            DataTypes.TIMESTAMP(3),
        ]

        # Convert everything to Java first, then convert everything back.
        as_java = list(map(_to_java_data_type, python_types))
        round_tripped = list(map(_from_java_data_type, as_java))

        self.assertEqual(python_types, round_tripped)
Пример #3
0
    def test_verify_type_not_nullable(self):
        """Exercise type verifiers built from non-nullable data types.

        Every data type in the two spec tables is passed through
        ``not_null()`` before ``_create_type_verifier`` is called on it.

        * ``success_spec`` holds ``(obj, data_type)`` pairs for which the
          verifier must not raise ``TypeError`` or ``ValueError``.
        * ``failure_spec`` holds ``(obj, data_type, exception class)`` triples
          for which the verifier must raise the given exception class.
        """
        import array
        import datetime
        import decimal

        # Row type shared by the "Struct" cases: 's' is declared non-nullable,
        # 'i' is declared nullable.
        schema = DataTypes.ROW([
            DataTypes.FIELD('s', DataTypes.STRING(nullable=False)),
            DataTypes.FIELD('i', DataTypes.INT(True))
        ])

        # Plain object whose attributes are set from keyword arguments; used as
        # an attribute-based value in the "Struct" cases.
        class MyObj:
            def __init__(self, **kwargs):
                for k, v in kwargs.items():
                    setattr(self, k, v)

        # obj, data_type
        success_spec = [
            # String
            ("", DataTypes.STRING()),
            (u"", DataTypes.STRING()),

            # UDT
            (ExamplePoint(1.0, 2.0), ExamplePointUDT()),

            # Boolean
            (True, DataTypes.BOOLEAN()),

            # TinyInt
            (-(2**7), DataTypes.TINYINT()),
            (2**7 - 1, DataTypes.TINYINT()),

            # SmallInt
            (-(2**15), DataTypes.SMALLINT()),
            (2**15 - 1, DataTypes.SMALLINT()),

            # Int
            (-(2**31), DataTypes.INT()),
            (2**31 - 1, DataTypes.INT()),

            # BigInt
            # NOTE(review): 2**64 does not fit in a signed 64-bit integer, yet
            # it is listed as accepted; presumably the BIGINT verifier does no
            # range check -- confirm this is intended.
            (2**64, DataTypes.BIGINT()),

            # Float & Double
            (1.0, DataTypes.FLOAT()),
            (1.0, DataTypes.DOUBLE()),

            # Decimal
            (decimal.Decimal("1.0"), DataTypes.DECIMAL(10, 0)),

            # Binary
            (bytearray([1]), DataTypes.BINARY(1)),

            # Date/Time/Timestamp
            (datetime.date(2000, 1, 2), DataTypes.DATE()),
            (datetime.datetime(2000, 1, 2, 3, 4), DataTypes.DATE()),
            (datetime.time(1, 1, 2), DataTypes.TIME()),
            (datetime.datetime(2000, 1, 2, 3, 4), DataTypes.TIMESTAMP()),

            # Array
            ([], DataTypes.ARRAY(DataTypes.INT())),
            (["1", None], DataTypes.ARRAY(DataTypes.STRING(nullable=True))),
            ([1, 2], DataTypes.ARRAY(DataTypes.INT())),
            ((1, 2), DataTypes.ARRAY(DataTypes.INT())),
            (array.array('h', [1, 2]), DataTypes.ARRAY(DataTypes.INT())),

            # Map
            ({}, DataTypes.MAP(DataTypes.STRING(), DataTypes.INT())),
            ({
                "a": 1
            }, DataTypes.MAP(DataTypes.STRING(), DataTypes.INT())),
            ({
                "a": None
            },
             DataTypes.MAP(DataTypes.STRING(nullable=False),
                           DataTypes.INT(True))),

            # Struct
            ({
                "s": "a",
                "i": 1
            }, schema),
            ({
                "s": "a",
                "i": None
            }, schema),
            ({
                "s": "a"
            }, schema),
            ({
                "s": "a",
                "f": 1.0
            }, schema),
            (Row(s="a", i=1), schema),
            (Row(s="a", i=None), schema),
            (Row(s="a", i=1, f=1.0), schema),
            (["a", 1], schema),
            (["a", None], schema),
            (("a", 1), schema),
            (MyObj(s="a", i=1), schema),
            (MyObj(s="a", i=None), schema),
            (MyObj(s="a"), schema),
        ]

        # obj, data_type, exception class
        failure_spec = [
            # Char/VarChar (match anything but None)
            (None, DataTypes.VARCHAR(1), ValueError),
            (None, DataTypes.CHAR(1), ValueError),

            # VarChar (length exceeds maximum length)
            ("abc", DataTypes.VARCHAR(1), ValueError),
            # Char (length exceeds length)
            ("abc", DataTypes.CHAR(1), ValueError),

            # UDT
            (ExamplePoint(1.0, 2.0), PythonOnlyUDT(), ValueError),

            # Boolean
            (1, DataTypes.BOOLEAN(), TypeError),
            ("True", DataTypes.BOOLEAN(), TypeError),
            ([1], DataTypes.BOOLEAN(), TypeError),

            # TinyInt
            (-(2**7) - 1, DataTypes.TINYINT(), ValueError),
            (2**7, DataTypes.TINYINT(), ValueError),
            ("1", DataTypes.TINYINT(), TypeError),
            (1.0, DataTypes.TINYINT(), TypeError),

            # SmallInt
            (-(2**15) - 1, DataTypes.SMALLINT(), ValueError),
            (2**15, DataTypes.SMALLINT(), ValueError),

            # Int
            (-(2**31) - 1, DataTypes.INT(), ValueError),
            (2**31, DataTypes.INT(), ValueError),

            # Float & Double
            (1, DataTypes.FLOAT(), TypeError),
            (1, DataTypes.DOUBLE(), TypeError),

            # Decimal
            (1.0, DataTypes.DECIMAL(10, 0), TypeError),
            (1, DataTypes.DECIMAL(10, 0), TypeError),
            ("1.0", DataTypes.DECIMAL(10, 0), TypeError),

            # Binary
            (1, DataTypes.BINARY(1), TypeError),
            # VarBinary (length exceeds maximum length)
            (bytearray([1, 2]), DataTypes.VARBINARY(1), ValueError),
            # Binary (length exceeds length)
            (bytearray([1, 2]), DataTypes.BINARY(1), ValueError),

            # Date/Time/Timestamp
            ("2000-01-02", DataTypes.DATE(), TypeError),
            ("10:01:02", DataTypes.TIME(), TypeError),
            (946811040, DataTypes.TIMESTAMP(), TypeError),

            # Array
            (["1", None], DataTypes.ARRAY(DataTypes.VARCHAR(1,
                                                            nullable=False)),
             ValueError),
            ([1, "2"], DataTypes.ARRAY(DataTypes.INT()), TypeError),

            # Map
            ({
                "a": 1
            }, DataTypes.MAP(DataTypes.INT(), DataTypes.INT()), TypeError),
            ({
                "a": "1"
            }, DataTypes.MAP(DataTypes.VARCHAR(1),
                             DataTypes.INT()), TypeError),
            ({
                "a": None
            }, DataTypes.MAP(DataTypes.VARCHAR(1),
                             DataTypes.INT(False)), ValueError),

            # Struct
            ({
                "s": "a",
                "i": "1"
            }, schema, TypeError),
            (Row(s="a"), schema, ValueError),  # Row can't have missing field
            (Row(s="a", i="1"), schema, TypeError),
            (["a"], schema, ValueError),
            (["a", "1"], schema, TypeError),
            (MyObj(s="a", i="1"), schema, TypeError),
            (MyObj(s=None, i="1"), schema, ValueError),
        ]

        # Check success cases
        for obj, data_type in success_spec:
            try:
                _create_type_verifier(data_type.not_null())(obj)
            except (TypeError, ValueError) as e:
                # Report the verifier's own error as well; without it the
                # failure says which case broke but not why.
                self.fail("verify_type(%s, %s, nullable=False) raised %s: %s" %
                          (obj, data_type, type(e).__name__, e))

        # Check failure cases
        for obj, data_type, exp in failure_spec:
            msg = "verify_type(%s, %s, nullable=False) == %s" % (
                obj, data_type, exp)
            with self.assertRaises(exp, msg=msg):
                _create_type_verifier(data_type.not_null())(obj)
Пример #4
0
 def test_datetype_equal_zero(self):
     """``DATE().from_sql_type(0)`` must return the date 1970-01-01."""
     date_type = DataTypes.DATE()
     converted = date_type.from_sql_type(0)
     self.assertEqual(converted, datetime.date(1970, 1, 1))