def test_get_avro_schema_invalid_column_interface():
    """
    Test incompatible custom class with no mapping, this shall result in exception.
    """

    class Column:
        def __init__(self, name: str, data_type: str, udt_name: str, is_nullable: bool):
            self.name = name
            self.data_type = data_type
            self.udt_name = udt_name
            self.is_nullable = is_nullable

    # Attribute names here (data_type/udt_name/is_nullable) deliberately do NOT
    # match the interface pg2avro assumes when no mapping is given.
    cols = [
        Column(name="smallint", data_type="smallint", udt_name="int2", is_nullable=False)
    ]

    # Not passing column mapping, this should raise an exception.
    with pytest.raises(Exception, match="Assuming pg2avro compatible column interface"):
        get_avro_schema("test_table", "test_namespace", cols)
def test_get_avro_schema_assumed_column_interface():
    """
    Test using compatible column object without any integration mapping.
    """

    class Column:
        def __init__(self, name: str, type: str, nullable: bool):
            self.name = name
            self.type = type
            self.nullable = nullable

    table = "test_table"
    ns = "test_namespace"
    cols = [Column(name="smallint", type="smallint", nullable=False)]

    expected = {
        "name": table,
        "namespace": ns,
        "type": "record",
        "fields": [{"name": "smallint", "type": "int"}],
    }

    assert get_avro_schema(table, ns, cols) == expected
def test_get_avro_schema_assumed_dict_interface():
    """
    Test using a pg2avro-compatible dictionary without any custom mapping.

    Renamed from test_get_avro_schema_assumed_column_interface: that name is
    already used by the object-based variant above, and duplicate test names
    in one module mean pytest only collects the last definition, silently
    skipping the earlier one. The previous docstring ("dictionary with custom
    mapping") was also wrong — no mapping is passed here.
    """
    columns = [{
        "name": "smallint",
        "type": "smallint",
        # Extra key beyond the assumed interface; must be ignored.
        "secondary_type": "int2",
        "nullable": False,
    }]
    table_name = "test_table"
    namespace = "test_namespace"
    expected = {
        "name": table_name,
        "namespace": namespace,
        "type": "record",
        "fields": [{"name": "smallint", "type": "int"}],
    }

    actual = get_avro_schema(table_name, namespace, columns)

    assert expected == actual
def test_get_avro_schema_custom_mapping_dict():
    """
    Test custom ColumnMapping over dictionary columns with non-standard keys.

    Renamed from test_get_avro_schema_custom_mapping: that name is reused
    twice more later in this module, and with duplicate names pytest only
    collects the last definition — this test would otherwise never run.
    """
    columns = [{"c1": "smallint", "c2": "smallint", "c3": "int2", "c4": False}]
    table_name = "test_table"
    namespace = "test_namespace"
    expected = {
        "name": table_name,
        "namespace": namespace,
        "type": "record",
        "fields": [{"name": "smallint", "type": "int"}],
    }

    actual = get_avro_schema(
        table_name,
        namespace,
        columns,
        # NOTE(review): "c5"/"c6" are not present in the column dicts —
        # presumably ColumnMapping tolerates missing precision/scale keys
        # for non-numeric columns; confirm against its implementation.
        ColumnMapping(
            name="c1",
            type="c2",
            nullable="c4",
            numeric_precision="c5",
            numeric_scale="c6",
        ),
    )

    assert expected == actual
def test_get_avro_schema_invalid_dict_interface():
    """
    Test incompatible dict with no mapping, this shall result in exception.

    Renamed from test_get_avro_schema_invalid_column_interface: that name is
    already used by the object-based variant of this test earlier in the
    module; duplicate test names make pytest collect only the last one, so
    one of the two would silently not run.
    """
    # "incompatible" is not the "name" key pg2avro assumes without a mapping.
    columns = [{"incompatible": "smallint", "type": "smallint", "nullable": False}]
    table_name = "test_table"
    namespace = "test_namespace"

    # Not passing column mapping, this should raise an exception.
    with pytest.raises(Exception, match="Assuming pg2avro compatible column interface"):
        get_avro_schema(table_name, namespace, columns)
def test_get_avro_row_dict_special_data_types():
    """
    Test generating Avro rows from data, using special types.
    """
    columns = [
        {"name": "json_col", "type": "json"},
        {"name": "jsonb_col", "type": "jsonb"},
        {"name": "empty_list", "type": "_varchar"},
    ]
    schema = get_avro_schema("test_table", "test_namespace", columns)

    json_1 = {"key1": "val1"}
    json_2 = {"key2": "val2", "key3": [1, 2], "key4": {"key5": "val5"}}

    # json/jsonb values are expected to come back serialized as strings;
    # array columns pass through (including empty list and None).
    source_rows = [(json_1, json_2, []), (json_2, json_1, None)]
    expected = [
        {
            "json_col": json.dumps(json_1),
            "jsonb_col": json.dumps(json_2),
            "empty_list": [],
        },
        {
            "json_col": json.dumps(json_2),
            "jsonb_col": json.dumps(json_1),
            "empty_list": None,
        },
    ]

    assert [get_avro_row_dict(row, schema) for row in source_rows] == expected
def test_get_avro_schema_sqlalchemy_basic():
    """
    Test sqlalchemy integration over a few representative types.

    Renamed from test_get_avro_schema_sqlalchemy: a more exhaustive test with
    the same name exists later in the module, and with duplicate names pytest
    only collects the last definition — this one would silently never run.
    """
    columns = [
        Column(SMALLINT, name="smallint", nullable=False),
        Column(BOOLEAN, name="bool", nullable=False),
        Column(ARRAY(VARCHAR), name="array", nullable=False),
    ]
    table_name = "test_table"
    namespace = "test_namespace"
    expected = {
        "name": table_name,
        "namespace": namespace,
        "type": "record",
        "fields": [
            {"name": "smallint", "type": "int"},
            {"name": "bool", "type": "boolean"},
            {"name": "array", "type": {"items": "string", "type": "array"}},
        ],
    }

    actual = get_avro_schema(table_name, namespace, columns)

    assert expected == actual
def test_get_avro_row_row_types():
    """
    Test generating Avro rows from different source row data.

    TODO: Cover more than the simplest golden path.
    """
    columns = [
        {"name": "name", "type": "varchar", "nullable": False},
        {"name": "number", "type": "float4", "nullable": False},
        {"name": "list", "type": "_varchar", "nullable": False},
        {"name": "is_working", "type": "bool", "nullable": False},
    ]
    schema = get_avro_schema("test_table", "test_namespace", columns)

    expected = [
        {
            "name": "example-01",
            "number": 1.0,
            "list": ["list", "of", "strings"],
            "is_working": True,
        },
        {
            "name": "example-02",
            "number": 2.5,
            "list": ["another", "list", "of", "strings"],
            "is_working": False,
        },
    ]

    class Row:
        def __init__(self, name: str, number: float, list: List[str], is_working: bool):
            self.name = name
            self.number = number
            self.list = list
            self.is_working = is_working

    def dict_rows():
        # Fresh dicts each call so no representation shares objects with another.
        return [
            {
                "name": "example-01",
                "number": 1.0,
                "list": "list of strings".split(),
                "is_working": True,
            },
            {
                "name": "example-02",
                "number": 2.5,
                "list": "another list of strings".split(),
                "is_working": False,
            },
        ]

    # The same two logical rows, expressed in every supported representation.
    rows_data = [
        # Compatible Row objects.
        [
            Row("example-01", 1.0, "list of strings".split(), True),
            Row("example-02", 2.5, "another list of strings".split(), False),
        ],
        # Compatible Dicts.
        dict_rows(),
        # Compatible Dicts, but extended class.
        [OrderedDict(r) for r in dict_rows()],
        # Compatible Tuples.
        [
            ("example-01", 1.0, "list of strings".split(), True),
            ("example-02", 2.5, "another list of strings".split(), False),
        ],
    ]

    for rows in rows_data:
        assert [get_avro_row_dict(r, schema) for r in rows] == expected
def test_mapping_overrides():
    """
    Test mapping overrides
    """
    from pg2avro.pg2avro import Column

    table = "test_table"
    ns = "test_namespace"
    columns = [
        Column(name="int_to_string", type="int"),
        Column(name="string_to_numeric", type="string"),
        Column(name="not_overriden", type="int"),
        Column(name="numeric_to_float", type="numeric"),
        Column(name="array_to_string", type="_varchar"),
        Column(name="string_to_array", type="varchar"),
    ]
    # Per-column type overrides; the entry with a non-matching name must be
    # ignored, and "not_overriden" must keep its original type.
    overrides = {
        "int_to_string": {"pg_type": "string", "python_type": str},
        "string_to_numeric": {"pg_type": "numeric", "python_type": float},
        "not_matching_override_name": {"pg_type": "int", "python_type": int},
        "numeric_to_float": {"pg_type": "float8", "python_type": float},
        "array_to_string": {"pg_type": "string", "python_type": str},
        "string_to_array": {"pg_type": "_string", "python_type": list},
    }

    expected_schema = {
        "name": table,
        "namespace": ns,
        "type": "record",
        "fields": [
            {"name": "int_to_string", "type": ["null", "string"]},
            {
                "name": "string_to_numeric",
                "type": [
                    "null",
                    {
                        "type": "bytes",
                        "logicalType": "decimal",
                        "precision": 38,
                        "scale": 9,
                    },
                ],
            },
            {"name": "not_overriden", "type": ["null", "int"]},
            {"name": "numeric_to_float", "type": ["null", "double"]},
            {"name": "array_to_string", "type": ["null", "string"]},
            {
                "name": "string_to_array",
                "type": ["null", {"type": "array", "items": "string"}],
            },
        ],
    }
    schema = get_avro_schema(table, ns, columns, mapping_overrides=overrides)
    assert schema == expected_schema

    # Now data: one fully populated row and one all-None row.
    field_names = (
        "int_to_string",
        "string_to_numeric",
        "not_overriden",
        "numeric_to_float",
        "array_to_string",
        "string_to_array",
    )
    rows_data = [
        {
            "int_to_string": 1,
            "string_to_numeric": "2.0",
            "not_overriden": 3,
            "numeric_to_float": 0.12345678910,
            "array_to_string": [1, 2, "a", "b"],
            "string_to_array": "asd",
        },
        {name: None for name in field_names},
    ]
    expected = [
        {
            "int_to_string": "1",
            "string_to_numeric": 2.0,
            "not_overriden": 3,
            "numeric_to_float": 0.12345678910,
            "array_to_string": "[1, 2, 'a', 'b']",
            "string_to_array": ["a", "s", "d"],
        },
        {name: None for name in field_names},
    ]

    assert [get_avro_row_dict(r, schema, overrides) for r in rows_data] == expected
def test_get_avro_schema_sqlalchemy():
    """
    Test sqlalchemy integration.

    TODO: Cover all sql/postgres types.
    """
    custom_enum_type = ("value_1", "value_2")
    columns = [
        Column(SMALLINT, name="smallint", nullable=False),
        Column(BIGINT, name="bigint", nullable=False),
        Column(INTEGER, name="integer", nullable=False),
        Column(NUMERIC(10, 2), name="numeric", nullable=False),
        Column(NUMERIC(10, 10), name="numeric_to_double", nullable=False),
        Column(NUMERIC, name="numeric_defaults", nullable=False),
        Column(NUMERIC, name="numeric_nullable", nullable=True),
        Column(DOUBLE_PRECISION, name="double_precision", nullable=False),
        Column(BOOLEAN, name="bool", nullable=False),
        Column(DATE, name="date", nullable=False),
        Column(TIME, name="time", nullable=False),
        Column(TIMESTAMP, name="timestamp", nullable=False),
        Column(CHAR, name="char", nullable=False),
        Column(TEXT, name="text", nullable=True),
        Column(VARCHAR(255), primary_key=True, name="varchar", nullable=False),
        Column(ARRAY(VARCHAR), name="array", nullable=False),
        Column(INTERVAL, name="interval", nullable=False),
        Column(ENUM(name="some_enum", *custom_enum_type), name="enum", nullable=False),
        Column(UUID, name="uuid", nullable=False),
        Column(JSONB, name="jsonb", nullable=False),
        Column(JSON, name="json", nullable=False),
    ]
    table = "test_table"
    ns = "test_namespace"

    def field(name, avro_type):
        # Local shorthand for one expected schema field.
        return {"name": name, "type": avro_type}

    # NUMERIC with no precision/scale falls back to these defaults.
    decimal_default = {
        "logicalType": "decimal",
        "type": "bytes",
        "precision": 38,
        "scale": 9,
    }
    expected = {
        "name": table,
        "namespace": ns,
        "type": "record",
        "fields": [
            field("smallint", "int"),
            field("bigint", "long"),
            field("integer", "int"),
            field(
                "numeric",
                {
                    "logicalType": "decimal",
                    "type": "bytes",
                    "precision": 10,
                    "scale": 2,
                },
            ),
            field("numeric_to_double", "double"),
            field("numeric_defaults", decimal_default),
            field("numeric_nullable", ["null", decimal_default]),
            field("double_precision", "double"),
            field("bool", "boolean"),
            field("date", {"logicalType": "date", "type": "int"}),
            field("time", {"logicalType": "timestamp-millis", "type": "int"}),
            field("timestamp", {"logicalType": "timestamp-millis", "type": "long"}),
            field("char", "string"),
            field("text", ["null", "string"]),
            field("varchar", "string"),
            field("array", {"items": "string", "type": "array"}),
            field("interval", "string"),
            field("enum", "string"),
            field("uuid", "string"),
            field("jsonb", "string"),
            field("json", "string"),
        ],
    }

    assert get_avro_schema(table, ns, columns) == expected
def test_get_avro_schema_custom_mapping():
    """
    Test custom integration using mapping class.

    TODO: Cover all sql/postgres types.
    """

    class Col:
        # Column stand-in whose attribute names match nothing pg2avro assumes,
        # forcing the ColumnMapping below to be honored.
        def __init__(
            self,
            n: str,
            un: str,
            nul: bool,
            np: Optional[int] = None,
            ns: Optional[int] = None,
        ):
            self.n = n
            self.un = un
            self.nul = nul
            self.np = np
            self.ns = ns

    columns = [
        Col(n="smallint", un="int2", nul=False),
        Col(n="bigint", un="int8", nul=False),
        Col(n="integer", un="int4", nul=False),
        Col(n="numeric", un="numeric", nul=False, np=3, ns=7),
        Col(n="numeric_to_double", un="numeric", nul=False, np=10, ns=10),
        Col(n="numeric_defaults", un="numeric", nul=False),
        Col(n="numeric_nullable", un="numeric", nul=True),
        Col(n="double_precision", un="float8", nul=False),
        Col(n="real", un="float4", nul=False),
        Col(n="bool", un="bool", nul=False),
        Col(n="char", un="char", nul=False),
        Col(n="bpchar", un="bpchar", nul=False),
        Col(n="varchar", un="varchar", nul=False),
        Col(n="array", un="_varchar", nul=False),
        Col(n="array_n", un="_varchar", nul=True),
        Col(n="date", un="date", nul=False),
        Col(n="time", un="time", nul=False),
        Col(n="timestamp", un="timestamp", nul=False),
        Col(n="enum", un="custom_type", nul=False),
        Col(n="uuid", un="uuid", nul=False),
        Col(n="json", un="json", nul=False),
        Col(n="jsonb", un="jsonb", nul=False),
    ]
    table_name = "test_table"
    namespace = "test_namespace"

    def field(name, avro_type):
        # Local shorthand for one expected schema field.
        return {"name": name, "type": avro_type}

    # NUMERIC with no precision/scale falls back to these defaults.
    decimal_default = {
        "logicalType": "decimal",
        "type": "bytes",
        "precision": 38,
        "scale": 9,
    }
    string_array = {"items": "string", "type": "array"}
    expected = {
        "name": table_name,
        "namespace": namespace,
        "type": "record",
        "fields": [
            field("smallint", "int"),
            field("bigint", "long"),
            field("integer", "int"),
            field(
                "numeric",
                {
                    "logicalType": "decimal",
                    "type": "bytes",
                    "precision": 3,
                    "scale": 7,
                },
            ),
            field("numeric_to_double", "double"),
            field("numeric_defaults", decimal_default),
            field("numeric_nullable", ["null", decimal_default]),
            field("double_precision", "double"),
            field("real", "float"),
            field("bool", "boolean"),
            field("char", "string"),
            field("bpchar", "string"),
            field("varchar", "string"),
            field("array", string_array),
            field("array_n", ["null", string_array]),
            field("date", {"logicalType": "date", "type": "int"}),
            field("time", {"logicalType": "timestamp-millis", "type": "int"}),
            field("timestamp", {"logicalType": "timestamp-millis", "type": "long"}),
            field("enum", "string"),
            field("uuid", "string"),
            field("json", "string"),
            field("jsonb", "string"),
        ],
    }

    mapping = ColumnMapping(
        name="n",
        type="un",
        nullable="nul",
        numeric_precision="np",
        numeric_scale="ns",
    )

    assert get_avro_schema(table_name, namespace, columns, mapping) == expected
def test_get_avro_schema_custom_mapping_minimal():
    """
    Test custom integration using mapping class (minimal subset of types).

    Renamed from test_get_avro_schema_custom_mapping: an exhaustive test with
    the same name is defined just above, and with duplicate names pytest only
    collects the last definition — one of the two would silently never run.
    """

    class Col:
        def __init__(
            self,
            n: str,
            un: str,
            nul: bool,
            np: Optional[int] = None,
            ns: Optional[int] = None,
        ):
            self.n = n
            self.un = un
            self.nul = nul
            self.np = np
            self.ns = ns

    columns = [
        Col(n="smallint", un="int2", nul=False),
        Col(n="bool", un="bool", nul=False),
        Col(n="array", un="_varchar", nul=False),
    ]
    table_name = "test_table"
    namespace = "test_namespace"
    expected = {
        "name": table_name,
        "namespace": namespace,
        "type": "record",
        "fields": [
            {"name": "smallint", "type": "int"},
            {"name": "bool", "type": "boolean"},
            {"name": "array", "type": {"items": "string", "type": "array"}},
        ],
    }

    actual = get_avro_schema(
        table_name,
        namespace,
        columns,
        # Map pg2avro's expected attributes onto Col's short attribute names.
        ColumnMapping(
            name="n",
            type="un",
            nullable="nul",
            numeric_precision="np",
            numeric_scale="ns",
        ),
    )

    assert expected == actual