# Exemplo n.º 1
# 0
def test_json_load_repeated_schema():
    """REPEATED fields of every scalar type, plus a repeated nested struct,
    must be rendered as BigQuery array literals with per-element casts,
    nulls typed via cast(null as <TYPE>), and escaped quotes/backslashes
    in string elements.
    """
    transformer = JsonDataLiteralTransformer().use_datetime_like_cast()
    query = transformer.load(
        PackageFileLoader(
            "tests/ut/bq_test_kit/data_literal_transformers/resources/"
            "repeated_schema_datum.json"),
        PackageFileLoader(
            "tests/ut/bq_test_kit/data_literal_transformers/resources/repeated_schema.json"
        ))
    # One array per field; element order of the datum file is preserved and
    # nulls appear inside the arrays rather than nulling the array itself.
    assert query == (
        "(select ['str with quote \\'', cast(null as STRING), "
        "'and backslash \\\\'] as f_string, [from_base64('YW55'), "
        "from_base64('c3RyaW5n'), cast(null as BYTES)] as f_bytes, [cast(null as INT64), "
        "cast(1 as INT64), cast(2 as INT64)] as f_int, [cast(1.5 as FLOAT64), cast(null as FLOAT64), "
        "cast(2.6 as FLOAT64)] as f_float, "
        "[true, false, cast(null as BOOLEAN)] as f_bool, "
        "[cast(null as TIMESTAMP), cast('2020-11-26 17:09:03.967259 UTC' as TIMESTAMP), "
        "cast('2019-11-26 17:09:03.967259 UTC' as TIMESTAMP)] as f_timestamp, "
        "[cast('2020-11-26' as DATE), cast(null as DATE), cast('2019-11-26' as DATE)] as f_date, "
        "[cast('11:09:03' as TIME), cast('10:09:03' as TIME), cast(null as TIME)] as f_time, "
        "[cast(null as DATETIME), cast('2020-11-26 17:09:03' as DATETIME), "
        "cast('2019-11-26 17:09:03' as DATETIME)] as f_datetime, [cast(1.6 as NUMERIC), "
        "cast(null as NUMERIC), cast(1.7 as NUMERIC)] as f_numeric, "
        "[struct(cast(null as ARRAY<STRING>) as f_string, [from_base64('dG9fYnl0ZXM=')] as f_bytes), "
        "struct(['a_string'] as f_string, cast(null as ARRAY<BYTES>) as f_bytes), "
        "cast(null as STRUCT<f_string ARRAY<STRING>, f_bytes ARRAY<BYTES>>)] as f_nested)"
    )
# Exemplo n.º 2
# 0
def test_technical_column_with_datum_literal(bqtk: BQTestKit):
    """Selecting every column of a data-literal table (which exposes technical
    columns) must fail with BadRequest, while counting rows through the
    Jinja-interpolated name succeeds.
    """
    schema_pfl = PackageFileLoader("tests/it/bq_test_kit/bq_dsl/resources/"
                                   "technical_column_schema.json")
    datum_pfl = PackageFileLoader(
        "tests/it/bq_test_kit/bq_dsl/resources/technical_column_schema_datum.json"
    )
    json_transformer = JsonDataLiteralTransformer(json_format=JsonFormat.JSON_ARRAY)
    # select * over the raw literal exposes technical columns -> rejected.
    with pytest.raises(BadRequest):
        bqtk.query_template(from_="select * \nfrom technical_column_table;\n") \
            .with_datum({"technical_column_table": (datum_pfl, schema_pfl)}) \
            .as_data_literals() \
            .loaded_with(json_transformer) \
            .add_interpolator(JinjaInterpolator()) \
            .run()
    # Counting through the interpolated table name works as expected.
    count_result = bqtk.query_template(from_="select count(*) \nfrom {{technical_column_table}};\n") \
                       .with_datum({"technical_column_table": (datum_pfl, schema_pfl)}) \
                       .as_data_literals() \
                       .loaded_with(json_transformer) \
                       .add_interpolator(JinjaInterpolator()) \
                       .run()
    assert len(count_result.rows) == 1
# Exemplo n.º 3
# 0
def test_json_empty_array_schema():
    """Empty JSON arrays must become `cast([] as ARRAY<...>)` literals, both
    for top-level repeated fields and for repeated fields nested in a struct.

    NOTE(review): a later function in this file reuses this exact name; under
    pytest the later definition shadows this one, so this test would never be
    collected — confirm these were meant to live in separate modules.
    """
    expected = (
        "(select struct(cast(null as STRING) as f_string, "
        "cast([] as ARRAY<STRUCT<f_string STRING, f_bytes BYTES, f_int INT64, f_float FLOAT64, "
        "f_bool BOOLEAN, f_timestamp TIMESTAMP, f_date DATE, f_time TIME, f_datetime DATETIME, "
        "f_numeric NUMERIC, f_geography GEOGRAPHY>>) as f_repeated_struct, "
        "cast([] as ARRAY<STRING>) as f_string_repeated, cast([] as ARRAY<BYTES>) as f_bytes_repeated, "
        "cast([] as ARRAY<INT64>) as f_int_repeated, "
        "cast([] as ARRAY<FLOAT64>) as f_float_repeated, cast([] as ARRAY<BOOLEAN>) as f_bool_repeated, "
        "cast([] as ARRAY<TIMESTAMP>) as f_timestamp_repeated, cast([] as ARRAY<DATE>) as f_date_repeated, "
        "cast([] as ARRAY<TIME>) as f_time_repeated, cast([] as ARRAY<DATETIME>) as f_datetime_repeated, "
        "cast([] as ARRAY<NUMERIC>) as f_numeric_repeated, "
        "cast([] as ARRAY<GEOGRAPHY>) as f_geography_repeated) as f_struct, "
        "cast([] as ARRAY<STRUCT<f_string STRING, f_bytes BYTES, f_int INT64, f_float FLOAT64, "
        "f_bool BOOLEAN, f_timestamp TIMESTAMP, f_date DATE, f_time TIME, f_datetime DATETIME, "
        "f_numeric NUMERIC, f_geography GEOGRAPHY>>) as f_repeated_struct, "
        "cast([] as ARRAY<STRING>) as f_string_repeated, "
        "cast([] as ARRAY<BYTES>) as f_bytes_repeated, cast([] as ARRAY<INT64>) as f_int_repeated, "
        "cast([] as ARRAY<FLOAT64>) as f_float_repeated, "
        "cast([] as ARRAY<BOOLEAN>) as f_bool_repeated, "
        "cast([] as ARRAY<TIMESTAMP>) as f_timestamp_repeated, "
        "cast([] as ARRAY<DATE>) as f_date_repeated, "
        "cast([] as ARRAY<TIME>) as f_time_repeated, "
        "cast([] as ARRAY<DATETIME>) as f_datetime_repeated, "
        "cast([] as ARRAY<NUMERIC>) as f_numeric_repeated, cast([] as ARRAY<GEOGRAPHY>) "
        "as f_geography_repeated)")
    query = JsonDataLiteralTransformer(json_format=JsonFormat.JSON_ARRAY).load(
        PackageFileLoader(
            "tests/ut/bq_test_kit/data_literal_transformers/resources/empty_array_schema_datum.json"
        ),
        PackageFileLoader(
            "tests/ut/bq_test_kit/data_literal_transformers/resources/empty_array_schema.json"
        ))
    assert query == expected
def test_json_empty_array_schema(bqtk: BQTestKit):
    """Running the literal built from an empty-array datum must yield exactly
    the schema described by the schema file.
    """
    schema_pfl = PackageFileLoader(
        "tests/it/bq_test_kit/data_literal_transformers/resources/empty_array_schema.json"
    )
    datum_pfl = PackageFileLoader(
        "tests/it/bq_test_kit/data_literal_transformers/resources/empty_array_schema_datum.json"
    )
    literal = JsonDataLiteralTransformer(json_format=JsonFormat.JSON_ARRAY) \
        .load(datum_pfl, schema_pfl)
    run_result = bqtk.query_template(from_=literal).run()
    assert run_result.schema == SchemaMixin().to_schema_field_list(schema_pfl)
# Exemplo n.º 5
# 0
def test_invalid_json_load():
    """A datum file that is not valid JSON must raise RowParsingException."""
    invalid_datum = PackageFileLoader(
        "tests/ut/bq_test_kit/data_literal_transformers/resources/invalid_json_datum.txt"
    )
    simple_schema = PackageFileLoader(
        "tests/ut/bq_test_kit/data_literal_transformers/resources/simple_schema.json"
    )
    with pytest.raises(RowParsingException):
        JsonDataLiteralTransformer().load(invalid_datum, simple_schema)
def test_json_complex_schema(bqtk: BQTestKit):
    """Executing the generated literal must reproduce the complex schema
    exactly as parsed from the schema file.
    """
    schema_pfl = PackageFileLoader(
        "tests/it/bq_test_kit/data_literal_transformers/resources/"
        "complex_schema.json")
    datum_pfl = PackageFileLoader(
        "tests/it/bq_test_kit/data_literal_transformers/resources/complex_schema_datum.json"
    )
    expected_schema = SchemaMixin().to_schema_field_list(schema_pfl)
    literal = JsonDataLiteralTransformer().load(datum_pfl, schema_pfl)
    run_result = bqtk.query_template(from_=literal).run()

    assert run_result.schema == expected_schema
# Exemplo n.º 7
# 0
def test_invalid_instance():
    """Loading an int instead of a supported datum type must raise
    InvalidInstanceException, whichever JSON format is configured.
    """
    transformer = JsonDataLiteralTransformer()
    schema_path = ("tests/ut/bq_test_kit/data_literal_transformers/resources/"
                   "geography_schema.json")
    # Default (newline-delimited) JSON format.
    with pytest.raises(InvalidInstanceException):
        transformer.load(123, PackageFileLoader(schema_path))
    # JSON-array format behaves identically for invalid instances.
    with pytest.raises(InvalidInstanceException):
        transformer.with_json_format(JsonFormat.JSON_ARRAY).load(
            123, PackageFileLoader(schema_path))
# Exemplo n.º 8
# 0
def test_complex_temp_tables(bqtk: BQTestKit):
    """A temp table materialized from a JSON datum must contain one row and
    keep the schema declared in the schema file.
    """
    schema_pfl = PackageFileLoader("tests/it/bq_test_kit/bq_dsl/resources/"
                                   "complex_schema.json")
    datum_pfl = PackageFileLoader(
        "tests/it/bq_test_kit/bq_dsl/resources/complex_schema_datum.json")
    run_result = bqtk.query_template(from_="select * from complex_table") \
                     .with_temp_tables((JsonDataLiteralTransformer(), {
                         "complex_table": (datum_pfl, schema_pfl)
                      })) \
                     .run()

    assert len(run_result.rows) == 1
    assert run_result.schema == SchemaMixin().to_schema_field_list(schema_pfl)
# Exemplo n.º 9
# 0
def test_json_empty_complex_schema():
    """An empty datum (None, "[]", or []) over a complex schema must produce
    a typed, zero-row literal: every field null-cast and a trailing `limit 0`.
    All three empty representations must yield the identical query.
    """
    expected = (
        "(select * from (select cast(null as STRING) as f_string, cast(null as BYTES) as f_bytes, "
        "cast(null as INT64) as f_int, cast(null as FLOAT64) as f_float, cast(null as BOOLEAN) as f_bool,"
        " cast(null as TIMESTAMP) as f_timestamp, cast(null as DATE) as f_date, "
        "cast(null as TIME) as f_time, cast(null as DATETIME) as f_datetime, "
        "cast(null as NUMERIC) as f_numeric, cast(null as GEOGRAPHY) as f_geography, "
        "cast(null as STRUCT<f_string STRING, f_bytes BYTES, f_int INT64, f_float FLOAT64, "
        "f_bool BOOLEAN, f_timestamp TIMESTAMP, f_date DATE, f_time TIME, f_datetime DATETIME, "
        "f_numeric NUMERIC, f_geography GEOGRAPHY, f_repeated_struct ARRAY<STRUCT<f_string STRING, "
        "f_bytes BYTES, f_int INT64, f_float FLOAT64, f_bool BOOLEAN, f_timestamp TIMESTAMP, "
        "f_date DATE, f_time TIME, f_datetime DATETIME, f_numeric NUMERIC, f_geography GEOGRAPHY>>, "
        "f_string_repeated ARRAY<STRING>, f_bytes_repeated ARRAY<BYTES>, f_int_repeated ARRAY<INT64>, "
        "f_float_repeated ARRAY<FLOAT64>, f_bool_repeated ARRAY<BOOLEAN>, "
        "f_timestamp_repeated ARRAY<TIMESTAMP>, f_date_repeated ARRAY<DATE>, "
        "f_time_repeated ARRAY<TIME>, f_datetime_repeated ARRAY<DATETIME>, "
        "f_numeric_repeated ARRAY<NUMERIC>, f_geography_repeated ARRAY<GEOGRAPHY>>) as f_struct, "
        "cast(null as ARRAY<STRUCT<f_string STRING, f_bytes BYTES, f_int INT64, f_float FLOAT64, "
        "f_bool BOOLEAN, f_timestamp TIMESTAMP, f_date DATE, f_time TIME, f_datetime DATETIME, "
        "f_numeric NUMERIC, f_geography GEOGRAPHY>>) as f_repeated_struct, "
        "cast(null as ARRAY<STRING>) as f_string_repeated, "
        "cast(null as ARRAY<BYTES>) as f_bytes_repeated, cast(null as ARRAY<INT64>) as f_int_repeated, "
        "cast(null as ARRAY<FLOAT64>) as f_float_repeated, "
        "cast(null as ARRAY<BOOLEAN>) as f_bool_repeated, "
        "cast(null as ARRAY<TIMESTAMP>) as f_timestamp_repeated, "
        "cast(null as ARRAY<DATE>) as f_date_repeated, "
        "cast(null as ARRAY<TIME>) as f_time_repeated, "
        "cast(null as ARRAY<DATETIME>) as f_datetime_repeated, "
        "cast(null as ARRAY<NUMERIC>) as f_numeric_repeated, cast(null as ARRAY<GEOGRAPHY>) "
        "as f_geography_repeated) limit 0)")
    # Variant 1: None datum with the default (newline-delimited) format.
    query = JsonDataLiteralTransformer().load(
        None,
        PackageFileLoader(
            "tests/ut/bq_test_kit/data_literal_transformers/resources/complex_schema.json"
        ))
    assert query == expected
    # Variant 2: the string "[]" under JSON_ARRAY format.
    query = JsonDataLiteralTransformer(json_format=JsonFormat.JSON_ARRAY).load(
        "[]",
        PackageFileLoader(
            "tests/ut/bq_test_kit/data_literal_transformers/resources/complex_schema.json"
        ))
    assert query == expected
    # Variant 3: an empty Python list with the default format.
    query = JsonDataLiteralTransformer().load(
        [],
        PackageFileLoader(
            "tests/ut/bq_test_kit/data_literal_transformers/resources/complex_schema.json"
        ))
    assert query == expected
# Exemplo n.º 10
# 0
def test_json_load_nested_schema():
    """A nested RECORD field must be rendered as a struct(...) literal, with
    an absent sub-record rendered as cast(null as STRUCT<...>).
    """
    transformer = JsonDataLiteralTransformer().use_datetime_like_cast()
    query = transformer.load(
        PackageFileLoader(
            "tests/ut/bq_test_kit/data_literal_transformers/resources/nested_schema_datum.json"
        ),
        PackageFileLoader(
            "tests/ut/bq_test_kit/data_literal_transformers/resources/nested_schema.json"
        ))
    assert query == (
        "(select struct('str with quote \\' and backslash \\\\' as f_string, "
        "from_base64('YW55') as f_bytes, cast(1 as INT64) as f_int, cast(1.5 as FLOAT64) as f_float, "
        "true as f_bool, "
        "cast('2020-11-26 17:09:03.967259 UTC' as TIMESTAMP) as f_timestamp, "
        "cast('2020-11-26' as DATE) as f_date, cast('11:09:03' as TIME) as f_time, "
        "cast('2020-11-26 17:09:03' as DATETIME) as f_datetime, cast(1.6 as NUMERIC) as f_numeric, "
        "cast(null as STRUCT<f_string STRING>) as f_nested2) as f_nested)")
# Exemplo n.º 11
# 0
def test_csv_skip_line_load_simple_schema():
    """With skip_leading_rows(1) on a one-row CSV (header only), the data is
    entirely skipped, so the literal is the typed zero-row form (`limit 0`).
    """
    transformer = DsvDataLiteralTransformer().use_datetime_like_cast(
    ).skip_leading_rows(1)
    query = transformer.load(
        PackageFileLoader(
            "tests/ut/bq_test_kit/data_literal_transformers/resources/simple_schema_datum.csv"
        ),
        PackageFileLoader(
            "tests/ut/bq_test_kit/data_literal_transformers/resources/simple_schema.json"
        ))
    assert query == (
        "(select * from (select cast(null as STRING) as f_string, cast(null as BYTES) as f_bytes, "
        "cast(null as INT64) as f_int, cast(null as FLOAT64) as f_float, cast(null as BOOLEAN) as f_bool,"
        " cast(null as TIMESTAMP) as f_timestamp, cast(null as DATE) as f_date, "
        "cast(null as TIME) as f_time, cast(null as DATETIME) as f_datetime, "
        "cast(null as NUMERIC) as f_numeric, cast(null as GEOGRAPHY) as f_geography, "
        "cast(null as STRUCT<f_datetime DATETIME>) as f_struct) limit 0)")
def test_json_error_if_exists_in_partition(bqtk: BQTestKit):
    """error_if_exists must only fail when the target partition already holds data."""
    with bqtk.project("it").dataset("dataset_foo").isolate() as dataset:
        with dataset.table("table_bar", schema=json_schema).partition_by(IngestionTime()).isolate() as table:
            data_pfl = PackageFileLoader("tests/it/bq_test_kit/bq_dsl/bq_resources/data_loaders/resources/dummy_data.json")
            # First load into each of two empty partitions succeeds.
            table.json_loader(from_=data_pfl).to_partition("20201111").error_if_exists().load()
            table.json_loader(from_=data_pfl).to_partition("20201112").error_if_exists().load()
            # Reloading an already-populated partition must raise.
            with pytest.raises(Exception):
                table.json_loader(from_=data_pfl).to_partition("20201112").error_if_exists().load()
def test_csv_error_if_exists_in_partition(bqtk: BQTestKit):
    """CSV counterpart: error_if_exists fails only on an already-populated partition."""
    with bqtk.project("it").dataset("dataset_foo").isolate() as dataset:
        fields = [SchemaField("f1", field_type="STRING"), SchemaField("f2", field_type="INT64")]
        with dataset.table("table_bar", schema=fields).partition_by(IngestionTime()).isolate() as table:
            data_pfl = PackageFileLoader("tests/it/bq_test_kit/bq_dsl/bq_resources/data_loaders/resources/dummy_data.csv")
            # First load into each of two empty partitions succeeds.
            table.dsv_loader(from_=data_pfl).to_partition("20201111").error_if_exists().load()
            table.dsv_loader(from_=data_pfl).to_partition("20201112").error_if_exists().load()
            # Reloading an already-populated partition must raise.
            with pytest.raises(Exception):
                table.dsv_loader(from_=data_pfl).to_partition("20201112").error_if_exists().load()
# Exemplo n.º 14
# 0
def test_csv_invalid_load():
    """Passing an int instead of a supported DSV datum must raise
    InvalidInstanceException.
    """
    simple_schema = PackageFileLoader(
        "tests/ut/bq_test_kit/data_literal_transformers/resources/simple_schema.json"
    )
    csv_transformer = DsvDataLiteralTransformer() \
        .use_datetime_like_cast() \
        .skip_leading_rows(1)
    with pytest.raises(InvalidInstanceException):
        csv_transformer.load(123, simple_schema)
# Exemplo n.º 15
# 0
def test_tsv_load_simple_schema():
    """Tab-delimited input with custom quote ('#') and escape ('~') characters
    must parse into the same typed literal as the CSV variant; note the escape
    character surviving in the f_string value of the expected literal.
    """
    transformer = DsvDataLiteralTransformer().use_datetime_like_cast() \
        .with_quote_character("#").with_field_delimiter("\t").with_escape_character("~")
    query = transformer.load(
        PackageFileLoader(
            "tests/ut/bq_test_kit/data_literal_transformers/resources/simple_schema_datum.tsv"
        ),
        PackageFileLoader(
            "tests/ut/bq_test_kit/data_literal_transformers/resources/simple_schema.json"
        ))
    assert query == (
        "(select 'str with quote \\' and backslash \\\\~' as f_string, "
        "from_base64('YW55') as f_bytes, cast(1 as INT64) as f_int, cast(1.5 as FLOAT64) as f_float, "
        "true as f_bool, "
        "cast('2020-11-26 17:09:03.967259 UTC' as TIMESTAMP) as f_timestamp, "
        "cast('2020-11-26' as DATE) as f_date, cast('11:09:03' as TIME) as f_time, "
        "cast('2020-11-26T17:09:03' as DATETIME) as f_datetime, cast(1.6 as NUMERIC) as f_numeric, "
        "ST_GEOGPOINT(-122.35, 47.62) as f_geography, "
        "cast(null as STRUCT<f_datetime DATETIME>) as f_struct)")
def test_json_load_in_partition(bqtk: BQTestKit):
    """Rows loaded into partition 20201112 must carry that ingestion time."""
    with bqtk.project("it").dataset("dataset_foo").isolate() as dataset:
        with dataset.table("table_bar", schema=json_schema).partition_by(IngestionTime()).isolate() as table:
            data_pfl = PackageFileLoader("tests/it/bq_test_kit/bq_dsl/bq_resources/data_loaders/resources/dummy_data.json")
            table.json_loader(from_=data_pfl).to_partition("20201112").load()
            pt_job: QueryJob = table._bq_client.query(f"select max(_partitiontime) as pt from `{table.fqdn()}`")
            fetched = list(pt_job.result())
            assert len(fetched) == 1
            assert fetched[0].pt == datetime(2020, 11, 12, 0, 0, tzinfo=timezone.utc)
# Exemplo n.º 17
# 0
def test_json_load_simple_schema_with_string_cast_to_bytes():
    """With use_string_cast_to_bytes(), a BYTES field is rendered as
    cast('...' as BYTES) instead of from_base64('...').
    """
    transformer = JsonDataLiteralTransformer().use_string_cast_to_bytes(
    ).use_datetime_like_cast()
    query = transformer.load(
        PackageFileLoader(
            "tests/ut/bq_test_kit/data_literal_transformers/resources/"
            "simple_schema_datum_with_string_cast_to_bytes.json"),
        PackageFileLoader(
            "tests/ut/bq_test_kit/data_literal_transformers/resources/simple_schema.json"
        ))
    assert query == (
        "(select 'str with quote \\' and backslash \\\\' as f_string, "
        "cast('any' as BYTES) as f_bytes, cast(1 as INT64) as f_int, cast(1.5 as FLOAT64) as f_float, "
        "true as f_bool, "
        "cast('2020-11-26 17:09:03.967259 UTC' as TIMESTAMP) as f_timestamp, "
        "cast('2020-11-26' as DATE) as f_date, cast('11:09:03' as TIME) as f_time, "
        "cast('2020-11-26 17:09:03' as DATETIME) as f_datetime, cast(1.6 as NUMERIC) as f_numeric, "
        "ST_GEOGPOINT(-122.35, 47.62) as f_geography, "
        "cast(null as STRUCT<f_datetime DATETIME>) as f_struct)")
def test_json_truncate_in_partition(bqtk: BQTestKit):
    """overwrite() must truncate the partition before loading, so after a
    plain load followed by an overwrite the table holds one file's rows (2).
    """
    with bqtk.project("it").dataset("dataset_foo").isolate() as dataset:
        with dataset.table("table_bar", schema=json_schema).partition_by(IngestionTime()).isolate() as table:
            data_pfl = PackageFileLoader("tests/it/bq_test_kit/bq_dsl/bq_resources/data_loaders/resources/dummy_data.json")
            table.json_loader(from_=data_pfl).to_partition("20201112").load()
            table.json_loader(from_=data_pfl).to_partition("20201112").overwrite().load()
            count_job: QueryJob = table._bq_client.query(f"select count(*) as nb from `{table.fqdn()}`")
            fetched = list(count_job.result())
            assert fetched[0]["nb"] == 2
# Exemplo n.º 19
# 0
def test_required_schema():
    """Every REQUIRED field missing from the datum must be reported, including
    fields nested inside repeated records (indexed as [0], [1], ...), and all
    errors for one input line are aggregated into a single exception message.
    """
    transformer = JsonDataLiteralTransformer()

    with pytest.raises(DataLiteralTransformException) as exception:
        transformer.load(
            PackageFileLoader(
                "tests/ut/bq_test_kit/data_literal_transformers/resources/"
                "required_schema_datum.json"),
            PackageFileLoader(
                "tests/ut/bq_test_kit/data_literal_transformers/resources/required_schema.json"
            ))
    assert str(exception.value) == (
        "Exception happened in line 1 with the following errors :\n"
        "\t.f_string is required,\n"
        "\t.f_bytes is required,\n"
        "\t.f_int is required,\n"
        "\t.f_float is required,\n"
        "\t.f_bool is required,\n"
        "\t.f_timestamp is required,\n"
        "\t.f_date is required,\n"
        "\t.f_time is required,\n"
        "\t.f_datetime is required,\n"
        "\t.f_numeric is required,\n"
        "\t.f_geography is required,\n"
        "\t.f_record is required,\n"
        "\t.f_repeated_record[0].f_string is required,\n"
        "\t.f_repeated_record[0].f_int is required,\n"
        "\t.f_repeated_record[0].f_bool is required,\n"
        "\t.f_repeated_record[0].f_date is required,\n"
        "\t.f_repeated_record[0].f_datetime is required,\n"
        "\t.f_repeated_record[0].f_geography is required,\n"
        "\t.f_repeated_record[0].f_record is required,\n"
        "\t.f_repeated_record[0].f_repeated_record[0].f_nested_string is required,\n"
        "\t.f_repeated_record[1].f_string is required,\n"
        "\t.f_repeated_record[1].f_int is required,\n"
        "\t.f_repeated_record[1].f_bool is required,\n"
        "\t.f_repeated_record[1].f_date is required,\n"
        "\t.f_repeated_record[1].f_datetime is required,\n"
        "\t.f_repeated_record[1].f_geography is required,\n"
        "\t.f_repeated_record[1].f_record is required,\n"
        "\t.f_repeated_record[1].f_repeated_record[0].f_nested_string is required"
    )
# Exemplo n.º 20
# 0
def test_geography_point():
    """A GEOGRAPHY value not matching the (case-insensitive) POINT(x y) form
    must be rejected, with one error block per offending input line (here
    lines 6 and 7 of the datum file).
    """
    transformer = JsonDataLiteralTransformer()

    with pytest.raises(DataLiteralTransformException) as exception:
        transformer.load(
            PackageFileLoader(
                "tests/ut/bq_test_kit/data_literal_transformers/resources/"
                "geography_schema_datum.json"),
            PackageFileLoader(
                "tests/ut/bq_test_kit/data_literal_transformers/resources/geography_schema.json"
            ))
    assert str(exception.value) == (
        "Exception happened in line 6 with the following errors :\n"
        "\t.f_geography is a GEOGRAPHY type. It is expected to match POINT(x y) "
        "where x and y are FLOAT64. Instead get NotPoInT( -122.35  47.62 ). "
        "POINT is case insensitive.\n\n"
        "Exception happened in line 7 with the following errors :\n"
        "\t.f_geography is a GEOGRAPHY type. It is expected to match POINT(x y) "
        "where x and y are FLOAT64. Instead get NotPoInT( -122.35  47.62 ). "
        "POINT is case insensitive.")
# Exemplo n.º 21
# 0
def test_technical_column_with_datum_temp_table(bqtk: BQTestKit):
    """Without as_data_literals() the datum is materialized as a temp table:
    select * then works (yielding the temp schema including technical
    columns), the interpolated name works too, and a UNION ALL of both access
    paths returns identical rows.
    """
    complex_schema = PackageFileLoader("tests/it/bq_test_kit/bq_dsl/resources/"
                                       "technical_column_schema.json")
    temp_complex_schema = PackageFileLoader(
        "tests/it/bq_test_kit/bq_dsl/resources/"
        "temp_technical_column_schema.json")
    complex_datum = PackageFileLoader(
        "tests/it/bq_test_kit/bq_dsl/resources/technical_column_schema_datum.json"
    )
    transformer = JsonDataLiteralTransformer(json_format=JsonFormat.JSON_ARRAY)
    # Direct table name: succeeds against the temp table (unlike the data
    # literal variant) and exposes the temp schema with technical columns.
    result = bqtk.query_template(from_="select * \nfrom technical_column_table;\n") \
                 .with_datum({
                     "technical_column_table": (complex_datum, complex_schema)
                  }) \
                 .loaded_with(transformer) \
                 .add_interpolator(JinjaInterpolator()) \
                 .run()
    assert len(result.rows) == 1
    assert result.schema == SchemaMixin().to_schema_field_list(
        temp_complex_schema)
    # Jinja-interpolated table name also resolves to the temp table.
    result = bqtk.query_template(from_="select count(*) \nfrom {{technical_column_table}};\n") \
                 .with_datum({
                     "technical_column_table": (complex_datum, complex_schema)
                  }) \
                 .loaded_with(transformer) \
                 .add_interpolator(JinjaInterpolator()) \
                 .run()
    assert len(result.rows) == 1
    # Both access paths in one query return the same row twice.
    result = bqtk.query_template(from_="""
                    select * from technical_column_table
                    UNION ALL
                    select * from {{technical_column_table}}
                    """) \
                 .with_datum({
                     "technical_column_table": (complex_datum, complex_schema)
                  }) \
                 .loaded_with(transformer) \
                 .add_interpolator(JinjaInterpolator()) \
                 .run()
    assert len(result.rows) == 2
    assert result.rows[0] == result.rows[1]
def test_csv_append_in_partition(bqtk: BQTestKit):
    """Three loads (two default, one explicit append()) into the same
    partition must accumulate all rows: 3 loads x 2 rows = 6.
    """
    with bqtk.project("it").dataset("dataset_foo").isolate() as dataset:
        fields = [SchemaField("f1", field_type="STRING"), SchemaField("f2", field_type="INT64")]
        with dataset.table("table_bar", schema=fields).partition_by(IngestionTime()).isolate() as table:
            data_pfl = PackageFileLoader("tests/it/bq_test_kit/bq_dsl/bq_resources/data_loaders/resources/dummy_data.csv")
            table.dsv_loader(from_=data_pfl).to_partition("20201112").load()
            table.dsv_loader(from_=data_pfl).to_partition("20201112").load()
            table.dsv_loader(from_=data_pfl).to_partition("20201112").append().load()
            count_job: QueryJob = table._bq_client.query(f"select count(*) as nb from `{table.fqdn()}`")
            fetched = list(count_job.result())
            assert fetched[0]["nb"] == 6
# Exemplo n.º 23
# 0
def test_json_load_with_extra_column():
    """Unknown keys in the datum must raise with one error block per input
    line by default; with ignore_unknown_values() the same datum loads and
    the extra keys are silently dropped, yielding a two-row union literal.
    """
    transformer = JsonDataLiteralTransformer()
    with pytest.raises(DataLiteralTransformException) as exception:
        transformer.load(
            PackageFileLoader(
                "tests/ut/bq_test_kit/data_literal_transformers/resources/"
                "simple_schema_with_extra_datum.json"),
            PackageFileLoader(
                "tests/ut/bq_test_kit/data_literal_transformers/resources/simple_schema.json"
            ))
    assert str(exception.value) == (
        "Exception happened in line 1 with the following errors :\n"
        "\tKey f_string_extra @ . not in schema\n\n"
        "Exception happened in line 2 with the following errors :\n"
        "\tKey f_string_extra2 @ . not in schema")
    # Same datum, now tolerating unknown keys: both rows are rendered and
    # joined with a `union all`.
    query = transformer.ignore_unknown_values().use_datetime_like_cast().load(
        PackageFileLoader(
            "tests/ut/bq_test_kit/data_literal_transformers/resources/"
            "simple_schema_with_extra_datum.json"),
        PackageFileLoader(
            "tests/ut/bq_test_kit/data_literal_transformers/resources/simple_schema.json"
        ))
    assert query == (
        "(select 'str with quote \\' and backslash \\\\' as f_string, "
        "from_base64('YW55') as f_bytes, cast(1 as INT64) as f_int, cast(1.5 as FLOAT64) as f_float, "
        "true as f_bool, "
        "cast('2020-11-26 17:09:03.967259 UTC' as TIMESTAMP) as f_timestamp, "
        "cast('2020-11-26' as DATE) as f_date, cast('11:09:03' as TIME) as f_time, "
        "cast('2020-11-26 17:09:03' as DATETIME) as f_datetime, cast(1.6 as NUMERIC) as f_numeric, "
        "ST_GEOGPOINT(-122.35, 47.62) as f_geography, "
        "cast(null as STRUCT<f_datetime DATETIME>) as f_struct"
        "\nunion all\n"
        "select 'other string' as f_string, "
        "from_base64('YW55') as f_bytes, cast(2 as INT64) as f_int, cast(2.5 as FLOAT64) as f_float, "
        "true as f_bool, "
        "cast('2020-11-26 17:09:03.967259 UTC' as TIMESTAMP) as f_timestamp, "
        "cast('2020-11-26' as DATE) as f_date, cast('11:09:03' as TIME) as f_time, "
        "cast('2020-11-26 17:09:03' as DATETIME) as f_datetime, cast(2.6 as NUMERIC) as f_numeric, "
        "ST_GEOGPOINT(-122.50, -40.6) as f_geography, "
        "cast(null as STRUCT<f_datetime DATETIME>) as f_struct)")
# Exemplo n.º 24
# 0
def test_dsv_load_struct():
    """A DSV column mapped to a RECORD field must contain a dictionary;
    a plain string there must raise with a descriptive per-line error.
    """
    transformer = DsvDataLiteralTransformer()
    with pytest.raises(DataLiteralTransformException) as exception:
        # Inline CSV row: the last column ('invalid struct') cannot be parsed
        # into the RECORD-typed f_struct field.
        transformer.load(
            ('"other string","YW55",2,2.5,true,"2020-11-26 17:09:03.967259 UTC",'
             '"2020-11-26","11:09:03","2020-11-26 17:09:03",2.6,"POINT(-122.50 -40.6)",invalid struct'
             ),
            PackageFileLoader(
                "tests/ut/bq_test_kit/data_literal_transformers/resources/simple_schema.json"
            ))
    assert str(exception.value) == (
        "Exception happened in line 1 with the following errors :\n"
        "\t.f_struct is not a dictionary while schema is of type RECORD")
# Exemplo n.º 25
# 0
def test_json_loader():
    """Table.json_loader must hand back a JsonDataLoader instance, even with
    no BigQuery client attached anywhere in the DSL chain.
    """
    config = BQTestKitConfig({DEFAULT_LOCATION: "EU"})
    project = Project("test_project", bq_client=None, bqtk_config=config)
    dataset = Dataset("dataset_foo",
                      project=project,
                      bq_client=None,
                      bqtk_config=config)
    table = Table("table_bar",
                  from_dataset=dataset,
                  bq_client=None,
                  bqtk_config=config)
    source = PackageFileLoader(
        "tests/ut/bq_test_kit/resource_loaders/resources/package_file_test_resource.txt"
    )
    loader = table.json_loader(from_=source)
    assert isinstance(loader, JsonDataLoader)
# Exemplo n.º 26
# 0
def test_json_load_as_simple_schema():
    """load_as must transform each named (datum, schema) pair independently
    and return a dict of literals keyed by the same names; an empty JSON
    object datum ("{}") yields an all-null typed row.
    """
    transformer = JsonDataLiteralTransformer().use_datetime_like_cast()
    query = transformer.load_as({
        "input_1":
        (PackageFileLoader(
            "tests/ut/bq_test_kit/data_literal_transformers/resources/simple_schema_datum.json"
        ),
         PackageFileLoader(
             "tests/ut/bq_test_kit/data_literal_transformers/resources/simple_schema.json"
         )),
        "input_2":
        ("{}",
         PackageFileLoader(
             "tests/ut/bq_test_kit/data_literal_transformers/resources/simple_schema.json"
         ))
    })
    assert query == {
        "input_1":
        ("(select 'str with quote \\' and backslash \\\\' as f_string, "
         "from_base64('YW55') as f_bytes, cast(1 as INT64) as f_int, cast(1.5 as FLOAT64) as f_float, "
         "true as f_bool, "
         "cast('2020-11-26 17:09:03.967259 UTC' as TIMESTAMP) as f_timestamp, "
         "cast('2020-11-26' as DATE) as f_date, cast('11:09:03' as TIME) as f_time, "
         "cast('2020-11-26T17:09:03' as DATETIME) as f_datetime, cast(1.6 as NUMERIC) as f_numeric, "
         "ST_GEOGPOINT(-122.35, 47.62) as f_geography, "
         "cast(null as STRUCT<f_datetime DATETIME>) as f_struct)"),
        "input_2":
        ("(select cast(null as STRING) as f_string, "
         "cast(null as BYTES) as f_bytes, cast(null as INT64) as f_int, cast(null as FLOAT64) as f_float, "
         "cast(null as BOOLEAN) as f_bool, "
         "cast(null as TIMESTAMP) as f_timestamp, "
         "cast(null as DATE) as f_date, cast(null as TIME) as f_time, "
         "cast(null as DATETIME) as f_datetime, cast(null as NUMERIC) as f_numeric, "
         "cast(null as GEOGRAPHY) as f_geography, "
         "cast(null as STRUCT<f_datetime DATETIME>) as f_struct)")
    }
def test_csv_load(bqtk: BQTestKit):
    """A CSV load must insert both fixture rows with their exact values."""
    with bqtk.project("it").dataset("dataset_foo").isolate() as dataset:
        fields = [SchemaField("f1", field_type="STRING"), SchemaField("f2", field_type="INT64")]
        with dataset.table("table_bar", schema=fields).isolate() as table:
            data_pfl = PackageFileLoader("tests/it/bq_test_kit/bq_dsl/bq_resources/data_loaders/resources/dummy_data.csv")
            table.dsv_loader(from_=data_pfl).load()
            # Row count check.
            count_job: QueryJob = table._bq_client.query(f"select count(*) as nb from `{table.fqdn()}`")
            count_rows = list(count_job.result())
            assert len(count_rows) == 1
            assert count_rows[0].nb == 2
            # Content check, deterministic ordering by f1.
            select_job: QueryJob = table._bq_client.query(f"select * from `{table.fqdn()}` order by f1")
            fetched = [dict(row.items()) for row in select_job.result()]
            assert fetched == [{"f1": "value1", "f2": 2}, {"f1": "value3", "f2": 4}]
# Exemplo n.º 28
# 0
def test_json_array_load_simple_schema(json_input):
    """Without use_datetime_like_cast(), datetime-like fields are rendered as
    typed literals (timestamp '...', date '...', time '...', datetime '...')
    rather than cast(...) expressions. `json_input` is a fixture supplying the
    datum in JSON_ARRAY form.
    """
    transformer = JsonDataLiteralTransformer(json_format=JsonFormat.JSON_ARRAY)
    query = transformer.load(
        json_input,
        PackageFileLoader(
            "tests/ut/bq_test_kit/data_literal_transformers/resources/simple_schema.json"
        ),
    )
    assert query == (
        "(select 'str with quote \\' and backslash \\\\' as f_string, "
        "from_base64('YW55') as f_bytes, cast(1 as INT64) as f_int, cast(1.5 as FLOAT64) as f_float, "
        "true as f_bool, "
        "timestamp '2020-11-26 17:09:03.967259 UTC' as f_timestamp, "
        "date '2020-11-26' as f_date, time '11:09:03' as f_time, "
        "datetime '2020-11-26T17:09:03' as f_datetime, cast(1.6 as NUMERIC) as f_numeric, "
        "ST_GEOGPOINT(-122.35, 47.62) as f_geography, "
        "cast(null as STRUCT<f_datetime DATETIME>) as f_struct)")
def test_json_load(bqtk: BQTestKit):
    """Load a newline-delimited JSON fixture into an isolated table and
    verify nested struct/array columns round-trip through BigQuery.
    """
    with bqtk.project("it").dataset("dataset_foo").isolate() as ds:
        with ds.table("table_bar", schema=json_schema).isolate() as target:
            fixture = PackageFileLoader("tests/it/bq_test_kit/bq_dsl/bq_resources/data_loaders/resources/dummy_data.json")
            target.json_loader(from_=fixture).load()
            # First sanity-check the row count before comparing contents.
            count_job: QueryJob = target._bq_client.query(f"select count(*) as nb from `{target.fqdn()}`")
            count_rows = list(count_job.result())
            assert len(count_rows) == 1
            assert count_rows[0].nb == 2
            # Deterministic ordering via f1 so the expected list is stable.
            select_job: QueryJob = target._bq_client.query(f"select * from `{target.fqdn()}` order by f1")
            fetched = [dict(row.items()) for row in select_job.result()]
            expected = [
                {
                    "f1": "value1",
                    "struct_f2": {"f2_1": 1},
                    "array_f3": [
                        {"f3_1": datetime(2020, 10, 21, 10, 0)},
                        {"f3_1": datetime(2020, 10, 21, 11, 0)},
                    ],
                },
                {
                    "f1": "value2",
                    "struct_f2": {"f2_1": 2},
                    "array_f3": [
                        {"f3_1": datetime(2020, 10, 21, 12, 0)},
                        {"f3_1": datetime(2020, 10, 21, 13, 0)},
                    ],
                },
            ]
            assert fetched == expected
Exemplo n.º 30
0
def test_json_complex_schema():
    """Transform a complex-schema JSON datum (nested structs, repeated
    scalars, repeated structs) into a BigQuery data literal.

    The expected query is three rows joined by UNION ALL: a fully populated
    row, a row where every field is null, and a row where repeated fields
    are single-element arrays holding nulls.
    """
    transformer = JsonDataLiteralTransformer()
    query = transformer.load(
        PackageFileLoader(
            "tests/ut/bq_test_kit/data_literal_transformers/resources/"
            "complex_schema_datum.json"),
        PackageFileLoader(
            "tests/ut/bq_test_kit/data_literal_transformers/resources/complex_schema.json"
        ))
    assert query == (
        # Row 1: every field populated, including nested struct-of-struct
        # and repeated fields rendered as array literals.
        "(select 'f_string' as f_string, from_base64('YW55') as f_bytes, cast(1 as INT64) as f_int, "
        "cast(1.1 as FLOAT64) as f_float, true as f_bool, "
        "timestamp '2020-11-26 17:09:03.967259 UTC' as f_timestamp, date '2020-11-26' as f_date, "
        "time '17:09:03.967259' as f_time, datetime '2020-11-26 17:09:03.967259' as f_datetime, "
        "cast(1.2 as NUMERIC) as f_numeric, ST_GEOGPOINT(1, 2) as f_geography, "
        "struct('f_string' as f_string, from_base64('YW55') as f_bytes, cast(2 as INT64) as f_int, "
        "cast(2.1 as FLOAT64) as f_float, true as f_bool, "
        "timestamp '2019-11-26 17:09:03.967259 UTC' as f_timestamp, date '2019-11-26' as f_date, "
        "time '16:09:03.967259' as f_time, datetime '2019-11-26 17:09:03.967259' as f_datetime, "
        "cast(2.2 as NUMERIC) as f_numeric, ST_GEOGPOINT(1, 2) as f_geography, "
        "[struct('f_string' as f_string, from_base64('YW55') as f_bytes, "
        "cast(1 as INT64) as f_int, cast(1.1 as FLOAT64) as f_float, true as f_bool, "
        "timestamp '2020-11-26 17:09:03.967259 UTC' as f_timestamp, date '2020-11-26' as f_date, "
        "time '17:09:03.967259' as f_time, datetime '2020-11-26 17:09:03.967259' as f_datetime, "
        "cast(1.2 as NUMERIC) as f_numeric, ST_GEOGPOINT(1, 2) as f_geography)] as f_repeated_struct, "
        "['f_string'] as f_string_repeated, [from_base64('YW55')] as f_bytes_repeated, "
        "[cast(1 as INT64)] as f_int_repeated, [cast(1.1 as FLOAT64)] as f_float_repeated, "
        "[true] as f_bool_repeated, [timestamp '2020-11-26 17:09:03.967259 UTC'] as f_timestamp_repeated,"
        " [date '2020-11-26'] as f_date_repeated, [time '17:09:03.967259'] as f_time_repeated, "
        "[datetime '2020-11-26 17:09:03.967259'] as f_datetime_repeated, "
        "[cast(1.2 as NUMERIC)] as f_numeric_repeated, [ST_GEOGPOINT(1, 2)] as f_geography_repeated) "
        "as f_struct, [struct('f_string' as f_string, from_base64('YW55') as f_bytes, "
        "cast(1 as INT64) as f_int, cast(1.1 as FLOAT64) as f_float, true as f_bool, "
        "timestamp '2020-11-26 17:09:03.967259 UTC' as f_timestamp, date '2020-11-26' as f_date, "
        "time '17:09:03.967259' as f_time, datetime '2020-11-26 17:09:03.967259' as f_datetime, "
        "cast(1.2 as NUMERIC) as f_numeric, ST_GEOGPOINT(1, 2) as f_geography)] as f_repeated_struct,"
        " ['f_string'] as f_string_repeated, [from_base64('YW55')] as f_bytes_repeated, "
        "[cast(1 as INT64)] as f_int_repeated, [cast(1.1 as FLOAT64)] as f_float_repeated, "
        "[true] as f_bool_repeated, [timestamp '2020-11-26 17:09:03.967259 UTC'] as f_timestamp_repeated,"
        " [date '2020-11-26'] as f_date_repeated, [time '17:09:03.967259'] as f_time_repeated, "
        "[datetime '2020-11-26 17:09:03.967259'] as f_datetime_repeated, "
        "[cast(1.2 as NUMERIC)] as f_numeric_repeated, [ST_GEOGPOINT(1, 2)] as f_geography_repeated"
        # Row 2: every field null; nulls carry an explicit cast so BigQuery
        # can infer the column types, including the full nested STRUCT type.
        "\nunion all\n"
        "select cast(null as STRING) as f_string, cast(null as BYTES) as f_bytes, "
        "cast(null as INT64) as f_int, cast(null as FLOAT64) as f_float, cast(null as BOOLEAN) as f_bool,"
        " cast(null as TIMESTAMP) as f_timestamp, cast(null as DATE) as f_date, "
        "cast(null as TIME) as f_time, cast(null as DATETIME) as f_datetime, "
        "cast(null as NUMERIC) as f_numeric, cast(null as GEOGRAPHY) as f_geography, "
        "cast(null as STRUCT<f_string STRING, f_bytes BYTES, f_int INT64, f_float FLOAT64, "
        "f_bool BOOLEAN, f_timestamp TIMESTAMP, f_date DATE, f_time TIME, f_datetime DATETIME, "
        "f_numeric NUMERIC, f_geography GEOGRAPHY, f_repeated_struct ARRAY<STRUCT<f_string STRING, "
        "f_bytes BYTES, f_int INT64, f_float FLOAT64, f_bool BOOLEAN, f_timestamp TIMESTAMP, "
        "f_date DATE, f_time TIME, f_datetime DATETIME, f_numeric NUMERIC, f_geography GEOGRAPHY>>, "
        "f_string_repeated ARRAY<STRING>, f_bytes_repeated ARRAY<BYTES>, f_int_repeated ARRAY<INT64>, "
        "f_float_repeated ARRAY<FLOAT64>, f_bool_repeated ARRAY<BOOLEAN>, "
        "f_timestamp_repeated ARRAY<TIMESTAMP>, f_date_repeated ARRAY<DATE>, "
        "f_time_repeated ARRAY<TIME>, f_datetime_repeated ARRAY<DATETIME>, "
        "f_numeric_repeated ARRAY<NUMERIC>, f_geography_repeated ARRAY<GEOGRAPHY>>) as f_struct, "
        "cast(null as ARRAY<STRUCT<f_string STRING, f_bytes BYTES, f_int INT64, f_float FLOAT64, "
        "f_bool BOOLEAN, f_timestamp TIMESTAMP, f_date DATE, f_time TIME, f_datetime DATETIME, "
        "f_numeric NUMERIC, f_geography GEOGRAPHY>>) as f_repeated_struct, "
        "cast(null as ARRAY<STRING>) as f_string_repeated, "
        "cast(null as ARRAY<BYTES>) as f_bytes_repeated, cast(null as ARRAY<INT64>) as f_int_repeated, "
        "cast(null as ARRAY<FLOAT64>) as f_float_repeated, "
        "cast(null as ARRAY<BOOLEAN>) as f_bool_repeated, "
        "cast(null as ARRAY<TIMESTAMP>) as f_timestamp_repeated, "
        "cast(null as ARRAY<DATE>) as f_date_repeated, "
        "cast(null as ARRAY<TIME>) as f_time_repeated, "
        "cast(null as ARRAY<DATETIME>) as f_datetime_repeated, "
        "cast(null as ARRAY<NUMERIC>) as f_numeric_repeated, cast(null as ARRAY<GEOGRAPHY>) "
        "as f_geography_repeated"
        # Row 3: scalars null, but repeated fields are one-element arrays
        # each containing a typed null (arrays themselves are non-null).
        "\nunion all\n"
        "select cast(null as STRING) as f_string, cast(null as BYTES) as f_bytes, "
        "cast(null as INT64) as f_int, cast(null as FLOAT64) as f_float, cast(null as BOOLEAN) as f_bool,"
        " cast(null as TIMESTAMP) as f_timestamp, cast(null as DATE) as f_date, "
        "cast(null as TIME) as f_time, cast(null as DATETIME) as f_datetime, "
        "cast(null as NUMERIC) as f_numeric, cast(null as GEOGRAPHY) as f_geography, "
        "cast(null as STRUCT<f_string STRING, f_bytes BYTES, f_int INT64, f_float FLOAT64, "
        "f_bool BOOLEAN, f_timestamp TIMESTAMP, f_date DATE, f_time TIME, f_datetime DATETIME, "
        "f_numeric NUMERIC, f_geography GEOGRAPHY, f_repeated_struct ARRAY<STRUCT<f_string STRING, "
        "f_bytes BYTES, f_int INT64, f_float FLOAT64, f_bool BOOLEAN, f_timestamp TIMESTAMP, f_date DATE,"
        " f_time TIME, f_datetime DATETIME, f_numeric NUMERIC, f_geography GEOGRAPHY>>, "
        "f_string_repeated ARRAY<STRING>, f_bytes_repeated ARRAY<BYTES>, f_int_repeated ARRAY<INT64>, "
        "f_float_repeated ARRAY<FLOAT64>, f_bool_repeated ARRAY<BOOLEAN>, "
        "f_timestamp_repeated ARRAY<TIMESTAMP>, f_date_repeated ARRAY<DATE>, "
        "f_time_repeated ARRAY<TIME>, f_datetime_repeated ARRAY<DATETIME>, "
        "f_numeric_repeated ARRAY<NUMERIC>, f_geography_repeated ARRAY<GEOGRAPHY>>) as f_struct, "
        "[cast(null as STRUCT<f_string STRING, f_bytes BYTES, f_int INT64, f_float FLOAT64, "
        "f_bool BOOLEAN, f_timestamp TIMESTAMP, f_date DATE, f_time TIME, f_datetime DATETIME, "
        "f_numeric NUMERIC, f_geography GEOGRAPHY>)] as f_repeated_struct, "
        "[cast(null as STRING)] as f_string_repeated, [cast(null as BYTES)] as f_bytes_repeated, "
        "[cast(null as INT64)] as f_int_repeated, [cast(null as FLOAT64)] as f_float_repeated, "
        "[cast(null as BOOLEAN)] as f_bool_repeated, [cast(null as TIMESTAMP)] as f_timestamp_repeated, "
        "[cast(null as DATE)] as f_date_repeated, [cast(null as TIME)] as f_time_repeated, "
        "[cast(null as DATETIME)] as f_datetime_repeated, [cast(null as NUMERIC)] as f_numeric_repeated, "
        "[cast(null as GEOGRAPHY)] as f_geography_repeated)")