Python DestinationNameTransformer.truncate_identifier_name примеры использования

Язык программирования: Python

Пространство имен/Пакет: normalization.transform_catalog.destination_name_transformer

Метод/Функция: truncate_identifier_name

Примеров на hotexamples.com: 5

Python DestinationNameTransformer.truncate_identifier_name - 5 примеров найдено. Это лучшие примеры Python кода для normalization.transform_catalog.destination_name_transformer.DestinationNameTransformer.truncate_identifier_name, полученные из open source проектов. Вы можете ставить оценку каждому примеру, чтобы помочь нам улучшить качество примеров.

Основные методы

Показать Скрыть

DestinationNameTransformer(16)

normalize_table_name(7)

truncate_identifier_name(5)

normalize_schema_name(4)

get_name_max_length(3)

_DestinationNameTransformer__truncate_identifier_name(1)

needs_quotes(1)

normalize_column_name(1)

Пример #1

Показать файл

Файл: stream_processor.py Проект: zzstoatzz/airbyte

def get_table_name(name_transformer: DestinationNameTransformer, parent: str,
                   child: str, suffix: str, json_path: List[str]) -> str:
    """
    Build a table name shaped like `<parent>_<json path hash>_<child><suffix>`.

    A non-empty suffix that does not already start with an underscore gets one
    prepended. Parent and child are passed through
    `name_transformer.normalize_table_name(..., False, False)`.

    When `parent` is empty the result is the normalized child plus suffix, passed
    through `name_transformer.truncate_identifier_name`. Otherwise the pieces are
    shortened only as far as needed to respect the length budget
    (`name_transformer.get_name_max_length()` minus the two joining underscores):

    1. nothing is shortened when all pieces fit under the budget;
    2. otherwise only the parent is cut, provided the remaining pieces leave room
       for at least `min(MINIMUM_PARENT_LENGTH, len(parent))` parent characters;
    3. otherwise the parent is cut to that minimum and the child is shortened with
       `name_transformer.truncate_identifier_name`.
    """
    # Two characters are set aside for the underscores joining parent, hash and child.
    budget = name_transformer.get_name_max_length() - 2
    path_hash = hash_json_path(json_path)

    if suffix and not suffix.startswith("_"):
        suffix_part = f"_{suffix}"
    else:
        suffix_part = suffix

    if parent:
        parent_part = name_transformer.normalize_table_name(parent, False, False)
    else:
        parent_part = parent
    child_part = name_transformer.normalize_table_name(child, False, False)
    parent_floor = min(MINIMUM_PARENT_LENGTH, len(parent_part))

    # Top-level table: there is no parent or hash to embed in the name.
    if not parent:
        return name_transformer.truncate_identifier_name(f"{child_part}{suffix_part}")

    # Length of every piece except the parent.
    fixed_length = len(child_part) + len(path_hash) + len(suffix_part)

    # Everything fits: keep all pieces intact.
    if len(parent_part) + fixed_length < budget:
        return f"{parent_part}_{path_hash}_{child_part}{suffix_part}"

    # Only the parent needs cutting, and it keeps at least its minimum length.
    if fixed_length < budget - parent_floor:
        kept_parent = parent_part[:budget - fixed_length]
        return f"{kept_parent}_{path_hash}_{child_part}{suffix_part}"

    # Last resort: parent cut down to the minimum, child shortened to what is left.
    child_budget = budget - parent_floor - len(path_hash) - len(suffix_part)
    short_child = name_transformer.truncate_identifier_name(child_part, child_budget)
    return f"{parent_part[:parent_floor]}_{path_hash}_{short_child}{suffix_part}"

Пример #2

Показать файл

def get_nested_hashed_table_name(name_transformer: DestinationNameTransformer, schema: str, json_path: List[str], child: str) -> str:
    """
    Build the name of a nested table as `<parent>_<json path hash>_<child>`.

    The name combines several pieces of information so that a user can (somehow)
    recognize what data the table holds:
    - parent: every element of `json_path` except the last, joined with underscores
    - json path hash: `hash_json_path` applied to `[schema] + json_path`
    - child: the nested field name (or original stream name)

    Parent and child are normalized with
    `name_transformer.normalize_table_name(..., False, False)`. The pieces are then
    shortened only as far as needed to stay under
    `name_transformer.get_name_max_length()`: first the parent alone (never below
    `min(MINIMUM_PARENT_LENGTH, len(parent))` characters), then the child as well,
    using `name_transformer.truncate_identifier_name`.

    Raises:
        RuntimeError: when `json_path` yields an empty parent, i.e. the table is not nested.
    """
    parent = "_".join(json_path[:-1])
    limit = name_transformer.get_name_max_length()
    path_hash = hash_json_path([schema] + json_path)
    if parent:
        parent_part = name_transformer.normalize_table_name(parent, False, False)
    else:
        parent_part = parent
    child_part = name_transformer.normalize_table_name(child, False, False)
    parent_floor = min(MINIMUM_PARENT_LENGTH, len(parent_part))

    # A nested table always has a parent.
    if not parent:
        raise RuntimeError("There is no nested table names without parents")

    # Length of the hash plus the two underscores that join the three pieces.
    glue_length = len(path_hash) + 2

    # Everything fits: keep all pieces intact.
    if len(parent_part) + glue_length + len(child_part) < limit:
        return f"{parent_part}_{path_hash}_{child_part}"

    # Only the parent needs cutting; it still keeps at least parent_floor characters.
    if parent_floor + glue_length + len(child_part) < limit:
        parent_budget = limit - glue_length - len(child_part)
        return f"{parent_part[:parent_budget]}_{path_hash}_{child_part}"

    # Last resort: parent cut down to the minimum, child shortened to what is left.
    child_budget = limit - glue_length - parent_floor
    short_child = name_transformer.truncate_identifier_name(child_part, child_budget)
    return f"{parent_part[:parent_floor]}_{path_hash}_{short_child}"

Пример #3

Показать файл

Файл: stream_processor.py Проект: vitoravancini/airbyte

def get_table_name(name_transformer: DestinationNameTransformer, parent: str,
                   child: str, suffix: str, json_path: List[str]) -> str:
    """
    Compose a table name from the pieces the normalization code knows about.

    The pieces are:
    - parent table: where the table is extracted from (in case of nesting)
    - child table: the nested field name, or the original stream name
    - suffix: distinguishes the tables produced by the different transformation
      steps of a pipeline; a leading underscore is added when it is missing
    - json path: hashed with `hash_json_path` and embedded between parent and child

    The result looks like `<parent>_<hash>_<child><suffix>`, or just
    `<child><suffix>` (run through `truncate_identifier_name`) when there is no
    parent. When the full name does not fit under the length budget, the parent
    is shortened first (never below `min(MINIMUM_PARENT_LENGTH, len(parent))`
    characters) and, if that is still not enough, the child is shortened too
    with `name_transformer.truncate_identifier_name`.
    """
    # less two for the underscores
    available = name_transformer.get_name_max_length() - 2
    hashed_path = hash_json_path(json_path)

    tail = suffix
    if suffix and not suffix.startswith("_"):
        tail = f"_{suffix}"

    head = parent
    if parent:
        head = name_transformer.normalize_table_name(parent, False, False)
    body = name_transformer.normalize_table_name(child, False, False)
    min_head_length = min(MINIMUM_PARENT_LENGTH, len(head))

    def assemble(head_piece: str, body_piece: str) -> str:
        # Join the (possibly shortened) pieces in their fixed order.
        return f"{head_piece}_{hashed_path}_{body_piece}{tail}"

    # no parent: only child and suffix make up the name
    if not parent:
        return name_transformer.truncate_identifier_name(f"{body}{tail}")

    # if everything fits without truncation, don't truncate anything
    if len(head) + len(body) + len(hashed_path) + len(tail) < available:
        return assemble(head, body)

    # if everything fits except for the parent, just truncate the parent
    if len(body) + len(hashed_path) + len(tail) < available - min_head_length:
        head_room = available - len(body) - len(hashed_path) - len(tail)
        return assemble(head[:head_room], body)

    # otherwise cut the parent to its minimum length and shorten the child as well
    body_room = available - min_head_length - len(hashed_path) - len(tail)
    shortened_body = name_transformer.truncate_identifier_name(body, body_room)
    return assemble(head[:min_head_length], shortened_body)

Пример #4

Показать файл

def test_truncate_identifier(input_str: str, expected: str):
    """
    Check how over-long identifiers are truncated for a Postgres destination.

    Per the original author's notes, both of these strings are too long for the
    Postgres limit:
    - `Aaaa_Bbbb_Cccc_Dddd_Eeee_Ffff_Gggg_Hhhh_Iiii`
    - `Aaaa_Bbbb_Cccc_Dddd_Eeee_a_very_long_name_Ffff_Gggg_Hhhh_Iiii`

    These tests verify how the truncation (in the middle of the name) is decided.
    In this instance both strings end up as `Aaaa_Bbbb_Cccc_Dddd___e_Ffff_Gggg_Hhhh_Iiii`,
    which can potentially cause a collision in table names.

    Handling such collisions is the job of `stream_processor`, not of
    `destination_name_transformer`.
    """
    transformer = DestinationNameTransformer(DestinationType.POSTGRES)
    print(f"Truncating from #{len(input_str)} to #{len(expected)}")
    truncated = transformer.truncate_identifier_name(input_str)
    assert truncated == expected

Пример #5

Показать файл

Файл: test_destination_name_transformer.py Проект: golf-canada/airbyte

def test_truncate_identifier(input_str: str, expected: str):
    """Check that a Postgres name transformer truncates `input_str` to `expected`."""
    postgres_transformer = DestinationNameTransformer(DestinationType.POSTGRES)
    # Log the lengths before and after so a failing case is easier to read.
    print(f"Truncating from #{len(input_str)} to #{len(expected)}")
    actual = postgres_transformer.truncate_identifier_name(input_str)
    assert actual == expected