示例#1
0
def process_delete_tag(
    message: Mapping[str, Any],
    schema: TableSchema,
    tag_column_map: Mapping[str, Mapping[str, str]],
    promoted_tags: Mapping[str, Sequence[str]],
) -> Optional[Replacement]:
    """
    Build the Replacement that rewrites a project's rows without a given tag.

    The message supplies the tag to remove ("tag"), the project
    ("project_id") and a cutoff time ("datetime", parsed with
    settings.PAYLOAD_DATETIME_FORMAT). Two query templates are produced:

    * a count query over the rows that match the filter, and
    * an INSERT ... SELECT that re-inserts those rows with the tag stripped
      from `tags.key`, `tags.value` and `_tags_flattened`, and with the
      promoted column (when the tag is promoted) replaced by NULL.

    Rows are selected when they belong to the project, were received no
    later than the cutoff, are not deleted, and carry the tag.

    :param message: payload providing "tag", "datetime" and "project_id".
    :param schema: table schema; its non-Materialized columns are rewritten.
    :param tag_column_map: under "tags", maps a tag name to its column name
        (the tag name itself is used when there is no entry).
    :param promoted_tags: under "tags", the tags that have a dedicated column.
    :return: None when the message carries no (or an empty) tag, otherwise
        the Replacement built from both templates, the query arguments and
        the (NEEDS_FINAL, project_id) flags.
    """
    tag = message["tag"]
    if not tag:
        return None
    assert isinstance(tag, str)

    cutoff = datetime.strptime(message["datetime"], settings.PAYLOAD_DATETIME_FORMAT)
    promoted_column = tag_column_map["tags"].get(tag, tag)
    tag_is_promoted = tag in promoted_tags["tags"]

    # A promoted tag is detected through its dedicated column; any other tag
    # is detected by looking it up in the `tags.key` array.
    prewhere_clause = (
        " PREWHERE %(tag_column)s IS NOT NULL "
        if tag_is_promoted
        else " PREWHERE has(`tags.key`, %(tag_str)s) "
    )
    where_clause = """\
        WHERE project_id = %(project_id)s
        AND received <= CAST('%(timestamp)s' AS DateTime)
        AND NOT deleted
    """
    # Both templates share exactly the same row filter.
    row_filter = prewhere_clause + where_clause

    # NOTE(review): the dist_*_table_name placeholders are not part of the
    # arguments built below — presumably filled in by the consumer of the
    # Replacement; confirm against the caller.
    insert_head = """\
        INSERT INTO %(dist_write_table_name)s (%(all_columns)s)
        SELECT %(select_columns)s
        FROM %(dist_read_table_name)s FINAL
    """
    count_head = """\
        SELECT count()
        FROM %(dist_read_table_name)s FINAL
    """
    insert_template = insert_head + row_filter
    count_template = count_head + row_filter

    def select_expression(column: Any) -> str:
        # Expression that yields this column's value in the rewritten row.
        flattened_name = column.flattened
        if tag_is_promoted and flattened_name == promoted_column:
            return "NULL"
        if flattened_name == "tags.key":
            return (
                "arrayFilter(x -> (indexOf(`tags.key`, x) != indexOf(`tags.key`, %s)), `tags.key`)"
                % escape_string(tag)
            )
        if flattened_name == "tags.value":
            return (
                "arrayMap(x -> arrayElement(`tags.value`, x), arrayFilter(x -> x != indexOf(`tags.key`, %s), arrayEnumerate(`tags.value`)))"
                % escape_string(tag)
            )
        if flattened_name == "_tags_flattened":
            return FLATTENED_COLUMN_TEMPLATE % escape_string(tag)
        return column.escaped

    # Columns carrying the Materialized modifier are left out of the
    # INSERT column list.
    writable_columns = []
    for column in schema.get_columns():
        if Materialized in column.type.get_all_modifiers():
            continue
        writable_columns.append(column)

    select_expressions = [select_expression(column) for column in writable_columns]
    column_names = [column.escaped for column in writable_columns]

    template_args = {
        "all_columns": ", ".join(column_names),
        "select_columns": ", ".join(select_expressions),
        "project_id": message["project_id"],
        "tag_str": escape_string(tag),
        "tag_column": escape_identifier(promoted_column),
        "timestamp": cutoff.strftime(DATETIME_FORMAT),
    }
    flags = (NEEDS_FINAL, message["project_id"])

    return Replacement(count_template, insert_template, template_args, flags)
示例#2
0
    storage_set_key=StorageSetKey.EVENTS,
).get_data_source()

# Schema for "table3" (same local and distributed table name, EVENTS storage
# set): two scalar columns plus one Nested column with a single UInt(64) field.
# Note that `table3` is bound to the result of get_data_source(), not to the
# TableSchema object itself.
table3 = TableSchema(
    columns=ColumnSet([
        ("t3c1", UInt(64)),
        ("t3c2", String()),
        ("t3c3", Nested([("t31c4", UInt(64))])),
    ]),
    local_table_name="table3",
    dist_table_name="table3",
    storage_set_key=StorageSetKey.EVENTS,
).get_data_source()

# Two-table join: table1 (alias "t1") joined with table2 (alias "t2") using
# JoinType.INNER. Two conditions pair up columns across the aliases:
#   t1.t1c1 with t2.t2c2, and t1.t1c3 with t2.t2c4.
# NOTE(review): the two empty lists are TableJoinNode's third and fourth
# positional arguments; their meaning is not visible here — confirm against
# the TableJoinNode definition.
simple_join_structure = JoinClause(
    TableJoinNode(table1.format_from(), table1.get_columns(), [], [], "t1"),
    TableJoinNode(table2.format_from(), table2.get_columns(), [], [], "t2"),
    [
        JoinCondition(
            left=JoinConditionExpression(table_alias="t1", column="t1c1"),
            right=JoinConditionExpression(table_alias="t2", column="t2c2"),
        ),
        JoinCondition(
            left=JoinConditionExpression(table_alias="t1", column="t1c3"),
            right=JoinConditionExpression(table_alias="t2", column="t2c4"),
        ),
    ],
    JoinType.INNER,
)

complex_join_structure = JoinClause(