Exemplo n.º 1
0
    def __init__(self) -> None:
        """
        Assemble the joined groups/events dataset.

        The grouped-message data source is the left node and the events data
        source is the right node of a LEFT join keyed on ``project_id`` and on
        groups ``id`` = events ``group_id``. The resulting joined schema is
        read-only (no write schema is supplied to the parent initializer).
        """
        # Grouped messages: keep a handle on the dataset and resolve the data
        # source behind its read schema.
        self.__grouped_message = get_dataset("groupedmessage")
        groups_read_schema = (
            self.__grouped_message.get_dataset_schemas().get_read_schema()
        )
        groups_source = groups_read_schema.get_data_source()

        # Events: same resolution path as above.
        self.__events = get_dataset("events")
        events_read_schema = self.__events.get_dataset_schemas().get_read_schema()
        events_source = events_read_schema.get_data_source()

        groups_node = TableJoinNode(
            table_name=groups_source.format_from(),
            columns=groups_source.get_columns(),
            mandatory_conditions=[
                # TODO: Replace this tuple once expressions stop being plain
                # strings; at that point attaching an alias to a column inside
                # an expression becomes straightforward.
                (qualified_column("record_deleted", self.GROUPS_ALIAS), "=", 0),
            ],
            prewhere_candidates=[
                qualified_column(candidate, self.GROUPS_ALIAS)
                for candidate in groups_source.get_prewhere_candidates()
            ],
            alias=self.GROUPS_ALIAS,
        )

        events_node = TableJoinNode(
            table_name=events_source.format_from(),
            columns=events_source.get_columns(),
            mandatory_conditions=[
                (qualified_column("deleted", self.EVENTS_ALIAS), "=", 0),
            ],
            prewhere_candidates=[
                qualified_column(candidate, self.EVENTS_ALIAS)
                for candidate in events_source.get_prewhere_candidates()
            ],
            alias=self.EVENTS_ALIAS,
        )

        # (groups column, events column) pairs the two tables are joined on.
        join_keys = (
            ("project_id", "project_id"),
            ("id", "group_id"),
        )
        key_mapping = [
            JoinCondition(
                left=JoinConditionExpression(
                    table_alias=self.GROUPS_ALIAS, column=groups_column
                ),
                right=JoinConditionExpression(
                    table_alias=self.EVENTS_ALIAS, column=events_column
                ),
            )
            for groups_column, events_column in join_keys
        ]

        joined_schema = JoinedSchema(
            JoinClause(
                left_node=groups_node,
                right_node=events_node,
                mapping=key_mapping,
                join_type=JoinType.LEFT,
            )
        )

        super().__init__(
            dataset_schemas=DatasetSchemas(
                read_schema=joined_schema,
                write_schema=None,
            ),
            time_group_columns={"events.time": "events.timestamp"},
            time_parse_columns=[
                "events.timestamp",
                "events.received",
                "groups.last_seen",
                "groups.first_seen",
                "groups.active_at",
            ],
        )
Exemplo n.º 2
0
    def __init__(self) -> None:
        """
        Assemble the joined groups/events entity.

        The grouped-messages storage is the left node and the events storage
        is the right node of a LEFT join keyed on ``project_id`` and on groups
        ``id`` = events ``group_id``. A single joined storage backs the entity
        and no writable storage is supplied to the parent initializer.
        """
        # Grouped messages: keep the entity handle, then resolve the data
        # source from the matching storage's schema.
        self.__grouped_message = get_entity(EntityKey.GROUPEDMESSAGES)
        groups_storage_schema = get_storage(StorageKey.GROUPEDMESSAGES).get_schema()
        groups_source = groups_storage_schema.get_data_source()

        # Events: same resolution path as above.
        self.__events = get_entity(EntityKey.EVENTS)
        events_storage_schema = get_storage(StorageKey.EVENTS).get_schema()
        events_source = events_storage_schema.get_data_source()

        groups_node = TableJoinNode(
            table_name=groups_source.format_from(),
            columns=groups_source.get_columns(),
            mandatory_conditions=[
                # record_deleted = 0 on the groups side
                binary_condition(
                    None,
                    ConditionFunctions.EQ,
                    Column(None, self.GROUPS_ALIAS, "record_deleted"),
                    Literal(None, 0),
                ),
            ],
            prewhere_candidates=[
                qualified_column(candidate, self.GROUPS_ALIAS)
                for candidate in groups_source.get_prewhere_candidates()
            ],
            alias=self.GROUPS_ALIAS,
        )

        events_node = TableJoinNode(
            table_name=events_source.format_from(),
            columns=events_source.get_columns(),
            mandatory_conditions=[
                # deleted = 0 on the events side
                binary_condition(
                    None,
                    ConditionFunctions.EQ,
                    Column(None, self.EVENTS_ALIAS, "deleted"),
                    Literal(None, 0),
                ),
            ],
            prewhere_candidates=[
                qualified_column(candidate, self.EVENTS_ALIAS)
                for candidate in events_source.get_prewhere_candidates()
            ],
            alias=self.EVENTS_ALIAS,
        )

        # (groups column, events column) pairs the two tables are joined on.
        join_keys = (
            ("project_id", "project_id"),
            ("id", "group_id"),
        )
        key_mapping = [
            JoinCondition(
                left=JoinConditionExpression(
                    table_alias=self.GROUPS_ALIAS, column=groups_column
                ),
                right=JoinConditionExpression(
                    table_alias=self.EVENTS_ALIAS, column=events_column
                ),
            )
            for groups_column, events_column in join_keys
        ]

        join_structure = JoinClause(
            left_node=groups_node,
            right_node=events_node,
            mapping=key_mapping,
            join_type=JoinType.LEFT,
        )

        # The same join clause feeds both the schema (for the column set) and
        # the storage (for query planning).
        joined_schema = JoinedSchema(join_structure)
        joined_storage = JoinedStorage(StorageSetKey.EVENTS, join_structure)
        plan_builder = SingleStorageQueryPlanBuilder(storage=joined_storage)

        super().__init__(
            storages=[joined_storage],
            query_plan_builder=plan_builder,
            abstract_column_set=joined_schema.get_columns(),
            writable_storage=None,
        )
Exemplo n.º 3
0
        ("t3c1", UInt(64)),
        ("t3c2", String()),
        ("t3c3", Nested([("t31c4", UInt(64))])),
    ]),
    local_table_name="table3",
    dist_table_name="table3",
    order_by="",
    partition_by="",
).get_data_source()

# Two-table INNER join fixture: table1 (alias "t1") joined to table2 (alias
# "t2") on t1c1 = t2c2 and t1c3 = t2c4. Both nodes are given two empty lists
# as their third and fourth positional arguments.
simple_join_structure = JoinClause(
    TableJoinNode(table1.format_from(), table1.get_columns(), [], [], "t1"),
    TableJoinNode(table2.format_from(), table2.get_columns(), [], [], "t2"),
    [
        JoinCondition(
            left=JoinConditionExpression(table_alias="t1", column=t1_column),
            right=JoinConditionExpression(table_alias="t2", column=t2_column),
        )
        for t1_column, t2_column in (("t1c1", "t2c2"), ("t1c3", "t2c4"))
    ],
    JoinType.INNER,
)

complex_join_structure = JoinClause(
    JoinClause(
        TableJoinNode(table1.format_from(), table1.get_columns(), [], [],
                      "t1"),
        TableJoinNode(table2.format_from(), table2.get_columns(), [], [],
                      "t2"),
Exemplo n.º 4
0
    def __init__(self) -> None:
        """
        Assemble the joined groups/events dataset.

        The grouped-message data source is the left node and the events data
        source is the right node of a LEFT join keyed on ``project_id`` and on
        groups ``id`` = events ``group_id``. The resulting joined schema is
        read-only (no write schema is supplied to the parent initializer).
        """
        # Grouped messages: keep a handle on the dataset and resolve the data
        # source behind its read schema.
        self.__grouped_message = get_dataset("groupedmessage")
        groups_source = (
            self.__grouped_message
            .get_dataset_schemas()
            .get_read_schema()
            .get_data_source()
        )

        # Events: same resolution path as above.
        self.__events = get_dataset("events")
        events_source = (
            self.__events
            .get_dataset_schemas()
            .get_read_schema()
            .get_data_source()
        )

        groups_node = TableJoinNode(
            table_name=groups_source.format_from(),
            columns=groups_source.get_columns(),
            mandatory_conditions=[
                # TODO: Replace this tuple once expressions stop being plain
                # strings; at that point attaching an alias to a column inside
                # an expression becomes straightforward.
                (qualified_column("record_deleted", self.GROUPS_ALIAS), "=", 0),
            ],
            alias=self.GROUPS_ALIAS,
        )

        events_node = TableJoinNode(
            table_name=events_source.format_from(),
            columns=events_source.get_columns(),
            mandatory_conditions=[
                (qualified_column("deleted", self.EVENTS_ALIAS), "=", 0),
            ],
            alias=self.EVENTS_ALIAS,
        )

        # (groups column, events column) pairs the two tables are joined on.
        join_keys = (
            ("project_id", "project_id"),
            ("id", "group_id"),
        )
        key_mapping = [
            JoinCondition(
                left=JoinConditionExpression(
                    table_alias=self.GROUPS_ALIAS, column=groups_column
                ),
                right=JoinConditionExpression(
                    table_alias=self.EVENTS_ALIAS, column=events_column
                ),
            )
            for groups_column, events_column in join_keys
        ]

        joined_schema = JoinedSchema(
            JoinClause(
                left_node=groups_node,
                right_node=events_node,
                mapping=key_mapping,
                join_type=JoinType.LEFT,
            )
        )

        super().__init__(
            dataset_schemas=DatasetSchemas(
                read_schema=joined_schema,
                write_schema=None,
            ),
            time_group_columns={"events.time": "events.timestamp"},
            time_parse_columns=["events.timestamp"],
        )