Пример #1
0
def test_joined_columns():
    schema = JoinedSchema(complex_join_structure)
    columns = schema.get_columns()

    expected_columns = ColumnSet([
        ("t1.t1c1", UInt(64)),
        ("t1.t1c2", String()),
        ("t1.t1c3", Nested([
            ("t11c4", UInt(64))
        ])),
        ("t2.t2c1", UInt(64)),
        ("t2.t2c2", String()),
        ("t2.t2c3", Nested([
            ("t21c4", UInt(64))
        ])),
        ("t3.t3c1", UInt(64)),
        ("t3.t3c2", String()),
        ("t3.t3c3", Nested([
            ("t31c4", UInt(64))
        ])),
    ])

    # Checks equality between flattened columns. Nested columns are
    # exploded here
    assert set([c.flattened for c in columns]) \
        == set([c.flattened for c in expected_columns])

    # Checks equality between the structured set of columns. Nested columns
    # are not exploded.
    assert set([repr(c) for c in columns.columns]) \
        == set([repr(c) for c in expected_columns.columns])
Пример #2
0
 def __init__(
     self,
     storage_set_key: StorageSetKey,
     join_structure: JoinClause,
 ) -> None:
     self.__structure = join_structure
     super().__init__(storage_set_key, JoinedSchema(self.__structure))
Пример #3
0
    def __init__(self) -> None:
        self.__grouped_message = get_dataset("groupedmessage")
        groupedmessage_source = (self.__grouped_message.get_dataset_schemas().
                                 get_read_schema().get_data_source())

        self.__events = get_dataset("events")
        events_source = (self.__events.get_dataset_schemas().get_read_schema().
                         get_data_source())

        join_structure = JoinClause(
            left_node=TableJoinNode(
                table_name=groupedmessage_source.format_from(),
                columns=groupedmessage_source.get_columns(),
                mandatory_conditions=[
                    # TODO: This will be replaced as soon as expressions won't be strings
                    # thus we will be able to easily add an alias to a column in an
                    # expression.
                    (qualified_column("record_deleted",
                                      self.GROUPS_ALIAS), "=", 0)
                ],
                prewhere_candidates=[
                    qualified_column(col, self.GROUPS_ALIAS)
                    for col in groupedmessage_source.get_prewhere_candidates()
                ],
                alias=self.GROUPS_ALIAS,
            ),
            right_node=TableJoinNode(
                table_name=events_source.format_from(),
                columns=events_source.get_columns(),
                mandatory_conditions=[
                    (qualified_column("deleted", self.EVENTS_ALIAS), "=", 0)
                ],
                prewhere_candidates=[
                    qualified_column(col, self.EVENTS_ALIAS)
                    for col in events_source.get_prewhere_candidates()
                ],
                alias=self.EVENTS_ALIAS,
            ),
            mapping=[
                JoinCondition(
                    left=JoinConditionExpression(table_alias=self.GROUPS_ALIAS,
                                                 column="project_id"),
                    right=JoinConditionExpression(
                        table_alias=self.EVENTS_ALIAS, column="project_id"),
                ),
                JoinCondition(
                    left=JoinConditionExpression(table_alias=self.GROUPS_ALIAS,
                                                 column="id"),
                    right=JoinConditionExpression(
                        table_alias=self.EVENTS_ALIAS, column="group_id"),
                ),
            ],
            join_type=JoinType.LEFT,
        )

        schema = JoinedSchema(join_structure)
        dataset_schemas = DatasetSchemas(
            read_schema=schema,
            write_schema=None,
        )
        super().__init__(
            dataset_schemas=dataset_schemas,
            time_group_columns={"events.time": "events.timestamp"},
            time_parse_columns=[
                "events.timestamp",
                "events.received",
                "groups.last_seen",
                "groups.first_seen",
                "groups.active_at",
            ],
        )
Пример #4
0
    def __init__(self) -> None:
        self.__grouped_message = get_entity(EntityKey.GROUPEDMESSAGES)
        groupedmessage_source = (get_storage(
            StorageKey.GROUPEDMESSAGES).get_schema().get_data_source())

        self.__events = get_entity(EntityKey.EVENTS)
        events_source = get_storage(
            StorageKey.EVENTS).get_schema().get_data_source()

        join_structure = JoinClause(
            left_node=TableJoinNode(
                table_name=groupedmessage_source.format_from(),
                columns=groupedmessage_source.get_columns(),
                mandatory_conditions=[
                    binary_condition(
                        None,
                        ConditionFunctions.EQ,
                        Column(None, self.GROUPS_ALIAS, "record_deleted"),
                        Literal(None, 0),
                    ),
                ],
                prewhere_candidates=[
                    qualified_column(col, self.GROUPS_ALIAS)
                    for col in groupedmessage_source.get_prewhere_candidates()
                ],
                alias=self.GROUPS_ALIAS,
            ),
            right_node=TableJoinNode(
                table_name=events_source.format_from(),
                columns=events_source.get_columns(),
                mandatory_conditions=[
                    binary_condition(
                        None,
                        ConditionFunctions.EQ,
                        Column(None, self.EVENTS_ALIAS, "deleted"),
                        Literal(None, 0),
                    ),
                ],
                prewhere_candidates=[
                    qualified_column(col, self.EVENTS_ALIAS)
                    for col in events_source.get_prewhere_candidates()
                ],
                alias=self.EVENTS_ALIAS,
            ),
            mapping=[
                JoinCondition(
                    left=JoinConditionExpression(table_alias=self.GROUPS_ALIAS,
                                                 column="project_id"),
                    right=JoinConditionExpression(
                        table_alias=self.EVENTS_ALIAS, column="project_id"),
                ),
                JoinCondition(
                    left=JoinConditionExpression(table_alias=self.GROUPS_ALIAS,
                                                 column="id"),
                    right=JoinConditionExpression(
                        table_alias=self.EVENTS_ALIAS, column="group_id"),
                ),
            ],
            join_type=JoinType.LEFT,
        )

        schema = JoinedSchema(join_structure)
        storage = JoinedStorage(StorageSetKey.EVENTS, join_structure)
        super().__init__(
            storages=[storage],
            query_plan_builder=SingleStorageQueryPlanBuilder(storage=storage),
            abstract_column_set=schema.get_columns(),
            writable_storage=None,
        )
Пример #5
0
 def get_schemas(self) -> StorageSchemas:
     return StorageSchemas(
         read_schema=JoinedSchema(self.__structure), write_schema=None
     )
Пример #6
0
    def __init__(self) -> None:
        self.__grouped_message = get_dataset("groupedmessage")
        groupedmessage_source = self.__grouped_message \
            .get_dataset_schemas() \
            .get_read_schema() \
            .get_data_source()

        self.__events = get_dataset("events")
        events_source = self.__events \
            .get_dataset_schemas() \
            .get_read_schema() \
            .get_data_source()

        join_structure = JoinClause(
            left_node=TableJoinNode(
                table_name=groupedmessage_source.format_from(),
                columns=groupedmessage_source.get_columns(),
                mandatory_conditions=[
                    # TODO: This will be replaced as soon as expressions won't be strings
                    # thus we will be able to easily add an alias to a column in an
                    # expression.
                    (qualified_column('record_deleted',
                                      self.GROUPS_ALIAS), '=', 0)
                ],
                alias=self.GROUPS_ALIAS,
            ),
            right_node=TableJoinNode(
                table_name=events_source.format_from(),
                columns=events_source.get_columns(),
                mandatory_conditions=[
                    (qualified_column('deleted', self.EVENTS_ALIAS), '=', 0)
                ],
                alias=self.EVENTS_ALIAS,
            ),
            mapping=[
                JoinCondition(
                    left=JoinConditionExpression(table_alias=self.GROUPS_ALIAS,
                                                 column="project_id"),
                    right=JoinConditionExpression(
                        table_alias=self.EVENTS_ALIAS, column="project_id"),
                ),
                JoinCondition(
                    left=JoinConditionExpression(table_alias=self.GROUPS_ALIAS,
                                                 column="id"),
                    right=JoinConditionExpression(
                        table_alias=self.EVENTS_ALIAS, column="group_id"),
                ),
            ],
            join_type=JoinType.LEFT,
        )

        schema = JoinedSchema(join_structure)
        dataset_schemas = DatasetSchemas(
            read_schema=schema,
            write_schema=None,
        )
        super().__init__(
            dataset_schemas=dataset_schemas,
            time_group_columns={
                'events.time': 'events.timestamp',
            },
            time_parse_columns=['events.timestamp'],
        )