def test_complex_joins() -> None:
    """A nested join exposes one column set per aliased node.

    Errors are joined with groups, and the result is joined again with a
    subquery over group assignees. The expected value for the subquery
    alias holds only the selected expression names, typed as ``Any``.
    """
    errors_node = IndividualNode(
        alias="err",
        data_source=Entity(key=EntityKey.EVENTS, schema=ERRORS_SCHEMA),
    )
    groups_node = IndividualNode(
        alias="groups",
        data_source=Entity(key=EntityKey.GROUPEDMESSAGES, schema=GROUPS_SCHEMA),
    )

    assignee_subquery = Query(
        from_clause=Entity(key=EntityKey.GROUPASSIGNEE, schema=GROUPS_ASSIGNEE),
        selected_columns=[
            SelectedExpression("id", Column("id", None, "id")),
            SelectedExpression(
                "assigned_user", Column("assigned_user", None, "user")
            ),
        ],
    )
    assignee_node = IndividualNode(alias="assignee", data_source=assignee_subquery)

    # Inner level: errors joined with groups on the group id.
    errors_with_groups = JoinClause(
        left_node=errors_node,
        right_node=groups_node,
        keys=[
            JoinCondition(
                left=JoinConditionExpression("err", "group_id"),
                right=JoinConditionExpression("groups", "id"),
            )
        ],
        join_type=JoinType.INNER,
    )

    # Outer level: the join above joined with the assignee subquery.
    full_join = JoinClause(
        left_node=errors_with_groups,
        right_node=assignee_node,
        keys=[
            JoinCondition(
                left=JoinConditionExpression("err", "group_id"),
                right=JoinConditionExpression("assignee", "id"),
            )
        ],
        join_type=JoinType.INNER,
    )

    column_sets = full_join.get_column_sets()
    assert column_sets == {
        "err": ERRORS_SCHEMA,
        "assignee": ColumnSet([("id", Any()), ("assigned_user", Any())]),
        "groups": GROUPS_SCHEMA,
    }
def test_simple_join() -> None:
    """Check a two-node join: column sets, qualified columns, key validation.

    The last part expects an ``AssertionError`` when a join is built with
    key columns named "missing_col" / "another_missing_col".
    """
    errors_node = IndividualNode(
        alias="err",
        data_source=Entity(key=EntityKey.EVENTS, schema=ERRORS_SCHEMA),
    )
    groups_node = IndividualNode(
        alias="groups",
        data_source=Entity(key=EntityKey.GROUPEDMESSAGES, schema=GROUPS_SCHEMA),
    )

    valid_keys = [
        JoinCondition(
            left=JoinConditionExpression("err", "group_id"),
            right=JoinConditionExpression("groups", "id"),
        )
    ]
    semi_join = JoinClause(
        left_node=errors_node,
        right_node=groups_node,
        keys=valid_keys,
        join_type=JoinType.INNER,
        join_modifier=JoinModifier.SEMI,
    )

    assert semi_join.get_column_sets() == {
        "err": ERRORS_SCHEMA,
        "groups": GROUPS_SCHEMA,
    }

    # Columns of the join are addressed as "<alias>.<column>".
    joined_cols = semi_join.get_columns()
    for qualified_name in (
        "err.group_id",
        "err.event_id",
        "groups.id",
        "groups.message",
    ):
        assert qualified_name in joined_cols

    invalid_keys = [
        JoinCondition(
            left=JoinConditionExpression("err", "missing_col"),
            right=JoinConditionExpression("groups", "another_missing_col"),
        )
    ]
    with pytest.raises(AssertionError):
        JoinClause(
            left_node=errors_node,
            right_node=groups_node,
            keys=invalid_keys,
            join_type=JoinType.INNER,
        )
def test_join_query() -> None:
    """Join two subqueries and check the columns exposed by the join.

    The assertions expect each column to be the node alias followed by the
    name of an expression selected by that node's subquery.
    """
    events_schema = ColumnSet([("event_id", String()), ("group_id", UInt(32))])
    events_query = LogicalQuery(
        Entity(EntityKey.EVENTS, events_schema),
        selected_columns=[
            SelectedExpression("group_id", Column("group_id", None, "group_id")),
            SelectedExpression(
                "string_evt_id", Column("string_evt_id", None, "event_id")
            ),
        ],
    )

    groups_schema = ColumnSet([("id", UInt(32)), ("message", String())])
    groups_query = LogicalQuery(
        Entity(EntityKey.GROUPEDMESSAGES, groups_schema),
        selected_columns=[
            SelectedExpression("group_id", Column("group_id", None, "id")),
        ],
    )

    events_to_groups = JoinClause(
        left_node=IndividualNode("e", events_query),
        right_node=IndividualNode("g", groups_query),
        keys=[
            JoinCondition(
                left=JoinConditionExpression("e", "group_id"),
                right=JoinConditionExpression("g", "group_id"),
            )
        ],
        join_type=JoinType.INNER,
    )
    join_query = CompositeQuery(from_clause=events_to_groups)

    data_source = join_query.get_from_clause()
    for expected_column in ("e.string_evt_id", "g.group_id"):
        assert expected_column in data_source.get_columns()
def visit_entity_match(
    self,
    node: Node,
    visited_children: Tuple[
        Any, str, Any, Any, EntityKey, Union[Optional[float], Node], Any, Any
    ],
) -> IndividualNode[QueryEntity]:
    """Build the aliased entity node for an entity match.

    ``visited_children`` is an 8-tuple; only the alias (index 1), the
    entity key (index 4) and the sample (index 5) are used.

    A sample that is still a raw ``Node`` is treated as "no sample".
    NOTE(review): presumably that is how an absent optional sample clause
    arrives here; confirm against the grammar.
    """
    _, alias, _, _, entity_key, raw_sample, _, _ = visited_children

    sample_rate = None if isinstance(raw_sample, Node) else raw_sample

    data_model = get_entity(entity_key).get_data_model()
    entity_source = QueryEntity(entity_key, data_model, sample_rate)
    return IndividualNode(alias, entity_source)
def build_node(
    alias: str,
    from_clause: Entity,
    selected_columns: Sequence[SelectedExpression],
    condition: Optional[Expression],
) -> IndividualNode[Entity]:
    """Wrap a logical query over ``from_clause`` in a node named ``alias``.

    The query is built from the given selected columns and condition, and
    becomes the node's data source.
    """
    subquery = LogicalQuery(
        from_clause=from_clause,
        selected_columns=selected_columns,
        condition=condition,
    )
    return IndividualNode(alias=alias, data_source=subquery)
def build_clickhouse_node(
    alias: str,
    from_clause: Table,
    selected_columns: Sequence[SelectedExpression],
    condition: Optional[Expression],
) -> IndividualNode[Table]:
    """Wrap a Clickhouse query over ``from_clause`` in a node named ``alias``.

    The query is built from the given selected columns and condition, and
    becomes the node's data source.
    """
    subquery = ClickhouseQuery(
        from_clause=from_clause,
        selected_columns=selected_columns,
        condition=condition,
    )
    return IndividualNode(alias=alias, data_source=subquery)
def visit_individual_node(
    self, node: IndividualNode[Entity]
) -> JoinDataSourcePlan:
    """Turn one join node into a plan, using the subquery plan for its alias.

    The node's data source must be a ``ProcessableQuery``; otherwise the
    assertion below fails. The returned plan carries the translated node,
    the subquery's plan/db processors keyed by the node alias, and the
    subquery's storage set key.
    """
    assert isinstance(
        node.data_source, ProcessableQuery
    ), "Invalid composite query. All nodes must be subqueries."

    alias = node.alias
    plan = self.__plans[alias]

    translated_node = IndividualNode(alias=alias, data_source=plan.query)
    subquery_processors = SubqueryProcessors(
        plan_processors=plan.plan_query_processors,
        db_processors=plan.db_query_processors,
    )

    return JoinDataSourcePlan(
        translated_source=translated_node,
        processors={alias: subquery_processors},
        storage_set_key=plan.storage_set_key,
    )
EVENTS_SCHEMA, GROUPS_ASSIGNEE, GROUPS_SCHEMA, Events, GroupAssignee, GroupedMessage, ) from tests.query.joins.join_structures import ( events_groups_join, events_node, groups_node, ) BASIC_JOIN = JoinClause( left_node=IndividualNode( alias="ev", data_source=Entity(EntityKey.EVENTS, EVENTS_SCHEMA, None), ), right_node=IndividualNode( alias="gr", data_source=Entity(EntityKey.GROUPEDMESSAGES, GROUPS_SCHEMA, None), ), keys=[ JoinCondition( left=JoinConditionExpression("ev", "group_id"), right=JoinConditionExpression("gr", "id"), ) ], join_type=JoinType.INNER, ) TEST_CASES = [
id="subquery has their dates adjusted", ), pytest.param( """MATCH (e: events) -[contains]-> (t: transactions) SELECT 4-5, e.c WHERE e.project_id=1 AND e.timestamp>=toDateTime('2021-01-01T00:30:00') AND e.timestamp<toDateTime('2021-01-03T00:30:00') AND t.project_id=1 AND t.finish_ts>=toDateTime('2021-01-01T00:30:00') AND t.finish_ts<toDateTime('2021-01-07T00:30:00')""", CompositeQuery( from_clause=JoinClause( left_node=IndividualNode( "e", QueryEntity( EntityKey.EVENTS, get_entity(EntityKey.EVENTS).get_data_model(), ), ), right_node=IndividualNode( "t", QueryEntity( EntityKey.TRANSACTIONS, get_entity(EntityKey.TRANSACTIONS).get_data_model(), ), ), keys=[ JoinCondition( JoinConditionExpression("e", "event_id"), JoinConditionExpression("t", "event_id"), )
ERRORS_SCHEMA = ColumnSet([ ("event_id", UUID()), ("project_id", UInt(32)), ("message", String()), ("group_id", UInt(32)), ]) GROUPS_SCHEMA = ColumnSet([ ("id", UInt(32)), ("project_id", UInt(32)), ("group_id", UInt(32)), ("message", String()), ]) GROUPS_ASSIGNEE = ColumnSet([("id", UInt(32)), ("user", String())]) node_err = IndividualNode(alias="err", data_source=Table("errors_local", ERRORS_SCHEMA)) node_group = IndividualNode(alias="groups", data_source=Table("groupedmessage_local", GROUPS_SCHEMA)) node_assignee = IndividualNode(alias="assignee", data_source=Table("groupassignee_local", GROUPS_ASSIGNEE)) test_cases = [ pytest.param( Query( Table("my_table", ColumnSet([])), selected_columns=[ SelectedExpression("column1", Column(None, None, "column1")), SelectedExpression("column2", Column(None, "table1", "column2")),
get_equivalent_columns, ) from tests.query.joins.equivalence_schema import ( EVENTS_SCHEMA, GROUPS_ASSIGNEE, GROUPS_SCHEMA, Events, GroupAssignee, GroupedMessage, ) TEST_CASES = [ pytest.param( JoinClause( IndividualNode("ev", EntitySource(EntityKey.EVENTS, EVENTS_SCHEMA, None)), IndividualNode( "gr", EntitySource(EntityKey.GROUPEDMESSAGES, GROUPS_SCHEMA, None)), [ JoinCondition( JoinConditionExpression("ev", "group_id"), JoinConditionExpression("gr", "id"), ) ], JoinType.INNER, None, ), { QualifiedCol(EntityKey.EVENTS, "group_id"): {
def node(alias: str, name: str) -> IndividualNode[QueryEntity]:
    """Return a node named ``alias`` over the entity whose key value is ``name``.

    The entity's data model is looked up through ``get_entity``.
    """
    entity_key = EntityKey(name)
    data_model = get_entity(entity_key).get_data_model()
    return IndividualNode(alias, QueryEntity(entity_key, data_model))
def visit_individual_node(
    self, node: IndividualNode[Entity]
) -> IndividualNode[Entity]:
    """Replace the node's data source with the query built for its alias.

    The builder is looked up by alias in this visitor's subqueries
    mapping; the alias itself is kept unchanged.
    """
    alias = node.alias
    subquery_builder = self.__subqueries[alias]
    return IndividualNode(alias, subquery_builder.build_query())
JoinCondition, JoinConditionExpression, JoinType, ) from snuba.query.data_source.simple import Entity from snuba.query.expressions import Column, FunctionCall, Literal from snuba.query.formatters.tracing import TExpression, format_query from snuba.query.logical import Query as LogicalQuery from tests.query.joins.equivalence_schema import ( EVENTS_SCHEMA, GROUPS_SCHEMA, ) BASIC_JOIN = JoinClause( left_node=IndividualNode( alias="ev", data_source=Entity(EntityKey.EVENTS, EVENTS_SCHEMA, None), ), right_node=IndividualNode( alias="gr", data_source=Entity(EntityKey.GROUPEDMESSAGES, GROUPS_SCHEMA, None), ), keys=[ JoinCondition( left=JoinConditionExpression("ev", "group_id"), right=JoinConditionExpression("gr", "id"), ) ], join_type=JoinType.INNER, ) LOGICAL_QUERY = LogicalQuery(
), Literal(None, 1), ), ), Literal(None, 1), ), ), id="Special array join functions", ), pytest.param( "MATCH (e: events) -[contains]-> (t: transactions) SELECT 4-5, e.c", CompositeQuery( from_clause=JoinClause( left_node=IndividualNode( "e", QueryEntity( EntityKey.EVENTS, get_entity(EntityKey.EVENTS).get_data_model(), ), ), right_node=IndividualNode( "t", QueryEntity( EntityKey.TRANSACTIONS, get_entity(EntityKey.TRANSACTIONS).get_data_model(), ), ), keys=[ JoinCondition( JoinConditionExpression("e", "event_id"), JoinConditionExpression("t", "event_id"), ) ],
SelectedExpression( "alias", FunctionCall("alias", "something", (Column(None, None, "alias"),)), ) ], ), 3, {"errors_local"}, True, None, id="Nested query. Count the inner query", ), pytest.param( CompositeQuery( from_clause=JoinClause( left_node=IndividualNode(alias="err", data_source=SIMPLE_QUERY), right_node=IndividualNode( alias="groups", data_source=Table("groups_local", GROUPS_SCHEMA) ), keys=[ JoinCondition( left=JoinConditionExpression("err", "group_id"), right=JoinConditionExpression("groups", "id"), ) ], join_type=JoinType.INNER, ), selected_columns=[ SelectedExpression( "event_id", FunctionCall("alias", "something", (Column(None, "err", "alias"),)),
def test_entity_node() -> None:
    """A single entity node maps its alias to the entity's schema."""
    errors_entity = Entity(key=EntityKey.ERRORS, schema=ERRORS_SCHEMA)
    errors_node = IndividualNode(alias="err", data_source=errors_entity)

    expected_column_sets = {"err": errors_entity.schema}
    assert errors_node.get_column_sets() == expected_column_sets
), selected_columns=[ SelectedExpression( "average", FunctionCall("average", "avg", (Column(None, None, "count_environment"), )), ), ], ), id="Query with a subquery", ), pytest.param( CompositeQuery( from_clause=JoinClause( left_node=IndividualNode( alias="err", data_source=events_ent, ), right_node=IndividualNode( alias="groups", data_source=groups_ent, ), keys=[ JoinCondition( left=JoinConditionExpression("err", "group_id"), right=JoinConditionExpression("groups", "id"), ) ], join_type=JoinType.INNER, ), selected_columns=[ SelectedExpression(