Exemplo n.º 1
0
def parse_prop_clauses(filters: List[Property],
                       team_id: int,
                       prepend: str = "global",
                       table_name: str = "") -> Tuple[str, Dict]:
    """Build the SQL filter fragment and bind parameters for ``filters``.

    Each property contributes one clause, chosen by ``prop.type``:

    * ``"cohort"`` -- ``distinct_id IN (<query from format_filter_query>)``
    * ``"person"`` -- ``distinct_id IN (<GET_DISTINCT_IDS_BY_PROPERTY_SQL>)``
    * anything else -- the clause from ``prop_filter_json_extract`` against
      the ``properties`` column, followed by a ``team_id`` constraint.

    Args:
        filters: Properties to translate, in order; the position of each
            property is passed on to the helper that names its parameters.
        team_id: Stored in the returned params under ``"team_id"``.
        prepend: Prefix handed to ``prop_filter_json_extract``; person
            properties use ``"<prepend>person"``.
        table_name: Optional table name/alias; when non-empty it is
            prefixed (with a dot) to the column references.

    Returns:
        ``(sql, params)`` where ``sql`` is the clauses joined by single
        spaces and ``params`` is the merged parameter dict.
    """
    # Qualify column references only when a table name was supplied.
    column_prefix = table_name + "." if table_name != "" else table_name

    clauses = []
    params: Dict[str, Any] = {"team_id": team_id}

    for idx, prop in enumerate(filters):
        if prop.type == "cohort":
            cohort = Cohort.objects.get(pk=prop.value)
            person_id_query, cohort_params = format_filter_query(cohort)
            params = {**params, **cohort_params}
            clauses.append(
                f"AND {column_prefix}distinct_id IN ({person_id_query})")
            continue

        if prop.type == "person":
            filter_query, filter_params = prop_filter_json_extract(
                prop, idx, f"{prepend}person")
            distinct_ids_sql = GET_DISTINCT_IDS_BY_PROPERTY_SQL.format(
                filters=filter_query)
            clause = f"AND {column_prefix}distinct_id IN ({distinct_ids_sql})"
        else:
            filter_query, filter_params = prop_filter_json_extract(
                prop, idx, prepend, prop_var=f"{column_prefix}properties")
            clause = f"{filter_query} AND {column_prefix}team_id = %(team_id)s"

        clauses.append(clause)
        params.update(filter_params)

    return " ".join(clauses), params
Exemplo n.º 2
0
def parse_prop_clauses(key: str, filters: List[Property], team: Team, prepend: str = "") -> Tuple[str, Dict]:
    """Build ``AND ... IN (...)`` filter clauses and bind params for ``filters``.

    Each property contributes one clause, chosen by ``prop.type``:

    * ``"cohort"`` -- ``AND distinct_id IN (<query from format_filter_query>)``
    * ``"person"`` -- ``AND distinct_id IN (<GET_DISTINCT_IDS_BY_PROPERTY_SQL>)``
    * anything else -- ``AND <key> IN (<EVENT_PROP_CLAUSE>)``

    Args:
        key: Column compared against the event-property subquery.
        filters: Properties to translate, in order.
        team: Team whose ``pk`` is formatted into ``EVENT_PROP_CLAUSE``.
        prepend: Namespace for the bind-parameter names of event properties
            (``k<prepend>_<idx>`` / ``v<prepend>_<idx>``). Person properties
            always use the fixed ``person`` namespace.

    Returns:
        ``(sql, params)``; every clause in ``sql`` ends with a single space.
    """
    clauses: List[str] = []
    params: Dict[str, Any] = {}

    for idx, prop in enumerate(filters):
        if prop.type == "cohort":
            cohort = Cohort.objects.get(pk=prop.value)
            person_id_query, cohort_filter_params = format_filter_query(cohort)
            params.update(cohort_filter_params)
            clauses.append("AND distinct_id IN ({clause}) ".format(clause=person_id_query))
            continue

        is_person = prop.type == "person"
        # Use a local prefix instead of reassigning ``prepend``: overwriting
        # the parameter made every later event property in the same call
        # lose the caller's namespace and use "person" instead.
        param_prefix = "person" if is_person else prepend
        key_param = "k{}_{}".format(param_prefix, idx)
        value_param = "v{}_{}".format(param_prefix, idx)

        prop_filter = "(ep.key = %({key_param})s) AND (ep.value {operator} %({value_param})s)".format(
            key_param=key_param, operator=get_operator(prop.operator), value_param=value_param
        )

        if is_person:
            subquery = GET_DISTINCT_IDS_BY_PROPERTY_SQL.format(filters=prop_filter)
            clauses.append("AND distinct_id IN ({clause}) ".format(clause=subquery))
        else:
            subquery = EVENT_PROP_CLAUSE.format(team_id=team.pk, filters=prop_filter)
            clauses.append("AND {key} IN ({clause}) ".format(key=key, clause=subquery))

        params[key_param] = prop.key
        params[value_param] = _pad_value(prop.value)

    return "".join(clauses), params
Exemplo n.º 3
0
def parse_prop_clauses(
    filters: List[Property],
    team_id: Optional[int],
    prepend: str = "global",
    table_name: str = "",
    allow_denormalized_props: bool = False,
    filter_test_accounts=False,
) -> Tuple[str, Dict]:
    """Build the SQL filter fragment and bind parameters for ``filters``.

    Each property contributes one clause, chosen by ``prop.type``:

    * ``"cohort"`` -- ``distinct_id IN (<query from format_filter_query>)``;
      the cohort is looked up by ``pk`` and ``team_id``.
    * ``"person"`` -- ``distinct_id IN (<GET_DISTINCT_IDS_BY_PROPERTY_SQL>)``
    * ``"element"`` -- the first item of the query returned by
      ``filter_element``.
    * anything else -- the clause from ``prop_filter_json_extract`` against
      the ``properties`` column, plus a ``team_id`` constraint when
      ``team_id`` is truthy.

    Args:
        filters: Properties to translate, in order. This list is never
            modified.
        team_id: Added to the params as ``"team_id"`` when not ``None``.
        prepend: Prefix handed to ``prop_filter_json_extract``; person
            properties use ``"<prepend>person"``.
        table_name: Optional table name/alias; when non-empty it is
            prefixed (with a dot) to the column references.
        allow_denormalized_props: Forwarded to ``prop_filter_json_extract``.
        filter_test_accounts: When true, the team's ``test_account_filters``
            are loaded and applied after ``filters``.

    Returns:
        ``(sql, params)`` where ``sql`` is the clauses joined by single
        spaces and ``params`` is the merged parameter dict.
    """
    final = []
    params: Dict[str, Any] = {}
    if team_id is not None:
        params["team_id"] = team_id
    if table_name != "":
        table_name += "."

    if filter_test_accounts:
        test_account_filters = Team.objects.only("test_account_filters").get(
            id=team_id).test_account_filters
        # Build a new list rather than extending in place: ``filters`` is the
        # caller's object, and extending it leaked the test-account filters
        # back to the caller and stacked them up on repeated calls.
        filters = [
            *filters,
            *(Property(**prop) for prop in test_account_filters),
        ]

    for idx, prop in enumerate(filters):
        if prop.type == "cohort":
            cohort = Cohort.objects.get(pk=prop.value, team_id=team_id)
            person_id_query, cohort_filter_params = format_filter_query(cohort)
            params.update(cohort_filter_params)
            final.append("AND {table_name}distinct_id IN ({clause})".format(
                table_name=table_name, clause=person_id_query))
        elif prop.type == "person":
            filter_query, filter_params = prop_filter_json_extract(
                prop,
                idx,
                "{}person".format(prepend),
                allow_denormalized_props=allow_denormalized_props)
            final.append(
                "AND {table_name}distinct_id IN ({filter_query})".format(
                    filter_query=GET_DISTINCT_IDS_BY_PROPERTY_SQL.format(
                        filters=filter_query),
                    table_name=table_name))
            params.update(filter_params)
        elif prop.type == "element":
            query, filter_params = filter_element({prop.key: prop.value},
                                                  prepend="{}_".format(idx))
            # NOTE(review): only the first item of ``query`` is used --
            # presumably filter_element returns a sequence of conditions;
            # confirm against its definition.
            final.append("AND {}".format(query[0]))
            params.update(filter_params)
        else:
            filter_query, filter_params = prop_filter_json_extract(
                prop,
                idx,
                prepend,
                prop_var="{}properties".format(table_name),
                allow_denormalized_props=allow_denormalized_props,
            )

            # The team constraint is keyed on truthiness, so a ``team_id`` of
            # ``None`` (or 0) yields the bare property filter.
            final.append(
                f"{filter_query} AND {table_name}team_id = %(team_id)s"
                if team_id else filter_query)
            params.update(filter_params)
    return " ".join(final), params
Exemplo n.º 4
0
def format_filter_query(cohort: Cohort) -> Tuple[str, Dict[str, Any]]:
    """Build the person-selection SQL and bind parameters for ``cohort``.

    Every entry of ``cohort.groups`` is inspected:

    * a group with a truthy ``"action_id"`` yields one subquery selecting
      ``distinct_id`` from events matched by that action;
    * otherwise, a group with truthy ``"properties"`` yields one
      ``GET_DISTINCT_IDS_BY_PROPERTY_SQL`` subquery per property.

    The parenthesised subqueries are joined with ``" OR distinct_id IN "``
    and substituted into ``CALCULATE_COHORT_PEOPLE_SQL``.

    Returns:
        ``(person_id_query, params)``.
    """
    subqueries = []
    params: Dict[str, Any] = {}

    for group_idx, group in enumerate(cohort.groups):
        if group.get("action_id"):
            action = Action.objects.get(pk=group["action_id"],
                                        team_id=cohort.team.pk)
            action_filter_query, action_params = format_action_filter(action)
            params = {**params, **action_params}
            subqueries.append(
                f"(SELECT distinct_id FROM events WHERE uuid IN ({action_filter_query}))"
            )
            continue

        if not group.get("properties"):
            continue

        group_filter = Filter(data=group)
        # Parameter names are namespaced by cohort and group so that several
        # groups can share a single params dict.
        prepend = "{}_cohort_group_{}".format(cohort.pk, group_idx)

        for idx, prop in enumerate(group_filter.properties):
            key_arg = "k{}_{}".format(prepend, idx)
            value_arg = "v{}_{}".format(prepend, idx)
            operator_clause, value = get_operator(prop, value_arg)

            is_negated = prop.operator and "not" in prop.operator
            subquery = GET_DISTINCT_IDS_BY_PROPERTY_SQL.format(
                filters=f"(ep.key = %({key_arg})s) AND {operator_clause}",
                negation="NOT " if is_negated else "",
            )

            subqueries.append(f"({subquery})")
            params[key_arg] = prop.key
            params[value_arg] = value

    combined = " OR distinct_id IN ".join(subqueries)
    return CALCULATE_COHORT_PEOPLE_SQL.format(query=combined), params
Exemplo n.º 5
0
def parse_prop_clauses(
    key: str, filters: List[Property], team: Team, prepend: str = "", json_extract: bool = False
) -> Tuple[str, Dict]:
    """Build the SQL filter fragment and bind parameters for ``filters``.

    Each property contributes one clause, chosen by ``prop.type``:

    * ``"cohort"`` -- ``distinct_id IN (<query from format_filter_query>)``
    * ``"person"`` -- ``distinct_id IN (<GET_DISTINCT_IDS_BY_PROPERTY_SQL>)``
    * anything else -- the clause from ``prop_filter_json_extract`` followed
      by ``AND team_id = %(team_id)s``.

    ``key``, ``team`` and ``json_extract`` are accepted but not read by this
    implementation. The ``team_id`` placeholder is emitted without a matching
    entry being added to the returned params here.

    Returns:
        ``(sql, params)`` with the clauses concatenated in filter order.
    """
    pieces = []
    params: Dict[str, Any] = {}

    for idx, prop in enumerate(filters):
        if prop.type == "cohort":
            cohort = Cohort.objects.get(pk=prop.value)
            person_id_query, cohort_params = format_filter_query(cohort)
            params = {**params, **cohort_params}
            # The cohort clause carries a trailing space; the other clause
            # kinds carry a leading one.
            pieces.append(f"AND distinct_id IN ({person_id_query}) ")
            continue

        if prop.type == "person":
            filter_query, filter_params = prop_filter_json_extract(prop, idx, f"{prepend}person")
            distinct_ids_sql = GET_DISTINCT_IDS_BY_PROPERTY_SQL.format(filters=filter_query)
            pieces.append(f" AND distinct_id IN ({distinct_ids_sql})")
        else:
            filter_query, filter_params = prop_filter_json_extract(prop, idx, prepend)
            pieces.append(f" {filter_query} AND team_id = %(team_id)s")

        params.update(filter_params)

    return "".join(pieces), params
Exemplo n.º 6
0
def parse_prop_clauses(key: str,
                       filters: List[Property],
                       team: Team,
                       prepend: str = "") -> Tuple[str, Dict]:
    """Build ``AND ... IN (...)`` filter clauses and bind params for ``filters``.

    Each property contributes one clause, chosen by ``prop.type``:

    * ``"cohort"`` -- ``AND distinct_id IN (<query from format_filter_query>)``
    * ``"person"`` -- ``AND distinct_id IN (<GET_DISTINCT_IDS_BY_PROPERTY_SQL>)``
    * anything else -- ``AND <key> [NOT ]IN (<EVENT_PROP_CLAUSE>)`` followed
      by ``AND team_id = %(team_id)s``.

    Args:
        key: Column compared against the event-property subquery.
        filters: Properties to translate, in order.
        team: Team whose ``pk`` is formatted into ``EVENT_PROP_CLAUSE``.
        prepend: Namespace for the bind-parameter names of event properties
            (``k<prepend>_<idx>`` / ``v<prepend>_<idx>``). Person properties
            always use the fixed ``person`` namespace.

    Returns:
        ``(sql, params)``; every clause in ``sql`` ends with a single space.
        The ``team_id`` placeholder is emitted without a matching entry
        being added to ``params`` here.
    """
    clauses: List[str] = []
    params: Dict[str, Any] = {}

    for idx, prop in enumerate(filters):
        if prop.type == "cohort":
            cohort = Cohort.objects.get(pk=prop.value)
            person_id_query, cohort_filter_params = format_filter_query(cohort)
            params.update(cohort_filter_params)
            clauses.append(
                "AND distinct_id IN ({clause}) ".format(clause=person_id_query))
            continue

        is_person = prop.type == "person"
        # Use a local prefix instead of reassigning ``prepend``: overwriting
        # the parameter made every later event property in the same call
        # lose the caller's namespace and use "person" instead.
        param_prefix = "person" if is_person else prepend
        key_param = "k{}_{}".format(param_prefix, idx)
        value_param = "v{}_{}".format(param_prefix, idx)
        operator_clause, value = get_operator(prop, value_param)

        # The operator clause may be empty, in which case no AND is emitted.
        prop_filter = "(ep.key = %({key_param})s) {and_statement} {operator_clause}".format(
            key_param=key_param,
            and_statement="AND" if operator_clause else "",
            operator_clause=operator_clause,
        )
        negation = "NOT " if prop.operator and "not" in prop.operator else ""

        if is_person:
            subquery = GET_DISTINCT_IDS_BY_PROPERTY_SQL.format(
                filters=prop_filter, negation=negation)
            clauses.append(
                "AND distinct_id IN ({clause}) ".format(clause=subquery))
        else:
            subquery = EVENT_PROP_CLAUSE.format(team_id=team.pk,
                                                filters=prop_filter)
            clauses.append(
                "AND {key} {negation}IN ({clause}) AND team_id = %(team_id)s ".format(
                    key=key, negation=negation, clause=subquery))

        params[key_param] = prop.key
        params[value_param] = value

    return "".join(clauses), params
Exemplo n.º 7
0
def parse_prop_clauses(
    filters: List[Property],
    team_id: Optional[int],
    prepend: str = "global",
    table_name: str = "",
    allow_denormalized_props: bool = True,
    has_person_id_joined: bool = True,
    person_properties_mode: PersonPropertiesMode = PersonPropertiesMode.USING_SUBQUERY,
) -> Tuple[str, Dict]:
    """Build the SQL filter fragment and bind parameters for ``filters``.

    One clause is produced per property, chosen by ``prop.type``:

    * ``"cohort"`` -- ``distinct_id IN (<format_filter_query>)``, or an
      always-false condition when the cohort does not exist for the team.
    * ``"person"`` (unless the mode is ``EXCLUDE``) -- either the direct
      filter on ``person_props`` or a ``distinct_id IN (...)`` subquery.
    * ``"element"`` -- the query from ``filter_element``, when non-empty.
    * ``"event"`` -- a filter on the ``properties`` column, plus a
      ``team_id`` constraint when ``team_id`` is truthy.
    * ``"group"`` -- a filter on ``group_properties_<group_type_index>``.
    * ``"static-cohort"`` / ``"precalculated-cohort"`` -- the cohort filter,
      applied directly or wrapped in a ``distinct_id`` subquery depending on
      ``has_person_id_joined``.

    Properties of any other type (including person properties in
    ``EXCLUDE`` mode) contribute nothing.

    Returns:
        ``(sql, params)`` where ``sql`` is the clauses joined by single
        spaces and ``params`` is the merged parameter dict.
    """
    clauses = []
    params: Dict[str, Any] = {}
    if team_id is not None:
        params["team_id"] = team_id

    # Qualify column references only when a table name was supplied.
    column_prefix = table_name + "." if table_name != "" else table_name

    for idx, prop in enumerate(filters):
        if prop.type == "cohort":
            try:
                cohort = Cohort.objects.get(pk=prop.value, team_id=team_id)
            except Cohort.DoesNotExist:
                # If cohort doesn't exist, nothing can match
                clauses.append("AND 0 = 13")
                continue

            person_id_query, cohort_params = format_filter_query(cohort, idx)
            params = {**params, **cohort_params}
            clauses.append(
                f"AND {column_prefix}distinct_id IN ({person_id_query})")

        elif prop.type == "person" and person_properties_mode != PersonPropertiesMode.EXCLUDE:
            # :TODO: Clean this up by using ClickhousePersonQuery over GET_DISTINCT_IDS_BY_PROPERTY_SQL to have access
            #   to materialized columns
            # :TODO: (performance) Avoid subqueries whenever possible, use joins instead
            uses_props_column = person_properties_mode == PersonPropertiesMode.USING_PERSON_PROPERTIES_COLUMN
            filter_query, filter_params = prop_filter_json_extract(
                prop,
                idx,
                f"{prepend}person",
                prop_var="person_props" if uses_props_column else "properties",
                allow_denormalized_props=allow_denormalized_props and uses_props_column,
            )
            if uses_props_column:
                clause = filter_query
            else:
                distinct_ids_sql = GET_DISTINCT_IDS_BY_PROPERTY_SQL.format(
                    filters=filter_query)
                clause = f"AND {column_prefix}distinct_id IN ({distinct_ids_sql})"
            clauses.append(clause)
            params.update(filter_params)

        elif prop.type == "element":
            query, filter_params = filter_element(
                {prop.key: prop.value},
                operator=prop.operator,
                prepend=f"{idx}_",
            )
            # An empty query adds neither a clause nor parameters.
            if query:
                clauses.append(f" AND {query}")
                params.update(filter_params)

        elif prop.type == "event":
            filter_query, filter_params = prop_filter_json_extract(
                prop,
                idx,
                prepend,
                prop_var=f"{column_prefix}properties",
                allow_denormalized_props=allow_denormalized_props,
            )
            if team_id:
                clause = f"{filter_query} AND {column_prefix}team_id = %(team_id)s"
            else:
                clause = filter_query
            clauses.append(clause)
            params.update(filter_params)

        elif prop.type == "group":
            # :TRICKY: This assumes group properties have already been joined, as in trends query
            filter_query, filter_params = prop_filter_json_extract(
                prop,
                idx,
                prepend,
                prop_var=f"group_properties_{prop.group_type_index}",
                allow_denormalized_props=False,
            )
            clauses.append(filter_query)
            params.update(filter_params)

        elif prop.type in ("static-cohort", "precalculated-cohort"):
            cohort_id = cast(int, prop.value)

            if prop.type == "static-cohort":
                build_cohort_filter = format_static_cohort_query
            else:
                build_cohort_filter = format_precalculated_cohort_query
            filter_query, filter_params = build_cohort_filter(
                cohort_id,
                idx,
                prepend=prepend,
                custom_match_field="person_id",
            )  # type: ignore

            if has_person_id_joined:
                clause = f" AND {filter_query}"
            else:
                # :TODO: (performance) Avoid subqueries whenever possible, use joins instead
                subquery = GET_DISTINCT_IDS_BY_PERSON_ID_FILTER.format(
                    filters=filter_query)
                clause = f"AND {column_prefix}distinct_id IN ({subquery})"
            clauses.append(clause)
            params.update(filter_params)

    return " ".join(clauses), params
Exemplo n.º 8
0
def parse_prop_clauses(
    team_id: int,
    filters: List[Property],
    prepend: str = "global",
    table_name: str = "",
    allow_denormalized_props: bool = True,
    has_person_id_joined: bool = True,
    person_properties_mode: PersonPropertiesMode = PersonPropertiesMode.
    USING_SUBQUERY,
    person_id_joined_alias: str = "person_id",
    group_properties_joined: bool = True,
    property_operator: PropertyOperatorType = PropertyOperatorType.AND,
) -> Tuple[str, Dict]:
    """Build the SQL filter fragment and bind parameters for ``filters``.

    One clause is produced per property, chosen by ``prop.type`` (cohort,
    person, element, event, group, static-cohort / precalculated-cohort).
    Properties of any other type contribute nothing. Each clause is prefixed
    with ``property_operator``; the first occurrence of the operator in the
    joined result is removed before returning.

    Args:
        team_id: Passed to ``get_team_distinct_ids_query`` for the
            subquery-based person and cohort filters.
        filters: Properties to translate, in order.
        prepend: Prefix handed to the helpers that name bind parameters.
        table_name: Optional table name/alias; when non-empty it is
            prefixed (with a dot) to the column references.
        allow_denormalized_props: Forwarded to ``prop_filter_json_extract``
            for event properties, and for person properties only in
            ``USING_PERSON_PROPERTIES_COLUMN`` mode.
        has_person_id_joined: For static/precalculated cohorts, apply the
            cohort filter directly instead of wrapping it in a
            ``distinct_id IN (...)`` subquery.
        person_properties_mode: Selects how person properties and cohorts
            are filtered (subquery, person-properties column, or direct).
        person_id_joined_alias: Match field passed to the cohort helpers.
        group_properties_joined: When false, group properties are filtered
            through a ``GET_GROUP_IDS_BY_PROPERTY_SQL`` subquery.
        property_operator: Operator placed in front of every clause.
            NOTE(review): assumed to be string-like (usable as an argument
            to ``str.replace`` and rendering as its SQL keyword in
            f-strings) -- confirm against PropertyOperatorType.

    Returns:
        ``(sql, params)``; ``sql`` is ``""`` when no clause was produced.
    """
    final = []
    params: Dict[str, Any] = {}
    # Qualify column references only when a table name was supplied.
    if table_name != "":
        table_name += "."

    for idx, prop in enumerate(filters):
        if prop.type == "cohort":
            try:
                cohort = Cohort.objects.get(pk=prop.value)
            except Cohort.DoesNotExist:
                final.append(
                    f"{property_operator} 0 = 13"
                )  # If cohort doesn't exist, nothing can match, unless an OR operator is used
            else:

                if person_properties_mode == PersonPropertiesMode.USING_SUBQUERY:
                    person_id_query, cohort_filter_params = format_filter_query(
                        cohort, idx)
                    params = {**params, **cohort_filter_params}
                    final.append(
                        f"{property_operator} {table_name}distinct_id IN ({person_id_query})"
                    )
                else:
                    # Every other mode uses the cohort subquery helper with
                    # the joined person id alias as its match field.
                    person_id_query, cohort_filter_params = format_cohort_subquery(
                        cohort,
                        idx,
                        custom_match_field=f"{person_id_joined_alias}")
                    params = {**params, **cohort_filter_params}
                    final.append(f"{property_operator} {person_id_query}")
        elif prop.type == "person" and person_properties_mode != PersonPropertiesMode.DIRECT:
            # :TODO: Clean this up by using ClickhousePersonQuery over GET_DISTINCT_IDS_BY_PROPERTY_SQL to have access
            #   to materialized columns
            # :TODO: (performance) Avoid subqueries whenever possible, use joins instead
            is_direct_query = person_properties_mode == PersonPropertiesMode.USING_PERSON_PROPERTIES_COLUMN
            filter_query, filter_params = prop_filter_json_extract(
                prop,
                idx,
                "{}person".format(prepend),
                prop_var="person_props" if is_direct_query else "properties",
                allow_denormalized_props=allow_denormalized_props
                and is_direct_query,
                property_operator=property_operator,
            )
            if is_direct_query:
                final.append(filter_query)
                params.update(filter_params)
            else:
                # Subquery filter here always should be blank as it's the first
                filter_query = filter_query.replace(property_operator, "", 1)
                final.append(
                    " {property_operator} {table_name}distinct_id IN ({filter_query})"
                    .format(
                        filter_query=GET_DISTINCT_IDS_BY_PROPERTY_SQL.format(
                            filters=filter_query,
                            GET_TEAM_PERSON_DISTINCT_IDS=
                            get_team_distinct_ids_query(team_id),
                        ),
                        table_name=table_name,
                        property_operator=property_operator,
                    ))
                params.update(filter_params)
        elif prop.type == "person" and person_properties_mode == PersonPropertiesMode.DIRECT:
            # this setting is used to generate the ClickhousePersonQuery SQL.
            # When using direct mode, there should only be person properties in the entire
            # property group
            filter_query, filter_params = prop_filter_json_extract(
                prop,
                idx,
                prepend=f"personquery_{prepend}",
                allow_denormalized_props=True,
                transform_expression=lambda column_name:
                f"argMax(person.{column_name}, _timestamp)",
                property_operator=property_operator,
            )
            final.append(filter_query)
            params.update(filter_params)
        elif prop.type == "element":
            query, filter_params = filter_element({prop.key: prop.value},
                                                  operator=prop.operator,
                                                  prepend="{}_".format(idx))
            # An empty query adds neither a clause nor parameters.
            if query:
                final.append(f"{property_operator} {query}")
                params.update(filter_params)
        elif prop.type == "event":
            filter_query, filter_params = prop_filter_json_extract(
                prop,
                idx,
                prepend,
                prop_var="{}properties".format(table_name),
                allow_denormalized_props=allow_denormalized_props,
                property_operator=property_operator,
            )
            final.append(f" {filter_query}")
            params.update(filter_params)
        elif prop.type == "group":
            if group_properties_joined:
                filter_query, filter_params = prop_filter_json_extract(
                    prop,
                    idx,
                    prepend,
                    prop_var=f"group_properties_{prop.group_type_index}",
                    allow_denormalized_props=False,
                    property_operator=property_operator,
                )
                final.append(filter_query)
                params.update(filter_params)
            else:
                # :TRICKY: offer groups support for queries which don't support automatically joining with groups table yet (e.g. lifecycle)
                # NOTE(review): unlike the joined branch, property_operator is
                # not forwarded here, so the inner filter uses the helper's
                # default operator -- confirm this is intended.
                filter_query, filter_params = prop_filter_json_extract(
                    prop,
                    idx,
                    prepend,
                    prop_var=f"group_properties",
                    allow_denormalized_props=False)
                group_type_index_var = f"{prepend}_group_type_index_{idx}"
                groups_subquery = GET_GROUP_IDS_BY_PROPERTY_SQL.format(
                    filters=filter_query,
                    group_type_index_var=group_type_index_var)
                final.append(
                    f"{property_operator} {table_name}$group_{prop.group_type_index} IN ({groups_subquery})"
                )
                params.update(filter_params)
                params[group_type_index_var] = prop.group_type_index
        elif prop.type in ("static-cohort", "precalculated-cohort"):
            cohort_id = cast(int, prop.value)

            method = format_static_cohort_query if prop.type == "static-cohort" else format_precalculated_cohort_query
            filter_query, filter_params = method(
                cohort_id,
                idx,
                prepend=prepend,
                custom_match_field=person_id_joined_alias)  # type: ignore
            if has_person_id_joined:
                final.append(f"{property_operator} {filter_query}")
            else:
                # :TODO: (performance) Avoid subqueries whenever possible, use joins instead
                subquery = GET_DISTINCT_IDS_BY_PERSON_ID_FILTER.format(
                    filters=filter_query,
                    GET_TEAM_PERSON_DISTINCT_IDS=get_team_distinct_ids_query(
                        team_id),
                )
                final.append(
                    f"{property_operator} {table_name}distinct_id IN ({subquery})"
                )
            params.update(filter_params)

    if final:
        # remove the first operator
        return " ".join(final).replace(property_operator, "", 1), params

    return "", params