Пример #1
0
    def property_to_Q(self) -> Q:
        """Build the Django ``Q`` filter that corresponds to this property.

        Cohort properties become an ``Exists`` subquery over ``CohortPeople``
        restricted to the looked-up cohort's ``version``. Every other
        supported type becomes a lookup on the ``group_properties`` column
        (type ``"group"``) or on the ``properties`` column (anything else).

        Returns:
            A ``Q`` object; ``Q(pk=-1)`` when a regex operator is given a
            pattern that ``is_valid_regex`` rejects.

        Raises:
            ValueError: if ``self.type`` is listed in
                ``CLICKHOUSE_ONLY_PROPERTY_TYPES``.
        """
        from .cohort import CohortPeople

        if self.type in CLICKHOUSE_ONLY_PROPERTY_TYPES:
            raise ValueError(f"property_to_Q: type is not supported: {repr(self.type)}")

        parsed = self._parse_value(self.value)

        if self.type == "cohort":
            from posthog.models.cohort import Cohort

            cohort = Cohort.objects.get(pk=int(cast(Union[str, int], parsed)))
            members = CohortPeople.objects.filter(
                cohort_id=cohort.pk,
                person_id=OuterRef("id"),
                version=cohort.version,
            ).only("id")
            return Q(Exists(members))

        column = "properties"
        if self.type == "group":
            column = "group_properties"
        field = f"{column}__{self.key}"
        has_key = f"{column}__has_key"
        op = self.operator

        if op == "is_not":
            # Rows whose value differs, plus rows that lack the key entirely.
            return Q(~lookup_q(field, parsed) | ~Q(**{has_key: self.key}))
        if op in ("is_set", "is_not_set"):
            return Q(**{f"{field}__isnull": op == "is_not_set"})
        if op in ("regex", "not_regex") and not is_valid_regex(parsed):
            # Return no data for invalid regexes
            return Q(pk=-1)
        if isinstance(op, str) and op.startswith("not_"):
            # Strip the "not_" prefix and negate the positive lookup; rows
            # without the key or with a null value also count as a match.
            negated = ~Q(**{f"{field}__{op[4:]}": parsed})
            return Q(negated | ~Q(**{has_key: self.key}) | Q(**{field: None}))

        if op is None or op == "exact":
            return lookup_q(field, parsed)
        assert not isinstance(parsed, list)
        return Q(**{f"{field}__{op}": parsed})
Пример #2
0
    def property_to_Q(self) -> Q:
        """Build the Django ``Q`` filter that corresponds to this property.

        Cohort properties become an ``Exists`` subquery over ``CohortPeople``
        keyed on the cohort id and the outer row's ``id``. Everything else is
        expressed as a lookup on the ``properties`` column.

        Returns:
            A ``Q`` object; ``Q(pk=-1)`` when a regex operator is given a
            pattern that ``is_valid_regex`` rejects.

        Raises:
            ValueError: if ``self.type`` is listed in
                ``CLICKHOUSE_ONLY_PROPERTY_TYPES``.
        """
        from .cohort import CohortPeople

        if self.type in CLICKHOUSE_ONLY_PROPERTY_TYPES:
            raise ValueError(f"property_to_Q: type is not supported: {repr(self.type)}")

        parsed = self._parse_value(self.value)

        if self.type == "cohort":
            members = CohortPeople.objects.filter(
                cohort_id=int(cast(Union[str, int], parsed)),
                person_id=OuterRef("id"),
            ).only("id")
            return Q(Exists(members))

        field = f"properties__{self.key}"
        op = self.operator

        if op == "is_not":
            # Rows whose value differs, plus rows that lack the key entirely.
            return Q(~lookup_q(field, parsed) | ~Q(properties__has_key=self.key))
        if op in ("is_set", "is_not_set"):
            return Q(**{f"{field}__isnull": op == "is_not_set"})
        if op in ("regex", "not_regex") and not is_valid_regex(parsed):
            # Return no data for invalid regexes
            return Q(pk=-1)
        if isinstance(op, str) and op.startswith("not_"):
            # Strip the "not_" prefix and negate the positive lookup; rows
            # without the key or with a null value also count as a match.
            negated = ~Q(**{f"{field}__{op[4:]}": parsed})
            return Q(negated | ~Q(properties__has_key=self.key) | Q(**{field: None}))

        if op is None or op == "exact":
            return lookup_q(field, parsed)
        assert not isinstance(parsed, list)
        return Q(**{f"{field}__{op}": parsed})
Пример #3
0
    def property_to_Q(self) -> Q:
        """Build the Django ``Q`` filter that corresponds to this property.

        Cohort properties become an ``Exists`` subquery over ``CohortPeople``
        keyed on the cohort id and the outer row's ``id``. Everything else is
        expressed as a lookup on the ``properties`` column, with the operator
        (when set) appended as the lookup suffix.

        Returns:
            A ``Q`` object; ``Q(pk=-1)`` when a regex operator is given a
            pattern that ``is_valid_regex`` rejects.
        """
        from .cohort import CohortPeople

        parsed = self._parse_value(self.value)

        if self.type == "cohort":
            members = CohortPeople.objects.filter(
                cohort_id=int(parsed),
                person_id=OuterRef("id"),
            ).only("id")
            return Q(Exists(members))

        field = f"properties__{self.key}"
        op = self.operator

        if op == "is_not":
            # Rows whose value differs, plus rows that lack the key entirely.
            return Q(~Q(**{field: parsed}) | ~Q(properties__has_key=self.key))
        if op in ("is_set", "is_not_set"):
            return Q(**{f"{field}__isnull": op == "is_not_set"})
        if op in ("regex", "not_regex") and not is_valid_regex(parsed):
            # Return no data for invalid regexes
            return Q(pk=-1)
        if isinstance(op, str) and op.startswith("not_"):
            # Strip the "not_" prefix and negate the positive lookup; rows
            # without the key or with a null value also count as a match.
            negated = ~Q(**{f"{field}__{op[4:]}": parsed})
            return Q(negated | ~Q(properties__has_key=self.key) | Q(**{field: None}))

        # A falsy operator means a plain equality lookup on the key.
        suffix = f"__{op}" if op else ""
        return Q(**{f"{field}{suffix}": parsed})
Пример #4
0
def prop_filter_json_extract(
    prop: Property,
    idx: int,
    prepend: str = "",
    prop_var: str = "properties",
    allow_denormalized_props: bool = True,
    transform_expression: Optional[Callable[[str], str]] = None,
) -> Tuple[str, Dict[str, Any]]:
    """Render one property filter as an ``AND ...`` SQL fragment with its parameters.

    Args:
        prop: The filter to render; ``key``, ``value`` and ``operator`` are read.
        idx: Index used to make the placeholder names unique.
        prepend: Extra text inserted into the placeholder names.
        prop_var: SQL expression holding the JSON properties.
        allow_denormalized_props: Forwarded to ``get_property_string_expr``.
        transform_expression: Optional rewrite applied to ``prop_var`` and,
            when the property is denormalized, to the property expression.

    Returns:
        ``(sql, params)``. ``params`` maps ``k{prepend}_{idx}`` to the
        property key and ``v{prepend}_{idx}`` to the comparison value. An
        invalid regex yields ``"AND 1 = 2"`` with no parameters.
    """
    # TODO: Once all queries are migrated over we can get rid of allow_denormalized_props
    key_param = f"k{prepend}_{idx}"
    value_param = f"v{prepend}_{idx}"
    key_ref = f"%({key_param})s"
    value_ref = f"%({value_param})s"

    if transform_expression is not None:
        prop_var = transform_expression(prop_var)

    property_expr, is_denormalized = get_property_string_expr(
        property_table(prop), prop.key, key_ref, prop_var, allow_denormalized_props
    )

    if is_denormalized and transform_expression:
        property_expr = transform_expression(property_expr)

    def bound(value: Any) -> Dict[str, Any]:
        # Every branch binds the same two placeholders; only the value differs.
        return {key_param: prop.key, value_param: value}

    operator = prop.operator

    if operator == "is_not":
        return f"AND NOT has({value_ref}, {property_expr})", bound(box_value(prop.value))

    if operator == "icontains":
        return f"AND {property_expr} ILIKE {value_ref}", bound(f"%{prop.value}%")

    if operator == "not_icontains":
        return f"AND NOT ({property_expr} ILIKE {value_ref})", bound(f"%{prop.value}%")

    if operator in ("regex", "not_regex"):
        if not is_valid_regex(prop.value):
            # An invalid pattern must match nothing.
            return "AND 1 = 2", {}
        matcher = "match" if operator == "regex" else "NOT match"
        return f"AND {matcher}({property_expr}, {value_ref})", bound(prop.value)

    if operator == "is_set":
        if is_denormalized:
            return f"AND notEmpty({property_expr})", bound(prop.value)
        return f"AND JSONHas({prop_var}, {key_ref})", bound(prop.value)

    if operator == "is_not_set":
        if is_denormalized:
            return f"AND empty({property_expr})", bound(prop.value)
        return (
            f"AND (isNull({property_expr}) OR NOT JSONHas({prop_var}, {key_ref}))",
            bound(prop.value),
        )

    if operator in ("gt", "lt"):
        comparator = ">" if operator == "gt" else "<"
        return (
            f"AND toFloat64OrNull(trim(BOTH '\"' FROM replaceRegexpAll({property_expr}, ' ', ''))) {comparator} {value_ref}",
            bound(prop.value),
        )

    # Any other operator (including none) is treated as equality.
    if is_json(prop.value) and not is_denormalized:
        # JSON values are compared against the raw extract with spaces removed.
        return (
            f"AND has({value_ref}, replaceRegexpAll(visitParamExtractRaw({prop_var}, {key_ref}),' ', ''))",
            bound(box_value(prop.value, remove_spaces=True)),
        )
    return f"AND has({value_ref}, {property_expr})", bound(box_value(prop.value))
Пример #5
0
def prop_filter_json_extract(
    prop: Property,
    idx: int,
    prepend: str = "",
    prop_var: str = "properties",
    allow_denormalized_props: bool = True,
    transform_expression: Optional[Callable[[str], str]] = None,
    property_operator: PropertyOperatorType = PropertyOperatorType.AND,
) -> Tuple[str, Dict[str, Any]]:
    """Render one property filter as a SQL fragment with its parameters.

    The fragment is prefixed with ``property_operator`` so the caller can
    chain it onto an existing condition. Most branches emit a leading space
    before the operator; the invalid-regex and date branches do not. That
    difference is preserved as found.

    Args:
        prop: The filter to render; ``key``, ``value`` and ``operator`` are read.
        idx: Index used to make the placeholder names unique.
        prepend: Extra text inserted into the placeholder names.
        prop_var: SQL expression holding the JSON properties.
        allow_denormalized_props: Forwarded to ``get_property_string_expr``.
        transform_expression: Optional rewrite applied to ``prop_var`` and,
            when the property is denormalized, to the property expression.
        property_operator: Connective placed in front of the fragment.

    Returns:
        ``(sql, params)``. ``params`` maps ``k{prepend}_{idx}`` to the
        property key and ``v{prepend}_{idx}`` to the comparison value. An
        invalid regex yields ``"<property_operator> 1 = 2"`` with no
        parameters.

    Raises:
        AssertionError: if a date operator is used with a non-string value.
    """
    # TODO: Once all queries are migrated over we can get rid of allow_denormalized_props
    key_param = f"k{prepend}_{idx}"
    value_param = f"v{prepend}_{idx}"
    key_ref = f"%({key_param})s"
    value_ref = f"%({value_param})s"

    if transform_expression is not None:
        prop_var = transform_expression(prop_var)

    property_expr, is_denormalized = get_property_string_expr(
        property_table(prop), prop.key, key_ref, prop_var, allow_denormalized_props
    )

    if is_denormalized and transform_expression:
        property_expr = transform_expression(property_expr)

    def bound(value: Any) -> Dict[str, Any]:
        # Every branch binds the same two placeholders; only the value differs.
        return {key_param: prop.key, value_param: value}

    operator = prop.operator

    if operator == "is_not":
        return (
            f" {property_operator} NOT has({value_ref}, {property_expr})",
            bound(box_value(prop.value)),
        )

    if operator == "icontains":
        return (
            f" {property_operator} {property_expr} ILIKE {value_ref}",
            bound(f"%{prop.value}%"),
        )

    if operator == "not_icontains":
        return (
            f" {property_operator} NOT ({property_expr} ILIKE {value_ref})",
            bound(f"%{prop.value}%"),
        )

    if operator in ("regex", "not_regex"):
        if not is_valid_regex(prop.value):
            # If OR'ing, shouldn't be a problem since nothing will match this specific clause
            return f"{property_operator} 1 = 2", {}
        matcher = "match" if operator == "regex" else "NOT match"
        return (
            f" {property_operator} {matcher}({property_expr}, {value_ref})",
            bound(prop.value),
        )

    if operator == "is_set":
        if is_denormalized:
            return f" {property_operator} notEmpty({property_expr})", bound(prop.value)
        return f" {property_operator} JSONHas({prop_var}, {key_ref})", bound(prop.value)

    if operator == "is_not_set":
        if is_denormalized:
            return f" {property_operator} empty({property_expr})", bound(prop.value)
        return (
            f" {property_operator} (isNull({property_expr}) OR NOT JSONHas({prop_var}, {key_ref}))",
            bound(prop.value),
        )

    if operator in ("is_date_exact", "is_date_after", "is_date_before"):
        assert isinstance(prop.value, str)
        # A bare YYYY-MM-DD value carries no time component.
        is_date_only = re.match(r"^\d{4}-\d{2}-\d{2}$", prop.value)

        if operator == "is_date_exact":
            # if we're comparing against a date with no time,
            # truncate the values in the DB which may have times
            granularity = "day" if is_date_only else "second"
            # Previously hard-coded "AND", which ignored property_operator
            # and turned OR-combined filters into AND for this operator.
            query = f"""{property_operator} date_trunc('{granularity}', coalesce(
            parseDateTimeBestEffortOrNull({property_expr}),
            parseDateTimeBestEffortOrNull(substring({property_expr}, 1, 10))
        )) = {value_ref}"""
            return query, bound(prop.value)

        # Parse the whole stored string first and fall back to its first 10 characters.
        stored_datetime = (
            f"coalesce(parseDateTimeBestEffortOrNull({property_expr}),"
            f"parseDateTimeBestEffortOrNull(substring({property_expr}, 1, 10)))"
        )

        if operator == "is_date_after":
            # if we're comparing against a date with no time,
            # then instead of 2019-01-01 (implied 00:00:00)
            # use 2019-01-01 23:59:59
            if is_date_only:
                threshold = f"subtractSeconds(addDays(toDate({value_ref}), 1), 1)"
            else:
                threshold = value_ref
            return f"{property_operator} {stored_datetime} > {threshold}", bound(prop.value)

        return f"{property_operator} {stored_datetime} < {value_ref}", bound(prop.value)

    if operator in ("gt", "lt"):
        comparator = ">" if operator == "gt" else "<"
        return (
            f" {property_operator} toFloat64OrNull(trim(BOTH '\"' FROM replaceRegexpAll({property_expr}, ' ', ''))) {comparator} {value_ref}",
            bound(prop.value),
        )

    # Any other operator (including none) is treated as equality.
    if is_json(prop.value) and not is_denormalized:
        # JSON values are compared against the raw extract with spaces removed.
        return (
            f" {property_operator} has({value_ref}, replaceRegexpAll(visitParamExtractRaw({prop_var}, {key_ref}),' ', ''))",
            bound(box_value(prop.value, remove_spaces=True)),
        )
    return (
        f" {property_operator} has({value_ref}, {property_expr})",
        bound(box_value(prop.value)),
    )
Пример #6
0
def prop_filter_json_extract(
        prop: Property,
        idx: int,
        prepend: str = "",
        prop_var: str = "properties",
        allow_denormalized_props: bool = False) -> Tuple[str, Dict[str, Any]]:
    """Render one property filter as an ``AND ...`` SQL fragment with its parameters.

    The left-hand side is either the denormalized column
    ``properties_<lowercased key>`` or a JSON extract from ``prop_var``. The
    column is used when ``allow_denormalized_props`` is true and the
    lowercased key is listed in
    ``settings.CLICKHOUSE_DENORMALIZED_PROPERTIES``.

    Args:
        prop: The filter to render; ``key``, ``value`` and ``operator`` are read.
        idx: Index used to make the placeholder names unique.
        prepend: Extra text inserted into the placeholder names.
        prop_var: SQL expression holding the JSON properties.
        allow_denormalized_props: Whether the denormalized column may be used.

    Returns:
        ``(sql, params)``. ``params`` maps ``k{prepend}_{idx}`` to the
        property key and ``v{prepend}_{idx}`` to the comparison value. An
        invalid regex yields ``"AND 1 = 2"`` with no parameters.
    """
    # Once all queries are migrated over we can get rid of allow_denormalized_props
    key_param = f"k{prepend}_{idx}"
    value_param = f"v{prepend}_{idx}"
    key_ref = f"%({key_param})s"
    value_ref = f"%({value_param})s"

    lowered_key = prop.key.lower()
    is_denormalized = lowered_key in settings.CLICKHOUSE_DENORMALIZED_PROPERTIES and allow_denormalized_props
    json_extract = f"trim(BOTH '\"' FROM JSONExtractRaw({prop_var}, {key_ref}))"
    denormalized = f"properties_{lowered_key}"
    left = denormalized if is_denormalized else json_extract

    def bound(value: Any) -> Dict[str, Any]:
        # Every branch binds the same two placeholders; only the value differs.
        return {key_param: prop.key, value_param: value}

    operator = prop.operator

    if operator == "is_not":
        return f"AND NOT has({value_ref}, {left})", bound(box_value(prop.value))

    if operator == "icontains":
        # ILIKE rather than LIKE: "icontains" must ignore case.
        return f"AND {left} ILIKE {value_ref}", bound(f"%{prop.value}%")

    if operator == "not_icontains":
        # ILIKE rather than LIKE: "not_icontains" must ignore case.
        return f"AND NOT ({left} ILIKE {value_ref})", bound(f"%{prop.value}%")

    if operator in ("regex", "not_regex"):
        if not is_valid_regex(prop.value):
            # An invalid pattern must match nothing.
            return "AND 1 = 2", {}
        matcher = "match" if operator == "regex" else "NOT match"
        return f"AND {matcher}({left}, {value_ref})", bound(prop.value)

    if operator == "is_set":
        if is_denormalized:
            return f"AND NOT isNull({denormalized})", bound(prop.value)
        return f"AND JSONHas({prop_var}, {key_ref})", bound(prop.value)

    if operator == "is_not_set":
        if is_denormalized:
            return f"AND isNull({denormalized})", bound(prop.value)
        return (
            f"AND (isNull({json_extract}) OR NOT JSONHas({prop_var}, {key_ref}))",
            bound(prop.value),
        )

    if operator in ("gt", "lt"):
        comparator = ">" if operator == "gt" else "<"
        # Numeric comparisons read the raw JSON value, not the trimmed extract.
        numeric_source = denormalized if is_denormalized else f"visitParamExtractRaw({prop_var}, {key_ref})"
        # toFloat64OrNull rather than toInt64OrNull: the integer cast turned
        # non-integer values such as "1.5" into NULL, so they never matched.
        return (
            f"AND toFloat64OrNull(trim(BOTH '\"' FROM replaceRegexpAll({numeric_source}, ' ', ''))) {comparator} {value_ref}",
            bound(prop.value),
        )

    # Any other operator (including none) is treated as equality.
    if is_json(prop.value) and not is_denormalized:
        # JSON values are compared against the raw extract with spaces removed.
        return (
            f"AND has({value_ref}, replaceRegexpAll(visitParamExtractRaw({prop_var}, {key_ref}),' ', ''))",
            bound(box_value(prop.value, remove_spaces=True)),
        )
    return f"AND has({value_ref}, {left})", bound(box_value(prop.value))