Exemplo n.º 1
0
 def test_leaf_boolean_filter(self):
     """Check the query DSL dict produced by ``build()`` for each leaf filter."""
     # Range / equality comparisons.
     assert GreaterEqual("a", 2).build() == {"range": {"a": {"gte": 2}}}
     assert LessEqual("a", 2).build() == {"range": {"a": {"lte": 2}}}
     assert Less("a", 2).build() == {"range": {"a": {"lt": 2}}}
     assert Equal("a", 2).build() == {"term": {"a": 2}}
     exp = Equal("a", 2)
     # This was previously ``assert <dict>, <dict>``: the second dict was only
     # the assertion *message*, so the negated filter was never compared.
     assert (~exp).build()["bool"] == {"must_not": {"term": {"a": 2}}}
     assert Greater("a", 2).build() == {"range": {"a": {"gt": 2}}}
     # Membership and pattern matching.
     assert IsIn("a", [1, 2, 3]).build() == {"terms": {"a": [1, 2, 3]}}
     assert Like("a", "a*b").build() == {"wildcard": {"a": "a*b"}}
     assert Rlike("a", "a*b").build() == {"regexp": {"a": "a*b"}}
     assert Startswith("a", "jj").build() == {"prefix": {"a": "jj"}}
     # Presence checks.
     assert IsNull("a").build() == {"missing": {"field": "a"}}
     assert NotNull("a").build() == {"exists": {"field": "a"}}
     # Script filter: source, language and params are nested under
     # "script" -> "script".
     assert ScriptFilter(
         'doc["num1"].value > params.param1', lang="painless", params={"param1": 5}
     ).build() == {
         "script": {
             "script": {
                 "lang": "painless",
                 "source": 'doc["num1"].value > params.param1',
                 "params": {"param1": 5},
             }
         }
     }
     # IsIn on the field name "ids" builds an ids query instead of a terms query.
     assert IsIn("ids", [1, 2, 3]).build() == {"ids": {"values": [1, 2, 3]}}
Exemplo n.º 2
0
 def terms(self, field: str, items: List[str], must: bool = True) -> None:
     """
     Add terms query
     https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-terms-query.html

     ``IsIn(field, items)`` is used as-is when ``must`` is true and negated
     with ``~`` otherwise. It replaces the current query when that query is
     empty, and is combined with it via ``&`` when it is not.
     """
     clause = IsIn(field, items)
     if not must:
         clause = ~clause
     self._query = clause if self._query.empty() else self._query & clause
Exemplo n.º 3
0
 def ids(self, items: List[Any], must: bool = True) -> None:
     """
     Add ids query
     https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-ids-query.html

     ``IsIn("ids", items)`` is used as-is when ``must`` is true and negated
     with ``~`` otherwise. It replaces the current query when that query is
     empty, and is combined with it via ``&`` when it is not.
     """
     clause = IsIn("ids", items)
     if not must:
         clause = ~clause
     self._query = clause if self._query.empty() else self._query & clause
Exemplo n.º 4
0
 def exists(self, field: str, must: bool = True) -> None:
     """
     Add exists query
     https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-exists-query.html

     Uses ``NotNull(field)`` when ``must`` is true and ``IsNull(field)``
     otherwise. The filter replaces the current query when that query is
     empty, and is combined with it via ``&`` when it is not.
     """
     clause = NotNull(field) if must else IsNull(field)
     self._query = clause if self._query.empty() else self._query & clause
Exemplo n.º 5
0
 def test_must_or_must_not_filter(self):
     """OR two range filters with a negated ids filter and check the built dict."""
     either_range = GreaterEqual("a", 2) | GreaterEqual("b", 2)
     combined = either_range | ~(IsIn("ids", [1, 2, 3]))
     built = combined.build()

     # All three operands are expected side by side in one "should" list,
     # with the negated ids filter wrapped in its own bool/must_not.
     expected_should = [
         {"range": {"a": {"gte": 2}}},
         {"range": {"b": {"gte": 2}}},
         {"bool": {"must_not": {"ids": {"values": [1, 2, 3]}}}},
     ]
     assert built == {"bool": {"should": expected_should}}
Exemplo n.º 6
0
 def __init__(self, query=None):
     """
     Start from an empty filter and no aggregations, or from deep copies of
     the ``_query`` and ``_aggs`` of ``query`` when one is given.
     """
     if query is not None:
         # Deep copy the incoming query so we can change it
         self._query = deepcopy(query._query)
         self._aggs = deepcopy(query._aggs)
         return

     self._query = BooleanFilter()
     self._aggs = {}
Exemplo n.º 7
0
    def __init__(self, query: Optional["Query"] = None):
        """
        Start from an empty filter and no aggregations, or from deep copies of
        the ``_query`` and ``_aggs`` of ``query`` when one is given.
        """
        # type defs
        self._query: BooleanFilter
        self._aggs: Dict[str, Any]

        if query is not None:
            # Deep copy the incoming query so we can change it
            self._query = deepcopy(query._query)
            self._aggs = deepcopy(query._aggs)
            return

        self._query = BooleanFilter()
        self._aggs = {}
Exemplo n.º 8
0
 def isin(self, other: Union[Collection, pd.Series]) -> BooleanFilter:
     """
     Build an ``IsIn`` filter on ``self.name`` from ``other``.

     ``other`` must be a ``Collection`` or a ``pd.Series``; it is passed
     through ``to_list`` before being handed to ``IsIn``. Anything else
     raises ``NotImplementedError(other, type(other))``.
     """
     if not isinstance(other, (Collection, pd.Series)):
         raise NotImplementedError(other, type(other))
     return IsIn(field=self.name, value=to_list(other))
Exemplo n.º 9
0
class Query:
    """
    Simple class to manage building Elasticsearch queries.

    Holds one boolean filter (``_query``) and a dict of named aggregations
    (``_aggs``) and renders them as search / count request bodies.
    """

    def __init__(self, query: Optional["Query"] = None):
        """
        Start empty, or from deep copies of the filter and aggregations of
        ``query`` when one is given.
        """
        # type defs
        self._query: BooleanFilter
        self._aggs: Dict[str, Any]

        if query is not None:
            # Deep copy the incoming query so we can change it
            self._query = deepcopy(query._query)
            self._aggs = deepcopy(query._aggs)
            return

        self._query = BooleanFilter()
        self._aggs = {}

    def exists(self, field: str, must: bool = True) -> None:
        """
        Add exists query
        https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-exists-query.html

        Uses ``NotNull(field)`` when ``must`` is true, ``IsNull(field)`` otherwise.
        """
        clause = NotNull(field) if must else IsNull(field)
        self._query = clause if self._query.empty() else self._query & clause

    def ids(self, items: List[Any], must: bool = True) -> None:
        """
        Add ids query
        https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-ids-query.html

        Uses ``IsIn("ids", items)``, negated with ``~`` when ``must`` is false.
        """
        clause = IsIn("ids", items)
        if not must:
            clause = ~clause
        self._query = clause if self._query.empty() else self._query & clause

    def terms(self, field: str, items: List[str], must: bool = True) -> None:
        """
        Add terms query
        https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-terms-query.html

        Uses ``IsIn(field, items)``, negated with ``~`` when ``must`` is false.
        """
        clause = IsIn(field, items)
        if not must:
            clause = ~clause
        self._query = clause if self._query.empty() else self._query & clause

    def terms_aggs(self, name: str, func: str, field: str, es_size: int) -> None:
        """
        Add terms agg e.g

        "aggs": {
            "name": {
                "terms": {
                    "field": "Airline",
                    "size": 10
                }
            }
        }
        """
        self._aggs[name] = {func: {"field": field, "size": es_size}}

    def metric_aggs(self, name: str, func: str, field: str) -> None:
        """
        Add metric agg e.g

        "aggs": {
            "name": {
                "max": {
                    "field": "AvgTicketPrice"
                }
            }
        }
        """
        self._aggs[name] = {func: {"field": field}}

    def hist_aggs(
        self, name: str, field: str, min_value: Any, max_value: Any, num_bins: int,
    ) -> None:
        """
        Add histogram agg e.g.
        "aggs": {
            "name": {
                "histogram": {
                    "field": "AvgTicketPrice"
                    "interval": (max_value - min_value)/bins
                    "offset": min_value
                }
            }
        }

        Nothing is stored when the computed interval is zero.
        """
        bin_width = (max_value - min_value) / num_bins
        if bin_width == 0:
            return

        self._aggs[name] = {
            "histogram": {"field": field, "interval": bin_width, "offset": min_value}
        }

    def to_search_body(self) -> Dict[str, Any]:
        """Return the search body: "aggs" and/or "query", each only when non-empty."""
        search_body = {}
        if self._aggs:
            search_body["aggs"] = self._aggs
        if not self._query.empty():
            search_body["query"] = self._query.build()
        return search_body

    def to_count_body(self) -> Optional[Dict[str, Any]]:
        """
        Return ``{"query": ...}`` for a count request, or ``None`` when the
        filter is empty. Warns if aggregations are set on this query.
        """
        if len(self._aggs) > 0:
            warnings.warn(f"Requesting count for agg query {self}")
        if not self._query.empty():
            return {"query": self._query.build()}
        return None

    def update_boolean_filter(self, boolean_filter: BooleanFilter) -> None:
        """Replace an empty filter with ``boolean_filter``, else combine via ``&``."""
        if not self._query.empty():
            self._query = self._query & boolean_filter
            return
        self._query = boolean_filter

    def __repr__(self) -> str:
        """Return the ``repr`` of the search body."""
        return repr(self.to_search_body())
Exemplo n.º 10
0
 def isin(self, other):
     """
     Build an ``IsIn`` filter on ``self.name`` from the list ``other``.

     Anything that is not a ``list`` raises
     ``NotImplementedError(other, type(other))``.
     """
     if not isinstance(other, list):
         raise NotImplementedError(other, type(other))
     return IsIn(field=self.name, value=other)
Exemplo n.º 11
0
class Query:
    """
    Simple class to manage building Elasticsearch queries.

    Holds one boolean filter (``_query``) and a dict of named aggregations
    (``_aggs``) and renders them as search / count request bodies.
    """

    def __init__(self, query=None):
        """
        Create an empty query, or an independent copy of ``query``.

        When ``query`` is given, its ``_query`` and ``_aggs`` are deep copied.
        """
        if query is None:
            self._query = BooleanFilter()
            self._aggs = {}
        else:
            # Deep copy the incoming query so we can change it
            self._query = deepcopy(query._query)
            self._aggs = deepcopy(query._aggs)

    def exists(self, field, must=True):
        """
        Add exists query
        https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-exists-query.html

        ``must=True`` adds ``NotNull(field)``; ``must=False`` adds
        ``IsNull(field)``.
        """
        self.update_boolean_filter(NotNull(field) if must else IsNull(field))

    def ids(self, items, must=True):
        """
        Add ids query
        https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-ids-query.html

        ``must=True`` adds ``IsIn("ids", items)``; ``must=False`` adds its
        negation (``~``).
        """
        id_filter = IsIn("ids", items)
        self.update_boolean_filter(id_filter if must else ~id_filter)

    def terms(self, field, items, must=True):
        """
        Add terms query
        https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-terms-query.html

        ``must=True`` adds ``IsIn(field, items)``; ``must=False`` adds its
        negation (``~``).
        """
        terms_filter = IsIn(field, items)
        self.update_boolean_filter(terms_filter if must else ~terms_filter)

    def terms_aggs(self, name, func, field, es_size):
        """
        Add terms agg e.g

        "aggs": {
            "name": {
                "terms": {
                    "field": "Airline",
                    "size": 10
                }
            }
        }

        An existing aggregation stored under ``name`` is overwritten.
        """
        self._aggs[name] = {func: {"field": field, "size": es_size}}

    def metric_aggs(self, name, func, field):
        """
        Add metric agg e.g

        "aggs": {
            "name": {
                "max": {
                    "field": "AvgTicketPrice"
                }
            }
        }

        An existing aggregation stored under ``name`` is overwritten.
        """
        self._aggs[name] = {func: {"field": field}}

    def hist_aggs(self, name, field, min_aggs, max_aggs, num_bins):
        """
        Add histogram agg e.g.
        "aggs": {
            "name": {
                "histogram": {
                    "field": "AvgTicketPrice"
                    "interval": (max_aggs[field] - min_aggs[field])/bins
                    "offset": min_aggs[field]
                }
            }
        }

        ``min_aggs`` and ``max_aggs`` are indexed by ``field`` to obtain the
        value range. Nothing is stored when the computed interval is zero.
        """
        # Local names avoid shadowing the ``min`` / ``max`` builtins.
        lower = min_aggs[field]
        upper = max_aggs[field]

        interval = (upper - lower) / num_bins
        if interval == 0:
            # A zero interval is not stored as a histogram; skip it.
            return

        self._aggs[name] = {
            "histogram": {
                "field": field,
                "interval": interval,
                "offset": lower
            }
        }

    def to_search_body(self):
        """
        Return the search body dict.

        It contains "aggs" when aggregations are set and "query" when the
        filter is not empty; an empty dict is returned when neither applies.
        """
        body = {}
        if self._aggs:
            body["aggs"] = self._aggs
        if not self._query.empty():
            body["query"] = self._query.build()
        return body

    def to_count_body(self):
        """
        Return ``{"query": ...}`` for a count request, or ``None`` when the
        filter is empty.

        A warning is emitted when aggregations are set on this query.
        """
        if self._aggs:
            # The message must be formatted before calling warn(): the second
            # positional argument of warnings.warn is the warning *category*,
            # so passing ``self`` there raised TypeError instead of warning.
            warnings.warn("Requesting count for agg query {}".format(self))
        if self._query.empty():
            return None
        return {"query": self._query.build()}

    def update_boolean_filter(self, boolean_filter):
        """
        Replace an empty filter with ``boolean_filter``; otherwise combine the
        current filter with it via ``&``.
        """
        if self._query.empty():
            self._query = boolean_filter
        else:
            self._query = self._query & boolean_filter

    def __repr__(self):
        """Return the ``repr`` of the search body."""
        return repr(self.to_search_body())