def _bucketize(self, queries, fields, size, specs):
    """Run one terms aggregation per field and collect the buckets.

    Args:
        queries: Query input handed to ``compile``. When falsy, a
            ``match_all`` query is sent instead.
        fields: Iterable of field labels to aggregate on.
        size: Value sent as ``size`` of every terms aggregation.
        specs: Specs passed to ``compile``; ``specs[0]`` is also passed to
            ``check_nested_label``. When falsy, ``self._specs()`` is used.

    Returns:
        A dict with one entry per key of the response's ``aggregations``;
        each value is a dict populated by ``self._scan_bucket``.
    """
    if not specs:
        specs = self._specs()

    if queries:
        query = compile(queries, specs)[0]
    else:
        query = {"match_all": {}}
    body = {"query": query, "aggs": {}, "size": 0}

    for label in fields:
        nested_paths, target = check_nested_label(specs[0], label)

        # Fields typed as "text" are aggregated via their ".keyword" variant.
        if (
            "types" in specs[0]
            and target in specs[0]["types"]
            and specs[0]["types"][target] == "text"
        ):
            target = target + ".keyword"

        # Start from the terms aggregation and wrap it once per nested path.
        agg = {"terms": {"field": target, "size": size}}
        for path in nested_paths or ():
            agg = {"nested": {"path": path}, "aggs": {path: agg}}
        body["aggs"][label] = agg

    # Send the aggregation request.
    response = requests.post(
        self._host + "/" + self._index + "/_search", json=body
    )
    self._check_error(response)

    # Summarize the results, one bucket dict per returned aggregation.
    collected = {}
    aggregations = response.json()["aggregations"]
    for name in aggregations:
        bucket = {}
        collected[name] = bucket
        self._scan_bucket(bucket, aggregations[name])
    return collected
def search(self, queries, size=10000, where_size=200):
    """Yield the hits of a search built from ``queries``.

    ``compile`` is called with ``self._specs()``. Its first element is the
    main query. If it returns more than one element, the second is first
    run against the ``self._where`` index with a terms aggregation on
    ``recording.keyword``; the resulting bucket keys are then required as
    document ids in the main query.

    Args:
        queries: Query input handed to ``compile``.
        size: Value sent as ``size`` of the main search request.
        where_size: Value sent as ``size`` of the terms aggregation in the
            optional pre-query.

    Yields:
        Each element of ``["hits"]["hits"]`` in the main search response.
    """
    compiled = compile(queries, self._specs())
    main_query = compiled[0]

    if len(compiled) > 1:
        where_url = self._host + "/" + self._where + "/_search"
        recording_terms = {
            "field": "recording.keyword",
            "min_doc_count": 1,
            "size": where_size,
        }
        where_body = {
            "query": compiled[1],
            "size": 0,
            "aggs": {"recording": {"terms": recording_terms}},
        }
        where_response = requests.post(where_url, json=where_body)
        self._check_error(where_response)

        where_buckets = where_response.json()["aggregations"]["recording"]["buckets"]
        ids = [bucket["key"] for bucket in where_buckets]
        # Restrict the main query to the ids found by the pre-query.
        main_query = {"bool": {"must": [main_query, {"ids": {"values": ids}}]}}

    search_url = self._host + "/" + self._index + "/_search"
    search_body = {
        "query": main_query,
        "size": size,
        "seq_no_primary_term": True,
    }
    response = requests.post(search_url, json=search_body)
    self._check_error(response)

    hits = response.json()["hits"]["hits"]
    for hit in hits:
        yield hit
def count(self, queries, spec=None):
    """Return the ``count`` reported by the index's ``_count`` endpoint.

    Args:
        queries: Query input handed to ``compile``.
        spec: Spec passed to ``compile``. When ``None``, the result of
            ``self._spec_from_index()`` is used.

    Returns:
        The ``"count"`` entry of the value returned by ``self._request``.
    """
    if spec is None:
        spec = self._spec_from_index()

    body = {"query": compile(queries, spec)}
    result = self._request(
        self._requests.post,
        self._host + "/" + self._index + "/_count",
        json=body,
    )
    return result["count"]
def count(self, queries):
    """Return the ``count`` reported by the type-scoped ``_count`` endpoint.

    The query is the first element of ``compile(queries, self._specs())``.
    The request URL is built from ``self._host``, ``self._index`` and
    ``self._type``.

    Args:
        queries: Query input handed to ``compile``.

    Returns:
        The ``"count"`` entry of the JSON response.
    """
    compiled = compile(queries, self._specs())
    body = {"query": compiled[0]}

    response = requests.post(
        self._host + "/" + self._index + "/" + self._type + "/_count",
        json=body,
    )
    self._check_error(response)

    payload = response.json()
    return payload["count"]
def search(self, queries, size=10000):
    """Yield the hits of a search built from ``queries``.

    Args:
        queries: Query input handed to ``compile`` together with
            ``self._spec_from_index()``.
        size: Value sent as ``size`` of the search request.

    Yields:
        Each element of ``["hits"]["hits"]`` in the value returned by
        ``self._request``.
    """
    query = compile(queries, self._spec_from_index())
    body = {
        "query": query,
        "size": size,
        "seq_no_primary_term": True,
    }
    result = self._request(
        requests.post,
        self._host + "/" + self._index + "/_search",
        json=body,
    )

    hits = result["hits"]["hits"]
    for hit in hits:
        yield hit
def stats(self, queries, fields):
    """Request a ``stats`` aggregation for each field and return the results.

    Args:
        queries: Query input handed to ``compile`` with ``self._specs()``;
            the first compiled element is used as the query.
        fields: Iterable of field labels to compute stats for.

    Returns:
        A dict mapping each field label to its entry in the response's
        ``aggregations``, after stripping the levels keyed by the label.
    """
    specs = self._specs()
    compiled = compile(queries, specs)
    body = {"query": compiled[0], "aggs": {}, "size": 0}

    for label in fields:
        nested_paths, target = check_nested_label(specs[0], label)

        # Wrap the stats aggregation once per nested path; every wrapper
        # keys its inner aggregation by the field label.
        agg = {"stats": {"field": target, "missing": 0}}
        for path in nested_paths or ():
            agg = {"nested": {"path": path}, "aggs": {label: agg}}
        body["aggs"][label] = agg

    response = requests.post(
        self._host + "/" + self._index + "/_search", json=body
    )
    self._check_error(response)
    aggregations = response.json()["aggregations"]

    results = {}
    for label in fields:
        # Descend for as long as the current level is keyed by the label.
        node = aggregations
        while label in node:
            node = node[label]
        results[label] = node
    return results
def stats(self, queries, fields, spec=None):
    """Request a ``stats`` aggregation for each field and return the results.

    Args:
        queries: Query input handed to ``compile``.
        fields: Iterable of field labels to compute stats for.
        spec: Spec passed to ``compile`` and ``check_nested_label``. When
            ``None``, the result of ``self._spec_from_index()`` is used.

    Returns:
        A dict mapping each field label to its entry in the ``aggregations``
        of the value returned by ``self._request``, after stripping the
        levels keyed by the label.
    """
    if spec is None:
        spec = self._spec_from_index()

    body = {"query": compile(queries, spec), "aggs": {}, "size": 0}

    for label in fields:
        nested_paths, target = check_nested_label(spec, label)

        # Wrap the stats aggregation once per nested path; every wrapper
        # keys its inner aggregation by the field label.
        agg = {"stats": {"field": target, "missing": 0}}
        for path in nested_paths or ():
            agg = {"nested": {"path": path}, "aggs": {label: agg}}
        body["aggs"][label] = agg

    result = self._request(
        self._requests.post,
        self._host + "/" + self._index + "/_search",
        json=body,
    )
    aggregations = result["aggregations"]

    results = {}
    for label in fields:
        # Descend for as long as the current level is keyed by the label.
        node = aggregations
        while label in node:
            node = node[label]
        results[label] = node
    return results
def count(self, queries):
    """Return the ``count`` reported by the index's ``_count`` endpoint.

    Args:
        queries: Query input handed to ``compile`` together with
            ``self._spec_from_index()``.

    Returns:
        The ``"count"`` entry of the value returned by ``self._request``.
    """
    query = compile(queries, self._spec_from_index())
    body = {"query": query}

    result = self._request(
        requests.post,
        self._host + "/" + self._index + "/_count",
        json=body,
    )
    return result["count"]