def format_cube(decoders, aggs, start, query, select):
    """
    Build a Cube from aggregation results, one Matrix per select clause.

    :param decoders: passed through to count_dim() and aggs_iterator()
    :param aggs: aggregation results to walk
    :param start: not used by this function
    :param query: supplies query.select for the Cube and is stored as cube.frum
    :param select: iterable of select clauses; each provides .default, .pull(agg) and .name
    :return: Cube whose edges are sorted by their .dim attribute
    """
    edges = count_dim(aggs, decoders)

    # ONE DIMENSION PER EDGE, PLUS A SLOT FOR NULLS UNLESS allowNulls IS EXACTLY False
    shape = []
    for edge in edges:
        if isinstance(edge.value, TupleOp):
            edge.allowNulls = False
        if edge.allowNulls is False:
            null_slots = 0
        else:
            null_slots = 1
        shape.append(len(edge.domain.partitions) + null_slots)
    shape = tuple(shape)

    columns = [(sel, Matrix(dims=shape, zeros=sel.default)) for sel in select]
    for _row, coord, agg in aggs_iterator(aggs, decoders):
        for sel, matrix in columns:
            try:
                value = sel.pull(agg)
                matrix[coord] = value
            except Exception as cause:
                # THIS HAPPENS WHEN ES RETURNS MORE TUPLE COMBINATIONS THAN DOCUMENTS
                # ONLY AN EMPTY BUCKET (doc_count == 0) IS ALLOWED TO FAIL SILENTLY
                if agg.get('doc_count') != 0:
                    Log.error("Programmer error", cause=cause)

    select_clause = query.select
    # ENSURE EDGES ARE IN SAME ORDER AS QUERY
    ordered_edges = sort_using_key(edges, key=lambda edge: edge.dim)
    data = {sel.name: matrix for sel, matrix in columns}

    cube = Cube(select_clause, ordered_edges, data)
    cube.frum = query
    return cube
def format_cube(decoders, aggs, start, query, select):
    """
    Build a Cube from aggregation results, one Matrix per select clause.

    :param decoders: passed through to count_dim() and aggs_iterator()
    :param aggs: aggregation results to walk
    :param start: not used by this function
    :param query: supplies query.select for the Cube and is stored as cube.frum
    :param select: iterable of select clauses; each provides .default, .pull(agg) and .name
    :return: Cube whose edges are sorted by their .dim attribute
    """
    edges = count_dim(aggs, decoders)

    # ONE DIMENSION PER EDGE, PLUS A SLOT FOR NULLS UNLESS allowNulls IS EXACTLY False
    shape = []
    for edge in edges:
        if isinstance(edge.value, TupleOp):
            edge.allowNulls = False
        if edge.allowNulls is False:
            null_slots = 0
        else:
            null_slots = 1
        shape.append(len(edge.domain.partitions) + null_slots)
    shape = tuple(shape)

    columns = [(sel, Matrix(dims=shape, zeros=sel.default)) for sel in select]
    for _row, coord, agg in aggs_iterator(aggs, decoders):
        for sel, matrix in columns:
            try:
                value = sel.pull(agg)
                matrix[coord] = value
            except Exception as cause:
                # THIS HAPPENS WHEN ES RETURNS MORE TUPLE COMBINATIONS THAN DOCUMENTS
                # ONLY AN EMPTY BUCKET (doc_count == 0) IS ALLOWED TO FAIL SILENTLY
                if agg.get('doc_count') != 0:
                    Log.error("Programmer error", cause=cause)

    select_clause = query.select
    # ENSURE EDGES ARE IN SAME ORDER AS QUERY
    ordered_edges = sort_using_key(edges, key=lambda edge: edge.dim)
    data = {sel.name: matrix for sel, matrix in columns}

    cube = Cube(select_clause, ordered_edges, data)
    cube.frum = query
    return cube
def format_cube_from_aggop(decoders, aggs, start, query, select):
    """
    Build an edgeless (zero-dimensional) Cube: a single value per select clause.

    :param decoders: not used by this function
    :param aggs: aggregation results; drill(aggs) is handed to each select's pull()
    :param start: not used by this function
    :param query: supplies query.select for the Cube and is stored as cube.frum
    :param select: iterable of select clauses; each provides .default, .pull(agg) and .name
    :return: Cube with an empty edge list
    """
    root = drill(aggs)

    # ALLOCATE EVERY MATRIX FIRST, THEN FILL THE SINGLE CELL OF EACH
    columns = [(sel, Matrix(dims=[], zeros=sel.default)) for sel in select]
    for sel, matrix in columns:
        matrix[tuple()] = sel.pull(root)

    data = {sel.name: matrix for sel, matrix in columns}
    cube = Cube(query.select, [], data)
    cube.frum = query
    return cube
def es_fieldop(es, query):
    """
    Request the selected fields from ES and return the hits as a Cube.

    :param es: handed to es_post() together with the assembled request
    :param query: supplies select, where, limit, sort and edges
    :return: Cube with one Matrix per select clause, keyed by the clause name
    """
    es_query = es09.util.build_es_query(query)
    select = listwrap(query.select)
    es_query.query = {
        "bool": {
            "query": {
                "match_all": {}
            },
            "filter": jx_expression(query.where).to_esfilter()
        }
    }
    es_query.size = coalesce(query.limit, 200000)

    # COLLECT THE FIELD NAMES TO REQUEST; "*" RESETS THE LIST TO None
    es_query.fields = FlatList()
    for value in select.value:
        if value == "*":
            es_query.fields = None
        elif isinstance(value, list):
            es_query.fields.extend(value)
        elif isinstance(value, Mapping):
            es_query.fields.extend(value.values())
        else:
            es_query.fields.append(value)

    sort_clauses = []
    for term in query.sort:
        sort_clauses.append({term.field: "asc" if term.sort >= 0 else "desc"})
    es_query.sort = sort_clauses

    data = es_post(es, es_query, query.limit)
    hits = data.hits.hits

    matricies = {}
    for s in select:
        if s.value == "*":
            # WHOLE DOCUMENT
            sources = [hit._source for hit in hits]
            matricies[s.name] = Matrix.wrap(sources)
        elif isinstance(s.value, Mapping):
            # ONE dict PER HIT, KEYED AS THE MAPPING IS KEYED
            records = []
            for hit in hits:
                records.append({
                    k: unwrap(hit.fields).get(v, None)
                    for k, v in s.value.items()
                })
            matricies[s.name] = Matrix.wrap(records)
        elif isinstance(s.value, list):
            # ONE tuple PER HIT, IN THE ORDER THE FIELDS ARE LISTED
            tuples = []
            for hit in hits:
                tuples.append(tuple(unwrap(hit.fields).get(ss, None) for ss in s.value))
            matricies[s.name] = Matrix.wrap(tuples)
        elif not s.value:
            matricies[s.name] = Matrix.wrap(
                [unwrap(hit.fields).get(s.value, None) for hit in hits]
            )
        else:
            # SAME LOOKUP AS THE BRANCH ABOVE, BUT FAILURES ARE REPORTED THROUGH Log.error
            try:
                matricies[s.name] = Matrix.wrap(
                    [unwrap(hit.fields).get(s.value, None) for hit in hits]
                )
            except Exception as cause:
                Log.error("", cause)

    cube = Cube(query.select, query.edges, matricies, frum=query)
    cube.frum = query
    return cube
def format_cube(aggs, es_query, query, decoders, all_selects):
    """
    Build a Cube from aggregation results, merging values per cell with union().

    :param aggs: aggregation results to walk
    :param es_query: passed through to count_dim() and aggs_iterator()
    :param query: supplies query.select for the Cube and is stored as cube.frum
    :param decoders: passed through to count_dim() and aggs_iterator()
    :param all_selects: select clauses; each provides .name, .default, .aggregate and .pull(agg)
    :return: Cube whose edges are sorted by their .dim attribute
    """
    edges = count_dim(aggs, es_query, decoders)

    # ONE DIMENSION PER EDGE, PLUS A SLOT FOR NULLS UNLESS allowNulls IS EXACTLY False
    shape = []
    for edge in edges:
        if isinstance(edge.value, TupleOp):
            edge.allowNulls = False
        null_slots = 1
        if edge.allowNulls is False:
            null_slots = 0
        shape.append(len(edge.domain.partitions) + null_slots)
    shape = tuple(shape)

    # UNUSUAL DEFAULT VALUES MESS THE union() FUNCTION, SO THEY ARE FILLED IN AFTERWARD
    custom_defaults = any(
        s.default != canonical_aggregates[s.aggregate].default
        for s in all_selects
    )
    if custom_defaults:
        untouched = Matrix(dims=shape, zeros=True)
        matricies = {s.name: Matrix(dims=shape) for s in all_selects}
    else:
        untouched = None
        matricies = {
            s.name: Matrix(dims=shape, zeros=s.default)
            for s in all_selects
        }

    for _row, coord, agg, selects in aggs_iterator(aggs, es_query, decoders):
        for select in selects:
            target = matricies[select.name]
            value = select.pull(agg)
            if custom_defaults:
                # NOTE(review): `== None` (NOT `is None`) IS KEPT AS WRITTEN; PRESUMABLY
                # NULL-LIKE WRAPPER OBJECTS MUST ALSO MATCH - CONFIRM BEFORE CHANGING
                if value == None:
                    continue
                untouched[coord] = False
            union(target, coord, value, select.aggregate)

    if custom_defaults:
        # FILL THE DEFAULT VALUES INTO EVERY CELL THAT RECEIVED NOTHING
        for cell, is_untouched in untouched:
            if is_untouched:
                for s in all_selects:
                    matricies[s.name][cell] = s.default

    select_clause = query.select
    # ENSURE EDGES ARE IN SAME ORDER AS QUERY
    ordered_edges = sort_using_key(edges, key=lambda edge: edge.dim)

    cube = Cube(select_clause, ordered_edges, matricies)
    cube.frum = query
    return cube
def es_deepop(es, mvel, query):
    """
    Run the query as a single scripted terms facet and sum the results into a Cube.

    :param es: handed to es_post() together with the assembled request
    :param mvel: provides code(), which produces the facet's script_field
    :param query: supplies edges, where, limit and select
    :return: Cube holding one Matrix, keyed by query.select.name
    """
    es_query = es09.util.build_es_query(query)

    # THE EDGES BECOME THE select OF AN EDGELESS COPY OF THE QUERY
    edge_select = query.edges
    script_query = query.copy()
    script_query.select = edge_select
    script_query.edges = FlatList()

    es_query.facets.mvel = {
        "terms": {
            "script_field": mvel.code(script_query),
            "size": query.limit
        },
        "facet_filter": jx_expression(query.where).to_esfilter()
    }

    data = es_post(es, es_query, query.limit)

    rows = unpack_terms(data.facets.mvel, query.edges)
    columns = transpose(*rows)

    # NUMBER ALL EDGES FOR JSON EXPRESSION INDEXING
    edges = query.edges
    for position, edge in enumerate(edges):
        # LOOK UP EVERY KEY SEEN FOR THIS EDGE BEFORE THE PARTITIONS ARE NUMBERED
        for key in columns[position]:
            edge.domain.getPartByKey(key)

        edge.index = position
        for ordinal, part in enumerate(edge.domain.partitions):
            part.dataIndex = ordinal
        edge.domain.NULL.dataIndex = len(edge.domain.partitions)

    # MAKE CUBE
    shape = [len(edge.domain.partitions) for edge in query.edges]
    output = Matrix(*shape)

    # FILL CUBE; THE LAST ELEMENT OF EACH ROW IS THE VALUE TO ACCUMULATE
    for row in rows:
        coord = []
        for i, edge in enumerate(edges):
            coord.append(edge.domain.getPartByKey(row[i]).dataIndex)
        output[coord] = SUM(output[coord], row[-1])

    cube = Cube(query.select, query.edges, {query.select.name: output})
    cube.frum = query
    return cube
def format_cube(aggs, es_query, query, decoders, all_selects):
    """
    Build a Cube from aggregation results, merging values per cell with union().

    :param aggs: aggregation results to walk
    :param es_query: passed through to count_dim() and aggs_iterator()
    :param query: supplies query.select for the Cube and is stored as cube.frum
    :param decoders: passed through to count_dim() and aggs_iterator()
    :param all_selects: select clauses; each provides .name, .default, .aggregate and .pull(agg)
    :return: Cube whose edges are sorted by their .dim attribute
    """
    edges = count_dim(aggs, es_query, decoders)

    # ONE DIMENSION PER EDGE, PLUS A SLOT FOR NULLS UNLESS allowNulls IS EXACTLY False
    shape = []
    for edge in edges:
        if is_op(edge.value, TupleOp):
            edge.allowNulls = False
        null_slots = 1
        if edge.allowNulls is False:
            null_slots = 0
        shape.append(len(edge.domain.partitions) + null_slots)
    shape = tuple(shape)

    # UNUSUAL DEFAULT VALUES MESS THE union() FUNCTION, SO THEY ARE FILLED IN AFTERWARD
    custom_defaults = any(
        s.default != canonical_aggregates[s.aggregate].default
        for s in all_selects
    )
    if custom_defaults:
        untouched = Matrix(dims=shape, zeros=True)
        matricies = {s.name: Matrix(dims=shape) for s in all_selects}
    else:
        untouched = None
        matricies = {
            s.name: Matrix(dims=shape, zeros=s.default)
            for s in all_selects
        }

    for _row, coord, agg, selects in aggs_iterator(aggs, es_query, decoders):
        for select in selects:
            target = matricies[select.name]
            value = select.pull(agg)
            if custom_defaults:
                # NOTE(review): `== None` (NOT `is None`) IS KEPT AS WRITTEN; PRESUMABLY
                # NULL-LIKE WRAPPER OBJECTS MUST ALSO MATCH - CONFIRM BEFORE CHANGING
                if value == None:
                    continue
                untouched[coord] = False
            union(target, coord, value, select.aggregate)

    if custom_defaults:
        # FILL THE DEFAULT VALUES INTO EVERY CELL THAT RECEIVED NOTHING
        for cell, is_untouched in untouched:
            if is_untouched:
                for s in all_selects:
                    matricies[s.name][cell] = s.default

    select_clause = query.select
    # ENSURE EDGES ARE IN SAME ORDER AS QUERY
    ordered_edges = sort_using_key(edges, key=lambda edge: edge.dim)

    cube = Cube(select_clause, ordered_edges, matricies)
    cube.frum = query
    return cube
def es_countop(es, mvel, query):
    """
    RETURN SINGLE COUNT

    One terms facet is requested per select clause, and the facet's `total`
    becomes the single value of that clause's Matrix.

    :param es: handed to es_post() together with the assembled request
    :param mvel: not used by this function
    :param query: supplies select, limit and edges
    :return: Cube with one single-valued Matrix per select clause
    """
    select = listwrap(query.select)
    FromES = build_es_query(query)
    for s in select:
        if is_variable_name(s.value):
            # PLAIN FIELD: ONLY DOCUMENTS WHERE THE FIELD EXISTS ARE CONSIDERED
            FromES.facets[s.name] = {
                "terms": {
                    "field": s.value,
                    "size": query.limit,
                },
                "facet_filter": {
                    "exists": {
                        "field": s.value
                    }
                }
            }
        else:
            # COMPLICATED value IS PROBABLY A SCRIPT, USE IT
            FromES.facets[s.name] = {
                "terms": {
                    "script_field": es09.expressions.compile_expression(s.value, query),
                    "size": 200000
                }
            }

    data = es_post(es, FromES, query.limit)

    # FACET RESULTS ARE A TOP-LEVEL SECTION OF THE RESPONSE (data.facets), NOT PART OF hits;
    # THE PREVIOUS data.hits.facets LOOKUP COULD NOT FIND THE REQUESTED FACETS
    matricies = {}
    for s in select:
        matricies[s.name] = Matrix(value=data.facets[s.name].total)

    cube = Cube(query.select, query.edges, matricies)
    cube.frum = query
    return cube
def es_aggop(es, mvel, query):
    """
    Compute edgeless aggregates with one statistical facet per distinct select value.

    When every select clause is a count, the work is delegated to es_countop().

    :param es: handed to es_post() (or es_countop()) together with the request
    :param mvel: forwarded to es_countop(); otherwise not used here
    :param query: supplies select, where and limit
    :return: Cube with no edges and one single-valued Matrix per select clause
    """
    select = listwrap(query.select)
    FromES = build_es_query(query)

    isSimple = AND(aggregates[s.aggregate] == "count" for s in select)
    if isSimple:
        # SIMPLE, USE TERMS FACET INSTEAD
        # es_countop TAKES (es, mvel, query); mvel WAS PREVIOUSLY OMITTED FROM THIS CALL
        return es_countop(es, mvel, query)

    value2facet = dict()  # ONLY ONE FACET NEEDED PER DISTINCT value
    name2facet = dict()  # MAP name TO FACET WITH STATS

    for s in select:
        if s.value not in value2facet:
            if isinstance(s.value, Variable):
                unwrap(FromES.facets)[s.name] = {
                    "statistical": {
                        "field": s.value.var
                    },
                    "facet_filter": query.where.to_esfilter()
                }
            else:
                # NOTE(review): jx_expression_to_function PRESUMABLY RETURNS A PYTHON CALLABLE,
                # WHICH WOULD NOT BE A VALID ES SCRIPT - CONFIRM WHAT SHOULD BE SENT HERE
                unwrap(FromES.facets)[s.name] = {
                    "statistical": {
                        "script": jx_expression_to_function(s.value)
                    },
                    # SAME METHOD NAME AS THE Variable BRANCH (WAS to_es_filter)
                    "facet_filter": query.where.to_esfilter()
                }
            value2facet[s.value] = s.name
        name2facet[s.name] = value2facet[s.value]

    data = es_post(es, FromES, query.limit)

    # READ EACH RESULT FROM THE FACET THAT WAS ACTUALLY REQUESTED FOR ITS value;
    # SELECTS SHARING A value SHARE ONE FACET, SO THE SELECT'S OWN name MAY HAVE NO FACET
    matricies = {}
    for s in select:
        stats = fix_es_stats(data.facets[literal_field(name2facet[s.name])])
        matricies[s.name] = Matrix(value=stats[aggregates[s.aggregate]])

    cube = Cube(query.select, [], matricies)
    cube.frum = query
    return cube
def es_terms_stats(esq, mvel, query):
    """
    Answer an edged aggregate query with one terms_stats facet per
    (select clause, combination of facet-edge parts).

    Steps, as performed below:
      1. split query.edges into facetEdges (domain type in domains.KNOWN)
         and termsEdges (everything else; at most one is allowed)
      2. if no open-ended edge exists, move the largest eligible facet edge
         over to termsEdges
      3. when more than 100 facets would be needed, run a count query first
         and keep only the combinations that have data
      4. build the facets, post the request, and fill one Matrix per select

    :param esq: provides query() for the pre-count and .es for es_post()
    :param mvel: passed to compileEdges2Term() and buildCondition(); provides compile_expression()
    :param query: supplies select, edges, where, limit and frum
    :return: Cube over query.edges with one Matrix per select clause
    """
    select = listwrap(query.select)
    facetEdges = []  # EDGES THAT WILL REQUIRE A FACET FOR EACH PART
    termsEdges = FlatList()
    specialEdge = None
    special_index = -1

    # A SPECIAL EDGE IS ONE THAT HAS AN UNDEFINED NUMBER OF PARTITIONS AT QUERY TIME
    # FIND THE specialEdge, IF ONE
    for f, tedge in enumerate(query.edges):
        if tedge.domain.type in domains.KNOWN:
            for p, part in enumerate(tedge.domain.partitions):
                part.dataIndex = p

            # FACETS ARE ONLY REQUIRED IF SQL JOIN ON DOMAIN IS REQUIRED (RANGE QUERY)
            # OR IF WE ARE NOT SIMPLY COUNTING
            # OR IF NO SCRIPTING IS ALLOWED (SOME OTHER CODE IS RESPONSIBLE FOR SETTING isFacet)
            # OR IF WE JUST WANT TO FORCE IT :)
            # OF COURSE THE default EDGE IS NOT EXPLICIT, SO MUST BE A TERM

            facetEdges.append(tedge)
        else:
            if specialEdge:
                Log.error(
                    "There is more than one open-ended edge: self can not be handled"
                )
            specialEdge = tedge
            special_index = f
            termsEdges.append(tedge)

    if not specialEdge:
        # WE SERIOUSLY WANT A SPECIAL EDGE, OTHERWISE WE WILL HAVE TOO MANY FACETS
        # THE BIGGEST EDGE MAY BE COLLAPSED TO A TERM, MAYBE?
        num_parts = 0
        special_index = -1
        for i, e in enumerate(facetEdges):
            l = len(e.domain.partitions)
            if ((e.value and is_variable_name(e.value)) or len(e.domain.dimension.fields) == 1) and l > num_parts:
                num_parts = l
                specialEdge = e
                special_index = i

        # NOTE(review): IF NO EDGE QUALIFIED ABOVE, special_index IS STILL -1 AND specialEdge IS None;
        # pop(-1) THEN REMOVES THE LAST FACET EDGE AND None IS APPENDED - CONFIRM THIS CAN NOT HAPPEN
        facetEdges.pop(special_index)
        termsEdges.append(specialEdge)

    # NUMBER OF FACETS NEEDED IF EVERY COMBINATION OF PARTS WERE REQUESTED FOR EVERY SELECT
    total_facets = PRODUCT(len(f.domain.partitions) for f in facetEdges) * len(select)
    if total_facets > 100:
        # WE GOT A PROBLEM, LETS COUNT THE SIZE OF REALITY:
        counts = esq.query({
            "from": query.frum,
            "select": {
                "aggregate": "count"
            },
            "edges": facetEdges,
            "where": query.where,
            "limit": query.limit
        })

        esFacets = []

        # CALLBACK FOR forall(): KEEP THE PARTS OF EVERY CELL WITH A TRUTHY COUNT
        def add_facet(value, parts, cube):
            if value:
                esFacets.append(parts)

        counts["count"].forall(add_facet)

        Log.note(
            "{{theory_count}} theoretical combinations, {{real_count}} actual combos found",
            real_count=len(esFacets),
            theory_count=total_facets)

        if not esFacets:
            # MAKE EMPTY CUBE
            matricies = {}
            dims = [
                len(e.domain.partitions) + (1 if e.allowNulls else 0)
                for e in query.edges
            ]
            for s in select:
                matricies[s.name] = Matrix(*dims)
            cube = Cube(query.select, query.edges, matricies)
            cube.frum = query
            return cube

    else:
        # GENERATE ALL COMBOS
        esFacets = getAllEdges(facetEdges)

    calcTerm = compileEdges2Term(mvel, termsEdges, FlatList())
    term2parts = calcTerm.term2parts

    if len(esFacets) * len(select) > 1000:
        Log.error(
            "not implemented yet"
        )  # WE HAVE SOME SERIOUS PERMUTATIONS, WE MUST ISSUE MULTIPLE QUERIES
        pass

    FromES = build_es_query(query)

    for s in select:
        for parts in esFacets:
            condition = FlatList()
            # NOTE(review): constants IS FILLED BELOW BUT NEVER READ IN THIS FUNCTION
            constants = FlatList()
            # FACET NAME IS "<select name>,<dataIndex>,<dataIndex>,..."; IT IS PARSED BACK WHEN FILLING THE CUBE
            name = [literal_field(s.name)]
            for f, fedge in enumerate(facetEdges):
                name.append(str(parts[f].dataIndex))
                condition.append(buildCondition(mvel, fedge, parts[f]))
                constants.append({
                    "name": fedge.domain.name,
                    "value": parts[f]
                })
            condition.append(query.where)
            name = ",".join(name)

            # value_field IS SET FOR A PLAIN VARIABLE NAME, value_script OTHERWISE; THE OTHER ONE IS None
            FromES.facets[name] = {
                "terms_stats": {
                    "key_field": calcTerm.field,
                    "value_field": s.value if is_variable_name(s.value) else None,
                    "value_script": mvel.compile_expression(s.value) if not is_variable_name(s.value) else None,
                    "size": coalesce(query.limit, 200000)
                }
            }
            if condition:
                FromES.facets[name].facet_filter = simplify_esfilter(
                    {"and": condition})

    data = es_post(esq.es, FromES, query.limit)

    if specialEdge.domain.type not in domains.KNOWN:
        # WE BUILD THE PARTS BASED ON THE RESULTS WE RECEIVED
        partitions = FlatList()
        # NOTE(review): map IS A PLAIN dict (AND SHADOWS THE BUILTIN), SO map[stats] RAISES KeyError
        # FOR AN UNSEEN KEY; part IS ALSO A PLAIN dict, SO part.dataIndex = p BELOW WOULD FAIL - VERIFY THIS BRANCH
        map = {}
        for facetName, parts in data.facets.items():
            for stats in parts.terms:
                if not map[stats]:
                    part = {"value": stats, "name": stats}
                    partitions.append(part)
                    map[stats] = part

        partitions.sort(specialEdge.domain.compare)
        for p, part in enumerate(partitions):
            part.dataIndex = p

        specialEdge.domain.map = map
        specialEdge.domain.partitions = partitions

    # MAKE CUBE
    matricies = {}
    dims = [
        len(e.domain.partitions) + (1 if e.allowNulls else 0)
        for e in query.edges
    ]
    for s in select:
        matricies[s.name] = Matrix(*dims)

    # WHICH STATISTIC TO READ FROM EACH TERM'S STATS, PER SELECT NAME
    name2agg = {s.name: aggregates[s.aggregate] for s in select}

    # FILL CUBE
    for edgeName, parts in data.facets.items():
        # SPLIT THE FACET NAME BACK INTO THE SELECT NAME AND THE FACET-EDGE COORDINATES
        temp = edgeName.split(",")
        pre_coord = tuple(int(c) for c in temp[1:])
        sname = temp[0]

        for stats in parts.terms:
            if specialEdge:
                # INSERT THE SPECIAL EDGE'S COORDINATE AT special_index, AMONG THE FACET-EDGE COORDINATES
                special = term2parts(stats.term)[0]
                coord = pre_coord[:special_index] + (
                    special.dataIndex,
                ) + pre_coord[special_index:]
            else:
                coord = pre_coord
            matricies[sname][coord] = stats[name2agg[sname]]

    cube = Cube(query.select, query.edges, matricies)
    cube.frum = query
    return cube
def es_terms_stats(esq, mvel, query):
    """
    Answer an edged aggregate query with one terms_stats facet per
    (select clause, combination of facet-edge parts).

    Steps, as performed below:
      1. split query.edges into facetEdges (domain type in domains.KNOWN)
         and termsEdges (everything else; at most one is allowed)
      2. if no open-ended edge exists, move the largest eligible facet edge
         over to termsEdges
      3. when more than 100 facets would be needed, run a count query first
         and keep only the combinations that have data
      4. build the facets, post the request, and fill one Matrix per select

    :param esq: provides query() for the pre-count and .es for es_post()
    :param mvel: passed to compileEdges2Term() and buildCondition(); provides compile_expression()
    :param query: supplies select, edges, where, limit and frum
    :return: Cube over query.edges with one Matrix per select clause
    """
    select = listwrap(query.select)
    facetEdges = []  # EDGES THAT WILL REQUIRE A FACET FOR EACH PART
    termsEdges = FlatList()
    specialEdge = None
    special_index = -1

    # A SPECIAL EDGE IS ONE THAT HAS AN UNDEFINED NUMBER OF PARTITIONS AT QUERY TIME
    # FIND THE specialEdge, IF ONE
    for f, tedge in enumerate(query.edges):
        if tedge.domain.type in domains.KNOWN:
            for p, part in enumerate(tedge.domain.partitions):
                part.dataIndex = p

            # FACETS ARE ONLY REQUIRED IF SQL JOIN ON DOMAIN IS REQUIRED (RANGE QUERY)
            # OR IF WE ARE NOT SIMPLY COUNTING
            # OR IF NO SCRIPTING IS ALLOWED (SOME OTHER CODE IS RESPONSIBLE FOR SETTING isFacet)
            # OR IF WE JUST WANT TO FORCE IT :)
            # OF COURSE THE default EDGE IS NOT EXPLICIT, SO MUST BE A TERM

            facetEdges.append(tedge)
        else:
            if specialEdge:
                Log.error("There is more than one open-ended edge: self can not be handled")
            specialEdge = tedge
            special_index = f
            termsEdges.append(tedge)

    if not specialEdge:
        # WE SERIOUSLY WANT A SPECIAL EDGE, OTHERWISE WE WILL HAVE TOO MANY FACETS
        # THE BIGGEST EDGE MAY BE COLLAPSED TO A TERM, MAYBE?
        num_parts = 0
        special_index = -1
        for i, e in enumerate(facetEdges):
            l = len(e.domain.partitions)
            if ((e.value and is_variable_name(e.value)) or len(e.domain.dimension.fields) == 1) and l > num_parts:
                num_parts = l
                specialEdge = e
                special_index = i

        # NOTE(review): IF NO EDGE QUALIFIED ABOVE, special_index IS STILL -1 AND specialEdge IS None;
        # pop(-1) THEN REMOVES THE LAST FACET EDGE AND None IS APPENDED - CONFIRM THIS CAN NOT HAPPEN
        facetEdges.pop(special_index)
        termsEdges.append(specialEdge)

    # NUMBER OF FACETS NEEDED IF EVERY COMBINATION OF PARTS WERE REQUESTED FOR EVERY SELECT
    total_facets = PRODUCT(len(f.domain.partitions) for f in facetEdges)*len(select)
    if total_facets > 100:
        # WE GOT A PROBLEM, LETS COUNT THE SIZE OF REALITY:
        counts = esq.query({
            "from": query.frum,
            "select": {"aggregate": "count"},
            "edges": facetEdges,
            "where": query.where,
            "limit": query.limit
        })

        esFacets = []

        # CALLBACK FOR forall(): KEEP THE PARTS OF EVERY CELL WITH A TRUTHY COUNT
        def add_facet(value, parts, cube):
            if value:
                esFacets.append(parts)

        counts["count"].forall(add_facet)

        Log.note("{{theory_count}} theoretical combinations, {{real_count}} actual combos found", real_count= len(esFacets), theory_count=total_facets)

        if not esFacets:
            # MAKE EMPTY CUBE
            matricies = {}
            dims = [len(e.domain.partitions) + (1 if e.allowNulls else 0) for e in query.edges]
            for s in select:
                matricies[s.name] = Matrix(*dims)
            cube = Cube(query.select, query.edges, matricies)
            cube.frum = query
            return cube

    else:
        # GENERATE ALL COMBOS
        esFacets = getAllEdges(facetEdges)

    calcTerm = compileEdges2Term(mvel, termsEdges, FlatList())
    term2parts = calcTerm.term2parts

    if len(esFacets) * len(select) > 1000:
        Log.error("not implemented yet")  # WE HAVE SOME SERIOUS PERMUTATIONS, WE MUST ISSUE MULTIPLE QUERIES
        pass

    FromES = build_es_query(query)

    for s in select:
        for parts in esFacets:
            condition = FlatList()
            # NOTE(review): constants IS FILLED BELOW BUT NEVER READ IN THIS FUNCTION
            constants = FlatList()
            # FACET NAME IS "<select name>,<dataIndex>,<dataIndex>,..."; IT IS PARSED BACK WHEN FILLING THE CUBE
            name = [literal_field(s.name)]
            for f, fedge in enumerate(facetEdges):
                name.append(str(parts[f].dataIndex))
                condition.append(buildCondition(mvel, fedge, parts[f]))
                constants.append({"name": fedge.domain.name, "value": parts[f]})
            condition.append(query.where)
            name = ",".join(name)

            # value_field IS SET FOR A PLAIN VARIABLE NAME, value_script OTHERWISE; THE OTHER ONE IS None
            FromES.facets[name] = {
                "terms_stats": {
                    "key_field": calcTerm.field,
                    "value_field": s.value if is_variable_name(s.value) else None,
                    "value_script": mvel.compile_expression(s.value) if not is_variable_name(s.value) else None,
                    "size": coalesce(query.limit, 200000)
                }
            }
            if condition:
                FromES.facets[name].facet_filter = simplify_esfilter({"and": condition})

    data = es_post(esq.es, FromES, query.limit)

    if specialEdge.domain.type not in domains.KNOWN:
        # WE BUILD THE PARTS BASED ON THE RESULTS WE RECEIVED
        partitions = FlatList()
        # NOTE(review): map IS A PLAIN dict (AND SHADOWS THE BUILTIN), SO map[stats] RAISES KeyError
        # FOR AN UNSEEN KEY; part IS ALSO A PLAIN dict, SO part.dataIndex = p BELOW WOULD FAIL - VERIFY THIS BRANCH
        map = {}
        for facetName, parts in data.facets.items():
            for stats in parts.terms:
                if not map[stats]:
                    part = {"value": stats, "name": stats}
                    partitions.append(part)
                    map[stats] = part

        partitions.sort(specialEdge.domain.compare)
        for p, part in enumerate(partitions):
            part.dataIndex = p

        specialEdge.domain.map = map
        specialEdge.domain.partitions = partitions

    # MAKE CUBE
    matricies = {}
    dims = [len(e.domain.partitions) + (1 if e.allowNulls else 0) for e in query.edges]
    for s in select:
        matricies[s.name] = Matrix(*dims)

    # WHICH STATISTIC TO READ FROM EACH TERM'S STATS, PER SELECT NAME
    name2agg = {s.name: aggregates[s.aggregate] for s in select}

    # FILL CUBE
    for edgeName, parts in data.facets.items():
        # SPLIT THE FACET NAME BACK INTO THE SELECT NAME AND THE FACET-EDGE COORDINATES
        temp = edgeName.split(",")
        pre_coord = tuple(int(c) for c in temp[1:])
        sname = temp[0]

        for stats in parts.terms:
            if specialEdge:
                # INSERT THE SPECIAL EDGE'S COORDINATE AT special_index, AMONG THE FACET-EDGE COORDINATES
                special = term2parts(stats.term)[0]
                coord = pre_coord[:special_index]+(special.dataIndex, )+pre_coord[special_index:]
            else:
                coord = pre_coord
            matricies[sname][coord] = stats[name2agg[sname]]

    cube = Cube(query.select, query.edges, matricies)
    cube.frum = query
    return cube