def format_cube(decoders, aggs, start, query, select):
    """
    Build a Cube holding one Matrix per selected column.

    :param decoders: passed to count_dim() and aggs_iterator()
    :param aggs: aggregation response, passed to count_dim() and aggs_iterator()
    :param start: not used by this function
    :param query: supplies `select` for the Cube, and is stored as `cube.frum`
    :param select: iterable of columns; each must provide `default`, `name`
                   and `pull(agg)`
    :return: Cube with its `frum` attribute set to `query`
    """
    # NOTE: AN EARLIER REVISION RE-SORTED decoders BY -d.edge.dim HERE; THAT STEP IS DISABLED
    edges = count_dim(aggs, decoders)

    sizes = []
    for edge in edges:
        # TUPLE-VALUED EDGES NEVER GET A NULL PART (THIS MUTATES THE EDGE)
        if isinstance(edge.value, TupleOp):
            edge.allowNulls = False

        if edge.allowNulls is False:
            null_slots = 0
        else:
            null_slots = 1
        sizes.append(len(edge.domain.partitions) + null_slots)
    shape = tuple(sizes)

    matrices = [
        (column, Matrix(dims=shape, zeros=column.default))
        for column in select
    ]

    for _, coord, agg in aggs_iterator(aggs, decoders):
        for column, matrix in matrices:
            try:
                matrix[coord] = column.pull(agg)
            except Exception as cause:
                # THIS HAPPENS WHEN ES RETURNS MORE TUPLE COMBINATIONS THAN DOCUMENTS
                if agg.get('doc_count') != 0:
                    Log.error("Programmer error", cause=cause)

    # ENSURE EDGES ARE IN SAME ORDER AS QUERY
    ordered_edges = sort_using_key(edges, key=lambda edge: edge.dim)
    by_name = {column.name: matrix for column, matrix in matrices}

    cube = Cube(query.select, ordered_edges, by_name)
    cube.frum = query
    return cube
def format_cube(decoders, aggs, start, query, select):
    """
    Pack the aggregation results into a Cube, one Matrix per select column.

    :param decoders: handed to count_dim() and aggs_iterator()
    :param aggs: aggregation response, handed to count_dim() and aggs_iterator()
    :param start: accepted but not used here
    :param query: its `select` is given to the Cube; the query itself becomes
                  `cube.frum`
    :param select: iterable of columns exposing `default`, `name` and `pull(agg)`
    :return: the populated Cube
    """
    # NOTE: THE OLD "REVERSE DECODER ORDER" SORT OF decoders IS DISABLED; decoders ARE USED AS GIVEN
    edges = count_dim(aggs, decoders)

    def width(edge):
        # NUMBER OF CELLS ALONG THIS EDGE: ONE PER PARTITION, PLUS ONE WHEN NULLS ARE ALLOWED
        if isinstance(edge.value, TupleOp):
            edge.allowNulls = False  # SIDE EFFECT: TUPLE EDGES LOSE THEIR NULL PART
        null_slot = 0 if edge.allowNulls is False else 1
        return len(edge.domain.partitions) + null_slot

    shape = tuple([width(edge) for edge in edges])

    targets = []
    for col in select:
        targets.append((col, Matrix(dims=shape, zeros=col.default)))

    for _row, coord, agg in aggs_iterator(aggs, decoders):
        for col, grid in targets:
            try:
                value = col.pull(agg)
                grid[coord] = value
            except Exception as err:
                # THIS HAPPENS WHEN ES RETURNS MORE TUPLE COMBINATIONS THAN DOCUMENTS
                if agg.get('doc_count') != 0:
                    Log.error("Programmer error", cause=err)

    cube = Cube(
        query.select,
        # ENSURE EDGES ARE IN SAME ORDER AS QUERY
        sort_using_key(edges, key=lambda edge: edge.dim),
        {col.name: grid for col, grid in targets}
    )
    cube.frum = query
    return cube
def format_table(decoders, aggs, start, query, select):
    """
    Format the aggregation results as a table (header plus list of rows).

    Each row holds the decoded edge values followed by one value per select
    column. Unless `query.groupby` is set, coordinates with no aggregate get a
    placeholder row: 0 for "count" columns, None for all others.

    :param decoders: one per edge; each provides `get_value(index)`
    :param aggs: aggregation response, passed to count_dim() and aggs_iterator()
    :param start: not used by this function
    :param query: read for `sort` and `groupby`
    :param select: columns providing `name`, `aggregate` and `pull(agg)`;
                   presumably a list wrapper whose `.name` gives the list of
                   names - TODO confirm
    :return: Data with meta.format == "table", `header` and `data`
    """
    new_edges = count_dim(aggs, decoders)
    header = new_edges.name + select.name

    def missing_row(coord):
        # PLACEHOLDER FOR A CELL WITH NO AGGREGATE: COUNTS ARE 0, EVERYTHING ELSE IS None
        row = [d.get_value(coord[i]) for i, d in enumerate(decoders)]
        row.extend(0 if s.aggregate == "count" else None for s in select)
        return row

    def found_row(coord, agg):
        # DECODED EDGE VALUES FOLLOWED BY THE PULLED AGGREGATE VALUES
        row = [d.get_value(c) for c, d in zip(coord, decoders)]
        row.extend(s.pull(agg) for s in select)
        return row

    def data():
        dims = tuple(
            len(e.domain.partitions) + (0 if e.allowNulls is False else 1)
            for e in new_edges
        )
        is_sent = Matrix(dims=dims, zeros=0)

        if query.sort and not query.groupby:
            # TRACK THE EXPECTED COMBINATIONS, FILLING GAPS AS THEY ARE FOUND
            # assumes aggs_iterator() yields coordinates in the same order as
            # _all_combos() - TODO confirm
            all_coord = is_sent._all_combos()
            for _, coord, agg in aggs_iterator(aggs, decoders):
                # next() BUILTIN WORKS ON PYTHON 2.6+ AND 3; THE .next() METHOD IS PYTHON 2 ONLY
                missing_coord = next(all_coord)
                while coord != missing_coord:
                    yield missing_row(missing_coord)
                    missing_coord = next(all_coord)

                yield found_row(coord, agg)
            # NOTE(review): COORDINATES AFTER THE LAST AGGREGATE ARE NOT EMITTED HERE - CONFIRM THIS IS INTENDED
        else:
            for _, coord, agg in aggs_iterator(aggs, decoders):
                is_sent[coord] = 1
                yield found_row(coord, agg)

            # EMIT THE MISSING CELLS IN THE CUBE
            if not query.groupby:
                for c, v in is_sent:
                    if not v:
                        yield missing_row(c)

    return Data(
        meta={"format": "table"},
        header=header,
        data=list(data())
    )
def format_table(decoders, aggs, start, query, select):
    """
    Produce a table-shaped result: `header` names plus `data` rows.

    A row is the decoded value of every edge followed by the value of every
    select column. When `query.groupby` is not set, cells that received no
    aggregate are emitted too, with 0 for "count" columns and None otherwise.

    :param decoders: one per edge; each provides `get_value(index)`
    :param aggs: aggregation response, passed to count_dim() and aggs_iterator()
    :param start: accepted but not used here
    :param query: read for `sort` and `groupby`
    :param select: columns providing `name`, `aggregate` and `pull(agg)`;
                   presumably `.name` on the collection yields the list of
                   column names - TODO confirm
    :return: Data(meta={"format": "table"}, header=..., data=[...])
    """
    new_edges = count_dim(aggs, decoders)
    header = new_edges.name + select.name

    def empty_record(coord):
        # ROW FOR A COORDINATE WITHOUT AN AGGREGATE: COUNTS DEFAULT TO 0, OTHERS TO None
        record = [d.get_value(coord[i]) for i, d in enumerate(decoders)]
        for s in select:
            record.append(0 if s.aggregate == "count" else None)
        return record

    def full_record(coord, agg):
        # ROW FOR A COORDINATE THAT HAS AN AGGREGATE
        record = [d.get_value(c) for c, d in zip(coord, decoders)]
        for s in select:
            record.append(s.pull(agg))
        return record

    def data():
        dims = tuple(
            len(e.domain.partitions) + (0 if e.allowNulls is False else 1)
            for e in new_edges
        )
        is_sent = Matrix(dims=dims, zeros=0)

        if query.sort and not query.groupby:
            # WALK THE EXPECTED COMBINATIONS ALONGSIDE THE AGGREGATES
            # presumably both sequences share one ordering - verify against
            # aggs_iterator() and Matrix._all_combos()
            expected = is_sent._all_combos()
            for _, coord, agg in aggs_iterator(aggs, decoders):
                # BUILTIN next() REPLACES THE PYTHON-2-ONLY .next() METHOD
                candidate = next(expected)
                while coord != candidate:
                    yield empty_record(candidate)
                    candidate = next(expected)

                yield full_record(coord, agg)
            # NOTE(review): EXPECTED COMBINATIONS LEFT OVER AFTER THE LAST AGGREGATE ARE DROPPED - CONFIRM
        else:
            for _, coord, agg in aggs_iterator(aggs, decoders):
                is_sent[coord] = 1
                yield full_record(coord, agg)

            # EMIT THE MISSING CELLS IN THE CUBE
            if not query.groupby:
                for c, v in is_sent:
                    if not v:
                        yield empty_record(c)

    return Data(
        meta={"format": "table"},
        header=header,
        data=list(data())
    )
def format_list(decoders, aggs, start, query, select):
    """
    Format the aggregation results as a list of records.

    Each record maps edge names (from `query.edges`) to decoded edge values and
    select names to aggregate values. Unless `query.groupby` is set,
    coordinates with no aggregate get a record where only "count" columns are
    assigned (as 0).

    :param decoders: one per edge; each provides `get_value(index)`
    :param aggs: aggregation response, passed to count_dim() and aggs_iterator()
    :param start: not used by this function
    :param query: read for `sort`, `groupby` and `edges`
    :param select: columns providing `name`, `aggregate` and `pull(agg)`
    :return: Data with meta.format == "list" and `data` holding the records
    """
    new_edges = count_dim(aggs, decoders)

    def missing_record(coord):
        # RECORD FOR A CELL WITH NO AGGREGATE: ONLY COUNT COLUMNS ARE ASSIGNED (AS 0)
        record = Data()
        for i, d in enumerate(decoders):
            record[query.edges[i].name] = d.get_value(coord[i])
        for s in select:
            if s.aggregate == "count":
                record[s.name] = 0
        return record

    def found_record(coord, agg):
        # RECORD FOR A CELL THAT HAS AN AGGREGATE
        record = Data()
        for e, c, d in zip(query.edges, coord, decoders):
            record[e.name] = d.get_value(c)
        for s in select:
            record[s.name] = s.pull(agg)
        return record

    def data():
        dims = tuple(
            len(e.domain.partitions) + (0 if e.allowNulls is False else 1)
            for e in new_edges
        )
        is_sent = Matrix(dims=dims, zeros=0)

        if query.sort and not query.groupby:
            # TODO: USE THE format_table() TO PRODUCE THE NEEDED VALUES INSTEAD OF DUPLICATING LOGIC HERE
            # TRACK THE EXPECTED COMBINATIONS
            # assumes aggs_iterator() yields coordinates in the same order as
            # _all_combos() - TODO confirm
            all_coord = is_sent._all_combos()
            for _, coord, agg in aggs_iterator(aggs, decoders):
                # next() BUILTIN WORKS ON PYTHON 2.6+ AND 3; THE .next() METHOD IS PYTHON 2 ONLY
                missing_coord = next(all_coord)
                while coord != missing_coord:
                    # INSERT THE MISSING COORDINATE INTO THE GENERATION
                    yield missing_record(missing_coord)
                    missing_coord = next(all_coord)

                yield found_record(coord, agg)
            # NOTE(review): COORDINATES AFTER THE LAST AGGREGATE ARE NOT EMITTED HERE - CONFIRM THIS IS INTENDED
        else:
            for _, coord, agg in aggs_iterator(aggs, decoders):
                is_sent[coord] = 1
                yield found_record(coord, agg)

            # EMIT THE MISSING CELLS IN THE CUBE
            if not query.groupby:
                for c, v in is_sent:
                    if not v:
                        yield missing_record(c)

    output = Data(
        meta={"format": "list"},
        data=list(data())
    )
    return output
def format_list(decoders, aggs, start, query, select):
    """
    Produce a list-shaped result: one record per cube cell.

    A record is keyed by the names in `query.edges` (decoded edge values) and by
    the select names (aggregate values). When `query.groupby` is not set, cells
    that received no aggregate are emitted as well; in those records only
    "count" columns are assigned, with the value 0.

    :param decoders: one per edge; each provides `get_value(index)`
    :param aggs: aggregation response, passed to count_dim() and aggs_iterator()
    :param start: accepted but not used here
    :param query: read for `sort`, `groupby` and `edges`
    :param select: columns providing `name`, `aggregate` and `pull(agg)`
    :return: Data(meta={"format": "list"}, data=[...])
    """
    new_edges = count_dim(aggs, decoders)

    def blank(coord):
        # CELL WITHOUT AN AGGREGATE: EDGE VALUES PLUS A ZERO FOR EVERY COUNT COLUMN
        item = Data()
        for i, d in enumerate(decoders):
            item[query.edges[i].name] = d.get_value(coord[i])
        for s in select:
            if s.aggregate == "count":
                item[s.name] = 0
        return item

    def filled(coord, agg):
        # CELL WITH AN AGGREGATE: EDGE VALUES PLUS EVERY PULLED COLUMN
        item = Data()
        for e, c, d in zip(query.edges, coord, decoders):
            item[e.name] = d.get_value(c)
        for s in select:
            item[s.name] = s.pull(agg)
        return item

    def data():
        dims = tuple(
            len(e.domain.partitions) + (0 if e.allowNulls is False else 1)
            for e in new_edges
        )
        is_sent = Matrix(dims=dims, zeros=0)

        if query.sort and not query.groupby:
            # TODO: USE THE format_table() TO PRODUCE THE NEEDED VALUES INSTEAD OF DUPLICATING LOGIC HERE
            # WALK THE EXPECTED COMBINATIONS ALONGSIDE THE AGGREGATES
            # presumably both sequences share one ordering - verify against
            # aggs_iterator() and Matrix._all_combos()
            expected = is_sent._all_combos()
            for _, coord, agg in aggs_iterator(aggs, decoders):
                # BUILTIN next() REPLACES THE PYTHON-2-ONLY .next() METHOD
                candidate = next(expected)
                while coord != candidate:
                    # INSERT THE MISSING COORDINATE INTO THE GENERATION
                    yield blank(candidate)
                    candidate = next(expected)

                yield filled(coord, agg)
            # NOTE(review): EXPECTED COMBINATIONS LEFT OVER AFTER THE LAST AGGREGATE ARE DROPPED - CONFIRM
        else:
            for _, coord, agg in aggs_iterator(aggs, decoders):
                is_sent[coord] = 1
                yield filled(coord, agg)

            # EMIT THE MISSING CELLS IN THE CUBE
            if not query.groupby:
                for c, v in is_sent:
                    if not v:
                        yield blank(c)

    output = Data(
        meta={"format": "list"},
        data=list(data())
    )
    return output