Exemplo n.º 1
0
    def data():
        """
        YIELD Data ROWS FOR THE CELLS OF THE EDGE CUBE

        CELLS RETURNED BY aggs_iterator() CARRY THE DECODED EDGE VALUES PLUS EVERY
        SELECT PULLED FROM THE AGGREGATE.  CELLS THAT WERE NOT RETURNED CARRY THE
        DECODED EDGE VALUES PLUS 0 FOR EACH "count" SELECT (OTHER SELECTS ARE NOT SET).

        SORTED, NON-groupby QUERIES INTERLEAVE THE MISSING CELLS WHILE WALKING
        is_sent._all_combos(); OTHERWISE THE MISSING CELLS ARE EMITTED AFTER THE
        RETURNED ONES, AND ONLY WHEN THE QUERY IS NOT A groupby.

        NOTE(review): IN THE SORTED BRANCH, CELLS THAT COME AFTER THE LAST RETURNED
        COORDINATE ARE NOT EMITTED - CONFIRM THAT IS INTENDED.
        """

        def _found_row(coord, agg):
            # ROW FOR A CELL THAT aggs_iterator() RETURNED
            output = Data()
            for e, c, d in zip(query.edges, coord, decoders):
                output[e.name] = d.get_value(c)

            for s in select:
                output[s.name] = s.pull(agg)
            return output

        def _missing_row(coord):
            # ROW FOR A CELL WITH NO AGGREGATE: ONLY THE "count" SELECTS ARE SET (TO ZERO)
            output = Data()
            for i, d in enumerate(decoders):
                output[query.edges[i].name] = d.get_value(coord[i])

            for s in select:
                if s.aggregate == "count":
                    output[s.name] = 0
            return output

        dims = tuple(
            len(e.domain.partitions) + (0 if e.allowNulls is False else 1)
            for e in new_edges)

        is_sent = Matrix(dims=dims, zeros=0)
        if query.sort and not query.groupby:
            # TODO: USE THE format_table() TO PRODUCE THE NEEDED VALUES INSTEAD OF DUPLICATING LOGIC HERE
            all_coord = is_sent._all_combos()  # TRACK THE EXPECTED COMBINATIONS
            # ASSUMES aggs_iterator() RETURNS COORDINATES IN THE SAME ORDER AS _all_combos() - TODO CONFIRM
            for _, coord, agg in aggs_iterator(aggs, decoders):
                # THE next() BUILTIN WORKS ON PYTHON 2.6+ AND 3; ITERATORS HAVE NO .next() METHOD ON PYTHON 3
                missing_coord = next(all_coord)
                while coord != missing_coord:
                    # INSERT THE MISSING COORDINATE INTO THE GENERATION
                    yield _missing_row(missing_coord)
                    missing_coord = next(all_coord)

                yield _found_row(coord, agg)
        else:
            for _, coord, agg in aggs_iterator(aggs, decoders):
                is_sent[coord] = 1  # REMEMBER THIS CELL HAS BEEN EMITTED
                yield _found_row(coord, agg)

            # EMIT THE MISSING CELLS IN THE CUBE
            if not query.groupby:
                for c, v in is_sent:
                    if not v:
                        yield _missing_row(c)
Exemplo n.º 2
0
    def data():
        """
        YIELD Data ROWS FOR THE CELLS OF THE EDGE CUBE

        RETURNED CELLS: DECODED EDGE VALUES PLUS EVERY SELECT PULLED FROM THE AGGREGATE.
        MISSING CELLS: DECODED EDGE VALUES PLUS 0 FOR EACH "count" SELECT ONLY.

        WHEN THE QUERY IS SORTED AND NOT A groupby, MISSING CELLS ARE INSERTED WHILE
        WALKING is_sent._all_combos(); OTHERWISE THEY ARE EMITTED AT THE END (AND
        ONLY FOR NON-groupby QUERIES).

        NOTE(review): THE SORTED BRANCH STOPS AT THE LAST RETURNED COORDINATE, SO ANY
        LATER MISSING CELLS ARE NOT EMITTED - CONFIRM THAT IS INTENDED.
        """

        def _row_from_agg(coord, agg):
            # BUILD THE ROW FOR A CELL THAT HAS AN AGGREGATE
            output = Data()
            for e, c, d in zip(query.edges, coord, decoders):
                output[e.name] = d.get_value(c)

            for s in select:
                output[s.name] = s.pull(agg)
            return output

        def _row_for_gap(coord):
            # BUILD THE ROW FOR A CELL WITHOUT AN AGGREGATE; ONLY "count" GETS A VALUE (ZERO)
            output = Data()
            for i, d in enumerate(decoders):
                output[query.edges[i].name] = d.get_value(coord[i])

            for s in select:
                if s.aggregate == "count":
                    output[s.name] = 0
            return output

        dims = tuple(len(e.domain.partitions) + (0 if e.allowNulls is False else 1) for e in new_edges)

        is_sent = Matrix(dims=dims, zeros=0)
        if query.sort and not query.groupby:
            # TODO: USE THE format_table() TO PRODUCE THE NEEDED VALUES INSTEAD OF DUPLICATING LOGIC HERE
            all_coord = is_sent._all_combos()  # TRACK THE EXPECTED COMBINATIONS
            # ASSUMES aggs_iterator() AND _all_combos() AGREE ON COORDINATE ORDER - TODO CONFIRM
            for _, coord, agg in aggs_iterator(aggs, decoders):
                # next(it) INSTEAD OF it.next(): THE METHOD FORM DOES NOT EXIST ON PYTHON 3 ITERATORS
                missing_coord = next(all_coord)
                while coord != missing_coord:
                    # INSERT THE MISSING COORDINATE INTO THE GENERATION
                    yield _row_for_gap(missing_coord)
                    missing_coord = next(all_coord)

                yield _row_from_agg(coord, agg)
        else:
            for _, coord, agg in aggs_iterator(aggs, decoders):
                is_sent[coord] = 1  # MARK THE CELL AS EMITTED
                yield _row_from_agg(coord, agg)

            # EMIT THE MISSING CELLS IN THE CUBE
            if not query.groupby:
                for c, v in is_sent:
                    if not v:
                        yield _row_for_gap(c)
Exemplo n.º 3
0
def format_cube(aggs, es_query, query, decoders, all_selects):
    """
    BUILD A Cube FROM THE AGGREGATION RESULT: ONE Matrix PER SELECT, KEYED BY SELECT NAME

    :param aggs: HANDED TO count_dim() AND aggs_iterator()
    :param es_query: HANDED TO count_dim() AND aggs_iterator()
    :param query: PROVIDES query.select FOR THE Cube; ALSO STORED AS cube.frum
    :param decoders: HANDED TO count_dim() AND aggs_iterator()
    :param all_selects: SELECTS (name, default, aggregate) THAT EACH GET A Matrix
    :return: THE Cube, WITH ITS EDGES SORTED BY dim

    SIDE EFFECT: EDGES WHOSE value IS A TupleOp GET allowNulls SET TO False
    """
    new_edges = count_dim(aggs, es_query, decoders)

    sizes = []
    for edge in new_edges:
        if isinstance(edge.value, TupleOp):
            edge.allowNulls = False

        if edge.allowNulls is False:
            sizes.append(len(edge.domain.partitions))
        else:
            # ONE EXTRA SLOT UNLESS allowNulls IS EXPLICITLY False
            sizes.append(len(edge.domain.partitions) + 1)
    dims = tuple(sizes)

    irregular_defaults = any(
        s.default != canonical_aggregates[s.aggregate].default
        for s in all_selects
    )
    if irregular_defaults:
        # UNUSUAL DEFAULT VALUES MESS THE union() FUNCTION, SO THE MATRICES START
        # WITHOUT THEM AND THE DEFAULTS ARE WRITTEN INTO UNTOUCHED CELLS AFTERWARD
        untouched = Matrix(dims=dims, zeros=True)
        matricies = dict((s.name, Matrix(dims=dims)) for s in all_selects)
        for _, coord, agg, selects in aggs_iterator(aggs, es_query, decoders):
            for select in selects:
                target = matricies[select.name]
                value = select.pull(agg)
                if value == None:
                    continue
                untouched[coord] = False
                union(target, coord, value, select.aggregate)

        # FILL THE DEFAULT VALUES
        for cell, is_untouched in untouched:
            if not is_untouched:
                continue
            for s in all_selects:
                matricies[s.name][cell] = s.default
    else:
        # DEFAULTS ARE THE CANONICAL ONES, SO THEY CAN SEED THE MATRICES DIRECTLY
        matricies = dict(
            (s.name, Matrix(dims=dims, zeros=s.default)) for s in all_selects
        )
        for _, coord, agg, selects in aggs_iterator(aggs, es_query, decoders):
            for select in selects:
                union(matricies[select.name], coord, select.pull(agg), select.aggregate)

    select_clause = query.select
    # ENSURE EDGES ARE IN SAME ORDER AS QUERY
    ordered_edges = sort_using_key(new_edges, key=lambda e: e.dim)
    cube = Cube(select_clause, ordered_edges, matricies)
    cube.frum = query
    return cube
Exemplo n.º 4
0
    def data():
        """
        YIELD ONE LIST PER CELL: THE DECODED EDGE VALUES FOLLOWED BY ONE VALUE PER SELECT

        RETURNED CELLS HOLD THE VALUES PULLED FROM THE AGGREGATE.  MISSING CELLS HOLD
        0 FOR "count" SELECTS AND None FOR ALL OTHER SELECTS.

        SORTED, NON-groupby QUERIES INTERLEAVE THE MISSING CELLS WHILE WALKING
        is_sent._all_combos(); OTHERWISE MISSING CELLS ARE EMITTED LAST, AND ONLY
        WHEN THE QUERY IS NOT A groupby.

        NOTE(review): IN THE SORTED BRANCH, CELLS AFTER THE LAST RETURNED COORDINATE
        ARE NOT EMITTED - CONFIRM THAT IS INTENDED.
        """

        def _found_row(coord, agg):
            # ROW FOR A CELL THAT aggs_iterator() RETURNED
            output = [d.get_value(c) for c, d in zip(coord, decoders)]
            for s in select:
                output.append(s.pull(agg))
            return output

        def _missing_row(coord):
            # ROW FOR A CELL WITH NO AGGREGATE: "count" IS ZERO, EVERYTHING ELSE IS None
            record = [d.get_value(coord[i]) for i, d in enumerate(decoders)]
            for s in select:
                if s.aggregate == "count":
                    record.append(0)
                else:
                    record.append(None)
            return record

        dims = tuple(
            len(e.domain.partitions) + (0 if e.allowNulls is False else 1)
            for e in new_edges)
        is_sent = Matrix(dims=dims, zeros=0)

        if query.sort and not query.groupby:
            all_coord = is_sent._all_combos()  # TRACK THE EXPECTED COMBINATIONS
            # ASSUMES aggs_iterator() RETURNS COORDINATES IN THE SAME ORDER AS _all_combos() - TODO CONFIRM
            for _, coord, agg in aggs_iterator(aggs, decoders):
                # THE next() BUILTIN WORKS ON PYTHON 2.6+ AND 3; ITERATORS HAVE NO .next() METHOD ON PYTHON 3
                missing_coord = next(all_coord)
                while coord != missing_coord:
                    yield _missing_row(missing_coord)
                    missing_coord = next(all_coord)

                yield _found_row(coord, agg)
        else:
            for _, coord, agg in aggs_iterator(aggs, decoders):
                is_sent[coord] = 1  # REMEMBER THIS CELL HAS BEEN EMITTED
                yield _found_row(coord, agg)

            # EMIT THE MISSING CELLS IN THE CUBE
            if not query.groupby:
                for c, v in is_sent:
                    if not v:
                        yield _missing_row(c)
Exemplo n.º 5
0
    def data():
        """
        YIELD ONE Data ROW PER DISTINCT COORDINATE RETURNED BY aggs_iterator()

        A ROW IS YIELDED THE FIRST TIME ITS COORDINATE IS SEEN, THEN KEPT IN A Matrix
        SO LATER AGGREGATES FOR THE SAME COORDINATE CAN BE MERGED INTO IT WITH union().
        CALLERS THEREFORE SEE ROWS CHANGE AFTER THEY HAVE BEEN YIELDED.

        SIDE EFFECT: SELECTS WHOSE default DIFFERS FROM THE CANONICAL DEFAULT OF THEIR
        AGGREGATE GET finish SET TO THAT DEFAULT AND default RESET TO None.
        """
        group_columns = query.groupby

        sizes = []
        for edge in new_edges:
            count = len(edge.domain.partitions)
            sizes.append(count + (0 if edge.allowNulls is False else 1))
        rows_by_coord = Matrix(dims=tuple(sizes))
        want_zeros = query.sort and not query.groupby

        # IRREGULAR DEFAULTS MESS WITH union(), SET THEM AT END, IF ANY
        deferred = []
        for sel in all_selects:
            if sel.default != canonical_aggregates[sel.aggregate].default:
                sel.finish = sel.default
                sel.default = None
                deferred.append(sel)

        cells = aggs_iterator(aggs, es_query, decoders, give_me_zeros=want_zeros)
        for _, coord, agg, active_selects in cells:
            record = rows_by_coord[coord]
            if record == None:
                # FIRST VISIT TO THIS COORDINATE: CREATE, REGISTER AND YIELD THE ROW
                record = Data()
                rows_by_coord[coord] = record
                for g, d, c in zip(group_columns, decoders, coord):
                    record[g.put.name] = d.get_value(c)
                for sel in all_selects:
                    record[sel.name] = sel.default
                yield record
            # THIS IS A TRICK!  WE WILL UPDATE A ROW THAT WAS ALREADY YIELDED
            for sel in active_selects:
                union(record, sel.name, sel.pull(agg), sel.aggregate)

        if not deferred:
            return

        # SET ANY DEFAULTS
        for _, record in rows_by_coord:
            for sel in deferred:
                if record[sel.name] == None:
                    record[sel.name] = sel.finish
Exemplo n.º 6
0
    def data():
        """
        YIELD ONE Data ROW FOR EACH DISTINCT COORDINATE THAT aggs_iterator() RETURNS

        EACH ROW IS YIELDED ON THE FIRST VISIT TO ITS COORDINATE AND REMEMBERED IN A
        Matrix; AGGREGATES SEEN LATER FOR THE SAME COORDINATE ARE MERGED INTO THE
        ALREADY-YIELDED ROW WITH union().

        SIDE EFFECT: A SELECT WHOSE default IS NOT THE CANONICAL DEFAULT FOR ITS
        AGGREGATE HAS THAT VALUE MOVED TO finish AND ITS default SET TO None; THE
        finish VALUE IS WRITTEN INTO STILL-EMPTY SLOTS ONCE THE ITERATION IS DONE.
        """
        grouping = query.groupby

        extents = []
        for edge in new_edges:
            partitions = len(edge.domain.partitions)
            extents.append(partitions + (0 if edge.allowNulls is False else 1))
        sent_rows = Matrix(dims=tuple(extents))
        zeros_wanted = query.sort and not query.groupby

        # IRREGULAR DEFAULTS MESS WITH union(), SET THEM AT END, IF ANY
        late_defaults = []
        for select in all_selects:
            if select.default != canonical_aggregates[select.aggregate].default:
                select.finish = select.default
                select.default = None
                late_defaults.append(select)

        for _, coord, agg, current in aggs_iterator(
            aggs, es_query, decoders, give_me_zeros=zeros_wanted
        ):
            row_out = sent_rows[coord]
            if row_out == None:
                # NOTHING SENT FOR THIS COORDINATE YET
                row_out = Data()
                sent_rows[coord] = row_out
                for g, d, c in zip(grouping, decoders, coord):
                    row_out[g.put.name] = d.get_value(c)
                for select in all_selects:
                    row_out[select.name] = select.default
                yield row_out
            # THIS IS A TRICK!  WE WILL UPDATE A ROW THAT WAS ALREADY YIELDED
            for select in current:
                union(row_out, select.name, select.pull(agg), select.aggregate)

        if late_defaults:
            # SET ANY DEFAULTS
            for _, row_out in sent_rows:
                for select in late_defaults:
                    if row_out[select.name] == None:
                        row_out[select.name] = select.finish
Exemplo n.º 7
0
def format_cube(decoders, aggs, start, query, select):
    """
    BUILD A Cube FROM THE AGGREGATION RESULT: ONE Matrix PER SELECT, KEYED BY SELECT NAME

    :param decoders: HANDED TO count_dim() AND aggs_iterator()
    :param aggs: HANDED TO count_dim() AND aggs_iterator()
    :param start: NOT USED BY THIS FUNCTION
    :param query: PROVIDES query.select FOR THE Cube; ALSO STORED AS cube.frum
    :param select: SELECTS (name, default, pull()) THAT EACH GET A Matrix
    :return: THE Cube, WITH ITS EDGES SORTED BY dim

    SIDE EFFECT: EDGES WHOSE value IS A TupleOp GET allowNulls SET TO False
    """
    # NOTE: DECODERS ARE USED IN THE ORDER GIVEN (AN EARLIER REVERSE SORT BY edge.dim IS DISABLED)
    new_edges = count_dim(aggs, decoders)

    sizes = []
    for edge in new_edges:
        if isinstance(edge.value, TupleOp):
            edge.allowNulls = False

        if edge.allowNulls is False:
            sizes.append(len(edge.domain.partitions))
        else:
            # ONE EXTRA SLOT UNLESS allowNulls IS EXPLICITLY False
            sizes.append(len(edge.domain.partitions) + 1)
    dims = tuple(sizes)

    matricies = []
    for s in select:
        matricies.append((s, Matrix(dims=dims, zeros=s.default)))

    for _, coord, agg in aggs_iterator(aggs, decoders):
        for s, matrix in matricies:
            try:
                matrix[coord] = s.pull(agg)
            except Exception as cause:
                # THIS HAPPENS WHEN ES RETURNS MORE TUPLE COMBINATIONS THAN DOCUMENTS
                if agg.get('doc_count') != 0:
                    Log.error("Programmer error", cause=cause)

    select_clause = query.select
    # ENSURE EDGES ARE IN SAME ORDER AS QUERY
    ordered_edges = sort_using_key(new_edges, key=lambda e: e.dim)
    by_name = dict((s.name, matrix) for s, matrix in matricies)
    cube = Cube(select_clause, ordered_edges, by_name)
    cube.frum = query
    return cube
Exemplo n.º 8
0
def format_cube(decoders, aggs, start, query, select):
    """
    RETURN A Cube HOLDING ONE Matrix PER SELECT, FILLED FROM THE AGGREGATION RESULT

    :param decoders: PASSED TO count_dim() AND aggs_iterator()
    :param aggs: PASSED TO count_dim() AND aggs_iterator()
    :param start: NOT USED BY THIS FUNCTION
    :param query: SOURCE OF query.select; ALSO ASSIGNED TO cube.frum
    :param select: SELECTS; EACH ONE'S default SEEDS ITS Matrix AND pull() FILLS IT
    :return: THE Cube (EDGES SORTED BY dim)

    SIDE EFFECT: allowNulls IS FORCED TO False ON EDGES WHOSE value IS A TupleOp
    """
    # NOTE: A REVERSE SORT OF THE DECODERS BY edge.dim USED TO HAPPEN HERE; IT IS DISABLED
    new_edges = count_dim(aggs, decoders)

    extents = []
    for edge in new_edges:
        if isinstance(edge.value, TupleOp):
            edge.allowNulls = False

        null_slot = 0 if edge.allowNulls is False else 1
        extents.append(len(edge.domain.partitions) + null_slot)
    dims = tuple(extents)

    pairs = [(s, Matrix(dims=dims, zeros=s.default)) for s in select]
    for _, coord, agg in aggs_iterator(aggs, decoders):
        for s, target in pairs:
            try:
                pulled = s.pull(agg)
                target[coord] = pulled
            except Exception as problem:
                # THIS HAPPENS WHEN ES RETURNS MORE TUPLE COMBINATIONS THAN DOCUMENTS
                if agg.get('doc_count') != 0:
                    Log.error("Programmer error", cause=problem)

    select_clause = query.select
    # ENSURE EDGES ARE IN SAME ORDER AS QUERY
    edges_in_order = sort_using_key(new_edges, key=lambda e: e.dim)
    named = {}
    for s, target in pairs:
        named[s.name] = target

    cube = Cube(select_clause, edges_in_order, named)
    cube.frum = query
    return cube
Exemplo n.º 9
0
 def data():
     """
     YIELD ONE LIST PER AGGREGATE: DECODED ROW VALUES FOLLOWED BY THE PULLED SELECT VALUES

     AGGREGATES WHOSE doc_count COMPARES EQUAL TO 0 ARE SKIPPED (THE .get() CALL
     SUPPLIES 0 WHEN doc_count IS ABSENT).
     """
     for row, _, agg in aggs_iterator(aggs, decoders):
         doc_count = agg.get('doc_count', 0)
         if doc_count == 0:
             continue
         edge_values = [d.get_value_from_row(row) for d in decoders]
         select_values = [s.pull(agg) for s in select]
         yield edge_values + select_values
Exemplo n.º 10
0
 def data():
     """
     GENERATE TABLE ROWS (LISTS): ONE VALUE PER DECODER, THEN ONE VALUE PER SELECT

     AN AGGREGATE IS SKIPPED WHEN ITS doc_count COMPARES EQUAL TO 0; THE .get()
     CALL USES 0 WHEN doc_count IS ABSENT.
     """
     for row, _, agg in aggs_iterator(aggs, decoders):
         if agg.get('doc_count', 0) == 0:
             # NOTHING TO REPORT FOR THIS AGGREGATE
             continue
         record = [d.get_value_from_row(row) for d in decoders]
         record.extend([s.pull(agg) for s in select])
         yield record
Exemplo n.º 11
0
def format_cube(aggs, es_query, query, decoders, all_selects):
    """
    RETURN A Cube HOLDING ONE Matrix PER SELECT, FILLED FROM THE AGGREGATION RESULT

    :param aggs: PASSED TO count_dim() AND aggs_iterator()
    :param es_query: PASSED TO count_dim() AND aggs_iterator()
    :param query: SOURCE OF query.select; ALSO ASSIGNED TO cube.frum
    :param decoders: PASSED TO count_dim() AND aggs_iterator()
    :param all_selects: SELECTS (name, default, aggregate); EACH GETS ITS OWN Matrix
    :return: THE Cube (EDGES SORTED BY dim)

    SIDE EFFECT: allowNulls IS FORCED TO False ON EDGES WHERE is_op(e.value, TupleOp) HOLDS
    """
    new_edges = count_dim(aggs, es_query, decoders)

    extents = []
    for edge in new_edges:
        if is_op(edge.value, TupleOp):
            edge.allowNulls = False

        null_slot = 0 if edge.allowNulls is False else 1
        extents.append(len(edge.domain.partitions) + null_slot)
    dims = tuple(extents)

    has_irregular_default = any(
        s.default != canonical_aggregates[s.aggregate].default
        for s in all_selects
    )
    if not has_irregular_default:
        # CANONICAL DEFAULTS CAN SEED THE MATRICES DIRECTLY
        matricies = dict(
            (s.name, Matrix(dims=dims, zeros=s.default)) for s in all_selects
        )
        for _, coord, agg, selects in aggs_iterator(aggs, es_query, decoders):
            for select in selects:
                union(matricies[select.name], coord, select.pull(agg), select.aggregate)
    else:
        # UNUSUAL DEFAULT VALUES MESS THE union() FUNCTION, SO TRACK WHICH CELLS
        # NEVER RECEIVED A VALUE AND WRITE THE DEFAULTS INTO THOSE AFTERWARD
        is_default = Matrix(dims=dims, zeros=True)
        matricies = dict((s.name, Matrix(dims=dims)) for s in all_selects)
        for _, coord, agg, selects in aggs_iterator(aggs, es_query, decoders):
            for select in selects:
                target = matricies[select.name]
                pulled = select.pull(agg)
                if pulled == None:
                    continue
                is_default[coord] = False
                union(target, coord, pulled, select.aggregate)

        # FILL THE DEFAULT VALUES
        for cell, still_default in is_default:
            if not still_default:
                continue
            for s in all_selects:
                matricies[s.name][cell] = s.default

    select_clause = query.select
    # ENSURE EDGES ARE IN SAME ORDER AS QUERY
    edges_in_order = sort_using_key(new_edges, key=lambda e: e.dim)
    cube = Cube(select_clause, edges_in_order, matricies)
    cube.frum = query
    return cube
Exemplo n.º 12
0
    def data():
        """
        YIELD ONE LIST PER CELL: THE DECODED EDGE VALUES FOLLOWED BY ONE VALUE PER SELECT

        RETURNED CELLS HOLD THE VALUES PULLED FROM THE AGGREGATE.  MISSING CELLS HOLD
        0 FOR "count" SELECTS AND None FOR EVERY OTHER SELECT.

        WHEN THE QUERY IS SORTED AND NOT A groupby, MISSING CELLS ARE INSERTED WHILE
        WALKING is_sent._all_combos(); OTHERWISE THEY ARE EMITTED AT THE END (AND
        ONLY FOR NON-groupby QUERIES).

        NOTE(review): THE SORTED BRANCH STOPS AT THE LAST RETURNED COORDINATE, SO ANY
        LATER MISSING CELLS ARE NOT EMITTED - CONFIRM THAT IS INTENDED.
        """

        def _row_from_agg(coord, agg):
            # BUILD THE ROW FOR A CELL THAT HAS AN AGGREGATE
            output = [d.get_value(c) for c, d in zip(coord, decoders)]
            for s in select:
                output.append(s.pull(agg))
            return output

        def _row_for_gap(coord):
            # BUILD THE ROW FOR A CELL WITHOUT AN AGGREGATE: "count" IS ZERO, THE REST None
            record = [d.get_value(coord[i]) for i, d in enumerate(decoders)]
            for s in select:
                if s.aggregate == "count":
                    record.append(0)
                else:
                    record.append(None)
            return record

        dims = tuple(len(e.domain.partitions) + (0 if e.allowNulls is False else 1) for e in new_edges)
        is_sent = Matrix(dims=dims, zeros=0)

        if query.sort and not query.groupby:
            all_coord = is_sent._all_combos()  # TRACK THE EXPECTED COMBINATIONS
            # ASSUMES aggs_iterator() AND _all_combos() AGREE ON COORDINATE ORDER - TODO CONFIRM
            for _, coord, agg in aggs_iterator(aggs, decoders):
                # next(it) INSTEAD OF it.next(): THE METHOD FORM DOES NOT EXIST ON PYTHON 3 ITERATORS
                missing_coord = next(all_coord)
                while coord != missing_coord:
                    yield _row_for_gap(missing_coord)
                    missing_coord = next(all_coord)

                yield _row_from_agg(coord, agg)
        else:
            for _, coord, agg in aggs_iterator(aggs, decoders):
                is_sent[coord] = 1  # MARK THE CELL AS EMITTED
                yield _row_from_agg(coord, agg)

            # EMIT THE MISSING CELLS IN THE CUBE
            if not query.groupby:
                for c, v in is_sent:
                    if not v:
                        yield _row_for_gap(c)
Exemplo n.º 13
0
    def data():
        """
        YIELD ONE Data ROW PER AGGREGATE: GROUPBY COLUMNS FIRST, THEN THE SELECT VALUES

        GROUPBY COLUMNS ARE KEYED BY coalesce(g.put.name, g.name).  AGGREGATES WHOSE
        doc_count COMPARES EQUAL TO 0 ARE SKIPPED (THE .get() CALL SUPPLIES 0 WHEN
        doc_count IS ABSENT).
        """
        for row, _, agg in aggs_iterator(aggs, decoders):
            doc_count = agg.get('doc_count', 0)
            if doc_count == 0:
                continue

            record = Data()
            for g, d in zip(query.groupby, decoders):
                value = d.get_value_from_row(row)
                record[coalesce(g.put.name, g.name)] = value

            for s in select:
                record[s.name] = s.pull(agg)
            yield record
Exemplo n.º 14
0
    def data():
        """
        GENERATE Data ROWS, ONE FOR EACH AGGREGATE THAT HAS DOCUMENTS

        EACH ROW GETS THE DECODED GROUPBY VALUES (KEYED BY coalesce(g.put.name, g.name))
        AND THEN EVERY SELECT PULLED FROM THE AGGREGATE.  AN AGGREGATE IS SKIPPED WHEN
        ITS doc_count COMPARES EQUAL TO 0; THE .get() CALL USES 0 WHEN IT IS ABSENT.
        """
        for row, _, agg in aggs_iterator(aggs, decoders):
            if agg.get('doc_count', 0) == 0:
                # NOTHING TO REPORT FOR THIS AGGREGATE
                continue

            result = Data()
            for column, decoder in zip(query.groupby, decoders):
                decoded = decoder.get_value_from_row(row)
                column_name = coalesce(column.put.name, column.name)
                result[column_name] = decoded

            for s in select:
                result[s.name] = s.pull(agg)
            yield result
Exemplo n.º 15
0
    def data():
        """
        YIELD ONE Data ROW PER DISTINCT COORDINATE RETURNED BY aggs_iterator()

        A ROW IS YIELDED THE FIRST TIME ITS COORDINATE IS SEEN, WITH EVERY SELECT SET
        TO None, AND IS KEPT IN A Matrix SO LATER AGGREGATES FOR THE SAME COORDINATE
        CAN BE MERGED INTO IT WITH union().  CALLERS THEREFORE SEE ROWS CHANGE AFTER
        THEY HAVE BEEN YIELDED.
        """
        group_columns = query.groupby

        sizes = []
        for edge in new_edges:
            count = len(edge.domain.partitions)
            sizes.append(count + (0 if edge.allowNulls is False else 1))
        rows_by_coord = Matrix(dims=tuple(sizes))

        want_zeros = query.sort and not query.groupby
        cells = aggs_iterator(aggs, es_query, decoders, give_me_zeros=want_zeros)
        for _, coord, agg, active_selects in cells:
            record = rows_by_coord[coord]
            if record == None:
                # FIRST VISIT TO THIS COORDINATE: CREATE, REGISTER AND YIELD THE ROW
                record = Data()
                rows_by_coord[coord] = record
                for g, d, c in zip(group_columns, decoders, coord):
                    record[g.put.name] = d.get_value(c)
                for sel in all_selects:
                    record[sel.name] = None
                yield record
            # THIS IS A TRICK!  WE WILL UPDATE A ROW THAT WAS ALREADY YIELDED
            for sel in active_selects:
                union(record, sel.name, sel.pull(agg), sel.aggregate)
Exemplo n.º 16
0
    def data():
        """
        YIELD ONE LIST PER ROW: THE DECODED EDGE VALUES FOLLOWED BY ONE SLOT PER SELECT

        SELECT VALUES ARE MERGED INTO THE ROW WITH union() AT name2index[select.name];
        SLOTS THAT STAY EMPTY ARE SET TO THE SELECT'S default.

        SORTED, NON-groupby QUERIES (give_me_zeros) WALK is_sent._all_combos() AND
        EMIT A DEFAULT-ONLY ROW FOR EVERY COORDINATE SKIPPED BY aggs_iterator().
        OTHERWISE ROWS ARE EMITTED AS THE COORDINATE CHANGES, AND THE CELLS NEVER
        SEEN ARE EMITTED AT THE END (NON-groupby QUERIES ONLY).

        NOTE(review): IN THE give_me_zeros BRANCH THE LAST YIELDED ROW NEVER GETS ITS
        EMPTY SLOTS REPLACED BY DEFAULTS (THE OTHER BRANCH HANDLES ITS LAST ROW
        EXPLICITLY) - CONFIRM WHETHER THAT IS INTENDED.
        """
        is_sent = Matrix(dims=dims)
        give_me_zeros = query.sort and not query.groupby
        if give_me_zeros:
            # WE REQUIRE THE ZEROS FOR SORTING
            all_coord = is_sent._all_combos()  # TRACK THE EXPECTED COMBINATIONS
            # THE next() BUILTIN WORKS ON PYTHON 2.6+ AND 3; ITERATORS HAVE NO .next() METHOD ON PYTHON 3
            # EACH COMBINATION IS REVERSED BEFORE IT IS COMPARED WITH coord
            ordered_coord = next(all_coord)[::-1]
            output = None
            for row, coord, agg, ss in aggs_iterator(aggs, es_query, decoders):
                if coord != ordered_coord:
                    # output HAS BEEN YIELDED, BUT SET THE DEFAULT VALUES
                    if output is not None:
                        for s in all_selects:
                            i = name2index[s.name]
                            if output[i] is None:
                                output[i] = s.default
                        # WE CAN GET THE SAME coord MANY TIMES, SO ONLY ADVANCE WHEN NOT
                        ordered_coord = next(all_coord)[::-1]

                while coord != ordered_coord:
                    # HAPPENS WHEN THE coord IS AHEAD OF ordered_coord
                    record = [
                        d.get_value(ordered_coord[i])
                        for i, d in enumerate(decoders)
                    ] + [s.default for s in all_selects]
                    yield record
                    ordered_coord = next(all_coord)[::-1]
                # coord == ordered_coord
                output = [d.get_value(c) for c, d in zip(coord, decoders)
                          ] + [None for s in all_selects]
                for select in ss:
                    v = select.pull(agg)
                    if v != None:
                        union(output, name2index[select.name], v,
                              select.aggregate)
                yield output
        else:
            last_coord = None  # HANG ONTO THE output FOR A BIT WHILE WE FILL THE ELEMENTS
            output = None
            for row, coord, agg, ss in aggs_iterator(aggs, es_query, decoders):
                if coord != last_coord:
                    if output:
                        # SET DEFAULTS
                        for i, s in enumerate(all_selects):
                            v = output[rank + i]
                            if v == None:
                                output[rank + i] = s.default
                        yield output
                    output = is_sent[coord]
                    if output == None:
                        output = is_sent[coord] = [
                            d.get_value(c) for c, d in zip(coord, decoders)
                        ] + [None for _ in all_selects]
                    last_coord = coord
                # MERGE INTO THE ROW BEING HELD; IT IS YIELDED ONCE THE coord CHANGES
                for select in ss:
                    v = select.pull(agg)
                    if v != None:
                        union(output, name2index[select.name], v,
                              select.aggregate)

            if output:
                # SET DEFAULTS ON LAST ROW
                for i, s in enumerate(all_selects):
                    v = output[rank + i]
                    if v == None:
                        output[rank + i] = s.default
                yield output

            # EMIT THE MISSING CELLS IN THE CUBE
            if not query.groupby:
                for coord, output in is_sent:
                    if output == None:
                        record = [
                            d.get_value(c) for c, d in zip(coord, decoders)
                        ] + [s.default for s in all_selects]
                        yield record
Exemplo n.º 17
0
    def data():
        """
        YIELD ONE LIST PER ROW: THE DECODED EDGE VALUES FOLLOWED BY ONE SLOT PER SELECT

        SELECT VALUES ARE MERGED INTO THE ROW WITH union() AT name2index[select.name];
        SLOTS THAT STAY EMPTY ARE SET TO THE SELECT'S default.

        WHEN give_me_zeros IS SET (SORTED, NON-groupby QUERY) THE EXPECTED COORDINATES
        FROM is_sent._all_combos() ARE WALKED AND A DEFAULT-ONLY ROW IS EMITTED FOR
        EACH ONE THAT aggs_iterator() SKIPS.  OTHERWISE A ROW IS EMITTED EACH TIME THE
        COORDINATE CHANGES, AND CELLS NEVER SEEN ARE EMITTED LAST (NON-groupby ONLY).

        NOTE(review): IN THE give_me_zeros BRANCH THE FINAL YIELDED ROW IS NEVER
        BACK-FILLED WITH DEFAULTS, UNLIKE THE OTHER BRANCH - CONFIRM THAT IS INTENDED.
        """
        is_sent = Matrix(dims=dims)
        give_me_zeros = query.sort and not query.groupby
        if give_me_zeros:
            # WE REQUIRE THE ZEROS FOR SORTING
            all_coord = is_sent._all_combos()  # TRACK THE EXPECTED COMBINATIONS
            # next(it) INSTEAD OF it.next(): THE METHOD FORM DOES NOT EXIST ON PYTHON 3 ITERATORS
            # EACH COMBINATION IS REVERSED BEFORE IT IS COMPARED WITH coord
            ordered_coord = next(all_coord)[::-1]
            output = None
            for row, coord, agg, ss in aggs_iterator(aggs, es_query, decoders):
                if coord != ordered_coord:
                    # output HAS BEEN YIELDED, BUT SET THE DEFAULT VALUES
                    if output is not None:
                        for s in all_selects:
                            i = name2index[s.name]
                            if output[i] is None:
                                output[i] = s.default
                        # WE CAN GET THE SAME coord MANY TIMES, SO ONLY ADVANCE WHEN NOT
                        ordered_coord = next(all_coord)[::-1]

                while coord != ordered_coord:
                    # HAPPENS WHEN THE coord IS AHEAD OF ordered_coord
                    record = [d.get_value(ordered_coord[i]) for i, d in enumerate(decoders)] + [s.default for s in all_selects]
                    yield record
                    ordered_coord = next(all_coord)[::-1]
                # coord == ordered_coord
                output = [d.get_value(c) for c, d in zip(coord, decoders)] + [None for s in all_selects]
                for select in ss:
                    v = select.pull(agg)
                    if v != None:
                        union(output, name2index[select.name], v, select.aggregate)
                yield output
        else:
            last_coord = None   # HANG ONTO THE output FOR A BIT WHILE WE FILL THE ELEMENTS
            output = None
            for row, coord, agg, ss in aggs_iterator(aggs, es_query, decoders):
                if coord != last_coord:
                    if output:
                        # SET DEFAULTS
                        for i, s in enumerate(all_selects):
                            v = output[rank+i]
                            if v == None:
                                output[rank+i] = s.default
                        yield output
                    output = is_sent[coord]
                    if output == None:
                        output = is_sent[coord] = [d.get_value(c) for c, d in zip(coord, decoders)] + [None for _ in all_selects]
                    last_coord = coord
                # MERGE INTO THE ROW BEING HELD; IT IS YIELDED ONCE THE coord CHANGES
                for select in ss:
                    v = select.pull(agg)
                    if v != None:
                        union(output, name2index[select.name], v, select.aggregate)

            if output:
                # SET DEFAULTS ON LAST ROW
                for i, s in enumerate(all_selects):
                    v = output[rank+i]
                    if v == None:
                        output[rank+i] = s.default
                yield output

            # EMIT THE MISSING CELLS IN THE CUBE
            if not query.groupby:
                for coord, output in is_sent:
                    if output == None:
                        record = [d.get_value(c) for c, d in zip(coord, decoders)] + [s.default for s in all_selects]
                        yield record