Exemplo n.º 1
0
def _getitem(c, i):
    if len(i)==1:
        select = i[0]
        if select == None:
            return (len(c), ), c
        elif isinstance(select, slice):
            sub = c[select]
            dims, cube = transpose(*[_getitem(cc, i[1::]) for cc in sub])
            return (len(cube),) + dims[0], cube
        else:
            return (), c[select]
    else:
        select = i[0]
        if isinstance(select, int):

            return _getitem(c[select], i[1::])
        elif select == None:
            dims, cube = transpose(*[_getitem(cc, i[1::]) for cc in c])
            return (len(cube),)+dims[0], cube
        elif isinstance(select, slice):
            sub = c[select]
            dims, cube = transpose(*[_getitem(cc, i[1::]) for cc in sub])
            return (len(cube),)+dims[0], cube
        else:
            return _getitem(c[select], i[1::])
Exemplo n.º 2
0
def _getitem(c, i):
    if len(i) == 1:
        select = i[0]
        if select == None:
            return (len(c), ), c
        elif isinstance(select, slice):
            sub = c[select]
            dims, cube = transpose(*[_getitem(cc, i[1::]) for cc in sub])
            return (len(cube), ) + dims[0], cube
        else:
            return (), c[select]
    else:
        select = i[0]
        if isinstance(select, int):

            return _getitem(c[select], i[1::])
        elif select == None:
            dims, cube = transpose(*[_getitem(cc, i[1::]) for cc in c])
            return (len(cube), ) + dims[0], cube
        elif isinstance(select, slice):
            sub = c[select]
            dims, cube = transpose(*[_getitem(cc, i[1::]) for cc in sub])
            return (len(cube), ) + dims[0], cube
        else:
            return _getitem(c[select], i[1::])
Exemplo n.º 3
0
def compare_to_expected(query, result, expect, places):
    """
    Bring `result` and `expect` into a comparable order, then assert that they
    are almost equal.

    Unless the query asks for a sort, both sides are sorted the same way
    before the comparison.  What gets reordered depends on result.meta.format:
      * "table" - result columns are rearranged to follow expect.header
      * "list"  - rows are sorted by the edges/groupby plus all data columns
      * "cube"  - only handled for a single "rownum" edge

    :param query: the query that produced `result`
    :param result: actual result; modified in place
    :param expect: expected result; the wrapped copy is modified
    :param places: passed through to assertAlmostEqual
    """
    query = wrap(query)
    expect = wrap(expect)

    if result.meta.format == "table":
        try:
            assertAlmostEqual(set(result.header), set(expect.header))
        except Exception as e:
            Log.error("format=table headers do not match", cause=e)

        # MAP FROM expected COLUMN TO result COLUMN
        mapping = transpose(*transpose(*filter(
            lambda v: v[0][1] == v[1][1],
            itertools.product(enumerate(expect.header), enumerate(result.header))
        ))[1])[0]
        result.header = [result.header[m] for m in mapping]

        if result.data:
            columns = transpose(*unwrap(result.data))
            result.data = transpose(*(columns[m] for m in mapping))

        if not query.sort:
            sort_table(result)
            sort_table(expect)
    elif result.meta.format == "list":
        if not query.sort:
            try:
                # result.data MAY BE A LIST OF VALUES, NOT OBJECTS
                data_columns = jx.sort(set(jx.get_columns(result.data, leaves=True)) | set(jx.get_columns(expect.data, leaves=True)), "name")
            except Exception:
                data_columns = [{"name": "."}]

            sort_order = listwrap(coalesce(query.edges, query.groupby)) + data_columns

            if is_list(expect.data):
                try:
                    expect.data = jx.sort(expect.data, sort_order.name)
                except Exception:
                    # BEST EFFORT: UNSORTABLE DATA IS COMPARED IN ITS GIVEN ORDER
                    pass

            if is_list(result.data):
                try:
                    result.data = jx.sort(result.data, sort_order.name)
                except Exception:
                    # BEST EFFORT: UNSORTABLE DATA IS COMPARED IN ITS GIVEN ORDER
                    pass

    elif result.meta.format == "cube" and len(result.edges) == 1 and result.edges[0].name == "rownum" and not query.sort:
        # THE HEADERS ARE USED TWICE (SORT, THEN REBUILD THE CUBE), SO THEY MUST
        # BE REAL LISTS; A PYTHON 3 map() ITERATOR IS EMPTY ON THE SECOND USE
        result_data, result_header = cube2list(result.data)
        result_header = [literal_field(h) for h in result_header]
        result_data = unwrap(jx.sort(result_data, result_header))
        result.data = list2cube(result_data, result_header)

        expect_data, expect_header = cube2list(expect.data)
        expect_header = [literal_field(h) for h in expect_header]
        expect_data = jx.sort(expect_data, expect_header)
        expect.data = list2cube(expect_data, expect_header)

    # CONFIRM MATCH
    assertAlmostEqual(result, expect, places=places)
Exemplo n.º 4
0
    def _groupby(self, edges):
        """
        SLICE THIS CUBE ALONG THE GIVEN edges, RETURNING (term, values) PAIRS, WHERE
            term MAPS EDGE NAME TO THE PARTITION KEY FOUND AT THE GROUP'S COORDINATE
            values ARE ALL VALUES THAT BELONG TO THE SLICE

        :param edges: edges to group by; each one is expanded with _normalize_edge
        :return: the transposed (term, Cube) pairs when self.select is a list,
                 otherwise a generator of (term, value-or-Cube) pairs
        """
        edges = FlatList([n for e in edges for n in _normalize_edge(e)])

        stacked = [e for e in self.edges if e.name in edges.name]
        remainder = [e for e in self.edges if e.name not in edges.name]
        selector = [1 if e.name in edges.name else 0 for e in self.edges]

        if len(stacked) + len(remainder) != len(self.edges):
            Log.error("can not find some edges to group by")
        # CACHE SOME RESULTS
        keys = edges.name
        getKey = [e.domain.getKey for e in self.edges]
        lookup = [[
            getKey[i](p)
            for p in e.domain.partitions + ([None] if e.allowNulls else [])
        ] for i, e in enumerate(self.edges)]

        def coord2term(coord):
            output = wrap_leaves(
                {keys[i]: lookup[i][c]
                 for i, c in enumerate(coord)})
            return output

        if is_list(self.select):
            selects = listwrap(self.select)
            index, v = transpose(*self.data[selects[0].name].groupby(selector))

            coord = wrap([coord2term(c) for c in index])

            values = [v]
            for s in selects[1::]:
                # SAME groupby() AS USED FOR THE FIRST SELECT (WAS group_by,
                # WHICH NO OTHER CALL IN THIS METHOD USES); ONLY THE VALUES ARE
                # NEEDED, THE COORDINATES WERE ALREADY TAKEN FROM THE FIRST SELECT
                _, v = transpose(*self.data[s.name].groupby(selector))
                values.append(v)

            output = transpose(coord, [
                Cube(self.select, remainder,
                     {s.name: v[i]
                      for i, s in enumerate(selects)}) for v in zip(*values)
            ])
        elif not remainder:
            # v IS A VALUE, NO NEED TO WRAP IT IN A Cube
            output = (
                (coord2term(coord), v)
                for coord, v in self.data[self.select.name].groupby(selector))
        else:
            output = (
                (coord2term(coord), Cube(self.select, remainder, v))
                for coord, v in self.data[self.select.name].groupby(selector))

        return output
Exemplo n.º 5
0
    def groupby(self, edges):
        """
        SLICE THIS CUBE IN TO ONES WITH LESS DIMENSIONALITY

        EACH GROUP IS RETURNED AS A (term, values) PAIR, WHERE term MAPS EDGE
        NAME TO THE PARTITION KEY FOUND AT THE GROUP'S COORDINATE

        :param edges: edges to group by; each one is expanded with _normalize_edge
        :return: the transposed (term, Cube) pairs when self.select is a list,
                 otherwise a generator of (term, value-or-Cube) pairs
        """
        edges = FlatList([n for e in edges for n in _normalize_edge(e)])

        stacked = [e for e in self.edges if e.name in edges.name]
        remainder = [e for e in self.edges if e.name not in edges.name]
        selector = [1 if e.name in edges.name else 0 for e in self.edges]

        if len(stacked) + len(remainder) != len(self.edges):
            Log.error("can not find some edges to group by")
        # CACHE SOME RESULTS
        keys = edges.name
        getKey = [e.domain.getKey for e in self.edges]
        lookup = [[
            getKey[i](p)
            for p in e.domain.partitions + ([None] if e.allowNulls else [])
        ] for i, e in enumerate(self.edges)]

        def coord2term(coord):
            output = wrap_leaves(
                {keys[i]: lookup[i][c]
                 for i, c in enumerate(coord)})
            return output

        if is_list(self.select):
            selects = listwrap(self.select)
            index, v = transpose(*self.data[selects[0].name].groupby(selector))

            coord = wrap([coord2term(c) for c in index])

            values = [v]
            for s in selects[1::]:
                # USE THE SAME groupby() AS EVERY OTHER CALL IN THIS METHOD
                # (WAS group_by); THE COORDINATES ARE DISCARDED BECAUSE THEY
                # WERE ALREADY TAKEN FROM THE FIRST SELECT
                _, v = zip(*self.data[s.name].groupby(selector))
                values.append(v)

            output = transpose(coord, [
                Cube(self.select, remainder,
                     {s.name: v[i]
                      for i, s in enumerate(selects)}) for v in zip(*values)
            ])
        elif not remainder:
            # v IS A VALUE, NO NEED TO WRAP IT IN A Cube
            output = (
                (coord2term(coord), v)
                for coord, v in self.data[self.select.name].groupby(selector))
        else:
            output = (
                (coord2term(coord), Cube(self.select, remainder, v))
                for coord, v in self.data[self.select.name].groupby(selector))

        return output
Exemplo n.º 6
0
    def register_ops(self, module_vars):
        """
        Register the operator classes found in `module_vars` with this language.

        An operator class is any `type` in `module_vars` that has the ID
        attribute.  Classes whose `lang` is not yet set are stored in
        self.ops at the index given by get_id(), and get `lang` set to this
        language.

        :param module_vars: mapping of name to module member (e.g. vars() of a module)
        """
        global JX

        if self.name != "JX":
            self.ops = copy(JX.ops)
        else:
            num_ops = 1 + max(obj.get_id() for obj in module_vars.values()
                              if isinstance(obj, type) and hasattr(obj, ID))
            self.ops = [None] * num_ops

        for new_op in module_vars.values():
            if isinstance(new_op, type) and hasattr(new_op, ID):
                # EXPECT OPERATORS TO HAVE id
                # EXPECT NEW DEFINED OPS IN THIS MODULE TO HAVE lang NOT SET
                if not new_op.lang:
                    op_id = new_op.get_id()
                    old_op = self.ops[op_id]
                    if old_op is not None and old_op.__name__ != new_op.__name__:
                        Log.error("Logic error")
                    self.ops[op_id] = new_op
                    new_op.lang = self

        if self.name:
            # ENSURE THE ALL OPS ARE DEFINED ON THE NEW LANGUAGE
            for base_op, new_op in transpose(JX.ops, self.ops):
                # UNUSED SLOTS ARE None IN BOTH LISTS; `None is None` WOULD
                # PASS THE IDENTITY TEST AND THEN FAIL ON None.__name__
                if base_op is not None and new_op is base_op:
                    # MISSED DEFINITION, ADD ONE
                    new_op = type(base_op.__name__, (base_op, ), {})
                    self.ops[new_op.get_id()] = new_op
                    new_op.lang = self
Exemplo n.º 7
0
    def column_query(self, sql, param=None):
        """
        RETURN RESULTS IN [column][row_num] GRID

        When no cursor is open on this object, a temporary cursor is used for
        the read and released again, whether the query succeeds or fails.

        :param sql: SQL text; treated as a template when `param` is given
        :param param: optional values, quoted and expanded into `sql`
        :return: the transposed grid of decoded cell values
        """
        self._execute_backlog()
        old_cursor = self.cursor
        try:
            if not old_cursor:  # ALLOW NON-TRANSACTIONAL READS
                self.cursor = self.db.cursor()
                self.cursor.execute("SET TIME_ZONE='+00:00'")
                self.cursor.close()
                self.cursor = self.db.cursor()

            if param:
                sql = expand_template(sql, quote_param(param))
            sql = self.preamble + outdent(sql)
            self.debug and Log.note("Execute SQL:\n{{sql}}", sql=indent(sql))

            self.cursor.execute(sql)
            grid = [[utf8_to_unicode(c) for c in row] for row in self.cursor]
            # columns = [utf8_to_unicode(d[0]) for d in coalesce(self.cursor.description, [])]
            result = transpose(*grid)

            if not old_cursor:  # CLEANUP AFTER NON-TRANSACTIONAL READS
                self.cursor.close()
                self.cursor = None

            return result
        except Exception as e:
            if not old_cursor and self.cursor is not None:
                # DO NOT LEAVE THE TEMPORARY CURSOR ON self; THE NEXT CALL
                # WOULD MISTAKE IT FOR AN OPEN TRANSACTION
                try:
                    self.cursor.close()
                except Exception:
                    pass  # BEST EFFORT, THE ORIGINAL FAILURE IS REPORTED BELOW
                self.cursor = None

            # Exception.message DOES NOT EXIST IN PYTHON 3
            message = getattr(e, "message", None) or str(e)
            if isinstance(e, InterfaceError) or "InterfaceError" in message:
                Log.error("Did you close the db connection?", e)
            Log.error("Problem executing SQL:\n{{sql|indent}}", sql=sql, cause=e, stack_depth=1)
Exemplo n.º 8
0
    def column_query(self, sql, param=None):
        """
        RETURN RESULTS IN [column][row_num] GRID

        A temporary cursor is opened when this object has none; it is closed
        and cleared again on success and on failure.

        :param sql: SQL text; treated as a template when `param` is given
        :param param: optional values, quoted and expanded into `sql`
        :return: the transposed grid of decoded cell values
        """
        self._execute_backlog()
        old_cursor = self.cursor
        try:
            if not old_cursor:  # ALLOW NON-TRANSACTIONAL READS
                self.cursor = self.db.cursor()
                self.cursor.execute("SET TIME_ZONE='+00:00'")
                self.cursor.close()
                self.cursor = self.db.cursor()

            if param:
                sql = expand_template(sql, quote_param(param))
            sql = self.preamble + outdent(sql)
            self.debug and Log.note("Execute SQL:\n{{sql}}", sql=indent(sql))

            self.cursor.execute(sql)
            grid = [[utf8_to_unicode(c) for c in row] for row in self.cursor]
            # columns = [utf8_to_unicode(d[0]) for d in coalesce(self.cursor.description, [])]
            result = transpose(*grid)

            if not old_cursor:  # CLEANUP AFTER NON-TRANSACTIONAL READS
                self.cursor.close()
                self.cursor = None

            return result
        except Exception as e:
            if not old_cursor and self.cursor is not None:
                # THE TEMPORARY CURSOR MUST NOT SURVIVE A FAILED READ, OR LATER
                # CALLS WOULD TREAT IT AS AN OPEN TRANSACTION
                try:
                    self.cursor.close()
                except Exception:
                    pass  # BEST EFFORT, THE ORIGINAL FAILURE IS REPORTED BELOW
                self.cursor = None

            # PYTHON 3 EXCEPTIONS HAVE NO .message ATTRIBUTE
            message = getattr(e, "message", None) or str(e)
            if isinstance(e, InterfaceError) or "InterfaceError" in message:
                Log.error("Did you close the db connection?", e)
            Log.error("Problem executing SQL:\n{{sql|indent}}", sql=sql, cause=e, stack_depth=1)
Exemplo n.º 9
0
def format_cube(T, select, query=None):
    """
    Format `T` as a table, then repackage the table as a Cube with a single
    "rownum" edge and one Matrix per header.

    :param T: passed through to format_table
    :param select: passed through to format_table, and used as the Cube's select
    :param query: passed through to format_table
    :return: Cube whose data maps each table header to a Matrix of that column
    """
    with Timer("format table"):
        table = format_table(T, select, query)

    num_rows = len(table.data)

    if num_rows == 0:
        # NO ROWS TO TRANSPOSE: EVERY COLUMN IS AN EMPTY MATRIX
        data = {h: Matrix(list=[]) for h in table.header}
    else:
        cols = transpose(*unwrap(table.data))
        data = {h: Matrix(list=cols[i]) for i, h in enumerate(table.header)}

    rownum_domain = {
        "type": "rownum",
        "min": 0,
        "max": num_rows,
        "interval": 1
    }
    return Cube(
        select,
        edges=[{"name": "rownum", "domain": rownum_domain}],
        data=data)
Exemplo n.º 10
0
 def edges2value(*values):
     """
     Package `values` according to `fields` (a name from the enclosing scope).

     :param values: one value per edge
     :return: a Data keyed by edge name when `fields` is data, otherwise a tuple
     """
     if not is_data(fields):
         return tuple(values)

     record = Data()
     for edge, value in transpose(edges, values):
         record[edge.name] = value
     return record
Exemplo n.º 11
0
    def get_value_from_row(self, parts):
        """
        Build a Data record holding the 'key' of each part, named by self.put.

        :param parts: sequence of dict-like parts; the first must have 'doc_count'
        :return: None when the first part's doc_count is falsy, else the record
        """
        first = parts[0]
        if first['doc_count']:
            record = Data()
            for name, part in transpose(self.put, parts):
                record[name] = part.get('key')
            return record
        return None
Exemplo n.º 12
0
 def edges2value(*values):
     """
     Turn the positional `values` into one value, shaped by `fields` (taken
     from the enclosing scope).

     :param values: one value per edge
     :return: Data of edge name to value when `fields` is data, else a tuple
     """
     if is_data(fields):
         result = Data()
         for edge, value in transpose(edges, values):
             result[edge.name] = value
     else:
         result = tuple(values)
     return result
Exemplo n.º 13
0
    def get_value_from_row(self, parts):
        """
        Collect the 'key' of every part into a Data, using the names in self.put.

        :param parts: sequence of dict-like parts; parts[0] must have 'doc_count'
        :return: None if parts[0] has a falsy doc_count, otherwise the Data
        """
        has_docs = parts[0]['doc_count']
        if not has_docs:
            return None

        value = Data()
        for put_name, bucket in transpose(self.put, parts):
            value[put_name] = bucket.get('key')
        return value
Exemplo n.º 14
0
    def get_value_from_row(self, row):
        """
        Read this decoder's columns from `row` and collect their 'key' values.

        The columns used are row[self.start : self.start + self.num_columns].

        :param row: sequence of dict-like parts
        :return: None when the first used column has a falsy doc_count,
                 otherwise a Data mapping each name in self.put to a part's 'key'
        """
        begin = self.start
        part = row[begin:begin + self.num_columns]
        if part[0]['doc_count']:
            record = Data()
            for name, column in transpose(self.put, part):
                record[name] = column.get('key')
            return record
        return None
Exemplo n.º 15
0
    def get_value_from_row(self, row):
        """
        Slice out the columns owned by this decoder and gather their 'key' values.

        :param row: sequence of dict-like parts; the slice starts at self.start
                    and is self.num_columns long
        :return: None if the first sliced part has a falsy doc_count, else a
                 Data keyed by the names in self.put
        """
        first_column = self.start
        last_column = first_column + self.num_columns
        part = row[first_column:last_column]

        if not part[0]['doc_count']:
            return None

        value = Data()
        for put_name, bucket in transpose(self.put, part):
            value[put_name] = bucket.get('key')
        return value
Exemplo n.º 16
0
def tab(value):
    """
    convert single value to tab-delimited form, including a header

    :param value: a Mapping (rendered as a header line over a data line), or
                  any other value (rendered as text)
    :return: the text form of `value`
    """
    if isinstance(value, Mapping):
        h, d = transpose(*wrap(value).leaves())
        return ("\t".join(map(value2json, h)) + "\n" +
                "\t".join(map(value2json, d)))
    else:
        # THE RESULT WAS COMPUTED BUT NEVER RETURNED, SO CALLERS GOT None
        return text_type(value)
Exemplo n.º 17
0
def tab(value):
    """
    convert single value to tab-delimited form, including a header

    :param value: data (rendered as a header line, CR, then a data line), or
                  any other value (rendered as text)
    :return: the text form of `value`
    """
    if is_data(value):
        h, d = transpose(*to_data(value).leaves())
        return "\t".join(map(value2json, h)) + CR + "\t".join(
            map(value2json, d))
    else:
        # THE RESULT WAS COMPUTED BUT NEVER RETURNED, SO CALLERS GOT None
        return text(value)
Exemplo n.º 18
0
    def append_query(self, es_query, start):
        """
        Wrap `es_query` in the aggregations for this decoder's edge.

        The result has a "_match" aggregation (documents whose value exists and
        is one of the domain's partition keys, then a terms aggregation) and,
        when the edge allows nulls, a "_missing" aggregation over everything else.

        :param es_query: the inner query; merged under each aggregation with set_default
        :param start: recorded on self.start
        :return: wrapped {"aggs": ...} structure
        """
        self.start = start
        domain = self.domain

        domain_key = domain.key
        # NUMERIC KEYS ARE NORMALIZED TO float, OTHERS PASS THROUGH UNCHANGED
        # (BUILT DIRECTLY: THE OLD transpose-UNPACK ALSO BUILT A TEXT VERSION
        # THAT WAS NEVER USED, AND FAILED WHEN THERE WERE NO PARTITIONS)
        include = tuple(
            float(v) if isinstance(v, (int, float)) else v
            for v in (p[domain_key] for p in domain.partitions)
        )
        value = self.edge.value
        exists = AndOp("and", [
            value.exists(),
            InOp("in", [value, Literal("literal", include)])
        ]).partial_eval()

        limit = coalesce(self.limit, len(domain.partitions))

        if isinstance(value, Variable):
            es_field = self.query.frum.schema.leaves(value.var)[0].es_column  # ALREADY CHECKED THERE IS ONLY ONE
            terms = set_default({"terms": {
                "field": es_field,
                "size": limit,
                "order": {"_term": self.sorted} if self.sorted else None
            }}, es_query)
        else:
            terms = set_default({"terms": {
                "script": {
                    "lang": "painless",
                    "inline": value.to_es_script(self.schema).script(self.schema)
                },
                "size": limit
            }}, es_query)

        if self.edge.allowNulls:
            missing = set_default(
                {"filter": NotOp("not", exists).to_esfilter(self.schema)},
                es_query
            )
        else:
            missing = None

        return wrap({"aggs": {
            "_match": {
                "filter": exists.to_esfilter(self.schema),
                "aggs": {
                    "_filter": terms
                }
            },
            "_missing": missing
        }})
Exemplo n.º 19
0
def tab(value):
    """
    convert single value to tab-delimited form, including a header

    :param value: data (rendered as a header line, CR, then a data line), or
                  any other value (rendered as text)
    :return: the text form of `value`
    """
    if is_data(value):
        h, d = transpose(*wrap(value).leaves())
        return (
            "\t".join(map(value2json, h)) +
            CR +
            "\t".join(map(value2json, d))
        )
    else:
        # THE RESULT WAS COMPUTED BUT NEVER RETURNED, SO CALLERS GOT None
        return text_type(value)
Exemplo n.º 20
0
def format_cube(T, select, query=None):
    """
    Build a one-dimensional Cube (single "rownum" edge) from the table form of `T`.

    :param T: passed through to format_table
    :param select: passed through to format_table, and used as the Cube's select
    :param query: passed through to format_table
    :return: Cube with one Matrix per table header
    """
    with Timer("format table"):
        table = format_table(T, select, query)

    row_count = len(table.data)
    header = table.header

    if row_count == 0:
        # NOTHING TO TRANSPOSE, GIVE EVERY HEADER AN EMPTY MATRIX
        matrices = {h: Matrix(list=[]) for h in header}
    else:
        cols = transpose(*unwrap(table.data))
        matrices = {h: Matrix(list=cols[i]) for i, h in enumerate(header)}

    edge = {
        "name": "rownum",
        "domain": {"type": "rownum", "min": 0, "max": row_count, "interval": 1}
    }
    return Cube(select, edges=[edge], data=matrices)
Exemplo n.º 21
0
    def __init__(self, edge, query, limit):
        """
        Set up the decoder for an edge over all leaf columns under a prefix.

        The prefix is edge.value.term.var when the edge value is a LeavesOp,
        otherwise edge.value.var.  self.put gets the output name of each leaf
        column, self.fields the matching es_column.

        :param edge: the edge being decoded; its domain is replaced
        :param query: the query; its frum.schema supplies the leaf columns
        :param limit: passed to AggsDecoder.__init__
        """
        AggsDecoder.__init__(self, edge, query, limit)

        is_leaves = isinstance(edge.value, LeavesOp)
        prefix = edge.value.term.var if is_leaves else edge.value.var

        def flatter(k):
            # NAME RELATIVE TO THE PREFIX; LeavesOp NAMES ARE ALSO MADE LITERAL
            relative = relative_field(k, prefix)
            return literal_field(relative) if is_leaves else relative

        pairs = [
            (flatter(untype_path(c.name)), c.es_column)
            for c in query.frum.schema.leaves(prefix)
        ]
        self.put, self.fields = transpose(*pairs)

        self.domain = self.edge.domain = wrap({"dimension": {"fields": self.fields}})
        self.domain.limit = Math.min(coalesce(self.domain.limit, query.limit, 10), MAX_LIMIT)
        self.parts = []
        self.key2index = {}
        self.computed_domain = False
Exemplo n.º 22
0
    def __init__(self, edge, query, limit):
        """
        Set up the decoder for an edge over all leaf columns under a prefix.

        The prefix is edge.value.term.var when the edge value is a LeavesOp,
        otherwise edge.value.var.  self.put gets the output name of each leaf
        column, self.fields the matching es_column.

        :param edge: the edge being decoded; its domain is replaced
        :param query: the query; its frum.schema supplies the leaf columns
        :param limit: passed to AggsDecoder.__init__
        """
        AggsDecoder.__init__(self, edge, query, limit)

        is_leaves = is_op(edge.value, LeavesOp)
        prefix = edge.value.term.var if is_leaves else edge.value.var

        def flatter(k):
            # NAME RELATIVE TO THE PREFIX; LeavesOp NAMES ARE ALSO MADE LITERAL
            relative = relative_field(k, prefix)
            return literal_field(relative) if is_leaves else relative

        pairs = [
            (flatter(untype_path(c.name)), c.es_column)
            for c in query.frum.schema.leaves(prefix)
        ]
        self.put, self.fields = transpose(*pairs)

        self.domain = self.edge.domain = dict_to_data({"dimension": {"fields": self.fields}})
        self.domain.limit = temper_limit(self.domain.limit, query)
        self.parts = []
        self.key2index = {}
        self.computed_domain = False
Exemplo n.º 23
0
    def __init__(self, edge, query, limit):
        """
        Set up the decoder for an edge over all leaf columns under a prefix.

        The prefix is edge.value.term.var when the edge value is a LeavesOp,
        otherwise edge.value.var.  self.put gets the output name of each leaf
        column, self.fields the matching es_column.

        :param edge: the edge being decoded; its domain is replaced
        :param query: the query; its frum.schema supplies the leaf columns
        :param limit: passed to AggsDecoder.__init__
        """
        AggsDecoder.__init__(self, edge, query, limit)

        is_leaves = is_op(edge.value, LeavesOp)
        prefix = edge.value.term.var if is_leaves else edge.value.var

        def flatter(k):
            # NAME RELATIVE TO THE PREFIX; LeavesOp NAMES ARE ALSO MADE LITERAL
            relative = relative_field(k, prefix)
            return literal_field(relative) if is_leaves else relative

        pairs = [
            (flatter(untype_path(c.name)), c.es_column)
            for c in query.frum.schema.leaves(prefix)
        ]
        self.put, self.fields = transpose(*pairs)

        self.domain = self.edge.domain = wrap({"dimension": {"fields": self.fields}})
        self.domain.limit = mo_math.min(coalesce(self.domain.limit, query.limit, 10), MAX_LIMIT)
        self.parts = []
        self.key2index = {}
        self.computed_domain = False
Exemplo n.º 24
0
def define_language(lang_name, module_vars):
    """
    Create a Language and register the operator classes found in `module_vars`.

    An operator class is any `type` in `module_vars` that has the ID
    attribute.  Classes whose `lang` is not yet set are stored in the
    language's ops list at the index given by get_id(), and get `lang` set to
    the new language.

    :param lang_name: name of the new language; falsy to (re)define the base
                      language, which is also stored in the global JX
    :param module_vars: mapping of name to module member (e.g. vars() of a module)
    :return: the Language
    """
    # LET ALL EXPRESSIONS POINT TO lang OBJECT WITH ALL EXPRESSIONS
    # ENSURE THIS IS BELOW ALL SUB_CLASS DEFINITIONS SO var() CAPTURES ALL EXPRESSIONS
    global JX

    if lang_name:
        language = Language(lang_name)
        language.ops = copy(JX.ops)
    else:
        num_ops = 1 + max(
            obj.get_id()
            for obj in module_vars.values()
            if isinstance(obj, type) and hasattr(obj, ID)
        )
        language = JX = Language("JX")
        language.ops = [None] * num_ops

    for new_op in module_vars.values():
        if isinstance(new_op, type) and hasattr(new_op, ID):
            # EXPECT OPERATORS TO HAVE id
            # EXPECT NEW DEFINED OPS IN THIS MODULE TO HAVE lang NOT SET
            if not new_op.lang:
                op_id = new_op.get_id()
                old_op = language.ops[op_id]
                if old_op is not None and old_op.__name__ != new_op.__name__:
                    Log.error("Logic error")
                language.ops[op_id] = new_op
                new_op.lang = language

    if lang_name:
        # ENSURE THE ALL OPS ARE DEFINED ON THE NEW LANGUAGE
        for base_op, new_op in transpose(JX.ops, language.ops):
            # UNUSED SLOTS ARE None IN BOTH LISTS; `None is None` WOULD PASS
            # THE IDENTITY TEST AND THEN FAIL ON None.__name__
            if base_op is not None and new_op is base_op:
                # MISSED DEFINITION, ADD ONE
                new_op = type(base_op.__name__, (base_op,), {})
                language.ops[new_op.get_id()] = new_op
                new_op.lang = language

    return language
Exemplo n.º 25
0
    def register_ops(self, module_vars):
        """
        Register the Expression classes found in `module_vars` with this
        language, and wire up the double-dispatch methods.

        A double-dispatch method is one whose first two parameters are
        (self, lang).  Each operator class carries a `lookups` mapping from
        method name to a list of implementations; this method fills and
        extends those lists.

        :param module_vars: mapping of name to module member (e.g. vars() of a module)
        """
        global JX

        if self.name != "JX":
            self.ops = copy(JX.ops)  # A COPY, IF ONLY TO KNOW IT WAS REPLACED

            # REUSE THE METHOD NAMES ALREADY RECORDED ON THE OP AT INDEX 1
            double_dispatch_methods = tuple(
                sorted(set(self.ops[1].lookups.keys())))
        else:
            # ONE SLOT PER OPERATOR id, PLUS SLOT ZERO
            num_ops = 1 + max(obj.get_id() for obj in module_vars.values()
                              if isinstance(obj, type) and hasattr(obj, ID))
            self.ops = [None] * num_ops

            # FIND ALL DOUBLE-DISPATCH METHODS
            double_dispatch_methods = set()
            for _, new_op in list(module_vars.items()):
                if is_Expression(new_op):
                    for name, member in vars(new_op).items():
                        try:
                            args = get_function_arguments(member)
                            if args[:2] == ("self", "lang"):
                                double_dispatch_methods.add(name)
                        except Exception as cause:
                            # MEMBERS WHOSE ARGUMENTS CAN NOT BE INSPECTED ARE SKIPPED
                            pass
            double_dispatch_methods = tuple(sorted(double_dispatch_methods))

        for _, new_op in list(module_vars.items()):
            if is_Expression(new_op):
                op_id = new_op.get_id()
                jx_op = JX.ops[op_id]
                # LET EACH LANGUAGE POINT TO OP CLASS
                self.ops[op_id] = new_op
                new_op.lang = self

                # ENSURE THE partial_eval IS REGISTERED
                if jx_op is None:
                    # NO OP REGISTERED ON JX FOR THIS id: START A FRESH
                    # ONE-ELEMENT LOOKUP LIST FOR EVERY DOUBLE-DISPATCH METHOD
                    for dd_method in double_dispatch_methods:
                        member = extract_method(new_op, dd_method)
                        args = get_function_arguments(member)
                        if args[:2] != ("self", "lang"):
                            Log.error(
                                "{{module}}.{{clazz}}.{{name}} is expecting (self, lang) parameters, minimum",
                                module=new_op.__module__,
                                clazz=new_op.__name__,
                                name=dd_method)
                        new_op.lookups[dd_method] = [member]
                elif jx_op.__name__ != new_op.__name__:
                    # TWO DIFFERENTLY NAMED CLASSES SHARE THE SAME id
                    Log.error("Logic error")
                else:
                    # SHARE THE JX OP'S LOOKUPS, AND APPEND THIS CLASS'S
                    # IMPLEMENTATION TO EACH LIST
                    new_op.lookups = jx_op.lookups
                    for dd_method in double_dispatch_methods:
                        member = extract_method(new_op, dd_method)
                        jx_op.lookups[dd_method] += [member]

                    # COPY OTHER DEFINED METHODS
                    # (ONLY WHERE getattr ON THE JX OP GIVES None)
                    others = list(vars(new_op).items())
                    for n, v in others:
                        if v is not None:
                            o = getattr(jx_op, n, None)
                            if o is None:
                                setattr(jx_op, n, v)
        if self.name == 'JX':
            # FINALLY, SWAP OUT THE BASE METHODS
            for dd_method in double_dispatch_methods:
                existing = getattr(BaseExpression, dd_method, None)
                if existing:
                    # USE BaseExpression WHEN AVAILABLE
                    setattr(Expression, dd_method, existing)
                else:
                    # MAKE A DISPATCHER, IF NOT ONE ALREADY
                    setattr(Expression, dd_method,
                            get_dispatcher_for(dd_method))

        else:
            # ENSURE THE ALL OPS ARE DEFINED ON THE NEW LANGUAGE
            for base_op, new_op in transpose(JX.ops, self.ops):
                # `base_op and` SKIPS SLOTS THAT ARE None IN BOTH LISTS
                if base_op and new_op is base_op:
                    # MISSED DEFINITION, ADD ONE
                    new_op = type(base_op.__name__, (base_op, ), {})
                    self.ops[new_op.get_id()] = new_op
                    setattr(new_op, "lookups", base_op.lookups)
                    # EXTEND EVERY LOOKUP LIST, IN PLACE, WITH A REPEAT OF ITS LAST ENTRY
                    # NOTE(review): PRESUMABLY THIS KEEPS ONE ENTRY PER LANGUAGE
                    # IN EACH LIST - CONFIRM AGAINST THE DISPATCHER
                    for n, v in base_op.lookups.items():
                        v += v[-1:]

        # ENSURE THIS LANGUAGE INSTANCE POINTS TO ALL THE OPS BY NAME
        # (SLOT ZERO IS NOT INCLUDED)
        for o in self.ops[1:]:
            setattr(self, o.__name__, o)
Exemplo n.º 26
0
    def append_query(self, es_query, start):
        self.start = start
        domain = self.domain

        domain_key = domain.key
        include, text_include = transpose(
            *((float(v) if isinstance(v, (int, float)) else v,
               text_type(float(v)) if isinstance(v, (int, float)) else v)
              for v in (p[domain_key] for p in domain.partitions)))
        value = self.edge.value
        exists = AndOp(
            "and",
            [value.exists(),
             InOp("in", [value, Literal("literal", include)])]).partial_eval()

        limit = coalesce(self.limit, len(domain.partitions))

        if isinstance(value, Variable):
            es_field = self.query.frum.schema.leaves(
                value.var)[0].es_column  # ALREADY CHECKED THERE IS ONLY ONE
            terms = set_default(
                {
                    "terms": {
                        "field": es_field,
                        "size": limit,
                        "order": {
                            "_term": self.sorted
                        } if self.sorted else None
                    }
                }, es_query)
        else:
            terms = set_default(
                {
                    "terms": {
                        "script": {
                            "lang":
                            "painless",
                            "inline":
                            value.to_es_script(self.schema).script(self.schema)
                        },
                        "size": limit
                    }
                }, es_query)

        if self.edge.allowNulls:
            missing = set_default(
                {"filter": NotOp("not", exists).to_esfilter(self.schema)},
                es_query)
        else:
            missing = None

        return wrap({
            "aggs": {
                "_match": {
                    "filter": exists.to_esfilter(self.schema),
                    "aggs": {
                        "_filter": terms
                    }
                },
                "_missing": missing
            }
        })
Exemplo n.º 27
0
    def query(self, query=None):
        """
        :param query:  JSON Query Expression, SET `format="container"` TO MAKE NEW TABLE OF RESULT
        :return:
        """
        if not query:
            query = {}
        if not query.get('from'):
            query['from'] = self.name
        elif not startswith_field(query['from'], self.name):
            Log.error("Expecting table, or some nested table")
        query = QueryOp.wrap(query, self.container, self.namespace)
        new_table = "temp_" + unique_name()

        if query.format == "container":
            create_table = SQL_CREATE + quote_column(new_table) + SQL_AS
        else:
            create_table = ""

        if query.groupby and query.format != "cube":
            op, index_to_columns = self._groupby_op(query, self.schema)
            command = create_table + op
        elif query.groupby:
            query.edges, query.groupby = query.groupby, query.edges
            op, index_to_columns = self._edges_op(query, self.schema)
            command = create_table + op
            query.edges, query.groupby = query.groupby, query.edges
        elif query.edges or any(a != "none" for a in listwrap(query.select).aggregate):
            op, index_to_columns = self._edges_op(query, query.frum.schema)
            command = create_table + op
        else:
            op = self._set_op(query)
            return op

        result = self.db.query(command)

        if query.format == "container":
            output = QueryTable(new_table, db=self.db, uid=self.uid, exists=True)
        elif query.format == "cube" or (not query.format and query.edges):
            column_names = [None] * (max(c.push_column for c in index_to_columns.values()) + 1)
            for c in index_to_columns.values():
                column_names[c.push_column] = c.push_column_name

            if len(query.edges) == 0 and len(query.groupby) == 0:
                data = {n: Data() for n in column_names}
                for s in index_to_columns.values():
                    data[s.push_name][s.push_child] = unwrap(s.pull(result.data[0]))
                if is_list(query.select):
                    select = [{"name": s.name} for s in query.select]
                else:
                    select = {"name": query.select.name}

                return Data(
                    data=unwrap(data),
                    select=select,
                    meta={"format": "cube"}
                )

            if not result.data:
                edges = []
                dims = []
                for i, e in enumerate(query.edges + query.groupby):
                    allowNulls = coalesce(e.allowNulls, True)

                    if e.domain.type == "set" and e.domain.partitions:
                        domain = SimpleSetDomain(partitions=e.domain.partitions.name)
                    elif e.domain.type == "range":
                        domain = e.domain
                    elif is_op(e.value, TupleOp):
                        pulls = jx.sort([c for c in index_to_columns.values() if c.push_name == e.name],
                                        "push_child").pull
                        parts = [tuple(p(d) for p in pulls) for d in result.data]
                        domain = SimpleSetDomain(partitions=jx.sort(set(parts)))
                    else:
                        domain = SimpleSetDomain(partitions=[])

                    dims.append(1 if allowNulls else 0)
                    edges.append(Data(
                        name=e.name,
                        allowNulls=allowNulls,
                        domain=domain
                    ))

                data = {}
                for si, s in enumerate(listwrap(query.select)):
                    if s.aggregate == "count":
                        data[s.name] = Matrix(dims=dims, zeros=0)
                    else:
                        data[s.name] = Matrix(dims=dims)

                if is_list(query.select):
                    select = [{"name": s.name} for s in query.select]
                else:
                    select = {"name": query.select.name}

                return Data(
                    meta={"format": "cube"},
                    edges=edges,
                    select=select,
                    data={k: v.cube for k, v in data.items()}
                )

            columns = None

            edges = []
            dims = []
            for g in query.groupby:
                g.is_groupby = True

            for i, e in enumerate(query.edges + query.groupby):
                allowNulls = coalesce(e.allowNulls, True)

                if e.domain.type == "set" and e.domain.partitions:
                    domain = SimpleSetDomain(partitions=e.domain.partitions.name)
                elif e.domain.type == "range":
                    domain = e.domain
                elif e.domain.type == "time":
                    domain = wrap(mo_json.scrub(e.domain))
                elif e.domain.type == "duration":
                    domain = wrap(mo_json.scrub(e.domain))
                elif is_op(e.value, TupleOp):
                    pulls = jx.sort([c for c in index_to_columns.values() if c.push_name == e.name], "push_child").pull
                    parts = [tuple(p(d) for p in pulls) for d in result.data]
                    domain = SimpleSetDomain(partitions=jx.sort(set(parts)))
                else:
                    if not columns:
                        columns = transpose(*result.data)
                    parts = set(columns[i])
                    if e.is_groupby and None in parts:
                        allowNulls = True
                    parts -= {None}

                    if query.sort[i].sort == -1:
                        domain = SimpleSetDomain(partitions=wrap(sorted(parts, reverse=True)))
                    else:
                        domain = SimpleSetDomain(partitions=jx.sort(parts))

                dims.append(len(domain.partitions) + (1 if allowNulls else 0))
                edges.append(Data(
                    name=e.name,
                    allowNulls=allowNulls,
                    domain=domain
                ))

            data_cubes = {}
            for si, s in enumerate(listwrap(query.select)):
                if s.aggregate == "count":
                    data_cubes[s.name] = Matrix(dims=dims, zeros=0)
                else:
                    data_cubes[s.name] = Matrix(dims=dims)

            r2c = index_to_coordinate(dims)  # WORKS BECAUSE THE DATABASE SORTED THE EDGES TO CONFORM
            for rownum, row in enumerate(result.data):
                coord = r2c(rownum)

                for i, s in enumerate(index_to_columns.values()):
                    if s.is_edge:
                        continue
                    if s.push_child == ".":
                        data_cubes[s.push_name][coord] = s.pull(row)
                    else:
                        data_cubes[s.push_name][coord][s.push_child] = s.pull(row)

            if query.select == None:
                select = Null
            elif is_list(query.select):
                select = [{"name": s.name} for s in query.select]
            else:
                select = {"name": query.select.name}

            return Data(
                meta={"format": "cube"},
                edges=edges,
                select=select,
                data={k: v.cube for k, v in data_cubes.items()}
            )
        elif query.format == "table" or (not query.format and query.groupby):
            column_names = [None] * (max(c.push_column for c in index_to_columns.values()) + 1)
            for c in index_to_columns.values():
                column_names[c.push_column] = c.push_column_name
            data = []
            for d in result.data:
                row = [None for _ in column_names]
                for s in index_to_columns.values():
                    if s.push_child == ".":
                        row[s.push_column] = s.pull(d)
                    elif s.num_push_columns:
                        tuple_value = row[s.push_column]
                        if tuple_value == None:
                            tuple_value = row[s.push_column] = [None] * s.num_push_columns
                        tuple_value[s.push_child] = s.pull(d)
                    elif row[s.push_column] == None:
                        row[s.push_column] = Data()
                        row[s.push_column][s.push_child] = s.pull(d)
                    else:
                        row[s.push_column][s.push_child] = s.pull(d)
                data.append(tuple(unwrap(r) for r in row))

            output = Data(
                meta={"format": "table"},
                header=column_names,
                data=data
            )
        elif query.format == "list" or (not query.edges and not query.groupby):
            if not query.edges and not query.groupby and any(listwrap(query.select).aggregate):
                if is_list(query.select):
                    data = Data()
                    for c in index_to_columns.values():
                        if c.push_child == ".":
                            if data[c.push_name] == None:
                                data[c.push_name] = c.pull(result.data[0])
                            elif is_list(data[c.push_name]):
                                data[c.push_name].append(c.pull(result.data[0]))
                            else:
                                data[c.push_name] = [data[c.push_name], c.pull(result.data[0])]
                        else:
                            data[c.push_name][c.push_child] = c.pull(result.data[0])

                    output = Data(
                        meta={"format": "value"},
                        data=data
                    )
                else:
                    data = Data()
                    for s in index_to_columns.values():
                        if not data[s.push_child]:
                            data[s.push_child] = s.pull(result.data[0])
                        else:
                            data[s.push_child] += [s.pull(result.data[0])]
                    output = Data(
                        meta={"format": "value"},
                        data=unwrap(data)
                    )
            else:
                data = []
                for rownum in result.data:
                    row = Data()
                    for c in index_to_columns.values():
                        if c.push_child == ".":
                            row[c.push_name] = c.pull(rownum)
                        elif c.num_push_columns:
                            tuple_value = row[c.push_name]
                            if not tuple_value:
                                tuple_value = row[c.push_name] = [None] * c.num_push_columns
                            tuple_value[c.push_child] = c.pull(rownum)
                        else:
                            row[c.push_name][c.push_child] = c.pull(rownum)

                    data.append(row)

                output = Data(
                    meta={"format": "list"},
                    data=data
                )
        else:
            Log.error("unknown format {{format}}", format=query.format)

        return output