def _normalize_sort(sort=None):
    """
    CONVERT SORT PARAMETERS TO A NORMAL FORM SO EASIER TO USE

    Each accepted clause becomes one or more `{"value": ..., "sort": ...}`
    entries:

    * text            -> parsed with `jx_expression`, sort 1
    * expression      -> used as-is, sort 1
    * integer         -> `{"offset": n}` expression, sort 1
    * `{name: direction, ...}` (no `sort`/`value` property, every value found
      in `sort_direction`) -> one entry per item
    * anything else   -> `value` (or `field`) with the direction in `sort`

    :param sort: a single sort clause, or a list of them
    :return: Null when `sort` compares equal to None, otherwise a FlatList
    """

    # equality (not identity) is kept from the original
    # NOTE(review): presumably so a mo_dots Null also matches -- confirm
    if sort == None:
        return Null

    normalized = FlatList()
    for clause in listwrap(sort):
        if is_text(clause):
            normalized.append({"value": jx_expression(clause), "sort": 1})
            continue
        if is_expression(clause):
            normalized.append({"value": clause, "sort": 1})
            continue
        if mo_math.is_integer(clause):
            offset = jx_expression({"offset": clause})
            normalized.append({"value": offset, "sort": 1})
            continue

        if clause.sort or clause.value:
            # EXPLICIT FORM
            normalized.append({
                "value": jx_expression(coalesce(clause.value, clause.field)),
                "sort": sort_direction[clause.sort]
            })
            continue

        # NEITHER `sort` NOR `value`: ONLY THE {name: direction} SHORTHAND IS LEGAL
        if all(d in sort_direction for d in clause.values()):
            for name, direction in clause.items():
                normalized.append({
                    "value": jx_expression(name),
                    "sort": sort_direction[direction]
                })
        else:
            Log.error("`sort` clause must have a `value` property")
    return normalized
def _normalize_range(range):
    if range == None:
        return None

    return Data(min=None if range.min == None else jx_expression(range.min),
                max=None if range.max == None else jx_expression(range.max),
                mode=range.mode)
Пример #3
0
 def define(cls, expr):
     """
     Build a PrefixOp from the `prefix` property of a JSON expression

     :param expr: mapping holding the `prefix` property
     :return: PrefixOp(NULL, NULL) when `prefix` is missing or empty;
              PrefixOp(Variable, Literal) for the `{name: constant}` form;
              otherwise both members of the pair are parsed with `jx_expression`
     """
     term = expr.get('prefix')
     if not term:
         return PrefixOp(NULL, NULL)

     if is_data(term):
         # {variable_name: constant} FORM, ONLY THE FIRST ITEM IS USED
         name, constant = first(term.items())
         return PrefixOp(Variable(name), Literal(constant))

     # [expression, prefix] PAIR
     subject, prefix = term
     return PrefixOp(jx_expression(subject), jx_expression(prefix))
Пример #4
0
    def define(cls, expr):
        """
        GENERAL SUPPORT FOR BUILDING EXPRESSIONS FROM JSON EXPRESSIONS
        OVERRIDE THIS IF AN OPERATOR EXPECTS COMPLICATED PARAMETERS

        The first key of `expr` that is found in `operators` selects the
        operator class (through `cls.lang.ops`); every other key is parsed with
        `jx_expression` and handed to that class as a keyword argument.

        :param expr: Data representing a JSON Expression
        :return: parse tree (NULL when `expr` has no items)
        """

        try:
            lang = cls.lang
            pairs = items_(expr)
            for op, term in pairs:
                known = operators.get(op)
                if not known:
                    continue
                op_class = lang.ops[known.get_id()]
                # EVERYTHING THAT IS NOT THE OPERATOR IS A KEYWORD PARAMETER
                kwargs = {}
                for key, param in expr.items():
                    if key != op:
                        kwargs[key] = jx_expression(param)
                break
            else:
                # NO KEY MATCHED A KNOWN OPERATOR
                if not pairs:
                    return NULL
                raise Log.error("{{operator|quote}} is not a known operator",
                                operator=expr)

            if term == None:
                return op_class([], **kwargs)

            if is_container(term):
                return op_class([jx_expression(t) for t in term], **kwargs)

            if is_data(term):
                fields = items_(term)
                if not op_class.has_simple_form:
                    return op_class(_jx_expression(term, lang), **kwargs)
                if len(fields) == 1:
                    # {variable: literal} SIMPLE FORM
                    name, value = fields[0]
                    return op_class([Variable(name), Literal(value)], **kwargs)
                return op_class({k: Literal(v) for k, v in fields}, **kwargs)

            # SCALAR TERM: THESE OPERATORS TAKE THE RAW VALUE, THE REST GET IT PARSED
            if op in ("literal", "date", "offset"):
                return op_class(term, **kwargs)
            return op_class(_jx_expression(term, lang), **kwargs)
        except Exception as cause:
            Log.error("programmer error expr = {{value|quote}}",
                      value=expr,
                      cause=cause)
def _normalize_select_no_context(select, schema=None):
    """
    SAME NORMALIZE, BUT NO SOURCE OF COLUMNS

    :param select: text (taken as the `value`) or a select clause
    :param schema: passed through to `jx_expression`
    :return: copy of the clause with `name`, `value`, `aggregate` and
             `default` filled in; Null when the clause is completely empty
    """
    select = Data(value=select) if is_text(select) else to_data(select)

    output = select.copy()
    if not select.value:
        # NO VALUE: ONLY LEGAL WHEN A NAME OR AN AGGREGATE IS GIVEN
        output.name = coalesce(select.name, select.aggregate)
        if output.name:
            output.value = jx_expression(".", schema=schema)
        elif len(select):
            Log.error(BAD_SELECT, select=select)
        else:
            return Null
    elif is_text(select.value):
        if select.value.endswith(".*"):
            # "path.*" SELECTS THE LEAVES UNDER path
            stem = select.value[:-2].lstrip(".")
            output.name = coalesce(select.name, stem)
            output.value = LeavesOp(Variable(stem),
                                    prefix=coalesce(select.prefix, stem))
        elif select.value == ".":
            output.name = coalesce(select.name, select.aggregate, ".")
            output.value = jx_expression(select.value, schema=schema)
        elif select.value == "*":
            output.name = coalesce(select.name, select.aggregate, ".")
            output.value = LeavesOp(Variable("."))
        else:
            output.name = coalesce(select.name, select.value.lstrip("."),
                                   select.aggregate)
            output.value = jx_expression(select.value, schema=schema)
    else:
        # A NUMBER WITHOUT A NAME IS NAMED AFTER ITS OWN TEXT FORM
        if is_number(output.value) and not output.name:
            output.name = text(output.value)
        output.value = jx_expression(select.value, schema=schema)

    if not output.name:
        Log.error("expecting select to have a name: {{select}}", select=select)
    if output.name.endswith(".*"):
        Log.error("{{name|quote}} is invalid select", name=output.name)

    requested = canonical_aggregates[select.aggregate]
    output.aggregate = coalesce(requested.name, select.aggregate, "none")
    fallback = canonical_aggregates[output.aggregate].default
    output.default = coalesce(select.default, fallback)
    return output
Пример #6
0
    def define(cls, expr):
        """
        Build a ConcatOp from the `concat` property of a JSON expression

        :param expr: mapping with a `concat` property and, optionally,
                     `default` and `separator`
        :return: ConcatOp over the parsed terms; `default`/`separator` become
                 a Literal when they are text that is not a variable name,
                 otherwise they are parsed with `jx_expression`
        """
        operands = expr["concat"]
        if is_data(operands):
            # {variable: literal} FORM, ONLY THE FIRST ITEM IS USED
            name, value = first(operands.items())
            operands = [Variable(name), Literal(value)]
        else:
            operands = [jx_expression(t) for t in operands]

        options = {}
        for key, param in expr.items():
            if key not in ["default", "separator"]:
                continue
            if is_text(param) and not is_variable_name(param):
                options[key] = Literal(param)
            else:
                options[key] = jx_expression(param)

        return ConcatOp(operands, **options)
Пример #7
0
 def define(cls, expr):
     """
     Build a BetweenOp from the `between` property of a JSON expression

     Two forms are accepted:

     * a sequence `[value, prefix, suffix]`, each member parsed with
       `jx_expression`
     * a mapping `{var: [prefix, suffix]}`, where `var` becomes a Variable and
       the two members become Literals (only the first item is used)

     `expr.default` and `expr.start` are parsed with `jx_expression` in both
     forms.

     :param expr: expression holding `between`, and optionally `default`
                  and `start`
     :return: the BetweenOp, looked up through `cls.lang`
     """
     term = expr.between
     if is_sequence(term):
         return cls.lang[BetweenOp(
             value=jx_expression(term[0]),
             prefix=jx_expression(term[1]),
             suffix=jx_expression(term[2]),
             default=jx_expression(expr.default),
             start=jx_expression(expr.start),
         )]

     if is_data(term):
         # next(iter(...)) WORKS WHETHER items() GIVES A LIST OR A DICT VIEW
         # (A VIEW CAN NOT BE INDEXED), AND AN EMPTY MAPPING FALLS THROUGH TO
         # THE DESCRIPTIVE ERROR BELOW INSTEAD OF AN IndexError
         var, vals = next(iter(term.items()), (None, None))
         if is_sequence(vals) and len(vals) == 2:
             return cls.lang[BetweenOp(
                 value=Variable(var),
                 prefix=Literal(vals[0]),
                 suffix=Literal(vals[1]),
                 default=jx_expression(expr.default),
                 start=jx_expression(expr.start),
             )]

     # EVERY UNRECOGNIZED SHAPE ENDS HERE
     Log.error(
         "`between` parameters are expected to be in {var: [prefix, suffix]} form"
     )
def _normalize_window(window, schema=None):
    """
    Normalize one window clause

    :param window: clause with `name`, `value`, `edges`, `sort`, `aggregate`,
                   `range` and `where` properties
    :param schema: passed to the expression, edge and where normalizers
    :return: Data with every property in normal form
    """
    raw = window.value
    try:
        expr = jx_expression(raw, schema=schema)
    except Exception:
        # NOT PARSABLE: KEEP ANYTHING WITH A `__call__` ATTRIBUTE AS-IS,
        # WRAP THE REST IN A ScriptOp
        expr = raw if hasattr(raw, "__call__") else ScriptOp(raw)

    name = coalesce(window.name, window.value)

    # ONE EDGE CLAUSE MAY NORMALIZE TO MANY EDGES; FLATTEN THEM
    edges = []
    for index, clause in enumerate(listwrap(window.edges)):
        edges.extend(_normalize_edge(clause, index, limit=None, schema=schema))

    sort = _normalize_sort(window.sort)
    aggregate = window.aggregate
    range_ = _normalize_range(window.range)
    where = _normalize_where(window.where, schema=schema)

    return Data(name=name,
                value=expr,
                edges=edges,
                sort=sort,
                aggregate=aggregate,
                range=range_,
                where=where)
def _normalize_where(where, schema=None):
    """
    Parse a where clause into an expression

    :param where: the where clause, handed to `jx_expression` unchanged
    :param schema: passed through to `jx_expression`
    :return: whatever `jx_expression` returns
    """
    parsed = jx_expression(where, schema=schema)
    return parsed
def _normalize_group(edge, dim_index, limit, schema=None):
    """
    Normalize one groupby clause

    :param edge: Not normalized groupby
    :param dim_index: Dimensions are ordered; this is this groupby's index into that order
    :param limit: only used for the Domain of a plain (not `.*`) text groupby
    :param schema: for context
    :return: a normalized groupby, as a list (one entry per leaf column for
             the `prefix.*` form when a schema is given, otherwise one entry)
    """
    if not is_text(edge):
        group = to_data(edge)
        if group.domain and group.domain.type != "default":
            Log.error("groupby does not accept complicated domains")

        if not group.name and not is_text(group.value):
            Log.error("You must name compound edges: {{edge}}", edge=group)

        described = {
            "name": coalesce(group.name, group.value),
            "value": jx_expression(group.value, schema=schema),
            "allowNulls": True,
            "dim": dim_index,
            "domain": {
                "type": "default"
            }
        }
        return list_to_data([described])

    if not edge.endswith(".*"):
        # PLAIN COLUMN NAME
        plain = {
            "name": edge,
            "value": jx_expression(edge, schema=schema),
            "allowNulls": True,
            "dim": dim_index,
            "domain": Domain(type="default", limit=limit)
        }
        return list_to_data([plain])

    prefix = edge[:-2]
    if not schema:
        # NO SCHEMA TO EXPAND WITH, GROUP BY THE LEAVES AS ONE
        whole = {
            "name": untype_path(prefix),
            "put": {
                "name": literal_field(untype_path(prefix))
            },
            "value": LeavesOp(Variable(prefix)),
            "allowNulls": True,
            "dim": dim_index,
            "domain": {
                "type": "default"
            }
        }
        return list_to_data([whole])

    # BECAUSE THIS IS A GROUPBY, EARLY SPLIT INTO LEAVES WORKS JUST FINE
    per_leaf = []
    for c in schema.leaves(prefix):
        relative = relative_field(untype_path(c.name), prefix)
        name = concat_field(prefix, literal_field(relative))
        put_name = literal_field(untype_path(c.name))
        value = jx_expression(c.es_column, schema=schema)
        per_leaf.append({
            "name": name,
            "put": {
                "name": put_name
            },
            "value": value,
            "allowNulls": True,
            "domain": {
                "type": "default"
            }
        })
    return list_to_data(per_leaf)
def _normalize_edge(edge, dim_index, limit, schema=None):
    """
    Normalize one edge clause

    :param edge: Not normalized edge
    :param dim_index: Dimensions are ordered; this is this edge's index into that order
    :param limit: handed to `_normalize_domain` for a text edge resolved
                  against a schema; not used by the other branches
    :param schema: for context
    :return: a normalized edge, as a one-element list
    """
    if not edge:
        Log.error("Edge has no value, or expression is empty")
    elif is_text(edge):
        if not schema:
            # NOTHING TO LOOK THE NAME UP IN
            return [
                Data(name=edge,
                     value=jx_expression(edge, schema=schema),
                     allowNulls=True,
                     dim=dim_index,
                     domain=DefaultDomain())
            ]

        leaves = unwraplist(list(schema.leaves(edge)))

        if not leaves or is_container(leaves):
            # NO MATCH, OR MANY MATCHES: TREAT THE TEXT AS A PLAIN EXPRESSION
            value = jx_expression(edge, schema=schema)
            domain = _normalize_domain(None, limit)
            return [
                Data(name=edge,
                     value=value,
                     allowNulls=True,
                     dim=dim_index,
                     domain=domain)
            ]

        if isinstance(leaves, Column):
            value = jx_expression(edge, schema=schema)
            domain = _normalize_domain(domain=leaves,
                                       limit=limit,
                                       schema=schema)
            return [
                Data(name=edge,
                     value=value,
                     allowNulls=True,
                     dim=dim_index,
                     domain=domain)
            ]

        if is_list(leaves.fields) and len(leaves.fields) == 1:
            # SINGLE-FIELD MATCH: THAT FIELD IS THE VALUE
            value = jx_expression(leaves.fields[0], schema=schema)
            return [
                Data(name=leaves.name,
                     value=value,
                     allowNulls=True,
                     dim=dim_index,
                     domain=leaves.getDomain())
            ]

        return [
            Data(name=leaves.name,
                 allowNulls=True,
                 dim=dim_index,
                 domain=leaves.getDomain())
        ]
    else:
        clause = to_data(edge)
        if not clause.name and not is_text(clause.value):
            Log.error("You must name compound and complex edges: {{edge}}",
                      edge=clause)

        allow_nulls_default = True
        if is_container(clause.value) and not clause.domain:
            # COMPLEX EDGE IS SHORT HAND
            domain = _normalize_domain(schema=schema)
            domain.dimension = Data(fields=clause.value)
            value = jx_expression(clause.value, schema=schema)
            return [
                Data(name=clause.name,
                     value=value,
                     allowNulls=bool(
                         coalesce(clause.allowNulls, allow_nulls_default)),
                     dim=dim_index,
                     domain=domain)
            ]

        domain = _normalize_domain(clause.domain, schema=schema)
        name = coalesce(clause.name, clause.value)
        value = jx_expression(clause.value, schema=schema)
        range_ = _normalize_range(clause.range)
        return [
            Data(name=name,
                 value=value,
                 range=range_,
                 allowNulls=bool(
                     coalesce(clause.allowNulls, allow_nulls_default)),
                 dim=dim_index,
                 domain=domain)
        ]
def _normalize_select(select, frum, schema=None):
    """
    Normalize one select clause, using the source for column definitions

    When `frum` (unwrapped) has its own `_normalize_select`, the canonical
    clause is delegated to it and its result is returned unchanged.

    :param select: ONE SELECT COLUMN
    :param frum: TABLE TO get_columns()
    :param schema: SCHEMA TO LOOKUP NAMES FOR DEFINITIONS
    :return: AN ARRAY OF SELECT COLUMNS
    """
    if is_text(select):
        # same object on purpose: the text form has nothing else to copy
        canonical = select = Data(value=select)
    else:
        select = to_data(select)
        canonical = select.copy()

    canonical.aggregate = coalesce(canonical_aggregates[select.aggregate].name,
                                   select.aggregate, "none")
    canonical.default = coalesce(
        select.default, canonical_aggregates[canonical.aggregate].default)

    # unwrap() first, so the attribute test is made on the underlying object
    if hasattr(unwrap(frum), "_normalize_select"):
        return frum._normalize_select(canonical)

    output = []

    if len(select) and not select.value:
        Log.error(BAD_SELECT, select=select)
    elif not select.value or select.value == ".":
        output.extend([
            set_default(
                {
                    "name": c.name,
                    "value": jx_expression(c.name, schema=schema)
                }, canonical) for c in schema.leaves('.')
            # TOP LEVEL COLUMNS ONLY
            if len(c.nested_path) == 1
        ])
    elif is_text(select.value):
        if select.value.endswith(".*"):
            canonical.name = coalesce(select.name, ".")
            # parse the text in front of ".*" (slice the value, not the
            # select clause itself)
            value = jx_expression(select.value[:-2], schema=schema)
            # `prefix.*` expansion is not implemented here: both outcomes
            # are reported through Log.error (expected to raise)
            if not is_op(value, Variable):
                Log.error("`*` over general expression not supported yet")
            else:
                Log.error("do not know what to do")
        else:
            canonical.name = coalesce(select.name, select.value,
                                      select.aggregate)
            canonical.value = jx_expression(select.value, schema=schema)
            output.append(canonical)
    # NOTE(review): a non-text `value` (e.g. an expression object) matches no
    # branch above and yields an empty result -- confirm this is intended

    output = to_data(output)
    if any(n == None for n in output.name):
        Log.error("expecting select to have a name: {{select}}", select=select)
    return output
from mo_dots import Data, FlatList, Null, coalesce, concat_field, is_container, is_data, is_list, listwrap, \
    literal_field, relative_field, set_default, unwrap, unwraplist, is_many, dict_to_data, to_data, list_to_data
from mo_future import is_text, text
from mo_imports import expect
from mo_json import INTERNAL
from mo_json.typed_encoder import untype_path
from mo_logs import Log
from mo_math import AND, UNION, is_number

# Placeholder for the Column class, obtained through mo_imports.expect
# NOTE(review): presumably bound later by the module that defines Column -- confirm
Column = expect("Column")

# Message template passed to Log.error when a select clause has properties but no usable `value`
BAD_SELECT = "Expecting `value` or `aggregate` in select clause not {{select}}"
# NOTE(review): not referenced in the visible code; presumably the default and
# maximum row limits applied to a query -- confirm against the callers
DEFAULT_LIMIT = 10
MAX_LIMIT = 10000
# Select clause named "count" that aggregates "." with `count`, defaulting to 0
# NOTE(review): presumably used when a query gives no select -- confirm
DEFAULT_SELECT = Data(name="count",
                      value=jx_expression("."),
                      aggregate="count",
                      default=0)


class QueryOp(Expression):
    __slots__ = [
        "frum", "select", "edges", "groupby", "where", "window", "sort",
        "limit", "format", "chunk_size", "destination"
    ]

    def __init__(self,
                 frum,
                 select=None,
                 edges=None,
                 groupby=None,
Пример #14
0
def _normalize_where(where, schema=None):
    """
    Parse a where clause into an expression

    :param where: the where clause; when `is_many` accepts it, it is wrapped
                  as `{"and": where}`; when falsy it is replaced by TRUE
    :param schema: passed through to `jx_expression`
    :return: whatever `jx_expression` returns for the resulting clause
    """
    if is_many(where):
        clause = {"and": where}
    elif not where:
        clause = TRUE
    else:
        clause = where
    return jx_expression(clause, schema=schema)