Пример #1
0
def get_pull(column):
    """
    Return the path used to pull this column's value out of a result row.

    A column whose first nested path is "." is read from under "fields",
    using the literal form of its es_column. Any other column is read from
    under "_inner", using what is left of es_column after dropping as many
    leading steps as its first nested path has.

    :param column: object exposing `nested_path` (indexable) and `es_column`
    :return: the path built by `concat_field` / `join_field`
    """
    nested = column.nested_path[0]
    if nested != ".":
        skip = len(split_field(nested))
        relative = split_field(column.es_column)[skip:]
        return join_field(["_inner"] + relative)
    return concat_field("fields", literal_field(column.es_column))
Пример #2
0
def untype_path(encoded):
    """
    Drop the type-marker steps from an encoded path and decode the rest.

    Steps starting with TYPE_PREFIX are removed; every remaining step goes
    through `decode_property`. When `encoded` starts with "..", the run of
    leading dots (minus one) is kept in front of the cleaned path.

    :param encoded: the encoded path (text)
    :return: the decoded path without type steps
    """
    def _strip_types(path):
        return join_field(
            decode_property(step)
            for step in split_field(path)
            if not step.startswith(TYPE_PREFIX)
        )

    if not encoded.startswith(".."):
        return _strip_types(encoded)

    tail = encoded.lstrip(".")
    leading = len(encoded) - len(tail) - 1
    return ("." * leading) + _strip_types(tail)
Пример #3
0
    def select(self, selectList, fromPath, varName, sourceVar):
        """
        Build the script fragment that writes the selected values to `varName`.

        :param selectList: select clauses; each exposes `value` and `domain`
        :param fromPath: dotted path of the source; more than one step means
                         the "deep" form is generated
        :param varName: name of the script variable receiving the result
        :param sourceVar: not used by this method
        :return: Data(head=<joined heads of domain terms>, body=<assignment>)
        """
        is_deep = len(split_field(fromPath)) > 1
        heads = []
        terms = []
        for s in selectList:
            simple_variable = s.value and is_variable_name(s.value)
            if is_deep:
                if simple_variable:
                    terms.append("Value2Pipe(" + self._translate(s.value) + ")\n")
                else:
                    Log.error("do not know how to handle yet")
            elif simple_variable:
                terms.append("Value2Pipe(getDocValue(" + value2MVEL(s.value) + "))\n")
            elif s.value:
                terms.append("Value2Pipe(" + self._translate(s.value) + ")\n")
            else:
                # NO VALUE GIVEN: THE TERM COMES FROM THE DOMAIN PARTS
                code, _decode = self.Parts2Term(s.domain)
                heads.append(code.head)
                terms.append("Value2Pipe(" + code.body + ")\n")

        # NOTE(review): every term is already wrapped in Value2Pipe(...) above
        # and is wrapped a second time here - confirm this is intended
        joined = '+"|"+'.join(["Value2Pipe(" + t + ")\n" for t in terms])
        if is_deep:
            # DEEP FORM APPENDS TO THE VARIABLE, SEPARATED BY A PIPE
            output = 'if (' + varName + ' != "") ' + varName + '+="|";\n' + varName + '+=' + joined + ';\n'
        else:
            output = varName + ' = ' + joined + ';\n'

        return Data(head="".join(heads), body=output)
Пример #4
0
def tuple(data, field_name):
    """
    Return a list of tuples, one per record of `data`.

    :param data: iterable of records (Cube and FlatList are rejected)
    :param field_name: a dotted field name, a {"value": name} mapping,
                       a list of field selectors, or a single selector
    :return: list (or FlatList) of tuples holding the selected values
    """
    if isinstance(data, Cube):
        Log.error("not supported yet")

    if isinstance(data, FlatList):
        Log.error("not supported yet")

    # {"value": value} IS SHORTHAND FOR THE VALUE ITSELF
    if is_data(field_name) and "value" in field_name:
        field_name = field_name["value"]

    # A PLAIN PYTHON ITERABLE IS ASSUMED FROM HERE ON
    if is_text(field_name):
        path = split_field(field_name)
        if len(path) == 1:
            return [(d[field_name],) for d in data]
        output = []
        flat_list._tuple1(data, path, 0, output)
        return output

    if is_list(field_name):
        paths = [_select_a_field(f) for f in field_name]
        source = unwrap(data)
    else:
        paths = [_select_a_field(field_name)]
        source = data

    output = FlatList()
    _tuple((), source, paths, 0, output)
    return output
Пример #5
0
def _select_a_field(field):
    """
    Normalize a field selector into {"name": ..., "value": ...}.

    A text selector names itself and its value is its split path. A
    selector whose `value` is text keeps its name and gets the split path
    of that value. Anything else keeps its `name` and `value` untouched.
    """
    if is_text(field):
        return wrap({"name": field, "value": split_field(field)})

    wrapped = wrap(field)
    if is_text(wrapped.value):
        return wrap({"name": wrapped.name, "value": split_field(wrapped.value)})

    # THE RAW (UNWRAPPED) FIELD IS READ HERE
    return wrap({"name": field.name, "value": field.value})
Пример #6
0
def _select_a_field(field):
    """
    Normalize a field selector into {"name": ..., "value": ...}.

    A string selector names itself and its value is its split path. A
    selector whose `value` is a string keeps its name and gets the split
    path of that value. Anything else keeps its `name` and `value` as-is.
    """
    if isinstance(field, basestring):
        return wrap({"name": field, "value": split_field(field)})

    wrapped = wrap(field)
    if isinstance(wrapped.value, basestring):
        return wrap({"name": wrapped.name, "value": split_field(wrapped.value)})

    # THE RAW (UNWRAPPED) FIELD IS READ HERE
    return wrap({"name": field.name, "value": field.value})
Пример #7
0
    def select(self, fields):
        """
        Select one field, or a list of fields, from the rows of this object.

        :param fields: a Mapping (its `value` is used), a text field name,
                       or a list of selectors exposing `name` and `value`
        :return: a list of values for a single-step text field, otherwise
                 a FlatList; any other kind of `fields` is reported as an
                 error through Log.error
        """
        if isinstance(fields, Mapping):
            fields=fields.value

        if isinstance(fields, text_type):
            # RETURN LIST OF VALUES
            if len(split_field(fields)) == 1:
                # SINGLE STEP: EITHER THE FIRST STEP OF self.path, OR A PROPERTY OF d[0]
                if self.path[0] == fields:
                    return [d[1] for d in self.data]
                else:
                    return [d[0][fields] for d in self.data]
            else:
                keys = split_field(fields)
                # FIRST INDEX WHERE keys AND self.path DISAGREE (OR len(self.path) WHEN MIN GIVES NOTHING)
                depth = coalesce(MIN([i for i, (k, p) in enumerate(zip(keys, self.path)) if k != p]), len(self.path))  # LENGTH OF COMMON PREFIX
                short_key = keys[depth:]

                output = FlatList()
                # _select1 IS HANDED output TO FILL, STARTING FROM d[depth] OF EACH ROW
                _select1((wrap(d[depth]) for d in self.data), short_key, 0, output)
                return output

        if isinstance(fields, list):
            output = FlatList()

            # meta HOLDS (name, getter) PAIRS; EACH getter TAKES A ROW
            meta = []
            for f in fields:
                if hasattr(f.value, "__call__"):
                    # CALLABLE VALUES ARE USED AS THE GETTER DIRECTLY
                    meta.append((f.name, f.value))
                else:
                    # partial BINDS v=f.value NOW, SO THE GETTER RETURNS row[f.value]
                    meta.append((f.name, functools.partial(lambda v, d: d[v], f.value)))

            for row in self._values():
                agg = Data()
                for name, f in meta:
                    agg[name] = f(row)

                output.append(agg)

            return output

            # UNREACHABLE: EARLIER IMPLEMENTATION, KEPT COMMENTED OUT
            # meta = []
            # for f in fields:
            #     keys = split_field(f.value)
            #     depth = coalesce(MIN([i for i, (k, p) in enumerate(zip(keys, self.path)) if k != p]), len(self.path))  # LENGTH OF COMMON PREFIX
            #     short_key = join_field(keys[depth:])
            #
            #     meta.append((f.name, depth, short_key))
            #
            # for row in self._data:
            #     agg = Data()
            #     for name, depth, short_key in meta:
            #         if short_key:
            #             agg[name] = row[depth][short_key]
            #         else:
            #             agg[name] = row[depth]
            #     output.append(agg)
            # return output

        Log.error("multiselect over FlatList not supported")
Пример #8
0
def _select_a_field(field):
    """
    Normalize a field selector into {"name": ..., "value": ...}.

    A text selector names itself and its value is its split path. A
    selector whose `value` is text keeps its name and gets the split path
    of that value. Anything else keeps its `name` and `value` untouched.
    """
    if is_text(field):
        return dict_to_data({"name": field, "value": split_field(field)})

    as_data = to_data(field)
    if is_text(as_data.value):
        selector = {"name": as_data.name, "value": split_field(as_data.value)}
        return dict_to_data(selector)

    # THE RAW (UNCONVERTED) FIELD IS READ HERE
    return dict_to_data({"name": field.name, "value": field.value})
Пример #9
0
def untype_path(encoded):
    """
    Drop the type-marker steps from an encoded path and decode the rest.

    Steps starting with TYPE_PREFIX are removed; every remaining step goes
    through `decode_property`. When `encoded` starts with "..", the run of
    leading dots (minus one) is kept in front of the cleaned path.

    :param encoded: the encoded path (text)
    :return: the decoded path without type steps
    """
    dots = ""
    target = encoded
    relative = encoded.startswith("..")
    if relative:
        target = encoded.lstrip(".")
        dots = "." * (len(encoded) - len(target) - 1)

    cleaned = join_field(
        decode_property(step)
        for step in split_field(target)
        if not step.startswith(TYPE_PREFIX)
    )
    if relative:
        return dots + cleaned
    return cleaned
Пример #10
0
                def defParent(name):
                    """
                    Append the declaration of map `name` to contextVariables,
                    declaring its own parents first; each name is handled once.
                    """
                    # DO NOT MAKE THE SAME PARENT TWICE
                    if name in parentVarNames:
                        return
                    parentVarNames.add(name)

                    steps = split_field(name)
                    if len(steps) != 1:
                        # DOTTED NAME: THE ENCLOSING MAP MUST EXIST FIRST
                        defParent(join_field(steps[0:-1]))
                        declaration = name + " = new HashMap();\n"
                    else:
                        declaration = "Map " + name + " = new HashMap();\n"
                    contextVariables.append(declaration)
Пример #11
0
                def defParent(name):
                    """
                    Append the declaration of map `name` to contextVariables,
                    declaring its own parents first; each name is handled once.
                    """
                    # DO NOT MAKE THE SAME PARENT TWICE
                    if name in parentVarNames:
                        return
                    parentVarNames.add(name)

                    steps = split_field(name)
                    top_level = len(steps) == 1
                    if not top_level:
                        # DOTTED NAME: THE ENCLOSING MAP MUST EXIST FIRST
                        defParent(join_field(steps[0:-1]))
                    prefix = "Map " if top_level else ""
                    contextVariables.append(prefix + name + " = new HashMap();\n")
Пример #12
0
    def getFrameVariables(self, body):
        """
        Return the script lines that declare every column referenced in `body`.

        A column counts as referenced at its first occurrence that is neither
        directly preceded by a double quote nor prefixed with "_source.".
        Dotted column names also get their parent maps declared, once each.

        :param body: script text to scan (".?" is treated as ".")
        :return: the declarations joined into one string
        """
        contextVariables = []
        parentVarNames = set()    # ALL PARENTS OF VARIABLES WITH "." IN NAME

        def defParent(name):
            # DO NOT MAKE THE SAME PARENT TWICE
            if name in parentVarNames:
                return
            parentVarNames.add(name)

            steps = split_field(name)
            if len(steps) != 1:
                defParent(join_field(steps[0:-1]))
                contextVariables.append(name + " = new HashMap();\n")
            else:
                contextVariables.append("Map " + name + " = new HashMap();\n")

        columns = self.fromData.columns
        body = body.replace(".?", ".")

        for c in columns:
            nxt = body.find(c.name, 0)
            while nxt >= 0:
                here = nxt
                nxt = body.find(c.name, here + 1)

                quoted = body[here - 1: here + len(c.name) + 1:]
                sourced = body[here - 8: here + len(c.name):]
                if quoted[:-1] == "\"" + c.name or sourced == "_source." + c.name:
                    # A STRING LITERAL, OR AN EXPLICIT _source REFERENCE: TRY THE NEXT HIT
                    continue

                # BLANK OUT THE NAME SO IT CAN NOT MATCH AGAIN
                body = body.replace(c.name, "-" * len(c.name))

                dotted = len(split_field(c.name)) > 1
                if dotted:
                    defParent(join_field(split_field(c.name)[0:-1]))

                if not (self.isLean or c.useSource):
                    accessor = "getDocValue(\"" + c.name + "\")"
                elif dotted:
                    accessor = "getSourceValue(\"" + c.name + "\")"
                else:
                    accessor = "_source[\"" + c.name + "\"]"
                contextVariables.append(c.name + " = " + accessor + ";\n")
                break

        return "".join(contextVariables)
Пример #13
0
    def getFrameVariables(self, body):
        """
        Return the script lines that declare every column referenced in `body`.

        A column counts as referenced at its first occurrence that is neither
        directly preceded by a double quote nor prefixed with "_source.".
        Dotted column names also get their parent maps declared, once each.

        :param body: script text to scan (".?" is treated as ".")
        :return: the declarations joined into one string
        """
        declarations = []
        declared_parents = set()    # ALL PARENTS OF VARIABLES WITH "." IN NAME

        def defParent(name):
            # DO NOT MAKE THE SAME PARENT TWICE
            if name in declared_parents:
                return
            declared_parents.add(name)

            if len(split_field(name)) == 1:
                declarations.append("Map " + name + " = new HashMap();\n")
                return
            defParent(join_field(split_field(name)[0:-1]))
            declarations.append(name + " = new HashMap();\n")

        columns = self.fromData.columns
        body = body.replace(".?", ".")

        for column in columns:
            found = body.find(column.name, 0)
            while found >= 0:
                at = found
                found = body.find(column.name, at + 1)

                with_quote = body[at - 1: at + len(column.name) + 1:]
                with_source = body[at - 8: at + len(column.name):]

                if with_quote[:-1] == "\"" + column.name:
                    continue    # INSIDE A STRING LITERAL
                if with_source == "_source." + column.name:
                    continue    # ALREADY AN EXPLICIT _source REFERENCE

                # BLANK OUT THE NAME SO IT CAN NOT MATCH AGAIN
                body = body.replace(column.name, "-" * len(column.name))

                has_parent = len(split_field(column.name)) > 1
                if has_parent:
                    defParent(join_field(split_field(column.name)[0:-1]))

                if self.isLean or column.useSource:
                    if has_parent:
                        reader = "getSourceValue(\"" + column.name + "\");\n"
                    else:
                        reader = "_source[\"" + column.name + "\"];\n"
                else:
                    reader = "getDocValue(\"" + column.name + "\");\n"
                declarations.append(column.name + " = " + reader)
                break

        return "".join(declarations)
Пример #14
0
def setValues(expression, constants):
    """
    Replace standalone references to constants in `expression` with the
    script literal of each constant's value.

    Mapping-valued constants are first expanded into `<name>.<key>` entries
    (names with three or more steps and list values are left alone). Names
    are then substituted in reverse order so the expanded, longer names are
    handled before their parents. A match only counts when it is not
    directly touching a word character on either side.

    :param expression: the script text
    :param constants: list of entries exposing `name` and `value`; a falsy
                      value leaves `expression` unchanged
    :return: the expression with constants substituted
    """
    if not constants:
        return expression

    constants = constants.copy()

    # EXPAND ALL CONSTANTS TO PRIMITIVE VALUES (MVEL CAN ONLY ACCEPT PRIMITIVE VALUES)
    # THE LIST GROWS WHILE IT IS ITERATED, SO THE NEW ENTRIES GET EXPANDED TOO
    for c in constants:
        value = c.value
        n = c.name
        if len(split_field(n)) >= 3:
            continue    # DO NOT GO TOO DEEP
        if isinstance(value, list):
            continue  # DO NOT MESS WITH ARRAYS

        if isinstance(value, Mapping):
            for k, v in value.items():
                constants.append({"name": n + "." + k, "value": v})

    word_char = re.compile(r"\w")
    for c in reverse(constants):  # REVERSE ORDER, SO LONGER NAMES ARE TESTED FIRST
        name = c.name
        if not name:
            continue  # AN EMPTY NAME MATCHES EVERYWHERE; NOTHING TO REPLACE
        s = 0
        while True:
            s = expression.find(name, s)
            if s == -1:
                break
            end = s + len(name)
            # ONLY LOOK AT NEIGHBOURS THAT EXIST: s - 1 MUST NOT WRAP TO THE
            # LAST CHARACTER, AND end MAY BE ONE PAST THE END OF THE STRING
            if s > 0 and word_char.match(expression[s - 1]):
                s += 1  # PART OF A LONGER IDENTIFIER; KEEP SCANNING
                continue
            if end < len(expression) and word_char.match(expression[end]):
                s += 1  # PART OF A LONGER IDENTIFIER; KEEP SCANNING
                continue

            v = value2MVEL(c.value)
            # KEEP THE TEXT AFTER THE NAME, NOT A SECOND COPY OF THE PREFIX
            expression = expression[:s] + v + expression[end:]
            # RESUME AFTER THE INSERTED VALUE SO IT IS NEVER RE-SCANNED
            s += len(v)

    return expression
Пример #15
0
    def get_column(self, name):
        """
        Build the Column for the dotted `name`.

        The schema tree is walked one step of `name` at a time; any '.'
        child found along the way is entered before going on. A step that
        is missing from the tree is reported through Log.error.

        :param name: dotted column name
        :return: Column carrying the values, repetition/definition levels,
                 row count, and schema node found for `name`
        """
        def _enter_dots(node):
            while '.' in node.more:
                node = node.more.get('.')
            return node

        sub_schema = _enter_dots(self.schema)
        for step in split_field(name):
            children = sub_schema.more
            if step in children:
                sub_schema = children.get(step)
            else:
                Log.error("{{name}} not found in schema", name=name)
            sub_schema = _enter_dots(sub_schema)

        return Column(
            name,
            self.values[name],
            self.reps[name],
            self.defs[name],
            self.num_rows,
            sub_schema,
            self.schema.max_repetition_level(name),
            self.schema.max_definition_level(name)
        )
Пример #16
0
def is_setop(es, query):
    """
    Decide whether `query` is handled as a set operation.

    Only clusters whose version starts with 1.4., 1.5., 1.6. or 1.7.
    qualify. Without edges the query qualifies when every select is a
    "count"/"none" aggregate or the source name has more than one step.
    With edges it qualifies when every edge domain is algebraic and has
    interval "none".

    :param es: object exposing `cluster.version` (text)
    :param query: query exposing `select`, `edges`, and `frum.name`
    :return: True or False
    """
    version_startswith = es.cluster.version.startswith
    supported = ("1.4.", "1.5.", "1.6.", "1.7.")
    if not any(version_startswith(prefix) for prefix in supported):
        return False

    select = listwrap(query.select)

    if query.edges:
        isSmooth = AND(
            (e.domain.type in ALGEBRAIC and e.domain.interval == "none")
            for e in query.edges
        )
        return True if isSmooth else False

    # NO EDGES IMPLIES SIMPLER QUERIES: EITHER A SET OPERATION, OR RETURN SINGLE AGGREGATE
    # LOOKING INTO NESTED WILL REQUIRE A SCRIPT
    isDeep = len(split_field(query.frum.name)) > 1
    # CONVERTING esfilter DEFINED PARTS WILL REQUIRE SCRIPT
    simpleAgg = AND([s.aggregate in ("count", "none") for s in select])
    return True if (simpleAgg or isDeep) else False
Пример #17
0
    def not_monitor(self, please_stop):
        """
        Drain self.todo while the metadata scan is disabled.

        Each column taken from the queue gets its `last_updated` refreshed,
        except columns that are still fresh (skipped untouched) and the
        "build.type" / "run.type" columns (their cardinality is updated).

        :param please_stop: stop signal; when it goes, THREAD_STOP is put
                            on the queue so the loop ends
        """
        Log.alert("metadata scan has been disabled")
        please_stop.on_go(lambda: self.todo.add(THREAD_STOP))
        while not please_stop:
            column = self.todo.pop()
            if column == THREAD_STOP:
                break

            is_struct = column.jx_type in STRUCT or split_field(column.es_column)[-1] == EXISTS_TYPE
            if is_struct:
                if DEBUG:
                    Log.note("{{column.es_column}} is a struct", column=column)
                column.last_updated = Date.now()
                continue

            if column.last_updated > Date.now() - TOO_OLD and column.cardinality is not None:
                # DO NOT UPDATE FRESH COLUMN METADATA
                if DEBUG:
                    Log.note("{{column.es_column}} is still fresh ({{ago}} ago)", column=column, ago=(Date.now()-Date(column.last_updated)).seconds)
                continue

            with Timer("Update {{col.es_index}}.{{col.es_column}}", param={"col": column}, silent=not DEBUG, too_long=0.05):
                if untype_path(column.name) not in ["build.type", "run.type"]:
                    column.last_updated = Date.now()
                else:
                    try:
                        self._update_cardinality(column)
                    except Exception as cause:
                        Log.warning("problem getting cardinality for {{column.name}}", column=column, cause=cause)
Пример #18
0
def unnest_path(encoded):
    """
    Remove the NESTED_TYPE steps from an encoded path and decode the rest.

    A path starting with ".." first loses all of its leading dots; if
    nothing is left, "." is used instead.

    :param encoded: the encoded path (text)
    :return: the decoded path without NESTED_TYPE steps
    """
    if encoded.startswith(".."):
        encoded = encoded.lstrip(".") or "."

    kept = (
        decode_property(step)
        for step in split_field(encoded)
        if step != NESTED_TYPE
    )
    return join_field(kept)
Пример #19
0
 def to_sql(self, schema, not_null=False, boolean=False):
     """
     Return one {"name", "sql"} entry for every schema column under the
     variable held in self.term.

     A column is included when its name starts with the variable and either
     it is not EXISTS/OBJECT/NESTED and its nested path is a prefix match
     for the schema's, or it is not EXISTS/OBJECT and its nested path
     equals the schema's. Names are given relative to the variable.

     :param schema: schema exposing `columns`, `nested_path`, `get_column_name`
     :param not_null: not used by this method
     :param boolean: not used by this method
     :return: wrapped list of {"name": ..., "sql": ...}
     """
     if not is_op(self.term, Variable):
         Log.error("Can only handle Variable")
     term = self.term.var
     prefix_length = len(split_field(term))

     def _is_selected(c):
         return startswith_field(c.name, term) and (
             (
                 c.jx_type not in (EXISTS, OBJECT, NESTED)
                 and startswith_field(schema.nested_path[0], c.nested_path[0])
             )
             or (
                 c.jx_type not in (EXISTS, OBJECT)
                 and schema.nested_path[0] == c.nested_path[0]
             )
         )

     selected = []
     for c in schema.columns:
         if not _is_selected(c):
             continue
         relative_name = join_field(
             split_field(schema.get_column_name(c))[prefix_length:]
         )
         sql = Variable(schema.get_column_name(c)).to_sql(schema)[0].sql
         selected.append({"name": relative_name, "sql": sql})
     return wrap(selected)
Пример #20
0
def typed_encode(value, flake):
    """
    Convert `value` to its strictly typed form and relocate the top-level
    fields of `flake`.

    :param value: THE RECORD TO CONVERT TO STRICT TYPED FORM
    :param flake: LOOKUP SCHEMA, WILL BE UPDATED WITH CHANGES
    :return: (record, update, nested) TUPLE
    """
    _ = flake.columns  # ENSURE WE HAVE INTERNAL STRUCTURES FILLED
    output, update, nested = _typed_encode(value, flake.schema)
    if update:
        # REFRESH COLUMNS
        flake._columns = None
        _ = flake.columns

    worker = to_data(output)
    for path, field in flake._top_level_fields.items():
        # MOVE THE VALUE FROM path TO field
        worker[field] = worker[path]
        worker[path] = None

        # DO NOT LEAVE ANY EMPTY OBJECT RESIDUE
        steps = split_field(path)
        for i, _step in jx.reverse(enumerate(steps)):
            parent = join_field(steps[:i])
            candidate = worker[parent]
            if not (is_data(candidate) and not worker[parent].keys()):
                break
            worker[parent] = None

    return output, update, nested
Пример #21
0
    def get_columns(self, table_name, column_name=None, force=False):
        """
        RETURN METADATA COLUMNS

        The table is looked up by the first step of `table_name`. An unknown
        table is registered and its columns loaded; a known table is
        reloaded when `force` is set or its timestamp is missing or older
        than MAX_COLUMN_METADATA_AGE. When columns are found, this waits
        until the todo queue is empty or every column has `last_updated`.

        :param table_name: dotted table name
        :param column_name: optional column to narrow the lookup
        :param force: reload the column metadata regardless of its age
        :return: the matching columns sorted by name; nothing is returned
                 when no column matches
        """
        try:
            # LAST TIME WE GOT INFO FOR THIS TABLE
            short_name = join_field(split_field(table_name)[0:1])
            table = self.get_table(short_name)[0]

            if not table:
                table = Table(
                    name=short_name,
                    url=None,
                    query_path=None,
                    timestamp=Date.now()
                )
                with self.meta.tables.locker:
                    self.meta.tables.add(table)
                self._get_columns(table=short_name)
            # NOTE(review): `== None` (not `is None`) is kept as written - presumably
            # intentional for the project's null object; confirm before changing
            elif force or table.timestamp == None or table.timestamp < Date.now() - MAX_COLUMN_METADATA_AGE:
                table.timestamp = Date.now()
                self._get_columns(table=short_name)

            with self.meta.columns.locker:
                columns = self.meta.columns.find(table_name, column_name)
            if columns:
                columns = jx.sort(columns, "name")
                # AT LEAST WAIT FOR THE COLUMNS TO UPDATE
                while len(self.todo) and not all(columns.get("last_updated")):
                    if DEBUG:
                        Log.note("waiting for columns to update {{columns|json}}", columns=[c.table+"."+c.es_column for c in columns if not c.last_updated])
                    Till(seconds=1).wait()
                return columns
        except Exception as e:
            # `as` FORM IS VALID ON BOTH PYTHON 2.6+ AND PYTHON 3
            Log.error("Not expected", cause=e)
Пример #22
0
def to_python(self, not_null=False, boolean=False, many=False):
    """
    Return the Python source text that reads this variable from a row.

    The variable path is turned into chained `.get(...)` calls on `row`.
    `row` / `rownum` / `rows` are magic names: `row` and `rownum` replace
    the starting point, while `rows.first` and `rows.last` start from
    `rows.first()` / `rows.last()`.

    :param not_null: when False, intermediate lookups default to EMPTY_DICT
    :param boolean: not used by this method
    :param many: wrap the final expression in listwrap(...)
    :return: the expression as text
    """
    path = split_field(self.var)
    agg = "row"
    if not path:
        return agg
    elif path[0] in ["row", "rownum"]:
        # MAGIC VARIABLES
        agg = path[0]
        path = path[1:]
        if len(path) == 0:
            return agg
    elif path[0] == "rows":
        if len(path) == 1:
            return "rows"
        elif path[1] in ["first", "last"]:
            agg = "rows." + path[1] + "()"
            path = path[2:]
            if not path:
                # `rows.first` / `rows.last` ON ITS OWN: NOTHING LEFT TO LOOK
                # UP, AND path[-1] BELOW WOULD RAISE IndexError
                return agg
        else:
            Log.error("do not know what {{var}} of `rows` is", var=path[1])

    for p in path[:-1]:
        if not_null:
            agg = agg + ".get(" + convert.value2quote(p) + ")"
        else:
            # KEEP THE CHAIN SAFE WHEN AN INTERMEDIATE STEP IS MISSING
            agg = agg + ".get(" + convert.value2quote(p) + ", EMPTY_DICT)"
    output = agg + ".get(" + convert.value2quote(path[-1]) + ")"
    if many:
        output = "listwrap(" + output + ")"
    return output
Пример #23
0
def find_container(frum, after):
    """
    Resolve a `from` clause to the container that can answer it.

    The module-level `namespace` is created on first use from
    container.config.default.settings, and its columns for `frum` are
    requested on every call before anything else is looked up.

    :param frum: a table name (text), a data object with a `type`, a data
                 object with a `from` property, a container, or anything else
    :param after: passed through to namespace.get_columns
    :return: for text, the cached or newly built container (or the meta
             columns / tables for "meta.columns" / "meta.tables"); for typed
             data, the container of that type; for data with `from`, the
             wrapped query; for a container, a ListContainer around it;
             otherwise `frum` itself
    """
    global namespace
    if not namespace:
        # FIRST USE: BUILD THE SHARED METADATA NAMESPACE FROM THE DEFAULT SETTINGS
        if not container.config.default.settings:
            Log.error(
                "expecting jx_base.container.config.default.settings to contain default elasticsearch connection info"
            )
        namespace = ElasticsearchMetadata(container.config.default.settings)
    if not frum:
        Log.error("expecting json query expression with from clause")

    # FORCE A RELOAD
    namespace.get_columns(frum, after=after)

    if is_text(frum):
        if frum in container_cache:
            return container_cache[frum]

        path = split_field(frum)
        if path[0] == "meta":
            # "meta.columns" AND "meta.tables" ARE ANSWERED BY THE NAMESPACE ITSELF
            if path[1] == "columns":
                return namespace.meta.columns.denormalized()
            elif path[1] == "tables":
                return namespace.meta.tables
            else:
                fact_table_name = join_field(path[:2])
        else:
            fact_table_name = path[0]

        type_ = container.config.default.type

        settings = set_default(
            {
                "alias": fact_table_name,
                "name": frum,
                "exists": True
            },
            container.config.default.settings,
        )
        settings.type = None
        output = container.type2container[type_](settings)
        # REMEMBER THE CONTAINER UNDER THE FULL NAME THAT WAS ASKED FOR
        container_cache[frum] = output
        return output
    elif is_data(frum) and frum.type and container.type2container[frum.type]:
        # TODO: Ensure the frum.name is set, so we capture the deep queries
        # NOTE(review): frum.type was already required by the elif above, so this check looks unreachable
        if not frum.type:
            Log.error("Expecting from clause to have a 'type' property")
        return container.type2container[frum.type](frum.settings)
    elif is_data(frum) and (frum["from"] or is_container(frum["from"])):
        # IMPORTED HERE RATHER THAN AT MODULE LEVEL
        from jx_base.query import QueryOp

        return QueryOp.wrap(frum)
    elif is_container(frum):
        return ListContainer("test_list", frum)
    else:
        return frum
Пример #24
0
def setValues(expression, constants):
    """
    Replace standalone references to constants in `expression` with the
    script literal of each constant's value.

    Mapping-valued constants are first expanded into `<name>.<key>` entries
    (names with three or more steps and list values are left alone). Names
    are then substituted in reverse order so the expanded, longer names are
    handled before their parents. A match only counts when it is not
    directly touching a word character on either side.

    :param expression: the script text
    :param constants: list of entries exposing `name` and `value`; a falsy
                      value leaves `expression` unchanged
    :return: the expression with constants substituted
    """
    if not constants:
        return expression

    constants = constants.copy()

    # EXPAND ALL CONSTANTS TO PRIMITIVE VALUES (MVEL CAN ONLY ACCEPT PRIMITIVE VALUES)
    # THE LIST GROWS WHILE IT IS ITERATED, SO THE NEW ENTRIES GET EXPANDED TOO
    for c in constants:
        value = c.value
        n = c.name
        if len(split_field(n)) >= 3:
            continue    # DO NOT GO TOO DEEP
        if isinstance(value, list):
            continue  # DO NOT MESS WITH ARRAYS

        if isinstance(value, Mapping):
            for k, v in value.items():
                constants.append({"name": n + "." + k, "value": v})

    word_char = re.compile(r"\w")
    for c in reverse(constants):  # REVERSE ORDER, SO LONGER NAMES ARE TESTED FIRST
        name = c.name
        if not name:
            continue  # AN EMPTY NAME MATCHES EVERYWHERE; NOTHING TO REPLACE
        s = 0
        while True:
            s = expression.find(name, s)
            if s == -1:
                break
            end = s + len(name)
            # ONLY LOOK AT NEIGHBOURS THAT EXIST: s - 1 MUST NOT WRAP TO THE
            # LAST CHARACTER, AND end MAY BE ONE PAST THE END OF THE STRING
            if s > 0 and word_char.match(expression[s - 1]):
                s += 1  # PART OF A LONGER IDENTIFIER; KEEP SCANNING
                continue
            if end < len(expression) and word_char.match(expression[end]):
                s += 1  # PART OF A LONGER IDENTIFIER; KEEP SCANNING
                continue

            v = value2MVEL(c.value)
            # KEEP THE TEXT AFTER THE NAME, NOT A SECOND COPY OF THE PREFIX
            expression = expression[:s] + v + expression[end:]
            # RESUME AFTER THE INSERTED VALUE SO IT IS NEVER RE-SCANNED
            s += len(v)

    return expression
Пример #25
0
    def parse_field(fieldname, data, depth):
        """
        RETURN (first, rest) OF fieldname

        `data` is walked along the steps of `fieldname`. The walk stops at
        the first list reached on a multi-step name: that step is recorded
        in the primary_* lists and returned with the remaining path. When
        no such list is met, (fieldname, None) is returned.
        """
        steps = split_field(fieldname)
        multi_step = len(steps) > 1
        node = data
        for offset, step in enumerate(steps):
            try:
                node = node[step]
            except Exception:
                Log.error("{{name}} does not exist", name=fieldname)

            level = depth + offset
            unseen = len(primary_column) <= level
            if is_list(node) and multi_step:
                if unseen:
                    primary_nested.append(True)
                    primary_column.append(step)
                    primary_branch.append(node)
                elif primary_nested[depth] and primary_column[level] != step:
                    Log.error("only one branch of tree allowed")
                else:
                    primary_nested[level] = True
                    primary_column[level] = step
                    primary_branch[level] = node

                return step, join_field(steps[offset + 1:])

            if unseen:
                # NOT A LIST: RECORD IT AS A SINGLE-ELEMENT BRANCH
                primary_nested.append(False)
                primary_column.append(step)
                primary_branch.append([node])
        return fieldname, None
Пример #26
0
    def parse_field(fieldname, data, depth):
        """
        RETURN (first, rest) OF fieldname

        `data` is walked along the steps of `fieldname`. The walk stops at
        the first list reached on a multi-step name: that step is recorded
        in the primary_* lists and returned with the remaining path. When
        no such list is met, (fieldname, None) is returned.
        """
        names = split_field(fieldname)
        current = data
        for i, name in enumerate(names):
            try:
                current = current[name]
            except Exception:
                Log.error("{{name}} does not exist", name=fieldname)

            slot = depth + i
            is_new = len(primary_column) <= slot
            if not (is_list(current) and len(names) > 1):
                if is_new:
                    # NOT A LIST: RECORD IT AS A SINGLE-ELEMENT BRANCH
                    primary_nested.append(False)
                    primary_column.append(name)
                    primary_branch.append([current])
                continue

            if is_new:
                primary_nested.append(True)
                primary_column.append(name)
                primary_branch.append(current)
            elif primary_nested[depth] and primary_column[slot] != name:
                Log.error("only one branch of tree allowed")
            else:
                primary_nested[slot] = True
                primary_column[slot] = name
                primary_branch[slot] = current

            return name, join_field(names[i + 1:])
        return fieldname, None
Пример #27
0
def to_python(self, not_null=False, boolean=False, many=False):
    """
    Return the Python source text that reads this variable from a row.

    The variable path is turned into chained `.get(...)` calls on `row`.
    `row` / `rownum` / `rows` are magic names: `row` and `rownum` replace
    the starting point, while `rows.first` and `rows.last` start from
    `rows.first()` / `rows.last()`.

    :param not_null: when False, intermediate lookups default to EMPTY_DICT
    :param boolean: not used by this method
    :param many: wrap the final expression in listwrap(...)
    :return: the expression as text
    """
    path = split_field(self.var)
    agg = "row"
    if not path:
        return agg
    elif path[0] in ["row", "rownum"]:
        # MAGIC VARIABLES
        agg = path[0]
        path = path[1:]
        if len(path) == 0:
            return agg
    elif path[0] == "rows":
        if len(path) == 1:
            return "rows"
        elif path[1] in ["first", "last"]:
            agg = "rows." + path[1] + "()"
            path = path[2:]
            if not path:
                # `rows.first` / `rows.last` ON ITS OWN: NOTHING LEFT TO LOOK
                # UP, AND path[-1] BELOW WOULD RAISE IndexError
                return agg
        else:
            Log.error("do not know what {{var}} of `rows` is", var=path[1])

    for p in path[:-1]:
        if not_null:
            agg = agg + ".get(" + convert.value2quote(p) + ")"
        else:
            # KEEP THE CHAIN SAFE WHEN AN INTERMEDIATE STEP IS MISSING
            agg = agg + ".get(" + convert.value2quote(p) + ", EMPTY_DICT)"
    output = agg + ".get(" + convert.value2quote(path[-1]) + ")"
    if many:
        output = "listwrap(" + output + ")"
    return output
Пример #28
0
def _get_schema_from_list(frum, table_name, prefix_path, nested_path, columns):
    """
    Scan the records of a list and record, in `columns`, a column (with a
    merged type) for every path found.

    :param frum: The list
    :param table_name: Name of the table this list holds records for
    :param prefix_path: parent path
    :param nested_path: each nested array, in reverse order
    :param columns: map from full name to column definition
    :return: nothing; `columns` is updated in place
    """

    for d in frum:
        row_type = _type_to_name[d.__class__]
        if row_type != "object":
            # A NON-OBJECT ROW IS RECORDED AS THE COLUMN AT prefix_path ITSELF
            full_name = join_field(prefix_path)
            column = columns[full_name]
            if not column:
                column = Column(names={table_name: full_name},
                                es_column=full_name,
                                es_index=".",
                                type="undefined",
                                nested_path=nested_path)
                columns.add(column)
            # COMBINE THE TYPE SEEN SO FAR WITH THE TYPE OF THIS ROW
            column.type = _merge_type[column.type][row_type]
        else:
            for name, value in d.items():
                full_name = join_field(prefix_path + [name])
                column = columns[full_name]
                if not column:
                    column = Column(names={table_name: full_name},
                                    es_column=full_name,
                                    es_index=".",
                                    type="undefined",
                                    nested_path=nested_path)
                    columns.add(column)
                if isinstance(value, list):
                    # LISTS ARE TYPED BY THEIR FIRST ELEMENT; ONLY LISTS OF
                    # TWO OR MORE OBJECTS ARE MARKED "nested"
                    if len(value) == 0:
                        this_type = "undefined"
                    elif len(value) == 1:
                        this_type = _type_to_name[value[0].__class__]
                    else:
                        this_type = _type_to_name[value[0].__class__]
                        if this_type == "object":
                            this_type = "nested"
                else:
                    this_type = _type_to_name[value.__class__]
                new_type = _merge_type[column.type][this_type]
                column.type = new_type

                if this_type == "object":
                    # RECURSE WITH THE VALUE AS A ONE-ELEMENT LIST, SAME nested_path
                    _get_schema_from_list([value], table_name,
                                          prefix_path + [name], nested_path,
                                          columns)
                elif this_type == "nested":
                    # RECURSE INTO THE ARRAY, WITH ITS PATH PUT IN FRONT OF nested_path
                    np = listwrap(nested_path)
                    newpath = unwraplist(
                        [join_field(split_field(np[0]) + [name])] + np)
                    _get_schema_from_list(value, table_name,
                                          prefix_path + [name], newpath,
                                          columns)
Пример #29
0
    def __getitem__(self, name):
        """
        Return the element of the node found by following the dotted
        `name` through the `more` children, starting at this node.

        :param name: dotted path; an empty path refers to this node
        :return: the `element` of the node at the end of the path
        """
        def _get(node, path):
            if not path:
                # END OF PATH: ANSWER WITH THE NODE REACHED, NOT THE ROOT
                return node.element

            return _get(node.more[path[0]], path[1:])

        return _get(self, split_field(name))
Пример #30
0
    def __getitem__(self, name):
        """
        Return the element of the node found by following the dotted
        `name` through the `more` children, starting at this node.

        :param name: dotted path; an empty path refers to this node
        :return: the `element` of the node at the end of the path
        """
        def _get(node, path):
            if not path:
                # END OF PATH: ANSWER WITH THE NODE REACHED, NOT THE ROOT
                return node.element

            return _get(node.more[path[0]], path[1:])

        return _get(self, split_field(name))
Пример #31
0
def _get_schema_from_list(frum, table_name, prefix_path, nested_path, columns):
    """
    Scan the records of a list and record, in `columns`, a column (with a
    merged type) for every path found.

    :param frum: The list
    :param table_name: Name of the table this list holds records for
    :param prefix_path: parent path
    :param nested_path: each nested array, in reverse order
    :param columns: map from full name to column definition
    :return: nothing; `columns` is updated in place
    """

    for d in frum:
        row_type = _type_to_name[d.__class__]
        if row_type != "object":
            # A NON-OBJECT ROW IS RECORDED AS THE COLUMN AT prefix_path ITSELF
            full_name = join_field(prefix_path)
            column = columns[full_name]
            if not column:
                column = Column(
                    names={table_name: full_name},
                    es_column=full_name,
                    es_index=".",
                    type="undefined",
                    nested_path=nested_path
                )
                columns.add(column)
            # COMBINE THE TYPE SEEN SO FAR WITH THE TYPE OF THIS ROW
            column.type = _merge_type[column.type][row_type]
        else:
            for name, value in d.items():
                full_name = join_field(prefix_path + [name])
                column = columns[full_name]
                if not column:
                    column = Column(
                        names={table_name: full_name},
                        es_column=full_name,
                        es_index=".",
                        type="undefined",
                        nested_path=nested_path
                    )
                    columns.add(column)
                if isinstance(value, list):
                    # LISTS ARE TYPED BY THEIR FIRST ELEMENT; ONLY LISTS OF
                    # TWO OR MORE OBJECTS ARE MARKED "nested"
                    if len(value) == 0:
                        this_type = "undefined"
                    elif len(value) == 1:
                        this_type = _type_to_name[value[0].__class__]
                    else:
                        this_type = _type_to_name[value[0].__class__]
                        if this_type == "object":
                            this_type = "nested"
                else:
                    this_type = _type_to_name[value.__class__]
                new_type = _merge_type[column.type][this_type]
                column.type = new_type

                if this_type == "object":
                    # RECURSE WITH THE VALUE AS A ONE-ELEMENT LIST, SAME nested_path
                    _get_schema_from_list([value], table_name, prefix_path + [name], nested_path, columns)
                elif this_type == "nested":
                    # RECURSE INTO THE ARRAY, WITH ITS PATH PUT IN FRONT OF nested_path
                    np = listwrap(nested_path)
                    newpath = unwraplist([join_field(split_field(np[0])+[name])]+np)
                    _get_schema_from_list(value, table_name, prefix_path + [name], newpath, columns)
Пример #32
0
def set(constants):
    """
    REACH INTO THE MODULES AND OBJECTS TO SET CONSTANTS.
    THINK OF THIS AS PRIMITIVE DEPENDENCY INJECTION FOR MODULES.
    USEFUL FOR SETTING DEBUG FLAGS.

    EVERY LEAF OF constants IS A <module>.<constant> PATH WITH ITS NEW VALUE.
    THE PATH IS FIRST RESOLVED AGAINST sys.modules; WHEN THAT FAILS THE
    GLOBALS OF THE CALLING MODULE ARE TRIED, BUT ONLY IF THE CALLER'S FILE
    NAME MATCHES THE MODULE NAME IN THE PATH (OTHERWISE THE CONSTANT IS
    SILENTLY SKIPPED).

    :param constants: NESTED STRUCTURE OF PATHS TO NEW VALUES (FALSY MEANS NOTHING TO DO)
    :return: None; Log.error IS CALLED WHEN A CONSTANT CAN NOT BE SET
    """
    if not constants:
        return
    constants = wrap(constants)

    for full_path, new_value in constants.leaves():
        errors = []
        k_path = split_field(full_path)
        if len(k_path) < 2:
            from mo_logs import Log
            Log.error(
                "expecting <module>.<constant> format, not {{path|quote}}",
                path=k_path)
        name = k_path[-1]
        try:
            mo_dots_set_attr(sys.modules, k_path, new_value)
            continue
        except Exception as e:
            errors.append(e)

        # ONE MODULE IS MISSING, THE CALLING MODULE
        try:
            caller_globals = sys._getframe(1).f_globals
            caller_file = caller_globals["__file__"]
            if not caller_file.endswith(".py"):
                raise Exception("do not know how to handle non-python caller")
            caller_module = caller_file[:-3].replace("\\", "/")
            module_path = caller_module.split("/")

            # ENSURE THERE IS SOME EVIDENCE THE MODULE MATCHES THE PATH
            if k_path[-2] != module_path[-1]:
                continue

            old_value = mo_dots_set_attr(caller_globals, [name], new_value)
            if DEBUG:
                from mo_logs import Log

                Log.note(
                    "Changed {{module}}[{{attribute}}] from {{old_value}} to {{new_value}}",
                    module=caller_module,
                    attribute=name,
                    old_value=old_value,
                    new_value=new_value)
            # SUCCESS: MOVE ON TO THE NEXT CONSTANT (A break HERE WOULD
            # ABANDON ALL THE REMAINING CONSTANTS)
            continue
        except Exception as e:
            errors.append(e)

        if errors:
            from mo_logs import Log

            Log.error("Can not set constant {{path}}",
                      path=full_path,
                      cause=errors)
Пример #33
0
def _get(v, k, d):
    """
    WALK THE DOTTED PATH k INTO v, ONE STEP AT A TIME

    :param v: THE VALUE TO LOOK INTO (ANYTHING WITH A get() METHOD, OR AN ITERABLE OF SUCH)
    :param k: DOTTED PATH, SPLIT WITH split_field
    :param d: DEFAULT RETURNED AS SOON AS A STEP RESOLVES TO None
    :return: THE VALUE FOUND AT THE END OF THE PATH
    """
    for step in split_field(k):
        try:
            found = v.get(step)
        except Exception:
            # v HAS NO USABLE get(); TREAT IT AS A COLLECTION AND STEP INTO EACH MEMBER
            v = [member.get(step) for member in v]
            continue
        if found is None:
            return d
        v = found
    return v
Пример #34
0
    def _edges_op(self, query, frum):
        """
        BUILD THE SQL PIECES (FROM CLAUSE, FILTER, PER-EDGE VALUES) FOR AN EDGES QUERY

        :param query: THE QUERY; IT IS COPIED FIRST BECAUSE IT GETS MARKED UP
        :param frum: NAME OF THE TABLE BEING QUERIED; USED TO LOOK UP THE SCHEMA
        """
        query = query.copy()  # WE WILL BE MARKING UP THE QUERY
        index_to_column = {}  # MAP FROM INDEX TO COLUMN (OR SELECT CLAUSE)
        outer_selects = []  # EVERY SELECT CLAUSE (NOT TO BE USED ON ALL TABLES, OF COURSE)

        # schema MUST BE BOUND BEFORE IT IS READ BELOW, OTHERWISE UnboundLocalError
        schema = self.sf.tables[relative_field(frum, self.sf.fact_name)].schema
        base_table, path = schema.snowflake.fact_name, schema.nested_path
        nest_to_alias = {
            nested_path: quote_column("__" + unichr(ord('a') + i) + "__")
            for i, (nested_path, sub_table) in enumerate(self.sf.tables)
        }

        # ONLY THE NESTED TABLES THAT ARE A PREFIX OF THE QUERY PATH ARE JOINED, SHALLOWEST FIRST
        tables = []
        for n, a in nest_to_alias.items():
            if startswith_field(path, n):
                tables.append({"nest": n, "alias": a})
        tables = jx.sort(tables, {"value": {"length": "nest"}})

        from_sql = quote_column(join_field([base_table] + split_field(tables[0].nest))) + tables[0].alias
        for previous, t in zip(tables, tables[1::]):
            from_sql += (
                SQL_LEFT_JOIN + quote_column(concat_field(base_table, t.nest)) + t.alias +
                SQL_ON + join_column(t.alias, quoted_PARENT) + " = " + join_column(previous.alias, quoted_UID)
            )

        main_filter = query.where.to_sql(schema, boolean=True)[0].sql.b

        # SHIFT THE COLUMN DEFINITIONS BASED ON THE NESTED QUERY DEPTH
        ons = []
        join_types = []
        wheres = []
        null_ons = [EXISTS_COLUMN + SQL_IS_NULL]
        groupby = []
        null_groupby = []
        orderby = []
        domains = []

        select_clause = [SQL_ONE + EXISTS_COLUMN] + [quote_column(c.es_column) for c in self.sf.tables["."].columns]

        for edge_index, query_edge in enumerate(query.edges):
            edge_alias = quote_column("e" + text_type(edge_index))

            if query_edge.value:
                edge_values = [p for c in query_edge.value.to_sql(schema).sql for p in c.items()]

            elif not query_edge.value and any(query_edge.domain.partitions.where):
                # PARTITIONS DEFINED BY WHERE CLAUSES: MAP EACH ROW TO THE INDEX OF THE FIRST MATCHING PARTITION
                case = SQL_CASE
                for pp, p in enumerate(query_edge.domain.partitions):
                    w = p.where.to_sql(schema)[0].sql.b
                    t = quote_value(pp)
                    case += SQL_WHEN + w + SQL_THEN + t
                case += SQL_ELSE + SQL_NULL + SQL_END  # quote value with length of partitions
                edge_values = [("n", case)]

            elif query_edge.range:
                edge_values = query_edge.range.min.to_sql(schema)[0].sql.items() + query_edge.range.max.to_sql(schema)[
                    0].sql.items()
Пример #35
0
 def __call__(self, row, rownum=None, rows=None):
     """
     LOOK UP self.var (A DOTTED PATH) IN row

     :param row: THE RECORD TO READ FROM
     :param rownum: NOT USED
     :param rows: NOT USED
     :return: THE VALUE AT THE PATH, None IF ANY STEP IS MISSING;
              A ONE-ELEMENT LIST IS UNWRAPPED TO ITS SINGLE MEMBER
     """
     value = row
     for step in split_field(self.var):
         value = value.get(step)
         if value is None:
             return None
     is_singleton = isinstance(value, list) and len(value) == 1
     return value[0] if is_singleton else value
Пример #36
0
def _get(v, k, d):
    """
    FOLLOW THE DOTTED PATH k THROUGH v

    :param v: STARTING VALUE (HAS A get() METHOD, OR IS AN ITERABLE OF THINGS THAT DO)
    :param k: DOTTED PATH, SPLIT WITH split_field
    :param d: RETURNED IMMEDIATELY WHEN A STEP GIVES None
    :return: WHATEVER IS FOUND AFTER THE LAST STEP
    """
    current = v
    for name in split_field(k):
        try:
            current = current.get(name)
        except Exception:
            # get() FAILED; ASSUME A COLLECTION AND APPLY THE STEP TO EVERY MEMBER
            current = [child.get(name) for child in current]
        else:
            if current is None:
                return d
    return current
Пример #37
0
def to_python(self, not_null=False, boolean=False, many=False):
    """
    RETURN PYTHON SOURCE THAT READS self.var FROM THE ROW AT rownum+offset

    INTERMEDIATE STEPS OF THE PATH USE .get(<step>, EMPTY_DICT) SO THE CHAIN
    KEEPS GOING ON A MISSING STEP; THE LAST STEP IS A PLAIN .get(<step>)

    :return: PYTHON EXPRESSION, AS A STRING
    """
    row_expr = "rows[rownum+" + self.offset.to_python() + "]"
    steps = split_field(json2value(self.var.json))
    if not steps:
        return row_expr

    parents, leaf = steps[:-1], steps[-1]
    inner = "".join(
        ".get(" + convert.value2quote(step) + ", EMPTY_DICT)"
        for step in parents
    )
    return row_expr + inner + ".get(" + convert.value2quote(leaf) + ")"
Пример #38
0
def to_python(self, not_null=False, boolean=False, many=False):
    """
    GENERATE THE PYTHON EXPRESSION THAT PULLS self.var OUT OF rows[rownum+offset]

    ALL BUT THE LAST PATH STEP DEFAULT TO EMPTY_DICT, SO A MISSING STEP DOES
    NOT BREAK THE CHAIN OF .get() CALLS

    :return: PYTHON EXPRESSION, AS A STRING
    """
    base = "rows[rownum+" + self.offset.to_python() + "]"
    path = split_field(json2value(self.var.json))
    if not path:
        return base

    pieces = [base]
    for step in path[:-1]:
        pieces.append(".get(" + convert.value2quote(step) + ", EMPTY_DICT)")
    pieces.append(".get(" + convert.value2quote(path[-1]) + ")")
    return "".join(pieces)
Пример #39
0
    def get_columns(self, table_name, column_name=None, force=False):
        """
        RETURN METADATA COLUMNS

        :param table_name: DOTTED TABLE NAME; ONLY THE FIRST STEP IS USED TO FIND THE ALIAS
        :param column_name: PASSED TO self.meta.columns.find() TO NARROW THE RESULT
        :param force: RELOAD THE COLUMNS EVEN IF THE TABLE LOOKS UP TO DATE
        :return: COLUMNS SORTED BY names.\\.
        """
        root_table_name = split_field(table_name)[0]

        alias = self._find_alias(root_table_name)
        if not alias:
            # UNKNOWN ALIAS: REFRESH THE CLUSTER METADATA AND TRY ONCE MORE
            self.es_cluster.get_metadata(force=True)
            alias = self._find_alias(root_table_name)
        if not alias:
            Log.error("{{table|quote}} does not exist", table=table_name)

        try:
            index_times = [
                self.es_cluster.index_last_updated[i]
                for i in self.index_to_alias.get_domain(alias)
            ]
            last_update = MAX(index_times)

            table = self.get_table(alias)[0]
            if table:
                # LAST TIME WE GOT INFO FOR THIS TABLE
                if force or table.timestamp < last_update:
                    self._reload_columns(table)
            else:
                table = TableDesc(
                    name=alias,
                    url=None,
                    query_path=['.'],
                    timestamp=Date.MIN
                )
                with self.meta.tables.locker:
                    self.meta.tables.add(table)
                self._reload_columns(table)

            columns = jx.sort(self.meta.columns.find(alias, column_name), "names.\\.")
            # AT LEAST WAIT FOR THE COLUMNS TO UPDATE
            while len(self.todo) and not all(columns.get("last_updated")):
                if DEBUG:
                    pending = [c for c in columns if not c.last_updated]
                    if len(columns) > 10:
                        Log.note("waiting for {{num}} columns to update", num=len(pending))
                    else:
                        Log.note(
                            "waiting for columns to update {{columns|json}}",
                            columns=[c.es_index + "." + c.es_column for c in pending]
                        )
                Till(seconds=1).wait()
            return columns
        except Exception as e:
            Log.error("Not expected", cause=e)

        return []
Пример #40
0
def untyped_column(column_name):
    """
    SPLIT A TYPED COLUMN NAME INTO ITS NAME AND TYPE

    :param column_name:  DATABASE COLUMN NAME
    :return: (NAME, TYPE) PAIR; TYPE IS None WHEN THE NAME HAS NO "$"
    """
    if "$" not in column_name:
        return column_name, None

    steps = split_field(column_name)
    type_step = steps[-1]
    # THE LAST STEP CARRIES THE TYPE, MINUS ITS FIRST CHARACTER
    return join_field(steps[:-1]), type_step[1:]
Пример #41
0
    def add_column_to_schema(self, nest_to_schema, column):
        """
        RECORD THE NAME OF column RELATIVE TO EVERY NESTED TABLE

        :param nest_to_schema: MAP FROM NESTED PATH TO SCHEMA (ONLY THE PATHS ARE USED)
        :param column: THE COLUMN; ITS names MAPPING IS UPDATED IN PLACE
        """
        absolute_name = column.names[literal_field(self.name)]

        for nest, _ in nest_to_schema.items():
            table_steps = [self.name] + split_field(nest)
            relative_table = literal_field(join_field(table_steps))
            relative_name = relative_field(absolute_name, nest)

            column.names[relative_table] = relative_name
Пример #42
0
def get_nested_path(typed_path):
    """
    CONSTRUCT THE nested_path FROM THE typed_path

    :param typed_path: DOTTED PATH THAT MAY CONTAIN ARRAY_KEY STEPS
    :return: TUPLE OF PATHS, DEEPEST FIRST, ALWAYS ENDING WITH "."
    """
    steps = split_field(typed_path)
    root = "."
    # ONE ENTRY FOR EVERY ARRAY_KEY STEP (THE LAST STEP IS NEVER CONSIDERED)
    shallow_to_deep = [
        concat_field(root, join_field(steps[0:depth]))
        for depth, step in enumerate(steps[:-1], 1)
        if step == ARRAY_KEY
    ]
    return tuple(reversed(shallow_to_deep)) + (root, )
Пример #43
0
def exists_variable(path):
    """
    RETURN THE VARIABLE THAT WILL INDICATE OBJECT (OR ARRAY) EXISTS (~e~)

    :param path: DOTTED PATH; A TRAILING NESTED_TYPE STEP IS DROPPED
    :return: THE PATH WITH EXISTS_TYPE APPENDED (JUST EXISTS_TYPE FOR AN EMPTY PATH)
    """
    parts = split_field(path)
    if parts:
        trimmed = parts[:-1] if parts[-1] == NESTED_TYPE else parts
        return join_field(trimmed + [EXISTS_TYPE])
    return EXISTS_TYPE
Пример #44
0
def get_document_value(document, column):
    """
    RETURN DOCUMENT VALUE IF MATCHES THE column (name, type)

    ONLY THE FIRST STEP OF column.name IS LOOKED UP IN THE DOCUMENT

    :param document: THE DOCUMENT
    :param column: A (name, type) PAIR
    :return: VALUE, IF IT IS THE SAME NAME AND TYPE
    """
    top_level_key = split_field(column.name)[0]
    return get_if_type(document.get(top_level_key, None), column.type)
Пример #45
0
 def schema_element(self, path):
     """
     FIND THE ELEMENT AT path IN THIS TREE

     AT EVERY LEVEL, ANY CHAIN OF "." CHILDREN IS FOLLOWED BEFORE THE NEXT
     STEP IS LOOKED UP

     :param path: DOTTED STRING (SPLIT WITH split_field) OR A SEQUENCE OF STEPS
     :return: THE element OF THE NODE FOUND, OR None IF THE PATH DOES NOT EXIST
     """
     if isinstance(path, text_type):
         path = split_field(path)
     output = self
     while '.' in output.more:
         output = output.more['.']
     for p in path:
         output = output.more.get(p)
         if output is None:
             # UNKNOWN STEP: REPORT "NOT FOUND" RATHER THAN FAIL ON None.more
             return None
         while '.' in output.more:
             output = output.more['.']
     return output.element if output else None
Пример #46
0
    def monitor(self, please_stop):
        """
        LOOP UNTIL please_stop, KEEPING THE COLUMN METADATA (CARDINALITY) UP TO DATE

        WHEN THE todo QUEUE IS EMPTY, IT IS REFILLED WITH THE NON-STRUCT COLUMNS
        THAT ARE OLDER THAN MAX_COLUMN_METADATA_AGE, OR HAVE NO CARDINALITY.
        COLUMNS ARE THEN POPPED ONE AT A TIME AND PASSED TO _update_cardinality()

        :param please_stop: SIGNAL CHECKED ON EVERY ITERATION TO END THE LOOP
        """
        # WHEN THE SIGNAL FIRES, PUT THREAD_STOP ON THE QUEUE
        # (PRESUMABLY TO WAKE THE pop() BELOW - TODO CONFIRM)
        please_stop.on_go(lambda: self.todo.add(THREAD_STOP))
        while not please_stop:
            try:
                if not self.todo:
                    # NOTHING QUEUED: LOOK FOR STALE, OR NEVER MEASURED, COLUMNS
                    old_columns = [
                        c
                        for c in self.meta.columns
                        if ((c.last_updated < Date.now() - MAX_COLUMN_METADATA_AGE) or c.cardinality == None) and c.jx_type not in STRUCT
                    ]
                    if old_columns:
                        DEBUG and Log.note(
                            "Old columns {{names|json}} last updated {{dates|json}}",
                            names=wrap(old_columns).es_column,
                            dates=[Date(t).format() for t in wrap(old_columns).last_updated]
                        )
                        self.todo.extend(old_columns)
                    else:
                        DEBUG and Log.note("no more metatdata to update")

                # WAIT (UP TO 10 MINUTES) FOR THE NEXT COLUMN
                column = self.todo.pop(Till(seconds=(10*MINUTE).seconds))
                if column:
                    if column is THREAD_STOP:
                        # GO BACK TO THE TOP, WHERE please_stop IS CHECKED
                        continue

                    with Timer("update {{table}}.{{column}}", param={"table": column.es_index, "column": column.es_column}, silent=not DEBUG):
                        if column.es_index in self.index_does_not_exist:
                            # INDEX IS KNOWN TO BE GONE: CLEAR ALL COLUMNS OF THAT INDEX
                            DEBUG and Log.note("{{column.es_column}} does not exist", column=column)
                            self.meta.columns.update({
                                "clear": ".",
                                "where": {"eq": {"es_index": column.es_index}}
                            })
                            continue
                        if column.jx_type in STRUCT or split_field(column.es_column)[-1] == EXISTS_TYPE:
                            # STRUCT AND EXISTS COLUMNS ARE ONLY MARKED AS UPDATED; NO CARDINALITY IS COMPUTED
                            DEBUG and Log.note("{{column.es_column}} is a struct", column=column)
                            column.last_updated = Date.now()
                            continue
                        elif column.last_updated > Date.now() - TOO_OLD and column.cardinality is not None:
                            # DO NOT UPDATE FRESH COLUMN METADATA
                            DEBUG and Log.note("{{column.es_column}} is still fresh ({{ago}} ago)", column=column, ago=(Date.now()-Date(column.last_updated)).seconds)
                            continue
                        try:
                            self._update_cardinality(column)
                            (DEBUG and not column.es_index.startswith(TEST_TABLE_PREFIX)) and Log.note("updated {{column.name}}", column=column)
                        except Exception as e:
                            # A 404 IN THE ERROR MEANS THE COLUMN IS GONE: CLEAR ITS METADATA
                            # NOTE(review): RELIES ON THE EXCEPTION SUPPORTING `in` - CONFIRM
                            if '"status":404' in e:
                                self.meta.columns.update({
                                    "clear": ".",
                                    "where": {"eq": {"es_index": column.es_index, "es_column": column.es_column}}
                                })
                            else:
                                Log.warning("problem getting cardinality for {{column.name}}", column=column, cause=e)
            except Exception as e:
                # KEEP THE MONITOR ALIVE; ONLY REPORT THE PROBLEM
                Log.warning("problem in cardinality monitor", cause=e)
Пример #47
0
 def schema_element(self, path):
     """
     FIND THE ELEMENT AT path IN THIS TREE

     AT EVERY LEVEL, ANY CHAIN OF "." CHILDREN IS FOLLOWED BEFORE THE NEXT
     STEP IS LOOKED UP

     :param path: DOTTED STRING (SPLIT WITH split_field) OR A SEQUENCE OF STEPS
     :return: THE element OF THE NODE FOUND, OR None IF THE PATH DOES NOT EXIST
     """
     if isinstance(path, text):
         path = split_field(path)
     output = self
     while '.' in output.more:
         output = output.more['.']
     for p in path:
         output = output.more.get(p)
         if output is None:
             # UNKNOWN STEP: REPORT "NOT FOUND" RATHER THAN FAIL ON None.more
             return None
         while '.' in output.more:
             output = output.more['.']
     return output.element if output else None
Пример #48
0
def _test_mode_wait(query):
    """
    WAIT FOR METADATA TO ARRIVE ON INDEX

    THE COLUMNS OF THE QUERIED INDEX ARE MARKED DIRTY AND PUSHED ON THE todo
    QUEUE, THEN POLLED ONCE A SECOND UNTIL EVERY NON-STRUCT COLUMN HAS
    last_updated AND cardinality, OR UNTIL ONE MINUTE HAS PASSED.
    ANY FAILURE IS REPORTED AS A WARNING, NOT RAISED.

    :param query: dict() OF REQUEST BODY
    :return: nothing
    """
    try:
        m = meta.singlton
        now = Date.now()
        end_time = now + MINUTE

        # MARK COLUMNS DIRTY
        m.meta.columns.update({
            "clear": ["partitions", "count", "cardinality", "last_updated"],
            "where": {
                "eq": {
                    "es_index": join_field(split_field(query["from"])[0:1])
                }
            }
        })

        # BE SURE THEY ARE ON THE todo QUEUE FOR RE-EVALUATION
        cols = [
            c for c in m.get_columns(table_name=query["from"], force=True)
            if c.type not in STRUCT
        ]
        for c in cols:
            Log.note("Mark {{column}} dirty at {{time}}",
                     column=c.names["."],
                     time=now)
            c.last_updated = now - TOO_OLD
            m.todo.push(c)

        # RE-READ THE CLOCK ON EVERY PASS; COMPARING TO THE START TIME WOULD NEVER TIME OUT
        while end_time > Date.now():
            # GET FRESH VERSIONS
            cols = [
                c for c in m.get_columns(table_name=query["from"])
                if c.type not in STRUCT
            ]
            for c in cols:
                # `== None` IS KEPT AS-IS (NOT `is None`)
                # NOTE(review): PRESUMABLY SO A Null-LIKE cardinality ALSO MATCHES - CONFIRM
                if not c.last_updated or c.cardinality == None:
                    Log.note(
                        "wait for column (table={{col.es_index}}, name={{col.es_column}}) metadata to arrive",
                        col=c)
                    break
            else:
                # EVERY COLUMN HAS ITS METADATA
                break
            Till(seconds=1).wait()
        for c in cols:
            Log.note(
                "fresh column name={{column.name}} updated={{column.last_updated|date}} parts={{column.partitions}}",
                column=c)
    except Exception as e:
        Log.warning("could not pickup columns", cause=e)
Пример #49
0
 def __radd__(self, path):
     """
     RETURN self AT THE END OF path

     STEPS THAT MATCH IS_PRIMITIVE_KEY ARE SKIPPED; EVERY OTHER STEP WRAPS THE
     RESULT SO FAR IN A NEW JsonType, FROM THE LAST STEP TO THE FIRST

     :param path: DOTTED PATH, SPLIT WITH split_field
     :return: self WHEN NO STEP WRAPS IT, OTHERWISE THE OUTERMOST JsonType
     """
     wrapped = self
     for name in reversed(split_field(path)):
         if not IS_PRIMITIVE_KEY.match(name):
             wrapped = JsonType(**{name: wrapped})
     return wrapped
Пример #50
0
def find_container(frum, schema=None):
    """
    TURN A from CLAUSE INTO THE THING TO QUERY

    * A NAME STARTING WITH "meta." GIVES THE COLUMNS OR TABLES METADATA
    * ANY OTHER NAME GIVES A CONTAINER OF THE DEFAULT TYPE, ON THE INDEX NAMED BY THE FIRST STEP
    * A MAPPING WITH A KNOWN type GIVES A CONTAINER OF THAT TYPE
    * A MAPPING WITH A from GIVES A QueryOp
    * A list OR set GIVES A _ListContainer
    * ANYTHING ELSE IS RETURNED (WRAPPED) AS-IS

    :param frum: NAME, MAPPING, list, set, OR ALREADY A CONTAINER
    :param schema: PASSED AS namespace TO QueryOp.wrap()
    :return: THE CONTAINER (SEE ABOVE)
    """
    if not _meta:
        _delayed_imports()

    frum = wrap(frum)

    if isinstance(frum, text_type):
        if not container.config.default.settings:
            Log.error(
                "expecting jx_base.container.config.default.settings to contain default elasticsearch connection info"
            )

        if frum.startswith("meta."):
            if frum == "meta.columns":
                return _meta.singlton.meta.columns.denormalized()
            if frum == "meta.tables":
                return _meta.singlton.meta.tables
            Log.error("{{name}} not a recognized table", name=frum)

        container_type = container.config.default.type
        overrides = {
            "index": split_field(frum)[0],  # THE FACT TABLE
            "name": frum,
            "exists": True,
        }
        settings = set_default(overrides, container.config.default.settings)
        settings.type = None
        return container.type2container[container_type](settings)

    if isinstance(frum, Mapping) and frum.type and container.type2container[frum.type]:
        # TODO: Ensure the frum.name is set, so we capture the deep queries
        if not frum.type:
            Log.error("Expecting from clause to have a 'type' property")
        return container.type2container[frum.type](frum.settings)

    if isinstance(frum, Mapping) and (
        frum["from"] or isinstance(frum["from"], (list, set))
    ):
        from jx_base.query import QueryOp
        return QueryOp.wrap(frum, namespace=schema)

    if isinstance(frum, (list, set)):
        return _ListContainer("test_list", frum)

    return frum
Пример #51
0
def set(constants):
    """
    REACH INTO THE MODULES AND OBJECTS TO SET CONSTANTS.
    THINK OF THIS AS PRIMITIVE DEPENDENCY INJECTION FOR MODULES.
    USEFUL FOR SETTING DEBUG FLAGS.

    EVERY LEAF OF constants IS A DOTTED PATH WITH ITS NEW VALUE. THE PATH IS
    FIRST RESOLVED AGAINST sys.modules; WHEN THAT FAILS, THE PATH IS SPLIT
    INTO <module prefix>.<name> AT EVERY POSSIBLE POINT, AND THE FIRST PREFIX
    THAT THE CALLING MODULE ENDS WITH HAS <name> SET IN THE CALLER'S GLOBALS.

    :param constants: NESTED STRUCTURE OF PATHS TO NEW VALUES (FALSY MEANS NOTHING TO DO)
    :return: None; Log.error IS CALLED WHEN A CONSTANT CAN NOT BE SET
    """
    if not constants:
        return
    constants = wrap(constants)

    for k, new_value in constants.leaves():
        errors = []
        try:
            mo_dots_set_attr(sys.modules, k, new_value)
            continue
        except Exception as e:
            errors.append(e)

        # ONE MODULE IS MISSING, THE CALLING MODULE
        try:
            caller_globals = sys._getframe(1).f_globals
            caller_file = caller_globals["__file__"]
            if not caller_file.endswith(".py"):
                raise Exception("do not know how to handle non-python caller")
            # ACCEPT BOTH WINDOWS AND POSIX SEPARATORS
            caller_module = caller_file[:-3].replace("\\", "/").replace("/", ".")

            path = split_field(k)
            applied = False
            for i in range(1, len(path)):
                # TRY EVERY SPLIT OF THE PATH INTO <module prefix> AND <name>
                prefix = join_field(path[:i])
                name = join_field(path[i:])
                if caller_module.endswith(prefix):
                    old_value = mo_dots_set_attr(caller_globals, name, new_value)
                    if DEBUG:
                        from mo_logs import Log

                        Log.note(
                            "Changed {{module}}[{{attribute}}] from {{old_value}} to {{new_value}}",
                            module=prefix,
                            attribute=name,
                            old_value=old_value,
                            new_value=new_value
                        )
                    applied = True
                    break
            if applied:
                # THE sys.modules FAILURE NO LONGER MATTERS; DO NOT REPORT IT
                continue
        except Exception as e:
            errors.append(e)

        if errors:
            from mo_logs import Log

            Log.error("Can not set constant {{path}}", path=k, cause=errors)
Пример #52
0
    def __getitem__(self, key):
        """
        INDEXING NEVER FAILS

        :param key: slice, int, OR (DOTTED) NAME
        :return: Null FOR A slice; OTHERWISE A NullType THAT REMEMBERS ITS PARENT
                 AND KEY (ONE NESTED NullType FOR EVERY STEP OF A DOTTED NAME)
        """
        if isinstance(key, slice):
            return Null
        if isinstance(key, int):
            return NullType(self, key)
        if isinstance(key, str):
            key = key.decode("utf8")

        node = self
        for step in split_field(key):
            node = NullType(node, step)
        return node
Пример #53
0
    def _get_columns(self, table=None):
        """
        PARSE THE ES MAPPINGS OF THE INDEX BEHIND table

        THE ES METADATA IS REFRESHED FIRST WHEN THE INDEX IS NOT KNOWN, OR WHEN
        THE METADATA IS OLDER THAN OLD_METADATA

        :param table: DOTTED TABLE NAME; THE FIRST STEP IS THE ES INDEX
        """
        # TODO: HANDLE MORE THEN ONE ES, MAP TABLE SHORT_NAME TO ES INSTANCE
        steps = split_field(table)
        es_index = steps[0]
        query_path = join_field(steps[1:])  # COMPUTED, BUT NOT USED HERE
        index_meta = self.es_metadata.indices[es_index]
        is_stale = not index_meta or self.last_es_metadata < Date.now() - OLD_METADATA
        if is_stale:
            self.es_metadata = self.default_es.get_metadata(force=True)
            index_meta = self.es_metadata.indices[es_index]

        for _, mapping in index_meta.mappings.items():
            # EVERY MAPPING GETS AN EXPLICIT _id PROPERTY BEFORE IT IS PARSED
            mapping.properties["_id"] = {"type": "string", "index": "not_analyzed"}
            self._parse_properties(index_meta.index, mapping, index_meta)
Пример #54
0
def wrap_from(frum, schema=None):
    """
    TURN A from CLAUSE INTO THE THING TO QUERY

    * "meta.columns" AND "meta.tables" GIVE THE METADATA
    * ANY OTHER NAME GIVES A CONTAINER OF THE DEFAULT TYPE, ON THE INDEX NAMED BY THE FIRST STEP
    * A MAPPING WITH A KNOWN type GIVES A CONTAINER OF THAT TYPE
    * A MAPPING WITH A from GIVES A QueryOp
    * A list OR set GIVES A _ListContainer
    * ANYTHING ELSE IS RETURNED (WRAPPED) AS-IS

    :param frum: NAME, MAPPING, list, set, OR ALREADY A CONTAINER
    :param schema: PASSED TO QueryOp.wrap()
    :return: THE CONTAINER (SEE ABOVE)
    """
    if not _containers:
        _delayed_imports()

    frum = wrap(frum)

    if isinstance(frum, basestring):
        if not _containers.config.default.settings:
            Log.error("expecting pyLibrary.queries.query.config.default.settings to contain default elasticsearch connection info")

        container_type, index_name = None, frum
        if not frum.startswith("meta."):
            container_type = _containers.config.default.type
            index_name = split_field(frum)[0]
        elif frum == "meta.columns":
            return _meta.singlton.meta.columns.denormalized()
        elif frum == "meta.tables":
            return _meta.singlton.meta.tables
        else:
            Log.error("{{name}} not a recognized table", name=frum)

        overrides = {
            "index": index_name,
            "name": frum,
            "exists": True,
        }
        settings = set_default(overrides, _containers.config.default.settings)
        settings.type = None
        return _containers.type2container[container_type](settings)

    if isinstance(frum, Mapping) and frum.type and _containers.type2container[frum.type]:
        # TODO: Ensure the frum.name is set, so we capture the deep queries
        if not frum.type:
            Log.error("Expecting from clause to have a 'type' property")
        return _containers.type2container[frum.type](frum.settings)

    if isinstance(frum, Mapping) and (frum["from"] or isinstance(frum["from"], (list, set))):
        from pyLibrary.queries.query import QueryOp
        return QueryOp.wrap(frum, schema=schema)

    if isinstance(frum, (list, set)):
        return _ListContainer("test_list", frum)

    return frum
Пример #55
0
    def get_columns(self, table_name, column_name=None, force=False):
        """
        RETURN METADATA COLUMNS

        :param table_name: DOTTED TABLE NAME; ONLY THE FIRST STEP IS USED TO FIND THE ALIAS
        :param column_name: PASSED TO self.meta.columns.find() TO NARROW THE RESULT
        :param force: RELOAD THE COLUMNS EVEN IF THE TABLE LOOKS UP TO DATE
        :return: COLUMNS SORTED BY names.\\.
        """
        root_table_name = split_field(table_name)[0]

        alias = self._find_alias(root_table_name)
        if not alias:
            # UNKNOWN ALIAS: REFRESH THE CLUSTER METADATA AND TRY ONCE MORE
            self.es_cluster.get_metadata(force=True)
            alias = self._find_alias(root_table_name)
        if not alias:
            Log.error("{{table|quote}} does not exist", table=table_name)

        try:
            index_times = [
                self.es_cluster.index_last_updated[i]
                for i in self.index_to_alias.get_domain(alias)
            ]
            last_update = MAX(index_times)

            table = self.get_table(alias)[0]
            if table:
                # LAST TIME WE GOT INFO FOR THIS TABLE
                if force or table.timestamp < last_update:
                    self._reload_columns(table)
            else:
                table = TableDesc(
                    name=alias,
                    url=None,
                    query_path=['.'],
                    timestamp=Date.MIN
                )
                with self.meta.tables.locker:
                    self.meta.tables.add(table)
                self._reload_columns(table)

            columns = jx.sort(self.meta.columns.find(alias, column_name), "names.\\.")
            # AT LEAST WAIT FOR THE COLUMNS TO UPDATE
            while len(self.todo) and not all(columns.get("last_updated")):
                if DEBUG:
                    waiting_on = [c for c in columns if not c.last_updated]
                    if len(columns) > 10:
                        Log.note("waiting for {{num}} columns to update", num=len(waiting_on))
                    else:
                        Log.note("waiting for columns to update {{columns|json}}", columns=[c.es_index+"."+c.es_column for c in waiting_on])
                Till(seconds=1).wait()
            return columns
        except Exception as e:
            Log.error("Not expected", cause=e)

        return []
Пример #56
0
def is_deepop(es, query):
    """
    DECIDE IF query MUST BE RUN AS A DEEP (NESTED) OPERATION

    :param es: NOT USED
    :param query: THE QUERY TO INSPECT
    :return: True ONLY WHEN THERE ARE NO EDGES AND NO GROUPBY, AT LEAST ONE
             SELECT IS NOT AGGREGATED, AND THE from NAME HAS MORE THAN ONE STEP
    """
    if query.edges or query.groupby:
        return False

    has_plain_select = any(
        s.aggregate in (None, "none") for s in listwrap(query.select)
    )
    if not has_plain_select:
        return False

    # ONLY THE DEPTH OF THE from NAME IS CONSIDERED
    # ASSUME IT IS NESTED IF WE ARE ASKING FOR NESTED COLUMNS
    # vars_ = query_get_all_vars(query)
    # columns = query.frum.get_columns()
    # if any(c for c in columns if len(c.nested_path) != 1 and c.name in vars_):
    #    return True
    return len(split_field(query.frum.name)) > 1
Пример #57
0
def unnest_path(encoded):
    """
    DECODE A TYPED PATH, DROPPING THE TYPE STEPS

    * LEADING DOTS (TWO OR MORE) ARE KEPT AS-IS, AND THE REST IS DECODED
    * A TRAILING NESTED_TYPE STEP IS DROPPED
    * STEPS STARTING WITH TYPE_PREFIX ARE DROPPED, EXCEPT FOR THE LAST STEP

    :param encoded: THE TYPED PATH
    :return: THE DECODED PATH; "." WHEN NOTHING IS LEFT
    """
    if encoded.startswith(".."):
        stripped = encoded.lstrip(".")
        leading_dots = encoded[:len(encoded) - len(stripped)]
        return leading_dots + unnest_path(stripped)

    steps = split_field(encoded)
    if steps and steps[-1] == NESTED_TYPE:
        steps = steps[:-1]
    if not steps:
        return "."

    parents = [
        decode_property(c)
        for c in steps[:-1]
        if not c.startswith(TYPE_PREFIX)
    ]
    return join_field(parents + [decode_property(steps[-1])])
Пример #58
0
    def __init__(self, table_name, columns):
        """
        :param table_name: THE FACT TABLE, OPTIONALLY FOLLOWED BY THE PATH TO AN ARM OF THE SNOWFLAKE
        :param columns: ALL COLUMNS IN SNOWFLAKE
        """
        steps = split_field(table_name)
        self.table = steps[0]  # USED AS AN EXPLICIT STATEMENT OF PERSPECTIVE IN THE DATABASE
        self.query_path = join_field(steps[1:])
        self._columns = copy(columns)

        index = self.lookup = _index(columns, self.query_path)
        if self.query_path == ".":
            return

        # NAMES NOT FOUND FROM THE QUERY PATH FALL BACK TO THE INDEX BUILT FOR "."
        for name, found in _index(columns, ".").items():
            index.setdefault(name, found)
Пример #59
0
def is_setop(es, query):
    """
    DECIDE IF query CAN BE RUN AS A SET OPERATION

    :param es: NOT USED
    :param query: THE QUERY TO INSPECT
    :return: WITHOUT EDGES: True WHEN EVERY SELECT IS "count" OR "none", OR THE from NAME IS DEEP
             WITH EDGES: True WHEN EVERY EDGE DOMAIN IS ALGEBRAIC WITH interval == "none"
    """
    select = listwrap(query.select)

    if query.edges:
        is_smooth = AND((e.domain.type in ALGEBRAIC and e.domain.interval == "none") for e in query.edges)
        return True if is_smooth else False

    # NO EDGES IMPLIES SIMPLER QUERIES: EITHER A SET OPERATION, OR RETURN SINGLE AGGREGATE
    is_deep = len(split_field(query.frum.name)) > 1  # LOOKING INTO NESTED WILL REQUIRE A SCRIPT
    simple_agg = AND([s.aggregate in ("count", "none") for s in select])  # CONVERTING esfilter DEFINED PARTS WILL REQUIRE SCRIPT
    return True if (simple_agg or is_deep) else False