Пример #1
0
    def _add_column(self, column):
        cname = column.name
        if column.jx_type == NESTED:
            # WE ARE ALSO NESTING
            self._nest_column(column, [cname]+column.nested_path)

        table = concat_field(self.fact_name, column.nested_path[0])

        try:
            with self.namespace.db.transaction() as t:
                t.execute(
                    "ALTER TABLE" + quote_column(table) +
                    "ADD COLUMN" + quote_column(column.es_column) + column.es_type
                )
            self.namespace.columns.add(column)
        except Exception as e:
            if "duplicate column name" in e:
                # THIS HAPPENS WHEN MULTIPLE THREADS ARE ASKING FOR MORE COLUMNS TO STORE DATA
                # THIS SHOULD NOT BE A PROBLEM SINCE THE THREADS BOTH AGREE THE COLUMNS SHOULD EXIST
                # BUT, IT WOULD BE NICE TO MAKE LARGER TRANSACTIONS SO THIS NEVER HAPPENS
                # CONFIRM THE COLUMN EXISTS IN LOCAL DATA STRUCTURES
                for c in self.namespace.columns:
                    if c.es_column == column.es_column:
                        break
                else:
                    Log.error("Did not add column {{column}]", column=column.es_column, cause=e)
            else:
                Log.error("Did not add column {{column}]", column=column.es_column, cause=e)
Пример #2
0
    def _edges_op(self, query, frum):
        schema = frum
        query = query.copy()  # WE WILL BE MARKING UP THE QUERY
        index_to_column = {}  # MAP FROM INDEX TO COLUMN (OR SELECT CLAUSE)
        outer_selects = []  # EVERY SELECT CLAUSE (NOT TO BE USED ON ALL TABLES, OF COURSE)
        base_table, path = schema.snowflake.fact_name, schema.nested_path
        nest_to_alias = {
            nested_path: "__" + unichr(ord('a') + i) + "__"
            for i, (nested_path, sub_table) in enumerate(self.snowflake.tables)
        }

        tables = []
        for n, a in nest_to_alias.items():
            if startswith_field(path, n):
                tables.append({"nest": n, "alias": a})
        tables = jx.sort(tables, {"value": {"length": "nest"}})

        from_sql = sql_alias(quote_column(concat_field(base_table, tables[0].nest)),  tables[0].alias)
        for previous, t in zip(tables, tables[1::]):
            from_sql += (
                SQL_LEFT_JOIN + sql_alias(quote_column(concat_field(base_table, t.nest)), t.alias) +
                SQL_ON + quote_column(t.alias, PARENT) + SQL_EQ + quote_column(previous.alias, UID)
            )

        main_filter = SQLang[query.where].to_sql(schema, boolean=True)[0].sql.b

        # SHIFT THE COLUMN DEFINITIONS BASED ON THE NESTED QUERY DEPTH
        ons = []
        join_types = []
        wheres = []
        null_ons = [EXISTS_COLUMN + SQL_IS_NULL]
        groupby = []
        null_groupby = []
        orderby = []
        domains = []

        select_clause = [SQL_ONE + EXISTS_COLUMN] + [quote_column(c.es_column) for c in self.snowflake.columns]

        for edge_index, query_edge in enumerate(query.edges):
            edge_alias = "e" + text(edge_index)

            if query_edge.value:
                edge_values = [p for c in SQLang[query_edge.value].to_sql(schema).sql for p in c.items()]

            elif not query_edge.value and any(query_edge.domain.partitions.where):
                case = SQL_CASE
                for pp, p in enumerate(query_edge.domain.partitions):
                    w = SQLang[p.where].to_sql(schema)[0].sql.b
                    t = quote_value(pp)
                    case += SQL_WHEN + w + SQL_THEN + t
                case += SQL_ELSE + SQL_NULL + SQL_END  # quote value with length of partitions
                edge_values = [("n", case)]

            elif query_edge.range:
                edge_values = SQLang[query_edge.range.min].to_sql(schema)[0].sql.items() + SQLang[query_edge.range.max].to_sql(schema)[
                    0].sql.items()
Пример #3
0
    def to_sql(self, schema, not_null=False, boolean=False, many=True):
        var_name = self.var
        if var_name == GUID:
            return wrap([{
                "name": ".",
                "sql": {
                    "s": quoted_GUID
                },
                "nested_path": ROOT_PATH
            }])
        cols = schema.leaves(var_name)
        if not cols:
            return self.lang[NULL].to_sql(schema)
        acc = {}
        if boolean:
            for col in cols:
                cname = relative_field(col.name, var_name)
                nested_path = col.nested_path[0]
                if col.type == OBJECT:
                    value = SQL_TRUE
                elif col.type == BOOLEAN:
                    value = quote_column(col.es_column)
                else:
                    value = quote_column(col.es_column) + SQL_IS_NOT_NULL
                tempa = acc.setdefault(nested_path, {})
                tempb = tempa.setdefault(get_property_name(cname), {})
                tempb["b"] = value
        else:
            for col in cols:
                cname = relative_field(col.name, var_name)
                if col.jx_type == OBJECT:
                    prefix = self.var + "."
                    for cn, cs in schema.items():
                        if cn.startswith(prefix):
                            for child_col in cs:
                                tempa = acc.setdefault(
                                    child_col.nested_path[0], {})
                                tempb = tempa.setdefault(
                                    get_property_name(cname), {})
                                tempb[json_type_to_sql_type[
                                    col.type]] = quote_column(
                                        child_col.es_column)
                else:
                    nested_path = col.nested_path[0]
                    tempa = acc.setdefault(nested_path, {})
                    tempb = tempa.setdefault(get_property_name(cname), {})
                    tempb[json_type_to_sql_type[col.jx_type]] = quote_column(
                        col.es_column)

        return wrap([{
            "name": cname,
            "sql": types,
            "nested_path": nested_path
        } for nested_path, pairs in acc.items()
                     for cname, types in pairs.items()])
Пример #4
0
 def output():
     while True:
         with self.db.transaction() as t:
             top_id = first(
                 first(
                     t.query(SQL_SELECT + quote_column("next_id") +
                             SQL_FROM +
                             quote_column(ABOUT_TABLE)).data))
             max_id = top_id + 1000
             t.execute(SQL_UPDATE + quote_column(ABOUT_TABLE) +
                       SQL_SET + sql_eq(next_id=max_id))
         while top_id < max_id:
             yield top_id
             top_id += 1
Пример #5
0
    def _drop_column(self, column):
        # DROP COLUMN BY RENAMING IT, WITH __ PREFIX TO HIDE IT
        cname = column.name
        if column.jx_type == "nested":
            # WE ARE ALSO NESTING
            self._nest_column(column, [cname]+column.nested_path)

        table = concat_field(self.fact_name, column.nested_path[0])

        with self.namespace.db.transaction() as t:
            t.execute(
                "ALTER TABLE" + quote_column(table) +
                "RENAME COLUMN" + quote_column(column.es_column) + " TO " + quote_column("__" + column.es_column)
            )
        self.namespace.columns.remove(column)
Пример #6
0
    def to_sql(self, schema, not_null=False, boolean=False):
        value = SQLang[self.value].partial_eval().to_sql(schema)[0].sql.s
        find = SQLang[self.find].partial_eval().to_sql(schema)[0].sql.s
        start = SQLang[self.start].partial_eval().to_sql(schema)[0].sql.n
        default = coalesce(
            SQLang[self.default].partial_eval().to_sql(schema)[0].sql.n, SQL_NULL
        )

        if start.sql != SQL_ZERO.sql:
            value = NotRightOp([self.value, self.start]).to_sql(schema)[0].sql.s

        index = sql_call("INSTR", value, find)
        i = quote_column("i")
        sql = with_var(
            i,
            index,
            ConcatSQL(
                SQL_CASE,
                SQL_WHEN,
                i,
                SQL_THEN,
                i,
                SQL(" - "),
                SQL_ONE,
                SQL_PLUS,
                start,
                SQL_ELSE,
                default,
                SQL_END,
            ),
        )
        return wrap([{"name": ".", "sql": {"n": sql}}])
Пример #7
0
 def delete(self, where):
     filter = SQLang[jx_expression(where)].to_sql(self.schema)
     with self.db.transaction() as t:
         t.execute(
             ConcatSQL(SQL_DELETE, SQL_FROM,
                       quote_column(self.snowflake.fact_name), SQL_WHERE,
                       filter))
Пример #8
0
    def _insert(self, collection):
        for nested_path, details in collection.items():
            active_columns = wrap(list(details.active_columns))
            rows = details.rows
            num_rows = len(rows)
            table_name = concat_field(self.name, nested_path)

            if table_name == self.name:
                # DO NOT REQUIRE PARENT OR ORDER COLUMNS
                meta_columns = [GUID, UID]
            else:
                meta_columns = [UID, PARENT, ORDER]

            all_columns = meta_columns + active_columns.es_column  # ONLY THE PRIMITIVE VALUE COLUMNS
            command = ConcatSQL(
                SQL_INSERT,
                quote_column(table_name),
                sql_iso(sql_list(map(quote_column, all_columns))),
                SQL_VALUES,
                sql_list(
                    sql_iso(sql_list(quote_value(row.get(c)) for c in all_columns))
                    for row in unwrap(rows)
                )
            )

            with self.db.transaction() as t:
                t.execute(command)
Пример #9
0
 def remove_facts(self, fact_name):
     paths = self.ns.columns._snowflakes[fact_name]
     if paths:
         with self.db.transaction() as t:
             for p in paths:
                 full_name = concat_field(fact_name, p[0])
                 t.execute("DROP TABLE "+quote_column(full_name))
         self.ns.columns.remove_table(fact_name)
Пример #10
0
    def _window_op(self, query, window):
        # http://www2.sqlite.org/cvstrac/wiki?p=UnsupportedSqlAnalyticalFunctions
        if window.value == "rownum":
            return ("ROW_NUMBER()-1 OVER (" + " PARTITION BY " +
                    sql_iso(sql_list(window.edges.values)) + SQL_ORDERBY +
                    sql_iso(sql_list(window.edges.sort)) + ") AS " +
                    quote_column(window.name))

        range_min = text(coalesce(window.range.min, "UNBOUNDED"))
        range_max = text(coalesce(window.range.max, "UNBOUNDED"))

        return (sql_aggs[window.aggregate] +
                sql_iso(window.value.to_sql(schema)) + " OVER (" +
                " PARTITION BY " + sql_iso(sql_list(window.edges.values)) +
                SQL_ORDERBY + sql_iso(sql_list(window.edges.sort)) +
                " ROWS BETWEEN " + range_min + " PRECEDING AND " + range_max +
                " FOLLOWING " + ") AS " + quote_column(window.name))
Пример #11
0
    def where(self, filter):
        """
        WILL NOT PULL WHOLE OBJECT, JUST TOP-LEVEL PROPERTIES
        :param filter:  jx_expression filter
        :return: list of objects that match
        """
        select = []
        column_names = []
        for c in self.schema.columns:
            if c.jx_type in STRUCT:
                continue
            if len(c.nested_path) != 1:
                continue
            column_names.append(c.name)
            select.append(sql_alias(quote_column(c.es_column), c.name))

        where_sql = SQLang[jx_expression(filter)].to_sql(self.schema)[0].sql.b
        result = self.db.query(ConcatSQL(
            SQL_SELECT, JoinSQL(SQL_COMMA, select),
            SQL_FROM, quote_column(self.snowflake.fact_name),
            SQL_WHERE, where_sql
        ))

        return wrap([{c: v for c, v in zip(column_names, r)} for r in result.data])
Пример #12
0
    def _make_range_domain(self, domain, column_name):
        width = (domain.max - domain.min) / domain.interval
        digits = mo_math.floor(mo_math.log10(width - 1))
        if digits == 0:
            value = quote_column("a", "value")
        else:
            value = SQL_PLUS.join("1" + ("0" * j) + SQL_STAR +
                                  text(chr(ord(b'a') + j)) + ".value"
                                  for j in range(digits + 1))
        if domain.interval == 1:
            if domain.min == 0:
                domain = (SQL_SELECT + sql_alias(value, column_name) +
                          SQL_FROM +
                          sql_alias(quote_column(DIGITS_TABLE), "a"))
            else:
                domain = (SQL_SELECT + sql_alias(
                    sql_iso(value) + SQL_PLUS + quote_value(domain.min),
                    column_name) + SQL_FROM +
                          sql_alias(quote_column(DIGITS_TABLE), "a"))
        else:
            if domain.min == 0:
                domain = ConcatSQL(
                    SQL_SELECT,
                    sql_alias(value + SQL_STAR + quote_value(domain.interval),
                              column_name), SQL_FROM,
                    sql_alias(quote_column(DIGITS_TABLE), "a"))
            else:
                domain = ConcatSQL(
                    SQL_SELECT,
                    sql_alias(
                        sql_iso(value, SQL_STAR, quote_value(domain.interval))
                        + SQL_PLUS + quote_value(domain.min), column_name),
                    SQL_FROM, sql_alias(quote_column(DIGITS_TABLE), "a"))

        for j in range(digits):
            domain += SQL_INNER_JOIN + sql_alias(
                quote_column(DIGITS_TABLE), text(
                    chr(ord(b'a') + j + 1))) + SQL_ON + SQL_TRUE
        domain += SQL_WHERE + value + " < " + quote_value(width)
        return domain
Пример #13
0
    untyped_column, PARENT, UID
from jx_sqlite.container import DIGITS_TABLE
from jx_sqlite.expressions._utils import SQLang, sql_type_to_json_type
from jx_sqlite.expressions.tuple_op import TupleOp
from jx_sqlite.expressions.variable import Variable
from jx_sqlite.setop_table import SetOpTable
from mo_dots import coalesce, concat_field, join_field, listwrap, split_field, startswith_field
from mo_future import text, unichr
from mo_logs import Log
from mo_sql import SQL, SQL_AND, SQL_CASE, SQL_COMMA, SQL_DESC, SQL_ELSE, SQL_END, SQL_FROM, SQL_GROUPBY, \
    SQL_INNER_JOIN, SQL_IS_NOT_NULL, SQL_IS_NULL, SQL_LEFT_JOIN, SQL_LIMIT, SQL_NULL, SQL_ON, SQL_ONE, SQL_OR, \
    SQL_ORDERBY, SQL_SELECT, SQL_STAR, SQL_THEN, SQL_TRUE, SQL_UNION_ALL, SQL_WHEN, SQL_WHERE, sql_coalesce, \
    sql_count, sql_iso, sql_list, SQL_DOT, SQL_PLUS, ConcatSQL, SQL_EQ
from jx_sqlite.sqlite import quote_column, quote_value, sql_alias

EXISTS_COLUMN = quote_column("__exists__")


class EdgesTable(SetOpTable):
    def _edges_op(self, query, frum):
        schema = frum
        query = query.copy()  # WE WILL BE MARKING UP THE QUERY
        index_to_column = {}  # MAP FROM INDEX TO COLUMN (OR SELECT CLAUSE)
        outer_selects = [
        ]  # EVERY SELECT CLAUSE (NOT TO BE USED ON ALL TABLES, OF COURSE)
        base_table, path = schema.snowflake.fact_name, schema.nested_path
        nest_to_alias = {
            nested_path: "__" + unichr(ord('a') + i) + "__"
            for i, (nested_path, sub_table) in enumerate(self.snowflake.tables)
        }
Пример #14
0
    def _make_sql_for_one_nest_in_set_op(
            self,
            primary_nested_path,
            selects,  # EVERY SELECT CLAUSE (NOT TO BE USED ON ALL TABLES, OF COURSE
            where_clause,
            active_columns,
            index_to_sql_select  # MAP FROM INDEX TO COLUMN (OR SELECT CLAUSE)
    ):
        """
        FOR EACH NESTED LEVEL, WE MAKE A QUERY THAT PULLS THE VALUES/COLUMNS REQUIRED
        WE `UNION ALL` THEM WHEN DONE
        :param primary_nested_path:
        :param selects:
        :param where_clause:
        :param active_columns:
        :param index_to_sql_select:
        :return: SQL FOR ONE NESTED LEVEL
        """

        parent_alias = "a"
        from_clause = []
        select_clause = []
        children_sql = []
        done = []
        if not where_clause:
            where_clause = SQL_TRUE
        # STATEMENT FOR EACH NESTED PATH
        for i, (nested_path, sub_table) in enumerate(self.snowflake.tables):
            if any(startswith_field(nested_path, d) for d in done):
                continue

            alias = "__" + unichr(ord('a') + i) + "__"

            if primary_nested_path == nested_path:
                select_clause = []
                # ADD SELECT CLAUSE HERE
                for select_index, s in enumerate(selects):
                    sql_select = index_to_sql_select.get(select_index)
                    if not sql_select:
                        select_clause.append(selects[select_index])
                        continue

                    if startswith_field(sql_select.nested_path[0],
                                        nested_path):
                        select_clause.append(
                            sql_alias(sql_select.sql, sql_select.column_alias))
                    else:
                        # DO NOT INCLUDE DEEP STUFF AT THIS LEVEL
                        select_clause.append(
                            sql_alias(SQL_NULL, sql_select.column_alias))

                if nested_path == ".":
                    from_clause.append(SQL_FROM)
                    from_clause.append(
                        sql_alias(quote_column(self.snowflake.fact_name),
                                  alias))
                else:
                    from_clause.append(SQL_LEFT_JOIN)
                    from_clause.append(
                        sql_alias(
                            quote_column(self.snowflake.fact_name,
                                         sub_table.name), alias))
                    from_clause.append(SQL_ON)
                    from_clause.append(quote_column(alias, PARENT))
                    from_clause.append(SQL_EQ)
                    from_clause.append(quote_column(parent_alias, UID))
                    where_clause = sql_iso(
                        where_clause) + SQL_AND + quote_column(alias,
                                                               ORDER) + " > 0"
                parent_alias = alias

            elif startswith_field(primary_nested_path, nested_path):
                # PARENT TABLE
                # NO NEED TO INCLUDE COLUMNS, BUT WILL INCLUDE ID AND ORDER
                if nested_path == ".":
                    from_clause.append(SQL_FROM)
                    from_clause.append(
                        sql_alias(quote_column(self.snowflake.fact_name),
                                  alias))
                else:
                    parent_alias = alias = unichr(ord('a') + i - 1)
                    from_clause.append(SQL_LEFT_JOIN)
                    from_clause.append(
                        sql_alias(
                            quote_column(self.snowflake.fact_name,
                                         sub_table.name), alias))
                    from_clause.append(SQL_ON)
                    from_clause.append(quote_column(alias, PARENT))
                    from_clause.append(SQL_EQ)
                    from_clause.append(quote_column(parent_alias, UID))
                    where_clause = sql_iso(
                        where_clause) + SQL_AND + quote_column(
                            parent_alias, ORDER) + " > 0"
                parent_alias = alias

            elif startswith_field(nested_path, primary_nested_path):
                # CHILD TABLE
                # GET FIRST ROW FOR EACH NESTED TABLE
                from_clause.append(SQL_LEFT_JOIN)
                from_clause.append(
                    sql_alias(
                        quote_column(self.snowflake.fact_name, sub_table.name),
                        alias))
                from_clause.append(SQL_ON)
                from_clause.append(quote_column(alias, PARENT))
                from_clause.append(SQL_EQ)
                from_clause.append(quote_column(parent_alias, UID))
                from_clause.append(SQL_AND)
                from_clause.append(quote_column(alias, ORDER))
                from_clause.append(SQL_EQ)
                from_clause.append(SQL_ZERO)

                # IMMEDIATE CHILDREN ONLY
                done.append(nested_path)
                # NESTED TABLES WILL USE RECURSION
                children_sql.append(
                    self._make_sql_for_one_nest_in_set_op(
                        nested_path,
                        selects,  # EVERY SELECT CLAUSE (NOT TO BE USED ON ALL TABLES, OF COURSE
                        where_clause,
                        active_columns,
                        index_to_sql_select  # MAP FROM INDEX TO COLUMN (OR SELECT CLAUSE)
                    ))
            else:
                # SIBLING PATHS ARE IGNORED
                continue

        sql = SQL_UNION_ALL.join([
            ConcatSQL(SQL_SELECT, sql_list(select_clause),
                      ConcatSQL(*from_clause), SQL_WHERE, where_clause)
        ], *children_sql)

        return sql
Пример #15
0
    def _groupby_op(self, query, schema):
        base_table = schema.snowflake.fact_name
        path = schema.nested_path
        # base_table, path = tail_field(frum)
        # schema = self.snowflake.tables[path].schema
        index_to_column = {}
        nest_to_alias = {
            nested_path: "__" + unichr(ord('a') + i) + "__"
            for i, nested_path in enumerate(self.schema.snowflake.query_paths)
        }
        tables = []
        for n, a in nest_to_alias.items():
            if startswith_field(path, n):
                tables.append({"nest": n, "alias": a})
        tables = jx.sort(tables, {"value": {"length": "nest"}})

        from_sql = join_field(
            [base_table] + split_field(tables[0].nest)) + " " + tables[0].alias
        previous = tables[0]
        for t in tables[1::]:
            from_sql += (SQL_LEFT_JOIN +
                         quote_column(concat_field(base_table, t.nest)) + " " +
                         t.alias + SQL_ON + quote_column(t.alias, PARENT) +
                         SQL_EQ + quote_column(previous.alias, UID))

        selects = []
        groupby = []
        for i, e in enumerate(query.groupby):
            for edge_sql in SQLang[e.value].to_sql(schema):
                column_number = len(selects)
                sql_type, sql = edge_sql.sql.items()[0]
                if sql is SQL_NULL and not e.value.var in schema.keys():
                    Log.error("No such column {{var}}", var=e.value.var)

                column_alias = _make_column_name(column_number)
                groupby.append(sql)
                selects.append(sql_alias(sql, column_alias))
                if edge_sql.nested_path == ".":
                    select_name = edge_sql.name
                else:
                    select_name = "."
                index_to_column[column_number] = ColumnMapping(
                    is_edge=True,
                    push_name=e.name,
                    push_column_name=e.name.replace("\\.", "."),
                    push_column=i,
                    push_child=select_name,
                    pull=get_column(column_number),
                    sql=sql,
                    column_alias=column_alias,
                    type=sql_type_to_json_type[sql_type])

        for i, select in enumerate(listwrap(query.select)):
            column_number = len(selects)
            sql_type, sql = SQLang[select.value].to_sql(
                schema)[0].sql.items()[0]
            if sql == 'NULL' and not select.value.var in schema.keys():
                Log.error("No such column {{var}}", var=select.value.var)

            # AGGREGATE
            if select.value == "." and select.aggregate == "count":
                sql = sql_count(SQL_ONE)
            else:
                sql = sql_call(sql_aggs[select.aggregate], sql)

            if select.default != None:
                sql = sql_coalesce([sql, quote_value(select.default)])

            selects.append(sql_alias(sql, select.name))

            index_to_column[column_number] = ColumnMapping(
                push_name=select.name,
                push_column_name=select.name,
                push_column=i + len(query.groupby),
                push_child=".",
                pull=get_column(column_number),
                sql=sql,
                column_alias=quote_column(select.name),
                type=sql_type_to_json_type[sql_type])

        for w in query.window:
            selects.append(self._window_op(self, query, w))

        where = SQLang[query.where].to_sql(schema)[0].sql.b

        command = (SQL_SELECT + (sql_list(selects)) + SQL_FROM + from_sql +
                   SQL_WHERE + where + SQL_GROUPBY + sql_list(groupby))

        if query.sort:
            command += SQL_ORDERBY + sql_list(
                sql_iso(sql[t]) + SQL_IS_NULL + "," + sql[t] +
                (" DESC" if s.sort == -1 else "")
                for s, sql in [(s, SQLang[s.value].to_sql(schema)[0].sql)
                               for s in query.sort] for t in "bns" if sql[t])

        return command, index_to_column
Пример #16
0
    def device_callback(self, path=None):
        # HANDLE BROWESR RETURN FROM AUTH0 LOGIN
        error = request.args.get("error")
        if error:
            Log.error("You did it wrong")
        session_id = request.cookies.get(self.device.login.session.name)
        if not session_id:
            Log.error("You did it wrong")
        login_session = self.session_manager.get_session(session_id)

        code = request.args.get("code")
        state = request.args.get("state")

        result = self.device.db.query(
            sql_query({
                "from": "device",
                "select": "session_id",
                "where": {
                    "eq": {
                        "state": state
                    }
                },
            }))
        if not result.data:
            Log.error("expecting valid state")
        device_session_id = result.data[0][0]

        # GO BACK TO AUTH0 TO GET TOKENS
        token_request = {
            "client_id": self.device.auth0.client_id,
            "redirect_uri": self.device.auth0.redirect_uri,
            "code_verifier": login_session.code_verifier,
            "code": code,
            "grant_type": "authorization_code",
        }
        DEBUG and Log.note("Send token request to Auth0:\n {{request}}",
                           request=token_request)
        auth_response = requests.request(
            "POST",
            str(URL("https://" + self.device.auth0.domain,
                    path="oauth/token")),
            headers={
                "Accept": mimetype.JSON,
                "Content-Type": mimetype.JSON,
                # "Referer": str(URL(self.device.auth0.redirect_uri, query={"code": code, "state": state})),
            },
            data=value2json(token_request),
        )

        try:
            auth_result = wrap(auth_response.json())
        except Exception as e:
            Log.error("not json {{value}}",
                      value=auth_response.content,
                      cause=e)

        # VERIFY TOKENS, ADD USER TO DEVICE'S SESSION
        user_details = self.verify_opaque_token(auth_result.access_token)
        self.session_manager.update_session(
            device_session_id,
            {"user": self.permissions.get_or_create_user(user_details)},
        )

        # REMOVE DEVICE SETUP STATE
        with self.device.db.transaction() as t:
            t.execute(
                ConcatSQL(
                    SQL_DELETE,
                    SQL_FROM,
                    quote_column(self.device.table),
                    SQL_WHERE,
                    sql_eq(state=state),
                ))
        Log.note("login complete")
        return Response("Login complete. You may close this page", status=200)
Пример #17
0
    def update(self, command):
        """
        :param command:  EXPECTING dict WITH {"set": s, "clear": c, "where": w} FORMAT
        """
        command = wrap(command)
        clear_columns = set(listwrap(command['clear']))

        # REJECT DEEP UPDATES
        touched_columns = command.set.keys() | clear_columns
        for c in self.schema.columns:
            if c.name in touched_columns and len(c.nested_path) > 1:
                Log.error("Deep update not supported")

        # ADD NEW COLUMNS
        where = jx_expression(command.where) or TRUE
        _vars = where.vars()
        _map = {
            v: c.es_column
            for v in _vars for c in self.columns.get(v, Null)
            if c.jx_type not in STRUCT
        }
        where_sql = where.map(_map).to_sql(self.schema)[0].sql.b
        new_columns = set(command.set.keys()) - set(
            c.name for c in self.schema.columns)
        for new_column_name in new_columns:
            nested_value = command.set[new_column_name]
            ctype = get_jx_type(nested_value)
            column = Column(name=new_column_name,
                            jx_type=ctype,
                            es_index=self.name,
                            es_type=json_type_to_sqlite_type(ctype),
                            es_column=typed_column(new_column_name, ctype),
                            last_updated=Date.now())
            self.add_column(column)

        # UPDATE THE NESTED VALUES
        for nested_column_name, nested_value in command.set.items():
            if get_jx_type(nested_value) == "nested":
                nested_table_name = concat_field(self.name, nested_column_name)
                nested_table = nested_tables[nested_column_name]
                self_primary_key = sql_list(
                    quote_column(c.es_column) for u in self.uid
                    for c in self.columns[u])
                extra_key_name = UID + text(len(self.uid))
                extra_key = [e
                             for e in nested_table.columns[extra_key_name]][0]

                sql_command = (
                    SQL_DELETE + SQL_FROM + quote_column(nested_table.name) +
                    SQL_WHERE + "EXISTS" +
                    sql_iso(SQL_SELECT + SQL_ONE + SQL_FROM +
                            sql_alias(quote_column(nested_table.name), "n") +
                            SQL_INNER_JOIN +
                            sql_iso(SQL_SELECT + self_primary_key + SQL_FROM +
                                    quote_column(abs_schema.fact) + SQL_WHERE +
                                    where_sql) + " t ON " +
                            SQL_AND.join(
                                quote_column("t", c.es_column) + SQL_EQ +
                                quote_column("n", c.es_column)
                                for u in self.uid for c in self.columns[u])))
                self.db.execute(sql_command)

                # INSERT NEW RECORDS
                if not nested_value:
                    continue

                doc_collection = {}
                for d in listwrap(nested_value):
                    nested_table.flatten(d,
                                         Data(),
                                         doc_collection,
                                         path=nested_column_name)

                prefix = SQL_INSERT + quote_column(nested_table.name) + sql_iso(
                    sql_list([self_primary_key] + [quote_column(extra_key)] + [
                        quote_column(c.es_column)
                        for c in doc_collection.get(".", Null).active_columns
                    ]))

                # BUILD THE PARENT TABLES
                parent = (SQL_SELECT + self_primary_key + SQL_FROM +
                          quote_column(abs_schema.fact) + SQL_WHERE +
                          jx_expression(command.where).to_sql(schema))

                # BUILD THE RECORDS
                children = SQL_UNION_ALL.join(
                    SQL_SELECT +
                    sql_alias(quote_value(i), extra_key.es_column) +
                    SQL_COMMA + sql_list(
                        sql_alias(quote_value(row[c.name]),
                                  quote_column(c.es_column))
                        for c in doc_collection.get(".", Null).active_columns)
                    for i, row in enumerate(
                        doc_collection.get(".", Null).rows))

                sql_command = (prefix + SQL_SELECT + sql_list([
                    quote_column("p", c.es_column) for u in self.uid
                    for c in self.columns[u]
                ] + [quote_column("c", extra_key)] + [
                    quote_column("c", c.es_column)
                    for c in doc_collection.get(".", Null).active_columns
                ]) + SQL_FROM + sql_iso(parent) + " p" + SQL_INNER_JOIN +
                               sql_iso(children) + " c" + SQL_ON + SQL_TRUE)

                self.db.execute(sql_command)

                # THE CHILD COLUMNS COULD HAVE EXPANDED
                # ADD COLUMNS TO SELF
                for n, cs in nested_table.columns.items():
                    for c in cs:
                        column = Column(name=c.name,
                                        jx_type=c.jx_type,
                                        es_type=c.es_type,
                                        es_index=c.es_index,
                                        es_column=c.es_column,
                                        nested_path=[nested_column_name] +
                                        c.nested_path,
                                        last_updated=Date.now())
                        if c.name not in self.columns:
                            self.columns[column.name] = {column}
                        elif c.jx_type not in [
                                c.jx_type for c in self.columns[c.name]
                        ]:
                            self.columns[column.name].add(column)

        command = ConcatSQL(
            SQL_UPDATE, quote_column(self.name), SQL_SET,
            sql_list([
                quote_column(c.es_column) + SQL_EQ +
                quote_value(get_if_type(v, c.jx_type))
                for c in self.schema.columns
                if c.jx_type != NESTED and len(c.nested_path) == 1
                for v in [command.set[c.name]] if v != None
            ] + [
                quote_column(c.es_column) + SQL_EQ + SQL_NULL
                for c in self.schema.columns
                if (c.name in clear_columns and command.set[c.name] != None
                    and c.jx_type != NESTED and len(c.nested_path) == 1)
            ]), SQL_WHERE, where_sql)

        with self.db.transaction() as t:
            t.execute(command)
Пример #18
0
    "sum": "SUM"
}

STATS = {
    "count": "COUNT({{value}})",
    "std": "SQRT((1-1.0/COUNT({{value}}))*VARIANCE({{value}}))",
    "min": "MIN({{value}})",
    "max": "MAX({{value}})",
    "sum": "SUM({{value}})",
    "median": "MEDIAN({{value}})",
    "sos": "SUM({{value}}*{{value}})",
    "var": "(1-1.0/COUNT({{value}}))*VARIANCE({{value}})",
    "avg": "AVG({{value}})"
}

quoted_GUID = quote_column(GUID)
quoted_UID = quote_column(UID)
quoted_ORDER = quote_column(ORDER)
quoted_PARENT = quote_column(PARENT)


def sql_text_array_to_set(column):
    def _convert(row):
        text = row[column]
        if text == None:
            return set()
        else:
            value = json2value(row[column])
            return set(value) - {None}

    return _convert
Пример #19
0
    def _nest_column(self, column):
        new_path, type_ = untyped_column(column.es_column)
        if type_ != SQL_NESTED_TYPE:
            Log.error("only nested types can be nested")
        destination_table = concat_field(self.fact_name, new_path)
        existing_table = concat_field(self.fact_name, column.nested_path[0])

        # FIND THE INNER COLUMNS WE WILL BE MOVING
        moving_columns = []
        for c in self.columns:
            if destination_table != column.es_index and column.es_column == c.es_column:
                moving_columns.append(c)
                c.nested_path = new_path

        # TODO: IF THERE ARE CHILD TABLES, WE MUST UPDATE THEIR RELATIONS TOO?

        # LOAD THE COLUMNS
        data = self.namespace.db.about(destination_table)
        if not data:
            # DEFINE A NEW TABLE
            command = (
                SQL_CREATE + quote_column(destination_table) + sql_iso(sql_list([
                    quoted_UID + "INTEGER",
                    quoted_PARENT + "INTEGER",
                    quoted_ORDER + "INTEGER",
                    "PRIMARY KEY " + sql_iso(quoted_UID),
                    "FOREIGN KEY " + sql_iso(quoted_PARENT) + " REFERENCES " + quote_column(existing_table) + sql_iso(quoted_UID)
                ]))
            )
            with self.namespace.db.transaction() as t:
                t.execute(command)
                self.add_table([new_path]+column.nested_path)

        # TEST IF THERE IS ANY DATA IN THE NEW NESTED ARRAY
        if not moving_columns:
            return

        column.es_index = destination_table
        with self.namespace.db.transaction() as t:
            t.execute(
                "ALTER TABLE " + quote_column(destination_table) +
                " ADD COLUMN " + quote_column(column.es_column) + " " + column.es_type
            )

            # Deleting parent columns
            for col in moving_columns:
                column = col.es_column
                tmp_table = "tmp_" + existing_table
                columns = list(map(text, t.query(SQL_SELECT + SQL_STAR + SQL_FROM + quote_column(existing_table) + SQL_LIMIT + SQL_ZERO).header))
                t.execute(
                    "ALTER TABLE " + quote_column(existing_table) +
                    " RENAME TO " + quote_column(tmp_table)
                )
                t.execute(
                    SQL_CREATE + quote_column(existing_table) + SQL_AS +
                    SQL_SELECT + sql_list([quote_column(c) for c in columns if c != column]) +
                    SQL_FROM + quote_column(tmp_table)
                )
                t.execute("DROP TABLE " + quote_column(tmp_table))
Пример #20
0
 def __nonzero__(self):
     counter = self.db.query(SQL_SELECT + sql_count("*") + SQL_FROM + quote_column(self.snowflake.fact_name))[0][0]
     return bool(counter)
Пример #21
0
    def query(self, query=None):
        """
        :param query:  JSON Query Expression, SET `format="container"` TO MAKE NEW TABLE OF RESULT
        :return:
        """
        if not query:
            query = {}
        if not query.get('from'):
            query['from'] = self.name
        elif not startswith_field(query['from'], self.name):
            Log.error("Expecting table, or some nested table")
        query = QueryOp.wrap(query, self.container, self.namespace)
        new_table = "temp_" + unique_name()

        if query.format == "container":
            create_table = SQL_CREATE + quote_column(new_table) + SQL_AS
        else:
            create_table = ""

        if query.groupby and query.format != "cube":
            op, index_to_columns = self._groupby_op(query, self.schema)
            command = create_table + op
        elif query.groupby:
            query.edges, query.groupby = query.groupby, query.edges
            op, index_to_columns = self._edges_op(query, self.schema)
            command = create_table + op
            query.edges, query.groupby = query.groupby, query.edges
        elif query.edges or any(a != "none" for a in listwrap(query.select).aggregate):
            op, index_to_columns = self._edges_op(query, query.frum.schema)
            command = create_table + op
        else:
            op = self._set_op(query)
            return op

        result = self.db.query(command)

        if query.format == "container":
            output = QueryTable(new_table, db=self.db, uid=self.uid, exists=True)
        elif query.format == "cube" or (not query.format and query.edges):
            column_names = [None] * (max(c.push_column for c in index_to_columns.values()) + 1)
            for c in index_to_columns.values():
                column_names[c.push_column] = c.push_column_name

            if len(query.edges) == 0 and len(query.groupby) == 0:
                data = {n: Data() for n in column_names}
                for s in index_to_columns.values():
                    data[s.push_name][s.push_child] = unwrap(s.pull(result.data[0]))
                if is_list(query.select):
                    select = [{"name": s.name} for s in query.select]
                else:
                    select = {"name": query.select.name}

                return Data(
                    data=unwrap(data),
                    select=select,
                    meta={"format": "cube"}
                )

            if not result.data:
                edges = []
                dims = []
                for i, e in enumerate(query.edges + query.groupby):
                    allowNulls = coalesce(e.allowNulls, True)

                    if e.domain.type == "set" and e.domain.partitions:
                        domain = SimpleSetDomain(partitions=e.domain.partitions.name)
                    elif e.domain.type == "range":
                        domain = e.domain
                    elif is_op(e.value, TupleOp):
                        pulls = jx.sort([c for c in index_to_columns.values() if c.push_name == e.name],
                                        "push_child").pull
                        parts = [tuple(p(d) for p in pulls) for d in result.data]
                        domain = SimpleSetDomain(partitions=jx.sort(set(parts)))
                    else:
                        domain = SimpleSetDomain(partitions=[])

                    dims.append(1 if allowNulls else 0)
                    edges.append(Data(
                        name=e.name,
                        allowNulls=allowNulls,
                        domain=domain
                    ))

                data = {}
                for si, s in enumerate(listwrap(query.select)):
                    if s.aggregate == "count":
                        data[s.name] = Matrix(dims=dims, zeros=0)
                    else:
                        data[s.name] = Matrix(dims=dims)

                if is_list(query.select):
                    select = [{"name": s.name} for s in query.select]
                else:
                    select = {"name": query.select.name}

                return Data(
                    meta={"format": "cube"},
                    edges=edges,
                    select=select,
                    data={k: v.cube for k, v in data.items()}
                )

            columns = None

            edges = []
            dims = []
            for g in query.groupby:
                g.is_groupby = True

            for i, e in enumerate(query.edges + query.groupby):
                allowNulls = coalesce(e.allowNulls, True)

                if e.domain.type == "set" and e.domain.partitions:
                    domain = SimpleSetDomain(partitions=e.domain.partitions.name)
                elif e.domain.type == "range":
                    domain = e.domain
                elif e.domain.type == "time":
                    domain = wrap(mo_json.scrub(e.domain))
                elif e.domain.type == "duration":
                    domain = wrap(mo_json.scrub(e.domain))
                elif is_op(e.value, TupleOp):
                    pulls = jx.sort([c for c in index_to_columns.values() if c.push_name == e.name], "push_child").pull
                    parts = [tuple(p(d) for p in pulls) for d in result.data]
                    domain = SimpleSetDomain(partitions=jx.sort(set(parts)))
                else:
                    if not columns:
                        columns = transpose(*result.data)
                    parts = set(columns[i])
                    if e.is_groupby and None in parts:
                        allowNulls = True
                    parts -= {None}

                    if query.sort[i].sort == -1:
                        domain = SimpleSetDomain(partitions=wrap(sorted(parts, reverse=True)))
                    else:
                        domain = SimpleSetDomain(partitions=jx.sort(parts))

                dims.append(len(domain.partitions) + (1 if allowNulls else 0))
                edges.append(Data(
                    name=e.name,
                    allowNulls=allowNulls,
                    domain=domain
                ))

            data_cubes = {}
            for si, s in enumerate(listwrap(query.select)):
                if s.aggregate == "count":
                    data_cubes[s.name] = Matrix(dims=dims, zeros=0)
                else:
                    data_cubes[s.name] = Matrix(dims=dims)

            r2c = index_to_coordinate(dims)  # WORKS BECAUSE THE DATABASE SORTED THE EDGES TO CONFORM
            for rownum, row in enumerate(result.data):
                coord = r2c(rownum)

                for i, s in enumerate(index_to_columns.values()):
                    if s.is_edge:
                        continue
                    if s.push_child == ".":
                        data_cubes[s.push_name][coord] = s.pull(row)
                    else:
                        data_cubes[s.push_name][coord][s.push_child] = s.pull(row)

            if query.select == None:
                select = Null
            elif is_list(query.select):
                select = [{"name": s.name} for s in query.select]
            else:
                select = {"name": query.select.name}

            return Data(
                meta={"format": "cube"},
                edges=edges,
                select=select,
                data={k: v.cube for k, v in data_cubes.items()}
            )
        elif query.format == "table" or (not query.format and query.groupby):
            column_names = [None] * (max(c.push_column for c in index_to_columns.values()) + 1)
            for c in index_to_columns.values():
                column_names[c.push_column] = c.push_column_name
            data = []
            for d in result.data:
                row = [None for _ in column_names]
                for s in index_to_columns.values():
                    if s.push_child == ".":
                        row[s.push_column] = s.pull(d)
                    elif s.num_push_columns:
                        tuple_value = row[s.push_column]
                        if tuple_value == None:
                            tuple_value = row[s.push_column] = [None] * s.num_push_columns
                        tuple_value[s.push_child] = s.pull(d)
                    elif row[s.push_column] == None:
                        row[s.push_column] = Data()
                        row[s.push_column][s.push_child] = s.pull(d)
                    else:
                        row[s.push_column][s.push_child] = s.pull(d)
                data.append(tuple(unwrap(r) for r in row))

            output = Data(
                meta={"format": "table"},
                header=column_names,
                data=data
            )
        elif query.format == "list" or (not query.edges and not query.groupby):
            if not query.edges and not query.groupby and any(listwrap(query.select).aggregate):
                if is_list(query.select):
                    data = Data()
                    for c in index_to_columns.values():
                        if c.push_child == ".":
                            if data[c.push_name] == None:
                                data[c.push_name] = c.pull(result.data[0])
                            elif is_list(data[c.push_name]):
                                data[c.push_name].append(c.pull(result.data[0]))
                            else:
                                data[c.push_name] = [data[c.push_name], c.pull(result.data[0])]
                        else:
                            data[c.push_name][c.push_child] = c.pull(result.data[0])

                    output = Data(
                        meta={"format": "value"},
                        data=data
                    )
                else:
                    data = Data()
                    for s in index_to_columns.values():
                        if not data[s.push_child]:
                            data[s.push_child] = s.pull(result.data[0])
                        else:
                            data[s.push_child] += [s.pull(result.data[0])]
                    output = Data(
                        meta={"format": "value"},
                        data=unwrap(data)
                    )
            else:
                data = []
                for rownum in result.data:
                    row = Data()
                    for c in index_to_columns.values():
                        if c.push_child == ".":
                            row[c.push_name] = c.pull(rownum)
                        elif c.num_push_columns:
                            tuple_value = row[c.push_name]
                            if not tuple_value:
                                tuple_value = row[c.push_name] = [None] * c.num_push_columns
                            tuple_value[c.push_child] = c.pull(rownum)
                        else:
                            row[c.push_name][c.push_child] = c.pull(rownum)

                    data.append(row)

                output = Data(
                    meta={"format": "list"},
                    data=data
                )
        else:
            Log.error("unknown format {{format}}", format=query.format)

        return output
Пример #22
0
    def _set_op(self, query):
        # GET LIST OF SELECTED COLUMNS
        vars_ = UNION([
            v.var for select in listwrap(query.select)
            for v in select.value.vars()
        ])
        schema = self.schema
        known_vars = schema.keys()

        active_columns = {".": set()}
        for v in vars_:
            for c in schema.leaves(v):
                nest = c.nested_path[0]
                active_columns.setdefault(nest, set()).add(c)

        # ANY VARS MENTIONED WITH NO COLUMNS?
        for v in vars_:
            if not any(startswith_field(cname, v) for cname in known_vars):
                active_columns["."].add(
                    Column(name=v,
                           jx_type=IS_NULL,
                           es_column=".",
                           es_index=".",
                           es_type='NULL',
                           nested_path=["."],
                           last_updated=Date.now()))

        # EVERY COLUMN, AND THE INDEX IT TAKES UP
        index_to_column = {}  # MAP FROM INDEX TO COLUMN (OR SELECT CLAUSE)
        index_to_uid = {}  # FROM NESTED PATH TO THE INDEX OF UID
        sql_selects = [
        ]  # EVERY SELECT CLAUSE (NOT TO BE USED ON ALL TABLES, OF COURSE)
        nest_to_alias = {
            nested_path[0]: "__" + unichr(ord('a') + i) + "__"
            for i, nested_path in enumerate(self.snowflake.query_paths)
        }

        sorts = []
        if query.sort:
            for select in query.sort:
                col = SQLang[select.value].to_sql(schema)[0]
                for t, sql in col.sql.items():
                    json_type = sql_type_to_json_type[t]
                    if json_type in STRUCT:
                        continue
                    column_number = len(sql_selects)
                    # SQL HAS ABS TABLE REFERENCE
                    column_alias = _make_column_name(column_number)
                    sql_selects.append(sql_alias(sql, column_alias))
                    if select.sort == -1:
                        sorts.append(quote_column(column_alias) + SQL_IS_NULL)
                        sorts.append(quote_column(column_alias) + " DESC")
                    else:
                        sorts.append(quote_column(column_alias) + SQL_IS_NULL)
                        sorts.append(quote_column(column_alias))

        primary_doc_details = Data()
        # EVERY SELECT STATEMENT THAT WILL BE REQUIRED, NO MATTER THE DEPTH
        # WE WILL CREATE THEM ACCORDING TO THE DEPTH REQUIRED
        nested_path = []
        for step, sub_table in self.snowflake.tables:
            nested_path.insert(0, step)
            nested_doc_details = {
                "sub_table": sub_table,
                "children": [],
                "index_to_column": {},
                "nested_path": nested_path
            }

            # INSERT INTO TREE
            if not primary_doc_details:
                primary_doc_details = nested_doc_details
            else:

                def place(parent_doc_details):
                    if startswith_field(step,
                                        parent_doc_details['nested_path'][0]):
                        for c in parent_doc_details['children']:
                            if place(c):
                                return True
                        parent_doc_details['children'].append(
                            nested_doc_details)

                place(primary_doc_details)

            alias = nested_doc_details['alias'] = nest_to_alias[step]

            # WE ALWAYS ADD THE UID
            column_number = index_to_uid[step] = nested_doc_details[
                'id_coord'] = len(sql_selects)
            sql_select = quote_column(alias, UID)
            sql_selects.append(
                sql_alias(sql_select, _make_column_name(column_number)))
            if step != ".":
                # ID AND ORDER FOR CHILD TABLES
                index_to_column[column_number] = ColumnMapping(
                    sql=sql_select,
                    type="number",
                    nested_path=nested_path,
                    column_alias=_make_column_name(column_number))
                column_number = len(sql_selects)
                sql_select = quote_column(alias, ORDER)
                sql_selects.append(
                    sql_alias(sql_select, _make_column_name(column_number)))
                index_to_column[column_number] = ColumnMapping(
                    sql=sql_select,
                    type="number",
                    nested_path=nested_path,
                    column_alias=_make_column_name(column_number))

            # WE DO NOT NEED DATA FROM TABLES WE REQUEST NOTHING FROM
            if step not in active_columns:
                continue

            # ADD SQL SELECT COLUMNS FOR EACH jx SELECT CLAUSE
            si = 0
            for select in listwrap(query.select):
                try:
                    column_number = len(sql_selects)
                    select.pull = get_column(column_number)
                    db_columns = SQLang[select.value].partial_eval().to_sql(
                        schema)

                    for column in db_columns:
                        for t, unsorted_sql in column.sql.items():
                            json_type = sql_type_to_json_type[t]
                            if json_type in STRUCT:
                                continue
                            column_number = len(sql_selects)
                            column_alias = _make_column_name(column_number)
                            sql_selects.append(
                                sql_alias(unsorted_sql, column_alias))
                            if startswith_field(schema.path, step) and is_op(
                                    select.value, LeavesOp):
                                # ONLY FLATTEN primary_nested_path AND PARENTS, NOT CHILDREN
                                index_to_column[
                                    column_number] = nested_doc_details[
                                        'index_to_column'][
                                            column_number] = ColumnMapping(
                                                push_name=literal_field(
                                                    get_property_name(
                                                        concat_field(
                                                            select.name,
                                                            column.name))),
                                                push_child=".",
                                                push_column_name=
                                                get_property_name(
                                                    concat_field(
                                                        select.name,
                                                        column.name)),
                                                push_column=si,
                                                pull=get_column(column_number),
                                                sql=unsorted_sql,
                                                type=json_type,
                                                column_alias=column_alias,
                                                nested_path=nested_path)
                                si += 1
                            else:
                                index_to_column[
                                    column_number] = nested_doc_details[
                                        'index_to_column'][
                                            column_number] = ColumnMapping(
                                                push_name=select.name,
                                                push_child=column.name,
                                                push_column_name=select.name,
                                                push_column=si,
                                                pull=get_column(column_number),
                                                sql=unsorted_sql,
                                                type=json_type,
                                                column_alias=column_alias,
                                                nested_path=nested_path)
                finally:
                    si += 1

        where_clause = BooleanOp(query.where).partial_eval().to_sql(
            schema, boolean=True)[0].sql.b
        unsorted_sql = self._make_sql_for_one_nest_in_set_op(
            ".", sql_selects, where_clause, active_columns, index_to_column)

        for n, _ in self.snowflake.tables:
            sorts.append(quote_column(COLUMN + text(index_to_uid[n])))

        ordered_sql = ConcatSQL(SQL_SELECT, SQL_STAR, SQL_FROM,
                                sql_iso(unsorted_sql), SQL_ORDERBY,
                                sql_list(sorts), SQL_LIMIT,
                                quote_value(query.limit))
        result = self.db.query(ordered_sql)

        def _accumulate_nested(rows, row, nested_doc_details, parent_doc_id,
                               parent_id_coord):
            """
            :param rows: REVERSED STACK OF ROWS (WITH push() AND pop())
            :param row: CURRENT ROW BEING EXTRACTED
            :param nested_doc_details: {
                    "nested_path": wrap_nested_path(nested_path),
                    "index_to_column": map from column number to column details
                    "children": all possible direct decedents' nested_doc_details
                 }
            :param parent_doc_id: the id of the parent doc (for detecting when to step out of loop)
            :param parent_id_coord: the column number for the parent id (so we ca extract from each row)
            :return: the nested property (usually an array)
            """
            previous_doc_id = None
            doc = Null
            output = []
            id_coord = nested_doc_details['id_coord']

            while True:
                doc_id = row[id_coord]

                if doc_id == None or (parent_id_coord is not None and
                                      row[parent_id_coord] != parent_doc_id):
                    rows.append(
                        row
                    )  # UNDO PREVIOUS POP (RECORD IS NOT A NESTED RECORD OF parent_doc)
                    return output

                if doc_id != previous_doc_id:
                    previous_doc_id = doc_id
                    doc = Null
                    curr_nested_path = nested_doc_details['nested_path'][0]
                    index_to_column = nested_doc_details[
                        'index_to_column'].items()
                    for i, c in index_to_column:
                        value = row[i]
                        if is_list(query.select) or is_op(
                                query.select.value, LeavesOp):
                            # ASSIGN INNER PROPERTIES
                            relative_field = concat_field(
                                c.push_name, c.push_child)
                        else:  # FACT IS EXPECTED TO BE A SINGLE VALUE, NOT AN OBJECT
                            relative_field = c.push_child

                        if relative_field == ".":
                            if exists(value):
                                doc = value
                        elif exists(value):
                            if doc is Null:
                                doc = Data()
                            doc[relative_field] = value

                for child_details in nested_doc_details['children']:
                    # EACH NESTED TABLE MUST BE ASSEMBLED INTO A LIST OF OBJECTS
                    child_id = row[child_details['id_coord']]
                    if child_id is not None:
                        nested_value = _accumulate_nested(
                            rows, row, child_details, doc_id, id_coord)
                        if nested_value != None:
                            push_name = child_details['nested_path'][0]
                            if is_list(query.select) or is_op(
                                    query.select.value, LeavesOp):
                                # ASSIGN INNER PROPERTIES
                                relative_field = relative_field(
                                    push_name, curr_nested_path)
                            else:  # FACT IS EXPECTED TO BE A SINGLE VALUE, NOT AN OBJECT
                                relative_field = "."

                            if relative_field == ".":
                                doc = unwraplist(nested_value)
                            else:
                                doc[relative_field] = unwraplist(nested_value)

                output.append(doc)

                try:
                    row = rows.pop()
                except IndexError:
                    return output

        cols = tuple(
            [i for i in index_to_column.values() if i.push_name != None])
        rows = list(reversed(unwrap(result.data)))
        if rows:
            row = rows.pop()
            data = _accumulate_nested(rows, row, primary_doc_details, None,
                                      None)
        else:
            data = result.data

        if query.format == "cube":
            # for f, full_name in self.snowflake.tables:
            #     if f != '.' or (test_dots(cols) and is_list(query.select)):
            #         num_rows = len(result.data)
            #         num_cols = MAX([c.push_column for c in cols]) + 1 if len(cols) else 0
            #         map_index_to_name = {c.push_column: c.push_column_name for c in cols}
            #         temp_data = [[None] * num_rows for _ in range(num_cols)]
            #         for rownum, d in enumerate(result.data):
            #             for c in cols:
            #                 if c.push_child == ".":
            #                     temp_data[c.push_column][rownum] = c.pull(d)
            #                 else:
            #                     column = temp_data[c.push_column][rownum]
            #                     if column is None:
            #                         column = temp_data[c.push_column][rownum] = {}
            #                     column[c.push_child] = c.pull(d)
            #         output = Data(
            #             meta={"format": "cube"},
            #             data={n: temp_data[c] for c, n in map_index_to_name.items()},
            #             edges=[{
            #                 "name": "rownum",
            #                 "domain": {
            #                     "type": "rownum",
            #                     "min": 0,
            #                     "max": num_rows,
            #                     "interval": 1
            #                 }
            #             }]
            #         )
            #         return output

            if is_list(query.select) or is_op(query.select.value, LeavesOp):
                num_rows = len(data)
                temp_data = {
                    c.push_column_name: [None] * num_rows
                    for c in cols
                }
                for rownum, d in enumerate(data):
                    for c in cols:
                        temp_data[c.push_column_name][rownum] = d[c.push_name]
                return Data(meta={"format": "cube"},
                            data=temp_data,
                            edges=[{
                                "name": "rownum",
                                "domain": {
                                    "type": "rownum",
                                    "min": 0,
                                    "max": num_rows,
                                    "interval": 1
                                }
                            }])
            else:
                num_rows = len(data)
                map_index_to_name = {
                    c.push_column: c.push_column_name
                    for c in cols
                }
                temp_data = [data]

                return Data(meta={"format": "cube"},
                            data={
                                n: temp_data[c]
                                for c, n in map_index_to_name.items()
                            },
                            edges=[{
                                "name": "rownum",
                                "domain": {
                                    "type": "rownum",
                                    "min": 0,
                                    "max": num_rows,
                                    "interval": 1
                                }
                            }])

        elif query.format == "table":
            # for f, _ in self.snowflake.tables:
            #     if frum.endswith(f):
            #         num_column = MAX([c.push_column for c in cols]) + 1
            #         header = [None] * num_column
            #         for c in cols:
            #             header[c.push_column] = c.push_column_name
            #
            #         output_data = []
            #         for d in result.data:
            #             row = [None] * num_column
            #             for c in cols:
            #                 set_column(row, c.push_column, c.push_child, c.pull(d))
            #             output_data.append(row)
            #
            #         return Data(
            #             meta={"format": "table"},
            #             header=header,
            #             data=output_data
            #         )
            if is_list(query.select) or is_op(query.select.value, LeavesOp):
                column_names = [None] * (max(c.push_column for c in cols) + 1)
                for c in cols:
                    column_names[c.push_column] = c.push_column_name

                temp_data = []
                for rownum, d in enumerate(data):
                    row = [None] * len(column_names)
                    for c in cols:
                        row[c.push_column] = d[c.push_name]
                    temp_data.append(row)

                return Data(meta={"format": "table"},
                            header=column_names,
                            data=temp_data)
            else:
                column_names = listwrap(query.select).name
                return Data(meta={"format": "table"},
                            header=column_names,
                            data=[[d] for d in data])

        else:
            # for f, _ in self.snowflake.tables:
            #     if frum.endswith(f) or (test_dots(cols) and is_list(query.select)):
            #         data = []
            #         for d in result.data:
            #             row = Data()
            #             for c in cols:
            #                 if c.push_child == ".":
            #                     row[c.push_name] = c.pull(d)
            #                 elif c.num_push_columns:
            #                     tuple_value = row[c.push_name]
            #                     if not tuple_value:
            #                         tuple_value = row[c.push_name] = [None] * c.num_push_columns
            #                     tuple_value[c.push_child] = c.pull(d)
            #                 else:
            #                     row[c.push_name][c.push_child] = c.pull(d)
            #
            #             data.append(row)
            #
            #         return Data(
            #             meta={"format": "list"},
            #             data=data
            #         )

            if is_list(query.select) or is_op(query.select.value, LeavesOp):
                temp_data = []
                for rownum, d in enumerate(data):
                    row = {}
                    for c in cols:
                        row[c.push_column_name] = d[c.push_name]
                    temp_data.append(row)
                return Data(meta={"format": "list"}, data=temp_data)
            else:
                return Data(meta={"format": "list"}, data=data)
Пример #23
0
    untyped_column, PARENT, UID
from jx_sqlite.container import DIGITS_TABLE
from jx_sqlite.expressions._utils import SQLang, sql_type_to_json_type
from jx_sqlite.expressions.tuple_op import TupleOp
from jx_sqlite.expressions.variable import Variable
from jx_sqlite.setop_table import SetOpTable
from mo_dots import coalesce, concat_field, join_field, listwrap, split_field, startswith_field
from mo_future import text, unichr
from mo_logs import Log
from mo_sql import SQL, SQL_AND, SQL_CASE, SQL_COMMA, SQL_DESC, SQL_ELSE, SQL_END, SQL_FROM, SQL_GROUPBY, \
    SQL_INNER_JOIN, SQL_IS_NOT_NULL, SQL_IS_NULL, SQL_LEFT_JOIN, SQL_LIMIT, SQL_NULL, SQL_ON, SQL_ONE, SQL_OR, \
    SQL_ORDERBY, SQL_SELECT, SQL_STAR, SQL_THEN, SQL_TRUE, SQL_UNION_ALL, SQL_WHEN, SQL_WHERE, sql_coalesce, \
    sql_count, sql_iso, sql_list, SQL_DOT, SQL_PLUS, ConcatSQL, SQL_EQ
from jx_sqlite.sqlite import quote_column, quote_value, sql_alias

EXISTS_COLUMN = quote_column("__exists__")


class EdgesTable(SetOpTable):
    def _edges_op(self, query, frum):
        schema = frum
        query = query.copy()  # WE WILL BE MARKING UP THE QUERY
        index_to_column = {}  # MAP FROM INDEX TO COLUMN (OR SELECT CLAUSE)
        outer_selects = []  # EVERY SELECT CLAUSE (NOT TO BE USED ON ALL TABLES, OF COURSE)
        base_table, path = schema.snowflake.fact_name, schema.nested_path
        nest_to_alias = {
            nested_path: "__" + unichr(ord('a') + i) + "__"
            for i, (nested_path, sub_table) in enumerate(self.snowflake.tables)
        }

        tables = []