def _add_column(self, column):
    """
    Add `column` to the table backing its nested path, then register it with
    the namespace's column collection.

    :param column: column description; `name`, `jx_type`, `nested_path`,
                   `es_column` and `es_type` are read
    :return: None
    :raises: via `Log.error` when the ALTER TABLE fails, unless the failure is
             a "duplicate column name" and the column is already known locally
    """
    cname = column.name
    if column.jx_type == NESTED:
        # WE ARE ALSO NESTING
        self._nest_column(column, [cname] + column.nested_path)

    table = concat_field(self.fact_name, column.nested_path[0])

    try:
        with self.namespace.db.transaction() as t:
            t.execute(
                "ALTER TABLE" + quote_column(table) +
                "ADD COLUMN" + quote_column(column.es_column) + column.es_type
            )
        self.namespace.columns.add(column)
    except Exception as e:
        if "duplicate column name" in e:
            # THIS HAPPENS WHEN MULTIPLE THREADS ARE ASKING FOR MORE COLUMNS TO STORE DATA
            # THIS SHOULD NOT BE A PROBLEM SINCE THE THREADS BOTH AGREE THE COLUMNS SHOULD EXIST
            # BUT, IT WOULD BE NICE TO MAKE LARGER TRANSACTIONS SO THIS NEVER HAPPENS

            # CONFIRM THE COLUMN EXISTS IN LOCAL DATA STRUCTURES
            if any(c.es_column == column.es_column for c in self.namespace.columns):
                return

        # FIX: THE TEMPLATE WAS "{{column}]" (MISMATCHED BRACKETS), SO THE
        # COLUMN NAME WAS NEVER SUBSTITUTED INTO THE MESSAGE
        Log.error("Did not add column {{column}}", column=column.es_column, cause=e)
def _edges_op(self, query, frum): schema = frum query = query.copy() # WE WILL BE MARKING UP THE QUERY index_to_column = {} # MAP FROM INDEX TO COLUMN (OR SELECT CLAUSE) outer_selects = [] # EVERY SELECT CLAUSE (NOT TO BE USED ON ALL TABLES, OF COURSE) base_table, path = schema.snowflake.fact_name, schema.nested_path nest_to_alias = { nested_path: "__" + unichr(ord('a') + i) + "__" for i, (nested_path, sub_table) in enumerate(self.snowflake.tables) } tables = [] for n, a in nest_to_alias.items(): if startswith_field(path, n): tables.append({"nest": n, "alias": a}) tables = jx.sort(tables, {"value": {"length": "nest"}}) from_sql = sql_alias(quote_column(concat_field(base_table, tables[0].nest)), tables[0].alias) for previous, t in zip(tables, tables[1::]): from_sql += ( SQL_LEFT_JOIN + sql_alias(quote_column(concat_field(base_table, t.nest)), t.alias) + SQL_ON + quote_column(t.alias, PARENT) + SQL_EQ + quote_column(previous.alias, UID) ) main_filter = SQLang[query.where].to_sql(schema, boolean=True)[0].sql.b # SHIFT THE COLUMN DEFINITIONS BASED ON THE NESTED QUERY DEPTH ons = [] join_types = [] wheres = [] null_ons = [EXISTS_COLUMN + SQL_IS_NULL] groupby = [] null_groupby = [] orderby = [] domains = [] select_clause = [SQL_ONE + EXISTS_COLUMN] + [quote_column(c.es_column) for c in self.snowflake.columns] for edge_index, query_edge in enumerate(query.edges): edge_alias = "e" + text(edge_index) if query_edge.value: edge_values = [p for c in SQLang[query_edge.value].to_sql(schema).sql for p in c.items()] elif not query_edge.value and any(query_edge.domain.partitions.where): case = SQL_CASE for pp, p in enumerate(query_edge.domain.partitions): w = SQLang[p.where].to_sql(schema)[0].sql.b t = quote_value(pp) case += SQL_WHEN + w + SQL_THEN + t case += SQL_ELSE + SQL_NULL + SQL_END # quote value with length of partitions edge_values = [("n", case)] elif query_edge.range: edge_values = SQLang[query_edge.range.min].to_sql(schema)[0].sql.items() + 
SQLang[query_edge.range.max].to_sql(schema)[ 0].sql.items()
def to_sql(self, schema, not_null=False, boolean=False, many=True):
    """
    Translate this variable reference into per-column SQL fragments.

    :param schema: provides `leaves(name)` and `items()` for column lookup
    :param not_null: not read by this implementation
    :param boolean: when true, emit a boolean ("b") expression per column
                    instead of the typed column reference
    :param many: not read by this implementation
    :return: wrapped list of {"name", "sql", "nested_path"} entries
    """
    var_name = self.var
    if var_name == GUID:
        guid_entry = {
            "name": ".",
            "sql": {"s": quoted_GUID},
            "nested_path": ROOT_PATH
        }
        return wrap([guid_entry])

    cols = schema.leaves(var_name)
    if not cols:
        # NO COLUMN BACKS THIS VARIABLE
        return self.lang[NULL].to_sql(schema)

    # nested path -> property name -> {sql type code: sql}
    acc = {}

    def types_for(path, relative_name):
        by_name = acc.setdefault(path, {})
        return by_name.setdefault(get_property_name(relative_name), {})

    for col in cols:
        cname = relative_field(col.name, var_name)

        if boolean:
            nested_path = col.nested_path[0]
            # NOTE(review): this branch reads `col.type` while the other reads
            # `col.jx_type` -- kept exactly as found
            if col.type == OBJECT:
                value = SQL_TRUE
            elif col.type == BOOLEAN:
                value = quote_column(col.es_column)
            else:
                value = quote_column(col.es_column) + SQL_IS_NOT_NULL
            slot = types_for(nested_path, cname)
            slot["b"] = value
            continue

        if col.jx_type != OBJECT:
            nested_path = col.nested_path[0]
            slot = types_for(nested_path, cname)
            slot[json_type_to_sql_type[col.jx_type]] = quote_column(col.es_column)
            continue

        # OBJECT: COLLECT EVERY SCHEMA ENTRY WHOSE NAME STARTS WITH "<var>."
        prefix = self.var + "."
        for cn, cs in schema.items():
            if not cn.startswith(prefix):
                continue
            for child_col in cs:
                slot = types_for(child_col.nested_path[0], cname)
                slot[json_type_to_sql_type[col.type]] = quote_column(child_col.es_column)

    return wrap([
        {"name": name, "sql": types, "nested_path": path}
        for path, pairs in acc.items()
        for name, types in pairs.items()
    ])
def output():
    """
    Endless generator of ids.

    Each pass reads `next_id` from ABOUT_TABLE, writes back `next_id + 1000`
    in the same transaction, then yields the 1000 ids in between.
    """
    while True:
        with self.db.transaction() as t:
            current = t.query(
                SQL_SELECT + quote_column("next_id") + SQL_FROM + quote_column(ABOUT_TABLE)
            )
            block_start = first(first(current.data))
            block_end = block_start + 1000
            t.execute(
                SQL_UPDATE + quote_column(ABOUT_TABLE) + SQL_SET + sql_eq(next_id=block_end)
            )

        # HAND OUT THE RESERVED BLOCK, ONE ID AT A TIME
        candidate = block_start
        while candidate < block_end:
            yield candidate
            candidate += 1
def _drop_column(self, column):
    """
    Hide `column` by renaming it with a "__" prefix, then remove it from the
    namespace's column collection.

    :param column: column description; `name`, `jx_type`, `nested_path` and
                   `es_column` are read
    """
    # DROP COLUMN BY RENAMING IT, WITH __ PREFIX TO HIDE IT
    if column.jx_type == "nested":
        # WE ARE ALSO NESTING
        self._nest_column(column, [column.name] + column.nested_path)

    table = concat_field(self.fact_name, column.nested_path[0])
    hidden_name = "__" + column.es_column
    rename = (
        "ALTER TABLE" + quote_column(table) +
        "RENAME COLUMN" + quote_column(column.es_column) +
        " TO " + quote_column(hidden_name)
    )

    with self.namespace.db.transaction() as t:
        t.execute(rename)

    self.namespace.columns.remove(column)
def to_sql(self, schema, not_null=False, boolean=False):
    """
    Build SQL that locates `self.find` inside `self.value` with INSTR.

    The generated expression binds the INSTR result to `i` and evaluates
    CASE WHEN i THEN i - 1 + start ELSE default END.

    :param schema: passed through to the operand `to_sql` calls
    :param not_null: not read by this implementation
    :param boolean: not read by this implementation
    :return: wrapped single-entry list with the number ("n") expression
    """

    def operand_sql(expr):
        # FIRST SQL ENTRY OF THE SIMPLIFIED OPERAND
        return SQLang[expr].partial_eval().to_sql(schema)[0].sql

    value = operand_sql(self.value).s
    find = operand_sql(self.find).s
    start = operand_sql(self.start).n
    default = coalesce(operand_sql(self.default).n, SQL_NULL)

    if start.sql != SQL_ZERO.sql:
        # NON-ZERO START: SEARCH THE RESULT OF NotRightOp INSTEAD OF THE WHOLE VALUE
        value = NotRightOp([self.value, self.start]).to_sql(schema)[0].sql.s

    i = quote_column("i")
    index = sql_call("INSTR", value, find)
    case_expression = ConcatSQL(
        SQL_CASE,
        SQL_WHEN, i,
        SQL_THEN, i, SQL(" - "), SQL_ONE, SQL_PLUS, start,
        SQL_ELSE, default,
        SQL_END,
    )
    sql = with_var(i, index, case_expression)

    return wrap([{"name": ".", "sql": {"n": sql}}])
def delete(self, where):
    """
    Delete the rows of the fact table that match `where`.

    :param where: filter, converted with `jx_expression` and translated to SQL
    """
    where_sql = SQLang[jx_expression(where)].to_sql(self.schema)
    command = ConcatSQL(
        SQL_DELETE,
        SQL_FROM,
        quote_column(self.snowflake.fact_name),
        SQL_WHERE,
        where_sql
    )
    with self.db.transaction() as t:
        t.execute(command)
def _insert(self, collection):
    """
    Insert flattened rows, one INSERT statement (and transaction) per nested path.

    :param collection: mapping of nested path -> details, where `details`
                       exposes `active_columns` and `rows`
    """
    for nested_path, details in collection.items():
        rows = details.rows
        if len(rows) == 0:
            # FIX: "INSERT ... VALUES" WITH NO TUPLES IS NOT VALID SQL;
            # THERE IS NOTHING TO WRITE FOR THIS PATH
            continue

        active_columns = wrap(list(details.active_columns))
        table_name = concat_field(self.name, nested_path)

        if table_name == self.name:
            # DO NOT REQUIRE PARENT OR ORDER COLUMNS
            meta_columns = [GUID, UID]
        else:
            meta_columns = [UID, PARENT, ORDER]

        all_columns = meta_columns + active_columns.es_column

        # ONLY THE PRIMITIVE VALUE COLUMNS
        command = ConcatSQL(
            SQL_INSERT,
            quote_column(table_name),
            sql_iso(sql_list(map(quote_column, all_columns))),
            SQL_VALUES,
            sql_list(
                sql_iso(sql_list(quote_value(row.get(c)) for c in all_columns))
                for row in unwrap(rows)
            )
        )

        with self.db.transaction() as t:
            t.execute(command)
def remove_facts(self, fact_name):
    """
    Drop every table registered under `fact_name` and forget the fact table.

    :param fact_name: name of the fact table; its paths are looked up in
                      `self.ns.columns._snowflakes`
    """
    paths = self.ns.columns._snowflakes[fact_name]
    if not paths:
        # NOTHING REGISTERED UNDER THIS NAME
        return

    with self.db.transaction() as t:
        for path in paths:
            table = concat_field(fact_name, path[0])
            t.execute("DROP TABLE "+quote_column(table))

    self.ns.columns.remove_table(fact_name)
def _window_op(self, query, window):
    """
    Build the SQL select expression for one window clause.

    :param query: the enclosing query (not read here)
    :param window: window description; `value`, `name`, `aggregate`,
                   `edges.values`, `edges.sort`, `range.min` and `range.max`
                   are read
    :return: SQL of the form "<function> OVER (...) AS <name>"
    """
    # http://www2.sqlite.org/cvstrac/wiki?p=UnsupportedSqlAnalyticalFunctions

    if window.value == "rownum":
        # FIX: THE OVER CLAUSE MUST IMMEDIATELY FOLLOW THE FUNCTION CALL, SO
        # THE "- 1" (ZERO-BASED ROW NUMBER) IS APPLIED AFTER THE WINDOW
        return (
            "ROW_NUMBER() OVER (" +
            " PARTITION BY " + sql_iso(sql_list(window.edges.values)) +
            SQL_ORDERBY + sql_iso(sql_list(window.edges.sort)) +
            ") - 1 AS " + quote_column(window.name)
        )

    range_min = text(coalesce(window.range.min, "UNBOUNDED"))
    range_max = text(coalesce(window.range.max, "UNBOUNDED"))

    # FIX: `schema` WAS AN UNDEFINED NAME IN THIS FUNCTION; USE THE TABLE'S SCHEMA
    return (
        sql_aggs[window.aggregate] + sql_iso(window.value.to_sql(self.schema)) +
        " OVER (" +
        " PARTITION BY " + sql_iso(sql_list(window.edges.values)) +
        SQL_ORDERBY + sql_iso(sql_list(window.edges.sort)) +
        " ROWS BETWEEN " + range_min + " PRECEDING AND " + range_max + " FOLLOWING " +
        ") AS " + quote_column(window.name)
    )
def where(self, filter):
    """
    WILL NOT PULL WHOLE OBJECT, JUST TOP-LEVEL PROPERTIES

    Columns whose type is in STRUCT, or whose nested path has more than one
    step, are left out of the result.

    :param filter: jx_expression filter
    :return: list of objects that match
    """
    top_level = [
        c
        for c in self.schema.columns
        if c.jx_type not in STRUCT and len(c.nested_path) == 1
    ]
    column_names = [c.name for c in top_level]
    select = [sql_alias(quote_column(c.es_column), c.name) for c in top_level]

    where_sql = SQLang[jx_expression(filter)].to_sql(self.schema)[0].sql.b
    command = ConcatSQL(
        SQL_SELECT, JoinSQL(SQL_COMMA, select),
        SQL_FROM, quote_column(self.snowflake.fact_name),
        SQL_WHERE, where_sql
    )
    result = self.db.query(command)

    # PAIR EACH ROW'S VALUES WITH THE SELECTED COLUMN NAMES
    return wrap([dict(zip(column_names, row)) for row in result.data])
def _make_range_domain(self, domain, column_name):
    """
    Build a SELECT over the digits table that enumerates the values of a
    range domain.

    :param domain: range description; `min`, `max` and `interval` are read
    :param column_name: alias given to the generated value column
    :return: SQL selecting min, min+interval, ... (one row per part)
    """
    width = (domain.max - domain.min) / domain.interval
    digits = mo_math.floor(mo_math.log10(width - 1))

    # EXPRESSION FOR THE PART INDEX, COMPOSED FROM ONE DIGITS TABLE PER DECIMAL PLACE
    if digits == 0:
        value = quote_column("a", "value")
    else:
        value = SQL_PLUS.join(
            "1" + ("0" * j) + SQL_STAR + text(chr(ord(b'a') + j)) + ".value"
            for j in range(digits + 1)
        )

    unit_step = domain.interval == 1
    zero_based = domain.min == 0

    if unit_step:
        if zero_based:
            part_value = value
        else:
            part_value = sql_iso(value) + SQL_PLUS + quote_value(domain.min)
        sql = (
            SQL_SELECT + sql_alias(part_value, column_name) +
            SQL_FROM + sql_alias(quote_column(DIGITS_TABLE), "a")
        )
    else:
        if zero_based:
            part_value = value + SQL_STAR + quote_value(domain.interval)
        else:
            part_value = (
                sql_iso(value, SQL_STAR, quote_value(domain.interval)) +
                SQL_PLUS + quote_value(domain.min)
            )
        sql = ConcatSQL(
            SQL_SELECT, sql_alias(part_value, column_name),
            SQL_FROM, sql_alias(quote_column(DIGITS_TABLE), "a")
        )

    # ONE MORE COPY OF THE DIGITS TABLE FOR EACH ADDITIONAL DECIMAL PLACE ("b", "c", ...)
    for place in range(1, digits + 1):
        place_alias = text(chr(ord(b'a') + place))
        sql += SQL_INNER_JOIN + sql_alias(
            quote_column(DIGITS_TABLE), place_alias) + SQL_ON + SQL_TRUE

    sql += SQL_WHERE + value + " < " + quote_value(width)
    return sql
untyped_column, PARENT, UID from jx_sqlite.container import DIGITS_TABLE from jx_sqlite.expressions._utils import SQLang, sql_type_to_json_type from jx_sqlite.expressions.tuple_op import TupleOp from jx_sqlite.expressions.variable import Variable from jx_sqlite.setop_table import SetOpTable from mo_dots import coalesce, concat_field, join_field, listwrap, split_field, startswith_field from mo_future import text, unichr from mo_logs import Log from mo_sql import SQL, SQL_AND, SQL_CASE, SQL_COMMA, SQL_DESC, SQL_ELSE, SQL_END, SQL_FROM, SQL_GROUPBY, \ SQL_INNER_JOIN, SQL_IS_NOT_NULL, SQL_IS_NULL, SQL_LEFT_JOIN, SQL_LIMIT, SQL_NULL, SQL_ON, SQL_ONE, SQL_OR, \ SQL_ORDERBY, SQL_SELECT, SQL_STAR, SQL_THEN, SQL_TRUE, SQL_UNION_ALL, SQL_WHEN, SQL_WHERE, sql_coalesce, \ sql_count, sql_iso, sql_list, SQL_DOT, SQL_PLUS, ConcatSQL, SQL_EQ from jx_sqlite.sqlite import quote_column, quote_value, sql_alias EXISTS_COLUMN = quote_column("__exists__") class EdgesTable(SetOpTable): def _edges_op(self, query, frum): schema = frum query = query.copy() # WE WILL BE MARKING UP THE QUERY index_to_column = {} # MAP FROM INDEX TO COLUMN (OR SELECT CLAUSE) outer_selects = [ ] # EVERY SELECT CLAUSE (NOT TO BE USED ON ALL TABLES, OF COURSE) base_table, path = schema.snowflake.fact_name, schema.nested_path nest_to_alias = { nested_path: "__" + unichr(ord('a') + i) + "__" for i, (nested_path, sub_table) in enumerate(self.snowflake.tables) }
def _make_sql_for_one_nest_in_set_op(
    self,
    primary_nested_path,
    selects,  # EVERY SELECT CLAUSE (NOT TO BE USED ON ALL TABLES, OF COURSE
    where_clause,
    active_columns,
    index_to_sql_select  # MAP FROM INDEX TO COLUMN (OR SELECT CLAUSE)
):
    """
    FOR EACH NESTED LEVEL, WE MAKE A QUERY THAT PULLS THE VALUES/COLUMNS REQUIRED
    WE `UNION ALL` THEM WHEN DONE

    Walks `self.snowflake.tables`; relative to `primary_nested_path` each table
    is either the primary table (select clause is built here), a parent (joined
    only), a child (joined on its first row and queried by a recursive call),
    or a sibling (ignored).

    :param primary_nested_path: nested path this level of the query is for
    :param selects: every select clause, indexed the same as index_to_sql_select
    :param where_clause: filter SQL; SQL_TRUE is used when falsy
    :param active_columns: only passed through to the recursive calls
    :param index_to_sql_select: map from select index to an object with
                                `nested_path`, `sql` and `column_alias`
    :return: SQL FOR ONE NESTED LEVEL (UNION ALL of this level and its children)
    """
    parent_alias = "a"
    from_clause = []
    select_clause = []
    children_sql = []
    done = []
    if not where_clause:
        where_clause = SQL_TRUE

    # STATEMENT FOR EACH NESTED PATH
    for i, (nested_path, sub_table) in enumerate(self.snowflake.tables):
        if any(startswith_field(nested_path, d) for d in done):
            # ALREADY COVERED BY A RECURSIVE CALL FOR ONE OF ITS ANCESTORS
            continue

        alias = "__" + unichr(ord('a') + i) + "__"

        if primary_nested_path == nested_path:
            select_clause = []
            # ADD SELECT CLAUSE HERE
            for select_index, s in enumerate(selects):
                sql_select = index_to_sql_select.get(select_index)
                if not sql_select:
                    select_clause.append(selects[select_index])
                    continue

                if startswith_field(sql_select.nested_path[0], nested_path):
                    select_clause.append(
                        sql_alias(sql_select.sql, sql_select.column_alias))
                else:
                    # DO NOT INCLUDE DEEP STUFF AT THIS LEVEL
                    select_clause.append(
                        sql_alias(SQL_NULL, sql_select.column_alias))

            if nested_path == ".":
                from_clause.append(SQL_FROM)
                from_clause.append(
                    sql_alias(quote_column(self.snowflake.fact_name), alias))
            else:
                from_clause.append(SQL_LEFT_JOIN)
                from_clause.append(
                    sql_alias(
                        quote_column(self.snowflake.fact_name, sub_table.name),
                        alias))
                from_clause.append(SQL_ON)
                from_clause.append(quote_column(alias, PARENT))
                from_clause.append(SQL_EQ)
                from_clause.append(quote_column(parent_alias, UID))
                where_clause = sql_iso(
                    where_clause) + SQL_AND + quote_column(alias, ORDER) + " > 0"
            parent_alias = alias

        elif startswith_field(primary_nested_path, nested_path):
            # PARENT TABLE
            # NO NEED TO INCLUDE COLUMNS, BUT WILL INCLUDE ID AND ORDER
            if nested_path == ".":
                from_clause.append(SQL_FROM)
                from_clause.append(
                    sql_alias(quote_column(self.snowflake.fact_name), alias))
            else:
                # NOTE(review): this alias has no "__" decoration, unlike the
                # others, and both sides of the ON use the same alias -- kept as found
                parent_alias = alias = unichr(ord('a') + i - 1)
                from_clause.append(SQL_LEFT_JOIN)
                from_clause.append(
                    sql_alias(
                        quote_column(self.snowflake.fact_name, sub_table.name),
                        alias))
                from_clause.append(SQL_ON)
                from_clause.append(quote_column(alias, PARENT))
                from_clause.append(SQL_EQ)
                from_clause.append(quote_column(parent_alias, UID))
                where_clause = sql_iso(
                    where_clause) + SQL_AND + quote_column(
                        parent_alias, ORDER) + " > 0"
            parent_alias = alias

        elif startswith_field(nested_path, primary_nested_path):
            # CHILD TABLE
            # GET FIRST ROW FOR EACH NESTED TABLE
            from_clause.append(SQL_LEFT_JOIN)
            from_clause.append(
                sql_alias(
                    quote_column(self.snowflake.fact_name, sub_table.name),
                    alias))
            from_clause.append(SQL_ON)
            from_clause.append(quote_column(alias, PARENT))
            from_clause.append(SQL_EQ)
            from_clause.append(quote_column(parent_alias, UID))
            from_clause.append(SQL_AND)
            from_clause.append(quote_column(alias, ORDER))
            from_clause.append(SQL_EQ)
            from_clause.append(SQL_ZERO)

            # IMMEDIATE CHILDREN ONLY
            done.append(nested_path)
            # NESTED TABLES WILL USE RECURSION
            children_sql.append(
                self._make_sql_for_one_nest_in_set_op(
                    nested_path,
                    selects,  # EVERY SELECT CLAUSE (NOT TO BE USED ON ALL TABLES, OF COURSE
                    where_clause,
                    active_columns,
                    index_to_sql_select  # MAP FROM INDEX TO COLUMN (OR SELECT CLAUSE)
                ))
        else:
            # SIBLING PATHS ARE IGNORED
            continue

    this_level = ConcatSQL(
        SQL_SELECT, sql_list(select_clause),
        ConcatSQL(*from_clause),
        SQL_WHERE, where_clause
    )
    # FIX: THE CHILD QUERIES WERE PASSED AS EXTRA POSITIONAL ARGUMENTS TO join();
    # THEY BELONG IN THE LIST BEING JOINED
    sql = SQL_UNION_ALL.join([this_level] + children_sql)

    return sql
def _groupby_op(self, query, schema):
    """
    Build the SQL for a groupby query.

    :param query: query with `groupby`, `select`, `window`, `where` and `sort`
    :param schema: schema of the table being queried; supplies the fact name,
                   nested path and column lookup
    :return: (command, index_to_column) -- the SQL, and a map from result
             column number to the ColumnMapping describing where it goes
    """
    base_table = schema.snowflake.fact_name
    path = schema.nested_path
    # base_table, path = tail_field(frum)
    # schema = self.snowflake.tables[path].schema
    index_to_column = {}
    nest_to_alias = {
        nested_path: "__" + unichr(ord('a') + i) + "__"
        for i, nested_path in enumerate(self.schema.snowflake.query_paths)
    }

    # ONLY THE TABLES ON THE WAY DOWN TO `path`, SHALLOWEST FIRST
    tables = []
    for n, a in nest_to_alias.items():
        if startswith_field(path, n):
            tables.append({"nest": n, "alias": a})
    tables = jx.sort(tables, {"value": {"length": "nest"}})

    from_sql = join_field(
        [base_table] + split_field(tables[0].nest)) + " " + tables[0].alias
    previous = tables[0]
    for t in tables[1::]:
        from_sql += (SQL_LEFT_JOIN +
                     quote_column(concat_field(base_table, t.nest)) + " " +
                     t.alias + SQL_ON + quote_column(t.alias, PARENT) +
                     SQL_EQ + quote_column(previous.alias, UID))
        # FIX: ADVANCE `previous` SO EACH TABLE JOINS TO ITS IMMEDIATE PARENT,
        # NOT ALWAYS TO THE FIRST TABLE
        previous = t

    selects = []
    groupby = []
    for i, e in enumerate(query.groupby):
        for edge_sql in SQLang[e.value].to_sql(schema):
            column_number = len(selects)
            sql_type, sql = edge_sql.sql.items()[0]
            if sql is SQL_NULL and not e.value.var in schema.keys():
                Log.error("No such column {{var}}", var=e.value.var)

            column_alias = _make_column_name(column_number)
            groupby.append(sql)
            selects.append(sql_alias(sql, column_alias))
            if edge_sql.nested_path == ".":
                select_name = edge_sql.name
            else:
                select_name = "."
            index_to_column[column_number] = ColumnMapping(
                is_edge=True,
                push_name=e.name,
                push_column_name=e.name.replace("\\.", "."),
                push_column=i,
                push_child=select_name,
                pull=get_column(column_number),
                sql=sql,
                column_alias=column_alias,
                type=sql_type_to_json_type[sql_type])

    for i, select in enumerate(listwrap(query.select)):
        column_number = len(selects)
        sql_type, sql = SQLang[select.value].to_sql(
            schema)[0].sql.items()[0]
        if sql == 'NULL' and not select.value.var in schema.keys():
            Log.error("No such column {{var}}", var=select.value.var)

        # AGGREGATE
        if select.value == "." and select.aggregate == "count":
            sql = sql_count(SQL_ONE)
        else:
            sql = sql_call(sql_aggs[select.aggregate], sql)

        # `!= None` IS KEPT AS FOUND (EQUALITY, NOT IDENTITY)
        if select.default != None:
            sql = sql_coalesce([sql, quote_value(select.default)])

        selects.append(sql_alias(sql, select.name))

        index_to_column[column_number] = ColumnMapping(
            push_name=select.name,
            push_column_name=select.name,
            push_column=i + len(query.groupby),
            push_child=".",
            pull=get_column(column_number),
            sql=sql,
            column_alias=quote_column(select.name),
            type=sql_type_to_json_type[sql_type])

    for w in query.window:
        # FIX: `self` WAS PASSED TWICE (ONCE BY THE BOUND CALL, ONCE EXPLICITLY)
        selects.append(self._window_op(query, w))

    where = SQLang[query.where].to_sql(schema)[0].sql.b

    command = (SQL_SELECT + (sql_list(selects)) + SQL_FROM + from_sql +
               SQL_WHERE + where + SQL_GROUPBY + sql_list(groupby))

    if query.sort:
        # FOR EACH SORT TERM AND EACH TYPE PRESENT: ORDER BY "(x) IS NULL" FIRST, THEN BY x
        command += SQL_ORDERBY + sql_list(
            sql_iso(sql[t]) + SQL_IS_NULL + "," + sql[t] +
            (" DESC" if s.sort == -1 else "")
            for s, sql in [(s, SQLang[s.value].to_sql(schema)[0].sql)
                           for s in query.sort]
            for t in "bns" if sql[t])

    return command, index_to_column
def device_callback(self, path=None):
    """
    Handle the browser's return from the Auth0 login.

    Looks up the device session by the returned `state`, exchanges the
    authorization `code` for tokens at Auth0, attaches the verified user to the
    device's session, and deletes the device setup row.

    :param path: not read by this implementation
    :return: 200 Response telling the user the login is complete
    """
    # HANDLE BROWSER RETURN FROM AUTH0 LOGIN
    if request.args.get("error"):
        Log.error("You did it wrong")

    session_id = request.cookies.get(self.device.login.session.name)
    if not session_id:
        Log.error("You did it wrong")

    login_session = self.session_manager.get_session(session_id)
    code = request.args.get("code")
    state = request.args.get("state")

    # FIND THE DEVICE SESSION THAT STARTED THIS LOGIN
    device_query = {
        "from": "device",
        "select": "session_id",
        "where": {"eq": {"state": state}},
    }
    result = self.device.db.query(sql_query(device_query))
    if not result.data:
        Log.error("expecting valid state")
    device_session_id = result.data[0][0]

    # GO BACK TO AUTH0 TO GET TOKENS
    token_request = {
        "client_id": self.device.auth0.client_id,
        "redirect_uri": self.device.auth0.redirect_uri,
        "code_verifier": login_session.code_verifier,
        "code": code,
        "grant_type": "authorization_code",
    }
    if DEBUG:
        Log.note("Send token request to Auth0:\n {{request}}", request=token_request)

    token_url = str(URL("https://" + self.device.auth0.domain, path="oauth/token"))
    json_headers = {
        "Accept": mimetype.JSON,
        "Content-Type": mimetype.JSON,
        # "Referer": str(URL(self.device.auth0.redirect_uri, query={"code": code, "state": state})),
    }
    auth_response = requests.request(
        "POST",
        token_url,
        headers=json_headers,
        data=value2json(token_request),
    )

    try:
        auth_result = wrap(auth_response.json())
    except Exception as e:
        Log.error("not json {{value}}", value=auth_response.content, cause=e)

    # VERIFY TOKENS, ADD USER TO DEVICE'S SESSION
    user_details = self.verify_opaque_token(auth_result.access_token)
    user = self.permissions.get_or_create_user(user_details)
    self.session_manager.update_session(device_session_id, {"user": user})

    # REMOVE DEVICE SETUP STATE
    forget_state = ConcatSQL(
        SQL_DELETE,
        SQL_FROM,
        quote_column(self.device.table),
        SQL_WHERE,
        sql_eq(state=state),
    )
    with self.device.db.transaction() as t:
        t.execute(forget_state)

    Log.note("login complete")
    return Response("Login complete. You may close this page", status=200)
def update(self, command):
    """
    Apply a set/clear update to the rows matching a filter.

    Steps, in order: reject updates touching columns with a nested path deeper
    than one step; add a column for every `set` key the schema does not have;
    for each `set` value of type "nested", delete and re-insert the matching
    rows of the nested table; finally run one UPDATE on this table.

    NOTE(review): `nested_tables`, `abs_schema`, `schema` and `get_if_type` are
    used below but not defined in this function -- presumably module-level or
    stale names; verify before relying on the nested-value path.

    :param command:  EXPECTING dict WITH {"set": s, "clear": c, "where": w} FORMAT
    :return: None
    """
    command = wrap(command)
    clear_columns = set(listwrap(command['clear']))

    # REJECT DEEP UPDATES
    touched_columns = command.set.keys() | clear_columns
    for c in self.schema.columns:
        if c.name in touched_columns and len(c.nested_path) > 1:
            Log.error("Deep update not supported")

    # ADD NEW COLUMNS
    where = jx_expression(command.where) or TRUE
    _vars = where.vars()
    # MAP EACH VARIABLE IN THE FILTER TO THE es_column OF ITS NON-STRUCT COLUMNS
    _map = {
        v: c.es_column
        for v in _vars
        for c in self.columns.get(v, Null)
        if c.jx_type not in STRUCT
    }
    where_sql = where.map(_map).to_sql(self.schema)[0].sql.b
    new_columns = set(command.set.keys()) - set(
        c.name for c in self.schema.columns)
    for new_column_name in new_columns:
        nested_value = command.set[new_column_name]
        ctype = get_jx_type(nested_value)
        column = Column(name=new_column_name,
                        jx_type=ctype,
                        es_index=self.name,
                        es_type=json_type_to_sqlite_type(ctype),
                        es_column=typed_column(new_column_name, ctype),
                        last_updated=Date.now())
        self.add_column(column)

    # UPDATE THE NESTED VALUES
    for nested_column_name, nested_value in command.set.items():
        if get_jx_type(nested_value) == "nested":
            nested_table_name = concat_field(self.name, nested_column_name)
            nested_table = nested_tables[nested_column_name]
            self_primary_key = sql_list(
                quote_column(c.es_column) for u in self.uid
                for c in self.columns[u])
            extra_key_name = UID + text(len(self.uid))
            extra_key = [e for e in nested_table.columns[extra_key_name]][0]

            # DELETE THE NESTED ROWS THAT BELONG TO THE PARENT ROWS MATCHING THE FILTER
            sql_command = (
                SQL_DELETE + SQL_FROM + quote_column(nested_table.name) +
                SQL_WHERE + "EXISTS" +
                sql_iso(SQL_SELECT + SQL_ONE + SQL_FROM +
                        sql_alias(quote_column(nested_table.name), "n") +
                        SQL_INNER_JOIN +
                        sql_iso(SQL_SELECT + self_primary_key + SQL_FROM +
                                quote_column(abs_schema.fact) + SQL_WHERE +
                                where_sql) + " t ON " + SQL_AND.join(
                                    quote_column("t", c.es_column) + SQL_EQ +
                                    quote_column("n", c.es_column)
                                    for u in self.uid
                                    for c in self.columns[u])))
            self.db.execute(sql_command)

            # INSERT NEW RECORDS
            if not nested_value:
                continue

            doc_collection = {}
            for d in listwrap(nested_value):
                nested_table.flatten(d,
                                     Data(),
                                     doc_collection,
                                     path=nested_column_name)

            prefix = SQL_INSERT + quote_column(nested_table.name) + sql_iso(
                sql_list([self_primary_key] + [quote_column(extra_key)] + [
                    quote_column(c.es_column)
                    for c in doc_collection.get(".", Null).active_columns
                ]))

            # BUILD THE PARENT TABLES
            parent = (SQL_SELECT + self_primary_key + SQL_FROM +
                      quote_column(abs_schema.fact) + SQL_WHERE +
                      jx_expression(command.where).to_sql(schema))

            # BUILD THE RECORDS
            children = SQL_UNION_ALL.join(
                SQL_SELECT +
                sql_alias(quote_value(i), extra_key.es_column) + SQL_COMMA +
                sql_list(
                    sql_alias(quote_value(row[c.name]),
                              quote_column(c.es_column))
                    for c in doc_collection.get(".", Null).active_columns)
                for i, row in enumerate(
                    doc_collection.get(".", Null).rows))

            # PAIR EVERY MATCHING PARENT ROW WITH EVERY NEW CHILD RECORD (JOIN ON TRUE)
            sql_command = (prefix + SQL_SELECT + sql_list([
                quote_column("p", c.es_column) for u in self.uid
                for c in self.columns[u]
            ] + [quote_column("c", extra_key)] + [
                quote_column("c", c.es_column)
                for c in doc_collection.get(".", Null).active_columns
            ]) + SQL_FROM + sql_iso(parent) + " p" + SQL_INNER_JOIN +
                           sql_iso(children) + " c" + SQL_ON + SQL_TRUE)

            self.db.execute(sql_command)

            # THE CHILD COLUMNS COULD HAVE EXPANDED
            # ADD COLUMNS TO SELF
            for n, cs in nested_table.columns.items():
                for c in cs:
                    column = Column(name=c.name,
                                    jx_type=c.jx_type,
                                    es_type=c.es_type,
                                    es_index=c.es_index,
                                    es_column=c.es_column,
                                    nested_path=[nested_column_name] +
                                    c.nested_path,
                                    last_updated=Date.now())
                    if c.name not in self.columns:
                        self.columns[column.name] = {column}
                    elif c.jx_type not in [
                            c.jx_type for c in self.columns[c.name]
                    ]:
                        self.columns[column.name].add(column)

    # ONE UPDATE FOR THE TOP-LEVEL, NON-NESTED COLUMNS:
    #   FIRST LIST  - COLUMNS WITH A (NOT None) VALUE IN `set`
    #   SECOND LIST - COLUMNS NAMED IN `clear`, SET TO NULL
    # NOTE(review): the second list also requires `command.set[c.name] != None`,
    # i.e. a cleared column is only nulled when it ALSO has a set value -- confirm intent
    command = ConcatSQL(
        SQL_UPDATE, quote_column(self.name), SQL_SET,
        sql_list([
            quote_column(c.es_column) + SQL_EQ +
            quote_value(get_if_type(v, c.jx_type))
            for c in self.schema.columns
            if c.jx_type != NESTED and len(c.nested_path) == 1
            for v in [command.set[c.name]] if v != None
        ] + [
            quote_column(c.es_column) + SQL_EQ + SQL_NULL
            for c in self.schema.columns
            if (c.name in clear_columns and command.set[c.name] != None
                and c.jx_type != NESTED and len(c.nested_path) == 1)
        ]), SQL_WHERE, where_sql)

    with self.db.transaction() as t:
        t.execute(command)
"sum": "SUM" } STATS = { "count": "COUNT({{value}})", "std": "SQRT((1-1.0/COUNT({{value}}))*VARIANCE({{value}}))", "min": "MIN({{value}})", "max": "MAX({{value}})", "sum": "SUM({{value}})", "median": "MEDIAN({{value}})", "sos": "SUM({{value}}*{{value}})", "var": "(1-1.0/COUNT({{value}}))*VARIANCE({{value}})", "avg": "AVG({{value}})" } quoted_GUID = quote_column(GUID) quoted_UID = quote_column(UID) quoted_ORDER = quote_column(ORDER) quoted_PARENT = quote_column(PARENT) def sql_text_array_to_set(column): def _convert(row): text = row[column] if text == None: return set() else: value = json2value(row[column]) return set(value) - {None} return _convert
def _nest_column(self, column):
    """
    Move a nested-type column out of its current table into its own table.

    Creates the destination table when `db.about()` reports nothing for it,
    adds the column there, then rebuilds the existing table without the moved
    column (rename to "tmp_...", CREATE ... AS SELECT the remaining columns,
    drop the temporary table).

    :param column: column to nest; its `es_column` must carry the nested type,
                   otherwise `Log.error` is called. `es_index` is overwritten
                   with the destination table when columns are moved.
    :return: None
    """
    new_path, type_ = untyped_column(column.es_column)
    if type_ != SQL_NESTED_TYPE:
        Log.error("only nested types can be nested")
    destination_table = concat_field(self.fact_name, new_path)
    existing_table = concat_field(self.fact_name, column.nested_path[0])

    # FIND THE INNER COLUMNS WE WILL BE MOVING
    moving_columns = []
    for c in self.columns:
        if destination_table != column.es_index and column.es_column == c.es_column:
            moving_columns.append(c)
            # NOTE(review): assigns the path string itself, while other code
            # reads `nested_path[0]` -- confirm the expected shape
            c.nested_path = new_path

    # TODO: IF THERE ARE CHILD TABLES, WE MUST UPDATE THEIR RELATIONS TOO?

    # LOAD THE COLUMNS
    data = self.namespace.db.about(destination_table)
    if not data:
        # DEFINE A NEW TABLE
        command = (
            SQL_CREATE + quote_column(destination_table) + sql_iso(sql_list([
                quoted_UID + "INTEGER",
                quoted_PARENT + "INTEGER",
                quoted_ORDER + "INTEGER",
                "PRIMARY KEY " + sql_iso(quoted_UID),
                "FOREIGN KEY " + sql_iso(quoted_PARENT) + " REFERENCES " + quote_column(existing_table) + sql_iso(quoted_UID)
            ]))
        )
        with self.namespace.db.transaction() as t:
            t.execute(command)
            self.add_table([new_path]+column.nested_path)

    # TEST IF THERE IS ANY DATA IN THE NEW NESTED ARRAY
    if not moving_columns:
        return

    column.es_index = destination_table
    with self.namespace.db.transaction() as t:
        t.execute(
            "ALTER TABLE " + quote_column(destination_table) +
            " ADD COLUMN " + quote_column(column.es_column) + " " + column.es_type
        )

        # Deleting parent columns
        for col in moving_columns:
            # FROM HERE ON `column` IS THE COLUMN NAME (A STRING), NOT THE PARAMETER
            column = col.es_column
            tmp_table = "tmp_" + existing_table
            # LIMIT 0 QUERY: ONLY THE HEADER (COLUMN NAMES) IS USED
            columns = list(map(text, t.query(SQL_SELECT + SQL_STAR + SQL_FROM + quote_column(existing_table) + SQL_LIMIT + SQL_ZERO).header))
            t.execute(
                "ALTER TABLE " + quote_column(existing_table) +
                " RENAME TO " + quote_column(tmp_table)
            )
            # RECREATE THE TABLE WITH EVERY COLUMN EXCEPT THE ONE BEING MOVED
            t.execute(
                SQL_CREATE + quote_column(existing_table) + SQL_AS +
                SQL_SELECT + sql_list([quote_column(c) for c in columns if c != column]) +
                SQL_FROM + quote_column(tmp_table)
            )
            t.execute("DROP TABLE " + quote_column(tmp_table))
def __nonzero__(self):
    """
    Truth value of the table.

    :return: True when a COUNT over the fact table is non-zero
    """
    result = self.db.query(
        SQL_SELECT + sql_count("*") + SQL_FROM + quote_column(self.snowflake.fact_name)
    )
    # FIX: THE ROWS OF A QUERY RESULT ARE UNDER `.data` (AS EVERY OTHER USE OF
    # `db.query` IN THIS CODE READS THEM); THE RESULT ITSELF WAS BEING INDEXED
    counter = result.data[0][0]
    return bool(counter)
def query(self, query=None):
    """
    Run a JSON query expression against this table and format the result.

    Dispatch: a groupby query (format other than "cube") goes to `_groupby_op`;
    a groupby query in "cube" format, or a query with edges or any aggregate
    other than "none", goes to `_edges_op`; everything else is returned
    directly from `_set_op`. The SQL result is then shaped according to
    `query.format` ("container", "cube", "table" or "list").

    :param query:  JSON Query Expression, SET `format="container"` TO MAKE NEW TABLE OF RESULT
    :return: a QueryTable for "container"; otherwise a Data with `meta.format`
             and the formatted `data` (plus `edges`/`select` for cubes,
             `header` for tables)
    """
    if not query:
        query = {}

    # DEFAULT TO THIS TABLE; OTHERWISE `from` MUST BE THIS TABLE OR ONE NESTED UNDER IT
    if not query.get('from'):
        query['from'] = self.name
    elif not startswith_field(query['from'], self.name):
        Log.error("Expecting table, or some nested table")
    query = QueryOp.wrap(query, self.container, self.namespace)
    new_table = "temp_" + unique_name()

    if query.format == "container":
        create_table = SQL_CREATE + quote_column(new_table) + SQL_AS
    else:
        create_table = ""

    if query.groupby and query.format != "cube":
        op, index_to_columns = self._groupby_op(query, self.schema)
        command = create_table + op
    elif query.groupby:
        # TREAT THE GROUPBY AS EDGES FOR THE DURATION OF THE CALL, THEN SWAP BACK
        query.edges, query.groupby = query.groupby, query.edges
        op, index_to_columns = self._edges_op(query, self.schema)
        command = create_table + op
        query.edges, query.groupby = query.groupby, query.edges
    elif query.edges or any(a != "none" for a in listwrap(query.select).aggregate):
        op, index_to_columns = self._edges_op(query, query.frum.schema)
        command = create_table + op
    else:
        # PLAIN SET OPERATION: ITS RESULT IS RETURNED AS-IS
        op = self._set_op(query)
        return op

    result = self.db.query(command)

    if query.format == "container":
        output = QueryTable(new_table, db=self.db, uid=self.uid, exists=True)
    elif query.format == "cube" or (not query.format and query.edges):
        column_names = [None] * (max(c.push_column for c in index_to_columns.values()) + 1)
        for c in index_to_columns.values():
            column_names[c.push_column] = c.push_column_name

        if len(query.edges) == 0 and len(query.groupby) == 0:
            # NO DIMENSIONS: A SINGLE ROW OF AGGREGATES
            data = {n: Data() for n in column_names}
            for s in index_to_columns.values():
                data[s.push_name][s.push_child] = unwrap(s.pull(result.data[0]))
            if is_list(query.select):
                select = [{"name": s.name} for s in query.select]
            else:
                select = {"name": query.select.name}

            return Data(
                data=unwrap(data),
                select=select,
                meta={"format": "cube"}
            )

        if not result.data:
            # NO ROWS: BUILD EDGES AND CUBES FROM THE DECLARED DOMAINS ONLY
            edges = []
            dims = []
            for i, e in enumerate(query.edges + query.groupby):
                allowNulls = coalesce(e.allowNulls, True)

                if e.domain.type == "set" and e.domain.partitions:
                    domain = SimpleSetDomain(partitions=e.domain.partitions.name)
                elif e.domain.type == "range":
                    domain = e.domain
                elif is_op(e.value, TupleOp):
                    pulls = jx.sort([c for c in index_to_columns.values() if c.push_name == e.name],
                                    "push_child").pull
                    parts = [tuple(p(d) for p in pulls) for d in result.data]
                    domain = SimpleSetDomain(partitions=jx.sort(set(parts)))
                else:
                    domain = SimpleSetDomain(partitions=[])

                dims.append(1 if allowNulls else 0)
                edges.append(Data(
                    name=e.name,
                    allowNulls=allowNulls,
                    domain=domain
                ))

            data = {}
            for si, s in enumerate(listwrap(query.select)):
                if s.aggregate == "count":
                    data[s.name] = Matrix(dims=dims, zeros=0)
                else:
                    data[s.name] = Matrix(dims=dims)

            if is_list(query.select):
                select = [{"name": s.name} for s in query.select]
            else:
                select = {"name": query.select.name}

            return Data(
                meta={"format": "cube"},
                edges=edges,
                select=select,
                data={k: v.cube for k, v in data.items()}
            )

        # LAZILY COMPUTED TRANSPOSE OF THE RESULT ROWS (ONE TUPLE PER COLUMN)
        columns = None

        edges = []
        dims = []
        for g in query.groupby:
            g.is_groupby = True

        for i, e in enumerate(query.edges + query.groupby):
            allowNulls = coalesce(e.allowNulls, True)

            if e.domain.type == "set" and e.domain.partitions:
                domain = SimpleSetDomain(partitions=e.domain.partitions.name)
            elif e.domain.type == "range":
                domain = e.domain
            elif e.domain.type == "time":
                domain = wrap(mo_json.scrub(e.domain))
            elif e.domain.type == "duration":
                domain = wrap(mo_json.scrub(e.domain))
            elif is_op(e.value, TupleOp):
                pulls = jx.sort([c for c in index_to_columns.values() if c.push_name == e.name], "push_child").pull
                parts = [tuple(p(d) for p in pulls) for d in result.data]
                domain = SimpleSetDomain(partitions=jx.sort(set(parts)))
            else:
                # NO DECLARED DOMAIN: USE THE DISTINCT VALUES FOUND IN RESULT COLUMN i
                if not columns:
                    columns = transpose(*result.data)
                parts = set(columns[i])
                if e.is_groupby and None in parts:
                    allowNulls = True
                parts -= {None}

                if query.sort[i].sort == -1:
                    domain = SimpleSetDomain(partitions=wrap(sorted(parts, reverse=True)))
                else:
                    domain = SimpleSetDomain(partitions=jx.sort(parts))

            dims.append(len(domain.partitions) + (1 if allowNulls else 0))
            edges.append(Data(
                name=e.name,
                allowNulls=allowNulls,
                domain=domain
            ))

        data_cubes = {}
        for si, s in enumerate(listwrap(query.select)):
            if s.aggregate == "count":
                data_cubes[s.name] = Matrix(dims=dims, zeros=0)
            else:
                data_cubes[s.name] = Matrix(dims=dims)

        r2c = index_to_coordinate(dims)  # WORKS BECAUSE THE DATABASE SORTED THE EDGES TO CONFORM
        for rownum, row in enumerate(result.data):
            coord = r2c(rownum)

            for i, s in enumerate(index_to_columns.values()):
                if s.is_edge:
                    continue
                if s.push_child == ".":
                    data_cubes[s.push_name][coord] = s.pull(row)
                else:
                    data_cubes[s.push_name][coord][s.push_child] = s.pull(row)

        if query.select == None:
            select = Null
        elif is_list(query.select):
            select = [{"name": s.name} for s in query.select]
        else:
            select = {"name": query.select.name}

        return Data(
            meta={"format": "cube"},
            edges=edges,
            select=select,
            data={k: v.cube for k, v in data_cubes.items()}
        )
    elif query.format == "table" or (not query.format and query.groupby):
        column_names = [None] * (max(c.push_column for c in index_to_columns.values()) + 1)
        for c in index_to_columns.values():
            column_names[c.push_column] = c.push_column_name
        data = []
        for d in result.data:
            row = [None for _ in column_names]
            for s in index_to_columns.values():
                if s.push_child == ".":
                    row[s.push_column] = s.pull(d)
                elif s.num_push_columns:
                    # TUPLE-VALUED COLUMN: FILL ONE POSITION OF A FIXED-LENGTH LIST
                    tuple_value = row[s.push_column]
                    if tuple_value == None:
                        tuple_value = row[s.push_column] = [None] * s.num_push_columns
                    tuple_value[s.push_child] = s.pull(d)
                elif row[s.push_column] == None:
                    row[s.push_column] = Data()
                    row[s.push_column][s.push_child] = s.pull(d)
                else:
                    row[s.push_column][s.push_child] = s.pull(d)
            data.append(tuple(unwrap(r) for r in row))

        output = Data(
            meta={"format": "table"},
            header=column_names,
            data=data
        )
    elif query.format == "list" or (not query.edges and not query.groupby):
        if not query.edges and not query.groupby and any(listwrap(query.select).aggregate):
            # AGGREGATES WITHOUT DIMENSIONS: A SINGLE VALUE, READ FROM THE FIRST ROW
            if is_list(query.select):
                data = Data()
                for c in index_to_columns.values():
                    if c.push_child == ".":
                        if data[c.push_name] == None:
                            data[c.push_name] = c.pull(result.data[0])
                        elif is_list(data[c.push_name]):
                            data[c.push_name].append(c.pull(result.data[0]))
                        else:
                            data[c.push_name] = [data[c.push_name], c.pull(result.data[0])]
                    else:
                        data[c.push_name][c.push_child] = c.pull(result.data[0])

                output = Data(
                    meta={"format": "value"},
                    data=data
                )
            else:
                data = Data()
                for s in index_to_columns.values():
                    if not data[s.push_child]:
                        data[s.push_child] = s.pull(result.data[0])
                    else:
                        data[s.push_child] += [s.pull(result.data[0])]
                output = Data(
                    meta={"format": "value"},
                    data=unwrap(data)
                )
        else:
            data = []
            # NOTE: `rownum` HOLDS A RESULT ROW HERE, NOT AN INDEX
            for rownum in result.data:
                row = Data()
                for c in index_to_columns.values():
                    if c.push_child == ".":
                        row[c.push_name] = c.pull(rownum)
                    elif c.num_push_columns:
                        tuple_value = row[c.push_name]
                        if not tuple_value:
                            tuple_value = row[c.push_name] = [None] * c.num_push_columns
                        tuple_value[c.push_child] = c.pull(rownum)
                    else:
                        row[c.push_name][c.push_child] = c.pull(rownum)

                data.append(row)

            output = Data(
                meta={"format": "list"},
                data=data
            )
    else:
        Log.error("unknown format {{format}}", format=query.format)

    return output
def _set_op(self, query):
    """
    Build and run the single SQL statement for a set (non-aggregate) query,
    then fold the flat result rows back into (possibly nested) documents.

    :param query: the query; this method reads its ``select``, ``sort``,
        ``where``, ``limit`` and ``format`` properties, and assigns a
        ``pull`` function onto every select clause
    :return: Data with ``meta.format`` of "cube", "table" or "list" (anything
        other than "cube"/"table" is returned as "list")
    """
    # FUNCTION-SCOPE IMPORT: THE HELPER MUST BE REACHABLE UNDER A NAME THAT NO
    # LOCAL VARIABLE IN _accumulate_nested() SHADOWS
    from mo_dots import relative_field

    # GET LIST OF SELECTED COLUMNS
    vars_ = UNION([
        v.var
        for select in listwrap(query.select)
        for v in select.value.vars()
    ])
    schema = self.schema
    known_vars = schema.keys()

    active_columns = {".": set()}
    for v in vars_:
        for c in schema.leaves(v):
            nest = c.nested_path[0]
            active_columns.setdefault(nest, set()).add(c)

    # ANY VARS MENTIONED WITH NO COLUMNS?
    for v in vars_:
        if not any(startswith_field(cname, v) for cname in known_vars):
            active_columns["."].add(
                Column(
                    name=v,
                    jx_type=IS_NULL,
                    es_column=".",
                    es_index=".",
                    es_type='NULL',
                    nested_path=["."],
                    last_updated=Date.now()
                )
            )

    # EVERY COLUMN, AND THE INDEX IT TAKES UP
    index_to_column = {}  # MAP FROM INDEX TO COLUMN (OR SELECT CLAUSE)
    index_to_uid = {}  # FROM NESTED PATH TO THE INDEX OF UID
    sql_selects = []  # EVERY SELECT CLAUSE (NOT TO BE USED ON ALL TABLES, OF COURSE)
    nest_to_alias = {
        nested_path[0]: "__" + unichr(ord('a') + i) + "__"
        for i, nested_path in enumerate(self.snowflake.query_paths)
    }

    sorts = []
    if query.sort:
        for select in query.sort:
            col = SQLang[select.value].to_sql(schema)[0]
            for t, sql in col.sql.items():
                json_type = sql_type_to_json_type[t]
                if json_type in STRUCT:
                    continue
                column_number = len(sql_selects)
                # SQL HAS ABS TABLE REFERENCE
                column_alias = _make_column_name(column_number)
                sql_selects.append(sql_alias(sql, column_alias))
                # EACH SORT COLUMN IS PRECEDED BY ITS "IS NULL" TEST
                sorts.append(quote_column(column_alias) + SQL_IS_NULL)
                if select.sort == -1:
                    sorts.append(quote_column(column_alias) + " DESC")
                else:
                    sorts.append(quote_column(column_alias))

    primary_doc_details = Data()
    # EVERY SELECT STATEMENT THAT WILL BE REQUIRED, NO MATTER THE DEPTH
    # WE WILL CREATE THEM ACCORDING TO THE DEPTH REQUIRED
    # NOTE(review): THIS ONE LIST OBJECT IS MUTATED ON EVERY ITERATION AND HANDED
    # BY REFERENCE TO EVERY nested_doc_details AND ColumnMapping BELOW - CONFIRM
    # THE ALIASING IS INTENDED
    nested_path = []
    for step, sub_table in self.snowflake.tables:
        nested_path.insert(0, step)
        nested_doc_details = {
            "sub_table": sub_table,
            "children": [],
            "index_to_column": {},
            "nested_path": nested_path
        }

        # INSERT INTO TREE
        if not primary_doc_details:
            primary_doc_details = nested_doc_details
        else:
            def place(parent_doc_details):
                # NOTE(review): NOTHING RETURNS True AFTER THE append(), SO THE
                # EARLY EXIT BELOW CAN NOT FIRE - CONFIRM WHETHER `return True`
                # WAS INTENDED AFTER THE append()
                if startswith_field(step, parent_doc_details['nested_path'][0]):
                    for c in parent_doc_details['children']:
                        if place(c):
                            return True
                    parent_doc_details['children'].append(nested_doc_details)

            place(primary_doc_details)

        alias = nested_doc_details['alias'] = nest_to_alias[step]

        # WE ALWAYS ADD THE UID
        column_number = index_to_uid[step] = nested_doc_details['id_coord'] = len(sql_selects)
        sql_select = quote_column(alias, UID)
        sql_selects.append(sql_alias(sql_select, _make_column_name(column_number)))
        if step != ".":
            # ID AND ORDER FOR CHILD TABLES
            index_to_column[column_number] = ColumnMapping(
                sql=sql_select,
                type="number",
                nested_path=nested_path,
                column_alias=_make_column_name(column_number)
            )
            column_number = len(sql_selects)
            sql_select = quote_column(alias, ORDER)
            sql_selects.append(sql_alias(sql_select, _make_column_name(column_number)))
            index_to_column[column_number] = ColumnMapping(
                sql=sql_select,
                type="number",
                nested_path=nested_path,
                column_alias=_make_column_name(column_number)
            )

        # WE DO NOT NEED DATA FROM TABLES WE REQUEST NOTHING FROM
        if step not in active_columns:
            continue

        # ADD SQL SELECT COLUMNS FOR EACH jx SELECT CLAUSE
        si = 0
        for select in listwrap(query.select):
            try:
                column_number = len(sql_selects)
                select.pull = get_column(column_number)
                db_columns = SQLang[select.value].partial_eval().to_sql(schema)

                for column in db_columns:
                    for t, unsorted_sql in column.sql.items():
                        json_type = sql_type_to_json_type[t]
                        if json_type in STRUCT:
                            continue
                        column_number = len(sql_selects)
                        column_alias = _make_column_name(column_number)
                        sql_selects.append(sql_alias(unsorted_sql, column_alias))
                        if startswith_field(schema.path, step) and is_op(select.value, LeavesOp):
                            # ONLY FLATTEN primary_nested_path AND PARENTS, NOT CHILDREN
                            leaf_name = get_property_name(concat_field(select.name, column.name))
                            mapping = ColumnMapping(
                                push_name=literal_field(leaf_name),
                                push_child=".",
                                push_column_name=leaf_name,
                                push_column=si,
                                pull=get_column(column_number),
                                sql=unsorted_sql,
                                type=json_type,
                                column_alias=column_alias,
                                nested_path=nested_path
                            )
                            si += 1
                        else:
                            mapping = ColumnMapping(
                                push_name=select.name,
                                push_child=column.name,
                                push_column_name=select.name,
                                push_column=si,
                                pull=get_column(column_number),
                                sql=unsorted_sql,
                                type=json_type,
                                column_alias=column_alias,
                                nested_path=nested_path
                            )
                        # THE SAME MAPPING IS REGISTERED GLOBALLY AND PER-TABLE
                        nested_doc_details['index_to_column'][column_number] = mapping
                        index_to_column[column_number] = mapping
            finally:
                # ADVANCE THE OUTPUT COLUMN EVEN WHEN THE SELECT CLAUSE RAISES
                si += 1

    where_clause = BooleanOp(query.where).partial_eval().to_sql(schema, boolean=True)[0].sql.b
    unsorted_sql = self._make_sql_for_one_nest_in_set_op(
        ".",
        sql_selects,
        where_clause,
        active_columns,
        index_to_column
    )

    # ORDER BY EVERY TABLE'S UID COLUMN (AFTER ANY USER-REQUESTED SORT)
    for n, _ in self.snowflake.tables:
        sorts.append(quote_column(COLUMN + text(index_to_uid[n])))

    ordered_sql = ConcatSQL(
        SQL_SELECT,
        SQL_STAR,
        SQL_FROM,
        sql_iso(unsorted_sql),
        SQL_ORDERBY,
        sql_list(sorts),
        SQL_LIMIT,
        quote_value(query.limit)
    )
    result = self.db.query(ordered_sql)

    def _accumulate_nested(rows, row, nested_doc_details, parent_doc_id, parent_id_coord):
        """
        :param rows: REVERSED STACK OF ROWS (WITH append() AND pop())
        :param row: CURRENT ROW BEING EXTRACTED
        :param nested_doc_details: {
                "nested_path": the nested path of this table,
                "index_to_column": map from column number to column details
                "children": all possible direct descendants' nested_doc_details
             }
        :param parent_doc_id: the id of the parent doc (for detecting when to step out of loop)
        :param parent_id_coord: the column number for the parent id (so we can extract from each row)
        :return: the nested property (usually an array)
        """
        previous_doc_id = None
        doc = Null
        output = []
        id_coord = nested_doc_details['id_coord']
        inner_properties = is_list(query.select) or is_op(query.select.value, LeavesOp)

        while True:
            doc_id = row[id_coord]

            # `== None` IS DELIBERATE: IT IS AN EQUALITY TEST, NOT AN IDENTITY TEST
            if doc_id == None or (parent_id_coord is not None and row[parent_id_coord] != parent_doc_id):
                rows.append(row)  # UNDO PREVIOUS POP (RECORD IS NOT A NESTED RECORD OF parent_doc)
                return output

            if doc_id != previous_doc_id:
                previous_doc_id = doc_id
                doc = Null
                curr_nested_path = nested_doc_details['nested_path'][0]
                for i, c in nested_doc_details['index_to_column'].items():
                    value = row[i]
                    if inner_properties:
                        # ASSIGN INNER PROPERTIES
                        field_name = concat_field(c.push_name, c.push_child)
                    else:
                        # FACT IS EXPECTED TO BE A SINGLE VALUE, NOT AN OBJECT
                        field_name = c.push_child

                    if field_name == ".":
                        if exists(value):
                            doc = value
                    elif exists(value):
                        if doc is Null:
                            doc = Data()
                        doc[field_name] = value

            for child_details in nested_doc_details['children']:
                # EACH NESTED TABLE MUST BE ASSEMBLED INTO A LIST OF OBJECTS
                child_id = row[child_details['id_coord']]
                if child_id is not None:
                    nested_value = _accumulate_nested(rows, row, child_details, doc_id, id_coord)
                    if nested_value != None:
                        push_name = child_details['nested_path'][0]
                        if inner_properties:
                            # ASSIGN INNER PROPERTIES
                            # (THE RESULT GETS ITS OWN NAME SO THE relative_field()
                            # HELPER IS NOT SHADOWED BY A LOCAL STRING)
                            child_field = relative_field(push_name, curr_nested_path)
                        else:
                            # FACT IS EXPECTED TO BE A SINGLE VALUE, NOT AN OBJECT
                            child_field = "."

                        if child_field == ".":
                            doc = unwraplist(nested_value)
                        else:
                            doc[child_field] = unwraplist(nested_value)

            output.append(doc)

            try:
                row = rows.pop()
            except IndexError:
                # NO MORE ROWS
                return output

    cols = tuple([i for i in index_to_column.values() if i.push_name != None])
    rows = list(reversed(unwrap(result.data)))
    if rows:
        row = rows.pop()
        data = _accumulate_nested(rows, row, primary_doc_details, None, None)
    else:
        data = result.data

    # TRUE WHEN EACH OUTPUT DOCUMENT IS AN OBJECT OF NAMED PROPERTIES
    inner_properties = is_list(query.select) or is_op(query.select.value, LeavesOp)

    if query.format == "cube":
        num_rows = len(data)
        edges = [{
            "name": "rownum",
            "domain": {
                "type": "rownum",
                "min": 0,
                "max": num_rows,
                "interval": 1
            }
        }]
        if inner_properties:
            temp_data = {
                c.push_column_name: [None] * num_rows
                for c in cols
            }
            for rownum, d in enumerate(data):
                for c in cols:
                    temp_data[c.push_column_name][rownum] = d[c.push_name]
            return Data(
                meta={"format": "cube"},
                data=temp_data,
                edges=edges
            )
        else:
            map_index_to_name = {c.push_column: c.push_column_name for c in cols}
            temp_data = [data]
            return Data(
                meta={"format": "cube"},
                data={n: temp_data[c] for c, n in map_index_to_name.items()},
                edges=edges
            )
    elif query.format == "table":
        if inner_properties:
            column_names = [None] * (max(c.push_column for c in cols) + 1)
            for c in cols:
                column_names[c.push_column] = c.push_column_name

            temp_data = []
            for d in data:
                row = [None] * len(column_names)
                for c in cols:
                    row[c.push_column] = d[c.push_name]
                temp_data.append(row)

            return Data(
                meta={"format": "table"},
                header=column_names,
                data=temp_data
            )
        else:
            column_names = listwrap(query.select).name
            return Data(
                meta={"format": "table"},
                header=column_names,
                data=[[d] for d in data]
            )
    else:
        if inner_properties:
            temp_data = []
            for d in data:
                row = {}
                for c in cols:
                    row[c.push_column_name] = d[c.push_name]
                temp_data.append(row)
            return Data(
                meta={"format": "list"},
                data=temp_data
            )
        else:
            return Data(
                meta={"format": "list"},
                data=data
            )
untyped_column, PARENT, UID from jx_sqlite.container import DIGITS_TABLE from jx_sqlite.expressions._utils import SQLang, sql_type_to_json_type from jx_sqlite.expressions.tuple_op import TupleOp from jx_sqlite.expressions.variable import Variable from jx_sqlite.setop_table import SetOpTable from mo_dots import coalesce, concat_field, join_field, listwrap, split_field, startswith_field from mo_future import text, unichr from mo_logs import Log from mo_sql import SQL, SQL_AND, SQL_CASE, SQL_COMMA, SQL_DESC, SQL_ELSE, SQL_END, SQL_FROM, SQL_GROUPBY, \ SQL_INNER_JOIN, SQL_IS_NOT_NULL, SQL_IS_NULL, SQL_LEFT_JOIN, SQL_LIMIT, SQL_NULL, SQL_ON, SQL_ONE, SQL_OR, \ SQL_ORDERBY, SQL_SELECT, SQL_STAR, SQL_THEN, SQL_TRUE, SQL_UNION_ALL, SQL_WHEN, SQL_WHERE, sql_coalesce, \ sql_count, sql_iso, sql_list, SQL_DOT, SQL_PLUS, ConcatSQL, SQL_EQ from jx_sqlite.sqlite import quote_column, quote_value, sql_alias EXISTS_COLUMN = quote_column("__exists__") class EdgesTable(SetOpTable): def _edges_op(self, query, frum): schema = frum query = query.copy() # WE WILL BE MARKING UP THE QUERY index_to_column = {} # MAP FROM INDEX TO COLUMN (OR SELECT CLAUSE) outer_selects = [] # EVERY SELECT CLAUSE (NOT TO BE USED ON ALL TABLES, OF COURSE) base_table, path = schema.snowflake.fact_name, schema.nested_path nest_to_alias = { nested_path: "__" + unichr(ord('a') + i) + "__" for i, (nested_path, sub_table) in enumerate(self.snowflake.tables) } tables = []