def _aggop(self, query):
    """
    Build SQL for a query whose select clauses are all aggregates, so a
    SINGLE ROW is RETURNED WITH AGGREGATES.

    :param query: jx-style query; every select must name a known aggregate
    :return: (sql, post_processor) pair; the post processor reduces the DB
        result to a single object (list of selects) or a single value
    """
    if isinstance(query.select, list):
        # RETURN SINGLE OBJECT WITH AGGREGATES
        for s in query.select:
            if s.aggregate not in aggregates:
                Log.error("Expecting all columns to have an aggregate: {{select}}", select=s)

        selects = FlatList()
        for s in query.select:
            selects.append(sql_alias(aggregates[s.aggregate].replace("{{code}}", s.value), quote_column(s.name)))

        sql = expand_template("""
            SELECT
                {{selects}}
            FROM
                {{table}}
            {{where}}
        """, {
            "selects": SQL(",\n".join(selects)),
            "table": self._subquery(query["from"])[0],
            # BUGFIX: was `query.filter`, inconsistent with the single-value
            # branch below (and the rest of this module) which uses query.where
            "where": self._where2sql(query.where)
        })

        return sql, lambda sql: self.db.column(sql)[0]  # RETURNING SINGLE OBJECT WITH AGGREGATE VALUES
    else:
        # RETURN SINGLE VALUE
        s0 = query.select
        if s0.aggregate not in aggregates:
            Log.error("Expecting all columns to have an aggregate: {{select}}", select=s0)

        select = sql_alias(aggregates[s0.aggregate].replace("{{code}}", s0.value), quote_column(s0.name))

        sql = expand_template("""
            SELECT
                {{selects}}
            FROM
                {{table}}
            {{where}}
        """, {
            "selects": SQL(select),
            "table": self._subquery(query["from"])[0],
            "where": self._where2sql(query.where)
        })

        def post(sql):
            # SINGLE ROW, SINGLE COLUMN
            result = self.db.column_query(sql)
            return result[0][0]

        return sql, post  # RETURN SINGLE VALUE
def _aggop(self, query):
    """
    Build SQL for a query whose select clauses are all aggregates, so a
    SINGLE ROW is RETURNED WITH AGGREGATES.

    :param query: jx-style query; every select must name a known aggregate
    :return: (sql, post_processor) pair; the post processor reduces the DB
        result to a single object (list of selects) or a single value
    """
    if isinstance(query.select, list):
        # RETURN SINGLE OBJECT WITH AGGREGATES
        for s in query.select:
            if s.aggregate not in aggregates:
                Log.error("Expecting all columns to have an aggregate: {{select}}", select=s)

        selects = FlatList()
        for s in query.select:
            selects.append(sql_alias(aggregates[s.aggregate].replace("{{code}}", s.value), self.db.quote_column(s.name)))

        sql = expand_template("""
            SELECT
                {{selects}}
            FROM
                {{table}}
            {{where}}
        """, {
            "selects": SQL(",\n".join(selects)),
            "table": self._subquery(query["from"])[0],
            # BUGFIX: was `query.filter`, inconsistent with the single-value
            # branch below (and the rest of this module) which uses query.where
            "where": self._where2sql(query.where)
        })

        return sql, lambda sql: self.db.column(sql)[0]  # RETURNING SINGLE OBJECT WITH AGGREGATE VALUES
    else:
        # RETURN SINGLE VALUE
        s0 = query.select
        if s0.aggregate not in aggregates:
            Log.error("Expecting all columns to have an aggregate: {{select}}", select=s0)

        select = sql_alias(aggregates[s0.aggregate].replace("{{code}}", s0.value), self.db.quote_column(s0.name))

        sql = expand_template("""
            SELECT
                {{selects}}
            FROM
                {{table}}
            {{where}}
        """, {
            "selects": SQL(select),
            "table": self._subquery(query["from"])[0],
            "where": self._where2sql(query.where)
        })

        def post(sql):
            # SINGLE ROW, SINGLE COLUMN
            result = self.db.column_query(sql)
            return result[0][0]

        return sql, post  # RETURN SINGLE VALUE
def _build_list_sql(self, db, first, batch_size):
    """
    Build the SQL that pages through the fact table in key order.

    :param db: database wrapper, used for value quoting
    :param first: the key tuple of the last row already extracted, or falsy
        to start from the beginning
    :param batch_size: LIMIT for this page
    :return: SQL selecting the next `batch_size` rows strictly after `first`
    """
    # TODO: ENSURE THE LAST COLUMN IS THE id
    if first:
        dim = len(self._extract.field)
        # TUPLE-COMPARISON EXPANSION: (a,b) > (x,y) BECOMES
        # (a > x) OR (a = x AND b > y); `ineq` presumably picks =, > or >=
        # per position -- TODO confirm against `ineq` definition
        where = SQL_OR.join(
            sql_iso(sql_and(
                quote_column(f) + ineq(i, e, dim) + db.quote_value(Date(v) if t == "time" else v)
                for e, (f, v, t) in enumerate(zip(self._extract.field[0:i + 1:], first, self._extract.type[0:i + 1:]))
            ))
            for i in range(dim)
        )
    else:
        where = SQL_TRUE

    selects = []
    for t, f in zip(self._extract.type, self._extract.field):
        if t == "time":
            # MICROSECOND-PRECISION DATETIME SO KEY COMPARISONS ROUND-TRIP
            selects.append("CAST" + sql_iso(sql_alias(quote_column(f), SQL("DATETIME(6)"))))
        else:
            selects.append(quote_column(f))

    # ORDER BY THE FULL KEY SO PAGING IS DETERMINISTIC
    sql = (
        SQL_SELECT + sql_list(selects) +
        SQL_FROM + self.settings.snowflake.fact_table +
        SQL_WHERE + where +
        SQL_ORDERBY + sql_list(quote_column(f) for f in self._extract.field) +
        SQL_LIMIT + db.quote_value(batch_size)
    )
    return sql
def quote_column(self, column_name, table=None):
    """
    Render a column reference as quoted SQL.

    Accepts a dotted string, a list of column names, or an object in
    {"name": name, "value": value} form; `table` optionally prefixes
    the column(s).
    """
    if column_name == None:
        Log.error("missing column_name")
        return None

    if isinstance(column_name, text_type):
        full_name = join_column(table, column_name) if table else column_name
        # BACKTICK-QUOTE EACH PATH SEGMENT (MySQL-style identifiers)
        return SQL("`" + full_name.replace(".", "`.`") + "`")

    if isinstance(column_name, list):
        if table:
            return sql_list(join_column(table, c) for c in column_name)
        return sql_list(self.quote_column(c) for c in column_name)

    # ASSUME {"name":name, "value":value} FORM
    return SQL(sql_alias(column_name.value, self.quote_column(column_name.name)))
def quote_column(column_name, table=None):
    """
    Render a column reference as quoted SQL.

    Accepts text (dotted path), bytes (decoded as UTF-8), a list of
    columns, or a {"name": name, "value": value} object; `table`
    optionally prefixes the column(s).
    """
    if column_name == None:
        Log.error("missing column_name")
        return None

    if is_text(column_name):
        if table:
            return join_column(table, column_name)
        # BACKTICK-QUOTE EACH PATH SEGMENT (MySQL-style identifiers)
        return SQL("`" + '`.`'.join(split_field(column_name)) + "`")

    if is_binary(column_name):
        # NORMALIZE BYTES TO TEXT, THEN RE-DISPATCH
        return quote_column(column_name.decode('utf8'), table)

    if is_list(column_name):
        if table:
            return sql_list(join_column(table, c) for c in column_name)
        return sql_list(quote_column(c) for c in column_name)

    # ASSUME {"name":name, "value":value} FORM
    return SQL(sql_alias(column_name.value, quote_column(column_name.name)))
def get_sql(self, get_ids):
    """
    Compose the full extraction query: UNION ALL of the per-nested-path
    selects from _compose_sql, wrapped in an outer SELECT that applies
    the ordering.

    :param get_ids: SQL that yields the ids of the documents wanted
    :return: SQL for the complete, ordered result set
    """
    sql = self._compose_sql(get_ids)

    # ORDERING: for each flagged column sort rows with NULL first
    # (`x IS NOT NULL` is 0 for NULL), then by the value itself
    # BUGFIX: removed dead `ordering`/`ci` accumulation (never read)
    sort = []
    for c in self.columns:
        if c.sort:
            sort.append(quote_column(c.column_alias) + SQL_IS_NOT_NULL)
            sort.append(quote_column(c.column_alias))

    union_all_sql = SQL_UNION_ALL.join(sql)
    union_all_sql = (
        SQL_SELECT + SQL_STAR +
        SQL_FROM + sql_alias(sql_iso(union_all_sql), quote_column('a')) +
        SQL_ORDERBY + sql_list(sort)
    )
    return union_all_sql
pull=pull, sql=sql, type=sql_type_to_json_type[json_type], column_alias=sql_name) vals = [v for t, v in edge_values] if query_edge.domain.type == "set": domain_name = quote_column("d" + text_type(edge_index) + "c" + text_type(column_index)) domain_names = [domain_name] if len(edge_names) > 1: Log.error("Do not know how to handle") if query_edge.value: domain = SQL_UNION_ALL.join( SQL_SELECT + sql_alias(quote_value(coalesce(p.dataIndex, i)), quote_column("rownum")) + SQL_COMMA + sql_alias(quote_value(p.value), domain_name) for i, p in enumerate(query_edge.domain.partitions)) if query_edge.allowNulls: domain += (SQL_UNION_ALL + SQL_SELECT + sql_alias( quote_value(len(query_edge.domain.partitions)), quote_column("rownum")) + SQL_COMMA + sql_alias(SQL_NULL, domain_name)) where = None join_type = SQL_LEFT_JOIN if query_edge.allowNulls else SQL_INNER_JOIN on_clause = (SQL_OR.join( join_column(edge_alias, k) + " = " + v for k, v in zip(domain_names, vals)) + SQL_OR + sql_iso( join_column(edge_alias, domain_name) + SQL_IS_NULL + SQL_AND +
def _make_sql_for_one_nest_in_set_op(
    self,
    primary_nested_path,
    selects,  # EVERY SELECT CLAUSE (NOT TO BE USED ON ALL TABLES, OF COURSE
    where_clause,
    active_columns,
    index_to_sql_select  # MAP FROM INDEX TO COLUMN (OR SELECT CLAUSE)
):
    """
    FOR EACH NESTED LEVEL, WE MAKE A QUERY THAT PULLS THE VALUES/COLUMNS REQUIRED
    WE `UNION ALL` THEM WHEN DONE
    :param primary_nested_path: the nested level this call is responsible for
    :param selects: every select clause (deep ones are NULLed at this level)
    :param where_clause: SQL filter; extended with join conditions as tables are added
    :param active_columns: map from nested path to the columns actually requested
    :param index_to_sql_select: map from select index to its column/select clause
    :return: SQL FOR ONE NESTED LEVEL
    """
    parent_alias = "a"
    from_clause = ""
    select_clause = []
    children_sql = []
    done = []  # nested paths already handled by a recursive call

    if not where_clause:
        where_clause = SQL_TRUE

    # STATEMENT FOR EACH NESTED PATH
    for i, (nested_path, sub_table) in enumerate(self.sf.tables.items()):
        # SKIP PATHS ALREADY COVERED BY A CHILD'S RECURSION
        if any(startswith_field(nested_path, d) for d in done):
            continue

        # ALIASES ARE __a__, __b__, ... IN TABLE-ENUMERATION ORDER
        alias = quote_column("__" + unichr(ord('a') + i) + "__")

        if primary_nested_path == nested_path:
            # THIS IS THE LEVEL WE ARE BUILDING: EMIT THE REAL SELECT CLAUSES
            select_clause = []
            # ADD SELECT CLAUSE HERE
            for select_index, s in enumerate(selects):
                sql_select = index_to_sql_select.get(select_index)
                if not sql_select:
                    # NO COLUMN DETAIL: PASS THE RAW SELECT CLAUSE THROUGH
                    select_clause.append(selects[select_index])
                    continue

                if startswith_field(sql_select.nested_path[0], nested_path):
                    select_clause.append(sql_alias(sql_select.sql, sql_select.column_alias))
                else:
                    # DO NOT INCLUDE DEEP STUFF AT THIS LEVEL
                    select_clause.append(sql_alias(SQL_NULL, sql_select.column_alias))

            if nested_path == ".":
                from_clause += SQL_FROM + sql_alias(quote_column(self.sf.fact), alias)
            else:
                # JOIN CHILD TABLE TO ITS PARENT VIA PARENT/UID COLUMNS
                from_clause += (
                    SQL_LEFT_JOIN + sql_alias(quote_column(concat_field(self.sf.fact, sub_table.name)), alias) +
                    SQL_ON + join_column(alias, quoted_PARENT) + " = " + join_column(parent_alias, quoted_UID)
                )
                where_clause = sql_iso(where_clause) + SQL_AND + join_column(alias, quoted_ORDER) + " > 0"
            parent_alias = alias

        elif startswith_field(primary_nested_path, nested_path):
            # PARENT TABLE
            # NO NEED TO INCLUDE COLUMNS, BUT WILL INCLUDE ID AND ORDER
            if nested_path == ".":
                from_clause += SQL_FROM + quote_column(self.sf.fact) + " " + alias
            else:
                # NOTE(review): parent alias here is the plain letter, not the
                # __x__ form used elsewhere -- confirm this asymmetry is intended
                parent_alias = alias = unichr(ord('a') + i - 1)
                from_clause += (
                    SQL_LEFT_JOIN + quote_column(concat_field(self.sf.fact, sub_table.name)) + " " + alias +
                    SQL_ON + join_column(alias, quoted_PARENT) + " = " + join_column(parent_alias, quoted_UID)
                )
                where_clause = sql_iso(where_clause) + SQL_AND + join_column(parent_alias, quoted_ORDER) + " > 0"
            parent_alias = alias

        elif startswith_field(nested_path, primary_nested_path):
            # CHILD TABLE
            # GET FIRST ROW FOR EACH NESTED TABLE
            from_clause += (
                SQL_LEFT_JOIN + sql_alias(quote_column(concat_field(self.sf.fact, sub_table.name)), alias) +
                SQL_ON + join_column(alias, quoted_PARENT) + " = " + join_column(parent_alias, quoted_UID) +
                SQL_AND + join_column(alias, ORDER) + " = 0"
            )
            # IMMEDIATE CHILDREN ONLY
            done.append(nested_path)
            # NESTED TABLES WILL USE RECURSION
            children_sql.append(self._make_sql_for_one_nest_in_set_op(
                nested_path,
                selects,  # EVERY SELECT CLAUSE (NOT TO BE USED ON ALL TABLES, OF COURSE
                where_clause,
                active_columns,
                index_to_sql_select  # MAP FROM INDEX TO COLUMN (OR SELECT CLAUSE)
            ))
        else:
            # SIBLING PATHS ARE IGNORED
            continue

    # THIS LEVEL'S SELECT, FOLLOWED BY ALL CHILD LEVELS' SELECTS
    sql = SQL_UNION_ALL.join(
        [SQL_SELECT + sql_list(select_clause) + from_clause + SQL_WHERE + where_clause]
        + children_sql
    )

    return sql
def _set_op(self, query, frum):
    """
    Plain (non-aggregating) select over a snowflake of nested tables.

    Builds one flat SQL statement (UNION ALL over nesting levels), runs it,
    then re-assembles the flat rows into nested documents and formats the
    result as "cube", "table", or "list" per query.format.

    :param query: jx-style query (select/where/sort/limit/format)
    :param frum: dotted table name; its tail selects the primary nested path
    :return: Data in the requested format
    """
    # GET LIST OF COLUMNS
    base_name, primary_nested_path = tail_field(frum)
    vars_ = UNION([v.var for select in listwrap(query.select) for v in select.value.vars()])
    schema = self.sf.tables[primary_nested_path].schema

    # MAP EACH NESTED PATH TO THE SET OF COLUMNS THE QUERY TOUCHES
    active_columns = {".": set()}
    for v in vars_:
        for c in schema.leaves(v):
            nest = c.nested_path[0]
            active_columns.setdefault(nest, set()).add(c)

    # ANY VARS MENTIONED WITH NO COLUMNS?
    for v in vars_:
        if not any(startswith_field(cname, v) for cname in schema.keys()):
            # PLACEHOLDER COLUMN SO THE SELECT STILL RESOLVES (TO NULL)
            active_columns["."].add(Column(
                name=v,
                jx_type="null",
                es_column=".",
                es_index=".",
                nested_path=["."]
            ))

    # EVERY COLUMN, AND THE INDEX IT TAKES UP
    index_to_column = {}  # MAP FROM INDEX TO COLUMN (OR SELECT CLAUSE)
    index_to_uid = {}  # FROM NESTED PATH TO THE INDEX OF UID
    sql_selects = []  # EVERY SELECT CLAUSE (NOT TO BE USED ON ALL TABLES, OF COURSE)
    nest_to_alias = {
        nested_path: "__" + unichr(ord('a') + i) + "__"
        for i, (nested_path, sub_table) in enumerate(self.sf.tables.items())
    }

    sorts = []
    if query.sort:
        for select in query.sort:
            col = select.value.to_sql(schema)[0]
            for t, sql in col.sql.items():
                json_type = sql_type_to_json_type[t]
                if json_type in STRUCT:
                    continue
                column_number = len(sql_selects)
                # SQL HAS ABS TABLE REFERENCE
                column_alias = _make_column_name(column_number)
                sql_selects.append(sql_alias(sql, column_alias))
                if select.sort == -1:
                    sorts.append(column_alias + SQL_IS_NOT_NULL)
                    sorts.append(column_alias + " DESC")
                else:
                    # ASCENDING: NULLS SORT FIRST
                    sorts.append(column_alias + SQL_IS_NULL)
                    sorts.append(column_alias)

    primary_doc_details = Data()
    # EVERY SELECT STATEMENT THAT WILL BE REQUIRED, NO MATTER THE DEPTH
    # WE WILL CREATE THEM ACCORDING TO THE DEPTH REQUIRED
    nested_path = []
    for step, sub_table in self.sf.tables.items():
        nested_path.insert(0, step)
        nested_doc_details = {
            "sub_table": sub_table,
            "children": [],
            "index_to_column": {},
            "nested_path": nested_path
        }

        # INSERT INTO TREE
        if not primary_doc_details:
            primary_doc_details = nested_doc_details
        else:
            def place(parent_doc_details):
                # RECURSIVELY FIND THE DEEPEST ANCESTOR, ATTACH THERE
                if startswith_field(step, parent_doc_details['nested_path'][0]):
                    for c in parent_doc_details['children']:
                        if place(c):
                            return True
                    parent_doc_details['children'].append(nested_doc_details)

            place(primary_doc_details)

        alias = nested_doc_details['alias'] = nest_to_alias[step]

        # WE ALWAYS ADD THE UID
        column_number = index_to_uid[step] = nested_doc_details['id_coord'] = len(sql_selects)
        sql_select = join_column(alias, quoted_UID)
        sql_selects.append(sql_alias(sql_select, _make_column_name(column_number)))
        if step != ".":
            # ID AND ORDER FOR CHILD TABLES
            index_to_column[column_number] = ColumnMapping(
                sql=sql_select,
                type="number",
                nested_path=nested_path,
                column_alias=_make_column_name(column_number)
            )
            column_number = len(sql_selects)
            sql_select = join_column(alias, quoted_ORDER)
            sql_selects.append(sql_alias(sql_select, _make_column_name(column_number)))
            index_to_column[column_number] = ColumnMapping(
                sql=sql_select,
                type="number",
                nested_path=nested_path,
                column_alias=_make_column_name(column_number)
            )

        # WE DO NOT NEED DATA FROM TABLES WE REQUEST NOTHING FROM
        if step not in active_columns:
            continue

        # ADD SQL SELECT COLUMNS FOR EACH jx SELECT CLAUSE
        si = 0
        for select in listwrap(query.select):
            try:
                column_number = len(sql_selects)
                select.pull = get_column(column_number)
                db_columns = select.value.partial_eval().to_sql(schema)

                for column in db_columns:
                    if isinstance(column.nested_path, list):
                        column.nested_path = column.nested_path[0]  # IN THE EVENT THIS "column" IS MULTIVALUED
                    for t, unsorted_sql in column.sql.items():
                        json_type = sql_type_to_json_type[t]
                        if json_type in STRUCT:
                            continue
                        column_number = len(sql_selects)
                        column_alias = _make_column_name(column_number)
                        sql_selects.append(sql_alias(unsorted_sql, column_alias))
                        if startswith_field(primary_nested_path, step) and isinstance(select.value, LeavesOp):
                            # ONLY FLATTEN primary_nested_path AND PARENTS, NOT CHILDREN
                            index_to_column[column_number] = nested_doc_details['index_to_column'][column_number] = ColumnMapping(
                                push_name=literal_field(get_property_name(concat_field(select.name, column.name))),
                                push_child=".",
                                push_column_name=get_property_name(concat_field(select.name, column.name)),
                                push_column=si,
                                pull=get_column(column_number),
                                sql=unsorted_sql,
                                type=json_type,
                                column_alias=column_alias,
                                nested_path=nested_path
                            )
                            si += 1
                        else:
                            index_to_column[column_number] = nested_doc_details['index_to_column'][column_number] = ColumnMapping(
                                push_name=select.name,
                                push_child=column.name,
                                push_column_name=select.name,
                                push_column=si,
                                pull=get_column(column_number),
                                sql=unsorted_sql,
                                type=json_type,
                                column_alias=column_alias,
                                nested_path=nested_path
                            )
            finally:
                # ADVANCE THE PUSH COLUMN EVEN ON ERROR
                # NOTE(review): the LeavesOp branch also increments si, so those
                # selects advance si twice per column -- confirm intended
                si += 1

    where_clause = BooleanOp("boolean", query.where).partial_eval().to_sql(schema, boolean=True)[0].sql.b
    unsorted_sql = self._make_sql_for_one_nest_in_set_op(".", sql_selects, where_clause, active_columns, index_to_column)

    # ALWAYS SORT BY THE UID OF EVERY LEVEL SO NESTED ROWS ARE CONTIGUOUS
    for n, _ in self.sf.tables.items():
        sorts.append(quote_column(COLUMN + text_type(index_to_uid[n])))

    ordered_sql = (
        SQL_SELECT + "*" +
        SQL_FROM + sql_iso(unsorted_sql) +
        SQL_ORDERBY + sql_list(sorts) +
        SQL_LIMIT + quote_value(query.limit)
    )
    self.db.create_new_functions()  # creating new functions: regexp
    result = self.db.query(ordered_sql)

    def _accumulate_nested(rows, row, nested_doc_details, parent_doc_id, parent_id_coord):
        """
        :param rows: REVERSED STACK OF ROWS (WITH push() AND pop())
        :param row: CURRENT ROW BEING EXTRACTED
        :param nested_doc_details: {
            "nested_path": wrap_nested_path(nested_path),
            "index_to_column": map from column number to column details
            "children": all possible direct decedents' nested_doc_details
        }
        :param parent_doc_id: the id of the parent doc (for detecting when to step out of loop)
        :param parent_id_coord: the column number for the parent id (so we ca extract from each row)
        :return: the nested property (usually an array)
        """
        previous_doc_id = None
        doc = Data()
        output = []
        id_coord = nested_doc_details['id_coord']

        while True:
            doc_id = row[id_coord]

            if doc_id == None or (parent_id_coord is not None and row[parent_id_coord] != parent_doc_id):
                rows.append(row)  # UNDO PREVIOUS POP (RECORD IS NOT A NESTED RECORD OF parent_doc)
                return output

            if doc_id != previous_doc_id:
                previous_doc_id = doc_id
                doc = Data()
                curr_nested_path = nested_doc_details['nested_path'][0]
                index_to_column = nested_doc_details['index_to_column'].items()
                if index_to_column:
                    for i, c in index_to_column:
                        value = row[i]
                        if isinstance(query.select, list) or isinstance(query.select.value, LeavesOp):
                            # ASSIGN INNER PROPERTIES
                            relative_field = concat_field(c.push_name, c.push_child)
                        else:
                            # FACT IS EXPECTED TO BE A SINGLE VALUE, NOT AN OBJECT
                            relative_field = c.push_child

                        if relative_field == ".":
                            if value == '':
                                doc = Null
                            else:
                                doc = value
                        elif value != None and value != '':
                            doc[relative_field] = value

            for child_details in nested_doc_details['children']:
                # EACH NESTED TABLE MUST BE ASSEMBLED INTO A LIST OF OBJECTS
                child_id = row[child_details['id_coord']]
                if child_id is not None:
                    nested_value = _accumulate_nested(rows, row, child_details, doc_id, id_coord)
                    if nested_value:
                        push_name = child_details['nested_path'][0]
                        if isinstance(query.select, list) or isinstance(query.select.value, LeavesOp):
                            # ASSIGN INNER PROPERTIES
                            # NOTE(review): `relative_field` here is called as a
                            # function, but the same name is bound to a string
                            # above; this raises TypeError whenever the doc-build
                            # branch ran first. Presumably the module-level
                            # mo_dots.relative_field was intended -- confirm.
                            relative_field = relative_field(push_name, curr_nested_path)
                        else:
                            # FACT IS EXPECTED TO BE A SINGLE VALUE, NOT AN OBJECT
                            relative_field = "."

                        if relative_field == "." and doc is Null:
                            doc = nested_value
                        elif relative_field == ".":
                            doc = unwraplist(nested_value)
                        else:
                            doc[relative_field] = unwraplist(nested_value)

            output.append(doc)

            try:
                row = rows.pop()
            except IndexError:
                # NO MORE ROWS: RETURN WHAT WE ACCUMULATED
                return output

    cols = tuple([i for i in index_to_column.values() if i.push_name != None])
    rows = list(reversed(unwrap(result.data)))  # STACK: pop() YIELDS ROWS IN ORDER
    if rows:
        row = rows.pop()
        data = _accumulate_nested(rows, row, primary_doc_details, None, None)
    else:
        data = result.data

    if query.format == "cube":
        # FIRST, THE SNOWFLAKE-AWARE PATH: ONE COLUMN PER push_column
        for f, _ in self.sf.tables.items():
            if frum.endswith(f) or (test_dots(cols) and isinstance(query.select, list)):
                num_rows = len(result.data)
                num_cols = MAX([c.push_column for c in cols]) + 1 if len(cols) else 0
                map_index_to_name = {c.push_column: c.push_column_name for c in cols}
                temp_data = [[None] * num_rows for _ in range(num_cols)]
                for rownum, d in enumerate(result.data):
                    for c in cols:
                        if c.push_child == ".":
                            temp_data[c.push_column][rownum] = c.pull(d)
                        else:
                            column = temp_data[c.push_column][rownum]
                            if column is None:
                                column = temp_data[c.push_column][rownum] = {}
                            column[c.push_child] = c.pull(d)
                output = Data(
                    meta={"format": "cube"},
                    data={n: temp_data[c] for c, n in map_index_to_name.items()},
                    edges=[{
                        "name": "rownum",
                        "domain": {
                            "type": "rownum",
                            "min": 0,
                            "max": num_rows,
                            "interval": 1
                        }
                    }]
                )
                return output

        if isinstance(query.select, list) or isinstance(query.select.value, LeavesOp):
            num_rows = len(data)
            temp_data = {c.push_column_name: [None] * num_rows for c in cols}
            for rownum, d in enumerate(data):
                for c in cols:
                    temp_data[c.push_column_name][rownum] = d[c.push_name]
            return Data(
                meta={"format": "cube"},
                data=temp_data,
                edges=[{
                    "name": "rownum",
                    "domain": {
                        "type": "rownum",
                        "min": 0,
                        "max": num_rows,
                        "interval": 1
                    }
                }]
            )
        else:
            num_rows = len(data)
            map_index_to_name = {c.push_column: c.push_column_name for c in cols}
            temp_data = [data]
            return Data(
                meta={"format": "cube"},
                data={n: temp_data[c] for c, n in map_index_to_name.items()},
                edges=[{
                    "name": "rownum",
                    "domain": {
                        "type": "rownum",
                        "min": 0,
                        "max": num_rows,
                        "interval": 1
                    }
                }]
            )

    elif query.format == "table":
        for f, _ in self.sf.tables.items():
            if frum.endswith(f):
                num_column = MAX([c.push_column for c in cols]) + 1
                header = [None] * num_column
                for c in cols:
                    header[c.push_column] = c.push_column_name
                output_data = []
                for d in result.data:
                    row = [None] * num_column
                    for c in cols:
                        set_column(row, c.push_column, c.push_child, c.pull(d))
                    output_data.append(row)
                return Data(meta={"format": "table"}, header=header, data=output_data)

        if isinstance(query.select, list) or isinstance(query.select.value, LeavesOp):
            column_names = [None] * (max(c.push_column for c in cols) + 1)
            for c in cols:
                column_names[c.push_column] = c.push_column_name
            temp_data = []
            for rownum, d in enumerate(data):
                row = [None] * len(column_names)
                for c in cols:
                    row[c.push_column] = d[c.push_name]
                temp_data.append(row)
            return Data(meta={"format": "table"}, header=column_names, data=temp_data)
        else:
            column_names = listwrap(query.select).name
            return Data(meta={"format": "table"}, header=column_names, data=[[d] for d in data])

    else:
        # DEFAULT FORMAT: "list"
        for f, _ in self.sf.tables.items():
            if frum.endswith(f) or (test_dots(cols) and isinstance(query.select, list)):
                data = []
                for d in result.data:
                    row = Data()
                    for c in cols:
                        if c.push_child == ".":
                            row[c.push_name] = c.pull(d)
                        elif c.num_push_columns:
                            # MULTI-COLUMN (TUPLE) SELECT
                            tuple_value = row[c.push_name]
                            if not tuple_value:
                                tuple_value = row[c.push_name] = [None] * c.num_push_columns
                            tuple_value[c.push_child] = c.pull(d)
                        else:
                            row[c.push_name][c.push_child] = c.pull(d)
                    data.append(row)
                return Data(meta={"format": "list"}, data=data)

        if isinstance(query.select, list) or isinstance(query.select.value, LeavesOp):
            temp_data = []
            for rownum, d in enumerate(data):
                row = {}
                for c in cols:
                    row[c.push_column_name] = d[c.push_name]
                temp_data.append(row)
            return Data(meta={"format": "list"}, data=temp_data)
        else:
            return Data(meta={"format": "list"}, data=data)
def _groupby_op(self, query, frum):
    """
    Build SQL for a GROUP BY query over the snowflake.

    :param query: jx-style query with groupby, aggregating selects, and
        optional window/sort clauses
    :param frum: dotted table name; its tail selects the nested path
    :return: (sql, index_to_column) where index_to_column maps each output
        column position to its ColumnMapping
    """
    base_table, path = tail_field(frum)
    schema = self.sf.tables[path].schema
    index_to_column = {}
    nest_to_alias = {
        nested_path: "__" + unichr(ord('a') + i) + "__"
        for i, (nested_path, sub_table) in enumerate(self.sf.tables.items())
    }

    # ONLY THE TABLES ON THE PATH TO `path`, SHALLOWEST FIRST
    tables = []
    for n, a in nest_to_alias.items():
        if startswith_field(path, n):
            tables.append({"nest": n, "alias": a})
    tables = jx.sort(tables, {"value": {"length": "nest"}})

    # CHAIN OF LEFT JOINS, EACH CHILD JOINED TO ITS PARENT VIA PARENT/UID
    from_sql = join_field([base_table] + split_field(tables[0].nest)) + " " + tables[0].alias
    previous = tables[0]
    for t in tables[1::]:
        from_sql += (
            SQL_LEFT_JOIN + quote_column(concat_field(base_table, t.nest)) + " " + t.alias +
            SQL_ON + join_column(t.alias, quoted_PARENT) + " = " + join_column(previous.alias, quoted_UID)
        )

    selects = []
    groupby = []
    for i, e in enumerate(query.groupby):
        for edge_sql in e.value.to_sql(schema):
            column_number = len(selects)
            # NOTE: dict.items()[0] is Python 2 list-returning behavior
            sql_type, sql = edge_sql.sql.items()[0]
            if sql is SQL_NULL and not e.value.var in schema.keys():
                Log.error("No such column {{var}}", var=e.value.var)

            column_alias = _make_column_name(column_number)
            groupby.append(sql)
            selects.append(sql_alias(sql, column_alias))
            if edge_sql.nested_path == ".":
                select_name = edge_sql.name
            else:
                select_name = "."
            index_to_column[column_number] = ColumnMapping(
                is_edge=True,
                push_name=e.name,
                push_column_name=e.name.replace("\\.", "."),
                push_column=i,
                push_child=select_name,
                pull=get_column(column_number),
                sql=sql,
                column_alias=column_alias,
                type=sql_type_to_json_type[sql_type]
            )

    for i, select in enumerate(listwrap(query.select)):
        column_number = len(selects)
        sql_type, sql = select.value.to_sql(schema)[0].sql.items()[0]
        if sql == 'NULL' and not select.value.var in schema.keys():
            Log.error("No such column {{var}}", var=select.value.var)

        if select.value == "." and select.aggregate == "count":
            # count OVER THE WHOLE RECORD -> COUNT(1)
            selects.append(sql_alias(sql_count(SQL_ONE), quote_column(select.name)))
        else:
            selects.append(sql_alias(sql_aggs[select.aggregate] + sql_iso(sql), quote_column(select.name)))

        index_to_column[column_number] = ColumnMapping(
            push_name=select.name,
            push_column_name=select.name,
            push_column=i + len(query.groupby),  # SELECTS COME AFTER THE GROUPBY COLUMNS
            push_child=".",
            pull=get_column(column_number),
            sql=sql,
            column_alias=quote_column(select.name),
            type=sql_type_to_json_type[sql_type]
        )

    for w in query.window:
        # NOTE(review): `self._window_op(self, query, w)` passes `self` twice
        # (once bound, once positionally) -- confirm _window_op's signature
        selects.append(self._window_op(self, query, w))

    where = query.where.to_sql(schema)[0].sql.b

    command = (
        SQL_SELECT + (sql_list(selects)) +
        SQL_FROM + from_sql +
        SQL_WHERE + where +
        SQL_GROUPBY + sql_list(groupby)
    )

    if query.sort:
        # NULLS FIRST, THEN VALUE; `sql` HERE IS REBOUND PER SORT CLAUSE
        command += SQL_ORDERBY + sql_list(
            sql_iso(sql[t]) + SQL_IS_NULL + "," + sql[t] + (" DESC" if s.sort == -1 else "")
            for s, sql in [(s, s.value.to_sql(schema)[0].sql) for s in query.sort]
            for t in "bns" if sql[t]
        )

    return command, index_to_column
def _compose_sql(self, get_ids):
    """
    :param get_ids: SQL to get the ids, and used to select the documents returned
    :return: list of SELECT statements, one per nested path that contributes
        at least one non-null column (callers UNION ALL them)
    """
    sql = []
    for nested_path in self.all_nested_paths:
        # MAKE THE REQUIRED JOINS
        sql_joins = []

        for i, curr_join in enumerate(self.nested_path_to_join[nested_path[0]]):
            curr_join = wrap(curr_join)
            rel = curr_join.join_columns[0]
            if i == 0:
                # ANCHOR: THE id SUBQUERY ACTS AS THE FIRST "TABLE"
                sql_joins.append(
                    SQL_FROM + sql_alias(sql_iso(get_ids), quote_column(rel.referenced.table.alias))
                )
            elif curr_join.children:
                # INNER JOIN TOWARD A CHILD TABLE (FOLLOW FK FORWARD)
                full_name = quote_column(rel.table.name, rel.table.schema)
                sql_joins.append(
                    SQL_JOIN + sql_alias(full_name, quote_column(rel.table.alias)) +
                    SQL_ON + sql_and(
                        quote_column(const_col.column.name, rel.table.alias) + "=" + quote_column(const_col.referenced.column.name, rel.referenced.table.alias)
                        for const_col in curr_join.join_columns
                    )
                )
            else:
                # LEFT JOIN TOWARD THE REFERENCED (PARENT) TABLE
                full_name = quote_column(rel.referenced.table.name, rel.referenced.table.schema)
                sql_joins.append(
                    SQL_LEFT_JOIN + sql_alias(full_name, quote_column(rel.referenced.table.alias)) +
                    SQL_ON + sql_and(
                        quote_column(const_col.referenced.column.name, rel.referenced.table.alias) + "=" + quote_column(const_col.column.name, rel.table.alias)
                        for const_col in curr_join.join_columns
                    )
                )

        # ONLY SELECT WHAT WE NEED, NULL THE REST
        selects = []
        not_null_column_seen = False
        for ci, c in enumerate(self.columns):
            # COLUMN ALIASES ARE EXPECTED TO BE <prefix><index>
            if c.column_alias[1:] != text_type(ci):
                Log.error("expecting consistency")
            if c.nested_path[0] == nested_path[0]:
                # COLUMN BELONGS TO THIS NESTED LEVEL
                s = sql_alias(quote_column(c.column.column.name, c.table_alias), quote_column(c.column_alias))
                if s == None:
                    Log.error("bug")
                selects.append(s)
                not_null_column_seen = True
            elif startswith_field(nested_path[0], c.path):
                # PARENT ID REFERENCES
                if c.column.is_id:
                    s = sql_alias(quote_column(c.column.column.name, c.table_alias), quote_column(c.column_alias))
                    selects.append(s)
                    not_null_column_seen = True
                else:
                    selects.append(sql_alias(SQL_NULL, quote_column(c.column_alias)))
            else:
                # UNRELATED LEVEL: PAD WITH NULL SO ALL SELECTS LINE UP
                selects.append(sql_alias(SQL_NULL, quote_column(c.column_alias)))

        if not_null_column_seen:
            sql.append(SQL_SELECT + sql_list(selects) + "".join(sql_joins))

    return sql
def _setop(self, query):
    """
    NO AGGREGATION, SIMPLE LIST COMPREHENSION

    :param query: jx-style query
    :return: (sql, post_processor) pair; the post processor re-assembles
        Mapping/tuple selects and expands embedded json
    """
    if isinstance(query.select, list):
        # RETURN BORING RESULT SET
        selects = FlatList()
        for s in listwrap(query.select):
            if isinstance(s.value, Mapping):
                # BUGFIX: was `s.value.items` (method object, not iterable)
                for k, v in s.value.items():
                    selects.append(sql_alias(v, quote_column(s.name + "." + k)))
            # BUGFIX: was a separate `if`, so Mapping selects also fell through
            # to the final else and appended a bogus extra select
            elif isinstance(s.value, list):
                for i, ss in enumerate(s.value):
                    # BUGFIX: was sql_alias(s.value, ...) -- the whole list
                    selects.append(sql_alias(ss, quote_column(s.name + "," + str(i))))
            else:
                selects.append(sql_alias(s.value, quote_column(s.name)))

        sql = expand_template("""
            SELECT
                {{selects}}
            FROM
                {{table}}
            {{where}}
            {{sort}}
            {{limit}}
        """, {
            "selects": SQL(",\n".join(selects)),
            "table": self._subquery(query["from"])[0],
            "where": self._where2sql(query.where),
            "limit": self._limit2sql(query.limit),
            "sort": self._sort2sql(query.sort)
        })

        def post_process(sql):
            result = self.db.query(sql)
            for s in listwrap(query.select):
                if isinstance(s.value, Mapping):
                    # FOLD THE name.key COLUMNS BACK INTO AN OBJECT
                    for r in result:
                        r[s.name] = {}
                        # BUGFIX: was `for k, v in s.value` (iterates keys only)
                        for k, v in s.value.items():
                            r[s.name][k] = r[s.name + "." + k]
                            r[s.name + "." + k] = None
                elif isinstance(s.value, list):
                    # REWRITE AS TUPLE
                    for r in result:
                        r[s.name] = tuple(r[s.name + "," + str(i)] for i, ss in enumerate(s.value))
                        for i, ss in enumerate(s.value):
                            r[s.name + "," + str(i)] = None
            expand_json(result)
            return result

        return sql, post_process  # RETURN BORING RESULT SET
    else:
        # RETURN LIST OF VALUES
        if query.select.value == ".":
            select = "*"
        else:
            name = query.select.name
            select = sql_alias(query.select.value, quote_column(name))

        sql = expand_template("""
            SELECT
                {{selects}}
            FROM
                {{table}}
            {{where}}
            {{sort}}
            {{limit}}
        """, {
            "selects": SQL(select),
            "table": self._subquery(query["from"])[0],
            "where": self._where2sql(query.where),
            "limit": self._limit2sql(query.limit),
            "sort": self._sort2sql(query.sort)
        })

        if query.select.value == ".":
            def post(sql):
                result = self.db.query(sql)
                expand_json(result)
                return result

            return sql, post
        else:
            return sql, lambda sql: [r[name] for r in self.db.query(sql)]  # RETURNING LIST OF VALUES
def _grouped(self, query, stacked=False): select = listwrap(query.select) # RETURN SINGLE OBJECT WITH AGGREGATES for s in select: if s.aggregate not in aggregates: Log.error("Expecting all columns to have an aggregate: {{select}}", select=s) selects = FlatList() groups = FlatList() edges = query.edges for e in edges: if e.domain.type != "default": Log.error("domain of type {{type}} not supported, yet", type=e.domain.type) groups.append(e.value) selects.append(sql_alias(e.value, quote_column(e.name))) for s in select: selects.append(sql_alias(aggregates[s.aggregate].replace("{{code}}", s.value), quote_column(s.name))) sql = expand_template(""" SELECT {{selects}} FROM {{table}} {{where}} GROUP BY {{groups}} """, { "selects": SQL(",\n".join(selects)), "groups": SQL(",\n".join(groups)), "table": self._subquery(query["from"])[0], "where": self._where2sql(query.where) }) def post_stacked(sql): # RETURN IN THE USUAL DATABASE RESULT SET FORMAT return self.db.query(sql) def post(sql): # FIND OUT THE default DOMAIN SIZES result = self.db.column_query(sql) num_edges = len(edges) for e, edge in enumerate(edges): domain = edge.domain if domain.type == "default": domain.type = "set" parts = set(result[e]) domain.partitions = [{"index": i, "value": p} for i, p in enumerate(parts)] domain.map = {p: i for i, p in enumerate(parts)} else: Log.error("Do not know what to do here, yet") # FILL THE DATA CUBE maps = [(unwrap(e.domain.map), result[i]) for i, e in enumerate(edges)] cubes = FlatList() for c, s in enumerate(select): data = Matrix(*[len(e.domain.partitions) + (1 if e.allow_nulls else 0) for e in edges]) for rownum, value in enumerate(result[c + num_edges]): coord = [m[r[rownum]] for m, r in maps] data[coord] = value cubes.append(data) if isinstance(query.select, list): return cubes else: return cubes[0] return sql, post if not stacked else post_stacked
def _setop(self, query):
    """
    NO AGGREGATION, SIMPLE LIST COMPREHENSION

    :param query: jx-style query
    :return: (sql, post_processor) pair; the post processor re-assembles
        Mapping/tuple selects and expands embedded json
    """
    if isinstance(query.select, list):
        # RETURN BORING RESULT SET
        selects = FlatList()
        for s in listwrap(query.select):
            if isinstance(s.value, Mapping):
                # BUGFIX: was `s.value.items` (method object, not iterable)
                for k, v in s.value.items():
                    selects.append(sql_alias(v, self.db.quote_column(s.name + "." + k)))
            # BUGFIX: was a separate `if`, so Mapping selects also fell through
            # to the final else and appended a bogus extra select
            elif isinstance(s.value, list):
                for i, ss in enumerate(s.value):
                    # BUGFIX: was sql_alias(s.value, ...) -- the whole list
                    selects.append(sql_alias(ss, self.db.quote_column(s.name + "," + str(i))))
            else:
                selects.append(sql_alias(s.value, self.db.quote_column(s.name)))

        sql = expand_template("""
            SELECT
                {{selects}}
            FROM
                {{table}}
            {{where}}
            {{sort}}
            {{limit}}
        """, {
            "selects": SQL(",\n".join(selects)),
            "table": self._subquery(query["from"])[0],
            "where": self._where2sql(query.where),
            "limit": self._limit2sql(query.limit),
            "sort": self._sort2sql(query.sort)
        })

        def post_process(sql):
            result = self.db.query(sql)
            for s in listwrap(query.select):
                if isinstance(s.value, Mapping):
                    # FOLD THE name.key COLUMNS BACK INTO AN OBJECT
                    for r in result:
                        r[s.name] = {}
                        # BUGFIX: was `for k, v in s.value` (iterates keys only)
                        for k, v in s.value.items():
                            r[s.name][k] = r[s.name + "." + k]
                            r[s.name + "." + k] = None
                elif isinstance(s.value, list):
                    # REWRITE AS TUPLE
                    for r in result:
                        r[s.name] = tuple(r[s.name + "," + str(i)] for i, ss in enumerate(s.value))
                        for i, ss in enumerate(s.value):
                            r[s.name + "," + str(i)] = None
            expand_json(result)
            return result

        return sql, post_process  # RETURN BORING RESULT SET
    else:
        # RETURN LIST OF VALUES
        if query.select.value == ".":
            select = "*"
        else:
            name = query.select.name
            select = sql_alias(query.select.value, self.db.quote_column(name))

        sql = expand_template("""
            SELECT
                {{selects}}
            FROM
                {{table}}
            {{where}}
            {{sort}}
            {{limit}}
        """, {
            "selects": SQL(select),
            "table": self._subquery(query["from"])[0],
            "where": self._where2sql(query.where),
            "limit": self._limit2sql(query.limit),
            "sort": self._sort2sql(query.sort)
        })

        if query.select.value == ".":
            def post(sql):
                result = self.db.query(sql)
                expand_json(result)
                return result

            return sql, post
        else:
            return sql, lambda sql: [r[name] for r in self.db.query(sql)]  # RETURNING LIST OF VALUES
def _grouped(self, query, stacked=False): select = listwrap(query.select) # RETURN SINGLE OBJECT WITH AGGREGATES for s in select: if s.aggregate not in aggregates: Log.error("Expecting all columns to have an aggregate: {{select}}", select=s) selects = FlatList() groups = FlatList() edges = query.edges for e in edges: if e.domain.type != "default": Log.error("domain of type {{type}} not supported, yet", type=e.domain.type) groups.append(e.value) selects.append(sql_alias(e.value, self.db.quote_column(e.name))) for s in select: selects.append(sql_alias(aggregates[s.aggregate].replace("{{code}}", s.value), self.db.quote_column(s.name))) sql = expand_template(""" SELECT {{selects}} FROM {{table}} {{where}} GROUP BY {{groups}} """, { "selects": SQL(",\n".join(selects)), "groups": SQL(",\n".join(groups)), "table": self._subquery(query["from"])[0], "where": self._where2sql(query.where) }) def post_stacked(sql): # RETURN IN THE USUAL DATABASE RESULT SET FORMAT return self.db.query(sql) def post(sql): # FIND OUT THE default DOMAIN SIZES result = self.db.column_query(sql) num_edges = len(edges) for e, edge in enumerate(edges): domain = edge.domain if domain.type == "default": domain.type = "set" parts = set(result[e]) domain.partitions = [{"index": i, "value": p} for i, p in enumerate(parts)] domain.map = {p: i for i, p in enumerate(parts)} else: Log.error("Do not know what to do here, yet") # FILL THE DATA CUBE maps = [(unwrap(e.domain.map), result[i]) for i, e in enumerate(edges)] cubes = FlatList() for c, s in enumerate(select): data = Matrix(*[len(e.domain.partitions) + (1 if e.allow_nulls else 0) for e in edges]) for rownum, value in enumerate(result[c + num_edges]): coord = [m[r[rownum]] for m, r in maps] data[coord] = value cubes.append(data) if isinstance(query.select, list): return cubes else: return cubes[0] return sql, post if not stacked else post_stacked