Exemplo n.º 1
0
    def return_clause_to_sql_selection(self, clause, litmap, varmap):
        """Translate a return 'clause' into an SQL selection list plus optional GROUP BY.

        Each item of 'clause.items' is translated with 'expression_to_sql' and the
        results are joined with ', ' (prefixed by 'DISTINCT ' if 'clause.distinct'
        is true).  Items that carry an alias ('item.name') get the quoted alias
        appended and the alias is registered in 'varmap' under the pseudo graph '_'.

        Returns a tuple '(select, group_by)' where 'select' is the selection list
        without the 'SELECT' keyword and 'group_by' is either a complete
        'GROUP BY ...' string or None if no grouping is needed.
        """
        select_items = []
        # Cypher does not have a 'GROUP BY' clause but instead uses non-aggregate return columns
        # that precede an aggregate function as grouping keys, so we have to keep track of those;
        # there is one entry per return column: its grouping key, or None for aggregate columns:
        agg_info = []
        for item in clause.items:
            expr = self.expression_to_sql(item.expression, litmap, varmap)
            # check if this item calls an aggregation function or not: if it does then preceding columns
            # that aren't aggregates are used for grouping, if it doesn't this column might be used for grouping:
            is_agg = parser.has_element(
                item.expression,
                lambda x: (isinstance(x, parser.Call)
                           and self.store.is_aggregate_function(x.function)))
            if item.name is not None:
                # we have to register the alias as a variable, otherwise it can't be referenced in --order-by,
                # but it is not tied to a specific graph table, thus that part is '_' in the registration below:
                self.register_clause_variable(item.name, ('_', item.name),
                                              varmap, set())
                # group by the alias exactly as it is spelled in the selection list (quoted),
                # an unquoted alias breaks for names that need quoting:
                alias = sql_quote_ident(item.name)
                select_items.append(expr + ' ' + alias)
                group_key = alias
            else:
                select_items.append(expr)
                group_key = expr
            agg_info.append(None if is_agg else group_key)

        select = ('DISTINCT ' if clause.distinct else '') + ', '.join(select_items)

        # we only need to group if there is at least one aggregate column and
        # at least one regular column before one of the aggregate columns:
        regular_cols = [col for col, key in enumerate(agg_info) if key is not None]
        aggregate_cols = [col for col, key in enumerate(agg_info) if key is None]
        group_by = None
        if regular_cols and aggregate_cols and aggregate_cols[-1] > regular_cols[0]:
            last_agg = aggregate_cols[-1]
            # all regular columns that precede the last aggregate column are grouping keys:
            group_keys = [key for key in agg_info[:last_agg] if key is not None]
            group_by = 'GROUP BY ' + ', '.join(group_keys)
        return select, group_by
Exemplo n.º 2
0
    def translate_to_sql(self):
        """Translate this query into an SQL statement.

        Returns a 4-tuple '(query, parameters, graph_names, auto_indexes)':
        the SQL text and parameters as returned by 'replace_literal_parameters',
        the sorted graph table names of all match clauses, and the result of
        'compute_auto_indexes'.
        """
        table_aliases = set()   # (graph table, alias) pairs referenced by this query
        litmap = {}             # maps Kypher literals onto parameter placeholders
        varmap = {}             # maps Kypher variables onto representative (graph, col) SQL columns
        restrictions = set()    # ((graph, col), literal) restrictions on SQL columns
        joins = set()           # pairs of equivalent (graph, col) SQL columns

        # first pass, clause top-level info; every clause gets its own
        # table alias so that self-joins on the same graph are possible:
        for position, clause in enumerate(self.match_clauses, start=1):
            table = self.get_pattern_clause_graph(clause)
            alias = '%s_c%d' % (table, position)
            table_aliases.add((table, alias))
            self.pattern_clause_to_sql(clause, alias, litmap, varmap,
                                       restrictions, joins)

        # second pass, clause properties (uses the same per-clause aliases):
        for position, clause in enumerate(self.match_clauses, start=1):
            table = self.get_pattern_clause_graph(clause)
            alias = '%s_c%d' % (table, position)
            self.pattern_clause_props_to_sql(clause, alias, litmap, varmap,
                                             restrictions, joins)

        # assemble the SQL text piece by piece:
        select, group_by = self.return_clause_to_sql_selection(
            self.return_clause, litmap, varmap)
        from_tables = ', '.join(
            table + ' AS ' + alias for table, alias in sorted(table_aliases))
        pieces = ['SELECT %s\nFROM %s' % (select, from_tables)]

        # 'WHERE TRUE' lets every condition below be appended uniformly with
        # 'AND'; the redundant ' TRUE\nAND' is stripped again further down:
        if restrictions or joins or self.where_clause is not None:
            pieces.append('\nWHERE TRUE')
        for (table, column), value in sorted(restrictions):
            pieces.append('\nAND %s.%s=%s' %
                          (table, sql_quote_ident(column), value))
        for (table1, column1), (table2, column2) in sorted(joins):
            pieces.append('\nAND %s.%s=%s.%s' %
                          (table1, sql_quote_ident(column1),
                           table2, sql_quote_ident(column2)))

        where = self.where_clause_to_sql(self.where_clause, litmap, varmap)
        if where:
            pieces.append('\nAND ' + where)
        if group_by:
            pieces.append('\n' + group_by)
        order = self.order_clause_to_sql(self.order_clause, litmap, varmap)
        if order:
            pieces.append('\n' + order)
        limit = self.limit_clauses_to_sql(self.skip_clause, self.limit_clause,
                                          litmap, varmap)
        if limit:
            pieces.append('\n' + limit)

        query = ''.join(pieces).replace(' TRUE\nAND', '')
        query, parameters = self.replace_literal_parameters(query, litmap)
        auto_indexes = self.compute_auto_indexes(table_aliases, restrictions,
                                                 joins)

        # logging:
        rule = '-' * 45
        indented_query = query.replace('\n', '\n     ')
        self.log(
            1, 'SQL Translation:\n%s\n  %s\n  PARAS: %s\n%s' %
            (rule, indented_query, parameters, rule))

        # transpose the (table, alias) pairs and keep only the table names:
        transposed = list(zip(*table_aliases))
        return query, parameters, sorted(transposed[0]), auto_indexes
Exemplo n.º 3
0
    def expression_to_sql(self, expr, litmap, varmap):
        """Translate a Kypher expression 'expr' into its SQL equivalent.

        'litmap' is passed on to 'get_literal_parameter' for every literal and
        parameter value.  'varmap' maps query variable names onto collections of
        (graph, column) pairs; if it is None, variables are illegal in 'expr'
        (e.g., inside list literals).

        Returns the SQL text for 'expr'.  Raises an Exception for undefined or
        illegally placed variables, for unsupported operators ('^', 'XOR', 'CASE'),
        for malformed CAST calls, for property lookups that cannot be handled,
        and for unknown expression types or operators.
        """
        expr_type = type(expr)
        if expr_type == parser.Literal:
            return self.get_literal_parameter(expr.value, litmap)
        elif expr_type == parser.Parameter:
            value = self.get_parameter_value(expr.name)
            return self.get_literal_parameter(value, litmap)

        elif expr_type == parser.Variable:
            query_var = expr.name
            if varmap is None:
                # for cases where external variables are not allowed (e.g. LIMIT):
                raise Exception('Illegal context for variable: %s' % query_var)
            if query_var == '*':
                return query_var
            sql_vars = varmap.get(query_var)
            if sql_vars is None:
                raise Exception('Undefined variable: %s' % query_var)
            # any registered column can represent the variable, use the first one:
            graph, col = list(sql_vars)[0]
            if graph == '_':
                # we have a return column alias:
                return sql_quote_ident(col)
            else:
                return '%s.%s' % (graph, sql_quote_ident(col))

        elif expr_type == parser.List:
            # we only allow literals in lists, Cypher also supports variables;
            # passing None as the varmap makes any variable in the list an error:
            elements = [
                self.expression_to_sql(elt, litmap, None)
                for elt in expr.elements
            ]
            return '(' + ', '.join(elements) + ')'

        elif expr_type == parser.Minus:
            arg = self.expression_to_sql(expr.arg, litmap, varmap)
            return '(- %s)' % arg
        elif expr_type in (parser.Add, parser.Sub, parser.Multi, parser.Div,
                           parser.Mod,
                           parser.Eq, parser.Neq, parser.Lt, parser.Gt,
                           parser.Lte, parser.Gte,
                           parser.And, parser.Or):
            # arithmetic, comparison and boolean binary operators all translate
            # the same way, only the SQL operator symbol differs:
            arg1 = self.expression_to_sql(expr.arg1, litmap, varmap)
            arg2 = self.expression_to_sql(expr.arg2, litmap, varmap)
            op = self.OPERATOR_TABLE[expr_type]
            return '(%s %s %s)' % (arg1, op, arg2)
        elif expr_type == parser.Hat:
            raise Exception("Unsupported operator: '^'")
        elif expr_type == parser.Not:
            arg = self.expression_to_sql(expr.arg, litmap, varmap)
            return '(NOT %s)' % arg
        elif expr_type == parser.Xor:
            raise Exception("Unsupported operator: 'XOR'")
        elif expr_type == parser.Case:
            # TO DO: implement, has the same syntax as SQL:
            raise Exception("Unsupported operator: 'CASE'")

        elif expr_type == parser.Call:
            function = expr.function
            if function.upper() == 'CAST':
                # special-case SQLite CAST which isn't directly supported by Cypher;
                # the target type is written as a variable and copied verbatim:
                if len(expr.args) == 2 and isinstance(expr.args[1],
                                                      parser.Variable):
                    arg = self.expression_to_sql(expr.args[0], litmap, varmap)
                    typ = expr.args[1].name
                    return 'CAST(%s AS %s)' % (arg, typ)
                else:
                    raise Exception("Illegal CAST expression")
            args = [
                self.expression_to_sql(arg, litmap, varmap)
                for arg in expr.args
            ]
            distinct = 'DISTINCT ' if expr.distinct else ''
            self.store.load_user_function(function, error=False)
            return function + '(' + distinct + ', '.join(args) + ')'

        elif expr_type == parser.Expression2:
            arg1 = expr.arg1
            arg2 = expr.arg2
            if isinstance(arg1, parser.Variable):
                var = self.expression_to_sql(arg1, litmap, varmap)
                for proplook in arg2:
                    if not isinstance(proplook, parser.PropertyLookup):
                        # not a pure property lookup chain, fall through to the error below:
                        var = None
                        break
                    prop = proplook.property
                    if self.is_kgtk_operator(
                            prop) and self.store.is_user_function(prop):
                        self.store.load_user_function(prop)
                        var = prop + '(' + var + ')'
                    # TO DO: figure out how to better abstract property to column mapping:
                    elif var.upper().endswith('."ID"'):
                        # we are referring to the relation ID, substitute it with the prop column
                        # (strip the trailing 'id"' and re-close the quoted identifier):
                        var = var[:-3] + prop + '"'
                    else:
                        # we must be referring to a node-path column such as node1;name or node2;creator:
                        # TO DO: check existence of column here instead of waiting for SQLite to error
                        var = var[:-1] + ';' + prop + '"'
                else:
                    # the loop completed without a break, all lookups were translated:
                    return var
            raise Exception("Unhandled property lookup expression: " +
                            str(expr))

        elif expr_type == parser.Expression3:
            arg1 = self.expression_to_sql(expr.arg1, litmap, varmap)
            arg2 = self.expression_to_sql(expr.arg2, litmap, varmap)
            op = expr.operator.upper()
            # compare for equality: "op in ('IN')" is a substring test on the
            # string 'IN' and would also accept 'I', 'N' or the empty string:
            if op == 'IN':
                return '(%s %s %s)' % (arg1, op, arg2)
            elif op == 'REGEX':
                self.store.load_user_function('KGTK_REGEX')
                return 'KGTK_REGEX(%s, %s)' % (arg1, arg2)
            else:
                raise Exception('Unhandled operator: %s' % str(op))
        else:
            raise Exception('Unhandled expression type: %s' %
                            str(parser.object_to_tree(expr)))