Exemplos de extract_tables em Python, exemplos de clickhouse_cli.ui.parseutils.tables.extract_tables em Python

Exemplo n.º 1

0

Exibir arquivo

Arquivo: parquet_ordering.py Projeto: swathiiyer2/user-query-optimizer

def checkOrdering(optimizations, schema, parsed_queries, *db_params):
    for stmt_list in parsed_queries:
        where_line = None
        for stmt in stmt_list:
            tables = [str(tr.name) for tr in extract_tables(str(stmt))]
            if schema["name"] not in tables:
                continue
            ordering = set(schema["ordering"])
            seen_stmt = ""
            for token in stmt.tokens:
                if isinstance(token, Where):
                    # newlines in sqlparse sometimes group clauses together - need to recalculate
                    where_line = seen_stmt.count("\n")
                    for item in token.tokens:
                        if isinstance(item, Comparison):
                            if item.left.value in ordering:
                                ordering.remove(item.left.value)
                            if item.right.value in ordering:
                                ordering.remove(item.right.value)
                seen_stmt += str(token)
            if len(ordering) != 0:
                lineno = 0 if where_line is None else where_line
                optimizations[stmt].append((
                    lineno,
                    "Filtering on ordered columns can improve performance. Here are some that you aren't filtering on: "
                    + ', '.join(list(ordering))))

Exemplo n.º 2

0

Exibir arquivo

 def get_tables(self, scope='full'):
     """ Gets the tables available in the statement.
     param `scope:` possible values: 'full', 'insert', 'before'
     If 'insert', only the first table is returned.
     If 'before', only tables before the cursor are returned.
     If not 'insert' and the stmt is an insert, the first table is skipped.
     """
     tables = extract_tables(self.full_text if scope == 'full' else self.text_before_cursor)
     if scope == 'insert':
         tables = tables[:1]
     elif self.is_insert():
         tables = tables[1:]
     return tables

Exemplo n.º 3

0

Exibir arquivo

def checkPartitions(optimizations, schema, parsed_queries, *db_params):
    for stmt_list in parsed_queries:
        where_line = None
        for stmt in stmt_list:
            tables = [str(tr.name) for tr in extract_tables(str(stmt))]
            if schema["name"] not in tables:
                continue
            seen_stmt = ""
            partition_seen = False
            for token in stmt.tokens:
                if isinstance(token, Where):
                    # newlines in sqlparse sometimes group clauses together - need to recalculate
                    where_line = seen_stmt.count("\n")
                    for item in token.tokens:
                        if isinstance(item, Comparison):
                            if item.left.value in schema["partitions"] or \
                                    item.right.value in schema["partitions"]:
                                    partition_seen = True
                                    break
                seen_stmt += str(token)
            if not partition_seen:
                lineno = 0 if where_line is None else where_line
                optimizations[stmt].append((lineno, "filter on a partitioned column"))

Exemplo n.º 4

0

Exibir arquivo

Arquivo: helpers.py Projeto: worenga/clickhouse-cli

def suggest_based_on_last_token(token, stmt):

    if isinstance(token, str):
        token_v = token.lower()
    elif isinstance(token, Comparison):
        # If 'token' is a Comparison type such as
        # 'select * FROM abc a JOIN def d ON a.id = d.'. Then calling
        # token.value on the comparison type will only return the lhs of the
        # comparison. In this case a.id. So we need to do token.tokens to get
        # both sides of the comparison and pick the last token out of that
        # list.
        token_v = token.tokens[-1].value.lower()
    elif isinstance(token, Where):
        # sqlparse groups all tokens from the where clause into a single token
        # list. This means that token.value may be something like
        # 'where foo > 5 and '. We need to look "inside" token.tokens to handle
        # suggestions in complicated where clauses correctly
        prev_keyword = stmt.reduce_to_prev_keyword()
        return suggest_based_on_last_token(prev_keyword, stmt)
    elif isinstance(token, Identifier):
        # If the previous token is an identifier, we can suggest datatypes if
        # we're in a parenthesized column/field list, e.g.:
        #       CREATE TABLE foo (Identifier <CURSOR>
        #       CREATE FUNCTION foo (Identifier <CURSOR>
        # If we're not in a parenthesized list, the most likely scenario is the
        # user is about to specify an alias, e.g.:
        #       SELECT Identifier <CURSOR>
        #       SELECT foo FROM Identifier <CURSOR>
        prev_keyword, _ = find_prev_keyword(stmt.text_before_cursor)
        if prev_keyword and prev_keyword.value == '(':
            # Suggest datatypes
            return suggest_based_on_last_token('type', stmt)
        else:
            return (Keyword(), )
    else:
        token_v = token.value.lower()

    if not token:
        return (Keyword(), Special())
    elif token_v.endswith('('):
        p = sqlparse.parse(stmt.text_before_cursor)[0]

        if p.tokens and isinstance(p.tokens[-1], Where):
            # Four possibilities:
            #  1 - Parenthesized clause like "WHERE foo AND ("
            #        Suggest columns/functions
            #  2 - Function call like "WHERE foo("
            #        Suggest columns/functions
            #  3 - Subquery expression like "WHERE EXISTS ("
            #        Suggest keywords, in order to do a subquery
            #  4 - Subquery OR array comparison like "WHERE foo = ANY("
            #        Suggest columns/functions AND keywords. (If we wanted to be
            #        really fancy, we could suggest only array-typed columns)

            column_suggestions = suggest_based_on_last_token('where', stmt)

            # Check for a subquery expression (cases 3 & 4)
            where = p.tokens[-1]
            prev_tok = where.token_prev(len(where.tokens) - 1)[1]

            if isinstance(prev_tok, Comparison):
                # e.g. "SELECT foo FROM bar WHERE foo = ANY("
                prev_tok = prev_tok.tokens[-1]

            prev_tok = prev_tok.value.lower()
            if prev_tok == 'exists':
                return (Keyword(), )
            else:
                return column_suggestions

        # Get the token before the parens
        prev_tok = p.token_prev(len(p.tokens) - 1)[1]

        if (prev_tok and prev_tok.value
                and prev_tok.value.lower().split(' ')[-1] == 'using'):
            # tbl1 INNER JOIN tbl2 USING (col1, col2)
            tables = stmt.get_tables('before')
            # suggest columns that are present in more than one table
            return (Column(table_refs=tables,
                           require_last_table=True,
                           local_tables=stmt.local_tables), )

        elif p.token_first().value.lower() == 'select':
            # If the lparen is preceeded by a space chances are we're about to
            # do a sub-select.
            if last_word(stmt.text_before_cursor,
                         'all_punctuations').startswith('('):
                return (Keyword(), )
        prev_prev_tok = prev_tok and p.token_prev(p.token_index(prev_tok))[1]
        if prev_prev_tok and prev_prev_tok.normalized == 'INTO':
            return (Column(table_refs=stmt.get_tables('insert')), )
        # We're probably in a function argument list
        return (Column(table_refs=extract_tables(stmt.full_text),
                       local_tables=stmt.local_tables,
                       qualifiable=True), )
    elif token_v == 'set':
        return (Column(table_refs=stmt.get_tables(),
                       local_tables=stmt.local_tables), )
    elif token_v in ('select', 'where', 'having', 'by', 'distinct'):
        # Check for a table alias or schema qualification
        parent = (stmt.identifier
                  and stmt.identifier.get_parent_name()) or None
        tables = stmt.get_tables()
        if parent:
            tables = tuple(t for t in tables if identifies(parent, t))
            return (
                Column(table_refs=tables, local_tables=stmt.local_tables),
                Table(schema=parent),
                View(schema=parent),
                Function(schema=parent),
            )
        else:
            tables = tuple(
                TableReference(schema=table.schema or 'default',
                               name=table.name,
                               alias=table.alias,
                               is_function=table.is_function)
                for table in tables)
            return (
                Column(table_refs=tables, local_tables=stmt.local_tables),
                Function(schema=None),
                Keyword(),
            )
    elif token_v == 'as':
        # Don't suggest anything for aliases
        return ()
    elif (token_v.endswith('join') and token.is_keyword) or (token_v in (
            'copy', 'from', 'update', 'into', 'describe', 'truncate')):

        schema = stmt.get_identifier_schema()
        tables = extract_tables(stmt.text_before_cursor)
        is_join = token_v.endswith('join') and token.is_keyword

        # Suggest tables from either the currently-selected schema or the
        # public schema if no schema has been specified
        suggest = []

        if not schema:
            # Suggest schemas
            suggest.insert(0, Schema())

        if token_v == 'from' or is_join:
            suggest.append(
                FromClauseItem(schema=schema,
                               table_refs=tables,
                               local_tables=stmt.local_tables))
        elif token_v == 'truncate':
            suggest.append(Table(schema))
        else:
            suggest.extend((Table(schema), View(schema)))

        if is_join and _allow_join(stmt.parsed):
            tables = stmt.get_tables('before')
            suggest.append(Join(table_refs=tables, schema=schema))

        return tuple(suggest)

    elif token_v in ('table', 'view', 'function'):
        # E.g. 'DROP FUNCTION <funcname>', 'ALTER TABLE <tablname>'
        rel_type = {
            'table': Table,
            'view': View,
            'function': Function
        }[token_v]
        schema = stmt.get_identifier_schema()
        if schema:
            return (rel_type(schema=schema), )
        else:
            return (Schema(), rel_type(schema=schema))

    elif token_v == 'column':
        # E.g. 'ALTER TABLE foo ALTER COLUMN bar
        return (Column(table_refs=stmt.get_tables()), )

    elif token_v == 'on':
        tables = stmt.get_tables('before')
        parent = (stmt.identifier
                  and stmt.identifier.get_parent_name()) or None
        if parent:
            # "ON parent.<suggestion>"
            # parent can be either a schema name or table alias
            filteredtables = tuple(t for t in tables if identifies(parent, t))
            sugs = [
                Column(table_refs=filteredtables,
                       local_tables=stmt.local_tables),
                Table(schema=parent),
                View(schema=parent),
                Function(schema=parent)
            ]
            if filteredtables and _allow_join_condition(stmt.parsed):
                sugs.append(
                    JoinCondition(table_refs=tables,
                                  parent=filteredtables[-1]))
            return tuple(sugs)
        else:
            # ON <suggestion>
            # Use table alias if there is one, otherwise the table name
            aliases = tuple(t.ref for t in tables)
            if _allow_join_condition(stmt.parsed):
                return (Alias(aliases=aliases),
                        JoinCondition(table_refs=tables, parent=None))
            else:
                return (Alias(aliases=aliases), )

    elif token_v in ('c', 'use', 'database', 'template'):
        # "\c <db", "use <db>", "DROP DATABASE <db>",
        # "CREATE DATABASE <newdb> WITH TEMPLATE <db>"
        return (Database(), )
    elif token_v == 'schema':
        # DROP SCHEMA schema_name
        return (Schema(), )
    elif token_v.endswith(',') or token_v in ('=', 'and', 'or'):
        prev_keyword = stmt.reduce_to_prev_keyword()
        if prev_keyword:
            return suggest_based_on_last_token(prev_keyword, stmt)
        else:
            return ()
    elif token_v == 'format':
        return (Format(), )
    elif token_v in ('type', '::'):
        #   ALTER TABLE foo SET DATA TYPE bar
        #   SELECT foo::bar
        # Note that tables are a form of composite type in postgresql, so
        # they're suggested here as well
        schema = stmt.get_identifier_schema()
        suggestions = [Datatype(schema=schema), Table(schema=schema)]
        if not schema:
            suggestions.append(Schema())
        return tuple(suggestions)
    elif token_v == 'alter':
        return (Keyword(), )
    elif token.is_keyword:
        # token is a keyword we haven't implemented any special handling for
        # go backwards in the query until we find one we do recognize
        prev_keyword = stmt.reduce_to_prev_keyword(n_skip=1)
        if prev_keyword:
            return suggest_based_on_last_token(prev_keyword, stmt)
        else:
            return (Keyword(), )
    else:
        return (Keyword(), )