Пример #1
0
def check_high_offset_selects(_, queries):
    """
    :type _ indexdigest.database.Database
    :type queries list[str]
    :rtype: list[LinterEntry]
    """
    for query in queries:
        # ignore insert queries (#140)
        if not is_select_query(query):
            continue

        res = get_query_limit_and_offset(query)

        if res is None:
            continue

        (limit, offset) = res

        if offset < OFFSET_THRESHOLD:
            continue

        table_name = get_query_tables(query)[0]

        context = OrderedDict()
        context['query'] = query
        context['limit'] = limit
        context['offset'] = offset

        yield LinterEntry(
            linter_type='high_offset_selects',
            table_name=table_name,
            message='"{}" query uses too high offset impacting the performance'
            .format(shorten_query(query)),
            context=context)
def check_not_used_columns(database, queries):
    """
    :type database  indexdigest.database.Database
    :type queries list[str]
    :rtype: list[LinterEntry]
    :raises Exception
    """
    logger = logging.getLogger(__name__)

    # analyze only SELECT queries from the log
    queries = list(filter(is_select_query, queries))

    used_tables = get_used_tables_from_queries(queries)
    used_columns = defaultdict(list)

    logger.info("Will check these tables: %s", used_tables)

    # analyze given queries and collect used columns for each table
    for query in queries:
        tables = get_query_tables(query)
        if tables:
            columns = get_query_columns(query)

            # print(query, table, columns)

            # add used columns per table
            # FIXME: assume we're querying just a single table for now
            used_columns[tables[0]] += columns
        else:
            logger.error(
                'Unable to extract tables and columns used from the query: %s',
                query)

    # analyze table schemas and report not used columns for each table
    for table in used_tables:
        logger.info("Checking %s table", table)
        table_columns = database.get_table_columns(table)

        # now get the difference and report them
        not_used_columns = [
            column for column in table_columns
            if column.name not in set(used_columns[table])
        ] if table_columns else []

        for column in not_used_columns:
            yield LinterEntry(
                linter_type='not_used_columns',
                table_name=table,
                message='"{}" column was not used by provided queries'.format(
                    column),
                context={
                    'column_name': column.name,
                    'column_type': column.type
                })
def get_used_tables_from_queries(queries):
    """
    :type queries list[str]
    :rtype: list[str]
    """
    used_tables = []
    queries = filter(is_select_query, queries)

    for query in queries:
        # parse each query from the log
        tables = get_query_tables(query)
        if tables and tables[0] not in used_tables:
            used_tables.append(tables[0])

    return used_tables
Пример #4
0
def check_having_clause(_, queries):
    """
    :type queries list[str]
    :rtype: list[LinterEntry]
    """
    queries_with_having_clause = [
        query for query in queries if query_has_having_clause(query)
    ]

    for query in queries_with_having_clause:
        table_name = get_query_tables(query)[0]

        yield LinterEntry(linter_type='having_clause',
                          table_name=table_name,
                          message='"{}" query uses HAVING clause'.format(
                              shorten_query(query)),
                          context={"query": query})
Пример #5
0
def check_select_star(_, queries):
    """
    :type queries list[str]
    :rtype: list[LinterEntry]
    """
    queries_with_wildcard = [
        query for query in queries if is_wildcard_query(query)
    ]

    for query in queries_with_wildcard:
        table_name = get_query_tables(query)[0]

        yield LinterEntry(linter_type='select_star',
                          table_name=table_name,
                          message='"{}" query uses SELECT *'.format(
                              shorten_query(query)),
                          context={"query": query})
Пример #6
0
def check_insert_ignore_queries(database, queries):
    """
    :type database  indexdigest.database.Database
    :type queries list[str]
    :rtype: list[LinterEntry]
    """
    queries = [query for query in queries if is_insert_ignore_query(query)]

    for query in queries:
        table_used = get_query_tables(query)[0]

        context = OrderedDict()
        context['query'] = query
        context['schema'] = database.get_table_schema(table_used)

        yield LinterEntry(linter_type='insert_ignore', table_name=table_used,
                          message='"{}" query uses a risky INSERT IGNORE'.
                          format(shorten_query(query)),
                          context=context)
Пример #7
0
    def test_get_query_tables(self):
        self.assertListEqual(['test_table'],
                             get_query_tables('SELECT * FROM `test_table`'))

        self.assertListEqual(
            ['0001_test_table'],
            get_query_tables('SELECT * FROM `0001_test_table`'))

        self.assertListEqual(['test_table'],
                             get_query_tables('SELECT foo FROM `test_table`'))

        self.assertListEqual(
            ['test_table'],
            get_query_tables('SELECT foo FROM test_table WHERE id = 1'))

        self.assertListEqual(
            ['test_table', 'second_table'],
            get_query_tables(
                'SELECT foo FROM test_table, second_table WHERE id = 1'))

        self.assertListEqual(
            ['revision', 'page', 'wikicities_user'],
            get_query_tables(
                'SELECT rev_id,rev_page,rev_text_id,rev_timestamp,rev_comment,rev_user_text,rev_user,rev_minor_edit,rev_deleted,rev_len,rev_parent_id,rev_shaN,page_namespace,page_title,page_id,page_latest,user_name FROM `revision` INNER JOIN `page` ON ((page_id = rev_page)) LEFT JOIN `wikicities_user` ON ((rev_user != N) AND (user_id = rev_user)) WHERE rev_id = X LIMIT N'
            ))

        self.assertListEqual(
            ['events'],
            get_query_tables(
                "SELECT COUNT( 0 ) AS cnt, date_format(event_date, '%Y-%m-%d') AS date 	 FROM events 	 WHERE event_date BETWEEN '2017-10-18 00:00:00' 	 AND '2017-10-24 23:59:59'  	 AND wiki_id = '1289985' GROUP BY date WITH ROLLUP"
            ))

        # complex queries
        # @see https://github.com/macbre/query-digest/issues/16
        self.assertListEqual(
            ['report_wiki_recent_pageviews', 'dimension_wikis'],
            get_query_tables(
                "SELECT r.wiki_id AS id, pageviews_Nday AS pageviews FROM report_wiki_recent_pageviews AS r INNER JOIN dimension_wikis AS d ON r.wiki_id = d.wiki_id WHERE d.public = X AND r.lang = X AND r.hub_name = X ORDER BY pageviews DESC LIMIT N"
            ))

        self.assertListEqual(
            ['dimension_wikis', 'fact_wam_scores'],
            get_query_tables(
                "SELECT DISTINCT dw.lang FROM `dimension_wikis` `dw` INNER JOIN `fact_wam_scores` `fwN` ON ((dw.wiki_id = fwN.wiki_id)) WHERE fwN.time_id = FROM_UNIXTIME(N) ORDER BY dw.lang ASC"
            ))

        self.assertListEqual(
            ['fact_wam_scores', 'dimension_wikis'],
            get_query_tables(
                "SELECT count(fwN.wiki_id) as wam_results_total FROM `fact_wam_scores` `fwN` left join `fact_wam_scores` `fwN` ON ((fwN.wiki_id = fwN.wiki_id) AND (fwN.time_id = FROM_UNIXTIME(N))) left join `dimension_wikis` `dw` ON ((fwN.wiki_id = dw.wiki_id)) WHERE (fwN.time_id = FROM_UNIXTIME(N)) AND (dw.url like X OR dw.title like X) AND fwN.vertical_id IN (XYZ) AND dw.lang = X AND (fwN.wiki_id NOT IN (XYZ)) AND ((dw.url IS NOT NULL AND dw.title IS NOT NULL))"
            ))

        # INSERT queries
        self.assertListEqual(
            ['0070_insert_ignore_table'],
            get_query_tables(
                "INSERT IGNORE INTO `0070_insert_ignore_table` VALUES (9, '123', '2017-01-01');"
            ))

        self.assertListEqual(
            ['0070_insert_ignore_table'],
            get_query_tables(
                "INSERT into `0070_insert_ignore_table` VALUES (9, '123', '2017-01-01');"
            ))
Пример #8
0
def test_get_query_tables():
    assert ["test_table"] == get_query_tables("SELECT * FROM `test_table`")
    assert ["test_table", "second_table"] == get_query_tables(
        "SELECT foo FROM test_table, second_table WHERE id = 1"
    )