def test_exception_parsing():
    """Check the text that ``ParsingException`` produces for several parser messages.

    Each case builds a ``ParsingException`` from a SQL string and a message
    and compares ``str(e)`` against a fixed expected string.
    """
    # NOTE(review): the triple-quoted literals below presumably span several
    # lines in the original file (the caret markers suggest one row per SQL
    # line); they appear whitespace-collapsed here -- confirm before relying
    # on the exact expected values.

    # Case 1: message carrying a "From line ... to line ..." range on one line.
    e = ParsingException(
        "SELECT * FROM df",
        """org.apache.calcite.runtime.CalciteContextException: From line 1, column 3 to line 1, column 4: Message""",
    )

    expected = """Can not parse the given SQL: org.apache.calcite.runtime.CalciteContextException: From line 1, column 3 to line 1, column 4: Message The problem is probably somewhere here: \tSELECT * FROM df \t ^^"""
    assert str(e) == expected

    # Case 2: "Lexical error" message that names a single line/column position.
    e = ParsingException(
        "SELECT * FROM df",
        """Lexical error at line 1, column 3. Message""",
    )

    expected = """Can not parse the given SQL: Lexical error at line 1, column 3. Message The problem is probably somewhere here: \tSELECT * FROM df \t ^"""
    assert str(e) == expected

    # Case 3: multi-line SQL with a range that starts on line 1 and ends on line 2.
    e = ParsingException(
        "SELECT *\nFROM df\nWHERE x = 3",
        """From line 1, column 3 to line 2, column 3: Message""",
    )

    expected = """Can not parse the given SQL: From line 1, column 3 to line 2, column 3: Message The problem is probably somewhere here: \tSELECT * \t ^^^^^^^ \tFROM df \t^^^ \tWHERE x = 3"""
    assert str(e) == expected

    # Case 4: a message without any position information is expected to be
    # passed through unchanged.
    e = ParsingException(
        "SELECT *",
        "Message",
    )

    assert str(e) == "Message"
def _get_ral(self, sql):
    """Translate a SQL string into relational algebra and output column names.

    Args:
        sql: The query string to parse.

    Returns:
        A ``(rel, select_names, rel_string)`` tuple. ``rel`` is the optimized
        relational algebra, or the parsed node itself when its Java class
        lives under ``com.dask.sql.parser.`` (``rel_string`` is then ``""``).
        ``select_names`` holds the SQL text of every entry in the select
        list, or ``None`` when the statement is not a SELECT.

    Raises:
        ParsingException: If the Java side reports a validation or SQL
            parse error.
    """
    # The generator works on the schema of everything registered right now.
    generator = RelationalAlgebraGenerator(self._prepare_schema())
    dialect = generator.getDialect()

    logger.debug(f"Using dialect: {get_java_class(dialect)}")

    try:
        node = generator.getSqlNode(sql)
        node_class = get_java_class(node)

        if node_class.startswith("com.dask.sql.parser."):
            # Nodes from the custom parser package are passed through
            # untouched and have no plan string.
            rel, rel_string = node, ""
        else:
            validated = generator.getValidatedNode(node)
            unoptimized = generator.getRelationalAlgebra(validated)
            rel = generator.getOptimizedRelationalAlgebra(unoptimized)
            rel_string = str(generator.getRelationalAlgebraString(rel))
    except (ValidationException, SqlParseException) as java_error:
        logger.debug(f"Original exception raised by Java:\n {java_error}")
        # Raising "from None" hides the Java stack trace from the user;
        # the full original error is only visible in the debug log.
        raise ParsingException(sql, str(java_error.message())) from None

    # Calcite names intermediate results EXPR$N, which is of little use to
    # the user, so the SQL text of the select items is collected as a
    # replacement. This may fail when the outer node is not a select node.
    if node_class == "org.apache.calcite.sql.SqlOrderBy":
        # An ORDER BY wraps the actual query; look at what is inside.
        node = node.query
        node_class = get_java_class(node)

    if node_class != "org.apache.calcite.sql.SqlSelect":
        logger.debug(
            "Not extracting output column names as the SQL is not a SELECT call"
        )
        select_names = None
    else:
        select_names = [
            self._to_sql_string(item, default_dialect=dialect)
            for item in node.getSelectList()
        ]

    logger.debug(f"Extracted relational algebra:\n {rel_string}")
    return rel, select_names, rel_string
def sql(self, sql: str) -> dd.DataFrame:
    """
    Query the registered tables with the given SQL.
    The SQL follows approximately the postgreSQL standard - however, not all
    operations are already implemented.
    In general, only select statements (no data manipulation) works.

    For more information, see :ref:`sql`.

    Example:
        In this example, a query is called
        using the registered tables and then
        executed using dask.

        .. code-block:: python

            result = c.sql("SELECT a, b FROM my_table")
            print(result.compute())

    Args:
        sql (:obj:`str`): The query string to execute

    Returns:
        :obj:`dask.dataframe.DataFrame`: the created data frame of this query.
        ``None`` is returned when the conversion yields no data container.

    Raises:
        ParsingException: If the Java side reports a validation or SQL
            parse error for the query.
    """
    try:
        # ``_get_ral`` returns (rel, select_names, rel_string); only the
        # first two are needed here. The starred target keeps this call
        # working no matter whether the trailing plan string is present.
        rel, select_names, *_ = self._get_ral(sql)
        dc = RelConverter.convert(rel, context=self)
    except (ValidationException, SqlParseException) as e:
        logger.debug(f"Original exception raised by Java:\n {e}")
        # We do not want to re-raise an exception here
        # as this would print the full java stack trace
        # if debug is not set.
        # Instead, we raise a nice exception
        raise ParsingException(sql, str(e.message())) from None

    if dc is None:
        # Nothing to hand back when the converter produced no container.
        return None

    if select_names:
        # Rename any columns named EXPR$* to a more human readable name
        cc = dc.column_container
        cc = cc.rename(
            {
                df_col: select_name if df_col.startswith("EXPR$") else df_col
                for df_col, select_name in zip(cc.columns, select_names)
            }
        )
        dc = DataContainer(dc.df, cc)

    return dc.assign()
def _get_ral(self, sql):
    """Parse ``sql`` and build the optimized relational algebra for it.

    Args:
        sql: The query string to parse.

    Returns:
        A ``(rel, select_names, rel_string)`` tuple. For nodes whose Java
        class lives under ``com.dask.sql.parser.`` the parsed node itself is
        returned as ``rel`` with ``select_names`` set to ``None`` and an
        empty ``rel_string``. Otherwise ``rel`` is the optimized plan,
        ``rel_string`` its string form and ``select_names`` the field names
        of the non-optimized plan, where ``EXPR$``-prefixed names of a
        SELECT are replaced by the SQL text of the matching select item.

    Raises:
        ParsingException: If the Java side reports a validation, SQL parse
            or Calcite context error.
    """
    # Schemas of everything that is registered at the moment.
    schemas = self._prepare_schemas()

    builder_cls = com.dask.sql.application.RelationalAlgebraGeneratorBuilder

    # Whether identifiers in the query are treated as case sensitive.
    case_sensitive = dask_config.get("sql.identifier.case_sensitive", default=True)

    builder = builder_cls(self.schema_name, case_sensitive, java.util.ArrayList())
    for schema in schemas:
        builder = builder.addSchema(schema)
    generator = builder.build()
    dialect = generator.getDialect()

    logger.debug(f"Using dialect: {get_java_class(dialect)}")

    # Java exception classes that are turned into a ParsingException below.
    java_errors = (
        org.apache.calcite.tools.ValidationException,
        org.apache.calcite.sql.parser.SqlParseException,
        org.apache.calcite.runtime.CalciteContextException,
    )

    try:
        node = generator.getSqlNode(sql)
        node_class = get_java_class(node)

        if node_class.startswith("com.dask.sql.parser."):
            # Nodes from the custom parser package are passed through as-is.
            rel, select_names, rel_string = node, None, ""
        else:
            unoptimized = generator.getRelationalAlgebra(node)
            # The optimizer may drop alias projections, so the field names
            # are read from the plan before it is optimized.
            select_names = [
                str(field) for field in unoptimized.getRowType().getFieldNames()
            ]
            rel = generator.getOptimizedRelationalAlgebra(unoptimized)
            rel_string = str(generator.getRelationalAlgebraString(rel))
    except java_errors as java_error:
        logger.debug(f"Original exception raised by Java:\n {java_error}")
        # Raising "from None" hides the Java stack trace from the user;
        # the full original error is only visible in the debug log.
        raise ParsingException(sql, str(java_error.message())) from None

    # Calcite names intermediate results EXPR$N, which is of little use to
    # the user. Such names are swapped for the SQL text of the matching
    # select item. This may fail when the outer node is not a select node.
    if node_class == "org.apache.calcite.sql.SqlOrderBy":
        # An ORDER BY wraps the actual query; look at what is inside.
        node = node.query
        node_class = get_java_class(node)

    if node_class != "org.apache.calcite.sql.SqlSelect":
        logger.debug(
            "Not extracting output column names as the SQL is not a SELECT call"
        )
    else:
        readable_names = []
        for item, current_name in zip(node.getSelectList(), select_names):
            if current_name.startswith("EXPR$"):
                readable_names.append(
                    self._to_sql_string(item, default_dialect=dialect)
                )
            else:
                readable_names.append(current_name)
        select_names = readable_names

    logger.debug(f"Extracted relational algebra:\n {rel_string}")
    return rel, select_names, rel_string