def _parse_tokens(
    tokens: Sequence[BaseSegment], config: FluffConfig, recurse: bool = True
) -> Tuple[Optional[BaseSegment], List[SQLParseError]]:
    parser = Parser(config=config)
    violations = []
    # Parse the file and log any problems
    try:
        parsed: Optional[BaseSegment] = parser.parse(tokens, recurse=recurse)
    except SQLParseError as err:
        linter_logger.info("PARSING FAILED! : %s", err)
        violations.append(err)
        return None, violations

    if parsed:
        linter_logger.info("\n###\n#\n# {}\n#\n###".format("Parsed Tree:"))
        linter_logger.info("\n" + parsed.stringify())
        # We may succeed parsing, but still have unparsable segments. Extract them here.
        for unparsable in parsed.iter_unparsables():
            # No exception has been raised explicitly, but we still create one here
            # so that we can use the common interface
            violations.append(
                SQLParseError(
                    "Line {0[0]}, Position {0[1]}: Found unparsable section: {1!r}".format(
                        unparsable.pos_marker.working_loc,
                        unparsable.raw
                        if len(unparsable.raw) < 40
                        else unparsable.raw[:40] + "...",
                    ),
                    segment=unparsable,
                )
            )
            linter_logger.info("Found unparsable segment...")
            linter_logger.info(unparsable.stringify())
    return parsed, violations

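# A minimal usage sketch of _parse_tokens (illustrative only, not from the source):
# callers lex first, then pass the token sequence and config in; the second return
# value collects SQLParseError objects both for hard parse failures and for any
# unparsable sections found in an otherwise successful parse. The dialect name and
# SQL string below are hypothetical examples.
def _example_parse_tokens_call():
    config = FluffConfig(overrides=dict(dialect="ansi"))
    tokens, _ = Lexer(config=config).lex("SELECT 1")
    parsed, violations = _parse_tokens(tokens, config)
    # `parsed` is None only if parsing raised; otherwise check `violations`
    # for unparsable sections.
    return parsed, violations
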
def test_bigquery_relational_operator_parsing(data):
    """Tests queries with a diverse mixture of relational operators."""
    # Generate a simple SELECT query with relational operators and conjunctions
    # as specified in 'data'. Note the conjunctions are used as separators
    # between comparisons, so the conjunction in the first item is not used.
    filter = []
    for i, (relation, conjunction) in enumerate(data):
        if i:
            filter.append(f" {conjunction} ")
        filter.append(f"a {relation} b")
    raw = f'SELECT * FROM t WHERE {"".join(filter)}'
    note(f"query: {raw}")
    # Load the right dialect
    config = FluffConfig(overrides=dict(dialect="bigquery"))
    tokens, lex_vs = Lexer(config=config).lex(raw)
    # From just the initial parse, check we're all there
    assert "".join(token.raw for token in tokens) == raw
    # Check we don't have lexing issues
    assert not lex_vs
    # Do the parse WITHOUT lots of logging
    # The logs get too long here to be useful. We should use
    # specific segment tests if we want to debug logs.
    parsed = Parser(config=config).parse(tokens)
    print("Post-parse structure: {0}".format(parsed.to_tuple(show_raw=True)))
    print("Post-parse structure: {0}".format(parsed.stringify()))
    # Check we're all there.
    assert parsed.raw == raw
    # Check that there's nothing unparsable
    typs = parsed.type_set()
    assert "unparsable" not in typs

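# Worked example of the query construction above (illustrative values, not from the
# source): data = [(">", "AND"), ("<", "OR")] builds the WHERE clause "a > b OR a < b",
# so the generated query is "SELECT * FROM t WHERE a > b OR a < b". The "AND" from the
# first item is never emitted, because conjunctions are only inserted as separators
# before the second and subsequent comparisons.
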
def test__dialect__base_parse_struct(
    dialect, sqlfile, code_only, yamlfile, yaml_loader
):
    """For given test examples, check parsed structure against yaml."""
    # Load the right dialect
    config = FluffConfig(overrides=dict(dialect=dialect))
    # Load the SQL
    raw = load_file(dialect, sqlfile)
    # Lex and parse the file
    tokens, _ = Lexer(config=config).lex(raw)
    parsed = Parser(config=config).parse(tokens)
    # Load the YAML
    res = yaml_loader(make_dialect_path(dialect, yamlfile))
    assert parsed.to_tuple(code_only=code_only, show_raw=True) == res

def test_bigquery_table_reference_segment_iter_raw_references(
    table_reference, reference_parts
):
    """Tests BigQuery override of TableReferenceSegment.iter_raw_references().

    The BigQuery implementation is more complex, handling:
    - hyphenated table references
    - quoted or not quoted table references
    """
    query = f"SELECT bar.user_id FROM {table_reference}"
    config = FluffConfig(overrides=dict(dialect="bigquery"))
    tokens, lex_vs = Lexer(config=config).lex(query)
    parsed = Parser(config=config).parse(tokens)
    for table_reference in parsed.recursive_crawl("table_reference"):
        actual_reference_parts = [
            orp.part for orp in table_reference.iter_raw_references()
        ]
        assert reference_parts == actual_reference_parts

def parse_example_file(dialect: str, sqlfile: str):
    """Parse example SQL file, return parse tree."""
    config = FluffConfig(overrides=dict(dialect=dialect))
    # Load the SQL
    raw = load_file(dialect, sqlfile)
    # Lex and parse the file
    tokens, _ = Lexer(config=config).lex(raw)
    tree = Parser(config=config).parse(tokens)
    return tree

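# Illustrative call of parse_example_file (the dialect and fixture filename here are
# hypothetical, not taken from the source):
#     tree = parse_example_file("ansi", "select_simple_a.sql")
# The returned tree can then be serialised (e.g. via to_tuple or as_record) for
# comparison against the YAML fixtures used elsewhere in these tests.
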
def test__dialect__base_file_parse(dialect, file):
    """For given test examples, check successful parsing."""
    raw = load_file(dialect, file)
    # Load the right dialect
    config = FluffConfig(overrides=dict(dialect=dialect))
    tokens, lex_vs = Lexer(config=config).lex(raw)
    # From just the initial parse, check we're all there
    assert "".join(token.raw for token in tokens) == raw
    # Check we don't have lexing issues
    assert not lex_vs
    # Do the parse WITHOUT lots of logging
    # The logs get too long here to be useful. We should use
    # specific segment tests if we want to debug logs.
    if raw:
        parsed = Parser(config=config).parse(tokens)
        print("Post-parse structure: {0}".format(parsed.to_tuple(show_raw=True)))
        print("Post-parse structure: {0}".format(parsed.stringify()))
        # Check we're all there.
        assert parsed.raw == raw
        # Check that there's nothing unparsable
        typs = parsed.type_set()
        assert "unparsable" not in typs
    else:
        # If it's an empty file, check that we get a ValueError
        # here. The linter handles this by *not* parsing the file,
        # but in this case, we explicitly want an error.
        with pytest.raises(ValueError):
            Parser(config=config).parse(tokens)

def test__dialect__base_file_parse(dialect, file):
    """For given test examples, check successful parsing."""
    raw = load_file(dialect, file)
    # Load the right dialect
    config = FluffConfig(overrides=dict(dialect=dialect))
    tokens, lex_vs = Lexer(config=config).lex(raw)
    # From just the initial parse, check we're all there
    assert "".join(token.raw for token in tokens) == raw
    # Check we don't have lexing issues
    assert not lex_vs
    # Do the parse WITHOUT lots of logging
    # The logs get too long here to be useful. We should use
    # specific segment tests if we want to debug logs.
    parsed = Parser(config=config).parse(tokens)
    print("Post-parse structure: {0}".format(parsed.to_tuple(show_raw=True)))
    print("Post-parse structure: {0}".format(parsed.stringify()))
    # Check we're all there.
    assert parsed.raw == raw
    # Check that there's nothing unparsable
    typs = parsed.type_set()
    assert "unparsable" not in typs

def lex_and_parse(config_overrides: Dict[str, Any], raw: str) -> Optional[BaseSegment]:
    """Performs a Lex and Parse, with cacheable inputs within fixture."""
    # Load the right dialect
    config = FluffConfig(overrides=config_overrides)
    tokens, lex_vs = Lexer(config=config).lex(raw)
    # From just the initial parse, check we're all there
    assert "".join(token.raw for token in tokens) == raw
    # Check we don't have lexing issues
    assert not lex_vs
    # TODO: Handle extremely verbose logging
    # temp - use negative grep: | grep -v "INFO\|DEBUG\|\[L\|#\|Initial\|^$"
    # better maybe - https://docs.pytest.org/en/6.2.x/logging.html#caplog-fixture
    if not raw:
        return None
    return Parser(config=config).parse(tokens)

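# Hypothetical usage sketch of the lex_and_parse helper (not from the source):
# a test passes a plain overrides dict, and an empty input short-circuits to None
# rather than raising, so callers should guard before inspecting the tree. The
# dialect and SQL string below are illustrative assumptions.
def _example_lex_and_parse_call():
    parsed = lex_and_parse({"dialect": "ansi"}, "SELECT col FROM tbl")
    if parsed is not None:
        assert "unparsable" not in parsed.type_set()
    return parsed
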
import os

import yaml

from sqlfluff.core.parser import Parser, Lexer
from sqlfluff.core import FluffConfig
from sqlfluff.cli.commands import quoted_presenter
from dialects.parse_fixtures import get_parse_fixtures, load_file

yaml.add_representer(str, quoted_presenter)

parse_success_examples, _ = get_parse_fixtures()

for example in parse_success_examples:
    dialect, sqlfile = example
    config = FluffConfig(overrides=dict(dialect=dialect))
    # Load the SQL
    raw = load_file(dialect, sqlfile)
    # Lex and parse the file
    tokens, _ = Lexer(config=config).lex(raw)
    tree = Parser(config=config).parse(tokens)
    r = None
    if tree:
        r = tree.as_record(code_only=True, show_raw=True)
    # Remove the .sql file extension
    root = sqlfile[:-4]
    path = os.path.join("test", "fixtures", "parser", dialect, root + ".yml")
    with open(path, "w", newline="\n") as f:
        if r:
            yaml.dump(r, f, default_flow_style=False)
        else:
            f.write("")