def test__templated_file_find_slice_indices_of_templated_pos(
    templated_position, inclusive, file_slices, sliced_idx_start, sliced_idx_stop
):
    """Test TemplatedFile._find_slice_indices_of_templated_pos."""
    file = TemplatedFile(source_str="Dummy String", sliced_file=file_slices)
    res_start, res_stop = file._find_slice_indices_of_templated_pos(
        templated_position, inclusive=inclusive
    )
    assert res_start == sliced_idx_start
    assert res_stop == sliced_idx_stop
def test__templated_file_templated_slice_to_source_slice(
    in_slice, out_slice, is_literal, file_slices, raw_slices
):
    """Test TemplatedFile.templated_slice_to_source_slice."""
    file = TemplatedFile(
        source_str="Dummy String", sliced_file=file_slices, raw_sliced=raw_slices
    )
    source_slice = file.templated_slice_to_source_slice(in_slice)
    literal_test = file.is_source_slice_literal(source_slice)
    assert (is_literal, source_slice) == (literal_test, out_slice)
def test__templated_file_get_line_pos_of_char_pos(
    source_str, templated_str, file_slices, in_charpos, out_line_no, out_line_pos
):
    """Test TemplatedFile.get_line_pos_of_char_pos."""
    file = TemplatedFile(
        source_str=source_str, templated_str=templated_str, sliced_file=file_slices
    )
    res_line_no, res_line_pos = file.get_line_pos_of_char_pos(in_charpos)
    assert res_line_no == out_line_no
    assert res_line_pos == out_line_pos
def test__templated_file_source_only_slices():
    """Test TemplatedFile.source_only_slices."""
    file = TemplatedFile(
        source_str=" Dummy String again ",  # NB: has length 20
        raw_sliced=[
            RawFileSlice("a" * 10, "literal", 0),
            RawFileSlice("b" * 7, "comment", 10),
            RawFileSlice("a" * 10, "literal", 17),
        ],
    )
    assert file.source_only_slices() == [RawFileSlice("b" * 7, "comment", 10)]
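# A standalone sketch of the filtering this test exercises (illustrative
# only; the real logic lives on TemplatedFile/RawFileSlice). Comments and
# block tags exist only in the source and never in the rendered output,
# so they are the "source only" slices; templated expressions are not,
# matching how the lexer code later in this collection treats them.
def source_only_slices_sketch(raw_sliced):
    source_only_types = ("comment", "block_start", "block_end", "block_mid")
    return [s for s in raw_sliced if s.slice_type in source_only_types]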
def test__templated_file_get_line_pos_of_char_pos(
    source_str, templated_str, file_slices, in_charpos, out_line_no, out_line_pos
):
    """Test TemplatedFile.get_line_pos_of_char_pos."""
    file = TemplatedFile(
        source_str=source_str,
        templated_str=templated_str,
        sliced_file=file_slices,
        fname="test",
        check_consistency=False,
    )
    res_line_no, res_line_pos = file.get_line_pos_of_char_pos(in_charpos)
    assert res_line_no == out_line_no
    assert res_line_pos == out_line_pos
def test__templated_file_templated_slice_to_source_slice(
    in_slice, out_slice, is_literal, file_slices, raw_slices
):
    """Test TemplatedFile.templated_slice_to_source_slice."""
    file = TemplatedFile(
        source_str="Dummy String",
        sliced_file=file_slices,
        raw_sliced=[
            rs if isinstance(rs, RawFileSlice) else RawFileSlice(*rs)
            for rs in raw_slices
        ],
        fname="test",
    )
    source_slice = file.templated_slice_to_source_slice(in_slice)
    literal_test = file.is_source_slice_literal(source_slice)
    assert (is_literal, source_slice) == (literal_test, out_slice)
def generate_test_segments_func(elems):
    """Roughly generate test segments.

    This function isn't totally robust, but good enough
    for testing. Use with caution.
    """
    buff = []
    raw_file = "".join(elems)
    templated_file = TemplatedFile.from_string(raw_file)
    idx = 0

    for elem in elems:
        if elem == "<indent>":
            buff.append(
                Indent(pos_marker=PositionMarker.from_point(idx, idx, templated_file))
            )
            continue
        elif elem == "<dedent>":
            buff.append(
                Dedent(pos_marker=PositionMarker.from_point(idx, idx, templated_file))
            )
            continue

        seg_kwargs = {}
        if set(elem) <= {" ", "\t"}:
            SegClass = WhitespaceSegment
        elif set(elem) <= {"\n"}:
            SegClass = NewlineSegment
        elif elem == "(":
            SegClass = SymbolSegment
            seg_kwargs = {"name": "bracket_open"}
        elif elem == ")":
            SegClass = SymbolSegment
            seg_kwargs = {"name": "bracket_close"}
        elif elem.startswith("--"):
            SegClass = CommentSegment
            seg_kwargs = {"name": "inline_comment"}
        elif elem.startswith('"'):
            SegClass = CodeSegment
            seg_kwargs = {"name": "double_quote"}
        elif elem.startswith("'"):
            SegClass = CodeSegment
            seg_kwargs = {"name": "single_quote"}
        else:
            SegClass = CodeSegment

        # Assign a position marker based on the running index.
        buff.append(
            SegClass(
                raw=elem,
                pos_marker=PositionMarker(
                    slice(idx, idx + len(elem)),
                    slice(idx, idx + len(elem)),
                    templated_file,
                ),
                **seg_kwargs,
            )
        )
        idx += len(elem)

    return tuple(buff)
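# A minimal usage sketch (inputs are illustrative). Each raw string is
# classified by the rules above, and "<indent>"/"<dedent>" markers become
# zero-width Indent/Dedent meta segments:
segments = generate_test_segments_func(["SELECT", " ", "a", "\n"])
# segments[0] is a CodeSegment for "SELECT" at slice(0, 6) in both the
# source and templated space; segments[1] is a WhitespaceSegment and
# segments[3] is a NewlineSegment, with contiguous position markers.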
def test__parser__base_segments_raw_compare():
    """Test comparison of raw segments."""
    template = TemplatedFile.from_string("foobar")
    rs1 = RawSegment("foobar", PositionMarker(slice(0, 6), slice(0, 6), template))
    rs2 = RawSegment("foobar", PositionMarker(slice(0, 6), slice(0, 6), template))
    assert rs1 == rs2
def test__parser__base_segments_stubs():
    """Test stub methods that have no implementation in base class."""
    template = TemplatedFile.from_string("foobar")
    rs1 = RawSegment("foobar", PositionMarker(slice(0, 6), slice(0, 6), template))
    base_segment = BaseSegment(segments=[rs1])

    with pytest.raises(NotImplementedError):
        base_segment.edit("foo")
def test_markers__setting_position_raw():
    """Test that we can correctly infer positions from strings & locations."""
    templ = TemplatedFile.from_string("foobar")
    # Check inference in the template
    assert templ.get_line_pos_of_char_pos(2, source=True) == (1, 3)
    assert templ.get_line_pos_of_char_pos(2, source=False) == (1, 3)
    # Now check it passes through
    pos = PositionMarker(slice(2, 5), slice(2, 5), templ)
    # Can we infer positions correctly?
    assert pos.working_loc == (1, 3)
def test__parser__base_segments_base_compare():
    """Test comparison of base segments."""
    template = TemplatedFile.from_string("foobar")
    rs1 = RawSegment("foobar", PositionMarker(slice(0, 6), slice(0, 6), template))
    rs2 = RawSegment("foobar", PositionMarker(slice(0, 6), slice(0, 6), template))

    ds1 = DummySegment([rs1])
    ds2 = DummySegment([rs2])
    dsa2 = DummyAuxSegment([rs2])

    # Check for equality
    assert ds1 == ds2
    # Check a different match on the same details is not equal
    assert ds1 != dsa2
def lex(
    self, raw: Union[str, TemplatedFile]
) -> Tuple[Tuple[BaseSegment, ...], List[SQLLexError]]:
    """Take a string or TemplatedFile and return segments.

    If we fail to match the *whole* string, then we must have
    found something that we cannot lex. If that happens we should
    package it up as unlexable and keep track of the exceptions.
    """
    # Make sure we've got a string buffer and a template
    # regardless of what was passed in.
    if isinstance(raw, str):
        template = TemplatedFile.from_string(raw)
        str_buff = raw
    else:
        template = raw
        str_buff = str(template)

    # Lex the string to get a tuple of LexedElement
    element_buffer: List[LexedElement] = []
    while True:
        res = self.lex_match(str_buff, self.lexer_matchers)
        element_buffer += res.elements
        if res.forward_string:
            resort_res = self.last_resort_lexer.match(res.forward_string)
            if not resort_res:
                # If we STILL can't match, then just panic out.
                raise SQLLexError(
                    "Fatal. Unable to lex characters: {!r}".format(
                        res.forward_string[:10] + "..."
                        if len(res.forward_string) > 9
                        else res.forward_string
                    )
                )
            str_buff = resort_res.forward_string
            element_buffer += resort_res.elements
        else:
            break

    # Map tuple LexedElement to list of TemplateElement.
    # This adds the template_slice to the object.
    templated_buffer = self.map_template_slices(element_buffer, template)

    # Turn lexed elements into segments.
    segments: Tuple[RawSegment, ...] = self.elements_to_segments(
        templated_buffer, template
    )

    # Generate any violations
    violations: List[SQLLexError] = self.violations_from_segments(segments)

    return segments, violations
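# A minimal usage sketch (import paths and the dialect override are
# assumptions based on sqlfluff's public layout; treat as illustrative
# rather than definitive):
from sqlfluff.core import FluffConfig, Lexer

lexer = Lexer(config=FluffConfig(overrides={"dialect": "ansi"}))
segments, violations = lexer.lex("SELECT 1")
# `segments` is a tuple of RawSegments ending in an EndOfFile marker;
# `violations` collects SQLLexError objects for unlexable characters.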
def test_linted_file_ignore_masked_violations(
    noqa: dict, violations: List[SQLBaseError], expected
):
    """Test that _ignore_masked_violations() correctly filters violations."""
    ignore_mask = [Linter.parse_noqa(**c) for c in noqa]
    lf = linter.LintedFile(
        path="",
        violations=violations,
        time_dict={},
        tree=None,
        ignore_mask=ignore_mask,
        templated_file=TemplatedFile.from_string(""),
    )
    result = lf._ignore_masked_violations(violations)
    expected_violations = [v for i, v in enumerate(violations) if i in expected]
    assert expected_violations == result
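# The `noqa` entries above are keyword-argument dicts consumed by
# Linter.parse_noqa. An illustrative shape (hypothetical values,
# assuming a comment/line_no style signature) would be:
#   noqa = [{"comment": "noqa: L001", "line_no": 1}]
# i.e. the raw comment text plus the line it appeared on.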
def elements_to_segments(
    self, elements: List[TemplateElement], templated_file: TemplatedFile
) -> Tuple[RawSegment, ...]:
    """Convert a tuple of lexed elements into a tuple of segments."""
    # Working buffer to build up segments
    segment_buffer: List[RawSegment] = []

    lexer_logger.info("Elements to Segments.")
    # Get the templated slices to re-insert tokens for them
    source_only_slices = templated_file.source_only_slices()
    lexer_logger.info("Source-only slices: %s", source_only_slices)
    stash_source_slice, last_source_slice = None, None

    # Now work out source slices, and add in template placeholders.
    for idx, element in enumerate(elements):
        # Calculate Source Slice
        if idx != 0:
            last_source_slice = stash_source_slice
        source_slice = templated_file.templated_slice_to_source_slice(
            element.template_slice
        )
        stash_source_slice = source_slice
        # Output the slice as we lex.
        lexer_logger.debug(
            "  %s, %s, %s, %r",
            idx,
            element,
            source_slice,
            templated_file.templated_str[element.template_slice],
        )

        # Detect when we've gone backward in the source.
        # NOTE: If it's the _same_ slice then don't insert a marker
        # because we're probably just within a single templated
        # section.
        if (
            last_source_slice
            and last_source_slice.stop > source_slice.start
            and last_source_slice != source_slice
        ):
            # If we have, insert a loop marker to reflect that.
            lexer_logger.debug("      Backward jump detected. Inserting Loop Marker")
            segment_buffer.append(
                TemplateLoop(
                    pos_marker=PositionMarker.from_point(
                        last_source_slice.stop,
                        element.template_slice.start,
                        templated_file,
                    )
                )
            )

        # The calculated source slice will include any source only slices.
        # We should consider all of them in turn to see whether we can
        # insert them.
        so_slices = []
        # Only look for source only slices if we've got a new source slice to
        # avoid unnecessary duplication.
        if last_source_slice != source_slice:
            for source_only_slice in source_only_slices:
                # If it's later in the source, stop looking. Any later
                # ones *also* won't match.
                if source_only_slice.source_idx >= source_slice.stop:
                    break
                elif source_only_slice.source_idx >= source_slice.start:
                    so_slices.append(source_only_slice)

        if so_slices:
            lexer_logger.debug("    Collected Source Only Slices")
            for so_slice in so_slices:
                lexer_logger.debug("       %s", so_slice)

            # Calculate some things which will be useful
            templ_str = templated_file.templated_str[element.template_slice]
            source_str = templated_file.source_str[source_slice]

            # For reasons which aren't entirely clear right now, if there is
            # an included literal, it will always be at the end. Let's see if
            # it's there.
            if source_str.endswith(templ_str):
                existing_len = len(templ_str)
            else:
                existing_len = 0

            # Calculate slices
            placeholder_slice = slice(
                source_slice.start, source_slice.stop - existing_len
            )
            placeholder_str = source_str[:-existing_len]
            source_slice = slice(
                source_slice.stop - existing_len, source_slice.stop
            )
            # If it doesn't manage to extract a placeholder string from the
            # source, just concatenate the source only strings. There is
            # almost always only one of them.
            if not placeholder_str:
                placeholder_str = "".join(s.raw for s in so_slices)
            # The Jinja templater sometimes returns source-only slices with
            # gaps between. For example, in this section:
            #
            #   {% else %}
            #   JOIN
            #       {{action}}_raw_effect_sizes
            #   USING
            #       ({{ states }})
            #   {% endif %}
            #
            # we might get {% else %} and {% endif %} slices, without the
            # 4 lines between. This indicates those lines were not executed.
            # In this case, generate a placeholder where the skipped code is
            # omitted but noted with a brief string, e.g.:
            #
            # "{% else %}... [103 unused template characters] ...{% endif %}"
            #
            # This is more readable -- it would be REALLY confusing for a
            # placeholder to include code that wasn't even executed!!
            if len(so_slices) >= 2:
                has_gap = False
                gap_placeholder_parts = []
                last_slice = None
                # For each slice...
                for so_slice in so_slices:
                    # If it's not the first slice, was there a gap?
                    if last_slice:
                        end_last = last_slice.source_idx + len(last_slice.raw)
                        chars_skipped = so_slice.source_idx - end_last
                        if chars_skipped:
                            # Yes, gap between last_slice and so_slice.
                            has_gap = True

                            # Generate a string documenting the gap.
                            if chars_skipped >= 10:
                                gap_placeholder_parts.append(
                                    f"... [{chars_skipped} unused template "
                                    "characters] ..."
                                )
                            else:
                                gap_placeholder_parts.append("...")
                    # Now add the slice's source.
                    gap_placeholder_parts.append(so_slice.raw)
                    last_slice = so_slice
                if has_gap:
                    placeholder_str = "".join(gap_placeholder_parts)
            lexer_logger.debug(
                "    Overlap Length: %s. PS: %s, LS: %s, p_str: %r, templ_str: %r",
                existing_len,
                placeholder_slice,
                source_slice,
                placeholder_str,
                templ_str,
            )

            # Calculate potential indent/dedent
            block_slices = sum(
                s.slice_type.startswith("block_") for s in so_slices
            )
            block_balance = sum(
                s.slice_type == "block_start" for s in so_slices
            ) - sum(s.slice_type == "block_end" for s in so_slices)
            lead_dedent = so_slices[0].slice_type in ("block_end", "block_mid")
            trail_indent = so_slices[-1].slice_type in ("block_start", "block_mid")
            add_indents = self.config.get("template_blocks_indent", "indentation")
            lexer_logger.debug(
                "    Block Slices: %s. Block Balance: %s. Lead: %s, Trail: %s, "
                "Add: %s",
                block_slices,
                block_balance,
                lead_dedent,
                trail_indent,
                add_indents,
            )

            # Add a dedent if appropriate.
            if lead_dedent and add_indents:
                lexer_logger.debug("      DEDENT")
                segment_buffer.append(
                    Dedent(
                        pos_marker=PositionMarker.from_point(
                            placeholder_slice.start,
                            element.template_slice.start,
                            templated_file,
                        )
                    )
                )

            # Always add a placeholder
            segment_buffer.append(
                TemplateSegment(
                    pos_marker=PositionMarker(
                        placeholder_slice,
                        slice(
                            element.template_slice.start,
                            element.template_slice.start,
                        ),
                        templated_file,
                    ),
                    source_str=placeholder_str,
                    block_type=so_slices[0].slice_type
                    if len(so_slices) == 1
                    else "compound",
                )
            )
            lexer_logger.debug(
                "      Placeholder: %s, %r", segment_buffer[-1], placeholder_str
            )

            # Add an indent if appropriate.
            if trail_indent and add_indents:
                lexer_logger.debug("      INDENT")
                segment_buffer.append(
                    Indent(
                        is_template=True,
                        pos_marker=PositionMarker.from_point(
                            placeholder_slice.stop,
                            element.template_slice.start,
                            templated_file,
                        ),
                    )
                )

        # Add the actual segment
        segment_buffer.append(
            element.to_segment(
                pos_marker=PositionMarker(
                    source_slice,
                    element.template_slice,
                    templated_file,
                ),
            )
        )

        # Generate placeholders for any source-only slices that *follow*
        # the last element. This happens, for example, if a Jinja templated
        # file ends with "{% endif %}", and there's no trailing newline.
        if idx == len(elements) - 1:
            so_slices = [
                so
                for so in source_only_slices
                if so.source_idx >= source_slice.stop
            ]
            for so_slice in so_slices:
                segment_buffer.append(
                    TemplateSegment(
                        pos_marker=PositionMarker(
                            slice(so_slice.source_idx, so_slice.end_source_idx()),
                            slice(
                                element.template_slice.stop,
                                element.template_slice.stop,
                            ),
                            templated_file,
                        ),
                        source_str=so_slice.raw,
                        block_type=so_slice.slice_type,
                    )
                )

    # Add an end of file marker
    segment_buffer.append(
        EndOfFile(
            pos_marker=segment_buffer[-1].pos_marker.end_point_marker()
            if segment_buffer
            else PositionMarker.from_point(0, 0, templated_file)
        )
    )

    # Convert to tuple before return
    return tuple(segment_buffer)
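# Worked sketch of the gap-placeholder arithmetic above (numbers are
# illustrative): given {% else %} at source_idx=50 (length 10) and
# {% endif %} at source_idx=163, the gap is 163 - (50 + 10) = 103
# characters, which is >= 10, so the compound placeholder string becomes:
#   "{% else %}... [103 unused template characters] ...{% endif %}"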
def enrich_segments(
    segment_buff: Tuple[BaseSegment, ...], templated_file: TemplatedFile
) -> Tuple[BaseSegment, ...]:
    """Enrich the segments using the templated file.

    We use the mapping in the template to provide positions
    in the source file.
    """
    # Make a new buffer to hold the enriched segments.
    # We need a new buffer to hold the new meta segments
    # introduced.
    new_segment_buff = []
    # Get the templated slices to re-insert tokens for them
    source_only_slices = templated_file.source_only_slices()

    lexer_logger.info(
        "Enriching Segments. Source-only slices: %s", source_only_slices
    )

    for segment in segment_buff:
        templated_slice = slice(
            segment.pos_marker.char_pos,
            segment.pos_marker.char_pos + len(segment.raw),
        )
        source_slice = templated_file.templated_slice_to_source_slice(
            templated_slice
        )

        # At this stage, templated slices will be INCLUDED in the source slice,
        # so we should consider whether we've captured any. If we have then
        # we need to re-evaluate whether it's a literal or not.
        for source_only_slice in source_only_slices:
            if source_only_slice.source_idx > source_slice.start:
                break
            elif source_only_slice.source_idx == source_slice.start:
                lexer_logger.debug(
                    "Found templated section! %s, %s, %s",
                    source_only_slice.source_slice(),
                    source_only_slice.slice_type,
                    templated_slice.start,
                )
                # Adjust the source slice accordingly.
                source_slice = slice(
                    source_only_slice.end_source_idx(), source_slice.stop
                )

                # Add segments as appropriate.
                # If it's a block end or mid, add a dedent.
                if source_only_slice.slice_type in ("block_end", "block_mid"):
                    new_segment_buff.append(
                        Dedent.when(template_blocks_indent=True)(
                            pos_marker=segment.pos_marker
                        )
                    )
                # Always add a placeholder
                new_segment_buff.append(
                    TemplateSegment(
                        pos_marker=segment.pos_marker,
                        source_str=source_only_slice.raw,
                        block_type=source_only_slice.slice_type,
                    )
                )
                # If it's a block start or mid, add an indent.
                if source_only_slice.slice_type in ("block_start", "block_mid"):
                    new_segment_buff.append(
                        Indent.when(template_blocks_indent=True)(
                            pos_marker=segment.pos_marker
                        )
                    )

        source_line, source_pos = templated_file.get_line_pos_of_char_pos(
            source_slice.start
        )

        # Recalculate is_literal
        is_literal = templated_file.is_source_slice_literal(source_slice)

        segment.pos_marker = EnrichedFilePositionMarker(
            statement_index=segment.pos_marker.statement_index,
            line_no=segment.pos_marker.line_no,
            line_pos=segment.pos_marker.line_pos,
            char_pos=segment.pos_marker.char_pos,
            templated_slice=templated_slice,
            source_slice=source_slice,
            is_literal=is_literal,
            source_pos_marker=FilePositionMarker(
                segment.pos_marker.statement_index,
                source_line,
                source_pos,
                source_slice.start,
            ),
        )
        new_segment_buff.append(segment)

    lexer_logger.debug("Enriched Segments:")
    for seg in new_segment_buff:
        lexer_logger.debug(
            "\tTmp: %s\tSrc: %s\tSeg: %s",
            getattr(seg.pos_marker, "templated_slice", None),
            getattr(seg.pos_marker, "source_slice", None),
            seg,
        )

    return tuple(new_segment_buff)
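# Reading the enrichment above (values illustrative): a segment with
# pos_marker.char_pos == 5 and raw "abc" yields templated_slice
# slice(5, 8); templated_slice_to_source_slice then maps that back into
# the source, and any source-only slice starting exactly at
# source_slice.start is re-inserted ahead of it as a TemplateSegment
# placeholder, with Indent/Dedent meta segments around block tags.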
def test_markers__setting_position_working():
    """Test that we can correctly set positions manually."""
    templ = TemplatedFile.from_string("foobar")
    pos = PositionMarker(slice(2, 5), slice(2, 5), templ, 4, 4)
    # Can we NOT infer when we're told?
    assert pos.working_loc == (4, 4)
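# Point markers use the same machinery; as in the elements_to_segments
# code in this collection, PositionMarker.from_point(2, 2, templ) creates
# a zero-width marker at source/templated offset 2, which for "foobar"
# is working location (1, 3) per the inference test further up.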
def elements_to_segments(
    self, elements: List[TemplateElement], templated_file: TemplatedFile
) -> Tuple[RawSegment, ...]:
    """Convert a tuple of lexed elements into a tuple of segments."""
    # Working buffer to build up segments
    segment_buffer: List[RawSegment] = []

    lexer_logger.info("Elements to Segments.")
    # Get the templated slices to re-insert tokens for them
    source_only_slices = templated_file.source_only_slices()
    lexer_logger.info("Source-only slices: %s", source_only_slices)
    stash_source_slice, last_source_slice = None, None

    # Now work out source slices, and add in template placeholders.
    for idx, element in enumerate(elements):
        # Calculate Source Slice
        if idx != 0:
            last_source_slice = stash_source_slice
        source_slice = templated_file.templated_slice_to_source_slice(
            element.template_slice
        )
        stash_source_slice = source_slice
        # Output the slice as we lex.
        lexer_logger.debug(
            "  %s, %s, %s, %r",
            idx,
            element,
            source_slice,
            templated_file.templated_str[element.template_slice],
        )

        # The calculated source slice will include any source only slices.
        # We should consider all of them in turn to see whether we can
        # insert them.
        so_slices = []
        # Only look for source only slices if we've got a new source slice to
        # avoid unnecessary duplication.
        if last_source_slice != source_slice:
            for source_only_slice in source_only_slices:
                # If it's later in the source, stop looking. Any later
                # ones *also* won't match.
                if source_only_slice.source_idx >= source_slice.stop:
                    break
                elif source_only_slice.source_idx >= source_slice.start:
                    so_slices.append(source_only_slice)

        if so_slices:
            lexer_logger.debug("    Collected Source Only Slices")
            for so_slice in so_slices:
                lexer_logger.debug("       %s", so_slice)

            # Calculate some things which will be useful
            templ_str = templated_file.templated_str[element.template_slice]
            source_str = templated_file.source_str[source_slice]

            # For reasons which aren't entirely clear right now, if there is
            # an included literal, it will always be at the end. Let's see if
            # it's there.
            if source_str.endswith(templ_str):
                existing_len = len(templ_str)
            else:
                existing_len = 0

            # Calculate slices
            placeholder_slice = slice(
                source_slice.start, source_slice.stop - existing_len
            )
            placeholder_str = source_str[:-existing_len]
            source_slice = slice(
                source_slice.stop - existing_len, source_slice.stop
            )
            # If it doesn't manage to extract a placeholder string from the
            # source, just concatenate the source only strings. There is
            # almost always only one of them.
            if not placeholder_str:
                placeholder_str = "".join(s.raw for s in so_slices)

            lexer_logger.debug(
                "    Overlap Length: %s. PS: %s, LS: %s, p_str: %r, templ_str: %r",
                existing_len,
                placeholder_slice,
                source_slice,
                placeholder_str,
                templ_str,
            )

            # Calculate potential indent/dedent
            block_slices = sum(
                s.slice_type.startswith("block_") for s in so_slices
            )
            block_balance = sum(
                s.slice_type == "block_start" for s in so_slices
            ) - sum(s.slice_type == "block_end" for s in so_slices)
            lead_dedent = so_slices[0].slice_type in ("block_end", "block_mid")
            trail_indent = so_slices[-1].slice_type in ("block_start", "block_mid")
            add_indents = self.config.get("template_blocks_indent", "indentation")
            lexer_logger.debug(
                "    Block Slices: %s. Block Balance: %s. Lead: %s, Trail: %s, "
                "Add: %s",
                block_slices,
                block_balance,
                lead_dedent,
                trail_indent,
                add_indents,
            )

            # Add a dedent if appropriate.
            if lead_dedent and add_indents:
                lexer_logger.debug("      DEDENT")
                segment_buffer.append(
                    Dedent(
                        pos_marker=PositionMarker.from_point(
                            placeholder_slice.start,
                            element.template_slice.start,
                            templated_file,
                        )
                    )
                )

            # Always add a placeholder
            segment_buffer.append(
                TemplateSegment(
                    pos_marker=PositionMarker(
                        placeholder_slice,
                        slice(
                            element.template_slice.start,
                            element.template_slice.start,
                        ),
                        templated_file,
                    ),
                    source_str=placeholder_str,
                    block_type=so_slices[0].slice_type
                    if len(so_slices) == 1
                    else "compound",
                )
            )
            lexer_logger.debug(
                "      Placeholder: %s, %r", segment_buffer[-1], placeholder_str
            )

            # Add an indent if appropriate.
            if trail_indent and add_indents:
                lexer_logger.debug("      INDENT")
                segment_buffer.append(
                    Indent(
                        pos_marker=PositionMarker.from_point(
                            placeholder_slice.stop,
                            element.template_slice.start,
                            templated_file,
                        )
                    )
                )

        # Add the actual segment
        segment_buffer.append(
            element.to_segment(
                pos_marker=PositionMarker(
                    source_slice,
                    element.template_slice,
                    templated_file,
                ),
            )
        )

    # Convert to tuple before return
    return tuple(segment_buffer)
def generate_test_segments_func(elems):
    """Roughly generate test segments.

    This function isn't totally robust, but good enough
    for testing. Use with caution.
    """
    buff = []
    raw_file = "".join(elems)
    templated_file = TemplatedFile.from_string(raw_file)
    idx = 0

    for elem in elems:
        if elem == "<indent>":
            buff.append(
                Indent(pos_marker=PositionMarker.from_point(idx, idx, templated_file))
            )
            continue
        elif elem == "<dedent>":
            buff.append(
                Dedent(pos_marker=PositionMarker.from_point(idx, idx, templated_file))
            )
            continue

        if set(elem) <= {" ", "\t"}:
            cls = RawSegment.make(
                " ", name="whitespace", type="whitespace", _is_code=False
            )
        elif set(elem) <= {"\n"}:
            cls = RawSegment.make(
                "\n", name="newline", type="newline", _is_code=False
            )
        elif elem == "(":
            cls = RawSegment.make("(", name="bracket_open")
        elif elem == ")":
            cls = RawSegment.make(")", name="bracket_close")
        elif elem.startswith("--"):
            cls = RawSegment.make("--", name="inline_comment", _is_code=False)
        elif elem.startswith('"'):
            cls = RawSegment.make('"', name="double_quote")
        elif elem.startswith("'"):
            cls = RawSegment.make("'", name="single_quote")
        else:
            cls = RawSegment.make("")

        # Assign a position marker based on the running index.
        buff.append(
            cls(
                elem,
                pos_marker=PositionMarker(
                    slice(idx, idx + len(elem)),
                    slice(idx, idx + len(elem)),
                    templated_file,
                ),
            )
        )
        idx += len(elem)

    return tuple(buff)
def test__linted_file__slice_source_file_using_patches(
    source_patches, source_only_slices, raw_source_string, expected_result, caplog
):
    """Test _slice_source_file_using_patches.

    This is part of fix_string().
    """
    with caplog.at_level(logging.DEBUG, logger="sqlfluff.linter"):
        result = LintedFile._slice_source_file_using_patches(
            source_patches, source_only_slices, raw_source_string
        )
    assert result == expected_result


templated_file_1 = TemplatedFile.from_string("abc")
templated_file_2 = TemplatedFile(
    "{# blah #}{{ foo }}bc",
    "<testing>",
    "abc",
    [
        TemplatedFileSlice("comment", slice(0, 10), slice(0, 0)),
        TemplatedFileSlice("templated", slice(10, 19), slice(0, 1)),
        TemplatedFileSlice("literal", slice(19, 21), slice(1, 3)),
    ],
    [
        RawFileSlice("{# blah #}", "comment", 0),
        RawFileSlice("{{ foo }}", "templated", 10),
        RawFileSlice("bc", "literal", 19),
    ],
)
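# Reading templated_file_2's slice mapping above: templated character 0
# ("a") renders from the {{ foo }} expression at source slice(10, 19);
# templated slice(1, 3) ("bc") is the literal at source slice(19, 21);
# and the {# blah #} comment maps to the zero-width templated
# slice(0, 0), i.e. it exists only in the source.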
    )
    source_slice = file.templated_slice_to_source_slice(in_slice)
    literal_test = file.is_source_slice_literal(source_slice)
    assert (is_literal, source_slice) == (literal_test, out_slice)


@pytest.mark.parametrize(
    "file,expected_result",
    [
        # Comment example
        (
            TemplatedFile(
                source_str="a" * 20,
                fname="test",
                raw_sliced=[
                    RawFileSlice("a" * 10, "literal", 0),
                    RawFileSlice("{# b #}", "comment", 10),
                    RawFileSlice("a" * 10, "literal", 17),
                ],
                check_consistency=False,
            ),
            [RawFileSlice("{# b #}", "comment", 10)],
        ),
        # Template tags aren't source only.
        (
            TemplatedFile(
                source_str="aaabbbaaa",
                fname="test",
                raw_sliced=[
                    RawFileSlice("aaa", "literal", 0),
                    RawFileSlice("{{ b }}", "templated", 3),
                    RawFileSlice("aaa", "literal", 6),
def elements_to_segments(
    self, elements: List[TemplateElement], templated_file: TemplatedFile
) -> Tuple[RawSegment, ...]:
    """Convert a tuple of lexed elements into a tuple of segments."""
    # Working buffer to build up segments
    segment_buffer: List[RawSegment] = []

    lexer_logger.info("Elements to Segments.")
    # Get the templated slices to re-insert tokens for them
    source_only_slices = templated_file.source_only_slices()
    lexer_logger.info("Source-only slices: %s", source_only_slices)

    # Now work out source slices, and add in template placeholders.
    for element in elements:
        # Calculate Source Slice
        source_slice = templated_file.templated_slice_to_source_slice(
            element.template_slice
        )
        # The calculated source slice will include any source only slices.
        # We should consider all of them in turn to see whether we can
        # insert them.
        for source_only_slice in source_only_slices:
            # If it's later in the source, stop looking. Any later
            # ones *also* won't match.
            if source_only_slice.source_idx > source_slice.start:
                break
            # Is there a templated section within this source slice?
            # If there is then for some reason I can't quite explain,
            # it will always be at the start of the section. This is
            # very convenient because it means we'll always have the
            # start and end of it in a definite position. This makes
            # slicing and looping much easier.
            elif source_only_slice.source_idx == source_slice.start:
                lexer_logger.debug(
                    "Found templated section! %s, %s, %s",
                    source_only_slice.source_slice(),
                    source_only_slice.slice_type,
                    element.template_slice.start,
                )
                # Calculate a slice for any placeholders
                placeholder_source_slice = slice(
                    source_slice.start, source_only_slice.end_source_idx()
                )
                # Adjust the source slice accordingly.
                source_slice = slice(
                    source_only_slice.end_source_idx(), source_slice.stop
                )

                # TODO: Readjust this to remove .when once ProtoSegment is in.
                # Add segments as appropriate.
                # If it's a block end or mid, add a dedent.
                if source_only_slice.slice_type in ("block_end", "block_mid"):
                    segment_buffer.append(
                        Dedent.when(template_blocks_indent=True)(
                            pos_marker=PositionMarker.from_point(
                                placeholder_source_slice.start,
                                element.template_slice.start,
                                templated_file,
                            )
                        )
                    )
                # Always add a placeholder
                segment_buffer.append(
                    TemplateSegment(
                        pos_marker=PositionMarker(
                            placeholder_source_slice,
                            slice(
                                element.template_slice.start,
                                element.template_slice.start,
                            ),
                            templated_file,
                        ),
                        source_str=source_only_slice.raw,
                        block_type=source_only_slice.slice_type,
                    )
                )
                # If it's a block start or mid, add an indent.
                if source_only_slice.slice_type in ("block_start", "block_mid"):
                    segment_buffer.append(
                        Indent.when(template_blocks_indent=True)(
                            pos_marker=PositionMarker.from_point(
                                placeholder_source_slice.stop,
                                element.template_slice.start,
                                templated_file,
                            )
                        )
                    )

        # Add the actual segment
        segment_buffer.append(
            element.to_segment(
                pos_marker=PositionMarker(
                    source_slice,
                    element.template_slice,
                    templated_file,
                ),
            )
        )

    # Convert to tuple before return
    return tuple(segment_buffer)
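# Worked sketch of the adjustment above (values illustrative): if the
# computed source slice is slice(10, 40) and a source-only slice
# {% if x %} sits at source_idx=10 with end_source_idx() == 20, then
# placeholder_source_slice becomes slice(10, 20) and the element's own
# source slice shrinks to slice(20, 40).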
def _generate_source_patches(
    cls, tree: BaseSegment, templated_file: TemplatedFile
) -> List[FixPatch]:
    """Use the fixed tree to generate source patches.

    Importantly here we deduplicate and sort the patches
    from their position in the templated file into their
    intended order in the source file.
    """
    # Iterate patches, filtering and translating as we go:
    linter_logger.debug("### Beginning Patch Iteration.")
    filtered_source_patches = []
    dedupe_buffer = []
    # We use enumerate so that we get an index for each patch. This is
    # entirely so that when debugging logs we can find a given patch again!
    for idx, patch in enumerate(tree.iter_patches(templated_file=templated_file)):
        linter_logger.debug("  %s Yielded patch: %s", idx, patch)
        cls._log_hints(patch, templated_file)

        # Check for duplicates
        if patch.dedupe_tuple() in dedupe_buffer:
            linter_logger.info(
                "      - Skipping. Source space Duplicate: %s",
                patch.dedupe_tuple(),
            )
            continue

        # We now evaluate patches in the source-space for whether they overlap
        # or disrupt any templated sections.
        # The intent here is that unless explicitly stated, a fix should never
        # disrupt a templated section.
        # NOTE: We rely here on the patches being generated in order.
        # TODO: Implement a mechanism for doing templated section fixes. Given
        # these patches are currently generated from fixed segments, there will
        # likely need to be an entirely different mechanism.

        # Get the affected raw slices.
        local_raw_slices = templated_file.raw_slices_spanning_source_slice(
            patch.source_slice
        )
        local_type_list = [slc.slice_type for slc in local_raw_slices]

        # Deal with the easy cases of 1) New code at end 2) only literals
        if not local_type_list or set(local_type_list) == {"literal"}:
            linter_logger.info(
                "      * Keeping patch on new or literal-only section: %s",
                patch,
            )
            filtered_source_patches.append(patch)
            dedupe_buffer.append(patch.dedupe_tuple())
        # Handle the easy case of an explicit source fix
        elif patch.patch_category == "source":
            linter_logger.info(
                "      * Keeping explicit source fix patch: %s",
                patch,
            )
            filtered_source_patches.append(patch)
            dedupe_buffer.append(patch.dedupe_tuple())
        # Is it a zero length patch?
        elif (
            patch.source_slice.start == patch.source_slice.stop
            and patch.source_slice.start == local_raw_slices[0].source_idx
        ):
            linter_logger.info(
                "      * Keeping insertion patch on slice boundary: %s",
                patch,
            )
            filtered_source_patches.append(patch)
            dedupe_buffer.append(patch.dedupe_tuple())
        else:
            # We've got a situation where the ends of our patch need to be
            # more carefully mapped. Likely because we're greedily including
            # a section of source templating with our fix and we need to work
            # around it gracefully.

            # Identify all the places the string appears in the source content.
            positions = list(findall(patch.templated_str, patch.source_str))
            if len(positions) != 1:
                # NOTE: This section is not covered in tests. While we
                # don't have an example of its use (we should), the
                # code after this relies on there being only one
                # instance found - so the safety check remains.
                linter_logger.debug(  # pragma: no cover
                    "      - Skipping edit patch on non-unique templated "
                    "content: %s",
                    patch,
                )
                continue  # pragma: no cover

            # We have a single occurrence of the thing we want to patch. This
            # means we can use its position to place our patch.
            new_source_slice = slice(
                patch.source_slice.start + positions[0],
                patch.source_slice.start + positions[0] + len(patch.templated_str),
            )
            linter_logger.debug(
                "      * Keeping Tricky Case. Positions: %s, New Slice: %s, "
                "Patch: %s",
                positions,
                new_source_slice,
                patch,
            )
            patch.source_slice = new_source_slice
            filtered_source_patches.append(patch)
            dedupe_buffer.append(patch.dedupe_tuple())
            continue

    # Sort the patches before building up the file.
    return sorted(filtered_source_patches, key=lambda x: x.source_slice.start)
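# Worked sketch of the "tricky case" remapping above (numbers are
# illustrative): if patch.source_slice is slice(100, 140) and
# patch.templated_str (length 9) is found exactly once at offset 12
# within patch.source_str, the remapped slice becomes
# slice(100 + 12, 100 + 12 + 9) == slice(112, 121).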