def _match_once(
    self, segments: Tuple[BaseSegment, ...], parse_context: ParseContext
) -> MatchResult:
    """Try a single match of the leading segments against our options.

    This is the core of OneOf, and is also reused as a building block by
    AnyNumberOf.

    Args:
        segments: The segments to match against.
        parse_context: The current parse context. The option matching itself
            runs one level deeper, via ``parse_context.deeper_match()``.

    Returns:
        The match chosen by ``_longest_trimmed_match`` across the pruned
        options, or an unmatched result holding ``segments`` when pruning
        leaves no options at all.
    """
    # Narrow the field first: pruning gives us a cheap early exit when none
    # of the options can apply to these segments.
    candidates, _ = self._prune_options(segments, parse_context=parse_context)
    if not candidates:
        # Every option was pruned away, so there is nothing to try.
        return MatchResult.from_unmatched(segments)

    with parse_context.deeper_match() as ctx:
        best_match, _ = self._longest_trimmed_match(
            segments,
            candidates,
            parse_context=ctx,
            trim_noncode=False,
        )
    return best_match
def match(
    self, segments: Tuple[BaseSegment, ...], parse_context: ParseContext
) -> MatchResult:
    """Match against any of the elements a relevant number of times.

    If it matches multiple, it returns the longest, and if any are the same
    length it returns the first (unless we explicitly just match first).

    Args:
        segments: The segments to match against.
        parse_context: The current parse context.

    Returns:
        A ``MatchResult`` holding everything matched across the repeated
        attempts plus whatever is left over. If the ``exclude`` grammar
        matches, or fewer than ``min_times`` matches were found, the result
        is an unmatched result holding the *original* ``segments``.
    """
    # First if we have an *exclude* option, we should check that
    # which would prevent the rest of this grammar from matching.
    if self.exclude:
        with parse_context.deeper_match() as ctx:
            if self.exclude.match(segments, parse_context=ctx):
                return MatchResult.from_unmatched(segments)

    # Match on each of the options
    matched_segments: MatchResult = MatchResult.from_empty()
    unmatched_segments: Tuple[BaseSegment, ...] = segments
    n_matches = 0
    while True:
        if self.max_times and n_matches >= self.max_times:
            # We've matched as many times as we can
            return MatchResult(
                matched_segments.matched_segments, unmatched_segments
            )

        # Is there anything left to match?
        if len(unmatched_segments) == 0:
            # No...
            if n_matches >= self.min_times:
                return MatchResult(
                    matched_segments.matched_segments, unmatched_segments
                )
            # We didn't meet the hurdle. Hand back the *original* input:
            # `unmatched_segments` is empty here, so returning it would
            # silently drop everything consumed by earlier iterations.
            return MatchResult.from_unmatched(segments)

        # If we've already matched once...
        if n_matches > 0 and self.allow_gaps:
            # Consume any non-code if there is any
            pre_seg, mid_seg, post_seg = trim_non_code_segments(
                unmatched_segments
            )
            unmatched_segments = mid_seg + post_seg
        else:
            pre_seg = ()  # empty tuple

        match = self._match_once(unmatched_segments, parse_context=parse_context)
        if match:
            matched_segments += pre_seg + match.matched_segments
            unmatched_segments = match.unmatched_segments
            n_matches += 1
            continue

        # If we get here, then we've not managed to match. And the next
        # unmatched segments are meaningful, i.e. they're not what we're
        # looking for.
        if n_matches >= self.min_times:
            return MatchResult(
                matched_segments.matched_segments,
                pre_seg + unmatched_segments,
            )
        # We didn't meet the hurdle. As above, fail with the full original
        # input rather than only the unconsumed tail (which also lacks the
        # non-code `pre_seg` trimmed off in this iteration).
        return MatchResult.from_unmatched(segments)
def match(
    self, segments: Tuple[BaseSegment, ...], parse_context: ParseContext
) -> "MatchResult":
    """Match a list of segments against this segment.

    Matching can be done from either the raw or the segments.
    This raw function can be overridden, or a grammar defined
    on the underlying class.

    The match element of Ref, also implements the caching
    using the parse_context `denylist` methods.

    Args:
        segments: The segments to match against.
        parse_context: The current parse context. Its ``dialect`` is used to
            resolve the referenced element and its ``denylist`` is used as
            the cache of previously failed attempts.

    Returns:
        Whatever the referenced element's ``match`` returns, or an unmatched
        result holding ``segments`` if the ``exclude`` grammar matches or the
        denylist reports this attempt as already tried.
    """
    elem = self._get_elem(dialect=parse_context.dialect)

    # First if we have an *exclude* option, we should check that
    # which would prevent the rest of this grammar from matching.
    if self.exclude:
        with parse_context.deeper_match() as ctx:
            if self.exclude.match(segments, parse_context=ctx):
                return MatchResult.from_unmatched(segments)

    # First check against the efficiency Cache.
    # We rely on segments not being mutated within a given
    # match cycle and so the ids should continue to refer to unchanged
    # objects.
    # This must be a real tuple. A bare generator expression is a fresh
    # object on every call which hashes/compares by identity (so a lookup
    # keyed on it can never hit) and can only be consumed once (so it could
    # not be safely handed to both `check` and `mark`).
    seg_tuple = tuple(id(seg) for seg in segments)
    self_name = self._get_ref()
    if parse_context.denylist.check(self_name, seg_tuple):  # pragma: no cover TODO?
        # This has been tried before.
        parse_match_logging(
            self.__class__.__name__,
            "match",
            "SKIP",
            parse_context=parse_context,
            v_level=3,
            self_name=self_name,
        )
        return MatchResult.from_unmatched(segments)

    # Match against that. NB We're not incrementing the match_depth here.
    # References shouldn't really count as a depth of match.
    with parse_context.matching_segment(self._get_ref()) as ctx:
        resp = elem.match(segments=segments, parse_context=ctx)
    if not resp:
        # Remember the failure so that the same attempt can be skipped.
        parse_context.denylist.mark(self_name, seg_tuple)
    return resp
def match(
    cls, segments: Tuple["BaseSegment", ...], parse_context: ParseContext
) -> MatchResult:
    """Match a sequence of segments against this segment class.

    Note: matching for segments happens in the ABSTRACT. Once we deal with
    concrete instances we are always in parse, which is what happens during
    expand.

    The work is delegated to the class-level ``match_grammar``. A subclass
    can either define that grammar or override this method.

    Args:
        segments: The segments to match against.
        parse_context: The current parse context.

    Returns:
        A ``MatchResult`` whose matched part is a single new instance of
        ``cls`` wrapping the grammar's matched segments, or an unmatched
        result holding ``segments`` when the grammar finds nothing. If the
        first segment is already an instance of ``cls`` it is returned as
        the match directly.

    Raises:
        TypeError: If the grammar returns something other than a
            ``MatchResult``.
        NotImplementedError: If the class has no ``match_grammar``.
    """
    # Edge case: the leading segment may *already* be an instance of this
    # class from a previous cycle. If so there is nothing more to do.
    leading_is_self = len(segments) >= 1 and isinstance(segments[0], cls)
    if leading_is_self:
        parse_match_logging(
            cls.__name__,
            "_match",
            "SELF",
            parse_context=parse_context,
            v_level=3,
            symbol="+++",
        )
        if len(segments) == 1:
            # The whole input is the already-matched segment. Winner.
            return MatchResult.from_matched(segments)
        # Already matched, but only partially: the rest stays unmatched.
        return MatchResult((segments[0],), segments[1:])

    if not cls.match_grammar:
        raise NotImplementedError(
            "{0} has no match function implemented".format(cls.__name__)
        )

    # Hand over to the grammar, one match level deeper.
    with parse_context.deeper_match() as ctx:
        grammar_match = cls.match_grammar.match(
            segments=segments, parse_context=ctx
        )

    # Be strict about the type which comes back so that everything below
    # can treat it purely as a MatchResult.
    if not isinstance(grammar_match, MatchResult):
        raise TypeError(
            "[PD:{0} MD:{1}] {2}.match. Result is {3}, not a MatchResult!".format(
                parse_context.parse_depth,
                parse_context.match_depth,
                cls.__name__,
                type(grammar_match),
            )
        )

    if not grammar_match.has_match():
        return MatchResult.from_unmatched(segments)

    # Wrap what was matched in a fresh instance of this class.
    wrapped = cls(segments=grammar_match.matched_segments)
    return MatchResult((wrapped,), grammar_match.unmatched_segments)
def match(
    self, segments: Tuple[BaseSegment, ...], parse_context: ParseContext
) -> MatchResult:
    """Match any number of elements separated by a delimiter.

    If several elements were configured, they are treated as alternative
    options for each delimited item, not as a sequence.

    The approach is to look ahead (bracket-aware) for the next delimiter or
    terminator, require that the section before it completely matches one of
    the elements, and repeat. When no further delimiter or terminator is
    found, the remainder is matched as the final item.

    Args:
        segments: The segments to match against.
        parse_context: The current parse context.

    Returns:
        A ``MatchResult`` for the delimited run, or an unmatched result
        holding the original ``segments`` on failure.

    Raises:
        RuntimeError: If the look-ahead reports a matcher which is neither
            the delimiter, the terminator nor the gap matcher.
    """
    # An empty input gives an empty result.
    if len(segments) == 0:
        return MatchResult.from_empty()

    remaining = segments
    matched_so_far = MatchResult.from_empty()
    # How many delimiters we have seen (checked against min_delimiters).
    n_delimiters = 0

    while True:
        # Have we used up the buffer? If so the previous thing consumed was
        # a delimiter, so this is potentially a trailing-delimiter case.
        if len(remaining) == 0:
            matched_so_far += remaining
            trailing_ok = self.allow_trailing and (
                self.min_delimiters is None or n_delimiters >= self.min_delimiters
            )
            if trailing_ok:
                # Nothing is left, so there are no unmatched segments.
                return MatchResult.from_matched(matched_so_far.matched_segments)
            return MatchResult.from_unmatched(segments)

        # Look ahead for a delimiter or a terminator. Bracket counting is
        # left to _bracket_sensitive_look_ahead_match.
        lookahead_matchers = [self.delimiter]
        if self.terminator:
            lookahead_matchers.append(self.terminator)
        # Where gaps aren't allowed, any non-code segment terminates.
        if not self.allow_gaps:
            lookahead_matchers.append(NonCodeMatcher())

        with parse_context.deeper_match() as ctx:
            (
                section,
                boundary_match,
                boundary_matcher,
            ) = self._bracket_sensitive_look_ahead_match(
                remaining,
                lookahead_matchers,
                parse_context=ctx,
            )
        # Remember how long the section before the boundary is; it is used
        # to slice the buffer if the boundary turns out to be a terminator.
        section_len = len(section)

        if not boundary_match:
            # No delimiter or terminator ahead, so we're at the end and
            # should look for a final (possibly partial) match. We know the
            # buffer is non-empty because that was checked above.
            # With too few delimiters there is no point in even trying.
            if self.min_delimiters and n_delimiters < self.min_delimiters:
                return MatchResult.from_unmatched(segments)

            # Take the longest match from any element, hoovering up
            # whitespace if gaps are allowed.
            with parse_context.deeper_match() as ctx:
                tail_match, _ = self._longest_trimmed_match(
                    remaining,
                    self._elements,
                    parse_context=ctx,
                    trim_noncode=self.allow_gaps,
                )

            if tail_match:
                if tail_match.unmatched_segments:
                    # Something is left over, so pass it on as unmatched.
                    return MatchResult(
                        matched_so_far.matched_segments
                        + tail_match.matched_segments,
                        tail_match.unmatched_segments,
                    )
                # Nothing left over: the match covers the whole remainder.
                return MatchResult.from_matched(
                    matched_so_far.matched_segments + tail_match.matched_segments
                )

            # Nothing matched at the end. That's only acceptable if a
            # trailing delimiter is allowed.
            if self.allow_trailing:
                return MatchResult(matched_so_far.matched_segments, remaining)
            return MatchResult.from_unmatched(segments)

        # We found a delimiter or a terminator ahead.
        found_delimiter = boundary_matcher is self.delimiter
        if found_delimiter:
            n_delimiters += 1

        # A zero length section before the boundary is not allowed.
        if section_len == 0:
            return MatchResult.from_unmatched(segments)

        # The section before the boundary must *completely* match one of
        # the elements.
        with parse_context.deeper_match() as ctx:
            section_match, _ = self._longest_trimmed_match(
                segments=section,
                matchers=self._elements,
                parse_context=ctx,
                trim_noncode=self.allow_gaps,
            )
        if not section_match or not section_match.is_complete():
            return MatchResult.from_unmatched(segments)
        matched_so_far += section_match.matched_segments

        if found_delimiter:
            # Consume the delimiter and go round again for the next item.
            matched_so_far += boundary_match.matched_segments
            remaining = boundary_match.unmatched_segments
            continue

        if boundary_matcher is self.terminator or isinstance(
            boundary_matcher, NonCodeMatcher
        ):
            # Terminator (or gap terminator): stop here. The terminator is
            # not part of this match and goes back with the unmatched part.
            # We slice the original buffer rather than using the look-ahead
            # result, because the look-ahead may have mutated the segments.
            if self.min_delimiters and n_delimiters < self.min_delimiters:
                return MatchResult.from_unmatched(segments)
            return MatchResult(
                matched_so_far.matched_segments,
                # Everything in the buffer after the matched section.
                remaining[section_len:],
            )

        raise RuntimeError(
            (
                "I don't know how I got here. Matched instead on {0}, which "
                "doesn't appear to be delimiter or terminator"
            ).format(boundary_matcher)
        )
def match(
    self, segments: Tuple["BaseSegment", ...], parse_context: ParseContext
) -> MatchResult:
    """Match a bracketed sequence whose content matches our elements.

    1. Match the opening bracket at the start of the (optionally trimmed)
       segments. If it isn't there, fail.
    2. Bracket-count forward from there to find its closing partner.
    3. Match what lies between the two using the parent class ``match``,
       requiring a complete match.

    Args:
        segments: The segments to match against.
        parse_context: The current parse context.

    Returns:
        A ``MatchResult`` whose matched part is the opening bracket, an
        ``Indent``, the content, a ``Dedent`` and the closing bracket (the
        metas are omitted for empty brackets), or an unmatched result
        holding ``segments``.

    Raises:
        SQLParseError: If an opening bracket is matched but no closing
            bracket can be found for it.
    """
    # Trim the ends if gaps are allowed.
    # NOTE(review): the non-code segments trimmed off here are not used
    # again in this method - confirm callers never rely on them.
    if self.allow_gaps:
        _, remaining, _ = trim_non_code_segments(segments)
    else:
        remaining = segments

    # Fetch the bracket matchers from the dialect, then let any explicit
    # overrides on this grammar win (for special bracket-like things).
    dialect_start, dialect_end = self.get_bracket_from_dialect(parse_context)
    start_bracket = self.start_bracket or dialect_start
    end_bracket = self.end_bracket or dialect_end

    # Step 1: the opening bracket.
    with parse_context.deeper_match() as ctx:
        start_match = start_bracket.match(remaining, parse_context=ctx)
    if not start_match:
        # Can't find the opening bracket. No match.
        return MatchResult.from_unmatched(segments)
    remaining = start_match.unmatched_segments

    # Step 2: the closing bracket.
    content_segs, end_match, _ = self._bracket_sensitive_look_ahead_match(
        segments=remaining,
        matchers=[end_bracket],
        parse_context=parse_context,
        start_bracket=start_bracket,
        end_bracket=end_bracket,
        bracket_pairs_set=self.bracket_pairs_set,
    )
    if not end_match:
        raise SQLParseError(
            "Couldn't find closing bracket for opening bracket.",
            segment=start_match.matched_segments[0],
        )

    # Step 3: the content. First, TOTALLY EMPTY BRACKETS e.g. "()". They
    # only match if there are no elements or all of them are optional.
    if not content_segs:
        if not self._elements or all(e.is_optional() for e in self._elements):
            return MatchResult(
                start_match.matched_segments + end_match.matched_segments,
                end_match.unmatched_segments,
            )
        return MatchResult.from_unmatched(segments)

    # Next, trim the content to handle brackets with no code e.g. "( )".
    if self.allow_gaps:
        pre_nc, content_segs, post_nc = trim_non_code_segments(content_segs)
    else:
        pre_nc = ()
        post_nc = ()

    if not content_segs:
        # Nothing but non-code inside the brackets.
        if not self._elements or (
            all(e.is_optional() for e in self._elements) and self.allow_gaps
        ):
            return MatchResult(
                start_match.matched_segments
                + pre_nc
                + post_nc
                + end_match.matched_segments,
                end_match.unmatched_segments,
            )
        return MatchResult.from_unmatched(segments)

    # Let the parent class interpret the elements against the content.
    with parse_context.deeper_match() as ctx:
        content_match = super().match(content_segs, parse_context=ctx)

    # Only a complete match of the content will do.
    if not content_match.is_complete():
        return MatchResult.from_unmatched(segments)

    # Wrap the content in indent/dedent metas, then realign all of the
    # segments so that the position markers of the metas are correct.
    # NB: The non-code segments go *inside* the indent/dedent pair here.
    bracketed = (
        start_match.matched_segments
        + (Indent(),)  # Add a meta indent here
        + pre_nc
        + content_match.matched_segments
        + post_nc
        + (Dedent(),)  # Add a meta dedent here
        + end_match.matched_segments
    )
    return MatchResult(
        BaseSegment._position_segments(bracketed),
        end_match.unmatched_segments,
    )
def match(
    self, segments: Tuple["BaseSegment", ...], parse_context: ParseContext
) -> MatchResult:
    """Match if a bracketed sequence, with content that matches one of the elements.

    1. If the first segment is already of type "bracketed", reuse it.
       Otherwise match the opening bracket at the start of the segments and
       bracket count forward to find its partner.
    2. Match what goes between the brackets using the parent class
       ``match``, requiring a complete match.
    3. On success, rebuild the bracketed segment around the matched content,
       adding indent/dedent metas unless an indent is already present.

    Args:
        segments: The segments to match against.
        parse_context: The current parse context.

    Returns:
        A ``MatchResult`` whose matched part is either the bracketed segment
        itself or its child segments (depending on the ``bracket_persists``
        value returned by ``get_bracket_from_dialect``), or an unmatched
        result holding ``segments``. An input with nothing to match (empty,
        or only non-code when gaps are allowed) is returned as unmatched.

    Raises:
        SQLParseError: If an opening bracket is matched but no closing
            bracket can be found for it.
    """
    # Trim ends if allowed.
    if self.allow_gaps:
        _, seg_buff, _ = trim_non_code_segments(segments)
    else:
        seg_buff = segments  # pragma: no cover TODO?

    # Rehydrate the bracket segments in question.
    # bracket_persists controls whether we make a BracketedSegment or not.
    start_bracket, end_bracket, bracket_persists = self.get_bracket_from_dialect(
        parse_context
    )
    # Allow optional override for special bracket-like things
    start_bracket = self.start_bracket or start_bracket
    end_bracket = self.end_bracket or end_bracket

    # With nothing left to look at there can be no bracket. Bail out here
    # rather than failing with an IndexError on `seg_buff[0]` below.
    if not seg_buff:
        return MatchResult.from_unmatched(segments)

    # Are we dealing with a pre-existing BracketSegment?
    if seg_buff[0].is_type("bracketed"):
        existing: BracketedSegment = cast(BracketedSegment, seg_buff[0])
        # The content is everything between the two brackets.
        content_segs = existing.segments[
            len(existing.start_bracket) : -len(existing.end_bracket)
        ]
        bracket_segment = existing
        trailing_segments = seg_buff[1:]
    # Otherwise try and match the segments directly.
    else:
        # Look for the first bracket
        with parse_context.deeper_match() as ctx:
            start_match = start_bracket.match(seg_buff, parse_context=ctx)
        if not start_match:
            # Can't find the opening bracket. No Match.
            return MatchResult.from_unmatched(segments)
        seg_buff = start_match.unmatched_segments

        # Look for the closing bracket
        content_segs, end_match, _ = self._bracket_sensitive_look_ahead_match(
            segments=seg_buff,
            matchers=[end_bracket],
            parse_context=parse_context,
            start_bracket=start_bracket,
            end_bracket=end_bracket,
            bracket_pairs_set=self.bracket_pairs_set,
        )
        if not end_match:  # pragma: no cover
            raise SQLParseError(
                "Couldn't find closing bracket for opening bracket.",
                segment=start_match.matched_segments[0],
            )

        # Construct a bracket segment
        bracket_segment = BracketedSegment(
            segments=(
                start_match.matched_segments
                + content_segs
                + end_match.matched_segments
            ),
            start_bracket=start_match.matched_segments,
            end_bracket=end_match.matched_segments,
        )
        trailing_segments = end_match.unmatched_segments

    # Then trim whitespace and deal with the case of non-code content e.g. "( )"
    if self.allow_gaps:
        pre_segs, content_segs, post_segs = trim_non_code_segments(content_segs)
    else:  # pragma: no cover TODO?
        pre_segs = ()
        post_segs = ()

    # If we've got a case of empty brackets check whether that is allowed.
    if not content_segs:
        if not self._elements or (
            all(e.is_optional() for e in self._elements)
            and (self.allow_gaps or (not pre_segs and not post_segs))
        ):
            return MatchResult(
                (bracket_segment,) if bracket_persists else bracket_segment.segments,
                trailing_segments,
            )
        return MatchResult.from_unmatched(segments)

    # Match the content using super. Sequence will interpret the content of
    # the elements.
    with parse_context.deeper_match() as ctx:
        content_match = super().match(content_segs, parse_context=ctx)

    # We require a complete match for the content (hopefully for obvious
    # reasons). No complete match. Fail.
    if not content_match.is_complete():
        return MatchResult.from_unmatched(segments)

    # Reconstruct the bracket segment post match.
    # We need to realign the meta segments so the pos markers are correct.
    # Have we already got indents? This is deliberately a presence test and
    # not a truthiness test on a found index, so that a hit at position
    # zero would still count as "found".
    already_indented = any(
        child.is_meta
        and cast(MetaSegment, child).indent_val > 0
        and not cast(MetaSegment, child).is_template
        for child in bracket_segment.segments
    )

    if already_indented:
        # If we've already got indents, don't add more.
        bracket_segment.segments = BaseSegment._position_segments(
            bracket_segment.start_bracket
            + pre_segs
            + content_match.all_segments()
            + post_segs
            + bracket_segment.end_bracket
        )
    else:
        # Append some indent and dedent tokens at the start and the end.
        # NB: The non-code segments go *inside* the indent/dedent pair.
        bracket_segment.segments = BaseSegment._position_segments(
            bracket_segment.start_bracket
            + (Indent(),)  # Add a meta indent here
            + pre_segs
            + content_match.all_segments()
            + post_segs
            + (Dedent(),)  # Add a meta dedent here
            + bracket_segment.end_bracket
        )

    return MatchResult(
        (bracket_segment,) if bracket_persists else bracket_segment.segments,
        trailing_segments,
    )
def match(
    self,
    segments: Tuple[BaseSegment, ...],
    parse_context: ParseContext,
) -> MatchResult:
    """Match any number of elements separated by a delimiter.

    If several elements were configured, they are treated as alternative
    options for each delimited item, not as a sequence.

    The loop alternates between looking for an element and looking for a
    delimiter, checking for a terminator before each attempt. Once the loop
    ends, the collected state decides what is returned.

    Args:
        segments: The segments to match against.
        parse_context: The current parse context.

    Returns:
        A ``MatchResult`` for the delimited run. If the run fails (too few
        delimiters, nothing matched, or a disallowed trailing delimiter with
        nothing after it) the result is an unmatched result built from the
        segments collected so far plus those left over.
    """
    # An empty input gives an empty result.
    if len(segments) == 0:
        return MatchResult.from_empty()

    # Working state.
    remaining = segments
    matched: Tuple[BaseSegment, ...] = ()
    leftover: Tuple[BaseSegment, ...] = ()
    # Snapshot taken at each delimiter, so that a disallowed trailing
    # delimiter can be handed back as unmatched.
    matched_before_delim: Tuple[BaseSegment, ...] = ()
    buffer_before_delim: Tuple[BaseSegment, ...] = ()
    n_delimiters = 0
    last_was_delimiter = False
    seeking_delimiter = False
    matched_anything = False
    hit_terminator = False

    # Only the main matching loop should render a progress bar, so it is
    # disabled for any deeper parsing.
    hide_progress = (
        parse_context.parse_depth > 0
        or progress_bar_configuration.disable_progress_bar
    )
    # The number of newlines is used as a rough estimate of how many steps
    # a big file might take. Not perfect, but good enough in most cases.
    newline_count = len([s for s in segments if isinstance(s, NewlineSegment)])
    progress = tqdm(
        total=newline_count,
        desc="matching",
        miniters=30,
        disable=hide_progress,
        leave=False,
    )

    delimiter_matchers = [self.delimiter]
    terminator_matchers = []
    if self.terminator:
        terminator_matchers.append(self.terminator)
    # Where gaps aren't allowed, any non-code segment terminates.
    if not self.allow_gaps:
        terminator_matchers.append(NonCodeMatcher())

    while True:
        progress.update(n=1)
        current_matchers = delimiter_matchers if seeking_delimiter else self._elements

        if len(remaining) == 0:
            break  # pragma: no cover

        leading_nc, content, trailing_nc = trim_non_code_segments(remaining)

        # Without gaps, leading whitespace ends the run here.
        if not self.allow_gaps and any(seg.is_whitespace for seg in leading_nc):
            leftover = remaining
            break

        if not content:  # pragma: no cover
            matched += leading_nc
            break

        # Look for a terminator before looking for any content.
        with parse_context.deeper_match() as ctx:
            term_match, _ = self._longest_trimmed_match(
                segments=content,
                matchers=terminator_matchers,
                parse_context=ctx,
                # We've already trimmed
                trim_noncode=False,
            )
        if term_match:
            hit_terminator = True
            leftover = leading_nc + term_match.all_segments() + trailing_nc
            break

        # Now try what we're actually after (an element or a delimiter).
        # When after an element, the delimiter acts as a terminator for it.
        matching_delimiter = current_matchers == delimiter_matchers
        with parse_context.deeper_match() as ctx:
            item_match, _ = self._longest_trimmed_match(
                segments=content,
                matchers=current_matchers,
                parse_context=ctx,
                # We've already trimmed
                trim_noncode=False,
                terminators=None if matching_delimiter else delimiter_matchers,
            )

        if not item_match:
            # Nothing matched, so the run ends here.
            matched += leading_nc
            leftover = item_match.unmatched_segments + trailing_nc
            break

        if matching_delimiter:
            n_delimiters += 1
            last_was_delimiter = True
            matched_before_delim = matched
            buffer_before_delim = remaining
        else:
            last_was_delimiter = False

        matched_anything = True
        remaining = item_match.unmatched_segments + trailing_nc
        leftover = item_match.unmatched_segments

        if item_match.is_complete():
            # Everything was consumed, including the trailing non-code.
            matched += leading_nc + item_match.matched_segments + trailing_nc
            leftover = item_match.unmatched_segments
            break

        matched += leading_nc + item_match.matched_segments
        # Switch between seeking an element and seeking a delimiter.
        seeking_delimiter = not seeking_delimiter

    # Work out what to return from the state the loop finished in.
    if self.min_delimiters:
        if n_delimiters < self.min_delimiters:
            return MatchResult.from_unmatched(matched + leftover)

    if hit_terminator:
        if matched_anything:
            return MatchResult(matched, leftover)
        return MatchResult.from_unmatched(matched + leftover)

    if last_was_delimiter and not self.allow_trailing:
        # We ended on a delimiter, but trailing delimiters aren't allowed.
        if not leftover:
            return MatchResult.from_unmatched(matched + leftover)
        # Roll back to just before that final delimiter.
        return MatchResult(matched_before_delim, buffer_before_delim)

    if not matched_anything:
        return MatchResult.from_unmatched(matched + leftover)

    if not leftover:
        return MatchResult.from_matched(matched)

    return MatchResult(matched, leftover)
def match(
    self,
    segments: Tuple[BaseSegment, ...],
    parse_context: ParseContext,
) -> MatchResult:
    """Match any number of elements separated by a delimiter.

    If several elements were configured, they are treated as alternative
    options for each delimited item, not as a sequence.

    The approach is to look ahead (bracket-aware) for the next delimiter or
    terminator, match the section before it against the elements, and
    repeat. When no further delimiter or terminator is found, the remainder
    is matched as the final item.

    Args:
        segments: The segments to match against.
        parse_context: The current parse context.

    Returns:
        A ``MatchResult`` for the delimited run (which may be partial), or
        an unmatched result on failure.

    Raises:
        RuntimeError: If the look-ahead reports a matcher which is neither
            the delimiter, the terminator nor the gap matcher.
    """
    # An empty input gives an empty result.
    if len(segments) == 0:
        return MatchResult.from_empty()

    remaining = segments
    matched_so_far = MatchResult.from_empty()
    # How many delimiters we have seen (checked against min_delimiters).
    n_delimiters = 0

    # Only the main matching loop should render a progress bar, so it is
    # disabled for any deeper parsing.
    hide_progress = (
        parse_context.parse_depth > 0
        or progress_bar_configuration.disable_progress_bar
    )
    # The number of newlines is used as a rough estimate of how many steps
    # a big file might take. Not perfect, but good enough in most cases.
    newline_count = len([s for s in segments if isinstance(s, NewlineSegment)])
    progress = tqdm(
        total=newline_count,
        desc="matching",
        miniters=30,
        disable=hide_progress,
        leave=False,
    )

    while True:
        progress.update(n=1)

        # Have we used up the buffer? If so the previous thing consumed was
        # a delimiter, so this is potentially a trailing-delimiter case.
        if len(remaining) == 0:
            matched_so_far += remaining
            if self.allow_trailing and (
                self.min_delimiters is None or n_delimiters >= self.min_delimiters
            ):  # pragma: no cover TODO?
                # Nothing is left, so there are no unmatched segments.
                return MatchResult.from_matched(matched_so_far.matched_segments)
            else:  # pragma: no cover TODO?
                return MatchResult.from_unmatched(segments)

        # Look ahead for a delimiter or a terminator. Bracket counting is
        # left to _bracket_sensitive_look_ahead_match.
        lookahead_matchers = [self.delimiter]
        if self.terminator:
            lookahead_matchers.append(self.terminator)
        # Where gaps aren't allowed, any non-code segment terminates.
        if not self.allow_gaps:
            lookahead_matchers.append(NonCodeMatcher())

        with parse_context.deeper_match() as ctx:
            (
                section,
                boundary_match,
                boundary_matcher,
            ) = self._bracket_sensitive_look_ahead_match(
                remaining,
                lookahead_matchers,
                parse_context=ctx,
                bracket_pairs_set=self.bracket_pairs_set,
            )

        # Keep hold of the segments as returned by the look-ahead, which
        # may have mutated them. These are what failure results hand back.
        # NOTE(review): this only covers the current buffer, so segments
        # matched by earlier iterations are absent from those failure
        # results - confirm that callers discard unmatched results.
        lookahead_segments = section + boundary_match.all_segments()

        if not boundary_match:
            # No delimiter or terminator ahead, so we're at the end and
            # should look for a final (possibly partial) match. We know the
            # buffer is non-empty because that was checked above.
            # With too few delimiters there is no point in even trying.
            if self.min_delimiters and n_delimiters < self.min_delimiters:
                return MatchResult.from_unmatched(lookahead_segments)

            lead_nc, tail_content, trail_nc = trim_non_code_segments(
                lookahead_segments
            )
            # Whitespace gaps are checked explicitly (rather than relying
            # on an untrimmed match) so that _whitespace_ can be treated
            # differently from other non-code segments like placeholders.
            if not self.allow_gaps and any(
                seg.is_whitespace for seg in lead_nc + trail_nc
            ):
                return MatchResult.from_unmatched(
                    lookahead_segments
                )  # pragma: no cover TODO?

            # Take the longest match from any element.
            with parse_context.deeper_match() as ctx:
                tail_match, _ = self._longest_trimmed_match(
                    tail_content,
                    self._elements,
                    parse_context=ctx,
                    # We've already trimmed
                    trim_noncode=False,
                )

            if tail_match:
                if tail_match.unmatched_segments:
                    # Something is left over, so it also gets the trailing
                    # non-code segments.
                    return MatchResult(
                        matched_so_far.matched_segments
                        + lead_nc
                        + tail_match.matched_segments,
                        tail_match.unmatched_segments + trail_nc,
                    )
                # Nothing left over, so we can consume the trailing
                # non-code segments too.
                return MatchResult.from_matched(
                    matched_so_far.matched_segments
                    + lead_nc
                    + tail_match.matched_segments
                    + trail_nc
                )

            # Nothing matched at the end. That's only acceptable if a
            # trailing delimiter is allowed.
            if self.allow_trailing:
                return MatchResult(matched_so_far.matched_segments, remaining)
            return MatchResult.from_unmatched(lookahead_segments)

        # We found a delimiter or a terminator ahead.
        found_delimiter = boundary_matcher is self.delimiter
        if found_delimiter:
            n_delimiters += 1

        # A zero length section before the boundary is not allowed.
        if len(section) == 0:
            return MatchResult.from_unmatched(lookahead_segments)

        lead_nc, section_content, trail_nc = trim_non_code_segments(section)
        # Whitespace gaps are checked explicitly (rather than relying on an
        # untrimmed match) so that _whitespace_ can be treated differently
        # from other non-code segments like placeholders.
        if not self.allow_gaps and any(
            seg.is_whitespace for seg in lead_nc + trail_nc
        ):
            return MatchResult.from_unmatched(
                lookahead_segments
            )  # pragma: no cover TODO?

        with parse_context.deeper_match() as ctx:
            section_match, _ = self._longest_trimmed_match(
                segments=section_content,
                matchers=self._elements,
                parse_context=ctx,
                # We've already trimmed
                trim_noncode=False,
            )

        if not section_match:
            if self.allow_trailing:
                # The look-ahead has hit a delimiter beyond the scope of
                # this Delimited section. Trailing delimiters are allowed,
                # so return what has been matched up to this section.
                return MatchResult(
                    matched_so_far.matched_segments,
                    lead_nc
                    + section_match.unmatched_segments
                    + trail_nc
                    + boundary_match.all_segments(),
                )
            return MatchResult.from_unmatched(lookahead_segments)

        if not section_match.is_complete():
            # The look-ahead has hit a delimiter beyond the scope of this
            # Delimited section. Return a partial match, with the delimiter
            # among the unmatched segments.
            return MatchResult(
                matched_so_far.matched_segments
                + lead_nc
                + section_match.matched_segments,
                section_match.unmatched_segments
                + trail_nc
                + boundary_match.all_segments(),
            )

        # A complete match! Add the whole section to what we've matched.
        matched_so_far += lead_nc + section_match.matched_segments + trail_nc

        if found_delimiter:
            # Consume the delimiter and go round again for the next item.
            matched_so_far += boundary_match.matched_segments
            remaining = boundary_match.unmatched_segments
            continue

        if boundary_matcher is self.terminator or isinstance(
            boundary_matcher, NonCodeMatcher
        ):
            # Terminator (or gap terminator): stop here. The terminator is
            # not part of this match and goes back with the unmatched part.
            if self.min_delimiters and n_delimiters < self.min_delimiters:
                return MatchResult.from_unmatched(lookahead_segments)
            return MatchResult(
                matched_so_far.matched_segments,
                boundary_match.all_segments(),
            )

        raise RuntimeError(  # pragma: no cover
            (
                "I don't know how I got here. Matched instead on {}, "
                "which doesn't appear to be delimiter or terminator"
            ).format(boundary_matcher)
        )