def expand(segments, parse_context):
    """Expand the list of child segments using their `parse` methods.

    Segments whose `is_expandable` attribute is falsy are passed through
    untouched; every other segment is replaced by whatever its `parse`
    method returns (a single segment, or an iterable of segments).

    Args:
        segments: The child segments to expand.
        parse_context: The current parse context. Its `logger` and
            `parse_depth` are used for logging, and it is handed on to
            each child's `parse` call.

    Returns:
        A tuple of the resulting segments, in the same order as the input.

    Raises:
        ValueError: If an expandable segment has no `parse` attribute.
        Exception: Anything raised while reading `is_expandable` is logged
            and re-raised unchanged.
    """
    expanded = []
    for stmt in segments:
        # Keep the `try` tight around the attribute read so that only a
        # failure to read `is_expandable` is reported with this message.
        try:
            is_expandable = stmt.is_expandable
        except Exception:
            parse_context.logger.error(
                "%s has no attribute `is_expandable`. This segment appears poorly constructed.",
                stmt,
            )
            raise

        if not is_expandable:
            parse_context.logger.info(
                "[PD:%s] Skipping expansion of %s...",
                parse_context.parse_depth,
                stmt,
            )
            expanded.append(stmt)
            continue

        if not hasattr(stmt, "parse"):
            raise ValueError(
                "{0} has no method `parse`. This segment appears poorly constructed.".format(
                    stmt
                )
            )

        parse_depth_msg = "Parse Depth {0}. Expanding: {1}: {2!r}".format(
            parse_context.parse_depth,
            stmt.__class__.__name__,
            curtail_string(stmt.raw, length=40),
        )
        parse_context.logger.info(frame_msg(parse_depth_msg))

        res = stmt.parse(parse_context=parse_context)
        if isinstance(res, BaseSegment):
            expanded.append(res)
        else:
            # We might get back an iterable of segments.
            expanded.extend(res)

    # Callers (and the validation helper) work with tuples, so convert once
    # at the end rather than concatenating tuples on every iteration.
    segs = tuple(expanded)
    # Basic validation of the expanded output against the original input.
    check_still_complete(segments, segs, ())
    return segs
def parse(self, parse_context=None, parse_grammar=None):
    """Use the parse grammar to find subsegments within this segment.

    A large chunk of the logic around this can be found in the `expand`
    method.

    Use the parse setting in the context for testing, mostly to check how
    deep to go. True/False for yes or no, an integer allows a certain
    number of levels.

    Optionally, this method allows a custom parse grammar to be provided
    which will override any existing parse grammar on the segment.

    Args:
        parse_context: The current parse context. Although it defaults to
            None, it is dereferenced unconditionally once the segment has
            child segments, so it is only optional for segments without
            any children.
        parse_grammar: Optional grammar used instead of
            `self.parse_grammar`.

    Returns:
        This segment (`self`), with `self.segments` replaced by the parsed
        (and, if recursion is allowed, expanded) children.

    Raises:
        ValueError: If the segment may not start/end with non-code but its
            first or last child is neither code nor meta.
        TypeError: If the grammar's `match` does not return a MatchResult.
    """
    # Clear the blacklist cache to avoid missteps.
    if parse_context:
        parse_context.blacklist.clear()

    # The parse_depth and recurse kwargs control how deep we will recurse
    # for testing.
    if not self.segments:
        # This means we're a root segment, just return an unmutated self.
        return self

    # Start the timer before any matching happens so that the
    # "Parse complete" benchmark below actually covers the parse.
    bencher = BenchIt()

    # Check the parse grammar.
    parse_grammar = parse_grammar or self.parse_grammar
    if parse_grammar is None:
        # No parse grammar, go straight to expansion.
        parse_context.logger.debug(
            "{0}.parse: no grammar. Going straight to expansion".format(
                self.__class__.__name__
            )
        )
    else:
        # For debugging purposes. Ensure that we don't have non-code
        # elements at the start or end of the segments. They should always
        # be in the middle, or in the parent expression.
        segments = self.segments
        if self.can_start_end_non_code:
            pre_nc, segments, post_nc = trim_non_code_segments(segments)
        else:
            pre_nc = ()
            post_nc = ()
            if (not segments[0].is_code) and (not segments[0].is_meta):
                raise ValueError(
                    "Segment {0} starts with non code segment: {1!r}.\n{2!r}".format(
                        self, segments[0].raw, segments
                    )
                )
            if (not segments[-1].is_code) and (not segments[-1].is_meta):
                raise ValueError(
                    "Segment {0} ends with non code segment: {1!r}.\n{2!r}".format(
                        self, segments[-1].raw, segments
                    )
                )

        # NOTE: No match_depth kwarg, because this is the start of the
        # matching.
        with parse_context.matching_segment(self.__class__.__name__) as ctx:
            m = parse_grammar.match(segments=segments, parse_context=ctx)

        if not isinstance(m, MatchResult):
            raise TypeError(
                "[PD:{0}] {1}.match. Result is {2}, not a MatchResult!".format(
                    parse_context.parse_depth, self.__class__.__name__, type(m)
                )
            )

        # Basic validation, that we haven't dropped anything.
        check_still_complete(segments, m.matched_segments, m.unmatched_segments)

        if m.has_match():
            if m.is_complete():
                # Complete match, happy days!
                self.segments = pre_nc + m.matched_segments + post_nc
            else:
                # Incomplete match. For now this means the parsing has
                # failed. Add the unmatched bit at the end as something
                # unparsable.
                # TODO: Do something more intelligent here.
                self.segments = (
                    pre_nc
                    + m.matched_segments
                    + (
                        UnparsableSegment(
                            segments=m.unmatched_segments + post_nc,
                            expected="Nothing...",
                        ),
                    )
                )
        elif self.allow_empty and not segments:
            # Very edge case, but some segments are allowed to be empty
            # other than non-code.
            self.segments = pre_nc + post_nc
        else:
            # If there's no match at this stage, then it's unparsable.
            # That's a problem at this stage so wrap it in an unparsable
            # segment and carry on.
            self.segments = (
                pre_nc
                + (
                    UnparsableSegment(
                        segments=segments,
                        expected=self.name,
                    ),  # NB: tuple
                )
                + post_nc
            )

    bencher("Parse complete of {0!r}".format(self.__class__.__name__))

    # Recurse if allowed (using the expand method to deal with the
    # expansion).
    parse_context.logger.debug(
        "{0}.parse: Done Parse. Plotting Recursion. Recurse={1!r}".format(
            self.__class__.__name__, parse_context.recurse
        )
    )
    if parse_context.may_recurse():
        # Only build the (potentially large) structure dump when it is
        # actually going to be logged.
        parse_depth_msg = "###\n#\n# Beginning Parse Depth {0}: {1}\n#\n###\nInitial Structure:\n{2}".format(
            parse_context.parse_depth + 1,
            self.__class__.__name__,
            self.stringify(),
        )
        parse_context.logger.debug(parse_depth_msg)
        with parse_context.deeper_parse() as ctx:
            self.segments = self.expand(self.segments, parse_context=ctx)

    return self
def match(self, segments, parse_context):
    """Match a specific sequence of elements.

    Each entry of `self._elements` is tried, in order, against the front
    of whatever is still unmatched. Meta elements are instantiated rather
    than matched, disabled `Conditional` elements are skipped, and
    optional elements may fail to match without failing the sequence.

    Args:
        segments: The segments to match against.
        parse_context: The current parse context. It is passed to
            `Conditional.is_enabled` and provides `deeper_match()` for
            matching each element.

    Returns:
        A MatchResult. On success it holds the matched segments (with any
        buffered meta segments appended) and the leftover unmatched
        segments. If a required element cannot be matched the result of
        `MatchResult.from_unmatched(segments)` is returned.
    """
    if isinstance(segments, BaseSegment):
        # NOTE(review): tuple() iterates the segment rather than wrapping
        # it as `(segments,)` - confirm this is the intended handling of a
        # lone segment.
        segments = tuple(segments)

    matched_segments = MatchResult.from_empty()
    unmatched_segments = segments

    # Buffers of instantiated meta segments which have not yet been
    # attached to the match.
    meta_pre_nc = ()
    meta_post_nc = ()

    for idx, elem in enumerate(self._elements):
        # Is it an indent or dedent?
        if elem.is_meta:
            # Elements with a negative indent value come AFTER the
            # whitespace. Positive or neutral come BEFORE.
            if elem.indent_val < 0:
                meta_post_nc += (elem(),)
            else:
                meta_pre_nc += (elem(),)
            continue

        # Is it a conditional? If it's not active, skip it.
        if isinstance(elem, Conditional) and not elem.is_enabled(parse_context):
            continue

        # Consume non-code if appropriate.
        if self.allow_gaps:
            pre_nc, mid_seg, post_nc = trim_non_code_segments(unmatched_segments)
        else:
            pre_nc = ()
            mid_seg = unmatched_segments
            post_nc = ()

        if len(pre_nc + mid_seg + post_nc) == 0:
            # We've run out of segments without matching everything.
            # Do only optional, meta or conditional elements remain?
            remaining = self._elements[idx:]
            if not all(
                e.is_optional() or e.is_meta or isinstance(e, Conditional)
                for e in remaining
            ):
                # We've got to the end of the input without matching all
                # required elements.
                return MatchResult.from_unmatched(segments)

            # It's ok, and we can return what we've got so far. No need to
            # deal with anything left over because we're at the end,
            # unless it's a meta segment. We add those after any existing
            # ones, so they go on the meta_post_nc stack.
            for e in remaining:
                # If it's meta, instantiate it.
                if e.is_meta:
                    meta_post_nc += (e(),)
                # If it's conditional and it's enabled, match it.
                if isinstance(e, Conditional) and e.is_enabled(parse_context):
                    meta_match = e.match(tuple(), parse_context)
                    if meta_match:
                        meta_post_nc += meta_match.matched_segments
            # Nothing left to match against: exit via the happy path.
            break

        # We've already dealt with potential whitespace above, so carry on
        # to matching.
        with parse_context.deeper_match() as ctx:
            elem_match = elem.match(mid_seg, parse_context=ctx)

        if not elem_match.has_match():
            # If we can't match an element, we should ascertain whether
            # it's required. If it's optional then fine, move on to the
            # next element; otherwise the sequence has not matched.
            if elem.is_optional():
                continue
            return MatchResult.from_unmatched(segments)

        # We're expecting mostly partial matches here, but complete
        # matches are possible. Don't be greedy with whitespace!
        matched_segments += (
            meta_pre_nc + pre_nc + meta_post_nc + elem_match.matched_segments
        )
        meta_pre_nc = ()
        meta_post_nc = ()
        unmatched_segments = elem_match.unmatched_segments + post_nc
        # Each time we do this, we do a sense check to make sure we
        # haven't dropped anything. (Because it's happened before!)
        check_still_complete(
            segments,
            matched_segments.matched_segments,
            unmatched_segments,
        )

    # If we get to here, we've matched all of the elements (or skipped
    # them) but still have some segments left (or perhaps have precisely
    # zero left). In either case, we're golden. Return successfully, with
    # any leftovers as the unmatched elements. Meta all go at the end
    # regardless of any trailing whitespace.
    return MatchResult(
        BaseSegment._position_segments(
            matched_segments.matched_segments + meta_pre_nc + meta_post_nc,
        ),
        unmatched_segments,
    )