def _add_attribute(self, attr, val):
    """Store a single-valued parser attribute, warning on duplicates.

    A second non-None assignment to the same attribute usually means the
    source contained the same meta-comment attribute twice; the new value
    still wins, matching the original behaviour.
    """
    logger.debug('Parser : Adding attribute %s with value %s', attr, val)
    # PEP 8: compare to None with 'is not', not '!='.
    if attr in self._attributes and self._attributes[attr] is not None:
        logger.warning('Parser : Added attribute twice %s = %s', attr, val)
    self._attributes[attr] = val
def _append_attribute(self, attr, val):
    """Append val to the plural list attribute named attr + 's'.

    The list is created on first use; e.g. appending to 'param' stores
    values under the 'params' key.
    """
    logger.debug('Parser : Appending attribute %s with value %s to %s',
                 attr, val, attr + 's')
    # setdefault replaces the explicit keys()-membership check and branch.
    self._attributes.setdefault(attr + 's', []).append(val)
def resolve_function_call(self, function):
    """
    Assumes the file has checked itself for the function before calling this.

    Searches the used units' functions and forward declarations, then the
    System unit; reports a (non-critical) error if nothing matches.
    """
    from pas_file_cache import get_unit_named
    logger.debug("File: Resolving Function call: %s", function.name)
    target = function.name.lower()  # hoist: compared on every candidate
    for unit_reference in self._contents.uses:
        # Guard unresolved references (consistent with resolve_type's check);
        # the original dereferenced points_to unconditionally.
        if unit_reference.points_to is None:
            continue
        unit = unit_reference.points_to.contents
        for unit_function in unit.functions:
            if unit_function.name.lower() == target:
                return unit_function
        for unit_declared_function in unit.function_declarations:
            if unit_declared_function.name.lower() == target:
                return unit_declared_function
    # check the System unit...
    system = get_unit_named('System')
    if system is not None:
        for unit_function in system.contents.functions:
            if unit_function.name.lower() == target:
                return unit_function
    raise_error(("File: Unable to resolve function: %s in %s" %
                 (function.name, self._filename)), '', is_critical=False)
def resolve_variable(self, var_name):
    """Resolve var_name against the used units, then the System unit.

    A match may be a unit variable or an enumeration value declared by a
    unit's types. Reports a (non-critical) error when unresolved.
    """
    from pas_file_cache import get_unit_named
    logger.debug("File: Resolving Variable: %s", var_name)
    target = var_name.lower()  # hoist: compared on every candidate
    for unit_reference in self._contents.uses:
        # Guard unresolved references (consistent with resolve_type's check).
        if unit_reference.points_to is None:
            continue
        unit = unit_reference.points_to.contents
        # could be a variable...
        for unit_name, unit_var in unit.variables.items():
            if unit_name.lower() == target:
                return unit_var
        # or it could be an enumeration value...
        for name, unit_type in unit.types.items():
            # Bug fix: the original used "is 'enumeration'", which compares
            # string *identity* and is not guaranteed to match; use ==.
            if unit_type.kind == 'enumeration':
                for val in unit_type.values:
                    if val.name.lower() == target:
                        return val
    # check the System unit...
    system = get_unit_named('System')
    if system is not None:
        for name, unit_var in system.contents.variables.items():
            if name.lower() == target:
                return unit_var
    raise_error(("File: Unable to resolve variable: %s in %s" %
                 (var_name, self._filename)), '', is_critical=False)
def resolve_unit_reference(self, reference):
    """Look up the unit named by reference, parsing it on first use.

    Returns the unit, or None when no unit with that name is cached.
    """
    from pas_file_cache import get_unit_named
    logger.debug("File: Resolving Unit reference: %s", reference.name)
    unit = get_unit_named(reference.name)
    # PEP 8: 'is not None' rather than 'not (unit is None)'.
    if unit is not None:
        if not unit.is_parsed:
            unit.parse()
        return unit
    return None
def convert_operator(the_dict, the_operator, dict_name='_operator_conversion_table'):
    '''
    converts operators from Pascal to other language operators
    Does not fail if the operator wasn't found, the returned value will be
    the same as the current operator
    '''
    # Bug fix: the original guarded the key computation against None but then
    # unconditionally dereferenced the_operator.value (in both the log call
    # and the dict assignment), raising AttributeError for a None operator.
    if the_operator is None:
        return None
    key = the_operator.value.lower()
    if key not in the_dict:
        # Unknown operator: register an identity mapping so lookups succeed.
        logger.debug(" : Adding '%s[%s]'", dict_name, key)
        the_dict[key] = the_operator.value
    return the_dict[key]
def next_token(self):
    """Return the next non-comment token from the stream.

    Tokens are taken from the lookahead buffer first, then from the
    tokeniser. Comment tokens are stored on _lookbehind_comments and
    skipped.
    """
    current_token = None
    while current_token is None or current_token._kind == TokenKind.Comment:
        if self._lookahead_toks:
            # pop(0) instead of re-slicing: avoids copying the buffer.
            current_token = self._lookahead_toks.pop(0)
        else:
            current_token = self._tokeniser.next_token()
        if current_token._kind == TokenKind.Comment:
            self._lookbehind_comments.append(current_token)
            logger.debug('TokenStream : Storing comment: %s', current_token.value)
    return current_token
def match_lookahead(self, token_kind, token_value=None, consume=False):
    """
    looks forward one position for a token with a specific value and kind
    if the token is found it returns true and optionally consume the token.
    Otherwise it returns false without consuming a token.

    token_value, when given, is compared case-insensitively (the lookahead
    token's value is lowercased; pass token_value already lowercased).
    """
    logger.debug('TokenStream : Looking to find %s (%s)%s', token_kind,
                 token_value if token_value is not None else 'any',
                 ' will consume' if consume else '')
    token = self.lookahead(1)[0]
    # PEP 8: compare to None with 'is'/'is not'.
    result = token._kind == token_kind and (
        token_value is None or token_value == token._value.lower())
    if consume and result:
        self.match_token(token_kind, token_value)
    return result
def lookahead(self, count=1):
    """
    lookahead generates a list of Tokens that has count number of members
    It ignores comments
    """
    logger.debug('TokenStream : Looking ahead %d', count)
    while len(self._lookahead_toks) < count:
        current_token = self._tokeniser.next_token()
        while current_token._kind == TokenKind.Comment:
            self._lookbehind_comments.append(current_token)
            logger.debug('TokenStream : Storing comment: %s', current_token.value)
            current_token = self._tokeniser.next_token()
        self._lookahead_toks.append(current_token)
    # Bug fix: return exactly count tokens. The buffer may hold more than
    # requested (from an earlier, larger lookahead), and callers unpack the
    # result (e.g. "a, b = tokens.lookahead(2)"), which would then fail.
    return self._lookahead_toks[:count]
def match_lookahead(self, token_kind, token_value=None, consume=False):
    """
    looks forward one position for a token with a specific value and kind
    if the token is found it returns true and optionally consume the token.
    Otherwise it returns false without consuming a token.

    token_value, when given, is compared case-insensitively (the lookahead
    token's value is lowercased; pass token_value already lowercased).
    """
    logger.debug('TokenStream : Looking to find %s (%s)%s', token_kind,
                 token_value if token_value is not None else 'any',
                 ' will consume' if consume else '')
    token = self.lookahead(1)[0]
    # PEP 8: compare to None with 'is'/'is not'.
    result = token._kind == token_kind and (
        token_value is None or token_value == token._value.lower())
    if consume and result:
        self.match_token(token_kind, token_value)
    return result
def lookahead(self, count=1):
    """
    lookahead generates a list of Tokens that has count number of members
    It ignores comments
    """
    logger.debug('TokenStream : Looking ahead %d', count)
    while len(self._lookahead_toks) < count:
        current_token = self._tokeniser.next_token()
        while current_token._kind == TokenKind.Comment:
            self._lookbehind_comments.append(current_token)
            logger.debug('TokenStream : Storing comment: %s', current_token.value)
            current_token = self._tokeniser.next_token()
        self._lookahead_toks.append(current_token)
    # Bug fix: return exactly count tokens. The buffer may hold more than
    # requested (from an earlier, larger lookahead), and callers unpack the
    # result (e.g. "a, b = tokens.lookahead(2)"), which would then fail.
    return self._lookahead_toks[:count]
def parse(self, tokens, method):
    """Read in the parameters declared in a function, procedure or operator

    Consumes '(' ... ')' from the token stream. Each parameter group is:
    optional modifier identifier, one or more names separated by ',',
    then ':' and a type. Every parsed parameter is appended to self._vars
    and registered on `method` via add_parameter.
    """
    from pas_parser_utils import _parse_identifier_list, reservedWords, parse_type
    from pas_var import PascalVariable
    from types.pas_type import PascalType
    tokens.match_token(TokenKind.Symbol, '(')
    #look for parameters
    while not tokens.match_lookahead(TokenKind.Symbol, ')'):  #consume ) at end
        param_tok, other_tok = tokens.lookahead(2)
        modifier = None
        #Look for modifier
        if param_tok.kind == TokenKind.Identifier:
            # First value is modifier: two identifiers in a row means the
            # first is a modifier (e.g. 'var'/'const') and the second a name.
            if other_tok.kind == TokenKind.Identifier:
                modifier = tokens.match_token(TokenKind.Identifier).value
            # No modifier found
            else:
                modifier = None
        # get parameter names
        parameters = [tokens.match_token(TokenKind.Identifier).value]
        while tokens.match_lookahead(TokenKind.Symbol, ',', True):
            #there is a list of parameters
            parameters.append(tokens.match_token(TokenKind.Identifier).value)
        # colon seperates identifiers and type
        tokens.match_token(TokenKind.Symbol, ':')
        the_type = parse_type(tokens, self._block)  # reads the type and returns PascalType
        for parameter_name in parameters:
            toAdd = PascalVariable(parameter_name, the_type, modifier, is_parameter=True)
            self._vars.append(toAdd)
            method.add_parameter(toAdd)
            logger.debug('Parser : Adding parameter %s (%s) to %s',
                         parameter_name, the_type, method.name)
        tokens.match_lookahead(TokenKind.Symbol, ';', consume=True)  # is there a semi-colon seperator? eat it.
    tokens.match_token(TokenKind.Symbol, ')')
def parse(self, tokens):
    """
    parses an set of arguments of a function call
    consumes the enclosing braces
    """
    from pas_expression import PascalExpression
    from tokeniser.pas_token_kind import TokenKind
    logger.debug("parsing arguments")
    tokens.match_token(TokenKind.Symbol, '(')
    while True:
        # NOTE(review): this loop also *breaks* after consuming a ','
        # separator, which would stop argument parsing at the first comma —
        # confirm whether PascalExpression.parse consumes separators itself,
        # otherwise trailing arguments are silently dropped here.
        if (tokens.match_lookahead(TokenKind.Symbol, ')', consume=True)
                or tokens.match_lookahead(TokenKind.Symbol, ',', consume=True)):
            logger.debug("finished parsing arguments")
            break
        newExpression = PascalExpression(self._block)
        newExpression.parse(tokens)
        self._contents.append(newExpression)
def match_token(self, token_kind, token_value=None):
    """
    Looks at the next token, and if it is the same kind as 'token_kind'
    and has the same value as 'token_value' then it is returned. Otherwise
    an error occurs and the program stops.

    If 'token_value' is None then only the 'token_kind' is checked.
    """
    tok = self.next_token()
    # PEP 8: compare to None with 'is not'. The value comparison is done
    # against the lowercased token value (callers pass lowercase values).
    if tok._kind != token_kind or (token_value is not None
                                   and token_value != tok._value.lower()):
        raise_error(('TokenStream %s: found a %s (%s) expected %s (%s)' %
                     (self._tokeniser.line_details(), tok._kind, tok._value,
                      token_kind, token_value)), '', is_critical=False)
    logger.debug('TokenStream : Matched token %s (%s)', tok._kind, tok._value)
    return tok
def resolve_type(self, type):
    """Resolve a type name against the used units, then the System unit.

    Reports a (non-critical) error if the type cannot be found.
    (The parameter is named 'type', shadowing the builtin; the name is kept
    because it is part of the public signature.)
    """
    from pas_file_cache import get_unit_named
    logger.debug("File: Resolving Type: %s", type)
    target = type.lower()  # hoist: compared on every candidate
    for unit_reference in self._contents.uses:
        # PEP 8: compare to None with 'is not'.
        if unit_reference.points_to is not None:
            unit = unit_reference.points_to.contents
            for unit_name, unit_type in unit.types.items():
                if unit_name.lower() == target:
                    return unit_type
    # check the System unit...
    system = get_unit_named('System')
    if system is not None:
        for name, unit_type in system.contents.types.items():
            if name.lower() == target:
                return unit_type
    raise_error(("File: Unable to resolve type: %s in %s" %
                 (type, self._filename)), '', is_critical=False)
def tokenise(self, filename):
    '''Initialises the tokeniser with characters loaded from the specified
    filename. Call `next_token` process each token.

    filename may also be a list of source lines, in which case it is used
    directly without touching the filesystem.
    '''
    self._filename = filename
    if isinstance(filename, list):
        logger.debug('Tokenising list')
        self.pas_lines = filename
    else:
        logger.debug('Tokenising %s', filename)
        # Context manager guarantees the file is closed even if readlines
        # raises (the original used open/close without try/finally).
        with open(filename) as f:
            self.pas_lines = f.readlines()
    self._char_no = -1
    self._line_no = 0  # starts at first line
    self._token_val = 'none'
def parse(self, tokens):
    """Parse record fields until 'end': identifier list, ':', type, ';'.

    Each parsed name becomes a PascalRecordField appended to self._fields.
    """
    from pascal_parser.types.pas_record_field import PascalRecordField
    from pascal_parser.pas_parser_utils import _parse_identifier_list, parse_type
    while not tokens.match_lookahead(TokenKind.Identifier, 'end', consume=True):
        # identifier list, ':', type, ';'
        idList = _parse_identifier_list(tokens)
        tokens.match_token(TokenKind.Symbol, ':')
        field_type = parse_type(tokens, self._block)  # renamed from 'type' (shadowed builtin)
        tokens.match_token(TokenKind.Symbol, ';')
        for varName in idList:
            # NOTE(review): this tests a *name* against a list of field
            # objects — confirm PascalRecordField compares equal to its name,
            # otherwise the duplicate check never triggers.
            if varName not in self._fields:
                field = PascalRecordField(varName, field_type, self)
                self._fields.append(field)
                # lazy %-args instead of eager string concatenation
                logger.debug("Parsed field : %s : %s", varName, field_type.name)
def process_meta_comments(self):
    # Consume the run of meta-comment / attribute / plain-comment tokens
    # that precede a declaration, dispatching each to its registered
    # processor via self._processors.
    logger.debug('Parser : Starting to process meta comments: clearing old comments and attributes')
    tok = self._tokens.lookahead(1)[0]  #_next_token()
    attrs_started = False
    while tok.kind in [TokenKind.MetaComment, TokenKind.Attribute, TokenKind.Comment]:
        if tok.kind == TokenKind.Attribute:
            attrs_started = True
        # Once attributes have begun, a further *non-empty* meta comment is
        # a documentation error in the source being parsed.
        if attrs_started and tok.kind == TokenKind.MetaComment:
            tok = self._tokens.next_token()  #actually read token
            if len(tok.value) > 0:
                logger.error('Parser Error %s: Found additional meta comment after start of attributes', tok)
                assert False
        else:
            tok = self._tokens.next_token()  #actually read token
            self._processors[tok.kind](tok)
        tok = self._tokens.lookahead(1)[0]
def match_one_lookahead(self, token_lst, consume=False):
    """
    Tries to match the next token's value with a list of values
    Returns True if a match is found, and false if there was no match

    token_lst is an iterable of (kind, value) pairs; value None matches any.
    """
    tok = self.lookahead()[0]
    logger.debug('TokenStream : Looking to find %s %s', token_lst,
                 ' will consume' if consume else '')
    for token_kind, token_value in token_lst:
        # NOTE(review): unlike match_lookahead, this comparison is
        # case-sensitive (no .lower()) — confirm that is intentional.
        if tok._kind == token_kind and (token_value is None
                                        or token_value == tok._value):
            # (removed dead local 'matched = True' — it was never read)
            logger.debug('TokenStream : Found %s (%s)', tok._kind, tok._value)
            if consume:
                self.match_token(tok._kind, tok._value)
            return True
    return False
def parse(self, tokens, do_resolve=True):
    """Parse a 'uses' clause, collecting one PascalUnitReference per unit.

    Stops (and consumes) the terminating ';'. Unit names separated by ','.
    """
    logger.debug("Parsing uses clause")
    tokens.match_token(TokenKind.Identifier, 'uses')
    while True:
        # terminating semicolon ends the clause
        if tokens.match_lookahead(TokenKind.Symbol, ';'):
            tokens.match_token(TokenKind.Symbol, ';')
            break
        if tokens.match_lookahead(TokenKind.Symbol, ','):
            # separator between unit names
            tokens.match_token(TokenKind.Symbol, ',')
        elif tokens.match_lookahead(TokenKind.Identifier):
            reference = PascalUnitReference()
            reference.parse(tokens, self._file_owner, do_resolve)
            self._units.append(reference)
        else:
            raise_error(('Error reading uses clause: ' + str(tokens.next_token())), '', is_critical=False)
    logger.debug("Finished parsing uses clause")
def parse(self, tokens):
    """Parse record fields until 'end': identifier list, ':', type, ';'.

    Each parsed name becomes a PascalRecordField appended to self._fields.
    """
    from pascal_parser.types.pas_record_field import PascalRecordField
    from pascal_parser.pas_parser_utils import _parse_identifier_list, parse_type
    while not tokens.match_lookahead(TokenKind.Identifier, 'end', consume=True):
        # identifier list, ':', type, ';'
        idList = _parse_identifier_list(tokens)
        tokens.match_token(TokenKind.Symbol, ':')
        field_type = parse_type(tokens, self._block)  # renamed from 'type' (shadowed builtin)
        tokens.match_token(TokenKind.Symbol, ';')
        for varName in idList:
            # NOTE(review): this tests a *name* against a list of field
            # objects — confirm PascalRecordField compares equal to its name,
            # otherwise the duplicate check never triggers.
            if varName not in self._fields:
                field = PascalRecordField(varName, field_type, self)
                self._fields.append(field)
                # lazy %-args instead of eager string concatenation
                logger.debug("Parsed field : %s : %s", varName, field_type.name)
def match_one_lookahead(self, token_lst, consume=False):
    """
    Tries to match the next token's value with a list of values
    Returns True if a match is found, and false if there was no match

    token_lst is an iterable of (kind, value) pairs; value None matches any.
    """
    tok = self.lookahead()[0]
    logger.debug('TokenStream : Looking to find %s %s', token_lst,
                 ' will consume' if consume else '')
    for token_kind, token_value in token_lst:
        # NOTE(review): unlike match_lookahead, this comparison is
        # case-sensitive (no .lower()) — confirm that is intentional.
        if tok._kind == token_kind and (token_value is None
                                        or token_value == tok._value):
            # (removed dead local 'matched = True' — it was never read)
            logger.debug('TokenStream : Found %s (%s)', tok._kind, tok._value)
            if consume:
                self.match_token(tok._kind, tok._value)
            return True
    return False
def resolve_function_call(self, function):
    """
    Assumes the file has checked itself for the function before calling this.

    Searches the used units' functions and forward declarations, then the
    System unit; reports a (non-critical) error if nothing matches.
    """
    from pas_file_cache import get_unit_named
    logger.debug("File: Resolving Function call: %s", function.name)
    target = function.name.lower()  # hoist: compared on every candidate
    for unit_reference in self._contents.uses:
        # Guard unresolved references (consistent with resolve_type's check);
        # the original dereferenced points_to unconditionally.
        if unit_reference.points_to is None:
            continue
        unit = unit_reference.points_to.contents
        for unit_function in unit.functions:
            if unit_function.name.lower() == target:
                return unit_function
        for unit_declared_function in unit.function_declarations:
            if unit_declared_function.name.lower() == target:
                return unit_declared_function
    # check the System unit...
    system = get_unit_named('System')
    if system is not None:
        for unit_function in system.contents.functions:
            if unit_function.name.lower() == target:
                return unit_function
    raise_error(("File: Unable to resolve function: %s in %s" %
                 (function.name, self._filename)), '', is_critical=False)
def resolve_variable(self, var_name):
    """Resolve var_name against the used units, then the System unit.

    A match may be a unit variable or an enumeration value declared by a
    unit's types. Reports a (non-critical) error when unresolved.
    """
    from pas_file_cache import get_unit_named
    logger.debug("File: Resolving Variable: %s", var_name)
    target = var_name.lower()  # hoist: compared on every candidate
    for unit_reference in self._contents.uses:
        # Guard unresolved references (consistent with resolve_type's check).
        if unit_reference.points_to is None:
            continue
        unit = unit_reference.points_to.contents
        # could be a variable...
        for unit_name, unit_var in unit.variables.items():
            if unit_name.lower() == target:
                return unit_var
        # or it could be an enumeration value...
        for name, unit_type in unit.types.items():
            # Bug fix: the original used "is 'enumeration'", which compares
            # string *identity* and is not guaranteed to match; use ==.
            if unit_type.kind == 'enumeration':
                for val in unit_type.values:
                    if val.name.lower() == target:
                        return val
    # check the System unit...
    system = get_unit_named('System')
    if system is not None:
        for name, unit_var in system.contents.variables.items():
            if name.lower() == target:
                return unit_var
    raise_error(("File: Unable to resolve variable: %s in %s" %
                 (var_name, self._filename)), '', is_critical=False)
def match_token(self, token_kind, token_value=None):
    """
    Looks at the next token, and if it is the same kind as 'token_kind'
    and has the same value as 'token_value' then it is returned. Otherwise
    an error occurs and the program stops.

    If 'token_value' is None then only the 'token_kind' is checked.
    """
    tok = self.next_token()
    # PEP 8: compare to None with 'is not'. The value comparison is done
    # against the lowercased token value (callers pass lowercase values).
    if tok._kind != token_kind or (token_value is not None
                                   and token_value != tok._value.lower()):
        raise_error(('TokenStream %s: found a %s (%s) expected %s (%s)' %
                     (self._tokeniser.line_details(), tok._kind, tok._value,
                      token_kind, token_value)), '', is_critical=False)
    logger.debug('TokenStream : Matched token %s (%s)', tok._kind, tok._value)
    return tok
def parse(self, tokens, method):
    """Read in the parameters declared in a function, procedure or operator

    Consumes '(' ... ')' from the token stream. Each parameter group is:
    optional modifier identifier, one or more names separated by ',',
    then ':' and a type. Every parsed parameter is appended to self._vars
    and registered on `method` via add_parameter.
    """
    from pas_parser_utils import _parse_identifier_list, reservedWords, parse_type
    from pas_var import PascalVariable
    from types.pas_type import PascalType
    tokens.match_token(TokenKind.Symbol, '(')
    #look for parameters
    while not tokens.match_lookahead(TokenKind.Symbol, ')'):  #consume ) at end
        param_tok, other_tok = tokens.lookahead(2)
        modifier = None
        #Look for modifier
        if param_tok.kind == TokenKind.Identifier:
            # First value is modifier: two identifiers in a row means the
            # first is a modifier (e.g. 'var'/'const') and the second a name.
            if other_tok.kind == TokenKind.Identifier:
                modifier = tokens.match_token(TokenKind.Identifier).value
            # No modifier found
            else:
                modifier = None
        # get parameter names
        parameters = [tokens.match_token(TokenKind.Identifier).value]
        while tokens.match_lookahead(TokenKind.Symbol, ',', True):
            #there is a list of parameters
            parameters.append(tokens.match_token(TokenKind.Identifier).value)
        # colon seperates identifiers and type
        tokens.match_token(TokenKind.Symbol, ':')
        the_type = parse_type(tokens, self._block)  # reads the type and returns PascalType
        for parameter_name in parameters:
            toAdd = PascalVariable(parameter_name, the_type, modifier, is_parameter=True)
            self._vars.append(toAdd)
            method.add_parameter(toAdd)
            logger.debug('Parser : Adding parameter %s (%s) to %s',
                         parameter_name, the_type, method.name)
        tokens.match_lookahead(TokenKind.Symbol, ';', consume=True)  # is there a semi-colon seperator? eat it.
    tokens.match_token(TokenKind.Symbol, ')')
def process_meta_comments(self):
    # Consume the run of meta-comment / attribute / plain-comment tokens
    # that precede a declaration, dispatching each to its registered
    # processor via self._processors.
    logger.debug(
        'Parser : Starting to process meta comments: clearing old comments and attributes'
    )
    tok = self._tokens.lookahead(1)[0]  #_next_token()
    attrs_started = False
    while tok.kind in [
            TokenKind.MetaComment, TokenKind.Attribute, TokenKind.Comment
    ]:
        if tok.kind == TokenKind.Attribute:
            attrs_started = True
        # Once attributes have begun, a further *non-empty* meta comment is
        # a documentation error in the source being parsed.
        if attrs_started and tok.kind == TokenKind.MetaComment:
            tok = self._tokens.next_token()  #actually read token
            if len(tok.value) > 0:
                logger.error(
                    'Parser Error %s: Found additional meta comment after start of attributes',
                    tok)
                assert False
        else:
            tok = self._tokens.next_token()  #actually read token
            self._processors[tok.kind](tok)
        tok = self._tokens.lookahead(1)[0]
def parse(self, tokens):
    """Parse a 'var' declaration section into self._vars.

    Each group is: identifier list, ':', type, optional '= value', ';'.
    Parsing stops when the next token is a reserved word or ')'.
    """
    from pas_parser_utils import parse_type
    from pas_parser_utils import _parse_identifier_list, reservedWords
    from pas_var import PascalVariable
    # (removed dead locals 'paramDeclaration' and 'variables' — never read)
    logger.debug("Parsing variable declaration")
    tokens.match_token(TokenKind.Identifier, 'var')
    while True:
        modifier = None
        # (modifier) identifier list, ':', type, ';'
        idList = _parse_identifier_list(tokens)
        tokens.match_token(TokenKind.Symbol, ':')
        var_type = parse_type(tokens, self._block)  # renamed from 'type' (shadowed builtin)
        # assign value at creation... consume value, expression?
        #PascalExpression(self._block).parse(tokens)
        if tokens.match_lookahead(TokenKind.Operator, '=', consume=True):
            tokens.next_token()  # $ consume the assigned value... not needed for now...
            tokens.next_token()  # number...
        tokens.match_lookahead(TokenKind.Symbol, ';', consume=True)
        for varName in idList:
            if varName not in self._vars:
                # create and assign the PascalVariable
                self._vars[varName] = PascalVariable(varName, var_type, modifier)
                logger.debug("Parsed variable : %s : %s", varName, var_type.name)
            else:
                logger.error("Duplicate variable identifier found: " + str(tokens.next_token()))
                assert False
        if tokens.match_one_lookahead(reservedWords) or tokens.match_lookahead(TokenKind.Symbol, ')'):
            logger.debug("Finished parsing variable declaration")
            break
def parse(self, tokens):
    """Parse a 'var' declaration section into self._vars.

    Each group is: identifier list, ':', type, optional '= value', ';'.
    Parsing stops when the next token is a reserved word or ')'.
    """
    from pas_parser_utils import parse_type
    from pas_parser_utils import _parse_identifier_list, reservedWords
    from pas_var import PascalVariable
    # (removed dead locals 'paramDeclaration' and 'variables' — never read)
    logger.debug("Parsing variable declaration")
    tokens.match_token(TokenKind.Identifier, 'var')
    while True:
        modifier = None
        # (modifier) identifier list, ':', type, ';'
        idList = _parse_identifier_list(tokens)
        tokens.match_token(TokenKind.Symbol, ':')
        var_type = parse_type(tokens, self._block)  # renamed from 'type' (shadowed builtin)
        # assign value at creation... consume value, expression?
        #PascalExpression(self._block).parse(tokens)
        if tokens.match_lookahead(TokenKind.Operator, '=', consume=True):
            tokens.next_token()  # $ consume the assigned value... not needed for now...
            tokens.next_token()  # number...
        tokens.match_lookahead(TokenKind.Symbol, ';', consume=True)
        for varName in idList:
            if varName not in self._vars:
                # create and assign the PascalVariable
                self._vars[varName] = PascalVariable(varName, var_type, modifier)
                logger.debug("Parsed variable : %s : %s", varName, var_type.name)
            else:
                logger.error("Duplicate variable identifier found: " + str(tokens.next_token()))
                assert False
        if tokens.match_one_lookahead(reservedWords) or tokens.match_lookahead(TokenKind.Symbol, ')'):
            logger.debug("Finished parsing variable declaration")
            break
def process_meta_comment(self, token):
    """Record the text of a meta-comment token on the pending comment list."""
    comment_text = token.value
    logger.debug('Parser : Processing meta comment: %s', comment_text)
    self._comments.append(comment_text)
def _append_attribute(self, attr, val):
    """Append val to the plural list attribute named attr + 's'.

    The list is created on first use; e.g. appending to 'param' stores
    values under the 'params' key.
    """
    logger.debug('Parser : Appending attribute %s with value %s to %s',
                 attr, val, attr + 's')
    # setdefault replaces the explicit keys()-membership check and branch.
    self._attributes.setdefault(attr + 's', []).append(val)
def _add_attribute(self, attr, val):
    """Store a single-valued parser attribute, warning on duplicates.

    A second non-None assignment to the same attribute usually means the
    source contained the same meta-comment attribute twice; the new value
    still wins, matching the original behaviour.
    """
    logger.debug('Parser : Adding attribute %s with value %s', attr, val)
    # PEP 8: compare to None with 'is not', not '!='.
    if attr in self._attributes and self._attributes[attr] is not None:
        logger.warning('Parser : Added attribute twice %s = %s', attr, val)
    self._attributes[attr] = val
def process_attribute(self, token):
    """Dispatch an attribute token to the processor registered for its value."""
    logger.debug('Parser : Processing attribute: %s', token.value)
    handler = self._attribute_processors[token.value]
    handler(token)
def next_token(self):
    ''' Find and return the next token

    Skips whitespace and newlines, then classifies the next run of
    characters as a Number, Comment, MetaComment, Attribute, Boolean,
    Operator, Identifier, Symbol or String token. Loops until a complete
    token (both kind and value set) has been built, then returns it.
    '''
    def num_match(cha, tmp):
        '''Checks for a number in format ##, ##.#. Returns False when at the end of a number.'''
        if cha in '1234567890':
            return True
        elif cha == '.' and '.' not in tmp:
            # A '.' only continues the number if a digit follows (e.g. 3.14,
            # but not the '..' range operator).
            return self._peek(1) in '1234567890'
        else:
            return False

    while (True):
        t = self._next_char();
        self._token_start = self._char_no
        kind = None
        value = None
        char_number = 0
        # Ignore white space characters
        if t == ' ' or t == '\t' or t == '\r':  #ignore white space
            pass
        # Move to next line (if at end of line)
        elif t == '\n':
            self._advance_line()
        # Is it a MetaComment?
        #elif self._in_meta_comment:
        #    if t == '@':
        #        char_number = self._char_no
        #        kind = TokenKind.Attribute
        #        value = self._read_matching('', lambda cha, tmp: cha.isalnum() or cha == '_')
        # Numbers (int or float style format or hexadeciaml ($)
        elif t in '$1234567890' or (t in '-+' and self._peek(1) in '1234567890'):  #is digit or +/-
            kind = TokenKind.Number
            char_number = self._char_no
            value = self._read_matching(t, num_match)
        # Comment, single line // or meta comment line ///
        elif t == '/' and self._peek(1) == '/':  #start of comment
            if self._match_and_read('/'):
                if self._match_and_read('/'):
                    # '///' introduces a meta comment and switches the
                    # tokeniser into meta-comment mode.
                    self._meta_comment_start = self._char_no
                    self._meta_comment_line = self._line_no
                    self._in_meta_comment = True
                    kind = TokenKind.MetaComment
                    char_number = self._char_no
                    value = self.read_to_end_of_comment()
                else:
                    kind = TokenKind.Comment
                    char_number = self._char_no
                    value = self.read_to_eol()
            else:
                # NOTE(review): line_details is not *called* here (missing
                # parentheses) — this logs the bound method object.
                logger.error("Unexpected error: " + self.line_details)
        # Attribute identified by an @ symbol then a name
        elif t == '@':
            char_number = self._char_no
            kind = TokenKind.Attribute
            value = self._read_matching('', lambda cha, tmp: cha.isalnum() or cha == '_')
        # Identifier (id) of alphanumeric characters including
        elif t.isalpha():
            char_number = self._char_no
            value = self._read_matching(t, lambda cha, tmp: cha.isalnum() or cha == '_')
            # Keyword-like identifiers are reclassified by value.
            if value.lower() in ['true', 'false']:
                kind = TokenKind.Boolean
            elif value.lower() in ['or', 'and', 'not', 'xor', 'mod', 'div', 'in']:
                kind = TokenKind.Operator
            else:
                kind = TokenKind.Identifier
        #Bound Comment
        elif t == '{' or (t == '(' and self._peek(1) == '*'):
            if t == '(' and self._match_and_read('*'):
                char_number = self._char_no
                # read until the closing '*)', then strip it from the value
                comment = self._read_until('', lambda temp: temp[-2:] == '*)')
                kind = TokenKind.Comment
                value = comment[:-2]
            elif t == '{':
                char_number = self._char_no
                # read until the closing '}', then strip it from the value
                comment = self._read_until('', lambda temp: temp[-1:] == '}')
                kind = TokenKind.Comment
                value = comment[:-1]
        # Operator
        elif (t == ':' and self._peek(1) == '=') or t in '=+-*/><':
            kind = TokenKind.Operator
            char_number = self._char_no
            # Two-character operators are assembled by peeking at the next
            # character; otherwise the single character is the operator.
            if t == ':' and self._match_and_read('='):
                value = ':='
            elif t in '+' and self._match_and_read('='):
                value = t + '='
            elif t in '-' and self._match_and_read('='):
                value = t + '='
            elif t in '/' and self._match_and_read('='):
                value = t + '='
            elif t in '*' and self._match_and_read('='):
                value = t + '='
            elif t == '*' and self._match_and_read('*'):
                value = '**'
            elif t == '<' and self._match_and_read('>'):
                value = '<>'
            elif t in '<>' and self._match_and_read('='):
                value = t + '='
            else:
                value = t
        # Symbol
        elif t in '(),:;[].^':
            kind = TokenKind.Symbol
            char_number = self._char_no
            value = t
        # Catch any single quotes inside a string value.
        elif t == "'":
            char_number = self._char_no
            # A '' inside the string is an escaped quote and does not end it.
            string = self._read_until('', lambda temp: (temp[-1:] == "'") and (not self._match_and_read("'")))
            kind = TokenKind.String
            value = string[:-1]
        # Hmm.. unknown token. What did we forget?
        else:
            logger.error("Unknown token type: " + (t if t else 'NONE!') + self.line_details())
        # Only return once a complete token was assembled; whitespace and
        # unknown characters leave kind/value unset and loop again.
        if (not (kind is None)) and (value != None):
            logger.debug('Tokeniser : read %s (%s)', kind, value)
            return Token(kind, value, self._line_no+1, char_number+1, self._filename)