def method_invocation(element):
    """Resolve the method an invocation node refers to and annotate the node.

    Two shapes are handled, selected by the tag of the first child:

    * ``name``    -- the method is looked up by name in the node's own
      environment.
    * ``primary`` -- the method is looked up in the environment of the class
      that is the static type of the primary expression.

    In both cases ``element.attrib["type"]`` receives the type reported for
    the method's declaration site and ``element.declaration`` receives the
    declaration site itself.  Any other first child trips an assertion.
    """
    head_tag = element[0].tag

    if head_tag == "name":
        method_name = name_to_str(element.find("name"))
        to_file(element)
        site = element.env.get_declaration_site_for_method_name(
            method_name, argument_list(element))
        element.attrib["type"] = \
            element.env.get_type_for_declaration_site(site)
        element.declaration = site
        return

    if head_tag == "primary":
        receiver_type = element[0].attrib["type"]
        error_if(is_primitive(receiver_type),
                 "Cannot invoke method on primitive " + receiver_type)
        class_site = element.env.get_declaration_site_for_class_name(
            receiver_type)
        method_name = element.find("tok_identifier").text
        method_site = class_site.env.get_declaration_site_for_method_name(
            method_name, argument_list(element))
        element.attrib["type"] = \
            method_site.env.get_type_for_declaration_site(method_site)
        element.declaration = method_site
        return

    # No other node shape is expected here.
    assert False
def equality_expression(element):
    """Type-check an equality comparison and mark the node as ``boolean``.

    The operand types are read from the first and third children.  An error
    is reported when the environment says the two types are not identity
    comparable, or when either operand has type ``void``.
    """
    operand_types = (element[0].attrib["type"], element[2].attrib["type"])
    incomparable = not element.env.are_identity_comparable(*operand_types)
    error_if(incomparable or "void" in operand_types,
             "Incompatible types in equality '%s' vs '%s'" % operand_types)
    element.attrib["type"] = "boolean"
def get_declaration_site_for_constructor_name(self, name, args):
    """Return the constructor of class ``name`` that accepts ``args``.

    The class is resolved through this environment, then its own environment
    is asked for a constructor matching ``args``.  An error is reported via
    ``error_if`` when that lookup yields ``None``.
    """
    owner = self.get_declaration_site_for_class_name(name)
    match = owner.env.find_constructor(args)
    not_found = match is None
    error_if(not_found,
             "No constructor for class %s with args %s found." %
             (name, str(args)))
    return match
def check_interfaces(tree, filename):
    """Validate every interface declaration found under ``tree``.

    Each interface must be named ``filename``, and none of its abstract
    methods may carry the ``static``, ``final`` or ``native`` modifier.
    """
    forbidden = ["static", "final", "native"]
    for iface in tree.findall(".//interface"):
        error_if(iface.get("name") != filename,
                 "interface must have same name as file.")
        for declared in iface.findall(".//abstract_method"):
            error_if(onein(forbidden, modifiers(declared)),
                     "An interface method cannot be static, final, or native.")
def initalize_method_environment(self, tree):
    """Populate this environment for the method rooted at ``tree``.

    Every ``param`` descendant is registered in ``self.formal_parameters``
    under its identifier text; a repeated identifier is reported as an error.
    The method node is then stored in ``self.methods`` under the key
    ``("$", ())`` and its ``slot`` attribute is reset to 0.
    """
    params = self.formal_parameters
    for param in tree.findall(".//param"):
        ident = param.find(".//variable//tok_identifier").text
        error_if(ident in params, "Method has two args with same name.")
        params[ident] = param

    self.methods[("$", ())] = tree
    tree.slot = 0
def assignment(element):
    """Type-check an assignment; the node takes the left-hand side's type.

    The assigned expression is the last child.  An error is reported when
    the environment says its type is not assignable to the target's type.
    """
    target_type = element.find("left_hand_side").attrib["type"]
    value_type = element[-1].attrib["type"]
    assignable = element.env.is_assignable(target_type, value_type)
    error_if(not assignable,
             "Cannot assign '%s' to '%s'" % (value_type, target_type))
    element.attrib["type"] = target_type
def add_constructor_declarations_to_environments(self, tree):
    """Register every constructor declared under ``tree``.

    Constructors are stored in ``self.constructors`` keyed by their argument
    list.  A constructor whose name differs from ``tree``'s ``name``
    attribute, or whose argument list is already registered, is reported as
    an error.
    """
    for ctor in tree.findall(".//constructor_declaration"):
        error_if(ctor.get("name") != tree.get("name"),
                 "Wrong constructor name.")
        signature = argument_list_for_declaration(self, ctor)
        error_if(signature in self.constructors,
                 "Duplicate constructors not allowed.")
        self.constructors[signature] = ctor
def add_method_declarations_to_environment(self, tree):
    """Register every concrete and abstract method declared under ``tree``.

    Methods are stored in ``self.methods`` keyed by ``(name, argument list)``;
    a repeated key is reported as an error.  Afterwards the superclass of
    ``self.tree`` is computed and stored on it.
    """
    declared = tree.findall(".//method") + tree.findall(".//abstract_method")
    for method in declared:
        key = (method.get("name"),
               argument_list_for_declaration(self, method))
        error_if(key in self.methods, "Duplicate methods not allowed.")
        self.methods[key] = method

    # ``__superclass`` is a name-mangled private method, so this function
    # has to stay inside the class that defines it.
    self.tree.superclass = self.__superclass(self.tree)
def result(element):
    """Type-check a binary expression against ``expected_type_string``.

    ``expected_type_string`` is not a parameter; it is taken from the
    enclosing scope.  When both operands are integral primitives the node is
    typed ``int`` and nothing else is checked.  Otherwise both operands must
    have exactly the expected type, and the node takes that type.
    """
    lhs_type = element.env.canonicalize_name(type_string(element[0]))
    rhs_type = element.env.canonicalize_name(type_string(element[2]))

    if is_integral_primitive(lhs_type) and is_integral_primitive(rhs_type):
        element.attrib["type"] = "int"
        return

    # Each operand is compared with the expected type on its own.  A chained
    # ``lhs != rhs != expected`` only compares neighbouring pairs, so it lets
    # through e.g. two equal operands of the wrong type, or a wrong left
    # operand next to a correct right operand.
    error_if(lhs_type != expected_type_string or
             rhs_type != expected_type_string,
             "binary_expression type mismatch")
    element.attrib["type"] = expected_type_string
def array_creation_expression(element):
    """Compute the type of an array creation expression.

    The base type text comes from the second child.  ``[]`` is appended when
    the node contains a ``dim_expr``.  If that ``dim_expr`` holds a size
    expression, its type must be an integral primitive.  The canonicalized
    name is stored as the node's type.
    """
    type_name = collect_token_text(element[1])
    if element.find(".//dim_expr") is not None:
        type_name += "[]"

    size_expr = element.find(".//dim_expr/expression")
    if size_expr is not None:
        error_if(not is_integral_primitive(size_expr.attrib["type"]),
                 "dim_expr require integral type")

    element.attrib["type"] = element.env.canonicalize_name(type_name)
def unary_expression(element):
    """Type-check a two-child unary expression (operator followed by operand).

    Nodes with any other number of children are left untouched.  For a
    recognised operator the node takes the operand's type.
    """
    if len(element) != 2:
        return

    operator, operand = element[0], element[1]

    if operator.tag == "tok_minus":
        # Unary minus.
        error_if(not is_integral_primitive(operand.attrib["type"]),
                 "must negate integral primitive")
        element.attrib["type"] = operand.attrib["type"]
    elif operator.tag == "tok_complement":
        # Logical complement: the operand has to be boolean.
        error_if(operand.attrib["type"] != "boolean", "must !boolean")
        element.attrib["type"] = operand.attrib["type"]
def local_variable_declaration(element):
    """Type-check the initializer of a local variable declaration, if any.

    When an ``expression`` child is present, its type must be assignable to
    the declared type reported by the environment.  The node's own type is
    set to the placeholder string ``"DANGER"``.
    """
    initializer = element.find("expression")
    if initializer is not None:
        declared = element.env.get_type_for_declaration_site(element)
        actual = initializer.attrib["type"]
        error_if(not element.env.is_assignable(declared, actual),
                 'Cannot assign type "' + actual + '" to "' + declared + '"')

    # NOTE(review): "DANGER" looks like a sentinel meaning "a declaration has
    # no expression type" -- confirm against the consumers of attrib["type"].
    element.attrib["type"] = "DANGER"
def relational_expression(element):
    """Type-check a relational or ``instanceof`` expression.

    Ordering comparisons need two integral primitive operands;
    ``instanceof`` rejects primitive operands on either side.  The node is
    always typed ``boolean``.
    """
    lhs_type = element.env.canonicalize_name(element[0].attrib['type'])
    rhs_type = element.env.canonicalize_name(element[2].attrib['type'])
    operator = element[1].text

    if operator in ["<", ">", "<=", ">="]:
        both_integral = (is_integral_primitive(lhs_type) and
                         is_integral_primitive(rhs_type))
        error_if(not both_integral,
                 "Relational expression passed non-integral types.")
    elif operator == "instanceof":
        error_if(is_primitive(lhs_type) or is_primitive(rhs_type),
                 "Cannot have primitives in instanceof")

    element.attrib["type"] = "boolean"
def add_single_type_import_statements(self, pkg, this_clazz, tree, trees):
    """Record the single-type imports of ``tree`` in ``self.classes_single``.

    For every imported fully qualified name, the simple name (the text after
    the last dot) is mapped to the class found for that name.  Two clashes
    are reported as errors:

    * the simple name is already declared in this file, unless the import
      names this file's own class;
    * an earlier single-type import already bound the simple name to a
      different class.
    """
    for fullname in [name_to_str(node)
                     for node in tree.findall(".//import/name")]:
        simple = fullname.rsplit(".", 1)[-1]

        # The conditions are left as short-circuit expressions so the
        # right-hand operands are only evaluated when the simple name is
        # already known.
        clashes_with_file = (
            simple in self.classes_this and
            not fullname == pkg + "." + this_clazz.get("name"))
        error_if(clashes_with_file,
                 "Single-type imports clash with class defined in file.")

        clashes_with_import = (
            simple in self.classes_single and
            not self.__findclass(fullname, trees) ==
            self.classes_single[simple])
        error_if(clashes_with_import,
                 "Two single-type import decls clash with each other.")

        self.classes_single[simple] = self.__findclass(fullname, trees)
def constructor_declaration(subtree):
    """Generate the assembly for a constructor and store it on ``subtree``.

    The emitted code consists of, in order: the instance-field initializers
    of the enclosing class, a call to the superclass constructor (omitted
    when the enclosing class is ``java.lang.Object``), the constructor body,
    and an epilogue that loads a value into ``eax`` and returns.

    The result is written to ``subtree.assembly``; nothing is returned.
    """
    mangled_name = mangle_fn_name(subtree)
    constructor_body = subtree.find("constructor_body")
    error_if(constructor_body is None, "No constructor body")

    # The label of the superclass constructor is built from the mangled
    # canonical name followed by the mangled simple name of the superclass.
    superclass = subtree.env.findclass("this").superclass
    superclass_mangled_name = mangle_class_name(superclass.get("canonical_name"))
    superclass_constructor = superclass_mangled_name + mangle_class_name(superclass.get("name"))

    fields = subtree.env.findclass("this").findall(".//field")
    field_initializers = ""
    for field in fields:
        # Only non-static fields that have an initializer expression are
        # initialized here.
        if "static" not in modifiers(field) and field.find("expression") is not None:
            # Generate code for the initializer unless it already has some.
            if not hasattr(field.find("expression"), "assembly"):
                generate(field.find("expression"))
            field_initializers += field.find("expression").assembly + "\n"
            # Store the initializer's value at this + field_offset * 4.
            field_initializers += """
; field_initializer
mov eax, DWORD [ebp + 8] ; this -> eax
add eax, {field_location}
mov ebx, {value}
mov DWORD [eax], ebx
; end_field_initializer
""".format(
                field_location=field.env.find_nonlocal(collect_token_text(field.find("variable"))).field_offset * 4,
                value=stack_slot_to_operand(field.find("expression").slot),
            )

    chained_constructor_call = """
;initialize fields
{field_initializers}
; call superclass default constructor
push DWORD [ebp + 8]
call {superclass_constructor}
""".format(
        superclass_constructor=superclass_constructor,
        field_initializers=field_initializers
    )

    # java.lang.Object gets the field initializers only, with no superclass
    # constructor call.
    this_class = subtree.env.findclass("this")
    if this_class.get("canonical_name") == "java.lang.Object":
        chained_constructor_call = field_initializers

    subtree.assembly = method_declaration_asm(
        constructor_body.assembly,
        mangled_name,
        subtree,
        chained_constructor_call
    )

    # NOTE(review): ``this`` is not defined in this function; presumably it is
    # a module-level stack-slot constant for the receiver -- confirm.
    subtree.assembly += """
mov eax, {this_ptr}
leave
ret
; end constructor {name}
""".format(
        this_ptr=stack_slot_to_operand(this),
        name=mangled_name
    )
def field_access(element):
    """Resolve a field access on a primary expression and annotate the node.

    The field is looked up in the environment of the class that is the
    static type of the first child; primitive types are rejected.  The node
    receives the field's type and its declaration site.
    """
    owner_type = element[0].attrib["type"]
    error_if(is_primitive(owner_type), "No fields on primitive type.")

    owner_site = element.env.get_declaration_site_for_class_name(owner_type)
    field_name = element.find("tok_identifier").text
    field_site = owner_site.env.get_declaration_site_for_variable_name(
        field_name, element.find("tok_identifier"))

    element.attrib["type"] = \
        field_site.env.get_type_for_declaration_site(field_site)
    element.declaration = field_site
def check_hierarchy(tree):
    """Check the ``implements`` list of the type declared in ``tree``.

    The type is the first ``class`` descendant, or the first ``interface``
    descendant when there is no class.  Naming the same interface more than
    once is reported as an error.  For an interface declaration,
    ``check_cyclic`` is additionally invoked on it.
    """
    clazz = tree.find(".//class")
    # ``find`` returns None when nothing matches.  Testing the element's
    # truth value instead would treat a class element without children as
    # missing, because an Element's truthiness depends on its child count.
    if clazz is None:
        clazz = tree.find(".//interface")

    ifaces = [clazz.env.findclass(name_to_str(x))
              for x in clazz.findall(".//implements/name")]
    for position, iface in enumerate(ifaces):
        # Only look at later entries so each duplicate pair is hit once.
        error_if(iface in ifaces[position + 1:],
                 "Mention an interface more than once")

    if clazz.tag == "interface":
        check_cyclic(clazz, [])
def multiplicative_expression(subtree):
    """Generate assembly for ``*``, ``/`` and ``%`` and store it on the node.

    Operands are read from the stack slots of the first and third children;
    the result goes into a freshly allocated slot, which is recorded in
    ``subtree.slot``.  The code is written to ``subtree.assembly``.

    Both ``/`` and ``%`` are emitted as ``idiv`` and are preceded by a
    zero-divisor test that calls ``__exception``.  Without the test a zero
    divisor would fault inside ``idiv`` rather than go through the runtime's
    exception routine.
    """
    lhs_location = subtree[0].slot
    rhs_location = subtree[2].slot
    new_stack_slot = generate_new_stack_slot(subtree)
    operator_type = collect_token_text(subtree[1])

    result = operator = ""
    check = ""
    if operator_type == "*":
        operator = "imul"
        result = "eax"
    elif operator_type in ("/", "%"):
        operator = "idiv"
        # idiv leaves the quotient in eax and the remainder in edx.
        result = "eax" if operator_type == "/" else "edx"
        label_no = new_label_no()
        check = """
sub ebx, 0
jne .{okay_label}
call __exception
.{okay_label}:
""".format(
            okay_label="DIV0_CHECK_" + str(label_no)
        )
    else:
        error_if(True, "Unknown argument to mulitplicative expression")

    # edx:eax is set to the sign-extended left operand before the operation.
    subtree.assembly = """
; multiplicative {dbg}
mov edx, {lhs}
mov eax, edx
sar edx, 31
mov ebx, {rhs}
{check}
{op} ebx
mov {nss}, {result}
; end multiplicative
""".format(
        nss=stack_slot_to_operand(new_stack_slot),
        lhs=stack_slot_to_operand(lhs_location),
        rhs=stack_slot_to_operand(rhs_location),
        result=result,
        check=check,
        op=operator,
        dbg=collect_debug_text(subtree),
    )
    subtree.slot = new_stack_slot
def class_methods(clazz):
    """Returns all methods declared in a 'clazz' subtree and its parents."""
    collected = clazz.findall(".//method") + interface_methods(clazz)

    parent = clazz.superclass
    if parent == clazz:
        # A class that is its own superclass is the root of the hierarchy.
        return collected

    visited = [clazz, parent]
    # NOTE(review): the direct superclass contributes only its own methods,
    # while more distant ancestors also contribute interface methods --
    # confirm whether that asymmetry is intended.
    collected += parent.findall(".//method")

    while parent != parent.superclass:
        parent = parent.superclass
        error_if(parent in visited, "Cycle in class extensions")
        visited.append(parent)
        inherited = parent.findall(".//method") + interface_methods(parent)
        collected.extend(inherited)

    return collected
def array_access(element):
    """Type-check an array access; the node takes the array's element type.

    When the accessed expression is a ``name``, the variable is resolved
    through the environment and its declaration site is recorded on the
    node; otherwise the first child's already computed type is used.  The
    index expression (second-to-last child) must be an integral primitive.
    """
    target = element[0]
    if target.tag == "name":
        variable_name = name_to_str(target)
        site = element.env.get_declaration_site_for_variable_name(
            variable_name, target)
        array_type = element.env.get_type_for_declaration_site(site)
        element.declaration = site
    else:
        array_type = target.attrib["type"]

    error_if(not isarray(array_type), "Array access on non-array type.")
    # Strip the trailing "[]" to obtain the element type.
    element.attrib["type"] = array_type[:-2]

    index_type = element[-2].attrib["type"]
    error_if(not is_integral_primitive(index_type),
             "Cannot index into array with non-integral expression.")
def empty_expression(subtree):
    """Generate assembly that stores the node's ``value`` in a new stack slot.

    The strings ``"True"`` and ``"False"`` are replaced by the names ``true``
    and ``false``, which are not defined in this function (presumably
    module-level constants -- TODO confirm).  A missing value is reported as
    an error.  The code goes to ``subtree.assembly`` and the slot to
    ``subtree.slot``.
    """
    literal = subtree.get("value")
    literal = {"True": true, "False": false}.get(literal, literal)
    error_if(literal is None, "None in empty expression!")

    target_slot = generate_new_stack_slot(subtree)
    template = """
; empty expr {dbg}
mov {nss}, {val}
; end empty expr
"""
    subtree.assembly = template.format(
        nss=stack_slot_to_operand(target_slot),
        val=literal,
        dbg=collect_debug_text(subtree)
    )
    subtree.slot = target_slot
def add_import_star_statements(self, imported, tree, trees):
    """Record the on-demand (star) imports of ``tree`` in ``self.classes_star``.

    ``java.lang`` is always processed in addition to the explicit star
    imports.  Packages already listed in ``imported`` are skipped; newly
    processed ones are appended to it in place.  A class name offered by
    more than one package is reported as ambiguous only if the file actually
    uses that name.
    """
    packages = [name_to_str(node)
                for node in tree.findall(".//star_import/name")]
    packages.append("java.lang")

    for package in packages:
        if package in imported:
            continue
        imported.append(package)

        exported = find_all_in_package(package, trees)
        used = [name_to_str(node) for node in tree.findall(".//name")]
        used = used + ["Object"]

        for simple_name in exported:
            # Names bound by a single-type import are left alone.
            if simple_name in self.classes_single:
                continue
            error_if(simple_name in self.classes_star and simple_name in used,
                     "Ambiguous class %s" % simple_name)
            self.classes_star[simple_name] = exported[simple_name]
def check_field_initializers(tree):
    """Reject illegal forward references in the field initializers of ``tree``.

    A field initializer may not read, by simple name, the field itself or a
    field declared later.  Names on the left-hand side of an assignment and
    names inside a class instance creation expression are exempt.  For
    static fields, every variable name in the declaration is additionally
    resolved through the field's environment.
    """
    #Check the rules specified in Section 8.3.2.3 of the Java Language
    #Specification regarding forward references. The initializer of a
    #non-static field must not use (i.e. read) by simple name (i.e. without an
    #explicit this) itself or a non-static field declared later in the same
    #class.

    #The declaration of a member needs to appear before it is used only if the
    #member is an instance (respectively static) field of a class or interface
    #C and all of the following conditions hold:

    #The usage occurs in an instance (respectively static) variable initializer
    #of C or in an instance (respectively static) initializer of C.
    #The usage is not on the left hand side of an assignment.
    #C is the innermost class or interface enclosing the usage.
    fields = tree.findall(".//field")
    for position, field in enumerate(fields):
        # The field itself counts as a forward reference, hence
        # ``fields[position:]`` rather than ``fields[position + 1:]``.
        forward_reference_names = [
            name_to_str(later.find(".//variable"))
            for later in fields[position:]]

        potential_elements = set()
        expression_element = field.find("expression")
        if expression_element is not None:
            for child in expression_element.findall(".//name"):
                # Only the first component of a qualified name can be a
                # simple-name reference to a field.
                child_name = name_to_str(child).split(".")[0]
                if child_name in forward_reference_names:
                    potential_elements.add(child)

        valid_references = field.findall(".//left_hand_side/name")
        valid_references += \
            field.findall(".//class_instance_creation_expression/name")
        for child in valid_references:
            for subchild in child.getiterator():
                potential_elements.discard(subchild)

        error_if(len(potential_elements),
                 "Forward reference not allowed in initializer.")

        if static_declaration(field):
            for name in field.findall(".//name"):
                # Inspect the node being iterated.  Testing ``child`` here
                # would look at whatever an earlier loop left behind, or
                # fail if no earlier loop ran.
                if hasattr(name, "declaration") and \
                        isvariable(name.declaration):
                    # The result is discarded; the lookup is presumably done
                    # for the errors it can report -- TODO confirm.
                    field.env.get_declaration_site_for_variable_name(
                        name_to_str(name), name)
def additive_expression(subtree):
    """Generate assembly for ``+`` and ``-`` and store it on the node.

    If either operand has type ``java.lang.String``, both operands are
    promoted to strings and the code calls the ``String.concat`` routine.
    Otherwise the generic binary-operator template is used with ``add`` or
    ``sub``.  The result slot is recorded in ``subtree.slot`` and the code in
    ``subtree.assembly``.
    """
    lhs_slot = subtree[0].slot
    rhs_slot = subtree[2].slot
    result_slot = generate_new_stack_slot(subtree)

    operand_types = [subtree[0].get("type"), subtree[2].get("type")]
    if "java.lang.String" in operand_types:
        # String concatenation: the promotion helpers return the slot that
        # holds the string together with the code that fills it.
        lhs_slot, lhs_assembly = generate_promotion_to_string(subtree[0])
        rhs_slot, rhs_assembly = generate_promotion_to_string(subtree[2])
        template = """
; string additive expression
{lhs_assembly}
{rhs_assembly}
mov eax, {lhs_slot}
mov ebx, {rhs_slot}
push ebx
push eax
call java_lang_String_concat_java_lang_String_
mov {result_slot}, eax
"""
        subtree.assembly = template.format(
            lhs_assembly=lhs_assembly,
            rhs_assembly=rhs_assembly,
            rhs_slot=stack_slot_to_operand(rhs_slot),
            lhs_slot=stack_slot_to_operand(lhs_slot),
            result_slot=stack_slot_to_operand(result_slot),
        )
        subtree.slot = result_slot
        return

    # Arithmetic case: map the operator token to its instruction mnemonic.
    mnemonics = {"+": "add", "-": "sub"}
    operator_type = collect_token_text(subtree[1])
    if operator_type not in mnemonics:
        error_if(True, "Unknown additive_expression operator_type")
    operator = mnemonics.get(operator_type, "")

    subtree.assembly = binary_operator_assembly(
        lhs_slot, rhs_slot, operator, result_slot,
        collect_debug_text(subtree))
    subtree.slot = result_slot
def check_literals(tree):
    """Check integer literal ranges and reject complemented integer literals.

    A literal that is the direct operand of a unary minus may be as large as
    2**31, so that ``-2147483648`` is accepted.  Every other literal must be
    at most 2**31 - 1.  An integer literal directly under a ``!`` or ``~``
    operator is reported as an error.
    """
    # Literals that sit directly under a unary minus.
    negated_literals = []
    for unary in tree.findall(".//unary_expression"):
        if unary[0].tag != "tok_minus":
            continue
        if unary.find("./integer_literal") is not None:
            negated_literals += unary.findall(".//integer_literal")

    for literal in tree.findall(".//integer_literal"):
        # Negated literals get a larger bound instead of being skipped;
        # skipping them would accept out-of-range values such as
        # -2147483649.
        if literal in negated_literals:
            limit = 2 ** 31
        else:
            limit = 2 ** 31 - 1
        error_if(int(literal.get("value")) > limit,
                 "Integer literal too large")

    complement_expr = [
        x for x in tree.findall(".//unary_expression_not_plus_minus")
        if x[0].tag == "tok_complement" or x[0].tag == "tok_bit_complement"]
    for tag in complement_expr:
        error_if(tag.find("./integer_literal") is not None,
                 "Bit complement not allowed.")
def cast_expression(element):
    """Type-check a cast expression; the node takes the cast's target type.

    The second child determines the form of the cast:

    * ``name``       -- cast to a named type, canonicalized through the
      environment, with ``[]`` appended when a ``dims`` child is present;
    * ``expression`` -- the target type is that expression's type;
    * anything else  -- cast to a primitive type, with ``[]`` appended when
      the third child is ``dims``.

    An error is reported when the environment says the operand's type cannot
    be cast to the target type.
    """
    if element[1].tag == "name":
        expression_being_cast = element.find("unary_expression_not_plus_minus")
        cast_type = element.env.canonicalize_name(name_to_str(element[1]))
        # Test for presence explicitly: an Element without children is
        # falsy, so a bare truth test could drop the array suffix.
        if element.find("dims") is not None:
            cast_type += "[]"
    elif element[1].tag == "expression":
        # Expression case
        cast_type = element[1].attrib["type"]
        expression_being_cast = element[-1]
    else:
        # Primitive cast case
        cast_type = collect_token_text(element[1])
        if element[2].tag == "dims":
            cast_type += "[]"
        expression_being_cast = element[-1]

    error_if(not element.env.can_be_cast(expression_being_cast.attrib["type"],
                                         cast_type),
             "TODO(thurn): This is an error.")
    element.attrib["type"] = cast_type
def build_envs(files):
    """Lexes/Parses/does checking for all files in files.

    The tree loaded from ``Array.xml`` is always placed first, under the
    file name ``$Array.java``.  Parsed trees are memoized per file name in
    ``cached_trees``.  After parsing, environments are built for every tree,
    then superclass members are merged in and the type and hierarchy checks
    run.  ``find_all_in_package_cache`` is reset on every call.

    Returns the list of trees, with the array tree first.
    """
    global find_all_in_package_cache
    find_all_in_package_cache = {}

    trees = [ElementTree.ElementTree(file="Array.xml").getroot()]
    files = ["$Array.java"] + files
    trees[0].filename = "$Array.java"

    for f in files[1:]:
        if f in cached_trees:
            trees += [cached_trees[f]]
        else:
            CurrentFile.name = f
            # Close the source file deterministically instead of leaving the
            # handle for the garbage collector.
            with open(f) as source_file:
                source_text = source_file.read()
            tree = parse(lex(source_text), f)
            cached_trees[f] = tree
            trees += [tree]

    name_to_class = {}
    for tree in trees:
        CurrentFile.name = tree.filename
        clazz = find_type_decl(tree)
        name = clazz.get("canonical_name")
        error_if(name in name_to_class, "Duplicate class defined.")
        # Only names containing a dot (i.e. package-qualified) are recorded.
        if name.find(".") != -1:
            name_to_class[name] = clazz

    for x in range(0, len(trees)):
        CurrentFile.name = trees[x].filename
        # NOTE(review): membership is tested with the tree object while
        # entries are stored under the file name, so this test looks like it
        # is always true -- confirm before relying on this cache.
        if trees[x] not in cached_environments:
            build_environments(trees[x], trees, name_to_class)
            cached_environments[files[x]] = trees[x].env

    for tree in trees:
        CurrentFile.name = tree.filename
        clazz = find_type_decl(tree)
        clazz.env.add_superclass_methods()
        clazz.env.add_superclass_fields()
        check_types(tree)
        check_hierarchy(tree)

    return trees
def check_path_for_protected(self, path):
    """Enforce protected-access rules along an access path.

    ``path`` is walked as consecutive (previous, current) pairs.  A pair is
    only checked when ``current`` is protected and lives in a different
    package from this environment.  Access through a class (static access)
    requires the current class to be a subtype of the member's class; access
    through an expression additionally requires the expression's type to be
    a subtype of the current class.
    """
    if len(path) == 1:
        # Impossible to violate protected with a 1 length path
        return

    for previous, current in zip(path, path[1:]):
        if "protected" not in modifiers(current):
            continue
        if self.find_package_name() == current.env.find_package_name():
            continue

        own_class = self.get_current_class_name()
        member_class = current.env.get_current_class_name()

        if previous.tag == "class":
            error_if(not self.is_subtype(own_class, member_class),
                     "must be a subtype to access a proteced static member")
            continue

        receiver_type = self.canonicalize_name(type_string(previous))
        error_if(not self.is_subtype(receiver_type, own_class),
                 "must invoke on a subtype to access a protected member")
        error_if(not self.is_subtype(own_class, member_class),
                 "Cannot invoke protected member in subtype")
def parse_token_list(tokens):
    """Returns a parse tree of a list of tokens.

    Uses LR(1) tree-building algorithm from the parsing handout.  The parse
    table is not passed in; it is read from the ``shift_rules_`` and
    ``reduce_rules_`` tables defined outside this function.

    Input:
        tokens: a list of tokens, each exposing a ``type`` attribute.
    Returns:
        a parse tree of the tokens.  Interior nodes are two-item lists
        ``[symbol, children]``; leaves are the tokens themselves.  The value
        returned is the second entry left on the node stack (presumably the
        first one is a start-of-input token -- TODO confirm).
    """
    # stateStack holds LR states, nodeStack the tree nodes built so far.
    stateStack = [0]
    nodeStack = []
    for a in tokens:
        a_ = a.type
        error_if(not (stateStack[-1], a_) in shift_rules_,
              "Parse error at token " + a.__str__())
        # Apply reductions for as long as the table asks for one on this
        # lookahead token.
        while "reduce" == shift_rules_[stateStack[-1], a_][0]:
            # A is the left-hand side of the rule, y its right-hand side.
            A = reduce_rules_[shift_rules_[stateStack[-1], a_][1]][0]
            y = reduce_rules_[shift_rules_[stateStack[-1], a_][1]][1:]
            child_nodes = []
            # Pop one node and one state per right-hand-side symbol; nodes
            # are prepended so the children end up in source order.
            for _ in range(0, len(y)):
                child_nodes = [nodeStack.pop()] + child_nodes
                stateStack.pop()
            nodeStack += [[A, child_nodes]]
            # Transition on the non-terminal that was just produced.
            stateStack += [shift_rules_[stateStack[-1], A][1]]
            error_if(not (stateStack[-1], a_) in shift_rules_,
                  "Parse error at token " + a.__str__())
        error_if(not (stateStack[-1], a_) in shift_rules_,
              "Parse error at token " + a.__str__())
        # Shift the lookahead token.
        nodeStack += [a]
        stateStack += [(shift_rules_[(stateStack[-1], a_)][1])]
    return nodeStack[1]
def check(files):
    """Run all static checks on ``files`` and return an exit status.

    Returns 0 when every check passes and 42 when a ``JoosSyntaxException``
    is raised along the way.  The exception's message is printed unless
    ``Testing.testing`` is set.
    """
    try:
        trees = build_envs(files)

        #All statements must be reachable. Details of the exact definition of
        #reachability are specified in Section 14.20 of the Java Language
        #Specification.
        for tree in trees:
            for block in (tree.findall(".//method/block") +
                          tree.findall(".//constructor_body")):
                check_reachability(block)
            for method in tree.findall(".//method"):
                # Methods without a ``tok_void`` descendant are treated as
                # value-returning and must return on every path.
                if method.find(".//tok_void") is None:
                    returns = always_returns(method.find("block"))
                    error_if(not returns,
                             "Doesn't always return from nonvoid method")

        #Every local variable must have an initializer, and the variable must
        #not occur in its own initializer.
        for tree in trees:
            for lvar in tree.findall(".//local_variable_declaration"):
                name = lvar.find(".//variable/tok_identifier").text
                error_if(lvar.find(".//expression") is None,
                         "Every local variable must have an initializer")
                for ident in lvar.findall(".//expression//name"):
                    error_if(name == name_to_str(ident),
                             "Self cannot appear in local variable initializer.")
        return 0
    # ``except ... as`` and the parenthesized print are accepted by
    # Python 2.6+ as well as Python 3.
    except JoosSyntaxException as e:
        if not Testing.testing:
            print(e.msg)
        return 42