def test_get_indent(self):
    """Check the indent width that get_indent reports for sample lines.

    The cases cover no indent, one space, four spaces and two tabs. The
    two-tab case expects 2, i.e. each tab counts as a single unit.
    """
    from utils.line_features import get_indent

    self.assertEqual(get_indent("int a = 3; "), 0)
    self.assertEqual(get_indent(" int a = 3; "), 1)
    # Four leading spaces: this input has to differ from the one-space case
    # above, otherwise the two assertions contradict each other.
    self.assertEqual(get_indent("    int a = 3; "), 4)
    self.assertEqual(get_indent("\t\tint a = 3; "), 2)
def compute(self, code, annotations):
    """Annotate lines whose indent differs from the norm for their level.

    The most frequent indent width is determined for every indent level
    reported by ``get_indent_levels``, preferring for each level a width
    deeper than the one chosen for the level before it. Every line whose
    indent matches none of the widths expected for its levels is then
    reported through ``self.add_to_annotations``.

    Args:
        code: object exposing the source lines as ``code.lines``.
        annotations: passed through unchanged to
            ``self.add_to_annotations`` for each offending line.

    Returns:
        None. Findings are delivered via ``self.add_to_annotations``.
    """
    lines = code.lines
    levels = get_indent_levels(lines)

    # Collect the observed indent widths for each indent level.
    indentations = {}  # how far indented levels should be
    for line_no, line in enumerate(lines):
        line_levels = levels[line_no]
        if line_levels is None:
            # lines ignored because they are comments
            continue
        indent = get_indent(line)
        for level in line_levels:
            indentations.setdefault(level, []).append(indent)

    # Find the most common indentation for each indent level, ensuring that
    # each successive level is deeper. Levels are visited in ascending order
    # because each one depends on the result for ``level - 1``; relying on
    # dict iteration order could hit a level before its parent was resolved.
    common_indentation = {}
    for level in sorted(indentations):
        indents = indentations[level]

        # Count every width once instead of rescanning the list per lookup.
        counts = {}
        for indent in indents:
            counts[indent] = counts.get(indent, 0) + 1

        candidates = set(indents)
        most_common = max(candidates, key=counts.__getitem__)

        if level != 0:
            try:
                last_indentation = common_indentation[level - 1]
            except KeyError as e:
                # The parent level was never observed; deeper levels cannot
                # be resolved either, so stop here and keep what we have.
                print("Parsing error in feature_indentation (%s)" % e)
                break

            # TODO(fil): pass error when indent is not deeper
            overall_most_common = most_common
            while most_common <= last_indentation:
                candidates.discard(most_common)
                if not candidates:
                    # No deeper width exists; fall back to the plain mode.
                    most_common = overall_most_common
                    break
                most_common = max(candidates, key=counts.__getitem__)

        common_indentation[level] = most_common

    # Find which lines don't match the expected indentation.
    for line_no, line in enumerate(lines):
        line_levels = levels[line_no]
        if line_levels is None:
            continue
        # Levels without a resolved width are skipped rather than reported.
        expected_indents = [
            common_indentation[level]
            for level in line_levels
            if level in common_indentation
        ]
        indent = get_indent(line)
        if expected_indents and indent not in expected_indents:
            error = "expected indent %i, got %i" % (expected_indents[0], indent)
            self.add_to_annotations(line_no, error, annotations)