def get_line_types(repo, repo_uri, rev, path):
    """Classify every line of a file at a given revision.

    The file content is fetched via ``_get_file_content``, tokenized with
    a Pygments lexer and then reduced to one label per line.

    Args:
        repo: Repository handle, passed through to ``_get_file_content``.
        repo_uri: Base URI of the repository; joined with ``path`` to
            build the full location of the file.
        rev: Revision to read the file at.
        path: File path relative to ``repo_uri``. Also used to choose the
            lexer by file name.

    Returns:
        A list with one item per line, each labeled 'code', 'comment' or
        'empty', or ``None`` when no content could be obtained for the
        file (``None`` or empty content).
    """
    # Full location of the file: repository URI plus the relative path.
    uri = os.path.join(repo_uri, path)
    file_content = _get_file_content(repo, uri, rev)

    # Guard clause: without content there is nothing to classify.
    if not file_content:
        printerr("[get_line_types] Error: No file content for " +
                 str(rev) + ":" + str(path) + " found! Skipping.")
        return None

    # Lexer selection: by file name first, then by guessing from the
    # content, and finally the plain-text lexer as a last resort.
    try:
        lexer = get_lexer_for_filename(path)
    except ClassNotFound:
        printdbg("[get_line_types] Guessing lexer for " +
                 str(rev) + ":" + str(path) + ".")
        try:
            lexer = guess_lexer(file_content)
        except ClassNotFound:
            printdbg("[get_line_types] No guess or lexer found for " +
                     str(rev) + ":" + str(path) +
                     ". Using TextLexer instead.")
            lexer = TextLexer()

    if isinstance(lexer, NemerleLexer):
        # This lexer is broken and yields an unstoppable process, see
        # https://bitbucket.org/birkenfeld/pygments-main/issue/706/nemerle-lexer-ends-in-an-infinite-loop
        lexer = TextLexer()

    # NOTE: not sure whether stripping should be skipped when the language
    # uses off-side rules (e.g. Python; see
    # http://en.wikipedia.org/wiki/Off-side_rule for a list).
    stripped_code = _strip_lines(file_content)
    lexer_output = _iterate_lexer_output(lexer.get_tokens(stripped_code))

    # The classifier returns one label per line, newline-separated.
    return _comment_empty_or_code(lexer_output).split("\n")