def remove_commented_source_code(comments_to_keep):
    """Drop the ids of comments whose text matches the commented-code regex.

    The text of every comment listed in ``comments_to_keep`` is fetched from
    the ``raw_comments`` table and searched with the pattern stored under the
    ``commented_source_code_regex`` configuration key. Ids whose text matches
    are removed from the list.

    NOTE(review): a second definition with the same name appears later in
    this file and replaces this one when the module is loaded.

    Args:
        comments_to_keep: list of ``raw_comments.id`` values. The list is
            modified in place.

    Returns:
        The same list object, minus the ids that matched.
    """
    before = timeit.default_timer()
    print(len(comments_to_keep))

    # An empty list would be rendered as ``id in ()``, which is invalid SQL
    # (assuming the psycopg2-style tuple adaptation the query relies on), so
    # the database is only queried when there is something to look up.
    if comments_to_keep:
        commented_source_code_pattern = re.compile(
            HeuristicHandlerConfig.get_parameter('commented_source_code_regex'))

        connection = PSQLConnection.get_connection()
        try:
            cursor = connection.cursor()
            cursor.execute(
                "select id, comment_text from raw_comments where id in %s",
                [tuple(comments_to_keep), ])
            raw_comment_results = cursor.fetchall()
        finally:
            # Release the connection even when the query fails.
            connection.close()

        ids_to_drop = {
            raw_comment_id
            for raw_comment_id, comment_text in raw_comment_results
            if commented_source_code_pattern.search(comment_text) is not None
        }
        if ids_to_drop:
            # Slice assignment keeps the caller's list object, so the
            # in-place mutation callers may rely on is preserved while
            # avoiding one linear list.remove() per dropped id.
            comments_to_keep[:] = [
                comment_id for comment_id in comments_to_keep
                if comment_id not in ids_to_drop
            ]

    print(len(comments_to_keep))
    after = timeit.default_timer()
    print(after - before)
    return comments_to_keep
def remove_license_comments(comments_to_keep):
    """Drop the ids of comments that end before the first class declaration.

    For every id in ``comments_to_keep`` the comment text, its ``end_line``
    and its ``class_declaration_lines`` are read from ``raw_comments``. A
    comment is removed when it ends before the first listed class
    declaration line and its text does NOT match the pattern stored under
    the ``exception_words_to_remove_license_comments_regex`` configuration
    key.

    NOTE(review): a second definition with the same name appears later in
    this file and replaces this one when the module is loaded.

    Args:
        comments_to_keep: list of ``raw_comments.id`` values. The list is
            modified in place.

    Returns:
        The same list object, minus the ids that were dropped.
    """
    before = timeit.default_timer()
    print(len(comments_to_keep))

    # An empty list would be rendered as ``id in ()``, which is invalid SQL
    # (assuming the psycopg2-style tuple adaptation the query relies on), so
    # the database is only queried when there is something to look up.
    if comments_to_keep:
        exception_words_pattern = re.compile(
            HeuristicHandlerConfig.get_parameter(
                'exception_words_to_remove_license_comments_regex'))

        connection = PSQLConnection.get_connection()
        try:
            cursor = connection.cursor()
            cursor.execute(
                "select id, comment_text, end_line, class_declaration_lines from raw_comments where id in %s",
                [tuple(comments_to_keep), ])
            raw_comment_results = cursor.fetchall()
        finally:
            # Release the connection even when the query fails.
            connection.close()

        ids_to_drop = set()
        for raw_comment_id, comment_text, end_line, class_declaration_lines in raw_comment_results:
            # class_declaration_lines is a comma-separated string; only its
            # first entry is compared against the comment's end line.
            # NOTE(review): a NULL or empty value here still raises, as in
            # the original -- confirm whether such rows can exist.
            first_class_declaration_line = int(class_declaration_lines.split(',')[0])
            if end_line >= first_class_declaration_line:
                continue
            if exception_words_pattern.search(comment_text) is None:
                ids_to_drop.add(raw_comment_id)

        if ids_to_drop:
            # Slice assignment keeps the caller's list object, so the
            # in-place mutation callers may rely on is preserved while
            # avoiding one linear list.remove() per dropped id.
            comments_to_keep[:] = [
                comment_id for comment_id in comments_to_keep
                if comment_id not in ids_to_drop
            ]

    print(len(comments_to_keep))
    after = timeit.default_timer()
    print(after - before)
    return comments_to_keep
def remove_javadoc_comments(repository_id):
    """Return the ids of a repository's comments, filtering javadoc ones.

    All rows of ``raw_comments`` for ``repository_id`` are read. A comment
    whose ``comment_format`` is ``'javadoc'`` is kept only when its text
    matches the pattern stored under the
    ``exception_words_to_remove_javadoc_comments_regex`` configuration key;
    comments in any other format (including a NULL format) are always kept.

    NOTE(review): a second definition with the same name appears later in
    this file and replaces this one when the module is loaded.

    Args:
        repository_id: value matched against ``raw_comments.repository_id``.

    Returns:
        A new list with the ids of the comments that were kept, in the
        order the rows were returned by the query.
    """
    before = timeit.default_timer()
    exception_words_pattern = re.compile(
        HeuristicHandlerConfig.get_parameter(
            'exception_words_to_remove_javadoc_comments_regex'))

    connection = PSQLConnection.get_connection()
    try:
        cursor = connection.cursor()
        cursor.execute(
            "select id, comment_text, comment_type, comment_format from raw_comments where repository_id = %s",
            (repository_id, ))
        raw_comment_results = cursor.fetchall()
    finally:
        # Release the connection even when the query fails.
        connection.close()
    print(len(raw_comment_results))

    comments_to_keep = []
    # comment_type is selected by the query but is not needed for the decision.
    for raw_comment_id, comment_text, _comment_type, comment_format in raw_comment_results:
        # ``== 'javadoc'`` is already False for None, so no separate None check.
        if comment_format != 'javadoc':
            comments_to_keep.append(raw_comment_id)
        elif exception_words_pattern.search(comment_text) is not None:
            comments_to_keep.append(raw_comment_id)

    after = timeit.default_timer()
    print(len(comments_to_keep))
    print(after - before)
    return comments_to_keep
def remove_license_comments(comments_to_keep):
    """Drop the ids of comments that end before the first class declaration.

    For every id in ``comments_to_keep`` the comment text, its ``end_line``
    and its ``class_declaration_lines`` are read from ``raw_comments``. A
    comment is removed when it ends before the first listed class
    declaration line and its text does NOT match the pattern stored under
    the ``exception_words_to_remove_license_comments_regex`` configuration
    key.

    NOTE(review): an earlier definition with the same name exists in this
    file; being the later one, this definition is the one that takes effect.

    Args:
        comments_to_keep: list of ``raw_comments.id`` values. The list is
            modified in place.

    Returns:
        The same list object, minus the ids that were dropped.
    """
    before = timeit.default_timer()
    print(len(comments_to_keep))

    # An empty list would be rendered as ``id in ()``, which is invalid SQL
    # (assuming the psycopg2-style tuple adaptation the query relies on), so
    # the database is only queried when there is something to look up.
    if comments_to_keep:
        exception_words_pattern = re.compile(
            HeuristicHandlerConfig.get_parameter(
                'exception_words_to_remove_license_comments_regex'))

        connection = PSQLConnection.get_connection()
        try:
            cursor = connection.cursor()
            cursor.execute(
                "select id, comment_text, end_line, class_declaration_lines from raw_comments where id in %s",
                [
                    tuple(comments_to_keep),
                ])
            raw_comment_results = cursor.fetchall()
        finally:
            # Release the connection even when the query fails.
            connection.close()

        ids_to_drop = set()
        for raw_comment_id, comment_text, end_line, class_declaration_lines in raw_comment_results:
            # class_declaration_lines is a comma-separated string; only its
            # first entry is compared against the comment's end line.
            # NOTE(review): a NULL or empty value here still raises, as in
            # the original -- confirm whether such rows can exist.
            first_class_declaration_line = int(class_declaration_lines.split(',')[0])
            if end_line >= first_class_declaration_line:
                continue
            if exception_words_pattern.search(comment_text) is None:
                ids_to_drop.add(raw_comment_id)

        if ids_to_drop:
            # Slice assignment keeps the caller's list object, so the
            # in-place mutation callers may rely on is preserved while
            # avoiding one linear list.remove() per dropped id.
            comments_to_keep[:] = [
                comment_id for comment_id in comments_to_keep
                if comment_id not in ids_to_drop
            ]

    print(len(comments_to_keep))
    after = timeit.default_timer()
    print(after - before)
    return comments_to_keep
def remove_commented_source_code(comments_to_keep):
    """Drop the ids of comments whose text matches the commented-code regex.

    The text of every comment listed in ``comments_to_keep`` is fetched from
    the ``raw_comments`` table and searched with the pattern stored under the
    ``commented_source_code_regex`` configuration key. Ids whose text matches
    are removed from the list.

    NOTE(review): an earlier definition with the same name exists in this
    file; being the later one, this definition is the one that takes effect.

    Args:
        comments_to_keep: list of ``raw_comments.id`` values. The list is
            modified in place.

    Returns:
        The same list object, minus the ids that matched.
    """
    before = timeit.default_timer()
    print(len(comments_to_keep))

    # An empty list would be rendered as ``id in ()``, which is invalid SQL
    # (assuming the psycopg2-style tuple adaptation the query relies on), so
    # the database is only queried when there is something to look up.
    if comments_to_keep:
        commented_source_code_pattern = re.compile(
            HeuristicHandlerConfig.get_parameter('commented_source_code_regex'))

        connection = PSQLConnection.get_connection()
        try:
            cursor = connection.cursor()
            cursor.execute(
                "select id, comment_text from raw_comments where id in %s",
                [
                    tuple(comments_to_keep),
                ])
            raw_comment_results = cursor.fetchall()
        finally:
            # Release the connection even when the query fails.
            connection.close()

        ids_to_drop = {
            raw_comment_id
            for raw_comment_id, comment_text in raw_comment_results
            if commented_source_code_pattern.search(comment_text) is not None
        }
        if ids_to_drop:
            # Slice assignment keeps the caller's list object, so the
            # in-place mutation callers may rely on is preserved while
            # avoiding one linear list.remove() per dropped id.
            comments_to_keep[:] = [
                comment_id for comment_id in comments_to_keep
                if comment_id not in ids_to_drop
            ]

    print(len(comments_to_keep))
    after = timeit.default_timer()
    print(after - before)
    return comments_to_keep
def remove_javadoc_comments(repository_id):
    """Return the ids of a repository's comments, filtering javadoc ones.

    All rows of ``raw_comments`` for ``repository_id`` are read. A comment
    whose ``comment_format`` is ``'javadoc'`` is kept only when its text
    matches the pattern stored under the
    ``exception_words_to_remove_javadoc_comments_regex`` configuration key;
    comments in any other format (including a NULL format) are always kept.

    NOTE(review): an earlier definition with the same name exists in this
    file; being the later one, this definition is the one that takes effect.

    Args:
        repository_id: value matched against ``raw_comments.repository_id``.

    Returns:
        A new list with the ids of the comments that were kept, in the
        order the rows were returned by the query.
    """
    before = timeit.default_timer()
    exception_words_pattern = re.compile(
        HeuristicHandlerConfig.get_parameter(
            'exception_words_to_remove_javadoc_comments_regex'))

    connection = PSQLConnection.get_connection()
    try:
        cursor = connection.cursor()
        cursor.execute(
            "select id, comment_text, comment_type, comment_format from raw_comments where repository_id = %s",
            (repository_id, ))
        raw_comment_results = cursor.fetchall()
    finally:
        # Release the connection even when the query fails.
        connection.close()
    print(len(raw_comment_results))

    comments_to_keep = []
    # comment_type is selected by the query but is not needed for the decision.
    for raw_comment_id, comment_text, _comment_type, comment_format in raw_comment_results:
        # ``== 'javadoc'`` is already False for None, so no separate None check.
        if comment_format != 'javadoc':
            comments_to_keep.append(raw_comment_id)
        elif exception_words_pattern.search(comment_text) is not None:
            comments_to_keep.append(raw_comment_id)

    after = timeit.default_timer()
    print(len(comments_to_keep))
    print(after - before)
    return comments_to_keep