def remove_commented_source_code(comments_to_keep):
    """Drop the ids of comments whose text matches the commented-code regex.

    The ``comment_text`` of every id in ``comments_to_keep`` is read from the
    ``raw_comments`` table, and each id whose text matches the pattern
    configured under ``'commented_source_code_regex'`` is removed. The list
    length before and after filtering and the elapsed time in seconds are
    printed.

    Args:
        comments_to_keep: List of ``raw_comments.id`` values. It is filtered
            in place; every occurrence of a matching id is removed.

    Returns:
        The same ``comments_to_keep`` list object, after filtering.
    """
    before = timeit.default_timer()

    print(len(comments_to_keep))

    # With no ids there is nothing to look up, and an empty tuple cannot form
    # a valid SQL "in (...)" list, so skip the database round trip entirely.
    # NOTE(review): psycopg2 documents this for its tuple adaptation; confirm
    # that is the driver behind PSQLConnection.
    if comments_to_keep:
        commented_source_code_regex = HeuristicHandlerConfig.get_parameter('commented_source_code_regex')

        connection = PSQLConnection.get_connection()
        try:
            cursor = connection.cursor()
            cursor.execute("select id, comment_text from raw_comments where id in %s", [tuple(comments_to_keep),])
            raw_comment_results = cursor.fetchall()
        finally:
            # Release the connection even when the query fails.
            connection.close()

        ids_to_remove = {
            raw_comment_id
            for raw_comment_id, comment_text in raw_comment_results
            if re.search(commented_source_code_regex, comment_text) is not None
        }

        # Slice assignment keeps the caller's list object (the original
        # mutated its argument) while filtering in one pass instead of
        # calling list.remove once per matching row.
        comments_to_keep[:] = [
            comment_id for comment_id in comments_to_keep
            if comment_id not in ids_to_remove
        ]

    print(len(comments_to_keep))
    after = timeit.default_timer()
    print(after - before)

    return comments_to_keep
def remove_license_comments(comments_to_keep):
    """Drop the ids of comments that end before the class declaration line.

    For every id in ``comments_to_keep`` the row is read from the
    ``raw_comments`` table. An id is removed when the comment's ``end_line``
    is smaller than the first entry of its comma-separated
    ``class_declaration_lines`` value, unless the comment text matches the
    pattern configured under
    ``'exception_words_to_remove_license_comments_regex'``. The list length
    before and after filtering and the elapsed time in seconds are printed.

    Args:
        comments_to_keep: List of ``raw_comments.id`` values. It is filtered
            in place; every occurrence of a removed id is dropped.

    Returns:
        The same ``comments_to_keep`` list object, after filtering.
    """
    before = timeit.default_timer()
    print(len(comments_to_keep))

    # With no ids there is nothing to look up, and an empty tuple cannot form
    # a valid SQL "in (...)" list, so skip the database round trip entirely.
    # NOTE(review): psycopg2 documents this for its tuple adaptation; confirm
    # that is the driver behind PSQLConnection.
    if comments_to_keep:
        exception_words_to_remove_license_comments_regex = HeuristicHandlerConfig.get_parameter('exception_words_to_remove_license_comments_regex')

        connection = PSQLConnection.get_connection()
        try:
            cursor = connection.cursor()
            cursor.execute("select id, comment_text, end_line, class_declaration_lines from raw_comments where id in %s", [tuple(comments_to_keep),])
            raw_comment_results = cursor.fetchall()
        finally:
            # Release the connection even when the query fails.
            connection.close()

        ids_to_remove = set()
        for raw_comment_id, comment_text, end_line, class_declaration_lines in raw_comment_results:
            # Only the first entry of the comma-separated list is compared,
            # so only that entry is parsed.
            # NOTE(review): a NULL or empty class_declaration_lines value
            # still raises here, as it did before; confirm whether such rows
            # can exist.
            class_declaration_line = int(class_declaration_lines.split(',', 1)[0])

            if end_line < class_declaration_line:
                # Comments containing an exception word are kept.
                if re.search(exception_words_to_remove_license_comments_regex, comment_text) is None:
                    ids_to_remove.add(raw_comment_id)

        # Slice assignment keeps the caller's list object (the original
        # mutated its argument) while filtering in one pass instead of
        # calling list.remove once per removed row.
        comments_to_keep[:] = [
            comment_id for comment_id in comments_to_keep
            if comment_id not in ids_to_remove
        ]

    print(len(comments_to_keep))
    after = timeit.default_timer()
    print(after - before)

    return comments_to_keep
def remove_javadoc_comments(repository_id):
    """Return the ids of a repository's comments, leaving out plain javadoc.

    Every ``raw_comments`` row with the given ``repository_id`` is read. A
    comment whose ``comment_format`` is ``'javadoc'`` is kept only when its
    text matches the pattern configured under
    ``'exception_words_to_remove_javadoc_comments_regex'``; comments with any
    other format (including none) are always kept. The number of rows
    fetched, the number of ids kept and the elapsed time in seconds are
    printed.

    Args:
        repository_id: Value matched against ``raw_comments.repository_id``.

    Returns:
        A new list with the ids of the comments to keep, in the order the
        rows were returned by the query.
    """
    before = timeit.default_timer()
    exception_words_to_remove_javadoc_comments_regex = HeuristicHandlerConfig.get_parameter('exception_words_to_remove_javadoc_comments_regex')

    connection = PSQLConnection.get_connection()
    try:
        cursor = connection.cursor()
        cursor.execute("select id, comment_text, comment_type, comment_format from raw_comments where repository_id = %s", (repository_id, ))
        raw_comment_results = cursor.fetchall()
    finally:
        # All rows are already fetched, so the connection is released before
        # the filtering starts and also when the query fails.
        connection.close()

    print(len(raw_comment_results))

    # comment_type is selected by the query but plays no part in the decision.
    comments_to_keep = [
        raw_comment_id
        for raw_comment_id, comment_text, _comment_type, comment_format in raw_comment_results
        if comment_format != 'javadoc'
        or re.search(exception_words_to_remove_javadoc_comments_regex, comment_text) is not None
    ]

    after = timeit.default_timer()
    print(len(comments_to_keep))
    print(after - before)
    return comments_to_keep
示例#4
0
def remove_license_comments(comments_to_keep):
    """Drop the ids of comments that end before the class declaration line.

    For every id in ``comments_to_keep`` the row is read from the
    ``raw_comments`` table. An id is removed when the comment's ``end_line``
    is smaller than the first entry of its comma-separated
    ``class_declaration_lines`` value, unless the comment text matches the
    pattern configured under
    ``'exception_words_to_remove_license_comments_regex'``. The list length
    before and after filtering and the elapsed time in seconds are printed.

    Args:
        comments_to_keep: List of ``raw_comments.id`` values. It is filtered
            in place; every occurrence of a removed id is dropped.

    Returns:
        The same ``comments_to_keep`` list object, after filtering.
    """
    before = timeit.default_timer()
    print(len(comments_to_keep))

    # With no ids there is nothing to look up, and an empty tuple cannot form
    # a valid SQL "in (...)" list, so skip the database round trip entirely.
    # NOTE(review): psycopg2 documents this for its tuple adaptation; confirm
    # that is the driver behind PSQLConnection.
    if comments_to_keep:
        exception_words_to_remove_license_comments_regex = HeuristicHandlerConfig.get_parameter(
            'exception_words_to_remove_license_comments_regex')

        connection = PSQLConnection.get_connection()
        try:
            cursor = connection.cursor()
            cursor.execute(
                "select id, comment_text, end_line, class_declaration_lines from raw_comments where id in %s",
                [
                    tuple(comments_to_keep),
                ])
            raw_comment_results = cursor.fetchall()
        finally:
            # Release the connection even when the query fails.
            connection.close()

        ids_to_remove = set()
        for (raw_comment_id, comment_text, end_line,
             class_declaration_lines) in raw_comment_results:
            # Only the first entry of the comma-separated list is compared,
            # so only that entry is parsed.
            # NOTE(review): a NULL or empty class_declaration_lines value
            # still raises here, as it did before; confirm whether such rows
            # can exist.
            class_declaration_line = int(
                class_declaration_lines.split(',', 1)[0])

            if end_line < class_declaration_line:
                # Comments containing an exception word are kept.
                if re.search(exception_words_to_remove_license_comments_regex,
                             comment_text) is None:
                    ids_to_remove.add(raw_comment_id)

        # Slice assignment keeps the caller's list object (the original
        # mutated its argument) while filtering in one pass instead of
        # calling list.remove once per removed row.
        comments_to_keep[:] = [
            comment_id for comment_id in comments_to_keep
            if comment_id not in ids_to_remove
        ]

    print(len(comments_to_keep))
    after = timeit.default_timer()
    print(after - before)

    return comments_to_keep
示例#5
0
def remove_commented_source_code(comments_to_keep):
    """Drop the ids of comments whose text matches the commented-code regex.

    The ``comment_text`` of every id in ``comments_to_keep`` is read from the
    ``raw_comments`` table, and each id whose text matches the pattern
    configured under ``'commented_source_code_regex'`` is removed. The list
    length before and after filtering and the elapsed time in seconds are
    printed.

    Args:
        comments_to_keep: List of ``raw_comments.id`` values. It is filtered
            in place; every occurrence of a matching id is removed.

    Returns:
        The same ``comments_to_keep`` list object, after filtering.
    """
    before = timeit.default_timer()

    print(len(comments_to_keep))

    # With no ids there is nothing to look up, and an empty tuple cannot form
    # a valid SQL "in (...)" list, so skip the database round trip entirely.
    # NOTE(review): psycopg2 documents this for its tuple adaptation; confirm
    # that is the driver behind PSQLConnection.
    if comments_to_keep:
        commented_source_code_regex = HeuristicHandlerConfig.get_parameter(
            'commented_source_code_regex')

        connection = PSQLConnection.get_connection()
        try:
            cursor = connection.cursor()
            cursor.execute("select id, comment_text from raw_comments where id in %s",
                           [
                               tuple(comments_to_keep),
                           ])
            raw_comment_results = cursor.fetchall()
        finally:
            # Release the connection even when the query fails.
            connection.close()

        ids_to_remove = {
            raw_comment_id
            for raw_comment_id, comment_text in raw_comment_results
            if re.search(commented_source_code_regex,
                         comment_text) is not None
        }

        # Slice assignment keeps the caller's list object (the original
        # mutated its argument) while filtering in one pass instead of
        # calling list.remove once per matching row.
        comments_to_keep[:] = [
            comment_id for comment_id in comments_to_keep
            if comment_id not in ids_to_remove
        ]

    print(len(comments_to_keep))
    after = timeit.default_timer()
    print(after - before)

    return comments_to_keep
示例#6
0
def remove_javadoc_comments(repository_id):
    """Return the ids of a repository's comments, leaving out plain javadoc.

    Every ``raw_comments`` row with the given ``repository_id`` is read. A
    comment whose ``comment_format`` is ``'javadoc'`` is kept only when its
    text matches the pattern configured under
    ``'exception_words_to_remove_javadoc_comments_regex'``; comments with any
    other format (including none) are always kept. The number of rows
    fetched, the number of ids kept and the elapsed time in seconds are
    printed.

    Args:
        repository_id: Value matched against ``raw_comments.repository_id``.

    Returns:
        A new list with the ids of the comments to keep, in the order the
        rows were returned by the query.
    """
    before = timeit.default_timer()
    exception_words_to_remove_javadoc_comments_regex = HeuristicHandlerConfig.get_parameter(
        'exception_words_to_remove_javadoc_comments_regex')

    connection = PSQLConnection.get_connection()
    try:
        cursor = connection.cursor()
        cursor.execute(
            "select id, comment_text, comment_type, comment_format from raw_comments where repository_id = %s",
            (repository_id, ))
        raw_comment_results = cursor.fetchall()
    finally:
        # All rows are already fetched, so the connection is released before
        # the filtering starts and also when the query fails.
        connection.close()

    print(len(raw_comment_results))

    # comment_type is selected by the query but plays no part in the decision.
    comments_to_keep = [
        raw_comment_id
        for (raw_comment_id, comment_text, _comment_type,
             comment_format) in raw_comment_results
        if comment_format != 'javadoc' or re.search(
            exception_words_to_remove_javadoc_comments_regex,
            comment_text) is not None
    ]

    after = timeit.default_timer()
    print(len(comments_to_keep))
    print(after - before)
    return comments_to_keep