Пример #1
0
def code_method(idx):
    """Split the method CSV for chunk ``idx`` into method and package files.

    Reads ``path_from + 'file_<idx>_Method.csv'`` and returns silently when
    that file does not exist. For every input line the text after the first
    ``;`` is split on commas. When this yields exactly three fields, the
    second is stored as the package and the third as the method; otherwise
    both entries become ``[]``. The two lists are written through
    ``cm.save_txt`` to ``method/method<idx>.txt`` and
    ``package/package<idx>.txt`` below ``path_to``.

    Args:
        idx: Chunk number used to build the input and output file names.

    Raises:
        ValueError: If an input line contains no ``;``.
    """
    src_path = path_from + 'file_' + str(idx) + '_Method.csv'
    if not os.path.exists(src_path):
        return
    with open(src_path, 'r', encoding='utf-8') as infile:
        rows = infile.readlines()

    methods = []
    packages = []
    for row in rows:
        # Drop only the line terminator: a blind [:-1] would eat the last
        # real character of a final line that has no trailing newline.
        row = row.rstrip('\n')
        fields = row[row.index(';') + 1:].split(',')
        if len(fields) == 3:
            packages.append(fields[1] + '\n')
            methods.append(fields[2] + '\n')
        else:
            # Keep one placeholder per input line so that output files
            # keep the same number of lines as the input.
            packages.append('[]\n')
            methods.append('[]\n')

    cm.save_txt(path_to + 'method/method' + str(idx) + '.txt', methods)
    cm.save_txt(path_to + 'package/package' + str(idx) + '.txt', packages)
    print('method over: ' + str(idx))
Пример #2
0
def code_source(idx):
    """Clean the source-code CSV for chunk ``idx`` and save it as text.

    Reads ``path_from + 'file_<idx>_SourceCode.csv'`` and returns silently
    when that file does not exist. For every input line the text after the
    first ``;`` is taken as the payload:

    * a payload equal to ``[]`` is written unchanged;
    * otherwise the payload is split on ``;``. Empty pieces are turned back
      into a single ``;``; other pieces are stripped and dropped when they
      start with ``//``, ``/*``, ``*`` or ``@`` or end with ``*`` or ``*/``
      (apparently comment and annotation fragments). The surviving pieces
      are concatenated, runs of spaces and runs of ``;`` are collapsed.

    The result is written through ``cm.save_txt`` to
    ``source/source<idx>.txt`` below ``path_to``.

    Args:
        idx: Chunk number used to build the input and output file names.

    Raises:
        ValueError: If an input line contains no ``;``.
    """
    src_path = path_from + 'file_' + str(idx) + '_SourceCode.csv'
    if not os.path.exists(src_path):
        return
    with open(src_path, 'r', encoding='utf-8') as infile:
        rows = infile.readlines()

    skip_prefixes = ('//', '/*', '*', '@')
    skip_suffixes = ('*', '*/')
    cleaned = []
    for row in rows:
        # Drop only the line terminator: a blind [:-1] would eat the last
        # real character of a final line that has no trailing newline.
        row = row.rstrip('\n')
        payload = row[row.index(';') + 1:]
        if payload == '[]':
            cleaned.append('[]\n')
            continue

        kept = []
        for piece in payload.split(';'):
            if piece == '':
                # An empty piece means two adjacent ';' in the payload;
                # keep one separator in the output.
                kept.append(';')
                continue
            piece = piece.strip()
            if not piece.startswith(skip_prefixes) \
                    and not piece.endswith(skip_suffixes):
                kept.append(piece)

        text = re.sub(' +', ' ', ''.join(kept)).strip()
        text = re.sub(';+', ';', text)
        cleaned.append(text + '\n')

    cm.save_txt(path_to + 'source/source' + str(idx) + '.txt', cleaned)
    print('source over: ' + str(idx))
Пример #3
0
def reranking(path_parsed_queries, path_queries, path_jdk, path_fuzzy_search,
              path_rerank, n_queries=99, n_candidates=100, n_results=10):
    """Re-rank stored search responses per query and save the top results.

    For each query index ``i`` in ``range(n_queries)`` the function loads
    ``respond<i>.pkl`` and ``cmd<i>.pkl`` from ``path_fuzzy_search`` and
    ranks the responses in two passes:

    1. every response is scored with ``matcher_name`` on its
       ``['_source']['method']`` field, and the ``n_candidates`` best are
       kept;
    2. the kept candidates are additionally scored with ``matcher_api`` on
       their ``['_source']['parsed']`` field and sorted by both scores,
       highest first.

    The output consists of the query text, followed by the
    ``['_source']['source']`` field of up to ``n_results`` best candidates
    and a blank separator line, for every query. It is written through
    ``cm.save_txt`` to ``path_rerank``.

    Args:
        path_parsed_queries: Pickle with the parsed queries; each query is
            a sequence of items whose first element is the word.
        path_queries: Text file with the raw queries, one per entry.
        path_jdk: Pickle passed on to ``matcher_api``.
        path_fuzzy_search: Path prefix of the ``respond<i>.pkl`` and
            ``cmd<i>.pkl`` files.
        path_rerank: Output path of the re-ranked listing.
        n_queries: Number of queries to process (default 99).
        n_candidates: Candidates kept after the first pass (default 100).
        n_results: Results written per query (default 10).
    """
    parsed_queries = cm.load_pkl(path_parsed_queries)
    jdk = cm.load_pkl(path_jdk)
    # Keep only the word (first element) of every parsed query item.
    queries = [[item[0] for item in parsed] for parsed in parsed_queries]

    queries_txt = cm.load_txt(path_queries)
    lines = []
    by_name = operator.itemgetter(1)
    by_name_then_api = operator.itemgetter(1, 2)

    for i in range(n_queries):
        respond = cm.load_pkl(path_fuzzy_search + 'respond' + str(i) + '.pkl')
        query_cmd = cm.load_pkl(path_fuzzy_search + 'cmd' + str(i) + '.pkl')
        query = queries[i]
        query_txt = queries_txt[i]
        # Same "<current>-<total>" prefix for both passes; the first pass
        # used to print a stale total of 50 and a 0-based query number.
        progress = str(i + 1) + '-' + str(n_queries)

        # Pass 1: score every response by method name.
        scores = []
        for j, hit in enumerate(respond):
            print(progress + ', iter-1, ' + str(j) + '-' + str(len(respond)))
            method = hit['_source']['method']
            scores.append([j, matcher_name(query, method, query_cmd[j])])
        scores.sort(key=by_name, reverse=True)
        scores = scores[:n_candidates]

        # Pass 2: add the API score for the surviving candidates only.
        for j, entry in enumerate(scores):
            print(progress + ', iter-2, ' + str(j) + '-' + str(len(scores)))
            parsed = respond[entry[0]]['_source']['parsed']
            entry.append(matcher_api(query, parsed, jdk))
        scores.sort(key=by_name_then_api, reverse=True)

        if '\n' not in query_txt:
            query_txt += '\n'
        lines.append(query_txt)
        for entry in scores[:n_results]:
            lines.append(respond[entry[0]]['_source']['source'])
        lines.append('\n')

    cm.save_txt(path_rerank, lines)
Пример #4
0
def code_return(idx):
    """Extract the return column of chunk ``idx`` and save it as text.

    Reads ``path_from + 'file_<idx>_Return.csv'`` and returns silently when
    that file does not exist. For every input line the text after the first
    ``;`` is kept; an empty value is replaced by ``[]``. The result is
    written through ``cm.save_txt`` to ``return/return<idx>.txt`` below
    ``path_to``.

    Args:
        idx: Chunk number used to build the input and output file names.

    Raises:
        ValueError: If an input line contains no ``;``.
    """
    src_path = path_from + 'file_' + str(idx) + '_Return.csv'
    if not os.path.exists(src_path):
        return
    with open(src_path, 'r', encoding='utf-8') as infile:
        rows = infile.readlines()

    returns = []
    for row in rows:
        # Drop only the line terminator: a blind [:-1] would eat the last
        # real character of a final line that has no trailing newline.
        row = row.rstrip('\n')
        value = row[row.index(';') + 1:]
        if value == '':
            value = '[]'
        returns.append(value + '\n')

    cm.save_txt(path_to + 'return/return' + str(idx) + '.txt', returns)
    print('return over: ' + str(idx))
Пример #5
0
def code_modifier(idx):
    """Extract the modifiers column of chunk ``idx`` and save it as text.

    Reads ``path_from + 'file_<idx>_Modifiers.csv'`` and returns silently
    when that file does not exist. For every input line the text after the
    first ``;`` is taken and its first and last characters are removed
    (presumably the ``[`` and ``]`` of a bracketed list -- verify against
    the CSV producer). An empty result is replaced by ``[]``. The result is
    written through ``cm.save_txt`` to ``modifier/modifier<idx>.txt`` below
    ``path_to``.

    Args:
        idx: Chunk number used to build the input and output file names.

    Raises:
        ValueError: If an input line contains no ``;``.
    """
    src_path = path_from + 'file_' + str(idx) + '_Modifiers.csv'
    if not os.path.exists(src_path):
        return
    with open(src_path, 'r', encoding='utf-8') as infile:
        rows = infile.readlines()

    modifiers = []
    for row in rows:
        # Strip the terminator first, then one wrapping character on each
        # side. A fixed [+2:-2] slice removes one character too many from
        # a final line that has no trailing newline.
        row = row.rstrip('\n')
        value = row[row.index(';') + 2:-1]
        if value == '':
            value = '[]'
        modifiers.append(value + '\n')

    cm.save_txt(path_to + 'modifier/modifier' + str(idx) + '.txt',
                modifiers)
    print('modifier over: ' + str(idx))
Пример #6
0
def code_javadoc(idx):
    """Flatten the Javadoc column of chunk ``idx`` and save it as text.

    Reads ``path_from + 'file_<idx>_Javadoc.csv'`` and returns silently
    when that file does not exist. For every input line the text after the
    first ``;`` is taken, remaining ``;`` are replaced by spaces, runs of
    spaces are collapsed and the result is stripped. The result is written
    through ``cm.save_txt`` to ``javadoc/javadoc<idx>.txt`` below
    ``path_to``.

    Args:
        idx: Chunk number used to build the input and output file names.

    Raises:
        ValueError: If an input line contains no ``;``.
    """
    src_path = path_from + 'file_' + str(idx) + '_Javadoc.csv'
    if not os.path.exists(src_path):
        return
    with open(src_path, 'r', encoding='utf-8') as infile:
        rows = infile.readlines()

    javadocs = []
    for row in rows:
        # Drop only the line terminator: a blind [:-1] would eat the last
        # real character of a final line that has no trailing newline.
        row = row.rstrip('\n')
        text = row[row.index(';') + 1:].replace(';', ' ')
        text = re.sub(' +', ' ', text).strip()
        # NOTE(review): an empty result is written as a blank line here,
        # not as '[]' -- confirm that consumers expect this.
        javadocs.append(text + '\n')

    cm.save_txt(path_to + 'javadoc/javadoc' + str(idx) + '.txt',
                javadocs)
    print('javadoc over: ' + str(idx))
Пример #7
0
def code_comment(idx):
    """Flatten the comment column of chunk ``idx`` and save it as text.

    Reads ``path_from + 'file_<idx>_Comment.csv'`` and returns silently
    when that file does not exist. For every input line the text after the
    first ``;`` is taken and its first and last characters are removed
    (presumably the ``[`` and ``]`` of a bracketed list -- verify against
    the CSV producer). Remaining ``;`` are replaced by spaces, runs of
    spaces are collapsed and the result is stripped; an empty result is
    replaced by ``[]``. The result is written through ``cm.save_txt`` to
    ``comment/comment<idx>.txt`` below ``path_to``.

    Args:
        idx: Chunk number used to build the input and output file names.

    Raises:
        ValueError: If an input line contains no ``;``.
    """
    src_path = path_from + 'file_' + str(idx) + '_Comment.csv'
    if not os.path.exists(src_path):
        return
    with open(src_path, 'r', encoding='utf-8') as infile:
        rows = infile.readlines()

    comments = []
    for row in rows:
        # Strip the terminator first, then one wrapping character on each
        # side. A fixed [+2:-2] slice removes one character too many from
        # a final line that has no trailing newline.
        row = row.rstrip('\n')
        text = row[row.index(';') + 2:-1].replace(';', ' ')
        text = re.sub(' +', ' ', text).strip()
        if text == '':
            text = '[]'
        comments.append(text + '\n')

    cm.save_txt(path_to + 'comment/comment' + str(idx) + '.txt',
                comments)
    print('comment over: ' + str(idx))
Пример #8
0
def code_parsed(idx):
    """Extract the middle field of each parsed-code record of chunk ``idx``.

    Reads ``path_from + 'file_<idx>_ParsedCode.csv'`` and returns silently
    when that file does not exist. For every input line the text after the
    first ``;`` is the payload. When the payload contains a ``;`` it is
    split on ``;`` into records; each record is split on commas, and for
    records with exactly three fields the second field is collected. The
    collected fields are joined with commas; an empty result (including a
    payload without any ``;``) becomes ``[]``. The result is written
    through ``cm.save_txt`` to ``parsed/parsed<idx>.txt`` below ``path_to``.

    Args:
        idx: Chunk number used to build the input and output file names.

    Raises:
        ValueError: If an input line contains no ``;``.
    """
    src_path = path_from + 'file_' + str(idx) + '_ParsedCode.csv'
    if not os.path.exists(src_path):
        return
    with open(src_path, 'r', encoding='utf-8') as infile:
        rows = infile.readlines()

    parsed = []
    for row in rows:
        # Drop only the line terminator: a blind [:-1] would eat the last
        # real character of a final line that has no trailing newline.
        row = row.rstrip('\n')
        payload = row[row.index(';') + 1:]
        names = []
        # A payload without any ';' is treated as having no records, even
        # if it would itself split into three fields.
        if ';' in payload:
            for record in payload.split(';'):
                fields = record.split(',')
                if len(fields) == 3:
                    names.append(fields[1])
        # The joined string is also empty when every collected field is
        # empty, so test the string rather than the list.
        body = ','.join(names) or '[]'
        parsed.append(body + '\n')

    cm.save_txt(path_to + 'parsed/parsed' + str(idx) + '.txt', parsed)
    print('parsed over: ' + str(idx))