Exemplo n.º 1
0
def query_index(query, hit_logs_for_each, score_logs_for_each):
    ### 1_Query Alternation
    user_code_query = Generator(query)

    directory = SimpleFSDirectory(File(INDICES_PATH + 'bigclonebench_3'))
    searchermgr = SearcherManager(directory, SearcherFactory())
    searchermgr.maybeRefresh()
    searcher = searchermgr.acquire()

    bench_results = []
    benchsearcher = BenchSearcher(searcher)  # BigCloneBench

    ### 8_Querying for the Final Results
    # Log : Bench_result for each query
    bench_result, score_logs_for_each = benchsearcher.more_like_this3(5000, score_logs_for_each, user_code_query)

    searchermgr.release(searcher)
    searchermgr.close()
    searcher = None
    directory.close()
    directory = None

    if bench_result:
        hit_logs_for_each += str(len(bench_result)) + '\t'
    else:
        hit_logs_for_each += ('0' + '\t')

    sorted_bench_results = sorted(bench_result, key=attrgetter('score'), reverse=True)

    print 'Search Count : ', len(sorted_bench_results)
    recommended = recommend(sorted_bench_results)
    print 'Final Count : ', len(recommended)
    if bench_result:
        hit_logs_for_each += str(len(recommended)) + '\t'
    else:
        hit_logs_for_each += ('0' + '\t')
    return recommended, hit_logs_for_each, score_logs_for_each
Exemplo n.º 2
0
def query_index(query, hit_logs_for_each, score_logs_for_each):
    ### 1_Query Alternation
    user_code_query = Generator(query)

    directory = SimpleFSDirectory(File(INDICES_PATH + 'stackoverflow'))
    searchermgr = SearcherManager(directory, SearcherFactory())
    searchermgr.maybeRefresh()
    searcher = searchermgr.acquire()

    ### 2_Finding 3 Answer Snippets using the User Query (refined)
    answers = SnippetSearcher(searcher, user_code_query)
    answer_ids = answers.more_like_this(20, query=user_code_query)

    searchermgr.release(searcher)
    searchermgr.close()
    searcher = None
    directory.close()
    directory = None

    # Log : Answer count
    if answer_ids:
        hit_logs_for_each += str(len(answer_ids)) + '\t'
    else:
        hit_logs_for_each += ('0' + '\t')

    ### 3_Finding the Associated Questions
    question_ids = answers.find_question_ids(answer_ids)
    # Log : Answer - Question count
    if question_ids:
        hit_logs_for_each += str(len(question_ids)) + '\t'
    else:
        hit_logs_for_each += ('0' + '\t')

    directory = SimpleFSDirectory(File(INDICES_PATH + 'questionIndex'))
    searchermgr = SearcherManager(directory, SearcherFactory())
    searchermgr.maybeRefresh()
    searcher = searchermgr.acquire()

    getDoc = GettingQuestionDocs(searcher)
    item_docs = getDoc.search(
        question_ids, 20)[0:7]  # 순위대로 최소 7개의 question을 얻기 위해서 여기서 7개를 자름.

    searchermgr.release(searcher)
    searchermgr.close()
    searcher = None
    directory.close()
    directory = None

    # Log : Question ItemDoc count
    if item_docs:
        hit_logs_for_each += str(len(item_docs)) + '\t'
    else:
        hit_logs_for_each += ('0' + '\t')

    directory = SimpleFSDirectory(File(INDICES_PATH + 'questionIndex'))
    searchermgr = SearcherManager(directory, SearcherFactory())
    searchermgr.maybeRefresh()
    searcher = searchermgr.acquire()

    ### 4_Finding 3 Similar Questions per a Question (3 X 3)
    similar_questions = []
    question = SimilarQsSearcher(searcher)

    # Log : Similar Question count for each of Question ItemDoc
    i = 1
    if item_docs:
        for item_doc in item_docs:
            similar_question = question.more_like_this2(
                item_doc, 7)  # 각 question 들에 대해 7개씩 비슷한 것들 찾음.
            if similar_question:
                hit_logs_for_each += str(len(similar_question)) + '\t'
            else:
                hit_logs_for_each += ('0' + '\t')
            similar_questions += similar_question
            i += 1
    else:
        hit_logs_for_each += ('0' + '\t' + '0' + '\t' + '0' + '\t' + '0' +
                              '\t' + '0' + '\t' + '0' + '\t' + '0' + '\t'
                              )  # 7개

    searchermgr.release(searcher)
    searchermgr.close()
    searcher = None
    directory.close()
    directory = None

    # Log : Similar Question result count
    if similar_questions:
        hit_logs_for_each += str(len(similar_questions)) + '\t'
    else:
        hit_logs_for_each += ('0' + '\t')

    ### 5_Finding Associated Answers for each Question (9 - 9)
    answer_ids = find_answer_ids(similar_questions)

    # Log : Question - Answer count
    if answer_ids:
        hit_logs_for_each += str(len(answer_ids)) + '\t'
    else:
        hit_logs_for_each += ('0' + '\t')

    directory = SimpleFSDirectory(File(INDICES_PATH + 'stackoverflow'))
    searchermgr = SearcherManager(directory, SearcherFactory())
    searchermgr.maybeRefresh()
    searcher = searchermgr.acquire()

    ### 6_Getting Answer Docs for the Final Query
    getDoc = GettingAnswerDocs(searcher)
    answer_docs = getDoc.search(answer_ids)

    searchermgr.release(searcher)
    searchermgr.close()
    searcher = None
    directory.close()
    directory = None

    # Log : Answer Docs count
    if answer_docs:
        hit_logs_for_each += str(len(answer_docs)) + '\t'
    else:
        hit_logs_for_each += ('0' + '\t')

    directory = SimpleFSDirectory(File(INDICES_PATH + 'bigclonebench_2'))
    searchermgr = SearcherManager(directory, SearcherFactory())
    searchermgr.maybeRefresh()
    searcher = searchermgr.acquire()

    bench_results = []
    benchsearcher = BenchSearcher(searcher)  # BigCloneBench

    # Exceptional
    ### 7_Appending for the user query results

    ### 8_Querying for the Final Results
    # Log : Bench_result for each query
    for answer_doc in answer_docs:
        bench_result, score_logs_for_each = benchsearcher.more_like_this2(
            100, answer_doc, score_logs_for_each, user_code_query,
            0)  # , user_query=user_code_query)
        if bench_result:
            hit_logs_for_each += str(len(bench_result)) + '\t'
        else:
            hit_logs_for_each += ('0' + '\t')
        bench_results += bench_result

    searchermgr.release(searcher)
    searchermgr.close()
    searcher = None
    directory.close()
    directory = None

    if answer_docs < 49:
        for a in range(49 - len(answer_docs)):
            hit_logs_for_each += ('0' + '\t')

    if bench_results:
        hit_logs_for_each += str(len(bench_results)) + '\t'
    else:
        hit_logs_for_each += ('0' + '\t')

    sorted_bench_results = sorted(bench_results,
                                  key=attrgetter('score'),
                                  reverse=True)

    print 'Search Count : ', len(sorted_bench_results)
    recommended = recommend(sorted_bench_results)
    print 'Final Count : ', len(recommended)
    if bench_results:
        hit_logs_for_each += str(len(recommended)) + '\t'
    else:
        hit_logs_for_each += ('0' + '\t')
    return recommended, hit_logs_for_each, score_logs_for_each
Exemplo n.º 3
0
def query_index(query):
    ### 1_Query Alternation
    user_code_query = Generator(query)
    print 'query: ', query
    print 'user_code_query: ', user_code_query

    #open
    directory = SimpleFSDirectory(File(INDICES_PATH + 'stackoverflow'))
    searchermgr = SearcherManager(directory, SearcherFactory())
    searchermgr.maybeRefresh()
    searcher = searchermgr.acquire()

    ### 2_Finding 3 Answer Snippets using the User Query (refined)
    answers = SnippetSearcher(searcher, user_code_query)
    answer_ids = answers.more_like_this(10, query=user_code_query)
    print 'answer_ids: ', answer_ids

    #close
    searchermgr.release(searcher)
    searchermgr.close()
    searcher = None
    directory.close()
    directory = None

    ### 3_Finding the Associated Questions
    question_ids = answers.find_question_ids(answer_ids)
    print 'question ids: ', question_ids

    #open
    directory = SimpleFSDirectory(File(INDICES_PATH + 'questionIndex'))
    searchermgr = SearcherManager(directory, SearcherFactory())
    searchermgr.maybeRefresh()
    searcher = searchermgr.acquire()

    ### 4_Cutting items
    getDoc = GettingQuestionDocs(searcher)
    item_docs = getDoc.search(
        question_ids, 10)[0:3]  # 순위대로 최소 7개의 question을 얻기 위해서 여기서 7개를 자름.
    # print 'item docs: ', item_docs

    ### 5_Finding 3 Similar Questions per a Question (3 X 3)
    similar_questions = []
    question = SimilarQsSearcher(searcher)

    if item_docs:
        for item_doc in item_docs:
            similar_question = question.more_like_this2(
                item_doc, 3)  # 각 question 들에 대해 7개씩 비슷한 것들 찾음.
            similar_questions += similar_question

    print 'similar_questions: ', similar_questions

    searchermgr.release(searcher)
    searchermgr.close()
    searcher = None
    directory.close()
    directory = None

    ### 6_Finding Associated Answers for each Question (9 - 9)
    answer_ids = find_answer_ids(similar_questions)
    print 'answer ids: ', answer_ids

    if not answer_ids:
        recommended = ''
        return recommended
        # dest_path = u'/Users/Falcon/Desktop/***Ongoing***/***[4]_FaCoY_Defect4J_Data_Share_Kui/Defect4J_Results/'
        # project_name = u'Chart/'###################################################
        # write_file()

    directory = SimpleFSDirectory(File(INDICES_PATH + 'stackoverflow'))
    searchermgr = SearcherManager(directory, SearcherFactory())
    searchermgr.maybeRefresh()
    searcher = searchermgr.acquire()

    ### 7_Getting Answer Docs for the Final Query
    getDoc = GettingAnswerDocs(searcher)
    answer_docs = getDoc.search(answer_ids)

    # print 'answer docs: ', answer_docs

    searchermgr.release(searcher)
    searchermgr.close()
    searcher = None
    directory.close()
    directory = None

    directory = SimpleFSDirectory(File(INDICES_PATH + 'github'))
    searchermgr = SearcherManager(directory, SearcherFactory())
    searchermgr.maybeRefresh()
    searcher = searchermgr.acquire()

    git_results = []
    gitsearcher = GitSearcher(searcher)

    ### 7_Appending for the user query results
    git_result = gitsearcher.more_like_this2(10, answer_docs[0],
                                             user_code_query, 1)
    git_results += git_result

    # print 'answer docs: ', answer_docs

    ### 8_Querying for the Final Results
    for answer_doc in answer_docs:
        git_result = gitsearcher.more_like_this2(10, answer_doc,
                                                 user_code_query, 0)
        git_results += git_result

    searchermgr.release(searcher)
    searchermgr.close()
    searcher = None
    directory.close()
    directory = None

    git_results = sorted(git_results,
                         key=attrgetter('so_item.answer_id'),
                         reverse=True)
    id = 0
    i = 0
    temp_result = []
    for item in git_results:
        if id != item.so_item.answer_id:
            id = item.so_item.answer_id
            i = 1
            temp_result.append(item)

        elif id == item.so_item.answer_id and i < 3:
            i += 1
            temp_result.append(item)
        elif id == item.so_item.answer_id and i > 3:
            continue

    sorted_git_results = sorted(temp_result,
                                key=attrgetter('score'),
                                reverse=True)

    print 'Search Count : ', len(sorted_git_results)
    recommended = recommend(sorted_git_results)
    print 'Final Count : ', len(recommended)

    # Defect4J 쿼리 결과저장
    # cot = 0
    # for c, item in enumerate(recommended):
    #     cot += 1
    #     if cot > 10:
    #         break
    #     result_file = "/Users/Falcon/Desktop/Pycharm_Project/FaCoY_Project/GitSearch/Defect4J_FaCoY/" + str(c+1) + "_" + str('_'.join(str(item[0]).split("/")[6:]))
    #     write_file_over(result_file, str(item.file_content))

    # result_file = '/Users/Falcon/Desktop/test.txt'
    # if os.path.exists(result_file):
    #     os.remove(result_file)
    #
    # write_file(result_file, 'User Code Query \n' + str(query) + '\n' + '---------------------------' + '\n')
    # for c, i in enumerate(recommended):
    #     contents = ''
    #     contents = 'Rank: %d' % (int(c)+int(1))
    #     contents += '\nFile path: %s' % str(i.file[6:]) + '\n' + '---------------------------' + '\n'
    #     contents += str(i.file_content) +'\n' + '=================================================================' + '\n\n\n'
    #     write_file(result_file, contents)

    return recommended
Exemplo n.º 4
0
def query_index(query, hit_logs_for_each, score_logs_for_each):
    print "*************** Searching Starts ***************"
    ### 1_Query Alternation
    user_code_query = Generator(query)

    ### 2_Finding 3 Answer Snippets using the User Query (refined)
    answers = SnippetSearcher("%sstackoverflow" % (INDICES_PATH),
                              user_code_query)
    answer_ids = answers.more_like_this(
        20, query=user_code_query
    )  #여기서 3개를 자르면, 3개의 answer 중 question 아이디가 존재하지 않을 경우, 그 수가 현저히 적어짐..

    #Log : Answer count
    if answer_ids: hit_logs_for_each += str(len(answer_ids)) + '\t'
    else: hit_logs_for_each += ('0' + '\t')

    answers.reader.close()
    answers.directory.close()

    ### 3_Finding the Associated Questions
    question_ids = answers.find_question_ids(answer_ids)
    # Log : Answer - Question count
    if question_ids: hit_logs_for_each += str(len(question_ids)) + '\t'
    else: hit_logs_for_each += ('0' + '\t')
    getDoc = GettingQuestionDocs("%squestionIndex" % (INDICES_PATH))
    item_docs = getDoc.search(
        question_ids, 20)[0:7]  #순위대로 최소 7개의 question을 얻기 위해서 여기서 7개를 자름.

    # Log : Question ItemDoc count
    if item_docs: hit_logs_for_each += str(len(item_docs)) + '\t'
    else: hit_logs_for_each += ('0' + '\t')
    getDoc.reader.close()
    getDoc.directory.close()

    ### 4_Finding 3 Similar Questions per a Question (3 X 3)
    similar_questions = []
    question = SimilarQsSearcher("%squestionIndex" % (INDICES_PATH))

    # Log : Similar Question count for each of Question ItemDoc
    i = 1
    if item_docs:
        for item_doc in item_docs:
            similar_question = question.more_like_this2(
                item_doc, 7)  #각 question 들에 대해 7개씩 비슷한 것들 찾음.
            if similar_question:
                hit_logs_for_each += str(len(similar_question)) + '\t'
            else:
                hit_logs_for_each += ('0' + '\t')
            similar_questions += similar_question
            i += 1
    else:
        hit_logs_for_each += ('0' + '\t' + '0' + '\t' + '0' + '\t' + '0' +
                              '\t' + '0' + '\t' + '0' + '\t' + '0' + '\t')  #7개

# Log : Similar Question result count
    if similar_questions:
        hit_logs_for_each += str(len(similar_questions)) + '\t'
    else:
        hit_logs_for_each += ('0' + '\t')
    question.reader.close()
    question.directory.close()

    ### 5_Finding Associated Answers for each Question (9 - 9)
    answer_ids = find_answer_ids(similar_questions)

    # Log : Question - Answer count
    if answer_ids: hit_logs_for_each += str(len(answer_ids)) + '\t'
    else: hit_logs_for_each += ('0' + '\t')

    ### 6_Getting Answer Docs for the Final Query
    getDoc = GettingAnswerDocs("%sstackoverflow" % (INDICES_PATH))
    answer_docs = getDoc.search(answer_ids)

    # Log : Answer Docs count
    if answer_docs: hit_logs_for_each += str(len(answer_docs)) + '\t'
    else: hit_logs_for_each += ('0' + '\t')

    # temp_doc = getDoc.search(['0',])

    getDoc.reader.close()
    getDoc.directory.close()

    bench_results = []
    benchsearcher = BenchSearcher("%sbigclonebench" %
                                  (INDICES_PATH))  # BigCloneBench

    # Exceptional
    ### 7_Appending for the user query results
    # Log : Bench_result for UQ
    #     temp_doc = ResultItem(None, 0, 'No Title', 'No Question id', 'No Answer id', 'No Description')

    bench_result, score_logs_for_each = benchsearcher.more_like_this2(
        1, answer_docs[0], score_logs_for_each, user_code_query, 1)
    if bench_result: hit_logs_for_each += str(len(bench_results)) + '\t'
    else: hit_logs_for_each += ('0' + '\t')
    bench_results += bench_result

    ### 8_Querying for the Final Results
    # Log : Bench_result for each query
    for answer_doc in answer_docs:
        bench_result, score_logs_for_each = benchsearcher.more_like_this2(
            1, answer_doc, score_logs_for_each, user_code_query,
            0)  #, user_query=user_code_query)
        if bench_result: hit_logs_for_each += str(len(bench_result)) + '\t'
        else: hit_logs_for_each += ('0' + '\t')
        bench_results += bench_result

    if answer_docs < 49:
        for a in range(49 - len(answer_docs)):
            hit_logs_for_each += ('0' + '\t')

    # print 'Count(Sum of the bench results) : ', len(bench_results)


# Log : Results count
    if bench_results: hit_logs_for_each += str(len(bench_results)) + '\t'
    else: hit_logs_for_each += ('0' + '\t')
    benchsearcher.reader.close()
    benchsearcher.directory.close()

    # print '%%%final_results : ', final_result
    sorted_bench_results = sorted(bench_results,
                                  key=attrgetter('score'),
                                  reverse=True)
    # results = sorted(final_result, key=attrgetter('so_item.answer_id'), reverse=True)
    # print '%%%final_results_____ : ', results

    print "***********************************************************************************"
    # print sorted_bench_results

    # Answer set에서 나오는 숫자 제한하기.. 즉, 이걸 제한하면 같은 종류의 answer에 대한 snippet들이 반복되어 출력되는걸 막는다.
    # id = 0; i = 0; final_temp_result = []
    # for item in sorted_bench_results:
    #     if id != item.so_item.answer_id:
    #         id = item.so_item.answer_id
    #         i = 1
    #         final_temp_result.append(item)
    #     elif id == item.so_item.answer_id and i < 500:
    #         i += 1
    #         final_temp_result.append(item)
    #     elif id == item.so_item.answer_id and i > 500:
    #         continue
    # final_results = sorted(final_temp_result, key=attrgetter('score'), reverse=True)
    # print 'Count(Final results) : ', len(final_results)
    # recommended = recommend(final_results)

    print 'Count(Final results) : ', len(sorted_bench_results)
    recommended = recommend(sorted_bench_results)
    return recommended, hit_logs_for_each, score_logs_for_each