示例#1
0
def _write_lines_to_csv(p_filename, p_lines):
    """Call create_file for p_filename with empty content, then pass p_lines
    to convert_data_from_collection_to_file for the same file."""
    create_file(p_filename, '')
    convert_data_from_collection_to_file(p_filename, p_lines)


def evaluate_all_systems(p_dictoffolderpaths, p_run_all_systems=True):
    """Evaluate every retrieval system and write the result CSV files.

    For each entry of p_dictoffolderpaths, evaluate_system is called to fill
    per-system collections, which are written to per-system CSV files
    (precision/recall and P@K). Afterwards two combined CSV files are written
    (mean values and average precision values for all systems) and
    run_tests_for_models is called on the collected average precision values.

    Args:
        p_dictoffolderpaths: dict whose values are the results folder paths
            to evaluate; each key is handed to get_system_name to obtain the
            system's display name.
        p_run_all_systems: when true, Task1, Task2, Task3A and Task3B are
            each executed with an empty-string argument before evaluating.

    Returns:
        None. All results are written to files under ldirpath.

    NOTE(review): ldirpath is not assigned anywhere in this function, so it
    has to exist at module level -- confirm against the rest of the file.
    """
    if p_run_all_systems:
        Task1.execute_system('')
        Task2.execute_system('')
        Task3A.execute_system('')
        Task3B.execute_system('')

    # print(...) with a single string argument behaves identically under
    # Python 2 (parenthesised expression) and Python 3 (function call).
    print("Evaluating all systems...")

    create_directory(ldirpath)

    lquerydict = get_given_queries_in_dict(CACM_QUERY_FILE + FILE_EXT)
    lquerydict = get_sorted_dict(lquerydict)

    lallsystemsmeanvaluesdict = {}
    lallsystemsavgprecisionvaluesdict = {}
    # .items() instead of .iteritems(): same pairs on Python 2, and the only
    # spelling available on Python 3.
    for lkey, lvalue in p_dictoffolderpaths.items():

        lsystemname = get_system_name(lkey)
        print("Evaluating system: " + lsystemname)

        # The collections below are passed to evaluate_system, which is
        # expected to fill them in place; the first list element of each
        # list is the CSV header row.
        lsystemmeanvaluesdict = {}
        ldictofavgprecisionvalues = {}
        llistofprecisionandrecallvalues = [
            "Query Id,DocId,Rank,Precision,Recall"
        ]
        llistofpatkvalues = ["Query Id,P@5,P@20"]

        evaluate_system(
            lvalue,  # results folder path to evaluate
            lquerydict,  # dictionary containing all queries with query id
            lsystemmeanvaluesdict,  # dictionary to hold the mean values of this system
            ldictofavgprecisionvalues,  # dictionary to hold avg precision values of this system
            llistofprecisionandrecallvalues,  # precision/recall rows for all queries
            llistofpatkvalues  # P@5 and P@20 rows for all queries for this system
        )

        lallsystemsmeanvaluesdict[lkey] = lsystemmeanvaluesdict
        lallsystemsavgprecisionvaluesdict[lkey] = ldictofavgprecisionvalues

        # Precision and recall values for this system
        _write_lines_to_csv(
            ldirpath + "/" + FILE_FOR_PRECISON_RECALL_RESULTS_OF_SYSTEM + "_" + lsystemname + CSV_FILE_EXT,
            llistofprecisionandrecallvalues)

        # P@5 and P@20 values for this system
        _write_lines_to_csv(
            ldirpath + "/" + FILE_FOR_PATK_RESULTS_OF_SYSTEM + "_" + lsystemname + CSV_FILE_EXT,
            llistofpatkvalues)

    # Mean values (one row per system)
    llistofmeanvalues = ["System,MAP,MRR,P@5,P@20"]
    for lkey, lvalue in lallsystemsmeanvaluesdict.items():
        llistofmeanvalues.append(",".join([
            get_system_name(lkey),
            str(lvalue[MAP_CONST]),
            str(lvalue[MRR_CONST]),
            str(lvalue[PAT5_CONST]),
            str(lvalue[PAT20_CONST]),
        ]))

    _write_lines_to_csv(
        ldirpath + "/" + FILE_FOR_ALL_SYSTEMS_MEAN_VALUES + CSV_FILE_EXT,
        llistofmeanvalues)

    # Average precision values (one row per system and query)
    llistavgprecisionresults = ["System,Query Id,Average Precision"]
    for lkey, lvalue in lallsystemsavgprecisionvaluesdict.items():
        lsystemname = get_system_name(lkey)
        for ljkey, ljvalue in lvalue.items():
            llistavgprecisionresults.append(
                lsystemname + "," + str(ljkey) + "," + str(ljvalue))

    _write_lines_to_csv(
        ldirpath + "/" + FILE_FOR_ALL_SYSTEMS_AVG_PRECISION_VALUES + CSV_FILE_EXT,
        llistavgprecisionresults)

    # Run the tests for the models on the collected average precision
    # values; the second argument is the number of queries.
    run_tests_for_models(lallsystemsavgprecisionvaluesdict, len(lquerydict))
示例#2
0
# Stemmed data file: taken from the third command-line argument when one is
# supplied, otherwise the default CACM stem file.
lstemmeddatafile = (input_arguments[2] if len(input_arguments) > 2
                    else CACM_STEM_FILE + FILE_EXT)

# Tokenize raw text
tokenize_raw_data(ldatafilesdir)

# Write given queries to a file inside the output directory
create_directory(DIR_FOR_OUTPUT_FILES)
write_given_queries_to_file(
    CACM_QUERY_FILE + FILE_EXT,
    "/".join([DIR_FOR_OUTPUT_FILES, FILE_FOR_QUERIES + FILE_EXT]))

# Run every task in order; only Task3B receives the stemmed data file, the
# others receive the data files directory.
for lsystem, linput in ((Task1, ldatafilesdir),
                        (Task2, ldatafilesdir),
                        (Task3A, ldatafilesdir),
                        (Task3B, lstemmeddatafile)):
    lsystem.execute_system(linput)

# Result folders keyed by a numeric system id
ldictoffolderpaths = {
    1: "/".join([DIR_FOR_OUTPUT_FILES, TASK1_CONST, DIR_FOR_BM25_OUTPUT]),
    2: "/".join([DIR_FOR_OUTPUT_FILES, TASK1_CONST, DIR_FOR_TFIDF_OUTPUT]),
    3: "/".join([LUCENE, LUCENE_RESULTS]),
    4: "/".join([DIR_FOR_OUTPUT_FILES, TASK2_CONST, DIR_FOR_BM25_OUTPUT]),
    5: "/".join([DIR_FOR_OUTPUT_FILES, TASK2_CONST, DIR_FOR_TFIDF_OUTPUT]),
}
ldictoffolderpaths[