Exemplo n.º 1
0
def _parse_max_k(argv):
    """Return the integer that follows '-maxK' in argv, or None if absent.

    Terminates the process with an error message (exit status 1) when the
    flag is present but has no value, the value is not an integer, or the
    value is smaller than 1.
    """
    if '-maxK' not in argv:
        return None

    valuePos = argv.index('-maxK') + 1
    if valuePos >= len(argv):
        sys.exit("-maxK must be followed by an integer")

    try:
        maxK = int(argv[valuePos])
    except ValueError:
        sys.exit("-maxK must be followed by an integer, got '" +
                 argv[valuePos] + "'")

    # A value below 1 would make the later column slice keep nothing (0) or
    # drop columns from the end (negative) rather than keep the first K.
    if maxK < 1:
        sys.exit("-maxK must be at least 1, got " + str(maxK))
    return maxK


def main():
    """Load or compute k-NN rating predictions and plot their error.

    Behaviour is driven by flags looked up in sys.argv:

      -h           print the usage message and exit
      -d           use the difficulty rating vector instead of the overall one
      -cos         use cosine similarity
      -pear        use absolute Pearson correlation (only if -cos is absent)
      -unweighted  pass weighted=False to knn.knn_dataset
      -maxK N      keep only the first N columns of the predictions
      -save        ask the plot helper to write to a generated file name

    Without -cos / -pear the similarity metric is inverse Euclidean distance.
    Predictions and the similarity matrix are read from their files when the
    readers return something other than None; otherwise they are computed
    and written back through the `write` helpers.
    """
    if '-h' in sys.argv:
        print_usage_message()
        sys.exit()

    # Validate -maxK before any expensive work so a bad value fails fast.
    maxK = _parse_max_k(sys.argv)

    vectFileName = fn.create_prof_vect_name(sys.argv, True)
    simMatFileName = fn.create_sim_mat_name(sys.argv)
    predsFileName = fn.create_preds_name(sys.argv)

    print(vectFileName)
    print(simMatFileName)
    print(predsFileName)

    # Grab the ratings vector
    if '-d' in sys.argv:
        ratings = read.difficulty_rating_vect(vectFileName)
    else:
        ratings = read.overall_rating_vect(vectFileName)

    # Assign similarity metric
    sim_f = vp.inverse_euclidean_distance
    if '-cos' in sys.argv:
        sim_f = vp.cosine_similarity
    elif '-pear' in sys.argv:
        sim_f = vp.abs_pearson_correlation

    # Set if weighted or not
    weighted = '-unweighted' not in sys.argv

    # Grab predictions or create them if not available
    predictions = read.knn_predictions(predsFileName)
    if predictions is None:

        simMat = read.similarity_matrix(simMatFileName)
        if simMat is None:
            wordVects = read.word_vects(vectFileName)
            if wordVects is None:
                print("Vector file " + vectFileName + " does not exist")
                # Non-zero status so calling scripts can detect the failure.
                sys.exit(1)
            wordVects = vp.process_token_vectors(wordVects, sys.argv)
            simMat = knn.get_similarity_matrix(wordVects, sim_f)
            write.similarity_matrix(simMat, simMatFileName)

        # NOTE(review): MaxK is not defined in this function; presumably a
        # module-level constant -- confirm it exists at file scope.
        predictions = knn.knn_dataset(ratings, MaxK, simMat, weighted)
        write.knn_predictions(predictions, predsFileName)

    # Truncate only after the full predictions were loaded or written above.
    if maxK is not None:
        predictions = predictions[:, :maxK]

    pidVect = read.pid_vect(vectFileName)
    singleRevIdxs = vp.pids_to_idxs(pidVect,
                                    read.pids_file(fn.PidsSingleRevFile))
    smallLenIdxs = vp.pids_to_idxs(pidVect,
                                   read.pids_file(fn.PidsSmallRevLenFile))

    plotFileName = None
    if '-save' in sys.argv:
        plotFileName = fn.create_knn_accuracy_plot_name(sys.argv)

    # Output results of the run
    plot.knn_error(
        predictions,
        ratings,
        title=plot.create_knn_error_title(sys.argv),
        idxToPlot=[singleRevIdxs, smallLenIdxs],
        subTitles=[
            "Error with profs with one review",
            "Error with profs with aggrigate review " +
            "lengths one std div above the mean " + "review length or less"
        ],
        saveFile=plotFileName)