Пример #1
0
def new_test(model, test_file_name, test_file_length,
             first_not_account_index=135):
    """Evaluate tweet correction on a paired test file and return a report.

    The file is read two lines at a time: the first line of each pair is
    the correct tweet and the second is its misspelled version.  Each
    misspelled tweet is passed to ``TweetChecker.dull_check`` together with
    ``model``; the result and the raw misspelled tweet are then each
    compared to the correct tweet with ``align_char`` and
    ``count_correct_words``.

    Args:
        model: Passed unchanged to ``TweetChecker.dull_check``.
        test_file_name: Path of the test file to open.
        test_file_length: Only used in the progress message printed for
            each pair (shown as the total after the running counter).
        first_not_account_index: Index of the first pair that is reported
            under "OTHER ACCOUNTS MEAN"; pairs before it are reported under
            "SAME ACCOUNTS MEAN".  Defaults to 135, the value that was
            previously hard-coded.

    Returns:
        A multi-line string with the per-pair scores, their mean and
        standard deviation, the two partial means, and a final "GAIN"
        section with the corrected-minus-misspelled differences.
    """
    list_ccci = []  # corrected vs. correct, align_char
    list_ccwi = []  # corrected vs. correct, count_correct_words
    list_mcci = []  # misspelled vs. correct, align_char
    list_mcwi = []  # misspelled vs. correct, count_correct_words

    # The context manager guarantees the file is closed even if one of the
    # scoring helpers raises.
    with open(test_file_name) as test_file:
        lines = iter(test_file)
        for i, line in enumerate(lines, 1):
            # Pull the second line of the pair from the same iterator.  A
            # trailing line without a partner is ignored instead of letting
            # a bare StopIteration escape from the loop body.
            mispelled_raw = next(lines, None)
            if mispelled_raw is None:
                break

            correct_line = line.replace("\n", "")
            mispelled_line = mispelled_raw.replace("\n", "")

            # Single-argument print(...) behaves the same as the print
            # statement on Python 2 and is also valid on Python 3.
            print("[" + str(i) + "/" + str(test_file_length)
                  + "] Checking:\n\t " + mispelled_line)
            corrected_tweet = TweetChecker.dull_check(mispelled_line, model)

            print("Calulating allignment ccci...")
            list_ccci.append(align_char(corrected_tweet, correct_line))
            print("Calulating allignment ccwi...")
            list_ccwi.append(
                count_correct_words(corrected_tweet, correct_line))
            print("Calulating allignment mcci...")
            list_mcci.append(align_char(mispelled_line, correct_line))
            print("Calulating allignment mcwi...")
            list_mcwi.append(
                count_correct_words(mispelled_line, correct_line))

    split = first_not_account_index
    ccci_text, mean_ccci, std_ccci = _score_section(
        "<CORRECTED TWEET, CORRECT TWEET> CHARS IDENTITY", list_ccci, split)
    ccwi_text, mean_ccwi, std_ccwi = _score_section(
        "<CORRECTED TWEET, CORRECT TWEET> CORRECT WORDS", list_ccwi, split)
    mcci_text, mean_mcci, std_mcci = _score_section(
        "<MISPELLED TWEET, CORRECT TWEET> CHARS IDENTITY", list_mcci, split)
    mcwi_text, mean_mcwi, std_mcwi = _score_section(
        "<MISPELLED TWEET, CORRECT TWEET> CORRECT WORDS", list_mcwi, split)

    return "".join([
        ccci_text,
        ccwi_text,
        mcci_text,
        mcwi_text,
        "GAIN:\n",
        _gain_line("WORD MEAN", mean_ccwi, mean_mcwi),
        _gain_line("WORD STD", std_ccwi, std_mcwi),
        _gain_line("CHARS MEAN", mean_ccci, mean_mcci),
        _gain_line("CHARS STD", std_ccci, std_mcci),
        "\n\n",
    ])


def _score_section(title, scores, split_index):
    """Return ``(text, mean, std)`` for one list of per-pair scores.

    ``text`` is the report paragraph for the list: the title, the raw
    scores, their mean and standard deviation, and the means of the slices
    before and from ``split_index``.
    """
    mean = numpy.mean(scores)
    std = numpy.std(scores)
    text = "".join([
        title + ":\n",
        str(scores) + "\n",
        "\nMEAN: " + str(mean),
        "\nSTD: " + str(std),
        "\nSAME ACCOUNTS MEAN: " + str(numpy.mean(scores[:split_index])),
        "\nOTHER ACCOUNTS MEAN: " + str(numpy.mean(scores[split_index:])),
        "\n\n",
    ])
    return text, mean, std


def _gain_line(label, corrected, mispelled):
    """Return one "GAIN" line: both values and ``corrected - mispelled``."""
    return ("\n" + label + ": " + str(corrected) + " - " + str(mispelled)
            + " = " + str(corrected - mispelled))