def test_Omikuji(self):
    """Every sample phrase must be accepted by ``isOmikujiAdd()``.

    NOTE(review): another ``test_Omikuji`` (exercising ``isOmikuji()``)
    appears elsewhere in this source; if both live in the same class the
    later definition replaces the earlier one -- confirm and consider
    renaming one of them.
    """
    samples = (
        u'くじ追加',
        u'このくじ追加して',
    )
    for text in samples:
        assert_true(TweetChecker(text).isOmikujiAdd())
def test_Peropero(self):
    """Every sample phrase must be accepted by ``isPeropero()``."""
    samples = (
        u'ペロペロ',
        u'ペロペロ',
        u'ペロペロ',
        u'ペロペロ',
        u'これにペロペロ機能つける',
    )
    for text in samples:
        assert_true(TweetChecker(text).isPeropero())
def get_emission_matrix(training_file_name, max_identity_value):
    """Build a row-normalised emission matrix from a training file.

    The training file is read as consecutive line pairs: a correct tweet
    followed by its misspelled counterpart.  For every position of the
    correct tweet, the cell addressed by (index of the correct symbol in
    ``alphabet``, index of the observed symbol in ``observable``) is
    incremented.  Every cell starts at 1, so no emission ends up with a
    zero probability.

    Args:
        training_file_name: Path of the training file (opened in "rb" mode).
        max_identity_value: When non-zero, upper bound (as a fraction of the
            row total) applied to cell ``[i][i + 1]`` of each row before the
            row is divided by its total.

    Returns:
        A list of ``len(alphabet)`` rows, each a list of ``len(observable)``
        floats.

    Raises:
        IndexError: If the file holds an odd number of lines, or a misspelled
            tweet is shorter than its correct counterpart.
        ValueError: If a symbol is missing from ``alphabet`` / ``observable``.
    """
    matrix = [[1] * len(observable) for _ in range(len(alphabet))]

    # Read everything up front and release the handle immediately; the
    # original left the file object open.
    with open(training_file_name, "rb") as training_file:
        training = training_file.readlines()

    for pair_start in range(0, len(training), 2):
        correct_tweet = TweetChecker.parse(
            training[pair_start].replace("\n", ""))
        misspelled_tweet = parseObservation(
            training[pair_start + 1].replace("\n", ""))
        # A dedicated index name avoids shadowing the pair index above.
        for position in range(0, len(correct_tweet)):
            raw = alphabet.index(correct_tweet[position])
            col = observable.index(misspelled_tweet[position])
            matrix[raw][col] = matrix[raw][col] + 1

    for i in range(0, len(matrix)):
        den = sum(matrix[i])
        # NOTE(review): presumably the "identity" column of row i sits at
        # i + 1 because ``observable`` is offset by one relative to
        # ``alphabet`` -- confirm against their definitions.
        k = i + 1
        if max_identity_value != 0 and matrix[i][k] > den*max_identity_value:
            # NOTE(review): ``den`` is not recomputed after capping, so a
            # capped row sums to less than 1 once divided below.
            matrix[i][k] = den*max_identity_value
        matrix[i] = [float(j) / den for j in matrix[i]]
    return matrix
def test_NotQuestion(self):
    """None of the sample phrases may be classified by ``isQuestion()``."""
    samples = (
        u'そうにきまてる',
        u'そんなきがしてた',
        u'君はBLなのかな',
        u'そんな装備で大丈夫か?',
    )
    for text in samples:
        assert_false(TweetChecker(text).isQuestion())
def test_Omikuji(self):
    """Every sample phrase must be accepted by ``isOmikuji()``."""
    samples = (
        u'おみくじくれ',
        u'御神籤ください',
        u'おねがい、お御籤ちょうだい',
        u'僕のこと占って!',
    )
    for text in samples:
        assert_true(TweetChecker(text).isOmikuji())
def createTrainingFile(tweetsFile, percentageOfError, file_name, without_underscore):
    """Write a training file of (parsed tweet, corrupted tweet) line pairs.

    Each item of ``tweetsFile`` is passed through ``TweetChecker.parse`` and
    stripped of newlines, then written on its own line.  The following line
    holds the result of ``introduceError`` for that text, with underscores
    removed when ``without_underscore`` is truthy.

    Args:
        tweetsFile: Iterable yielding the raw tweet lines.
        percentageOfError: Forwarded unchanged to ``introduceError``.
        file_name: Path of the output file (opened in 'w+' mode).
        without_underscore: Whether to drop "_" from the corrupted line.

    Returns:
        ``file_name``, unchanged.
    """
    with open(file_name, 'w+') as out:
        for raw_tweet in tweetsFile:
            clean = TweetChecker.parse(raw_tweet).replace("\n", "")
            out.write(clean + "\n")

            noisy = introduceError(clean, percentageOfError)
            if without_underscore:
                noisy = noisy.replace("_", "")
            out.write(noisy + "\n")
    return file_name
def test_Iyaho(self):
    """Every sample phrase must be accepted by ``isIyaho()``."""
    samples = (
        u'イィイイイヤッホォオオオオォオォォォォウ!!!!!',
        u'ニャアアアアホオオオオゥゥゥゥ!!!!',
        u'イヤホゥ!',
    )
    for text in samples:
        assert_true(TweetChecker(text).isIyaho())
def test_NotPeropero(self):
    """None of the sample phrases may be accepted by ``isPeropero()``."""
    samples = (
        u'レロレロ',
        u'ぺ',
        u'ブホホフギィ!!',
    )
    for text in samples:
        assert_false(TweetChecker(text).isPeropero())
def test_3(self):
    """``TweetChecker.tweet`` must hand the message to ``PostUpdate`` as-is."""
    message = "Say hello"
    mock_twitter = Mock()

    TweetChecker.tweet(mock_twitter, message)

    # The expectation used to read "Say helo", which contradicts the other
    # tweet tests in this source: they all expect the text to be forwarded
    # unchanged (including one whose message contains "Hello").
    # NOTE(review): confirm the old value was a typo and not intentional.
    mock_twitter.PostUpdate.assert_called_with(message)
def test_1(self):
    """A plain ASCII message is forwarded verbatim to ``PostUpdate``."""
    twitter_stub = Mock()
    text = "message"

    TweetChecker.tweet(twitter_stub, text)

    twitter_stub.PostUpdate.assert_called_with(text)
def test_Uhyou(self):
    """Every sample phrase must be accepted by ``isUhyou()``."""
    samples = (
        u'ウッヒョォオオオォオォォォウ!!!',
        u'ウッヒョオオオオォォゥ!!!!!',
    )
    for text in samples:
        assert_true(TweetChecker(text).isUhyou())
def test_NotUhyou(self):
    """An unrelated exclamation must be rejected by ``isUhyou()``."""
    checker = TweetChecker(u'ブホホフギィ!!')
    is_uhyou = checker.isUhyou()
    assert_false(is_uhyou)
def test_4(self):
    """A message with punctuation is forwarded verbatim to ``PostUpdate``."""
    twitter_stub = Mock()
    text = "Hello, it's me"

    TweetChecker.tweet(twitter_stub, text)

    twitter_stub.PostUpdate.assert_called_with(text)
def test_NotIyaho(self):
    """An unrelated exclamation must be rejected by ``isIyaho()``."""
    checker = TweetChecker(u'ブホホフギィ!!')
    is_iyaho = checker.isIyaho()
    assert_false(is_iyaho)
def _format_identity_section(title, values, first_not_account_index):
    """Render one report section: raw values, mean, std and per-group means."""
    section = title + "\n"
    section += str(values) + "\n"
    section += "\nMEAN: " + str(numpy.mean(values))
    section += "\nSTD: " + str(numpy.std(values))
    section += "\nSAME ACCOUNTS MEAN: " + str(
        numpy.mean(values[:first_not_account_index]))
    section += "\nOTHER ACCOUNTS MEAN: " + str(
        numpy.mean(values[first_not_account_index:]))
    section += "\n\n"
    return section


def _format_gain_line(label, corrected_value, mispelled_value):
    """Render one "GAIN" line: corrected - misspelled = difference."""
    return ("\n" + label + ": " + str(corrected_value) + " - " +
            str(mispelled_value) + " = " +
            str(corrected_value - mispelled_value))


def new_test(model, test_file_name, test_file_length,
             first_not_account_index=135):
    """Run the checker over a test file and return a textual report.

    The test file is consumed as line pairs: a correct tweet followed by its
    misspelled version.  Each misspelled tweet is corrected with
    ``TweetChecker.dull_check`` and four scores are collected per pair:
    character identity and correct-word count, both for
    <corrected, correct> and for <misspelled, correct>.

    Args:
        model: Model handed to ``TweetChecker.dull_check``.
        test_file_name: Path of the test file.
        test_file_length: Total shown in the progress output only.
        first_not_account_index: Number of leading pairs reported under
            "SAME ACCOUNTS"; the remaining pairs are reported under
            "OTHER ACCOUNTS".  Defaults to 135, the value that used to be
            hard-coded.

    Returns:
        The report as a single string (per-metric sections, then "GAIN").

    Raises:
        StopIteration: If the file contains an odd number of lines.
    """
    list_ccci = []
    list_ccwi = []
    list_mcci = []
    list_mcwi = []

    # ``with`` guarantees the handle is closed even when a check raises; the
    # original never closed it.
    with open(test_file_name) as test_file:
        for i, line in enumerate(test_file, 1):
            correct_line = line.replace("\n", "")
            # The misspelled counterpart is the next line of the same file.
            mispelled_line = next(test_file).replace("\n", "")
            print("[" + str(i) + "/" + str(
                test_file_length) + "] Checking:\n\t " + mispelled_line)
            corrected_tweet = TweetChecker.dull_check(mispelled_line, model)
            print("Calulating allignment ccci...")
            list_ccci.append(align_char(corrected_tweet, correct_line))
            print("Calulating allignment ccwi...")
            list_ccwi.append(count_correct_words(corrected_tweet, correct_line))
            print("Calulating allignment mcci...")
            list_mcci.append(align_char(mispelled_line, correct_line))
            print("Calulating allignment mcwi...")
            list_mcwi.append(count_correct_words(mispelled_line, correct_line))

    results_string = ""
    results_string += _format_identity_section(
        "<CORRECTED TWEET, CORRECT TWEET> CHARS IDENTITY:",
        list_ccci, first_not_account_index)
    results_string += _format_identity_section(
        "<CORRECTED TWEET, CORRECT TWEET> CORRECT WORDS:",
        list_ccwi, first_not_account_index)
    results_string += _format_identity_section(
        "<MISPELLED TWEET, CORRECT TWEET> CHARS IDENTITY:",
        list_mcci, first_not_account_index)
    results_string += _format_identity_section(
        "<MISPELLED TWEET, CORRECT TWEET> CORRECT WORDS:",
        list_mcwi, first_not_account_index)

    results_string += "GAIN:\n"
    results_string += _format_gain_line(
        "WORD MEAN", numpy.mean(list_ccwi), numpy.mean(list_mcwi))
    results_string += _format_gain_line(
        "WORD STD", numpy.std(list_ccwi), numpy.std(list_mcwi))
    results_string += _format_gain_line(
        "CHARS MEAN", numpy.mean(list_ccci), numpy.mean(list_mcci))
    results_string += _format_gain_line(
        "CHARS STD", numpy.std(list_ccci), numpy.std(list_mcci))
    results_string += "\n\n"
    return results_string
def test_2(self):
    """A message with typographic punctuation is forwarded verbatim."""
    twitter_stub = Mock()
    quote = "“Can’t repeat the past?…Why of course you can!” ― F. Scott Fitzgerald, The Great Gatsby"

    TweetChecker.tweet(twitter_stub, quote)

    twitter_stub.PostUpdate.assert_called_with(quote)
def test_3(self):
    """A multi-sentence message is forwarded verbatim to ``PostUpdate``."""
    twitter_stub = Mock()
    quote = "He smiled understandingly-much more than understandingly. It was one of those rare smiles with a quality of eternal reassurance in it, that you may come across four or five times in life. It faced--or seemed to face--the whole eternal world for an instant, and then concentrated on you with an irresistible prejudice in your favor. It understood you just as far as you wanted to be understood, believed in you as you would like to believe in yourself, and assured you that it had precisely the impression of you that, at your best, you hoped to convey. ― F. Scott Fitzgerald, The Great Gatsby"

    TweetChecker.tweet(twitter_stub, quote)

    twitter_stub.PostUpdate.assert_called_with(quote)
def test_Question(self):
    """Every sample phrase must be classified by ``isQuestion()``."""
    samples = (
        u'だろ?',
        u'そう思うよね?',
        u'美味しいんだろ?',
        u'BLなんですか?',
        u'ロボットナンダロ?',
        u'ネ? ソウダヨネ?',
        u'だろ!',
        u'な!',
        u'ダロ!',
        u'大丈夫ですか?',
    )
    for text in samples:
        assert_true(TweetChecker(text).isQuestion())
def test_4(self):
    """``TweetChecker.tweet`` must hand the quote to ``PostUpdate`` as-is."""
    quote = "“He looked at her the way all women want to be looked at by a man.” ― F. Scott Fitzgerald, The Great Gatsby"
    mock_twitter = Mock()

    TweetChecker.tweet(mock_twitter, quote)

    # The expectation used to be the literal "message", which never matches
    # the text sent above; the other tweet tests in this source all expect
    # the text to be forwarded unchanged.
    # NOTE(review): confirm the old value was a copy-paste leftover.
    mock_twitter.PostUpdate.assert_called_with(quote)
def test_Daijyoubuka(self):
    """Every sample phrase must be accepted by ``isDaijyoubuka()``."""
    samples = (
        u'そんな装備で大丈夫か?',
        u'あたま大丈夫か?',
        u'大丈夫か?ほんとに',
        u'それで大丈夫か?まじで?',
        u'そんな装備で、イーノック?',
        u'そんな装備で、いーのっく?',
    )
    for text in samples:
        assert_true(TweetChecker(text).isDaijyoubuka())
return chmm  # NOTE(review): tail of a function that starts above this excerpt; indent to match it


# Manual smoke test: load a previously saved model and run the sentence
# checker on a hand-written sample.
if __name__ == '__main__':
    # NOTE(review): absolute, machine-specific paths; they are only
    # referenced by the commented-out training call below.
    file_name = "/home/umberto/Documents/HMMTweetChecker/src/training_sets/DownloadedTweet.txt"
    voc_name ="/home/umberto/Documents/HMMTweetChecker/src/vocabularies/Vocabulary.txt"

    # Alternative path kept for reference: train a model from scratch and
    # save it, instead of loading a stored one.
    #model = HMMTrainer(file_name, voc_name).hmm
    #print model
    #model.save_hmm(None)
    model = HMMFile.load("/home/umberto/Documents/HMMTweetChecker/src/HMM_2/")
    #print model.vocabulary

    # Test for completely random extraction
    #test = "Authurities aoe inveNtmgating afteroan kCEzdetaVnee facing pos7ible deportation apparentky kBlledFhimself "

    # Test for qwerty sample
    # Each assignment overwrites the previous one, so only the last ``test``
    # value reaches the checker; the others are kept as ready-made samples.
    test = "Hap0y ValDntGn DaY @MiXelKeObama You jAkeBSv3ryDdaY and eveGy place bet5er"
    test = "8aopy alentinVs Day @DichelleObamq YouRmakeSdvery day and every Olace getter "
    test = "All acrows Amerida people cEose tI getGinvolveS get engAged aHd stand up Each of ys can majW a dIRdeeence ane alN if us oughE toGtrh woBFT keeO chSngiRg the worlc in 2H1I"
    test = "We cSn neveR trUly repSL te EHbt we Iwe our faJlen heroeZ But wd vanTremember them honortheir sacrificdJInQ arfirm"
    test = "Helo i m studing for the exams"

    #print TweetChecker.dull_check(test, model, model.obs_states, model.vocabulary).replace("[", "\n").replace("]", "\n")
    print TweetChecker.sentense_check(test, model, model.obs_states)