def testPart4WithTestdata(self):
    """Run the second-order (part 4) Viterbi tagging on the small test file.

    Builds a model from ``SG/train``, calls ``train()`` on it, derives the
    emission dataframe plus the 1st- and 2nd-order transition dataframes,
    and hands everything to ``viterbi.TagWithViterbi`` with
    ``testdata/part4_out.txt`` as the ``_out`` argument.
    """
    test_path = "testdata/small_test"

    hmm = model.Model("SG/train")
    hmm.train()

    # Part 2: emission dataframe.
    emissions = part2helper.GetEmissionDataFrame(hmm, 1)
    # Part 3: 1st order transition dataframe.
    first_order = part3.GetTransitionDataFrame(hmm)
    # Part 4: 2nd order transition dataframe.
    second_order = part4.GetTransitionDataFrame(hmm)

    # Part 4 tagging.
    viterbi.TagWithViterbi(
        _out="testdata/part4_out.txt",
        _file=test_path,
        _model=hmm,
        _emission_df=emissions,
        _transition_df=first_order,
        _2nd_order_df=second_order,
    )
def testSmall(self):
    """Time training, dataframe generation and part 3 tagging on small data.

    Prints the elapsed wall-clock seconds of each of the three phases.
    """
    t_begin = time.time()
    hmm = model.Model("SG/train")
    hmm.train()
    t_trained = time.time()
    print("Trained in {}s".format(t_trained - t_begin))

    # Part 2 emission dataframe, then part 3 1st order transition dataframe.
    emissions = part2helper.GetEmissionDataFrame(hmm, 1)
    transitions = part3.GetTransitionDataFrame(hmm)
    t_frames = time.time()
    print("DF generated in {}s".format(t_frames - t_trained))

    # Part 3 tagging.
    viterbi.TagWithViterbi(
        _out="testdata/output.txt",
        _file="testdata/small_test",
        _model=hmm,
        _emission_df=emissions,
        _transition_df=transitions,
    )
    t_tagged = time.time()
    print("Tagged in {}s".format(t_tagged - t_frames))
def testEmission(self):
    """Print the emission row labelled ``'not'`` and the transition dataframe.

    Both dataframes are derived from a model built from ``SG/train`` after
    ``train()`` has been called on it.
    """
    hmm = model.Model("SG/train")
    hmm.train()

    emissions = part2helper.GetEmissionDataFrame(hmm)
    not_row = emissions.loc["not"]
    print("emission prob:\n\n", not_row)

    transitions = part3.GetTransitionDataFrame(hmm)
    print("transition prob:\n\n", transitions)
def testPart2WithTestdata(self):
    """Run the part 2 tagging (``part2helper.TagTweets``) on the small test file.

    Uses the emission dataframe of a model built from ``SG/train`` and
    passes ``testdata/part2_out.txt`` as the ``_out`` argument.
    """
    test_path = "testdata/small_test"

    hmm = model.Model("SG/train")
    hmm.train()

    # Part 2: emission dataframe.
    emissions = part2helper.GetEmissionDataFrame(hmm, 1)

    part2helper.TagTweets(
        _out="testdata/part2_out.txt",
        _emission_df=emissions,
        _file=test_path,
    )
def testCompleteSmoothed(self):
    """Time training, emission dataframe creation and part 2 tagging.

    Note that the "Finished in" figure is measured from the very start,
    so it includes the training time.
    """
    t_begin = time.time()
    hmm = model.Model("SG/train")
    hmm.train()
    t_trained = time.time()
    print("Trained in {}s".format(t_trained - t_begin))

    emissions = part2helper.GetEmissionDataFrame(hmm, 1)
    t_frame = time.time()
    print("Finished in {}s".format(t_frame - t_begin))

    part2helper.TagTweets("testdata/part2test.out", emissions, "testdata/small_test")
    t_tagged = time.time()
    print("done with tagging in {}s".format(t_tagged - t_frame))
def testPart3WithTestdata(self):
    """Run the first-order (part 3) Viterbi tagging on the small test file.

    Builds a model from ``SG/train``, calls ``train()`` on it, derives the
    emission and 1st-order transition dataframes, and calls
    ``viterbi.TagWithViterbi`` with ``testdata/part3_out.txt`` as ``_out``.
    """
    test_path = "testdata/small_test"

    hmm = model.Model("SG/train")
    hmm.train()

    # Part 2: emission dataframe.
    emissions = part2helper.GetEmissionDataFrame(hmm, 1)
    # Part 3: 1st order transition dataframe.
    transitions = part3.GetTransitionDataFrame(hmm)

    viterbi.TagWithViterbi(
        _out="testdata/part3_out.txt",
        _file=test_path,
        _model=hmm,
        _emission_df=emissions,
        _transition_df=transitions,
    )
def testAll(self):
    """Run the part 2, part 3 and part 4 tagging steps in sequence.

    The model is built from ``EN/train``; all three steps read
    ``testdata/small_test`` and are given their own ``_out`` path.
    """
    test_path = "testdata/small_test"

    hmm = model.Model("EN/train")
    hmm.train()

    # Part 2: emission dataframe.
    emissions = part2helper.GetEmissionDataFrame(hmm, 1)
    # Part 3: 1st order transition dataframe.
    first_order = part3.GetTransitionDataFrame(hmm)
    # Part 4: 2nd order transition dataframe.
    second_order = part4.GetTransitionDataFrame(hmm)

    # Part 2 tagging.
    part2helper.TagTweets(
        _out="testdata/part2_out.txt",
        _emission_df=emissions,
        _file=test_path,
    )

    # Keyword arguments common to both Viterbi invocations below.
    shared_args = {
        "_file": test_path,
        "_model": hmm,
        "_emission_df": emissions,
        "_transition_df": first_order,
    }

    # Part 3 tagging.
    print("part 3:\n")
    viterbi.TagWithViterbi(_out="testdata/part3_out.txt", **shared_args)

    # Part 4 tagging.
    print("part 4:\n")
    viterbi.TagWithViterbi(
        _out="testdata/part4_out.txt",
        _2nd_order_df=second_order,
        **shared_args
    )
import time

# Benchmark script: for each language, train a model and time how long each
# dataframe (part 2 emission, part 3 and part 4 transition) takes to build.
# NOTE(review): Model, part2helper, part3 and part4 are not imported in the
# visible portion of this file -- confirm they are in scope.
if __name__ == "__main__":
    # Timestamp taken before any language is processed.
    begintime = time.time()
    languages = ["EN", 'SG', 'CN', "FR"]
    for lang in languages:
        print("Starting language {}".format(lang))
        starttime = time.time()
        m = Model(lang + "/train")
        m.train()
        traintime = time.time()
        # Elapsed = end - start. The operands were previously reversed,
        # which printed a negative training duration.
        print("Finished training {} in {}s".format(lang, traintime - starttime))

        # Emission dataframe from part 2
        em_df = part2helper.GetEmissionDataFrame(m, 1)
        part2time = time.time()
        print("Finished part2 df in {}s".format(part2time - traintime))

        # 1st order transition dataframe from part 3
        tr_df = part3.GetTransitionDataFrame(m)
        part3time = time.time()
        print("Finished part3 df in {}s".format(part3time - part2time))

        # 2nd order HMM transition dataframe from part 4
        tr_2nd_order = part4.GetTransitionDataFrame(m)
        part4time = time.time()
        print("Finished part4 df in {}s".format(part4time - part3time))
def testSimpleDF(self):
    """Print the emission dataframe of a model built from ``SG/train``."""
    hmm = model.Model("SG/train")
    hmm.train()
    emissions = part2helper.GetEmissionDataFrame(hmm, 1)
    print(emissions)