def testSmall(self):
    """Time each stage of the first-order Viterbi pipeline on the small test set.

    Trains a model on ``SG/train``, builds the emission and first-order
    transition dataframes, tags ``testdata/small_test`` into
    ``testdata/output.txt`` and prints the wall-clock duration of each stage.
    """
    t_begin = time.time()
    hmm = model.Model("SG/train")
    hmm.train()
    t_trained = time.time()
    print("Trained in {}s".format(t_trained - t_begin))

    input_path = "testdata/small_test"

    # Emission dataframe from part 2
    emission_df = part2helper.GetEmissionDataFrame(hmm, 1)
    # 1st order transition dataframe from part 3
    transition_df = part3.GetTransitionDataFrame(hmm)
    t_frames = time.time()
    print("DF generated in {}s".format(t_frames - t_trained))

    # part 3 tagging
    tagging_args = {
        "_out": "testdata/output.txt",
        "_file": input_path,
        "_model": hmm,
        "_emission_df": emission_df,
        "_transition_df": transition_df,
    }
    viterbi.TagWithViterbi(**tagging_args)
    t_tagged = time.time()
    print("Tagged in {}s".format(t_tagged - t_frames))
def testPart4WithTestdata(self):
    """Run the part 4 (second-order) Viterbi tagger on the small test set.

    Trains on ``SG/train``, builds the emission, first-order and second-order
    transition dataframes, and writes the tagging of ``testdata/small_test``
    to ``testdata/part4_out.txt``.
    """
    hmm = model.Model("SG/train")
    hmm.train()

    input_path = "testdata/small_test"

    # Emission dataframe from part 2
    emission_df = part2helper.GetEmissionDataFrame(hmm, 1)
    # 1st order transition dataframe from part 3
    first_order_df = part3.GetTransitionDataFrame(hmm)
    # 2nd order transition dataframe from part 4
    second_order_df = part4.GetTransitionDataFrame(hmm)

    # part 4 tagging
    tagging_args = {
        "_out": "testdata/part4_out.txt",
        "_file": input_path,
        "_model": hmm,
        "_emission_df": emission_df,
        "_transition_df": first_order_df,
        "_2nd_order_df": second_order_df,
    }
    viterbi.TagWithViterbi(**tagging_args)
def testEmission(self):
    """Print the emission row for the word 'not' and the transition dataframe.

    Trains on ``SG/train`` and prints both tables for manual inspection;
    nothing is asserted.
    """
    hmm = model.Model('SG/train')
    hmm.train()

    # NOTE(review): called without the second argument that other tests pass
    # as 1 -- confirm the default is what this test intends.
    emission_df = part2helper.GetEmissionDataFrame(hmm)
    not_row = emission_df.loc['not']
    print('emission prob:\n\n', not_row)

    transition_df = part3.GetTransitionDataFrame(hmm)
    print('transition prob:\n\n', transition_df)
def testPart3WithTestdata(self):
    """Run the part 3 (first-order) Viterbi tagger on the small test set.

    Trains on ``SG/train``, builds the emission and first-order transition
    dataframes, and writes the tagging of ``testdata/small_test`` to
    ``testdata/part3_out.txt``.
    """
    hmm = model.Model("SG/train")
    hmm.train()

    input_path = "testdata/small_test"

    # Emission dataframe from part 2
    emission_df = part2helper.GetEmissionDataFrame(hmm, 1)
    # 1st order transition dataframe from part 3
    transition_df = part3.GetTransitionDataFrame(hmm)

    tagging_args = {
        "_out": "testdata/part3_out.txt",
        "_file": input_path,
        "_model": hmm,
        "_emission_df": emission_df,
        "_transition_df": transition_df,
    }
    viterbi.TagWithViterbi(**tagging_args)
def testAll(self):
    """Run the part 2, part 3 and part 4 taggers in sequence on the small test set.

    Trains on ``EN/train``, builds all three dataframes up front, then writes
    one output file per part under ``testdata/``.
    """
    hmm = model.Model("EN/train")
    hmm.train()

    input_path = "testdata/small_test"

    # Emission dataframe from part 2
    emission_df = part2helper.GetEmissionDataFrame(hmm, 1)
    # 1st order transition dataframe from part 3
    first_order_df = part3.GetTransitionDataFrame(hmm)
    # 2nd order transition dataframe from part 4
    second_order_df = part4.GetTransitionDataFrame(hmm)

    # part 2 tagging
    part2helper.TagTweets(
        _out='testdata/part2_out.txt',
        _emission_df=emission_df,
        _file=input_path,
    )

    # Keyword arguments common to both Viterbi runs below.
    shared_args = {
        "_file": input_path,
        "_model": hmm,
        "_emission_df": emission_df,
        "_transition_df": first_order_df,
    }

    # part 3 tagging
    print("part 3:\n")
    viterbi.TagWithViterbi(_out="testdata/part3_out.txt", **shared_args)

    # part 4 tagging
    print("part 4:\n")
    viterbi.TagWithViterbi(
        _out="testdata/part4_out.txt",
        _2nd_order_df=second_order_df,
        **shared_args
    )
# Per-language pipeline: train the model, then build the dataframes used by
# parts 2-4, printing how long each stage took.
for lang in languages:
    print("Starting language {}".format(lang))
    starttime = time.time()
    m = Model(lang + "/train")
    m.train()
    traintime = time.time()
    # Elapsed time is end minus start (traintime - starttime) so the reported
    # duration is non-negative, matching the later stages.
    print("Finished training {} in {}s".format(lang, traintime - starttime))

    # Emission dataframe from part 2
    em_df = part2helper.GetEmissionDataFrame(m, 1)
    part2time = time.time()
    print("Finished part2 df in {}s".format(part2time - traintime))

    # 1st order transition dataframe from part 3
    tr_df = part3.GetTransitionDataFrame(m)
    part3time = time.time()
    print("Finished part3 df in {}s".format(part3time - part2time))

    # 2nd order HMM transition dataframe from part 4
    tr_2nd_order = part4.GetTransitionDataFrame(m)
    part4time = time.time()
    print("Finished part4 df in {}s".format(part4time - part3time))
    print(" ---- ---- ---- \n")

    # Input file to be tagged for this language.
    readfile = lang + "/dev.in"
    # part 2 tagging
def testSmoothedEmission(self):
    """Train on ``SG/train`` and print the part 3 transition dataframe.

    NOTE(review): despite the test's name, the code prints the transition
    dataframe from part 3, not an emission table -- confirm which was intended.
    """
    hmm = model.Model('SG/train')
    hmm.train()
    transition_df = part3.GetTransitionDataFrame(hmm)
    print(transition_df)
def testDataFrame(self):
    """Train on ``SG/train`` and print the first-order transition dataframe.

    Output is for manual inspection only; nothing is asserted.
    """
    hmm = model.Model('SG/train')
    hmm.train()
    transition_df = part3.GetTransitionDataFrame(hmm)
    print('transition prob:\n\n', transition_df)