Exemplo n.º 1
0
    def testPart4WithTestdata(self):
        """Run the part 4 tagging pipeline on the small test file.

        Trains a model from ``SG/train``, builds the emission dataframe
        (part 2) plus the 1st order (part 3) and 2nd order (part 4)
        transition dataframes, and passes all of them to
        ``viterbi.TagWithViterbi`` with ``testdata/part4_out.txt`` as
        ``_out``. Nothing is asserted; the test only exercises the calls.
        """
        sg_model = model.Model("SG/train")
        sg_model.train()

        # Emission dataframe from part 2
        emissions = part2helper.GetEmissionDataFrame(sg_model, 1)
        # 1st order transition dataframe from part 3
        first_order = part3.GetTransitionDataFrame(sg_model)
        # 2nd order transition dataframe from part 4
        second_order = part4.GetTransitionDataFrame(sg_model)

        # part 4 tagging: same call as part 3 plus the 2nd order dataframe
        tagging_kwargs = dict(
            _out="testdata/part4_out.txt",
            _file="testdata/small_test",
            _model=sg_model,
            _emission_df=emissions,
            _transition_df=first_order,
            _2nd_order_df=second_order,
        )
        viterbi.TagWithViterbi(**tagging_kwargs)
    def testSmall(self):
        """Time training, dataframe generation and part 3 tagging.

        Uses a model built from ``SG/train`` and the input file
        ``testdata/small_test``. After each of the three stages the elapsed
        ``time.time()`` difference for that stage is printed. Nothing is
        asserted.
        """
        t_begin = time.time()
        sg_model = model.Model("SG/train")
        sg_model.train()
        t_trained = time.time()
        train_seconds = t_trained - t_begin
        print("Trained in {}s".format(train_seconds))

        # Emission dataframe from part 2
        emissions = part2helper.GetEmissionDataFrame(sg_model, 1)
        # 1st order transition dataframe from part 3
        transitions = part3.GetTransitionDataFrame(sg_model)

        t_frames = time.time()
        frame_seconds = t_frames - t_trained
        print("DF generated in {}s".format(frame_seconds))

        # part 3 tagging
        viterbi.TagWithViterbi(
            _out="testdata/output.txt",
            _file="testdata/small_test",
            _model=sg_model,
            _emission_df=emissions,
            _transition_df=transitions,
        )
        t_tagged = time.time()
        tag_seconds = t_tagged - t_frames
        print("Tagged in {}s".format(tag_seconds))
    def testEmission(self):
        """Print emission and transition data for a model built from SG/train.

        Prints the ``.loc['not']`` entry of the emission dataframe, then the
        whole 1st order transition dataframe. Nothing is asserted.
        """
        sg_model = model.Model('SG/train')
        sg_model.train()

        emissions = part2helper.GetEmissionDataFrame(sg_model)
        not_entry = emissions.loc['not']
        print('emission prob:\n\n', not_entry)

        transitions = part3.GetTransitionDataFrame(sg_model)
        print('transition prob:\n\n', transitions)
Exemplo n.º 4
0
    def testPart2WithTestdata(self):
        """Run the part 2 tagger on the small test file.

        Trains a model from ``SG/train``, builds its emission dataframe and
        passes it to ``part2helper.TagTweets`` together with the input file
        ``testdata/small_test`` and ``testdata/part2_out.txt`` as ``_out``.
        Nothing is asserted.
        """
        sg_model = model.Model("SG/train")
        sg_model.train()

        # Emission dataframe from part 2
        emissions = part2helper.GetEmissionDataFrame(sg_model, 1)

        part2helper.TagTweets(
            _out='testdata/part2_out.txt',
            _emission_df=emissions,
            _file="testdata/small_test",
        )
Exemplo n.º 5
0
    def testCompleteSmoothed(self):
        """Time training, emission dataframe creation and part 2 tagging.

        Uses a model built from ``SG/train``. Three timings are printed:
        training, the total from start until the emission dataframe is ready,
        and the ``part2helper.TagTweets`` call. Nothing is asserted.
        """
        t_begin = time.time()
        sg_model = model.Model('SG/train')
        sg_model.train()

        t_trained = time.time()
        print("Trained in {}s".format(t_trained - t_begin))

        emissions = part2helper.GetEmissionDataFrame(sg_model, 1)
        t_frame = time.time()
        # Measured from the very beginning, so this figure includes training.
        print("Finished in {}s".format(t_frame - t_begin))

        part2helper.TagTweets(
            'testdata/part2test.out',
            emissions,
            'testdata/small_test',
        )
        t_tagged = time.time()
        print("done with tagging in {}s".format(t_tagged - t_frame))
Exemplo n.º 6
0
    def testPart3WithTestdata(self):
        """Run the part 3 tagging pipeline on the small test file.

        Trains a model from ``SG/train``, builds the emission dataframe
        (part 2) and the 1st order transition dataframe (part 3), and passes
        them to ``viterbi.TagWithViterbi`` with ``testdata/part3_out.txt`` as
        ``_out``. Nothing is asserted.
        """
        sg_model = model.Model("SG/train")
        sg_model.train()

        # Emission dataframe from part 2
        emissions = part2helper.GetEmissionDataFrame(sg_model, 1)
        # 1st order transition dataframe from part 3
        transitions = part3.GetTransitionDataFrame(sg_model)

        tagging_kwargs = dict(
            _out="testdata/part3_out.txt",
            _file="testdata/small_test",
            _model=sg_model,
            _emission_df=emissions,
            _transition_df=transitions,
        )
        viterbi.TagWithViterbi(**tagging_kwargs)
Exemplo n.º 7
0
    def testAll(self):
        """Run the part 2, part 3 and part 4 taggers back to back.

        Trains a model from ``EN/train`` and builds all three dataframes
        up front. Each tagger is then called on ``testdata/small_test``
        with its own ``_out`` path under ``testdata/``. Nothing is asserted.
        """
        en_model = model.Model("EN/train")
        en_model.train()

        source_path = "testdata/small_test"

        # Emission dataframe from part 2
        emissions = part2helper.GetEmissionDataFrame(en_model, 1)
        # 1st order transition dataframe from part 3
        first_order = part3.GetTransitionDataFrame(en_model)
        # 2nd order transition dataframe from part 4
        second_order = part4.GetTransitionDataFrame(en_model)

        # part 2 tagging
        part2helper.TagTweets(
            _out='testdata/part2_out.txt',
            _emission_df=emissions,
            _file=source_path,
        )

        # part 3 tagging
        print("part 3:\n")
        viterbi.TagWithViterbi(
            _out="testdata/part3_out.txt",
            _file=source_path,
            _model=en_model,
            _emission_df=emissions,
            _transition_df=first_order,
        )

        # part 4 tagging: same inputs plus the 2nd order dataframe
        print("part 4:\n")
        viterbi.TagWithViterbi(
            _out="testdata/part4_out.txt",
            _file=source_path,
            _model=en_model,
            _emission_df=emissions,
            _transition_df=first_order,
            _2nd_order_df=second_order,
        )
import time


if __name__ == "__main__":
    # Script entry point: for every language, train a model from
    # "<lang>/train" and build the part 2, part 3 and part 4 dataframes,
    # printing the wall-clock time each stage took.
    # NOTE(review): begintime and tr_2nd_order are not read in the visible
    # part of this script; kept in case later code uses them.
    begintime = time.time()
    languages = ["EN", 'SG', 'CN', "FR"]
    for lang in languages:
        print("Starting language {}".format(lang))
        starttime = time.time()
        m = Model(lang + "/train")
        m.train()
        traintime = time.time()
        # Duration is end minus start, so the reported value is positive.
        print("Finished training {} in {}s".format(
            lang, traintime - starttime))

        # Emission dataframe from part 2
        em_df = part2helper.GetEmissionDataFrame(m, 1)

        part2time = time.time()
        print("Finished part2 df in {}s".format(part2time - traintime))

        # 1st order transition dataframe from part 3
        tr_df = part3.GetTransitionDataFrame(m)

        part3time = time.time()
        print("Finished part3 df in {}s".format(part3time - part2time))

        # 2nd order HMM transition dataframe from part 4
        tr_2nd_order = part4.GetTransitionDataFrame(m)

        part4time = time.time()
        print("Finished part4 df in {}s".format(part4time - part3time))
Exemplo n.º 9
0
 def testSimpleDF(self):
     """Print the emission dataframe of a model built from SG/train.

     Nothing is asserted; the dataframe is only printed for inspection.
     """
     sg_model = model.Model('SG/train')
     sg_model.train()
     emissions = part2helper.GetEmissionDataFrame(sg_model, 1)
     print(emissions)