def testSmall(self):
        """Run the part 3 tagging pipeline on the small test file, timing each stage.

        Trains a model on "SG/train", builds the part 2 emission dataframe and
        the part 3 first-order transition dataframe, then calls
        viterbi.TagWithViterbi on "testdata/small_test" with
        "testdata/output.txt" passed as `_out`. The wall-clock duration of each
        stage is printed; nothing is asserted.
        """
        input_path = "testdata/small_test"

        # Stage 1: training.
        t_begin = time.time()
        hmm = model.Model("SG/train")
        hmm.train()
        t_trained = time.time()
        train_secs = t_trained - t_begin
        print("Trained in {}s".format(train_secs))

        # Stage 2: emission (part 2) and 1st order transition (part 3) dataframes.
        emission_df = part2helper.GetEmissionDataFrame(hmm, 1)
        transition_df = part3.GetTransitionDataFrame(hmm)
        t_frames = time.time()
        frames_secs = t_frames - t_trained
        print("DF generated in {}s".format(frames_secs))

        # Stage 3: part 3 tagging.
        viterbi.TagWithViterbi(
            _file=input_path,
            _out="testdata/output.txt",
            _model=hmm,
            _transition_df=transition_df,
            _emission_df=emission_df)
        t_tagged = time.time()
        tag_secs = t_tagged - t_frames
        print("Tagged in {}s".format(tag_secs))
Exemplo n.º 2
0
    def testPart4WithTestdata(self):
        """Run the part 4 tagger on the small test file.

        Trains a model on "SG/train" and calls viterbi.TagWithViterbi on
        "testdata/small_test" with the part 2 emission dataframe, the part 3
        first-order transition dataframe and the part 4 second-order transition
        dataframe, passing "testdata/part4_out.txt" as `_out`. Nothing is
        asserted.
        """
        hmm = model.Model("SG/train")
        hmm.train()

        # The dataframes are built in the order emission -> 1st order -> 2nd
        # order, i.e. the order of the dict entries below.
        tagger_args = {
            "_out": "testdata/part4_out.txt",
            "_file": "testdata/small_test",
            "_model": hmm,
            # Emission dataframe from part 2
            "_emission_df": part2helper.GetEmissionDataFrame(hmm, 1),
            # 1st order transition dataframe from part 3
            "_transition_df": part3.GetTransitionDataFrame(hmm),
            # 2nd order transition dataframe from part 4
            "_2nd_order_df": part4.GetTransitionDataFrame(hmm),
        }

        # part 4 tagging
        viterbi.TagWithViterbi(**tagger_args)
    def testEmission(self):
        """Print one emission row and the full transition dataframe.

        Trains a model on 'SG/train', prints the 'not' row of the dataframe
        returned by part2helper.GetEmissionDataFrame, then prints the dataframe
        returned by part3.GetTransitionDataFrame. Nothing is asserted.
        """
        trained = model.Model('SG/train')
        trained.train()

        emission_df = part2helper.GetEmissionDataFrame(trained)
        not_row = emission_df.loc['not']
        print('emission prob:\n\n', not_row)

        transition_df = part3.GetTransitionDataFrame(trained)
        print('transition prob:\n\n', transition_df)
Exemplo n.º 4
0
    def testPart3WithTestdata(self):
        """Run the part 3 tagger on the small test file.

        Trains a model on "SG/train" and calls viterbi.TagWithViterbi on
        "testdata/small_test" with the part 2 emission dataframe and the part 3
        first-order transition dataframe, passing "testdata/part3_out.txt" as
        `_out`. Nothing is asserted.
        """
        input_path = "testdata/small_test"
        output_path = "testdata/part3_out.txt"

        hmm = model.Model("SG/train")
        hmm.train()

        # Part 2 emission dataframe, then part 3 1st order transition dataframe.
        emission_df = part2helper.GetEmissionDataFrame(hmm, 1)
        transition_df = part3.GetTransitionDataFrame(hmm)

        viterbi.TagWithViterbi(
            _model=hmm,
            _file=input_path,
            _out=output_path,
            _emission_df=emission_df,
            _transition_df=transition_df,
        )
Exemplo n.º 5
0
    def testAll(self):
        """Run the part 2, part 3 and part 4 taggers on the small test file.

        Trains a model on "EN/train", builds the emission dataframe and both
        transition dataframes up front, then tags "testdata/small_test" three
        times: part2helper.TagTweets, viterbi.TagWithViterbi without the
        second-order dataframe, and viterbi.TagWithViterbi with it. Each run is
        given its own "testdata/partN_out.txt" as `_out`. Nothing is asserted.
        """
        input_path = "testdata/small_test"

        hmm = model.Model("EN/train")
        hmm.train()

        # Part 2 emission, part 3 1st order and part 4 2nd order dataframes.
        emission_df = part2helper.GetEmissionDataFrame(hmm, 1)
        first_order_df = part3.GetTransitionDataFrame(hmm)
        second_order_df = part4.GetTransitionDataFrame(hmm)

        # part 2 tagging
        part2helper.TagTweets(
            _file=input_path,
            _emission_df=emission_df,
            _out='testdata/part2_out.txt',
        )

        # part 3 tagging
        print("part 3:\n")
        viterbi.TagWithViterbi(
            _file=input_path,
            _out="testdata/part3_out.txt",
            _model=hmm,
            _emission_df=emission_df,
            _transition_df=first_order_df,
        )

        # part 4 tagging
        print("part 4:\n")
        viterbi.TagWithViterbi(
            _file=input_path,
            _out="testdata/part4_out.txt",
            _model=hmm,
            _emission_df=emission_df,
            _transition_df=first_order_df,
            _2nd_order_df=second_order_df,
        )
    # For each language: train a model on "<lang>/train", build the part 2,
    # part 3 and part 4 dataframes, and print how long each step took.
    for lang in languages:
        print("Starting language {}".format(lang))
        starttime = time.time()
        m = Model(lang + "/train")
        m.train()
        traintime = time.time()
        # Elapsed time is end minus start, so the reported duration is
        # non-negative.
        print("Finished training {} in {}s".format(lang, traintime - starttime))

        # Emission dataframe from part 2
        em_df = part2helper.GetEmissionDataFrame(m, 1)

        part2time = time.time()
        print("Finished part2 df in {}s".format(part2time - traintime))

        # 1st order transition dataframe from part 3
        tr_df = part3.GetTransitionDataFrame(m)

        part3time = time.time()
        print("Finished part3 df in {}s".format(part3time - part2time))

        # 2nd order HMM transition dataframe from part 4
        tr_2nd_order = part4.GetTransitionDataFrame(m)

        part4time = time.time()
        print("Finished part4 df in {}s".format(part4time - part3time))

        print("    ---- ---- ----    \n")

        # Input file to tag for this language.
        readfile = lang + "/dev.in"

        # part 2 tagging
 def testSmoothedEmission(self):
     """Train on 'SG/train' and print the part 3 transition dataframe.

     NOTE(review): despite the name, no emission dataframe is built here; only
     part3.GetTransitionDataFrame is called -- confirm this is intended.
     """
     trained = model.Model('SG/train')
     trained.train()
     transition_df = part3.GetTransitionDataFrame(trained)
     print(transition_df)
 def testDataFrame(self):
     """Train on 'SG/train' and print the part 3 transition dataframe with a header.

     Nothing is asserted; the dataframe is only printed after the
     'transition prob:' label.
     """
     trained = model.Model('SG/train')
     trained.train()
     transition_df = part3.GetTransitionDataFrame(trained)
     print('transition prob:\n\n', transition_df)