예제 #1
0
def NGramLangModel():
    """Train an ``NGramModels(ngram=2)`` model on a corpus read via ``Loader``.

    Steps performed by the code below:

    1. Build a ``Loader`` rooted at ``MAIN_DIR + DS_DIR`` and open
       ``tb_kota_bywiki.txt`` with ``lazy_load=True``.
    2. Run the loaded data through ``processRaw(to_lower=True)`` and
       ``rawForLangmodel(punct_remove=True, to_token=True)``.
    3. Call ``NGramModels.train`` on the result with the ``'modkn'``
       optimizer, ``separate=False``, ``njump=0`` and ``verbose=False``.
    4. Print a separator line of ``#`` characters.
    """
    cl = Loader(MAIN_DIR + DS_DIR)
    # File names noted by the original author next to this call:
    # tb_berita_onlinemedia, tb_kota_bywiki (presumably alternative corpora).
    f = cl.loadLarge('tb_kota_bywiki.txt', lazy_load=True)
    w = cl.processRaw(f, to_lower=True)
    r = cl.rawForLangmodel(w, punct_remove=True, to_token=True)

    lms = NGramModels(ngram=2)
    # The njump parameter cannot be used with the 'modkn' optimizer yet.
    models = lms.train(r, optimizer='modkn',
                       separate=False, njump=0, verbose=False)

    # Parenthesised single-argument form: same output on Python 2, and valid
    # syntax on Python 3 (the bare print statement is a SyntaxError there).
    print("##########################################################")
예제 #2
0
def NGramLangModel():
    """Train an ``NGramModels(ngram=2)`` model on a small inline dataset.

    The function first loads and preprocesses ``tb_kota_bywiki.txt`` through
    ``Loader``, but the model is trained on the hard-coded three-sentence
    ``dataset`` of token lists with the ``'modkn'`` optimizer,
    ``separate=False``, ``njump=0`` and ``verbose=True``. A separator line of
    ``#`` characters is printed afterwards.
    """
    cl = Loader('C:\\BimaNLP\\dataset\\')
    # File names noted by the original author next to this call:
    # tb_berita_onlinemedia, tb_kota_bywiki (presumably alternative corpora).
    # NOTE(review): f, w and r are computed but not passed to train() below;
    # the calls are kept in case they have side effects -- confirm whether
    # the corpus loading can be dropped or `r` should replace `dataset`.
    f = cl.loadLarge('tb_kota_bywiki.txt', lazy_load=True)
    w = cl.processRaw(f, to_lower=True)
    r = cl.rawForLangmodel(w, punct_remove=True, to_token=True)

    # Pre-tokenised sentences used as the training input.
    dataset = [
        ['saya', 'suka', 'kamu'],
        ['kamu', 'suka', 'saya'],
        ['saya', 'tidak', 'suka', 'jika', 'kamu', 'pergi', 'dengan', 'dia'],
    ]

    lms = NGramModels(ngram=2)
    # The njump parameter cannot be used with the 'modkn' optimizer yet.
    models = lms.train(dataset, optimizer='modkn',
                       separate=False, njump=0, verbose=True)

    # Parenthesised single-argument form: same output on Python 2, and valid
    # syntax on Python 3 (the bare print statement is a SyntaxError there).
    print("##########################################################")