示例#1
0
def wiki_50M_model_test(
        answer_path='/Users/apple/Dropbox/NLP/GREVerbal.txt',
        model_path='/Users/apple/graduate/Courses/544NLP/data/wiki_article/wiki_part_model.model',
        word2id_path='/Users/apple/graduate/Courses/544NLP/data/wiki_article/wiki_wordids.txt',
        sentence_path='/Users/apple/Dropbox/NLP/bi_d_plaintext.txt'):
    """Score the LSA model built from the 50M Wikipedia dump on the GRE task.

    Results recorded by the original author: normal accuracy 24.2% with
    2100 unseen words; definition accuracy 17% with 4000+ unseen words.

    Every path defaults to the location used on the original author's
    machine, so calling the function with no arguments behaves as before.

    Args:
        answer_path: file passed to ``load_gre_answer``.
        model_path: saved model file passed to ``lsamodel.load``.
        word2id_path: dictionary text file passed to
            ``lsamodel.load_word2id``.
        sentence_path: sentence file passed to
            ``load_gre_sentence_definition``.

    Prints progress messages, the accuracy reported by
    ``eval.eval_accuracy(answer, cal_ans)``, then the number of unseen
    words followed by the unseen words themselves.
    """
    # NOTE(review): `answer`, `cal_ans` and `unseen_word` are not defined in
    # this function; presumably module-level state filled in by the loader
    # calls below -- confirm against the rest of the file.
    load_gre_answer(answer_path=answer_path)
    print('finish loading answers')

    lsi = lsamodel.load(model_path)
    print('finish loading lsa model')

    word2id = lsamodel.load_word2id(dic_txt_file=word2id_path)
    print('finish loading word2id dictionary')

    # This run uses the definition variant of the task. The plain variant was
    # previously run via load_gre_sentence(...) on bi_plaintext.txt.
    load_gre_sentence_definition(sentence_path=sentence_path, lsi=lsi, word2id=word2id)
    print('finish loading and selecting gre sentence completion task answers')

    # Single-argument print(...) prints identically under Python 2 and also
    # parses under Python 3.
    print('accuracy: ' + str(eval.eval_accuracy(answer, cal_ans)))
    print(len(unseen_word))
    print(unseen_word)
示例#2
0
def wiki_10G_model_test(
        answer_path='/Users/apple/Dropbox/NLP/GREVerbal.txt',
        model_path='/Users/apple/graduate/Courses/544NLP/data/wiki_article/wiki_latest_model/lsi.model',
        word2id_path='/Users/apple/graduate/Courses/544NLP/data/wiki_article/wiki_latest_model/wiki_en_wordids.txt',
        sentence_path='/Users/apple/Dropbox/NLP/bi_d_plaintext.txt'):
    """Score the LSA model built from the 10G Wikipedia data on the GRE task.

    Results recorded by the original author: 23.4% accuracy with 753 unseen
    words; definition accuracy 20.6% with 1810 unseen words.

    Every path defaults to the location used on the original author's
    machine, so calling the function with no arguments behaves as before.

    Args:
        answer_path: file passed to ``load_gre_answer``.
        model_path: saved model file passed to ``lsamodel.load``.
        word2id_path: dictionary text file passed to
            ``lsamodel.load_word2id``.
        sentence_path: sentence file passed to
            ``load_gre_sentence_definition``.

    Prints progress messages, the accuracy reported by
    ``eval.eval_accuracy(answer, cal_ans)``, then the number of unseen
    words followed by the unseen words themselves.
    """
    # NOTE(review): `answer`, `cal_ans` and `unseen_word` are not defined in
    # this function; presumably module-level state filled in by the loader
    # calls below -- confirm against the rest of the file.
    load_gre_answer(answer_path=answer_path)
    print('finish loading answers')

    lsi = lsamodel.load(model_path)
    print('finish loading lsa model')

    word2id = lsamodel.load_word2id(dic_txt_file=word2id_path)
    print('finish loading word2id dictionary')

    # This run uses the definition variant of the task. The plain variant was
    # previously run via load_gre_sentence(...) on bi_plaintext.txt with the
    # same algorithm setting.
    load_gre_sentence_definition(sentence_path=sentence_path, lsi=lsi, word2id=word2id,
                                 algorithm=TOTAL_SIMILARITY_WITH_RAKE)
    print('finish loading and selecting gre sentence completion task answers')

    # Single-argument print(...) prints identically under Python 2 and also
    # parses under Python 3.
    print('accuracy: ' + str(eval.eval_accuracy(answer, cal_ans)))
    print(len(unseen_word))
    print(unseen_word)
示例#3
0
def wiki_10G_model_test(
        answer_path='/Users/junchen/Documents/CSCI544/project/GREVerbal.txt',
        model_path='/Users/junchen/Documents/CSCI544/project/lsi model/lsi.model',
        word2id_path='/Users/junchen/Documents/CSCI544/project/wiki_data/wiki_en_wordids.txt',
        sentence_path='/Users/junchen/Documents/CSCI544/project/bi_plaintext.txt',
        k_values=(2,)):
    """Score the LSA model built from the 10G Wikipedia data on the GRE task.

    Runs the plain sentence-completion task once per value in ``k_values``
    and prints the accuracy for each value.

    Figures carried over from the original docstring: 23.4% accuracy with
    753 unseen words; definition accuracy 20.6% with 1810 unseen words.
    NOTE(review): these may describe a different configuration than the
    ``k`` sweep performed here -- confirm before quoting them.

    Every argument defaults to the value that was hard-coded originally, so
    calling the function with no arguments behaves as before.

    Args:
        answer_path: file passed to ``load_gre_answer``.
        model_path: saved model file passed to ``lsamodel.load``.
        word2id_path: dictionary text file passed to
            ``lsamodel.load_word2id``.
        sentence_path: sentence file passed to ``load_gre_sentence``.
        k_values: iterable of values forwarded one at a time as the ``k``
            argument of ``load_gre_sentence``. The default ``(2,)`` matches
            the original ``range(2, 3)``.
    """
    # NOTE(review): `answer`, `cal_ans` and `unseen_word` are not defined in
    # this function; presumably module-level state filled in by the loader
    # calls below -- confirm against the rest of the file.
    load_gre_answer(answer_path=answer_path)
    print('finish loading answers')

    lsi = lsamodel.load(model_path)
    print('finish loading lsa model')

    word2id = lsamodel.load_word2id(dic_txt_file=word2id_path)
    print('finish loading word2id dictionary')

    # The definition variant was previously run via
    # load_gre_sentence_definition(...) on bi_d_plaintext.txt.
    for k_val in k_values:
        load_gre_sentence(sentence_path=sentence_path, lsi=lsi, word2id=word2id,
                          algorithm=TOTAL_SIMILARITY_WITH_RAKE, k=k_val)
        print('finish loading and selecting gre sentence completion task answers')
        print(str(k_val) + ': accuracy: ' + str(eval.eval_accuracy(answer, cal_ans)))
        # Reset between runs. NOTE(review): before sweeping several values,
        # confirm clear_answer() keeps the answer key loaded above intact.
        clear_answer()

    # Single-argument print(...) prints identically under Python 2 and also
    # parses under Python 3.
    print(len(unseen_word))
    print(unseen_word)