示例#1
0
文件: rss.py 项目: stes/nbot
def test_db():
    gl_vlist = VocabList()
    log('searching directory: %s' % FEED_DIR)
    for dir in os.listdir(FEED_DIR):
        if '.mk4' in dir[-4:]:
            log('found database: %s' % dir)
            # open database
            db = metakit.storage(os.path.join(FEED_DIR, dir), 0)
            data = read_database(db)
            if len(data) > 0:
                # feed content in database
                log('create library')
                lib = Library()
                for feed in data:
                    lib.add_document(read_data(feed))
                vlist = lib.gen_vocablist()
                vlist.clean(5)
                gl_vlist.merge(vlist)
            db = None # close database
    print gl_vlist
示例#2
0
文件: main.py 项目: stes/nbot
def train_recsys():
    from nbot.document import Document, Library, VocabList, load_document
    
    doc0 = load_document('res/sample/blubb.html')
    doc1 = load_document('res/sample/page.html')
    doc2 = load_document('res/sample/dislikepage.html')
    
    lib_like = Library()
    lib_like.load('res/like', False)
    lib_dislike = Library()
    lib_dislike.load('res/dislike', False)
    
    like_cv = []
    keys = lib_like.get_keys()
    shuffle(keys)
    for key in keys[:5]:
        like_cv.append(lib_like.rmv_document(key))
    
    dislike_cv = []
    keys = lib_dislike.get_keys()
    shuffle(keys)
    for key in keys[:5]:
        dislike_cv.append(lib_dislike.rmv_document(key))
    
    vlist_like = lib_like.gen_vocablist()
    vlist_dislike = lib_dislike.gen_vocablist()
    
    vlist_like.clean(10)
    vlist_dislike.clean(10)
    
    like_mask = vlist_like.gen_mask()
    dislike_mask = vlist_dislike.gen_mask()
    
    mask = []
    mask.extend(like_mask)
    mask.extend(dislike_mask)
    
    rsys = RecommenderSystem(mask, len(mask))
    for key in lib_like.get_keys():
        doc = lib_like.get_document(key)
        rsys.set_rate(doc.content(), 1.)
    
    for key in lib_dislike.get_keys():
        doc = lib_dislike.get_document(key)
        rsys.set_rate(doc.content(), 0.)
    
    rsys.train(10000000, 0.1)

    likes = lib_like.get_keys()
    shuffle(likes)
    for key in likes[:5]:
        doc = lib_like.get_document(key)
        print rsys.rate(doc.content())
    
    dislikes = lib_dislike.get_keys()
    shuffle(dislikes)
    for key in dislikes[:5]:
        doc = lib_dislike.get_document(key)
        print rsys.rate(doc.content())
    
    print '---------------------------------------'
    print rsys.rate(doc0.content())
    print rsys.rate(doc1.content())
    print rsys.rate(doc2.content())
    print '---------------------------------------'
    print 'CV data'
    print '(1) LIKE'
    for doc in like_cv:
        print rsys.rate(doc.content())
    
    print '(2) DISLIKE'
    for doc in dislike_cv:
        print rsys.rate(doc.content())
    
    # This seems to work, however, more training/cv data will be necessary!  
    
    print '---------------------------------------'
    return rsys
示例#3
0
文件: recsys.py 项目: stes/nbot
 lib_dislike = Library()
 lib_dislike.load('res/dislike', False)
 
 like_cv = []
 keys = lib_like.get_keys()
 shuffle(keys)
 for key in keys[:5]:
     like_cv.append(lib_like.rmv_document(key))
 
 dislike_cv = []
 keys = lib_dislike.get_keys()
 shuffle(keys)
 for key in keys[:5]:
     dislike_cv.append(lib_dislike.rmv_document(key))
 
 vlist_like = lib_like.gen_vocablist()
 vlist_dislike = lib_dislike.gen_vocablist()
 
 vlist_like.clean(10)
 vlist_dislike.clean(10)
 
 like_mask = vlist_like.gen_mask()
 dislike_mask = vlist_dislike.gen_mask()
 
 printlist(like_mask)
 print '-------------------------------------------'
 printlist(dislike_mask)
 
 mask = []
 mask.extend(like_mask)
 mask.extend(dislike_mask)