def test_db():
    """Merge the vocabularies of all ``.mk4`` databases in FEED_DIR and print the result.

    Every entry of ``FEED_DIR`` whose name ends in ``.mk4`` is opened with
    ``metakit.storage`` and read via ``read_database``.  For each database
    that yields at least one feed, a ``Library`` is filled with
    ``read_data(feed)`` for every feed, a vocabulary list is generated from
    it, cleaned with ``clean(5)`` and merged into one global ``VocabList``,
    which is printed at the end.

    NOTE(review): the original indentation of this function was lost; the
    nesting below (vocabulary generated once per database, handle dropped
    once per database, result printed once at the end) is the most plausible
    reading -- confirm against the original file.
    """
    gl_vlist = VocabList()
    log('searching directory: %s' % FEED_DIR)

    for fname in os.listdir(FEED_DIR):
        # Only Metakit database files are of interest.
        if not fname.endswith('.mk4'):
            continue
        log('found database: %s' % fname)

        # open database
        # NOTE(review): the second argument (0) presumably selects read-only
        # mode -- confirm against the Metakit documentation.
        db = metakit.storage(os.path.join(FEED_DIR, fname), 0)
        data = read_database(db)

        # The explicit length check is kept because the type returned by
        # read_database is not visible here and may not define truthiness.
        if len(data) > 0:
            # feed content in database
            log('create library')
            lib = Library()
            for feed in data:
                lib.add_document(read_data(feed))

            vlist = lib.gen_vocablist()
            # NOTE(review): the meaning of the threshold 5 is defined by
            # VocabList.clean, which is not visible here.
            vlist.clean(5)
            gl_vlist.merge(vlist)

        db = None  # close database (drops this function's reference to the storage)

    # Single-argument call form: same output under Python 2's print
    # statement and Python 3's print function.
    print(gl_vlist)
def print80(text, width=80):
    """Print *text* broken into consecutive lines of at most *width* characters.

    The text is cut purely by character count (no word wrapping).  Empty
    text prints a single empty line, like ``print('')``.

    Args:
        text: The string to print.
        width: Maximum number of characters per printed line (default 80).
    """
    if not text:
        print('')
        return
    # Slice in fixed-size steps; the last slice carries the remainder, so no
    # trailing blank line is emitted when len(text) is a multiple of width.
    for start in range(0, len(text), width):
        print(text[start:start + width])


def printlist(list):
    """Print every item of *list* on its own line.

    The parameter name shadows the builtin ``list``; it is kept unchanged so
    that callers passing it by keyword continue to work.
    """
    for item in list:
        print(item)


if __name__ == '__main__':
    from collections import deque

    from nbot.document import Document, Library

    # Crawl queue, initially holding only the seed page.  A deque gives O(1)
    # removal from the front, unlike list.pop(0).
    q = deque(['file:///home/stes/dislike.html'])
    lib = Library()

    # The seed page is only mined for hyperlinks; it is not itself added to
    # the library.
    url = q.popleft()
    page = fetch_content(url)
    hrefs = get_hyperlinks(page)
    q.extend(hrefs)

    # Fetch every linked page and store it as a document.  Links found on
    # those pages are not followed, so the queue only shrinks.
    while q:
        print('currently %d elements in the queue' % len(q))
        url = q.popleft()
        print('getting %s' % url)
        page = fetch_content(url)
        doc = Document(page)
        lib.add_document(doc)

    # NOTE(review): the original indentation was lost; saving once after the
    # crawl has finished is assumed -- confirm against the original file.
    lib.save('res/dislike')