{ "id":314159, "citations": [57721], "references": [271828], "keywords": ["xyzzy"] } ] if __name__=="__main__": #snag the docs from the server server = couchdb.client.Server(url='https://vertex.skizzerz.net:6984/') db = server['papers'] result = db.view('all/all') docs = [] for row in result: docs.append(row.value) _hits_obj = hits.sparseHITS() #index all the docs _hits_obj.index_docs(docs) #FIXME run lda on the docs #NOW READY FOR QUERIES bottle.run(host='localhost', port=8080, reloader=True)
#okay. let's train an LDA model. :3 _root_lda_model.setup_lda(iter([x for x in docs if 'id' in x])) for doc in docs: # cache the ID's topicid = _root_lda_model.pick_topic(_root_lda_model._abst(doc)) _topic_dict[topicid].append(doc) for topicid, docs in _topic_dict.items(): print "topicid, len" print (topicid, len(docs)) # hits per topic, yo for topicid, docs in _topic_dict.items(): print topicid _topic_hits[topicid] = hits.sparseHITS() _topic_hits[topicid].index_docs(docs) print 'HITS!' _hits_obj = hits.sparseHITS() #index all the docs _hits_obj.index_docs(docs) #NOW READY FOR QUERIES bottle.run(host='localhost', port=8080, reloader=True)