def save(self, d1, d2, pkt):
    """Store the pippi carried by ``pkt`` against the documents ``d1`` and ``d2``.

    For a non-empty packet this:
      * adds the pippi's id to the ``pippies`` set of both documents,
      * adds both document ids to the pippi's ``docs`` set and increments
        its ``docslen`` counter by 2,
      * saves one ``Frags`` record per entry of ``pkt['d1ps']`` (owned by
        ``d1``) and ``pkt['d2ps']`` (owned by ``d2``).

    Returns ``pkt`` unchanged, or ``None`` when ``pkt`` is falsy.
    """
    # todo new code to directly addtoset mongo-style
    if not pkt:
        return

    pippi = Pippi(pkt['pippi'])

    # Register the pippi on each of the two documents, d1 first.
    for doc in (d1, d2):
        Docs.update({'_id': doc._id},
                    {'$addToSet': {'pippies': pippi._id}})

    # NOTE(review): docslen is bumped by 2 even when $addToSet did not
    # actually add either id -- confirm this is intended.
    Pippies.update(
        {'_id': pippi._id},
        {'$addToSet': {'docs': {'$each': [d1._id, d2._id]}},
         '$inc': {'docslen': 2}})

    # Build the complete (owner id, fragment) list up front so both packet
    # keys are read before the first fragment is written.
    owned_parts = [(d1._id, part) for part in pkt['d1ps']]
    owned_parts += [(d2._id, part) for part in pkt['d2ps']]
    for owner_id, part in owned_parts:
        Frags.save({'pos': part['pos'],
                    'txt': part['txt'],
                    'l': pkt['l'],
                    'doc': owner_id,
                    'pippi': pippi._id})

    return pkt
def _report_progress(done, total):
    """Write a progress mark to stdout when ``done`` reaches a new whole percent.

    Nothing is written unless the integer percentage of ``done`` out of
    ``total`` differs from that of ``done - 1``. When it does, multiples of
    ten are written as the number itself and every other percent as a dot.
    """
    if not total:
        # Avoid dividing by zero if the reported count is 0.
        return
    # Explicit floor division keeps the integer semantics regardless of
    # whether true division is in effect.
    percent = done * 100 // total
    if percent == (done - 1) * 100 // total:
        return
    if percent % 10 == 0:
        sys.stdout.write("%d" % percent)
    else:
        sys.stdout.write('.')
    sys.stdout.flush()


def main():
    """Recompute derived fields on every pippi and every document.

    First pass: for each pippi with a non-empty ``docs`` list, set
    ``relevance`` to ``len / number of docs`` and ``docslen`` to the number
    of docs. Second pass: for each document, set ``tfidf`` to the value of
    ``Doc(d=<record>).tfidf``. Progress is written to stdout during both
    passes.
    """
    # Parenthesised single-argument print emits the same text under the
    # Python 2 print statement and the print function.
    print("updateing pippies.relevance")
    pippies = Pippies.find({}, ['docs', 'len'])
    total = pippies.count()
    for done, pippi in enumerate(pippies, 1):
        _report_progress(done, total)
        ndocs = len(pippi['docs'])
        if ndocs > 0:
            Pippies.update(
                {'_id': pippi['_id']},
                {'$set': {'relevance': float(pippi['len']) / float(ndocs),
                          'docslen': ndocs}})
    sys.stdout.write('\n')
    sys.stdout.flush()

    print("updateing docs.idf")
    docs = Docs.find({}, ['termcnt', 'docid', 'stemsid', 'rawid'])
    total = docs.count()
    for done, dd in enumerate(docs, 1):
        _report_progress(done, total)
        Docs.update({'_id': dd['_id']},
                    {'$set': {'tfidf': Doc(d=dd).tfidf}})
    sys.stdout.write('\n')
    sys.stdout.flush()