def sync_corpus(self):
    """Create the user's corpus, or refresh it with re-synced notes.

    The ``corpus`` flag on the user record and ``self.need_sync`` select
    one of three paths:

    * flag set and ``need_sync`` truthy: call ``resync_db()`` to get the
      ids of notes to refresh, load the saved corpus, replace those
      notes' documents and save with ``update=True``.
    * flag set and ``need_sync`` falsy: nothing to do.
    * flag not set (or no user record found): build a corpus from every
      note whose ``_id_user`` is ``self.user_id``, save it, and set the
      ``corpus`` flag on the user record.

    Returns None in every case.

    TODO: Store other data in the corpus besides basic text content, ie,
    extracted image, attribute note data, etc...
    TODO: catch the "corpus file not found" error from load_corpus()?
    """
    note_fields = {'tokens_content': 1, 'str_title': 1}

    def build_docs(query, top):
        # One Document per matching note; the note's Mongo _id is stamped
        # onto the document so it can be matched up again later.
        built = []
        for note in self.mongo.notes.find(query, note_fields):
            doc = Document(note['tokens_content'],
                           name=note['str_title'], top=top)
            doc._id = note['_id']
            built.append(doc)
        return built

    # find_one() returns None when no user record matches; fall back to an
    # empty dict so a missing record is treated as "corpus not built yet"
    # instead of raising AttributeError on .get().
    user_record = self.mongo.users.find_one(
        {'_id': self.user_id}, {'corpus': 1}) or {}
    corpus_check = user_record.get('corpus')

    # Corpus exists and is up to date: do nothing.
    if corpus_check and not self.need_sync:
        return

    # Corpus exists but some notes changed: swap in fresh documents.
    if corpus_check:
        update_guids = self.resync_db()
        corpus = self.load_corpus()
        # NOTE(review): top=50 here vs top=30 for the initial build below;
        # confirm the difference is intentional.
        docs = build_docs({'_id': {'$in': update_guids}}, 50)
        for doc in docs:
            # Drop the stale entry before adding the fresh one. Presumably
            # Document equality is id-based, so the new doc matches the old
            # one -- confirm.
            # NOTE(review): if resync_db() can return ids of notes that are
            # not in the corpus yet, remove() may raise -- confirm.
            corpus.remove(doc)
        corpus.extend(docs)
        self.save_corpus(corpus, update=True)
        return

    # First sync for this user: build the corpus from all of their notes.
    docs = build_docs({'_id_user': self.user_id}, 30)
    corpus = Corpus(docs)
    self.save_corpus(corpus)
    # NOTE(review): Collection.update() is the legacy PyMongo API; newer
    # PyMongo releases expect update_one() -- confirm the pinned version.
    self.mongo.users.update(
        {'_id': self.user_id}, {'$set': {'corpus': True}})