Пример #1
0
 def sync_corpus(self):
     """Build the user's corpus, or refresh it if one was already built.

     The ``corpus`` field of the user's document in ``self.mongo.users``
     records whether a corpus has been created before:

     * flag set and ``self.need_sync`` is false: nothing to do;
     * flag set and ``self.need_sync`` is true: the notes whose ids are
       returned by ``self.resync_db()`` are rebuilt as ``Document`` objects
       and swapped into the corpus from ``self.load_corpus()``, which is
       then stored with ``self.save_corpus(corpus, update=True)``;
     * flag not set (or the user document is missing): a ``Corpus`` is
       built from every note whose ``_id_user`` is ``self.user_id``,
       stored with ``self.save_corpus(corpus)``, and the flag is set.

     Returns:
         None in every case.

     TODO:
         Store other data in the corpus besides basic text content, i.e.
         extracted image, attribute note data, etc.
         Catch the "corpus file not found" error?
     """
     # Only these two note fields are needed to build a Document.
     note_fields = {'tokens_content': 1, 'str_title': 1}

     # find_one() returns None when no document matches; fall back to an
     # empty dict so a missing user is treated as "corpus not built yet"
     # instead of raising AttributeError on None.get().
     user_doc = self.mongo.users.find_one({'_id': self.user_id},
                                          {'corpus': 1})
     corpus_check = (user_doc or {}).get('corpus')

     # Corpus exists and nothing changed: do nothing.
     if corpus_check and not self.need_sync:
         return

     if corpus_check:
         # Corpus exists but is stale: rebuild only the changed notes.
         update_guids = self.resync_db()
         corpus = self.load_corpus()
         refreshed = []
         for note in self.mongo.notes.find({'_id': {'$in': update_guids}},
                                           note_fields):
             # NOTE(review): top=50 here vs top=30 in the initial build
             # below -- confirm the difference is intentional.
             doc = Document(note['tokens_content'], name=note['str_title'],
                            top=50)
             # Reuse the note's database id as the document id.
             doc._id = note['_id']
             refreshed.append(doc)
             # Drop the stale entry so the corpus does not keep old content.
             # NOTE(review): presumably this matches the old document by
             # _id; verify it cannot fail for notes that were never in the
             # corpus (e.g. notes created after the first sync).
             corpus.remove(doc)
         corpus.extend(refreshed)
         self.save_corpus(corpus, update=True)
         return

     # Corpus sync has never been done: build it from all of the user's notes.
     docs = []
     for note in self.mongo.notes.find({'_id_user': self.user_id},
                                       note_fields):
         doc = Document(note['tokens_content'], name=note['str_title'],
                        top=30)
         doc._id = note['_id']
         docs.append(doc)
     corpus = Corpus(docs)
     self.save_corpus(corpus)
     # Remember that the corpus now exists so later calls take the fast paths.
     # NOTE(review): if self.mongo is a PyMongo database, Collection.update
     # is a legacy API (removed in PyMongo 4); update_one is its replacement.
     self.mongo.users.update({'_id':self.user_id},{'$set':{'corpus':True}})