示例#1
0
    def commit_indexing_set_old(self):
        ''' Index the rows collected in this training's indexing set.

        Reads the ``gensimIndexingSet<training_id>`` table from the SQLite
        database at ``self.sqlserver`` in batches, tokenizes the text of each
        row with ``utils.simple_preprocess`` and hands every batch to the
        Pyro4 service registered as ``gensim.testserver``.  Once all rows are
        processed the set is reset through ``self.delete_set()`` and
        ``self.init_indexing_set()``.

        The database connection is always closed, even when indexing fails;
        in that case the exception propagates and the set is NOT reset.

        Returns:
            The string ``'indexing done'``.
        '''
        import sqlite3
        import Pyro4

        batch_size = 500  # rows fetched and indexed per round trip
        training_id = str(self.training_id)
        training_dir = self.rootlocation + 'gensimTraining' + training_id

        conn = sqlite3.connect(self.sqlserver)
        try:
            cursor = conn.cursor()

            # Table names cannot be bound as SQL parameters, so the name is
            # concatenated; training_id comes from this object, not from
            # user input (NOTE(review): confirm it is never caller-supplied).
            sql = "SELECT * FROM gensimIndexingSet" + training_id
            print(sql)
            cursor.execute(sql)

            rows = cursor.fetchmany(batch_size)

            # NOTE(review): this local server is replaced by the Pyro4 proxy
            # right below and never used. It is kept because constructing it
            # may have side effects on training_dir -- confirm, then drop.
            service = SessionServer(training_dir, autosession=True)
            service = Pyro4.Proxy(Pyro4.locateNS().lookup('gensim.testserver'))
            print(training_dir)

            while rows:
                # Each row is unpacked as (text, id); this relies on the
                # column order of the table -- TODO confirm against the
                # schema created by init_indexing_set().
                corpus = [
                    {'id': str(doc_id),
                     'tokens': utils.simple_preprocess(text)}
                    for (text, doc_id) in rows
                ]

                # TODO: we don't have a corpus yet, but we definitely need
                # one big
                service.index(corpus)
                rows = cursor.fetchmany(batch_size)
                service.autosession = True
                time.sleep(0.5)  # short pause between batches
        finally:
            # Release the SQLite handle before the set is dropped/recreated.
            conn.close()

        self.delete_set()
        self.init_indexing_set()

        return 'indexing done'