示例#1
0
    def commit_WIKIPEDIA_training_set(self):
        """Train the LSI model once the training set has been filled.

        Rows are fetched in ``chunk_count`` chunks of ``chunk_size`` rows via
        ``gensim_sim.init_sql_connection(offset, chunk_size)``; each row is
        unpacked as an ``(id, text)`` pair and tokenised with
        ``utils.simple_preprocess``.  Every chunk is buffered in the
        similarity server and the model is trained a single time afterwards,
        so that it is built from all fetched rows.

        Returns:
            The string ``'training done'``.
        """
        # Kept as a function-level import, exactly where the original had it.
        from packages.controller.gensim_sim import gensim_sim

        chunk_size = 20000
        chunk_count = 100

        source = gensim_sim()

        # create a local server
        service = SessionServer(
            self.rootlocation + 'gensimTraining' + str(self.training_id),
            autosession=True)

        for chunk_index in range(chunk_count):
            offset = chunk_index * chunk_size

            # Single-argument print(...) behaves the same on Python 2 and 3.
            print("currently working on text row %s up to %s"
                  % (offset, offset + chunk_size))

            rows = source.init_sql_connection(offset, chunk_size)
            documents = [
                {'id': doc_id, 'tokens': utils.simple_preprocess(text)}
                for (doc_id, text) in rows
            ]

            if not documents:
                # Nothing was returned for this window; there is nothing to
                # add to the training buffer.
                continue

            # NOTE(review): calling service.train(corpus, ...) per chunk
            # appears to rebuild the model from that chunk alone (simserver
            # documents train() as overwriting the existing model), leaving
            # only the last chunk in effect.  Buffer the chunk instead and
            # train once below -- confirm against the installed simserver.
            service.buffer(documents)

        # Train on everything buffered above.
        # NOTE(review): simserver is expected to raise if nothing at all was
        # buffered -- confirm; the original also passed an empty corpus to
        # train() in that situation.
        service.train(method='lsi')

        return 'training done'