def commit_WIKIPEDIA_training_set(self):
    '''
    Run the actual training once a training set has been filled.

    Opens a local ``SessionServer`` stored under
    ``self.rootlocation + 'gensimTraining' + str(self.training_id)`` and
    feeds it 100 consecutive batches of text rows obtained from
    ``gensim_sim.init_sql_connection``. Every fetched ``(id, text)`` pair is
    tokenized with ``utils.simple_preprocess`` and the resulting batch is
    passed to ``service.train(..., method='lsi')``.

    Returns:
        The literal string ``'training done'``.
    '''
    # Imported locally, as in the original code.
    from packages.controller.gensim_sim import gensim_sim

    w = gensim_sim()
    # create a local server
    service = SessionServer(
        self.rootlocation + 'gensimTraining' + str(self.training_id),
        autosession=True)

    factor = 20000  # number of text rows requested per batch
    for d in range(0, 100):
        first_row = d * factor
        # Single parenthesised argument: prints the same text under both the
        # Python 2 print statement and the Python 3 print function.
        print("currently working on text row " + str(first_row)
              + " up to " + str(first_row + factor))
        # presumably (start row, row count) -- confirm against
        # gensim_sim.init_sql_connection
        training_data = w.init_sql_connection(first_row, factor)
        # `doc_id` instead of `id` so the builtin is not shadowed.
        corpus = [{'id': doc_id, 'tokens': utils.simple_preprocess(text)}
                  for (doc_id, text) in training_data]
        # NOTE(review): train() only ever receives the current batch here;
        # see the TODO below about needing one big corpus.
        service.train(corpus, method='lsi')
    ## TODO we don't have a corpus yet, but we definitely need one big
    #self.init_training_set()
    return 'training done'