Пример #1
0
 def test_best_solution(self, dataset_key = 'tweets', regression = 'linear_regression1',
                        classification = 'naive_bayes1'):
     """Combine one regression model and one classifier into 'final_solution.dat'.

     The first model returned for each key is selected using the dataset stored
     under ``dataset_key``; both models are then tested on ``self.datasets['final']``
     and their prediction files are merged row by row.

     :param dataset_key: key into ``self.datasets`` used to look the models up.
     :param regression: ``model_key`` of the regression model.
     :param classification: ``model_key`` of the classifier model.
     """
     lookup_dataset = self.datasets[dataset_key]

     manager = ModelManager()
     regressor = manager.get_models(dataset = lookup_dataset, model_key = regression)[0]
     classifier = manager.get_models(dataset = lookup_dataset, model_key = classification,
                                     model_type = 'classifier')[0]

     target = self.datasets['final']

     # Prediction files are named after the model and the dataset they are tested on.
     regressor.prediction_file = PREDICTION_PATH + regressor.name + '_' + target.dataset_key + '_prediction.dat'
     classifier.prediction_file = PREDICTION_PATH + classifier.name + '_' + target.dataset_key + '_prediction.dat'

     regressor.test_model(test_data = target.test_data_regression,
                          empty_solution = target.empty_solution)
     classifier.test_model(test_data = target.test_data_classification,
                           empty_solution = target.empty_solution)

     regression_rows = read_sheet(file_name = regressor.prediction_file)
     classification_rows = read_sheet(file_name = classifier.prediction_file)

     combiner = Solution()
     # map() is kept (rather than zip) so that rows are paired exactly as before.
     merged_rows = map(combiner.combine_classification_regression, regression_rows,
                       classification_rows)

     discretize_solution(prediction_in = merged_rows, file_out = DATASET_PATH + 'final_solution.dat')
Пример #2
0
    def test_model(self, test_data, empty_solution, evaluate = False):
        """Run the serialized model on ``test_data`` and write its prediction file.

        Nothing is predicted again when ``self.prediction_file`` already exists.
        When ``self.model_file`` is missing, a message is printed and the method
        returns without doing anything else.

        :param test_data: data handed to ``Evaluation`` and ``Evaluation.test_model``.
        :param empty_solution: file name read with ``read_sheet``; every row must
            provide ``'userid'`` and ``'tweetid'``.
        :param evaluate: when true, also save ``evaluation.to_summary()`` to
            ``self.evaluation_file`` unless that file already exists.
        :raises IndexError: if there are fewer predictions than rows in
            ``empty_solution``.
        """
        model_weka = None
        if os.path.isfile(self.prediction_file):
            print('Model ' + self.name + ' already tested.')
        elif not os.path.isfile(self.model_file):
            print('Impossible testing this model. It should be trained first.')
            return
        else:
            print('Starting to test_model model ' + self.name + '.')
            model_weka = Classifier(jobject = serialization.read(self.model_file))
            evaluation = Evaluation(data = test_data)
            evaluation.test_model(classifier = model_weka, data = test_data)

            predictions = evaluation.predictions()
            rows        = read_sheet(file_name = empty_solution)

            # Pair rows and predictions by position. Indexing replaces the former
            # predictions.pop(0), which was O(n) per row and emptied the list,
            # while still failing loudly when predictions run out.
            solutions = [[row['userid'], row['tweetid'], predictions[index].predicted()]
                         for index, row in enumerate(rows)]
            write_the_solution_file(solutions, self.prediction_file)
            print('Model ' + self.name + ' tested.')

        if not evaluate:
            return

        if os.path.isfile(self.evaluation_file):
            print('Model ' + self.name + ' already evaluated.')
            return

        if model_weka is None:
            # Predictions were already on disk, so the model has not been loaded
            # or evaluated during this call yet.
            model_weka = Classifier(jobject = serialization.read(self.model_file))
            evaluation = Evaluation(data = test_data)
            evaluation.test_model(classifier = model_weka, data = test_data)
        save_file(file_name = self.evaluation_file, content = evaluation.to_summary())
        print('Model ' + self.name + ' evaluated.')
Пример #3
0
    def create_solution(self, dataset, force, solutions_file = None):
        """Build ``self.solution_file`` from the configured regression/classification models.

        :param dataset: dataset passed to ``ModelManager.get_models``.
        :param force: when ``False`` and the solution file already exists, the
            method only prints a message and returns.
        :param solutions_file: iterable of file names, read only when
            ``self.classification == 'None'``, ``self.regression == 'ranking'``
            and the model lookup does not return exactly one model.
        """
        manager = ModelManager()

        if os.path.isfile(self.solution_file) and force == False:
            print('Solution ' + self.name + ' ' + self.classification + ' ' + self.regression + ' already created.')
            return

        if self.classification == 'None':
            # Regression-only solution.
            candidates = manager.get_models(dataset = dataset, model_key = self.regression)
            if len(candidates) == 1:
                discretize_solution(file_in = candidates[0].prediction_file, file_out = self.solution_file)
            else:
                if self.regression == 'ranking':
                    print(solutions_file)
                    sheets = [read_sheet(file_name = path) for path in solutions_file]
                    per_model_rows = [self._order(sheet) for sheet in sheets]
                else:
                    per_model_rows = [read_sheet(file_name = candidate.prediction_file)
                                      for candidate in candidates]

                # One call per row position, receiving that row from every model.
                merged_rows = map(self._combine_regressions, *per_model_rows)
                discretize_solution(prediction_in = merged_rows, file_out = self.solution_file)
        else:
            # Regression corrected by one classifier, or by several when voting.
            regressors = manager.get_models(dataset = dataset, model_key = self.regression)
            classifiers = manager.get_models(dataset = dataset, model_key = self.classification,
                                             model_type = 'classifier')

            regression_rows = read_sheet(file_name = regressors[0].prediction_file)

            if self.classification == 'voting':
                votes_per_model = [read_sheet(file_name = voter.prediction_file)
                                   for voter in classifiers]
                merged_rows = map(self._combine_classifications_regression, regression_rows,
                                  *votes_per_model)
            else:
                classification_rows = read_sheet(file_name = classifiers[0].prediction_file)
                merged_rows = map(self.combine_classification_regression, regression_rows,
                                  classification_rows)

            discretize_solution(prediction_in = merged_rows, file_out = self.solution_file)

        print('Solution ' + self.name + ' ' + self.classification + ' ' + self.regression + ' created.')
Пример #4
0
    def test_solution(self):
        """Write 'teste_zeros.dat': every row of 'empty_real_solution.dat' with engagement 0.0."""
        source_rows = read_sheet(file_name = DATASET_PATH + 'empty_real_solution.dat')

        # Keep the ids of each row and set its engagement to zero.
        zeroed_rows = [
            {'userid': entry['userid'], 'tweetid': entry['tweetid'], 'engagement': 0.0}
            for entry in source_rows
        ]

        discretize_solution(prediction_in = zeroed_rows, file_out = DATASET_PATH + 'teste_zeros.dat')
        
Пример #5
0
def order_solution():
    """Re-sort 'neural_solution.dat' and write the result to 'neural_solution2.dat'.

    Rows are ordered by user id, then engagement, then tweet id, each descending.
    """
    def descending_key(entry):
        # Negating every numeric field makes the ascending sort run descending.
        return (-int(entry['userid']), -float(entry['engagement']), -int(entry['tweetid']))

    ranked = list(read_sheet(DATASET_PATH + 'neural_solution.dat'))
    ranked.sort(key=descending_key)

    # Reduce each row to the three columns of the solution file, in output order.
    output_rows = [[entry['userid'], entry['tweetid'], entry['engagement']] for entry in ranked]

    write_the_solution_file(output_rows, DATASET_PATH + 'neural_solution2.dat')