def test_model(self, test_data, empty_solution, evaluate=False):
    """Write this model's predictions and, optionally, an evaluation summary.

    Prediction step:
      * if ``self.prediction_file`` already exists, nothing is predicted;
      * if ``self.model_file`` does not exist, a message is printed and the
        method returns immediately (no evaluation is attempted either);
      * otherwise the serialized model is loaded, run against ``test_data``
        and one ``[userid, tweetid, predicted]`` row is written to
        ``self.prediction_file`` for every row of ``empty_solution``.

    Evaluation step (only when ``evaluate`` is truthy): unless
    ``self.evaluation_file`` already exists, the evaluation summary is
    saved there. The evaluation computed during the prediction step is
    reused when available; otherwise the model is loaded and evaluated now.

    Args:
        test_data: dataset handed to ``Evaluation`` both as its ``data``
            argument and as the data to test on (presumably Weka
            instances -- confirm against the caller).
        empty_solution: file name passed to ``read_sheet``; every row it
            yields must provide the keys ``'userid'`` and ``'tweetid'``.
        evaluate: when truthy, also produce the evaluation summary file.

    Raises:
        IndexError: if ``empty_solution`` has more rows than there are
            predictions.
    """
    def run_evaluation():
        # Load the serialized classifier and evaluate it on the test data.
        classifier = Classifier(jobject=serialization.read(self.model_file))
        result = Evaluation(data=test_data)
        result.test_model(classifier=classifier, data=test_data)
        return result

    # Stays None unless the prediction step below actually ran the model.
    evaluation = None

    if os.path.isfile(self.prediction_file):
        print('Model ' + self.name + ' already tested.')
    elif not os.path.isfile(self.model_file):
        print('Impossible testing this model. It should be trained first.')
        return
    else:
        print('Starting to test_model model ' + self.name + '.')
        evaluation = run_evaluation()
        predictions = evaluation.predictions()

        # Predictions are matched to rows by position. Indexing (instead of
        # popping from the front of the list) keeps this linear and still
        # raises IndexError when there are fewer predictions than rows.
        solutions = []
        for index, row in enumerate(read_sheet(file_name=empty_solution)):
            solutions.append(
                [row['userid'], row['tweetid'], predictions[index].predicted()]
            )

        write_the_solution_file(solutions, self.prediction_file)
        print('Model ' + self.name + ' tested.')

    if not evaluate:
        return

    if os.path.isfile(self.evaluation_file):
        print('Model ' + self.name + ' already evaluated.')
        return

    if evaluation is None:
        # Predictions already existed on disk, so the model was not run above.
        evaluation = run_evaluation()

    save_file(file_name=self.evaluation_file, content=evaluation.to_summary())
    print('Model ' + self.name + ' evaluated.')
def order_solution():
    """Re-sort 'neural_solution.dat' and write it as 'neural_solution2.dat'.

    Rows are ordered by user id, then engagement, then tweet id, all
    descending. Each output row is ``[userid, tweetid, engagement]``.
    Both files live under ``DATASET_PATH``.
    """
    def descending_key(row):
        # Negating every numeric field gives a descending order on all three.
        return (
            -int(row['userid']),
            -float(row['engagement']),
            -int(row['tweetid']),
        )

    ranked = sorted(
        read_sheet(DATASET_PATH + 'neural_solution.dat'),
        key=descending_key,
    )
    ordered_rows = [
        [entry['userid'], entry['tweetid'], entry['engagement']]
        for entry in ranked
    ]

    # Write the _solution file
    write_the_solution_file(ordered_rows, DATASET_PATH + 'neural_solution2.dat')
def random_solution(seed=None):
    """Write a baseline solution file filled with random engagement guesses.

    Reads the (user, tweet) pairs from 'test_empty.dat', assigns each pair
    a random integer engagement between 0 and 50 (inclusive), sorts the
    result on user id, engagement and tweet id (all descending) and writes
    it to 'random_solution.dat'. Both files live under ``DATASET_PATH``.

    Args:
        seed: optional seed for the module-level ``random`` generator, for
            reproducible output. With the default ``None`` the generator
            is left untouched, exactly as before this parameter existed.
    """
    # Read the _empty file (the task)
    todos = read_todo_from_empty_file(DATASET_PATH + 'test_empty.dat')

    if seed is not None:
        random.seed(seed)

    # For all (user, tweet) pairs, guess an engagement between 0 and 50.
    solutions = [
        (user, tweet, random.randint(0, 50))
        for (user, tweet) in todos
    ]

    # Sort the solutions on user id (desc), engagement (desc) and tweet id (desc)
    solutions.sort(key=lambda data: (-int(data[0]), -int(data[2]), -int(data[1])))

    # Write the _solution file
    write_the_solution_file(solutions, DATASET_PATH + 'random_solution.dat')
    print('done.')