class TestEngine(unittest.TestCase):
    """Sanity tests for ``Engine.train`` run against the shared test dataset."""

    def setUp(self):
        """Give every test its own dataset handle and a fresh ``Engine``."""
        self.data = test_utils.load_test_dataset()
        self.engine = Engine()

    def test_trainer(self):
        """Train with a small budget and check a result exists for every algorithm."""
        with self.subTest("Sanity - everything just works"):
            outcome = self.engine.train(
                data=self.data,
                cpu_time_limit=60,
                max_evals=20,
            )
            # train() hands back a 4-tuple; the best params are not inspected here.
            best_algo, _best_params, best_score, tasks = outcome

            # Each of these must be truthy for the run to count as successful.
            for produced in (best_algo, best_score, tasks):
                self.assertTrue(produced)

            # The keys of ``tasks`` must match FULL_ALGO_LIST, in any order.
            trained_algos = list(tasks.keys())
            self.assertCountEqual(trained_algos, FULL_ALGO_LIST)
sep=';', error_bad_lines=False, encoding="latin-1") df.columns = ['user', 'item', 'rating'] reader = Reader(rating_scale=(0, 10)) data = Dataset.load_from_df(df.sample(n=100000, random_state=134), reader=reader) del (df) benchmark_results = {'Algorithm': [], 'RMSE': [], 'MAE': [], 'Time': []} # Evaluate AutoSurprise start_time = time.time() time_limt = 60 * 60 * 12 # Run for 12 hours engine = Engine(verbose=False) best_model, best_params, best_score, tasks = engine.train( data=data, target_metric='test_rmse', quick_compute=False, cpu_time_limit=time_limt, max_evals=10000, hpo_algo=hyperopt.atpe.suggest) cv_time = str(datetime.timedelta(seconds=int(time.time() - start_time))) cv_results = cross_validate(engine.build_model(best_model, best_params), data, ['rmse', 'mae']) mean_rmse = '{:.4f}'.format(np.mean(cv_results['test_rmse'])) mean_mae = '{:.4f}'.format(np.mean(cv_results['test_mae'])) print("--------- Done ----------")
datetime.timedelta(seconds=int(time.time() - start_time))) mean_rmse = '{:.3f}'.format(np.mean(cv_results['test_rmse'])) mean_mae = '{:.3f}'.format(np.mean(cv_results['test_mae'])) benchmark_results['Algorithm'].append(algo_name) benchmark_results['RMSE'].append(mean_rmse) benchmark_results['MAE'].append(mean_mae) benchmark_results['Best params'].append({}) benchmark_results['Time'].append(cv_time) except Exception as exc: print('Exception : ', exc) # Evaluate AutoSurprise start_time = time.time() engine = Engine(verbose=False) best_model, best_params, best_score, tasks = engine.train( data=data, target_metric='test_rmse', quick_compute=False, cpu_time_limit=3600, max_evals=500) cv_time = str(datetime.timedelta(seconds=int(time.time() - start_time))) print("--------- Done ----------") print("Best model: ", best_model) print("Best params: ", best_params) print("Best score: ", best_score) print("All tasks: ", tasks) benchmark_results['Algorithm'].append('AutoSurprise')
import time
import datetime
import os
import sys

from surprise import Dataset

from auto_surprise.engine import Engine

if __name__ == "__main__":
    # Use the "ml-100k" dataset that ships as a surprise built-in.
    data = Dataset.load_builtin("ml-100k")

    # Run auto surprise, timing the whole search with the wall clock.
    start_time = time.time()
    engine = Engine(verbose=True)

    # Demo configuration only: for real use you would want to adjust
    # the time limit (and likely the evaluation budget too).
    best_algo, best_params, best_score, tasks = engine.train(
        data=data,
        target_metric="test_rmse",
        cpu_time_limit=720,
        max_evals=100,
    )

    # Truncate to whole seconds so the duration prints as H:MM:SS.
    elapsed_seconds = int(time.time() - start_time)
    cv_time = str(datetime.timedelta(seconds=elapsed_seconds))

    print("--------- Done ----------")
    summary = (
        ("Time taken: ", cv_time),
        ("Best algorithm: ", best_algo),
        ("Best params: ", best_params),
        ("Best score: ", best_score),
        ("All tasks: ", tasks),
    )
    for label, value in summary:
        print(label, value)
# Seed both the stdlib and numpy global generators with the same fixed value.
random.seed(1)
numpy.random.seed(1)

if __name__ == "__main__":
    # Load the Movielens 100k ratings file (tab-separated, ratings from 1 to 5).
    file_path = os.path.expanduser("../datasets/ml-100k/u.data")
    reader = Reader(
        line_format="user item rating timestamp",
        sep="\t",
        rating_scale=(1, 5),
    )
    data = Dataset.load_from_file(file_path, reader=reader)

    # Run auto surprise, timing the whole search with the wall clock.
    start_time = time.time()
    rng = numpy.random.RandomState(1)
    engine = Engine(
        verbose=True,
        random_state=rng,
        algorithms=["baseline_only", "knn_basic"],
    )
    best_model, best_params, best_score, tasks = engine.train(
        data=data,
        target_metric="test_rmse",
        cpu_time_limit=180,
        max_evals=100,
        hpo_algo=hyperopt.tpe.suggest,
    )

    # Truncate to whole seconds so the duration prints as H:MM:SS.
    elapsed_seconds = int(time.time() - start_time)
    cv_time = str(datetime.timedelta(seconds=elapsed_seconds))

    print("--------- Done ----------")
    summary = (
        ("Time taken: ", cv_time),
        ("Best model: ", best_model),
        ("Best params: ", best_params),
        ("Best score: ", best_score),
    )
    for label, value in summary:
        print(label, value)
def setUp(self):
    """Prepare per-test fixtures: the test dataset and a new ``Engine``."""
    # Dataset comes from the shared test helper module.
    self.data = test_utils.load_test_dataset()
    # Engine is built with its default constructor arguments.
    self.engine = Engine()
# Put the current directory on the import path *before* importing
# auto_surprise below; the order of these two statements matters.
sys.path.insert(1, './')

from auto_surprise.engine import Engine

if __name__ == '__main__':
    # Load the Movielens 100k ratings file (tab-separated, ratings from 1 to 5).
    file_path = os.path.expanduser('../datasets/ml-100k/u.data')
    reader = Reader(
        line_format='user item rating timestamp',
        sep='\t',
        rating_scale=(1, 5),
    )
    data = Dataset.load_from_file(file_path, reader=reader)

    # Run auto surprise, timing the whole search with the wall clock.
    start_time = time.time()
    engine = Engine(debug=True)
    best_model, best_params, best_score, tasks = engine.train(
        data=data,
        target_metric='test_rmse',
        cpu_time_limit=180,
        max_evals=100,
        hpo_algo=hyperopt.atpe.suggest,
    )

    # Truncate to whole seconds so the duration prints as H:MM:SS.
    elapsed_seconds = int(time.time() - start_time)
    cv_time = str(datetime.timedelta(seconds=elapsed_seconds))

    print("--------- Done ----------")
    summary = (
        ("Time taken: ", cv_time),
        ("Best model: ", best_model),
        ("Best params: ", best_params),
        ("Best score: ", best_score),
        ("All tasks: ", tasks),
    )
    for label, value in summary:
        print(label, value)
import time
import datetime
import os
import sys

from surprise import Dataset

from auto_surprise.engine import Engine

if __name__ == '__main__':
    # Use the 'ml-100k' dataset that ships as a surprise built-in.
    data = Dataset.load_builtin('ml-100k')

    # Run auto surprise, timing the whole search with the wall clock.
    start_time = time.time()
    engine = Engine(debug=False)
    best_model, best_params, best_score, tasks = engine.train(
        data=data,
        target_metric='test_rmse',
        cpu_time_limit=720,
        max_evals=100,
    )

    # Truncate to whole seconds so the duration prints as H:MM:SS.
    elapsed_seconds = int(time.time() - start_time)
    cv_time = str(datetime.timedelta(seconds=elapsed_seconds))

    print("--------- Done ----------")
    summary = (
        ("Time taken: ", cv_time),
        ("Best model: ", best_model),
        ("Best params: ", best_params),
        ("Best score: ", best_score),
        ("All tasks: ", tasks),
    )
    for label, value in summary:
        print(label, value)