def example1():
    """Train ALS on MovieLens 100k, then tune it and show the results.

    Steps, in order:
      1. Train a model with the default ALS options (validation top-k = 10)
         and print its validation metrics plus the items most similar to
         Star Wars (1977).
      2. Configure and run hyper-parameter optimization on ``val_ndcg``.
      3. Load the model file written to ``model_path``, print the similar
         items again, then print the best trial and its parameters.
    """
    query_key = '49.Star_Wars_(1977)'
    model_path = './example1.ml100k.als.optimize.bin'

    def show_similar(model):
        # Ranks are printed 1-based and zero-padded to two digits.
        print('Similar movies to Star_Wars_(1977)')
        neighbours = model.most_similar(query_key)
        for rank, (movie_name, score) in enumerate(neighbours, start=1):
            print(f'{rank:02d}. {score:.3f} {movie_name}')

    log.set_log_level(log.DEBUG)

    model_opt = ALSOption().get_default_option()
    model_opt.validation = aux.Option({'topk': 10})

    input_opt = MatrixMarketOptions().get_default_option()
    input_opt.input.main = '../tests/ext/ml-100k/main'
    input_opt.input.iid = '../tests/ext/ml-100k/iid'

    # Baseline run with the default hyper-parameters.
    als = ALS(model_opt, data_opt=input_opt)
    als.initialize()
    als.train()
    baseline_metrics = json.dumps(als.get_validation_results(), indent=2)
    print('MovieLens 100k metrics for validations\n%s' % baseline_metrics)
    show_similar(als)

    print('Run hyper parameter optimization for val_ndcg...')
    als.opt.num_workers = 4
    als.opt.evaluation_period = 10
    search_space = {
        'd': ['randint', ['d', 10, 128]],
        'reg_u': ['uniform', ['reg_u', 0.1, 1.0]],
        'reg_i': ['uniform', ['reg_i', 0.1, 1.0]],
        'alpha': ['randint', ['alpha', 1, 10]],
    }
    als.opt.optimize = aux.Option({
        'loss': 'val_ndcg',
        'max_trials': 100,
        'deployment': True,
        'start_with_default_parameters': True,
        'space': search_space,
    })
    # Switch from DEBUG to INFO before the optimization run.
    log.set_log_level(log.INFO)
    als.opt.model_path = model_path

    # Print the hyper-parameters currently set, before the search starts.
    starting_params = {
        'alpha': als.opt.alpha,
        'd': als.opt.d,
        'reg_u': als.opt.reg_u,
        'reg_i': als.opt.reg_i,
    }
    print(json.dumps(starting_params, indent=2))

    als.optimize()
    als.load(model_path)
    show_similar(als)

    search_result = als.get_optimization_data()
    winner = search_result['best_parameters']
    print(json.dumps(search_result['best'], indent=2))
    # 'alpha' and 'd' are cast to int before dumping; reg_* are kept as-is.
    tuned_params = {
        'alpha': int(winner['alpha']),
        'd': int(winner['d']),
        'reg_u': winner['reg_u'],
        'reg_i': winner['reg_i'],
    }
    print(json.dumps(tuned_params, indent=2))
def test4_optimize(self):
    """Check that hyper-parameter optimization beats the default ALS fit.

    The test trains ALS once with the options set below, records the
    validation RMSE, then runs ``optimize()`` over a small search space
    (10 trials, loss ``val_rmse``). It asserts that:

    * the best ``val_rmse`` found is strictly lower than the baseline, and
    * the model loaded from ``model_path`` reproduces that best RMSE
      (presumably written there because ``deployment`` is True -- confirm
      against the optimizer implementation).

    The model file is removed in a ``finally`` clause so that a failed
    assertion or an exception does not leave a stale ``als.bin`` behind.
    """
    model_path = 'als.bin'

    set_log_level(2)
    opt = ALSOption().get_default_option()
    opt.d = 5
    opt.num_workers = 2
    opt.model_path = model_path
    opt.validation = aux.Option({'topk': 10})
    optimize_option = aux.Option({
        'loss': 'val_rmse',
        'max_trials': 10,
        'deployment': True,
        'start_with_default_parameters': True,
        'space': {
            'd': ['randint', ['d', 10, 20]],
            'reg_u': ['uniform', ['reg_u', 0.1, 0.3]],
            'reg_i': ['uniform', ['reg_i', 0.1, 0.3]],
            'alpha': ['randint', ['alpha', 8, 10]]
        }
    })
    opt.optimize = optimize_option
    opt.evaluation_period = 1
    opt.tensorboard = aux.Option({'root': './tb', 'name': 'als'})

    data_opt = MatrixMarketOptions().get_default_option()
    data_opt.input.main = self.ml_100k + 'main'
    data_opt.input.uid = self.ml_100k + 'uid'
    data_opt.input.iid = self.ml_100k + 'iid'
    data_opt.data.value_prepro = aux.Option({'name': 'OneBased'})

    als = ALS(opt, data_opt=data_opt)
    try:
        als.init_factors()
        als.train()
        default_result = als.get_validation_results()
        als.optimize()
        base_loss = default_result['rmse']  # val_rmse
        optimize_loss = als.get_optimization_data()['best']['val_rmse']
        # assertGreater reports both values on failure, unlike assertTrue.
        self.assertGreater(base_loss, optimize_loss)

        als.load(model_path)
        loss = als.get_validation_results()
        self.assertAlmostEqual(loss['rmse'], optimize_loss)
    finally:
        # Guarded so a failure before the file is written does not raise
        # FileNotFoundError here and mask the original error.
        if os.path.exists(model_path):
            os.remove(model_path)
users = pd.read_json('./model/users.json', typ='frame') books = pd.read_json('./model/books.json', typ='frame') #books = pd.read_excel('./model/books.xlsx') books['ISBN'] = books['ISBN'].astype(str) books['권'] = books['권'].fillna('') userbook_map = hp.get_userbook_map(users) user_items, uid_to_idx, idx_to_uid, mid_to_idx, idx_to_mid = hp.df_to_matrix( userbook_map, 'user_id', 'book_id') iid = list(idx_to_mid.values()) uid = list(idx_to_uid.values()) #model = ALS(opt, data_opt=data_opt) model = ALS() model.load('./model/als.optimize.bin') headers = { 'X-Naver-Client-Id': '', 'X-Naver-Client-Secret': '', } def get_bookimage(params): response = requests.get('https://openapi.naver.com/v1/search/book_adv.xml', headers=headers, params=params) root = ElementTree.fromstring(response.text) img_url = '' if root.tag == 'rss':