def main(project_name):
    """Load the saved embedding model for *project_name* and run evaluation."""
    log = Logger('_02_valid_model_{}'.format(project_name))
    log.info('=' * 50)
    path = '_model/embedding_model_{}.pt'.format(project_name)
    log.info('load model from {}'.format(path))
    loaded_model = torch.load(path)
    Evaluator().evaluate(loaded_model)
def __init__(self, recommender, hyperparameters, verbose=True, report_name='grid_search_results'):
    """
    Train a number of recommenders using UV decomposition with different parameters.

    :param AbstractRecommender recommender: The recommender whose hyperparameters are searched.
    :param dict hyperparameters: A dictionary of the hyperparameters.
    :param boolean verbose: A flag to decide printing progress.
    :param str report_name: The name of the csv file in which the analysis
        of the grid search will be dumped.
    """
    self.recommender = recommender
    self.hyperparameters = hyperparameters
    self._verbose = verbose
    # Per-combination errors collected during the search.
    self.all_errors = {}
    self.results_file_name = report_name + '.csv'
    self.evaluator = Evaluator(recommender.get_ratings())
def setUp(self):
    """
    Build a deterministic ratings fixture and a trained collaborative
    filtering model shared by every test of this case.
    """
    self.documents, self.users = 18, 10
    n_docs, n_users = self.documents, self.users
    self.n_iterations = 15
    self.k_folds = 3
    self.n_recommendations = 1
    self.hyperparameters = {'n_factors': 5, '_lambda': 0.01}
    self.options = {'n_iterations': self.n_iterations, 'k_folds': self.k_folds}
    self.initializer = ModelInitializer(self.hyperparameters.copy(), self.n_iterations)

    # Deterministic mock: user u "liked" document d iff (d + u) is divisible by 3.
    def mock_get_ratings_matrix(self=None):
        return [[int((article + user) % 3 == 0) for article in range(n_docs)]
                for user in range(n_users)]

    self.ratings_matrix = numpy.array(mock_get_ratings_matrix())
    DataParser.get_ratings_matrix = mock_get_ratings_matrix
    self.evaluator = Evaluator(self.ratings_matrix)
    self.cf = CollaborativeFiltering(self.initializer, self.evaluator,
                                     self.hyperparameters, self.options,
                                     load_matrices=True)
    self.cf.train()
    self.cf.evaluator.k_folds = self.k_folds
    self.test_data = self.cf.test_data
    self.predictions = self.cf.get_predictions()
    self.rounded_predictions = self.cf.rounded_predictions()
def runTest(self):
    """Exercise the GridSearch helpers against a freshly built CF recommender."""
    ev = Evaluator(self.ratings_matrix)
    recommender = CollaborativeFiltering(self.initializer, ev, self.initial_config,
                                         self.options, load_matrices=True)
    search = GridSearch(recommender, self.hyperparameters, False)
    for check in (self.checkKeyGenerator,
                  self.checkCombinationsGenerator,
                  self.checkGridSearch):
        check(search)
def main(args):
    """Train for `args.epoch + 1` epochs, evaluating BLEU and logging after each."""
    logger = Logger(args.output_dir)
    args.logger = logger
    trainer = Trainer(args)
    evaluator = Evaluator(trainer)
    for i_epoch in range(args.epoch + 1):
        # per-batch losses and per-sample BLEU collected during this epoch
        epoch_log = {'i_epoch': i_epoch, 'train_losses': [], 'test_bleus': []}
        trainer.train_one_epoch(epoch_log)
        # evaluation and logging
        logger.log('%d th epoch' % i_epoch)
        evaluator.bleu(epoch_log)
        evaluator.sample_translation()
        summary = {
            'i_epoch': epoch_log['i_epoch'],
            'train_loss': np.mean(epoch_log['train_losses']),
            'test_bleu': np.mean(epoch_log['test_bleus']),
        }
        logger.dump(summary)
        trainer.save_best(summary)
        logger.log('-' * 10)
def runTest(self):
    """
    End-to-end checks of the evaluator: RMSE against sklearn, naive split
    conservation, k-fold split sizes/disjointness, recall and MRR monotonicity.
    """
    # RMSE must agree with sklearn's mean_squared_error.
    m1, m2 = numpy.random.random((4, 8)), numpy.random.random((4, 8))
    self.assertTrue(abs(self.cf.evaluator.get_rmse(m1, m2) -
                        numpy.sqrt(mean_squared_error(m1, m2))) < 1e-6)

    # naive_split must not lose or duplicate any rating.
    train, test = self.cf.evaluator.naive_split()
    self.assertEqual(numpy.count_nonzero(train) + numpy.count_nonzero(test),
                     numpy.count_nonzero(self.ratings_matrix))

    test_indices = self.cf.evaluator.get_kfold_indices()
    # k = 3
    first_fold_indices = test_indices[0::self.k_folds]
    second_fold_indices = test_indices[1::self.k_folds]
    third_fold_indices = test_indices[2::self.k_folds]
    train1, test1 = self.cf.evaluator.generate_kfold_matrix(first_fold_indices)
    train2, test2 = self.cf.evaluator.generate_kfold_matrix(second_fold_indices)
    train3, test3 = self.cf.evaluator.generate_kfold_matrix(third_fold_indices)
    total_ratings = numpy.count_nonzero(self.ratings_matrix)

    # ensure that each fold has 1/k of the total ratings.
    # BUG FIX: the original asserted against test1 three times and never
    # checked the sizes of test2 and test3.
    k_inverse = 1 / self.k_folds
    self.assertTrue(abs(k_inverse - ((numpy.count_nonzero(test1)) / total_ratings)) < 1e-6)
    self.assertTrue(abs(k_inverse - ((numpy.count_nonzero(test2)) / total_ratings)) < 1e-6)
    self.assertTrue(abs(k_inverse - ((numpy.count_nonzero(test3)) / total_ratings)) < 1e-6)

    # assert that the folds don't intertwine
    self.assertTrue(numpy.all((train1 * test1) == 0))
    self.assertTrue(numpy.all((train2 * test2) == 0))
    self.assertTrue(numpy.all((train3 * test3) == 0))
    # assert that test sets dont contain the same elements
    self.assertTrue(numpy.all((test1 * test2) == 0))
    self.assertTrue(numpy.all((test2 * test3) == 0))
    self.assertTrue(numpy.all((test1 * test3) == 0))

    evaluator = Evaluator(self.ratings_matrix)
    self.assertEqual(self.predictions.shape, self.ratings_matrix.shape)
    recall = evaluator.calculate_recall(self.ratings_matrix, self.predictions)
    # if predictions are perfect, rounded predictions must equal the ratings
    if recall == 1:
        for row in range(self.users):
            for col in range(self.documents):
                self.assertEqual(self.rounded_predictions[row, col],
                                 self.ratings_matrix[row, col])

    self.setUp()
    evaluator.ratings = self.ratings_matrix.copy()
    # restore the unmodified rating matrix
    self.setUp()
    evaluator.ratings = self.ratings_matrix.copy()

    # mrr will always decrease as we set the highest prediction's index
    # to 0 in the rating matrix. top_n recommendations set to 0.
    mrr = []
    for i in range(self.users):
        evaluator.ratings[i, (numpy.argmax(self.predictions[i], axis=0))] = 0
        mrr.append(evaluator.calculate_mrr(self.n_recommendations, self.predictions,
                                           self.rounded_predictions, evaluator.ratings))
        if i > 1:
            self.assertLessEqual(mrr[i], mrr[i - 1])
def setUp(self):
    """ Setup method that is called at the beginning of each test. """
    # Small deterministic corpus: 8 documents, 10 users.
    self.documents, self.users = 8, 10
    documents_cnt, users_cnt = self.documents, self.users
    self.n_iterations = 5
    self.n_factors = 5
    self.k_folds = 5
    self.hyperparameters = {'n_factors': self.n_factors}
    self.options = {'n_iterations': self.n_iterations, 'k_folds': self.k_folds}
    self.initializer = ModelInitializer(self.hyperparameters.copy(), self.n_iterations)

    # No-op replacement for DataParser.process.
    def mock_process(self=None):
        pass

    # Fixed toy abstracts keyed by document id.
    def mock_get_abstracts(self=None):
        return {0: 'hell world berlin dna evolution', 1: 'freiburg is green',
                2: 'the best dna is the dna of dinasours', 3: 'truth is absolute',
                4: 'berlin is not that green', 5: 'truth manifests itself',
                6: 'plato said truth is beautiful', 7: 'freiburg has dna'}

    # Deterministic ratings: user u rated document d iff (d + u) % 3 == 0.
    def mock_get_ratings_matrix(self=None):
        return [[int(not bool((article + user) % 3)) for article in range(documents_cnt)]
                for user in range(users_cnt)]

    # Builds (word_id, count), (doc_id, word_id) and (doc_id, word_id, count)
    # triples from the toy abstracts.
    def mock_get_word_distribution(self=None):
        abstracts = mock_get_abstracts()
        vocab = set(itertools.chain(*list(map(lambda ab: ab.split(' '), abstracts.values()))))
        w2i = dict(zip(vocab, range(len(vocab))))
        # Global frequency per word, counted on whitespace-split tokens.
        word_to_count = [(w2i[word], sum(abstract.split(' ').count(word)
                                         for doc_id, abstract in abstracts.items()))
                         for word in vocab]
        article_to_word = list(set([(doc_id, w2i[word])
                                    for doc_id, abstract in abstracts.items()
                                    for word in abstract.split(' ')]))
        # NOTE(review): this uses str.count (substring count) while
        # word_to_count above counts whitespace-split tokens — the two can
        # disagree when one word is a substring of another; confirm this
        # inconsistency is intended for the fixture.
        article_to_word_to_count = list(set([(doc_id, w2i[word], abstract.count(word))
                                             for doc_id, abstract in abstracts.items()
                                             for word in abstract.split(' ')]))
        return word_to_count, article_to_word, article_to_word_to_count

    abstracts = mock_get_abstracts()
    word_to_count, article_to_word, article_to_word_to_count = mock_get_word_distribution()
    self.abstracts_preprocessor = AbstractsPreprocessor(abstracts, word_to_count,
                                                        article_to_word, article_to_word_to_count)
    self.ratings_matrix = numpy.array(mock_get_ratings_matrix())
    self.evaluator = Evaluator(self.ratings_matrix, self.abstracts_preprocessor)
    # Patch DataParser class-wide so code under test reads the mocks.
    setattr(DataParser, "get_abstracts", mock_get_abstracts)
    setattr(DataParser, "process", mock_process)
    setattr(DataParser, "get_ratings_matrix", mock_get_ratings_matrix)
    setattr(DataParser, "get_word_distribution", mock_get_word_distribution)
def setUp(self): """ Setting up the ratings, expected ratings and recommendations. The comments are showing where are the matching recommendations. A matching recommendation will occur at the recommendation_indcies list, and the corresponding ratings and expected rating are both positive. """ # 0 1 2 3 4 5 6 7 8 self.ratings = numpy.array([ [1, 1, 0, 0, 1, 0, 1, 0, 0], # ^ [0, 0, 1, 1, 0, 0, 0, 1, 0], # ^ [1, 1, 0, 1, 0, 0, 1, 0, 1], # ^ ^ [1, 0, 0, 0, 1, 0, 0, 0, 0], # ^ [0, 0, 0, 0, 0, 0, 0, 0, 1] ]) # # 0 1 2 3 4 5 6 7 8 self.expected_ratings = numpy.array([ [0, 1, 0, 0, 0, 0, 0, 0, 0], # ^ [0, 0, 1, 0, 0, 0, 0, 0, 0], # ^ [0, 0, 0, 1, 0, 0, 0, 0, 1], # ^ ^ [1, 0, 0, 0, 0, 0, 0, 0, 0], # ^ [0, 1, 0, 0, 0, 0, 0, 0, 0] ]) # self.recommendation_indices = numpy.array([ [1], # 1 matches -> 1/1 [3, 2], # 3 doesn't match, 2 matches -> 1/2 [4, 6, 3, 0, 8], # 4,6,0 don't match, 3, 8 match -> 1/3, 1/5 [0], # 0 matches -> 1/1 [0] ]) # no matches -> 0 self.n_users, self.n_items = self.ratings.shape self.evaluator = Evaluator(self.ratings) self.evaluator.recs_loaded = True self.evaluator.recommendation_indices = self.recommendation_indices
def setUp(self):
    """Prepare a deterministic ratings matrix, evaluator and model initializer."""
    self.documents, self.users = 30, 4
    n_docs, n_users = self.documents, self.users
    self.n_factors = 5
    self.n_iterations = 20
    self.k_folds = 3
    self.hyperparameters = {'n_factors': self.n_factors, '_lambda': 0.01}
    self.options = {'k_folds': self.k_folds, 'n_iterations': self.n_iterations}
    self.initializer = ModelInitializer(self.hyperparameters.copy(), self.n_iterations)

    # Deterministic mock: user u rated document d iff (d + u) is divisible by 3.
    def mock_get_ratings_matrix(self=None):
        return [[int((article + user) % 3 == 0) for article in range(n_docs)]
                for user in range(n_users)]

    self.ratings_matrix = numpy.array(mock_get_ratings_matrix())
    self.evaluator = Evaluator(self.ratings_matrix)
    setattr(DataParser, "get_ratings_matrix", mock_get_ratings_matrix)
def train(model, project_name):
    """
    Train the embedding model with hard-negative triplet mining.

    :param model: embedding network to train (moved batches via .cuda()).
    :param project_name: tag used in saved checkpoint file names.
    """
    sampler = SubSampler()
    list_train_imgs = sampler.get_train_imgs()
    dataset = LandmarkDataset('../../input_large_delf/train', list_train_imgs)
    evaluator = Evaluator()
    dir_model = '_model'
    os.makedirs(dir_model, exist_ok=True)
    # for training
    batch_size = 240
    group_size = 12
    iter_outside = 10   # epochs
    iter_inside = 600   # batches per epoch
    optimizer = optim.Adam(filter(lambda p: p.requires_grad, model.parameters()), lr=1e-3)
    # One cosine cycle over the whole run (stepped once per batch below).
    scheduler = lr_scheduler.CosineAnnealingLR(optimizer, T_max=iter_outside * iter_inside)
    for param_group in optimizer.param_groups:
        logger.info('start lerning rate with: {:.6f}'.format(param_group['lr']))
    for ep in range(1, iter_outside + 1):
        logger.info('-' * 30)
        logger.info('epoch: {:d}'.format(ep))
        for param_group in optimizer.param_groups:
            logger.info('current lerning rate with: {:.8f}'.format(param_group['lr']))
        model.train()
        # Freeze batch-norm statistics after the first epoch.
        if ep > 1:
            set_batch_norm_eval(model)
        train_loss1 = 0
        train_loss3 = 0
        count_sample = 0
        ave_good_index = 0
        pt_sampler = PyTorchSampler(sampler, batch_size, group_size, iter_inside)
        dataloader = DataLoader(dataset, batch_sampler=pt_sampler, num_workers=8)
        for batch in tqdm(dataloader):
            batch_cuda = batch.cuda()
            # forward with requires_grad=False: mine anchor/positive/negative
            # indices on a gradient-free pass.
            with torch.no_grad():
                v_batch_no_bp = batch_cuda
                optimizer.zero_grad()
                out = model.forward(v_batch_no_bp)
                batch_indeses, num_good_index = get_apn_index(out, batch_size, group_size)
            # forward with requires_grad=True on the mined triplets only
            v_batch = batch_cuda[batch_indeses, ...]
            optimizer.zero_grad()
            out = model.forward(v_batch)
            # Output layout: [anchors | hard positives | hard negatives].
            out_anchor = out[:batch_size]
            hard_positive = out[batch_size:batch_size*2]
            hard_negative = out[batch_size*2:batch_size*3]
            # calc loss — loss1 is logged only; the optimized loss is loss3.
            loss1 = smooth_pairwise_loss(out_anchor, hard_positive) * 0.1
            loss3 = hard_negative_triplet_loss(out_anchor, hard_positive, hard_negative)
            loss = loss3
            loss.backward()
            optimizer.step()
            scheduler.step()
            train_loss1 += float(loss1.data.cpu().numpy()) * batch_size
            train_loss3 += float(loss3.data.cpu().numpy()) * batch_size
            ave_good_index += num_good_index * batch_size
            count_sample += batch_size
        logger.info('train loss (pair-pos): {:.6f}'.format(train_loss1 / count_sample))
        logger.info('train loss (triplet) : {:.6f}'.format(train_loss3 / count_sample))
        logger.info('average number of far negative: {:.2f} / {:d}'.format(
            ave_good_index / count_sample, batch_size))
        evaluator.evaluate(model)
        # Intermediate checkpoint every 4 epochs (final save happens below).
        if ep % 4 == 0 and ep != iter_outside:
            model_name = 'embedding_model_{}_ep{}.pt'.format(project_name, ep)
            logger.info('save model: {}'.format(model_name))
            torch.save(model, os.path.join(dir_model, model_name))
    model_name = 'embedding_model_{}.pt'.format(project_name)
    logger.info('save model: {}'.format(model_name))
    torch.save(model, os.path.join(dir_model, model_name))
def run_training(H):
    """
    Full training loop for the CTC speech model.

    :param H: hyperparameter/config namespace (SEED, LR, MAX_EPOCHS, paths, ...).
    """
    # torch.cuda.is_available = lambda : False
    # torch.backends.cudnn.enabled=False
    torch.backends.cudnn.deterministic = True
    # torch.backends.cudnn.benchmark = True
    create_logger(H)
    # Seed every RNG involved so runs are reproducible.
    random.seed(H.SEED)
    np.random.seed(H.SEED)
    torch.manual_seed(H.SEED)
    if torch.cuda.is_available():
        torch.cuda.manual_seed(H.SEED)
        torch.cuda.manual_seed_all(H.SEED)
    logger.info("Training start.")
    logger.info(repr(H))
    train_loader, valid_loader, vocab = create_data_pipelines(H)
    logger.info(train_loader.dataset)
    logger.info(valid_loader.dataset)
    # Metric container seeded with "worst" initial values per tracked quantity.
    m = Metric([('train_loss', np.inf), ('train_score', np.inf),
                ('valid_loss', np.inf), ('valid_score', 0), ('train_lr', 0),
                ('valid_cer', np.inf)])
    model = SpeechCNN(len(vocab), input_size=256, hidden_size=H.CNN_HIDDEN_SIZE,
                      dropout=H.CNN_DROPOUT, initialize=torch_weight_init)
    if H.USE_CUDA:
        model.cuda()
    # Optionally warm-start from a previously saved state dict.
    if H.PRELOAD_MODEL_PATH:
        path = os.path.join(H.EXPERIMENT, H.PRELOAD_MODEL_PATH)
        state = torch.load(path)
        model.load_state_dict(state)
        print("Preloaded model: {}".format(path))
    criterion = PytorchCTCLoss(vocab)
    optimizer = optim.SGD(list(
        filter(lambda p: p.requires_grad, model.parameters())),
        lr=H.LR, weight_decay=H.WEIGHT_DECAY, momentum=H.MOMENTUM,
        nesterov=H.NESTEROV)
    stopping = Stopping(model, patience=H.STOPPING_PATIENCE)
    scheduler = optim.lr_scheduler.LambdaLR(optimizer, lr_lambda=[H.LR_LAMBDA])
    ctc_decoder = CTCGreedyDecoder(vocab)
    scorer = Scorer(reduction='sum')
    tlogger = TensorboardLogger(root_dir=H.EXPERIMENT, experiment_dir=H.TIMESTAMP)  # PytorchLogger()
    checkpoint = Checkpoint(model, optimizer, stopping, m,
                            root_dir=H.EXPERIMENT, experiment_dir=H.TIMESTAMP,
                            restore_from=-1, interval=H.CHECKPOINT_INTERVAL,
                            verbose=0)
    trainer = Trainer(model, train_loader, optimizer, scheduler, criterion,
                      ctc_decoder, scorer, H.MAX_GRAD_NORM)
    evaluator = Evaluator(model, valid_loader, criterion, ctc_decoder, scorer)
    epoch_start = 1
    if H.CHECKPOINT_RESTORE:
        epoch_start = checkpoint.restore() + 1
    train_loader.batch_sampler.shuffle(epoch_start)
    epoch = epoch_start
    try:
        epoch_itr = tlogger.set_itr(range(epoch_start, H.MAX_EPOCHS + 1))
        for epoch in epoch_itr:
            # Defer Ctrl-C until the epoch finishes so metric/checkpoint
            # state stays consistent.
            with DelayedKeyboardInterrupt():
                m.train_loss, m.train_score, m.train_lr = trainer(epoch)
                m.valid_loss, m.valid_score = evaluator()
                if checkpoint:
                    checkpoint.step(epoch)
                stopping_flag = stopping.step(epoch, m.valid_loss, m.valid_score)
                epoch_itr.log_values(m.train_loss, m.train_score, m.train_lr,
                                     m.valid_loss, m.valid_score,
                                     stopping.best_score_epoch, stopping.best_score)
                if stopping_flag:
                    logger.info(
                        "Early stopping at epoch: %d, score %f" %
                        (stopping.best_score_epoch, stopping.best_score))
                    break
                # Re-shuffle batches for the next epoch.
                train_loader.batch_sampler.shuffle(epoch)
    except KeyboardInterrupt:
        logger.info("Training interrupted at: {}".format(epoch))
        pass
    # Persist final state, then export the best-scoring weights.
    checkpoint.create(epoch)
    model.load_state_dict(stopping.best_score_state)
    torch.save(model.state_dict(), os.path.join(H.EXPERIMENT, H.MODEL_NAME + '.tar'))
    logger.info(repr(tlogger))
    logger.info(repr(stopping))
    logger.info(repr(checkpoint))
    logger.info("Training end.")
def main():
    """Evaluation entry point: build loaders, restore the best checkpoint, test."""
    args = parse_args()
    update_config(args.cfg_file)
    if args.gpus:
        config.GPUS = args.gpus
    else:
        config.CUDA = False
    if args.workers:
        config.WORKERS = args.workers
    print('Using config:')
    pprint.pprint(config)

    # cudnn behaviour comes straight from the config.
    torch.backends.cudnn.benchmark = config.CUDNN.BENCHMARK
    torch.backends.cudnn.deterministic = config.CUDNN.DETERMINISTIC
    torch.backends.cudnn.enabled = config.CUDNN.ENABLED
    if config.CUDA:
        os.environ["CUDA_VISIBLE_DEVICES"] = config.GPUS
    device = torch.device('cuda' if config.CUDA else 'cpu')

    # Redirect print to both console and log file
    sys.stdout = Logger(osp.join(config.OUTPUT_DIR, 'log-eval.txt'))

    # Data pipeline: identical preprocessing for query and gallery images.
    dataset = DataSet(config.DATASET.ROOT, config.DATASET.DATASET)
    normalize = T.Normalize(mean=[0.485, 0.456, 0.406],
                            std=[0.229, 0.224, 0.225])
    test_transform = T.Compose([
        T.Resize(config.MODEL.IMAGE_SIZE, interpolation=3),
        T.ToTensor(),
        normalize,
    ])

    def make_loader(split, split_path):
        # One evaluation loader per dataset split, no shuffling.
        return DataLoader(
            Preprocessor(split, root=osp.join(dataset.images_dir, split_path),
                         transform=test_transform),
            batch_size=config.TEST.BATCH_SIZE, num_workers=config.WORKERS,
            shuffle=False, pin_memory=True)

    query_loader = make_loader(dataset.query, dataset.query_path)
    gallery_loader = make_loader(dataset.gallery, dataset.gallery_path)

    # Model: restore the best checkpoint before wrapping for multi-GPU.
    model = models.create(config.MODEL.NAME)
    ckpt = load_checkpoint(config.TEST.MODEL_FILE)
    print('best model at epoch: {}'.format(ckpt['epoch']))
    model.load_state_dict(ckpt['state_dict'], strict=False)
    model = nn.DataParallel(model).to(device)

    print('Test with best model:')
    Evaluator(model).evaluate(query_loader, gallery_loader, dataset.query,
                              dataset.gallery, config.TEST.OUTPUT_FEATURES)
def main():
    """
    Training entry point: seeds RNGs, builds data loaders and the memory
    network, trains per-epoch, and keeps the checkpoint with best rank-1.
    """
    args = parse_args()
    update_config(args.cfg_file)
    if args.gpus:
        config.GPUS = args.gpus
    else:
        config.CUDA = False
    if args.workers:
        config.WORKERS = args.workers
    print('Using config:')
    pprint.pprint(config)
    # Seed all RNGs; a random seed is drawn when none is given.
    if args.manualSeed is None:
        args.manualSeed = random.randint(1, 10000)
    random.seed(args.manualSeed)
    np.random.seed(args.manualSeed)
    torch.manual_seed(args.manualSeed)
    if config.CUDA:
        torch.cuda.manual_seed_all(args.manualSeed)
    torch.backends.cudnn.benchmark = config.CUDNN.BENCHMARK
    torch.backends.cudnn.deterministic = config.CUDNN.DETERMINISTIC
    torch.backends.cudnn.enabled = config.CUDNN.ENABLED
    if config.CUDA:
        os.environ["CUDA_VISIBLE_DEVICES"] = config.GPUS
    device = torch.device('cuda' if config.CUDA else 'cpu')
    # Redirect print to both console and log file
    sys.stdout = Logger(osp.join(config.OUTPUT_DIR, 'log.txt'))
    # Create data loaders
    dataset = DataSet(config.DATASET.ROOT, config.DATASET.DATASET)
    normalizer = T.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225])
    # Augmented pipeline for training; plain resize+normalize for testing.
    train_transformer = T.Compose([
        T.RandomSizedRectCrop(*config.MODEL.IMAGE_SIZE),
        T.RandomHorizontalFlip(),
        T.RandomRotation(10),
        T.ColorJitter(0.2, 0.2, 0.2),
        T.ToTensor(),
        normalizer,
        T.RandomErasing(EPSILON=config.DATASET.RE),
    ])
    test_transformer = T.Compose([
        T.Resize(config.MODEL.IMAGE_SIZE, interpolation=3),
        T.ToTensor(),
        normalizer,
    ])
    train_loader = DataLoader(UnsupervisedCamStylePreprocessor(
        dataset.train,
        root=osp.join(dataset.images_dir, dataset.train_path),
        camstyle_root=osp.join(dataset.images_dir, dataset.train_camstyle_path),
        num_cam=dataset.num_cam, use_gan=True, transform=train_transformer),
        batch_size=config.TRAIN.BATCH_SIZE, num_workers=config.WORKERS,
        shuffle=config.TRAIN.SHUFFLE, pin_memory=True, drop_last=False)
    query_loader = DataLoader(Preprocessor(dataset.query, root=osp.join(
        dataset.images_dir, dataset.query_path), transform=test_transformer),
        batch_size=config.TEST.BATCH_SIZE, num_workers=config.WORKERS,
        shuffle=False, pin_memory=True)
    gallery_loader = DataLoader(Preprocessor(dataset.gallery, root=osp.join(
        dataset.images_dir, dataset.gallery_path), transform=test_transformer),
        batch_size=config.TEST.BATCH_SIZE, num_workers=config.WORKERS,
        shuffle=False, pin_memory=True)
    # Create model
    model = models.create(config.MODEL.NAME,
                          pretrained=config.MODEL.PRETRAINED,
                          num_classes=dataset.num_train_ids)
    # Memory Network — one slot per training image.
    num_tgt = len(dataset.train)
    memory = models.create('memory', config.MODEL.FEATURES, num_tgt)
    # Load from checkpoint
    if config.TRAIN.RESUME:
        checkpoint = load_checkpoint(config.TRAIN.CHECKPOINT)
        model.load_state_dict(checkpoint['state_dict'], strict=False)
        memory.load_state_dict(checkpoint['state_dict_memory'], strict=False)
        print("=> Start epoch {} ".format(checkpoint['epoch']))
    # Set model
    model = nn.DataParallel(model).to(device)
    memory = memory.to(device)
    # Optimizer — backbone params get a 0.1x learning-rate multiplier.
    base_param_ids = set(map(id, model.module.base.parameters()))
    base_params_need_for_grad = filter(lambda p: p.requires_grad,
                                       model.module.base.parameters())
    new_params = [p for p in model.parameters() if id(p) not in base_param_ids]
    param_groups = [{
        'params': base_params_need_for_grad,
        'lr_mult': 0.1
    }, {
        'params': new_params,
        'lr_mult': 1.0
    }]
    optimizer = get_optimizer(config, param_groups)
    # Trainer
    trainer = Trainer(config, model, memory)

    def adjust_lr(epoch):
        # Step decay: LR * LR_FACTOR^(epoch // LR_STEP), scaled per group.
        step_size = config.TRAIN.LR_STEP
        lr = config.TRAIN.LR * (config.TRAIN.LR_FACTOR**(epoch // step_size))
        for g in optimizer.param_groups:
            g['lr'] = lr * g.get('lr_mult', 1)

    best_r1 = 0.0
    # Start training — evaluate after every epoch, keep the best rank-1.
    for epoch in range(config.TRAIN.BEGIN_EPOCH, config.TRAIN.END_EPOCH):
        # lr_scheduler.step()
        adjust_lr(epoch)
        trainer.train(epoch, train_loader, optimizer)
        print('Test with latest model:')
        evaluator = Evaluator(model)
        r1 = evaluator.evaluate(query_loader, gallery_loader, dataset.query,
                                dataset.gallery, config.TEST.OUTPUT_FEATURES)
        if r1 > best_r1:
            best_r1 = r1
            save_checkpoint(
                {
                    'state_dict': model.module.state_dict(),
                    'state_dict_memory': memory.state_dict(),
                    'epoch': epoch + 1,
                },
                fpath=osp.join(config.OUTPUT_DIR, 'checkpoint.pth.tar'))
        print('\n * Finished epoch {:3d} \n'.format(epoch))
    # Final test with the best saved checkpoint.
    print('Test with best model:')
    evaluator = Evaluator(model)
    checkpoint = load_checkpoint(
        osp.join(config.OUTPUT_DIR, 'checkpoint.pth.tar'))
    print('best model at epoch: {}'.format(checkpoint['epoch']))
    model.module.load_state_dict(checkpoint['state_dict'])
    evaluator.evaluate(query_loader, gallery_loader, dataset.query,
                       dataset.gallery, config.TEST.OUTPUT_FEATURES)
def runTest(self):
    """
    Checks of naive and k-fold splitting, recall, recall@x, NDCG and MRR
    on the deterministic fixture built in setUp.
    """
    # naive_split must not lose or duplicate any rating.
    train, test = self.cf.evaluator.naive_split()
    self.assertEqual(
        numpy.count_nonzero(train) + numpy.count_nonzero(test),
        numpy.count_nonzero(self.ratings_matrix))
    train_indices, test_indices = self.cf.evaluator.get_kfold_indices()
    # k = 3
    first_fold_indices = train_indices[0::self.k_folds], test_indices[
        0::self.k_folds]
    second_fold_indices = train_indices[1::self.k_folds], test_indices[
        1::self.k_folds]
    third_fold_indices = train_indices[2::self.k_folds], test_indices[
        2::self.k_folds]
    train1, test1 = self.cf.evaluator.generate_kfold_matrix(
        first_fold_indices[0], first_fold_indices[1])
    train2, test2 = self.cf.evaluator.generate_kfold_matrix(
        second_fold_indices[0], second_fold_indices[1])
    train3, test3 = self.cf.evaluator.generate_kfold_matrix(
        third_fold_indices[0], third_fold_indices[1])
    total_ratings = numpy.count_nonzero(self.ratings_matrix)
    # ensure that each fold has 1/k of the total ratings.
    # BUG FIX: the original asserted test2's size twice and never test3's.
    k_inverse = (1 / self.k_folds)
    self.assertEqual(k_inverse, numpy.count_nonzero(test1) / total_ratings)
    self.assertEqual(k_inverse, numpy.count_nonzero(test2) / total_ratings)
    self.assertEqual(k_inverse, numpy.count_nonzero(test3) / total_ratings)
    # assert that the folds don't intertwine
    self.assertTrue(numpy.all((train1 * test1) == 0))
    self.assertTrue(numpy.all((train2 * test2) == 0))
    self.assertTrue(numpy.all((train3 * test3) == 0))
    # assert that test sets dont contain the same elements
    self.assertTrue(numpy.all((test1 * test2) == 0))
    self.assertTrue(numpy.all((test2 * test3) == 0))
    self.assertTrue(numpy.all((test1 * test3) == 0))
    evaluator = Evaluator(self.ratings_matrix)
    self.assertEqual(self.predictions.shape, self.ratings_matrix.shape)
    recall = evaluator.calculate_recall(self.ratings_matrix, self.predictions)
    # if predictions are perfect, rounded predictions must equal the ratings
    if recall == 1:
        for row in range(self.users):
            for col in range(self.documents):
                self.assertEqual(self.rounded_predictions[row, col],
                                 self.ratings_matrix[row, col])
    # If we modify all the top predictions for half the users,
    # recall should be 0.5 by definition
    for i in range(0, self.users, 2):
        evaluator.ratings[i, self.predictions[i].nonzero()[0]] = 0
    recall_at_x = evaluator.recall_at_x(self.n_recommendations,
                                        self.predictions,
                                        self.ratings_matrix,
                                        self.rounded_predictions)
    self.assertEqual(0.5, recall_at_x)
    self.setUp()
    evaluator.ratings[:] = self.ratings_matrix
    # removing all top hits, should yield ndcg of 0 as number of recs is 1.
    for i in range(0, self.users):
        evaluator.ratings[i, self.predictions[i].nonzero()[0]] = 0
    ndcg = evaluator.calculate_ndcg(self.n_recommendations, self.predictions,
                                    self.ratings_matrix, self.test_data)
    self.assertEqual(0.0, ndcg)
    # restore the unmodified rating matrix
    self.setUp()
    evaluator.ratings[:] = self.ratings_matrix
    # mrr will always decrease as we set the highest prediction's index
    # to 0 in the rating matrix. top_n recommendations set to 0.
    mrr = []
    for i in range(self.users):
        mrr.append(
            evaluator.calculate_mrr(self.n_recommendations, self.predictions,
                                    self.rounded_predictions, self.test_data))
        evaluator.ratings[i, (numpy.argmax(self.predictions[i], axis=0))] = 0
        if i > 1:
            self.assertLessEqual(mrr[i], mrr[i - 1])
def train(model, project_name):
    """
    Train the embedding model with hard-negative triplet mining
    (manual batch assembly, pre-0.4 PyTorch Variable API).

    :param model: embedding network to train (batches moved via .cuda()).
    :param project_name: tag used in saved checkpoint file names.
    """
    sampler = Sampler()
    loader = ImgLoader('../../input_large_delf/train')
    evaluator = Evaluator()
    dir_model = '_model'
    os.makedirs(dir_model, exist_ok=True)
    # for training
    batch_size = 240
    group_size = 12
    iter_outside = 10   # epochs
    iter_inside = 500   # batches per epoch
    optimizer = optim.Adam(filter(lambda p: p.requires_grad,
                                  model.parameters()), lr=1e-4)
    for param_group in optimizer.param_groups:
        logger.info('start lerning rate with: {:.6f}'.format(
            param_group['lr']))
    for ep in range(1, iter_outside + 1):
        logger.info('-' * 30)
        logger.info('epoch: {:d}'.format(ep))
        model.train()
        # Freeze batch-norm statistics after the first epoch.
        if ep > 1:
            set_batch_norm_eval(model)
        train_loss1 = 0
        train_loss3 = 0
        count_sample = 0
        ave_good_index = 0
        for _ in tqdm(range(iter_inside)):
            # Assemble the batch by hand: group_size images per identity.
            batch = torch.FloatTensor(batch_size * group_size, 40, 1000).zero_()
            ids = sampler.get_sample(batch_size, group_size)
            for i in range(batch_size * group_size):
                batch[i] = loader.load_image('{}.delf'.format(ids[i]))
            batch_cuda = batch.cuda()
            # forward with requires_grad=False: mine triplet indices on a
            # gradient-free (volatile) pass.
            v_batch_no_bp = Variable(batch_cuda, volatile=True)
            optimizer.zero_grad()
            out = model.forward(v_batch_no_bp)
            batch_indeces, num_good_index = get_apn_index(
                out, batch_size, group_size)
            # forward with requires_grad=True on the mined triplets only
            v_batch = Variable(batch_cuda[batch_indeces, :, :])
            optimizer.zero_grad()
            out = model.forward(v_batch)
            # Output layout: [anchors | hard positives | hard negatives].
            out_anchor = out[:batch_size]
            hard_positive = out[batch_size:batch_size * 2]
            hard_negative = out[batch_size * 2:batch_size * 3]
            # calc loss — loss1 is logged only; the optimized loss is loss3.
            loss1 = smooth_pairwise_loss(out_anchor, hard_positive) * 0.1
            loss3 = hard_negative_triplet_loss(out_anchor, hard_positive,
                                               hard_negative)
            loss = loss3
            loss.backward()
            optimizer.step()
            train_loss1 += float(loss1.data.cpu().numpy()) * batch_size
            train_loss3 += float(loss3.data.cpu().numpy()) * batch_size
            ave_good_index += num_good_index * batch_size
            count_sample += batch_size
        logger.info('train loss (pair-pos): {:.6f}'.format(train_loss1 /
                                                           count_sample))
        logger.info('train loss (triplet) : {:.6f}'.format(train_loss3 /
                                                           count_sample))
        logger.info('average number of far negative: {:.2f} / {:d}'.format(
            ave_good_index / count_sample, batch_size))
        evaluator.evaluate(model)
        # Intermediate checkpoint every 4 epochs (final save happens below).
        if ep % 4 == 0 and ep != iter_outside:
            model_name = 'embedding_model_{}_ep{}.pt'.format(project_name, ep)
            logger.info('save model: {}'.format(model_name))
            torch.save(model, os.path.join(dir_model, model_name))
        # Manual step decay: shrink the learning rate 10x every 8 epochs.
        if ep % 8 == 0:
            for param_group in optimizer.param_groups:
                param_group['lr'] *= 0.1
                logger.info('change learning rate into: {:.6f}'.format(
                    param_group['lr']))
    model_name = 'embedding_model_{}.pt'.format(project_name)
    logger.info('save model: {}'.format(model_name))
    torch.save(model, os.path.join(dir_model, model_name))
def run_training(H):
    """
    Full training loop for the seq2seq speech recognizer.

    :param H: hyperparameter/config namespace (SEED, LR, MAX_EPOCHS, paths, ...).
    """
    # torch.cuda.is_available = lambda : False
    # torch.backends.cudnn.enabled=False
    torch.backends.cudnn.deterministic = True
    # torch.backends.cudnn.benchmark = True
    create_logger(H)
    # Seed every RNG involved so runs are reproducible.
    random.seed(H.SEED)
    np.random.seed(H.SEED)
    torch.manual_seed(H.SEED)
    if torch.cuda.is_available():
        torch.cuda.manual_seed(H.SEED)
        torch.cuda.manual_seed_all(H.SEED)
    logger.info("Training start.")
    logger.info(repr(H))
    train_loader, valid_loader, vocab = create_data_pipelines(H)
    logger.info(train_loader.dataset)
    logger.info(valid_loader.dataset)
    # Metric container seeded with "worst" initial values per tracked quantity.
    m = Metric([('train_loss', np.inf), ('train_score', np.inf),
                ('valid_loss', np.inf), ('valid_score', 0), ('train_lr', 0),
                ('valid_cer', np.inf)])
    model = NeuralSpeechRecognizer(
        vocab,
        train_loader.dataset.max_seq_length,
        rnn_hidden_size=H.RNN_HIDDEN_SIZE,
        rnn_num_layers=H.RNN_NUM_LAYERS,
        rnn_dropout=H.RNN_DROPOUT,
        cnn_dropout=H.CNN_DROPOUT,
        teacher_forcing_ratio=H.TEACHER_FORCING_RATIO,
        sample_rate=H.AUDIO_SAMPLE_RATE,
        window_size=H.SPECT_WINDOW_SIZE,
        initialize=torch_weight_init)
    if H.USE_CUDA:
        model.cuda()
    logging.info(model_summary(model, line_length=100))
    # Optionally warm-start from a previously saved state dict.
    if H.PRELOAD_MODEL_PATH:
        path = os.path.join(H.EXPERIMENT, H.PRELOAD_MODEL_PATH)
        state = torch.load(path)
        model.load_state_dict(state)
        logging.info("Preloaded model: {}".format(path))
    criterion = LabelSmoothingLoss(padding_idx=0,
                                   label_smoothing=H.LABEL_SMOOTHING)
    sts_decoder = STSDecoder(vocab)
    scorer = Scorer()
    optimizer = optim.Adam(list(
        filter(lambda p: p.requires_grad, model.parameters())),
        amsgrad=False, betas=(0.9, 0.999), eps=1e-08, lr=H.LR,
        weight_decay=H.WEIGHT_DECAY)
    stopping = Stopping(model, patience=H.STOPPING_PATIENCE)
    scheduler = optim.lr_scheduler.LambdaLR(optimizer, lr_lambda=[H.LR_LAMBDA])
    tlogger = TensorboardLogger(root_dir=H.EXPERIMENT, experiment_dir=H.TIMESTAMP)  # PytorchLogger()
    checkpoint = Checkpoint(model, optimizer, stopping, m,
                            root_dir=H.EXPERIMENT, experiment_dir=H.TIMESTAMP,
                            restore_from=-1, interval=H.CHECKPOINT_INTERVAL,
                            verbose=0)
    trainer = Trainer(model, train_loader, optimizer, scheduler, criterion,
                      sts_decoder, scorer, H.MAX_GRAD_NORM)
    evaluator = Evaluator(model, valid_loader, criterion, sts_decoder, scorer)
    epoch_start = 1
    if H.CHECKPOINT_RESTORE:
        epoch_start = checkpoint.restore() + 1
    train_loader.batch_sampler.shuffle(epoch_start)
    epoch = epoch_start
    try:
        epoch_itr = tlogger.set_itr(range(epoch_start, H.MAX_EPOCHS + 1))
        for epoch in epoch_itr:
            # Defer Ctrl-C until the epoch finishes so metric/checkpoint
            # state stays consistent.
            with DelayedKeyboardInterrupt():
                m.train_loss, m.train_score, m.train_lr = trainer(epoch)
                m.valid_loss, m.valid_score = evaluator()
                if checkpoint:
                    checkpoint.step(epoch)
                stopping_flag = stopping.step(epoch, m.valid_loss, m.valid_score)
                epoch_itr.log_values(m.train_loss, m.train_score, m.train_lr,
                                     m.valid_loss, m.valid_score,
                                     stopping.best_score_epoch, stopping.best_score)
                if stopping_flag:
                    logger.info(
                        "Early stopping at epoch: %d, score %f" %
                        (stopping.best_score_epoch, stopping.best_score))
                    break
                # Re-shuffle batches for the next epoch.
                train_loader.batch_sampler.shuffle(epoch)
    except KeyboardInterrupt:
        logger.info("Training interrupted at: {}".format(epoch))
        pass
    # Persist final state, then export the best-scoring weights.
    checkpoint.create(epoch)
    model.load_state_dict(stopping.best_score_state)
    torch.save(model.state_dict(), os.path.join(H.EXPERIMENT, H.MODEL_NAME + '.tar'))
    logger.info(repr(tlogger))
    logger.info(repr(stopping))
    logger.info(repr(checkpoint))
    logger.info("Training end.")
def __init__(self, initializer=None, abstracts_preprocessor=None, ratings=None,
             config=None, process_parser=False, verbose=False, load_matrices=True,
             dump_matrices=True, train_more=True, random_seed=False,
             results_file_name='top_recommendations'):
    """
    Constructor of the RecommenderSystem.

    :param ModelInitializer initializer: A model initializer.
    :param AbstractsPreprocessor abstracts_preprocessor: A preprocessor of abstracts, if None then queried.
    :param int[][] ratings: Ratings matrix; if None, matrix gets queried from the database.
    :param boolean process_parser: A flag deciding to process the dataparser.
    :param boolean verbose: A flag deciding to print progress.
    :param boolean dump_matrices: A flag for saving output matrices.
    :param boolean train_more: train_more the collaborative filtering after loading matrices.
    :param boolean random_seed: A flag to determine if we will use random seed or not.
    :param str results_file_name: Top recommendations results' file name
    """
    if process_parser:
        DataParser.process()
    # Ratings matrix: query from the database unless supplied by the caller.
    if ratings is None:
        self.ratings = numpy.array(DataParser.get_ratings_matrix())
    else:
        self.ratings = ratings
    # Abstracts preprocessor: likewise built from the database if not given.
    if abstracts_preprocessor is None:
        self.abstracts_preprocessor = AbstractsPreprocessor(
            DataParser.get_abstracts(), *DataParser.get_word_distribution())
    else:
        self.abstracts_preprocessor = abstracts_preprocessor
    # Get configurations
    self.config = RecommenderConfiguration(config)
    # Set flags
    self.results_file_name = results_file_name + '.dat'
    self._verbose = verbose
    self._dump_matrices = dump_matrices
    self._load_matrices = load_matrices
    self._train_more = train_more
    self._split_type = 'user'
    self._random_seed = random_seed
    self.set_hyperparameters(self.config.get_hyperparameters())
    self.set_options(self.config.get_options())
    self.initializer = ModelInitializer(self.hyperparameters.copy(),
                                        self.n_iter, self._verbose)
    if self.config.get_error_metric() == 'RMS':
        self.evaluator = Evaluator(self.ratings, self.abstracts_preprocessor,
                                   self._random_seed, self._verbose)
    else:
        raise NameError("Not a valid error metric %s. Only option is 'RMS'"
                        % self.config.get_error_metric())
    # Initialize content based.
    if self.config.get_content_based() == 'None':
        self.content_based = ContentBased(self.initializer, self.evaluator,
                                          self.hyperparameters, self.options,
                                          self._verbose, self._load_matrices,
                                          self._dump_matrices)
    elif self.config.get_content_based() == 'LDA':
        self.content_based = LDARecommender(self.initializer, self.evaluator,
                                            self.hyperparameters, self.options,
                                            self._verbose, self._load_matrices,
                                            self._dump_matrices)
    elif self.config.get_content_based() == 'LDA2Vec':
        self.content_based = LDA2VecRecommender(
            self.initializer, self.evaluator, self.hyperparameters,
            self.options, self._verbose, self._load_matrices,
            self._dump_matrices)
    else:
        raise NameError("Not a valid content based %s. Options are 'None', "
                        "'LDA', 'LDA2Vec'" % self.config.get_content_based())
    # Initialize collaborative filtering.
    if self.config.get_collaborative_filtering() == 'ALS':
        is_hybrid = self.config.get_recommender() == 'hybrid'
        # Bug fix: only the *hybrid* recommender needs a real content-based
        # model; plain ALS is valid with content based 'None'. The original
        # raised here unconditionally, rejecting every ALS + 'None' setup
        # even though the error message only concerns the hybrid case.
        if is_hybrid and self.config.get_content_based() == 'None':
            raise NameError(
                "Not valid content based 'None' with hybrid recommender")
        self.collaborative_filtering = CollaborativeFiltering(
            self.initializer, self.evaluator, self.hyperparameters,
            self.options, self._verbose, self._load_matrices,
            self._dump_matrices, self._train_more, is_hybrid)
    elif self.config.get_collaborative_filtering() == 'SDAE':
        self.collaborative_filtering = SDAERecommender(
            self.initializer, self.evaluator, self.hyperparameters,
            self.options, self._verbose, self._load_matrices,
            self._dump_matrices)
        if not self.config.get_content_based() == 'None':
            raise NameError(
                "Not a valid content based %s with SDAE. You can only use "
                "'None'" % self.config.get_content_based())
    elif self.config.get_collaborative_filtering() == 'None':
        # Item-based recommendation does not use collaborative filtering,
        # but then a real content-based model is mandatory.
        if not self.config.get_recommender() == 'itembased':
            raise NameError(
                "None collaborative filtering is only valid with itembased recommender type")
        elif self.config.get_content_based() == 'None':
            raise NameError(
                "Not valid content based 'None' with item-based recommender")
        self.collaborative_filtering = None
    else:
        raise NameError("Not a valid collaborative filtering %s. "
                        "Only options are 'None', 'ALS', 'SDAE'"
                        % self.config.get_collaborative_filtering())
    # Initialize recommender
    if self.config.get_recommender() == 'itembased':
        self.recommender = self.content_based
    elif self.config.get_recommender() == 'userbased':
        self.recommender = self.collaborative_filtering
    elif self.config.get_recommender() == 'hybrid':
        self.recommender = self
    else:
        raise NameError("Invalid recommender type %s. "
                        "Only options are 'userbased','itembased', and 'hybrid'"
                        % self.config.get_recommender())
filename=absolute_path(content_dictionary_filename)) elif file_exists(basepaths_filename): print('Building content dictionary...') content_dictionary = ContentDictionary().build( basepaths_filename=absolute_path(basepaths_filename), dictionary_filename=absolute_path(content_dictionary_filename), url=args.remote_url, niceness=args.niceness) else: print("Error, neither %s or %s found" % (content_dictionary_filename, basepaths_filename)) sys.exit(1) if args.evaluate: print('Evaluating', args.theme_name, 'theme') evaluator = Evaluator(absolute_path(model_filename), content_dictionary) evaluator.save_results() else: model_class = LdaModel(absolute_path(model_filename), num_topics=args.num_topics) if model_class.no_pretrained_model_exists(): print('Training model with', args.num_topics, 'topics') model_class.train_model(content_dictionary=content_dictionary, cores=args.cores) else: print('Loading model') model = model_class.load_model() corpus = model_class.load_corpus()
def __init__(self, initializer=None, abstracts_preprocessor=None, ratings=None,
             config=None, process_parser=False, verbose=False, load_matrices=True,
             dump_matrices=True, train_more=True):
    """
    Constructor of the RecommenderSystem.

    :param ModelInitializer initializer: A model initializer.
    :param AbstractsPreprocessor abstracts_preprocessor: A preprocessor of abstracts, if None then queried.
    :param int[][] ratings: Ratings matrix; if None, matrix gets queried from the database.
    :param boolean process_parser: A Flag deceiding process the dataparser.
    :param boolean verbose: A flag deceiding to print progress.
    :param boolean dump_matrices: A flag for saving output matrices.
    :param boolean train_more: train_more the collaborative filtering after loading matrices.
    """
    if process_parser:
        DataParser.process()

    # Fall back to the database for anything the caller did not provide.
    self.ratings = numpy.array(DataParser.get_ratings_matrix()) if ratings is None else ratings
    if abstracts_preprocessor is not None:
        self.abstracts_preprocessor = abstracts_preprocessor
    else:
        self.abstracts_preprocessor = AbstractsPreprocessor(DataParser.get_abstracts(),
                                                            *DataParser.get_word_distribution())

    # Load configuration and derive hyperparameters/options from it.
    self.config = RecommenderConfiguration(config)
    self.set_hyperparameters(self.config.get_hyperparameters())
    self.set_options(self.config.get_options())

    # Behaviour flags.
    self._verbose = verbose
    self._dump_matrices = dump_matrices
    self._load_matrices = load_matrices
    self._train_more = train_more

    self.initializer = ModelInitializer(self.hyperparameters.copy(), self.n_iter, self._verbose)

    # Only RMS is supported as an error metric.
    if self.config.get_error_metric() != 'RMS':
        raise NameError("Not a valid error metric %s. Only option is 'RMS'"
                        % self.config.get_error_metric())
    self.evaluator = Evaluator(self.ratings, self.abstracts_preprocessor)

    # Content-based model: dispatch on the configured name.
    content_classes = {
        'None': ContentBased,
        'LDA': LDARecommender,
        'LDA2Vec': LDA2VecRecommender,
    }
    content_choice = self.config.get_content_based()
    if content_choice not in content_classes:
        raise NameError("Not a valid content based %s. Options are 'None', "
                        "'LDA', 'LDA2Vec'" % content_choice)
    self.content_based = content_classes[content_choice](self.initializer, self.evaluator,
                                                         self.hyperparameters, self.options,
                                                         self._verbose, self._load_matrices,
                                                         self._dump_matrices)

    # Collaborative filtering: ALS is the only supported option.
    if self.config.get_collaborative_filtering() != 'ALS':
        raise NameError("Not a valid collaborative filtering %s. "
                        "Only option is 'ALS'" % self.config.get_collaborative_filtering())
    self.collaborative_filtering = CollaborativeFiltering(self.initializer, self.evaluator,
                                                          self.hyperparameters, self.options,
                                                          self._verbose, self._load_matrices,
                                                          self._dump_matrices, self._train_more)

    # Select which trained model acts as the recommender.
    recommender_choice = self.config.get_recommender()
    if recommender_choice == 'itembased':
        self.recommender = self.content_based
    elif recommender_choice == 'userbased':
        self.recommender = self.collaborative_filtering
    else:
        raise NameError("Invalid recommender type %s. "
                        "Only options are 'userbased' and 'itembased'" % recommender_choice)
class GridSearch(object):
    """
    A class to perform grid search and find the best hyperparameters for a recommender.
    """

    def __init__(self, recommender, hyperparameters, verbose=True,
                 report_name='grid_search_results'):
        """
        Train number of recommenders using UV decomposition using different parameters.

        :param AbstractRecommender recommender: recommender to tune.
        :param dict hyperparameters: A dictionary of the hyperparameters,
            mapping each name to the list of values to try.
        :param boolean verbose: A flag to decide printing progress.
        :param str report_name: The name of the csv file in which the analysis
            of the grid search will be dumped.
        """
        self.recommender = recommender
        self.hyperparameters = hyperparameters
        self._verbose = verbose
        self.evaluator = Evaluator(recommender.get_ratings())
        # Maps config key (see get_key) -> {'train_recall': ..., 'test_recall': ...}
        self.all_errors = dict()
        self.results_file_name = report_name + '.csv'

    def get_all_combinations(self):
        """
        The method returns all possible combinations of the hyperparameters.

        :returns: array of dicts containing all combinations
        :rtype: list[dict]

        >>> GridSearch.get_all_combinations(gs)  # hyperparameters = {'_lambda': [0, 0.1], 'n_factors': [20, 40]}
        [{'n_factors': 20, '_lambda': 0}, {'n_factors': 40, '_lambda': 0},
         {'n_factors': 20, '_lambda': 0.1}, {'n_factors': 40, '_lambda': 0.1}]
        """
        # Sort names so the combination order is deterministic.
        names = sorted(self.hyperparameters)
        return [dict(zip(names, prod))
                for prod in it.product(*(self.hyperparameters[name] for name in names))]

    def train(self):
        """
        The method loops on all possible combinations of hyperparameters and calls
        the train and split method on the recommender. The train and test errors
        are saved and the hyperparameters that produced the best test error are returned.

        :returns: Pair of best hyperparameters dictionary, and list of lists of metrics' results
        :rtype: tuple(dict, float[][])
        """
        best_error = numpy.inf
        best_params = dict()
        train, test = self.recommender.evaluator.naive_split(self.recommender._split_type)
        all_results = [['n_factors', '_lambda', 'rmse', 'train_recall', 'test_recall',
                        'recall_at_200', 'ratio', 'mrr @ 5', 'ndcg @ 5', 'mrr @ 10', 'ndcg @ 10']]
        for hyperparameters in self.get_all_combinations():
            if self._verbose:
                print("Running config: %s" % hyperparameters)
            self.recommender.set_hyperparameters(hyperparameters)
            current_result = [hyperparameters['n_factors'], hyperparameters['_lambda']]
            self.recommender.train()
            current_result.extend(self.recommender.get_evaluation_report())
            all_results.append(current_result)
            # Bug fix: predictions must be recomputed for EVERY configuration.
            # The original cached them behind `if predictions is None:`, so all
            # configs after the first were scored with the first config's
            # (stale) predictions; the cached `predictions` was never used.
            rounded_predictions = self.recommender.rounded_predictions()
            test_recall = self.evaluator.calculate_recall(test, rounded_predictions)
            train_recall = self.evaluator.calculate_recall(self.recommender.get_ratings(),
                                                           rounded_predictions)
            if self._verbose:
                print('Train error: %f, Test error: %f' % (train_recall, test_recall))
            # Lower (1 - test_recall) is better, i.e. higher test recall wins.
            if 1 - test_recall < best_error:
                best_params = hyperparameters
                best_error = 1 - test_recall
            current_key = self.get_key(hyperparameters)
            self.all_errors[current_key] = dict()
            self.all_errors[current_key]['train_recall'] = train_recall
            self.all_errors[current_key]['test_recall'] = test_recall
        self.dump_csv(all_results)
        if self._verbose:
            print("Best config: %s" % best_params)
        return best_params, all_results

    def get_key(self, config):
        """
        Given a dict (config) the function generates a key that uniquely represents
        this config, to be used to store all errors.

        :param dict config: given configuration.
        :returns: string representing the unique key of the configuration
        :rtype: str

        >>> GridSearch.get_key(gs, {'n_iter': 1, 'n_factors': 200})
        'n_factors:200,n_iter:1'
        """
        generated_key = ''
        # Sort keys so equal configs always map to the same key.
        keys_array = sorted(config)
        for key in keys_array:
            generated_key += key + ':'
            generated_key += str(config[key]) + ','
        return generated_key.strip(',')

    def dump_csv(self, all_results):
        """
        Given some results as a list of lists, the function dumps to a csv file.

        :param str[][] all_results: all results from all runs.
        """
        base_dir = os.path.dirname(os.path.realpath(__file__))
        path = os.path.join(os.path.dirname(base_dir), 'matrices/%s' % self.results_file_name)
        # newline='' is required by the csv module; without it every row is
        # followed by a blank line on Windows.
        with open(path, "a", newline='') as f:
            writer = csv.writer(f)
            writer.writerows(all_results)
        if self._verbose:
            print("dumped to %s" % path)

    def get_all_errors(self):
        """
        The method returns all errors calculated for every configuration.

        :returns: containing every single computed test error.
        :rtype: dict
        """
        return self.all_errors
def __init__(self, use_database=True, verbose=True, load_matrices=True, dump=True,
             train_more=True, random_seed=False, config=None):
    """
    Setup the data and configuration for the recommenders.

    Either pulls ratings and abstracts from the database (use_database=True)
    or builds a tiny synthetic corpus of 8 abstracts and a deterministic
    10-user ratings matrix for offline testing.
    """
    if use_database:
        # Real data path: ratings matrix and abstracts come from the parser.
        self.ratings = numpy.array(DataParser.get_ratings_matrix())
        self.documents, self.users = self.ratings.shape
        self.abstracts_preprocessor = AbstractsPreprocessor(
            DataParser.get_abstracts(), *DataParser.get_word_distribution())
    else:
        # Synthetic fixture: 8 short abstracts keyed by document id.
        abstracts = {
            0: 'hell world berlin dna evolution',
            1: 'freiburg is green',
            2: 'the best dna is the dna of dinasours',
            3: 'truth is absolute',
            4: 'berlin is not that green',
            5: 'truth manifests itself',
            6: 'plato said truth is beautiful',
            7: 'freiburg has dna'
        }
        # Vocabulary of all distinct tokens across every abstract.
        vocab = set(
            itertools.chain(
                *list(map(lambda ab: ab.split(' '), abstracts.values()))))
        # word -> integer id mapping. NOTE(review): ids depend on set
        # iteration order, so they are not stable across runs/interpreters.
        w2i = dict(zip(vocab, range(len(vocab))))
        # (word_id, total count over all abstracts), counting whole tokens.
        word_to_count = [(w2i[word], sum(
            abstract.split(' ').count(word)
            for doc_id, abstract in abstracts.items())) for word in vocab]
        # Distinct (doc_id, word_id) pairs for every word in each abstract.
        article_to_word = list(
            set([(doc_id, w2i[word])
                 for doc_id, abstract in abstracts.items()
                 for word in abstract.split(' ')]))
        # (doc_id, word_id, count) triples. NOTE(review): this uses
        # str.count, which counts SUBSTRING occurrences (e.g. 'is' inside
        # 'this'), unlike the token-based count above — confirm intended.
        article_to_word_to_count = list(
            set([(doc_id, w2i[word], abstract.count(word))
                 for doc_id, abstract in abstracts.items()
                 for word in abstract.split(' ')]))
        self.abstracts_preprocessor = AbstractsPreprocessor(
            abstracts, word_to_count, article_to_word,
            article_to_word_to_count)
        self.documents, self.users = 8, 10
        # Deterministic 10x8 binary ratings: 1 whenever (article+user) % 3 == 0.
        self.ratings = numpy.array([[
            int(not bool((article + user) % 3))
            for article in range(self.documents)
        ] for user in range(self.users)])
    # Behaviour flags forwarded to the models below.
    self.verbose = verbose
    self.load_matrices = load_matrices
    self.dump = dump
    self.train_more = train_more
    self.random_seed = random_seed
    self.evaluator = Evaluator(self.ratings, self.abstracts_preprocessor,
                               self.random_seed, self.verbose)
    self.config = RecommenderConfiguration()
    self.hyperparameters = self.config.get_hyperparameters()
    self.options = self.config.get_options()
    self.initializer = ModelInitializer(self.hyperparameters.copy(),
                                        self.options['n_iterations'],
                                        self.verbose)