Example #1
    def setUp(self):
        """
        Setup method that is called at the beginning of each test.
        """
        self.documents, self.users = 18, 10
        documents_cnt, users_cnt = self.documents, self.users
        self.n_iterations = 15
        self.k_folds = 3
        self.hyperparameters = {'n_factors': 5, '_lambda': 0.01}
        self.options = {'n_iterations': self.n_iterations, 'k_folds': self.k_folds}
        self.initializer = ModelInitializer(self.hyperparameters.copy(), self.n_iterations)
        self.n_recommendations = 1

        def mock_get_ratings_matrix(self=None):
            return [[int(not bool((article + user) % 3)) for article in range(documents_cnt)]
                    for user in range(users_cnt)]

        self.ratings_matrix = numpy.array(mock_get_ratings_matrix())
        setattr(DataParser, "get_ratings_matrix", mock_get_ratings_matrix)

        self.evaluator = Evaluator(self.ratings_matrix)
        self.cf = CollaborativeFiltering(self.initializer, self.evaluator, self.hyperparameters,
                                         self.options, load_matrices=True)
        self.cf.train()
        self.cf.evaluator.k_folds = self.k_folds
        self.test_data = self.cf.test_data
        self.predictions = self.cf.get_predictions()
        self.rounded_predictions = self.cf.rounded_predictions()
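
For orientation, the mocked ratings matrix built above is a 10 x 18 binary matrix with exactly one third of its entries set to 1, so with k_folds = 3 each fold holds 20 of the 60 ratings. A minimal standalone sketch of the same construction:

import numpy

users_cnt, documents_cnt = 10, 18
ratings = numpy.array([[int((article + user) % 3 == 0)
                        for article in range(documents_cnt)]
                       for user in range(users_cnt)])
print(ratings.shape)                 # (10, 18)
print(numpy.count_nonzero(ratings))  # 60, i.e. 20 ratings per fold with k = 3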
Example #2
def main(args):
    logger = Logger(args.output_dir)
    args.logger = logger
    trainer = Trainer(args)
    evaluator = Evaluator(trainer)
    for i_epoch in range(0, args.epoch + 1):

        # train
        log_dict = {
            'i_epoch': i_epoch,
            'train_losses': [],  # per batch
            'test_bleus': []  # per sample
        }
        trainer.train_one_epoch(log_dict)

        # evaluation and logging
        logger.log('%d th epoch' % i_epoch)
        evaluator.bleu(log_dict)
        evaluator.sample_translation()
        log_dict_mean = {
            'i_epoch': log_dict['i_epoch'],
            'train_loss': np.mean(log_dict['train_losses']),
            'test_bleu': np.mean(log_dict['test_bleus'])
        }
        logger.dump(log_dict_mean)
        trainer.save_best(log_dict_mean)
        logger.log('-' * 10)
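
Each epoch therefore dumps exactly one aggregated record. A sketch of its shape, with field names taken from the code above and made-up values:

# Hypothetical aggregated record handed to logger.dump() once per epoch.
log_dict_mean = {'i_epoch': 3, 'train_loss': 2.41, 'test_bleu': 0.27}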
Example #3
    def runTest(self):
        m1, m2 = numpy.random.random((4, 8)), numpy.random.random((4, 8))
        self.assertTrue(abs(self.cf.evaluator.get_rmse(m1, m2) - numpy.sqrt(mean_squared_error(m1, m2))) < 1e-6)
        train, test = self.cf.evaluator.naive_split()
        self.assertEqual(numpy.count_nonzero(train) + numpy.count_nonzero(test),
                         numpy.count_nonzero(self.ratings_matrix))

        test_indices = self.cf.evaluator.get_kfold_indices()
        # k = 3
        first_fold_indices = test_indices[0::self.k_folds]
        second_fold_indices = test_indices[1::self.k_folds]
        third_fold_indices = test_indices[2::self.k_folds]
        train1, test1 = self.cf.evaluator.generate_kfold_matrix(first_fold_indices)
        train2, test2 = self.cf.evaluator.generate_kfold_matrix(second_fold_indices)
        train3, test3 = self.cf.evaluator.generate_kfold_matrix(third_fold_indices)

        total_ratings = numpy.count_nonzero(self.ratings_matrix)

        # ensure that each fold has 1/k of the total ratings
        k_inverse = 1 / self.k_folds
        self.assertTrue(abs(k_inverse - (numpy.count_nonzero(test1) / total_ratings)) < 1e-6)
        self.assertTrue(abs(k_inverse - (numpy.count_nonzero(test2) / total_ratings)) < 1e-6)
        self.assertTrue(abs(k_inverse - (numpy.count_nonzero(test3) / total_ratings)) < 1e-6)

        # assert that the folds don't intertwine
        self.assertTrue(numpy.all((train1 * test1) == 0))
        self.assertTrue(numpy.all((train2 * test2) == 0))
        self.assertTrue(numpy.all((train3 * test3) == 0))
        # assert that the test sets don't contain the same elements
        self.assertTrue(numpy.all((test1 * test2) == 0))
        self.assertTrue(numpy.all((test2 * test3) == 0))
        self.assertTrue(numpy.all((test1 * test3) == 0))

        evaluator = Evaluator(self.ratings_matrix)
        self.assertEqual(self.predictions.shape, self.ratings_matrix.shape)
        recall = evaluator.calculate_recall(self.ratings_matrix, self.predictions)
        # if predictions are perfect
        if recall == 1:
            for row in range(self.users):
                for col in range(self.documents):
                    self.assertEqual(self.rounded_predictions[row, col], self.ratings_matrix[row, col])

        # restore the unmodified rating matrix
        self.setUp()
        evaluator.ratings = self.ratings_matrix.copy()

        # MRR will always decrease as we set each user's highest-prediction
        # index to 0 in the rating matrix, zeroing out the top-n recommendations.
        mrr = []
        for i in range(self.users):
            evaluator.ratings[i, (numpy.argmax(self.predictions[i], axis=0))] = 0
            mrr.append(evaluator.calculate_mrr(self.n_recommendations, self.predictions,
                                               self.rounded_predictions, evaluator.ratings))
            if i > 1:
                self.assertLessEqual(mrr[i], mrr[i-1])
Example #4
def main(project_name):

    logger = Logger('_02_valid_model_{}'.format(project_name))
    logger.info('=' * 50)

    model_path = '_model/embedding_model_{}.pt'.format(project_name)
    logger.info('load model from {}'.format(model_path))
    model = torch.load(model_path)

    evaluator = Evaluator()
    evaluator.evaluate(model)
Example #5
 def runTest(self):
     evaluator = Evaluator(self.ratings_matrix)
     cf = CollaborativeFiltering(self.initializer,
                                 evaluator,
                                 self.initial_config,
                                 self.options,
                                 load_matrices=True)
     grid_search = GridSearch(cf, self.hyperparameters, False)
     self.checkKeyGenerator(grid_search)
     self.checkCombinationsGenerator(grid_search)
     self.checkGridSearch(grid_search)
Example #6
    def setUp(self):
        """
        Setup method that is called at the beginning of each test.
        """
        self.documents, self.users = 8, 10
        documents_cnt, users_cnt = self.documents, self.users
        self.n_iterations = 5
        self.n_factors = 5
        self.k_folds = 5
        self.hyperparameters = {'n_factors': self.n_factors}
        self.options = {'n_iterations': self.n_iterations, 'k_folds': self.k_folds}
        self.initializer = ModelInitializer(self.hyperparameters.copy(), self.n_iterations)

        def mock_process(self=None):
            pass

        def mock_get_abstracts(self=None):
            return {0: 'hell world berlin dna evolution', 1: 'freiburg is green',
                    2: 'the best dna is the dna of dinasours', 3: 'truth is absolute',
                    4: 'berlin is not that green', 5: 'truth manifests itself',
                    6: 'plato said truth is beautiful', 7: 'freiburg has dna'}

        def mock_get_ratings_matrix(self=None):
            return [[int(not bool((article + user) % 3)) for article in range(documents_cnt)]
                    for user in range(users_cnt)]

        def mock_get_word_distribution(self=None):
            abstracts = mock_get_abstracts()
            vocab = set(itertools.chain(*list(map(lambda ab: ab.split(' '), abstracts.values()))))
            w2i = dict(zip(vocab, range(len(vocab))))
            word_to_count = [(w2i[word], sum(abstract.split(' ').count(word)
                                             for doc_id, abstract in abstracts.items())) for word in vocab]
            article_to_word = list(set([(doc_id, w2i[word])
                                        for doc_id, abstract in abstracts.items() for word in abstract.split(' ')]))
            article_to_word_to_count = list(set([(doc_id, w2i[word], abstract.count(word))
                                                 for doc_id, abstract in abstracts.items()
                                                 for word in abstract.split(' ')]))
            return word_to_count, article_to_word, article_to_word_to_count

        abstracts = mock_get_abstracts()
        word_to_count, article_to_word,  article_to_word_to_count = mock_get_word_distribution()
        self.abstracts_preprocessor = AbstractsPreprocessor(abstracts, word_to_count,
                                                            article_to_word, article_to_word_to_count)
        self.ratings_matrix = numpy.array(mock_get_ratings_matrix())
        self.evaluator = Evaluator(self.ratings_matrix, self.abstracts_preprocessor)
        setattr(DataParser, "get_abstracts", mock_get_abstracts)
        setattr(DataParser, "process", mock_process)
        setattr(DataParser, "get_ratings_matrix", mock_get_ratings_matrix)
        setattr(DataParser, "get_word_distribution", mock_get_word_distribution)
Example #7
 def setUp(self):
     """
     Setting up the ratings, expected ratings and recommendations.
     The comments show where the matching recommendations occur.
     A matching recommendation occurs at an index from the recommendation_indices
     list where the corresponding rating and expected rating are both positive.
     """
     # 0  1  2  3  4  5  6  7  8
     self.ratings = numpy.array([
         [1, 1, 0, 0, 1, 0, 1, 0, 0],
         #    ^
         [0, 0, 1, 1, 0, 0, 0, 1, 0],
         #       ^
         [1, 1, 0, 1, 0, 0, 1, 0, 1],
         #          ^              ^
         [1, 0, 0, 0, 1, 0, 0, 0, 0],
         # ^
         [0, 0, 0, 0, 0, 0, 0, 0, 1]
     ])
     #
     # 0  1  2  3  4  5  6  7  8
     self.expected_ratings = numpy.array([
         [0, 1, 0, 0, 0, 0, 0, 0, 0],
         #    ^
         [0, 0, 1, 0, 0, 0, 0, 0, 0],
         #       ^
         [0, 0, 0, 1, 0, 0, 0, 0, 1],
         #          ^              ^
         [1, 0, 0, 0, 0, 0, 0, 0, 0],
         # ^
         [0, 1, 0, 0, 0, 0, 0, 0, 0]
     ])
     #
     self.recommendation_indices = numpy.array([
         [1],
         # 1 matches -> 1/1
         [3, 2],
         # 3 doesn't match, 2 matches -> 1/2
         [4, 6, 3, 0, 8],
         # 4,6,0 don't match, 3, 8 match -> 1/3, 1/5
         [0],
         # 0 matches -> 1/1
         [0]
         # no matches -> 0
     ])
     self.n_users, self.n_items = self.ratings.shape
     self.evaluator = Evaluator(self.ratings)
     self.evaluator.recs_loaded = True
     self.evaluator.recommendation_indices = self.recommendation_indices
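
A quick worked check of the fixture above, assuming the standard MRR definition (average over users of the reciprocal rank of the first matching recommendation); whether Evaluator.calculate_mrr also credits later matches, such as the 1/5 noted for the third user, is not covered by this snippet:

import numpy

# First-match reciprocal ranks read off the comments above, one per user.
reciprocal_ranks = numpy.array([1.0, 1 / 2, 1 / 3, 1.0, 0.0])
print(reciprocal_ranks.mean())  # ~0.5667 under the standard definition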
Example #8
    def __init__(self, recommender, hyperparameters, verbose=True, report_name='grid_search_results'):
        """
        Train a number of recommenders using UV decomposition with different parameters.

        :param AbstractRecommender recommender:
        :param dict hyperparameters: A dictionary of the hyperparameters.
        :param boolean verbose: A flag deciding whether to print progress.
        :param str report_name: The name of the csv file in which the analysis of the grid search will be dumped.
        """
        self.recommender = recommender
        self.hyperparameters = hyperparameters
        self._verbose = verbose
        self.evaluator = Evaluator(recommender.get_ratings())
        self.all_errors = dict()
        self.results_file_name = report_name + '.csv'
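
A minimal usage sketch, assuming cf is a CollaborativeFiltering instance built as in Example #5 and that the hyperparameters dict maps each name to the candidate values to sweep (the list-of-values layout is an assumption, not shown in these examples):

# Hypothetical candidate grid for the sweep.
hyperparameters = {'n_factors': [5, 10, 20], '_lambda': [0.01, 0.1]}
grid_search = GridSearch(cf, hyperparameters, verbose=False,
                         report_name='grid_search_results')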
Example #9
    def setUp(self):
        """
        Setup method that is called at the beginning of each test.
        """
        self.documents, self.users = 30, 4
        documents_cnt, users_cnt = self.documents, self.users
        self.n_factors = 5
        self.n_iterations = 20
        self.k_folds = 3
        self.hyperparameters = {'n_factors': self.n_factors, '_lambda': 0.01}
        self.options = {'k_folds': self.k_folds, 'n_iterations': self.n_iterations}
        self.initializer = ModelInitializer(self.hyperparameters.copy(), self.n_iterations)

        def mock_get_ratings_matrix(self=None):
            return [[int(not bool((article + user) % 3)) for article in range(documents_cnt)]
                    for user in range(users_cnt)]
        self.ratings_matrix = numpy.array(mock_get_ratings_matrix())
        self.evaluator = Evaluator(self.ratings_matrix)
        setattr(DataParser, "get_ratings_matrix", mock_get_ratings_matrix)
Example #10
def main():
    args = parse_args()
    update_config(args.cfg_file)

    if args.gpus:
        config.GPUS = args.gpus
    else:
        config.CUDA = False
    if args.workers:
        config.WORKERS = args.workers
    print('Using config:')
    pprint.pprint(config)

    torch.backends.cudnn.benchmark = config.CUDNN.BENCHMARK
    torch.backends.cudnn.deterministic = config.CUDNN.DETERMINISTIC
    torch.backends.cudnn.enabled = config.CUDNN.ENABLED

    if config.CUDA:
        os.environ["CUDA_VISIBLE_DEVICES"] = config.GPUS
    device = torch.device('cuda' if config.CUDA else 'cpu')

    # Redirect print to both console and log file
    sys.stdout = Logger(osp.join(config.OUTPUT_DIR, 'log-eval.txt'))

    # Create data loaders
    dataset = DataSet(config.DATASET.ROOT, config.DATASET.DATASET)

    normalizer = T.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225])
    transformer = T.Compose([
        T.Resize(config.MODEL.IMAGE_SIZE, interpolation=3),
        T.ToTensor(),
        normalizer,
    ])

    query_loader = DataLoader(
        Preprocessor(dataset.query,
                     root=osp.join(dataset.images_dir, dataset.query_path), transform=transformer),
        batch_size=config.TEST.BATCH_SIZE, num_workers=config.WORKERS,
        shuffle=False, pin_memory=True)

    gallery_loader = DataLoader(
        Preprocessor(dataset.gallery,
                     root=osp.join(dataset.images_dir, dataset.gallery_path), transform=transformer),
        batch_size=config.TEST.BATCH_SIZE, num_workers=config.WORKERS,
        shuffle=False, pin_memory=True)

    # Create model
    model = models.create(config.MODEL.NAME)

    # Load from checkpoint
    checkpoint = load_checkpoint(config.TEST.MODEL_FILE)
    print('best model at epoch: {}'.format(checkpoint['epoch']))
    model.load_state_dict(checkpoint['state_dict'], strict=False)

    # Set model
    model = nn.DataParallel(model).to(device)
    
    print('Test with best model:')
    evaluator = Evaluator(model)
    evaluator.evaluate(query_loader, gallery_loader, dataset.query,
                       dataset.gallery, config.TEST.OUTPUT_FEATURES)
Example #11
def run_training(H):
    # torch.cuda.is_available = lambda : False
    # torch.backends.cudnn.enabled=False
    torch.backends.cudnn.deterministic = True
    # torch.backends.cudnn.benchmark = True

    create_logger(H)

    random.seed(H.SEED)
    np.random.seed(H.SEED)
    torch.manual_seed(H.SEED)
    if torch.cuda.is_available():
        torch.cuda.manual_seed(H.SEED)
        torch.cuda.manual_seed_all(H.SEED)

    logger.info("Training start.")
    logger.info(repr(H))

    train_loader, valid_loader, vocab = create_data_pipelines(H)

    logger.info(train_loader.dataset)
    logger.info(valid_loader.dataset)

    m = Metric([('train_loss', np.inf), ('train_score', np.inf),
                ('valid_loss', np.inf), ('valid_score', 0), ('train_lr', 0),
                ('valid_cer', np.inf)])

    model = SpeechCNN(len(vocab),
                      input_size=256,
                      hidden_size=H.CNN_HIDDEN_SIZE,
                      dropout=H.CNN_DROPOUT,
                      initialize=torch_weight_init)
    if H.USE_CUDA:
        model.cuda()

    if H.PRELOAD_MODEL_PATH:
        path = os.path.join(H.EXPERIMENT, H.PRELOAD_MODEL_PATH)
        state = torch.load(path)
        model.load_state_dict(state)
        print("Preloaded model: {}".format(path))

    criterion = PytorchCTCLoss(vocab)

    optimizer = optim.SGD(list(
        filter(lambda p: p.requires_grad, model.parameters())),
                          lr=H.LR,
                          weight_decay=H.WEIGHT_DECAY,
                          momentum=H.MOMENTUM,
                          nesterov=H.NESTEROV)

    stopping = Stopping(model, patience=H.STOPPING_PATIENCE)

    scheduler = optim.lr_scheduler.LambdaLR(optimizer, lr_lambda=[H.LR_LAMBDA])

    ctc_decoder = CTCGreedyDecoder(vocab)

    scorer = Scorer(reduction='sum')

    tlogger = TensorboardLogger(root_dir=H.EXPERIMENT,
                                experiment_dir=H.TIMESTAMP)  # PytorchLogger()

    checkpoint = Checkpoint(model,
                            optimizer,
                            stopping,
                            m,
                            root_dir=H.EXPERIMENT,
                            experiment_dir=H.TIMESTAMP,
                            restore_from=-1,
                            interval=H.CHECKPOINT_INTERVAL,
                            verbose=0)

    trainer = Trainer(model, train_loader, optimizer, scheduler, criterion,
                      ctc_decoder, scorer, H.MAX_GRAD_NORM)

    evaluator = Evaluator(model, valid_loader, criterion, ctc_decoder, scorer)

    epoch_start = 1
    if H.CHECKPOINT_RESTORE:
        epoch_start = checkpoint.restore() + 1
        train_loader.batch_sampler.shuffle(epoch_start)

    epoch = epoch_start
    try:
        epoch_itr = tlogger.set_itr(range(epoch_start, H.MAX_EPOCHS + 1))

        for epoch in epoch_itr:

            with DelayedKeyboardInterrupt():

                m.train_loss, m.train_score, m.train_lr = trainer(epoch)

                m.valid_loss, m.valid_score = evaluator()

                if checkpoint:
                    checkpoint.step(epoch)

                stopping_flag = stopping.step(epoch, m.valid_loss,
                                              m.valid_score)

                epoch_itr.log_values(m.train_loss, m.train_score, m.train_lr,
                                     m.valid_loss, m.valid_score,
                                     stopping.best_score_epoch,
                                     stopping.best_score)

                if stopping_flag:
                    logger.info(
                        "Early stopping at epoch: %d, score %f" %
                        (stopping.best_score_epoch, stopping.best_score))
                    break

                train_loader.batch_sampler.shuffle(epoch)

    except KeyboardInterrupt:
        logger.info("Training interrupted at: {}".format(epoch))
        pass

    checkpoint.create(epoch)

    model.load_state_dict(stopping.best_score_state)
    torch.save(model.state_dict(),
               os.path.join(H.EXPERIMENT, H.MODEL_NAME + '.tar'))

    logger.info(repr(tlogger))
    logger.info(repr(stopping))
    logger.info(repr(checkpoint))

    logger.info("Training end.")
Example #12
    def runTest(self):
        train, test = self.cf.evaluator.naive_split()
        self.assertEqual(
            numpy.count_nonzero(train) + numpy.count_nonzero(test),
            numpy.count_nonzero(self.ratings_matrix))

        train_indices, test_indices = self.cf.evaluator.get_kfold_indices()
        # k = 3
        first_fold_indices = train_indices[0::self.k_folds], test_indices[
            0::self.k_folds]
        second_fold_indices = train_indices[1::self.k_folds], test_indices[
            1::self.k_folds]
        third_fold_indices = train_indices[2::self.k_folds], test_indices[
            2::self.k_folds]

        train1, test1 = self.cf.evaluator.generate_kfold_matrix(
            first_fold_indices[0], first_fold_indices[1])
        train2, test2 = self.cf.evaluator.generate_kfold_matrix(
            second_fold_indices[0], second_fold_indices[1])
        train3, test3 = self.cf.evaluator.generate_kfold_matrix(
            third_fold_indices[0], third_fold_indices[1])

        total_ratings = numpy.count_nonzero(self.ratings_matrix)

        # ensure that each fold has 1/k of the total ratings
        k_inverse = (1 / self.k_folds)
        self.assertEqual(k_inverse, numpy.count_nonzero(test1) / total_ratings)
        self.assertEqual(k_inverse, numpy.count_nonzero(test2) / total_ratings)
        self.assertEqual(k_inverse, numpy.count_nonzero(test3) / total_ratings)

        # assert that the folds don't intertwine
        self.assertTrue(numpy.all((train1 * test1) == 0))
        self.assertTrue(numpy.all((train2 * test2) == 0))
        self.assertTrue(numpy.all((train3 * test3) == 0))
        # assert that the test sets don't contain the same elements
        self.assertTrue(numpy.all((test1 * test2) == 0))
        self.assertTrue(numpy.all((test2 * test3) == 0))
        self.assertTrue(numpy.all((test1 * test3) == 0))

        evaluator = Evaluator(self.ratings_matrix)
        self.assertEqual(self.predictions.shape, self.ratings_matrix.shape)
        recall = evaluator.calculate_recall(self.ratings_matrix,
                                            self.predictions)
        # if predictions are perfect
        if recall == 1:
            for row in range(self.users):
                for col in range(self.documents):
                    self.assertEqual(self.rounded_predictions[row, col],
                                     self.ratings_matrix[row, col])

        # If we modify all the top predictions for half the users,
        # recall should be 0.5 by definition
        for i in range(0, self.users, 2):
            evaluator.ratings[i, self.predictions[i].nonzero()[0]] = 0
        recall_at_x = evaluator.recall_at_x(self.n_recommendations,
                                            self.predictions,
                                            self.ratings_matrix,
                                            self.rounded_predictions)
        self.assertEqual(0.5, recall_at_x)

        self.setUp()
        evaluator.ratings[:] = self.ratings_matrix

        # removing all top hits, should yield ndcg of 0 as number of recs is 1.
        for i in range(0, self.users):
            evaluator.ratings[i, self.predictions[i].nonzero()[0]] = 0
        ndcg = evaluator.calculate_ndcg(self.n_recommendations,
                                        self.predictions, self.ratings_matrix,
                                        self.test_data)

        self.assertEqual(0.0, ndcg)

        # restore the unmodified rating matrix
        self.setUp()
        evaluator.ratings[:] = self.ratings_matrix

        # MRR will always decrease as we set each user's highest-prediction
        # index to 0 in the rating matrix, zeroing out the top-n recommendations.
        mrr = []
        for i in range(self.users):
            mrr.append(
                evaluator.calculate_mrr(self.n_recommendations,
                                        self.predictions,
                                        self.rounded_predictions,
                                        self.test_data))
            evaluator.ratings[i,
                              (numpy.argmax(self.predictions[i], axis=0))] = 0
            if i > 1:
                self.assertLessEqual(mrr[i], mrr[i - 1])
Example #13
def run_training(H):
    # torch.cuda.is_available = lambda : False
    # torch.backends.cudnn.enabled=False
    torch.backends.cudnn.deterministic = True
    # torch.backends.cudnn.benchmark = True

    create_logger(H)

    random.seed(H.SEED)
    np.random.seed(H.SEED)
    torch.manual_seed(H.SEED)
    if torch.cuda.is_available():
        torch.cuda.manual_seed(H.SEED)
        torch.cuda.manual_seed_all(H.SEED)

    logger.info("Training start.")
    logger.info(repr(H))

    train_loader, valid_loader, vocab = create_data_pipelines(H)

    logger.info(train_loader.dataset)
    logger.info(valid_loader.dataset)

    m = Metric([('train_loss', np.inf), ('train_score', np.inf),
                ('valid_loss', np.inf), ('valid_score', 0), ('train_lr', 0),
                ('valid_cer', np.inf)])

    model = NeuralSpeechRecognizer(
        vocab,
        train_loader.dataset.max_seq_length,
        rnn_hidden_size=H.RNN_HIDDEN_SIZE,
        rnn_num_layers=H.RNN_NUM_LAYERS,
        rnn_dropout=H.RNN_DROPOUT,
        cnn_dropout=H.CNN_DROPOUT,
        teacher_forcing_ratio=H.TEACHER_FORCING_RATIO,
        sample_rate=H.AUDIO_SAMPLE_RATE,
        window_size=H.SPECT_WINDOW_SIZE,
        initialize=torch_weight_init)
    if H.USE_CUDA:
        model.cuda()

    logging.info(model_summary(model, line_length=100))

    if H.PRELOAD_MODEL_PATH:
        path = os.path.join(H.EXPERIMENT, H.PRELOAD_MODEL_PATH)
        state = torch.load(path)
        model.load_state_dict(state)
        logging.info("Preloaded model: {}".format(path))

    criterion = LabelSmoothingLoss(padding_idx=0,
                                   label_smoothing=H.LABEL_SMOOTHING)

    sts_decoder = STSDecoder(vocab)

    scorer = Scorer()

    optimizer = optim.Adam(list(
        filter(lambda p: p.requires_grad, model.parameters())),
                           amsgrad=False,
                           betas=(0.9, 0.999),
                           eps=1e-08,
                           lr=H.LR,
                           weight_decay=H.WEIGHT_DECAY)

    stopping = Stopping(model, patience=H.STOPPING_PATIENCE)

    scheduler = optim.lr_scheduler.LambdaLR(optimizer, lr_lambda=[H.LR_LAMBDA])

    tlogger = TensorboardLogger(root_dir=H.EXPERIMENT,
                                experiment_dir=H.TIMESTAMP)  # PytorchLogger()

    checkpoint = Checkpoint(model,
                            optimizer,
                            stopping,
                            m,
                            root_dir=H.EXPERIMENT,
                            experiment_dir=H.TIMESTAMP,
                            restore_from=-1,
                            interval=H.CHECKPOINT_INTERVAL,
                            verbose=0)

    trainer = Trainer(model, train_loader, optimizer, scheduler, criterion,
                      sts_decoder, scorer, H.MAX_GRAD_NORM)

    evaluator = Evaluator(model, valid_loader, criterion, sts_decoder, scorer)

    epoch_start = 1
    if H.CHECKPOINT_RESTORE:
        epoch_start = checkpoint.restore() + 1
        train_loader.batch_sampler.shuffle(epoch_start)

    epoch = epoch_start
    try:
        epoch_itr = tlogger.set_itr(range(epoch_start, H.MAX_EPOCHS + 1))

        for epoch in epoch_itr:

            with DelayedKeyboardInterrupt():

                m.train_loss, m.train_score, m.train_lr = trainer(epoch)

                m.valid_loss, m.valid_score = evaluator()

                if checkpoint:
                    checkpoint.step(epoch)

                stopping_flag = stopping.step(epoch, m.valid_loss,
                                              m.valid_score)

                epoch_itr.log_values(m.train_loss, m.train_score, m.train_lr,
                                     m.valid_loss, m.valid_score,
                                     stopping.best_score_epoch,
                                     stopping.best_score)

                if stopping_flag:
                    logger.info(
                        "Early stopping at epoch: %d, score %f" %
                        (stopping.best_score_epoch, stopping.best_score))
                    break

                train_loader.batch_sampler.shuffle(epoch)

    except KeyboardInterrupt:
        logger.info("Training interrupted at: {}".format(epoch))
        pass

    checkpoint.create(epoch)

    model.load_state_dict(stopping.best_score_state)
    torch.save(model.state_dict(),
               os.path.join(H.EXPERIMENT, H.MODEL_NAME + '.tar'))

    logger.info(repr(tlogger))
    logger.info(repr(stopping))
    logger.info(repr(checkpoint))

    logger.info("Training end.")
def train(model, project_name):

    sampler = Sampler()
    loader = ImgLoader('../../input_large_delf/train')
    evaluator = Evaluator()

    dir_model = '_model'
    os.makedirs(dir_model, exist_ok=True)

    # for training
    batch_size = 240
    group_size = 12
    iter_outside = 10
    iter_inside = 500

    optimizer = optim.Adam(filter(lambda p: p.requires_grad,
                                  model.parameters()),
                           lr=1e-4)

    for param_group in optimizer.param_groups:
        logger.info('starting learning rate: {:.6f}'.format(
            param_group['lr']))

    for ep in range(1, iter_outside + 1):

        logger.info('-' * 30)
        logger.info('epoch: {:d}'.format(ep))

        model.train()
        if ep > 1:
            set_batch_norm_eval(model)

        train_loss1 = 0
        train_loss3 = 0
        count_sample = 0
        ave_good_index = 0

        for _ in tqdm(range(iter_inside)):

            batch = torch.FloatTensor(batch_size * group_size, 40,
                                      1000).zero_()
            ids = sampler.get_sample(batch_size, group_size)

            for i in range(batch_size * group_size):
                batch[i] = loader.load_image('{}.delf'.format(ids[i]))

            batch_cuda = batch.cuda()

            # forward with requires_grad=False

            v_batch_no_bp = Variable(batch_cuda, volatile=True)
            optimizer.zero_grad()
            out = model.forward(v_batch_no_bp)

            batch_indices, num_good_index = get_apn_index(
                out, batch_size, group_size)

            # forward with requires_grad=True

            v_batch = Variable(batch_cuda[batch_indices, :, :])

            optimizer.zero_grad()
            out = model.forward(v_batch)

            out_anchor = out[:batch_size]
            hard_positive = out[batch_size:batch_size * 2]
            hard_negative = out[batch_size * 2:batch_size * 3]

            # calc loss

            loss1 = smooth_pairwise_loss(out_anchor, hard_positive) * 0.1
            loss3 = hard_negative_triplet_loss(out_anchor, hard_positive,
                                               hard_negative)

            loss = loss3

            loss.backward()
            optimizer.step()

            train_loss1 += float(loss1.data.cpu().numpy()) * batch_size
            train_loss3 += float(loss3.data.cpu().numpy()) * batch_size
            ave_good_index += num_good_index * batch_size
            count_sample += batch_size

        logger.info('train loss (pair-pos): {:.6f}'.format(train_loss1 /
                                                           count_sample))
        logger.info('train loss (triplet) : {:.6f}'.format(train_loss3 /
                                                           count_sample))
        logger.info('average number of far negative: {:.2f} / {:d}'.format(
            ave_good_index / count_sample, batch_size))

        evaluator.evaluate(model)

        if ep % 4 == 0 and ep != iter_outside:

            model_name = 'embedding_model_{}_ep{}.pt'.format(project_name, ep)
            logger.info('save model: {}'.format(model_name))
            torch.save(model, os.path.join(dir_model, model_name))

            if ep % 8 == 0:

                for param_group in optimizer.param_groups:
                    param_group['lr'] *= 0.1
                    logger.info('change learning rate into: {:.6f}'.format(
                        param_group['lr']))

    model_name = 'embedding_model_{}.pt'.format(project_name)
    logger.info('save model: {}'.format(model_name))
    torch.save(model, os.path.join(dir_model, model_name))
Example #15
    def __init__(self,
                 initializer=None,
                 abstracts_preprocessor=None,
                 ratings=None,
                 config=None,
                 process_parser=False,
                 verbose=False,
                 load_matrices=True,
                 dump_matrices=True,
                 train_more=True,
                 random_seed=False,
                 results_file_name='top_recommendations'):
        """
        Constructor of the RecommenderSystem.

        :param ModelInitializer initializer: A model initializer.
        :param AbstractsPreprocessor abstracts_preprocessor: A preprocessor of abstracts, if None then queried.
        :param int[][] ratings: Ratings matrix; if None, matrix gets queried from the database.
        :param boolean process_parser: A flag deciding whether to process the data parser.
        :param boolean verbose: A flag deciding whether to print progress.
        :param boolean dump_matrices: A flag for saving output matrices.
        :param boolean train_more: A flag to train the collaborative filtering further after loading matrices.
        :param boolean random_seed: A flag deciding whether to use a random seed.
        :param str results_file_name: Top recommendations results' file name
        """
        if process_parser:
            DataParser.process()

        if ratings is None:
            self.ratings = numpy.array(DataParser.get_ratings_matrix())
        else:
            self.ratings = ratings

        if abstracts_preprocessor is None:
            self.abstracts_preprocessor = AbstractsPreprocessor(
                DataParser.get_abstracts(),
                *DataParser.get_word_distribution())
        else:
            self.abstracts_preprocessor = abstracts_preprocessor

        # Get configurations
        self.config = RecommenderConfiguration(config)

        # Set flags
        self.results_file_name = results_file_name + '.dat'
        self._verbose = verbose
        self._dump_matrices = dump_matrices
        self._load_matrices = load_matrices
        self._train_more = train_more
        self._split_type = 'user'
        self._random_seed = random_seed

        self.set_hyperparameters(self.config.get_hyperparameters())
        self.set_options(self.config.get_options())

        self.initializer = ModelInitializer(self.hyperparameters.copy(),
                                            self.n_iter, self._verbose)

        if self.config.get_error_metric() == 'RMS':
            self.evaluator = Evaluator(self.ratings,
                                       self.abstracts_preprocessor,
                                       self._random_seed, self._verbose)
        else:
            raise NameError(
                "Not a valid error metric %s. Only option is 'RMS'" %
                self.config.get_error_metric())

        # Initialize content based.
        if self.config.get_content_based() == 'None':
            self.content_based = ContentBased(self.initializer, self.evaluator,
                                              self.hyperparameters,
                                              self.options, self._verbose,
                                              self._load_matrices,
                                              self._dump_matrices)
        elif self.config.get_content_based() == 'LDA':
            self.content_based = LDARecommender(self.initializer,
                                                self.evaluator,
                                                self.hyperparameters,
                                                self.options, self._verbose,
                                                self._load_matrices,
                                                self._dump_matrices)
        elif self.config.get_content_based() == 'LDA2Vec':
            self.content_based = LDA2VecRecommender(
                self.initializer, self.evaluator, self.hyperparameters,
                self.options, self._verbose, self._load_matrices,
                self._dump_matrices)
        else:
            raise NameError(
                "Not a valid content based %s. Options are 'None', "
                "'LDA', 'LDA2Vec'" % self.config.get_content_based())

        # Initialize collaborative filtering.
        if self.config.get_collaborative_filtering() == 'ALS':
            is_hybrid = self.config.get_recommender() == 'hybrid'
            if self.config.get_content_based() == 'None':
                raise NameError(
                    "Not valid content based 'None' with hybrid recommender")
            self.collaborative_filtering = CollaborativeFiltering(
                self.initializer, self.evaluator, self.hyperparameters,
                self.options, self._verbose, self._load_matrices,
                self._dump_matrices, self._train_more, is_hybrid)
        elif self.config.get_collaborative_filtering() == 'SDAE':
            self.collaborative_filtering = SDAERecommender(
                self.initializer, self.evaluator, self.hyperparameters,
                self.options, self._verbose, self._load_matrices,
                self._dump_matrices)
            if not self.config.get_content_based() == 'None':
                raise NameError(
                    "Not a valid content based %s with SDAE. You can only use 'None'"
                    % self.config.get_content_based())
        elif self.config.get_collaborative_filtering() == 'None':
            if not self.config.get_recommender() == 'itembased':
                raise NameError(
                    "None collaborative filtering is only valid with itembased recommender type"
                )
            elif self.config.get_content_based() == 'None':
                raise NameError(
                    "Not valid content based 'None' with item-based recommender"
                )
            self.collaborative_filtering = None
        else:
            raise NameError("Not a valid collaborative filtering %s. "
                            "Only options are 'None', 'ALS', 'SDAE'" %
                            self.config.get_collaborative_filtering())

        # Initialize recommender
        if self.config.get_recommender() == 'itembased':
            self.recommender = self.content_based
        elif self.config.get_recommender() == 'userbased':
            self.recommender = self.collaborative_filtering
        elif self.config.get_recommender() == 'hybrid':
            self.recommender = self
        else:
            raise NameError(
                "Invalid recommender type %s. "
                "Only options are 'userbased','itembased', and 'hybrid'" %
                self.config.get_recommender())
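
A minimal construction sketch for the class documented above, assuming it is named RecommenderSystem and that a default configuration with the 'RMS' error metric is available; passing a small mock ratings matrix keeps DataParser from being queried for ratings, though abstracts are still fetched through DataParser unless an abstracts_preprocessor is supplied:

import numpy

# Hypothetical call; every other argument falls back to its default.
ratings = numpy.array([[int((article + user) % 3 == 0) for article in range(8)]
                       for user in range(10)])
recommender = RecommenderSystem(ratings=ratings, verbose=True,
                                load_matrices=False, dump_matrices=False)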
Example #16
            filename=absolute_path(content_dictionary_filename))
    elif file_exists(basepaths_filename):
        print('Building content dictionary...')
        content_dictionary = ContentDictionary().build(
            basepaths_filename=absolute_path(basepaths_filename),
            dictionary_filename=absolute_path(content_dictionary_filename),
            url=args.remote_url,
            niceness=args.niceness)
    else:
        print("Error, neither %s or %s found" %
              (content_dictionary_filename, basepaths_filename))
        sys.exit(1)

    if args.evaluate:
        print('Evaluating', args.theme_name, 'theme')
        evaluator = Evaluator(absolute_path(model_filename),
                              content_dictionary)
        evaluator.save_results()

    else:
        model_class = LdaModel(absolute_path(model_filename),
                               num_topics=args.num_topics)

        if model_class.no_pretrained_model_exists():
            print('Training model with', args.num_topics, 'topics')
            model_class.train_model(content_dictionary=content_dictionary,
                                    cores=args.cores)
        else:
            print('Loading model')

        model = model_class.load_model()
        corpus = model_class.load_corpus()
Example #17
def main():
    args = parse_args()
    update_config(args.cfg_file)

    if args.gpus:
        config.GPUS = args.gpus
    else:
        config.CUDA = False
    if args.workers:
        config.WORKERS = args.workers
    print('Using config:')
    pprint.pprint(config)

    if args.manualSeed is None:
        args.manualSeed = random.randint(1, 10000)
    random.seed(args.manualSeed)
    np.random.seed(args.manualSeed)
    torch.manual_seed(args.manualSeed)
    if config.CUDA:
        torch.cuda.manual_seed_all(args.manualSeed)

    torch.backends.cudnn.benchmark = config.CUDNN.BENCHMARK
    torch.backends.cudnn.deterministic = config.CUDNN.DETERMINISTIC
    torch.backends.cudnn.enabled = config.CUDNN.ENABLED

    if config.CUDA:
        os.environ["CUDA_VISIBLE_DEVICES"] = config.GPUS
    device = torch.device('cuda' if config.CUDA else 'cpu')

    # Redirect print to both console and log file
    sys.stdout = Logger(osp.join(config.OUTPUT_DIR, 'log.txt'))

    # Create data loaders
    dataset = DataSet(config.DATASET.ROOT, config.DATASET.DATASET)
    normalizer = T.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225])

    train_transformer = T.Compose([
        T.RandomSizedRectCrop(*config.MODEL.IMAGE_SIZE),
        T.RandomHorizontalFlip(),
        T.RandomRotation(10),
        T.ColorJitter(0.2, 0.2, 0.2),
        T.ToTensor(),
        normalizer,
        T.RandomErasing(EPSILON=config.DATASET.RE),
    ])
    test_transformer = T.Compose([
        T.Resize(config.MODEL.IMAGE_SIZE, interpolation=3),
        T.ToTensor(),
        normalizer,
    ])
    train_loader = DataLoader(UnsupervisedCamStylePreprocessor(
        dataset.train,
        root=osp.join(dataset.images_dir, dataset.train_path),
        camstyle_root=osp.join(dataset.images_dir,
                               dataset.train_camstyle_path),
        num_cam=dataset.num_cam,
        use_gan=True,
        transform=train_transformer),
                              batch_size=config.TRAIN.BATCH_SIZE,
                              num_workers=config.WORKERS,
                              shuffle=config.TRAIN.SHUFFLE,
                              pin_memory=True,
                              drop_last=False)

    query_loader = DataLoader(Preprocessor(dataset.query,
                                           root=osp.join(
                                               dataset.images_dir,
                                               dataset.query_path),
                                           transform=test_transformer),
                              batch_size=config.TEST.BATCH_SIZE,
                              num_workers=config.WORKERS,
                              shuffle=False,
                              pin_memory=True)

    gallery_loader = DataLoader(Preprocessor(dataset.gallery,
                                             root=osp.join(
                                                 dataset.images_dir,
                                                 dataset.gallery_path),
                                             transform=test_transformer),
                                batch_size=config.TEST.BATCH_SIZE,
                                num_workers=config.WORKERS,
                                shuffle=False,
                                pin_memory=True)

    # Create model
    model = models.create(config.MODEL.NAME,
                          pretrained=config.MODEL.PRETRAINED,
                          num_classes=dataset.num_train_ids)

    # Memory Network
    num_tgt = len(dataset.train)
    memory = models.create('memory', config.MODEL.FEATURES, num_tgt)

    # Load from checkpoint
    if config.TRAIN.RESUME:
        checkpoint = load_checkpoint(config.TRAIN.CHECKPOINT)
        model.load_state_dict(checkpoint['state_dict'], strict=False)
        memory.load_state_dict(checkpoint['state_dict_memory'], strict=False)
        print("=> Start epoch {} ".format(checkpoint['epoch']))

    # Set model
    model = nn.DataParallel(model).to(device)
    memory = memory.to(device)

    # Optimizer
    base_param_ids = set(map(id, model.module.base.parameters()))

    base_params_need_for_grad = filter(lambda p: p.requires_grad,
                                       model.module.base.parameters())

    new_params = [p for p in model.parameters() if id(p) not in base_param_ids]
    param_groups = [{
        'params': base_params_need_for_grad,
        'lr_mult': 0.1
    }, {
        'params': new_params,
        'lr_mult': 1.0
    }]

    optimizer = get_optimizer(config, param_groups)

    # Trainer
    trainer = Trainer(config, model, memory)

    def adjust_lr(epoch):
        step_size = config.TRAIN.LR_STEP
        lr = config.TRAIN.LR * (config.TRAIN.LR_FACTOR**(epoch // step_size))
        for g in optimizer.param_groups:
            g['lr'] = lr * g.get('lr_mult', 1)
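    # For illustration (hypothetical values): with config.TRAIN.LR = 0.1,
    # LR_FACTOR = 0.1 and LR_STEP = 40, adjust_lr keeps the base lr at 0.1 for
    # epochs 0-39 and drops it to 0.01 from epoch 40 on; the parameter group
    # created above with lr_mult = 0.1 (the pretrained base) always runs at
    # one tenth of that value.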

    best_r1 = 0.0
    # Start training
    for epoch in range(config.TRAIN.BEGIN_EPOCH, config.TRAIN.END_EPOCH):
        # lr_scheduler.step()
        adjust_lr(epoch)
        trainer.train(epoch, train_loader, optimizer)

        print('Test with latest model:')
        evaluator = Evaluator(model)
        r1 = evaluator.evaluate(query_loader, gallery_loader, dataset.query,
                                dataset.gallery, config.TEST.OUTPUT_FEATURES)

        if r1 > best_r1:
            best_r1 = r1
            save_checkpoint(
                {
                    'state_dict': model.module.state_dict(),
                    'state_dict_memory': memory.state_dict(),
                    'epoch': epoch + 1,
                },
                fpath=osp.join(config.OUTPUT_DIR, 'checkpoint.pth.tar'))

        print('\n * Finished epoch {:3d} \n'.format(epoch))

    # Final test
    print('Test with best model:')
    evaluator = Evaluator(model)
    checkpoint = load_checkpoint(
        osp.join(config.OUTPUT_DIR, 'checkpoint.pth.tar'))
    print('best model at epoch: {}'.format(checkpoint['epoch']))
    model.module.load_state_dict(checkpoint['state_dict'])
    evaluator.evaluate(query_loader, gallery_loader, dataset.query,
                       dataset.gallery, config.TEST.OUTPUT_FEATURES)
Example #18
def train(model, project_name):

    sampler = SubSampler()
    list_train_imgs = sampler.get_train_imgs()
    dataset = LandmarkDataset('../../input_large_delf/train', list_train_imgs)
    evaluator = Evaluator()

    dir_model = '_model'
    os.makedirs(dir_model, exist_ok=True)

    # for training
    batch_size = 240
    group_size = 12
    iter_outside = 10
    iter_inside = 600

    optimizer = optim.Adam(filter(lambda p: p.requires_grad, model.parameters()), lr=1e-3)
    scheduler = lr_scheduler.CosineAnnealingLR(optimizer, T_max=iter_outside * iter_inside)
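    # CosineAnnealingLR (eta_min defaults to 0) anneals the lr from 1e-3 towards
    # 0 over T_max = 10 * 600 = 6000 scheduler.step() calls, i.e. the whole run,
    # following the factor 0.5 * (1 + cos(pi * t / T_max)).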

    for param_group in optimizer.param_groups:
        logger.info('starting learning rate: {:.6f}'.format(param_group['lr']))

    for ep in range(1, iter_outside + 1):

        logger.info('-' * 30)
        logger.info('epoch: {:d}'.format(ep))

        for param_group in optimizer.param_groups:
            logger.info('current learning rate: {:.8f}'.format(param_group['lr']))

        model.train()
        if ep > 1:
            set_batch_norm_eval(model)

        train_loss1 = 0
        train_loss3 = 0
        count_sample = 0
        ave_good_index = 0

        pt_sampler = PyTorchSampler(sampler, batch_size, group_size, iter_inside)

        dataloader = DataLoader(dataset, batch_sampler=pt_sampler, num_workers=8)

        for batch in tqdm(dataloader):
            batch_cuda = batch.cuda()

            # forward with requires_grad=False

            with torch.no_grad():
                v_batch_no_bp = batch_cuda
                optimizer.zero_grad()
                out = model.forward(v_batch_no_bp)

                batch_indices, num_good_index = get_apn_index(out, batch_size, group_size)

            # forward with requires_grad=True

            v_batch = batch_cuda[batch_indices, ...]

            optimizer.zero_grad()
            out = model.forward(v_batch)

            out_anchor = out[:batch_size]
            hard_positive = out[batch_size:batch_size*2]
            hard_negative = out[batch_size*2:batch_size*3]

            # calc loss

            loss1 = smooth_pairwise_loss(out_anchor, hard_positive) * 0.1
            loss3 = hard_negative_triplet_loss(out_anchor, hard_positive, hard_negative)

            loss = loss3

            loss.backward()
            optimizer.step()
            scheduler.step()

            train_loss1 += float(loss1.data.cpu().numpy()) * batch_size
            train_loss3 += float(loss3.data.cpu().numpy()) * batch_size
            ave_good_index += num_good_index * batch_size
            count_sample += batch_size

        logger.info('train loss (pair-pos): {:.6f}'.format(train_loss1 / count_sample))
        logger.info('train loss (triplet) : {:.6f}'.format(train_loss3 / count_sample))
        logger.info('average number of far negative: {:.2f} / {:d}'.format(ave_good_index / count_sample, batch_size))

        evaluator.evaluate(model)

        if ep % 4 == 0 and ep != iter_outside:

            model_name = 'embedding_model_{}_ep{}.pt'.format(project_name, ep)
            logger.info('save model: {}'.format(model_name))
            torch.save(model, os.path.join(dir_model, model_name))

    model_name = 'embedding_model_{}.pt'.format(project_name)
    logger.info('save model: {}'.format(model_name))
    torch.save(model, os.path.join(dir_model, model_name))
Example #19
    def __init__(self,
                 use_database=True,
                 verbose=True,
                 load_matrices=True,
                 dump=True,
                 train_more=True,
                 random_seed=False,
                 config=None):
        """
        Setup the data and configuration for the recommenders.
        """
        if use_database:
            self.ratings = numpy.array(DataParser.get_ratings_matrix())
            self.documents, self.users = self.ratings.shape
            self.abstracts_preprocessor = AbstractsPreprocessor(
                DataParser.get_abstracts(),
                *DataParser.get_word_distribution())
        else:
            abstracts = {
                0: 'hell world berlin dna evolution',
                1: 'freiburg is green',
                2: 'the best dna is the dna of dinasours',
                3: 'truth is absolute',
                4: 'berlin is not that green',
                5: 'truth manifests itself',
                6: 'plato said truth is beautiful',
                7: 'freiburg has dna'
            }

            vocab = set(
                itertools.chain(
                    *list(map(lambda ab: ab.split(' '), abstracts.values()))))
            w2i = dict(zip(vocab, range(len(vocab))))
            word_to_count = [(w2i[word],
                              sum(
                                  abstract.split(' ').count(word)
                                  for doc_id, abstract in abstracts.items()))
                             for word in vocab]
            article_to_word = list(
                set([(doc_id, w2i[word])
                     for doc_id, abstract in abstracts.items()
                     for word in abstract.split(' ')]))
            article_to_word_to_count = list(
                set([(doc_id, w2i[word], abstract.count(word))
                     for doc_id, abstract in abstracts.items()
                     for word in abstract.split(' ')]))
            self.abstracts_preprocessor = AbstractsPreprocessor(
                abstracts, word_to_count, article_to_word,
                article_to_word_to_count)
            self.documents, self.users = 8, 10
            self.ratings = numpy.array([[
                int(not bool((article + user) % 3))
                for article in range(self.documents)
            ] for user in range(self.users)])

        self.verbose = verbose
        self.load_matrices = load_matrices
        self.dump = dump
        self.train_more = train_more
        self.random_seed = random_seed
        self.evaluator = Evaluator(self.ratings, self.abstracts_preprocessor,
                                   self.random_seed, self.verbose)
        self.config = RecommenderConfiguration()
        self.hyperparameters = self.config.get_hyperparameters()
        self.options = self.config.get_options()
        self.initializer = ModelInitializer(self.hyperparameters.copy(),
                                            self.options['n_iterations'],
                                            self.verbose)
Example #20
    def __init__(self, initializer=None, abstracts_preprocessor=None, ratings=None, config=None,
                 process_parser=False, verbose=False, load_matrices=True, dump_matrices=True, train_more=True):
        """
        Constructor of the RecommenderSystem.

        :param ModelInitializer initializer: A model initializer.
        :param AbstractsPreprocessor abstracts_preprocessor: A preprocessor of abstracts, if None then queried.
        :param int[][] ratings: Ratings matrix; if None, matrix gets queried from the database.
        :param boolean process_parser: A flag deciding whether to process the data parser.
        :param boolean verbose: A flag deciding whether to print progress.
        :param boolean dump_matrices: A flag for saving output matrices.
        :param boolean train_more: A flag to train the collaborative filtering further after loading matrices.
        """
        if process_parser:
            DataParser.process()

        if ratings is None:
            self.ratings = numpy.array(DataParser.get_ratings_matrix())
        else:
            self.ratings = ratings

        if abstracts_preprocessor is None:
            self.abstracts_preprocessor = AbstractsPreprocessor(DataParser.get_abstracts(),
                                                                *DataParser.get_word_distribution())
        else:
            self.abstracts_preprocessor = abstracts_preprocessor

        # Get configurations
        self.config = RecommenderConfiguration(config)
        self.set_hyperparameters(self.config.get_hyperparameters())
        self.set_options(self.config.get_options())

        # Set flags
        self._verbose = verbose
        self._dump_matrices = dump_matrices
        self._load_matrices = load_matrices
        self._train_more = train_more

        self.initializer = ModelInitializer(self.hyperparameters.copy(), self.n_iter, self._verbose)

        if self.config.get_error_metric() == 'RMS':
            self.evaluator = Evaluator(self.ratings, self.abstracts_preprocessor)
        else:
            raise NameError("Not a valid error metric %s. Only option is 'RMS'" % self.config.get_error_metric())

        # Initialize content based.
        if self.config.get_content_based() == 'None':
            self.content_based = ContentBased(self.initializer, self.evaluator, self.hyperparameters, self.options,
                                              self._verbose, self._load_matrices, self._dump_matrices)
        elif self.config.get_content_based() == 'LDA':
            self.content_based = LDARecommender(self.initializer, self.evaluator, self.hyperparameters, self.options,
                                                self._verbose, self._load_matrices, self._dump_matrices)
        elif self.config.get_content_based() == 'LDA2Vec':
            self.content_based = LDA2VecRecommender(self.initializer, self.evaluator, self.hyperparameters,
                                                    self.options, self._verbose,
                                                    self._load_matrices, self._dump_matrices)
        else:
            raise NameError("Not a valid content based %s. Options are 'None', "
                            "'LDA', 'LDA2Vec'" % self.config.get_content_based())

        # Initialize collaborative filtering.
        if self.config.get_collaborative_filtering() == 'ALS':
            self.collaborative_filtering = CollaborativeFiltering(self.initializer, self.evaluator,
                                                                  self.hyperparameters, self.options,
                                                                  self._verbose, self._load_matrices,
                                                                  self._dump_matrices, self._train_more)
        else:
            raise NameError("Not a valid collaborative filtering %s. "
                            "Only option is 'ALS'" % self.config.get_collaborative_filtering())

        # Initialize recommender
        if self.config.get_recommender() == 'itembased':
            self.recommender = self.content_based
        elif self.config.get_recommender() == 'userbased':
            self.recommender = self.collaborative_filtering
        else:
            raise NameError("Invalid recommender type %s. "
                            "Only options are 'userbased' and 'itembased'" % self.config.get_recommender())