def run(self, epochs=70, batch_size=1000, lambda_i=0.0, lambda_j=0.0, learning_rate=0.01, topK=1500, sgd_mode='adagrad', export_results=True, export_r_hat=False):
    """
    Shortcut to run the model after the validation procedure, allowing the export
    of the scores on the playlists or of the estimated csr matrix.

    :param epochs (int): number of training epochs
    :param batch_size (int): after how many items the params should be updated
    :param lambda_i (float): first regularization term
    :param lambda_j (float): second regularization term
    :param learning_rate (float): algorithm learning rate
    :param topK (int): how many elements should be taken into account while computing URM*W
    :param sgd_mode (string): optimization algorithm
    :param export_results (bool): export a ready-to-kaggle csv with the predicted songs for each playlist
    :param export_r_hat (bool): whether to export or not the estimated csr matrix;
        only honoured when export_results is False (see the elif below)
    """
    # Train on the full URM; validation is effectively disabled by scheduling it
    # past the last epoch (start_validation_after_N_epochs=epochs + 1) and by
    # passing no test data / user ids.
    self.fit(URM_train=d.get_urm(), epochs=epochs, URM_test=None, user_ids=None,
             batch_size=batch_size, validate_every_N_epochs=1,
             start_validation_after_N_epochs=epochs + 1, lambda_i=lambda_i,
             lambda_j=lambda_j, learning_rate=learning_rate, topK=topK,
             sgd_mode=sgd_mode)

    if export_results:
        print('exporting results')
        # Top-10 recommendations for the target playlists, excluding items
        # already present in each playlist.
        recs = self.recommend_batch(d.get_target_playlists(), N=10, urm=d.get_urm(),
                                    filter_already_liked=True, with_scores=False,
                                    items_to_exclude=[], verbose=False)
        # self._print(...) builds the export file name from the hyper-parameters.
        importexport.exportcsv(
            recs, 'submission',
            self._print(epochs=epochs, batch_size=batch_size, lambda_i=lambda_i,
                        lambda_j=lambda_j, learning_rate=learning_rate, topK=topK,
                        sgd_mode=sgd_mode))
    elif export_r_hat:
        print('saving estimated urm')
        self.save_r_hat()
def run(self, num_factors, urm_train=None, urm=None, urm_test=None, targetids=None, with_scores=False, export=True, verbose=True):
    """
    Run the model, evaluate it and optionally export the results to a file.

    Parameters
    ----------
    num_factors : int, number of latent factors
    urm_train : csr matrix, URM used to fit the model.
        If None, data.get_urm_train() is used. The targetids correspond to its row indexes.
    urm : unused; kept in the signature for backward compatibility with callers
    urm_test : csr matrix, urm where to test the model. If None, use: data.get_urm_test()
    targetids : list, target user ids. If None, use: data.get_target_playlists()
    with_scores : unused; kept in the signature for backward compatibility with callers
    export : bool, whether to export the recommendations to a csv
    verbose : bool, whether to log progress information

    Returns
    -------
    recs: (list) recommendations
    map10: (float or None) MAP@10 for the provided recommendations
    """
    start = time.time()

    # Load defaults lazily: only hit the data layer for arguments the caller
    # did not provide. (The previous version also eagerly loaded the ICM,
    # which was never used — that dead load has been removed.)
    urm_train = data.get_urm_train() if urm_train is None else urm_train
    urm_test = data.get_urm_test() if urm_test is None else urm_test
    targetids = data.get_target_playlists() if targetids is None else targetids

    self.fit(urm_train=urm_train, num_factors=num_factors)
    recs = self.recommend_batch(userids=targetids)

    map10 = None
    if len(recs) > 0:
        map10 = self.evaluate(recs, test_urm=urm_test, verbose=verbose)
    else:
        log.warning('No recommendations available, skip evaluation')

    if export:
        exportcsv(recs, path='submission', name=self.name, verbose=verbose)

    if verbose:
        log.info('Run in: {:.2f}s'.format(time.time() - start))

    return recs, map10
def run(self, distance, urm=None, icm=None, urm_test=None, targetids=None, k=100, shrink=10, threshold=0, alpha=None, beta=None, l=None, c=None, with_scores=False, export=True, verbose=True): """ Run the model and export the results to a file Parameters ---------- distance : str, distance metric urm : csr matrix, URM. If None, used: data.get_urm_train(). This should be the entire URM for which the targetids corresponds to the row indexes. icm : csr matrix, ICM. If None, used: data.get_icm() urm_test : csr matrix, urm where to test the model. If None, use: data.get_urm_test() targetids : list, target user ids. If None, use: data.get_target_playlists() k : int, K nearest neighbour to consider shrink : float, shrink term used in the normalization threshold : float, all the values under this value are cutted from the final result Returns ------- recs: (list) recommendations map10: (float) MAP10 for the provided recommendations """ # _urm = data.get_urm_train() # _icm = data.get_icm() # _urm_test = data.get_urm_test() # _targetids = data.get_target_playlists() #_targetids = data.get_all_playlists() start = time.time() # urm = _urm if urm is None else urm # icm = _icm if icm is None else icm # urm_test = _urm_test if urm_test is None else urm_test # targetids = _targetids if targetids is None else targetids self._print(distance, k, shrink, threshold, alpha, beta, l, c) self.fit(urm, icm=icm, k=k, distance=distance, shrink=shrink, threshold=threshold, alpha=alpha, beta=beta, l=l, c=c) recs = self.recommend_batch(userids=targetids, urm=urm, N=10, verbose=verbose) map10 = None if len(recs) > 0: map10 = self.evaluate(recs, test_urm=urm_test, verbose=verbose) else: log.warning('No recommendations available, skip evaluation') if export: exportcsv(recs, path='submission', name='{}_{}'.format(self.name,distance), verbose=verbose) if verbose: log.info('Run in: {:.2f}s'.format(time.time()-start)) return recs, map10
def run(self, distance, ucm_train=None, urm=None, urm_test=None, targetids=None, k=100, shrink=10, threshold=0, implicit=True, alpha=None, beta=None, l=None, c=None, with_scores=False, export=True, verbose=True):
    """
    Fit the model on the UCM, recommend for the target users, evaluate and
    optionally export the recommendations to a csv file.

    Parameters
    ----------
    distance : str, distance metric
    ucm_train : csr matrix used to fit the model; defaults to data.get_ucm_train()
    urm : csr matrix used while recommending; defaults to data.get_urm_train_1()
    urm_test : csr matrix used for the evaluation; defaults to data.get_urm_test_1()
    targetids : list, target user ids; defaults to data.get_target_playlists()
    k : int, K nearest neighbour to consider
    shrink : float, shrink term used in the normalization
    threshold : float, all the values under this value are cutted from the final result
    implicit : bool, if true, treat the URM as implicit, otherwise consider
        explicit ratings (real values) in the URM
    alpha, beta, l, c : float, optional weights of the similarity measure
    with_scores : bool, forwarded to recommend_batch
    export : bool, whether to export the recommendations to a csv
    verbose : bool, whether to log progress information

    Returns
    -------
    recs: (list) recommendations
    map10: (float or None) MAP@10 for the provided recommendations
    """
    start = time.time()

    # Fetch the project-wide defaults, then fall back to them for every
    # argument the caller left as None.
    fallbacks = (data.get_ucm_train(), data.get_urm_train_1(),
                 data.get_urm_test_1(), data.get_target_playlists())
    ucm_train, urm, urm_test, targetids = (
        provided if provided is not None else default
        for provided, default in zip((ucm_train, urm, urm_test, targetids), fallbacks))

    self.fit(ucm_train, k=k, distance=distance, alpha=alpha, beta=beta, c=c, l=l,
             shrink=shrink, threshold=threshold, implicit=implicit)
    recs = self.recommend_batch(targetids, urm=urm, with_scores=with_scores, verbose=verbose)

    map10 = None
    if recs:
        map10 = self.evaluate(recs, test_urm=urm_test, verbose=verbose)
    else:
        log.warning('No recommendations available, skip evaluation')

    if export:
        exportcsv(recs, path='submission', name='{}_{}'.format(self.name,distance), verbose=verbose)

    if verbose:
        log.info('Run in: {:.2f}s'.format(time.time()-start))

    return recs, map10
def run(self, normalize_similarity=False, add_zeros_quota=1, loss_tolerance=1e-6, iteration_limit=30, damp_coeff=1, use_incremental=False, export_results=True, export_r_hat=False, export_for_validation=False):
    """
    Fit the model and export either a submission csv or the estimated r_hat.

    The train-only URM is used only when the r_hat is being saved for
    validation purposes (export_r_hat and export_for_validation both True);
    otherwise the full URM is used. export_r_hat is only honoured when
    export_results is False.
    """
    # Collect the hyper-parameters once: they are forwarded both to fit()
    # and to self._print(), which builds the export file name from them.
    params = dict(normalize_similarity=normalize_similarity,
                  add_zeros_quota=add_zeros_quota,
                  loss_tolerance=loss_tolerance,
                  iteration_limit=iteration_limit,
                  damp_coeff=damp_coeff,
                  use_incremental=use_incremental)

    urm = d.get_urm_train_1() if (export_r_hat and export_for_validation) else d.get_urm()

    self.fit(ICM=d.get_icm(), URM_train=urm, **params)

    if export_results:
        print('exporting results')
        recs = self.recommend_batch(d.get_target_playlists(), N=10, urm=urm,
                                    filter_already_liked=True, with_scores=False,
                                    items_to_exclude=[], verbose=False)
        importexport.exportcsv(recs, 'submission', self._print(**params))
    elif export_r_hat:
        print('saving estimated urm')
        self.save_r_hat(export_for_validation)
def run(self, distance, h, k=100, shrink=10, threshold=0, alpha=None, beta=None, l=None, c=None, export=True, verbose=True):
    """
    Run the model, evaluate it on data.get_urm_test_1() and optionally export
    the results to a file.

    Parameters
    ----------
    distance : str, distance metric
    h : unused — accepted but never forwarded to fit(); presumably kept for
        caller compatibility (TODO confirm)
    k : int, K nearest neighbour to consider
    shrink : float, shrink term used in the normalization
    threshold : float, currently NOT forwarded to fit(), so it has no effect here
    alpha, beta, l, c : float, optional weights of the similarity measure
    export : bool, whether to export the recommendations to a csv
    verbose : bool, whether to log progress information

    Returns
    -------
    recs: (list) recommendations
    map10: (float or None) MAP@10 for the provided recommendations
    """
    start = time.time()

    # NOTE(review): neither h nor threshold is passed to fit().
    self.fit(k=k, distance=distance, shrink=shrink, alpha=alpha, beta=beta, l=l, c=c, verbose=verbose)
    recs = self.recommend_batch(N=10, filter_already_liked=True, verbose=verbose)

    map10 = None
    if len(recs) > 0:
        map10 = self.evaluate(recs, test_urm=data.get_urm_test_1(), verbose=verbose)
    else:
        log.warning('No recommendations available, skip evaluation')

    if export:
        exportcsv(recs, path='submission', name='{}_{}'.format(self.name,distance), verbose=verbose)

    if verbose:
        log.info('Run in: {:.2f}s'.format(time.time()-start))

    return recs, map10
def run(self, urm_train=None, urm=None, urm_test=None, targetids=None, factors=100, regularization=0.01, iterations=100, alpha=25, with_scores=False, export=True, verbose=True):
    """
    Run the model, evaluate it and optionally export the results to a file.

    NOTE(review): fit() is called with hard-coded ElasticNet hyper-parameters,
    so urm_train, urm, factors, regularization, iterations and alpha are
    currently ignored (kept in the signature for caller compatibility).
    The previous version also eagerly loaded the train URM, the full URM and
    the ICM without ever using them; those dead loads have been removed.

    Parameters
    ----------
    urm_test : csr matrix, urm where to test the model.
        If None, use: data.get_urm_test_1()
    targetids : list, target user ids. If None, use: data.get_target_playlists()
    with_scores : bool, forwarded to recommend_batch
    export : bool, whether to export the recommendations to a csv
    verbose : bool, whether to log progress information

    Returns
    -------
    recs: (list) recommendations
    map10: (float or None) MAP@10 for the provided recommendations
    """
    start = time.time()

    urm_test = data.get_urm_test_1() if urm_test is None else urm_test
    targetids = data.get_target_playlists() if targetids is None else targetids

    # Hard-coded SLIM ElasticNet configuration, parallelized over all cores.
    self.fit(l1_ratio=0.1, positive_only=True, alpha=1e-4, fit_intercept=False,
             copy_X=False, precompute=False, selection='random', max_iter=100,
             topK=100, tol=1e-4, workers=multiprocessing.cpu_count())

    recs = self.recommend_batch(userids=targetids, with_scores=with_scores, verbose=verbose)

    map10 = None
    if len(recs) > 0:
        map10 = self.evaluate(recs, test_urm=urm_test, verbose=verbose)
    else:
        log.warning('No recommendations available, skip evaluation')

    if export:
        exportcsv(recs, path='submission', name=self.name, verbose=verbose)

    if verbose:
        log.info('Run in: {:.2f}s'.format(time.time() - start))

    return recs, map10
sps.save_npz(path + model.RECOMMENDER_NAME, r_hat) elif arg == 's': model.fit(topK=500, alpha=1.7, min_rating=1, normalize_similarity=True) print('Saving the similarity matrix...') sps.save_npz( 'raw_data/saved_sim_matrix_evaluation_1/{}'.format( model.RECOMMENDER_NAME), model.W_sparse) elif arg == 'v': model = P3alphaRecommender(data.get_urm_train_1()) model.validate(iterations=15, urm_test=data.get_urm_test_1(), targetids=data.get_target_playlists(), normalize_similarity=True, k=(50, 900), min_rating=(0, 2), alpha=(0, 2), verbose=False) elif arg == 'e': model = P3alphaRecommender(data.get_urm()) model.fit(topK=900, alpha=1.2, min_rating=0, implicit=True, normalize_similarity=False) recs = model.recommend_batch(data.get_target_playlists()) export.exportcsv(recs, name=model.RECOMMENDER_NAME) elif arg == 'x': pass else: log.error('Wrong option!')
def wizard_hybrid():
    """
    Interactive wizard that loads previously saved model matrices and builds a
    hybrid recommender from them.

    Depending on the folder the matrices were loaded from, the wizard either
    SAVES the hybrid output (similarity matrix / r_hat / submission csv) or
    EVALUATES it, via bayesian-search validation or hand-crafted weights.
    All choices are read interactively from stdin.
    """
    # Folder names grouped by matrix kind (similarity vs r_hat) and by
    # purpose (save vs evaluation split).
    SIM_MATRIX = ['saved_sim_matrix', 'saved_sim_matrix_evaluation']
    R_HAT = ['saved_r_hat', 'saved_r_hat_evaluation']
    SAVE = ['saved_sim_matrix', 'saved_r_hat']
    EVALUATE = ['saved_sim_matrix_evaluation', 'saved_r_hat_evaluation']

    start = time.time()
    matrices_array, folder, models = hb.create_matrices_array()
    print('matrices loaded in {:.2f} s'.format(time.time() - start))
    log.success('You have loaded: {}'.format(models))

    NORMALIZATION_MODE = normalization_mode_selection()

    if folder in SAVE:
        # --- SAVE branch: persist the hybrid output (or export a csv) ---
        WEIGHTS = weights_selection(models)
        if folder in SIM_MATRIX:
            name, urm_filter_tracks, rel_path = option_selection_save('SIM')
            hybrid_rec = HybridSimilarity(
                matrices_array,
                normalization_mode=NORMALIZATION_MODE,
                urm_filter_tracks=urm_filter_tracks)
            sps.save_npz('raw_data/' + rel_path + name,
                         hybrid_rec.get_r_hat(weights_array=WEIGHTS))
        if folder in R_HAT:
            name, urm_filter_tracks, rel_path, EXPORT = option_selection_save(
                'R_HAT')
            hybrid_rec = HybridRHat(matrices_array,
                                    normalization_mode=NORMALIZATION_MODE,
                                    urm_filter_tracks=urm_filter_tracks)
            if EXPORT:
                # Export a ready-to-submit csv instead of saving the matrix.
                N = ask_number_recommendations()
                recommendations = hybrid_rec.recommend_batch(
                    weights_array=WEIGHTS,
                    target_userids=data.get_target_playlists(),
                    N=N)
                exportcsv(recommendations, path='submission', name=name)
            else:
                sps.save_npz('raw_data/' + rel_path + name,
                             hybrid_rec.get_r_hat(weights_array=WEIGHTS))
    elif folder in EVALUATE:
        # --- EVALUATE branch: validate or score the hybrid on the test split ---
        log.success('|WHAT YOU WANT TO DO ???|')
        log.warning('\'1\' BAYESIAN SEARCH VALIDATION')
        log.warning('\'2\' HAND CRAFTED WEIGHTS')
        mode = input()[0]

        # BAYESIAN SEARCH
        if mode == '1':
            log.success(
                '|SELECT A NUMBER OF |||ITERATIONS||| FOR THE ALGORITHM|')
            # NOTE(review): iterations is parsed as float, not int — presumably
            # validate() tolerates that; confirm.
            iterations = float(input())
            urm_filter_tracks = data.get_urm_train_1()
            if folder in SIM_MATRIX:
                hybrid_rec = HybridSimilarity(
                    matrices_array,
                    normalization_mode=NORMALIZATION_MODE,
                    urm_filter_tracks=urm_filter_tracks)
            if folder in R_HAT:
                hybrid_rec = HybridRHat(matrices_array,
                                        normalization_mode=NORMALIZATION_MODE,
                                        urm_filter_tracks=urm_filter_tracks)
            hybrid_rec.validate(iterations=iterations,
                                urm_test=data.get_urm_test_1(),
                                userids=data.get_target_playlists())
        # MANUAL WEIGHTS
        elif mode == '2':
            WEIGHTS = weights_selection(models)
            urm_filter_tracks = data.get_urm_train_1()
            chose = option_selection_evaluation_2()  # save, evaluate or csv
            if chose == 's':
                # Save the weighted hybrid r_hat for both evaluation splits.
                log.success('|CHOSE A NAME FOR THE MATRIX...|')
                name = input()
                if folder in SIM_MATRIX:
                    type = 'SIM'
                    hybrid_rec = HybridSimilarity(
                        matrices_array,
                        normalization_mode=NORMALIZATION_MODE,
                        urm_filter_tracks=urm_filter_tracks)
                elif folder in R_HAT:
                    type = 'R_HAT'
                    hybrid_rec = HybridRHat(
                        matrices_array,
                        normalization_mode=NORMALIZATION_MODE,
                        urm_filter_tracks=urm_filter_tracks)
                sps.save_npz('raw_data/saved_r_hat_evaluation/' + name,
                             hybrid_rec.get_r_hat(weights_array=WEIGHTS))
                # Mirror recommender on the second train split.
                sym_rec = symmetric_recommender_creator(
                    models, type, NORMALIZATION_MODE,
                    urm_filter_tracks=data.get_urm_train_2())
                sps.save_npz('raw_data/saved_r_hat_evaluation_2/' + name,
                             sym_rec.get_r_hat(weights_array=WEIGHTS))
            elif chose == 'e':
                # Evaluate the weighted hybrid on both test splits.
                if folder in SIM_MATRIX:
                    type = 'SIM'
                    hybrid_rec = HybridSimilarity(
                        matrices_array,
                        normalization_mode=NORMALIZATION_MODE,
                        urm_filter_tracks=urm_filter_tracks)
                elif folder in R_HAT:
                    type = 'R_HAT'
                    hybrid_rec = HybridRHat(
                        matrices_array,
                        normalization_mode=NORMALIZATION_MODE,
                        urm_filter_tracks=urm_filter_tracks)
                N = ask_number_recommendations()
                print('Recommending...')
                recs = hybrid_rec.recommend_batch(
                    weights_array=WEIGHTS,
                    target_userids=data.get_target_playlists(),
                    N=N)
                hybrid_rec.evaluate(recommendations=recs,
                                    test_urm=data.get_urm_test_1())
                # export the recommendations
                log.success(
                    'Do you want to save the CSV with these recomendations? (y/n)')
                if input()[0] == 'y':
                    export_csv_wizard(recs)
                # Repeat the evaluation on the symmetric split.
                sym_rec = symmetric_recommender_creator(
                    models, type, NORMALIZATION_MODE,
                    urm_filter_tracks=data.get_urm_train_2())
                recs2 = sym_rec.recommend_batch(
                    weights_array=WEIGHTS,
                    target_userids=data.get_target_playlists())
                sym_rec.evaluate(recommendations=recs2,
                                 test_urm=data.get_urm_test_2())
            elif chose == 'c':
                # Export a csv only; implemented for r_hat folders so far.
                if folder in R_HAT:
                    hybrid_rec = HybridRHat(
                        matrices_array,
                        normalization_mode=NORMALIZATION_MODE,
                        urm_filter_tracks=urm_filter_tracks)
                    N = ask_number_recommendations()
                    print('Recommending...')
                    recs = hybrid_rec.recommend_batch(
                        weights_array=WEIGHTS,
                        target_userids=data.get_target_playlists(),
                        N=N)
                    export_csv_wizard(recs)
                else:
                    log.error('not implemented yet')
    else:
        log.error('WRONG FOLDER')
def export_csv_wizard(recommendations):
    """Prompt the user for a file name and export the given recommendations as a CSV."""
    log.info('Choose a name for the CSV:')
    exportcsv(recommendations, name=input())
    log.success('CSV saved!')