def setUpClass(self):
    """Point the configuration at the test data set and build the recommender."""
    config = Config()
    # Settings are applied one by one, in the order listed here.
    test_settings = (
        ("popcon_index", "test_data/.sample_pxi"),
        ("popcon_dir", "test_data/popcon_dir"),
        ("clusters_dir", "test_data/clusters_dir"),
        ("popcon", 0),
    )
    for attribute, value in test_settings:
        setattr(config, attribute, value)
    # NOTE(review): the configured object is not passed to Recommender();
    # presumably Config shares its state between instances -- confirm.
    self.rec = Recommender()
def run_strategy(cfg, sample_file):
    """Run the ROC experiment for every user listed in a population sample.

    Each non-empty line of ``sample_file`` is taken as a user id and mapped
    to ``<cfg.popcon_dir>/<first two characters of the id>/<id>``. For every
    user, ``iterations`` (a module-level name) rounds are executed: 90% of
    the profile is drawn at random and held out, a recommendation of
    ``repo_size`` items is requested for the remaining packages, and the
    held-out sample is recorded against the resulting ranking.

    Two ROC plots and a ``-roc.jpg.comment`` summary file are written under
    ``results/roc-sample/<sample file name>/``.

    :param cfg: configuration handed to ``Recommender``; ``popcon_dir`` and
        ``pkgs_filter`` are read from it.
    :param sample_file: path of a text file holding one user id per line.
    """
    rec = Recommender(cfg)
    repo_size = rec.items_repository.get_doccount()
    results = ExperimentResults(repo_size)
    label = get_label(cfg)
    sample_str = sample_file.split('/')[-1]

    population_sample = []
    with open(sample_file, 'r') as f:
        # Iterate the file lazily instead of loading it with readlines().
        for line in f:
            user_id = line.strip('\n')
            if not user_id:
                # A blank line would otherwise turn into a bogus path.
                continue
            population_sample.append(
                os.path.join(cfg.popcon_dir, user_id[:2], user_id))

    sample_dir = ("results/roc-sample/%s" % sample_str)
    if not os.path.exists(sample_dir):
        os.makedirs(sample_dir)
    log_file = os.path.join(sample_dir, label["values"])

    # n iterations per population user
    for submission_file in population_sample:
        user = PopconSystem(submission_file)
        user.filter_pkg_profile(cfg.pkgs_filter)
        user.maximal_pkg_profile()
        for n in range(iterations):
            # Fill sample profile
            profile_len = len(user.pkg_profile)
            item_score = {}
            for pkg in user.pkg_profile:
                item_score[pkg] = user.item_score[pkg]
            sample = {}
            sample_size = int(profile_len * 0.9)
            for i in range(sample_size):
                # list() keeps this working on Python 3, where dict views
                # cannot be indexed by random.choice().
                key = random.choice(list(item_score))
                sample[key] = item_score.pop(key)
            # Whatever was not drawn into the sample is the known profile.
            iteration_user = User(item_score)
            recommendation = rec.get_recommendation(iteration_user,
                                                    repo_size)
            if hasattr(recommendation, "ranking"):
                results.add_result(recommendation.ranking, sample)

    plot_roc(results, log_file)
    plot_roc(results, log_file, 1)
    with open(log_file + "-roc.jpg.comment", 'w') as f:
        f.write("# %s\n# %s\n\n" %
                (label["description"], label["values"]))
        f.write("# roc AUC\n%.4f\n\n" % results.get_auc())
        f.write(
            "# threshold\tmean_fpr\tdev_fpr\t\tmean_tpr\tdev_tpr\t\tcoverage\n")  # noqa
        for size in results.thresholds:
            f.write("%4d\t\t%.4f\t\t%.4f\t\t%.4f\t\t%.4f\t\t%.4f\n" %
                    (size, numpy.mean(results.fpr[size]),
                     numpy.std(results.fpr[size]),
                     numpy.mean(results.recall[size]),
                     numpy.std(results.recall[size]),
                     numpy.mean(results.coverage(size))))
def run_strategy(cfg, user):
    """Evaluate every weighting scheme and sample proportion for one user.

    For each entry of the module-level ``weighting`` sequence, ``cfg.weight``
    and ``cfg.bm25_k1`` are set and a new ``Recommender`` is built. For each
    value in ``sample_proportions``, ``iterations`` rounds are run: that
    proportion of the user's profile is drawn at random and held out, a
    recommendation is requested for the remaining packages, the round is
    logged through ``write_recall_log`` and, when the recommendation has a
    ``ranking``, it is added to the results.

    A text summary is written to ``results/strategies/<label values>`` and
    ``plot_summary`` is called for every proportion.

    :param cfg: configuration object; its ``weight`` and ``bm25_k1``
        attributes are overwritten.
    :param user: object providing ``pkg_profile`` and ``item_score``.
    """
    for weight in weighting:
        cfg.weight = weight[0]
        cfg.bm25_k1 = weight[1]
        rec = Recommender(cfg)
        repo_size = rec.items_repository.get_doccount()
        for proportion in sample_proportions:
            results = ExperimentResults(repo_size)
            label = get_label(cfg, proportion)
            log_file = "results/strategies/" + label["values"]
            for n in range(iterations):
                # Fill sample profile
                profile_size = len(user.pkg_profile)
                item_score = {}
                for pkg in user.pkg_profile:
                    item_score[pkg] = user.item_score[pkg]
                sample = {}
                sample_size = int(profile_size * proportion)
                for i in range(sample_size):
                    # list() keeps this working on Python 3, where dict
                    # views cannot be indexed by random.choice().
                    key = random.choice(list(item_score))
                    sample[key] = item_score.pop(key)
                iteration_user = User(item_score)
                recommendation = rec.get_recommendation(
                    iteration_user, repo_size)
                write_recall_log(
                    label, n, sample, recommendation, profile_size,
                    repo_size, log_file)
                if hasattr(recommendation, "ranking"):
                    results.add_result(recommendation.ranking, sample)

            # Each "best" lookup is done once and reused for both fields.
            best_precision = results.best_precision()
            best_f1 = results.best_f1()
            best_f05 = results.best_f05()
            with open(log_file, 'w') as f:
                precision_10 = sum(results.precision[10]) / len(
                    results.precision[10])
                f1_10 = sum(results.f1[10]) / len(results.f1[10])
                f05_10 = sum(results.f05[10]) / len(results.f05[10])
                # NOTE(review): `recommendation` is the one left over from
                # the last iteration; it is unbound if `iterations` is 0.
                f.write("# %s\n# %s\n\ncoverage %d\n\n" %
                        (label["description"], label["values"],
                         recommendation.size))
                f.write("# best results (recommendation size; metric)\n")
                f.write(
                    "precision (%d; %.2f)\nf1 (%d; %.2f)\nf05 (%d; %.2f)\n\n" %
                    (best_precision[0], best_precision[1],
                     best_f1[0], best_f1[1],
                     best_f05[0], best_f05[1]))
                f.write("# recommendation size 10\nprecision (10; %.2f)\nf1 (10; %.2f)\nf05 (10; %.2f)" %  # noqa
                        (precision_10, f1_10, f05_10))
            precision = results.get_precision_summary()
            recall = results.get_recall_summary()
            f1 = results.get_f1_summary()
            f05 = results.get_f05_summary()
            accuracy = results.get_accuracy_summary()
            plot_summary(precision, recall, f1, f05, accuracy, log_file)
class AppRecommender:
    """Front end that computes a recommendation for the local system."""

    def __init__(self):
        """Create the recommender and the configuration object."""
        self.recommender = Recommender()
        self.config = Config()

    def make_recommendation(self, reference_pkgs=None,
                            print_recommendation=True):
        """Compute, optionally print, and return a recommendation.

        :param reference_pkgs: packages handed to ``LocalSystem``; any falsy
            value is replaced by an empty list.
        :param print_recommendation: when true, the recommendation is also
            printed to standard output.
        :return: the object returned by ``Recommender.get_recommendation``.
        """
        begin_time = datetime.datetime.now()
        logging.info("Computation started at %s", begin_time)

        if not reference_pkgs:
            reference_pkgs = []

        user = LocalSystem(reference_pkgs)
        recommendation_size = Config().num_recommendations
        user_recommendation = self.recommender.get_recommendation(
            user, recommendation_size)

        logging.info("Recommending applications for user %s", user.user_id)
        if print_recommendation:
            print(user_recommendation)

        end_time = datetime.datetime.now()
        logging.info("Computation completed at %s", end_time)
        delta = end_time - begin_time
        # timedelta.seconds ignores whole days; total_seconds() reports the
        # full elapsed time.
        logging.info("Time elapsed: %d seconds.", delta.total_seconds())
        return user_recommendation
class AppRecommender:
    """Front end that computes a recommendation for the local system."""

    def __init__(self):
        """Create the recommender and the configuration object."""
        self.recommender = Recommender()
        self.config = Config()

    def make_recommendation(self, print_recommendation=True):
        """Compute, optionally print, and return a recommendation.

        :param print_recommendation: when true, the recommendation is also
            printed to standard output.
        :return: the object returned by ``Recommender.get_recommendation``.
        """
        begin_time = datetime.datetime.now()
        logging.info("Computation started at %s", begin_time)

        user = LocalSystem()
        recommendation_size = Config().num_recommendations
        user_recommendation = self.recommender.get_recommendation(
            user, recommendation_size)

        logging.info("Recommending applications for user %s", user.user_id)
        if print_recommendation:
            print(user_recommendation)

        end_time = datetime.datetime.now()
        logging.info("Computation completed at %s", end_time)
        delta = end_time - begin_time
        # timedelta.seconds ignores whole days; total_seconds() reports the
        # full elapsed time.
        logging.info("Time elapsed: %d seconds.", delta.total_seconds())
        return user_recommendation
def run_strategy(cfg, user):
    """Run the ROC experiment for a single user and write its reports.

    ``iterations`` (a module-level name) rounds are executed: 90% of the
    user's profile is drawn at random and held out, a recommendation of
    ``repo_size`` items is requested for the remaining packages, the round
    is logged through ``write_recall_log`` and, when the recommendation has
    a ``ranking``, it is added to the results.

    Output goes to ``results/roc-suite/<first 8 chars of user id>/<strategy>/``:
    a ``-roc.jpg.comment`` summary, two copies of it for the other plots,
    and the plots produced by ``plot_roc`` and ``plot_summary``.

    :param cfg: configuration handed to ``Recommender``; ``strategy`` is
        read from it.
    :param user: object providing ``user_id``, ``pkg_profile`` and
        ``item_score``.
    """
    rec = Recommender(cfg)
    repo_size = rec.items_repository.get_doccount()
    results = ExperimentResults(repo_size)
    label = get_label(cfg)
    user_dir = ("results/roc-suite/%s/%s" %
                (user.user_id[:8], cfg.strategy))
    if not os.path.exists(user_dir):
        os.makedirs(user_dir)
    log_file = os.path.join(user_dir, label["values"])

    for n in range(iterations):
        # Fill sample profile
        profile_len = len(user.pkg_profile)
        item_score = {}
        for pkg in user.pkg_profile:
            item_score[pkg] = user.item_score[pkg]
        sample = {}
        sample_size = int(profile_len * 0.9)
        for i in range(sample_size):
            # list() keeps this working on Python 3, where dict views
            # cannot be indexed by random.choice().
            key = random.choice(list(item_score))
            sample[key] = item_score.pop(key)
        iteration_user = User(item_score)
        recommendation = rec.get_recommendation(iteration_user, repo_size)
        write_recall_log(
            label, n, sample, recommendation, profile_len, repo_size,
            log_file)
        if hasattr(recommendation, "ranking"):
            results.add_result(recommendation.ranking, sample)

    comment_file = log_file + "-roc.jpg.comment"
    with open(comment_file, 'w') as f:
        f.write("# %s\n# %s\n\n" %
                (label["description"], label["values"]))
        f.write("# roc AUC\n%.4f\n\n" % results.get_auc())
        f.write("# threshold\tprecision\trecall\t\tf05\t\tcoverage\n")
        for size in results.thresholds:
            f.write("%4d\t\t%.4f\t\t%.4f\t\t%.4f\t\t%.4f\n" %
                    (size, numpy.mean(results.precision[size]),
                     numpy.mean(results.recall[size]),
                     numpy.mean(results.f05[size]),
                     numpy.mean(results.coverage(size))))
    # The same summary text accompanies the other two plot files.
    shutil.copy(comment_file, log_file + ".jpg.comment")
    shutil.copy(comment_file, log_file + "-logscale.jpg.comment")
    plot_roc(results, log_file)
    plot_summary(results, log_file)
def reset(self, params, rep):
    """Prepare recommender, user and sample size for a "content*" experiment.

    Nothing happens when ``params['name']`` does not start with "content".
    """
    if not params['name'].startswith("content"):
        return
    config = Config()
    # if the index was not built yet
    # app_axi = AppAptXapianIndex(cfg.axi,"results/arnaldo/AppAxi")
    config.axi = "data/AppAxi"
    config.index_mode = "old"
    config.weight = params['weight']
    self.rec = Recommender(config)
    self.rec.set_strategy(params['strategy'])
    repository = self.rec.items_repository
    self.repo_size = repository.get_doccount()
    self.user = LocalSystem()
    self.user.app_pkg_profile(self.rec.items_repository)
    # Number of profile packages held out in each iteration.
    profile_size = len(self.user.pkg_profile)
    self.sample_size = int(profile_size * params['sample'])
class AppRecommender:
    """Front end that computes a recommendation for the local system."""

    def __init__(self):
        """Create the recommender used by this object."""
        self.recommender = Recommender()

    def make_recommendation(self, recommendation_size,
                            no_auto_pkg_profile=False):
        """Compute, log and return a recommendation for the local user.

        :param recommendation_size: size passed to
            ``Recommender.get_recommendation``.
        :param no_auto_pkg_profile: forwarded to ``get_user``.
        :return: the object returned by ``Recommender.get_recommendation``.
        """
        begin_time = datetime.datetime.now()
        logging.info("Computation started at %s", begin_time)

        user = self.get_user(no_auto_pkg_profile)
        user_recommendation = self.recommender.get_recommendation(
            user, recommendation_size)

        logging.info("Recommending applications for user %s", user.user_id)
        logging.info(user_recommendation)

        end_time = datetime.datetime.now()
        logging.info("Computation completed at %s", end_time)
        delta = end_time - begin_time
        # timedelta.seconds ignores whole days; total_seconds() reports the
        # full elapsed time.
        logging.info("Time elapsed: %d seconds.", delta.total_seconds())
        return user_recommendation

    def get_user(self, no_auto_pkg_profile):
        """Build the local user restricted by the "desktopapps" filter file.

        :param no_auto_pkg_profile: when true, ``no_auto_pkg_profile()`` is
            additionally called on the user.
        :return: the prepared ``LocalSystem`` instance.
        """
        config = Config()
        user = LocalSystem()
        user.filter_pkg_profile(
            os.path.join(config.filters_dir, "desktopapps"))
        user.maximal_pkg_profile()
        if no_auto_pkg_profile:
            user.no_auto_pkg_profile()
        return user
class AppRecommender:
    """Front end that computes a recommendation for the local system."""

    def __init__(self):
        """Create the recommender used by this object."""
        self.recommender = Recommender()

    def make_recommendation(self, recommendation_size,
                            no_auto_pkg_profile=False):
        """Compute, log and return a recommendation for the local user.

        :param recommendation_size: size passed to
            ``Recommender.get_recommendation``.
        :param no_auto_pkg_profile: forwarded to ``get_user``.
        :return: the object returned by ``Recommender.get_recommendation``.
        """
        begin_time = datetime.datetime.now()
        logging.info("Computation started at %s", begin_time)

        user = self.get_user(no_auto_pkg_profile)
        user_recommendation = self.recommender.get_recommendation(
            user, recommendation_size)

        logging.info("Recommending applications for user %s", user.user_id)
        logging.info(user_recommendation)

        end_time = datetime.datetime.now()
        logging.info("Computation completed at %s", end_time)
        delta = end_time - begin_time
        # timedelta.seconds ignores whole days; total_seconds() reports the
        # full elapsed time.
        logging.info("Time elapsed: %d seconds.", delta.total_seconds())
        return user_recommendation

    def get_user(self, no_auto_pkg_profile):
        """Build the local user restricted by the "desktopapps" filter file.

        :param no_auto_pkg_profile: when true, ``no_auto_pkg_profile()`` is
            additionally called on the user.
        :return: the prepared ``LocalSystem`` instance.
        """
        config = Config()
        user = LocalSystem()
        user.filter_pkg_profile(
            os.path.join(config.filters_dir, "desktopapps"))
        user.maximal_pkg_profile()
        if no_auto_pkg_profile:
            user.no_auto_pkg_profile()
        return user
import sys sys.path.insert(0, '../') import logging import datetime from apprecommender.config import Config from apprecommender.recommender import Recommender from apprecommender.user import LocalSystem from apprecommender.error import Error if __name__ == '__main__': try: cfg = Config() rec = Recommender(cfg) user = LocalSystem() user.no_auto_pkg_profile() # user.maximal_pkg_profile() begin_time = datetime.datetime.now() logging.debug("Recommendation computation started at %s" % begin_time) print rec.get_recommendation(user) end_time = datetime.datetime.now() logging.debug("Recommendation computation completed at %s" % end_time) delta = end_time - begin_time logging.info("Time elapsed: %d seconds." % delta.seconds) except Error:
class RecommenderTests(unittest.TestCase):
    """Checks strategy selection and basic recommendation output."""

    @classmethod
    def setUpClass(cls):
        """Point the configuration at the test data and build the recommender."""
        config = Config()
        config.popcon_index = "test_data/.sample_pxi"
        config.popcon_dir = "test_data/popcon_dir"
        config.clusters_dir = "test_data/clusters_dir"
        config.popcon = 0
        cls.rec = Recommender()

    def test_set_strategy(self):
        """Every strategy name selects the expected class and content."""
        expectations = (
            ("cb", ContentBased, "mix"),
            ("cbt", ContentBased, "tag"),
            ("cbd", ContentBased, "desc"),
            ("cbtm", ContentBased, "time"),
            ("mlbva", MachineLearningBVA, "mlbva_mix"),
            ("mlbow", MachineLearningBOW, "mlbow_mix"),
            ("mlbva_eset", MachineLearningBVA, "mlbva_mix_eset"),
            ("mlbow_eset", MachineLearningBOW, "mlbow_mix_eset"),
            ("cbpkg", PackageReference, "mix"),
        )
        for strategy_name, strategy_class, content in expectations:
            self.rec.set_strategy(strategy_name)
            self.assertIsInstance(self.rec.strategy, strategy_class)
            self.assertEqual(self.rec.strategy.content, content)

    def test_get_recommendation(self):
        """A small profile yields a non-empty RecommendationResult."""
        profile = {"inkscape": 1, "gimp": 1, "eog": 1, "vim": 1}
        recommendation = self.rec.get_recommendation(User(profile))
        self.assertIsInstance(recommendation, RecommendationResult)
        self.assertGreater(len(recommendation.item_score), 0)
import sys sys.path.insert(0, '../') import logging import datetime from apprecommender.config import Config from apprecommender.evaluation import (Precision, Recall, F1, Accuracy, SimpleAccuracy, CrossValidation) from apprecommender.recommender import Recommender from apprecommender.user import LocalSystem from apprecommender.error import Error if __name__ == '__main__': try: cfg = Config() rec = Recommender(cfg) print "\nRecommender strategy: ", rec.strategy.description user = LocalSystem() # user.app_pkg_profile(rec.items_repository) user.no_auto_pkg_profile() begin_time = datetime.datetime.now() logging.debug("Cross-validation started at %s" % begin_time) metrics = [] metrics.append(Precision()) metrics.append(Recall()) metrics.append(F1()) metrics.append(Accuracy()) metrics.append(SimpleAccuracy()) validation = CrossValidation(0.9, 10, rec, metrics, 0.1) validation.run(user)
class RecommenderTests(unittest.TestCase):
    """Checks strategy selection and basic recommendation output."""

    @classmethod
    def setUpClass(cls):
        """Point the configuration at the test data and build the recommender."""
        config = Config()
        config.popcon_index = "test_data/.sample_pxi"
        config.popcon_dir = "test_data/popcon_dir"
        config.clusters_dir = "test_data/clusters_dir"
        config.popcon = 0
        cls.rec = Recommender()

    def test_set_strategy(self):
        """Every strategy name selects the expected class and content."""
        expectations = (
            ("cb", ContentBased, "mix"),
            ("cbt", ContentBased, "tag"),
            ("cbd", ContentBased, "desc"),
            ("cbtm", ContentBased, "time"),
            ("mlbva", MachineLearningBVA, "mlbva_mix"),
            ("mlbow", MachineLearningBOW, "mlbow_mix"),
            ("mlbva_eset", MachineLearningBVA, "mlbva_mix_eset"),
            ("mlbow_eset", MachineLearningBOW, "mlbow_mix_eset"),
        )
        for strategy_name, strategy_class, content in expectations:
            self.rec.set_strategy(strategy_name)
            self.assertIsInstance(self.rec.strategy, strategy_class)
            self.assertEqual(self.rec.strategy.content, content)
        # self.rec.set_strategy("knn")
        # self.assertIsInstance(self.rec.strategy,Collaborative)

    def test_get_recommendation(self):
        """A small profile yields a non-empty RecommendationResult."""
        profile = {"inkscape": 1, "gimp": 1, "eog": 1}
        recommendation = self.rec.get_recommendation(User(profile))
        self.assertIsInstance(recommendation, RecommendationResult)
        self.assertGreater(len(recommendation.item_score), 0)
class ContentBasedSuite(expsuite.PyExperimentSuite):
    """Experiment suite evaluating strategies against the local profile."""

    def reset(self, params, rep):
        """Prepare recommender, user and sample size for a "content*" run.

        Nothing happens when ``params['name']`` does not start with
        "content".
        """
        if params['name'].startswith("content"):
            cfg = Config()
            # if the index was not built yet
            # app_axi = AppAptXapianIndex(cfg.axi,"results/arnaldo/AppAxi")
            cfg.axi = "data/AppAxi"
            cfg.index_mode = "old"
            cfg.weight = params['weight']
            self.rec = Recommender(cfg)
            self.rec.set_strategy(params['strategy'])
            self.repo_size = self.rec.items_repository.get_doccount()
            self.user = LocalSystem()
            self.user.app_pkg_profile(self.rec.items_repository)
            self.sample_size = int(
                len(self.user.pkg_profile) * params['sample'])
            # iteration should be set to 10 in config file
            # self.profile_size = range(10,101,10)

    def iterate(self, params, rep, n):
        """Run one hold-out iteration, log the recall ranks and plot metrics.

        ``self.sample_size`` packages are drawn at random from the user's
        profile and held out; a recommendation of ``self.repo_size`` items
        is requested for the rest. The rank of every held-out package is
        written to ``results/content/recall/...`` and a metrics plot is
        saved next to it.

        :return: a dict describing the iteration, or ``None`` when
            ``params['name']`` does not start with "content".
        """
        if not params['name'].startswith("content"):
            return None

        item_score = dict.fromkeys(self.user.pkg_profile, 1)
        # Prepare partition
        sample = {}
        for i in range(self.sample_size):
            # list() keeps this working on Python 3, where dict views
            # cannot be indexed by random.choice().
            key = random.choice(list(item_score))
            sample[key] = item_score.pop(key)
        # Get full recommendation
        user = User(item_score)
        recommendation = self.rec.get_recommendation(user, self.repo_size)

        # Write recall log
        recall_file = "results/content/recall/%s-%s-%.2f-%d" % \
            (params['strategy'], params['weight'], params['sample'], n)
        # The context manager closes the file even if a lookup below fails.
        with open(recall_file, 'w') as output:
            output.write("# weight=%s\n" % params['weight'])
            output.write("# strategy=%s\n" % params['strategy'])
            output.write("# sample=%f\n" % params['sample'])
            output.write("\n%d %d %d\n" %
                         (self.repo_size, len(item_score),
                          self.sample_size))
            # First position of every ranked package, built once so each
            # sample lookup does not rescan the whole ranking.
            first_rank = {}
            for position, pkg in enumerate(recommendation.ranking):
                first_rank.setdefault(pkg, position)
            notfound = []
            ranks = []
            for pkg in sample:
                if pkg in first_rank:
                    ranks.append(first_rank[pkg])
                else:
                    notfound.append(pkg)
            for r in sorted(ranks):
                output.write(str(r) + "\n")
            if notfound:
                output.write("Out of recommendation:\n")
                for pkg in notfound:
                    output.write(pkg + "\n")

        # Plot metrics summary
        accuracy = []
        precision = []
        recall = []
        f1 = []
        g = Gnuplot.Gnuplot()
        g('set style data lines')
        g.xlabel('Recommendation size')
        for size in range(1, len(recommendation.ranking) + 1, 100):
            predicted = RecommendationResult(
                dict.fromkeys(recommendation.ranking[:size], 1))
            real = RecommendationResult(sample)
            evaluation = Evaluation(predicted, real, self.repo_size)
            accuracy.append([size, evaluation.run(Accuracy())])
            precision.append([size, evaluation.run(Precision())])
            recall.append([size, evaluation.run(Recall())])
            f1.append([size, evaluation.run(F1())])
        g.plot(Gnuplot.Data(accuracy, title="Accuracy"),
               Gnuplot.Data(precision, title="Precision"),
               Gnuplot.Data(recall, title="Recall"),
               Gnuplot.Data(f1, title="F1"))
        g.hardcopy(recall_file + "-plot.ps", enhanced=1, color=1)

        # Iteration log
        # Index 20 is the entry for recommendation size 2001 (sizes step by
        # 100 from 1); a shorter ranking raises IndexError here.
        # NOTE(review): the 'recall:' key carries a trailing colon, unlike
        # its siblings -- kept as is because consumers may rely on it.
        result = {'iteration': n,
                  'weight': params['weight'],
                  'strategy': params['strategy'],
                  'accuracy': accuracy[20],
                  'precision': precision[20],
                  'recall:': recall[20],
                  'f1': f1[20]}
        return result
import os import logging import datetime import sys sys.path.insert(0, '../') from apprecommender.config import Config from apprecommender.evaluation import (CrossValidation, Precision, Recall, F_score, FPR, Accuracy) from apprecommender.recommender import Recommender from apprecommender.user import PopconSystem if __name__ == '__main__': cfg = Config() rec = Recommender() # user = LocalSystem() # user = RandomPopcon(cfg.popcon_dir) # user = RandomPopcon(cfg.popcon_dir,os.path.join(cfg.filters_dir, # "desktopapps")) popcon_entries = "~/.app-recommender/popcon-entries/" \ "00/0001166d0737c6dffb083071e5ee69f5" user = PopconSystem(os.path.expanduser(popcon_entries)) user.filter_pkg_profile(os.path.join(cfg.filters_dir, "desktopapps")) user.maximal_pkg_profile() begin_time = datetime.datetime.now() metrics = [] metrics.append(Precision()) metrics.append(Recall())
def __init__(self):
    """Create a Recommender and a Config and store both on the instance."""
    self.recommender = Recommender()
    self.config = Config()
""" import sys sys.path.insert(0, '../') import logging import datetime from apprecommender.config import Config from apprecommender.recommender import Recommender from apprecommender.user import LocalSystem from apprecommender.error import Error if __name__ == '__main__': try: cfg = Config() rec = Recommender(cfg) user = LocalSystem() begin_time = datetime.datetime.now() logging.debug("Recommendation computation started at %s" % begin_time) print rec.get_recommendation(user) end_time = datetime.datetime.now() logging.debug("Recommendation computation completed at %s" % end_time) delta = end_time - begin_time logging.info("Time elapsed: %d seconds." % delta.seconds) except Error: logging.critical("Aborting proccess. Use '--debug' for more details.")
def __init__(self):
    """Create a Recommender and store it on the instance."""
    self.recommender = Recommender()
f05_100_summary[size] = [] with open(log_file + "-%s%.3d" % (option_str, size), 'w') as f: f.write("# sample %s\n" % sample_str) f.write("# strategy %s-%s%.3d\n\n" % (cfg.strategy, option_str, size)) f.write("# p_10\tf05_100\n\n") # main loop per user for submission_file in population_sample: user = PopconSystem(submission_file) user.filter_pkg_profile(cfg.pkgs_filter) user.maximal_pkg_profile() for size in sizes: cfg.profile_size = size cfg.k_neighbors = size rec = Recommender(cfg) repo_size = rec.items_repository.get_doccount() p_10 = [] f05_100 = [] for n in range(iterations): # Fill sample profile profile_len = len(user.pkg_profile) item_score = {} for pkg in user.pkg_profile: item_score[pkg] = user.item_score[pkg] sample = {} sample_size = int(profile_len * 0.9) for i in range(sample_size): key = random.choice(item_score.keys()) sample[key] = item_score.pop(key) iteration_user = User(item_score)
precision_summary[k] = [] f05_summary[k] = [] mcc_summary[k] = [] with open(log_file + "-k%.3d" % k, 'w') as f: f.write("# %s\n\n" % sample_file.split('/')[-1]) f.write("# strategy-k %s-k%.3d\n\n" % (cfg.strategy, k)) f.write("# roc_point \tprecision \tf05 \tmcc\n\n") # main loop per user for submission_file in population_sample: user = PopconSystem(submission_file) user.filter_pkg_profile(cfg.pkgs_filter) user.maximal_pkg_profile() for k in neighbors: cfg.k_neighbors = k rec = Recommender(cfg) repo_size = rec.items_repository.get_doccount() results = ExperimentResults(repo_size) # n iterations for same recommender and user for n in range(iterations): # Fill sample profile profile_len = len(user.pkg_profile) item_score = {} for pkg in user.pkg_profile: item_score[pkg] = user.item_score[pkg] sample = {} sample_size = int(profile_len * 0.9) for i in range(sample_size): key = random.choice(item_score.keys()) sample[key] = item_score.pop(key) iteration_user = User(item_score)