Пример #1
0
class ContentBasedSuite(expsuite.PyExperimentSuite):

    def reset(self, params, rep):
        if params['name'].startswith("content"):
            cfg = Config()
            # if the index was not built yet
            # app_axi = AppAptXapianIndex(cfg.axi,"results/arnaldo/AppAxi")
            cfg.axi = "data/AppAxi"
            cfg.index_mode = "old"
            cfg.weight = params['weight']
            self.rec = Recommender(cfg)
            self.rec.set_strategy(params['strategy'])
            self.repo_size = self.rec.items_repository.get_doccount()
            self.user = LocalSystem()
            self.user.app_pkg_profile(self.rec.items_repository)
            self.sample_size = int(
                len(self.user.pkg_profile) * params['sample'])
            # iteration should be set to 10 in config file
            # self.profile_size = range(10,101,10)

    def iterate(self, params, rep, n):
        if params['name'].startswith("content"):
            item_score = dict.fromkeys(self.user.pkg_profile, 1)
            # Prepare partition
            sample = {}
            for i in range(self.sample_size):
                key = random.choice(item_score.keys())
                sample[key] = item_score.pop(key)
            # Get full recommendation
            user = User(item_score)
            recommendation = self.rec.get_recommendation(user, self.repo_size)
            # Write recall log
            recall_file = "results/content/recall/%s-%s-%.2f-%d" % \
                          (params['strategy'], params[
                           'weight'], params['sample'], n)
            output = open(recall_file, 'w')
            output.write("# weight=%s\n" % params['weight'])
            output.write("# strategy=%s\n" % params['strategy'])
            output.write("# sample=%f\n" % params['sample'])
            output.write("\n%d %d %d\n" %
                         (self.repo_size, len(item_score), self.sample_size))
            notfound = []
            ranks = []
            for pkg in sample.keys():
                if pkg in recommendation.ranking:
                    ranks.append(recommendation.ranking.index(pkg))
                else:
                    notfound.append(pkg)
            for r in sorted(ranks):
                output.write(str(r) + "\n")
            if notfound:
                output.write("Out of recommendation:\n")
                for pkg in notfound:
                    output.write(pkg + "\n")
            output.close()
            # Plot metrics summary
            accuracy = []
            precision = []
            recall = []
            f1 = []
            g = Gnuplot.Gnuplot()
            g('set style data lines')
            g.xlabel('Recommendation size')
            for size in range(1, len(recommendation.ranking) + 1, 100):
                predicted = RecommendationResult(
                    dict.fromkeys(recommendation.ranking[:size], 1))
                real = RecommendationResult(sample)
                evaluation = Evaluation(predicted, real, self.repo_size)
                accuracy.append([size, evaluation.run(Accuracy())])
                precision.append([size, evaluation.run(Precision())])
                recall.append([size, evaluation.run(Recall())])
                f1.append([size, evaluation.run(F1())])
            g.plot(Gnuplot.Data(accuracy, title="Accuracy"),
                   Gnuplot.Data(precision, title="Precision"),
                   Gnuplot.Data(recall, title="Recall"),
                   Gnuplot.Data(f1, title="F1"))
            g.hardcopy(recall_file + "-plot.ps", enhanced=1, color=1)
            # Iteration log
            result = {'iteration': n,
                      'weight': params['weight'],
                      'strategy': params['strategy'],
                      'accuracy': accuracy[20],
                      'precision': precision[20],
                      'recall:': recall[20],
                      'f1': f1[20]}
            return result