示例#1
0
class FeatureWeightedLinearStacking(base_recommender):
    """Hybrid recommender blending content-based (cb) and neighborhood
    collaborative-filtering (cf) predictions with feature-weighted
    linear stacking: p = sum_i w_i * f_i(user) * p_component, where the
    feature functions f_i gate each component's weight per user.
    """

    def __init__(self):
        self.cb = ContentBasedRecs()
        self.cf = NeighborhoodBasedRecs()

        # Pre-trained stacking weights. Built from strings so Decimal
        # holds the intended value exactly (Decimal(float) would carry
        # binary floating-point noise).
        self.wcb1 = Decimal('0.65221204')
        self.wcb2 = Decimal('-0.14638855')
        self.wcf1 = Decimal('-0.0062952')
        self.wcf2 = Decimal('0.09139193')

    def fun1(self):
        """Constant feature function: always on."""
        return Decimal(1.0)

    def fun2(self, user_id):
        """Indicator feature: 1 when the user has more than 3 ratings."""
        count = Rating.objects.filter(user_id=user_id).count()
        return Decimal(1.0) if count > 3 else Decimal(0.0)

    def recommend_items(self, user_id, num=6):
        """Return the top-`num` (movie_id, {'prediction': p}) pairs.

        Over-fetches num*5 candidates from each component so the blend
        has a reasonable pool to re-rank.
        """
        cb_recs = self.cb.recommend_items(user_id, num * 5)
        cf_recs = self.cf.recommend_items(user_id, num * 5)

        combined_recs = dict()
        for movie_id, details in cb_recs:
            combined_recs[movie_id] = {'cb': details['prediction']}

        for movie_id, details in cf_recs:
            combined_recs.setdefault(movie_id, {})['cf'] = details['prediction']

        fwls_preds = dict()
        for movie_id, recs in combined_recs.items():
            # Fill in whichever component did not rank this movie so
            # both scores always enter the blend.
            if 'cb' not in recs:
                recs['cb'] = self.cb.predict_score(user_id, movie_id)
            if 'cf' not in recs:
                recs['cf'] = self.cf.predict_score(user_id, movie_id)
            pred = self.prediction(recs['cb'], recs['cf'], user_id)
            fwls_preds[movie_id] = {'prediction': pred}

        sorted_items = sorted(
            fwls_preds.items(),
            key=lambda item: -float(item[1]['prediction']))[:num]
        return sorted_items

    def predict_score(self, user_id, item_id):
        """Blend the two component scores for one (user, item) pair."""
        p_cb = self.cb.predict_score(user_id, item_id)
        p_cf = self.cf.predict_score(user_id, item_id)
        # Bug fix: the original computed the blend but never returned
        # it, so callers always received None.
        return self.prediction(p_cb, p_cf, user_id)

    def prediction(self, p_cb, p_cf, user_id):
        """Weighted combination of the component predictions."""
        p = (self.wcb1 * self.fun1() * p_cb +
             self.wcb2 * self.fun2(user_id) * p_cb +
             self.wcf1 * self.fun1() * p_cf +
             self.wcf2 * self.fun2(user_id) * p_cf)
        return p
示例#2
0
    def __init__(self):
        # Component recommenders whose predictions are blended.
        self.cb = ContentBasedRecs()
        self.cf = NeighborhoodBasedRecs()

        # Pre-trained FWLS stacking weights.
        # NOTE(review): Decimal(float) embeds binary rounding noise;
        # Decimal('0.65221204') would hold the value exactly — confirm
        # whether the exact decimal was intended.
        self.wcb1 = Decimal(0.65221204)
        self.wcb2 = Decimal(-0.14638855)
        self.wcf1 = Decimal(-0.0062952)
        self.wcf2 = Decimal(0.09139193)
示例#3
0
 def __init__(self, data_size=1000):
     self.logger = logging.getLogger('FWLS')
     # Populated later by the training-data loader.
     self.train_data = None
     self.test_data = None
     self.rating_count = None
     # Component recommenders whose predictions are blended.
     self.cb = ContentBasedRecs()
     self.cf = NeighborhoodBasedRecs()
     # Supplies the FWLS feature functions fun1/fun2.
     self.fwls = FeatureWeightedLinearStacking()
     # Cap on the number of ratings used for training.
     self.data_size = data_size
示例#4
0
def recs_cb(request, user_id, num=6):
    """Serve content-based recommendations for `user_id` as JSON."""
    recommendations = ContentBasedRecs().recommend_items(user_id, num)
    payload = {'user_id': user_id, 'data': recommendations}
    return JsonResponse(payload, safe=False)
示例#5
0
def similar_content(request, content_id, num=6):
    """Serve up to `num` recommendations similar to `content_id` as JSON.

    Cleanup: removed a large block of commented-out LDA-similarity code
    that had been superseded by recommend_items_from_items.
    """
    sorted_items = ContentBasedRecs().recommend_items_from_items([content_id],
                                                                 num)
    data = {'source_id': content_id, 'data': sorted_items}

    return JsonResponse(data, safe=False)
示例#6
0
def evaluate_cb_recommender():
    """Sweep neighbourhood size k for the content-based recommender and
    append evaluation metrics to a timestamped CSV file."""
    # Sweep constants recorded in every CSV row.
    min_sim = 0
    min_num_of_ratings = 0
    min_rank = 0

    timestr = time.strftime("%Y%m%d-%H%M%S")
    file_name = '{}-cb-k.csv'.format(timestr)

    # Line-buffered append so each sweep step hits disk promptly.
    with open(file_name, 'a', 1) as logfile:
        logfile.write(
            "ar, map, mae, k, min_sim, min_num_of_ratings, min_rank\n")

        for k in np.arange(5, 20, 3):
            recommender = ContentBasedRecs()

            er = EvaluationRunner(0, None, recommender, k)

            result = er.calculate(10, 5, number_test_users=-1)

            # Renamed from `map` to avoid shadowing the builtin.
            mean_ap = result['map']
            mae = result['mae']
            ar = result['ar']
            # Bug fix: the original format string had 6 placeholders for
            # 7 arguments, silently dropping min_rank and leaving each
            # row one column short of the 7-column header.
            logfile.write("{}, {}, {}, {}, {}, {}, {}\n".format(
                ar, mean_ap, mae, k, min_sim, min_num_of_ratings, min_rank))
            logfile.flush()
示例#7
0
def evaluate_cb_recommender():
    """Sweep K for the content-based recommender, logging metrics and
    coverage to a timestamped CSV file."""
    # Sweep constants recorded in every CSV row.
    # Bug fix: these names were referenced in the log line but never
    # defined, raising NameError on the first iteration.
    min_overlap = 0
    min_sim = 0
    min_num_of_ratings = 0
    min_rank = 0

    timestr = time.strftime("%Y%m%d-%H%M%S")
    file_name = '{}-cb-k.csv'.format(timestr)

    lda_path = './lda/'
    # Bug fix: a trailing comma made `corpus` a 1-tuple, not a corpus.
    corpus = corpora.MmCorpus(lda_path + 'corpus.mm')
    index = similarities.MatrixSimilarity.load(lda_path + 'index.lda')

    # Line-buffered append so each sweep step hits disk promptly.
    with open(file_name, 'a', 1) as logfile:
        logfile.write(
            "rak, pak, mae, min_overlap, min_sim, K, min_num_of_ratings, min_rank, user_coverage, "
            "movie_coverage\n")

        for K in np.arange(2, 20, 2):
            recommender = ContentBasedRecs()

            er = EvaluationRunner(0, None, recommender, K)

            result = er.calculate(1, 5, number_test_users=-1)

            user_coverage, movie_coverage = RecommenderCoverage(
                recommender).calculate_coverage()
            pak = result['pak']
            mae = result['mae']
            rak = result['rak']
            logfile.write("{}, {}, {}, {}, {}, {}, {}, {}, {}, {}\n".format(
                rak, pak, mae, min_overlap, min_sim, K, min_num_of_ratings,
                min_rank, user_coverage, movie_coverage))
            logfile.flush()
示例#8
0
def evaluate_cb_recommender(coverage=False):
    """Sweep k for the content-based recommender and log metrics to a
    timestamped CSV file.

    coverage: when True, also compute user/movie coverage per k
    (expensive); otherwise both coverage columns are logged as 0.
    """
    timestr = time.strftime("%Y%m%d-%H%M%S")
    file_name = '{}-cb-k.csv'.format(timestr)

    # Line-buffered append so each sweep step hits disk promptly.
    with open(file_name, 'a', 1) as logfile:
        # Bug fix: the header listed 10 columns while each data row
        # carries 6 values; header now matches what is written.
        logfile.write("ar, map, mae, k, user_coverage, movie_coverage\n")

        for k in np.arange(5, 20, 3):
            recommender = ContentBasedRecs()

            er = EvaluationRunner(0, None, recommender, k)

            result = er.calculate(10, 5, number_test_users=-1)

            user_coverage, movie_coverage = 0, 0
            if coverage:
                user_coverage, movie_coverage = RecommenderCoverage(
                    recommender).calculate_coverage(k)

            # Renamed from `map` to avoid shadowing the builtin.
            mean_ap = result['map']
            mae = result['mae']
            ar = result['ar']
            logfile.write("{}, {}, {}, {}, {}, {}\n".format(
                ar, mean_ap, mae, k, user_coverage, movie_coverage))
            logfile.flush()
示例#9
0
class FWLSCalculator(object):
    """Fits the feature-weighted linear stacking (FWLS) weights by
    linear regression of ratings on feature-weighted component
    predictions."""

    def __init__(self, data_size=1000):
        self.logger = logging.getLogger('FWLS')
        # Populated by get_real_training_data().
        self.train_data = None
        self.test_data = None
        self.rating_count = None
        # Component recommenders whose predictions are blended.
        self.cb = ContentBasedRecs()
        self.cf = NeighborhoodBasedRecs()
        # Supplies the FWLS feature functions fun1/fun2.
        self.fwls = FeatureWeightedLinearStacking()
        # Cap on the number of ratings used for training.
        self.data_size = data_size

    def get_real_training_data(self):
        """Load up to `data_size` ratings and split 80/20 train/test."""
        columns = ['user_id', 'movie_id', 'rating', 'type']
        ratings_data = Rating.objects.all().values(*columns)[:self.data_size]
        df = pd.DataFrame.from_records(ratings_data, columns=columns)
        self.train_data, self.test_data = train_test_split(df, test_size=0.2)
        self.logger.debug("training data loaded {}".format(len(ratings_data)))

    def calculate_predictions_for_training_data(self):
        """Attach each component recommender's score ('cb', 'cf') to
        every training row."""
        self.logger.debug("[BEGIN] getting predictions")

        self.train_data['cb'] = self.train_data.apply(
            lambda row: self.cb.predict_score(row['user_id'],
                                              row['movie_id']), axis=1)
        self.train_data['cf'] = self.train_data.apply(
            lambda row: self.cf.predict_score(row['user_id'],
                                              row['movie_id']), axis=1)

        self.logger.debug("[END] getting predictions")
        return None

    def calculate_feature_functions_for_training_data(self):
        """Build the four regressors: component score times feature
        function (fun1 is constant, fun2 gates on user activity)."""
        self.logger.debug("[BEGIN] calculating functions")
        self.train_data['cb1'] = self.train_data.apply(
            lambda row: row['cb'] * self.fwls.fun1(), axis=1)
        self.train_data['cb2'] = self.train_data.apply(
            lambda row: row['cb'] * self.fwls.fun2(row['user_id']), axis=1)

        self.train_data['cf1'] = self.train_data.apply(
            lambda row: row['cf'] * self.fwls.fun1(), axis=1)
        self.train_data['cf2'] = self.train_data.apply(
            lambda row: row['cf'] * self.fwls.fun2(row['user_id']), axis=1)

        self.logger.debug("[END] calculating functions")
        return None

    def train(self):
        """Regress rating on the four feature columns; return the
        fitted coefficients (cb1, cb2, cf1, cf2 order).

        Cleanup: removed the superseded, commented-out statsmodels OLS
        variant.
        """
        regr = linear_model.LinearRegression()

        regr.fit(self.train_data[['cb1', 'cb2', 'cf1', 'cf2']],
                 self.train_data['rating'])
        self.logger.info(regr.coef_)
        return regr.coef_
示例#10
0
def recs_cb(request, user_id, num=6):
    """Serve content-based recommendations for `user_id` as JSON.

    Cleanup: removed leftover debug timing that printed "lda loaded"
    immediately after taking the start time — before any work had run —
    so the reported interval was always ~0 and misleading.
    """
    sorted_items = ContentBasedRecs().recommend_items(user_id, num)

    data = {'user_id': user_id, 'data': sorted_items}

    return JsonResponse(data, safe=False)
示例#11
0
def similar_content(request, content_id, num=6):
    """Serve recommendations seeded by `content_id` as JSON."""
    recommendations = ContentBasedRecs().seeded_rec([content_id], num)
    payload = {'source_id': content_id, 'data': recommendations}
    return JsonResponse(payload, safe=False)
 def __init__(self, save_path, data_size=1000):
     # save_path: directory where the trained FWLS weights are pickled.
     self.save_path = save_path
     self.logger = logging.getLogger('FWLS')
     # Populated later by the training-data loader.
     self.train_data = None
     self.test_data = None
     self.rating_count = None
     # Component recommenders whose predictions are blended.
     self.cb = ContentBasedRecs()
     self.cf = NeighborhoodBasedRecs()
     # Supplies the FWLS feature functions fun1/fun2.
     self.fwls = FeatureWeightedLinearStacking()
     # Cap on the number of ratings used for training.
     self.data_size = data_size
示例#13
0
class FWLSCalculator(object):
    """Computes feature-weighted linear stacking weights by regressing
    ratings on feature-weighted component predictions."""

    def __init__(self):
        # Bug fix: the data attributes were named `train`/`test`, and
        # `self.train = None` shadowed the train() method on instances,
        # so obj.train() raised TypeError. Renamed to *_data.
        self.train_data = None
        self.test_data = None
        self.rating_count = None
        # Component recommenders whose predictions are blended.
        self.cb = ContentBasedRecs()
        self.cf = NeighborhoodBasedRecs()
        # Bug fix: supplies the feature functions fun1/fun2 used below;
        # the original called undefined self.func1/self.func2.
        self.fwls = FeatureWeightedLinearStacking()

    def get_real_training_data(self):
        """Load all ratings and split 80/20 into train/test frames."""
        columns = ['user_id', 'movie_id', 'rating', 'type']
        ratings_data = Rating.objects.all().values(*columns)
        df = pd.DataFrame.from_records(ratings_data, columns=columns)
        self.train_data, self.test_data = train_test_split(df, test_size=0.2)

    def get_training_data(self):
        """Load a tiny hard-coded sample (for smoke-testing the
        pipeline); returns the training frame."""
        print('load data')

        data = np.array([['1', '2', 3.6], ['1', '3', 5.0], ['1', '4', 5.0],
                         ['2', '2', 3.0]])
        self.train_data = pd.DataFrame(data,
                                       columns=['user_id', 'movie_id', 'rating'])
        self.rating_count = self.train_data.groupby('user_id').count().reset_index()
        return self.train_data

    def calculate_predictions_for_training_data(self):
        """Attach each component recommender's score ('cb', 'cf') to
        every training row."""
        self.train_data['cb'] = self.train_data.apply(
            lambda row: self.cb.predict_score(row['user_id'],
                                              row['movie_id']), axis=1)
        self.train_data['cf'] = self.train_data.apply(
            lambda row: self.cf.predict_score(row['user_id'],
                                              row['movie_id']), axis=1)
        return None

    def calculate_feature_functions_for_training_data(self):
        """Build the four regressors (component score x feature
        function).

        Bug fix: the cb1/cf1 apply() calls were missing axis=1, so the
        lambdas received columns instead of rows.
        """
        self.train_data['cb1'] = self.train_data.apply(
            lambda row: row['cb'] * self.fwls.fun1(), axis=1)
        self.train_data['cb2'] = self.train_data.apply(
            lambda row: row['cb'] * self.fwls.fun2(row['user_id']), axis=1)

        self.train_data['cf1'] = self.train_data.apply(
            lambda row: row['cf'] * self.fwls.fun1(), axis=1)
        self.train_data['cf2'] = self.train_data.apply(
            lambda row: row['cf'] * self.fwls.fun2(row['user_id']), axis=1)

        return None

    def train(self):
        """Fit OLS of rating on the four features and print the result.

        Bug fix: the original referenced an undefined global `fwls`;
        use this instance's training frame.
        """
        result = sm.ols(formula="rating ~ cb1+cb2+cf1+cf2",
                        data=self.train_data).fit()
        print(result)
示例#14
0
class FWLSCalculator(object):
    """Fits feature-weighted linear stacking (FWLS) weights by linear
    regression and pickles them for the FWLS recommender to load."""

    def __init__(self, save_path, data_size=1000):
        # save_path: directory where fwls_parameters.data is written.
        self.save_path = save_path
        self.logger = logging.getLogger('FWLS')
        # Populated by get_real_training_data() or build().
        self.train_data = None
        self.test_data = None
        self.rating_count = None
        # Component recommenders whose predictions are blended.
        self.cb = ContentBasedRecs()
        self.cf = NeighborhoodBasedRecs()
        # Supplies the FWLS feature functions fun1/fun2.
        self.fwls = FeatureWeightedLinearStacking()
        # Cap on the number of ratings used for training.
        self.data_size = data_size

    def get_real_training_data(self):
        """Load up to `data_size` ratings and split 80/20 train/test."""
        columns = ['user_id', 'movie_id', 'rating', 'type']
        ratings_data = Rating.objects.all().values(*columns)[:self.data_size]
        df = pd.DataFrame.from_records(ratings_data, columns=columns)
        self.train_data, self.test_data = train_test_split(df, test_size=0.2)
        self.logger.debug("training data loaded {}".format(len(ratings_data)))

    def calculate_predictions_for_training_data(self):
        """Attach each component recommender's score ('cb', 'cf') to
        every training row."""
        self.logger.debug("[BEGIN] getting predictions")

        self.train_data['cb'] = self.train_data.apply(
            lambda row: self.cb.predict_score(row['user_id'],
                                              row['movie_id']), axis=1)
        self.train_data['cf'] = self.train_data.apply(
            lambda row: self.cf.predict_score(row['user_id'],
                                              row['movie_id']), axis=1)

        self.logger.debug("[END] getting predictions")
        return None

    def calculate_feature_functions_for_training_data(self):
        """Build the four regressors: component score times feature
        function (fun1 is constant, fun2 gates on user activity)."""
        self.logger.debug("[BEGIN] calculating functions")
        self.train_data['cb1'] = self.train_data.apply(
            lambda row: row['cb'] * self.fwls.fun1(), axis=1)
        self.train_data['cb2'] = self.train_data.apply(
            lambda row: row['cb'] * self.fwls.fun2(row['user_id']), axis=1)

        self.train_data['cf1'] = self.train_data.apply(
            lambda row: row['cf'] * self.fwls.fun1(), axis=1)
        self.train_data['cf2'] = self.train_data.apply(
            lambda row: row['cf'] * self.fwls.fun2(row['user_id']), axis=1)

        self.logger.debug("[END] calculating functions")
        return None

    def build(self, train_data=None, params=None):
        """Run the full pipeline and return the trained weights."""
        if params:
            self.save_path = params['save_path']

        # Bug fix: the original loaded real data when train_data was
        # None and then unconditionally overwrote self.train_data with
        # the (None) argument, discarding the data it had just loaded.
        if train_data is not None:
            self.train_data = train_data
        else:
            self.get_real_training_data()

        self.calculate_predictions_for_training_data()
        self.calculate_feature_functions_for_training_data()

        return self.train()

    def train(self, ratings=None, train_feature_recs=False):
        """Fit the regression and pickle the weights to save_path.

        train_feature_recs: when True, (re)build the component models
        before fitting.
        """
        if train_feature_recs:
            ItemSimilarityMatrixBuilder().build(ratings)
            LdaModel.build()

        regr = linear_model.LinearRegression()

        regr.fit(self.train_data[['cb1', 'cb2', 'cf1', 'cf2']],
                 self.train_data['rating'])
        self.logger.info(regr.coef_)

        result = {'cb1': regr.coef_[0],
                  'cb2': regr.coef_[1],
                  'cf1': regr.coef_[2],
                  'cf2': regr.coef_[3]}

        ensure_dir(self.save_path)
        with open(self.save_path + 'fwls_parameters.data', 'wb') as ub_file:
            pickle.dump(result, ub_file)
        return result
示例#15
0
class FeatureWeightedLinearStacking(base_recommender):
    """Hybrid recommender blending content-based (cb) and neighborhood
    collaborative-filtering (cf) predictions via feature-weighted
    linear stacking, with weights loadable from a trained model."""

    def __init__(self):
        self.cb = ContentBasedRecs()
        self.cf = NeighborhoodBasedRecs()

        # Default stacking weights (overwritten by set_save_path).
        # Built from strings so Decimal holds the intended value
        # exactly (Decimal(float) would carry binary noise).
        self.wcb1 = Decimal('0.65221204')
        self.wcb2 = Decimal('-0.14638855')
        self.wcf1 = Decimal('-0.0062952')
        self.wcf2 = Decimal('0.09139193')
        self.intercept = Decimal(0)

    def fun1(self):
        """Constant feature function: always on."""
        return Decimal(1.0)

    def fun2(self, user_id):
        """Indicator feature: 1 when the user has more than 3 ratings."""
        count = Rating.objects.filter(user_id=user_id).count()
        return Decimal(1.0) if count > 3 else Decimal(0.0)

    def set_save_path(self, save_path):
        """Load trained weights pickled by FWLSCalculator.train()."""
        with open(save_path + 'fwls_parameters.data', 'rb') as ub_file:
            parameters = pickle.load(ub_file)
            self.wcb1 = Decimal(parameters['cb1'])
            self.wcb2 = Decimal(parameters['cb2'])
            # Bug fix: wcf1 was loaded from 'cb1', silently reusing the
            # content-based weight for the collaborative component.
            self.wcf1 = Decimal(parameters['cf1'])
            self.wcf2 = Decimal(parameters['cf2'])
            self.intercept = Decimal(parameters['intercept'])

    def recommend_items_by_ratings(self, user_id, active_user_items, num=6):
        """Recommend based on an explicit set of user ratings."""
        cb_recs = self.cb.recommend_items_by_ratings(user_id,
                                                     active_user_items,
                                                     num * 5)
        cf_recs = self.cf.recommend_items_by_ratings(user_id,
                                                     active_user_items,
                                                     num * 5)

        return self.merge_predictions(user_id, cb_recs, cf_recs, num)

    def recommend_items(self, user_id, num=6):
        """Recommend for a stored user profile. Over-fetches num*5
        candidates from each component so the blend can re-rank."""
        cb_recs = self.cb.recommend_items(user_id, num * 5)
        cf_recs = self.cf.recommend_items(user_id, num * 5)

        return self.merge_predictions(user_id, cb_recs, cf_recs, num)

    def merge_predictions(self, user_id, cb_recs, cf_recs, num):
        """Blend the two candidate lists and return the top-`num`
        (movie_id, {'prediction': p}) pairs."""
        combined_recs = dict()
        for movie_id, details in cb_recs:
            combined_recs[movie_id] = {'cb': details['prediction']}

        for movie_id, details in cf_recs:
            combined_recs.setdefault(movie_id, {})['cf'] = details['prediction']

        fwls_preds = dict()
        for movie_id, recs in combined_recs.items():
            # Fill in whichever component did not rank this movie.
            if 'cb' not in recs:
                recs['cb'] = self.cb.predict_score(user_id, movie_id)
            if 'cf' not in recs:
                recs['cf'] = self.cf.predict_score(user_id, movie_id)
            pred = self.prediction(recs['cb'], recs['cf'], user_id)
            fwls_preds[movie_id] = {'prediction': pred}
        sorted_items = sorted(
            fwls_preds.items(),
            key=lambda item: -float(item[1]['prediction']))[:num]
        return sorted_items

    def predict_score(self, user_id, item_id):
        """Blend the two component scores for one (user, item) pair."""
        p_cb = self.cb.predict_score(user_id, item_id)
        p_cf = self.cf.predict_score(user_id, item_id)
        # Bug fix: the original computed the blend but never returned
        # it, so callers always received None.
        return self.prediction(p_cb, p_cf, user_id)

    def prediction(self, p_cb, p_cf, user_id):
        """Weighted combination of component predictions plus the
        regression intercept."""
        p = (self.wcb1 * self.fun1() * p_cb +
             self.wcb2 * self.fun2(user_id) * p_cb +
             self.wcf1 * self.fun1() * p_cf +
             self.wcf2 * self.fun2(user_id) * p_cf)
        return p + self.intercept
示例#16
0
 def __init__(self):
     # Populated later by the training-data loaders.
     self.train = None
     self.test = None
     self.rating_count = None
     # Component recommenders whose predictions are blended.
     self.cb = ContentBasedRecs()
     self.cf = NeighborhoodBasedRecs()
示例#17
0
    args = parser.parse_args()

    print(args.fwls)
    # All coverage runs use the same cutoff K.
    k = 10
    cov = None
    if args.fwls:
        logger.debug("evaluating coverage of fwls")
        # Bug fix: the class object itself was passed instead of an
        # instance (all other branches instantiate their recommender).
        cov = RecommenderCoverage(FeatureWeightedLinearStacking())
        cov.calculate_coverage(K=k, recName='fwls{}'.format(k))

    if args.cf:
        logger.debug("evaluating coverage of cf")
        cov = RecommenderCoverage(NeighborhoodBasedRecs())
        cov.calculate_coverage(K=k, recName='cf{}'.format(k))

    if args.cb:
        logger.debug("evaluating coverage of cb")
        cov = RecommenderCoverage(ContentBasedRecs())
        cov.calculate_coverage(K=k, recName='cb{}'.format(k))

    if args.ltr:
        logger.debug("evaluating coverage of ltr")
        cov = RecommenderCoverage(BPRRecs())
        cov.calculate_coverage(K=k, recName='bpr{}'.format(k))

    if args.funk:
        logger.debug("evaluating coverage of funk")
        cov = RecommenderCoverage(FunkSVDRecs())

        cov.calculate_coverage(K=k, recName='funk{}'.format(k))
class FWLSCalculator(object):
    """Fits feature-weighted linear stacking (FWLS) weights by linear
    regression of ratings on feature-weighted component predictions,
    then pickles them for the FWLS recommender to load."""

    def __init__(self, save_path, data_size=1000):
        # save_path: directory where fwls_parameters.data is written.
        # data_size: cap on the number of ratings used for training.
        self.save_path = save_path
        self.logger = logging.getLogger('FWLS')
        # Populated by get_real_training_data() or build().
        self.train_data = None
        self.test_data = None
        self.rating_count = None
        # Component recommenders whose predictions are blended.
        self.cb = ContentBasedRecs()
        self.cf = NeighborhoodBasedRecs()
        # Supplies the FWLS feature functions fun1/fun2.
        self.fwls = FeatureWeightedLinearStacking()
        self.data_size = data_size

    def get_real_training_data(self):
        """Load up to data_size ratings and split 80/20 train/test."""
        columns = ['user_id', 'movie_id', 'rating', 'type']
        ratings_data = Rating.objects.all().values(*columns)[:self.data_size]
        df = pd.DataFrame.from_records(ratings_data, columns=columns)
        self.train_data, self.test_data = train_test_split(df, test_size=0.2)
        self.logger.debug("training data loaded {}".format(len(ratings_data)))

    def calculate_predictions_for_training_data(self):
        """Attach each component recommender's score ('cb', 'cf') to
        every training row."""
        self.logger.debug("[BEGIN] getting predictions")

        self.train_data['cb'] = self.train_data.apply(lambda data:
                                                      self.cb.predict_score(data['user_id'],
                                                                            data['movie_id']), axis=1)

        self.train_data['cf'] = self.train_data.apply(lambda data:
                                                      self.cf.predict_score(data['user_id'],
                                                                            data['movie_id']), axis=1)

        self.logger.debug("[END] getting predictions")
        return None

    def calculate_feature_functions_for_training_data(self):
        """Build the four regressors: component score times feature
        function (fun1 is constant, fun2 gates on user activity)."""
        self.logger.debug("[BEGIN] calculating functions")
        self.train_data['cb1'] = self.train_data.apply(lambda data:
                                                       data['cb'] * self.fwls.fun1(), axis=1)
        self.train_data['cb2'] = self.train_data.apply(lambda data:
                                                       data['cb'] * self.fwls.fun2(data['user_id']), axis=1)

        self.train_data['cf1'] = self.train_data.apply(lambda data:
                                                       data['cf'] * self.fwls.fun1(), axis=1)
        self.train_data['cf2'] = self.train_data.apply(lambda data:
                                                       data['cf'] * self.fwls.fun2(data['user_id']), axis=1)

        self.logger.debug("[END] calculating functions")
        return None

    def build(self, train_data=None, params=None):
        """Run the full pipeline and return the trained weights."""
        if params:
            self.save_path = params['save_path']
            self.data_size = params['data_sample']

        if train_data is not None:
            self.train_data = train_data
            # Optionally subsample the supplied frame to data_size rows.
            if self.data_size > 0:
                self.train_data = self.train_data.sample(self.data_size)
                self.logger.debug("training sample of size {}".format(self.train_data.shape[0]))
        else:
            self.get_real_training_data()

        self.calculate_predictions_for_training_data()
        self.calculate_feature_functions_for_training_data()

        return self.train()

    def train(self, ratings=None, train_feature_recs=False):
        """Fit the regression and pickle the weights (including the
        intercept) to save_path.

        train_feature_recs: when True, (re)build the component models
        before fitting.
        """
        if train_feature_recs:
            ItemSimilarityMatrixBuilder().build(ratings)
            LdaModel.build()

        # NOTE(review): `normalize=` was deprecated in scikit-learn
        # 0.24 and removed in 1.2 — this call fails on newer versions;
        # confirm the pinned sklearn version.
        regr = linear_model.LinearRegression(fit_intercept=True,
                                             n_jobs=-1,
                                             normalize=True)

        regr.fit(self.train_data[['cb1', 'cb2', 'cf1', 'cf2']], self.train_data['rating'])
        self.logger.info(regr.coef_)

        result = {'cb1': regr.coef_[0],
                  'cb2': regr.coef_[1],
                  'cf1': regr.coef_[2],
                  'cf2': regr.coef_[3],
                  'intercept': regr.intercept_}
        self.logger.debug(result)
        # Sample-row debug dump; assumes at least 101 training rows.
        self.logger.debug(self.train_data.iloc[100])
        ensure_dir(self.save_path)
        with open(self.save_path + 'fwls_parameters.data', 'wb') as ub_file:
            pickle.dump(result, ub_file)
        return result