示例#1
0
class FeatureWeightedLinearStacking(base_recommender):
    def __init__(self):
        self.cb = ContentBasedRecs()
        self.cf = NeighborhoodBasedRecs()

        self.wcb1 = Decimal(0.65221204)
        self.wcb2 = Decimal(-0.14638855)
        self.wcf1 = Decimal(-0.0062952)
        self.wcf2 = Decimal(0.09139193)

    def fun1(self):
        return Decimal(1.0)

    def fun2(self, user_id):
        count = Rating.objects.filter(user_id=user_id).count()
        if count > 3.0:
            return Decimal(1.0)
        return Decimal(0.0)

    def recommend_items(self, user_id, num=6):
        cb_recs = self.cb.recommend_items(user_id, num * 5)
        cf_recs = self.cf.recommend_items(user_id, num * 5)

        combined_recs = dict()
        for rec in cb_recs:
            movie_id = rec[0]
            pred = rec[1]['prediction']
            combined_recs[movie_id] = {'cb': pred}

        for rec in cf_recs:
            movie_id = rec[0]
            pred = rec[1]['prediction']
            if movie_id in combined_recs.keys():
                combined_recs[movie_id]['cf'] = pred
            else:
                combined_recs[movie_id] = {'cf': pred}
        fwls_preds = dict()
        for key, recs in combined_recs.items():
            if 'cb' not in recs.keys():
                recs['cb'] = self.cb.predict_score(user_id, key)
            if 'cf' not in recs.keys():
                recs['cf'] = self.cf.predict_score(user_id, key)
            pred = self.prediction(recs['cb'], recs['cf'], user_id)
            fwls_preds[key] = {'prediction': pred}
        sorted_items = sorted(fwls_preds.items(), key=lambda item: -float(item[1]['prediction']))[:num]
        return sorted_items


    def predict_score(self, user_id, item_id):
        p_cb = self.cb.predict_score(user_id, item_id)
        p_cf = self.cf.predict_score(user_id, item_id)

        self.prediction(p_cb, p_cf, user_id)

    def prediction(self, p_cb, p_cf, user_id):
        p = (self.wcb1 * self.fun1() * p_cb +
             self.wcb2 * self.fun2(user_id) * p_cb +
             self.wcf1 * self.fun1() * p_cf +
             self.wcf2 * self.fun2(user_id) * p_cf)
        return p
示例#2
0
class FWLSCalculator(object):
    def __init__(self, data_size=1000):
        self.logger = logging.getLogger('FWLS')
        self.train_data = None
        self.test_data = None
        self.rating_count = None
        self.cb = ContentBasedRecs()
        self.cf = NeighborhoodBasedRecs()
        self.fwls = FeatureWeightedLinearStacking()
        self.data_size = data_size

    def get_real_training_data(self):
        columns = ['user_id', 'movie_id', 'rating', 'type']
        ratings_data = Rating.objects.all().values(*columns)[:self.data_size]
        df = pd.DataFrame.from_records(ratings_data, columns=columns)
        self.train_data, self.test_data = train_test_split(df, test_size=0.2)
        self.logger.debug("training data loaded {}".format(len(ratings_data)))

    def calculate_predictions_for_training_data(self):
        self.logger.debug("[BEGIN] getting predictions")

        self.train_data['cb'] = self.train_data.apply(
            lambda data: self.cb.predict_score(data['user_id'], data['movie_id'
                                                                     ]),
            axis=1)
        self.train_data['cf'] = self.train_data.apply(
            lambda data: self.cf.predict_score(data['user_id'], data['movie_id'
                                                                     ]),
            axis=1)

        self.logger.debug("[END] getting predictions")
        return None

    def calculate_feature_functions_for_training_data(self):
        self.logger.debug("[BEGIN] calculating functions")
        self.train_data['cb1'] = self.train_data.apply(
            lambda data: data['cb'] * self.fwls.fun1(), axis=1)
        self.train_data['cb2'] = self.train_data.apply(
            lambda data: data['cb'] * self.fwls.fun2(data['user_id']), axis=1)

        self.train_data['cf1'] = self.train_data.apply(
            lambda data: data['cf'] * self.fwls.fun1(), axis=1)
        self.train_data['cf2'] = self.train_data.apply(
            lambda data: data['cf'] * self.fwls.fun2(data['user_id']), axis=1)

        self.logger.debug("[END] calculating functions")
        return None

    def train(self):
        #model = sm.ols(formula="rating ~ cb1+cb2+cf1+cf2", data=self.train_data[['rating', 'cb1','cb2','cf1','cf2']])
        #results = model.fit()
        #self.logger.info(results.summary())
        #self.logger.info(results.params)
        regr = linear_model.LinearRegression()

        regr.fit(self.train_data[['cb1', 'cb2', 'cf1', 'cf2']],
                 self.train_data['rating'])
        self.logger.info(regr.coef_)
        return regr.coef_
示例#3
0
class FWLSCalculator(object):
    def __init__(self):
        self.train = None
        self.test = None
        self.rating_count = None
        self.cb = ContentBasedRecs()
        self.cf = NeighborhoodBasedRecs()

    def get_real_training_data(self):
        columns = ['user_id', 'movie_id', 'rating', 'type']
        ratings_data = Rating.objects.all().values(*columns)
        df = pd.DataFrame.from_records(ratings_data, columns=columns)
        self.train, self.test = train_test_split(df, test_size=0.2)

    def get_training_data(self):
        print('load data')

        data = np.array([['1', '2', 3.6], ['1', '3', 5.0], ['1', '4', 5.0],
                         ['2', '2', 3.0]])
        self.train = pd.DataFrame(data,
                                  columns=['user_id', 'movie_id', 'rating'])
        self.rating_count = self.train.groupby('user_id').count().reset_index()
        return self.train

    def calculate_predictions_for_training_data(self):
        self.train['cb'] = self.train.apply(lambda data: self.cb.predict_score(
            data['user_id'], data['movie_id']),
                                            axis=1)
        self.train['cf'] = self.train.apply(lambda data: self.cf.predict_score(
            data['user_id'], data['movie_id']),
                                            axis=1)
        return None

    def calculate_feature_functions_for_training_data(self):
        self.train['cb1'] = self.train.apply(
            lambda data: data.cb * self.func1())
        self.train['cb2'] = self.train.apply(
            lambda data: data.cb * self.func2(data['user_id']), axis=1)

        self.train['cf1'] = self.train.apply(
            lambda data: data.cf * self.func1())
        self.train['cf2'] = self.train.apply(
            lambda data: data.cf * self.func2(data['user_id']), axis=1)

        return None

    def train(self):
        result = sm.ols(formula="rating ~ cb1+cb2+cf1+cf2",
                        data=fwls.train).fit()
        print(result)
class FWLSCalculator(object):
    def __init__(self, save_path, data_size=1000):
        self.save_path = save_path
        self.logger = logging.getLogger('FWLS')
        self.train_data = None
        self.test_data = None
        self.rating_count = None
        self.cb = ContentBasedRecs()
        self.cf = NeighborhoodBasedRecs()
        self.fwls = FeatureWeightedLinearStacking()
        self.data_size = data_size

    def get_real_training_data(self):
        columns = ['user_id', 'movie_id', 'rating', 'type']
        ratings_data = Rating.objects.all().values(*columns)[:self.data_size]
        df = pd.DataFrame.from_records(ratings_data, columns=columns)
        self.train_data, self.test_data = train_test_split(df, test_size=0.2)
        self.logger.debug("training data loaded {}".format(len(ratings_data)))

    def calculate_predictions_for_training_data(self):
        self.logger.debug("[BEGIN] getting predictions")

        self.train_data['cb'] = self.train_data.apply(lambda data:
                                                      self.cb.predict_score(data['user_id'],
                                                                            data['movie_id']), axis=1)

        self.train_data['cf'] = self.train_data.apply(lambda data:
                                                      self.cf.predict_score(data['user_id'],
                                                                            data['movie_id']), axis=1)

        self.logger.debug("[END] getting predictions")
        return None

    def calculate_feature_functions_for_training_data(self):
        self.logger.debug("[BEGIN] calculating functions")
        self.train_data['cb1'] = self.train_data.apply(lambda data:
                                                       data['cb'] * self.fwls.fun1(), axis=1)
        self.train_data['cb2'] = self.train_data.apply(lambda data:
                                                       data['cb'] * self.fwls.fun2(data['user_id']), axis=1)

        self.train_data['cf1'] = self.train_data.apply(lambda data:
                                                       data['cf'] * self.fwls.fun1(), axis=1)
        self.train_data['cf2'] = self.train_data.apply(lambda data:
                                                       data['cf'] * self.fwls.fun2(data['user_id']), axis=1)

        self.logger.debug("[END] calculating functions")
        return None

    def build(self, train_data=None, params=None):

        if params:
            self.save_path = params['save_path']
            self.data_size = params['data_sample']

        if train_data is not None:
            self.train_data = train_data
            if self.data_size > 0:
                self.train_data = self.train_data.sample(self.data_size)
                self.logger.debug("training sample of size {}".format(self.train_data.shape[0]))
        else:
            self.get_real_training_data()

        self.calculate_predictions_for_training_data()
        self.calculate_feature_functions_for_training_data()

        return self.train()

    def train(self, ratings=None, train_feature_recs=False):

        if train_feature_recs:
            ItemSimilarityMatrixBuilder().build(ratings)
            LdaModel.build()

        regr = linear_model.LinearRegression(fit_intercept=True,
                                             n_jobs=-1,
                                             normalize=True)

        regr.fit(self.train_data[['cb1', 'cb2', 'cf1', 'cf2']], self.train_data['rating'])
        self.logger.info(regr.coef_)

        result = {'cb1': regr.coef_[0],
                  'cb2': regr.coef_[1],
                  'cf1': regr.coef_[2],
                  'cf2': regr.coef_[3],
                  'intercept': regr.intercept_}
        self.logger.debug(result)
        self.logger.debug(self.train_data.iloc[100])
        ensure_dir(self.save_path)
        with open(self.save_path + 'fwls_parameters.data', 'wb') as ub_file:
            pickle.dump(result, ub_file)
        return result
示例#5
0
class FWLSCalculator(object):

    def __init__(self, save_path, data_size = 1000):
        self.save_path = save_path
        self.logger = logging.getLogger('FWLS')
        self.train_data = None
        self.test_data = None
        self.rating_count = None
        self.cb = ContentBasedRecs()
        self.cf = NeighborhoodBasedRecs()
        self.fwls = FeatureWeightedLinearStacking()
        self.data_size = data_size

    def get_real_training_data(self):
        columns = ['user_id', 'movie_id', 'rating', 'type']
        ratings_data = Rating.objects.all().values(*columns)[:self.data_size]
        df = pd.DataFrame.from_records(ratings_data, columns=columns)
        self.train_data, self.test_data = train_test_split(df, test_size=0.2)
        self.logger.debug("training data loaded {}".format(len(ratings_data)))

    def calculate_predictions_for_training_data(self):
        self.logger.debug("[BEGIN] getting predictions")

        self.train_data['cb'] = self.train_data.apply(lambda data:
                                            self.cb.predict_score(data['user_id'], data['movie_id']), axis=1)
        self.train_data['cf'] = self.train_data.apply(lambda data:
                                            self.cf.predict_score(data['user_id'], data['movie_id']), axis=1)

        self.logger.debug("[END] getting predictions")
        return None

    def calculate_feature_functions_for_training_data(self):
        self.logger.debug("[BEGIN] calculating functions")
        self.train_data['cb1'] = self.train_data.apply(lambda data:
                                             data['cb'] * self.fwls.fun1(), axis=1)
        self.train_data['cb2'] = self.train_data.apply(lambda data:
                                             data['cb'] * self.fwls.fun2(data['user_id']), axis = 1)

        self.train_data['cf1'] = self.train_data.apply(lambda data:
                                             data['cf'] * self.fwls.fun1(), axis=1)
        self.train_data['cf2'] = self.train_data.apply(lambda data:
                                             data['cf'] * self.fwls.fun2(data['user_id']), axis = 1)

        self.logger.debug("[END] calculating functions")
        return None

    def build(self, train_data = None, params = None):

        if params:
            self.save_path = params['save_path']

        if train_data is None:
            self.get_real_training_data()

        self.train_data = train_data
        self.calculate_predictions_for_training_data()
        self.calculate_feature_functions_for_training_data()

        return self.train()

    def train(self, ratings = None, train_feature_recs= False):

        if train_feature_recs:
            ItemSimilarityMatrixBuilder().build(ratings)
            LdaModel.build()

        regr = linear_model.LinearRegression()

        regr.fit(self.train_data[['cb1','cb2','cf1','cf2']], self.train_data['rating'])
        self.logger.info(regr.coef_)

        result = {'cb1': regr.coef_[0],
                'cb2': regr.coef_[1],
                'cf1': regr.coef_[2],
                'cf2': regr.coef_[3]
                }

        ensure_dir(self.save_path)
        with open(self.save_path + 'fwls_parameters.data', 'wb') as ub_file:
            pickle.dump(result, ub_file)
        return result
示例#6
0
class FeatureWeightedLinearStacking(base_recommender):
    def __init__(self):
        self.cb = ContentBasedRecs()
        self.cf = NeighborhoodBasedRecs()

        self.wcb1 = Decimal(0.65221204)
        self.wcb2 = Decimal(-0.14638855)
        self.wcf1 = Decimal(-0.0062952)
        self.wcf2 = Decimal(0.09139193)
        self.intercept = Decimal(0)

    def fun1(self):
        return Decimal(1.0)

    def fun2(self, user_id):
        count = Rating.objects.filter(user_id=user_id).count()
        if count > 3.0:
            return Decimal(1.0)
        return Decimal(0.0)

    def set_save_path(self, save_path):
        with open(save_path + 'fwls_parameters.data', 'rb') as ub_file:
            parameters = pickle.load(ub_file)
            self.wcb1 = Decimal(parameters['cb1'])
            self.wcb2 = Decimal(parameters['cb2'])
            self.wcf1 = Decimal(parameters['cb1'])
            self.wcf2 = Decimal(parameters['cf2'])
            self.intercept = Decimal(parameters['intercept'])

    def recommend_items_by_ratings(self, user_id, active_user_items, num=6):

        cb_recs = self.cb.recommend_items_by_ratings(user_id,
                                                     active_user_items,
                                                     num * 5)
        cf_recs = self.cf.recommend_items_by_ratings(user_id,
                                                     active_user_items,
                                                     num * 5)

        return self.merge_predictions(user_id, cb_recs, cf_recs, num)

    def recommend_items(self, user_id, num=6):
        cb_recs = self.cb.recommend_items(user_id, num * 5)
        cf_recs = self.cf.recommend_items(user_id, num * 5)

        return self.merge_predictions(user_id, cb_recs, cf_recs, num)

    def merge_predictions(self, user_id, cb_recs, cf_recs, num):

        combined_recs = dict()
        for rec in cb_recs:
            movie_id = rec[0]
            pred = rec[1]['prediction']
            combined_recs[movie_id] = {'cb': pred}

        for rec in cf_recs:
            movie_id = rec[0]
            pred = rec[1]['prediction']
            if movie_id in combined_recs.keys():
                combined_recs[movie_id]['cf'] = pred
            else:
                combined_recs[movie_id] = {'cf': pred}
        fwls_preds = dict()
        for key, recs in combined_recs.items():
            if 'cb' not in recs.keys():
                recs['cb'] = self.cb.predict_score(user_id, key)
            if 'cf' not in recs.keys():
                recs['cf'] = self.cf.predict_score(user_id, key)
            pred = self.prediction(recs['cb'], recs['cf'], user_id)
            fwls_preds[key] = {'prediction': pred}
        sorted_items = sorted(
            fwls_preds.items(),
            key=lambda item: -float(item[1]['prediction']))[:num]
        return sorted_items

    def predict_score(self, user_id, item_id):
        p_cb = self.cb.predict_score(user_id, item_id)
        p_cf = self.cf.predict_score(user_id, item_id)

        self.prediction(p_cb, p_cf, user_id)

    def prediction(self, p_cb, p_cf, user_id):
        p = (self.wcb1 * self.fun1() * p_cb +
             self.wcb2 * self.fun2(user_id) * p_cb +
             self.wcf1 * self.fun1() * p_cf +
             self.wcf2 * self.fun2(user_id) * p_cf)
        return p + self.intercept