Example #1
    def __init__(self, item_features=None, thresh=2.9, alpha=0.01, beta=0.01):
        self._count_tables = {}
        self._item_features = item_features
        self.selector = UnratedItemCandidateSelector()
        # Clamp the smoothing parameters so a zero alpha/beta cannot
        # zero out the table's probabilities.
        alpha = self.ensure_minimum_score(alpha)
        beta = self.ensure_minimum_score(beta)
        self._nb_table = NaiveBayesTable(thresh, alpha, beta)
Example #2
    def __init__(self, algorithms, weights):
        """
        Args:
            algorithms: a list of component algorithms.  Each one will be trained.
            weights: weights for each component to combine predictions.
        """
        # HWK 3: Code here
        if len(algorithms) != len(weights):
            raise ValueError(
                'algorithms and weights must have the same length')

        self.algorithms = [my_clone(algo) for algo in algorithms]
        total = sum(weights)
        self.weights = [weight / total for weight in weights]
        self.selector = UnratedItemCandidateSelector()
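# A quick check of the normalization step above (a standalone sketch; the
# weights are made up for the demo):
weights = [2, 1, 1]
total = sum(weights)
normalized = [w / total for w in weights]
assert abs(sum(normalized) - 1.0) < 1e-12
print(normalized)  # [0.5, 0.25, 0.25]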
Example #3
    def __init__(self, algorithms, weights):
        """
        Args:
            algorithms: a list of component algorithms.  Each one will be trained.
            weights: weights for each component to combine predictions.
        """
        # HWK 3: Code here
        self.algorithms = algorithms
        self.selector = UnratedItemCandidateSelector()
        # Normalize the weights so they sum to 1.
        total = sum(weights)
        self.weights = [w / total for w in weights]
Example #4
    def __init__(self, predictor, genders, selector=None):
        self.predictor = predictor
        self.genders = genders
        self.genders.name = 'gender'
        self.genders.index.name = 'item'
        self.genders.sort_index(inplace=True)

        self.selector = selector if selector is not None else UnratedItemCandidateSelector()
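# The genders argument above behaves as a pandas Series of item genders
# indexed by item id (inferred from the renaming in __init__; the values
# here are made up for illustration):
import pandas as pd

genders = pd.Series({20: 'M', 10: 'F', 30: 'F'})
genders.name = 'gender'
genders.index.name = 'item'
genders.sort_index(inplace=True)
print(genders)  # items 10, 20, 30 with their genders, sorted by item id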
Example #5
    def __init__(self, algorithms, weights):
        """
        Args:
            algorithms: a list of component algorithms.  Each one will be trained.
            weights: weights for each component to combine predictions.
        """
        # HWK 3: Code here
        self.algorithms = algorithms

        # Normalize the weights so they sum to 1.
        w_sum = sum(weights)
        self.weights = [w / w_sum for w in weights]

        self.selector = UnratedItemCandidateSelector()
Example #6
class Retrainer:
    """
    Exploit internal model structures and relationships to reduce retrain cost for item-item k-NN
    search process.
    """
    def __init__(self, implicit):
        self.initial = default(implicit)
        self.initialized = False
        self.selector = UnratedItemCandidateSelector()

    def fit_initial(self, ratings):
        _log.info('fitting initial model %s', self.initial)
        self.initial.fit(ratings)
        fd, path = tempfile.mkstemp(prefix='lkpy-predict', suffix='.pkl',
                                    dir=util.scratch_dir(joblib=True))
        self.path = pathlib.Path(path)
        os.close(fd)

        del self.initial._sim_inv_
        _log.info('persisting initial model file to shared memory')
        joblib.dump(self.initial.sim_matrix_, path)
        self.initial.sim_matrix_ = joblib.load(path)

        self.selector.fit(ratings)
        self.initialized = True

    def instantiate(self, opts):
        nnbrs, smin = opts
        model = copy(self.initial)
        _log.info('updating model to use %d sims', nnbrs)
        model.nnbrs = nnbrs

        keep = model.sim_matrix_.values >= smin

        _log.info('trimming model to keep %d sims', np.sum(keep))
        model.sim_matrix_ = model.sim_matrix_.filter_nnzs(keep)
        model._sim_inv_ = model.sim_matrix_.transpose()

        return TopN(model, self.selector)
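# The trimming step in instantiate(), in isolation (a minimal sketch using
# scipy.sparse as a stand-in for LensKit's internal CSR type and its
# filter_nnzs method; the matrix below is random demo data):
import scipy.sparse as sps

sims = sps.random(50, 50, density=0.2, random_state=0).tocsr()
smin = 0.5
keep = sims.data >= smin            # mask over the stored (nonzero) values
trimmed = sims.copy()
trimmed.data[~keep] = 0.0
trimmed.eliminate_zeros()           # drop similarities below the threshold
print(sims.nnz, '->', trimmed.nnz)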
Example #7
    def __init__(self,
                 predictor,
                 genders,
                 calibrationFactor,
                 alpha=0.1,
                 selector=None):
        self.predictor = predictor
        self.genders = genders
        self.genders.name = 'gender'
        self.genders.index.name = 'item'
        self.genders.sort_index(inplace=True)
        self.calibrationFactor = calibrationFactor
        self.alpha = alpha
        self.inputGenderBalance = None

        self.selector = selector if selector is not None else UnratedItemCandidateSelector()
Example #8
    def __init__(self, implicit):
        self.initial = default(implicit)
        self.initialized = False
        self.selector = UnratedItemCandidateSelector()
Example #9
class NaiveBayesRecommender(Recommender):

    _count_tables = {}
    _item_features = None
    _nb_table = None
    _min_float = np.power(2.0, -149)

    def __init__(self, item_features=None, thresh=2.9, alpha=0.01, beta=0.01):
        self._count_tables = {}
        self._item_features = item_features
        self.selector = UnratedItemCandidateSelector()
        # Clamp the smoothing parameters so a zero alpha/beta cannot
        # zero out the table's probabilities.
        alpha = self.ensure_minimum_score(alpha)
        beta = self.ensure_minimum_score(beta)
        self._nb_table = NaiveBayesTable(thresh, alpha, beta)

    # TODO: HOMEWORK 4
    def fit(self, ratings, *args, **kwargs):
        # Must fit the selector
        self.selector.fit(ratings)

        self._nb_table.reset()
        # For each rating
        # Get associated item features
        # Update NBTable
        for index, row in ratings.iterrows():
            user, rating, item = row['user'], row['rating'], row['item']

            features = self.get_features_list(item)
            self._nb_table.process_rating(user, rating, features)

    # TODO: HOMEWORK 4
    # Should return ordered data frame with items and score
    def recommend(self, user, n=None, candidates=None, ratings=None):
        # n is None or zero, return DataFrame with an empty item column
        if n is None or n == 0:
            return pd.DataFrame({'item': []})

        if candidates is None:
            candidates = self.selector.candidates(user, ratings)

        # Initialize scores
        scores = []
        # for each candidate
        for candidate in candidates:
            # Score the candidate for the user
            score = self.score_item(user, candidate)
            # Collect (item, score) pairs
            scores.append([candidate, score])
        # Turn result into data frame
        scores = pd.DataFrame(scores, columns=['item', 'score'])
        # Retain n largest scoring rows (nlargest)
        scores = scores.nlargest(n, 'score')
        # Sort by score (sort_values)
        scores = scores.sort_values(by='score', ascending=False)
        # return data frame
        return scores

    # TODO: HOMEWORK 4
    # Helper function to return a list of features for an item from features data frame
    def get_features_list(self, item):
        if item not in self._count_tables:
            self._count_tables[item] = self._item_features[
                self._item_features.item == item]['feature']
        return self._count_tables[item]

    # TODO: HOMEWORK 4
    def score_item(self, user, item):
        # get the features
        # initialize the liked and nliked scores with the base probability
        features = self.get_features_list(item)
        liked_scores = self._nb_table.user_prob(user, True)
        nliked_scores = self._nb_table.user_prob(user, False)
        # for each feature
        # update scores by multiplying with conditional probability
        for feature in features:
            liked_scores *= self._nb_table.user_feature_prob(
                user, feature, True)

            nliked_scores *= self._nb_table.user_feature_prob(
                user, feature, False)
        # Handle the case when scores go to zero.
        liked_scores = self.ensure_minimum_score(liked_scores)
        nliked_scores = self.ensure_minimum_score(nliked_scores)
        # Compute log-likelihood
        log_likelihood = np.log(liked_scores) - np.log(nliked_scores)
        # Handle zero again
        log_likelihood = self.ensure_minimum_score(log_likelihood)
        # Return result
        return log_likelihood

    # DO NOT ALTER
    def get_params(self, deep=True):

        return {
            'item_features': self._item_features,
            'thresh': self._nb_table.thresh,
            'alpha': self._nb_table.alpha,
            'beta': self._nb_table.beta
        }

    # DO NOT ALTER
    def ensure_minimum_score(self, val):
        if val == 0.0:
            return self._min_float
        else:
            return val
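# The scoring rule in score_item, worked with made-up numbers (a sketch;
# these probabilities are invented, not values from NaiveBayesTable):
import numpy as np

p_liked, p_nliked = 0.6, 0.4              # base P(liked) / P(not liked)
feat_liked = [0.5, 0.8]                   # P(feature | liked) per feature
feat_nliked = [0.3, 0.7]                  # P(feature | not liked)

liked = p_liked * np.prod(feat_liked)     # 0.24
nliked = p_nliked * np.prod(feat_nliked)  # 0.084
print(np.log(liked) - np.log(nliked))     # ~1.05 > 0: evidence for "liked"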
Example #10
class WeightedHybrid(Predictor):
    """

    """

    # HOMEWORK 3 TODO: Follow the constructor for Fallback, which can be found at
    # https://github.com/lenskit/lkpy/blob/master/lenskit/algorithms/basic.py
    # Note that you will need to
    # -- Check for agreement between the set of weights and the number of algorithms supplied.
    # -- You should clone the algorithms with hwk3_util.my_clone() and store the cloned version.
    # -- You should normalize the weights so they sum to 1.
    # -- Keep the line that set the `selector` function.

    algorithms = []
    weights = []

    def __init__(self, algorithms, weights):
        """
        Args:
            algorithms: a list of component algorithms.  Each one will be trained.
            weights: weights for each component to combine predictions.
        """
        # HWK 3: Code here
        self.algorithms = algorithms
        self.selector = UnratedItemCandidateSelector()
        # Normalize the weights so they sum to 1.
        total = sum(weights)
        self.weights = [w / total for w in weights]

    def clone(self):
        return WeightedHybrid(self.algorithms, self.weights)

    # HOMEWORK 3 TODO: Complete this implementation
    # Will be similar to Fallback. Must also call self.selector.fit()
    def fit(self, ratings, *args, **kwargs):

        self.selector.fit(ratings)
        # HWK 3: Code here
        for algo in self.algorithms:
            algo.fit(ratings, *args, **kwargs)
        return self

    def candidates(self, user, ratings):
        return self.selector.candidates(user, ratings)

    # HOMEWORK 3 TODO: Complete this implementation
    # Computes the weighted average of the predictions from the component algorithms
    def predict_for_user(self, user, items, ratings=None):
        # HWK 3: Code here
        # Weighted sum over all component predictions
        preds = None
        for algo, weight in zip(self.algorithms, self.weights):
            aps = algo.predict_for_user(user, items, ratings=ratings)
            preds = aps * weight if preds is None else preds + aps * weight

        return preds

    def __str__(self):
        return 'Weighted([{}])'.format(', '.join(str(a) for a in self.algorithms))
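# A hypothetical way to exercise WeightedHybrid end to end (a sketch; the
# Dummy predictor below is invented for the demo and simply returns a
# constant score for every item):
import pandas as pd

class Dummy:
    def __init__(self, value):
        self.value = value

    def fit(self, ratings, *args, **kwargs):
        return self

    def predict_for_user(self, user, items, ratings=None):
        return pd.Series(self.value, index=items)

ratings = pd.DataFrame({'user': [1], 'item': [10], 'rating': [4.0]})
hybrid = WeightedHybrid([Dummy(2.0), Dummy(4.0)], [3, 1]).fit(ratings)
print(hybrid.predict_for_user(1, [20, 30]))  # 0.75*2.0 + 0.25*4.0 = 2.5 each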
Example #11
    def __init__(self, item_features=None, thresh=2.9, alpha=0.01, beta=0.01):
        self._item_features = item_features
        self.selector = UnratedItemCandidateSelector()
        self._nb_table = NaiveBayesTable(thresh, alpha, beta)
Example #12
class NaiveBayesRecommender(Recommender):
    _count_tables = {}
    _item_features = None
    _nb_table = None
    _min_float = np.power(2.0, -149)

    def __init__(self, item_features=None, thresh=2.9, alpha=0.01, beta=0.01):
        self._item_features = item_features
        self.selector = UnratedItemCandidateSelector()
        self._nb_table = NaiveBayesTable(thresh, alpha, beta)

    # TODO: HOMEWORK 4
    def fit(self, ratings, *args, **kwargs):
        # Must fit the selector
        self.selector.fit(ratings)
        self._nb_table.reset()

        self._item_features.columns = ['item', 'feature']
        # For each rating
        for indexR, rowR in ratings.iterrows():
            user = rowR['user']
            item = rowR['item']
            rating = rowR['rating']
            # print("processing: ", user)
            # Get associated item features
            feature = self.get_features_list(item)
            self._nb_table.process_rating(user, rating, feature)

    # TODO: HOMEWORK 4
    # Should return ordered data frame with items and score
    def recommend(self, user, n=None, candidates=None, ratings=None):
        # n is None or zero, return DataFrame with an empty item column
        if n is None or n == 0:
            return pd.DataFrame({'item': []})

        if candidates is None:
            candidates = self.selector.candidates(user, ratings)

        # Initialize scores
        scores = []

        # Score each candidate for the user
        for candidate in candidates:
            scores.append(self.score_item(user, candidate))

        # Turn result into data frame
        data = {'item': candidates, 'score': scores}
        df = pd.DataFrame(data, columns=['item', 'score'])

        # Retain n largest scoring rows (nlargest)
        df = df.nlargest(n, 'score')
        # Sort by score (sort_values)
        df = df.sort_values(by=['score'], ascending=False)

        # return data frame
        return df

    # TODO: HOMEWORK 4
    # Helper function to return a list of features for an item from features data frame
    def get_features_list(self, item):
        mask = self._item_features['item'] == item
        return self._item_features.loc[mask, 'feature'].tolist()

    # TODO: HOMEWORK 4
    def score_item(self, user, item):
        # get the features
        features = self.get_features_list(item)
        # initialize the liked and nliked scores with the base probability
        baseP = self._nb_table.user_prob(user, True)
        baseNP = self._nb_table.user_prob(user, False)

        likeP = 1
        nlikeP = 1
        # for each feature
        for feature in features:
            likeP = likeP * self._nb_table.user_feature_prob(
                user, feature, True)
            nlikeP = nlikeP * self._nb_table.user_feature_prob(
                user, feature, False)
        # update scores by multiplying with conditional probability
        likeP = likeP * baseP
        nlikeP = nlikeP * baseNP

        try:
            ratio = likeP / nlikeP
        except ZeroDivisionError:
            # Handle the case when scores go to zero.
            return 0

        # Compute log-likelihood
        try:
            LL = math.log(ratio)
        except ValueError:
            # Handle zero again
            return 0
        # Return result
        return LL

    # DO NOT ALTER
    def get_params(self, deep=True):

        return {
            'item_features': self._item_features,
            'thresh': self._nb_table.thresh,
            'alpha': self._nb_table.alpha,
            'beta': self._nb_table.beta
        }

    # DO NOT ALTER
    def ensure_minimum_score(self, val):
        if val == 0.0:
            return self._min_float
        else:
            return val
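# A small numeric check relating the two NaiveBayes variants above: the
# log of the ratio computed here equals the log-difference in Example #9,
# and 2.0**-149 (the smallest positive subnormal float32) keeps log() finite:
import math
import numpy as np

liked, nliked = 0.24, 0.084
assert math.isclose(math.log(liked / nliked),
                    math.log(liked) - math.log(nliked))

min_float = np.power(2.0, -149)
print(math.log(min_float))  # ~ -103.28 rather than a math-domain error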
Example #13
class WeightedHybrid(Predictor):
    """

    """

    # HOMEWORK 3 TODO: Follow the constructor for Fallback, which can be found at
    # https://github.com/lenskit/lkpy/blob/master/lenskit/algorithms/basic.py
    # Note that you will need to
    # -- Check for agreement between the set of weights and the number of algorithms supplied.
    # -- You should clone the algorithms with hwk3_util.my_clone() and store the cloned version.
    # -- You should normalize the weights so they sum to 1.
    # -- Keep the line that set the `selector` function.

    algorithms = []
    weights = []

    def __init__(self, algorithms, weights):
        """
        Args:
            algorithms: a list of component algorithms.  Each one will be trained.
            weights: weights for each component to combine predictions.
        """
        # HWK 3: Code here
        if len(algorithms) != len(weights):
            raise ValueError(
                'algorithms and weights must have the same length')

        self.algorithms = [my_clone(algo) for algo in algorithms]
        total = sum(weights)
        self.weights = [weight / total for weight in weights]
        self.selector = UnratedItemCandidateSelector()

    def clone(self):
        return WeightedHybrid(self.algorithms, self.weights)

    # HOMEWORK 3 TODO: Complete this implementation
    # Will be similar to Fallback. Must also call self.selector.fit()
    def fit(self, ratings, *args, **kwargs):

        # HWK 3: Code here
        for algo in self.algorithms:
            algo.fit(ratings, *args, **kwargs)

        self.selector.fit(ratings)
        return self

    def candidates(self, user, ratings):
        return self.selector.candidates(user, ratings)

    # HOMEWORK 3 TODO: Complete this implementation
    # Computes the weighted average of the predictions from the component algorithms
    def predict_for_user(self, user, items, ratings=None):
        # HWK 3: Code here
        # Accumulate the weighted sum; a scalar 0.0 broadcasts against the
        # Series returned by each component.
        preds = 0.0
        for algo, weight in zip(self.algorithms, self.weights):
            algo_pred = algo.predict_for_user(user, items, ratings=ratings)
            preds = preds + weight * algo_pred

        return preds

    def __str__(self):
        return 'Weighted([{}])'.format(', '.join(str(a) for a in self.algorithms))
Example #14
class WeightedHybrid(Predictor):
    """

    """

    # HOMEWORK 3 TODO: Follow the constructor for Fallback, which can be found at
    # https://github.com/lenskit/lkpy/blob/master/lenskit/algorithms/basic.py
    # Note that you will need to
    # -- Check for agreement between the set of weights and the number of algorithms supplied.
    # -- You should clone the algorithms with hwk3_util.my_clone() and store the cloned version.
    # -- You should normalize the weights so they sum to 1.
    # -- Keep the line that set the `selector` function.

    algorithms = []
    weights = []

    def __init__(self, algorithms, weights):
        """
        Args:
            algorithms: a list of component algorithms.  Each one will be trained.
            weights: weights for each component to combine predictions.
        """
        # HWK 3: Code here
        self.algorithms = algorithms

        # Normalize the weights so they sum to 1.
        w_sum = sum(weights)
        self.weights = [w / w_sum for w in weights]

        self.selector = UnratedItemCandidateSelector()

    def clone(self):
        return WeightedHybrid(self.algorithms, self.weights)

    # HOMEWORK 3 TODO: Complete this implementation
    # Will be similar to Fallback. Must also call self.selector.fit()
    def fit(self, ratings, *args, **kwargs):

        # HWK 3: Code here
        # Fit the selector too, as the TODO above requires.
        self.selector.fit(ratings)
        for algo in self.algorithms:
            algo.fit(ratings, *args, **kwargs)

        return self

    def candidates(self, user, ratings):
        return self.selector.candidates(user, ratings)

    # HOMEWORK 3 TODO: Complete this implementation
    # Computes the weighted average of the predictions from the component algorithms
    def predict_for_user(self, user, items, ratings=None):
        preds = None
        # HWK 3: Code here
        for algo, weight in zip(self.algorithms, self.weights):
            aps = algo.predict_for_user(user, items, ratings=ratings)
            # Drop missing predictions; pandas aligns on the index when
            # summing, so items missing from any component remain NaN.
            aps = aps[aps.notna()]
            if preds is None:
                preds = aps * weight
            else:
                preds = preds + aps * weight

        return preds

    def __str__(self):
        return 'Weighted([{}])'.format(', '.join(str(a) for a in self.algorithms))