Example #1
    def predict_all(self, user):
        """
        Predict ratings for all items.

        Parameters
        ----------
        user : int
            The user to make predictions for.

        Returns
        -------
        out : XFrame
            Each row of the frame consists of a user id, an item id, and a predicted rating.
        """

        # Build an RDD of (user, item) pairs to pass to predictAll.
        user_item = XFrame()
        user_item[self.item_col] = self.items
        user_item[self.user_col] = user
        # predictAll expects (user, product) pairs, so put the user column first.
        user_item.swap_columns(self.item_col, self.user_col)
        rdd = user_item.to_rdd()
        res = self.model.predictAll(rdd)
        # Unpack each Rating result into a plain (user, item, rating) tuple.
        res = res.map(lambda rating: (rating.user, rating.product, rating.rating))
        col_names = [self.user_col, self.item_col, self.rating_col]
        user_type = self.users.dtype()
        item_type = self.items.dtype()
        col_types = [user_type, item_type, float]
        return XFrame.from_rdd(res, column_names=col_names, column_types=col_types)
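
A minimal usage sketch (the instance name and user id below are assumptions, not from the source): predict ratings for one user, then sort the returned XFrame so the best items come first.

    # Hypothetical usage -- `recommender` is an instance of the surrounding class.
    predictions = recommender.predict_all(user=42)
    # Highest predicted ratings first.
    top_items = predictions.sort(recommender.rating_col, ascending=False)
    print(top_items.head(10))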
Example #2
    def load(cls, path):
        """
        Load a model that was saved previously.

        Parameters
        ----------
        path : str
            The path where the model files are stored.
            This is the same path that was passed to ``save``.
            There are three files/directories based on this path, with
            extensions '.model', '.ratings', and '.metadata'.

        Returns
        -------
        out : MatrixFactorizationModel
            A model that can be used to predict ratings.
        """
        sc = CommonSparkContext.Instance().sc()
        model_path, ratings_path, metadata_path = cls._file_paths(path)
        # load model
        model = recommendation.MatrixFactorizationModel.load(sc, model_path)
        # load ratings
        ratings = XFrame.load(ratings_path)
        # load metadata (open in binary mode, as required for pickled data)
        with open(metadata_path, 'rb') as f:
            user_col, item_col, rating_col = pickle.load(f)

        return cls(model, ratings, user_col, item_col, rating_col)
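
A hedged round-trip sketch, assuming the companion ``save`` method writes the three files ('.model', '.ratings', '.metadata') described above; the model variable and path are assumptions, not from the source.

    # Hypothetical round trip.
    model.save('my_recommender')      # writes my_recommender.model/.ratings/.metadata
    restored = MatrixFactorizationModel.load('my_recommender')
    predictions = restored.predict_all(user=42)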
Example #3
    def __init__(self, features, labels, standardize=False):
        self.standardize = standardize
        self.means = None
        self.stdevs = None
        if standardize:
            self.features = self._standardize(features)
        else:
            self.features = features
        self.labels = labels
        self.feature_cols = features.column_names()
        # Build the labeled vector from the (possibly standardized) features.
        labeled_feature_vector = XFrame(self.features)
        label_col = 'label'     # TODO what if there is a feature with this name?
        feature_cols = self.feature_cols   # local copy so the closure below does not capture self
        labeled_feature_vector[label_col] = labels
        def build_labeled_features(row):
            # LabeledPoint comes from pyspark.mllib.regression.
            label = row[label_col]
            features = [row[col] for col in feature_cols]
            return LabeledPoint(label, features)

        self.labeled_feature_vector = labeled_feature_vector.apply(build_labeled_features)
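
The ``_standardize`` helper is not shown in this example. A plausible sketch, assuming column-wise z-scoring that caches the per-column means and standard deviations on ``self`` (an assumption, not the source's implementation):

    def _standardize(self, features):
        # Assumed sketch: z-score each feature column and cache the statistics.
        cols = features.column_names()
        self.means = {col: features[col].mean() for col in cols}
        self.stdevs = {col: features[col].std() for col in cols}
        result = XFrame(features)
        for col in cols:
            mean, stdev = self.means[col], self.stdevs[col]
            # Bind mean/stdev as defaults so each column keeps its own values.
            result[col] = features[col].apply(
                lambda x, m=mean, s=stdev: (x - m) / s if s > 0 else 0.0)
        return result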
Example #4
    def _base_evaluate(self, data, labels):
        """
        Evaluate the performance of the classifier.

        Use the data to make predictions, then test the effectiveness of 
        the predictions against the labels.

        The data must be a collection of feature vectors (an XArray of vectors).

        Returns
        -------
        out : dict
            Counts of correct, true positive, true negative, false positive,
            and false negative predictions, plus the total count and the
            derived rates: accuracy, precision, recall, true positive rate
            (tpr), and false positive rate (fpr).
        """
        results = XFrame()
        predictions = self._base_predict(data)
        results['predicted'] = predictions
        results['actual'] = labels
        # Compute per-row indicator values, then sum them below.
        def evaluate(row):
            prediction = row['predicted']
            actual = row['actual']
            return {'correct': 1 if prediction == actual else 0,
                    'true_pos': 1 if prediction == 1 and actual == 1 else 0,
                    'true_neg': 1 if prediction == 0 and actual == 0 else 0,
                    'false_pos': 1 if prediction == 1 and actual == 0 else 0,
                    'false_neg': 1 if prediction == 0 and actual == 1 else 0,
                    'positive': 1 if actual == 1 else 0,
                    'negative': 1 if actual == 0 else 0
                    }

        score = results.apply(evaluate)
        # Sum one indicator at a time (one pass over the scores per key).
        def sum_item(item):
            return score.apply(lambda x: x[item]).sum()

        all_scores = float(len(labels))
        correct = float(sum_item('correct'))
        tp = float(sum_item('true_pos'))
        tn = float(sum_item('true_neg'))
        fp = float(sum_item('false_pos'))
        fn = float(sum_item('false_neg'))
        pos = float(sum_item('positive'))
        neg = float(sum_item('negative'))

        # precision = true pos / (true pos + false pos)
        # recall = true pos / (true pos + false neg)
        # true pos rate = true pos / positive
        # false pos rate = false pos / negative
        result = {
            'correct': correct,
            'true_pos': tp,
            'true_neg': tn,
            'false_pos': fp,
            'false_neg': fn,
            'all': all_scores,
            'accuracy': correct / all_scores if all_scores > 0 else float('nan'),
            'precision': tp / (tp + fp) if (tp + fp) > 0 else float('nan'),
            'recall': tp / (tp + fn) if (tp + fn) > 0 else float('nan'),
            'tpr': tp / pos if pos > 0 else float('nan'),
            'fpr': fp / neg if neg > 0 else float('nan'),
        }
        return result
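
The returned rates make downstream metrics straightforward; for example, F1 is the harmonic mean of precision and recall. A hypothetical follow-on (``classifier``, ``test_data``, and ``test_labels`` are assumptions, not from the source):

    # Derive F1 from the returned precision and recall.
    scores = classifier._base_evaluate(test_data, test_labels)
    p, r = scores['precision'], scores['recall']
    f1 = 2 * p * r / (p + r) if (p + r) > 0 else float('nan')
    print('accuracy=%.3f  f1=%.3f' % (scores['accuracy'], f1))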