예제 #1
0
    def __init__(self, features, labels, standardize=False):
        """
        Store the training data and build the labeled feature vectors.

        Parameters
        ----------
        features : XFrame
            Training features, one column per feature.  Must provide
            `column_names()` and be accepted by the `XFrame` constructor.

        labels : column of labels
            One label per row of `features`; it is attached to a copy of
            `features` as an extra column (presumably an XArray).

        standardize : bool, optional
            If True, `self.features` holds the result of
            `self._standardize(features)` rather than `features` itself.

        Notes
        -----
        Sets `self.labeled_feature_vector` to the result of mapping every row
        to a `LabeledPoint(label, [feature values in column order])`.
        """
        self.standardize = standardize
        self.means = None
        self.stdevs = None
        if standardize:
            self.features = self._standardize(features)
        else:
            self.features = features
        self.labels = labels
        self.feature_cols = features.column_names()

        # NOTE(review): the labeled vectors below are built from the raw
        # `features`, not from `self.features`, so standardization is not
        # reflected in them -- confirm this is intended.
        labeled_feature_vector = XFrame(features)

        # Local reference so the row function does not capture `self`.
        feature_cols = self.feature_cols

        # Pick a label column name that cannot clash with a feature column.
        # Otherwise assigning the labels would overwrite that feature and the
        # label itself would end up inside the feature vector.
        label_col = 'label'
        while label_col in feature_cols:
            label_col = '_' + label_col
        labeled_feature_vector[label_col] = labels

        def build_labeled_features(row):
            label = row[label_col]
            values = [row[col] for col in feature_cols]
            return LabeledPoint(label, values)

        self.labeled_feature_vector = labeled_feature_vector.apply(build_labeled_features)
예제 #2
0
    def _base_evaluate(self, data, labels):
        """
        Measure how well the classifier's predictions match known labels.

        Predictions are obtained from `self._base_predict(data)` and compared
        row by row with `labels`.  A prediction or label counts as positive
        when it equals 1 and as negative when it equals 0.

        The data must be a collection of items (XArray of SenseVector).

        Returns
        -------
        out : dict
            Counts (as floats):
            - 'correct': rows where the prediction equals the label
            - 'true_pos', 'true_neg', 'false_pos', 'false_neg'
            - 'all': number of labels
            Rates (NaN when the denominator is zero):
            - 'accuracy': correct / all
            - 'precision': true_pos / (true_pos + false_pos)
            - 'recall': true_pos / (true_pos + false_neg)
            - 'tpr': true_pos / actual positives
            - 'fpr': false_pos / actual negatives
        """
        comparison = XFrame()
        comparison['predicted'] = self._base_predict(data)
        comparison['actual'] = labels

        def classify_outcome(row):
            # Turn one (predicted, actual) pair into 0/1 indicator flags.
            predicted = row['predicted']
            actual = row['actual']
            predicted_pos = predicted == 1
            predicted_neg = predicted == 0
            actual_pos = actual == 1
            actual_neg = actual == 0
            return {
                'correct': 1 if predicted == actual else 0,
                'true_pos': 1 if predicted_pos and actual_pos else 0,
                'true_neg': 1 if predicted_neg and actual_neg else 0,
                'false_pos': 1 if predicted_pos and actual_neg else 0,
                'false_neg': 1 if predicted_neg and actual_pos else 0,
                'positive': 1 if actual_pos else 0,
                'negative': 1 if actual_neg else 0,
            }

        outcomes = comparison.apply(classify_outcome)

        def total(key):
            # Sum one indicator flag over every row.
            per_row = outcomes.apply(lambda outcome: outcome[key])
            return float(per_row.sum())

        def ratio(numerator, denominator):
            # Guarded division: undefined rates are reported as NaN.
            if denominator > 0:
                return numerator / denominator
            return float('nan')

        n_rows = float(len(labels))
        n_correct = total('correct')
        true_pos = total('true_pos')
        true_neg = total('true_neg')
        false_pos = total('false_pos')
        false_neg = total('false_neg')
        n_positive = total('positive')
        n_negative = total('negative')

        return {
            'correct': n_correct,
            'true_pos': true_pos,
            'true_neg': true_neg,
            'false_pos': false_pos,
            'false_neg': false_neg,
            'all': n_rows,
            'accuracy': ratio(n_correct, n_rows),
            'precision': ratio(true_pos, true_pos + false_pos),
            'recall': ratio(true_pos, true_pos + false_neg),
            'tpr': ratio(true_pos, n_positive),
            'fpr': ratio(false_pos, n_negative),
        }