示例#1
0
文件: _koala.py 项目: jiecaoc/Koala
class Koala(object):
    """Train, evaluate and persist one random-forest model on one dataset.

    The input is wrapped in a ``Dataset``, one column is flagged as the
    target, and a ``ModelComparison`` holding a single model registered
    under the key ``'RFC'`` does the splitting, fitting and scoring.
    """

    # ``Dataset`` built by ``set_data`` or restored by ``load``.
    data = None
    # The ``RandomForestClassifier`` created by the latest ``train`` call.
    classifier = None

    # Keyword arguments of the latest ``train`` call (``test_size`` included)
    # so that ``feature_reduction_scores`` can retrain with the same settings.
    train_kwargs = None

    # ``ModelComparison`` that performs the train/test split and the fit.
    _mc = None

    def __init__(self, data=None, target=None, train=False):
        """Optionally set the data, flag the target column and train.

        Args:
            data: Object handed to ``Dataset``; nothing happens when ``None``.
            target: Name of the target column; only used when ``data`` is
                given.
            train: When true (and both ``data`` and ``target`` are given),
                train immediately with ``test_size=0.1``.
        """
        if data is None:
            return
        self.set_data(data)
        if target is None:
            return
        self.set_target(target)
        if train:
            self.train(test_size=0.1)

    def save(self, path):
        """Pickle the dataset, model comparison and classifier to ``path``."""
        odata = {
            'data': self.data,
            'mc': self._mc,
            'classifier': self.classifier,
        }
        with open(path, 'wb') as f:
            pickle_save(odata, f)

    def load(self, path):
        """Restore the state written by ``save`` from ``path``.

        Loading is best-effort: a payload that cannot be read, or that lacks
        one of the expected keys, is reported on stdout and leaves the
        instance untouched. Errors opening the file still propagate.

        SECURITY: the payload is unpickled, which can execute arbitrary code.
        Only load files that come from a trusted source.
        """
        with open(path, 'rb') as f:
            try:
                odata = pickle_load(f)
                # Read every key before touching ``self`` so that a malformed
                # payload cannot leave the instance half-updated.
                data = odata['data']
                mc = odata['mc']
                classifier = odata['classifier']
            except Exception as e:
                print("Invalid data: %s" % (str(e)))
                return
        self.data = data
        self._mc = mc
        self.classifier = classifier

    def set_data(self, df):
        """Wrap ``df`` in a ``Dataset`` and store it as ``self.data``."""
        self.data = Dataset(df)

    def set_target(self, column):
        """Flag ``column`` as the target in the dataset's role mapping."""
        self.data.role[column] = self.data.TARGET

    def train(self, **kwargs):
        """Split the data, then fit a fresh random forest on it.

        Args:
            **kwargs: ``test_size`` (default 0.1, also used when it is passed
                as ``None``) is consumed for the train/test split; every
                other keyword is forwarded to ``RandomForestClassifier``.
        """
        # Remember a separate copy: ``test_size`` is removed from ``kwargs``
        # below and must survive in the remembered settings so that a
        # retrain uses the same split size.
        self.train_kwargs = dict(kwargs)

        test_size = kwargs.pop('test_size', None)
        if test_size is None:
            test_size = 0.1

        self._mc = ModelComparison()
        self._mc.train_test_split(self.data,
                                  test_size=test_size,
                                  random_state=randint(2**16))
        self._mc['RFC'] = RandomForestClassifier(**kwargs)
        self.classifier = self._mc['RFC']
        self._mc.fit()

    def predict(self, X):
        """Predict with the trained model and decode labels when possible.

        Returns:
            The classifier's predictions passed through
            ``self._mc.le.inverse_transform``; the raw predictions when that
            step raises ``AttributeError`` (e.g. no ``le`` attribute).
        """
        # Predict once, outside the ``try``, so the fallback does not have to
        # run the classifier a second time.
        raw = self._mc['RFC'].predict(X)
        try:
            return self._mc.le.inverse_transform(raw)
        except AttributeError:
            return raw

    def accuracy_score(self):
        """Return the first entry of the model comparison's accuracy score."""
        return self._mc.accuracy_score()[0]

    def precision_score(self, **kwargs):
        """Return the first entry of the model comparison's precision score."""
        return self._mc.precision_score(**kwargs)[0]

    def recall_score(self, **kwargs):
        """Return the first entry of the model comparison's recall score."""
        return self._mc.recall_score(**kwargs)[0]

    def f1_score(self, **kwargs):
        """Return the first entry of the model comparison's F1 score."""
        return self._mc.f1_score(**kwargs)[0]

    def metric_score(self, metric, **kwargs):
        """Return the first entry of ``self._mc.metric(metric, **kwargs)``."""
        return self._mc.metric(metric, **kwargs)[0]

    def confusion_matrix(self):
        """Return the model comparison's confusion matrix for ``'RFC'``."""
        return self._mc.cm('RFC')

    def feature_importance(self):
        """Return the input features ranked by random-forest importance.

        Returns:
            A ``DataFrame`` indexed by input column with a single ``weight``
            column, sorted from most to least important.
        """
        rf_weights = list(self._mc['RFC'].feature_importances_)
        rf_inputs = list(self.data.filter_cols(role=self.data.INPUT))
        weights = DataFrame(data=rf_weights, index=rf_inputs,
                            columns=['weight'])
        # ``DataFrame.sort`` was removed in pandas 0.20; ``sort_values`` is
        # its replacement.
        return weights.sort_values('weight', ascending=False)

    def feature_reduction_scores(self):
        """Score retrained models while dropping the weakest features.

        Starting from all input features, the least important remaining
        feature is removed at each step and a new ``Koala`` is trained on the
        reduced data with the settings of the latest ``train`` call.

        Returns:
            A ``DataFrame`` with an ``accuracy`` column, indexed by the number
            of input features kept.
        """
        K = 1  # runs averaged per subset; to be developed further later on

        target = self.data.filter_cols(role=self.data.TARGET)[0]
        # Least important feature first, so slicing from the front drops the
        # weakest features.
        features = list(self.feature_importance().index)[::-1]
        # ``train_kwargs`` is still ``None`` when the model came from ``load``.
        retrain_kwargs = self.train_kwargs or {}

        idx = []
        mean_scores = []
        for start in range(len(features)):
            kept = features[start:]
            idx.append(len(kept))
            run_scores = []
            for _ in range(K):
                kl = Koala(data=self.data.frame[kept + [target]],
                           target=target)
                kl.train(**retrain_kwargs)
                run_scores.append(kl.accuracy_score())
            mean_scores.append(sum(run_scores) / len(run_scores))
        return DataFrame(data={'accuracy': mean_scores}, index=idx)
示例#2
0
class Koala(object):
    """Train, evaluate and persist one random-forest model on one dataset.

    The input is wrapped in a ``Dataset``, one column is flagged as the
    target, and a ``ModelComparison`` holding a single model registered
    under the key ``'RFC'`` does the splitting, fitting and scoring.
    """

    # ``Dataset`` built by ``set_data`` or restored by ``load``.
    data = None
    # The ``RandomForestClassifier`` created by the latest ``train`` call.
    classifier = None

    # Keyword arguments of the latest ``train`` call (``test_size`` included)
    # so that ``feature_reduction_scores`` can retrain with the same settings.
    train_kwargs = None

    # ``ModelComparison`` that performs the train/test split and the fit.
    _mc = None

    def __init__(self, data=None, target=None, train=False):
        """Optionally set the data, flag the target column and train.

        Args:
            data: Object handed to ``Dataset``; nothing happens when ``None``.
            target: Name of the target column; only used when ``data`` is
                given.
            train: When true (and both ``data`` and ``target`` are given),
                train immediately with ``test_size=0.1``.
        """
        if data is None:
            return
        self.set_data(data)
        if target is None:
            return
        self.set_target(target)
        if train:
            self.train(test_size=0.1)

    def save(self, path):
        """Pickle the dataset, model comparison and classifier to ``path``."""
        odata = {
            'data': self.data,
            'mc': self._mc,
            'classifier': self.classifier,
        }
        with open(path, 'wb') as f:
            pickle_save(odata, f)

    def load(self, path):
        """Restore the state written by ``save`` from ``path``.

        Loading is best-effort: a payload that cannot be read, or that lacks
        one of the expected keys, is reported on stdout and leaves the
        instance untouched. Errors opening the file still propagate.

        SECURITY: the payload is unpickled, which can execute arbitrary code.
        Only load files that come from a trusted source.
        """
        with open(path, 'rb') as f:
            try:
                odata = pickle_load(f)
                # Read every key before touching ``self`` so that a malformed
                # payload cannot leave the instance half-updated.
                data = odata['data']
                mc = odata['mc']
                classifier = odata['classifier']
            except Exception as e:
                print("Invalid data: %s" % (str(e)))
                return
        self.data = data
        self._mc = mc
        self.classifier = classifier

    def set_data(self, df):
        """Wrap ``df`` in a ``Dataset`` and store it as ``self.data``."""
        self.data = Dataset(df)

    def set_target(self, column):
        """Flag ``column`` as the target in the dataset's role mapping."""
        self.data.role[column] = self.data.TARGET

    def train(self, **kwargs):
        """Split the data, then fit a fresh random forest on it.

        Args:
            **kwargs: ``test_size`` (default 0.1, also used when it is passed
                as ``None``) is consumed for the train/test split; every
                other keyword is forwarded to ``RandomForestClassifier``.
        """
        # Remember a separate copy: ``test_size`` is removed from ``kwargs``
        # below and must survive in the remembered settings so that a
        # retrain uses the same split size.
        self.train_kwargs = dict(kwargs)

        test_size = kwargs.pop('test_size', None)
        if test_size is None:
            test_size = 0.1

        self._mc = ModelComparison()
        self._mc.train_test_split(self.data,
                                  test_size=test_size,
                                  random_state=randint(2**16))
        self._mc['RFC'] = RandomForestClassifier(**kwargs)
        self.classifier = self._mc['RFC']
        self._mc.fit()

    def predict(self, X):
        """Predict with the trained model and decode labels when possible.

        Returns:
            The classifier's predictions passed through
            ``self._mc.le.inverse_transform``; the raw predictions when that
            step raises ``AttributeError`` (e.g. no ``le`` attribute).
        """
        # Predict once, outside the ``try``, so the fallback does not have to
        # run the classifier a second time.
        raw = self._mc['RFC'].predict(X)
        try:
            return self._mc.le.inverse_transform(raw)
        except AttributeError:
            return raw

    def accuracy_score(self):
        """Return the first entry of the model comparison's accuracy score."""
        return self._mc.accuracy_score()[0]

    def precision_score(self, **kwargs):
        """Return the first entry of the model comparison's precision score."""
        return self._mc.precision_score(**kwargs)[0]

    def recall_score(self, **kwargs):
        """Return the first entry of the model comparison's recall score."""
        return self._mc.recall_score(**kwargs)[0]

    def f1_score(self, **kwargs):
        """Return the first entry of the model comparison's F1 score."""
        return self._mc.f1_score(**kwargs)[0]

    def metric_score(self, metric, **kwargs):
        """Return the first entry of ``self._mc.metric(metric, **kwargs)``."""
        return self._mc.metric(metric, **kwargs)[0]

    def confusion_matrix(self):
        """Return the model comparison's confusion matrix for ``'RFC'``."""
        return self._mc.cm('RFC')

    def feature_importance(self):
        """Return the input features ranked by random-forest importance.

        Returns:
            A ``DataFrame`` indexed by input column with a single ``weight``
            column, sorted from most to least important.
        """
        rf_weights = list(self._mc['RFC'].feature_importances_)
        rf_inputs = list(self.data.filter_cols(role=self.data.INPUT))
        weights = DataFrame(data=rf_weights, index=rf_inputs,
                            columns=['weight'])
        # ``DataFrame.sort`` was removed in pandas 0.20; ``sort_values`` is
        # its replacement.
        return weights.sort_values('weight', ascending=False)

    def feature_reduction_scores(self):
        """Score retrained models while dropping the weakest features.

        Starting from all input features, the least important remaining
        feature is removed at each step and a new ``Koala`` is trained on the
        reduced data with the settings of the latest ``train`` call.

        Returns:
            A ``DataFrame`` with an ``accuracy`` column, indexed by the number
            of input features kept.
        """
        K = 1  # runs averaged per subset; to be developed further later on

        target = self.data.filter_cols(role=self.data.TARGET)[0]
        # Least important feature first, so slicing from the front drops the
        # weakest features.
        features = list(self.feature_importance().index)[::-1]
        # ``train_kwargs`` is still ``None`` when the model came from ``load``.
        retrain_kwargs = self.train_kwargs or {}

        idx = []
        mean_scores = []
        for start in range(len(features)):
            kept = features[start:]
            idx.append(len(kept))
            run_scores = []
            for _ in range(K):
                kl = Koala(data=self.data.frame[kept + [target]],
                           target=target)
                kl.train(**retrain_kwargs)
                run_scores.append(kl.accuracy_score())
            mean_scores.append(sum(run_scores) / len(run_scores))
        return DataFrame(data={'accuracy': mean_scores}, index=idx)
示例#3
0
文件: _koala.py 项目: jiecaoc/Koala
 def set_data(self, df):
     """Wrap ``df`` in a ``Dataset`` and keep it as ``self.data``.

     ``df`` is presumably a pandas DataFrame (going by its name) -- TODO
     confirm against ``Dataset``. Always returns ``None``.
     """
     wrapped = Dataset(df)
     self.data = wrapped
示例#4
0
 def set_data(self, df):
     """Store a ``Dataset`` built from ``df`` on the instance.

     The result is kept as ``self.data``; the method itself returns ``None``.
     """
     dataset = Dataset(df)
     self.data = dataset