Exemplo n.º 1
0
class IndividualTDE(BaseClassifier):
    """ Single TDE classifier, based off the Bag of SFA Symbols (BOSS) model
    """
    def __init__(self,
                 window_size=10,
                 word_length=8,
                 norm=False,
                 levels=1,
                 igb=False,
                 alphabet_size=4,
                 random_state=None):
        self.window_size = window_size
        self.word_length = word_length
        self.norm = norm
        self.levels = levels
        self.igb = igb
        self.alphabet_size = alphabet_size

        self.random_state = random_state

        binning_method = "information-gain" if igb else "equi-depth"

        self.transformer = SFA(word_length=word_length,
                               alphabet_size=alphabet_size,
                               window_size=window_size,
                               norm=norm,
                               levels=levels,
                               binning_method=binning_method,
                               bigrams=True,
                               remove_repeat_words=True,
                               save_words=False)
        self.transformed_data = []
        self.accuracy = 0

        self.class_vals = []
        self.num_classes = 0
        self.classes_ = []
        self.class_dictionary = {}
        super(IndividualTDE, self).__init__()

    def fit(self, X, y):
        X, y = check_X_y(X, y, enforce_univariate=True)

        sfa = self.transformer.fit_transform(X, y)
        self.transformed_data = [series.to_dict() for series in sfa.iloc[:, 0]]

        self.class_vals = y
        self.num_classes = np.unique(y).shape[0]
        self.classes_ = class_distribution(np.asarray(y).reshape(-1, 1))[0][0]
        for index, classVal in enumerate(self.classes_):
            self.class_dictionary[classVal] = index

        self._is_fitted = True
        return self

    def predict(self, X):
        self.check_is_fitted()
        X = check_X(X, enforce_univariate=True)

        rng = check_random_state(self.random_state)

        classes = []
        test_bags = self.transformer.transform(X)
        test_bags = [series.to_dict() for series in test_bags.iloc[:, 0]]

        for i, test_bag in enumerate(test_bags):
            best_sim = -1
            nn = None

            for n, bag in enumerate(self.transformed_data):
                sim = histogram_intersection(test_bag, bag)

                if sim > best_sim or (sim == best_sim and rng.random() < 0.5):
                    best_sim = sim
                    nn = self.class_vals[n]

            classes.append(nn)

        return np.array(classes)

    def predict_proba(self, X):
        preds = self.predict(X)
        dists = np.zeros((X.shape[0], self.num_classes))

        for i in range(0, X.shape[0]):
            dists[i, self.class_dictionary.get(preds[i])] += 1

        return dists

    def _train_predict(self, train_num):
        test_bag = self.transformed_data[train_num]
        best_sim = -1
        nn = None

        for n, bag in enumerate(self.transformed_data):
            if n == train_num:
                continue

            sim = histogram_intersection(test_bag, bag)

            if sim > best_sim:
                best_sim = sim
                nn = self.class_vals[n]

        return nn
Exemplo n.º 2
0
class BOSSIndividual(BaseClassifier):
    """ Single Bag of SFA Symbols (BOSS) classifier

    Bag of SFA Symbols Ensemble: implementation of BOSS from Schaffer :
    @article
    """

    def __init__(self,
                 window_size=10,
                 word_length=8,
                 norm=False,
                 alphabet_size=4,
                 save_words=True,
                 random_state=None
                 ):
        self.window_size = window_size
        self.word_length = word_length
        self.norm = norm
        self.alphabet_size = alphabet_size

        self.save_words = save_words
        self.random_state = random_state

        self.transformer = SFA(word_length=word_length,
                               alphabet_size=alphabet_size,
                               window_size=window_size, norm=norm,
                               remove_repeat_words=True,
                               bigrams=False,
                               save_words=save_words)
        self.transformed_data = []
        self.accuracy = 0

        self.class_vals = []
        self.num_classes = 0
        self.classes_ = []
        self.class_dictionary = {}
        super(BOSSIndividual, self).__init__()

    def fit(self, X, y):
        X, y = check_X_y(X, y, enforce_univariate=True)

        sfa = self.transformer.fit_transform(X)
        self.transformed_data = sfa.iloc[:, 0]

        self.class_vals = y
        self.num_classes = np.unique(y).shape[0]
        self.classes_ = class_distribution(np.asarray(y).reshape(-1, 1))[0][0]
        for index, classVal in enumerate(self.classes_):
            self.class_dictionary[classVal] = index

        self._is_fitted = True
        return self

    def predict(self, X):
        self.check_is_fitted()
        X = check_X(X, enforce_univariate=True)

        rng = check_random_state(self.random_state)

        classes = []
        test_bags = self.transformer.transform(X)
        test_bags = test_bags.iloc[:, 0]

        for i, test_bag in enumerate(test_bags):
            best_dist = sys.float_info.max
            nn = None

            for n, bag in enumerate(self.transformed_data):
                dist = boss_distance(test_bag, bag, best_dist)

                if dist < best_dist or (dist == best_dist and rng.random()
                                        < 0.5):
                    best_dist = dist
                    nn = self.class_vals[n]

            classes.append(nn)

        return np.array(classes)

    def predict_proba(self, X):
        preds = self.predict(X)
        dists = np.zeros((X.shape[0], self.num_classes))

        for i in range(0, X.shape[0]):
            dists[i, self.class_dictionary.get(preds[i])] += 1

        return dists

    def _train_predict(self, train_num):
        test_bag = self.transformed_data[train_num]
        best_dist = sys.float_info.max
        nn = None

        for n, bag in enumerate(self.transformed_data):
            if n == train_num:
                continue

            dist = boss_distance(test_bag, bag, best_dist)

            if dist < best_dist:
                best_dist = dist
                nn = self.class_vals[n]

        return nn

    def _shorten_bags(self, word_len):
        new_boss = BOSSIndividual(self.window_size, word_len,
                                  self.norm, self.alphabet_size,
                                  save_words=self.save_words,
                                  random_state=self.random_state)
        new_boss.transformer = self.transformer
        sfa = self.transformer._shorten_bags(word_len)
        new_boss.transformed_data = sfa.iloc[:, 0]

        new_boss.class_vals = self.class_vals
        new_boss.num_classes = self.num_classes
        new_boss.classes_ = self.classes_
        new_boss.class_dictionary = self.class_dictionary

        new_boss._is_fitted = True
        return new_boss

    def _clean(self):
        self.transformer.words = None
        self.transformer.save_words = False

    def _set_word_len(self, word_len):
        self.word_length = word_len
        self.transformer.word_length = word_len