Example #1
import numpy as np

# NaiveBayes, MultinomialNB and GaussianNB are defined in the accompanying
# naive-Bayes code; MergedNB combines the latter two so that discrete and
# continuous features can be handled by a single model.
class MergedNB(NaiveBayes):
    def __init__(self, whether_discrete):
        NaiveBayes.__init__(self)
        self._whether_discrete = np.array(whether_discrete)
        self._whether_continuous = ~self._whether_discrete
        self._multinomial, self._gaussian = MultinomialNB(), GaussianNB()

    def feed_data(self, x, y, sample_weights=None):
        x = np.array(x)
        # Feed the discrete columns to the multinomial model and reuse the
        # label dictionary and category counter it builds.
        self._multinomial.feed_data(x[:, self._whether_discrete], y,
                                    sample_weights)
        y = self._multinomial["y"]
        self.label_dic = self._multinomial.label_dic
        self._cat_counter = self._multinomial["cat_counter"]
        # Feed the continuous columns to the Gaussian model with the same labels.
        self._gaussian.feed_data(x[:, self._whether_continuous], y,
                                 sample_weights)
        self._gaussian.label_dic = self._multinomial.label_dic

    def feed_sample_weights(self, sample_weights=None):
        self._multinomial.feed_sample_weights(sample_weights)
        self._gaussian.feed_sample_weights(sample_weights)

    def _fit(self, lb):
        self._multinomial.fit()
        self._gaussian.fit()
        discrete_func = self._multinomial["func"]
        continuous_func = self._gaussian["func"]

        # The merged decision function is the product of the discrete
        # (multinomial) and continuous (Gaussian) decision functions.
        def func(input_x, tar_category):
            input_x = np.array(input_x)
            return discrete_func(
                input_x[self._whether_discrete].astype(int),
                tar_category
            ) * continuous_func(
                input_x[self._whether_continuous], tar_category
            )

        return func

    def _transfer_x(self, x):
        # Map each discrete feature value to its integer code via the
        # multinomial model's dictionaries; cast continuous values to float.
        _feat_dics = self._multinomial["feat_dics"]
        idx = 0
        for d, discrete in enumerate(self._whether_discrete):
            if discrete:
                x[d] = _feat_dics[idx][x[d]]
                idx += 1
            else:
                x[d] = float(x[d])
        return x
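
A minimal usage sketch for MergedNB follows. The tiny mixed dataset is made up, and the fit / evaluate calls are an assumption: they mirror the GaussianNB interface used in Example #2 and are assumed to be inherited from the NaiveBayes base class.

if __name__ == '__main__':
    # Hypothetical data: two discrete (string) features and one continuous one;
    # whether_discrete marks which columns the multinomial part should handle.
    x = [
        ["sunny", "high", 25.3],
        ["sunny", "low", 18.1],
        ["rainy", "high", 12.7],
        ["rainy", "low", 8.4],
    ]
    y = ["no", "yes", "yes", "no"]

    nb = MergedNB(whether_discrete=[True, True, False])
    nb.fit(x, y)       # assumed to be provided by the NaiveBayes base class
    nb.evaluate(x, y)  # same assumption; mirrors the usage in Example #2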
Example #2
            # Tail of the per-dimension visualization routine: for each class,
            # plot the curve stored in self._data[j][c] over dimension j
            # (tmp_x, colors, x_min, x_max, gap, save and j are prepared in the
            # omitted lines above; matplotlib.pyplot is imported as plt).
            for c in range(len(self.label_dic)):
                plt.plot(tmp_x, [self._data[j][c](xx) for xx in tmp_x],
                         c=colors[self.label_dic[c]],
                         label="class: {}".format(self.label_dic[c]))
            plt.xlim(x_min - 0.2 * gap, x_max + 0.2 * gap)
            plt.legend()
            if not save:
                plt.show()
            else:
                plt.savefig("d{}".format(j + 1))

if __name__ == '__main__':
    import time

    # DataUtil, MultinomialNB and GaussianNB come from the accompanying code.
    # The mushroom features are all categorical, so MultinomialNB's feed_data
    # is used only to numerize them before GaussianNB is fit on the codes.
    xs, ys = DataUtil.get_dataset("mushroom", "../../_Data/mushroom.txt", tar_idx=0)
    nb = MultinomialNB()
    nb.feed_data(xs, ys)
    xs, ys = nb["x"].tolist(), nb["y"].tolist()

    train_num = 6000
    x_train, x_test = xs[:train_num], xs[train_num:]
    y_train, y_test = ys[:train_num], ys[train_num:]

    # Time the model fitting and the evaluation phases separately.
    learning_time = time.time()
    nb = GaussianNB()
    nb.fit(x_train, y_train)
    learning_time = time.time() - learning_time

    estimation_time = time.time()
    nb.evaluate(x_train, y_train)
    nb.evaluate(x_test, y_test)
    estimation_time = time.time() - estimation_time
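
    # The timing variables above are computed but never reported in this
    # excerpt; a minimal summary (assumed, not part of the original) might be:
    print("Model building : {:.6f} s".format(learning_time))
    print("Estimation     : {:.6f} s".format(estimation_time))
    print("Total          : {:.6f} s".format(learning_time + estimation_time))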