Example #1
    def __init__(self):
        self._header = None
        self._root = None
        self._grace_period = 200
        self._split_confidence = 0.0000001
        self._hoeffding_tie_threshold = 0.05
        self._min_frac_weight_for_two_branches_gain = 0.01

        # Split metric stuff goes here
        self.GINI_SPLIT = 0
        self.INFO_GAIN_SPLIT = 1

        self._selected_split_metric = self.INFO_GAIN_SPLIT
        self._split_metric = InfoGainSplitMetric(
            self._min_frac_weight_for_two_branches_gain)
        # self._selected_split_metric = self.GINI_SPLIT
        # self._split_metric = GiniSplitMetric()

        # Leaf prediction strategy stuff goes here

        # Only used when the leaf prediction strategy is based on naive Bayes; not needed right now
        #self._nb_threshold = 0

        self._active_leaf_count = 0
        self._inactive_leaf_count = 0
        self._decision_node_count = 0

        # Print out leaf models in the case of naive Bayes or naive Bayes adaptive leaves
        self._print_leaf_models = False
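
These hyperparameters feed the Hoeffding-bound split test used by VFDT/Hoeffding trees: _split_confidence is the delta in the bound, _hoeffding_tie_threshold breaks near-ties between split candidates, and _grace_period is how many instances a leaf accumulates between split attempts. The snippet below is a minimal standalone sketch of that test under the standard formulation; the names hoeffding_bound and should_split are illustrative and are not methods of this class.

import math

def hoeffding_bound(metric_range, confidence, n):
    # epsilon = sqrt(R^2 * ln(1/delta) / (2n))
    return math.sqrt((metric_range ** 2) * math.log(1.0 / confidence) / (2.0 * n))

def should_split(best_merit, second_merit, metric_range, n,
                 split_confidence=0.0000001, tie_threshold=0.05):
    # Split when the best candidate beats the runner-up by more than epsilon,
    # or when epsilon has shrunk below the tie threshold (near-tie case).
    eps = hoeffding_bound(metric_range, split_confidence, n)
    return (best_merit - second_merit) > eps or eps < tie_threshold

# With info gain on a two-class problem the metric range R is log2(2) = 1.
print(should_split(0.30, 0.20, metric_range=1.0, n=200))    # False: epsilon ~ 0.20
print(should_split(0.30, 0.20, metric_range=1.0, n=2000))   # True:  epsilon ~ 0.06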
Example #2
    def __init__(self):
        self._header = None
        self._root = None
        self._grace_period = 200
        self._split_confidence = 0.0000001
        self._hoeffding_tie_threshold = 0.05
        self._min_frac_weight_for_two_branches_gain = 0.01

        # Split metric stuff goes here
        self.GINI_SPLIT = 0
        self.INFO_GAIN_SPLIT = 1

        self._selected_split_metric = self.INFO_GAIN_SPLIT
        self._split_metric = InfoGainSplitMetric(
            self._min_frac_weight_for_two_branches_gain)
        #self._selected_split_metric = self.GINI_SPLIT
        #self._split_metric = GiniSplitMetric()

        # Leaf prediction strategy stuff goes here

        # Only used when the leaf prediction strategy is based on naive Bayes; not needed right now
        #self._nb_threshold = 0

        self._active_leaf_count = 0
        self._inactive_leaf_count = 0
        self._decision_node_count = 0

        # Print out leaf models in the case of naive Bayes or naive Bayes adaptive leaves
        self._print_leaf_models = False

        logging.basicConfig(
            level=logging.DEBUG,
            format='%(asctime)s %(filename)s[line:%(lineno)d] %(levelname)s %(message)s',
            # filename='../log/tmp.log',
            datefmt='%a, %d %b %Y %H:%M:%S',
            filemode='w')  # filemode only takes effect when filename is set
        self.logger = logging.getLogger("vfdt")  #self.name
        # Define a FileHandler that writes INFO-level and higher messages to a log file, and add it to the logger
        # fh = logging.FileHandler('../log/' + "vfdt" + '.log', mode='w')# self.name
        # formatter = logging.Formatter('%(asctime)s %(filename)s[line:%(lineno)d] %(levelname)s %(message)s')
        # fh.setFormatter(formatter)
        # fh.setLevel(logging.INFO)
        # self.logger.addHandler(fh)
        # Define a StreamHandler that prints INFO-level and higher messages to stderr, and add it to the logger
        console = logging.StreamHandler()
        console.setLevel(logging.INFO)
        formatter = logging.Formatter('%(message)s')  # alternative: '%(name)-12s: %(levelname)-8s'
        console.setFormatter(formatter)

        self.logger.addHandler(console)
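
One thing to note about this setup: logging.basicConfig already attaches a StreamHandler to the root logger, and the "vfdt" logger propagates records to root, so every message handled by the console handler added here is typically printed a second time by the root handler. A minimal standalone sketch of the same configuration (outside the class) that makes this visible:

import logging

logging.basicConfig(
    level=logging.DEBUG,
    format='%(asctime)s %(filename)s[line:%(lineno)d] %(levelname)s %(message)s',
    datefmt='%a, %d %b %Y %H:%M:%S')

logger = logging.getLogger("vfdt")
console = logging.StreamHandler()
console.setLevel(logging.INFO)
console.setFormatter(logging.Formatter('%(message)s'))
logger.addHandler(console)

logger.info("grace_period = 200")
# Appears twice: once via the console handler ('%(message)s' only) and once
# via the root handler installed by basicConfig (timestamped format).
# Set logger.propagate = False to keep only the console handler's output.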
Example #3
    def build_classifier(self, dataset, testdataset=None):
        """Build the classifier.

        Args:
            dataset (Dataset): The data to start training the classifier.
        """
        # Logging
        self.logger.info(
            "Start build classifier--------------------------\n"
            "grace_period = {},\n"
            "split_confidence = {},\n"
            "hoeffding_tie_threshold = {},\n"
            "min_frac_weight_for_two_branches_gain = {}\n"
            "--------------------------------------------------".format(
                self._grace_period, self._split_confidence,
                self._hoeffding_tie_threshold,
                self._min_frac_weight_for_two_branches_gain))

        # Training
        self.reset()
        self._header = dataset
        if self._selected_split_metric == self.GINI_SPLIT:
            self._split_metric = GiniSplitMetric()
        else:
            self._split_metric = InfoGainSplitMetric(
                self._min_frac_weight_for_two_branches_gain)

        count = 0
        test_epoch = 100
        test_acc_list = []
        for i in range(dataset.num_instances()):
            self.update_classifier(dataset.instance(i))
            count += 1
            if count % test_epoch == 0 and testdataset is not None:
                acc = self.valuate_acc(testdataset)
                test_acc_list.append(acc)

        # Plotting
        if testdataset is not None:
            x_axis = range(len(test_acc_list))
            plt.plot(
                x_axis, test_acc_list,
                label='test_acc')  # Plot some data on the (implicit) axes.
            plt.xlabel('iter')
            plt.ylabel('acc')
            figpath = './respic/' + 'acc.jpg'
            plt.title(figpath.split('/')[-1])
            plt.legend()
            plt.savefig(figpath)
            plt.close('all')
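
A hedged usage sketch of the two-argument build_classifier above; the class name VFDT and the load_dataset helper are assumptions standing in for however the surrounding project constructs its Dataset objects:

# Hypothetical driver code: VFDT and load_dataset are assumed names, not part
# of the example above; only build_classifier(dataset, testdataset) is.
train = load_dataset("train.arff")   # any Dataset exposing num_instances()/instance(i)
test = load_dataset("test.arff")

tree = VFDT()
tree.build_classifier(train, testdataset=test)
# Test accuracy is sampled every 100 instances and the curve is saved to ./respic/acc.jpg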
Example #4
    def build_classifier(self, dataset):
        """Build the classifier.

        Args:
            dataset (Dataset): The data to start training the classifier.
        """
        self.reset()
        self._header = dataset
        if self._selected_split_metric == self.GINI_SPLIT:
            self._split_metric = GiniSplitMetric()
        else:
            self._split_metric = InfoGainSplitMetric(
                self._min_frac_weight_for_two_branches_gain)

        for i in range(dataset.num_instances()):
            self.update_classifier(dataset.instance(i))