    def find_best_child_train(self, children):
        best_loss = sys.maxint
        best_child = None

        for child_1 in children:
            loss_1 = calculate_loss(self.scoring_function, child_1, self.true_output, self.number_of_labels)
            if loss_1 < best_loss:
                best_loss = loss_1
                best_child = child_1

        # Compare only with best
        best_attributes = construct_sparse_attributes(self.attributes, best_child)
        for child_1 in children:
            if child_1 == best_child:
                continue
            loss_1 = calculate_loss(self.scoring_function, child_1, self.true_output, self.number_of_labels)
            if loss_1 == best_loss:
                continue
            # Subsample pairs with probability self.reduction to limit the
            # number of ranking examples.
            if random.uniform(0.0, 1.0) <= self.reduction:
                attributes_1 = construct_sparse_attributes(self.attributes, child_1)
                # Add both orderings of the pair, labelled with the sign of
                # the loss difference, so the classifier learns a ranking.
                self.h_training_examples.append(attributes_1 - best_attributes)
                self.h_training_labels.append(np.sign(loss_1 - best_loss))

                self.h_training_examples.append(best_attributes - attributes_1)
                self.h_training_labels.append(np.sign(best_loss - loss_1))

        return best_child
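The pairwise difference examples accumulated above can be turned into a fitted H classifier by stacking them into a sparse matrix and training any estimator that exposes decision_function. The helper below is a minimal sketch of that step, not part of the original code; the use of scikit-learn's LinearSVC is an assumption.

from scipy.sparse import vstack
from sklearn.svm import LinearSVC

def fit_h_classifier(h_training_examples, h_training_labels):
    # Hypothetical helper: stack the sparse difference vectors into one CSR
    # matrix and learn to predict the sign of the loss difference between
    # two candidate children.
    h_matrix = vstack(h_training_examples, format='csr')
    classifier = LinearSVC()
    classifier.fit(h_matrix, h_training_labels)
    return classifier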
    def generate_examples_c(self,
                            fitted_h_classifier,
                            x_train,
                            y_train,
                            verbose=0):
        c_start_time = time.clock()

        c_training_x = []
        c_training_y = []

        for i in xrange(len(x_train)):
            flipbit = FlipBit(x_train[i],
                              self.number_of_labels,
                              self.scoring_function,
                              'test',
                              initial_br=self.initial_br,
                              fitted_classifier=fitted_h_classifier)
            outputs = flipbit.greedy_search(
                self.depth_of_search)  # Get outputs using fitted H heuristic

            best_loss = sys.maxint
            best_output = None
            for output in outputs:
                loss = calculate_loss(self.scoring_function, output,
                                      y_train[i], self.number_of_labels)
                if loss < best_loss:
                    best_loss = loss
                    best_output = output

            output_1_attributes = construct_sparse_attributes(
                x_train[i], best_output)
            for output in outputs:
                if best_output == output:
                    continue
                loss_2 = calculate_loss(self.scoring_function, output,
                                        y_train[i], self.number_of_labels)
                if best_loss == loss_2:
                    continue
                output_2_attributes = construct_sparse_attributes(
                    x_train[i], output)

                c_training_x.append(output_1_attributes - output_2_attributes)
                c_training_y.append(np.sign(best_loss - loss_2))

                c_training_x.append(output_2_attributes - output_1_attributes)
                c_training_y.append(np.sign(loss_2 - best_loss))

        c_construction_end_time = time.clock()
        if verbose > 0:
            print("C construction time: {0:.4f}, Examples: {1}".format(
                c_construction_end_time - c_start_time, len(c_training_x)))
        return c_training_x, c_training_y
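calculate_loss itself is not shown in these snippets. The sketch below is one plausible reading that assumes a Hamming-style loss; both the body and the 'hamming' value of scoring_function are assumptions, not the original helper.

import numpy as np

def calculate_loss(scoring_function, predicted_output, true_output, number_of_labels):
    # Hypothetical sketch of the loss helper used throughout these examples.
    if scoring_function == 'hamming':
        predicted = np.asarray(predicted_output)
        true = np.asarray(true_output)
        # Fraction of label positions where the candidate disagrees with the truth.
        return float(np.sum(predicted != true)) / number_of_labels
    raise ValueError("Unsupported scoring function: {0}".format(scoring_function))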
    def fit_simplified(self, x_train, y_train):
        c_training_examples = []
        c_training_scores = []
        h_training_examples = []
        h_training_scores = []

        start_time = time.clock()
        print "Number of examples in training set: " + str(len(x_train))
        for i in xrange(len(x_train)):
            flipbit = FlipBit(x_train[i], self.number_of_labels, self.scoring_function, true_output=y_train[i])
            outputs = flipbit.greedy_search(self.depth_of_search)
            h_training_examples.extend(flipbit.get_training_examples())
            h_training_scores.extend(flipbit.get_training_scores())

            for j in xrange(len(outputs)):
                example = construct_sparse_attributes(x_train[i], outputs[j])
                score = calculate_loss(self.scoring_function, outputs[j], y_train[i], self.number_of_labels)
                c_training_examples.append(example)
                c_training_scores.append(score)

        generating_end_time = time.clock()

        self.h_regressor.fit(vstack(h_training_examples, format='csr'), h_training_scores)
        print "Number of H regression learning examples: " + str(len(h_training_examples))

        self.c_regressor.fit(vstack(c_training_examples, format='csr'), c_training_scores)
        print "Number of C regression learning examples: " + str(len(c_training_examples))

        fit_time = time.clock()

        construction_time = (generating_end_time - start_time)
        learning_time = (fit_time - generating_end_time)
        print("Construction time: {0:.4f}, Learning HC time: {1:.4f}".format(construction_time, learning_time))
def predict_best_output(attributes, outputs, classifier):

    result_dict = {}
    for i in range(len(outputs)):
        pretendent_attributes = construct_sparse_attributes(attributes, outputs[i])
        result_dict[i] = classifier.decision_function(pretendent_attributes)[0]

    # print result_dict
    index_of_best = min(result_dict.iteritems(), key=operator.itemgetter(1))[0]
    return outputs[index_of_best]
    def predict_best_output(self, example, outputs):
        best_score = sys.maxint
        best_output = None
        for output in outputs:
            attributes = construct_sparse_attributes(example, output)
            score = self.c_regressor.predict(attributes)
            if score < best_score:
                best_score = score
                best_output = output
        return best_output
    def find_best_child(self, children):
        best_score = sys.maxint
        best_child = None

        for child in children:
            example = construct_sparse_attributes(self.attributes, child)
            if (self.regressor is not None) and (self.true_output is None):
                score = self.regressor.predict(example)
            elif (self.true_output is not None) and (self.regressor is None):
                score = calculate_loss(self.scoring_function, child, self.true_output, self.number_of_labels)
                self.h_training_examples.append(example)
                self.h_training_scores.append(score)
            else:
                raise ValueError("Either regressor or true_output must not be None.")

            if score < best_score:
                best_score = score
                best_child = child

        return best_child
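find_best_child scores a set of children produced by the FlipBit search. A minimal sketch of how such children could be generated, flipping one label at a time as the class name suggests, is given below; the function name and binary-vector representation are assumptions.

import numpy as np

def generate_children(current_output, number_of_labels):
    # Hypothetical successor generator: each child differs from the current
    # binary label vector in exactly one position.
    children = []
    for position in range(number_of_labels):
        child = np.array(current_output, copy=True)
        child[position] = 1 - child[position]
        children.append(child)
    return children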
    def fit(self, x_train, y_train):
        c_training_examples = []
        c_training_scores = []
        h_training_examples = []
        h_training_scores = []

        start_time = time.clock()
        for i in xrange(len(x_train)):
            flipbit = FlipBit(x_train[i], self.number_of_labels, self.scoring_function, true_output=y_train[i])
            flipbit.greedy_search(self.depth_of_search)  # Run greedy_search to construct H training examples
            h_training_examples.extend(flipbit.get_training_examples())
            h_training_scores.extend(flipbit.get_training_scores())

        h_construction_end_time = time.clock()
        print("H training examples construction time: {0:.4f}".format(h_construction_end_time-start_time))

        self.h_regressor.fit(vstack(h_training_examples, format='csr'), h_training_scores)
        h_fit_end_time = time.clock()
        print("H heuristic train time: {0:.4f}".format(h_fit_end_time-h_construction_end_time))

        for i in xrange(len(x_train)):
            flipbit = FlipBit(x_train[i], self.number_of_labels, self.scoring_function, fitted_regressor=self.h_regressor)
            outputs = flipbit.greedy_search(self.depth_of_search)  # Get outputs using fitted H heuristic

            for j in xrange(len(outputs)):
                example = construct_sparse_attributes(x_train[i], outputs[j])
                score = calculate_loss(self.scoring_function, outputs[j], y_train[i], self.number_of_labels)
                c_training_examples.append(example)
                c_training_scores.append(score)

        c_construction_end_time = time.clock()
        print("C training examples construction time: {0:.4f}".format(c_construction_end_time-h_fit_end_time))

        self.c_regressor.fit(vstack(c_training_examples, format='csr'), c_training_scores)
        c_fit_end_time = time.clock()
        print("C heuristic train time: {0:.4f}".format(c_fit_end_time-c_construction_end_time))

        print("Training examples - Total: {0}, H: {1}, C: {2}".format(len(x_train), len(h_training_examples),
                                                                      len(c_training_examples)))
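Putting the pieces together, a usage sketch of the regressor-based variant might look like the following; HCSearch is a stand-in name for the class these methods belong to, and its constructor arguments are assumptions.

# Hypothetical usage: HCSearch and its constructor arguments are stand-ins.
model = HCSearch(number_of_labels=10,
                 scoring_function='hamming',
                 depth_of_search=5)
model.fit(x_train, y_train)

# At prediction time the fitted H regressor guides the greedy search and the
# fitted C regressor picks the best candidate among the returned outputs.
flipbit = FlipBit(x_test[0], model.number_of_labels, model.scoring_function,
                  fitted_regressor=model.h_regressor)
candidates = flipbit.greedy_search(model.depth_of_search)
prediction = model.predict_best_output(x_test[0], candidates)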