def fit_simplified(self, x_train, y_train):
        """Fit ``self.h_regressor`` and ``self.c_regressor`` from one search pass.

        A single FlipBit greedy search (given the true output) is run per
        training example.  The training examples/scores recorded by the
        search feed the H regressor; every output returned by that same
        search, paired with its loss against the true output, feeds the
        C regressor.

        Prints the training-set size, the number of H and C learning
        examples, and the construction / learning times.
        """
        h_examples, h_scores = [], []
        c_examples, c_scores = [], []

        started_at = time.clock()
        print("Number of examples in training set: " + str(len(x_train)))
        for idx in xrange(len(x_train)):
            search = FlipBit(x_train[idx], self.number_of_labels, self.scoring_function, true_output=y_train[idx])
            candidates = search.greedy_search(self.depth_of_search)
            h_examples.extend(search.get_training_examples())
            h_scores.extend(search.get_training_scores())

            # Every output reached by the search becomes one C learning example.
            for candidate in candidates:
                attributes = construct_sparse_attributes(x_train[idx], candidate)
                loss = calculate_loss(self.scoring_function, candidate, y_train[idx], self.number_of_labels)
                c_examples.append(attributes)
                c_scores.append(loss)

        generated_at = time.clock()

        h_matrix = vstack(h_examples, format='csr')
        self.h_regressor.fit(h_matrix, h_scores)
        print("Number of H regression learning examples: " + str(len(h_examples)))

        c_matrix = vstack(c_examples, format='csr')
        self.c_regressor.fit(c_matrix, c_scores)
        print("Number of C regression learning examples: " + str(len(c_examples)))

        fitted_at = time.clock()

        print("Construction time: {0:.4f}, Learning HC time: {1:.4f}".format(
            generated_at - started_at, fitted_at - generated_at))
    def fit_heuristic_h(self, h_classifier, x_train, y_train, verbose=0):
        """Collect H training data with FlipBit searches and fit ``h_classifier``.

        One FlipBit search in "train" mode (given the true output) is run per
        training example; the examples and labels it records are stacked into
        a CSR matrix and passed to ``h_classifier.fit``.

        Args:
            h_classifier: object exposing ``fit(X, y)``; fitted in place.
            x_train: training inputs, indexed ``0 .. len(x_train) - 1``.
            y_train: true outputs, indexed in parallel with ``x_train``.
            verbose: when greater than 0, timing information is printed.

        Returns:
            The same ``h_classifier`` object, after fitting.
        """
        construction_started = time.clock()

        examples, labels = [], []
        for idx in xrange(len(x_train)):
            search = FlipBit(
                x_train[idx],
                self.number_of_labels,
                self.scoring_function,
                "train",
                initial_br=self.initial_br,
                true_output=y_train[idx],
                reduction=self.h_reduction,
            )
            # The search result is discarded: the search is run to construct
            # the H training examples it records along the way.
            search.greedy_search(self.depth_of_search)

            examples.extend(search.get_training_examples())
            labels.extend(search.get_training_labels())

        construction_finished = time.clock()
        if verbose > 0:
            construction_time = construction_finished - construction_started
            print("H construction time: {0:.4f}, Examples: {1}".format(construction_time, len(examples)))

        stacked_examples = vstack(examples, format="csr")
        h_classifier.fit(stacked_examples, labels)
        fit_finished = time.clock()
        if verbose > 0:
            fit_time = fit_finished - construction_finished
            print("H heuristic train time: {0:.4f}".format(fit_time))

        return h_classifier
 def predict(self, x_test):
     """Return one predicted output per element of ``x_test``, in order.

     Each example is searched with FlipBit using ``self.h_regressor``; the
     final choice among the search outputs is delegated to
     ``self.predict_best_output``.
     """
     def choose_output(sample):
         # Candidate outputs come from the greedy search guided by the fitted H regressor.
         search = FlipBit(sample, self.number_of_labels, scoring_function=self.scoring_function,
                          fitted_regressor=self.h_regressor)
         candidates = search.greedy_search(self.depth_of_search)
         return self.predict_best_output(sample, candidates)

     return [choose_output(sample) for sample in x_test]
Exemplo n.º 4
0
    def generate_examples_c(self,
                            fitted_h_classifier,
                            x_train,
                            y_train,
                            verbose=0):
        """Build pairwise training examples for the C classifier.

        For every training example a FlipBit search in 'test' mode, guided by
        ``fitted_h_classifier``, produces candidate outputs.  The candidate
        with the lowest loss against ``y_train[i]`` (first one wins on ties)
        is paired with every other candidate whose loss differs from it.
        Each pair yields two mirrored examples: the attribute difference
        (best - other) labelled ``np.sign(best_loss - other_loss)`` and the
        reversed difference labelled with the opposite sign.

        Args:
            fitted_h_classifier: passed to FlipBit as ``fitted_classifier``.
            x_train: training inputs, indexed ``0 .. len(x_train) - 1``.
            y_train: true outputs, indexed in parallel with ``x_train``.
            verbose: when greater than 0, print the construction time and
                the number of generated examples.

        Returns:
            Tuple ``(c_training_x, c_training_y)`` of parallel lists.
        """
        c_start_time = time.clock()

        c_training_x = []
        c_training_y = []

        for i in xrange(len(x_train)):
            flipbit = FlipBit(x_train[i],
                              self.number_of_labels,
                              self.scoring_function,
                              'test',
                              initial_br=self.initial_br,
                              fitted_classifier=fitted_h_classifier)
            outputs = flipbit.greedy_search(
                self.depth_of_search)  # Get outputs using fitted H heuristic

            # Score every candidate exactly once; the losses are reused when
            # the pairs are built below instead of being recomputed.
            scored_outputs = []
            best_loss = sys.maxint
            best_output = None
            for output in outputs:
                loss = calculate_loss(self.scoring_function, output,
                                      y_train[i], self.number_of_labels)
                scored_outputs.append((output, loss))
                if loss < best_loss:
                    best_loss = loss
                    best_output = output

            if best_output is None:
                # No candidate was selected (e.g. the search returned
                # nothing), so there is nothing to pair for this example.
                continue

            output_1_attributes = construct_sparse_attributes(
                x_train[i], best_output)
            for output, loss_2 in scored_outputs:
                if best_output == output:
                    continue
                if best_loss == loss_2:
                    # Equal losses give no preference between the two outputs.
                    continue
                output_2_attributes = construct_sparse_attributes(
                    x_train[i], output)

                c_training_x.append(output_1_attributes - output_2_attributes)
                c_training_y.append(np.sign(best_loss - loss_2))

                c_training_x.append(output_2_attributes - output_1_attributes)
                c_training_y.append(np.sign(loss_2 - best_loss))

        c_construction_end_time = time.clock()
        if verbose > 0:
            print("C construction time: {0:.4f}, Examples: {1}".format(
                c_construction_end_time - c_start_time, len(c_training_x)))
        return c_training_x, c_training_y
    def generate_examples_c(self, fitted_h_classifier, x_train, y_train, verbose=0):
        """Build pairwise training examples for the C classifier.

        For every training example a FlipBit search in "test" mode, guided by
        ``fitted_h_classifier``, produces candidate outputs.  The candidate
        with the lowest loss against ``y_train[i]`` (first one wins on ties)
        is paired with every other candidate whose loss differs from it.
        Each pair yields two mirrored examples: the attribute difference
        (best - other) labelled ``np.sign(best_loss - other_loss)`` and the
        reversed difference labelled with the opposite sign.

        Args:
            fitted_h_classifier: passed to FlipBit as ``fitted_classifier``.
            x_train: training inputs, indexed ``0 .. len(x_train) - 1``.
            y_train: true outputs, indexed in parallel with ``x_train``.
            verbose: when greater than 0, print the construction time and
                the number of generated examples.

        Returns:
            Tuple ``(c_training_x, c_training_y)`` of parallel lists.
        """
        c_start_time = time.clock()

        c_training_x = []
        c_training_y = []

        for i in xrange(len(x_train)):
            flipbit = FlipBit(
                x_train[i],
                self.number_of_labels,
                self.scoring_function,
                "test",
                initial_br=self.initial_br,
                fitted_classifier=fitted_h_classifier,
            )
            outputs = flipbit.greedy_search(self.depth_of_search)  # Get outputs using fitted H heuristic

            # Score every candidate exactly once; the losses are reused when
            # the pairs are built below instead of being recomputed.
            scored_outputs = []
            best_loss = sys.maxint
            best_output = None
            for output in outputs:
                loss = calculate_loss(self.scoring_function, output, y_train[i], self.number_of_labels)
                scored_outputs.append((output, loss))
                if loss < best_loss:
                    best_loss = loss
                    best_output = output

            if best_output is None:
                # No candidate was selected (e.g. the search returned
                # nothing), so there is nothing to pair for this example.
                continue

            output_1_attributes = construct_sparse_attributes(x_train[i], best_output)
            for output, loss_2 in scored_outputs:
                if best_output == output:
                    continue
                if best_loss == loss_2:
                    # Equal losses give no preference between the two outputs.
                    continue
                output_2_attributes = construct_sparse_attributes(x_train[i], output)

                c_training_x.append(output_1_attributes - output_2_attributes)
                c_training_y.append(np.sign(best_loss - loss_2))

                c_training_x.append(output_2_attributes - output_1_attributes)
                c_training_y.append(np.sign(loss_2 - best_loss))

        c_construction_end_time = time.clock()
        if verbose > 0:
            print(
                "C construction time: {0:.4f}, Examples: {1}".format(
                    c_construction_end_time - c_start_time, len(c_training_x)
                )
            )
        return c_training_x, c_training_y
Exemplo n.º 6
0
    def fit_heuristic_h(self, h_classifier, x_train, y_train, verbose=0):
        """Gather H training data via FlipBit searches, then fit ``h_classifier``.

        A FlipBit search in 'train' mode (supplied with the true output) is
        run for each training example.  The examples and labels recorded by
        the searches are stacked into a CSR matrix and used to fit
        ``h_classifier``, which is then returned.

        When ``verbose`` is greater than 0 the construction time, the number
        of collected examples and the fitting time are printed.
        """
        started = time.clock()

        collected_x = []
        collected_y = []
        for row in xrange(len(x_train)):
            searcher = FlipBit(x_train[row], self.number_of_labels, self.scoring_function, 'train',
                               initial_br=self.initial_br, true_output=y_train[row],
                               reduction=self.h_reduction)
            # Run greedy_search to construct H training examples; its return
            # value is not used here.
            searcher.greedy_search(self.depth_of_search)

            collected_x.extend(searcher.get_training_examples())
            collected_y.extend(searcher.get_training_labels())

        constructed = time.clock()
        if verbose > 0:
            message = "H construction time: {0:.4f}, Examples: {1}"
            print(message.format(constructed - started, len(collected_x)))

        design_matrix = vstack(collected_x, format='csr')
        h_classifier.fit(design_matrix, collected_y)
        fitted = time.clock()
        if verbose > 0:
            message = "H heuristic train time: {0:.4f}"
            print(message.format(fitted - constructed))

        return h_classifier
Exemplo n.º 7
0
    def predict(self, x_test, y_test):
        """Predict an output for every test example and print H statistics.

        For each example a FlipBit search in 'test' mode, guided by
        ``self.h_classifier``, produces candidate outputs, and
        ``predict_best_output`` picks one of them using ``self.c_classifier``.
        ``y_test`` is used only for the printed diagnostics: the fraction of
        examples whose candidates do not contain the true output, and the
        average over examples of the lowest candidate loss.

        Args:
            x_test: test inputs, indexed ``0 .. len(x_test) - 1``.
            y_test: true outputs, indexed in parallel with ``x_test``.

        Returns:
            List with the chosen output for each example, in order.  An empty
            ``x_test`` returns ``[]`` without printing anything.
        """
        number_of_examples = len(x_test)
        if number_of_examples == 0:
            # The averages printed below divide by the number of examples;
            # with nothing to predict there is nothing to report either.
            return []

        y_predicted = []
        h_acc_sum = 0.0
        hc_loss_sum = 0.0
        for i in xrange(number_of_examples):

            flipbit = FlipBit(x_test[i],
                              self.number_of_labels,
                              self.scoring_function,
                              'test',
                              initial_br=self.initial_br,
                              fitted_classifier=self.h_classifier)
            outputs = flipbit.greedy_search(self.depth_of_search)

            # Calculate prediction loss
            true_output = y_test[i]
            # NOTE(review): starting at 1.0 caps each example's contribution
            # at 1.0 -- presumably the loss is normalised to [0, 1]; confirm.
            min_loss = 1.0
            for output in outputs:
                loss = calculate_loss(self.scoring_function, output,
                                      true_output, self.number_of_labels)
                if loss < min_loss:
                    min_loss = loss
                if (true_output == output).all():
                    # The true output is among the candidates; stop scanning.
                    h_acc_sum += 1.0
                    break

            hc_loss_sum += min_loss

            best_output = predict_best_output(x_test[i], outputs,
                                              self.c_classifier)
            y_predicted.append(best_output)

        print('Loss 0/1 of H: {0}'.format(
            1.0 - (h_acc_sum / number_of_examples)))
        print("Loss {0} of H: {1}".format(self.scoring_function,
                                          hc_loss_sum / number_of_examples))
        return y_predicted
    def predict(self, x_test, y_test):
        """Predict an output for every test example and print H statistics.

        For each example a FlipBit search in "test" mode, guided by
        ``self.h_classifier``, produces candidate outputs, and
        ``predict_best_output`` picks one of them using ``self.c_classifier``.
        ``y_test`` is used only for the printed diagnostics: the fraction of
        examples whose candidates do not contain the true output, and the
        average over examples of the lowest candidate loss.

        Args:
            x_test: test inputs, indexed ``0 .. len(x_test) - 1``.
            y_test: true outputs, indexed in parallel with ``x_test``.

        Returns:
            List with the chosen output for each example, in order.  An empty
            ``x_test`` returns ``[]`` without printing anything.
        """
        number_of_examples = len(x_test)
        if number_of_examples == 0:
            # The averages printed below divide by the number of examples;
            # with nothing to predict there is nothing to report either.
            return []

        y_predicted = []
        h_acc_sum = 0.0
        hc_loss_sum = 0.0
        for i in xrange(number_of_examples):

            flipbit = FlipBit(
                x_test[i],
                self.number_of_labels,
                self.scoring_function,
                "test",
                initial_br=self.initial_br,
                fitted_classifier=self.h_classifier,
            )
            outputs = flipbit.greedy_search(self.depth_of_search)

            # Calculate prediction loss
            true_output = y_test[i]
            # NOTE(review): starting at 1.0 caps each example's contribution
            # at 1.0 -- presumably the loss is normalised to [0, 1]; confirm.
            min_loss = 1.0
            for output in outputs:
                loss = calculate_loss(self.scoring_function, output, true_output, self.number_of_labels)
                if loss < min_loss:
                    min_loss = loss
                if (true_output == output).all():
                    # The true output is among the candidates; stop scanning.
                    h_acc_sum += 1.0
                    break

            hc_loss_sum += min_loss

            best_output = predict_best_output(x_test[i], outputs, self.c_classifier)
            y_predicted.append(best_output)

        print("Loss 0/1 of H: {0}".format(1.0 - (h_acc_sum / number_of_examples)))
        print("Loss {0} of H: {1}".format(self.scoring_function, hc_loss_sum / number_of_examples))
        return y_predicted
    def fit(self, x_train, y_train):
        """Fit ``self.h_regressor`` and then ``self.c_regressor`` in two passes.

        Pass 1 runs a FlipBit greedy search (given the true output) on every
        training example and fits the H regressor on the examples/scores the
        searches record.  Pass 2 repeats the search, this time guided by the
        freshly fitted H regressor, and fits the C regressor on every
        returned output paired with its loss against the true output.

        Timing for each stage and the example counts are printed.
        """
        c_examples, c_scores = [], []
        h_examples, h_scores = [], []

        started = time.clock()
        for idx in xrange(len(x_train)):
            search = FlipBit(x_train[idx], self.number_of_labels, self.scoring_function, true_output=y_train[idx])
            # Run greedy_search to construct H training examples; the outputs are not needed in this pass.
            search.greedy_search(self.depth_of_search)
            h_examples.extend(search.get_training_examples())
            h_scores.extend(search.get_training_scores())

        h_constructed = time.clock()
        print("H training examples construction time: {0:.4f}".format(h_constructed - started))

        h_matrix = vstack(h_examples, format='csr')
        self.h_regressor.fit(h_matrix, h_scores)
        h_fitted = time.clock()
        print("H heuristic train time: {0:.4f}".format(h_fitted - h_constructed))

        for idx in xrange(len(x_train)):
            search = FlipBit(x_train[idx], self.number_of_labels, self.scoring_function,
                             fitted_regressor=self.h_regressor)
            # Get outputs using fitted H heuristic
            candidates = search.greedy_search(self.depth_of_search)

            for candidate in candidates:
                attributes = construct_sparse_attributes(x_train[idx], candidate)
                loss = calculate_loss(self.scoring_function, candidate, y_train[idx], self.number_of_labels)
                c_examples.append(attributes)
                c_scores.append(loss)

        c_constructed = time.clock()
        print("C training examples construction time: {0:.4f}".format(c_constructed - h_fitted))

        c_matrix = vstack(c_examples, format='csr')
        self.c_regressor.fit(c_matrix, c_scores)
        c_fitted = time.clock()
        print("C heuristic train time: {0:.4f}".format(c_fitted - c_constructed))

        summary = "Training examples - Total: {0}, H: {1}, C: {2}"
        print(summary.format(len(x_train), len(h_examples), len(c_examples)))