示例#1
0
def main():
    """Demo entry point: generate CARs on the wine data, prune them,
    build a CBA-CB M1 classifier, and print every intermediate result.
    """
    data_file = 'train.data.csv'
    scheme_file = 'wine.names.csv'

    # data_file = 'datasets/iris.data'
    # scheme_file = 'datasets/iris.names'

    raw_rows, attrs, attr_types = read(data_file, scheme_file)
    random.shuffle(raw_rows)
    dataset = pre_process(raw_rows, attrs, attr_types)

    # Generate class association rules, then replace the rule set with
    # its pruned version before building the classifier.
    cars = rule_generator(dataset, 0.22, 0.6)
    cars.prune_rules(dataset)
    cars.rules = cars.pruned_rules

    m1_classifier = classifier_builder_m1(cars, dataset)

    car_count = len(cars.rules)

    bar = "_______________________________________________________"
    print(bar)
    cars.print_rule()
    print(bar)
    # NOTE(review): this re-prunes the already-pruned rule set before
    # printing — presumably intentional in the original demo; confirm.
    cars.prune_rules(dataset)
    cars.print_pruned_rule()
    print(bar)
    print()
    m1_classifier.print()

    print(bar)
    print(car_count)
示例#2
0
def cross_validate_m1_without_prune(data_path,
                                    scheme_path,
                                    minsup=0.01,
                                    minconf=0.5,
                                    n_folds=10):
    """Cross-validate CBA with the M1 classifier builder, without rule
    pruning, printing per-round and average statistics.

    Args:
        data_path: path to the CSV data file.
        scheme_path: path to the attribute scheme (.names) file.
        minsup: minimum support threshold for CBA-RG.
        minconf: minimum confidence threshold for CBA-RG.
        n_folds: number of cross-validation folds (default 10, the value
            previously hard-coded throughout this function).
    """
    data, attributes, value_type = read(data_path, scheme_path)
    random.shuffle(data)
    dataset = pre_process(data, attributes, value_type)

    # Fold boundaries: n_folds equal blocks; the final boundary is
    # len(dataset) so the last fold absorbs any remainder rows.
    block_size = len(dataset) // n_folds
    split_point = [k * block_size for k in range(n_folds)]
    split_point.append(len(dataset))

    cba_rg_total_runtime = 0
    cba_cb_total_runtime = 0
    total_car_number = 0
    total_classifier_rule_num = 0
    error_total_rate = 0

    for k in range(len(split_point) - 1):
        print("\nRound %d:" % k)

        # Train on everything outside fold k; test on fold k.
        training_dataset = dataset[:split_point[k]] + dataset[split_point[k +
                                                                          1]:]
        test_dataset = dataset[split_point[k]:split_point[k + 1]]

        start_time = time.time()
        cars = rule_generator(training_dataset, minsup, minconf)
        end_time = time.time()
        cba_rg_runtime = end_time - start_time
        cba_rg_total_runtime += cba_rg_runtime

        start_time = time.time()
        classifier_m1 = classifier_builder_m1(cars, training_dataset)
        end_time = time.time()
        cba_cb_runtime = end_time - start_time
        cba_cb_total_runtime += cba_cb_runtime

        error_rate = get_error_rate(classifier_m1, test_dataset)
        error_total_rate += error_rate

        total_car_number += len(cars.rules)
        total_classifier_rule_num += len(classifier_m1.rule_list)

        print("CBA's error rate without pruning: %.1lf%%" % (error_rate * 100))
        print("No. of CARs without pruning: %d" % len(cars.rules))
        print("CBA-RG's run time without pruning: %.2lf s" % cba_rg_runtime)
        print("CBA-CB M1's run time without pruning: %.2lf s" % cba_cb_runtime)
        print("No. of rules in classifier of CBA-CB M1 without pruning: %d" %
              len(classifier_m1.rule_list))

    # Averages over the n_folds rounds (previously hard-coded to 10).
    print("\nAverage CBA's error rate without pruning: %.1lf%%" %
          (error_total_rate / n_folds * 100))
    print("Average No. of CARs without pruning: %d" %
          int(total_car_number / n_folds))
    print("Average CBA-RG's run time without pruning: %.2lf s" %
          (cba_rg_total_runtime / n_folds))
    print("Average CBA-CB M1's run time without pruning: %.2lf s" %
          (cba_cb_total_runtime / n_folds))
    print(
        "Average No. of rules in classifier of CBA-CB M1 without pruning: %d" %
        int(total_classifier_rule_num / n_folds))
示例#3
0
文件: cba_cb_m2.py 项目: munif/CBA
            rule_errors += errorsOfRule(cars_list[r_index], dataset)
            class_distribution = compClassDistr(dataset)
            default_class = selectDefault(class_distribution)
            default_errors = defErr(default_class, class_distribution)
            total_errors = rule_errors + default_errors
            classifier.add(cars_list[r_index], default_class, total_errors)
    classifier.discard()

    return classifier


# just for test
if __name__ == "__main__":
    import cba_rg

    # Tiny hand-made dataset: two attributes followed by a class label.
    demo_data = [[1, 1, 1], [1, 1, 1], [1, 2, 1], [2, 2, 1], [2, 2, 1],
                 [2, 2, 0], [2, 3, 0], [2, 3, 0], [1, 1, 0], [3, 2, 0]]
    cars = cba_rg.rule_generator(demo_data, 0.15, 0.6)

    # First classifier: built from the raw (unpruned) CARs.
    clf = classifier_builder_m2(cars, demo_data)
    clf.print()

    print()
    # NOTE(review): the dataset literal is re-created here exactly as in
    # the original — presumably an earlier step mutates it; confirm.
    demo_data = [[1, 1, 1], [1, 1, 1], [1, 2, 1], [2, 2, 1], [2, 2, 1],
                 [2, 2, 0], [2, 3, 0], [2, 3, 0], [1, 1, 0], [3, 2, 0]]
    cars.prune_rules(demo_data)
    cars.rules = cars.pruned_rules

    # Second classifier: built from the pruned rule set.
    clf = classifier_builder_m2(cars, demo_data)
    clf.print()
示例#4
0
def cross_validate(data_path,
                   scheme_path,
                   class_first=False,
                   minsup=0.1,
                   minconf=0.6,
                   n_folds=10):
    """Cross-validate CBA (CARs + M1 classifier builder), printing
    per-round accuracy/runtime figures and their averages.

    Args:
        data_path: path to the CSV data file.
        scheme_path: path to the attribute scheme (.names) file.
        class_first: if True, the class label is the first column of
            every row and is rotated to the last position (the layout
            the rest of the pipeline expects).
        minsup: minimum support threshold for CBA-RG.
        minconf: minimum confidence threshold for CBA-RG.
        n_folds: number of cross-validation folds (default 10, the value
            previously hard-coded throughout this function).
    """
    data, attributes, value_type = read(data_path, scheme_path)
    if class_first:
        # Rotate the class column from the front to the back, in place,
        # for the rows and for both metadata lists.
        for row in data:
            row.append(row.pop(0))
        attributes.append(attributes.pop(0))
        value_type.append(value_type.pop(0))
    random.shuffle(data)
    dataset = pre_process(data, attributes, value_type)

    # Fold boundaries: n_folds equal blocks; the final boundary is
    # len(dataset) so the last fold absorbs any remainder rows.
    block_size = len(dataset) // n_folds
    split_point = [k * block_size for k in range(n_folds)]
    split_point.append(len(dataset))

    cba_rg_total_runtime = 0
    cba_cb_total_runtime = 0
    total_car_number = 0
    total_classifier_rule_num = 0
    error_total_rate = 0
    acc_total = 0
    for k in range(len(split_point) - 1):
        print("\nRound %d:" % k)

        # Train on everything outside fold k; test on fold k.
        training_dataset = dataset[:split_point[k]] + dataset[split_point[k +
                                                                          1]:]
        test_dataset = dataset[split_point[k]:split_point[k + 1]]

        start_time = time.time()
        cars = rule_generator(training_dataset, minsup, minconf)
        end_time = time.time()
        cba_rg_runtime = end_time - start_time
        cba_rg_total_runtime += cba_rg_runtime

        start_time = time.time()
        classifier = classifier_builder_m1(cars, training_dataset)
        end_time = time.time()
        cba_cb_runtime = end_time - start_time
        cba_cb_total_runtime += cba_cb_runtime

        classifier.print()
        res = acc(classifier, test_dataset)
        acc_total += res

        # NOTE(review): the error rate is accumulated but never reported
        # in the summary below — only accuracy is. Kept for parity.
        error_rate = get_error_rate(classifier, test_dataset)
        error_total_rate += error_rate

        total_car_number += len(cars.rules)
        total_classifier_rule_num += len(classifier.rule_list)

        print("accuracy:", (res * 100))
        print("No. of CARs : ", len(cars.rules))
        print("CBA-RG's run time : s", cba_rg_runtime)
        print("CBA-CB M1's run time :  s", cba_cb_runtime)
        print("No. of rules in classifier of CBA-CB: ",
              len(classifier.rule_list))

    # Averages over the n_folds rounds (previously hard-coded to 10).
    print("\n Average CBA's accuracy :", (acc_total / n_folds * 100))
    print("Average No. of CARs : ", (total_car_number / n_folds))
    print("Average CBA-RG's run time: ", (cba_rg_total_runtime / n_folds))
    print("Average CBA-CB run time:  ", (cba_cb_total_runtime / n_folds))
    print("Average No. of rules in classifier of CBA-CB: ",
          (total_classifier_rule_num / n_folds))