示例#1
0
 def test_n_negative(self):
     """Check that the code lengths for n=1 and n=2 (maximum 2) form a full probability mass.

     Each code length ``L`` is turned into the probability ``2**-L``; the two
     probabilities are expected to add up to 1.
     NOTE(review): despite the test name, no negative value is passed here.
     """
     maximum = 2
     codelength_first = universal_code_integers_maximum(1, maximum)
     codelength_second = universal_code_integers_maximum(2, maximum)

     probability_total = 2**-codelength_first + 2**-codelength_second

     assert 1 == pytest.approx(probability_total)
示例#2
0
    def test_numericattribute(self, auxiliar_numericattribute):
        """Compare ``compute_item_length`` on the numeric fixture with hand-built items.

        The first two generated items are expected to be
        ``("column1", 1, log2(6) + universal_code_integers_maximum(1, 2))`` and
        ``("column1", 2, log2(3) + universal_code_integers_maximum(2, 2))``.
        Name and operator count are compared exactly, the length approximately.
        """
        expected_items = [
            ("column1", 1, log2(6) + universal_code_integers_maximum(1, 2)),
            ("column1", 2, log2(3) + universal_code_integers_maximum(2, 2)),
        ]
        produced_items = list(compute_item_length(auxiliar_numericattribute))

        # Index into the produced list (rather than zip) so that a missing
        # item still raises instead of being silently skipped.
        for position, (name, n_operators, length) in enumerate(expected_items):
            produced = produced_items[position]
            assert name == produced[0]
            assert n_operators == produced[1]
            assert length == pytest.approx(produced[2])
示例#3
0
 def test_n_one(self):
     """Edge case: n equal to a maximum of 1 is expected to cost a code length of 0."""
     codelength = universal_code_integers_maximum(1, 1)

     assert 0 == pytest.approx(codelength)
示例#4
0
    def test_nominalattribute(self, auxiliar_nominalattribute):
        """Compare ``compute_item_length`` on the nominal fixture with a hand-built item.

        The first generated item is expected to be
        ``("column1", 1, log2(2) + universal_code_integers_maximum(1, 1))``.
        Name and operator count are compared exactly, the length approximately.
        """
        expected_name = "column1"
        expected_n_operators = 1
        expected_length = log2(2) + universal_code_integers_maximum(1, 1)

        first_item = list(compute_item_length(auxiliar_nominalattribute))[0]

        assert expected_name == first_item[0]
        assert expected_n_operators == first_item[1]
        assert expected_length == pytest.approx(first_item[2])
示例#5
0
    def test_add_rule_2items(self, search_parameters,
                             generate_input_dataframe_two_target_normal,
                             generate_subgroup_2subgroups):
        """Add two subgroups to a ``GaussianRuleList`` and check its bookkeeping.

        After both ``add_rule`` calls the test expects:
        * four different ways of counting instances to all equal
          ``data.number_instances``;
        * an empty uncovered bitset and a covered bitset of ``bit_mask(100000)``
          (presumably the fixture has 100000 rows -- confirm against the fixture);
        * two rules and a model length matching the hand-summed code lengths.
        """
        data = generate_input_dataframe_two_target_normal
        # The target model entry of the search parameters is not needed here.
        (_target_model, max_depth, beam_width, minsupp, max_rules,
         alpha_gain) = search_parameters
        first_subgroup, second_subgroup = generate_subgroup_2subgroups

        ruleset = GaussianRuleList(data, "discovery", max_depth, beam_width,
                                   minsupp, max_rules, alpha_gain)
        for subgroup in (first_subgroup, second_subgroup):
            ruleset.add_rule(subgroup, data)

        total_instances = data.number_instances
        nothing_uncovered = mpz()
        everything_covered = bit_mask(100000)
        # One left-to-right chain: the code for the number of rules (2),
        # followed by the four terms of the first rule and then of the second.
        model_length = (
            universal_code_integers(2)
            + universal_code_integers(1)
            + uniform_combination_code(1, 2)
            + universal_code_integers_maximum(1, 2)
            + uniform_code(10)
            + universal_code_integers(1)
            + uniform_combination_code(1, 2)
            + universal_code_integers_maximum(1, 1)
            + uniform_code(2)
        )

        rule_one = ruleset.subgroups[0]
        rule_two = ruleset.subgroups[1]
        # Count 1: rule one, the part of rule two not already taken by rule
        # one, plus whatever is left uncovered.
        count_by_rule_bitarrays = (
            popcount(rule_one.bitarray)
            + popcount(rule_two.bitarray & ~rule_one.bitarray)
            + popcount(ruleset.bitset_uncovered)
        )
        count_by_supports = ruleset.support_covered + ruleset.support_uncovered
        count_by_bitsets = (
            popcount(ruleset.bitset_covered)
            + popcount(ruleset.bitset_uncovered)
        )
        count_by_usages = (
            rule_one.usage
            + rule_two.usage
            + ruleset.default_rule_statistics.usage
        )

        for counted in (count_by_rule_bitarrays, count_by_supports,
                        count_by_bitsets, count_by_usages):
            assert total_instances == counted
        assert nothing_uncovered == ruleset.bitset_uncovered
        assert everything_covered == ruleset.bitset_covered
        assert 2 == ruleset.number_rules
        assert model_length == pytest.approx(ruleset.length_model)
示例#6
0
def compute_item_length(attribute: Attribute) -> float:
    """ Yields the code length of an item of ``attribute`` per number of operators

    For every operator count from 1 up to ``attribute.max_operators`` the
    length is the sum of
    ``universal_code_integers_maximum(count, attribute.max_operators)`` and
    ``uniform_code(attribute.cardinality_operator[count])``.

    Yields ``(attribute.name, count, length)`` tuples.

    NOTE(review): this function is a generator, so the ``-> float`` annotation
    describes only the last element of each yielded tuple, not what a caller
    receives; consider annotating it as an iterator of tuples.
    """
    for count in range(1, attribute.max_operators + 1):
        # Cost of stating how many operators the item uses.
        count_length = universal_code_integers_maximum(
            count, attribute.max_operators)
        # Cost of picking one item among those with this many operators.
        choice_length = uniform_code(attribute.cardinality_operator[count])

        yield attribute.name, count, count_length + choice_length


#def compute_item_length_uniformforall(attribute: Attribute) -> float:
#    cardinality  = sum([attribute.cardinality_operator[n_operators] for n_operators in range(1,attribute.max_operators+1)])
#    l_item = uniform_code(cardinality)
#    for n_operators in range(1,attribute.max_operators+1):
#        yield attribute.name, n_operators, l_item
示例#7
0
    def test_add_rule_itemnumeric(self, search_parameters,
                                  generate_input_dataframe_two_target_normal,
                                  generate_subgroup_oneitem_numeric):
        """Add one numeric-item subgroup to a ``GaussianRuleList`` and check its bookkeeping.

        After the ``add_rule`` call the test expects:
        * four different ways of counting instances to all equal
          ``data.number_instances``;
        * instance indexes 0..16666 to be covered and all later ones uncovered;
        * one rule and a model length matching the hand-summed code lengths.
        """
        data = generate_input_dataframe_two_target_normal
        # The target model entry of the search parameters is not needed here.
        (_target_model, max_depth, beam_width, minsupp, max_rules,
         alpha_gain) = search_parameters
        subgroup = generate_subgroup_oneitem_numeric

        ruleset = GaussianRuleList(data, "discovery", max_depth, beam_width,
                                   minsupp, max_rules, alpha_gain)
        ruleset.add_rule(subgroup, data)

        total_instances = data.number_instances
        last_covered_index = 16666
        uncovered_bitset = indexes2bitset(
            [idx for idx in range(total_instances)
             if idx > last_covered_index])
        covered_bitset = indexes2bitset(
            [idx for idx in range(total_instances)
             if idx <= last_covered_index])
        # One left-to-right chain: the code for the number of rules (1),
        # followed by the four terms of the single rule.
        model_length = (
            universal_code_integers(1)
            + universal_code_integers(1)
            + uniform_combination_code(1, 2)
            + universal_code_integers_maximum(1, 2)
            + uniform_code(10)
        )

        only_rule = ruleset.subgroups[0]
        count_by_rule_bitarray = (
            popcount(only_rule.bitarray)
            + popcount(ruleset.bitset_uncovered)
        )
        count_by_supports = ruleset.support_covered + ruleset.support_uncovered
        count_by_bitsets = (
            popcount(ruleset.bitset_covered)
            + popcount(ruleset.bitset_uncovered)
        )
        count_by_usages = (
            ruleset.subgroups[0].usage
            + ruleset.default_rule_statistics.usage
        )

        for counted in (count_by_rule_bitarray, count_by_supports,
                        count_by_bitsets, count_by_usages):
            assert total_instances == counted
        assert uncovered_bitset == ruleset.bitset_uncovered
        assert covered_bitset == ruleset.bitset_covered
        assert 1 == ruleset.number_rules
        assert model_length == pytest.approx(ruleset.length_model)