def test_simple(self):
    """Check generalisation-aware statistics and score for ``A1 AND BA``.

    The test initialises ``self.ga_qf`` on a dummy task with the binary
    target ``columnC == 1`` and then verifies, for the conjunction of
    ``self.A1`` and ``self.BA``:

    * the subgroup and generalisation statistics tuples,
    * that a repeated ``calculate_statistics`` call yields an equal result
      (described in the original as a cache check),
    * that ``evaluate`` is repeatable and approximately ``0.0666...``.
    """
    dummy_task = task_dummy(self.df, ps.BinaryTarget('columnC', 1))

    # A plain StandardQF is initialised on the same task as well. Nothing
    # below reads it; it only served these (disabled) debug prints:
    #   qf.calculate_statistics(self.A1, self.df)
    #   qf.calculate_statistics(self.BA, self.df)
    #   qf.calculate_statistics(ps.Conjunction([self.A1, self.BA]), self.df)
    #   qf.calculate_statistics(slice(None), self.df)
    plain_qf = ps.StandardQF(0)
    plain_qf.calculate_constant_statistics(dummy_task)

    self.ga_qf.calculate_constant_statistics(dummy_task)

    def a1_and_ba():
        # Build a fresh, equal conjunction on every call so that repeated
        # lookups are matched by equality rather than object identity.
        return ps.Conjunction([self.A1, self.BA])

    stats = self.ga_qf.calculate_statistics(a1_and_ba(), self.df)

    self.assertEqual(stats.subgroup_stats, ps.SimplePositivesQF.tpl(3, 2))
    self.assertEqual(stats.generalisation_stats,
                     ps.SimplePositivesQF.tpl(5, 3))

    # Ensure cache works properly: asking again must give equal statistics.
    self.assertEqual(
        stats, self.ga_qf.calculate_statistics(a1_and_ba(), self.df))

    first_score = self.ga_qf.evaluate(a1_and_ba(), self.df)
    second_score = self.ga_qf.evaluate(a1_and_ba(), self.df)

    self.assertEqual(first_score, second_score)
    self.assertAlmostEqual(first_score, 0.06666666666666)
示例#2
0
 def setUp(self):
     """Store the reference subgroups, their qualities and the task.

     Sets three attributes on the test case:

     * ``self.result``    -- ten conjunctions of equality selectors,
     * ``self.qualities`` -- ten floats, listed in the same order,
     * ``self.task``      -- a ``SubgroupDiscoveryTask`` on the credit data
       (target ``class == b'bad'``, nominal selectors, ``result_set_size=10``,
       ``depth=5``, ``StandardQF(1.0)``).

     NOTE(review): presumably the test methods compare an algorithm's output
     against ``result``/``qualities``; those methods are not visible here.
     """
     checking = ps.EqualitySelector("checking_status", b"<0")
     foreign = ps.EqualitySelector("foreign_worker", b"yes")
     parties = ps.EqualitySelector("other_parties", b"none")
     savings = ps.EqualitySelector("savings_status", b"<100")
     job = ps.EqualitySelector("job", b"skilled")

     # One entry per reference subgroup; the order matches self.qualities.
     selector_sets = (
         [checking, foreign],
         [checking],
         [checking, parties, foreign],
         [checking, parties],
         [checking, savings, foreign],
         [checking, savings],
         [checking, savings, parties, foreign],
         [checking, job, foreign],
         [checking, savings, parties],
         [checking, job],
     )
     self.result = [ps.Conjunction(members) for members in selector_sets]

     self.qualities = [
         0.055299999999999995,
         0.05280000000000001,
         0.052300000000000006,
         0.05059999999999999,
         0.04959999999999999,
         0.048299999999999996,
         0.04660000000000001,
         0.04550000000000001,
         0.0452,
         0.044399999999999995,
     ]

     credit = get_credit_data()
     bad_class = ps.BinaryTarget('class', b'bad')
     # The target column itself must not be offered as a selector.
     nominal_selectors = ps.create_nominal_selectors(credit, ignore=['class'])
     self.task = ps.SubgroupDiscoveryTask(
         credit,
         bad_class,
         nominal_selectors,
         result_set_size=10,
         depth=5,
         qf=ps.StandardQF(1.0),
     )
示例#3
0
 def setUpClass(cls):
     """Build the shared task once and store the SimpleDFS result on the class.

     Sets ``cls.task`` (credit data, target ``class == b'bad'``, nominal
     selectors, ``result_set_size=10``, ``depth=5``, ``StandardQF(1.0)``) and
     ``cls.result`` (whatever ``ps.SimpleDFS().execute`` returns for it).

     NOTE(review): takes ``cls``, so it is presumably decorated with
     ``@classmethod``; the decorator is not visible in this excerpt.
     """
     credit = get_credit_data()
     bad_class = ps.BinaryTarget('class', b'bad')
     # The target column itself must not be offered as a selector.
     nominal_selectors = ps.create_nominal_selectors(credit, ignore=['class'])
     cls.task = ps.SubgroupDiscoveryTask(
         credit,
         bad_class,
         nominal_selectors,
         result_set_size=10,
         depth=5,
         qf=ps.StandardQF(1.0),
     )
     dfs = ps.SimpleDFS()
     cls.result = dfs.execute(cls.task)
    def setUp(self):
        """Store the reference disjunctions, their qualities and the task.

        Sets three attributes on the test case:

        * ``self.result``    -- ten disjunctions of equality selectors; each
          starts with the two ``checking_status`` selectors and all but one
          add a third selector,
        * ``self.qualities`` -- ten floats, listed in the same order,
        * ``self.task``      -- a ``SubgroupDiscoveryTask`` on the credit data
          (target ``class == b'bad'``, nominal selectors,
          ``result_set_size=10``, ``depth=3``, ``StandardQF(1.0)``).

        NOTE(review): presumably the test methods compare an algorithm's
        output against ``result``/``qualities``; they are not visible here.
        """
        checking_neg = ps.EqualitySelector("checking_status", b"<0")
        checking_low = ps.EqualitySelector("checking_status", b"0<=X<200")
        co_applicant = ps.EqualitySelector("other_parties", b"co applicant")
        purpose_other = ps.EqualitySelector("purpose", b'other')
        purpose_repairs = ps.EqualitySelector("purpose", b'repairs')
        purpose_business = ps.EqualitySelector("purpose", b'business')

        history_none = ps.EqualitySelector("credit_history",
                                           b"no credits/all paid")
        history_paid = ps.EqualitySelector("credit_history", b"all paid")
        unemployed = ps.EqualitySelector("employment", b"unemployed")
        job_unskilled = ps.EqualitySelector("job", b"unemp/unskilled non res")
        plan_bank = ps.EqualitySelector("other_payment_plans", b"bank")

        # Selectors appended after the common two-selector prefix; the empty
        # entry is the disjunction consisting of the prefix alone.
        extra_selectors = (
            [plan_bank],
            [history_none],
            [],
            [purpose_other],
            [purpose_repairs],
            [unemployed],
            [co_applicant],
            [history_paid],
            [purpose_business],
            [job_unskilled],
        )
        self.result = [
            ps.Disjunction([checking_neg, checking_low, *extra])
            for extra in extra_selectors
        ]

        self.qualities = [
            0.0779,
            0.07740000000000002,
            0.0771,
            0.07680000000000001,
            0.07670000000000002,
            0.0767,
            0.07660000000000003,
            0.07650000000000003,
            0.07650000000000001,
            0.07600000000000001,
        ]

        credit = get_credit_data()
        bad_class = ps.BinaryTarget('class', b'bad')
        # The target column itself must not be offered as a selector.
        nominal_selectors = ps.create_nominal_selectors(credit,
                                                        ignore=['class'])
        self.task = ps.SubgroupDiscoveryTask(
            credit,
            bad_class,
            nominal_selectors,
            result_set_size=10,
            depth=3,
            qf=ps.StandardQF(1.0),
        )
示例#5
0
    def setUp(self):
        """Store the reference subgroups, their qualities and the task.

        Sets three attributes on the test case:

        * ``self.result``    -- twelve conjunctions of equality selectors,
        * ``self.qualities`` -- twelve floats, listed in the same order,
        * ``self.task``      -- a ``SubgroupDiscoveryTask`` on the credit data
          (target ``class == b'bad'``, nominal plus numeric selectors,
          ``result_set_size=12``, ``depth=5``, ``StandardQF(0.5)``).

        NOTE(review): presumably the test methods compare an algorithm's
        output against ``result``/``qualities``; they are not visible here.
        """
        def conj(*members):
            # Shorthand: wrap the given selectors in a Conjunction.
            return ps.Conjunction(list(members))

        checking = ps.EqualitySelector("checking_status", b"<0")
        foreign = ps.EqualitySelector("foreign_worker", b"yes")
        parties = ps.EqualitySelector("other_parties", b"none")
        savings = ps.EqualitySelector("savings_status", b"<100")
        job = ps.EqualitySelector("job", b"skilled")
        dependents = ps.EqualitySelector("num_dependents", 1.0)

        self.result = [
            conj(checking, foreign, job, parties, savings),
            conj(checking, foreign, job, savings),
            conj(checking, foreign, job),
            conj(checking, job, parties, savings),
            conj(checking, foreign, job, parties),
            conj(checking, job, savings),
            conj(checking, foreign, parties, savings),
            conj(checking, foreign, parties),
            conj(checking, foreign, savings),
            conj(checking, foreign),
            conj(checking, foreign, job, dependents, savings),
            conj(checking, job, parties),
        ]

        self.qualities = [
            0.11457431093955019,
            0.113713540226172,
            0.11201325679119281,
            0.1117538749727658,
            0.11161046793076415,
            0.11145710640046322,
            0.11045259291161472,
            0.10929088624672183,
            0.10875519439407161,
            0.10866138825404954,
            0.10832735026213287,
            0.10813405094128754,
        ]

        credit = get_credit_data()
        bad_class = ps.BinaryTarget('class', b'bad')
        # Nominal selectors first, numeric selectors appended after them;
        # the target column is excluded from both.
        nominal = ps.create_nominal_selectors(credit, ignore=['class'])
        numeric = ps.create_numeric_selectors(credit, ignore=['class'])
        all_selectors = nominal + numeric
        self.task = ps.SubgroupDiscoveryTask(
            credit,
            bad_class,
            all_selectors,
            result_set_size=12,
            depth=5,
            qf=ps.StandardQF(0.5),
        )
 def setUp(self):
     """Store the reference subgroups, their qualities and a constrained task.

     Sets three attributes on the test case:

     * ``self.result``    -- ten conjunctions of equality selectors,
     * ``self.qualities`` -- ten floats, listed in the same order,
     * ``self.task``      -- a ``SubgroupDiscoveryTask`` on the credit data
       (target ``class == b'bad'``, nominal selectors, ``result_set_size=10``,
       ``depth=5``, ``StandardQF(1.0)``) with a single
       ``MinSupportConstraint(200)``.

     NOTE(review): presumably the test methods compare an algorithm's output
     against ``result``/``qualities``; those methods are not visible here.
     """
     checking = ps.EqualitySelector("checking_status", b"<0")
     foreign = ps.EqualitySelector("foreign_worker", b"yes")
     parties = ps.EqualitySelector("other_parties", b"none")
     savings = ps.EqualitySelector("savings_status", b"<100")
     plans = ps.EqualitySelector("other_payment_plans", b"none")

     # One entry per reference subgroup; the order matches self.qualities.
     selector_sets = (
         [checking, foreign],
         [checking],
         [checking, parties, foreign],
         [checking, parties],
         [checking, savings, foreign],
         [checking, savings],
         [checking, foreign, plans],
         [checking, plans],
         [foreign, savings],
         [foreign, parties, savings],
     )
     self.result = [ps.Conjunction(members) for members in selector_sets]

     self.qualities = [
         0.055299999999999995,
         0.05280000000000001,
         0.052300000000000006,
         0.05059999999999999,
         0.04959999999999999,
         0.048299999999999996,
         0.0426,
         0.04,
         0.03869999999999999,
         0.03750000000000001,
     ]

     credit = get_credit_data()
     bad_class = ps.BinaryTarget('class', b'bad')
     # The target column itself must not be offered as a selector.
     nominal_selectors = ps.create_nominal_selectors(credit, ignore=['class'])
     support_constraint = ps.MinSupportConstraint(200)
     self.task = ps.SubgroupDiscoveryTask(
         credit,
         bad_class,
         nominal_selectors,
         result_set_size=10,
         depth=5,
         qf=ps.StandardQF(1.0),
         constraints=[support_constraint],
     )
示例#7
0
import pandas as pd
import pysubgroup as ps


# Example: subgroup discovery on the Titanic data with a combined measure.

# Load the data set; the path is relative to the working directory.
data = pd.read_table("../data/titanic.csv")

# Target concept: rows where 'Survived' equals 0.
target = ps.BinaryTarget('Survived', 0)

# Candidate selectors over every column except the target column.
search_space = ps.create_selectors(data, ignore=['Survived'])

# Combine the standard and the generalisation-aware quality functions.
task = ps.SubgroupDiscoveryTask(
    data,
    target,
    search_space,
    result_set_size=5,
    depth=2,
    qf=ps.CombinedInterestingnessMeasure(
        [
            ps.StandardQF(1),
            ps.GeneralizationAware_StandardQF(1),
        ]
    ),
)

# Run the depth-first search with optimistic estimates switched off.
result = ps.SimpleDFS().execute(
    task,
    use_optimistic_estimates=False,
)

print(result.to_dataframe())
示例#8
0
from timeit import default_timer as timer
import pysubgroup as ps
from pysubgroup.tests.DataSets import get_credit_data
# Example: time a SimpleDFS run with a bidirectional chi-squared measure
# on the credit data and print the resulting subgroups.
data = get_credit_data()

print("running")

# Target concept: rows where 'class' equals b'bad'.
target = ps.BinaryTarget('class', b'bad')

# Candidate selectors over every column except the target column.
search_space = ps.create_selectors(data, ignore=['class'])

task = ps.SubgroupDiscoveryTask(
    data,
    target,
    search_space,
    result_set_size=10,
    depth=3,
    qf=ps.ChiSquaredQF(direction="bidirect"),
)

# Only the search itself is timed, not the set-up above.
start = timer()
result = ps.SimpleDFS().execute(task)
end = timer()

print("Time elapsed: ", end - start)

# One line per result entry: "<quality>:<TAB><subgroup description>".
for q, sg in result:
    print(f"{q}:\t{sg.subgroup_description}")

# Disabled legacy debug line, kept for reference:
# print WRAccQF().evaluate_from_dataset(data, Subgroup(target, []))