# Example script: subgroup discovery on the credit-g ARFF data set.
# Runs a beam search and then a simple depth-first search on the same task
# and prints every (quality, subgroup) pair of both result sets.
from scipy.io import arff
import pysubgroup as ps
import pandas as pd


def _print_result_set(result_set):
    """Print one line per (quality, subgroup) pair.

    Each line is the quality, a colon and a tab, then the subgroup's
    ``subgroupDescription``, all converted with ``str``.
    """
    for quality, subgroup in result_set:
        quality_text = str(quality)
        description_text = str(subgroup.subgroupDescription)
        print(quality_text + ":\t" + description_text)


# Only element 0 of loadarff's return value is turned into the DataFrame.
raw_records = arff.loadarff("../data/credit-g.arff")[0]
data = pd.DataFrame(raw_records)

# The target value is given as bytes (b'bad'), not as a str.
target = ps.NominalTarget('class', b'bad')

# The target column itself is excluded from the selector search space.
searchSpace = ps.createNominalSelectors(data, ignore=['class'])

quality_function = ps.StandardQF(1.0)
task = ps.SubgroupDiscoveryTask(
    data,
    target,
    searchSpace,
    resultSetSize=10,
    depth=3,
    qf=quality_function,
)

# First pass: beam search.
result = ps.BeamSearch(beamWidth=10).execute(task)
_print_result_set(result)

print("******")

# Second pass: simple depth-first search on the very same task.
result = ps.SimpleDFS().execute(task)
_print_result_set(result)

# Disabled debugging line kept from the original (print-statement syntax):
# print WRAccQF().evaluateFromDataset(data, Subgroup(target, []))
def test_BeamSearch(self):
    """Run ``ps.BeamSearch`` with ``beam_width=12`` through ``runAlgorithm``.

    The stored ``self.result``, ``self.qualities`` and ``self.task`` are
    forwarded unchanged, together with the label "BeamSearch".
    """
    algorithm = ps.BeamSearch(beam_width=12)
    self.runAlgorithm(
        algorithm,
        "BeamSearch",
        self.result,
        self.qualities,
        self.task,
    )
def test_BeamSearch_sum(self):
    """Run a default ``ps.BeamSearch`` after swapping the task's quality function.

    The task's ``qf`` is replaced by a ``CountCallsInterestingMeasure``
    wrapping a ``StandardQFNumeric`` that reuses the ``a`` attribute of the
    previous quality function; the task is then handed to ``runAlgorithm``.
    """
    previous_a = self.task.qf.a
    # Positional arguments after `a` are passed exactly as before:
    # False and 'sum' (presumably invert flag and estimator — confirm
    # against the StandardQFNumeric signature).
    numeric_qf = ps.StandardQFNumeric(previous_a, False, 'sum')
    self.task.qf = ps.CountCallsInterestingMeasure(numeric_qf)

    algorithm = ps.BeamSearch()
    self.runAlgorithm(
        algorithm,
        "BeamSearch sum",
        self.result,
        self.qualities,
        self.task,
    )
# Example script: subgroup discovery on the titanic data set.
# Runs a default beam search with a chi-squared quality function and prints
# every (quality, subgroup) pair of the result set.
import pysubgroup as ps
import pandas as pd

data = pd.read_table("../data/titanic.csv")

target = ps.NominalTarget('Survived', True)

# The target column itself is excluded from the selector search space.
searchspace = ps.create_selectors(data, ignore=['Survived'])

quality_function = ps.ChiSquaredQF()
task = ps.SubgroupDiscoveryTask(
    data,
    target,
    searchspace,
    result_set_size=5,
    depth=2,
    qf=quality_function,
)

result = ps.BeamSearch().execute(task)

# One output line per result: quality, a colon and a tab, then the description.
for quality, subgroup in result:
    quality_text = str(quality)
    description_text = str(subgroup.subgroup_description)
    print(quality_text + ":\t" + description_text)
def test_BeamSearch(self):
    """Run a default ``ps.BeamSearch`` with a prediction quality function.

    The task's ``qf`` is replaced by a ``CountCallsInterestingMeasure``
    wrapping a ``PredictionQFNumeric`` that reuses the ``a`` attribute of the
    previous quality function; the task is then handed to ``runAlgorithm``.
    """
    previous_a = self.task.qf.a
    prediction_qf = ps.PredictionQFNumeric(previous_a, False)
    self.task.qf = ps.CountCallsInterestingMeasure(prediction_qf)

    algorithm = ps.BeamSearch()
    # NOTE(review): the label says "sum" although no 'sum' argument is passed
    # here — possibly copied from another test; confirm before renaming.
    self.runAlgorithm(
        algorithm,
        "BeamSearch sum",
        self.result,
        self.qualities,
        self.task,
    )