# Example script: run ps.SimpleDFS on the credit data set, report how long the
# search took, then print one line per (quality, subgroup) pair in the result.
from timeit import default_timer as timer

import pysubgroup as ps
from pysubgroup.tests.DataSets import get_credit_data

data = get_credit_data()
print("running")

# The target is built from the "class" column and the bytes value b"bad";
# the same column is excluded when the selectors are generated.
# NOTE(review): presumably the loader yields byte strings for this column,
# which is why a bytes literal is used -- confirm against get_credit_data().
target = ps.BinaryTarget("class", b"bad")
search_space = ps.create_selectors(data, ignore=["class"])
quality_function = ps.ChiSquaredQF(direction="bidirect")
task = ps.SubgroupDiscoveryTask(
    data,
    target,
    search_space,
    result_set_size=10,
    depth=3,
    qf=quality_function,
)

# Only algorithm construction plus execute() sits between the two timer
# readings; loading the data and building the task happen before the first.
start = timer()
result = ps.SimpleDFS().execute(task)
elapsed = timer() - start
print("Time elapsed: ", elapsed)

for quality, subgroup in result:
    print(f"{quality!s}:\t{subgroup.subgroup_description!s}")
# Example script: run ps.SimpleDFS on a Titanic CSV, convert the result to a
# data frame and hand it to ps.plot_roc.
import matplotlib.pyplot as plt
import pandas as pd
import pysubgroup as ps

plt.interactive(False)

data = pd.read_csv("~/datasets/titanic.csv")

# The target is built from the "survived" column and the value 0; the same
# column is excluded when the selectors are generated.
target = ps.NominalTarget("survived", 0)
search_space = ps.createSelectors(data, ignore=["survived"])
task = ps.SubgroupDiscoveryTask(
    data,
    target,
    search_space,
    resultSetSize=5,
    depth=2,
    qf=ps.ChiSquaredQF(),
)

result = ps.SimpleDFS().execute(task)
result_frame = ps.utils.resultsAsDataFrame(data, result)

# The plot receives its own, freshly constructed ChiSquaredQF instance.
# Its return value is kept under a separate name so that `plt` keeps
# referring to matplotlib.pyplot.
# NOTE(review): .show() is invoked on whatever plot_roc returns -- confirm
# whether that is pyplot itself or a figure object.
roc_plot = ps.plot_roc(data, result_frame, ps.ChiSquaredQF())
roc_plot.show()
# Example script: run ps.BeamSearch on the Titanic data and print one line per
# (quality, subgroup) pair in the result.
import pandas as pd
import pysubgroup as ps

# pandas.read_table uses a tab as its default delimiter.
# NOTE(review): presumably the file is tab-separated despite its .csv
# extension -- confirm before switching to read_csv.
data = pd.read_table("../data/titanic.csv")

# The target is built from the "Survived" column and the value True; the
# same column is excluded when the selectors are generated.
target = ps.NominalTarget("Survived", True)
search_space = ps.create_selectors(data, ignore=["Survived"])
quality_function = ps.ChiSquaredQF()
task = ps.SubgroupDiscoveryTask(
    data,
    target,
    search_space,
    result_set_size=5,
    depth=2,
    qf=quality_function,
)

result = ps.BeamSearch().execute(task)

for quality, subgroup in result:
    print(f"{quality!s}:\t{subgroup.subgroup_description!s}")
# Example script: run ps.BSD and ps.TID_SD on the same task built from the
# credit-g ARFF file, printing the run time and the results of each.
from timeit import default_timer as timer

import pandas as pd
import pysubgroup as ps
from scipy.io import arff


def _run_and_report(algorithm_factory, task):
    """Execute one algorithm on *task* and print its timing and results.

    The two timer readings enclose both the construction of the algorithm
    object and its ``execute`` call.

    Args:
        algorithm_factory: Zero-argument callable (e.g. ``ps.BSD``) returning
            an object that offers ``execute(task)``.
        task: The task object passed unchanged to ``execute``.

    Returns:
        The value returned by ``execute``. It is iterated once here to print
        one line per ``(quality, subgroup)`` pair.
    """
    start = timer()
    result = algorithm_factory().execute(task)
    elapsed = timer() - start
    print("Time elapsed: ", elapsed)
    for quality, subgroup in result:
        print(f"{quality!s}:\t{subgroup.subgroupDescription!s}")
    return result


# Only element 0 of loadarff's return value is turned into the data frame.
records = arff.loadarff("../data/credit-g.arff")[0]
data = pd.DataFrame(records)

# The target is built from the "class" column and the bytes value b"bad";
# the same column is excluded when the nominal selectors are generated.
target = ps.NominalTarget("class", b"bad")
search_space = ps.createNominalSelectors(data, ignore=["class"])
task = ps.SubgroupDiscoveryTask(
    data,
    target,
    search_space,
    resultSetSize=10,
    depth=5,
    qf=ps.ChiSquaredQF(),
)

# Both algorithms receive the very same task object; a separator line is
# printed between their reports.
result = _run_and_report(ps.BSD, task)
print("******")
result = _run_and_report(ps.TID_SD, task)