示例#1
0
from timeit import default_timer as timer
import pysubgroup as ps
from pysubgroup.tests.DataSets import get_credit_data
data = get_credit_data()

print("running")
target = ps.BinaryTarget('class', b'bad')
search_space = ps.create_selectors(data, ignore=['class'])
task = ps.SubgroupDiscoveryTask(data,
                                target,
                                search_space,
                                result_set_size=10,
                                depth=3,
                                qf=ps.ChiSquaredQF(direction="bidirect"))

start = timer()
result = ps.SimpleDFS().execute(task)
end = timer()

print("Time elapsed: ", (end - start))

for (q, sg) in result:
    print(str(q) + ":\t" + str(sg.subgroup_description))

# print WRAccQF().evaluate_from_dataset(data, Subgroup(target, []))
示例#2
0
import pysubgroup as ps
import pandas as pd
import matplotlib.pyplot as plt
plt.interactive(False)


data = pd.read_csv("~/datasets/titanic.csv")
target = ps.NominalTarget ('survived', 0)
searchSpace = ps.createSelectors(data, ignore=['survived'])
task = ps.SubgroupDiscoveryTask (data, target, searchSpace, 
                                 resultSetSize=5, depth=2, 
                                 qf=ps.ChiSquaredQF())

result = ps.SimpleDFS().execute(task)

dfs = ps.utils.resultsAsDataFrame (data, result)
plt = ps.plot_roc (data, dfs, ps.ChiSquaredQF())
plt.show()
示例#3
0
import pysubgroup as ps
import pandas as pd
data = pd.read_table("../data/titanic.csv")
target = ps.NominalTarget('Survived', True)

searchspace = ps.create_selectors(data, ignore=['Survived'])
task = ps.SubgroupDiscoveryTask(data,
                                target,
                                searchspace,
                                result_set_size=5,
                                depth=2,
                                qf=ps.ChiSquaredQF())
result = ps.BeamSearch().execute(task)

for (q, sg) in result:
    print(str(q) + ":\t" + str(sg.subgroup_description))
示例#4
0
from scipy.io import arff

import pysubgroup as ps
import pandas as pd
from timeit import default_timer as timer


data = pd.DataFrame (arff.loadarff("../data/credit-g.arff") [0])

target = ps.NominalTarget('class', b'bad')
searchSpace = ps.createNominalSelectors(data, ignore=['class'])
task = ps.SubgroupDiscoveryTask (data, target, searchSpace, resultSetSize=10, depth=5, qf=ps.ChiSquaredQF())


start = timer()
result = ps.BSD().execute(task)
end = timer()
print("Time elapsed: ", (end - start))
for (q, sg) in result:
    print (str(q) + ":\t" + str(sg.subgroupDescription))


print ("******")
start = timer()
result = ps.TID_SD().execute(task)
end = timer()
print("Time elapsed: ", (end - start))
for (q, sg) in result:
    print (str(q) + ":\t" + str(sg.subgroupDescription))