def test_sequence_of_classifiers(arq="abalone.arff"):
    pipe = Workflow(
        File(arq),
        Binarize(),
        Report('1 {X.shape} {history^name}'),
        PCA(n=5),
        SVMC(),
        Metric(),
        Report('2 {X.shape} {history^name}'),
        DT(),
        Metric(),
        Report('3 {X.shape} {history^name}'),
    )

    print('Enh')
    train = pipe.enhancer.transform(sd.NoData)
    print('Mod')
    test = pipe.model(sd.NoData).transform(sd.NoData)  # TODO: why doesn't the Report appear in the test?
    print()
    print("[test_sequence_of_classifiers] Train.........\n",
          train.history ^ "longname")
    print("[test_sequence_of_classifiers] Test..........\n",
          test.history ^ "longname")
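
# A minimal sketch (not part of the test above) of the enhancer/model split it
# exercises, using only calls that already appear there: the enhancer side is
# printed as "Train" and the model side as "Test", so they appear to play the
# training and prediction roles respectively. `run_both_phases` is a
# hypothetical helper name introduced only for illustration.
def run_both_phases(pipe):
    train = pipe.enhancer.transform(sd.NoData)          # training-side data
    test = pipe.model(sd.NoData).transform(sd.NoData)   # prediction-side data
    return train, test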
UnderS
from pjml.tool.meta.wrap import Wrap

# print(SelectKB.cs)
# exit()
#
# cs = Pipeline(SelectKB)
# print(cs)
# exit()
#
# s = cs.sample()
# print(s)
# exit()

expr = Workflow(
    OnlyApply(File("abalone3.arff"), Binarize()),
    Partition(),
    Map(
        Wrap(
            select(SelectBest),
            ApplyUsing(select(DT, RF, NB)),
            OnlyApply(Metric(functions=['length'])),
            OnlyUse(Metric(functions=['accuracy', 'error'])),
            # AfterUse(Metric(function=['diversity']))
        ),
    ),
    Report('HISTORY ... S: {history}'),
    Summ(function='mean_std'),
    Report('mean and std ... S: $S'),
    OnlyApply(Copy(from_field="S", to_field="B")),
    OnlyApply(Report('copy S to B ... B: $B')),
    OnlyUse(
from pjml.tool.data.modeling.supervised.classifier.svmc import SVMC
from pjml.tool.data.processing.feature.binarize import Binarize
from pjml.tool.data.processing.instance.sampler.over.random import OverS
from pjml.tool.meta.mfe import MFE

# ML 1 ========================================================================
#
# Store the dataset without depending on the pjml package.
# from cururu.pickleserver import PickleServer
#
# try:
#     PickleServer().store(read_arff('iris.arff'))
# except DuplicateEntryException:
#     pass

pipe = Pipeline(
    Cache(File('bank.arff'), Binarize(), NB(), Metric(), Report('$X'))
)

print('aaaaaaaa')
m = pipe.apply()
print(m.data)
print('uuuuuuuuuuuuuuu')
d = m.use()
print(d)
exit()

#
# Source('messedup-dataset'),
# Keep(evaluator(
#     Cache(
#         ApplyUsing(
#             NB()
#         ),
#         Metric(function='accuracy')
            ),
            Metric(functions=['accuracy'])
        )
    )
)
# {history.last.config['function']}
print(expr)

print('sample .................')
pipe = full(rnd(expr, n=10), field='S').sample()
pipe.enable_pretty_printing()
print(f'Pipe:\n{pipe}')
print(f'Wrapped:\n{pipe.unwrap}')

pipe = Chain(File('abalone3.arff'), Binarize(), Split(), pipe.unwrap,
             Metric(), Report())

print('apply .................')
model = pipe.apply()
# print(222222222222222, dataout.history)  # data dies inside the predictor's apply()
print('use .................')
# print(3333333333333333, dataout.history)  # RUS disappears in use()
        engine='mysql', db='paje:[email protected]/paje',
        blocking=not True
    ),
    engine='dump', blocking=True
)
cache = partial(Cache, engine='sqlite', blocking=False)
# cache = partial(Cache, engine='amnesia', blocking=True)

# expr = Pipeline(File(arq), cache(ApplyUsing(NB())))
# p = expr
# p.apply()

expr = Pipeline(
    OnlyApply(File(arq), cache(Binarize())),
    cache(
        Partition(),
        Map(
            Wrap(
                select(SelectBest),
                # slow??
                cache(ApplyUsing(select(DT, NB, hold(RF, n_estimators=40)))),
                OnlyApply(Metric(functions=['length'])),
                OnlyUse(Metric(functions=['accuracy', 'error'])),
                # AfterUse(Metric(function=['diversity']))
            ),
        ),
        # Report('HISTORY ... S: {history}'),
        Summ(function='mean_std'),
    ),
    Report('mean and std ... S: $S'),
#
# s = cs.sample()
# print(s)
# exit()

cache = partial(Cache, storage_alias='default_sqlite')
# cache = partial(Cache, storage_alias='mysql')
# cache = partial(Cache, storage_alias='default_dump')
# cache = partial(Cache, storage_alias='amnesia')

# expr = Pipeline(File(arq), cache(ApplyUsing(NB())))
# p = expr
# p.apply()

expr = Workflow(
    OnlyApply(File(arq), cache(Binarize())),
    cache(
        Partition(),
        Map(
            Wrap(
                select(SelectBest),
                # slow??
                cache(ApplyUsing(select(DT, NB, hold(RF, n_estimators=40)))),
                OnlyApply(Metric(functions=['length'])),
                OnlyUse(Metric(functions=['accuracy', 'error'])),
                # AfterUse(Metric(function=['diversity']))
            ),
        ),
        # Report('HISTORY ... S: {history}'),
        Summ(function='mean_std'),
    ),
    Report('mean and std ... S: $S'),
# Map(
#     Wrap(
#         MinMax(),
#         Cache(ApplyUsing(RF())),
#         OnlyApply(Metric(functions=['length'])),
#         OnlyUse(Metric(functions=['accuracy', 'error'])),
#         # AfterUse(Metric(function=['diversity']))
#     ),
# ),
# Summ(function='mean_std'),
# Report('$S'),
# )

pipe = Pipeline(
    File("abalone3.arff"),
    Binarize(),
    Partition(),
    Map(
        UnderS(sampling_strategy='not minority'),
        # RF(),
        Cache(RF()),
        Metric()
    ),
    Summ(function='mean_std'),
    Report('mean S --> \n$S'),
    Report('mean S --> $S'),
    OnlyApply(Copy(from_field="S", to_field="B")),
    OnlyUse(MConcat(fields=["S", "S"], output_field="S",
                    direction='vertical')),
    Calc(functions=['flatten']),
    Report('mean S --> $S')
)
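
# A minimal sketch of the `cache = partial(Cache, ...)` idiom used in the
# scripts above, assuming only that Cache accepts the keyword arguments shown
# there: functools.partial pre-binds the storage configuration so every
# cache(...) call shares it. Only `partial` comes from the standard library;
# Cache, Binarize and 'default_sqlite' are the names already used above.
from functools import partial

cache = partial(Cache, storage_alias='default_sqlite')
# cache(Binarize()) now builds the same component as
# Cache(Binarize(), storage_alias='default_sqlite'),
# keeping the backend choice in one place while experimenting.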