def printing_test(arq="iris.arff"):
    """Construct several pipeline expressions over ``arq`` and print them.

    The function only builds the objects and prints them; it does not call
    any transform method on them.

    Args:
        arq: Dataset file name handed to every ``File`` step.
    """
    file_choice = select(File(arq))
    print(Chain(Map(file_choice)))

    workflow = Workflow(
        File(arq),
        Partition(),
        Map(PCA(), SVMC(), Metric(enhance=False)),
        Map(Report("<---------------------- fold"), enhance=False),
        Summ(function="mean", enhance=False),
        Reduce(),
        Report("mean ... S: $S", enhance=False),
    )
    print(workflow)

    # The same selection is printed twice: once built inline, once via a name.
    print(select(DT(), SVMC()))
    classifier_choice = select(DT(), SVMC())
    print(classifier_choice)

    print(Map(DT()))

    chained = ChainCS(
        File(arq),
        Partition(),
        Map(
            PCA(),
            select(SVMC(), DT(criterion="gini")),
            Metric(enhance=False),
        ),
        Report("teste"),
        Map(Report("<---------------------- fold")),
    )
    print(chained)
def ger_workflow(seed=0, arq="iris.arff"):
    """Seed NumPy's global RNG and return a freshly built ``Workflow``.

    Args:
        seed: Value passed to ``np.random.seed`` and to ``Workflow(seed=...)``.
        arq: Dataset file name handed to the ``File`` step.

    Returns:
        The constructed ``Workflow`` (not yet run by this function).
    """
    # Seeding happens before any step object is created, as in the original.
    np.random.seed(seed)

    steps = (
        File(arq),
        Partition(),
        Map(
            PCA(),
            select(SVMC(), DT(criterion="gini")),
            Metric(enhance=False),
        ),
        Summ(function="mean", enhance=False),
        Reduce(),
        Report("Mean S: $S", enhance=False),
    )
    return Workflow(*steps, seed=seed)
def test_with_summ_reduce(arq="iris.arff"):
    """Run a partition/map/summ/reduce workflow and print both histories.

    Calls ``dual_transform()`` on the workflow and prints
    ``history ^ "longname"`` for the train and test results, in that order.

    Args:
        arq: Dataset file name handed to the ``File`` step.
    """
    workflow = Workflow(
        File(arq),
        Partition(),
        Map(PCA(), SVMC(), Metric()),
        Map(Report("<---------------------- etapa")),
        Summ(),
        Reduce(),
        Report("mean ... S: $S"),
    )
    train, test = workflow.dual_transform()

    labelled = (
        ("Train..............\n", train),
        ("Test..........\n", test),
    )
    for label, result in labelled:
        print(label, result.history ^ "longname")
def test_split_train_test(arq="iris.arff"):
    """Run a cached split/PCA/SVMC/metric pipeline and print both results.

    Args:
        arq: Dataset file name handed to the ``File`` step.
    """
    steps = (
        File(arq),
        # TsSplit should come before TrSplit to ensure the same original data
        # is used as input for both.
        TsSplit(),
        TrSplit(),
        PCA(),
        SVMC(),
        Metric(enhance=False),
        Report("metric ... R: $R", enhance=False),
    )
    cached = Cache(*steps, storage_alias="oka")

    train, test = cached.dual_transform()
    print("Train..............\n", train)
    print("Test..........\n", test)
def test_partition(arq="iris.arff"):
    """Run a partitioned workflow with mean summary and print both results.

    Args:
        arq: Dataset file name handed to the ``File`` step.
    """
    per_fold = Map(PCA(), SVMC(), Metric(enhance=False))
    workflow = Workflow(
        File(arq),
        Partition(),
        per_fold,
        Summ(function="mean", enhance=False),
        Reduce(),
        Report("mean ... S: $S", enhance=False),
        Report("$X"),
        Report("$y"),
    )

    train, test = workflow.dual_transform()
    for label, result in (
        ("Train..............\n", train),
        ("Test..........\n", test),
    ):
        print(label, result)
def test_sequence_of_classifiers(arq="abalone.arff"):
    """Chain two classifiers in one workflow and print both histories.

    The enhancer and the model are invoked separately (each with
    ``sd.NoData``) rather than through ``dual_transform``.

    Args:
        arq: Dataset file name handed to the ``File`` step.
    """
    workflow = Workflow(
        File(arq),
        Binarize(),
        Report('1 {X.shape} {history^name}'),
        PCA(n=5),
        SVMC(),
        Metric(),
        Report('2 {X.shape} {history^name}'),
        DT(),
        Metric(),
        Report('3 {X.shape} {history^name}'),
    )

    print('Enh')
    enhancer = workflow.enhancer
    train = enhancer.transform(sd.NoData)

    print('Mod')
    model = workflow.model(sd.NoData)
    # TODO: why does the report not show up for test?
    test = model.transform(sd.NoData)

    print()
    tag = "[test_sequence_of_classifiers]"
    print(tag + " Train.........\n", train.history ^ "longname")
    print(tag + " Test..........\n", test.history ^ "longname")
def test_check_architecture2(arq="iris.arff"):
    """Exercise the enhancer/model/dual_transform call forms of a workflow.

    Results are not inspected; the function passes as long as every call
    (and each two-way unpacking of ``dual_transform``) completes.

    Args:
        arq: Dataset file name handed to the ``File`` step.
    """
    workflow = Workflow(
        File(arq),
        Partition(),
        Map(PCA(), SVMC(), Metric(enhance=False)),
        Summ(field="Y", function="mean", enhance=False),
        Report("mean ... S: $S", enhance=False),
    )

    # A File step is at the front of the workflow.
    _enhanced = workflow.enhancer.transform(sd.NoData)

    # Model transform with a single argument, then with a pair.
    _modeled = workflow.model(sd.NoData).transform(sd.NoData)
    _modeled = workflow.model(sd.NoData).transform((sd.NoData, sd.NoData))

    # dual_transform with a single second argument, then with a pair.
    _train, _test = workflow.dual_transform(sd.NoData, sd.NoData)
    _train, _test = workflow.dual_transform(
        sd.NoData, (sd.NoData, sd.NoData)
    )
def random_search(arq="iris.arff"):
    """Sample a workflow expression, optimize it, and print the result.

    Seeds NumPy's global RNG with 0, then calls ``sample(..., n=10)`` followed
    by ``optimize(..., n=5)`` and prints the outcome with pretty printing
    disabled.

    Args:
        arq: Dataset file name handed to the ``File`` step.
    """
    np.random.seed(0)

    search_space = Workflow(
        File(arq),
        Partition(),
        Map(PCA(), select(SVMC(), DT(criterion="gini")), Metric()),
        # Disabled step: Map(Report("<---------------------- fold"), enhance=False),
        Summ(function="mean"),
        Reduce(),
        Report("Mean S: $S"),
    )

    sampled = sample(search_space, n=10)
    best = optimize(sampled, n=5)
    best.disable_pretty_printing()
    print(best)
def test_check_architecture(arq="iris.arff"):
    """Check that separate and dual transforms yield matching uuids.

    The train/test results obtained through ``enhancer``/``model`` are
    compared with those from ``dual_transform`` by their ``uuid``.

    Args:
        arq: Dataset file name handed to the ``File`` step.

    Raises:
        AssertionError: If the train or test uuids differ.
    """
    workflow = Workflow(
        File(arq),
        Partition(partitions=2),
        Map(PCA(), SVMC(), Metric(enhance=False)),
        Summ(field="Y", function="mean", enhance=False),
    )

    # A File step is at the front of the workflow.
    train_separate = workflow.enhancer.transform(sd.NoData)
    test_separate = workflow.model(sd.NoData).transform(sd.NoData)
    train_dual, test_dual = workflow.dual_transform(sd.NoData, sd.NoData)

    # Collection uuid depends on data, which depends on consumption, so every
    # result is iterated through once before the uuids are compared.
    results = (train_separate, train_dual, test_separate, test_dual)
    for result in results:
        _first, *_rest = result

    assert train_separate.uuid == train_dual.uuid
    assert test_separate.uuid == test_dual.uuid
# cs = Pipeline(SelectKB) # print(cs) # exit() # # s = cs.sample() # print(s) # exit() expr = Workflow( OnlyApply(File("abalone3.arff"), Binarize()), Partition(), Map( Wrap( select(SelectBest), ApplyUsing(select(DT, RF, NB)), OnlyApply(Metric(functions=['length'])), OnlyUse(Metric(functions=['accuracy', 'error'])), # AfterUse(Metric(function=['diversity'])) ), ), Report('HISTORY ... S: {history}'), Summ(function='mean_std'), Report('mean and std ... S: $S'), OnlyApply(Copy(from_field="S", to_field="B")), OnlyApply(Report('copy S to B ... B: $B')), OnlyUse( MConcat(input_field1="B", input_field2="S", output_field="S", direction='vertical')), OnlyUse(Report('comcat B with S (vertical) ... S: $S')), OnlyUse(Calc(functions=['flatten'])),
from pjml.tool.data.modeling.supervised.classifier.svmc import SVMC
from pjml.tool.data.processing.feature.binarize import Binarize
from pjml.tool.data.processing.instance.sampler.over.random import OverS
from pjml.tool.meta.mfe import MFE

# ML 1 ========================================================================
# # Store the dataset, without depending on the pjml package.
# from cururu.pickleserver import PickleServer
#
# try:
#     PickleServer().store(read_arff('iris.arff'))
# except DuplicateEntryException:
#     pass

# Cached pipeline: load 'bank.arff', binarize, NB, metric, then report X.
pipe = Pipeline(
    Cache(File('bank.arff'), Binarize(), NB(), Metric(), Report('$X')))
print('aaaaaaaa')  # debug marker printed before apply()
m = pipe.apply()
print(m.data)
print('uuuuuuuuuuuuuuu')  # debug marker printed before use()
d = m.use()
print(d)
# The script stops here; everything below this call is commented-out code.
exit()

# #     Source('messedup-dataset'),
#     Keep(evaluator(
#         Cache(
#             ApplyUsing(
#                 NB()
#             ),
#             Metric(function='accuracy')
print('Duplicate! Ignored.') numpy.random.seed(50) # import sklearn # print('The scikit-learn version is {}.'.format(sklearn.__version__)) print('expr .................') expr = Pipeline( OnlyApply(File('iris.arff')), Cache( evaluator( Wrap( shuffle(Std, MinMax), # shuffle(Std, select(UnderS, OverS), MinMax), ApplyUsing(select(DT, NB)), ), Metric(functions=['accuracy']) ) ) ) # {history.last.config['function']} print(expr) print('sample .................') pipe = full(rnd(expr, n=10), field='S').sample() pipe.enable_pretty_printing() print(f'Pipe:\n{pipe}') print(f'Wrapped:\n{pipe.unwrap}') pipe = Chain(File('abalone3.arff'), Binarize(), Split(), pipe.unwrap, Metric(), Report())
def test_pca(arq="iris.arff"):
    """Run a split/PCA/SVMC/metric workflow and print both histories.

    Prints ``history ^ "name"`` for the train and test results returned by
    ``dual_transform()``.

    Args:
        arq: Dataset file name handed to the ``File`` step.
    """
    # The value is unused; the attribute access is kept as in the original.
    _cs = File(arq).cs

    steps = (File(arq), Split(), PCA(), SVMC(), Metric())
    train, test = Workflow(*steps).dual_transform()

    for label, result in (
        ("Train..............\n", train),
        ("Test..........\n", test),
    ):
        print(label, result.history ^ "name")
def test_metric(arq="iris.arff"):
    """Run a split/SVMC/metric workflow and print the train and test results.

    Args:
        arq: Dataset file name handed to the ``File`` step.
    """
    workflow = Workflow(
        File(arq),
        Split(),
        SVMC(),
        Metric(enhance=False),
    )
    results = workflow.dual_transform()
    train, test = results

    print("Train..............\n", train)
    print("Test..........\n", test)
print('Duplicate! Ignored.') numpy.random.seed(50) # import sklearn # print('The scikit-learn version is {}.'.format(sklearn.__version__)) print('expr .................') expr = Workflow( OnlyApply(File('iris.arff')), Cache( evaluator( Wrap( shuffle(Std, MinMax), # shuffle(Std, select(UnderS, OverS), MinMax), ApplyUsing(select(DT, NB)), ), Metric(functions=['accuracy'])))) # {history.last.config['function']} print(expr) print('sample .................') pipe = full(rnd(expr, n=10), field='S').sample() pipe.enable_pretty_printing() print(f'Pipe:\n{pipe}') print(f'Wrapped:\n{pipe.unwrap}') pipe = Chain(File('abalone3.arff'), Binarize(), Split(), pipe.unwrap, Metric(), Report()) print('apply .................') model = pipe.apply() # print(222222222222222, dataout.history)
# # AfterUse(Metric(function=['diversity'])) # ), # ), # Summ(function='mean_std'), # Report('$S'), # ) pipe = Pipeline( File("abalone3.arff"), Binarize(), Partition(), Map( UnderS(sampling_strategy='not minority'), # RF(), Cache(RF()), Metric()), Summ(function='mean_std'), Report('mean S --> \n$S'), Report('mean S --> $S'), OnlyApply(Copy(from_field="S", to_field="B")), OnlyUse(MConcat(fields=["S", "S"], output_field="S", direction='vertical')), Calc(functions=['flatten']), Report('mean S --> $S')) print('Applying...') model = pipe.apply() if model.data: for i, t in enumerate(model.data.history): print(f'hist {i}', t) # exit()