def printing_test(arq="iris.arff"):
    """Print several workflow/expression constructs to eyeball their reprs.

    Pure inspection helper: nothing is executed, only built and printed.

    Args:
        arq: name of the ARFF dataset file handed to ``File``.
    """
    # A Chain wrapping a Map over a select of a single File — repr check only.
    print(Chain(Map(select(File(arq)))))
    exp = Workflow(
        File(arq),
        Partition(),
        Map(PCA(), SVMC(), Metric(enhance=False)),
        Map(Report("<---------------------- fold"), enhance=False),
        Summ(function="mean", enhance=False),
        Reduce(),
        Report("mean ... S: $S", enhance=False),
    )
    print(exp)
    # select() printed both directly and via a local, to compare reprs.
    print(select(DT(), SVMC()))
    sel = select(DT(), SVMC())
    print(sel)
    print(Map(DT()))
    # ChainCS variant of roughly the same pipeline, rebound to the same name.
    exp = ChainCS(
        File(arq),
        Partition(),
        Map(PCA(), select(SVMC(), DT(criterion="gini")), Metric(enhance=False)),
        Report("teste"),
        Map(Report("<---------------------- fold")),
    )
    print(exp)
def ger_workflow(seed=0, arq="iris.arff"):
    """Build a seeded partition/PCA/classifier workflow over *arq*.

    Args:
        seed: value fed both to ``np.random.seed`` and to the Workflow itself.
        arq: name of the ARFF dataset file.

    Returns:
        The constructed ``Workflow`` (not executed).
    """
    np.random.seed(seed)
    return Workflow(
        File(arq),
        Partition(),
        Map(PCA(), select(SVMC(), DT(criterion="gini")), Metric(enhance=False)),
        Summ(function="mean", enhance=False),
        Reduce(),
        Report("Mean S: $S", enhance=False),
        seed=seed,
    )
def test_with_summ_reduce(arq="iris.arff"):
    """Run a partitioned PCA+SVMC workflow with Summ/Reduce; print histories.

    Args:
        arq: name of the ARFF dataset file.
    """
    steps = (
        File(arq),
        Partition(),
        Map(PCA(), SVMC(), Metric()),
        Map(Report("<---------------------- etapa")),
        Summ(),
        Reduce(),
        Report("mean ... S: $S"),
    )
    wf = Workflow(*steps)
    tr, te = wf.dual_transform()
    # `history ^ "longname"` renders each step of the data's history.
    print("Train..............\n", tr.history ^ "longname")
    print("Test..........\n", te.history ^ "longname")
def test_split_train_test(arq="iris.arff"):
    """Cache-backed pipeline: test/train split, PCA, SVMC, metric report.

    Args:
        arq: name of the ARFF dataset file.
    """
    pipe = Cache(
        File(arq),
        # TsSplit should come before TrSplit to ensure the same original data
        # is used as input for both.
        TsSplit(),
        TrSplit(),
        PCA(),
        SVMC(),
        Metric(enhance=False),
        Report("metric ... R: $R", enhance=False),
        storage_alias="oka")  # presumably selects a storage backend — TODO confirm
    train, test = pipe.dual_transform()
    print("Train..............\n", train)
    print("Test..........\n", test)
def test_partition(arq="iris.arff"):
    """Exercise Partition/Map/Summ/Reduce plus $X/$y reports; print results.

    Args:
        arq: name of the ARFF dataset file.
    """
    wf = Workflow(
        File(arq),
        Partition(),
        Map(PCA(), SVMC(), Metric(enhance=False)),
        Summ(function="mean", enhance=False),
        Reduce(),
        Report("mean ... S: $S", enhance=False),
        Report("$X"),
        Report("$y"),
    )
    tr, te = wf.dual_transform()
    print("Train..............\n", tr)
    print("Test..........\n", te)
def test_check_architecture2(arq="iris.arff"): pipe = Workflow( File(arq), Partition(), Map(PCA(), SVMC(), Metric(enhance=False)), Summ(field="Y", function="mean", enhance=False), Report("mean ... S: $S", enhance=False), ) # tenho file na frente train_ = pipe.enhancer.transform(sd.NoData) test_ = pipe.model(sd.NoData).transform(sd.NoData) test_ = pipe.model(sd.NoData).transform((sd.NoData, sd.NoData)) train_, test_ = pipe.dual_transform(sd.NoData, sd.NoData) train_, test_ = pipe.dual_transform(sd.NoData, (sd.NoData, sd.NoData))
def random_search(arq="iris.arff"):
    """Sample 10 pipelines from a workflow expression, optimize over 5, print.

    Args:
        arq: name of the ARFF dataset file.
    """
    np.random.seed(0)
    exp = Workflow(
        File(arq),
        Partition(),
        Map(PCA(), select(SVMC(), DT(criterion="gini")), Metric()),
        # Map(Report("<---------------------- fold"), enhance=False),
        Summ(function="mean"),
        Reduce(),
        Report("Mean S: $S"),
    )
    # sample() draws concrete pipelines from the expression's search space.
    expr = sample(exp, n=10)
    result = optimize(expr, n=5)
    result.disable_pretty_printing()
    print(result)
def test_check_architecture(arq="iris.arff"): pipe = Workflow( File(arq), Partition(partitions=2), Map(PCA(), SVMC(), Metric(enhance=False)), Summ(field="Y", function="mean", enhance=False), ) # tenho file na frente train_01 = pipe.enhancer.transform(sd.NoData) test_01 = pipe.model(sd.NoData).transform(sd.NoData) train_02, test_02 = pipe.dual_transform(sd.NoData, sd.NoData) # Collection uuid depends on data, which depends on consumption. for t, *_ in train_01, train_02, test_01, test_02: # print(111111111, t.y) pass assert train_01.uuid == train_02.uuid assert test_01.uuid == test_02.uuid
def test_sequence_of_classifiers(arq="abalone.arff"):
    """Chain two classifiers (SVMC then DT) after PCA and print both histories.

    Interleaved Reports show X's shape and the history at three stages.

    Args:
        arq: name of the ARFF dataset file (abalone by default).
    """
    pipe = Workflow(
        File(arq),
        Binarize(),
        Report('1 {X.shape} {history^name}'),
        PCA(n=5),
        SVMC(),
        Metric(),
        Report('2 {X.shape} {history^name}'),
        DT(),
        Metric(),
        Report('3 {X.shape} {history^name}'),
    )
    print('Enh')
    train = pipe.enhancer.transform(sd.NoData)
    print('Mod')
    test = pipe.model(sd.NoData).transform(
        sd.NoData)  # TODO: why doesn't the report show up in the test phase?
    print()
    print("[test_sequence_of_classifiers] Train.........\n", train.history ^ "longname")
    print("[test_sequence_of_classifiers] Test..........\n", test.history ^ "longname")
exit(0) Save('lixo.arff').apply(dout) # ML 2 ======================================================================== pipe = Pipeline( File('iris.arff'), OverS(sampling_strategy='not minority'), ApplyUsing(NB('bernoulli')), Metric(functions='accuracy'), # Report('Accuracy: $r {history}'), Report('Accuracy: $r'), ApplyUsing(DT(max_depth=2)), Metric(functions='accuracy'), Report('Accuracy: $r'), ApplyUsing(SVMC(kernel='linear')), Metric(functions='accuracy'), Report('Accuracy: $r'), ) m = pipe.apply() dataout2 = m.use() # ML 3 ======================================================================== pipe = Pipeline( File('iris.arff'), Cache(MFE()), Report('\nMeta-features Names: $Md \nMeta-features Values: $M \n {name}')) dataout = pipe.apply().data """ Problemas filosoficos obs. Containers sempre contêm referências a outros transformers (sejam leves ou
def test_pca(arq="iris.arff"):
    """Train/test a Split -> PCA -> SVMC -> Metric workflow; print histories.

    Args:
        arq: name of the ARFF dataset file.
    """
    # NOTE(review): the original also bound `cs = File(arq).cs` but never read
    # it; dropped here — confirm `.cs` access has no needed side effect.
    pipe = Workflow(File(arq), Split(), PCA(), SVMC(), Metric())
    train, test = pipe.dual_transform()
    print("Train..............\n", train.history ^ "name")
    print("Test..........\n", test.history ^ "name")
def test_metric(arq="iris.arff"):
    """Split the data, fit SVMC, report the metric; print both data halves.

    Args:
        arq: name of the ARFF dataset file.
    """
    wf = Workflow(File(arq), Split(), SVMC(), Metric(enhance=False))
    tr, te = wf.dual_transform()
    for label, data in (("Train..............\n", tr), ("Test..........\n", te)):
        print(label, data)
def test_split(arq="iris.arff"):
    """Run a minimal Split -> SVMC workflow and print both resulting data sets.

    Args:
        arq: name of the ARFF dataset file.
    """
    wf = Workflow(File(arq), Split(), SVMC())
    train_data, test_data = wf.dual_transform()
    print("Train..............\n", str(train_data))
    print("Test..........\n", str(test_data))
def test_svmc(arq="iris.arff"):
    """Fit SVMC directly on the full file and print train/test outputs.

    Args:
        arq: name of the ARFF dataset file.
    """
    # NOTE(review): the original also bound `cs = File(arq).cs` but never read
    # it; dropped here — confirm `.cs` access has no needed side effect.
    pipe = Workflow(File(arq), SVMC())
    train, test = pipe.dual_transform()
    print("Train..............\n", train)
    print("Test..........\n", test)