def printing_test(arq="iris.arff"):
    """Exercise the pretty-printing of several pipeline expressions."""
    print(Chain(Map(select(File(arq)))))

    expression = Workflow(
        File(arq),
        Partition(),
        Map(PCA(), SVMC(), Metric(enhance=False)),
        Map(Report("<---------------------- fold"), enhance=False),
        Summ(function="mean", enhance=False),
        Reduce(),
        Report("mean ... S: $S", enhance=False),
    )
    print(expression)

    print(select(DT(), SVMC()))
    chosen = select(DT(), SVMC())
    print(chosen)
    print(Map(DT()))

    expression = ChainCS(
        File(arq),
        Partition(),
        Map(
            PCA(),
            select(SVMC(), DT(criterion="gini")),
            Metric(enhance=False),
        ),
        Report("teste"),
        Map(Report("<---------------------- fold")),
    )
    print(expression)
def ger_workflow(seed=0, arq="iris.arff"):
    """Build and return a seeded Workflow over *arq*.

    Seeds numpy's global RNG as a side effect, so the classifier
    selection inside ``select`` is reproducible for a given *seed*.
    """
    np.random.seed(seed)

    return Workflow(
        File(arq),
        Partition(),
        Map(PCA(), select(SVMC(), DT(criterion="gini")),
            Metric(enhance=False)),
        Summ(function="mean", enhance=False),
        Reduce(),
        Report("Mean S: $S", enhance=False),
        seed=seed,
    )
def test_with_summ_reduce(arq="iris.arff"):
    """Run a partitioned pipeline with Summ/Reduce and print both histories."""
    stages = (
        File(arq),
        Partition(),
        Map(PCA(), SVMC(), Metric()),
        Map(Report("<---------------------- etapa")),
        Summ(),
        Reduce(),
        Report("mean ... S: $S"),
    )
    train, test = Workflow(*stages).dual_transform()

    print("Train..............\n", train.history ^ "longname")
    print("Test..........\n", test.history ^ "longname")
def test_split_train_test(arq="iris.arff"):
    """Train/test split via TsSplit + TrSplit inside a cached pipeline."""
    # TsSplit must come before TrSplit so both receive the same
    # original data as input.
    pipe = Cache(
        File(arq),
        TsSplit(),
        TrSplit(),
        PCA(),
        SVMC(),
        Metric(enhance=False),
        Report("metric ... R: $R", enhance=False),
        storage_alias="oka",
    )
    train, test = pipe.dual_transform()

    print("Train..............\n", train)
    print("Test..........\n", test)
def test_partition(arq="iris.arff"):
    """Partition-based pipeline that also dumps $X and $y via Report."""
    stages = [
        File(arq),
        Partition(),
        Map(PCA(), SVMC(), Metric(enhance=False)),
        Summ(function="mean", enhance=False),
        Reduce(),
    ]
    stages.append(Report("mean ... S: $S", enhance=False))
    stages.append(Report("$X"))
    stages.append(Report("$y"))

    train, test = Workflow(*stages).dual_transform()

    print("Train..............\n", train)
    print("Test..........\n", test)
def test_check_architecture2(arq="iris.arff"):
    """Smoke-test every transform entry point of a partitioned workflow."""
    wf = Workflow(
        File(arq),
        Partition(),
        Map(PCA(), SVMC(), Metric(enhance=False)),
        Summ(field="Y", function="mean", enhance=False),
        Report("mean ... S: $S", enhance=False),
    )

    # File is at the front of the pipeline, so NoData placeholders
    # are enough as input for every call form below.
    nd = sd.NoData
    train_ = wf.enhancer.transform(nd)
    test_ = wf.model(nd).transform(nd)
    test_ = wf.model(nd).transform((nd, nd))
    train_, test_ = wf.dual_transform(nd, nd)
    train_, test_ = wf.dual_transform(nd, (nd, nd))
def random_search(arq="iris.arff"):
    """Sample 10 pipelines from a workflow expression and optimize over 5."""
    np.random.seed(0)
    expression = Workflow(
        File(arq),
        Partition(),
        Map(PCA(), select(SVMC(), DT(criterion="gini")), Metric()),
        Summ(function="mean"),
        Reduce(),
        Report("Mean S: $S"),
    )

    sampled = sample(expression, n=10)
    best = optimize(sampled, n=5)
    best.disable_pretty_printing()
    print(best)
def test_check_architecture(arq="iris.arff"):
    """Check that the separate enhancer/model call form and dual_transform
    produce data objects with identical uuids for the same pipeline.
    """
    pipe = Workflow(
        File(arq),
        Partition(partitions=2),
        Map(PCA(), SVMC(), Metric(enhance=False)),
        Summ(field="Y", function="mean", enhance=False),
    )

    # File is at the front of the pipeline, so NoData placeholders suffice.
    train_01 = pipe.enhancer.transform(sd.NoData)
    test_01 = pipe.model(sd.NoData).transform(sd.NoData)
    train_02, test_02 = pipe.dual_transform(sd.NoData, sd.NoData)

    # Collection uuid depends on data, which depends on consumption.
    # The tuple-unpacking in the loop header forces each (presumably lazy)
    # collection to be consumed before the uuids are compared — do not
    # simplify this loop away.
    for t, *_ in train_01, train_02, test_01, test_02:
        # print(111111111, t.y)
        pass

    assert train_01.uuid == train_02.uuid
    assert test_01.uuid == test_02.uuid
def test_sequence_of_classifiers(arq="abalone.arff"):
    """Apply SVMC then DT in sequence, reporting shape/history after each."""
    pipe = Workflow(
        File(arq),
        Binarize(),
        Report('1 {X.shape} {history^name}'),
        PCA(n=5),
        SVMC(),
        Metric(),
        Report('2 {X.shape} {history^name}'),
        DT(),
        Metric(),
        Report('3 {X.shape} {history^name}'),
    )

    print('Enh')
    train = pipe.enhancer.transform(sd.NoData)

    print('Mod')
    # TODO: why does the report not show up for test?
    test = pipe.model(sd.NoData).transform(sd.NoData)
    print()

    prefix = "[test_sequence_of_classifiers]"
    print(prefix + " Train.........\n", train.history ^ "longname")
    print(prefix + " Test..........\n", test.history ^ "longname")
# NOTE(review): everything from here to the next `def` looks like scraped/
# pasted residue from a code-sharing page (an "example #10" marker, a stray
# vote count, and an immediate `exit(0)`). The code below `exit(0)` is dead
# at runtime. Names like `dout`, `Pipeline`, `OverS`, `NB`, `MFE` are not
# defined in this view — presumably from elsewhere in the original example.
示例#10
0
exit(0)
# Dead code: would persist `dout` (undefined here) to disk.
Save('lixo.arff').apply(dout)

# ML 2 ========================================================================
pipe = Pipeline(
    File('iris.arff'),
    OverS(sampling_strategy='not minority'),
    ApplyUsing(NB('bernoulli')),
    Metric(functions='accuracy'),
    # Report('Accuracy: $r {history}'),
    Report('Accuracy: $r'),
    ApplyUsing(DT(max_depth=2)),
    Metric(functions='accuracy'),
    Report('Accuracy: $r'),
    ApplyUsing(SVMC(kernel='linear')),
    Metric(functions='accuracy'),
    Report('Accuracy: $r'),
)
m = pipe.apply()
dataout2 = m.use()

# ML 3 ========================================================================
pipe = Pipeline(
    File('iris.arff'), Cache(MFE()),
    Report('\nMeta-features Names: $Md \nMeta-features Values: $M \n  {name}'))
dataout = pipe.apply().data
# NOTE(review): the triple-quoted string below is opened but never closed in
# this region — further evidence the file is a concatenation of pasted chunks.
"""
Problemas filosoficos

obs. Containers sempre contêm referências a outros transformers (sejam leves ou 
def test_pca(arq="iris.arff"):
    """PCA + SVMC over a train/test split; print both histories by name."""
    # NOTE(review): `cs` appears unused, but `.cs` access is kept in case it
    # has side effects — confirm before removing.
    cs = File(arq).cs
    train, test = Workflow(
        File(arq), Split(), PCA(), SVMC(), Metric()
    ).dual_transform()
    print("Train..............\n", train.history ^ "name")
    print("Test..........\n", test.history ^ "name")
def test_metric(arq="iris.arff"):
    """SVMC over a split, with Metric applied only on the model side."""
    wf = Workflow(File(arq), Split(), SVMC(), Metric(enhance=False))
    train, test = wf.dual_transform()
    print("Train..............\n", train)
    print("Test..........\n", test)
def test_split(arq="iris.arff"):
    """Minimal split + SVMC pipeline; print both resulting data objects."""
    train, test = Workflow(File(arq), Split(), SVMC()).dual_transform()
    print("Train..............\n", str(train))
    print("Test..........\n", str(test))
def test_svmc(arq="iris.arff"):
    """SVMC alone (no split); print train and test outputs."""
    # NOTE(review): `cs` appears unused, but `.cs` access is kept in case it
    # has side effects — confirm before removing.
    cs = File(arq).cs
    train, test = Workflow(File(arq), SVMC()).dual_transform()
    print("Train..............\n", train)
    print("Test..........\n", test)