def test_sequence_of_classifiers(arq="abalone.arff"):
    """Run a Workflow with several stacked classifiers and print histories.

    Builds File -> Binarize -> PCA -> SVMC -> DT with interleaved
    Metric/Report steps, then prints the train and test histories.
    """
    steps = (
        File(arq),
        Binarize(),
        Report('1 {X.shape} {history^name}'),
        PCA(n=5),
        SVMC(),
        Metric(),
        Report('2 {X.shape} {history^name}'),
        DT(),
        Metric(),
        Report('3 {X.shape} {history^name}'),
    )
    pipe = Workflow(*steps)

    print('Enh')
    enhanced = pipe.enhancer.transform(sd.NoData)
    print('Mod')
    # TODO: why does the report not show up for the test data?
    predicted = pipe.model(sd.NoData).transform(sd.NoData)
    print()

    print("[test_sequence_of_classifiers] Train.........\n",
          enhanced.history ^ "longname")
    print("[test_sequence_of_classifiers] Test..........\n",
          predicted.history ^ "longname")
# ===== Example #2 (snippet boundary from concatenated sources) =====
# (truncated leading fragment: "UnderS")
from pjml.tool.meta.wrap import Wrap

# print(SelectKB.cs)
# exit()
#
# cs = Pipeline(SelectKB)
# print(cs)
# exit()
#
# s = cs.sample()
# print(s)
# exit()

expr = Workflow(
    OnlyApply(File("abalone3.arff"), Binarize()),
    Partition(),
    Map(
        Wrap(
            select(SelectBest),
            ApplyUsing(select(DT, RF, NB)),
            OnlyApply(Metric(functions=['length'])),
            OnlyUse(Metric(functions=['accuracy', 'error'])),
            # AfterUse(Metric(function=['diversity']))
        ), ),
    Report('HISTORY ... S: {history}'),
    Summ(function='mean_std'),
    Report('mean and std ... S: $S'),
    OnlyApply(Copy(from_field="S", to_field="B")),
    OnlyApply(Report('copy S to B ... B: $B')),
    OnlyUse(
# ===== Example #3 (snippet boundary from concatenated sources) =====
from pjml.tool.data.modeling.supervised.classifier.svmc import SVMC
from pjml.tool.data.processing.feature.binarize import Binarize
from pjml.tool.data.processing.instance.sampler.over.random import OverS
from pjml.tool.meta.mfe import MFE

# ML 1 ========================================================================
# # Armazenar dataset, sem depender do pacote pjml.
# from cururu.pickleserver import PickleServer
#
# try:
#     PickleServer().store(read_arff('iris.arff'))
# except DuplicateEntryException:
#     pass

# Example: cache an entire NB pipeline over bank.arff and run one
# apply/use round trip, printing the intermediate data objects.
steps = (File('bank.arff'), Binarize(), NB(), Metric(), Report('$X'))
pipe = Pipeline(Cache(*steps))
print('aaaaaaaa')
model = pipe.apply()
print(model.data)
print('uuuuuuuuuuuuuuu')
result = model.use()
print(result)
exit()

#     # Source('messedup-dataset'),
#     Keep(evaluator(
#         Cache(
#             ApplyUsing(
#                 NB()
#             ),
#             Metric(function='accuracy')
# ===== Example #4 (snippet boundary from concatenated sources) =====
    ),
    Metric(functions=['accuracy'])
    )
    )
)



# {history.last.config['function']}
# Sample one concrete pipeline from the config space `expr` (built in an
# earlier fragment), then wrap it in a full train/test chain and apply it.
print(expr)
print('sample .................')
# NOTE(review): `full`/`rnd` appear to complete and randomize the config
# space before `.sample()` draws a pipeline -- confirm against the pjml API.
pipe = full(rnd(expr, n=10), field='S').sample()
pipe.enable_pretty_printing()
print(f'Pipe:\n{pipe}')
print(f'Wrapped:\n{pipe.unwrap}')
# Rebuild a full chain around the unwrapped sampled component.
pipe = Chain(File('abalone3.arff'), Binarize(), Split(), pipe.unwrap,
             Metric(), Report())

print('apply .................')
model = pipe.apply()

# print(222222222222222, dataout.history)
# data dies in the predictor's apply()


print('use .................')


# print(3333333333333333, dataout.history)
# RUS desaparece no use()
            engine='mysql',
            db='paje:[email protected]/paje',
            blocking=not True
        ),
        engine='dump', blocking=True
    )

# Factory for SQLite-backed, non-blocking Cache steps.
cache = partial(Cache, blocking=False, engine='sqlite')

# cache = partial(Cache, engine='amnesia', blocking=True)

# expr = Pipeline(File(arq), cache(ApplyUsing(NB())))
# p = expr
# p.apply()
expr = Pipeline(
    OnlyApply(File(arq), cache(Binarize())),
    cache(
        Partition(),
        Map(
            Wrap(
                select(SelectBest),  # slow??
                cache(ApplyUsing(select(DT, NB, hold(RF, n_estimators=40)))),
                OnlyApply(Metric(functions=['length'])),
                OnlyUse(Metric(functions=['accuracy', 'error'])),
                # AfterUse(Metric(function=['diversity']))
            ),
        ),
        # Report('HISTORY ... S: {history}'),
        Summ(function='mean_std'),
    ),
    Report('mean and std ... S: $S'),
#
# s = cs.sample()
# print(s)
# exit()

# Factory for Cache steps bound to the default SQLite storage backend alias.
cache = partial(Cache, storage_alias='default_sqlite')
# cache = partial(Cache, storage_alias='mysql')
# cache = partial(Cache, storage_alias='default_dump')
# cache = partial(Cache, storage_alias='amnesia')


# expr = Pipeline(File(arq), cache(ApplyUsing(NB())))
# p = expr
# p.apply()
expr = Workflow(
    OnlyApply(File(arq), cache(Binarize())),
    cache(
        Partition(),
        Map(
            Wrap(
                select(SelectBest),  # slow??
                cache(ApplyUsing(select(DT, NB, hold(RF, n_estimators=40)))),
                OnlyApply(Metric(functions=['length'])),
                OnlyUse(Metric(functions=['accuracy', 'error'])),
                # AfterUse(Metric(function=['diversity']))
            ),
        ),
        # Report('HISTORY ... S: {history}'),
        Summ(function='mean_std'),
    ),
    Report('mean and std ... S: $S'),
# ===== Example #7 (snippet boundary from concatenated sources) =====
#     Map(
#         Wrap(
#             MinMax(),
#             Cache(ApplyUsing(RF())),
#             OnlyApply(Metric(functions=['length'])),
#             OnlyUse(Metric(functions=['accuracy', 'error'])),
#             # AfterUse(Metric(function=['diversity']))
#         ),
#     ),
#     Summ(function='mean_std'),
#     Report('$S'),
# )

# Partitioned evaluation: undersample each partition, fit a cached RF,
# summarise the metrics, then exercise Copy / MConcat / Calc bookkeeping.
steps = [
    File("abalone3.arff"),
    Binarize(),
    Partition(),
    Map(
        UnderS(sampling_strategy='not minority'),
        Cache(RF()),  # cache the (slow) forest fit per partition
        Metric(),
    ),
    Summ(function='mean_std'),
    Report('mean S --> \n$S'),
    Report('mean S --> $S'),
    OnlyApply(Copy(from_field="S", to_field="B")),
    OnlyUse(MConcat(fields=["S", "S"], output_field="S",
                    direction='vertical')),
    Calc(functions=['flatten']),
    Report('mean S --> $S'),
]
pipe = Pipeline(*steps)