def main():
    """
    Just runs some example code.
    """

    # load ARFF file
    helper.print_title("Loading ARFF file")
    loader = Loader(classname="weka.core.converters.ArffLoader")
    data = loader.load_file(helper.get_data_dir() + os.sep + "iris.arff")
    print(str(data))

    # load CSV file
    helper.print_title("Loading CSV file")
    loader = Loader(classname="weka.core.converters.CSVLoader")
    data = loader.load_file(helper.get_data_dir() + os.sep + "iris.csv")
    print(str(data))

    # load directory
    # change this to something sensible
    text_dir = "/some/where"
    if os.path.exists(text_dir) and os.path.isdir(text_dir):
        helper.print_title("Loading directory: " + text_dir)
        loader = TextDirectoryLoader(options=["-dir", text_dir, "-F", "-charset", "UTF-8"])
        data = loader.load()
        print(str(data))
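
# Note (hedged): these snippets assume a running Weka JVM and the usual
# python-weka-wrapper imports (helper, Loader, Filter, Classifier, ...).
# A minimal bootstrap for running any single example looks roughly like this;
# the data path is a placeholder, not part of the examples.
import weka.core.jvm as jvm
from weka.core.converters import Loader

jvm.start()
try:
    loader = Loader(classname="weka.core.converters.ArffLoader")
    data = loader.load_file("/path/to/iris.arff")  # placeholder path
    print(data)
finally:
    jvm.stop()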
def main():
    """
    Just runs some example code.
    """

    # load ARFF file
    helper.print_title("Loading ARFF file")
    loader = Loader(classname="weka.core.converters.ArffLoader")
    data = loader.load_file(helper.get_data_dir() + os.sep + "iris.arff")
    print(str(data))

    # load CSV file
    helper.print_title("Loading CSV file")
    loader = Loader(classname="weka.core.converters.CSVLoader")
    data = loader.load_file(helper.get_data_dir() + os.sep + "iris.csv")
    print(str(data))

    # load directory
    # change this to something sensible
    text_dir = "/some/where"
    if os.path.exists(text_dir) and os.path.isdir(text_dir):
        helper.print_title("Loading directory: " + text_dir)
        loader = TextDirectoryLoader(
            options=["-dir", text_dir, "-F", "-charset", "UTF-8"])
        data = loader.load()
        print(str(data))
def main():
    """
    Just runs some example code.
    """

    # load a dataset
    iris = helper.get_data_dir() + os.sep + "iris.arff"
    helper.print_info("Loading dataset: " + iris)
    loader = Loader("weka.core.converters.ArffLoader")
    data = loader.load_file(iris)

    # remove class attribute
    helper.print_info("Removing class attribute")
    remove = Filter(classname="weka.filters.unsupervised.attribute.Remove",
                    options=["-R", "last"])
    remove.inputformat(data)
    filtered = remove.filter(data)

    # use MultiFilter
    helper.print_info("Use MultiFilter")
    remove = Filter(classname="weka.filters.unsupervised.attribute.Remove",
                    options=["-R", "first"])
    std = Filter(classname="weka.filters.unsupervised.attribute.Standardize")
    multi = MultiFilter()
    multi.filters = [remove, std]
    multi.inputformat(data)
    filtered_multi = multi.filter(data)

    # output datasets
    helper.print_title("Input")
    print(data)
    helper.print_title("Output")
    print(filtered)
    helper.print_title("Output (MultiFilter)")
    print(filtered_multi)

    # load text dataset
    text = helper.get_data_dir() + os.sep + "reutersTop10Randomized_1perc_shortened.arff"
    helper.print_info("Loading dataset: " + text)
    loader = Loader("weka.core.converters.ArffLoader")
    data = loader.load_file(text)
    data.class_is_last()

    # apply StringToWordVector
    stemmer = Stemmer(classname="weka.core.stemmers.IteratedLovinsStemmer")
    stopwords = Stopwords(classname="weka.core.stopwords.Rainbow")
    tokenizer = Tokenizer(classname="weka.core.tokenizers.WordTokenizer")
    s2wv = StringToWordVector(options=["-W", "10", "-L", "-C"])
    s2wv.stemmer = stemmer
    s2wv.stopwords = stopwords
    s2wv.tokenizer = tokenizer
    s2wv.inputformat(data)
    filtered = s2wv.filter(data)

    helper.print_title("Input (StringToWordVector)")
    print(data)
    helper.print_title("Output (StringToWordVector)")
    print(filtered)
def main(args):
    """
    Loads a dataset, shuffles it, splits it into train/test set. Trains J48 with training set and
    evaluates the built model on the test set.
    :param args: the commandline arguments (optional, can be dataset filename)
    :type args: list
    """

    # load a dataset
    if len(args) <= 1:
        data_file = helper.get_data_dir() + os.sep + "vote.arff"
    else:
        data_file = args[1]
    helper.print_info("Loading dataset: " + data_file)
    loader = Loader(classname="weka.core.converters.ArffLoader")
    data = loader.load_file(data_file)
    data.class_is_last()

    # generate train/test split of randomized data
    train, test = data.train_test_split(66.0, Random(1))

    # build classifier
    cls = Classifier(classname="weka.classifiers.trees.J48")
    cls.build_classifier(train)
    print(cls)

    # evaluate
    evl = Evaluation(train)
    evl.test_model(cls, test)
    print(evl.summary())
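
    # Hedged aside: besides the textual summary, the Evaluation wrapper in
    # python-weka-wrapper also exposes individual metrics; the members below
    # should be available on the evl object built above.
    print("Accuracy: %.2f%%" % evl.percent_correct)
    print("Confusion matrix:\n" + str(evl.confusion_matrix))
    print(evl.class_details())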
def main():
    """
    Just runs some example code.
    """

    classifier = Classifier("weka.classifiers.trees.J48")

    helper.print_title("Capabilities")
    capabilities = classifier.capabilities
    print(capabilities)

    # load a dataset
    iris_file = helper.get_data_dir() + os.sep + "iris.arff"
    helper.print_info("Loading dataset: " + iris_file)
    loader = Loader("weka.core.converters.ArffLoader")
    iris_data = loader.load_file(iris_file)
    iris_data.class_is_last()
    data_capabilities = Capabilities.for_instances(iris_data)
    print(data_capabilities)
    print("classifier handles dataset: " + str(capabilities.supports(data_capabilities)))

    # disable/enable
    helper.print_title("Disable/Enable")
    capability = Capability(member="UNARY_ATTRIBUTES")
    capabilities.disable(capability)
    capabilities.min_instances = 10
    print("Removing: " + str(capability))
    print(capabilities)
def main():
    """
    Just runs some example code.
    """

    # load a dataset
    iris_file = helper.get_data_dir() + os.sep + "iris.arff"
    helper.print_info("Loading dataset: " + iris_file)
    loader = Loader("weka.core.converters.ArffLoader")
    full = loader.load_file(iris_file)
    full.class_is_last()

    # remove class attribute
    data = Instances.copy_instances(full)
    data.no_class()
    data.delete_last_attribute()

    # build a clusterer and output model
    helper.print_title("Training SimpleKMeans clusterer")
    clusterer = Clusterer(classname="weka.clusterers.SimpleKMeans", options=["-N", "3"])
    clusterer.build_clusterer(data)
    print("done")

    # classes to clusters
    evl = ClusterEvaluation()
    evl.set_model(clusterer)
    evl.test_model(full)
    helper.print_title("Cluster results")
    print(evl.cluster_results)
    helper.print_title("Classes to clusters")
    print(evl.classes_to_clusters)
def main(args):
    """
    Trains a NaiveBayesUpdateable classifier incrementally on a dataset. The dataset can be supplied as parameter.
    :param args: the commandline arguments
    :type args: list
    """

    # load a dataset
    if len(args) <= 1:
        data_file = helper.get_data_dir() + os.sep + "vote.arff"
    else:
        data_file = args[1]
    helper.print_info("Loading dataset: " + data_file)
    loader = Loader(classname="weka.core.converters.ArffLoader")
    data = loader.load_file(data_file, incremental=True)
    data.class_is_last()

    # classifier
    nb = Classifier(classname="weka.classifiers.bayes.NaiveBayesUpdateable")
    nb.build_classifier(data)

    # train incrementally
    for inst in loader:
        nb.update_classifier(inst)

    print(nb)
def gridsearch():
    """
    Applies GridSearch to a dataset. The GridSearch package must not be installed, as the
    monolithic weka.jar already contains this package.
    """

    helper.print_title("GridSearch")

    # load a dataset
    fname = helper.get_data_dir() + os.sep + "bolts.arff"
    helper.print_info("Loading train: " + fname)
    loader = Loader(classname="weka.core.converters.ArffLoader")
    train = loader.load_file(fname)
    train.class_is_last()

    # classifier
    grid = GridSearch(options=["-sample-size", "100.0", "-traversal", "ROW-WISE", "-num-slots", "1", "-S", "1"])
    grid.evaluation = "CC"
    grid.y = {"property": "kernel.gamma", "min": -3.0, "max": 3.0, "step": 1.0, "base": 10.0, "expression": "pow(BASE,I)"}
    grid.x = {"property": "C", "min": -3.0, "max": 3.0, "step": 1.0, "base": 10.0, "expression": "pow(BASE,I)"}
    cls = Classifier(
        classname="weka.classifiers.functions.SMOreg",
        options=["-K", "weka.classifiers.functions.supportVector.RBFKernel"])
    grid.classifier = cls
    grid.build_classifier(train)
    print("Model:\n" + str(grid))
    print("\nBest setup:\n" + grid.best.to_commandline())
def main(args):
    """
    Loads a dataset, shuffles it, splits it into train/test set. Trains J48 with training set and
    evaluates the built model on the test set.
    :param args: the commandline arguments (optional, can be dataset filename)
    :type args: list
    """

    # load a dataset
    if len(args) <= 1:
        data_file = helper.get_data_dir() + os.sep + "vote.arff"
    else:
        data_file = args[1]
    helper.print_info("Loading dataset: " + data_file)
    loader = Loader(classname="weka.core.converters.ArffLoader")
    data = loader.load_file(data_file)
    data.class_is_last()

    # generate train/test split of randomized data
    train, test = data.train_test_split(66.0, Random(1))

    # build classifier
    cls = Classifier(classname="weka.classifiers.trees.J48")
    cls.build_classifier(train)
    print(cls)

    # evaluate
    evl = Evaluation(train)
    evl.test_model(cls, test)
    print(evl.summary())
def load_incremental():
    """
    Loads a dataset incrementally.
    """

    # setup the flow
    helper.print_title("Load dataset (incremental)")
    iris = helper.get_data_dir() + os.sep + "iris.arff"

    flow = Flow(name="load dataset")

    filesupplier = FileSupplier()
    filesupplier.config["files"] = [iris]
    flow.actors.append(filesupplier)

    loaddataset = LoadDataset()
    loaddataset.config["incremental"] = True
    flow.actors.append(loaddataset)

    console = Console()
    flow.actors.append(console)

    # run the flow
    msg = flow.setup()
    if msg is None:
        msg = flow.execute()
        if msg is not None:
            print("Error executing flow:\n" + msg)
    else:
        print("Error setting up flow:\n" + msg)
    flow.wrapup()
    flow.cleanup()
def load_custom_loader():
    """
    Loads a dataset using a custom loader.
    """

    # setup the flow
    helper.print_title("Load dataset (custom loader)")
    iris = helper.get_data_dir() + os.sep + "iris.csv"

    flow = Flow(name="load dataset")

    filesupplier = FileSupplier()
    filesupplier.config["files"] = [iris]
    flow.actors.append(filesupplier)

    loaddataset = LoadDataset()
    loaddataset.config["incremental"] = False
    loaddataset.config["use_custom_loader"] = True
    loaddataset.config["custom_loader"] = Loader(classname="weka.core.converters.CSVLoader")
    flow.actors.append(loaddataset)

    console = Console()
    flow.actors.append(console)

    # run the flow
    msg = flow.setup()
    if msg is None:
        msg = flow.execute()
        if msg is not None:
            print("Error executing flow:\n" + msg)
    else:
        print("Error setting up flow:\n" + msg)
    flow.wrapup()
    flow.cleanup()
def main():
    """
    Just runs some example code.
    """
    """
    Plots a dataset.
    """

    # setup the flow
    helper.print_title("Plot dataset")
    iris = helper.get_data_dir() + os.sep + "iris.arff"

    flow = Flow(name="plot dataset")

    filesupplier = FileSupplier()
    filesupplier.config["files"] = [iris]
    flow.actors.append(filesupplier)

    loaddataset = LoadDataset()
    flow.actors.append(loaddataset)

    branch = Branch()
    flow.actors.append(branch)

    seq = Sequence(name="matrix plot")
    branch.actors.append(seq)

    mplot = MatrixPlot()
    mplot.config["percent"] = 50.0
    mplot.config["wait"] = False
    seq.actors.append(mplot)

    seq = Sequence(name="line plot")
    branch.actors.append(seq)

    copy = Copy()
    seq.actors.append(copy)

    flter = Filter()
    flter.config["setup"] = filters.Filter(
        classname="weka.filters.unsupervised.attribute.Remove", options=["-R", "last"])
    flter.config["keep_relationname"] = True
    seq.actors.append(flter)

    lplot = LinePlot()
    lplot.config["percent"] = 50.0
    lplot.config["wait"] = True
    seq.actors.append(lplot)

    # run the flow
    msg = flow.setup()
    if msg is None:
        print("\n" + flow.tree + "\n")
        msg = flow.execute()
        if msg is not None:
            print("Error executing flow:\n" + msg)
    else:
        print("Error setting up flow:\n" + msg)
    flow.wrapup()
    flow.cleanup()
def load_incremental():
    """
    Loads a dataset incrementally.
    """

    # setup the flow
    helper.print_title("Load dataset (incremental)")
    iris = helper.get_data_dir() + os.sep + "iris.arff"

    flow = Flow(name="load dataset")

    filesupplier = FileSupplier()
    filesupplier.config["files"] = [iris]
    flow.actors.append(filesupplier)

    loaddataset = LoadDataset()
    loaddataset.config["incremental"] = True
    flow.actors.append(loaddataset)

    console = Console()
    flow.actors.append(console)

    # run the flow
    msg = flow.setup()
    if msg is None:
        msg = flow.execute()
        if msg is not None:
            print("Error executing flow:\n" + msg)
    else:
        print("Error setting up flow:\n" + msg)
    flow.wrapup()
    flow.cleanup()
def load_custom_loader():
    """
    Loads a dataset using a custom loader.
    """

    # setup the flow
    helper.print_title("Load dataset (custom loader)")
    iris = helper.get_data_dir() + os.sep + "iris.csv"

    flow = Flow(name="load dataset")

    filesupplier = FileSupplier()
    filesupplier.config["files"] = [iris]
    flow.actors.append(filesupplier)

    loaddataset = LoadDataset()
    loaddataset.config["incremental"] = False
    loaddataset.config["use_custom_loader"] = True
    loaddataset.config["custom_loader"] = Loader(
        classname="weka.core.converters.CSVLoader")
    flow.actors.append(loaddataset)

    console = Console()
    flow.actors.append(console)

    # run the flow
    msg = flow.setup()
    if msg is None:
        msg = flow.execute()
        if msg is not None:
            print("Error executing flow:\n" + msg)
    else:
        print("Error setting up flow:\n" + msg)
    flow.wrapup()
    flow.cleanup()
def main():
    """
    Just runs some example code.
    """

    # load a dataset
    iris_file = helper.get_data_dir() + os.sep + "iris.arff"
    helper.print_info("Loading dataset: " + iris_file)
    loader = Loader("weka.core.converters.ArffLoader")
    data = loader.load_file(iris_file)

    # remove class attribute
    data.delete_last_attribute()

    # build a clusterer and output model
    helper.print_title("Training SimpleKMeans clusterer")
    clusterer = Clusterer(classname="weka.clusterers.SimpleKMeans", options=["-N", "3"])
    clusterer.build_clusterer(data)
    print(clusterer)

    # cluster data
    helper.print_info("Clustering data")
    for index, inst in enumerate(data):
        cl = clusterer.cluster_instance(inst)
        dist = clusterer.distribution_for_instance(inst)
        print(str(index+1) + ": cluster=" + str(cl) + ", distribution=" + str(dist))
def main():
    """
    Just runs some example code.
    """

    # load a dataset
    iris_file = helper.get_data_dir() + os.sep + "iris.arff"
    helper.print_info("Loading dataset: " + iris_file)
    loader = Loader("weka.core.converters.ArffLoader")
    full = loader.load_file(iris_file)
    full.class_is_last()

    # remove class attribute
    data = Instances.copy_instances(full)
    data.no_class()
    data.delete_last_attribute()

    # build a clusterer and output model
    helper.print_title("Training SimpleKMeans clusterer")
    clusterer = Clusterer(classname="weka.clusterers.SimpleKMeans",
                          options=["-N", "3"])
    clusterer.build_clusterer(data)
    print("done")

    # classes to clusters
    evl = ClusterEvaluation()
    evl.set_model(clusterer)
    evl.test_model(full)
    helper.print_title("Cluster results")
    print(evl.cluster_results)
    helper.print_title("Classes to clusters")
    print(evl.classes_to_clusters)
def main():
    """
    Just runs some example code.
    """

    # load a dataset
    iris_file = helper.get_data_dir() + os.sep + "iris.arff"
    helper.print_info("Loading dataset: " + iris_file)
    loader = Loader("weka.core.converters.ArffLoader")
    iris_data = loader.load_file(iris_file)
    iris_data.class_is_last()

    # train classifier
    classifier = Classifier("weka.classifiers.trees.J48")
    classifier.build_classifier(iris_data)

    # save and read object
    helper.print_title("I/O: single object")
    outfile = tempfile.gettempdir() + os.sep + "j48.model"
    serialization.write(outfile, classifier)
    model = Classifier(jobject=serialization.read(outfile))
    print(model)

    # save classifier and dataset header (multiple objects)
    helper.print_title("I/O: single object")
    serialization.write_all(outfile, [classifier, Instances.template_instances(iris_data)])
    objects = serialization.read_all(outfile)
    for i, obj in enumerate(objects):
        helper.print_info("Object #" + str(i+1) + ":")
        if javabridge.get_env().is_instance_of(obj, javabridge.get_env().find_class("weka/core/Instances")):
            obj = Instances(jobject=obj)
        elif javabridge.get_env().is_instance_of(obj, javabridge.get_env().find_class("weka/classifiers/Classifier")):
            obj = Classifier(jobject=obj)
        print(obj)
def main():
    """
    Shows how to use the CostSensitiveClassifier.
    """

    # load a dataset
    data_file = helper.get_data_dir() + os.sep + "diabetes.arff"
    helper.print_info("Loading dataset: " + data_file)
    loader = Loader("weka.core.converters.ArffLoader")
    data = loader.load_file(data_file)
    data.class_is_last()

    # classifier
    classifier = SingleClassifierEnhancer(
        classname="weka.classifiers.meta.CostSensitiveClassifier",
        options=["-cost-matrix", "[0 1; 2 0]", "-S", "2"])
    base = Classifier(classname="weka.classifiers.trees.J48", options=["-C", "0.3"])
    classifier.classifier = base

    folds = 10
    evaluation = Evaluation(data)
    evaluation.crossvalidate_model(classifier, data, folds, Random(1))

    print("")
    print("=== Setup ===")
    print("Classifier: " + classifier.to_commandline())
    print("Dataset: " + data.relationname)
    print("")
    print(evaluation.summary("=== " + str(folds) + "-fold Cross-Validation ==="))
def main():
    """
    Shows how to use the CostSensitiveClassifier.
    """

    # load a dataset
    data_file = helper.get_data_dir() + os.sep + "diabetes.arff"
    helper.print_info("Loading dataset: " + data_file)
    loader = Loader("weka.core.converters.ArffLoader")
    data = loader.load_file(data_file)
    data.class_is_last()

    # classifier
    classifier = SingleClassifierEnhancer(
        classname="weka.classifiers.meta.CostSensitiveClassifier",
        options=["-cost-matrix", "[0 1; 2 0]", "-S", "2"])
    base = Classifier(classname="weka.classifiers.trees.J48",
                      options=["-C", "0.3"])
    classifier.classifier = base

    folds = 10
    evaluation = Evaluation(data)
    evaluation.crossvalidate_model(classifier, data, folds, Random(1))

    print("")
    print("=== Setup ===")
    print("Classifier: " + classifier.to_commandline())
    print("Dataset: " + data.relationname)
    print("")
    print(
        evaluation.summary("=== " + str(folds) +
                           "-fold Cross-Validation ==="))
def main(args):
    """
    Trains Apriori on the specified dataset (uses vote UCI dataset if no dataset specified).
    :param args: the commandline arguments
    :type args: list
    """

    # load a dataset
    if len(args) <= 1:
        data_file = helper.get_data_dir() + os.sep + "vote.arff"
    else:
        data_file = args[1]
    helper.print_info("Loading dataset: " + data_file)
    loader = Loader("weka.core.converters.ArffLoader")
    data = loader.load_file(data_file)
    data.class_is_last()

    # build Apriori, using last attribute as class attribute
    apriori = Associator(classname="weka.associations.Apriori", options=["-c", "-1"])
    apriori.build_associations(data)
    print(str(apriori))

    # iterate association rules (low-level)
    helper.print_info("Rules (low-level)")
    # make the underlying rules list object iterable in Python
    rules = javabridge.iterate_collection(apriori.jwrapper.getAssociationRules().getRules().o)
    for i, r in enumerate(rules):
        # wrap the Java object to make its methods accessible
        rule = JWrapper(r)
        print(str(i+1) + ". " + str(rule))
        # output some details on rule
        print("   - consequence support: " + str(rule.getConsequenceSupport()))
        print("   - premise support: " + str(rule.getPremiseSupport()))
        print("   - total support: " + str(rule.getTotalSupport()))
        print("   - total transactions: " + str(rule.getTotalTransactions()))

    # iterate association rules (high-level)
    helper.print_info("Rules (high-level)")
    print("can produce rules? " + str(apriori.can_produce_rules()))
    print("rule metric names: " + str(apriori.rule_metric_names))
    rules = apriori.association_rules()
    if rules is not None:
        print("producer: " + rules.producer)
        print("# rules: " + str(len(rules)))
        for i, rule in enumerate(rules):
            print(str(i+1) + ". " + str(rule))
            # output some details on rule
            print("   - consequence support: " + str(rule.consequence_support))
            print("   - consequence: " + str(rule.consequence))
            print("   - premise support: " + str(rule.premise_support))
            print("   - premise: " + str(rule.premise))
            print("   - total support: " + str(rule.total_support))
            print("   - total transactions: " + str(rule.total_transactions))
            print("   - metric names: " + str(rule.metric_names))
            print("   - metric values: " + str(rule.metric_values))
            print("   - metric value 'Confidence': " + str(rule.metric_value('Confidence')))
            print("   - primary metric name: " + str(rule.primary_metric_name))
            print("   - primary metric value: " + str(rule.primary_metric_value))
def main():
    """
    Just runs some example code.
    """

    # load a dataset
    iris_file = helper.get_data_dir() + os.sep + "iris.arff"
    helper.print_info("Loading dataset: " + iris_file)
    loader = Loader("weka.core.converters.ArffLoader")
    data = loader.load_file(iris_file)

    # remove class attribute
    data.delete_last_attribute()

    # build a clusterer and output model
    helper.print_title("Training SimpleKMeans clusterer")
    clusterer = Clusterer(classname="weka.clusterers.SimpleKMeans", options=["-N", "3"])
    clusterer.build_clusterer(data)
    print(clusterer)
    helper.print_info("Evaluating on data")
    evaluation = ClusterEvaluation()
    evaluation.set_model(clusterer)
    evaluation.test_model(data)
    print("# clusters: " + str(evaluation.num_clusters))
    print("log likelihood: " + str(evaluation.log_likelihood))
    print("cluster assignments:\n" + str(evaluation.cluster_assignments))
    plc.plot_cluster_assignments(evaluation, data, inst_no=True)

    # using a filtered clusterer
    helper.print_title("Filtered clusterer")
    loader = Loader("weka.core.converters.ArffLoader")
    data = loader.load_file(iris_file)
    clusterer = Clusterer(classname="weka.clusterers.SimpleKMeans", options=["-N", "3"])
    remove = Filter(classname="weka.filters.unsupervised.attribute.Remove", options=["-R", "last"])
    fclusterer = FilteredClusterer()
    fclusterer.clusterer = clusterer
    fclusterer.filter = remove
    fclusterer.build_clusterer(data)
    print(fclusterer)

    # load a dataset incrementally and build clusterer incrementally
    helper.print_title("Incremental clusterer")
    loader = Loader("weka.core.converters.ArffLoader")
    iris_inc = loader.load_file(iris_file, incremental=True)
    clusterer = Clusterer("weka.clusterers.Cobweb")
    remove = Filter(classname="weka.filters.unsupervised.attribute.Remove", options=["-R", "last"])
    remove.inputformat(iris_inc)
    iris_filtered = remove.outputformat()
    clusterer.build_clusterer(iris_filtered)
    for inst in loader:
        remove.input(inst)
        inst_filtered = remove.output()
        clusterer.update_clusterer(inst_filtered)
    clusterer.update_finished()
    print(clusterer.to_commandline())
    print(clusterer)
    print(clusterer.graph)
    plg.plot_dot_graph(clusterer.graph)
def incremental():
    """
    Just runs some example code.
    """
    """
    Loads/filters a dataset incrementally.
    """

    # setup the flow
    helper.print_title("Filter datasets (incrementally)")
    iris = helper.get_data_dir() + os.sep + "iris.arff"
    anneal = helper.get_data_dir() + os.sep + "anneal.arff"

    flow = Flow(name="filter datasets (incrementally)")

    filesupplier = FileSupplier()
    filesupplier.config["files"] = [iris, anneal]
    flow.actors.append(filesupplier)

    loaddataset = LoadDataset()
    loaddataset.config["incremental"] = True
    flow.actors.append(loaddataset)

    flter = Filter()
    flter.config["setup"] = filters.Filter(
        classname="weka.filters.unsupervised.attribute.Remove",
        options=["-R", "1"])
    flter.config["keep_relationname"] = True
    flow.actors.append(flter)

    console = Console()
    flow.actors.append(console)

    # run the flow
    msg = flow.setup()
    if msg is None:
        print("\n" + flow.tree + "\n")
        msg = flow.execute()
        if msg is not None:
            print("Error executing flow:\n" + msg)
    else:
        print("Error setting up flow:\n" + msg)
    flow.wrapup()
    flow.cleanup()
def main():
    """
    Just runs some example code.
    """

    # load a dataset
    iris_file = helper.get_data_dir() + os.sep + "iris.arff"
    helper.print_info("Loading dataset: " + iris_file)
    loader = Loader("weka.core.converters.ArffLoader")
    iris_data = loader.load_file(iris_file)
    iris_data.class_is_last()

    # train classifier
    classifier = Classifier("weka.classifiers.trees.J48")
    classifier.build_classifier(iris_data)

    # save and read object
    helper.print_title("I/O: model (using serialization module)")
    outfile = tempfile.gettempdir() + os.sep + "j48.model"
    serialization.write(outfile, classifier)
    model = Classifier(jobject=serialization.read(outfile))
    print(model)

    # save classifier and dataset header (multiple objects)
    helper.print_title("I/O: model and header (using serialization module)")
    serialization.write_all(
        outfile,
        [classifier, Instances.template_instances(iris_data)])
    objects = serialization.read_all(outfile)
    for i, obj in enumerate(objects):
        helper.print_info("Object #" + str(i + 1) + ":")
        if javabridge.get_env().is_instance_of(
                obj,
                javabridge.get_env().find_class("weka/core/Instances")):
            obj = Instances(jobject=obj)
        elif javabridge.get_env().is_instance_of(
                obj,
                javabridge.get_env().find_class(
                    "weka/classifiers/Classifier")):
            obj = Classifier(jobject=obj)
        print(obj)

    # save and read object
    helper.print_title("I/O: just model (using Classifier class)")
    outfile = tempfile.gettempdir() + os.sep + "j48.model"
    classifier.serialize(outfile)
    model, _ = Classifier.deserialize(outfile)
    print(model)

    # save classifier and dataset header (multiple objects)
    helper.print_title("I/O: model and header (using Classifier class)")
    classifier.serialize(outfile, header=iris_data)
    model, header = Classifier.deserialize(outfile)
    print(model)
    if header is not None:
        print(header)
def incremental():
    """
    Just runs some example code.
    """
    """
    Loads/filters a dataset incrementally.
    """

    # setup the flow
    helper.print_title("Filter datasets (incrementally)")
    iris = helper.get_data_dir() + os.sep + "iris.arff"
    anneal = helper.get_data_dir() + os.sep + "anneal.arff"

    flow = Flow(name="filter datasets (incrementally)")

    filesupplier = FileSupplier()
    filesupplier.config["files"] = [iris, anneal]
    flow.actors.append(filesupplier)

    loaddataset = LoadDataset()
    loaddataset.config["incremental"] = True
    flow.actors.append(loaddataset)

    flter = Filter()
    flter.config["setup"] = filters.Filter(
        classname="weka.filters.unsupervised.attribute.Remove", options=["-R", "1"])
    flter.config["keep_relationname"] = True
    flow.actors.append(flter)

    console = Console()
    flow.actors.append(console)

    # run the flow
    msg = flow.setup()
    if msg is None:
        print("\n" + flow.tree + "\n")
        msg = flow.execute()
        if msg is not None:
            print("Error executing flow:\n" + msg)
    else:
        print("Error setting up flow:\n" + msg)
    flow.wrapup()
    flow.cleanup()
def main():
    """
    Just runs some example code.
    """

    # setup the flow
    flow = Flow(name="list files")

    listfiles = ListFiles()
    listfiles.config["dir"] = str(helper.get_data_dir())
    listfiles.config["list_files"] = True
    listfiles.config["list_dirs"] = False
    listfiles.config["recursive"] = False
    listfiles.config["regexp"] = ".*.arff"
    flow.actors.append(listfiles)

    tee = Tee()
    flow.actors.append(tee)

    convert = Convert()
    convert.config["setup"] = conversion.PassThrough()
    tee.actors.append(convert)

    console = Console()
    console.config["prefix"] = "Match: "
    tee.actors.append(console)

    load = LoadDataset()
    load.config["use_custom_loader"] = True
    flow.actors.append(load)

    cross = CrossValidate()
    cross.config["setup"] = Classifier(classname="weka.classifiers.trees.J48",
                                       options=["-C", "0.3"])
    flow.actors.append(cross)

    summary = EvaluationSummary()
    summary.config["matrix"] = True
    flow.actors.append(summary)

    # print flow
    flow.setup()
    print("\n" + flow.tree + "\n")

    # save the flow
    fname = tempfile.gettempdir() + os.sep + "simpleflow.json"
    Flow.save(flow, fname)

    # load flow
    fl2 = Flow.load(fname)

    # output flow
    fl2.setup()
    print("\n" + fl2.tree + "\n")
def arffInput(self):
    # load a dataset
    iris_file = "/Users/rezakhoshkangini/Documents/Drexel_Documents/Work/Mat-Code/NewCSV/BindedData/Section0.arff"
    helper.print_info("Loading dataset: " + iris_file)
    loader = Loader("weka.core.converters.ArffLoader")
    iris_data = loader.load_file(iris_file)
    iris_data.class_is_last()
    # classifier help
    helper.print_title("Creating help string")
    classifier = Classifier(classname="weka.classifiers.trees.J48")
    print(classifier.to_help())
def main():
    """
    Just runs some example code.
    """

    # setup the flow
    flow = Flow(name="list files")

    listfiles = ListFiles()
    listfiles.config["dir"] = str(helper.get_data_dir())
    listfiles.config["list_files"] = True
    listfiles.config["list_dirs"] = False
    listfiles.config["recursive"] = False
    listfiles.config["regexp"] = ".*.arff"
    flow.actors.append(listfiles)

    tee = Tee()
    flow.actors.append(tee)

    convert = Convert()
    convert.config["setup"] = conversion.PassThrough()
    tee.actors.append(convert)

    console = Console()
    console.config["prefix"] = "Match: "
    tee.actors.append(console)

    load = LoadDataset()
    load.config["use_custom_loader"] = True
    flow.actors.append(load)

    cross = CrossValidate()
    cross.config["setup"] = Classifier(classname="weka.classifiers.trees.J48", options=["-C", "0.3"])
    flow.actors.append(cross)

    summary = EvaluationSummary()
    summary.config["matrix"] = True
    flow.actors.append(summary)

    # print flow
    flow.setup()
    print("\n" + flow.tree + "\n")

    # save the flow
    fname = tempfile.gettempdir() + os.sep + "simpleflow.json"
    Flow.save(flow, fname)

    # load flow
    fl2 = Flow.load(fname)

    # output flow
    fl2.setup()
    print("\n" + fl2.tree + "\n")
def main(args):
    """
    Loads a dataset, shuffles it, splits it into train/test set. Trains J48 with training set and
    evaluates the built model on the test set.
    The predictions get recorded in two different ways:
    1. in-memory via the test_model method
    2. directly to file (more memory efficient), but a separate run of making predictions

    :param args: the commandline arguments (optional, can be dataset filename)
    :type args: list
    """

    # load a dataset
    if len(args) <= 1:
        data_file = helper.get_data_dir() + os.sep + "vote.arff"
    else:
        data_file = args[1]
    helper.print_info("Loading dataset: " + data_file)
    loader = Loader(classname="weka.core.converters.ArffLoader")
    data = loader.load_file(data_file)
    data.class_is_last()

    # generate train/test split of randomized data
    train, test = data.train_test_split(66.0, Random(1))

    # build classifier
    cls = Classifier(classname="weka.classifiers.trees.J48")
    cls.build_classifier(train)
    print(cls)

    # evaluate and record predictions in memory
    helper.print_title("recording predictions in-memory")
    output = PredictionOutput(
        classname="weka.classifiers.evaluation.output.prediction.CSV",
        options=["-distribution"])
    evl = Evaluation(train)
    evl.test_model(cls, test, output=output)
    print(evl.summary())
    helper.print_info("Predictions:")
    print(output.buffer_content())

    # record/output predictions separately
    helper.print_title("recording/outputting predictions separately")
    outputfile = helper.get_tmp_dir() + os.sep + "j48_vote.csv"
    output = PredictionOutput(
        classname="weka.classifiers.evaluation.output.prediction.CSV",
        options=["-distribution", "-suppress", "-file", outputfile])
    output.header = test
    output.print_all(cls, test)
    helper.print_info("Predictions stored in:" + outputfile)
    # by using "-suppress" we don't store the output in memory, the following statement won't output anything
    print(output.buffer_content())
def main():
    """
    Just runs some example code.
    """

    # load a dataset
    vote_file = helper.get_data_dir() + os.sep + "vote.arff"
    helper.print_info("Loading dataset: " + vote_file)
    loader = Loader("weka.core.converters.ArffLoader")
    vote_data = loader.load_file(vote_file)
    vote_data.class_is_last()

    # train and output associator
    associator = Associator(classname="weka.associations.Apriori", options=["-N", "9", "-I"])
    associator.build_associations(vote_data)
    print(associator)
def main():
    """
    Just runs some example code.
    """
    """
    Loads/filters a dataset incrementally and saves it to a new file.
    """

    # setup the flow
    helper.print_title("Load/filter/save dataset (incrementally)")
    iris = helper.get_data_dir() + os.sep + "iris.arff"

    flow = Flow(name="Load/filter/save dataset (incrementally)")

    filesupplier = FileSupplier()
    filesupplier.config["files"] = [iris]
    flow.actors.append(filesupplier)

    loaddataset = LoadDataset()
    loaddataset.config["incremental"] = True
    flow.actors.append(loaddataset)

    flter = Filter()
    flter.config["setup"] = filters.Filter(
        classname="weka.filters.unsupervised.attribute.Remove",
        options=["-R", "last"])
    flow.actors.append(flter)

    rename = RenameRelation()
    rename.config["name"] = "iris-reduced"
    flow.actors.append(rename)

    dumper = InstanceDumper()
    dumper.config["output"] = tempfile.gettempdir() + os.sep + "out.arff"
    flow.actors.append(dumper)

    # run the flow
    msg = flow.setup()
    if msg is None:
        print("\n" + flow.tree + "\n")
        msg = flow.execute()
        if msg is not None:
            print("Error executing flow:\n" + msg)
    else:
        print("Error setting up flow:\n" + msg)
    flow.wrapup()
    flow.cleanup()
def main():
    """
    Just runs some example code.
    """

    # load a dataset
    bodyfat_file = helper.get_data_dir() + os.sep + "bodyfat.arff"
    helper.print_info("Loading dataset: " + bodyfat_file)
    loader = Loader("weka.core.converters.ArffLoader")
    bodyfat_data = loader.load_file(bodyfat_file)
    bodyfat_data.class_is_last()

    # build and output M5P model
    helper.print_title("Building M5P")
    classifier = Classifier(classname="weka.classifiers.trees.M5P")
    classifier.build_classifier(bodyfat_data)
    print(classifier)
def main():
    """
    Just runs some example code.
    """

    # load a dataset
    vote_file = helper.get_data_dir() + os.sep + "vote.arff"
    helper.print_info("Loading dataset: " + vote_file)
    loader = Loader("weka.core.converters.ArffLoader")
    vote_data = loader.load_file(vote_file)
    vote_data.class_is_last()

    # train and output associator
    associator = Associator(classname="weka.associations.Apriori",
                            options=["-N", "9", "-I"])
    associator.build_associations(vote_data)
    print(associator)
def main():
    """
    Just runs some example code.
    """
    """
    Loads/filters a dataset incrementally and saves it to a new file.
    """

    # setup the flow
    helper.print_title("Load/filter/save dataset (incrementally)")
    iris = helper.get_data_dir() + os.sep + "iris.arff"

    flow = Flow(name="Load/filter/save dataset (incrementally)")

    filesupplier = FileSupplier()
    filesupplier.config["files"] = [iris]
    flow.actors.append(filesupplier)

    loaddataset = LoadDataset()
    loaddataset.config["incremental"] = True
    flow.actors.append(loaddataset)

    flter = Filter()
    flter.config["setup"] = filters.Filter(
        classname="weka.filters.unsupervised.attribute.Remove", options=["-R", "last"])
    flow.actors.append(flter)

    rename = RenameRelation()
    rename.config["name"] = "iris-reduced"
    flow.actors.append(rename)

    dumper = InstanceDumper()
    dumper.config["output"] = tempfile.gettempdir() + os.sep + "out.arff"
    flow.actors.append(dumper)

    # run the flow
    msg = flow.setup()
    if msg is None:
        print("\n" + flow.tree + "\n")
        msg = flow.execute()
        if msg is not None:
            print("Error executing flow:\n" + msg)
    else:
        print("Error setting up flow:\n" + msg)
    flow.wrapup()
    flow.cleanup()
def main():
    """
    Just runs some example code.
    """

    # load a dataset
    bodyfat_file = helper.get_data_dir() + os.sep + "bodyfat.arff"
    helper.print_info("Loading dataset: " + bodyfat_file)
    loader = Loader("weka.core.converters.ArffLoader")
    bodyfat_data = loader.load_file(bodyfat_file)
    bodyfat_data.class_is_last()

    # build and output M5P model
    helper.print_title("Building M5P")
    classifier = Classifier(classname="weka.classifiers.trees.M5P")
    classifier.build_classifier(bodyfat_data)
    print(classifier)
def main():
    """
    Just runs some example code.
    """

    # setup the flow
    helper.print_title("build and save clusterer")
    iris = helper.get_data_dir() + os.sep + "iris_no_class.arff"

    flow = Flow(name="build and save clusterer")

    filesupplier = FileSupplier()
    filesupplier.config["files"] = [iris]
    flow.actors.append(filesupplier)

    loaddataset = LoadDataset()
    flow.actors.append(loaddataset)

    train = Train()
    train.config["setup"] = Clusterer(classname="weka.clusterers.SimpleKMeans")
    flow.actors.append(train)

    pick = ContainerValuePicker()
    pick.config["value"] = "Model"
    flow.actors.append(pick)

    console = Console()
    pick.actors.append(console)

    writer = ModelWriter()
    writer.config["output"] = str(
        tempfile.gettempdir()) + os.sep + "simplekmeans.model"
    flow.actors.append(writer)

    # run the flow
    msg = flow.setup()
    if msg is None:
        print("\n" + flow.tree + "\n")
        msg = flow.execute()
        if msg is not None:
            print("Error executing flow:\n" + msg)
    else:
        print("Error setting up flow:\n" + msg)
    flow.wrapup()
    flow.cleanup()
def main():
    """
    Just runs some example code.
    """

    # setup the flow
    helper.print_title("build and save clusterer")
    iris = helper.get_data_dir() + os.sep + "iris_no_class.arff"

    flow = Flow(name="build and save clusterer")

    filesupplier = FileSupplier()
    filesupplier.config["files"] = [iris]
    flow.actors.append(filesupplier)

    loaddataset = LoadDataset()
    flow.actors.append(loaddataset)

    train = Train()
    train.config["setup"] = Clusterer(classname="weka.clusterers.SimpleKMeans")
    flow.actors.append(train)

    pick = ContainerValuePicker()
    pick.config["value"] = "Model"
    flow.actors.append(pick)

    console = Console()
    pick.actors.append(console)

    writer = ModelWriter()
    writer.config["output"] = str(tempfile.gettempdir()) + os.sep + "simplekmeans.model"
    flow.actors.append(writer)

    # run the flow
    msg = flow.setup()
    if msg is None:
        print("\n" + flow.tree + "\n")
        msg = flow.execute()
        if msg is not None:
            print("Error executing flow:\n" + msg)
    else:
        print("Error setting up flow:\n" + msg)
    flow.wrapup()
    flow.cleanup()
def main():
    """
    Just runs some example code.
    """

    # setup the flow
    helper.print_title("Cross-validate clusterer")
    iris = helper.get_data_dir() + os.sep + "iris.arff"

    flow = Flow(name="cross-validate clusterer")

    filesupplier = FileSupplier()
    filesupplier.config["files"] = [iris]
    flow.actors.append(filesupplier)

    loaddataset = LoadDataset()
    flow.actors.append(loaddataset)

    flter = Filter()
    flter.name = "Remove class"
    flter.config["filter"] = filters.Filter(
        classname="weka.filters.unsupervised.attribute.Remove",
        options=["-R", "last"])
    flow.actors.append(flter)

    cv = CrossValidate()
    cv.config["setup"] = Clusterer(classname="weka.clusterers.EM")
    flow.actors.append(cv)

    console = Console()
    console.config["prefix"] = "Loglikelihood: "
    flow.actors.append(console)

    # run the flow
    msg = flow.setup()
    if msg is None:
        print("\n" + flow.tree + "\n")
        msg = flow.execute()
        if msg is not None:
            print("Error executing flow:\n" + msg)
    else:
        print("Error setting up flow:\n" + msg)
    flow.wrapup()
    flow.cleanup()
def main():
    """
    Just runs some example code.
    """

    # load a dataset
    anneal_file = helper.get_data_dir() + os.sep + "anneal.arff"
    helper.print_info("Loading dataset: " + anneal_file)
    loader = Loader("weka.core.converters.ArffLoader")
    anneal_data = loader.load_file(anneal_file)
    anneal_data.class_is_last()

    # perform attribute selection
    helper.print_title("Attribute selection")
    search = ASSearch(classname="weka.attributeSelection.BestFirst",
                      options=["-D", "1", "-N", "5"])
    evaluation = ASEvaluation(
        classname="weka.attributeSelection.CfsSubsetEval",
        options=["-P", "1", "-E", "1"])
    attsel = AttributeSelection()
    attsel.search(search)
    attsel.evaluator(evaluation)
    attsel.select_attributes(anneal_data)
    print("# attributes: " + str(attsel.number_attributes_selected))
    print("attributes (as numpy array): " + str(attsel.selected_attributes))
    print("attributes (as list): " + str(list(attsel.selected_attributes)))
    print("result string:\n" + attsel.results_string)

    # perform ranking
    helper.print_title("Attribute ranking (2-fold CV)")
    search = ASSearch(classname="weka.attributeSelection.Ranker",
                      options=["-N", "-1"])
    evaluation = ASEvaluation("weka.attributeSelection.InfoGainAttributeEval")
    attsel = AttributeSelection()
    attsel.ranking(True)
    attsel.folds(2)
    attsel.crossvalidation(True)
    attsel.seed(42)
    attsel.search(search)
    attsel.evaluator(evaluation)
    attsel.select_attributes(anneal_data)
    print("ranked attributes:\n" + str(attsel.ranked_attributes))
    print("result string:\n" + attsel.results_string)
def main():
    """
    Just runs some example code.
    """

    # setup the flow
    helper.print_title("Cross-validate clusterer")
    iris = helper.get_data_dir() + os.sep + "iris.arff"

    flow = Flow(name="cross-validate clusterer")

    filesupplier = FileSupplier()
    filesupplier.config["files"] = [iris]
    flow.actors.append(filesupplier)

    loaddataset = LoadDataset()
    flow.actors.append(loaddataset)

    flter = Filter()
    flter.name = "Remove class"
    flter.config["filter"] = filters.Filter(
        classname="weka.filters.unsupervised.attribute.Remove", options=["-R", "last"])
    flow.actors.append(flter)

    cv = CrossValidate()
    cv.config["setup"] = Clusterer(classname="weka.clusterers.EM")
    flow.actors.append(cv)

    console = Console()
    console.config["prefix"] = "Loglikelihood: "
    flow.actors.append(console)

    # run the flow
    msg = flow.setup()
    if msg is None:
        print("\n" + flow.tree + "\n")
        msg = flow.execute()
        if msg is not None:
            print("Error executing flow:\n" + msg)
    else:
        print("Error setting up flow:\n" + msg)
    flow.wrapup()
    flow.cleanup()
def main(args):
    """
    Performs attribute selection on the specified dataset (uses vote UCI dataset if no dataset specified). Last
    attribute is assumed to be the class attribute. Used: CfsSubsetEval, GreedyStepwise, J48
    :param args: the commandline arguments
    :type args: list
    """

    # load a dataset
    if len(args) <= 1:
        data_file = helper.get_data_dir() + os.sep + "vote.arff"
    else:
        data_file = args[1]
    helper.print_info("Loading dataset: " + data_file)
    loader = Loader(classname="weka.core.converters.ArffLoader")
    data = loader.load_file(data_file)
    data.class_is_last()

    use_classifier(data)
    use_filter(data)
    use_low_level(data)
def main(args):
    """
    Trains Apriori on the specified dataset (uses vote UCI dataset if no dataset specified).
    :param args: the commandline arguments
    :type args: list
    """

    # load a dataset
    if len(args) <= 1:
        data_file = helper.get_data_dir() + os.sep + "vote.arff"
    else:
        data_file = args[1]
    helper.print_info("Loading dataset: " + data_file)
    loader = Loader("weka.core.converters.ArffLoader")
    data = loader.load_file(data_file)
    data.class_is_last()

    # build Apriori, using last attribute as class attribute
    apriori = Associator(classname="weka.associations.Apriori",
                         options=["-c", "-1"])
    apriori.build_associations(data)
    print(str(apriori))
def main():
    """
    Just runs some example code.
    """

    # load a dataset
    anneal_file = helper.get_data_dir() + os.sep + "anneal.arff"
    helper.print_info("Loading dataset: " + anneal_file)
    loader = Loader("weka.core.converters.ArffLoader")
    anneal_data = loader.load_file(anneal_file)
    anneal_data.class_is_last()

    # perform attribute selection
    helper.print_title("Attribute selection")
    search = ASSearch(classname="weka.attributeSelection.BestFirst", options=["-D", "1", "-N", "5"])
    evaluation = ASEvaluation(classname="weka.attributeSelection.CfsSubsetEval", options=["-P", "1", "-E", "1"])
    attsel = AttributeSelection()
    attsel.search(search)
    attsel.evaluator(evaluation)
    attsel.select_attributes(anneal_data)
    print("# attributes: " + str(attsel.number_attributes_selected))
    print("attributes: " + str(attsel.selected_attributes))
    print("result string:\n" + attsel.results_string)

    # perform ranking
    helper.print_title("Attribute ranking (2-fold CV)")
    search = ASSearch(classname="weka.attributeSelection.Ranker", options=["-N", "-1"])
    evaluation = ASEvaluation("weka.attributeSelection.InfoGainAttributeEval")
    attsel = AttributeSelection()
    attsel.ranking(True)
    attsel.folds(2)
    attsel.crossvalidation(True)
    attsel.seed(42)
    attsel.search(search)
    attsel.evaluator(evaluation)
    attsel.select_attributes(anneal_data)
    print("ranked attributes:\n" + str(attsel.ranked_attributes))
    print("result string:\n" + attsel.results_string)
def multisearch():
    """
    Applies MultiSearch to a dataset. "multisearch-weka-package" package must be installed.
    """

    helper.print_title("MultiSearch")

    # load a dataset
    fname = helper.get_data_dir() + os.sep + "bolts.arff"
    helper.print_info("Loading train: " + fname)
    loader = Loader(classname="weka.core.converters.ArffLoader")
    train = loader.load_file(fname)
    train.class_is_last()

    # classifier
    multi = MultiSearch(options=[
        "-sample-size", "100.0", "-initial-folds", "2", "-subsequent-folds",
        "2", "-num-slots", "1", "-S", "1"
    ])
    multi.evaluation = "CC"
    mparam = MathParameter()
    mparam.prop = "classifier.kernel.gamma"
    mparam.minimum = -3.0
    mparam.maximum = 3.0
    mparam.step = 1.0
    mparam.base = 10.0
    mparam.expression = "pow(BASE,I)"
    lparam = ListParameter()
    lparam.prop = "classifier.C"
    lparam.values = ["-2.0", "-1.0", "0.0", "1.0", "2.0"]
    multi.parameters = [mparam, lparam]
    cls = Classifier(
        classname="weka.classifiers.functions.SMOreg",
        options=["-K", "weka.classifiers.functions.supportVector.RBFKernel"])
    multi.classifier = cls
    multi.build_classifier(train)
    print("Model:\n" + str(multi))
    print("\nBest setup:\n" + multi.best.to_commandline())
def multisearch():
    """
    Applies MultiSearch to a dataset. "multisearch-weka-package" package must be installed.
    """

    helper.print_title("MultiSearch")

    # load a dataset
    fname = helper.get_data_dir() + os.sep + "bolts.arff"
    helper.print_info("Loading train: " + fname)
    loader = Loader(classname="weka.core.converters.ArffLoader")
    train = loader.load_file(fname)
    train.class_is_last()

    # classifier
    multi = MultiSearch(
        options=["-sample-size", "100.0", "-initial-folds", "2", "-subsequent-folds", "2",
                 "-num-slots", "1", "-S", "1"])
    multi.evaluation = "CC"
    mparam = MathParameter()
    mparam.prop = "classifier.kernel.gamma"
    mparam.minimum = -3.0
    mparam.maximum = 3.0
    mparam.step = 1.0
    mparam.base = 10.0
    mparam.expression = "pow(BASE,I)"
    lparam = ListParameter()
    lparam.prop = "classifier.C"
    lparam.values = ["-2.0", "-1.0", "0.0", "1.0", "2.0"]
    multi.parameters = [mparam, lparam]
    cls = Classifier(
        classname="weka.classifiers.functions.SMOreg",
        options=["-K", "weka.classifiers.functions.supportVector.RBFKernel"])
    multi.classifier = cls
    multi.build_classifier(train)
    print("Model:\n" + str(multi))
    print("\nBest setup:\n" + multi.best.to_commandline())
def main():
    """
    Just runs some example code.
    """
    """
    Displays a dataset as matrixplot.
    """

    # setup the flow
    helper.print_title("Matrix plot")
    iris = helper.get_data_dir() + os.sep + "iris.arff"

    flow = Flow(name="matrix plot")

    filesupplier = FileSupplier()
    filesupplier.config["files"] = [iris]
    flow.actors.append(filesupplier)

    loaddataset = LoadDataset()
    flow.actors.append(loaddataset)

    plot = MatrixPlot()
    plot.config["percent"] = 50.0
    flow.actors.append(plot)

    # run the flow
    msg = flow.setup()
    if msg is None:
        print("\n" + flow.tree + "\n")
        msg = flow.execute()
        if msg is not None:
            print("Error executing flow:\n" + msg)
    else:
        print("Error setting up flow:\n" + msg)
    flow.wrapup()
    flow.cleanup()
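
The same plot can also be produced without the flow API, via the weka.plot.dataset module used further below (a sketch; assumes matplotlib is installed, the JVM is running, and the dataset path is a placeholder to adjust):

import weka.core.jvm as jvm
import weka.plot.dataset as pld
from weka.core.converters import Loader

jvm.start()
loader = Loader(classname="weka.core.converters.ArffLoader")
data = loader.load_file("/some/where/iris.arff")  # placeholder path, adjust
pld.matrix_plot(data, percent=50.0, title="Matrix plot iris", wait=True)
jvm.stop()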
Example no. 50
def main():
    """
    Just runs some example code.
    """

    print(helper.get_data_dir())

    # cross-validation + classification
    helper.print_title("Experiment: Cross-validation + classification")
    datasets = [
        helper.get_data_dir() + os.sep + "iris.arff",
        helper.get_data_dir() + os.sep + "anneal.arff"
    ]
    classifiers = [
        Classifier("weka.classifiers.rules.ZeroR"),
        Classifier("weka.classifiers.trees.J48")
    ]
    outfile = tempfile.gettempdir() + os.sep + "results-cv.arff"
    exp = SimpleCrossValidationExperiment(classification=True,
                                          runs=10,
                                          folds=10,
                                          datasets=datasets,
                                          classifiers=classifiers,
                                          result=outfile)
    exp.setup()
    exp.run()

    # evaluate
    loader = converters.loader_for_file(outfile)
    data = loader.load_file(outfile)
    matrix = ResultMatrix("weka.experiment.ResultMatrixPlainText")
    tester = Tester("weka.experiment.PairedCorrectedTTester")
    tester.resultmatrix = matrix
    comparison_col = data.attribute_by_name("Percent_correct").index
    tester.instances = data
    print(tester.header(comparison_col))
    print(tester.multi_resultset_full(0, comparison_col))
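    # multi_resultset_full(0, col) uses the first entry (ZeroR here) as the
    # baseline of the paired t-test; passing 1 would compare against J48 instead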

    # random split + regression
    helper.print_title("Experiment: Random split + regression")
    datasets = [
        helper.get_data_dir() + os.sep + "bolts.arff",
        helper.get_data_dir() + os.sep + "bodyfat.arff"
    ]
    classifiers = [
        Classifier("weka.classifiers.rules.ZeroR"),
        Classifier("weka.classifiers.functions.LinearRegression")
    ]
    outfile = tempfile.gettempdir() + os.sep + "results-rs.arff"
    exp = SimpleRandomSplitExperiment(classification=False,
                                      runs=10,
                                      percentage=66.6,
                                      preserve_order=False,
                                      datasets=datasets,
                                      classifiers=classifiers,
                                      result=outfile)
    exp.setup()
    exp.run()

    # evaluate
    loader = converters.loader_for_file(outfile)
    data = loader.load_file(outfile)
    matrix = ResultMatrix("weka.experiment.ResultMatrixPlainText")
    tester = Tester("weka.experiment.PairedCorrectedTTester")
    tester.resultmatrix = matrix
    comparison_col = data.attribute_by_name("Correlation_coefficient").index
    tester.instances = data
    print(tester.header(comparison_col))
    print(tester.multi_resultset_full(0, comparison_col))

    # plot
    plot_exp.plot_experiment(matrix,
                             title="Random split",
                             measure="Correlation coefficient",
                             wait=True)
def main():
    """
    Just runs some example code.
    """

    # setup the flow
    helper.print_title("cluster data")
    iris = helper.get_data_dir() + os.sep + "iris_no_class.arff"
    clsfile = str(tempfile.gettempdir()) + os.sep + "simplekmeans.model"

    flow = Flow(name="cluster data")

    start = Start()
    flow.actors.append(start)

    build_save = Trigger()
    build_save.name = "build and save clusterer"
    flow.actors.append(build_save)

    filesupplier = FileSupplier()
    filesupplier.config["files"] = [iris]
    build_save.actors.append(filesupplier)

    loaddataset = LoadDataset()
    build_save.actors.append(loaddataset)

    ssv = SetStorageValue()
    ssv.config["storage_name"] = "data"
    build_save.actors.append(ssv)

    train = Train()
    train.config["setup"] = Clusterer(classname="weka.clusterers.SimpleKMeans")
    build_save.actors.append(train)

    ssv = SetStorageValue()
    ssv.config["storage_name"] = "model"
    build_save.actors.append(ssv)

    pick = ContainerValuePicker()
    pick.config["value"] = "Model"
    build_save.actors.append(pick)

    console = Console()
    console.config["prefix"] = "built: "
    pick.actors.append(console)

    writer = ModelWriter()
    writer.config["output"] = clsfile
    build_save.actors.append(writer)
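    # the clusterer now exists twice: serialized to disk (clsfile) and in flow
    # storage under "model"; the two triggers below make predictions with each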

    pred_serialized = Trigger()
    pred_serialized.name = "make predictions (serialized model)"
    flow.actors.append(pred_serialized)

    filesupplier = FileSupplier()
    filesupplier.config["files"] = [iris]
    pred_serialized.actors.append(filesupplier)

    loaddataset = LoadDataset()
    loaddataset.config["incremental"] = True
    pred_serialized.actors.append(loaddataset)

    predict = Predict()
    predict.config["model"] = clsfile
    pred_serialized.actors.append(predict)

    console = Console()
    console.config["prefix"] = "serialized: "
    pred_serialized.actors.append(console)

    pred_storage = Trigger()
    pred_storage.name = "make predictions (model from storage)"
    flow.actors.append(pred_storage)

    filesupplier = FileSupplier()
    filesupplier.config["files"] = [iris]
    pred_storage.actors.append(filesupplier)

    loaddataset = LoadDataset()
    loaddataset.config["incremental"] = True
    pred_storage.actors.append(loaddataset)

    predict = Predict()
    predict.config["storage_name"] = "model"
    pred_storage.actors.append(predict)

    console = Console()
    console.config["prefix"] = "storage: "
    pred_storage.actors.append(console)

    # run the flow
    msg = flow.setup()
    if msg is None:
        print("\n" + flow.tree + "\n")
        msg = flow.execute()
        if msg is not None:
            print("Error executing flow:\n" + msg)
    else:
        print("Error setting up flow:\n" + msg)
    flow.wrapup()
    flow.cleanup()
def main():
    """
    Just runs some example code.
    """

    # setup the flow
    helper.print_title("build and evaluate classifier")
    iris = helper.get_data_dir() + os.sep + "iris.arff"

    flow = Flow(name="build and evaluate classifier")

    start = Start()
    flow.actors.append(start)

    build_save = Trigger()
    build_save.name = "build and store classifier"
    flow.actors.append(build_save)

    filesupplier = FileSupplier()
    filesupplier.config["files"] = [iris]
    build_save.actors.append(filesupplier)

    loaddataset = LoadDataset()
    build_save.actors.append(loaddataset)

    select = ClassSelector()
    select.config["index"] = "last"
    build_save.actors.append(select)

    ssv = SetStorageValue()
    ssv.config["storage_name"] = "data"
    build_save.actors.append(ssv)

    train = Train()
    train.config["setup"] = Classifier(classname="weka.classifiers.trees.J48")
    build_save.actors.append(train)

    pick = ContainerValuePicker()
    pick.config["value"] = "Model"
    build_save.actors.append(pick)

    ssv = SetStorageValue()
    ssv.config["storage_name"] = "model"
    pick.actors.append(ssv)
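    # ContainerValuePicker extracts the "Model" entry from the container
    # produced by Train; the nested SetStorageValue keeps it under "model"
    # for the evaluation trigger below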

    evaluate = Trigger()
    evaluate.name = "evaluate classifier"
    flow.actors.append(evaluate)

    gsv = GetStorageValue()
    gsv.config["storage_name"] = "data"
    evaluate.actors.append(gsv)

    evl = Evaluate()
    evl.config["storage_name"] = "model"
    evaluate.actors.append(evl)

    summary = EvaluationSummary()
    summary.config["matrix"] = True
    evaluate.actors.append(summary)

    console = Console()
    evaluate.actors.append(console)

    # run the flow
    msg = flow.setup()
    if msg is None:
        print("\n" + flow.tree + "\n")
        msg = flow.execute()
        if msg is not None:
            print("Error executing flow:\n" + msg)
    else:
        print("Error setting up flow:\n" + msg)
    flow.wrapup()
    flow.cleanup()
def main():
    """
    Just runs some example code.
    """

    # load a dataset
    iris_file = helper.get_data_dir() + os.sep + "iris.arff"
    helper.print_info("Loading dataset: " + iris_file)
    loader = Loader("weka.core.converters.ArffLoader")
    iris_data = loader.load_file(iris_file)
    iris_data.class_is_last()
    helper.print_title("Iris dataset")
    print(iris_data)
    helper.print_title("Iris dataset (incrementally output)")
    for i in iris_data:
        print(i)
    helper.print_title("Iris summary")
    print(Instances.summary(iris_data))
    helper.print_title("Iris attributes")
    for a in iris_data.attributes():
        print(a)
    helper.print_title("Instance at #0")
    print(iris_data.get_instance(0))
    print(iris_data.get_instance(0).values)
    print("Attribute stats (first):\n" + str(iris_data.attribute_stats(0)))
    print("total count (first attribute):\n" + str(iris_data.attribute_stats(0).total_count))
    print("numeric stats (first attribute):\n" + str(iris_data.attribute_stats(0).numeric_stats))
    print("nominal counts (last attribute):\n"
          + str(iris_data.attribute_stats(iris_data.num_attributes - 1).nominal_counts))
    helper.print_title("Instance values at #0")
    for v in iris_data.get_instance(0):
        print(v)

    # append datasets
    helper.print_title("append datasets")
    data1 = Instances.copy_instances(iris_data, 0, 2)
    data2 = Instances.copy_instances(iris_data, 2, 2)
    print("Dataset #1:\n" + str(data1))
    print("Dataset #2:\n" + str(data2))
    msg = data1.equal_headers(data2)
    print("#1 == #2 ? " + "yes" if msg is None else msg)
    combined = Instances.append_instances(data1, data2)
    print("Combined:\n" + str(combined))

    # merge datasets
    helper.print_title("merge datasets")
    data1 = Instances.copy_instances(iris_data, 0, 2)
    data1.class_index = -1
    data1.delete_attribute(1)
    data1.delete_first_attribute()
    data2 = Instances.copy_instances(iris_data, 0, 2)
    data2.class_index = -1
    data2.delete_attribute(4)
    data2.delete_attribute(3)
    data2.delete_attribute(2)
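    # data1 now holds only the last three attributes, data2 only the first
    # two, so merging them side by side restores all five columns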
    print("Dataset #1:\n" + str(data1))
    print("Dataset #2:\n" + str(data2))
    msg = data1.equal_headers(data2)
    print("#1 == #2 ? " + ("yes" if msg is None else msg))
    combined = Instances.merge_instances(data2, data1)
    print("Combined:\n" + str(combined))

    # load dataset incrementally
    iris_file = helper.get_data_dir() + os.sep + "iris.arff"
    helper.print_info("Loading dataset incrementally: " + iris_file)
    loader = Loader("weka.core.converters.ArffLoader")
    iris_data = loader.load_file(iris_file, incremental=True)
    iris_data.class_is_last()
    helper.print_title("Iris dataset")
    print(iris_data)
    for inst in loader:
        print(inst)

    # create attributes
    helper.print_title("Creating attributes")
    num_att = Attribute.create_numeric("num")
    print("numeric: " + str(num_att))
    date_att = Attribute.create_date("dat", "yyyy-MM-dd")
    print("date: " + str(date_att))
    nom_att = Attribute.create_nominal("nom", ["label1", "label2"])
    print("nominal: " + str(nom_att))

    # create dataset
    helper.print_title("Create dataset")
    dataset = Instances.create_instances("helloworld", [num_att, date_att, nom_att], 0)
    print(str(dataset))

    # create an instance
    helper.print_title("Create and add instance")
    values = [3.1415926, date_att.parse_date("2014-04-10"), 1.0]
    inst = Instance.create_instance(values)
    print("Instance #1:\n" + str(inst))
    dataset.add_instance(inst)
    values = [2.71828, date_att.parse_date("2014-08-09"), Instance.missing_value()]
    inst = Instance.create_instance(values)
    dataset.add_instance(inst)
    print("Instance #2:\n" + str(inst))
    inst.set_value(0, 4.0)
    print("Instance #2 (updated):\n" + str(inst))
    print("Dataset:\n" + str(dataset))
    dataset.delete_with_missing(2)
    print("Dataset (after delete of missing):\n" + str(dataset))
    values = [(1, date_att.parse_date("2014-07-11"))]
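    # sparse values are given as (attribute_index, value) pairs; attributes
    # not listed default to 0.0 in the resulting SparseInstance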
    inst = Instance.create_sparse_instance(values, 3, classname="weka.core.SparseInstance")
    print("sparse Instance:\n" + str(inst))
    dataset.add_instance(inst)
    print("dataset with mixed dense/sparse instance objects:\n" + str(dataset))

    # create dataset (lists)
    helper.print_title("Create dataset from lists")
    x = [[randint(1, 10) for _ in range(5)] for _ in range(10)]
    y = [randint(0, 1) for _ in range(10)]
    dataset2 = ds.create_instances_from_lists(x, y, "generated from lists")
    print(dataset2)
    x = [[randint(1, 10) for _ in range(5)] for _ in range(10)]
    dataset2 = ds.create_instances_from_lists(x, name="generated from lists (no y)")
    print(dataset2)

    # create dataset (matrices)
    helper.print_title("Create dataset from matrices")
    x = np.random.randn(10, 5)
    y = np.random.randn(10)
    dataset3 = ds.create_instances_from_matrices(x, y, "generated from matrices")
    print(dataset3)
    x = np.random.randn(10, 5)
    dataset3 = ds.create_instances_from_matrices(x, name="generated from matrices (no y)")
    print(dataset3)

    # create more sparse instances
    diabetes_file = helper.get_data_dir() + os.sep + "diabetes.arff"
    helper.print_info("Loading dataset: " + diabetes_file)
    loader = Loader("weka.core.converters.ArffLoader")
    diabetes_data = loader.load_file(diabetes_file)
    diabetes_data.class_is_last()
    helper.print_title("Create sparse instances using template dataset")
    sparse_data = Instances.template_instances(diabetes_data)
    for i in range(diabetes_data.num_attributes - 1):
        inst = Instance.create_sparse_instance(
            [(i, float(i+1) / 10.0)], sparse_data.num_attributes, classname="weka.core.SparseInstance")
        sparse_data.add_instance(inst)
    print("sparse dataset:\n" + str(sparse_data))

    # simple scatterplot of iris dataset: petalwidth x petallength
    iris_data = loader.load_file(iris_file)
    iris_data.class_is_last()
    pld.scatter_plot(
        iris_data, iris_data.attribute_by_name("petalwidth").index,
        iris_data.attribute_by_name("petallength").index,
        percent=50,
        wait=False)

    # line plot of iris dataset (without class attribute)
    iris_data = loader.load_file(iris_file)
    iris_data.class_is_last()
    pld.line_plot(iris_data, atts=range(iris_data.num_attributes - 1), percent=50, title="Line plot iris", wait=False)

    # matrix plot of iris dataset
    iris_data = loader.load_file(iris_file)
    iris_data.class_is_last()
    pld.matrix_plot(iris_data, percent=50, title="Matrix plot iris", wait=True)
def main():
    """
    Just runs some example code.
    """

    # load a dataset
    iris = helper.get_data_dir() + os.sep + "iris.arff"
    helper.print_info("Loading dataset: " + iris)
    loader = Loader("weka.core.converters.ArffLoader")
    data = loader.load_file(iris)

    # remove class attribute
    helper.print_info("Removing class attribute")
    remove = Filter(classname="weka.filters.unsupervised.attribute.Remove", options=["-R", "last"])
    remove.inputformat(data)
    filtered = remove.filter(data)

    # use MultiFilter
    helper.print_info("Use MultiFilter")
    remove = Filter(classname="weka.filters.unsupervised.attribute.Remove", options=["-R", "first"])
    std = Filter(classname="weka.filters.unsupervised.attribute.Standardize")
    multi = MultiFilter()
    multi.filters = [remove, std]
    multi.inputformat(data)
    filtered_multi = multi.filter(data)

    # output datasets
    helper.print_title("Input")
    print(data)
    helper.print_title("Output")
    print(filtered)
    helper.print_title("Output (MultiFilter)")
    print(filtered_multi)

    # load text dataset
    text = helper.get_data_dir() + os.sep + "reutersTop10Randomized_1perc_shortened.arff"
    helper.print_info("Loading dataset: " + text)
    loader = Loader("weka.core.converters.ArffLoader")
    data = loader.load_file(text)
    data.class_is_last()

    # apply StringToWordVector
    stemmer = Stemmer(classname="weka.core.stemmers.IteratedLovinsStemmer")
    stopwords = Stopwords(classname="weka.core.stopwords.Rainbow")
    tokenizer = Tokenizer(classname="weka.core.tokenizers.WordTokenizer")
    s2wv = StringToWordVector(options=["-W", "10", "-L", "-C"])
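    # -W 10 keeps (roughly) the 10 most common words per class, -L lower-cases
    # all tokens and -C outputs word counts instead of binary presence flags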
    s2wv.stemmer = stemmer
    s2wv.stopwords = stopwords
    s2wv.tokenizer = tokenizer
    s2wv.inputformat(data)
    filtered = s2wv.filter(data)

    helper.print_title("Input (StringToWordVector)")
    print(data)
    helper.print_title("Output (StringToWordVector)")
    print(filtered)

    # partial classname
    helper.print_title("Creating filter from partial classname")
    clsname = ".Standardize"
    f = Filter(classname=clsname)
    print(clsname + " --> " + f.classname)

    # source code
    helper.print_info("Generate source code")
    labor = helper.get_data_dir() + os.sep + "labor.arff"
    helper.print_info("Loading dataset: " + labor)
    loader = Loader("weka.core.converters.ArffLoader")
    data = loader.load_file(labor)
    replace = Filter(classname="weka.filters.unsupervised.attribute.ReplaceMissingValues")
    replace.inputformat(data)
    replace.filter(data)
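    # to_source generates equivalent standalone Java code for the trained
    # filter (only filters implementing weka.filters.Sourcable support this)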
    print(replace.to_source("MyReplaceMissingValues", data))
Example no. 55
def main():
    """
    Just runs some example code.
    """

    # setup the flow
    helper.print_title("build and evaluate classifier")
    iris = helper.get_data_dir() + os.sep + "iris.arff"

    flow = Flow(name="build and evaluate classifier")

    start = Start()
    flow.actors.append(start)

    build_save = Trigger()
    build_save.name = "build and store classifier"
    flow.actors.append(build_save)

    filesupplier = FileSupplier()
    filesupplier.config["files"] = [iris]
    build_save.actors.append(filesupplier)

    loaddataset = LoadDataset()
    build_save.actors.append(loaddataset)

    select = ClassSelector()
    select.config["index"] = "last"
    build_save.actors.append(select)

    ssv = SetStorageValue()
    ssv.config["storage_name"] = "data"
    build_save.actors.append(ssv)

    train = Train()
    train.config["setup"] = Classifier(classname="weka.classifiers.trees.J48")
    build_save.actors.append(train)

    pick = ContainerValuePicker()
    pick.config["value"] = "Model"
    build_save.actors.append(pick)

    ssv = SetStorageValue()
    ssv.config["storage_name"] = "model"
    pick.actors.append(ssv)

    evaluate = Trigger()
    evaluate.name = "evaluate classifier"
    flow.actors.append(evaluate)

    gsv = GetStorageValue()
    gsv.config["storage_name"] = "data"
    evaluate.actors.append(gsv)

    evl = Evaluate()
    evl.config["storage_name"] = "model"
    evaluate.actors.append(evl)

    summary = EvaluationSummary()
    summary.config["matrix"] = True
    evaluate.actors.append(summary)

    console = Console()
    evaluate.actors.append(console)

    # run the flow
    msg = flow.setup()
    if msg is None:
        print("\n" + flow.tree + "\n")
        msg = flow.execute()
        if msg is not None:
            print("Error executing flow:\n" + msg)
    else:
        print("Error setting up flow:\n" + msg)
    flow.wrapup()
    flow.cleanup()
def main():
    """
    Just runs some example code.
    """

    # setup the flow
    helper.print_title("Attribute selection")
    iris = helper.get_data_dir() + os.sep + "iris.arff"

    flow = Flow(name="attribute selection")

    filesupplier = FileSupplier()
    filesupplier.config["files"] = [iris]
    flow.actors.append(filesupplier)

    loaddataset = LoadDataset()
    loaddataset.config["incremental"] = False
    flow.actors.append(loaddataset)

    attsel = AttributeSelection()
    attsel.config["search"] = ASSearch(classname="weka.attributeSelection.BestFirst")
    attsel.config["eval"] = ASEvaluation(classname="weka.attributeSelection.CfsSubsetEval")
    flow.actors.append(attsel)

    results = Tee()
    results.name = "output results"
    flow.actors.append(results)

    picker = ContainerValuePicker()
    picker.config["value"] = "Results"
    picker.config["switch"] = True
    results.actors.append(picker)

    console = Console()
    console.config["prefix"] = "Attribute selection results:"
    results.actors.append(console)

    reduced = Tee()
    reduced.name = "reduced dataset"
    flow.actors.append(reduced)

    picker = ContainerValuePicker()
    picker.config["value"] = "Reduced"
    picker.config["switch"] = True
    reduced.actors.append(picker)

    console = Console()
    console.config["prefix"] = "Reduced dataset:\n\n"
    reduced.actors.append(console)

    # run the flow
    msg = flow.setup()
    if msg is None:
        print("\n" + flow.tree + "\n")
        msg = flow.execute()
        if msg is not None:
            print("Error executing flow:\n" + msg)
    else:
        print("Error setting up flow:\n" + msg)
    flow.wrapup()
    flow.cleanup()