Exemplo n.º 1
0
    def test_batch_filtering(self):
        """
        Tests the Filter.filter method, first on a single dataset and then
        on a list of two datasets passed in one call.
        """
        arff_loader = converters.Loader(
            classname="weka.core.converters.ArffLoader")

        # single dataset: the Remove filter is set up with "-R 1,3", so the
        # result is expected to have two attributes fewer, same row count
        anneal = arff_loader.load_file(self.datafile("anneal.arff"))
        self.assertIsNotNone(anneal)

        remove = filters.Filter(
            classname="weka.filters.unsupervised.attribute.Remove",
            options=["-R", "1,3"])
        remove.inputformat(anneal)
        reduced = remove.filter(anneal)
        self.assertEqual(
            anneal.num_attributes - 2,
            reduced.num_attributes,
            msg="Number of attributes differ")
        self.assertEqual(
            anneal.num_instances,
            reduced.num_instances,
            msg="Number of instances differ")

        # multiple files: train and test set are filtered in a single call
        train = arff_loader.load_file(
            self.datafile("reutersTop10Randomized_1perc_shortened-train.arff"))
        self.assertIsNotNone(train)
        test = arff_loader.load_file(
            self.datafile("reutersTop10Randomized_1perc_shortened-test.arff"))
        self.assertIsNotNone(test)

        to_words = filters.Filter(
            classname="weka.filters.unsupervised.attribute.StringToWordVector")
        to_words.inputformat(train)  # only the first dataset defines the format
        vectorized = to_words.filter([train, test])
        # the test expects equal_headers() to yield None for matching headers
        self.assertIsNone(
            vectorized[0].equal_headers(vectorized[1]),
            msg="Headers should be compatible")
Exemplo n.º 2
0
    def test_incremental_filtering(self):
        """
        Tests the Filter.input/output methods by pushing the rows of a
        dataset through the filter one at a time.
        """
        arff_loader = converters.Loader(
            classname="weka.core.converters.ArffLoader")
        source = arff_loader.load_file(self.datafile("anneal.arff"))
        self.assertIsNotNone(source)

        remove = filters.Filter(
            classname="weka.filters.unsupervised.attribute.Remove",
            options=["-R", "1,3"])
        remove.inputformat(source)

        # the output format serves as the container for the filtered rows
        collected = remove.outputformat()
        self.assertIsNotNone(collected)
        self.assertTrue(
            isinstance(collected, dataset.Instances),
            msg="Should be Instances object")

        # feed one row in, take one row out, store it
        for row in source:
            remove.input(row)
            collected.add_instance(remove.output())

        self.assertEqual(
            source.num_attributes - 2,
            collected.num_attributes,
            msg="Number of attributes differ")
        self.assertEqual(
            source.num_instances,
            collected.num_instances,
            msg="Number of instances differ")
    def test_instantiate_clusterer(self):
        """
        Tests the instantiation of Clusterer objects: a plain clusterer and
        a FilteredClusterer that gets a filter and a base clusterer assigned.
        """
        # plain clusterer created from its classname
        kmeans_name = "weka.clusterers.SimpleKMeans"
        kmeans = clusterers.Clusterer(classname=kmeans_name)
        self.assertIsNotNone(kmeans, msg="Failed to instantiate clusterer!")
        self.assertEqual(kmeans_name, kmeans.classname, msg="Classnames differ!")

        # filtered clusterer created without arguments
        filtered = clusterers.FilteredClusterer()
        self.assertIsNotNone(
            filtered, msg="Failed to instantiate filtered clusterer!")
        self.assertEqual(
            "weka.clusterers.FilteredClusterer",
            filtered.classname,
            msg="Classnames differ!")

        # assign a filter and read it back
        remove_name = "weka.filters.unsupervised.attribute.Remove"
        remove = filters.Filter(classname=remove_name)
        self.assertEqual(
            remove_name, remove.classname, msg="Filter classnames differ!")
        filtered.filter = remove
        self.assertEqual(
            remove_name,
            filtered.filter.classname,
            msg="Filter classnames differ!")

        # assign a base clusterer and read it back
        em_name = "weka.clusterers.EM"
        filtered.clusterer = clusterers.Clusterer(classname=em_name)
        self.assertEqual(
            em_name,
            filtered.clusterer.classname,
            msg="Base clusterer classnames differ!")
def main():
    """
    Just runs some example code: plots a dataset.

    Builds a flow that loads the iris dataset and hands it to a branch with
    two sequences, one with a matrix plot and one that copies the data,
    applies a Remove filter and shows a line plot.
    """

    # setup the flow
    helper.print_title("Plot dataset")
    iris_path = helper.get_data_dir() + os.sep + "iris.arff"

    flow = Flow(name="plot dataset")

    # source: supplies the file name(s) to load
    supplier = FileSupplier()
    supplier.config["files"] = [iris_path]
    flow.actors.append(supplier)

    loader = LoadDataset()
    flow.actors.append(loader)

    branch = Branch()
    flow.actors.append(branch)

    # first sub-flow of the branch: matrix plot
    matrix_seq = Sequence(name="matrix plot")
    branch.actors.append(matrix_seq)

    matrix_plot = MatrixPlot()
    matrix_plot.config["percent"] = 50.0
    matrix_plot.config["wait"] = False
    matrix_seq.actors.append(matrix_plot)

    # second sub-flow of the branch: copy, filter, line plot
    line_seq = Sequence(name="line plot")
    branch.actors.append(line_seq)

    duplicate = Copy()
    line_seq.actors.append(duplicate)

    remove_last = Filter()
    remove_last.config["setup"] = filters.Filter(
        classname="weka.filters.unsupervised.attribute.Remove",
        options=["-R", "last"])
    remove_last.config["keep_relationname"] = True
    line_seq.actors.append(remove_last)

    line_plot = LinePlot()
    line_plot.config["percent"] = 50.0
    line_plot.config["wait"] = True
    line_seq.actors.append(line_plot)

    # run the flow; setup() and execute() return None unless there is an
    # error message to report
    msg = flow.setup()
    if msg is not None:
        print("Error setting up flow:\n" + msg)
    else:
        print("\n" + flow.tree + "\n")
        msg = flow.execute()
        if msg is not None:
            print("Error executing flow:\n" + msg)
    flow.wrapup()
    flow.cleanup()
    def test_generate_thresholdcurve_data(self):
        """
        Tests the generate_thresholdcurve_data method on a cross-validated
        NaiveBayes evaluation of the diabetes dataset.
        """
        arff_loader = converters.Loader(
            classname="weka.core.converters.ArffLoader")
        diabetes = arff_loader.load_file(self.datafile("diabetes.arff"))
        diabetes.class_is_last()

        bayes = classifiers.Classifier(
            classname="weka.classifiers.bayes.NaiveBayes")
        remove = filters.Filter(
            classname="weka.filters.unsupervised.attribute.Remove",
            options=["-R", "1-3"])
        # NOTE(review): the FilteredClassifier is configured here, but the
        # cross-validation below runs on the plain NaiveBayes instance --
        # confirm whether that is intended.
        fc = classifiers.FilteredClassifier()
        fc.filter = remove
        fc.classifier = bayes

        evaluation = classifiers.Evaluation(diabetes)
        evaluation.crossvalidate_model(bayes, diabetes, 10, Random(1))

        curve = plot.generate_thresholdcurve_data(evaluation, 0)
        self.assertEqual(
            13, curve.num_attributes, msg="number of attributes differs")
        self.assertEqual(769, curve.num_instances, msg="number of rows differs")

        # a selection of attributes that must be present in the curve data
        for attname in ("True Positives", "False Positive Rate", "Lift"):
            self.assertIsNotNone(
                curve.attribute_by_name(attname),
                msg="Failed to locate attribute: " + attname)
Exemplo n.º 6
0
 def test_instantiate_classifier(self):
     """
     Tests the instantiation of a filter from classname and options.

     Despite the method name, the object under test is the Remove filter.
     The options are kept in a variable that is both handed to the
     constructor and used in the failure message, so the message reports
     the options that were actually used.
     """
     cname = "weka.filters.unsupervised.attribute.Remove"
     options = ["-R", "1,3"]
     flter = filters.Filter(classname=cname, options=options)
     self.assertIsNotNone(flter, msg="Failed to instantiate: " + cname + "/" + str(options))
     self.assertEqual(cname, flter.classname, msg="Classnames differ!")
Exemplo n.º 7
0
    def test_capabilities(self):
        """
        Tests that a filter exposes its capabilities.

        The classname and options used for the failure message are the same
        values the filter is constructed with, so a failure names the class
        that was actually instantiated.
        """
        cname = "weka.filters.unsupervised.attribute.Remove"
        options = ["-R", "1,3"]
        flter = filters.Filter(classname=cname, options=options)
        self.assertIsNotNone(flter, msg="Failed to instantiate: " + cname + "/" + str(options))

        caps = flter.capabilities
        self.assertIsNotNone(caps, msg="Capabilities are None!")
Exemplo n.º 8
0
    def test_make_copy(self):
        """
        Tests the make_copy class method of Filter.

        A Remove filter is instantiated and copied; both the original and
        the copy must exist and report the same classname. The options used
        in the failure messages are the ones passed to the constructor.
        """
        cname = "weka.filters.unsupervised.attribute.Remove"
        options = ["-R", "1,3"]
        flter = filters.Filter(classname=cname, options=options)
        self.assertIsNotNone(flter, msg="Failed to instantiate: " + cname + "/" + str(options))
        self.assertEqual(cname, flter.classname, msg="Classnames differ!")

        flter2 = filters.Filter.make_copy(flter)
        self.assertIsNotNone(flter2, msg="Failed to instantiate: " + cname + "/" + str(options))
        self.assertEqual(cname, flter2.classname, msg="Classnames differ!")
Exemplo n.º 9
0
def main():
    """
    Just runs some example code: loads/filters a dataset incrementally and
    saves it to a new file.

    The flow consists of a file supplier, an incremental dataset loader, a
    Remove filter, a relation renamer and an instance dumper.
    """

    # setup the flow
    helper.print_title("Load/filter/save dataset (incrementally)")
    iris_path = helper.get_data_dir() + os.sep + "iris.arff"

    flow = Flow(name="Load/filter/save dataset (incrementally)")

    supplier = FileSupplier()
    supplier.config["files"] = [iris_path]
    flow.actors.append(supplier)

    loader = LoadDataset()
    loader.config["incremental"] = True
    flow.actors.append(loader)

    remove_last = Filter()
    remove_last.config["setup"] = filters.Filter(
        classname="weka.filters.unsupervised.attribute.Remove",
        options=["-R", "last"])
    flow.actors.append(remove_last)

    renamer = RenameRelation()
    renamer.config["name"] = "iris-reduced"
    flow.actors.append(renamer)

    # the dumper's output path points into the system's temp directory
    sink = InstanceDumper()
    sink.config["output"] = tempfile.gettempdir() + os.sep + "out.arff"
    flow.actors.append(sink)

    # run the flow; setup() and execute() return None unless there is an
    # error message to report
    msg = flow.setup()
    if msg is not None:
        print("Error setting up flow:\n" + msg)
    else:
        print("\n" + flow.tree + "\n")
        msg = flow.execute()
        if msg is not None:
            print("Error executing flow:\n" + msg)
    flow.wrapup()
    flow.cleanup()
Exemplo n.º 10
0
    def test_plot_prc(self):
        """
        Tests the plot_prc method on a cross-validated NaiveBayes evaluation
        of the diabetes dataset; passes if plotting raises no error.
        """
        arff_loader = converters.Loader(classname="weka.core.converters.ArffLoader")
        diabetes = arff_loader.load_file(self.datafile("diabetes.arff"))
        diabetes.class_is_last()

        bayes = classifiers.Classifier(classname="weka.classifiers.bayes.NaiveBayes")
        remove = filters.Filter(classname="weka.filters.unsupervised.attribute.Remove", options=["-R", "1-3"])
        # NOTE(review): the FilteredClassifier is configured here, but the
        # cross-validation below runs on the plain NaiveBayes instance --
        # confirm whether that is intended.
        fc = classifiers.FilteredClassifier()
        fc.filter = remove
        fc.classifier = bayes

        evaluation = classifiers.Evaluation(diabetes)
        evaluation.crossvalidate_model(bayes, diabetes, 10, Random(1))

        # plot the curves for both class label indices without blocking
        plot.plot_prc(evaluation, class_index=[0, 1], wait=False)
Exemplo n.º 11
0
    def test_instantiate_classifier(self):
        """
        Tests the instantiation of several classifier classes: plain
        classifiers with and without options, a single classifier enhancer,
        the FilteredClassifier, a kernel classifier and a multiple
        classifiers combiner.
        """
        # J48, once without options and once with a custom -C value
        cname = "weka.classifiers.trees.J48"
        for options in (None, ["-C", "0.3"]):
            cls = classifiers.Classifier(classname=cname, options=options)
            self.assertIsNotNone(cls, msg="Failed to instantiate: " + cname + "/" + str(options))
            self.assertEqual(cname, cls.classname, msg="Classnames differ!")

        # enhancer wrapping J48; a filter is attached and read back
        cname = "weka.classifiers.meta.FilteredClassifier"
        options = ["-W", "weka.classifiers.trees.J48", "--", "-C", "0.3"]
        enhancer = classifiers.SingleClassifierEnhancer(classname=cname, options=options)
        self.assertIsNotNone(enhancer, msg="Failed to instantiate: " + cname + "/" + str(options))
        self.assertEqual(cname, enhancer.classname, msg="Classnames differ!")
        fname = "weka.filters.unsupervised.attribute.Remove"
        enhancer.filter = filters.Filter(classname=fname, options=["-R", "last"])
        self.assertEqual(fname, enhancer.filter.classname, msg="Classnames differ!")

        # dedicated FilteredClassifier wrapper, no arguments
        filtered = classifiers.FilteredClassifier()
        self.assertIsNotNone(filtered, msg="Failed to instantiate FilteredClassifier!")
        self.assertEqual("weka.classifiers.meta.FilteredClassifier", filtered.classname, msg="Classnames differ!")

        # kernel classifier with an explicitly assigned kernel
        cname = "weka.classifiers.functions.SMO"
        smo = classifiers.KernelClassifier(classname=cname)
        self.assertIsNotNone(smo, msg="Failed to instantiate KernelClassifier: " + cname)
        self.assertEqual(cname, smo.classname, msg="Classnames differ!")
        kname = "weka.classifiers.functions.supportVector.RBFKernel"
        rbf = classifiers.Kernel(classname=kname)
        self.assertIsNotNone(rbf, msg="Failed to instantiate Kernel: " + kname)
        smo.kernel = rbf
        self.assertEqual(kname, smo.kernel.classname, msg="Kernel classnames differ!")

        # combiner of multiple classifiers
        cname = "weka.classifiers.meta.Vote"
        vote = classifiers.MultipleClassifiersCombiner(classname=cname)
        self.assertIsNotNone(vote, msg="Failed to instantiate MultipleClassifiersCombiner: " + cname)
        self.assertEqual(cname, vote.classname, msg="Classnames differ!")
def main():
    """
    Just runs some example code: cross-validates a clusterer.

    The flow loads the iris dataset, passes it through a Remove filter
    configured with "-R last", cross-validates an EM clusterer on the
    result and prints the outcome on the console with a
    "Loglikelihood: " prefix.
    """

    # setup the flow
    helper.print_title("Cross-validate clusterer")
    iris = helper.get_data_dir() + os.sep + "iris.arff"

    flow = Flow(name="cross-validate clusterer")

    filesupplier = FileSupplier()
    filesupplier.config["files"] = [iris]
    flow.actors.append(filesupplier)

    loaddataset = LoadDataset()
    flow.actors.append(loaddataset)

    flter = Filter()
    flter.name = "Remove class"
    # The filter object is stored under the "setup" key, matching every
    # other Filter actor in these examples; it was previously stored under
    # "filter".
    flter.config["setup"] = filters.Filter(
        classname="weka.filters.unsupervised.attribute.Remove",
        options=["-R", "last"])
    flow.actors.append(flter)

    cv = CrossValidate()
    cv.config["setup"] = Clusterer(classname="weka.clusterers.EM")
    flow.actors.append(cv)

    console = Console()
    console.config["prefix"] = "Loglikelihood: "
    flow.actors.append(console)

    # run the flow; setup() and execute() return None unless there is an
    # error message to report
    msg = flow.setup()
    if msg is None:
        print("\n" + flow.tree + "\n")
        msg = flow.execute()
        if msg is not None:
            print("Error executing flow:\n" + msg)
    else:
        print("Error setting up flow:\n" + msg)
    flow.wrapup()
    flow.cleanup()
Exemplo n.º 13
0
def incremental():
    """
    Just runs some example code: loads/filters datasets incrementally.

    The flow supplies two ARFF files (iris and anneal), loads them
    incrementally, applies a Remove filter configured with "-R 1" and
    sends the output to the console.
    """

    # setup the flow
    helper.print_title("Filter datasets (incrementally)")
    data_dir_iris = helper.get_data_dir() + os.sep + "iris.arff"
    data_dir_anneal = helper.get_data_dir() + os.sep + "anneal.arff"

    flow = Flow(name="filter datasets (incrementally)")

    supplier = FileSupplier()
    supplier.config["files"] = [data_dir_iris, data_dir_anneal]
    flow.actors.append(supplier)

    loader = LoadDataset()
    loader.config["incremental"] = True
    flow.actors.append(loader)

    remove_first = Filter()
    remove_first.config["setup"] = filters.Filter(
        classname="weka.filters.unsupervised.attribute.Remove",
        options=["-R", "1"])
    remove_first.config["keep_relationname"] = True
    flow.actors.append(remove_first)

    output = Console()
    flow.actors.append(output)

    # run the flow; setup() and execute() return None unless there is an
    # error message to report
    msg = flow.setup()
    if msg is not None:
        print("Error setting up flow:\n" + msg)
    else:
        print("\n" + flow.tree + "\n")
        msg = flow.execute()
        if msg is not None:
            print("Error executing flow:\n" + msg)
    flow.wrapup()
    flow.cleanup()
Exemplo n.º 14
0
    def test_get_prc(self):
        """
        Tests the get_prc method on threshold curve data generated from a
        cross-validated NaiveBayes evaluation of the diabetes dataset.
        """
        arff_loader = converters.Loader(classname="weka.core.converters.ArffLoader")
        diabetes = arff_loader.load_file(self.datafile("diabetes.arff"))
        diabetes.class_is_last()

        bayes = classifiers.Classifier(classname="weka.classifiers.bayes.NaiveBayes")
        remove = filters.Filter(classname="weka.filters.unsupervised.attribute.Remove", options=["-R", "1-3"])
        # NOTE(review): the FilteredClassifier is configured here, but the
        # cross-validation below runs on the plain NaiveBayes instance --
        # confirm whether that is intended.
        fc = classifiers.FilteredClassifier()
        fc.filter = remove
        fc.classifier = bayes

        evaluation = classifiers.Evaluation(diabetes)
        evaluation.crossvalidate_model(bayes, diabetes, 10, Random(1))

        # area under the curve, computed from the curve data for class index 0
        curve = plot.generate_thresholdcurve_data(evaluation, 0)
        self.assertAlmostEqual(0.892, plot.get_prc(curve), places=3, msg="PRC differs")
Exemplo n.º 15
0
def main():
    """
    Just runs some example code: builds a clusterer incrementally.

    The flow loads the iris dataset incrementally, removes the last
    attribute, updates a counter in storage for each item and trains a
    Cobweb clusterer. A Tee with a condition on the counter outputs the
    number of instances and the model on the console.
    """

    # setup the flow
    count = 50  # interval used in the Tee's name and condition
    helper.print_title("build clusterer incrementally")
    iris_path = helper.get_data_dir() + os.sep + "iris.arff"

    flow = Flow(name="build clusterer incrementally")

    supplier = FileSupplier()
    supplier.config["files"] = [iris_path]
    flow.actors.append(supplier)

    # storage value "counter" starts at 0
    counter_init = InitStorageValue()
    counter_init.config["storage_name"] = "counter"
    counter_init.config["value"] = 0
    flow.actors.append(counter_init)

    loader = LoadDataset()
    loader.config["incremental"] = True
    flow.actors.append(loader)

    remove_class = Filter(name="remove class attribute")
    remove_class.config["setup"] = filters.Filter(
        classname="weka.filters.unsupervised.attribute.Remove",
        options=["-R", "last"])
    flow.actors.append(remove_class)

    # storage value "counter" is updated with the expression "{X} + 1"
    counter_inc = UpdateStorageValue()
    counter_inc.config["storage_name"] = "counter"
    counter_inc.config["expression"] = "{X} + 1"
    flow.actors.append(counter_inc)

    trainer = Train()
    trainer.config["setup"] = Clusterer(classname="weka.clusterers.Cobweb")
    flow.actors.append(trainer)

    model_picker = ContainerValuePicker()
    model_picker.config["value"] = "Model"
    model_picker.config["switch"] = True
    flow.actors.append(model_picker)

    # conditional side branch, guarded by a modulo test on the counter
    tee = Tee(name="output model every " + str(count) + " instances")
    tee.config["condition"] = "@{counter} % " + str(count) + " == 0"
    flow.actors.append(tee)

    trigger = Trigger(name="output # of instances")
    tee.actors.append(trigger)

    counter_get = GetStorageValue()
    counter_get.config["storage_name"] = "counter"
    trigger.actors.append(counter_get)

    count_console = Console()
    count_console.config["prefix"] = "# of instances: "
    trigger.actors.append(count_console)

    model_console = Console(name="output model")
    tee.actors.append(model_console)

    # run the flow; setup() and execute() return None unless there is an
    # error message to report
    msg = flow.setup()
    if msg is not None:
        print("Error setting up flow:\n" + msg)
    else:
        print("\n" + flow.tree + "\n")
        msg = flow.execute()
        if msg is not None:
            print("Error executing flow:\n" + msg)
    flow.wrapup()
    flow.cleanup()