def test_build_classifier(self):
    """
    Checks that build_classifier can be called on a nominal-class and on a
    numeric-class dataset.
    """
    # (data file, classifier class, classifier options);
    # 1. nominal, 2. numeric
    scenarios = (
        ("anneal.arff", "weka.classifiers.trees.J48", ["-C", "0.3"]),
        ("bolts.arff", "weka.classifiers.functions.LinearRegression", ["-R", "0.1"]),
    )
    for arff_name, cname, options in scenarios:
        loader = converters.Loader(classname="weka.core.converters.ArffLoader")
        data = loader.load_file(self.datafile(arff_name))
        self.assertIsNotNone(data)
        data.class_is_last()

        cls = classifiers.Classifier(classname=cname, options=options)
        self.assertIsNotNone(
            cls,
            msg="Failed to instantiate: " + cname + "/" + str(options))
        cls.build_classifier(data)
def test_classify_instance(self):
    """
    Compares classify_instance output with recorded predictions, once for a
    nominal class (J48 on anneal) and once for a numeric class
    (LinearRegression on bolts).
    """
    # 1. nominal
    loader = converters.Loader(classname="weka.core.converters.ArffLoader")
    anneal = loader.load_file(self.datafile("anneal.arff"))
    self.assertIsNotNone(anneal)
    anneal.class_is_last()

    cname = "weka.classifiers.trees.J48"
    options = ["-C", "0.3"]
    tree = classifiers.Classifier(classname=cname, options=options)
    self.assertIsNotNone(
        tree,
        msg="Failed to instantiate: " + cname + "/" + str(options))
    tree.build_classifier(anneal)

    labels = []
    for row in range(10, 20):
        label = tree.classify_instance(anneal.get_instance(row))
        self.assertIsNotNone(label)
        labels.append(label)
    self.assertEqual(
        [2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 5.0, 5.0, 2.0, 2.0],
        labels,
        msg="Classifications differ")

    # 2. numeric
    loader = converters.Loader(classname="weka.core.converters.ArffLoader")
    bolts = loader.load_file(self.datafile("bolts.arff"))
    self.assertIsNotNone(bolts)
    bolts.class_is_last()

    cname = "weka.classifiers.functions.LinearRegression"
    options = ["-R", "0.1"]
    regression = classifiers.Classifier(classname=cname, options=options)
    self.assertIsNotNone(
        regression,
        msg="Failed to instantiate: " + cname + "/" + str(options))
    regression.build_classifier(bolts)

    estimates = []
    for row in range(10):
        estimate = regression.classify_instance(bolts.get_instance(row))
        self.assertIsNotNone(estimate)
        estimates.append(estimate)

    expected = [
        24.313, 33.359, 28.569, 26.365, 32.680,
        29.149, 26.998, 22.971, 13.160, 7.394,
    ]
    # compared pairwise to three decimal places
    for wanted, actual in zip(expected, estimates):
        self.assertAlmostEqual(wanted, actual, places=3, msg="Classifications differ")
def test_distribution_for_instance(self):
    """
    Checks the length of the class distribution returned by
    distribution_for_instance for a nominal-class and a numeric-class
    classifier.
    """
    # (data file, classifier class, options, expected distribution length,
    #  failure message); 1. nominal, 2. numeric
    scenarios = (
        ("anneal.arff",
         "weka.classifiers.trees.J48",
         ["-C", "0.3"],
         6,
         "Number of classes in prediction differ!"),
        ("bolts.arff",
         "weka.classifiers.functions.LinearRegression",
         ["-R", "0.1"],
         1,
         "Number of classes in prediction should be one for numeric classifier!"),
    )
    for arff_name, cname, options, size, message in scenarios:
        loader = converters.Loader(classname="weka.core.converters.ArffLoader")
        data = loader.load_file(self.datafile(arff_name))
        self.assertIsNotNone(data)
        data.class_is_last()

        cls = classifiers.Classifier(classname=cname, options=options)
        self.assertIsNotNone(
            cls,
            msg="Failed to instantiate: " + cname + "/" + str(options))
        cls.build_classifier(data)

        # only the first ten rows are inspected
        for row in range(10):
            dist = cls.distribution_for_instance(data.get_instance(row))
            self.assertIsNotNone(dist)
            self.assertEqual(size, len(dist), msg=message)
def test_generate_thresholdcurve_data(self):
    """
    Cross-validates NaiveBayes on the diabetes data and checks the size and
    some attribute names of the dataset produced by
    generate_thresholdcurve_data.
    """
    loader = converters.Loader(classname="weka.core.converters.ArffLoader")
    diabetes = loader.load_file(self.datafile("diabetes.arff"))
    diabetes.class_is_last()

    remove = filters.Filter(
        classname="weka.filters.unsupervised.attribute.Remove",
        options=["-R", "1-3"])
    cls = classifiers.Classifier(
        classname="weka.classifiers.bayes.NaiveBayes")
    # NOTE(review): the filtered classifier is configured here, but the
    # cross-validation below runs on the plain classifier - confirm intent.
    fc = classifiers.FilteredClassifier()
    fc.filter = remove
    fc.classifier = cls

    evl = classifiers.Evaluation(diabetes)
    evl.crossvalidate_model(cls, diabetes, 10, Random(1))

    curve = plot.generate_thresholdcurve_data(evl, 0)
    self.assertEqual(13, curve.num_attributes, msg="number of attributes differs")
    self.assertEqual(769, curve.num_instances, msg="number of rows differs")
    for attname in ("True Positives", "False Positive Rate", "Lift"):
        self.assertIsNotNone(
            curve.attribute_by_name(attname),
            msg="Failed to locate attribute: " + attname)
def test_stats(self):
    """
    Checks the numeric statistics reported for the attribute at index 3 of
    the anneal data.
    """
    loader = converters.Loader(classname="weka.core.converters.ArffLoader")
    anneal = loader.load_file(self.datafile("anneal.arff"))
    self.assertIsNotNone(anneal, msg="Failed to load data!")

    numstats = anneal.attribute_stats(3).numeric_stats

    # (expected value, property name, failure message)
    checks = (
        (898, "count", "count differs"),
        (70, "max", "max differs"),
        (3.635, "mean", "mean differs"),
        (0.0, "min", "min differs"),
        (13.717, "stddev", "stddev differs"),
        (3264, "sum", "sum differs"),
        (180636, "sumsq", "sumsq differs"),
    )
    for wanted, prop, message in checks:
        self.assertAlmostEqual(wanted, getattr(numstats, prop), places=3, msg=message)
def test_build_clusterer(self):
    """
    Tests building clusterers, both in batch mode (build_clusterer) and
    incrementally (update_clusterer / update_finished).
    """
    loader = converters.Loader(classname="weka.core.converters.ArffLoader")
    anneal = loader.load_file(self.datafile("anneal.arff"))
    self.assertIsNotNone(anneal)
    anneal.delete_last_attribute()

    # batch: the whole dataset is handed over at once
    kmeans = clusterers.Clusterer(classname="weka.clusterers.SimpleKMeans")
    self.assertIsNotNone(kmeans, msg="Failed to instantiate clusterer!")
    kmeans.build_clusterer(anneal)
    self.assertIsNotNone(
        str(kmeans),
        msg="Failed to obtain string representation of model")

    # incremental: initialise with the template, then feed row by row
    cobweb = clusterers.Clusterer(classname="weka.clusterers.Cobweb")
    self.assertIsNotNone(cobweb, msg="Failed to instantiate clusterer!")
    cobweb.build_clusterer(dataset.Instances.template_instances(anneal))
    for row in anneal:
        cobweb.update_clusterer(row)
    cobweb.update_finished()
    self.assertIsNotNone(
        str(cobweb),
        msg="Failed to obtain string representation of model")
def test_attribute_selection(self):
    """
    Tests attribute selection.

    Runs a BestFirst search with a CfsSubsetEval evaluator on the anneal
    data (last attribute as class) and checks the number of selected
    attributes, the selected indices and that a results string is produced.
    """
    loader = converters.Loader(classname="weka.core.converters.ArffLoader")
    data = loader.load_file(self.datafile("anneal.arff"))
    self.assertIsNotNone(data)
    data.class_is_last()

    # search strategy
    cname = "weka.attributeSelection.BestFirst"
    options = ["-D", "1", "-N", "5"]
    search = attribute_selection.ASSearch(classname=cname, options=options)
    self.assertIsNotNone(
        search,
        msg="Search should not be None: " + cname + "/" + str(options))

    # subset evaluator
    cname = "weka.attributeSelection.CfsSubsetEval"
    options = ["-P", "1", "-E", "1"]
    evaluation = attribute_selection.ASEvaluation(classname=cname, options=options)
    self.assertIsNotNone(
        evaluation,
        msg="Evaluation should not be None: " + cname + "/" + str(options))

    attsel = attribute_selection.AttributeSelection()
    # guard the AttributeSelection object itself, not the search object
    self.assertIsNotNone(attsel, msg="AttributeSelection should not be None!")
    attsel.search(search)
    attsel.evaluator(evaluation)
    attsel.select_attributes(data)

    self.assertEqual(
        9, attsel.number_attributes_selected,
        msg="number_attributes_selected differs")
    self.assertEqual(
        [0, 4, 8, 11, 12, 19, 24, 32, 33, 38],
        attsel.selected_attributes.tolist(),
        msg="selected_attributes differ")
    self.assertGreater(
        len(attsel.results_string), 0,
        msg="results_string should get produced")
def test_build_and_use_forecaster(self):
    """
    Tests building and using of a forecaster.

    The airline data is split with train_test_split(90.0); a WekaForecaster
    with a LinearRegression base forecaster is built on the training part,
    primed with the last 12 training rows and asked for as many forecasts
    as there are rows in the test part.
    """
    self._ensure_package_is_installed()
    loader = converters.Loader(classname="weka.core.converters.ArffLoader")
    data = loader.load_file(self.datafile("airline.arff"))
    self.assertIsNotNone(data, msg="Data should not be none")
    data.class_is_last()
    airline_train, airline_test = data.train_test_split(90.0)

    forecaster = timeseries.WekaForecaster()
    self.assertIsNotNone(forecaster)
    # the field is assigned once as a list and once as a plain string;
    # presumably this exercises both accepted forms of the setter - confirm
    forecaster.fields_to_forecast = ["passenger_numbers"]
    forecaster.base_forecaster = classifiers.Classifier(
        classname="weka.classifiers.functions.LinearRegression")
    forecaster.fields_to_forecast = "passenger_numbers"
    forecaster.build_forecaster(airline_train)

    # prime with the trailing rows of the training data
    num_prime_instances = 12
    airline_prime = dataset.Instances.copy_instances(
        airline_train,
        airline_train.num_instances - num_prime_instances,
        num_prime_instances)
    forecaster.prime_forecaster(airline_prime)

    num_future_forecasts = airline_test.num_instances
    preds = forecaster.forecast(num_future_forecasts)
    self.assertIsNotNone(preds, msg="Predictions should not be none")
    # one prediction is expected per requested forecast step
    self.assertEqual(
        len(preds), airline_test.num_instances,
        msg="# of predictions should equal the number of requested forecasts")
def test_batch_filtering(self):
    """
    Tests Filter.filter on a single dataset and on a list of datasets.
    """
    loader = converters.Loader(classname="weka.core.converters.ArffLoader")

    # single dataset: two attributes get removed, rows stay untouched
    anneal = loader.load_file(self.datafile("anneal.arff"))
    self.assertIsNotNone(anneal)
    remove = filters.Filter(
        classname="weka.filters.unsupervised.attribute.Remove",
        options=["-R", "1,3"])
    remove.inputformat(anneal)
    reduced = remove.filter(anneal)
    self.assertEqual(
        anneal.num_attributes - 2, reduced.num_attributes,
        msg="Number of attributes differ")
    self.assertEqual(
        anneal.num_instances, reduced.num_instances,
        msg="Number of instances differ")

    # multiple files
    train = loader.load_file(
        self.datafile("reutersTop10Randomized_1perc_shortened-train.arff"))
    self.assertIsNotNone(train)
    test = loader.load_file(
        self.datafile("reutersTop10Randomized_1perc_shortened-test.arff"))
    self.assertIsNotNone(test)
    word_vector = filters.Filter(
        classname="weka.filters.unsupervised.attribute.StringToWordVector")
    word_vector.inputformat(train)
    outputs = word_vector.filter([train, test])
    self.assertIsNone(
        outputs[0].equal_headers(outputs[1]),
        msg="Headers should be compatible")
def test_classes_to_clusters(self):
    """
    Tests the classes_to_clusters attribute of ClusterEvaluation.

    SimpleKMeans is built on a copy of the anneal data with the last
    attribute removed, then evaluated on a second copy that keeps the last
    attribute and uses it as the class.
    """
    loader = converters.Loader(classname="weka.core.converters.ArffLoader")
    data = loader.load_file(self.datafile("anneal.arff"))
    self.assertIsNotNone(data)

    # training copy: last attribute is dropped before clustering
    train = dataset.Instances.copy_instances(data, 0, data.num_instances)
    train.delete_last_attribute()
    cls = clusterers.Clusterer(classname="weka.clusterers.SimpleKMeans")
    self.assertIsNotNone(cls, msg="Failed to instantiate clusterer!")
    cls.build_clusterer(train)

    # evaluation copy: last attribute is kept and marked as class
    test = dataset.Instances.copy_instances(data, 0, data.num_instances)
    test.class_is_last()
    evl = clusterers.ClusterEvaluation()
    # guard the evaluation object itself, not the clusterer
    self.assertIsNotNone(evl, msg="Failed to instantiate evaluation!")
    evl.set_model(cls)
    evl.test_model(test)
    self.assertEqual(
        [2, 4], evl.classes_to_clusters.tolist(),
        msg="classes to clusters differs")
def test_reduce(self):
    """
    Tests reducing of attributes.

    After a BestFirst/CfsSubsetEval attribute selection on the anneal data,
    reduce_dimensionality must return a dataset with one attribute more
    than number_attributes_selected and the same number of rows.
    """
    loader = converters.Loader(classname="weka.core.converters.ArffLoader")
    data = loader.load_file(self.datafile("anneal.arff"))
    self.assertIsNotNone(data)
    data.class_is_last()

    # search strategy
    cname = "weka.attributeSelection.BestFirst"
    options = ["-D", "1", "-N", "5"]
    search = attribute_selection.ASSearch(classname=cname, options=options)
    self.assertIsNotNone(
        search,
        msg="Search should not be None: " + cname + "/" + str(options))

    # subset evaluator
    cname = "weka.attributeSelection.CfsSubsetEval"
    options = ["-P", "1", "-E", "1"]
    evaluation = attribute_selection.ASEvaluation(classname=cname, options=options)
    self.assertIsNotNone(
        evaluation,
        msg="Evaluation should not be None: " + cname + "/" + str(options))

    attsel = attribute_selection.AttributeSelection()
    # guard the AttributeSelection object itself, not the search object
    self.assertIsNotNone(attsel, msg="AttributeSelection should not be None!")
    attsel.search(search)
    attsel.evaluator(evaluation)
    attsel.select_attributes(data)

    reduced = attsel.reduce_dimensionality(data)
    self.assertEqual(
        attsel.number_attributes_selected + 1, reduced.num_attributes,
        msg="number of attributes differs")
    self.assertEqual(
        data.num_instances, reduced.num_instances,
        msg="number of rows differs")
def test_ranking(self):
    """
    Tests ranking of attributes.

    Uses a Ranker search with an InfoGainAttributeEval evaluator on the
    anneal data, with ranking and cross-validation switched on (2 folds,
    seed 42), and checks that ranked attributes and a results string are
    produced.
    """
    loader = converters.Loader(classname="weka.core.converters.ArffLoader")
    data = loader.load_file(self.datafile("anneal.arff"))
    self.assertIsNotNone(data)
    data.class_is_last()

    search = attribute_selection.ASSearch(
        classname="weka.attributeSelection.Ranker",
        options=["-N", "-1"])
    self.assertIsNotNone(search, msg="Search should not be None!")
    evaluation = attribute_selection.ASEvaluation(
        "weka.attributeSelection.InfoGainAttributeEval")
    self.assertIsNotNone(evaluation, msg="Evaluation should not be None!")

    attsel = attribute_selection.AttributeSelection()
    self.assertIsNotNone(attsel, msg="AttributeSelection should not be None!")
    attsel.ranking(True)
    attsel.folds(2)
    attsel.crossvalidation(True)
    attsel.seed(42)
    attsel.search(search)
    attsel.evaluator(evaluation)
    attsel.select_attributes(data)

    # each assertion names the property it actually inspects
    self.assertGreater(
        len(str(attsel.ranked_attributes)), 0,
        msg="ranked_attributes should get produced")
    self.assertGreater(
        len(attsel.results_string), 0,
        msg="results_string should get produced")
def test_distribution_for_instance(self):
    """
    Compares the cluster membership distributions of the first ten anneal
    rows, as returned by distribution_for_instance of a SimpleKMeans
    clusterer, with recorded values.
    """
    loader = converters.Loader(classname="weka.core.converters.ArffLoader")
    anneal = loader.load_file(self.datafile("anneal.arff"))
    self.assertIsNotNone(anneal)
    anneal.delete_last_attribute()

    kmeans = clusterers.Clusterer(classname="weka.clusterers.SimpleKMeans")
    self.assertIsNotNone(kmeans, msg="Failed to instantiate clusterer!")
    kmeans.build_clusterer(anneal)

    memberships = []
    for row in range(10):
        membership = kmeans.distribution_for_instance(anneal.get_instance(row))
        self.assertIsNotNone(
            membership,
            msg="Failed to obtain cluster membership for instance")
        self.assertEqual(2, len(membership), msg="Number of clusters differs")
        memberships.append(membership)

    expected = [
        [0., 1.], [1., 0.], [1., 0.], [0., 1.], [0., 1.],
        [0., 1.], [1., 0.], [1., 0.], [1., 0.], [1., 0.],
    ]
    self.assertEqual(
        len(expected), len(memberships),
        msg="Expected/predicted differ in length - update required!")
    # lengths are equal at this point, so pairing covers every row
    for wanted, actual in zip(expected, memberships):
        self.assertEqual(wanted, actual.tolist(), msg="Cluster distributions differ")
def load_Arff_file(file):
    """
    Loads a dataset through the Weka ArffLoader.

    :param file: the file to load; anything that is not already a plain
                 str is converted with str() first
    :return: whatever the loader's load_file call returns
    """
    # str() hands back an exact str unchanged, so no type check is needed
    arff_loader = converters.Loader(classname="weka.core.converters.ArffLoader")
    return arff_loader.load_file(str(file))
def test_clusterevaluation(self):
    """
    Tests the ClusterEvaluation class.

    First evaluates a SimpleKMeans model, built on the anneal data without
    its last attribute, on the first ten rows; then cross-validates an EM
    clusterer and checks the returned log-likelihood.
    """
    loader = converters.Loader(classname="weka.core.converters.ArffLoader")
    data = loader.load_file(self.datafile("anneal.arff"))
    self.assertIsNotNone(data)
    data.delete_last_attribute()

    # simple test set
    test = dataset.Instances.copy_instances(data, 0, 10)
    cls = clusterers.Clusterer(classname="weka.clusterers.SimpleKMeans")
    self.assertIsNotNone(cls, msg="Failed to instantiate clusterer!")
    cls.build_clusterer(data)
    evl = clusterers.ClusterEvaluation()
    # guard the evaluation object itself, not the clusterer
    self.assertIsNotNone(evl, msg="Failed to instantiate evaluation!")
    evl.set_model(cls)
    evl.test_model(test)
    self.assertGreater(len(evl.cluster_results), 0, msg="No evaluation string!")
    self.assertEqual(0.0, evl.log_likelihood, msg="log_likelihood differs")
    self.assertEqual(2, evl.num_clusters, msg="num_clusters differs")
    # assertEqual, as the assertEquals alias is deprecated and gone in 3.12
    self.assertEqual(
        [1., 0., 0., 1., 1., 1., 0., 0., 0., 0.],
        evl.cluster_assignments.tolist(),
        msg="cluster_assignments differs")

    # cross-validation
    cls = clusterers.Clusterer(
        classname="weka.clusterers.EM",
        options=["-I", "3", "-X", "2", "-max", "5"])
    self.assertIsNotNone(cls, msg="Failed to instantiate clusterer!")
    llh = clusterers.ClusterEvaluation.crossvalidate_model(
        cls, data, 10, classes.Random(1))
    self.assertAlmostEqual(
        -34.397, llh, places=3,
        msg="Failed to cross-validate clusterer!")
def test_batchpredictor(self):
    """
    Exercises the batch prediction API of a J48 classifier on the anneal
    data.
    """
    loader = converters.Loader(classname="weka.core.converters.ArffLoader")
    anneal = loader.load_file(self.datafile("anneal.arff"))
    self.assertIsNotNone(anneal)
    anneal.class_is_last()

    cname = "weka.classifiers.trees.J48"
    options = []
    tree = classifiers.Classifier(classname=cname, options=options)
    self.assertIsNotNone(
        tree,
        msg="Failed to instantiate: " + cname + "/" + str(options))

    # has to report itself as a batch predictor
    self.assertTrue(
        tree.is_batchpredictor,
        msg="not a batch predictor: " + cname + "/" + str(options))

    # only called; the returned value is not checked
    tree.has_efficient_batch_prediction()

    # a batch size has to be available
    self.assertIsNotNone(tree.batch_size, msg="batch size is not initialized")

    # distributions for the whole dataset in one call
    tree.build_classifier(anneal)
    dists = tree.distributions_for_instances(anneal)
    self.assertIsNotNone(dists, msg="no distributions generated")
    self.assertEqual(len(dists), len(anneal), msg="number of predictions differ")
    self.assertEqual(
        len(dists[0]), anneal.class_attribute.num_values,
        msg="size of distribution array does not match number of classes")
def test_incremental_filtering(self):
    """
    Tests row-by-row filtering through Filter.input and Filter.output.
    """
    loader = converters.Loader(classname="weka.core.converters.ArffLoader")
    anneal = loader.load_file(self.datafile("anneal.arff"))
    self.assertIsNotNone(anneal)

    remove = filters.Filter(
        classname="weka.filters.unsupervised.attribute.Remove",
        options=["-R", "1,3"])
    remove.inputformat(anneal)

    # the output format serves as container for the filtered rows
    collected = remove.outputformat()
    self.assertIsNotNone(collected)
    self.assertTrue(
        isinstance(collected, dataset.Instances),
        msg="Should be Instances object")

    for row in anneal:
        remove.input(row)
        collected.add_instance(remove.output())

    self.assertEqual(
        anneal.num_attributes - 2, collected.num_attributes,
        msg="Number of attributes differ")
    self.assertEqual(
        anneal.num_instances, collected.num_instances,
        msg="Number of instances differ")
def test_matrix_plot(self):
    """
    Calls matrix_plot on the iris data; no result is asserted.
    """
    arff_loader = converters.Loader("weka.core.converters.ArffLoader")
    iris_data = arff_loader.load_file(self.datafile("iris.arff"))
    iris_data.class_is_last()
    plot.matrix_plot(
        iris_data,
        percent=50,
        title="Matrix plot iris",
        wait=False)
def test_instance(self):
    """
    Tests the Instance class: class index handling of the owning dataset,
    missing values, string/numeric value access, weights and the creation
    of dense and sparse instances.
    """
    def check_three_values(instance, expected):
        # shared checks for the freshly created three-value instances
        self.assertEqual(3, instance.num_attributes, msg="#attributes differ")
        for pos, wanted in enumerate(expected):
            self.assertEqual(
                wanted, instance.get_value(pos),
                msg="value at #" + str(pos) + " differs")

    loader = converters.Loader(classname="weka.core.converters.ArffLoader")
    data = loader.load_file(self.datafile("anneal.ORIG.arff"))
    self.assertIsNotNone(data, msg="Failed to load data!")

    first = data.get_instance(0)
    self.assertEqual(39, first.num_attributes, msg="num_attributes differs")

    # class index: unset after loading, then set in three different ways
    self.assertEqual(-1, data.class_index, msg="class_index differs")
    data.class_index = data.num_attributes - 1
    self.assertEqual(38, data.class_index, msg="class_index differs")
    data.class_is_first()
    self.assertEqual(0, data.class_index, msg="class_index differs")
    data.class_is_last()
    self.assertEqual(38, data.class_index, msg="class_index differs")

    self.assertIsNotNone(first.dataset, msg="Dataset reference should not be None!")

    # missing values
    self.assertTrue(first.has_missing(), msg="Should have missing values")
    self.assertTrue(first.is_missing(0), msg="First value should be missing")
    self.assertFalse(first.is_missing(1), msg="Second value should not be missing")

    # string value access
    self.assertEqual("C", first.get_string_value(1), msg="string value differs")
    first.set_string_value(1, "H")
    self.assertEqual("H", first.get_string_value(1), msg="string value differs")

    # numeric value access
    self.assertEqual(8, first.get_value(3), msg="numeric value differs")
    first.set_value(3, 6.3)
    self.assertEqual(6.3, first.get_value(3), msg="numeric value differs")

    # weight
    self.assertEqual(1, first.weight, msg="weight should be 1")
    first.weight = 0.5
    self.assertEqual(0.5, first.weight, msg="weights differ")

    # dense instance from a list of values
    values = [1.0, 2.0, 3.0]
    dense = dataset.Instance.create_instance(
        values, classname="weka.core.DenseInstance")
    check_three_values(dense, (1.0, 2.0, 3.0))

    # sparse instance from a full list of values
    values = [0.0, 2.0, 0.0]
    sparse = dataset.Instance.create_instance(
        values, classname="weka.core.SparseInstance")
    check_three_values(sparse, (0.0, 2.0, 0.0))

    # sparse instance from (index, value) pairs plus the total size
    values = [(1, 2.0)]
    sparse = dataset.Instance.create_sparse_instance(
        values, 3, classname="weka.core.SparseInstance")
    check_three_values(sparse, (0.0, 2.0, 0.0))
def test_arff_loader(self):
    """
    Loads the anneal data with an ArffLoader and checks the row count.
    """
    arff_loader = converters.Loader(classname="weka.core.converters.ArffLoader")
    anneal = arff_loader.load_file(self.datafile("anneal.arff"))
    self.assertIsNotNone(anneal)
    self.assertEqual(
        898, anneal.num_instances,
        msg="Number of instances differs!")
def test_incremental_arff_loader(self):
    """
    Loads the anneal data incrementally with an ArffLoader and counts the
    rows obtained by iterating over the loader.
    """
    arff_loader = converters.Loader(classname="weka.core.converters.ArffLoader")
    header = arff_loader.load_file(self.datafile("anneal.arff"), incremental=True)
    self.assertIsNotNone(header)

    # in incremental mode the rows come from iterating the loader itself
    rows_seen = sum(1 for _ in arff_loader)
    self.assertEqual(898, rows_seen, msg="Number of instances differs!")
def test_scatter_plot(self):
    """
    Calls scatter_plot for two iris attributes; no result is asserted.
    """
    arff_loader = converters.Loader("weka.core.converters.ArffLoader")
    iris_data = arff_loader.load_file(self.datafile("iris.arff"))
    iris_data.class_is_last()

    # attribute indices are looked up by name
    width_index = iris_data.attribute_by_name("petalwidth").index
    length_index = iris_data.attribute_by_name("petallength").index
    plot.scatter_plot(
        iris_data,
        width_index,
        length_index,
        percent=50,
        wait=False)
def test_learning_curve(self):
    """
    Calls plot_learning_curve for two classifiers on the diabetes data,
    once with default arguments and once with increments=0.1; no result is
    asserted.
    """
    arff_loader = converters.Loader(classname="weka.core.converters.ArffLoader")
    diabetes = arff_loader.load_file(self.datafile("diabetes.arff"))
    diabetes.class_is_last()

    tree = classifiers.Classifier(classname="weka.classifiers.trees.J48")
    bayes = classifiers.Classifier(classname="weka.classifiers.bayes.NaiveBayesUpdateable")
    candidates = [tree, bayes]

    plot.plot_learning_curve(candidates, diabetes, wait=False)
    plot.plot_learning_curve(candidates, diabetes, increments=0.1, wait=False)
def test_plot_classifier_errors(self):
    """
    Cross-validates LinearRegression on the bolts data and passes the
    collected predictions to plot_classifier_errors; no plot result is
    asserted.
    """
    arff_loader = converters.Loader(classname="weka.core.converters.ArffLoader")
    bolts_data = arff_loader.load_file(self.datafile("bolts.arff"))
    self.assertIsNotNone(bolts_data)
    bolts_data.class_is_last()

    regression = classifiers.Classifier(
        classname="weka.classifiers.functions.LinearRegression",
        options=["-S", "1", "-C"])
    evl = classifiers.Evaluation(bolts_data)
    evl.crossvalidate_model(regression, bolts_data, 10, Random(42))

    plot.plot_classifier_errors(evl.predictions, wait=False)
def test_plot_dot_graph(self):
    """
    Builds a J48 model on the diabetes data and passes its graph to
    plot_dot_graph; no result is asserted.
    """
    arff_loader = converters.Loader(classname="weka.core.converters.ArffLoader")
    diabetes = arff_loader.load_file(self.datafile("diabetes.arff"))
    diabetes.class_is_last()

    tree = classifiers.Classifier(
        classname="weka.classifiers.trees.J48",
        options=["-C", "0.3"])
    tree.build_classifier(diabetes)

    graph.plot_dot_graph(tree.graph)
def test_arff_saver(self):
    """
    Saves the anneal data with an ArffSaver and checks that the output file
    exists afterwards.
    """
    arff_loader = converters.Loader(classname="weka.core.converters.ArffLoader")
    anneal = arff_loader.load_file(self.datafile("anneal.arff"))
    self.assertIsNotNone(anneal)

    target = self.tempfile("out.arff")
    # remove the target first, so the existence check below is meaningful
    self.delfile(target)

    arff_saver = converters.Saver(classname="weka.core.converters.ArffSaver")
    arff_saver.save_file(anneal, target)
    self.assertTrue(os.path.exists(target), "File does not exist: " + target)

    # clean up the written file
    self.delfile(target)
def test_build_associator(self):
    """
    Tests the build_associations method with Apriori on the nursery data.
    """
    arff_loader = converters.Loader(classname="weka.core.converters.ArffLoader")
    nursery = arff_loader.load_file(self.datafile("nursery.arff"))
    self.assertIsNotNone(nursery)
    nursery.class_is_last()

    cname = "weka.associations.Apriori"
    options = None
    apriori = associations.Associator(classname=cname, options=options)
    self.assertIsNotNone(
        apriori,
        msg="Failed to instantiate: " + cname + "/" + str(options))
    apriori.build_associations(nursery)
def test_plot_prc(self):
    """
    Cross-validates NaiveBayes on the diabetes data and calls plot_prc for
    class indices 0 and 1; no result is asserted.
    """
    arff_loader = converters.Loader(classname="weka.core.converters.ArffLoader")
    diabetes = arff_loader.load_file(self.datafile("diabetes.arff"))
    diabetes.class_is_last()

    remove = filters.Filter(
        classname="weka.filters.unsupervised.attribute.Remove",
        options=["-R", "1-3"])
    cls = classifiers.Classifier(classname="weka.classifiers.bayes.NaiveBayes")
    # NOTE(review): the filtered classifier is configured here, but the
    # cross-validation below runs on the plain classifier - confirm intent.
    fc = classifiers.FilteredClassifier()
    fc.filter = remove
    fc.classifier = cls

    evl = classifiers.Evaluation(diabetes)
    evl.crossvalidate_model(cls, diabetes, 10, Random(1))

    plot.plot_prc(evl, class_index=[0, 1], wait=False)
def test_attributestats(self):
    """
    Checks the AttributeStats values reported for the attribute at index 2
    of the anneal data.
    """
    arff_loader = converters.Loader(classname="weka.core.converters.ArffLoader")
    anneal = arff_loader.load_file(self.datafile("anneal.arff"))
    self.assertIsNotNone(anneal, msg="Failed to load data!")

    stats = anneal.attribute_stats(2)
    self.assertIsNotNone(stats, msg="Failed to obtain stats!")

    # counts and weights are expected to carry the same per-label numbers
    per_label = [86, 256, 440, 0, 51, 20, 10, 19, 16]

    self.assertEqual(8, stats.distinct_count, "distinct_count differs")
    self.assertEqual(898, stats.int_count, "int_count differs")
    self.assertEqual(0, stats.missing_count, "missing_count differs")
    self.assertEqual(
        per_label, stats.nominal_counts.tolist(),
        "nominal_counts differs")
    self.assertEqual(
        per_label, stats.nominal_weights.tolist(),
        "nominal_weights differs")
    self.assertEqual(898, stats.total_count, "total_count differs")
    self.assertEqual(0, stats.unique_count, "unique_count differs")
def test_plot_cluster_assignments(self):
    """
    Clusters the iris data (last attribute removed) with SimpleKMeans and
    passes the evaluation to plot_cluster_assignments; no result is
    asserted.
    """
    arff_loader = converters.Loader("weka.core.converters.ArffLoader")
    iris = arff_loader.load_file(self.datafile("iris.arff"))
    iris.delete_last_attribute()

    # build the clusterer
    kmeans = clusterers.Clusterer(
        classname="weka.clusterers.SimpleKMeans",
        options=["-N", "3"])
    kmeans.build_clusterer(iris)

    # evaluate on the same data the model was built on
    evl = clusterers.ClusterEvaluation()
    evl.set_model(kmeans)
    evl.test_model(iris)

    plot.plot_cluster_assignments(evl, iris, inst_no=True, wait=False)