def __init__(self, instance, selected_features, classifiers):
    """
    Initializes this object with its instance, feature selection and an
    independent copy of every supplied classifier.

    :param instance: the weka.core.dataset.Instance to wrap
    :param selected_features: set of selected feature indices
    :param classifiers: iterable of MyClassifier objects (classifier developed for the algorithm)
    """
    self.instance = instance  # weka.core.dataset.Instance
    self.selected_features = selected_features  # set()
    self.rate_distrib = None
    # copy each underlying weka classifier so this object owns its own models
    self.classifiers = [
        MyClassifier(Classifier.make_copy(c.classifier), c.features, c.weight)
        for c in classifiers
    ]
def plot_learning_curve(classifiers, train, test=None, increments=100, metric="percent_correct",
                        title="Learning curve", label_template="[#] @ $", key_loc="lower right",
                        outfile=None, wait=True):
    """
    Plots a learning curve: each classifier is trained on increasingly large
    prefixes of the training set and the chosen evaluation metric is plotted
    against the number of instances seen.

    :param classifiers: list of Classifier template objects
    :type classifiers: list of Classifier
    :param train: dataset to use for the building the classifier, used for evaluating it test set None
    :type train: Instances
    :param test: optional dataset to use for the testing the built classifiers
    :type test: Instances
    :param increments: the increments (>= 1: # of instances, <1: percentage of dataset)
    :type increments: float
    :param metric: the name of the numeric metric to plot (Evaluation.<metric>)
    :type metric: str
    :param title: the title for the plot
    :type title: str
    :param label_template: the template for the label in the plot (#: 1-based index, @: full classname, !: simple classname, $: options)
    :type label_template: str
    :param key_loc: the location string for the key
    :type key_loc: str
    :param outfile: the output file, ignored if None
    :type outfile: str
    :param wait: whether to wait for the user to close the plot
    :type wait: bool
    """
    if not plot.matplotlib_available:
        logger.error("Matplotlib is not installed, plotting unavailable!")
        return
    if not train.has_class():
        logger.error("Training set has no class attribute set!")
        return
    # equal_headers returns None when compatible, otherwise an error message
    if (test is not None) and (train.equal_headers(test) is not None):
        logger.error("Training and test set are not compatible: " + train.equal_headers(test))
        return

    # increments >= 1 is an absolute instance count, < 1 a fraction of the dataset
    if increments >= 1:
        inc = increments
    else:
        inc = round(train.num_instances * increments)

    steps = []
    cls = []
    evls = {}  # one list of metric values per classifier copy
    for classifier in classifiers:
        cl = Classifier.make_copy(classifier)
        cls.append(cl)
        evls[cl] = []

    # when no test set is supplied, evaluate on the training data itself
    if test is None:
        tst = train
    else:
        tst = test

    for i in xrange(train.num_instances):
        if (i > 0) and (i % inc == 0):
            steps.append(i+1)
        for cl in cls:
            # train: updateable classifiers are built once on a single
            # instance and then updated incrementally; batch classifiers are
            # rebuilt from scratch at every increment boundary
            if cl.is_updateable:
                if i == 0:
                    tr = Instances.copy_instances(train, 0, 1)
                    cl.build_classifier(tr)
                else:
                    cl.update_classifier(train.get_instance(i))
            else:
                if (i > 0) and (i % inc == 0):
                    tr = Instances.copy_instances(train, 0, i + 1)
                    cl.build_classifier(tr)
            # evaluate at every increment boundary
            if (i > 0) and (i % inc == 0):
                evl = Evaluation(tst)
                evl.test_model(cl, tst)
                evls[cl].append(getattr(evl, metric))

    fig, ax = plt.subplots()
    ax.set_xlabel("# of instances")
    ax.set_ylabel(metric)
    ax.set_title(title)
    fig.canvas.set_window_title(title)
    ax.grid(True)
    i = 0
    for cl in cls:
        evl = evls[cl]
        i += 1
        # expand the placeholders of the label template for this classifier
        plot_label = label_template.\
            replace("#", str(i)).\
            replace("@", cl.classname).\
            replace("!", cl.classname[cl.classname.rfind(".") + 1:]).\
            replace("$", join_options(cl.config))
        ax.plot(steps, evl, label=plot_label)
    plt.draw()
    plt.legend(loc=key_loc, shadow=True)
    if outfile is not None:
        plt.savefig(outfile)
    if wait:
        plt.show()
def plot_learning_curve(classifiers, train, test=None, increments=100, metric="percent_correct",
                        title="Learning curve", label_template="[#] @ $", key_loc="lower right",
                        outfile=None, wait=True):
    """
    Plots a learning curve.

    :param classifiers: list of Classifier template objects
    :type classifiers: list of Classifier
    :param train: dataset to use for the building the classifier, used for evaluating it test set None
    :type train: Instances
    :param test: optional dataset (or list of datasets) to use for the testing the built classifiers
    :type test: list or Instances
    :param increments: the increments (>= 1: # of instances, <1: percentage of dataset)
    :type increments: float
    :param metric: the name of the numeric metric to plot (Evaluation.<metric>)
    :type metric: str
    :param title: the title for the plot
    :type title: str
    :param label_template: the template for the label in the plot (#: 1-based index of classifier, @: full classname, !: simple classname, $: options, *: 1-based index of test set)
    :type label_template: str
    :param key_loc: the location string for the key
    :type key_loc: str
    :param outfile: the output file, ignored if None
    :type outfile: str
    :param wait: whether to wait for the user to close the plot
    :type wait: bool
    """
    if not plot.matplotlib_available:
        logger.error("Matplotlib is not installed, plotting unavailable!")
        return
    if not train.has_class():
        logger.error("Training set has no class attribute set!")
        return

    # increments >= 1 is an absolute instance count, < 1 a fraction of the dataset
    if increments >= 1:
        inc = increments
    else:
        inc = round(train.num_instances * increments)

    # normalize "test" into a list of datasets; when none is supplied,
    # the classifiers are evaluated on the training data itself
    if test is None:
        tst = [train]
    elif isinstance(test, list):
        tst = test
    elif isinstance(test, Instances):
        tst = [test]
    else:
        # str() is required here: concatenating a type object to a str
        # raises TypeError and would mask the intended error message
        logger.error("Expected list or Instances object, instead: " + str(type(test)))
        return
    # equal_headers returns None when compatible, otherwise an error message
    for t in tst:
        if train.equal_headers(t) is not None:
            logger.error("Training and test set are not compatible: " + train.equal_headers(t))
            return

    steps = []
    cls = []
    evls = {}  # classifier copy -> test set -> list of metric values
    for classifier in classifiers:
        cl = Classifier.make_copy(classifier)
        cls.append(cl)
        evls[cl] = {}
        for t in tst:
            evls[cl][t] = []

    for i in xrange(train.num_instances):
        if (i > 0) and (i % inc == 0):
            steps.append(i + 1)
        for cl in cls:
            # train: updateable classifiers are built once on a single
            # instance and then updated incrementally; batch classifiers are
            # rebuilt from scratch at every increment boundary
            if cl.is_updateable:
                if i == 0:
                    tr = Instances.copy_instances(train, 0, 1)
                    cl.build_classifier(tr)
                else:
                    cl.update_classifier(train.get_instance(i))
            else:
                if (i > 0) and (i % inc == 0):
                    tr = Instances.copy_instances(train, 0, i + 1)
                    cl.build_classifier(tr)
            # evaluate against every test set at each increment boundary
            if (i > 0) and (i % inc == 0):
                for t in tst:
                    evl = Evaluation(t)
                    evl.test_model(cl, t)
                    evls[cl][t].append(getattr(evl, metric))

    fig, ax = plt.subplots()
    ax.set_xlabel("# of instances")
    ax.set_ylabel(metric)
    ax.set_title(title)
    fig.canvas.set_window_title(title)
    ax.grid(True)
    i = 0
    for cl in cls:
        evlpertest = evls[cl]
        i += 1
        n = 0
        for t in tst:
            evl = evlpertest[t]
            n += 1
            # expand the placeholders of the label template
            plot_label = label_template.\
                replace("#", str(i)).\
                replace("*", str(n)).\
                replace("@", cl.classname).\
                replace("!", cl.classname[cl.classname.rfind(".") + 1:]).\
                replace("$", join_options(cl.config))
            ax.plot(steps, evl, label=plot_label)
    plt.draw()
    plt.legend(loc=key_loc, shadow=True)
    if outfile is not None:
        plt.savefig(outfile)
    if wait:
        plt.show()
def perceptron_classifier(cls, features, settings):
    """
    Loads the sound-features dataset, trains a MultilayerPerceptron with the
    supplied settings, cross-validates it, and classifies a new instance
    built from `features`, printing a summary and returning the cat/dog
    class probabilities.

    :param cls: unused as an input; the name is reassigned inside the CV loop
    :param features: numeric feature values for the instance to classify
    :param settings: dict with 'learningRate' and 'trainingTime' entries
    :return: dict with 'cat' and 'dog' percentages rounded to 2 decimals
             (assumes the dataset has exactly these two classes, in order)
    """
    # load the dataset
    loader = Loader("weka.core.converters.ArffLoader")
    instancias = loader.load_file(
        "./src/results/caracteristicas_sounds.arff")
    # flag the last attribute as the class
    instancias.class_is_last()
    # define the parameters
    learning_rate = str(settings['learningRate'])
    training_time = str(settings['trainingTime'])
    momentum = "0.2"
    hidden_layers = "a"  # WEKA 'a' = (attributes + classes) / 2 hidden units
    seed = 2
    cross_validation = 20
    print('Learning Rate', learning_rate)
    print('Training Time', training_time)
    # build the Multilayer Perceptron classifier with the parameters defined above
    classifier = Classifier(
        classname="weka.classifiers.functions.MultilayerPerceptron",
        options=[
            "-L", learning_rate, "-M", momentum, "-N", training_time, "-V",
            "0", "-S", str(seed), "-E", "20", "-H", hidden_layers
        ])
    # build the classifier on the full dataset and prepare the evaluation
    classifier.build_classifier(instancias)
    evaluation = Evaluation(instancias)
    # apply cross-validation
    rnd = Random(seed)
    rand_data = Instances.copy_instances(instancias)
    rand_data.randomize(rnd)
    if rand_data.class_attribute.is_nominal:
        rand_data.stratify(cross_validation)
    # NOTE(review): the folds below are drawn from the original `instancias`,
    # not from the shuffled/stratified `rand_data` — confirm this is intended.
    for i in range(cross_validation):
        # training instances for this fold
        train = instancias.train_cv(cross_validation, i)
        # test instances for this fold
        test = instancias.test_cv(cross_validation, i)
        # build and evaluate a fresh copy of the classifier
        cls = Classifier.make_copy(classifier)
        cls.build_classifier(train)
        evaluation.test_model(cls, test)
    # create a new instance from the extracted features
    new_instance = Instance.create_instance(features)
    # add the new instance to the dataset
    instancias.add_instance(new_instance)
    # link the new instance to a dataset header so it can be classified
    # NOTE(review): `train` here is the *last CV fold*, not `instancias`;
    # this looks accidental — verify which dataset header is intended.
    new_instance.dataset = train
    # classify the new instance, obtaining per-class membership probabilities
    classification = classifier.distribution_for_instance(new_instance)
    result = {
        'cat': round(classification[0] * 100, 2),
        'dog': round(classification[1] *
                     100, 2)
    }
    print("=== Setup ===")
    print("Classifier: " + classifier.to_commandline())
    print("Dataset: " + instancias.relationname)
    print("Cross Validation: " + str(cross_validation) + "folds")
    print("Seed: " + str(seed))
    print("")
    print(
        evaluation.summary("=== " + str(cross_validation) +
                           " -fold Cross-Validation ==="))
    print("Classificação", " - Gato: ", result['cat'], " Cachorro: ",
          result['dog'])
    return result
def classification(data, train, test, num_clases):
    """
    Trains a battery of cost-sensitive base classifiers and collects their
    predictions on both the training and the test set.

    :param data: dataset parameter kept for interface compatibility (unused)
    :param train: training set (class attribute must be the last attribute)
    :param test: test set with the same header as `train`
    :param num_clases: index into the cost-matrix list (matrix sized to the
                       number of classes)
    :return: tuple (results_train, results_test) of DataFrames; column 'real'
             holds the true labels, columns 'p0'..'p17' the predicted labels
             of each base classifier
    """
    baseClassifiers_list = [
        "weka.classifiers.bayes.NaiveBayes",
        "weka.classifiers.functions.MultilayerPerceptron",
        "weka.classifiers.functions.SMO", "weka.classifiers.lazy.IBk",
        "weka.classifiers.lazy.KStar", "weka.classifiers.meta.AdaBoostM1",
        "weka.classifiers.meta.Bagging", "weka.classifiers.meta.LogitBoost",
        "weka.classifiers.trees.J48", "weka.classifiers.trees.DecisionStump",
        "weka.classifiers.trees.LMT", "weka.classifiers.trees.RandomForest",
        "weka.classifiers.trees.REPTree", "weka.classifiers.rules.PART",
        "weka.classifiers.rules.JRip", "weka.classifiers.functions.Logistic",
        "weka.classifiers.meta.ClassificationViaRegression",
        "weka.classifiers.bayes.BayesNet"
    ]
    # symmetric linear-distance cost matrices, indexed by number of classes
    cost_matrix_list = [
        "[]",
        "[0]",
        "[0.0 1.0; 1.0 0.0]",
        "[0.0 1.0 2.0; 1.0 0.0 1.0; 2.0 1.0 0.0]",
        "[0.0 1.0 2.0 3.0; 1.0 0.0 1.0 2.0; 2.0 1.0 0.0 1.0; 3.0 2.0 1.0 0.0]",
        "[0.0 1.0 2.0 3.0 4.0; 1.0 0.0 1.0 2.0 3.0; 2.0 1.0 0.0 1.0 2.0; 3.0 2.0 1.0 0.0 1.0; 4.0 3.0 2.0 1.0 0.0]",
        "[0.0 1.0 2.0 3.0 4.0 5.0; 1.0 0.0 1.0 2.0 3.0 4.0; 2.0 1.0 0.0 1.0 2.0 3.0; 3.0 2.0 1.0 0.0 1.0 2.0; 4.0 3.0 2.0 1.0 0.0 1.0; 5.0 4.0 3.0 2.0 1.0 0.0]",
        "[0.0 1.0 2.0 3.0 4.0 5.0 6.0; 1.0 0.0 1.0 2.0 3.0 4.0 5.0; 2.0 1.0 0.0 1.0 2.0 3.0 4.0; 3.0 2.0 1.0 0.0 1.0 2.0 3.0; 4.0 3.0 2.0 1.0 0.0 1.0 2.0; 5.0 4.0 3.0 2.0 1.0 0.0 1.0; 6.0 5.0 4.0 3.0 2.0 1.0 0.0]",
        "[0.0 1.0 2.0 3.0 4.0 5.0 6.0 7.0; 1.0 0.0 1.0 2.0 3.0 4.0 5.0 6.0; 2.0 1.0 0.0 1.0 2.0 3.0 4.0 5.0; 3.0 2.0 1.0 0.0 1.0 2.0 3.0 4.0; 4.0 3.0 2.0 1.0 0.0 1.0 2.0 3.0; 5.0 4.0 3.0 2.0 1.0 0.0 1.0 2.0; 6.0 5.0 4.0 3.0 2.0 1.0 0.0 1.0; 7.0 6.0 5.0 4.0 3.0 2.0 1.0 0.0]",
        "[0.0 1.0 2.0 3.0 4.0 5.0 6.0 7.0 8.0; 1.0 0.0 1.0 2.0 3.0 4.0 5.0 6.0 7.0; 2.0 1.0 0.0 1.0 2.0 3.0 4.0 5.0 6.0; 3.0 2.0 1.0 0.0 1.0 2.0 3.0 4.0 5.0; 4.0 3.0 2.0 1.0 0.0 1.0 2.0 3.0 4.0; 5.0 4.0 3.0 2.0 1.0 0.0 1.0 2.0 3.0; 6.0 5.0 4.0 3.0 2.0 1.0 0.0 1.0 2.0; 7.0 6.0 5.0 4.0 3.0 2.0 1.0 0.0 1.0; 8.0 7.0 6.0 5.0 4.0 3.0 2.0 1.0 0.0]",
        "[0.0 1.0 2.0 3.0 4.0 5.0 6.0 7.0 8.0 9.0; 1.0 0.0 1.0 2.0 3.0 4.0 5.0 6.0 7.0 8.0; 2.0 1.0 0.0 1.0 2.0 3.0 4.0 5.0 6.0 7.0; 3.0 2.0 1.0 0.0 1.0 2.0 3.0 4.0 5.0 6.0; 4.0 3.0 2.0 1.0 0.0 1.0 2.0 3.0 4.0 5.0; 5.0 4.0 3.0 2.0 1.0 0.0 1.0 2.0 3.0 4.0; 6.0 5.0 4.0 3.0 2.0 1.0 0.0 1.0 2.0 3.0; 7.0 6.0 5.0 4.0 3.0 2.0 1.0 0.0 1.0 2.0; 8.0 7.0 6.0 5.0 4.0 3.0 2.0 1.0 0.0 1.0; 9.0 8.0 7.0 6.0 5.0 4.0 3.0 2.0 1.0 0.0]"
    ]

    results_train = pd.DataFrame()
    results_test = pd.DataFrame()
    # the real labels of each dataset (last attribute value of every instance)
    results_train['real'] = [
        train.get_instance(i).values[train.num_attributes - 1]
        for i in range(train.num_instances)
    ]
    results_test['real'] = [
        test.get_instance(i).values[test.num_attributes - 1]
        for i in range(test.num_instances)
    ]

    for num, clas in enumerate(baseClassifiers_list):
        # np.str was removed in NumPy 1.24; the builtin str is equivalent here
        column = "p" + str(num)
        # wrap the base classifier in a cost-sensitive meta classifier
        classifier = SingleClassifierEnhancer(
            classname="weka.classifiers.meta.CostSensitiveClassifier",
            options=[
                "-cost-matrix", cost_matrix_list[num_clases], "-M", "-S", "1"
            ])
        classifier.classifier = Classifier(classname=clas)
        classifier.build_classifier(train)
        # AddClassification appends the predicted class as an extra attribute
        addcls = Filter(
            classname="weka.filters.supervised.attribute.AddClassification",
            options=["-classification"])
        addcls.set_property("classifier", Classifier.make_copy(classifier))
        addcls.inputformat(train)
        pred_train = addcls.filter(train)  # first filter call trains the classifier
        pred_test = addcls.filter(test)
        # the prediction is the last attribute of the filtered instances
        results_train[column] = [
            pred_train.get_instance(i).values[pred_train.num_attributes - 1]
            for i in range(pred_train.num_instances)
        ]
        results_test[column] = [
            pred_test.get_instance(i).values[pred_test.num_attributes - 1]
            for i in range(pred_test.num_instances)
        ]
    return results_train, results_test
def main():
    """
    Just runs some example code.
    """
    # load the example dataset and mark the last attribute as the class
    data_file = helper.get_data_dir() + os.sep + "vote.arff"
    helper.print_info("Loading dataset: " + data_file)
    loader = Loader("weka.core.converters.ArffLoader")
    data = loader.load_file(data_file)
    data.class_is_last()

    # classifier template; every fold gets its own copy
    classifier = Classifier(classname="weka.classifiers.trees.J48")

    # shuffle (and, for nominal classes, stratify) a copy of the data
    folds = 10
    seed = 1
    rng = Random(seed)
    shuffled = Instances.copy_instances(data)
    shuffled.randomize(rng)
    if shuffled.class_attribute.is_nominal:
        shuffled.stratify(folds)

    # cross-validate and collect the per-instance predictions
    predicted_data = None
    evaluation = Evaluation(shuffled)
    for fold in xrange(folds):
        train = shuffled.train_cv(folds, fold)
        # the above call matches the StratifiedRemoveFolds filter; the
        # Explorer/Experimenter would use:
        # train = shuffled.train_cv(folds, fold, rng)
        test = shuffled.test_cv(folds, fold)

        # build and evaluate a fresh copy of the template on this fold
        fold_cls = Classifier.make_copy(classifier)
        fold_cls.build_classifier(train)
        evaluation.test_model(fold_cls, test)

        # append prediction, distribution and error flag to each test instance;
        # setting the classifier as a java object avoids option-quoting issues
        addcls = Filter(
            classname="weka.filters.supervised.attribute.AddClassification",
            options=["-classification", "-distribution", "-error"])
        addcls.set_property("classifier", Classifier.make_copy(classifier))
        addcls.inputformat(train)
        addcls.filter(train)  # trains the classifier
        pred = addcls.filter(test)
        if predicted_data is None:
            predicted_data = Instances.template_instances(pred, 0)
        for idx in xrange(pred.num_instances):
            predicted_data.add_instance(pred.get_instance(idx))

    print("")
    print("=== Setup ===")
    print("Classifier: " + classifier.to_commandline())
    print("Dataset: " + data.relationname)
    print("Folds: " + str(folds))
    print("Seed: " + str(seed))
    print("")
    print(evaluation.summary("=== " + str(folds) + " -fold Cross-Validation ==="))
    print("")
    print(predicted_data)
rand_data.randomize(rnd) if rand_data.class_attribute.is_nominal: rand_data.stratify(folds) # perform cross-validation and add predictions predicted_data = None evaluation = Evaluation(rand_data) for i in xrange(folds): train = rand_data.train_cv(folds, i) # the above code is used by the StratifiedRemoveFolds filter, # the following code is used by the Explorer/Experimenter # train = rand_data.train_cv(folds, i, rnd) test = rand_data.test_cv(folds, i) # build and evaluate classifier cls = Classifier.make_copy(classifier) cls.build_classifier(train) evaluation.test_model(cls, test) # add predictions addcls = Filter( classname="weka.filters.supervised.attribute.AddClassification", options=["-classification", "-distribution", "-error"]) # setting the java object directory avoids issues with correct quoting in option array addcls.set_property("classifier", Classifier.make_copy(classifier)) addcls.inputformat(train) addcls.filter(train) # trains the classifier pred = addcls.filter(test) if predicted_data is None: predicted_data = Instances.template_instances(pred, 0) for n in xrange(pred.num_instances):