def __init__(self, instance, selected_features, classifiers):
    self.instance = instance  # -> weka.core.dataset.Instance
    self.selected_features = selected_features  # -> set()
    # -> list of MyClassifier (classifier developed for the algorithm)
    self.classifiers = []
    self.rate_distrib = None
    for cls in classifiers:
        temp = MyClassifier(Classifier.make_copy(cls.classifier),
                            cls.features, cls.weight)
        self.classifiers.append(temp)
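The MyClassifier wrapper referenced above is not part of this snippet; a minimal sketch of what it might look like, assuming it only bundles a WEKA Classifier with the feature subset and weight used by the ensemble (the attribute names simply mirror the accesses in __init__):

# Hypothetical sketch only; the real MyClassifier in the original project
# may carry additional state and behaviour.
class MyClassifier(object):
    def __init__(self, classifier, features, weight):
        self.classifier = classifier  # weka.classifiers.Classifier object
        self.features = features      # feature subset the classifier was built on
        self.weight = weight          # weight of this member in the ensemble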
def plot_learning_curve(classifiers, train, test=None, increments=100, metric="percent_correct",
                        title="Learning curve", label_template="[#] @ $", key_loc="lower right",
                        outfile=None, wait=True):
    """
    Plots a learning curve.
    :param classifiers: list of Classifier template objects
    :type classifiers: list of Classifier
    :param train: dataset to use for building the classifier; also used for evaluating it if the test set is None
    :type train: Instances
    :param test: optional dataset to use for testing the built classifiers
    :type test: Instances
    :param increments: the increments (>= 1: # of instances, <1: percentage of dataset)
    :type increments: float
    :param metric: the name of the numeric metric to plot (Evaluation.<metric>)
    :type metric: str
    :param title: the title for the plot
    :type title: str
    :param label_template: the template for the label in the plot
                           (#: 1-based index, @: full classname, !: simple classname, $: options)
    :type label_template: str
    :param key_loc: the location string for the key
    :type key_loc: str
    :param outfile: the output file, ignored if None
    :type outfile: str
    :param wait: whether to wait for the user to close the plot
    :type wait: bool
    """

    if not plot.matplotlib_available:
        logger.error("Matplotlib is not installed, plotting unavailable!")
        return
    if not train.has_class():
        logger.error("Training set has no class attribute set!")
        return
    if (test is not None) and (train.equal_headers(test) is not None):
        logger.error("Training and test set are not compatible: " + train.equal_headers(test))
        return

    if increments >= 1:
        inc = increments
    else:
        inc = round(train.num_instances * increments)

    steps = []
    cls = []
    evls = {}
    for classifier in classifiers:
        cl = Classifier.make_copy(classifier)
        cls.append(cl)
        evls[cl] = []
    if test is None:
        tst = train
    else:
        tst = test

    for i in range(train.num_instances):
        if (i > 0) and (i % inc == 0):
            steps.append(i+1)
        for cl in cls:
            # train
            if cl.is_updateable:
                if i == 0:
                    tr = Instances.copy_instances(train, 0, 1)
                    cl.build_classifier(tr)
                else:
                    cl.update_classifier(train.get_instance(i))
            else:
                if (i > 0) and (i % inc == 0):
                    tr = Instances.copy_instances(train, 0, i + 1)
                    cl.build_classifier(tr)
            # evaluate
            if (i > 0) and (i % inc == 0):
                evl = Evaluation(tst)
                evl.test_model(cl, tst)
                evls[cl].append(getattr(evl, metric))

    fig, ax = plt.subplots()
    ax.set_xlabel("# of instances")
    ax.set_ylabel(metric)
    ax.set_title(title)
    fig.canvas.set_window_title(title)
    ax.grid(True)
    i = 0
    for cl in cls:
        evl = evls[cl]
        i += 1
        plot_label = label_template.\
            replace("#", str(i)).\
            replace("@", cl.classname).\
            replace("!", cl.classname[cl.classname.rfind(".") + 1:]).\
            replace("$", join_options(cl.config))
        ax.plot(steps, evl, label=plot_label)
    plt.draw()
    plt.legend(loc=key_loc, shadow=True)
    if outfile is not None:
        plt.savefig(outfile)
    if wait:
        plt.show()
Example No. 3
def plot_learning_curve(classifiers,
                        train,
                        test=None,
                        increments=100,
                        metric="percent_correct",
                        title="Learning curve",
                        label_template="[#] @ $",
                        key_loc="lower right",
                        outfile=None,
                        wait=True):
    """
    Plots a learning curve.

    :param classifiers: list of Classifier template objects
    :type classifiers: list of Classifier
    :param train: dataset to use for building the classifier; also used for evaluating it if the test set is None
    :type train: Instances
    :param test: optional dataset (or list of datasets) to use for testing the built classifiers
    :type test: list or Instances
    :param increments: the increments (>= 1: # of instances, <1: percentage of dataset)
    :type increments: float
    :param metric: the name of the numeric metric to plot (Evaluation.<metric>)
    :type metric: str
    :param title: the title for the plot
    :type title: str
    :param label_template: the template for the label in the plot
                           (#: 1-based index of classifier, @: full classname, !: simple classname,
                           $: options, *: 1-based index of test set)
    :type label_template: str
    :param key_loc: the location string for the key
    :type key_loc: str
    :param outfile: the output file, ignored if None
    :type outfile: str
    :param wait: whether to wait for the user to close the plot
    :type wait: bool
    """

    if not plot.matplotlib_available:
        logger.error("Matplotlib is not installed, plotting unavailable!")
        return
    if not train.has_class():
        logger.error("Training set has no class attribute set!")
        return

    if increments >= 1:
        inc = increments
    else:
        inc = round(train.num_instances * increments)

    if test is None:
        tst = [train]
    elif isinstance(test, list):
        tst = test
    elif isinstance(test, Instances):
        tst = [test]
    else:
        logger.error("Expected list or Instances object, instead: " +
                     type(test))
        return
    for t in tst:
        if train.equal_headers(t) is not None:
            logger.error("Training and test set are not compatible: " +
                         train.equal_headers(t))
            return

    steps = []
    cls = []
    evls = {}
    for classifier in classifiers:
        cl = Classifier.make_copy(classifier)
        cls.append(cl)
        evls[cl] = {}
        for t in tst:
            evls[cl][t] = []

    for i in range(train.num_instances):
        if (i > 0) and (i % inc == 0):
            steps.append(i + 1)
        for cl in cls:
            # train
            if cl.is_updateable:
                if i == 0:
                    tr = Instances.copy_instances(train, 0, 1)
                    cl.build_classifier(tr)
                else:
                    cl.update_classifier(train.get_instance(i))
            else:
                if (i > 0) and (i % inc == 0):
                    tr = Instances.copy_instances(train, 0, i + 1)
                    cl.build_classifier(tr)
            # evaluate
            if (i > 0) and (i % inc == 0):
                for t in tst:
                    evl = Evaluation(t)
                    evl.test_model(cl, t)
                    evls[cl][t].append(getattr(evl, metric))

    fig, ax = plt.subplots()
    ax.set_xlabel("# of instances")
    ax.set_ylabel(metric)
    ax.set_title(title)
    fig.canvas.set_window_title(title)
    ax.grid(True)
    i = 0
    for cl in cls:
        evlpertest = evls[cl]
        i += 1
        n = 0
        for t in tst:
            evl = evlpertest[t]
            n += 1
            plot_label = label_template.\
                replace("#", str(i)).\
                replace("*", str(n)).\
                replace("@", cl.classname).\
                replace("!", cl.classname[cl.classname.rfind(".") + 1:]).\
                replace("$", join_options(cl.config))
            ax.plot(steps, evl, label=plot_label)
    plt.draw()
    plt.legend(loc=key_loc, shadow=True)
    if outfile is not None:
        plt.savefig(outfile)
    if wait:
        plt.show()
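A hedged usage sketch for the multi-test-set variant above, assuming a running JVM and an ARFF file whose class attribute is last; the dataset path, classifier choices and the 5% increment are illustrative only:

import weka.core.jvm as jvm
from weka.core.converters import Loader
from weka.classifiers import Classifier

jvm.start()
try:
    loader = Loader("weka.core.converters.ArffLoader")
    data = loader.load_file("vote.arff")  # illustrative path
    data.class_is_last()
    classifiers = [
        Classifier(classname="weka.classifiers.bayes.NaiveBayes"),
        Classifier(classname="weka.classifiers.trees.J48"),
    ]
    # evaluate on the training data itself, in 5% increments
    plot_learning_curve(classifiers, data, increments=0.05,
                        label_template="[#] ! (test *)", wait=True)
finally:
    jvm.stop()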
    def perceptron_classifier(cls, features, settings):
        # load the dataset
        loader = Loader("weka.core.converters.ArffLoader")
        instancias = loader.load_file(
            "./src/results/caracteristicas_sounds.arff")
        # flag the last attribute as the class
        instancias.class_is_last()
        # define the parameters
        learning_rate = str(settings['learningRate'])
        training_time = str(settings['trainingTime'])
        momentum = "0.2"
        hidden_layers = "a"
        seed = 2
        cross_validation = 20
        print('Learning Rate', learning_rate)
        print('Training Time', training_time)
        # load the Multilayer Perceptron classifier with the defined parameters
        classifier = Classifier(
            classname="weka.classifiers.functions.MultilayerPerceptron",
            options=[
                "-L", learning_rate, "-M", momentum, "-N", training_time, "-V",
                "0", "-S",
                str(seed), "-E", "20", "-H", hidden_layers
            ])
        # build the classifier and prepare the evaluation on the dataset
        classifier.build_classifier(instancias)
        evaluation = Evaluation(instancias)
        # apply cross-validation
        rnd = Random(seed)
        rand_data = Instances.copy_instances(instancias)
        rand_data.randomize(rnd)
        if rand_data.class_attribute.is_nominal:
            rand_data.stratify(cross_validation)
        for i in range(cross_validation):
            # training fold (taken from the randomized, stratified copy)
            train = rand_data.train_cv(cross_validation, i)
            # test fold
            test = rand_data.test_cv(cross_validation, i)

            # build and evaluate the classifier
            cls = Classifier.make_copy(classifier)
            cls.build_classifier(train)
            evaluation.test_model(cls, test)
        # create a new instance from the extracted features
        new_instance = Instance.create_instance(features)
        # add the new instance to the dataset
        instancias.add_instance(new_instance)
        # link the new instance to the dataset the classifier was trained on
        new_instance.dataset = instancias
        # classify the new instance, returning the probability of each defined class
        classification = classifier.distribution_for_instance(new_instance)
        result = {
            'cat': round(classification[0] * 100, 2),
            'dog': round(classification[1] * 100, 2)
        }
        print("=== Setup ===")
        print("Classifier: " + classifier.to_commandline())
        print("Dataset: " + instancias.relationname)
        print("Cross Validation: " + str(cross_validation) + "folds")
        print("Seed: " + str(seed))
        print("")
        print(
            evaluation.summary("=== " + str(cross_validation) +
                               " -fold Cross-Validation ==="))
        print("Classificação", " - Gato: ", result['cat'], "  Cachorro: ",
              result['dog'])

        return result
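A hedged call sketch for perceptron_classifier above, assuming it is a classmethod of a hypothetical SoundClassifier class (suggested by the cls first argument), a running JVM, and that features is the list of numeric attribute values extracted for the sound to classify; the values and hyper-parameters below are illustrative only:

import weka.core.jvm as jvm

jvm.start()
try:
    features = [0.12, 0.87, 0.45, 0.33]                    # illustrative feature vector
    settings = {'learningRate': 0.3, 'trainingTime': 500}  # illustrative hyper-parameters
    result = SoundClassifier.perceptron_classifier(features, settings)
    print(result)  # e.g. {'cat': 73.5, 'dog': 26.5}
finally:
    jvm.stop()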
Example No. 5
def classification(data, train, test, num_clases):
    baseClassifiers_list = [
        "weka.classifiers.bayes.NaiveBayes",
        "weka.classifiers.functions.MultilayerPerceptron",
        "weka.classifiers.functions.SMO", "weka.classifiers.lazy.IBk",
        "weka.classifiers.lazy.KStar", "weka.classifiers.meta.AdaBoostM1",
        "weka.classifiers.meta.Bagging", "weka.classifiers.meta.LogitBoost",
        "weka.classifiers.trees.J48", "weka.classifiers.trees.DecisionStump",
        "weka.classifiers.trees.LMT", "weka.classifiers.trees.RandomForest",
        "weka.classifiers.trees.REPTree", "weka.classifiers.rules.PART",
        "weka.classifiers.rules.JRip", "weka.classifiers.functions.Logistic",
        "weka.classifiers.meta.ClassificationViaRegression",
        "weka.classifiers.bayes.BayesNet"
    ]
    results_train = pd.DataFrame()
    results_test = pd.DataFrame()

    cost_matrix_list = [
        "[]", "[0]", "[0.0 1.0; 1.0 0.0]",
        "[0.0 1.0 2.0; 1.0 0.0 1.0; 2.0 1.0 0.0]",
        "[0.0 1.0 2.0 3.0; 1.0 0.0 1.0 2.0; 2.0 1.0 0.0 1.0; 3.0 2.0 1.0 0.0]",
        "[0.0 1.0 2.0 3.0 4.0; 1.0 0.0 1.0 2.0 3.0; 2.0 1.0 0.0 1.0 2.0; 3.0 2.0 1.0 0.0 1.0; 4.0 3.0 2.0 1.0 0.0]",
        "[0.0 1.0 2.0 3.0 4.0 5.0; 1.0 0.0 1.0 2.0 3.0 4.0; 2.0 1.0 0.0 1.0 2.0 3.0; 3.0 2.0 1.0 0.0 1.0 2.0; 4.0 3.0 2.0 1.0 0.0 1.0; 5.0 4.0 3.0 2.0 1.0 0.0]",
        "[0.0 1.0 2.0 3.0 4.0 5.0 6.0; 1.0 0.0 1.0 2.0 3.0 4.0 5.0; 2.0 1.0 0.0 1.0 2.0 3.0 4.0; 3.0 2.0 1.0 0.0 1.0 2.0 3.0; 4.0 3.0 2.0 1.0 0.0 1.0 2.0; 5.0 4.0 3.0 2.0 1.0 0.0 1.0; 6.0 5.0 4.0 3.0 2.0 1.0 0.0]",
        "[0.0 1.0 2.0 3.0 4.0 5.0 6.0 7.0; 1.0 0.0 1.0 2.0 3.0 4.0 5.0 6.0; 2.0 1.0 0.0 1.0 2.0 3.0 4.0 5.0; 3.0 2.0 1.0 0.0 1.0 2.0 3.0 4.0; 4.0 3.0 2.0 1.0 0.0 1.0 2.0 3.0; 5.0 4.0 3.0 2.0 1.0 0.0 1.0 2.0; 6.0 5.0 4.0 3.0 2.0 1.0 0.0 1.0; 7.0 6.0 5.0 4.0 3.0 2.0 1.0 0.0]",
        "[0.0 1.0 2.0 3.0 4.0 5.0 6.0 7.0 8.0; 1.0 0.0 1.0 2.0 3.0 4.0 5.0 6.0 7.0; 2.0 1.0 0.0 1.0 2.0 3.0 4.0 5.0 6.0; 3.0 2.0 1.0 0.0 1.0 2.0 3.0 4.0 5.0; 4.0 3.0 2.0 1.0 0.0 1.0 2.0 3.0 4.0; 5.0 4.0 3.0 2.0 1.0 0.0 1.0 2.0 3.0; 6.0 5.0 4.0 3.0 2.0 1.0 0.0 1.0 2.0; 7.0 6.0 5.0 4.0 3.0 2.0 1.0 0.0 1.0; 8.0 7.0 6.0 5.0 4.0 3.0 2.0 1.0 0.0]",
        "[0.0 1.0 2.0 3.0 4.0 5.0 6.0 7.0 8.0 9.0; 1.0 0.0 1.0 2.0 3.0 4.0 5.0 6.0 7.0 8.0; 2.0 1.0 0.0 1.0 2.0 3.0 4.0 5.0 6.0 7.0; 3.0 2.0 1.0 0.0 1.0 2.0 3.0 4.0 5.0 6.0; 4.0 3.0 2.0 1.0 0.0 1.0 2.0 3.0 4.0 5.0; 5.0 4.0 3.0 2.0 1.0 0.0 1.0 2.0 3.0 4.0; 6.0 5.0 4.0 3.0 2.0 1.0 0.0 1.0 2.0 3.0; 7.0 6.0 5.0 4.0 3.0 2.0 1.0 0.0 1.0 2.0; 8.0 7.0 6.0 5.0 4.0 3.0 2.0 1.0 0.0 1.0; 9.0 8.0 7.0 6.0 5.0 4.0 3.0 2.0 1.0 0.0]"
    ]

    real_train = []  # the true labels of the training set
    for i in range(train.num_instances):
        real_train.append(
            train.get_instance(i).values[(train.num_attributes - 1)])
    results_train['real'] = real_train

    real_test = []  # the true labels of the test set
    for i in range(test.num_instances):
        real_test.append(
            test.get_instance(i).values[(test.num_attributes - 1)])
    results_test['real'] = real_test

    num = 0
    for clas in baseClassifiers_list:
        column = "p" + np.str(num)

        #classifier
        classifier = SingleClassifierEnhancer(
            classname="weka.classifiers.meta.CostSensitiveClassifier",
            options=[
                "-cost-matrix", cost_matrix_list[num_clases], "-M", "-S", "1"
            ])
        base = Classifier(classname=clas)
        classifier.classifier = base

        predicted_data_train = None
        predicted_data_test = None

        evaluation = Evaluation(data)
        classifier.build_classifier(train)
        #evaluation.test_model(classifier, train)

        # add predictions
        addcls = Filter(
            classname="weka.filters.supervised.attribute.AddClassification",
            options=["-classification"])

        addcls.set_property("classifier", Classifier.make_copy(classifier))
        addcls.inputformat(train)
        #addcls.filter(train)  # trains the classifier
        pred_train = addcls.filter(train)

        pred_test = addcls.filter(test)

        if predicted_data_train is None:
            predicted_data_train = Instances.template_instances(pred_train, 0)
        for n in range(pred_train.num_instances):
            predicted_data_train.add_instance(pred_train.get_instance(n))

        if predicted_data_test is None:
            predicted_data_test = Instances.template_instances(pred_test, 0)
        for n in range(pred_test.num_instances):
            predicted_data_test.add_instance(pred_test.get_instance(n))

        # labels predicted by the classifier trained in this iteration
        preds_train = []
        preds_test = []

        for i in range(predicted_data_train.num_instances):
            preds_train.append(
                predicted_data_train.get_instance(i).values[(
                    predicted_data_train.num_attributes - 1)])

        for i in range(predicted_data_test.num_instances):
            preds_test.append(
                predicted_data_test.get_instance(i).values[(
                    predicted_data_test.num_attributes - 1)])

        results_train[column] = preds_train
        results_test[column] = preds_test
        num = num + 1
    return results_train, results_test
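A hedged usage sketch for classification above, assuming a running JVM, compatible train/test ARFF files with the class attribute last, and that num_clases selects the matching entry of cost_matrix_list (i.e. the number of class values); the first argument only seeds the Evaluation object, so reusing the training set is one option, and the file paths are illustrative only:

import weka.core.jvm as jvm
from weka.core.converters import Loader

jvm.start()
try:
    loader = Loader("weka.core.converters.ArffLoader")
    train = loader.load_file("train.arff")  # illustrative path
    test = loader.load_file("test.arff")    # illustrative path
    train.class_is_last()
    test.class_is_last()
    results_train, results_test = classification(
        train, train, test, train.class_attribute.num_values)
    print(results_train.head())
    print(results_test.head())
finally:
    jvm.stop()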
Example No. 6
def main():
    """
    Just runs some example code.
    """

    # load a dataset
    data_file = helper.get_data_dir() + os.sep + "vote.arff"
    helper.print_info("Loading dataset: " + data_file)
    loader = Loader("weka.core.converters.ArffLoader")
    data = loader.load_file(data_file)
    data.class_is_last()

    # classifier
    classifier = Classifier(classname="weka.classifiers.trees.J48")

    # randomize data
    folds = 10
    seed = 1
    rnd = Random(seed)
    rand_data = Instances.copy_instances(data)
    rand_data.randomize(rnd)
    if rand_data.class_attribute.is_nominal:
        rand_data.stratify(folds)

    # perform cross-validation and add predictions
    predicted_data = None
    evaluation = Evaluation(rand_data)
    for i in range(folds):
        train = rand_data.train_cv(folds, i)
        # the above code is used by the StratifiedRemoveFolds filter,
        # the following code is used by the Explorer/Experimenter
        # train = rand_data.train_cv(folds, i, rnd)
        test = rand_data.test_cv(folds, i)

        # build and evaluate classifier
        cls = Classifier.make_copy(classifier)
        cls.build_classifier(train)
        evaluation.test_model(cls, test)

        # add predictions
        addcls = Filter(
            classname="weka.filters.supervised.attribute.AddClassification",
            options=["-classification", "-distribution", "-error"])
        # setting the java object directly avoids issues with correct quoting in the option array
        addcls.set_property("classifier", Classifier.make_copy(classifier))
        addcls.inputformat(train)
        addcls.filter(train)  # trains the classifier
        pred = addcls.filter(test)
        if predicted_data is None:
            predicted_data = Instances.template_instances(pred, 0)
        for n in range(pred.num_instances):
            predicted_data.add_instance(pred.get_instance(n))

    print("")
    print("=== Setup ===")
    print("Classifier: " + classifier.to_commandline())
    print("Dataset: " + data.relationname)
    print("Folds: " + str(folds))
    print("Seed: " + str(seed))
    print("")
    print(evaluation.summary("=== " + str(folds) + "-fold Cross-Validation ==="))
    print("")
    print(predicted_data)
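Like the other python-weka-wrapper examples, main() needs a running JVM; a minimal driver following the pattern used throughout these examples:

import traceback
import weka.core.jvm as jvm

if __name__ == "__main__":
    try:
        jvm.start()
        main()
    except Exception:
        print(traceback.format_exc())
    finally:
        jvm.stop()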
Example No. 7
rand_data.randomize(rnd)
if rand_data.class_attribute.is_nominal:
    rand_data.stratify(folds)

# perform cross-validation and add predictions
predicted_data = None
evaluation = Evaluation(rand_data)
for i in range(folds):
    train = rand_data.train_cv(folds, i)
    # the above code is used by the StratifiedRemoveFolds filter,
    # the following code is used by the Explorer/Experimenter
    # train = rand_data.train_cv(folds, i, rnd)
    test = rand_data.test_cv(folds, i)

    # build and evaluate classifier
    cls = Classifier.make_copy(classifier)
    cls.build_classifier(train)
    evaluation.test_model(cls, test)

    # add predictions
    addcls = Filter(
        classname="weka.filters.supervised.attribute.AddClassification",
        options=["-classification", "-distribution", "-error"])
    # setting the java object directly avoids issues with correct quoting in the option array
    addcls.set_property("classifier", Classifier.make_copy(classifier))
    addcls.inputformat(train)
    addcls.filter(train)  # trains the classifier
    pred = addcls.filter(test)
    if predicted_data is None:
        predicted_data = Instances.template_instances(pred, 0)
    for n in range(pred.num_instances):