Example No. 1
def logit_PC(df_train, df_test, attr_label):
    '''
    logistic regression with PC members only
    :param df_train:        training data, pandas data frame
    :param df_test:         testing data, pandas data frame
    :param attr_label:      label attribute, string
    :return:                PC members, logistic regression model and AUC
    '''
    pcs = RF.learnPC_R(df_train, attr_label)
    if pcs:
        # model = LogisticRegression().fit(df_train[pcs], df_train[attr_label])
        # pred = model.predict_proba(df_test[pcs])
        # pred = [x[1] for x in pred]
        # auc = evaluate_auc(df_test[attr_label].values.tolist(), pred)

        df2Instances = DF2Instances(df_train[pcs+[attr_label]], 'train', attr_label)
        data_train = df2Instances.df_to_instances()
        data_train.class_is_last()  # set class attribute
        model = Classifier(classname="weka.classifiers.functions.Logistic")
        model.build_classifier(data_train)

        df2Instances = DF2Instances(df_test[pcs+[attr_label]], 'test', attr_label)
        data_test = df2Instances.df_to_instances()
        data_test.class_is_last()  # set class attribute

        preds = []
        for index, inst in enumerate(data_test):
            preds.append(model.distribution_for_instance(inst)[1])
        auc = evaluate_auc(df_test[attr_label].values.tolist(), preds)

        return pcs, model, auc
    else:
        return pcs, None, None
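The evaluate_auc helper used here (and in Examples No. 9, 11 and 12 below) is not shown; a minimal sketch, assuming it simply wraps scikit-learn's roc_auc_score:

# Hypothetical helper assumed by the AUC examples.
from sklearn.metrics import roc_auc_score

def evaluate_auc(y_true, y_score):
    # y_true: ground-truth 0/1 labels; y_score: predicted probabilities of the positive class
    return roc_auc_score(y_true, y_score)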
Example No. 2
    def train_and_predict_instances(self, trainingFile, classifier):

        loader = Loader(classname="weka.core.converters.ArffLoader")
        data = loader.load_file(trainingFile)
        data.class_is_last()
        classes = [str(code) for code in data.class_attribute.values]
        head = [className + " probability" for className in classes]
        head.append("Guess")

        cls = Classifier(classname=classifier)
        cls.build_classifier(data)

        predictions = [[0, 0]] * len(data)
        realLabels = [""] * len(data)
        guess = [0] * len(data)

        for index, inst in enumerate(data):
            pred = cls.classify_instance(inst)
            if inst.get_value(inst.class_index) == pred:
                guess[index] = 1.0
            else:
                guess[index] = 0.0
            dist = cls.distribution_for_instance(inst)
            predictions[index] = [p for p in dist]
            realLabels[index] = classes[int(inst.get_value(inst.class_index))]
            print(
                str(index + 1) + ": label index=" + str(pred) +
                ", class distribution=" + str(dist))

        return [predictions, guess, head, realLabels]
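Since train_and_predict_instances is an instance method, a minimal driver might look like the sketch below; the wrapper class name and the ARFF path are assumptions.

import weka.core.jvm as jvm

jvm.start()
try:
    trainer = Trainer()  # hypothetical class exposing train_and_predict_instances
    predictions, guess, head, realLabels = trainer.train_and_predict_instances(
        "iris.arff", "weka.classifiers.trees.J48")
    print(head)
finally:
    jvm.stop()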
Example No. 3
def TestClassification(arff, modelInput, results):
    # start the Java VM
    jvm.start()
    # load the analysis model
    objects = serialization.read_all(modelInput)
    clsf = Classifier(jobject=objects[0])
    print(clsf)
    # load the test set
    loader = Loader(classname="weka.core.converters.ArffLoader")
    test = loader.load_file(arff)
    test.class_is_first()
    # write out the results
    resultsFile = open(results, "w")
    resultsFile.write("ID\tActual\tPredicted\tBenign prob.\tMalignant prob.\n")
    print("ID\tActual\tPredicted\tBenign prob.\tMalignant prob.")
    for index, inst in enumerate(test):
        pred = clsf.classify_instance(inst)
        dist = clsf.distribution_for_instance(inst)
        sampleID = index + 1
        origin = inst.get_string_value(inst.class_index)
        prediction = inst.class_attribute.value(int(pred))
        sameAsOrigin = "yes" if pred != inst.get_value(
            inst.class_index) else "no"
        NRate = dist.tolist()[0]
        PRate = dist.tolist()[1]
        resultsFile.write(
            "%d\t%s\t%s\t%s\t%s" %
            (sampleID, origin, prediction, str(NRate), str(PRate)) + "\n")
        print("%d\t%s\t%s\t%s\t%s" %
              (sampleID, origin, prediction, str(NRate), str(PRate)))
    resultsFile.close()
    # shut down the Java VM
    jvm.stop()
    print("Detection complete")
Example No. 4
def PredecirUnaTemporada(path):
    jvm.start()
    insta = CrearInstanciaParaPredecir(path)
    atributos = ""
    file = open('ModelData/wekaHeader.arff', 'r')
    atributos = file.readlines()
    file.close()

    file = open('ModelData/predictionFiles/inst.arff', 'w')
    file.writelines(atributos)
    file.write("\n" + insta + '\n')
    file.close()

    objects = serialization.read_all("ModelData/77PercentModelPaisajes.model")
    classifier = Classifier(jobject=objects[0])

    loader = Loader()
    data = loader.load_file("ModelData/predictionFiles/inst.arff")
    data.class_is_last()

    clases = ["invierno", "verano", "otono", "primavera"]
    prediccion = ""
    for index, inst in enumerate(data):
        pred = classifier.classify_instance(inst)
        dist = classifier.distribution_for_instance(inst)
        prediccion = clases[int(pred)]
    jvm.stop()
    return prediccion
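Rather than relying on the hard-coded clases list matching the ARFF header order, the predicted label can be read from the class attribute itself (a one-line sketch for the loop above):

prediccion = data.class_attribute.value(int(pred))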
Example No. 5
def main(args):
    """
    Trains a J48 classifier on a training set and outputs the predicted class and class distribution alongside the
    actual class from a test set. Class attribute is assumed to be the last attribute.
    :param args: the commandline arguments (train and test datasets)
    :type args: list
    """

    # load a dataset
    helper.print_info("Loading train: " + args[1])
    loader = Loader(classname="weka.core.converters.ArffLoader")
    train = loader.load_file(args[1])
    train.class_index = train.num_attributes - 1
    helper.print_info("Loading test: " + args[2])
    test = loader.load_file(args[2])
    test.class_is_last()

    # classifier
    cls = Classifier(classname="weka.classifiers.trees.J48")
    cls.build_classifier(train)

    # output predictions
    print("# - actual - predicted - error - distribution")
    for index, inst in enumerate(test):
        pred = cls.classify_instance(inst)
        dist = cls.distribution_for_instance(inst)
        print("%d - %s - %s - %s  - %s" %
              (index + 1, inst.get_string_value(
                  inst.class_index), inst.class_attribute.value(int(pred)),
               "yes" if pred != inst.get_value(inst.class_index) else "no",
               str(dist.tolist())))
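This main follows the usual python-weka-wrapper3 pattern of running behind a JVM start/stop guard; a typical entry point sketch:

if __name__ == "__main__":
    import sys
    import weka.core.jvm as jvm

    try:
        jvm.start()
        main(sys.argv)
    finally:
        jvm.stop()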
Example No. 7
class python_weka(object):
    def __init__(self, input_x, input_y, labels):
        self.input_x = input_x
        self.input_y = input_y
        self.labels = labels

    def write_arff(self, filename, relation, train_or_predict, input_x, input_y=None):
        # train_or_predict: 0 = training data (write the real labels),
        # anything else = prediction data (write a dummy label)
        f = open(filename, "w")
        f.write("@relation " + relation + "\n")
        for name in self.labels:
            f.write("@attribute " + name + " " + self.labels[name] + "\n")
        f.write("\n")
        f.write("@data" + "\n")
        for i in range(len(input_x)):
            for j in input_x[i]:
                f.write(str(j) + "  ")
            if train_or_predict == 0:
                f.write(str(input_y[i]))
            else:
                f.write(str(0))
            f.write("\n")
        f.close()

    def train(self):
        filename = "train.arff"
        self.write_arff(filename, "train", 0, self.input_x, self.input_y)
        loader = Loader(classname="weka.core.converters.ArffLoader")
        data = loader.load_file(filename)
        data.class_is_last()
        self.cls = Classifier(classname="weka.classifiers.meta.Bagging", options=["-S", "5"])
        self.cls.build_classifier(data)
        os.remove(filename)

    def predict(self, test_data):
        filename = "test.arff"
        self.write_arff(filename, "test", 0, test_data)
        loader = Loader(classname="weka.core.converters.ArffLoader")
        data = loader.load_file(filename)
        data.class_is_last()
        # evl = Evaluation(data)
        # evl.evaluate_model(self.cls,data)
        # data.set_class_label(data.numAttributes() - 1)
        # data.setClassIndex(data.numAttributes() - 1)
        result = []
        for index, inst in enumerate(data):
            pred = self.cls.classify_instance(inst)
            dist = self.cls.distribution_for_instance(inst)
            result.append(dist[0])
            # print(str(index+1) + ": label index=" + str(pred) + ", class distribution=" + str(dist))
            # print str(index+1) + 'dist:'+ str(dist)
        os.remove(filename)
        return result
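A hypothetical round trip with python_weka; labels is assumed to be a dict mapping attribute names to their ARFF types, in column order, with the class attribute last.

import weka.core.jvm as jvm

jvm.start()
labels = {"f1": "numeric", "f2": "numeric", "cls": "{0,1}"}  # assumed layout
model = python_weka([[1.0, 2.0], [3.0, 4.0]], [0, 1], labels)
model.train()
print(model.predict([[2.0, 3.0]]))  # probability of the first class for each row
jvm.stop()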
Example No. 8
def predictWithWeka(csvFilenameWithInputToPredict, modelFilename):
    """
    Note: to use this without knowing the class, a dummy class can be supplied
    and the 'actual' and 'error' values of @return results ignored.

    Note: the file named by @csvFilenameWithInputToPredict must contain
    instances of both classes (spam and ham).

    @csvFilenameWithInputToPredict : name of the csv file with the instances
                                     to predict.

    @modelFilename : name of the model file generated by weka, compatible
                     with the input csv file.

    @return results : list of dictionaries with the keys
                      index, actual, predicted, error and distribution
    """
    loader = Loader(classname="weka.core.converters.CSVLoader")
    cls = Classifier(jobject=serialization.read(modelFilename))
    #print(cls)

    data = loader.load_file(csvFilenameWithInputToPredict)
    data.class_is_last()

    multi = MultiFilter()
    remove = Filter(classname="weka.filters.unsupervised.attribute.Remove",
                    options=["-R", "first"])
    numericToNom = Filter(
        classname="weka.filters.unsupervised.attribute.NumericToNominal",
        options=["-R", "8,11"])
    normalize = Filter(
        classname="weka.filters.unsupervised.attribute.Normalize",
        options=["-S", "1.0", "-T", "0.0"])
    multi.filters = [remove, numericToNom, normalize]
    multi.inputformat(data)
    test = multi.filter(data)

    results = []
    for index, inst in enumerate(test):
        result = dict()

        pred = cls.classify_instance(inst)
        dist = cls.distribution_for_instance(inst)

        result["index"] = index + 1
        result["actual"] = inst.get_string_value(inst.class_index)
        result["predicted"] = inst.class_attribute.value(int(pred))
        result["error"] = "yes" if pred != inst.get_value(
            inst.class_index) else "no"
        result["distribution"] = str(dist.tolist())

        results.append(result)
        #print result

    return results
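A hypothetical call; the JVM must already be running, and both file names are placeholders:

results = predictWithWeka("instances.csv", "spam.model")
for r in results:
    print(r["index"], r["actual"], r["predicted"], r["error"])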
Example No. 9
    def LMT(self):
        model = Classifier(classname="weka.classifiers.trees.LMT")
        model.build_classifier(self.data_train)
        print(model)

        preds = []
        for index, inst in enumerate(self.data_test):
            preds.append(model.distribution_for_instance(inst)[1])
        auc = evaluate_auc(self.df_test[self.attr_label].values.tolist(),
                           preds)
        return auc
Example No. 10
def predict(attributes):
    jvm.start()
    file_path = print_to_file(attributes)
    # load the saved model
    objects = serialization.read_all("/Users/hosyvietanh/Desktop/data_mining/trained_model.model")
    classifier = Classifier(jobject=objects[0])
    loader = Loader(classname="weka.core.converters.ArffLoader")
    data = loader.load_file(file_path)
    data.class_is_last()
    pred = None
    for index, inst in enumerate(data):
        pred = classifier.classify_instance(inst)
        dist = classifier.distribution_for_instance(inst)
        break  # only the first instance is needed
    jvm.stop()
    return int(pred)
Example No. 11
    def DT(self):
        model = Classifier(classname="weka.classifiers.trees.J48")
        model.build_classifier(self.data_train)
        preds = []
        for index, inst in enumerate(self.data_test):
            preds.append(model.distribution_for_instance(inst)[1])
        auc = evaluate_auc(self.df_test[self.attr_label].values.tolist(),
                           preds)

        ### scikit learn decision tree
        # from sklearn.tree import DecisionTreeClassifier
        # model = DecisionTreeClassifier().fit(self.df_train[self.attributes], self.df_train[self.attr_label])
        # pred = model.predict_proba(self.df_test[self.attributes])
        # pred = [x[1] for x in pred]
        # auc = evaluate_auc(self.df_test[self.attr_label].values.tolist(), pred)
        return auc
Example No. 12
    def logit(self):
        model = Classifier(classname="weka.classifiers.functions.Logistic")
        model.build_classifier(self.data_train)
        preds = []
        for index, inst in enumerate(self.data_test):
            preds.append(model.distribution_for_instance(inst)[1])
        auc = evaluate_auc(self.df_test[self.attr_label].values.tolist(),
                           preds)

        ### scikit learn logit
        # from sklearn.linear_model import LogisticRegression
        # model = LogisticRegression().fit(self.df_train[self.attributes], self.df_train[self.attr_label])
        # pred = model.predict_proba(self.df_test[self.attributes])
        # pred = [x[1] for x in pred]
        # auc = evaluate_auc(self.df_test[self.attr_label].values.tolist(), pred)
        return auc
Example No. 13
    def functionProcessamento(self, ca1_r, ca1_l, ca2_ca3_r, ca2_ca3_l, sub_r,
                              sub_l, sexo, id):
        jvm.start()
        path = os.path.dirname(os.path.abspath(__file__))
        # TODO: check the individual's sex to load the correct model
        modelo = path + "\\naive_bayes_feminino_novo.model"
        if (sexo == "Male"):
            print("Male individual")
            modelo = path + "\\naive_bayes_feminino_novo.model"
        objects = serialization.read_all(modelo)
        classifier = Classifier(jobject=objects[0])
        loader = Loader(classname="weka.core.converters.ArffLoader")
        arquivo = open(path + "\\novo_individuo.arff", "w")
        conteudo = list()
        conteudo.append("@relation alzheimer \n\n")
        conteudo.append("@attribute doente {SIM, NAO} \n")
        conteudo.append("@attribute ca1_right real \n")
        conteudo.append("@attribute ca1_left real \n")
        conteudo.append("@attribute ca2_ca3_right real\n")
        conteudo.append("@attribute ca2_ca3_left real \n")
        conteudo.append("@attribute subic_right real \n")
        conteudo.append("@attribute subic_left real \n\n")
        conteudo.append("@data \n")
        # the variables are passed in here
        conteudo.append("SIM," + str(ca1_r) + "," + str(ca1_l) + "," +
                        str(ca2_ca3_r) + "," + str(ca2_ca3_l) + "," +
                        str(sub_r) + "," + str(sub_l))
        print(conteudo)
        arquivo.writelines(conteudo)
        arquivo.close()

        data = loader.load_file(path + "\\novo_individuo.arff")
        data.class_is_last()
        for index, inst in enumerate(data):
            pred = classifier.classify_instance(inst)
            dist = classifier.distribution_for_instance(inst)
            pc_doenca = round(dist[0] * 100, 2)  # probability of class SIM (diseased)
            pc_saudavel = round(100 - pc_doenca, 2)
            print(" Alzheimer's percentage=" + str(pc_doenca) +
                  "%, healthy percentage=" + str(pc_saudavel) + "%")
            alzheimer = Alzheimer.objects.get(id=id)
            alzheimer.resultado_ad = pc_doenca
            alzheimer.resultado_cn = pc_saudavel
            alzheimer.status_seg = 2
            alzheimer.save()
        jvm.stop()
Example No. 14
def run():
    jvm.start()
    load_csv = Loader("weka.core.converters.CSVLoader")
    data_csv = load_csv.load_file(
        "/Users/imeiliasantoso/web_graduate_project5/register_page/bank-full_input.csv"
    )

    saver = Saver("weka.core.converters.ArffSaver")
    saver.save_file(
        data_csv,
        "/Users/imeiliasantoso/web_graduate_project5/register_page/bank-full_input.arff"
    )

    load_arff = Loader("weka.core.converters.ArffLoader")
    data_arff = load_arff.load_file(
        "/Users/imeiliasantoso/web_graduate_project5/register_page/bank-full_input.arff"
    )
    data_arff.class_is_last()

    cls = Classifier(classname="weka.classifiers.trees.J48",
                     options=["-C", "0.5"])
    cls.build_classifier(data_arff)
    for index, inst in enumerate(data_arff):
        # predictions are computed but not used further
        pred = cls.classify_instance(inst)
        dist = cls.distribution_for_instance(inst)

    # save the pruned tree to a text file

    saveFile = open(
        "/Users/imeiliasantoso/web_graduate_project5/register_page/bank-full_input.txt",
        "w")
    saveFile.write(str(cls))
    # print(cls)
    saveFile.close()

    global j48
    J48_class = Classifier(classname="weka.classifiers.trees.J48",
                           options=["-C", "0.25", "-M", "2"])
    J48_class.build_classifier(data_arff)
    evaluationj48 = Evaluation(data_arff)
    evaluationj48.crossvalidate_model(J48_class, data_arff, 10, Random(100))
    j48 = str(evaluationj48.percent_correct)
    jvm.stop()
    return j48
Example No. 15
    def train_and_separate_validation(self, trainingSet, validationSet,
                                      validationInstancesNames, classifier):

        loader = Loader(classname="weka.core.converters.ArffLoader")
        data = loader.load_file(trainingSet)
        data.class_is_last()
        data2 = loader.load_file(validationSet)
        if len(data2) != len(validationInstancesNames):
            raise LookupError(
                "There's a mismatch between the number of instances in the "
                "arff file and the list of instance names.")
        data2.class_is_last()
        classes = [str(code) for code in data.class_attribute.values]
        header = [[classifier, trainingSet, "", "", ""], ["Instance"] +
                  [className + " probability"
                   for className in classes] + ["Real Class", "Guess"]]

        cls = Classifier(classname=classifier)
        print("Training.")
        cls.build_classifier(data)
        print("Model done!")

        dataMatrix = [["", 0, 0, 0, ""] for i in range(len(data2))]

        print("Validating.")
        for index, inst in enumerate(data2):
            print("Instance: " + str(index + 1) + "/" + str(len(data2)))
            pred = cls.classify_instance(inst)
            if inst.get_value(inst.class_index) == pred:
                guessValue = 1.0
            else:
                guessValue = 0.0
            dist = cls.distribution_for_instance(inst)
            dataMatrix[index][0] = validationInstancesNames[index]
            dataMatrix[index][1:3] = [round(p, 2) for p in dist]
            dataMatrix[index][3] = classes[int(inst.get_value(
                inst.class_index))]
            dataMatrix[index][4] = guessValue

        print("Done\n")
        return [header, dataMatrix]
Example No. 16
    def getDecisionTree(self, inputPath):
        # load arff
        data = self.load_Arff(inputPath)

        # classifier
        data.class_index = data.num_attributes - 1  # set class attribute
        classifier = Classifier(classname="weka.classifiers.trees.J48",
                                options=["-C", "0.3"])
        classifier.build_classifier(data)

        classifierStr = str(classifier)
        for index in range(data.num_instances):
            instance = data.get_instance(index)
            # the per-instance distribution is computed but not used further
            result = classifier.distribution_for_instance(instance)

        graph = classifier.graph
        return graph
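The DOT string returned by getDecisionTree can be rendered with python-weka-wrapper3's plotting helper, assuming matplotlib and pygraphviz are installed (the caller instance is hypothetical):

import weka.plot.graph as graph_plot

dot = tree_builder.getDecisionTree("data.arff")  # tree_builder: hypothetical caller instance
graph_plot.plot_dot_graph(dot)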
Example No. 17
    def bayes_classifier(features):
        # load the dataset
        instancias = load_any_file("caracteristicas.arff")
        # flag the last attribute as the class
        instancias.class_is_last()
        # build a Naive Bayes classifier from the image characteristics
        classifier = Classifier(classname="weka.classifiers.bayes.NaiveBayes")
        classifier.build_classifier(instancias)
        # create a new instance from the extracted features
        new_instance = Instance.create_instance(features)
        # add the new instance to the dataset
        instancias.add_instance(new_instance)
        # tie the new instance to the dataset
        new_instance.dataset = instancias
        # classify the new instance, yielding the probability of each class
        classification = classifier.distribution_for_instance(new_instance)

        print("Classification", " - Apu: ", round(classification[0] * 100, 2),
              "  Nelson: ", round(classification[1] * 100, 2))

        return classification
Example No. 19
def classify(train, test, name="RF", tuning=False):
    jvm.start()

    if isinstance(train, list) and isinstance(test, list):
        train = weka_instance(train)
        trn_data = converters.load_any_file(train)
        test = weka_instance(test)
        tst_data = converters.load_any_file(test)

    elif os.path.isfile(train) and os.path.isfile(test):
        trn_data = converters.load_any_file(train)
        tst_data = converters.load_any_file(test)

    else:
        trn = csv_as_ndarray(train)
        tst = csv_as_ndarray(test)

        trn_data = converters.ndarray_to_instances(trn, relation="Train")
        tst_data = converters.ndarray_to_instances(tst, relation="Test")

    trn_data.class_is_last()
    tst_data.class_is_last()

    # t = time()
    if tuning:
        opt = tune(train)
    else:
        opt = default_opt
    # print("Time to tune: {} seconds".format(time() - t))

    cls = Classifier(classname=classifiers[name.lower()], options=opt)

    cls.build_classifier(trn_data)

    distr = [cls.distribution_for_instance(inst)[1] for inst in tst_data]
    preds = [cls.classify_instance(inst) for inst in tst_data]

    jvm.stop()

    return preds, distr
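classify relies on module-level classifiers and default_opt definitions that are not shown; a plausible, explicitly hypothetical version:

# Hypothetical lookup table assumed by classify(); keys are lower-cased names.
classifiers = {
    "rf": "weka.classifiers.trees.RandomForest",
    "j48": "weka.classifiers.trees.J48",
    "nb": "weka.classifiers.bayes.NaiveBayes",
}
default_opt = []  # no extra command-line options by default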
Example No. 20
class ObjectiveClassifier:
    def __init__(self, model_path, senti_path, stop_words, ngrams_path):
        self.loader = Loader(classname="weka.core.converters.ArffLoader")
        self.features_calculator = FeaturesCalculator(ngrams_path)
        self.classifier = Classifier(jobject=serialization.read(model_path))
        self.normalizer = Preprocessor(senti_path)
        self.stop_words = stop_words

    def classify_tweet(self, tweet, polarity='"positive"'):
        tweet_normalized = self.normalizer.preprocess(tweet, self.stop_words,
                                                      "")
        self.features_calculator.calculateFeatures(
            tweet_normalized, "output/tweet_features_objective.arff", polarity)
        tweet_features = self.loader.load_file(
            "output/tweet_features_objective.arff")
        tweet_features.class_is_last()
        for index, inst in enumerate(tweet_features):
            pred = self.classifier.classify_instance(inst)
            dist = self.classifier.distribution_for_instance(inst)
            print("%d - %s - %s" %
                  (index + 1, inst.class_attribute.value(
                      int(pred)), str(dist.tolist())))
Example No. 21
class J48:

    def __init__(self):
        jvm.start()

        data_dir = "./DataSet/"
        self.data = converters.load_any_file(data_dir + "chatbot2.arff")
        self.data.class_is_last()

        self.cls = Classifier(classname="weka.classifiers.trees.J48")
        self.cls.build_classifier(self.data)

        self.intens = self.data.attribute_by_name("intent")


    def transformUserInput(self, user_input):
        '''
        Transforms the user input into a vector of 1s and 0s so a prediction
        can be made.

        :param str user input
        :return list of 1s and 0s (class value set to missing)
        :rtype list
        '''
        attributes = self.data.attribute_names()
        data_size = len(attributes)
        # numeric values, since Instance.create_instance expects floats
        vector_input = [0.0] * data_size

        words = user_input.split()
        attribute_map = {attributes[i]: i for i in range(len(attributes))}

        for word in words:
            if word in attributes:
                vector_input[attribute_map.get(word)] = 1.0

        vector_input[data_size - 1] = Instance.missing_value()

        return vector_input



    def getIntent(self, user_input):
        '''
        Identifies the intent by making a prediction from a user input and the
        dataset.

        :param str user input
        :param data representation of the GLaDOS dataset
        :return string with the identified intent
        :rtype str
        '''
        vector_input = self.transformUserInput(user_input)

        inst = Instance.create_instance(vector_input)
        self.data.add_instance(inst)
        # fetch the dataset-attached copy so the instance carries a dataset reference
        inst = self.data.get_instance(self.data.num_instances - 1)

        intent = "desconocido"

        pred = int(self.cls.classify_instance(inst))
        dist = self.cls.distribution_for_instance(inst)

        if max(dist) > 0.7:
            intent = self.intens.value(pred)

        return intent
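A hypothetical interactive use of the J48 chatbot class (requires ./DataSet/chatbot2.arff to exist):

bot = J48()                   # starts the JVM and trains on the bundled ARFF file
print(bot.getIntent("hola"))  # prints the predicted intent, or "desconocido"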
Example No. 22
import numpy as np
from sklearn.linear_model import LogisticRegression as LR

import weka.core.jvm as jvm
from weka.classifiers import Classifier
from weka.core.converters import Loader

jvm.start()
loader = Loader(classname="weka.core.converters.ArffLoader")
instances = loader.load_file(
    "/home/farzad/Desktop/jrnl/semiSupervisedPython/originDataset/bupa/train.arff"
)
instances.class_is_last()

tree = Classifier(classname="weka.classifiers.trees.J48")

tree.build_classifier(instances)
# clsLabel = j48.classify_instance(data.get_instance(0))
# print("====================================>",clsLabel)

p_train = np.zeros(shape=(instances.num_instances, 1))
y_train = np.zeros(shape=(instances.num_instances, 1))

for i, instance in enumerate(instances):
    dist = tree.distribution_for_instance(instance)
    p_train[i] = [(dist[1] - 0.5) * 2.0]  # rescale P(class=1) from [0, 1] to [-1, 1]
    y_train[i] = [tree.classify_instance(instance)]

print("p_train ======> > > >>>> > > >>>> ", len(p_train))
print("p_train ======> > > >>>> > > >>>> ", len(y_train))
print("p_train ======> > > >>>> > > >>>> ", instances.num_instances)
# print("p_train ======> > > >>>> > > >>>> " , p_train)
# print("p_train ======> > > >>>> > > >>>> " , p_train.reshape( -1, 1 ))

import warnings
warnings.filterwarnings("ignore", category=FutureWarning)

lr = LR(solver='lbfgs')
lr.fit(p_train, np.ravel(y_train, order='C'))  # LR needs X to be 2-dimensional
# lr.fit( p_train.reshape( -1, 1 ), y_train )     # LR needs X to be 2-dimensional
Example No. 23
def python_wrapper(mImage, prefix, file_name, pre_prefix, dir, permanent_dir,
                   model):
    # Initialization of weka machine learning library
    weka_machine_learning = WML.WekaMachineLearning()
    # tokenization of images
    token = re.split('RGB_|.png', mImage)
    ir_directory = token[0] + 'IR_' + token[1] + '.pgm'
    mat_directory = token[0] + 'Mat_' + token[1]

    # get mat and ir image
    image = segmentor.getImage(ir_directory)
    mat = segmentor.readMatFile(mat_directory)

    # image processing
    edges = segmentor.edgeDetector(image)
    type = segmentor.getTypeOfFruit(image)
    segmentation = segmentor.segmentation(image, type)
    filter = segmentor.filterImageFromSegmentation(image, segmentation)
    output_seg = segmentor.imageMapping(filter, mat['IR'])

    ####################-Anomaly Detection via INFLO-###################

    # file prefix creation for the csv file to save
    prefix_csv = prefix + "\\" + file_name

    # if the folder is not there then create it
    # and write the csv to the folder
    if not os.path.exists(prefix):
        os.mkdir(prefix)
        csv = segmentor.writeToCSV(output_seg, prefix_csv)
        print("file is written")

    # else simply write the csv to the folder
    else:
        csv = segmentor.writeToCSV(output_seg, prefix_csv)
        print("file is written")
    #call the INFLO.bat after segmenting the image
    #for anomaly detection
    run_batch_file("rapid_miner_pro_ifruitlfy.bat")
    ############################-Clustering-############################

    # image file directory is stored in ir_directory
    # mat file directory is stored in mat_directory
    # and need to get the INFLO file
    # directory for INFLO file is prefix_csv
    anomaly_file = prefix_csv + '.csv_INFLO.csv'
    # directory for the temperorary files is made so
    # some results can be stored and processed auto-
    # matically by the rapid miner 5, this folder is

    demo_printing_picture(permanent_dir, prefix, mImage, pre_prefix, dir,
                          file_name)
    print(
        "END OF ANOMALY DETECTION CLICK TRAIN AND SHOW RESULT FOR PROCESSING")
    write_temp_dir = permanent_dir + "\\"
    print(prefix)
    print(file_name)
    # Clean the junk of the output files
    if os.path.exists(permanent_dir + "//output.csv"):
        os.remove(permanent_dir + "//output.csv")
    features = iFruitFly_clustering.cluster_analysis.cluster_analysis(
        ir_directory, permanent_dir + "\\output_INFLO.csv", mat_directory,
        dir + "\\" + file_name, prefix, file_name, permanent_dir)
    if features is None:
        print("Image can't be segmented due to poor calibration")

    # other files are stored for the user in the junk
    else:
        print("printing images->>>>>>> ", prefix + file_name)
        image_plotter(features, ir_directory, prefix + file_name)
    import csv
    # Weka Machine Learning Inclusion on 5/30/2017
    # adding one extra column
    with open(permanent_dir + "\\output.csv", 'r') as csvinput:
        with open(permanent_dir + "\\output_n.csv", 'w') as csvoutput:
            writer = csv.writer(csvoutput, lineterminator='\n')
            reader = csv.reader(csvinput)
            all = []
            row = next(reader)
            row.append('result')
            all.append(row)
            for row in reader:
                row.append(0)
                all.append(row)
            writer.writerows(all)
    #model = "J:\iFruitFly\Python Scripts\Model 1\\model.model"
    data_dir = permanent_dir + "\\output_n.csv"
    #data_dir_open = open(data_dir)
    #r = csv.reader(data_dir_open)

    jvm.start()
    loader = Loader(classname="weka.core.converters.CSVLoader")
    data = loader.load_file(data_dir)
    # using the serialization library for
    # opening the model
    objects = serialization.read_all(model)
    classifier = Classifier(jobject=objects[0])
    print "Model Classified"
    print classifier
    data.class_is_last()
    for index, inst in enumerate(data):
        pred = classifier.classify_instance(inst)
        dist = classifier.distribution_for_instance(inst)
        print(pred)
Example No. 24
def classifyTest(fileToClassify,
                 fileToCompare,
                 predictionYear=None,
                 pastResultYears=None,
                 classifier=None):
    # Start Java VM
    jvm.start(max_heap_size="1024m")
    # Load CSV files into weka loader
    loader = Loader(classname="weka.core.converters.CSVLoader")
    fileToClassifyData = loader.load_file(fileToClassify)
    fileToClassifyData.class_is_last()
    fileToCompareData = loader.load_file(fileToCompare)
    fileToCompareData.class_is_last()

    # Generate Classifier based on data
    classifier = Classifier(
        classname="weka.classifiers.functions.MultilayerPerceptron",
        options=[
            "-L", "0.3", "-M", "0.2", "-N", "500", "-V", "0", "-S", "0", "-E",
            "20", "-H", "a"
        ])
    classifier.build_classifier(fileToClassifyData)
    print(classifier)
    # Var builder for graph
    count = 0.0
    countPred = 0.0
    graphDetails = [
        ['TITLE'],
        ['NFL Data Ratings (Official) {0}'.format(pastResultYears), [], []],
        ['NFL Data Ratings (Predicted) {0}'.format(predictionYear), [], []]
    ]

    # Time to predict results based on classifier
    for index, inst in enumerate(fileToCompareData):
        pred = classifier.classify_instance(inst)
        temp = list(enumerate(inst))[-1][1]  # value of the last attribute (the actual class)
        countPred += pred
        count += temp
        # index=list(enumerate(inst))[3+1][1]
        index += 1
        print('YOLO', list(enumerate(inst))[3][1])
        print("{0:.3f} accurate compared to results.".format(countPred /
                                                             count))

        dist = classifier.distribution_for_instance(inst)
        # NFL Results
        graphDetails[1][1].append(index)
        graphDetails[1][2].append(temp)

        # Predicted Results
        graphDetails[2][1].append(index)
        graphDetails[2][2].append(pred)
        print(
            str(index + 1) + ": label index=" + str(pred) +
            ", class distribution=" + str(dist) + " , original: " + str(temp))
    graphDetails[0][
        0] = 'Player Rating Predictions For {0} ({1:.3f} Accurate)'.format(
            predictionYear, 100 - (countPred / count))
    jvm.stop()
    print(graphDetails)
    BuildGraph(graphDetails)
Example No. 25
class Weka(object): 

    data = None
    dataDir = None
    classifier = None

    def __init__(self, dataDir = '.'):
        self.dataDir = dataDir 
        
        jvm.start()
        

    # Initializes the data with the contents of the arff file
    def initData(self, arrfFile):
        loader = Loader(classname="weka.core.converters.ArffLoader")
        print(self.dataDir + '/' + arrfFile)
        self.data = loader.load_file(self.dataDir + '/' + arrfFile)
        self.data.class_is_last()

        print('Loading file ' + self.dataDir + '/' + arrfFile)
        # print(data)
                     

    # Trains the classifier
    def trainData(self, arrfFile=None, classname="weka.classifiers.trees.J48", options=["-C", "0.3"]):
        if arrfFile is not None:
            self.initData(arrfFile)

        if self.data is None:
            return

        print('Building classifier ' + str(classname) + ' ' + ' '.join(options))
        self.classifier = Classifier(classname=classname, options=options)
        self.classifier.build_classifier(self.data)


    # Classifies the instances of an arff file
    def classify(self, predictFile):
            
        if self.data is None or self.classifier is None:
            return [-1]

        loader = Loader(classname="weka.core.converters.ArffLoader")
        predict_data = loader.load_file(self.dataDir + '/' + predictFile)
        predict_data.class_is_last()
        
        # parse the class labels out of the attribute's string representation
        values = str(predict_data.class_attribute)[19:-1].split(',')
        
        classes = []
        
        for index, inst in enumerate(predict_data):
            #pred = self.classifier.classify_instance(inst)
            prediction = self.classifier.distribution_for_instance(inst)
            cl = int(values[prediction.argmax()][7:])
            
            print('Class:', cl)
            classes.append(cl)

        return classes


    # Runs a cross-validation and prints the results on standard output
    def crossValidate(self, arrfFile=None, classname="weka.classifiers.trees.J48", options=["-C", "0.3"]):
        
        if arrfFile is not None:
            self.initData( arrfFile )
            
        if self.data is None:
            return 

        print('Classifier ' + str(classname) + ' ' + ' '.join(options))
        cls = Classifier(classname=classname, options=options)
        
        evl = Evaluation(self.data)
        evl.crossvalidate_model(cls, self.data, 10, Random(1))

        print(evl.percent_correct)
        print(evl.summary())
        print(evl.class_details())
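A hypothetical session with this Weka wrapper class (file names are placeholders):

w = Weka(dataDir='./data')         # starts the JVM
w.trainData('train.arff')          # builds the J48 classifier
print(w.classify('predict.arff'))  # list of predicted class numbers
w.crossValidate('train.arff')      # 10-fold cross-validation report on stdout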
Example No. 26
def main():
    global stop_spinning, name, upper_clothing, lower_clothing, outer_clothing, shoes_clothing, upper_indices, lower_indices, outer_indices, shoes_indices
    '''
    Classifies clothing using stored classification models for each user
    '''
    FSM = ClothingFSM()
    #FSM.username_server()

    clothingdb = MySQLdb.connect(host="localhost",
                                 user="******",
                                 passwd="mypassword", # Change to your SQL DB password
                                 db = "userprofiles")
    cursor = clothingdb.cursor()

    cursor.execute("SELECT * FROM clothing")

    name = "Study"

    #Populate clothing dictionaries with user's wardrobe
    for row in cursor.fetchall():
        print(str(row[2]))
        print(str(row[6]))
        if str(row[0]) == name:
            if str(row[1]) == "Upper Body":
                try:
                    upper_clothing[row[2]].append(row[6])
                except:
                    print("Problem appending clothing to dictionary")
            if str(row[1]) == "Lower Body":
                try:
                    lower_clothing[row[3]].append(row[6])
                except:
                    print("Problem appending clothing to dictionary")
            if str(row[1]) == "Outerwear":
                try:
                    outer_clothing[row[4]].append(row[6])
                except:
                    print("Problem appending clothing to dictionary")
            if str(row[1]) == "Shoes":
                try:
                    shoes_clothing[row[5]].append(row[6])
                except:
                    print("Problem appending clothing to dictionary")

    print(upper_clothing, lower_clothing, outer_clothing, shoes_clothing)
    # FSM.received_user_info()

    #In final program, we will receive this information from database


    #Set to true or false if receiving features vs testing defaults
    receive_features = True

    if receive_features == False:
        #Wait to Receive input 

        #Example inputs from user/weather API
        features['casual_formal'] = 3
        #5 is very comfortable 1 is not comfortable
        features['comfort'] = 3
        #1 is not snowing 2 is light snow 3 is heavy snow
        features['snow'] = 1
        #1 is not raining 3 is raining(no medium)
        features['rain'] = 3
        #If user is spending their time mostly outside, set warmth to outsidewarmth. If not, set warmth
        features['warmth'] = 1
        features['outside_warmth'] = 4
        #1 is no 0 is yes
        features['athletic'] = 1

        snowstring = ''
        rainstring = ''
        athleticstring = ''
    
    else:
        FSM.features_server()
    
    
    
 
    upper_array = [None] * 14
    lower_array = [None] * 7
    outer_array = [None] * 3
    shoes_array = [None] * 4
    upper_prediction_array = []
    lower_prediction_array = []
    outer_prediction_array = []
    shoes_prediction_array = []

    warmth_att = Attribute.create_numeric("Warmth")
    comfort_att = Attribute.create_numeric("Comfort")
    casual_att = Attribute.create_numeric("Casual")
    rain_att = Attribute.create_numeric("Rain")
    snow_att = Attribute.create_numeric("Snow")
    athletic_att = Attribute.create_numeric("Athletic")

    
    upper_attributes = [warmth_att, casual_att, comfort_att, athletic_att]
    lower_attributes = [warmth_att, casual_att, comfort_att, athletic_att]
    outer_attributes = [warmth_att, casual_att, comfort_att, snow_att, rain_att]
    shoes_attributes = [casual_att, comfort_att, athletic_att]

    Instances.create_instances("upper_instances", upper_attributes, 0)
    Instances.create_instances("lower_instances", lower_attributes, 0)
    Instances.create_instances("outer_instances", outer_attributes, 0)
    Instances.create_instances("shoes_instances", shoes_attributes, 0)

    #Simulate their wardrobe
    #Upper
    # Tank Top
    if len(upper_clothing['Tank Top']) == 0:
        upper_array[0] = 0 
    else:
        upper_array[0] = 1 
    # T-Shirt
    if len(upper_clothing['T-Shirt']) == 0:
        upper_array[1] = 0 
    else:
        upper_array[1] = 1 
    # Long-Sleeved Shirt
    if len(upper_clothing['Long-sleeved Shirt']) == 0:
        upper_array[2] = 0 
    else:
        upper_array[2] = 1 
    # Athletic Top
    if len(upper_clothing['Athletic Top']) == 0:
        upper_array[3] = 0 
    else:
        upper_array[3] = 1     
    # Button-down Shirt
    if len(upper_clothing['Button-down Shirt']) == 0:
        upper_array[4] = 0 
    else:
        upper_array[4] = 1     
    # Polo Shirt
    if len(upper_clothing['Polo Shirt']) == 0:
        upper_array[5] = 0 
    else:
        upper_array[5] = 1  
    # Dress Shirt
    if len(upper_clothing['Dress Shirt']) == 0:
        upper_array[6] = 0 
    else:
        upper_array[6] = 1  
    # Suit Jacket
    if len(upper_clothing['Suit Jacket']) == 0:
        upper_array[7] = 0 
    else:
        upper_array[7] = 1  
    # Blazer
    if len(upper_clothing['Blazer']) == 0:
        upper_array[8] = 0 
    else:
        upper_array[8] = 1  
    # Hoodie
    if len(upper_clothing['Hoodie']) == 0:
        upper_array[9] = 0 
    else:
        upper_array[9] = 1  
    # Sweater
    if len(upper_clothing['Sweater']) == 0:
        upper_array[10] = 0 
    else:
        upper_array[10] = 1  
    # Blouse
    if len(upper_clothing['Blouse']) == 0:
        upper_array[11] = 0 
    else:
        upper_array[11] = 1

    # Day Dress
    if len(upper_clothing['Day Dress']) == 0:
        upper_array[12] = 0 
    else:
        upper_array[12] = 1
    # Evening Dress
    if len(upper_clothing['Evening Dress']) == 0:
        upper_array[13] = 0 
    else:
        upper_array[13] = 1

    #Lower

    # Regular Shorts
    if len(lower_clothing['Shorts']) == 0:
        lower_array[0] = 0 
    else:
        lower_array[0] = 1
    # Athletic Shorts
    if len(lower_clothing['Athletic Shorts']) == 0:
        lower_array[1] = 0 
    else:
        lower_array[1] = 1
    # Athletic Pants
    if len(lower_clothing['Athletic Pants']) == 0:
        lower_array[2] = 0 
    else:
        lower_array[2] = 1
    # Jeans
    if len(lower_clothing['Jeans']) == 0:
        lower_array[3] = 0 
    else:
        lower_array[3] = 1
    # Trousers
    if len(lower_clothing['Trousers']) == 0:
        lower_array[4] = 0 
    else:
        lower_array[4] = 1
    # Skirt
    if len(lower_clothing['Skirt']) == 0:
        lower_array[5] = 0 
    else:
        lower_array[5] = 1
    # Dress Pants
    if len(lower_clothing['Dress Pants']) == 0:
        lower_array[6] = 0 
    else:
        lower_array[6] = 1

    #Outer
    # Light Jacket
    if len(outer_clothing['Light Jacket']) == 0:
        outer_array[0] = 0 
    else:
        outer_array[0] = 1
    # Heavy Jacket
    if len(outer_clothing['Winter Jacket']) == 0:
        outer_array[1] = 0 
    else:
        outer_array[1] = 1
    # Rain Jacket
    if len(outer_clothing['Rain Jacket']) == 0:
        outer_array[2] = 0 
    else:
        outer_array[2] = 1
    
    #Shoes 
    # Casual Shoes
    if len(shoes_clothing['Casual Shoes']) == 0:
        shoes_array[0] = 0 
    else:
        shoes_array[0] = 1
    # Athletic Shoes
    if len(shoes_clothing['Athletic Shoes']) == 0:
        shoes_array[1] = 0 
    else:
        shoes_array[1] = 1
    # Dress Shoes
    if len(shoes_clothing['Dress Shoes']) == 0:
        shoes_array[2] = 0 
    else:
        shoes_array[2] = 1
    # Dressy Casual  Shoes
    if len(shoes_clothing['Business Casual Shoes']) == 0:
        shoes_array[3] = 0 
    else:
        shoes_array[3] = 1
    

    upper_list = [features['outside_warmth'], features['casual_formal'], features['comfort'], features['athletic']]
    lower_list = [features['outside_warmth'], features['casual_formal'], features['comfort'], math.fabs(1-features['athletic'])]
    outer_list = [features['outside_warmth'], features['casual_formal'], features['comfort'], features['rain'], features['snow']]
    shoes_list = [features['casual_formal'], features['comfort'], math.fabs(1-features['athletic'])]
    upper_instance = Instance.create_instance(upper_list, classname='weka.core.DenseInstance', weight= 1.0)
    lower_instance = Instance.create_instance(lower_list, classname='weka.core.DenseInstance', weight= 1.0)
    outer_instance = Instance.create_instance(outer_list, classname='weka.core.DenseInstance', weight= 1.0)
    shoes_instance = Instance.create_instance(shoes_list, classname='weka.core.DenseInstance', weight= 1.0)

    upper_path = '/home/leo/models/uppermodel2.model'
    lower_path = '/home/leo/models/lowermodel2.model'
    outer_path = '/home/leo/models/outermodel2.model'
    shoes_path = '/home/leo/models/shoesmodel7.model'

    upper_classifier = Classifier(jobject=serialization.read(upper_path))
    lower_classifier = Classifier(jobject=serialization.read(lower_path))
    outer_classifier = Classifier(jobject=serialization.read(outer_path))
    shoes_classifier = Classifier(jobject=serialization.read(shoes_path))

    upper_predictions = upper_classifier.distribution_for_instance(upper_instance)
    lower_predictions = lower_classifier.distribution_for_instance(lower_instance)
    outer_predictions = outer_classifier.distribution_for_instance(outer_instance)
    shoes_predictions = shoes_classifier.distribution_for_instance(shoes_instance)


    if features['rain'] == 1:
        rainstring = 'No'
    if features['rain'] == 3:
        rainstring = 'Yes'
    if features['snow'] == 1:
        snowstring = 'No'
    if features['snow'] == 3:
        snowstring = 'Yes'
    if features['athletic'] == 1:
        athleticstring = 'No'
    if features['athletic'] == 0:
        athleticstring = 'Yes'

    print "Features being Classified:"
    print "Outside Warmth:", features['outside_warmth'], "Inside-Outside:", features['inside_outside'], "Casual-Formal:", features['casual_formal'], "Comfort:", features['comfort'], "Athletic:", athleticstring, "Rain:", rainstring, "Snow:", snowstring



    #Remove Clothing Options User Doesn't Own
    for i in range(len(upper_array)):
        if upper_array[i] == 0:
            upper_prediction_array.append(0)
        else:
            upper_prediction_array.append(upper_predictions[i])

    for i in range(len(lower_array)):
        if lower_array[i] == 0:
            lower_prediction_array.append(0)
        else:
            lower_prediction_array.append(lower_predictions[i])

    for i in range(len(outer_array)):
        if outer_array[i] == 0:
            outer_prediction_array.append(0)
        else:
            outer_prediction_array.append(outer_predictions[i])

    for i in range(len(shoes_array)):
        if shoes_array[i] == 0:
            shoes_prediction_array.append(0)
        else:
            shoes_prediction_array.append(shoes_predictions[i])

    #Find the top 3 options for each classifier
    max_index_upper1 = 0
    max_index_upper2 = 0
    max_index_upper3 = 0
    max_index_upper4 = 0
    max_index_upper5 = 0


    for i in range(1,len(upper_prediction_array)):
        n = upper_prediction_array[max_index_upper1]
        if upper_prediction_array[i] > n:
            max_index_upper1 = i

    upper_prediction_array[max_index_upper1] = 0

    for i in range(1, len(upper_prediction_array)):
        n = upper_prediction_array[max_index_upper2]
        if upper_prediction_array[i] > n:
            max_index_upper2 = i

    upper_prediction_array[max_index_upper2] = 0

    for i in range(1, len(upper_prediction_array)):
        n = upper_prediction_array[max_index_upper3]
        if upper_prediction_array[i] > n:
            max_index_upper3 = i

    upper_prediction_array[max_index_upper3] = 0
    
    for i in range(1, len(upper_prediction_array)):
        n = upper_prediction_array[max_index_upper4]
        if upper_prediction_array[i] > n:
            max_index_upper4 = i
    
    upper_prediction_array[max_index_upper4] = 0   

    for i in range(1, len(upper_prediction_array)):
        n = upper_prediction_array[max_index_upper5]
        if upper_prediction_array[i] > n:
            max_index_upper5 = i

    upper_indices = [max_index_upper1, max_index_upper2, max_index_upper3, max_index_upper4, max_index_upper5]

    max_index_lower1 = 0
    max_index_lower2 = 0
    max_index_lower3 = 0
    max_index_lower4 = 0
    max_index_lower5 = 0        

    for i in range(1,len(lower_prediction_array)):
        n = lower_prediction_array[max_index_lower1]
        if lower_prediction_array[i] > n:
            max_index_lower1 = i

    lower_prediction_array[max_index_lower1] = 0

    for i in range(1,len(lower_prediction_array)):
        n = lower_prediction_array[max_index_lower2]
        if lower_prediction_array[i] > n:
            max_index_lower2 = i

    lower_prediction_array[max_index_lower2] = 0

    for i in range(1,len(lower_prediction_array)):
        n = lower_prediction_array[max_index_lower3]
        if lower_prediction_array[i] > n:
            max_index_lower3 = i
    
    lower_prediction_array[max_index_lower3] = 0
    
    for i in range(1, len(lower_prediction_array)):
        n = lower_prediction_array[max_index_lower4]
        if lower_prediction_array[i] > n:
            max_index_lower4 = i

    lower_prediction_array[max_index_lower4] = 0

    for i in range(1, len(lower_prediction_array)):
        n = lower_prediction_array[max_index_lower5]
        if lower_prediction_array[i] > n:
            max_index_lower5 = i
    
    lower_indices = [max_index_lower1, max_index_lower2, max_index_lower3, max_index_lower4, max_index_lower5]

    max_index_outer1 = 0
    max_index_outer2 = 0
    max_index_outer3 = 0

    for i in range(1, len(outer_prediction_array)):
        n = outer_prediction_array[max_index_outer1]
        if outer_prediction_array[i] > n:
            max_index_outer1 = i

    outer_prediction_array[max_index_outer1] = 0

    for i in range(1, len(outer_prediction_array)):
        n = outer_prediction_array[max_index_outer2]
        if outer_prediction_array[i] > n:
            max_index_outer2 = i

    outer_prediction_array[max_index_outer2] = 0

    for i in range(1, len(outer_prediction_array)):
        n = outer_prediction_array[max_index_outer3]
        if outer_prediction_array[i] > n:
            max_index_outer3 = i

    outer_indices = [max_index_outer1, max_index_outer2, max_index_outer3]

    max_index_shoes1 = 0
    max_index_shoes2 = 0
    max_index_shoes3 = 0
    max_index_shoes4 = 0

    for i in range(1, len(shoes_prediction_array)):
        n = shoes_prediction_array[max_index_shoes1]
        if shoes_prediction_array[i] > n:
            max_index_shoes1 = i

    shoes_prediction_array[max_index_shoes1] = 0

    for i in range(1, len(shoes_prediction_array)):
        n = shoes_prediction_array[max_index_shoes2]
        if shoes_prediction_array[i] > n:
            max_index_shoes2 = i

    shoes_prediction_array[max_index_shoes2] = 0

    for i in range(1, len(shoes_prediction_array)):
        n = shoes_prediction_array[max_index_shoes3]
        if shoes_prediction_array[i] > n:
            max_index_shoes3 = i

    shoes_prediction_array[max_index_shoes3] = 0

    for i in range(1, len(shoes_prediction_array)):
        n = shoes_prediction_array[max_index_shoes4]
        if shoes_prediction_array[i] > n:
            max_index_shoes4 = i
    
    shoes_indices = [max_index_shoes1, max_index_shoes2, max_index_shoes3, max_index_shoes4]
    
    print "Outer Indices:", outer_indices
    FSM.received_inputs()
    print "Exiting Program"
Example No. 27
class WekaEstimator(BaseEstimator, OptionHandler, RegressorMixin, ClassifierMixin):
    """
    Wraps a Weka classifier (classifier/regressor) within the scikit-learn framework.
    """

    def __init__(self, jobject=None, classifier=None, classname=None, options=None,
                 nominal_input_vars=None, nominal_output_var=None,
                 num_nominal_input_labels=None, num_nominal_output_labels=None):
        """
        Initializes the estimator. Can be either instantiated via the following priority of parameters:
        1. JB_Object representing a Java Classifier object
        2. Classifier pww3 wrapper
        3. classname/options

        :param jobject: the JB_Object representing a Weka classifier to use
        :type jobject: JB_Object
        :param classifier: the classifier wrapper to use
        :type classifier: Classifier
        :param classname: the classname of the Weka classifier to instantiate
        :type classname: str
        :param options: the command-line options of the Weka classifier to instantiate
        :type options: list
        :param nominal_input_vars: the list of 0-based indices of attributes to convert to nominal or range string with 1-based indices
        :type nominal_input_vars: list or str
        :param nominal_output_var: whether to convert the output variable to a nominal one
        :type nominal_output_var: bool
        :param num_nominal_input_labels: the dictionary with the number of labels for the nominal input variables (key is 0-based attribute index)
        :type num_nominal_input_labels: dict
        :param num_nominal_output_labels: the number of labels for the output variable
        :type num_nominal_output_labels: int
        """
        if jobject is not None:
            _jobject = jobject
        elif classifier is not None:
            _jobject = classifier.jobject
        elif classname is not None:
            if options is None:
                options = []
            classifier = Classifier(classname=classname, options=options)
            _jobject = classifier.jobject
        else:
            raise Exception("At least Java classname must be provided!")

        if not is_instance_of(_jobject, "weka.classifiers.Classifier"):
            raise Exception("Java object does not implement weka.classifiers.Classifier!")

        super(WekaEstimator, self).__init__(_jobject)
        self._classifier = Classifier(jobject=_jobject)
        self.header_ = None
        self.classes_ = None
        # the following references are required for get_params/set_params
        self._classname = classname
        self._options = options
        self._nominal_input_vars = nominal_input_vars
        self._nominal_output_var = nominal_output_var
        self._num_nominal_input_labels = num_nominal_input_labels
        self._num_nominal_output_labels = num_nominal_output_labels

    @property
    def classifier(self):
        """
        Returns the underlying classifier object, if any.

        :return: the classifier object
        :rtype: Classifier
        """
        return self._classifier

    @property
    def header(self):
        """
        Returns the underlying dataset header, if any.

        :return: the dataset structure
        :rtype: Instances
        """
        return self.header_

    def fit(self, data, targets):
        """
        Trains the estimator.

        :param data: the input variables as matrix, array-like of shape (n_samples, n_features)
        :type data: ndarray
        :param targets: the class attribute column, array-like of shape (n_samples,)
        :type targets: ndarray
        :return: itself
        :rtype: WekaEstimator
        """
        data, targets = check_X_y(data, y=targets, dtype=None)
        if self._nominal_input_vars is not None:
            data = to_nominal_attributes(data, self._nominal_input_vars)
        if self._nominal_output_var is not None:
            targets = to_nominal_labels(targets)
        d = to_instances(data, targets,
                         num_nominal_labels=self._num_nominal_input_labels,
                         num_class_labels=self._num_nominal_output_labels)
        self._classifier.build_classifier(d)
        self.header_ = d.template_instances(d, 0)
        if d.class_attribute.is_nominal:
            self.classes_ = d.class_attribute.values
        else:
            self.classes_ = None
        return self

    def predict(self, data):
        """
        Performs predictions with the trained classifier.

        :param data: the data matrix to generate predictions for, array-like of shape (n_samples, n_features)
        :type data: ndarray
        :return: the predicted class labels or regression values
        :rtype: ndarray
        """
        check_is_fitted(self)
        if self._nominal_input_vars is not None:
            data = to_nominal_attributes(data, self._nominal_input_vars)
        data = check_array(data, dtype=None)
        result = []
        for d in data:
            inst = to_instance(self.header_, d, missing_value())
            if self.header_.class_attribute.is_nominal:
                result.append(self.header_.class_attribute.value(int(self._classifier.classify_instance(inst))))
            else:
                result.append(self._classifier.classify_instance(inst))
        return np.array(result)

    def predict_proba(self, data):
        """
        Performs predictions and returns class probabilities.

        :param data: the data matrix to generate predictions for, array-like of shape (n_samples, n_features)
        :type data: ndarray
        :return: the probabilities
        """
        check_is_fitted(self)
        if self._nominal_input_vars is not None:
            data = to_nominal_attributes(data, self._nominal_input_vars)
        data = check_array(data, dtype=None)
        result = []
        for d in data:
            inst = to_instance(self.header_, d, missing_value())
            result.append(self._classifier.distribution_for_instance(inst))
        return np.array(result)

    def get_params(self, deep=True):
        """
        Returns the parameters for this classifier, basically classname and options list.

        :param deep: ignored
        :type deep: bool
        :return: the dictionary with options
        :rtype: dict
        """
        result = dict()
        result["classname"] = self._classname
        result["options"] = self._options
        if self._nominal_input_vars is not None:
            result["nominal_input_vars"] = self._nominal_input_vars
        if self._nominal_output_var is not None:
            result["nominal_output_var"] = self._nominal_output_var
        if self._num_nominal_input_labels is not None:
            result["num_nominal_input_labels"] = self._num_nominal_input_labels
        if self._num_nominal_output_labels is not None:
            result["num_nominal_output_labels"] = self._num_nominal_output_labels
        return result

    def set_params(self, **params):
        """
        Sets the options for the classifier, expects 'classname' and 'options'.

        :param params: the parameter dictionary
        :type params: dict
        """
        if len(params) == 0:
            return
        if "classname" not in params:
            raise Exception("Cannot find 'classname' in parameters!")
        if "options" not in params:
            raise Exception("Cannot find 'options' in parameters!")
        self._classname = params["classname"]
        self._options = params["options"]
        self._classifier = Classifier(classname=self._classname, options=self._options)
        self._nominal_input_vars = None
        if "nominal_input_vars" in params:
            self._nominal_input_vars = params["nominal_input_vars"]
        self._nominal_output_var = None
        if "nominal_output_var" in params:
            self._nominal_output_var = params["nominal_output_var"]
        self._num_nominal_input_labels = None
        if "num_nominal_input_labels" in params:
            self._num_nominal_input_labels = params["num_nominal_input_labels"]
        self._num_nominal_output_labels = None
        if "num_nominal_output_labels" in params:
            self._num_nominal_output_labels = params["num_nominal_output_labels"]

    def __str__(self):
        """
        For printing the model.

        :return: the model representation, if any
        :rtype: str
        """
        if self._classifier is None:
            return self._classname + ": No model built yet"
        else:
            return str(self._classifier)

    def __copy__(self):
        """
        Creates a deep copy of itself.

        :return: the copy
        :rtype: WekaEstimator
        """
        result = WekaEstimator(jobject=deepcopy(self.jobject))
        result._classname = self._classname
        result._options = None if (self._options is None) else self._options[:]
        result._nominal_input_vars = None if (self._nominal_input_vars is None) else self._nominal_input_vars[:]
        result._nominal_output_var = self._nominal_output_var
        result._num_nominal_input_labels = self._num_nominal_input_labels
        result._num_nominal_output_labels = self._num_nominal_output_labels
        return result

    def __repr__(self, N_CHAR_MAX=700):
        """
        Returns a valid Python string using its classname and options.

        :param N_CHAR_MAX: ignored
        :type N_CHAR_MAX: int
        :return: the representation
        :rtype: str
        """
        return "WekaEstimator(classname='%s', options=%s, nominal_input_vars=%r, nominal_output_var=%s)" % (
            self._classifier.classname, str(self._classifier.options),
            self._nominal_input_vars, str(self._nominal_output_var))
Exemplo n.º 28
0
class SklearnWekaWrapper(object):

	def __init__(self, class_name, options=None):

		if options is not None:
			self._classifier = Classifier(classname=class_name, options=[option for option in options.split()])
		else:
			self._classifier = Classifier(classname=class_name)

	def fit(self, training_set, ground_truth):

		self.ground_truth = ground_truth

		training_set = self._sklearn2weka(training_set, self.ground_truth)
		training_set.class_is_last()

		self._classifier.build_classifier(training_set)

	def predict(self, testing_set):

		testing_set = self._sklearn2weka(testing_set, self.ground_truth)
		testing_set.class_is_last()

		preds = []
		for index, inst in enumerate(testing_set):
			pred = self._classifier.classify_instance(inst)
			preds.append(pred)

		preds = np.vectorize(self._dict.get)(preds)

		return np.array(preds)

	def predict_proba(self, testing_set):

		testing_set = self._sklearn2weka(testing_set, self.ground_truth)
		testing_set.class_is_last()

		dists = []
		for index, inst in enumerate(testing_set):
			dist = self._classifier.distribution_for_instance(inst)
			dists.append(dist)

		return np.array(dists)

	def _sklearn2weka(self, features, labels=None):

		# OrdinalEncoder replaces scikit-learn's removed CategoricalEncoder
		encoder = OrdinalEncoder()
		labels_nominal = encoder.fit_transform(np.array(labels).reshape(-1, 1))

		# build the nominal-code -> original-label mapping once, during training
		if not hasattr(self, '_dict') and labels is not None:

			mapping = {}

			for label, nominal in zip(labels, labels_nominal):
				if nominal.item(0) not in mapping:
					mapping[nominal.item(0)] = label

			self._dict = mapping

		labels_column = np.reshape(labels_nominal,[labels_nominal.shape[0], 1])

		weka_dataset = ndarray_to_instances(np.ascontiguousarray(features, dtype=np.float_), 'weka_dataset')
		weka_dataset.insert_attribute(Attribute.create_nominal('tag', [str(float(i)) for i in range(len(self._dict))]), features.shape[1])

		if labels is not None:
			for index, inst in enumerate(weka_dataset):
				inst.set_value(features.shape[1], labels_column[index])
				weka_dataset.set_instance(index,inst)

		return weka_dataset
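
A short, hedged usage sketch for the wrapper above (hypothetical data; assumes the JVM is running and numpy is imported as np): fit() builds the Weka model, predict() maps Weka's numeric class indices back to the original labels via the internal mapping, and predict_proba() returns one class distribution per instance.

X_train = np.random.rand(100, 5)
y_train = np.random.choice(['benign', 'malignant'], size=100)
X_test = np.random.rand(20, 5)

clf = SklearnWekaWrapper('weka.classifiers.trees.J48', options='-C 0.3')
clf.fit(X_train, y_train)
labels = clf.predict(X_test)        # original string labels
probas = clf.predict_proba(X_test)  # shape (n_samples, n_classes)
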
Exemplo n.º 29
0
#df.drop(['id'],1,inplace=True)

from weka.classifiers import Classifier, Evaluation
from weka.core.classes import Random
#data = ...             # previously loaded data
data.class_is_last()  # set class attribute
dataTestResolutionChange.class_is_last()

classifier = Classifier(classname="weka.classifiers.trees.J48",
                        options=["-C", "0.3"])
# evaluation = Evaluation(data)                     # initialize with priors
# evaluation.crossvalidate_model(classifier, iris_data, 10, Random(42))  # 10-fold CV
classifier.build_classifier(data)

for index, inst in enumerate(dataTestResolutionChange):
    pred = classifier.classify_instance(inst)
    dist = classifier.distribution_for_instance(inst)

    print(str(pred))
    #print(index)
    print(inst)

    print(
        str(index + 1) + ": label index=" + str(pred) +
        ", class distribution=" + str(dist))
# print(evaluation.summary())
# print("pctCorrect: " + str(evaluation.percent_correct))
# print("incorrect: " + str(evaluation.incorrect))

jvm.stop()
Exemplo n.º 30
0
def classify(fileToClassify,
             fileToCompare,
             predictionYear=None,
             pastResultYears=None,
             prefix="NFL",
             classifierFunction=[
                 "LinearRegression",
                 ["-S", "0", "-R", "1.0E-8", "-num-decimal-places", "4"]
             ]):
    # Start Java VM
    jvm.start(max_heap_size="1024m")
    # Load CSV files into weka loader
    loader = Loader(classname="weka.core.converters.CSVLoader")
    fileToClassifyData = loader.load_file(fileToClassify)
    fileToClassifyData.class_is_last()
    fileToCompareData = loader.load_file(fileToCompare)
    fileToCompareData.class_is_last()
    predictionYear = "".join(map(str, predictionYear))
    pastResultYears = "-".join(map(str, pastResultYears))

    # Generate Classifier based on data
    classifier = Classifier(classname="weka.classifiers.functions.{}".format(
        classifierFunction[0]),
                            options=classifierFunction[1])
    classifier.build_classifier(fileToClassifyData)
    print(classifier)
    # Var builder for graph
    count = 0.0
    countPred = 0.0
    graphDetails = [
        ['TITLE'],
        [
            '{1} Data Ratings (Official) {0}'.format(pastResultYears, prefix),
            [], []
        ],
        [
            '{1} Data Ratings (Predicted) {0}'.format(predictionYear, prefix),
            [], []
        ]
    ]

    # Time to predict results based on classifier
    for index, inst in enumerate(fileToCompareData):
        pred = classifier.classify_instance(inst)
        # the last value of the instance is the class attribute, i.e. the actual rating
        temp = list(enumerate(inst))[-1][1]
        countPred += pred
        count += temp
        # index=list(enumerate(inst))[3+1][1]
        index += 1
        print('YOLO', list(enumerate(inst))[3][1])
        print("{0:.3f} accurate compared to results.".format(countPred /
                                                             count))

        dist = classifier.distribution_for_instance(inst)
        # NFL Results
        graphDetails[1][1].append(index)
        graphDetails[1][2].append(temp)

        # Predicted Results
        graphDetails[2][1].append(index)
        graphDetails[2][2].append(pred)
        print(
            str(index + 1) + ": label index=" + str(pred) +
            ", class distribution=" + str(dist) + " , original: " + str(temp))
    graphDetails[0][
        0] = 'Player Rating Predictions For {0} ({1:.3f} Accurate)'.format(
            predictionYear, 100 - (countPred / count))
    jvm.stop()
    BuildGraph(graphDetails)
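
A hedged invocation sketch for the function above (the CSV file names and years are placeholders, and BuildGraph must be defined elsewhere in the module):

classify("nfl_ratings_train.csv", "nfl_ratings_2016.csv",
         predictionYear=[2016], pastResultYears=[2013, 2014, 2015],
         prefix="NFL")
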
Exemplo n.º 31
0
tempList = list()

jvm.start()

data_dir = "C:\Users\Softmints\Desktop\Diss\Code\WEKA"

from weka.core.converters import Loader
#Prepare ARFF Loader
loader = Loader(classname="weka.core.converters.ArffLoader")
#Assign and load ARFF data file
data = loader.load_file(data_dir + r"\TestDataEleventoTwentyTwo.arff")
data.class_is_last()

from weka.classifiers import Classifier
#Classify data using J48 classifer
cls = Classifier(classname="weka.classifiers.trees.J48", options=["-C", "0.3"])
cls.build_classifier(data)

for index, inst in enumerate(data):
    #Output prediction and distribution
    pred = cls.classify_instance(inst)
    dist = cls.distribution_for_instance(inst)
    print(str(index) + ": label index=" + str(pred) + ", class distribution=" + str(dist))

    if str(pred) == "0.0":
        tempList.append(str(index))

print(tempList)

jvm.stop()
Exemplo n.º 32
0
    def perceptron_classifier(cls, features, settings):
        # load the dataset
        loader = Loader("weka.core.converters.ArffLoader")
        instancias = loader.load_file(
            "./src/results/caracteristicas_sounds.arff")
        # flag the last attribute as the class
        instancias.class_is_last()
        # define the parameters
        learning_rate = str(settings['learningRate'])
        training_time = str(settings['trainingTime'])
        momentum = "0.2"
        hidden_layers = "a"
        seed = 2
        cross_validation = 20
        print('Learning Rate', learning_rate)
        print('Training Time', training_time)
        # load the Multilayer Perceptron classifier with the parameters defined above
        classifier = Classifier(
            classname="weka.classifiers.functions.MultilayerPerceptron",
            options=[
                "-L", learning_rate, "-M", momentum, "-N", training_time, "-V",
                "0", "-S",
                str(seed), "-E", "20", "-H", hidden_layers
            ])
        # build the classifier and initialize the evaluation
        classifier.build_classifier(instancias)
        evaluation = Evaluation(instancias)
        # apply cross-validation on the randomized (and stratified) copy
        rnd = Random(seed)
        rand_data = Instances.copy_instances(instancias)
        rand_data.randomize(rnd)
        if rand_data.class_attribute.is_nominal:
            rand_data.stratify(cross_validation)
        for i in range(cross_validation):
            # training fold
            train = rand_data.train_cv(cross_validation, i)
            # testing fold
            test = rand_data.test_cv(cross_validation, i)

            # build and evaluate the classifier on this fold
            cls = Classifier.make_copy(classifier)
            cls.build_classifier(train)
            evaluation.test_model(cls, test)
        # create a new instance from the extracted features
        new_instance = Instance.create_instance(features)
        # add the new instance to the dataset
        instancias.add_instance(new_instance)
        # attach the new instance to the dataset so its attributes are defined
        new_instance.dataset = instancias
        # classify the new instance, returning the probability of each class
        classification = classifier.distribution_for_instance(new_instance)
        result = {
            'cat': round(classification[0] * 100, 2),
            'dog': round(classification[1] * 100, 2)
        }
        print("=== Setup ===")
        print("Classifier: " + classifier.to_commandline())
        print("Dataset: " + instancias.relationname)
        print("Cross Validation: " + str(cross_validation) + "folds")
        print("Seed: " + str(seed))
        print("")
        print(
            evaluation.summary("=== " + str(cross_validation) +
                               " -fold Cross-Validation ==="))
        print("Classificação", " - Gato: ", result['cat'], "  Cachorro: ",
              result['dog'])

        return result
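
A hypothetical call sketch: the first parameter is named cls, so the method presumably lives on a class as a classmethod; the owning class name below is a placeholder, and features must be a numeric vector matching the ARFF header loaded inside the method.

settings = {'learningRate': 0.3, 'trainingTime': 500}
result = SoundClassifier.perceptron_classifier(features, settings)  # placeholder class name
print(result)  # e.g. {'cat': 73.5, 'dog': 26.5}
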
Exemplo n.º 33
0
def predictionFromModel():
    import weka.core.serialization as serialization
    from weka.classifiers import Classifier
    from weka.classifiers import Evaluation

    predictionsPath = outputPrediction
    models_dir = inputModel
    modelsList = os.listdir(inputModel)
    data_dir = input
    folderList = os.listdir(inputModel)
    i = 0
    loader = Loader(classname="weka.core.converters.ArffLoader")
    from weka.core.classes import Random
    from weka.core.dataset import Instances

    data = loader.load_file(os.path.join(inputModel, "genderTest.arff"))
    data.class_is_last()
    modelName = "GenderModel.model"
    objects = serialization.read_all(os.path.join(inputModel, modelName))
    trainedModel = Classifier(jobject=objects[0])
    genderFile = open(os.path.join(outputPrediction, 'Gender_Predictions.csv'),
                      'w')
    with genderFile:
        j = -1
        fieldnames = ['Test_Author_Profile_Id', 'Gender']
        writer = csv.DictWriter(genderFile, fieldnames=fieldnames)
        writer.writeheader()
        for index, inst in enumerate(data):
            j = j + 1
            pred = trainedModel.classify_instance(inst)
            dist = trainedModel.distribution_for_instance(inst)
            print(
                str(index + 1) + ": label index=" + str(pred) +
                ", class distribution=" + str(dist))
            if (str(pred) == '0.0'):
                writer.writerow({
                    'Test_Author_Profile_Id': my_list[j],
                    'Gender': 'male'
                })
            if (str(pred) == '1.0'):
                writer.writerow({
                    'Test_Author_Profile_Id': my_list[j],
                    'Gender': 'female'
                })

    data = loader.load_file(os.path.join(inputModel, "ageTest.arff"))
    data.class_is_last()
    modelName = "AgeModel.model"
    objects = serialization.read_all(os.path.join(inputModel, modelName))
    trainedModel = Classifier(jobject=objects[0])
    ageFile = open(os.path.join(outputPrediction, 'Age_Predictions.csv'), 'w')

    with ageFile:
        j = -1
        fieldnames = ['Test_Author_Profile_Id', 'Age']
        writer = csv.DictWriter(ageFile, fieldnames=fieldnames)
        writer.writeheader()
        for index, inst in enumerate(data):
            j = j + 1
            pred = trainedModel.classify_instance(inst)
            dist = trainedModel.distribution_for_instance(inst)
            print(
                str(index + 1) + ": label index=" + str(pred) +
                ", class distribution=" + str(dist))
            if (str(pred) == '0.0'):
                writer.writerow({
                    'Test_Author_Profile_Id': my_list[j],
                    'Age': '15-19'
                })
            if (str(pred) == '1.0'):
                writer.writerow({
                    'Test_Author_Profile_Id': my_list[j],
                    'Age': '20-24'
                })
            if (str(pred) == '2.0'):
                writer.writerow({
                    'Test_Author_Profile_Id': my_list[j],
                    'Age': '25-xx'
                })
    os._exit(0)
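
For reference, the .model files read above were presumably written with the same serialization module; a minimal saving sketch under that assumption (classifier being a previously built Classifier and inputModel the destination directory):

import os
import weka.core.serialization as serialization

serialization.write_all(os.path.join(inputModel, "GenderModel.model"),
                        [classifier.jobject])
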
Exemplo n.º 34
0
class Weka(object):

    data = None
    dataDir = None
    classifier = None

    def __init__(self, dataDir='.'):
        self.dataDir = dataDir

        jvm.start()

    # Initialize data with the contents of an ARFF file
    def initData(self, arffFile):
        loader = Loader(classname="weka.core.converters.ArffLoader")
        print(self.dataDir + '/' + arffFile)
        self.data = loader.load_file(self.dataDir + '/' + arffFile)
        self.data.class_is_last()

        print('Loading file ' + self.dataDir + '/' + arffFile)
        # print(data)

    # Train the classifier
    def trainData(self,
                  arffFile=None,
                  classname="weka.classifiers.trees.J48",
                  options=["-C", "0.3"]):
        if arffFile is not None:
            self.initData(arffFile)

        if self.data is None:
            return

        print('Building classifier ' + str(classname) + ' ' + ' '.join(
            options))
        self.classifier = Classifier(classname=classname, options=options)
        self.classifier.build_classifier(self.data)

    # Classify the instances of an ARFF file
    def classify(self, predictFile):

        if self.data is None or self.classifier is None:
            return [-1]

        loader = Loader(classname="weka.core.converters.ArffLoader")
        predict_data = loader.load_file(self.dataDir + '/' + predictFile)
        predict_data.class_is_last()

        # parse the nominal labels out of the class attribute's string
        # representation (the fixed offsets 19 here and 7 below assume the
        # attribute and label names used by this project's ARFF files)
        values = str(predict_data.class_attribute)[19:-1].split(',')

        classes = []

        for index, inst in enumerate(predict_data):
            #pred = self.classifier.classify_instance(inst)
            prediction = self.classifier.distribution_for_instance(inst)
            cl = int(values[prediction.argmax()][7:])

            #print('Class:', cl)
            classes.append(cl)

        return classes

    # Run a cross-validation and print the results to standard output
    def crossValidate(self,
                      arffFile=None,
                      classname="weka.classifiers.trees.J48",
                      options=["-C", "0.3"]):

        if arffFile is not None:
            self.initData(arffFile)

        if self.data is None:
            return

        print('Classifier ' + str(classname) + ' ' + ' '.join(options))
        cls = Classifier(classname=classname, options=options)

        evl = Evaluation(self.data)
        evl.crossvalidate_model(cls, self.data, 10, Random(1))

        print(evl.percent_correct)
        print(evl.summary())
        print(evl.class_details())
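
A minimal usage sketch of the class above, with placeholder ARFF file names (assumes the module-level imports used by the class, i.e. jvm, Loader, Classifier, Evaluation and Random, are in scope):

weka = Weka(dataDir='./data')
weka.trainData('train.arff', classname="weka.classifiers.trees.J48", options=["-C", "0.3"])
predicted = weka.classify('unlabeled.arff')
weka.crossValidate('train.arff')
jvm.stop()
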
Exemplo n.º 35
0
class SklearnWekaWrapper(object):

    def __init__(self, classifier_name):
        # Defaults
        class_name = 'weka.classifiers.trees.RandomForest'
        options = None
        self.proba = None

        if classifier_name == 'wrf':
            class_name = 'weka.classifiers.trees.RandomForest'
            options = None
        elif classifier_name == 'wj48':
            class_name = 'weka.classifiers.trees.J48'
            options = None
        elif classifier_name == 'wnb':
            class_name = 'weka.classifiers.bayes.NaiveBayes'
            options = '-D'
        elif classifier_name == 'wbn':
            class_name = 'weka.classifiers.bayes.BayesNet'
            options = '-D -Q weka.classifiers.bayes.net.search.local.TAN -- -S BAYES -E weka.classifiers.bayes.net.estimate.SimpleEstimator -- -A 0.5'
        elif classifier_name == 'wsv':
            # Implementation of one-class SVM used in Anomaly Detection mode
            class_name = 'weka.classifiers.functions.LibSVM'
            options = '-S 2'

        if options is not None:
            self._classifier = Classifier(classname=class_name, options=[option for option in options.split()])
        else:
            self._classifier = Classifier(classname=class_name)

        self.model_ = None

    def fit(self, training_set, ground_truth):
        self.ground_truth = ground_truth

        training_set = self._sklearn2weka(training_set, self.ground_truth)
        training_set.class_is_last()

        t = time()
        self._classifier.build_classifier(training_set)
        t = time() - t  # training wall-clock time

        self.model_ = self._classifier
        self.tr_ = t

        return self

    def predict(self, testing_set):
        testing_set = self._sklearn2weka(testing_set, self.oracle)
        testing_set.class_is_last()

        preds = []
        dists = []
        t = 0
        for index, inst in enumerate(testing_set):
            # accumulate the total classification time over all instances
            start = time()
            pred = self._classifier.classify_instance(inst)
            t += time() - start
            dist = self._classifier.distribution_for_instance(inst)
            preds.append(pred)
            dists.append(dist)

        preds = np.vectorize(self._dict.get)(preds)
        self.proba = dists

        self.te_ = t

        return np.array(preds)

    def predict_proba(self, testing_set):
        if self.proba is None:
            self.predict(testing_set)
        return self.proba

    def set_oracle(self, oracle):
        self.oracle = oracle

    def _sklearn2weka(self, features, labels=None):
        # All weka datasets have to be a zero-based coding for the column of labels
        # We can use non-aligned labels for training and testing because the labels
        # in testing phase are only used to obtain performance, but not for preds.
        # We compute performance off-line.
        labels_encoder = OrdinalEncoder()
        labels_nominal = labels_encoder.fit_transform(np.array(labels).reshape(-1, 1))

        labels_column = np.reshape(labels_nominal, [labels_nominal.shape[0], 1])

        # TODO: find another way to do the same
        # The follow is used to assign the value of _dict only in training phase
        if not hasattr(self, '_dict') and labels is not None:

            mapping = {}

            for label, nominal in zip(labels, labels_nominal):
                if nominal.item(0) not in mapping:
                    mapping[nominal.item(0)] = label

            self._dict = mapping

        weka_dataset = ndarray_to_instances(np.ascontiguousarray(features, dtype=np.float_), 'weka_dataset')
        weka_dataset.insert_attribute(Attribute.create_nominal('tag', [str(float(i)) for i in range(len(self._dict))]),
                                      features.shape[1])

        if labels is not None:
            try:
                for index, inst in enumerate(weka_dataset):
                    inst.set_value(features.shape[1], labels_column[index])
                    weka_dataset.set_instance(index, inst)
            except TypeError as e:
                print('Error: it seems InstanceIterator does not implement a valid iterator.')
                print('Please, check the class definition in lib/python3.7/site-packages/weka/core/dataset.py.')
                print('This error could be due to the next() method: it should be declared as __next__().')
                exit()
        return weka_dataset
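
Usage note for the wrapper above: predict() encodes the test set with self.oracle, so set_oracle() must be called before predicting. A hedged sketch with hypothetical arrays:

clf = SklearnWekaWrapper('wrf')
clf.fit(X_train, y_train)
clf.set_oracle(y_test)            # labels used only to build the test Instances
y_pred = clf.predict(X_test)
probas = clf.predict_proba(X_test)
print(clf.tr_, clf.te_)           # training / total prediction time in seconds
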
Exemplo n.º 36
0
from weka.core.converters import Loader
import weka.core.jvm as jvm
from weka.classifiers import Classifier

jvm.start()

#loader = Loader(classname="weka.core.converters.JSONLoader")
#data = loader.load_file("test.json")
loader = Loader(classname="weka.core.converters.ArffLoader")
data = loader.load_file("games.arff")
data.class_is_last()

print(data)

cls = Classifier(classname="weka.classifiers.trees.J48", options=["-C", "0.3"])
cls.build_classifier(data)

for index, inst in enumerate(data):
    pred = cls.classify_instance(inst)
    dist = cls.distribution_for_instance(inst)
    print(
        str(index + 1) + ": label index=" + str(pred) +
        ", class distribution=" + str(dist))

jvm.stop()
Exemplo n.º 37
0
class SklearnWekaWrapper(object):

	def __init__(self, classifier_name):

		if classifier_name == 'wrf':
			class_name='weka.classifiers.trees.RandomForest'
			options=None
		elif classifier_name == 'wj48':
			class_name='weka.classifiers.trees.J48'
			options=None
		elif classifier_name == 'wnb':
			class_name='weka.classifiers.bayes.NaiveBayes'
			options='-D'
		elif classifier_name == 'wbn':
			class_name='weka.classifiers.bayes.BayesNet'
			options='-D -Q weka.classifiers.bayes.net.search.local.TAN -- -S BAYES -E weka.classifiers.bayes.net.estimate.SimpleEstimator -- -A 0.5'

		if options is not None:
			self._classifier = Classifier(classname=class_name, options=[option for option in options.split()])
		else:
			self._classifier = Classifier(classname=class_name)

	def fit(self, training_set, ground_truth):

		self.ground_truth = ground_truth

		training_set = self._sklearn2weka(training_set, self.ground_truth)
		training_set.class_is_last()

		self._classifier.build_classifier(training_set)

	def predict(self, testing_set):

		testing_set = self._sklearn2weka(testing_set, self.ground_truth)
		testing_set.class_is_last()

		preds = []
		for index, inst in enumerate(testing_set):
			pred = self._classifier.classify_instance(inst)
			preds.append(pred)

		preds = np.vectorize(self._dict.get)(preds)

		return np.array(preds)

	def predict_proba(self, testing_set):

		testing_set = self._sklearn2weka(testing_set, self.ground_truth)
		testing_set.class_is_last()

		dists = []
		for index, inst in enumerate(testing_set):
			dist = self._classifier.distribution_for_instance(inst)
			dists.append(dist)

		return np.array(dists)

	def set_oracle(self, oracle):

		pass

	def _sklearn2weka(self, features, labels=None):

		labels_encoder = OrdinalEncoder()
		labels_nominal = labels_encoder.fit_transform(np.array(labels).reshape(-1, 1))

		# build the nominal-code -> original-label mapping once, during training
		if not hasattr(self, '_dict') and labels is not None:

			mapping = {}

			for label, nominal in zip(labels, labels_nominal):
				if nominal.item(0) not in mapping:
					mapping[nominal.item(0)] = label

			self._dict = mapping

		labels_column = np.reshape(labels_nominal,[labels_nominal.shape[0], 1])

		weka_dataset = ndarray_to_instances(np.ascontiguousarray(features, dtype=np.float_), 'weka_dataset')
		weka_dataset.insert_attribute(Attribute.create_nominal('tag', [str(float(i)) for i in range(len(self._dict))]), features.shape[1])

		if labels is not None:
			for index, inst in enumerate(weka_dataset):
				inst.set_value(features.shape[1], labels_column[index])
				weka_dataset.set_instance(index,inst)

		return weka_dataset