def experiment_file_random(path_features, path_folder_save_results, options,
                           classifier, fold, random, name):
    """Cross-validate one classifier on one feature file and store the results.

    The data set at ``path_features`` is loaded with its last attribute as
    the class and evaluated with ``fold``-fold cross-validation.  A one-row
    summary (accuracy, error rate, confusion matrix text) is written to
    ``<path_folder_save_results>/<name>.csv`` and the collected predictions
    to ``<path_folder_save_results>/prediction/<name>.csv``.

    :param path_features: path of the data file to load
    :param path_folder_save_results: folder that receives the result files
    :param options: classifier options as a single command-line string
    :param classifier: Weka class name of the classifier
    :param fold: number of cross-validation folds
    :param random: seed passed to ``Random``
    :param name: base name (converted with ``str``) of the output files
    """
    print("start weka")
    learner = Classifier(classname=classifier,
                         options=weka.core.classes.split_options(options))

    dataset = converters.load_any_file(path_features)
    dataset.class_is_last()

    prediction_sink = PredictionOutput(
        classname="weka.classifiers.evaluation.output.prediction.CSV")
    evaluation = Evaluation(dataset)
    evaluation.crossvalidate_model(learner, dataset, fold, Random(random),
                                   prediction_sink)

    # Single-row table: every column holds exactly one value for this run.
    summary = pd.DataFrame(data={
        'percent_correct': [evaluation.percent_correct],
        'percent_incorrect': [evaluation.percent_incorrect],
        'confusion_matrix': [evaluation.matrix()],
    })

    csv_name = str(name) + '.csv'
    summary.to_csv(path_folder_save_results + '/' + csv_name, index=False)

    predictions_text = prediction_sink.buffer_content()
    prediction_path = path_folder_save_results + '/' + 'prediction/' + csv_name
    with open(prediction_path, 'w') as handle:
        handle.write(predictions_text)
def Boost_J48(data, rnm):
    """Evaluate AdaBoostM1 over a filtered J48 tree and write text reports.

    The last attribute of ``data`` is used as the class.  J48 is wrapped in a
    ``FilteredClassifier`` whose ``Remove`` filter is configured with
    ``-R first``; that wrapper is boosted with AdaBoostM1.  The model is built
    on the full data and evaluated with 10-fold cross-validation (seed 1).

    Files written, all prefixed with ``rnm``: ``_Boost_J48_Tree.txt``,
    ``_Boost_J48_Prediction.txt``, ``_Boost_j48_Evaluation.txt`` and the ROC
    plot ``_Boost_J48_ROC.png``.

    The reports are written with ``file.write`` inside ``with`` blocks so the
    handles are closed even when a write fails, and so the function no longer
    relies on the Python 2 ``print >> f`` statement (which raises
    ``TypeError`` on Python 3).  The emitted text matches what the Python 2
    print statements produced.

    :param data: Weka data set to train and evaluate on
    :param rnm: prefix (path plus base name) for all output files
    :return: percentage of correctly classified instances, as a string
    """
    data.class_is_last()
    fc1 = FilteredClassifier()
    fc1.classifier = Classifier(classname="weka.classifiers.trees.J48", options=["-C", "0.25", "-M", "2"])
    fc1.filter = Filter(classname="weka.filters.unsupervised.attribute.Remove", options=["-R", "first"])
    fc2 = SingleClassifierEnhancer(classname="weka.classifiers.meta.AdaBoostM1", options=["-P", "100", "-S", "1", "-I", "10"])
    fc2.classifier = fc1
    pred_output = PredictionOutput(classname="weka.classifiers.evaluation.output.prediction.CSV", options=["-p", "1"])
    folds = 10
    fc2.build_classifier(data)
    evaluation = Evaluation(data)
    evaluation.crossvalidate_model(fc2, data, folds, Random(1), pred_output)

    with open(rnm + '_Boost_J48_Tree.txt', 'w') as tree_file:
        # Two spaces after the colon: the literal's own space plus the
        # separator the print statement inserted between its arguments.
        tree_file.write("Filename:  " + str(rnm) + '\n')
        tree_file.write('\n\n' + '\n')
        tree_file.write(str(fc2) + '\n')

    with open(rnm + '_Boost_J48_Prediction.txt', 'w') as prediction_file:
        prediction_file.write('Filename: ' + str(rnm) + '\n')
        prediction_file.write('Prediction Summary: ' + str(pred_output.buffer_content()) + '\n')

    with open(rnm + '_Boost_j48_Evaluation.txt', 'w') as evaluation_file:
        evaluation_file.write('Filename: ' + str(rnm) + '\n')
        evaluation_file.write('Evaluation Summary: ' + str(evaluation.summary()) + '\n')
        evaluation_file.write('\n\n\n' + '\n')
        evaluation_file.write(str(evaluation.class_details()) + '\n')

    plot_roc(evaluation, class_index=[0,1], title=rnm, key_loc='best', outfile=rnm + '_Boost_J48_ROC.png', wait=False)
    return str(evaluation.percent_correct)
def RandomTree(data, rnm):
    """Evaluate a filtered RandomTree classifier and write text reports.

    The last attribute of ``data`` is used as the class.  RandomTree is
    wrapped in a ``FilteredClassifier`` whose ``Remove`` filter is configured
    with ``-R first``.  The wrapper is evaluated with 10-fold
    cross-validation (seed 1) and afterwards built on the full data so the
    tree can be printed.

    Files written, all prefixed with ``rnm``: ``_RT_Tree.txt``,
    ``_RT_Prediction.txt``, ``_RT_Evaluation.txt`` and the ROC plot
    ``_RT_ROC.png``.

    The reports are written with ``file.write`` inside ``with`` blocks so the
    handles are closed even when a write fails, and so the function no longer
    relies on the Python 2 ``print >> f`` statement (which raises
    ``TypeError`` on Python 3).  The emitted text matches what the Python 2
    print statements produced.

    :param data: Weka data set to train and evaluate on
    :param rnm: prefix (path plus base name) for all output files
    :return: percentage of correctly classified instances, as a string
    """
    data.class_is_last()
    fc = FilteredClassifier()
    fc.classifier = Classifier(classname="weka.classifiers.trees.RandomTree", options=["-K", "0", "-M", "1.0", "-V", "0.001", "-S", "1"])
    fc.filter = Filter(classname="weka.filters.unsupervised.attribute.Remove", options=["-R", "first"])
    pred_output = PredictionOutput(classname="weka.classifiers.evaluation.output.prediction.CSV", options=["-p", "1"])
    folds = 10
    evl = Evaluation(data)
    evl.crossvalidate_model(fc, data, folds, Random(1), pred_output)
    fc.build_classifier(data)

    with open(rnm + '_RT_Tree.txt', 'w') as tree_file:
        # Two spaces after the colon: the literal's own space plus the
        # separator the print statement inserted between its arguments.
        tree_file.write("Filename:  " + str(rnm) + '\n')
        tree_file.write('\n\n' + '\n')
        tree_file.write(str(fc) + '\n')

    with open(rnm + '_RT_Prediction.txt', 'w') as prediction_file:
        prediction_file.write('Filename: ' + str(rnm) + '\n')
        prediction_file.write('Prediction Summary: ' + str(pred_output.buffer_content()) + '\n')

    with open(rnm + '_RT_Evaluation.txt', 'w') as evaluation_file:
        evaluation_file.write('Filename: ' + str(rnm) + '\n')
        evaluation_file.write('Evaluation Summary: ' + str(evl.summary()) + '\n')
        evaluation_file.write('\n\n\n' + '\n')
        evaluation_file.write(str(evl.class_details()) + '\n')

    plot_roc(evl, class_index=[0,1], title=rnm, key_loc='best', outfile=rnm+'_RT_ROC.png', wait=False)
    return str(evl.percent_correct)
def experiment_more_file(path_files, path_folder_save_results, fold, options,
                         classifier, random, name):
    """Cross-validate one classifier on every CSV file found in a folder.

    Each directory entry whose name contains ``.csv`` is loaded with its last
    attribute as the class and evaluated with ``fold``-fold cross-validation.
    Per-file predictions go to
    ``<path_folder_save_results>/prediction/<name><file stem>pred_data.csv``
    and a summary table with one row per file goes to
    ``<path_folder_save_results>/<name>.csv``.

    :param path_files: folder containing the input files
    :param path_folder_save_results: folder that receives the result files
    :param fold: number of cross-validation folds
    :param options: classifier options as a single command-line string
    :param classifier: Weka class name of the classifier
    :param random: seed passed to ``Random``
    :param name: prefix / base name (converted with ``str``) of the outputs
    """
    cls = Classifier(classname=classifier,
                     options=weka.core.classes.split_options(options))

    # Build a new list instead of calling remove() while iterating: removing
    # during iteration skips the entry that follows each removed one, so
    # non-CSV names could slip through.
    file_list = [entry for entry in os.listdir(path_files) if ".csv" in entry]

    d_results = {
        'name_file': [],
        'percent_correct': [],
        'percent_incorrect': [],
        'confusion_matrix': []
    }

    print(file_list)

    for file_name in file_list:
        print(str(file_name))
        data = converters.load_any_file(path_files + "/" + file_name)

        data.class_is_last()

        # A fresh output object per file keeps each prediction dump separate.
        pout = PredictionOutput(
            classname="weka.classifiers.evaluation.output.prediction.CSV")

        evl = Evaluation(data)

        evl.crossvalidate_model(cls, data, fold, Random(random), pout)

        d_results['name_file'].append(str(file_name))
        d_results['percent_correct'].append(evl.percent_correct)
        d_results['percent_incorrect'].append(evl.percent_incorrect)
        d_results['confusion_matrix'].append(
            evl.matrix())  # Generates the confusion matrix.

        save = pout.buffer_content()

        # [:-4] strips a trailing four-character extension such as ".csv".
        with open(
                path_folder_save_results + '/' + 'prediction/' + str(name) +
                str(file_name)[:-4] + 'pred_data.csv', 'w') as f:
            f.write(save)

    d_results = pd.DataFrame(data=d_results)

    d_results.to_csv(path_folder_save_results + '/' + str(name) + ".csv",
                     index=False)
Exemplo n.º 5
0
def runSMO(file, bound):
    """Cross-validate SMO on a CSV file after removing a range of attributes.

    The CSV is loaded with its first attribute as the class, the attribute
    range ``bound`` is removed with Weka's ``Remove`` filter, and SMO with a
    ``PolyKernel`` is evaluated with 10-fold cross-validation (seed 1).  The
    accuracy and the per-class details are printed.

    :param file: path of the CSV file to load
    :param bound: value handed to the Remove filter's ``-R`` option
    :return: the text produced by ``Evaluation.class_details()``
    """
    csv_loader = Loader(classname="weka.core.converters.CSVLoader")
    dataset = csv_loader.load_file(file)
    dataset.class_is_first()

    attribute_remover = Filter(
        classname="weka.filters.unsupervised.attribute.Remove",
        options=["-R", bound])

    smo = KernelClassifier(
        classname="weka.classifiers.functions.SMO",
        options=["-C", "1.0", "-L", "0.001", "-P", "1.0E-12", "-N", "0"])
    smo.kernel = Kernel(
        classname="weka.classifiers.functions.supportVector.PolyKernel",
        options=["-C", "250007", "-E", "1.0"])
    prediction_sink = PredictionOutput(
        classname="weka.classifiers.evaluation.output.prediction.PlainText")

    # The filter must learn the input structure before it can be applied.
    attribute_remover.inputformat(dataset)
    reduced = attribute_remover.filter(dataset)

    evaluation = Evaluation(reduced)
    evaluation.crossvalidate_model(smo, reduced, 10, Random(1),
                                   prediction_sink)

    print(evaluation.percent_correct)

    details = evaluation.class_details()
    print(details)
    return details
Exemplo n.º 6
0
def proses():  # original note (translated): outside this def, index = 0
    """Cross-validate SMOreg with an RBF kernel on the global ``anomali`` data.

    Reads the module-level names ``anomali``, ``idx``, ``Evaluation`` and
    ``Random`` and appends to / inserts into the module-level lists
    ``data_inst``, ``data_pred`` and ``data_std``:

    * ``data_inst`` receives every value of the class attribute,
    * ``data_pred`` receives the third whitespace-separated token of each
      prediction's string form, converted to ``float``
      (presumably the predicted value -- verify against the Weka output),
    * ``data_std`` gets ``ceil(std(data_inst)) * 0.1`` inserted at ``idx``.

    The kernel is assigned through ``KernelClassifier.kernel``; the previous
    ``klasifi.vm = vm`` only created an unused Python attribute, so the
    configured RBF kernel was never applied to the classifier.
    """
    import math
    from weka.classifiers import Kernel, KernelClassifier
    from weka.classifiers import PredictionOutput
    import numpy as np
    klasifi = KernelClassifier(classname="weka.classifiers.functions.SMOreg",
                               options=["-N", "0"])
    vm = Kernel(classname="weka.classifiers.functions.supportVector.RBFKernel",
                options=["-G", "0.1"])
    klasifi.kernel = vm
    output_x = PredictionOutput(
        classname="weka.classifiers.evaluation.output.prediction.PlainText")
    kelola = Evaluation(anomali)
    kelola.crossvalidate_model(klasifi,
                               anomali,
                               10,
                               Random(0),
                               output=output_x)
    for x in anomali.values(anomali.class_index):
        data_inst.append(x)
    for x in kelola.predictions:
        tokens = str(x).split()
        data_pred.append(float(tokens[2]))
    data_std.insert(idx, math.ceil(np.std(data_inst)) * 0.1)
    print('\n DONE PROCESSING DATASET ATTRIBUTE ',
          anomali.attribute(anomali.class_index), '...')
Exemplo n.º 7
0
def vote_classifier_train(dicrectory, nameOfDataSet, flag):
    """Evaluate a Vote ensemble on a CSV data set and report the result.

    The ensemble averages (``-R AVG``) J48, RandomTree, two identical
    Bagging/REPTree members, AdaBoostM1/DecisionStump and NaiveBayes.

    :param dicrectory: path of the CSV file to load (last attribute = class)
    :param nameOfDataSet: data set label forwarded to ``print_and_save``
    :param flag: truthy for 10-fold cross-validation, falsy for an 80%
        train/test split evaluation
    """
    csv_loader = Loader(classname="weka.core.converters.CSVLoader")
    dataset = csv_loader.load_file(dicrectory)
    dataset.class_is_last()

    # The same Bagging specification is listed twice in the ensemble.
    bagged_reptree = (
        'weka.classifiers.meta.Bagging -P 100 -S 1 -num-slots 1 -I 10 -W weka.classifiers.trees.REPTree -- '
        '-M 2 -V 0.001 -N 3 -S 1 -L -1 -I 0.0')
    member_specs = [
        'weka.classifiers.trees.J48 -C 0.25 -M 2',
        'weka.classifiers.trees.RandomTree -K 6 -M 1.0 -V 0.001 -S 1',
        bagged_reptree,
        'weka.classifiers.meta.AdaBoostM1 -P 100 -S 1 -I 10 -W weka.classifiers.trees.DecisionStump',
        bagged_reptree,
        'weka.classifiers.bayes.NaiveBayes ',
    ]
    vote_options = ['-S', '1']
    for spec in member_specs:
        vote_options += ['-B', spec]
    vote_options += ['-R', 'AVG']

    ensemble = MultipleClassifiersCombiner(
        classname="weka.classifiers.meta.Vote", options=vote_options)

    evaluation = Evaluation(dataset)
    prediction_sink = PredictionOutput(
        classname="weka.classifiers.evaluation.output.prediction.PlainText")

    if not flag:
        evaluation.evaluate_train_test_split(ensemble, dataset, 80.0,
                                             Random(1), prediction_sink)
    else:
        evaluation.crossvalidate_model(ensemble, dataset, 10, Random(1),
                                       prediction_sink)

    gc.collect()
    print_and_save('Proposed model', flag, nameOfDataSet, evaluation)
Exemplo n.º 8
0
def main(args):
    """
    Train J48 on a random 66% split of a data set and record the test
    predictions in two ways:

    1. in memory, through ``Evaluation.test_model``
    2. straight to a CSV file via ``PredictionOutput.print_all`` (a separate
       prediction pass that does not keep the output in memory)

    :param args: the commandline arguments (optional, can be dataset filename)
    :type args: list
    """
    csv_output_class = "weka.classifiers.evaluation.output.prediction.CSV"

    # Fall back to the bundled vote.arff when no file was supplied.
    data_file = (args[1] if len(args) > 1 else
                 helper.get_data_dir() + os.sep + "vote.arff")
    helper.print_info("Loading dataset: " + data_file)
    dataset = Loader(
        classname="weka.core.converters.ArffLoader").load_file(data_file)
    dataset.class_is_last()

    # Randomized 66/34 split with a fixed seed.
    train, test = dataset.train_test_split(66.0, Random(1))

    tree = Classifier(classname="weka.classifiers.trees.J48")
    tree.build_classifier(train)
    print(tree)

    # Variant 1: predictions collected in the output object's buffer.
    helper.print_title("recording predictions in-memory")
    memory_output = PredictionOutput(classname=csv_output_class,
                                     options=["-distribution"])
    evaluation = Evaluation(train)
    evaluation.test_model(tree, test, output=memory_output)
    print(evaluation.summary())
    helper.print_info("Predictions:")
    print(memory_output.buffer_content())

    # Variant 2: predictions written directly to a file.
    helper.print_title("recording/outputting predictions separately")
    outputfile = helper.get_tmp_dir() + "/j48_vote.csv"
    file_output = PredictionOutput(
        classname=csv_output_class,
        options=["-distribution", "-suppress", "-file", outputfile])
    file_output.header = test
    file_output.print_all(tree, test)
    helper.print_info("Predictions stored in:" + outputfile)
    # With "-suppress" nothing is kept in memory, so this prints no predictions.
    print(file_output.buffer_content())
Exemplo n.º 9
0
def SimpleLogistic():
    """Cross-validate SimpleLogistic and save a model trained on all data.

    Loads ``First_trial_classification.arff`` (last attribute = class), runs
    10-fold cross-validation with seed 486, prints the summary and the
    collected predictions, then trains the classifier on the full data set
    and serializes it to ``SimpleLogistic2.model``.

    Cross-validation works on copies of the classifier, so ``cls`` has to be
    built explicitly before it is saved; otherwise an untrained model would
    be written.  ``serialization.write_all`` takes a list of objects, so the
    classifier is wrapped in a list.
    """
    # load a dataset
    loader = Loader(classname="weka.core.converters.ArffLoader")
    data = loader.load_file("First_trial_classification.arff")
    data.class_is_last()  # set class attribute

    cls = Classifier(classname="weka.classifiers.functions.SimpleLogistic")
    pout = PredictionOutput(
        classname="weka.classifiers.evaluation.output.prediction.PlainText")
    evl = Evaluation(data)
    evl.crossvalidate_model(cls, data, 10, Random(486), pout)

    print(evl.summary())
    print(pout.buffer_content())

    # train on the full data set, then save the model
    cls.build_classifier(data)
    serialization.write_all("SimpleLogistic2.model", [cls])
def index():
    """Serve the form page (GET) or classify the submitted form (POST).

    On POST the form values are turned into one ARFF data row (class value
    ``?``), appended to ``instances.arff``, and the whole file is run through
    the classifier stored in ``J48.model``.  The label after the ``:`` in the
    last prediction is returned as JSON.  Other HTTP methods fall through
    and return ``None``.

    The ARFF file is now opened only once the row has been built and is
    written inside a ``with`` block, so a bad form value (missing key,
    non-numeric weight/height) can no longer leave an open file handle behind.

    NOTE(review): form values are written to the ARFF file unescaped; a value
    containing a comma, quote or newline corrupts the file -- confirm the
    client restricts these fields.

    :return: rendered template for GET, JSON ``Response`` for POST
    """
    if request.method == "GET":
        return render_template('bot.html')
    if request.method == "POST":
        # jvm.stop()
        jvm.start()
        args = request.form.to_dict()

        # Weight is scaled by 2.20462 and height used as given; 703 is the
        # constant of the pounds/inches BMI formula.
        weight_lb = float(args['weight']) * 2.20462
        bmi = (weight_lb / pow(float(args['height']), 2)) * 703

        # The data row encodes the two yes/no answers as '1' / '0'.
        hypertensive_status = '1' if args['hypertensive_status'] == "Yes" else '0'
        heart_disease_status = '1' if args['heart_disease_status'] == "Yes" else '0'

        fields = [
            args['gender'],
            args['age'],
            hypertensive_status,
            heart_disease_status,
            args['marrital_status'],
            args['work_type'],
            args['residence'],
            args['hypertension'],
            str(bmi),
            "'" + args['smoking_status'].lower() + "'",
            "?",
        ]
        st = "\n" + ",".join(fields)
        print(st)
        with open("instances.arff", "a") as f:
            f.write(st)

        objects = serialization.read_all("J48.model")
        loader = Loader(classname="weka.core.converters.ArffLoader")
        csr = Classifier(jobject=objects[0])
        output_results = PredictionOutput(
            classname="weka.classifiers.evaluation.output.prediction.CSV")
        data1 = loader.load_file("instances.arff")
        data1.class_is_last()
        ev2 = Evaluation(data1)
        ev2.test_model(csr, data1, output_results)

        TESTDATA = StringIO("Instance,Actual,Predicted," +
                            output_results.buffer_content())
        df = pd.read_csv(TESTDATA)
        # The newly appended row is the last one in the file.
        prediction = list(df.Predicted).pop().split(":")[1]
        print(prediction)
        # jvm.stop()
        response = {"status": "200", "prediction": prediction}
        return Response(json.dumps(response, indent=2),
                        mimetype="application/json")
Exemplo n.º 11
0
def run_bayesNet(file):
    """Cross-validate BayesNet (TAN search) on an ARFF file and save reports.

    Files whose name does not end in ``.arff`` are reported and skipped.
    Otherwise the data is loaded with its first attribute as the class,
    evaluated with 10-fold cross-validation (seed 1), the results are
    printed, and the evaluation and prediction reports are written to a
    ``bayesNet_results`` folder next to the input file.

    :param file: ``pathlib`` path of the ARFF file
    :return: ``None``
    """
    filename = file.parts[-1]
    source_dir = file.parents[0]

    print("Running BayesNet on %s" % filename)

    if not filename.endswith(".arff"):
        print("%s not ARFF file." % filename)
        return

    # Drop the five characters of the ".arff" suffix.
    stem = filename[:-5]

    dataset = load_Arff_file(file)
    dataset.class_is_first()

    bayes_net_options = [
        "-D", "-Q",
        "weka.classifiers.bayes.net.search.local.TAN", "--",
        "-P", "1", "-S", "BAYES", "-E",
        "weka.classifiers.bayes.net.estimate.SimpleEstimator",
        "--", "-A", "0.5"
    ]
    bayes_net = Classifier(classname="weka.classifiers.bayes.BayesNet",
                           options=bayes_net_options)

    # Collects the per-instance predictions during cross-validation.
    prediction_sink = PredictionOutput(
        classname="weka.classifiers.evaluation.output.prediction.PlainText")

    evaluation = Evaluation(dataset)
    evaluation.crossvalidate_model(bayes_net, dataset, 10, Random(1),
                                   output=prediction_sink)

    for report in (evaluation.summary(), evaluation.class_details(),
                   evaluation.confusion_matrix):
        print(report)

    results_dir = source_dir / "bayesNet_results"
    results_dir.mkdir(parents=True, exist_ok=True)

    # Summary, class details and confusion matrix.
    output_eval(evaluation,
                results_dir / (stem + "_bayesNet_eval_results_TAN.txt"))

    # Per-instance predictions.
    output_pred(prediction_sink,
                results_dir / (stem + "_bayesNet_pred_results_TAN.txt"))

    print("BayesNet complete")
Exemplo n.º 12
0
def SMOreg():
    """Cross-validate SMOreg (RBF kernel) and save a model trained on all data.

    Loads ``First_trial_regression.arff`` (last attribute = class), runs
    10-fold cross-validation with seed 486, prints the summary and the
    collected predictions, then trains the classifier on the full data set
    and serializes it to ``SMOreg.model2``.

    Cross-validation works on copies of the classifier, so ``cls`` has to be
    built explicitly before it is saved; otherwise an untrained model would
    be written.  ``serialization.write_all`` takes a list of objects, so the
    classifier is wrapped in a list.
    """
    loader = Loader(classname="weka.core.converters.ArffLoader")
    data = loader.load_file("First_trial_regression.arff")
    data.class_is_last()

    cls = KernelClassifier(classname="weka.classifiers.functions.SMOreg",
                           options=["-N", "0"])
    kernel = Kernel(
        classname="weka.classifiers.functions.supportVector.RBFKernel",
        options=["-G", "0.2"])
    cls.kernel = kernel
    pout = PredictionOutput(
        classname="weka.classifiers.evaluation.output.prediction.PlainText")
    evl = Evaluation(data)
    evl.crossvalidate_model(cls, data, 10, Random(486), pout)

    print(evl.summary())
    print(pout.buffer_content())

    # train on the full data set, then save the model
    cls.build_classifier(data)
    serialization.write_all("SMOreg.model2", [cls])
Exemplo n.º 13
0
def naive_bayse(dicrectory, nameOfDataSet, flag):
    """Evaluate NaiveBayes on a CSV data set and report the result.

    :param dicrectory: path of the CSV file to load (last attribute = class)
    :param nameOfDataSet: data set label forwarded to ``print_and_save``
    :param flag: truthy for 10-fold cross-validation, falsy for an 80%
        train/test split evaluation
    """
    dataset = Loader(
        classname="weka.core.converters.CSVLoader").load_file(dicrectory)
    dataset.class_is_last()

    bayes = Classifier(classname='weka.classifiers.bayes.NaiveBayes')
    evaluation = Evaluation(dataset)
    prediction_sink = PredictionOutput(
        classname="weka.classifiers.evaluation.output.prediction.PlainText")

    if not flag:
        evaluation.evaluate_train_test_split(bayes, dataset, 80.0, Random(1),
                                             prediction_sink)
    else:
        evaluation.crossvalidate_model(bayes, dataset, 10, Random(1),
                                       prediction_sink)

    print_and_save('Naive Bayes model', flag, nameOfDataSet, evaluation)
    gc.collect()
Exemplo n.º 14
0
def trainAndMakePred(train, test):
    """Train IBk and NaiveBayes, print their reports and save test predictions.

    Each classifier is built on ``train``, evaluated on ``train`` (summary
    printed) and then on ``test``.  The values returned by the two test
    evaluations are written side by side to ``predict.csv`` with IDs that
    start at 901.

    The output file is written inside a ``with`` block so it is closed even
    if a write fails; unused locals from the earlier version were dropped.

    :param train: Weka data set used for training
    :param test: Weka data set to predict
    """
    # IBk: training-set summary, then predictions for the test set
    classifierIBK = Classifier(classname="weka.classifiers.lazy.IBk", options=["-K", "5"])
    classifierIBK.build_classifier(train)
    evaluationIBK = Evaluation(train)
    evaluationIBK.test_model(classifierIBK, train)
    print(" IBKTraining information ")
    print(evaluationIBK.summary())
    pred_outputIBK = PredictionOutput(classname="weka.classifiers.evaluation.output.prediction.CSV")
    evaluationIBK = Evaluation(test)
    predicted_indicesIBK = evaluationIBK.test_model(classifierIBK, test, pred_outputIBK)
    print(" IBK Prediction information ")
    print(pred_outputIBK)

    # Naive Bayes: training-set summary, then predictions for the test set
    classifierNB = Classifier(classname="weka.classifiers.bayes.NaiveBayes", options=["-D"])
    classifierNB.build_classifier(train)
    evaluationNB = Evaluation(train)
    evaluationNB.test_model(classifierNB, train)
    print(" Naive Bayes Training information ")
    print(evaluationNB.summary())
    pred_outputNB = PredictionOutput(classname="weka.classifiers.evaluation.output.prediction.CSV")
    evaluationNB = Evaluation(test)
    predicted_indicesNB = evaluationNB.test_model(classifierNB, test, pred_outputNB)
    print(" Naive Bayes Prediction information ")
    print(pred_outputNB)

    # output predictions to file, one row per test instance
    paired_predictions = zip(predicted_indicesIBK, predicted_indicesNB)
    with open("predict.csv", "w") as f:
        f.write("ID,Predict 1,Predict 2\n")
        for ID, (pred1, pred2) in enumerate(paired_predictions, 901):
            f.write("%s,%s,%s\n" % (ID, pred1, pred2))
Exemplo n.º 15
0
def run_ibk(file):
    """Cross-validate IBk (``-K 3``) on an ARFF file and save the reports.

    Files whose name does not end in ``.arff`` are reported and skipped.
    Otherwise the data is loaded with its first attribute as the class and
    evaluated with 10-fold cross-validation (seed 1); the evaluation and
    prediction reports are written next to the input file.

    :param file: ``pathlib`` path of the ARFF file
    :return: ``None``
    """
    filename = file.parts[-1]
    source_dir = file.parents[0]

    print("Running IBk on %s" % filename)

    if not filename.endswith(".arff"):
        print("%s not ARFF file." % filename)
        return

    # Drop the five characters of the ".arff" suffix.
    stem = filename[:-5]

    dataset = load_Arff_file(file)
    dataset.class_is_first()

    knn = Classifier(classname="weka.classifiers.lazy.IBk",
                     options=["-K", "3"])

    # Collects the per-instance predictions during cross-validation.
    prediction_sink = PredictionOutput(
        classname="weka.classifiers.evaluation.output.prediction.PlainText")

    evaluation = Evaluation(dataset)
    evaluation.crossvalidate_model(knn, dataset, 10, Random(1),
                                   output=prediction_sink)

    # Summary, class details and confusion matrix.
    output_eval(evaluation, source_dir / (stem + "_eval_results.txt"))

    # Per-instance predictions.
    output_pred(prediction_sink, source_dir / (stem + "_pred_results.txt"))

    print("IBk complete")
Exemplo n.º 16
0
def run_weka_csv_train_test(train_file_path, test_file_path):
    """
    Train JRip on one CSV file and evaluate it on the training and test data.

    Both files are loaded through the Weka CSV loader, their last attribute
    is converted from numeric to nominal, and the attribute named ``Label``
    is used as the class.  The learned rules are printed.

    :param train_file_path: path of the training CSV
    :param test_file_path: path of the test CSV
    :return: tuple ``(train metrics, test metrics)`` as produced by
        ``get_interesting``
    :raises Exception: if the converted train and test headers differ
    """
    # Read both files with the data-frame reader as well; the frames
    # themselves are not used further in this function.
    _train_frame = read_df_csv(train_file_path)
    _test_frame = read_df_csv(test_file_path)

    # JRip with its default options.
    rule_learner = Classifier(classname="weka.classifiers.rules.JRip")

    csv_loader = Loader(classname="weka.core.converters.CSVLoader")
    raw_train = read_weka_csv(train_file_path, csv_loader)
    raw_test = read_weka_csv(test_file_path, csv_loader)

    train_label = raw_train.attribute_by_name("Label")
    test_label = raw_test.attribute_by_name("Label")

    # The label column must be nominal for the classifier.
    to_nominal = Filter(
        classname="weka.filters.unsupervised.attribute.NumericToNominal",
        options=["-R", "last"])
    to_nominal.inputformat(raw_train)
    train_set = to_nominal.filter(raw_train)
    train_set.class_index = train_label.index

    # The filter initialised on the training data is reused for the test data.
    test_set = to_nominal.filter(raw_test)
    test_set.class_index = test_label.index

    mismatch = train_set.equal_headers(test_set)
    if mismatch is not None:
        raise Exception("Train and test not compatible:\n" + mismatch)

    rule_learner.build_classifier(train_set)
    # Print the learned rules.
    print(rule_learner.jwrapper)

    prediction_sink = PredictionOutput(
        classname="weka.classifiers.evaluation.output.prediction.PlainText",
        options=["-distribution"])

    # Test-set evaluation first, then training-set evaluation; both share
    # the same prediction output object.
    test_eval = Evaluation(train_set)
    test_eval.test_model(rule_learner, test_set, output=prediction_sink)

    train_eval = Evaluation(train_set)
    train_eval.test_model(rule_learner, train_set, output=prediction_sink)

    train_metrics = get_interesting(train_eval)
    test_metrics = get_interesting(test_eval)
    return train_metrics, test_metrics
Exemplo n.º 17
0
        # Fragment of a larger function whose header is not visible here.
        # Branch taken when isreal() reports True for classvarStr: an M5P
        # model is built on dataA and evaluated on that same data.
        if isreal(classvarStr) == True:

            classifier = Classifier(classname="weka.classifiers.trees.M5P",
                                    options=["-U", "-M", "500.0"])
            print("\n--> building:")
            print(classifier.to_commandline())
            classifier.build_classifier(dataA)
            print("\n--> classifier:\n")
            print(classifier)
            print("\n--> graph:\n")
            print(classifier.graph)

            # Predictions are written to a CSV file in the temp directory.
            outputfile = helper.get_tmp_dir() + "/result.csv"
            output = PredictionOutput(
                classname='weka.classifiers.evaluation.output.prediction.CSV',
                options=["-distribution", "-suppress", "-file", outputfile])
            print("\n--> Output:\n")
            output.header = dataA
            output.print_all(classifier, dataA)
            helper.print_info("Predictions stored in:" + outputfile)
            # NOTE(review): with "-suppress" the buffer is presumably empty,
            # so this likely prints nothing -- confirm.
            print(output.buffer_content())
            # Evaluate on the training data itself, reusing the same output.
            Eval = Evaluation(dataA)
            Eval.test_model(classifier, dataA, output=output)
            print(Eval.summary())
            ListEval = []
            Corr = []
            Corrf = []
            # Split the summary text around the 'Mean absolute error' label.
            ListEval = Eval.summary().split('Mean absolute error')
            print("ListEval :")
            print(ListEval)
Exemplo n.º 18
0
def experiment_sequential_file(path_indices, path_features,
                               path_folder_save_results, options, classifier,
                               name, indicator_col, images):
    """Evaluate a classifier on consecutive row blocks of one data file.

    The index archive at ``path_indices`` supplies block start rows (shifted
    by +1); the data length is appended as the final boundary.  Each block is
    used once as the test set while all remaining rows form the training set.
    Predictions of all blocks are collected in one output, saved to
    ``<path_folder_save_results>/prediction/<name>pred_data.csv``, read back
    and handed to ``create_prediction``.  Per-block metrics are written to
    ``<path_folder_save_results>/<name>results.csv``.

    :param path_indices: archive of index arrays opened with ``load``; the
        array stored under the last key is the one used
    :param path_features: path of the data file (last attribute = class)
    :param path_folder_save_results: folder that receives the result files
    :param options: classifier options as a single command-line string
    :param classifier: Weka class name of the classifier
    :param name: prefix of the output file names
    :param indicator_col: forwarded unchanged to ``create_prediction``
    :param images: forwarded unchanged to ``create_prediction``
    """
    ind_f = load(path_indices)
    for item in ind_f.files:
        # Every key overwrites the previous one; the last array wins.
        ind = ind_f[item] + 1

    cls = Classifier(classname=classifier,
                     options=weka.core.classes.split_options(options))

    data = converters.load_any_file(path_features)

    # The data length closes the last block.
    ind = np.append(ind, len(data))

    data.class_is_last()

    # One shared output object: predictions of all blocks accumulate in it.
    pout = PredictionOutput(
        classname="weka.classifiers.evaluation.output.prediction.CSV")

    metric_names = ('index', 'percent_correct', 'percent_incorrect',
                    'precision', 'recall', 'f-score', 'confusion_matrix')
    d_results = {}
    for metric in metric_names:
        d_results[metric] = []

    block_count = len(ind) - 1
    for j in range(block_count):
        is_final_block = j == block_count - 1
        first = ind[j]

        # Inner blocks stop one row before the next block starts.
        last = ind[j + 1] if is_final_block else ind[j + 1] - 1

        d_test = data.subset(row_range=str(first) + '-' + str(last))

        if j == 0:
            # First block: train on everything after it.
            train_range = str(last + 1) + '-' + str(ind[-1])
        elif is_final_block:
            # Final block: train on everything before it.
            train_range = '1-' + str(first - 1)
        else:
            # Inner block: train on the rows on both sides.
            train_range = ('1-' + str(first - 1) + ',' + str(last + 1) +
                           '-' + str(ind[-1]))

        d_train = data.subset(row_range=train_range)
        print(train_range)

        cls.build_classifier(d_train)

        evl = Evaluation(data)
        evl.test_model(cls, d_test, pout)

        d_results['index'].append(str(ind[j]))
        d_results['percent_correct'].append(evl.percent_correct)
        d_results['percent_incorrect'].append(evl.percent_incorrect)
        d_results['precision'].append(evl.precision(1))
        d_results['recall'].append(evl.recall(1))
        d_results['f-score'].append(evl.f_measure(1))
        d_results['confusion_matrix'].append(evl.matrix())

    save = pout.buffer_content()

    check_folder_or_create(path_folder_save_results + '/' + 'prediction')

    prediction_csv = (path_folder_save_results + '/' + 'prediction/' + name +
                      'pred_data.csv')
    with open(prediction_csv, 'w') as handle:
        handle.write(save)

    # Read the dump back without a header row; columns are addressed by
    # position.
    buffer_save = pd.read_csv(prediction_csv, index_col=False, header=None)

    create_prediction(buffer_save[1], buffer_save[2], buffer_save[3],
                      indicator_col, images, name,
                      path_folder_save_results + '/prediction/')

    d_results = pd.DataFrame(data=d_results)

    d_results.to_csv(path_folder_save_results + '/' + name + 'results.csv',
                     index=False)
Exemplo n.º 19
0
def experiment_more_file(path_files,
                         path_folder_save_results,
                         fold,
                         options,
                         classifier,
                         random,
                         name,
                         voting=False):
    """Cross-validate one classifier on every CSV file found in a folder.

    For each directory entry whose name contains ``.csv`` the matching
    indicator table ``<path_files>/indicator/<first character>_indicator.csv``
    is read, the data is evaluated with ``fold``-fold cross-validation, the
    predictions are dumped to
    ``<path_folder_save_results>/<name>/prediction/pred_data.csv`` (rewritten
    for every file), read back and passed to ``create_prediction``.  A
    summary table with one row per file is written to
    ``<path_folder_save_results>/<name>.csv``.

    :param path_files: folder containing the input files
    :param path_folder_save_results: folder that receives the result files
    :param fold: number of cross-validation folds
    :param options: classifier options as a single command-line string
    :param classifier: Weka class name of the classifier
    :param random: seed passed to ``Random``
    :param name: sub-folder / base name of the outputs
    :param voting: accepted for compatibility; not used by this function
    """
    cls = Classifier(classname=classifier,
                     options=weka.core.classes.split_options(options))

    # Build a new list instead of calling remove() while iterating: removing
    # during iteration skips the entry that follows each removed one, so
    # non-CSV names (such as the "indicator" folder) could slip through.
    file_list = [entry for entry in os.listdir(path_files) if ".csv" in entry]

    d_results = {
        'name_file': [],
        'percent_correct': [],
        'percent_incorrect': [],
        'precision': [],
        'recall': [],
        'f-score': [],
        'confusion_matrix': []
    }

    prediction_dir = path_folder_save_results + '/' + name + '/' + 'prediction'
    prediction_csv = (path_folder_save_results + '/' + name + '/' +
                      'prediction/pred_data.csv')

    for file_name in file_list:
        # The indicator table is selected by the first character of the name.
        indicator_table = pd.read_csv(path_files + '/indicator/' +
                                      file_name[0] + '_indicator.csv')
        indicator = list(indicator_table['indicator'])
        images = list(indicator_table['image'])

        data = converters.load_any_file(path_files + "/" + file_name)

        data.class_is_last()

        # A fresh output object per file keeps each prediction dump separate.
        pout = PredictionOutput(
            classname="weka.classifiers.evaluation.output.prediction.CSV")

        evl = Evaluation(data)

        evl.crossvalidate_model(cls, data, fold, Random(random), pout)

        d_results['name_file'].append(str(file_name))
        d_results['percent_correct'].append(evl.percent_correct)
        d_results['percent_incorrect'].append(evl.percent_incorrect)
        d_results['precision'].append(evl.precision(1))
        d_results['recall'].append(evl.recall(1))
        d_results['f-score'].append(evl.f_measure(1))
        d_results['confusion_matrix'].append(
            evl.matrix())  # Generates the confusion matrix.

        save = pout.buffer_content()

        check_folder_or_create(prediction_dir)

        with open(prediction_csv, 'w') as f:
            f.write(save)

        buffer_save = pd.read_csv(prediction_csv, index_col=False)

        col_label = buffer_save['actual']
        col_prediction = buffer_save['predicted']
        col_different = buffer_save['error']

        # file_name[:-4] strips a trailing four-character extension.
        create_prediction(
            col_label, col_prediction, col_different, indicator, images,
            file_name[:-4],
            path_folder_save_results + '/' + name + '/prediction/')

    d_results = pd.DataFrame(data=d_results)

    d_results.to_csv(path_folder_save_results + '/' + str(name) + ".csv")
Exemplo n.º 20
0
def experiment_sequential_file(path_indices, path_features,
                               path_folder_save_results, options, classifier,
                               name):
    """Evaluate a classifier on consecutive row blocks of one data file.

    The index archive at ``path_indices`` supplies block boundaries (shifted
    by +1); the data length is appended as the final boundary.  Block ``j``
    covers rows ``ind[j]`` to ``ind[j + 1]`` inclusive and is used as the test
    set while the rows outside it form the training set.  After every block
    the content of the shared prediction output is written to
    ``<path_folder_save_results>//prediction/<name><j>pred_data.csv``; the
    per-block metrics go to ``<path_folder_save_results>/<name>results.csv``.

    :param path_indices: archive of index arrays opened with ``load``; the
        array stored under the last key is the one used
    :param path_features: path of the data file (last attribute = class)
    :param path_folder_save_results: folder that receives the result files
    :param options: classifier options as a single command-line string
    :param classifier: Weka class name of the classifier
    :param name: prefix of the output file names
    """
    ind_f = load(path_indices)
    for item in ind_f.files:
        # Every key overwrites the previous one; the last array wins.
        ind = ind_f[item] + 1

    cls = Classifier(classname=classifier,
                     options=weka.core.classes.split_options(options))

    data = converters.load_any_file(path_features)

    # The data length closes the last block.
    ind = np.append(ind, len(data))

    data.class_is_last()

    # One output object is shared by all blocks.
    pout = PredictionOutput(
        classname="weka.classifiers.evaluation.output.prediction.CSV")

    d_results = {}
    for column in ('index', 'percent_correct', 'percent_incorrect',
                   'confusion_matrix'):
        d_results[column] = []

    block_count = len(ind) - 1
    for j in range(block_count):
        print(j)

        lower = ind[j]
        upper = ind[j + 1]
        test_range = str(lower) + '-' + str(upper)
        print(test_range)

        d_test = data.subset(row_range=test_range)

        if j == 0:
            # First block: train on everything after it.
            train_range = str(upper + 1) + '-' + str(ind[-1])
        elif j == block_count - 1:
            # Final block: train on everything before it.
            train_range = '1-' + str(lower - 1)
        else:
            # Inner block: train on the rows on both sides.
            train_range = ('1-' + str(lower - 1) + ',' + str(upper + 1) +
                           '-' + str(ind[-1]))
        d_train = data.subset(row_range=train_range)

        cls.build_classifier(d_train)

        evl = Evaluation(data)
        evl.test_model(cls, d_test, pout)

        save = pout.buffer_content()

        fold_csv = (path_folder_save_results + '/' + '/prediction/' + name +
                    str(j) + 'pred_data.csv')
        with open(fold_csv, 'w') as handle:
            handle.write(save)

        d_results['index'].append(str(lower))
        d_results['percent_correct'].append(evl.percent_correct)
        d_results['percent_incorrect'].append(evl.percent_incorrect)
        d_results['confusion_matrix'].append(evl.matrix())

    d_results = pd.DataFrame(data=d_results)

    d_results.to_csv(path_folder_save_results + '/' + name + 'results.csv',
                     index=False)
Exemplo n.º 21
0
def _mlp_options(**overrides):
    """Return a MultilayerPerceptron option list: the baseline plus *overrides*.

    Keys are the Weka flag letters without the leading dash (for example
    ``H="3, 5"``); values are the option strings handed to Weka unchanged.
    """
    settings = {
        "L": "0.3",
        "M": "0.2",
        "N": "500",
        "V": "0",
        "S": "0",
        "E": "20",
        "H": "1",
    }
    settings.update(overrides)
    options = []
    # Fixed flag order, so two runs differ only in the overridden value.
    for flag in ("L", "M", "N", "V", "S", "E", "H"):
        options.extend(["-" + flag, settings[flag]])
    return options


def run_multilayerPercepton(file, file2=None):
    """Train and evaluate a series of MultilayerPerceptron configurations.

    For each configuration a classifier is built on ``file``, evaluated, and
    its summary and predictions are written below
    ``<file's directory>/Results/MP-ALL_N-500_results/<config>_results/``.

    Args:
        file: Path-like object (``.parts`` / ``.parents``) pointing at the
            training ARFF file. The class attribute is taken to be the first
            attribute.
        file2: Optional path of a separate ARFF test file. When omitted, the
            models are evaluated on the training data itself.

    Returns:
        None. Returns early (after printing a message) when ``file`` does not
        end in ``.arff``.
    """
    # Get filename from Pathlib object
    filename = file.parts[-1]
    base_dir = file.parents[0]

    print("Running Multilayer Percepton on %s" % filename)

    if not filename.endswith(".arff"):
        print("%s not ARFF file." % filename)
        return

    print("loading data...")
    # Load data with class as first attr
    data = load_Arff_file(file)
    data.class_is_first()

    if file2:
        print("Loading test...")
        test = load_Arff_file(file2)
        test.class_is_first()
    else:
        # Without this branch `test` stayed unbound and the first evaluation
        # below raised NameError whenever the default file2=None was used.
        print("No test file given; evaluating on the training data.")
        test = data

    # (output name, overrides relative to the baseline options)
    experiments = [
        ("MP_N-500_default_H-1", {}),  # DEFAULT
        ("MP_N-500_H-3", {"H": "3"}),  # -H sweep
        ("MP_N-500_H-5", {"H": "5"}),
        ("MP_N-500_H-7", {"H": "7"}),
        ("MP_N-500_H-3-5", {"H": "3, 5"}),
        ("MP_N-500_H-5-3", {"H": "5, 3"}),
        ("MP_N-500_H-3-5-7", {"H": "3, 5, 7"}),
        ("MP_N-500_H-7-3-5", {"H": "7, 3, 5"}),
        ("MP_N-500_H-5-7-3", {"H": "5, 7, 3"}),
        ("MP_N-500_L-01", {"L": "0.1"}),  # -L sweep
        ("MP_N-500_L-02", {"L": "0.2"}),
        ("MP_N-500_L-04", {"L": "0.4"}),
        ("MP_N-500_L-05", {"L": "0.5"}),
        ("MP_N-500_M-01", {"M": "0.1"}),  # -M sweep
        ("MP_N-500_M-03", {"M": "0.3"}),
        ("MP_N-500_M-04", {"M": "0.4"}),
        ("MP_N-500_M-05", {"M": "0.5"}),
        ("MP_N-500_E-5", {"E": "5"}),  # -E sweep
        ("MP_N-500_E-10", {"E": "10"}),
        ("MP_N-500_E-15", {"E": "15"}),
        ("MP_N-500_E-25", {"E": "25"}),
    ]

    for i, (run_name, overrides) in enumerate(experiments):
        start = time.time()
        print("Beginning iteration " + str(i) + ": " + run_name)

        # Use MultilayerPercepton and set options
        cls = Classifier(
            classname="weka.classifiers.functions.MultilayerPerceptron",
            options=_mlp_options(**overrides))
        # Build classifier with train data
        cls.build_classifier(data)

        # Predictions stored in pout
        pout = PredictionOutput(
            classname="weka.classifiers.evaluation.output.prediction.PlainText"
        )

        # Evaluate the trained model on the test data
        evaluation = Evaluation(data)
        evaluation.test_model(cls, test, output=pout)

        print(evaluation.summary())
        print(evaluation.class_details())
        print(evaluation.confusion_matrix)

        # mk dirs for output
        tempdir = (base_dir / "Results/" / "MP-ALL_N-500_results/" /
                   (run_name + "_results/"))
        tempdir.mkdir(parents=True, exist_ok=True)

        # Save summary, class details and confusion matrix to file
        result_output = run_name + "_results.txt"
        print(tempdir)
        print(result_output)
        print((tempdir / result_output).absolute())
        output_eval(evaluation, tempdir / result_output)

        # Save the predicted results to file
        prediction_output = run_name + "_prediction.txt"
        output_pred(pout, tempdir / prediction_output)

        timetaken = round(time.time() - start, 2)
        print("Time taken to run iteration " + str(i) + ": %s seconds" %
              (timetaken))

    print("Multilayer Percepton complete")
def main():
    """
    Runs a sequence of classifier examples against the bundled datasets.

    The datasets (iris, diabetes, bolts, labor) are read from
    ``helper.get_data_dir()``. The walkthrough covers: classifier help and
    instantiation, building and evaluating models, incremental training,
    meta classifiers, cross-validation statistics and plots, and direct
    access to a classifier's Java API. The learning-curve plot is the only
    plot requested with ``wait=True``.
    """

    # load a dataset
    iris_file = helper.get_data_dir() + os.sep + "iris.arff"
    helper.print_info("Loading dataset: " + iris_file)
    loader = Loader("weka.core.converters.ArffLoader")
    iris_data = loader.load_file(iris_file)
    iris_data.class_is_last()

    # classifier help
    helper.print_title("Creating help string")
    classifier = Classifier(classname="weka.classifiers.trees.J48")
    print(classifier.to_help())

    # partial classname
    helper.print_title("Creating classifier from partial classname")
    clsname = ".J48"
    classifier = Classifier(classname=clsname)
    print(clsname + " --> " + classifier.classname)

    # classifier from commandline
    helper.print_title("Creating SMO from command-line string")
    cmdline = 'weka.classifiers.functions.SMO -K "weka.classifiers.functions.supportVector.NormalizedPolyKernel -E 3.0"'
    classifier = from_commandline(cmdline,
                                  classname="weka.classifiers.Classifier")
    classifier.build_classifier(iris_data)
    print("input: " + cmdline)
    print("output: " + classifier.to_commandline())
    print("model:\n" + str(classifier))

    # kernel classifier
    helper.print_title("Creating SMO as KernelClassifier")
    kernel = Kernel(
        classname="weka.classifiers.functions.supportVector.RBFKernel",
        options=["-G", "0.001"])
    classifier = KernelClassifier(classname="weka.classifiers.functions.SMO",
                                  options=["-M"])
    classifier.kernel = kernel
    classifier.build_classifier(iris_data)
    print("classifier: " + classifier.to_commandline())
    print("model:\n" + str(classifier))

    # build a classifier and output model
    helper.print_title("Training J48 classifier on iris")
    classifier = Classifier(classname="weka.classifiers.trees.J48")
    # Instead of using 'options=["-C", "0.3"]' in the constructor, we can also set the "confidenceFactor"
    # property of the J48 classifier itself. However, being of type float rather than double, we need
    # to convert it to the correct type first using the double_to_float function:
    classifier.set_property("confidenceFactor", types.double_to_float(0.3))
    classifier.build_classifier(iris_data)
    print(classifier)
    print(classifier.graph)
    print(classifier.to_source("MyJ48"))
    plot_graph.plot_dot_graph(classifier.graph)

    # evaluate model on test set (here: the training data itself)
    helper.print_title("Evaluating J48 classifier on iris")
    evaluation = Evaluation(iris_data)
    evl = evaluation.test_model(classifier, iris_data)
    print(evl)
    print(evaluation.summary())

    # evaluate model on train/test split
    helper.print_title("Evaluating J48 classifier on iris (random split 66%)")
    classifier = Classifier(classname="weka.classifiers.trees.J48",
                            options=["-C", "0.3"])
    evaluation = Evaluation(iris_data)
    evaluation.evaluate_train_test_split(classifier, iris_data, 66.0,
                                         Random(1))
    print(evaluation.summary())

    # load a dataset incrementally and build classifier incrementally
    helper.print_title("Build classifier incrementally on iris")
    helper.print_info("Loading dataset: " + iris_file)
    loader = Loader("weka.core.converters.ArffLoader")
    iris_inc = loader.load_file(iris_file, incremental=True)
    iris_inc.class_is_last()
    classifier = Classifier(
        classname="weka.classifiers.bayes.NaiveBayesUpdateable")
    classifier.build_classifier(iris_inc)
    # the loader itself is iterated to obtain the instances one at a time
    for inst in loader:
        classifier.update_classifier(inst)
    print(classifier)

    # construct meta-classifiers
    helper.print_title("Meta classifiers")
    # generic FilteredClassifier instantiation
    print("generic FilteredClassifier instantiation")
    meta = SingleClassifierEnhancer(
        classname="weka.classifiers.meta.FilteredClassifier")
    meta.classifier = Classifier(
        classname="weka.classifiers.functions.LinearRegression")
    flter = Filter("weka.filters.unsupervised.attribute.Remove")
    flter.options = ["-R", "first"]
    meta.set_property("filter", flter.jobject)
    print(meta.to_commandline())
    # direct FilteredClassifier instantiation
    print("direct FilteredClassifier instantiation")
    meta = FilteredClassifier()
    meta.classifier = Classifier(
        classname="weka.classifiers.functions.LinearRegression")
    flter = Filter("weka.filters.unsupervised.attribute.Remove")
    flter.options = ["-R", "first"]
    meta.filter = flter
    print(meta.to_commandline())
    # generic Vote
    print("generic Vote instantiation")
    meta = MultipleClassifiersCombiner(classname="weka.classifiers.meta.Vote")
    classifiers = [
        Classifier(classname="weka.classifiers.functions.SMO"),
        Classifier(classname="weka.classifiers.trees.J48")
    ]
    meta.classifiers = classifiers
    print(meta.to_commandline())

    # cross-validate nominal classifier
    helper.print_title("Cross-validating NaiveBayes on diabetes")
    diabetes_file = helper.get_data_dir() + os.sep + "diabetes.arff"
    helper.print_info("Loading dataset: " + diabetes_file)
    loader = Loader("weka.core.converters.ArffLoader")
    diabetes_data = loader.load_file(diabetes_file)
    diabetes_data.class_is_last()
    classifier = Classifier(classname="weka.classifiers.bayes.NaiveBayes")
    pred_output = PredictionOutput(
        classname="weka.classifiers.evaluation.output.prediction.PlainText",
        options=["-distribution"])
    evaluation = Evaluation(diabetes_data)
    evaluation.crossvalidate_model(classifier,
                                   diabetes_data,
                                   10,
                                   Random(42),
                                   output=pred_output)
    print(evaluation.summary())
    print(evaluation.class_details())
    print(evaluation.matrix())
    print("areaUnderPRC/0: " + str(evaluation.area_under_prc(0)))
    print("weightedAreaUnderPRC: " + str(evaluation.weighted_area_under_prc))
    print("areaUnderROC/1: " + str(evaluation.area_under_roc(1)))
    print("weightedAreaUnderROC: " + str(evaluation.weighted_area_under_roc))
    print("avgCost: " + str(evaluation.avg_cost))
    print("totalCost: " + str(evaluation.total_cost))
    print("confusionMatrix: " + str(evaluation.confusion_matrix))
    print("correct: " + str(evaluation.correct))
    print("pctCorrect: " + str(evaluation.percent_correct))
    print("incorrect: " + str(evaluation.incorrect))
    print("pctIncorrect: " + str(evaluation.percent_incorrect))
    print("unclassified: " + str(evaluation.unclassified))
    print("pctUnclassified: " + str(evaluation.percent_unclassified))
    print("coverageOfTestCasesByPredictedRegions: " +
          str(evaluation.coverage_of_test_cases_by_predicted_regions))
    print("sizeOfPredictedRegions: " +
          str(evaluation.size_of_predicted_regions))
    print("falseNegativeRate: " + str(evaluation.false_negative_rate(1)))
    print("weightedFalseNegativeRate: " +
          str(evaluation.weighted_false_negative_rate))
    print("numFalseNegatives: " + str(evaluation.num_false_negatives(1)))
    print("trueNegativeRate: " + str(evaluation.true_negative_rate(1)))
    print("weightedTrueNegativeRate: " +
          str(evaluation.weighted_true_negative_rate))
    print("numTrueNegatives: " + str(evaluation.num_true_negatives(1)))
    print("falsePositiveRate: " + str(evaluation.false_positive_rate(1)))
    print("weightedFalsePositiveRate: " +
          str(evaluation.weighted_false_positive_rate))
    print("numFalsePositives: " + str(evaluation.num_false_positives(1)))
    print("truePositiveRate: " + str(evaluation.true_positive_rate(1)))
    print("weightedTruePositiveRate: " +
          str(evaluation.weighted_true_positive_rate))
    print("numTruePositives: " + str(evaluation.num_true_positives(1)))
    print("fMeasure: " + str(evaluation.f_measure(1)))
    print("weightedFMeasure: " + str(evaluation.weighted_f_measure))
    print("unweightedMacroFmeasure: " +
          str(evaluation.unweighted_macro_f_measure))
    print("unweightedMicroFmeasure: " +
          str(evaluation.unweighted_micro_f_measure))
    print("precision: " + str(evaluation.precision(1)))
    print("weightedPrecision: " + str(evaluation.weighted_precision))
    print("recall: " + str(evaluation.recall(1)))
    print("weightedRecall: " + str(evaluation.weighted_recall))
    print("kappa: " + str(evaluation.kappa))
    print("KBInformation: " + str(evaluation.kb_information))
    print("KBMeanInformation: " + str(evaluation.kb_mean_information))
    print("KBRelativeInformation: " + str(evaluation.kb_relative_information))
    print("SFEntropyGain: " + str(evaluation.sf_entropy_gain))
    print("SFMeanEntropyGain: " + str(evaluation.sf_mean_entropy_gain))
    print("SFMeanPriorEntropy: " + str(evaluation.sf_mean_prior_entropy))
    print("SFMeanSchemeEntropy: " + str(evaluation.sf_mean_scheme_entropy))
    print("matthewsCorrelationCoefficient: " +
          str(evaluation.matthews_correlation_coefficient(1)))
    print("weightedMatthewsCorrelation: " +
          str(evaluation.weighted_matthews_correlation))
    print("class priors: " + str(evaluation.class_priors))
    print("numInstances: " + str(evaluation.num_instances))
    print("meanAbsoluteError: " + str(evaluation.mean_absolute_error))
    print("meanPriorAbsoluteError: " +
          str(evaluation.mean_prior_absolute_error))
    print("relativeAbsoluteError: " + str(evaluation.relative_absolute_error))
    print("rootMeanSquaredError: " + str(evaluation.root_mean_squared_error))
    print("rootMeanPriorSquaredError: " +
          str(evaluation.root_mean_prior_squared_error))
    print("rootRelativeSquaredError: " +
          str(evaluation.root_relative_squared_error))
    print("prediction output:\n" + str(pred_output))
    plot_cls.plot_roc(evaluation,
                      title="ROC diabetes",
                      class_index=range(
                          0, diabetes_data.class_attribute.num_values),
                      wait=False)
    plot_cls.plot_prc(evaluation,
                      title="PRC diabetes",
                      class_index=range(
                          0, diabetes_data.class_attribute.num_values),
                      wait=False)

    # train 2nd classifier on diabetes dataset
    classifier2 = Classifier(classname="weka.classifiers.trees.RandomForest")
    evaluation2 = Evaluation(diabetes_data)
    evaluation2.crossvalidate_model(classifier2, diabetes_data, 10, Random(42))
    plot_cls.plot_rocs({
        "NB": evaluation,
        "RF": evaluation2
    },
                       title="ROC diabetes",
                       class_index=0,
                       wait=False)
    plot_cls.plot_prcs({
        "NB": evaluation,
        "RF": evaluation2
    },
                       title="PRC diabetes",
                       class_index=0,
                       wait=False)

    # load a numeric dataset
    bolts_file = helper.get_data_dir() + os.sep + "bolts.arff"
    helper.print_info("Loading dataset: " + bolts_file)
    loader = Loader("weka.core.converters.ArffLoader")
    bolts_data = loader.load_file(bolts_file)
    bolts_data.class_is_last()

    # build a classifier and output model
    helper.print_title("Training LinearRegression on bolts")
    classifier = Classifier(
        classname="weka.classifiers.functions.LinearRegression",
        options=["-S", "1", "-C"])
    classifier.build_classifier(bolts_data)
    print(classifier)

    # cross-validate numeric classifier
    helper.print_title("Cross-validating LinearRegression on bolts")
    classifier = Classifier(
        classname="weka.classifiers.functions.LinearRegression",
        options=["-S", "1", "-C"])
    evaluation = Evaluation(bolts_data)
    evaluation.crossvalidate_model(classifier, bolts_data, 10, Random(42))
    print(evaluation.summary())
    print("correlationCoefficient: " + str(evaluation.correlation_coefficient))
    print("errorRate: " + str(evaluation.error_rate))
    helper.print_title("Header - bolts")
    print(str(evaluation.header))
    helper.print_title("Predictions on bolts")
    for index, pred in enumerate(evaluation.predictions):
        print(
            str(index + 1) + ": " + str(pred) + " -> error=" + str(pred.error))
    plot_cls.plot_classifier_errors(evaluation.predictions, wait=False)

    # train 2nd classifier and show errors in same plot
    classifier2 = Classifier(classname="weka.classifiers.functions.SMOreg")
    evaluation2 = Evaluation(bolts_data)
    evaluation2.crossvalidate_model(classifier2, bolts_data, 10, Random(42))
    plot_cls.plot_classifier_errors(
        {
            "LR": evaluation.predictions,
            "SMOreg": evaluation2.predictions
        },
        wait=False)

    # learning curve
    cls = [
        Classifier(classname="weka.classifiers.trees.J48"),
        Classifier(classname="weka.classifiers.bayes.NaiveBayesUpdateable")
    ]
    plot_cls.plot_learning_curve(cls,
                                 diabetes_data,
                                 increments=0.05,
                                 label_template="[#] !",
                                 metric="percent_correct",
                                 wait=True)

    # access classifier's Java API
    labor_file = helper.get_data_dir() + os.sep + "labor.arff"
    helper.print_info("Loading dataset: " + labor_file)
    loader = Loader("weka.core.converters.ArffLoader")
    labor_data = loader.load_file(labor_file)
    labor_data.class_is_last()

    helper.print_title("Using JRip's Java API to access rules")
    jrip = Classifier(classname="weka.classifiers.rules.JRip")
    jrip.build_classifier(labor_data)
    rset = jrip.jwrapper.getRuleset()
    # `range` instead of the Python-2-only `xrange`, which raises NameError
    # on Python 3; the rule set is small, so the behaviour is the same on 2.
    for i in range(rset.size()):
        r = rset.get(i)
        print(str(r.toString(labor_data.class_attribute.jobject)))
Exemplo n.º 23
0
def handle_message(message):
    """Buffer one sensor message and classify the gesture once enough data exists.

    ``message`` is indexed by ``'sensorName'`` and, for accelerometer and
    gyroscope messages, by ``'x'``, ``'y'`` and ``'z'``; those readings are
    converted to float and appended to the module-level sample lists. A
    ``"stop"`` message calls ``stop()``.

    As soon as both ``gyro_x`` and ``accel_x`` hold at least 25 samples, the
    buffers are written out through ``processDataToArff``, an LMT model is
    trained on ``train.arff`` and applied to ``classify.arff``, the matching
    ``play*`` function is triggered, and all six buffers are reset to empty
    lists.
    """
    global accel_x, accel_y, accel_z
    global gyro_x, gyro_y, gyro_z

    sensor = message['sensorName']
    if sensor == 'accelerometer':
        for axis, samples in zip('xyz', (accel_x, accel_y, accel_z)):
            samples.append(float(message[axis]))
    elif sensor == 'gyroscope':
        for axis, samples in zip('xyz', (gyro_x, gyro_y, gyro_z)):
            samples.append(float(message[axis]))
    elif sensor == "stop":
        # stop signal
        stop()

    # Nothing to classify until both sensors have delivered 25 samples.
    if len(gyro_x) < 25 or len(accel_x) < 25:
        return

    processDataToArff(accel_x, accel_y, accel_z, gyro_x, gyro_y, gyro_z)
    jvm.start()
    arff_loader = Loader(classname="weka.core.converters.ArffLoader")

    # Train on the stored training set.
    training_set = arff_loader.load_file("train.arff")
    training_set.class_is_last()
    model = Classifier(classname="weka.classifiers.trees.LMT")
    model.build_classifier(training_set)

    # Run the model over the freshly written data, capturing the text output.
    predictions = PredictionOutput(
        classname="weka.classifiers.evaluation.output.prediction.PlainText")
    evaluator = Evaluation(training_set)
    to_classify = arff_loader.load_file("classify.arff")
    to_classify.class_is_last()
    evaluator.test_model(model, to_classify, predictions)

    # Each output line overwrites `result`, so the final line decides the
    # outcome; the first marker found in a line wins.
    gesture_codes = (("upDown", 1), ("leftRight", 2), ("inOut", 3),
                     ("rotation", 4))
    result = predictions.buffer_content()
    for line in result.splitlines():
        result = next(
            (code for marker, code in gesture_codes if marker in line),
            "error")

    if result in (1, 2, 3, 4):
        stop()
        if result == 1:
            playD()
        elif result == 2:
            playBm()
        elif result == 3:
            playA()
        else:
            playG()

    # clear the arrays for new data
    gyro_x, gyro_y, gyro_z = [], [], []
    accel_x, accel_y, accel_z = [], [], []
Exemplo n.º 24
0
# Script: generate train/test ARFF files, train an LMT model on the training
# file, evaluate it on the test file and print the gesture found on each line
# of the prediction output.
# NOTE(review): the second argument of processDataToArff presumably selects
# test-set generation -- confirm against its definition.
processDataToArff("train.arff", False)
processDataToArff("test.arff", True)

# Load both datasets; the class attribute is the last one in each.
loader = Loader(classname="weka.core.converters.ArffLoader")
train = loader.load_file("train.arff")
train.class_is_last()
test = loader.load_file("test.arff")
test.class_is_last()
# print(train)

cls = Classifier(
    classname="weka.classifiers.trees.LMT")  # use LMT as our algorithm
cls.build_classifier(train)  # train the model using train.arff

# Collect the plain-text predictions produced while testing on test.arff.
pout = PredictionOutput(
    classname="weka.classifiers.evaluation.output.prediction.PlainText")
evl = Evaluation(train)
evl.test_model(cls, test, pout)

# Fetch the buffered prediction text.
result = pout.buffer_content()
#print(result)

# Split the output into lines and print only the gesture name. The printed
# number is the 1-based line position within the output text, and lines that
# contain none of the gesture names print nothing.
# NOTE(review): the if/elif chain below is cut off after the "inOut" test in
# this file -- the remaining branches are missing.
resultLines = result.splitlines()
for i in range(len(resultLines)):
    if (resultLines[i].find("upDown") != -1):
        print("%d upDown" % (i + 1))
    elif (resultLines[i].find("leftRight") != -1):
        print("%d leftRight" % (i + 1))
    elif (resultLines[i].find("inOut") != -1):
# Script: join the resampled features and labels, load the bug corpus into
# Weka and cross-validate a NaiveBayes classifier on it.
# X_resampled and y_resampled come from earlier code that is not shown here;
# the two frames are joined on their indexes.
sampled = pd.merge_asof(X_resampled,y_resampled,right_index=True,left_index=True)
print(sampled)

# NOTE(review): within this fragment `corpus` is only assigned in the
# commented-out lines below, yet it is read by load_file further down --
# confirm it is defined earlier, otherwise that call raises NameError.
#corpus_name = 'data/bugs/resampled.csv'
#corpus = os.path.join("/content/gdrive/My Drive", corpus_name)

#sampled.to_csv(corpus,index = False)

# Load the CSV corpus; the class attribute is the last column.
loader = Loader(classname="weka.core.converters.CSVLoader")
data = loader.load_file(corpus)
data.class_is_last()

"""Naive Bayes Classifier for Bug Prediction"""

# 10-fold cross-validation with a fixed seed (42); predictions, including the
# class distribution, are collected in pred_output.
classifier = Classifier(classname="weka.classifiers.bayes.NaiveBayes")
pred_output = PredictionOutput(classname="weka.classifiers.evaluation.output.prediction.PlainText", options=["-distribution"])
evaluation = Evaluation(data)
evaluation.crossvalidate_model(classifier, data, 10, Random(42), output=pred_output)
# ROC and PRC curves for every class value; wait=False is passed so the
# script continues after each plot call.
plot_cls.plot_roc(evaluation, title="ROC bugs",class_index=range(0, data.class_attribute.num_values), wait=False)
plot_cls.plot_prc(evaluation, title="PRC bugs - NaiveBayes",class_index=range(0, data.class_attribute.num_values), wait=False)

"""Performance Metrics - Naive Bayes Classifier"""

print(evaluation.summary())
print(evaluation.class_details())
print(evaluation.matrix())

# Per-class metrics are reported for class index 1 only.
print("confusionMatrix: " + str(evaluation.confusion_matrix))
print("fMeasure: " + str(evaluation.f_measure(1)))
print("precision: " + str(evaluation.precision(1)))
print("recall: " + str(evaluation.recall(1)))
Exemplo n.º 26
0
# Script: load diabetes, binarise nominal attributes, cross-validate
# LinearRegression on the result, then set up an AddClassification filter.
# `data_dir` is defined outside this fragment.

# load diabetes
loader = Loader(classname="weka.core.converters.ArffLoader")
fname = data_dir + os.sep + "diabetes.arff"
print("\nLoading dataset: " + fname + "\n")
data = loader.load_file(fname)
# we'll set the class attribute after filtering

# apply NominalToBinary filter and set class attribute
# (no class is set on `data` before filtering; the last attribute of the
# filtered output becomes the class)
fltr = Filter("weka.filters.unsupervised.attribute.NominalToBinary")
fltr.inputformat(data)
filtered = fltr.filter(data)
filtered.class_is_last()

# cross-validate LinearRegression on filtered data, display model
cls = Classifier(classname="weka.classifiers.functions.LinearRegression")
pout = PredictionOutput(
    classname="weka.classifiers.evaluation.output.prediction.PlainText")
evl = Evaluation(filtered)
evl.crossvalidate_model(cls, filtered, 10, Random(1), pout)
print("10-fold cross-validation:\n" + evl.summary())
print("Predictions:\n\n" + str(pout))
# the classifier is built on the full filtered data only after the
# cross-validation call, and that model is the one printed
cls.build_classifier(filtered)
print("Model:\n\n" + str(cls))

# use AddClassification filter with LinearRegression on filtered data
# (this fragment ends right after the filter's input format is set)
print("Applying AddClassification to filtered data:\n")
fltr = Filter(classname="weka.filters.supervised.attribute.AddClassification",
              options=[
                  "-W", "weka.classifiers.functions.LinearRegression",
                  "-classification"
              ])
fltr.inputformat(filtered)
Exemplo n.º 27
0
# Script fragment: `evl`, `cls`, `data`, `data_dir` and `plg` are set up by
# earlier code that is not shown here.
# Report the previous evaluation and model, and plot the model's graph.
print(evl.to_summary())
print(str(cls))
plg.plot_dot_graph(cls.graph())

# 2. filtered classifier
# Load the test set; its class attribute is the last one.
fname = data_dir + os.sep + "simpletext-test.arff"
print("\nLoading dataset: " + fname + "\n")
loader = Loader(classname="weka.core.converters.ArffLoader")
test = loader.load_file(fname)
test.set_class_index(test.num_attributes() - 1)
print("Building/evaluating filtered classifier...")
# J48 wrapped together with a StringToWordVector filter in a single
# FilteredClassifier.
cls = FilteredClassifier()
cls.set_classifier(Classifier(classname="weka.classifiers.trees.J48"))
cls.set_filter(Filter(classname="weka.filters.unsupervised.attribute.StringToWordVector"))
# NOTE(review): `data` here is whatever was loaded before this fragment --
# presumably the matching training set; confirm.
cls.build_classifier(data)
pout = PredictionOutput(classname="weka.classifiers.evaluation.output.prediction.PlainText")
# the prediction output is given the test set as its header before evaluation
pout.set_header(test)
evl = Evaluation(data)
evl.test_model(cls, test, pout)
print(str(pout))
print(str(cls))

# load ReutersCorn-train
# (replaces `data`; the class attribute is the last one)
fname = data_dir + os.sep + "ReutersCorn-train.arff"
print("\nLoading dataset: " + fname + "\n")
loader = Loader(classname="weka.core.converters.ArffLoader")
data = loader.load_file(fname)
data.set_class_index(data.num_attributes() - 1)

# load ReutersCorn-test
# (only the file name is prepared; this fragment ends before it is loaded)
fname = data_dir + os.sep + "ReutersCorn-test.arff"
f.close()


# In[4]:


# Show the contents of instances.arff. The `with` block closes the file even
# if reading or printing raises, which the manual open()/close() pair did not.
with open("instances.arff", "r") as f:
    print(f.read())


# In[10]:


# Notebook cell: run the classifier `csr` on instances.arff and read the CSV
# prediction output into a DataFrame.
# `loader`, `csr` and `pd` come from earlier cells that are not shown here.
from io import StringIO
output_results = PredictionOutput(classname="weka.classifiers.evaluation.output.prediction.CSV")
data1 = loader.load_file("instances.arff")
data1.class_is_last()
ev2 = Evaluation(data1)
ev2.test_model(csr,data1,output_results)
# NOTE(review): the fixed slice [-13:-10] takes three characters at a fixed
# distance from the end of the output -- presumably the predicted label of the
# last row; this breaks if label or row width changes. Confirm.
print("Class prediction: ",output_results.buffer_content()[-13:-10])
print("\n\n     Instance","     Actual","    Predicted")
print(output_results.buffer_content())
# The three column names are prepended directly to the buffered text before
# parsing.
# NOTE(review): the prefix ends with a comma and no newline, so it joins the
# first line of the buffer -- verify the resulting header row is as intended.
TESTDATA = StringIO("Instance,Actual,Predicted,"+output_results.buffer_content())
# jvm.stop()
x = pd.read_csv(TESTDATA)


# In[14]: