def main(args):
    """
    Trains a J48 classifier on a training set and outputs the predicted class and class distribution alongside the
    actual class from a test set. Class attribute is assumed to be the last attribute.
    :param args: the commandline arguments (train and test datasets)
    :type args: list
    """

    # load a dataset
    helper.print_info("Loading train: " + args[1])
    loader = Loader(classname="weka.core.converters.ArffLoader")
    train = loader.load_file(args[1])
    train.class_index = train.num_attributes - 1
    helper.print_info("Loading test: " + args[2])
    test = loader.load_file(args[2])
    test.class_is_last()

    # classifier
    cls = Classifier(classname="weka.classifiers.trees.J48")
    cls.build_classifier(train)

    # output predictions
    print("# - actual - predicted - error - distribution")
    for index, inst in enumerate(test):
        pred = cls.classify_instance(inst)
        dist = cls.distribution_for_instance(inst)
        print(
            "%d - %s - %s - %s  - %s" %
            (index+1,
             inst.get_string_value(inst.class_index),
             inst.class_attribute.value(int(pred)),
             "yes" if pred != inst.get_value(inst.class_index) else "no",
             str(dist.tolist())))
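A typical entry point for scripts like this (the pattern used throughout python-weka-wrapper's example scripts) starts the JVM before calling main and always stops it afterwards; a minimal sketch, assuming sys, traceback and weka.core.jvm are imported:

if __name__ == "__main__":
    try:
        jvm.start()
        main(sys.argv)
    except Exception:
        print(traceback.format_exc())
    finally:
        jvm.stop()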
Example #2
class Classifier():

    def __init__(self):
        jvm.start(class_path=['/vol/customopt/machine-learning/src/weka/weka-3-6-8/weka.jar'])
        self.loader = Loader(classname="weka.core.converters.ArffLoader")
        
    def train(self, classifier, trainfile):
        if classifier == "ripper":
            self.cls = classifiers.Classifier(classname="weka.classifiers.rules.JRip", options=["-P", "false", "-E", "false", "-O", "5"])
        data = self.loader.load_file(trainfile)
        data.set_class_index(data.num_attributes() - 1)
        self.cls.build_classifier(data)
        return str(self.cls)

    def test(self, testfile):
        predictions = []
        testdata = self.loader.load_file(testfile, incremental=True)
        testdata.set_class_index(testdata.num_attributes() - 1)
        while True:
            inst = self.loader.next_instance(testdata)
            if inst is None:
                break
            dist = self.cls.distribution_for_instance(inst)
            predictions.append([self.cls.classify_instance(inst),
                                " ".join([str(round(x, 2)) for x in dist])])
        return predictions
        return predictions

    def stop(self):
        jvm.stop()
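A minimal usage sketch for this wrapper, assuming hypothetical train.arff/test.arff files; note that train() only assigns self.cls when classifier == "ripper":

wrapper = Classifier()
print(wrapper.train("ripper", "train.arff"))   # prints the induced JRip rule set
for pred, dist in wrapper.test("test.arff"):   # [predicted label index, distribution string]
    print(pred, dist)
wrapper.stop()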
Example #3
def main():
    """
    Just runs some example code.
    """

    # load ARFF file
    helper.print_title("Loading ARFF file")
    loader = Loader(classname="weka.core.converters.ArffLoader")
    data = loader.load_file(helper.get_data_dir() + os.sep + "iris.arff")
    print(str(data))

    # load CSV file
    helper.print_title("Loading CSV file")
    loader = Loader(classname="weka.core.converters.CSVLoader")
    data = loader.load_file(helper.get_data_dir() + os.sep + "iris.csv")
    print(str(data))

    # load directory
    # change this to something sensible
    text_dir = "/some/where"
    if os.path.exists(text_dir) and os.path.isdir(text_dir):
        helper.print_title("Loading directory: " + text_dir)
        loader = TextDirectoryLoader(
            options=["-dir", text_dir, "-F", "-charset", "UTF-8"])
        data = loader.load()
        print(str(data))
Example #6
def run():
    jvm.start()
    load_csv = Loader("weka.core.converters.CSVLoader")
    data_csv = load_csv.load_file(
        "/Users/imeiliasantoso/web_graduate_project4/predict_page/predict_data.csv"
    )

    saver = Saver("weka.core.converters.ArffSaver")
    saver.save_file(
        data_csv,
        "/Users/imeiliasantoso/web_graduate_project4/predict_page/predict_data.arff"
    )

    load_arff = Loader("weka.core.converters.ArffLoader")
    data_arff = load_arff.load_file(
        "/Users/imeiliasantoso/web_graduate_project4/predict_page/predict_data.arff"
    )
    data_arff.class_is_last()

    global j48
    J48_class = Classifier(classname="weka.classifiers.trees.J48",
                           options=["-C", "0.25", "-M", "2"])
    J48_class.build_classifier(data_arff)
    evaluationj48 = Evaluation(data_arff)
    evaluationj48.crossvalidate_model(J48_class, data_arff, 10, Random(100))
    j48 = str(evaluationj48.percent_correct)
    jvm.stop()
    return j48
Example #7
    def load_data_from_arff(self):
        print("loading data from raw")
        loader = Loader(classname="weka.core.converters.ArffLoader")
        #target
        print("Loading target data")
        all_target = loader.load_file(self.arff_data_path + self.target_name +
                                      ".arff")
        all_target.class_is_last()
        train_vs_test_percent = (self.num_games_target /
                                 self.num_games_source) * 100
        self.target, self.eval = all_target.train_test_split(
            train_vs_test_percent)
        print("target size:", self.target.num_instances)
        print("Eval size:", self.eval.num_instances)

        #source
        print("Loading source data")
        i = 0
        allFiles = os.listdir(self.arff_data_path)
        random.shuffle(allFiles)
        while i < len(allFiles):
            filename = allFiles[i]
            if filename != self.target_name + ".arff":
                print("Loading", filename)
                source = loader.load_file(self.arff_data_path + filename)
                source.class_is_last()
                print("Size:", source.num_instances)
                self.source.append(source)
            i += 1
Example #8
    def crossTest(self, trainingFile, classifier, testFile):

        loader = Loader(classname="weka.core.converters.ArffLoader")
        data1 = loader.load_file(trainingFile)
        data1.class_is_last()

        cls = Classifier(classname=classifier)
        cls.build_classifier(data1)

        data2 = loader.load_file(testFile)
        data2.class_is_last()

        classes = [str(code) for code in data2.class_attribute.values]
        header = ["Accuracy"]
        for name in classes:
            header += [name + " TP", name + " FP", name + " AUC ROC"]
        values = []

        evl = Evaluation(data2)
        evl.test_model(cls, data2)

        values.append(evl.percent_correct)
        for index, name in enumerate(classes):
            values += [
                evl.true_positive_rate(index) * 100,
                evl.false_positive_rate(index) * 100,
                evl.area_under_roc(index)
            ]

        self.values = values
        self.header = header
def main():
    """
    Just runs some example code.
    """

    # load a dataset
    iris = helper.get_data_dir() + os.sep + "iris.arff"
    helper.print_info("Loading dataset: " + iris)
    loader = Loader("weka.core.converters.ArffLoader")
    data = loader.load_file(iris)

    # remove class attribute
    helper.print_info("Removing class attribute")
    remove = Filter(classname="weka.filters.unsupervised.attribute.Remove",
                    options=["-R", "last"])
    remove.inputformat(data)
    filtered = remove.filter(data)

    # use MultiFilter
    helper.print_info("Use MultiFilter")
    remove = Filter(classname="weka.filters.unsupervised.attribute.Remove",
                    options=["-R", "first"])
    std = Filter(classname="weka.filters.unsupervised.attribute.Standardize")
    multi = MultiFilter()
    multi.filters = [remove, std]
    multi.inputformat(data)
    filtered_multi = multi.filter(data)

    # output datasets
    helper.print_title("Input")
    print(data)
    helper.print_title("Output")
    print(filtered)
    helper.print_title("Output (MultiFilter)")
    print(filtered_multi)

    # load text dataset
    text = helper.get_data_dir() + os.sep + "reutersTop10Randomized_1perc_shortened.arff"
    helper.print_info("Loading dataset: " + text)
    loader = Loader("weka.core.converters.ArffLoader")
    data = loader.load_file(text)
    data.class_is_last()

    # apply StringToWordVector
    stemmer = Stemmer(classname="weka.core.stemmers.IteratedLovinsStemmer")
    stopwords = Stopwords(classname="weka.core.stopwords.Rainbow")
    tokenizer = Tokenizer(classname="weka.core.tokenizers.WordTokenizer")
    s2wv = StringToWordVector(options=["-W", "10", "-L", "-C"])
    s2wv.stemmer = stemmer
    s2wv.stopwords = stopwords
    s2wv.tokenizer = tokenizer
    s2wv.inputformat(data)
    filtered = s2wv.filter(data)

    helper.print_title("Input (StringToWordVector)")
    print(data)
    helper.print_title("Output (StringToWordVector)")
    print(filtered)
Example #10
def main():
    """
    Just runs some example code.
    """

    # load a dataset
    iris_file = helper.get_data_dir() + os.sep + "iris.arff"
    helper.print_info("Loading dataset: " + iris_file)
    loader = Loader("weka.core.converters.ArffLoader")
    data = loader.load_file(iris_file)

    # remove class attribute
    data.delete_last_attribute()

    # build a clusterer and output model
    helper.print_title("Training SimpleKMeans clusterer")
    clusterer = Clusterer(classname="weka.clusterers.SimpleKMeans", options=["-N", "3"])
    clusterer.build_clusterer(data)
    print(clusterer)
    helper.print_info("Evaluating on data")
    evaluation = ClusterEvaluation()
    evaluation.set_model(clusterer)
    evaluation.test_model(data)
    print("# clusters: " + str(evaluation.num_clusters))
    print("log likelihood: " + str(evaluation.log_likelihood))
    print("cluster assignments:\n" + str(evaluation.cluster_assignments))
    plc.plot_cluster_assignments(evaluation, data, inst_no=True)

    # using a filtered clusterer
    helper.print_title("Filtered clusterer")
    loader = Loader("weka.core.converters.ArffLoader")
    data = loader.load_file(iris_file)
    clusterer = Clusterer(classname="weka.clusterers.SimpleKMeans", options=["-N", "3"])
    remove = Filter(classname="weka.filters.unsupervised.attribute.Remove", options=["-R", "last"])
    fclusterer = FilteredClusterer()
    fclusterer.clusterer = clusterer
    fclusterer.filter = remove
    fclusterer.build_clusterer(data)
    print(fclusterer)

    # load a dataset incrementally and build clusterer incrementally
    helper.print_title("Incremental clusterer")
    loader = Loader("weka.core.converters.ArffLoader")
    iris_inc = loader.load_file(iris_file, incremental=True)
    clusterer = Clusterer("weka.clusterers.Cobweb")
    remove = Filter(classname="weka.filters.unsupervised.attribute.Remove", options=["-R", "last"])
    remove.inputformat(iris_inc)
    iris_filtered = remove.outputformat()
    clusterer.build_clusterer(iris_filtered)
    for inst in loader:
        remove.input(inst)
        inst_filtered = remove.output()
        clusterer.update_clusterer(inst_filtered)
    clusterer.update_finished()
    print(clusterer.to_commandline())
    print(clusterer)
    print(clusterer.graph)
    plg.plot_dot_graph(clusterer.graph)
Example #12
def load_data(fname, dir_in="../data/", incremental=True):
    loader = Loader(classname="weka.core.converters.ArffLoader")
    if incremental:
        data = loader.load_file(dir_in + fname, incremental=incremental)
    else:
        data = loader.load_file(dir_in + fname)
    data.class_is_last()

    return data, loader
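A usage sketch (file name hypothetical): with incremental=True the returned dataset initially carries only the header, and the loader itself streams the instances on demand, which is why the function returns both.

data, loader = load_data("iris.arff")
print(data.num_attributes)   # header information is available immediately
for inst in loader:          # instances are read one at a time
    print(inst)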
Example #13
def run_wsn_project():
	loader = Loader(classname="weka.core.converters.ArffLoader")
	a_train_data = loader.load_file('../data/a_train_data.arff')
	a_test_data = loader.load_file('../data/a_test_data.arff')
	b_train_data = loader.load_file('../data/b_train_data.arff')
	b_test_data = loader.load_file('../data/b_test_data.arff')

	a_train_data.class_is_last()
	a_test_data.class_is_last()
	b_train_data.class_is_last()
	b_test_data.class_is_last()

	run_analysis('A', a_train_data, a_test_data, ['Spare_Time/TV','Grooming','Toileting','Sleeping','Breakfast','Showering','Snack','Lunch','Leaving','Dinner'])
	run_analysis('B', b_train_data, b_test_data, ['Spare_Time/TV','Grooming','Toileting','Sleeping','Breakfast','Showering','Snack','Lunch','Leaving','Dinner'])
def main(args):
    """
    Trains a NaiveBayesUpdateable classifier incrementally on a dataset. The dataset can be supplied as parameter.
    :param args: the commandline arguments
    :type args: list
    """

    # load a dataset
    if len(args) <= 1:
        data_file = helper.get_data_dir() + os.sep + "vote.arff"
    else:
        data_file = args[1]
    helper.print_info("Loading dataset: " + data_file)
    loader = Loader(classname="weka.core.converters.ArffLoader")
    data = loader.load_file(data_file, incremental=True)
    data.class_is_last()

    # classifier
    nb = Classifier(classname="weka.classifiers.bayes.NaiveBayesUpdateable")
    nb.build_classifier(data)

    # train incrementally
    for inst in loader:
        nb.update_classifier(inst)

    print(nb)
Example #16
    def retrain(self, examples, labels):

        f = open("trainingweka.arff", "w")
        f.write("@relation randomset\n")
        for j in range(len(examples[0])):
            f.write("@attribute feature%d real\n" % j)
        f.write("@attribute class {TRUE, FALSE}\n")
        f.write("@data\n")

        for (example, label) in zip(examples, labels):
            for feature in example:
                f.write("%f," % feature)
            if label == 1:
                f.write("TRUE\n")
            else:
                f.write("FALSE\n")
        f.close()

        loader = Loader(classname="weka.core.converters.ArffLoader")
        # options=["-H", "-B", "10000"])
        self.trainingData = loader.load_file("trainingweka.arff")
        self.trainingData.set_class_index(self.trainingData.num_attributes() - 1)
        self.classifier = Classifier(
            classname="weka.classifiers.functions.Logistic",
            options=["-R", "%f" % (1.0 / self.C)])
        self.classifier.build_classifier(self.trainingData)
Example #17
    def initData(self, arrfFile):
        loader = Loader(classname="weka.core.converters.ArffLoader")
        print(self.dataDir + '/' + arrfFile)
        self.data = loader.load_file(self.dataDir + '/' + arrfFile)
        self.data.class_is_last()

        print('Loading file ' + self.dataDir + '/' + arrfFile)
def main():
    """
    Shows how to use the CostSensitiveClassifier.
    """

    # load a dataset
    data_file = helper.get_data_dir() + os.sep + "diabetes.arff"
    helper.print_info("Loading dataset: " + data_file)
    loader = Loader("weka.core.converters.ArffLoader")
    data = loader.load_file(data_file)
    data.class_is_last()

    # classifier
    classifier = SingleClassifierEnhancer(
        classname="weka.classifiers.meta.CostSensitiveClassifier",
        options=["-cost-matrix", "[0 1; 2 0]", "-S", "2"])
    base = Classifier(classname="weka.classifiers.trees.J48", options=["-C", "0.3"])
    classifier.classifier = base

    folds = 10
    evaluation = Evaluation(data)
    evaluation.crossvalidate_model(classifier, data, folds, Random(1))


    print("")
    print("=== Setup ===")
    print("Classifier: " + classifier.to_commandline())
    print("Dataset: " + data.relationname)
    print("")
    print(evaluation.summary("=== " + str(folds) + " -fold Cross-Validation ==="))
def PredecirUnaTemporada(path):
    jvm.start()
    insta = CrearInstanciaParaPredecir(path)
    atributos = ""
    file = open('ModelData/wekaHeader.arff', 'r')
    atributos = file.readlines()
    file.close()

    file = open('ModelData/predictionFiles/inst.arff', 'w')
    file.writelines(atributos)
    file.write("\n" + insta + '\n')
    file.close()

    objects = serialization.read_all("ModelData/77PercentModelPaisajes.model")
    classifier = Classifier(jobject=objects[0])

    loader = Loader(classname="weka.core.converters.ArffLoader")
    data = loader.load_file("ModelData/predictionFiles/inst.arff")
    data.class_is_last()

    clases = ["invierno", "verano", "otono", "primavera"]
    prediccion = ""
    for index, inst in enumerate(data):
        pred = classifier.classify_instance(inst)
        dist = classifier.distribution_for_instance(inst)
        prediccion = clases[int(pred)]
    jvm.stop()
    return prediccion
def save_all_scores_on_validate():
    for user in user_list:
        user_validate_dir = os.listdir("../data/arff_files/" + str(user) +
                                       "/validate/")
        user_validate_dir.sort()
        n = len(user_validate_dir)
        for expression_index in range(n):
            print(expression_index, "=>", str(expression_list[expression_index]),
                  ':', str(user_validate_dir[expression_index]))
            id = str(expression_list[expression_index]) + '_' + str(user)
            target_dir = '../results/' + str(
                expression_list[expression_index]) + '/' + str(user) + '/'
            model_dir = '../models/' + str(
                expression_list[expression_index]) + '/' + str(user) + '/'
            validate_data_file = "../data/arff_files/" + str(
                user) + "/validate/" + str(user_validate_dir[expression_index])
            print(validate_data_file, "=>", model_dir, "all algos", "=>", target_dir, "\n")
            loader = Loader(classname="weka.core.converters.ArffLoader")
            validate_data = loader.load_file(validate_data_file)
            for algo in algo_func_dict.keys():
                trained_model = Classifier(
                    jobject=serialization.read(model_dir + algo + ".model"))
                scores_matrix = get_classifier_score(trained_model,
                                                     validate_data)
                out_file = target_dir + algo + "_scores.csv"
                #writing scores to target file
                np.savetxt(out_file, scores_matrix, delimiter=",")
def main():
    """
    Just runs some example code.
    """

    # load a dataset
    iris_file = helper.get_data_dir() + os.sep + "iris.arff"
    helper.print_info("Loading dataset: " + iris_file)
    loader = Loader("weka.core.converters.ArffLoader")
    iris_data = loader.load_file(iris_file)
    iris_data.class_is_last()

    # train classifier
    classifier = Classifier("weka.classifiers.trees.J48")
    classifier.build_classifier(iris_data)

    # save and read object
    helper.print_title("I/O: single object")
    outfile = tempfile.gettempdir() + os.sep + "j48.model"
    serialization.write(outfile, classifier)
    model = Classifier(jobject=serialization.read(outfile))
    print(model)

    # save classifier and dataset header (multiple objects)
    helper.print_title("I/O: single object")
    serialization.write_all(outfile, [classifier, Instances.template_instances(iris_data)])
    objects = serialization.read_all(outfile)
    for i, obj in enumerate(objects):
        helper.print_info("Object #" + str(i+1) + ":")
        if javabridge.get_env().is_instance_of(obj, javabridge.get_env().find_class("weka/core/Instances")):
            obj = Instances(jobject=obj)
        elif javabridge.get_env().is_instance_of(obj, javabridge.get_env().find_class("weka/classifiers/Classifier")):
            obj = Classifier(jobject=obj)
        print(obj)
Example #23
    def score(self, testExamples, labels):
        f = open("testingweka.arff", "w")
        f.write("@relation randomset\n")
        for j in range(len(testExamples[0])):
            f.write("@attribute feature%d real\n" % j)
        f.write("@attribute class {TRUE, FALSE}\n")
        f.write("@data\n")
        for (example, label) in zip(testExamples, labels):
            for feature in example:
                f.write("%f," % feature)
            if label == 1:
                f.write("TRUE\n")
            else:
                f.write("FALSE\n")
        f.close()

        loader = Loader(classname="weka.core.converters.ArffLoader")
        #                        options=["-H", "-B", "10000"])
        self.testingData = loader.load_file("testingweka.arff")
        self.testingData.set_class_index(self.testingData.num_attributes() - 1)

        evaluation = Evaluation(self.trainingData)
        evaluation.test_model(self.classifier, self.testingData)

        #print evaluation.percent_correct()
        #jvm.stop()
        return evaluation.percent_correct()
Example #24
def TestClassification(arff, modelInput, results):
    # start the JVM
    jvm.start()
    # load the saved model
    objects = serialization.read_all(modelInput)
    clsf = Classifier(jobject=objects[0])
    print(clsf)
    # load the test set
    loader = Loader(classname="weka.core.converters.ArffLoader")
    test = loader.load_file(arff)
    test.class_is_first()
    # output the results
    resultsFile = open(results, "w")
    resultsFile.write("ID\tActual\tPredicted\tBenign prob.\tMalignant prob.\n")
    print("ID\tActual\tPredicted\tBenign prob.\tMalignant prob.")
    for index, inst in enumerate(test):
        pred = clsf.classify_instance(inst)
        dist = clsf.distribution_for_instance(inst)
        sampleID = index + 1
        origin = inst.get_string_value(inst.class_index)
        prediction = inst.class_attribute.value(int(pred))
        # whether the prediction matches the actual class (computed but not written to the output)
        sameAsOrigin = "yes" if pred == inst.get_value(inst.class_index) else "no"
        NRate = dist.tolist()[0]
        PRate = dist.tolist()[1]
        resultsFile.write(
            "%d\t%s\t%s\t%s\t%s" %
            (sampleID, origin, prediction, str(NRate), str(PRate)) + "\n")
        print("%d\t%s\t%s\t%s\t%s" %
              (sampleID, origin, prediction, str(NRate), str(PRate)))
    resultsFile.close()
    # stop the JVM
    jvm.stop()
    print("Classification finished")
Example #25
    def create_weka_dataset(self, X, y=None):
        """Create weka dataset using temporaly file

        Arguments:
            X {array like} -- non target class instances
            y {array like} -- target class instances

        Returns:
            java object wrapped -- weka dataset
        """
        try:
            # Create new temporal file
            temp = tempfile.NamedTemporaryFile()
            # Concat X and y. Write csv to the temporary file.

            if y is None:
                y = pd.DataFrame(["?"]*X.shape[0], columns=self.experiment_configuration["target"])
            X.reset_index(drop=True, inplace=True)
            y.reset_index(drop=True, inplace=True)
            dataframe = pd.concat([X, y], axis=1, ignore_index=True)
            dataframe.to_csv(temp.name, index=None)
            options = None
            if self.y_uniques is not None:
                options = ["-L", "{}:{}".format(dataframe.shape[1],
                           ",".join(map(str, self.y_uniques)))]
            if not self.is_classification():
                options = ["-R", "last"]
            loader = Loader(classname="weka.core.converters.CSVLoader",
                            options=options)
            data = loader.load_file(temp.name)
            # Last column of data is target
            data.class_is_last()
        finally:
            temp.close()
        return data
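A hypothetical call, assuming a fitted wrapper object (here named model) whose experiment_configuration names the target column and whose y_uniques holds the known class labels:

X = pd.DataFrame({"f1": [0.1, 0.7], "f2": [1.3, 0.2]})
y = pd.DataFrame(["yes", "no"], columns=["target"])   # column name is illustrative
dataset = model.create_weka_dataset(X, y)             # model: instance of the surrounding class
print(dataset.num_instances)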
Example #26
def runSMO(file, bound):
    loader = Loader(classname="weka.core.converters.CSVLoader")
    data = loader.load_file(file)
    data.class_is_first()

    remove = Filter(classname="weka.filters.unsupervised.attribute.Remove",
                    options=["-R", bound])

    cls = KernelClassifier(
        classname="weka.classifiers.functions.SMO",
        options=["-C", "1.0", "-L", "0.001", "-P", "1.0E-12", "-N", "0"])
    kernel = Kernel(
        classname="weka.classifiers.functions.supportVector.PolyKernel",
        options=["-C", "250007", "-E", "1.0"])
    cls.kernel = kernel
    pout = PredictionOutput(
        classname="weka.classifiers.evaluation.output.prediction.PlainText")

    remove.inputformat(data)
    filtered = remove.filter(data)

    evl = Evaluation(filtered)
    evl.crossvalidate_model(cls, filtered, 10, Random(1), pout)

    #print(pout.buffer_content())

    print(evl.percent_correct)
    #print(evl.summary())

    result = evl.class_details()
    print(result)
    return result
def main():
    """
    Just runs some example code.
    """

    # load a dataset
    iris_file = helper.get_data_dir() + os.sep + "iris.arff"
    helper.print_info("Loading dataset: " + iris_file)
    loader = Loader("weka.core.converters.ArffLoader")
    full = loader.load_file(iris_file)
    full.class_is_last()

    # remove class attribute
    data = Instances.copy_instances(full)
    data.no_class()
    data.delete_last_attribute()

    # build a clusterer and output model
    helper.print_title("Training SimpleKMeans clusterer")
    clusterer = Clusterer(classname="weka.clusterers.SimpleKMeans", options=["-N", "3"])
    clusterer.build_clusterer(data)
    print("done")

    # classes to clusters
    evl = ClusterEvaluation()
    evl.set_model(clusterer)
    evl.test_model(full)
    helper.print_title("Cluster results")
    print(evl.cluster_results)
    helper.print_title("Classes to clusters")
    print(evl.classes_to_clusters)
Example #28
def vote_classifier_train(directory, nameOfDataSet, flag):
    loader = Loader(classname="weka.core.converters.CSVLoader")
    data = loader.load_file(directory)
    data.class_is_last()
    meta = MultipleClassifiersCombiner(
        classname="weka.classifiers.meta.Vote",
        options=[
            '-S', '1', '-B', 'weka.classifiers.trees.J48 -C 0.25 -M 2', '-B',
            'weka.classifiers.trees.RandomTree -K 6 -M 1.0 -V 0.001 -S 1',
            '-B',
            'weka.classifiers.meta.Bagging -P 100 -S 1 -num-slots 1 -I 10 -W weka.classifiers.trees.REPTree -- '
            '-M 2 -V 0.001 -N 3 -S 1 -L -1 -I 0.0', '-B',
            'weka.classifiers.meta.AdaBoostM1 -P 100 -S 1 -I 10 -W weka.classifiers.trees.DecisionStump',
            '-B',
            'weka.classifiers.meta.Bagging -P 100 -S 1 -num-slots 1 -I 10 -W weka.classifiers.trees.REPTree -- '
            '-M 2 -V 0.001 -N 3 -S 1 -L -1 -I 0.0', '-B',
            'weka.classifiers.bayes.NaiveBayes ', '-R', 'AVG'
        ])
    evl = Evaluation(data)
    pout = PredictionOutput(
        classname="weka.classifiers.evaluation.output.prediction.PlainText")
    if flag:
        evl.crossvalidate_model(meta, data, 10, Random(1), pout)
    else:
        evl.evaluate_train_test_split(meta, data, 80.0, Random(1), pout)
    gc.collect()
    print_and_save('Proposed model', flag, nameOfDataSet, evl)
Example #29
def run_webservices_project():
    loader = Loader(classname="weka.core.converters.ArffLoader")
    rt_train_data = loader.load_file('../data/rt_train_data.arff')
    rt_test_data = loader.load_file('../data/rt_test_data.arff')
    tp_train_data = loader.load_file('../data/tp_train_data.arff')
    tp_test_data = loader.load_file('../data/tp_test_data.arff')

    rt_train_data.class_is_last()
    rt_test_data.class_is_last()
    tp_train_data.class_is_last()
    tp_test_data.class_is_last()

    run_analysis('RT', rt_train_data, rt_test_data,
                 ['RT0', 'RT1', 'RT2', 'RT3', 'RT4'])
    run_analysis('TP', tp_train_data, tp_test_data,
                 ['TP0', 'TP1', 'TP2', 'TP3', 'TP4'])
def main():
    """
    Just runs some example code.
    """

    classifier = Classifier("weka.classifiers.trees.J48")

    helper.print_title("Capabilities")
    capabilities = classifier.capabilities
    print(capabilities)

    # load a dataset
    iris_file = helper.get_data_dir() + os.sep + "iris.arff"
    helper.print_info("Loading dataset: " + iris_file)
    loader = Loader("weka.core.converters.ArffLoader")
    iris_data = loader.load_file(iris_file)
    iris_data.class_is_last()
    data_capabilities = Capabilities.for_instances(iris_data)
    print(data_capabilities)
    print("classifier handles dataset: " + str(capabilities.supports(data_capabilities)))

    # disable/enable
    helper.print_title("Disable/Enable")
    capability = Capability(member="UNARY_ATTRIBUTES")
    capabilities.disable(capability)
    capabilities.min_instances = 10
    print("Removing: " + str(capability))
    print(capabilities)
def main():
    """
    Just runs some example code.
    """

    # load a dataset
    iris_file = helper.get_data_dir() + os.sep + "iris.arff"
    helper.print_info("Loading dataset: " + iris_file)
    loader = Loader("weka.core.converters.ArffLoader")
    data = loader.load_file(iris_file)

    # remove class attribute
    data.delete_last_attribute()

    # build a clusterer and output model
    helper.print_title("Training SimpleKMeans clusterer")
    clusterer = Clusterer(classname="weka.clusterers.SimpleKMeans", options=["-N", "3"])
    clusterer.build_clusterer(data)
    print(clusterer)

    # cluster data
    helper.print_info("Clustering data")
    for index, inst in enumerate(data):
        cl = clusterer.cluster_instance(inst)
        dist = clusterer.distribution_for_instance(inst)
        print(str(index+1) + ": cluster=" + str(cl) + ", distribution=" + str(dist))
Example #32
def main(args):
    """
    Loads a dataset, shuffles it, splits it into train/test set. Trains J48 with training set and
    evaluates the built model on the test set.
    :param args: the commandline arguments (optional, can be dataset filename)
    :type args: list
    """

    # load a dataset
    if len(args) <= 1:
        data_file = helper.get_data_dir() + os.sep + "vote.arff"
    else:
        data_file = args[1]
    helper.print_info("Loading dataset: " + data_file)
    loader = Loader(classname="weka.core.converters.ArffLoader")
    data = loader.load_file(data_file)
    data.class_is_last()

    # generate train/test split of randomized data
    train, test = data.train_test_split(66.0, Random(1))

    # build classifier
    cls = Classifier(classname="weka.classifiers.trees.J48")
    cls.build_classifier(train)
    print(cls)

    # evaluate
    evl = Evaluation(train)
    evl.test_model(cls, test)
    print(evl.summary())
def main():

    try:
        jvm.start()

        loader = Loader(classname="weka.core.converters.CSVLoader")
        data = loader.load_file("./data/adult.csv")

        data.class_is_last()  # set class attribute

        # randomize data
        folds = 10  # number of cross-validation folds
        seed = 1
        rnd = Random(seed)
        rand_data = Instances.copy_instances(data)
        rand_data.randomize(rnd)
        if rand_data.class_attribute.is_nominal:
            rand_data.stratify(folds)

        NaiveBayes(rand_data, folds, seed, data)
        DecisionTree(rand_data, folds, seed, data)
    except Exception as e:
        raise e
    finally:
        jvm.stop()
def gridsearch():
    """
    Applies GridSearch to a dataset. The GridSearch package must not be installed separately,
    as the monolithic weka.jar already contains this package.
    """

    helper.print_title("GridSearch")

    # load a dataset
    fname = helper.get_data_dir() + os.sep + "bolts.arff"
    helper.print_info("Loading train: " + fname)
    loader = Loader(classname="weka.core.converters.ArffLoader")
    train = loader.load_file(fname)
    train.class_is_last()

    # classifier
    grid = GridSearch(options=["-sample-size", "100.0", "-traversal", "ROW-WISE", "-num-slots", "1", "-S", "1"])
    grid.evaluation = "CC"
    grid.y = {"property": "kernel.gamma", "min": -3.0, "max": 3.0, "step": 1.0, "base": 10.0, "expression": "pow(BASE,I)"}
    grid.x = {"property": "C", "min": -3.0, "max": 3.0, "step": 1.0, "base": 10.0, "expression": "pow(BASE,I)"}
    cls = Classifier(
        classname="weka.classifiers.functions.SMOreg",
        options=["-K", "weka.classifiers.functions.supportVector.RBFKernel"])
    grid.classifier = cls
    grid.build_classifier(train)
    print("Model:\n" + str(grid))
    print("\nBest setup:\n" + grid.best.to_commandline())
Example #36
def main():

    dataset = sys.argv[1]
    #load a dataset
    loader = Loader("weka.core.converters.ArffLoader")
    data = loader.load_file("./data/" + dataset + ".arff")
    data.class_is_last()

    num_classes = data.class_attribute.num_values

    os.mkdir('resultados_' + sys.argv[1])
    for random_cv in range(10):  #10 CV

        # generate train/test split of randomized data
        train, test = data.train_test_split(75.0, Random(random_cv))
        results_train, results_test = classification(data, train, test,
                                                     num_classes)
        #        results_test = classification(test, num_classes)

        #Write results in Excel format
        train_name = "./resultados_" + dataset + "/resultados_" + dataset + "_E" + str(random_cv) + ".csv"
        test_name = "./resultados_" + dataset + "/resultados_" + dataset + "_T" + str(random_cv) + ".csv"

        results_train.to_csv(train_name)
        results_test.to_csv(test_name)
Example #39
    def predict_proba(self, X):
        evaluation = Evaluation(self.train_data)

        # Add class column (we can't copy X, because this is a large object, so we add the column and remove it later)
        X['class'] = None

        filename = self.to_arff(X, True)

        # Remove class column
        del X['class']

        loader = Loader("weka.core.converters.ArffLoader")
        test_data = loader.load_file(filename)
        test_data.class_is_last()

        evaluation.test_model(self.classifier, test_data)

        probas = None

        # Return probabilities
        for pred in evaluation.predictions:
            if probas is None:
                probas = pred.distribution
            else:
                probas = np.vstack([probas, pred.distribution])

        return probas
Example #40
    def evaluation_data(self, model):
        try:
            loader = Loader(classname="weka.core.converters.ArffLoader")
            data_test = loader.load_file(self.dataTestName)
            #helper.print_info("Evaluating on data:")
            evaluation = ClusterEvaluation()

            evaluation.set_model(model)
            evaluation.test_model(data_test)
            #print("# clusters: " + str(evaluation.num_clusters))
            #print("# log likelihood: " + str(evaluation.log_likelihood))
            cluster_ass = evaluation.cluster_assignments
            #print("# cluster assignments:\n" + str(cluster_ass))
            f = open("result_data.txt", "w+")
            i = 0
            for ins in data_test:
                stt = "normal"
                if (cluster_ass[i] == 0):
                    stt = "anomaly"
                statement = str(ins) + "," + stt
                #print statement
                f.write(statement + "\n")
                i = i + 1

            f.close()
            return evaluation.cluster_results
        except Exception as e:
            print(traceback.format_exc())
            raise e
Example #41
 def _load_data(self, dfile, index=None):
     loader = Loader(classname='weka.core.converters.CSVLoader')
     data = loader.load_file(dfile=dfile)
     if index is None:
         data.set_class_index(data.num_attributes() - 1)
     else:
         data.set_class_index(index)
     return data
def main(args):
    """
    Trains Apriori on the specified dataset (uses vote UCI dataset if no dataset specified).
    :param args: the commandline arguments
    :type args: list
    """

    # load a dataset
    if len(args) <= 1:
        data_file = helper.get_data_dir() + os.sep + "vote.arff"
    else:
        data_file = args[1]
    helper.print_info("Loading dataset: " + data_file)
    loader = Loader("weka.core.converters.ArffLoader")
    data = loader.load_file(data_file)
    data.class_is_last()

    # build Apriori, using last attribute as class attribute
    apriori = Associator(classname="weka.associations.Apriori", options=["-c", "-1"])
    apriori.build_associations(data)
    print(str(apriori))

    # iterate association rules (low-level)
    helper.print_info("Rules (low-level)")
    # make the underlying rules list object iterable in Python
    rules = javabridge.iterate_collection(apriori.jwrapper.getAssociationRules().getRules().o)
    for i, r in enumerate(rules):
        # wrap the Java object to make its methods accessible
        rule = JWrapper(r)
        print(str(i+1) + ". " + str(rule))
        # output some details on rule
        print("   - consequence support: " + str(rule.getConsequenceSupport()))
        print("   - premise support: " + str(rule.getPremiseSupport()))
        print("   - total support: " + str(rule.getTotalSupport()))
        print("   - total transactions: " + str(rule.getTotalTransactions()))

    # iterate association rules (high-level)
    helper.print_info("Rules (high-level)")
    print("can produce rules? " + str(apriori.can_produce_rules()))
    print("rule metric names: " + str(apriori.rule_metric_names))
    rules = apriori.association_rules()
    if rules is not None:
        print("producer: " + rules.producer)
        print("# rules: " + str(len(rules)))
        for i, rule in enumerate(rules):
            print(str(i+1) + ". " + str(rule))
            # output some details on rule
            print("   - consequence support: " + str(rule.consequence_support))
            print("   - consequence: " + str(rule.consequence))
            print("   - premise support: " + str(rule.premise_support))
            print("   - premise: " + str(rule.premise))
            print("   - total support: " + str(rule.total_support))
            print("   - total transactions: " + str(rule.total_transactions))
            print("   - metric names: " + str(rule.metric_names))
            print("   - metric values: " + str(rule.metric_values))
            print("   - metric value 'Confidence': " + str(rule.metric_value('Confidence')))
            print("   - primary metric name: " + str(rule.primary_metric_name))
            print("   - primary metric value: " + str(rule.primary_metric_value))
 def load_Arff(self, inputPath):

     # load the input file
     # print(inputPath)
     loader = Loader(classname="weka.core.converters.ArffLoader")
     data = loader.load_file(inputPath)

     return data
def use_classifier(data_filename, cli):
    loader = Loader(classname="weka.core.converters.ArffLoader")
    data = loader.load_file(data_filename)
    data.class_is_last()
    cls = from_commandline(cli, classname="weka.classifiers.Classifier")
    cls.build_classifier(data)
    evaluation = Evaluation(data)
    evaluation.crossvalidate_model(cls, data, 10, Random(1))
    return cls, evaluation
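Since from_commandline builds the classifier from a full Weka command line, the same helper works for any scheme; a usage sketch with a hypothetical dataset:

cls, evl = use_classifier("iris.arff", "weka.classifiers.trees.J48 -C 0.25 -M 2")
print(cls)
print(evl.summary())
print(evl.percent_correct)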
Example #45
 def train(self):
     filename = "train.arff"
     self.write_arff(filename, "train", 0, self.input_x, self.input_y)
     loader = Loader(classname="weka.core.converters.ArffLoader")
     data = loader.load_file(filename)
     data.class_is_last()
     self.cls = Classifier(classname="weka.classifiers.meta.Bagging", options=["-S", "5"])
     self.cls.build_classifier(data)
     os.remove(filename)
Example #46
File: util.py  Project: igabriel85/dmon-adp
def convertCsvtoArff(indata, outdata):
    '''
    :param indata: -> input csv file
    :param outdata: -> output file
    :return:
    '''
    loader = Loader(classname="weka.core.converters.CSVLoader")
    data = loader.load_file(indata)
    saver = Saver(classname="weka.core.converters.ArffSaver")
    saver.save_file(data, outdata)
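Like the other snippets here, this assumes a running JVM; a usage sketch with hypothetical paths:

jvm.start()
convertCsvtoArff("measurements.csv", "measurements.arff")
jvm.stop()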
Example #47
def generate_folds(dataset_path, output_folder, n_folds=10, random_state=None):
    """
    Given a dataset df, generate n_folds for it and store them in <output_folder>/<dataset_name>.

    :type dataset_path: str
    :param dataset_path: Path to dataset with .arff file extension (i.e my_dataset.arff)
    :type output_folder: str
    :param output_folder: Path to store both index file with folds and fold files.
    :type n_folds: int
    :param n_folds: Optional - Number of folds to split the dataset into. Defaults to 10.
    :type random_state: int
    :param random_state: Optional - Seed to use in the splitting process. Defaults to None (no seed).
    """

    import warnings
    warnings.filterwarnings('error')

    dataset_name = dataset_path.split('/')[-1].split('.')[0]

    af = load_arff(dataset_path)
    df = load_dataframe(af)

    skf = StratifiedKFold(n_splits=n_folds, shuffle=True, random_state=random_state)
    fold_iter = skf.split(df[df.columns[:-1]], df[df.columns[-1]])

    fold_index = dict()

    jvm.start()

    csv_loader = Loader(classname="weka.core.converters.CSVLoader")
    arff_saver = Saver(classname='weka.core.converters.ArffSaver')

    for i, (arg_rest, arg_test) in enumerate(fold_iter):
        fold_index[i] = list(arg_test)

        _temp_path = 'temp_%s_%d.csv' % (dataset_name, i)

        fold_data = df.loc[arg_test]  # type: pd.DataFrame
        fold_data.to_csv(_temp_path, sep=',', index=False)

        java_arff_dataset = csv_loader.load_file(_temp_path)
        java_arff_dataset.relationname = af['relation']
        java_arff_dataset.class_is_last()
        arff_saver.save_file(java_arff_dataset, os.path.join(output_folder, '%s_fold_%d.arff' % (dataset_name, i)))

        os.remove(_temp_path)

    json.dump(
        fold_index, open(os.path.join(output_folder, dataset_name + '.json'), 'w'), indent=2
    )

    jvm.stop()
    warnings.filterwarnings('default')
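A hypothetical invocation that writes ten stratified folds plus the fold-index JSON next to them:

generate_folds("../data/iris.arff", "../folds", n_folds=10, random_state=42)
# produces ../folds/iris_fold_0.arff ... ../folds/iris_fold_9.arff and ../folds/iris.json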
Example #48
def run(arff_path, model_out):
    jvm.start()
    loader = Loader(classname="weka.core.converters.ArffLoader")
    data = loader.load_file(arff_path)
    data.class_is_last()
    cls = Logistic()
    cls.build_classifier(data)
    cls.save_model(model_out)
    coefficients = cls.coefficients
    for coeff in coefficients:
        print(str(coeff))

    return coefficients
Example #49
def predict(attributes):
    jvm.start()
    file_path = print_to_file(attributes)
    # load the saved model
    objects = serialization.read_all("/Users/hosyvietanh/Desktop/data_mining/trained_model.model")
    classifier = Classifier(jobject=objects[0])
    loader = Loader(classname="weka.core.converters.ArffLoader")
    data = loader.load_file(file_path)
    data.class_is_last()
    result = None
    for index, inst in enumerate(data):
        pred = classifier.classify_instance(inst)
        dist = classifier.distribution_for_instance(inst)
        result = int(pred)
        break  # only the first instance is needed; returning here would skip jvm.stop()
    jvm.stop()
    return result
def playback_speed_checker(inputFile, dirRef):
    
    TRAINING_ARFF = 'dataset_playback.arff'
    inputRef = ""

    # Start JVM (only one start call takes effect, so pass all options at once)
    jvm.start(system_cp=True, packages=True, max_heap_size="512m")
    
    # Find reference file
    for file in os.listdir(dirRef):
        if str(file).find(str(os.path.basename(inputFile))) != -1:
            inputRef = os.path.join(dirRef, file)
            break

    # Calculation distance
    (result, distance) = dtw_checker(inputFile, inputRef)

    # Loading data
    loader = Loader(classname="weka.core.converters.ArffLoader")    
    data = loader.load_file(TRAINING_ARFF)
    data.class_is_last()                    # set class attribute

    # Train the classifier
    #cls = Classifier(classname="weka.classifiers.functions.SMO")
    cls = Classifier(classname="weka.classifiers.trees.J48", options = ["-C", "0.3", "-M", "10"])
    cls.build_classifier(data)

    # Classify instance
    speed_instance = Instance.create_instance(numpy.array([distance]), classname='weka.core.DenseInstance', weight=1.0)
    speed_instance.dataset = data
    
    # Classify instance
    speed_flag = cls.classify_instance(speed_instance)
    
    if (distance == 0):
        speed_class = 'nominal'
    else:
        if speed_flag == 0: speed_class = 'down_speed'
        if speed_flag == 1: speed_class = 'up_speed'
        
#    print os.path.basename(inputFile) + ' --- ' + speed_class
    
    # Stop JVM
    jvm.stop()    

    print "SPEED IS: " + speed_class

    return speed_class
Example #51
 def calculate_amino_type(self, model, pro):
     if pro:
         return [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
     i = Instance.create_instance(values=[1.0, self.a, self.b])
     if (self.a==-1 and self.b==-1 ):
         return [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]
     elif (self.a==-1):
         i.set_missing(1)
     elif (self.b==-1):
         i.set_missing(2)
     from weka.core.converters import Loader
     loader = Loader("weka.core.converters.ArffLoader")
     myDataset = loader.load_file("weka/testingthisthingout.arff")
     myDataset.set_class_index(0)
     i.set_dataset(myDataset)
     return model.distribution_for_instance(i)
def main():
    """
    Just runs some example code.
    """

    # load a dataset
    vote_file = helper.get_data_dir() + os.sep + "vote.arff"
    helper.print_info("Loading dataset: " + vote_file)
    loader = Loader("weka.core.converters.ArffLoader")
    vote_data = loader.load_file(vote_file)
    vote_data.class_is_last()

    # train and output associator
    associator = Associator(classname="weka.associations.Apriori", options=["-N", "9", "-I"])
    associator.build_associations(vote_data)
    print(associator)
def build_and_classify(classifier, classifier_name, approach_name, infile, percentage='10'):
    """
    Creates model and classifies against input data. Returns accuracy statistics
    """
    # set seed so results are consistent
    random.seed('iot')

    # load data
    loader = Loader(classname='weka.core.converters.CSVLoader')
    data = loader.load_file(infile)
    data.class_is_last()

    # convert all numeric attributes to nominal
    to_nominal = Filter(classname='weka.filters.unsupervised.attribute.NumericToNominal',
                        options=['-R', 'first-last'])
    to_nominal.inputformat(data)
    data = to_nominal.filter(data)

    # randomize data with constant seed
    randomize = Filter(classname='weka.filters.unsupervised.instance.Randomize',
                       options=['-S', '42'])
    randomize.inputformat(data)

    data = randomize.filter(data)

    # create training set and testing set
    train_percent_filter = Filter(classname='weka.filters.unsupervised.instance.RemovePercentage',
                                  options=['-P', percentage, '-V'])
    train_percent_filter.inputformat(data)

    train = train_percent_filter.filter(data)
    test = data

    # build and test classifier
    classifier.build_classifier(train)
    evaluation = Evaluation(train)
    evaluation.test_model(classifier, test)

    # return results as array
    results = [
        approach_name,
        classifier_name,
        percentage,
        evaluation.percent_correct,
        evaluation.weighted_f_measure
    ]
    return results
Example #54
def main():
    """
    Just runs some example code.
    """

    # load a dataset
    bodyfat_file = helper.get_data_dir() + os.sep + "bodyfat.arff"
    helper.print_info("Loading dataset: " + bodyfat_file)
    loader = Loader("weka.core.converters.ArffLoader")
    bodyfat_data = loader.load_file(bodyfat_file)
    bodyfat_data.class_is_last()

    # build and output classifier
    helper.print_title("Building M5P classifier")
    classifier = Classifier(classname="weka.classifiers.trees.M5P")
    classifier.build_classifier(bodyfat_data)
    print(classifier)
Example #55
 def predict(self, test_data):
     filename = "test.arff"
     self.write_arff(filename, "test", 0, test_data)
     loader = Loader(classname="weka.core.converters.ArffLoader")
     data = loader.load_file(filename)
     data.class_is_last()
     # evl = Evaluation(data)
     # evl.evaluate_model(self.cls,data)
     # data.set_class_label(data.numAttributes() - 1)
     # data.setClassIndex(data.numAttributes() - 1)
     result = []
     for index, inst in enumerate(data):
         pred = self.cls.classify_instance(inst)
         dist = self.cls.distribution_for_instance(inst)
         result.append(dist[0])
         # print(str(index+1) + ": label index=" + str(pred) + ", class distribution=" + str(dist))
         # print str(index+1) + 'dist:'+ str(dist)
     os.remove(filename)
     return result
Example #56
def riaa_checker(inputFile):
    
    TRAINING_ARFF = r'C:\Users\ASUS\Desktop\IGNASI\SMC\Workspace\dataset_riaa.arff'

    # Start JVM (only one start call takes effect, so pass all options at once)
    jvm.start(system_cp=True, packages=True, max_heap_size="512m")

    # Calculation of bark bands information
    (absolute_bark, relative_bark, bark_ratios) = compute_bark_spectrum(inputFile)

    # Loading data
    loader = Loader(classname="weka.core.converters.ArffLoader")    
    data = loader.load_file(TRAINING_ARFF)
    data.class_is_last()                    # set class attribute

    # Train the classifier
    cls = Classifier(classname="weka.classifiers.functions.SMO")
    #cls = Classifier(classname="weka.classifiers.trees.J48", options = ["-C", "0.3", "-M", "10"])
    cls.build_classifier(data)

    # Classify instance
    bark_instance = Instance.create_instance(bark_ratios, classname='weka.core.DenseInstance', weight=1.0)
    bark_instance.dataset = data
    
    # Classify instance
    riaa_flag = cls.classify_instance(bark_instance)
    
    if riaa_flag == 0:
        riaa_class = 'riaa_ok'
    else:
        riaa_class = 'riaa_ko'
        
#    print os.path.basename(inputFile) + ' --- ' + riaa_class
    
    # Stop JVM
    jvm.stop()   

    print "RIAA FILTERING?: " + riaa_class

    return riaa_class
def main(args):
    """
    Performs attribute selection on the specified dataset (uses vote UCI dataset if no dataset specified). Last
    attribute is assumed to be the class attribute. Used: CfsSubsetEval, GreedyStepwise, J48
    :param args: the commandline arguments
    :type args: list
    """

    # load a dataset
    if len(args) <= 1:
        data_file = helper.get_data_dir() + os.sep + "vote.arff"
    else:
        data_file = args[1]
    helper.print_info("Loading dataset: " + data_file)
    loader = Loader(classname="weka.core.converters.ArffLoader")
    data = loader.load_file(data_file)
    data.class_is_last()

    use_classifier(data)
    use_filter(data)
    use_low_level(data)