def PredecirUnaTemporada(path):
    """Predict the season label for the instance built from *path*.

    Writes a one-instance ARFF file (header + instance), loads the
    serialized model and classifies the instance.

    :param path: input passed to CrearInstanciaParaPredecir
    :return: one of "invierno", "verano", "otono", "primavera"
        (empty string if the ARFF file yields no instances)
    """
    jvm.start()
    try:
        insta = CrearInstanciaParaPredecir(path)

        # `with` guarantees the handles are closed even on error
        # (the original leaked them if an exception was raised mid-way)
        with open('ModelData/wekaHeader.arff', 'r') as header_file:
            atributos = header_file.readlines()

        with open('ModelData/predictionFiles/inst.arff', 'w') as out_file:
            out_file.writelines(atributos)
            out_file.write("\n" + insta + '\n')

        objects = serialization.read_all("ModelData/77PercentModelPaisajes.model")
        classifier = Classifier(jobject=objects[0])

        loader = Loader()
        data = loader.load_file("ModelData/predictionFiles/inst.arff")
        data.class_is_last()

        clases = ["invierno", "verano", "otono", "primavera"]
        prediccion = ""
        for inst in data:
            # only the class index is needed; the distribution was unused
            pred = classifier.classify_instance(inst)
            prediccion = clases[int(pred)]
    finally:
        # always shut the JVM down, even if prediction failed
        jvm.stop()
    return prediccion
Exemplo n.º 2
0
def try_params(n_instances, params, train, test, istest):
    """Build an M5Rules classifier from *params* and evaluate it.

    :param n_instances: evaluation budget, rounded to an int
    :param params: dict with 'unpruned', 'min_inst', 'unsmoothed', 'regression'
    :param train: training dataset
    :param test: test dataset (used only when istest is True)
    :param istest: evaluate on *test* when True, otherwise train/eval
    :return: result of the chosen evaluation helper
    """
    n_instances = int(round(n_instances))
    pprint(params)

    opts = ["-M", str(params['min_inst'])]
    if params['unpruned'] == True:
        opts.insert(0, "-N")
    if params['unsmoothed'] == True:
        opts.append("-U")
    if params['regression'] == True:
        opts.append("-R")

    clf = Classifier(classname="weka.classifiers.rules.M5Rules", options=opts)

    if istest:
        return test_weka_classifier(clf, train, test)
    return train_and_eval_weka_classifier(clf, train, n_instances)
def autoweka(data, duration, metric, nb_folds):
    """Run AutoWEKA model selection on *data* and print the chosen model.

    :param data: training instances (class attribute already set)
    :param duration: time limit handed to AutoWEKA's -timeLimit option
    :param metric: metric name AutoWEKA optimizes
    :param nb_folds: number of cross-validation folds (-x)
    """
    classifier = Classifier(
        classname="weka.classifiers.meta.AutoWEKAClassifier",
        # Weka option lists must contain strings; callers typically pass
        # numeric folds/duration, so convert explicitly
        options=["-x", str(nb_folds), "-timeLimit", str(duration),
                 "-metric", str(metric)]
    )
    classifier.build_classifier(data)
    print(classifier)
Exemplo n.º 4
0
def try_params(n_instances, params, train, valid, test, istest):
    """Build a JRip rule learner from *params* and evaluate it.

    :param params: dict with 'optimizations', 'checkerror', 'pruning'
    :param istest: evaluate on *test* when True, otherwise train/valid
    :return: result of the chosen evaluation helper
    """
    n_instances = int(round(n_instances))
    pprint(params)

    opts = ["-O", str(params['optimizations'])]
    if params['checkerror'] == False:
        opts.append("-E")
    if params['pruning'] == False:
        opts.append("-P")

    clf = Classifier(classname="weka.classifiers.rules.JRip", options=opts)

    if istest:
        return test_weka_classifier(clf, train, test)
    return train_and_eval_weka_classifier(clf, train, valid, n_instances)
Exemplo n.º 5
0
def try_params(n_instances, params, train, valid, test, istest):
    """Build a RandomForest from *params* and evaluate it.

    :param params: dict with 'numInterations' (sic — key spelling matches
        the caller's param dict), 'numattr' and 'depth'
    :param istest: evaluate on *test* when True, otherwise train/valid
    :return: result of the chosen evaluation helper
    """
    n_instances = int(round(n_instances))
    pprint(params)

    opts = ["-I", str(params['numInterations']),
            "-K", str(params['numattr']),
            "-depth", str(params['depth'])]

    clf = Classifier(classname="weka.classifiers.trees.RandomForest",
                     options=opts)

    if istest:
        return test_weka_classifier(clf, train, test)
    return train_and_eval_weka_classifier(clf, train, valid, n_instances)
Exemplo n.º 6
0
def main(args):
    """
    Loads a dataset, shuffles it, splits it into train/test set. Trains J48 with training set and
    evaluates the built model on the test set.
    :param args: the commandline arguments (optional, can be dataset filename)
    :type args: list
    """
    # default to the bundled vote dataset when no filename is supplied
    data_file = args[1] if len(args) > 1 else helper.get_data_dir() + os.sep + "vote.arff"
    helper.print_info("Loading dataset: " + data_file)
    loader = Loader(classname="weka.core.converters.ArffLoader")
    data = loader.load_file(data_file)
    data.class_is_last()

    # 66%/34% split of the randomized data
    train, test = data.train_test_split(66.0, Random(1))

    # train J48 on the training split
    cls = Classifier(classname="weka.classifiers.trees.J48")
    cls.build_classifier(train)
    print(cls)

    # evaluate the model on the held-out split
    evl = Evaluation(train)
    evl.test_model(cls, test)
    print(evl.summary())
Exemplo n.º 7
0
def try_params(n_instances, params, base, train, valid, test, istest):
    """Wrap *base* in an AttributeSelectedClassifier (CfsSubsetEval) and
    evaluate it.

    :param n_instances: evaluation budget, rounded to an int
    :param params: evaluator/search hyper-parameters
    :param base: base classifier whose jobject is attached to the wrapper
    :param istest: evaluate on *test* when True, otherwise train/valid
    :return: result of the chosen evaluation helper
    """
    n_instances = int(round(n_instances))
    pprint(params)

    L = list()

    if params['missingSeparate'] == True:
        L.append("-M")

    if params['locallyPredictive'] == False:
        L.append("-L")

    if params['search'] == 'GreedyStepwise':
        param_search = gs.get_params()
        search = gs.get_class(param_search)
    else:
        param_search = bf.get_params()
        search = bf.get_class(param_search)

    evaluator = ASEvaluation(classname="weka.attributeSelection.CfsSubsetEval",
                             options=L)

    clf = Classifier(
        classname="weka.classifiers.meta.AttributeSelectedClassifier")

    clf.set_property("evaluator", evaluator.jobject)
    # BUG FIX: the computed search strategy was never attached, so the
    # wrapper silently used its default search; wire it up like the
    # sibling try_params/get_evaluator helpers do.
    clf.set_property("search", search.jobject)
    clf.set_property("base", base.jobject)

    if istest:
        result = test_weka_classifier(clf, train, test)
    else:
        result = train_and_eval_weka_classifier(clf, train, valid, n_instances)

    return result
def Boost_J48(data, rnm):
    """Cross-validate and train an AdaBoostM1-boosted, filtered J48 tree.

    Dumps the model, CSV predictions, evaluation summary and a ROC plot to
    files prefixed with *rnm*.

    :param data: dataset; its last attribute is set as the class
    :param rnm: filename prefix for all output files
    :return: percent-correct score as a string
    """
    data.class_is_last()
    fc1 = FilteredClassifier()
    fc1.classifier = Classifier(classname="weka.classifiers.trees.J48", options=["-C", "0.25", "-M", "2"])
    fc1.filter = Filter(classname="weka.filters.unsupervised.attribute.Remove", options=["-R", "first"])
    fc2 = SingleClassifierEnhancer(classname="weka.classifiers.meta.AdaBoostM1", options=["-P", "100", "-S", "1", "-I", "10"])
    fc2.classifier = fc1
    pred_output = PredictionOutput(classname="weka.classifiers.evaluation.output.prediction.CSV", options=["-p", "1"])
    folds = 10
    fc2.build_classifier(data)
    evaluation = Evaluation(data)
    evaluation.crossvalidate_model(fc2, data, folds, Random(1), pred_output)
    # Python 3 print calls: the original used Python 2 "print >> f" syntax,
    # which is a SyntaxError on Python 3; `with` also guarantees the files
    # are closed even if a write fails.
    with open(rnm + '_Boost_J48_Tree.txt', 'w') as f0:
        print("Filename: ", rnm, file=f0)
        print('\n\n', file=f0)
        print(str(fc2), file=f0)
    with open(rnm + '_Boost_J48_Prediction.txt', 'w') as f1:
        print('Filename:', rnm, file=f1)
        print('Prediction Summary:', (pred_output.buffer_content()), file=f1)
    with open(rnm + '_Boost_j48_Evaluation.txt', 'w') as f2:
        print('Filename:', rnm, file=f2)
        print('Evaluation Summary:', (evaluation.summary()), file=f2)
        print('\n\n\n', file=f2)
        print((evaluation.class_details()), file=f2)
    plot_roc(evaluation, class_index=[0, 1], title=rnm, key_loc='best', outfile=rnm + '_Boost_J48_ROC.png', wait=False)
    return str(evaluation.percent_correct)
Exemplo n.º 9
0
def get_class(params):
    """Translate *params* into LMT command-line options and return the
    configured classifier.

    :param params: dict of LMT hyper-parameters
    :return: a weka.classifiers.trees.LMT Classifier
    """
    opts = []

    # simple boolean flags, in the same order the options are expected
    for key, flag in (('binary', "-B"), ('residuals', "-R"),
                      ('crossValidated', "-C"), ('probabilities', "-P")):
        if params[key] == True:
            opts.append(flag)

    opts += ["-M", str(params['min_inst'])]

    # instance weighting only applies when probabilities are off
    if params['weighting'] != 0 and params['probabilities'] == False:
        opts += ["-W", str(params['weighting'])]

    opts += ["-A", str(params['aic'])]

    return Classifier(classname="weka.classifiers.trees.LMT", options=opts)
Exemplo n.º 10
0
    def retrain(self, examples, labels):
        """Write *examples*/*labels* to an ARFF file and retrain the
        Logistic classifier on it.

        :param examples: sequence of equal-length numeric feature vectors
        :param labels: parallel labels; 1 maps to TRUE, anything else to FALSE
        """
        # `with` guarantees the ARFF file is closed even if a write fails
        # (the original leaked the handle on error)
        with open("trainingweka.arff", "w") as f:
            f.write("@relation randomset\n")
            for j in range(len(examples[0])):
                f.write("@attribute feature%d real\n" % j)
            f.write("@attribute class {TRUE, FALSE}\n")
            f.write("@data\n")

            for example, label in zip(examples, labels):
                for feature in example:
                    f.write("%f," % feature)
                f.write("TRUE\n" if label == 1 else "FALSE\n")

        loader = Loader(classname="weka.core.converters.ArffLoader")
        self.trainingData = loader.load_file("trainingweka.arff")
        # class attribute is the last one (the TRUE/FALSE label)
        self.trainingData.set_class_index(self.trainingData.num_attributes() -
                                          1)
        self.classifier = Classifier(
            classname="weka.classifiers.functions.Logistic",
            options=["-R", "%f" % (1.0 / self.C)])
        self.classifier.build_classifier(self.trainingData)
def RandomTree(data, rnm):
    """Cross-validate and train a filtered RandomTree.

    Dumps the tree, CSV predictions, evaluation summary and a ROC plot to
    files prefixed with *rnm*.

    :param data: dataset; its last attribute is set as the class
    :param rnm: filename prefix for all output files
    :return: percent-correct score as a string
    """
    data.class_is_last()
    fc = FilteredClassifier()
    fc.classifier = Classifier(classname="weka.classifiers.trees.RandomTree", options=["-K", "0", "-M", "1.0", "-V", "0.001", "-S", "1"])
    fc.filter = Filter(classname="weka.filters.unsupervised.attribute.Remove", options=["-R", "first"])
    pred_output = PredictionOutput(classname="weka.classifiers.evaluation.output.prediction.CSV", options=["-p", "1"])
    folds = 10
    evl = Evaluation(data)
    evl.crossvalidate_model(fc, data, folds, Random(1), pred_output)
    fc.build_classifier(data)
    # Python 3 print calls: the original used Python 2 "print >> f" syntax,
    # which is a SyntaxError on Python 3; `with` also guarantees the files
    # are closed even if a write fails.
    with open(rnm + '_RT_Tree.txt', 'w') as f0:
        print("Filename: ", rnm, file=f0)
        print('\n\n', file=f0)
        print(str(fc), file=f0)
    with open(rnm + '_RT_Prediction.txt', 'w') as f1:
        print('Filename:', rnm, file=f1)
        print('Prediction Summary:', (pred_output.buffer_content()), file=f1)
    with open(rnm + '_RT_Evaluation.txt', 'w') as f2:
        print('Filename:', rnm, file=f2)
        print('Evaluation Summary:', (evl.summary()), file=f2)
        print('\n\n\n', file=f2)
        print((evl.class_details()), file=f2)
    plot_roc(evl, class_index=[0, 1], title=rnm, key_loc='best', outfile=rnm + '_RT_ROC.png', wait=False)
    return str(evl.percent_correct)
Exemplo n.º 12
0
def load_model(filename):
    """ Load the model from cache.
    Args:
        filename(str): The target file name (without extension) to load. Example: LMT
    Returns:
        The classifier and data object if the target caching is saved, otherwise None.
    """

    # Path to the cached model (example: caches/model/LMT.cache)
    path = os.path.join('caches', 'model', filename + '.cache')

    print("Path to the cashed model to load:", path)

    if os.path.isfile(path):
        cached_model, cached_data_used_for_training = serialization.read_all(
            path)
        print("Loading cached classifier")
        trained_classifier = Classifier(jobject=cached_model)
        print("Loading cached data")
        training_data = Instances(jobject=cached_data_used_for_training)
        # BUG FIX: the original format string had no placeholder
        # ("Loaded model: (unknown)"), so the model name was never logged.
        localizer_log.msg("Loaded model: {filename}".format(filename=filename))
        return [trained_classifier, training_data]

    localizer_log.msg("Failed to load cache of 'model'.")
    return None
Exemplo n.º 13
0
def create_model(input_file, output_file):
    """Discretize *input_file*, train a RandomForest on the filtered data,
    serialize model + filter to *output_file*, and report 10-fold CV results.

    :param input_file: path to the dataset (any format converters can load)
    :param output_file: path the model/filter bundle is written to
    """
    data = converters.load_any_file(input_file)
    data.class_is_last()  # class attribute is the last column

    # equal-width discretization over every attribute
    print_title("Filtering Data")
    disc = Filter(
        classname="weka.filters.unsupervised.attribute.Discretize",
        options=["-B", "10", "-M", "-1.0", "-R", "first-last"])
    disc.inputformat(data)  # tell the filter about the incoming structure
    filtered = disc.filter(data)
    print("Done! (believe it or not)")

    # 100-tree RandomForest on the discretized data
    print_title("Build Classifier")
    rf = Classifier(classname="weka.classifiers.trees.RandomForest",
                    options=["-I", "100", "-K", "0", "-S", "1"])
    rf.build_classifier(filtered)
    print("Done! (believe it or not)")
    serialization.write_all(output_file, [rf, disc])
    print("Model and filter saved to ", output_file)

    # 10-fold cross-validation, priors taken from the unfiltered data
    evl = Evaluation(data)
    evl.crossvalidate_model(rf, filtered, 10, Random(42))
    print(evl.summary())
    print("pctCorrect: " + str(evl.percent_correct))
    print("incorrect: " + str(evl.incorrect))
Exemplo n.º 14
0
def load_classifier(lang, tag):
    """Load the serialized classifier + filter pair for a language/tag combo.

    :param lang: LANG_ID or LANG_EN
    :param tag: POS-tag string, e.g. "nnp", "nn", "cd"
    :return: dict with 'classifier' (Classifier) and 'filter' (Filter)
    :raises ValueError: when no model exists for the combination
        (the original crashed with an unrelated NameError on `objects`)
    """
    # dispatch table instead of the original if/elif ladder
    model_files = {
        (LANG_ID, "nnp"): ID_MODEL_NNP,
        (LANG_ID, "nn"): ID_MODEL_NN,
        (LANG_ID, "cdp"): ID_MODEL_CDP,
        (LANG_EN, "nnp"): EN_MODEL_NNP,
        (LANG_EN, "jj"): EN_MODEL_JJ,
        (LANG_EN, "nn"): EN_MODEL_NN,
        (LANG_EN, "vbp"): EN_MODEL_VBP,
        (LANG_EN, "cd"): EN_MODEL_CD,
        (LANG_EN, "vb"): EN_MODEL_VB,
    }
    try:
        model_file = model_files[(lang, tag)]
    except KeyError:
        raise ValueError("no model for lang=%r tag=%r" % (lang, tag))

    objects = serialization.read_all(model_file)

    classifier = {}
    classifier['classifier'] = Classifier(jobject=objects[0])
    classifier['filter'] = Filter(jobject=objects[1])
    return classifier
Exemplo n.º 15
0
def try_params(n_instances, params, base, train, valid, test, istest):
    """Wrap *base* in an AttributeSelectedClassifier using an InfoGain
    evaluator with a Ranker search, then evaluate it.

    :param base: base classifier whose jobject is attached to the wrapper
    :param istest: evaluate on *test* when True, otherwise train/valid
    :return: result of the chosen evaluation helper
    """
    n_instances = int(round(n_instances))
    pprint(params)

    opts = []
    if params['missingMerge'] == False:
        opts.append("-M")
    if params['binarizeNumericAttributes'] == True:
        opts.append("-B")

    search = ASSearch(classname="weka.attributeSelection.Ranker")
    evaluator = ASEvaluation(
        classname="weka.attributeSelection.InfoGainAttributeEval",
        options=opts)

    clf = Classifier(
        classname="weka.classifiers.meta.AttributeSelectedClassifier")
    for prop, obj in (("evaluator", evaluator), ("search", search),
                      ("base", base)):
        clf.set_property(prop, obj.jobject)

    if istest:
        return test_weka_classifier(clf, train, test)
    return train_and_eval_weka_classifier(clf, train, valid, n_instances)
Exemplo n.º 16
0
def get_class(params):
    """Map *params* onto J48 command-line flags and return the classifier.

    :param params: dict of J48 hyper-parameters
    :return: a weka.classifiers.trees.J48 Classifier
    """
    opts = []
    pruned = params['unpruned'] == False

    if params['unpruned'] == True:
        opts.append("-U")
    if params['collapseTree'] == True:
        opts.append("-O")

    # confidence factor only applies to pruned trees
    if pruned:
        opts += ["-C", str(params['confidenceFactor'])]

    opts += ["-M", str(params['minNumObj'])]

    if params['binarySplits'] == True:
        opts.append("-B")
    # subtree raising is likewise a pruning-only option
    if params['subtreeRaising'] == True and pruned:
        opts.append("-S")
    if params['useLaplace'] == True:
        opts.append("-A")
    if params['useMDL'] == False:
        opts.append("-J")

    return Classifier(classname="weka.classifiers.trees.J48", options=opts)
Exemplo n.º 17
0
def try_params(n_instances, params, train, test, istest):
    """Build a KStar classifier from *params* and evaluate it.

    :param params: dict with 'blend', 'entropic' and 'missing'
    :param istest: evaluate on *test* when True, otherwise train/eval
    :return: result of the chosen evaluation helper
    """
    n_instances = int(round(n_instances))
    pprint(params)

    L = list([])

    L.append("-B")
    L.append(str(params['blend']))

    if params['entropic'] == True:
        L.append("-E")

    L.append("-M")
    # FIX: convert to str like every other option value — Weka option
    # lists expect strings, and the original passed the raw value through
    L.append(str(params['missing']))

    clf = Classifier(classname="weka.classifiers.lazy.KStar", options=L)

    if istest:
        result = test_weka_classifier(clf, train, test)
    else:
        result = train_and_eval_weka_classifier(clf, train, n_instances)

    return result
Exemplo n.º 18
0
def get_evaluator(params, base):
    """Build an AttributeSelectedClassifier around *base* with an InfoGain
    evaluator and the Ranker search configured by the rk module.

    :param params: evaluator hyper-parameters
    :param base: base classifier whose jobject is attached to the wrapper
    :return: the configured meta classifier
    """
    pprint(params)

    opts = []
    if params['missingMerge'] == False:
        opts.append("-M")
    if params['binarizeNumericAttributes'] == True:
        opts.append("-B")

    search = rk.get_search(rk.get_params())

    evaluator = ASEvaluation(
        classname="weka.attributeSelection.InfoGainAttributeEval",
        options=opts)

    clf = Classifier(
        classname="weka.classifiers.meta.AttributeSelectedClassifier")
    clf.set_property("evaluator", evaluator.jobject)
    clf.set_property("search", search.jobject)
    clf.set_property("base", base.jobject)
    return clf
Exemplo n.º 19
0
def get_evaluator(params, base):
    """Build an AttributeSelectedClassifier around *base* with a
    CfsSubsetEval evaluator; the search strategy comes from gs
    (GreedyStepwise) or bf (BestFirst) depending on params['search'].

    :param params: evaluator/search hyper-parameters
    :param base: base classifier whose jobject is attached to the wrapper
    :return: the configured meta classifier
    """
    pprint(params)

    opts = []
    if params['missingSeparate'] == True:
        opts.append("-M")
    if params['locallyPredictive'] == False:
        opts.append("-L")

    searcher = gs if params['search'] == 'GreedyStepwise' else bf
    search = searcher.get_search(searcher.get_params())

    evaluator = ASEvaluation(classname="weka.attributeSelection.CfsSubsetEval",
                             options=opts)

    clf = Classifier(
        classname="weka.classifiers.meta.AttributeSelectedClassifier")
    clf.set_property("evaluator", evaluator.jobject)
    clf.set_property("search", search.jobject)
    clf.set_property("base", base.jobject)
    return clf
def main(args):
    """
    Trains a J48 classifier on a training set and outputs the predicted class and class distribution alongside the
    actual class from a test set. Class attribute is assumed to be the last attribute.
    :param args: the commandline arguments (train and test datasets)
    :type args: list
    """

    # load a dataset
    helper.print_info("Loading train: " + args[1])
    loader = Loader(classname="weka.core.converters.ArffLoader")
    train = loader.load_file(args[1])
    # consistency: use the same class_is_last() idiom for both datasets
    # (the original mixed `class_index = num_attributes - 1` with it)
    train.class_is_last()
    helper.print_info("Loading test: " + args[2])
    test = loader.load_file(args[2])
    test.class_is_last()

    # classifier
    cls = Classifier(classname="weka.classifiers.trees.J48")
    cls.build_classifier(train)

    # output predictions
    print("# - actual - predicted - error - distribution")
    for index, inst in enumerate(test):
        pred = cls.classify_instance(inst)
        dist = cls.distribution_for_instance(inst)
        print("%d - %s - %s - %s  - %s" %
              (index + 1, inst.get_string_value(
                  inst.class_index), inst.class_attribute.value(int(pred)),
               "yes" if pred != inst.get_value(inst.class_index) else "no",
               str(dist.tolist())))
Exemplo n.º 21
0
def main():
    """
    Shows how to use the CostSensitiveClassifier.
    """
    # load the diabetes dataset
    diabetes_file = helper.get_data_dir() + os.sep + "diabetes.arff"
    helper.print_info("Loading dataset: " + diabetes_file)
    data = Loader("weka.core.converters.ArffLoader").load_file(diabetes_file)
    data.class_is_last()

    # cost-sensitive meta classifier wrapping a J48 base learner
    classifier = SingleClassifierEnhancer(
        classname="weka.classifiers.meta.CostSensitiveClassifier",
        options=["-cost-matrix", "[0 1; 2 0]", "-S", "2"])
    classifier.classifier = Classifier(classname="weka.classifiers.trees.J48",
                                       options=["-C", "0.3"])

    # 10-fold cross-validation
    folds = 10
    evaluation = Evaluation(data)
    evaluation.crossvalidate_model(classifier, data, folds, Random(1))

    print("")
    print("=== Setup ===")
    print("Classifier: " + classifier.to_commandline())
    print("Dataset: " + data.relationname)
    print("")
    print(
        evaluation.summary("=== " + str(folds) +
                           " -fold Cross-Validation ==="))
Exemplo n.º 22
0
def main():
    """
    Just runs some example code.
    """
    classifier = Classifier("weka.classifiers.trees.J48")

    # show what the classifier can handle
    helper.print_title("Capabilities")
    caps = classifier.capabilities
    print(caps)

    # compare against the capabilities required by the iris data
    iris_file = helper.get_data_dir() + os.sep + "iris.arff"
    helper.print_info("Loading dataset: " + iris_file)
    iris_data = Loader("weka.core.converters.ArffLoader").load_file(iris_file)
    iris_data.class_is_last()
    data_caps = Capabilities.for_instances(iris_data)
    print(data_caps)
    print("classifier handles dataset: " + str(caps.supports(data_caps)))

    # demonstrate disabling a capability and changing min_instances
    helper.print_title("Disable/Enable")
    capability = Capability(member="UNARY_ATTRIBUTES")
    caps.disable(capability)
    caps.min_instances = 10
    print("Removing: " + str(capability))
    print(caps)
def save_all_scores_on_validate():
    """Score every trained model on each user's validation ARFF files and
    write the per-algorithm score matrices to CSV files under ../results/.

    Iterates users x expressions, loads each cached .model file, computes
    the score matrix via get_classifier_score and saves it with numpy.
    """
    for user in user_list:
        user_validate_dir = os.listdir("../data/arff_files/" + str(user) +
                                       "/validate/")
        user_validate_dir.sort()
        for expression_index in range(len(user_validate_dir)):
            # Python 3 print calls (the original used Python 2 print
            # statements, a SyntaxError on Python 3)
            print(expression_index, "=>",
                  str(expression_list[expression_index]), ':',
                  str(user_validate_dir[expression_index]))
            # NOTE: the original also built an `id` string here that was
            # never used (and shadowed the builtin); it has been removed
            target_dir = '../results/' + str(
                expression_list[expression_index]) + '/' + str(user) + '/'
            model_dir = '../models/' + str(
                expression_list[expression_index]) + '/' + str(user) + '/'
            validate_data_file = "../data/arff_files/" + str(
                user) + "/validate/" + str(user_validate_dir[expression_index])
            print(validate_data_file, "=>", model_dir, "all algos", "=>",
                  target_dir, "\n")
            loader = Loader(classname="weka.core.converters.ArffLoader")
            validate_data = loader.load_file(validate_data_file)
            for algo in algo_func_dict:
                trained_model = Classifier(
                    jobject=serialization.read(model_dir + algo + ".model"))
                scores_matrix = get_classifier_score(trained_model,
                                                     validate_data)
                out_file = target_dir + algo + "_scores.csv"
                # writing scores to target file
                np.savetxt(out_file, scores_matrix, delimiter=",")
Exemplo n.º 24
0
def get_evaluator(params, base):
    """Build an AttributeSelectedClassifier around *base* with a GainRatio
    evaluator and the Ranker search configured by the rk module.

    :param params: evaluator hyper-parameters
    :param base: base classifier whose jobject is attached to the wrapper
    :return: the configured meta classifier
    """
    pprint(params)

    opts = []
    if params['missing_merge'] == True:
        opts.append("-M")

    search = rk.get_search(rk.get_params())

    evaluator = ASEvaluation(
        classname="weka.attributeSelection.GainRatioAttributeEval",
        options=opts)

    clf = Classifier(
        classname="weka.classifiers.meta.AttributeSelectedClassifier")
    clf.set_property("evaluator", evaluator.jobject)
    clf.set_property("search", search.jobject)
    clf.set_property("base", base.jobject)
    return clf
Exemplo n.º 25
0
def try_params(n_instances, params, train, valid, test, istest):
    """Build a HoeffdingTree from *params* and evaluate it.

    :param params: dict of HoeffdingTree hyper-parameters
    :param istest: evaluate on *test* when True, otherwise train/valid
    :return: result of the chosen evaluation helper
    """
    n_instances = int(round(n_instances))
    pprint(params)

    # flag/param pairs in the order the options were originally emitted
    opts = []
    for flag, key in (("-L", 'leaf'),
                      ("-S", 'splitCriterion'),
                      ("-E", 'splitConfidence'),
                      ("-H", 'hoeffdingTieThreshold'),
                      ("-M", 'minimumFractionOfWeightInfoGain'),
                      ("-G", 'gracePeriod')):
        opts += [flag, str(params[key])]

    clf = Classifier(classname="weka.classifiers.trees.HoeffdingTree",
                     options=opts)

    if istest:
        return test_weka_classifier(clf, train, test)
    return train_and_eval_weka_classifier(clf, train, valid, n_instances)
Exemplo n.º 26
0
def try_params(n_instances, params, base, train, valid, test, istest):
    """Wrap *base* in an AttributeSelectedClassifier with a GainRatio
    evaluator and the search strategy named by params['search'], then
    evaluate it.

    :param base: base classifier whose jobject is attached to the wrapper
    :param istest: evaluate on *test* when True, otherwise train/valid
    :return: result of the chosen evaluation helper
    """
    n_instances = int(round(n_instances))
    pprint(params)

    opts = ["-M"] if params['missing_merge'] == True else []

    # pick the search-strategy module by name
    if params['search'] == 'GreedyStepwise':
        mod = gs
    elif params['search'] == 'BestFirst':
        mod = bf
    elif params['search'] == 'Ranker':
        mod = rk
    search = mod.get_search(mod.get_params())

    evaluator = ASEvaluation(
        classname="weka.attributeSelection.GainRatioAttributeEval",
        options=opts)

    clf = Classifier(
        classname="weka.classifiers.meta.AttributeSelectedClassifier")
    clf.set_property("evaluator", evaluator.jobject)
    clf.set_property("search", search.jobject)
    clf.set_property("base", base.jobject)

    if istest:
        return test_weka_classifier(clf, train, test)
    return train_and_eval_weka_classifier(clf, train, valid, n_instances)
Exemplo n.º 27
0
def get_class(params):
    """Map *params* onto MultilayerPerceptron options and return the
    configured classifier.

    :param params: dict of MLP hyper-parameters
    :return: a weka.classifiers.functions.MultilayerPerceptron Classifier
    """
    opts = ["-L", str(params['learningRate']),
            "-M", str(params['momentum'])]

    if params['nominalToBinaryFilter'] == True:
        opts.append("-B")

    opts += ["-H", str(params['hiddenLayers'])]

    # remaining boolean switches, in the original option order
    for key, flag in (('normalizeNumClasses', "-C"),
                      ('reset', "-R"),
                      ('decay', "-D")):
        if params[key] == True:
            opts.append(flag)

    opts += ["-S", str(params['seed'])]

    return Classifier(
        classname="weka.classifiers.functions.MultilayerPerceptron",
        options=opts)
Exemplo n.º 28
0
def logit_PC(df_train, df_test, attr_label):
    '''
    logistic regression with PC members only
    :param df_train:        training data, pandas data frame
    :param df_test:         testing data, pandas data frame
    :param attr_label:      label attribute, string
    :return:                PC members, logistic regression model and AUC
    '''
    pcs = RF.learnPC_R(df_train, attr_label)
    if not pcs:
        # no PC members found: nothing to train
        return pcs, None, None

    # train a Weka Logistic model restricted to the PC columns
    data_train = DF2Instances(df_train[pcs + [attr_label]], 'train',
                              attr_label).df_to_instances()
    data_train.class_is_last()  # set class attribute
    model = Classifier(classname="weka.classifiers.functions.Logistic")
    model.build_classifier(data_train)

    data_test = DF2Instances(df_test[pcs + [attr_label]], 'test',
                             attr_label).df_to_instances()
    data_test.class_is_last()  # set class attribute

    # probability of the positive class for every test instance
    preds = [model.distribution_for_instance(inst)[1] for inst in data_test]
    auc = evaluate_auc(df_test[attr_label].values.tolist(), preds)

    return pcs, model, auc
Exemplo n.º 29
0
def try_params(n_instances, params, train, valid, test, istest):
    """Build an SMO classifier from *params* and evaluate it.

    :param params: dict with 'C', 'filterType', 'buildCalibrationModels'
        and 'kernel' (class name suffix under supportVector)
    :param istest: evaluate on *test* when True, otherwise train/valid
    :return: result of the chosen evaluation helper
    """
    n_instances = int(round(n_instances))
    pprint(params)

    opts = ["-C", str(params['C']),
            "-N", str(params['filterType'])]

    if params['buildCalibrationModels'] == True:
        opts.append("-M")

    opts += ["-K",
             "weka.classifiers.functions.supportVector." + params['kernel']]

    clf = Classifier(classname="weka.classifiers.functions.SMO", options=opts)

    if istest:
        return test_weka_classifier(clf, train, test)
    return train_and_eval_weka_classifier(clf, train, valid, n_instances)
Exemplo n.º 30
0
def main(args):
    """
    Trains a NaiveBayesUpdateable classifier incrementally on a dataset. The dataset can be supplied as parameter.
    :param args: the commandline arguments
    :type args: list
    """
    # fall back to the bundled vote dataset when no filename is supplied
    data_file = args[1] if len(args) > 1 else helper.get_data_dir() + os.sep + "vote.arff"
    helper.print_info("Loading dataset: " + data_file)
    loader = Loader(classname="weka.core.converters.ArffLoader")
    data = loader.load_file(data_file, incremental=True)
    data.class_is_last()

    # initialize on the loaded structure, then feed instances one by one
    nb = Classifier(classname="weka.classifiers.bayes.NaiveBayesUpdateable")
    nb.build_classifier(data)
    for inst in loader:
        nb.update_classifier(inst)

    print(nb)