示例#1
0
def weka_local_apply_classifier(input_dict):
    """Classify every instance with a serialized, already built WEKA classifier.

    :param input_dict: dict holding the serialized WEKA objects under the
        keys ``'instances'`` and ``'classifier'``
    :return: dict ``{'classes': [...]}`` with one predicted class label per
        instance, in iteration order
    :raises Exception: if deserializing the classifier or classifying an
        instance fails
    """
    if not jp.isThreadAttachedToJVM():
        jp.attachThreadToJVM()

    instances = common.deserialize_weka_object(input_dict['instances'])

    if instances.classIndex() == -1:
        # No class configured on the data: treat the last attribute as class.
        instances.setClassIndex(instances.numAttributes() - 1)

    classifier_serialized = input_dict['classifier']
    try:
        classifier = common.deserialize_weka_object(classifier_serialized)
        # Translate predicted indices through the class attribute itself.
        # Looking them up on the last attribute is only correct when the
        # class happens to be the last attribute.
        class_attribute = instances.classAttribute()
        predictions = [
            class_attribute.value(int(classifier.classifyInstance(instance)))
            for instance in instances
        ]
    except Exception:
        # ``except Exception`` (not a bare ``except``) so KeyboardInterrupt
        # and SystemExit are not converted into this message.
        raise Exception(
            "Classifier not built. Please use the Build Classifier widget first."
        )

    return {'classes': predictions}
示例#2
0
def test4():
    """Build generic WEKA learners on the iris data and print training errors.

    For each listed WEKA class name a learner is created through
    ``library.weka_local_generic_learner``, built on the instances loaded from
    ``weka/data/iris.arff`` (relative to this file), and every training
    instance whose predicted class index differs from its own class value is
    printed.
    """
    generic_learners = [
        "weka.classifiers.bayes.BayesNet",
        "weka.classifiers.trees.HoeffdingTree"
    ]

    fn = normpath(join(dirname(__file__), 'weka', 'data', 'iris.arff'))
    # Read through a context manager so the file handle is closed right away.
    with open(fn) as arff_file:
        arff_text = arff_file.read()
    instances = library.weka_local_arff_to_weka_instances({'arff': arff_text})
    iris_instances = common.deserialize_weka_object(instances['instances'])
    class_attribute = iris_instances.classAttribute()

    for generic_learner in generic_learners:
        learner = library.weka_local_generic_learner({
            'weka_class': generic_learner,
            'params': None
        })
        model = common.deserialize_weka_object(learner['Generic_Weka_learner'])
        model.buildClassifier(iris_instances)

        # Short name: the text after the last '.' in repr(type(model)),
        # without the two trailing characters of the repr.
        type_repr = repr(type(model))
        print('\nalgorithm: %s' % type_repr[type_repr.rindex('.') + 1:-2])
        for instance in iris_instances:
            original = int(instance.classValue())
            new = int(model.classifyInstance(instance))
            if original != new:
                print('misclassified training example: %s was predicted as: %s' % (
                    str(instance), class_attribute.value(new)))
示例#3
0
def test2():
    """Build each listed WEKA learner on the iris data and print training errors.

    The learners come from the ``library.weka_local_*`` factory functions with
    ``params`` set to ``None``. Each one is deserialized, built on the
    instances loaded from ``weka/data/iris.arff`` (relative to this file), and
    every training instance whose predicted class index differs from its own
    class value is printed.
    """
    learners = [
        library.weka_local_ibk({'params': None})['IBk_learner'],
        library.weka_local_j48({'params': None})['J48_learner'],
        library.weka_local_jrip({'params': None})['JRip_learner'],
        library.weka_local_k_star({'params': None})['KStar_learner'],
        library.weka_local_libsvm({'params': None})['LibSVM_learner'],
        library.weka_local_multilayer_perceptron({'params': None})['Multilayer_Perceptron_learner'],
        library.weka_local_naive_bayes({'params': None})['Naive_Bayes_learner'],
        library.weka_local_rep_tree({'params': None})['REPTree_learner'],
        library.weka_local_random_forest({'params': None})['RandomForest_learner'],
        library.weka_local_random_tree({'params': None})['RandomTree_learner'],
        library.weka_local_smo({'params': None})['SMO_learner'],
    ]

    fn = normpath(join(dirname(__file__), 'weka', 'data', 'iris.arff'))
    # Read through a context manager so the file handle is closed right away.
    with open(fn) as arff_file:
        arff_text = arff_file.read()
    instances = library.weka_local_arff_to_weka_instances({'arff': arff_text})
    iris_instances = common.deserialize_weka_object(instances['instances'])

    class_attribute = iris_instances.classAttribute()

    for learner in learners:
        print("Learner: " + str(learner))
        model = common.deserialize_weka_object(learner)
        model.buildClassifier(iris_instances)
        # Short name: the text after the last '.' in repr(type(model)),
        # without the two trailing characters of the repr.
        type_repr = repr(type(model))
        print('\nalgorithm: %s' % type_repr[type_repr.rindex('.') + 1:-2])
        for instance in iris_instances:
            original = int(instance.classValue())
            new = int(model.classifyInstance(instance))
            if original != new:
                print('misclassified training example: %s was predicted as: %s' % (
                    str(instance), class_attribute.value(new)))
        # Blank separator line; print('') behaves the same on Python 2 and 3.
        print('')
示例#4
0
def weka_local_build_classifier(input_dict):
    """Build the given WEKA learner on the given instances.

    :param input_dict: dict holding serialized WEKA objects under the keys
        ``'instances'`` (training data) and ``'learner'``
    :return: dict ``{'classifier': ...}`` with the serialized built classifier
    """
    if not jp.isThreadAttachedToJVM():
        jp.attachThreadToJVM()

    training_data = common.deserialize_weka_object(input_dict['instances'])
    model = common.deserialize_weka_object(input_dict['learner'])

    class_is_unset = training_data.classIndex() == -1
    if class_is_unset:
        # Fall back to the last attribute instead of rejecting the data.
        last_attribute = training_data.numAttributes() - 1
        training_data.setClassIndex(last_attribute)

    model.buildClassifier(training_data)
    return {'classifier': common.serialize_weka_object(model)}
示例#5
0
def weka_local_build_classifier(input_dict):
    """Train a serialized WEKA learner and return it serialized again.

    :param input_dict: dict with the serialized training instances under
        ``'instances'`` and the serialized learner under ``'learner'``
    :return: dict whose ``'classifier'`` entry is the serialized classifier
        after ``buildClassifier`` has run on the instances
    """
    if not jp.isThreadAttachedToJVM():
        jp.attachThreadToJVM()

    dataset = common.deserialize_weka_object(input_dict['instances'])
    learner = common.deserialize_weka_object(input_dict['learner'])

    # Data without a class index gets the last attribute as its class.
    if dataset.classIndex() == -1:
        dataset.setClassIndex(dataset.numAttributes() - 1)

    learner.buildClassifier(dataset)
    serialized = common.serialize_weka_object(learner)
    return {'classifier': serialized}
示例#6
0
    def print_classifier(self):
        """Return the textual description of the stored WEKA classifier.

        :return: result of ``toString()`` on the object deserialized from
            ``self.sclassifier``
        :raises Exception: if deserialization or ``toString()`` fails
        """
        if not jp.isThreadAttachedToJVM():
            jp.attachThreadToJVM()

        try:
            classifier = common.deserialize_weka_object(self.sclassifier)
            return classifier.toString()
        except Exception:
            # ``except Exception`` rather than a bare ``except`` so that
            # KeyboardInterrupt and SystemExit are not masked by this message.
            raise Exception("Only WEKA classifiers/models supported. Please provide a valid WEKA learner.")
示例#7
0
def apply_mapped_classifier_get_instances(weka_classifier, original_data,
                                          data):
    '''An advanced version of the Apply Classifier method.
    Addresses incompatible training and test data, and returns a dataset with predictions.

    The classifier and the original training instances are written to a
    temporary file, which a WEKA ``InputMappedClassifier`` then loads as its
    model. Predicted class indices (integers) are stored in
    ``data["targetPredicted"]``.

    :param weka_classifier: WekaClassifier object
    :param original_data: original training instances, bunch
    :param data: test instances, bunch
    :return: Dataset (Bunch) object with predictions and a textual report from the InputMappedClassifier class
    :raises Exception: if the classifier cannot be deserialized or an
        instance cannot be classified
    '''
    if not jp.isThreadAttachedToJVM():
        jp.attachThreadToJVM()

    try:
        classifier = common.deserialize_weka_object(
            weka_classifier.sclassifier)
    except Exception:
        # Not a bare ``except``: KeyboardInterrupt/SystemExit must propagate.
        raise Exception(
            "Only WEKA classifiers/models supported. Please provide a valid WEKA learner."
        )

    original_training_instances = ut.convert_bunch_to_weka_instances(
        original_data)
    instances = ut.convert_bunch_to_weka_instances(data)

    # serialize classifier with original instances to a file once again for the Mapped classifier
    tfile = common.TemporaryFile(flags='wb+')
    serialization_helper = jp.JClass('weka.core.SerializationHelper')
    serialization_helper.writeAll(tfile.name,
                                  [classifier, original_training_instances])

    # construct a MappedClassifier
    mapped_classifier = jp.JClass(
        'weka.classifiers.misc.InputMappedClassifier')()
    mapped_classifier.setIgnoreCaseForNames(True)
    mapped_classifier.setTrim(True)
    mapped_classifier.setModelPath(tfile.name)

    # Only the classification itself is guarded; storing the result below
    # cannot be the reason for a "not built" error.
    try:
        predictions = [
            int(mapped_classifier.classifyInstance(instance))
            for instance in instances
        ]
    except Exception:
        raise Exception(
            "Classifier not built. Please use the Build Classifier widget first."
        )
    data["targetPredicted"] = predictions

    # Keep only the part of the report starting at the mapping section.
    # MAPPING_REPORT_START is defined outside this function.
    report = mapped_classifier.toString()
    if MAPPING_REPORT_START in report:
        report = report[report.index(MAPPING_REPORT_START):]

    return data, report
示例#8
0
def weka_local_apply_mapped_classifier_get_instances(input_dict):
    """Classify instances through a WEKA InputMappedClassifier and return them.

    The classifier and its original training instances are written to a
    temporary file that the ``InputMappedClassifier`` loads as its model.
    Each instance then has its class value overwritten with the predicted
    label.

    :param input_dict: dict with serialized WEKA objects under the keys
        ``'classifier'``, ``'original_training_instances'`` and ``'instances'``
    :return: dict with ``'mapping_report'`` (the classifier's ``toString()``
        output, cut to start at ``'Attribute mappings:'`` when that text is
        present) and ``'instances'`` (the serialized, relabelled instances)
    :raises ValueError: if the instances to classify have no class index set
    """
    if not jp.isThreadAttachedToJVM():
        jp.attachThreadToJVM()

    report_marker = 'Attribute mappings:'

    model = common.deserialize_weka_object(input_dict['classifier'])
    training_set = common.deserialize_weka_object(
        input_dict['original_training_instances'])
    test_set = common.deserialize_weka_object(input_dict['instances'])

    # The mapped classifier reads its model from a path, so the classifier
    # and the original instances are written out together first.
    model_file = common.TemporaryFile(flags='wb+')
    helper = jp.JClass('weka.core.SerializationHelper')
    helper.writeAll(model_file.name, [model, training_set])

    mapper = jp.JClass('weka.classifiers.misc.InputMappedClassifier')()
    mapper.setIgnoreCaseForNames(True)
    mapper.setTrim(True)
    mapper.setModelPath(model_file.name)

    # Relabel the new data with the mapped classifier's predictions.
    if test_set.classIndex() == -1:
        raise ValueError('Class not set!')
    class_attr = test_set.classAttribute()
    for row in test_set:
        predicted_index = int(mapper.classifyInstance(row))
        row.setClassValue(class_attr.value(predicted_index))

    report = mapper.toString()
    if report_marker in report:
        report = report[report.index(report_marker):]

    result = {'mapping_report': report}
    result['instances'] = common.serialize_weka_object(test_set)
    return result
示例#9
0
    def print_classifier(self):
        """Describe the stored WEKA classifier as text.

        :return: ``toString()`` of the object deserialized from
            ``self.sclassifier``
        :raises Exception: if the stored object cannot be deserialized or
            rendered
        """
        if not jp.isThreadAttachedToJVM():
            jp.attachThreadToJVM()

        try:
            classifier = common.deserialize_weka_object(self.sclassifier)
            return classifier.toString()
        except Exception:
            # A bare ``except`` would also intercept KeyboardInterrupt and
            # SystemExit; only ordinary errors are translated here.
            raise Exception(
                "Only WEKA classifiers/models supported. Please provide a valid WEKA learner."
            )
示例#10
0
def weka_local_apply_classifier_and_get_instances(input_dict):
    """Overwrite each instance's class value with the classifier's prediction.

    :param input_dict: dict holding the serialized WEKA objects under the
        keys ``'instances'`` and ``'classifier'``
    :return: dict ``{'instances': ...}`` with the serialized instances after
        their class values were replaced by the predicted labels
    :raises Exception: if deserializing the classifier, classifying an
        instance or serializing the result fails
    """
    if not jp.isThreadAttachedToJVM():
        jp.attachThreadToJVM()

    instances = common.deserialize_weka_object(input_dict['instances'])

    if instances.classIndex() == -1:
        instances.setClassIndex(instances.numAttributes() - 1)  # last attribute is class

    classifier_serialized = input_dict['classifier']
    try:
        classifier = common.deserialize_weka_object(classifier_serialized)
        class_attribute = instances.classAttribute()
        for instance in instances:
            label_ind = int(classifier.classifyInstance(instance))
            instance.setClassValue(class_attribute.value(label_ind))

        return {'instances': common.serialize_weka_object(instances)}
    except Exception:
        # ``except Exception`` keeps KeyboardInterrupt/SystemExit from being
        # reported as an unbuilt classifier.
        raise Exception("Classifier not built. Please use the Build Classifier widget first.")
示例#11
0
def weka_local_print_model(input_dict):
    '''
    Returns the textual representation of a WEKA model.

    :param input_dict: dict holding the serialized WEKA model under ``'model'``
    :return: dict ``{'model_as_string': ...}`` with the model's ``toString()``
    :raises Exception: if the model cannot be read, deserialized or rendered
    '''
    if not jp.isThreadAttachedToJVM():
        jp.attachThreadToJVM()

    try:
        model = common.deserialize_weka_object(input_dict['model'])
        return {'model_as_string': model.toString()}
    except Exception:
        # Not a bare ``except``: interpreter-exit signals must pass through.
        raise Exception("Only WEKA classifiers/models supported. Please provide a valid WEKA model.")
示例#12
0
def test2():
    """Train every listed WEKA learner on iris and list misclassified rows.

    Learners are obtained from the ``library.weka_local_*`` factories with
    ``params`` set to ``None``. After building each model on the instances
    loaded from ``weka/data/iris.arff`` (relative to this file), the function
    prints the model's short class name and every training instance whose
    predicted class index differs from its own class value.
    """
    learners = [
        library.weka_local_ibk({'params': None})['IBk_learner'],
        library.weka_local_j48({'params': None})['J48_learner'],
        library.weka_local_jrip({'params': None})['JRip_learner'],
        library.weka_local_k_star({'params': None})['KStar_learner'],
        library.weka_local_libsvm({'params': None})['LibSVM_learner'],
        library.weka_local_multilayer_perceptron(
            {'params': None})['Multilayer_Perceptron_learner'],
        library.weka_local_naive_bayes({'params':
                                        None})['Naive_Bayes_learner'],
        library.weka_local_rep_tree({'params': None})['REPTree_learner'],
        library.weka_local_random_forest({'params':
                                          None})['RandomForest_learner'],
        library.weka_local_random_tree({'params': None})['RandomTree_learner'],
        library.weka_local_smo({'params': None})['SMO_learner']
    ]

    fn = normpath(join(dirname(__file__), 'weka', 'data', 'iris.arff'))
    # ``with`` closes the ARFF file instead of leaving the handle to the GC.
    with open(fn) as arff_file:
        arff_text = arff_file.read()
    instances = library.weka_local_arff_to_weka_instances(
        {'arff': arff_text})
    instances = instances['instances']
    iris_instances = common.deserialize_weka_object(instances)

    class_attribute = iris_instances.classAttribute()

    for learner in learners:
        print("Learner: " + str(learner))
        model = common.deserialize_weka_object(learner)
        model.buildClassifier(iris_instances)
        # Text after the last '.' of repr(type(model)), minus the repr's two
        # closing characters.
        type_repr = repr(type(model))
        print('\nalgorithm: %s' % type_repr[type_repr.rindex('.') + 1:-2])
        for instance in iris_instances:
            original = int(instance.classValue())
            new = int(model.classifyInstance(instance))
            if original != new:
                print('misclassified training example: %s was predicted as: %s' % (
                    str(instance), class_attribute.value(new)))
        # Empty line between learners; valid as a call on Python 2 and 3.
        print('')
示例#13
0
def weka_local_instances_to_arff(input_dict):
    '''
    Converts serialized WEKA instances into their ARFF text.

    :param input_dict: dict holding the serialized WEKA instances under
        ``'instances'``
    :return: dict ``{'arff': ...}`` with the ``toString()`` output of the
        deserialized instances
    '''
    if not jp.isThreadAttachedToJVM():
        jp.attachThreadToJVM()

    dataset = common.deserialize_weka_object(input_dict['instances'])
    return {'arff': dataset.toString()}
示例#14
0
def weka_local_instances_to_arff(input_dict):
    '''
    Renders a serialized WEKA dataset as text.

    :param input_dict: dict with the serialized instances stored under the
        ``'instances'`` key
    :return: dict with a single ``'arff'`` entry: the string produced by
        calling ``toString()`` on the deserialized instances
    '''
    if not jp.isThreadAttachedToJVM():
        jp.attachThreadToJVM()

    serialized = input_dict['instances']
    weka_instances = common.deserialize_weka_object(serialized)

    result = {}
    result['arff'] = weka_instances.toString()
    return result
示例#15
0
def weka_local_apply_mapped_classifier(input_dict):
    """Predict class labels through a WEKA InputMappedClassifier.

    The classifier and its original training instances are written to a
    temporary file that the ``InputMappedClassifier`` loads as its model.

    :param input_dict: dict with serialized WEKA objects under the keys
        ``'classifier'``, ``'original_training_instances'`` and ``'instances'``
    :return: dict with ``'mapping_report'`` (the classifier's ``toString()``
        output, cut to start at ``'Attribute mappings:'`` when that text is
        present) and ``'classes'`` (one predicted label per instance)
    :raises ValueError: if the instances to classify have no class index set
    """
    if not jp.isThreadAttachedToJVM():
        jp.attachThreadToJVM()

    report_marker = 'Attribute mappings:'

    model = common.deserialize_weka_object(input_dict['classifier'])
    training_set = common.deserialize_weka_object(input_dict['original_training_instances'])
    test_set = common.deserialize_weka_object(input_dict['instances'])

    # Write classifier + training instances to disk for the mapped classifier.
    model_file = common.TemporaryFile(flags='wb+')
    helper = jp.JClass('weka.core.SerializationHelper')
    helper.writeAll(model_file.name, [model, training_set])

    mapper = jp.JClass('weka.classifiers.misc.InputMappedClassifier')()
    mapper.setIgnoreCaseForNames(True)
    mapper.setTrim(True)
    mapper.setModelPath(model_file.name)

    # Run the mapped classifier over the new data.
    if test_set.classIndex() == -1:
        raise ValueError('Class not set!')
    class_attr = test_set.classAttribute()
    classes = [
        class_attr.value(int(mapper.classifyInstance(row)))
        for row in test_set
    ]

    report = mapper.toString()
    if report_marker in report:
        report = report[report.index(report_marker):]

    return {'mapping_report': report, 'classes': classes}
示例#16
0
def test4():
    """Exercise the generic WEKA learner widget on the iris data.

    Each WEKA class name in the list is turned into a learner via
    ``library.weka_local_generic_learner``, built on the instances loaded
    from ``weka/data/iris.arff`` (relative to this file), and every training
    instance whose predicted class index differs from its own class value is
    printed.
    """
    generic_learners = ["weka.classifiers.bayes.BayesNet", "weka.classifiers.trees.HoeffdingTree"]

    fn = normpath(join(dirname(__file__), 'weka', 'data', 'iris.arff'))
    # ``with`` closes the ARFF file instead of leaving the handle to the GC.
    with open(fn) as arff_file:
        arff_text = arff_file.read()
    instances = library.weka_local_arff_to_weka_instances({'arff': arff_text})
    instances = instances['instances']
    iris_instances = common.deserialize_weka_object(instances)
    class_attribute = iris_instances.classAttribute()

    for generic_learner in generic_learners:
        learner = library.weka_local_generic_learner({'weka_class': generic_learner, 'params': None})
        learner = learner['Generic_Weka_learner']

        model = common.deserialize_weka_object(learner)
        model.buildClassifier(iris_instances)

        # Text after the last '.' of repr(type(model)), minus the repr's two
        # closing characters.
        type_repr = repr(type(model))
        print('\nalgorithm: %s' % type_repr[type_repr.rindex('.') + 1:-2])
        for instance in iris_instances:
            original = int(instance.classValue())
            new = int(model.classifyInstance(instance))
            if original != new:
                print('misclassified training example: %s was predicted as: %s' % (
                    str(instance), class_attribute.value(new)))
示例#17
0
def weka_local_print_model(input_dict):
    '''
    Returns a WEKA model rendered as a string.

    :param input_dict: dict with the serialized WEKA model under ``'model'``
    :return: dict ``{'model_as_string': ...}`` holding the ``toString()``
        output of the deserialized model
    :raises Exception: if the model cannot be read, deserialized or rendered
    '''
    if not jp.isThreadAttachedToJVM():
        jp.attachThreadToJVM()

    try:
        model = common.deserialize_weka_object(input_dict['model'])
        return {'model_as_string': model.toString()}
    except Exception:
        # ``except Exception`` instead of a bare ``except`` so that
        # KeyboardInterrupt and SystemExit are not swallowed.
        raise Exception(
            "Only WEKA classifiers/models supported. Please provide a valid WEKA model."
        )
示例#18
0
def apply_mapped_classifier_get_instances(weka_classifier, original_data, data):
    '''An advanced version of the Apply Classifier method.
    Addresses incompatible training and test data, and returns a dataset with predictions.

    The classifier is serialized together with the original training
    instances into a temporary file, which a WEKA ``InputMappedClassifier``
    uses as its model. The integer class indices it predicts are stored in
    ``data["targetPredicted"]``.

    :param weka_classifier: WekaClassifier object
    :param original_data: original training instances, bunch
    :param data: test instances, bunch
    :return: Dataset (Bunch) object with predictions and a textual report from the InputMappedClassifier class
    :raises Exception: if the classifier cannot be deserialized or an
        instance cannot be classified
    '''
    if not jp.isThreadAttachedToJVM():
        jp.attachThreadToJVM()

    try:
        classifier = common.deserialize_weka_object(weka_classifier.sclassifier)
    except Exception:
        # ``except Exception`` lets KeyboardInterrupt/SystemExit propagate.
        raise Exception("Only WEKA classifiers/models supported. Please provide a valid WEKA learner.")

    original_training_instances = ut.convert_bunch_to_weka_instances(original_data)
    instances = ut.convert_bunch_to_weka_instances(data)

    # serialize classifier with original instances to a file once again for the Mapped classifier
    tfile = common.TemporaryFile(flags='wb+')
    serialization_helper = jp.JClass('weka.core.SerializationHelper')
    serialization_helper.writeAll(tfile.name, [classifier, original_training_instances])

    # construct a MappedClassifier
    mapped_classifier = jp.JClass('weka.classifiers.misc.InputMappedClassifier')()
    mapped_classifier.setIgnoreCaseForNames(True)
    mapped_classifier.setTrim(True)
    mapped_classifier.setModelPath(tfile.name)

    # Guard only the classification; storing the predictions afterwards is
    # unrelated to whether the classifier was built.
    try:
        predictions = [int(mapped_classifier.classifyInstance(instance)) for instance in instances]
    except Exception:
        raise Exception("Classifier not built. Please use the Build Classifier widget first.")
    data["targetPredicted"] = predictions

    # Trim the report so it starts at the mapping section, when present.
    # MAPPING_REPORT_START is defined outside this function.
    report = mapped_classifier.toString()
    if MAPPING_REPORT_START in report:
        report = report[report.index(MAPPING_REPORT_START):]

    return data, report
示例#19
0
def print_weka_model(weka_classifier):
    '''Outputs textual information about a Weka model

    :param weka_classifier: a WekaClassifier object
    :return: a textual representation of the Weka model
    :raises Exception: if ``weka_classifier.sclassifier`` cannot be
        deserialized
    '''
    if not jp.isThreadAttachedToJVM():
        jp.attachThreadToJVM()

    try:
        model = common.deserialize_weka_object(weka_classifier.sclassifier)
    except Exception:
        # ``except Exception`` rather than a bare ``except`` so that
        # KeyboardInterrupt and SystemExit are not masked.
        raise Exception(
            "Only WEKA classifiers/models supported. Please provide a valid WEKA learner."
        )

    return model.toString()
示例#20
0
    def build_classifier(self, data):
        """Train the stored learner on ``data`` and keep the result.

        The object deserialized from ``self.sclassifier`` is built on the
        WEKA instances converted from ``data`` and then serialized back into
        ``self.sclassifier``.

        :param data: bunch
        """
        if not jp.isThreadAttachedToJVM():
            jp.attachThreadToJVM()

        training_set = ut.convert_bunch_to_weka_instances(data)
        model = common.deserialize_weka_object(self.sclassifier)

        class_is_unset = training_set.classIndex() == -1
        if class_is_unset:
            # Use the last attribute as the class rather than failing.
            last_attribute = training_set.numAttributes() - 1
            training_set.setClassIndex(last_attribute)

        model.buildClassifier(training_set)
        self.sclassifier = common.serialize_weka_object(model)
示例#21
0
    def build_classifier(self, data):
        """Build the wrapped WEKA classifier from a dataset.

        Replaces ``self.sclassifier`` with the serialized classifier obtained
        after calling ``buildClassifier`` on the converted instances.

        :param data: bunch
        """
        if not jp.isThreadAttachedToJVM():
            jp.attachThreadToJVM()

        weka_instances = ut.convert_bunch_to_weka_instances(data)
        learner = common.deserialize_weka_object(self.sclassifier)

        # Instances without a class index default to the last attribute.
        if weka_instances.classIndex() == -1:
            weka_instances.setClassIndex(weka_instances.numAttributes() - 1)

        learner.buildClassifier(weka_instances)
        serialized = common.serialize_weka_object(learner)
        self.sclassifier = serialized
示例#22
0
def weka_local_display_decision_tree(request, input_dict, output_dict, widget):
    """Visualization displaying a decision tree.

    Writes the classifier's ``graph()`` output to a ``.dot`` file under
    ``MEDIA_ROOT/<user id>/``, runs the ``dot`` program to produce an image
    next to it, and renders the visualization template.

    :param request: request object; ``request.user.id`` selects the output
        directory
    :param input_dict: dict with the serialized classifier under
        ``'classifier'`` and ``'img_type'`` (``'raster'`` selects PNG, any
        other value SVG)
    :param output_dict: not used by this function
    :param widget: widget object; ``widget.id`` is part of the file names
    :return: result of ``render`` for
        ``visualizations/weka_local_display_decision_tree.html``
    :raises OSError: if the ``dot`` executable cannot be started
    """

    import subprocess
    from mothra.settings import MEDIA_ROOT
    from workflows.helpers import ensure_dir

    if not jp.isThreadAttachedToJVM():
        jp.attachThreadToJVM()

    img_type = 'png' if input_dict['img_type'] == 'raster' else 'svg'

    classifier = common.deserialize_weka_object(input_dict['classifier'])
    dot_text = classifier.graph()

    user_dir = str(request.user.id)
    dot_name = '/'.join([user_dir, 'decisionTree-weka-%d.dot' % widget.id])
    destination_dot = '/'.join([MEDIA_ROOT, dot_name])
    ensure_dir(destination_dot)

    with open(destination_dot, 'w') as dot_file:
        dot_file.write(dot_text)

    # png/svg file
    filename = '/'.join([
        user_dir,
        'decisionTree-weka-%d.%s' % (widget.id, img_type)
    ])
    destination_img = '/'.join([MEDIA_ROOT, filename])
    ensure_dir(destination_img)

    # Argument list without a shell: paths containing spaces or shell
    # metacharacters reach ``dot`` unchanged. The exit status is still
    # ignored, as before.
    subprocess.call(
        ['dot', '-T%s' % img_type, destination_dot, '-o', destination_img])

    return render(request,
                  'visualizations/weka_local_display_decision_tree.html', {
                      'filename': filename,
                      'widget': widget,
                      'input_dict': input_dict
                  })
示例#23
0
def weka_local_display_decision_tree(request, input_dict, output_dict, widget):
    """Visualization displaying a decision tree.

    The classifier's ``graph()`` output is saved as a ``.dot`` file under
    ``MEDIA_ROOT/<user id>/``, converted to an image by the ``dot`` program,
    and the visualization template is rendered with the image's relative
    file name.

    :param request: request object; ``request.user.id`` selects the output
        directory
    :param input_dict: dict with the serialized classifier under
        ``'classifier'`` and ``'img_type'`` (``'raster'`` selects PNG, any
        other value SVG)
    :param output_dict: not used by this function
    :param widget: widget object; ``widget.id`` is part of the file names
    :return: result of ``render`` for
        ``visualizations/weka_local_display_decision_tree.html``
    :raises OSError: if the ``dot`` executable cannot be started
    """

    import subprocess
    from mothra.settings import MEDIA_ROOT
    from workflows.helpers import ensure_dir

    if not jp.isThreadAttachedToJVM():
        jp.attachThreadToJVM()

    img_type = 'svg'
    if input_dict['img_type'] == 'raster':
        img_type = 'png'

    classifier = common.deserialize_weka_object(input_dict['classifier'])
    dot_text = classifier.graph()

    dot_name = '/'.join([str(request.user.id), 'decisionTree-weka-%d.dot' % widget.id])
    destination_dot = '/'.join([MEDIA_ROOT, dot_name])
    ensure_dir(destination_dot)

    with open(destination_dot, 'w') as dot_file:
        dot_file.write(dot_text)

    # png/svg file
    filename = '/'.join([str(request.user.id),
                         'decisionTree-weka-%d.%s' % (widget.id, img_type)])
    destination_img = '/'.join([MEDIA_ROOT, filename])
    ensure_dir(destination_img)

    # Pass the arguments as a list and skip the shell, so the paths are never
    # re-parsed (spaces, quotes, ``;`` ...). The exit status is ignored, as
    # in the previous shell-based call.
    dot_command = ['dot', '-T%s' % img_type, destination_dot, '-o', destination_img]
    subprocess.call(dot_command)

    return render(request,
                  'visualizations/weka_local_display_decision_tree.html',
                  {'filename': filename,
                   'widget': widget,
                   'input_dict': input_dict})
示例#24
0
    def apply_classifier(self, data):
        """Predict a class index for every instance in ``data``.

        The predictions (integers returned by ``classifyInstance``) are
        stored in ``data["targetPredicted"]``.

        :param data: bunch
        :return: bunch with targetPredicted
        :raises ValueError: if the converted instances have no class index
        """
        if not jp.isThreadAttachedToJVM():
            jp.attachThreadToJVM()

        weka_instances = ut.convert_bunch_to_weka_instances(data)
        model = common.deserialize_weka_object(self.sclassifier)

        if weka_instances.classIndex() == -1:
            raise ValueError('Class not set!')

        data["targetPredicted"] = [
            int(model.classifyInstance(row)) for row in weka_instances
        ]
        return data
示例#25
0
    def apply_classifier(self, data):
        """Run the stored classifier over a dataset and attach its predictions.

        :param data: bunch
        :return: the same bunch, with ``"targetPredicted"`` set to the list
            of predicted class indices (one integer per instance)
        :raises ValueError: if the converted instances have no class index
        """
        if not jp.isThreadAttachedToJVM():
            jp.attachThreadToJVM()

        test_set = ut.convert_bunch_to_weka_instances(data)
        classifier = common.deserialize_weka_object(self.sclassifier)

        # The class attribute must already be configured on the data.
        class_is_unset = test_set.classIndex() == -1
        if class_is_unset:
            raise ValueError('Class not set!')

        predicted = []
        for row in test_set:
            predicted.append(int(classifier.classifyInstance(row)))

        data["targetPredicted"] = predicted
        return data
示例#26
0
def weka_local_cross_validate(input_dict):
    """Cross-validate a WEKA learner and collect evaluation statistics.

    After cross-validation the learner is built once more on all instances
    and applied to those same instances, to collect the per-instance
    predictions and class probabilities used for the ``'apv'`` output.

    :param input_dict: dict with

        * ``'instances'``: serialized WEKA instances
        * ``'learner'``: serialized WEKA learner
        * ``'folds'``: number of folds; 10 when missing or not an integer
        * ``'classIndex'``: class value index for per-class metrics; when
          missing, not an integer or -1, weighted metrics are reported
        * ``'target'`` (optional): class label observed in ``'apv'``; the
          first class value is used when missing or empty
    :return: dict with the keys ``'confusion_matrix'``, ``'accuracy'``,
        ``'summary'``, ``'accuracy_by_class'``, ``'precision'``,
        ``'recall'``, ``'f'``, ``'auc'``, ``'tp_rate'``, ``'fp_rate'`` and
        ``'apv'`` (dict with ``'actual'`` 0/1 flags, ``'predicted'`` scores
        and a ``'name'``)
    :raises Exception: wrapping any error raised while evaluating
    """
    if not jp.isThreadAttachedToJVM():
        jp.attachThreadToJVM()
    try:
        num_folds = int(input_dict['folds'])
    except (KeyError, TypeError, ValueError):
        num_folds = 10

    try:
        class_index = int(input_dict['classIndex'])
    except (KeyError, TypeError, ValueError):
        class_index = -1

    instances = common.deserialize_weka_object(input_dict['instances'])

    if instances.classIndex() == -1:
        instances.setClassIndex(instances.numAttributes() - 1)  # last attribute is class

    classifier_serialized = input_dict['learner']
    try:
        classifier = common.deserialize_weka_object(classifier_serialized)
        # Named ``evaluation`` so the builtin ``eval`` is not shadowed.
        evaluation = jp.JClass('weka.classifiers.Evaluation')(instances)
        rand = jp.JClass('java.util.Random')(1)
        evaluation.crossValidateModel(classifier, instances, num_folds, rand, [])

        # 'fp_rate' is the false positive rate; the true negative rate that
        # was reported here before is its complement.
        if class_index == -1:
            pre, rec, f, auc, tp_r, fp_r = (evaluation.weightedPrecision(),
                                            evaluation.weightedRecall(),
                                            evaluation.weightedFMeasure(),
                                            evaluation.weightedAreaUnderROC(),
                                            evaluation.weightedTruePositiveRate(),
                                            evaluation.weightedFalsePositiveRate())
        else:
            pre, rec, f, auc, tp_r, fp_r = (evaluation.precision(class_index),
                                            evaluation.recall(class_index),
                                            evaluation.fMeasure(class_index),
                                            evaluation.areaUnderROC(class_index),
                                            evaluation.truePositiveRate(class_index),
                                            evaluation.falsePositiveRate(class_index))

        # collect predictions and their probabilities
        classAttribute = instances.classAttribute()
        classifier.buildClassifier(instances)
        actual_classes = []
        predicted_classes = []
        predicted_classes_probs = []
        for instance in instances:
            actual = classAttribute.value(int(instance.classValue()))
            predicted = classAttribute.value(int(classifier.classifyInstance(instance)))
            probs = classifier.distributionForInstance(instance)
            actual_classes.append(actual)
            predicted_classes.append(predicted)
            predicted_classes_probs.append({classAttribute.value(i): p for i, p in enumerate(probs)})

        target = input_dict.get('target')
        if not target:
            target = classAttribute.value(0)
            print('Warning: observing the first class value {}'.format(target))

        # compute input for Viper
        mname = str(classifier.__getattribute__('class'))
        mname = mname[mname.find('weka'):-2]
        name = 'target class "{}": {}'.format(target, mname)
        apv = {'actual': [], 'predicted': [], 'name': name}
        for actual, predicted, probs in zip(actual_classes,
                                            predicted_classes,
                                            predicted_classes_probs):
            apv['actual'].append(1 if target == actual else 0)
            # Score is the target's probability only when it was predicted.
            apv['predicted'].append(probs[target] if predicted == target else 0)

        return {'confusion_matrix': evaluation.toMatrixString(),
                'accuracy': 100 * (1 - evaluation.errorRate()),
                'summary': evaluation.toSummaryString("=== Summary ===", True),
                'accuracy_by_class': evaluation.toClassDetailsString(),
                'precision': pre,
                'recall': rec,
                'f': f,
                'auc': auc,
                'tp_rate': tp_r,
                'fp_rate': fp_r,
                'apv': apv}
    except Exception as e:
        raise Exception("Error in weka_local_cross_validate() : " + str(e))
示例#27
0
def weka_local_cross_validate(input_dict):
    """Run cross-validation for a WEKA learner and report its metrics.

    Once cross-validation has finished, the learner is built again on the
    complete data and applied to the same instances; those predictions and
    class probabilities feed the ``'apv'`` output.

    :param input_dict: dict with

        * ``'instances'``: serialized WEKA instances
        * ``'learner'``: serialized WEKA learner
        * ``'folds'``: number of folds; 10 when missing or not an integer
        * ``'classIndex'``: class value index for per-class metrics; when
          missing, not an integer or -1, weighted metrics are reported
        * ``'target'`` (optional): class label observed in ``'apv'``; the
          first class value is used when missing or empty
    :return: dict with the keys ``'confusion_matrix'``, ``'accuracy'``,
        ``'summary'``, ``'accuracy_by_class'``, ``'precision'``,
        ``'recall'``, ``'f'``, ``'auc'``, ``'tp_rate'``, ``'fp_rate'`` and
        ``'apv'`` (dict with ``'actual'`` 0/1 flags, ``'predicted'`` scores
        and a ``'name'``)
    :raises Exception: wrapping any error raised while evaluating
    """
    if not jp.isThreadAttachedToJVM():
        jp.attachThreadToJVM()
    try:
        num_folds = int(input_dict['folds'])
    except (KeyError, TypeError, ValueError):
        num_folds = 10

    try:
        class_index = int(input_dict['classIndex'])
    except (KeyError, TypeError, ValueError):
        class_index = -1

    instances = common.deserialize_weka_object(input_dict['instances'])

    if instances.classIndex() == -1:
        instances.setClassIndex(instances.numAttributes() -
                                1)  # last attribute is class

    classifier_serialized = input_dict['learner']
    try:
        classifier = common.deserialize_weka_object(classifier_serialized)
        # ``evaluation`` instead of ``eval``: do not shadow the builtin.
        evaluation = jp.JClass('weka.classifiers.Evaluation')(instances)
        rand = jp.JClass('java.util.Random')(1)
        evaluation.crossValidateModel(classifier, instances, num_folds, rand,
                                      [])

        # The last metric feeds 'fp_rate', so it has to be the false
        # positive rate and not the true negative rate (its complement).
        if class_index == -1:
            pre = evaluation.weightedPrecision()
            rec = evaluation.weightedRecall()
            f = evaluation.weightedFMeasure()
            auc = evaluation.weightedAreaUnderROC()
            tp_r = evaluation.weightedTruePositiveRate()
            fp_r = evaluation.weightedFalsePositiveRate()
        else:
            pre = evaluation.precision(class_index)
            rec = evaluation.recall(class_index)
            f = evaluation.fMeasure(class_index)
            auc = evaluation.areaUnderROC(class_index)
            tp_r = evaluation.truePositiveRate(class_index)
            fp_r = evaluation.falsePositiveRate(class_index)

        # collect predictions and their probabilities
        classAttribute = instances.classAttribute()
        classifier.buildClassifier(instances)
        actual_classes = []
        predicted_classes = []
        predicted_classes_probs = []
        for instance in instances:
            actual = classAttribute.value(int(instance.classValue()))
            predicted = classAttribute.value(
                int(classifier.classifyInstance(instance)))
            probs = classifier.distributionForInstance(instance)
            actual_classes.append(actual)
            predicted_classes.append(predicted)
            predicted_classes_probs.append(
                {classAttribute.value(i): p
                 for i, p in enumerate(probs)})

        target = input_dict.get('target')
        if not target:
            target = classAttribute.value(0)
            print('Warning: observing the first class value {}'.format(target))

        # compute input for Viper
        mname = str(classifier.__getattribute__('class'))
        mname = mname[mname.find('weka'):-2]
        name = 'target class "{}": {}'.format(target, mname)
        apv = {'actual': [], 'predicted': [], 'name': name}
        for actual, predicted, probs in zip(actual_classes,
                                            predicted_classes,
                                            predicted_classes_probs):
            apv['actual'].append(1 if target == actual else 0)
            # The target's probability counts only when it was predicted.
            apv['predicted'].append(
                probs[target] if predicted == target else 0)

        return {
            'confusion_matrix': evaluation.toMatrixString(),
            'accuracy': 100 * (1 - evaluation.errorRate()),
            'summary': evaluation.toSummaryString("=== Summary ===", True),
            'accuracy_by_class': evaluation.toClassDetailsString(),
            'precision': pre,
            'recall': rec,
            'f': f,
            'auc': auc,
            'tp_rate': tp_r,
            'fp_rate': fp_r,
            'apv': apv
        }
    except Exception as e:
        raise Exception("Error in weka_local_cross_validate() : " + str(e))