def weka_local_apply_classifier(input_dict):
    '''Applies a built WEKA classifier to a dataset and returns predicted labels.

    :param input_dict: dict with 'instances' (serialized weka.core.Instances)
        and 'classifier' (serialized, already-built WEKA classifier)
    :return: dict with 'classes', a list of predicted class labels (strings)
    :raises Exception: if the classifier cannot be deserialized/applied
    '''
    if not jp.isThreadAttachedToJVM():
        jp.attachThreadToJVM()
    instances = common.deserialize_weka_object(input_dict['instances'])
    if instances.classIndex() == -1:
        # No class set: follow the WEKA convention that the last attribute is the class.
        instances.setClassIndex(instances.numAttributes() - 1)
    classifier_serialized = input_dict['classifier']
    predictions = []
    try:
        classifier = common.deserialize_weka_object(classifier_serialized)
        # BUGFIX: read labels from classAttribute() instead of the last
        # attribute, so a pre-set non-last class index yields correct labels.
        class_attribute = instances.classAttribute()
        for instance in instances:
            label_ind = int(classifier.classifyInstance(instance))
            predictions.append(class_attribute.value(label_ind))
        return {'classes': predictions}
    except Exception:
        # Narrowed from bare except so KeyboardInterrupt/SystemExit propagate.
        raise Exception(
            "Classifier not built. Please use the Build Classifier widget first."
        )
def test4(): generic_learners = [ "weka.classifiers.bayes.BayesNet", "weka.classifiers.trees.HoeffdingTree" ] fn = normpath(join(dirname(__file__), 'weka', 'data', 'iris.arff')) instances = library.weka_local_arff_to_weka_instances( {'arff': open(fn).read()}) instances = instances['instances'] iris_instances = common.deserialize_weka_object(instances) class_attribute = iris_instances.classAttribute() for generic_learner in generic_learners: learner = library.weka_local_generic_learner({ 'weka_class': generic_learner, 'params': None }) learner = learner['Generic_Weka_learner'] model = common.deserialize_weka_object(learner) model.buildClassifier(iris_instances) print '\nalgorithm: %s' % repr( type(model))[::-1][2:repr(type(model))[::-1].index('.')][::-1] for instance in iris_instances: original = int(instance.classValue()) new = int(model.classifyInstance(instance)) if original != new: print 'misclassified training example: %s was predicted as: %s' % ( str(instance), class_attribute.value(new))
def test2(): learners = [library.weka_local_ibk({'params': None})['IBk_learner'], library.weka_local_j48({'params': None})['J48_learner'], library.weka_local_jrip({'params': None})['JRip_learner'], library.weka_local_k_star({'params': None})['KStar_learner'], library.weka_local_libsvm({'params': None})['LibSVM_learner'], library.weka_local_multilayer_perceptron({'params': None})['Multilayer_Perceptron_learner'], library.weka_local_naive_bayes({'params': None})['Naive_Bayes_learner'], library.weka_local_rep_tree({'params': None})['REPTree_learner'], library.weka_local_random_forest({'params': None})['RandomForest_learner'], library.weka_local_random_tree({'params': None})['RandomTree_learner'], library.weka_local_smo({'params': None})['SMO_learner'] ] fn = normpath(join(dirname(__file__), 'weka', 'data', 'iris.arff')) instances = library.weka_local_arff_to_weka_instances({'arff': open(fn).read()}) instances = instances['instances'] iris_instances = common.deserialize_weka_object(instances) class_attribute = iris_instances.classAttribute() for learner in learners: print("Learner: " + str(learner)) model = common.deserialize_weka_object(learner) model.buildClassifier(iris_instances) print '\nalgorithm: %s' % repr(type(model))[::-1][2:repr(type(model))[::-1].index('.')][::-1] for instance in iris_instances: original = int(instance.classValue()) new = int(model.classifyInstance(instance)) if original != new: print 'misclassified training example: %s was predicted as: %s' % ( str(instance), class_attribute.value(new)) print
def weka_local_build_classifier(input_dict):
    '''Trains a WEKA classifier on the given instances.

    :param input_dict: dict with serialized 'instances' and an untrained
        serialized 'learner'
    :return: dict with 'classifier', the serialized trained model
    '''
    if not jp.isThreadAttachedToJVM():
        jp.attachThreadToJVM()
    instances = common.deserialize_weka_object(input_dict['instances'])
    classifier = common.deserialize_weka_object(input_dict['learner'])
    if instances.classIndex() == -1:
        # Default to the WEKA convention that the last attribute is the class.
        instances.setClassIndex(instances.numAttributes() - 1)
        # raise ValueError('Class not set!')
    classifier.buildClassifier(instances)
    return {'classifier': common.serialize_weka_object(classifier)}
def print_classifier(self):
    '''Returns the textual representation of the wrapped WEKA classifier.

    :return: string from the deserialized model's toString()
    :raises Exception: if self.sclassifier is not a valid WEKA classifier
    '''
    if not jp.isThreadAttachedToJVM():
        jp.attachThreadToJVM()
    try:
        classifier = common.deserialize_weka_object(self.sclassifier)
        return classifier.toString()
    except Exception:
        # Narrowed from bare except so KeyboardInterrupt/SystemExit propagate.
        raise Exception("Only WEKA classifiers/models supported. Please provide a valid WEKA learner.")
def apply_mapped_classifier_get_instances(weka_classifier, original_data, data):
    '''An advanced version of the Apply Classifier method. Addresses
    incompatible training and test data, and returns a dataset with predictions.

    :param weka_classifier: WekaClassifier object
    :param original_data: original training instances, bunch
    :param data: test instances, bunch
    :return: Dataset (Bunch) object with predictions and a textual report
        from the InputMappedClassifier class
    '''
    if not jp.isThreadAttachedToJVM():
        jp.attachThreadToJVM()
    try:
        classifier = common.deserialize_weka_object(weka_classifier.sclassifier)
    except Exception:
        # Narrowed from bare except so KeyboardInterrupt/SystemExit propagate.
        raise Exception(
            "Only WEKA classifiers/models supported. Please provide a valid WEKA learner."
        )
    original_training_instances = ut.convert_bunch_to_weka_instances(original_data)
    instances = ut.convert_bunch_to_weka_instances(data)
    # Serialize the classifier together with its training header to a file;
    # InputMappedClassifier loads both from this path to build the mapping.
    tfile = common.TemporaryFile(flags='wb+')
    s = jp.JClass('weka.core.SerializationHelper')
    s.writeAll(tfile.name, [classifier, original_training_instances])
    mapped_classifier = jp.JClass(
        'weka.classifiers.misc.InputMappedClassifier')()
    mapped_classifier.setIgnoreCaseForNames(True)
    mapped_classifier.setTrim(True)
    # mapped_classifier.setSuppressMappingReport(True)
    # mc.setModelHeader(original_training_instances)
    mapped_classifier.setModelPath(tfile.name)
    predictions = []
    try:
        for instance in instances:
            predictions.append(int(mapped_classifier.classifyInstance(instance)))
        data["targetPredicted"] = predictions
    except Exception:
        # Narrowed from bare except so KeyboardInterrupt/SystemExit propagate.
        raise Exception(
            "Classifier not built. Please use the Build Classifier widget first."
        )
    report = mapped_classifier.toString()
    if MAPPING_REPORT_START in report:
        report = report[report.index(MAPPING_REPORT_START):]
    return data, report
def weka_local_apply_mapped_classifier_get_instances(input_dict):
    '''Applies an InputMappedClassifier to possibly-incompatible test
    instances and writes predicted class values back into them.

    :param input_dict: dict with serialized 'classifier',
        'original_training_instances' and 'instances'
    :return: dict with a 'mapping_report' string and the serialized,
        relabelled 'instances'
    :raises ValueError: if the instances have no class attribute set
    '''
    if not jp.isThreadAttachedToJVM():
        jp.attachThreadToJVM()
    MAPPING_REPORT_START = 'Attribute mappings:'
    classifier = common.deserialize_weka_object(input_dict['classifier'])
    original_training_instances = common.deserialize_weka_object(
        input_dict['original_training_instances'])
    instances = common.deserialize_weka_object(input_dict['instances'])
    # Persist classifier + training header to a file; InputMappedClassifier
    # reloads both from this path to construct the attribute mapping.
    tfile = common.TemporaryFile(flags='wb+')
    serialization_helper = jp.JClass('weka.core.SerializationHelper')
    serialization_helper.writeAll(
        tfile.name, [classifier, original_training_instances])
    mapped_classifier = jp.JClass(
        'weka.classifiers.misc.InputMappedClassifier')()
    mapped_classifier.setIgnoreCaseForNames(True)
    mapped_classifier.setTrim(True)
    #mappedClassifier.setSuppressMappingReport(True)
    #mc.setModelHeader(original_training_instances)
    mapped_classifier.setModelPath(tfile.name)
    # Apply the mapped classifier to the new data in place.
    if instances.classIndex() == -1:
        raise ValueError('Class not set!')
    class_attribute = instances.classAttribute()
    for instance in instances:
        predicted_index = int(mapped_classifier.classifyInstance(instance))
        instance.setClassValue(class_attribute.value(predicted_index))
    report = mapped_classifier.toString()
    if MAPPING_REPORT_START in report:
        report = report[report.index(MAPPING_REPORT_START):]
    return {
        'mapping_report': report,
        'instances': common.serialize_weka_object(instances),
    }
def print_classifier(self):
    '''Returns the textual representation of the wrapped WEKA classifier.

    :return: string from the deserialized model's toString()
    :raises Exception: if self.sclassifier is not a valid WEKA classifier
    '''
    if not jp.isThreadAttachedToJVM():
        jp.attachThreadToJVM()
    try:
        classifier = common.deserialize_weka_object(self.sclassifier)
        return classifier.toString()
    except Exception:
        # Narrowed from bare except so KeyboardInterrupt/SystemExit propagate.
        raise Exception(
            "Only WEKA classifiers/models supported. Please provide a valid WEKA learner."
        )
def weka_local_apply_classifier_and_get_instances(input_dict):
    '''Applies a built classifier to instances and writes the predicted
    class value into each instance.

    :param input_dict: dict with serialized 'instances' and a serialized,
        already-built 'classifier'
    :return: dict with the serialized, relabelled 'instances'
    :raises Exception: if the classifier cannot be deserialized/applied
    '''
    if not jp.isThreadAttachedToJVM():
        jp.attachThreadToJVM()
    instances = common.deserialize_weka_object(input_dict['instances'])
    if instances.classIndex() == -1:
        # No class set: follow the WEKA convention that the last attribute is the class.
        instances.setClassIndex(instances.numAttributes() - 1)
    classifier_serialized = input_dict['classifier']
    try:
        classifier = common.deserialize_weka_object(classifier_serialized)
        classAttribute = instances.classAttribute()
        for instance in instances:
            label_ind = int(classifier.classifyInstance(instance))
            instance.setClassValue(classAttribute.value(label_ind))
        return {'instances': common.serialize_weka_object(instances)}
    except Exception:
        # Narrowed from bare except so KeyboardInterrupt/SystemExit propagate.
        raise Exception("Classifier not built. Please use the Build Classifier widget first.")
def weka_local_print_model(input_dict):
    '''Prints a WEKA model.

    :param input_dict: dict with a serialized WEKA 'model'
    :return: dict with 'model_as_string', the model's toString() output
    :raises Exception: if 'model' is not a valid WEKA model
    '''
    if not jp.isThreadAttachedToJVM():
        jp.attachThreadToJVM()
    try:
        model = common.deserialize_weka_object(input_dict['model'])
        return {'model_as_string': model.toString()}
    except Exception:
        # Narrowed from bare except so KeyboardInterrupt/SystemExit propagate.
        raise Exception("Only WEKA classifiers/models supported. Please provide a valid WEKA model.")
def test2(): learners = [ library.weka_local_ibk({'params': None})['IBk_learner'], library.weka_local_j48({'params': None})['J48_learner'], library.weka_local_jrip({'params': None})['JRip_learner'], library.weka_local_k_star({'params': None})['KStar_learner'], library.weka_local_libsvm({'params': None})['LibSVM_learner'], library.weka_local_multilayer_perceptron( {'params': None})['Multilayer_Perceptron_learner'], library.weka_local_naive_bayes({'params': None})['Naive_Bayes_learner'], library.weka_local_rep_tree({'params': None})['REPTree_learner'], library.weka_local_random_forest({'params': None})['RandomForest_learner'], library.weka_local_random_tree({'params': None})['RandomTree_learner'], library.weka_local_smo({'params': None})['SMO_learner'] ] fn = normpath(join(dirname(__file__), 'weka', 'data', 'iris.arff')) instances = library.weka_local_arff_to_weka_instances( {'arff': open(fn).read()}) instances = instances['instances'] iris_instances = common.deserialize_weka_object(instances) class_attribute = iris_instances.classAttribute() for learner in learners: print("Learner: " + str(learner)) model = common.deserialize_weka_object(learner) model.buildClassifier(iris_instances) print '\nalgorithm: %s' % repr( type(model))[::-1][2:repr(type(model))[::-1].index('.')][::-1] for instance in iris_instances: original = int(instance.classValue()) new = int(model.classifyInstance(instance)) if original != new: print 'misclassified training example: %s was predicted as: %s' % ( str(instance), class_attribute.value(new)) print
def weka_local_instances_to_arff(input_dict):
    '''Converts WEKA Instances into their ARFF textual representation.

    :param input_dict: dict with serialized 'instances'
    :return: dict with the ARFF text under 'arff'
    '''
    if not jp.isThreadAttachedToJVM():
        jp.attachThreadToJVM()
    deserialized = common.deserialize_weka_object(input_dict['instances'])
    return {'arff': deserialized.toString()}
def weka_local_apply_mapped_classifier(input_dict):
    '''Applies an InputMappedClassifier to possibly-incompatible test
    instances and returns the predicted class labels.

    :param input_dict: dict with serialized 'classifier',
        'original_training_instances' and 'instances'
    :return: dict with a 'mapping_report' string and 'classes', the list of
        predicted class labels
    :raises ValueError: if the instances have no class attribute set
    '''
    if not jp.isThreadAttachedToJVM():
        jp.attachThreadToJVM()
    MAPPING_REPORT_START = 'Attribute mappings:'
    classifier = common.deserialize_weka_object(input_dict['classifier'])
    original_training_instances = common.deserialize_weka_object(
        input_dict['original_training_instances'])
    instances = common.deserialize_weka_object(input_dict['instances'])
    # Persist classifier + training header to a file; InputMappedClassifier
    # reloads both from this path to construct the attribute mapping.
    tfile = common.TemporaryFile(flags='wb+')
    serialization_helper = jp.JClass('weka.core.SerializationHelper')
    serialization_helper.writeAll(
        tfile.name, [classifier, original_training_instances])
    mapped_classifier = jp.JClass(
        'weka.classifiers.misc.InputMappedClassifier')()
    mapped_classifier.setIgnoreCaseForNames(True)
    mapped_classifier.setTrim(True)
    #mappedClassifier.setSuppressMappingReport(True)
    #mc.setModelHeader(original_training_instances)
    mapped_classifier.setModelPath(tfile.name)
    # Apply the mapped classifier to the new data.
    if instances.classIndex() == -1:
        raise ValueError('Class not set!')
    class_attribute = instances.classAttribute()
    classes = []
    for instance in instances:
        predicted_index = int(mapped_classifier.classifyInstance(instance))
        classes.append(class_attribute.value(predicted_index))
    report = mapped_classifier.toString()
    if MAPPING_REPORT_START in report:
        report = report[report.index(MAPPING_REPORT_START):]
    return {'mapping_report': report, 'classes': classes}
def test4(): generic_learners = ["weka.classifiers.bayes.BayesNet", "weka.classifiers.trees.HoeffdingTree"] fn = normpath(join(dirname(__file__), 'weka', 'data', 'iris.arff')) instances = library.weka_local_arff_to_weka_instances({'arff': open(fn).read()}) instances = instances['instances'] iris_instances = common.deserialize_weka_object(instances) class_attribute = iris_instances.classAttribute() for generic_learner in generic_learners: learner = library.weka_local_generic_learner({'weka_class':generic_learner,'params':None}) learner = learner['Generic_Weka_learner'] model = common.deserialize_weka_object(learner) model.buildClassifier(iris_instances) print '\nalgorithm: %s' % repr(type(model))[::-1][2:repr(type(model))[::-1].index('.')][::-1] for instance in iris_instances: original = int(instance.classValue()) new = int(model.classifyInstance(instance)) if original != new: print 'misclassified training example: %s was predicted as: %s' % ( str(instance), class_attribute.value(new))
def weka_local_print_model(input_dict):
    '''Prints a WEKA model.

    :param input_dict: dict with a serialized WEKA 'model'
    :return: dict with 'model_as_string', the model's toString() output
    :raises Exception: if 'model' is not a valid WEKA model
    '''
    if not jp.isThreadAttachedToJVM():
        jp.attachThreadToJVM()
    try:
        model = common.deserialize_weka_object(input_dict['model'])
        return {'model_as_string': model.toString()}
    except Exception:
        # Narrowed from bare except so KeyboardInterrupt/SystemExit propagate.
        raise Exception(
            "Only WEKA classifiers/models supported. Please provide a valid WEKA model."
        )
def apply_mapped_classifier_get_instances(weka_classifier, original_data, data):
    '''An advanced version of the Apply Classifier method. Addresses
    incompatible training and test data, and returns a dataset with predictions.

    :param weka_classifier: WekaClassifier object
    :param original_data: original training instances, bunch
    :param data: test instances, bunch
    :return: Dataset (Bunch) object with predictions and a textual report
        from the InputMappedClassifier class
    '''
    if not jp.isThreadAttachedToJVM():
        jp.attachThreadToJVM()
    try:
        classifier = common.deserialize_weka_object(weka_classifier.sclassifier)
    except Exception:
        # Narrowed from bare except so KeyboardInterrupt/SystemExit propagate.
        raise Exception("Only WEKA classifiers/models supported. Please provide a valid WEKA learner.")
    original_training_instances = ut.convert_bunch_to_weka_instances(original_data)
    instances = ut.convert_bunch_to_weka_instances(data)
    # Serialize the classifier together with its training header to a file;
    # InputMappedClassifier loads both from this path to build the mapping.
    tfile = common.TemporaryFile(flags='wb+')
    s = jp.JClass('weka.core.SerializationHelper')
    s.writeAll(tfile.name, [classifier, original_training_instances])
    mapped_classifier = jp.JClass('weka.classifiers.misc.InputMappedClassifier')()
    mapped_classifier.setIgnoreCaseForNames(True)
    mapped_classifier.setTrim(True)
    # mapped_classifier.setSuppressMappingReport(True)
    # mc.setModelHeader(original_training_instances)
    mapped_classifier.setModelPath(tfile.name)
    predictions = []
    try:
        for instance in instances:
            predictions.append(int(mapped_classifier.classifyInstance(instance)))
        data["targetPredicted"] = predictions
    except Exception:
        # Narrowed from bare except so KeyboardInterrupt/SystemExit propagate.
        raise Exception("Classifier not built. Please use the Build Classifier widget first.")
    report = mapped_classifier.toString()
    if MAPPING_REPORT_START in report:
        report = report[report.index(MAPPING_REPORT_START):]
    return data, report
def print_weka_model(weka_classifier):
    '''Outputs textual information about a Weka model.

    :param weka_classifier: a WekaClassifier object
    :return: a textual representation of the Weka model
    :raises Exception: if the serialized classifier cannot be deserialized
    '''
    if not jp.isThreadAttachedToJVM():
        jp.attachThreadToJVM()
    try:
        model = common.deserialize_weka_object(weka_classifier.sclassifier)
    except Exception:
        # Narrowed from bare except so KeyboardInterrupt/SystemExit propagate.
        raise Exception(
            "Only WEKA classifiers/models supported. Please provide a valid WEKA learner."
        )
    return model.toString()
def build_classifier(self, data):
    """Trains the wrapped classifier on the given dataset and stores the
    trained model back into self.sclassifier.

    :param data: bunch
    """
    if not jp.isThreadAttachedToJVM():
        jp.attachThreadToJVM()
    training_instances = ut.convert_bunch_to_weka_instances(data)
    classifier = common.deserialize_weka_object(self.sclassifier)
    if training_instances.classIndex() == -1:
        # Default to the WEKA convention that the last attribute is the class.
        training_instances.setClassIndex(training_instances.numAttributes() - 1)
        # raise ValueError('Class not set!')
    classifier.buildClassifier(training_instances)
    self.sclassifier = common.serialize_weka_object(classifier)
def weka_local_display_decision_tree(request, input_dict, output_dict, widget):
    """Visualization displaying a decision tree.

    Writes the classifier's DOT graph under MEDIA_ROOT, renders it with
    Graphviz to svg (or png for 'raster') and returns the rendered template.
    """
    import subprocess
    from mothra.settings import MEDIA_ROOT
    from workflows.helpers import ensure_dir
    if not jp.isThreadAttachedToJVM():
        jp.attachThreadToJVM()
    img_type = 'svg'
    if input_dict['img_type'] == 'raster':
        img_type = 'png'
    classifier = common.deserialize_weka_object(input_dict['classifier'])
    dot_text = classifier.graph()
    filename = '/'.join(
        [str(request.user.id), 'decisionTree-weka-%d.dot' % widget.id])
    destination_dot = '/'.join([MEDIA_ROOT, filename])
    ensure_dir(destination_dot)
    with open(destination_dot, 'w') as dot_file:
        dot_file.write(dot_text)
    # png/svg file
    filename = '/'.join([
        str(request.user.id),
        'decisionTree-weka-%d.%s' % (widget.id, img_type)
    ])
    destination_img = '/'.join([MEDIA_ROOT, filename])
    ensure_dir(destination_img)
    # SECURITY: pass an argument list (shell=False) instead of an
    # interpolated shell string — avoids shell injection and quoting issues.
    subprocess.call(
        ['dot', '-T%s' % img_type, destination_dot, '-o', destination_img])
    return render(request,
                  'visualizations/weka_local_display_decision_tree.html', {
                      'filename': filename,
                      'widget': widget,
                      'input_dict': input_dict
                  })
def weka_local_display_decision_tree(request, input_dict, output_dict, widget):
    """Visualization displaying a decision tree.

    Writes the classifier's DOT graph under MEDIA_ROOT, renders it with
    Graphviz to svg (or png for 'raster') and returns the rendered template.
    """
    import subprocess
    from mothra.settings import MEDIA_ROOT
    from workflows.helpers import ensure_dir
    if not jp.isThreadAttachedToJVM():
        jp.attachThreadToJVM()
    img_type = 'svg'
    if input_dict['img_type'] == 'raster':
        img_type = 'png'
    classifier = common.deserialize_weka_object(input_dict['classifier'])
    dot_text = classifier.graph()
    filename = '/'.join(
        [str(request.user.id), 'decisionTree-weka-%d.dot' % widget.id])
    destination_dot = '/'.join([MEDIA_ROOT, filename])
    ensure_dir(destination_dot)
    with open(destination_dot, 'w') as dot_file:
        dot_file.write(dot_text)
    # png/svg file
    filename = '/'.join([
        str(request.user.id),
        'decisionTree-weka-%d.%s' % (widget.id, img_type)
    ])
    destination_img = '/'.join([MEDIA_ROOT, filename])
    ensure_dir(destination_img)
    # SECURITY: pass an argument list (shell=False) instead of an
    # interpolated shell string — avoids shell injection and quoting issues.
    subprocess.call(
        ['dot', '-T%s' % img_type, destination_dot, '-o', destination_img])
    return render(request,
                  'visualizations/weka_local_display_decision_tree.html',
                  {'filename': filename,
                   'widget': widget,
                   'input_dict': input_dict})
def apply_classifier(self, data):
    """Applies the wrapped classifier to a dataset and collects predictions.

    :param data: bunch
    :return: the same bunch with a 'targetPredicted' list of label indices
    :raises ValueError: if the instances have no class attribute set
    """
    if not jp.isThreadAttachedToJVM():
        jp.attachThreadToJVM()
    test_instances = ut.convert_bunch_to_weka_instances(data)
    classifier = common.deserialize_weka_object(self.sclassifier)
    if test_instances.classIndex() == -1:
        raise ValueError('Class not set!')
    data["targetPredicted"] = [
        int(classifier.classifyInstance(instance))
        for instance in test_instances
    ]
    return data
def weka_local_cross_validate(input_dict):
    '''Cross-validates a WEKA learner on the given instances.

    :param input_dict: dict with serialized 'instances' and 'learner';
        optional 'folds' (default 10), 'classIndex' (-1 = weighted averages
        over all classes) and 'target' (class value used for the APV output)
    :return: dict with confusion matrix, accuracy, summary, per-class
        details, precision/recall/f/auc/tp_rate/fp_rate, and 'apv'
        (actual/predicted vectors for the target class)
    :raises Exception: wrapping any error raised during evaluation
    '''
    if not jp.isThreadAttachedToJVM():
        jp.attachThreadToJVM()
    try:
        num_folds = int(input_dict['folds'])
    except (KeyError, TypeError, ValueError):
        num_folds = 10
    try:
        class_index = int(input_dict['classIndex'])
    except (KeyError, TypeError, ValueError):
        class_index = -1
    instances = common.deserialize_weka_object(input_dict['instances'])
    if instances.classIndex() == -1:
        # WEKA convention: last attribute is the class.
        instances.setClassIndex(instances.numAttributes() - 1)
    classifier_serialized = input_dict['learner']
    try:
        classifier = common.deserialize_weka_object(classifier_serialized)
        # Renamed from 'eval' to avoid shadowing the builtin.
        evaluation = jp.JClass('weka.classifiers.Evaluation')(instances)
        rand = jp.JClass('java.util.Random')(1)
        evaluation.crossValidateModel(classifier, instances, num_folds, rand, [])
        if class_index == -1:
            pre, rec, f, auc, tp_r, fp_r = (
                evaluation.weightedPrecision(),
                evaluation.weightedRecall(),
                evaluation.weightedFMeasure(),
                evaluation.weightedAreaUnderROC(),
                evaluation.weightedTruePositiveRate(),
                # BUGFIX: was weightedTrueNegativeRate() (= 1 - FPR), but the
                # result is returned under the key 'fp_rate'.
                evaluation.weightedFalsePositiveRate())
        else:
            pre, rec, f, auc, tp_r, fp_r = (
                evaluation.precision(class_index),
                evaluation.recall(class_index),
                evaluation.fMeasure(class_index),
                evaluation.areaUnderROC(class_index),
                evaluation.truePositiveRate(class_index),
                # BUGFIX: was trueNegativeRate(class_index); see above.
                evaluation.falsePositiveRate(class_index))
        # collect predictions and their probabilities
        classAttribute = instances.classAttribute()
        classifier.buildClassifier(instances)
        actual_classes = []
        predicted_classes = []
        predicted_classes_probs = []
        for instance in instances:
            actual = classAttribute.value(int(instance.classValue()))
            predicted = classAttribute.value(
                int(classifier.classifyInstance(instance)))
            probs = classifier.distributionForInstance(instance)
            actual_classes.append(actual)
            predicted_classes.append(predicted)
            predicted_classes_probs.append(
                {classAttribute.value(i): p for i, p in enumerate(probs)})
        target = input_dict.get('target')
        if not target:
            target = classAttribute.value(0)
            print('Warning: observing the first class value {}'.format(target))
        # compute input for Viper
        mname = str(classifier.__getattribute__('class'))
        mname = mname[mname.find('weka'):-2]
        name = 'target class "{}": {}'.format(target, mname)
        apv = {'actual': [], 'predicted': [], 'name': name}
        for i, actual in enumerate(actual_classes):
            apv['actual'].append(1 if target == actual else 0)
            if predicted_classes[i] == target:
                apv['predicted'].append(predicted_classes_probs[i][target])
            else:
                apv['predicted'].append(0)
        return {'confusion_matrix': evaluation.toMatrixString(),
                'accuracy': 100 * (1 - evaluation.errorRate()),
                'summary': evaluation.toSummaryString("=== Summary ===", True),
                'accuracy_by_class': evaluation.toClassDetailsString(),
                'precision': pre, 'recall': rec, 'f': f, 'auc': auc,
                'tp_rate': tp_r, 'fp_rate': fp_r, 'apv': apv}
    except Exception as e:
        # 'except Exception, e' is Python-2-only; 'as' works on 2.6+ and 3.x.
        raise Exception("Error in weka_local_cross_validate() : " + str(e))
def weka_local_cross_validate(input_dict):
    '''Cross-validates a WEKA learner on the given instances.

    :param input_dict: dict with serialized 'instances' and 'learner';
        optional 'folds' (default 10), 'classIndex' (-1 = weighted averages
        over all classes) and 'target' (class value used for the APV output)
    :return: dict with confusion matrix, accuracy, summary, per-class
        details, precision/recall/f/auc/tp_rate/fp_rate, and 'apv'
        (actual/predicted vectors for the target class)
    :raises Exception: wrapping any error raised during evaluation
    '''
    if not jp.isThreadAttachedToJVM():
        jp.attachThreadToJVM()
    try:
        num_folds = int(input_dict['folds'])
    except (KeyError, TypeError, ValueError):
        num_folds = 10
    try:
        class_index = int(input_dict['classIndex'])
    except (KeyError, TypeError, ValueError):
        class_index = -1
    instances = common.deserialize_weka_object(input_dict['instances'])
    if instances.classIndex() == -1:
        # WEKA convention: last attribute is the class.
        instances.setClassIndex(instances.numAttributes() - 1)
    classifier_serialized = input_dict['learner']
    try:
        classifier = common.deserialize_weka_object(classifier_serialized)
        # Renamed from 'eval' to avoid shadowing the builtin.
        evaluation = jp.JClass('weka.classifiers.Evaluation')(instances)
        rand = jp.JClass('java.util.Random')(1)
        evaluation.crossValidateModel(classifier, instances, num_folds, rand, [])
        if class_index == -1:
            pre, rec, f, auc, tp_r, fp_r = (
                evaluation.weightedPrecision(),
                evaluation.weightedRecall(),
                evaluation.weightedFMeasure(),
                evaluation.weightedAreaUnderROC(),
                evaluation.weightedTruePositiveRate(),
                # BUGFIX: was weightedTrueNegativeRate() (= 1 - FPR), but the
                # result is returned under the key 'fp_rate'.
                evaluation.weightedFalsePositiveRate())
        else:
            pre, rec, f, auc, tp_r, fp_r = (
                evaluation.precision(class_index),
                evaluation.recall(class_index),
                evaluation.fMeasure(class_index),
                evaluation.areaUnderROC(class_index),
                evaluation.truePositiveRate(class_index),
                # BUGFIX: was trueNegativeRate(class_index); see above.
                evaluation.falsePositiveRate(class_index))
        # collect predictions and their probabilities
        classAttribute = instances.classAttribute()
        classifier.buildClassifier(instances)
        actual_classes = []
        predicted_classes = []
        predicted_classes_probs = []
        for instance in instances:
            actual = classAttribute.value(int(instance.classValue()))
            predicted = classAttribute.value(
                int(classifier.classifyInstance(instance)))
            probs = classifier.distributionForInstance(instance)
            actual_classes.append(actual)
            predicted_classes.append(predicted)
            predicted_classes_probs.append(
                {classAttribute.value(i): p for i, p in enumerate(probs)})
        target = input_dict.get('target')
        if not target:
            target = classAttribute.value(0)
            print('Warning: observing the first class value {}'.format(target))
        # compute input for Viper
        mname = str(classifier.__getattribute__('class'))
        mname = mname[mname.find('weka'):-2]
        name = 'target class "{}": {}'.format(target, mname)
        apv = {'actual': [], 'predicted': [], 'name': name}
        for i, actual in enumerate(actual_classes):
            apv['actual'].append(1 if target == actual else 0)
            if predicted_classes[i] == target:
                apv['predicted'].append(predicted_classes_probs[i][target])
            else:
                apv['predicted'].append(0)
        return {
            'confusion_matrix': evaluation.toMatrixString(),
            'accuracy': 100 * (1 - evaluation.errorRate()),
            'summary': evaluation.toSummaryString("=== Summary ===", True),
            'accuracy_by_class': evaluation.toClassDetailsString(),
            'precision': pre,
            'recall': rec,
            'f': f,
            'auc': auc,
            'tp_rate': tp_r,
            'fp_rate': fp_r,
            'apv': apv
        }
    except Exception as e:
        # 'except Exception, e' is Python-2-only; 'as' works on 2.6+ and 3.x.
        raise Exception("Error in weka_local_cross_validate() : " + str(e))