def main(): """ Just runs some example code. """ # load a dataset iris_file = helper.get_data_dir() + os.sep + "iris.arff" helper.print_info("Loading dataset: " + iris_file) loader = Loader("weka.core.converters.ArffLoader") iris_data = loader.load_file(iris_file) iris_data.class_is_last() # train classifier classifier = Classifier("weka.classifiers.trees.J48") classifier.build_classifier(iris_data) # save and read object helper.print_title("I/O: single object") outfile = tempfile.gettempdir() + os.sep + "j48.model" serialization.write(outfile, classifier) model = Classifier(jobject=serialization.read(outfile)) print(model) # save classifier and dataset header (multiple objects) helper.print_title("I/O: single object") serialization.write_all(outfile, [classifier, Instances.template_instances(iris_data)]) objects = serialization.read_all(outfile) for i, obj in enumerate(objects): helper.print_info("Object #" + str(i+1) + ":") if javabridge.get_env().is_instance_of(obj, javabridge.get_env().find_class("weka/core/Instances")): obj = Instances(jobject=obj) elif javabridge.get_env().is_instance_of(obj, javabridge.get_env().find_class("weka/classifiers/Classifier")): obj = Classifier(jobject=obj) print(obj)
def test_read_write_all(self):
    """
    Tests methods read_all and write_all.
    """
    fname = self.tempfile("readwrite.ser")
    self.delfile(fname)

    # serialize a list of java.lang.Integer objects
    lin = []
    for i in range(4):
        lin.append(javabridge.make_instance("java/lang/Integer", "(I)V", i))
    serialization.write_all(fname, lin)
    self.assertTrue(os.path.exists(fname), msg="Failed to write to " + fname + "?")

    # read them back and compare element by element
    lout = serialization.read_all(fname)
    self.assertIsNotNone(lout, msg="Failed to read from " + fname + "?")
    self.delfile(fname)
    self.assertEqual(len(lin), len(lout), msg="Number of elements differ")
    for i in range(len(lin)):
        iin = javabridge.call(lin[i], "intValue", "()I")
        iout = javabridge.call(lout[i], "intValue", "()I")
        self.assertEqual(iin, iout, msg="Input/output differ at #" + str(i))
def save_model(model, data, filename):
    """Save the model to the target caching file.

    The caches should be defined in the config file. See README and
    config.sample for reference.

    Args:
        model (obj): The model to be saved. Should be a weka.classifiers.Classifier object.
        data (obj): The training set to be cached.
        filename (str): The target file to save.

    Returns:
        True if the target caching is saved, otherwise False.
    """
    folder = os.path.join('caches', 'model')
    path = os.path.join(folder, filename + '.cache')
    build_if_not_exist(folder)
    serialization.write_all(path, [model, Instances.template_instances(data)])
    localizer_log.msg(
        "Saved cache of {target_name}.".format(target_name='model'))
    return True
def create_model(input_file, output_file):
    # Load data
    data = converters.load_any_file(input_file)
    data.class_is_last()  # set class attribute

    # filter data
    print_title("Filtering Data")
    discretize = Filter(
        classname="weka.filters.unsupervised.attribute.Discretize",
        options=["-B", "10", "-M", "-1.0", "-R", "first-last"])
    discretize.inputformat(data)  # let the filter know about the type of data to filter
    filtered_data = discretize.filter(data)
    print("Done! (believe it or not)")

    print_title("Build Classifier")
    classifier = Classifier(
        classname="weka.classifiers.trees.RandomForest",
        options=["-I", "100", "-K", "0", "-S", "1"])
    classifier.build_classifier(filtered_data)
    print("Done! (believe it or not)")

    serialization.write_all(output_file, [classifier, discretize])
    print("Model and filter saved to ", output_file)

    evaluation = Evaluation(data)  # initialize with priors
    evaluation.crossvalidate_model(classifier, filtered_data, 10, Random(42))  # 10-fold CV
    print(evaluation.summary())
    print("pctCorrect: " + str(evaluation.percent_correct))
    print("incorrect: " + str(evaluation.incorrect))
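A minimal companion sketch, not part of the original source: it shows how the classifier and filter written by create_model above could be restored with serialization.read_all and applied to new data. The function name, file names, and the incoming dataset are assumptions; it relies on the saved Discretize filter retaining its learned cut points after deserialization.

# Hypothetical sketch: load back what create_model() saved and score new data.
import weka.core.serialization as serialization
import weka.core.converters as converters
from weka.classifiers import Classifier
from weka.filters import Filter


def load_and_apply_model(model_file, new_data_file):
    # read_all returns the objects in the order they were written: [classifier, discretize]
    objects = serialization.read_all(model_file)
    classifier = Classifier(jobject=objects[0])
    discretize = Filter(jobject=objects[1])

    # apply the already-trained discretization to the new data (same attribute layout assumed),
    # then classify each filtered instance
    new_data = converters.load_any_file(new_data_file)
    new_data.class_is_last()
    filtered = discretize.filter(new_data)
    for inst in filtered:
        print(classifier.classify_instance(inst))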
def main(): """ Just runs some example code. """ # load a dataset iris_file = helper.get_data_dir() + os.sep + "iris.arff" helper.print_info("Loading dataset: " + iris_file) loader = Loader("weka.core.converters.ArffLoader") iris_data = loader.load_file(iris_file) iris_data.class_is_last() # train classifier classifier = Classifier("weka.classifiers.trees.J48") classifier.build_classifier(iris_data) # save and read object helper.print_title("I/O: model (using serialization module)") outfile = tempfile.gettempdir() + os.sep + "j48.model" serialization.write(outfile, classifier) model = Classifier(jobject=serialization.read(outfile)) print(model) # save classifier and dataset header (multiple objects) helper.print_title("I/O: model and header (using serialization module)") serialization.write_all( outfile, [classifier, Instances.template_instances(iris_data)]) objects = serialization.read_all(outfile) for i, obj in enumerate(objects): helper.print_info("Object #" + str(i + 1) + ":") if javabridge.get_env().is_instance_of( obj, javabridge.get_env().find_class("weka/core/Instances")): obj = Instances(jobject=obj) elif javabridge.get_env().is_instance_of( obj, javabridge.get_env().find_class( "weka/classifiers/Classifier")): obj = Classifier(jobject=obj) print(obj) # save and read object helper.print_title("I/O: just model (using Classifier class)") outfile = tempfile.gettempdir() + os.sep + "j48.model" classifier.serialize(outfile) model, _ = Classifier.deserialize(outfile) print(model) # save classifier and dataset header (multiple objects) helper.print_title("I/O: model and header (using Classifier class)") classifier.serialize(outfile, header=iris_data) model, header = Classifier.deserialize(outfile) print(model) if header is not None: print(header)
def do_execute(self):
    """
    The actual execution of the actor.

    :return: None if successful, otherwise error message
    :rtype: str
    """
    result = None
    cont = self.input.payload
    serialization.write_all(
        str(self.resolve_option("output")),
        [cont.get("Model").jobject, cont.get("Header").jobject])
    return result
def SimpleLogistic():
    # load a dataset
    loader = Loader(classname="weka.core.converters.ArffLoader")
    data = loader.load_file("First_trial_classification.arff")
    data.class_is_last()  # set class attribute

    cls = Classifier(classname="weka.classifiers.functions.SimpleLogistic")
    pout = PredictionOutput(
        classname="weka.classifiers.evaluation.output.prediction.PlainText")
    evl = Evaluation(data)
    evl.crossvalidate_model(cls, data, 10, Random(486), pout)
    print(evl.summary())
    print(pout.buffer_content())

    # save model (write_all expects a list of objects)
    serialization.write_all("SimpleLogistic2.model", [cls])
def serialize(self, ser_file, header=None):
    """
    Serializes the clusterer to the specified file.

    :param ser_file: the file to save the model to
    :type ser_file: str
    :param header: the (optional) dataset header to store alongside; recommended
    :type header: Instances
    """
    if (header is not None) and header.num_instances > 0:
        header = Instances.template_instances(header)

    if header is not None:
        serialization.write_all(ser_file, [self, header])
    else:
        serialization.write(ser_file, self)
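A hedged usage sketch for the serialize method above: train a clusterer, save it together with the dataset header, and read both objects back with serialization.read_all. The dataset file, clusterer choice, and function name are assumptions, not taken from the original source.

# Usage sketch (assumed file names and dataset) for Clusterer.serialize above.
import os
import tempfile
import weka.core.serialization as serialization
from weka.core.converters import Loader
from weka.core.dataset import Instances
from weka.clusterers import Clusterer


def clusterer_io_example():
    loader = Loader(classname="weka.core.converters.ArffLoader")
    data = loader.load_file("some_data.arff")  # assumed ARFF file, no class attribute set

    clusterer = Clusterer(classname="weka.clusterers.SimpleKMeans", options=["-N", "3"])
    clusterer.build_clusterer(data)

    outfile = os.path.join(tempfile.gettempdir(), "simplekmeans.model")
    clusterer.serialize(outfile, header=data)

    # read back both objects written by write_all: [clusterer, header]
    objects = serialization.read_all(outfile)
    model = Clusterer(jobject=objects[0])
    header = Instances(jobject=objects[1])
    print(model)
    print(header)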
def SMOreg():
    loader = Loader(classname="weka.core.converters.ArffLoader")
    data = loader.load_file("First_trial_regression.arff")
    data.class_is_last()

    cls = KernelClassifier(classname="weka.classifiers.functions.SMOreg",
                           options=["-N", "0"])
    kernel = Kernel(
        classname="weka.classifiers.functions.supportVector.RBFKernel",
        options=["-G", "0.2"])
    cls.kernel = kernel
    pout = PredictionOutput(
        classname="weka.classifiers.evaluation.output.prediction.PlainText")
    evl = Evaluation(data)
    evl.crossvalidate_model(cls, data, 10, Random(486), pout)
    print(evl.summary())
    print(pout.buffer_content())

    # save model (write_all expects a list of objects)
    serialization.write_all("SMOreg.model2", [cls])
def createTrainedModel():
    from weka.core.converters import Loader
    from weka.classifiers import Classifier, Evaluation
    from weka.attribute_selection import ASSearch, ASEvaluation, AttributeSelection
    from weka.core.classes import Random
    from weka.core.dataset import Instances
    import weka.core.serialization as serialization

    folderList = os.listdir(outputModel)
    i = 0
    classi = ""

    # gender model
    loader = Loader(classname="weka.core.converters.ArffLoader")
    data = loader.load_file(os.path.join(outputModel, "genderTrain.arff"))
    data.class_is_last()

    classi = "weka.classifiers.bayes.NaiveBayes"
    cls = Classifier(classname=classi)
    search = ASSearch(classname="weka.attributeSelection.Ranker",
                      options=["-1.7976931348623157E308", "-1"])
    #evaluator = ASEvaluation(classname="weka.attributeSelection.ChiSquaredAttributeEval")
    #attsel = AttributeSelection()
    #attsel.search(search)
    #attsel.evaluator(evaluator)
    #attsel.select_attributes(data)
    cls.build_classifier(data)

    serialization.write_all(
        os.path.join(outputModel, "GenderModel" + ".model"),
        [cls, Instances.template_instances(data)])

    evl = Evaluation(data)
    evl.crossvalidate_model(cls, data, 10, Random(1))
    print("Gender model predictions")
    print(cls)
    #print(evl.percent_correct)
    print(evl.summary())
    print(evl.class_details())

    # age model
    data = loader.load_file(os.path.join(outputModel, "ageTrain.arff"))
    data.class_is_last()
    classi = "weka.classifiers.bayes.NaiveBayes"
    cls = Classifier(classname=classi)
    search = ASSearch(classname="weka.attributeSelection.Ranker",
                      options=["-1.7976931348623157E308", "-1"])
    #evaluator = ASEvaluation(classname="weka.attributeSelection.ChiSquaredAttributeEval")
    #attsel = AttributeSelection()
    #attsel.search(search)
    #attsel.evaluator(evaluator)
    #attsel.select_attributes(data)
    #classi = "weka.classifiers.trees.J48"
    #classi = "weka.classifiers.functions.Logistic"
    #classi = "weka.classifiers.trees.RandomForest"
    #classi = "weka.classifiers.bayes.NaiveBayes"
    #classi = "weka.classifiers.functions.SMOreg"
    cls.build_classifier(data)
    print("Age model predictions")
    print(cls)

    serialization.write_all(
        os.path.join(outputModel, "AgeModel" + ".model"),
        [cls, Instances.template_instances(data)])

    evl = Evaluation(data)
    evl.crossvalidate_model(cls, data, 10, Random(1))
    #print(evl.percent_correct)
    print(evl.summary())
    print(evl.class_details())
    os._exit(0)
def saveClassifier(self, filename, path='/home/sbiastoch/Schreibtisch/classifiers/'):
    serialization.write_all(
        path + filename,
        [self.classifier, Instances.template_instances(self.data)])