from weka.core.converters import Loader, Saver
from weka.core.dataset import Instances
from weka.filters import Filter

# Script: load weather.nominal.arff, drop attribute no 3 with the Remove
# filter, print both headers and save the filtered dataset as ARFF.
# `jvm`, `os` and `data_dir` are expected to be provided earlier in the file.

jvm.start()
try:
    # load weather.nominal
    fname = data_dir + os.sep + "weather.nominal.arff"
    print("\nLoading dataset: " + fname + "\n")
    loader = Loader(classname="weka.core.converters.ArffLoader")
    data = loader.load_file(fname)

    # output header
    print(Instances.template_instances(data))

    # remove attribute no 3
    print("\nRemove attribute no 3")
    fltr = Filter(classname="weka.filters.unsupervised.attribute.Remove", options=["-R", "3"])
    fltr.inputformat(data)
    filtered = fltr.filter(data)

    # output header
    print(Instances.template_instances(filtered))

    # save modified dataset
    saver = Saver(classname="weka.core.converters.ArffSaver")
    saver.save_file(filtered, data_dir + os.sep + "weather.nominal-filtered.arff")
finally:
    # Always shut the JVM down, even when loading/filtering/saving raised.
    jvm.stop()
def save_Arff(self, data, outputPath):
    """Write ``data`` to ``outputPath`` through a default-constructed ``Saver``.

    :param data: the dataset object handed to ``Saver.save_file``
    :param outputPath: destination file path
    """
    # No classname is passed, so whatever converter Saver defaults to is used.
    Saver().save_file(data, outputPath)
def getFunctionFeaturesArff(self, wholeSetArff, functionFeatureArff):
    """Extract one set from an ARFF file and save it to another file.

    Loads ``wholeSetArff`` via ``self.load_Arff``, selects the data for set
    index 2 via ``self.getSetDataBySetIndex`` and writes the result to
    ``functionFeatureArff`` with a default-constructed ``Saver``.

    :param wholeSetArff: path of the input file containing the whole set
    :param functionFeatureArff: path of the output file
    """
    whole_set = self.load_Arff(wholeSetArff)
    # NOTE(review): index 2 presumably identifies the function-feature set;
    # confirm against getSetDataBySetIndex.
    function_features = self.getSetDataBySetIndex(whole_set, 2)
    Saver().save_file(function_features, functionFeatureArff)
def main():
    """
    Runs a filter from the command-line. Calls JVM start/stop automatically.
    Use -h to see all options.

    The first input file (-i) is loaded as ARFF, used to set the filter's input
    format, filtered and saved as ARFF to the first output file (-o). If a
    second input file (-r) is given, it is pushed through the same filter and
    saved to the second output file (-s).

    Errors raised while filtering are printed rather than propagated; the JVM
    is stopped in either case.

    :raises Exception: if a second output file is given without a second input file
    """
    parser = argparse.ArgumentParser(
        description='Executes a filter from the command-line. Calls JVM start/stop automatically.')
    parser.add_argument("-j", metavar="classpath", dest="classpath", help="additional classpath, jars/directories")
    parser.add_argument("-X", metavar="heap", dest="heap", help="max heap size for jvm, e.g., 512m")
    parser.add_argument("-i", metavar="input1", dest="input1", required=True, help="input file 1")
    parser.add_argument("-o", metavar="output1", dest="output1", required=True, help="output file 1")
    parser.add_argument("-r", metavar="input2", dest="input2", help="input file 2")
    parser.add_argument("-s", metavar="output2", dest="output2", help="output file 2")
    parser.add_argument("-c", metavar="classindex", default="-1", dest="classindex",
                        help="1-based class attribute index")
    parser.add_argument("filter", help="filter classname, e.g., weka.filters.AllFilter")
    parser.add_argument("option", nargs=argparse.REMAINDER, help="additional filter options")
    parsed = parser.parse_args()
    if parsed.input2 is None and parsed.output2 is not None:
        raise Exception("No second input file provided ('-r ...')!")

    jars = []
    if parsed.classpath is not None:
        jars = parsed.classpath.split(os.pathsep)

    jvm.start(jars, max_heap_size=parsed.heap, packages=True)

    logger.debug("Commandline: " + utils.join_options(sys.argv[1:]))

    try:
        flter = Filter(parsed.filter)
        if parsed.option:
            flter.set_options(parsed.option)

        loader = Loader(classname="weka.core.converters.ArffLoader")
        in1 = loader.load_file(parsed.input1)

        # Translate the symbolic class indices; anything else is used as given.
        # NOTE(review): the help text advertises a 1-based index, but the numeric
        # value is passed to set_class_index unchanged while "first" maps to 0 --
        # confirm which convention is intended.
        cls = parsed.classindex
        if str(parsed.classindex) == "first":
            cls = "0"
        if str(parsed.classindex) == "last":
            cls = str(in1.num_attributes() - 1)
        in1.set_class_index(int(cls))

        # The first dataset determines the filter's input format.
        flter.set_inputformat(in1)
        out1 = flter.filter(in1)
        saver = Saver(classname="weka.core.converters.ArffSaver")
        saver.save_file(out1, parsed.output1)

        if parsed.input2 is not None:
            # The second dataset reuses the already initialized filter.
            in2 = loader.load_file(parsed.input2)
            in2.set_class_index(int(cls))
            out2 = flter.filter(in2)
            saver.save_file(out2, parsed.output2)
    except Exception as e:
        print(e)
    finally:
        # Honour the documented contract: the JVM is stopped on success and on failure.
        jvm.stop()
# Filter step: run the RemoveUseless attribute filter (options "-M 99.0") over `data`.
from weka.filters import Filter

print("Filter operation")
remove = Filter(
    classname="weka.filters.unsupervised.attribute.RemoveUseless",
    options=["-M", "99.0"],
)
remove.inputformat(data)
filtered = remove.filter(data)
print(filtered)

# Missing-value step. Despite the `em` names, the filter instantiated here is
# ReplaceMissingValues, not EMImputation; the EMImputation command line is kept
# below for reference only.
em = Filter(classname="weka.filters.unsupervised.attribute.ReplaceMissingValues")
em.inputformat(filtered)
em_imputed = em.filter(filtered)
print(em_imputed)
# weka.filters.unsupervised.attribute.EMImputation -N -1 -E 1.0E-4 -Q 1.0E-8

# Export step: write both intermediate datasets as CSV.
saver = Saver(classname="weka.core.converters.CSVSaver")
saver.save_file(em_imputed, "./Dataset/hepatitis.csv")
saver.save_file(filtered, "./Dataset/hepatitis_removed_useless.csv")
# saver.save_file(data, "./Dataset/hepatitis_weka.csv")
# converters.save_any_file("./final_small_em.csv")
# converters.save_any_file("/some/where/else/iris.csv")

# cls = Classifier(classname="weka.classifiers.trees.J48")
# print(cls)

jvm.stop()
def save_dataset(self, filepath):
    """Save ``self.dataset`` to ``filepath`` using Weka's ``CSVSaver``.

    :param filepath: destination path of the CSV file
    """
    csv_saver = Saver(classname="weka.core.converters.CSVSaver")
    csv_saver.save_file(self.dataset, filepath)