Exemplo n.º 1
0
from weka.core.converters import Loader, Saver
from weka.core.dataset import Instances
from weka.filters import Filter

jvm.start()

# Everything that talks to the JVM runs inside try/finally so that the JVM
# is stopped even when loading, filtering or saving raises.
try:
    # load weather.nominal
    fname = data_dir + os.sep + "weather.nominal.arff"
    print("\nLoading dataset: " + fname + "\n")
    loader = Loader(classname="weka.core.converters.ArffLoader")
    data = loader.load_file(fname)

    # output header
    print(Instances.template_instances(data))

    # remove attribute no 3
    print("\nRemove attribute no 3")
    fltr = Filter(classname="weka.filters.unsupervised.attribute.Remove", options=["-R", "3"])
    fltr.inputformat(data)
    filtered = fltr.filter(data)

    # output header of the filtered data
    print(Instances.template_instances(filtered))

    # save modified dataset into the same directory as the input file
    saver = Saver(classname="weka.core.converters.ArffSaver")
    saver.save_file(filtered, data_dir + os.sep + "weather.nominal-filtered.arff")
finally:
    jvm.stop()

Exemplo n.º 2
0
 def save_Arff(self, data, outputPath):
     """
     Saves ``data`` to ``outputPath`` using a default-constructed Saver.
     """
     Saver().save_file(data, outputPath)
Exemplo n.º 3
0
    def getFunctionFeaturesArff(self, wholeSetArff, functionFeatureArff):
        """
        Loads ``wholeSetArff``, takes what ``getSetDataBySetIndex`` returns for
        index 2, and saves that result to ``functionFeatureArff``.
        """
        wholeSet = self.load_Arff(wholeSetArff)
        subset = self.getSetDataBySetIndex(wholeSet, 2)
        Saver().save_file(subset, functionFeatureArff)
def main():
    """
    Runs a filter from the command-line. Calls JVM start/stop automatically.
    Use -h to see all options.

    The first input file (-i) is loaded as ARFF, filtered and written to the
    first output file (-o). If a second input file (-r) is given, it is pushed
    through the same, already initialized filter and written to the second
    output file (-s).

    Raises an Exception if a second output file is given without a second
    input file. Errors that occur while filtering are printed, not re-raised.
    """
    parser = argparse.ArgumentParser(
        description='Executes a filter from the command-line. Calls JVM start/stop automatically.')
    parser.add_argument("-j", metavar="classpath", dest="classpath", help="additional classpath, jars/directories")
    parser.add_argument("-X", metavar="heap", dest="heap", help="max heap size for jvm, e.g., 512m")
    parser.add_argument("-i", metavar="input1", dest="input1", required=True, help="input file 1")
    parser.add_argument("-o", metavar="output1", dest="output1", required=True, help="output file 1")
    parser.add_argument("-r", metavar="input2", dest="input2", help="input file 2")
    parser.add_argument("-s", metavar="output2", dest="output2", help="output file 2")
    parser.add_argument("-c", metavar="classindex", default="-1", dest="classindex", help="1-based class attribute index")
    parser.add_argument("filter", help="filter classname, e.g., weka.filters.AllFilter")
    parser.add_argument("option", nargs=argparse.REMAINDER, help="additional filter options")
    parsed = parser.parse_args()
    if parsed.input2 is None and parsed.output2 is not None:
        raise Exception("No second input file provided ('-r ...')!")

    jars = []
    if parsed.classpath is not None:
        jars = parsed.classpath.split(os.pathsep)

    jvm.start(jars, max_heap_size=parsed.heap, packages=True)

    try:
        logger.debug("Commandline: " + utils.join_options(sys.argv[1:]))

        flter = Filter(parsed.filter)
        if parsed.option:
            flter.set_options(parsed.option)
        loader = Loader(classname="weka.core.converters.ArffLoader")
        in1 = loader.load_file(parsed.input1)

        # "first"/"last" are symbolic class indices; anything else must be an integer.
        # NOTE(review): the -c help text says "1-based", but the value is handed
        # to set_class_index unchanged -- confirm which convention is intended.
        cls = parsed.classindex
        if str(parsed.classindex) == "first":
            cls = "0"
        elif str(parsed.classindex) == "last":
            cls = str(in1.num_attributes() - 1)
        in1.set_class_index(int(cls))

        flter.set_inputformat(in1)
        out1 = flter.filter(in1)
        saver = Saver(classname="weka.core.converters.ArffSaver")
        saver.save_file(out1, parsed.output1)

        # The second dataset reuses the filter that was set up with the first one.
        if parsed.input2 is not None:
            in2 = loader.load_file(parsed.input2)
            in2.set_class_index(int(cls))
            out2 = flter.filter(in2)
            saver.save_file(out2, parsed.output2)
    except Exception as e:
        print(e)
    finally:
        # Honour the documented contract: the JVM is always shut down.
        jvm.stop()
# Filter function
from weka.filters import Filter

# Run the filtering and export inside try/finally so that the JVM is stopped
# even when one of the steps raises.
try:
    print("Filter operation")
    remove = Filter(classname="weka.filters.unsupervised.attribute.RemoveUseless",
                    options=["-M", "99.0"])
    remove.inputformat(data)
    filtered = remove.filter(data)
    print(filtered)

    # Missing-value imputation.
    # NOTE: despite the "em" names, the filter used here is ReplaceMissingValues
    # (mean/mode substitution according to the Weka documentation), not EM
    # imputation. The EM variant that was apparently considered would be:
    #   weka.filters.unsupervised.attribute.EMImputation -N -1 -E 1.0E-4 -Q 1.0E-8
    em = Filter(
        classname="weka.filters.unsupervised.attribute.ReplaceMissingValues")
    em.inputformat(filtered)
    em_imputed = em.filter(filtered)
    print(em_imputed)

    # Export both the imputed data and the RemoveUseless output as CSV.
    saver = Saver(classname="weka.core.converters.CSVSaver")
    saver.save_file(em_imputed, "./Dataset/hepatitis.csv")
    saver.save_file(filtered, "./Dataset/hepatitis_removed_useless.csv")
finally:
    jvm.stop()
Exemplo n.º 6
0
 def save_dataset(self, filepath):
     """
     Saves ``self.dataset`` to ``filepath`` using Weka's CSVSaver.
     """
     Saver(classname="weka.core.converters.CSVSaver").save_file(self.dataset, filepath)