Пример #1
0
def main():
    """
    Demonstrate attribute selection and attribute ranking on "anneal.arff".

    The dataset is read from the directory reported by
    ``helper.get_data_dir()`` and ``class_is_last()`` is called on it.
    Two experiments are then run and their results printed:

    1. subset selection with a BestFirst search and a CfsSubsetEval evaluator;
    2. ranking with a Ranker search and an InfoGainAttributeEval evaluator,
       with cross-validation switched on (2 folds, seed 42).
    """

    # locate the ARFF file and load it
    arff_path = os.sep.join((helper.get_data_dir(), "anneal.arff"))
    helper.print_info("Loading dataset: " + arff_path)
    dataset = Loader("weka.core.converters.ArffLoader").load_file(arff_path)
    dataset.class_is_last()

    # --- experiment 1: subset selection ---------------------------------
    helper.print_title("Attribute selection")
    subset_evaluator = ASEvaluation(
        classname="weka.attributeSelection.CfsSubsetEval",
        options=["-P", "1", "-E", "1"],
    )
    subset_search = ASSearch(
        classname="weka.attributeSelection.BestFirst",
        options=["-D", "1", "-N", "5"],
    )
    selector = AttributeSelection()
    selector.search(subset_search)
    selector.evaluator(subset_evaluator)
    selector.select_attributes(dataset)

    # report the outcome: count, indices in two representations, full summary
    attribute_count = selector.number_attributes_selected
    print("# attributes: " + str(attribute_count))
    print("attributes (as numpy array): " + str(selector.selected_attributes))
    indices_as_list = list(selector.selected_attributes)
    print("attributes (as list): " + str(indices_as_list))
    print("result string:\n" + selector.results_string)

    # --- experiment 2: cross-validated ranking ---------------------------
    helper.print_title("Attribute ranking (2-fold CV)")
    rank_evaluator = ASEvaluation(
        classname="weka.attributeSelection.InfoGainAttributeEval")
    rank_search = ASSearch(
        classname="weka.attributeSelection.Ranker",
        options=["-N", "-1"],
    )
    ranker = AttributeSelection()
    ranker.ranking(True)
    ranker.folds(2)
    ranker.crossvalidation(True)
    ranker.seed(42)
    ranker.search(rank_search)
    ranker.evaluator(rank_evaluator)
    ranker.select_attributes(dataset)
    print("ranked attributes:\n" + str(ranker.ranked_attributes))
    print("result string:\n" + ranker.results_string)
def main():
    """
    Run a subset selection and a ranking experiment on "anneal.arff".

    The file is looked up in ``helper.get_data_dir()``, loaded through an
    ArffLoader, and ``class_is_last()`` is called on the result. The first
    experiment pairs a BestFirst search with CfsSubsetEval; the second pairs
    a Ranker search with InfoGainAttributeEval and enables cross-validation
    with 2 folds and seed 42. The results of both are printed.
    """

    # build the dataset path and load the data
    base_dir = helper.get_data_dir()
    arff_file = base_dir + os.sep + "anneal.arff"
    helper.print_info("Loading dataset: " + arff_file)
    arff_loader = Loader("weka.core.converters.ArffLoader")
    instances = arff_loader.load_file(arff_file)
    instances.class_is_last()

    # subset selection: option lists are named so the calls stay short
    helper.print_title("Attribute selection")
    best_first_opts = ["-D", "1", "-N", "5"]
    cfs_opts = ["-P", "1", "-E", "1"]
    best_first = ASSearch(
        classname="weka.attributeSelection.BestFirst",
        options=best_first_opts)
    cfs_eval = ASEvaluation(
        classname="weka.attributeSelection.CfsSubsetEval",
        options=cfs_opts)
    subset_selection = AttributeSelection()
    subset_selection.search(best_first)
    subset_selection.evaluator(cfs_eval)
    subset_selection.select_attributes(instances)
    print("# attributes: " + str(subset_selection.number_attributes_selected))
    print("attributes: " + str(subset_selection.selected_attributes))
    print("result string:\n" + subset_selection.results_string)

    # ranking with cross-validation enabled
    helper.print_title("Attribute ranking (2-fold CV)")
    ranker_search = ASSearch(
        classname="weka.attributeSelection.Ranker",
        options=["-N", "-1"])
    info_gain = ASEvaluation(
        classname="weka.attributeSelection.InfoGainAttributeEval")
    ranking_selection = AttributeSelection()
    ranking_selection.ranking(True)
    ranking_selection.folds(2)
    ranking_selection.crossvalidation(True)
    ranking_selection.seed(42)
    ranking_selection.search(ranker_search)
    ranking_selection.evaluator(info_gain)
    ranking_selection.select_attributes(instances)
    print("ranked attributes:\n" + str(ranking_selection.ranked_attributes))
    print("result string:\n" + ranking_selection.results_string)
# Batch attribute selection: for every CSV dataset in data_dir, run a
# CfsSubsetEval evaluator driven by an EvolutionarySearch (cross-validation
# enabled, 10 folds, seed 1), print the outcome and append the result string
# to a per-dataset report file written next to the input.
data_dir = "\\\\egr-1l11qd2\\CLS_lab\\Junya Zhao\\Data driven model _paper [June 25_2018\\FeatureSelection\\EvlSearch\\"

# Suffix appended to an input path to form its report path.
report_suffix = "._report.csv"

# The directory name contains "[", which glob treats as the start of a
# character class; escape the directory part so it is matched literally.
# Report files also end in ".csv" and live in the same directory, so they are
# filtered out -- otherwise a second run would load earlier reports as data.
globbed_files = [
    path
    for path in glob.glob(glob.escape(data_dir) + "*.csv")
    if not path.endswith(report_suffix)
]

try:
    for csv_path in globbed_files:
        data = converters.load_any_file(csv_path)
        data.class_is_last()

        search = ASSearch(
            classname="weka.attributeSelection.EvolutionarySearch",
            options=[
                "-population-size", "200",
                "-generations", "500",
                "-crossover-probability", "0.6",
            ])
        # Every option flag needs its leading dash ("-E", not "E").
        evaluator = ASEvaluation(
            classname="weka.attributeSelection.CfsSubsetEval",
            options=["-P", "1", "-E", "1"])

        attsel = AttributeSelection()
        attsel.folds(10)
        attsel.crossvalidation(True)
        attsel.seed(1)
        attsel.search(search)
        attsel.evaluator(evaluator)
        attsel.select_attributes(data)

        # NOTE(review): this Evaluation object is only constructed and
        # printed; nothing is evaluated with it here -- confirm it is needed.
        evl = Evaluation(data)

        print("# attributes: " + str(attsel.number_attributes_selected))
        print("attributes: " + str(attsel.selected_attributes))
        print("result string:\n" + attsel.results_string)
        print(evl)

        # Write the report for each file; append mode keeps the output of
        # earlier runs.
        with open(f"{csv_path}{report_suffix}", "a") as outfile:
            outfile.write(attsel.results_string)
        # NOTE: str(attsel.selected_attributes) could be appended to
        # f"{csv_path}._label.txt" in the same way if a label file is wanted.
finally:
    # Stop the JVM even when processing one of the files fails.
    jvm.stop()