def try_params(n_instances, params, base, train, valid, test, istest):
    """
    Builds an AttributeSelectedClassifier (CorrelationAttributeEval + Ranker
    search) around ``base`` and evaluates it.

    :param n_instances: number of training instances to use (rounded to int)
    :param params: evaluator settings; reads the ``outputDetailedInfo`` key
    :param base: wrapped base classifier; its ``jobject`` is handed to the meta-classifier
    :param train: training data
    :param valid: validation data, used when ``istest`` is false
    :param test: test data, used when ``istest`` is true
    :param istest: evaluate on the test set instead of the validation set
    :return: the result of ``test_weka_classifier`` or ``train_and_eval_weka_classifier``
    """
    n_instances = int(round(n_instances))
    pprint(params)

    options = []
    if params['outputDetailedInfo'] == True:
        options.append("-D")

    param_search = rk.get_params()
    search = rk.get_class(param_search)
    evaluator = ASEvaluation(
        classname="weka.attributeSelection.CorrelationAttributeEval",
        options=options)

    clf = Classifier(
        classname="weka.classifiers.meta.AttributeSelectedClassifier")
    clf.set_property("evaluator", evaluator.jobject)
    clf.set_property("search", search.jobject)
    # AttributeSelectedClassifier exposes its wrapped learner as the bean
    # property "classifier"; it has no "base" property, so setting "base"
    # does not install the learner.
    clf.set_property("classifier", base.jobject)

    if istest:
        return test_weka_classifier(clf, train, test)
    return train_and_eval_weka_classifier(clf, train, valid, n_instances)
def Feature_Selection(infile): directory = os.getcwd() + '/' csvpath = directory + infile jvm.start(packages=True, max_heap_size="4g") print "\n\n" print "Loaded file: ", infile csvloader = Loader(classname="weka.core.converters.CSVLoader") csvdata = csvloader.load_file(csvpath) remover = Filter(classname="weka.filters.unsupervised.attribute.Remove", options=["-R", " 1"]) remover.inputformat(csvdata) filtered_data = remover.filter(csvdata) filtered_data.class_is_last() search = ASSearch(classname="weka.attributeSelection.BestFirst", options=["-D", "1", "-N", "5"]) evaluator = ASEvaluation(classname="weka.attributeSelection.CfsSubsetEval", options=["-P", "1", "-E", "1"]) attribs = AttributeSelection() attribs.search(search) attribs.evaluator(evaluator) attribs.select_attributes(filtered_data) print "Summary of Attribute Selection: " print attribs.results_string jvm.stop() return
def try_params(n_instances, params, base, train, valid, test, istest):
    """
    Builds an AttributeSelectedClassifier (GainRatioAttributeEval plus the
    search method named in ``params``) around ``base`` and evaluates it.

    :param n_instances: number of training instances to use (rounded to int)
    :param params: settings; reads ``missing_merge`` and ``search``
        (``'GreedyStepwise'``, ``'BestFirst'`` or ``'Ranker'``)
    :param base: wrapped base classifier; its ``jobject`` is handed to the meta-classifier
    :param train: training data
    :param valid: validation data, used when ``istest`` is false
    :param test: test data, used when ``istest`` is true
    :param istest: evaluate on the test set instead of the validation set
    :return: the result of ``test_weka_classifier`` or ``train_and_eval_weka_classifier``
    :raises ValueError: if ``params['search']`` is not a supported search method
    """
    n_instances = int(round(n_instances))
    pprint(params)

    options = []
    # NOTE(review): Weka documents -M for this evaluator as "treat missing
    # values as a separate value" -- confirm that 'missing_merge' is really
    # meant to switch -M on rather than off.
    if params['missing_merge'] == True:
        options.append("-M")

    if params['search'] == 'GreedyStepwise':
        search = gs.get_search(gs.get_params())
    elif params['search'] == 'BestFirst':
        search = bf.get_search(bf.get_params())
    elif params['search'] == 'Ranker':
        search = rk.get_search(rk.get_params())
    else:
        # previously fell through and failed later on an unbound local
        raise ValueError("unsupported search method: %r" % (params['search'],))

    evaluator = ASEvaluation(
        classname="weka.attributeSelection.GainRatioAttributeEval",
        options=options)
    clf = Classifier(
        classname="weka.classifiers.meta.AttributeSelectedClassifier")
    clf.set_property("evaluator", evaluator.jobject)
    clf.set_property("search", search.jobject)
    # AttributeSelectedClassifier exposes its wrapped learner as the bean
    # property "classifier"; it has no "base" property, so setting "base"
    # does not install the learner.
    clf.set_property("classifier", base.jobject)

    if istest:
        return test_weka_classifier(clf, train, test)
    return train_and_eval_weka_classifier(clf, train, valid, n_instances)
def try_params(n_instances, params, base, train, valid, test, istest):
    """
    Builds an AttributeSelectedClassifier (CfsSubsetEval plus GreedyStepwise
    or BestFirst search) around ``base`` and evaluates it.

    :param n_instances: number of training instances to use (rounded to int)
    :param params: settings; reads ``missingSeparate``, ``locallyPredictive``
        and ``search`` (``'GreedyStepwise'``; anything else selects BestFirst)
    :param base: wrapped base classifier; its ``jobject`` is handed to the meta-classifier
    :param train: training data
    :param valid: validation data, used when ``istest`` is false
    :param test: test data, used when ``istest`` is true
    :param istest: evaluate on the test set instead of the validation set
    :return: the result of ``test_weka_classifier`` or ``train_and_eval_weka_classifier``
    """
    n_instances = int(round(n_instances))
    pprint(params)

    options = []
    if params['missingSeparate'] == True:
        options.append("-M")
    if params['locallyPredictive'] == False:
        options.append("-L")

    if params['search'] == 'GreedyStepwise':
        search = gs.get_class(gs.get_params())
    else:
        search = bf.get_class(bf.get_params())

    evaluator = ASEvaluation(
        classname="weka.attributeSelection.CfsSubsetEval", options=options)
    clf = Classifier(
        classname="weka.classifiers.meta.AttributeSelectedClassifier")
    clf.set_property("evaluator", evaluator.jobject)
    # The configured search was built but never attached, so the
    # meta-classifier silently kept its default search method.
    clf.set_property("search", search.jobject)
    # AttributeSelectedClassifier exposes its wrapped learner as the bean
    # property "classifier"; it has no "base" property, so setting "base"
    # does not install the learner.
    clf.set_property("classifier", base.jobject)

    if istest:
        return test_weka_classifier(clf, train, test)
    return train_and_eval_weka_classifier(clf, train, valid, n_instances)
def get_evaluator(params, base):
    """
    Builds an AttributeSelectedClassifier using CfsSubsetEval with either
    GreedyStepwise or BestFirst search.

    :param params: settings; reads ``missingSeparate``, ``locallyPredictive``
        and ``search`` (``'GreedyStepwise'``; anything else selects BestFirst)
    :param base: wrapped base classifier; its ``jobject`` is handed to the meta-classifier
    :return: the configured meta-classifier
    """
    pprint(params)

    options = []
    if params['missingSeparate'] == True:
        options.append("-M")
    if params['locallyPredictive'] == False:
        options.append("-L")

    if params['search'] == 'GreedyStepwise':
        search = gs.get_search(gs.get_params())
    else:
        search = bf.get_search(bf.get_params())

    evaluator = ASEvaluation(
        classname="weka.attributeSelection.CfsSubsetEval", options=options)
    clf = Classifier(
        classname="weka.classifiers.meta.AttributeSelectedClassifier")
    clf.set_property("evaluator", evaluator.jobject)
    clf.set_property("search", search.jobject)
    # AttributeSelectedClassifier exposes its wrapped learner as the bean
    # property "classifier"; it has no "base" property, so setting "base"
    # does not install the learner.
    clf.set_property("classifier", base.jobject)
    return clf
def get_evaluator(params, base):
    """
    Builds an AttributeSelectedClassifier using InfoGainAttributeEval with a
    Ranker search.

    :param params: settings; reads ``missingMerge`` and ``binarizeNumericAttributes``
    :param base: wrapped base classifier; its ``jobject`` is handed to the meta-classifier
    :return: the configured meta-classifier
    """
    pprint(params)

    options = []
    # -M is only passed when merging of missing values is switched off
    if params['missingMerge'] == False:
        options.append("-M")
    if params['binarizeNumericAttributes'] == True:
        options.append("-B")

    search = rk.get_search(rk.get_params())
    evaluator = ASEvaluation(
        classname="weka.attributeSelection.InfoGainAttributeEval",
        options=options)
    clf = Classifier(
        classname="weka.classifiers.meta.AttributeSelectedClassifier")
    clf.set_property("evaluator", evaluator.jobject)
    clf.set_property("search", search.jobject)
    # AttributeSelectedClassifier exposes its wrapped learner as the bean
    # property "classifier"; it has no "base" property, so setting "base"
    # does not install the learner.
    clf.set_property("classifier", base.jobject)
    return clf
def get_evaluator(params, base):
    """
    Builds an AttributeSelectedClassifier using CorrelationAttributeEval with
    a Ranker search.

    :param params: settings; reads the ``outputDetailedInfo`` key
    :param base: wrapped base classifier; its ``jobject`` is handed to the meta-classifier
    :return: the configured meta-classifier
    """
    pprint(params)

    options = []
    if params['outputDetailedInfo'] == True:
        options.append("-D")

    search = rk.get_search(rk.get_params())
    evaluator = ASEvaluation(
        classname="weka.attributeSelection.CorrelationAttributeEval",
        options=options)
    clf = Classifier(
        classname="weka.classifiers.meta.AttributeSelectedClassifier")
    clf.set_property("evaluator", evaluator.jobject)
    clf.set_property("search", search.jobject)
    # AttributeSelectedClassifier exposes its wrapped learner as the bean
    # property "classifier"; it has no "base" property, so setting "base"
    # does not install the learner.
    clf.set_property("classifier", base.jobject)
    return clf
def get_evaluator(params, base):
    """
    Builds an AttributeSelectedClassifier using ReliefFAttributeEval with a
    Ranker search.

    :param params: settings; reads ``weightByDistance``, ``sampleSize``,
        ``numNeighbours`` and ``sigma``
    :param base: wrapped base classifier; its ``jobject`` is handed to the meta-classifier
    :return: the configured meta-classifier
    """
    pprint(params)

    options = []
    if params['weightByDistance'] == True:
        options.append("-W")
    options.extend([
        "-M", str(params['sampleSize']),
        "-K", str(params['numNeighbours']),
        "-A", str(params['sigma']),
    ])

    search = rk.get_search(rk.get_params())
    evaluator = ASEvaluation(
        classname="weka.attributeSelection.ReliefFAttributeEval",
        options=options)
    clf = Classifier(
        classname="weka.classifiers.meta.AttributeSelectedClassifier")
    clf.set_property("evaluator", evaluator.jobject)
    clf.set_property("search", search.jobject)
    # AttributeSelectedClassifier exposes its wrapped learner as the bean
    # property "classifier"; it has no "base" property, so setting "base"
    # does not install the learner.
    clf.set_property("classifier", base.jobject)
    return clf
def get_evaluator(params, base):
    """
    Builds an AttributeSelectedClassifier using SymmetricalUncertAttributeEval
    with a Ranker search.

    :param params: settings; reads the ``missing_merge`` key
    :param base: wrapped base classifier; its ``jobject`` is handed to the meta-classifier
    :return: the configured meta-classifier
    """
    pprint(params)

    options = []
    # NOTE(review): Weka documents -M for this evaluator as "treat missing
    # values as a separate value" -- confirm that 'missing_merge' is really
    # meant to switch -M on rather than off.
    if params['missing_merge'] == True:
        options.append("-M")

    # Ranker is always used here; params['search'] is not consulted.
    search = rk.get_search(rk.get_params())
    evaluator = ASEvaluation(
        classname="weka.attributeSelection.SymmetricalUncertAttributeEval",
        options=options)
    clf = Classifier(
        classname="weka.classifiers.meta.AttributeSelectedClassifier")
    clf.set_property("evaluator", evaluator.jobject)
    clf.set_property("search", search.jobject)
    # AttributeSelectedClassifier exposes its wrapped learner as the bean
    # property "classifier"; it has no "base" property, so setting "base"
    # does not install the learner.
    clf.set_property("classifier", base.jobject)
    return clf
def try_params(n_instances, params, base, train, valid, test, istest):
    """
    Builds an AttributeSelectedClassifier (InfoGainAttributeEval + default
    Ranker search) around ``base`` and evaluates it.

    :param n_instances: number of training instances to use (rounded to int)
    :param params: settings; reads ``missingMerge`` and ``binarizeNumericAttributes``
    :param base: wrapped base classifier; its ``jobject`` is handed to the meta-classifier
    :param train: training data
    :param valid: validation data, used when ``istest`` is false
    :param test: test data, used when ``istest`` is true
    :param istest: evaluate on the test set instead of the validation set
    :return: the result of ``test_weka_classifier`` or ``train_and_eval_weka_classifier``
    """
    n_instances = int(round(n_instances))
    pprint(params)

    options = []
    # -M is only passed when merging of missing values is switched off
    if params['missingMerge'] == False:
        options.append("-M")
    if params['binarizeNumericAttributes'] == True:
        options.append("-B")

    search = ASSearch(classname="weka.attributeSelection.Ranker")
    evaluator = ASEvaluation(
        classname="weka.attributeSelection.InfoGainAttributeEval",
        options=options)
    clf = Classifier(
        classname="weka.classifiers.meta.AttributeSelectedClassifier")
    clf.set_property("evaluator", evaluator.jobject)
    clf.set_property("search", search.jobject)
    # AttributeSelectedClassifier exposes its wrapped learner as the bean
    # property "classifier"; it has no "base" property, so setting "base"
    # does not install the learner.
    clf.set_property("classifier", base.jobject)

    if istest:
        return test_weka_classifier(clf, train, test)
    return train_and_eval_weka_classifier(clf, train, valid, n_instances)
def evaluator(self):
    """
    Fetches the attribute evaluator from the wrapped Java object via its
    ``getEvaluator`` method.

    :return: the evaluator in use
    :rtype: ASEvaluation
    """
    signature = "()Lweka/attributeSelection/ASEvaluation;"
    java_evaluator = javabridge.call(self.jobject, "getEvaluator", signature)
    return ASEvaluation(jobject=java_evaluator)
def main():
    """
    Runs the example code: subset selection on the anneal dataset, followed
    by a 2-fold cross-validated attribute ranking.
    """
    # load a dataset
    dataset_path = helper.get_data_dir() + os.sep + "anneal.arff"
    helper.print_info("Loading dataset: " + dataset_path)
    arff_loader = Loader("weka.core.converters.ArffLoader")
    dataset = arff_loader.load_file(dataset_path)
    dataset.class_is_last()

    # perform attribute selection
    helper.print_title("Attribute selection")
    best_first = ASSearch(
        classname="weka.attributeSelection.BestFirst",
        options=["-D", "1", "-N", "5"])
    cfs_eval = ASEvaluation(
        classname="weka.attributeSelection.CfsSubsetEval",
        options=["-P", "1", "-E", "1"])
    selector = AttributeSelection()
    selector.search(best_first)
    selector.evaluator(cfs_eval)
    selector.select_attributes(dataset)

    count = selector.number_attributes_selected
    print("# attributes: " + str(count))
    chosen = selector.selected_attributes
    print("attributes (as numpy array): " + str(chosen))
    chosen_list = list(selector.selected_attributes)
    print("attributes (as list): " + str(chosen_list))
    print("result string:\n" + selector.results_string)

    # perform ranking
    helper.print_title("Attribute ranking (2-fold CV)")
    ranker = ASSearch(
        classname="weka.attributeSelection.Ranker", options=["-N", "-1"])
    info_gain = ASEvaluation("weka.attributeSelection.InfoGainAttributeEval")
    selector = AttributeSelection()
    selector.ranking(True)
    selector.folds(2)
    selector.crossvalidation(True)
    selector.seed(42)
    selector.search(ranker)
    selector.evaluator(info_gain)
    selector.select_attributes(dataset)

    ranked = selector.ranked_attributes
    print("ranked attributes:\n" + str(ranked))
    print("result string:\n" + selector.results_string)
def all_feature(file):
    """
    Ranks all attributes of a dataset with four attribute evaluators.

    The JVM is started for the duration of the call and is always stopped
    again, even if loading or ranking raises.

    :param file: path of the dataset to load (class attribute is the last one)
    :return: tuple ``(chi, info_gain, gain_ratio, symmetric_uncertainty)`` of
        integer arrays taken from the first column of each ranking
    """
    # order matters: it defines the order of the returned tuple
    evaluator_classes = (
        "weka.attributeSelection.ChiSquaredAttributeEval",
        "weka.attributeSelection.InfoGainAttributeEval",
        "weka.attributeSelection.GainRatioAttributeEval",
        "weka.attributeSelection.SymmetricalUncertAttributeEval",
    )

    jvm.start(packages=True)
    try:
        data = converters.load_any_file(file)
        data.class_is_last()

        search = ASSearch(
            classname="weka.attributeSelection.Ranker",
            options=["-T", "-1.7976931348623157E308", "-N", "-1"])
        attsel = AttributeSelection()
        attsel.search(search)

        rankings = []
        for classname in evaluator_classes:
            attsel.evaluator(ASEvaluation(classname=classname))
            attsel.select_attributes(data)
            rankings.append(attsel.ranked_attributes[:, 0].astype(int))
    finally:
        # release the JVM even when something above fails
        jvm.stop()

    return tuple(rankings)
def showAttributeRanking(self, data):
    """
    Ranks the attributes of ``data`` with InfoGainAttributeEval/Ranker and
    prints the number of selected attributes, their indices and the results
    string.

    :param data: the dataset to rank
    """
    ranker = ASSearch(
        classname="weka.attributeSelection.Ranker",
        options=["-T", "-1.7976931348623157E308", "-N", "-1"])
    info_gain = ASEvaluation(
        classname="weka.attributeSelection.InfoGainAttributeEval")

    selector = AttributeSelection()
    selector.set_search(ranker)
    selector.set_evaluator(info_gain)
    selector.select_attributes(data)

    count = selector.get_number_attributes_selected()
    print("# attributes: " + str(count))
    chosen = selector.get_selected_attributes()
    print("attributes: " + str(chosen))
    summary = selector.to_results_string()
    print("result string:\n" + summary)
def filter_data(self, data):
    """
    Applies the supervised AttributeSelection filter (CfsSubsetEval +
    BestFirst) to ``data``.

    :param data: the dataset to filter
    :return: the filtered dataset
    """
    print("Filtering Data..\n")
    attsel_filter = Filter(
        classname="weka.filters.supervised.attribute.AttributeSelection")
    subset_eval = ASEvaluation(
        classname="weka.attributeSelection.CfsSubsetEval",
        options=["-P", "1", "-E", "1"])
    best_first = ASSearch(
        classname="weka.attributeSelection.BestFirst",
        options=["-D", "1", "-N", "5"])

    for prop, component in (("evaluator", subset_eval),
                            ("search", best_first)):
        attsel_filter.set_property(prop, component.jobject)

    attsel_filter.inputformat(data)
    return attsel_filter.filter(data)
def featureSelection(self):
    """
    Runs GeneticSearch/CfsSubsetEval attribute selection on
    ``self.original_data``.

    Stores the selected attribute indices in ``self.selected_features``,
    their count in ``self.num_features`` and the reduced dataset in
    ``self.data_selected``.
    """
    genetic_search = ASSearch(
        classname="weka.attributeSelection.GeneticSearch",
        options=["-Z", "1024", "-G", "20", "-C", "0.6", "-M", "0.3"])
    subset_eval = ASEvaluation(
        classname="weka.attributeSelection.CfsSubsetEval",
        options=["-P", "1", "-E", "1"])

    selector = AttributeSelection()
    selector.search(genetic_search)
    selector.evaluator(subset_eval)
    selector.select_attributes(self.original_data)

    self.selected_features = selector.selected_attributes
    self.num_features = selector.number_attributes_selected
    reduced = selector.reduce_dimensionality(self.original_data)
    self.data_selected = reduced
def use_filter(data):
    """
    Performs attribute selection through the AttributeSelection filter
    (CfsSubsetEval + backward GreedyStepwise) and prints the filtered data.

    :param data: the dataset to use
    :type data: Instances
    """
    print("\n2. Filter")
    attsel_filter = Filter(
        classname="weka.filters.supervised.attribute.AttributeSelection")
    subset_eval = ASEvaluation(
        classname="weka.attributeSelection.CfsSubsetEval")
    backward_search = ASSearch(
        classname="weka.attributeSelection.GreedyStepwise", options=["-B"])

    attsel_filter.set_property("evaluator", subset_eval.jobject)
    attsel_filter.set_property("search", backward_search.jobject)
    attsel_filter.inputformat(data)

    reduced = attsel_filter.filter(data)
    print(str(reduced))
def use_low_level(data):
    """
    Performs attribute selection by calling the attribute selection API
    directly, then prints the selected attribute indices.

    :param data: the dataset to use
    :type data: Instances
    """
    print("\n3. Low-level")
    selector = AttributeSelection()
    subset_eval = ASEvaluation(
        classname="weka.attributeSelection.CfsSubsetEval")
    backward_search = ASSearch(
        classname="weka.attributeSelection.GreedyStepwise", options=["-B"])

    # configure the underlying Java object through its wrapper
    java_selector = selector.jwrapper
    java_selector.setEvaluator(subset_eval.jobject)
    java_selector.setSearch(backward_search.jobject)

    selector.select_attributes(data)
    chosen = selector.selected_attributes.tolist()
    print("selected attribute indices (starting with 0):\n" + str(chosen))
def cfs(table, cores):
    """
    Runs CfsSubsetEval/BestFirst attribute selection on a CSV table.

    :param table: path of the CSV file to load (class attribute is the last one)
    :param cores: value passed to the evaluator's ``-P`` and ``-E`` options;
        may be an int or a string
    :return: list of the selected attribute indices
    """
    loader = Loader("weka.core.converters.CSVLoader")
    anneal_data = loader.load_file(table)
    anneal_data.class_is_last()
    logger.info("Running attribute selection for: %s. Please, wait a moment.",
                str(table.split("/")[-1]))

    # Weka options are strings; accept an int core count as well
    cores = str(cores)
    search = ASSearch(
        classname="weka.attributeSelection.BestFirst",
        options=["-D", "0", "-N", "5"])
    evaluation = ASEvaluation(
        classname="weka.attributeSelection.CfsSubsetEval",
        options=["-Z", "-P", cores, "-E", cores])

    attsel = AttributeSelection()
    attsel.search(search)
    attsel.evaluator(evaluation)
    attsel.select_attributes(anneal_data)

    selected = attsel.selected_attributes
    logger.info("Selected attributes: %s", str(selected))
    anneal_data.delete(index=None)  # TODO: deleting the instances does not work yet
    return list(selected)
def relieff(filter_data, feature_names):
    """
    Selects attributes with ReliefFAttributeEval and a Ranker search.

    :param filter_data: the dataset to run the selection on
    :param feature_names: indexable collection mapping attribute index to name
    :return: list of feature names for the selected attributes, with the last
        entry of the selection dropped
    """
    # Ranker search: score threshold (-T) and number of attributes to keep (-N)
    ranker = ASSearch(
        classname="weka.attributeSelection.Ranker",
        options=["-T", "-1.7976931348623157E308", "-N", "-1"])
    # last option is the number of nearest neighbours
    relief_eval = ASEvaluation(
        classname="weka.attributeSelection.ReliefFAttributeEval",
        options=["-M", "-1", "-D", "1", "-K", "10"])

    selector = AttributeSelection()
    selector.search(ranker)
    selector.evaluator(relief_eval)
    selector.select_attributes(filter_data)

    # the wrapper returns the class column index as the last entry, so drop it
    names = []
    for index in selector.selected_attributes[:-1]:
        names.append(feature_names[index])
    return names
def use_filter(data):
    """
    Performs attribute selection through the AttributeSelection filter
    wrapper, then prints the filtered data and the filter's evaluator and
    search.

    :param data: the dataset to use
    :type data: Instances
    """
    print("\n2. Filter")
    attsel_filter = wfilters.AttributeSelection()
    subset_eval = ASEvaluation(
        classname="weka.attributeSelection.CfsSubsetEval")
    backward_search = ASSearch(
        classname="weka.attributeSelection.GreedyStepwise", options=["-B"])

    attsel_filter.evaluator = subset_eval
    attsel_filter.search = backward_search
    attsel_filter.inputformat(data)

    reduced = attsel_filter.filter(data)
    print(str(reduced))
    print("Evaluator:\n", attsel_filter.evaluator)
    print("Search:\n", attsel_filter.search)
def feature_selection_weka(x_train, y_train, x_test, input_path, features):
    """
    Restricts the train and test frames to the features chosen by Weka's
    CfsSubsetEval/GreedyStepwise ranking.

    The chosen columns are cached in
    ``input_path + 'selected_features_weka_<features>.csv'``; when that file
    exists its column names are reused and Weka is not invoked.

    :param x_train: training features (DataFrame)
    :param y_train: training target, written as the ``target`` column for Weka
    :param x_test: features to filter alongside the training set (DataFrame)
    :param input_path: prefix (directory incl. separator) for the cache file
    :param features: percentage of the training columns to keep
    :return: tuple ``(x_train_filtered, x_val_filtered)``
    """
    n_keep = int(x_train.shape[1] * (features / 100.0))
    if not os.path.exists('Weka'):
        os.mkdir('Weka')

    cache_file = input_path + f'selected_features_weka_{features}.csv'
    if os.path.exists(cache_file):
        selected_features = pd.read_csv(cache_file, index_col=0).columns
    else:
        # drop columns whose values never differ from the first row
        varying = (x_train != x_train.iloc[0]).any()
        x_train = x_train.loc[:, varying]

        weka_frame = x_train.copy()
        weka_frame.columns = [f"{i}a" for i in range(weka_frame.shape[1])]
        weka_frame['target'] = y_train
        weka_frame.to_csv('Weka/train_weka_format.csv', index=False)

        from weka.attribute_selection import ASEvaluation, AttributeSelection, ASSearch
        from weka.core.converters import Loader, Saver

        csv_loader = Loader(classname="weka.core.converters.CSVLoader")
        data = csv_loader.load_file('Weka/train_weka_format.csv',
                                    class_index='last')
        greedy = ASSearch(
            classname="weka.attributeSelection.GreedyStepwise",
            options=["-C", "-R", "-N", f"{n_keep}"])
        subset_eval = ASEvaluation(
            classname="weka.attributeSelection.CfsSubsetEval",
            options=["-P", "1", "-E", "1", "-L"])
        selector = AttributeSelection()
        selector.search(greedy)
        selector.evaluator(subset_eval)
        selector.select_attributes(data)

        ranking = pd.DataFrame(selector.ranked_attributes,
                               columns=['Feature', 'Rank'])
        ranking['Feature'] = ranking['Feature'].astype(int)
        # label-based slice: inclusive of row n_keep - 1
        top_positions = ranking.loc[:n_keep - 1, 'Feature']

        chosen = x_train.iloc[:, top_positions]
        chosen.to_csv(cache_file)
        selected_features = chosen.columns

    x_train_filtered = x_train.loc[:, selected_features]
    x_val_filtered = x_test.loc[:, selected_features]
    return x_train_filtered, x_val_filtered
def information_gain(filter_data, feature_names):
    """
    Selects attributes with InfoGainAttributeEval and a Ranker search.

    :param filter_data: the dataset to run the selection on
    :param feature_names: indexable collection mapping attribute index to name
    :return: list of feature names for the selected attributes, with the last
        entry of the selection dropped
    """
    # -T controls the score threshold, -N how many attributes are returned
    ranker = ASSearch(
        classname="weka.attributeSelection.Ranker",
        options=["-T", "-1.7976931348623157E308", "-N", "-1"])
    # the evaluator is run without options
    info_gain_eval = ASEvaluation(
        classname="weka.attributeSelection.InfoGainAttributeEval",
        options=[])

    selector = AttributeSelection()
    selector.search(ranker)
    selector.evaluator(info_gain_eval)
    selector.select_attributes(filter_data)

    # the wrapper returns the class column index as the last entry, so drop it
    names = []
    for index in selector.selected_attributes[:-1]:
        names.append(feature_names[index])
    return names
def use_classifier(data):
    """
    Performs attribute selection through the AttributeSelectedClassifier
    meta-classifier (CfsSubsetEval + backward GreedyStepwise around J48) and
    prints a 10-fold cross-validation summary.

    :param data: the dataset to use
    :type data: Instances
    """
    print("\n1. Meta-classifier")
    meta = Classifier(
        classname="weka.classifiers.meta.AttributeSelectedClassifier")
    subset_eval = ASEvaluation(
        classname="weka.attributeSelection.CfsSubsetEval")
    backward_search = ASSearch(
        classname="weka.attributeSelection.GreedyStepwise", options=["-B"])
    j48 = Classifier(classname="weka.classifiers.trees.J48")

    # Nested option strings need careful escaping; assigning the Java objects
    # through bean properties is simpler and less error prone.
    for prop, component in (("classifier", j48),
                            ("evaluator", subset_eval),
                            ("search", backward_search)):
        meta.set_property(prop, component.jobject)

    cv = Evaluation(data)
    cv.crossvalidate_model(meta, data, 10, Random(1))
    print(cv.summary())
def _parse_ranked_attributes(output):
    """Extracts ``{attribute name: score}`` from the "Ranked attributes" section of a results string."""
    scores = {}
    in_ranking = False
    for line in output.split('\n'):
        if in_ranking:
            # the first line with too few fields ends the section
            if len(line.split(' ')) <= 2:
                break
            tokens = [tok for tok in line.split(' ') if tok != '']
            # first token is the score, second is skipped, the rest is the name
            name = ' '.join(tokens[2:]).strip()
            scores[str(name)] = float(tokens[0].strip())
        if "Ranked attributes" in line:
            in_ranking = True
    return scores


def get_IG(ofile_dir, loader):
    """
    Ranks the attributes of a dataset by information gain.

    When fewer than two attributes are selected, the scores are parsed from
    the textual results and ``ofile_dir`` is removed afterwards; otherwise
    they are read from the ranked-attributes array.

    :param ofile_dir: path of the dataset to load
    :param loader: loader object providing ``load_file``
    :return: tuple ``(results, mean_score)``: dict mapping attribute name to
        score, and the mean of the scores
    :raises ZeroDivisionError: if the textual results contain no ranked attribute
    """
    import subprocess  # local import: only needed for the cleanup below

    data = loader.load_file(ofile_dir)
    data.class_is_last()

    evaluator = ASEvaluation(
        classname="weka.attributeSelection.InfoGainAttributeEval")
    search = ASSearch(
        classname="weka.attributeSelection.Ranker",
        options=["-T", "-1.7976931348623157E308", "-N", "-1"])
    attsel = AttributeSelection()
    attsel.search(search)
    attsel.evaluator(evaluator)
    attsel.select_attributes(data)

    if attsel.number_attributes_selected < 2:
        results = _parse_ranked_attributes(attsel.results_string)
        mean_score = sum(results.values()) / len(results.values())
        # Pass the path as an argument instead of splicing it into a shell
        # command, so paths with spaces or shell metacharacters are removed
        # correctly. Cleanup stays best-effort, as with os.system.
        try:
            subprocess.call(["rm", "-r", "--", ofile_dir])
        except OSError:
            pass
    else:
        ranked = attsel.ranked_attributes
        results = dict(
            (str(data.attribute(attr[0]).name), attr[1]) for attr in ranked)
        mean_score = ranked[:, 1].mean()
    return results, mean_score
def try_params(n_instances, params, base, train, valid, test, istest):
    """
    Builds an AttributeSelectedClassifier (ReliefFAttributeEval + default
    Ranker search) around ``base`` and evaluates it.

    :param n_instances: number of training instances to use (rounded to int)
    :param params: settings; reads ``weightByDistance``, ``sampleSize``,
        ``numNeighbours`` and ``sigma``
    :param base: wrapped base classifier; its ``jobject`` is handed to the meta-classifier
    :param train: training data
    :param valid: validation data, used when ``istest`` is false
    :param test: test data, used when ``istest`` is true
    :param istest: evaluate on the test set instead of the validation set
    :return: the result of ``test_weka_classifier`` or ``train_and_eval_weka_classifier``
    """
    n_instances = int(round(n_instances))
    pprint(params)

    options = []
    if params['weightByDistance'] == True:
        options.append("-W")
    options.extend([
        "-M", str(params['sampleSize']),
        "-K", str(params['numNeighbours']),
        "-A", str(params['sigma']),
    ])

    search = ASSearch(classname="weka.attributeSelection.Ranker")
    evaluator = ASEvaluation(
        classname="weka.attributeSelection.ReliefFAttributeEval",
        options=options)
    clf = Classifier(
        classname="weka.classifiers.meta.AttributeSelectedClassifier")
    clf.set_property("evaluator", evaluator.jobject)
    clf.set_property("search", search.jobject)
    # AttributeSelectedClassifier exposes its wrapped learner as the bean
    # property "classifier"; it has no "base" property, so setting "base"
    # does not install the learner.
    clf.set_property("classifier", base.jobject)

    if istest:
        return test_weka_classifier(clf, train, test)
    return train_and_eval_weka_classifier(clf, train, valid, n_instances)
def get_evaluator(params, base):
    """
    Builds an AttributeSelectedClassifier using OneRAttributeEval with a
    Ranker search.

    :param params: settings; reads ``use_training``, ``seed`` and ``minimum_bucket``
    :param base: wrapped base classifier; its ``jobject`` is handed to the meta-classifier
    :return: the configured meta-classifier
    """
    pprint(params)

    options = []
    if params['use_training'] == True:
        options.append("-D")
    options.extend([
        "-S", str(params['seed']),
        "-B", str(params['minimum_bucket']),
    ])

    # Ranker is always used here; params['search'] is not consulted.
    search = rk.get_search(rk.get_params())
    evaluator = ASEvaluation(
        classname="weka.attributeSelection.OneRAttributeEval",
        options=options)
    clf = Classifier(
        classname="weka.classifiers.meta.AttributeSelectedClassifier")
    clf.set_property("evaluator", evaluator.jobject)
    clf.set_property("search", search.jobject)
    # AttributeSelectedClassifier exposes its wrapped learner as the bean
    # property "classifier"; it has no "base" property, so setting "base"
    # does not install the learner.
    clf.set_property("classifier", base.jobject)
    return clf
def get_evaluator(params, base):
    """
    Builds an AttributeSelectedClassifier using PrincipalComponents with a
    Ranker search.

    :param params: settings; reads ``center``, ``max_a`` and ``variance``
    :param base: wrapped base classifier; its ``jobject`` is handed to the meta-classifier
    :return: the configured meta-classifier
    """
    pprint(params)

    options = []
    if params['center'] == True:
        options.append("-C")
    options.extend([
        "-A", str(params['max_a']),
        "-R", str(params['variance']),
    ])

    # Ranker is always used here; params['search'] is not consulted.
    search = rk.get_search(rk.get_params())
    evaluator = ASEvaluation(
        classname="weka.attributeSelection.PrincipalComponents",
        options=options)
    clf = Classifier(
        classname="weka.classifiers.meta.AttributeSelectedClassifier")
    clf.set_property("evaluator", evaluator.jobject)
    clf.set_property("search", search.jobject)
    # AttributeSelectedClassifier exposes its wrapped learner as the bean
    # property "classifier"; it has no "base" property, so setting "base"
    # does not install the learner.
    clf.set_property("classifier", base.jobject)
    return clf
def get_evaluator(params, base):
    """
    Builds an AttributeSelectedClassifier using WrapperSubsetEval with the
    search method named in ``params``.

    :param params: settings; reads ``ev_measure``, ``seed``, ``threshold`` and
        ``search`` (``'GreedyStepwise'``, ``'BestFirst'`` or ``'Ranker'``)
    :param base: wrapped base classifier; its ``jobject`` is handed to the meta-classifier
    :return: the configured meta-classifier
    :raises ValueError: if ``params['search']`` is not a supported search method
    """
    pprint(params)

    options = [
        "-E", str(params['ev_measure']),
        "-R", str(params['seed']),
        "-T", str(params['threshold']),
    ]

    if params['search'] == 'GreedyStepwise':
        search = gs.get_search(gs.get_params())
    elif params['search'] == 'BestFirst':
        search = bf.get_search(bf.get_params())
    elif params['search'] == 'Ranker':
        search = rk.get_search(rk.get_params())
    else:
        # previously fell through and failed later on an unbound local
        raise ValueError("unsupported search method: %r" % (params['search'],))

    evaluator = ASEvaluation(
        classname="weka.attributeSelection.WrapperSubsetEval",
        options=options)
    clf = Classifier(
        classname="weka.classifiers.meta.AttributeSelectedClassifier")
    clf.set_property("evaluator", evaluator.jobject)
    clf.set_property("search", search.jobject)
    # AttributeSelectedClassifier exposes its wrapped learner as the bean
    # property "classifier"; it has no "base" property, so setting "base"
    # does not install the learner.
    clf.set_property("classifier", base.jobject)
    return clf
pkg.install_package(chisq_name) print("pkg %s installed, please restart" % chisq_name) jvm.stop() sys.exit(1) """ data_dir = "\\\\egr-1l11qd2\\CLS_lab\\Junya Zhao\\Data driven model _paper [June 25_2018\\FeatureSelection\\EvlSearch\\" globbed_files = glob.glob(data_dir + "*.csv") for csv in globbed_files: data = converters.load_any_file(csv) data.class_is_last() search = ASSearch(classname="weka.attributeSelection.EvolutionarySearch", options=[ "-population-size", "200", "-generations", "500", "-crossover-probability", "0.6" ]) evaluator = ASEvaluation(classname="weka.attributeSelection.CfsSubsetEval", options=["-P", "1", "E", "1"]) attsel = AttributeSelection() attsel.folds(10) attsel.crossvalidation(True) attsel.seed(1) attsel.search(search) attsel.evaluator(evaluator) attsel.select_attributes(data) evl = Evaluation(data) print("# attributes: " + str(attsel.number_attributes_selected)) print("attributes: " + str(attsel.selected_attributes)) print("result string:\n" + attsel.results_string) print(evl) # write the report for each file with open(f"{csv}._report.csv", "a") as outfile: outfile.write(attsel.results_string)
["-D", "0", "-N", "1"] ), ( ["-F", "10", "-T", "-1", "-B", "weka.classifiers.trees.J48"], ["-D", "1", "-N", "2"] ), ( ["-F", "10", "-T", "-1", "-B", "weka.classifiers.trees.J48"], ["-D", "2", "-N", "2"] ), ) # attribute selection for setup in setups: evl, search = setup aseval = ASEvaluation(classname="weka.attributeSelection.WrapperSubsetEval", options=evl) assearch = ASSearch(classname="weka.attributeSelection.BestFirst", options=search) print("\n--> Attribute selection\n") print(aseval.to_commandline()) print(assearch.to_commandline()) attsel = AttributeSelection() attsel.evaluator(aseval) attsel.search(assearch) attsel.select_attributes(data) print(attsel.results_string) # cross-validation aseval = ASEvaluation(classname="weka.attributeSelection.WrapperSubsetEval", options=["-F", "10", "-B", "weka.classifiers.trees.J48"]) assearch = ASSearch(classname="weka.attributeSelection.BestFirst",